1 #-----------------------------------------------------------------------------
4 # Author(s): David M. Beazley (dave@dabeaz.com)
6 # Copyright (C) 2001-2007, David M. Beazley
8 # This library is free software; you can redistribute it and/or
9 # modify it under the terms of the GNU Lesser General Public
10 # License as published by the Free Software Foundation; either
11 # version 2.1 of the License, or (at your option) any later version.
13 # This library is distributed in the hope that it will be useful,
14 # but WITHOUT ANY WARRANTY; without even the implied warranty of
15 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16 # Lesser General Public License for more details.
18 # You should have received a copy of the GNU Lesser General Public
19 # License along with this library; if not, write to the Free Software
20 # Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
22 # See the file COPYING for a complete copy of the LGPL.
25 # This implements an LR parser that is constructed from grammar rules defined
26 # as Python functions. The grammer is specified by supplying the BNF inside
27 # Python documentation strings. The inspiration for this technique was borrowed
28 # from John Aycock's Spark parsing system. PLY might be viewed as a cross between
29 # Spark and the GNU bison utility.
31 # The current implementation is only somewhat object-oriented. The
32 # LR parser itself is defined in terms of an object (which allows multiple
33 # parsers to co-exist). However, most of the variables used during table
34 # construction are defined in terms of global variables. Users shouldn't
35 # notice unless they are trying to define multiple parsers at the same
36 # time using threads (in which case they should have their head examined).
38 # This implementation supports both SLR and LALR(1) parsing. LALR(1)
39 # support was originally implemented by Elias Ioup (ezioup@alumni.uchicago.edu),
40 # using the algorithm found in Aho, Sethi, and Ullman "Compilers: Principles,
41 # Techniques, and Tools" (The Dragon Book). LALR(1) has since been replaced
42 # by the more efficient DeRemer and Pennello algorithm.
44 # :::::::: WARNING :::::::
46 # Construction of LR parsing tables is fairly complicated and expensive.
47 # To make this module run fast, a *LOT* of work has been put into
48 # optimization---often at the expense of readability and what one might
49 # consider to be good Python "coding style."  Modify the code at your own risk!
51 # ----------------------------------------------------------------------------
55 #-----------------------------------------------------------------------------
56 # === User configurable parameters ===
58 # Change these to modify the default behavior of yacc (if you wish)
59 #-----------------------------------------------------------------------------
# Debugging mode.  If set, yacc generates a 'parser.out' file
# in the current directory.
yaccdebug = 1

# Default name of the debugging file.
debug_file = 'parser.out'

# Default name of the generated table module.
tab_module = 'parsetab'

# Default LR table generation method ('LALR' or 'SLR').
default_lr = 'LALR'

# Number of symbols that must be shifted to leave error recovery mode.
error_count = 3
70 import re
, types
, sys
, cStringIO
, md5
, os
.path
72 # Exception raised for yacc-related errors
class YaccError(Exception):
    """Exception raised for yacc-related errors."""
    pass
75 # Available instance types. This is used when parsers are defined by a class.
76 # it's a little funky because I want to preserve backwards compatibility
77 # with Python 2.0 where types.ObjectType is undefined.
# Determine the tuple of types recognized as "instances" when parsers
# are defined by a class.  Python 2.0 has no types.ObjectType (no
# new-style classes), so probing it raises AttributeError and we fall
# back to classic instances only.
# NOTE(review): the original `try:` line was lost in this copy; restored
# here — the dangling `except AttributeError:` makes it unambiguous.
try:
    _INSTANCETYPE = (types.InstanceType, types.ObjectType)
except AttributeError:
    _INSTANCETYPE = types.InstanceType
    class object: pass     # Note: needed if no new-style classes present
85 #-----------------------------------------------------------------------------
86 # === LR Parsing Engine ===
88 # The following classes are used for the LR parser itself. These are not
89 # used during table construction and are independent of the actual LR
90 # table generation algorithm
91 #-----------------------------------------------------------------------------
93 # This class is used to hold non-terminal grammar symbols during parsing.
94 # It normally has the following attributes set:
95 # .type = Grammar symbol type
96 # .value = Symbol value
97 # .lineno = Starting line number
98 # .endlineno = Ending line number (optional, set automatically)
99 # .lexpos = Starting lex position
100 # .endlexpos = Ending lex position (optional, set automatically)
class YaccSymbol(object):
    """Container for a non-terminal grammar symbol during parsing.

    Attributes set by the parser include .type, .value, .lineno,
    .endlineno (optional), .lexpos, and .endlexpos (optional).
    """

    def __str__(self):
        # A symbol displays as its grammar type name.
        return self.type

    def __repr__(self):
        # repr() mirrors str() for readable debugging output.
        return str(self)
106 # This class is a wrapper around the objects actually passed to each
107 # grammar rule. Index lookup and assignment actually assign the
108 # .value attribute of the underlying YaccSymbol object.
109 # The lineno() method returns the line number of a given
110 # item (or 0 if not defined). The linespan() method returns
111 # a tuple of (startline,endline) representing the range of lines
112 # for a symbol. The lexspan() method returns a tuple (lexpos,endlexpos)
113 # representing the range of positional information for a symbol.
115 class YaccProduction
:
116 def __init__(self
,s
,stack
=None):
120 def __getitem__(self
,n
):
121 if n
>= 0: return self
.slice[n
].value
122 else: return self
.stack
[n
].value
124 def __setitem__(self
,n
,v
):
125 self
.slice[n
].value
= v
127 def __getslice__(self
,i
,j
):
128 return [s
.value
for s
in self
.slice[i
:j
]]
131 return len(self
.slice)
134 return getattr(self
.slice[n
],"lineno",0)
136 def linespan(self
,n
):
137 startline
= getattr(self
.slice[n
],"lineno",0)
138 endline
= getattr(self
.slice[n
],"endlineno",startline
)
139 return startline
,endline
142 return getattr(self
.slice[n
],"lexpos",0)
145 startpos
= getattr(self
.slice[n
],"lexpos",0)
146 endpos
= getattr(self
.slice[n
],"endlexpos",startpos
)
147 return startpos
,endpos
149 def pushback(self
,n
):
151 raise ValueError, "Expected a positive value"
152 if n
> (len(self
.slice)-1):
153 raise ValueError, "Can't push %d tokens. Only %d are available." % (n
,len(self
.slice)-1)
155 self
.pbstack
.append(self
.slice[-i
-1])
157 # The LR Parsing engine. This is defined as a class so that multiple parsers
158 # can exist in the same process. A user never instantiates this directly.
159 # Instead, the global yacc() function should be used to create a suitable Parser
163 def __init__(self
,magic
=None):
165 # This is a hack to keep users from trying to instantiate a Parser
169 raise YaccError
, "Can't instantiate Parser. Use yacc() instead."
171 # Reset internal state
172 self
.productions
= None # List of productions
173 self
.errorfunc
= None # Error handling function
174 self
.action
= { } # LR Action table
175 self
.goto
= { } # LR goto table
176 self
.require
= { } # Attribute require table
177 self
.method
= "Unknown LR" # Table construction method used
183 del self
.statestack
[:]
187 self
.symstack
.append(sym
)
188 self
.statestack
.append(0)
190 def parse(self
,input=None,lexer
=None,debug
=0,tracking
=0):
191 lookahead
= None # Current lookahead symbol
192 lookaheadstack
= [ ] # Stack of lookahead symbols
193 actions
= self
.action
# Local reference to action table
194 goto
= self
.goto
# Local reference to goto table
195 prod
= self
.productions
# Local reference to production list
196 pslice
= YaccProduction(None) # Production object passed to grammar rules
197 errorcount
= 0 # Used during error recovery
199 # If no lexer was given, we will try to use the lex module
207 # If input was supplied, pass to lexer
212 get_token
= lexer
.token
214 statestack
= [ ] # Stack of parsing states
215 self
.statestack
= statestack
216 symstack
= [ ] # Stack of grammar symbols
217 self
.symstack
= symstack
219 pslice
.stack
= symstack
# Put in the production
220 errtoken
= None # Err token
222 # The start state is assumed to be (0,$end)
230 # Get the next symbol on the input. If a lookahead symbol
231 # is already set, we just use that. Otherwise, we'll pull
232 # the next token off of the lookaheadstack or from the lexer
236 if not lookaheadstack
:
237 lookahead
= get_token() # Get the next token
239 lookahead
= lookaheadstack
.pop()
241 lookahead
= YaccSymbol()
242 lookahead
.type = '$end'
244 errorlead
= ("%s . %s" % (" ".join([xx
.type for xx
in symstack
][1:]), str(lookahead
))).lstrip()
246 # Check the action table
247 ltype
= lookahead
.type
248 t
= actions
[state
].get(ltype
)
254 # shift a symbol on the stack
256 # Error, end of input
257 sys
.stderr
.write("yacc: Parse error. EOF\n")
262 sys
.stderr
.write("%-60s shift state %s\n" % (errorlead
, t
))
263 symstack
.append(lookahead
)
266 # Decrease error count on successful shift
267 if errorcount
: errorcount
-=1
271 # reduce a symbol on the stack, emit a production
276 # Get production function
278 sym
.type = pname
# Production name
281 sys
.stderr
.write("%-60s reduce %d\n" % (errorlead
, -t
))
284 targ
= symstack
[-plen
-1:]
288 sym
.lineno
= t1
.lineno
289 sym
.lexpos
= t1
.lexpos
291 sym
.endlineno
= getattr(t1
,"endlineno",t1
.lineno
)
292 sym
.endlexpos
= getattr(t1
,"endlexpos",t1
.lexpos
)
294 del statestack
[-plen
:]
297 sym
.lineno
= lexer
.lineno
298 sym
.lexpos
= lexer
.lexpos
302 # Call the grammar rule with our special slice object
305 # If there was a pushback, put that on the stack
307 lookaheadstack
.append(lookahead
)
308 for _t
in pslice
.pbstack
:
309 lookaheadstack
.append(_t
)
314 state
= goto
[statestack
[-1]][pname
]
315 statestack
.append(state
)
320 return getattr(n
,"value",None)
324 sys
.stderr
.write(errorlead
+ "\n")
325 # We have some kind of parsing error here. To handle
326 # this, we are going to push the current token onto
327 # the tokenstack and replace it with an 'error' token.
328 # If there are any synchronization rules, they may
331 # In addition to pushing the error token, we call
332 # the user defined p_error() function if this is the
333 # first syntax error. This function is only called if
335 if errorcount
== 0 or self
.errorok
:
336 errorcount
= error_count
339 if errtoken
.type == '$end':
340 errtoken
= None # End of file!
342 global errok
,token
,restart
343 errok
= self
.errok
# Set some special functions available in error recovery
345 restart
= self
.restart
346 tok
= self
.errorfunc(errtoken
)
347 del errok
, token
, restart
# Delete special functions
350 # User must have done some kind of panic
351 # mode recovery on their own. The
352 # returned token is the next lookahead
358 if hasattr(errtoken
,"lineno"): lineno
= lookahead
.lineno
361 sys
.stderr
.write("yacc: Syntax error at line %d, token=%s\n" % (lineno
, errtoken
.type))
363 sys
.stderr
.write("yacc: Syntax error, token=%s" % errtoken
.type)
365 sys
.stderr
.write("yacc: Parse error in input. EOF\n")
369 errorcount
= error_count
371 # case 1: the statestack only has 1 entry on it. If we're in this state, the
372 # entire parse has been rolled back and we're completely hosed. The token is
373 # discarded and we just keep going.
375 if len(statestack
) <= 1 and lookahead
.type != '$end':
378 # Nuke the pushback stack
379 del lookaheadstack
[:]
382 # case 2: the statestack has a couple of entries on it, but we're
383 # at the end of the file. nuke the top entry and generate an error token
385 # Start nuking entries on the stack
386 if lookahead
.type == '$end':
387 # Whoa. We're really hosed here. Bail out
390 if lookahead
.type != 'error':
392 if sym
.type == 'error':
393 # Hmmm. Error is on top of stack, we'll just nuke input
394 # symbol and continue
399 if hasattr(lookahead
,"lineno"):
400 t
.lineno
= lookahead
.lineno
402 lookaheadstack
.append(lookahead
)
410 # Call an error function here
411 raise RuntimeError, "yacc: internal parser error!!!\n"
413 # -----------------------------------------------------------------------------
414 # === Parser Construction ===
416 # The following functions and variables are used to implement the yacc() function
417 # itself. This is pretty hairy stuff involving lots of error checking,
418 # construction of LR items, kernels, and so forth. Although a lot of
419 # this work is done using global variables, the resulting Parser object
420 # is completely self contained--meaning that it is safe to repeatedly
421 # call yacc() with different grammars in the same application.
422 # -----------------------------------------------------------------------------
424 # -----------------------------------------------------------------------------
427 # This function checks to see if there are duplicated p_rulename() functions
428 # in the parser module file. Without this function, it is really easy for
429 # users to make mistakes by cutting and pasting code fragments (and it's a real
430 # bugger to try and figure out why the resulting parser doesn't work). Therefore,
431 # we just do a little regular expression pattern matching of def statements
432 # to try and detect duplicates.
433 # -----------------------------------------------------------------------------
435 def validate_file(filename
):
436 base
,ext
= os
.path
.splitext(filename
)
437 if ext
!= '.py': return 1 # No idea. Assume it's okay.
441 lines
= f
.readlines()
446 # Match def p_funcname(
447 fre
= re
.compile(r
'\s*def\s+(p_[a-zA-Z_0-9]*)\(')
455 prev
= counthash
.get(name
)
457 counthash
[name
] = linen
459 sys
.stderr
.write("%s:%d: Function %s redefined. Previously defined on line %d\n" % (filename
,linen
,name
,prev
))
464 # This function looks for functions that might be grammar rules, but which don't have the proper p_suffix.
465 def validate_dict(d
):
466 for n
,v
in d
.items():
467 if n
[0:2] == 'p_' and type(v
) in (types
.FunctionType
, types
.MethodType
): continue
468 if n
[0:2] == 't_': continue
471 sys
.stderr
.write("yacc: Warning. '%s' not defined as a function\n" % n
)
472 if 1 and isinstance(v
,types
.FunctionType
) and v
.func_code
.co_argcount
== 1:
474 doc
= v
.__doc
__.split(" ")
476 sys
.stderr
.write("%s:%d: Warning. Possible grammar rule '%s' defined without p_ prefix.\n" % (v
.func_code
.co_filename
, v
.func_code
.co_firstlineno
,n
))
477 except StandardError:
480 # -----------------------------------------------------------------------------
481 # === GRAMMAR FUNCTIONS ===
483 # The following global variables and functions are used to store, manipulate,
484 # and verify the grammar rules specified by the user.
485 # -----------------------------------------------------------------------------
487 # Initialize all of the global variables used during grammar construction
def initialize_vars():
    """Reset the module-level tables used during grammar construction.

    yacc() may be called repeatedly with different grammars in the same
    application; this wipes all global construction state so each run
    starts from a clean slate.
    """
    global Productions, Prodnames, Prodmap, Terminals
    global Nonterminals, First, Follow, Precedence, LRitems
    global Errorfunc, Signature, Requires

    Productions  = [None]  # A list of all of the productions.  The first
                           # entry is always reserved for the purpose of
                           # building an augmented grammar.

    Prodnames    = { }     # A dictionary mapping the names of nonterminals to a list of all
                           # productions of that nonterminal.

    Prodmap      = { }     # A dictionary that is only used to detect duplicate
                           # productions.

    Terminals    = { }     # A dictionary mapping the names of terminal symbols to a
                           # list of the rules where they are used.

    Nonterminals = { }     # A dictionary mapping names of nonterminals to a list
                           # of rule numbers where they are used.

    First        = { }     # A dictionary of precomputed FIRST(x) symbols.

    Follow       = { }     # A dictionary of precomputed FOLLOW(x) symbols.

    Precedence   = { }     # Precedence rules for each terminal. Contains tuples of the
                           # form ('right',level) or ('nonassoc', level) or ('left',level).

    LRitems      = [ ]     # A list of all LR items for the grammar.  These are the
                           # productions with the "dot" like E -> E . PLUS E

    Errorfunc    = None    # User defined error handler

    Signature    = md5.new()  # Digital signature of the grammar rules, precedence
                              # and other information.  Used to determine when a
                              # parsing table needs to be regenerated.

    Requires     = { }     # Requires list

    # File objects used when creating the parser.out debugging file.
    # NOTE(review): the `global _vf,_vfc` declaration was missing from this
    # copy; restored — without it these assignments would create function
    # locals and the module-level debug buffers would never be initialized.
    global _vf, _vfc
    _vf  = cStringIO.StringIO()
    _vfc = cStringIO.StringIO()
532 # -----------------------------------------------------------------------------
535 # This class stores the raw information about a single production or grammar rule.
536 # It has a few required attributes:
538 # name - Name of the production (nonterminal)
539 # prod - A list of symbols making up its production
540 # number - Production number.
542 # In addition, a few additional attributes are used to help with debugging or
543 # optimization of table generation.
545 # file - File where production action is defined.
546 # lineno - Line number where action is defined
547 # func - Action function
548 # prec - Precedence level
549 # lr_next - Next LR item. Example, if we are ' E -> E . PLUS E'
550 # then lr_next refers to 'E -> E PLUS . E'
551 # lr_index - LR item index (location of the ".") in the prod list.
552 # lookaheads - LALR lookahead symbols for this item
553 # len - Length of the production (number of symbols on right hand side)
554 # -----------------------------------------------------------------------------
557 def __init__(self
,**kw
):
558 for k
,v
in kw
.items():
561 self
.lr0_added
= 0 # Flag indicating whether or not added to LR0 closure
562 self
.lr1_added
= 0 # Flag indicating whether or not added to LR1
564 self
.lookaheads
= { }
566 self
.setnumbers
= [ ]
570 s
= "%s -> %s" % (self
.name
," ".join(self
.prod
))
572 s
= "%s -> <empty>" % self
.name
578 # Compute lr_items from the production
580 if n
> len(self
.prod
): return None
583 p
.prod
= list(self
.prod
)
584 p
.number
= self
.number
587 p
.setnumbers
= self
.setnumbers
589 p
.prod
= tuple(p
.prod
)
593 # Precompute list of productions immediately following
595 p
.lrafter
= Prodnames
[p
.prod
[n
+1]]
596 except (IndexError,KeyError),e
:
599 p
.lrbefore
= p
.prod
[n
-1]
605 class MiniProduction
:
# Regular expression matching a legal rule/symbol identifier:
# letters, digits, underscore, and hyphen (full-string match).
_is_identifier = re.compile(r'^[a-zA-Z0-9_-]+$')
611 # -----------------------------------------------------------------------------
614 # Given an action function, this function assembles a production rule.
615 # The production rule is assumed to be found in the function's docstring.
616 # This rule has the general syntax:
618 # name1 ::= production1
623 # name2 ::= production1
626 # -----------------------------------------------------------------------------
628 def add_production(f
,file,line
,prodname
,syms
):
630 if Terminals
.has_key(prodname
):
631 sys
.stderr
.write("%s:%d: Illegal rule name '%s'. Already defined as a token.\n" % (file,line
,prodname
))
633 if prodname
== 'error':
634 sys
.stderr
.write("%s:%d: Illegal rule name '%s'. error is a reserved word.\n" % (file,line
,prodname
))
637 if not _is_identifier
.match(prodname
):
638 sys
.stderr
.write("%s:%d: Illegal rule name '%s'\n" % (file,line
,prodname
))
641 for x
in range(len(syms
)):
647 sys
.stderr
.write("%s:%d: Literal token %s in rule '%s' may only be a single character\n" % (file,line
,s
, prodname
))
649 if not Terminals
.has_key(c
):
655 if not _is_identifier
.match(s
) and s
!= '%prec':
656 sys
.stderr
.write("%s:%d: Illegal name '%s' in rule '%s'\n" % (file,line
,s
, prodname
))
659 # See if the rule is already in the rulemap
660 map = "%s -> %s" % (prodname
,syms
)
661 if Prodmap
.has_key(map):
663 sys
.stderr
.write("%s:%d: Duplicate rule %s.\n" % (file,line
, m
))
664 sys
.stderr
.write("%s:%d: Previous definition at %s:%d\n" % (file,line
, m
.file, m
.line
))
673 p
.number
= len(Productions
)
676 Productions
.append(p
)
678 if not Nonterminals
.has_key(prodname
):
679 Nonterminals
[prodname
] = [ ]
681 # Add all terminals to Terminals
683 while i
< len(p
.prod
):
687 precname
= p
.prod
[i
+1]
689 sys
.stderr
.write("%s:%d: Syntax error. Nothing follows %%prec.\n" % (p
.file,p
.line
))
692 prec
= Precedence
.get(precname
,None)
694 sys
.stderr
.write("%s:%d: Nothing known about the precedence of '%s'\n" % (p
.file,p
.line
,precname
))
702 if Terminals
.has_key(t
):
703 Terminals
[t
].append(p
.number
)
704 # Is a terminal. We'll assign a precedence to p based on this
705 if not hasattr(p
,"prec"):
706 p
.prec
= Precedence
.get(t
,('right',0))
708 if not Nonterminals
.has_key(t
):
709 Nonterminals
[t
] = [ ]
710 Nonterminals
[t
].append(p
.number
)
713 if not hasattr(p
,"prec"):
716 # Set final length of productions
718 p
.prod
= tuple(p
.prod
)
720 # Calculate unique syms in the production
726 # Add to the global productions list
728 Prodnames
[p
.name
].append(p
)
730 Prodnames
[p
.name
] = [ p
]
733 # Given a raw rule function, this function rips out its doc string
734 # and adds rules to the grammar
737 line
= f
.func_code
.co_firstlineno
738 file = f
.func_code
.co_filename
741 if isinstance(f
,types
.MethodType
):
746 if f
.func_code
.co_argcount
> reqdargs
:
747 sys
.stderr
.write("%s:%d: Rule '%s' has too many arguments.\n" % (file,line
,f
.__name
__))
750 if f
.func_code
.co_argcount
< reqdargs
:
751 sys
.stderr
.write("%s:%d: Rule '%s' requires an argument.\n" % (file,line
,f
.__name
__))
755 # Split the doc string into lines
756 pstrings
= f
.__doc
__.splitlines()
765 # This is a continuation of a previous rule
767 sys
.stderr
.write("%s:%d: Misplaced '|'.\n" % (file,dline
))
782 if assign
!= ':' and assign
!= '::=':
783 sys
.stderr
.write("%s:%d: Syntax error. Expected ':'\n" % (file,dline
))
787 e
= add_production(f
,file,dline
,prodname
,syms
)
791 except StandardError:
792 sys
.stderr
.write("%s:%d: Syntax error in rule '%s'\n" % (file,dline
,ps
))
795 sys
.stderr
.write("%s:%d: No documentation string specified in function '%s'\n" % (file,line
,f
.__name
__))
799 # Cycle checking code (Michael Dyck)
801 def compute_reachable():
803 Find each symbol that can be reached from the start symbol.
804 Print a warning for any nonterminals that can't be reached.
805 (Unused terminals have already had their warning.)
808 for s
in Terminals
.keys() + Nonterminals
.keys():
811 mark_reachable_from( Productions
[0].prod
[0], Reachable
)
813 for s
in Nonterminals
.keys():
815 sys
.stderr
.write("yacc: Symbol '%s' is unreachable.\n" % s
)
817 def mark_reachable_from(s
, Reachable
):
819 Mark all symbols that are reachable from symbol s.
822 # We've already reached symbol s.
825 for p
in Prodnames
.get(s
,[]):
827 mark_reachable_from(r
, Reachable
)
829 # -----------------------------------------------------------------------------
830 # compute_terminates()
832 # This function looks at the various parsing rules and tries to detect
833 # infinite recursion cycles (grammar rules where there is no possible way
834 # to derive a string of only terminals).
835 # -----------------------------------------------------------------------------
836 def compute_terminates():
838 Raise an error for any symbols that don't terminate.
843 for t
in Terminals
.keys():
846 Terminates
['$end'] = 1
850 # Initialize to false:
851 for n
in Nonterminals
.keys():
854 # Then propagate termination until no change:
857 for (n
,pl
) in Prodnames
.items():
858 # Nonterminal n terminates iff any of its productions terminates.
860 # Production p terminates iff all of its rhs symbols terminate.
862 if not Terminates
[s
]:
863 # The symbol s does not terminate,
864 # so production p does not terminate.
868 # didn't break from the loop,
869 # so every symbol s terminates
870 # so production p terminates.
874 # symbol n terminates!
875 if not Terminates
[n
]:
878 # Don't need to consider any more productions for this n.
885 for (s
,terminates
) in Terminates
.items():
887 if not Prodnames
.has_key(s
) and not Terminals
.has_key(s
) and s
!= 'error':
888 # s is used-but-not-defined, and we've already warned of that,
889 # so it would be overkill to say that it's also non-terminating.
892 sys
.stderr
.write("yacc: Infinite recursion detected for symbol '%s'.\n" % s
)
897 # -----------------------------------------------------------------------------
898 # verify_productions()
900 # This function examines all of the supplied rules to see if they seem valid.
901 # -----------------------------------------------------------------------------
902 def verify_productions(cycle_check
=1):
904 for p
in Productions
:
908 if not Prodnames
.has_key(s
) and not Terminals
.has_key(s
) and s
!= 'error':
909 sys
.stderr
.write("%s:%d: Symbol '%s' used, but not defined as a token or a rule.\n" % (p
.file,p
.line
,s
))
914 # Now verify all of the tokens
916 _vf
.write("Unused terminals:\n\n")
917 for s
,v
in Terminals
.items():
918 if s
!= 'error' and not v
:
919 sys
.stderr
.write("yacc: Warning. Token '%s' defined, but not used.\n" % s
)
920 if yaccdebug
: _vf
.write(" %s\n"% s
)
923 # Print out all of the productions
925 _vf
.write("\nGrammar\n\n")
926 for i
in range(1,len(Productions
)):
927 _vf
.write("Rule %-5d %s\n" % (i
, Productions
[i
]))
930 # Verify the use of all productions
931 for s
,v
in Nonterminals
.items():
934 sys
.stderr
.write("%s:%d: Warning. Rule '%s' defined, but not used.\n" % (p
.file,p
.line
, s
))
939 sys
.stderr
.write("yacc: Warning. There is 1 unused token.\n")
941 sys
.stderr
.write("yacc: Warning. There are %d unused tokens.\n" % unused_tok
)
944 sys
.stderr
.write("yacc: Warning. There is 1 unused rule.\n")
946 sys
.stderr
.write("yacc: Warning. There are %d unused rules.\n" % unused_prod
)
949 _vf
.write("\nTerminals, with rules where they appear\n\n")
950 ks
= Terminals
.keys()
953 _vf
.write("%-20s : %s\n" % (k
, " ".join([str(s
) for s
in Terminals
[k
]])))
954 _vf
.write("\nNonterminals, with rules where they appear\n\n")
955 ks
= Nonterminals
.keys()
958 _vf
.write("%-20s : %s\n" % (k
, " ".join([str(s
) for s
in Nonterminals
[k
]])))
962 error
+= compute_terminates()
963 # error += check_cycles()
966 # -----------------------------------------------------------------------------
969 # This function walks the list of productions and builds a complete set of the
970 # LR items. The LR items are stored in two ways: First, they are uniquely
971 # numbered and placed in the list _lritems. Second, a linked list of LR items
972 # is built for each production. For example:
978 # [E -> . E PLUS E, E -> E . PLUS E, E -> E PLUS . E, E -> E PLUS E . ]
979 # -----------------------------------------------------------------------------
982 for p
in Productions
:
988 lastlri
.lr_next
= lri
990 lri
.lr_num
= len(LRitems
)
995 # In order for the rest of the parser generator to work, we need to
996 # guarantee that no more lritems are generated. Therefore, we nuke
997 # the p.lr_item method. (Only used in debugging)
998 # Production.lr_item = None
1000 # -----------------------------------------------------------------------------
1003 # Given a list of precedence rules, add to the precedence table.
1004 # -----------------------------------------------------------------------------
1006 def add_precedence(plist
):
1014 if prec
!= 'left' and prec
!= 'right' and prec
!= 'nonassoc':
1015 sys
.stderr
.write("yacc: Invalid precedence '%s'\n" % prec
)
1018 if Precedence
.has_key(t
):
1019 sys
.stderr
.write("yacc: Precedence already specified for terminal '%s'\n" % t
)
1022 Precedence
[t
] = (prec
,plevel
)
1024 sys
.stderr
.write("yacc: Invalid precedence table.\n")
1029 # -----------------------------------------------------------------------------
1032 # Compute the augmented grammar. This is just a rule S' -> start where start
1033 # is the starting symbol.
1034 # -----------------------------------------------------------------------------
1036 def augment_grammar(start
=None):
1038 start
= Productions
[1].name
1039 Productions
[0] = Production(name
="S'",prod
=[start
],number
=0,len=1,prec
=('right',0),func
=None)
1040 Productions
[0].usyms
= [ start
]
1041 Nonterminals
[start
].append(0)
1044 # -------------------------------------------------------------------------
1047 # Compute the value of FIRST1(beta) where beta is a tuple of symbols.
1049 # During execution of compute_first1, the result may be incomplete.
1050 # Afterward (e.g., when called from compute_follow()), it will be complete.
1051 # -------------------------------------------------------------------------
1054 # We are computing First(x1,x2,x3,...,xn)
1057 x_produces_empty
= 0
1059 # Add all the non-<empty> symbols of First[x] to the result.
1062 x_produces_empty
= 1
1064 if f
not in result
: result
.append(f
)
1066 if x_produces_empty
:
1067 # We have to consider the next x in beta,
1068 # i.e. stay in the loop.
1071 # We don't have to consider any further symbols in beta.
1074 # There was no 'break' from the loop,
1075 # so x_produces_empty was true for all x in beta,
1076 # so beta produces empty as well.
1077 result
.append('<empty>')
1083 # Given a non-terminal. This function computes the set of all symbols
1084 # that might follow it. Dragon book, p. 189.
1086 def compute_follow(start
=None):
1087 # Add '$end' to the follow list of the start symbol
1088 for k
in Nonterminals
.keys():
1092 start
= Productions
[1].name
1094 Follow
[start
] = [ '$end' ]
1098 for p
in Productions
[1:]:
1099 # Here is the production set
1100 for i
in range(len(p
.prod
)):
1102 if Nonterminals
.has_key(B
):
1103 # Okay. We got a non-terminal in a production
1104 fst
= first(p
.prod
[i
+1:])
1107 if f
!= '<empty>' and f
not in Follow
[B
]:
1112 if hasempty
or i
== (len(p
.prod
)-1):
1113 # Add elements of follow(a) to follow(b)
1114 for f
in Follow
[p
.name
]:
1115 if f
not in Follow
[B
]:
1118 if not didadd
: break
1121 _vf
.write('\nFollow:\n')
1122 for k
in Nonterminals
.keys():
1123 _vf
.write("%-20s : %s\n" % (k
, " ".join([str(s
) for s
in Follow
[k
]])))
1125 # -------------------------------------------------------------------------
1128 # Compute the value of FIRST1(X) for all symbols
1129 # -------------------------------------------------------------------------
1130 def compute_first1():
1133 for t
in Terminals
.keys():
1136 First
['$end'] = ['$end']
1137 First
['#'] = ['#'] # what's this for?
1141 # Initialize to the empty set:
1142 for n
in Nonterminals
.keys():
1145 # Then propagate symbols until no change:
1148 for n
in Nonterminals
.keys():
1149 for p
in Prodnames
[n
]:
1150 for f
in first(p
.prod
):
1151 if f
not in First
[n
]:
1152 First
[n
].append( f
)
1158 _vf
.write('\nFirst:\n')
1159 for k
in Nonterminals
.keys():
1160 _vf
.write("%-20s : %s\n" %
1161 (k
, " ".join([str(s
) for s
in First
[k
]])))
1163 # -----------------------------------------------------------------------------
1164 # === SLR Generation ===
1166 # The following functions are used to construct SLR (Simple LR) parsing tables
1167 # as described on p.221-229 of the dragon book.
1168 # -----------------------------------------------------------------------------
1170 # Global variables for the LR parsing engine
1172 global _lr_action
, _lr_goto
, _lr_method
1173 global _lr_goto_cache
, _lr0_cidhash
1175 _lr_action
= { } # Action table
1176 _lr_goto
= { } # Goto table
1177 _lr_method
= "Unknown" # LR method used
1178 _lr_goto_cache
= { }
1182 # Compute the LR(0) closure operation on I, where I is a set of LR(0) items.
1183 # prodlist is a list of productions.
1185 _add_count
= 0 # Counter used to detect cycles
1191 prodlist
= Productions
1193 # Add everything in I to J
1200 if x
.lr0_added
== _add_count
: continue
1203 x
.lr0_added
= _add_count
1208 # Compute the LR(0) goto function goto(I,X) where I is a set
1209 # of LR(0) items and X is a grammar symbol. This function is written
1210 # in a way that guarantees uniqueness of the generated goto sets
1211 # (i.e. the same goto set will never be returned as two different Python
1212 # objects). With uniqueness, we can later do fast set comparisons using
1213 # id(obj) instead of element-wise comparison.
1216 # First we look for a previously cached entry
1217 g
= _lr_goto_cache
.get((id(I
),x
),None)
1220 # Now we generate the goto set in a way that guarantees uniqueness
1223 s
= _lr_goto_cache
.get(x
,None)
1226 _lr_goto_cache
[x
] = s
1231 if n
and n
.lrbefore
== x
:
1232 s1
= s
.get(id(n
),None)
1238 g
= s
.get('$end',None)
1245 _lr_goto_cache
[(id(I
),x
)] = g
1250 # Compute the LR(0) sets of item function
1253 C
= [ lr0_closure([Productions
[0].lr_next
]) ]
1256 _lr0_cidhash
[id(I
)] = i
1259 # Loop over the items in C and each grammar symbols
1265 # Collect all of the symbols that could possibly be in the goto(I,X) sets
1271 for x
in asyms
.keys():
1274 if _lr0_cidhash
.has_key(id(g
)): continue
1275 _lr0_cidhash
[id(g
)] = len(C
)
1280 # -----------------------------------------------------------------------------
1281 # ==== LALR(1) Parsing ====
1283 # LALR(1) parsing is almost exactly the same as SLR except that instead of
1284 # relying upon Follow() sets when performing reductions, a more selective
1285 # lookahead set that incorporates the state of the LR(0) machine is utilized.
1286 # Thus, we mainly just have to focus on calculating the lookahead sets.
1288 # The method used here is due to DeRemer and Pennello (1982).
1290 # DeRemer, F. L., and T. J. Pennello: "Efficient Computation of LALR(1)
1291 # Lookahead Sets", ACM Transactions on Programming Languages and Systems,
1292 # Vol. 4, No. 4, Oct. 1982, pp. 615-649
1294 # Further details can also be found in:
1296 # J. Tremblay and P. Sorenson, "The Theory and Practice of Compiler Writing",
1297 # McGraw-Hill Book Company, (1985).
1299 # Note: This implementation is a complete replacement of the LALR(1)
1300 # implementation in PLY-1.x releases. That version was based on
1301 # a less efficient algorithm and it had bugs in its implementation.
1302 # -----------------------------------------------------------------------------
1304 # -----------------------------------------------------------------------------
1305 # compute_nullable_nonterminals()
1307 # Creates a dictionary containing all of the non-terminals that might produce
1308 # an empty production.
1309 # -----------------------------------------------------------------------------
def compute_nullable_nonterminals():
    """Return a dict whose keys are the nonterminals that can derive the
    empty string.  Needed by the LALR(1) lookahead computation."""
    nullable = {}
    num_nullable = 0
    # Iterate to a fixed point: keep adding nonterminals until a full pass
    # discovers nothing new.
    while 1:
        for p in Productions[1:]:
            if p.len == 0:
                # Directly empty production: the name is trivially nullable
                nullable[p.name] = 1
                continue
            for t in p.prod:
                # Any non-nullable symbol on the right disqualifies this rule
                if not nullable.has_key(t): break
            else:
                # Every right-hand symbol is nullable, so the name is too
                nullable[p.name] = 1
        if len(nullable) == num_nullable: break
        num_nullable = len(nullable)
    return nullable
1327 # -----------------------------------------------------------------------------
1328 # find_nonterminal_trans(C)
1330 # Given a set of LR(0) items, this functions finds all of the non-terminal
1331 # transitions. These are transitions in which a dot appears immediately before
1332 # a non-terminal. Returns a list of tuples of the form (state,N) where state
1333 # is the state number and N is the nonterminal symbol.
1335 # The input C is the set of LR(0) items.
1336 # -----------------------------------------------------------------------------
def find_nonterminal_transitions(C):
    """Given the set of LR(0) item sets C, find all nonterminal transitions:
    positions where the dot appears immediately before a nonterminal.
    Returns a list of (state, N) tuples, state being the state number and
    N the nonterminal symbol."""
    trans = []
    for state in range(len(C)):
        for p in C[state]:
            if p.lr_index < p.len - 1:
                # t = (this state, the symbol right after the dot)
                t = (state,p.prod[p.lr_index+1])
                if Nonterminals.has_key(t[1]):
                    if t not in trans: trans.append(t)
    return trans
1349 # -----------------------------------------------------------------------------
1352 # Computes the DR(p,A) relationships for non-terminal transitions. The input
1353 # is a tuple (state,N) where state is a number and N is a nonterminal symbol.
1355 # Returns a list of terminals.
1356 # -----------------------------------------------------------------------------
def dr_relation(C,trans,nullable):
    """Compute DR(p,A) for the nonterminal transition trans = (state,N):
    the terminals that can be read directly after taking the transition.
    Returns a list of terminals."""
    state,N = trans
    terms = []

    g = lr0_goto(C[state],N)
    for p in g:
        if p.lr_index < p.len - 1:
            a = p.prod[p.lr_index+1]
            if Terminals.has_key(a):
                if a not in terms: terms.append(a)

    # This extra bit is to handle the start state
    if state == 0 and N == Productions[0].prod[0]:
        terms.append('$end')

    return terms
1376 # -----------------------------------------------------------------------------
1379 # Computes the READS() relation (p,A) READS (t,C).
1380 # -----------------------------------------------------------------------------
def reads_relation(C, trans, empty):
    """Compute the READS() relation: (p,A) READS (t,C).
    Follows the transition, then collects (state, symbol) pairs for every
    nullable symbol that appears right after the dot in the resulting set."""
    # Look for empty transitions
    rel = []
    state, N = trans

    g = lr0_goto(C[state],N)
    j = _lr0_cidhash.get(id(g),-1)
    for p in g:
        if p.lr_index < p.len - 1:
            a = p.prod[p.lr_index + 1]
            if empty.has_key(a):
                rel.append((j,a))

    return rel
1397 # -----------------------------------------------------------------------------
1398 # compute_lookback_includes()
1400 # Determines the lookback and includes relations
1404 # This relation is determined by running the LR(0) state machine forward.
1405 # For example, starting with a production "N : . A B C", we run it forward
1406 # to obtain "N : A B C ." We then build a relationship between this final
1407 # state and the starting state. These relationships are stored in a dictionary
1412 # Computes the INCLUDE() relation (p,A) INCLUDES (p',B).
1414 # This relation is used to determine non-terminal transitions that occur
1415 # inside of other non-terminal transition states. (p,A) INCLUDES (p', B)
1416 # if the following holds:
1418 # B -> LAT, where T -> epsilon and p' -L-> p
1420 # L is essentially a prefix (which may be empty), T is a suffix that must be
1421 # able to derive an empty string. State p' must lead to state p with the string L.
1423 # -----------------------------------------------------------------------------
def compute_lookback_includes(C,trans,nullable):
    """Determine the LOOKBACK and INCLUDES relations for the nonterminal
    transitions in trans, by running the LR(0) machine forward from each
    matching production.  Returns (lookdict, includedict)."""

    lookdict = {}          # Dictionary of lookback relations
    includedict = {}       # Dictionary of include relations

    # Make a dictionary of non-terminal transitions for O(1) membership tests
    dtrans = {}
    for t in trans:
        dtrans[t] = 1

    # Loop over all transitions and compute lookbacks and includes
    for state,N in trans:
        lookb = []
        includes = []
        for p in C[state]:
            if p.name != N: continue

            # Okay, we have a name match.  We now follow the production all the way
            # through the state machine until we get the . on the right hand side

            lr_index = p.lr_index
            j = state
            while lr_index < p.len - 1:
                lr_index = lr_index + 1
                t = p.prod[lr_index]

                # Check to see if this symbol and state are a non-terminal transition
                if dtrans.has_key((j,t)):
                    # Yes.  Okay, there is some chance that this is an includes relation
                    # the only way to know for certain is whether the rest of the
                    # production derives empty
                    li = lr_index + 1
                    while li < p.len:
                        if Terminals.has_key(p.prod[li]): break      # No forget it
                        if not nullable.has_key(p.prod[li]): break
                        li = li + 1
                    else:
                        # Appears to be a relation between (j,t) and (state,N)
                        includes.append((j,t))

                g = lr0_goto(C[j],t)               # Go to next set
                j = _lr0_cidhash.get(id(g),-1)     # Go to next state

            # When we get here, j is the final state, now we have to locate the production
            for r in C[j]:
                if r.name != p.name: continue
                if r.len != p.len:   continue
                i = 0
                # This loop is comparing a production ". A B C" with "A B C ."
                while i < r.lr_index:
                    if r.prod[i] != p.prod[i+1]: break
                    i = i + 1
                else:
                    lookb.append((j,r))

        for i in includes:
            if not includedict.has_key(i): includedict[i] = []
            includedict[i].append((state,N))
        lookdict[(state,N)] = lookb

    return lookdict,includedict
1487 # -----------------------------------------------------------------------------
1491 # The following two functions are used to compute set valued functions
1494 # F(x) = F'(x) U U{F(y) | x R y}
1496 # This is used to compute the values of Read() sets as well as FOLLOW sets
1497 # in LALR(1) generation.
1499 # Inputs: X - An input set
1501 # FP - Set-valued function
1502 # ------------------------------------------------------------------------------
def digraph(X,R,FP):
    """Generic solver for set-valued functions (DeRemer and Pennello):
        F(x) = FP(x) U U{ F(y) | x R y }
    X  - input set;  R - relation;  FP - base set-valued function.
    Used to compute both Read() and FOLLOW sets in LALR(1) generation.
    Returns F as a dictionary."""
    N = { }
    for x in X:
        N[x] = 0
    stack = []
    F = { }
    for x in X:
        # N[x] == 0 means x has not been visited yet
        if N[x] == 0: traverse(x,N,stack,F,X,R,FP)
    return F
def traverse(x,N,stack,F,X,R,FP):
    """Depth-first traversal helper for digraph().  Detects strongly
    connected components (cycles in R) and makes all members of an SCC
    share the same F value."""
    stack.append(x)
    d = len(stack)
    N[x] = d
    F[x] = FP(x)             # F(X) <- F'(x)

    rel = R(x)               # Get y's related to x
    for y in rel:
        if N[y] == 0:
            traverse(y,N,stack,F,X,R,FP)
        N[x] = min(N[x],N[y])
        for a in F.get(y,[]):
            if a not in F[x]: F[x].append(a)
    if N[x] == d:
        # x is the root of an SCC: pop the whole component, marking each
        # member as finished (sys.maxint) and copying the root's F value.
        N[stack[-1]] = sys.maxint
        F[stack[-1]] = F[x]
        element = stack.pop()
        while element != x:
            N[stack[-1]] = sys.maxint
            F[stack[-1]] = F[x]
            element = stack.pop()
1536 # -----------------------------------------------------------------------------
1537 # compute_read_sets()
1539 # Given a set of LR(0) items, this function computes the read sets.
1541 # Inputs: C = Set of LR(0) items
1542 # ntrans = Set of nonterminal transitions
1543 # nullable = Set of empty transitions
1545 # Returns a set containing the read sets
1546 # -----------------------------------------------------------------------------
def compute_read_sets(C, ntrans, nullable):
    """Compute the Read() sets.
    C        - set of LR(0) items
    ntrans   - set of nonterminal transitions
    nullable - set of empty (nullable) nonterminals
    Returns a dict mapping each transition to its read set."""
    FP = lambda x: dr_relation(C,x,nullable)
    R  = lambda x: reads_relation(C,x,nullable)
    F = digraph(ntrans,R,FP)
    return F
1554 # -----------------------------------------------------------------------------
1555 # compute_follow_sets()
1557 # Given a set of LR(0) items, a set of non-terminal transitions, a readset,
1558 # and an include set, this function computes the follow sets
1560 # Follow(p,A) = Read(p,A) U U {Follow(p',B) | (p,A) INCLUDES (p',B)}
1563 # ntrans = Set of nonterminal transitions
1564 # readsets = Readset (previously computed)
1565 # inclsets = Include sets (previously computed)
1567 # Returns a set containing the follow sets
1568 # -----------------------------------------------------------------------------
def compute_follow_sets(ntrans,readsets,inclsets):
    """Compute the LALR FOLLOW sets:
        Follow(p,A) = Read(p,A) U U{ Follow(p',B) | (p,A) INCLUDES (p',B) }
    ntrans   - set of nonterminal transitions
    readsets - Read sets (previously computed)
    inclsets - INCLUDES relations (previously computed)
    Returns a dict mapping each transition to its follow set."""
    FP = lambda x: readsets[x]
    R  = lambda x: inclsets.get(x,[])
    F = digraph(ntrans,R,FP)
    return F
1576 # -----------------------------------------------------------------------------
1579 # Attaches the lookahead symbols to grammar rules.
1581 # Inputs: lookbacks - Set of lookback relations
1582 # followset - Computed follow set
1584 # This function directly attaches the lookaheads to productions contained
1585 # in the lookbacks set
1586 # -----------------------------------------------------------------------------
def add_lookaheads(lookbacks,followset):
    """Attach lookahead symbols to grammar rules.
    lookbacks - set of lookback relations
    followset - computed follow sets
    Directly updates the .lookaheads dict on each production contained
    in the lookback relations."""
    for trans,lb in lookbacks.items():
        # Loop over productions in lookback
        for state,p in lb:
            if not p.lookaheads.has_key(state):
                p.lookaheads[state] = []
            f = followset.get(trans,[])
            for a in f:
                if a not in p.lookaheads[state]: p.lookaheads[state].append(a)
1598 # -----------------------------------------------------------------------------
1599 # add_lalr_lookaheads()
1601 # This function does all of the work of adding lookahead information for use
1603 # -----------------------------------------------------------------------------
def add_lalr_lookaheads(C):
    """Do all of the work of adding lookahead information for use by the
    LALR(1) algorithm, given the LR(0) item sets C."""
    # Determine all of the nullable nonterminals
    nullable = compute_nullable_nonterminals()

    # Find all non-terminal transitions
    trans = find_nonterminal_transitions(C)

    # Compute read sets
    readsets = compute_read_sets(C,trans,nullable)

    # Compute lookback/includes relations
    lookd, included = compute_lookback_includes(C,trans,nullable)

    # Compute LALR FOLLOW sets
    followsets = compute_follow_sets(trans,readsets,included)

    # Add all of the lookaheads
    add_lookaheads(lookd,followsets)
1624 # -----------------------------------------------------------------------------
1627 # This function constructs the parse tables for SLR or LALR
1628 # -----------------------------------------------------------------------------
1629 def lr_parse_table(method
):
1631 goto
= _lr_goto
# Goto array
1632 action
= _lr_action
# Action array
1633 actionp
= { } # Action production array (temporary)
1641 sys
.stderr
.write("yacc: Generating %s parsing table...\n" % method
)
1642 _vf
.write("\n\nParsing method: %s\n\n" % method
)
1644 # Step 1: Construct C = { I0, I1, ... IN}, collection of LR(0) items
1645 # This determines the number of states
1649 if method
== 'LALR':
1650 add_lalr_lookaheads(C
)
1653 # Build the parser table, state by state
1656 # Loop over each production in I
1657 actlist
= [ ] # List of actions
1662 _vf
.write("\nstate %d\n\n" % st
)
1664 _vf
.write(" (%d) %s\n" % (p
.number
, str(p
)))
1669 if p
.len == p
.lr_index
+ 1:
1671 # Start symbol. Accept!
1672 st_action
["$end"] = 0
1673 st_actionp
["$end"] = p
1675 # We are at the end of a production. Reduce!
1676 if method
== 'LALR':
1677 laheads
= p
.lookaheads
[st
]
1679 laheads
= Follow
[p
.name
]
1681 actlist
.append((a
,p
,"reduce using rule %d (%s)" % (p
.number
,p
)))
1682 r
= st_action
.get(a
,None)
1684 # Whoa. Have a shift/reduce or reduce/reduce conflict
1686 # Need to decide on shift or reduce here
1687 # By default we favor shifting. Need to add
1688 # some precedence rules here.
1689 sprec
,slevel
= Productions
[st_actionp
[a
].number
].prec
1690 rprec
,rlevel
= Precedence
.get(a
,('right',0))
1691 if (slevel
< rlevel
) or ((slevel
== rlevel
) and (rprec
== 'left')):
1692 # We really need to reduce here.
1693 st_action
[a
] = -p
.number
1695 if not slevel
and not rlevel
:
1696 _vfc
.write("shift/reduce conflict in state %d resolved as reduce.\n" % st
)
1697 _vf
.write(" ! shift/reduce conflict for %s resolved as reduce.\n" % a
)
1699 elif (slevel
== rlevel
) and (rprec
== 'nonassoc'):
1702 # Hmmm. Guess we'll keep the shift
1704 _vfc
.write("shift/reduce conflict in state %d resolved as shift.\n" % st
)
1705 _vf
.write(" ! shift/reduce conflict for %s resolved as shift.\n" % a
)
1708 # Reduce/reduce conflict. In this case, we favor the rule
1709 # that was defined first in the grammar file
1710 oldp
= Productions
[-r
]
1711 pp
= Productions
[p
.number
]
1712 if oldp
.line
> pp
.line
:
1713 st_action
[a
] = -p
.number
1715 # sys.stderr.write("Reduce/reduce conflict in state %d\n" % st)
1717 _vfc
.write("reduce/reduce conflict in state %d resolved using rule %d (%s).\n" % (st
, st_actionp
[a
].number
, st_actionp
[a
]))
1718 _vf
.write(" ! reduce/reduce conflict for %s resolved using rule %d (%s).\n" % (a
,st_actionp
[a
].number
, st_actionp
[a
]))
1720 sys
.stderr
.write("Unknown conflict in state %d\n" % st
)
1722 st_action
[a
] = -p
.number
1726 a
= p
.prod
[i
+1] # Get symbol right after the "."
1727 if Terminals
.has_key(a
):
1729 j
= _lr0_cidhash
.get(id(g
),-1)
1731 # We are in a shift state
1732 actlist
.append((a
,p
,"shift and go to state %d" % j
))
1733 r
= st_action
.get(a
,None)
1735 # Whoa have a shift/reduce or shift/shift conflict
1738 sys
.stderr
.write("Shift/shift conflict in state %d\n" % st
)
1740 # Do a precedence check.
1741 # - if precedence of reduce rule is higher, we reduce.
1742 # - if precedence of reduce is same and left assoc, we reduce.
1743 # - otherwise we shift
1744 rprec
,rlevel
= Productions
[st_actionp
[a
].number
].prec
1745 sprec
,slevel
= Precedence
.get(a
,('right',0))
1746 if (slevel
> rlevel
) or ((slevel
== rlevel
) and (rprec
!= 'left')):
1747 # We decide to shift here... highest precedence to shift
1752 _vfc
.write("shift/reduce conflict in state %d resolved as shift.\n" % st
)
1753 _vf
.write(" ! shift/reduce conflict for %s resolved as shift.\n" % a
)
1754 elif (slevel
== rlevel
) and (rprec
== 'nonassoc'):
1757 # Hmmm. Guess we'll keep the reduce
1758 if not slevel
and not rlevel
:
1760 _vfc
.write("shift/reduce conflict in state %d resolved as reduce.\n" % st
)
1761 _vf
.write(" ! shift/reduce conflict for %s resolved as reduce.\n" % a
)
1764 sys
.stderr
.write("Unknown conflict in state %d\n" % st
)
1769 except StandardError,e
:
1770 print sys
.exc_info()
1771 raise YaccError
, "Hosed in lr_parse_table"
1773 # Print the actions associated with each terminal
1776 for a
,p
,m
in actlist
:
1777 if st_action
.has_key(a
):
1778 if p
is st_actionp
[a
]:
1779 _vf
.write(" %-15s %s\n" % (a
,m
))
1780 _actprint
[(a
,m
)] = 1
1782 for a
,p
,m
in actlist
:
1783 if st_action
.has_key(a
):
1784 if p
is not st_actionp
[a
]:
1785 if not _actprint
.has_key((a
,m
)):
1786 _vf
.write(" ! %-15s [ %s ]\n" % (a
,m
))
1787 _actprint
[(a
,m
)] = 1
1789 # Construct the goto table for this state
1795 if Nonterminals
.has_key(s
):
1797 for n
in nkeys
.keys():
1799 j
= _lr0_cidhash
.get(id(g
),-1)
1803 _vf
.write(" %-30s shift and go to state %d\n" % (n
,j
))
1805 action
[st
] = st_action
1806 actionp
[st
] = st_actionp
1812 if n_srconflict
== 1:
1813 sys
.stderr
.write("yacc: %d shift/reduce conflict\n" % n_srconflict
)
1814 if n_srconflict
> 1:
1815 sys
.stderr
.write("yacc: %d shift/reduce conflicts\n" % n_srconflict
)
1816 if n_rrconflict
== 1:
1817 sys
.stderr
.write("yacc: %d reduce/reduce conflict\n" % n_rrconflict
)
1818 if n_rrconflict
> 1:
1819 sys
.stderr
.write("yacc: %d reduce/reduce conflicts\n" % n_rrconflict
)
1821 # -----------------------------------------------------------------------------
1822 # ==== LR Utility functions ====
1823 # -----------------------------------------------------------------------------
1825 # -----------------------------------------------------------------------------
1826 # _lr_write_tables()
1828 # This function writes the LR parsing tables to a file
1829 # -----------------------------------------------------------------------------
1831 def lr_write_tables(modulename
=tab_module
,outputdir
=''):
1832 filename
= os
.path
.join(outputdir
,modulename
) + ".py"
1834 f
= open(filename
,"w")
1838 # This file is automatically generated. Do not edit.
1843 """ % (filename
, repr(_lr_method
), repr(Signature
.digest())))
1845 # Change smaller to 0 to go back to original tables
1848 # Factor out names to try and make smaller
1852 for s
,nd
in _lr_action
.items():
1853 for name
,v
in nd
.items():
1861 f
.write("\n_lr_action_items = {")
1862 for k
,v
in items
.items():
1863 f
.write("%r:([" % k
)
1875 for _k, _v in _lr_action_items.items():
1876 for _x,_y in zip(_v[0],_v[1]):
1877 if not _lr_action.has_key(_x): _lr_action[_x] = { }
1878 _lr_action[_x][_k] = _y
1879 del _lr_action_items
1883 f
.write("\n_lr_action = { ");
1884 for k
,v
in _lr_action
.items():
1885 f
.write("(%r,%r):%r," % (k
[0],k
[1],v
))
1889 # Factor out names to try and make smaller
1892 for s
,nd
in _lr_goto
.items():
1893 for name
,v
in nd
.items():
1901 f
.write("\n_lr_goto_items = {")
1902 for k
,v
in items
.items():
1903 f
.write("%r:([" % k
)
1915 for _k, _v in _lr_goto_items.items():
1916 for _x,_y in zip(_v[0],_v[1]):
1917 if not _lr_goto.has_key(_x): _lr_goto[_x] = { }
1918 _lr_goto[_x][_k] = _y
1922 f
.write("\n_lr_goto = { ");
1923 for k
,v
in _lr_goto
.items():
1924 f
.write("(%r,%r):%r," % (k
[0],k
[1],v
))
1927 # Write production table
1928 f
.write("_lr_productions = [\n")
1929 for p
in Productions
:
1932 f
.write(" (%r,%d,%r,%r,%d),\n" % (p
.name
, p
.len, p
.func
.__name
__,p
.file,p
.line
))
1934 f
.write(" (%r,%d,None,None,None),\n" % (p
.name
, p
.len))
1942 print >>sys
.stderr
, "Unable to create '%s'" % filename
1943 print >>sys
.stderr
, e
1946 def lr_read_tables(module
=tab_module
,optimize
=0):
1947 global _lr_action
, _lr_goto
, _lr_productions
, _lr_method
1949 exec "import %s as parsetab" % module
1951 if (optimize
) or (Signature
.digest() == parsetab
._lr
_signature
):
1952 _lr_action
= parsetab
._lr
_action
1953 _lr_goto
= parsetab
._lr
_goto
1954 _lr_productions
= parsetab
._lr
_productions
1955 _lr_method
= parsetab
._lr
_method
1960 except (ImportError,AttributeError):
1964 # -----------------------------------------------------------------------------
1967 # Build the parser module
1968 # -----------------------------------------------------------------------------
1970 def yacc(method
=default_lr
, debug
=yaccdebug
, module
=None, tabmodule
=tab_module
, start
=None, check_recursion
=1, optimize
=0,write_tables
=1,debugfile
=debug_file
,outputdir
=''):
1979 # Add parsing method to signature
1980 Signature
.update(method
)
1982 # If a "module" parameter was supplied, extract its dictionary.
1983 # Note: a module may in fact be an instance as well.
1986 # User supplied a module object.
1987 if isinstance(module
, types
.ModuleType
):
1988 ldict
= module
.__dict
__
1989 elif isinstance(module
, _INSTANCETYPE
):
1990 _items
= [(k
,getattr(module
,k
)) for k
in dir(module
)]
1995 raise ValueError,"Expected a module"
1998 # No module given. We might be able to get information from the caller.
1999 # Throw an exception and unwind the traceback to get the globals
2003 except RuntimeError:
2004 e
,b
,t
= sys
.exc_info()
2006 f
= f
.f_back
# Walk out to our calling function
2007 ldict
= f
.f_globals
# Grab its globals dictionary
2009 # Add starting symbol to signature
2011 start
= ldict
.get("start",None)
2013 Signature
.update(start
)
2015 # If running in optimized mode. We're going to
2017 if (optimize
and lr_read_tables(tabmodule
,1)):
2020 for p
in _lr_productions
:
2022 Productions
.append(None)
2024 m
= MiniProduction()
2030 m
.func
= ldict
[p
[2]]
2031 Productions
.append(m
)
2034 # Get the tokens map
2035 if (module
and isinstance(module
,_INSTANCETYPE
)):
2036 tokens
= getattr(module
,"tokens",None)
2038 tokens
= ldict
.get("tokens",None)
2041 raise YaccError
,"module does not define a list 'tokens'"
2042 if not (isinstance(tokens
,types
.ListType
) or isinstance(tokens
,types
.TupleType
)):
2043 raise YaccError
,"tokens must be a list or tuple."
2045 # Check to see if a requires dictionary is defined.
2046 requires
= ldict
.get("require",None)
2048 if not (isinstance(requires
,types
.DictType
)):
2049 raise YaccError
,"require must be a dictionary."
2051 for r
,v
in requires
.items():
2053 if not (isinstance(v
,types
.ListType
)):
2055 v1
= [x
.split(".") for x
in v
]
2057 except StandardError:
2058 print >>sys
.stderr
, "Invalid specification for rule '%s' in require. Expected a list of strings" % r
2061 # Build the dictionary of terminals. We a record a 0 in the
2062 # dictionary to track whether or not a terminal is actually
2063 # used in the grammar
2065 if 'error' in tokens
:
2066 print >>sys
.stderr
, "yacc: Illegal token 'error'. Is a reserved word."
2067 raise YaccError
,"Illegal token name"
2070 if Terminals
.has_key(n
):
2071 print >>sys
.stderr
, "yacc: Warning. Token '%s' multiply defined." % n
2074 Terminals
['error'] = [ ]
2076 # Get the precedence map (if any)
2077 prec
= ldict
.get("precedence",None)
2079 if not (isinstance(prec
,types
.ListType
) or isinstance(prec
,types
.TupleType
)):
2080 raise YaccError
,"precedence must be a list or tuple."
2081 add_precedence(prec
)
2082 Signature
.update(repr(prec
))
2085 if not Precedence
.has_key(n
):
2086 Precedence
[n
] = ('right',0) # Default, right associative, 0 precedence
2088 # Look for error handler
2089 ef
= ldict
.get('p_error',None)
2091 if isinstance(ef
,types
.FunctionType
):
2093 elif isinstance(ef
, types
.MethodType
):
2096 raise YaccError
,"'p_error' defined, but is not a function or method."
2097 eline
= ef
.func_code
.co_firstlineno
2098 efile
= ef
.func_code
.co_filename
2101 if (ef
.func_code
.co_argcount
!= 1+ismethod
):
2102 raise YaccError
,"%s:%d: p_error() requires 1 argument." % (efile
,eline
)
2106 print >>sys
.stderr
, "yacc: Warning. no p_error() function is defined."
2108 # Get the list of built-in functions with p_ prefix
2109 symbols
= [ldict
[f
] for f
in ldict
.keys()
2110 if (type(ldict
[f
]) in (types
.FunctionType
, types
.MethodType
) and ldict
[f
].__name
__[:2] == 'p_'
2111 and ldict
[f
].__name
__ != 'p_error')]
2113 # Check for non-empty symbols
2114 if len(symbols
) == 0:
2115 raise YaccError
,"no rules of the form p_rulename are defined."
2117 # Sort the symbols by line number
2118 symbols
.sort(lambda x
,y
: cmp(x
.func_code
.co_firstlineno
,y
.func_code
.co_firstlineno
))
2120 # Add all of the symbols to the grammar
2122 if (add_function(f
)) < 0:
2125 files
[f
.func_code
.co_filename
] = None
2127 # Make a signature of the docstrings
2130 Signature
.update(f
.__doc
__)
2135 raise YaccError
,"Unable to construct parser."
2137 if not lr_read_tables(tabmodule
):
2140 for filename
in files
.keys():
2141 if not validate_file(filename
):
2144 # Validate dictionary
2145 validate_dict(ldict
)
2147 if start
and not Prodnames
.has_key(start
):
2148 raise YaccError
,"Bad starting symbol '%s'" % start
2150 augment_grammar(start
)
2151 error
= verify_productions(cycle_check
=check_recursion
)
2152 otherfunc
= [ldict
[f
] for f
in ldict
.keys()
2153 if (type(f
) in (types
.FunctionType
,types
.MethodType
) and ldict
[f
].__name
__[:2] != 'p_')]
2156 raise YaccError
,"Unable to construct parser."
2160 compute_follow(start
)
2162 if method
in ['SLR','LALR']:
2163 lr_parse_table(method
)
2165 raise YaccError
, "Unknown parsing method '%s'" % method
2168 lr_write_tables(tabmodule
,outputdir
)
2172 f
= open(os
.path
.join(outputdir
,debugfile
),"w")
2173 f
.write(_vfc
.getvalue())
2175 f
.write(_vf
.getvalue())
2178 print >>sys
.stderr
, "yacc: can't create '%s'" % debugfile
,e
2180 # Made it here. Create a parser object and set up its internal state.
2181 # Set global parse() method to bound method of parser object.
2184 p
.productions
= Productions
2185 p
.errorfunc
= Errorfunc
2186 p
.action
= _lr_action
2188 p
.method
= _lr_method
2189 p
.require
= Requires
2197 # Clean up all of the globals we created
# yacc_cleanup function. Delete all of the global variables
# used during table construction
def yacc_cleanup():
    """Delete the module-level globals used only during table construction,
    freeing their memory once the parser object has been built."""
    global _lr_action, _lr_goto, _lr_method, _lr_goto_cache
    del _lr_action, _lr_goto, _lr_method, _lr_goto_cache

    global Productions, Prodnames, Prodmap, Terminals
    global Nonterminals, First, Follow, Precedence, LRitems
    global Errorfunc, Signature, Requires

    del Productions, Prodnames, Prodmap, Terminals
    del Nonterminals, First, Follow, Precedence, LRitems
    del Errorfunc, Signature, Requires
2221 # Stub that raises an error if parsing is attempted without first calling yacc()
2222 def parse(*args
,**kwargs
):
2223 raise YaccError
, "yacc: No parser built with yacc()"