#-----------------------------------------------------------------------------
# Author: David M. Beazley (dave@dabeaz.com)
#
# Copyright (C) 2001-2007, David M. Beazley
#
# This library is free software; you can redistribute it and/or
# modify it under the terms of the GNU Lesser General Public
# License as published by the Free Software Foundation; either
# version 2.1 of the License, or (at your option) any later version.
#
# This library is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
# Lesser General Public License for more details.
#
# You should have received a copy of the GNU Lesser General Public
# License along with this library; if not, write to the Free Software
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
#
# See the file COPYING for a complete copy of the LGPL.
#-----------------------------------------------------------------------------

import re, sys, types, os

# Regular expression used to match valid token names
_is_identifier = re.compile(r'^[a-zA-Z0-9_]+$')
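
# For example:
#
#     _is_identifier.match("NUMBER")      # matches
#     _is_identifier.match("bad-token")   # returns None (hyphen not allowed)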

# Available instance types. This is used when lexers are defined by a class.
# It's a little funky because I want to preserve backwards compatibility
# with Python 2.0 where types.ObjectType is undefined.

try:
    _INSTANCETYPE = (types.InstanceType, types.ObjectType)
except AttributeError:
    _INSTANCETYPE = types.InstanceType
    class object: pass      # Note: needed if no new-style classes present

# Exception thrown when invalid token encountered and no default error
# handler is defined.
class LexError(Exception):
    def __init__(self, message, s):
        self.args = (message,)
        self.s = s

# Token class
class LexToken(object):
    def __str__(self):
        return "LexToken(%s,%r,%d,%d)" % (self.type, self.value, self.lineno, self.lexpos)

# -----------------------------------------------------------------------------
# Lexer class
#
# This class encapsulates all of the methods and data associated with a lexer.
#
#     input()  -  Store a new string in the lexer
#     token()  -  Get the next token
# -----------------------------------------------------------------------------
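
# A minimal usage sketch (an instance is normally created by the lex()
# function below rather than constructed directly):
#
#     lexer = lex()              # build a lexer from the caller's t_* rules
#     lexer.input("3 + 4")
#     while 1:
#         tok = lexer.token()    # token() returns None when input is exhausted
#         if not tok: break
#         print tok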

class Lexer:
    def __init__(self):
        self.lexre = None           # Master regular expression. This is a list of
                                    # tuples (re,findex) where re is a compiled
                                    # regular expression and findex is a list
                                    # mapping regex group numbers to rules
        self.lexretext = None       # Current regular expression strings
        self.lexstatere = {}        # Dictionary mapping lexer states to master regexs
        self.lexstateretext = {}    # Dictionary mapping lexer states to regex strings
        self.lexstate = "INITIAL"   # Current lexer state
        self.lexstatestack = []     # Stack of lexer states
        self.lexstateinfo = None    # State information
        self.lexstateignore = {}    # Dictionary of ignored characters for each state
        self.lexstateerrorf = {}    # Dictionary of error functions for each state
        self.lexreflags = 0         # Optional re compile flags
        self.lexdata = None         # Actual input data (as a string)
        self.lexpos = 0             # Current position in input text
        self.lexlen = 0             # Length of the input text
        self.lexerrorf = None       # Error rule (if any)
        self.lextokens = None       # List of valid tokens
        self.lexignore = ""         # Ignored characters
        self.lexliterals = ""       # Literal characters that can be passed through
        self.lexmodule = None       # Module
        self.lineno = 1             # Current line number
        self.lexdebug = 0           # Debugging mode
        self.lexoptimize = 0        # Optimized mode

    def clone(self, object=None):
        c = Lexer()
        c.lexstatere = self.lexstatere
        c.lexstateinfo = self.lexstateinfo
        c.lexstateretext = self.lexstateretext
        c.lexstate = self.lexstate
        c.lexstatestack = self.lexstatestack
        c.lexstateignore = self.lexstateignore
        c.lexstateerrorf = self.lexstateerrorf
        c.lexreflags = self.lexreflags
        c.lexdata = self.lexdata
        c.lexpos = self.lexpos
        c.lexlen = self.lexlen
        c.lextokens = self.lextokens
        c.lexdebug = self.lexdebug
        c.lineno = self.lineno
        c.lexoptimize = self.lexoptimize
        c.lexliterals = self.lexliterals
        c.lexmodule = self.lexmodule

        # If the object parameter has been supplied, it means we are attaching the
        # lexer to a new object.  In this case, we have to rebind all methods in
        # the lexstatere and lexstateerrorf tables.

        if object:
            newtab = { }
            for key, ritem in self.lexstatere.items():
                newre = []
                for cre, findex in ritem:
                    newfindex = []
                    for f in findex:
                        if not f or not f[0]:
                            newfindex.append(f)
                            continue
                        newfindex.append((getattr(object,f[0].__name__),f[1]))
                    newre.append((cre,newfindex))
                newtab[key] = newre
            c.lexstatere = newtab
            c.lexstateerrorf = { }
            for key, ef in self.lexstateerrorf.items():
                c.lexstateerrorf[key] = getattr(object,ef.__name__)
            c.lexmodule = object

        # Set up other attributes
        c.begin(c.lexstate)
        return c

    # ------------------------------------------------------------
    # writetab() - Write lexer information to a table file
    # ------------------------------------------------------------
    def writetab(self, tabfile):
        tf = open(tabfile+".py","w")
        tf.write("# %s.py. This file automatically created by PLY (version %s). Don't edit!\n" % (tabfile,__version__))
        tf.write("_lextokens = %s\n" % repr(self.lextokens))
        tf.write("_lexreflags = %s\n" % repr(self.lexreflags))
        tf.write("_lexliterals = %s\n" % repr(self.lexliterals))
        tf.write("_lexstateinfo = %s\n" % repr(self.lexstateinfo))

        tabre = { }
        for key, lre in self.lexstatere.items():
            titem = []
            for i in range(len(lre)):
                titem.append((self.lexstateretext[key][i],_funcs_to_names(lre[i][1])))
            tabre[key] = titem

        tf.write("_lexstatere = %s\n" % repr(tabre))
        tf.write("_lexstateignore = %s\n" % repr(self.lexstateignore))

        taberr = { }
        for key, ef in self.lexstateerrorf.items():
            if ef:
                taberr[key] = ef.__name__
            else:
                taberr[key] = None
        tf.write("_lexstateerrorf = %s\n" % repr(taberr))
        tf.close()

    # ------------------------------------------------------------
    # readtab() - Read lexer information from a tab file
    # ------------------------------------------------------------
    def readtab(self, tabfile, fdict):
        exec "import %s as lextab" % tabfile
        self.lextokens = lextab._lextokens
        self.lexreflags = lextab._lexreflags
        self.lexliterals = lextab._lexliterals
        self.lexstateinfo = lextab._lexstateinfo
        self.lexstateignore = lextab._lexstateignore
        self.lexstatere = { }
        self.lexstateretext = { }
        for key,lre in lextab._lexstatere.items():
            titem = []
            txtitem = []
            for i in range(len(lre)):
                titem.append((re.compile(lre[i][0],lextab._lexreflags),_names_to_funcs(lre[i][1],fdict)))
                txtitem.append(lre[i][0])
            self.lexstatere[key] = titem
            self.lexstateretext[key] = txtitem
        self.lexstateerrorf = { }
        for key,ef in lextab._lexstateerrorf.items():
            self.lexstateerrorf[key] = fdict[ef]
        self.begin('INITIAL')

    # ------------------------------------------------------------
    # input() - Push a new string into the lexer
    # ------------------------------------------------------------
    def input(self, s):
        if not (isinstance(s,types.StringType) or isinstance(s,types.UnicodeType)):
            raise ValueError, "Expected a string"
        self.lexdata = s
        self.lexpos = 0
        self.lexlen = len(s)

    # ------------------------------------------------------------
    # begin() - Changes the lexing state
    # ------------------------------------------------------------
    def begin(self, state):
        if not self.lexstatere.has_key(state):
            raise ValueError, "Undefined state"
        self.lexre = self.lexstatere[state]
        self.lexretext = self.lexstateretext[state]
        self.lexignore = self.lexstateignore.get(state,"")
        self.lexerrorf = self.lexstateerrorf.get(state,None)
        self.lexstate = state
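
    # For example, assuming a 'ccode' state was declared in the user's
    # states list, a sketch of switching rule sets:
    #
    #     lexer.begin('ccode')       # activate the 'ccode' rules
    #     lexer.begin('INITIAL')     # return to the default rules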

    # ------------------------------------------------------------
    # push_state() - Changes the lexing state and saves old on stack
    # ------------------------------------------------------------
    def push_state(self, state):
        self.lexstatestack.append(self.lexstate)
        self.begin(state)

    # ------------------------------------------------------------
    # pop_state() - Restores the previous state
    # ------------------------------------------------------------
    def pop_state(self):
        self.begin(self.lexstatestack.pop())
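
    # Sketch of push_state()/pop_state() from user rule code, assuming the
    # lexer instance built by lex() is visible to the rules as a global:
    #
    #     def t_lbrace(t):
    #         r'\{'
    #         lexer.push_state('ccode')   # enter nested state, saving the old one
    #
    #     def t_ccode_rbrace(t):
    #         r'\}'
    #         lexer.pop_state()           # restore the saved state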

    # ------------------------------------------------------------
    # current_state() - Returns the current lexing state
    # ------------------------------------------------------------
    def current_state(self):
        return self.lexstate

    # ------------------------------------------------------------
    # skip() - Skip ahead n characters
    # ------------------------------------------------------------
    def skip(self, n):
        self.lexpos += n

    # ------------------------------------------------------------
    # token() - Return the next token from the Lexer
    #
    # Note: This function has been carefully implemented to be as fast
    # as possible.  Don't make changes unless you really know what
    # you are doing.
    # ------------------------------------------------------------
    def token(self):
        # Make local copies of frequently referenced attributes
        lexpos    = self.lexpos
        lexlen    = self.lexlen
        lexignore = self.lexignore
        lexdata   = self.lexdata

        while lexpos < lexlen:
            # This code provides some short-circuit code for whitespace, tabs, and other ignored characters
            if lexdata[lexpos] in lexignore:
                lexpos += 1
                continue

            # Look for a regular expression match
            for lexre,lexindexfunc in self.lexre:
                m = lexre.match(lexdata,lexpos)
                if not m: continue

                # Set last match in lexer so that rules can access it if they want
                self.lexmatch = m

                # Create a token for return
                tok = LexToken()
                tok.value = m.group()
                tok.lineno = self.lineno
                tok.lexpos = lexpos

                lexpos = m.end()
                i = m.lastindex
                func,tok.type = lexindexfunc[i]
                self.lexpos = lexpos

                # If no token type was set, it's an ignored token
                if not func:
                    if tok.type: return tok
                    break

                # if func not callable, it means it's an ignored token
                if not callable(func):
                    break

                # If token is processed by a function, call it
                newtok = func(tok)

                # Every function must return a token, if nothing, we just move to next token
                if not newtok:
                    lexpos = self.lexpos        # This is here in case user has updated lexpos.
                    break

                # Verify type of the token.  If not in the token map, raise an error
                if not self.lexoptimize:
                    if not self.lextokens.has_key(newtok.type):
                        raise LexError, ("%s:%d: Rule '%s' returned an unknown token type '%s'" % (
                            func.func_code.co_filename, func.func_code.co_firstlineno,
                            func.__name__, newtok.type),lexdata[lexpos:])

                return newtok
            else:
                # No match, see if in literals
                if lexdata[lexpos] in self.lexliterals:
                    tok = LexToken()
                    tok.value = lexdata[lexpos]
                    tok.lineno = self.lineno
                    tok.type = tok.value
                    tok.lexpos = lexpos
                    self.lexpos = lexpos + 1
                    return tok

                # No match. Call t_error() if defined.
                if self.lexerrorf:
                    tok = LexToken()
                    tok.value = self.lexdata[lexpos:]
                    tok.lineno = self.lineno
                    tok.type = "error"
                    tok.lexpos = lexpos
                    self.lexpos = lexpos
                    newtok = self.lexerrorf(tok)
                    if lexpos == self.lexpos:
                        # Error method didn't change text position at all. This is an error.
                        raise LexError, ("Scanning error. Illegal character '%s'" % (lexdata[lexpos]), lexdata[lexpos:])
                    lexpos = self.lexpos
                    if not newtok: continue
                    return newtok

                self.lexpos = lexpos
                raise LexError, ("Illegal character '%s' at index %d" % (lexdata[lexpos],lexpos), lexdata[lexpos:])

        self.lexpos = lexpos + 1
        if self.lexdata is None:
            raise RuntimeError, "No input string given with input()"
        return None
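
# A typical rule function that token() dispatches to looks like this (a
# sketch; such rules live in the user's module, not in this file):
#
#     def t_NUMBER(t):
#         r'\d+'
#         t.value = int(t.value)    # rules may transform t.value
#         return t                  # returning nothing discards the match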

# -----------------------------------------------------------------------------
# _validate_file()
#
# This checks to see if there are duplicated t_rulename() functions or strings
# in the parser input file.  This is done using a simple regular expression
# match on each line in the given file.
# -----------------------------------------------------------------------------

def _validate_file(filename):
    base,ext = os.path.splitext(filename)
    if ext != '.py': return 1          # No idea what the file is. Return OK

    try:
        f = open(filename)
        lines = f.readlines()
        f.close()
    except IOError:
        return 1                       # Couldn't read the file. Return OK

    fre = re.compile(r'\s*def\s+(t_[a-zA-Z_0-9]*)\(')
    sre = re.compile(r'\s*(t_[a-zA-Z_0-9]*)\s*=')

    counthash = { }
    linen = 1
    noerror = 1
    for l in lines:
        m = fre.match(l)
        if not m:
            m = sre.match(l)
        if m:
            name = m.group(1)
            prev = counthash.get(name)
            if not prev:
                counthash[name] = linen
            else:
                print >>sys.stderr, "%s:%d: Rule %s redefined. Previously defined on line %d" % (filename,linen,name,prev)
                noerror = 0
        linen += 1
    return noerror

# -----------------------------------------------------------------------------
# _funcs_to_names()
#
# Given a list of regular expression functions, this converts it to a list
# suitable for output to a table file
# -----------------------------------------------------------------------------

def _funcs_to_names(funclist):
    result = []
    for f in funclist:
        if f and f[0]:
            result.append((f[0].__name__,f[1]))
        else:
            result.append(f)
    return result

# -----------------------------------------------------------------------------
# _names_to_funcs()
#
# Given a list of regular expression function names, this converts it back to
# functions.
# -----------------------------------------------------------------------------

def _names_to_funcs(namelist, fdict):
    result = []
    for n in namelist:
        if n and n[0]:
            result.append((fdict[n[0]],n[1]))
        else:
            result.append(n)
    return result

# -----------------------------------------------------------------------------
# _form_master_re()
#
# This function takes a list of all of the regex components and attempts to
# form the master regular expression.  Given limitations in the Python re
# module, it may be necessary to break the master regex into separate expressions.
# -----------------------------------------------------------------------------
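
# For example, the components ['(?P<t_NUMBER>\d+)', '(?P<t_PLUS>\+)'] are
# joined with '|' into a single master pattern:
#
#     (?P<t_NUMBER>\d+)|(?P<t_PLUS>\+)
#
# whose groupindex then maps each rule name back to a group number for dispatch.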

def _form_master_re(relist, reflags, ldict, toknames):
    if not relist: return []
    regex = "|".join(relist)
    try:
        lexre = re.compile(regex, re.VERBOSE | reflags)

        # Build the index to function map for the matching engine
        lexindexfunc = [ None ] * (max(lexre.groupindex.values())+1)
        for f,i in lexre.groupindex.items():
            handle = ldict.get(f,None)
            if type(handle) in (types.FunctionType, types.MethodType):
                lexindexfunc[i] = (handle,toknames[handle.__name__])
            elif handle is not None:
                # If rule was specified as a string, we build an anonymous
                # callback function to carry out the action
                if f.find("ignore_") > 0:
                    lexindexfunc[i] = (None,None)
                else:
                    lexindexfunc[i] = (None, toknames[f])

        return [(lexre,lexindexfunc)],[regex]
    except Exception:
        # The pattern was too large to compile as one expression.  Split the
        # rule list in half and try again on each part.
        m = int(len(relist)/2)
        if m == 0: m = 1
        llist, lre = _form_master_re(relist[:m],reflags,ldict,toknames)
        rlist, rre = _form_master_re(relist[m:],reflags,ldict,toknames)
        return llist+rlist, lre+rre

# -----------------------------------------------------------------------------
# def _statetoken(s,names)
#
# Given a declaration name s of the form "t_" and a dictionary whose keys are
# state names, this function returns a tuple (states,tokenname) where states
# is a tuple of state names and tokenname is the name of the token.  For example,
# calling this with s = "t_foo_bar_SPAM" might return (('foo','bar'),'SPAM')
# -----------------------------------------------------------------------------

def _statetoken(s, names):
    parts = s.split("_")
    for i in range(1,len(parts)):
        if not names.has_key(parts[i]) and parts[i] != 'ANY': break
    if i > 1:
        states = tuple(parts[1:i])
    else:
        states = ('INITIAL',)

    if 'ANY' in states:
        states = tuple(names.keys())

    tokenname = "_".join(parts[i:])
    return (states,tokenname)

# -----------------------------------------------------------------------------
# lex(module)
#
# Build all of the regular expression rules from definitions in the supplied module
# -----------------------------------------------------------------------------
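
# A typical call (sketch): invoke lex() from a module that defines a `tokens`
# list and its t_* rules; the caller's globals are scanned for definitions:
#
#     tokens = ('NUMBER','PLUS')
#     t_PLUS = r'\+'
#     def t_NUMBER(t):
#         r'\d+'
#         return t
#     lexer = lex()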

def lex(module=None,object=None,debug=0,optimize=0,lextab="lextab",reflags=0,nowarn=0):
    global lexer
    ldict = None
    stateinfo = { 'INITIAL' : 'inclusive'}
    error = 0
    files = { }
    lexobj = Lexer()
    lexobj.lexdebug = debug
    lexobj.lexoptimize = optimize
    global token,input

    if nowarn: warn = 0
    else: warn = 1

    if object: module = object

    if module:
        # User supplied a module object.
        if isinstance(module, types.ModuleType):
            ldict = module.__dict__
        elif isinstance(module, _INSTANCETYPE):
            _items = [(k,getattr(module,k)) for k in dir(module)]
            ldict = { }
            for (i,v) in _items:
                ldict[i] = v
        else:
            raise ValueError,"Expected a module or instance"
        lexobj.lexmodule = module

    else:
        # No module given.  We might be able to get information from the caller.
        try:
            raise RuntimeError
        except RuntimeError:
            e,b,t = sys.exc_info()
            f = t.tb_frame
            f = f.f_back                    # Walk out to our calling function
            ldict = f.f_globals             # Grab its globals dictionary

    if optimize and lextab:
        try:
            lexobj.readtab(lextab,ldict)
            token = lexobj.token
            input = lexobj.input
            lexer = lexobj
            return lexobj
        except ImportError:
            pass

    # Get the tokens, states, and literals variables (if any)
    if (module and isinstance(module,_INSTANCETYPE)):
        tokens = getattr(module,"tokens",None)
        states = getattr(module,"states",None)
        literals = getattr(module,"literals","")
    else:
        tokens = ldict.get("tokens",None)
        states = ldict.get("states",None)
        literals = ldict.get("literals","")

    if not tokens:
        raise SyntaxError,"lex: module does not define 'tokens'"
    if not (isinstance(tokens,types.ListType) or isinstance(tokens,types.TupleType)):
        raise SyntaxError,"lex: tokens must be a list or tuple."

    # Build a dictionary of valid token names
    lexobj.lextokens = { }
    if not optimize:
        for n in tokens:
            if not _is_identifier.match(n):
                print >>sys.stderr, "lex: Bad token name '%s'" % n
                error = 1
            if warn and lexobj.lextokens.has_key(n):
                print >>sys.stderr, "lex: Warning. Token '%s' multiply defined." % n
            lexobj.lextokens[n] = None
    else:
        for n in tokens: lexobj.lextokens[n] = None

    if debug:
        print "lex: tokens = '%s'" % lexobj.lextokens.keys()

    try:
        for c in literals:
            if not (isinstance(c,types.StringType) or isinstance(c,types.UnicodeType)) or len(c) > 1:
                print >>sys.stderr, "lex: Invalid literal %s. Must be a single character" % repr(c)
                error = 1
                continue
    except TypeError:
        print >>sys.stderr, "lex: Invalid literals specification. literals must be a sequence of characters."
        error = 1

    lexobj.lexliterals = literals

    # Build statemap
    if states:
        if not (isinstance(states,types.TupleType) or isinstance(states,types.ListType)):
            print >>sys.stderr, "lex: states must be defined as a tuple or list."
            error = 1
        else:
            for s in states:
                if not isinstance(s,types.TupleType) or len(s) != 2:
                    print >>sys.stderr, "lex: invalid state specifier %s. Must be a tuple (statename,'exclusive|inclusive')" % repr(s)
                    error = 1
                    continue
                name, statetype = s
                if not isinstance(name,types.StringType):
                    print >>sys.stderr, "lex: state name %s must be a string" % repr(name)
                    error = 1
                    continue
                if not (statetype == 'inclusive' or statetype == 'exclusive'):
                    print >>sys.stderr, "lex: state type for state %s must be 'inclusive' or 'exclusive'" % name
                    error = 1
                    continue
                if stateinfo.has_key(name):
                    print >>sys.stderr, "lex: state '%s' already defined." % name
                    error = 1
                    continue
                stateinfo[name] = statetype

    # Get a list of symbols with the t_ prefix
    tsymbols = [f for f in ldict.keys() if f[:2] == 't_' ]

    # Now build up a list of functions and a list of strings
    funcsym = { }       # Symbols defined as functions
    strsym = { }        # Symbols defined as strings
    toknames = { }      # Mapping of symbols to token names

    for s in stateinfo.keys():
        funcsym[s] = []
        strsym[s] = []

    ignore = { }        # Ignore strings by state
    errorf = { }        # Error functions by state

    if len(tsymbols) == 0:
        raise SyntaxError,"lex: no rules of the form t_rulename are defined."

    for f in tsymbols:
        t = ldict[f]
        states, tokname = _statetoken(f,stateinfo)
        toknames[f] = tokname

        if callable(t):
            for s in states: funcsym[s].append((f,t))
        elif (isinstance(t, types.StringType) or isinstance(t,types.UnicodeType)):
            for s in states: strsym[s].append((f,t))
        else:
            print >>sys.stderr, "lex: %s not defined as a function or string" % f
            error = 1

    # Sort the functions by line number
    for f in funcsym.values():
        f.sort(lambda x,y: cmp(x[1].func_code.co_firstlineno,y[1].func_code.co_firstlineno))

    # Sort the strings by regular expression length
    for s in strsym.values():
        s.sort(lambda x,y: (len(x[1]) < len(y[1])) - (len(x[1]) > len(y[1])))

    regexs = { }

    # Build the master regular expressions
    for state in stateinfo.keys():
        regex_list = []

        # Add rules defined by functions first
        for fname, f in funcsym[state]:
            line = f.func_code.co_firstlineno
            file = f.func_code.co_filename
            files[file] = None
            tokname = toknames[fname]

            ismethod = isinstance(f, types.MethodType)

            if not optimize:
                nargs = f.func_code.co_argcount
                if ismethod:
                    reqargs = 2
                else:
                    reqargs = 1
                if nargs > reqargs:
                    print >>sys.stderr, "%s:%d: Rule '%s' has too many arguments." % (file,line,f.__name__)
                    error = 1
                    continue
                if nargs < reqargs:
                    print >>sys.stderr, "%s:%d: Rule '%s' requires an argument." % (file,line,f.__name__)
                    error = 1
                    continue
                if tokname == 'ignore':
                    print >>sys.stderr, "%s:%d: Rule '%s' must be defined as a string." % (file,line,f.__name__)
                    error = 1
                    continue

            if tokname == 'error':
                errorf[state] = f
                continue

            if f.__doc__:
                if not optimize:
                    try:
                        c = re.compile("(?P<%s>%s)" % (f.__name__,f.__doc__), re.VERBOSE | reflags)
                        if c.match(""):
                            print >>sys.stderr, "%s:%d: Regular expression for rule '%s' matches empty string." % (file,line,f.__name__)
                            error = 1
                            continue
                    except re.error,e:
                        print >>sys.stderr, "%s:%d: Invalid regular expression for rule '%s'. %s" % (file,line,f.__name__,e)
                        if '#' in f.__doc__:
                            print >>sys.stderr, "%s:%d. Make sure '#' in rule '%s' is escaped with '\\#'." % (file,line, f.__name__)
                        error = 1
                        continue

                    if debug:
                        print "lex: Adding rule %s -> '%s' (state '%s')" % (f.__name__,f.__doc__, state)

                # Okay. The regular expression seemed okay.  Let's append it to the master regular
                # expression we're building

                regex_list.append("(?P<%s>%s)" % (f.__name__,f.__doc__))
            else:
                print >>sys.stderr, "%s:%d: No regular expression defined for rule '%s'" % (file,line,f.__name__)

        # Now add all of the simple rules
        for name,r in strsym[state]:
            tokname = toknames[name]

            if tokname == 'ignore':
                if "\\" in r:
                    print >>sys.stderr, "lex: Warning. %s contains a literal backslash '\\'" % name
                ignore[state] = r
                continue

            if not optimize:
                if tokname == 'error':
                    raise SyntaxError,"lex: Rule '%s' must be defined as a function" % name

                if not lexobj.lextokens.has_key(tokname) and tokname.find("ignore_") < 0:
                    print >>sys.stderr, "lex: Rule '%s' defined for an unspecified token %s." % (name,tokname)
                    error = 1
                    continue
                try:
                    c = re.compile("(?P<%s>%s)" % (name,r),re.VERBOSE | reflags)
                    if (c.match("")):
                        print >>sys.stderr, "lex: Regular expression for rule '%s' matches empty string." % name
                        error = 1
                        continue
                except re.error,e:
                    print >>sys.stderr, "lex: Invalid regular expression for rule '%s'. %s" % (name,e)
                    if '#' in r:
                        print >>sys.stderr, "lex: Make sure '#' in rule '%s' is escaped with '\\#'." % name
                    error = 1
                    continue
                if debug:
                    print "lex: Adding rule %s -> '%s' (state '%s')" % (name,r,state)

            regex_list.append("(?P<%s>%s)" % (name,r))

        if not regex_list:
            print >>sys.stderr, "lex: No rules defined for state '%s'" % state
            error = 1

        regexs[state] = regex_list

    if not optimize:
        for f in files.keys():
            if not _validate_file(f):
                error = 1

    if error:
        raise SyntaxError,"lex: Unable to build lexer."

    # From this point forward, we're reasonably confident that we can build the lexer.
    # No more errors will be generated, but there might be some warning messages.

    # Build the master regular expressions
    for state in regexs.keys():
        lexre, re_text = _form_master_re(regexs[state],reflags,ldict,toknames)
        lexobj.lexstatere[state] = lexre
        lexobj.lexstateretext[state] = re_text
        if debug:
            for i in range(len(re_text)):
                print "lex: state '%s'. regex[%d] = '%s'" % (state, i, re_text[i])

    # For inclusive states, we need to add the INITIAL state
    for state,type in stateinfo.items():
        if state != "INITIAL" and type == 'inclusive':
            lexobj.lexstatere[state].extend(lexobj.lexstatere['INITIAL'])
            lexobj.lexstateretext[state].extend(lexobj.lexstateretext['INITIAL'])

    lexobj.lexstateinfo = stateinfo
    lexobj.lexre = lexobj.lexstatere["INITIAL"]
    lexobj.lexretext = lexobj.lexstateretext["INITIAL"]

    # Set up ignore variables
    lexobj.lexstateignore = ignore
    lexobj.lexignore = lexobj.lexstateignore.get("INITIAL","")

    # Set up error functions
    lexobj.lexstateerrorf = errorf
    lexobj.lexerrorf = errorf.get("INITIAL",None)
    if warn and not lexobj.lexerrorf:
        print >>sys.stderr, "lex: Warning. no t_error rule is defined."

    # Check state information for ignore and error rules
    for s,stype in stateinfo.items():
        if stype == 'exclusive':
            if warn and not errorf.has_key(s):
                print >>sys.stderr, "lex: Warning. no error rule is defined for exclusive state '%s'" % s
            if warn and not ignore.has_key(s) and lexobj.lexignore:
                print >>sys.stderr, "lex: Warning. no ignore rule is defined for exclusive state '%s'" % s
        elif stype == 'inclusive':
            if not errorf.has_key(s):
                errorf[s] = errorf.get("INITIAL",None)
            if not ignore.has_key(s):
                ignore[s] = ignore.get("INITIAL","")

    # Create global versions of the token() and input() functions
    token = lexobj.token
    input = lexobj.input
    lexer = lexobj

    # If in optimize mode, we write the lextab
    if lextab and optimize:
        lexobj.writetab(lextab)

    return lexobj

# -----------------------------------------------------------------------------
# runmain()
#
# This runs the lexer as a main program
# -----------------------------------------------------------------------------

def runmain(lexer=None,data=None):
    if not data:
        try:
            filename = sys.argv[1]
            f = open(filename)
            data = f.read()
            f.close()
        except IndexError:
            print "Reading from standard input (type EOF to end):"
            data = sys.stdin.read()

    if lexer:
        _input = lexer.input
    else:
        _input = input
    _input(data)
    if lexer:
        _token = lexer.token
    else:
        _token = token

    while 1:
        tok = _token()
        if not tok: break
        print "(%s,%r,%d,%d)" % (tok.type, tok.value, tok.lineno, tok.lexpos)

# -----------------------------------------------------------------------------
# @TOKEN(regex)
#
# This decorator function can be used to set the regex expression on a function
# when its docstring might need to be set in an alternative way
# -----------------------------------------------------------------------------

def TOKEN(r):
    def set_doc(f):
        f.__doc__ = r
        return f
    return set_doc

# Alternative spelling of the TOKEN decorator
Token = TOKEN
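
# Usage sketch (the name `identifier` below is an arbitrary example):
#
#     identifier = r'[a-zA-Z_][a-zA-Z0-9_]*'
#
#     @TOKEN(identifier)
#     def t_ID(t):
#         return t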