# -----------------------------------------------------------------------------
# cpp.py
#
# Author: David Beazley (http://www.dabeaz.com)
# Copyright (C) 2007
# All rights reserved
#
# This module implements an ANSI-C style lexical preprocessor for PLY.
# -----------------------------------------------------------------------------
from __future__ import generators

# -----------------------------------------------------------------------------
# Default preprocessor lexer definitions.  These tokens are enough to get
# a basic preprocessor working.  Other modules may import these if they want
# -----------------------------------------------------------------------------

tokens = (
    'CPP_ID','CPP_INTEGER', 'CPP_FLOAT', 'CPP_STRING', 'CPP_CHAR', 'CPP_WS', 'CPP_COMMENT', 'CPP_POUND','CPP_DPOUND'
)

literals = "+-*/%|&~^<>=!?()[]{}.,;:\\\'\""

# Whitespace
def t_CPP_WS(t):
    r'\s+'
    t.lexer.lineno += t.value.count("\n")
    return t

t_CPP_POUND = r'\#'
t_CPP_DPOUND = r'\#\#'

# Identifier
t_CPP_ID = r'[A-Za-z_][\w_]*'

# Integer literal
def CPP_INTEGER(t):
    r'(((((0x)|(0X))[0-9a-fA-F]+)|(\d+))([uU]|[lL]|[uU][lL]|[lL][uU])?)'
    return t

t_CPP_INTEGER = CPP_INTEGER

# Floating literal
t_CPP_FLOAT = r'((\d+)(\.\d+)(e(\+|-)?(\d+))? | (\d+)e(\+|-)?(\d+))([lL]|[fF])?'

# String literal
def t_CPP_STRING(t):
    r'\"([^\\\n]|(\\(.|\n)))*?\"'
    t.lexer.lineno += t.value.count("\n")
    return t

# Character constant 'c' or L'c'
def t_CPP_CHAR(t):
    r'(L)?\'([^\\\n]|(\\(.|\n)))*?\''
    t.lexer.lineno += t.value.count("\n")
    return t

# Comment
def t_CPP_COMMENT(t):
    r'(/\*(.|\n)*?\*/)|(//.*?\n)'
    t.lexer.lineno += t.value.count("\n")
    return t

def t_error(t):
    t.type = t.value[0]
    t.value = t.value[0]
    t.lexer.skip(1)
    return t

import sys
import re
import copy
import time
import os.path

# -----------------------------------------------------------------------------
# trigraph()
#
# Given an input string, this function replaces all trigraph sequences.
# The following mapping is used:
#
#     ??=    #
#     ??/    \
#     ??'    ^
#     ??(    [
#     ??)    ]
#     ??!    |
#     ??<    {
#     ??>    }
#     ??-    ~
# -----------------------------------------------------------------------------

_trigraph_pat = re.compile(r'''\?\?[=/\'\(\)\!<>\-]''')
_trigraph_rep = {
    '=':'#',
    '/':'\\',
    "'":'^',
    '(':'[',
    ')':']',
    '!':'|',
    '<':'{',
    '>':'}',
    '-':'~'
}

def trigraph(input):
    return _trigraph_pat.sub(lambda g: _trigraph_rep[g.group()[-1]],input)

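# Example (illustrative): the substitution is purely textual, e.g.
#
#     trigraph("??=define ARR(i) a??(i??)")   # -> "#define ARR(i) a[i]"
#     trigraph("x ??! y")                     # -> "x | y"
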
# ------------------------------------------------------------------
# Macro object
#
# This object holds information about preprocessor macros
#
#    .name      - Macro name (string)
#    .value     - Macro value (a list of tokens)
#    .arglist   - List of argument names
#    .variadic  - Boolean indicating whether or not variadic macro
#    .vararg    - Name of the variadic parameter
#
# When a macro is created, the macro replacement token sequence is
# pre-scanned and used to create patch lists that are later used
# during macro expansion
# ------------------------------------------------------------------

class Macro(object):
    def __init__(self,name,value,arglist=None,variadic=False):
        self.name = name
        self.value = value
        self.arglist = arglist
        self.variadic = variadic
        if variadic:
            self.vararg = arglist[-1]
        self.source = None

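# Example (illustrative): the Macro built for "#define ADD(x,y) (x)+(y)" looks
# roughly like this, where 'value_toks' stands for the token list of "(x)+(y)":
#
#     m = Macro("ADD", value_toks, arglist=["x","y"], variadic=False)
#     m.name      # 'ADD'
#     m.arglist   # ['x', 'y']
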
# ------------------------------------------------------------------
# Preprocessor object
#
# Object representing a preprocessor.  Contains macro definitions,
# include directories, and other information
# ------------------------------------------------------------------

class Preprocessor(object):
    def __init__(self,lexer=None):
        if lexer is None:
            # Note: this fallback assumes ply.lex has been imported as 'lex'
            # at module scope (as the __main__ block below does)
            lexer = lex.lexer
        self.lexer = lexer
        self.macros = { }
        self.path = []
        self.temp_path = []

        # Probe the lexer for selected tokens
        self.lexprobe()

        tm = time.localtime()
        self.define("__DATE__ \"%s\"" % time.strftime("%b %d %Y",tm))
        self.define("__TIME__ \"%s\"" % time.strftime("%H:%M:%S",tm))
        self.parser = None

    # -----------------------------------------------------------------------------
    # tokenize()
    #
    # Utility function. Given a string of text, tokenize into a list of tokens
    # -----------------------------------------------------------------------------

    def tokenize(self,text):
        tokens = []
        self.lexer.input(text)
        while True:
            tok = self.lexer.token()
            if not tok: break
            tokens.append(tok)
        return tokens

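    # Example (illustrative, 'pp' being a Preprocessor built on the default
    # lexer defined at the top of this module):
    #
    #     [t.value for t in pp.tokenize("#define X 1")]
    #     # -> ['#', 'define', ' ', 'X', ' ', '1']
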
    # ----------------------------------------------------------------------
    # error()
    #
    # Report a preprocessor error/warning of some kind
    # ----------------------------------------------------------------------

    def error(self,file,line,msg):
        print >>sys.stderr,"%s:%d %s" % (file,line,msg)

    # ----------------------------------------------------------------------
    # lexprobe()
    #
    # This method probes the preprocessor lexer object to discover
    # the token types of symbols that are important to the preprocessor.
    # If this works right, the preprocessor will simply "work"
    # with any suitable lexer regardless of how tokens have been named.
    # ----------------------------------------------------------------------

    def lexprobe(self):

        # Determine the token type for identifiers
        self.lexer.input("identifier")
        tok = self.lexer.token()
        if not tok or tok.value != "identifier":
            print "Couldn't determine identifier type"
        else:
            self.t_ID = tok.type

        # Determine the token type for integers
        self.lexer.input("12345")
        tok = self.lexer.token()
        if not tok or int(tok.value) != 12345:
            print "Couldn't determine integer type"
        else:
            self.t_INTEGER = tok.type
            self.t_INTEGER_TYPE = type(tok.value)

        # Determine the token type for strings enclosed in double quotes
        self.lexer.input("\"filename\"")
        tok = self.lexer.token()
        if not tok or tok.value != "\"filename\"":
            print "Couldn't determine string type"
        else:
            self.t_STRING = tok.type

        # Determine the token type for whitespace--if any
        self.lexer.input(" ")
        tok = self.lexer.token()
        if not tok or tok.value != " ":
            self.t_SPACE = None
        else:
            self.t_SPACE = tok.type

        # Determine the token type for newlines
        self.lexer.input("\n")
        tok = self.lexer.token()
        if not tok or tok.value != "\n":
            self.t_NEWLINE = None
            print "Couldn't determine token for newlines"
        else:
            self.t_NEWLINE = tok.type

        self.t_WS = (self.t_SPACE, self.t_NEWLINE)

        # Check for other characters used by the preprocessor
        chars = [ '<','>','#','##','\\','(',')',',','.']
        for c in chars:
            self.lexer.input(c)
            tok = self.lexer.token()
            if not tok or tok.value != c:
                print "Unable to lex '%s' required for preprocessor" % c

    # ----------------------------------------------------------------------
    # add_path()
    #
    # Adds a search path to the preprocessor.
    # ----------------------------------------------------------------------

    def add_path(self,path):
        self.path.append(path)

    # ----------------------------------------------------------------------
    # group_lines()
    #
    # Given an input string, this function splits it into lines.  Trailing whitespace
    # is removed.  Any line ending with \ is grouped with the next line.  This
    # function forms the lowest level of the preprocessor---grouping input text into
    # a line-by-line format.
    # ----------------------------------------------------------------------

    def group_lines(self,input):
        lex = self.lexer.clone()
        lines = [x.rstrip() for x in input.splitlines()]
        for i in xrange(len(lines)):
            j = i+1
            while lines[i].endswith('\\') and (j < len(lines)):
                lines[i] = lines[i][:-1]+lines[j]
                lines[j] = ""
                j += 1

        input = "\n".join(lines)
        lex.input(input)
        lex.lineno = 1

        current_line = []
        while True:
            tok = lex.token()
            if not tok:
                break
            current_line.append(tok)
            if tok.type in self.t_WS and '\n' in tok.value:
                yield current_line
                current_line = []

        if current_line:
            yield current_line

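    # Example (illustrative): given the text
    #
    #     "#define X \\\n   1\nint y;\n"
    #
    # the backslash-continued first line is joined with the second, and the
    # generator yields two token lists: one for the joined "#define X    1"
    # line and one for "int y;".
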
    # ----------------------------------------------------------------------
    # tokenstrip()
    #
    # Remove leading/trailing whitespace tokens from a token list
    # ----------------------------------------------------------------------

    def tokenstrip(self,tokens):
        i = 0
        while i < len(tokens) and tokens[i].type in self.t_WS:
            i += 1
        del tokens[:i]
        i = len(tokens)-1
        while i >= 0 and tokens[i].type in self.t_WS:
            i -= 1
        del tokens[i+1:]
        return tokens


    # ----------------------------------------------------------------------
    # collect_args()
    #
    # Collects comma separated arguments from a list of tokens.  The arguments
    # must be enclosed in parentheses.  Returns a tuple (tokencount,args,positions)
    # where tokencount is the number of tokens consumed, args is a list of arguments,
    # and positions is a list of integers containing the starting index of each
    # argument.  Each argument is represented by a list of tokens.
    #
    # When collecting arguments, leading and trailing whitespace is removed
    # from each argument.
    #
    # This function properly handles nested parentheses and commas---these do not
    # define new arguments.
    # ----------------------------------------------------------------------

    def collect_args(self,tokenlist):
        args = []
        positions = []
        current_arg = []
        nesting = 1
        tokenlen = len(tokenlist)

        # Search for the opening '('.
        i = 0
        while (i < tokenlen) and (tokenlist[i].type in self.t_WS):
            i += 1

        if (i < tokenlen) and (tokenlist[i].value == '('):
            positions.append(i+1)
        else:
            self.error(self.source,tokenlist[0].lineno,"Missing '(' in macro arguments")
            return 0, [], []

        i += 1

        while i < tokenlen:
            t = tokenlist[i]
            if t.value == '(':
                current_arg.append(t)
                nesting += 1
            elif t.value == ')':
                nesting -= 1
                if nesting == 0:
                    if current_arg:
                        args.append(self.tokenstrip(current_arg))
                        positions.append(i)
                    return i+1,args,positions
                current_arg.append(t)
            elif t.value == ',' and nesting == 1:
                args.append(self.tokenstrip(current_arg))
                positions.append(i+1)
                current_arg = []
            else:
                current_arg.append(t)
            i += 1

        # Missing end argument
        self.error(self.source,tokenlist[-1].lineno,"Missing ')' in macro arguments")
        return 0, [],[]

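    # Example (illustrative): for the tokens of "(a, f(b,c), d)" the call
    # returns the number of tokens consumed through the closing ')', the
    # argument token lists for "a", "f(b,c)" and "d", and the starting index
    # of each argument.  The comma inside "f(b,c)" does not start a new
    # argument because of the nesting count.
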
    # ----------------------------------------------------------------------
    # macro_prescan()
    #
    # Examine the macro value (token sequence) and identify patch points
    # This is used to speed up macro expansion later on---we'll know
    # right away where to apply patches to the value to form the expansion
    # ----------------------------------------------------------------------

    def macro_prescan(self,macro):
        macro.patch     = []             # Standard macro arguments
        macro.str_patch = []             # String conversion expansion
        macro.var_comma_patch = []       # Variadic macro comma patch
        i = 0
        while i < len(macro.value):
            if macro.value[i].type == self.t_ID and macro.value[i].value in macro.arglist:
                argnum = macro.arglist.index(macro.value[i].value)
                # Conversion of argument to a string
                if i > 0 and macro.value[i-1].value == '#':
                    macro.value[i] = copy.copy(macro.value[i])
                    macro.value[i].type = self.t_STRING
                    del macro.value[i-1]
                    macro.str_patch.append((argnum,i-1))
                    continue
                # Concatenation
                elif (i > 0 and macro.value[i-1].value == '##'):
                    macro.patch.append(('c',argnum,i-1))
                    del macro.value[i-1]
                    continue
                elif ((i+1) < len(macro.value) and macro.value[i+1].value == '##'):
                    macro.patch.append(('c',argnum,i))
                    i += 1
                    continue
                # Standard expansion
                else:
                    macro.patch.append(('e',argnum,i))
            elif macro.value[i].value == '##':
                if macro.variadic and (i > 0) and (macro.value[i-1].value == ',') and \
                        ((i+1) < len(macro.value)) and (macro.value[i+1].type == self.t_ID) and \
                        (macro.value[i+1].value == macro.vararg):
                    macro.var_comma_patch.append(i-1)
            i += 1
        macro.patch.sort(key=lambda x: x[2],reverse=True)

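    # Example (illustrative): for "#define STR(x) #x" the prescan records one
    # entry in macro.str_patch (stringize argument 0), and for
    # "#define CAT(a,b) a##b" it records two 'c' (concatenation) patches in
    # macro.patch, so macro_expand_args() can splice arguments in directly
    # without re-scanning the replacement text.
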
    # ----------------------------------------------------------------------
    # macro_expand_args()
    #
    # Given a Macro and list of arguments (each a token list), this method
    # returns an expanded version of a macro.  The return value is a token sequence
    # representing the replacement macro tokens
    # ----------------------------------------------------------------------

    def macro_expand_args(self,macro,args):
        # Make a copy of the macro token sequence
        rep = [copy.copy(_x) for _x in macro.value]

        # Make string expansion patches.  These do not alter the length of the replacement sequence

        str_expansion = {}
        for argnum, i in macro.str_patch:
            if argnum not in str_expansion:
                str_expansion[argnum] = ('"%s"' % "".join([x.value for x in args[argnum]])).replace("\\","\\\\")
            rep[i] = copy.copy(rep[i])
            rep[i].value = str_expansion[argnum]

        # Make the variadic macro comma patch.  If the variadic macro argument is empty,
        # we get rid of the comma that precedes it in the replacement sequence.
        comma_patch = False
        if macro.variadic and not args[-1]:
            for i in macro.var_comma_patch:
                rep[i] = None
                comma_patch = True

        # Make all other patches.  The order of these matters.  It is assumed that the patch list
        # has been sorted in reverse order of patch location since replacements will cause the
        # size of the replacement sequence to expand from the patch point.

        expanded = { }
        for ptype, argnum, i in macro.patch:
            # Concatenation.  Argument is left unexpanded
            if ptype == 'c':
                rep[i:i+1] = args[argnum]
            # Normal expansion.  Argument is macro expanded first
            elif ptype == 'e':
                if argnum not in expanded:
                    expanded[argnum] = self.expand_macros(args[argnum])
                rep[i:i+1] = expanded[argnum]

        # Get rid of removed comma if necessary
        if comma_patch:
            rep = [_i for _i in rep if _i]

        return rep


    # ----------------------------------------------------------------------
    # expand_macros()
    #
    # Given a list of tokens, this function performs macro expansion.
    # The expanded argument is a dictionary that contains macros already
    # expanded.  This is used to prevent infinite recursion.
    # ----------------------------------------------------------------------

    def expand_macros(self,tokens,expanded=None):
        if expanded is None:
            expanded = {}
        i = 0
        while i < len(tokens):
            t = tokens[i]
            if t.type == self.t_ID:
                if t.value in self.macros and t.value not in expanded:
                    # Yes, we found a macro match
                    expanded[t.value] = True

                    m = self.macros[t.value]
                    if not m.arglist:
                        # A simple macro
                        ex = self.expand_macros([copy.copy(_x) for _x in m.value],expanded)
                        for e in ex:
                            e.lineno = t.lineno
                        tokens[i:i+1] = ex
                        i += len(ex)
                    else:
                        # A macro with arguments
                        j = i + 1
                        while j < len(tokens) and tokens[j].type in self.t_WS:
                            j += 1
                        if j < len(tokens) and tokens[j].value == '(':
                            tokcount,args,positions = self.collect_args(tokens[j:])
                            if not m.variadic and len(args) != len(m.arglist):
                                self.error(self.source,t.lineno,"Macro %s requires %d arguments" % (t.value,len(m.arglist)))
                                i = j + tokcount
                            elif m.variadic and len(args) < len(m.arglist)-1:
                                if len(m.arglist) > 2:
                                    self.error(self.source,t.lineno,"Macro %s must have at least %d arguments" % (t.value, len(m.arglist)-1))
                                else:
                                    self.error(self.source,t.lineno,"Macro %s must have at least %d argument" % (t.value, len(m.arglist)-1))
                                i = j + tokcount
                            else:
                                if m.variadic:
                                    if len(args) == len(m.arglist)-1:
                                        args.append([])
                                    else:
                                        args[len(m.arglist)-1] = tokens[j+positions[len(m.arglist)-1]:j+tokcount-1]
                                        del args[len(m.arglist):]

                                # Get macro replacement text
                                rep = self.macro_expand_args(m,args)
                                rep = self.expand_macros(rep,expanded)
                                for r in rep:
                                    r.lineno = t.lineno
                                tokens[i:j+tokcount] = rep
                                i += len(rep)
                        else:
                            # The macro name is not followed by '(', so this is
                            # not an invocation; leave it as a plain identifier
                            i = j
                    del expanded[t.value]
                    continue
                elif t.value == '__LINE__':
                    t.type = self.t_INTEGER
                    t.value = self.t_INTEGER_TYPE(t.lineno)

            i += 1
        return tokens

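    # Example (illustrative): with "#define ADD(x,y) ((x)+(y))" defined,
    # expanding the tokens of "ADD(1, 2*3)" rewrites the token list in place
    # into the tokens of "((1)+(2*3))".  The 'expanded' dictionary is what
    # keeps a self-referential macro such as "#define X X+1" from recursing
    # forever.
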
    # ----------------------------------------------------------------------
    # evalexpr()
    #
    # Evaluate an expression token sequence for the purposes of evaluating
    # integral expressions.
    # ----------------------------------------------------------------------

    def evalexpr(self,tokens):
        # tokens = tokenize(line)
        # Search for defined macros
        i = 0
        while i < len(tokens):
            if tokens[i].type == self.t_ID and tokens[i].value == 'defined':
                j = i + 1
                needparen = False
                result = "0L"
                while j < len(tokens):
                    if tokens[j].type in self.t_WS:
                        j += 1
                        continue
                    elif tokens[j].type == self.t_ID:
                        if tokens[j].value in self.macros:
                            result = "1L"
                        else:
                            result = "0L"
                        if not needparen: break
                    elif tokens[j].value == '(':
                        needparen = True
                    elif tokens[j].value == ')':
                        break
                    else:
                        self.error(self.source,tokens[i].lineno,"Malformed defined()")
                    j += 1
                tokens[i].type = self.t_INTEGER
                tokens[i].value = self.t_INTEGER_TYPE(result)
                del tokens[i+1:j+1]
            i += 1
        tokens = self.expand_macros(tokens)
        for i,t in enumerate(tokens):
            if t.type == self.t_ID:
                tokens[i] = copy.copy(t)
                tokens[i].type = self.t_INTEGER
                tokens[i].value = self.t_INTEGER_TYPE("0L")
            elif t.type == self.t_INTEGER:
                tokens[i] = copy.copy(t)
                # Strip off any trailing suffixes
                tokens[i].value = str(tokens[i].value)
                while tokens[i].value[-1] not in "0123456789abcdefABCDEF":
                    tokens[i].value = tokens[i].value[:-1]

        expr = "".join([str(x.value) for x in tokens])
        expr = expr.replace("&&"," and ")
        expr = expr.replace("||"," or ")
        expr = expr.replace("!"," not ")
        try:
            result = eval(expr)
        except StandardError:
            self.error(self.source,tokens[0].lineno,"Couldn't evaluate expression")
            result = 0
        return result

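    # Example (illustrative): for the tokens of "defined(FOO) && BAR > 2",
    # with FOO defined and BAR not defined, the sequence is rewritten to
    # something like "1 and 0L > 2" before being handed to eval(), which
    # yields a false result.  Any identifier still left after macro expansion
    # is treated as 0.
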
    # ----------------------------------------------------------------------
    # parsegen()
    #
    # Parse an input string.
    # ----------------------------------------------------------------------
    def parsegen(self,input,source=None):

        # Replace trigraph sequences
        t = trigraph(input)
        lines = self.group_lines(t)

        if not source:
            source = ""

        self.define("__FILE__ \"%s\"" % source)

        self.source = source
        chunk = []
        enable = True
        iftrigger = False
        ifstack = []

        for x in lines:
            for i,tok in enumerate(x):
                if tok.type not in self.t_WS: break
            if tok.value == '#':
                # Preprocessor directive

                for tok in x:
                    if tok.type in self.t_WS and '\n' in tok.value:
                        chunk.append(tok)

                dirtokens = self.tokenstrip(x[i+1:])
                if dirtokens:
                    name = dirtokens[0].value
                    args = self.tokenstrip(dirtokens[1:])
                else:
                    name = ""
                    args = []

                if name == 'define':
                    if enable:
                        for tok in self.expand_macros(chunk):
                            yield tok
                        chunk = []
                        self.define(args)
                elif name == 'include':
                    if enable:
                        for tok in self.expand_macros(chunk):
                            yield tok
                        chunk = []
                        oldfile = self.macros['__FILE__']
                        for tok in self.include(args):
                            yield tok
                        self.macros['__FILE__'] = oldfile
                        self.source = source
                elif name == 'undef':
                    if enable:
                        for tok in self.expand_macros(chunk):
                            yield tok
                        chunk = []
                        self.undef(args)
                elif name == 'ifdef':
                    ifstack.append((enable,iftrigger))
                    if enable:
                        if not args[0].value in self.macros:
                            enable = False
                            iftrigger = False
                        else:
                            iftrigger = True
                elif name == 'ifndef':
                    ifstack.append((enable,iftrigger))
                    if enable:
                        if args[0].value in self.macros:
                            enable = False
                            iftrigger = False
                        else:
                            iftrigger = True
                elif name == 'if':
                    ifstack.append((enable,iftrigger))
                    if enable:
                        result = self.evalexpr(args)
                        if not result:
                            enable = False
                            iftrigger = False
                        else:
                            iftrigger = True
                elif name == 'elif':
                    if ifstack:
                        if ifstack[-1][0]:     # We only pay attention if outer "if" allows this
                            if enable:         # If already true, we flip enable False
                                enable = False
                            elif not iftrigger:   # If False, but not triggered yet, we'll check expression
                                result = self.evalexpr(args)
                                if result:
                                    enable = True
                                    iftrigger = True
                    else:
                        self.error(self.source,dirtokens[0].lineno,"Misplaced #elif")

                elif name == 'else':
                    if ifstack:
                        if ifstack[-1][0]:
                            if enable:
                                enable = False
                            elif not iftrigger:
                                enable = True
                                iftrigger = True
                    else:
                        self.error(self.source,dirtokens[0].lineno,"Misplaced #else")

                elif name == 'endif':
                    if ifstack:
                        enable,iftrigger = ifstack.pop()
                    else:
                        self.error(self.source,dirtokens[0].lineno,"Misplaced #endif")
                else:
                    # Unknown preprocessor directive
                    pass

            else:
                # Normal text
                if enable:
                    chunk.extend(x)

        for tok in self.expand_macros(chunk):
            yield tok
        chunk = []

    # ----------------------------------------------------------------------
    # include()
    #
    # Implementation of file-inclusion
    # ----------------------------------------------------------------------

    def include(self,tokens):
        # Try to extract the filename and then process an include file
        if not tokens:
            return
        if tokens:
            if tokens[0].value != '<' and tokens[0].type != self.t_STRING:
                tokens = self.expand_macros(tokens)

            if tokens[0].value == '<':
                # Include <...>
                i = 1
                while i < len(tokens):
                    if tokens[i].value == '>':
                        break
                    i += 1
                else:
                    print "Malformed #include <...>"
                    return
                filename = "".join([x.value for x in tokens[1:i]])
                path = self.path + [""] + self.temp_path
            elif tokens[0].type == self.t_STRING:
                filename = tokens[0].value[1:-1]
                path = self.temp_path + [""] + self.path
            else:
                print "Malformed #include statement"
                return
        for p in path:
            iname = os.path.join(p,filename)
            try:
                data = open(iname,"r").read()
                dname = os.path.dirname(iname)
                if dname:
                    self.temp_path.insert(0,dname)
                for tok in self.parsegen(data,filename):
                    yield tok
                if dname:
                    del self.temp_path[0]
                break
            except IOError,e:
                pass
        else:
            print "Couldn't find '%s'" % filename

    # ----------------------------------------------------------------------
    # define()
    #
    # Define a new macro
    # ----------------------------------------------------------------------

    def define(self,tokens):
        if isinstance(tokens,(str,unicode)):
            tokens = self.tokenize(tokens)

        linetok = tokens
        try:
            name = linetok[0]
            if len(linetok) > 1:
                mtype = linetok[1]
            else:
                mtype = None
            if not mtype:
                m = Macro(name.value,[])
                self.macros[name.value] = m
            elif mtype.type in self.t_WS:
                # A normal macro
                m = Macro(name.value,self.tokenstrip(linetok[2:]))
                self.macros[name.value] = m
            elif mtype.value == '(':
                # A macro with arguments
                tokcount, args, positions = self.collect_args(linetok[1:])
                variadic = False
                for a in args:
                    if variadic:
                        print "No more arguments may follow a variadic argument"
                        break
                    astr = "".join([str(_i.value) for _i in a])
                    if astr == "...":
                        variadic = True
                        a[0].type = self.t_ID
                        a[0].value = '__VA_ARGS__'
                        del a[1:]
                        continue
                    elif astr[-3:] == "..." and a[0].type == self.t_ID:
                        variadic = True
                        del a[1:]
                        # If the "..." is attached to the identifier (e.g. "args..."),
                        # strip it off the name for the purposes of macro expansion
                        if a[0].value[-3:] == '...':
                            a[0].value = a[0].value[:-3]
                        continue
                    if len(a) > 1 or a[0].type != self.t_ID:
                        print "Invalid macro argument"
                        break
                else:
                    mvalue = self.tokenstrip(linetok[1+tokcount:])
                    i = 0
                    while i < len(mvalue):
                        if i+1 < len(mvalue):
                            if mvalue[i].type in self.t_WS and mvalue[i+1].value == '##':
                                del mvalue[i]
                                continue
                            elif mvalue[i].value == '##' and mvalue[i+1].type in self.t_WS:
                                del mvalue[i+1]
                        i += 1
                    m = Macro(name.value,mvalue,[x[0].value for x in args],variadic)
                    self.macro_prescan(m)
                    self.macros[name.value] = m
            else:
                print "Bad macro definition"
        except LookupError:
            print "Bad macro definition"

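    # Example (illustrative, 'pp' being a Preprocessor instance): define()
    # accepts either a raw string or an already-tokenized list, so these are
    # equivalent:
    #
    #     pp.define("MAX(a,b) ((a) > (b) ? (a) : (b))")
    #     pp.define(pp.tokenize("MAX(a,b) ((a) > (b) ? (a) : (b))"))
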
    # ----------------------------------------------------------------------
    # undef()
    #
    # Undefine a macro
    # ----------------------------------------------------------------------

    def undef(self,tokens):
        id = tokens[0].value
        try:
            del self.macros[id]
        except LookupError:
            pass

    # ----------------------------------------------------------------------
    # parse()
    #
    # Parse input text.
    # ----------------------------------------------------------------------
    def parse(self,input,source=None,ignore={}):
        self.ignore = ignore
        self.parser = self.parsegen(input,source)

    # ----------------------------------------------------------------------
    # token()
    #
    # Method to return individual tokens
    # ----------------------------------------------------------------------
    def token(self):
        try:
            while True:
                tok = self.parser.next()
                if tok.type not in self.ignore: return tok
        except StopIteration:
            self.parser = None
            return None

if __name__ == '__main__':
    import ply.lex as lex
    lexer = lex.lex()

    # Run a preprocessor
    import sys
    f = open(sys.argv[1])
    input = f.read()

    p = Preprocessor(lexer)
    p.parse(input,sys.argv[1])
    while True:
        tok = p.token()
        if not tok: break
        print p.source, tok