# -----------------------------------------------------------------------------
#
# Author: David Beazley (http://www.dabeaz.com)
#
# This module implements an ANSI-C style lexical preprocessor for PLY.
# -----------------------------------------------------------------------------
from __future__ import generators
# -----------------------------------------------------------------------------
# Default preprocessor lexer definitions.  These tokens are enough to get
# a basic preprocessor working.  Other modules may import these if they want
# -----------------------------------------------------------------------------
tokens = (
   'CPP_ID','CPP_INTEGER', 'CPP_FLOAT', 'CPP_STRING', 'CPP_CHAR', 'CPP_WS', 'CPP_COMMENT', 'CPP_POUND','CPP_DPOUND'
)
literals = "+-*/%|&~^<>=!?()[]{}.,;:\\\'\""
# Whitespace
def t_CPP_WS(t):
    r'\s+'
    t.lexer.lineno += t.value.count("\n")
    return t
t_CPP_POUND = r'\#'
t_CPP_DPOUND = r'\#\#'
# Identifier
t_CPP_ID = r'[A-Za-z_][\w_]*'
# Integer literal
def CPP_INTEGER(t):
    r'(((((0x)|(0X))[0-9a-fA-F]+)|(\d+))([uU]|[lL]|[uU][lL]|[lL][uU])?)'
    return t
t_CPP_INTEGER = CPP_INTEGER
# Floating literal
t_CPP_FLOAT = r'((\d+)(\.\d+)(e(\+|-)?(\d+))? | (\d+)e(\+|-)?(\d+))([lL]|[fF])?'
# String literal
def t_CPP_STRING(t):
    r'\"([^\\\n]|(\\(.|\n)))*?\"'
    t.lexer.lineno += t.value.count("\n")
    return t
# Character constant 'c' or L'c'
def t_CPP_CHAR(t):
    r'(L)?\'([^\\\n]|(\\(.|\n)))*?\''
    t.lexer.lineno += t.value.count("\n")
    return t
# Comment
def t_CPP_COMMENT(t):
    r'(/\*(.|\n)*?\*/)|(//.*?\n)'
    t.lexer.lineno += t.value.count("\n")
    return t

def t_error(t):
    t.type = t.value[0]
    t.value = t.value[0]
    t.lexer.skip(1)
    return t
import re
import copy
import time
import os.path
import sys

# -----------------------------------------------------------------------------
# trigraph()
#
# Given an input string, this function replaces all trigraph sequences.
# The following mapping is used:
#
#     ??=  ->  #      ??/  ->  \      ??'  ->  ^
#     ??(  ->  [      ??)  ->  ]      ??!  ->  |
#     ??<  ->  {      ??>  ->  }      ??-  ->  ~
# -----------------------------------------------------------------------------
_trigraph_pat = re.compile(r'''\?\?[=/\'\(\)\!<>\-]''')
_trigraph_rep = {'=':'#', '/':'\\', "'":'^', '(':'[', ')':']',
                 '!':'|', '<':'{', '>':'}', '-':'~'}
def trigraph(input):
    return _trigraph_pat.sub(lambda g: _trigraph_rep[g.group()[-1]],input)
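# For example, trigraph("??=define ARR(x) x??(0??)") returns the string
# "#define ARR(x) x[0]".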
# ------------------------------------------------------------------
# Macro object
#
# This object holds information about preprocessor macros
#
#    .name      - Macro name (string)
#    .value     - Macro value (a list of tokens)
#    .arglist   - List of argument names
#    .variadic  - Boolean indicating whether or not the macro is variadic
#    .vararg    - Name of the variadic parameter
#
# When a macro is created, the macro replacement token sequence is
# pre-scanned and used to create patch lists that are later used
# during macro expansion
# ------------------------------------------------------------------
class Macro(object):
    def __init__(self,name,value,arglist=None,variadic=False):
        self.name = name
        self.value = value
        self.arglist = arglist
        self.variadic = variadic
        if variadic:
            self.vararg = arglist[-1]
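# For example (illustrative), "#define MAX(a,b) ((a)>(b)?(a):(b))" yields a
# Macro with .name == 'MAX', .arglist == ['a','b'], .variadic == False, and
# .value holding the tokens of the replacement text.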
# ------------------------------------------------------------------
# Preprocessor object
#
# Object representing a preprocessor.  Contains macro definitions,
# include directories, and other information
# ------------------------------------------------------------------
class Preprocessor(object):
    def __init__(self,lexer=None):
        if lexer is None:
            import ply.lex as lex
            lexer = lex.lexer
        self.lexer = lexer
        self.macros = { }
        self.path = []
        self.temp_path = []

        # Probe the lexer for selected tokens
        self.lexprobe()

        tm = time.localtime()
        self.define("__DATE__ \"%s\"" % time.strftime("%b %d %Y",tm))
        self.define("__TIME__ \"%s\"" % time.strftime("%H:%M:%S",tm))
    # ----------------------------------------------------------------------
    # tokenize()
    #
    # Utility function.  Given a string of text, tokenize into a list of tokens
    # ----------------------------------------------------------------------
    def tokenize(self,text):
        tokens = []
        self.lexer.input(text)
        while True:
            tok = self.lexer.token()
            if not tok: break
            tokens.append(tok)
        return tokens
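    # For example, self.tokenize("a + 1") returns the identifier, whitespace,
    # operator, and integer tokens in order, exactly as the lexer reports them.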
    # ----------------------------------------------------------------------
    # error()
    #
    # Report a preprocessor error/warning of some kind
    # ----------------------------------------------------------------------
    def error(self,file,line,msg):
        print >>sys.stderr,"%s:%d %s" % (file,line,msg)
    # ----------------------------------------------------------------------
    # lexprobe()
    #
    # This method probes the preprocessor lexer object to discover
    # the token types of symbols that are important to the preprocessor.
    # If this works right, the preprocessor will simply "work"
    # with any suitable lexer regardless of how tokens have been named.
    # ----------------------------------------------------------------------
    def lexprobe(self):

        # Determine the token type for identifiers
        self.lexer.input("identifier")
        tok = self.lexer.token()
        if not tok or tok.value != "identifier":
            print "Couldn't determine identifier type"
        else:
            self.t_ID = tok.type
        # Determine the token type for integers
        self.lexer.input("12345")
        tok = self.lexer.token()
        if not tok or int(tok.value) != 12345:
            print "Couldn't determine integer type"
        else:
            self.t_INTEGER = tok.type
            self.t_INTEGER_TYPE = type(tok.value)
        # Determine the token type for strings enclosed in double quotes
        self.lexer.input("\"filename\"")
        tok = self.lexer.token()
        if not tok or tok.value != "\"filename\"":
            print "Couldn't determine string type"
        else:
            self.t_STRING = tok.type
        # Determine the token type for whitespace--if any
        self.lexer.input(" ")
        tok = self.lexer.token()
        if not tok or tok.value != " ":
            self.t_SPACE = None
        else:
            self.t_SPACE = tok.type
        # Determine the token type for newlines
        self.lexer.input("\n")
        tok = self.lexer.token()
        if not tok or tok.value != "\n":
            self.t_NEWLINE = None
            print "Couldn't determine token for newlines"
        else:
            self.t_NEWLINE = tok.type

        self.t_WS = (self.t_SPACE, self.t_NEWLINE)
        # Check for other characters used by the preprocessor
        chars = [ '<','>','#','##','\\','(',')',',','.']
        for c in chars:
            self.lexer.input(c)
            tok = self.lexer.token()
            if not tok or tok.value != c:
                print "Unable to lex '%s' required for preprocessor" % c
    # ----------------------------------------------------------------------
    # add_path()
    #
    # Adds a search path to the preprocessor.
    # ----------------------------------------------------------------------
    def add_path(self,path):
        self.path.append(path)
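    # Typical usage (the directory shown is illustrative):
    #
    #     p = Preprocessor(lexer)
    #     p.add_path("/usr/local/include")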
    # ----------------------------------------------------------------------
    # group_lines()
    #
    # Given an input string, this function splits it into lines.  Trailing whitespace
    # is removed.  Any line ending with \ is grouped with the next line.  This
    # function forms the lowest level of the preprocessor---grouping text into
    # a line-by-line format.
    # ----------------------------------------------------------------------
    def group_lines(self,input):
        lex = self.lexer.clone()
        lines = [x.rstrip() for x in input.splitlines()]
        for i in xrange(len(lines)):
            j = i+1
            while lines[i].endswith('\\') and (j < len(lines)):
                lines[i] = lines[i][:-1]+lines[j]
                lines[j] = ""
                j += 1

        input = "\n".join(lines)
        lex.input(input)
        lex.lineno = 1

        current_line = []
        while True:
            tok = lex.token()
            if not tok:
                break
            current_line.append(tok)
            if tok.type in self.t_WS and '\n' in tok.value:
                yield current_line
                current_line = []

        if current_line:
            yield current_line
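    # For example, the physical lines "#define A \" and "1" are joined into the
    # single logical line "#define A 1" before tokens are grouped and yielded.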
    # ----------------------------------------------------------------------
    # tokenstrip()
    #
    # Remove leading/trailing whitespace tokens from a token list
    # ----------------------------------------------------------------------
    def tokenstrip(self,tokens):
        i = 0
        while i < len(tokens) and tokens[i].type in self.t_WS:
            i += 1
        del tokens[:i]
        i = len(tokens)-1
        while i >= 0 and tokens[i].type in self.t_WS:
            i -= 1
        del tokens[i+1:]
        return tokens
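    # For example, the token list of "  x + y  " is trimmed in place to the
    # tokens of "x + y"; interior whitespace is untouched.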
    # ----------------------------------------------------------------------
    # collect_args()
    #
    # Collects comma separated arguments from a list of tokens.  The arguments
    # must be enclosed in parentheses.  Returns a tuple (tokencount,args,positions)
    # where tokencount is the number of tokens consumed, args is a list of arguments,
    # and positions is a list of integers containing the starting index of each
    # argument.  Each argument is represented by a list of tokens.
    #
    # When collecting arguments, leading and trailing whitespace is removed
    # from each argument.
    #
    # This function properly handles nested parentheses and commas---these do not
    # define new arguments.
    # ----------------------------------------------------------------------
    def collect_args(self,tokenlist):
        args = []
        positions = []
        current_arg = []
        nesting = 1
        tokenlen = len(tokenlist)

        # Search for the opening '('.
        i = 0
        while (i < tokenlen) and (tokenlist[i].type in self.t_WS):
            i += 1

        if (i < tokenlen) and (tokenlist[i].value == '('):
            positions.append(i+1)
        else:
            self.error(self.source,tokenlist[0].lineno,"Missing '(' in macro arguments")
            return 0, [], []

        i += 1

        while i < tokenlen:
            t = tokenlist[i]
            if t.value == '(':
                current_arg.append(t)
                nesting += 1
            elif t.value == ')':
                nesting -= 1
                if nesting == 0:
                    if current_arg:
                        args.append(self.tokenstrip(current_arg))
                        positions.append(i)
                    return i+1,args,positions
                current_arg.append(t)
            elif t.value == ',' and nesting == 1:
                args.append(self.tokenstrip(current_arg))
                positions.append(i+1)
                current_arg = []
            else:
                current_arg.append(t)
            i += 1

        # Missing end argument
        self.error(self.source,tokenlist[-1].lineno,"Missing ')' in macro arguments")
        return 0, [], []
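    # For example, given the tokens of "(a, f(b,c), d)", this returns the number
    # of tokens consumed through the closing ')', the three argument token lists
    # ("a", "f(b,c)", "d"), and their starting indices.  The comma inside f(b,c)
    # does not split an argument because nesting > 1 at that point.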
    # ----------------------------------------------------------------------
    # macro_prescan()
    #
    # Examine the macro value (token sequence) and identify patch points.
    # This is used to speed up macro expansion later on---we'll know
    # right away where to apply patches to the value to form the expansion
    # ----------------------------------------------------------------------
    def macro_prescan(self,macro):
        macro.patch = []                 # Standard macro arguments
        macro.str_patch = []             # String conversion expansion
        macro.var_comma_patch = []       # Variadic macro comma patch
        i = 0
        while i < len(macro.value):
            if macro.value[i].type == self.t_ID and macro.value[i].value in macro.arglist:
                argnum = macro.arglist.index(macro.value[i].value)
                # Conversion of argument to a string
                if i > 0 and macro.value[i-1].value == '#':
                    macro.value[i] = copy.copy(macro.value[i])
                    macro.value[i].type = self.t_STRING
                    del macro.value[i-1]
                    macro.str_patch.append((argnum,i-1))
                    continue
                # Concatenation
                elif (i > 0 and macro.value[i-1].value == '##'):
                    macro.patch.append(('c',argnum,i-1))
                    del macro.value[i-1]
                    continue
                elif ((i+1) < len(macro.value) and macro.value[i+1].value == '##'):
                    macro.patch.append(('c',argnum,i))
                    i += 1
                    continue
                # Standard expansion
                else:
                    macro.patch.append(('e',argnum,i))
            elif macro.value[i].value == '##':
                if macro.variadic and (i > 0) and (macro.value[i-1].value == ',') and \
                        ((i+1) < len(macro.value)) and (macro.value[i+1].type == self.t_ID) and \
                        (macro.value[i+1].value == macro.vararg):
                    macro.var_comma_patch.append(i-1)
            i += 1
        macro.patch.sort(key=lambda x: x[2],reverse=True)
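    # For example, prescanning "#define STR(x) #x" records a string-conversion
    # entry in str_patch, "#define CAT(a,b) a##b" records two 'c' (concatenate,
    # unexpanded) patches, and a plain use of an argument becomes an 'e'
    # (expand) patch.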
    # ----------------------------------------------------------------------
    # macro_expand_args()
    #
    # Given a Macro and list of arguments (each a token list), this method
    # returns an expanded version of a macro.  The return value is a token
    # sequence representing the replacement macro tokens
    # ----------------------------------------------------------------------
    def macro_expand_args(self,macro,args):
        # Make a copy of the macro token sequence
        rep = [copy.copy(_x) for _x in macro.value]

        # Make string expansion patches.  These do not alter the length of the replacement sequence
        str_expansion = {}
        for argnum, i in macro.str_patch:
            if argnum not in str_expansion:
                str_expansion[argnum] = ('"%s"' % "".join([x.value for x in args[argnum]])).replace("\\","\\\\")
            rep[i] = copy.copy(rep[i])
            rep[i].value = str_expansion[argnum]

        # Make the variadic macro comma patch.  If the variadic macro argument is empty,
        # we get rid of the comma
        comma_patch = False
        if macro.variadic and not args[-1]:
            for i in macro.var_comma_patch:
                rep[i] = None
                comma_patch = True

        # Make all other patches.  The order of these matters.  It is assumed that the patch list
        # has been sorted in reverse order of patch location since replacements will cause the
        # size of the replacement sequence to expand from the patch point.
        expanded = { }
        for ptype, argnum, i in macro.patch:
            # Concatenation.  Argument is left unexpanded
            if ptype == 'c':
                rep[i:i+1] = args[argnum]
            # Normal expansion.  Argument is macro expanded first
            elif ptype == 'e':
                if argnum not in expanded:
                    expanded[argnum] = self.expand_macros(args[argnum])
                rep[i:i+1] = expanded[argnum]

        # Get rid of removed comma if necessary
        if comma_patch:
            rep = [_i for _i in rep if _i]

        return rep
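    # For example, expanding MAX(1+2,y) against the earlier MAX macro splices
    # the (macro-expanded) tokens of "1+2" and "y" into a fresh copy of the
    # replacement sequence at the recorded patch points.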
    # ----------------------------------------------------------------------
    # expand_macros()
    #
    # Given a list of tokens, this function performs macro expansion.
    # The expanded argument is a dictionary that contains macros already
    # expanded.  This is used to prevent infinite recursion.
    # ----------------------------------------------------------------------
    def expand_macros(self,tokens,expanded=None):
        if expanded is None:
            expanded = {}
        i = 0
        while i < len(tokens):
            t = tokens[i]
            if t.type == self.t_ID:
                if t.value in self.macros and t.value not in expanded:
                    # Yes, we found a macro match
                    expanded[t.value] = True

                    m = self.macros[t.value]
                    if not m.arglist:
                        # A simple macro
                        ex = self.expand_macros([copy.copy(_x) for _x in m.value],expanded)
                        for e in ex:
                            e.lineno = t.lineno
                        tokens[i:i+1] = ex
                        i += len(ex)
                    else:
                        # A macro with arguments
                        j = i + 1
                        while j < len(tokens) and tokens[j].type in self.t_WS:
                            j += 1
                        if tokens[j].value == '(':
                            tokcount,args,positions = self.collect_args(tokens[j:])
                            if not m.variadic and len(args) != len(m.arglist):
                                self.error(self.source,t.lineno,"Macro %s requires %d arguments" % (t.value,len(m.arglist)))
                                i = j + tokcount
                            elif m.variadic and len(args) < len(m.arglist)-1:
                                if len(m.arglist) > 2:
                                    self.error(self.source,t.lineno,"Macro %s must have at least %d arguments" % (t.value, len(m.arglist)-1))
                                else:
                                    self.error(self.source,t.lineno,"Macro %s must have at least %d argument" % (t.value, len(m.arglist)-1))
                                i = j + tokcount
                            else:
                                if m.variadic:
                                    if len(args) == len(m.arglist)-1:
                                        args.append([])
                                    else:
                                        args[len(m.arglist)-1] = tokens[j+positions[len(m.arglist)-1]:j+tokcount-1]
                                        del args[len(m.arglist):]

                                # Get macro replacement text
                                rep = self.macro_expand_args(m,args)
                                rep = self.expand_macros(rep,expanded)
                                for r in rep:
                                    r.lineno = t.lineno
                                tokens[i:j+tokcount] = rep
                                i += len(rep)
                    del expanded[t.value]
                    continue
                elif t.value == '__LINE__':
                    t.type = self.t_INTEGER
                    t.value = self.t_INTEGER_TYPE(t.lineno)
            i += 1
        return tokens
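    # For example, with "#define TWICE(x) ((x)+(x))" defined, expanding the
    # tokens of "TWICE(3)" produces the tokens of "((3)+(3))"; the 'expanded'
    # dict prevents TWICE from recursively expanding inside its own replacement.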
    # ----------------------------------------------------------------------
    # evalexpr()
    #
    # Evaluate an expression token sequence for the purposes of evaluating
    # integral expressions.
    # ----------------------------------------------------------------------
    def evalexpr(self,tokens):
        # tokens = tokenize(line)
        # Search for defined macros
        i = 0
        while i < len(tokens):
            if tokens[i].type == self.t_ID and tokens[i].value == 'defined':
                j = i + 1
                needparen = False
                result = "0L"
                while j < len(tokens):
                    if tokens[j].type in self.t_WS:
                        j += 1
                        continue
                    elif tokens[j].type == self.t_ID:
                        if tokens[j].value in self.macros:
                            result = "1L"
                        else:
                            result = "0L"
                        if not needparen: break
                    elif tokens[j].value == '(':
                        needparen = True
                    elif tokens[j].value == ')':
                        break
                    else:
                        self.error(self.source,tokens[i].lineno,"Malformed defined()")
                    j += 1
                tokens[i].type = self.t_INTEGER
                tokens[i].value = self.t_INTEGER_TYPE(result)
                del tokens[i+1:j+1]
            i += 1
        tokens = self.expand_macros(tokens)
        for i,t in enumerate(tokens):
            if t.type == self.t_ID:
                tokens[i] = copy.copy(t)
                tokens[i].type = self.t_INTEGER
                tokens[i].value = self.t_INTEGER_TYPE("0L")
            elif t.type == self.t_INTEGER:
                tokens[i] = copy.copy(t)
                # Strip off any trailing suffixes
                tokens[i].value = str(tokens[i].value)
                while tokens[i].value[-1] not in "0123456789abcdefABCDEF":
                    tokens[i].value = tokens[i].value[:-1]

        expr = "".join([str(x.value) for x in tokens])
        expr = expr.replace("&&"," and ")
        expr = expr.replace("||"," or ")
        expr = expr.replace("!"," not ")
        expr = expr.replace(" not ="," !=")   # undo the damage done to '!=' by the previous line
        try:
            result = eval(expr)
        except StandardError:
            self.error(self.source,tokens[0].lineno,"Couldn't evaluate expression")
            result = 0
        return result
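    # For example, with FOO and VERSION undefined (illustrative names), the
    # tokens of "defined(FOO) && VERSION > 2" become roughly the Python
    # expression "0L and 0L > 2", which eval() reduces to a false result.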
    # ----------------------------------------------------------------------
    # parsegen()
    #
    # Parse an input string.
    # ----------------------------------------------------------------------
    def parsegen(self,input,source=None):

        # Replace trigraph sequences
        t = trigraph(input)
        lines = self.group_lines(t)

        if not source:
            source = ""

        self.define("__FILE__ \"%s\"" % source)

        self.source = source
        chunk = []
        enable = True
        iftrigger = False
        ifstack = []

        for x in lines:
            for i,tok in enumerate(x):
                if tok.type not in self.t_WS: break
            if tok.value == '#':
                # Preprocessor directive

                # insert necessary whitespace instead of eaten tokens
                for tok in x:
                    if tok.type in self.t_WS and '\n' in tok.value:
                        chunk.append(tok)

                dirtokens = self.tokenstrip(x[i+1:])
                if dirtokens:
                    name = dirtokens[0].value
                    args = self.tokenstrip(dirtokens[1:])
                else:
                    name = ""
                    args = []

                if name == 'define':
                    if enable:
                        for tok in self.expand_macros(chunk):
                            yield tok
                        chunk = []
                        self.define(args)
                elif name == 'include':
                    if enable:
                        for tok in self.expand_macros(chunk):
                            yield tok
                        chunk = []
                        oldfile = self.macros['__FILE__']
                        for tok in self.include(args):
                            yield tok
                        self.macros['__FILE__'] = oldfile
                        self.source = source
                elif name == 'undef':
                    if enable:
                        for tok in self.expand_macros(chunk):
                            yield tok
                        chunk = []
                        self.undef(args)
                elif name == 'ifdef':
                    ifstack.append((enable,iftrigger))
                    if enable:
                        if not args[0].value in self.macros:
                            enable = False
                            iftrigger = False
                        else:
                            iftrigger = True
                elif name == 'ifndef':
                    ifstack.append((enable,iftrigger))
                    if enable:
                        if args[0].value in self.macros:
                            enable = False
                            iftrigger = False
                        else:
                            iftrigger = True
                elif name == 'if':
                    ifstack.append((enable,iftrigger))
                    if enable:
                        result = self.evalexpr(args)
                        if not result:
                            enable = False
                            iftrigger = False
                        else:
                            iftrigger = True
                elif name == 'elif':
                    if ifstack:
                        if ifstack[-1][0]:        # We only pay attention if outer "if" allows this
                            if enable:            # If already true, we flip enable False
                                enable = False
                            elif not iftrigger:   # If False, but not triggered yet, we'll check expression
                                result = self.evalexpr(args)
                                if result:
                                    enable = True
                                    iftrigger = True
                    else:
                        self.error(self.source,dirtokens[0].lineno,"Misplaced #elif")
                elif name == 'else':
                    if ifstack:
                        if ifstack[-1][0]:
                            if enable:
                                enable = False
                            elif not iftrigger:
                                enable = True
                                iftrigger = True
                    else:
                        self.error(self.source,dirtokens[0].lineno,"Misplaced #else")
                elif name == 'endif':
                    if ifstack:
                        enable,iftrigger = ifstack.pop()
                    else:
                        self.error(self.source,dirtokens[0].lineno,"Misplaced #endif")
                else:
                    # Unknown preprocessor directive
                    pass
            else:
                # Normal text
                if enable:
                    chunk.extend(x)

        for tok in self.expand_macros(chunk):
            yield tok
        chunk = []
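    # The (enable,iftrigger) pair drives conditionals.  For example, in
    #     #if 0 ... #elif 1 ... #else ... #endif
    # enable is False in the first branch, flips to True at the #elif (which
    # also sets iftrigger), and is False again in the #else because iftrigger
    # records that an earlier branch was taken.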
    # ----------------------------------------------------------------------
    # include()
    #
    # Implementation of file-inclusion
    # ----------------------------------------------------------------------
    def include(self,tokens):
        # Try to extract the filename and then process an include file
        if not tokens:
            return
        if tokens[0].value != '<' and tokens[0].type != self.t_STRING:
            tokens = self.expand_macros(tokens)

        if tokens[0].value == '<':
            # Include <...>
            i = 1
            while i < len(tokens):
                if tokens[i].value == '>':
                    break
                i += 1
            else:
                print "Malformed #include <...>"
                return
            filename = "".join([x.value for x in tokens[1:i]])
            path = self.path + [""] + self.temp_path
        elif tokens[0].type == self.t_STRING:
            filename = tokens[0].value[1:-1]
            path = self.temp_path + [""] + self.path
        else:
            print "Malformed #include statement"
            return

        for p in path:
            iname = os.path.join(p,filename)
            try:
                data = open(iname,"r").read()
                dname = os.path.dirname(iname)
                if dname:
                    self.temp_path.insert(0,dname)
                for tok in self.parsegen(data,filename):
                    yield tok
                if dname:
                    del self.temp_path[0]
                break
            except IOError:
                pass
        else:
            print "Couldn't find '%s'" % filename
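    # Note the search order: #include "config.h" tries self.temp_path (seeded
    # with the including file's directory) before self.path, while
    # #include <stdio.h> tries the add_path() directories first.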
    # ----------------------------------------------------------------------
    # define()
    #
    # Define a new macro
    # ----------------------------------------------------------------------
    def define(self,tokens):
        if isinstance(tokens,(str,unicode)):
            tokens = self.tokenize(tokens)

        linetok = tokens
        try:
            name = linetok[0]
            if len(linetok) > 1:
                mtype = linetok[1]
            else:
                mtype = None
            if not mtype:
                m = Macro(name.value,[])
                self.macros[name.value] = m
            elif mtype.type in self.t_WS:
                # A normal macro
                m = Macro(name.value,self.tokenstrip(linetok[2:]))
                self.macros[name.value] = m
            elif mtype.value == '(':
                # A macro with arguments
                tokcount, args, positions = self.collect_args(linetok[1:])
                variadic = False
                for a in args:
                    if variadic:
                        print "No more arguments may follow a variadic argument"
                        break
                    astr = "".join([str(_i.value) for _i in a])
                    if astr == "...":
                        variadic = True
                        a[0].type = self.t_ID
                        a[0].value = '__VA_ARGS__'
                        del a[1:]
                        continue
                    elif astr[-3:] == "..." and a[0].type == self.t_ID:
                        variadic = True
                        del a[1:]
                        # If, for some reason, "." is part of the identifier, strip off the name for the purposes
                        # of macro expansion
                        if a[0].value[-3:] == '...':
                            a[0].value = a[0].value[:-3]
                    else:
                        if len(a) > 1 or a[0].type != self.t_ID:
                            print "Invalid macro argument"
                            break
                else:
                    mvalue = self.tokenstrip(linetok[1+tokcount:])
                    # Strip whitespace from around any '##' so token pasting sees
                    # adjacent tokens
                    i = 0
                    while i < len(mvalue):
                        if i+1 < len(mvalue):
                            if mvalue[i].type in self.t_WS and mvalue[i+1].value == '##':
                                del mvalue[i]
                                continue
                            elif mvalue[i].value == '##' and mvalue[i+1].type in self.t_WS:
                                del mvalue[i+1]
                                continue
                        i += 1
                    m = Macro(name.value,mvalue,[x[0].value for x in args],variadic)
                    self.macro_prescan(m)
                    self.macros[name.value] = m
            else:
                print "Bad macro definition"
        except LookupError:
            print "Bad macro definition"
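    # Examples of accepted definitions (illustrative):
    #
    #     p.define("PI 3.14159")                               # simple macro
    #     p.define("MIN(a,b) ((a)<(b)?(a):(b))")               # macro with arguments
    #     p.define("LOG(fmt,...) printf(fmt,__VA_ARGS__)")     # variadic macro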
    # ----------------------------------------------------------------------
    # undef()
    #
    # Undefine a macro
    # ----------------------------------------------------------------------
    def undef(self,tokens):
        id = tokens[0].value
        try:
            del self.macros[id]
        except LookupError:
            pass
    # ----------------------------------------------------------------------
    # parse()
    #
    # Parse input text.
    # ----------------------------------------------------------------------
    def parse(self,input,source=None,ignore={}):
        self.ignore = ignore
        self.parser = self.parsegen(input,source)
    # ----------------------------------------------------------------------
    # token()
    #
    # Method to return individual tokens
    # ----------------------------------------------------------------------
    def token(self):
        try:
            while True:
                tok = self.parser.next()
                if tok.type not in self.ignore: return tok
        except StopIteration:
            self.parser = None
            return None
if __name__ == '__main__':
    import ply.lex as lex
    lexer = lex.lex()

    # Run a preprocessor
    f = open(sys.argv[1])
    input = f.read()
    f.close()

    p = Preprocessor(lexer)
    p.parse(input,sys.argv[1])
    while True:
        tok = p.token()
        if not tok: break
        print p.source, tok
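# Running this module as a script preprocesses the named file and prints each
# resulting token, e.g.:  python cpp.py input.c  (assuming this module is
# saved as cpp.py).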