Make the lexer pass whitespace through (as OTHER tokens) for text lines.

author Carl Worth <cworth@cworth.org>

Tue, 25 May 2010 22:04:32 +0000 (15:04 -0700)

committer Carl Worth <cworth@cworth.org>

Tue, 25 May 2010 22:04:32 +0000 (15:04 -0700)
author Carl Worth <cworth@cworth.org>
Tue, 25 May 2010 22:04:32 +0000 (15:04 -0700)
committer Carl Worth <cworth@cworth.org>
Tue, 25 May 2010 22:04:32 +0000 (15:04 -0700)
diff --git a/glcpp-lex.l b/glcpp-lex.l

index f1dd11ea9bdcac0f640db4479734a9b5c1c9d748..7b5cdd57a0fb0646f41fb1dd4b7ab062e52f7b86 100644 (file)
--- a/glcpp-lex.l
+++ b/glcpp-lex.l
@@ -32,6 +32,21 @@
  %option reentrant noyywrap
  %option extra-type="glcpp_parser_t *"
  
+       /* This lexer has two states:
+        *
+        * The CONTROL state is for control lines (directives).
+        * It lexes exactly as specified in the C99 specification.
+        *
+        * The INITIAL state is for input lines. In this state, we
+        * make the OTHER token much broader, so that it also
+        * includes tokens consisting entirely of whitespace. This
+        * allows us to pass text through verbatim. It avoids the
+        * "inadvertent token pasting" problem that would occur if we
+        * just printed tokens, while also avoiding excess whitespace
+        * insertion in the output. */
+
+%x CONTROL
+
  SPACE          [[:space:]]
  NONSPACE       [^[:space:]]
  NEWLINE                [\n]
@@ -48,75 +63,104 @@ HEXADECIMAL_INTEGER        0[xX][0-9a-fA-F]+[uU]?
  %%
  
  {HASH}define{HSPACE}+/{IDENTIFIER}"(" {
+       BEGIN CONTROL;
         return HASH_DEFINE_FUNC;
  }
  
  {HASH}define {
+       BEGIN CONTROL;
         return HASH_DEFINE_OBJ;
  }
  
  {HASH}undef {
+       BEGIN CONTROL;
         return HASH_UNDEF;
  }
  
  {HASH} {
+       BEGIN CONTROL;
         return HASH;
  }
  
-{IDENTIFIER} {
+<CONTROL>{IDENTIFIER} {
         yylval.str = xtalloc_strdup (yyextra, yytext);
         return IDENTIFIER;
  }
  
-"<<"  {
+<CONTROL>"<<"  {
         return LEFT_SHIFT;
  }
  
-">>" {
+<CONTROL>">>" {
         return RIGHT_SHIFT;
  }
  
-"<=" {
+<CONTROL>"<=" {
         return LESS_OR_EQUAL;
  }
  
-">=" {
+<CONTROL>">=" {
         return GREATER_OR_EQUAL;
  }
  
-"==" {
+<CONTROL>"==" {
         return EQUAL;
  }
  
-"!=" {
+<CONTROL>"!=" {
         return NOT_EQUAL;
  }
  
-"&&" {
+<CONTROL>"&&" {
         return AND;
  }
  
-"||" {
+<CONTROL>"||" {
         return OR;
  }
  
-"##" {
+<CONTROL>"##" {
         return PASTE;
  }
  
-{PUNCTUATION} {
+<CONTROL>{PUNCTUATION} {
         return yytext[0];
  }
  
-\n {
+<CONTROL>{OTHER} {
+       yylval.str = xtalloc_strdup (yyextra, yytext);
+       return OTHER;
+}
+
+<CONTROL>{HSPACE}+
+
+<CONTROL>\n {
+       BEGIN INITIAL;
         return NEWLINE;
  }
  
-{OTHER} {
+{IDENTIFIER} {
+       yylval.str = xtalloc_strdup (yyextra, yytext);
+       return IDENTIFIER;
+}
+
+{OTHER}+ {
+       yylval.str = xtalloc_strdup (yyextra, yytext);
+       return OTHER;
+}
+
+{HSPACE}+ {
         yylval.str = xtalloc_strdup (yyextra, yytext);
         return OTHER;
  }
  
-{HSPACE}+
+\n {
+       return NEWLINE;
+}
+
+. {
+       yylval.str = xtalloc_strdup (yyextra, yytext);
+       return OTHER;
+}
  
  %%
diff --git a/glcpp-parse.y b/glcpp-parse.y

index 991b8a0b856ab19a69f9e0b75bd32ee3051668ca..957421b864e0002e815d97f66ac4b388d4044d32 100644 (file)
--- a/glcpp-parse.y
+++ b/glcpp-parse.y
@@ -517,8 +517,6 @@ _token_list_print (token_list_t *list)
  
         for (node = list->head; node; node = node->next) {
                 _token_print (node->token);
-               if (node->next)
-                       printf (" ");
         }
  }
  
diff --git a/tests/glcpp-test b/tests/glcpp-test

index 34cca8833011a032c1201023c397bf448f3a19f6..8074e471197736e8632aa3461eee0831a2090443 100755 (executable)
--- a/tests/glcpp-test
+++ b/tests/glcpp-test
@@ -9,5 +9,5 @@ for test in *.c; do
      gcc -E $test -o $test.gcc
  #    grep -v '^#' < $test.gcc > $test.expected
      grep -v '^[        ]*#' < $test > $test.expected
-    diff -w -u $test.expected $test.out
+    diff -u $test.expected $test.out
  done
author	Carl Worth <cworth@cworth.org>
	Tue, 25 May 2010 22:04:32 +0000 (15:04 -0700)
committer	Carl Worth <cworth@cworth.org>
	Tue, 25 May 2010 22:04:32 +0000 (15:04 -0700)
glcpp-lex.l		patch | blob | history
glcpp-parse.y		patch | blob | history
tests/glcpp-test		patch | blob | history