Fix (and add test for) function-like macro invocation with newlines.
author Carl Worth <cworth@cworth.org>
Mon, 17 May 2010 20:19:04 +0000 (13:19 -0700)
committer Carl Worth <cworth@cworth.org>
Mon, 17 May 2010 20:21:13 +0000 (13:21 -0700)
The test has a newline before the left parenthesis, and newlines to
separate the parentheses from the argument.

The fix involves more state in the lexer to only return a NEWLINE
token when terminating a directive. This is very similar to our
previous fix with extra lexer state to only return the SPACE token
when it would be significant for the parser.

With this change, the exact number and positioning of newlines in the
output is now different compared to "gcc -E" so we add a -B option to
diff when testing to ignore that.

glcpp-lex.l
glcpp-parse.y
tests/026-define-func-extra-newlines.c [new file with mode: 0644]
tests/glcpp-test

index 97ff1175f1bf4d52fe075a82b6081b02620dbf80..4cb73c5d715f61744d3fc2c75b69ed64300e5f7d 100644 (file)
 
 #include "glcpp.h"
 #include "glcpp-parse.h"
+
+/* Yes, a macro with a return statement in it is evil. But surely no
+ * more evil than all the code generation happening with flex in the
+ * first place. */
+#define LEXIFY_IDENTIFIER do {                                 \
+       yylval.str = xtalloc_strdup (yyextra, yytext);          \
+       switch (glcpp_parser_macro_type (yyextra, yylval.str))  \
+       {                                                       \
+               case MACRO_TYPE_UNDEFINED:                      \
+                       return IDENTIFIER;                      \
+               break;                                          \
+               case MACRO_TYPE_OBJECT:                         \
+                       return OBJ_MACRO;                       \
+               break;                                          \
+               case MACRO_TYPE_FUNCTION:                       \
+                       return FUNC_MACRO;                      \
+               break;                                          \
+       }                                                       \
+ } while (0)
+
 %}
 
 %option reentrant noyywrap
 %option extra-type="glcpp_parser_t *"
 
 %x ST_DEFINE
+%x ST_DEFVAL_START
 %x ST_DEFVAL
+%x ST_UNDEF
+%x ST_UNDEF_END
 
 SPACE          [[:space:]]
 NONSPACE       [^[:space:]]
@@ -46,9 +69,20 @@ TOKEN                [^[:space:](),]+
 %%
 
 {HASH}undef{HSPACE}* {
+       BEGIN ST_UNDEF;
        return UNDEF;
 }
 
+<ST_UNDEF>{IDENTIFIER} {
+       BEGIN ST_UNDEF_END;
+       LEXIFY_IDENTIFIER;
+}
+
+<ST_UNDEF_END>\n {
+       BEGIN INITIAL;
+       return NEWLINE;
+}
+
        /* We use the ST_DEFINE and ST_DEFVAL states so that we can
         * pass a space token, (yes, a token for whitespace!), since
         * the preprocessor specification requires distinguishing
@@ -60,40 +94,48 @@ TOKEN               [^[:space:](),]+
 }
 
 <ST_DEFINE>{IDENTIFIER}        {
-       BEGIN ST_DEFVAL;
+       BEGIN ST_DEFVAL_START;
        yylval.str = xtalloc_strdup (yyextra, yytext);
        return IDENTIFIER;
 }
 
-<ST_DEFVAL>\n {
+<ST_DEFVAL_START>\n {
        BEGIN INITIAL;
        return NEWLINE;
 }
 
-<ST_DEFVAL>{HSPACE}+ {
-       BEGIN INITIAL;
+<ST_DEFVAL_START>{HSPACE}+ {
+       BEGIN ST_DEFVAL;
        return SPACE;
 }
 
-<ST_DEFVAL>"(" {
-       BEGIN INITIAL;
+<ST_DEFVAL_START>"("   {
+       BEGIN ST_DEFVAL;
        return '(';
 }
 
-{IDENTIFIER} {
+<ST_DEFVAL>{IDENTIFIER} {
+       LEXIFY_IDENTIFIER;
+}
+
+<ST_DEFVAL>[(),] {
+       return yytext[0];
+}
+
+<ST_DEFVAL>{TOKEN} {
        yylval.str = xtalloc_strdup (yyextra, yytext);
-       switch (glcpp_parser_macro_type (yyextra, yylval.str))
-       {
-               case MACRO_TYPE_UNDEFINED:
-                       return IDENTIFIER;
-               break;
-               case MACRO_TYPE_OBJECT:
-                       return OBJ_MACRO;
-               break;
-               case MACRO_TYPE_FUNCTION:
-                       return FUNC_MACRO;
-               break;
-       }
+       return TOKEN;
+}
+
+<ST_DEFVAL>\n {
+       BEGIN INITIAL;
+       return NEWLINE;
+}
+
+<ST_DEFVAL>{HSPACE}+
+
+{IDENTIFIER} {
+       LEXIFY_IDENTIFIER;
 }
 
 [(),]  {
@@ -106,7 +148,7 @@ TOKEN               [^[:space:](),]+
 }
 
 \n {
-       return NEWLINE;
+       printf ("\n");
 }
 
 {HSPACE}+
index 959083578e7121edb34505e3fc56a165d01e677b..b2eaa5ba69637c22ec2bd1432588970423c6d8e1 100644 (file)
@@ -149,7 +149,6 @@ content:
                _print_string_list ($1);
        }
 |      directive_with_newline { printf ("\n"); }
-|      NEWLINE { printf ("\n"); }
 |      '('     { printf ("("); }
 |      ')'     { printf (")"); }
 |      ','     { printf (","); }
diff --git a/tests/026-define-func-extra-newlines.c b/tests/026-define-func-extra-newlines.c
new file mode 100644 (file)
index 0000000..0d83740
--- /dev/null
@@ -0,0 +1,6 @@
+#define foo(a) bar
+
+foo
+(
+1
+)
index bd204de1e2f5d6a211512153c826409e3eed6238..673a4f45e96af735f98180447f87297bd22aa59f 100755 (executable)
@@ -5,5 +5,5 @@ for test in *.c; do
     ../glcpp < $test > $test.out
     gcc -E $test -o $test.gcc
     grep -v '^#' < $test.gcc > $test.expected
-    diff -w -u $test.expected $test.out
+    diff -B -w -u $test.expected $test.out
 done