Treat newlines as space within a function-like macro invocation.
author: Carl Worth <cworth@cworth.org>
Wed, 26 May 2010 22:57:10 +0000 (15:57 -0700)
committer: Carl Worth <cworth@cworth.org>
Wed, 26 May 2010 23:04:31 +0000 (16:04 -0700)
This adds three new pieces of state to the parser (in_control_line,
newline_as_space, and paren_count), and a large amount of messy
code. I'd definitely like to see a cleaner solution for this.

With this fix, the "define-func-extra-newlines" test now passes, so we
put it back to test #26 where it was originally (lately it has been
known as test #55).

Also, we tweak test 25 slightly. Previously this test was ending a
file with a function-like macro name that was not actually a macro
invocation (not followed by a left parenthesis). As is, this fix was
making that test fail because the text_line production expects to see
a terminating NEWLINE, but that NEWLINE is now getting turned into a
SPACE here.

This seems unlikely to be a problem in the wild (function-like macro
names being used in a non-macro sense seem rare enough, and more than
likely they won't appear at the end of a file). Still, we document
this shortcoming in the README.

README
glcpp-parse.y
glcpp.h
tests/025-func-macro-as-non-macro.c
tests/026-define-func-extra-newlines.c [new file with mode: 0644]
tests/055-define-func-extra-newlines.c [deleted file]

diff --git a/README b/README
index f0f64c2644a9c572d3234383033c64dfccad219b..ab42a3ffe1278b06b48a41b90576d233f69f6fdf 100644 (file)
--- a/README
+++ b/README
@@ -24,3 +24,7 @@ parentheses.
 
 The #error, #pragma, #extension, #version, and #line macros are not
 yet supported.
+
+A file that ends with a function-like macro name as the last
+non-whitespace token will result in a parse error, (where it should be
+passed through as is).
\ No newline at end of file
index a809ebf3af566dd0b09666021d43acd727e54dcc..1346b65aff6196a1b471158ade777688a2c3072f 100644 (file)
@@ -856,6 +856,9 @@ glcpp_parser_create (void)
                                           hash_table_string_compare);
        parser->active = _string_list_create (parser);
        parser->space_tokens = 1;
+       parser->newline_as_space = 0;
+       parser->in_control_line = 0;
+       parser->paren_count = 0;
 
        parser->skip_stack = NULL;
 
@@ -1274,8 +1277,62 @@ glcpp_parser_lex (glcpp_parser_t *parser)
        token_node_t *node;
        int ret;
 
-       if (parser->lex_from_list == NULL)
-               return glcpp_lex (parser->scanner);
+       if (parser->lex_from_list == NULL) {
+               ret = glcpp_lex (parser->scanner);
+
+               /* XXX: This ugly block of code exists for the sole
+                * purpose of converting a NEWLINE token into a SPACE
+                * token, but only in the case where we have seen a
+                * function-like macro name, but have not yet seen its
+                * closing parenthesis.
+                *
+                * There's perhaps a more compact way to do this with
+                * mid-rule actions in the grammar.
+                *
+                * I'm definitely not pleased with the complexity of
+                * this code here.
+                */
+               if (parser->newline_as_space)
+               {
+                       if (ret == '(') {
+                               parser->paren_count++;
+                       } else if (ret == ')') {
+                               parser->paren_count--;
+                               if (parser->paren_count == 0)
+                                       parser->newline_as_space = 0;
+                       } else if (ret == NEWLINE) {
+                               ret = SPACE;
+                       } else if (ret != SPACE) {
+                               if (parser->paren_count == 0)
+                                       parser->newline_as_space = 0;
+                       }
+               }
+               else if (parser->in_control_line)
+               {
+                       if (ret == NEWLINE)
+                               parser->in_control_line = 0;
+               }
+               else if (ret == HASH_DEFINE_OBJ || ret == HASH_DEFINE_FUNC ||
+                          ret == HASH_UNDEF || ret == HASH_IF ||
+                          ret == HASH_IFDEF || ret == HASH_IFNDEF ||
+                          ret == HASH_ELIF || ret == HASH_ELSE ||
+                          ret == HASH_ENDIF || ret == HASH)
+               {
+                       parser->in_control_line = 1;
+               }
+               else if (ret == IDENTIFIER)
+               {
+                       macro_t *macro;
+                       macro = hash_table_find (parser->defines,
+                                                yylval.str);
+                       if (macro && macro->is_function) {
+                               parser->newline_as_space = 1;
+                               parser->paren_count = 0;
+                       }
+               }
+
+               return ret;
+       }
 
        node = parser->lex_from_node;
 
diff --git a/glcpp.h b/glcpp.h
index e5be1a6cd62da399bb4708957344e1c20c99df03..5c8c304a9ca737073fa9f87caad45ae564448b5b 100644 (file)
--- a/glcpp.h
+++ b/glcpp.h
@@ -128,6 +128,9 @@ struct glcpp_parser {
        struct hash_table *defines;
        string_list_t *active;
        int space_tokens;
+       int newline_as_space;
+       int in_control_line;
+       int paren_count;
        skip_node_t *skip_stack;
        token_list_t *lex_from_list;
        token_node_t *lex_from_node;
index 3dbe026d9dd0035bb325308a590f95f1ee766858..b433671d1bfd0fd04df2693ab36708748b9bcf18 100644 (file)
@@ -1,2 +1,2 @@
 #define foo(bar) bar
-foo
+foo bar
diff --git a/tests/026-define-func-extra-newlines.c b/tests/026-define-func-extra-newlines.c
new file mode 100644 (file)
index 0000000..0d83740
--- /dev/null
@@ -0,0 +1,6 @@
+#define foo(a) bar
+
+foo
+(
+1
+)
diff --git a/tests/055-define-func-extra-newlines.c b/tests/055-define-func-extra-newlines.c
deleted file mode 100644 (file)
index 0d83740..0000000
+++ /dev/null
@@ -1,6 +0,0 @@
-#define foo(a) bar
-
-foo
-(
-1
-)