From f34a0009dd07dbca4de5491744bd3618eae9458e Mon Sep 17 00:00:00 2001 From: Carl Worth Date: Tue, 25 May 2010 16:59:02 -0700 Subject: [PATCH] Pass through literal space values from replacement lists. This makes test 15 pass and also dramatically simplifies the lexer. We were previously using a CONTROL state in the lexer to only emit SPACE tokens when on text lines. But that's not actually what we want. We need SPACE tokens in the replacement lists as well. Instead of a lexer state for this, we now simply set a "space_tokens" flag whenever we start constructing a pp_tokens list and clear the flag whenever we see a '#' introducing a directive. Much cleaner this way. --- glcpp-lex.l | 85 ++++++++++++--------------------------------------- glcpp-parse.y | 10 +++--- glcpp.h | 1 + 3 files changed, 25 insertions(+), 71 deletions(-) diff --git a/glcpp-lex.l b/glcpp-lex.l index b1980742d39..f6d0c8b7d67 100644 --- a/glcpp-lex.l +++ b/glcpp-lex.l @@ -32,21 +32,6 @@ %option reentrant noyywrap %option extra-type="glcpp_parser_t *" - /* This lexer has two states: - * - * The CONTROL state is for control lines (directives) - * It lexes exactly as specified in the C99 specification. - * - * The INITIAL state is for input lines. In this state, we - * make the OTHER token much more broad in that it now - * includes tokens consisting entirely of whitespace. This - * allows us to pass text through verbatim. It avoids the - * "inadvertent token pasting" problem that would occur if we - * just printed tokens, while also avoiding excess whitespace - * insertion in the output.*/ - -%x CONTROL - SPACE [[:space:]] NONSPACE [^[:space:]] NEWLINE [\n] @@ -63,116 +48,84 @@ HEXADECIMAL_INTEGER 0[xX][0-9a-fA-F]+[uU]? %% {HASH}define{HSPACE}+/{IDENTIFIER}"(" { - BEGIN CONTROL; + yyextra->space_tokens = 0; return HASH_DEFINE_FUNC; } {HASH}define { - BEGIN CONTROL; + yyextra->space_tokens = 0; return HASH_DEFINE_OBJ; } {HASH}undef { - BEGIN CONTROL; + yyextra->space_tokens = 0; return HASH_UNDEF; } {HASH} { - BEGIN CONTROL; + yyextra->space_tokens = 0; return HASH; } -{IDENTIFIER} { +{IDENTIFIER} { yylval.str = xtalloc_strdup (yyextra, yytext); return IDENTIFIER; } -"<<" { +"<<" { return LEFT_SHIFT; } -">>" { +">>" { return RIGHT_SHIFT; } -"<=" { +"<=" { return LESS_OR_EQUAL; } -">=" { +">=" { return GREATER_OR_EQUAL; } -"==" { +"==" { return EQUAL; } -"!=" { +"!=" { return NOT_EQUAL; } -"&&" { +"&&" { return AND; } -"||" { +"||" { return OR; } -"##" { +"##" { return PASTE; } -{PUNCTUATION} { +{PUNCTUATION} { return yytext[0]; } -{OTHER} { - yylval.str = xtalloc_strdup (yyextra, yytext); - return OTHER; -} - -{HSPACE}+ - -\n { - BEGIN INITIAL; - return NEWLINE; -} - -{IDENTIFIER} { - yylval.str = xtalloc_strdup (yyextra, yytext); - return IDENTIFIER; -} - -"(" { - return '('; -} - -")" { - return ')'; -} - -"," { - return ','; -} - {OTHER}+ { yylval.str = xtalloc_strdup (yyextra, yytext); return OTHER; } {HSPACE}+ { - yylval.str = xtalloc_strdup (yyextra, yytext); - return SPACE; + if (yyextra->space_tokens) { + yylval.str = xtalloc_strdup (yyextra, yytext); + return SPACE; + } } \n { return NEWLINE; } -. { - yylval.str = xtalloc_strdup (yyextra, yytext); - return OTHER; -} - %% diff --git a/glcpp-parse.y b/glcpp-parse.y index 60b414e43a7..a1981995fd0 100644 --- a/glcpp-parse.y +++ b/glcpp-parse.y @@ -160,7 +160,7 @@ line: ; control_line: - HASH_DEFINE_OBJ IDENTIFIER replacement_list NEWLINE { + HASH_DEFINE_OBJ IDENTIFIER replacement_list NEWLINE { _define_object_macro (parser, $2, $3); } | HASH_DEFINE_FUNC IDENTIFIER '(' ')' replacement_list NEWLINE { @@ -212,6 +212,7 @@ replacement_list: pp_tokens: preprocessing_token { + parser->space_tokens = 1; $$ = _token_list_create (parser); _token_list_append ($$, $1); talloc_unlink (parser, $1); @@ -234,7 +235,7 @@ preprocessing_token: $$ = _token_create_str (parser, OTHER, $1); } | SPACE { - $$ = _token_create_str (parser, OTHER, $1); + $$ = _token_create_str (parser, SPACE, $1); } ; @@ -494,6 +495,7 @@ _token_print (token_t *token) switch (token->type) { case IDENTIFIER: case OTHER: + case SPACE: printf ("%s", token->value.str); break; case LEFT_SHIFT: @@ -589,6 +591,7 @@ glcpp_parser_create (void) parser->defines = hash_table_ctor (32, hash_table_string_hash, hash_table_string_compare); parser->active = _string_list_create (parser); + parser->space_tokens = 1; parser->expansions = NULL; parser->just_printed_separator = 1; @@ -835,9 +838,6 @@ _glcpp_parser_print_expanded_token_list (glcpp_parser_t *parser, for (node = list->head; node; node = node->next) { if (_glcpp_parser_print_expanded_token (parser, node->token)) _glcpp_parser_print_expanded_function (parser, &node); - - if (node->next) - printf (" "); } } diff --git a/glcpp.h b/glcpp.h index 043098b1347..f3760fa7a41 100644 --- a/glcpp.h +++ b/glcpp.h @@ -126,6 +126,7 @@ struct glcpp_parser { yyscan_t scanner; struct hash_table *defines; string_list_t *active; + int space_tokens; expansion_node_t *expansions; int just_printed_separator; int need_newline; -- 2.30.2