From 9fb8b7a495c9dc6f9a62cf82300fae5925af92fc Mon Sep 17 00:00:00 2001 From: Carl Worth Date: Tue, 25 May 2010 15:04:32 -0700 Subject: [PATCH] Make the lexer pass whitespace through (as OTHER tokens) for text lines. With this change, we can recreate the original text-line input exactly. Previously we were inserting a space between every pair of tokens so our output had a lot more whitespace than our input. With this change, we can drop the "-w" option to diff and match the input exactly. --- glcpp-lex.l | 72 ++++++++++++++++++++++++++++++++++++++---------- glcpp-parse.y | 2 -- tests/glcpp-test | 2 +- 3 files changed, 59 insertions(+), 17 deletions(-) diff --git a/glcpp-lex.l b/glcpp-lex.l index f1dd11ea9bd..7b5cdd57a0f 100644 --- a/glcpp-lex.l +++ b/glcpp-lex.l @@ -32,6 +32,21 @@ %option reentrant noyywrap %option extra-type="glcpp_parser_t *" + /* This lexer has two states: + * + * The CONTROL state is for control lines (directives). + * It lexes exactly as specified in the C99 specification. + * + * The INITIAL state is for input lines. In this state, we + * make the OTHER token much more broad in that it now + * includes tokens consisting entirely of whitespace. This + * allows us to pass text through verbatim. It avoids the + * "inadvertent token pasting" problem that would occur if we + * just printed tokens, while also avoiding excess whitespace + * insertion in the output. */ + +%x CONTROL + SPACE [[:space:]] NONSPACE [^[:space:]] NEWLINE [\n] @@ -48,75 +63,104 @@ HEXADECIMAL_INTEGER 0[xX][0-9a-fA-F]+[uU]? 
%% {HASH}define{HSPACE}+/{IDENTIFIER}"(" { + BEGIN CONTROL; return HASH_DEFINE_FUNC; } {HASH}define { + BEGIN CONTROL; return HASH_DEFINE_OBJ; } {HASH}undef { + BEGIN CONTROL; return HASH_UNDEF; } {HASH} { + BEGIN CONTROL; return HASH; } -{IDENTIFIER} { +<CONTROL>{IDENTIFIER} { yylval.str = xtalloc_strdup (yyextra, yytext); return IDENTIFIER; } -"<<" { +<CONTROL>"<<" { return LEFT_SHIFT; } -">>" { +<CONTROL>">>" { return RIGHT_SHIFT; } -"<=" { +<CONTROL>"<=" { return LESS_OR_EQUAL; } -">=" { +<CONTROL>">=" { return GREATER_OR_EQUAL; } -"==" { +<CONTROL>"==" { return EQUAL; } -"!=" { +<CONTROL>"!=" { return NOT_EQUAL; } -"&&" { +<CONTROL>"&&" { return AND; } -"||" { +<CONTROL>"||" { return OR; } -"##" { +<CONTROL>"##" { return PASTE; } -{PUNCTUATION} { +<CONTROL>{PUNCTUATION} { return yytext[0]; } -\n { +<CONTROL>{OTHER} { + yylval.str = xtalloc_strdup (yyextra, yytext); + return OTHER; +} + +<CONTROL>{HSPACE}+ + +<CONTROL>\n { + BEGIN INITIAL; return NEWLINE; } -{OTHER} { +{IDENTIFIER} { + yylval.str = xtalloc_strdup (yyextra, yytext); + return IDENTIFIER; +} + +{OTHER}+ { + yylval.str = xtalloc_strdup (yyextra, yytext); + return OTHER; +} + +{HSPACE}+ { yylval.str = xtalloc_strdup (yyextra, yytext); return OTHER; } -{HSPACE}+ +\n { + return NEWLINE; +} + +. { + yylval.str = xtalloc_strdup (yyextra, yytext); + return OTHER; +} %% diff --git a/glcpp-parse.y b/glcpp-parse.y index 991b8a0b856..957421b864e 100644 --- a/glcpp-parse.y +++ b/glcpp-parse.y @@ -517,8 +517,6 @@ _token_list_print (token_list_t *list) for (node = list->head; node; node = node->next) { _token_print (node->token); - if (node->next) - printf (" "); } } diff --git a/tests/glcpp-test b/tests/glcpp-test index 34cca883301..8074e471197 100755 --- a/tests/glcpp-test +++ b/tests/glcpp-test @@ -9,5 +9,5 @@ for test in *.c; do gcc -E $test -o $test.gcc # grep -v '^#' < $test.gcc > $test.expected grep -v '^[ ]*#' < $test > $test.expected - diff -w -u $test.expected $test.out + diff -u $test.expected $test.out done -- 2.30.2