%option reentrant noyywrap
%option extra-type="glcpp_parser_t *"
-%x ST_DEFINE
-%x ST_DEFINE_OBJ_OR_FUNC
-%x ST_DEFINE_PARAMETER
-%x ST_DEFINE_VALUE
-%x ST_IF
-%x ST_UNDEF
-%x ST_UNDEF_END
-
SPACE [[:space:]]
NONSPACE [^[:space:]]
NEWLINE [\n]
HSPACE [ \t]
HASH ^{HSPACE}*#{HSPACE}*
IDENTIFIER [_a-zA-Z][_a-zA-Z0-9]*
-TOKEN [^[:space:](),]+
+PUNCTUATION [][(){}.&*~!/%<>^|;,+-]
+OTHER [^][(){}.&*~!/%<>^|;,=#[:space:]+-]+
DECIMAL_INTEGER [1-9][0-9]*[uU]?
OCTAL_INTEGER 0[0-7]*[uU]?
%%
-{HASH}if{HSPACE}* {
- BEGIN ST_IF;
- return IF;
-}
-
-{HASH}elif{HSPACE}* {
- BEGIN ST_IF;
- return ELIF;
+{HASH}define{HSPACE}+/{IDENTIFIER}"(" { /* "#define" whose macro name is directly followed by "("; the name and "(" are trailing context and are not consumed here */
+ return HASH_DEFINE_FUNC;
}
-<ST_IF>{DECIMAL_INTEGER} {
- yylval.ival = strtoll (yytext, NULL, 10);
- return INTEGER;
+{HASH}define { /* any other "#define"; flex's longest-match rule selects the function-like rule instead whenever that rule's trailing context matches */
+ return HASH_DEFINE_OBJ;
}
-<ST_IF>{OCTAL_INTEGER} {
- yylval.ival = strtoll (yytext + 1, NULL, 8);
- return INTEGER;
+{HASH}undef { /* "#undef" keyword only; the identifier that follows is scanned as a separate token */
+ return HASH_UNDEF;
}
-<ST_IF>{HEXADECIMAL_INTEGER} {
- yylval.ival = strtoll (yytext + 2, NULL, 16);
- return INTEGER;
+{HASH} { /* "#" at line start that does not begin one of the longer "#define"/"#undef" matches; HASH also consumes the horizontal space after the "#" */
+ return HASH;
}
-<ST_IF>"defined" {
- return DEFINED;
+{IDENTIFIER} { /* hand the parser a copy of the matched text, made by xtalloc_strdup with yyextra as its first argument */
+ yylval.str = xtalloc_strdup (yyextra, yytext);
+ return IDENTIFIER;
}
-<ST_IF>"<<" {
+"<<" { /* two-character operators: each gets its own named token and, with the ST_IF prefix gone, is matched in every start condition visible here */
 return LEFT_SHIFT;
}
-<ST_IF>">>" {
+">>" { /* right shift */
 return RIGHT_SHIFT;
}
-<ST_IF>"<=" {
+"<=" { /* less than or equal */
 return LESS_OR_EQUAL;
}
-<ST_IF>">=" {
+">=" { /* greater than or equal */
 return GREATER_OR_EQUAL;
}
-<ST_IF>"==" {
+"==" { /* equality; a single '=' is not matched by this rule */
 return EQUAL;
}
-<ST_IF>"!=" {
+"!=" { /* inequality */
 return NOT_EQUAL;
}
-<ST_IF>"&&" {
+"&&" { /* logical and */
 return AND;
}
-<ST_IF>"||" {
+"||" { /* logical or */
 return OR;
}
-<ST_IF>[-+*/%<>&^|()~] {
- return yytext[0];
-}
-
-<ST_IF>{IDENTIFIER} {
- yylval.str = xtalloc_strdup (yyextra, yytext);
- return IDENTIFIER;
-}
-
-<ST_IF>{HSPACE}+
-
-<ST_IF>\n {
- BEGIN INITIAL;
- return NEWLINE;
-}
-
-{HASH}endif{HSPACE}* {
- return ENDIF;
+"##" { /* the two-character "##" sequence, reported to the parser as PASTE */
+ return PASTE;
}
-{HASH}else{HSPACE}* {
- return ELSE;
-}
-
-{HASH}undef{HSPACE}* {
- BEGIN ST_UNDEF;
- return UNDEF;
-}
-
-<ST_UNDEF>{IDENTIFIER} {
- BEGIN ST_UNDEF_END;
- yylval.str = xtalloc_strdup (yyextra, yytext);
- return IDENTIFIER;
-}
-
-<ST_UNDEF_END>{HSPACE}*
-
-<ST_UNDEF_END>\n {
- BEGIN INITIAL;
-}
-
- /* We use the ST_DEFINE and ST_DEFVAL states so that we can
- * pass a space token, (yes, a token for whitespace!), since
- * the preprocessor specification requires distinguishing
- * "#define foo()" from "#define foo ()".
- */
-{HASH}define{HSPACE}* {
- BEGIN ST_DEFINE;
- return DEFINE;
-}
-
-<ST_DEFINE>{IDENTIFIER} {
- BEGIN ST_DEFINE_OBJ_OR_FUNC;
- yylval.str = xtalloc_strdup (yyextra, yytext);
- return IDENTIFIER;
-}
-
-<ST_DEFINE_OBJ_OR_FUNC>\n {
- BEGIN INITIAL;
- return NEWLINE;
-}
-
-<ST_DEFINE_OBJ_OR_FUNC>{HSPACE}+ {
- BEGIN ST_DEFINE_VALUE;
- return SPACE;
-}
-
-<ST_DEFINE_OBJ_OR_FUNC>"(" {
- BEGIN ST_DEFINE_PARAMETER;
- return '(';
-}
-
-<ST_DEFINE_PARAMETER>{IDENTIFIER} {
- yylval.str = xtalloc_strdup (yyextra, yytext);
- return IDENTIFIER;
-}
-
-<ST_DEFINE_PARAMETER>"," {
- return ',';
-}
-
-<ST_DEFINE_PARAMETER>")" {
- BEGIN ST_DEFINE_VALUE;
- return ')';
-}
-
-<ST_DEFINE_PARAMETER>{HSPACE}+
-
-<ST_DEFINE_VALUE>{TOKEN} {
- yylval.token.type = TOKEN;
- yylval.token.value = xtalloc_strdup (yyextra, yytext);
- return TOKEN;
-}
-
-<ST_DEFINE_VALUE>[(),] {
- yylval.token.type = TOKEN;
- yylval.token.value = xtalloc_strdup (yyextra, yytext);
- return TOKEN;
+{PUNCTUATION} { /* a single punctuation character is returned as its own character code */
+ return yytext[0]; /* NOTE(review): PUNCTUATION omits '=' and '#', and OTHER excludes both, so no rule visible here matches a lone '=' or a mid-line single '#' -- confirm this is intended */
}
-<ST_DEFINE_VALUE>{HSPACE}+
-
-<ST_DEFINE_VALUE>\n {
- BEGIN INITIAL;
+\n { /* every newline is reported to the parser; no start-condition switch happens here any more */
 return NEWLINE;
}
-{IDENTIFIER} {
- int parameter_index;
+{OTHER} { /* a run of characters that are neither whitespace, '=', '#', nor PUNCTUATION characters; the earlier {IDENTIFIER} rule wins when both match the same length */
 yylval.str = xtalloc_strdup (yyextra, yytext);
- switch (glcpp_parser_classify_token (yyextra, yylval.str,
- &parameter_index))
- {
- case TOKEN_CLASS_IDENTIFIER:
- return IDENTIFIER;
- break;
- case TOKEN_CLASS_IDENTIFIER_FINALIZED:
- return IDENTIFIER_FINALIZED;
- break;
- case TOKEN_CLASS_FUNC_MACRO:
- return FUNC_MACRO;
- break;
- case TOKEN_CLASS_OBJ_MACRO:
- return OBJ_MACRO;
- break;
-
- }
-}
-
-[(),] {
- return yytext[0];
-}
-
-{TOKEN} {
- yylval.token.type = TOKEN;
- yylval.token.value = xtalloc_strdup (yyextra, yytext);
- return TOKEN;
-}
-
-\n {
- yyextra->need_newline = 1;
+ return OTHER; /* the copied text travels in yylval.str, the same field the IDENTIFIER rule fills */
}
{HSPACE}+ /* runs of spaces and tabs have no action, so they are discarded and never reach the parser */
%parse-param {glcpp_parser_t *parser}
%lex-param {glcpp_parser_t *parser}
-%token DEFINE DEFINED ELIF ELSE ENDIF FUNC_MACRO IDENTIFIER IDENTIFIER_FINALIZED IF IFDEF IFNDEF INTEGER OBJ_MACRO NEWLINE SPACE TOKEN UNDEF
-%type <ival> punctuator
-%type <imaxval> expression INTEGER
-%type <str> content FUNC_MACRO IDENTIFIER IDENTIFIER_FINALIZED OBJ_MACRO
-%type <argument_list> argument_list
-%type <string_list> macro parameter_list
-%type <token> TOKEN argument_word argument_word_or_comma
-%type <token_list> argument argument_or_comma replacement_list pp_tokens
-%left OR
-%left AND
-%left '|'
-%left '^'
-%left '&'
-%left EQUAL NOT_EQUAL
-%left '<' '>' LESS_OR_EQUAL GREATER_OR_EQUAL
-%left LEFT_SHIFT RIGHT_SHIFT
-%left '+' '-'
-%left '*' '/' '%'
-%right UNARY
-
-/* Hard to remove shift/reduce conflicts documented as follows:
- *
- * 1. '(' after FUNC_MACRO name which is correctly resolved to shift
- * to form macro invocation rather than reducing directly to
- * content.
- *
- * 2. Similarly, '(' after FUNC_MACRO which is correctly resolved to
- * shift to form macro invocation rather than reducing directly to
- * argument.
- *
- * 3. Similarly again now that we added argument_or_comma as well.
- */
-%expect 3
+%token HASH_DEFINE_FUNC HASH_DEFINE_OBJ HASH IDENTIFIER NEWLINE OTHER HASH_UNDEF
+%token LEFT_SHIFT RIGHT_SHIFT LESS_OR_EQUAL GREATER_OR_EQUAL EQUAL NOT_EQUAL AND OR PASTE
+
+ /* Stale tokens, declared only so that the rest of the code still compiles. */
+%token IDENTIFIER_FINALIZED FUNC_MACRO OBJ_MACRO
%%
- /* We do all printing at the input level. */
input: /* a possibly empty sequence of `line`s, built left-recursively; the new alternatives carry no semantic actions */
- /* empty */ {
- parser->just_printed_separator = 1;
- }
-| input content {
- int is_token;
- int skipping = 0;
-
- if (parser->skip_stack && parser->skip_stack->type != SKIP_NO_SKIP)
- skipping = 1;
-
- if ($2 && strlen ($2) && ! skipping) {
- int c = $2[0];
- int is_not_separator = ((c >= 'a' && c <= 'z') ||
- (c >= 'A' && c <= 'Z') ||
- (c >= 'A' && c <= 'Z') ||
- (c >= '0' && c <= '9') ||
- (c == '_'));
-
- if (! parser->just_printed_separator && is_not_separator)
- {
- printf (" ");
- }
- printf ("%s", $2);
-
- if (is_not_separator)
- parser->just_printed_separator = 0;
- else
- parser->just_printed_separator = 1;
- }
-
- if ($2)
- talloc_free ($2);
-
- if (parser->need_newline) {
- printf ("\n");
- parser->just_printed_separator = 1;
- parser->need_newline = 0;
- }
- }
-;
-
-content:
- IDENTIFIER {
- $$ = $1;
- }
-| IDENTIFIER_FINALIZED {
- $$ = $1;
- }
-| TOKEN {
- $$ = $1.value;
- }
-| FUNC_MACRO {
- $$ = $1;
- }
-| directive {
- $$ = talloc_strdup (parser, "\n");
- }
-| punctuator {
- $$ = talloc_asprintf (parser, "%c", $1);
- }
-| macro {
- $$ = NULL;
- }
+ /* empty */
+| input line /* append one more line to what has been parsed so far */
;
-punctuator:
- '(' { $$ = '('; }
-| ')' { $$ = ')'; }
-| ',' { $$ = ','; }
- ;
-
-macro:
- FUNC_MACRO '(' argument_list ')' {
- _expand_function_macro (parser, $1, $3);
- }
-| OBJ_MACRO {
- _expand_object_macro (parser, $1);
- talloc_free ($1);
- }
+line: /* one input line, classified by how it starts */
+ control_line /* a directive this grammar recognizes */
+| text_line /* a line that does not start with a HASH-type token */
+| HASH non_directive /* "#" followed by tokens that form none of the recognized directives */
;
-argument_list:
- /* empty */ {
- $$ = _argument_list_create (parser);
- }
-| argument {
- $$ = _argument_list_create (parser);
- _argument_list_append ($$, $1);
- }
-| argument_list ',' argument {
- _argument_list_append ($1, $3);
- $$ = $1;
- }
-;
-
-argument:
- argument_word {
- $$ = _token_list_create (parser);
- _token_list_append ($$, $1.type, $1.value);
- }
-| argument argument_word {
- _token_list_append ($1, $2.type, $2.value);
- talloc_free ($2.value);
- $$ = $1;
- }
-| argument '(' argument_or_comma ')' {
- _token_list_append ($1, '(', "(");
- _token_list_append_list ($1, $3);
- _token_list_append ($1, ')', ")");
- $$ = $1;
- }
+control_line: /* recognized directives; every alternative ends at NEWLINE and has no semantic action yet */
+ HASH_DEFINE_OBJ IDENTIFIER replacement_list NEWLINE /* object-like macro definition */
+| HASH_DEFINE_FUNC IDENTIFIER '(' ')' replacement_list NEWLINE /* function-like macro with an empty parameter list */
+| HASH_DEFINE_FUNC IDENTIFIER '(' identifier_list ')' replacement_list NEWLINE /* function-like macro with one or more parameters */
+| HASH_UNDEF IDENTIFIER NEWLINE /* "#undef" followed by exactly one identifier */
+| HASH NEWLINE /* "#" alone on its line */
;
-argument_word:
- IDENTIFIER { $$.type = IDENTIFIER; $$.value = $1; }
-| IDENTIFIER_FINALIZED { $$.type = IDENTIFIER_FINALIZED; $$.value = $1; }
-| TOKEN { $$ = $1; }
-| FUNC_MACRO { $$.type = FUNC_MACRO; $$.value = $1; }
-| macro { $$.type = TOKEN; $$.value = xtalloc_strdup (parser, ""); }
+identifier_list: /* one or more identifiers separated by ',' (left-recursive); the empty list is handled by control_line instead */
+ IDENTIFIER
+| identifier_list ',' IDENTIFIER
;
- /* XXX: The body of argument_or_comma is the same as the body
- * of argument, but with "argument" and "argument_word"
- * changed to "argument_or_comma" and
- * "argument_word_or_comma". It would be nice to have less
- * redundancy here, but I'm not sure how.
- *
- * It would also be nice to have a less ugly grammar to have
- * to implement, but such is the C preprocessor.
- */
-argument_or_comma:
- argument_word_or_comma {
- $$ = _token_list_create (parser);
- _token_list_append ($$, $1.type, $1.value);
- }
-| argument_or_comma argument_word_or_comma {
- _token_list_append ($1, $2.type, $2.value);
- $$ = $1;
- }
-| argument_or_comma '(' argument_or_comma ')' {
- _token_list_append ($1, '(', "(");
- _token_list_append_list ($1, $3);
- _token_list_append ($1, ')', ")");
- $$ = $1;
- }
+text_line: /* a non-directive line: either empty or a run of preprocessing tokens, always closed by NEWLINE */
+ NEWLINE
+| pp_tokens NEWLINE
;
-argument_word_or_comma:
- IDENTIFIER { $$.type = IDENTIFIER; $$.value = $1; }
-| IDENTIFIER_FINALIZED { $$.type = IDENTIFIER_FINALIZED; $$.value = $1; }
-| TOKEN { $$ = $1; }
-| FUNC_MACRO { $$.type = FUNC_MACRO; $$.value = $1; }
-| macro { $$.type = TOKEN; $$.value = xtalloc_strdup (parser, ""); }
-| ',' { $$.type = ','; $$.value = xtalloc_strdup (parser, ","); }
+non_directive: /* what follows a HASH token in `line`; at least one token is required, since "HASH NEWLINE" belongs to control_line */
+ pp_tokens NEWLINE
;
-directive:
- DEFINE IDENTIFIER NEWLINE {
- token_list_t *list = _token_list_create (parser);
- _define_object_macro (parser, $2, list);
- }
-| DEFINE IDENTIFIER SPACE replacement_list NEWLINE {
- _define_object_macro (parser, $2, $4);
- }
-| DEFINE IDENTIFIER '(' parameter_list ')' replacement_list NEWLINE {
- _define_function_macro (parser, $2, $4, $6);
- }
-| IF expression NEWLINE {
- _glcpp_parser_skip_stack_push_if (parser, $2);
- }
-| IFDEF IDENTIFIER NEWLINE {
- string_list_t *macro = hash_table_find (parser->defines, $2);
- talloc_free ($2);
- _glcpp_parser_skip_stack_push_if (parser, macro != NULL);
- }
-| IFNDEF IDENTIFIER NEWLINE {
- string_list_t *macro = hash_table_find (parser->defines, $2);
- talloc_free ($2);
- _glcpp_parser_skip_stack_push_if (parser, macro == NULL);
- }
-| ELIF expression NEWLINE {
- _glcpp_parser_skip_stack_change_if (parser, "#elif", $2);
- }
-| ELSE {
- _glcpp_parser_skip_stack_change_if (parser, "else", 1);
- }
-| ENDIF {
- _glcpp_parser_skip_stack_pop (parser);
- }
-| UNDEF IDENTIFIER {
- string_list_t *macro = hash_table_find (parser->defines, $2);
- if (macro) {
- /* XXX: Need hash table to support a real way
- * to remove an element rather than prefixing
- * a new node with data of NULL like this. */
- hash_table_insert (parser->defines, NULL, $2);
- talloc_free (macro);
- }
- talloc_free ($2);
- }
+replacement_list: /* the tokens after the macro name (and parameter list) in a #define; may be empty */
+ /* empty */
+| pp_tokens
;
-expression:
- INTEGER {
- $$ = $1;
- }
-| expression OR expression {
- $$ = $1 || $3;
- }
-| expression AND expression {
- $$ = $1 && $3;
- }
-| expression '|' expression {
- $$ = $1 | $3;
- }
-| expression '^' expression {
- $$ = $1 ^ $3;
- }
-| expression '&' expression {
- $$ = $1 & $3;
- }
-| expression NOT_EQUAL expression {
- $$ = $1 != $3;
- }
-| expression EQUAL expression {
- $$ = $1 == $3;
- }
-| expression GREATER_OR_EQUAL expression {
- $$ = $1 >= $3;
- }
-| expression LESS_OR_EQUAL expression {
- $$ = $1 <= $3;
- }
-| expression '>' expression {
- $$ = $1 > $3;
- }
-| expression '<' expression {
- $$ = $1 < $3;
- }
-| expression RIGHT_SHIFT expression {
- $$ = $1 >> $3;
- }
-| expression LEFT_SHIFT expression {
- $$ = $1 << $3;
- }
-| expression '-' expression {
- $$ = $1 - $3;
- }
-| expression '+' expression {
- $$ = $1 + $3;
- }
-| expression '%' expression {
- $$ = $1 % $3;
- }
-| expression '/' expression {
- $$ = $1 / $3;
- }
-| expression '*' expression {
- $$ = $1 * $3;
- }
-| '!' expression %prec UNARY {
- $$ = ! $2;
- }
-| '~' expression %prec UNARY {
- $$ = ~ $2;
- }
-| '-' expression %prec UNARY {
- $$ = - $2;
- }
-| '+' expression %prec UNARY {
- $$ = + $2;
- }
-| DEFINED IDENTIFIER %prec UNARY {
- string_list_t *macro = hash_table_find (parser->defines, $2);
- talloc_free ($2);
- if (macro)
- $$ = 1;
- else
- $$ = 0;
- }
-| '(' expression ')' {
- $$ = $2;
- }
+pp_tokens: /* one or more preprocessing tokens, collected left-recursively */
+ preprocessing_token
+| pp_tokens preprocessing_token
;
-parameter_list:
- /* empty */ {
- $$ = _string_list_create (parser);
- }
-| IDENTIFIER {
- $$ = _string_list_create (parser);
- _string_list_append_item ($$, $1);
- talloc_free ($1);
- }
-| parameter_list ',' IDENTIFIER {
- _string_list_append_item ($1, $3);
- talloc_free ($3);
- $$ = $1;
- }
+preprocessing_token: /* any single token allowed inside pp_tokens */
+ IDENTIFIER
+| punctuator
+| OTHER /* text the lexer's {OTHER} rule matched */
;
-replacement_list:
- /* empty */ {
- $$ = _token_list_create (parser);
- }
-| pp_tokens {
- $$ = $1;
- }
+punctuator: /* character literals are the single characters the lexer returns as yytext[0]; named tokens are the two-character operators. NOTE(review): '=' is not listed -- confirm intended */
+ '['
+| ']'
+| '('
+| ')'
+| '{'
+| '}'
+| '.'
+| '&'
+| '*'
+| '+'
+| '-'
+| '~'
+| '!'
+| '/'
+| '%'
+| LEFT_SHIFT
+| RIGHT_SHIFT
+| '<'
+| '>'
+| LESS_OR_EQUAL
+| GREATER_OR_EQUAL
+| EQUAL
+| NOT_EQUAL
+| '^'
+| '|'
+| AND
+| OR
+| ';'
+| ','
+| PASTE
;
-pp_tokens:
- TOKEN {
- $$ = _token_list_create (parser);
- _token_list_append ($$, $1.type, $1.value);
- }
-| pp_tokens TOKEN {
- _token_list_append ($1, $2.type, $2.value);
- $$ = $1;
- }
-;
-
%%
string_list_t *