X-Git-Url: https://git.libre-soc.org/?a=blobdiff_plain;f=src%2Fglsl%2Fglcpp%2Fglcpp-lex.l;h=fa9aa50691280df8e8324b58abb44ea12c19aaa7;hb=a4ba476c30ebcb99694c6167ac9b8af9414cb656;hp=5f0bb324990cdaa2eefcfeec3ec8dde9da6f6d24;hpb=21dda50549c5f220eff7ec04a72fb02e5eb09e76;p=mesa.git

diff --git a/src/glsl/glcpp/glcpp-lex.l b/src/glsl/glcpp/glcpp-lex.l
index 5f0bb324990..fa9aa506912 100644
--- a/src/glsl/glcpp/glcpp-lex.l
+++ b/src/glsl/glcpp/glcpp-lex.l
@@ -52,14 +52,107 @@ void glcpp_set_column (int  column_no , yyscan_t yyscanner);
 		yylloc->last_column = yycolumn + 1;			\
 		parser->has_new_line_number = 0;			\
 		parser->has_new_source_number = 0;			\
- } while(0);
+	} while(0);
 
 #define YY_USER_INIT			\
 	do {				\
 		yylineno = 1;		\
-		yycolumn = 1;		\
+		yycolumn = 0;		\
 		yylloc->source = 0;	\
 	} while(0)
+
+/* It's ugly to have macros that have return statements inside of
+ * them, but flex-based lexer generation is all built around the
+ * return statement.
+ *
+ * To mitigate the ugliness, we defer as much of the logic as possible
+ * to an actual function, not a macro (see
+ * glcpplex_update_state_per_token) and we make the word RETURN
+ * prominent in all of the macros which may return.
+ *
+ * The most-commonly-used macro is RETURN_TOKEN which will perform all
+ * necessary state updates based on the provided token, then
+ * conditionally return the token. It will not return a token if the
+ * parser is currently skipping tokens, (such as within #if
+ * 0...#else).
+ *
+ * The RETURN_TOKEN_NEVER_SKIP macro is a lower-level variant that
+ * makes the token returning unconditional. This is needed for things
+ * like #if and the tokens of its condition, (since these must be
+ * evaluated by the parser even when otherwise skipping).
+ *
+ * Finally, RETURN_STRING_TOKEN is a simple convenience wrapper on top
+ * of RETURN_TOKEN that performs a string copy of yytext before the
+ * return.
+ */
+#define RETURN_TOKEN_NEVER_SKIP(token)					\
+	do {								\
+		if (glcpp_lex_update_state_per_token (parser, token))	\
+			return token;					\
+	} while (0)
+
+#define RETURN_TOKEN(token)						\
+	do {								\
+		if (! parser->skipping) {				\
+			RETURN_TOKEN_NEVER_SKIP(token);			\
+		}							\
+	} while(0)
+
+#define RETURN_STRING_TOKEN(token)					\
+	do {								\
+		if (! parser->skipping) {				\
+			yylval->str = ralloc_strdup (yyextra, yytext);	\
+			RETURN_TOKEN_NEVER_SKIP (token);		\
+		}							\
+	} while(0)
+
+
+/* Update all state necessary for each token being returned.
+ *
+ * Here we'll be tracking newlines and spaces so that the lexer can
+ * alter its behavior as necessary, (for example, '#' has special
+ * significance if it is the first non-whitespace, non-comment token
+ * in a line, but does not otherwise).
+ *
+ * NOTE: If this function returns FALSE, then no token should be
+ * returned at all. This is used to suppress duplicate SPACE tokens.
+ */
+static int
+glcpp_lex_update_state_per_token (glcpp_parser_t *parser, int token)
+{
+	/* After the first non-space token in a line, we won't
+	 * allow any '#' to introduce a directive. */
+	if (token == NEWLINE) {
+		parser->first_non_space_token_this_line = 1;
+	} else if (token != SPACE) {
+		parser->first_non_space_token_this_line = 0;
+	}
+
+	/* Track newlines just to know whether a newline needs
+	 * to be inserted if end-of-file comes early. */
+	if (token == NEWLINE) {
+		parser->last_token_was_newline = 1;
+	} else {
+		parser->last_token_was_newline = 0;
+	}
+
+	/* Track spaces to avoid emitting multiple SPACE
+	 * tokens in a row. */
+	if (token == SPACE) {
+		if (! parser->last_token_was_space) {
+			parser->last_token_was_space = 1;
+			return 1;
+		} else {
+			parser->last_token_was_space = 1;
+			return 0;
+		}
+	} else {
+		parser->last_token_was_space = 0;
+		return 1;
+	}
+}
+
+
 %}
 
 %option bison-bridge bison-locations reentrant noyywrap
@@ -67,14 +160,19 @@ void glcpp_set_column (int  column_no , yyscan_t yyscanner);
 %option prefix="glcpp_"
 %option stack
 %option never-interactive
+%option warn nodefault
 
-%x DONE COMMENT UNREACHABLE SKIP DEFINE NEWLINE_CATCHUP
+	/* Note: When adding any start conditions to this list, you must also
+	 * update the "Internal compiler error" catch-all rule near the end of
+	 * this file. */
+
+%x COMMENT DEFINE DONE HASH NEWLINE_CATCHUP UNREACHABLE
 
 SPACE		[[:space:]]
 NONSPACE	[^[:space:]]
-NEWLINE		[\n]
 HSPACE		[ \t]
-HASH		^{HSPACE}*#{HSPACE}*
+HASH		#
+NEWLINE		(\r\n|\n\r|\r|\n)
 IDENTIFIER	[_a-zA-Z][_a-zA-Z0-9]*
 PP_NUMBER	[.]?[0-9]([._a-zA-Z0-9]|[eEpP][-+])*
 PUNCTUATION	[][(){}.&*~!/%<>^|;,=+-]
@@ -111,130 +209,159 @@ HEXADECIMAL_INTEGER	0[xX][0-9a-fA-F]+[uU]?
 			parser->commented_newlines--;
 		if (parser->commented_newlines == 0)
 			BEGIN INITIAL;
-		return NEWLINE;
+		RETURN_TOKEN_NEVER_SKIP (NEWLINE);
 	}
 
-	/* The handling of the SKIP vs INITIAL start states requires
-	 * some special handling. Typically, a lexer would change
-	 * start states with statements like "BEGIN SKIP" within the
-	 * lexer rules. We can't get away with that here, since we
-	 * need the parser to actually evaluate expressions for
-	 * directives like "#if".
+	/* Set up the parser->skipping bit here before doing any lexing.
+	 *
+	 * This bit controls whether tokens are skipped, (as implemented by
+         * RETURN_TOKEN), such as between "#if 0" and "#endif".
 	 *
-	 * So, here, in code that will be executed on every call to
-	 * the lexer,and before any rules, we examine the skip_stack
-	 * as set by the parser to know whether to change from INITIAL
-	 * to SKIP or from SKIP back to INITIAL.
+	 * The parser maintains a skip_stack indicating whether we should be
+         * skipping, (and nested levels of #if/#ifdef/#ifndef/#endif) will
+         * push and pop items from the stack.
 	 *
-	 * Three cases cause us to switch out of the SKIP state and
-	 * back to the INITIAL state:
+	 * Here are the rules for determining whether we are skipping:
 	 *
-	 *	1. The top of the skip_stack is of type SKIP_NO_SKIP
-	 *	   This means we're still evaluating some #if
-	 *	   hierarchy, but we're on a branch of it where
-	 *	   content should not be skipped (such as "#if 1" or
-	 *	   "#else" or so).
+	 *	1. If the skip stack is NULL, we are outside of all #if blocks
+	 *         and we are not skipping.
 	 *
-	 *	2. The skip_stack is NULL meaning that we've reached
-	 *	   the last #endif.
+	 *	2. If the skip stack is non-NULL, the type of the top node in
+	 *	   the stack determines whether to skip. A type of
+	 *	   SKIP_NO_SKIP is used for blocks where we are emitting
+	 *	   tokens, (such as between #if 1 and #endif, or after the
+	 *	   #else of an #if 0, etc.).
 	 *
-	 *	3. The lexing_directive bit is set. This indicates that we are
-	 *	   lexing a pre-processor directive, (such as #if, #elif, or
-	 *	   #else). For the #if and #elif directives we always need to
-	 *	   parse the conditions, (even if otherwise within an #if
-	 *	   0). And for #else, we want to be able to generate an error
-	 *	   if any garbage follows #else.
+	 *	3. The lexing_directive bit overrides the skip stack. This bit
+	 *	   is set when we are actively lexing the expression for a
+	 *	   pre-processor condition, (such as #if, #elif, or #else). In
+	 *	   this case, even if otherwise skipping, we need to emit the
+	 *	   tokens for this condition so that the parser can evaluate
+	 *	   the expression. (For #else, there's no expression, but we
+	 *	   emit tokens so the parser can generate a nice error message
+	 *	   if there are any tokens here).
 	 */
-	if (YY_START == INITIAL || YY_START == SKIP) {
-		if (parser->lexing_directive ||
-		    parser->skip_stack == NULL ||
-		    parser->skip_stack->type == SKIP_NO_SKIP)
-		{
-			BEGIN INITIAL;
-		} else {
-			BEGIN SKIP;
-		}
+	if (parser->skip_stack &&
+	    parser->skip_stack->type != SKIP_NO_SKIP &&
+	    ! parser->lexing_directive)
+	{
+		parser->skipping = 1;
+	} else {
+		parser->skipping = 0;
 	}
 
 	/* Single-line comments */
-"//"[^\n]* {
+<INITIAL,DEFINE,HASH>"//"[^\r\n]* {
 }
 
 	/* Multi-line comments */
-<DEFINE,INITIAL>"/*"                    { yy_push_state(COMMENT, yyscanner); }
-<COMMENT>[^*\n]*
-<COMMENT>[^*\n]*\n      { yylineno++; yycolumn = 0; parser->commented_newlines++; }
-<COMMENT>"*"+[^*/\n]*
-<COMMENT>"*"+[^*/\n]*\n { yylineno++; yycolumn = 0; parser->commented_newlines++; }
+<INITIAL,DEFINE,HASH>"/*"   { yy_push_state(COMMENT, yyscanner); }
+<COMMENT>[^*\r\n]*
+<COMMENT>[^*\r\n]*{NEWLINE} { yylineno++; yycolumn = 0; parser->commented_newlines++; }
+<COMMENT>"*"+[^*/\r\n]*
+<COMMENT>"*"+[^*/\r\n]*{NEWLINE} { yylineno++; yycolumn = 0; parser->commented_newlines++; }
 <COMMENT>"*"+"/"        {
 	yy_pop_state(yyscanner);
-	if (yyextra->space_tokens)
-		return SPACE;
+	/* In the <HASH> start condition, we don't want any SPACE token. */
+	if (yyextra->space_tokens && YY_START != HASH)
+		RETURN_TOKEN (SPACE);
 }
 
-{HASH}version{HSPACE}+ {
-	yylval->str = ralloc_strdup (yyextra, yytext);
+{HASH} {
+
+	/* If the '#' is the first non-whitespace, non-comment token on this
+	 * line, then it introduces a directive, so switch to the <HASH> start
+	 * condition.
+	 *
+	 * Otherwise, this is just punctuation, so return the HASH_TOKEN
+         * token. */
+	if (parser->first_non_space_token_this_line) {
+		BEGIN HASH;
+	}
+
+	RETURN_TOKEN_NEVER_SKIP (HASH_TOKEN);
+}
+
+<HASH>version{HSPACE}+ {
+	BEGIN INITIAL;
 	yyextra->space_tokens = 0;
-	return HASH_VERSION;
+	RETURN_STRING_TOKEN (VERSION_TOKEN);
+}
+
+	/* Swallow empty #pragma directives, (to avoid confusing the
+	 * downstream compiler).
+	 *
+	 * Note: We use a simple regular expression for the lookahead
+	 * here. Specifically, we cannot use the complete {NEWLINE} expression
+	 * since it uses alternation and we've found that there's a flex bug
+	 * where using alternation in the lookahead portion of a pattern
+	 * triggers a buffer overrun. */
+<HASH>pragma{HSPACE}*/[\r\n] {
+	BEGIN INITIAL;
 }
 
 	/* glcpp doesn't handle #extension, #version, or #pragma directives.
 	 * Simply pass them through to the main compiler's lexer/parser. */
-{HASH}(extension|pragma)[^\n]* {
-	yylval->str = ralloc_strdup (yyextra, yytext);
-	yylineno++;
-	yycolumn = 0;
-	return OTHER;
+<HASH>(extension|pragma)[^\r\n]* {
+	BEGIN INITIAL;
+	RETURN_STRING_TOKEN (PRAGMA);
 }
 
-{HASH}line{HSPACE}+ {
-	return HASH_LINE;
+<HASH>line{HSPACE}+ {
+	BEGIN INITIAL;
+	RETURN_TOKEN (LINE);
 }
 
-<SKIP,INITIAL>{
-{HASH}ifdef {
-	yyextra->lexing_directive = 1;
-	yyextra->space_tokens = 0;
-	return HASH_IFDEF;
+<HASH>{NEWLINE} {
+	BEGIN INITIAL;
+	RETURN_TOKEN_NEVER_SKIP (NEWLINE);
 }
 
-{HASH}ifndef {
+	/* For the pre-processor directives, we return these tokens
+	 * even when we are otherwise skipping. */
+<HASH>ifdef {
+	BEGIN INITIAL;
 	yyextra->lexing_directive = 1;
 	yyextra->space_tokens = 0;
-	return HASH_IFNDEF;
+	RETURN_TOKEN_NEVER_SKIP (IFDEF);
 }
 
-{HASH}if/[^_a-zA-Z0-9] {
+<HASH>ifndef {
+	BEGIN INITIAL;
 	yyextra->lexing_directive = 1;
 	yyextra->space_tokens = 0;
-	return HASH_IF;
+	RETURN_TOKEN_NEVER_SKIP (IFNDEF);
 }
 
-{HASH}elif/[^_a-zA-Z0-9] {
+<HASH>if/[^_a-zA-Z0-9] {
+	BEGIN INITIAL;
 	yyextra->lexing_directive = 1;
 	yyextra->space_tokens = 0;
-	return HASH_ELIF;
+	RETURN_TOKEN_NEVER_SKIP (IF);
 }
 
-{HASH}else {
+<HASH>elif/[^_a-zA-Z0-9] {
+	BEGIN INITIAL;
+	yyextra->lexing_directive = 1;
 	yyextra->space_tokens = 0;
-	return HASH_ELSE;
+	RETURN_TOKEN_NEVER_SKIP (ELIF);
 }
 
-{HASH}endif {
+<HASH>else {
+	BEGIN INITIAL;
 	yyextra->space_tokens = 0;
-	return HASH_ENDIF;
-}
+	RETURN_TOKEN_NEVER_SKIP (ELSE);
 }
 
-<SKIP>[^\n] {
+<HASH>endif {
+	BEGIN INITIAL;
+	yyextra->space_tokens = 0;
+	RETURN_TOKEN_NEVER_SKIP (ENDIF);
 }
 
-{HASH}error.* {
-	char *p;
-	for (p = yytext; !isalpha(p[0]); p++); /* skip "  #   " */
-	p += 5; /* skip "error" */
-	glcpp_error(yylloc, yyextra, "#error%s", p);
+<HASH>error[^\r\n]* {
+	BEGIN INITIAL;
+	RETURN_STRING_TOKEN (ERROR_TOKEN);
 }
 
 	/* After we see a "#define" we enter the <DEFINE> start state
@@ -255,24 +382,42 @@ HEXADECIMAL_INTEGER	0[xX][0-9a-fA-F]+[uU]?
 	 *	* Anything else, (not an identifier, not a comment,
 	 *	  and not whitespace). This will generate an error.
 	 */
-{HASH}define{HSPACE}+ {
+<HASH>define{HSPACE}* {
+	if (! parser->skipping) {
+		BEGIN DEFINE;
+		yyextra->space_tokens = 0;
+		RETURN_TOKEN (DEFINE_TOKEN);
+	}
+}
+
+<HASH>undef {
+	BEGIN INITIAL;
 	yyextra->space_tokens = 0;
-	yy_push_state(DEFINE, yyscanner);
-	return HASH_DEFINE;
+	RETURN_TOKEN (UNDEF);
+}
+
+<HASH>{HSPACE}+ {
+	/* Nothing to do here. Importantly, don't leave the <HASH>
+	 * start condition, since it's legal to have space between the
+	 * '#' and the directive. */
+}
+
+	/* This will catch any non-directive garbage after a HASH */
+<HASH>{NONSPACE} {
+	BEGIN INITIAL;
+	RETURN_TOKEN (GARBAGE);
 }
 
 	/* An identifier immediately followed by '(' */
 <DEFINE>{IDENTIFIER}/"(" {
-	yy_pop_state(yyscanner);
-	yylval->str = ralloc_strdup (yyextra, yytext);
-	return FUNC_IDENTIFIER;
+	BEGIN INITIAL;
+	RETURN_STRING_TOKEN (FUNC_IDENTIFIER);
 }
 
 	/* An identifier not immediately followed by '(' */
 <DEFINE>{IDENTIFIER} {
-	yy_pop_state(yyscanner);
-	yylval->str = ralloc_strdup (yyextra, yytext);
-	return OBJ_IDENTIFIER;
+	BEGIN INITIAL;
+	RETURN_STRING_TOKEN (OBJ_IDENTIFIER);
 }
 
 	/* Whitespace */
@@ -284,7 +429,7 @@ HEXADECIMAL_INTEGER	0[xX][0-9a-fA-F]+[uU]?
 <DEFINE>[/][^*]{NONSPACE}* {
 	BEGIN INITIAL;
 	glcpp_error(yylloc, yyextra, "#define followed by a non-identifier: %s", yytext);
-	return INTEGER_STRING;
+	RETURN_STRING_TOKEN (INTEGER_STRING);
 }
 
 	/* A character that can't start an identifier, comment, or
@@ -292,126 +437,135 @@ HEXADECIMAL_INTEGER	0[xX][0-9a-fA-F]+[uU]?
 <DEFINE>[^_a-zA-Z/[:space:]]{NONSPACE}* {
 	BEGIN INITIAL;
 	glcpp_error(yylloc, yyextra, "#define followed by a non-identifier: %s", yytext);
-	return INTEGER_STRING;
-}
-
-{HASH}undef {
-	yyextra->space_tokens = 0;
-	return HASH_UNDEF;
-}
-
-{HASH} {
-	yyextra->space_tokens = 0;
-	return HASH;
+	RETURN_STRING_TOKEN (INTEGER_STRING);
 }
 
 {DECIMAL_INTEGER} {
-	yylval->str = ralloc_strdup (yyextra, yytext);
-	return INTEGER_STRING;
+	RETURN_STRING_TOKEN (INTEGER_STRING);
 }
 
 {OCTAL_INTEGER} {
-	yylval->str = ralloc_strdup (yyextra, yytext);
-	return INTEGER_STRING;
+	RETURN_STRING_TOKEN (INTEGER_STRING);
 }
 
 {HEXADECIMAL_INTEGER} {
-	yylval->str = ralloc_strdup (yyextra, yytext);
-	return INTEGER_STRING;
+	RETURN_STRING_TOKEN (INTEGER_STRING);
 }
 
 "<<"  {
-	return LEFT_SHIFT;
+	RETURN_TOKEN (LEFT_SHIFT);
 }
 
 ">>" {
-	return RIGHT_SHIFT;
+	RETURN_TOKEN (RIGHT_SHIFT);
 }
 
 "<=" {
-	return LESS_OR_EQUAL;
+	RETURN_TOKEN (LESS_OR_EQUAL);
 }
 
 ">=" {
-	return GREATER_OR_EQUAL;
+	RETURN_TOKEN (GREATER_OR_EQUAL);
 }
 
 "==" {
-	return EQUAL;
+	RETURN_TOKEN (EQUAL);
 }
 
 "!=" {
-	return NOT_EQUAL;
+	RETURN_TOKEN (NOT_EQUAL);
 }
 
 "&&" {
-	return AND;
+	RETURN_TOKEN (AND);
 }
 
 "||" {
-	return OR;
+	RETURN_TOKEN (OR);
+}
+
+"++" {
+	RETURN_TOKEN (PLUS_PLUS);
+}
+
+"--" {
+	RETURN_TOKEN (MINUS_MINUS);
 }
 
 "##" {
-	if (parser->is_gles)
-		glcpp_error(yylloc, yyextra, "Token pasting (##) is illegal in GLES");
-	return PASTE;
+	if (! parser->skipping) {
+		if (parser->is_gles)
+			glcpp_error(yylloc, yyextra, "Token pasting (##) is illegal in GLES");
+		RETURN_TOKEN (PASTE);
+	}
 }
 
 "defined" {
-	return DEFINED;
+	RETURN_TOKEN (DEFINED);
 }
 
 {IDENTIFIER} {
-	yylval->str = ralloc_strdup (yyextra, yytext);
-	return IDENTIFIER;
+	RETURN_STRING_TOKEN (IDENTIFIER);
 }
 
 {PP_NUMBER} {
-	yylval->str = ralloc_strdup (yyextra, yytext);
-	return OTHER;
+	RETURN_STRING_TOKEN (OTHER);
 }
 
 {PUNCTUATION} {
-	return yytext[0];
+	RETURN_TOKEN (yytext[0]);
 }
 
 {OTHER}+ {
-	yylval->str = ralloc_strdup (yyextra, yytext);
-	return OTHER;
+	RETURN_STRING_TOKEN (OTHER);
 }
 
 {HSPACE} {
 	if (yyextra->space_tokens) {
-		return SPACE;
+		RETURN_TOKEN (SPACE);
 	}
 }
 
-<SKIP,INITIAL>\n {
+	/* We preserve all newlines, even between #if 0..#endif, so no
+	skipping. */
+<*>{NEWLINE} {
 	if (parser->commented_newlines) {
 		BEGIN NEWLINE_CATCHUP;
+	} else {
+		BEGIN INITIAL;
 	}
 	yyextra->space_tokens = 1;
 	yyextra->lexing_directive = 0;
 	yylineno++;
 	yycolumn = 0;
-	return NEWLINE;
+	RETURN_TOKEN_NEVER_SKIP (NEWLINE);
 }
 
-	/* Handle missing newline at EOF. */
-<INITIAL><<EOF>> {
+<INITIAL,COMMENT,DEFINE,HASH><<EOF>> {
+	if (YY_START == COMMENT)
+		glcpp_error(yylloc, yyextra, "Unterminated comment");
 	BEGIN DONE; /* Don't keep matching this rule forever. */
 	yyextra->lexing_directive = 0;
-	return NEWLINE;
+	if (! parser->last_token_was_newline)
+		RETURN_TOKEN (NEWLINE);
 }
 
+	/* This is a catch-all to avoid the annoying default flex action which
+	 * matches any character and prints it. If any input ever matches this
+	 * rule, then we have made a mistake above and need to fix one or more
+	 * of the preceding patterns to match that input. */
+
+<*>. {
+	glcpp_error(yylloc, yyextra, "Internal compiler error: Unexpected character: %s", yytext);
+
 	/* We don't actually use the UNREACHABLE start condition. We
-	only have this action here so that we can pretend to call some
+	only have this block here so that we can pretend to call some
 	generated functions, (to avoid "defined but not used"
 	warnings. */
-<UNREACHABLE>. {
-	unput('.');
-	yy_top_state(yyextra);
+        if (YY_START == UNREACHABLE) {
+		unput('.');
+		yy_top_state(yyextra);
+	}
 }
 
 %%