X-Git-Url: https://git.libre-soc.org/?a=blobdiff_plain;f=ld%2Fldlex.l;h=5db1e731b7408ec0c750de48b55a4d41468c46d5;hb=7c80aab93666727965c2d5d6e17b1338d88293e0;hp=e52d83b222387f7f97913c0433a06f7aeed3da2c;hpb=45e481d13b770b743fb0be9d9f5fe8c0e03c2a12;p=binutils-gdb.git diff --git a/ld/ldlex.l b/ld/ldlex.l index e52d83b2223..5db1e731b74 100644 --- a/ld/ldlex.l +++ b/ld/ldlex.l @@ -1,10 +1,8 @@ -%option nounput +%option nounput noyywrap %{ -/* Copyright 1991, 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999, - 2000, 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009 - Free Software Foundation, Inc. +/* Copyright (C) 1991-2021 Free Software Foundation, Inc. Written by Steve Chamberlain of Cygnus Support. This file is part of the GNU Binutils. @@ -27,6 +25,7 @@ #include "bfd.h" #include "safe-ctype.h" #include "bfdlink.h" +#include "ctf-api.h" #include "ld.h" #include "ldmisc.h" #include "ldexp.h" @@ -41,9 +40,8 @@ yylex and yyparse (indirectly) both check this. */ input_type parser_input; -/* Line number in the current input file. - (FIXME Actually, it doesn't appear to get reset for each file?) */ -unsigned int lineno = 1; +/* Line number in the current input file. */ +unsigned int lineno; /* The string we are currently lexing, or NULL if we are reading a file. */ @@ -60,12 +58,15 @@ const char *lex_string = NULL; #undef YY_INPUT #define YY_INPUT(buf,result,max_size) result = yy_input (buf, max_size) +#ifndef YY_NO_UNPUT #define YY_NO_UNPUT +#endif #define MAX_INCLUDE_DEPTH 10 static YY_BUFFER_STATE include_stack[MAX_INCLUDE_DEPTH]; static const char *file_name_stack[MAX_INCLUDE_DEPTH]; static unsigned int lineno_stack[MAX_INCLUDE_DEPTH]; +static unsigned int sysrooted_stack[MAX_INCLUDE_DEPTH]; static unsigned int include_stack_ptr = 0; static int vers_node_nesting = 0; @@ -74,44 +75,39 @@ static void comment (void); static void lex_warn_invalid (char *where, char *what); /* STATES - EXPRESSION definitely in an expression - SCRIPT definitely in a script - BOTH either EXPRESSION or SCRIPT - DEFSYMEXP in an argument to -defsym - MRI in an MRI script + EXPRESSION in an expression + SCRIPT in a script + INPUTLIST in a script, a filename-list + MRI in an MRI script + WILD inside the braces of an output section or overlay, + for input section wildcards VERS_START starting a Sun style mapfile VERS_SCRIPT a Sun style mapfile VERS_NODE a node within a Sun style mapfile */ #define RTOKEN(x) { yylval.token = x; return x; } -/* Some versions of flex want this. */ -#ifndef yywrap -int yywrap (void) { return 1; } -#endif %} %a 4000 %o 5000 -CMDFILENAMECHAR [_a-zA-Z0-9\/\.\\_\+\$\:\[\]\\\,\=\&\!\<\>\-\~] -CMDFILENAMECHAR1 [_a-zA-Z0-9\/\.\\_\+\$\:\[\]\\\,\=\&\!\<\>\~] -FILENAMECHAR1 [_a-zA-Z\/\.\\\$\_\~] -SYMBOLCHARN [_a-zA-Z\/\.\\\$\_\~0-9] -FILENAMECHAR [_a-zA-Z0-9\/\.\-\_\+\=\$\:\[\]\\\,\~] -WILDCHAR [_a-zA-Z0-9\/\.\-\_\+\=\$\:\[\]\\\,\~\?\*] +WILDCHAR [_a-zA-Z0-9\/\.\\\$\~\-\+\:\[\]\,\=\?\*\^\!] +FILENAMECHAR [_a-zA-Z0-9\/\.\\\$\~\-\+\:\[\]\,\=] +NOCFILENAMECHAR [_a-zA-Z0-9\/\.\\\$\~\-\+\:\[\]] +SYMBOLNAMECHAR [_a-zA-Z0-9\/\.\\\$\~] +FILENAMECHAR1 [_a-zA-Z\/\.\\\$\~] +SYMBOLNAMECHAR1 [_a-zA-Z\.\\\$] WHITE [ \t\n\r]+ -NOCFILENAMECHAR [_a-zA-Z0-9\/\.\-\_\+\$\:\[\]\\\~] - V_TAG [.$_a-zA-Z][._a-zA-Z0-9]* V_IDENTIFIER [*?.$_a-zA-Z\[\]\-\!\^\\]([*?.$_a-zA-Z0-9\[\]\-\!\^\\]|::)* %s SCRIPT +%s INPUTLIST %s EXPRESSION -%s BOTH -%s DEFSYMEXP %s MRI +%s WILD %s VERS_START %s VERS_SCRIPT %s VERS_NODE @@ -133,16 +129,11 @@ V_IDENTIFIER [*?.$_a-zA-Z\[\]\-\!\^\\]([*?.$_a-zA-Z0-9\[\]\-\!\^\\]|::)* } } -"/*" { comment (); } - - -"-" { RTOKEN('-');} -"+" { RTOKEN('+');} -{FILENAMECHAR1}{SYMBOLCHARN}* { yylval.name = xstrdup (yytext); return NAME; } -"=" { RTOKEN('='); } +"/*" { + comment (); } "$"([0-9A-Fa-f])+ { - yylval.integer = bfd_scan_vma (yytext + 1, 0, 16); + yylval.integer = bfd_scan_vma (yytext + 1, 0, 16); yylval.bigint.str = NULL; return INT; } @@ -172,7 +163,7 @@ V_IDENTIFIER [*?.$_a-zA-Z\[\]\-\!\^\\]([*?.$_a-zA-Z0-9\[\]\-\!\^\\]|::)* yylval.bigint.str = NULL; return INT; } -((("$"|0[xX])([0-9A-Fa-f])+)|(([0-9])+))(M|K|m|k)? { +((("$"|0[xX])([0-9A-Fa-f])+)|(([0-9])+))(M|K|m|k)? { char *s = yytext; int ibase = 0; @@ -201,187 +192,228 @@ V_IDENTIFIER [*?.$_a-zA-Z\[\]\-\!\^\\]([*?.$_a-zA-Z0-9\[\]\-\!\^\\]|::)* } return INT; } -"]" { RTOKEN(']');} -"[" { RTOKEN('[');} -"<<=" { RTOKEN(LSHIFTEQ);} -">>=" { RTOKEN(RSHIFTEQ);} -"||" { RTOKEN(OROR);} -"==" { RTOKEN(EQ);} -"!=" { RTOKEN(NE);} -">=" { RTOKEN(GE);} -"<=" { RTOKEN(LE);} -"<<" { RTOKEN(LSHIFT);} -">>" { RTOKEN(RSHIFT);} -"+=" { RTOKEN(PLUSEQ);} -"-=" { RTOKEN(MINUSEQ);} -"*=" { RTOKEN(MULTEQ);} -"/=" { RTOKEN(DIVEQ);} -"&=" { RTOKEN(ANDEQ);} -"|=" { RTOKEN(OREQ);} -"&&" { RTOKEN(ANDAND);} -">" { RTOKEN('>');} -"," { RTOKEN(',');} -"&" { RTOKEN('&');} -"|" { RTOKEN('|');} -"~" { RTOKEN('~');} -"!" { RTOKEN('!');} -"?" { RTOKEN('?');} -"*" { RTOKEN('*');} -"+" { RTOKEN('+');} -"-" { RTOKEN('-');} -"/" { RTOKEN('/');} -"%" { RTOKEN('%');} -"<" { RTOKEN('<');} -"=" { RTOKEN('=');} -"}" { RTOKEN('}') ; } -"{" { RTOKEN('{'); } -")" { RTOKEN(')');} -"(" { RTOKEN('(');} -":" { RTOKEN(':'); } -";" { RTOKEN(';');} -"MEMORY" { RTOKEN(MEMORY);} -"REGION_ALIAS" { RTOKEN(REGION_ALIAS);} -"ORIGIN" { RTOKEN(ORIGIN);} -"VERSION" { RTOKEN(VERSIONK);} -"BLOCK" { RTOKEN(BLOCK);} -"BIND" { RTOKEN(BIND);} -"LENGTH" { RTOKEN(LENGTH);} -"ALIGN" { RTOKEN(ALIGN_K);} -"DATA_SEGMENT_ALIGN" { RTOKEN(DATA_SEGMENT_ALIGN);} -"DATA_SEGMENT_RELRO_END" { RTOKEN(DATA_SEGMENT_RELRO_END);} -"DATA_SEGMENT_END" { RTOKEN(DATA_SEGMENT_END);} -"ADDR" { RTOKEN(ADDR);} -"LOADADDR" { RTOKEN(LOADADDR);} -"ALIGNOF" { RTOKEN(ALIGNOF); } -"MAX" { RTOKEN(MAX_K); } -"MIN" { RTOKEN(MIN_K); } -"ASSERT" { RTOKEN(ASSERT_K); } -"ENTRY" { RTOKEN(ENTRY);} -"EXTERN" { RTOKEN(EXTERN);} -"NEXT" { RTOKEN(NEXT);} -"sizeof_headers" { RTOKEN(SIZEOF_HEADERS);} -"SIZEOF_HEADERS" { RTOKEN(SIZEOF_HEADERS);} -"SEGMENT_START" { RTOKEN(SEGMENT_START);} -"MAP" { RTOKEN(MAP);} -"SIZEOF" { RTOKEN(SIZEOF);} -"TARGET" { RTOKEN(TARGET_K);} -"SEARCH_DIR" { RTOKEN(SEARCH_DIR);} -"OUTPUT" { RTOKEN(OUTPUT);} -"INPUT" { RTOKEN(INPUT);} -"GROUP" { RTOKEN(GROUP);} -"AS_NEEDED" { RTOKEN(AS_NEEDED);} -"DEFINED" { RTOKEN(DEFINED);} -"CREATE_OBJECT_SYMBOLS" { RTOKEN(CREATE_OBJECT_SYMBOLS);} -"CONSTRUCTORS" { RTOKEN( CONSTRUCTORS);} -"FORCE_COMMON_ALLOCATION" { RTOKEN(FORCE_COMMON_ALLOCATION);} -"INHIBIT_COMMON_ALLOCATION" { RTOKEN(INHIBIT_COMMON_ALLOCATION);} -"SECTIONS" { RTOKEN(SECTIONS);} -"INSERT" { RTOKEN(INSERT_K);} -"AFTER" { RTOKEN(AFTER);} -"BEFORE" { RTOKEN(BEFORE);} -"FILL" { RTOKEN(FILL);} -"STARTUP" { RTOKEN(STARTUP);} -"OUTPUT_FORMAT" { RTOKEN(OUTPUT_FORMAT);} -"OUTPUT_ARCH" { RTOKEN( OUTPUT_ARCH);} -"HLL" { RTOKEN(HLL);} -"SYSLIB" { RTOKEN(SYSLIB);} -"FLOAT" { RTOKEN(FLOAT);} -"QUAD" { RTOKEN( QUAD);} -"SQUAD" { RTOKEN( SQUAD);} -"LONG" { RTOKEN( LONG);} -"SHORT" { RTOKEN( SHORT);} -"BYTE" { RTOKEN( BYTE);} -"NOFLOAT" { RTOKEN(NOFLOAT);} -"NOCROSSREFS" { RTOKEN(NOCROSSREFS);} -"OVERLAY" { RTOKEN(OVERLAY); } -"SORT_BY_NAME" { RTOKEN(SORT_BY_NAME); } -"SORT_BY_ALIGNMENT" { RTOKEN(SORT_BY_ALIGNMENT); } -"SORT" { RTOKEN(SORT_BY_NAME); } -"NOLOAD" { RTOKEN(NOLOAD);} -"DSECT" { RTOKEN(DSECT);} -"COPY" { RTOKEN(COPY);} -"INFO" { RTOKEN(INFO);} -"OVERLAY" { RTOKEN(OVERLAY);} -"ONLY_IF_RO" { RTOKEN(ONLY_IF_RO); } -"ONLY_IF_RW" { RTOKEN(ONLY_IF_RW); } -"SPECIAL" { RTOKEN(SPECIAL); } -"o" { RTOKEN(ORIGIN);} -"org" { RTOKEN(ORIGIN);} -"l" { RTOKEN( LENGTH);} -"len" { RTOKEN( LENGTH);} -"INCLUDE" { RTOKEN(INCLUDE);} -"PHDRS" { RTOKEN (PHDRS); } -"AT" { RTOKEN(AT);} -"SUBALIGN" { RTOKEN(SUBALIGN);} -"PROVIDE" { RTOKEN(PROVIDE); } -"PROVIDE_HIDDEN" { RTOKEN(PROVIDE_HIDDEN); } -"KEEP" { RTOKEN(KEEP); } -"EXCLUDE_FILE" { RTOKEN(EXCLUDE_FILE); } -"CONSTANT" { RTOKEN(CONSTANT);} + + /* Some tokens that only appear in expressions must be enabled for + states other than EXPRESSION, since parser lookahead means they + must be recognised before the parser switches the lexer out of + SCRIPT or WILD state into EXPRESSION state. + + This sort of thing happens for example with NAME in ldgram.y + "section" rule, which is immediately followed by ldlex_expression. + However, if you follow the grammar from "sec_or_group_p1" you see + "assignment" appearing in "statement_anywhere". Now, + "assignment" also has NAME as its first token, just like + "section". So the parser can't know whether it is in the + "section" or the "assignment" rule until it has scanned the next + token to find an assignment operator. Thus the next token after + NAME in the "section" rule may be lexed before the lexer is + switched to EXPRESSION state, and there are quite a number of + optional components. The first token in all those components + must be able to be lexed in SCRIPT state, as well as the + assignment operators. In fact, due to "opt_exp_with_type", + anything that can appear on the left hand side of "exp" might + need to be lexed in SCRIPT state. + + MRI mode tends to cover everything in MRI scripts. + */ +"]" { RTOKEN(']'); } +"[" { RTOKEN('['); } +"<<=" { RTOKEN(LSHIFTEQ); } +">>=" { RTOKEN(RSHIFTEQ); } +"||" { RTOKEN(OROR); } +"==" { RTOKEN(EQ); } +"!=" { RTOKEN(NE); } +">=" { RTOKEN(GE); } +"<=" { RTOKEN(LE); } +"<<" { RTOKEN(LSHIFT); } +">>" { RTOKEN(RSHIFT); } +"+=" { RTOKEN(PLUSEQ); } +"-=" { RTOKEN(MINUSEQ); } +"*=" { RTOKEN(MULTEQ); } +"/=" { RTOKEN(DIVEQ); } +"&=" { RTOKEN(ANDEQ); } +"|=" { RTOKEN(OREQ); } +"&&" { RTOKEN(ANDAND); } +">" { RTOKEN('>'); } +"," { RTOKEN(','); } +"&" { RTOKEN('&'); } +"|" { RTOKEN('|'); } +"~" { RTOKEN('~'); } +"!" { RTOKEN('!'); } +"?" { RTOKEN('?'); } +"*" { RTOKEN('*'); } +"+" { RTOKEN('+'); } +"-" { RTOKEN('-'); } +"/" { RTOKEN('/'); } +"%" { RTOKEN('%'); } +"<" { RTOKEN('<'); } +"=" { RTOKEN('='); } +"}" { RTOKEN('}'); } +"{" { RTOKEN('{'); } +")" { RTOKEN(')'); } +"(" { RTOKEN('('); } +":" { RTOKEN(':'); } +";" { RTOKEN(';'); } +