From 0d79a2a8e2d91fc258ac795c19c13e3ab505a6c2 Mon Sep 17 00:00:00 2001 From: Ulf Samuelsson Date: Tue, 14 Feb 2023 10:13:28 +0000 Subject: [PATCH] ASCIZ Command for output section Adds a new directive to the linker script syntax: ASCIZ. This inserts a zero-terminated string into the output at the place where it is used. --- ld/NEWS | 3 ++ ld/ld.texi | 13 +++++ ld/ldgram.y | 7 ++- ld/ldlang.c | 88 ++++++++++++++++++++++++++++++ ld/ldlang.h | 4 +- ld/ldlex.l | 1 + ld/testsuite/ld-scripts/asciz.d | 17 ++++++ ld/testsuite/ld-scripts/asciz.s | 8 +++ ld/testsuite/ld-scripts/asciz.t | 23 +++++++++ ld/testsuite/ld-scripts/script.exp | 1 + 10 files changed, 162 insertions(+), 3 deletions(-) create mode 100644 ld/testsuite/ld-scripts/asciz.d create mode 100644 ld/testsuite/ld-scripts/asciz.s create mode 100644 ld/testsuite/ld-scripts/asciz.t diff --git a/ld/NEWS b/ld/NEWS index 9982ad0168d..4ce7e19d40b 100644 --- a/ld/NEWS +++ b/ld/NEWS @@ -1,5 +1,8 @@ -*- text -*- +* The linker script syntax has a new command for output sections: ASCIZ "string" + This will insert a zero-terminated string at the current location. + Changes in 2.40: * The linker has a new command line option to suppress the generation of any diff --git a/ld/ld.texi b/ld/ld.texi index 36005dc2b0d..f576a8bae6c 100644 --- a/ld/ld.texi +++ b/ld/ld.texi @@ -5308,6 +5308,7 @@ C identifiers because they contain a @samp{.} character. @cindex data @cindex section data @cindex output section data +@kindex ASCIZ ``@var{string}'' @kindex BYTE(@var{expression}) @kindex SHORT(@var{expression}) @kindex LONG(@var{expression}) @@ -5344,6 +5345,18 @@ When the object file format does not have an explicit endianness, as is true of, for example, S-records, the value will be stored in the endianness of the first input object file. +You can include a zero-terminated string in an output section by using +@code{ASCIZ}. 
The keyword is followed by a string which is stored at +the current value of the location counter adding a zero byte at the +end. If the string includes spaces it must be enclosed in double +quotes. The string may contain '\n', '\r', '\t' and octal numbers. +Hex numbers are not supported. + +For example, this string of 16 characters will create a 17 byte area +@smallexample + ASCIZ "This is 16 bytes" +@end smallexample + Note---these commands only work inside a section description and not between them, so the following will produce an error from the linker: @smallexample diff --git a/ld/ldgram.y b/ld/ldgram.y index fa5f01fef1d..8240cf97327 100644 --- a/ld/ldgram.y +++ b/ld/ldgram.y @@ -125,7 +125,7 @@ static int error_index; %right UNARY %token END %left '(' -%token ALIGN_K BLOCK BIND QUAD SQUAD LONG SHORT BYTE +%token ALIGN_K BLOCK BIND QUAD SQUAD LONG SHORT BYTE ASCIZ %token SECTIONS PHDRS INSERT_K AFTER BEFORE %token DATA_SEGMENT_ALIGN DATA_SEGMENT_RELRO_END DATA_SEGMENT_END %token SORT_BY_NAME SORT_BY_ALIGNMENT SORT_NONE @@ -668,7 +668,10 @@ statement: { lang_add_data ((int) $1, $3); } - + | ASCIZ NAME + { + lang_add_string ($2); + } | FILL '(' fill_exp ')' { lang_add_fill ($3); diff --git a/ld/ldlang.c b/ld/ldlang.c index b5e0d026ae4..b20455c9373 100644 --- a/ld/ldlang.c +++ b/ld/ldlang.c @@ -8361,6 +8361,94 @@ lang_add_data (int type, union etree_union *exp) new_stmt->type = type; } +/* Add the string S as a series of BYTE data statements, followed by + a terminating zero byte. The escapes \n, \r, \t and \NNN (one to + three octal digits) are decoded; any other escaped character is + emitted as is, and a backslash ending the string is dropped. */ +void +lang_add_string (const char *s) +{ + bfd_vma len = strlen (s); + bfd_vma i; + bool escape = false; + + /* Emit one BYTE expression per decoded character of S. */ + for (i = 0 ; i < len; i++) + { + /* Unsigned so that bytes above 0x7f are never sign-extended. */ + unsigned char c = *s++; + + if (escape) + { + switch (c) + { + default: + /* Unknown escape: drop the backslash, keep the character. */ + break; + + case 'n': c = '\n'; break; + case 'r': c = '\r'; break; + case 't': c = '\t'; break; + + case '0': + case '1': + case '2': + case '3': + case '4': + case '5': + case '6': + case '7': + /* Octal escape: C is its first digit; up to two more may follow. 
*/ + { + unsigned int value = c - '0'; + + c = *s; + if ((c >= '0') && (c <= '7')) + { + value <<= 3; + value += (c - '0'); + i++; + s++; + + c = *s; + if ((c >= '0') && (c <= '7')) + { + value <<= 3; + value += (c - '0'); + i++; + s++; + } + } + + if (value > 0xff) + { + /* Does not fit a byte: \777 becomes '\077' + '7'. */ + value >>= 3; + i--; + s--; + } + + c = value; + } + break; + } + + lang_add_data (BYTE, exp_intop (c)); + escape = false; + } + else + { + if (c == '\\') + escape = true; + else + lang_add_data (BYTE, exp_intop (c)); + } + } + + /* Remember to terminate the string. */ + lang_add_data (BYTE, exp_intop (0)); +} + /* Create a new reloc statement. RELOC is the BFD relocation type to generate. HOWTO is the corresponding howto structure (we could look this up, but the caller has already done so). SECTION is the diff --git a/ld/ldlang.h b/ld/ldlang.h index 24c42f48218..32819066b8a 100644 --- a/ld/ldlang.h +++ b/ld/ldlang.h @@ -645,7 +645,9 @@ extern void push_stat_ptr extern void pop_stat_ptr (void); extern void lang_add_data - (int type, union etree_union *); + (int, union etree_union *); +extern void lang_add_string + (const char *); extern void lang_add_reloc (bfd_reloc_code_real_type, reloc_howto_type *, asection *, const char *, union etree_union *); diff --git a/ld/ldlex.l b/ld/ldlex.l index cf596530b20..32336cf0be2 100644 --- a/ld/ldlex.l +++ b/ld/ldlex.l @@ -309,6 +309,7 @@ V_IDENTIFIER [*?.$_a-zA-Z\[\]\-\!\^\\]([*?.$_a-zA-Z0-9\[\]\-\!\^\\]|::)* "LONG" { RTOKEN(LONG); } "SHORT" { RTOKEN(SHORT); } "BYTE" { RTOKEN(BYTE); } +"ASCIZ" { RTOKEN(ASCIZ); }