From 9fe129a4105bb59398f73ce96938a94f19265b79 Mon Sep 17 00:00:00 2001 From: Nick Clifton Date: Thu, 16 Feb 2023 16:27:08 +0000 Subject: [PATCH] Add support for the ASCII directive inside linker scripts. * ldlex.l: Add ASCII token. * ldgram.y: Add parsing of the ASCII command. * ldlang.c (lang_add_string): Add maximum size parameter. Move escape character handling code into separate function. * ldlang.h (lang_add_string): Update prototype. * NEWS: Mention the new feature. * ld.texi (Output Section Data): Document the new directives. * testsuite/ld-scripts/asciz.t: Adjust to work on more architectures and to test more aspects of the ASCIZ directive. * testsuite/ld-scripts/asciz.d: Adjust to match the changes to the test linker script. * testsuite/ld-scripts/ascii.d: New test driver. * testsuite/ld-scripts/ascii.s: New test assembler source. * testsuite/ld-scripts/ascii.t: New test script. * testsuite/ld-scripts/script.exp: Run the new test. --- ld/ChangeLog | 19 ++++++++ ld/NEWS | 8 +++- ld/ld.texi | 53 +++++++++++++++++------ ld/ldgram.y | 10 ++++- ld/ldlang.c | 69 +++++++++++++++++++++++------- ld/ldlang.h | 3 +- ld/ldlex.l | 1 + ld/testsuite/ld-scripts/ascii.d | 25 +++++++++++ ld/testsuite/ld-scripts/ascii.s | 11 +++++ ld/testsuite/ld-scripts/ascii.t | 38 ++++++++++++++++ ld/testsuite/ld-scripts/asciz.d | 19 ++++---- ld/testsuite/ld-scripts/asciz.t | 23 ++++------ ld/testsuite/ld-scripts/script.exp | 1 + 13 files changed, 221 insertions(+), 59 deletions(-) create mode 100644 ld/testsuite/ld-scripts/ascii.d create mode 100644 ld/testsuite/ld-scripts/ascii.s create mode 100644 ld/testsuite/ld-scripts/ascii.t diff --git a/ld/ChangeLog b/ld/ChangeLog index ac2c913f3cd..0fa00a95525 100644 --- a/ld/ChangeLog +++ b/ld/ChangeLog @@ -1,3 +1,22 @@ +2023-02-16 Ulf Samuelsson + Nick Clifton + + * ldlex.l: Add ASCII token. + * ldgram.y: Add parsing of the ASCII command. + * ldlang.c (lang_add_string): Add maximum size parameter. 
Move + escape character handling code into separate function. + * ldlang.h (lang_add_string): Update prototype. + * NEWS: Mention the new feature. + * ld.texi (Output Section Data): Document the new directives. + * testsuite/ld-scripts/asciz.t: Adjust to work on more architectures + and to test more aspects of the ASCIZ directive. + * testsuite/ld-scripts/asciz.d: Adjust to match the changes to the + test linker script. + * testsuite/ld-scripts/ascii.d: New test driver. + * testsuite/ld-scripts/ascii.s: New test assembler source. + * testsuite/ld-scripts/ascii.t: New test script. + * testsuite/ld-scripts/script.exp: Run the new test. + 2023-02-15 Nick Clifton PR 30078 diff --git a/ld/NEWS b/ld/NEWS index 4ce7e19d40b..4b91f2c3b0a 100644 --- a/ld/NEWS +++ b/ld/NEWS @@ -1,6 +1,12 @@ -*- text -*- -* The linker script syntax has a new command for output sections: ASCIZ "string" +* The linker script syntax has two new commands for inserting text into output + sections: + ASCII (<size>) "string" + This will reserve a zero filled block of <size> bytes at the current + location and insert "string" at the beginning of the block. If the string + is too long, it will be truncated. + ASCIZ "string" This will insert a zero-terminated string at the current location. Changes in 2.40: diff --git a/ld/ld.texi b/ld/ld.texi index 335886d4e6b..7802f0661b0 100644 --- a/ld/ld.texi +++ b/ld/ld.texi @@ -5308,7 +5308,6 @@ C identifiers because they contain a @samp{.} character. @cindex data @cindex section data @cindex output section data -@kindex ASCIZ ``@var{string}'' @kindex BYTE(@var{expression}) @kindex SHORT(@var{expression}) @kindex LONG(@var{expression}) @@ -5345,18 +5344,6 @@ When the object file format does not have an explicit endianness, as is true of, for example, S-records, the value will be stored in the endianness of the first input object file. -You can include a zero-terminated string in an output section by using -@code{ASCIZ}. 
The keyword is followed by a string which is stored at -the current value of the location counter adding a zero byte at the -end. If the string includes spaces it must be enclosed in double -quotes. The string may contain '\n', '\r', '\t' and octal numbers. -Hex numbers are not supported. - -For example, this string of 16 characters will create a 17 byte area -@smallexample - ASCIZ "This is 16 bytes" -@end smallexample - Note---these commands only work inside a section description and not between them, so the following will produce an error from the linker: @smallexample @@ -5367,6 +5354,46 @@ whereas this will work: SECTIONS @{@ .text : @{@ *(.text) ; LONG(1) @}@ .data : @{@ *(.data) @}@ @}@ @end smallexample +@cindex output section strings +@kindex ASCII (@var{expression}) ``@var{string}'' +@kindex ASCIZ ``@var{string}'' +You can include a zero-terminated string in an output section by using +@code{ASCIZ}. The keyword is followed by a string which is stored at +the current value of the location counter, with a zero byte added +at the end. Any length of string is supported by this directive. + +You can include a fixed size string in an output section by using +@code{ASCII}. The keyword is followed by a size enclosed in +parentheses and then a string. The string is stored at the current +value of the location counter and zero bytes are added at the end to +fill up to the specified size. Note the fill value is ignored for +this padding. + +If the string is too long, a warning is issued and the string is +truncated. The string will still be zero-terminated in this case. + +If the expression evaluates to zero then the directive will be treated +as if it were @code{ASCIZ} instead. + +If the string in an @code{ASCII} or @code{ASCIZ} command includes spaces +it must be enclosed in double quotes. + +The string can have C escape characters like '\n', '\r', '\t' and +octal numbers. The '\"' escape is not supported. Nor are escaped hex +values. 
+ +Example 1: This is a string of 16 characters and will create a 32 byte +area: +@smallexample + ASCII (32) "This is 16 bytes" +@end smallexample + +Example 2: This is a string of 16 characters and will create a 17 byte +area: +@smallexample + ASCIZ "This is 16 bytes" +@end smallexample + @kindex FILL(@var{expression}) @cindex holes, filling @cindex unspecified memory diff --git a/ld/ldgram.y b/ld/ldgram.y index 8240cf97327..faffeec94b8 100644 --- a/ld/ldgram.y +++ b/ld/ldgram.y @@ -125,7 +125,7 @@ static int error_index; %right UNARY %token END %left '(' -%token ALIGN_K BLOCK BIND QUAD SQUAD LONG SHORT BYTE ASCIZ +%token ALIGN_K BLOCK BIND QUAD SQUAD LONG SHORT BYTE ASCII ASCIZ %token SECTIONS PHDRS INSERT_K AFTER BEFORE %token DATA_SEGMENT_ALIGN DATA_SEGMENT_RELRO_END DATA_SEGMENT_END %token SORT_BY_NAME SORT_BY_ALIGNMENT SORT_NONE @@ -668,9 +668,15 @@ statement: { lang_add_data ((int) $1, $3); } + | ASCII '(' mustbe_exp ')' NAME + { + /* 'value' is a memory leak, do we care? */ + etree_type *value = $3; + lang_add_string (value->value.value, $5); + } | ASCIZ NAME { - lang_add_string ($2); + lang_add_string (0, $2); } | FILL '(' fill_exp ')' { diff --git a/ld/ldlang.c b/ld/ldlang.c index b20455c9373..2852a4222d3 100644 --- a/ld/ldlang.c +++ b/ld/ldlang.c @@ -8361,15 +8361,20 @@ lang_add_data (int type, union etree_union *exp) new_stmt->type = type; } -void -lang_add_string (const char *s) +/* Convert escape codes in S. + Supports \n, \r, \t and \NNN octals. + Returns a copy of S in a malloc'ed buffer. */ + +static char * +convert_string (const char * s) { - bfd_vma len = strlen (s); - bfd_vma i; - bool escape = false; + size_t len = strlen (s); + size_t i; + bool escape = false; + char * buffer = malloc (len + 1); + char * b; - /* Add byte expressions until end of string. 
*/ - for (i = 0 ; i < len; i++) + for (i = 0, b = buffer; i < len; i++) { char c = *s++; @@ -8404,7 +8409,7 @@ lang_add_string (const char *s) value += (c - '0'); i++; s++; - + c = *s; if ((c >= '0') && (c <= '7')) { @@ -8422,26 +8427,58 @@ lang_add_string (const char *s) i--; s--; } - + c = value; } break; } - - lang_add_data (BYTE, exp_intop (c)); escape = false; } else { if (c == '\\') - escape = true; - else - lang_add_data (BYTE, exp_intop (c)); + { + escape = true; + continue; + } } + + * b ++ = c; + } + + * b = 0; + return buffer; +} + +void +lang_add_string (size_t size, const char *s) +{ + size_t len; + size_t i; + char * string; + + string = convert_string (s); + len = strlen (string); + + /* Check if it is ASCIZ command (size == 0) */ + if (size == 0) + /* Make sure that we include the terminating nul byte. */ + size = len + 1; + else if (len >= size) + { + len = size - 1; + + einfo (_("%P:%pS: warning: ASCII string does not fit in allocated space," + " truncated\n"), NULL); } - /* Remeber to terminate the string. */ - lang_add_data (BYTE, exp_intop (0)); + for (i = 0 ; i < len ; i++) + lang_add_data (BYTE, exp_intop (string[i])); + + while (i++ < size) + lang_add_data (BYTE, exp_intop ('\0')); + + free (string); } /* Create a new reloc statement. 
RELOC is the BFD relocation type to diff --git a/ld/ldlang.h b/ld/ldlang.h index 32819066b8a..2300fa5b2a3 100644 --- a/ld/ldlang.h +++ b/ld/ldlang.h @@ -646,8 +646,9 @@ extern void pop_stat_ptr (void); extern void lang_add_data (int, union etree_union *); +extern bfd_vma charcount(const char *s); extern void lang_add_string - (const char *); + (size_t, const char *s); extern void lang_add_reloc (bfd_reloc_code_real_type, reloc_howto_type *, asection *, const char *, union etree_union *); diff --git a/ld/ldlex.l b/ld/ldlex.l index 32336cf0be2..910e7ea3b8b 100644 --- a/ld/ldlex.l +++ b/ld/ldlex.l @@ -309,6 +309,7 @@ V_IDENTIFIER [*?.$_a-zA-Z\[\]\-\!\^\\]([*?.$_a-zA-Z0-9\[\]\-\!\^\\]|::)* "LONG" { RTOKEN(LONG); } "SHORT" { RTOKEN(SHORT); } "BYTE" { RTOKEN(BYTE); } +"ASCII" { RTOKEN(ASCII); } "ASCIZ" { RTOKEN(ASCIZ); }