Add support for the ASCII directive inside linker scripts.

author Nick Clifton <nickc@redhat.com>

Thu, 16 Feb 2023 16:27:08 +0000 (16:27 +0000)

committer Nick Clifton <nickc@redhat.com>

Thu, 16 Feb 2023 16:27:08 +0000 (16:27 +0000)
author Nick Clifton <nickc@redhat.com>
Thu, 16 Feb 2023 16:27:08 +0000 (16:27 +0000)
committer Nick Clifton <nickc@redhat.com>
Thu, 16 Feb 2023 16:27:08 +0000 (16:27 +0000)
diff --git a/ld/ChangeLog b/ld/ChangeLog

index ac2c913f3cdd198629e452c5a55e3aa63c4c8f60..0fa00a95525081d7989392e61ba3a2f70cf26339 100644 (file)
--- a/ld/ChangeLog
+++ b/ld/ChangeLog
@@ -1,3 +1,22 @@
+2023-02-16  Ulf Samuelsson <binutils@emagii.com>
+           Nick Clifton  <nickc@redhat.com>
+
+       * ldlex.l: Add ASCII token.
+       * ldgram.y: Add parsing of the ASCII command.
+       * ldlang.c (lang_add_string): Add maximum size parameter.  Move
+       escape character handling code into separate function.
+       * ldlang.h (lang_add_string): Update prototype.
+       * NEWS: Mention the new feature.
+       * ld.texi (Output Section Data): Document the new directives.
+       * testsuite/ld-scripts/asciz.t: Adjust to work on more architectures
+       and to test more aspects of the ASCIZ directive.
+       * testsuite/ld-scripts/asciz.d: Adjust to match the changes to the
+       test linker script.
+       * testsuite/ld-scripts/ascii.d: New test driver.
+       * testsuite/ld-scripts/ascii.s: New test assembler source.
+       * testsuite/ld-scripts/ascii.t: New test script.
+       * testsuite/ld-scripts/script.exp: Run the new test.
+
  2023-02-15  Nick Clifton  <nickc@redhat.com>
  
         PR 30078
diff --git a/ld/NEWS b/ld/NEWS

index 4ce7e19d40b9222dce7cb7bd9db6b2437b54c271..4b91f2c3b0a451ab23a9e761e5592b18a18ee95e 100644 (file)
--- a/ld/NEWS
+++ b/ld/NEWS
@@ -1,6 +1,12 @@
  -*- text -*-
  
-* The linker script syntax has a new command for output sections: ASCIZ "string"
+* The linker script syntax has two new commands for inserting text into output
+  sections:
+    ASCII (<size>) "string"
+  This will reserve a zero filled block of <size> bytes at the current
+  location and insert "string" at the beginning of the block.  If the string
+  is too long, it will be truncated.
+    ASCIZ "string"
    This will insert a zero-terminated string at the current location.
  
  Changes in 2.40:
diff --git a/ld/ld.texi b/ld/ld.texi

index 335886d4e6b172993d1cf44c52f3e62902d272e5..7802f0661b0a3d673f1c7bf919f54a43538c1e06 100644 (file)
--- a/ld/ld.texi
+++ b/ld/ld.texi
@@ -5308,7 +5308,6 @@ C identifiers because they contain a @samp{.} character.
  @cindex data
  @cindex section data
  @cindex output section data
-@kindex ASCIZ ``@var{string}''
  @kindex BYTE(@var{expression})
  @kindex SHORT(@var{expression})
  @kindex LONG(@var{expression})
@@ -5345,18 +5344,6 @@ When the object file format does not have an explicit endianness, as is
  true of, for example, S-records, the value will be stored in the
  endianness of the first input object file.
  
-You can include a zero-terminated string in an output section by using
-@code{ASCIZ}.  The keyword is followed by a string which is stored at
-the current value of the location counter adding a zero byte at the
-end.  If the string includes spaces it must be enclosed in double
-quotes.  The string may contain '\n', '\r', '\t' and octal numbers.
-Hex numbers are not supported.
-
-For example, this string of 16 characters will create a 17 byte area
-@smallexample
-  ASCIZ "This is 16 bytes"
-@end smallexample
-
  Note---these commands only work inside a section description and not
  between them, so the following will produce an error from the linker:
  @smallexample
@@ -5367,6 +5354,46 @@ whereas this will work:
  SECTIONS @{@ .text : @{@ *(.text) ; LONG(1) @}@ .data : @{@ *(.data) @}@ @}@
  @end smallexample
  
+@cindex output section strings
+@kindex ASCII (@var{expression}) ``@var{string}''
+@kindex ASCIZ ``@var{string}''
+You can include a zero-terminated string in an output section by using
+@code{ASCIZ}.  The keyword is followed by a string which is stored at
+the current value of the location counter, with a zero byte appended
+at the end.  Any length of string is supported by this directive.
+
+You can include a fixed size string in an output section by using
+@code{ASCII}.  The keyword is followed by a size enclosed in
+parentheses and then a string.  The string is stored at the current
+value of the location counter and zero bytes are added at the end to
+fill up to the specified size.  Note the fill value is ignored for
+this padding.
+
+If the string is too long, a warning is issued and the string is
+truncated.  The string will still be zero-terminated in this case.
+
+If the expression evaluates to zero then the directive will be treated
+as if it were @code{ASCIZ} instead.
+
+If the string in an @code{ASCII} or @code{ASCIZ} command includes spaces
+it must be enclosed in double quotes.
+
+The string can contain C escape sequences such as '\n', '\r', '\t' and
+octal numbers.  Neither the '\"' escape nor escaped hex values are
+supported.
+
+Example 1: This is a string of 16 characters and will create a 32 byte
+area:
+@smallexample
+  ASCII (32) "This is 16 bytes"
+@end smallexample
+
+Example 2: This is a string of 16 characters and will create a 17 byte
+area:
+@smallexample
+  ASCIZ "This is 16 bytes"
+@end smallexample
+
  @kindex FILL(@var{expression})
  @cindex holes, filling
  @cindex unspecified memory
diff --git a/ld/ldgram.y b/ld/ldgram.y

index 8240cf97327d226e985fd8fb103b2ef34119904d..faffeec94b8a79363d67781d9f3284588b30b7c4 100644 (file)
--- a/ld/ldgram.y
+++ b/ld/ldgram.y
@@ -125,7 +125,7 @@ static int error_index;
  %right UNARY
  %token END
  %left <token> '('
-%token <token> ALIGN_K BLOCK BIND QUAD SQUAD LONG SHORT BYTE ASCIZ
+%token <token> ALIGN_K BLOCK BIND QUAD SQUAD LONG SHORT BYTE ASCII ASCIZ
  %token SECTIONS PHDRS INSERT_K AFTER BEFORE
  %token DATA_SEGMENT_ALIGN DATA_SEGMENT_RELRO_END DATA_SEGMENT_END
  %token SORT_BY_NAME SORT_BY_ALIGNMENT SORT_NONE
@@ -668,9 +668,15 @@ statement:
                 {
                   lang_add_data ((int) $1, $3);
                 }
+        | ASCII '(' mustbe_exp ')' NAME
+               {
+                 /* 'value' is a memory leak, do we care?  */
+                 etree_type *value = $3;
+                 lang_add_string (value->value.value, $5);
+               }
         | ASCIZ NAME
                 {
-                 lang_add_string ($2);
+                 lang_add_string (0, $2);
                 }
         | FILL '(' fill_exp ')'
                 {
diff --git a/ld/ldlang.c b/ld/ldlang.c

index b20455c9373c226dc7780dc4e8d9deb5968f1b2e..2852a4222d36300af513f361adec040af115a6a5 100644 (file)
--- a/ld/ldlang.c
+++ b/ld/ldlang.c
@@ -8361,15 +8361,20 @@ lang_add_data (int type, union etree_union *exp)
    new_stmt->type = type;
  }
  
-void
-lang_add_string (const char *s)
+/* Convert escape codes in S.
+   Supports \n, \r, \t and \NNN octals.
+   Returns a copy of S in a malloc'ed buffer.  */
+
+static char *
+convert_string (const char * s)
  {
-  bfd_vma  len = strlen (s);
-  bfd_vma  i;
-  bool     escape = false;
+  size_t  len = strlen (s);
+  size_t  i;
+  bool    escape = false;
+  char *  buffer = malloc (len + 1);
+  char *  b;
  
-  /* Add byte expressions until end of string.  */
-  for (i = 0 ; i < len; i++)
+  for (i = 0, b = buffer; i < len; i++)
      {
        char c = *s++;
  
@@ -8404,7 +8409,7 @@ lang_add_string (const char *s)
                     value += (c - '0');
                     i++;
                     s++;
-
+ 
                     c = *s;
                     if ((c >= '0') && (c <= '7'))
                       {
@@ -8422,26 +8427,58 @@ lang_add_string (const char *s)
                     i--;
                     s--;
                   }
-
+               
                 c = value;
               }
               break;
             }
-
-         lang_add_data (BYTE, exp_intop (c));
           escape = false;
         }
        else
         {
           if (c == '\\')
-           escape = true;
-         else
-           lang_add_data (BYTE, exp_intop (c));
+           {
+             escape = true;
+             continue;
+           }
         }
+
+      * b ++ = c;
+    }
+
+  * b = 0;
+  return buffer;
+}
+
+void
+lang_add_string (size_t size, const char *s)
+{
+  size_t  len;
+  size_t  i;
+  char *  string;
+
+  string = convert_string (s);
+  len = strlen (string);
+
+  /* Check if it is the ASCIZ command (size == 0).  */
+  if (size == 0)
+    /* Make sure that we include the terminating nul byte.  */
+    size = len + 1;
+  else if (len >= size)
+    {
+      len = size - 1;
+
+      einfo (_("%P:%pS: warning: ASCII string does not fit in allocated space,"
+               " truncated\n"), NULL);
      }
  
-  /* Remeber to terminate the string.  */
-  lang_add_data (BYTE, exp_intop (0));
+  for (i = 0 ; i < len ; i++)
+    lang_add_data (BYTE, exp_intop (string[i]));
+
+  while (i++ < size)
+    lang_add_data (BYTE, exp_intop ('\0'));
+
+  free (string);
  }
  
  /* Create a new reloc statement.  RELOC is the BFD relocation type to
diff --git a/ld/ldlang.h b/ld/ldlang.h

index 32819066b8a41e249bb0d3d9aab84e1ebd1259f8..2300fa5b2a34498b0df4c90b5ed152ec4b314682 100644 (file)
--- a/ld/ldlang.h
+++ b/ld/ldlang.h
@@ -646,8 +646,9 @@ extern void pop_stat_ptr
    (void);
  extern void lang_add_data
    (int, union etree_union *);
+extern bfd_vma charcount(const char *s);
  extern void lang_add_string
-  (const char *);
+  (size_t, const char *s);
  extern void lang_add_reloc
    (bfd_reloc_code_real_type, reloc_howto_type *, asection *, const char *,
     union etree_union *);
diff --git a/ld/ldlex.l b/ld/ldlex.l

index 32336cf0be2c7d06a9fdd106f1f0ab4d5acf2e3d..910e7ea3b8b8da748dd29b276934572e4d1743ee 100644 (file)
--- a/ld/ldlex.l
+++ b/ld/ldlex.l
@@ -309,6 +309,7 @@ V_IDENTIFIER [*?.$_a-zA-Z\[\]\-\!\^\\]([*?.$_a-zA-Z0-9\[\]\-\!\^\\]|::)*
  <WILD>"LONG"                           { RTOKEN(LONG); }
  <WILD>"SHORT"                          { RTOKEN(SHORT); }
  <WILD>"BYTE"                           { RTOKEN(BYTE); }
+<WILD>"ASCII"                          { RTOKEN(ASCII); }
  <WILD>"ASCIZ"                          { RTOKEN(ASCIZ); }
  <SCRIPT>"NOFLOAT"                      { RTOKEN(NOFLOAT); }
  <SCRIPT,EXPRESSION>"NOCROSSREFS"       { RTOKEN(NOCROSSREFS); }
diff --git a/ld/testsuite/ld-scripts/ascii.d b/ld/testsuite/ld-scripts/ascii.d

new file mode 100644 (file)

index 0000000..cfc1a4c
--- /dev/null
+++ b/ld/testsuite/ld-scripts/ascii.d
@@ -0,0 +1,25 @@
+#source: ascii.s
+#ld: -T ascii.t
+#objdump: -s -j .header
+#notarget: [is_aout_format]
+#skip: tic4x-*-* tic54x-*-* *-*-*ecoff *-*-macho *-*-aix*
+
+.*:     file format .*
+
+Contents of section .header:
+ .... 70726f67 72616d20 6e616d65 00000000  program name....
+ .... 656d7074 79000000 00000000 00000000  empty...........
+ .... 00000000 00000000 00000000 00000000  ................
+ .... 00000000 00000000 00000000 00000000  ................
+ .... 00000000 00000000 00000000 00000000  ................
+ .... 636f6d6d 656e7420 310a0000 00000000  comment 1.......
+ .... 00000000 00000000 00000000 00000000  ................
+ .... 636f6d6d 656e7420 320a0000 00000000  comment 2.......
+ .... 00000000 00000000 00000000 00000000  ................
+ .... 636f6d6d 656e7420 330a0000 00000000  comment 3.......
+ .... 00000000 00000000 00000000 00000000  ................
+ .... 636f6d6d 656e7420 340a0000 00000000  comment 4.......
+ .... 00000000 00000000 49206d65 616e7420  ........I meant 
+ .... 746f2073 61793a20 54686973 20697320  to say: This is 
+ .... 77617920 746f6f20 6c6f6e67 00000000  way too long....
+#pass
diff --git a/ld/testsuite/ld-scripts/ascii.s b/ld/testsuite/ld-scripts/ascii.s

new file mode 100644 (file)

index 0000000..a1b6148
--- /dev/null
+++ b/ld/testsuite/ld-scripts/ascii.s
@@ -0,0 +1,11 @@
+        .extern ecc_start
+       .section .text
+main:
+       .long 0x45444F43
+       .long 0x12345678
+       
+       .section .data
+       .long 0x9abcdef0
+       
+       .section .bss
+       .long 0
diff --git a/ld/testsuite/ld-scripts/ascii.t b/ld/testsuite/ld-scripts/ascii.t

new file mode 100644 (file)

index 0000000..6f682fa
--- /dev/null
+++ b/ld/testsuite/ld-scripts/ascii.t
@@ -0,0 +1,38 @@
+_start = 0x000000;
+
+SECTIONS
+{
+  . = 0x1000 + SIZEOF_HEADERS;
+
+  .header ALIGN (0x100) (READONLY) :
+    {
+      ASCII (16) "program name"
+      ASCII (64) "empty"
+      ASCII (4 * 8) "comment 1\n"
+      ASCII (32) "comment 2\n"
+      ASCII (32) "comment 3\n"
+      ASCII (24) "comment 4\n"
+      ASCII (64) "I meant to say: This is way too long"
+    }
+
+  .text ALIGN (0x100) :
+  {
+      entry = .;
+      *(.text)
+  }
+
+  .data : AT (0x400000)
+  {
+       *(.data)
+  }
+  
+  . = ALIGN(0x20);
+  
+  .bss :
+  {
+       *(.bss)
+  }
+
+  /DISCARD/ : { *(*) }
+}
+
diff --git a/ld/testsuite/ld-scripts/asciz.d b/ld/testsuite/ld-scripts/asciz.d

index 615cf99732f1b6611c68fdf931bb142b89545ebf..75e3c858a074a614da1add876bb48a4acb3ca597 100644 (file)
--- a/ld/testsuite/ld-scripts/asciz.d
+++ b/ld/testsuite/ld-scripts/asciz.d
@@ -1,17 +1,14 @@
  #source: asciz.s
  #ld: -T asciz.t
-#objdump: -s -j .text
-#target: [is_elf_format]
-#skip: mips*-*-*
-#skip: tilegx*-*-* tilepro-*-*
-# COFF, PE and MIPS targets align code to a 16 byte boundary
-# tilegx andtilepro aligns code to a 8 byte boundary.
+#objdump: -s -j .data
+#notarget: [is_aout_format]
+#skip: tic4x-*-* tic54x-*-* *-*-*ecoff *-*-macho *-*-aix*
  
  .*:     file format .*
  
-Contents of section .text:
- .... 01010101 54686973 20697320 61207374  ....This is a st
- .... 72696e67 00...... ........ ........  ring............
- .... 54686973 20697320 616e6f74 68657220  This is another 
- .... 0a737472 696e6753 00                 .stringS........
+Contents of section .data:
+ .... 54686973 20697320 61207374 72696e67  This is a string
+ .... 00546869 73206973 20616e6f 74686572  .This is another
+ .... 0a537472 696e6700 006e6f71 756f7465  .String..noquote
+ .... 7300                                 s.              
  #pass
diff --git a/ld/testsuite/ld-scripts/asciz.t b/ld/testsuite/ld-scripts/asciz.t

index ab66f9a5bfb8cc11dc90552fdfa945922f86fb9a..3aeb7d0c767a868fc84b88e0ac31b0f556d12864 100644 (file)
--- a/ld/testsuite/ld-scripts/asciz.t
+++ b/ld/testsuite/ld-scripts/asciz.t
@@ -1,23 +1,16 @@
-MEMORY {
-  rom : ORIGIN = 0x00000, LENGTH = 0x10000
-  ram : ORIGIN = 0x10000, LENGTH = 0x10000
-}
  
  _start = 0x000000;
  SECTIONS
  {
    . = 0x1000 + SIZEOF_HEADERS;
-  .text ALIGN (0x20) :
-    {
-      *(.text)
+  
+  .data : AT (0x10000)
+  {
        ASCIZ "This is a string"
-      . = ALIGN(0x20);
-      align_label = .;
-      ASCIZ "This is another \nstring\123"
-      unalign_label = .;
-    }
-  .data : AT (0x10000) { *(.data) } >ram /* NO default AT>rom */
-  . = ALIGN(0x20);
-  .bss : { *(.bss) } >ram /* NO default AT>rom */
+      ASCIZ "This is another\n\123tring"
+      ASCIZ ""
+      ASCIZ noquotes
+  }
+  
    /DISCARD/ : { *(*) }
  }
diff --git a/ld/testsuite/ld-scripts/script.exp b/ld/testsuite/ld-scripts/script.exp

index a574dde034c6b990f50f663e2b50030e18c3f0f4..56e12da8e616c566793fc798835e545fb8c8bbe1 100644 (file)
--- a/ld/testsuite/ld-scripts/script.exp
+++ b/ld/testsuite/ld-scripts/script.exp
@@ -228,6 +228,7 @@ foreach test_script $test_script_list {
  }
  
  run_dump_test "asciz"
+run_dump_test "ascii"
  run_dump_test "align-with-input"
  run_dump_test "pr20302"
  run_dump_test "output-section-types"
author	Nick Clifton <nickc@redhat.com>
	Thu, 16 Feb 2023 16:27:08 +0000 (16:27 +0000)
committer	Nick Clifton <nickc@redhat.com>
	Thu, 16 Feb 2023 16:27:08 +0000 (16:27 +0000)
ld/ChangeLog		patch \| blob \| history
ld/NEWS		patch \| blob \| history
ld/ld.texi		patch \| blob \| history
ld/ldgram.y		patch \| blob \| history
ld/ldlang.c		patch \| blob \| history
ld/ldlang.h		patch \| blob \| history
ld/ldlex.l		patch \| blob \| history
ld/testsuite/ld-scripts/ascii.d	[new file with mode: 0644]	patch \| blob
ld/testsuite/ld-scripts/ascii.s	[new file with mode: 0644]	patch \| blob
ld/testsuite/ld-scripts/ascii.t	[new file with mode: 0644]	patch \| blob
ld/testsuite/ld-scripts/asciz.d		patch \| blob \| history
ld/testsuite/ld-scripts/asciz.t		patch \| blob \| history
ld/testsuite/ld-scripts/script.exp		patch \| blob \| history