From 9fe129a4105bb59398f73ce96938a94f19265b79 Mon Sep 17 00:00:00 2001
From: Nick Clifton <nickc@redhat.com>
Date: Thu, 16 Feb 2023 16:27:08 +0000
Subject: [PATCH] Add support for the ASCII directive inside linker scripts.

 * ldlex.l: Add ASCII token.
 * ldgram.y: Add parsing of the ASCII command.
 * ldlang.c (lang_add_string): Add maximum size parameter.  Move escape character handling code into separate function.
 * ldlang.h (lang_add_string): Update prototype.
 * NEWS: Mention the new feature.
 * ld.texi (Output Section Data): Document the new directives.
 * testsuite/ld-scripts/asciz.t: Adjust to work on more architectures and to test more aspects of the ASCIZ directive.
 * testsuite/ld-scripts/asciz.d: Adjust to match the changes to the test linker script.
 * testsuite/ld-scripts/ascii.d: New test driver.
 * testsuite/ld-scripts/ascii.s: New test assembler source.
 * testsuite/ld-scripts/ascii.t: New test script.
 * testsuite/ld-scripts/script.exp: Run the new test.
---
 ld/ChangeLog                       | 19 ++++++++
 ld/NEWS                            |  8 +++-
 ld/ld.texi                         | 53 +++++++++++++++++------
 ld/ldgram.y                        | 10 ++++-
 ld/ldlang.c                        | 69 +++++++++++++++++++++++-------
 ld/ldlang.h                        |  3 +-
 ld/ldlex.l                         |  1 +
 ld/testsuite/ld-scripts/ascii.d    | 25 +++++++++++
 ld/testsuite/ld-scripts/ascii.s    | 11 +++++
 ld/testsuite/ld-scripts/ascii.t    | 38 ++++++++++++++++
 ld/testsuite/ld-scripts/asciz.d    | 19 ++++----
 ld/testsuite/ld-scripts/asciz.t    | 23 ++++------
 ld/testsuite/ld-scripts/script.exp |  1 +
 13 files changed, 221 insertions(+), 59 deletions(-)
 create mode 100644 ld/testsuite/ld-scripts/ascii.d
 create mode 100644 ld/testsuite/ld-scripts/ascii.s
 create mode 100644 ld/testsuite/ld-scripts/ascii.t
diff --git a/ld/ChangeLog b/ld/ChangeLog
index ac2c913f3cd..0fa00a95525 100644
--- a/ld/ChangeLog
+++ b/ld/ChangeLog
@@ -1,3 +1,22 @@
+2023-02-16  Ulf Samuelsson <binutils@emagii.com>
+	    Nick Clifton  <nickc@redhat.com>
+
+	* ldlex.l: Add ASCII token.
+	* ldgram.y: Add parsing of the ASCII command.
+	* ldlang.c (lang_add_string): Add maximum size parameter.  Move
+	escape character handling code into separate function.
+	* ldlang.h (lang_add_string): Update prototype.
+	* NEWS: Mention the new feature.
+	* ld.texi (Output Section Data): Document the new directives.
+	* testsuite/ld-scripts/asciz.t: Adjust to work on more architectures
+	and to test more aspects of the ASCIZ directive.
+	* testsuite/ld-scripts/asciz.d: Adjust to match the changes to the
+	test linker script.
+	* testsuite/ld-scripts/ascii.d: New test driver.
+	* testsuite/ld-scripts/ascii.s: New test assembler source.
+	* testsuite/ld-scripts/ascii.t: New test script.
+	* testsuite/ld-scripts/script.exp: Run the new test.
+
 2023-02-15  Nick Clifton  <nickc@redhat.com>
 
 	PR 30078
diff --git a/ld/NEWS b/ld/NEWS
index 4ce7e19d40b..4b91f2c3b0a 100644
--- a/ld/NEWS
+++ b/ld/NEWS
@@ -1,6 +1,12 @@
 -*- text -*-
 
-* The linker script syntax has a new command for output sections: ASCIZ "string"
+* The linker script syntax has two new commands for inserting text into output
+  sections:
+    ASCII (<size>) "string"
+  This will reserve a zero filled block of <size> bytes at the current
+  location and insert "string" at the beginning of the block.  If the string
+  is too long, it will be truncated.
+    ASCIZ "string"
   This will insert a zero-terminated string at the current location.
 
 Changes in 2.40:
diff --git a/ld/ld.texi b/ld/ld.texi
index 335886d4e6b..7802f0661b0 100644
--- a/ld/ld.texi
+++ b/ld/ld.texi
@@ -5308,7 +5308,6 @@ C identifiers because they contain a @samp{.} character.
 @cindex data
 @cindex section data
 @cindex output section data
-@kindex ASCIZ ``@var{string}''
 @kindex BYTE(@var{expression})
 @kindex SHORT(@var{expression})
 @kindex LONG(@var{expression})
@@ -5345,18 +5344,6 @@ When the object file format does not have an explicit endianness, as is
 true of, for example, S-records, the value will be stored in the
 endianness of the first input object file.
 
-You can include a zero-terminated string in an output section by using
-@code{ASCIZ}.  The keyword is followed by a string which is stored at
-the current value of the location counter adding a zero byte at the
-end.  If the string includes spaces it must be enclosed in double
-quotes.  The string may contain '\n', '\r', '\t' and octal numbers.
-Hex numbers are not supported.
-
-For example, this string of 16 characters will create a 17 byte area
-@smallexample
-  ASCIZ "This is 16 bytes"
-@end smallexample
-
 Note---these commands only work inside a section description and not
 between them, so the following will produce an error from the linker:
 @smallexample
@@ -5367,6 +5354,46 @@ whereas this will work:
 SECTIONS @{@ .text : @{@ *(.text) ; LONG(1) @}@ .data : @{@ *(.data) @}@ @}@
 @end smallexample
 
+@cindex output section strings
+@kindex ASCII (@var{expression}) ``@var{string}''
+@kindex ASCIZ ``@var{string}''
+You can include a zero-terminated string in an output section by using
+@code{ASCIZ}.  The keyword is followed by a string which is stored at
+the current value of the location counter, with a zero byte added at
+the end.  Any length of string is supported by this directive.
+
+You can include a fixed size string in an output section by using
+@code{ASCII}.  The keyword is followed by a size enclosed in
+parentheses and then a string.  The string is stored at the current
+value of the location counter and zero bytes are added at the end to
+fill up to the specified size.  Note the fill value is ignored for
+this padding.
+
+If the string is too long, a warning is issued and the string is
+truncated.  The string will still be zero-terminated in this case.
+
+If the expression evaluates to zero then the directive will be treated
+as if it were @code{ASCIZ} instead.
+
+If the string in an @code{ASCII} or @code{ASCIZ} command includes spaces
+it must be enclosed in double quotes.
+
+The string can have C escape characters like '\n', '\r', '\t' and
+octal numbers.  Neither the '\"' escape nor escaped hex values are
+supported.
+
+Example 1: This is a string of 16 characters and will create a 32 byte
+area:
+@smallexample
+  ASCII (32) "This is 16 bytes"
+@end smallexample
+
+Example 2: This is a string of 16 characters and will create a 17 byte
+area:
+@smallexample
+  ASCIZ "This is 16 bytes"
+@end smallexample
+
 @kindex FILL(@var{expression})
 @cindex holes, filling
 @cindex unspecified memory
diff --git a/ld/ldgram.y b/ld/ldgram.y
index 8240cf97327..faffeec94b8 100644
--- a/ld/ldgram.y
+++ b/ld/ldgram.y
@@ -125,7 +125,7 @@ static int error_index;
 %right UNARY
 %token END
 %left <token> '('
-%token <token> ALIGN_K BLOCK BIND QUAD SQUAD LONG SHORT BYTE ASCIZ
+%token <token> ALIGN_K BLOCK BIND QUAD SQUAD LONG SHORT BYTE ASCII ASCIZ
 %token SECTIONS PHDRS INSERT_K AFTER BEFORE
 %token DATA_SEGMENT_ALIGN DATA_SEGMENT_RELRO_END DATA_SEGMENT_END
 %token SORT_BY_NAME SORT_BY_ALIGNMENT SORT_NONE
@@ -668,9 +668,15 @@ statement:
 		{
 		  lang_add_data ((int) $1, $3);
 		}
+        | ASCII '(' mustbe_exp ')' NAME
+		{
+		  /* 'value' is a memory leak, do we care?  */
+		  etree_type *value = $3;
+		  lang_add_string (value->value.value, $5);
+		}
 	| ASCIZ NAME
 		{
-		  lang_add_string ($2);
+		  lang_add_string (0, $2);
 		}
 	| FILL '(' fill_exp ')'
 		{
diff --git a/ld/ldlang.c b/ld/ldlang.c
index b20455c9373..2852a4222d3 100644
--- a/ld/ldlang.c
+++ b/ld/ldlang.c
@@ -8361,15 +8361,20 @@ lang_add_data (int type, union etree_union *exp)
   new_stmt->type = type;
 }
 
-void
-lang_add_string (const char *s)
+/* Convert escape codes in S.
+   Supports \n, \r, \t and \NNN octals.
+   Returns a copy of S in a malloc'ed buffer.  */
+
+static char *
+convert_string (const char * s)
 {
-  bfd_vma  len = strlen (s);
-  bfd_vma  i;
-  bool     escape = false;
+  size_t  len = strlen (s);
+  size_t  i;
+  bool    escape = false;
+  char *  buffer = malloc (len + 1);
+  char *  b;
 
-  /* Add byte expressions until end of string.  */
-  for (i = 0 ; i < len; i++)
+  for (i = 0, b = buffer; i < len; i++)
     {
       char c = *s++;
 
@@ -8404,7 +8409,7 @@ lang_add_string (const char *s)
 		    value += (c - '0');
 		    i++;
 		    s++;
-
+ 
 		    c = *s;
 		    if ((c >= '0') && (c <= '7'))
 		      {
@@ -8422,26 +8427,58 @@ lang_add_string (const char *s)
 		    i--;
 		    s--;
 		  }
-
+		
 		c = value;
 	      }
 	      break;
 	    }
-
-	  lang_add_data (BYTE, exp_intop (c));
 	  escape = false;
 	}
       else
 	{
 	  if (c == '\\')
-	    escape = true;
-	  else
-	    lang_add_data (BYTE, exp_intop (c));
+	    {
+	      escape = true;
+	      continue;
+	    }
 	}
+
+      * b ++ = c;
+    }
+
+  * b = 0;
+  return buffer;
+}
+
+void
+lang_add_string (size_t size, const char *s)
+{
+  size_t  len;
+  size_t  i;
+  char *  string;
+
+  string = convert_string (s);
+  len = strlen (string);
+
+  /* Check if it is an ASCIZ command (size == 0).  */
+  if (size == 0)
+    /* Make sure that we include the terminating nul byte.  */
+    size = len + 1;
+  else if (len >= size)
+    {
+      len = size - 1;
+
+      einfo (_("%P:%pS: warning: ASCII string does not fit in allocated space,"
+               " truncated\n"), NULL);
     }
 
-  /* Remeber to terminate the string.  */
-  lang_add_data (BYTE, exp_intop (0));
+  for (i = 0 ; i < len ; i++)
+    lang_add_data (BYTE, exp_intop (string[i]));
+
+  while (i++ < size)
+    lang_add_data (BYTE, exp_intop ('\0'));
+
+  free (string);
 }
 
 /* Create a new reloc statement.  RELOC is the BFD relocation type to
diff --git a/ld/ldlang.h b/ld/ldlang.h
index 32819066b8a..2300fa5b2a3 100644
--- a/ld/ldlang.h
+++ b/ld/ldlang.h
@@ -646,8 +646,9 @@ extern void pop_stat_ptr
   (void);
 extern void lang_add_data
   (int, union etree_union *);
+extern bfd_vma charcount(const char *s);
 extern void lang_add_string
-  (const char *);
+  (size_t, const char *s);
 extern void lang_add_reloc
   (bfd_reloc_code_real_type, reloc_howto_type *, asection *, const char *,
    union etree_union *);
diff --git a/ld/ldlex.l b/ld/ldlex.l
index 32336cf0be2..910e7ea3b8b 100644
--- a/ld/ldlex.l
+++ b/ld/ldlex.l
@@ -309,6 +309,7 @@ V_IDENTIFIER [*?.$_a-zA-Z\[\]\-\!\^\\]([*?.$_a-zA-Z0-9\[\]\-\!\^\\]|::)*
 <WILD>"LONG"				{ RTOKEN(LONG); }
 <WILD>"SHORT"				{ RTOKEN(SHORT); }
 <WILD>"BYTE"				{ RTOKEN(BYTE); }
+<WILD>"ASCII"				{ RTOKEN(ASCII); }
 <WILD>"ASCIZ"				{ RTOKEN(ASCIZ); }
 <SCRIPT>"NOFLOAT"			{ RTOKEN(NOFLOAT); }
 <SCRIPT,EXPRESSION>"NOCROSSREFS"	{ RTOKEN(NOCROSSREFS); }
diff --git a/ld/testsuite/ld-scripts/ascii.d b/ld/testsuite/ld-scripts/ascii.d
new file mode 100644
index 00000000000..cfc1a4cbdba
--- /dev/null
+++ b/ld/testsuite/ld-scripts/ascii.d
@@ -0,0 +1,25 @@
+#source: ascii.s
+#ld: -T ascii.t
+#objdump: -s -j .header
+#notarget: [is_aout_format]
+#skip: tic4x-*-* tic54x-*-* *-*-*ecoff *-*-macho *-*-aix*
+
+.*:     file format .*
+
+Contents of section .header:
+ .... 70726f67 72616d20 6e616d65 00000000  program name....
+ .... 656d7074 79000000 00000000 00000000  empty...........
+ .... 00000000 00000000 00000000 00000000  ................
+ .... 00000000 00000000 00000000 00000000  ................
+ .... 00000000 00000000 00000000 00000000  ................
+ .... 636f6d6d 656e7420 310a0000 00000000  comment 1.......
+ .... 00000000 00000000 00000000 00000000  ................
+ .... 636f6d6d 656e7420 320a0000 00000000  comment 2.......
+ .... 00000000 00000000 00000000 00000000  ................
+ .... 636f6d6d 656e7420 330a0000 00000000  comment 3.......
+ .... 00000000 00000000 00000000 00000000  ................
+ .... 636f6d6d 656e7420 340a0000 00000000  comment 4.......
+ .... 00000000 00000000 49206d65 616e7420  ........I meant 
+ .... 746f2073 61793a20 54686973 20697320  to say: This is 
+ .... 77617920 746f6f20 6c6f6e67 00000000  way too long....
+#pass
diff --git a/ld/testsuite/ld-scripts/ascii.s b/ld/testsuite/ld-scripts/ascii.s
new file mode 100644
index 00000000000..a1b6148dc79
--- /dev/null
+++ b/ld/testsuite/ld-scripts/ascii.s
@@ -0,0 +1,11 @@
+        .extern ecc_start
+	.section .text
+main:
+	.long 0x45444F43
+	.long 0x12345678
+	
+	.section .data
+	.long 0x9abcdef0
+	
+	.section .bss
+	.long 0
diff --git a/ld/testsuite/ld-scripts/ascii.t b/ld/testsuite/ld-scripts/ascii.t
new file mode 100644
index 00000000000..6f682fabd38
--- /dev/null
+++ b/ld/testsuite/ld-scripts/ascii.t
@@ -0,0 +1,38 @@
+_start = 0x000000;
+
+SECTIONS
+{
+  . = 0x1000 + SIZEOF_HEADERS;
+
+  .header ALIGN (0x100) (READONLY) :
+    {
+      ASCII (16) "program name"
+      ASCII (64) "empty"
+      ASCII (4 * 8) "comment 1\n"
+      ASCII (32) "comment 2\n"
+      ASCII (32) "comment 3\n"
+      ASCII (24) "comment 4\n"
+      ASCII (64) "I meant to say: This is way too long"
+    }
+
+  .text ALIGN (0x100) :
+  {
+      entry = .;
+      *(.text)
+  }
+
+  .data : AT (0x400000)
+  {
+	*(.data)
+  }
+  
+  . = ALIGN(0x20);
+  
+  .bss :
+  {
+	*(.bss)
+  }
+
+  /DISCARD/ : { *(*) }
+}
+
diff --git a/ld/testsuite/ld-scripts/asciz.d b/ld/testsuite/ld-scripts/asciz.d
index 615cf99732f..75e3c858a07 100644
--- a/ld/testsuite/ld-scripts/asciz.d
+++ b/ld/testsuite/ld-scripts/asciz.d
@@ -1,17 +1,14 @@
 #source: asciz.s
 #ld: -T asciz.t
-#objdump: -s -j .text
-#target: [is_elf_format]
-#skip: mips*-*-*
-#skip: tilegx*-*-* tilepro-*-*
-# COFF, PE and MIPS targets align code to a 16 byte boundary
-# tilegx andtilepro aligns code to a 8 byte boundary.
+#objdump: -s -j .data
+#notarget: [is_aout_format]
+#skip: tic4x-*-* tic54x-*-* *-*-*ecoff *-*-macho *-*-aix*
 
 .*:     file format .*
 
-Contents of section .text:
- .... 01010101 54686973 20697320 61207374  ....This is a st
- .... 72696e67 00...... ........ ........  ring............
- .... 54686973 20697320 616e6f74 68657220  This is another 
- .... 0a737472 696e6753 00                 .stringS........
+Contents of section .data:
+ .... 54686973 20697320 61207374 72696e67  This is a string
+ .... 00546869 73206973 20616e6f 74686572  .This is another
+ .... 0a537472 696e6700 006e6f71 756f7465  .String..noquote
+ .... 7300                                 s.              
 #pass
diff --git a/ld/testsuite/ld-scripts/asciz.t b/ld/testsuite/ld-scripts/asciz.t
index ab66f9a5bfb..3aeb7d0c767 100644
--- a/ld/testsuite/ld-scripts/asciz.t
+++ b/ld/testsuite/ld-scripts/asciz.t
@@ -1,23 +1,16 @@
-MEMORY {
-  rom : ORIGIN = 0x00000, LENGTH = 0x10000
-  ram : ORIGIN = 0x10000, LENGTH = 0x10000
-}
 
 _start = 0x000000;
 SECTIONS
 {
   . = 0x1000 + SIZEOF_HEADERS;
-  .text ALIGN (0x20) :
-    {
-      *(.text)
+  
+  .data : AT (0x10000)
+  {
       ASCIZ "This is a string"
-      . = ALIGN(0x20);
-      align_label = .;
-      ASCIZ "This is another \nstring\123"
-      unalign_label = .;
-    }
-  .data : AT (0x10000) { *(.data) } >ram /* NO default AT>rom */
-  . = ALIGN(0x20);
-  .bss : { *(.bss) } >ram /* NO default AT>rom */
+      ASCIZ "This is another\n\123tring"
+      ASCIZ ""
+      ASCIZ noquotes
+  }
+  
   /DISCARD/ : { *(*) }
 }
diff --git a/ld/testsuite/ld-scripts/script.exp b/ld/testsuite/ld-scripts/script.exp
index a574dde034c..56e12da8e61 100644
--- a/ld/testsuite/ld-scripts/script.exp
+++ b/ld/testsuite/ld-scripts/script.exp
@@ -228,6 +228,7 @@ foreach test_script $test_script_list {
 }
 
 run_dump_test "asciz"
+run_dump_test "ascii"
 run_dump_test "align-with-input"
 run_dump_test "pr20302"
 run_dump_test "output-section-types"
-- 
2.30.2