ASCIZ Command for output section

author Ulf Samuelsson <ulf@emagii.com>

Tue, 14 Feb 2023 10:13:28 +0000 (10:13 +0000)

committer Nick Clifton <nickc@redhat.com>

Tue, 14 Feb 2023 10:13:28 +0000 (10:13 +0000)
author Ulf Samuelsson <ulf@emagii.com>
Tue, 14 Feb 2023 10:13:28 +0000 (10:13 +0000)
committer Nick Clifton <nickc@redhat.com>
Tue, 14 Feb 2023 10:13:28 +0000 (10:13 +0000)
diff --git a/ld/NEWS b/ld/NEWS

index 9982ad0168d392870ada444323a692a2d9e3073f..4ce7e19d40b9222dce7cb7bd9db6b2437b54c271 100644 (file)
--- a/ld/NEWS
+++ b/ld/NEWS
@@ -1,5 +1,8 @@
  -*- text -*-
  
+* The linker script syntax has a new command for output sections: ASCIZ "string"
+  This will insert a zero-terminated string at the current location.
+
  Changes in 2.40:
  
  * The linker has a new command line option to suppress the generation of any
diff --git a/ld/ld.texi b/ld/ld.texi

index 36005dc2b0d14515461d3ebbbf92711e39cc23a5..f576a8bae6cb2ef6fe214ba92d95e34d2029b5e0 100644 (file)
--- a/ld/ld.texi
+++ b/ld/ld.texi
@@ -5308,6 +5308,7 @@ C identifiers because they contain a @samp{.} character.
  @cindex data
  @cindex section data
  @cindex output section data
+@kindex ASCIZ ``@var{string}''
  @kindex BYTE(@var{expression})
  @kindex SHORT(@var{expression})
  @kindex LONG(@var{expression})
@@ -5344,6 +5345,18 @@ When the object file format does not have an explicit endianness, as is
  true of, for example, S-records, the value will be stored in the
  endianness of the first input object file.
  
+You can include a zero-terminated string in an output section by using
+@code{ASCIZ}.  The keyword is followed by a string, which is stored at
+the current value of the location counter with a zero byte appended at
+the end.  If the string includes spaces it must be enclosed in double
+quotes.  The string may contain the escapes '\n', '\r' and '\t', and
+octal escapes such as '\123'.  Hex escapes are not supported.
+
+For example, this string of 16 characters will create a 17 byte area:
+@smallexample
+  ASCIZ "This is 16 bytes"
+@end smallexample
+
  Note---these commands only work inside a section description and not
  between them, so the following will produce an error from the linker:
  @smallexample
diff --git a/ld/ldgram.y b/ld/ldgram.y

index fa5f01fef1d0b2e0a63f645751056958f99d798e..8240cf97327d226e985fd8fb103b2ef34119904d 100644 (file)
--- a/ld/ldgram.y
+++ b/ld/ldgram.y
@@ -125,7 +125,7 @@ static int error_index;
  %right UNARY
  %token END
  %left <token> '('
-%token <token> ALIGN_K BLOCK BIND QUAD SQUAD LONG SHORT BYTE
+%token <token> ALIGN_K BLOCK BIND QUAD SQUAD LONG SHORT BYTE ASCIZ
  %token SECTIONS PHDRS INSERT_K AFTER BEFORE
  %token DATA_SEGMENT_ALIGN DATA_SEGMENT_RELRO_END DATA_SEGMENT_END
  %token SORT_BY_NAME SORT_BY_ALIGNMENT SORT_NONE
@@ -668,7 +668,10 @@ statement:
                 {
                   lang_add_data ((int) $1, $3);
                 }
-
+       | ASCIZ NAME
+               {
+                 lang_add_string ($2);
+               }
         | FILL '(' fill_exp ')'
                 {
                   lang_add_fill ($3);
diff --git a/ld/ldlang.c b/ld/ldlang.c

index b5e0d026ae4b98ffa87d80d24bb6c38531834861..b20455c9373c226dc7780dc4e8d9deb5968f1b2e 100644 (file)
--- a/ld/ldlang.c
+++ b/ld/ldlang.c
@@ -8361,6 +8361,89 @@ lang_add_data (int type, union etree_union *exp)
    new_stmt->type = type;
  }
  
+void
+lang_add_string (const char *s)
+{
+  bfd_vma  len = strlen (s);
+  bfd_vma  i;
+  bool     escape = false;
+  /* Emit one BYTE data statement per character of S, decoding
+     backslash escapes; ESCAPE is true right after a backslash.  */
+  for (i = 0 ; i < len; i++)
+    {
+      char c = *s++;
+
+      if (escape)
+       {
+         switch (c)
+           {
+           default:
+             /* Unknown escape: drop the backslash and emit C as-is.  */
+             break;
+
+           case 'n': c = '\n'; break;
+           case 'r': c = '\r'; break;
+           case 't': c = '\t'; break;
+         
+           case '0':
+           case '1':
+           case '2':
+           case '3':
+           case '4':
+           case '5':
+           case '6':
+           case '7':
+             /* We have an octal number of up to three digits.  */
+             {
+               unsigned int value = c - '0';
+               /* Peek at the next char; at the end of S it is the NUL.  */
+               c = *s;
+               if ((c >= '0') && (c <= '7'))
+                 {
+                   value <<= 3;
+                   value += (c - '0');
+                   i++;
+                   s++;
+
+                   c = *s;
+                   if ((c >= '0') && (c <= '7'))
+                     {
+                       value <<= 3;
+                       value += (c - '0');
+                       i++;
+                       s++;
+                     }
+                 }
+
+               if (value > 0xff)
+                 {
+                   /* Over 0xff: un-read the last digit, so \777 is
+                      treated as '\077' + '7'.  */
+                   value >>= 3;
+                   i--;
+                   s--;
+                 }
+               c = value;
+             }
+             break;
+           }
+
+         lang_add_data (BYTE, exp_intop (c));
+         escape = false;
+       }
+      else
+       {
+         if (c == '\\')
+           escape = true;
+         else
+           lang_add_data (BYTE, exp_intop (c));
+       }
+    }
+  /* NOTE(review): a trailing lone backslash in S emits nothing.  */
+  /* Remember to terminate the string.  */
+  lang_add_data (BYTE, exp_intop (0));
+}
+
  /* Create a new reloc statement.  RELOC is the BFD relocation type to
     generate.  HOWTO is the corresponding howto structure (we could
     look this up, but the caller has already done so).  SECTION is the
diff --git a/ld/ldlang.h b/ld/ldlang.h

index 24c42f48218c3460959ff0d46e5b93779674861b..32819066b8a41e249bb0d3d9aab84e1ebd1259f8 100644 (file)
--- a/ld/ldlang.h
+++ b/ld/ldlang.h
@@ -645,7 +645,9 @@ extern void push_stat_ptr
  extern void pop_stat_ptr
    (void);
  extern void lang_add_data
-  (int type, union etree_union *);
+  (int, union etree_union *);
+extern void lang_add_string
+  (const char *);
  extern void lang_add_reloc
    (bfd_reloc_code_real_type, reloc_howto_type *, asection *, const char *,
     union etree_union *);
diff --git a/ld/ldlex.l b/ld/ldlex.l

index cf596530b2029e485cde648fac5068c3ca91d2b2..32336cf0be2c7d06a9fdd106f1f0ab4d5acf2e3d 100644 (file)
--- a/ld/ldlex.l
+++ b/ld/ldlex.l
@@ -309,6 +309,7 @@ V_IDENTIFIER [*?.$_a-zA-Z\[\]\-\!\^\\]([*?.$_a-zA-Z0-9\[\]\-\!\^\\]|::)*
  <WILD>"LONG"                           { RTOKEN(LONG); }
  <WILD>"SHORT"                          { RTOKEN(SHORT); }
  <WILD>"BYTE"                           { RTOKEN(BYTE); }
+<WILD>"ASCIZ"                          { RTOKEN(ASCIZ); }
  <SCRIPT>"NOFLOAT"                      { RTOKEN(NOFLOAT); }
  <SCRIPT,EXPRESSION>"NOCROSSREFS"       { RTOKEN(NOCROSSREFS); }
  <SCRIPT,EXPRESSION>"NOCROSSREFS_TO"    { RTOKEN(NOCROSSREFS_TO); }
diff --git a/ld/testsuite/ld-scripts/asciz.d b/ld/testsuite/ld-scripts/asciz.d

new file mode 100644 (file)

index 0000000..615cf99
--- /dev/null
+++ b/ld/testsuite/ld-scripts/asciz.d
@@ -0,0 +1,17 @@
+#source: asciz.s
+#ld: -T asciz.t
+#objdump: -s -j .text
+#target: [is_elf_format]
+#skip: mips*-*-*
+#skip: tilegx*-*-* tilepro-*-*
+# COFF, PE and MIPS targets align code to a 16 byte boundary
+# tilegx and tilepro align code to an 8 byte boundary.
+
+.*:     file format .*
+
+Contents of section .text:
+ .... 01010101 54686973 20697320 61207374  ....This is a st
+ .... 72696e67 00...... ........ ........  ring............
+ .... 54686973 20697320 616e6f74 68657220  This is another 
+ .... 0a737472 696e6753 00                 .stringS........
+#pass
diff --git a/ld/testsuite/ld-scripts/asciz.s b/ld/testsuite/ld-scripts/asciz.s

new file mode 100644 (file)

index 0000000..5803bb4
--- /dev/null
+++ b/ld/testsuite/ld-scripts/asciz.s
@@ -0,0 +1,8 @@
+       .section .text
+       .long 0x01010101
+       
+       .section .data
+       .long 0x9abcdef0
+       
+       .section .bss
+       .long 0
diff --git a/ld/testsuite/ld-scripts/asciz.t b/ld/testsuite/ld-scripts/asciz.t

new file mode 100644 (file)

index 0000000..ab66f9a
--- /dev/null
+++ b/ld/testsuite/ld-scripts/asciz.t
@@ -0,0 +1,23 @@
+MEMORY {
+  rom : ORIGIN = 0x00000, LENGTH = 0x10000
+  ram : ORIGIN = 0x10000, LENGTH = 0x10000
+}
+
+_start = 0x000000;
+SECTIONS
+{
+  . = 0x1000 + SIZEOF_HEADERS;
+  .text ALIGN (0x20) :
+    {
+      *(.text)
+      ASCIZ "This is a string"
+      . = ALIGN(0x20);
+      align_label = .;
+      ASCIZ "This is another \nstring\123"
+      unalign_label = .;
+    }
+  .data : AT (0x10000) { *(.data) } >ram /* NO default AT>rom */
+  . = ALIGN(0x20);
+  .bss : { *(.bss) } >ram /* NO default AT>rom */
+  /DISCARD/ : { *(*) }
+}
diff --git a/ld/testsuite/ld-scripts/script.exp b/ld/testsuite/ld-scripts/script.exp

index e0af28134a7c7b11e297969cdb11654dfc773693..a574dde034c6b990f50f663e2b50030e18c3f0f4 100644 (file)
--- a/ld/testsuite/ld-scripts/script.exp
+++ b/ld/testsuite/ld-scripts/script.exp
@@ -227,6 +227,7 @@ foreach test_script $test_script_list {
      run_dump_test [string range $test_script 0 end-2]
  }
  
+run_dump_test "asciz"
  run_dump_test "align-with-input"
  run_dump_test "pr20302"
  run_dump_test "output-section-types"
author	Ulf Samuelsson <ulf@emagii.com>
	Tue, 14 Feb 2023 10:13:28 +0000 (10:13 +0000)
committer	Nick Clifton <nickc@redhat.com>
	Tue, 14 Feb 2023 10:13:28 +0000 (10:13 +0000)
ld/NEWS		patch \| blob \| history
ld/ld.texi		patch \| blob \| history
ld/ldgram.y		patch \| blob \| history
ld/ldlang.c		patch \| blob \| history
ld/ldlang.h		patch \| blob \| history
ld/ldlex.l		patch \| blob \| history
ld/testsuite/ld-scripts/asciz.d	[new file with mode: 0644]	patch \| blob
ld/testsuite/ld-scripts/asciz.s	[new file with mode: 0644]	patch \| blob
ld/testsuite/ld-scripts/asciz.t	[new file with mode: 0644]	patch \| blob
ld/testsuite/ld-scripts/script.exp		patch \| blob \| history