Support fusion for ELFv2 stubs
author Alan Modra <amodra@gmail.com>
Tue, 3 Jun 2014 01:25:29 +0000 (10:55 +0930)
committer Alan Modra <amodra@gmail.com>
Tue, 3 Jun 2014 01:25:29 +0000 (10:55 +0930)
Power8 fuses addis,addi and addis,ld sequences when the target register
of the addis is the same as the target register of the addi/ld.  Thus
    addis r12,r2,xxx@ha
    addi r12,r12,xxx@l / ld r12,xxx@l(r12)
is faster than
    addis r11,r2,xxx@ha
    addi r12,r11,xxx@l / ld r12,xxx@l(r11)
So use the form that allows fusion in plt call and branch stubs.

bfd/
* elf64-ppc.c (ADDIS_R12_R2): Define.
(build_plt_stub): Support fusion on ELFv2 stub.
(ppc_build_one_stub): Likewise for plt branch stubs.
gold/
* powerpc.cc (addis_12_2): Define.
(Stub_table::do_write): Support fusion on ELFv2 stubs.
ld/testsuite/
* ld-powerpc/elfv2exe.d: Update for changed plt call stubs.
gdb/
* ppc64-tdep.c (ppc64_standard_linkage8): New.
(ppc64_skip_trampoline_code): Recognise ELFv2 stub supporting fusion.

bfd/ChangeLog
bfd/elf64-ppc.c
gdb/ChangeLog
gdb/ppc64-tdep.c
gold/ChangeLog
gold/powerpc.cc
ld/testsuite/ChangeLog
ld/testsuite/ld-powerpc/elfv2exe.d

index 8c93f398d35b4a4b240c03753d9a60d134eee904..396b7ab1257c67316f6fcdb0f0ad22f2c96b2c9b 100644 (file)
@@ -1,3 +1,9 @@
+2014-06-03  Alan Modra  <amodra@gmail.com>
+
+       * elf64-ppc.c (ADDIS_R12_R2): Define.
+       (build_plt_stub): Support fusion on ELFv2 stub.
+       (ppc_build_one_stub): Likewise for plt branch stubs.
+
 2014-05-28  Alan Modra  <amodra@gmail.com>
 
        * elf32-rx.c (rx_table_map): Delete set but not used variables.
index fdca0c6f5004ed2be2c2b228e4f908c6b626b6b3..e7e2e397666d097d87325297a4955525f5d2594a 100644 (file)
@@ -173,6 +173,7 @@ static bfd_vma opd_entry_value
 
 #define LD_R2_0R1      0xe8410000      /* ld    %r2,0(%r1)      */
 
+#define ADDIS_R12_R2   0x3d820000      /* addis %r12,%r2,xxx@ha     */
 #define ADDIS_R12_R12  0x3d8c0000      /* addis %r12,%r12,xxx@ha */
 #define LD_R12_0R12    0xe98c0000      /* ld    %r12,xxx@l(%r12) */
 
@@ -10246,8 +10247,16 @@ build_plt_stub (struct ppc_link_hash_table *htab,
       if (ALWAYS_EMIT_R2SAVE
          || stub_entry->stub_type == ppc_stub_plt_call_r2save)
        bfd_put_32 (obfd, STD_R2_0R1 + STK_TOC (htab), p),      p += 4;
-      bfd_put_32 (obfd, ADDIS_R11_R2 | PPC_HA (offset), p),    p += 4;
-      bfd_put_32 (obfd, LD_R12_0R11 | PPC_LO (offset), p),     p += 4;
+      if (plt_load_toc)
+       {
+         bfd_put_32 (obfd, ADDIS_R11_R2 | PPC_HA (offset), p), p += 4;
+         bfd_put_32 (obfd, LD_R12_0R11 | PPC_LO (offset), p),  p += 4;
+       }
+      else
+       {
+         bfd_put_32 (obfd, ADDIS_R12_R2 | PPC_HA (offset), p), p += 4;
+         bfd_put_32 (obfd, LD_R12_0R12 | PPC_LO (offset), p),  p += 4;
+       }
       if (plt_load_toc
          && PPC_HA (offset + 8 + 8 * plt_static_chain) != PPC_HA (offset))
        {
@@ -10668,10 +10677,10 @@ ppc_build_one_stub (struct bfd_hash_entry *gen_entry, void *in_arg)
            {
              size = 16;
              bfd_put_32 (htab->params->stub_bfd,
-                         ADDIS_R11_R2 | PPC_HA (off), loc);
+                         ADDIS_R12_R2 | PPC_HA (off), loc);
              loc += 4;
              bfd_put_32 (htab->params->stub_bfd,
-                         LD_R12_0R11 | PPC_LO (off), loc);
+                         LD_R12_0R12 | PPC_LO (off), loc);
            }
          else
            {
@@ -10697,10 +10706,10 @@ ppc_build_one_stub (struct bfd_hash_entry *gen_entry, void *in_arg)
            {
              size += 4;
              bfd_put_32 (htab->params->stub_bfd,
-                         ADDIS_R11_R2 | PPC_HA (off), loc);
+                         ADDIS_R12_R2 | PPC_HA (off), loc);
              loc += 4;
              bfd_put_32 (htab->params->stub_bfd,
-                         LD_R12_0R11 | PPC_LO (off), loc);
+                         LD_R12_0R12 | PPC_LO (off), loc);
            }
          else
            bfd_put_32 (htab->params->stub_bfd, LD_R12_0R2 | PPC_LO (off), loc);
index 5e428f51e99d0091d3411e3fcb5359f4a370da9d..d7f5c6f8af3ef875258c66f489d255433717874a 100644 (file)
@@ -1,3 +1,8 @@
+2014-06-03  Alan Modra  <amodra@gmail.com>
+
+       * ppc64-tdep.c (ppc64_standard_linkage8): New.
+       (ppc64_skip_trampoline_code): Recognise ELFv2 stub supporting fusion.
+
 2014-06-02  Doug Evans  <dje@google.com>
 
        Add support for skeletonless type units.
index cbbbedc070ccfeb3f29f136c9567e5607f474c12..8acd754036de2141ed5516aa4caa8ada93a3f4ba 100644 (file)
@@ -303,6 +303,29 @@ static struct ppc_insn_pattern ppc64_standard_linkage7[] =
     { 0, 0, 0 }
   };
 
+/* ELFv2 PLT call stub to access PLT entries more than +/- 32k from r2,
+   supporting fusion.  */
+
+static struct ppc_insn_pattern ppc64_standard_linkage8[] =
+  {
+    /* std r2, 24(r1) <optional> */
+    { -1, insn_ds (62, 2, 1, 24, 0), 1 },
+
+    /* addis r12, r2, <any> */
+    { insn_d (-1, -1, -1, 0), insn_d (15, 12, 2, 0), 0 },
+
+    /* ld r12, <any>(r12) */
+    { insn_ds (-1, -1, -1, 0, -1), insn_ds (58, 12, 12, 0, 0), 0 },
+
+    /* mtctr r12 */
+    { insn_xfx (-1, -1, -1, -1), insn_xfx (31, 12, 9, 467), 0 },
+
+    /* bctr */
+    { -1, 0x4e800420, 0 },
+
+    { 0, 0, 0 }
+  };
+
 /* When the dynamic linker is doing lazy symbol resolution, the first
    call to a function in another object will go like this:
 
@@ -437,10 +460,14 @@ ppc64_skip_trampoline_code (struct frame_info *frame, CORE_ADDR pc)
                                    ARRAY_SIZE (ppc64_standard_linkage4))),
                          MAX (MAX (ARRAY_SIZE (ppc64_standard_linkage5),
                                    ARRAY_SIZE (ppc64_standard_linkage6)),
-                              ARRAY_SIZE (ppc64_standard_linkage7))) - 1];
+                              MAX (ARRAY_SIZE (ppc64_standard_linkage7),
+                                   ARRAY_SIZE (ppc64_standard_linkage8))))
+                    - 1];
   CORE_ADDR target;
 
-  if (ppc_insns_match_pattern (frame, pc, ppc64_standard_linkage7, insns))
+  if (ppc_insns_match_pattern (frame, pc, ppc64_standard_linkage8, insns))
+    pc = ppc64_standard_linkage4_target (frame, pc, insns);
+  else if (ppc_insns_match_pattern (frame, pc, ppc64_standard_linkage7, insns))
     pc = ppc64_standard_linkage3_target (frame, pc, insns);
   else if (ppc_insns_match_pattern (frame, pc, ppc64_standard_linkage6, insns))
     pc = ppc64_standard_linkage4_target (frame, pc, insns);
index 0bde6a1cd0ad506f990d37430f4fdd27b78453b5..5e15f5e43713f534a95d06846bce43486cbe9c0b 100644 (file)
@@ -1,3 +1,8 @@
+2014-06-03  Alan Modra  <amodra@gmail.com>
+
+       * powerpc.cc (addis_12_2): Define.
+       (Stub_table::do_write): Support fusion on ELFv2 stubs.
+
 2014-06-03  Alan Modra  <amodra@gmail.com>
 
        * testsuite/plugin_test.c (parse_readelf_line): Skip non-visibility
index 10780179696f7d92841770dcacb271a4c361dd03..bd3994a9d186cb808fb25d550b4bb48c2d7068cb 100644 (file)
@@ -3077,6 +3077,7 @@ static const uint32_t addis_3_13  = 0x3c6d0000;
 static const uint32_t addis_11_2       = 0x3d620000;
 static const uint32_t addis_11_11      = 0x3d6b0000;
 static const uint32_t addis_11_30      = 0x3d7e0000;
+static const uint32_t addis_12_2       = 0x3d820000;
 static const uint32_t addis_12_12      = 0x3d8c0000;
 static const uint32_t b                        = 0x48000000;
 static const uint32_t bcl_20_31                = 0x429f0005;
@@ -4210,10 +4211,20 @@ Stub_table<size, big_endian>::do_write(Output_file* of)
                {
                  write_insn<big_endian>(p, std_2_1 + this->targ_->stk_toc());
                  p += 4;
-                 write_insn<big_endian>(p, addis_11_2 + ha(off));
-                 p += 4;
-                 write_insn<big_endian>(p, ld_12_11 + l(off));
-                 p += 4;
+                 if (plt_load_toc)
+                   {
+                     write_insn<big_endian>(p, addis_11_2 + ha(off));
+                     p += 4;
+                     write_insn<big_endian>(p, ld_12_11 + l(off));
+                     p += 4;
+                   }
+                 else
+                   {
+                     write_insn<big_endian>(p, addis_12_2 + ha(off));
+                     p += 4;
+                     write_insn<big_endian>(p, ld_12_12 + l(off));
+                     p += 4;
+                   }
                  if (plt_load_toc
                      && ha(off + 8 + 8 * static_chain) != ha(off))
                    {
@@ -4312,8 +4323,8 @@ Stub_table<size, big_endian>::do_write(Output_file* of)
                }
              else
                {
-                 write_insn<big_endian>(p, addis_11_2 + ha(brltoff)),  p += 4;
-                 write_insn<big_endian>(p, ld_12_11 + l(brltoff)),     p += 4;
+                 write_insn<big_endian>(p, addis_12_2 + ha(brltoff)),  p += 4;
+                 write_insn<big_endian>(p, ld_12_12 + l(brltoff)),     p += 4;
                }
              write_insn<big_endian>(p, mtctr_12),                      p += 4;
              write_insn<big_endian>(p, bctr);
index bd604b6381332867976c289cebd60b53198b7f4b..bc8051892d65708ad7da0a855c9fbaaee266e066 100644 (file)
@@ -1,3 +1,7 @@
+2014-06-03  Alan Modra  <amodra@gmail.com>
+
+       * ld-powerpc/elfv2exe.d: Update for changed plt call stubs.
+
 2014-05-28  Matthew Fortune  <matthew.fortune@imgtec.com>
 
        * lib/ld-lib.exp: Add objcopy_objects command to run_dump_test.
index 7ff9d3891139bbbb95499cc938402856dd4386ba..9ea816c3153f12022a4d7c5933c6c990028acbb2 100644 (file)
@@ -8,14 +8,14 @@
 Disassembly of section \.text:
 
 0+100000c0 <.*\.plt_branch\.f2>:
-.*:    (ff ff 62 3d|3d 62 ff ff)       addis   r11,r2,-1
-.*:    (f0 7f 8b e9|e9 8b 7f f0)       ld      r12,32752\(r11\)
+.*:    (ff ff 82 3d|3d 82 ff ff)       addis   r12,r2,-1
+.*:    (f0 7f 8c e9|e9 8c 7f f0)       ld      r12,32752\(r12\)
 .*:    (a6 03 89 7d|7d 89 03 a6)       mtctr   r12
 .*:    (20 04 80 4e|4e 80 04 20)       bctr
 
 0+100000d0 <.*\.plt_branch\.f4>:
-.*:    (ff ff 62 3d|3d 62 ff ff)       addis   r11,r2,-1
-.*:    (f8 7f 8b e9|e9 8b 7f f8)       ld      r12,32760\(r11\)
+.*:    (ff ff 82 3d|3d 82 ff ff)       addis   r12,r2,-1
+.*:    (f8 7f 8c e9|e9 8c 7f f8)       ld      r12,32760\(r12\)
 .*:    (a6 03 89 7d|7d 89 03 a6)       mtctr   r12
 .*:    (20 04 80 4e|4e 80 04 20)       bctr