+2019-05-24 Alan Modra <amodra@gmail.com>
+
+ * elf64-ppc.c (ppc64_elf_check_relocs): Set has_gotrel for
+ R_PPC64_GOT_PCREL34.
+ (xlate_pcrel_opt): New function.
+ (ppc64_elf_edit_toc): Handle R_PPC64_GOT_PCREL34.
+ (ppc64_elf_relocate_section): Edit GOT indirect to GOT relative
+ for R_PPC64_GOT_PCREL34. Implement R_PPC64_PCREL_OPT optimisation.
+
2019-05-24 Alan Modra <amodra@gmail.com>
* reloc.c (BFD_RELOC_PPC64_D34, BFD_RELOC_PPC64_D34_LO),
case R_PPC64_GOT16_DS:
case R_PPC64_GOT16_HA:
case R_PPC64_GOT16_LO_DS:
+ case R_PPC64_GOT_PCREL34:
ppc64_elf_tdata (abfd)->has_gotrel = 1;
ppc64_elf_section_data (sec)->has_gotrel = 1;
/* Fall through. */
case R_PPC64_GOT16:
case R_PPC64_GOT16_HI:
case R_PPC64_GOT16_LO:
- case R_PPC64_GOT_PCREL34:
dogot:
/* This symbol requires a global offset table entry. */
sec->has_toc_reloc = 1;
&& (insn & 1) == 0));
}
+/* Try to fold a PCREL_OPT insn pair into a single prefixed load/store.
+
+ An R_PPC64_PCREL_OPT reloc flags to the linker that a pair of insns:
+ pld ra,symbol@got@pcrel
+ load/store rt,0(ra)
+ or
+ paddi ra,symbol@pcrel
+ load/store rt,0(ra)
+ may be translated to
+ pload/pstore rt,symbol@pcrel
+ nop.
+
+ On entry to this function, the linker has already determined that
+ the pld can be replaced with paddi: *PINSN1 is that paddi insn
+ (prefix word in the high 32 bits, suffix word in the low 32 bits),
+ while *PINSN2 holds the second instruction in its high 32 bits.
+
+ Returns TRUE if the optimization is possible, placing the new
+ prefixed insn in *PINSN1 and a NOP (in the high 32 bits) in *PINSN2.
+ Returns FALSE, leaving *PINSN1 and *PINSN2 untouched, when the
+ second insn does not use the register set by the first as its base,
+ is not one of the load/store forms handled below, or has a non-zero
+ displacement.
+
+ The insn stored in *PINSN1 is built only from constant prefix and
+ opcode bits plus the RT field of the second insn; no displacement
+ bits are copied from the paddi. NOTE(review): presumably the
+ R_PPC64_PCREL34 relocation applied by the caller fills in the
+ displacement afterwards -- confirm in ppc64_elf_relocate_section. */
+
+static bfd_boolean
+xlate_pcrel_opt (uint64_t *pinsn1, uint64_t *pinsn2)
+{
+ uint32_t insn2 = *pinsn2 >> 32; /* Second insn is in the high word. */
+ uint64_t i1new; /* Replacement prefixed insn; assigned by every
+ switch case that does not return FALSE. */
+
+ /* Check that regs match: bits 16..20 of the second insn (its base
+ register) must equal bits 21..25 of the paddi (its target
+ register). */
+ if (((insn2 >> 16) & 31) != ((*pinsn1 >> 21) & 31))
+ return FALSE;
+
+ switch ((insn2 >> 26) & 63) /* Dispatch on the top six bits (primary
+ opcode) of the second insn. */
+ {
+ default:
+ return FALSE; /* Not an insn this function can prefix. */
+
+ case 32: /* lwz */
+ case 34: /* lbz */
+ case 36: /* stw */
+ case 38: /* stb */
+ case 40: /* lhz */
+ case 42: /* lha */
+ case 44: /* sth */
+ case 48: /* lfs */
+ case 50: /* lfd */
+ case 52: /* stfs */
+ case 54: /* stfd */
+ /* These are the PMLS cases, where we just need to tack a prefix
+ on the insn. Check that the D field is zero. The primary opcode
+ and RT field of the second insn are kept unchanged; this is the
+ only group whose prefix includes the (2ULL << 56) bits. */
+ if ((insn2 & 0xffff) != 0)
+ return FALSE;
+ i1new = ((1ULL << 58) | (2ULL << 56) | (1ULL << 52)
+ | (insn2 & ((63ULL << 26) | (31ULL << 21))));
+ break;
+
+ case 58: /* lwa, ld */
+ if ((insn2 & 0xfffd) != 0) /* Only bit 1 of the low half may be
+ set; it tells the two insns apart. */
+ return FALSE;
+ i1new = ((1ULL << 58) | (1ULL << 52)
+ | (insn2 & 2 ? 41ULL << 26 : 57ULL << 26) /* bit 1 set: opcode 41,
+ otherwise opcode 57. */
+ | (insn2 & (31ULL << 21)));
+ break;
+
+ case 57: /* lxsd, lxssp */
+ if ((insn2 & 0xfffc) != 0 || (insn2 & 3) < 2) /* Displacement bits
+ must be zero and the low two bits must be 2 or 3. */
+ return FALSE;
+ i1new = ((1ULL << 58) | (1ULL << 52)
+ | ((40ULL | (insn2 & 3)) << 26) /* New opcode is 42 or 43. */
+ | (insn2 & (31ULL << 21)));
+ break;
+
+ case 61: /* stxsd, stxssp, lxv, stxv */
+ if ((insn2 & 3) == 0) /* Low two bits of zero are not handled. */
+ return FALSE;
+ else if ((insn2 & 3) >= 2) /* Low two bits are 2 or 3. */
+ {
+ if ((insn2 & 0xfffc) != 0)
+ return FALSE;
+ i1new = ((1ULL << 58) | (1ULL << 52)
+ | ((44ULL | (insn2 & 3)) << 26) /* New opcode is 46 or 47. */
+ | (insn2 & (31ULL << 21)));
+ }
+ else /* Low two bits are 1. */
+ {
+ if ((insn2 & 0xfff0) != 0) /* Bits 2 and 3 are not required to
+ be zero; they are folded into the new opcode below. */
+ return FALSE;
+ i1new = ((1ULL << 58) | (1ULL << 52)
+ | ((50ULL | (insn2 & 4) | ((insn2 & 8) >> 3)) << 26) /* Bit 2
+ stays at opcode bit 2 (50 vs 54); bit 3 becomes opcode bit 0. */
+ | (insn2 & (31ULL << 21)));
+ }
+ break;
+
+ case 56: /* lq */
+ if ((insn2 & 0xffff) != 0)
+ return FALSE;
+ i1new = ((1ULL << 58) | (1ULL << 52)
+ | (insn2 & ((63ULL << 26) | (31ULL << 21)))); /* Opcode and RT
+ are kept as they are. */
+ break;
+
+ case 62: /* std, stq */
+ if ((insn2 & 0xfffd) != 0) /* Only bit 1 of the low half may be
+ set; it tells the two insns apart. */
+ return FALSE;
+ i1new = ((1ULL << 58) | (1ULL << 52)
+ | ((insn2 & 2) == 0 ? 61ULL << 26 : 60ULL << 26) /* bit 1 clear:
+ opcode 61, otherwise opcode 60. */
+ | (insn2 & (31ULL << 21)));
+ break;
+ }
+
+ *pinsn1 = i1new;
+ *pinsn2 = (uint64_t) NOP << 32; /* NOP goes in the high word, the same
+ place the second insn was read from. */
+ return TRUE;
+}
+
/* Examine all relocs referencing .toc sections in order to remove
unused .toc entries. */
}
/* Look for cases where we can change an indirect GOT access to
- a GOT relative access, possibly reducing the number of GOT
- entries. */
+ a GOT relative or PC relative access, possibly reducing the
+ number of GOT entries. */
for (ibfd = info->input_bfds; ibfd != NULL; ibfd = ibfd->link.next)
{
asection *sec;
asection *sym_sec;
struct elf_link_hash_entry *h;
struct got_entry *ent;
- bfd_vma val;
- unsigned char buf[4];
+ bfd_vma sym_addend, val, pc;
+ unsigned char buf[8];
unsigned int insn;
r_type = ELF64_R_TYPE (rel->r_info);
case R_PPC64_GOT16_DS:
case R_PPC64_GOT16_HA:
case R_PPC64_GOT16_LO_DS:
+ sym_addend = rel->r_addend;
+ break;
+
+ case R_PPC64_GOT_PCREL34:
+ sym_addend = 0;
break;
}
val = h->root.u.def.value;
else
val = sym->st_value;
- val += rel->r_addend;
+ val += sym_addend;
val += sym_sec->output_section->vma + sym_sec->output_offset;
switch (r_type)
if ((insn & (0x3f << 26 | 0x3)) != 58u << 26 /* ld */)
continue;
break;
+
+ case R_PPC64_GOT_PCREL34:
+ pc = rel->r_offset;
+ pc += sec->output_section->vma + sec->output_offset;
+ if (val - pc + (1ULL << 33) >= 1ULL << 34)
+ continue;
+ if (!bfd_get_section_contents (ibfd, sec, buf,
+ rel->r_offset & ~3, 8))
+ goto got_error_ret;
+ insn = bfd_get_32 (ibfd, buf);
+ if ((insn & (-1u << 18)) != ((1u << 26) | (1u << 20)))
+ continue;
+ insn = bfd_get_32 (ibfd, buf + 4);
+ if ((insn & (0x3f << 26)) != 57u << 26)
+ continue;
+ break;
}
if (h != NULL)
ent = local_got_ents[r_symndx];
}
for (; ent != NULL; ent = ent->next)
- if (ent->addend == rel->r_addend
+ if (ent->addend == sym_addend
&& ent->owner == ibfd
&& ent->tls_type == 0)
break;
Elf_Internal_Rela orig_rel;
reloc_howto_type *howto;
struct reloc_howto_struct alt_howto;
+ uint64_t pinsn;
+ bfd_vma offset;
again:
orig_rel = *rel;
if ((tls_mask & TLS_TLS) != 0 && (tls_mask & TLS_LD) == 0)
{
unsigned int insn1, insn2;
- bfd_vma offset;
tls_ldgd_opt:
offset = (bfd_vma) -1;
&& rel + 1 < relend)
{
unsigned int insn2;
- bfd_vma offset = rel->r_offset;
enum elf_ppc64_reloc_type r_type1 = ELF64_R_TYPE (rel[1].r_info);
+ offset = rel->r_offset;
if (is_plt_seq_reloc (r_type1))
{
bfd_put_32 (output_bfd, NOP, contents + offset);
&& rel + 1 < relend)
{
unsigned int insn2;
- bfd_vma offset = rel->r_offset;
enum elf_ppc64_reloc_type r_type1 = ELF64_R_TYPE (rel[1].r_info);
+ offset = rel->r_offset;
if (is_plt_seq_reloc (r_type1))
{
bfd_put_32 (output_bfd, NOP, contents + offset);
&& relocation + 0x80008000 <= 0xffffffff)
{
unsigned int insn1, insn2;
- bfd_vma offset = rel->r_offset - d_offset;
+ offset = rel->r_offset - d_offset;
insn1 = bfd_get_32 (input_bfd, contents + offset);
insn2 = bfd_get_32 (input_bfd, contents + offset + 4);
if ((insn1 & 0xffff0000) == ADDIS_R2_R12
}
}
break;
+
+ case R_PPC64_GOT_PCREL34:
+ from = (rel->r_offset
+ + input_section->output_section->vma
+ + input_section->output_offset);
+ if (relocation - from + (1ULL << 33) < 1ULL << 34
+ && SYMBOL_REFERENCES_LOCAL (info, &h->elf))
+ {
+ offset = rel->r_offset;
+ pinsn = bfd_get_32 (input_bfd, contents + offset);
+ pinsn <<= 32;
+ pinsn |= bfd_get_32 (input_bfd, contents + offset + 4);
+ if ((pinsn & ((-1ULL << 50) | (63ULL << 26)))
+ == ((1ULL << 58) | (1ULL << 52) | (57ULL << 26) /* pld */))
+ {
+ /* Replace with paddi. */
+ pinsn += (2ULL << 56) + (14ULL << 26) - (57ULL << 26);
+ r_type = R_PPC64_PCREL34;
+ rel->r_info = ELF64_R_INFO (r_symndx, r_type);
+ bfd_put_32 (input_bfd, pinsn >> 32, contents + offset);
+ bfd_put_32 (input_bfd, pinsn, contents + offset + 4);
+ goto pcrelopt;
+ }
+ }
+ break;
+
+ case R_PPC64_PCREL34:
+ if (SYMBOL_REFERENCES_LOCAL (info, &h->elf))
+ {
+ offset = rel->r_offset;
+ pinsn = bfd_get_32 (input_bfd, contents + offset);
+ pinsn <<= 32;
+ pinsn |= bfd_get_32 (input_bfd, contents + offset + 4);
+ if ((pinsn & ((-1ULL << 50) | (63ULL << 26)))
+ == ((1ULL << 58) | (2ULL << 56) | (1ULL << 52)
+ | (14ULL << 26) /* paddi */))
+ {
+ pcrelopt:
+ if (rel + 1 < relend
+ && rel[1].r_offset == offset
+ && rel[1].r_info == ELF64_R_INFO (0, R_PPC64_PCREL_OPT))
+ {
+ bfd_vma off2 = rel[1].r_addend;
+ if (off2 == 0)
+ /* zero means next insn. */
+ off2 = 8;
+ off2 += offset;
+ if (off2 + 4 <= input_section->size)
+ {
+ uint64_t pinsn2;
+ pinsn2 = bfd_get_32 (input_bfd, contents + off2);
+ pinsn2 <<= 32;
+ if ((pinsn2 & (63ULL << 58)) == 1ULL << 58)
+ break;
+ if (xlate_pcrel_opt (&pinsn, &pinsn2))
+ {
+ bfd_put_32 (input_bfd, pinsn >> 32,
+ contents + offset);
+ bfd_put_32 (input_bfd, pinsn,
+ contents + offset + 4);
+ bfd_put_32 (input_bfd, pinsn2 >> 32,
+ contents + off2);
+ }
+ }
+ }
+ }
+ }
+ break;
}
/* Set `addend'. */
case R_PPC64_GNU_VTINHERIT:
case R_PPC64_GNU_VTENTRY:
case R_PPC64_ENTRY:
+ case R_PPC64_PCREL_OPT:
goto copy_reloc;
/* GOT16 relocations. Like an ADDR16 using the symbol's
r = bfd_reloc_outofrange;
else
{
- uint64_t pinsn;
-
relocation += addend;
if (howto->pc_relative)
relocation -= (rel->r_offset
+2019-05-24 Alan Modra <amodra@gmail.com>
+
+ * testsuite/ld-powerpc/pcrelopt.s,
+ * testsuite/ld-powerpc/pcrelopt.d,
+ * testsuite/ld-powerpc/pcrelopt.sec: New test.
+ * testsuite/ld-powerpc/powerpc.exp: Run it.
+
2019-05-23 Jose E. Marchesi <jose.marchesi@oracle.com>
* Makefile.am (ALL_64_EMULATION_SOURCES): Add eelf64bpf.c.
--- /dev/null
+
+.*: file format .*
+
+Disassembly of section \.text:
+
+0+10000200 <_start>:
+.*: (06 10 00 01|01 00 10 06) plbz r3,66320
+.*: (88 60 03 10|10 03 60 88)
+.*: (60 00 00 00|00 00 00 60) nop
+.*: (06 10 00 01|01 00 10 06) plhz r4,66308
+.*: (a0 80 03 04|04 03 80 a0)
+.*: (60 00 00 00|00 00 00 60) nop
+.*: (60 00 00 00|00 00 00 60) nop
+.*: (60 00 00 00|00 00 00 60) nop
+.*: (06 10 00 01|01 00 10 06) plha r3,66288
+.*: (a8 60 02 f0|f0 02 60 a8)
+.*: (40 82 ff f4|f4 ff 82 40) bne .*
+.*: (06 10 00 01|01 00 10 06) plwz r3,66276
+.*: (80 60 02 e4|e4 02 60 80)
+.*: (60 00 00 00|00 00 00 60) nop
+.*: (04 10 00 01|01 00 10 04) plwa r3,66264
+.*: (a4 60 02 d8|d8 02 60 a4)
+.*: (60 00 00 00|00 00 00 60) nop
+.*: (04 10 00 01|01 00 10 04) pld r3,66252
+.*: (e4 60 02 cc|cc 02 60 e4)
+.*: (60 00 00 00|00 00 00 60) nop
+.*: (04 10 00 01|01 00 10 04) plq r14,66240
+.*: (e1 c0 02 c0|c0 02 c0 e1)
+.*: (60 00 00 00|00 00 00 60) nop
+.*: (06 10 00 01|01 00 10 06) plfs f1,66228
+.*: (c0 20 02 b4|b4 02 20 c0)
+.*: (60 00 00 00|00 00 00 60) nop
+.*: (06 10 00 01|01 00 10 06) plfd f1,66216
+.*: (c8 20 02 a8|a8 02 20 c8)
+.*: (60 00 00 00|00 00 00 60) nop
+.*: (04 10 00 01|01 00 10 04) plxsd v30,66204
+.*: (ab c0 02 9c|9c 02 c0 ab)
+.*: (60 00 00 00|00 00 00 60) nop
+.*: (04 10 00 01|01 00 10 04) plxssp v31,66192
+.*: (af e0 02 90|90 02 e0 af)
+.*: (60 00 00 00|00 00 00 60) nop
+.*: (04 10 00 01|01 00 10 04) plxv vs63,66180
+.*: (cf e0 02 84|84 02 e0 cf)
+.*: (60 00 00 00|00 00 00 60) nop
+.*: (04 10 00 01|01 00 10 04) plxv vs0,66168
+.*: (c8 00 02 78|78 02 00 c8)
+.*: (60 00 00 00|00 00 00 60) nop
+.*: (06 10 00 01|01 00 10 06) pstb r3,66156
+.*: (98 60 02 6c|6c 02 60 98)
+.*: (60 00 00 00|00 00 00 60) nop
+.*: (06 10 00 01|01 00 10 06) psth r3,66144
+.*: (b0 60 02 60|60 02 60 b0)
+.*: (60 00 00 00|00 00 00 60) nop
+.*: (60 00 00 00|00 00 00 60) nop
+.*: (06 10 00 01|01 00 10 06) pstw r3,66128
+.*: (90 60 02 50|50 02 60 90)
+.*: (60 00 00 00|00 00 00 60) nop
+.*: (04 10 00 01|01 00 10 04) pstd r3,66116
+.*: (f4 60 02 44|44 02 60 f4)
+.*: (60 00 00 00|00 00 00 60) nop
+.*: (04 10 00 01|01 00 10 04) pstq r14,66104
+.*: (f1 c0 02 38|38 02 c0 f1)
+.*: (60 00 00 00|00 00 00 60) nop
+.*: (06 10 00 01|01 00 10 06) pstfd f1,66092
+.*: (d8 20 02 2c|2c 02 20 d8)
+.*: (60 00 00 00|00 00 00 60) nop
+.*: (06 10 00 01|01 00 10 06) pstfs f2,66080
+.*: (d0 40 02 20|20 02 40 d0)
+.*: (60 00 00 00|00 00 00 60) nop
+.*: (60 00 00 00|00 00 00 60) nop
+.*: (04 10 00 01|01 00 10 04) pstxsd v30,66064
+.*: (bb c0 02 10|10 02 c0 bb)
+.*: (60 00 00 00|00 00 00 60) nop
+.*: (04 10 00 01|01 00 10 04) pstxssp v31,66052
+.*: (bf e0 02 04|04 02 e0 bf)
+.*: (60 00 00 00|00 00 00 60) nop
+.*: (04 10 00 01|01 00 10 04) pstxv vs63,66040
+.*: (df e0 01 f8|f8 01 e0 df)
+.*: (60 00 00 00|00 00 00 60) nop
+.*: (04 10 00 01|01 00 10 04) pstxv vs0,66028
+.*: (d8 00 01 ec|ec 01 00 d8)
+.*: (60 00 00 00|00 00 00 60) nop
+.*: (04 10 00 01|01 00 10 04) pld r9,66008
+.*: (e5 20 01 d8|d8 01 20 e5)
+.*: (e8 09 00 00|00 00 09 e8) ld r0,0\(r9\)
+.*: (60 00 00 00|00 00 00 60) nop
+.*: (06 10 00 01|01 00 10 06) pla r7,66000
+.*: (38 e0 01 d0|d0 01 e0 38)
+.*: (88 c7 00 00|00 00 c7 88) lbz r6,0\(r7\)
--- /dev/null
+ .text
+ .globl _start
+_start:
+# original PCREL_OPT definition, with second insn immediately after first
+ pld 9,sym@got@pcrel
+ .reloc .-8,R_PPC64_PCREL_OPT,0
+ lbz 3,0(9)
+
+# but we now allow an offset to the second insn
+ pld 22,sym@got@pcrel
+ .reloc .-8,R_PPC64_PCREL_OPT,0f-(.-8)
+ nop
+0: lhz 4,0(22)
+
+# in fact, it can even be before the "first" insn
+0: lha 3,0(9)
+ pld 9,sym@got@pcrel
+ .reloc .-8,R_PPC64_PCREL_OPT,0b-(.-8)
+ bne 0b
+
+# and of course, other local labels work
+ pld 9,sym@got@pcrel
+ .reloc .-8,R_PPC64_PCREL_OPT,.L1-(.-8)
+.L1: lwz 3,0(9)
+
+ pld 9,sym@got@pcrel
+ .reloc .-8,R_PPC64_PCREL_OPT,0f-(.-8)
+0: lwa 3,0(9)
+
+ pld 9,sym@got@pcrel
+ .reloc .-8,R_PPC64_PCREL_OPT,0f-(.-8)
+0: ld 3,0(9)
+
+ pld 9,sym@got@pcrel
+ .reloc .-8,R_PPC64_PCREL_OPT,0f-(.-8)
+0: lq 14,0(9)
+
+ pld 9,sym@got@pcrel
+ .reloc .-8,R_PPC64_PCREL_OPT,0f-(.-8)
+0: lfs 1,0(9)
+
+ pld 9,sym@got@pcrel
+ .reloc .-8,R_PPC64_PCREL_OPT,0f-(.-8)
+0: lfd 1,0(9)
+
+ pld 9,sym@got@pcrel
+ .reloc .-8,R_PPC64_PCREL_OPT,0f-(.-8)
+0: lxsd 30,0(9)
+
+ pld 9,sym@got@pcrel
+ .reloc .-8,R_PPC64_PCREL_OPT,0f-(.-8)
+0: lxssp 31,0(9)
+
+ pld 9,sym@got@pcrel
+ .reloc .-8,R_PPC64_PCREL_OPT,0f-(.-8)
+0: lxv 63,0(9)
+
+ pld 9,sym@got@pcrel
+ .reloc .-8,R_PPC64_PCREL_OPT,0f-(.-8)
+0: lxv 0,0(9)
+
+ pld 9,sym@got@pcrel
+ .reloc .-8,R_PPC64_PCREL_OPT,0f-(.-8)
+0: stb 3,0(9)
+
+ pld 9,sym@got@pcrel
+ .reloc .-8,R_PPC64_PCREL_OPT,0f-(.-8)
+0: sth 3,0(9)
+
+ pld 9,sym@got@pcrel
+ .reloc .-8,R_PPC64_PCREL_OPT,0f-(.-8)
+0: stw 3,0(9)
+
+ pld 9,sym@got@pcrel
+ .reloc .-8,R_PPC64_PCREL_OPT,0f-(.-8)
+0: std 3,0(9)
+
+ pld 9,sym@got@pcrel
+ .reloc .-8,R_PPC64_PCREL_OPT,0f-(.-8)
+0: stq 14,0(9)
+
+ pld 9,sym@got@pcrel
+ .reloc .-8,R_PPC64_PCREL_OPT,0f-(.-8)
+0: stfd 1,0(9)
+
+ pld 9,sym@got@pcrel
+ .reloc .-8,R_PPC64_PCREL_OPT,0f-(.-8)
+0: stfs 2,0(9)
+
+ pld 9,sym@got@pcrel
+ .reloc .-8,R_PPC64_PCREL_OPT,0f-(.-8)
+0: stxsd 30,0(9)
+
+ pld 9,sym@got@pcrel
+ .reloc .-8,R_PPC64_PCREL_OPT,0f-(.-8)
+0: stxssp 31,0(9)
+
+ pld 9,sym@got@pcrel
+ .reloc .-8,R_PPC64_PCREL_OPT,0f-(.-8)
+0: stxv 63,0(9)
+
+ pld 9,sym@got@pcrel
+ .reloc .-8,R_PPC64_PCREL_OPT,0f-(.-8)
+0: stxv 0,0(9)
+
+# This should not optimize
+ .extern i
+ .type i,@object
+ pld 9,i@got@pcrel
+ .reloc .-8,R_PPC64_PCREL_OPT,0f-(.-8)
+0: ld 0,0(9)
+
+# and this should edit from GOT indirect to PC relative,
+# i.e. change the pld to paddi, leaving the lbz as is.
+ pld 7,sym@got@pcrel
+ lbz 6,0(7)
+
+ .data
+sym: .space 32
--- /dev/null
+# check for just one GOT entry
+#...
+.* \.rela\.dyn +RELA +[0-9a-f]+ [0-9a-f]+ 000018 .*
+#...
+.* \.got +PROGBITS +[0-9a-f]+ [0-9a-f]+ 000010 .*
+#pass
{"notoc ext" "" "" "-a64" {ext.s} {} ""}
{"notoc" "-melf64ppc --no-plt-localentry -T ext.lnk" "" "-a64" {notoc.s}
{{objdump -d notoc.d} {readelf {-wf -W} notoc.wf}} "notoc"}
+ {"pcrelopt" "-melf64ppc --hash-style=gnu" "tmpdir/symtocbase.so"
+ "-a64 -mfuture" {pcrelopt.s}
+ {{objdump {-d -Mfuture} pcrelopt.d}
+ {readelf {-S --wide} pcrelopt.sec}} "pcrelopt" }
}
set ppceabitests {