From 252dcdf432c67f6baafb766ed068c64db1eb2bad Mon Sep 17 00:00:00 2001 From: Alan Modra Date: Mon, 24 Aug 2020 16:32:57 +0930 Subject: [PATCH] PowerPC TPREL_HA/LO optimisation ppc64 ld optimises sequences like the following addis 3,13,wot@tprel@ha lwz 3,wot@tprel@l(3) to nop lwz 3,wot@tprel(13) when "wot" is located near enough to the thread pointer. However, the ABI doesn't require that R_PPC64_TPREL16_HA always be on an addis rt,13,imm instruction, and while ld checked for that on the high-part instruction it didn't disable the optimisation on the low-part instruction. This patch fixes that problem, disabling the tprel optimisation globally if high-part instructions don't pass sanity checks. The optimisation is also enabled for ppc32, where before ld.bfd had the code in the wrong place and ld.gold had it in a block only enabled for ppc64. bfd/ * elf32-ppc.c (ppc_elf_check_relocs): Set has_tls_reloc for high part tprel16 relocs. (ppc_elf_tls_optimize): Sanity check high part tprel16 relocs. Clear do_tls_opt on odd instructions. (ppc_elf_relocate_section): Move TPREL16_HA/LO optimisation later. Don't sanity check them here. * elf64-ppc.c (ppc64_elf_check_relocs): Set has_tls_reloc for high part tprel16 relocs. (ppc64_elf_tls_optimize): Sanity check high part tprel16 relocs. Clear do_tls_opt on odd instructions. (ppc64_elf_relocate_section): Don't sanity check TPREL16_HA. ld/ * testsuite/ld-powerpc/tls32.d: Update for TPREL_HA/LO optimisation. * testsuite/ld-powerpc/tlsexe32.d: Likewise. * testsuite/ld-powerpc/tlsldopt32.d: Likewise. * testsuite/ld-powerpc/tlsmark32.d: Likewise. * testsuite/ld-powerpc/tlsopt4_32.d: Likewise. * testsuite/ld-powerpc/tprel.s, * testsuite/ld-powerpc/tprel.d, * testsuite/ld-powerpc/tprel32.d: New tests. * testsuite/ld-powerpc/tprelbad.s, * testsuite/ld-powerpc/tprelbad.d: New test. * testsuite/ld-powerpc/powerpc.exp: Run them. gold/ * powerpc.cc (Target_powerpc): Add tprel_opt_ and accessors. (Target_powerpc::Scan::local): Sanity check tprel high relocs. (Target_powerpc::Scan::global): Likewise. (Target_powerpc::Relocate::relocate): Control tprel optimisation with tprel_opt_ and enable for 32-bit. --- bfd/ChangeLog | 14 ++ bfd/elf32-ppc.c | 98 ++++++---- bfd/elf64-ppc.c | 64 +++++-- gold/ChangeLog | 8 + gold/powerpc.cc | 261 ++++++++++++++++++--------- ld/ChangeLog | 14 ++ ld/testsuite/ld-powerpc/powerpc.exp | 4 + ld/testsuite/ld-powerpc/tls32.d | 40 ++-- ld/testsuite/ld-powerpc/tlsexe32.d | 32 ++-- ld/testsuite/ld-powerpc/tlsldopt32.d | 16 +- ld/testsuite/ld-powerpc/tlsmark32.d | 8 +- ld/testsuite/ld-powerpc/tlsopt4_32.d | 20 +- ld/testsuite/ld-powerpc/tprel.d | 12 ++ ld/testsuite/ld-powerpc/tprel.s | 10 + ld/testsuite/ld-powerpc/tprel32.d | 13 ++ ld/testsuite/ld-powerpc/tprelbad.d | 12 ++ ld/testsuite/ld-powerpc/tprelbad.s | 10 + 17 files changed, 436 insertions(+), 200 deletions(-) create mode 100644 ld/testsuite/ld-powerpc/tprel.d create mode 100644 ld/testsuite/ld-powerpc/tprel.s create mode 100644 ld/testsuite/ld-powerpc/tprel32.d create mode 100644 ld/testsuite/ld-powerpc/tprelbad.d create mode 100644 ld/testsuite/ld-powerpc/tprelbad.s diff --git a/bfd/ChangeLog b/bfd/ChangeLog index eb4d882ce97..08e9763afb3 100644 --- a/bfd/ChangeLog +++ b/bfd/ChangeLog @@ -1,3 +1,17 @@ +2020-08-24 Alan Modra + + * elf32-ppc.c (ppc_elf_check_relocs): Set has_tls_reloc for + high part tprel16 relocs. + (ppc_elf_tls_optimize): Sanity check high part tprel16 relocs. + Clear do_tls_opt on odd instructions. + (ppc_elf_relocate_section): Move TPREL16_HA/LO optimisation later. + Don't sanity check them here. + * elf64-ppc.c (ppc64_elf_check_relocs): Set has_tls_reloc for + high part tprel16 relocs. + (ppc64_elf_tls_optimize): Sanity check high part tprel16 relocs. + Clear do_tls_opt on odd instructions. + (ppc64_elf_relocate_section): Don't sanity check TPREL16_HA. + 2020-08-23 John David Anglin PR binutils/26357 diff --git a/bfd/elf32-ppc.c b/bfd/elf32-ppc.c index 8d34b9b058a..43c0e188967 100644 --- a/bfd/elf32-ppc.c +++ b/bfd/elf32-ppc.c @@ -3301,12 +3301,14 @@ ppc_elf_check_relocs (bfd *abfd, return FALSE; break; + case R_PPC_TPREL16_HI: + case R_PPC_TPREL16_HA: + sec->has_tls_reloc = 1; + /* Fall through. */ /* We shouldn't really be seeing TPREL32. */ case R_PPC_TPREL32: case R_PPC_TPREL16: case R_PPC_TPREL16_LO: - case R_PPC_TPREL16_HI: - case R_PPC_TPREL16_HA: if (bfd_link_dll (info)) info->flags |= DF_STATIC_TLS; goto dodyn; @@ -4416,6 +4418,8 @@ ppc_elf_tls_optimize (bfd *obfd ATTRIBUTE_UNUSED, if (htab == NULL) return FALSE; + htab->do_tls_opt = 1; + /* Make two passes through the relocs. First time check that tls relocs involved in setting up a tls_get_addr call are indeed followed by such a call. If they are not, don't do any tls @@ -4581,6 +4585,37 @@ ppc_elf_tls_optimize (bfd *obfd ATTRIBUTE_UNUSED, tls_clear = 0; break; + case R_PPC_TPREL16_HA: + if (pass == 0) + { + unsigned char buf[4]; + unsigned int insn; + bfd_vma off = rel->r_offset & ~3; + if (!bfd_get_section_contents (ibfd, sec, buf, + off, 4)) + { + if (elf_section_data (sec)->relocs != relstart) + free (relstart); + return FALSE; + } + insn = bfd_get_32 (ibfd, buf); + /* addis rt,2,imm */ + if ((insn & ((0x3fu << 26) | 0x1f << 16)) + != ((15u << 26) | (2 << 16))) + { + /* xgettext:c-format */ + info->callbacks->minfo + (_("%H: warning: %s unexpected insn %#x.\n"), + ibfd, sec, off, "R_PPC_TPREL16_HA", insn); + htab->do_tls_opt = 0; + } + } + continue; + + case R_PPC_TPREL16_HI: + htab->do_tls_opt = 0; + continue; + default: continue; } @@ -4675,7 +4710,6 @@ ppc_elf_tls_optimize (bfd *obfd ATTRIBUTE_UNUSED, free (relstart); } } - htab->do_tls_opt = 1; return TRUE; } @@ -7552,39 +7586,6 @@ ppc_elf_relocate_section (bfd *output_bfd, if (r_type < R_PPC_max) howto = ppc_elf_howto_table[r_type]; - switch (r_type) - { - default: - break; - - case R_PPC_TPREL16_HA: - if (htab->do_tls_opt && relocation + addend + 0x8000 < 0x10000) - { - bfd_byte *p = contents + (rel->r_offset & ~3); - unsigned int insn = bfd_get_32 (input_bfd, p); - if ((insn & ((0x3fu << 26) | 0x1f << 16)) - != ((15u << 26) | (2 << 16)) /* addis rt,2,imm */) - /* xgettext:c-format */ - info->callbacks->minfo - (_("%H: warning: %s unexpected insn %#x.\n"), - input_bfd, input_section, rel->r_offset, howto->name, insn); - else - bfd_put_32 (input_bfd, NOP, p); - } - break; - - case R_PPC_TPREL16_LO: - if (htab->do_tls_opt && relocation + addend + 0x8000 < 0x10000) - { - bfd_byte *p = contents + (rel->r_offset & ~3); - unsigned int insn = bfd_get_32 (input_bfd, p); - insn &= ~(0x1f << 16); - insn |= 2 << 16; - bfd_put_32 (input_bfd, insn, p); - } - break; - } - tls_type = 0; switch (r_type) { @@ -8749,6 +8750,31 @@ ppc_elf_relocate_section (bfd *output_bfd, goto copy_reloc; } + switch (r_type) + { + default: + break; + + case R_PPC_TPREL16_HA: + if (htab->do_tls_opt && relocation + addend + 0x8000 < 0x10000) + { + bfd_byte *p = contents + (rel->r_offset & ~3); + bfd_put_32 (input_bfd, NOP, p); + } + break; + + case R_PPC_TPREL16_LO: + if (htab->do_tls_opt && relocation + addend + 0x8000 < 0x10000) + { + bfd_byte *p = contents + (rel->r_offset & ~3); + unsigned int insn = bfd_get_32 (input_bfd, p); + insn &= ~(0x1f << 16); + insn |= 2 << 16; + bfd_put_32 (input_bfd, insn, p); + } + break; + } + switch (r_type) { default: diff --git a/bfd/elf64-ppc.c b/bfd/elf64-ppc.c index 5cbf9acfcd1..02f0f18fc70 100644 --- a/bfd/elf64-ppc.c +++ b/bfd/elf64-ppc.c @@ -5073,19 +5073,21 @@ ppc64_elf_check_relocs (bfd *abfd, struct bfd_link_info *info, ppc64_sec->u.toc.symndx[rel->r_offset / 8 + 1] = -2; goto dodyn; - case R_PPC64_TPREL16: - case R_PPC64_TPREL16_LO: case R_PPC64_TPREL16_HI: case R_PPC64_TPREL16_HA: - case R_PPC64_TPREL16_DS: - case R_PPC64_TPREL16_LO_DS: case R_PPC64_TPREL16_HIGH: case R_PPC64_TPREL16_HIGHA: case R_PPC64_TPREL16_HIGHER: case R_PPC64_TPREL16_HIGHERA: case R_PPC64_TPREL16_HIGHEST: case R_PPC64_TPREL16_HIGHESTA: + sec->has_tls_reloc = 1; + /* Fall through. */ case R_PPC64_TPREL34: + case R_PPC64_TPREL16: + case R_PPC64_TPREL16_DS: + case R_PPC64_TPREL16_LO: + case R_PPC64_TPREL16_LO_DS: if (bfd_link_dll (info)) info->flags |= DF_STATIC_TLS; goto dodyn; @@ -7936,6 +7938,8 @@ ppc64_elf_tls_optimize (struct bfd_link_info *info) if (htab == NULL) return FALSE; + htab->do_tls_opt = 1; + /* Make two passes over the relocs. On the first pass, mark toc entries involved with tls relocs, and check that tls relocs involved in setting up a tls_get_addr call are indeed followed by @@ -8240,6 +8244,42 @@ ppc64_elf_tls_optimize (struct bfd_link_info *info) } break; + case R_PPC64_TPREL16_HA: + if (pass == 0) + { + unsigned char buf[4]; + unsigned int insn; + bfd_vma off = rel->r_offset & ~3; + if (!bfd_get_section_contents (ibfd, sec, buf, + off, 4)) + goto err_free_rel; + insn = bfd_get_32 (ibfd, buf); + /* addis rt,13,imm */ + if ((insn & ((0x3fu << 26) | 0x1f << 16)) + != ((15u << 26) | (13 << 16))) + { + /* xgettext:c-format */ + info->callbacks->minfo + (_("%H: warning: %s unexpected insn %#x.\n"), + ibfd, sec, off, "R_PPC64_TPREL16_HA", insn); + htab->do_tls_opt = 0; + } + } + continue; + + case R_PPC64_TPREL16_HI: + case R_PPC64_TPREL16_HIGH: + case R_PPC64_TPREL16_HIGHA: + case R_PPC64_TPREL16_HIGHER: + case R_PPC64_TPREL16_HIGHERA: + case R_PPC64_TPREL16_HIGHEST: + case R_PPC64_TPREL16_HIGHESTA: + /* These can all be used in sequences along with + TPREL16_LO or TPREL16_LO_DS in ways we aren't + able to verify easily. */ + htab->do_tls_opt = 0; + continue; + default: continue; } @@ -8406,7 +8446,6 @@ ppc64_elf_tls_optimize (struct bfd_link_info *info) } free (toc_ref); - htab->do_tls_opt = 1; return TRUE; } @@ -16913,19 +16952,8 @@ ppc64_elf_relocate_section (bfd *output_bfd, if (htab->do_tls_opt && relocation + addend + 0x8000 < 0x10000) { bfd_byte *p = contents + (rel->r_offset & ~3); - insn = bfd_get_32 (input_bfd, p); - if ((insn & ((0x3fu << 26) | 0x1f << 16)) - != ((15u << 26) | (13 << 16)) /* addis rt,13,imm */) - /* xgettext:c-format */ - info->callbacks->minfo - (_("%H: warning: %s unexpected insn %#x.\n"), - input_bfd, input_section, rel->r_offset, - ppc64_elf_howto_table[r_type]->name, insn); - else - { - bfd_put_32 (input_bfd, NOP, p); - goto copy_reloc; - } + bfd_put_32 (input_bfd, NOP, p); + goto copy_reloc; } break; diff --git a/gold/ChangeLog b/gold/ChangeLog index 51b1ddb2c8e..176b728606d 100644 --- a/gold/ChangeLog +++ b/gold/ChangeLog @@ -1,3 +1,11 @@ +2020-08-24 Alan Modra + + * powerpc.cc (Target_powerpc): Add tprel_opt_ and accessors. + (Target_powerpc::Scan::local): Sanity check tprel high relocs. + (Target_powerpc::Scan::global): Likewise. + (Target_powerpc::Relocate::relocate): Control tprel optimisation + with tprel_opt_ and enable for 32-bit. + 2020-08-12 Nick Clifton * po/sr.po: Updated Serbian translation. diff --git a/gold/powerpc.cc b/gold/powerpc.cc index d5736665499..e35cbcf6c0b 100644 --- a/gold/powerpc.cc +++ b/gold/powerpc.cc @@ -650,6 +650,7 @@ class Target_powerpc : public Sized_target power10_stubs_(false), plt_thread_safe_(false), plt_localentry0_(false), plt_localentry0_init_(false), has_localentry0_(false), has_tls_get_addr_opt_(false), + tprel_opt_(parameters->options().tls_optimize()), relax_failed_(false), relax_fail_count_(0), stub_group_size_(0), savres_section_(0), tls_get_addr_(NULL), tls_get_addr_opt_(NULL), @@ -1145,6 +1146,14 @@ class Target_powerpc : public Sized_target return false; } + bool + tprel_opt() const + { return this->tprel_opt_; } + + void + set_tprel_opt(bool val) + { this->tprel_opt_ = val; } + // Remember any symbols seen with non-zero localentry, even those // not providing a definition bool @@ -1702,6 +1711,7 @@ class Target_powerpc : public Sized_target bool plt_localentry0_init_; bool has_localentry0_; bool has_tls_get_addr_opt_; + bool tprel_opt_; bool relax_failed_; int relax_fail_count_; @@ -8365,10 +8375,6 @@ Target_powerpc::Scan::local( switch (r_type) { - case elfcpp::R_POWERPC_TPREL16: - case elfcpp::R_POWERPC_TPREL16_LO: - case elfcpp::R_POWERPC_TPREL16_HI: - case elfcpp::R_POWERPC_TPREL16_HA: case elfcpp::R_PPC64_TPREL16_DS: case elfcpp::R_PPC64_TPREL16_LO_DS: case elfcpp::R_PPC64_TPREL16_HIGH: @@ -8378,12 +8384,55 @@ Target_powerpc::Scan::local( case elfcpp::R_PPC64_TPREL16_HIGHEST: case elfcpp::R_PPC64_TPREL16_HIGHESTA: case elfcpp::R_PPC64_TPREL34: + if (size != 64) + break; + // Fall through. + case elfcpp::R_POWERPC_TPREL16: + case elfcpp::R_POWERPC_TPREL16_LO: + case elfcpp::R_POWERPC_TPREL16_HI: + case elfcpp::R_POWERPC_TPREL16_HA: layout->set_has_static_tls(); break; default: break; } + switch (r_type) + { + case elfcpp::R_POWERPC_TPREL16_HA: + if (target->tprel_opt()) + { + section_size_type slen; + const unsigned char* view = NULL; + view = ppc_object->section_contents(data_shndx, &slen, false); + section_size_type off + = convert_to_section_size_type(reloc.get_r_offset()) & -4; + if (off < slen) + { + uint32_t insn = elfcpp::Swap<32, big_endian>::readval(view + off); + if ((insn & ((0x3fu << 26) | 0x1f << 16)) + != ((15u << 26) | ((size == 32 ? 2 : 13) << 16))) + target->set_tprel_opt(false); + } + } + break; + + case elfcpp::R_PPC64_TPREL16_HIGH: + case elfcpp::R_PPC64_TPREL16_HIGHA: + case elfcpp::R_PPC64_TPREL16_HIGHER: + case elfcpp::R_PPC64_TPREL16_HIGHERA: + case elfcpp::R_PPC64_TPREL16_HIGHEST: + case elfcpp::R_PPC64_TPREL16_HIGHESTA: + if (size != 64) + break; + // Fall through. + case elfcpp::R_POWERPC_TPREL16_HI: + target->set_tprel_opt(false); + break; + default: + break; + } + switch (r_type) { case elfcpp::R_PPC64_D34: @@ -9123,10 +9172,6 @@ Target_powerpc::Scan::global( switch (r_type) { - case elfcpp::R_POWERPC_TPREL16: - case elfcpp::R_POWERPC_TPREL16_LO: - case elfcpp::R_POWERPC_TPREL16_HI: - case elfcpp::R_POWERPC_TPREL16_HA: case elfcpp::R_PPC64_TPREL16_DS: case elfcpp::R_PPC64_TPREL16_LO_DS: case elfcpp::R_PPC64_TPREL16_HIGH: @@ -9136,12 +9181,55 @@ Target_powerpc::Scan::global( case elfcpp::R_PPC64_TPREL16_HIGHEST: case elfcpp::R_PPC64_TPREL16_HIGHESTA: case elfcpp::R_PPC64_TPREL34: + if (size != 64) + break; + // Fall through. + case elfcpp::R_POWERPC_TPREL16: + case elfcpp::R_POWERPC_TPREL16_LO: + case elfcpp::R_POWERPC_TPREL16_HI: + case elfcpp::R_POWERPC_TPREL16_HA: layout->set_has_static_tls(); break; default: break; } + switch (r_type) + { + case elfcpp::R_POWERPC_TPREL16_HA: + if (target->tprel_opt()) + { + section_size_type slen; + const unsigned char* view = NULL; + view = ppc_object->section_contents(data_shndx, &slen, false); + section_size_type off + = convert_to_section_size_type(reloc.get_r_offset()) & -4; + if (off < slen) + { + uint32_t insn = elfcpp::Swap<32, big_endian>::readval(view + off); + if ((insn & ((0x3fu << 26) | 0x1f << 16)) + != ((15u << 26) | ((size == 32 ? 2 : 13) << 16))) + target->set_tprel_opt(false); + } + } + break; + + case elfcpp::R_PPC64_TPREL16_HIGH: + case elfcpp::R_PPC64_TPREL16_HIGHA: + case elfcpp::R_PPC64_TPREL16_HIGHER: + case elfcpp::R_PPC64_TPREL16_HIGHERA: + case elfcpp::R_PPC64_TPREL16_HIGHEST: + case elfcpp::R_PPC64_TPREL16_HIGHESTA: + if (size != 64) + break; + // Fall through. + case elfcpp::R_POWERPC_TPREL16_HI: + target->set_tprel_opt(false); + break; + default: + break; + } + switch (r_type) { case elfcpp::R_PPC64_D34: @@ -11124,10 +11212,9 @@ Target_powerpc::Relocate::relocate( break; } - if (size == 64 - && (gsym - ? relative_value_is_known(gsym) - : relative_value_is_known(psymval))) + if (gsym + ? relative_value_is_known(gsym) + : relative_value_is_known(psymval)) { Insn* iview; Insn* iview2; @@ -11152,7 +11239,7 @@ Target_powerpc::Relocate::relocate( case elfcpp::R_POWERPC_GOT_DTPREL16_HA: case elfcpp::R_POWERPC_GOT16_HA: case elfcpp::R_PPC64_TOC16_HA: - if (parameters->options().toc_optimize()) + if (size == 64 && parameters->options().toc_optimize()) { iview = reinterpret_cast(view - d_offset); insn = elfcpp::Swap<32, big_endian>::readval(iview); @@ -11184,7 +11271,7 @@ Target_powerpc::Relocate::relocate( case elfcpp::R_PPC64_GOT16_LO_DS: case elfcpp::R_PPC64_TOC16_LO: case elfcpp::R_PPC64_TOC16_LO_DS: - if (parameters->options().toc_optimize()) + if (size == 64 && parameters->options().toc_optimize()) { iview = reinterpret_cast(view - d_offset); insn = elfcpp::Swap<32, big_endian>::readval(iview); @@ -11223,7 +11310,7 @@ Target_powerpc::Relocate::relocate( break; case elfcpp::R_PPC64_GOT_PCREL34: - if (parameters->options().toc_optimize()) + if (size == 64 && parameters->options().toc_optimize()) { iview = reinterpret_cast(view); pinsn = elfcpp::Swap<32, big_endian>::readval(iview); @@ -11249,63 +11336,57 @@ Target_powerpc::Relocate::relocate( break; case elfcpp::R_PPC64_PCREL34: - { - iview = reinterpret_cast(view); - pinsn = elfcpp::Swap<32, big_endian>::readval(iview); - pinsn <<= 32; - pinsn |= elfcpp::Swap<32, big_endian>::readval(iview + 1); - if ((pinsn & ((-1ULL << 50) | (63ULL << 26))) - != ((1ULL << 58) | (2ULL << 56) | (1ULL << 52) - | (14ULL << 26) /* paddi */)) - break; + if (size == 64) + { + iview = reinterpret_cast(view); + pinsn = elfcpp::Swap<32, big_endian>::readval(iview); + pinsn <<= 32; + pinsn |= elfcpp::Swap<32, big_endian>::readval(iview + 1); + if ((pinsn & ((-1ULL << 50) | (63ULL << 26))) + != ((1ULL << 58) | (2ULL << 56) | (1ULL << 52) + | (14ULL << 26) /* paddi */)) + break; - pcrelopt: - const int reloc_size = elfcpp::Elf_sizes::rela_size; - elfcpp::Shdr shdr(relinfo->reloc_shdr); - size_t reloc_count = shdr.get_sh_size() / reloc_size; - if (relnum >= reloc_count - 1) - break; + pcrelopt: + const int reloc_size = elfcpp::Elf_sizes::rela_size; + elfcpp::Shdr shdr(relinfo->reloc_shdr); + size_t reloc_count = shdr.get_sh_size() / reloc_size; + if (relnum >= reloc_count - 1) + break; - Reltype next_rela(preloc + reloc_size); - if ((elfcpp::elf_r_type(next_rela.get_r_info()) - != elfcpp::R_PPC64_PCREL_OPT) - || next_rela.get_r_offset() != rela.get_r_offset()) - break; + Reltype next_rela(preloc + reloc_size); + if ((elfcpp::elf_r_type(next_rela.get_r_info()) + != elfcpp::R_PPC64_PCREL_OPT) + || next_rela.get_r_offset() != rela.get_r_offset()) + break; - Address off = next_rela.get_r_addend(); - if (off == 0) - off = 8; // zero means next insn. - if (off + rela.get_r_offset() + 4 > view_size) - break; + Address off = next_rela.get_r_addend(); + if (off == 0) + off = 8; // zero means next insn. + if (off + rela.get_r_offset() + 4 > view_size) + break; - iview2 = reinterpret_cast(view + off); - pinsn2 = elfcpp::Swap<32, big_endian>::readval(iview2); - pinsn2 <<= 32; - if ((pinsn2 & (63ULL << 58)) == 1ULL << 58) - break; - if (xlate_pcrel_opt(&pinsn, &pinsn2)) - { - elfcpp::Swap<32, big_endian>::writeval(iview, pinsn >> 32); - elfcpp::Swap<32, big_endian>::writeval(iview + 1, - pinsn & 0xffffffff); - elfcpp::Swap<32, big_endian>::writeval(iview2, pinsn2 >> 32); - } - } + iview2 = reinterpret_cast(view + off); + pinsn2 = elfcpp::Swap<32, big_endian>::readval(iview2); + pinsn2 <<= 32; + if ((pinsn2 & (63ULL << 58)) == 1ULL << 58) + break; + if (xlate_pcrel_opt(&pinsn, &pinsn2)) + { + elfcpp::Swap<32, big_endian>::writeval(iview, pinsn >> 32); + elfcpp::Swap<32, big_endian>::writeval(iview + 1, + pinsn & 0xffffffff); + elfcpp::Swap<32, big_endian>::writeval(iview2, pinsn2 >> 32); + } + } break; case elfcpp::R_POWERPC_TPREL16_HA: - if (parameters->options().tls_optimize() && value + 0x8000 < 0x10000) + if (target->tprel_opt() && value + 0x8000 < 0x10000) { Insn* iview = reinterpret_cast(view - d_offset); - Insn insn = elfcpp::Swap<32, big_endian>::readval(iview); - if ((insn & ((0x3f << 26) | 0x1f << 16)) - != ((15u << 26) | ((size == 32 ? 2 : 13) << 16))) - ; - else - { - elfcpp::Swap<32, big_endian>::writeval(iview, nop); - return true; - } + elfcpp::Swap<32, big_endian>::writeval(iview, nop); + return true; } break; @@ -11315,7 +11396,7 @@ Target_powerpc::Relocate::relocate( break; // Fall through. case elfcpp::R_POWERPC_TPREL16_LO: - if (parameters->options().tls_optimize() && value + 0x8000 < 0x10000) + if (target->tprel_opt() && value + 0x8000 < 0x10000) { Insn* iview = reinterpret_cast(view - d_offset); Insn insn = elfcpp::Swap<32, big_endian>::readval(iview); @@ -11326,29 +11407,12 @@ Target_powerpc::Relocate::relocate( break; case elfcpp::R_PPC64_ENTRY: - value = (target->got_section()->output_section()->address() - + object->toc_base_offset()); - if (value + 0x80008000 <= 0xffffffff - && !parameters->options().output_is_position_independent()) - { - Insn* iview = reinterpret_cast(view); - Insn insn1 = elfcpp::Swap<32, big_endian>::readval(iview); - Insn insn2 = elfcpp::Swap<32, big_endian>::readval(iview + 1); - - if ((insn1 & ~0xfffc) == ld_2_12 - && insn2 == add_2_2_12) - { - insn1 = lis_2 + ha(value); - elfcpp::Swap<32, big_endian>::writeval(iview, insn1); - insn2 = addi_2_2 + l(value); - elfcpp::Swap<32, big_endian>::writeval(iview + 1, insn2); - return true; - } - } - else + if (size == 64) { - value -= address; - if (value + 0x80008000 <= 0xffffffff) + value = (target->got_section()->output_section()->address() + + object->toc_base_offset()); + if (value + 0x80008000 <= 0xffffffff + && !parameters->options().output_is_position_independent()) { Insn* iview = reinterpret_cast(view); Insn insn1 = elfcpp::Swap<32, big_endian>::readval(iview); @@ -11357,13 +11421,33 @@ Target_powerpc::Relocate::relocate( if ((insn1 & ~0xfffc) == ld_2_12 && insn2 == add_2_2_12) { - insn1 = addis_2_12 + ha(value); + insn1 = lis_2 + ha(value); elfcpp::Swap<32, big_endian>::writeval(iview, insn1); insn2 = addi_2_2 + l(value); elfcpp::Swap<32, big_endian>::writeval(iview + 1, insn2); return true; } } + else + { + value -= address; + if (value + 0x80008000 <= 0xffffffff) + { + Insn* iview = reinterpret_cast(view); + Insn insn1 = elfcpp::Swap<32, big_endian>::readval(iview); + Insn insn2 = elfcpp::Swap<32, big_endian>::readval(iview + 1); + + if ((insn1 & ~0xfffc) == ld_2_12 + && insn2 == add_2_2_12) + { + insn1 = addis_2_12 + ha(value); + elfcpp::Swap<32, big_endian>::writeval(iview, insn1); + insn2 = addi_2_2 + l(value); + elfcpp::Swap<32, big_endian>::writeval(iview + 1, insn2); + return true; + } + } + } } break; @@ -11375,7 +11459,8 @@ Target_powerpc::Relocate::relocate( // lis 2,.TOC.@ha // addi 2,2,.TOC.@l // if .TOC. is in range. */ - if (value + address - 4 + 0x80008000 <= 0xffffffff + if (size == 64 + && value + address - 4 + 0x80008000 <= 0xffffffff && relnum + 1 > 1 && preloc != NULL && target->abiversion() >= 2 diff --git a/ld/ChangeLog b/ld/ChangeLog index 4f57be86ded..7a2c770b7db 100644 --- a/ld/ChangeLog +++ b/ld/ChangeLog @@ -1,3 +1,17 @@ +2020-08-24 Alan Modra + + * testsuite/ld-powerpc/tls32.d: Update for TPREL_HA/LO optimisation. + * testsuite/ld-powerpc/tlsexe32.d: Likewise. + * testsuite/ld-powerpc/tlsldopt32.d: Likewise. + * testsuite/ld-powerpc/tlsmark32.d: Likewise. + * testsuite/ld-powerpc/tlsopt4_32.d: Likewise. + * testsuite/ld-powerpc/tprel.s, + * testsuite/ld-powerpc/tprel.d, + * testsuite/ld-powerpc/tprel32.d: New tests. + * testsuite/ld-powerpc/tprelbad.s, + * testsuite/ld-powerpc/tprelbad.d: New test. + * testsuite/ld-powerpc/powerpc.exp: Run them. + 2020-08-22 H.J. Lu PR ld/26382 diff --git a/ld/testsuite/ld-powerpc/powerpc.exp b/ld/testsuite/ld-powerpc/powerpc.exp index a4c060a8770..7a49b1a80a3 100644 --- a/ld/testsuite/ld-powerpc/powerpc.exp +++ b/ld/testsuite/ld-powerpc/powerpc.exp @@ -419,6 +419,7 @@ if [ supports_ppc64 ] then { run_dump_test "tlsld" run_dump_test "tlsie" run_dump_test "non-contiguous-powerpc64" + run_dump_test "tprel" } run_dump_test "localgot" @@ -459,3 +460,6 @@ run_dump_test "ppc476-shared" run_dump_test "ppc476-shared2" run_dump_test "non-contiguous-powerpc" + +run_dump_test "tprel32" +run_dump_test "tprelbad" diff --git a/ld/testsuite/ld-powerpc/tls32.d b/ld/testsuite/ld-powerpc/tls32.d index 664f9cdf7dc..e3e81f873ec 100644 --- a/ld/testsuite/ld-powerpc/tls32.d +++ b/ld/testsuite/ld-powerpc/tls32.d @@ -14,34 +14,34 @@ Disassembly of section \.text: .*: (7f c8 02 a6|a6 02 c8 7f) mflr r30 .*: (3f de 00 02|02 00 de 3f) addis r30,r30,2 .*: (3b de 80 a0|a0 80 de 3b) addi r30,r30,-32608 -.*: (3c 62 00 00|00 00 62 3c) addis r3,r2,0 -.*: (38 63 90 3c|3c 90 63 38) addi r3,r3,-28612 -.*: (3c 62 00 00|00 00 62 3c) addis r3,r2,0 -.*: (38 63 10 00|00 10 63 38) addi r3,r3,4096 -.*: (3c 62 00 00|00 00 62 3c) addis r3,r2,0 -.*: (38 63 90 20|20 90 63 38) addi r3,r3,-28640 -.*: (3c 62 00 00|00 00 62 3c) addis r3,r2,0 -.*: (38 63 10 00|00 10 63 38) addi r3,r3,4096 +.*: (60 00 00 00|00 00 00 60) nop +.*: (38 62 90 3c|3c 90 62 38) addi r3,r2,-28612 +.*: (60 00 00 00|00 00 00 60) nop +.*: (38 62 10 00|00 10 62 38) addi r3,r2,4096 +.*: (60 00 00 00|00 00 00 60) nop +.*: (38 62 90 20|20 90 62 38) addi r3,r2,-28640 +.*: (60 00 00 00|00 00 00 60) nop +.*: (38 62 10 00|00 10 62 38) addi r3,r2,4096 .*: (39 23 80 24|24 80 23 39) addi r9,r3,-32732 .*: (3d 23 00 00|00 00 23 3d) addis r9,r3,0 .*: (81 49 80 28|28 80 49 81) lwz r10,-32728\(r9\) -.*: (3d 22 00 00|00 00 22 3d) addis r9,r2,0 -.*: (a1 49 90 30|30 90 49 a1) lhz r10,-28624\(r9\) +.*: (60 00 00 00|00 00 00 60) nop +.*: (a1 42 90 30|30 90 42 a1) lhz r10,-28624\(r2\) .*: (89 42 90 34|34 90 42 89) lbz r10,-28620\(r2\) -.*: (3d 22 00 00|00 00 22 3d) addis r9,r2,0 -.*: (99 49 90 38|38 90 49 99) stb r10,-28616\(r9\) -.*: (3c 62 00 00|00 00 62 3c) addis r3,r2,0 -.*: (38 63 90 00|00 90 63 38) addi r3,r3,-28672 -.*: (3c 62 00 00|00 00 62 3c) addis r3,r2,0 -.*: (38 63 10 00|00 10 63 38) addi r3,r3,4096 +.*: (60 00 00 00|00 00 00 60) nop +.*: (99 42 90 38|38 90 42 99) stb r10,-28616\(r2\) +.*: (60 00 00 00|00 00 00 60) nop +.*: (38 62 90 00|00 90 62 38) addi r3,r2,-28672 +.*: (60 00 00 00|00 00 00 60) nop +.*: (38 62 10 00|00 10 62 38) addi r3,r2,4096 .*: (91 43 80 04|04 80 43 91) stw r10,-32764\(r3\) .*: (3d 23 00 00|00 00 23 3d) addis r9,r3,0 .*: (91 49 80 08|08 80 49 91) stw r10,-32760\(r9\) -.*: (3d 22 00 00|00 00 22 3d) addis r9,r2,0 -.*: (b1 49 90 30|30 90 49 b1) sth r10,-28624\(r9\) +.*: (60 00 00 00|00 00 00 60) nop +.*: (b1 42 90 30|30 90 42 b1) sth r10,-28624\(r2\) .*: (a1 42 90 14|14 90 42 a1) lhz r10,-28652\(r2\) -.*: (3d 22 00 00|00 00 22 3d) addis r9,r2,0 -.*: (a9 49 90 18|18 90 49 a9) lha r10,-28648\(r9\) +.*: (60 00 00 00|00 00 00 60) nop +.*: (a9 42 90 18|18 90 42 a9) lha r10,-28648\(r2\) 0+1800120 <__tls_get_addr>: .*: (4e 80 00 20|20 00 80 4e) blr diff --git a/ld/testsuite/ld-powerpc/tlsexe32.d b/ld/testsuite/ld-powerpc/tlsexe32.d index b3420935b1a..d1a7c9c9bf0 100644 --- a/ld/testsuite/ld-powerpc/tlsexe32.d +++ b/ld/testsuite/ld-powerpc/tlsexe32.d @@ -17,30 +17,30 @@ Disassembly of section \.text: .*: (7c 63 12 14|14 12 63 7c) add r3,r3,r2 .*: (38 7f ff f8|f8 ff 7f 38) addi r3,r31,-8 .*: (48 00 00 65|65 00 00 48) bl .* <__tls_get_addr_opt@plt> -.*: (3c 62 00 00|00 00 62 3c) addis r3,r2,0 -.*: (38 63 90 1c|1c 90 63 38) addi r3,r3,-28644 -.*: (3c 62 00 00|00 00 62 3c) addis r3,r2,0 -.*: (38 63 10 00|00 10 63 38) addi r3,r3,4096 +.*: (60 00 00 00|00 00 00 60) nop +.*: (38 62 90 1c|1c 90 62 38) addi r3,r2,-28644 +.*: (60 00 00 00|00 00 00 60) nop +.*: (38 62 10 00|00 10 62 38) addi r3,r2,4096 .*: (39 23 80 20|20 80 23 39) addi r9,r3,-32736 .*: (3d 23 00 00|00 00 23 3d) addis r9,r3,0 .*: (81 49 80 24|24 80 49 81) lwz r10,-32732\(r9\) -.*: (3d 22 00 00|00 00 22 3d) addis r9,r2,0 -.*: (a1 49 90 2c|2c 90 49 a1) lhz r10,-28628\(r9\) +.*: (60 00 00 00|00 00 00 60) nop +.*: (a1 42 90 2c|2c 90 42 a1) lhz r10,-28628\(r2\) .*: (89 42 90 30|30 90 42 89) lbz r10,-28624\(r2\) -.*: (3d 22 00 00|00 00 22 3d) addis r9,r2,0 -.*: (99 49 90 34|34 90 49 99) stb r10,-28620\(r9\) -.*: (3c 62 00 00|00 00 62 3c) addis r3,r2,0 -.*: (38 63 90 00|00 90 63 38) addi r3,r3,-28672 -.*: (3c 62 00 00|00 00 62 3c) addis r3,r2,0 -.*: (38 63 10 00|00 10 63 38) addi r3,r3,4096 +.*: (60 00 00 00|00 00 00 60) nop +.*: (99 42 90 34|34 90 42 99) stb r10,-28620\(r2\) +.*: (60 00 00 00|00 00 00 60) nop +.*: (38 62 90 00|00 90 62 38) addi r3,r2,-28672 +.*: (60 00 00 00|00 00 00 60) nop +.*: (38 62 10 00|00 10 62 38) addi r3,r2,4096 .*: (91 43 80 04|04 80 43 91) stw r10,-32764\(r3\) .*: (3d 23 00 00|00 00 23 3d) addis r9,r3,0 .*: (91 49 80 08|08 80 49 91) stw r10,-32760\(r9\) -.*: (3d 22 00 00|00 00 22 3d) addis r9,r2,0 -.*: (b1 49 90 2c|2c 90 49 b1) sth r10,-28628\(r9\) +.*: (60 00 00 00|00 00 00 60) nop +.*: (b1 42 90 2c|2c 90 42 b1) sth r10,-28628\(r2\) .*: (a1 42 90 14|14 90 42 a1) lhz r10,-28652\(r2\) -.*: (3d 22 00 00|00 00 22 3d) addis r9,r2,0 -.*: (a9 49 90 18|18 90 49 a9) lha r10,-28648\(r9\) +.*: (60 00 00 00|00 00 00 60) nop +.*: (a9 42 90 18|18 90 42 a9) lha r10,-28648\(r2\) .* <__tls_get_addr_opt@plt>: .*: (81 63 00 00|00 00 63 81) lwz r11,0\(r3\) diff --git a/ld/testsuite/ld-powerpc/tlsldopt32.d b/ld/testsuite/ld-powerpc/tlsldopt32.d index 5178fdb13e1..228e7bc790a 100644 --- a/ld/testsuite/ld-powerpc/tlsldopt32.d +++ b/ld/testsuite/ld-powerpc/tlsldopt32.d @@ -10,32 +10,32 @@ Disassembly of section \.text: .*: .* nop -.* addis r29,r2,0 +.* nop .* mr r3,r29 -.* addi r3,r3,4096 +.* addi r3,r2,4096 .* addis r3,r3,0 .* lwz r3,-32768\(r3\) .* nop .* nop -.* addis r29,r2,0 +.* nop .* mr r3,r29 -.* addi r3,r3,4096 +.* addi r3,r2,4096 .* lwz r3,-32768\(r3\) .* nop .* nop .* nop .* nop .* nop -.* addis r29,r2,0 +.* nop .* mr r3,r29 -.* addi r3,r3,-28672 +.* addi r3,r2,-28672 .* lwz r3,0\(r3\) .* nop .* nop .* nop -.* addis r29,r2,0 +.* nop .* mr r3,r29 -.* addi r3,r3,-28672 +.* addi r3,r2,-28672 .* lwz r3,0\(r3\) .* nop .* nop diff --git a/ld/testsuite/ld-powerpc/tlsmark32.d b/ld/testsuite/ld-powerpc/tlsmark32.d index 3692755e083..448eda90d8a 100644 --- a/ld/testsuite/ld-powerpc/tlsmark32.d +++ b/ld/testsuite/ld-powerpc/tlsmark32.d @@ -11,13 +11,13 @@ Disassembly of section \.text: 0+1800094 <_start>: .*: (48 00 00 14|14 00 00 48) b 18000a8 <_start\+0x14> -.*: (38 63 90 00|00 90 63 38) addi r3,r3,-28672 +.*: (38 62 90 00|00 90 62 38) addi r3,r2,-28672 .*: (80 83 00 00|00 00 83 80) lwz r4,0\(r3\) -.*: (3c 62 00 00|00 00 62 3c) addis r3,r2,0 +.*: (60 00 00 00|00 00 00 60) nop .*: (48 00 00 0c|0c 00 00 48) b 18000b0 <_start\+0x1c> -.*: (3c 62 00 00|00 00 62 3c) addis r3,r2,0 +.*: (60 00 00 00|00 00 00 60) nop .*: (4b ff ff ec|ec ff ff 4b) b 1800098 <_start\+0x4> -.*: (38 63 10 00|00 10 63 38) addi r3,r3,4096 +.*: (38 62 10 00|00 10 62 38) addi r3,r2,4096 .*: (80 83 80 00|00 80 83 80) lwz r4,-32768\(r3\) 0+18000b8 <__tls_get_addr>: diff --git a/ld/testsuite/ld-powerpc/tlsopt4_32.d b/ld/testsuite/ld-powerpc/tlsopt4_32.d index 59c0a6aae37..32314b0d01e 100644 --- a/ld/testsuite/ld-powerpc/tlsopt4_32.d +++ b/ld/testsuite/ld-powerpc/tlsopt4_32.d @@ -15,30 +15,30 @@ Disassembly of section \.text: Disassembly of section \.opt1: 0+1800098 <\.opt1>: -.*: (3c 62 00 00|00 00 62 3c) addis r3,r2,0 +.*: (60 00 00 00|00 00 00 60) nop .*: (2c 04 00 00|00 00 04 2c) cmpwi r4,0 .*: (41 82 00 0c|0c 00 82 41) beq .* -.*: (38 63 90 10|10 90 63 38) addi r3,r3,-28656 +.*: (38 62 90 10|10 90 62 38) addi r3,r2,-28656 .*: (48 00 00 08|08 00 00 48) b .* -.*: (38 63 90 10|10 90 63 38) addi r3,r3,-28656 +.*: (38 62 90 10|10 90 62 38) addi r3,r2,-28656 Disassembly of section \.opt2: 0+18000b0 <\.opt2>: -.*: (3c 62 00 00|00 00 62 3c) addis r3,r2,0 +.*: (60 00 00 00|00 00 00 60) nop .*: (2c 04 00 00|00 00 04 2c) cmpwi r4,0 .*: (41 82 00 08|08 00 82 41) beq .* -.*: (3c 62 00 00|00 00 62 3c) addis r3,r2,0 -.*: (38 63 90 10|10 90 63 38) addi r3,r3,-28656 +.*: (60 00 00 00|00 00 00 60) nop +.*: (38 62 90 10|10 90 62 38) addi r3,r2,-28656 Disassembly of section \.opt3: 0+18000c4 <\.opt3>: -.*: (3c 62 00 00|00 00 62 3c) addis r3,r2,0 +.*: (60 00 00 00|00 00 00 60) nop .*: (48 00 00 0c|0c 00 00 48) b .* -.*: (3c 62 00 00|00 00 62 3c) addis r3,r2,0 +.*: (60 00 00 00|00 00 00 60) nop .*: (48 00 00 0c|0c 00 00 48) b .* -.*: (38 63 90 10|10 90 63 38) addi r3,r3,-28656 +.*: (38 62 90 10|10 90 62 38) addi r3,r2,-28656 .*: (48 00 00 08|08 00 00 48) b .* -.*: (38 63 90 08|08 90 63 38) addi r3,r3,-28664 +.*: (38 62 90 08|08 90 62 38) addi r3,r2,-28664 #pass diff --git a/ld/testsuite/ld-powerpc/tprel.d b/ld/testsuite/ld-powerpc/tprel.d new file mode 100644 index 00000000000..f3135b0a9b0 --- /dev/null +++ b/ld/testsuite/ld-powerpc/tprel.d @@ -0,0 +1,12 @@ +#as: -a64 --defsym REG=13 +#ld: -melf64ppc +#objdump: -d + +.*: file format .* + +Disassembly of section \.text: + +.* <_start>: +.*: (60 00 00 00|00 00 00 60) nop +.*: (80 6d 90 00|00 90 6d 80) lwz r3,-28672\(r13\) +.*: (4e 80 00 20|20 00 80 4e) blr diff --git a/ld/testsuite/ld-powerpc/tprel.s b/ld/testsuite/ld-powerpc/tprel.s new file mode 100644 index 00000000000..67a13e79f7c --- /dev/null +++ b/ld/testsuite/ld-powerpc/tprel.s @@ -0,0 +1,10 @@ + .section ".tbss","awT",@nobits + .p2align 2 +wot: .space 4 + + .text + .global _start +_start: + addis 3,REG,wot@tprel@ha + lwz 3,wot@tprel@l(3) + blr diff --git a/ld/testsuite/ld-powerpc/tprel32.d b/ld/testsuite/ld-powerpc/tprel32.d new file mode 100644 index 00000000000..444db13294b --- /dev/null +++ b/ld/testsuite/ld-powerpc/tprel32.d @@ -0,0 +1,13 @@ +#source: tprel.s +#as: -a32 --defsym REG=2 +#ld: -melf32ppc +#objdump: -d + +.*: file format .* + +Disassembly of section \.text: + +.* <_start>: +.*: (60 00 00 00|00 00 00 60) nop +.*: (80 62 90 00|00 90 62 80) lwz r3,-28672\(r2\) +.*: (4e 80 00 20|20 00 80 4e) blr diff --git a/ld/testsuite/ld-powerpc/tprelbad.d b/ld/testsuite/ld-powerpc/tprelbad.d new file mode 100644 index 00000000000..c7fc60b7e95 --- /dev/null +++ b/ld/testsuite/ld-powerpc/tprelbad.d @@ -0,0 +1,12 @@ +#as: +#ld: +#objdump: -d + +.*: file format .* + +Disassembly of section \.text: + +.* <_start>: +.*: (3c 60 00 00|00 00 60 3c) lis r3,0 +.*: (38 63 90 00|00 90 63 38) addi r3,r3,-28672 +.*: (4e 80 00 20|20 00 80 4e) blr diff --git a/ld/testsuite/ld-powerpc/tprelbad.s b/ld/testsuite/ld-powerpc/tprelbad.s new file mode 100644 index 00000000000..1b9a1170eaf --- /dev/null +++ b/ld/testsuite/ld-powerpc/tprelbad.s @@ -0,0 +1,10 @@ + .section ".tbss","awT",@nobits + .p2align 2 +wot: .space 4 + + .text + .global _start +_start: + lis 3,wot@tprel@ha + addi 3,3,wot@tprel@l + blr -- 2.30.2