From: Alan Modra Date: Fri, 30 Sep 2011 05:11:04 +0000 (+0000) Subject: PR ld/13235 X-Git-Url: https://git.libre-soc.org/?a=commitdiff_plain;h=9852805258de4c870df1f723bf22f21b41ebd6a7;p=binutils-gdb.git PR ld/13235 bfd/ * elf64-ppc.c (struct ppc64_elf_obj_tdata): Add ha_relocs_not_using_r2. (ppc64_elf_edit_toc): Check HA relocs. (ha_reloc_match): Delete function. (ppc64_elf_relocate_section): Remove delayed HA nop optimization. Instead do it and low part optimization based on ha_relocs_not_using_r2. ld/testsuite/ * ld-powerpc/tocopt.d: Update. * ld-powerpc/tocopt5.d, * ld-powerpc/tocopt5.s: New test. * ld-powerpc/powerpc.exp: Run new test. --- diff --git a/bfd/ChangeLog b/bfd/ChangeLog index 011e33cf5db..91842ad259b 100644 --- a/bfd/ChangeLog +++ b/bfd/ChangeLog @@ -1,3 +1,13 @@ +2011-09-30 Alan Modra + + PR ld/13235 + * elf64-ppc.c (struct ppc64_elf_obj_tdata): Add ha_relocs_not_using_r2. + (ppc64_elf_edit_toc): Check HA relocs. + (ha_reloc_match): Delete function. + (ppc64_elf_relocate_section): Remove delayed HA nop optimization. + Instead do it and low part optimization based on + ha_relocs_not_using_r2. + 2011-09-29 Alan Modra PR ld/13233 diff --git a/bfd/elf64-ppc.c b/bfd/elf64-ppc.c index 5992f0824c7..2511aa88068 100644 --- a/bfd/elf64-ppc.c +++ b/bfd/elf64-ppc.c @@ -2598,7 +2598,10 @@ struct ppc64_elf_obj_tdata /* Nonzero if this bfd has small toc/got relocs, ie. that expect the reloc to be in the range -32768 to 32767. */ - unsigned int has_small_toc_reloc; + unsigned int has_small_toc_reloc : 1; + + /* Set if toc/got ha relocs detected not using r2. 
*/ + unsigned int ha_relocs_not_using_r2 : 1; }; #define ppc64_elf_tdata(bfd) \ @@ -8171,6 +8174,35 @@ ppc64_elf_edit_toc (struct bfd_link_info *info) bfd_vma val; r_type = ELF64_R_TYPE (rel->r_info); + switch (r_type) + { + default: + break; + + case R_PPC64_GOT_TLSLD16_HA: + case R_PPC64_GOT_TLSGD16_HA: + case R_PPC64_GOT_TPREL16_HA: + case R_PPC64_GOT_DTPREL16_HA: + case R_PPC64_GOT16_HA: + case R_PPC64_TOC16_HA: + { + bfd_vma off = rel->r_offset & ~3; + unsigned char buf[4]; + unsigned int insn; + + if (!bfd_get_section_contents (ibfd, sec, buf, off, 4)) + { + free (used); + goto error_ret; + } + insn = bfd_get_32 (ibfd, buf); + if ((insn & ((0x3f << 26) | 0x1f << 16)) + != ((15u << 26) | (2 << 16)) /* addis rt,2,imm */) + ppc64_elf_tdata (ibfd)->ha_relocs_not_using_r2 = 1; + } + break; + } + switch (r_type) { case R_PPC64_TOC16: @@ -8220,7 +8252,10 @@ ppc64_elf_edit_toc (struct bfd_link_info *info) case R_PPC64_TOC16_LO_DS: off = rel->r_offset + (bfd_big_endian (ibfd) ? -2 : 3); if (!bfd_get_section_contents (ibfd, sec, &opc, off, 1)) - return FALSE; + { + free (used); + goto error_ret; + } if ((opc & (0x3f << 2)) == (58u << 2)) break; /* Fall thru */ @@ -11673,63 +11708,6 @@ ppc64_elf_action_discarded (asection *sec) return _bfd_elf_default_action_discarded (sec); } -/* REL points to a low-part reloc on a largetoc instruction sequence. - Find the matching high-part reloc instruction and verify that it - is addis REG,x,imm. If so, set *REG to x and return a pointer to - the high-part reloc. 
*/ - -static const Elf_Internal_Rela * -ha_reloc_match (const Elf_Internal_Rela *relocs, - const Elf_Internal_Rela *rel, - unsigned int *reg, - bfd_boolean match_addend, - const bfd *input_bfd, - const bfd_byte *contents) -{ - enum elf_ppc64_reloc_type r_type, r_type_ha; - bfd_vma r_info_ha, r_addend; - - r_type = ELF64_R_TYPE (rel->r_info); - switch (r_type) - { - case R_PPC64_GOT_TLSLD16_LO: - case R_PPC64_GOT_TLSGD16_LO: - case R_PPC64_GOT_TPREL16_LO_DS: - case R_PPC64_GOT_DTPREL16_LO_DS: - case R_PPC64_GOT16_LO: - case R_PPC64_TOC16_LO: - r_type_ha = r_type + 2; - break; - case R_PPC64_GOT16_LO_DS: - r_type_ha = R_PPC64_GOT16_HA; - break; - case R_PPC64_TOC16_LO_DS: - r_type_ha = R_PPC64_TOC16_HA; - break; - default: - abort (); - } - r_info_ha = ELF64_R_INFO (ELF64_R_SYM (rel->r_info), r_type_ha); - r_addend = rel->r_addend; - - while (--rel >= relocs) - if (rel->r_info == r_info_ha - && (!match_addend - || rel->r_addend == r_addend)) - { - const bfd_byte *p = contents + (rel->r_offset & ~3); - unsigned int insn = bfd_get_32 (input_bfd, p); - if ((insn & (0x3f << 26)) == (15u << 26) /* addis rt,x,imm */ - && (insn & (0x1f << 21)) == (*reg << 21)) - { - *reg = (insn >> 16) & 0x1f; - return rel; - } - break; - } - return NULL; -} - /* The RELOCATE_SECTION function is called by the ELF backend linker to handle the relocations for a section. @@ -11777,9 +11755,7 @@ ppc64_elf_relocate_section (bfd *output_bfd, Elf_Internal_Rela outrel; bfd_byte *loc; struct got_entry **local_got_ents; - unsigned char *ha_opt; bfd_vma TOCstart; - bfd_boolean no_ha_opt; bfd_boolean ret = TRUE; bfd_boolean is_opd; /* Disabled until we sort out how ld should choose 'y' vs 'at'. 
*/ @@ -11805,8 +11781,6 @@ ppc64_elf_relocate_section (bfd *output_bfd, symtab_hdr = &elf_symtab_hdr (input_bfd); sym_hashes = elf_sym_hashes (input_bfd); is_opd = ppc64_elf_section_data (input_section)->sec_type == sec_opd; - ha_opt = NULL; - no_ha_opt = FALSE; rel = relocs; relend = relocs + input_section->reloc_count; @@ -13218,7 +13192,12 @@ ppc64_elf_relocate_section (bfd *output_bfd, case R_PPC64_GOT_DTPREL16_HA: case R_PPC64_GOT16_HA: case R_PPC64_TOC16_HA: - /* nop is done later. */ + if (htab->do_toc_opt && relocation + addend + 0x8000 < 0x10000 + && !ppc64_elf_tdata (input_bfd)->ha_relocs_not_using_r2) + { + bfd_byte *p = contents + (rel->r_offset & ~3); + bfd_put_32 (input_bfd, NOP, p); + } break; case R_PPC64_GOT_TLSLD16_LO: @@ -13229,7 +13208,8 @@ ppc64_elf_relocate_section (bfd *output_bfd, case R_PPC64_GOT16_LO_DS: case R_PPC64_TOC16_LO: case R_PPC64_TOC16_LO_DS: - if (htab->do_toc_opt && relocation + addend + 0x8000 < 0x10000) + if (htab->do_toc_opt && relocation + addend + 0x8000 < 0x10000 + && !ppc64_elf_tdata (input_bfd)->ha_relocs_not_using_r2) { bfd_byte *p = contents + (rel->r_offset & ~3); insn = bfd_get_32 (input_bfd, p); @@ -13252,32 +13232,9 @@ ppc64_elf_relocate_section (bfd *output_bfd, || ((insn & (0x3f << 26)) == 62u << 26 /* std, stmd */ && ((insn & 3) == 0 || (insn & 3) == 3))) { - unsigned int reg = (insn >> 16) & 0x1f; - const Elf_Internal_Rela *ha; - bfd_boolean match_addend; - - match_addend = (sym != NULL - && ELF_ST_TYPE (sym->st_info) == STT_SECTION); - ha = ha_reloc_match (relocs, rel, &reg, match_addend, - input_bfd, contents); - if (ha != NULL) - { - insn &= ~(0x1f << 16); - insn |= reg << 16; - bfd_put_32 (input_bfd, insn, p); - if (ha_opt == NULL) - { - ha_opt = bfd_zmalloc (input_section->reloc_count); - if (ha_opt == NULL) - return FALSE; - } - ha_opt[ha - relocs] = 1; - } - else - /* If we don't find a matching high part insn, - something is fishy. Refuse to nop any high - part insn in this section. 
*/ - no_ha_opt = TRUE; + insn &= ~(0x1f << 16); + insn |= 2 << 16; + bfd_put_32 (input_bfd, insn, p); } } break; @@ -13431,23 +13388,6 @@ ppc64_elf_relocate_section (bfd *output_bfd, } } - if (ha_opt != NULL) - { - if (!no_ha_opt) - { - unsigned char *opt = ha_opt; - rel = relocs; - relend = relocs + input_section->reloc_count; - for (; rel < relend; opt++, rel++) - if (*opt != 0) - { - bfd_byte *p = contents + (rel->r_offset & ~3); - bfd_put_32 (input_bfd, NOP, p); - } - } - free (ha_opt); - } - /* If we're emitting relocations, then shortly after this function returns, reloc offsets and addends for this section will be adjusted. Worse, reloc symbol indices will be for the output diff --git a/ld/testsuite/ChangeLog b/ld/testsuite/ChangeLog index be38879a39e..54428dd6631 100644 --- a/ld/testsuite/ChangeLog +++ b/ld/testsuite/ChangeLog @@ -1,3 +1,9 @@ +2011-09-30 Alan Modra + + * ld-powerpc/tocopt.d: Update. + * ld-powerpc/tocopt5.d, * ld-powerpc/tocopt5.s: New test. + * ld-powerpc/powerpc.exp: Run new test. + 2011-09-16 H.J. 
Lu * ld-elf/pr12975.d: Only run for *-*-linux* and *-*-gnu* diff --git a/ld/testsuite/ld-powerpc/powerpc.exp b/ld/testsuite/ld-powerpc/powerpc.exp index 5ec36b29cd7..7f9e7faec8c 100644 --- a/ld/testsuite/ld-powerpc/powerpc.exp +++ b/ld/testsuite/ld-powerpc/powerpc.exp @@ -211,6 +211,8 @@ set ppc64elftests { {{objdump -s tocopt3.d}} "tocopt3"} {"TOC opt4" "-melf64ppc -no-keep-memory --defsym x=2" "-a64" {tocopt4a.s tocopt4b.s} {{objdump -s tocopt4.d}} "tocopt4"} + {"TOC opt5" "-melf64ppc" "-a64" {tocopt5.s} + {{objdump -s tocopt5.d}} "tocopt5"} } diff --git a/ld/testsuite/ld-powerpc/tocopt.d b/ld/testsuite/ld-powerpc/tocopt.d index 91686611e10..f447f7006fd 100644 --- a/ld/testsuite/ld-powerpc/tocopt.d +++ b/ld/testsuite/ld-powerpc/tocopt.d @@ -2,10 +2,10 @@ .*: file format .* Contents of section \.text: - 100000b0 60000000 e9228018 60000000 38a28020 .* - 100000c0 e8c50000 60000000 3ba08028 7c62e82a .* - 100000d0 60000000 39228033 60000000 38a28008 .* - 100000e0 e8c50000 60000000 3ba08010 7c62e82a .* + 100000b0 3d220000 e9298018 3c820000 38a48020 .* + 100000c0 e8c50000 3fa00000 3bbd8028 7c62e82a .* + 100000d0 3d220000 39298033 3c820000 38a48008 .* + 100000e0 e8c50000 3fa00000 3bbd8010 7c62e82a .* Contents of section \.got: 100100f0 00000000 100180f0 00000000 10010124 .* 10010100 00000000 10010125 00000000 10010120 .* diff --git a/ld/testsuite/ld-powerpc/tocopt5.d b/ld/testsuite/ld-powerpc/tocopt5.d new file mode 100644 index 00000000000..8f03c077aa5 --- /dev/null +++ b/ld/testsuite/ld-powerpc/tocopt5.d @@ -0,0 +1,13 @@ + +.*: file format .* + +Contents of section \.text: + 100000b0 60000000 e9228018 60000000 38a28020 .* + 100000c0 e8c50000 60000000 3922802b 60000000 .* + 100000d0 38a28008 e8c50000 .* +Contents of section \.got: + 100100d8 00000000 100180d8 00000000 10010104 .* + 100100e8 00000000 10010105 00000000 10010100 .* + 100100f8 00000000 10010101 .* +Contents of section \.sdata: + 10010100 01020304 0506 .* diff --git a/ld/testsuite/ld-powerpc/tocopt5.s 
b/ld/testsuite/ld-powerpc/tocopt5.s new file mode 100644 index 00000000000..67da1a92834 --- /dev/null +++ b/ld/testsuite/ld-powerpc/tocopt5.s @@ -0,0 +1,43 @@ + .section .toc,"aw" +x4t: + .quad x4 +x5t: + .quad x5 +x6t: + .quad x6 + + .section .sdata,"aw" +x1: + .byte 1 +x2: + .byte 2 +x3: + .byte 3 +x4: + .byte 4 +x5: + .byte 5 +x6: + .byte 6 + + .globl _start + .text +_start: +# no need for got entry, optimise to nop,addi +# note: ld doesn't yet do got optimisation, so we get nop,ld + addis 9,2,x1@got@ha + ld 9,x1@got@l(9) +# must keep got entry, optimise to nop,addi,ld + addis 4,2,x2@got@ha + addi 5,4,x2@got@l + ld 6,0(5) + +# no need for toc entry, optimise to nop,addi + addis 9,2,x4t@toc@ha + ld 9,x4t@toc@l(9) +# must keep toc entry, optimise to nop,addi,ld +# if we had a reloc tying the ld to x5/x5t then we could throw away +# the toc entry and optimise to nop,nop,addi + addis 4,2,x5t@toc@ha + addi 5,4,x5t@toc@l + ld 6,0(5)