+2018-04-09 Alan Modra <amodra@gmail.com>
+
+ * elf64-ppc.c (struct _ppc64_elf_section_data): Add has_pltcall field.
+ (struct ppc_link_hash_table): Add can_convert_all_inline_plt.
+ (ppc64_elf_check_relocs): Set has_pltcall.
+ (ppc64_elf_adjust_dynamic_symbol): Discard some PLT entries.
+ (ppc64_elf_inline_plt): New function.
+ (ppc64_elf_size_dynamic_sections): Discard some PLT entries for locals.
+ * elf64-ppc.h (ppc64_elf_inline_plt): Declare.
+ * elf32-ppc.c (has_pltcall): Define.
+ (struct ppc_elf_link_hash_table): Add can_convert_all_inline_plt.
+ (ppc_elf_check_relocs): Set has_pltcall.
+ (ppc_elf_inline_plt): New function.
+ (ppc_elf_adjust_dynamic_symbol): Discard some PLT entries.
+ (ppc_elf_size_dynamic_sections): Likewise.
+ * elf32-ppc.h (ppc_elf_inline_plt): Declare.
+
2018-04-09 Alan Modra <amodra@gmail.com>
* elf32-ppc.c (ppc_elf_howto_raw): Add PLTSEQ and PLTCALL howtos.
/* Set if tls optimization is enabled. */
unsigned int do_tls_opt:1;
+ /* Set if inline plt calls should be converted to direct calls. */
+ unsigned int can_convert_all_inline_plt:1;
+
/* The size of PLT entries. */
int plt_entry_size;
/* The distance between adjacent PLT slots. */
/* Nonzero if this section has a call to __tls_get_addr. */
#define has_tls_get_addr_call sec_flg1
+ /* Flag set when PLTCALL relocs are detected. */
+#define has_pltcall sec_flg2
+
/* Get the PPC ELF linker hash table from a link_info structure. */
#define ppc_elf_hash_table(p) \
if (h == NULL)
break;
ppc_elf_tdata (abfd)->makes_plt_call = 1;
- /* Fall through */
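+	  /* Jump past the R_PPC_PLTCALL case so that has_pltcall is
+	     only set for actual inline PLT call relocs.  */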
+ goto pltentry;
case R_PPC_PLTCALL:
+ sec->has_pltcall = 1;
+ /* Fall through. */
+
case R_PPC_PLT32:
case R_PPC_PLTREL32:
case R_PPC_PLT16_LO:
case R_PPC_PLT16_HI:
case R_PPC_PLT16_HA:
+ pltentry:
#ifdef DEBUG
fprintf (stderr, "Reloc requires a PLT entry\n");
#endif
return TRUE;
}
\f
+/* Analyze inline PLT call relocations to see whether calls to locally
+ defined functions can be converted to direct calls. */
+
+bfd_boolean
+ppc_elf_inline_plt (struct bfd_link_info *info)
+{
+ struct ppc_elf_link_hash_table *htab;
+ bfd *ibfd;
+ asection *sec;
+ bfd_vma low_vma, high_vma, limit;
+
+ htab = ppc_elf_hash_table (info);
+ if (htab == NULL)
+ return FALSE;
+
+ /* A bl insn can reach -0x2000000 to 0x1fffffc. The limit is
+ reduced somewhat to cater for possible stubs that might be added
+ between the call and its destination. */
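+  /* (The bl displacement comes from a 24-bit LI field shifted left
+     two bits and sign-extended, hence the range quoted above.)  */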
+ limit = 0x1e00000;
+ low_vma = -1;
+ high_vma = 0;
+ for (sec = info->output_bfd->sections; sec != NULL; sec = sec->next)
+ if ((sec->flags & (SEC_ALLOC | SEC_CODE)) == (SEC_ALLOC | SEC_CODE))
+ {
+ if (low_vma > sec->vma)
+ low_vma = sec->vma;
+ if (high_vma < sec->vma + sec->size)
+ high_vma = sec->vma + sec->size;
+ }
+
+ /* If a "bl" can reach anywhere in local code sections, then we can
+ convert all inline PLT sequences to direct calls when the symbol
+ is local. */
+ if (high_vma - low_vma < limit)
+ {
+ htab->can_convert_all_inline_plt = 1;
+ return TRUE;
+ }
+
+ /* Otherwise, go looking through relocs for cases where a direct
+ call won't reach. Mark the symbol on any such reloc to disable
+ the optimization and keep the PLT entry as it seems likely that
+ this will be better than creating trampolines. Note that this
+ will disable the optimization for all inline PLT calls to a
+ particular symbol, not just those that won't reach. The
+ difficulty in doing a more precise optimization is that the
+ linker needs to make a decision depending on whether a
+ particular R_PPC_PLTCALL insn can be turned into a direct
+ call, for each of the R_PPC_PLTSEQ and R_PPC_PLT16* insns in
+ the sequence, and there is nothing that ties those relocs
+ together except their symbol. */
+
+ for (ibfd = info->input_bfds; ibfd != NULL; ibfd = ibfd->link.next)
+ {
+ Elf_Internal_Shdr *symtab_hdr;
+ Elf_Internal_Sym *local_syms;
+
+ if (!is_ppc_elf (ibfd))
+ continue;
+
+ local_syms = NULL;
+ symtab_hdr = &elf_symtab_hdr (ibfd);
+
+ for (sec = ibfd->sections; sec != NULL; sec = sec->next)
+ if (sec->has_pltcall
+ && !bfd_is_abs_section (sec->output_section))
+ {
+ Elf_Internal_Rela *relstart, *rel, *relend;
+
+ /* Read the relocations. */
+ relstart = _bfd_elf_link_read_relocs (ibfd, sec, NULL, NULL,
+ info->keep_memory);
+ if (relstart == NULL)
+ return FALSE;
+
+ relend = relstart + sec->reloc_count;
+ for (rel = relstart; rel < relend; rel++)
+ {
+ enum elf_ppc_reloc_type r_type;
+ unsigned long r_symndx;
+ asection *sym_sec;
+ struct elf_link_hash_entry *h;
+ Elf_Internal_Sym *sym;
+ unsigned char *tls_maskp;
+
+ r_type = ELF32_R_TYPE (rel->r_info);
+ if (r_type != R_PPC_PLTCALL)
+ continue;
+
+ r_symndx = ELF32_R_SYM (rel->r_info);
+ if (!get_sym_h (&h, &sym, &sym_sec, &tls_maskp, &local_syms,
+ r_symndx, ibfd))
+ {
+ if (elf_section_data (sec)->relocs != relstart)
+ free (relstart);
+ if (local_syms != NULL
+ && symtab_hdr->contents != (unsigned char *) local_syms)
+ free (local_syms);
+ return FALSE;
+ }
+
+ if (sym_sec != NULL && sym_sec->output_section != NULL)
+ {
+ bfd_vma from, to;
+ if (h != NULL)
+ to = h->root.u.def.value;
+ else
+ to = sym->st_value;
+ to += (rel->r_addend
+ + sym_sec->output_offset
+ + sym_sec->output_section->vma);
+ from = (rel->r_offset
+ + sec->output_offset
+ + sec->output_section->vma);
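+		  /* A single unsigned compare tests both bounds: the
+		     condition holds only when to - from lies in
+		     [-limit, limit), i.e. when a direct bl will reach
+		     the destination.  */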
+ if (to - from + limit < 2 * limit)
+ *tls_maskp &= ~PLT_KEEP;
+ }
+ }
+ if (elf_section_data (sec)->relocs != relstart)
+ free (relstart);
+ }
+
+ if (local_syms != NULL
+ && symtab_hdr->contents != (unsigned char *) local_syms)
+ {
+ if (!info->keep_memory)
+ free (local_syms);
+ else
+ symtab_hdr->contents = (unsigned char *) local_syms;
+ }
+ }
+
+ return TRUE;
+}
+
/* Set plt output section type, htab->tls_get_addr, and call the
generic ELF tls_setup function. */
if (ent == NULL
|| (h->type != STT_GNU_IFUNC
&& local
- && ((ppc_elf_hash_entry (h)->tls_mask & (TLS_TLS | PLT_KEEP))
- != PLT_KEEP)))
+ && (htab->can_convert_all_inline_plt
+ || (ppc_elf_hash_entry (h)->tls_mask
+ & (TLS_TLS | PLT_KEEP)) != PLT_KEEP)))
{
/* A PLT entry is not required/allowed when:
|| (h->needs_plt
&& h->def_regular
&& !htab->elf.dynamic_sections_created
+ && !htab->can_convert_all_inline_plt
&& (ppc_elf_hash_entry (h)->tls_mask
& (TLS_TLS | PLT_KEEP)) == PLT_KEEP))
{
{
if ((*lgot_masks & (TLS_TLS | PLT_IFUNC)) == PLT_IFUNC)
s = htab->elf.iplt;
- else if ((*lgot_masks & (TLS_TLS | PLT_KEEP)) != PLT_KEEP)
+ else if (htab->can_convert_all_inline_plt
+ || (*lgot_masks & (TLS_TLS | PLT_KEEP)) != PLT_KEEP)
{
ent->plt.offset = (bfd_vma) -1;
continue;
void ppc_elf_link_params (struct bfd_link_info *, struct ppc_elf_params *);
int ppc_elf_select_plt_layout (bfd *, struct bfd_link_info *);
+bfd_boolean ppc_elf_inline_plt (struct bfd_link_info *);
asection *ppc_elf_tls_setup (bfd *, struct bfd_link_info *);
bfd_boolean ppc_elf_tls_optimize (bfd *, struct bfd_link_info *);
void ppc_elf_maybe_strip_sdata_syms (struct bfd_link_info *);
/* Flag set when small branches are detected. Used to
select suitable defaults for the stub group size. */
unsigned int has_14bit_branch:1;
+
+ /* Flag set when PLTCALL relocs are detected. */
+ unsigned int has_pltcall:1;
};
#define ppc64_elf_section_data(sec) \
/* Set if tls optimization is enabled. */
unsigned int do_tls_opt:1;
+ /* Set if inline plt calls should be converted to direct calls. */
+ unsigned int can_convert_all_inline_plt:1;
+
/* Set on error. */
unsigned int stub_error:1;
if (dest != sec)
ppc64_elf_section_data (sec)->has_14bit_branch = 1;
}
+ goto rel24;
+
+ case R_PPC64_PLTCALL:
+ ppc64_elf_section_data (sec)->has_pltcall = 1;
/* Fall through. */
case R_PPC64_REL24:
- case R_PPC64_PLTCALL:
+ rel24:
plt_list = ifunc;
if (h != NULL)
{
if (ent == NULL
|| (h->type != STT_GNU_IFUNC
&& local
- && (((struct ppc_link_hash_entry *) h)->tls_mask
- & (TLS_TLS | PLT_KEEP)) != PLT_KEEP))
+ && (htab->can_convert_all_inline_plt
+ || (((struct ppc_link_hash_entry *) h)->tls_mask
+ & (TLS_TLS | PLT_KEEP)) != PLT_KEEP)))
{
h->plt.plist = NULL;
h->needs_plt = 0;
return TRUE;
}
+/* Analyze inline PLT call relocations to see whether calls to locally
+ defined functions can be converted to direct calls. */
+
+bfd_boolean
+ppc64_elf_inline_plt (struct bfd_link_info *info)
+{
+ struct ppc_link_hash_table *htab;
+ bfd *ibfd;
+ asection *sec;
+ bfd_vma low_vma, high_vma, limit;
+
+ htab = ppc_hash_table (info);
+ if (htab == NULL)
+ return FALSE;
+
+ /* A bl insn can reach -0x2000000 to 0x1fffffc. The limit is
+ reduced somewhat to cater for possible stubs that might be added
+ between the call and its destination. */
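+  /* group_size mirrors the --stub-group-size option; the default
+     value of 1 means no size was given, so fall back to the
+     built-in limits below.  */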
+ if (htab->params->group_size < 0)
+ {
+ limit = -htab->params->group_size;
+ if (limit == 1)
+ limit = 0x1e00000;
+ }
+ else
+ {
+ limit = htab->params->group_size;
+ if (limit == 1)
+ limit = 0x1c00000;
+ }
+
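+  /* low_vma starts at the largest bfd_vma so that the first code
+     section brings it down.  */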
+ low_vma = -1;
+ high_vma = 0;
+ for (sec = info->output_bfd->sections; sec != NULL; sec = sec->next)
+ if ((sec->flags & (SEC_ALLOC | SEC_CODE)) == (SEC_ALLOC | SEC_CODE))
+ {
+ if (low_vma > sec->vma)
+ low_vma = sec->vma;
+ if (high_vma < sec->vma + sec->size)
+ high_vma = sec->vma + sec->size;
+ }
+
+ /* If a "bl" can reach anywhere in local code sections, then we can
+ convert all inline PLT sequences to direct calls when the symbol
+ is local. */
+ if (high_vma - low_vma < limit)
+ {
+ htab->can_convert_all_inline_plt = 1;
+ return TRUE;
+ }
+
+ /* Otherwise, go looking through relocs for cases where a direct
+ call won't reach. Mark the symbol on any such reloc to disable
+ the optimization and keep the PLT entry as it seems likely that
+ this will be better than creating trampolines. Note that this
+ will disable the optimization for all inline PLT calls to a
+ particular symbol, not just those that won't reach. The
+ difficulty in doing a more precise optimization is that the
+ linker needs to make a decision depending on whether a
+ particular R_PPC64_PLTCALL insn can be turned into a direct
+ call, for each of the R_PPC64_PLTSEQ and R_PPC64_PLT16* insns in
+ the sequence, and there is nothing that ties those relocs
+ together except their symbol. */
+
+ for (ibfd = info->input_bfds; ibfd != NULL; ibfd = ibfd->link.next)
+ {
+ Elf_Internal_Shdr *symtab_hdr;
+ Elf_Internal_Sym *local_syms;
+
+ if (!is_ppc64_elf (ibfd))
+ continue;
+
+ local_syms = NULL;
+ symtab_hdr = &elf_symtab_hdr (ibfd);
+
+ for (sec = ibfd->sections; sec != NULL; sec = sec->next)
+ if (ppc64_elf_section_data (sec)->has_pltcall
+ && !bfd_is_abs_section (sec->output_section))
+ {
+ Elf_Internal_Rela *relstart, *rel, *relend;
+
+ /* Read the relocations. */
+ relstart = _bfd_elf_link_read_relocs (ibfd, sec, NULL, NULL,
+ info->keep_memory);
+ if (relstart == NULL)
+ return FALSE;
+
+ relend = relstart + sec->reloc_count;
+ for (rel = relstart; rel < relend; rel++)
+ {
+ enum elf_ppc64_reloc_type r_type;
+ unsigned long r_symndx;
+ asection *sym_sec;
+ struct elf_link_hash_entry *h;
+ Elf_Internal_Sym *sym;
+ unsigned char *tls_maskp;
+
+ r_type = ELF64_R_TYPE (rel->r_info);
+ if (r_type != R_PPC64_PLTCALL)
+ continue;
+
+ r_symndx = ELF64_R_SYM (rel->r_info);
+ if (!get_sym_h (&h, &sym, &sym_sec, &tls_maskp, &local_syms,
+ r_symndx, ibfd))
+ {
+ if (elf_section_data (sec)->relocs != relstart)
+ free (relstart);
+ if (local_syms != NULL
+ && symtab_hdr->contents != (unsigned char *) local_syms)
+ free (local_syms);
+ return FALSE;
+ }
+
+ if (sym_sec != NULL && sym_sec->output_section != NULL)
+ {
+ bfd_vma from, to;
+ if (h != NULL)
+ to = h->root.u.def.value;
+ else
+ to = sym->st_value;
+ to += (rel->r_addend
+ + sym_sec->output_offset
+ + sym_sec->output_section->vma);
+ from = (rel->r_offset
+ + sec->output_offset
+ + sec->output_section->vma);
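+		  /* If a direct call reaches, clear PLT_KEEP so that
+		     the later PLT sizing code discards this symbol's
+		     PLT entry.  */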
+ if (to - from + limit < 2 * limit)
+ *tls_maskp &= ~PLT_KEEP;
+ }
+ }
+ if (elf_section_data (sec)->relocs != relstart)
+ free (relstart);
+ }
+
+ if (local_syms != NULL
+ && symtab_hdr->contents != (unsigned char *) local_syms)
+ {
+ if (!info->keep_memory)
+ free (local_syms);
+ else
+ symtab_hdr->contents = (unsigned char *) local_syms;
+ }
+ }
+
+ return TRUE;
+}
+
/* Set htab->tls_get_addr and call the generic ELF tls_setup function. */
asection *
|| (h->needs_plt
&& h->def_regular
&& !htab->elf.dynamic_sections_created
+ && !htab->can_convert_all_inline_plt
&& (((struct ppc_link_hash_entry *) h)->tls_mask
& (TLS_TLS | PLT_KEEP)) == PLT_KEEP))
{
s->size += PLT_ENTRY_SIZE (htab);
htab->elf.irelplt->size += sizeof (Elf64_External_Rela);
}
- else if ((*lgot_masks & (TLS_TLS | PLT_KEEP)) != PLT_KEEP)
+ else if (htab->can_convert_all_inline_plt
+ || (*lgot_masks & (TLS_TLS | PLT_KEEP)) != PLT_KEEP)
ent->plt.offset = (bfd_vma) -1;
else
{
(struct bfd_link_info *, struct ppc64_elf_params *);
bfd_boolean ppc64_elf_edit_opd
(struct bfd_link_info *);
+bfd_boolean ppc64_elf_inline_plt
+ (struct bfd_link_info *);
asection *ppc64_elf_tls_setup
(struct bfd_link_info *);
bfd_boolean ppc64_elf_tls_optimize
+2018-04-09 Alan Modra <amodra@gmail.com>
+
+ * emultempl/ppc64elf.em (no_inline_opt): New var.
+ (ppc_before_allocation): Call ppc64_elf_inline_plt.
+ (enum ppc64_opt): Add OPTION_NO_INLINE_OPT.
+ (PARSE_AND_LIST_LONGOPTS, PARSE_AND_LIST_OPTIONS,
+ PARSE_AND_LIST_ARGS_CASES): Handle --no-inline-optimize.
+ * emultempl/ppc32elf.em (no_inline_opt): New var.
+ (prelim_size_sections): New function, extracted from..
+ (ppc_before_allocation): ..here. Call ppc_elf_inline_plt.
+ (enum ppc32_opt): Add OPTION_NO_INLINE_OPT.
+ (PARSE_AND_LIST_LONGOPTS, PARSE_AND_LIST_OPTIONS,
+ PARSE_AND_LIST_ARGS_CASES): Handle --no-inline-optimize.
+
2018-04-09 Alan Modra <amodra@gmail.com>
* emulparams/elf32ppc.sh (OTHER_RELRO_SECTIONS_2): Add .branch_lt.
/* Whether to run tls optimization. */
static int notlsopt = 0;
+/* Whether to convert inline PLT calls to direct calls. */
+static int no_inline_opt = 0;
+
/* Choose the correct place for .got. */
static int old_got = 0;
EOF
fi
fragment <<EOF
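+/* Run a preliminary section layout so that output section vmas are
+   available to the optimization code below.  */
+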
+static void
+prelim_size_sections (void)
+{
+ if (expld.phase != lang_mark_phase_enum)
+ {
+ expld.phase = lang_mark_phase_enum;
+ expld.dataseg.phase = exp_seg_none;
+ one_lang_size_sections_pass (NULL, FALSE);
+ /* We must not cache anything from the preliminary sizing. */
+ lang_reset_memory_regions ();
+ }
+}
+
static void
ppc_before_allocation (void)
{
if (is_ppc_elf (link_info.output_bfd))
{
+ if (!no_inline_opt
+ && !bfd_link_relocatable (&link_info))
+ {
+ prelim_size_sections ();
+
+ if (!ppc_elf_inline_plt (&link_info))
+ einfo (_("%X%P: inline PLT: %E\n"));
+ }
+
if (ppc_elf_tls_setup (link_info.output_bfd, &link_info)
&& !notlsopt)
{
asection *o;
/* Run lang_size_sections (if not already done). */
- if (expld.phase != lang_mark_phase_enum)
- {
- expld.phase = lang_mark_phase_enum;
- expld.dataseg.phase = exp_seg_none;
- one_lang_size_sections_pass (NULL, FALSE);
- lang_reset_memory_regions ();
- }
+ prelim_size_sections ();
for (o = link_info.output_bfd->sections; o != NULL; o = o->next)
{
OPTION_OLD_PLT,
OPTION_PLT_ALIGN,
OPTION_NO_PLT_ALIGN,
+ OPTION_NO_INLINE_OPT,
OPTION_OLD_GOT,
OPTION_STUBSYMS,
OPTION_NO_STUBSYMS,
{ "bss-plt", no_argument, NULL, OPTION_OLD_PLT },
{ "plt-align", optional_argument, NULL, OPTION_PLT_ALIGN },
{ "no-plt-align", no_argument, NULL, OPTION_NO_PLT_ALIGN },
+ { "no-inline-optimize", no_argument, NULL, OPTION_NO_INLINE_OPT },
{ "sdata-got", no_argument, NULL, OPTION_OLD_GOT },'
fi
PARSE_AND_LIST_LONGOPTS=${PARSE_AND_LIST_LONGOPTS}'
--no-plt-align Don'\''t align individual PLT call stubs\n"
));
fprintf (file, _("\
+ --no-inline-optimize Don'\''t convert inline PLT to direct calls\n"
+ ));
+ fprintf (file, _("\
--sdata-got Force GOT location just before .sdata\n"
));'
fi
params.plt_stub_align = 0;
break;
+ case OPTION_NO_INLINE_OPT:
+ no_inline_opt = 1;
+ break;
+
case OPTION_OLD_GOT:
old_got = 1;
break;
/* Whether to run opd optimization. */
static int no_opd_opt = 0;
+/* Whether to convert inline PLT calls to direct calls. */
+static int no_inline_opt = 0;
+
/* Whether to run toc optimization. */
static int no_toc_opt = 0;
&& !ppc64_elf_edit_opd (&link_info))
einfo (_("%X%P: can not edit %s: %E\n"), "opd");
+ if (!no_inline_opt
+ && !bfd_link_relocatable (&link_info))
+ {
+ prelim_size_sections ();
+
+ if (!ppc64_elf_inline_plt (&link_info))
+ einfo (_("%X%P: inline PLT: %E\n"));
+ }
+
if (ppc64_elf_tls_setup (&link_info)
&& !no_tls_opt)
{
OPTION_TLS_GET_ADDR_OPT,
OPTION_NO_TLS_GET_ADDR_OPT,
OPTION_NO_OPD_OPT,
+ OPTION_NO_INLINE_OPT,
OPTION_NO_TOC_OPT,
OPTION_NO_MULTI_TOC,
OPTION_NO_TOC_SORT,
{ "tls-get-addr-optimize", no_argument, NULL, OPTION_TLS_GET_ADDR_OPT },
{ "no-tls-get-addr-optimize", no_argument, NULL, OPTION_NO_TLS_GET_ADDR_OPT },
{ "no-opd-optimize", no_argument, NULL, OPTION_NO_OPD_OPT },
+ { "no-inline-optimize", no_argument, NULL, OPTION_NO_INLINE_OPT },
{ "no-toc-optimize", no_argument, NULL, OPTION_NO_TOC_OPT },
{ "no-multi-toc", no_argument, NULL, OPTION_NO_MULTI_TOC },
{ "no-toc-sort", no_argument, NULL, OPTION_NO_TOC_SORT },
--no-opd-optimize Don'\''t optimize the OPD section\n"
));
fprintf (file, _("\
+ --no-inline-optimize Don'\''t convert inline PLT to direct calls\n"
+ ));
+ fprintf (file, _("\
--no-toc-optimize Don'\''t optimize the TOC section\n"
));
fprintf (file, _("\
no_opd_opt = 1;
break;
+ case OPTION_NO_INLINE_OPT:
+ no_inline_opt = 1;
+ break;
+
case OPTION_NO_TOC_OPT:
no_toc_opt = 1;
break;