From 3cd7c7d7ef38ec5dc0a0c137c47d9ad0fc9e2e5f Mon Sep 17 00:00:00 2001 From: Alan Modra Date: Sat, 26 Sep 2020 15:10:09 +0930 Subject: [PATCH] PPC64_OPT_LOCALENTRY is incompatible with tail calls The save of r2 in __glink_PLTresolve is the culprit. Remove it, unless we know we need it for --plt-localentry. --plt-localentry should not be used with power10 pc-relative code that makes tail calls. The patch also removes use of r2 as a scratch reg in the ELFv2 __glink_PLTresolve. Using r2 isn't a problem, this is just reducing the number of scratch regs. bfd/ * elf64-ppc.c (GLINK_PLTRESOLVE_SIZE): Depend on has_plt_localentry0. (LD_R0_0R11, ADD_R11_R0_R11): Define. (ppc64_elf_tls_setup): Disable params->plt_localentry0 when power10 code detected. (ppc64_elf_size_stubs): Update __glink_PLTresolve eh_frame. (ppc64_elf_build_stubs): Move r2 save to start of __glink_PLTresolve, and only emit for has_plt_localentry0. Don't use r2 in the stub. ld/ * testsuite/ld-powerpc/elfv2so.d, * testsuite/ld-powerpc/notoc2.d, * testsuite/ld-powerpc/tlsdesc.wf, * testsuite/ld-powerpc/tlsdesc2.d, * testsuite/ld-powerpc/tlsdesc2.wf, * testsuite/ld-powerpc/tlsopt5.d, * testsuite/ld-powerpc/tlsopt5.wf, * testsuite/ld-powerpc/tlsopt6.d, * testsuite/ld-powerpc/tlsopt6.wf: Update __glink_PLTresolve. --- bfd/ChangeLog | 10 ++++ bfd/elf64-ppc.c | 84 +++++++++++++++++++++++------ ld/ChangeLog | 12 +++++ ld/testsuite/ld-powerpc/elfv2so.d | 17 +++--- ld/testsuite/ld-powerpc/notoc2.d | 4 +- ld/testsuite/ld-powerpc/tlsdesc.wf | 4 +- ld/testsuite/ld-powerpc/tlsdesc2.d | 9 ++-- ld/testsuite/ld-powerpc/tlsdesc2.wf | 6 +-- ld/testsuite/ld-powerpc/tlsopt5.d | 11 ++-- ld/testsuite/ld-powerpc/tlsopt5.wf | 4 +- ld/testsuite/ld-powerpc/tlsopt6.d | 11 ++-- ld/testsuite/ld-powerpc/tlsopt6.wf | 4 +- 12 files changed, 124 insertions(+), 52 deletions(-) diff --git a/bfd/ChangeLog b/bfd/ChangeLog index f642403309b..8d0e695422a 100644 --- a/bfd/ChangeLog +++ b/bfd/ChangeLog @@ -1,3 +1,13 @@ +2020-09-26 Alan Modra + + * elf64-ppc.c (GLINK_PLTRESOLVE_SIZE): Depend on has_plt_localentry0. + (LD_R0_0R11, ADD_R11_R0_R11): Define. + (ppc64_elf_tls_setup): Disable params->plt_localentry0 when power10 + code detected. + (ppc64_elf_size_stubs): Update __glink_PLTresolve eh_frame. + (ppc64_elf_build_stubs): Move r2 save to start of __glink_PLTresolve, + and only emit for has_plt_localentry0. Don't use r2 in the stub. + 2020-09-24 Alan Modra PR 26656 diff --git a/bfd/elf64-ppc.c b/bfd/elf64-ppc.c index 320717cb453..4194802d48b 100644 --- a/bfd/elf64-ppc.c +++ b/bfd/elf64-ppc.c @@ -211,9 +211,10 @@ static bfd_vma opd_entry_value #define PLD_R12_PC 0x04100000e5800000ULL #define PNOP 0x0700000000000000ULL -/* __glink_PLTresolve stub instructions. We enter with the index in R0. */ +/* __glink_PLTresolve stub instructions. We enter with the index in + R0 for ELFv1, and the address of a glink branch in R12 for ELFv2. */ #define GLINK_PLTRESOLVE_SIZE(htab) \ - (8u + (htab->opd_abi ? 11 * 4 : 14 * 4)) + (8u + (htab->opd_abi ? 11 * 4 : htab->has_plt_localentry0 ? 14 * 4 : 13 * 4)) /* 0: */ /* .quad plt0-1f */ /* __glink: */ @@ -229,11 +230,14 @@ static bfd_vma opd_entry_value /* mtctr %12 */ /* ld %11,16(%11) */ /* bctr */ -#define MFLR_R0 0x7c0802a6 /* mflr %r0 */ -#define MTLR_R0 0x7c0803a6 /* mtlr %r0 */ -#define SUB_R12_R12_R11 0x7d8b6050 /* subf %r12,%r11,%r12 */ -#define ADDI_R0_R12 0x380c0000 /* addi %r0,%r12,0 */ -#define SRDI_R0_R0_2 0x7800f082 /* rldicl %r0,%r0,62,2 */ + +#define MFLR_R0 0x7c0802a6 /* mflr %r0 */ +#define MTLR_R0 0x7c0803a6 /* mtlr %r0 */ +#define SUB_R12_R12_R11 0x7d8b6050 /* subf %r12,%r11,%r12 */ +#define ADDI_R0_R12 0x380c0000 /* addi %r0,%r12,0 */ +#define SRDI_R0_R0_2 0x7800f082 /* rldicl %r0,%r0,62,2 */ +#define LD_R0_0R11 0xe80b0000 /* ld %r0,0(%r11) */ +#define ADD_R11_R0_R11 0x7d605a14 /* add %r11,%r0,%r11 */ /* Pad with this. */ #define NOP 0x60000000 @@ -7736,6 +7740,19 @@ ppc64_elf_tls_setup (struct bfd_link_info *info) --plt-localentry can cause trouble. */ if (htab->params->plt_localentry0 < 0) htab->params->plt_localentry0 = 0; + if (htab->params->plt_localentry0 && htab->has_power10_relocs) + { + /* The issue is that __glink_PLTresolve saves r2, which is done + because glibc ld.so _dl_runtime_resolve restores r2 to support + a glibc plt call optimisation where global entry code is + skipped on calls that resolve to the same binary. The + __glink_PLTresolve save of r2 is incompatible with code + making tail calls, because the tail call might go via the + resolver and thus overwrite the proper saved r2. */ + _bfd_error_handler (_("warning: --plt-localentry is incompatible with " + "power10 pc-relative code")); + htab->params->plt_localentry0 = 0; + } if (htab->params->plt_localentry0 && elf_link_hash_lookup (&htab->elf, "GLIBC_2.26", FALSE, FALSE, FALSE) == NULL) @@ -13874,11 +13891,11 @@ ppc64_elf_size_stubs (struct bfd_link_info *info) /* Augmentation. */ p += 1; - *p++ = DW_CFA_advance_loc + 1; + *p++ = DW_CFA_advance_loc + (htab->has_plt_localentry0 ? 3 : 2); *p++ = DW_CFA_register; *p++ = 65; *p++ = htab->opd_abi ? 12 : 0; - *p++ = DW_CFA_advance_loc + (htab->opd_abi ? 5 : 7); + *p++ = DW_CFA_advance_loc + (htab->opd_abi ? 4 : 2); *p++ = DW_CFA_restore_extended; *p++ = 65; p += ((24 + align - 1) & -align) - 24; @@ -14474,23 +14491,60 @@ ppc64_elf_build_stubs (struct bfd_link_info *info, } else { + unsigned int insn; + + /* 0: + . .quad plt0-1f # plt0 entry relative to 1: + # + # We get here with r12 initially @ a glink branch + # Load the address of _dl_runtime_resolve from plt0 and + # jump to it, with r0 set to the index of the PLT entry + # to be resolved and r11 the link map. + __glink_PLTresolve: + . std %r2,24(%r1) # optional + . mflr %r0 + . bcl 20,31,1f + 1: + . mflr %r11 + . mtlr %r0 + . ld %r0,(0b-1b)(%r11) + . sub %r12,%r12,%r11 + . add %r11,%r0,%r11 + . addi %r0,%r12,1b-2f + . ld %r12,0(%r11) + . srdi %r0,%r0,2 + . mtctr %r12 + . ld %r11,8(%r11) + . bctr + 2: + . b __glink_PLTresolve + . ... + . b __glink_PLTresolve */ + + if (htab->has_plt_localentry0) + { + bfd_put_32 (htab->glink->owner, STD_R2_0R1 + 24, p); + p += 4; + } bfd_put_32 (htab->glink->owner, MFLR_R0, p); p += 4; bfd_put_32 (htab->glink->owner, BCL_20_31, p); p += 4; bfd_put_32 (htab->glink->owner, MFLR_R11, p); p += 4; - bfd_put_32 (htab->glink->owner, STD_R2_0R1 + 24, p); - p += 4; - bfd_put_32 (htab->glink->owner, LD_R2_0R11 | (-16 & 0xfffc), p); - p += 4; bfd_put_32 (htab->glink->owner, MTLR_R0, p); p += 4; + if (htab->has_plt_localentry0) + insn = LD_R0_0R11 | (-20 & 0xfffc); + else + insn = LD_R0_0R11 | (-16 & 0xfffc); + bfd_put_32 (htab->glink->owner, insn, p); + p += 4; bfd_put_32 (htab->glink->owner, SUB_R12_R12_R11, p); p += 4; - bfd_put_32 (htab->glink->owner, ADD_R11_R2_R11, p); + bfd_put_32 (htab->glink->owner, ADD_R11_R0_R11, p); p += 4; - bfd_put_32 (htab->glink->owner, ADDI_R0_R12 | (-48 & 0xffff), p); + bfd_put_32 (htab->glink->owner, ADDI_R0_R12 | (-44 & 0xffff), p); p += 4; bfd_put_32 (htab->glink->owner, LD_R12_0R11, p); p += 4; diff --git a/ld/ChangeLog b/ld/ChangeLog index 0662ac2ca78..ea28290e7ad 100644 --- a/ld/ChangeLog +++ b/ld/ChangeLog @@ -1,3 +1,15 @@ +2020-09-26 Alan Modra + + * testsuite/ld-powerpc/elfv2so.d, + * testsuite/ld-powerpc/notoc2.d, + * testsuite/ld-powerpc/tlsdesc.wf, + * testsuite/ld-powerpc/tlsdesc2.d, + * testsuite/ld-powerpc/tlsdesc2.wf, + * testsuite/ld-powerpc/tlsopt5.d, + * testsuite/ld-powerpc/tlsopt5.wf, + * testsuite/ld-powerpc/tlsopt6.d, + * testsuite/ld-powerpc/tlsopt6.wf: Update __glink_PLTresolve. + 2020-09-24 Alan Modra PR 26655 diff --git a/ld/testsuite/ld-powerpc/elfv2so.d b/ld/testsuite/ld-powerpc/elfv2so.d index 0162bd0880e..4018f0536c6 100644 --- a/ld/testsuite/ld-powerpc/elfv2so.d +++ b/ld/testsuite/ld-powerpc/elfv2so.d @@ -74,12 +74,11 @@ Disassembly of section \.text: .*: (7c 08 02 a6|a6 02 08 7c) mflr r0 .*: (42 9f 00 05|05 00 9f 42) bcl .* .*: (7d 68 02 a6|a6 02 68 7d) mflr r11 -.*: (18 00 41 f8|f8 41 00 18) std r2,24\(r1\) -.*: (e8 4b ff f0|f0 ff 4b e8) ld r2,-16\(r11\) .*: (7c 08 03 a6|a6 03 08 7c) mtlr r0 +.*: (e8 0b ff f0|f0 ff 0b e8) ld r0,-16\(r11\) .*: (7d 8b 60 50|50 60 8b 7d) subf r12,r11,r12 -.*: (7d 62 5a 14|14 5a 62 7d) add r11,r2,r11 -.*: (38 0c ff d0|d0 ff 0c 38) addi r0,r12,-48 +.*: (7d 60 5a 14|14 5a 60 7d) add r11,r0,r11 +.*: (38 0c ff d4|d4 ff 0c 38) addi r0,r12,-44 .*: (e9 8b 00 00|00 00 8b e9) ld r12,0\(r11\) .*: (78 00 f0 82|82 f0 00 78) rldicl r0,r0,62,2 .*: (7d 89 03 a6|a6 03 89 7d) mtctr r12 @@ -87,16 +86,16 @@ Disassembly of section \.text: .*: (4e 80 04 20|20 04 80 4e) bctr .* : -.*: (4b ff ff c8|c8 ff ff 4b) b .* <__glink_PLTresolve> +.*: (4b ff ff cc|cc ff ff 4b) b .* <__glink_PLTresolve> .* : -.*: (4b ff ff c4|c4 ff ff 4b) b .* <__glink_PLTresolve> +.*: (4b ff ff c8|c8 ff ff 4b) b .* <__glink_PLTresolve> .* : -.*: (4b ff ff c0|c0 ff ff 4b) b .* <__glink_PLTresolve> +.*: (4b ff ff c4|c4 ff ff 4b) b .* <__glink_PLTresolve> .* : -.*: (4b ff ff bc|bc ff ff 4b) b .* <__glink_PLTresolve> +.*: (4b ff ff c0|c0 ff ff 4b) b .* <__glink_PLTresolve> .* : -.*: (4b ff ff b8|b8 ff ff 4b) b .* <__glink_PLTresolve> +.*: (4b ff ff bc|bc ff ff 4b) b .* <__glink_PLTresolve> diff --git a/ld/testsuite/ld-powerpc/notoc2.d b/ld/testsuite/ld-powerpc/notoc2.d index 1e519c0d1b0..3448f8b37e6 100644 --- a/ld/testsuite/ld-powerpc/notoc2.d +++ b/ld/testsuite/ld-powerpc/notoc2.d @@ -22,8 +22,8 @@ Disassembly of section \.text: .*: (39 80 ff ff|ff ff 80 39) .*: (06 10 00 00|00 00 10 06) pla r12,0 .*: (39 80 00 00|00 00 80 39) -.*: (06 10 00 00|00 00 10 06) pla r3,92 -.*: (38 60 00 5c|5c 00 60 38) +.*: (06 10 00 00|00 00 10 06) pla r3,88 +.*: (38 60 00 58|58 00 60 38) .*: (4b ff ff 99|99 ff ff 4b) bl .* <.*\.plt_call\.puts> .*: (60 00 00 00|00 00 00 60) nop #pass diff --git a/ld/testsuite/ld-powerpc/tlsdesc.wf b/ld/testsuite/ld-powerpc/tlsdesc.wf index 09503fa0053..e7d4522b507 100644 --- a/ld/testsuite/ld-powerpc/tlsdesc.wf +++ b/ld/testsuite/ld-powerpc/tlsdesc.wf @@ -38,9 +38,9 @@ Contents of the \.eh_frame section: DW_CFA_nop 0+4c 0+14 0+50 FDE cie=0+ pc=0+2f8\.\.0+32c - DW_CFA_advance_loc: 4 to 0+2fc + DW_CFA_advance_loc: 8 to 0+300 DW_CFA_register: r65 in r12 - DW_CFA_advance_loc: 20 to 0+310 + DW_CFA_advance_loc: 16 to 0+310 DW_CFA_restore_extended: r65 0+64 0+10 0+68 FDE cie=0+ pc=0+2e0\.\.0+2ec diff --git a/ld/testsuite/ld-powerpc/tlsdesc2.d b/ld/testsuite/ld-powerpc/tlsdesc2.d index 47aedbecb88..c271c949b01 100644 --- a/ld/testsuite/ld-powerpc/tlsdesc2.d +++ b/ld/testsuite/ld-powerpc/tlsdesc2.d @@ -53,12 +53,11 @@ Disassembly of section \.text: .*: (7c 08 02 a6|a6 02 08 7c) mflr r0 .*: (42 9f 00 05|05 00 9f 42) bcl .* .*: (7d 68 02 a6|a6 02 68 7d) mflr r11 -.*: (f8 41 00 18|18 00 41 f8) std r2,24\(r1\) -.*: (e8 4b ff f0|f0 ff 4b e8) ld r2,-16\(r11\) .*: (7c 08 03 a6|a6 03 08 7c) mtlr r0 +.*: (e8 0b ff f0|f0 ff 0b e8) ld r0,-16\(r11\) .*: (7d 8b 60 50|50 60 8b 7d) subf r12,r11,r12 -.*: (7d 62 5a 14|14 5a 62 7d) add r11,r2,r11 -.*: (38 0c ff d0|d0 ff 0c 38) addi r0,r12,-48 +.*: (7d 60 5a 14|14 5a 60 7d) add r11,r0,r11 +.*: (38 0c ff d4|d4 ff 0c 38) addi r0,r12,-44 .*: (e9 8b 00 00|00 00 8b e9) ld r12,0\(r11\) .*: (78 00 f0 82|82 f0 00 78) rldicl r0,r0,62,2 .*: (7d 89 03 a6|a6 03 89 7d) mtctr r12 @@ -66,4 +65,4 @@ Disassembly of section \.text: .*: (4e 80 04 20|20 04 80 4e) bctr .* <__tls_get_addr_opt@plt>: -.*: (4b ff ff c8|c8 ff ff 4b) b .* <__glink_PLTresolve> +.*: (4b ff ff cc|cc ff ff 4b) b .* <__glink_PLTresolve> diff --git a/ld/testsuite/ld-powerpc/tlsdesc2.wf b/ld/testsuite/ld-powerpc/tlsdesc2.wf index cb92c294b15..79a417ba06e 100644 --- a/ld/testsuite/ld-powerpc/tlsdesc2.wf +++ b/ld/testsuite/ld-powerpc/tlsdesc2.wf @@ -37,10 +37,10 @@ Contents of the \.eh_frame section: DW_CFA_nop DW_CFA_nop -0+4c 0+14 0+50 FDE cie=0+ pc=0+318\.\.0+354 - DW_CFA_advance_loc: 4 to 0+31c +0+4c 0+14 0+50 FDE cie=0+ pc=0+318\.\.0+350 + DW_CFA_advance_loc: 8 to 0+320 DW_CFA_register: r65 in r0 - DW_CFA_advance_loc: 28 to 0+338 + DW_CFA_advance_loc: 8 to 0+328 DW_CFA_restore_extended: r65 0+64 0+10 0+68 FDE cie=0+ pc=0+300\.\.0+30c diff --git a/ld/testsuite/ld-powerpc/tlsopt5.d b/ld/testsuite/ld-powerpc/tlsopt5.d index 0fcb79821b8..efd6debc555 100644 --- a/ld/testsuite/ld-powerpc/tlsopt5.d +++ b/ld/testsuite/ld-powerpc/tlsopt5.d @@ -49,12 +49,11 @@ Disassembly of section \.text: .*: (a6 02 08 7c|7c 08 02 a6) mflr r0 .*: (05 00 9f 42|42 9f 00 05) bcl .* .*: (a6 02 68 7d|7d 68 02 a6) mflr r11 -.*: (18 00 41 f8|f8 41 00 18) std r2,24\(r1\) -.*: (f0 ff 4b e8|e8 4b ff f0) ld r2,-16\(r11\) .*: (a6 03 08 7c|7c 08 03 a6) mtlr r0 +.*: (f0 ff 0b e8|e8 0b ff f0) ld r0,-16\(r11\) .*: (50 60 8b 7d|7d 8b 60 50) subf r12,r11,r12 -.*: (14 5a 62 7d|7d 62 5a 14) add r11,r2,r11 -.*: (d0 ff 0c 38|38 0c ff d0) addi r0,r12,-48 +.*: (14 5a 60 7d|7d 60 5a 14) add r11,r0,r11 +.*: (d4 ff 0c 38|38 0c ff d4) addi r0,r12,-44 .*: (00 00 8b e9|e9 8b 00 00) ld r12,0\(r11\) .*: (82 f0 00 78|78 00 f0 82) rldicl r0,r0,62,2 .*: (a6 03 89 7d|7d 89 03 a6) mtctr r12 @@ -62,7 +61,7 @@ Disassembly of section \.text: .*: (20 04 80 4e|4e 80 04 20) bctr .* <__tls_get_addr_opt@plt>: -.* (c8 ff ff 4b|4b ff ff c8) b .* +.* (cc ff ff 4b|4b ff ff cc) b .* .* : -.*: (c4 ff ff 4b|4b ff ff c4) b .* +.*: (c8 ff ff 4b|4b ff ff c8) b .* diff --git a/ld/testsuite/ld-powerpc/tlsopt5.wf b/ld/testsuite/ld-powerpc/tlsopt5.wf index f0453610e08..84bd94ed728 100644 --- a/ld/testsuite/ld-powerpc/tlsopt5.wf +++ b/ld/testsuite/ld-powerpc/tlsopt5.wf @@ -16,9 +16,9 @@ Contents of the \.eh_frame section: DW_CFA_restore_extended: r65 0+2c 0+14 0+30 FDE cie=0+ pc=.* - DW_CFA_advance_loc: 4 to .* + DW_CFA_advance_loc: 8 to .* DW_CFA_register: r65 in r0 - DW_CFA_advance_loc: 28 to .* + DW_CFA_advance_loc: 8 to .* DW_CFA_restore_extended: r65 0+44 0+10 0+48 FDE cie=0+ pc=.* diff --git a/ld/testsuite/ld-powerpc/tlsopt6.d b/ld/testsuite/ld-powerpc/tlsopt6.d index 4ca64092c1d..15def719cba 100644 --- a/ld/testsuite/ld-powerpc/tlsopt6.d +++ b/ld/testsuite/ld-powerpc/tlsopt6.d @@ -67,12 +67,11 @@ Disassembly of section \.text: .*: (a6 02 08 7c|7c 08 02 a6) mflr r0 .*: (05 00 9f 42|42 9f 00 05) bcl .* .*: (a6 02 68 7d|7d 68 02 a6) mflr r11 -.*: (18 00 41 f8|f8 41 00 18) std r2,24\(r1\) -.*: (f0 ff 4b e8|e8 4b ff f0) ld r2,-16\(r11\) .*: (a6 03 08 7c|7c 08 03 a6) mtlr r0 +.*: (f0 ff 0b e8|e8 0b ff f0) ld r0,-16\(r11\) .*: (50 60 8b 7d|7d 8b 60 50) subf r12,r11,r12 -.*: (14 5a 62 7d|7d 62 5a 14) add r11,r2,r11 -.*: (d0 ff 0c 38|38 0c ff d0) addi r0,r12,-48 +.*: (14 5a 60 7d|7d 60 5a 14) add r11,r0,r11 +.*: (d4 ff 0c 38|38 0c ff d4) addi r0,r12,-44 .*: (00 00 8b e9|e9 8b 00 00) ld r12,0\(r11\) .*: (82 f0 00 78|78 00 f0 82) rldicl r0,r0,62,2 .*: (a6 03 89 7d|7d 89 03 a6) mtctr r12 @@ -80,7 +79,7 @@ Disassembly of section \.text: .*: (20 04 80 4e|4e 80 04 20) bctr .* <__tls_get_addr_opt@plt>: -.* (c8 ff ff 4b|4b ff ff c8) b .* +.* (cc ff ff 4b|4b ff ff cc) b .* .* : -.*: (c4 ff ff 4b|4b ff ff c4) b .* +.*: (c8 ff ff 4b|4b ff ff c8) b .* diff --git a/ld/testsuite/ld-powerpc/tlsopt6.wf b/ld/testsuite/ld-powerpc/tlsopt6.wf index abb414a6339..c2b96163523 100644 --- a/ld/testsuite/ld-powerpc/tlsopt6.wf +++ b/ld/testsuite/ld-powerpc/tlsopt6.wf @@ -38,9 +38,9 @@ Contents of the \.eh_frame section: DW_CFA_nop 0+4c 0+14 0+50 FDE cie=0+ pc=.* - DW_CFA_advance_loc: 4 to .* + DW_CFA_advance_loc: 8 to .* DW_CFA_register: r65 in r0 - DW_CFA_advance_loc: 28 to .* + DW_CFA_advance_loc: 8 to .* DW_CFA_restore_extended: r65 0+64 0+10 0+68 FDE cie=0+ pc=.* -- 2.30.2