From 5834f36d93cabf1a8bcc7dd7654141aed3d296bc Mon Sep 17 00:00:00 2001 From: Szabolcs Nagy Date: Fri, 3 Feb 2023 17:09:58 +0000 Subject: [PATCH] bfd: aarch64: Optimize BTI stubs PR30076 Don't insert a second stub if the target is already compatible with an indirect branch. --- bfd/elfnn-aarch64.c | 36 +++++- ld/testsuite/ld-aarch64/aarch64-elf.exp | 1 + ld/testsuite/ld-aarch64/bti-far-opt.d | 153 ++++++++++++++++++++++++ ld/testsuite/ld-aarch64/bti-far-opt.s | 53 ++++++++ 4 files changed, 239 insertions(+), 4 deletions(-) create mode 100644 ld/testsuite/ld-aarch64/bti-far-opt.d create mode 100644 ld/testsuite/ld-aarch64/bti-far-opt.s diff --git a/bfd/elfnn-aarch64.c b/bfd/elfnn-aarch64.c index d9ebeae922c..c321ceb382e 100644 --- a/bfd/elfnn-aarch64.c +++ b/bfd/elfnn-aarch64.c @@ -3670,14 +3670,42 @@ group_sections (struct elf_aarch64_link_hash_table *htab, #undef PREV_SEC #undef PREV_SEC +#define AARCH64_HINT(insn) (((insn) & 0xfffff01f) == 0xd503201f) +#define AARCH64_PACIASP 0xd503233f +#define AARCH64_PACIBSP 0xd503237f +#define AARCH64_BTI_C 0xd503245f +#define AARCH64_BTI_J 0xd503249f +#define AARCH64_BTI_JC 0xd50324df + /* True if the inserted stub does not break BTI compatibility. */ static bool -aarch64_bti_stub_p (struct elf_aarch64_stub_hash_entry *stub_entry) +aarch64_bti_stub_p (bfd *input_bfd, + struct elf_aarch64_stub_hash_entry *stub_entry) { /* Stubs without indirect branch are BTI compatible. */ - return stub_entry->stub_type != aarch64_stub_adrp_branch - && stub_entry->stub_type != aarch64_stub_long_branch; + if (stub_entry->stub_type != aarch64_stub_adrp_branch + && stub_entry->stub_type != aarch64_stub_long_branch) + return true; + + /* Return true if the target instruction is compatible with BR x16. */ + + asection *section = stub_entry->target_section; + bfd_byte loc[4]; + file_ptr off = stub_entry->target_value; + bfd_size_type count = sizeof (loc); + + if (!bfd_get_section_contents (input_bfd, section, loc, off, count)) + return false; + + uint32_t insn = bfd_getl32 (loc); + if (!AARCH64_HINT (insn)) + return false; + return insn == AARCH64_BTI_C + || insn == AARCH64_PACIASP + || insn == AARCH64_BTI_JC + || insn == AARCH64_BTI_J + || insn == AARCH64_PACIBSP; } #define AARCH64_BITS(x, pos, n) (((x) >> (pos)) & ((1 << (n)) - 1)) @@ -4614,7 +4642,7 @@ _bfd_aarch64_add_call_stub_entries (bool *stub_changed, bfd *output_bfd, /* A stub with indirect jump may break BTI compatibility, so insert another stub with direct jump near the target then. */ - if (need_bti && !aarch64_bti_stub_p (stub_entry)) + if (need_bti && !aarch64_bti_stub_p (input_bfd, stub_entry)) { stub_entry->double_stub = true; htab->has_double_stub = true; diff --git a/ld/testsuite/ld-aarch64/aarch64-elf.exp b/ld/testsuite/ld-aarch64/aarch64-elf.exp index 80662de7e44..ec55bf49931 100644 --- a/ld/testsuite/ld-aarch64/aarch64-elf.exp +++ b/ld/testsuite/ld-aarch64/aarch64-elf.exp @@ -463,6 +463,7 @@ run_dump_test "undef-tls" run_dump_test "bti-far-1" run_dump_test "bti-far-2" +run_dump_test "bti-far-opt" if { ![skip_sframe_tests] } { run_dump_test "sframe-simple-1" diff --git a/ld/testsuite/ld-aarch64/bti-far-opt.d b/ld/testsuite/ld-aarch64/bti-far-opt.d new file mode 100644 index 00000000000..ff20d0c2825 --- /dev/null +++ b/ld/testsuite/ld-aarch64/bti-far-opt.d @@ -0,0 +1,153 @@ +#name: Check linker stubs with indirect calls handle BTI when target has BTI. +#source: bti-far-opt.s +#target: [check_shared_lib_support] +#as: -mabi=lp64 +#ld: -shared -T bti-far.ld +#objdump: -dr + +[^:]*: *file format elf64-.*aarch64 + + +Disassembly of section \.plt: + +0000000000018000 <\.plt>: + 18000: d503245f bti c + 18004: a9bf7bf0 stp x16, x30, \[sp, #-16\]! + 18008: 900000d0 adrp x16, 30000 <_GLOBAL_OFFSET_TABLE_> + 1800c: f9400e11 ldr x17, \[x16, #24\] + 18010: 91006210 add x16, x16, #0x18 + 18014: d61f0220 br x17 + 18018: d503201f nop + 1801c: d503201f nop + +0000000000018020 : + 18020: 900000d0 adrp x16, 30000 <_GLOBAL_OFFSET_TABLE_> + 18024: f9401211 ldr x17, \[x16, #32\] + 18028: 91008210 add x16, x16, #0x20 + 1802c: d61f0220 br x17 + 18030: 14000004 b 18040 <__foo_bti_veneer\+0x8> + 18034: d503201f nop + +0000000000018038 <__foo_bti_veneer>: + 18038: d503245f bti c + 1803c: 17fffff9 b 18020 + +Disassembly of section \.text: + +0000000000020000 <_start>: + 20000: 97ffe008 bl 18020 + 20004: 9400000f bl 20040 <___veneer> + 20008: 94000001 bl 2000c + +000000000002000c : + 2000c: d503201f nop + +0000000000020010 : + 20010: d503241f bti + +0000000000020014 : + 20014: d503245f bti c + +0000000000020018 : + 20018: d503249f bti j + +000000000002001c : + 2001c: d50324df bti jc + +0000000000020020 : + 20020: d503233f paciasp + +0000000000020024 : + 20024: d503237f pacibsp + 20028: 1400000c b 20058 <___veneer\+0x18> + 2002c: d503201f nop + +0000000000020030 <___bti_veneer>: + 20030: d503245f bti c + 20034: 17fffff6 b 2000c + +0000000000020038 <___bti_veneer>: + 20038: d503245f bti c + 2003c: 17fffff5 b 20010 + +0000000000020040 <___veneer>: + 20040: 90091910 adrp x16, 12340000 + 20044: 9101e210 add x16, x16, #0x78 + 20048: d61f0200 br x16 + \.\.\. + +Disassembly of section \.far: + +0000000012340000 : + 12340000: 94000018 bl 12340060 <___veneer> + 12340004: 9400003d bl 123400f8 <___veneer> + 12340008: 9400002a bl 123400b0 <___veneer> + 1234000c: 94000023 bl 12340098 <___veneer> + 12340010: 9400002e bl 123400c8 <___veneer> + 12340014: 94000033 bl 123400e0 <___veneer> + 12340018: 9400001a bl 12340080 <___veneer> + +000000001234001c : + 1234001c: 1400000b b 12340048 <__foo_veneer> + 12340020: 14000010 b 12340060 <___veneer> + 12340024: 14000035 b 123400f8 <___veneer> + 12340028: 14000022 b 123400b0 <___veneer> + 1234002c: 1400001b b 12340098 <___veneer> + 12340030: 14000026 b 123400c8 <___veneer> + 12340034: 1400002b b 123400e0 <___veneer> + 12340038: 14000012 b 12340080 <___veneer> + 1234003c: 00000000 udf #0 + 12340040: 14000034 b 12340110 <___veneer\+0x18> + 12340044: d503201f nop + +0000000012340048 <__foo_veneer>: + 12340048: 90f6e6d0 adrp x16, 18000 <\.plt> + 1234004c: 9100e210 add x16, x16, #0x38 + 12340050: d61f0200 br x16 + \.\.\. + +0000000012340060 <___veneer>: + 12340060: 90f6e710 adrp x16, 20000 <_start> + 12340064: 9100c210 add x16, x16, #0x30 + 12340068: d61f0200 br x16 + \.\.\. + +0000000012340078 <___bti_veneer>: + 12340078: d503245f bti c + 1234007c: 17ffffe8 b 1234001c + +0000000012340080 <___veneer>: + 12340080: 90f6e710 adrp x16, 20000 <_start> + 12340084: 91009210 add x16, x16, #0x24 + 12340088: d61f0200 br x16 + \.\.\. + +0000000012340098 <___veneer>: + 12340098: 90f6e710 adrp x16, 20000 <_start> + 1234009c: 91006210 add x16, x16, #0x18 + 123400a0: d61f0200 br x16 + \.\.\. + +00000000123400b0 <___veneer>: + 123400b0: 90f6e710 adrp x16, 20000 <_start> + 123400b4: 91005210 add x16, x16, #0x14 + 123400b8: d61f0200 br x16 + \.\.\. + +00000000123400c8 <___veneer>: + 123400c8: 90f6e710 adrp x16, 20000 <_start> + 123400cc: 91007210 add x16, x16, #0x1c + 123400d0: d61f0200 br x16 + \.\.\. + +00000000123400e0 <___veneer>: + 123400e0: 90f6e710 adrp x16, 20000 <_start> + 123400e4: 91008210 add x16, x16, #0x20 + 123400e8: d61f0200 br x16 + \.\.\. + +00000000123400f8 <___veneer>: + 123400f8: 90f6e710 adrp x16, 20000 <_start> + 123400fc: 9100e210 add x16, x16, #0x38 + 12340100: d61f0200 br x16 + \.\.\. diff --git a/ld/testsuite/ld-aarch64/bti-far-opt.s b/ld/testsuite/ld-aarch64/bti-far-opt.s new file mode 100644 index 00000000000..516b901f20f --- /dev/null +++ b/ld/testsuite/ld-aarch64/bti-far-opt.s @@ -0,0 +1,53 @@ + .text + .global _start + .type _start, %function +_start: + bl foo + bl bar + bl baz +baz: + nop +baz_bti_: + bti +baz_bti_c: + bti c +baz_bti_j: + bti j +baz_bti_jc: + bti jc +baz_paciasp: + paciasp +baz_pacibsp: + pacibsp + + .section .far,"ax",@progbits + .global foo + .type foo, %function +foo: + bl baz + bl baz_bti_ + bl baz_bti_c + bl baz_bti_j + bl baz_bti_jc + bl baz_paciasp + bl baz_pacibsp +bar: + b foo + b baz + b baz_bti_ + b baz_bti_c + b baz_bti_j + b baz_bti_jc + b baz_paciasp + b baz_pacibsp + + .section .note.gnu.property,"a" + .align 3 + .word 4 + .word 16 + .word 5 + .string "GNU" + .word 0xc0000000 + .word 4 + .word 1 + .align 3 -- 2.30.2