From d164359dbc14c8ae4c7a117d236f5b7de4af671a Mon Sep 17 00:00:00 2001 From: Jan Beulich Date: Mon, 23 Oct 2023 10:30:30 +0200 Subject: [PATCH] x86: don't use 32-bit LEA as NOP surrogate in 64-bit code Except for the shared 1- and 2-byte cases, the LEA uses corrupt %rsi (by zero-extending %esi to %rsi). Introduce separate 64-bit patterns which keep %rsi intact. --- gas/config/tc-i386.c | 31 ++-- .../gas/i386/ilp32/x86-64-nops-1-pentium.d | 164 +----------------- .../gas/i386/x86-64-nops-1-pentium.d | 43 ++--- 3 files changed, 41 insertions(+), 197 deletions(-) diff --git a/gas/config/tc-i386.c b/gas/config/tc-i386.c index d7eea9d33d4..bec63049992 100644 --- a/gas/config/tc-i386.c +++ b/gas/config/tc-i386.c @@ -1279,6 +1279,18 @@ static const unsigned char f32_6[] = {0x8d,0xb6,0x00,0x00,0x00,0x00}; /* leal 0L(%esi),%esi */ static const unsigned char f32_7[] = {0x8d,0xb4,0x26,0x00,0x00,0x00,0x00}; /* leal 0L(%esi,1),%esi */ +static const unsigned char f64_3[] = + {0x48,0x89,0xf6}; /* mov %rsi,%rsi */ +static const unsigned char f64_4[] = + {0x48,0x8d,0x76,0x00}; /* lea 0(%rsi),%rsi */ +#define f64_5 (f64_6 + 1) /* lea 0(%rsi,%riz),%rsi */ +static const unsigned char f64_6[] = + {0x2e,0x48,0x8d,0x74,0x26,0x00}; /* lea %cs:0(%rsi,%riz),%rsi */ +static const unsigned char f64_7[] = + {0x48,0x8d,0xb6,0x00,0x00,0x00,0x00}; /* lea 0L(%rsi),%rsi */ +#define f64_8 (f64_9 + 1) /* lea 0L(%rsi,%riz),%rsi */ +static const unsigned char f64_9[] = + {0x2e,0x48,0x8d,0xb4,0x26,0x00,0x00,0x00,0x00}; /* lea %cs:0L(%rsi,%riz),%rsi */ static const unsigned char f16_3[] = {0x8d,0x74,0x00}; /* lea 0(%si),%si */ static const unsigned char f16_4[] = @@ -1293,6 +1305,10 @@ static const unsigned char jump16_disp32[] = static const unsigned char *const f32_patt[] = { f32_1, f32_2, f32_3, f32_4, NULL, f32_6, f32_7 }; +/* 64-bit NOPs patterns. */ +static const unsigned char *const f64_patt[] = { + f32_1, f32_2, f64_3, f64_4, f64_5, f64_6, f64_7, f64_8, f64_9 +}; /* 16-bit NOPs patterns. */ static const unsigned char *const f16_patt[] = { f32_1, f32_2, f16_3, f16_4 @@ -1428,7 +1444,7 @@ i386_generate_nops (fragS *fragP, char *where, offsetT count, int limit) 2. For the rest, alt_patt will be used. When -mtune= isn't used, alt_patt will be used if - cpu_arch_isa_flags has CpuNop. Otherwise, f32_patt will + cpu_arch_isa_flags has CpuNop. Otherwise, f32_patt/f64_patt will be used. When -march= or .arch is used, we can't use anything beyond @@ -1443,6 +1459,7 @@ i386_generate_nops (fragS *fragP, char *where, offsetT count, int limit) } else { + patt = fragP->tc_frag_data.code == CODE_64BIT ? f64_patt : f32_patt; if (fragP->tc_frag_data.isa == PROCESSOR_UNKNOWN) { /* PROCESSOR_UNKNOWN means that all ISAs may be used. */ @@ -1453,8 +1470,6 @@ i386_generate_nops (fragS *fragP, char *where, offsetT count, int limit) optimize with nops. */ if (fragP->tc_frag_data.isa_flags.bitfield.cpunop) patt = alt_patt; - else - patt = f32_patt; break; case PROCESSOR_PENTIUM4: case PROCESSOR_NOCONA: @@ -1477,7 +1492,6 @@ i386_generate_nops (fragS *fragP, char *where, offsetT count, int limit) case PROCESSOR_PENTIUMPRO: case PROCESSOR_IAMCU: case PROCESSOR_GENERIC32: - patt = f32_patt; break; case PROCESSOR_NONE: abort (); @@ -1509,8 +1523,6 @@ i386_generate_nops (fragS *fragP, char *where, offsetT count, int limit) with nops. */ if (fragP->tc_frag_data.isa_flags.bitfield.cpunop) patt = alt_patt; - else - patt = f32_patt; break; case PROCESSOR_PENTIUMPRO: case PROCESSOR_PENTIUM4: @@ -1520,8 +1532,6 @@ i386_generate_nops (fragS *fragP, char *where, offsetT count, int limit) case PROCESSOR_COREI7: if (fragP->tc_frag_data.isa_flags.bitfield.cpunop) patt = alt_patt; - else - patt = f32_patt; break; case PROCESSOR_GENERIC64: patt = alt_patt; @@ -1531,9 +1541,10 @@ i386_generate_nops (fragS *fragP, char *where, offsetT count, int limit) } } - if (patt == f32_patt) + if (patt != alt_patt) { - max_single_nop_size = sizeof (f32_patt) / sizeof (f32_patt[0]); + max_single_nop_size = patt == f32_patt ? ARRAY_SIZE (f32_patt) + : ARRAY_SIZE (f64_patt); /* Limit number of NOPs to 2 for older processors. */ max_number_of_nops = 2; } diff --git a/gas/testsuite/gas/i386/ilp32/x86-64-nops-1-pentium.d b/gas/testsuite/gas/i386/ilp32/x86-64-nops-1-pentium.d index c669988f5aa..b144d5494b4 100644 --- a/gas/testsuite/gas/i386/ilp32/x86-64-nops-1-pentium.d +++ b/gas/testsuite/gas/i386/ilp32/x86-64-nops-1-pentium.d @@ -2,166 +2,4 @@ #as: -mtune=pentium #objdump: -drw #name: x86-64 (ILP32) -mtune=pentium nops 1 - -.*: +file format .* - -Disassembly of section .text: - -0+ : -[ ]*[a-f0-9]+: 90 nop -[ ]*[a-f0-9]+: 8d b4 26 00 00 00 00 lea 0x0\(%rsi,%riz,1\),%esi -[ ]*[a-f0-9]+: 8d b4 26 00 00 00 00 lea 0x0\(%rsi,%riz,1\),%esi -[ ]*[a-f0-9]+: 90 nop - -0+10 : -[ ]*[a-f0-9]+: 90 nop -[ ]*[a-f0-9]+: 90 nop -[ ]*[a-f0-9]+: 8d b4 26 00 00 00 00 lea 0x0\(%rsi,%riz,1\),%esi -[ ]*[a-f0-9]+: 8d b4 26 00 00 00 00 lea 0x0\(%rsi,%riz,1\),%esi - -0+20 : -[ ]*[a-f0-9]+: 90 nop -[ ]*[a-f0-9]+: 90 nop -[ ]*[a-f0-9]+: 90 nop -[ ]*[a-f0-9]+: 8d b4 26 00 00 00 00 lea 0x0\(%rsi,%riz,1\),%esi -[ ]*[a-f0-9]+: 8d b6 00 00 00 00 lea 0x0\(%rsi\),%esi - -0+30 : -[ ]*[a-f0-9]+: 90 nop -[ ]*[a-f0-9]+: 90 nop -[ ]*[a-f0-9]+: 90 nop -[ ]*[a-f0-9]+: 90 nop -[ ]*[a-f0-9]+: 8d b4 26 00 00 00 00 lea 0x0\(%rsi,%riz,1\),%esi -[ ]*[a-f0-9]+: 8d 74 26 00 lea 0x0\(%rsi,%riz,1\),%esi -[ ]*[a-f0-9]+: 90 nop - -0+40 : -[ ]*[a-f0-9]+: 90 nop -[ ]*[a-f0-9]+: 90 nop -[ ]*[a-f0-9]+: 90 nop -[ ]*[a-f0-9]+: 90 nop -[ ]*[a-f0-9]+: 90 nop -[ ]*[a-f0-9]+: 8d b4 26 00 00 00 00 lea 0x0\(%rsi,%riz,1\),%esi -[ ]*[a-f0-9]+: 8d 74 26 00 lea 0x0\(%rsi,%riz,1\),%esi - -0+50 : -[ ]*[a-f0-9]+: 90 nop -[ ]*[a-f0-9]+: 90 nop -[ ]*[a-f0-9]+: 90 nop -[ ]*[a-f0-9]+: 90 nop -[ ]*[a-f0-9]+: 90 nop -[ ]*[a-f0-9]+: 90 nop -[ ]*[a-f0-9]+: 8d b4 26 00 00 00 00 lea 0x0\(%rsi,%riz,1\),%esi -[ ]*[a-f0-9]+: 8d 76 00 lea 0x0\(%rsi\),%esi - -0+60 : -[ ]*[a-f0-9]+: 90 nop -[ ]*[a-f0-9]+: 90 nop -[ ]*[a-f0-9]+: 90 nop -[ ]*[a-f0-9]+: 90 nop -[ ]*[a-f0-9]+: 90 nop -[ ]*[a-f0-9]+: 90 nop -[ ]*[a-f0-9]+: 90 nop -[ ]*[a-f0-9]+: 8d b4 26 00 00 00 00 lea 0x0\(%rsi,%riz,1\),%esi -[ ]*[a-f0-9]+: 66 90 xchg %ax,%ax - -0+70 : -[ ]*[a-f0-9]+: 90 nop -[ ]*[a-f0-9]+: 90 nop -[ ]*[a-f0-9]+: 90 nop -[ ]*[a-f0-9]+: 90 nop -[ ]*[a-f0-9]+: 90 nop -[ ]*[a-f0-9]+: 90 nop -[ ]*[a-f0-9]+: 90 nop -[ ]*[a-f0-9]+: 90 nop -[ ]*[a-f0-9]+: 8d b4 26 00 00 00 00 lea 0x0\(%rsi,%riz,1\),%esi -[ ]*[a-f0-9]+: 90 nop - -0+80 : -[ ]*[a-f0-9]+: 90 nop -[ ]*[a-f0-9]+: 90 nop -[ ]*[a-f0-9]+: 90 nop -[ ]*[a-f0-9]+: 90 nop -[ ]*[a-f0-9]+: 90 nop -[ ]*[a-f0-9]+: 90 nop -[ ]*[a-f0-9]+: 90 nop -[ ]*[a-f0-9]+: 90 nop -[ ]*[a-f0-9]+: 90 nop -[ ]*[a-f0-9]+: 8d b4 26 00 00 00 00 lea 0x0\(%rsi,%riz,1\),%esi - -0+90 : -[ ]*[a-f0-9]+: 90 nop -[ ]*[a-f0-9]+: 90 nop -[ ]*[a-f0-9]+: 90 nop -[ ]*[a-f0-9]+: 90 nop -[ ]*[a-f0-9]+: 90 nop -[ ]*[a-f0-9]+: 90 nop -[ ]*[a-f0-9]+: 90 nop -[ ]*[a-f0-9]+: 90 nop -[ ]*[a-f0-9]+: 90 nop -[ ]*[a-f0-9]+: 90 nop -[ ]*[a-f0-9]+: 8d b6 00 00 00 00 lea 0x0\(%rsi\),%esi - -0+a0 : -[ ]*[a-f0-9]+: 90 nop -[ ]*[a-f0-9]+: 90 nop -[ ]*[a-f0-9]+: 90 nop -[ ]*[a-f0-9]+: 90 nop -[ ]*[a-f0-9]+: 90 nop -[ ]*[a-f0-9]+: 90 nop -[ ]*[a-f0-9]+: 90 nop -[ ]*[a-f0-9]+: 90 nop -[ ]*[a-f0-9]+: 90 nop -[ ]*[a-f0-9]+: 90 nop -[ ]*[a-f0-9]+: 90 nop -[ ]*[a-f0-9]+: 8d 74 26 00 lea 0x0\(%rsi,%riz,1\),%esi -[ ]*[a-f0-9]+: 90 nop - -0+b0 : -[ ]*[a-f0-9]+: 90 nop -[ ]*[a-f0-9]+: 90 nop -[ ]*[a-f0-9]+: 90 nop -[ ]*[a-f0-9]+: 90 nop -[ ]*[a-f0-9]+: 90 nop -[ ]*[a-f0-9]+: 90 nop -[ ]*[a-f0-9]+: 90 nop -[ ]*[a-f0-9]+: 90 nop -[ ]*[a-f0-9]+: 90 nop -[ ]*[a-f0-9]+: 90 nop -[ ]*[a-f0-9]+: 90 nop -[ ]*[a-f0-9]+: 90 nop -[ ]*[a-f0-9]+: 8d 74 26 00 lea 0x0\(%rsi,%riz,1\),%esi - -0+c0 : -[ ]*[a-f0-9]+: 90 nop -[ ]*[a-f0-9]+: 90 nop -[ ]*[a-f0-9]+: 90 nop -[ ]*[a-f0-9]+: 90 nop -[ ]*[a-f0-9]+: 90 nop -[ ]*[a-f0-9]+: 90 nop -[ ]*[a-f0-9]+: 90 nop -[ ]*[a-f0-9]+: 90 nop -[ ]*[a-f0-9]+: 90 nop -[ ]*[a-f0-9]+: 90 nop -[ ]*[a-f0-9]+: 90 nop -[ ]*[a-f0-9]+: 90 nop -[ ]*[a-f0-9]+: 90 nop -[ ]*[a-f0-9]+: 8d 76 00 lea 0x0\(%rsi\),%esi - -0+d0 : -[ ]*[a-f0-9]+: 90 nop -[ ]*[a-f0-9]+: 90 nop -[ ]*[a-f0-9]+: 90 nop -[ ]*[a-f0-9]+: 90 nop -[ ]*[a-f0-9]+: 90 nop -[ ]*[a-f0-9]+: 90 nop -[ ]*[a-f0-9]+: 90 nop -[ ]*[a-f0-9]+: 90 nop -[ ]*[a-f0-9]+: 90 nop -[ ]*[a-f0-9]+: 90 nop -[ ]*[a-f0-9]+: 90 nop -[ ]*[a-f0-9]+: 90 nop -[ ]*[a-f0-9]+: 90 nop -[ ]*[a-f0-9]+: 90 nop -[ ]*[a-f0-9]+: 66 90 xchg %ax,%ax -#pass +#dump: ../x86-64-nops-1-pentium.d diff --git a/gas/testsuite/gas/i386/x86-64-nops-1-pentium.d b/gas/testsuite/gas/i386/x86-64-nops-1-pentium.d index 4145e51a42d..a8882087923 100644 --- a/gas/testsuite/gas/i386/x86-64-nops-1-pentium.d +++ b/gas/testsuite/gas/i386/x86-64-nops-1-pentium.d @@ -9,31 +9,29 @@ Disassembly of section .text: 0+ : [ ]*[a-f0-9]+: 90 nop -[ ]*[a-f0-9]+: 8d b4 26 00 00 00 00 lea 0x0\(%rsi,%riz,1\),%esi -[ ]*[a-f0-9]+: 8d b4 26 00 00 00 00 lea 0x0\(%rsi,%riz,1\),%esi -[ ]*[a-f0-9]+: 90 nop +[ ]*[a-f0-9]+: 2e 48 8d b4 26 00 00 00 00 cs lea (0x)?0\(%rsi,%riz,1\),%rsi +[ ]*[a-f0-9]+: 2e 48 8d 74 26 00 cs lea (0x)?0\(%rsi,%riz,1\),%rsi 0+10 : [ ]*[a-f0-9]+: 90 nop [ ]*[a-f0-9]+: 90 nop -[ ]*[a-f0-9]+: 8d b4 26 00 00 00 00 lea 0x0\(%rsi,%riz,1\),%esi -[ ]*[a-f0-9]+: 8d b4 26 00 00 00 00 lea 0x0\(%rsi,%riz,1\),%esi +[ ]*[a-f0-9]+: 2e 48 8d b4 26 00 00 00 00 cs lea (0x)?0\(%rsi,%riz,1\),%rsi +[ ]*[a-f0-9]+: 48 8d 74 26 00 lea (0x)?0\(%rsi,%riz,1\),%rsi 0+20 : [ ]*[a-f0-9]+: 90 nop [ ]*[a-f0-9]+: 90 nop [ ]*[a-f0-9]+: 90 nop -[ ]*[a-f0-9]+: 8d b4 26 00 00 00 00 lea 0x0\(%rsi,%riz,1\),%esi -[ ]*[a-f0-9]+: 8d b6 00 00 00 00 lea 0x0\(%rsi\),%esi +[ ]*[a-f0-9]+: 2e 48 8d b4 26 00 00 00 00 cs lea (0x)?0\(%rsi,%riz,1\),%rsi +[ ]*[a-f0-9]+: 48 8d 76 00 lea (0x)?0\(%rsi\),%rsi 0+30 : [ ]*[a-f0-9]+: 90 nop [ ]*[a-f0-9]+: 90 nop [ ]*[a-f0-9]+: 90 nop [ ]*[a-f0-9]+: 90 nop -[ ]*[a-f0-9]+: 8d b4 26 00 00 00 00 lea 0x0\(%rsi,%riz,1\),%esi -[ ]*[a-f0-9]+: 8d 74 26 00 lea 0x0\(%rsi,%riz,1\),%esi -[ ]*[a-f0-9]+: 90 nop +[ ]*[a-f0-9]+: 2e 48 8d b4 26 00 00 00 00 cs lea (0x)?0\(%rsi,%riz,1\),%rsi +[ ]*[a-f0-9]+: 48 89 f6 mov %rsi,%rsi 0+40 : [ ]*[a-f0-9]+: 90 nop @@ -41,8 +39,8 @@ Disassembly of section .text: [ ]*[a-f0-9]+: 90 nop [ ]*[a-f0-9]+: 90 nop [ ]*[a-f0-9]+: 90 nop -[ ]*[a-f0-9]+: 8d b4 26 00 00 00 00 lea 0x0\(%rsi,%riz,1\),%esi -[ ]*[a-f0-9]+: 8d 74 26 00 lea 0x0\(%rsi,%riz,1\),%esi +[ ]*[a-f0-9]+: 2e 48 8d b4 26 00 00 00 00 cs lea (0x)?0\(%rsi,%riz,1\),%rsi +[ ]*[a-f0-9]+: 66 90 xchg %ax,%ax 0+50 : [ ]*[a-f0-9]+: 90 nop @@ -51,8 +49,8 @@ Disassembly of section .text: [ ]*[a-f0-9]+: 90 nop [ ]*[a-f0-9]+: 90 nop [ ]*[a-f0-9]+: 90 nop -[ ]*[a-f0-9]+: 8d b4 26 00 00 00 00 lea 0x0\(%rsi,%riz,1\),%esi -[ ]*[a-f0-9]+: 8d 76 00 lea 0x0\(%rsi\),%esi +[ ]*[a-f0-9]+: 2e 48 8d b4 26 00 00 00 00 cs lea (0x)?0\(%rsi,%riz,1\),%rsi +[ ]*[a-f0-9]+: 90 nop 0+60 : [ ]*[a-f0-9]+: 90 nop @@ -62,8 +60,7 @@ Disassembly of section .text: [ ]*[a-f0-9]+: 90 nop [ ]*[a-f0-9]+: 90 nop [ ]*[a-f0-9]+: 90 nop -[ ]*[a-f0-9]+: 8d b4 26 00 00 00 00 lea 0x0\(%rsi,%riz,1\),%esi -[ ]*[a-f0-9]+: 66 90 xchg %ax,%ax +[ ]*[a-f0-9]+: 2e 48 8d b4 26 00 00 00 00 cs lea (0x)?0\(%rsi,%riz,1\),%rsi 0+70 : [ ]*[a-f0-9]+: 90 nop @@ -74,8 +71,7 @@ Disassembly of section .text: [ ]*[a-f0-9]+: 90 nop [ ]*[a-f0-9]+: 90 nop [ ]*[a-f0-9]+: 90 nop -[ ]*[a-f0-9]+: 8d b4 26 00 00 00 00 lea 0x0\(%rsi,%riz,1\),%esi -[ ]*[a-f0-9]+: 90 nop +[ ]*[a-f0-9]+: 48 8d b4 26 00 00 00 00 lea (0x)?0\(%rsi,%riz,1\),%rsi 0+80 : [ ]*[a-f0-9]+: 90 nop @@ -87,7 +83,7 @@ Disassembly of section .text: [ ]*[a-f0-9]+: 90 nop [ ]*[a-f0-9]+: 90 nop [ ]*[a-f0-9]+: 90 nop -[ ]*[a-f0-9]+: 8d b4 26 00 00 00 00 lea 0x0\(%rsi,%riz,1\),%esi +[ ]*[a-f0-9]+: 48 8d b6 00 00 00 00 lea (0x)?0\(%rsi\),%rsi 0+90 : [ ]*[a-f0-9]+: 90 nop @@ -100,7 +96,7 @@ Disassembly of section .text: [ ]*[a-f0-9]+: 90 nop [ ]*[a-f0-9]+: 90 nop [ ]*[a-f0-9]+: 90 nop -[ ]*[a-f0-9]+: 8d b6 00 00 00 00 lea 0x0\(%rsi\),%esi +[ ]*[a-f0-9]+: 2e 48 8d 74 26 00 cs lea (0x)?0\(%rsi,%riz,1\),%rsi 0+a0 : [ ]*[a-f0-9]+: 90 nop @@ -114,8 +110,7 @@ Disassembly of section .text: [ ]*[a-f0-9]+: 90 nop [ ]*[a-f0-9]+: 90 nop [ ]*[a-f0-9]+: 90 nop -[ ]*[a-f0-9]+: 8d 74 26 00 lea 0x0\(%rsi,%riz,1\),%esi -[ ]*[a-f0-9]+: 90 nop +[ ]*[a-f0-9]+: 48 8d 74 26 00 lea (0x)?0\(%rsi,%riz,1\),%rsi 0+b0 : [ ]*[a-f0-9]+: 90 nop @@ -130,7 +125,7 @@ Disassembly of section .text: [ ]*[a-f0-9]+: 90 nop [ ]*[a-f0-9]+: 90 nop [ ]*[a-f0-9]+: 90 nop -[ ]*[a-f0-9]+: 8d 74 26 00 lea 0x0\(%rsi,%riz,1\),%esi +[ ]*[a-f0-9]+: 48 8d 76 00 lea (0x)?0\(%rsi\),%rsi 0+c0 : [ ]*[a-f0-9]+: 90 nop @@ -146,7 +141,7 @@ Disassembly of section .text: [ ]*[a-f0-9]+: 90 nop [ ]*[a-f0-9]+: 90 nop [ ]*[a-f0-9]+: 90 nop -[ ]*[a-f0-9]+: 8d 76 00 lea 0x0\(%rsi\),%esi +[ ]*[a-f0-9]+: 48 89 f6 mov %rsi,%rsi 0+d0 : [ ]*[a-f0-9]+: 90 nop -- 2.30.2