x86: don't use 32-bit LEA as NOP surrogate in 64-bit code
author Jan Beulich <jbeulich@suse.com>
Mon, 23 Oct 2023 08:30:30 +0000 (10:30 +0200)
committer Jan Beulich <jbeulich@suse.com>
Mon, 23 Oct 2023 08:30:30 +0000 (10:30 +0200)
Except for the shared 1- and 2-byte cases, the 32-bit LEA forms used as NOP
surrogates corrupt %rsi in 64-bit code (by zero-extending %esi to %rsi).
Introduce separate 64-bit patterns which keep %rsi intact.

gas/config/tc-i386.c
gas/testsuite/gas/i386/ilp32/x86-64-nops-1-pentium.d
gas/testsuite/gas/i386/x86-64-nops-1-pentium.d

index d7eea9d33d41f0b30c26a32609d853caf05018e5..bec6304999200b216c8fa9dc505be3a5a15f8770 100644 (file)
@@ -1279,6 +1279,18 @@ static const unsigned char f32_6[] =
   {0x8d,0xb6,0x00,0x00,0x00,0x00};     /* leal 0L(%esi),%esi   */
 static const unsigned char f32_7[] =
   {0x8d,0xb4,0x26,0x00,0x00,0x00,0x00};        /* leal 0L(%esi,1),%esi */
+static const unsigned char f64_3[] =
+  {0x48,0x89,0xf6};                    /* mov %rsi,%rsi        */
+static const unsigned char f64_4[] =
+  {0x48,0x8d,0x76,0x00};               /* lea 0(%rsi),%rsi     */
+#define f64_5 (f64_6 + 1)              /* lea 0(%rsi,%riz),%rsi        */
+static const unsigned char f64_6[] =
+  {0x2e,0x48,0x8d,0x74,0x26,0x00};     /* lea %cs:0(%rsi,%riz),%rsi    */
+static const unsigned char f64_7[] =
+  {0x48,0x8d,0xb6,0x00,0x00,0x00,0x00};        /* lea 0L(%rsi),%rsi    */
+#define f64_8 (f64_9 + 1)              /* lea 0L(%rsi,%riz),%rsi */
+static const unsigned char f64_9[] =
+  {0x2e,0x48,0x8d,0xb4,0x26,0x00,0x00,0x00,0x00}; /* lea %cs:0L(%rsi,%riz),%rsi */
 static const unsigned char f16_3[] =
   {0x8d,0x74,0x00};                    /* lea 0(%si),%si       */
 static const unsigned char f16_4[] =
@@ -1293,6 +1305,10 @@ static const unsigned char jump16_disp32[] =
 static const unsigned char *const f32_patt[] = {
   f32_1, f32_2, f32_3, f32_4, NULL, f32_6, f32_7
 };
+/* 64-bit NOPs patterns.  */
+static const unsigned char *const f64_patt[] = {
+  f32_1, f32_2, f64_3, f64_4, f64_5, f64_6, f64_7, f64_8, f64_9
+};
 /* 16-bit NOPs patterns.  */
 static const unsigned char *const f16_patt[] = {
   f32_1, f32_2, f16_3, f16_4
@@ -1428,7 +1444,7 @@ i386_generate_nops (fragS *fragP, char *where, offsetT count, int limit)
      2. For the rest, alt_patt will be used.
 
      When -mtune= isn't used, alt_patt will be used if
-     cpu_arch_isa_flags has CpuNop.  Otherwise, f32_patt will
+     cpu_arch_isa_flags has CpuNop.  Otherwise, f32_patt/f64_patt will
      be used.
 
      When -march= or .arch is used, we can't use anything beyond
@@ -1443,6 +1459,7 @@ i386_generate_nops (fragS *fragP, char *where, offsetT count, int limit)
     }
   else
     {
+      patt = fragP->tc_frag_data.code == CODE_64BIT ? f64_patt : f32_patt;
       if (fragP->tc_frag_data.isa == PROCESSOR_UNKNOWN)
        {
          /* PROCESSOR_UNKNOWN means that all ISAs may be used.  */
@@ -1453,8 +1470,6 @@ i386_generate_nops (fragS *fragP, char *where, offsetT count, int limit)
                 optimize with nops.  */
              if (fragP->tc_frag_data.isa_flags.bitfield.cpunop)
                patt = alt_patt;
-             else
-               patt = f32_patt;
              break;
            case PROCESSOR_PENTIUM4:
            case PROCESSOR_NOCONA:
@@ -1477,7 +1492,6 @@ i386_generate_nops (fragS *fragP, char *where, offsetT count, int limit)
            case PROCESSOR_PENTIUMPRO:
            case PROCESSOR_IAMCU:
            case PROCESSOR_GENERIC32:
-             patt = f32_patt;
              break;
            case PROCESSOR_NONE:
              abort ();
@@ -1509,8 +1523,6 @@ i386_generate_nops (fragS *fragP, char *where, offsetT count, int limit)
                 with nops.  */
              if (fragP->tc_frag_data.isa_flags.bitfield.cpunop)
                patt = alt_patt;
-             else
-               patt = f32_patt;
              break;
            case PROCESSOR_PENTIUMPRO:
            case PROCESSOR_PENTIUM4:
@@ -1520,8 +1532,6 @@ i386_generate_nops (fragS *fragP, char *where, offsetT count, int limit)
            case PROCESSOR_COREI7:
              if (fragP->tc_frag_data.isa_flags.bitfield.cpunop)
                patt = alt_patt;
-             else
-               patt = f32_patt;
              break;
            case PROCESSOR_GENERIC64:
              patt = alt_patt;
@@ -1531,9 +1541,10 @@ i386_generate_nops (fragS *fragP, char *where, offsetT count, int limit)
            }
        }
 
-      if (patt == f32_patt)
+      if (patt != alt_patt)
        {
-         max_single_nop_size = sizeof (f32_patt) / sizeof (f32_patt[0]);
+         max_single_nop_size = patt == f32_patt ? ARRAY_SIZE (f32_patt)
+                                                : ARRAY_SIZE (f64_patt);
          /* Limit number of NOPs to 2 for older processors.  */
          max_number_of_nops = 2;
        }
index c669988f5aaebdfca5bfe64fd015e9e2b31e359b..b144d5494b448bcbb4f9dc3e05e4206573c9d771 100644 (file)
@@ -2,166 +2,4 @@
 #as: -mtune=pentium
 #objdump: -drw
 #name: x86-64 (ILP32) -mtune=pentium nops 1
-
-.*: +file format .*
-
-Disassembly of section .text:
-
-0+ <nop15>:
-[      ]*[a-f0-9]+:    90                      nop
-[      ]*[a-f0-9]+:    8d b4 26 00 00 00 00    lea    0x0\(%rsi,%riz,1\),%esi
-[      ]*[a-f0-9]+:    8d b4 26 00 00 00 00    lea    0x0\(%rsi,%riz,1\),%esi
-[      ]*[a-f0-9]+:    90                      nop
-
-0+10 <nop14>:
-[      ]*[a-f0-9]+:    90                      nop
-[      ]*[a-f0-9]+:    90                      nop
-[      ]*[a-f0-9]+:    8d b4 26 00 00 00 00    lea    0x0\(%rsi,%riz,1\),%esi
-[      ]*[a-f0-9]+:    8d b4 26 00 00 00 00    lea    0x0\(%rsi,%riz,1\),%esi
-
-0+20 <nop13>:
-[      ]*[a-f0-9]+:    90                      nop
-[      ]*[a-f0-9]+:    90                      nop
-[      ]*[a-f0-9]+:    90                      nop
-[      ]*[a-f0-9]+:    8d b4 26 00 00 00 00    lea    0x0\(%rsi,%riz,1\),%esi
-[      ]*[a-f0-9]+:    8d b6 00 00 00 00       lea    0x0\(%rsi\),%esi
-
-0+30 <nop12>:
-[      ]*[a-f0-9]+:    90                      nop
-[      ]*[a-f0-9]+:    90                      nop
-[      ]*[a-f0-9]+:    90                      nop
-[      ]*[a-f0-9]+:    90                      nop
-[      ]*[a-f0-9]+:    8d b4 26 00 00 00 00    lea    0x0\(%rsi,%riz,1\),%esi
-[      ]*[a-f0-9]+:    8d 74 26 00             lea    0x0\(%rsi,%riz,1\),%esi
-[      ]*[a-f0-9]+:    90                      nop
-
-0+40 <nop11>:
-[      ]*[a-f0-9]+:    90                      nop
-[      ]*[a-f0-9]+:    90                      nop
-[      ]*[a-f0-9]+:    90                      nop
-[      ]*[a-f0-9]+:    90                      nop
-[      ]*[a-f0-9]+:    90                      nop
-[      ]*[a-f0-9]+:    8d b4 26 00 00 00 00    lea    0x0\(%rsi,%riz,1\),%esi
-[      ]*[a-f0-9]+:    8d 74 26 00             lea    0x0\(%rsi,%riz,1\),%esi
-
-0+50 <nop10>:
-[      ]*[a-f0-9]+:    90                      nop
-[      ]*[a-f0-9]+:    90                      nop
-[      ]*[a-f0-9]+:    90                      nop
-[      ]*[a-f0-9]+:    90                      nop
-[      ]*[a-f0-9]+:    90                      nop
-[      ]*[a-f0-9]+:    90                      nop
-[      ]*[a-f0-9]+:    8d b4 26 00 00 00 00    lea    0x0\(%rsi,%riz,1\),%esi
-[      ]*[a-f0-9]+:    8d 76 00                lea    0x0\(%rsi\),%esi
-
-0+60 <nop9>:
-[      ]*[a-f0-9]+:    90                      nop
-[      ]*[a-f0-9]+:    90                      nop
-[      ]*[a-f0-9]+:    90                      nop
-[      ]*[a-f0-9]+:    90                      nop
-[      ]*[a-f0-9]+:    90                      nop
-[      ]*[a-f0-9]+:    90                      nop
-[      ]*[a-f0-9]+:    90                      nop
-[      ]*[a-f0-9]+:    8d b4 26 00 00 00 00    lea    0x0\(%rsi,%riz,1\),%esi
-[      ]*[a-f0-9]+:    66 90                   xchg   %ax,%ax
-
-0+70 <nop8>:
-[      ]*[a-f0-9]+:    90                      nop
-[      ]*[a-f0-9]+:    90                      nop
-[      ]*[a-f0-9]+:    90                      nop
-[      ]*[a-f0-9]+:    90                      nop
-[      ]*[a-f0-9]+:    90                      nop
-[      ]*[a-f0-9]+:    90                      nop
-[      ]*[a-f0-9]+:    90                      nop
-[      ]*[a-f0-9]+:    90                      nop
-[      ]*[a-f0-9]+:    8d b4 26 00 00 00 00    lea    0x0\(%rsi,%riz,1\),%esi
-[      ]*[a-f0-9]+:    90                      nop
-
-0+80 <nop7>:
-[      ]*[a-f0-9]+:    90                      nop
-[      ]*[a-f0-9]+:    90                      nop
-[      ]*[a-f0-9]+:    90                      nop
-[      ]*[a-f0-9]+:    90                      nop
-[      ]*[a-f0-9]+:    90                      nop
-[      ]*[a-f0-9]+:    90                      nop
-[      ]*[a-f0-9]+:    90                      nop
-[      ]*[a-f0-9]+:    90                      nop
-[      ]*[a-f0-9]+:    90                      nop
-[      ]*[a-f0-9]+:    8d b4 26 00 00 00 00    lea    0x0\(%rsi,%riz,1\),%esi
-
-0+90 <nop6>:
-[      ]*[a-f0-9]+:    90                      nop
-[      ]*[a-f0-9]+:    90                      nop
-[      ]*[a-f0-9]+:    90                      nop
-[      ]*[a-f0-9]+:    90                      nop
-[      ]*[a-f0-9]+:    90                      nop
-[      ]*[a-f0-9]+:    90                      nop
-[      ]*[a-f0-9]+:    90                      nop
-[      ]*[a-f0-9]+:    90                      nop
-[      ]*[a-f0-9]+:    90                      nop
-[      ]*[a-f0-9]+:    90                      nop
-[      ]*[a-f0-9]+:    8d b6 00 00 00 00       lea    0x0\(%rsi\),%esi
-
-0+a0 <nop5>:
-[      ]*[a-f0-9]+:    90                      nop
-[      ]*[a-f0-9]+:    90                      nop
-[      ]*[a-f0-9]+:    90                      nop
-[      ]*[a-f0-9]+:    90                      nop
-[      ]*[a-f0-9]+:    90                      nop
-[      ]*[a-f0-9]+:    90                      nop
-[      ]*[a-f0-9]+:    90                      nop
-[      ]*[a-f0-9]+:    90                      nop
-[      ]*[a-f0-9]+:    90                      nop
-[      ]*[a-f0-9]+:    90                      nop
-[      ]*[a-f0-9]+:    90                      nop
-[      ]*[a-f0-9]+:    8d 74 26 00             lea    0x0\(%rsi,%riz,1\),%esi
-[      ]*[a-f0-9]+:    90                      nop
-
-0+b0 <nop4>:
-[      ]*[a-f0-9]+:    90                      nop
-[      ]*[a-f0-9]+:    90                      nop
-[      ]*[a-f0-9]+:    90                      nop
-[      ]*[a-f0-9]+:    90                      nop
-[      ]*[a-f0-9]+:    90                      nop
-[      ]*[a-f0-9]+:    90                      nop
-[      ]*[a-f0-9]+:    90                      nop
-[      ]*[a-f0-9]+:    90                      nop
-[      ]*[a-f0-9]+:    90                      nop
-[      ]*[a-f0-9]+:    90                      nop
-[      ]*[a-f0-9]+:    90                      nop
-[      ]*[a-f0-9]+:    90                      nop
-[      ]*[a-f0-9]+:    8d 74 26 00             lea    0x0\(%rsi,%riz,1\),%esi
-
-0+c0 <nop3>:
-[      ]*[a-f0-9]+:    90                      nop
-[      ]*[a-f0-9]+:    90                      nop
-[      ]*[a-f0-9]+:    90                      nop
-[      ]*[a-f0-9]+:    90                      nop
-[      ]*[a-f0-9]+:    90                      nop
-[      ]*[a-f0-9]+:    90                      nop
-[      ]*[a-f0-9]+:    90                      nop
-[      ]*[a-f0-9]+:    90                      nop
-[      ]*[a-f0-9]+:    90                      nop
-[      ]*[a-f0-9]+:    90                      nop
-[      ]*[a-f0-9]+:    90                      nop
-[      ]*[a-f0-9]+:    90                      nop
-[      ]*[a-f0-9]+:    90                      nop
-[      ]*[a-f0-9]+:    8d 76 00                lea    0x0\(%rsi\),%esi
-
-0+d0 <nop2>:
-[      ]*[a-f0-9]+:    90                      nop
-[      ]*[a-f0-9]+:    90                      nop
-[      ]*[a-f0-9]+:    90                      nop
-[      ]*[a-f0-9]+:    90                      nop
-[      ]*[a-f0-9]+:    90                      nop
-[      ]*[a-f0-9]+:    90                      nop
-[      ]*[a-f0-9]+:    90                      nop
-[      ]*[a-f0-9]+:    90                      nop
-[      ]*[a-f0-9]+:    90                      nop
-[      ]*[a-f0-9]+:    90                      nop
-[      ]*[a-f0-9]+:    90                      nop
-[      ]*[a-f0-9]+:    90                      nop
-[      ]*[a-f0-9]+:    90                      nop
-[      ]*[a-f0-9]+:    90                      nop
-[      ]*[a-f0-9]+:    66 90                   xchg   %ax,%ax
-#pass
+#dump: ../x86-64-nops-1-pentium.d
index 4145e51a42d6a0e5e59a3720447d59b8bdf7a28c..a8882087923c66089f69bb23847a7fe8d52d45fc 100644 (file)
@@ -9,31 +9,29 @@ Disassembly of section .text:
 
 0+ <nop15>:
 [      ]*[a-f0-9]+:    90                      nop
-[      ]*[a-f0-9]+:    8d b4 26 00 00 00 00    lea    0x0\(%rsi,%riz,1\),%esi
-[      ]*[a-f0-9]+:    8d b4 26 00 00 00 00    lea    0x0\(%rsi,%riz,1\),%esi
-[      ]*[a-f0-9]+:    90                      nop
+[      ]*[a-f0-9]+:    2e 48 8d b4 26 00 00 00 00      cs lea (0x)?0\(%rsi,%riz,1\),%rsi
+[      ]*[a-f0-9]+:    2e 48 8d 74 26 00       cs lea (0x)?0\(%rsi,%riz,1\),%rsi
 
 0+10 <nop14>:
 [      ]*[a-f0-9]+:    90                      nop
 [      ]*[a-f0-9]+:    90                      nop
-[      ]*[a-f0-9]+:    8d b4 26 00 00 00 00    lea    0x0\(%rsi,%riz,1\),%esi
-[      ]*[a-f0-9]+:    8d b4 26 00 00 00 00    lea    0x0\(%rsi,%riz,1\),%esi
+[      ]*[a-f0-9]+:    2e 48 8d b4 26 00 00 00 00      cs lea (0x)?0\(%rsi,%riz,1\),%rsi
+[      ]*[a-f0-9]+:    48 8d 74 26 00          lea    (0x)?0\(%rsi,%riz,1\),%rsi
 
 0+20 <nop13>:
 [      ]*[a-f0-9]+:    90                      nop
 [      ]*[a-f0-9]+:    90                      nop
 [      ]*[a-f0-9]+:    90                      nop
-[      ]*[a-f0-9]+:    8d b4 26 00 00 00 00    lea    0x0\(%rsi,%riz,1\),%esi
-[      ]*[a-f0-9]+:    8d b6 00 00 00 00       lea    0x0\(%rsi\),%esi
+[      ]*[a-f0-9]+:    2e 48 8d b4 26 00 00 00 00      cs lea (0x)?0\(%rsi,%riz,1\),%rsi
+[      ]*[a-f0-9]+:    48 8d 76 00             lea    (0x)?0\(%rsi\),%rsi
 
 0+30 <nop12>:
 [      ]*[a-f0-9]+:    90                      nop
 [      ]*[a-f0-9]+:    90                      nop
 [      ]*[a-f0-9]+:    90                      nop
 [      ]*[a-f0-9]+:    90                      nop
-[      ]*[a-f0-9]+:    8d b4 26 00 00 00 00    lea    0x0\(%rsi,%riz,1\),%esi
-[      ]*[a-f0-9]+:    8d 74 26 00             lea    0x0\(%rsi,%riz,1\),%esi
-[      ]*[a-f0-9]+:    90                      nop
+[      ]*[a-f0-9]+:    2e 48 8d b4 26 00 00 00 00      cs lea (0x)?0\(%rsi,%riz,1\),%rsi
+[      ]*[a-f0-9]+:    48 89 f6                mov    %rsi,%rsi
 
 0+40 <nop11>:
 [      ]*[a-f0-9]+:    90                      nop
@@ -41,8 +39,8 @@ Disassembly of section .text:
 [      ]*[a-f0-9]+:    90                      nop
 [      ]*[a-f0-9]+:    90                      nop
 [      ]*[a-f0-9]+:    90                      nop
-[      ]*[a-f0-9]+:    8d b4 26 00 00 00 00    lea    0x0\(%rsi,%riz,1\),%esi
-[      ]*[a-f0-9]+:    8d 74 26 00             lea    0x0\(%rsi,%riz,1\),%esi
+[      ]*[a-f0-9]+:    2e 48 8d b4 26 00 00 00 00      cs lea (0x)?0\(%rsi,%riz,1\),%rsi
+[      ]*[a-f0-9]+:    66 90                   xchg   %ax,%ax
 
 0+50 <nop10>:
 [      ]*[a-f0-9]+:    90                      nop
@@ -51,8 +49,8 @@ Disassembly of section .text:
 [      ]*[a-f0-9]+:    90                      nop
 [      ]*[a-f0-9]+:    90                      nop
 [      ]*[a-f0-9]+:    90                      nop
-[      ]*[a-f0-9]+:    8d b4 26 00 00 00 00    lea    0x0\(%rsi,%riz,1\),%esi
-[      ]*[a-f0-9]+:    8d 76 00                lea    0x0\(%rsi\),%esi
+[      ]*[a-f0-9]+:    2e 48 8d b4 26 00 00 00 00      cs lea (0x)?0\(%rsi,%riz,1\),%rsi
+[      ]*[a-f0-9]+:    90                      nop
 
 0+60 <nop9>:
 [      ]*[a-f0-9]+:    90                      nop
@@ -62,8 +60,7 @@ Disassembly of section .text:
 [      ]*[a-f0-9]+:    90                      nop
 [      ]*[a-f0-9]+:    90                      nop
 [      ]*[a-f0-9]+:    90                      nop
-[      ]*[a-f0-9]+:    8d b4 26 00 00 00 00    lea    0x0\(%rsi,%riz,1\),%esi
-[      ]*[a-f0-9]+:    66 90                   xchg   %ax,%ax
+[      ]*[a-f0-9]+:    2e 48 8d b4 26 00 00 00 00      cs lea (0x)?0\(%rsi,%riz,1\),%rsi
 
 0+70 <nop8>:
 [      ]*[a-f0-9]+:    90                      nop
@@ -74,8 +71,7 @@ Disassembly of section .text:
 [      ]*[a-f0-9]+:    90                      nop
 [      ]*[a-f0-9]+:    90                      nop
 [      ]*[a-f0-9]+:    90                      nop
-[      ]*[a-f0-9]+:    8d b4 26 00 00 00 00    lea    0x0\(%rsi,%riz,1\),%esi
-[      ]*[a-f0-9]+:    90                      nop
+[      ]*[a-f0-9]+:    48 8d b4 26 00 00 00 00         lea    (0x)?0\(%rsi,%riz,1\),%rsi
 
 0+80 <nop7>:
 [      ]*[a-f0-9]+:    90                      nop
@@ -87,7 +83,7 @@ Disassembly of section .text:
 [      ]*[a-f0-9]+:    90                      nop
 [      ]*[a-f0-9]+:    90                      nop
 [      ]*[a-f0-9]+:    90                      nop
-[      ]*[a-f0-9]+:    8d b4 26 00 00 00 00    lea    0x0\(%rsi,%riz,1\),%esi
+[      ]*[a-f0-9]+:    48 8d b6 00 00 00 00    lea    (0x)?0\(%rsi\),%rsi
 
 0+90 <nop6>:
 [      ]*[a-f0-9]+:    90                      nop
@@ -100,7 +96,7 @@ Disassembly of section .text:
 [      ]*[a-f0-9]+:    90                      nop
 [      ]*[a-f0-9]+:    90                      nop
 [      ]*[a-f0-9]+:    90                      nop
-[      ]*[a-f0-9]+:    8d b6 00 00 00 00       lea    0x0\(%rsi\),%esi
+[      ]*[a-f0-9]+:    2e 48 8d 74 26 00       cs lea (0x)?0\(%rsi,%riz,1\),%rsi
 
 0+a0 <nop5>:
 [      ]*[a-f0-9]+:    90                      nop
@@ -114,8 +110,7 @@ Disassembly of section .text:
 [      ]*[a-f0-9]+:    90                      nop
 [      ]*[a-f0-9]+:    90                      nop
 [      ]*[a-f0-9]+:    90                      nop
-[      ]*[a-f0-9]+:    8d 74 26 00             lea    0x0\(%rsi,%riz,1\),%esi
-[      ]*[a-f0-9]+:    90                      nop
+[      ]*[a-f0-9]+:    48 8d 74 26 00          lea    (0x)?0\(%rsi,%riz,1\),%rsi
 
 0+b0 <nop4>:
 [      ]*[a-f0-9]+:    90                      nop
@@ -130,7 +125,7 @@ Disassembly of section .text:
 [      ]*[a-f0-9]+:    90                      nop
 [      ]*[a-f0-9]+:    90                      nop
 [      ]*[a-f0-9]+:    90                      nop
-[      ]*[a-f0-9]+:    8d 74 26 00             lea    0x0\(%rsi,%riz,1\),%esi
+[      ]*[a-f0-9]+:    48 8d 76 00             lea    (0x)?0\(%rsi\),%rsi
 
 0+c0 <nop3>:
 [      ]*[a-f0-9]+:    90                      nop
@@ -146,7 +141,7 @@ Disassembly of section .text:
 [      ]*[a-f0-9]+:    90                      nop
 [      ]*[a-f0-9]+:    90                      nop
 [      ]*[a-f0-9]+:    90                      nop
-[      ]*[a-f0-9]+:    8d 76 00                lea    0x0\(%rsi\),%esi
+[      ]*[a-f0-9]+:    48 89 f6                mov    %rsi,%rsi
 
 0+d0 <nop2>:
 [      ]*[a-f0-9]+:    90                      nop