Pad short functions with NOPs for Atom.
author H.J. Lu <hongjiu.lu@intel.com>
Fri, 17 Sep 2010 21:07:09 +0000 (21:07 +0000)
committer H.J. Lu <hjl@gcc.gnu.org>
Fri, 17 Sep 2010 21:07:09 +0000 (14:07 -0700)
gcc/

2010-09-17  H.J. Lu  <hongjiu.lu@intel.com>
    Richard Henderson  <rth@redhat.com>

* config/i386/i386.c (initial_ix86_tune_features): Add
X86_TUNE_PAD_SHORT_FUNCTION.
(ix86_code_end): Pad with 8 NOPs for TARGET_PAD_SHORT_FUNCTION.
(ix86_count_insn): New.
(ix86_pad_short_function): Likewise.
(ix86_reorg): Support TARGET_PAD_SHORT_FUNCTION.

* config/i386/i386.h (ix86_tune_indices): Add
X86_TUNE_PAD_SHORT_FUNCTION.
(TARGET_PAD_SHORT_FUNCTION): New.

* config/i386/i386.md (UNSPEC_NOPS): New.
(nops): Likewise.

gcc/testsuite/

2010-09-17  H.J. Lu  <hongjiu.lu@intel.com>

* gcc.target/i386/pad-1.c: New.
* gcc.target/i386/pad-2.c: Likewise.
* gcc.target/i386/pad-3.c: Likewise.
* gcc.target/i386/pad-4.c: Likewise.
* gcc.target/i386/pad-5a.c: Likewise.
* gcc.target/i386/pad-5b.c: Likewise.
* gcc.target/i386/pad-6a.c: Likewise.
* gcc.target/i386/pad-6b.c: Likewise.
* gcc.target/i386/pad-7.c: Likewise.
* gcc.target/i386/pad-8.c: Likewise.
* gcc.target/i386/pad-9.c: Likewise.
* gcc.target/i386/pad-10.c: Likewise.

Co-Authored-By: Richard Henderson <rth@redhat.com>
From-SVN: r164379

17 files changed:
gcc/ChangeLog
gcc/config/i386/i386.c
gcc/config/i386/i386.h
gcc/config/i386/i386.md
gcc/testsuite/ChangeLog
gcc/testsuite/gcc.target/i386/pad-1.c [new file with mode: 0644]
gcc/testsuite/gcc.target/i386/pad-10.c [new file with mode: 0644]
gcc/testsuite/gcc.target/i386/pad-2.c [new file with mode: 0644]
gcc/testsuite/gcc.target/i386/pad-3.c [new file with mode: 0644]
gcc/testsuite/gcc.target/i386/pad-4.c [new file with mode: 0644]
gcc/testsuite/gcc.target/i386/pad-5a.c [new file with mode: 0644]
gcc/testsuite/gcc.target/i386/pad-5b.c [new file with mode: 0644]
gcc/testsuite/gcc.target/i386/pad-6a.c [new file with mode: 0644]
gcc/testsuite/gcc.target/i386/pad-6b.c [new file with mode: 0644]
gcc/testsuite/gcc.target/i386/pad-7.c [new file with mode: 0644]
gcc/testsuite/gcc.target/i386/pad-8.c [new file with mode: 0644]
gcc/testsuite/gcc.target/i386/pad-9.c [new file with mode: 0644]

index 5301e8d043854b871504684f038fd550e99f55d2..dcfc7e6678358e47564b08a0b0a94cfc054aea85 100644 (file)
@@ -1,3 +1,20 @@
+2010-09-17  H.J. Lu  <hongjiu.lu@intel.com>
+           Richard Henderson  <rth@redhat.com>
+
+       * config/i386/i386.c (initial_ix86_tune_features): Add
+       X86_TUNE_PAD_SHORT_FUNCTION.
+       (ix86_code_end): Pad with 8 NOPs for TARGET_PAD_SHORT_FUNCTION.
+       (ix86_count_insn): New.
+       (ix86_pad_short_function): Likewise.
+       (ix86_reorg): Support TARGET_PAD_SHORT_FUNCTION.
+
+       * config/i386/i386.h (ix86_tune_indices): Add
+       X86_TUNE_PAD_SHORT_FUNCTION.
+       (TARGET_PAD_SHORT_FUNCTION): New.
+
+       * config/i386/i386.md (UNSPEC_NOPS): New.
+       (nops): Likewise.
+
 2010-09-17  H.J. Lu  <hongjiu.lu@intel.com>
 
        PR middle-end/45234
index abec05711cb10c935276a13268e75278e700129e..d9f92371e683ebd87be752e7dd327aa374bdfbc5 100644 (file)
@@ -1576,6 +1576,9 @@ static unsigned int initial_ix86_tune_features[X86_TUNE_LAST] = {
   /* X86_TUNE_PAD_RETURNS */
   m_AMD_MULTIPLE | m_CORE2 | m_GENERIC,
 
+  /* X86_TUNE_PAD_SHORT_FUNCTION: Pad short function.  */
+  m_ATOM,
+
   /* X86_TUNE_EXT_80387_CONSTANTS */
   m_K6_GEODE | m_ATHLON_K8 | m_ATOM | m_PENT4 | m_NOCONA | m_PPRO
   | m_CORE2 | m_GENERIC,
@@ -8021,6 +8024,11 @@ ix86_code_end (void)
 
       xops[0] = gen_rtx_REG (Pmode, regno);
       xops[1] = gen_rtx_MEM (Pmode, stack_pointer_rtx);
+      /* Pad stack IP move with 4 instructions.  2 NOPs count as 1
+         instruction.  */
+      if (TARGET_PAD_SHORT_FUNCTION)
+       output_asm_insn ("nop; nop; nop; nop; nop; nop; nop; nop",
+                        xops);
       output_asm_insn ("mov%z0\t{%1, %0|%0, %1}", xops);
       output_asm_insn ("ret", xops);
       final_end_function ();
@@ -27882,6 +27890,120 @@ ix86_pad_returns (void)
     }
 }
 
+/* Count the minimum number of instructions in BB.  Return 4 if the
+   number of instructions >= 4.  */
+
+static int 
+ix86_count_insn_bb (basic_block bb)
+{
+  rtx insn;
+  int insn_count = 0;
+
+  /* Count number of instructions in this block.  Return 4 if the number
+     of instructions >= 4.  */
+  FOR_BB_INSNS (bb, insn)
+    {
+      /* This only happens in exit blocks.  */
+      if (JUMP_P (insn)
+         && GET_CODE (PATTERN (insn)) == RETURN)
+       break;
+
+      if (NONDEBUG_INSN_P (insn)
+         && GET_CODE (PATTERN (insn)) != USE
+         && GET_CODE (PATTERN (insn)) != CLOBBER)
+       {
+         insn_count++;
+         if (insn_count >= 4)
+           return insn_count;
+       }
+    }
+
+  return insn_count;
+}
+
+
+/* Count the minimum number of instructions in code path in BB.  
+   Return 4 if the number of instructions >= 4.  */
+
+static int 
+ix86_count_insn (basic_block bb)
+{
+  edge e;
+  edge_iterator ei;
+  int min_prev_count;
+
+  /* Only bother counting instructions along paths with no
+     more than 2 basic blocks between entry and exit.  Given
+     that BB has an edge to exit, determine if a predecessor
+     of BB has an edge from entry.  If so, compute the number
+     of instructions in the predecessor block.  If there
+     happen to be multiple such blocks, compute the minimum.  */
+  min_prev_count = 4;
+  FOR_EACH_EDGE (e, ei, bb->preds)
+    {
+      edge prev_e;
+      edge_iterator prev_ei;
+
+      if (e->src == ENTRY_BLOCK_PTR)
+       {
+         min_prev_count = 0;
+         break;
+       }
+      FOR_EACH_EDGE (prev_e, prev_ei, e->src->preds)
+       {
+         if (prev_e->src == ENTRY_BLOCK_PTR)
+           {
+             int count = ix86_count_insn_bb (e->src);
+             if (count < min_prev_count)
+               min_prev_count = count;
+             break;
+           }
+       }
+    }
+
+  if (min_prev_count < 4)
+    min_prev_count += ix86_count_insn_bb (bb);
+
+  return min_prev_count;
+}
+
+/* Pad short function to 4 instructions.  */
+
+static void
+ix86_pad_short_function (void)
+{
+  edge e;
+  edge_iterator ei;
+
+  FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR->preds)
+    {
+      rtx ret = BB_END (e->src);
+      if (JUMP_P (ret) && GET_CODE (PATTERN (ret)) == RETURN)
+       {
+         int insn_count = ix86_count_insn (e->src);
+
+         /* Pad short function.  */
+         if (insn_count < 4)
+           {
+             rtx insn = ret;
+
+             /* Find epilogue.  */
+             while (insn
+                    && (!NOTE_P (insn)
+                        || NOTE_KIND (insn) != NOTE_INSN_EPILOGUE_BEG))
+               insn = PREV_INSN (insn);
+
+             if (!insn)
+               insn = ret;
+
+             /* Two NOPs are counted as one instruction.  */
+             insn_count = 2 * (4  - insn_count);
+             emit_insn_before (gen_nops (GEN_INT (insn_count)), insn);
+           }
+       }
+    }
+}
+
 /* Implement machine specific optimizations.  We implement padding of returns
    for K8 CPUs and pass to avoid 4 jumps in the single 16 byte window.  */
 static void
@@ -27889,7 +28011,9 @@ ix86_reorg (void)
 {
   if (optimize && optimize_function_for_speed_p (cfun))
     {
-      if (TARGET_PAD_RETURNS)
+      if (TARGET_PAD_SHORT_FUNCTION)
+       ix86_pad_short_function ();
+      else if (TARGET_PAD_RETURNS)
        ix86_pad_returns ();
 #ifdef ASM_OUTPUT_MAX_SKIP_PAD
       if (TARGET_FOUR_JUMP_LIMIT)
index 22dd02b0bb86219d74d4164f582422f4452046cd..aa246c6fdcc9ec8fba5a7732e520022167ac8026 100644 (file)
@@ -299,6 +299,7 @@ enum ix86_tune_indices {
   X86_TUNE_USE_BT,
   X86_TUNE_USE_INCDEC,
   X86_TUNE_PAD_RETURNS,
+  X86_TUNE_PAD_SHORT_FUNCTION,
   X86_TUNE_EXT_80387_CONSTANTS,
   X86_TUNE_SHORTEN_X87_SSE,
   X86_TUNE_AVOID_VECTOR_DECODE,
@@ -385,6 +386,8 @@ extern unsigned char ix86_tune_features[X86_TUNE_LAST];
 #define TARGET_USE_BT          ix86_tune_features[X86_TUNE_USE_BT]
 #define TARGET_USE_INCDEC      ix86_tune_features[X86_TUNE_USE_INCDEC]
 #define TARGET_PAD_RETURNS     ix86_tune_features[X86_TUNE_PAD_RETURNS]
+#define TARGET_PAD_SHORT_FUNCTION \
+       ix86_tune_features[X86_TUNE_PAD_SHORT_FUNCTION]
 #define TARGET_EXT_80387_CONSTANTS \
        ix86_tune_features[X86_TUNE_EXT_80387_CONSTANTS]
 #define TARGET_SHORTEN_X87_SSE ix86_tune_features[X86_TUNE_SHORTEN_X87_SSE]
index ec43793b951a49658d2008754c8c67d1af552b7a..4ccd932a84034f3c44784e3fc44934be75f89df8 100644 (file)
   UNSPEC_LD_MPIC       ; load_macho_picbase
   UNSPEC_TRUNC_NOOP
   UNSPEC_DIV_ALREADY_SPLIT
+  UNSPEC_NOPS
 
   ;; For SSE/MMX support:
   UNSPEC_FIX_NOTRUNC
    (set_attr "length_immediate" "0")
    (set_attr "modrm" "0")])
 
+;; Generate nops.  Operand 0 is the number of nops, up to 8.
+(define_insn "nops"
+  [(unspec [(match_operand 0 "const_int_operand" "")]
+          UNSPEC_NOPS)]
+  "reload_completed"
+{
+  switch (INTVAL (operands[0]))
+    {
+    case 1:
+      return "nop";
+    case 2:
+      return "nop; nop";
+    case 3:
+      return "nop; nop; nop";
+    case 4:
+      return "nop; nop; nop; nop";
+    case 5:
+      return "nop; nop; nop; nop; nop";
+    case 6:
+      return "nop; nop; nop; nop; nop; nop";
+    case 7:
+      return "nop; nop; nop; nop; nop; nop; nop";
+    case 8:
+      return "nop; nop; nop; nop; nop; nop; nop; nop";
+    default:
+      gcc_unreachable ();
+      break;
+  }
+}
+  [(set (attr "length") (symbol_ref "INTVAL (operands[0])"))
+   (set_attr "length_immediate" "0")
+   (set_attr "modrm" "0")])
+
 ;; Pad to 16-byte boundary, max skip in op0.  Used to avoid
 ;; branch prediction penalty for the third jump in a 16-byte
 ;; block on K8.
index 6bec743e8321fb19bece3cb4cc6e28b08796f485..f1193f90c4e96de072ca9e84ff632e231ab51400 100644 (file)
@@ -1,3 +1,18 @@
+2010-09-17  H.J. Lu  <hongjiu.lu@intel.com>
+
+       * gcc.target/i386/pad-1.c: New.
+       * gcc.target/i386/pad-2.c: Likewise.
+       * gcc.target/i386/pad-3.c: Likewise.
+       * gcc.target/i386/pad-4.c: Likewise.
+       * gcc.target/i386/pad-5a.c: Likewise.
+       * gcc.target/i386/pad-5b.c: Likewise.
+       * gcc.target/i386/pad-6a.c: Likewise.
+       * gcc.target/i386/pad-6b.c: Likewise.
+       * gcc.target/i386/pad-7.c: Likewise.
+       * gcc.target/i386/pad-8.c: Likewise.
+       * gcc.target/i386/pad-9.c: Likewise.
+       * gcc.target/i386/pad-10.c: Likewise.
+
 2010-09-17  H.J. Lu  <hongjiu.lu@intel.com>
 
        PR middle-end/45234
diff --git a/gcc/testsuite/gcc.target/i386/pad-1.c b/gcc/testsuite/gcc.target/i386/pad-1.c
new file mode 100644 (file)
index 0000000..87a9d6c
--- /dev/null
@@ -0,0 +1,9 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -fomit-frame-pointer -mtune=generic -S" } */
+/* { dg-final { scan-assembler "rep" } } */
+/* { dg-final { scan-assembler-not "nop" } } */
+
+void
+foo ()
+{
+}
diff --git a/gcc/testsuite/gcc.target/i386/pad-10.c b/gcc/testsuite/gcc.target/i386/pad-10.c
new file mode 100644 (file)
index 0000000..6ba3b78
--- /dev/null
@@ -0,0 +1,18 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -fomit-frame-pointer -march=atom -S" } */
+/* { dg-final { scan-assembler-not "nop" } } */
+/* { dg-final { scan-assembler-not "rep" } } */
+
+extern void bar ();
+
+int
+foo2 (int z, int x)
+{
+  if (x == 1)
+    {
+      bar ();
+      return z;
+    }
+  else
+    return x + z;
+}
diff --git a/gcc/testsuite/gcc.target/i386/pad-2.c b/gcc/testsuite/gcc.target/i386/pad-2.c
new file mode 100644 (file)
index 0000000..964547c
--- /dev/null
@@ -0,0 +1,9 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -fomit-frame-pointer -march=atom -S" } */
+/* { dg-final { scan-assembler-times "nop; nop; nop; nop; nop; nop; nop; nop" 1 } } */
+/* { dg-final { scan-assembler-not "rep" } } */
+
+void
+foo ()
+{
+}
diff --git a/gcc/testsuite/gcc.target/i386/pad-3.c b/gcc/testsuite/gcc.target/i386/pad-3.c
new file mode 100644 (file)
index 0000000..52442b4
--- /dev/null
@@ -0,0 +1,15 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -fomit-frame-pointer -march=atom -S" } */
+/* { dg-final { scan-assembler-not "nop" } } */
+/* { dg-final { scan-assembler-not "rep" } } */
+
+int s[8] = {1, 2, 3, 4, 5, 6, 7, 8};
+int d[8] = {11, 22, 33, 44, 55, 66, 77, 88};
+
+void
+foo ()
+{
+  int i;
+  for (i = 0; i < 8; i++)
+    d[i] = s[i] + 0x1000;
+}
diff --git a/gcc/testsuite/gcc.target/i386/pad-4.c b/gcc/testsuite/gcc.target/i386/pad-4.c
new file mode 100644 (file)
index 0000000..a7033fa
--- /dev/null
@@ -0,0 +1,13 @@
+/* { dg-do compile } */
+/* { dg-require-effective-target ilp32 } */
+/* { dg-options "-O2 -fomit-frame-pointer -march=atom -S -fPIC" } */
+/* { dg-final { scan-assembler-times "nop; nop; nop; nop; nop; nop; nop; nop" 1 } } */
+/* { dg-final { scan-assembler-not "rep" } } */
+
+extern int bar;
+
+int
+foo ()
+{
+  return bar;
+}
diff --git a/gcc/testsuite/gcc.target/i386/pad-5a.c b/gcc/testsuite/gcc.target/i386/pad-5a.c
new file mode 100644 (file)
index 0000000..9d0aa2a
--- /dev/null
@@ -0,0 +1,12 @@
+/* { dg-do compile } */
+/* { dg-require-effective-target ilp32 } */
+/* { dg-options "-O2 -fomit-frame-pointer -march=atom -S" } */
+/* { dg-final { scan-assembler-times "nop; nop" 1 } } */
+/* { dg-final { scan-assembler-not "nop; nop; nop" } } */
+/* { dg-final { scan-assembler-not "rep" } } */
+
+int
+foo (int x, int y, int z)
+{
+   return x + y + z;
+}
diff --git a/gcc/testsuite/gcc.target/i386/pad-5b.c b/gcc/testsuite/gcc.target/i386/pad-5b.c
new file mode 100644 (file)
index 0000000..2e1cf12
--- /dev/null
@@ -0,0 +1,12 @@
+/* { dg-do compile } */
+/* { dg-require-effective-target lp64 } */
+/* { dg-options "-O2 -fomit-frame-pointer -march=atom -S" } */
+/* { dg-final { scan-assembler-times "nop; nop; nop; nop" 1 } } */
+/* { dg-final { scan-assembler-not "nop; nop; nop; nop; nop" } } */
+/* { dg-final { scan-assembler-not "rep" } } */
+
+int
+foo (int x, int y, int z)
+{
+   return x + y + z;
+}
diff --git a/gcc/testsuite/gcc.target/i386/pad-6a.c b/gcc/testsuite/gcc.target/i386/pad-6a.c
new file mode 100644 (file)
index 0000000..e865967
--- /dev/null
@@ -0,0 +1,12 @@
+/* { dg-do compile } */
+/* { dg-require-effective-target ilp32 } */
+/* { dg-options "-O2 -fomit-frame-pointer -march=atom -S" } */
+/* { dg-final { scan-assembler-times "nop; nop; nop; nop" 1 } } */
+/* { dg-final { scan-assembler-not "nop; nop; nop; nop; nop" } } */
+/* { dg-final { scan-assembler-not "rep" } } */
+
+int
+foo (int x, int y)
+{
+   return x + y;
+}
diff --git a/gcc/testsuite/gcc.target/i386/pad-6b.c b/gcc/testsuite/gcc.target/i386/pad-6b.c
new file mode 100644 (file)
index 0000000..41aeaee
--- /dev/null
@@ -0,0 +1,12 @@
+/* { dg-do compile } */
+/* { dg-require-effective-target lp64 } */
+/* { dg-options "-O2 -fomit-frame-pointer -march=atom -S" } */
+/* { dg-final { scan-assembler-times "nop; nop; nop; nop; nop; nop" 1 } } */
+/* { dg-final { scan-assembler-not "nop; nop; nop; nop; nop; nop; nop" } } */
+/* { dg-final { scan-assembler-not "rep" } } */
+
+int
+foo (int x, int y)
+{
+   return x + y;
+}
diff --git a/gcc/testsuite/gcc.target/i386/pad-7.c b/gcc/testsuite/gcc.target/i386/pad-7.c
new file mode 100644 (file)
index 0000000..7a7493d
--- /dev/null
@@ -0,0 +1,11 @@
+/* { dg-do compile } */
+/* { dg-require-effective-target ilp32 } */
+/* { dg-options "-O2 -fomit-frame-pointer -march=atom -S" } */
+/* { dg-final { scan-assembler-not "nop" } } */
+/* { dg-final { scan-assembler-not "rep" } } */
+
+int
+foo (int x, int y, int z)
+{
+   return x + y + z + y;
+}
diff --git a/gcc/testsuite/gcc.target/i386/pad-8.c b/gcc/testsuite/gcc.target/i386/pad-8.c
new file mode 100644 (file)
index 0000000..873a0a4
--- /dev/null
@@ -0,0 +1,11 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -fomit-frame-pointer -march=atom -S" } */
+/* { dg-final { scan-assembler-times "nop; nop; nop; nop; nop; nop" 1 } } */
+/* { dg-final { scan-assembler-not "nop; nop; nop; nop; nop; nop; nop" } } */
+/* { dg-final { scan-assembler-not "rep" } } */
+
+int
+foo (int x, int y)
+{
+   return y;
+}
diff --git a/gcc/testsuite/gcc.target/i386/pad-9.c b/gcc/testsuite/gcc.target/i386/pad-9.c
new file mode 100644 (file)
index 0000000..3d68805
--- /dev/null
@@ -0,0 +1,15 @@
+/* { dg-do compile } */
+/* { dg-require-effective-target lp64 } */
+/* { dg-options "-O2 -fomit-frame-pointer -march=atom -S" } */
+/* { dg-final { scan-assembler-times "nop; nop; nop; nop" 1 } } */
+/* { dg-final { scan-assembler-not "nop; nop; nop; nop; nop" } } */
+/* { dg-final { scan-assembler-not "rep" } } */
+
+extern void bar (void);
+
+void
+foo (int x)
+{
+  if (x)
+    bar ();
+}