Add option to force indirect calls for x86
authorAndi Kleen <ak@linux.intel.com>
Thu, 9 Nov 2017 05:42:43 +0000 (05:42 +0000)
committerAndi Kleen <ak@gcc.gnu.org>
Thu, 9 Nov 2017 05:42:43 +0000 (05:42 +0000)
This patch adds a -mforce-indirect-call option to force all calls
or tail calls between functions on x86_64 to be indirect. This is similar to the
large code model, but doesn't affect jumps inside functions, so it has much
less run time overhead.

This is useful with Intel Processor Trace (PT). PT has precise timing
for indirect calls/jumps, but not for direct ones. So if we force calls
to be indirect, every function can be timed relatively accurately
(minus the overhead of the indirect branch).

Without this, short functions often don't see a timing update and cannot
be measured.

The timing requires at least Skylake or Goldmont based CPUs.

I made it an option. Originally I tried to make it a new code model,
but since it can be combined with other code models (medium, pic, kernel
etc.) this turned out to be too many combinations.

For example, with gcc: the first column is a time stamp in ns for the
functions.

$ perf record -e intel_pt/noretcomp=1,cyc=1,cyc_thresh=1/u ./cc1 -O3 hello.c
$ perf script --itrace=cr -F callindent,time,sym,addr --ns  | sed -n 180000,182000p | less
...
1184596.432756920:                             build_int_cst                        =>           79c9de c_common_nodes_and_builtins
1184596.432756921:                             tree_cons                            =>           ee2080 tree_cons
1184596.432756938:                                 ggc_internal_alloc               =>           80f3e0 ggc_internal_alloc
1184596.432756951:                                     memset@plt                   =>           598af0 memset@plt
1184596.432756967:                                     __memset_avx2_unaligned_erms =>           80f605 ggc_internal_alloc
1184596.432756969:                                 ggc_internal_alloc               =>           ee20a2 tree_cons
1184596.432756973:                             tree_cons                            =>           79c9f4 c_common_nodes_and_builtins
1184596.432756974:                             build_int_cst                        =>           ef9a40 build_int_cst
1184596.432756996:                                 wide_int_to_tree                 =>           ef93a0 wide_int_to_tree
1184596.432757000:                                     wi::force_to_size            =>           f48f70 wi::force_to_size
1184596.432757005:                                     canonize                     =>           ef94de wide_int_to_tree
1184596.432757021:                                     get_int_cst_ext_nunits       =>           ee1960 get_int_cst_ext_nunits
1184596.432757026:                                     get_int_cst_ext_nunits       =>           ef94fe wide_int_to_tree
1184596.432757042:                                     tree_int_cst_elt_check       =>           83e310 tree_int_cst_elt_check
1184596.432757044:                                     tree_int_cst_elt_check       =>           ef9761 wide_int_to_tree
1184596.432757046:                                 wide_int_to_tree                 =>           ef9a9b build_int_cst

gcc/:
2017-11-08  Andi Kleen  <ak@linux.intel.com>

* config/i386/i386.opt: Add -mforce-indirect-call.
* config/i386/predicates.md: Check for flag_force_indirect_call.
* doc/invoke.texi: Document -mforce-indirect-call.

gcc/testsuite/:
2017-11-08  Andi Kleen  <ak@linux.intel.com>

* gcc.target/i386/force-indirect-call-1.c: New test.
* gcc.target/i386/force-indirect-call-2.c: New test.
* gcc.target/i386/force-indirect-call-3.c: New test.

From-SVN: r254560

gcc/ChangeLog
gcc/config/i386/i386.opt
gcc/config/i386/predicates.md
gcc/doc/invoke.texi
gcc/testsuite/ChangeLog
gcc/testsuite/gcc.target/i386/force-indirect-call-1.c [new file with mode: 0644]
gcc/testsuite/gcc.target/i386/force-indirect-call-2.c [new file with mode: 0644]
gcc/testsuite/gcc.target/i386/force-indirect-call-3.c [new file with mode: 0644]

index b51ef192fc71ba8e27ca949e327872bff89e0631..442e5653a4063e7c2a83e8cc14cd29563ffa3f58 100644 (file)
@@ -1,3 +1,9 @@
+2017-11-08  Andi Kleen  <ak@linux.intel.com>
+
+       * config/i386/i386.opt: Add -mforce-indirect-call.
+       * config/i386/predicates.md: Check for flag_force_indirect_call.
+       * doc/invoke.texi: Document -mforce-indirect-call
+
 2017-11-08  Kito Cheng  <kito.cheng@gmail.com>
 
        * config/riscv/riscv-protos.h (riscv_slow_unaligned_access_p):
index b1bcb396935479bbe9d6e83a1520c4ede26d5ad1..d2c10ab24d1b799136d81052f9a51a4570574815 100644 (file)
@@ -977,3 +977,7 @@ mcet-switch
 Target Report Undocumented Var(flag_cet_switch) Init(0)
 Turn on CET instrumentation for switch statements, which use jump table and
 indirect jump.
+
+mforce-indirect-call
+Target Report Var(flag_force_indirect_call) Init(0)
+Make all function calls indirect.
index c3f442eb8ac556ee44ede54bb4ef2e5dceeb4eaf..c6e6e980959778ba8d248617b592e9fd59ee9533 100644 (file)
 (define_predicate "constant_call_address_operand"
   (match_code "symbol_ref")
 {
-  if (ix86_cmodel == CM_LARGE || ix86_cmodel == CM_LARGE_PIC)
+  if (ix86_cmodel == CM_LARGE || ix86_cmodel == CM_LARGE_PIC
+      || flag_force_indirect_call)
     return false;
   if (TARGET_DLLIMPORT_DECL_ATTRIBUTES && SYMBOL_REF_DLLIMPORT_P (op))
     return false;
index 2ef88e081f982f5619132cc33ce23c3fb542ae11..e897d93070ae320f741aeba4d2490f8366843935 100644 (file)
@@ -1205,7 +1205,7 @@ See RS/6000 and PowerPC Options.
 -msse4a  -m3dnow  -m3dnowa  -mpopcnt  -mabm  -mbmi  -mtbm  -mfma4  -mxop @gol
 -mlzcnt  -mbmi2  -mfxsr  -mxsave  -mxsaveopt  -mrtm  -mlwp  -mmpx  @gol
 -mmwaitx  -mclzero  -mpku  -mthreads @gol
--mcet -mibt -mshstk @gol
+-mcet -mibt -mshstk -mforce-indirect-call @gol
 -mms-bitfields  -mno-align-stringops  -minline-all-stringops @gol
 -minline-stringops-dynamically  -mstringop-strategy=@var{alg} @gol
 -mmemcpy-strategy=@var{strategy}  -mmemset-strategy=@var{strategy} @gol
@@ -26175,6 +26175,12 @@ You can control this behavior for specific functions by
 using the function attributes @code{ms_abi} and @code{sysv_abi}.
 @xref{Function Attributes}.
 
+@item -mforce-indirect-call
+@opindex mforce-indirect-call
+Force all calls to functions to be indirect. This is useful
+when using Intel Processor Trace where it generates more precise timing
+information for function calls.
+
 @item -mcall-ms2sysv-xlogues
 @opindex mcall-ms2sysv-xlogues
 @opindex mno-call-ms2sysv-xlogues
index 75c8a0ab752bfbedc4712bb97117de749ba80d34..73d5f874326e4aeffefb4945667882a600f8e44d 100644 (file)
@@ -1,3 +1,9 @@
+2017-11-08  Andi Kleen  <ak@linux.intel.com>
+
+       * gcc.target/i386/force-indirect-call-1.c: New test.
+       * gcc.target/i386/force-indirect-call-2.c: New test.
+       * gcc.target/i386/force-indirect-call-3.c: New test.
+
 2017-11-08  Steven G. Kargl  <kargl@kgcc.gnu.org>
 
        PR Fortran/82841
diff --git a/gcc/testsuite/gcc.target/i386/force-indirect-call-1.c b/gcc/testsuite/gcc.target/i386/force-indirect-call-1.c
new file mode 100644 (file)
index 0000000..be1be2c
--- /dev/null
@@ -0,0 +1,23 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -mforce-indirect-call" } */
+/* { dg-final { scan-assembler-times "call\[ \\t\]+\\*%" 2 } } */
+/* { dg-final { scan-assembler-times "jmp\[ \\t\]+\\*%" 1 } } */
+int x;
+int y;
+
+void __attribute__((noinline)) f1(void)
+{
+       x++;
+}
+
+static __attribute__((noinline)) void f3(void)
+{
+       y++;
+}
+
+void f2()
+{
+       f1();
+       f3();
+       f1();
+}
diff --git a/gcc/testsuite/gcc.target/i386/force-indirect-call-2.c b/gcc/testsuite/gcc.target/i386/force-indirect-call-2.c
new file mode 100644 (file)
index 0000000..dd0df25
--- /dev/null
@@ -0,0 +1,5 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -mforce-indirect-call -fPIC" } */
+/* { dg-final { scan-assembler-times "call\[ \\t\]+\\*%" 2 } } */
+/* { dg-final { scan-assembler-times "jmp\[ \\t\]+\\*%" 1 } } */
+#include "force-indirect-call-1.c"
diff --git a/gcc/testsuite/gcc.target/i386/force-indirect-call-3.c b/gcc/testsuite/gcc.target/i386/force-indirect-call-3.c
new file mode 100644 (file)
index 0000000..28d8c98
--- /dev/null
@@ -0,0 +1,5 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -mforce-indirect-call -mcmodel=medium" } */
+/* { dg-final { scan-assembler-times "call\[ \\t\]+\\*%" 2 } } */
+/* { dg-final { scan-assembler-times "jmp\[ \\t\]+\\*%" 1 } } */
+#include "force-indirect-call-1.c"