Add abs pattern to handle {si,di} mode abs to avoid pmax/cmove conversion.
author Hongyu Wang <hongyu.wang@intel.com>
Tue, 17 Dec 2019 01:50:35 +0000 (01:50 +0000)
committer Hongtao Liu <liuhongt@gcc.gnu.org>
Tue, 17 Dec 2019 01:50:35 +0000 (01:50 +0000)
2019-12-17  Hongyu Wang  <hongyu.wang@intel.com>

gcc/
PR target/92651
* config/i386/i386.h (TARGET_EXPAND_ABS): New macro.
* config/i386/x86-tune.def (X86_TUNE_EXPAND_ABS): New.
* config/i386/i386.md (abs<SWI48x>2): New define_expand.

gcc/testsuite
* gcc.target/i386/pr92651.c: New testcase.

From-SVN: r279452

gcc/ChangeLog
gcc/config/i386/i386.h
gcc/config/i386/i386.md
gcc/config/i386/x86-tune.def
gcc/testsuite/ChangeLog
gcc/testsuite/gcc.target/i386/pr92651.c [new file with mode: 0644]

index 9a37d825362a0f05552d97010801799d8463ade0..bd1a252b9021ac190e03c2d7f9d463c4a10c9056 100644 (file)
@@ -1,3 +1,10 @@
+2019-12-17  Hongyu Wang  <hongyu.wang@intel.com>
+
+       PR target/92651
+       * config/i386/i386.h (TARGET_EXPAND_ABS): New macro.
+       * config/i386/x86-tune.def (X86_TUNE_EXPAND_ABS): New.
+       * config/i386/i386.md (abs<SWI48x>2): New define_expand.
+
 2019-12-17  H.J. Lu  <hjl.tools@gmail.com>
 
        PR target/92807
index 2542cb317835afa4992298614e11c2df0d33725b..65f6c76f846de9870b9599d87945ad6125504487 100644 (file)
@@ -596,6 +596,8 @@ extern unsigned char ix86_tune_features[X86_TUNE_LAST];
        ix86_tune_features[X86_TUNE_USE_XCHG_FOR_ATOMIC_STORE]
 #define TARGET_EMIT_VZEROUPPER \
        ix86_tune_features[X86_TUNE_EMIT_VZEROUPPER]
+#define TARGET_EXPAND_ABS \
+       ix86_tune_features[X86_TUNE_EXPAND_ABS]
 
 /* Feature tests against the various architecture variations.  */
 enum ix86_arch_indices {
index 66a6f200672b9eb65dfbe73b1affd38a76fe3213..cf4a0ccb0aaba635e4e61fbce3108c6e7825018c 100644 (file)
   "#"
   [(set_attr "isa" "noavx,noavx,avx,avx")])
 
+;; Special expand pattern to handle integer mode abs
+
+(define_expand "abs<mode>2"
+  [(set (match_operand:SWI48x 0 "register_operand")
+    (abs:SWI48x
+      (match_operand:SWI48x 1 "register_operand")))]
+  "TARGET_EXPAND_ABS"
+  {
+    machine_mode mode = <MODE>mode;
+
+    /* Expand integer abs, when TARGET_EXPAND_ABS is set, using
+       abs (x) = (((signed) x >> (W-1)) ^ x) - ((signed) x >> (W-1)),
+       where W is GET_MODE_PRECISION (mode).  Three insns are emitted,
+       in this order:
+         1. shift_dst = operands[1] >> (W-1), an arithmetic (ASHIFTRT)
+            shift into a fresh pseudo; the shift count is converted to
+            QImode before it is used,
+         2. operands[0] = shift_dst ^ operands[1],
+         3. operands[0] = operands[0] - shift_dst.
+       Each SET is wrapped in a two-element PARALLEL together with a
+       CLOBBER of FLAGS_REG in CCmode; the same CLOBBER rtx is reused
+       for all three insns.  */
+    rtx shift_amount = gen_int_shift_amount (mode,
+                                      GET_MODE_PRECISION (mode)
+                                      - 1);
+    shift_amount = convert_modes (E_QImode, GET_MODE (shift_amount),
+                           shift_amount, 1);
+    rtx shift_dst = gen_reg_rtx (mode);
+    rtx shift_op = gen_rtx_SET (shift_dst,
+                         gen_rtx_fmt_ee (ASHIFTRT, mode,
+                                         operands[1], shift_amount));
+    rtx clobber = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode,
+                                                   FLAGS_REG));
+    emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, shift_op,
+                                               clobber)));
+
+    rtx xor_op = gen_rtx_SET (operands[0],
+                       gen_rtx_fmt_ee (XOR, mode, shift_dst,
+                                       operands[1]));
+    emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, xor_op, clobber)));
+
+    rtx minus_op = gen_rtx_SET (operands[0],
+                         gen_rtx_fmt_ee (MINUS, mode,
+                                         operands[0], shift_dst));
+    emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, minus_op,
+                                               clobber)));
+    DONE;
+  })
+
 (define_expand "<code><mode>2"
   [(set (match_operand:X87MODEF 0 "register_operand")
        (absneg:X87MODEF (match_operand:X87MODEF 1 "register_operand")))]
index 328535d38d78e5ac66b00311366b21c970a25651..58a81e5ea729e5578e604c7d77609a4f52b475d4 100644 (file)
@@ -317,6 +317,13 @@ DEF_TUNE (X86_TUNE_ONE_IF_CONV_INSN, "one_if_conv_insn",
 DEF_TUNE (X86_TUNE_USE_XCHG_FOR_ATOMIC_STORE, "use_xchg_for_atomic_store",
         m_CORE_ALL | m_BDVER | m_ZNVER | m_GENERIC)
 
+/* X86_TUNE_EXPAND_ABS: This enables a new abs pattern by
+   generating instructions for abs (x) = (((signed) x >> (W-1)) ^ x) -
+   ((signed) x >> (W-1)) instead of cmove or SSE max/abs instructions.  */
+DEF_TUNE (X86_TUNE_EXPAND_ABS, "expand_abs",
+         m_CORE_ALL | m_SILVERMONT | m_KNL | m_KNM | m_GOLDMONT
+         | m_GOLDMONT_PLUS | m_TREMONT )
+
 /*****************************************************************************/
 /* 387 instruction selection tuning                                          */
 /*****************************************************************************/
index 4142c507e2a9c02dfdf566e8f545a5fe47e67878..5fff39ec7fe398b6823b6c8107535448a7921bc4 100644 (file)
@@ -1,3 +1,7 @@
+2019-12-17  Hongyu Wang  <hongyu.wang@intel.com>
+
+       * gcc.target/i386/pr92651.c: New testcase.
+
 2019-12-17  H.J. Lu  <hjl.tools@gmail.com>
 
        PR target/92807
diff --git a/gcc/testsuite/gcc.target/i386/pr92651.c b/gcc/testsuite/gcc.target/i386/pr92651.c
new file mode 100644 (file)
index 0000000..3d0c3c7
--- /dev/null
@@ -0,0 +1,16 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -march=corei7" } */
+
+#include <stdlib.h>
+
+int foo(unsigned char a, unsigned char b)
+{
+    int isum=abs(a - b);  /* a and b promote to int, so abs sees a signed int difference.  */
+    return isum;
+}
+
+/* { dg-final { scan-assembler-not "cmov*" } } */
+/* { dg-final { scan-assembler "(cltd|cdq|shr)" } } */
+/* { dg-final { scan-assembler-times "xor" 1 } } */
+/* { dg-final { scan-assembler-times "sub" 2 } } */
+