Fold single imm use of a FMA if it is a negation [PR95060]
author: Jakub Jelinek <jakub@redhat.com>
Wed, 13 May 2020 09:21:02 +0000 (11:21 +0200)
committer: Jakub Jelinek <jakub@redhat.com>
Wed, 13 May 2020 09:21:02 +0000 (11:21 +0200)
match.pd already has simplifications for the negation of an FMA (FMS, FNMA,
FNMS) call when that call has a single use, but once the widening_mul pass
discovers FMAs, nothing folds the resulting statements anymore.

So, the following patch adjusts the widening_mul pass to handle that.

I had to adjust quite a lot of tests, because they contain nested FMAs
(one FMA feeding another one) and the patch results in some (equivalent)
changes in the chosen instructions.  Previously, the negation of one FMA's
result would result in the dependent FMA being adjusted for the negation,
but now the first FMA is adjusted instead.

2020-05-13  Jakub Jelinek  <jakub@redhat.com>

PR tree-optimization/95060
* tree-ssa-math-opts.c (convert_mult_to_fma_1): Fold a NEGATE_EXPR
if it is the single use of the FMA internal builtin.

* gcc.target/i386/avx512f-pr95060.c: New test.
* gcc.target/i386/fma_double_1.c: Adjust expected insn counts.
* gcc.target/i386/fma_double_2.c: Likewise.
* gcc.target/i386/fma_double_3.c: Likewise.
* gcc.target/i386/fma_double_4.c: Likewise.
* gcc.target/i386/fma_double_5.c: Likewise.
* gcc.target/i386/fma_double_6.c: Likewise.
* gcc.target/i386/fma_float_1.c: Likewise.
* gcc.target/i386/fma_float_2.c: Likewise.
* gcc.target/i386/fma_float_3.c: Likewise.
* gcc.target/i386/fma_float_4.c: Likewise.
* gcc.target/i386/fma_float_5.c: Likewise.
* gcc.target/i386/fma_float_6.c: Likewise.
* gcc.target/i386/l_fma_double_1.c: Likewise.
* gcc.target/i386/l_fma_double_2.c: Likewise.
* gcc.target/i386/l_fma_double_3.c: Likewise.
* gcc.target/i386/l_fma_double_4.c: Likewise.
* gcc.target/i386/l_fma_double_5.c: Likewise.
* gcc.target/i386/l_fma_double_6.c: Likewise.
* gcc.target/i386/l_fma_float_1.c: Likewise.
* gcc.target/i386/l_fma_float_2.c: Likewise.
* gcc.target/i386/l_fma_float_3.c: Likewise.
* gcc.target/i386/l_fma_float_4.c: Likewise.
* gcc.target/i386/l_fma_float_5.c: Likewise.
* gcc.target/i386/l_fma_float_6.c: Likewise.

28 files changed:
gcc/ChangeLog
gcc/testsuite/ChangeLog
gcc/testsuite/gcc.target/i386/avx512f-pr95060.c [new file with mode: 0644]
gcc/testsuite/gcc.target/i386/fma_double_1.c
gcc/testsuite/gcc.target/i386/fma_double_2.c
gcc/testsuite/gcc.target/i386/fma_double_3.c
gcc/testsuite/gcc.target/i386/fma_double_4.c
gcc/testsuite/gcc.target/i386/fma_double_5.c
gcc/testsuite/gcc.target/i386/fma_double_6.c
gcc/testsuite/gcc.target/i386/fma_float_1.c
gcc/testsuite/gcc.target/i386/fma_float_2.c
gcc/testsuite/gcc.target/i386/fma_float_3.c
gcc/testsuite/gcc.target/i386/fma_float_4.c
gcc/testsuite/gcc.target/i386/fma_float_5.c
gcc/testsuite/gcc.target/i386/fma_float_6.c
gcc/testsuite/gcc.target/i386/l_fma_double_1.c
gcc/testsuite/gcc.target/i386/l_fma_double_2.c
gcc/testsuite/gcc.target/i386/l_fma_double_3.c
gcc/testsuite/gcc.target/i386/l_fma_double_4.c
gcc/testsuite/gcc.target/i386/l_fma_double_5.c
gcc/testsuite/gcc.target/i386/l_fma_double_6.c
gcc/testsuite/gcc.target/i386/l_fma_float_1.c
gcc/testsuite/gcc.target/i386/l_fma_float_2.c
gcc/testsuite/gcc.target/i386/l_fma_float_3.c
gcc/testsuite/gcc.target/i386/l_fma_float_4.c
gcc/testsuite/gcc.target/i386/l_fma_float_5.c
gcc/testsuite/gcc.target/i386/l_fma_float_6.c
gcc/tree-ssa-math-opts.c

index 4234a72432c6f1c45a55b48e7b1104f7f878b798..ba230b42121073fb7218ef0c191515f61f6d23e2 100644 (file)
@@ -1,3 +1,9 @@
+2020-05-13  Jakub Jelinek  <jakub@redhat.com>
+
+       PR tree-optimization/95060
+       * tree-ssa-math-opts.c (convert_mult_to_fma_1): Fold a NEGATE_EXPR
+       if it is the single use of the FMA internal builtin.
+
 2020-05-13  Bin Cheng  <bin.cheng@linux.alibaba.com>
 
        PR tree-optimization/94969
index 3e5fe75b60b2c5339304b33dff9ea9de3ec4847b..02878c0647c49438411e6ab3be79f9997a342e2f 100644 (file)
@@ -1,3 +1,32 @@
+2020-05-13  Jakub Jelinek  <jakub@redhat.com>
+
+       PR tree-optimization/95060
+       * gcc.target/i386/avx512f-pr95060.c: New test.
+       * gcc.target/i386/fma_double_1.c: Adjust expected insn counts.
+       * gcc.target/i386/fma_double_2.c: Likewise.
+       * gcc.target/i386/fma_double_3.c: Likewise.
+       * gcc.target/i386/fma_double_4.c: Likewise.
+       * gcc.target/i386/fma_double_5.c: Likewise.
+       * gcc.target/i386/fma_double_6.c: Likewise.
+       * gcc.target/i386/fma_float_1.c: Likewise.
+       * gcc.target/i386/fma_float_2.c: Likewise.
+       * gcc.target/i386/fma_float_3.c: Likewise.
+       * gcc.target/i386/fma_float_4.c: Likewise.
+       * gcc.target/i386/fma_float_5.c: Likewise.
+       * gcc.target/i386/fma_float_6.c: Likewise.
+       * gcc.target/i386/l_fma_double_1.c: Likewise.
+       * gcc.target/i386/l_fma_double_2.c: Likewise.
+       * gcc.target/i386/l_fma_double_3.c: Likewise.
+       * gcc.target/i386/l_fma_double_4.c: Likewise.
+       * gcc.target/i386/l_fma_double_5.c: Likewise.
+       * gcc.target/i386/l_fma_double_6.c: Likewise.
+       * gcc.target/i386/l_fma_float_1.c: Likewise.
+       * gcc.target/i386/l_fma_float_2.c: Likewise.
+       * gcc.target/i386/l_fma_float_3.c: Likewise.
+       * gcc.target/i386/l_fma_float_4.c: Likewise.
+       * gcc.target/i386/l_fma_float_5.c: Likewise.
+       * gcc.target/i386/l_fma_float_6.c: Likewise.
+
 2020-05-13  Martin Liska  <mliska@suse.cz>
 
        PR sanitizer/95051
diff --git a/gcc/testsuite/gcc.target/i386/avx512f-pr95060.c b/gcc/testsuite/gcc.target/i386/avx512f-pr95060.c
new file mode 100644 (file)
index 0000000..b38dc4f
--- /dev/null
@@ -0,0 +1,22 @@
+/* PR tree-optimization/95060 */
+/* { dg-do compile } */
+/* { dg-options "-O3 -ffast-math -mavx512f" } */
+/* { dg-final { scan-assembler "\tvfnmsub" } } */
+/* { dg-final { scan-assembler-not "\tvfmadd" } } */
+
+#define N 32
+float r[N], a[N], b[N], c[N];
+
+void
+foo (void)
+{
+  for (int i = 0; i < N; i++)
+    r[i] = -(a[i] * b[i]) - c[i];
+}
+
+void
+bar (void)
+{
+  for (int i = 0; i < N; i++)
+    r[i] = -(a[i] * b[i] + c[i]);
+}
index c3aa3e83c02af36ce0246b18c30e3d635a2c118b..767ee5ca63e36b6cbecea9abe7a1c3565eecba5a 100644 (file)
@@ -8,11 +8,9 @@
 
 #include "fma_1.h"
 
-/* { dg-final { scan-assembler-times "vfmadd132sd" 4  } } */
+/* { dg-final { scan-assembler-times "vfmadd132sd" 8  } } */
 /* { dg-final { scan-assembler-times "vfmadd231sd" 4  } } */
-/* { dg-final { scan-assembler-times "vfmsub132sd" 4  } } */
+/* { dg-final { scan-assembler-times "vfmsub132sd" 8  } } */
 /* { dg-final { scan-assembler-times "vfmsub231sd" 4  } } */
-/* { dg-final { scan-assembler-times "vfnmadd132sd" 4  } } */
 /* { dg-final { scan-assembler-times "vfnmadd231sd" 4  } } */
-/* { dg-final { scan-assembler-times "vfnmsub132sd" 4  } } */
 /* { dg-final { scan-assembler-times "vfnmsub231sd" 4  } } */
index 843eff0a15813aceb31da0de86f2c61ab533a82d..f15fb3b2cfa1a8dd24d745c21dd0ed61f26172f7 100644 (file)
@@ -8,7 +8,7 @@
 
 #include "fma_2.h"
 
-/* { dg-final { scan-assembler-times "vfmadd132sd" 8  } } */
-/* { dg-final { scan-assembler-times "vfmsub132sd" 8  } } */
-/* { dg-final { scan-assembler-times "vfnmadd132sd" 8  } } */
-/* { dg-final { scan-assembler-times "vfnmsub132sd" 8  } } */
+/* { dg-final { scan-assembler-times "vfmadd132sd" 12  } } */
+/* { dg-final { scan-assembler-times "vfmsub132sd" 12  } } */
+/* { dg-final { scan-assembler-times "vfnmadd132sd" 4  } } */
+/* { dg-final { scan-assembler-times "vfnmsub132sd" 4  } } */
index 3a04777c6bfe3ef46d955f459d10ca8661d9bb56..6b67774ca9113c9d8a59addb559848a7696f8b97 100644 (file)
@@ -8,7 +8,7 @@
 
 #include "fma_3.h"
 
-/* { dg-final { scan-assembler-times "vfmadd\[123\]+sd" 8  } } */
-/* { dg-final { scan-assembler-times "vfmsub\[132\]+sd" 8  } } */
-/* { dg-final { scan-assembler-times "vfnmadd\[132\]+sd" 8  } } */
-/* { dg-final { scan-assembler-times "vfnmsub\[132\]+sd" 8  } } */
+/* { dg-final { scan-assembler-times "vfmadd\[123\]+sd" 12  } } */
+/* { dg-final { scan-assembler-times "vfmsub\[132\]+sd" 12  } } */
+/* { dg-final { scan-assembler-times "vfnmadd\[132\]+sd" 4  } } */
+/* { dg-final { scan-assembler-times "vfnmsub\[132\]+sd" 4  } } */
index 51fc111adb2f7db89c9349aa7eff14de72784abf..267f6fd2d161d28c4620b5dfa571f969942bb5b6 100644 (file)
@@ -8,7 +8,7 @@
 
 #include "fma_4.h"
 
-/* { dg-final { scan-assembler-times "vfmadd132sd" 8  } } */
-/* { dg-final { scan-assembler-times "vfmsub132sd" 8  } } */
-/* { dg-final { scan-assembler-times "vfnmadd132sd" 8  } } */
-/* { dg-final { scan-assembler-times "vfnmsub132sd" 8  } } */
+/* { dg-final { scan-assembler-times "vfmadd132sd" 12  } } */
+/* { dg-final { scan-assembler-times "vfmsub132sd" 12  } } */
+/* { dg-final { scan-assembler-times "vfnmadd132sd" 4  } } */
+/* { dg-final { scan-assembler-times "vfnmsub132sd" 4  } } */
index 640b552b0f05c5fd36bcd98a8c7ef34fa7ab9daa..fd7fcabe5b56589f383b1a5934c9cdc9d154591d 100644 (file)
@@ -8,7 +8,7 @@
 
 #include "fma_5.h"
 
-/* { dg-final { scan-assembler-times "vfmadd\[132\]+sd" 8  } } */
-/* { dg-final { scan-assembler-times "vfmsub\[132\]+sd" 8  } } */
-/* { dg-final { scan-assembler-times "vfnmadd\[132\]+sd" 8  } } */
-/* { dg-final { scan-assembler-times "vfnmsub\[132\]+sd" 8  } } */
+/* { dg-final { scan-assembler-times "vfmadd\[132\]+sd" 12  } } */
+/* { dg-final { scan-assembler-times "vfmsub\[132\]+sd" 12  } } */
+/* { dg-final { scan-assembler-times "vfnmadd\[132\]+sd" 4  } } */
+/* { dg-final { scan-assembler-times "vfnmsub\[132\]+sd" 4  } } */
index 7b75a224f116c7b4a09afb9c516fdc9195175302..f7ab1ce5b75450e1841f164bc745bcf297b371ce 100644 (file)
@@ -8,7 +8,7 @@
 
 #include "fma_6.h"
 
-/* { dg-final { scan-assembler-times "vfmadd132sd" 8  } } */
-/* { dg-final { scan-assembler-times "vfmsub132sd" 8  } } */
-/* { dg-final { scan-assembler-times "vfnmadd132sd" 8  } } */
-/* { dg-final { scan-assembler-times "vfnmsub132sd" 8  } } */
+/* { dg-final { scan-assembler-times "vfmadd132sd" 12  } } */
+/* { dg-final { scan-assembler-times "vfmsub132sd" 12  } } */
+/* { dg-final { scan-assembler-times "vfnmadd132sd" 4  } } */
+/* { dg-final { scan-assembler-times "vfnmsub132sd" 4  } } */
index 67b1f3fe7cb5c71bb1214680527b64d7d87eb31d..a4ac2cbbe00927662494c045dd53b413ab3ae899 100644 (file)
@@ -8,11 +8,9 @@
 
 #include "fma_1.h"
 
-/* { dg-final { scan-assembler-times "vfmadd132ss" 4  } } */
+/* { dg-final { scan-assembler-times "vfmadd132ss" 8  } } */
 /* { dg-final { scan-assembler-times "vfmadd231ss" 4  } } */
-/* { dg-final { scan-assembler-times "vfmsub132ss" 4  } } */
+/* { dg-final { scan-assembler-times "vfmsub132ss" 8  } } */
 /* { dg-final { scan-assembler-times "vfmsub231ss" 4  } } */
-/* { dg-final { scan-assembler-times "vfnmadd132ss" 4  } } */
 /* { dg-final { scan-assembler-times "vfnmadd231ss" 4  } } */
-/* { dg-final { scan-assembler-times "vfnmsub132ss" 4  } } */
 /* { dg-final { scan-assembler-times "vfnmsub231ss" 4  } } */
index a54644d0c7001e7036d5223a1c9ed2d2d54206cb..a7509116b81ea47e892a592189d2e3fdec5a39fa 100644 (file)
@@ -8,7 +8,7 @@
 
 #include "fma_2.h"
 
-/* { dg-final { scan-assembler-times "vfmadd132ss" 8  } } */
-/* { dg-final { scan-assembler-times "vfmsub132ss" 8  } } */
-/* { dg-final { scan-assembler-times "vfnmadd132ss" 8  } } */
-/* { dg-final { scan-assembler-times "vfnmsub132ss" 8  } } */
+/* { dg-final { scan-assembler-times "vfmadd132ss" 12  } } */
+/* { dg-final { scan-assembler-times "vfmsub132ss" 12  } } */
+/* { dg-final { scan-assembler-times "vfnmadd132ss" 4  } } */
+/* { dg-final { scan-assembler-times "vfnmsub132ss" 4  } } */
index 7986ce4ee767d3b991b9f683af7cf7e768e5aa9e..d88a3bfcd25060e2e0e0c9c69a12568622222056 100644 (file)
@@ -8,7 +8,7 @@
 
 #include "fma_3.h"
 
-/* { dg-final { scan-assembler-times "vfmadd\[132\]+ss" 8  } } */
-/* { dg-final { scan-assembler-times "vfmsub\[132\]+ss" 8  } } */
-/* { dg-final { scan-assembler-times "vfnmadd\[132\]+ss" 8  } } */
-/* { dg-final { scan-assembler-times "vfnmsub\[132\]+ss" 8  } } */
+/* { dg-final { scan-assembler-times "vfmadd\[132\]+ss" 12  } } */
+/* { dg-final { scan-assembler-times "vfmsub\[132\]+ss" 12  } } */
+/* { dg-final { scan-assembler-times "vfnmadd\[132\]+ss" 4  } } */
+/* { dg-final { scan-assembler-times "vfnmsub\[132\]+ss" 4  } } */
index d9689d9a7af2bb125d304bb6d8be6db9f28a82f5..cb1a81cbb26fffcaf47135bd9c575ad0db620e34 100644 (file)
@@ -8,7 +8,7 @@
 
 #include "fma_4.h"
 
-/* { dg-final { scan-assembler-times "vfmadd132ss" 8  } } */
-/* { dg-final { scan-assembler-times "vfmsub132ss" 8  } } */
-/* { dg-final { scan-assembler-times "vfnmadd132ss" 8  } } */
-/* { dg-final { scan-assembler-times "vfnmsub132ss" 8  } } */
+/* { dg-final { scan-assembler-times "vfmadd132ss" 12  } } */
+/* { dg-final { scan-assembler-times "vfmsub132ss" 12  } } */
+/* { dg-final { scan-assembler-times "vfnmadd132ss" 4  } } */
+/* { dg-final { scan-assembler-times "vfnmsub132ss" 4  } } */
index 2105ae627f0ce7941e9e6373a5d5699e3b76c9a7..3a62590d2a2e1af9eea4c7ab1be97eb58c8b4172 100644 (file)
@@ -8,7 +8,7 @@
 
 #include "fma_5.h"
 
-/* { dg-final { scan-assembler-times "vfmadd\[132\]+ss" 8  } } */
-/* { dg-final { scan-assembler-times "vfmsub\[132\]+ss" 8  } } */
-/* { dg-final { scan-assembler-times "vfnmadd\[132\]+ss" 8  } } */
-/* { dg-final { scan-assembler-times "vfnmsub\[132\]+ss" 8  } } */
+/* { dg-final { scan-assembler-times "vfmadd\[132\]+ss" 12  } } */
+/* { dg-final { scan-assembler-times "vfmsub\[132\]+ss" 12  } } */
+/* { dg-final { scan-assembler-times "vfnmadd\[132\]+ss" 4  } } */
+/* { dg-final { scan-assembler-times "vfnmsub\[132\]+ss" 4  } } */
index c7580736834f3fcd1e4cd754615f556b1f5241c1..30d8283bde390e0eb853d84505c615deb38eb503 100644 (file)
@@ -8,7 +8,7 @@
 
 #include "fma_6.h"
 
-/* { dg-final { scan-assembler-times "vfmadd132ss" 8  } } */
-/* { dg-final { scan-assembler-times "vfmsub132ss" 8  } } */
-/* { dg-final { scan-assembler-times "vfnmadd132ss" 8  } } */
-/* { dg-final { scan-assembler-times "vfnmsub132ss" 8  } } */
+/* { dg-final { scan-assembler-times "vfmadd132ss" 12  } } */
+/* { dg-final { scan-assembler-times "vfmsub132ss" 12  } } */
+/* { dg-final { scan-assembler-times "vfnmadd132ss" 4  } } */
+/* { dg-final { scan-assembler-times "vfnmsub132ss" 4  } } */
index aa7c13072385e489f287415b21e33e00b1a1aeab..5089874faa5c89e6072b7e31eba388788aaa32a2 100644 (file)
@@ -11,11 +11,11 @@ typedef double adouble __attribute__((aligned(sizeof (double))));
 
 #include "l_fma_1.h"
 
-/* { dg-final { scan-assembler-times "vfmadd\[123\]+pd" 8 } } */
-/* { dg-final { scan-assembler-times "vfmsub\[123\]+pd" 8 } } */
-/* { dg-final { scan-assembler-times "vfnmadd\[123\]+pd" 8 } } */
-/* { dg-final { scan-assembler-times "vfnmsub\[123\]+pd" 8 } } */
-/* { dg-final { scan-assembler-times "vfmadd\[123\]+sd" 32 } } */
-/* { dg-final { scan-assembler-times "vfmsub\[123\]+sd" 32 } } */
-/* { dg-final { scan-assembler-times "vfnmadd\[123\]+sd" 32 } } */
-/* { dg-final { scan-assembler-times "vfnmsub\[123\]+sd" 32 } } */
+/* { dg-final { scan-assembler-times "vfmadd\[123\]+pd" 12 } } */
+/* { dg-final { scan-assembler-times "vfmsub\[123\]+pd" 12 } } */
+/* { dg-final { scan-assembler-times "vfnmadd\[123\]+pd" 4 } } */
+/* { dg-final { scan-assembler-times "vfnmsub\[123\]+pd" 4 } } */
+/* { dg-final { scan-assembler-times "vfmadd\[123\]+sd" 48 } } */
+/* { dg-final { scan-assembler-times "vfmsub\[123\]+sd" 48 } } */
+/* { dg-final { scan-assembler-times "vfnmadd\[123\]+sd" 16 } } */
+/* { dg-final { scan-assembler-times "vfnmsub\[123\]+sd" 16 } } */
index c59a89156bc3b105917b28231e15702a37da71e1..e4696204299792a26b3658a384d6109659946226 100644 (file)
@@ -11,11 +11,11 @@ typedef double adouble __attribute__((aligned(sizeof (double))));
 
 #include "l_fma_2.h"
 
-/* { dg-final { scan-assembler-times "vfmadd\[123\]+pd" 8 } } */
-/* { dg-final { scan-assembler-times "vfmsub\[123\]+pd" 8 } } */
-/* { dg-final { scan-assembler-times "vfnmadd\[123\]+pd" 8 } } */
-/* { dg-final { scan-assembler-times "vfnmsub\[123\]+pd" 8 } } */
-/* { dg-final { scan-assembler-times "vfmadd\[123\]+sd" 32 } } */
-/* { dg-final { scan-assembler-times "vfmsub\[123\]+sd" 32 } } */
-/* { dg-final { scan-assembler-times "vfnmadd\[123\]+sd" 32 } } */
-/* { dg-final { scan-assembler-times "vfnmsub\[123\]+sd" 32 } } */
+/* { dg-final { scan-assembler-times "vfmadd\[123\]+pd" 12 } } */
+/* { dg-final { scan-assembler-times "vfmsub\[123\]+pd" 12 } } */
+/* { dg-final { scan-assembler-times "vfnmadd\[123\]+pd" 4 } } */
+/* { dg-final { scan-assembler-times "vfnmsub\[123\]+pd" 4 } } */
+/* { dg-final { scan-assembler-times "vfmadd\[123\]+sd" 48 } } */
+/* { dg-final { scan-assembler-times "vfmsub\[123\]+sd" 48 } } */
+/* { dg-final { scan-assembler-times "vfnmadd\[123\]+sd" 16 } } */
+/* { dg-final { scan-assembler-times "vfnmsub\[123\]+sd" 16 } } */
index c508d64d2e758548d35313557ba8260a3f1dc872..df986d0a633dfed241a671b3f5c65029fdb43743 100644 (file)
@@ -11,11 +11,11 @@ typedef double adouble __attribute__((aligned(sizeof (double))));
 
 #include "l_fma_3.h"
 
-/* { dg-final { scan-assembler-times "vfmadd\[123\]+pd" 8 } } */
-/* { dg-final { scan-assembler-times "vfmsub\[123\]+pd" 8 } } */
-/* { dg-final { scan-assembler-times "vfnmadd\[123\]+pd" 8 } } */
-/* { dg-final { scan-assembler-times "vfnmsub\[123\]+pd" 8 } } */
-/* { dg-final { scan-assembler-times "vfmadd\[123\]+sd" 32 } } */
-/* { dg-final { scan-assembler-times "vfmsub\[123\]+sd" 32 } } */
-/* { dg-final { scan-assembler-times "vfnmadd\[123\]+sd" 32 } } */
-/* { dg-final { scan-assembler-times "vfnmsub\[123\]+sd" 32 } } */
+/* { dg-final { scan-assembler-times "vfmadd\[123\]+pd" 12 } } */
+/* { dg-final { scan-assembler-times "vfmsub\[123\]+pd" 12 } } */
+/* { dg-final { scan-assembler-times "vfnmadd\[123\]+pd" 4 } } */
+/* { dg-final { scan-assembler-times "vfnmsub\[123\]+pd" 4 } } */
+/* { dg-final { scan-assembler-times "vfmadd\[123\]+sd" 48 } } */
+/* { dg-final { scan-assembler-times "vfmsub\[123\]+sd" 48 } } */
+/* { dg-final { scan-assembler-times "vfnmadd\[123\]+sd" 16 } } */
+/* { dg-final { scan-assembler-times "vfnmsub\[123\]+sd" 16 } } */
index fec33e490af9dcd87d2cf2fce9503e35d4ccf7f8..ae065590f62d847f101d35b68a4844f46a47fc63 100644 (file)
@@ -11,11 +11,11 @@ typedef double adouble __attribute__((aligned(sizeof (double))));
 
 #include "l_fma_4.h"
 
-/* { dg-final { scan-assembler-times "vfmadd\[123\]+pd" 8 } } */
-/* { dg-final { scan-assembler-times "vfmsub\[123\]+pd" 8 } } */
-/* { dg-final { scan-assembler-times "vfnmadd\[123\]+pd" 8 } } */
-/* { dg-final { scan-assembler-times "vfnmsub\[123\]+pd" 8 } } */
-/* { dg-final { scan-assembler-times "vfmadd\[123\]+sd" 32 } } */
-/* { dg-final { scan-assembler-times "vfmsub\[123\]+sd" 32 } } */
-/* { dg-final { scan-assembler-times "vfnmadd\[123\]+sd" 32 } } */
-/* { dg-final { scan-assembler-times "vfnmsub\[123\]+sd" 32 } } */
+/* { dg-final { scan-assembler-times "vfmadd\[123\]+pd" 12 } } */
+/* { dg-final { scan-assembler-times "vfmsub\[123\]+pd" 12 } } */
+/* { dg-final { scan-assembler-times "vfnmadd\[123\]+pd" 4 } } */
+/* { dg-final { scan-assembler-times "vfnmsub\[123\]+pd" 4 } } */
+/* { dg-final { scan-assembler-times "vfmadd\[123\]+sd" 48 } } */
+/* { dg-final { scan-assembler-times "vfmsub\[123\]+sd" 48 } } */
+/* { dg-final { scan-assembler-times "vfnmadd\[123\]+sd" 16 } } */
+/* { dg-final { scan-assembler-times "vfnmsub\[123\]+sd" 16 } } */
index 8bcfe4e68db023b038ed7d740b34477bc92922d8..5d31abaa5a7271a910aafda437c065cae6aff3f4 100644 (file)
@@ -11,11 +11,11 @@ typedef double adouble __attribute__((aligned(sizeof (double))));
 
 #include "l_fma_5.h"
 
-/* { dg-final { scan-assembler-times "vfmadd\[123\]+pd" 8 } } */
-/* { dg-final { scan-assembler-times "vfmsub\[123\]+pd" 8 } } */
-/* { dg-final { scan-assembler-times "vfnmadd\[123\]+pd" 8 } } */
-/* { dg-final { scan-assembler-times "vfnmsub\[123\]+pd" 8 } } */
-/* { dg-final { scan-assembler-times "vfmadd\[123\]+sd" 32 } } */
-/* { dg-final { scan-assembler-times "vfmsub\[123\]+sd" 32 } } */
-/* { dg-final { scan-assembler-times "vfnmadd\[123\]+sd" 32 } } */
-/* { dg-final { scan-assembler-times "vfnmsub\[123\]+sd" 32 } } */
+/* { dg-final { scan-assembler-times "vfmadd\[123\]+pd" 12 } } */
+/* { dg-final { scan-assembler-times "vfmsub\[123\]+pd" 12 } } */
+/* { dg-final { scan-assembler-times "vfnmadd\[123\]+pd" 4 } } */
+/* { dg-final { scan-assembler-times "vfnmsub\[123\]+pd" 4 } } */
+/* { dg-final { scan-assembler-times "vfmadd\[123\]+sd" 48 } } */
+/* { dg-final { scan-assembler-times "vfmsub\[123\]+sd" 48 } } */
+/* { dg-final { scan-assembler-times "vfnmadd\[123\]+sd" 16 } } */
+/* { dg-final { scan-assembler-times "vfnmsub\[123\]+sd" 16 } } */
index 15da66b44cce412299326a74e3b213b8868bf7b4..ff857fb02f174262ea9c07465805948e53249c52 100644 (file)
@@ -11,11 +11,11 @@ typedef double adouble __attribute__((aligned(sizeof (double))));
 
 #include "l_fma_6.h"
 
-/* { dg-final { scan-assembler-times "vfmadd\[123\]+pd" 8 } } */
-/* { dg-final { scan-assembler-times "vfmsub\[123\]+pd" 8 } } */
-/* { dg-final { scan-assembler-times "vfnmadd\[123\]+pd" 8 } } */
-/* { dg-final { scan-assembler-times "vfnmsub\[123\]+pd" 8 } } */
-/* { dg-final { scan-assembler-times "vfmadd\[123\]+sd" 32 } } */
-/* { dg-final { scan-assembler-times "vfmsub\[123\]+sd" 32 } } */
-/* { dg-final { scan-assembler-times "vfnmadd\[123\]+sd" 32 } } */
-/* { dg-final { scan-assembler-times "vfnmsub\[123\]+sd" 32 } } */
+/* { dg-final { scan-assembler-times "vfmadd\[123\]+pd" 12 } } */
+/* { dg-final { scan-assembler-times "vfmsub\[123\]+pd" 12 } } */
+/* { dg-final { scan-assembler-times "vfnmadd\[123\]+pd" 4 } } */
+/* { dg-final { scan-assembler-times "vfnmsub\[123\]+pd" 4 } } */
+/* { dg-final { scan-assembler-times "vfmadd\[123\]+sd" 48 } } */
+/* { dg-final { scan-assembler-times "vfmsub\[123\]+sd" 48 } } */
+/* { dg-final { scan-assembler-times "vfnmadd\[123\]+sd" 16 } } */
+/* { dg-final { scan-assembler-times "vfnmsub\[123\]+sd" 16 } } */
index 29eb77fc1628ef2b7724f050e60f4b0c3a90fd0e..daef8767a5d3db155e8a85dee6af87d07aa5cd2a 100644 (file)
 
 #include "l_fma_1.h"
 
-/* { dg-final { scan-assembler-times "vfmadd\[123\]+ps" 8 } } */
-/* { dg-final { scan-assembler-times "vfmsub\[123\]+ps" 8 } } */
-/* { dg-final { scan-assembler-times "vfnmadd\[123\]+ps" 8 } } */
-/* { dg-final { scan-assembler-times "vfnmsub\[123\]+ps" 8 } } */
-/* { dg-final { scan-assembler-times "vfmadd\[123\]+ss" 64 } } */
-/* { dg-final { scan-assembler-times "vfmsub\[123\]+ss" 64 } } */
-/* { dg-final { scan-assembler-times "vfnmadd\[123\]+ss" 64 } } */
-/* { dg-final { scan-assembler-times "vfnmsub\[123\]+ss" 64 } } */
+/* { dg-final { scan-assembler-times "vfmadd\[123\]+ps" 12 } } */
+/* { dg-final { scan-assembler-times "vfmsub\[123\]+ps" 12 } } */
+/* { dg-final { scan-assembler-times "vfnmadd\[123\]+ps" 4 } } */
+/* { dg-final { scan-assembler-times "vfnmsub\[123\]+ps" 4 } } */
+/* { dg-final { scan-assembler-times "vfmadd\[123\]+ss" 96 } } */
+/* { dg-final { scan-assembler-times "vfmsub\[123\]+ss" 96 } } */
+/* { dg-final { scan-assembler-times "vfnmadd\[123\]+ss" 32 } } */
+/* { dg-final { scan-assembler-times "vfnmsub\[123\]+ss" 32 } } */
index 2943b2c68b217088f84594de7c7e523b4d066ab7..ffa5c6f1b398aa4d331cd2c3a049aaf627f77663 100644 (file)
 
 #include "l_fma_2.h"
 
-/* { dg-final { scan-assembler-times "vfmadd\[123\]+ps" 8 } } */
-/* { dg-final { scan-assembler-times "vfmsub\[123\]+ps" 8 } } */
-/* { dg-final { scan-assembler-times "vfnmadd\[123\]+ps" 8 } } */
-/* { dg-final { scan-assembler-times "vfnmsub\[123\]+ps" 8 } } */
-/* { dg-final { scan-assembler-times "vfmadd\[123\]+ss" 64 } } */
-/* { dg-final { scan-assembler-times "vfmsub\[123\]+ss" 64 } } */
-/* { dg-final { scan-assembler-times "vfnmadd\[123\]+ss" 64 } } */
-/* { dg-final { scan-assembler-times "vfnmsub\[123\]+ss" 64 } } */
+/* { dg-final { scan-assembler-times "vfmadd\[123\]+ps" 12 } } */
+/* { dg-final { scan-assembler-times "vfmsub\[123\]+ps" 12 } } */
+/* { dg-final { scan-assembler-times "vfnmadd\[123\]+ps" 4 } } */
+/* { dg-final { scan-assembler-times "vfnmsub\[123\]+ps" 4 } } */
+/* { dg-final { scan-assembler-times "vfmadd\[123\]+ss" 96 } } */
+/* { dg-final { scan-assembler-times "vfmsub\[123\]+ss" 96 } } */
+/* { dg-final { scan-assembler-times "vfnmadd\[123\]+ss" 32 } } */
+/* { dg-final { scan-assembler-times "vfnmsub\[123\]+ss" 32 } } */
index c144dc1861ae190da409e03fb4e481c585161ab1..a05ef5912f4d17cdd5e8e49edaebf8c89b73c3f5 100644 (file)
 
 #include "l_fma_3.h"
 
-/* { dg-final { scan-assembler-times "vfmadd\[123\]+ps" 8 } } */
-/* { dg-final { scan-assembler-times "vfmsub\[123\]+ps" 8 } } */
-/* { dg-final { scan-assembler-times "vfnmadd\[123\]+ps" 8 } } */
-/* { dg-final { scan-assembler-times "vfnmsub\[123\]+ps" 8 } } */
-/* { dg-final { scan-assembler-times "vfmadd\[123\]+ss" 64 } } */
-/* { dg-final { scan-assembler-times "vfmsub\[123\]+ss" 64 } } */
-/* { dg-final { scan-assembler-times "vfnmadd\[123\]+ss" 64 } } */
-/* { dg-final { scan-assembler-times "vfnmsub\[123\]+ss" 64 } } */
+/* { dg-final { scan-assembler-times "vfmadd\[123\]+ps" 12 } } */
+/* { dg-final { scan-assembler-times "vfmsub\[123\]+ps" 12 } } */
+/* { dg-final { scan-assembler-times "vfnmadd\[123\]+ps" 4 } } */
+/* { dg-final { scan-assembler-times "vfnmsub\[123\]+ps" 4 } } */
+/* { dg-final { scan-assembler-times "vfmadd\[123\]+ss" 96 } } */
+/* { dg-final { scan-assembler-times "vfmsub\[123\]+ss" 96 } } */
+/* { dg-final { scan-assembler-times "vfnmadd\[123\]+ss" 32 } } */
+/* { dg-final { scan-assembler-times "vfnmsub\[123\]+ss" 32 } } */
index a940dfd72f9df0c84a371ec30ac24e51f697140c..b0a37baafeb3f4f664fc39d712f0f1ac851b08b0 100644 (file)
 
 #include "l_fma_4.h"
 
-/* { dg-final { scan-assembler-times "vfmadd\[123\]+ps" 8 } } */
-/* { dg-final { scan-assembler-times "vfmsub\[123\]+ps" 8 } } */
-/* { dg-final { scan-assembler-times "vfnmadd\[123\]+ps" 8 } } */
-/* { dg-final { scan-assembler-times "vfnmsub\[123\]+ps" 8 } } */
-/* { dg-final { scan-assembler-times "vfmadd\[123\]+ss" 64 } } */
-/* { dg-final { scan-assembler-times "vfmsub\[123\]+ss" 64 } } */
-/* { dg-final { scan-assembler-times "vfnmadd\[123\]+ss" 64 } } */
-/* { dg-final { scan-assembler-times "vfnmsub\[123\]+ss" 64 } } */
+/* { dg-final { scan-assembler-times "vfmadd\[123\]+ps" 12 } } */
+/* { dg-final { scan-assembler-times "vfmsub\[123\]+ps" 12 } } */
+/* { dg-final { scan-assembler-times "vfnmadd\[123\]+ps" 4 } } */
+/* { dg-final { scan-assembler-times "vfnmsub\[123\]+ps" 4 } } */
+/* { dg-final { scan-assembler-times "vfmadd\[123\]+ss" 96 } } */
+/* { dg-final { scan-assembler-times "vfmsub\[123\]+ss" 96 } } */
+/* { dg-final { scan-assembler-times "vfnmadd\[123\]+ss" 32 } } */
+/* { dg-final { scan-assembler-times "vfnmsub\[123\]+ss" 32 } } */
index e7a12d47ea5d40423d1d3c6c941c6dd8df226601..598275c54af8cda32541614a67fab4217d7b85d0 100644 (file)
 
 #include "l_fma_5.h"
 
-/* { dg-final { scan-assembler-times "vfmadd\[123\]+ps" 8 } } */
-/* { dg-final { scan-assembler-times "vfmsub\[123\]+ps" 8 } } */
-/* { dg-final { scan-assembler-times "vfnmadd\[123\]+ps" 8 } } */
-/* { dg-final { scan-assembler-times "vfnmsub\[123\]+ps" 8 } } */
-/* { dg-final { scan-assembler-times "vfmadd\[123\]+ss" 64 } } */
-/* { dg-final { scan-assembler-times "vfmsub\[123\]+ss" 64 } } */
-/* { dg-final { scan-assembler-times "vfnmadd\[123\]+ss" 64 } } */
-/* { dg-final { scan-assembler-times "vfnmsub\[123\]+ss" 64 } } */
+/* { dg-final { scan-assembler-times "vfmadd\[123\]+ps" 12 } } */
+/* { dg-final { scan-assembler-times "vfmsub\[123\]+ps" 12 } } */
+/* { dg-final { scan-assembler-times "vfnmadd\[123\]+ps" 4 } } */
+/* { dg-final { scan-assembler-times "vfnmsub\[123\]+ps" 4 } } */
+/* { dg-final { scan-assembler-times "vfmadd\[123\]+ss" 96 } } */
+/* { dg-final { scan-assembler-times "vfmsub\[123\]+ss" 96 } } */
+/* { dg-final { scan-assembler-times "vfnmadd\[123\]+ss" 32 } } */
+/* { dg-final { scan-assembler-times "vfnmsub\[123\]+ss" 32 } } */
index 82397a9d04f9f3b7337e6c4623cf009a35515d58..1e7a2162c07eef5b15375b51878489464c672fca 100644 (file)
 
 #include "l_fma_6.h"
 
-/* { dg-final { scan-assembler-times "vfmadd\[123\]+ps" 8 } } */
-/* { dg-final { scan-assembler-times "vfmsub\[123\]+ps" 8 } } */
-/* { dg-final { scan-assembler-times "vfnmadd\[123\]+ps" 8 } } */
-/* { dg-final { scan-assembler-times "vfnmsub\[123\]+ps" 8 } } */
-/* { dg-final { scan-assembler-times "vfmadd\[123\]+ss" 64 } } */
-/* { dg-final { scan-assembler-times "vfmsub\[123\]+ss" 64 } } */
-/* { dg-final { scan-assembler-times "vfnmadd\[123\]+ss" 64 } } */
-/* { dg-final { scan-assembler-times "vfnmsub\[123\]+ss" 64 } } */
+/* { dg-final { scan-assembler-times "vfmadd\[123\]+ps" 12 } } */
+/* { dg-final { scan-assembler-times "vfmsub\[123\]+ps" 12 } } */
+/* { dg-final { scan-assembler-times "vfnmadd\[123\]+ps" 4 } } */
+/* { dg-final { scan-assembler-times "vfnmsub\[123\]+ps" 4 } } */
+/* { dg-final { scan-assembler-times "vfmadd\[123\]+ss" 96 } } */
+/* { dg-final { scan-assembler-times "vfmsub\[123\]+ss" 96 } } */
+/* { dg-final { scan-assembler-times "vfnmadd\[123\]+ss" 32 } } */
+/* { dg-final { scan-assembler-times "vfnmsub\[123\]+ss" 32 } } */
index 969c1a6b6c6d788342cca54f1a401b6b20813f3a..5fbaa24142e3ca6d7325f6860ad6779ddd1b4bc7 100644 (file)
@@ -2930,6 +2930,35 @@ convert_mult_to_fma_1 (tree mul_result, tree op1, tree op2)
          fprintf (dump_file, "\n");
        }
 
+      /* If the FMA result is negated in a single use, fold the negation
+        too.  */
+      orig_stmt = gsi_stmt (gsi);
+      use_operand_p use_p;
+      gimple *neg_stmt;
+      if (is_gimple_call (orig_stmt)
+         && gimple_call_internal_p (orig_stmt)
+         && gimple_call_lhs (orig_stmt)
+         && TREE_CODE (gimple_call_lhs (orig_stmt)) == SSA_NAME
+         && single_imm_use (gimple_call_lhs (orig_stmt), &use_p, &neg_stmt)
+         && is_gimple_assign (neg_stmt)
+         && gimple_assign_rhs_code (neg_stmt) == NEGATE_EXPR
+         && !stmt_could_throw_p (cfun, neg_stmt))
+       {
+         gsi = gsi_for_stmt (neg_stmt);
+         if (fold_stmt (&gsi, follow_all_ssa_edges))
+           {
+             if (maybe_clean_or_replace_eh_stmt (neg_stmt, gsi_stmt (gsi)))
+               gcc_unreachable ();
+             update_stmt (gsi_stmt (gsi));
+             if (dump_file && (dump_flags & TDF_DETAILS))
+               {
+                 fprintf (dump_file, "Folded FMA negation ");
+                 print_gimple_stmt (dump_file, gsi_stmt (gsi), 0, TDF_NONE);
+                 fprintf (dump_file, "\n");
+               }
+           }
+       }
+
       widen_mul_stats.fmas_inserted++;
     }
 }