IBM Z: Try to make use of load-and-test instructions

author Stefan Schulze Frielinghaus <stefansf@linux.ibm.com>
Fri, 18 Sep 2020 07:10:19 +0000 (09:10 +0200)
committer Stefan Schulze Frielinghaus <stefansf@linux.ibm.com>
Tue, 22 Sep 2020 11:55:42 +0000 (13:55 +0200)

diff --git a/gcc/config/s390/s390.md b/gcc/config/s390/s390.md

index 4c3e5400a2be4b02bca8d0a7e3f73f849266d3b0..18edea1ce47497a93291dbfb552ee0547c542fbf 100644 (file)
--- a/gcc/config/s390/s390.md
+++ b/gcc/config/s390/s390.md
@@ -1391,23 +1391,55 @@
  ; (TF|DF|SF|TD|DD|SD) instructions
  
  
-; FIXME: load and test instructions turn SNaN into QNaN what is not
-; acceptable if the target will be used afterwards.  On the other hand
-; they are quite convenient for implementing comparisons with 0.0. So
-; try to enable them via splitter/peephole if the value isn't needed anymore.
-; See testcases: load-and-test-fp-1.c and load-and-test-fp-2.c
+; Load-and-test instructions turn a signaling NaN into a quiet NaN.  Thus they
+; may only be used if the target register is dead afterwards or if fast math
+; is enabled.  The former is done via a peephole optimization.  Note that
+; load-and-test instructions may only be used for (in)equality comparisons
+; because relational comparisons must treat a quiet NaN like a signaling NaN,
+; which is not the case for load-and-test instructions.  For fast math, insn
+; "*cmp<mode>_ccs_0_fastmath" applies.
+; See testcases load-and-test-fp-{1,2}.c
+
+(define_peephole2
+  [(set (match_operand:FP 0 "register_operand")
+       (match_operand:FP 1 "const0_operand"))
+   (set (reg:CCZ CC_REGNUM)
+       (compare:CCZ (match_operand:FP 2 "register_operand")
+                    (match_operand:FP 3 "register_operand")))]
+  "TARGET_HARD_FLOAT
+   && FP_REG_P (operands[2])
+   && REGNO (operands[0]) == REGNO (operands[3])
+   && peep2_reg_dead_p (2, operands[0])
+   && peep2_reg_dead_p (2, operands[2])"
+  [(parallel
+    [(set (reg:CCZ CC_REGNUM)
+         (compare:CCZ (match_dup 2) (match_dup 1)))
+     (clobber (match_dup 2))])]
+  "")
  
  ; ltxbr, ltdbr, ltebr, ltxtr, ltdtr
-(define_insn "*cmp<mode>_ccs_0"
-  [(set (reg CC_REGNUM)
-       (compare (match_operand:FP 0 "register_operand"  "f")
-                (match_operand:FP 1 "const0_operand"    "")))
-   (clobber (match_operand:FP      2 "register_operand" "=0"))]
-  "s390_match_ccmode(insn, CCSmode) && TARGET_HARD_FLOAT"
+(define_insn "*cmp<mode>_ccz_0"
+  [(set (reg:CCZ CC_REGNUM)
+       (compare:CCZ (match_operand:FP 0 "register_operand" "f")
+                    (match_operand:FP 1 "const0_operand")))
+   (clobber (match_operand:FP 2 "register_operand" "=0"))]
+  "TARGET_HARD_FLOAT"
    "lt<xde><bt>r\t%0,%0"
     [(set_attr "op_type" "RRE")
      (set_attr "type"  "fsimp<mode>")])
  
+(define_insn "*cmp<mode>_ccs_0_fastmath"
+  [(set (reg CC_REGNUM)
+       (compare (match_operand:FP 0 "register_operand" "f")
+                (match_operand:FP 1 "const0_operand")))]
+  "s390_match_ccmode (insn, CCSmode)
+   && TARGET_HARD_FLOAT
+   && !flag_trapping_math
+   && !flag_signaling_nans"
+  "lt<xde><bt>r\t%0,%0"
+  [(set_attr "op_type" "RRE")
+   (set_attr "type" "fsimp<mode>")])
+
  ; VX: TFmode in FPR pairs: use cxbr instead of wfcxb
  ; cxtr, cdtr, cxbr, cdbr, cebr, cdb, ceb, wfcsb, wfcdb
  (define_insn "*cmp<mode>_ccs"
diff --git a/gcc/testsuite/gcc.target/s390/load-and-test-fp-1.c b/gcc/testsuite/gcc.target/s390/load-and-test-fp-1.c

index 2a7e88c0f1bd61398a19fe3ed658d1ae7fb5f811..f89d2d36d6191fdf0f54241c298dbc715ab4346d 100644 (file)
--- a/gcc/testsuite/gcc.target/s390/load-and-test-fp-1.c
+++ b/gcc/testsuite/gcc.target/s390/load-and-test-fp-1.c
@@ -1,17 +1,12 @@
  /* { dg-do compile } */
-/* { dg-options "-O3 -mzarch" } */
+/* { dg-options "-O3 -mzarch -march=z196" } */
  
-/* a is used after the comparison.  We cannot use load and test here
-   since it would turn SNaNs into QNaNs.  */
+/* Use load-and-test instructions if compared for (in)equality and if variable
+   `a` is dead after the comparison.  For all other cases use
+   compare-and-signal instructions.  */
  
-double gl;
+#include "load-and-test-fp.h"
  
-double
-foo (double dummy, double a)
-{
-  if (a == 0.0)
-    gl = 1;
-  return a;
-}
-
-/* { dg-final { scan-assembler {\tcdbr?\t} } } */
+/* { dg-final { scan-assembler-times "ltdbr\t" 2 } } */
+/* { dg-final { scan-assembler-times "cdbr\t" 2 } } */
+/* { dg-final { scan-assembler-times "kdbr\t" 8 } } */
diff --git a/gcc/testsuite/gcc.target/s390/load-and-test-fp-2.c b/gcc/testsuite/gcc.target/s390/load-and-test-fp-2.c

index 7646fdd5def8cedf44ec45f184c00ebb8b82bdf4..53dab3c44240dd869b5bad9712984175cd0816e7 100644 (file)
--- a/gcc/testsuite/gcc.target/s390/load-and-test-fp-2.c
+++ b/gcc/testsuite/gcc.target/s390/load-and-test-fp-2.c
@@ -1,16 +1,9 @@
  /* { dg-do compile } */
-/* { dg-options "-O3" } */
+/* { dg-options "-O3 -mzarch -ffast-math" } */
  
-/* a is not used after the comparison.  So we should use load and test
-   here.  */
+/* Fast-math implies -fno-trapping-math -fno-signaling-nans which imply
+   that no user visible trap will happen.  */
  
-double gl;
+#include "load-and-test-fp.h"
  
-void
-bar (double a)
-{
-  if (a == 0.0)
-    gl = 1;
-}
-
-/* { dg-final { scan-assembler "ltdbr\t" } } */
+/* { dg-final { scan-assembler-times "ltdbr\t" 12 } } */
diff --git a/gcc/testsuite/gcc.target/s390/load-and-test-fp.h b/gcc/testsuite/gcc.target/s390/load-and-test-fp.h

new file mode 100644 (file)

index 0000000..f153d96
--- /dev/null
+++ b/gcc/testsuite/gcc.target/s390/load-and-test-fp.h
@@ -0,0 +1,12 @@
+double gl;
+
+#define test(N, CMP) \
+  void   N ## _dead(double a) { if (a CMP 0.0) gl = 1; } \
+  double N ## _live(double a) { if (a CMP 0.0) gl = 1; return a; }
+
+test(eq, ==)
+test(ne, !=)
+test(ge, >=)
+test(gt, >)
+test(le, <=)
+test(lt, <)
author	Stefan Schulze Frielinghaus <stefansf@linux.ibm.com>
	Fri, 18 Sep 2020 07:10:19 +0000 (09:10 +0200)
committer	Stefan Schulze Frielinghaus <stefansf@linux.ibm.com>
	Tue, 22 Sep 2020 11:55:42 +0000 (13:55 +0200)
gcc/config/s390/s390.md		patch \| blob \| history
gcc/testsuite/gcc.target/s390/load-and-test-fp-1.c		patch \| blob \| history
gcc/testsuite/gcc.target/s390/load-and-test-fp-2.c		patch \| blob \| history
gcc/testsuite/gcc.target/s390/load-and-test-fp.h	[new file with mode: 0644]	patch \| blob