From: Michael Meissner <meissner@linux.vnet.ibm.com>
Date: Wed, 27 Sep 2017 01:20:24 +0000 (+0000)
Subject: vsx.md (peephole for optimizing move SF to GPR): Adjust code to eliminate needing... 
X-Git-Url: https://git.libre-soc.org/?a=commitdiff_plain;h=7a6ed74db49fdec8ee1eabe97cfcde53a03d9fa3;p=gcc.git

vsx.md (peephole for optimizing move SF to GPR): Adjust code to eliminate needing to do the shift right 32-bits operation after XSCVDPSPN.

[gcc]
2017-09-26  Michael Meissner  <meissner@linux.vnet.ibm.com>

	* config/rs6000/vsx.md (peephole for optimizing move SF to GPR):
	Adjust code to eliminate needing to do the shift right 32-bits
	operation after XSCVDPSPN.

[gcc/testsuite]
2017-09-26  Michael Meissner  <meissner@linux.vnet.ibm.com>

	* gcc.target/powerpc/pr71977-1.c: Update test to know that we
	don't generate a 32-bit shift after doing XSCVDPSPN.
	* gcc.target/powerpc/direct-move-float1.c: Likewise.
	* gcc.target/powerpc/direct-move-float3.c: New test.

From-SVN: r253223
---

diff --git a/gcc/ChangeLog b/gcc/ChangeLog
index ad090d2161c..340e1e8855d 100644
--- a/gcc/ChangeLog
+++ b/gcc/ChangeLog
@@ -1,3 +1,9 @@
+2017-09-26  Michael Meissner  <meissner@linux.vnet.ibm.com>
+
+	* config/rs6000/vsx.md (peephole for optimizing move SF to GPR):
+	Adjust code to eliminate needing to do the shift right 32-bits
+	operation after XSCVDPSPN.
+
 2017-09-26  Prathamesh Kulkarni  <prathamesh.kulkarni@linaro.org>
 
 	* match.pd ((X / Y) == 0 -> X < Y): New pattern.
diff --git a/gcc/config/rs6000/vsx.md b/gcc/config/rs6000/vsx.md
index 16ed1698dc3..35be5dead64 100644
--- a/gcc/config/rs6000/vsx.md
+++ b/gcc/config/rs6000/vsx.md
@@ -4797,9 +4797,10 @@
    (SFBOOL_SHL_D		 7)		;; shift left dest
    (SFBOOL_SHL_A		 8)		;; shift left arg
    (SFBOOL_MTVSR_D		 9)		;; move to vecter dest
-   (SFBOOL_BOOL_A_DI		10)		;; SFBOOL_BOOL_A1/A2 as DImode
-   (SFBOOL_TMP_VSX_DI		11)		;; SFBOOL_TMP_VSX as DImode
-   (SFBOOL_MTVSR_D_V4SF		12)])		;; SFBOOL_MTVSRD_D as V4SFmode
+   (SFBOOL_MFVSR_A_V4SF		10)		;; SFBOOL_MFVSR_A as V4SFmode
+   (SFBOOL_BOOL_A_DI		11)		;; SFBOOL_BOOL_A1/A2 as DImode
+   (SFBOOL_TMP_VSX_DI		12)		;; SFBOOL_TMP_VSX as DImode
+   (SFBOOL_MTVSR_D_V4SF		13)])		;; SFBOOL_MTVSR_D as V4SFmode
 
 ;; Attempt to optimize some common GLIBC operations using logical operations to
 ;; pick apart SFmode operations.  For example, there is code from e_powf.c
@@ -4837,29 +4838,22 @@
 ;;
 ;; (set (reg:DI reg3) (unspec:DI [(reg:V4SF reg2)] UNSPEC_P8V_RELOAD_FROM_VSX))
 ;;
-;; (set (reg:DI reg3) (lshiftrt:DI (reg:DI reg3) (const_int 32)))
+;; (set (reg:DI reg4) (and:DI (reg:DI reg3) (reg:DI mask)))
 ;;
-;; (set (reg:DI reg5) (and:DI (reg:DI reg3) (reg:DI reg4)))
+;; (set (reg:DI reg5) (ashift:DI (reg:DI reg4) (const_int 32)))
 ;;
-;; (set (reg:DI reg6) (ashift:DI (reg:DI reg5) (const_int 32)))
+;; (set (reg:SF reg6) (unspec:SF [(reg:DI reg5)] UNSPEC_P8V_MTVSRD))
 ;;
-;; (set (reg:SF reg7) (unspec:SF [(reg:DI reg6)] UNSPEC_P8V_MTVSRD))
-;;
-;; (set (reg:SF reg7) (unspec:SF [(reg:SF reg7)] UNSPEC_VSX_CVSPDPN))
+;; (set (reg:SF reg6) (unspec:SF [(reg:SF reg6)] UNSPEC_VSX_CVSPDPN))
 
 (define_peephole2
   [(match_scratch:DI SFBOOL_TMP_GPR "r")
    (match_scratch:V4SF SFBOOL_TMP_VSX "wa")
 
-   ;; MFVSRD
+   ;; MFVSRWZ (aka zero_extend)
    (set (match_operand:DI SFBOOL_MFVSR_D "int_reg_operand")
-	(unspec:DI [(match_operand:V4SF SFBOOL_MFVSR_A "vsx_register_operand")]
-		   UNSPEC_P8V_RELOAD_FROM_VSX))
-
-   ;; SRDI
-   (set (match_dup SFBOOL_MFVSR_D)
-	(lshiftrt:DI (match_dup SFBOOL_MFVSR_D)
-		     (const_int 32)))
+	(zero_extend:DI
+	 (match_operand:SI SFBOOL_MFVSR_A "vsx_register_operand")))
 
    ;; AND/IOR/XOR operation on int
    (set (match_operand:SI SFBOOL_BOOL_D "int_reg_operand")
@@ -4884,15 +4878,15 @@
    && (REG_P (operands[SFBOOL_BOOL_A2])
        || CONST_INT_P (operands[SFBOOL_BOOL_A2]))
    && (REGNO (operands[SFBOOL_BOOL_D]) == REGNO (operands[SFBOOL_MFVSR_D])
-       || peep2_reg_dead_p (3, operands[SFBOOL_MFVSR_D]))
+       || peep2_reg_dead_p (2, operands[SFBOOL_MFVSR_D]))
    && (REGNO (operands[SFBOOL_MFVSR_D]) == REGNO (operands[SFBOOL_BOOL_A1])
        || (REG_P (operands[SFBOOL_BOOL_A2])
 	   && REGNO (operands[SFBOOL_MFVSR_D])
 		== REGNO (operands[SFBOOL_BOOL_A2])))
    && REGNO (operands[SFBOOL_BOOL_D]) == REGNO (operands[SFBOOL_SHL_A])
    && (REGNO (operands[SFBOOL_SHL_D]) == REGNO (operands[SFBOOL_BOOL_D])
-       || peep2_reg_dead_p (4, operands[SFBOOL_BOOL_D]))
-   && peep2_reg_dead_p (5, operands[SFBOOL_SHL_D])"
+       || peep2_reg_dead_p (3, operands[SFBOOL_BOOL_D]))
+   && peep2_reg_dead_p (4, operands[SFBOOL_SHL_D])"
   [(set (match_dup SFBOOL_TMP_GPR)
 	(ashift:DI (match_dup SFBOOL_BOOL_A_DI)
 		   (const_int 32)))
@@ -4901,12 +4895,13 @@
 	(match_dup SFBOOL_TMP_GPR))
 
    (set (match_dup SFBOOL_MTVSR_D_V4SF)
-	(and_ior_xor:V4SF (match_dup SFBOOL_MFVSR_A)
+	(and_ior_xor:V4SF (match_dup SFBOOL_MFVSR_A_V4SF)
 			  (match_dup SFBOOL_TMP_VSX)))]
 {
   rtx bool_a1 = operands[SFBOOL_BOOL_A1];
   rtx bool_a2 = operands[SFBOOL_BOOL_A2];
   int regno_mfvsr_d = REGNO (operands[SFBOOL_MFVSR_D]);
+  int regno_mfvsr_a = REGNO (operands[SFBOOL_MFVSR_A]);
   int regno_tmp_vsx = REGNO (operands[SFBOOL_TMP_VSX]);
   int regno_mtvsr_d = REGNO (operands[SFBOOL_MTVSR_D]);
 
@@ -4925,6 +4920,7 @@
       operands[SFBOOL_BOOL_A_DI] = gen_rtx_REG (DImode, regno_bool_a);
     }
 
+  operands[SFBOOL_MFVSR_A_V4SF] = gen_rtx_REG (V4SFmode, regno_mfvsr_a);
   operands[SFBOOL_TMP_VSX_DI] = gen_rtx_REG (DImode, regno_tmp_vsx);
   operands[SFBOOL_MTVSR_D_V4SF] = gen_rtx_REG (V4SFmode, regno_mtvsr_d);
 })
diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog
index c064775317b..d4f431339c0 100644
--- a/gcc/testsuite/ChangeLog
+++ b/gcc/testsuite/ChangeLog
@@ -1,3 +1,10 @@
+2017-09-26  Michael Meissner  <meissner@linux.vnet.ibm.com>
+
+	* gcc.target/powerpc/pr71977-1.c: Update test to know that we
+	don't generate a 32-bit shift after doing XSCVDPSPN.
+	* gcc.target/powerpc/direct-move-float1.c: Likewise.
+	* gcc.target/powerpc/direct-move-float3.c: New test.
+
 2017-09-26  Prathamesh Kulkarni  <prathamesh.kulkarni@linaro.org>
 
 	* gcc.dg/tree-ssa/cmpdiv.c: New test.
diff --git a/gcc/testsuite/gcc.target/powerpc/direct-move-float1.c b/gcc/testsuite/gcc.target/powerpc/direct-move-float1.c
index 2551077dbf7..f5cff6c7c0c 100644
--- a/gcc/testsuite/gcc.target/powerpc/direct-move-float1.c
+++ b/gcc/testsuite/gcc.target/powerpc/direct-move-float1.c
@@ -4,10 +4,10 @@
 /* { dg-require-effective-target powerpc_p8vector_ok } */
 /* { dg-skip-if "do not override -mcpu" { powerpc*-*-* } { "-mcpu=*" } { "-mcpu=power8" } } */
 /* { dg-options "-mcpu=power8 -O2" } */
-/* { dg-final { scan-assembler "mtvsrd" } } */
-/* { dg-final { scan-assembler "mfvsrd" } } */
-/* { dg-final { scan-assembler "xscvdpspn" } } */
-/* { dg-final { scan-assembler "xscvspdpn" } } */
+/* { dg-final { scan-assembler {\mmtvsrd\M}    } } */
+/* { dg-final { scan-assembler {\mmfvsrwz\M}   } } */
+/* { dg-final { scan-assembler {\mxscvdpspn\M} } } */
+/* { dg-final { scan-assembler {\mxscvspdpn\M} } } */
 
 /* Check code generation for direct move for float types.  */
 
diff --git a/gcc/testsuite/gcc.target/powerpc/direct-move-float3.c b/gcc/testsuite/gcc.target/powerpc/direct-move-float3.c
new file mode 100644
index 00000000000..6e294aabdf7
--- /dev/null
+++ b/gcc/testsuite/gcc.target/powerpc/direct-move-float3.c
@@ -0,0 +1,30 @@
+/* { dg-do compile { target { powerpc*-*-linux* && lp64 } } } */
+/* { dg-skip-if "" { powerpc*-*-darwin* } } */
+/* { dg-skip-if "" { powerpc*-*-*spe* } } */
+/* { dg-require-effective-target powerpc_p8vector_ok } */
+/* { dg-options "-mpower8-vector -O2" } */
+
+/* Test that we generate XSCVDPSP instead of FRSP and XSCVDPSPN when we combine
+   a round from double to float with moving the float value to a GPR.  */
+
+union u {			/* Overlay a float with integer views of its bits.  */
+  float f;
+  unsigned int ui;
+  int si;
+};
+
+unsigned int
+ui_d (double d)		/* Return the bits of D after conversion to float.  */
+{
+  union u x;
+  x.f = d;			/* Implicit double -> float conversion.  */
+  return x.ui;			/* Read the float's bits back as an unsigned int.  */
+}
+
+/* { dg-final { scan-assembler     {\mmfvsrwz\M}   } } */
+/* { dg-final { scan-assembler     {\mxscvdpsp\M}  } } */
+/* { dg-final { scan-assembler-not {\mmfvsrd\M}    } } */
+/* { dg-final { scan-assembler-not {\mmtvsrwz\M}   } } */
+/* { dg-final { scan-assembler-not {\mmtvsrd\M}    } } */
+/* { dg-final { scan-assembler-not {\mxscvdpspn\M} } } */
+/* { dg-final { scan-assembler-not {\msrdi\M}      } } */
diff --git a/gcc/testsuite/gcc.target/powerpc/pr71977-1.c b/gcc/testsuite/gcc.target/powerpc/pr71977-1.c
index 1cb2ec3b71b..65dd3ab6cf4 100644
--- a/gcc/testsuite/gcc.target/powerpc/pr71977-1.c
+++ b/gcc/testsuite/gcc.target/powerpc/pr71977-1.c
@@ -23,9 +23,9 @@ mask_and_float_var (float f, uint32_t mask)
   return u.value;
 }
 
-/* { dg-final { scan-assembler     "\[ \t\]xxland " } } */
-/* { dg-final { scan-assembler-not "\[ \t\]and "    } } */
-/* { dg-final { scan-assembler-not "\[ \t\]mfvsrd " } } */
-/* { dg-final { scan-assembler-not "\[ \t\]stxv"    } } */
-/* { dg-final { scan-assembler-not "\[ \t\]lxv"     } } */
-/* { dg-final { scan-assembler-not "\[ \t\]srdi "   } } */
+/* { dg-final { scan-assembler     {\mxxland\M}  } } */
+/* { dg-final { scan-assembler-not {\mand\M}     } } */
+/* { dg-final { scan-assembler-not {\mmfvsrd\M}  } } */
+/* { dg-final { scan-assembler-not {\mstxv\M}    } } */
+/* { dg-final { scan-assembler-not {\mlxv\M}     } } */
+/* { dg-final { scan-assembler-not {\msrdi\M}    } } */