re PR target/78101 (PowerPC 64-bit little endian fusion failure with -O3 -mcpu=power9)
author Michael Meissner <meissner@linux.vnet.ibm.com>
Thu, 17 Nov 2016 21:42:13 +0000 (21:42 +0000)
committer Michael Meissner <meissner@gcc.gnu.org>
Thu, 17 Nov 2016 21:42:13 +0000 (21:42 +0000)
[gcc]
2016-11-17  Michael Meissner  <meissner@linux.vnet.ibm.com>

PR target/78101
* config/rs6000/predicates.md (fusion_addis_mem_combo_load): Add
the appropriate checks for SFmode/DFmode load/stores in GPR
registers.
(fusion_addis_mem_combo_store): Likewise.
* config/rs6000/rs6000.c (rs6000_init_hard_regno_mode_ok): Rename
fusion_fpr_* to fusion_vsx_* and add in support for ISA 3.0 scalar
d-form instructions for traditional Altivec registers.
(emit_fusion_p9_load): Likewise.
(emit_fusion_p9_store): Likewise.
* config/rs6000/rs6000.md (p9 fusion store peephole2): Remove
early clobber from scratch register.  Do not match if the register
being stored is the scratch register.
(fusion_vsx_<P:mode>_<FPR_FUSION:mode>_load): Rename fusion_fpr_*
to fusion_vsx_* and add in support for ISA 3.0 scalar d-form
instructions for traditional Altivec registers.
(fusion_fpr_<P:mode>_<FPR_FUSION:mode>_load): Likewise.
(fusion_vsx_<P:mode>_<FPR_FUSION:mode>_store): Likewise.
(fusion_fpr_<P:mode>_<FPR_FUSION:mode>_store): Likewise.

[gcc/testsuite]
2016-11-17  Michael Meissner  <meissner@linux.vnet.ibm.com>

PR target/78101
* gcc.target/powerpc/fusion4.c: New test.

From-SVN: r242564

gcc/ChangeLog
gcc/config/rs6000/predicates.md
gcc/config/rs6000/rs6000.c
gcc/config/rs6000/rs6000.md
gcc/testsuite/ChangeLog
gcc/testsuite/gcc.target/powerpc/fusion4.c [new file with mode: 0644]

index 1cf436d161276282d924664e217ccd3409131712..f997b96b671996b8902dbc3d83044f9e901e621e 100644 (file)
@@ -1,3 +1,25 @@
+2016-11-17  Michael Meissner  <meissner@linux.vnet.ibm.com>
+
+       PR target/78101
+       * config/rs6000/predicates.md (fusion_addis_mem_combo_load): Add
+       the appropriate checks for SFmode/DFmode load/stores in GPR
+       registers.
+       (fusion_addis_mem_combo_store): Likewise.
+       * config/rs6000/rs6000.c (rs6000_init_hard_regno_mode_ok): Rename
+       fusion_fpr_* to fusion_vsx_* and add in support for ISA 3.0 scalar
+       d-form instructions for traditional Altivec registers.
+       (emit_fusion_p9_load): Likewise.
+       (emit_fusion_p9_store): Likewise.
+       * config/rs6000/rs6000.md (p9 fusion store peephole2): Remove
+       early clobber from scratch register.  Do not match if the register
+       being stored is the scratch register.
+       (fusion_vsx_<P:mode>_<FPR_FUSION:mode>_load): Rename fusion_fpr_*
+       to fusion_vsx_* and add in support for ISA 3.0 scalar d-form
+       instructions for traditional Altivec registers.
+       (fusion_fpr_<P:mode>_<FPR_FUSION:mode>_load): Likewise.
+       (fusion_vsx_<P:mode>_<FPR_FUSION:mode>_store): Likewise.
+       (fusion_fpr_<P:mode>_<FPR_FUSION:mode>_store): Likewise.
+
 2016-11-17  Thomas Preud'homme  <thomas.preudhomme@arm.com>
 
        PR target/77933
index be61b2b5aeae3d2dd0d2e5fa5b1feb00f842e7c7..57a463b2007397604e993abcdb38c57eed6be47d 100644 (file)
 ;; Match a GPR load (lbz, lhz, lwz, ld) that uses a combined address in the
 ;; memory field with both the addis and the memory offset.  Sign extension
 ;; is not handled here, since lha and lwa are not fused.
-;; With extended fusion, also match a FPR load (lfd, lfs) and float_extend
+;; With P9 fusion, also match a fpr/vector load and float_extend
 (define_predicate "fusion_addis_mem_combo_load"
   (match_code "mem,zero_extend,float_extend")
 {
     case SImode:
       break;
 
+    /* Do not fuse 64-bit DImode in 32-bit since it splits into two
+       separate instructions.  */
     case DImode:
       if (!TARGET_POWERPC64)
        return 0;
       break;
 
+    /* ISA 2.07/power8 only had fusion of GPR loads.  */
     case SFmode:
-    case DFmode:
       if (!TARGET_P9_FUSION)
        return 0;
       break;
 
+    /* ISA 2.07/power8 only had fusion of GPR loads.  Do not allow 64-bit
+       DFmode in 32-bit if -msoft-float since it splits into two separate
+       instructions.  */
+    case DFmode:
+      if ((!TARGET_POWERPC64 && !TARGET_DF_FPR) || !TARGET_P9_FUSION)
+       return 0;
+      break;
+
     default:
       return 0;
     }
     case QImode:
     case HImode:
     case SImode:
+    case SFmode:
       break;
 
+    /* Do not fuse 64-bit DImode in 32-bit since it splits into two
+       separate instructions.  */
     case DImode:
       if (!TARGET_POWERPC64)
        return 0;
       break;
 
-    case SFmode:
-      if (!TARGET_SF_FPR)
-       return 0;
-      break;
-
+    /* Do not allow 64-bit DFmode in 32-bit if -msoft-float since it splits
+       into two separate instructions.  Do allow fusion if we have hardware
+       floating point.  */
     case DFmode:
-      if (!TARGET_DF_FPR)
+      if (!TARGET_POWERPC64 && !TARGET_DF_FPR)
        return 0;
       break;
 
index e85d3eeeb5e4f99a4be15d2932e4b5e8cbd744f6..e050da8875ebd87000df7bd880eca8d6441db593 100644 (file)
@@ -3441,28 +3441,28 @@ rs6000_init_hard_regno_mode_ok (bool global_init_p)
 
       static const struct fuse_insns addis_insns[] = {
        { SFmode, DImode, RELOAD_REG_FPR,
-         CODE_FOR_fusion_fpr_di_sf_load,
-         CODE_FOR_fusion_fpr_di_sf_store },
+         CODE_FOR_fusion_vsx_di_sf_load,
+         CODE_FOR_fusion_vsx_di_sf_store },
 
        { SFmode, SImode, RELOAD_REG_FPR,
-         CODE_FOR_fusion_fpr_si_sf_load,
-         CODE_FOR_fusion_fpr_si_sf_store },
+         CODE_FOR_fusion_vsx_si_sf_load,
+         CODE_FOR_fusion_vsx_si_sf_store },
 
        { DFmode, DImode, RELOAD_REG_FPR,
-         CODE_FOR_fusion_fpr_di_df_load,
-         CODE_FOR_fusion_fpr_di_df_store },
+         CODE_FOR_fusion_vsx_di_df_load,
+         CODE_FOR_fusion_vsx_di_df_store },
 
        { DFmode, SImode, RELOAD_REG_FPR,
-         CODE_FOR_fusion_fpr_si_df_load,
-         CODE_FOR_fusion_fpr_si_df_store },
+         CODE_FOR_fusion_vsx_si_df_load,
+         CODE_FOR_fusion_vsx_si_df_store },
 
        { DImode, DImode, RELOAD_REG_FPR,
-         CODE_FOR_fusion_fpr_di_di_load,
-         CODE_FOR_fusion_fpr_di_di_store },
+         CODE_FOR_fusion_vsx_di_di_load,
+         CODE_FOR_fusion_vsx_di_di_store },
 
        { DImode, SImode, RELOAD_REG_FPR,
-         CODE_FOR_fusion_fpr_si_di_load,
-         CODE_FOR_fusion_fpr_si_di_store },
+         CODE_FOR_fusion_vsx_si_di_load,
+         CODE_FOR_fusion_vsx_si_di_store },
 
        { QImode, DImode, RELOAD_REG_GPR,
          CODE_FOR_fusion_gpr_di_qi_load,
@@ -3522,6 +3522,14 @@ rs6000_init_hard_regno_mode_ok (bool global_init_p)
 
          reg_addr[xmode].fusion_addis_ld[rtype] = addis_insns[i].load;
          reg_addr[xmode].fusion_addis_st[rtype] = addis_insns[i].store;
+
+         if (rtype == RELOAD_REG_FPR && TARGET_P9_DFORM_SCALAR)
+           {
+             reg_addr[xmode].fusion_addis_ld[RELOAD_REG_VMX]
+               = addis_insns[i].load;
+             reg_addr[xmode].fusion_addis_st[RELOAD_REG_VMX]
+               = addis_insns[i].store;
+           }
        }
     }
 
@@ -39818,6 +39826,15 @@ emit_fusion_p9_load (rtx reg, rtx mem, rtx tmp_reg)
       else
        gcc_unreachable ();
     }
+  else if (ALTIVEC_REGNO_P (r) && TARGET_P9_DFORM_SCALAR)
+    {
+      if (mode == SFmode)
+       load_string = "lxssp";
+      else if (mode == DFmode || mode == DImode)
+       load_string = "lxsd";
+      else
+       gcc_unreachable ();
+    }
   else if (INT_REGNO_P (r))
     {
       switch (mode)
@@ -39896,6 +39913,15 @@ emit_fusion_p9_store (rtx mem, rtx reg, rtx tmp_reg)
       else
        gcc_unreachable ();
     }
+  else if (ALTIVEC_REGNO_P (r) && TARGET_P9_DFORM_SCALAR)
+    {
+      if (mode == SFmode)
+       store_string = "stxssp";
+      else if (mode == DFmode || mode == DImode)
+       store_string = "stxsd";
+      else
+       gcc_unreachable ();
+    }
   else if (INT_REGNO_P (r))
     {
       switch (mode)
index b3fe92a899cda27279b93ce831905d09efa7b1f6..acd4a7e508b43dc721b4da7e9f2ec377a1845dbe 100644 (file)
    (set (match_operand:SFDF 2 "offsettable_mem_operand" "")
        (match_operand:SFDF 3 "toc_fusion_or_p9_reg_operand" ""))]
   "TARGET_P9_FUSION && peep2_reg_dead_p (2, operands[0])
-   && fusion_p9_p (operands[0], operands[1], operands[2], operands[3])"
+   && fusion_p9_p (operands[0], operands[1], operands[2], operands[3])
+   && !rtx_equal_p (operands[0], operands[3])"
   [(const_int 0)]
 {
   expand_fusion_p9_store (operands);
        (unspec:GPR_FUSION
         [(match_operand:GPR_FUSION 1 "int_reg_operand" "r")]
         UNSPEC_FUSION_P9))
-   (clobber (match_operand:P 2 "base_reg_operand" "=&b"))]
+   (clobber (match_operand:P 2 "base_reg_operand" "=b"))]
   "TARGET_P9_FUSION"
 {
   return emit_fusion_p9_store (operands[0], operands[1], operands[2]);
   [(set_attr "type" "store")
    (set_attr "length" "8")])
 
-(define_insn "fusion_fpr_<P:mode>_<FPR_FUSION:mode>_load"
-  [(set (match_operand:FPR_FUSION 0 "fpr_reg_operand" "=d")
+(define_insn "fusion_vsx_<P:mode>_<FPR_FUSION:mode>_load"
+  [(set (match_operand:FPR_FUSION 0 "vsx_register_operand" "=dwb")
        (unspec:FPR_FUSION
         [(match_operand:FPR_FUSION 1 "fusion_addis_mem_combo_load" "wF")]
         UNSPEC_FUSION_P9))
   [(set_attr "type" "fpload")
    (set_attr "length" "8")])
 
-(define_insn "fusion_fpr_<P:mode>_<FPR_FUSION:mode>_store"
+(define_insn "fusion_vsx_<P:mode>_<FPR_FUSION:mode>_store"
   [(set (match_operand:FPR_FUSION 0 "fusion_addis_mem_combo_store" "=wF")
        (unspec:FPR_FUSION
-        [(match_operand:FPR_FUSION 1 "fpr_reg_operand" "d")]
+        [(match_operand:FPR_FUSION 1 "vsx_register_operand" "dwb")]
         UNSPEC_FUSION_P9))
    (clobber (match_operand:P 2 "base_reg_operand" "=b"))]
   "TARGET_P9_FUSION"
index 55f7d837aad2f735dc4bc30478040598f183fd4e..8018587479a520779d112e5b7bda733480599148 100644 (file)
@@ -1,3 +1,8 @@
+2016-11-17  Michael Meissner  <meissner@linux.vnet.ibm.com>
+
+       PR target/78101
+       * gcc.target/powerpc/fusion4.c: New test.
+
 2016-11-17  Thomas Preud'homme  <thomas.preudhomme@arm.com>
 
        PR target/77933
diff --git a/gcc/testsuite/gcc.target/powerpc/fusion4.c b/gcc/testsuite/gcc.target/powerpc/fusion4.c
new file mode 100644 (file)
index 0000000..e0185ea
--- /dev/null
@@ -0,0 +1,13 @@
+/* { dg-do compile { target { powerpc*-*-* && ilp32 } } } */
+/* { dg-skip-if "" { powerpc*-*-darwin* } { "*" } { "" } } */
+/* { dg-require-effective-target powerpc_p9vector_ok } */
+/* { dg-skip-if "do not override -mcpu" { powerpc*-*-* } { "-mcpu=*" } { "-mcpu=power7" } } */
+/* { dg-options "-mcpu=power7 -mtune=power9 -O3 -msoft-float -m32" } */
+
+#define LARGE 0x12345
+
+float fusion_float_read (float *p){ return p[LARGE]; }
+
+void fusion_float_write (float *p, float f){ p[LARGE] = f; }
+
+/* { dg-final { scan-assembler "store fusion, type SF" } } */