From: Michael Meissner Date: Thu, 17 Nov 2016 21:42:13 +0000 (+0000) Subject: re PR target/78101 (PowerPC 64-bit little endian fusion failure with -O3 -mcpu=power9) X-Git-Url: https://git.libre-soc.org/?a=commitdiff_plain;h=c80620fcfa577f25449c727758627f47224cfaf9;p=gcc.git re PR target/78101 (PowerPC 64-bit little endian fusion failure with -O3 -mcpu=power9) [gcc] 2016-11-17 Michael Meissner PR target/78101 * config/rs6000/predicates.md (fusion_addis_mem_combo_load): Add the appropriate checks for SFmode/DFmode load/stores in GPR registers. (fusion_addis_mem_combo_store): Likewise. * config/rs6000/rs6000.c (rs6000_init_hard_regno_mode_ok): Rename fusion_fpr_* to fusion_vsx_* and add in support for ISA 3.0 scalar d-form instructions for traditional Altivec registers. (emit_fusion_p9_load): Likewise. (emit_fusion_p9_store): Likewise. * config/rs6000/rs6000.md (p9 fusion store peephole2): Remove early clobber from scratch register. Do not match if the register being stored is the scratch register. (fusion_vsx_<P:mode>_<FPR_FUSION:mode>_load): Rename fusion_fpr_* to fusion_vsx_* and add in support for ISA 3.0 scalar d-form instructions for traditional Altivec registers. (fusion_fpr_<P:mode>_<FPR_FUSION:mode>_load): Likewise. (fusion_vsx_<P:mode>_<FPR_FUSION:mode>_store): Likewise. (fusion_fpr_<P:mode>_<FPR_FUSION:mode>_store): Likewise. [gcc/testsuite] 2016-11-17 Michael Meissner PR target/78101 * gcc.target/powerpc/fusion4.c: New test. From-SVN: r242564 --- diff --git a/gcc/ChangeLog b/gcc/ChangeLog index 1cf436d1612..f997b96b671 100644 --- a/gcc/ChangeLog +++ b/gcc/ChangeLog @@ -1,3 +1,25 @@ +2016-11-17 Michael Meissner + + PR target/78101 + * config/rs6000/predicates.md (fusion_addis_mem_combo_load): Add + the appropriate checks for SFmode/DFmode load/stores in GPR + registers. + (fusion_addis_mem_combo_store): Likewise. + * config/rs6000/rs6000.c (rs6000_init_hard_regno_mode_ok): Rename + fusion_fpr_* to fusion_vsx_* and add in support for ISA 3.0 scalar + d-form instructions for traditional Altivec registers. + (emit_fusion_p9_load): Likewise. 
+ (emit_fusion_p9_store): Likewise. + * config/rs6000/rs6000.md (p9 fusion store peephole2): Remove + early clobber from scratch register. Do not match if the register + being stored is the scratch register. + (fusion_vsx_<P:mode>_<FPR_FUSION:mode>_load): Rename fusion_fpr_* + to fusion_vsx_* and add in support for ISA 3.0 scalar d-form + instructions for traditional Altivec registers. + (fusion_fpr_<P:mode>_<FPR_FUSION:mode>_load): Likewise. + (fusion_vsx_<P:mode>_<FPR_FUSION:mode>_store): Likewise. + (fusion_fpr_<P:mode>_<FPR_FUSION:mode>_store): Likewise. + 2016-11-17 Thomas Preud'homme PR target/77933 diff --git a/gcc/config/rs6000/predicates.md b/gcc/config/rs6000/predicates.md index be61b2b5aea..57a463b2007 100644 --- a/gcc/config/rs6000/predicates.md +++ b/gcc/config/rs6000/predicates.md @@ -1844,7 +1844,7 @@ ;; Match a GPR load (lbz, lhz, lwz, ld) that uses a combined address in the ;; memory field with both the addis and the memory offset. Sign extension ;; is not handled here, since lha and lwa are not fused. -;; With extended fusion, also match a FPR load (lfd, lfs) and float_extend +;; With P9 fusion, also match a fpr/vector load and float_extend (define_predicate "fusion_addis_mem_combo_load" (match_code "mem,zero_extend,float_extend") { @@ -1867,17 +1867,27 @@ case SImode: break; + /* Do not fuse 64-bit DImode in 32-bit since it splits into two + separate instructions. */ case DImode: if (!TARGET_POWERPC64) return 0; break; + /* ISA 2.08/power8 only had fusion of GPR loads. */ case SFmode: - case DFmode: if (!TARGET_P9_FUSION) return 0; break; + /* ISA 2.08/power8 only had fusion of GPR loads. Do not allow 64-bit + DFmode in 32-bit if -msoft-float since it splits into two separate + instructions. */ + case DFmode: + if ((!TARGET_POWERPC64 && !TARGET_DF_FPR) || !TARGET_P9_FUSION) + return 0; + break; + default: return 0; } @@ -1920,20 +1930,21 @@ case QImode: case HImode: case SImode: + case SFmode: break; + /* Do not fuse 64-bit DImode in 32-bit since it splits into two + separate instructions. 
*/ case DImode: if (!TARGET_POWERPC64) return 0; break; - case SFmode: - if (!TARGET_SF_FPR) - return 0; - break; - + /* Do not allow 64-bit DFmode in 32-bit if -msoft-float since it splits + into two separate instructions. Do allow fusion if we have hardware + floating point. */ case DFmode: - if (!TARGET_DF_FPR) + if (!TARGET_POWERPC64 && !TARGET_DF_FPR) return 0; break; diff --git a/gcc/config/rs6000/rs6000.c b/gcc/config/rs6000/rs6000.c index e85d3eeeb5e..e050da8875e 100644 --- a/gcc/config/rs6000/rs6000.c +++ b/gcc/config/rs6000/rs6000.c @@ -3441,28 +3441,28 @@ rs6000_init_hard_regno_mode_ok (bool global_init_p) static const struct fuse_insns addis_insns[] = { { SFmode, DImode, RELOAD_REG_FPR, - CODE_FOR_fusion_fpr_di_sf_load, - CODE_FOR_fusion_fpr_di_sf_store }, + CODE_FOR_fusion_vsx_di_sf_load, + CODE_FOR_fusion_vsx_di_sf_store }, { SFmode, SImode, RELOAD_REG_FPR, - CODE_FOR_fusion_fpr_si_sf_load, - CODE_FOR_fusion_fpr_si_sf_store }, + CODE_FOR_fusion_vsx_si_sf_load, + CODE_FOR_fusion_vsx_si_sf_store }, { DFmode, DImode, RELOAD_REG_FPR, - CODE_FOR_fusion_fpr_di_df_load, - CODE_FOR_fusion_fpr_di_df_store }, + CODE_FOR_fusion_vsx_di_df_load, + CODE_FOR_fusion_vsx_di_df_store }, { DFmode, SImode, RELOAD_REG_FPR, - CODE_FOR_fusion_fpr_si_df_load, - CODE_FOR_fusion_fpr_si_df_store }, + CODE_FOR_fusion_vsx_si_df_load, + CODE_FOR_fusion_vsx_si_df_store }, { DImode, DImode, RELOAD_REG_FPR, - CODE_FOR_fusion_fpr_di_di_load, - CODE_FOR_fusion_fpr_di_di_store }, + CODE_FOR_fusion_vsx_di_di_load, + CODE_FOR_fusion_vsx_di_di_store }, { DImode, SImode, RELOAD_REG_FPR, - CODE_FOR_fusion_fpr_si_di_load, - CODE_FOR_fusion_fpr_si_di_store }, + CODE_FOR_fusion_vsx_si_di_load, + CODE_FOR_fusion_vsx_si_di_store }, { QImode, DImode, RELOAD_REG_GPR, CODE_FOR_fusion_gpr_di_qi_load, @@ -3522,6 +3522,14 @@ rs6000_init_hard_regno_mode_ok (bool global_init_p) reg_addr[xmode].fusion_addis_ld[rtype] = addis_insns[i].load; reg_addr[xmode].fusion_addis_st[rtype] = addis_insns[i].store; + + 
if (rtype == RELOAD_REG_FPR && TARGET_P9_DFORM_SCALAR) + { + reg_addr[xmode].fusion_addis_ld[RELOAD_REG_VMX] + = addis_insns[i].load; + reg_addr[xmode].fusion_addis_st[RELOAD_REG_VMX] + = addis_insns[i].store; + } } } @@ -39818,6 +39826,15 @@ emit_fusion_p9_load (rtx reg, rtx mem, rtx tmp_reg) else gcc_unreachable (); } + else if (ALTIVEC_REGNO_P (r) && TARGET_P9_DFORM_SCALAR) + { + if (mode == SFmode) + load_string = "lxssp"; + else if (mode == DFmode || mode == DImode) + load_string = "lxsd"; + else + gcc_unreachable (); + } else if (INT_REGNO_P (r)) { switch (mode) @@ -39896,6 +39913,15 @@ emit_fusion_p9_store (rtx mem, rtx reg, rtx tmp_reg) else gcc_unreachable (); } + else if (ALTIVEC_REGNO_P (r) && TARGET_P9_DFORM_SCALAR) + { + if (mode == SFmode) + store_string = "stxssp"; + else if (mode == DFmode || mode == DImode) + store_string = "stxsd"; + else + gcc_unreachable (); + } else if (INT_REGNO_P (r)) { switch (mode) diff --git a/gcc/config/rs6000/rs6000.md b/gcc/config/rs6000/rs6000.md index b3fe92a899c..acd4a7e508b 100644 --- a/gcc/config/rs6000/rs6000.md +++ b/gcc/config/rs6000/rs6000.md @@ -13438,7 +13438,8 @@ (set (match_operand:SFDF 2 "offsettable_mem_operand" "") (match_operand:SFDF 3 "toc_fusion_or_p9_reg_operand" ""))] "TARGET_P9_FUSION && peep2_reg_dead_p (2, operands[0]) - && fusion_p9_p (operands[0], operands[1], operands[2], operands[3])" + && fusion_p9_p (operands[0], operands[1], operands[2], operands[3]) + && !rtx_equal_p (operands[0], operands[3])" [(const_int 0)] { expand_fusion_p9_store (operands); @@ -13496,7 +13497,7 @@ (unspec:GPR_FUSION [(match_operand:GPR_FUSION 1 "int_reg_operand" "r")] UNSPEC_FUSION_P9)) - (clobber (match_operand:P 2 "base_reg_operand" "=&b"))] + (clobber (match_operand:P 2 "base_reg_operand" "=b"))] "TARGET_P9_FUSION" { return emit_fusion_p9_store (operands[0], operands[1], operands[2]); @@ -13504,8 +13505,8 @@ [(set_attr "type" "store") (set_attr "length" "8")]) -(define_insn "fusion_fpr___load" - [(set 
(match_operand:FPR_FUSION 0 "fpr_reg_operand" "=d") +(define_insn "fusion_vsx___load" + [(set (match_operand:FPR_FUSION 0 "vsx_register_operand" "=dwb") (unspec:FPR_FUSION [(match_operand:FPR_FUSION 1 "fusion_addis_mem_combo_load" "wF")] UNSPEC_FUSION_P9)) @@ -13517,10 +13518,10 @@ [(set_attr "type" "fpload") (set_attr "length" "8")]) -(define_insn "fusion_fpr___store" +(define_insn "fusion_vsx___store" [(set (match_operand:FPR_FUSION 0 "fusion_addis_mem_combo_store" "=wF") (unspec:FPR_FUSION - [(match_operand:FPR_FUSION 1 "fpr_reg_operand" "d")] + [(match_operand:FPR_FUSION 1 "vsx_register_operand" "dwb")] UNSPEC_FUSION_P9)) (clobber (match_operand:P 2 "base_reg_operand" "=b"))] "TARGET_P9_FUSION" diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog index 55f7d837aad..8018587479a 100644 --- a/gcc/testsuite/ChangeLog +++ b/gcc/testsuite/ChangeLog @@ -1,3 +1,8 @@ +2016-11-17 Michael Meissner + + PR target/78101 + * gcc.target/powerpc/fusion4.c: New test. + 2016-11-17 Thomas Preud'homme PR target/77933 diff --git a/gcc/testsuite/gcc.target/powerpc/fusion4.c b/gcc/testsuite/gcc.target/powerpc/fusion4.c new file mode 100644 index 00000000000..e0185ead1a3 --- /dev/null +++ b/gcc/testsuite/gcc.target/powerpc/fusion4.c @@ -0,0 +1,13 @@ +/* { dg-do compile { target { powerpc*-*-* && ilp32 } } } */ +/* { dg-skip-if "" { powerpc*-*-darwin* } { "*" } { "" } } */ +/* { dg-require-effective-target powerpc_p9vector_ok } */ +/* { dg-skip-if "do not override -mcpu" { powerpc*-*-* } { "-mcpu=*" } { "-mcpu=power7" } } */ +/* { dg-options "-mcpu=power7 -mtune=power9 -O3 -msoft-float -m32" } */ + +#define LARGE 0x12345 + +float fusion_float_read (float *p){ return p[LARGE]; } + +void fusion_float_write (float *p, float f){ p[LARGE] = f; } + +/* { dg-final { scan-assembler "store fusion, type SF" } } */