From 90e0a7020eeeb72e95afe16b392440dba78f19a1 Mon Sep 17 00:00:00 2001 From: Michael Meissner Date: Tue, 7 Apr 2015 03:36:05 +0000 Subject: [PATCH] re PR target/65614 (PowerPC VSX systems should use XSCPSGNDP to copy scalar fp data to/from Altivec registers) [gcc] 2015-04-06 Michael Meissner PR target/65614 * config/rs6000/rs6000.c (struct processor_costs): Add cost field for SF->DF conversions to make FLOAT_EXTEND more expensive, so that LFD is used to load double constants instead of LFS. Add defaults for all costs structures. Add comments for missing initialization fields. (size32_cost): Likewise. (size64_cost): Likewise. (rs64a_cost): Likewise. (mpccore_cost): Likewise. (ppc403_cost): Likewise. (ppc405_cost): Likewise. (ppc440_cost): Likewise. (ppc476_cost): Likewise. (ppc601_cost): Likewise. (ppc603_cost): Likewise. (ppc604_cost): Likewise. (ppc604e_cost): Likewise. (ppc620_cost): Likewise. (ppc630_cost): Likewise. (ppccell_cost): Likewise. (ppc750_cost): Likewise. (ppc7450_cost): Likewise. (ppc8540_cost): Likewise. (ppce300c2c3_cost): Likewise. (ppce500mc_cost): Likewise. (ppce500mc64_cost): Likewise. (ppce5500_cost): Likewise. (ppce6500_cost): Likewise. (titan_cost): Likewise. (power4_cost): Likewise. (power6_cost): Likewise. (power7_cost): Likewise. (power8_cost): Likewise. (ppca2_cost): Likewise. (rs6000_rtx_costs): Make FLOAT_EXTEND use sfdf_convert field. * config/rs6000/rs6000.md (extendsfdf2_fpr): Generate XSCPSGNDP instead of XXLOR to copy SFmode to clear out dirty bits created when SFmode denormals are generated. (mov_hardfloat, FMOVE32 case): Likewise. (truncdfsf2_fpr): Add support for ISA 2.07 XSRSP instruction. [gcc/testsuite] 2015-04-06 Michael Meissner PR target/65614 * gcc.target/powerpc/compress-float-ppc-pic.c: Run test on power5 to get floating point compression. * gcc.target/powerpc/compress-float-ppc.c: Likewise. 
From-SVN: r221888 --- gcc/ChangeLog | 45 +++++++++++++++++ gcc/config/rs6000/rs6000.c | 48 +++++++++++++++---- gcc/config/rs6000/rs6000.md | 16 ++++--- gcc/testsuite/ChangeLog | 7 +++ .../powerpc/compress-float-ppc-pic.c | 4 +- .../gcc.target/powerpc/compress-float-ppc.c | 4 +- 6 files changed, 106 insertions(+), 18 deletions(-) diff --git a/gcc/ChangeLog b/gcc/ChangeLog index 9d22614cd15..c060283c73f 100644 --- a/gcc/ChangeLog +++ b/gcc/ChangeLog @@ -1,3 +1,48 @@ +2015-04-06 Michael Meissner + + PR target/65614 + * config/rs6000/rs6000.c (struct processor_costs): Add cost field + for SF->DF conversions to make FLOAT_EXTEND more expensive, so + that LFD is used to load double constants instead of LFS. Add + defaults for all costs structures. Add comments for missing + initialization fields. + (size32_cost): Likewise. + (size64_cost): Likewise. + (rs64a_cost): Likewise. + (mpccore_cost): Likewise. + (ppc403_cost): Likewise. + (ppc405_cost): Likewise. + (ppc440_cost): Likewise. + (ppc476_cost): Likewise. + (ppc601_cost): Likewise. + (ppc603_cost): Likewise. + (ppc604_cost): Likewise. + (ppc604e_cost): Likewise. + (ppc620_cost): Likewise. + (ppc630_cost): Likewise. + (ppccell_cost): Likewise. + (ppc750_cost): Likewise. + (ppc7450_cost): Likewise. + (ppc8540_cost): Likewise. + (ppce300c2c3_cost): Likewise. + (ppce500mc_cost): Likewise. + (ppce500mc64_cost): Likewise. + (ppce5500_cost): Likewise. + (ppce6500_cost): Likewise. + (titan_cost): Likewise. + (power4_cost): Likewise. + (power6_cost): Likewise. + (power7_cost): Likewise. + (power8_cost): Likewise. + (ppca2_cost): Likewise. + (rs6000_rtx_costs): Make FLOAT_EXTEND use SFDF_convert field. + + * config/rs6000/rs6000.md (extendsfdf2_fpr): Generate XSCPSGNDP + instead of XXLOR to copy SFmode to clear out dirty bits created + when SFmode denormals are generated. + (mov_hardfloat, FMOVE32 case): Likewise. + (truncdfsf2_fpr): Add support for ISA 2.07 XSRSP instruction. 
+ 2015-04-06 Evandro Menezes * doc/invoke.texi (AARCH64/mtune): Add exynos-m1 as an option. diff --git a/gcc/config/rs6000/rs6000.c b/gcc/config/rs6000/rs6000.c index 31b46ea4c82..4bb42213c87 100644 --- a/gcc/config/rs6000/rs6000.c +++ b/gcc/config/rs6000/rs6000.c @@ -457,6 +457,7 @@ struct processor_costs { const int l2_cache_size; /* size of l2 cache, in kilobytes. */ const int simultaneous_prefetches; /* number of parallel prefetch operations. */ + const int sfdf_convert; /* cost of SF->DF conversion. */ }; const struct processor_costs *rs6000_cost; @@ -476,10 +477,11 @@ struct processor_costs size32_cost = { COSTS_N_INSNS (1), /* dmul */ COSTS_N_INSNS (1), /* sdiv */ COSTS_N_INSNS (1), /* ddiv */ - 32, - 0, - 0, - 0, + 32, /* cache line size */ + 0, /* l1 cache */ + 0, /* l2 cache */ + 0, /* streams */ + 0, /* SF->DF convert */ }; /* Instruction size costs on 64bit processors. */ @@ -495,10 +497,11 @@ struct processor_costs size64_cost = { COSTS_N_INSNS (1), /* dmul */ COSTS_N_INSNS (1), /* sdiv */ COSTS_N_INSNS (1), /* ddiv */ - 128, - 0, - 0, - 0, + 128, /* cache line size */ + 0, /* l1 cache */ + 0, /* l2 cache */ + 0, /* streams */ + 0, /* SF->DF convert */ }; /* Instruction costs on RS64A processors. */ @@ -518,6 +521,7 @@ struct processor_costs rs64a_cost = { 128, /* l1 cache */ 2048, /* l2 cache */ 1, /* streams */ + 0, /* SF->DF convert */ }; /* Instruction costs on MPCCORE processors. */ @@ -537,6 +541,7 @@ struct processor_costs mpccore_cost = { 4, /* l1 cache */ 16, /* l2 cache */ 1, /* streams */ + 0, /* SF->DF convert */ }; /* Instruction costs on PPC403 processors. */ @@ -556,6 +561,7 @@ struct processor_costs ppc403_cost = { 4, /* l1 cache */ 16, /* l2 cache */ 1, /* streams */ + 0, /* SF->DF convert */ }; /* Instruction costs on PPC405 processors. */ @@ -575,6 +581,7 @@ struct processor_costs ppc405_cost = { 16, /* l1 cache */ 128, /* l2 cache */ 1, /* streams */ + 0, /* SF->DF convert */ }; /* Instruction costs on PPC440 processors. 
*/ @@ -594,6 +601,7 @@ struct processor_costs ppc440_cost = { 32, /* l1 cache */ 256, /* l2 cache */ 1, /* streams */ + 0, /* SF->DF convert */ }; /* Instruction costs on PPC476 processors. */ @@ -613,6 +621,7 @@ struct processor_costs ppc476_cost = { 32, /* l1 cache */ 512, /* l2 cache */ 1, /* streams */ + 0, /* SF->DF convert */ }; /* Instruction costs on PPC601 processors. */ @@ -632,6 +641,7 @@ struct processor_costs ppc601_cost = { 32, /* l1 cache */ 256, /* l2 cache */ 1, /* streams */ + 0, /* SF->DF convert */ }; /* Instruction costs on PPC603 processors. */ @@ -651,6 +661,7 @@ struct processor_costs ppc603_cost = { 8, /* l1 cache */ 64, /* l2 cache */ 1, /* streams */ + 0, /* SF->DF convert */ }; /* Instruction costs on PPC604 processors. */ @@ -670,6 +681,7 @@ struct processor_costs ppc604_cost = { 16, /* l1 cache */ 512, /* l2 cache */ 1, /* streams */ + 0, /* SF->DF convert */ }; /* Instruction costs on PPC604e processors. */ @@ -689,6 +701,7 @@ struct processor_costs ppc604e_cost = { 32, /* l1 cache */ 1024, /* l2 cache */ 1, /* streams */ + 0, /* SF->DF convert */ }; /* Instruction costs on PPC620 processors. */ @@ -708,6 +721,7 @@ struct processor_costs ppc620_cost = { 32, /* l1 cache */ 1024, /* l2 cache */ 1, /* streams */ + 0, /* SF->DF convert */ }; /* Instruction costs on PPC630 processors. */ @@ -727,6 +741,7 @@ struct processor_costs ppc630_cost = { 64, /* l1 cache */ 1024, /* l2 cache */ 1, /* streams */ + 0, /* SF->DF convert */ }; /* Instruction costs on Cell processor. */ @@ -747,6 +762,7 @@ struct processor_costs ppccell_cost = { 32, /* l1 cache */ 512, /* l2 cache */ 6, /* streams */ + 0, /* SF->DF convert */ }; /* Instruction costs on PPC750 and PPC7400 processors. */ @@ -766,6 +782,7 @@ struct processor_costs ppc750_cost = { 32, /* l1 cache */ 512, /* l2 cache */ 1, /* streams */ + 0, /* SF->DF convert */ }; /* Instruction costs on PPC7450 processors. 
*/ @@ -785,6 +802,7 @@ struct processor_costs ppc7450_cost = { 32, /* l1 cache */ 1024, /* l2 cache */ 1, /* streams */ + 0, /* SF->DF convert */ }; /* Instruction costs on PPC8540 processors. */ @@ -804,6 +822,7 @@ struct processor_costs ppc8540_cost = { 32, /* l1 cache */ 256, /* l2 cache */ 1, /* prefetch streams /*/ + 0, /* SF->DF convert */ }; /* Instruction costs on E300C2 and E300C3 cores. */ @@ -823,6 +842,7 @@ struct processor_costs ppce300c2c3_cost = { 16, /* l1 cache */ 16, /* l2 cache */ 1, /* prefetch streams /*/ + 0, /* SF->DF convert */ }; /* Instruction costs on PPCE500MC processors. */ @@ -842,6 +862,7 @@ struct processor_costs ppce500mc_cost = { 32, /* l1 cache */ 128, /* l2 cache */ 1, /* prefetch streams /*/ + 0, /* SF->DF convert */ }; /* Instruction costs on PPCE500MC64 processors. */ @@ -861,6 +882,7 @@ struct processor_costs ppce500mc64_cost = { 32, /* l1 cache */ 128, /* l2 cache */ 1, /* prefetch streams /*/ + 0, /* SF->DF convert */ }; /* Instruction costs on PPCE5500 processors. */ @@ -880,6 +902,7 @@ struct processor_costs ppce5500_cost = { 32, /* l1 cache */ 128, /* l2 cache */ 1, /* prefetch streams /*/ + 0, /* SF->DF convert */ }; /* Instruction costs on PPCE6500 processors. */ @@ -899,6 +922,7 @@ struct processor_costs ppce6500_cost = { 32, /* l1 cache */ 128, /* l2 cache */ 1, /* prefetch streams /*/ + 0, /* SF->DF convert */ }; /* Instruction costs on AppliedMicro Titan processors. */ @@ -918,6 +942,7 @@ struct processor_costs titan_cost = { 32, /* l1 cache */ 512, /* l2 cache */ 1, /* prefetch streams /*/ + 0, /* SF->DF convert */ }; /* Instruction costs on POWER4 and POWER5 processors. */ @@ -937,6 +962,7 @@ struct processor_costs power4_cost = { 32, /* l1 cache */ 1024, /* l2 cache */ 8, /* prefetch streams /*/ + 0, /* SF->DF convert */ }; /* Instruction costs on POWER6 processors. 
*/ @@ -956,6 +982,7 @@ struct processor_costs power6_cost = { 64, /* l1 cache */ 2048, /* l2 cache */ 16, /* prefetch streams */ + 0, /* SF->DF convert */ }; /* Instruction costs on POWER7 processors. */ @@ -975,6 +1002,7 @@ struct processor_costs power7_cost = { 32, /* l1 cache */ 256, /* l2 cache */ 12, /* prefetch streams */ + COSTS_N_INSNS (3), /* SF->DF convert */ }; /* Instruction costs on POWER8 processors. */ @@ -994,6 +1022,7 @@ struct processor_costs power8_cost = { 32, /* l1 cache */ 256, /* l2 cache */ 12, /* prefetch streams */ + COSTS_N_INSNS (3), /* SF->DF convert */ }; /* Instruction costs on POWER A2 processors. */ @@ -1013,6 +1042,7 @@ struct processor_costs ppca2_cost = { 16, /* l1 cache */ 2048, /* l2 cache */ 16, /* prefetch streams */ + 0, /* SF->DF convert */ }; @@ -30480,7 +30510,7 @@ rs6000_rtx_costs (rtx x, int code, int outer_code, int opno ATTRIBUTE_UNUSED, case FLOAT_EXTEND: if (mode == DFmode) - *total = 0; + *total = rs6000_cost->sfdf_convert; else *total = rs6000_cost->fp; return false; diff --git a/gcc/config/rs6000/rs6000.md b/gcc/config/rs6000/rs6000.md index 5025e6080a3..0178bf45b00 100644 --- a/gcc/config/rs6000/rs6000.md +++ b/gcc/config/rs6000/rs6000.md @@ -5222,7 +5222,7 @@ fmr %0,%1 lfs%U1%X1 %0,%1 # - xxlor %x0,%x1,%x1 + xscpsgndp %x0,%x1,%x1 lxsspx %x0,%y1" "&& reload_completed && REG_P (operands[1]) && REGNO (operands[0]) == REGNO (operands[1])" [(const_int 0)] @@ -5230,7 +5230,7 @@ emit_note (NOTE_INSN_DELETED); DONE; } - [(set_attr "type" "fp,fp,fpload,fp,vecsimple,fpload")]) + [(set_attr "type" "fp,fp,fpload,fp,fp,fpload")]) (define_expand "truncdfsf2" [(set (match_operand:SF 0 "gpc_reg_operand" "") @@ -5239,10 +5239,12 @@ "") (define_insn "*truncdfsf2_fpr" - [(set (match_operand:SF 0 "gpc_reg_operand" "=f") - (float_truncate:SF (match_operand:DF 1 "gpc_reg_operand" "d")))] + [(set (match_operand:SF 0 "gpc_reg_operand" "=f,wy") + (float_truncate:SF (match_operand:DF 1 "gpc_reg_operand" "d,ws")))] "TARGET_HARD_FLOAT && 
TARGET_FPRS && TARGET_DOUBLE_FLOAT" - "frsp %0,%1" + "@ + frsp %0,%1 + xsrsp %x0,%x1" [(set_attr "type" "fp")]) ;; This expander is here to avoid FLOAT_WORDS_BIGENDIAN tests in @@ -8058,7 +8060,7 @@ lwz%U1%X1 %0,%1 stw%U0%X0 %1,%0 fmr %0,%1 - xxlor %x0,%x1,%x1 + xscpsgndp %x0,%x1,%x1 xxlxor %x0,%x0,%x0 li %0,0 @@ -8070,7 +8072,7 @@ mt%0 %1 mf%1 %0 nop" - [(set_attr "type" "*,load,store,fp,vecsimple,vecsimple,integer,fpload,fpstore,fpload,fpstore,mftgpr,mffgpr,mtjmpr,mfjmpr,*") + [(set_attr "type" "*,load,store,fp,fp,vecsimple,integer,fpload,fpstore,fpload,fpstore,mftgpr,mffgpr,mtjmpr,mfjmpr,*") (set_attr "length" "4")]) (define_insn "*mov_softfloat" diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog index 6a0d0001274..2c01f72e0ca 100644 --- a/gcc/testsuite/ChangeLog +++ b/gcc/testsuite/ChangeLog @@ -1,3 +1,10 @@ +2015-04-06 Michael Meissner + + PR target/65614 + * gcc.target/powerpc/compress-float-ppc-pic.c: Run test on power5 + to get floating point compression. + * gcc.target/powerpc/compress-foat-ppc.c: Likewise. 
+ 2015-04-06 Jakub Jelinek PR preprocessor/61977 diff --git a/gcc/testsuite/gcc.target/powerpc/compress-float-ppc-pic.c b/gcc/testsuite/gcc.target/powerpc/compress-float-ppc-pic.c index aa1da5245ff..1c8814bcb93 100644 --- a/gcc/testsuite/gcc.target/powerpc/compress-float-ppc-pic.c +++ b/gcc/testsuite/gcc.target/powerpc/compress-float-ppc-pic.c @@ -1,5 +1,7 @@ /* { dg-do compile { target powerpc_fprs } } */ -/* { dg-options "-O2 -fpic" } */ +/* { dg-options "-O2 -fpic -mcpu=power5" } */ +/* { dg-skip-if "do not override -mcpu" { powerpc*-*-* } { "-mcpu=*" } { "-mcpu=power5" } } */ + double foo (double x) { return x + 1.75; } diff --git a/gcc/testsuite/gcc.target/powerpc/compress-float-ppc.c b/gcc/testsuite/gcc.target/powerpc/compress-float-ppc.c index 312642e68fc..290ab2be121 100644 --- a/gcc/testsuite/gcc.target/powerpc/compress-float-ppc.c +++ b/gcc/testsuite/gcc.target/powerpc/compress-float-ppc.c @@ -1,5 +1,7 @@ /* { dg-do compile { target powerpc_fprs } } */ -/* { dg-options "-O2" } */ +/* { dg-options "-O2 -mcpu=power5" } */ +/* { dg-skip-if "do not override -mcpu" { powerpc*-*-* } { "-mcpu=*" } { "-mcpu=power5" } } */ + double foo (double x) { return x + 1.75; } -- 2.30.2