From fe3f3340416fc6f2a197f2c057de4094f5974d9c Mon Sep 17 00:00:00 2001 From: Michael Meissner Date: Mon, 23 May 2016 23:42:52 +0000 Subject: [PATCH] re PR target/71201 (PowerPC XXPERM instruction fails on ISA 3.0 system.) [gcc] 2016-05-23 Michael Meissner PR target/71201 * config/rs6000/altivec.md (altivec_vperm_<mode>_internal): Drop ISA 3.0 xxperm fusion alternative. (altivec_vperm_v8hiv16qi): Likewise. (altivec_vperm_<mode>_uns_internal): Likewise. (vperm_v8hiv4si): Likewise. (vperm_v16qiv8hi): Likewise. [gcc/testsuite] 2016-05-23 Michael Meissner Kelvin Nilsen * gcc.target/powerpc/p9-permute.c: Run test on big endian as well as little endian. [gcc] 2016-05-23 Michael Meissner Kelvin Nilsen * config/rs6000/rs6000.c (rs6000_expand_vector_set): Generate vpermr/xxpermr on ISA 3.0. (altivec_expand_vec_perm_le): Likewise. * config/rs6000/altivec.md (UNSPEC_VPERMR): New unspec. (altivec_vpermr_<mode>_internal): Add VPERMR/XXPERMR support for ISA 3.0. Co-Authored-By: Kelvin Nilsen From-SVN: r236617 --- gcc/ChangeLog | 20 +++++ gcc/config/rs6000/altivec.md | 79 +++++++++++-------- gcc/config/rs6000/rs6000.c | 62 +++++++++------ gcc/testsuite/ChangeLog | 6 ++ gcc/testsuite/gcc.target/powerpc/p9-permute.c | 3 +- 5 files changed, 111 insertions(+), 59 deletions(-) diff --git a/gcc/ChangeLog b/gcc/ChangeLog index 92f25c746d5..4fd5899ddc5 100644 --- a/gcc/ChangeLog +++ b/gcc/ChangeLog @@ -1,3 +1,23 @@ +2016-05-23 Michael Meissner + + PR target/71201 + * config/rs6000/altivec.md (altivec_vperm_<mode>_internal): Drop + ISA 3.0 xxperm fusion alternative. + (altivec_vperm_v8hiv16qi): Likewise. + (altivec_vperm_<mode>_uns_internal): Likewise. + (vperm_v8hiv4si): Likewise. + (vperm_v16qiv8hi): Likewise. + +2016-05-23 Michael Meissner + Kelvin Nilsen + + * config/rs6000/rs6000.c (rs6000_expand_vector_set): Generate + vpermr/xxpermr on ISA 3.0. + (altivec_expand_vec_perm_le): Likewise. + * config/rs6000/altivec.md (UNSPEC_VPERMR): New unspec. 
+ (altivec_vpermr_<mode>_internal): Add VPERMR/XXPERMR support for + ISA 3.0. + 2016-05-23 Uros Bizjak * config/i386/i386.h (IS_STACK_MODE): Enable for diff --git a/gcc/config/rs6000/altivec.md b/gcc/config/rs6000/altivec.md index e94aec39d73..14fed06f024 100644 --- a/gcc/config/rs6000/altivec.md +++ b/gcc/config/rs6000/altivec.md @@ -58,6 +58,7 @@ UNSPEC_VSUM2SWS UNSPEC_VSUMSWS UNSPEC_VPERM + UNSPEC_VPERMR UNSPEC_VPERM_UNS UNSPEC_VRFIN UNSPEC_VCFUX @@ -1952,32 +1953,30 @@ ;; Slightly prefer vperm, since the target does not overlap the source (define_insn "*altivec_vperm_<mode>_internal" - [(set (match_operand:VM 0 "register_operand" "=v,?wo,?&wo") - (unspec:VM [(match_operand:VM 1 "register_operand" "v,0,wo") - (match_operand:VM 2 "register_operand" "v,wo,wo") - (match_operand:V16QI 3 "register_operand" "v,wo,wo")] + [(set (match_operand:VM 0 "register_operand" "=v,?wo") + (unspec:VM [(match_operand:VM 1 "register_operand" "v,0") + (match_operand:VM 2 "register_operand" "v,wo") + (match_operand:V16QI 3 "register_operand" "v,wo")] UNSPEC_VPERM))] "TARGET_ALTIVEC" "@ vperm %0,%1,%2,%3 - xxperm %x0,%x2,%x3 - xxlor %x0,%x1,%x1\t\t# xxperm fusion\;xxperm %x0,%x2,%x3" + xxperm %x0,%x2,%x3" [(set_attr "type" "vecperm") - (set_attr "length" "4,4,8")]) + (set_attr "length" "4")]) (define_insn "altivec_vperm_v8hiv16qi" - [(set (match_operand:V16QI 0 "register_operand" "=v,?wo,?&wo") - (unspec:V16QI [(match_operand:V8HI 1 "register_operand" "v,0,wo") - (match_operand:V8HI 2 "register_operand" "v,wo,wo") - (match_operand:V16QI 3 "register_operand" "v,wo,wo")] + [(set (match_operand:V16QI 0 "register_operand" "=v,?wo") + (unspec:V16QI [(match_operand:V8HI 1 "register_operand" "v,0") + (match_operand:V8HI 2 "register_operand" "v,wo") + (match_operand:V16QI 3 "register_operand" "v,wo")] UNSPEC_VPERM))] "TARGET_ALTIVEC" "@ vperm %0,%1,%2,%3 - xxperm %x0,%x2,%x3 - xxlor %x0,%x1,%x1\t\t# xxperm fusion\;xxperm %x0,%x2,%x3" + xxperm %x0,%x2,%x3" [(set_attr "type" "vecperm") - (set_attr "length" 
"4,4,8")]) + (set_attr "length" "4")]) (define_expand "altivec_vperm_<mode>_uns" [(set (match_operand:VM 0 "register_operand" "") @@ -1995,18 +1994,17 @@ }) (define_insn "*altivec_vperm_<mode>_uns_internal" - [(set (match_operand:VM 0 "register_operand" "=v,?wo,?&wo") - (unspec:VM [(match_operand:VM 1 "register_operand" "v,0,wo") - (match_operand:VM 2 "register_operand" "v,wo,wo") - (match_operand:V16QI 3 "register_operand" "v,wo,wo")] + [(set (match_operand:VM 0 "register_operand" "=v,?wo") + (unspec:VM [(match_operand:VM 1 "register_operand" "v,0") + (match_operand:VM 2 "register_operand" "v,wo") + (match_operand:V16QI 3 "register_operand" "v,wo")] UNSPEC_VPERM_UNS))] "TARGET_ALTIVEC" "@ vperm %0,%1,%2,%3 - xxperm %x0,%x2,%x3 - xxlor %x0,%x1,%x1\t\t# xxperm fusion\;xxperm %x0,%x2,%x3" + xxperm %x0,%x2,%x3" [(set_attr "type" "vecperm") - (set_attr "length" "4,4,8")]) + (set_attr "length" "4")]) (define_expand "vec_permv16qi" [(set (match_operand:V16QI 0 "register_operand" "") @@ -2035,6 +2033,19 @@ FAIL; }) +(define_insn "*altivec_vpermr_<mode>_internal" + [(set (match_operand:VM 0 "register_operand" "=v,?wo") + (unspec:VM [(match_operand:VM 1 "register_operand" "v,0") + (match_operand:VM 2 "register_operand" "v,wo") + (match_operand:V16QI 3 "register_operand" "v,wo")] + UNSPEC_VPERMR))] + "TARGET_P9_VECTOR" + "@ + vpermr %0,%1,%2,%3 + xxpermr %x0,%x2,%x3" + [(set_attr "type" "vecperm") + (set_attr "length" "4")]) + (define_insn "altivec_vrfip" ; ceil [(set (match_operand:V4SF 0 "register_operand" "=v") (unspec:V4SF [(match_operand:V4SF 1 "register_operand" "v")] @@ -2844,32 +2855,30 @@ "") (define_insn "vperm_v8hiv4si" - [(set (match_operand:V4SI 0 "register_operand" "=v,?wo,?&wo") - (unspec:V4SI [(match_operand:V8HI 1 "register_operand" "v,0,wo") - (match_operand:V4SI 2 "register_operand" "v,wo,wo") - (match_operand:V16QI 3 "register_operand" "v,wo,wo")] + [(set (match_operand:V4SI 0 "register_operand" "=v,?wo") + (unspec:V4SI [(match_operand:V8HI 1 "register_operand" "v,0") + 
(match_operand:V4SI 2 "register_operand" "v,wo") + (match_operand:V16QI 3 "register_operand" "v,wo")] UNSPEC_VPERMSI))] "TARGET_ALTIVEC" "@ vperm %0,%1,%2,%3 - xxperm %x0,%x2,%x3 - xxlor %x0,%x1,%x1\t\t# xxperm fusion\;xxperm %x0,%x2,%x3" + xxperm %x0,%x2,%x3" [(set_attr "type" "vecperm") - (set_attr "length" "4,4,8")]) + (set_attr "length" "4")]) (define_insn "vperm_v16qiv8hi" - [(set (match_operand:V8HI 0 "register_operand" "=v,?wo,?&wo") - (unspec:V8HI [(match_operand:V16QI 1 "register_operand" "v,0,wo") - (match_operand:V8HI 2 "register_operand" "v,wo,wo") - (match_operand:V16QI 3 "register_operand" "v,wo,wo")] + [(set (match_operand:V8HI 0 "register_operand" "=v,?wo") + (unspec:V8HI [(match_operand:V16QI 1 "register_operand" "v,0") + (match_operand:V8HI 2 "register_operand" "v,wo") + (match_operand:V16QI 3 "register_operand" "v,wo")] UNSPEC_VPERMHI))] "TARGET_ALTIVEC" "@ vperm %0,%1,%2,%3 - xxperm %x0,%x2,%x3 - xxlor %x0,%x1,%x1\t\t# xxperm fusion\;xxperm %x0,%x2,%x3" + xxperm %x0,%x2,%x3" [(set_attr "type" "vecperm") - (set_attr "length" "4,4,8")]) + (set_attr "length" "4")]) (define_expand "vec_unpacku_hi_v16qi" diff --git a/gcc/config/rs6000/rs6000.c b/gcc/config/rs6000/rs6000.c index 0488db563e6..ff281302483 100644 --- a/gcc/config/rs6000/rs6000.c +++ b/gcc/config/rs6000/rs6000.c @@ -6863,21 +6863,29 @@ rs6000_expand_vector_set (rtx target, rtx val, int elt) gen_rtvec (3, target, reg, force_reg (V16QImode, x)), UNSPEC_VPERM); - else + else { - /* Invert selector. We prefer to generate VNAND on P8 so - that future fusion opportunities can kick in, but must - generate VNOR elsewhere. */ - rtx notx = gen_rtx_NOT (V16QImode, force_reg (V16QImode, x)); - rtx iorx = (TARGET_P8_VECTOR - ? 
gen_rtx_IOR (V16QImode, notx, notx) - : gen_rtx_AND (V16QImode, notx, notx)); - rtx tmp = gen_reg_rtx (V16QImode); - emit_insn (gen_rtx_SET (tmp, iorx)); + if (TARGET_P9_VECTOR) + x = gen_rtx_UNSPEC (mode, + gen_rtvec (3, target, reg, + force_reg (V16QImode, x)), + UNSPEC_VPERMR); + else + { + /* Invert selector. We prefer to generate VNAND on P8 so + that future fusion opportunities can kick in, but must + generate VNOR elsewhere. */ + rtx notx = gen_rtx_NOT (V16QImode, force_reg (V16QImode, x)); + rtx iorx = (TARGET_P8_VECTOR + ? gen_rtx_IOR (V16QImode, notx, notx) + : gen_rtx_AND (V16QImode, notx, notx)); + rtx tmp = gen_reg_rtx (V16QImode); + emit_insn (gen_rtx_SET (tmp, iorx)); - /* Permute with operands reversed and adjusted selector. */ - x = gen_rtx_UNSPEC (mode, gen_rtvec (3, reg, target, tmp), - UNSPEC_VPERM); + /* Permute with operands reversed and adjusted selector. */ + x = gen_rtx_UNSPEC (mode, gen_rtvec (3, reg, target, tmp), + UNSPEC_VPERM); + } } emit_insn (gen_rtx_SET (target, x)); @@ -34365,17 +34373,25 @@ altivec_expand_vec_perm_le (rtx operands[4]) if (!REG_P (target)) tmp = gen_reg_rtx (mode); - /* Invert the selector with a VNAND if available, else a VNOR. - The VNAND is preferred for future fusion opportunities. */ - notx = gen_rtx_NOT (V16QImode, sel); - iorx = (TARGET_P8_VECTOR - ? gen_rtx_IOR (V16QImode, notx, notx) - : gen_rtx_AND (V16QImode, notx, notx)); - emit_insn (gen_rtx_SET (norreg, iorx)); + if (TARGET_P9_VECTOR) + { + unspec = gen_rtx_UNSPEC (mode, gen_rtvec (3, op0, op1, sel), + UNSPEC_VPERMR); + } + else + { + /* Invert the selector with a VNAND if available, else a VNOR. + The VNAND is preferred for future fusion opportunities. */ + notx = gen_rtx_NOT (V16QImode, sel); + iorx = (TARGET_P8_VECTOR + ? gen_rtx_IOR (V16QImode, notx, notx) + : gen_rtx_AND (V16QImode, notx, notx)); + emit_insn (gen_rtx_SET (norreg, iorx)); - /* Permute with operands reversed and adjusted selector. 
*/ - unspec = gen_rtx_UNSPEC (mode, gen_rtvec (3, op1, op0, norreg), - UNSPEC_VPERM); + /* Permute with operands reversed and adjusted selector. */ + unspec = gen_rtx_UNSPEC (mode, gen_rtvec (3, op1, op0, norreg), + UNSPEC_VPERM); + } /* Copy into target, possibly by way of a register. */ if (!REG_P (target)) diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog index ef528f20344..ed80db55d44 100644 --- a/gcc/testsuite/ChangeLog +++ b/gcc/testsuite/ChangeLog @@ -1,3 +1,9 @@ +2016-05-23 Michael Meissner + Kelvin Nilsen + + * gcc.target/powerpc/p9-permute.c: Run test on big endian as well + as little endian. + 2016-05-23 Paolo Carlini PR c++/70972 diff --git a/gcc/testsuite/gcc.target/powerpc/p9-permute.c b/gcc/testsuite/gcc.target/powerpc/p9-permute.c index f090f180be8..c29c85ba1e8 100644 --- a/gcc/testsuite/gcc.target/powerpc/p9-permute.c +++ b/gcc/testsuite/gcc.target/powerpc/p9-permute.c @@ -1,4 +1,4 @@ -/* { dg-do compile { target { powerpc64le-*-* } } } */ +/* { dg-do compile { target { powerpc64*-*-* } } } */ /* { dg-skip-if "do not override -mcpu" { powerpc*-*-* } { "-mcpu=*" } { "-mcpu=power9" } } */ /* { dg-options "-mcpu=power9 -O2" } */ /* { dg-require-effective-target powerpc_p9vector_ok } */ @@ -17,5 +17,6 @@ permute (vector long long *p, vector long long *q, vector unsigned char mask) return vec_perm (a, b, mask); } +/* expect xxpermr on little-endian, xxperm on big-endian */ /* { dg-final { scan-assembler "xxperm" } } */ /* { dg-final { scan-assembler-not "vperm" } } */ -- 2.30.2