From 16122c22dc009696131c250603745b34340b803a Mon Sep 17 00:00:00 2001 From: Michael Meissner Date: Tue, 20 Jun 2017 06:26:27 +0000 Subject: [PATCH] re PR target/79799 (Improve vec_insert of float on Power9) [gcc] 2017-06-20 Michael Meissner PR target/79799 * config/rs6000/rs6000.c (rs6000_expand_vector_init): Add support for doing vector set of SFmode on ISA 3.0. * config/rs6000/vsx.md (vsx_set_v4sf_p9): Likewise. (vsx_set_v4sf_p9_zero): Special case setting 0.0f to a V4SF element. (vsx_insert_extract_v4sf_p9): Add an optimization for inserting a SFmode value into a V4SF variable that was extracted from another V4SF variable without converting the element to double precision and back to single precision vector format. (vsx_insert_extract_v4sf_p9_2): Likewise. [gcc/testsuite] 2017-06-20 Michael Meissner PR target/79799 * gcc.target/powerpc/pr79799-1.c: New test. * gcc.target/powerpc/pr79799-2.c: Likewise. * gcc.target/powerpc/pr79799-3.c: Likewise. * gcc.target/powerpc/pr79799-4.c: Likewise. * gcc.target/powerpc/pr79799-5.c: Likewise. From-SVN: r249395 --- gcc/ChangeLog | 14 ++ gcc/config/rs6000/rs6000.c | 2 + gcc/config/rs6000/vsx.md | 128 +++++++++++++++++++ gcc/testsuite/ChangeLog | 9 ++ gcc/testsuite/gcc.target/powerpc/pr79799-1.c | 43 +++++++ gcc/testsuite/gcc.target/powerpc/pr79799-2.c | 31 +++++ gcc/testsuite/gcc.target/powerpc/pr79799-3.c | 24 ++++ gcc/testsuite/gcc.target/powerpc/pr79799-4.c | 105 +++++++++++++++ gcc/testsuite/gcc.target/powerpc/pr79799-5.c | 25 ++++ 9 files changed, 381 insertions(+) create mode 100644 gcc/testsuite/gcc.target/powerpc/pr79799-1.c create mode 100644 gcc/testsuite/gcc.target/powerpc/pr79799-2.c create mode 100644 gcc/testsuite/gcc.target/powerpc/pr79799-3.c create mode 100644 gcc/testsuite/gcc.target/powerpc/pr79799-4.c create mode 100644 gcc/testsuite/gcc.target/powerpc/pr79799-5.c diff --git a/gcc/ChangeLog b/gcc/ChangeLog index 268abf33656..5af9611b837 100644 --- a/gcc/ChangeLog +++ b/gcc/ChangeLog @@ -1,3 +1,17 @@ +2017-06-20 Michael Meissner + + PR target/79799 + * config/rs6000/rs6000.c (rs6000_expand_vector_init): Add support + for doing vector set of SFmode on ISA 3.0. + * config/rs6000/vsx.md (vsx_set_v4sf_p9): Likewise. + (vsx_set_v4sf_p9_zero): Special case setting 0.0f to a V4SF + element. + (vsx_insert_extract_v4sf_p9): Add an optimization for inserting a + SFmode value into a V4SF variable that was extracted from another + V4SF variable without converting the element to double precision + and back to single precision vector format. + (vsx_insert_extract_v4sf_p9_2): Likewise. + 2017-06-19 Jakub Jelinek * tree-ssa-structalias.c (get_constraint_for_ptr_offset): Multiply diff --git a/gcc/config/rs6000/rs6000.c b/gcc/config/rs6000/rs6000.c index 14b17788ac5..eee9d0c00f3 100644 --- a/gcc/config/rs6000/rs6000.c +++ b/gcc/config/rs6000/rs6000.c @@ -7451,6 +7451,8 @@ rs6000_expand_vector_set (rtx target, rtx val, int elt) insn = gen_vsx_set_v8hi_p9 (target, target, val, elt_rtx); else if (mode == V16QImode) insn = gen_vsx_set_v16qi_p9 (target, target, val, elt_rtx); + else if (mode == V4SFmode) + insn = gen_vsx_set_v4sf_p9 (target, target, val, elt_rtx); } if (insn) diff --git a/gcc/config/rs6000/vsx.md b/gcc/config/rs6000/vsx.md index 7aa6d32e821..4d73f0abe0a 100644 --- a/gcc/config/rs6000/vsx.md +++ b/gcc/config/rs6000/vsx.md @@ -3173,6 +3173,134 @@ } [(set_attr "type" "vecperm")]) +(define_insn_and_split "vsx_set_v4sf_p9" + [(set (match_operand:V4SF 0 "gpc_reg_operand" "=wa") + (unspec:V4SF + [(match_operand:V4SF 1 "gpc_reg_operand" "0") + (match_operand:SF 2 "gpc_reg_operand" "ww") + (match_operand:QI 3 "const_0_to_3_operand" "n")] + UNSPEC_VSX_SET)) + (clobber (match_scratch:SI 4 "=&wJwK"))] + "VECTOR_MEM_VSX_P (V4SFmode) && TARGET_P9_VECTOR && TARGET_VSX_SMALL_INTEGER + && TARGET_UPPER_REGS_DI && TARGET_POWERPC64" + "#" + "&& reload_completed" + [(set (match_dup 5) + (unspec:V4SF [(match_dup 2)] + UNSPEC_VSX_CVDPSPN)) + (parallel [(set (match_dup 4) + (vec_select:SI (match_dup 6) + (parallel [(match_dup 7)]))) + (clobber (scratch:SI))]) + (set (match_dup 8) + (unspec:V4SI [(match_dup 8) + (match_dup 4) + (match_dup 3)] + UNSPEC_VSX_SET))] +{ + unsigned int tmp_regno = reg_or_subregno (operands[4]); + + operands[5] = gen_rtx_REG (V4SFmode, tmp_regno); + operands[6] = gen_rtx_REG (V4SImode, tmp_regno); + operands[7] = GEN_INT (VECTOR_ELT_ORDER_BIG ? 1 : 2); + operands[8] = gen_rtx_REG (V4SImode, reg_or_subregno (operands[0])); +} + [(set_attr "type" "vecperm") + (set_attr "length" "12")]) + +;; Special case setting 0.0f to a V4SF element +(define_insn_and_split "*vsx_set_v4sf_p9_zero" + [(set (match_operand:V4SF 0 "gpc_reg_operand" "=wa") + (unspec:V4SF + [(match_operand:V4SF 1 "gpc_reg_operand" "0") + (match_operand:SF 2 "zero_fp_constant" "j") + (match_operand:QI 3 "const_0_to_3_operand" "n")] + UNSPEC_VSX_SET)) + (clobber (match_scratch:SI 4 "=&wJwK"))] + "VECTOR_MEM_VSX_P (V4SFmode) && TARGET_P9_VECTOR && TARGET_VSX_SMALL_INTEGER + && TARGET_UPPER_REGS_DI && TARGET_POWERPC64" + "#" + "&& reload_completed" + [(set (match_dup 4) + (const_int 0)) + (set (match_dup 5) + (unspec:V4SI [(match_dup 5) + (match_dup 4) + (match_dup 3)] + UNSPEC_VSX_SET))] +{ + operands[5] = gen_rtx_REG (V4SImode, reg_or_subregno (operands[0])); +} + [(set_attr "type" "vecperm") + (set_attr "length" "8")]) + +;; Optimize x = vec_insert (vec_extract (v2, n), v1, m) if n is the element +;; that is in the default scalar position (1 for big endian, 2 for little +;; endian). We just need to do an xxinsertw since the element is in the +;; correct location. + +(define_insn "*vsx_insert_extract_v4sf_p9" + [(set (match_operand:V4SF 0 "gpc_reg_operand" "=wa") + (unspec:V4SF + [(match_operand:V4SF 1 "gpc_reg_operand" "0") + (vec_select:SF (match_operand:V4SF 2 "gpc_reg_operand" "wa") + (parallel + [(match_operand:QI 3 "const_0_to_3_operand" "n")])) + (match_operand:QI 4 "const_0_to_3_operand" "n")] + UNSPEC_VSX_SET))] + "VECTOR_MEM_VSX_P (V4SFmode) && TARGET_P9_VECTOR && TARGET_VSX_SMALL_INTEGER + && TARGET_UPPER_REGS_DI && TARGET_POWERPC64 + && (INTVAL (operands[3]) == (VECTOR_ELT_ORDER_BIG ? 1 : 2))" +{ + int ele = INTVAL (operands[4]); + + if (!VECTOR_ELT_ORDER_BIG) + ele = GET_MODE_NUNITS (V4SFmode) - 1 - ele; + + operands[4] = GEN_INT (GET_MODE_SIZE (SFmode) * ele); + return "xxinsertw %x0,%x2,%4"; +} + [(set_attr "type" "vecperm")]) + +;; Optimize x = vec_insert (vec_extract (v2, n), v1, m) if n is not the element +;; that is in the default scalar position (1 for big endian, 2 for little +;; endian). Convert the insert/extract to int and avoid doing the conversion. + +(define_insn_and_split "*vsx_insert_extract_v4sf_p9_2" + [(set (match_operand:V4SF 0 "gpc_reg_operand" "=wa") + (unspec:V4SF + [(match_operand:V4SF 1 "gpc_reg_operand" "0") + (vec_select:SF (match_operand:V4SF 2 "gpc_reg_operand" "wa") + (parallel + [(match_operand:QI 3 "const_0_to_3_operand" "n")])) + (match_operand:QI 4 "const_0_to_3_operand" "n")] + UNSPEC_VSX_SET)) + (clobber (match_scratch:SI 5 "=&wJwK"))] + "VECTOR_MEM_VSX_P (V4SFmode) && VECTOR_MEM_VSX_P (V4SImode) + && TARGET_P9_VECTOR && TARGET_VSX_SMALL_INTEGER + && TARGET_UPPER_REGS_DI && TARGET_POWERPC64 + && (INTVAL (operands[3]) != (VECTOR_ELT_ORDER_BIG ? 1 : 2))" + "#" + "&& 1" + [(parallel [(set (match_dup 5) + (vec_select:SI (match_dup 6) + (parallel [(match_dup 3)]))) + (clobber (scratch:SI))]) + (set (match_dup 7) + (unspec:V4SI [(match_dup 8) + (match_dup 5) + (match_dup 4)] + UNSPEC_VSX_SET))] +{ + if (GET_CODE (operands[5]) == SCRATCH) + operands[5] = gen_reg_rtx (SImode); + + operands[6] = gen_lowpart (V4SImode, operands[2]); + operands[7] = gen_lowpart (V4SImode, operands[0]); + operands[8] = gen_lowpart (V4SImode, operands[1]); +} + [(set_attr "type" "vecperm")]) + ;; Expanders for builtins (define_expand "vsx_mergel_" [(use (match_operand:VSX_D 0 "vsx_register_operand" "")) diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog index 3c17355f3f5..0809b2cd70a 100644 --- a/gcc/testsuite/ChangeLog +++ b/gcc/testsuite/ChangeLog @@ -1,3 +1,12 @@ +2017-06-20 Michael Meissner + + PR target/79799 + * gcc.target/powerpc/pr79799-1.c: New test. + * gcc.target/powerpc/pr79799-2.c: Likewise. + * gcc.target/powerpc/pr79799-3.c: Likewise. + * gcc.target/powerpc/pr79799-4.c: Likewise. + * gcc.target/powerpc/pr79799-5.c: Likewise. + 2017-06-19 Nathan Sidwell PR c++/81124 diff --git a/gcc/testsuite/gcc.target/powerpc/pr79799-1.c b/gcc/testsuite/gcc.target/powerpc/pr79799-1.c new file mode 100644 index 00000000000..87a9e49b889 --- /dev/null +++ b/gcc/testsuite/gcc.target/powerpc/pr79799-1.c @@ -0,0 +1,43 @@ +/* { dg-do compile { target { powerpc64*-*-* && lp64 } } } */ +/* { dg-skip-if "do not override -mcpu" { powerpc*-*-* } { "-mcpu=*" } { "-mcpu=power9" } } */ +/* { dg-require-effective-target powerpc_p9vector_ok } */ +/* { dg-options "-mcpu=power9 -O2" } */ + +#include + +/* GCC 7.1 did not have a specialized method for inserting 32-bit floating + point on ISA 3.0 (power9) systems. */ + +vector float +insert_arg_0 (vector float vf, float f) +{ + return vec_insert (f, vf, 0); +} + +vector float +insert_arg_1 (vector float vf, float f) +{ + return vec_insert (f, vf, 1); +} + +vector float +insert_arg_2 (vector float vf, float f) +{ + return vec_insert (f, vf, 2); +} + +vector float +insert_arg_3 (vector float vf, float f) +{ + return vec_insert (f, vf, 3); +} + +/* { dg-final { scan-assembler {\mxscvdpspn\M} } } */ +/* { dg-final { scan-assembler {\mxxinsertw\M} } } */ +/* { dg-final { scan-assembler-not {\mlvewx\M} } } */ +/* { dg-final { scan-assembler-not {\mlvx\M} } } */ +/* { dg-final { scan-assembler-not {\mvperm\M} } } */ +/* { dg-final { scan-assembler-not {\mvpermr\M} } } */ +/* { dg-final { scan-assembler-not {\mstfs\M} } } */ +/* { dg-final { scan-assembler-not {\mstxssp\M} } } */ +/* { dg-final { scan-assembler-not {\mstxsspx\M} } } */ diff --git a/gcc/testsuite/gcc.target/powerpc/pr79799-2.c b/gcc/testsuite/gcc.target/powerpc/pr79799-2.c new file mode 100644 index 00000000000..793e3b9b66c --- /dev/null +++ b/gcc/testsuite/gcc.target/powerpc/pr79799-2.c @@ -0,0 +1,31 @@ +/* { dg-do compile { target { powerpc64*-*-* && lp64 } } } */ +/* { dg-skip-if "do not override -mcpu" { powerpc*-*-* } { "-mcpu=*" } { "-mcpu=power9" } } */ +/* { dg-require-effective-target powerpc_p9vector_ok } */ +/* { dg-options "-mcpu=power9 -O2" } */ + +#include + +/* Optimize x = vec_insert (vec_extract (v2, N), v1, M) for SFmode if N is the default + scalar position. */ + +#if __ORDER_LITTLE_ENDIAN__ +#define ELE 2 +#else +#define ELE 1 +#endif + +vector float +foo (vector float v1, vector float v2) +{ + return vec_insert (vec_extract (v2, ELE), v1, 0); +} + +/* { dg-final { scan-assembler {\mxxinsertw\M} } } */ +/* { dg-final { scan-assembler-not {\mxxextractuw\M} } } */ +/* { dg-final { scan-assembler-not {\mlvewx\M} } } */ +/* { dg-final { scan-assembler-not {\mlvx\M} } } */ +/* { dg-final { scan-assembler-not {\mvperm\M} } } */ +/* { dg-final { scan-assembler-not {\mvpermr\M} } } */ +/* { dg-final { scan-assembler-not {\mstfs\M} } } */ +/* { dg-final { scan-assembler-not {\mstxssp\M} } } */ +/* { dg-final { scan-assembler-not {\mstxsspx\M} } } */ diff --git a/gcc/testsuite/gcc.target/powerpc/pr79799-3.c b/gcc/testsuite/gcc.target/powerpc/pr79799-3.c new file mode 100644 index 00000000000..72550421859 --- /dev/null +++ b/gcc/testsuite/gcc.target/powerpc/pr79799-3.c @@ -0,0 +1,24 @@ +/* { dg-do compile { target { powerpc64*-*-* && lp64 } } } */ +/* { dg-skip-if "do not override -mcpu" { powerpc*-*-* } { "-mcpu=*" } { "-mcpu=power9" } } */ +/* { dg-require-effective-target powerpc_p9vector_ok } */ +/* { dg-options "-mcpu=power9 -O2" } */ + +#include + +/* Optimize x = vec_insert (vec_extract (v2, N), v1, M) for SFmode. */ + +vector float +foo (vector float v1, vector float v2) +{ + return vec_insert (vec_extract (v2, 4), v1, 0); +} + +/* { dg-final { scan-assembler {\mxxinsertw\M} } } */ +/* { dg-final { scan-assembler {\mxxextractuw\M} } } */ +/* { dg-final { scan-assembler-not {\mlvewx\M} } } */ +/* { dg-final { scan-assembler-not {\mlvx\M} } } */ +/* { dg-final { scan-assembler-not {\mvperm\M} } } */ +/* { dg-final { scan-assembler-not {\mvpermr\M} } } */ +/* { dg-final { scan-assembler-not {\mstfs\M} } } */ +/* { dg-final { scan-assembler-not {\mstxssp\M} } } */ +/* { dg-final { scan-assembler-not {\mstxsspx\M} } } */ diff --git a/gcc/testsuite/gcc.target/powerpc/pr79799-4.c b/gcc/testsuite/gcc.target/powerpc/pr79799-4.c new file mode 100644 index 00000000000..056a005be25 --- /dev/null +++ b/gcc/testsuite/gcc.target/powerpc/pr79799-4.c @@ -0,0 +1,105 @@ +/* { dg-do run { target { powerpc*-*-linux* } } } */ +/* { dg-require-effective-target vsx_hw } */ +/* { dg-skip-if "do not override -mcpu" { powerpc*-*-* } { "-mcpu=*" } { "-mcpu=power9" } } */ +/* { dg-require-effective-target p9vector_hw } */ +/* { dg-options "-mcpu=power9 -O2" } */ + +#include +#include + +__attribute__ ((__noinline__)) +vector float +insert_0 (vector float v, float f) +{ + return vec_insert (f, v, 0); +} + +__attribute__ ((__noinline__)) +vector float +insert_1 (vector float v, float f) +{ + return vec_insert (f, v, 1); +} + +__attribute__ ((__noinline__)) +vector float +insert_2 (vector float v, float f) +{ + return vec_insert (f, v, 2); +} + +__attribute__ ((__noinline__)) +vector float +insert_3 (vector float v, float f) +{ + return vec_insert (f, v, 3); +} + +__attribute__ ((__noinline__)) +void +test_insert (void) +{ + vector float v1 = { 1.0f, 2.0f, 3.0f, 4.0f }; + vector float v2 = { 5.0f, 6.0f, 7.0f, 8.0f }; + + v1 = insert_0 (v1, 5.0f); + v1 = insert_1 (v1, 6.0f); + v1 = insert_2 (v1, 7.0f); + v1 = insert_3 (v1, 8.0f); + + if (vec_any_ne (v1, v2)) + abort (); +} + +__attribute__ ((__noinline__)) +vector float +insert_extract_0_3 (vector float v1, vector float v2) +{ + return vec_insert (vec_extract (v2, 3), v1, 0); +} + +__attribute__ ((__noinline__)) +vector float +insert_extract_1_2 (vector float v1, vector float v2) +{ + return vec_insert (vec_extract (v2, 2), v1, 1); +} + +__attribute__ ((__noinline__)) +vector float +insert_extract_2_1 (vector float v1, vector float v2) +{ + return vec_insert (vec_extract (v2, 1), v1, 2); +} + +__attribute__ ((__noinline__)) +vector float +insert_extract_3_0 (vector float v1, vector float v2) +{ + return vec_insert (vec_extract (v2, 0), v1, 3); +} + +__attribute__ ((__noinline__)) +void +test_insert_extract (void) +{ + vector float v1 = { 1.0f, 2.0f, 3.0f, 4.0f }; + vector float v2 = { 5.0f, 6.0f, 7.0f, 8.0f }; + vector float v3 = { 8.0f, 7.0f, 6.0f, 5.0f }; + + v1 = insert_extract_0_3 (v1, v2); + v1 = insert_extract_1_2 (v1, v2); + v1 = insert_extract_2_1 (v1, v2); + v1 = insert_extract_3_0 (v1, v2); + + if (vec_any_ne (v1, v3)) + abort (); +} + +int +main (void) +{ + test_insert (); + test_insert_extract (); + return 0; +} diff --git a/gcc/testsuite/gcc.target/powerpc/pr79799-5.c b/gcc/testsuite/gcc.target/powerpc/pr79799-5.c new file mode 100644 index 00000000000..fcd92ffd60a --- /dev/null +++ b/gcc/testsuite/gcc.target/powerpc/pr79799-5.c @@ -0,0 +1,25 @@ +/* { dg-do compile { target { powerpc64*-*-* && lp64 } } } */ +/* { dg-skip-if "do not override -mcpu" { powerpc*-*-* } { "-mcpu=*" } { "-mcpu=power9" } } */ +/* { dg-require-effective-target powerpc_p9vector_ok } */ +/* { dg-options "-mcpu=power9 -O2" } */ + +#include + +/* Insure setting 0.0f to a V4SFmode element does not do a FP conversion. */ + +vector float +insert_arg_0 (vector float vf) +{ + return vec_insert (0.0f, vf, 0); +} + +/* { dg-final { scan-assembler {\mxxinsertw\M} } } */ +/* { dg-final { scan-assembler-not {\mlvewx\M} } } */ +/* { dg-final { scan-assembler-not {\mlvx\M} } } */ +/* { dg-final { scan-assembler-not {\mvperm\M} } } */ +/* { dg-final { scan-assembler-not {\mvpermr\M} } } */ +/* { dg-final { scan-assembler-not {\mstfs\M} } } */ +/* { dg-final { scan-assembler-not {\mstxssp\M} } } */ +/* { dg-final { scan-assembler-not {\mstxsspx\M} } } */ +/* { dg-final { scan-assembler-not {\mxscvdpspn\M} } } */ +/* { dg-final { scan-assembler-not {\mxxextractuw\M} } } */ -- 2.30.2