From 3ef9e1ec4bcd859fca2e8b80739ec5d98618bb23 Mon Sep 17 00:00:00 2001
From: Bill Schmidt
Date: Wed, 15 Nov 2017 21:21:32 +0000
Subject: [PATCH] altivec.h (vec_xst_be): New #define.

[gcc]

2017-11-15  Bill Schmidt

	* config/rs6000/altivec.h (vec_xst_be): New #define.
	* config/rs6000/altivec.md (altivec_vperm_<mode>_direct): Rename
	and externalize from *altivec_vperm_<mode>_internal.
	* config/rs6000/rs6000-builtin.def (XL_BE_V16QI): Remove macro
	instantiation.
	(XL_BE_V8HI): Likewise.
	(XL_BE_V4SI): Likewise.
	(XL_BE_V2DI): Likewise.
	(XL_BE_V4SF): Likewise.
	(XL_BE_V2DF): Likewise.
	(XST_BE): Add BU_VSX_OVERLOAD_X macro instantiation.
	* config/rs6000/rs6000-c.c (altivec_overloaded_builtins): Correct
	all array entries with these keys: VSX_BUILTIN_VEC_XL,
	VSX_BUILTIN_VEC_XL_BE, VSX_BUILTIN_VEC_XST.  Add entries for key
	VSX_BUILTIN_VEC_XST_BE.
	* config/rs6000/rs6000.c (altivec_expand_xl_be_builtin): Remove.
	(altivec_expand_builtin): Remove handling for VSX_BUILTIN_XL_BE_*
	built-ins.
	(altivec_init_builtins): Replace conditional calls to def_builtin
	for __builtin_vsx_ld_elemrev_{v8hi,v16qi} and
	__builtin_vsx_st_elemrev_{v8hi,v16qi} based on TARGET_P9_VECTOR
	with unconditional calls.  Remove calls to def_builtin for
	__builtin_vsx_le_be_<mode>.  Add a call to def_builtin for
	__builtin_vec_xst_be.
	* config/rs6000/vsx.md (vsx_ld_elemrev_v8hi): Convert define_insn
	to define_expand, and add alternate RTL generation for P8.
	(*vsx_ld_elemrev_v8hi_internal): New define_insn based on
	vsx_ld_elemrev_v8hi.
	(vsx_ld_elemrev_v16qi): Convert define_insn to define_expand, and
	add alternate RTL generation for P8.
	(*vsx_ld_elemrev_v16qi_internal): New define_insn based on
	vsx_ld_elemrev_v16qi.
	(vsx_st_elemrev_v8hi): Convert define_insn to define_expand, and
	add alternate RTL generation for P8.
	(*vsx_st_elemrev_v8hi_internal): New define_insn based on
	vsx_st_elemrev_v8hi.
	(vsx_st_elemrev_v16qi): Convert define_insn to define_expand, and
	add alternate RTL generation for P8.
	(*vsx_st_elemrev_v16qi_internal): New define_insn based on
	vsx_st_elemrev_v16qi.

[gcc/testsuite]

2017-11-15  Bill Schmidt

	* gcc.target/powerpc/swaps-p8-26.c: Modify expected code
	generation.

From-SVN: r254787
---
 gcc/ChangeLog                                 |  44 ++++
 gcc/config/rs6000/altivec.h                   |   1 +
 gcc/config/rs6000/altivec.md                  |   2 +-
 gcc/config/rs6000/rs6000-builtin.def          |   9 +-
 gcc/config/rs6000/rs6000-c.c                  | 199 ++++++++++++-----
 gcc/config/rs6000/rs6000.c                    | 142 +-----------
 gcc/config/rs6000/vsx.md                      | 208 +++++++++++++++---
 gcc/testsuite/ChangeLog                       |   5 +
 .../gcc.target/powerpc/swaps-p8-26.c          |   6 +-
 9 files changed, 380 insertions(+), 236 deletions(-)

diff --git a/gcc/ChangeLog b/gcc/ChangeLog
index 322124c2de3..8ad4d7b1d8c 100644
--- a/gcc/ChangeLog
+++ b/gcc/ChangeLog
@@ -1,3 +1,44 @@
+2017-11-15  Bill Schmidt
+
+	* config/rs6000/altivec.h (vec_xst_be): New #define.
+	* config/rs6000/altivec.md (altivec_vperm_<mode>_direct): Rename
+	and externalize from *altivec_vperm_<mode>_internal.
+	* config/rs6000/rs6000-builtin.def (XL_BE_V16QI): Remove macro
+	instantiation.
+	(XL_BE_V8HI): Likewise.
+	(XL_BE_V4SI): Likewise.
+	(XL_BE_V2DI): Likewise.
+	(XL_BE_V4SF): Likewise.
+	(XL_BE_V2DF): Likewise.
+	(XST_BE): Add BU_VSX_OVERLOAD_X macro instantiation.
+	* config/rs6000/rs6000-c.c (altivec_overloaded_builtins): Correct
+	all array entries with these keys: VSX_BUILTIN_VEC_XL,
+	VSX_BUILTIN_VEC_XL_BE, VSX_BUILTIN_VEC_XST.  Add entries for key
+	VSX_BUILTIN_VEC_XST_BE.
+	* config/rs6000/rs6000.c (altivec_expand_xl_be_builtin): Remove.
+	(altivec_expand_builtin): Remove handling for VSX_BUILTIN_XL_BE_*
+	built-ins.
+	(altivec_init_builtins): Replace conditional calls to def_builtin
+	for __builtin_vsx_ld_elemrev_{v8hi,v16qi} and
+	__builtin_vsx_st_elemrev_{v8hi,v16qi} based on TARGET_P9_VECTOR
+	with unconditional calls.  Remove calls to def_builtin for
+	__builtin_vsx_le_be_<mode>.  Add a call to def_builtin for
+	__builtin_vec_xst_be.
+	* config/rs6000/vsx.md (vsx_ld_elemrev_v8hi): Convert define_insn
+	to define_expand, and add alternate RTL generation for P8.
+	(*vsx_ld_elemrev_v8hi_internal): New define_insn based on
+	vsx_ld_elemrev_v8hi.
+	(vsx_ld_elemrev_v16qi): Convert define_insn to define_expand, and
+	add alternate RTL generation for P8.
+	(*vsx_ld_elemrev_v16qi_internal): New define_insn based on
+	vsx_ld_elemrev_v16qi.
+	(vsx_st_elemrev_v8hi): Convert define_insn
+	to define_expand, and add alternate RTL generation for P8.
+	(*vsx_st_elemrev_v8hi_internal): New define_insn based on
+	vsx_st_elemrev_v8hi.
+	(vsx_st_elemrev_v16qi): Convert define_insn to define_expand, and
+	add alternate RTL generation for P8.
+	(*vsx_st_elemrev_v16qi_internal): New define_insn based on
+	vsx_st_elemrev_v16qi.
+
 2017-11-15  H.J. Lu
 
 	PR target/82990
diff --git a/gcc/config/rs6000/altivec.h b/gcc/config/rs6000/altivec.h
index 646712d31f5..068dfef2e00 100644
--- a/gcc/config/rs6000/altivec.h
+++ b/gcc/config/rs6000/altivec.h
@@ -357,6 +357,7 @@
 #define vec_xl __builtin_vec_vsx_ld
 #define vec_xl_be __builtin_vec_xl_be
 #define vec_xst __builtin_vec_vsx_st
+#define vec_xst_be __builtin_vec_xst_be
 
 /* Note, xxsldi and xxpermdi were added as __builtin_vsx_<xxx> functions
    instead of __builtin_vec_<xxx>  */
diff --git a/gcc/config/rs6000/altivec.md b/gcc/config/rs6000/altivec.md
index 651f6c9b978..7122f99bffd 100644
--- a/gcc/config/rs6000/altivec.md
+++ b/gcc/config/rs6000/altivec.md
@@ -2130,7 +2130,7 @@
 })
 
 ;; Slightly prefer vperm, since the target does not overlap the source
-(define_insn "*altivec_vperm_<mode>_internal"
+(define_insn "altivec_vperm_<mode>_direct"
   [(set (match_operand:VM 0 "register_operand" "=v,?wo")
	(unspec:VM [(match_operand:VM 1 "register_operand" "v,wo")
		    (match_operand:VM 2 "register_operand" "v,0")
diff --git a/gcc/config/rs6000/rs6000-builtin.def b/gcc/config/rs6000/rs6000-builtin.def
index 6842c122528..cfb6e55edc0 100644
--- a/gcc/config/rs6000/rs6000-builtin.def
+++ b/gcc/config/rs6000/rs6000-builtin.def
@@ -1774,14 +1774,6 @@ BU_VSX_X (LXVW4X_V4SF, "lxvw4x_v4sf", MEM)
 BU_VSX_X (LXVW4X_V4SI, "lxvw4x_v4si", MEM)
 BU_VSX_X (LXVW4X_V8HI, "lxvw4x_v8hi", MEM)
 BU_VSX_X (LXVW4X_V16QI, "lxvw4x_v16qi", MEM)
-
-BU_VSX_X (XL_BE_V16QI, "xl_be_v16qi", MEM)
-BU_VSX_X (XL_BE_V8HI, "xl_be_v8hi", MEM)
-BU_VSX_X (XL_BE_V4SI, "xl_be_v4si", MEM)
-BU_VSX_X (XL_BE_V2DI, "xl_be_v2di", MEM)
-BU_VSX_X (XL_BE_V4SF, "xl_be_v4sf", MEM)
-BU_VSX_X (XL_BE_V2DF, "xl_be_v2df", MEM)
-
 BU_VSX_X (STXSDX, "stxsdx", MEM)
 BU_VSX_X (STXVD2X_V1TI, "stxvd2x_v1ti", MEM)
 BU_VSX_X (STXVD2X_V2DF, "stxvd2x_v2df", MEM)
@@ -1884,6 +1876,7 @@ BU_VSX_OVERLOAD_X (ST, "st")
 BU_VSX_OVERLOAD_X (XL, "xl")
 BU_VSX_OVERLOAD_X (XL_BE, "xl_be")
 BU_VSX_OVERLOAD_X (XST, "xst")
+BU_VSX_OVERLOAD_X (XST_BE, "xst_be")
 
 /* 1 argument builtins pre ISA 2.04.
*/ BU_FP_MISC_1 (FCTID, "fctid", CONST, lrintdfdi2) diff --git a/gcc/config/rs6000/rs6000-c.c b/gcc/config/rs6000/rs6000-c.c index ef21ba32e7a..645260a27a4 100644 --- a/gcc/config/rs6000/rs6000-c.c +++ b/gcc/config/rs6000/rs6000-c.c @@ -3111,69 +3111,94 @@ const struct altivec_builtin_types altivec_overloaded_builtins[] = { RS6000_BTI_V4SI, RS6000_BTI_V4SI, RS6000_BTI_V4SI, 0 }, { ALTIVEC_BUILTIN_VEC_SUMS, ALTIVEC_BUILTIN_VSUMSWS, RS6000_BTI_V4SI, RS6000_BTI_V4SI, RS6000_BTI_V4SI, 0 }, - { VSX_BUILTIN_VEC_XL, VSX_BUILTIN_LD_ELEMREV_V2DF, + + { VSX_BUILTIN_VEC_XL, VSX_BUILTIN_LXVD2X_V2DF, RS6000_BTI_V2DF, RS6000_BTI_INTSI, ~RS6000_BTI_V2DF, 0 }, - { VSX_BUILTIN_VEC_XL, VSX_BUILTIN_LD_ELEMREV_V2DF, + { VSX_BUILTIN_VEC_XL, VSX_BUILTIN_LXVD2X_V2DF, RS6000_BTI_V2DF, RS6000_BTI_INTSI, ~RS6000_BTI_double, 0 }, - { VSX_BUILTIN_VEC_XL, VSX_BUILTIN_LD_ELEMREV_V2DI, + { VSX_BUILTIN_VEC_XL, VSX_BUILTIN_LXVD2X_V2DI, RS6000_BTI_V2DI, RS6000_BTI_INTSI, ~RS6000_BTI_V2DI, 0 }, - { VSX_BUILTIN_VEC_XL, VSX_BUILTIN_LD_ELEMREV_V2DI, + { VSX_BUILTIN_VEC_XL, VSX_BUILTIN_LXVD2X_V2DI, RS6000_BTI_V2DI, RS6000_BTI_INTSI, ~RS6000_BTI_long_long, 0 }, - { VSX_BUILTIN_VEC_XL, VSX_BUILTIN_LD_ELEMREV_V2DI, + { VSX_BUILTIN_VEC_XL, VSX_BUILTIN_LXVD2X_V2DI, RS6000_BTI_unsigned_V2DI, RS6000_BTI_INTSI, ~RS6000_BTI_unsigned_V2DI, 0 }, - { VSX_BUILTIN_VEC_XL, VSX_BUILTIN_LD_ELEMREV_V2DI, + { VSX_BUILTIN_VEC_XL, VSX_BUILTIN_LXVD2X_V2DI, RS6000_BTI_unsigned_V2DI, RS6000_BTI_INTSI, ~RS6000_BTI_unsigned_long_long, 0 }, - { VSX_BUILTIN_VEC_XL, VSX_BUILTIN_LD_ELEMREV_V4SF, + { VSX_BUILTIN_VEC_XL, VSX_BUILTIN_LXVW4X_V4SF, RS6000_BTI_V4SF, RS6000_BTI_INTSI, ~RS6000_BTI_V4SF, 0 }, - { VSX_BUILTIN_VEC_XL, VSX_BUILTIN_LD_ELEMREV_V4SF, + { VSX_BUILTIN_VEC_XL, VSX_BUILTIN_LXVW4X_V4SF, RS6000_BTI_V4SF, RS6000_BTI_INTSI, ~RS6000_BTI_float, 0 }, - { VSX_BUILTIN_VEC_XL, VSX_BUILTIN_LD_ELEMREV_V4SI, + { VSX_BUILTIN_VEC_XL, VSX_BUILTIN_LXVW4X_V4SI, RS6000_BTI_V4SI, RS6000_BTI_INTSI, ~RS6000_BTI_V4SI, 0 }, - { VSX_BUILTIN_VEC_XL, VSX_BUILTIN_LD_ELEMREV_V4SI, + { VSX_BUILTIN_VEC_XL, VSX_BUILTIN_LXVW4X_V4SI, RS6000_BTI_V4SI, RS6000_BTI_INTSI, ~RS6000_BTI_INTSI, 0 }, - { VSX_BUILTIN_VEC_XL, VSX_BUILTIN_LD_ELEMREV_V4SI, + { VSX_BUILTIN_VEC_XL, VSX_BUILTIN_LXVW4X_V4SI, RS6000_BTI_unsigned_V4SI, RS6000_BTI_INTSI, ~RS6000_BTI_unsigned_V4SI, 0 }, - { VSX_BUILTIN_VEC_XL, VSX_BUILTIN_LD_ELEMREV_V4SI, + { VSX_BUILTIN_VEC_XL, VSX_BUILTIN_LXVW4X_V4SI, RS6000_BTI_unsigned_V4SI, RS6000_BTI_INTSI, ~RS6000_BTI_UINTSI, 0 }, - { VSX_BUILTIN_VEC_XL, VSX_BUILTIN_LD_ELEMREV_V8HI, + { VSX_BUILTIN_VEC_XL, VSX_BUILTIN_LXVW4X_V8HI, RS6000_BTI_V8HI, RS6000_BTI_INTSI, ~RS6000_BTI_V8HI, 0 }, - { VSX_BUILTIN_VEC_XL, VSX_BUILTIN_LD_ELEMREV_V8HI, + { VSX_BUILTIN_VEC_XL, VSX_BUILTIN_LXVW4X_V8HI, RS6000_BTI_V8HI, RS6000_BTI_INTSI, ~RS6000_BTI_INTHI, 0 }, - { VSX_BUILTIN_VEC_XL, VSX_BUILTIN_LD_ELEMREV_V8HI, + { VSX_BUILTIN_VEC_XL, VSX_BUILTIN_LXVW4X_V8HI, RS6000_BTI_unsigned_V8HI, RS6000_BTI_INTSI, ~RS6000_BTI_unsigned_V8HI, 0 }, - { VSX_BUILTIN_VEC_XL, VSX_BUILTIN_LD_ELEMREV_V8HI, + { VSX_BUILTIN_VEC_XL, VSX_BUILTIN_LXVW4X_V8HI, RS6000_BTI_unsigned_V8HI, RS6000_BTI_INTSI, ~RS6000_BTI_UINTHI, 0 }, - { VSX_BUILTIN_VEC_XL, VSX_BUILTIN_LD_ELEMREV_V16QI, + { VSX_BUILTIN_VEC_XL, VSX_BUILTIN_LXVW4X_V16QI, RS6000_BTI_V16QI, RS6000_BTI_INTSI, ~RS6000_BTI_V16QI, 0 }, - { VSX_BUILTIN_VEC_XL, VSX_BUILTIN_LD_ELEMREV_V16QI, + { VSX_BUILTIN_VEC_XL, VSX_BUILTIN_LXVW4X_V16QI, RS6000_BTI_V16QI, RS6000_BTI_INTSI, ~RS6000_BTI_INTQI, 0 }, - { VSX_BUILTIN_VEC_XL, VSX_BUILTIN_LD_ELEMREV_V16QI, + { 
VSX_BUILTIN_VEC_XL, VSX_BUILTIN_LXVW4X_V16QI, RS6000_BTI_unsigned_V16QI, RS6000_BTI_INTSI, ~RS6000_BTI_unsigned_V16QI, 0 }, - { VSX_BUILTIN_VEC_XL, VSX_BUILTIN_LD_ELEMREV_V16QI, - RS6000_BTI_unsigned_V16QI, RS6000_BTI_INTSI, ~RS6000_BTI_UINTQI, 0 }, - { VSX_BUILTIN_VEC_XL_BE, VSX_BUILTIN_XL_BE_V16QI, + { VSX_BUILTIN_VEC_XL, VSX_BUILTIN_LXVW4X_V16QI, RS6000_BTI_unsigned_V16QI, RS6000_BTI_INTSI, ~RS6000_BTI_UINTQI, 0 }, - { VSX_BUILTIN_VEC_XL_BE, VSX_BUILTIN_XL_BE_V16QI, - RS6000_BTI_V16QI, RS6000_BTI_INTSI, ~RS6000_BTI_INTQI, 0 }, - { VSX_BUILTIN_VEC_XL_BE, VSX_BUILTIN_XL_BE_V8HI, - RS6000_BTI_V8HI, RS6000_BTI_INTSI, ~RS6000_BTI_INTHI, 0 }, - { VSX_BUILTIN_VEC_XL_BE, VSX_BUILTIN_XL_BE_V8HI, - RS6000_BTI_unsigned_V8HI, RS6000_BTI_INTSI, ~RS6000_BTI_UINTHI, 0 }, - { VSX_BUILTIN_VEC_XL_BE, VSX_BUILTIN_XL_BE_V4SI, - RS6000_BTI_V4SI, RS6000_BTI_INTSI, ~RS6000_BTI_INTSI, 0 }, - { VSX_BUILTIN_VEC_XL_BE, VSX_BUILTIN_XL_BE_V4SI, - RS6000_BTI_unsigned_V4SI, RS6000_BTI_INTSI, ~RS6000_BTI_UINTSI, 0 }, - { VSX_BUILTIN_VEC_XL_BE, VSX_BUILTIN_XL_BE_V2DI, + + { VSX_BUILTIN_VEC_XL_BE, VSX_BUILTIN_LD_ELEMREV_V2DF, + RS6000_BTI_V2DF, RS6000_BTI_INTSI, ~RS6000_BTI_V2DF, 0 }, + { VSX_BUILTIN_VEC_XL_BE, VSX_BUILTIN_LD_ELEMREV_V2DF, + RS6000_BTI_V2DF, RS6000_BTI_INTSI, ~RS6000_BTI_double, 0 }, + { VSX_BUILTIN_VEC_XL_BE, VSX_BUILTIN_LD_ELEMREV_V2DI, + RS6000_BTI_V2DI, RS6000_BTI_INTSI, ~RS6000_BTI_V2DI, 0 }, + { VSX_BUILTIN_VEC_XL_BE, VSX_BUILTIN_LD_ELEMREV_V2DI, RS6000_BTI_V2DI, RS6000_BTI_INTSI, ~RS6000_BTI_long_long, 0 }, - { VSX_BUILTIN_VEC_XL_BE, VSX_BUILTIN_XL_BE_V2DI, - RS6000_BTI_unsigned_V2DI, RS6000_BTI_INTSI, ~RS6000_BTI_unsigned_long_long, 0 }, - { VSX_BUILTIN_VEC_XL_BE, VSX_BUILTIN_XL_BE_V4SF, + { VSX_BUILTIN_VEC_XL_BE, VSX_BUILTIN_LD_ELEMREV_V2DI, + RS6000_BTI_unsigned_V2DI, RS6000_BTI_INTSI, + ~RS6000_BTI_unsigned_V2DI, 0 }, + { VSX_BUILTIN_VEC_XL_BE, VSX_BUILTIN_LD_ELEMREV_V2DI, + RS6000_BTI_unsigned_V2DI, RS6000_BTI_INTSI, + ~RS6000_BTI_unsigned_long_long, 0 }, + { VSX_BUILTIN_VEC_XL_BE, VSX_BUILTIN_LD_ELEMREV_V4SF, + RS6000_BTI_V4SF, RS6000_BTI_INTSI, ~RS6000_BTI_V4SF, 0 }, + { VSX_BUILTIN_VEC_XL_BE, VSX_BUILTIN_LD_ELEMREV_V4SF, RS6000_BTI_V4SF, RS6000_BTI_INTSI, ~RS6000_BTI_float, 0 }, - { VSX_BUILTIN_VEC_XL_BE, VSX_BUILTIN_XL_BE_V2DF, - RS6000_BTI_V2DF, RS6000_BTI_INTSI, ~RS6000_BTI_double, 0 }, + { VSX_BUILTIN_VEC_XL_BE, VSX_BUILTIN_LD_ELEMREV_V4SI, + RS6000_BTI_V4SI, RS6000_BTI_INTSI, ~RS6000_BTI_V4SI, 0 }, + { VSX_BUILTIN_VEC_XL_BE, VSX_BUILTIN_LD_ELEMREV_V4SI, + RS6000_BTI_V4SI, RS6000_BTI_INTSI, ~RS6000_BTI_INTSI, 0 }, + { VSX_BUILTIN_VEC_XL_BE, VSX_BUILTIN_LD_ELEMREV_V4SI, + RS6000_BTI_unsigned_V4SI, RS6000_BTI_INTSI, ~RS6000_BTI_unsigned_V4SI, 0 }, + { VSX_BUILTIN_VEC_XL_BE, VSX_BUILTIN_LD_ELEMREV_V4SI, + RS6000_BTI_unsigned_V4SI, RS6000_BTI_INTSI, ~RS6000_BTI_UINTSI, 0 }, + { VSX_BUILTIN_VEC_XL_BE, VSX_BUILTIN_LD_ELEMREV_V8HI, + RS6000_BTI_V8HI, RS6000_BTI_INTSI, ~RS6000_BTI_V8HI, 0 }, + { VSX_BUILTIN_VEC_XL_BE, VSX_BUILTIN_LD_ELEMREV_V8HI, + RS6000_BTI_V8HI, RS6000_BTI_INTSI, ~RS6000_BTI_INTHI, 0 }, + { VSX_BUILTIN_VEC_XL_BE, VSX_BUILTIN_LD_ELEMREV_V8HI, + RS6000_BTI_unsigned_V8HI, RS6000_BTI_INTSI, ~RS6000_BTI_unsigned_V8HI, 0 }, + { VSX_BUILTIN_VEC_XL_BE, VSX_BUILTIN_LD_ELEMREV_V8HI, + RS6000_BTI_unsigned_V8HI, RS6000_BTI_INTSI, ~RS6000_BTI_UINTHI, 0 }, + { VSX_BUILTIN_VEC_XL_BE, VSX_BUILTIN_LD_ELEMREV_V16QI, + RS6000_BTI_V16QI, RS6000_BTI_INTSI, ~RS6000_BTI_V16QI, 0 }, + { VSX_BUILTIN_VEC_XL_BE, VSX_BUILTIN_LD_ELEMREV_V16QI, + RS6000_BTI_V16QI, RS6000_BTI_INTSI, ~RS6000_BTI_INTQI, 0 
}, + { VSX_BUILTIN_VEC_XL_BE, VSX_BUILTIN_LD_ELEMREV_V16QI, + RS6000_BTI_unsigned_V16QI, RS6000_BTI_INTSI, + ~RS6000_BTI_unsigned_V16QI, 0 }, + { VSX_BUILTIN_VEC_XL_BE, VSX_BUILTIN_LD_ELEMREV_V16QI, + RS6000_BTI_unsigned_V16QI, RS6000_BTI_INTSI, ~RS6000_BTI_UINTQI, 0 }, { ALTIVEC_BUILTIN_VEC_XOR, ALTIVEC_BUILTIN_VXOR, RS6000_BTI_V4SF, RS6000_BTI_V4SF, RS6000_BTI_V4SF, 0 }, { ALTIVEC_BUILTIN_VEC_XOR, ALTIVEC_BUILTIN_VXOR, @@ -3949,53 +3974,111 @@ const struct altivec_builtin_types altivec_overloaded_builtins[] = { RS6000_BTI_void, RS6000_BTI_unsigned_V16QI, RS6000_BTI_INTSI, ~RS6000_BTI_unsigned_V16QI }, { ALTIVEC_BUILTIN_VEC_STVRXL, ALTIVEC_BUILTIN_STVRXL, RS6000_BTI_void, RS6000_BTI_unsigned_V16QI, RS6000_BTI_INTSI, ~RS6000_BTI_UINTQI }, - { VSX_BUILTIN_VEC_XST, VSX_BUILTIN_ST_ELEMREV_V2DF, + { VSX_BUILTIN_VEC_XST, VSX_BUILTIN_STXVD2X_V2DF, + RS6000_BTI_void, RS6000_BTI_V2DF, RS6000_BTI_INTSI, ~RS6000_BTI_V2DF }, + { VSX_BUILTIN_VEC_XST, VSX_BUILTIN_STXVD2X_V2DI, + RS6000_BTI_void, RS6000_BTI_V2DI, RS6000_BTI_INTSI, ~RS6000_BTI_V2DI }, + { VSX_BUILTIN_VEC_XST, VSX_BUILTIN_STXVD2X_V2DI, + RS6000_BTI_void, RS6000_BTI_unsigned_V2DI, RS6000_BTI_INTSI, + ~RS6000_BTI_unsigned_V2DI }, + { VSX_BUILTIN_VEC_XST, VSX_BUILTIN_STXVD2X_V2DI, + RS6000_BTI_void, RS6000_BTI_bool_V2DI, RS6000_BTI_INTSI, + ~RS6000_BTI_bool_V2DI }, + { VSX_BUILTIN_VEC_XST, VSX_BUILTIN_STXVW4X_V4SF, + RS6000_BTI_void, RS6000_BTI_V4SF, RS6000_BTI_INTSI, ~RS6000_BTI_V4SF }, + { VSX_BUILTIN_VEC_XST, VSX_BUILTIN_STXVW4X_V4SF, + RS6000_BTI_void, RS6000_BTI_V4SF, RS6000_BTI_INTSI, ~RS6000_BTI_float }, + { VSX_BUILTIN_VEC_XST, VSX_BUILTIN_STXVW4X_V4SI, + RS6000_BTI_void, RS6000_BTI_V4SI, RS6000_BTI_INTSI, ~RS6000_BTI_V4SI }, + { VSX_BUILTIN_VEC_XST, VSX_BUILTIN_STXVW4X_V4SI, + RS6000_BTI_void, RS6000_BTI_V4SI, RS6000_BTI_INTSI, ~RS6000_BTI_INTSI }, + { VSX_BUILTIN_VEC_XST, VSX_BUILTIN_STXVW4X_V4SI, + RS6000_BTI_void, RS6000_BTI_unsigned_V4SI, RS6000_BTI_INTSI, ~RS6000_BTI_unsigned_V4SI }, + { VSX_BUILTIN_VEC_XST, VSX_BUILTIN_STXVW4X_V4SI, + RS6000_BTI_void, RS6000_BTI_unsigned_V4SI, RS6000_BTI_INTSI, ~RS6000_BTI_UINTSI }, + { VSX_BUILTIN_VEC_XST, VSX_BUILTIN_STXVW4X_V4SI, + RS6000_BTI_void, RS6000_BTI_bool_V4SI, RS6000_BTI_INTSI, ~RS6000_BTI_bool_V4SI }, + { VSX_BUILTIN_VEC_XST, VSX_BUILTIN_STXVW4X_V4SI, + RS6000_BTI_void, RS6000_BTI_bool_V4SI, RS6000_BTI_INTSI, ~RS6000_BTI_UINTSI }, + { VSX_BUILTIN_VEC_XST, VSX_BUILTIN_STXVW4X_V4SI, + RS6000_BTI_void, RS6000_BTI_bool_V4SI, RS6000_BTI_INTSI, ~RS6000_BTI_INTSI }, + { VSX_BUILTIN_VEC_XST, VSX_BUILTIN_STXVW4X_V8HI, + RS6000_BTI_void, RS6000_BTI_V8HI, RS6000_BTI_INTSI, ~RS6000_BTI_V8HI }, + { VSX_BUILTIN_VEC_XST, VSX_BUILTIN_STXVW4X_V8HI, + RS6000_BTI_void, RS6000_BTI_V8HI, RS6000_BTI_INTSI, ~RS6000_BTI_INTHI }, + { VSX_BUILTIN_VEC_XST, VSX_BUILTIN_STXVW4X_V8HI, + RS6000_BTI_void, RS6000_BTI_unsigned_V8HI, RS6000_BTI_INTSI, ~RS6000_BTI_unsigned_V8HI }, + { VSX_BUILTIN_VEC_XST, VSX_BUILTIN_STXVW4X_V8HI, + RS6000_BTI_void, RS6000_BTI_unsigned_V8HI, RS6000_BTI_INTSI, ~RS6000_BTI_UINTHI }, + { VSX_BUILTIN_VEC_XST, VSX_BUILTIN_STXVW4X_V8HI, + RS6000_BTI_void, RS6000_BTI_bool_V8HI, RS6000_BTI_INTSI, ~RS6000_BTI_bool_V8HI }, + { VSX_BUILTIN_VEC_XST, VSX_BUILTIN_STXVW4X_V8HI, + RS6000_BTI_void, RS6000_BTI_bool_V8HI, RS6000_BTI_INTSI, ~RS6000_BTI_UINTHI }, + { VSX_BUILTIN_VEC_XST, VSX_BUILTIN_STXVW4X_V8HI, + RS6000_BTI_void, RS6000_BTI_bool_V8HI, RS6000_BTI_INTSI, ~RS6000_BTI_INTHI }, + { VSX_BUILTIN_VEC_XST, VSX_BUILTIN_STXVW4X_V16QI, + RS6000_BTI_void, RS6000_BTI_V16QI, RS6000_BTI_INTSI, 
~RS6000_BTI_V16QI }, + { VSX_BUILTIN_VEC_XST, VSX_BUILTIN_STXVW4X_V16QI, + RS6000_BTI_void, RS6000_BTI_V16QI, RS6000_BTI_INTSI, ~RS6000_BTI_INTQI }, + { VSX_BUILTIN_VEC_XST, VSX_BUILTIN_STXVW4X_V16QI, + RS6000_BTI_void, RS6000_BTI_unsigned_V16QI, RS6000_BTI_INTSI, ~RS6000_BTI_unsigned_V16QI }, + { VSX_BUILTIN_VEC_XST, VSX_BUILTIN_STXVW4X_V16QI, + RS6000_BTI_void, RS6000_BTI_unsigned_V16QI, RS6000_BTI_INTSI, ~RS6000_BTI_UINTQI }, + { VSX_BUILTIN_VEC_XST, VSX_BUILTIN_STXVW4X_V16QI, + RS6000_BTI_void, RS6000_BTI_bool_V16QI, RS6000_BTI_INTSI, ~RS6000_BTI_bool_V16QI }, + { VSX_BUILTIN_VEC_XST, VSX_BUILTIN_STXVW4X_V16QI, + RS6000_BTI_void, RS6000_BTI_bool_V16QI, RS6000_BTI_INTSI, ~RS6000_BTI_UINTQI }, + { VSX_BUILTIN_VEC_XST, VSX_BUILTIN_STXVW4X_V16QI, + RS6000_BTI_void, RS6000_BTI_bool_V16QI, RS6000_BTI_INTSI, ~RS6000_BTI_INTQI }, + { VSX_BUILTIN_VEC_XST, VSX_BUILTIN_STXVW4X_V8HI, + RS6000_BTI_void, RS6000_BTI_pixel_V8HI, RS6000_BTI_INTSI, ~RS6000_BTI_pixel_V8HI }, + { VSX_BUILTIN_VEC_XST_BE, VSX_BUILTIN_ST_ELEMREV_V2DF, RS6000_BTI_void, RS6000_BTI_V2DF, RS6000_BTI_INTSI, ~RS6000_BTI_V2DF }, - { VSX_BUILTIN_VEC_XST, VSX_BUILTIN_ST_ELEMREV_V2DF, + { VSX_BUILTIN_VEC_XST_BE, VSX_BUILTIN_ST_ELEMREV_V2DF, RS6000_BTI_void, RS6000_BTI_V2DF, RS6000_BTI_INTSI, ~RS6000_BTI_double }, - { VSX_BUILTIN_VEC_XST, VSX_BUILTIN_ST_ELEMREV_V2DI, + { VSX_BUILTIN_VEC_XST_BE, VSX_BUILTIN_ST_ELEMREV_V2DI, RS6000_BTI_void, RS6000_BTI_V2DI, RS6000_BTI_INTSI, ~RS6000_BTI_V2DI }, - { VSX_BUILTIN_VEC_XST, VSX_BUILTIN_ST_ELEMREV_V2DI, + { VSX_BUILTIN_VEC_XST_BE, VSX_BUILTIN_ST_ELEMREV_V2DI, RS6000_BTI_void, RS6000_BTI_V2DI, RS6000_BTI_INTSI, ~RS6000_BTI_long_long }, - { VSX_BUILTIN_VEC_XST, VSX_BUILTIN_ST_ELEMREV_V2DI, + { VSX_BUILTIN_VEC_XST_BE, VSX_BUILTIN_ST_ELEMREV_V2DI, RS6000_BTI_void, RS6000_BTI_unsigned_V2DI, RS6000_BTI_INTSI, ~RS6000_BTI_unsigned_V2DI }, - { VSX_BUILTIN_VEC_XST, VSX_BUILTIN_ST_ELEMREV_V2DI, + { VSX_BUILTIN_VEC_XST_BE, VSX_BUILTIN_ST_ELEMREV_V2DI, RS6000_BTI_void, RS6000_BTI_unsigned_V2DI, RS6000_BTI_INTSI, ~RS6000_BTI_unsigned_long_long }, - { VSX_BUILTIN_VEC_XST, VSX_BUILTIN_ST_ELEMREV_V4SF, + { VSX_BUILTIN_VEC_XST_BE, VSX_BUILTIN_ST_ELEMREV_V4SF, RS6000_BTI_void, RS6000_BTI_V4SF, RS6000_BTI_INTSI, ~RS6000_BTI_V4SF }, - { VSX_BUILTIN_VEC_XST, VSX_BUILTIN_ST_ELEMREV_V4SF, + { VSX_BUILTIN_VEC_XST_BE, VSX_BUILTIN_ST_ELEMREV_V4SF, RS6000_BTI_void, RS6000_BTI_V4SF, RS6000_BTI_INTSI, ~RS6000_BTI_float }, - { VSX_BUILTIN_VEC_XST, VSX_BUILTIN_ST_ELEMREV_V4SI, + { VSX_BUILTIN_VEC_XST_BE, VSX_BUILTIN_ST_ELEMREV_V4SI, RS6000_BTI_void, RS6000_BTI_V4SI, RS6000_BTI_INTSI, ~RS6000_BTI_V4SI }, - { VSX_BUILTIN_VEC_XST, VSX_BUILTIN_ST_ELEMREV_V4SI, + { VSX_BUILTIN_VEC_XST_BE, VSX_BUILTIN_ST_ELEMREV_V4SI, RS6000_BTI_void, RS6000_BTI_V4SI, RS6000_BTI_INTSI, ~RS6000_BTI_INTSI }, - { VSX_BUILTIN_VEC_XST, VSX_BUILTIN_ST_ELEMREV_V4SI, + { VSX_BUILTIN_VEC_XST_BE, VSX_BUILTIN_ST_ELEMREV_V4SI, RS6000_BTI_void, RS6000_BTI_unsigned_V4SI, RS6000_BTI_INTSI, ~RS6000_BTI_unsigned_V4SI }, - { VSX_BUILTIN_VEC_XST, VSX_BUILTIN_ST_ELEMREV_V4SI, + { VSX_BUILTIN_VEC_XST_BE, VSX_BUILTIN_ST_ELEMREV_V4SI, RS6000_BTI_void, RS6000_BTI_unsigned_V4SI, RS6000_BTI_INTSI, ~RS6000_BTI_UINTSI }, - { VSX_BUILTIN_VEC_XST, VSX_BUILTIN_ST_ELEMREV_V8HI, + { VSX_BUILTIN_VEC_XST_BE, VSX_BUILTIN_ST_ELEMREV_V8HI, RS6000_BTI_void, RS6000_BTI_V8HI, RS6000_BTI_INTSI, ~RS6000_BTI_V8HI }, - { VSX_BUILTIN_VEC_XST, VSX_BUILTIN_ST_ELEMREV_V8HI, + { VSX_BUILTIN_VEC_XST_BE, VSX_BUILTIN_ST_ELEMREV_V8HI, RS6000_BTI_void, RS6000_BTI_V8HI, RS6000_BTI_INTSI, 
~RS6000_BTI_INTHI }, - { VSX_BUILTIN_VEC_XST, VSX_BUILTIN_ST_ELEMREV_V8HI, + { VSX_BUILTIN_VEC_XST_BE, VSX_BUILTIN_ST_ELEMREV_V8HI, RS6000_BTI_void, RS6000_BTI_unsigned_V8HI, RS6000_BTI_INTSI, ~RS6000_BTI_unsigned_V8HI }, - { VSX_BUILTIN_VEC_XST, VSX_BUILTIN_ST_ELEMREV_V8HI, + { VSX_BUILTIN_VEC_XST_BE, VSX_BUILTIN_ST_ELEMREV_V8HI, RS6000_BTI_void, RS6000_BTI_unsigned_V8HI, RS6000_BTI_INTSI, ~RS6000_BTI_UINTHI }, - { VSX_BUILTIN_VEC_XST, VSX_BUILTIN_ST_ELEMREV_V16QI, + { VSX_BUILTIN_VEC_XST_BE, VSX_BUILTIN_ST_ELEMREV_V16QI, RS6000_BTI_void, RS6000_BTI_V16QI, RS6000_BTI_INTSI, ~RS6000_BTI_V16QI }, - { VSX_BUILTIN_VEC_XST, VSX_BUILTIN_ST_ELEMREV_V16QI, + { VSX_BUILTIN_VEC_XST_BE, VSX_BUILTIN_ST_ELEMREV_V16QI, RS6000_BTI_void, RS6000_BTI_V16QI, RS6000_BTI_INTSI, ~RS6000_BTI_INTQI }, - { VSX_BUILTIN_VEC_XST, VSX_BUILTIN_ST_ELEMREV_V16QI, + { VSX_BUILTIN_VEC_XST_BE, VSX_BUILTIN_ST_ELEMREV_V16QI, RS6000_BTI_void, RS6000_BTI_unsigned_V16QI, RS6000_BTI_INTSI, ~RS6000_BTI_unsigned_V16QI }, - { VSX_BUILTIN_VEC_XST, VSX_BUILTIN_ST_ELEMREV_V16QI, + { VSX_BUILTIN_VEC_XST_BE, VSX_BUILTIN_ST_ELEMREV_V16QI, RS6000_BTI_void, RS6000_BTI_unsigned_V16QI, RS6000_BTI_INTSI, ~RS6000_BTI_UINTQI }, { VSX_BUILTIN_VEC_XXSLDWI, VSX_BUILTIN_XXSLDWI_16QI, diff --git a/gcc/config/rs6000/rs6000.c b/gcc/config/rs6000/rs6000.c index 731613bdf9b..80499fd198a 100644 --- a/gcc/config/rs6000/rs6000.c +++ b/gcc/config/rs6000/rs6000.c @@ -14477,58 +14477,6 @@ altivec_expand_lv_builtin (enum insn_code icode, tree exp, rtx target, bool blk) return target; } -static rtx -altivec_expand_xl_be_builtin (enum insn_code icode, tree exp, rtx target, bool blk) -{ - rtx pat, addr; - tree arg0 = CALL_EXPR_ARG (exp, 0); - tree arg1 = CALL_EXPR_ARG (exp, 1); - machine_mode tmode = insn_data[icode].operand[0].mode; - machine_mode mode0 = Pmode; - machine_mode mode1 = Pmode; - rtx op0 = expand_normal (arg0); - rtx op1 = expand_normal (arg1); - - if (icode == CODE_FOR_nothing) - /* Builtin not supported on this processor. */ - return 0; - - /* If we got invalid arguments bail out before generating bad rtl. */ - if (arg0 == error_mark_node || arg1 == error_mark_node) - return const0_rtx; - - if (target == 0 - || GET_MODE (target) != tmode - || ! (*insn_data[icode].operand[0].predicate) (target, tmode)) - target = gen_reg_rtx (tmode); - - op1 = copy_to_mode_reg (mode1, op1); - - if (op0 == const0_rtx) - addr = gen_rtx_MEM (blk ? BLKmode : tmode, op1); - else - { - op0 = copy_to_mode_reg (mode0, op0); - addr = gen_rtx_MEM (blk ? BLKmode : tmode, - gen_rtx_PLUS (Pmode, op1, op0)); - } - - pat = GEN_FCN (icode) (target, addr); - if (!pat) - return 0; - - emit_insn (pat); - /* Reverse element order of elements if in LE mode */ - if (!VECTOR_ELT_ORDER_BIG) - { - rtx sel = swap_selector_for_mode (tmode); - rtx vperm = gen_rtx_UNSPEC (tmode, gen_rtvec (3, target, target, sel), - UNSPEC_VPERM); - emit_insn (gen_rtx_SET (target, vperm)); - } - return target; -} - static rtx paired_expand_stv_builtin (enum insn_code icode, tree exp) { @@ -15925,50 +15873,6 @@ altivec_expand_builtin (tree exp, rtx target, bool *expandedp) /* Fall through. */ } - /* XL_BE We initialized them to always load in big endian order. 
*/ - switch (fcode) - { - case VSX_BUILTIN_XL_BE_V2DI: - { - enum insn_code code = CODE_FOR_vsx_load_v2di; - return altivec_expand_xl_be_builtin (code, exp, target, false); - } - break; - case VSX_BUILTIN_XL_BE_V4SI: - { - enum insn_code code = CODE_FOR_vsx_load_v4si; - return altivec_expand_xl_be_builtin (code, exp, target, false); - } - break; - case VSX_BUILTIN_XL_BE_V8HI: - { - enum insn_code code = CODE_FOR_vsx_load_v8hi; - return altivec_expand_xl_be_builtin (code, exp, target, false); - } - break; - case VSX_BUILTIN_XL_BE_V16QI: - { - enum insn_code code = CODE_FOR_vsx_load_v16qi; - return altivec_expand_xl_be_builtin (code, exp, target, false); - } - break; - case VSX_BUILTIN_XL_BE_V2DF: - { - enum insn_code code = CODE_FOR_vsx_load_v2df; - return altivec_expand_xl_be_builtin (code, exp, target, false); - } - break; - case VSX_BUILTIN_XL_BE_V4SF: - { - enum insn_code code = CODE_FOR_vsx_load_v4sf; - return altivec_expand_xl_be_builtin (code, exp, target, false); - } - break; - default: - break; - /* Fall through. */ - } - *expandedp = false; return NULL_RTX; } @@ -17629,6 +17533,10 @@ altivec_init_builtins (void) VSX_BUILTIN_LD_ELEMREV_V4SF); def_builtin ("__builtin_vsx_ld_elemrev_v4si", v4si_ftype_long_pcvoid, VSX_BUILTIN_LD_ELEMREV_V4SI); + def_builtin ("__builtin_vsx_ld_elemrev_v8hi", v8hi_ftype_long_pcvoid, + VSX_BUILTIN_LD_ELEMREV_V8HI); + def_builtin ("__builtin_vsx_ld_elemrev_v16qi", v16qi_ftype_long_pcvoid, + VSX_BUILTIN_LD_ELEMREV_V16QI); def_builtin ("__builtin_vsx_st_elemrev_v2df", void_ftype_v2df_long_pvoid, VSX_BUILTIN_ST_ELEMREV_V2DF); def_builtin ("__builtin_vsx_st_elemrev_v2di", void_ftype_v2di_long_pvoid, @@ -17637,42 +17545,10 @@ altivec_init_builtins (void) VSX_BUILTIN_ST_ELEMREV_V4SF); def_builtin ("__builtin_vsx_st_elemrev_v4si", void_ftype_v4si_long_pvoid, VSX_BUILTIN_ST_ELEMREV_V4SI); - - def_builtin ("__builtin_vsx_le_be_v8hi", v8hi_ftype_long_pcvoid, - VSX_BUILTIN_XL_BE_V8HI); - def_builtin ("__builtin_vsx_le_be_v4si", v4si_ftype_long_pcvoid, - VSX_BUILTIN_XL_BE_V4SI); - def_builtin ("__builtin_vsx_le_be_v2di", v2di_ftype_long_pcvoid, - VSX_BUILTIN_XL_BE_V2DI); - def_builtin ("__builtin_vsx_le_be_v4sf", v4sf_ftype_long_pcvoid, - VSX_BUILTIN_XL_BE_V4SF); - def_builtin ("__builtin_vsx_le_be_v2df", v2df_ftype_long_pcvoid, - VSX_BUILTIN_XL_BE_V2DF); - def_builtin ("__builtin_vsx_le_be_v16qi", v16qi_ftype_long_pcvoid, - VSX_BUILTIN_XL_BE_V16QI); - - if (TARGET_P9_VECTOR) - { - def_builtin ("__builtin_vsx_ld_elemrev_v8hi", v8hi_ftype_long_pcvoid, - VSX_BUILTIN_LD_ELEMREV_V8HI); - def_builtin ("__builtin_vsx_ld_elemrev_v16qi", v16qi_ftype_long_pcvoid, - VSX_BUILTIN_LD_ELEMREV_V16QI); - def_builtin ("__builtin_vsx_st_elemrev_v8hi", - void_ftype_v8hi_long_pvoid, VSX_BUILTIN_ST_ELEMREV_V8HI); - def_builtin ("__builtin_vsx_st_elemrev_v16qi", - void_ftype_v16qi_long_pvoid, VSX_BUILTIN_ST_ELEMREV_V16QI); - } - else - { - rs6000_builtin_decls[(int) VSX_BUILTIN_LD_ELEMREV_V8HI] - = rs6000_builtin_decls[(int) VSX_BUILTIN_LXVW4X_V8HI]; - rs6000_builtin_decls[(int) VSX_BUILTIN_LD_ELEMREV_V16QI] - = rs6000_builtin_decls[(int) VSX_BUILTIN_LXVW4X_V16QI]; - rs6000_builtin_decls[(int) VSX_BUILTIN_ST_ELEMREV_V8HI] - = rs6000_builtin_decls[(int) VSX_BUILTIN_STXVW4X_V8HI]; - rs6000_builtin_decls[(int) VSX_BUILTIN_ST_ELEMREV_V16QI] - = rs6000_builtin_decls[(int) VSX_BUILTIN_STXVW4X_V16QI]; - } + def_builtin ("__builtin_vsx_st_elemrev_v8hi", void_ftype_v8hi_long_pvoid, + VSX_BUILTIN_ST_ELEMREV_V8HI); + def_builtin ("__builtin_vsx_st_elemrev_v16qi", void_ftype_v16qi_long_pvoid, + 
VSX_BUILTIN_ST_ELEMREV_V16QI); def_builtin ("__builtin_vec_vsx_ld", opaque_ftype_long_pcvoid, VSX_BUILTIN_VEC_LD); @@ -17684,6 +17560,8 @@ altivec_init_builtins (void) VSX_BUILTIN_VEC_XL_BE); def_builtin ("__builtin_vec_xst", void_ftype_opaque_long_pvoid, VSX_BUILTIN_VEC_XST); + def_builtin ("__builtin_vec_xst_be", void_ftype_opaque_long_pvoid, + VSX_BUILTIN_VEC_XST_BE); def_builtin ("__builtin_vec_step", int_ftype_opaque, ALTIVEC_BUILTIN_VEC_STEP); def_builtin ("__builtin_vec_splats", opaque_ftype_opaque, ALTIVEC_BUILTIN_VEC_SPLATS); diff --git a/gcc/config/rs6000/vsx.md b/gcc/config/rs6000/vsx.md index 6ea16be4f23..00d76563f37 100644 --- a/gcc/config/rs6000/vsx.md +++ b/gcc/config/rs6000/vsx.md @@ -1122,7 +1122,7 @@ "lxvw4x %x0,%y1" [(set_attr "type" "vecload")]) -(define_insn "vsx_ld_elemrev_v8hi" +(define_expand "vsx_ld_elemrev_v8hi" [(set (match_operand:V8HI 0 "vsx_register_operand" "=wa") (vec_select:V8HI (match_operand:V8HI 1 "memory_operand" "Z") @@ -1130,22 +1130,94 @@ (const_int 5) (const_int 4) (const_int 3) (const_int 2) (const_int 1) (const_int 0)])))] + "VECTOR_MEM_VSX_P (V8HImode) && !BYTES_BIG_ENDIAN" +{ + if (!TARGET_P9_VECTOR) + { + rtx tmp = gen_reg_rtx (V4SImode); + rtx subreg, subreg2, perm[16], pcv; + /* 2 is leftmost element in register */ + unsigned int reorder[16] = {13,12,15,14,9,8,11,10,5,4,7,6,1,0,3,2}; + int i; + + subreg = simplify_gen_subreg (V4SImode, operands[1], V8HImode, 0); + emit_insn (gen_vsx_ld_elemrev_v4si (tmp, subreg)); + subreg2 = simplify_gen_subreg (V8HImode, tmp, V4SImode, 0); + + for (i = 0; i < 16; ++i) + perm[i] = GEN_INT (reorder[i]); + + pcv = force_reg (V16QImode, + gen_rtx_CONST_VECTOR (V16QImode, + gen_rtvec_v (16, perm))); + emit_insn (gen_altivec_vperm_v8hi_direct (operands[0], subreg2, + subreg2, pcv)); + DONE; + } +}) + +(define_insn "*vsx_ld_elemrev_v8hi_internal" + [(set (match_operand:V8HI 0 "vsx_register_operand" "=wa") + (vec_select:V8HI + (match_operand:V8HI 1 "memory_operand" "Z") + (parallel [(const_int 7) (const_int 6) + (const_int 5) (const_int 4) + (const_int 3) (const_int 2) + (const_int 1) (const_int 0)])))] "VECTOR_MEM_VSX_P (V8HImode) && !BYTES_BIG_ENDIAN && TARGET_P9_VECTOR" "lxvh8x %x0,%y1" [(set_attr "type" "vecload")]) -(define_insn "vsx_ld_elemrev_v16qi" +(define_expand "vsx_ld_elemrev_v16qi" [(set (match_operand:V16QI 0 "vsx_register_operand" "=wa") (vec_select:V16QI - (match_operand:V16QI 1 "memory_operand" "Z") - (parallel [(const_int 15) (const_int 14) - (const_int 13) (const_int 12) - (const_int 11) (const_int 10) - (const_int 9) (const_int 8) - (const_int 7) (const_int 6) - (const_int 5) (const_int 4) - (const_int 3) (const_int 2) - (const_int 1) (const_int 0)])))] + (match_operand:V16QI 1 "memory_operand" "Z") + (parallel [(const_int 15) (const_int 14) + (const_int 13) (const_int 12) + (const_int 11) (const_int 10) + (const_int 9) (const_int 8) + (const_int 7) (const_int 6) + (const_int 5) (const_int 4) + (const_int 3) (const_int 2) + (const_int 1) (const_int 0)])))] + "VECTOR_MEM_VSX_P (V16QImode) && !BYTES_BIG_ENDIAN" +{ + if (!TARGET_P9_VECTOR) + { + rtx tmp = gen_reg_rtx (V4SImode); + rtx subreg, subreg2, perm[16], pcv; + /* 3 is leftmost element in register */ + unsigned int reorder[16] = {12,13,14,15,8,9,10,11,4,5,6,7,0,1,2,3}; + int i; + + subreg = simplify_gen_subreg (V4SImode, operands[1], V16QImode, 0); + emit_insn (gen_vsx_ld_elemrev_v4si (tmp, subreg)); + subreg2 = simplify_gen_subreg (V16QImode, tmp, V4SImode, 0); + + for (i = 0; i < 16; ++i) + perm[i] = GEN_INT (reorder[i]); + + pcv = 
force_reg (V16QImode, + gen_rtx_CONST_VECTOR (V16QImode, + gen_rtvec_v (16, perm))); + emit_insn (gen_altivec_vperm_v16qi_direct (operands[0], subreg2, + subreg2, pcv)); + DONE; + } +}) + +(define_insn "*vsx_ld_elemrev_v16qi_internal" + [(set (match_operand:V16QI 0 "vsx_register_operand" "=wa") + (vec_select:V16QI + (match_operand:V16QI 1 "memory_operand" "Z") + (parallel [(const_int 15) (const_int 14) + (const_int 13) (const_int 12) + (const_int 11) (const_int 10) + (const_int 9) (const_int 8) + (const_int 7) (const_int 6) + (const_int 5) (const_int 4) + (const_int 3) (const_int 2) + (const_int 1) (const_int 0)])))] "VECTOR_MEM_VSX_P (V16QImode) && !BYTES_BIG_ENDIAN && TARGET_P9_VECTOR" "lxvb16x %x0,%y1" [(set_attr "type" "vecload")]) @@ -1153,8 +1225,8 @@ (define_insn "vsx_st_elemrev_v2df" [(set (match_operand:V2DF 0 "memory_operand" "=Z") (vec_select:V2DF - (match_operand:V2DF 1 "vsx_register_operand" "wa") - (parallel [(const_int 1) (const_int 0)])))] + (match_operand:V2DF 1 "vsx_register_operand" "wa") + (parallel [(const_int 1) (const_int 0)])))] "VECTOR_MEM_VSX_P (V2DFmode) && !BYTES_BIG_ENDIAN" "stxvd2x %x1,%y0" [(set_attr "type" "vecstore")]) @@ -1162,8 +1234,8 @@ (define_insn "vsx_st_elemrev_v2di" [(set (match_operand:V2DI 0 "memory_operand" "=Z") (vec_select:V2DI - (match_operand:V2DI 1 "vsx_register_operand" "wa") - (parallel [(const_int 1) (const_int 0)])))] + (match_operand:V2DI 1 "vsx_register_operand" "wa") + (parallel [(const_int 1) (const_int 0)])))] "VECTOR_MEM_VSX_P (V2DImode) && !BYTES_BIG_ENDIAN" "stxvd2x %x1,%y0" [(set_attr "type" "vecstore")]) @@ -1171,9 +1243,9 @@ (define_insn "vsx_st_elemrev_v4sf" [(set (match_operand:V4SF 0 "memory_operand" "=Z") (vec_select:V4SF - (match_operand:V4SF 1 "vsx_register_operand" "wa") - (parallel [(const_int 3) (const_int 2) - (const_int 1) (const_int 0)])))] + (match_operand:V4SF 1 "vsx_register_operand" "wa") + (parallel [(const_int 3) (const_int 2) + (const_int 1) (const_int 0)])))] "VECTOR_MEM_VSX_P (V4SFmode) && !BYTES_BIG_ENDIAN" "stxvw4x %x1,%y0" [(set_attr "type" "vecstore")]) @@ -1188,30 +1260,98 @@ "stxvw4x %x1,%y0" [(set_attr "type" "vecstore")]) -(define_insn "vsx_st_elemrev_v8hi" +(define_expand "vsx_st_elemrev_v8hi" [(set (match_operand:V8HI 0 "memory_operand" "=Z") (vec_select:V8HI - (match_operand:V8HI 1 "vsx_register_operand" "wa") - (parallel [(const_int 7) (const_int 6) - (const_int 5) (const_int 4) - (const_int 3) (const_int 2) - (const_int 1) (const_int 0)])))] + (match_operand:V8HI 1 "vsx_register_operand" "wa") + (parallel [(const_int 7) (const_int 6) + (const_int 5) (const_int 4) + (const_int 3) (const_int 2) + (const_int 1) (const_int 0)])))] + "VECTOR_MEM_VSX_P (V8HImode) && !BYTES_BIG_ENDIAN" +{ + if (!TARGET_P9_VECTOR) + { + rtx subreg, perm[16], pcv; + rtx tmp = gen_reg_rtx (V8HImode); + /* 2 is leftmost element in register */ + unsigned int reorder[16] = {13,12,15,14,9,8,11,10,5,4,7,6,1,0,3,2}; + int i; + + for (i = 0; i < 16; ++i) + perm[i] = GEN_INT (reorder[i]); + + pcv = force_reg (V16QImode, + gen_rtx_CONST_VECTOR (V16QImode, + gen_rtvec_v (16, perm))); + emit_insn (gen_altivec_vperm_v8hi_direct (tmp, operands[1], + operands[1], pcv)); + subreg = simplify_gen_subreg (V4SImode, tmp, V8HImode, 0); + emit_insn (gen_vsx_st_elemrev_v4si (subreg, operands[0])); + DONE; + } +}) + +(define_insn "*vsx_st_elemrev_v8hi_internal" + [(set (match_operand:V8HI 0 "memory_operand" "=Z") + (vec_select:V8HI + (match_operand:V8HI 1 "vsx_register_operand" "wa") + (parallel [(const_int 7) (const_int 6) + (const_int 5) 
(const_int 4) + (const_int 3) (const_int 2) + (const_int 1) (const_int 0)])))] "VECTOR_MEM_VSX_P (V8HImode) && !BYTES_BIG_ENDIAN && TARGET_P9_VECTOR" "stxvh8x %x1,%y0" [(set_attr "type" "vecstore")]) -(define_insn "vsx_st_elemrev_v16qi" +(define_expand "vsx_st_elemrev_v16qi" + [(set (match_operand:V16QI 0 "memory_operand" "=Z") + (vec_select:V16QI + (match_operand:V16QI 1 "vsx_register_operand" "wa") + (parallel [(const_int 15) (const_int 14) + (const_int 13) (const_int 12) + (const_int 11) (const_int 10) + (const_int 9) (const_int 8) + (const_int 7) (const_int 6) + (const_int 5) (const_int 4) + (const_int 3) (const_int 2) + (const_int 1) (const_int 0)])))] + "VECTOR_MEM_VSX_P (V16QImode) && !BYTES_BIG_ENDIAN" +{ + if (!TARGET_P9_VECTOR) + { + rtx subreg, perm[16], pcv; + rtx tmp = gen_reg_rtx (V16QImode); + /* 3 is leftmost element in register */ + unsigned int reorder[16] = {12,13,14,15,8,9,10,11,4,5,6,7,0,1,2,3}; + int i; + + for (i = 0; i < 16; ++i) + perm[i] = GEN_INT (reorder[i]); + + pcv = force_reg (V16QImode, + gen_rtx_CONST_VECTOR (V16QImode, + gen_rtvec_v (16, perm))); + emit_insn (gen_altivec_vperm_v16qi_direct (tmp, operands[1], + operands[1], pcv)); + subreg = simplify_gen_subreg (V4SImode, tmp, V16QImode, 0); + emit_insn (gen_vsx_st_elemrev_v4si (subreg, operands[0])); + DONE; + } +}) + +(define_insn "*vsx_st_elemrev_v16qi_internal" [(set (match_operand:V16QI 0 "memory_operand" "=Z") (vec_select:V16QI - (match_operand:V16QI 1 "vsx_register_operand" "wa") - (parallel [(const_int 15) (const_int 14) - (const_int 13) (const_int 12) - (const_int 11) (const_int 10) - (const_int 9) (const_int 8) - (const_int 7) (const_int 6) - (const_int 5) (const_int 4) - (const_int 3) (const_int 2) - (const_int 1) (const_int 0)])))] + (match_operand:V16QI 1 "vsx_register_operand" "wa") + (parallel [(const_int 15) (const_int 14) + (const_int 13) (const_int 12) + (const_int 11) (const_int 10) + (const_int 9) (const_int 8) + (const_int 7) (const_int 6) + (const_int 5) (const_int 4) + (const_int 3) (const_int 2) + (const_int 1) (const_int 0)])))] "VECTOR_MEM_VSX_P (V16QImode) && !BYTES_BIG_ENDIAN && TARGET_P9_VECTOR" "stxvb16x %x1,%y0" [(set_attr "type" "vecstore")]) diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog index 676a2c4e844..eda6451457d 100644 --- a/gcc/testsuite/ChangeLog +++ b/gcc/testsuite/ChangeLog @@ -1,3 +1,8 @@ +2017-11-15 Bill Schmidt + + * gcc.target/powerpc/swaps-p8-26.c: Modify expected code + generation. + 2017-11-15 Martin Sebor PR testsuite/82988 diff --git a/gcc/testsuite/gcc.target/powerpc/swaps-p8-26.c b/gcc/testsuite/gcc.target/powerpc/swaps-p8-26.c index d01d86b94eb..28ce1cd39e4 100644 --- a/gcc/testsuite/gcc.target/powerpc/swaps-p8-26.c +++ b/gcc/testsuite/gcc.target/powerpc/swaps-p8-26.c @@ -1,11 +1,11 @@ /* { dg-do compile { target { powerpc64le-*-* } } } */ /* { dg-skip-if "do not override -mcpu" { powerpc*-*-* } { "-mcpu=*" } { "-mcpu=power8" } } */ /* { dg-options "-mcpu=power8 -O3 " } */ -/* { dg-final { scan-assembler-times "lxvw4x" 2 } } */ -/* { dg-final { scan-assembler "stxvw4x" } } */ +/* { dg-final { scan-assembler-times "lxvd2x" 2 } } */ +/* { dg-final { scan-assembler "stxvd2x" } } */ /* { dg-final { scan-assembler-not "xxpermdi" } } */ -/* Verify that swap optimization does not interfere with element-reversing +/* Verify that swap optimization does not interfere with unaligned loads and stores. */ /* Test case to resolve PR79044. */ -- 2.30.2
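
As background for readers of this patch, vec_xl_be and vec_xst_be load and
store a vector in big-endian element order regardless of the target's
endianness.  With this change both are usable on POWER8 as well as POWER9:
for the V8HI and V16QI cases, P9 expands them to the element-reversing
lxvh8x/lxvb16x and stxvh8x/stxvb16x instructions, while little-endian P8
falls back to the V4SI element-reversing load/store plus a vperm generated
through altivec_vperm_<mode>_direct.  A minimal usage sketch follows (a
hypothetical example program, not part of the commit; the function name and
build flags are illustrative only):

/* Hypothetical example; build with e.g. -mcpu=power8 -maltivec.  */
#include <altivec.h>

void
copy_be_order (const unsigned short *src, unsigned short *dst)
{
  /* Load eight halfwords with element 0 at the lowest address
     (big-endian element order), regardless of target endianness.  */
  vector unsigned short v = vec_xl_be (0, src);
  /* Store them back in the same big-endian element order; this maps
     to the new __builtin_vec_xst_be overload added above.  */
  vec_xst_be (v, 0, dst);
}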