From: Andreas Krebbel Date: Tue, 2 Apr 2019 11:02:22 +0000 (+0000) Subject: S/390: arch13: vec_reve element order reversal builtins X-Git-Url: https://git.libre-soc.org/?a=commitdiff_plain;h=3278804e59a76e7f140a522286d7ac88c2cdb916;p=gcc.git S/390: arch13: vec_reve element order reversal builtins gcc/ChangeLog: 2019-04-02 Andreas Krebbel * config/s390/s390-builtin-types.def: Add new builtin function type. * config/s390/s390-builtins.def: Add overloaded builtin s390_vec_reve and low-level builtins for s390_vler and s390_vster. * config/s390/s390.md (UNSPEC_VEC_ELTSWAP): New constant definition. * config/s390/vecintrin.h (vec_reve): New builtin name definition. * config/s390/vx-builtins.md (V_HW_HSD): New mode iterator. ("eltswap<mode>"): New expander. ("*eltswapv16qi", "*eltswap<mode>", "*eltswap<mode>_emu"): New insn definitions. gcc/testsuite/ChangeLog: 2019-04-02 Andreas Krebbel * gcc.target/s390/zvector/vec-reve-load-byte-z14.c: New test. * gcc.target/s390/zvector/vec-reve-load-byte.c: New test. * gcc.target/s390/zvector/vec-reve-load-halfword-z14.c: New test. * gcc.target/s390/zvector/vec-reve-load-halfword.c: New test. * gcc.target/s390/zvector/vec-reve-store-byte-z14.c: New test. * gcc.target/s390/zvector/vec-reve-store-byte.c: New test. From-SVN: r270085 --- diff --git a/gcc/ChangeLog b/gcc/ChangeLog index 9017bb21481..9cb3b9843d3 100644 --- a/gcc/ChangeLog +++ b/gcc/ChangeLog @@ -1,3 +1,15 @@ +2019-04-02 Andreas Krebbel + + * config/s390/s390-builtin-types.def: Add new builtin function type. + * config/s390/s390-builtins.def: Add overloaded builtin + s390_vec_reve and low-level builtins for s390_vler and s390_vster. + * config/s390/s390.md (UNSPEC_VEC_ELTSWAP): New constant definition. + * config/s390/vecintrin.h (vec_reve): New builtin name definition. + * config/s390/vx-builtins.md (V_HW_HSD): New mode iterator. + ("eltswap<mode>"): New expander. + ("*eltswapv16qi", "*eltswap<mode>", "*eltswap<mode>_emu"): New + insn definitions.
+ 2019-04-02 Andreas Krebbel * config/s390/s390-builtin-types.def: Add new builtin function types. diff --git a/gcc/config/s390/s390-builtin-types.def b/gcc/config/s390/s390-builtin-types.def index cfd8f5783e5..ff53ec541f8 100644 --- a/gcc/config/s390/s390-builtin-types.def +++ b/gcc/config/s390/s390-builtin-types.def @@ -382,6 +382,7 @@ DEF_OV_TYPE (BT_OV_BV16QI_UV16QI_UV16QI_UV16QI, BT_BV16QI, BT_UV16QI, BT_UV16QI, DEF_OV_TYPE (BT_OV_BV16QI_UV16QI_UV16QI_UV16QI_INTPTR, BT_BV16QI, BT_UV16QI, BT_UV16QI, BT_UV16QI, BT_INTPTR) DEF_OV_TYPE (BT_OV_BV16QI_V16QI_V16QI, BT_BV16QI, BT_V16QI, BT_V16QI) DEF_OV_TYPE (BT_OV_BV16QI_V16QI_V16QI_INTPTR, BT_BV16QI, BT_V16QI, BT_V16QI, BT_INTPTR) +DEF_OV_TYPE (BT_OV_BV2DI_BV2DI, BT_BV2DI, BT_BV2DI) DEF_OV_TYPE (BT_OV_BV2DI_BV2DI_BV2DI, BT_BV2DI, BT_BV2DI, BT_BV2DI) DEF_OV_TYPE (BT_OV_BV2DI_BV2DI_BV2DI_BV2DI, BT_BV2DI, BT_BV2DI, BT_BV2DI, BT_BV2DI) DEF_OV_TYPE (BT_OV_BV2DI_BV2DI_BV2DI_INT, BT_BV2DI, BT_BV2DI, BT_BV2DI, BT_INT) diff --git a/gcc/config/s390/s390-builtins.def b/gcc/config/s390/s390-builtins.def index a2276858fe8..e4cfa80adaa 100644 --- a/gcc/config/s390/s390-builtins.def +++ b/gcc/config/s390/s390-builtins.def @@ -2892,3 +2892,34 @@ B_DEF (s390_vstbrg, bswapv2di, 0, B_DEF (s390_vstbrq, bswapv1ti, 0, B_VX, 0, BT_FN_V1TI_V1TI) B_DEF (s390_vstbrf_flt, bswapv4sf, 0, B_VXE, 0, BT_FN_V4SF_V4SF) B_DEF (s390_vstbrg_dbl, bswapv2df, 0, B_VX, 0, BT_FN_V2DF_V2DF) + +/* Returns a vector with the elements of the input vector OP0 in reversed order. 
*/ +OB_DEF (s390_vec_reve, s390_vec_reve_b8, s390_vec_reve_dbl, B_VX, BT_FN_OV4SI_OV4SI) +OB_DEF_VAR (s390_vec_reve_b8, s390_vlerb, 0, 0, BT_OV_BV16QI_BV16QI) +OB_DEF_VAR (s390_vec_reve_s8, s390_vlerb, 0, 0, BT_OV_V16QI_V16QI) +OB_DEF_VAR (s390_vec_reve_u8, s390_vlerb, 0, 0, BT_OV_UV16QI_UV16QI) +OB_DEF_VAR (s390_vec_reve_b16, s390_vlerh, 0, 0, BT_OV_BV8HI_BV8HI) +OB_DEF_VAR (s390_vec_reve_s16, s390_vlerh, 0, 0, BT_OV_V8HI_V8HI) +OB_DEF_VAR (s390_vec_reve_u16, s390_vlerh, 0, 0, BT_OV_UV8HI_UV8HI) +OB_DEF_VAR (s390_vec_reve_b32, s390_vlerf, 0, 0, BT_OV_BV4SI_BV4SI) +OB_DEF_VAR (s390_vec_reve_s32, s390_vlerf, 0, 0, BT_OV_V4SI_V4SI) +OB_DEF_VAR (s390_vec_reve_u32, s390_vlerf, 0, 0, BT_OV_UV4SI_UV4SI) +OB_DEF_VAR (s390_vec_reve_b64, s390_vlerg, 0, 0, BT_OV_BV2DI_BV2DI) +OB_DEF_VAR (s390_vec_reve_s64, s390_vlerg, 0, 0, BT_OV_V2DI_V2DI) +OB_DEF_VAR (s390_vec_reve_u64, s390_vlerg, 0, 0, BT_OV_UV2DI_UV2DI) +OB_DEF_VAR (s390_vec_reve_flt, s390_vlerf_flt, 0, B_VXE, BT_OV_V4SF_V4SF) +OB_DEF_VAR (s390_vec_reve_dbl, s390_vlerg_dbl, 0, 0, BT_OV_V2DF_V2DF) + +B_DEF (s390_vlerb, eltswapv16qi, 0, B_VX, 0, BT_FN_V16QI_V16QI) +B_DEF (s390_vlerh, eltswapv8hi, 0, B_VX, 0, BT_FN_V8HI_V8HI) +B_DEF (s390_vlerf, eltswapv4si, 0, B_VX, 0, BT_FN_V4SI_V4SI) +B_DEF (s390_vlerg, eltswapv2di, 0, B_VX, 0, BT_FN_V2DI_V2DI) +B_DEF (s390_vlerf_flt, eltswapv4sf, 0, B_VXE, 0, BT_FN_V4SF_V4SF) +B_DEF (s390_vlerg_dbl, eltswapv2df, 0, B_VX, 0, BT_FN_V2DF_V2DF) + +B_DEF (s390_vsterb, eltswapv16qi, 0, B_VX, 0, BT_FN_V16QI_V16QI) +B_DEF (s390_vsterh, eltswapv8hi, 0, B_VX, 0, BT_FN_V8HI_V8HI) +B_DEF (s390_vsterf, eltswapv4si, 0, B_VX, 0, BT_FN_V4SI_V4SI) +B_DEF (s390_vsterg, eltswapv2di, 0, B_VX, 0, BT_FN_V2DI_V2DI) +B_DEF (s390_vsterf_flt, eltswapv4sf, 0, B_VXE, 0, BT_FN_V4SF_V4SF) +B_DEF (s390_vsterg_dbl, eltswapv2df, 0, B_VX, 0, BT_FN_V2DF_V2DF) diff --git a/gcc/config/s390/s390.md b/gcc/config/s390/s390.md index 17aafe54afb..bdc7385cfe4 100644 --- a/gcc/config/s390/s390.md +++ b/gcc/config/s390/s390.md @@ 
-238,6 +238,8 @@ UNSPEC_VEC_VFMIN UNSPEC_VEC_VFMAX + + UNSPEC_VEC_ELTSWAP ]) ;; diff --git a/gcc/config/s390/vecintrin.h b/gcc/config/s390/vecintrin.h index 91f0a576236..1220bf6c41e 100644 --- a/gcc/config/s390/vecintrin.h +++ b/gcc/config/s390/vecintrin.h @@ -312,4 +312,5 @@ __lcbb(const void *ptr, int bndry) #define vec_sqrt __builtin_s390_vec_sqrt #define vec_fp_test_data_class __builtin_s390_vec_fp_test_data_class #define vec_revb __builtin_s390_vec_revb +#define vec_reve __builtin_s390_vec_reve #endif /* _VECINTRIN_H */ diff --git a/gcc/config/s390/vx-builtins.md b/gcc/config/s390/vx-builtins.md index b2bc8e2d725..55b49f456df 100644 --- a/gcc/config/s390/vx-builtins.md +++ b/gcc/config/s390/vx-builtins.md @@ -22,7 +22,7 @@ (define_mode_iterator V_HW_32_64 [V4SI V2DI V2DF (V4SF "TARGET_VXE")]) (define_mode_iterator VI_HW_SD [V4SI V2DI]) -(define_mode_iterator V_HW_HSD [V8HI V4SI V2DI V2DF]) +(define_mode_iterator V_HW_HSD [V8HI V4SI (V4SF "TARGET_VXE") V2DI V2DF]) (define_mode_iterator V_HW_4 [V4SI V4SF]) ; Full size vector modes with more than one element which are directly supported in vector registers by the hardware. (define_mode_iterator VEC_HW [V16QI V8HI V4SI V2DI V2DF (V4SF "TARGET_VXE")]) @@ -2058,3 +2058,84 @@ "TARGET_VXE" "fmaxb\t%v0,%v1,%v2,%b3" [(set_attr "op_type" "VRR")]) + +; The element reversal builtins introduced with arch13 have been made +; available also for older CPUs down to z13. +(define_expand "eltswap<mode>" + [(set (match_operand:VEC_HW 0 "nonimmediate_operand" "") + (unspec:VEC_HW [(match_operand:VEC_HW 1 "nonimmediate_operand" "")] + UNSPEC_VEC_ELTSWAP))] + "TARGET_VX") + +; The byte element reversal is implemented as 128 bit byte swap. +; Alternatively this could be emitted as bswap:V1TI but the required +; subregs appear to confuse combine.
+(define_insn "*eltswapv16qi" + [(set (match_operand:V16QI 0 "nonimmediate_operand" "=v,v,R") + (unspec:V16QI [(match_operand:V16QI 1 "nonimmediate_operand" "v,R,v")] + UNSPEC_VEC_ELTSWAP))] + "TARGET_VXE2" + "@ + # + vlbrq\t%v0,%v1 + vstbrq\t%v1,%v0" + [(set_attr "op_type" "*,VRX,VRX")]) + +; vlerh, vlerf, vlerg, vsterh, vsterf, vsterg +(define_insn "*eltswap<mode>" + [(set (match_operand:V_HW_HSD 0 "nonimmediate_operand" "=v,v,R") + (unspec:V_HW_HSD [(match_operand:V_HW_HSD 1 "nonimmediate_operand" "v,R,v")] + UNSPEC_VEC_ELTSWAP))] + "TARGET_VXE2" + "@ + # + vler<bhfgq>\t%v0,%v1 + vster<bhfgq>\t%v1,%v0" + [(set_attr "op_type" "*,VRX,VRX")]) + +; arch13 has instructions for doing element reversal from mem to reg +; or the other way around. For reg to reg or on pre arch13 machines +; we have to emulate it with vector permute. +(define_insn_and_split "*eltswap<mode>_emu" + [(set (match_operand:VEC_HW 0 "nonimmediate_operand" "=vR") + (unspec:VEC_HW [(match_operand:VEC_HW 1 "nonimmediate_operand" "vR")] + UNSPEC_VEC_ELTSWAP))] + "TARGET_VX && can_create_pseudo_p ()" + "#" + "&& ((!memory_operand (operands[0], <MODE>mode) + && !memory_operand (operands[1], <MODE>mode)) + || !TARGET_VXE2)" + [(set (match_dup 3) + (unspec:V16QI [(match_dup 4) + (match_dup 4) + (match_dup 2)] + UNSPEC_VEC_PERM)) + (set (match_dup 0) (subreg:VEC_HW (match_dup 3) 0))] +{ + static char p[4][16] = + { { 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0 }, /* Q */ + { 14, 15, 12, 13, 10, 11, 8, 9, 6, 7, 4, 5, 2, 3, 0, 1 }, /* H */ + { 12, 13, 14, 15, 8, 9, 10, 11, 4, 5, 6, 7, 0, 1, 2, 3 }, /* S */ + { 8, 9, 10, 11, 12, 13, 14, 15, 0, 1, 2, 3, 4, 5, 6, 7 } }; /* D */ + char *perm; + rtx perm_rtx[16], constv; + + switch (GET_MODE_SIZE (GET_MODE_INNER (<MODE>mode))) + { + case 1: perm = p[0]; break; + case 2: perm = p[1]; break; + case 4: perm = p[2]; break; + case 8: perm = p[3]; break; + default: gcc_unreachable (); + } + + for (int i = 0; i < 16; i++) + perm_rtx[i] = GEN_INT (perm[i]); + + operands[1] = force_reg (<MODE>mode,
operands[1]); + operands[2] = gen_reg_rtx (V16QImode); + operands[3] = gen_reg_rtx (V16QImode); + operands[4] = simplify_gen_subreg (V16QImode, operands[1], <MODE>mode, 0); + constv = force_const_mem (V16QImode, gen_rtx_CONST_VECTOR (V16QImode, gen_rtvec_v (16, perm_rtx))); + emit_move_insn (operands[2], constv); +}) diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog index 4b4d1f1c1e5..22eecb25aca 100644 --- a/gcc/testsuite/ChangeLog +++ b/gcc/testsuite/ChangeLog @@ -1,3 +1,12 @@ +2019-04-02 Andreas Krebbel + + * gcc.target/s390/zvector/vec-reve-load-byte-z14.c: New test. + * gcc.target/s390/zvector/vec-reve-load-byte.c: New test. + * gcc.target/s390/zvector/vec-reve-load-halfword-z14.c: New test. + * gcc.target/s390/zvector/vec-reve-load-halfword.c: New test. + * gcc.target/s390/zvector/vec-reve-store-byte-z14.c: New test. + * gcc.target/s390/zvector/vec-reve-store-byte.c: New test. + 2019-04-02 Andreas Krebbel * gcc.target/s390/zvector/vec-revb-load-double-z14.c: New test. diff --git a/gcc/testsuite/gcc.target/s390/zvector/vec-reve-load-byte-z14.c b/gcc/testsuite/gcc.target/s390/zvector/vec-reve-load-byte-z14.c new file mode 100644 index 00000000000..e5d2c30bac3 --- /dev/null +++ b/gcc/testsuite/gcc.target/s390/zvector/vec-reve-load-byte-z14.c @@ -0,0 +1,24 @@ +/* { dg-do compile } */ +/* { dg-options "-O3 -mzarch -march=z14 -mzvector" } */ + +#include <vecintrin.h> + +vector signed char +test (vector signed char x) +{ + return vec_reve (x); +} + +vector signed char +test2 (vector signed char *x) +{ + return vec_reve (*x); +} + +vector signed char +test3 (signed char *x) +{ + return vec_reve (vec_xl (0, x)); +} + +/* { dg-final { scan-assembler-times "vperm\t" 3 } } */ diff --git a/gcc/testsuite/gcc.target/s390/zvector/vec-reve-load-byte.c b/gcc/testsuite/gcc.target/s390/zvector/vec-reve-load-byte.c new file mode 100644 index 00000000000..813b2518df0 --- /dev/null +++ b/gcc/testsuite/gcc.target/s390/zvector/vec-reve-load-byte.c @@ -0,0 +1,30 @@ +/* { dg-do compile } */
+/* { dg-options "-O3 -mzarch -march=arch13 -mzvector" } */ + +/* The vector byte element reversal is actually implemented with a 128 + bit bswap. */ + +#include <vecintrin.h> + +vector signed char +test (vector signed char x) +{ + return vec_reve (x); +} + +/* { dg-final { scan-assembler-times "vperm\t" 1 } } */ + + +vector signed char +test2 (vector signed char *x) +{ + return vec_reve (*x); +} + +vector signed char +test3 (signed char *x) +{ + return vec_reve (vec_xl (0, x)); +} + +/* { dg-final { scan-assembler-times "vlbrq\t" 2 } } */ diff --git a/gcc/testsuite/gcc.target/s390/zvector/vec-reve-load-halfword-z14.c b/gcc/testsuite/gcc.target/s390/zvector/vec-reve-load-halfword-z14.c new file mode 100644 index 00000000000..4938ac20613 --- /dev/null +++ b/gcc/testsuite/gcc.target/s390/zvector/vec-reve-load-halfword-z14.c @@ -0,0 +1,24 @@ +/* { dg-do compile } */ +/* { dg-options "-O3 -mzarch -march=z14 -mzvector" } */ + +#include <vecintrin.h> + +vector signed short +foo (vector signed short x) +{ + return vec_reve (x); +} + +vector signed short +bar (vector signed short *x) +{ + return vec_reve (*x); +} + +vector signed short +baz (signed short *x) +{ + return vec_reve (vec_xl (0, x)); +} + +/* { dg-final { scan-assembler-times "vperm\t" 3 } } */ diff --git a/gcc/testsuite/gcc.target/s390/zvector/vec-reve-load-halfword.c b/gcc/testsuite/gcc.target/s390/zvector/vec-reve-load-halfword.c new file mode 100644 index 00000000000..3c9229922ec --- /dev/null +++ b/gcc/testsuite/gcc.target/s390/zvector/vec-reve-load-halfword.c @@ -0,0 +1,27 @@ +/* { dg-do compile } */ +/* { dg-options "-O3 -mzarch -march=arch13 -mzvector" } */ + +#include <vecintrin.h> + +vector signed short +foo (vector signed short x) +{ + return vec_reve (x); +} + +/* { dg-final { scan-assembler-times "vperm\t" 1 } } */ + + +vector signed short +bar (vector signed short *x) +{ + return vec_reve (*x); +} + +vector signed short +baz (signed short *x) +{ + return vec_reve (vec_xl (0, x)); +} + +/* { dg-final { scan-assembler-times "vlerh\t" 2 } }
*/ diff --git a/gcc/testsuite/gcc.target/s390/zvector/vec-reve-store-byte-z14.c b/gcc/testsuite/gcc.target/s390/zvector/vec-reve-store-byte-z14.c new file mode 100644 index 00000000000..f07889273a3 --- /dev/null +++ b/gcc/testsuite/gcc.target/s390/zvector/vec-reve-store-byte-z14.c @@ -0,0 +1,26 @@ +/* { dg-do compile } */ +/* { dg-options "-O3 -mzarch -march=z14 -mzvector" } */ + +#include <vecintrin.h> + +/* reg -> mem */ +void +foo (vector signed char *target, vector signed char x) +{ + *target = vec_reve (x); +} + +void +bar (signed char *target, vector signed char x) +{ + vec_xst (vec_reve (x), 0, target); +} + +/* mem -> mem */ +void +baz (vector signed char *target, vector signed char *x) +{ + *target = vec_reve (*x); +} + +/* { dg-final { scan-assembler-times "vperm\t" 3 } } */ diff --git a/gcc/testsuite/gcc.target/s390/zvector/vec-reve-store-byte.c b/gcc/testsuite/gcc.target/s390/zvector/vec-reve-store-byte.c new file mode 100644 index 00000000000..db8284b1f8f --- /dev/null +++ b/gcc/testsuite/gcc.target/s390/zvector/vec-reve-store-byte.c @@ -0,0 +1,28 @@ +/* { dg-do compile } */ +/* { dg-options "-O3 -mzarch -march=arch13 -mzvector" } */ + +#include <vecintrin.h> + +/* reg -> mem */ +void +foo (vector signed char *target, vector signed char x) +{ + *target = vec_reve (x); +} + +void +bar (signed char *target, vector signed char x) +{ + vec_xst (vec_reve (x), 0, target); +} + +/* { dg-final { scan-assembler-times "vstbrq\t" 2 } } */ + +/* mem -> mem: This becomes vlbrq + vst */ +void +baz (vector signed char *target, vector signed char *x) +{ + *target = vec_reve (*x); +} + +/* { dg-final { scan-assembler-times "vlbrq\t" 1 } } */