S/390: arch13: vec_revb vector byte swap builtin
author Andreas Krebbel <krebbel@linux.ibm.com>
Tue, 2 Apr 2019 11:01:06 +0000 (11:01 +0000)
committer Andreas Krebbel <krebbel@gcc.gnu.org>
Tue, 2 Apr 2019 11:01:06 +0000 (11:01 +0000)
gcc/ChangeLog:

2019-04-02  Andreas Krebbel  <krebbel@linux.ibm.com>

* config/s390/s390-builtin-types.def: Add new builtin function types.
* config/s390/s390-builtins.def: Add overloaded builtin
s390_vec_revb. Add low-level builtins for vlbr and vstbr
instructions.
* config/s390/vecintrin.h (vec_revb): New builtin name definition.
* config/s390/vector.md (VT_HW_HSDT): New mode iterator.
("bswap<mode>"): New expander.
("*bswap<mode>", "*bswap<mode>_emu"): New insn definitions.

gcc/testsuite/ChangeLog:

2019-04-02  Andreas Krebbel  <krebbel@linux.ibm.com>

* gcc.target/s390/zvector/vec-revb-load-double-z14.c: New test.
* gcc.target/s390/zvector/vec-revb-load-double.c: New test.
* gcc.target/s390/zvector/vec-revb-store-double-z14.c: New test.
* gcc.target/s390/zvector/vec-revb-store-double.c: New test.

From-SVN: r270084

gcc/ChangeLog
gcc/config/s390/s390-builtin-types.def
gcc/config/s390/s390-builtins.def
gcc/config/s390/vecintrin.h
gcc/config/s390/vector.md
gcc/testsuite/ChangeLog
gcc/testsuite/gcc.target/s390/zvector/vec-revb-load-double-z14.c [new file with mode: 0644]
gcc/testsuite/gcc.target/s390/zvector/vec-revb-load-double.c [new file with mode: 0644]
gcc/testsuite/gcc.target/s390/zvector/vec-revb-store-double-z14.c [new file with mode: 0644]
gcc/testsuite/gcc.target/s390/zvector/vec-revb-store-double.c [new file with mode: 0644]

index cd98e9fec0de2112815244a9ae0e2129667f255b..9017bb214816940f3014786ee1bbbba91a61fad8 100644 (file)
@@ -1,3 +1,14 @@
+2019-04-02  Andreas Krebbel  <krebbel@linux.ibm.com>
+
+       * config/s390/s390-builtin-types.def: Add new builtin function types.
+       * config/s390/s390-builtins.def: Add overloaded builtin
+       s390_vec_revb. Add low-level builtins for vlbr and vstbr
+       instructions.
+       * config/s390/vecintrin.h (vec_revb): New builtin name definition.
+       * config/s390/vector.md (VT_HW_HSDT): New mode iterator.
+       ("bswap<mode>"): New expander.
+       ("*bswap<mode>", "*bswap<mode>_emu"): New insn definitions.
+
 2019-04-02  Andreas Krebbel  <krebbel@linux.ibm.com>
 
        * config/s390/s390-builtins.def (B_VXE2): New builtin flag definition.
index 25e74fafb905743c62d827790a4d3a22b1e0b556..cfd8f5783e55cd7f6857c5467a5b74abf90cbd27 100644 (file)
@@ -83,6 +83,7 @@ DEF_VECTOR_TYPE (BT_UV2DI, BT_ULONGLONG, 2)
 DEF_VECTOR_TYPE (BT_UV4SI, BT_UINT, 4)
 DEF_VECTOR_TYPE (BT_UV8HI, BT_USHORT, 8)
 DEF_VECTOR_TYPE (BT_V16QI, BT_SCHAR, 16)
+DEF_VECTOR_TYPE (BT_V1TI, BT_INT128, 1)
 DEF_VECTOR_TYPE (BT_V2DF, BT_DBL, 2)
 DEF_VECTOR_TYPE (BT_V2DI, BT_LONGLONG, 2)
 DEF_VECTOR_TYPE (BT_V4SF, BT_FLT, 4)
@@ -151,6 +152,7 @@ DEF_FN_TYPE_1 (BT_FN_UV8HI_UV8HI, BT_UV8HI, BT_UV8HI)
 DEF_FN_TYPE_1 (BT_FN_V16QI_SCHAR, BT_V16QI, BT_SCHAR)
 DEF_FN_TYPE_1 (BT_FN_V16QI_UCHAR, BT_V16QI, BT_UCHAR)
 DEF_FN_TYPE_1 (BT_FN_V16QI_V16QI, BT_V16QI, BT_V16QI)
+DEF_FN_TYPE_1 (BT_FN_V1TI_V1TI, BT_V1TI, BT_V1TI)
 DEF_FN_TYPE_1 (BT_FN_V2DF_DBL, BT_V2DF, BT_DBL)
 DEF_FN_TYPE_1 (BT_FN_V2DF_DBLCONSTPTR, BT_V2DF, BT_DBLCONSTPTR)
 DEF_FN_TYPE_1 (BT_FN_V2DF_FLTCONSTPTR, BT_V2DF, BT_FLTCONSTPTR)
index 5f5f05c6a3590cd7b2fba16bac86bb03e4261196..a2276858fe85af2e92b6e39c7def53d753827d97 100644 (file)
@@ -2867,3 +2867,28 @@ OB_DEF_VAR (s390_vec_fp_test_data_class_flt,s390_vftcisbcc, B_VXE,
 OB_DEF_VAR (s390_vec_fp_test_data_class_dbl,s390_vftcidbcc, 0,                  O2_U12,             BT_OV_BV2DI_V2DF_USHORT_INTPTR)          /* vftcidb */
 
 /* arch 13 builtins */
+
+/* vec_revb: returns a vector where each vector element contains the corresponding byte-reversed vector element of the input vector in OP0.  Each variant below maps one element type onto the low-level vlbr builtin of the same element size.  */
+OB_DEF     (s390_vec_revb,              s390_vec_revb_s16,  s390_vec_revb_dbl,  B_VX,               BT_FN_OV4SI_OV4SI)
+OB_DEF_VAR (s390_vec_revb_s16,          s390_vlbrh,         0,                  0,                  BT_OV_V8HI_V8HI)  /* vlbrh */
+OB_DEF_VAR (s390_vec_revb_u16,          s390_vlbrh,         0,                  0,                  BT_OV_UV8HI_UV8HI)  /* vlbrh */
+OB_DEF_VAR (s390_vec_revb_s32,          s390_vlbrf,         0,                  0,                  BT_OV_V4SI_V4SI)  /* vlbrf */
+OB_DEF_VAR (s390_vec_revb_u32,          s390_vlbrf,         0,                  0,                  BT_OV_UV4SI_UV4SI)  /* vlbrf */
+OB_DEF_VAR (s390_vec_revb_s64,          s390_vlbrg,         0,                  0,                  BT_OV_V2DI_V2DI)  /* vlbrg */
+OB_DEF_VAR (s390_vec_revb_u64,          s390_vlbrg,         0,                  0,                  BT_OV_UV2DI_UV2DI)  /* vlbrg */
+OB_DEF_VAR (s390_vec_revb_flt,          s390_vlbrf_flt,     0,                  B_VXE,              BT_OV_V4SF_V4SF)  /* vlbrf */
+OB_DEF_VAR (s390_vec_revb_dbl,          s390_vlbrg_dbl,     0,                  0,                  BT_OV_V2DF_V2DF)  /* vlbrg */
+/* Low-level builtins for the vlbr instructions; the second argument of each names the matching bswap<mode> pattern from vector.md.  */
+B_DEF      (s390_vlbrh,                 bswapv8hi,          0,                  B_VX,               0,                   BT_FN_V8HI_V8HI)
+B_DEF      (s390_vlbrf,                 bswapv4si,          0,                  B_VX,               0,                   BT_FN_V4SI_V4SI)
+B_DEF      (s390_vlbrg,                 bswapv2di,          0,                  B_VX,               0,                   BT_FN_V2DI_V2DI)
+B_DEF      (s390_vlbrq,                 bswapv1ti,          0,                  B_VX,               0,                   BT_FN_V1TI_V1TI)
+B_DEF      (s390_vlbrf_flt,             bswapv4sf,          0,                  B_VXE,              0,                   BT_FN_V4SF_V4SF)
+B_DEF      (s390_vlbrg_dbl,             bswapv2df,          0,                  B_VX,               0,                   BT_FN_V2DF_V2DF)
+/* Low-level builtins for the vstbr instructions; they name the same bswap<mode> patterns as the vlbr builtins above.  */
+B_DEF      (s390_vstbrh,                bswapv8hi,          0,                  B_VX,               0,                   BT_FN_V8HI_V8HI)
+B_DEF      (s390_vstbrf,                bswapv4si,          0,                  B_VX,               0,                   BT_FN_V4SI_V4SI)
+B_DEF      (s390_vstbrg,                bswapv2di,          0,                  B_VX,               0,                   BT_FN_V2DI_V2DI)
+B_DEF      (s390_vstbrq,                bswapv1ti,          0,                  B_VX,               0,                   BT_FN_V1TI_V1TI)
+B_DEF      (s390_vstbrf_flt,            bswapv4sf,          0,                  B_VXE,              0,                   BT_FN_V4SF_V4SF)
+B_DEF      (s390_vstbrg_dbl,            bswapv2df,          0,                  B_VX,               0,                   BT_FN_V2DF_V2DF)
index 7a8c97b06ca27be5681a32494cba05f1e7b063b5..91f0a57623619997da74485f032251fa9b883a5b 100644 (file)
@@ -311,4 +311,5 @@ __lcbb(const void *ptr, int bndry)
 #define vec_nabs __builtin_s390_vec_nabs
 #define vec_sqrt __builtin_s390_vec_sqrt
 #define vec_fp_test_data_class __builtin_s390_vec_fp_test_data_class
+#define vec_revb __builtin_s390_vec_revb
 #endif /* _VECINTRIN_H */
index 440af6dec979d3e0e7fda1f3dca78d5f4a377f99..f25c86691575d63e0a079b17ad7afd5ddda4684a 100644 (file)
@@ -33,6 +33,7 @@
 (define_mode_iterator V_HW2 [V16QI V8HI V4SI V2DI V2DF (V4SF "TARGET_VXE") (V1TF "TARGET_VXE")])
 
 (define_mode_iterator V_HW_64 [V2DI V2DF])
+(define_mode_iterator VT_HW_HSDT [V8HI V4SI V4SF V2DI V2DF V1TI V1TF TI TF]) ; modes with 2-, 4-, 8- and 16-byte inner modes, iterated over by the byte swap patterns
 
 ; Including TI for instructions that support it (va, vn, ...)
 (define_mode_iterator VT_HW [V16QI V8HI V4SI V2DI V2DF V1TI TI (V4SF "TARGET_VXE") (V1TF "TARGET_VXE")])
   "vcl<VX_VEC_CONV_INT:bhfgq><VX_VEC_CONV_BFP:xde>b\t%v0,%v1,0,5"
   [(set_attr "op_type" "VRR")])
 
+;
+; Vector byte swap patterns
+;
+
+; FIXME: The bswap rtl standard name currently does not appear to be
+; used for vector modes.
+(define_expand "bswap<mode>" ; the bswapv8hi, bswapv4si, ... names given in s390-builtins.def are instances of this pattern
+  [(set (match_operand:VT_HW_HSDT                   0 "nonimmediate_operand" "")
+       (bswap:VT_HW_HSDT (match_operand:VT_HW_HSDT 1 "nonimmediate_operand" "")))]
+  "TARGET_VX") ; "*bswap<mode>" requires TARGET_VXE2, "*bswap<mode>_emu" only TARGET_VX
+
+; vlbrh, vlbrf, vlbrg, vlbrq, vstbrh, vstbrf, vstbrg, vstbrq
+(define_insn "*bswap<mode>" ; alternatives in order: reg -> reg, mem -> reg, reg -> mem
+  [(set (match_operand:VT_HW_HSDT                   0 "nonimmediate_operand" "=v,v,R")
+       (bswap:VT_HW_HSDT (match_operand:VT_HW_HSDT 1 "nonimmediate_operand"  "v,R,v")))]
+  "TARGET_VXE2"
+  "@
+   #
+   vlbr<bhfgq>\t%v0,%v1
+   vstbr<bhfgq>\t%v1,%v0"
+  [(set_attr "op_type" "*,VRX,VRX")]) ; the reg -> reg alternative outputs "#" instead of an instruction template
+
+(define_insn_and_split "*bswap<mode>_emu" ; byte swap rewritten as a V16QI UNSPEC_VEC_PERM with a constant selector vector
+  [(set (match_operand:VT_HW_HSDT                   0 "nonimmediate_operand" "=vR")
+       (bswap:VT_HW_HSDT (match_operand:VT_HW_HSDT 1 "nonimmediate_operand" "vR")))]
+  "TARGET_VX && can_create_pseudo_p ()" ; the split below allocates new pseudo registers
+  "#"
+  "&& ((!memory_operand (operands[1], <MODE>mode)
+        && !memory_operand (operands[0], <MODE>mode))
+        || !TARGET_VXE2)" ; with TARGET_VXE2 the split only applies when neither operand is in memory
+  [(set (match_dup 3)
+       (unspec:V16QI [(match_dup 4)
+                      (match_dup 4)
+                      (match_dup 2)]
+                     UNSPEC_VEC_PERM))
+   (set (match_dup 0) (subreg:VT_HW_HSDT (match_dup 3) 0))] ; view the V16QI permute result in the original mode
+{
+  static char p[4][16] = /* byte selectors for the permute, one row per inner mode size */
+    { { 1,  0,  3,  2,  5,  4,  7, 6, 9,  8,  11, 10, 13, 12, 15, 14 },   /* H: 2 bytes */
+      { 3,  2,  1,  0,  7,  6,  5, 4, 11, 10, 9,  8,  15, 14, 13, 12 },   /* S: 4 bytes */
+      { 7,  6,  5,  4,  3,  2,  1, 0, 15, 14, 13, 12, 11, 10, 9,  8  },   /* D: 8 bytes */
+      { 15, 14, 13, 12, 11, 10, 9, 8, 7,  6,  5,  4,  3,  2,  1,  0  } }; /* T: 16 bytes */
+  char *perm; /* row of p selected for <MODE>mode */
+  rtx perm_rtx[16], constv;
+
+  switch (GET_MODE_SIZE (GET_MODE_INNER (<MODE>mode))) /* byte size of the inner mode */
+    {
+    case 2: perm = p[0]; break;
+    case 4: perm = p[1]; break;
+    case 8: perm = p[2]; break;
+    case 16: perm = p[3]; break;
+    default: gcc_unreachable ();
+    }
+  for (int i = 0; i < 16; i++) /* turn the 16 selectors into CONST_INT rtxes */
+    perm_rtx[i] = GEN_INT (perm[i]);
+
+  operands[1] = force_reg (<MODE>mode, operands[1]); /* get the input into a register */
+  operands[2] = gen_reg_rtx (V16QImode); /* will hold the selector vector */
+  operands[3] = gen_reg_rtx (V16QImode); /* receives the permute result */
+  operands[4] = simplify_gen_subreg (V16QImode, operands[1], <MODE>mode, 0); /* input viewed as V16QI */
+  constv = force_const_mem (V16QImode, gen_rtx_CONST_VECTOR (V16QImode, gen_rtvec_v (16, perm_rtx))); /* selector vector as a constant in memory */
+  emit_move_insn (operands[2], constv); /* load the selectors before the permute is emitted */
+})
+
+
 ; reduc_smin
 ; reduc_smax
 ; reduc_umin
index a86d26a5458e2d8a35a711a966ccd5588b4eec48..4b4d1f1c1e5ff1b332981430b7964ca99b821645 100644 (file)
@@ -1,3 +1,10 @@
+2019-04-02  Andreas Krebbel  <krebbel@linux.ibm.com>
+
+       * gcc.target/s390/zvector/vec-revb-load-double-z14.c: New test.
+       * gcc.target/s390/zvector/vec-revb-load-double.c: New test.
+       * gcc.target/s390/zvector/vec-revb-store-double-z14.c: New test.
+       * gcc.target/s390/zvector/vec-revb-store-double.c: New test.
+
 2019-04-02  Andreas Krebbel  <krebbel@linux.ibm.com>
 
        * gcc.target/s390/arch13/fp-signedint-convert-1.c: New test.
diff --git a/gcc/testsuite/gcc.target/s390/zvector/vec-revb-load-double-z14.c b/gcc/testsuite/gcc.target/s390/zvector/vec-revb-load-double-z14.c
new file mode 100644 (file)
index 0000000..e394460
--- /dev/null
@@ -0,0 +1,24 @@
+/* { dg-do compile } */
+/* { dg-options "-O3 -mzarch -march=z14 -mzvector" } */
+
+#include <vecintrin.h>
+
+vector double
+test (vector double x) /* reg -> reg: operand passed by value */
+{
+  return vec_revb (x);
+}
+
+vector double
+test2 (vector double *x) /* mem -> reg: operand read through a vector pointer */
+{
+  return vec_revb (*x);
+}
+
+vector double
+test3 (double *x) /* mem -> reg: operand loaded with vec_xl from a double pointer */
+{
+  return vec_revb (vec_xl (0, x));
+}
+
+/* { dg-final { scan-assembler-times "vperm\t" 3 } } */
diff --git a/gcc/testsuite/gcc.target/s390/zvector/vec-revb-load-double.c b/gcc/testsuite/gcc.target/s390/zvector/vec-revb-load-double.c
new file mode 100644 (file)
index 0000000..e5be06c
--- /dev/null
@@ -0,0 +1,27 @@
+/* { dg-do compile } */
+/* { dg-options "-O3 -mzarch -march=arch13 -mzvector" } */
+
+#include <vecintrin.h>
+
+vector double
+test (vector double x) /* reg -> reg: operand passed by value; counted by the vperm check below */
+{
+  return vec_revb (x);
+}
+
+/* { dg-final { scan-assembler-times "vperm\t" 1 } } */
+
+
+vector double
+test2 (vector double *x) /* mem -> reg: operand read through a vector pointer */
+{
+  return vec_revb (*x);
+}
+
+vector double
+test3 (double *x) /* mem -> reg: operand loaded with vec_xl from a double pointer */
+{
+  return vec_revb (vec_xl (0, x));
+}
+
+/* { dg-final { scan-assembler-times "vlbrg\t" 2 } } */
diff --git a/gcc/testsuite/gcc.target/s390/zvector/vec-revb-store-double-z14.c b/gcc/testsuite/gcc.target/s390/zvector/vec-revb-store-double-z14.c
new file mode 100644 (file)
index 0000000..56a7442
--- /dev/null
@@ -0,0 +1,26 @@
+/* { dg-do compile } */
+/* { dg-options "-O3 -mzarch -march=z14 -mzvector" } */
+
+#include <vecintrin.h>
+
+/* reg -> mem: result stored through a vector pointer (test) or with vec_xst (test3) */
+void
+test (vector double *target, vector double x)
+{
+  *target = vec_revb (x);
+}
+
+void
+test3 (double *target, vector double x) /* reg -> mem: result stored with vec_xst to a double pointer */
+{
+  vec_xst (vec_revb (x), 0, target);
+}
+
+/* mem -> mem: operand and result are both accessed through vector pointers */
+void
+test2 (vector double *target, vector double *x)
+{
+  *target = vec_revb (*x);
+}
+
+/* { dg-final { scan-assembler-times "vperm\t" 3 } } */
diff --git a/gcc/testsuite/gcc.target/s390/zvector/vec-revb-store-double.c b/gcc/testsuite/gcc.target/s390/zvector/vec-revb-store-double.c
new file mode 100644 (file)
index 0000000..9aa6ec9
--- /dev/null
@@ -0,0 +1,28 @@
+/* { dg-do compile } */
+/* { dg-options "-O3 -mzarch -march=arch13 -mzvector" } */
+
+#include <vecintrin.h>
+
+/* reg -> mem: result stored through a vector pointer (test) or with vec_xst (test3) */
+void
+test (vector double *target, vector double x)
+{
+  *target = vec_revb (x);
+}
+
+void
+test3 (double *target, vector double x) /* reg -> mem: result stored with vec_xst to a double pointer */
+{
+  vec_xst (vec_revb (x), 0, target);
+}
+
+/* { dg-final { scan-assembler-times "vstbrg\t" 2 } } */
+
+/* mem -> mem: This becomes vlbrg + vst, which is the single vlbrg counted below */
+void
+test2 (vector double *target, vector double *x)
+{
+  *target = vec_revb (*x);
+}
+
+/* { dg-final { scan-assembler-times "vlbrg\t" 1 } } */