S/390: arch13: vec_reve element order reversal builtins
author: Andreas Krebbel <krebbel@linux.ibm.com>
Tue, 2 Apr 2019 11:02:22 +0000 (11:02 +0000)
committer: Andreas Krebbel <krebbel@gcc.gnu.org>
Tue, 2 Apr 2019 11:02:22 +0000 (11:02 +0000)
gcc/ChangeLog:

2019-04-02  Andreas Krebbel  <krebbel@linux.ibm.com>

* config/s390/s390-builtin-types.def: Add new builtin function type.
* config/s390/s390-builtins.def: Add overloaded builtin
s390_vec_reve and low-level builtins for s390_vler and s390_vster.
* config/s390/s390.md (UNSPEC_VEC_ELTSWAP): New constant definition.
* config/s390/vecintrin.h (vec_reve): New builtin name definition.
* config/s390/vx-builtins.md (V_HW_HSD): New mode iterator.
("eltswap<mode>"): New expander.
("*eltswapv16qi", "*eltswap<mode>", "*eltswap<mode>_emu"): New
insn definitions.

gcc/testsuite/ChangeLog:

2019-04-02  Andreas Krebbel  <krebbel@linux.ibm.com>

* gcc.target/s390/zvector/vec-reve-load-byte-z14.c: New test.
* gcc.target/s390/zvector/vec-reve-load-byte.c: New test.
* gcc.target/s390/zvector/vec-reve-load-halfword-z14.c: New test.
* gcc.target/s390/zvector/vec-reve-load-halfword.c: New test.
* gcc.target/s390/zvector/vec-reve-store-byte-z14.c: New test.
* gcc.target/s390/zvector/vec-reve-store-byte.c: New test.

From-SVN: r270085

13 files changed:
gcc/ChangeLog
gcc/config/s390/s390-builtin-types.def
gcc/config/s390/s390-builtins.def
gcc/config/s390/s390.md
gcc/config/s390/vecintrin.h
gcc/config/s390/vx-builtins.md
gcc/testsuite/ChangeLog
gcc/testsuite/gcc.target/s390/zvector/vec-reve-load-byte-z14.c [new file with mode: 0644]
gcc/testsuite/gcc.target/s390/zvector/vec-reve-load-byte.c [new file with mode: 0644]
gcc/testsuite/gcc.target/s390/zvector/vec-reve-load-halfword-z14.c [new file with mode: 0644]
gcc/testsuite/gcc.target/s390/zvector/vec-reve-load-halfword.c [new file with mode: 0644]
gcc/testsuite/gcc.target/s390/zvector/vec-reve-store-byte-z14.c [new file with mode: 0644]
gcc/testsuite/gcc.target/s390/zvector/vec-reve-store-byte.c [new file with mode: 0644]

diff --git a/gcc/ChangeLog b/gcc/ChangeLog
index 9017bb214816940f3014786ee1bbbba91a61fad8..9cb3b9843d3e580b0bf698d9265c34b5f661b2b9 100644 (file)
@@ -1,3 +1,15 @@
+2019-04-02  Andreas Krebbel  <krebbel@linux.ibm.com>
+
+       * config/s390/s390-builtin-types.def: Add new builtin function type.
+       * config/s390/s390-builtins.def: Add overloaded builtin
+       s390_vec_reve and low-level builtins for s390_vler and s390_vster.
+       * config/s390/s390.md (UNSPEC_VEC_ELTSWAP): New constant definition.
+       * config/s390/vecintrin.h (vec_reve): New builtin name definition.
+       * config/s390/vx-builtins.md (V_HW_HSD): New mode iterator.
+       ("eltswap<mode>"): New expander.
+       ("*eltswapv16qi", "*eltswap<mode>", "*eltswap<mode>_emu"): New
+       insn definitions.
+
 2019-04-02  Andreas Krebbel  <krebbel@linux.ibm.com>
 
        * config/s390/s390-builtin-types.def: Add new builtin function types.
diff --git a/gcc/config/s390/s390-builtin-types.def b/gcc/config/s390/s390-builtin-types.def
index cfd8f5783e55cd7f6857c5467a5b74abf90cbd27..ff53ec541f8db914c572fc1d1a9eaea15c9e8849 100644 (file)
@@ -382,6 +382,7 @@ DEF_OV_TYPE (BT_OV_BV16QI_UV16QI_UV16QI_UV16QI, BT_BV16QI, BT_UV16QI, BT_UV16QI,
 DEF_OV_TYPE (BT_OV_BV16QI_UV16QI_UV16QI_UV16QI_INTPTR, BT_BV16QI, BT_UV16QI, BT_UV16QI, BT_UV16QI, BT_INTPTR)
 DEF_OV_TYPE (BT_OV_BV16QI_V16QI_V16QI, BT_BV16QI, BT_V16QI, BT_V16QI)
 DEF_OV_TYPE (BT_OV_BV16QI_V16QI_V16QI_INTPTR, BT_BV16QI, BT_V16QI, BT_V16QI, BT_INTPTR)
+DEF_OV_TYPE (BT_OV_BV2DI_BV2DI, BT_BV2DI, BT_BV2DI)
 DEF_OV_TYPE (BT_OV_BV2DI_BV2DI_BV2DI, BT_BV2DI, BT_BV2DI, BT_BV2DI)
 DEF_OV_TYPE (BT_OV_BV2DI_BV2DI_BV2DI_BV2DI, BT_BV2DI, BT_BV2DI, BT_BV2DI, BT_BV2DI)
 DEF_OV_TYPE (BT_OV_BV2DI_BV2DI_BV2DI_INT, BT_BV2DI, BT_BV2DI, BT_BV2DI, BT_INT)
diff --git a/gcc/config/s390/s390-builtins.def b/gcc/config/s390/s390-builtins.def
index a2276858fe85af2e92b6e39c7def53d753827d97..e4cfa80adaa691017d3689a5f6053762b60e497f 100644 (file)
@@ -2892,3 +2892,34 @@ B_DEF      (s390_vstbrg,                bswapv2di,          0,
 B_DEF      (s390_vstbrq,                bswapv1ti,          0,                  B_VX,               0,                   BT_FN_V1TI_V1TI)
 B_DEF      (s390_vstbrf_flt,            bswapv4sf,          0,                  B_VXE,              0,                   BT_FN_V4SF_V4SF)
 B_DEF      (s390_vstbrg_dbl,            bswapv2df,          0,                  B_VX,               0,                   BT_FN_V2DF_V2DF)
+
+/* Returns a vector with the elements of the input vector OP0 in reversed order.  */
+OB_DEF     (s390_vec_reve,              s390_vec_reve_b8,   s390_vec_reve_dbl,  B_VX,               BT_FN_OV4SI_OV4SI)
+OB_DEF_VAR (s390_vec_reve_b8,           s390_vlerb,         0,                  0,                  BT_OV_BV16QI_BV16QI)
+OB_DEF_VAR (s390_vec_reve_s8,           s390_vlerb,         0,                  0,                  BT_OV_V16QI_V16QI)
+OB_DEF_VAR (s390_vec_reve_u8,           s390_vlerb,         0,                  0,                  BT_OV_UV16QI_UV16QI)
+OB_DEF_VAR (s390_vec_reve_b16,          s390_vlerh,         0,                  0,                  BT_OV_BV8HI_BV8HI)
+OB_DEF_VAR (s390_vec_reve_s16,          s390_vlerh,         0,                  0,                  BT_OV_V8HI_V8HI)
+OB_DEF_VAR (s390_vec_reve_u16,          s390_vlerh,         0,                  0,                  BT_OV_UV8HI_UV8HI)
+OB_DEF_VAR (s390_vec_reve_b32,          s390_vlerf,         0,                  0,                  BT_OV_BV4SI_BV4SI)
+OB_DEF_VAR (s390_vec_reve_s32,          s390_vlerf,         0,                  0,                  BT_OV_V4SI_V4SI)
+OB_DEF_VAR (s390_vec_reve_u32,          s390_vlerf,         0,                  0,                  BT_OV_UV4SI_UV4SI)
+OB_DEF_VAR (s390_vec_reve_b64,          s390_vlerg,         0,                  0,                  BT_OV_BV2DI_BV2DI)
+OB_DEF_VAR (s390_vec_reve_s64,          s390_vlerg,         0,                  0,                  BT_OV_V2DI_V2DI)
+OB_DEF_VAR (s390_vec_reve_u64,          s390_vlerg,         0,                  0,                  BT_OV_UV2DI_UV2DI)
+OB_DEF_VAR (s390_vec_reve_flt,          s390_vlerf_flt,     B_VXE,              0,                  BT_OV_V4SF_V4SF)
+OB_DEF_VAR (s390_vec_reve_dbl,          s390_vlerg_dbl,     0,                  0,                  BT_OV_V2DF_V2DF)
+
+B_DEF      (s390_vlerb,                 eltswapv16qi,       0,                  B_VX,               0,                   BT_FN_V16QI_V16QI)
+B_DEF      (s390_vlerh,                 eltswapv8hi,        0,                  B_VX,               0,                   BT_FN_V8HI_V8HI)
+B_DEF      (s390_vlerf,                 eltswapv4si,        0,                  B_VX,               0,                   BT_FN_V4SI_V4SI)
+B_DEF      (s390_vlerg,                 eltswapv2di,        0,                  B_VX,               0,                   BT_FN_V2DI_V2DI)
+B_DEF      (s390_vlerf_flt,             eltswapv4sf,        0,                  B_VXE,              0,                   BT_FN_V4SF_V4SF)
+B_DEF      (s390_vlerg_dbl,             eltswapv2df,        0,                  B_VX,               0,                   BT_FN_V2DF_V2DF)
+
+B_DEF      (s390_vsterb,                eltswapv16qi,       0,                  B_VX,               0,                   BT_FN_V16QI_V16QI)
+B_DEF      (s390_vsterh,                eltswapv8hi,        0,                  B_VX,               0,                   BT_FN_V8HI_V8HI)
+B_DEF      (s390_vsterf,                eltswapv4si,        0,                  B_VX,               0,                   BT_FN_V4SI_V4SI)
+B_DEF      (s390_vsterg,                eltswapv2di,        0,                  B_VX,               0,                   BT_FN_V2DI_V2DI)
+B_DEF      (s390_vsterf_flt,            eltswapv4sf,        0,                  B_VXE,              0,                   BT_FN_V4SF_V4SF)
+B_DEF      (s390_vsterg_dbl,            eltswapv2df,        0,                  B_VX,               0,                   BT_FN_V2DF_V2DF)
diff --git a/gcc/config/s390/s390.md b/gcc/config/s390/s390.md
index 17aafe54afb96db367197a82341bf108db632b23..bdc7385cfe4e7b04275d464bbfe4dba431a4f059 100644 (file)
 
    UNSPEC_VEC_VFMIN
    UNSPEC_VEC_VFMAX
+
+   UNSPEC_VEC_ELTSWAP
 ])
 
 ;;
diff --git a/gcc/config/s390/vecintrin.h b/gcc/config/s390/vecintrin.h
index 91f0a57623619997da74485f032251fa9b883a5b..1220bf6c41e75ddba9d012cbfe9d757f9635d932 100644 (file)
@@ -312,4 +312,5 @@ __lcbb(const void *ptr, int bndry)
 #define vec_sqrt __builtin_s390_vec_sqrt
 #define vec_fp_test_data_class __builtin_s390_vec_fp_test_data_class
 #define vec_revb __builtin_s390_vec_revb
+#define vec_reve __builtin_s390_vec_reve
 #endif /* _VECINTRIN_H */
diff --git a/gcc/config/s390/vx-builtins.md b/gcc/config/s390/vx-builtins.md
index b2bc8e2d725777935681b6fa421df1d86da0b717..55b49f456dfa9260681c3cb116052fad22ea5371 100644 (file)
@@ -22,7 +22,7 @@
 
 (define_mode_iterator V_HW_32_64 [V4SI V2DI V2DF (V4SF "TARGET_VXE")])
 (define_mode_iterator VI_HW_SD [V4SI V2DI])
-(define_mode_iterator V_HW_HSD [V8HI V4SI V2DI V2DF])
+(define_mode_iterator V_HW_HSD [V8HI V4SI (V4SF "TARGET_VXE") V2DI V2DF])
 (define_mode_iterator V_HW_4 [V4SI V4SF])
 ; Full size vector modes with more than one element which are directly supported in vector registers by the hardware.
 (define_mode_iterator VEC_HW  [V16QI V8HI V4SI V2DI V2DF (V4SF "TARGET_VXE")])
   "TARGET_VXE"
   "<vw>fmax<sdx>b\t%v0,%v1,%v2,%b3"
   [(set_attr "op_type" "VRR")])
+
+; The element reversal builtins introduced with arch13 have been made
+; available also for older CPUs down to z13.
+(define_expand "eltswap<mode>"
+  [(set (match_operand:VEC_HW                 0 "nonimmediate_operand" "")
+       (unspec:VEC_HW [(match_operand:VEC_HW 1 "nonimmediate_operand" "")]
+                      UNSPEC_VEC_ELTSWAP))]
+  "TARGET_VX")
+
+; The byte element reversal is implemented as 128 bit byte swap.
+; Alternatively this could be emitted as bswap:V1TI but the required
+; subregs appear to confuse combine.
+(define_insn "*eltswapv16qi"
+  [(set (match_operand:V16QI                0 "nonimmediate_operand" "=v,v,R")
+       (unspec:V16QI [(match_operand:V16QI 1 "nonimmediate_operand"  "v,R,v")]
+                     UNSPEC_VEC_ELTSWAP))]
+  "TARGET_VXE2"
+  "@
+   #
+   vlbrq\t%v0,%v1
+   vstbrq\t%v1,%v0"
+  [(set_attr "op_type" "*,VRX,VRX")])
+
+; vlerh, vlerf, vlerg, vsterh, vsterf, vsterg
+(define_insn "*eltswap<mode>"
+  [(set (match_operand:V_HW_HSD                   0 "nonimmediate_operand" "=v,v,R")
+       (unspec:V_HW_HSD [(match_operand:V_HW_HSD 1 "nonimmediate_operand"  "v,R,v")]
+                        UNSPEC_VEC_ELTSWAP))]
+  "TARGET_VXE2"
+  "@
+   #
+   vler<bhfgq>\t%v0,%v1
+   vster<bhfgq>\t%v1,%v0"
+  [(set_attr "op_type" "*,VRX,VRX")])
+
+; arch13 has instructions for doing element reversal from mem to reg
+; or the other way around.  For reg to reg or on pre arch13 machines
+; we have to emulate it with vector permute.
+(define_insn_and_split "*eltswap<mode>_emu"
+  [(set (match_operand:VEC_HW                 0 "nonimmediate_operand" "=vR")
+       (unspec:VEC_HW [(match_operand:VEC_HW 1 "nonimmediate_operand" "vR")]
+                      UNSPEC_VEC_ELTSWAP))]
+  "TARGET_VX && can_create_pseudo_p ()"
+  "#"
+  "&& ((!memory_operand (operands[0], <MODE>mode)
+        && !memory_operand (operands[1], <MODE>mode))
+       || !TARGET_VXE2)"
+  [(set (match_dup 3)
+       (unspec:V16QI [(match_dup 4)
+                      (match_dup 4)
+                      (match_dup 2)]
+                     UNSPEC_VEC_PERM))
+   (set (match_dup 0) (subreg:VEC_HW (match_dup 3) 0))]
+{
+  static char p[4][16] =
+    { { 15, 14, 13, 12, 11, 10, 9,  8,  7,  6,  5,  4,  3,  2,  1,  0 },   /* Q */
+      { 14, 15, 12, 13, 10, 11, 8,  9,  6,  7,  4,  5,  2,  3,  0,  1 },   /* H */
+      { 12, 13, 14, 15, 8,  9,  10, 11, 4,  5,  6,  7,  0,  1,  2,  3 },   /* S */
+      { 8,  9,  10, 11, 12, 13, 14, 15, 0,  1,  2,  3,  4,  5,  6,  7 } }; /* D */
+  char *perm;
+  rtx perm_rtx[16], constv;
+
+  switch (GET_MODE_SIZE (GET_MODE_INNER (<MODE>mode)))
+    {
+    case 1: perm = p[0]; break;
+    case 2: perm = p[1]; break;
+    case 4: perm = p[2]; break;
+    case 8: perm = p[3]; break;
+    default: gcc_unreachable ();
+    }
+
+  for (int i = 0; i < 16; i++)
+    perm_rtx[i] = GEN_INT (perm[i]);
+
+  operands[1] = force_reg (<MODE>mode, operands[1]);
+  operands[2] = gen_reg_rtx (V16QImode);
+  operands[3] = gen_reg_rtx (V16QImode);
+  operands[4] = simplify_gen_subreg (V16QImode, operands[1], <MODE>mode, 0);
+  constv = force_const_mem (V16QImode, gen_rtx_CONST_VECTOR (V16QImode, gen_rtvec_v (16, perm_rtx)));
+  emit_move_insn (operands[2], constv);
+})
diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog
index 4b4d1f1c1e5ff1b332981430b7964ca99b821645..22eecb25aca2ed69ae123a4d7db626dd3369504b 100644 (file)
@@ -1,3 +1,12 @@
+2019-04-02  Andreas Krebbel  <krebbel@linux.ibm.com>
+
+       * gcc.target/s390/zvector/vec-reve-load-byte-z14.c: New test.
+       * gcc.target/s390/zvector/vec-reve-load-byte.c: New test.
+       * gcc.target/s390/zvector/vec-reve-load-halfword-z14.c: New test.
+       * gcc.target/s390/zvector/vec-reve-load-halfword.c: New test.
+       * gcc.target/s390/zvector/vec-reve-store-byte-z14.c: New test.
+       * gcc.target/s390/zvector/vec-reve-store-byte.c: New test.
+
 2019-04-02  Andreas Krebbel  <krebbel@linux.ibm.com>
 
        * gcc.target/s390/zvector/vec-revb-load-double-z14.c: New test.
diff --git a/gcc/testsuite/gcc.target/s390/zvector/vec-reve-load-byte-z14.c b/gcc/testsuite/gcc.target/s390/zvector/vec-reve-load-byte-z14.c
new file mode 100644 (file)
index 0000000..e5d2c30
--- /dev/null
@@ -0,0 +1,24 @@
+/* { dg-do compile } */
+/* { dg-options "-O3 -mzarch -march=z14 -mzvector" } */
+
+#include <vecintrin.h>
+
+vector signed char
+test (vector signed char x)
+{
+  return vec_reve (x);
+}
+
+vector signed char
+test2 (vector signed char *x)
+{
+  return vec_reve (*x);
+}
+
+vector signed char
+test3 (signed char *x)
+{
+  return vec_reve (vec_xl (0, x));
+}
+
+/* { dg-final { scan-assembler-times "vperm\t" 3 } } */
diff --git a/gcc/testsuite/gcc.target/s390/zvector/vec-reve-load-byte.c b/gcc/testsuite/gcc.target/s390/zvector/vec-reve-load-byte.c
new file mode 100644 (file)
index 0000000..813b251
--- /dev/null
@@ -0,0 +1,30 @@
+/* { dg-do compile } */
+/* { dg-options "-O3 -mzarch -march=arch13 -mzvector" } */
+
+/* The vector byte element reversal is actually implemented with a 128
+   bit bswap.  */
+
+#include <vecintrin.h>
+
+vector signed char
+test (vector signed char x)
+{
+  return vec_reve (x);
+}
+
+/* { dg-final { scan-assembler-times "vperm\t" 1 } } */
+
+
+vector signed char
+test2 (vector signed char *x)
+{
+  return vec_reve (*x);
+}
+
+vector signed char
+test3 (signed char *x)
+{
+  return vec_reve (vec_xl (0, x));
+}
+
+/* { dg-final { scan-assembler-times "vlbrq\t" 2 } } */
diff --git a/gcc/testsuite/gcc.target/s390/zvector/vec-reve-load-halfword-z14.c b/gcc/testsuite/gcc.target/s390/zvector/vec-reve-load-halfword-z14.c
new file mode 100644 (file)
index 0000000..4938ac2
--- /dev/null
@@ -0,0 +1,24 @@
+/* { dg-do compile } */
+/* { dg-options "-O3 -mzarch -march=z14 -mzvector" } */
+
+#include <vecintrin.h>
+
+vector signed short
+foo (vector signed short x)
+{
+  return vec_reve (x);
+}
+
+vector signed short
+bar (vector signed short *x)
+{
+  return vec_reve (*x);
+}
+
+vector signed short
+baz (signed short *x)
+{
+  return vec_reve (vec_xl (0, x));
+}
+
+/* { dg-final { scan-assembler-times "vperm\t" 3 } } */
diff --git a/gcc/testsuite/gcc.target/s390/zvector/vec-reve-load-halfword.c b/gcc/testsuite/gcc.target/s390/zvector/vec-reve-load-halfword.c
new file mode 100644 (file)
index 0000000..3c92299
--- /dev/null
@@ -0,0 +1,27 @@
+/* { dg-do compile } */
+/* { dg-options "-O3 -mzarch -march=arch13 -mzvector" } */
+
+#include <vecintrin.h>
+
+vector signed short
+foo (vector signed short x)
+{
+  return vec_reve (x);
+}
+
+/* { dg-final { scan-assembler-times "vperm\t" 1 } } */
+
+
+vector signed short
+bar (vector signed short *x)
+{
+  return vec_reve (*x);
+}
+
+vector signed short
+baz (signed short *x)
+{
+  return vec_reve (vec_xl (0, x));
+}
+
+/* { dg-final { scan-assembler-times "vlerh\t" 2 } } */
diff --git a/gcc/testsuite/gcc.target/s390/zvector/vec-reve-store-byte-z14.c b/gcc/testsuite/gcc.target/s390/zvector/vec-reve-store-byte-z14.c
new file mode 100644 (file)
index 0000000..f078892
--- /dev/null
@@ -0,0 +1,26 @@
+/* { dg-do compile } */
+/* { dg-options "-O3 -mzarch -march=z14 -mzvector" } */
+
+#include <vecintrin.h>
+
+/* reg -> mem */
+void
+foo (vector signed char *target, vector signed char x)
+{
+  *target = vec_reve (x);
+}
+
+void
+bar (signed char *target, vector signed char x)
+{
+  vec_xst (vec_reve (x), 0, target);
+}
+
+/* mem -> mem */
+void
+baz (vector signed char *target, vector signed char *x)
+{
+  *target = vec_reve (*x);
+}
+
+/* { dg-final { scan-assembler-times "vperm\t" 3 } } */
diff --git a/gcc/testsuite/gcc.target/s390/zvector/vec-reve-store-byte.c b/gcc/testsuite/gcc.target/s390/zvector/vec-reve-store-byte.c
new file mode 100644 (file)
index 0000000..db8284b
--- /dev/null
@@ -0,0 +1,28 @@
+/* { dg-do compile } */
+/* { dg-options "-O3 -mzarch -march=arch13 -mzvector" } */
+
+#include <vecintrin.h>
+
+/* reg -> mem */
+void
+foo (vector signed char *target, vector signed char x)
+{
+  *target = vec_reve (x);
+}
+
+void
+bar (signed char *target, vector signed char x)
+{
+  vec_xst (vec_reve (x), 0, target);
+}
+
+/* { dg-final { scan-assembler-times "vstbrq\t" 2 } } */
+
+/* mem -> mem: This becomes vlbrq + vst */
+void
+baz (vector signed char *target, vector signed char *x)
+{
+  *target = vec_reve (*x);
+}
+
+/* { dg-final { scan-assembler-times "vlbrq\t" 1 } } */