[GCC][PATCH][AArch64]Add ACLE intrinsics for dot product (usdot - vector, <us/su...

author Stam Markianos-Wright <stam.markianos-wright@arm.com>

Thu, 16 Jan 2020 14:20:48 +0000 (14:20 +0000)

committer Stam Markianos-Wright <stam.markianos-wright@arm.com>

Thu, 16 Jan 2020 14:26:11 +0000 (14:26 +0000)
author Stam Markianos-Wright <stam.markianos-wright@arm.com>
Thu, 16 Jan 2020 14:20:48 +0000 (14:20 +0000)
committer Stam Markianos-Wright <stam.markianos-wright@arm.com>
Thu, 16 Jan 2020 14:26:11 +0000 (14:26 +0000)
diff --git a/gcc/ChangeLog b/gcc/ChangeLog

index 9a949980699d678a863e0c692fade0a9c62103ed..49dcecb6777e6522ebf5bfc12cd21f0e9d4d6564 100644 (file)
--- a/gcc/ChangeLog
+++ b/gcc/ChangeLog
@@ -1,3 +1,21 @@
+2020-01-16  Stam Markianos-Wright  <stam.markianos-wright@arm.com>
+
+       * config/aarch64/aarch64-builtins.c: (enum aarch64_type_qualifiers):
+       New qualifier_lane_quadtup_index, TYPES_TERNOP_SSUS,
+       TYPES_QUADOPSSUS_LANE_QUADTUP, TYPES_QUADOPSSSU_LANE_QUADTUP.
+       (aarch64_simd_expand_args): Add case SIMD_ARG_LANE_QUADTUP_INDEX.
+       (aarch64_simd_expand_builtin): Add qualifier_lane_quadtup_index.
+       * config/aarch64/aarch64-simd-builtins.def (usdot, usdot_lane,
+       usdot_laneq, sudot_lane,sudot_laneq): New.
+       * config/aarch64/aarch64-simd.md (aarch64_usdot): New.
+       (aarch64_<sur>dot_lane): New.
+       * config/aarch64/arm_neon.h (vusdot_s32): New.
+       (vusdotq_s32): New.
+       (vusdot_lane_s32): New.
+       (vsudot_lane_s32): New.
+       * config/aarch64/iterators.md (DOTPROD_I8MM): New iterator.
+       (UNSPEC_USDOT, UNSPEC_SUDOT): New unspecs.
+
  2020-01-16  Martin Liska  <mliska@suse.cz>
  
         * value-prof.c (dump_histogram_value): Fix
diff --git a/gcc/config/aarch64/aarch64-builtins.c b/gcc/config/aarch64/aarch64-builtins.c

index f0e0461b7f0459fbd88b1d8ecdd90e02140451ea..f50c4857e1cfec5b74e46a81d8068afa86312d88 100644 (file)
--- a/gcc/config/aarch64/aarch64-builtins.c
+++ b/gcc/config/aarch64/aarch64-builtins.c
@@ -110,6 +110,9 @@ enum aarch64_type_qualifiers
    /* Lane indices selected in pairs. - must be in range, and flipped for
       bigendian.  */
    qualifier_lane_pair_index = 0x800,
+  /* Lane indices selected in quadtuplets. - must be in range, and flipped for
+     bigendian.  */
+  qualifier_lane_quadtup_index = 0x1000,
  };
  
  typedef struct
@@ -176,6 +179,10 @@ aarch64_types_ternopu_imm_qualifiers[SIMD_MAX_BUILTIN_ARGS]
    = { qualifier_unsigned, qualifier_unsigned,
        qualifier_unsigned, qualifier_immediate };
  #define TYPES_TERNOPUI (aarch64_types_ternopu_imm_qualifiers)
+static enum aarch64_type_qualifiers
+aarch64_types_ternop_ssus_qualifiers[SIMD_MAX_BUILTIN_ARGS]
+  = { qualifier_none, qualifier_none, qualifier_unsigned, qualifier_none };
+#define TYPES_TERNOP_SSUS (aarch64_types_ternop_ssus_qualifiers)
  
  
  static enum aarch64_type_qualifiers
@@ -194,6 +201,19 @@ aarch64_types_quadopu_lane_qualifiers[SIMD_MAX_BUILTIN_ARGS]
        qualifier_unsigned, qualifier_lane_index };
  #define TYPES_QUADOPU_LANE (aarch64_types_quadopu_lane_qualifiers)
  
+static enum aarch64_type_qualifiers
+aarch64_types_quadopssus_lane_quadtup_qualifiers[SIMD_MAX_BUILTIN_ARGS]
+  = { qualifier_none, qualifier_none, qualifier_unsigned,
+      qualifier_none, qualifier_lane_quadtup_index };
+#define TYPES_QUADOPSSUS_LANE_QUADTUP \
+       (aarch64_types_quadopssus_lane_quadtup_qualifiers)
+static enum aarch64_type_qualifiers
+aarch64_types_quadopsssu_lane_quadtup_qualifiers[SIMD_MAX_BUILTIN_ARGS]
+  = { qualifier_none, qualifier_none, qualifier_none,
+      qualifier_unsigned, qualifier_lane_quadtup_index };
+#define TYPES_QUADOPSSSU_LANE_QUADTUP \
+       (aarch64_types_quadopsssu_lane_quadtup_qualifiers)
+
  static enum aarch64_type_qualifiers
  aarch64_types_quadopu_imm_qualifiers[SIMD_MAX_BUILTIN_ARGS]
    = { qualifier_unsigned, qualifier_unsigned, qualifier_unsigned,
@@ -1288,6 +1308,7 @@ typedef enum
    SIMD_ARG_LANE_INDEX,
    SIMD_ARG_STRUCT_LOAD_STORE_LANE_INDEX,
    SIMD_ARG_LANE_PAIR_INDEX,
+  SIMD_ARG_LANE_QUADTUP_INDEX,
    SIMD_ARG_STOP
  } builtin_simd_arg;
  
@@ -1377,9 +1398,25 @@ aarch64_simd_expand_args (rtx target, int icode, int have_retval,
                   op[opc] = gen_int_mode (ENDIAN_LANE_N (nunits / 2, lane),
                                           SImode);
                 }
-             /* Fall through - if the lane index isn't a constant then
-                the next case will error.  */
-             /* FALLTHRU */
+             /* If the lane index isn't a constant then error out.  */
+             goto constant_arg;
+           case SIMD_ARG_LANE_QUADTUP_INDEX:
+             /* Must be a previous operand into which this is an index and
+                index is restricted to nunits / 4.  */
+             gcc_assert (opc > 0);
+             if (CONST_INT_P (op[opc]))
+               {
+                 machine_mode vmode = insn_data[icode].operand[opc - 1].mode;
+                 unsigned int nunits
+                   = GET_MODE_NUNITS (vmode).to_constant ();
+                 aarch64_simd_lane_bounds (op[opc], 0, nunits / 4, exp);
+                 /* Keep to GCC-vector-extension lane indices in the RTL.  */
+                 int lane = INTVAL (op[opc]);
+                 op[opc] = gen_int_mode (ENDIAN_LANE_N (nunits / 4, lane),
+                                         SImode);
+               }
+             /* If the lane index isn't a constant then error out.  */
+             goto constant_arg;
             case SIMD_ARG_CONSTANT:
  constant_arg:
               if (!(*insn_data[icode].operand[opc].predicate)
@@ -1492,6 +1529,8 @@ aarch64_simd_expand_builtin (int fcode, tree exp, rtx target)
         args[k] = SIMD_ARG_LANE_INDEX;
        else if (d->qualifiers[qualifiers_k] & qualifier_lane_pair_index)
         args[k] = SIMD_ARG_LANE_PAIR_INDEX;
+      else if (d->qualifiers[qualifiers_k] & qualifier_lane_quadtup_index)
+       args[k] = SIMD_ARG_LANE_QUADTUP_INDEX;
        else if (d->qualifiers[qualifiers_k] & qualifier_struct_load_store_lane_index)
         args[k] = SIMD_ARG_STRUCT_LOAD_STORE_LANE_INDEX;
        else if (d->qualifiers[qualifiers_k] & qualifier_immediate)
diff --git a/gcc/config/aarch64/aarch64-simd-builtins.def b/gcc/config/aarch64/aarch64-simd-builtins.def

index 57fc5933b43bfc0da132342c681b8a2c14549c9c..4744dd1f6b2f20327db810277ff65c59ce5abdec 100644 (file)
--- a/gcc/config/aarch64/aarch64-simd-builtins.def
+++ b/gcc/config/aarch64/aarch64-simd-builtins.def
@@ -212,10 +212,15 @@
    /* Implemented by aarch64_<sur><dotprod>{_lane}{q}<dot_mode>.  */
    BUILTIN_VB (TERNOP, sdot, 0)
    BUILTIN_VB (TERNOPU, udot, 0)
+  BUILTIN_VB (TERNOP_SSUS, usdot, 0)
    BUILTIN_VB (QUADOP_LANE, sdot_lane, 0)
    BUILTIN_VB (QUADOPU_LANE, udot_lane, 0)
    BUILTIN_VB (QUADOP_LANE, sdot_laneq, 0)
    BUILTIN_VB (QUADOPU_LANE, udot_laneq, 0)
+  BUILTIN_VB (QUADOPSSUS_LANE_QUADTUP, usdot_lane, 0)
+  BUILTIN_VB (QUADOPSSUS_LANE_QUADTUP, usdot_laneq, 0)
+  BUILTIN_VB (QUADOPSSSU_LANE_QUADTUP, sudot_lane, 0)
+  BUILTIN_VB (QUADOPSSSU_LANE_QUADTUP, sudot_laneq, 0)
  
    /* Implemented by aarch64_fcadd<rot><mode>.   */
    BUILTIN_VHSDF (BINOP, fcadd90, 0)
diff --git a/gcc/config/aarch64/aarch64-simd.md b/gcc/config/aarch64/aarch64-simd.md

index 2989096b170364dc7cbcd3675e36cc81ded33e28..9e56e8caf3569b4b15f77f627ce6e6a839008083 100644 (file)
--- a/gcc/config/aarch64/aarch64-simd.md
+++ b/gcc/config/aarch64/aarch64-simd.md
@@ -506,6 +506,20 @@
    [(set_attr "type" "neon_dot<q>")]
  )
  
+;; These instructions map to the __builtins for the armv8.6a I8MM usdot
+;; (vector) Dot Product operation.
+(define_insn "aarch64_usdot<vsi2qi>"
+  [(set (match_operand:VS 0 "register_operand" "=w")
+       (plus:VS
+         (unspec:VS [(match_operand:<VSI2QI> 2 "register_operand" "w")
+                     (match_operand:<VSI2QI> 3 "register_operand" "w")]
+         UNSPEC_USDOT)
+         (match_operand:VS 1 "register_operand" "0")))]
+  "TARGET_I8MM"
+  "usdot\\t%0.<Vtype>, %2.<Vdottype>, %3.<Vdottype>"
+  [(set_attr "type" "neon_dot<q>")]
+)
+
  ;; These expands map to the Dot Product optab the vectorizer checks for.
  ;; The auto-vectorizer expects a dot product builtin that also does an
  ;; accumulation into the provided register.
@@ -573,6 +587,26 @@
    [(set_attr "type" "neon_dot<q>")]
  )
  
+;; These instructions map to the __builtins for the armv8.6a I8MM usdot, sudot
+;; (by element) Dot Product operations.
+(define_insn "aarch64_<DOTPROD_I8MM:sur>dot_lane<VB:isquadop><VS:vsi2qi>"
+  [(set (match_operand:VS 0 "register_operand" "=w")
+       (plus:VS
+         (unspec:VS [(match_operand:<VS:VSI2QI> 2 "register_operand" "w")
+                     (match_operand:VB 3 "register_operand" "w")
+                     (match_operand:SI 4 "immediate_operand" "i")]
+         DOTPROD_I8MM)
+         (match_operand:VS 1 "register_operand" "0")))]
+  "TARGET_I8MM"
+  {
+    int nunits = GET_MODE_NUNITS (<VB:MODE>mode).to_constant ();
+    int lane = INTVAL (operands[4]);
+    operands[4] = gen_int_mode (ENDIAN_LANE_N (nunits / 4, lane), SImode);
+    return "<DOTPROD_I8MM:sur>dot\\t%0.<VS:Vtype>, %2.<VS:Vdottype>, %3.4b[%4]";
+  }
+  [(set_attr "type" "neon_dot<VS:q>")]
+)
+
  (define_expand "copysign<mode>3"
    [(match_operand:VHSDF 0 "register_operand")
     (match_operand:VHSDF 1 "register_operand")
diff --git a/gcc/config/aarch64/arm_neon.h b/gcc/config/aarch64/arm_neon.h

index eaba156e26cf35b07b96972fe2741a9c00d6caa9..c96214003dd2478820ca2093f6cabc8fcf3f9b92 100644 (file)
--- a/gcc/config/aarch64/arm_neon.h
+++ b/gcc/config/aarch64/arm_neon.h
@@ -34611,6 +34611,89 @@ vrnd64xq_f64 (float64x2_t __a)
  
  #include "arm_bf16.h"
  
+/* AdvSIMD 8-bit Integer Matrix Multiply (I8MM) intrinsics.  */
+
+#pragma GCC push_options
+#pragma GCC target ("arch=armv8.2-a+i8mm")
+
+__extension__ extern __inline int32x2_t
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
+vusdot_s32 (int32x2_t __r, uint8x8_t __a, int8x8_t __b)
+{
+  return __builtin_aarch64_usdotv8qi_ssus (__r, __a, __b);
+}
+
+__extension__ extern __inline int32x4_t
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
+vusdotq_s32 (int32x4_t __r, uint8x16_t __a, int8x16_t __b)
+{
+  return __builtin_aarch64_usdotv16qi_ssus (__r, __a, __b);
+}
+
+__extension__ extern __inline int32x2_t
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
+vusdot_lane_s32 (int32x2_t __r, uint8x8_t __a, int8x8_t __b, const int __index)
+{
+  return __builtin_aarch64_usdot_lanev8qi_ssuss (__r, __a, __b, __index);
+}
+
+__extension__ extern __inline int32x2_t
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
+vusdot_laneq_s32 (int32x2_t __r, uint8x8_t __a, int8x16_t __b,
+                 const int __index)
+{
+  return __builtin_aarch64_usdot_laneqv8qi_ssuss (__r, __a, __b, __index);
+}
+
+__extension__ extern __inline int32x4_t
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
+vusdotq_lane_s32 (int32x4_t __r, uint8x16_t __a, int8x8_t __b,
+                 const int __index)
+{
+  return __builtin_aarch64_usdot_lanev16qi_ssuss (__r, __a, __b, __index);
+}
+
+__extension__ extern __inline int32x4_t
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
+vusdotq_laneq_s32 (int32x4_t __r, uint8x16_t __a, int8x16_t __b,
+                  const int __index)
+{
+  return __builtin_aarch64_usdot_laneqv16qi_ssuss (__r, __a, __b, __index);
+}
+
+__extension__ extern __inline int32x2_t
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
+vsudot_lane_s32 (int32x2_t __r, int8x8_t __a, uint8x8_t __b, const int __index)
+{
+  return __builtin_aarch64_sudot_lanev8qi_sssus (__r, __a, __b, __index);
+}
+
+__extension__ extern __inline int32x2_t
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
+vsudot_laneq_s32 (int32x2_t __r, int8x8_t __a, uint8x16_t __b,
+                 const int __index)
+{
+  return __builtin_aarch64_sudot_laneqv8qi_sssus (__r, __a, __b, __index);
+}
+
+__extension__ extern __inline int32x4_t
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
+vsudotq_lane_s32 (int32x4_t __r, int8x16_t __a, uint8x8_t __b,
+                 const int __index)
+{
+  return __builtin_aarch64_sudot_lanev16qi_sssus (__r, __a, __b, __index);
+}
+
+__extension__ extern __inline int32x4_t
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
+vsudotq_laneq_s32 (int32x4_t __r, int8x16_t __a, uint8x16_t __b,
+                  const int __index)
+{
+  return __builtin_aarch64_sudot_laneqv16qi_sssus (__r, __a, __b, __index);
+}
+
+#pragma GCC pop_options
+
  #undef __aarch64_vget_lane_any
  
  #undef __aarch64_vdup_lane_any
diff --git a/gcc/config/aarch64/iterators.md b/gcc/config/aarch64/iterators.md

index b9843b83c5f1453739f7b1a8e35193b59884f912..83720d9802a0ef454214d04c473b0c60f0abdc2a 100644 (file)
--- a/gcc/config/aarch64/iterators.md
+++ b/gcc/config/aarch64/iterators.md
@@ -799,6 +799,8 @@
      UNSPEC_USUBLT      ; Used in aarch64-sve2.md.
      UNSPEC_USUBWB      ; Used in aarch64-sve2.md.
      UNSPEC_USUBWT      ; Used in aarch64-sve2.md.
+    UNSPEC_USDOT       ; Used in aarch64-simd.md.
+    UNSPEC_SUDOT       ; Used in aarch64-simd.md.
  ])
  
  ;; ------------------------------------------------------------------
@@ -1463,6 +1465,8 @@
  
  (define_mode_attr f16quad [(V2SF "") (V4SF "q")])
  
+(define_mode_attr isquadop [(V8QI "") (V16QI "q")])
+
  (define_code_attr f16mac [(plus "a") (minus "s")])
  
  ;; Map smax to smin and umax to umin.
@@ -2045,6 +2049,8 @@
  
  (define_int_iterator DOTPROD [UNSPEC_SDOT UNSPEC_UDOT])
  
+(define_int_iterator DOTPROD_I8MM [UNSPEC_USDOT UNSPEC_SUDOT])
+
  (define_int_iterator ADDSUBHN [UNSPEC_ADDHN UNSPEC_RADDHN
                                UNSPEC_SUBHN UNSPEC_RSUBHN])
  
@@ -2738,6 +2744,7 @@
                       (UNSPEC_URSHL  "ur") (UNSPEC_SRSHL  "sr")
                       (UNSPEC_UQRSHL  "u") (UNSPEC_SQRSHL  "s")
                       (UNSPEC_SDOT "s") (UNSPEC_UDOT "u")
+                     (UNSPEC_USDOT "us") (UNSPEC_SUDOT "su")
  ])
  
  (define_int_attr r [(UNSPEC_SQDMULH "") (UNSPEC_SQRDMULH "r")
diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog

index 0d8aa6063a73be21d64869d38dec60791957cb28..8b01aa06a40df665a59a51caf6b32a352eb09484 100644 (file)
--- a/gcc/testsuite/ChangeLog
+++ b/gcc/testsuite/ChangeLog
@@ -1,3 +1,10 @@
+2020-01-16  Stam Markianos-Wright  <stam.markianos-wright@arm.com>
+
+       * gcc.target/aarch64/advsimd-intrinsics/vdot-compile-3-1.c: New test.
+       * gcc.target/aarch64/advsimd-intrinsics/vdot-compile-3-2.c: New test.
+       * gcc.target/aarch64/advsimd-intrinsics/vdot-compile-3-3.c: New test.
+       * gcc.target/aarch64/advsimd-intrinsics/vdot-compile-3-4.c: New test.
+
  2020-01-16  Andre Vieira  <andre.simoesdiasvieira@arm.com>
  
         PR tree-optimization/92429
diff --git a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vdot-3-1.c b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vdot-3-1.c

new file mode 100755 (executable)

index 0000000..ac4f821
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vdot-3-1.c
@@ -0,0 +1,136 @@
+/* { dg-do assemble { target { aarch64*-*-* } } } */
+/* { dg-require-effective-target arm_v8_2a_i8mm_ok } */
+/* { dg-add-options arm_v8_2a_i8mm }  */
+/* { dg-additional-options "-save-temps" } */
+/* { dg-final { check-function-bodies "**" "" {-O[^0]} } } */
+/* { dg-skip-if "" { *-*-* } { "-fno-fat-lto-objects" } } */
+
+#include <arm_neon.h>
+
+/* Unsigned-Signed Dot Product instructions.  */
+
+/*
+**ufoo:
+**     usdot   v0\.2s, v1\.8b, v2\.8b
+**     ret
+*/
+int32x2_t ufoo (int32x2_t r, uint8x8_t x, int8x8_t y)
+{
+  return vusdot_s32 (r, x, y);
+}
+
+/*
+**ufooq:
+**     usdot   v0\.4s, v1\.16b, v2\.16b
+**     ret
+*/
+int32x4_t ufooq (int32x4_t r, uint8x16_t x, int8x16_t y)
+{
+  return vusdotq_s32 (r, x, y);
+}
+
+/*
+**ufoo_lane:
+**     usdot   v0\.2s, v1\.8b, v2\.4b\[0\]
+**     ret
+*/
+int32x2_t ufoo_lane (int32x2_t r, uint8x8_t x, int8x8_t y)
+{
+  return vusdot_lane_s32 (r, x, y, 0);
+}
+
+/*
+**ufoo_laneq:
+**     usdot   v0\.2s, v1\.8b, v2\.4b\[2\]
+**     ret
+*/
+int32x2_t ufoo_laneq (int32x2_t r, uint8x8_t x, int8x16_t y)
+{
+  return vusdot_laneq_s32 (r, x, y, 2);
+}
+
+/*
+**ufooq_lane:
+**     usdot   v0\.4s, v1\.16b, v2\.4b\[1\]
+**     ret
+*/
+int32x4_t ufooq_lane (int32x4_t r, uint8x16_t x, int8x8_t y)
+{
+  return vusdotq_lane_s32 (r, x, y, 1);
+}
+
+/*
+**ufooq_laneq:
+**     usdot   v0\.4s, v1\.16b, v2\.4b\[3\]
+**     ret
+*/
+int32x4_t ufooq_laneq (int32x4_t r, uint8x16_t x, int8x16_t y)
+{
+  return vusdotq_laneq_s32 (r, x, y, 3);
+}
+
+
+/* Signed-Unsigned Dot Product instructions.  */
+
+/*
+**sfoo_lane:
+**     sudot   v0\.2s, v1\.8b, v2\.4b\[0\]
+**     ret
+*/
+int32x2_t sfoo_lane (int32x2_t r, int8x8_t x, uint8x8_t y)
+{
+  return vsudot_lane_s32 (r, x, y, 0);
+}
+
+/*
+**sfoo_laneq:
+**     sudot   v0\.2s, v1\.8b, v2\.4b\[2\]
+**     ret
+*/
+int32x2_t sfoo_laneq (int32x2_t r, int8x8_t x, uint8x16_t y)
+{
+  return vsudot_laneq_s32 (r, x, y, 2);
+}
+
+/*
+**sfooq_lane:
+**     sudot   v0\.4s, v1\.16b, v2\.4b\[1\]
+**     ret
+*/
+int32x4_t sfooq_lane (int32x4_t r, int8x16_t x, uint8x8_t y)
+{
+  return vsudotq_lane_s32 (r, x, y, 1);
+}
+
+/*
+**sfooq_laneq:
+**     sudot   v0\.4s, v1\.16b, v2\.4b\[3\]
+**     ret
+*/
+int32x4_t sfooq_laneq (int32x4_t r, int8x16_t x, uint8x16_t y)
+{
+  return vsudotq_laneq_s32 (r, x, y, 3);
+}
+
+/*
+**ufoo_untied:
+**     mov     v0\.8b, v1\.8b
+**     usdot   v0\.2s, v2\.8b, v3\.8b
+**     ret
+*/
+int32x2_t ufoo_untied (int32x2_t unused, int32x2_t r, uint8x8_t x, int8x8_t y)
+{
+  return vusdot_s32 (r, x, y);
+}
+
+/*
+**ufooq_laneq_untied:
+**     mov     v0\.16b, v1\.16b
+**     usdot   v0\.4s, v2\.16b, v3\.4b\[3\]
+**     ret
+*/
+int32x4_t ufooq_laneq_untied (int32x2_t unused, int32x4_t r, uint8x16_t x, int8x16_t y)
+{
+  return vusdotq_laneq_s32 (r, x, y, 3);
+}
+
diff --git a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vdot-3-2.c b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vdot-3-2.c

new file mode 100755 (executable)

index 0000000..96bca23
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vdot-3-2.c
@@ -0,0 +1,137 @@
+/* { dg-do assemble { target { aarch64*-*-* } } } */
+/* { dg-require-effective-target arm_v8_2a_i8mm_ok } */
+/* { dg-add-options arm_v8_2a_i8mm }  */
+/* { dg-additional-options "-mbig-endian -save-temps" } */
+/* { dg-final { check-function-bodies "**" "" {-O[^0]} } } */
+/* { dg-skip-if "" { *-*-* } { "-fno-fat-lto-objects" } } */
+
+#include <arm_neon.h>
+
+/* Unsigned-Signed Dot Product instructions.  */
+
+/*
+**ufoo:
+**     usdot   v0\.2s, v1\.8b, v2\.8b
+**     ret
+*/
+int32x2_t ufoo (int32x2_t r, uint8x8_t x, int8x8_t y)
+{
+  return vusdot_s32 (r, x, y);
+}
+
+/*
+**ufooq:
+**     usdot   v0\.4s, v1\.16b, v2\.16b
+**     ret
+*/
+int32x4_t ufooq (int32x4_t r, uint8x16_t x, int8x16_t y)
+{
+  return vusdotq_s32 (r, x, y);
+}
+
+/*
+**ufoo_lane:
+**     usdot   v0\.2s, v1\.8b, v2\.4b\[0\]
+**     ret
+*/
+int32x2_t ufoo_lane (int32x2_t r, uint8x8_t x, int8x8_t y)
+{
+  return vusdot_lane_s32 (r, x, y, 0);
+}
+
+/*
+**ufoo_laneq:
+**     usdot   v0\.2s, v1\.8b, v2\.4b\[2\]
+**     ret
+*/
+int32x2_t ufoo_laneq (int32x2_t r, uint8x8_t x, int8x16_t y)
+{
+  return vusdot_laneq_s32 (r, x, y, 2);
+}
+
+/*
+**ufooq_lane:
+**     usdot   v0\.4s, v1\.16b, v2\.4b\[1\]
+**     ret
+*/
+int32x4_t ufooq_lane (int32x4_t r, uint8x16_t x, int8x8_t y)
+{
+  return vusdotq_lane_s32 (r, x, y, 1);
+}
+
+/*
+**ufooq_laneq:
+**     usdot   v0\.4s, v1\.16b, v2\.4b\[3\]
+**     ret
+*/
+int32x4_t ufooq_laneq (int32x4_t r, uint8x16_t x, int8x16_t y)
+{
+  return vusdotq_laneq_s32 (r, x, y, 3);
+}
+
+
+/* Signed-Unsigned Dot Product instructions.  */
+
+/*
+**sfoo_lane:
+**     sudot   v0\.2s, v1\.8b, v2\.4b\[0\]
+**     ret
+*/
+int32x2_t sfoo_lane (int32x2_t r, int8x8_t x, uint8x8_t y)
+{
+  return vsudot_lane_s32 (r, x, y, 0);
+}
+
+/*
+**sfoo_laneq:
+**     sudot   v0\.2s, v1\.8b, v2\.4b\[2\]
+**     ret
+*/
+int32x2_t sfoo_laneq (int32x2_t r, int8x8_t x, uint8x16_t y)
+{
+  return vsudot_laneq_s32 (r, x, y, 2);
+}
+
+/*
+**sfooq_lane:
+**     sudot   v0\.4s, v1\.16b, v2\.4b\[1\]
+**     ret
+*/
+int32x4_t sfooq_lane (int32x4_t r, int8x16_t x, uint8x8_t y)
+{
+  return vsudotq_lane_s32 (r, x, y, 1);
+}
+
+/*
+**sfooq_laneq:
+**     sudot   v0\.4s, v1\.16b, v2\.4b\[3\]
+**     ret
+*/
+int32x4_t sfooq_laneq (int32x4_t r, int8x16_t x, uint8x16_t y)
+{
+  return vsudotq_laneq_s32 (r, x, y, 3);
+}
+
+/*
+**ufoo_untied:
+**     mov     v0\.8b, v1\.8b
+**     usdot   v0\.2s, v2\.8b, v3\.8b
+**     ret
+*/
+int32x2_t ufoo_untied (int32x2_t unused, int32x2_t r, uint8x8_t x, int8x8_t y)
+{
+  return vusdot_s32 (r, x, y);
+}
+
+/*
+**ufooq_laneq_untied:
+**     mov     v0\.16b, v1\.16b
+**     usdot   v0\.4s, v2\.16b, v3\.4b\[3\]
+**     ret
+*/
+int32x4_t ufooq_laneq_untied (int32x2_t unused, int32x4_t r, uint8x16_t x, int8x16_t y)
+{
+  return vusdotq_laneq_s32 (r, x, y, 3);
+}
+
+
diff --git a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vdot-3-3.c b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vdot-3-3.c

new file mode 100755 (executable)

index 0000000..18ecabe
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vdot-3-3.c
@@ -0,0 +1,31 @@
+/* { dg-do assemble { target { aarch64*-*-* } } } */
+/* { dg-require-effective-target arm_v8_2a_i8mm_ok } */
+/* { dg-add-options arm_v8_2a_i8mm }  */
+/* { dg-additional-options "--save-temps" } */
+/* { dg-skip-if "" { *-*-* } { "-fno-fat-lto-objects" } } */
+
+#include <arm_neon.h>
+
+int32x2_t ufoo_lane (int32x2_t r, uint8x8_t x, int8x8_t y)
+{
+  /* { dg-error "lane -1 out of range 0 - 1" "" { target *-*-* } 0 } */
+  return vusdot_lane_s32 (r, x, y, -1);
+}
+
+int32x2_t ufoo_laneq (int32x2_t r, uint8x8_t x, int8x16_t y)
+{
+  /* { dg-error "lane -1 out of range 0 - 3" "" { target *-*-* } 0 } */
+  return vusdot_laneq_s32 (r, x, y, -1);
+}
+
+int32x4_t ufooq_lane (int32x4_t r, uint8x16_t x, int8x8_t y)
+{
+  /* { dg-error "lane 2 out of range 0 - 1" "" { target *-*-* } 0 } */
+  return vusdotq_lane_s32 (r, x, y, 2);
+}
+
+int32x4_t ufooq_laneq (int32x4_t r, uint8x16_t x, int8x16_t y)
+{
+  /* { dg-error "lane 4 out of range 0 - 3" "" { target *-*-* } 0 } */
+  return vusdotq_laneq_s32 (r, x, y, 4);
+}
diff --git a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vdot-3-4.c b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vdot-3-4.c

new file mode 100755 (executable)

index 0000000..66c87d4
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vdot-3-4.c
@@ -0,0 +1,31 @@
+/* { dg-do assemble { target { aarch64*-*-* } } } */
+/* { dg-require-effective-target arm_v8_2a_i8mm_ok } */
+/* { dg-add-options arm_v8_2a_i8mm }  */
+/* { dg-additional-options "--save-temps" } */
+/* { dg-skip-if "" { *-*-* } { "-fno-fat-lto-objects" } } */
+
+#include <arm_neon.h>
+
+int32x2_t sfoo_lane (int32x2_t r, int8x8_t x, uint8x8_t y)
+{
+  /* { dg-error "lane -1 out of range 0 - 1" "" { target *-*-* } 0 } */
+  return vsudot_lane_s32 (r, x, y, -1);
+}
+
+int32x2_t sfoo_laneq (int32x2_t r, int8x8_t x, uint8x16_t y)
+{
+  /* { dg-error "lane -1 out of range 0 - 3" "" { target *-*-* } 0 } */
+  return vsudot_laneq_s32 (r, x, y, -1);
+}
+
+int32x4_t sfooq_lane (int32x4_t r, int8x16_t x, uint8x8_t y)
+{
+  /* { dg-error "lane 2 out of range 0 - 1" "" { target *-*-* } 0 } */
+  return vsudotq_lane_s32 (r, x, y, 2);
+}
+
+int32x4_t sfooq_laneq (int32x4_t r, int8x16_t x, uint8x16_t y)
+{
+  /* { dg-error "lane 4 out of range 0 - 3" "" { target *-*-* } 0 } */
+  return vsudotq_laneq_s32 (r, x, y, 4);
+}
author	Stam Markianos-Wright <stam.markianos-wright@arm.com>
	Thu, 16 Jan 2020 14:20:48 +0000 (14:20 +0000)
committer	Stam Markianos-Wright <stam.markianos-wright@arm.com>
	Thu, 16 Jan 2020 14:26:11 +0000 (14:26 +0000)
gcc/ChangeLog		patch \| blob \| history
gcc/config/aarch64/aarch64-builtins.c		patch \| blob \| history
gcc/config/aarch64/aarch64-simd-builtins.def		patch \| blob \| history
gcc/config/aarch64/aarch64-simd.md		patch \| blob \| history
gcc/config/aarch64/arm_neon.h		patch \| blob \| history
gcc/config/aarch64/iterators.md		patch \| blob \| history
gcc/testsuite/ChangeLog		patch \| blob \| history
gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vdot-3-1.c	[new file with mode: 0755]	patch \| blob
gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vdot-3-2.c	[new file with mode: 0755]	patch \| blob
gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vdot-3-3.c	[new file with mode: 0755]	patch \| blob
gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vdot-3-4.c	[new file with mode: 0755]	patch \| blob