[GCC][PATCH][ARM]Add ACLE intrinsics for dot product (vusdot - vector, v<us/su>dot...
authorStam Markianos-Wright <stam.markianos-wright@arm.com>
Tue, 11 Feb 2020 11:14:07 +0000 (11:14 +0000)
committerStam Markianos-Wright <stam.markianos-wright@arm.com>
Tue, 11 Feb 2020 11:14:07 +0000 (11:14 +0000)
This patch adds the ARMv8.6 Extension ACLE intrinsics for dot product
operations (vector/by element) to the ARM back-end.

These are:
usdot (vector), <us/su>dot (by element).

The functions are optional from ARMv8.2-a as -march=armv8.2-a+i8mm and
for ARM they remain optional after as of ARMv8.6-a.

The functions are declared in arm_neon.h, RTL patterns are defined to
generate assembler and tests are added to verify and perform adequate checks.

Regression testing on arm-none-eabi passed successfully.

gcc/ChangeLog:

2020-02-11  Stam Markianos-Wright  <stam.markianos-wright@arm.com>

* config/arm/arm-builtins.c (enum arm_type_qualifiers):
(USTERNOP_QUALIFIERS): New define.
(USMAC_LANE_QUADTUP_QUALIFIERS): New define.
(SUMAC_LANE_QUADTUP_QUALIFIERS): New define.
(arm_expand_builtin_args): Add case ARG_BUILTIN_LANE_QUADTUP_INDEX.
(arm_expand_builtin_1): Add qualifier_lane_quadtup_index.
* config/arm/arm_neon.h (vusdot_s32): New.
(vusdot_lane_s32): New.
(vusdotq_lane_s32): New.
(vsudot_lane_s32): New.
(vsudotq_lane_s32): New.
* config/arm/arm_neon_builtins.def (usdot, usdot_lane,sudot_lane): New.
* config/arm/iterators.md (DOTPROD_I8MM): New.
(sup, opsuffix): Add <us/su>.
* config/arm/neon.md (neon_usdot, <us/su>dot_lane: New.
* config/arm/unspecs.md (UNSPEC_DOT_US, UNSPEC_DOT_SU): New.

gcc/testsuite/ChangeLog:

2020-02-11  Stam Markianos-Wright  <stam.markianos-wright@arm.com>

* gcc.target/arm/simd/vdot-2-1.c: New test.
* gcc.target/arm/simd/vdot-2-2.c: New test.
* gcc.target/arm/simd/vdot-2-3.c: New test.
* gcc.target/arm/simd/vdot-2-4.c: New test.

12 files changed:
gcc/ChangeLog
gcc/config/arm/arm-builtins.c
gcc/config/arm/arm_neon.h
gcc/config/arm/arm_neon_builtins.def
gcc/config/arm/iterators.md
gcc/config/arm/neon.md
gcc/config/arm/unspecs.md
gcc/testsuite/ChangeLog
gcc/testsuite/gcc.target/arm/simd/vdot-2-1.c [new file with mode: 0644]
gcc/testsuite/gcc.target/arm/simd/vdot-2-2.c [new file with mode: 0644]
gcc/testsuite/gcc.target/arm/simd/vdot-2-3.c [new file with mode: 0644]
gcc/testsuite/gcc.target/arm/simd/vdot-2-4.c [new file with mode: 0644]

index 3eafd66f3b461cff003d0ebfecf95d73c29d9590..fa4e59e32efe3cc06b32da9dc372f4b6359e5ee3 100644 (file)
@@ -1,3 +1,22 @@
+2020-02-11  Stam Markianos-Wright  <stam.markianos-wright@arm.com>
+
+       * config/arm/arm-builtins.c (enum arm_type_qualifiers): 
+       (USTERNOP_QUALIFIERS): New define.
+       (USMAC_LANE_QUADTUP_QUALIFIERS): New define.
+       (SUMAC_LANE_QUADTUP_QUALIFIERS): New define.
+       (arm_expand_builtin_args): Add case ARG_BUILTIN_LANE_QUADTUP_INDEX.
+       (arm_expand_builtin_1): Add qualifier_lane_quadtup_index.
+       * config/arm/arm_neon.h (vusdot_s32): New.
+       (vusdot_lane_s32): New.
+       (vusdotq_lane_s32): New.
+       (vsudot_lane_s32): New.
+       (vsudotq_lane_s32): New.
+       * config/arm/arm_neon_builtins.def (usdot, usdot_lane,sudot_lane): New.
+       * config/arm/iterators.md (DOTPROD_I8MM): New.
+       (sup, opsuffix): Add <us/su>.
+       * config/arm/neon.md (neon_usdot, <us/su>dot_lane: New.
+       * config/arm/unspecs.md (UNSPEC_DOT_US, UNSPEC_DOT_SU): New.
+
 2020-02-11  Richard Biener  <rguenther@suse.de>
 
        PR tree-optimization/93661
index 7f279cca6688c6f11948159666ee647ae533c61d..4d31405cf6e09e3a61faa3e8142940bbdb23c60a 100644 (file)
@@ -86,7 +86,10 @@ enum arm_type_qualifiers
   qualifier_const_void_pointer = 0x802,
   /* Lane indices selected in pairs - must be within range of previous
      argument = a vector.  */
-  qualifier_lane_pair_index = 0x1000
+  qualifier_lane_pair_index = 0x1000,
+  /* Lane indices selected in quadtuplets - must be within range of previous
+     argument = a vector.  */
+  qualifier_lane_quadtup_index = 0x2000
 };
 
 /*  The qualifier_internal allows generation of a unary builtin from
@@ -122,6 +125,13 @@ arm_unsigned_uternop_qualifiers[SIMD_MAX_BUILTIN_ARGS]
       qualifier_unsigned };
 #define UTERNOP_QUALIFIERS (arm_unsigned_uternop_qualifiers)
 
+/* T (T, unsigned T, T).  */
+static enum arm_type_qualifiers
+arm_usternop_qualifiers[SIMD_MAX_BUILTIN_ARGS]
+  = { qualifier_none, qualifier_none, qualifier_unsigned,
+      qualifier_none };
+#define USTERNOP_QUALIFIERS (arm_usternop_qualifiers)
+
 /* T (T, immediate).  */
 static enum arm_type_qualifiers
 arm_binop_imm_qualifiers[SIMD_MAX_BUILTIN_ARGS]
@@ -176,6 +186,20 @@ arm_umac_lane_qualifiers[SIMD_MAX_BUILTIN_ARGS]
       qualifier_unsigned, qualifier_lane_index };
 #define UMAC_LANE_QUALIFIERS (arm_umac_lane_qualifiers)
 
+/* T (T, unsigned T, T, lane index).  */
+static enum arm_type_qualifiers
+arm_usmac_lane_quadtup_qualifiers[SIMD_MAX_BUILTIN_ARGS]
+  = { qualifier_none, qualifier_none, qualifier_unsigned,
+      qualifier_none, qualifier_lane_quadtup_index };
+#define USMAC_LANE_QUADTUP_QUALIFIERS (arm_usmac_lane_quadtup_qualifiers)
+
+/* T (T, T, unsigend T, lane index).  */
+static enum arm_type_qualifiers
+arm_sumac_lane_quadtup_qualifiers[SIMD_MAX_BUILTIN_ARGS]
+  = { qualifier_none, qualifier_none, qualifier_none,
+      qualifier_unsigned, qualifier_lane_quadtup_index };
+#define SUMAC_LANE_QUADTUP_QUALIFIERS (arm_sumac_lane_quadtup_qualifiers)
+
 /* T (T, T, immediate).  */
 static enum arm_type_qualifiers
 arm_ternop_imm_qualifiers[SIMD_MAX_BUILTIN_ARGS]
@@ -2177,6 +2201,7 @@ typedef enum {
   ARG_BUILTIN_LANE_INDEX,
   ARG_BUILTIN_STRUCT_LOAD_STORE_LANE_INDEX,
   ARG_BUILTIN_LANE_PAIR_INDEX,
+  ARG_BUILTIN_LANE_QUADTUP_INDEX,
   ARG_BUILTIN_NEON_MEMORY,
   ARG_BUILTIN_MEMORY,
   ARG_BUILTIN_STOP
@@ -2325,11 +2350,24 @@ arm_expand_builtin_args (rtx target, machine_mode map_mode, int fcode,
              if (CONST_INT_P (op[argc]))
                {
                  machine_mode vmode = mode[argc - 1];
-                 neon_lane_bounds (op[argc], 0, GET_MODE_NUNITS (vmode) / 2, exp);
+                 neon_lane_bounds (op[argc], 0,
+                                   GET_MODE_NUNITS (vmode) / 2, exp);
+               }
+             /* If the lane index isn't a constant then error out.  */
+             goto constant_arg;
+
+           case ARG_BUILTIN_LANE_QUADTUP_INDEX:
+             /* Previous argument must be a vector, which this indexes.  */
+             gcc_assert (argc > 0);
+             if (CONST_INT_P (op[argc]))
+               {
+                 machine_mode vmode = mode[argc - 1];
+                 neon_lane_bounds (op[argc], 0,
+                                   GET_MODE_NUNITS (vmode) / 4, exp);
                }
-             /* If the lane index isn't a constant then the next
-                case will error.  */
-             /* Fall through.  */
+             /* If the lane index isn't a constant then error out.  */
+             goto constant_arg;
+
            case ARG_BUILTIN_CONSTANT:
 constant_arg:
              if (!(*insn_data[icode].operand[opno].predicate)
@@ -2493,6 +2531,8 @@ arm_expand_builtin_1 (int fcode, tree exp, rtx target,
        args[k] = ARG_BUILTIN_LANE_INDEX;
       else if (d->qualifiers[qualifiers_k] & qualifier_lane_pair_index)
        args[k] = ARG_BUILTIN_LANE_PAIR_INDEX;
+      else if (d->qualifiers[qualifiers_k] & qualifier_lane_quadtup_index)
+       args[k] = ARG_BUILTIN_LANE_QUADTUP_INDEX;
       else if (d->qualifiers[qualifiers_k] & qualifier_struct_load_store_lane_index)
        args[k] = ARG_BUILTIN_STRUCT_LOAD_STORE_LANE_INDEX;
       else if (d->qualifiers[qualifiers_k] & qualifier_immediate)
index 3c78f435009ab027f92693d00ab5b40960d5419d..81f550988f7b987b4efe504ccc849e3f6fdc692a 100644 (file)
@@ -18742,6 +18742,52 @@ vcmlaq_rot270_laneq_f32 (float32x4_t __r, float32x4_t __a, float32x4_t __b,
   return __builtin_neon_vcmla_lane270v4sf (__r, __a, __b, __index);
 }
 
+
+/* AdvSIMD Matrix Multiply-Accumulate and Dot Product intrinsics.  */
+#pragma GCC push_options
+#pragma GCC target ("arch=armv8.2-a+i8mm")
+
+__extension__ extern __inline int32x2_t
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
+vusdot_s32 (int32x2_t __r, uint8x8_t __a, int8x8_t __b)
+{
+  return __builtin_neon_usdotv8qi_ssus (__r, __a, __b);
+}
+
+__extension__ extern __inline int32x2_t
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
+vusdot_lane_s32 (int32x2_t __r, uint8x8_t __a,
+                int8x8_t __b, const int __index)
+{
+  return __builtin_neon_usdot_lanev8qi_ssuss (__r, __a, __b, __index);
+}
+
+__extension__ extern __inline int32x4_t
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
+vusdotq_lane_s32 (int32x4_t __r, uint8x16_t __a,
+                 int8x8_t __b, const int __index)
+{
+  return __builtin_neon_usdot_lanev16qi_ssuss (__r, __a, __b, __index);
+}
+
+__extension__ extern __inline int32x2_t
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
+vsudot_lane_s32 (int32x2_t __r, int8x8_t __a,
+                uint8x8_t __b, const int __index)
+{
+  return __builtin_neon_sudot_lanev8qi_sssus (__r, __a, __b, __index);
+}
+
+__extension__ extern __inline int32x4_t
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
+vsudotq_lane_s32 (int32x4_t __r, int8x16_t __a,
+                 uint8x8_t __b, const int __index)
+{
+  return __builtin_neon_sudot_lanev16qi_sssus (__r, __a, __b, __index);
+}
+
+#pragma GCC pop_options
+
 #pragma GCC pop_options
 #endif
 
index e9ff4e501cbb5d16b9211f5bc96db376ddf21afc..b4537ff5de97d6a59435cc7fb7afd5c4c2aa4d44 100644 (file)
@@ -352,6 +352,10 @@ VAR2 (UTERNOP, udot, v8qi, v16qi)
 VAR2 (MAC_LANE, sdot_lane, v8qi, v16qi)
 VAR2 (UMAC_LANE, udot_lane, v8qi, v16qi)
 
+VAR1 (USTERNOP, usdot, v8qi)
+VAR2 (USMAC_LANE_QUADTUP, usdot_lane, v8qi, v16qi)
+VAR2 (SUMAC_LANE_QUADTUP, sudot_lane, v8qi, v16qi)
+
 VAR4 (BINOP, vcadd90, v4hf, v2sf, v8hf, v4sf)
 VAR4 (BINOP, vcadd270, v4hf, v2sf, v8hf, v4sf)
 VAR4 (TERNOP, vcmla0, v2sf, v4sf, v4hf, v8hf)
index 33e29509f00a89fa23d0546687c0e4643f0b32d2..f8a76fb59cb8afcba762593d3617139490e3a091 100644 (file)
 
 (define_int_iterator DOTPROD [UNSPEC_DOT_S UNSPEC_DOT_U])
 
+(define_int_iterator DOTPROD_I8MM [UNSPEC_DOT_US UNSPEC_DOT_SU])
+
 (define_int_iterator VFMLHALVES [UNSPEC_VFML_LO UNSPEC_VFML_HI])
 
 (define_int_iterator VCADD [UNSPEC_VCADD90 UNSPEC_VCADD270])
   (UNSPEC_VRSRA_S_N "s") (UNSPEC_VRSRA_U_N "u")
   (UNSPEC_VCVTH_S "s") (UNSPEC_VCVTH_U "u")
   (UNSPEC_DOT_S "s") (UNSPEC_DOT_U "u")
+  (UNSPEC_DOT_US "us") (UNSPEC_DOT_SU "su")
   (UNSPEC_SSAT16 "s") (UNSPEC_USAT16 "u")
 ])
 
 (define_int_attr MRRC [(VUNSPEC_MRRC "MRRC") (VUNSPEC_MRRC2 "MRRC2")])
 
 (define_int_attr opsuffix [(UNSPEC_DOT_S "s8")
-                          (UNSPEC_DOT_U "u8")])
+                          (UNSPEC_DOT_U "u8")
+                          (UNSPEC_DOT_US "s8")
+                          (UNSPEC_DOT_SU "u8")
+                          ])
 
 (define_int_attr smlaw_op [(UNSPEC_SMLAWB "smlawb") (UNSPEC_SMLAWT "smlawt")])
index 6087ca6f2badde6a492bb515a2cb5846f3d4ad8e..5d085dc6f613f6ab4ce3c4302d6021b01446327f 100644 (file)
   [(set_attr "type" "neon_dot<q>")]
 )
 
+;; These instructions map to the __builtins for the Dot Product operations.
+(define_insn "neon_usdot<vsi2qi>"
+  [(set (match_operand:VCVTI 0 "register_operand" "=w")
+       (plus:VCVTI
+         (unspec:VCVTI
+           [(match_operand:<VSI2QI> 2 "register_operand" "w")
+           (match_operand:<VSI2QI> 3 "register_operand" "w")]
+           UNSPEC_DOT_US)
+         (match_operand:VCVTI 1 "register_operand" "0")))]
+  "TARGET_I8MM"
+  "vusdot.s8\\t%<V_reg>0, %<V_reg>2, %<V_reg>3"
+  [(set_attr "type" "neon_dot<q>")]
+)
+
 ;; These instructions map to the __builtins for the Dot Product
 ;; indexed operations.
 (define_insn "neon_<sup>dot_lane<vsi2qi>"
   [(set_attr "type" "neon_dot<q>")]
 )
 
+;; These instructions map to the __builtins for the Dot Product
+;; indexed operations in the v8.6 I8MM extension.
+(define_insn "neon_<sup>dot_lane<vsi2qi>"
+  [(set (match_operand:VCVTI 0 "register_operand" "=w")
+       (plus:VCVTI
+         (unspec:VCVTI
+          [(match_operand:<VSI2QI> 2 "register_operand" "w")
+           (match_operand:V8QI 3 "register_operand" "t")
+           (match_operand:SI 4 "immediate_operand" "i")]
+           DOTPROD_I8MM)
+         (match_operand:VCVTI 1 "register_operand" "0")))]
+  "TARGET_I8MM"
+  {
+    operands[4] = GEN_INT (INTVAL (operands[4]));
+    return "v<sup>dot.<opsuffix>\\t%<V_reg>0, %<V_reg>2, %P3[%c4]";
+  }
+  [(set_attr "type" "neon_dot<q>")]
+)
+
 ;; These expands map to the Dot Product optab the vectorizer checks for.
 ;; The auto-vectorizer expects a dot product builtin that also does an
 ;; accumulation into the provided register.
index 8f4a705f43efdb6baf03b39cee589cf728620687..2dd62467fe6e221ad569c7186454b1460436efef 100644 (file)
   UNSPEC_VRNDX
   UNSPEC_DOT_S
   UNSPEC_DOT_U
+  UNSPEC_DOT_US
+  UNSPEC_DOT_SU
   UNSPEC_VFML_LO
   UNSPEC_VFML_HI
   UNSPEC_VCADD90
index 1844550b5a2d1196a70e52e19fd4f65947bbe3d9..e90ba18466fc02461b0e2d7d9bd7af2d991653f5 100644 (file)
@@ -1,3 +1,10 @@
+2020-02-11  Stam Markianos-Wright  <stam.markianos-wright@arm.com>
+
+       * gcc.target/arm/simd/vdot-2-1.c: New test.
+       * gcc.target/arm/simd/vdot-2-2.c: New test.
+       * gcc.target/arm/simd/vdot-2-3.c: New test.
+       * gcc.target/arm/simd/vdot-2-4.c: New test.
+
 2020-02-11  Richard Biener  <rguenther@suse.de>
 
        PR tree-optimization/93661
diff --git a/gcc/testsuite/gcc.target/arm/simd/vdot-2-1.c b/gcc/testsuite/gcc.target/arm/simd/vdot-2-1.c
new file mode 100644 (file)
index 0000000..4d5f07b
--- /dev/null
@@ -0,0 +1,91 @@
+/* { dg-do assemble { target { arm*-*-* } } } */
+/* { dg-require-effective-target arm_v8_2a_i8mm_ok } */
+/* { dg-add-options arm_v8_2a_i8mm }  */
+/* { dg-additional-options "-O -save-temps" } */
+/* { dg-final { check-function-bodies "**" "" } } */
+
+#include <arm_neon.h>
+
+/* Unsigned-Signed Dot Product instructions.  */
+
+/*
+**usfoo:
+**     ...
+**     vusdot\.s8      d0, d1, d2
+**     bx      lr
+*/
+int32x2_t usfoo (int32x2_t r, uint8x8_t x, int8x8_t y)
+{
+  return vusdot_s32 (r, x, y);
+}
+
+/*
+**usfoo_lane:
+**     ...
+**     vusdot\.s8      d0, d1, d2\[0\]
+**     bx      lr
+*/
+int32x2_t usfoo_lane (int32x2_t r, uint8x8_t x, int8x8_t y)
+{
+  return vusdot_lane_s32 (r, x, y, 0);
+}
+
+/*
+**usfooq_lane:
+**     ...
+**     vusdot\.s8      q0, q1, d4\[1\]
+**     bx      lr
+*/
+int32x4_t usfooq_lane (int32x4_t r, uint8x16_t x, int8x8_t y)
+{
+  return vusdotq_lane_s32 (r, x, y, 1);
+}
+
+/* Signed-Unsigned Dot Product instructions.  */
+
+/*
+**sfoo_lane:
+**     ...
+**     vsudot\.u8      d0, d1, d2\[0\]
+**     bx      lr
+*/
+int32x2_t sfoo_lane (int32x2_t r, int8x8_t x, uint8x8_t y)
+{
+  return vsudot_lane_s32 (r, x, y, 0);
+}
+
+/*
+**sfooq_lane:
+**     ...
+**     vsudot\.u8      q0, q1, d4\[1\]
+**     bx      lr
+*/
+int32x4_t sfooq_lane (int32x4_t r, int8x16_t x, uint8x8_t y)
+{
+  return vsudotq_lane_s32 (r, x, y, 1);
+}
+
+/*
+**usfoo_untied:
+**     ...
+**     vusdot\.s8      d1, d2, d3
+**     vmov    d0, d1  @ v2si
+**     bx      lr
+*/
+int32x2_t usfoo_untied (int32x2_t unused, int32x2_t r, uint8x8_t x, int8x8_t y)
+{
+  return vusdot_s32 (r, x, y);
+}
+
+/*
+**usfoo_lane_untied:
+**     ...
+**     vusdot.s8       d1, d2, d3\[0\]
+**     vmov    d0, d1  @ v2si
+**     bx      lr
+*/
+int32x2_t usfoo_lane_untied (int32x2_t unused, int32x2_t r, uint8x8_t x, int8x8_t y)
+{
+  return vusdot_lane_s32 (r, x, y, 0);
+}
+
diff --git a/gcc/testsuite/gcc.target/arm/simd/vdot-2-2.c b/gcc/testsuite/gcc.target/arm/simd/vdot-2-2.c
new file mode 100644 (file)
index 0000000..b7b76e2
--- /dev/null
@@ -0,0 +1,90 @@
+/* { dg-do assemble { target { arm*-*-* } } } */
+/* { dg-require-effective-target arm_v8_2a_i8mm_ok } */
+/* { dg-add-options arm_v8_2a_i8mm }  */
+/* { dg-additional-options "-O -save-temps -mbig-endian" } */
+/* { dg-final { check-function-bodies "**" "" } } */
+
+#include <arm_neon.h>
+
+/* Unsigned-Signed Dot Product instructions.  */
+
+/*
+**usfoo:
+**     ...
+**     vusdot\.s8      d0, d1, d2
+**     bx      lr
+*/
+int32x2_t usfoo (int32x2_t r, uint8x8_t x, int8x8_t y)
+{
+  return vusdot_s32 (r, x, y);
+}
+
+/*
+**usfoo_lane:
+**     ...
+**     vusdot\.s8      d0, d1, d2\[0\]
+**     bx      lr
+*/
+int32x2_t usfoo_lane (int32x2_t r, uint8x8_t x, int8x8_t y)
+{
+  return vusdot_lane_s32 (r, x, y, 0);
+}
+
+/*
+**usfooq_lane:
+**     ...
+**     vusdot\.s8      q0, q1, d4\[1\]
+**     bx      lr
+*/
+int32x4_t usfooq_lane (int32x4_t r, uint8x16_t x, int8x8_t y)
+{
+  return vusdotq_lane_s32 (r, x, y, 1);
+}
+
+/* Signed-Unsigned Dot Product instructions.  */
+
+/*
+**sfoo_lane:
+**     ...
+**     vsudot\.u8      d0, d1, d2\[0\]
+**     bx      lr
+*/
+int32x2_t sfoo_lane (int32x2_t r, int8x8_t x, uint8x8_t y)
+{
+  return vsudot_lane_s32 (r, x, y, 0);
+}
+
+/*
+**sfooq_lane:
+**     ...
+**     vsudot\.u8      q0, q1, d4\[1\]
+**     bx      lr
+*/
+int32x4_t sfooq_lane (int32x4_t r, int8x16_t x, uint8x8_t y)
+{
+  return vsudotq_lane_s32 (r, x, y, 1);
+}
+
+/*
+**usfoo_untied:
+**     ...
+**     vusdot\.s8      d1, d2, d3
+**     vmov    d0, d1  @ v2si
+**     bx      lr
+*/
+int32x2_t usfoo_untied (int32x2_t unused, int32x2_t r, uint8x8_t x, int8x8_t y)
+{
+  return vusdot_s32 (r, x, y);
+}
+
+/*
+**usfoo_lane_untied:
+**     ...
+**     vusdot.s8       d1, d2, d3\[0\]
+**     vmov    d0, d1  @ v2si
+**     bx      lr
+*/
+int32x2_t usfoo_lane_untied (int32x2_t unused, int32x2_t r, uint8x8_t x, int8x8_t y)
+{
+  return vusdot_lane_s32 (r, x, y, 0);
+}
diff --git a/gcc/testsuite/gcc.target/arm/simd/vdot-2-3.c b/gcc/testsuite/gcc.target/arm/simd/vdot-2-3.c
new file mode 100644 (file)
index 0000000..e14fe8f
--- /dev/null
@@ -0,0 +1,21 @@
+/* { dg-do assemble { target { arm*-*-* } } } */
+/* { dg-require-effective-target arm_v8_2a_i8mm_ok } */
+/* { dg-add-options arm_v8_2a_i8mm }  */
+/* { dg-additional-options "--save-temps" } */
+
+#include <arm_neon.h>
+
+/* Unsigned-Signed Dot Product instructions.  */
+
+int32x2_t usfoo_lane (int32x2_t r, uint8x8_t x, int8x8_t y)
+{
+  /* { dg-error "lane -1 out of range 0 - 1" "" { target *-*-* } 0 } */
+  return vusdot_lane_s32 (r, x, y, -1);
+}
+
+
+int32x4_t usfooq_lane (int32x4_t r, uint8x16_t x, int8x8_t y)
+{
+  /* { dg-error "lane 2 out of range 0 - 1" "" { target *-*-* } 0 } */
+  return vusdotq_lane_s32 (r, x, y, 2);
+}
diff --git a/gcc/testsuite/gcc.target/arm/simd/vdot-2-4.c b/gcc/testsuite/gcc.target/arm/simd/vdot-2-4.c
new file mode 100644 (file)
index 0000000..fb7ebb4
--- /dev/null
@@ -0,0 +1,20 @@
+/* { dg-do assemble { target { arm*-*-* } } } */
+/* { dg-require-effective-target arm_v8_2a_i8mm_ok } */
+/* { dg-add-options arm_v8_2a_i8mm }  */
+/* { dg-additional-options "--save-temps" } */
+
+#include <arm_neon.h>
+
+/* Signed-Unsigned Dot Product instructions.  */
+
+int32x2_t sfoo_lane (int32x2_t r, int8x8_t x, uint8x8_t y)
+{
+  /* { dg-error "lane -1 out of range 0 - 1" "" { target *-*-* } 0 } */
+  return vsudot_lane_s32 (r, x, y, -1);
+}
+
+int32x4_t sfooq_lane (int32x4_t r, int8x16_t x, uint8x8_t y)
+{
+  /* { dg-error "lane 2 out of range 0 - 1" "" { target *-*-* } 0 } */
+  return vsudotq_lane_s32 (r, x, y, 2);
+}