aarch64: ACLE intrinsics bfmmla and bfmlal<b/t>

author Delia Burduv <delia.burduv@arm.com>

Thu, 6 Feb 2020 09:45:52 +0000 (09:45 +0000)

committer Richard Sandiford <richard.sandiford@arm.com>

Thu, 6 Feb 2020 16:40:12 +0000 (16:40 +0000)
author Delia Burduv <delia.burduv@arm.com>
Thu, 6 Feb 2020 09:45:52 +0000 (09:45 +0000)
committer Richard Sandiford <richard.sandiford@arm.com>
Thu, 6 Feb 2020 16:40:12 +0000 (16:40 +0000)
diff --git a/gcc/ChangeLog b/gcc/ChangeLog

index c842c4fc02ffe0bc84c1701637d439e8d1e0c251..1fe29d337cd455d8cfc89363e90852ce51294a2c 100644 (file)
--- a/gcc/ChangeLog
+++ b/gcc/ChangeLog
@@ -1,3 +1,25 @@
+2020-02-06  Delia Burduv  <delia.burduv@arm.com>
+
+       * config/aarch64/aarch64-simd-builtins.def
+       (bfmlaq): New built-in function.
+       (bfmlalb): New built-in function.
+       (bfmlalt): New built-in function.
+       (bfmlalb_lane): New built-in function.
+       (bfmlalt_lane): New built-in function.
+       * config/aarch64/aarch64-simd.md
+       (aarch64_bfmmlaqv4sf): New pattern.
+       (aarch64_bfmlal<bt>v4sf): New pattern.
+       (aarch64_bfmlal<bt>_lane<q>v4sf): New pattern.
+       * config/aarch64/arm_neon.h (vbfmmlaq_f32): New intrinsic.
+       (vbfmlalbq_f32): New intrinsic.
+       (vbfmlaltq_f32): New intrinsic.
+       (vbfmlalbq_lane_f32): New intrinsic.
+       (vbfmlaltq_lane_f32): New intrinsic.
+       (vbfmlalbq_laneq_f32): New intrinsic.
+       (vbfmlaltq_laneq_f32): New intrinsic.
+       * config/aarch64/iterators.md (BF_MLA): New int iterator.
+       (bt): New int attribute.
+
  2020-02-06  Uroš Bizjak  <ubizjak@gmail.com>
  
         * config/i386/i386.md (*pushtf): Emit "#" instead of
diff --git a/gcc/config/aarch64/aarch64-simd-builtins.def b/gcc/config/aarch64/aarch64-simd-builtins.def

index a118f4f121de067c0a80f691b852247b0ab27f7a..02b2154cf64dad02cf57b110af51b19dd7f91c51 100644 (file)
--- a/gcc/config/aarch64/aarch64-simd-builtins.def
+++ b/gcc/config/aarch64/aarch64-simd-builtins.def
@@ -692,3 +692,14 @@
    VAR2 (TERNOP, bfdot, 0, v2sf, v4sf)
    VAR2 (QUADOP_LANE_PAIR, bfdot_lane, 0, v2sf, v4sf)
    VAR2 (QUADOP_LANE_PAIR, bfdot_laneq, 0, v2sf, v4sf)
+
+  /* Implemented by aarch64_bfmmlaqv4sf  */
+  VAR1 (TERNOP, bfmmlaq, 0, v4sf)
+
+  /* Implemented by aarch64_bfmlal<bt>{_lane{q}}v4sf  */
+  VAR1 (TERNOP, bfmlalb, 0, v4sf)
+  VAR1 (TERNOP, bfmlalt, 0, v4sf)
+  VAR1 (QUADOP_LANE, bfmlalb_lane, 0, v4sf)
+  VAR1 (QUADOP_LANE, bfmlalt_lane, 0, v4sf)
+  VAR1 (QUADOP_LANE, bfmlalb_lane_q, 0, v4sf)
+  VAR1 (QUADOP_LANE, bfmlalt_lane_q, 0, v4sf)
diff --git a/gcc/config/aarch64/aarch64-simd.md b/gcc/config/aarch64/aarch64-simd.md

index 5a58051cf7e558b43c0d110c00f53d50d2ff5080..f2b440c36bbbf58a101a0c4909dc5d45b1c8180b 100644 (file)
--- a/gcc/config/aarch64/aarch64-simd.md
+++ b/gcc/config/aarch64/aarch64-simd.md
@@ -7134,3 +7134,42 @@
  }
    [(set_attr "type" "neon_dot<VDQSF:q>")]
  )
+
+;; bfmmla
+(define_insn "aarch64_bfmmlaqv4sf"
+  [(set (match_operand:V4SF 0 "register_operand" "=w")
+        (plus:V4SF (match_operand:V4SF 1 "register_operand" "0")
+                   (unspec:V4SF [(match_operand:V8BF 2 "register_operand" "w")
+                                 (match_operand:V8BF 3 "register_operand" "w")]
+                    UNSPEC_BFMMLA)))]
+  "TARGET_BF16_SIMD"
+  "bfmmla\\t%0.4s, %2.8h, %3.8h"
+  [(set_attr "type" "neon_fp_mla_s_q")]
+)
+
+;; bfmlal<bt>
+(define_insn "aarch64_bfmlal<bt>v4sf"
+  [(set (match_operand:V4SF 0 "register_operand" "=w")
+        (plus: V4SF (match_operand:V4SF 1 "register_operand" "0")
+                    (unspec:V4SF [(match_operand:V8BF 2 "register_operand" "w")
+                                  (match_operand:V8BF 3 "register_operand" "w")]
+                     BF_MLA)))]
+  "TARGET_BF16_SIMD"
+  "bfmlal<bt>\\t%0.4s, %2.8h, %3.8h"
+  [(set_attr "type" "neon_fp_mla_s_q")]
+)
+
+(define_insn "aarch64_bfmlal<bt>_lane<q>v4sf"
+  [(set (match_operand:V4SF 0 "register_operand" "=w")
+        (plus: V4SF (match_operand:V4SF 1 "register_operand" "0")
+                    (unspec:V4SF [(match_operand:V8BF 2 "register_operand" "w")
+                                  (match_operand:VBF 3 "register_operand" "w")
+                                  (match_operand:SI 4 "const_int_operand" "n")]
+                     BF_MLA)))]
+  "TARGET_BF16_SIMD"
+{
+  operands[4] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[4]));
+  return "bfmlal<bt>\\t%0.4s, %2.8h, %3.h[%4]";
+}
+  [(set_attr "type" "neon_fp_mla_s_scalar_q")]
+)
diff --git a/gcc/config/aarch64/arm_neon.h b/gcc/config/aarch64/arm_neon.h

index 7f05c3f9eca844b0e7b824a191223a4906c825b1..db845a3d2d204d28f0e62fa61927e01dcb15f4a4 100644 (file)
--- a/gcc/config/aarch64/arm_neon.h
+++ b/gcc/config/aarch64/arm_neon.h
@@ -34660,6 +34660,60 @@ vbfdotq_laneq_f32 (float32x4_t __r, bfloat16x8_t __a, bfloat16x8_t __b,
    return __builtin_aarch64_bfdot_laneqv4sf (__r, __a, __b, __index);
  }
  
+__extension__ extern __inline float32x4_t
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
+vbfmmlaq_f32 (float32x4_t __r, bfloat16x8_t __a, bfloat16x8_t __b)
+
+{
+  return __builtin_aarch64_bfmmlaqv4sf (__r, __a, __b);
+}
+
+__extension__ extern __inline float32x4_t
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
+vbfmlalbq_f32 (float32x4_t __r, bfloat16x8_t __a, bfloat16x8_t __b)
+{
+  return __builtin_aarch64_bfmlalbv4sf (__r, __a, __b);
+}
+
+__extension__ extern __inline float32x4_t
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
+vbfmlaltq_f32 (float32x4_t __r, bfloat16x8_t __a, bfloat16x8_t __b)
+{
+  return __builtin_aarch64_bfmlaltv4sf (__r, __a, __b);
+}
+
+__extension__ extern __inline float32x4_t
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
+vbfmlalbq_lane_f32 (float32x4_t __r, bfloat16x8_t __a, bfloat16x4_t __b,
+                   const int __index)
+{
+  return __builtin_aarch64_bfmlalb_lanev4sf (__r, __a, __b, __index);
+}
+
+__extension__ extern __inline float32x4_t
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
+vbfmlaltq_lane_f32 (float32x4_t __r, bfloat16x8_t __a, bfloat16x4_t __b,
+                   const int __index)
+{
+  return __builtin_aarch64_bfmlalt_lanev4sf (__r, __a, __b, __index);
+}
+
+__extension__ extern __inline float32x4_t
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
+vbfmlalbq_laneq_f32 (float32x4_t __r, bfloat16x8_t __a, bfloat16x8_t __b,
+                    const int __index)
+{
+  return __builtin_aarch64_bfmlalb_lane_qv4sf (__r, __a, __b, __index);
+}
+
+__extension__ extern __inline float32x4_t
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
+vbfmlaltq_laneq_f32 (float32x4_t __r, bfloat16x8_t __a, bfloat16x8_t __b,
+                    const int __index)
+{
+  return __builtin_aarch64_bfmlalt_lane_qv4sf (__r, __a, __b, __index);
+}
+
  #pragma GCC pop_options
  
  /* AdvSIMD 8-bit Integer Matrix Multiply (I8MM) intrinsics.  */
diff --git a/gcc/config/aarch64/iterators.md b/gcc/config/aarch64/iterators.md

index 3e3fd9d0cd26185d92be264b27321b28f4c99e46..7c62f164347e29e7d0bd4a881acdcf84712ead4d 100644 (file)
--- a/gcc/config/aarch64/iterators.md
+++ b/gcc/config/aarch64/iterators.md
@@ -2620,6 +2620,9 @@
  
  (define_int_iterator FMMLA [UNSPEC_FMMLA])
  
+(define_int_iterator BF_MLA [UNSPEC_BFMLALB
+                            UNSPEC_BFMLALT])
+
  ;; Iterators for atomic operations.
  
  (define_int_iterator ATOMIC_LDOP
@@ -2871,6 +2874,8 @@
  (define_int_attr ab [(UNSPEC_CLASTA "a") (UNSPEC_CLASTB "b")
                      (UNSPEC_LASTA "a") (UNSPEC_LASTB "b")])
  
+(define_int_attr bt [(UNSPEC_BFMLALB "b") (UNSPEC_BFMLALT "t")])
+
  (define_int_attr addsub [(UNSPEC_SHADD "add")
                          (UNSPEC_UHADD "add")
                          (UNSPEC_SRHADD "add")
diff --git a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/bfmlalbt-compile.c b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/bfmlalbt-compile.c

new file mode 100644 (file)

index 0000000..9810e4b
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/bfmlalbt-compile.c
@@ -0,0 +1,67 @@
+/* { dg-do assemble { target { aarch64*-*-* } } } */
+/* { dg-require-effective-target arm_v8_2a_bf16_neon_ok } */
+/* { dg-add-options arm_v8_2a_bf16_neon } */
+/* { dg-additional-options "-save-temps" } */
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include <arm_neon.h>
+
+/*
+**test_bfmlalb:
+**      bfmlalb        v0.4s, v1.8h, v2.8h
+**      ret
+*/
+float32x4_t test_bfmlalb (float32x4_t r, bfloat16x8_t a, bfloat16x8_t b)
+{
+  return vbfmlalbq_f32 (r, a, b);
+}
+
+/*
+**test_bfmlalt:
+**      bfmlalt        v0.4s, v1.8h, v2.8h
+**      ret
+*/
+float32x4_t test_bfmlalt (float32x4_t r, bfloat16x8_t a, bfloat16x8_t b)
+{
+  return vbfmlaltq_f32 (r, a, b);
+}
+
+/*
+**test_bfmlalb_lane:
+**      bfmlalb        v0.4s, v1.8h, v2.h[0]
+**      ret
+*/
+float32x4_t test_bfmlalb_lane (float32x4_t r, bfloat16x8_t a, bfloat16x4_t b)
+{
+  return vbfmlalbq_lane_f32 (r, a, b, 0);
+}
+
+/*
+**test_bfmlalt_lane:
+**      bfmlalt        v0.4s, v1.8h, v2.h[2]
+**      ret
+*/
+float32x4_t test_bfmlalt_lane (float32x4_t r, bfloat16x8_t a, bfloat16x4_t b)
+{
+  return vbfmlaltq_lane_f32 (r, a, b, 2);
+}
+
+/*
+**test_bfmlalb_laneq:
+**      bfmlalb        v0.4s, v1.8h, v2.h[4]
+**      ret
+*/
+float32x4_t test_bfmlalb_laneq (float32x4_t r, bfloat16x8_t a, bfloat16x8_t b)
+{
+  return vbfmlalbq_laneq_f32 (r, a, b, 4);
+}
+
+/*
+**test_bfmlalt_laneq:
+**      bfmlalt        v0.4s, v1.8h, v2.h[7]
+**      ret
+*/
+float32x4_t test_bfmlalt_laneq (float32x4_t r, bfloat16x8_t a, bfloat16x8_t b)
+{
+  return vbfmlaltq_laneq_f32 (r, a, b, 7);
+}
diff --git a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/bfmmla-compile.c b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/bfmmla-compile.c

new file mode 100644 (file)

index 0000000..0aaa69f
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/bfmmla-compile.c
@@ -0,0 +1,18 @@
+/* { dg-do assemble { target { aarch64*-*-* } } } */
+/* { dg-require-effective-target arm_v8_2a_bf16_neon_ok } */
+/* { dg-add-options arm_v8_2a_bf16_neon } */
+/* { dg-additional-options "-save-temps" } */
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include <arm_neon.h>
+
+
+/*
+**test_bfmmla:
+**     bfmmla  v0.4s, v1.8h, v2.8h
+**     ret
+*/
+float32x4_t test_bfmmla (float32x4_t r, bfloat16x8_t x, bfloat16x8_t y)
+{
+  return vbfmmlaq_f32 (r, x, y);
+}
diff --git a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vbfmlalbt_lane_f32_indices_1.c b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vbfmlalbt_lane_f32_indices_1.c

new file mode 100644 (file)

index 0000000..4d50ba3
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vbfmlalbt_lane_f32_indices_1.c
@@ -0,0 +1,46 @@
+/* { dg-do compile { target { aarch64*-*-* } } } */
+/* { dg-skip-if "" { *-*-* } { "-fno-fat-lto-objects" } } */
+/* { dg-require-effective-target arm_v8_2a_bf16_neon_ok } */
+/* { dg-add-options arm_v8_2a_bf16_neon } */
+
+#include <arm_neon.h>
+
+void
+f_vbfmlaltq_lane_f32 (float32x4_t r, bfloat16x8_t a, bfloat16x4_t b)
+{
+  /* { dg-error "lane -1 out of range 0 - 3" "" { target *-*-* } 0 } */
+  vbfmlaltq_lane_f32 (r, a, b, -1);
+  /* { dg-error "lane 4 out of range 0 - 3" "" { target *-*-* } 0 } */
+  vbfmlaltq_lane_f32 (r, a, b, 4);
+  return;
+}
+
+void
+f_vbfmlaltq_laneq_f32 (float32x4_t r, bfloat16x8_t a, bfloat16x8_t b)
+{
+  /* { dg-error "lane -1 out of range 0 - 7" "" { target *-*-* } 0 } */
+  vbfmlaltq_laneq_f32 (r, a, b, -1);
+  /* { dg-error "lane 8 out of range 0 - 7" "" { target *-*-* } 0 } */
+  vbfmlaltq_laneq_f32 (r, a, b, 8);
+  return;
+}
+
+void
+f_vbfmlalbq_lane_f32 (float32x4_t r, bfloat16x8_t a, bfloat16x4_t b)
+{
+  /* { dg-error "lane -2 out of range 0 - 3" "" { target *-*-* } 0 } */
+  vbfmlalbq_lane_f32 (r, a, b, -2);
+  /* { dg-error "lane 5 out of range 0 - 3" "" { target *-*-* } 0 } */
+  vbfmlalbq_lane_f32 (r, a, b, 5);
+  return;
+}
+
+void
+f_vbfmlalbq_laneq_f32 (float32x4_t r, bfloat16x8_t a, bfloat16x8_t b)
+{
+  /* { dg-error "lane -2 out of range 0 - 7" "" { target *-*-* } 0 } */
+  vbfmlalbq_laneq_f32 (r, a, b, -2);
+  /* { dg-error "lane 9 out of range 0 - 7" "" { target *-*-* } 0 } */
+  vbfmlalbq_laneq_f32 (r, a, b, 9);
+  return;
+}
author	Delia Burduv <delia.burduv@arm.com>
	Thu, 6 Feb 2020 09:45:52 +0000 (09:45 +0000)
committer	Richard Sandiford <richard.sandiford@arm.com>
	Thu, 6 Feb 2020 16:40:12 +0000 (16:40 +0000)
gcc/ChangeLog		patch \| blob \| history
gcc/config/aarch64/aarch64-simd-builtins.def		patch \| blob \| history
gcc/config/aarch64/aarch64-simd.md		patch \| blob \| history
gcc/config/aarch64/arm_neon.h		patch \| blob \| history
gcc/config/aarch64/iterators.md		patch \| blob \| history
gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/bfmlalbt-compile.c	[new file with mode: 0644]	patch \| blob
gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/bfmmla-compile.c	[new file with mode: 0644]	patch \| blob
gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vbfmlalbt_lane_f32_indices_1.c	[new file with mode: 0644]	patch \| blob