[AArch32] ACLE intrinsics bfloat16 vmmla and vfma<b/t> for AArch32 AdvSIMD
authorDelia Burduv <delia.burduv@arm.com>
Thu, 5 Mar 2020 11:18:04 +0000 (11:18 +0000)
committerKyrylo Tkachov <kyrylo.tkachov@arm.com>
Thu, 5 Mar 2020 11:19:43 +0000 (11:19 +0000)
This patch adds the ARMv8.6 ACLE intrinsics for vmmla, vfmab and vfmat
as part of the BFloat16 extension.
(https://developer.arm.com/docs/101028/latest.)
The intrinsics are declared in arm_neon.h and the RTL patterns are
defined in neon.md.
Two new tests are added to check assembler output and lane indices.

2020-03-05  Delia Burduv  <delia.burduv@arm.com>

* config/arm/arm_neon.h (vbfmmlaq_f32): New.
(vbfmlalbq_f32): New.
(vbfmlaltq_f32): New.
(vbfmlalbq_lane_f32): New.
(vbfmlaltq_lane_f32): New.
(vbfmlalbq_laneq_f32): New.
(vbfmlaltq_laneq_f32): New.
* config/arm/arm_neon_builtins.def (vmmla): New.
(vfmab): New.
(vfmat): New.
(vfmab_lane): New.
(vfmat_lane): New.
(vfmab_laneq): New.
(vfmat_laneq): New.
* config/arm/iterators.md (BF_MA): New int iterator.
(bt): New int attribute.
(VQXBF): Copy of VQX with V8BF.
* config/arm/neon.md (neon_vmmlav8bf): New insn.
(neon_vfma<bt>v8bf): New insn.
(neon_vfma<bt>_lanev8bf): New insn.
(neon_vfma<bt>_laneqv8bf): New expand.
(neon_vget_high<mode>): Changed iterator to VQXBF.
* config/arm/unspecs.md (UNSPEC_BFMMLA): New UNSPEC.
(UNSPEC_BFMAB): New UNSPEC.
(UNSPEC_BFMAT): New UNSPEC.

2020-03-05  Delia Burduv  <delia.burduv@arm.com>

* gcc.target/arm/simd/bf16_ma_1.c: New test.
* gcc.target/arm/simd/bf16_ma_2.c: New test.
* gcc.target/arm/simd/bf16_mmla_1.c: New test.

gcc/testsuite/ChangeLog
gcc/testsuite/gcc.target/arm/simd/bf16_ma_1.c [new file with mode: 0644]
gcc/testsuite/gcc.target/arm/simd/bf16_ma_2.c [new file with mode: 0644]
gcc/testsuite/gcc.target/arm/simd/bf16_mmla_1.c [new file with mode: 0644]

index 5b94ab519a0a914d824a5f74024a3102281251a9..7dcc80d08510070679e47318da4eeed21733e8a1 100644 (file)
@@ -1,3 +1,9 @@
+2020-03-05  Delia Burduv  <delia.burduv@arm.com>
+
+       * gcc.target/arm/simd/bf16_ma_1.c: New test.
+       * gcc.target/arm/simd/bf16_ma_2.c: New test.
+       * gcc.target/arm/simd/bf16_mmla_1.c: New test.
+
 2020-03-05  Jakub Jelinek  <jakub@redhat.com>
 
        PR middle-end/93399
diff --git a/gcc/testsuite/gcc.target/arm/simd/bf16_ma_1.c b/gcc/testsuite/gcc.target/arm/simd/bf16_ma_1.c
new file mode 100644 (file)
index 0000000..6729af7
--- /dev/null
@@ -0,0 +1,79 @@
+/* { dg-do assemble } */
+/* { dg-require-effective-target arm_v8_2a_bf16_neon_ok } */
+/* { dg-add-options arm_v8_2a_bf16_neon } */
+/* { dg-additional-options "-save-temps -O2" }  */
+/* { dg-final { check-function-bodies "**" "" } } */
+
+#include "arm_neon.h"
+
+/*
+**test_vfmabq_f32:
+**      ...
+**      vfmab.bf16     q0, q1, q2
+**      bx     lr
+*/
+float32x4_t
+test_vfmabq_f32 (float32x4_t r, bfloat16x8_t a, bfloat16x8_t b)
+{
+  return vbfmlalbq_f32 (r, a, b);
+}
+
+/*
+**test_vfmatq_f32:
+**      ...
+**      vfmat.bf16     q0, q1, q2
+**      bx     lr
+*/
+float32x4_t
+test_vfmatq_f32 (float32x4_t r, bfloat16x8_t a, bfloat16x8_t b)
+{
+  return vbfmlaltq_f32 (r, a, b);
+}
+
+/*
+**test_vfmabq_lane_f32:
+**      ...
+**      vfmab.bf16     q0, q1, d4\[0\]
+**      bx     lr
+*/
+float32x4_t
+test_vfmabq_lane_f32 (float32x4_t r, bfloat16x8_t a, bfloat16x4_t b)
+{
+  return vbfmlalbq_lane_f32 (r, a, b, 0);
+}
+
+/*
+**test_vfmatq_lane_f32:
+**      ...
+**      vfmat.bf16     q0, q1, d4\[2\]
+**      bx     lr
+*/
+float32x4_t
+test_vfmatq_lane_f32 (float32x4_t r, bfloat16x8_t a, bfloat16x4_t b)
+{
+  return vbfmlaltq_lane_f32 (r, a, b, 2);
+}
+
+/*
+**test_vfmabq_laneq_f32:
+**      ...
+**      vfmab.bf16     q0, q1, d5\[1\]
+**      bx     lr
+*/
+float32x4_t
+test_vfmabq_laneq_f32 (float32x4_t r, bfloat16x8_t a, bfloat16x8_t b)
+{
+  return vbfmlalbq_laneq_f32 (r, a, b, 5);
+}
+
+/*
+**test_vfmatq_laneq_f32:
+**      ...
+**      vfmat.bf16     q0, q1, d5\[3\]
+**      bx     lr
+*/
+float32x4_t
+test_vfmatq_laneq_f32 (float32x4_t r, bfloat16x8_t a, bfloat16x8_t b)
+{
+  return vbfmlaltq_laneq_f32 (r, a, b, 7);
+}
diff --git a/gcc/testsuite/gcc.target/arm/simd/bf16_ma_2.c b/gcc/testsuite/gcc.target/arm/simd/bf16_ma_2.c
new file mode 100644 (file)
index 0000000..5a7a2a7
--- /dev/null
@@ -0,0 +1,35 @@
+/* { dg-do compile { target { arm*-*-* } } } */
+/* { dg-require-effective-target arm_v8_2a_bf16_neon_ok } */
+/* { dg-add-options arm_v8_2a_bf16_neon } */
+
+#include "arm_neon.h"
+
+/* Test lane index limits for vfmabq_lane_f32  */
+float32x4_t
+test_vfmabq_lane_f32_low (float32x4_t r, bfloat16x8_t a, bfloat16x4_t b)
+{
+  /* { dg-error "lane -1 out of range 0 - 3" "" { target *-*-* } 0 } */
+  return vbfmlalbq_lane_f32 (r, a, b, -1);
+}
+
+float32x4_t
+test_vfmabq_lane_f32_high (float32x4_t r, bfloat16x8_t a, bfloat16x4_t b)
+{
+  /* { dg-error "lane 4 out of range 0 - 3" "" { target *-*-* } 0 } */
+  return vbfmlalbq_lane_f32 (r, a, b, 4);
+}
+
+/* Test lane index limits for vfmatq_lane_f32  */
+float32x4_t
+test_vfmatq_lane_f32_low (float32x4_t r, bfloat16x8_t a, bfloat16x4_t b)
+{
+  /* { dg-error "lane -2 out of range 0 - 3" "" { target *-*-* } 0 } */
+  return vbfmlaltq_lane_f32 (r, a, b, -2);
+}
+
+float32x4_t
+test_vfmatq_lane_f32_high (float32x4_t r, bfloat16x8_t a, bfloat16x4_t b)
+{
+  /* { dg-error "lane 5 out of range 0 - 3" "" { target *-*-* } 0 } */
+  return vbfmlaltq_lane_f32 (r, a, b, 5);
+}
diff --git a/gcc/testsuite/gcc.target/arm/simd/bf16_mmla_1.c b/gcc/testsuite/gcc.target/arm/simd/bf16_mmla_1.c
new file mode 100644 (file)
index 0000000..5f9c85b
--- /dev/null
@@ -0,0 +1,19 @@
+/* { dg-do assemble } */
+/* { dg-require-effective-target arm_v8_2a_bf16_neon_ok } */
+/* { dg-add-options arm_v8_2a_bf16_neon } */
+/* { dg-additional-options "-save-temps -O2" } */
+/* { dg-final { check-function-bodies "**" "" } } */
+
+#include <arm_neon.h>
+
+/*
+**test_vmmlaq_f32:
+**        ...
+**        vmmla.bf16   q0, q1, q2
+**        bx   lr
+*/
+float32x4_t
+test_vmmlaq_f32 (float32x4_t r, bfloat16x8_t x, bfloat16x8_t y)
+{
+  return vbfmmlaq_f32 (r, x, y);
+}