From 44e570d9fb01682e24ae53e6004f18a3c2a52493 Mon Sep 17 00:00:00 2001 From: Andrea Corallo Date: Thu, 15 Oct 2020 10:16:18 +0200 Subject: [PATCH] aarch64: Add bfloat16 vldN_lane_bf16 + vldNq_lane_bf16 intrisics gcc/ChangeLog 2020-10-15 Andrea Corallo * config/aarch64/arm_neon.h (__LD2_LANE_FUNC, __LD3_LANE_FUNC) (__LD4_LANE_FUNC): Rename the macro generating the 'q' variants into __LD2Q_LANE_FUNC, __LD2Q_LANE_FUNC, __LD2Q_LANE_FUNC so they all can be undefed at the and of the file. (vld2_lane_bf16, vld2q_lane_bf16, vld3_lane_bf16, vld3q_lane_bf16) (vld4_lane_bf16, vld4q_lane_bf16): Add new intrinsics. gcc/testsuite/ChangeLog 2020-10-15 Andrea Corallo * gcc.target/aarch64/advsimd-intrinsics/bf16_vldN_lane_1.c: New testcase. * gcc.target/aarch64/advsimd-intrinsics/bf16_vldN_lane_2.c: Likewise. * gcc.target/aarch64/advsimd-intrinsics/vld2_lane_bf16_indices_1.c: Likewise. * gcc.target/aarch64/advsimd-intrinsics/vld2q_lane_bf16_indices_1.c: Likewise. * gcc.target/aarch64/advsimd-intrinsics/vld3_lane_bf16_indices_1.c: Likewise. * gcc.target/aarch64/advsimd-intrinsics/vld3q_lane_bf16_indices_1.c: Likewise. * gcc.target/aarch64/advsimd-intrinsics/vld4_lane_bf16_indices_1.c: Likewise. * gcc.target/aarch64/advsimd-intrinsics/vld4q_lane_bf16_indices_1.c: Likewise. --- gcc/config/aarch64/arm_neon.h | 118 +++++++++--------- .../advsimd-intrinsics/bf16_vldN_lane_1.c | 74 +++++++++++ .../advsimd-intrinsics/bf16_vldN_lane_2.c | 52 ++++++++ .../vld2_lane_bf16_indices_1.c | 17 +++ .../vld2q_lane_bf16_indices_1.c | 17 +++ .../vld3_lane_bf16_indices_1.c | 17 +++ .../vld3q_lane_bf16_indices_1.c | 17 +++ .../vld4_lane_bf16_indices_1.c | 17 +++ .../vld4q_lane_bf16_indices_1.c | 17 +++ 9 files changed, 289 insertions(+), 57 deletions(-) create mode 100644 gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/bf16_vldN_lane_1.c create mode 100644 gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/bf16_vldN_lane_2.c create mode 100644 gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vld2_lane_bf16_indices_1.c create mode 100644 gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vld2q_lane_bf16_indices_1.c create mode 100644 gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vld3_lane_bf16_indices_1.c create mode 100644 gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vld3q_lane_bf16_indices_1.c create mode 100644 gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vld4_lane_bf16_indices_1.c create mode 100644 gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vld4q_lane_bf16_indices_1.c diff --git a/gcc/config/aarch64/arm_neon.h b/gcc/config/aarch64/arm_neon.h index 55c2a6590aa..c9faa4fa19f 100644 --- a/gcc/config/aarch64/arm_neon.h +++ b/gcc/config/aarch64/arm_neon.h @@ -20848,11 +20848,9 @@ __LD2_LANE_FUNC (uint32x2x2_t, uint32x2_t, uint32x4x2_t, uint32_t, v2si, v4si, s __LD2_LANE_FUNC (uint64x1x2_t, uint64x1_t, uint64x2x2_t, uint64_t, di, v2di, di, u64, int64x2_t) -#undef __LD2_LANE_FUNC - /* vld2q_lane */ -#define __LD2_LANE_FUNC(intype, vtype, ptrtype, mode, ptrmode, funcsuffix) \ +#define __LD2Q_LANE_FUNC(intype, vtype, ptrtype, mode, ptrmode, funcsuffix) \ __extension__ extern __inline intype \ __attribute__ ((__always_inline__, __gnu_inline__,__artificial__)) \ vld2q_lane_##funcsuffix (const ptrtype * __ptr, intype __b, const int __c) \ @@ -20868,22 +20866,20 @@ vld2q_lane_##funcsuffix (const ptrtype * __ptr, intype __b, const int __c) \ return ret; \ } -__LD2_LANE_FUNC (float16x8x2_t, float16x8_t, float16_t, v8hf, hf, f16) -__LD2_LANE_FUNC (float32x4x2_t, float32x4_t, float32_t, v4sf, sf, f32) -__LD2_LANE_FUNC (float64x2x2_t, float64x2_t, float64_t, v2df, df, f64) -__LD2_LANE_FUNC (poly8x16x2_t, poly8x16_t, poly8_t, v16qi, qi, p8) -__LD2_LANE_FUNC (poly16x8x2_t, poly16x8_t, poly16_t, v8hi, hi, p16) -__LD2_LANE_FUNC (poly64x2x2_t, poly64x2_t, poly64_t, v2di, di, p64) -__LD2_LANE_FUNC (int8x16x2_t, int8x16_t, int8_t, v16qi, qi, s8) -__LD2_LANE_FUNC (int16x8x2_t, int16x8_t, int16_t, v8hi, hi, s16) -__LD2_LANE_FUNC (int32x4x2_t, int32x4_t, int32_t, v4si, si, s32) -__LD2_LANE_FUNC (int64x2x2_t, int64x2_t, int64_t, v2di, di, s64) -__LD2_LANE_FUNC (uint8x16x2_t, uint8x16_t, uint8_t, v16qi, qi, u8) -__LD2_LANE_FUNC (uint16x8x2_t, uint16x8_t, uint16_t, v8hi, hi, u16) -__LD2_LANE_FUNC (uint32x4x2_t, uint32x4_t, uint32_t, v4si, si, u32) -__LD2_LANE_FUNC (uint64x2x2_t, uint64x2_t, uint64_t, v2di, di, u64) - -#undef __LD2_LANE_FUNC +__LD2Q_LANE_FUNC (float16x8x2_t, float16x8_t, float16_t, v8hf, hf, f16) +__LD2Q_LANE_FUNC (float32x4x2_t, float32x4_t, float32_t, v4sf, sf, f32) +__LD2Q_LANE_FUNC (float64x2x2_t, float64x2_t, float64_t, v2df, df, f64) +__LD2Q_LANE_FUNC (poly8x16x2_t, poly8x16_t, poly8_t, v16qi, qi, p8) +__LD2Q_LANE_FUNC (poly16x8x2_t, poly16x8_t, poly16_t, v8hi, hi, p16) +__LD2Q_LANE_FUNC (poly64x2x2_t, poly64x2_t, poly64_t, v2di, di, p64) +__LD2Q_LANE_FUNC (int8x16x2_t, int8x16_t, int8_t, v16qi, qi, s8) +__LD2Q_LANE_FUNC (int16x8x2_t, int16x8_t, int16_t, v8hi, hi, s16) +__LD2Q_LANE_FUNC (int32x4x2_t, int32x4_t, int32_t, v4si, si, s32) +__LD2Q_LANE_FUNC (int64x2x2_t, int64x2_t, int64_t, v2di, di, s64) +__LD2Q_LANE_FUNC (uint8x16x2_t, uint8x16_t, uint8_t, v16qi, qi, u8) +__LD2Q_LANE_FUNC (uint16x8x2_t, uint16x8_t, uint16_t, v8hi, hi, u16) +__LD2Q_LANE_FUNC (uint32x4x2_t, uint32x4_t, uint32_t, v4si, si, u32) +__LD2Q_LANE_FUNC (uint64x2x2_t, uint64x2_t, uint64_t, v2di, di, u64) /* vld3_lane */ @@ -20947,11 +20943,9 @@ __LD3_LANE_FUNC (uint32x2x3_t, uint32x2_t, uint32x4x3_t, uint32_t, v2si, v4si, s __LD3_LANE_FUNC (uint64x1x3_t, uint64x1_t, uint64x2x3_t, uint64_t, di, v2di, di, u64, int64x2_t) -#undef __LD3_LANE_FUNC - /* vld3q_lane */ -#define __LD3_LANE_FUNC(intype, vtype, ptrtype, mode, ptrmode, funcsuffix) \ +#define __LD3Q_LANE_FUNC(intype, vtype, ptrtype, mode, ptrmode, funcsuffix) \ __extension__ extern __inline intype \ __attribute__ ((__always_inline__, __gnu_inline__,__artificial__)) \ vld3q_lane_##funcsuffix (const ptrtype * __ptr, intype __b, const int __c) \ @@ -20969,22 +20963,20 @@ vld3q_lane_##funcsuffix (const ptrtype * __ptr, intype __b, const int __c) \ return ret; \ } -__LD3_LANE_FUNC (float16x8x3_t, float16x8_t, float16_t, v8hf, hf, f16) -__LD3_LANE_FUNC (float32x4x3_t, float32x4_t, float32_t, v4sf, sf, f32) -__LD3_LANE_FUNC (float64x2x3_t, float64x2_t, float64_t, v2df, df, f64) -__LD3_LANE_FUNC (poly8x16x3_t, poly8x16_t, poly8_t, v16qi, qi, p8) -__LD3_LANE_FUNC (poly16x8x3_t, poly16x8_t, poly16_t, v8hi, hi, p16) -__LD3_LANE_FUNC (poly64x2x3_t, poly64x2_t, poly64_t, v2di, di, p64) -__LD3_LANE_FUNC (int8x16x3_t, int8x16_t, int8_t, v16qi, qi, s8) -__LD3_LANE_FUNC (int16x8x3_t, int16x8_t, int16_t, v8hi, hi, s16) -__LD3_LANE_FUNC (int32x4x3_t, int32x4_t, int32_t, v4si, si, s32) -__LD3_LANE_FUNC (int64x2x3_t, int64x2_t, int64_t, v2di, di, s64) -__LD3_LANE_FUNC (uint8x16x3_t, uint8x16_t, uint8_t, v16qi, qi, u8) -__LD3_LANE_FUNC (uint16x8x3_t, uint16x8_t, uint16_t, v8hi, hi, u16) -__LD3_LANE_FUNC (uint32x4x3_t, uint32x4_t, uint32_t, v4si, si, u32) -__LD3_LANE_FUNC (uint64x2x3_t, uint64x2_t, uint64_t, v2di, di, u64) - -#undef __LD3_LANE_FUNC +__LD3Q_LANE_FUNC (float16x8x3_t, float16x8_t, float16_t, v8hf, hf, f16) +__LD3Q_LANE_FUNC (float32x4x3_t, float32x4_t, float32_t, v4sf, sf, f32) +__LD3Q_LANE_FUNC (float64x2x3_t, float64x2_t, float64_t, v2df, df, f64) +__LD3Q_LANE_FUNC (poly8x16x3_t, poly8x16_t, poly8_t, v16qi, qi, p8) +__LD3Q_LANE_FUNC (poly16x8x3_t, poly16x8_t, poly16_t, v8hi, hi, p16) +__LD3Q_LANE_FUNC (poly64x2x3_t, poly64x2_t, poly64_t, v2di, di, p64) +__LD3Q_LANE_FUNC (int8x16x3_t, int8x16_t, int8_t, v16qi, qi, s8) +__LD3Q_LANE_FUNC (int16x8x3_t, int16x8_t, int16_t, v8hi, hi, s16) +__LD3Q_LANE_FUNC (int32x4x3_t, int32x4_t, int32_t, v4si, si, s32) +__LD3Q_LANE_FUNC (int64x2x3_t, int64x2_t, int64_t, v2di, di, s64) +__LD3Q_LANE_FUNC (uint8x16x3_t, uint8x16_t, uint8_t, v16qi, qi, u8) +__LD3Q_LANE_FUNC (uint16x8x3_t, uint16x8_t, uint16_t, v8hi, hi, u16) +__LD3Q_LANE_FUNC (uint32x4x3_t, uint32x4_t, uint32_t, v4si, si, u32) +__LD3Q_LANE_FUNC (uint64x2x3_t, uint64x2_t, uint64_t, v2di, di, u64) /* vld4_lane */ @@ -21056,11 +21048,9 @@ __LD4_LANE_FUNC (uint32x2x4_t, uint32x2_t, uint32x4x4_t, uint32_t, v2si, v4si, s __LD4_LANE_FUNC (uint64x1x4_t, uint64x1_t, uint64x2x4_t, uint64_t, di, v2di, di, u64, int64x2_t) -#undef __LD4_LANE_FUNC - /* vld4q_lane */ -#define __LD4_LANE_FUNC(intype, vtype, ptrtype, mode, ptrmode, funcsuffix) \ +#define __LD4Q_LANE_FUNC(intype, vtype, ptrtype, mode, ptrmode, funcsuffix) \ __extension__ extern __inline intype \ __attribute__ ((__always_inline__, __gnu_inline__,__artificial__)) \ vld4q_lane_##funcsuffix (const ptrtype * __ptr, intype __b, const int __c) \ @@ -21080,22 +21070,20 @@ vld4q_lane_##funcsuffix (const ptrtype * __ptr, intype __b, const int __c) \ return ret; \ } -__LD4_LANE_FUNC (float16x8x4_t, float16x8_t, float16_t, v8hf, hf, f16) -__LD4_LANE_FUNC (float32x4x4_t, float32x4_t, float32_t, v4sf, sf, f32) -__LD4_LANE_FUNC (float64x2x4_t, float64x2_t, float64_t, v2df, df, f64) -__LD4_LANE_FUNC (poly8x16x4_t, poly8x16_t, poly8_t, v16qi, qi, p8) -__LD4_LANE_FUNC (poly16x8x4_t, poly16x8_t, poly16_t, v8hi, hi, p16) -__LD4_LANE_FUNC (poly64x2x4_t, poly64x2_t, poly64_t, v2di, di, p64) -__LD4_LANE_FUNC (int8x16x4_t, int8x16_t, int8_t, v16qi, qi, s8) -__LD4_LANE_FUNC (int16x8x4_t, int16x8_t, int16_t, v8hi, hi, s16) -__LD4_LANE_FUNC (int32x4x4_t, int32x4_t, int32_t, v4si, si, s32) -__LD4_LANE_FUNC (int64x2x4_t, int64x2_t, int64_t, v2di, di, s64) -__LD4_LANE_FUNC (uint8x16x4_t, uint8x16_t, uint8_t, v16qi, qi, u8) -__LD4_LANE_FUNC (uint16x8x4_t, uint16x8_t, uint16_t, v8hi, hi, u16) -__LD4_LANE_FUNC (uint32x4x4_t, uint32x4_t, uint32_t, v4si, si, u32) -__LD4_LANE_FUNC (uint64x2x4_t, uint64x2_t, uint64_t, v2di, di, u64) - -#undef __LD4_LANE_FUNC +__LD4Q_LANE_FUNC (float16x8x4_t, float16x8_t, float16_t, v8hf, hf, f16) +__LD4Q_LANE_FUNC (float32x4x4_t, float32x4_t, float32_t, v4sf, sf, f32) +__LD4Q_LANE_FUNC (float64x2x4_t, float64x2_t, float64_t, v2df, df, f64) +__LD4Q_LANE_FUNC (poly8x16x4_t, poly8x16_t, poly8_t, v16qi, qi, p8) +__LD4Q_LANE_FUNC (poly16x8x4_t, poly16x8_t, poly16_t, v8hi, hi, p16) +__LD4Q_LANE_FUNC (poly64x2x4_t, poly64x2_t, poly64_t, v2di, di, p64) +__LD4Q_LANE_FUNC (int8x16x4_t, int8x16_t, int8_t, v16qi, qi, s8) +__LD4Q_LANE_FUNC (int16x8x4_t, int16x8_t, int16_t, v8hi, hi, s16) +__LD4Q_LANE_FUNC (int32x4x4_t, int32x4_t, int32_t, v4si, si, s32) +__LD4Q_LANE_FUNC (int64x2x4_t, int64x2_t, int64_t, v2di, di, s64) +__LD4Q_LANE_FUNC (uint8x16x4_t, uint8x16_t, uint8_t, v16qi, qi, u8) +__LD4Q_LANE_FUNC (uint16x8x4_t, uint16x8_t, uint16_t, v8hi, hi, u16) +__LD4Q_LANE_FUNC (uint32x4x4_t, uint32x4_t, uint32_t, v4si, si, u32) +__LD4Q_LANE_FUNC (uint64x2x4_t, uint64x2_t, uint64_t, v2di, di, u64) /* vmax */ @@ -35752,6 +35740,15 @@ vcopyq_laneq_bf16 (bfloat16x8_t __a, const int __lane1, __a, __lane1); } +__LD2_LANE_FUNC (bfloat16x4x2_t, bfloat16x4_t, bfloat16x8x2_t, bfloat16_t, v4bf, + v8bf, bf, bf16, bfloat16x8_t) +__LD2Q_LANE_FUNC (bfloat16x8x2_t, bfloat16x8_t, bfloat16_t, v8bf, bf, bf16) +__LD3_LANE_FUNC (bfloat16x4x3_t, bfloat16x4_t, bfloat16x8x3_t, bfloat16_t, v4bf, + v8bf, bf, bf16, bfloat16x8_t) +__LD3Q_LANE_FUNC (bfloat16x8x3_t, bfloat16x8_t, bfloat16_t, v8bf, bf, bf16) +__LD4_LANE_FUNC (bfloat16x4x4_t, bfloat16x4_t, bfloat16x8x4_t, bfloat16_t, v4bf, + v8bf, bf, bf16, bfloat16x8_t) +__LD4Q_LANE_FUNC (bfloat16x8x4_t, bfloat16x8_t, bfloat16_t, v8bf, bf, bf16) #pragma GCC pop_options /* AdvSIMD 8-bit Integer Matrix Multiply (I8MM) intrinsics. */ @@ -35965,4 +35962,11 @@ vaddq_p128 (poly128_t __a, poly128_t __b) #undef __aarch64_vdupq_laneq_u32 #undef __aarch64_vdupq_laneq_u64 +#undef __LD2_LANE_FUNC +#undef __LD2Q_LANE_FUNC +#undef __LD3_LANE_FUNC +#undef __LD3Q_LANE_FUNC +#undef __LD4_LANE_FUNC +#undef __LD4Q_LANE_FUNC + #endif diff --git a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/bf16_vldN_lane_1.c b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/bf16_vldN_lane_1.c new file mode 100644 index 00000000000..a83ed3e45da --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/bf16_vldN_lane_1.c @@ -0,0 +1,74 @@ +/* { dg-do run { target { aarch64*-*-* } } } */ +/* { dg-require-effective-target arm_v8_2a_bf16_neon_ok } */ +/* { dg-add-options arm_v8_2a_bf16_neon } */ + +#include + +extern void abort (void); + +typedef union +{ + bfloat16_t bf16; + uint16_t u16; +} bfloat16_u_t; + +#define VARIANTS(VARIANT, STRUCT) \ +VARIANT (bfloat16, , 4, _bf16, 3, STRUCT) \ +VARIANT (bfloat16, q, 8, _bf16, 7, STRUCT) + +#define TESTMETH(BASE, Q, ELTS, SUFFIX, LANE, STRUCT) \ + int \ + test_vld##STRUCT##Q##_lane##SUFFIX (const bfloat16_u_t *data, \ + const bfloat16_u_t *overwrite) \ + { \ + BASE##x##ELTS##x##STRUCT##_t vectors; \ + bfloat16_u_t temp[ELTS]; \ + int i,j; \ + for (i = 0; i < STRUCT; i++, data += ELTS) \ + vectors.val[i] = vld1##Q##SUFFIX ((bfloat16_t *)data); \ + vectors = vld##STRUCT##Q##_lane##SUFFIX ((bfloat16_t *) overwrite, \ + vectors, LANE); \ + while (--i >= 0) \ + { \ + vst1##Q##SUFFIX ((bfloat16_t *)temp, vectors.val[i]); \ + data -= ELTS; /* Point at value loaded before vldN_lane. */ \ + for (j = 0; j < ELTS; j++) \ + if (temp[j].u16 != (j == LANE ? overwrite[i].u16 : data[j].u16)) \ + return 1; \ + } \ + return 0; \ + } + +/* Tests of vld2_lane and vld2q_lane. */ +VARIANTS (TESTMETH, 2) +/* Tests of vld3_lane and vld3q_lane. */ +VARIANTS (TESTMETH, 3) +/* Tests of vld4_lane and vld4q_lane. */ +VARIANTS (TESTMETH, 4) + +#define CHECK(BASE, Q, ELTS, SUFFIX, LANE, STRUCT) \ + if (test_vld##STRUCT##Q##_lane##SUFFIX ((const bfloat16_u_t *)orig_data, \ + BASE##_data) != 0) \ + abort (); + +int +main (int argc, char **argv) +{ + /* Original data for all vector formats. */ + uint64_t orig_data[8] = {0x1234567890abcdefULL, 0x13579bdf02468aceULL, + 0x012389ab4567cdefULL, 0xdeeddadacafe0431ULL, + 0x1032547698badcfeULL, 0xbadbadbadbad0badULL, + 0x0102030405060708ULL, 0x0f0e0d0c0b0a0908ULL}; + + /* Data with which vldN_lane will overwrite some of previous. */ + bfloat16_u_t bfloat16_data[4]; + bfloat16_data[0].u16 = 0xABAB; + bfloat16_data[1].u16 = 0x0; + bfloat16_data[2].u16 = 0xCAFE; + bfloat16_data[3].u16 = 0x1234; + + VARIANTS (CHECK, 2); + VARIANTS (CHECK, 3); + VARIANTS (CHECK, 4); + return 0; +} diff --git a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/bf16_vldN_lane_2.c b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/bf16_vldN_lane_2.c new file mode 100644 index 00000000000..670cf0ba75a --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/bf16_vldN_lane_2.c @@ -0,0 +1,52 @@ +/* { dg-do assemble { target { aarch64*-*-* } } } */ +/* { dg-require-effective-target arm_v8_2a_bf16_neon_ok } */ +/* { dg-add-options arm_v8_2a_bf16_neon } */ +/* { dg-additional-options "-O2 --save-temps" } */ + +#include + +bfloat16x4x2_t +test_vld2_lane_bf16 (const bfloat16_t *ptr, bfloat16x4x2_t b) +{ + return vld2_lane_bf16 (ptr, b, 2); +} + +bfloat16x8x2_t +test_vld2q_lane_bf16 (const bfloat16_t *ptr, bfloat16x8x2_t b) +{ + return vld2q_lane_bf16 (ptr, b, 2); +} + +/* { dg-final { scan-assembler-times "ld2\\t{v2.h - v3.h}\\\[2\\\], \\\[x0\\\]" 2 } } */ + +bfloat16x4x3_t +test_vld3_lane_bf16 (const bfloat16_t *ptr, bfloat16x4x3_t b) +{ + return vld3_lane_bf16 (ptr, b, 2); +} + +/* { dg-final { scan-assembler-times "ld3\t{v4.h - v6.h}\\\[2\\\], \\\[x0\\\]" 1 } } */ + +bfloat16x8x3_t +test_vld3q_lane_bf16 (const bfloat16_t *ptr, bfloat16x8x3_t b) +{ + return vld3q_lane_bf16 (ptr, b, 2); +} + +/* { dg-final { scan-assembler-times "ld3\t{v1.h - v3.h}\\\[2\\\], \\\[x0\\\]" 1 } } */ + +bfloat16x4x4_t +test_vld4_lane_bf16 (const bfloat16_t *ptr, bfloat16x4x4_t b) +{ + return vld4_lane_bf16 (ptr, b, 2); +} + +/* { dg-final { scan-assembler-times "ld4\t{v4.h - v7.h}\\\[2\\\], \\\[x0\\\]" 1 } } */ + +bfloat16x8x4_t +test_vld4q_lane_bf16 (const bfloat16_t *ptr, bfloat16x8x4_t b) +{ + return vld4q_lane_bf16 (ptr, b, 2); +} + +/* { dg-final { scan-assembler-times "ld4\t{v0.h - v3.h}\\\[2\\\], \\\[x0\\\]" 1 } } */ diff --git a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vld2_lane_bf16_indices_1.c b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vld2_lane_bf16_indices_1.c new file mode 100644 index 00000000000..99c979393ff --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vld2_lane_bf16_indices_1.c @@ -0,0 +1,17 @@ +/* { dg-do compile { target { aarch64*-*-* } } } */ +/* { dg-skip-if "" { *-*-* } { "-fno-fat-lto-objects" } } */ +/* { dg-require-effective-target arm_v8_2a_bf16_neon_ok } */ +/* { dg-add-options arm_v8_2a_bf16_neon } */ + +#include + +bfloat16x4x2_t +f_vld2_lane_bf16 (bfloat16_t * p, bfloat16x4x2_t v) +{ + bfloat16x4x2_t res; + /* { dg-error "lane 4 out of range 0 - 3" "" { target *-*-* } 0 } */ + res = vld2_lane_bf16 (p, v, 4); + /* { dg-error "lane -1 out of range 0 - 3" "" { target *-*-* } 0 } */ + res = vld2_lane_bf16 (p, v, -1); + return res; +} diff --git a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vld2q_lane_bf16_indices_1.c b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vld2q_lane_bf16_indices_1.c new file mode 100644 index 00000000000..86d778a07b1 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vld2q_lane_bf16_indices_1.c @@ -0,0 +1,17 @@ +/* { dg-do compile { target { aarch64*-*-* } } } */ +/* { dg-skip-if "" { *-*-* } { "-fno-fat-lto-objects" } } */ +/* { dg-require-effective-target arm_v8_2a_bf16_neon_ok } */ +/* { dg-add-options arm_v8_2a_bf16_neon } */ + +#include + +bfloat16x8x2_t +f_vld2q_lane_bf16 (bfloat16_t * p, bfloat16x8x2_t v) +{ + bfloat16x8x2_t res; + /* { dg-error "lane 8 out of range 0 - 7" "" { target *-*-* } 0 } */ + res = vld2q_lane_bf16 (p, v, 8); + /* { dg-error "lane -1 out of range 0 - 7" "" { target *-*-* } 0 } */ + res = vld2q_lane_bf16 (p, v, -1); + return res; +} diff --git a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vld3_lane_bf16_indices_1.c b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vld3_lane_bf16_indices_1.c new file mode 100644 index 00000000000..e91a2bea1ad --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vld3_lane_bf16_indices_1.c @@ -0,0 +1,17 @@ +/* { dg-do compile { target { aarch64*-*-* } } } */ +/* { dg-skip-if "" { *-*-* } { "-fno-fat-lto-objects" } } */ +/* { dg-require-effective-target arm_v8_2a_bf16_neon_ok } */ +/* { dg-add-options arm_v8_2a_bf16_neon } */ + +#include + +bfloat16x4x3_t +f_vld3_lane_bf16 (bfloat16_t * p, bfloat16x4x3_t v) +{ + bfloat16x4x3_t res; + /* { dg-error "lane 4 out of range 0 - 3" "" { target *-*-* } 0 } */ + res = vld3_lane_bf16 (p, v, 4); + /* { dg-error "lane -1 out of range 0 - 3" "" { target *-*-* } 0 } */ + res = vld3_lane_bf16 (p, v, -1); + return res; +} diff --git a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vld3q_lane_bf16_indices_1.c b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vld3q_lane_bf16_indices_1.c new file mode 100644 index 00000000000..95421befd9f --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vld3q_lane_bf16_indices_1.c @@ -0,0 +1,17 @@ +/* { dg-do compile { target { aarch64*-*-* } } } */ +/* { dg-skip-if "" { *-*-* } { "-fno-fat-lto-objects" } } */ +/* { dg-require-effective-target arm_v8_2a_bf16_neon_ok } */ +/* { dg-add-options arm_v8_2a_bf16_neon } */ + +#include + +bfloat16x8x3_t +f_vld3q_lane_bf16 (bfloat16_t * p, bfloat16x8x3_t v) +{ + bfloat16x8x3_t res; + /* { dg-error "lane 8 out of range 0 - 7" "" { target *-*-* } 0 } */ + res = vld3q_lane_bf16 (p, v, 8); + /* { dg-error "lane -1 out of range 0 - 7" "" { target *-*-* } 0 } */ + res = vld3q_lane_bf16 (p, v, -1); + return res; +} diff --git a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vld4_lane_bf16_indices_1.c b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vld4_lane_bf16_indices_1.c new file mode 100644 index 00000000000..1c819aa8aad --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vld4_lane_bf16_indices_1.c @@ -0,0 +1,17 @@ +/* { dg-do compile { target { aarch64*-*-* } } } */ +/* { dg-skip-if "" { *-*-* } { "-fno-fat-lto-objects" } } */ +/* { dg-require-effective-target arm_v8_2a_bf16_neon_ok } */ +/* { dg-add-options arm_v8_2a_bf16_neon } */ + +#include + +bfloat16x4x4_t +f_vld4_lane_bf16 (bfloat16_t * p, bfloat16x4x4_t v) +{ + bfloat16x4x4_t res; + /* { dg-error "lane 4 out of range 0 - 3" "" { target *-*-* } 0 } */ + res = vld4_lane_bf16 (p, v, 4); + /* { dg-error "lane -1 out of range 0 - 3" "" { target *-*-* } 0 } */ + res = vld4_lane_bf16 (p, v, -1); + return res; +} diff --git a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vld4q_lane_bf16_indices_1.c b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vld4q_lane_bf16_indices_1.c new file mode 100644 index 00000000000..f7c76fa0cde --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vld4q_lane_bf16_indices_1.c @@ -0,0 +1,17 @@ +/* { dg-do compile { target { aarch64*-*-* } } } */ +/* { dg-skip-if "" { *-*-* } { "-fno-fat-lto-objects" } } */ +/* { dg-require-effective-target arm_v8_2a_bf16_neon_ok } */ +/* { dg-add-options arm_v8_2a_bf16_neon } */ + +#include + +bfloat16x8x4_t +f_vld4q_lane_bf16 (bfloat16_t * p, bfloat16x8x4_t v) +{ + bfloat16x8x4_t res; + /* { dg-error "lane 8 out of range 0 - 7" "" { target *-*-* } 0 } */ + res = vld4q_lane_bf16 (p, v, 8); + /* { dg-error "lane -1 out of range 0 - 7" "" { target *-*-* } 0 } */ + res = vld4q_lane_bf16 (p, v, -1); + return res; +} -- 2.30.2