From c2b7062d584397685e0da236f2c0e430b818cbb1 Mon Sep 17 00:00:00 2001
From: Tamar Christina
Date: Thu, 10 Jan 2019 03:34:06 +0000
Subject: [PATCH] arm-builtins.c (enum arm_type_qualifiers): Add
 qualifier_lane_pair_index.

2019-01-10  Tamar Christina

        * config/arm/arm-builtins.c
        (enum arm_type_qualifiers): Add qualifier_lane_pair_index.
        (MAC_LANE_PAIR_QUALIFIERS): New.
        (arm_expand_builtin_args): Use it.
        (arm_expand_builtin_1): Likewise.
        * config/arm/arm-protos.h (neon_vcmla_lane_prepare_operands): New.
        * config/arm/arm.c (neon_vcmla_lane_prepare_operands): New.
        * config/arm/arm-c.c (arm_cpu_builtins): Add __ARM_FEATURE_COMPLEX.
        * config/arm/arm_neon.h:
        (vcadd_rot90_f16): New.
        (vcaddq_rot90_f16): New.
        (vcadd_rot270_f16): New.
        (vcaddq_rot270_f16): New.
        (vcmla_f16): New.
        (vcmlaq_f16): New.
        (vcmla_lane_f16): New.
        (vcmla_laneq_f16): New.
        (vcmlaq_lane_f16): New.
        (vcmlaq_laneq_f16): New.
        (vcmla_rot90_f16): New.
        (vcmlaq_rot90_f16): New.
        (vcmla_rot90_lane_f16): New.
        (vcmla_rot90_laneq_f16): New.
        (vcmlaq_rot90_lane_f16): New.
        (vcmlaq_rot90_laneq_f16): New.
        (vcmla_rot180_f16): New.
        (vcmlaq_rot180_f16): New.
        (vcmla_rot180_lane_f16): New.
        (vcmla_rot180_laneq_f16): New.
        (vcmlaq_rot180_lane_f16): New.
        (vcmlaq_rot180_laneq_f16): New.
        (vcmla_rot270_f16): New.
        (vcmlaq_rot270_f16): New.
        (vcmla_rot270_lane_f16): New.
        (vcmla_rot270_laneq_f16): New.
        (vcmlaq_rot270_lane_f16): New.
        (vcmlaq_rot270_laneq_f16): New.
        (vcadd_rot90_f32): New.
        (vcaddq_rot90_f32): New.
        (vcadd_rot270_f32): New.
        (vcaddq_rot270_f32): New.
        (vcmla_f32): New.
        (vcmlaq_f32): New.
        (vcmla_lane_f32): New.
        (vcmla_laneq_f32): New.
        (vcmlaq_lane_f32): New.
        (vcmlaq_laneq_f32): New.
        (vcmla_rot90_f32): New.
        (vcmlaq_rot90_f32): New.
        (vcmla_rot90_lane_f32): New.
        (vcmla_rot90_laneq_f32): New.
        (vcmlaq_rot90_lane_f32): New.
        (vcmlaq_rot90_laneq_f32): New.
        (vcmla_rot180_f32): New.
        (vcmlaq_rot180_f32): New.
        (vcmla_rot180_lane_f32): New.
        (vcmla_rot180_laneq_f32): New.
        (vcmlaq_rot180_lane_f32): New.
        (vcmlaq_rot180_laneq_f32): New.
        (vcmla_rot270_f32): New.
        (vcmlaq_rot270_f32): New.
        (vcmla_rot270_lane_f32): New.
        (vcmla_rot270_laneq_f32): New.
        (vcmlaq_rot270_lane_f32): New.
        (vcmlaq_rot270_laneq_f32): New.
        * config/arm/arm_neon_builtins.def (vcadd90, vcadd270, vcmla0, vcmla90,
        vcmla180, vcmla270, vcmla_lane0, vcmla_lane90, vcmla_lane180,
        vcmla_lane270, vcmla_laneq0, vcmla_laneq90, vcmla_laneq180,
        vcmla_laneq270, vcmlaq_lane0, vcmlaq_lane90, vcmlaq_lane180,
        vcmlaq_lane270): New.
        * config/arm/neon.md (neon_vcmla_lane<mode>,
        neon_vcmla_laneq<mode>, neon_vcmlaq_lane<mode>): New.
        * config/arm/arm.c (arm_arch8_3, arm_arch8_4): New.
        * config/arm/arm.h (TARGET_COMPLEX, arm_arch8_3, arm_arch8_4): New.
        (arm_option_reconfigure_globals): Use them.
        * config/arm/iterators.md (VDF, VQ_HSF): New.
        (VCADD, VCMLA): New.
        (VF_constraint, rot, rotsplit1, rotsplit2): Add V4HF and V8HF.
        * config/arm/neon.md (neon_vcadd<rot><mode>, neon_vcmla<rot><mode>): New.
        * config/arm/unspecs.md (UNSPEC_VCADD90, UNSPEC_VCADD270,
        UNSPEC_VCMLA, UNSPEC_VCMLA90, UNSPEC_VCMLA180, UNSPEC_VCMLA270): New.

gcc/testsuite/ChangeLog:

2019-01-10  Tamar Christina

        * gcc.target/aarch64/advsimd-intrinsics/vector-complex.c: Add AArch32 regexpr.
        * gcc.target/aarch64/advsimd-intrinsics/vector-complex_f16.c: Likewise.
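As a quick illustration of what the new intrinsics compute (this sketch is not
part of the original commit message; it assumes a compiler containing this
patch and options along the lines of -march=armv8.3-a -mfloat-abi=hard
-mfpu=auto), a complex multiply-accumulate r += a * b over interleaved
{ real, imag } float pairs is built from two FCMLA steps:

    #include <arm_neon.h>

    #ifdef __ARM_FEATURE_COMPLEX
    /* One complex float per 64-bit D register: lane 0 = real, lane 1 = imag.  */
    float32x2_t
    complex_mla (float32x2_t r, float32x2_t a, float32x2_t b)
    {
      /* Rotation #0 accumulates a.real * b into r.  */
      r = vcmla_f32 (r, a, b);
      /* Rotation #90 accumulates i * a.imag * b, completing r += a * b.  */
      r = vcmla_rot90_f32 (r, a, b);
      return r;
    }
    #endif

With the patch applied this should expand to a vcmla.f32 ..., #0 /
vcmla.f32 ..., #90 pair, the same instruction shapes the updated
scan-assembler tests below match.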
From-SVN: r267796 --- gcc/ChangeLog | 83 ++++ gcc/config/arm/arm-builtins.c | 28 +- gcc/config/arm/arm-c.c | 1 + gcc/config/arm/arm-protos.h | 2 + gcc/config/arm/arm.c | 46 ++ gcc/config/arm/arm.h | 9 + gcc/config/arm/arm_neon.h | 439 ++++++++++++++++++ gcc/config/arm/arm_neon_builtins.def | 22 + gcc/config/arm/iterators.md | 19 +- gcc/config/arm/neon.md | 74 +++ gcc/config/arm/unspecs.md | 6 + gcc/testsuite/ChangeLog | 5 + .../advsimd-intrinsics/vector-complex.c | 22 +- .../advsimd-intrinsics/vector-complex_f16.c | 30 +- 14 files changed, 780 insertions(+), 6 deletions(-) diff --git a/gcc/ChangeLog b/gcc/ChangeLog index f80cad99048..e64e1f0fa69 100644 --- a/gcc/ChangeLog +++ b/gcc/ChangeLog @@ -1,3 +1,86 @@ +2019-01-10 Tamar Christina + + * config/arm/arm-builtins.c + (enum arm_type_qualifiers): Add qualifier_lane_pair_index. + (MAC_LANE_PAIR_QUALIFIERS): New. + (arm_expand_builtin_args): Use it. + (arm_expand_builtin_1): Likewise. + * config/arm/arm-protos.h (neon_vcmla_lane_prepare_operands): New. + * config/arm/arm.c (neon_vcmla_lane_prepare_operands): New. + * config/arm/arm-c.c (arm_cpu_builtins): Add __ARM_FEATURE_COMPLEX. + * config/arm/arm_neon.h: + (vcadd_rot90_f16): New. + (vcaddq_rot90_f16): New. + (vcadd_rot270_f16): New. + (vcaddq_rot270_f16): New. + (vcmla_f16): New. + (vcmlaq_f16): New. + (vcmla_lane_f16): New. + (vcmla_laneq_f16): New. + (vcmlaq_lane_f16): New. + (vcmlaq_laneq_f16): New. + (vcmla_rot90_f16): New. + (vcmlaq_rot90_f16): New. + (vcmla_rot90_lane_f16): New. + (vcmla_rot90_laneq_f16): New. + (vcmlaq_rot90_lane_f16): New. + (vcmlaq_rot90_laneq_f16): New. + (vcmla_rot180_f16): New. + (vcmlaq_rot180_f16): New. + (vcmla_rot180_lane_f16): New. + (vcmla_rot180_laneq_f16): New. + (vcmlaq_rot180_lane_f16): New. + (vcmlaq_rot180_laneq_f16): New. + (vcmla_rot270_f16): New. + (vcmlaq_rot270_f16): New. + (vcmla_rot270_lane_f16): New. + (vcmla_rot270_laneq_f16): New. + (vcmlaq_rot270_lane_f16): New. + (vcmlaq_rot270_laneq_f16): New. + (vcadd_rot90_f32): New. + (vcaddq_rot90_f32): New. + (vcadd_rot270_f32): New. + (vcaddq_rot270_f32): New. + (vcmla_f32): New. + (vcmlaq_f32): New. + (vcmla_lane_f32): New. + (vcmla_laneq_f32): New. + (vcmlaq_lane_f32): New. + (vcmlaq_laneq_f32): New. + (vcmla_rot90_f32): New. + (vcmlaq_rot90_f32): New. + (vcmla_rot90_lane_f32): New. + (vcmla_rot90_laneq_f32): New. + (vcmlaq_rot90_lane_f32): New. + (vcmlaq_rot90_laneq_f32): New. + (vcmla_rot180_f32): New. + (vcmlaq_rot180_f32): New. + (vcmla_rot180_lane_f32): New. + (vcmla_rot180_laneq_f32): New. + (vcmlaq_rot180_lane_f32): New. + (vcmlaq_rot180_laneq_f32): New. + (vcmla_rot270_f32): New. + (vcmlaq_rot270_f32): New. + (vcmla_rot270_lane_f32): New. + (vcmla_rot270_laneq_f32): New. + (vcmlaq_rot270_lane_f32): New. + (vcmlaq_rot270_laneq_f32): New. + * config/arm/arm_neon_builtins.def (vcadd90, vcadd270, vcmla0, vcmla90, + vcmla180, vcmla270, vcmla_lane0, vcmla_lane90, vcmla_lane180, vcmla_lane270, + vcmla_laneq0, vcmla_laneq90, vcmla_laneq180, vcmla_laneq270, + vcmlaq_lane0, vcmlaq_lane90, vcmlaq_lane180, vcmlaq_lane270): New. + * config/arm/neon.md (neon_vcmla_lane, + neon_vcmla_laneq, neon_vcmlaq_lane): New. + * config/arm/arm.c (arm_arch8_3, arm_arch8_4): New. + * config/arm/arm.h (TARGET_COMPLEX, arm_arch8_3, arm_arch8_4): New. + (arm_option_reconfigure_globals): Use them. + * config/arm/iterators.md (VDF, VQ_HSF): New. + (VCADD, VCMLA): New. + (VF_constraint, rot, rotsplit1, rotsplit2): Add V4HF and V8HF. + * config/arm/neon.md (neon_vcadd, neon_vcmla): New. 
+ * config/arm/unspecs.md (UNSPEC_VCADD90, UNSPEC_VCADD270, + UNSPEC_VCMLA, UNSPEC_VCMLA90, UNSPEC_VCMLA180, UNSPEC_VCMLA270): New. + 2019-01-10 Tamar Christina * config/aarch64/aarch64-builtins.c (enum aarch64_type_qualifiers): Add qualifier_lane_pair_index. diff --git a/gcc/config/arm/arm-builtins.c b/gcc/config/arm/arm-builtins.c index 8ea000aca19..f646ab537fc 100644 --- a/gcc/config/arm/arm-builtins.c +++ b/gcc/config/arm/arm-builtins.c @@ -82,7 +82,10 @@ enum arm_type_qualifiers /* A void pointer. */ qualifier_void_pointer = 0x800, /* A const void pointer. */ - qualifier_const_void_pointer = 0x802 + qualifier_const_void_pointer = 0x802, + /* Lane indices selected in pairs - must be within range of previous + argument = a vector. */ + qualifier_lane_pair_index = 0x1000 }; /* The qualifier_internal allows generation of a unary builtin from @@ -144,6 +147,13 @@ arm_mac_lane_qualifiers[SIMD_MAX_BUILTIN_ARGS] qualifier_none, qualifier_lane_index }; #define MAC_LANE_QUALIFIERS (arm_mac_lane_qualifiers) +/* T (T, T, T, lane pair index). */ +static enum arm_type_qualifiers +arm_mac_lane_pair_qualifiers[SIMD_MAX_BUILTIN_ARGS] + = { qualifier_none, qualifier_none, qualifier_none, + qualifier_none, qualifier_lane_pair_index }; +#define MAC_LANE_PAIR_QUALIFIERS (arm_mac_lane_pair_qualifiers) + /* unsigned T (unsigned T, unsigned T, unsigend T, lane index). */ static enum arm_type_qualifiers arm_umac_lane_qualifiers[SIMD_MAX_BUILTIN_ARGS] @@ -2129,6 +2139,7 @@ typedef enum { ARG_BUILTIN_CONSTANT, ARG_BUILTIN_LANE_INDEX, ARG_BUILTIN_STRUCT_LOAD_STORE_LANE_INDEX, + ARG_BUILTIN_LANE_PAIR_INDEX, ARG_BUILTIN_NEON_MEMORY, ARG_BUILTIN_MEMORY, ARG_BUILTIN_STOP @@ -2266,6 +2277,19 @@ arm_expand_builtin_args (rtx target, machine_mode map_mode, int fcode, machine_mode vmode = mode[argc - 1]; neon_lane_bounds (op[argc], 0, GET_MODE_NUNITS (vmode), exp); } + /* If the lane index isn't a constant then error out. */ + goto constant_arg; + + case ARG_BUILTIN_LANE_PAIR_INDEX: + /* Previous argument must be a vector, which this indexes. The + indexing will always select i and i+1 out of the vector, which + puts a limit on i. */ + gcc_assert (argc > 0); + if (CONST_INT_P (op[argc])) + { + machine_mode vmode = mode[argc - 1]; + neon_lane_bounds (op[argc], 0, GET_MODE_NUNITS (vmode) / 2, exp); + } /* If the lane index isn't a constant then the next case will error. */ /* Fall through. 
*/ @@ -2427,6 +2451,8 @@ arm_expand_builtin_1 (int fcode, tree exp, rtx target, if (d->qualifiers[qualifiers_k] & qualifier_lane_index) args[k] = ARG_BUILTIN_LANE_INDEX; + else if (d->qualifiers[qualifiers_k] & qualifier_lane_pair_index) + args[k] = ARG_BUILTIN_LANE_PAIR_INDEX; else if (d->qualifiers[qualifiers_k] & qualifier_struct_load_store_lane_index) args[k] = ARG_BUILTIN_STRUCT_LOAD_STORE_LANE_INDEX; else if (d->qualifiers[qualifiers_k] & qualifier_immediate) diff --git a/gcc/config/arm/arm-c.c b/gcc/config/arm/arm-c.c index ab4a0ab9a2e..6e256ee0a12 100644 --- a/gcc/config/arm/arm-c.c +++ b/gcc/config/arm/arm-c.c @@ -76,6 +76,7 @@ arm_cpu_builtins (struct cpp_reader* pfile) def_or_undef_macro (pfile, "__ARM_FEATURE_CRC32", TARGET_CRC32); def_or_undef_macro (pfile, "__ARM_FEATURE_DOTPROD", TARGET_DOTPROD); + def_or_undef_macro (pfile, "__ARM_FEATURE_COMPLEX", TARGET_COMPLEX); def_or_undef_macro (pfile, "__ARM_32BIT_STATE", TARGET_32BIT); cpp_undef (pfile, "__ARM_FEATURE_CMSE"); diff --git a/gcc/config/arm/arm-protos.h b/gcc/config/arm/arm-protos.h index cda13a2ebb8..2bc43019864 100644 --- a/gcc/config/arm/arm-protos.h +++ b/gcc/config/arm/arm-protos.h @@ -109,6 +109,8 @@ extern int arm_coproc_mem_operand (rtx, bool); extern int neon_vector_mem_operand (rtx, int, bool); extern int neon_struct_mem_operand (rtx); +extern rtx *neon_vcmla_lane_prepare_operands (machine_mode, rtx *); + extern int tls_mentioned_p (rtx); extern int symbol_mentioned_p (rtx); extern int label_mentioned_p (rtx); diff --git a/gcc/config/arm/arm.c b/gcc/config/arm/arm.c index 839238057ce..f473aee8d34 100644 --- a/gcc/config/arm/arm.c +++ b/gcc/config/arm/arm.c @@ -895,6 +895,12 @@ int arm_arch8_1 = 0; /* Nonzero if this chip supports the ARM Architecture 8.2 extensions. */ int arm_arch8_2 = 0; +/* Nonzero if this chip supports the ARM Architecture 8.3 extensions. */ +int arm_arch8_3 = 0; + +/* Nonzero if this chip supports the ARM Architecture 8.4 extensions. */ +int arm_arch8_4 = 0; + /* Nonzero if this chip supports the FP16 instructions extension of ARM Architecture 8.2. */ int arm_fp16_inst = 0; @@ -3649,6 +3655,8 @@ arm_option_reconfigure_globals (void) arm_arch8 = bitmap_bit_p (arm_active_target.isa, isa_bit_armv8); arm_arch8_1 = bitmap_bit_p (arm_active_target.isa, isa_bit_armv8_1); arm_arch8_2 = bitmap_bit_p (arm_active_target.isa, isa_bit_armv8_2); + arm_arch8_3 = bitmap_bit_p (arm_active_target.isa, isa_bit_armv8_3); + arm_arch8_4 = bitmap_bit_p (arm_active_target.isa, isa_bit_armv8_4); arm_arch_thumb1 = bitmap_bit_p (arm_active_target.isa, isa_bit_thumb); arm_arch_thumb2 = bitmap_bit_p (arm_active_target.isa, isa_bit_thumb2); arm_arch_xscale = bitmap_bit_p (arm_active_target.isa, isa_bit_xscale); @@ -12713,6 +12721,44 @@ neon_struct_mem_operand (rtx op) return FALSE; } +/* Prepares the operands for the VCMLA by lane instruction such that the right + register number is selected. This instruction is special in that it always + requires a D register, however there is a choice to be made between Dn[0], + Dn[1], D(n+1)[0], and D(n+1)[1] depending on the mode of the registers and + the PATTERNMODE of the insn. + + The VCMLA by lane function always selects two values. For instance given D0 + and a V2SF, the only valid index is 0 as the values in S0 and S1 will be + used by the instruction. However given V4SF then index 0 and 1 are valid as + D0[0] or D1[0] are both valid. 
+ + This function centralizes that information based on OPERANDS, OPERANDS[3] + will be changed from a REG into a CONST_INT RTX and OPERANDS[4] will be + updated to contain the right index. */ + +rtx * +neon_vcmla_lane_prepare_operands (machine_mode patternmode, rtx *operands) +{ + int lane = NEON_ENDIAN_LANE_N (patternmode, INTVAL (operands[4])); + machine_mode constmode = SImode; + machine_mode mode = GET_MODE (operands[3]); + int regno = REGNO (operands[3]); + regno = ((regno - FIRST_VFP_REGNUM) >> 1); + if (lane > 0 && lane >= GET_MODE_NUNITS (mode) / 4) + { + operands[3] = gen_int_mode (regno + 1, constmode); + operands[4] + = gen_int_mode (lane - GET_MODE_NUNITS (mode) / 4, constmode); + } + else + { + operands[3] = gen_int_mode (regno, constmode); + operands[4] = gen_int_mode (lane, constmode); + } + return operands; +} + + /* Return true if X is a register that will be eliminated later on. */ int arm_eliminable_register (rtx x) diff --git a/gcc/config/arm/arm.h b/gcc/config/arm/arm.h index 5cebabeaea7..103d390dd17 100644 --- a/gcc/config/arm/arm.h +++ b/gcc/config/arm/arm.h @@ -220,6 +220,9 @@ extern tree arm_fp16_type_node; isa_bit_dotprod) \ && arm_arch8_2) +/* Supports the Armv8.3-a Complex number AdvSIMD extensions. */ +#define TARGET_COMPLEX (TARGET_NEON && arm_arch8_3) + /* FPU supports the floating point FP16 instructions for ARMv8.2-A and later. */ #define TARGET_VFP_FP16INST \ @@ -442,6 +445,12 @@ extern int arm_arch8_1; /* Nonzero if this chip supports the ARM Architecture 8.2 extensions. */ extern int arm_arch8_2; +/* Nonzero if this chip supports the ARM Architecture 8.3 extensions. */ +extern int arm_arch8_3; + +/* Nonzero if this chip supports the ARM Architecture 8.4 extensions. */ +extern int arm_arch8_4; + /* Nonzero if this chip supports the FP16 instructions extension of ARM Architecture 8.2. */ extern int arm_fp16_inst; diff --git a/gcc/config/arm/arm_neon.h b/gcc/config/arm/arm_neon.h index 274bad92d6c..3cc2179ddee 100644 --- a/gcc/config/arm/arm_neon.h +++ b/gcc/config/arm/arm_neon.h @@ -18307,6 +18307,445 @@ vfmlsl_laneq_high_u32 (float32x2_t __r, float16x4_t __a, float16x8_t __b, #pragma GCC pop_options #endif +/* AdvSIMD Complex numbers intrinsics. 
*/ +#if __ARM_ARCH >= 8 +#pragma GCC push_options +#pragma GCC target(("arch=armv8.3-a")) + + +#if defined (__ARM_FP16_FORMAT_IEEE) || defined (__ARM_FP16_FORMAT_ALTERNATIVE) +#pragma GCC push_options +#pragma GCC target(("+fp16")) +__extension__ extern __inline float16x4_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vcadd_rot90_f16 (float16x4_t __a, float16x4_t __b) +{ + return __builtin_neon_vcadd90v4hf (__a, __b); +} + +__extension__ extern __inline float16x8_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vcaddq_rot90_f16 (float16x8_t __a, float16x8_t __b) +{ + return __builtin_neon_vcadd90v8hf (__a, __b); +} + +__extension__ extern __inline float16x4_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vcadd_rot270_f16 (float16x4_t __a, float16x4_t __b) +{ + return __builtin_neon_vcadd90v4hf (__a, __b); +} + +__extension__ extern __inline float16x8_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vcaddq_rot270_f16 (float16x8_t __a, float16x8_t __b) +{ + return __builtin_neon_vcadd90v8hf (__a, __b); +} + +__extension__ extern __inline float16x4_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vcmla_f16 (float16x4_t __r, float16x4_t __a, float16x4_t __b) +{ + return __builtin_neon_vcmla0v4hf (__r, __a, __b); +} + +__extension__ extern __inline float16x8_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vcmlaq_f16 (float16x8_t __r, float16x8_t __a, float16x8_t __b) +{ + return __builtin_neon_vcmla0v8hf (__r, __a, __b); +} + +__extension__ extern __inline float16x4_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vcmla_lane_f16 (float16x4_t __r, float16x4_t __a, float16x4_t __b, + const int __index) +{ + return __builtin_neon_vcmla_lane0v4hf (__r, __a, __b, __index); +} + +__extension__ extern __inline float16x4_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vcmla_laneq_f16 (float16x4_t __r, float16x4_t __a, float16x8_t __b, + const int __index) +{ + return __builtin_neon_vcmla_laneq0v4hf (__r, __a, __b, __index); +} + +__extension__ extern __inline float16x8_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vcmlaq_lane_f16 (float16x8_t __r, float16x8_t __a, float16x4_t __b, + const int __index) +{ + return __builtin_neon_vcmlaq_lane0v8hf (__r, __a, __b, __index); +} + +__extension__ extern __inline float16x8_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vcmlaq_laneq_f16 (float16x8_t __r, float16x8_t __a, float16x8_t __b, + const int __index) +{ + return __builtin_neon_vcmla_lane0v8hf (__r, __a, __b, __index); +} + +__extension__ extern __inline float16x4_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vcmla_rot90_f16 (float16x4_t __r, float16x4_t __a, float16x4_t __b) +{ + return __builtin_neon_vcmla90v4hf (__r, __a, __b); +} + +__extension__ extern __inline float16x8_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vcmlaq_rot90_f16 (float16x8_t __r, float16x8_t __a, float16x8_t __b) +{ + return __builtin_neon_vcmla90v8hf (__r, __a, __b); +} + +__extension__ extern __inline float16x4_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vcmla_rot90_lane_f16 (float16x4_t __r, float16x4_t __a, float16x4_t __b, + const int __index) +{ + return __builtin_neon_vcmla_lane90v4hf (__r, __a, __b, __index); +} + +__extension__ extern __inline float16x4_t +__attribute__ ((__always_inline__, __gnu_inline__, 
__artificial__)) +vcmla_rot90_laneq_f16 (float16x4_t __r, float16x4_t __a, float16x8_t __b, + const int __index) +{ + return __builtin_neon_vcmla_laneq90v4hf (__r, __a, __b, __index); +} + +__extension__ extern __inline float16x8_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vcmlaq_rot90_lane_f16 (float16x8_t __r, float16x8_t __a, float16x4_t __b, + const int __index) +{ + return __builtin_neon_vcmlaq_lane90v8hf (__r, __a, __b, __index); +} + +__extension__ extern __inline float16x8_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vcmlaq_rot90_laneq_f16 (float16x8_t __r, float16x8_t __a, float16x8_t __b, + const int __index) +{ + return __builtin_neon_vcmla_lane90v8hf (__r, __a, __b, __index); +} + +__extension__ extern __inline float16x4_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vcmla_rot180_f16 (float16x4_t __r, float16x4_t __a, float16x4_t __b) +{ + return __builtin_neon_vcmla180v4hf (__r, __a, __b); +} + +__extension__ extern __inline float16x8_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vcmlaq_rot180_f16 (float16x8_t __r, float16x8_t __a, float16x8_t __b) +{ + return __builtin_neon_vcmla180v8hf (__r, __a, __b); +} + +__extension__ extern __inline float16x4_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vcmla_rot180_lane_f16 (float16x4_t __r, float16x4_t __a, float16x4_t __b, + const int __index) +{ + return __builtin_neon_vcmla_lane180v4hf (__r, __a, __b, __index); +} + +__extension__ extern __inline float16x4_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vcmla_rot180_laneq_f16 (float16x4_t __r, float16x4_t __a, float16x8_t __b, + const int __index) +{ + return __builtin_neon_vcmla_laneq180v4hf (__r, __a, __b, __index); +} + +__extension__ extern __inline float16x8_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vcmlaq_rot180_lane_f16 (float16x8_t __r, float16x8_t __a, float16x4_t __b, + const int __index) +{ + return __builtin_neon_vcmlaq_lane180v8hf (__r, __a, __b, __index); +} + +__extension__ extern __inline float16x8_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vcmlaq_rot180_laneq_f16 (float16x8_t __r, float16x8_t __a, float16x8_t __b, + const int __index) +{ + return __builtin_neon_vcmla_lane180v8hf (__r, __a, __b, __index); +} + +__extension__ extern __inline float16x4_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vcmla_rot270_f16 (float16x4_t __r, float16x4_t __a, float16x4_t __b) +{ + return __builtin_neon_vcmla270v4hf (__r, __a, __b); +} + +__extension__ extern __inline float16x8_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vcmlaq_rot270_f16 (float16x8_t __r, float16x8_t __a, float16x8_t __b) +{ + return __builtin_neon_vcmla270v8hf (__r, __a, __b); +} + +__extension__ extern __inline float16x4_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vcmla_rot270_lane_f16 (float16x4_t __r, float16x4_t __a, float16x4_t __b, + const int __index) +{ + return __builtin_neon_vcmla_lane270v4hf (__r, __a, __b, __index); +} + +__extension__ extern __inline float16x4_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vcmla_rot270_laneq_f16 (float16x4_t __r, float16x4_t __a, float16x8_t __b, + const int __index) +{ + return __builtin_neon_vcmla_laneq270v4hf (__r, __a, __b, __index); +} + +__extension__ extern __inline float16x8_t +__attribute__ ((__always_inline__, __gnu_inline__, 
__artificial__)) +vcmlaq_rot270_lane_f16 (float16x8_t __r, float16x8_t __a, float16x4_t __b, + const int __index) +{ + return __builtin_neon_vcmlaq_lane270v8hf (__r, __a, __b, __index); +} + +__extension__ extern __inline float16x8_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vcmlaq_rot270_laneq_f16 (float16x8_t __r, float16x8_t __a, float16x8_t __b, + const int __index) +{ + return __builtin_neon_vcmla_lane270v8hf (__r, __a, __b, __index); +} + +#pragma GCC pop_options +#endif + +__extension__ extern __inline float32x2_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vcadd_rot90_f32 (float32x2_t __a, float32x2_t __b) +{ + return __builtin_neon_vcadd90v2sf (__a, __b); +} + +__extension__ extern __inline float32x4_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vcaddq_rot90_f32 (float32x4_t __a, float32x4_t __b) +{ + return __builtin_neon_vcadd90v4sf (__a, __b); +} + +__extension__ extern __inline float32x2_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vcadd_rot270_f32 (float32x2_t __a, float32x2_t __b) +{ + return __builtin_neon_vcadd90v2sf (__a, __b); +} + +__extension__ extern __inline float32x4_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vcaddq_rot270_f32 (float32x4_t __a, float32x4_t __b) +{ + return __builtin_neon_vcadd90v4sf (__a, __b); +} + +__extension__ extern __inline float32x2_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vcmla_f32 (float32x2_t __r, float32x2_t __a, float32x2_t __b) +{ + return __builtin_neon_vcmla0v2sf (__r, __a, __b); +} + +__extension__ extern __inline float32x4_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vcmlaq_f32 (float32x4_t __r, float32x4_t __a, float32x4_t __b) +{ + return __builtin_neon_vcmla0v4sf (__r, __a, __b); +} + +__extension__ extern __inline float32x2_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vcmla_lane_f32 (float32x2_t __r, float32x2_t __a, float32x2_t __b, + const int __index) +{ + return __builtin_neon_vcmla_lane0v2sf (__r, __a, __b, __index); +} + +__extension__ extern __inline float32x2_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vcmla_laneq_f32 (float32x2_t __r, float32x2_t __a, float32x4_t __b, + const int __index) +{ + return __builtin_neon_vcmla_laneq0v2sf (__r, __a, __b, __index); +} + +__extension__ extern __inline float32x4_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vcmlaq_lane_f32 (float32x4_t __r, float32x4_t __a, float32x2_t __b, + const int __index) +{ + return __builtin_neon_vcmlaq_lane0v4sf (__r, __a, __b, __index); +} + +__extension__ extern __inline float32x4_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vcmlaq_laneq_f32 (float32x4_t __r, float32x4_t __a, float32x4_t __b, + const int __index) +{ + return __builtin_neon_vcmla_lane0v4sf (__r, __a, __b, __index); +} + +__extension__ extern __inline float32x2_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vcmla_rot90_f32 (float32x2_t __r, float32x2_t __a, float32x2_t __b) +{ + return __builtin_neon_vcmla90v2sf (__r, __a, __b); +} + +__extension__ extern __inline float32x4_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vcmlaq_rot90_f32 (float32x4_t __r, float32x4_t __a, float32x4_t __b) +{ + return __builtin_neon_vcmla90v4sf (__r, __a, __b); +} + +__extension__ extern __inline float32x2_t +__attribute__ ((__always_inline__, __gnu_inline__, 
__artificial__)) +vcmla_rot90_lane_f32 (float32x2_t __r, float32x2_t __a, float32x2_t __b, + const int __index) +{ + return __builtin_neon_vcmla_lane90v2sf (__r, __a, __b, __index); +} + +__extension__ extern __inline float32x2_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vcmla_rot90_laneq_f32 (float32x2_t __r, float32x2_t __a, float32x4_t __b, + const int __index) +{ + return __builtin_neon_vcmla_laneq90v2sf (__r, __a, __b, __index); +} + +__extension__ extern __inline float32x4_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vcmlaq_rot90_lane_f32 (float32x4_t __r, float32x4_t __a, float32x2_t __b, + const int __index) +{ + return __builtin_neon_vcmlaq_lane90v4sf (__r, __a, __b, __index); +} + +__extension__ extern __inline float32x4_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vcmlaq_rot90_laneq_f32 (float32x4_t __r, float32x4_t __a, float32x4_t __b, + const int __index) +{ + return __builtin_neon_vcmla_lane90v4sf (__r, __a, __b, __index); +} + +__extension__ extern __inline float32x2_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vcmla_rot180_f32 (float32x2_t __r, float32x2_t __a, float32x2_t __b) +{ + return __builtin_neon_vcmla180v2sf (__r, __a, __b); +} + +__extension__ extern __inline float32x4_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vcmlaq_rot180_f32 (float32x4_t __r, float32x4_t __a, float32x4_t __b) +{ + return __builtin_neon_vcmla180v4sf (__r, __a, __b); +} + +__extension__ extern __inline float32x2_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vcmla_rot180_lane_f32 (float32x2_t __r, float32x2_t __a, float32x2_t __b, + const int __index) +{ + return __builtin_neon_vcmla_lane180v2sf (__r, __a, __b, __index); +} + +__extension__ extern __inline float32x2_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vcmla_rot180_laneq_f32 (float32x2_t __r, float32x2_t __a, float32x4_t __b, + const int __index) +{ + return __builtin_neon_vcmla_laneq180v2sf (__r, __a, __b, __index); +} + +__extension__ extern __inline float32x4_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vcmlaq_rot180_lane_f32 (float32x4_t __r, float32x4_t __a, float32x2_t __b, + const int __index) +{ + return __builtin_neon_vcmlaq_lane180v4sf (__r, __a, __b, __index); +} + +__extension__ extern __inline float32x4_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vcmlaq_rot180_laneq_f32 (float32x4_t __r, float32x4_t __a, float32x4_t __b, + const int __index) +{ + return __builtin_neon_vcmla_lane180v4sf (__r, __a, __b, __index); +} + +__extension__ extern __inline float32x2_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vcmla_rot270_f32 (float32x2_t __r, float32x2_t __a, float32x2_t __b) +{ + return __builtin_neon_vcmla270v2sf (__r, __a, __b); +} + +__extension__ extern __inline float32x4_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vcmlaq_rot270_f32 (float32x4_t __r, float32x4_t __a, float32x4_t __b) +{ + return __builtin_neon_vcmla270v4sf (__r, __a, __b); +} + +__extension__ extern __inline float32x2_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vcmla_rot270_lane_f32 (float32x2_t __r, float32x2_t __a, float32x2_t __b, + const int __index) +{ + return __builtin_neon_vcmla_lane270v2sf (__r, __a, __b, __index); +} + +__extension__ extern __inline float32x2_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) 
+vcmla_rot270_laneq_f32 (float32x2_t __r, float32x2_t __a, float32x4_t __b, + const int __index) +{ + return __builtin_neon_vcmla_laneq270v2sf (__r, __a, __b, __index); +} + +__extension__ extern __inline float32x4_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vcmlaq_rot270_lane_f32 (float32x4_t __r, float32x4_t __a, float32x2_t __b, + const int __index) +{ + return __builtin_neon_vcmlaq_lane270v4sf (__r, __a, __b, __index); +} + +__extension__ extern __inline float32x4_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vcmlaq_rot270_laneq_f32 (float32x4_t __r, float32x4_t __a, float32x4_t __b, + const int __index) +{ + return __builtin_neon_vcmla_lane270v4sf (__r, __a, __b, __index); +} + +#pragma GCC pop_options +#endif + #ifdef __cplusplus } #endif diff --git a/gcc/config/arm/arm_neon_builtins.def b/gcc/config/arm/arm_neon_builtins.def index e0b2e7fe68e..bcccf93f7fa 100644 --- a/gcc/config/arm/arm_neon_builtins.def +++ b/gcc/config/arm/arm_neon_builtins.def @@ -351,3 +351,25 @@ VAR2 (TERNOP, sdot, v8qi, v16qi) VAR2 (UTERNOP, udot, v8qi, v16qi) VAR2 (MAC_LANE, sdot_lane, v8qi, v16qi) VAR2 (UMAC_LANE, udot_lane, v8qi, v16qi) + +VAR4 (BINOP, vcadd90, v4hf, v2sf, v8hf, v4sf) +VAR4 (BINOP, vcadd270, v4hf, v2sf, v8hf, v4sf) +VAR4 (TERNOP, vcmla0, v2sf, v4sf, v4hf, v8hf) +VAR4 (TERNOP, vcmla90, v2sf, v4sf, v4hf, v8hf) +VAR4 (TERNOP, vcmla180, v2sf, v4sf, v4hf, v8hf) +VAR4 (TERNOP, vcmla270, v2sf, v4sf, v4hf, v8hf) + +VAR4 (MAC_LANE_PAIR, vcmla_lane0, v2sf, v4hf, v8hf, v4sf) +VAR4 (MAC_LANE_PAIR, vcmla_lane90, v2sf, v4hf, v8hf, v4sf) +VAR4 (MAC_LANE_PAIR, vcmla_lane180, v2sf, v4hf, v8hf, v4sf) +VAR4 (MAC_LANE_PAIR, vcmla_lane270, v2sf, v4hf, v8hf, v4sf) + +VAR2 (MAC_LANE_PAIR, vcmla_laneq0, v2sf, v4hf) +VAR2 (MAC_LANE_PAIR, vcmla_laneq90, v2sf, v4hf) +VAR2 (MAC_LANE_PAIR, vcmla_laneq180, v2sf, v4hf) +VAR2 (MAC_LANE_PAIR, vcmla_laneq270, v2sf, v4hf) + +VAR2 (MAC_LANE_PAIR, vcmlaq_lane0, v4sf, v8hf) +VAR2 (MAC_LANE_PAIR, vcmlaq_lane90, v4sf, v8hf) +VAR2 (MAC_LANE_PAIR, vcmlaq_lane180, v4sf, v8hf) +VAR2 (MAC_LANE_PAIR, vcmlaq_lane270, v4sf, v8hf) diff --git a/gcc/config/arm/iterators.md b/gcc/config/arm/iterators.md index 5f46895d5c7..c33e572c3e8 100644 --- a/gcc/config/arm/iterators.md +++ b/gcc/config/arm/iterators.md @@ -123,6 +123,13 @@ (define_mode_iterator VF [(V4HF "TARGET_NEON_FP16INST") (V8HF "TARGET_NEON_FP16INST") V2SF V4SF]) +;; Double vector modes. +(define_mode_iterator VDF [V2SF V4HF]) + +;; Quad vector Float modes with half/single elements. +(define_mode_iterator VQ_HSF [V8HF V4SF]) + + ;; All supported vector modes (except those with 64-bit integer elements). (define_mode_iterator VDQW [V8QI V16QI V4HI V8HI V2SI V4SI V2SF V4SF]) @@ -423,6 +430,9 @@ (define_int_iterator VFMLHALVES [UNSPEC_VFML_LO UNSPEC_VFML_HI]) +(define_int_iterator VCADD [UNSPEC_VCADD90 UNSPEC_VCADD270]) +(define_int_iterator VCMLA [UNSPEC_VCMLA UNSPEC_VCMLA90 UNSPEC_VCMLA180 UNSPEC_VCMLA270]) + ;;---------------------------------------------------------------------------- ;; Mode attributes ;;---------------------------------------------------------------------------- @@ -741,7 +751,7 @@ (define_mode_attr F_constraint [(SF "t") (DF "w")]) (define_mode_attr vfp_type [(SF "s") (DF "d")]) (define_mode_attr vfp_double_cond [(SF "") (DF "&& TARGET_VFP_DOUBLE")]) -(define_mode_attr VF_constraint [(V2SF "t") (V4SF "w")]) +(define_mode_attr VF_constraint [(V4HF "t") (V8HF "t") (V2SF "t") (V4SF "w")]) ;; Mode attribute used to build the "type" attribute. 
(define_mode_attr q [(V8QI "") (V16QI "_q") @@ -989,6 +999,13 @@ (UNSPEC_SHA1SU0 "V4SI") (UNSPEC_SHA256H "V4SI") (UNSPEC_SHA256H2 "V4SI") (UNSPEC_SHA256SU1 "V4SI")]) +(define_int_attr rot [(UNSPEC_VCADD90 "90") + (UNSPEC_VCADD270 "270") + (UNSPEC_VCMLA "0") + (UNSPEC_VCMLA90 "90") + (UNSPEC_VCMLA180 "180") + (UNSPEC_VCMLA270 "270")]) + ;; Both kinds of return insn. (define_code_iterator RETURNS [return simple_return]) (define_code_attr return_str [(return "") (simple_return "simple_")]) diff --git a/gcc/config/arm/neon.md b/gcc/config/arm/neon.md index 6263cd43ab0..6f8e7c1cffd 100644 --- a/gcc/config/arm/neon.md +++ b/gcc/config/arm/neon.md @@ -3457,6 +3457,80 @@ DONE; }) + +;; The vcadd and vcmla patterns are made UNSPEC for the explicitly due to the +;; fact that their usage need to guarantee that the source vectors are +;; contiguous. It would be wrong to describe the operation without being able +;; to describe the permute that is also required, but even if that is done +;; the permute would have been created as a LOAD_LANES which means the values +;; in the registers are in the wrong order. +(define_insn "neon_vcadd" + [(set (match_operand:VF 0 "register_operand" "=w") + (unspec:VF [(match_operand:VF 1 "register_operand" "w") + (match_operand:VF 2 "register_operand" "w")] + VCADD))] + "TARGET_COMPLEX" + "vcadd.\t%0, %1, %2, #" + [(set_attr "type" "neon_fcadd")] +) + +(define_insn "neon_vcmla" + [(set (match_operand:VF 0 "register_operand" "=w") + (plus:VF (match_operand:VF 1 "register_operand" "0") + (unspec:VF [(match_operand:VF 2 "register_operand" "w") + (match_operand:VF 3 "register_operand" "w")] + VCMLA)))] + "TARGET_COMPLEX" + "vcmla.\t%0, %2, %3, #" + [(set_attr "type" "neon_fcmla")] +) + +(define_insn "neon_vcmla_lane" + [(set (match_operand:VF 0 "s_register_operand" "=w") + (plus:VF (match_operand:VF 1 "s_register_operand" "0") + (unspec:VF [(match_operand:VF 2 "s_register_operand" "w") + (match_operand:VF 3 "s_register_operand" "") + (match_operand:SI 4 "const_int_operand" "n")] + VCMLA)))] + "TARGET_COMPLEX" + { + operands = neon_vcmla_lane_prepare_operands (mode, operands); + return "vcmla.\t%0, %2, d%c3[%c4], #"; + } + [(set_attr "type" "neon_fcmla")] +) + +(define_insn "neon_vcmla_laneq" + [(set (match_operand:VDF 0 "s_register_operand" "=w") + (plus:VDF (match_operand:VDF 1 "s_register_operand" "0") + (unspec:VDF [(match_operand:VDF 2 "s_register_operand" "w") + (match_operand: 3 "s_register_operand" "") + (match_operand:SI 4 "const_int_operand" "n")] + VCMLA)))] + "TARGET_COMPLEX" + { + operands = neon_vcmla_lane_prepare_operands (mode, operands); + return "vcmla.\t%0, %2, d%c3[%c4], #"; + } + [(set_attr "type" "neon_fcmla")] +) + +(define_insn "neon_vcmlaq_lane" + [(set (match_operand:VQ_HSF 0 "s_register_operand" "=w") + (plus:VQ_HSF (match_operand:VQ_HSF 1 "s_register_operand" "0") + (unspec:VQ_HSF [(match_operand:VQ_HSF 2 "s_register_operand" "w") + (match_operand: 3 "s_register_operand" "") + (match_operand:SI 4 "const_int_operand" "n")] + VCMLA)))] + "TARGET_COMPLEX" + { + operands = neon_vcmla_lane_prepare_operands (mode, operands); + return "vcmla.\t%0, %2, d%c3[%c4], #"; + } + [(set_attr "type" "neon_fcmla")] +) + + ;; These instructions map to the __builtins for the Dot Product operations. 
(define_insn "neon_dot" [(set (match_operand:VCVTI 0 "register_operand" "=w") diff --git a/gcc/config/arm/unspecs.md b/gcc/config/arm/unspecs.md index 05e89ff0bed..174bcc5e3d5 100644 --- a/gcc/config/arm/unspecs.md +++ b/gcc/config/arm/unspecs.md @@ -418,4 +418,10 @@ UNSPEC_DOT_U UNSPEC_VFML_LO UNSPEC_VFML_HI + UNSPEC_VCADD90 + UNSPEC_VCADD270 + UNSPEC_VCMLA + UNSPEC_VCMLA90 + UNSPEC_VCMLA180 + UNSPEC_VCMLA270 ]) diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog index 2df44c759cc..7ba27e1f622 100644 --- a/gcc/testsuite/ChangeLog +++ b/gcc/testsuite/ChangeLog @@ -1,3 +1,8 @@ +2019-01-10 Tamar Christina + + * gcc.target/aarch64/advsimd-intrinsics/vector-complex.c: Add AArch32 regexpr. + * gcc.target/aarch64/advsimd-intrinsics/vector-complex_f16.c: Likewise. + 2019-01-10 Tamar Christina * gcc.target/aarch64/advsimd-intrinsics/vector-complex.c: New test. diff --git a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vector-complex.c b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vector-complex.c index b7c999333ed..1428cbe3f69 100644 --- a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vector-complex.c +++ b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vector-complex.c @@ -1,5 +1,4 @@ -/* { dg-skip-if "" { arm-*-* } } */ -/* { dg-do assemble } */ +/* { dg-do compile } */ /* { dg-require-effective-target arm_v8_3a_complex_neon_ok } */ /* { dg-add-options arm_v8_3a_complex_neon } */ /* { dg-additional-options "-O2 -save-temps" } */ @@ -249,3 +248,22 @@ test_vcmlaq_rot270_laneq_f32 (float32x4_t __r, float32x4_t __a, float32x4_t __b) /* { dg-final { scan-assembler-times {fcmla\tv[0-9]+.4s, v[0-9]+.4s, v[0-9]+.s\[1\], #270} 1 { target { aarch64*-*-* } } } } */ /* { dg-final { scan-assembler-times {fcmla\tv[0-9]+.4s, v[0-9]+.4s, v[0-9]+.s\[1\], #90} 1 { target { aarch64*-*-* } } } } */ /* { dg-final { scan-assembler-times {dup\td[0-9]+, v[0-9]+.d\[1\]} 4 { target { aarch64*-*-* } } } } */ + +/* { dg-final { scan-assembler-times {vcadd.f32\td[0-9]+, d[0-9]+, d[0-9]+, #90} 2 { target { arm*-*-* } } } } */ +/* { dg-final { scan-assembler-times {vcadd.f32\tq[0-9]+, q[0-9]+, q[0-9]+, #90} 2 { target { arm*-*-* } } } } */ +/* { dg-final { scan-assembler-times {vcmla.f32\td[0-9]+, d[0-9]+, d[0-9]+, #0} 1 { target { arm*-*-* } } } } */ +/* { dg-final { scan-assembler-times {vcmla.f32\td[0-9]+, d[0-9]+, d[0-9]+, #180} 1 { target { arm*-*-* } } } } */ +/* { dg-final { scan-assembler-times {vcmla.f32\td[0-9]+, d[0-9]+, d[0-9]+, #270} 1 { target { arm*-*-* } } } } */ +/* { dg-final { scan-assembler-times {vcmla.f32\td[0-9]+, d[0-9]+, d[0-9]+, #90} 1 { target { arm*-*-* } } } } */ +/* { dg-final { scan-assembler-times {vcmla.f32\td[0-9]+, d[0-9]+, d[0-9]+\[0\], #0} 2 { target { arm*-*-* } } } } */ +/* { dg-final { scan-assembler-times {vcmla.f32\td[0-9]+, d[0-9]+, d[0-9]+\[0\], #180} 2 { target { arm*-*-* } } } } */ +/* { dg-final { scan-assembler-times {vcmla.f32\td[0-9]+, d[0-9]+, d[0-9]+\[0\], #270} 2 { target { arm*-*-* } } } } */ +/* { dg-final { scan-assembler-times {vcmla.f32\td[0-9]+, d[0-9]+, d[0-9]+\[0\], #90} 2 { target { arm*-*-* } } } } */ +/* { dg-final { scan-assembler-times {vcmla.f32\tq[0-9]+, q[0-9]+, d[0-9]+\[0\], #0} 2 { target { arm*-*-* } } } } */ +/* { dg-final { scan-assembler-times {vcmla.f32\tq[0-9]+, q[0-9]+, d[0-9]+\[0\], #180} 2 { target { arm*-*-* } } } } */ +/* { dg-final { scan-assembler-times {vcmla.f32\tq[0-9]+, q[0-9]+, d[0-9]+\[0\], #270} 2 { target { arm*-*-* } } } } */ +/* { dg-final { scan-assembler-times {vcmla.f32\tq[0-9]+, q[0-9]+, 
d[0-9]+\[0\], #90} 2 { target { arm*-*-* } } } } */ +/* { dg-final { scan-assembler-times {vcmla.f32\tq[0-9]+, q[0-9]+, q[0-9]+, #0} 1 { target { arm*-*-* } } } } */ +/* { dg-final { scan-assembler-times {vcmla.f32\tq[0-9]+, q[0-9]+, q[0-9]+, #180} 1 { target { arm*-*-* } } } } */ +/* { dg-final { scan-assembler-times {vcmla.f32\tq[0-9]+, q[0-9]+, q[0-9]+, #270} 1 { target { arm*-*-* } } } } */ +/* { dg-final { scan-assembler-times {vcmla.f32\tq[0-9]+, q[0-9]+, q[0-9]+, #90} 1 { target { arm*-*-* } } } } */ diff --git a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vector-complex_f16.c b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vector-complex_f16.c index dbcebcbfba6..99754b67e4b 100644 --- a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vector-complex_f16.c +++ b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vector-complex_f16.c @@ -1,5 +1,4 @@ -/* { dg-skip-if "" { arm-*-* } } */ -/* { dg-do assemble } */ +/* { dg-do compile } */ /* { dg-require-effective-target arm_v8_3a_complex_neon_ok } */ /* { dg-require-effective-target arm_v8_2a_fp16_scalar_ok } */ /* { dg-add-options arm_v8_3a_complex_neon } */ @@ -304,3 +303,30 @@ test_vcmlaq_rot270_laneq_f16_2 (float16x8_t __r, float16x8_t __a, float16x8_t __ /* { dg-final { scan-assembler-times {fcmla\tv[0-9]+.8h, v[0-9]+.8h, v[0-9]+.h\[3\], #180} 1 { target { aarch64*-*-* } } } } */ /* { dg-final { scan-assembler-times {fcmla\tv[0-9]+.8h, v[0-9]+.8h, v[0-9]+.h\[3\], #270} 1 { target { aarch64*-*-* } } } } */ /* { dg-final { scan-assembler-times {fcmla\tv[0-9]+.8h, v[0-9]+.8h, v[0-9]+.h\[3\], #90} 1 { target { aarch64*-*-* } } } } */ + +/* { dg-final { scan-assembler-times {vcadd.f16\td[0-9]+, d[0-9]+, d[0-9]+, #90} 2 { target { arm*-*-* } } } } */ +/* { dg-final { scan-assembler-times {vcadd.f16\tq[0-9]+, q[0-9]+, q[0-9]+, #90} 2 { target { arm*-*-* } } } } */ +/* { dg-final { scan-assembler-times {vcmla.f16\td[0-9]+, d[0-9]+, d[0-9]+, #0} 1 { target { arm*-*-* } } } } */ +/* { dg-final { scan-assembler-times {vcmla.f16\td[0-9]+, d[0-9]+, d[0-9]+\[0\], #0} 3 { target { arm*-*-* } } } } */ +/* { dg-final { scan-assembler-times {vcmla.f16\td[0-9]+, d[0-9]+, d[0-9]+\[0\], #180} 3 { target { arm*-*-* } } } } */ +/* { dg-final { scan-assembler-times {vcmla.f16\td[0-9]+, d[0-9]+, d[0-9]+\[0\], #270} 3 { target { arm*-*-* } } } } */ +/* { dg-final { scan-assembler-times {vcmla.f16\td[0-9]+, d[0-9]+, d[0-9]+\[0\], #90} 3 { target { arm*-*-* } } } } */ +/* { dg-final { scan-assembler-times {vcmla.f16\td[0-9]+, d[0-9]+, d[0-9]+\[1\], #0} 1 { target { arm*-*-* } } } } */ +/* { dg-final { scan-assembler-times {vcmla.f16\td[0-9]+, d[0-9]+, d[0-9]+\[1\], #180} 1 { target { arm*-*-* } } } } */ +/* { dg-final { scan-assembler-times {vcmla.f16\td[0-9]+, d[0-9]+, d[0-9]+\[1\], #270} 1 { target { arm*-*-* } } } } */ +/* { dg-final { scan-assembler-times {vcmla.f16\td[0-9]+, d[0-9]+, d[0-9]+, #180} 1 { target { arm*-*-* } } } } */ +/* { dg-final { scan-assembler-times {vcmla.f16\td[0-9]+, d[0-9]+, d[0-9]+\[1\], #90} 1 { target { arm*-*-* } } } } */ +/* { dg-final { scan-assembler-times {vcmla.f16\td[0-9]+, d[0-9]+, d[0-9]+, #270} 1 { target { arm*-*-* } } } } */ +/* { dg-final { scan-assembler-times {vcmla.f16\td[0-9]+, d[0-9]+, d[0-9]+, #90} 1 { target { arm*-*-* } } } } */ +/* { dg-final { scan-assembler-times {vcmla.f16\tq[0-9]+, q[0-9]+, d[0-9]+\[0\], #0} 3 { target { arm*-*-* } } } } */ +/* { dg-final { scan-assembler-times {vcmla.f16\tq[0-9]+, q[0-9]+, d[0-9]+\[0\], #180} 3 { target { arm*-*-* } } } } */ +/* { dg-final { 
scan-assembler-times {vcmla.f16\tq[0-9]+, q[0-9]+, d[0-9]+\[0\], #270} 3 { target { arm*-*-* } } } } */ +/* { dg-final { scan-assembler-times {vcmla.f16\tq[0-9]+, q[0-9]+, d[0-9]+\[0\], #90} 3 { target { arm*-*-* } } } } */ +/* { dg-final { scan-assembler-times {vcmla.f16\tq[0-9]+, q[0-9]+, d[0-9]+\[1\], #0} 1 { target { arm*-*-* } } } } */ +/* { dg-final { scan-assembler-times {vcmla.f16\tq[0-9]+, q[0-9]+, d[0-9]+\[1\], #180} 1 { target { arm*-*-* } } } } */ +/* { dg-final { scan-assembler-times {vcmla.f16\tq[0-9]+, q[0-9]+, d[0-9]+\[1\], #270} 1 { target { arm*-*-* } } } } */ +/* { dg-final { scan-assembler-times {vcmla.f16\tq[0-9]+, q[0-9]+, d[0-9]+\[1\], #90} 1 { target { arm*-*-* } } } } */ +/* { dg-final { scan-assembler-times {vcmla.f16\tq[0-9]+, q[0-9]+, q[0-9]+, #0} 1 { target { arm*-*-* } } } } */ +/* { dg-final { scan-assembler-times {vcmla.f16\tq[0-9]+, q[0-9]+, q[0-9]+, #180} 1 { target { arm*-*-* } } } } */ +/* { dg-final { scan-assembler-times {vcmla.f16\tq[0-9]+, q[0-9]+, q[0-9]+, #270} 1 { target { arm*-*-* } } } } */ +/* { dg-final { scan-assembler-times {vcmla.f16\tq[0-9]+, q[0-9]+, q[0-9]+, #90} 1 { target { arm*-*-* } } } } */ -- 2.30.2
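For readers of the arm.c hunk above, the index arithmetic in
neon_vcmla_lane_prepare_operands can be easier to follow as a standalone
model.  The sketch below is a hypothetical host program, not compiler code,
and it ignores the big-endian lane renumbering done by NEON_ENDIAN_LANE_N;
it only mirrors how a pair index is split into a D register number and an
index within that register:

    #include <stdio.h>

    /* Each D register holds GET_MODE_NUNITS (mode) / 4 complex values of the
       indexed vector, so pair indices at or above that bound select the next
       D register, D(n+1), with the bound subtracted from the index.  */
    static void
    map_lane_pair (int nunits, int dreg, int index)
    {
      int per_dreg = nunits / 4;
      if (index > 0 && index >= per_dreg)
        printf ("index %d -> d%d[%d]\n", index, dreg + 1, index - per_dreg);
      else
        printf ("index %d -> d%d[%d]\n", index, dreg, index);
    }

    int
    main (void)
    {
      /* A V8HF vector in q1 (d2/d3) holds four complex fp16 values; the valid
         pair indices 0..3 map onto d2[0], d2[1], d3[0] and d3[1], matching the
         "Dn[0], Dn[1], D(n+1)[0], and D(n+1)[1]" cases in the function comment.  */
      for (int i = 0; i < 4; i++)
        map_lane_pair (8, 2, i);
      return 0;
    }

In the compiler the same two numbers end up in operands[3] (the D register
number printed into the mnemonic) and operands[4] (the lane), which is why
the output templates in neon.md use d%c3[%c4].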