Revert "Arm: Add NEON and MVE RTL patterns for Complex Addition, Multiply and FMA."
author     Tamar Christina <tamar.christina@arm.com>
           Sun, 13 Dec 2020 16:49:55 +0000 (16:49 +0000)
committer  Tamar Christina <tamar.christina@arm.com>
           Sun, 13 Dec 2020 16:49:55 +0000 (16:49 +0000)
This reverts commit 3b8a82f97dd48e153ce93b317c44254839e11461.

The reverted patch has a dependency on the AArch64 patch, which hasn't been approved yet.
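
For context: the arm_mve.h hunks below only change which internal builtin
each complex-arithmetic intrinsic expands to; the user-facing ACLE
intrinsics themselves are unchanged.  A minimal usage sketch (illustrative
only, assuming an MVE floating-point target, e.g. -march=armv8.1-m.main+mve.fp):

  #include <arm_mve.h>

  /* Complex arithmetic on interleaved (re, im) float32 lanes.  */
  float32x4_t
  complex_madd (float32x4_t acc, float32x4_t a, float32x4_t b)
  {
    /* a + (b rotated by 90 degrees): (a.re - b.im, a.im + b.re).  */
    float32x4_t t = vcaddq_rot90_f32 (a, b);
    /* acc += a * b, built from the two VCMLA rotations (#0 and #90).  */
    acc = vcmlaq_f32 (acc, a, b);
    acc = vcmlaq_rot90_f32 (acc, a, b);
    return vaddq_f32 (acc, t);
  }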

gcc/config/arm/arm_mve.h
gcc/config/arm/arm_mve_builtins.def
gcc/config/arm/constraints.md
gcc/config/arm/iterators.md
gcc/config/arm/mve.md
gcc/config/arm/neon.md
gcc/config/arm/unspecs.md
gcc/config/arm/vec-common.md
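
Besides the intrinsic builtins, the vec-common.md and neon.md hunks drop
the cadd<rot><mode>3, cmul<rot_op><mode>3 and cml<fcmac1><rot_op><mode>4
expanders that expose VCADD/VCMLA/VCMUL to the auto-vectorizer.  As an
illustration (not part of this patch), a scalar loop of the shape below is
the kind of code those optabs let the vectorizer map onto VCADD; after the
revert it is vectorized with plain adds and subs instead:

  /* Complex addition with the second operand rotated by 90 degrees,
     on interleaved (re, im) float data.  Illustrative only.  */
  void
  cadd90 (float *restrict r, const float *restrict a,
          const float *restrict b, int n)
  {
    for (int i = 0; i < n; i += 2)
      {
        r[i]     = a[i]     - b[i + 1];   /* re = a.re - b.im */
        r[i + 1] = a[i + 1] + b[i];       /* im = a.im + b.re */
      }
  }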

diff --git a/gcc/config/arm/arm_mve.h b/gcc/config/arm/arm_mve.h
index 45014621f2533497e90ddf5257fb04e1fd9325b4..6c0d1e2e634a32196eb31079166a7733dcd3a4b6 100644
@@ -3981,16 +3981,14 @@ __extension__ extern __inline uint8x16_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 __arm_vcaddq_rot90_u8 (uint8x16_t __a, uint8x16_t __b)
 {
-  return (uint8x16_t)
-    __builtin_mve_vcaddq_rot90v16qi ((int8x16_t)__a, (int8x16_t)__b);
+  return __builtin_mve_vcaddq_rot90_uv16qi (__a, __b);
 }
 
 __extension__ extern __inline uint8x16_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 __arm_vcaddq_rot270_u8 (uint8x16_t __a, uint8x16_t __b)
 {
-  return (uint8x16_t)
-    __builtin_mve_vcaddq_rot270v16qi ((int8x16_t)__a, (int8x16_t)__b);
+  return __builtin_mve_vcaddq_rot270_uv16qi (__a, __b);
 }
 
 __extension__ extern __inline uint8x16_t
@@ -4522,14 +4520,14 @@ __extension__ extern __inline int8x16_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 __arm_vcaddq_rot90_s8 (int8x16_t __a, int8x16_t __b)
 {
-  return __builtin_mve_vcaddq_rot90v16qi (__a, __b);
+  return __builtin_mve_vcaddq_rot90_sv16qi (__a, __b);
 }
 
 __extension__ extern __inline int8x16_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 __arm_vcaddq_rot270_s8 (int8x16_t __a, int8x16_t __b)
 {
-  return __builtin_mve_vcaddq_rot270v16qi (__a, __b);
+  return __builtin_mve_vcaddq_rot270_sv16qi (__a, __b);
 }
 
 __extension__ extern __inline int8x16_t
@@ -4823,16 +4821,14 @@ __extension__ extern __inline uint16x8_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 __arm_vcaddq_rot90_u16 (uint16x8_t __a, uint16x8_t __b)
 {
-  return (uint16x8_t)
-    __builtin_mve_vcaddq_rot90v8hi ((int16x8_t)__a, (int16x8_t)__b);
+  return __builtin_mve_vcaddq_rot90_uv8hi (__a, __b);
 }
 
 __extension__ extern __inline uint16x8_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 __arm_vcaddq_rot270_u16 (uint16x8_t __a, uint16x8_t __b)
 {
-  return (uint16x8_t)
-    __builtin_mve_vcaddq_rot270v8hi ((int16x8_t)__a, (int16x8_t)__b);
+  return __builtin_mve_vcaddq_rot270_uv8hi (__a, __b);
 }
 
 __extension__ extern __inline uint16x8_t
@@ -5364,14 +5360,14 @@ __extension__ extern __inline int16x8_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 __arm_vcaddq_rot90_s16 (int16x8_t __a, int16x8_t __b)
 {
-  return __builtin_mve_vcaddq_rot90v8hi (__a, __b);
+  return __builtin_mve_vcaddq_rot90_sv8hi (__a, __b);
 }
 
 __extension__ extern __inline int16x8_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 __arm_vcaddq_rot270_s16 (int16x8_t __a, int16x8_t __b)
 {
-  return __builtin_mve_vcaddq_rot270v8hi (__a, __b);
+  return __builtin_mve_vcaddq_rot270_sv8hi (__a, __b);
 }
 
 __extension__ extern __inline int16x8_t
@@ -5665,16 +5661,14 @@ __extension__ extern __inline uint32x4_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 __arm_vcaddq_rot90_u32 (uint32x4_t __a, uint32x4_t __b)
 {
-  return (uint32x4_t)
-    __builtin_mve_vcaddq_rot90v4si ((int32x4_t)__a, (int32x4_t)__b);
+  return __builtin_mve_vcaddq_rot90_uv4si (__a, __b);
 }
 
 __extension__ extern __inline uint32x4_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 __arm_vcaddq_rot270_u32 (uint32x4_t __a, uint32x4_t __b)
 {
-  return (uint32x4_t)
-    __builtin_mve_vcaddq_rot270v4si ((int32x4_t)__a, (int32x4_t)__b);
+  return __builtin_mve_vcaddq_rot270_uv4si (__a, __b);
 }
 
 __extension__ extern __inline uint32x4_t
@@ -6206,14 +6200,14 @@ __extension__ extern __inline int32x4_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 __arm_vcaddq_rot90_s32 (int32x4_t __a, int32x4_t __b)
 {
-  return __builtin_mve_vcaddq_rot90v4si (__a, __b);
+  return __builtin_mve_vcaddq_rot90_sv4si (__a, __b);
 }
 
 __extension__ extern __inline int32x4_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 __arm_vcaddq_rot270_s32 (int32x4_t __a, int32x4_t __b)
 {
-  return __builtin_mve_vcaddq_rot270v4si (__a, __b);
+  return __builtin_mve_vcaddq_rot270_sv4si (__a, __b);
 }
 
 __extension__ extern __inline int32x4_t
@@ -17348,42 +17342,42 @@ __extension__ extern __inline float16x8_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 __arm_vcmulq_rot90_f16 (float16x8_t __a, float16x8_t __b)
 {
-  return __builtin_mve_vcmulq_rot90v8hf (__a, __b);
+  return __builtin_mve_vcmulq_rot90_fv8hf (__a, __b);
 }
 
 __extension__ extern __inline float16x8_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 __arm_vcmulq_rot270_f16 (float16x8_t __a, float16x8_t __b)
 {
-  return __builtin_mve_vcmulq_rot270v8hf (__a, __b);
+  return __builtin_mve_vcmulq_rot270_fv8hf (__a, __b);
 }
 
 __extension__ extern __inline float16x8_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 __arm_vcmulq_rot180_f16 (float16x8_t __a, float16x8_t __b)
 {
-  return __builtin_mve_vcmulq_rot180v8hf (__a, __b);
+  return __builtin_mve_vcmulq_rot180_fv8hf (__a, __b);
 }
 
 __extension__ extern __inline float16x8_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 __arm_vcmulq_f16 (float16x8_t __a, float16x8_t __b)
 {
-  return __builtin_mve_vcmulqv8hf (__a, __b);
+  return __builtin_mve_vcmulq_fv8hf (__a, __b);
 }
 
 __extension__ extern __inline float16x8_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 __arm_vcaddq_rot90_f16 (float16x8_t __a, float16x8_t __b)
 {
-  return __builtin_mve_vcaddq_rot90v8hf (__a, __b);
+  return __builtin_mve_vcaddq_rot90_fv8hf (__a, __b);
 }
 
 __extension__ extern __inline float16x8_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 __arm_vcaddq_rot270_f16 (float16x8_t __a, float16x8_t __b)
 {
-  return __builtin_mve_vcaddq_rot270v8hf (__a, __b);
+  return __builtin_mve_vcaddq_rot270_fv8hf (__a, __b);
 }
 
 __extension__ extern __inline float16x8_t
@@ -17600,42 +17594,42 @@ __extension__ extern __inline float32x4_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 __arm_vcmulq_rot90_f32 (float32x4_t __a, float32x4_t __b)
 {
-  return __builtin_mve_vcmulq_rot90v4sf (__a, __b);
+  return __builtin_mve_vcmulq_rot90_fv4sf (__a, __b);
 }
 
 __extension__ extern __inline float32x4_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 __arm_vcmulq_rot270_f32 (float32x4_t __a, float32x4_t __b)
 {
-  return __builtin_mve_vcmulq_rot270v4sf (__a, __b);
+  return __builtin_mve_vcmulq_rot270_fv4sf (__a, __b);
 }
 
 __extension__ extern __inline float32x4_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 __arm_vcmulq_rot180_f32 (float32x4_t __a, float32x4_t __b)
 {
-  return __builtin_mve_vcmulq_rot180v4sf (__a, __b);
+  return __builtin_mve_vcmulq_rot180_fv4sf (__a, __b);
 }
 
 __extension__ extern __inline float32x4_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 __arm_vcmulq_f32 (float32x4_t __a, float32x4_t __b)
 {
-  return __builtin_mve_vcmulqv4sf (__a, __b);
+  return __builtin_mve_vcmulq_fv4sf (__a, __b);
 }
 
 __extension__ extern __inline float32x4_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 __arm_vcaddq_rot90_f32 (float32x4_t __a, float32x4_t __b)
 {
-  return __builtin_mve_vcaddq_rot90v4sf (__a, __b);
+  return __builtin_mve_vcaddq_rot90_fv4sf (__a, __b);
 }
 
 __extension__ extern __inline float32x4_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 __arm_vcaddq_rot270_f32 (float32x4_t __a, float32x4_t __b)
 {
-  return __builtin_mve_vcaddq_rot270v4sf (__a, __b);
+  return __builtin_mve_vcaddq_rot270_fv4sf (__a, __b);
 }
 
 __extension__ extern __inline float32x4_t
@@ -17790,28 +17784,28 @@ __extension__ extern __inline float16x8_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 __arm_vcmlaq_f16 (float16x8_t __a, float16x8_t __b, float16x8_t __c)
 {
-  return __builtin_mve_vcmlaqv8hf (__a, __b, __c);
+  return __builtin_mve_vcmlaq_fv8hf (__a, __b, __c);
 }
 
 __extension__ extern __inline float16x8_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 __arm_vcmlaq_rot180_f16 (float16x8_t __a, float16x8_t __b, float16x8_t __c)
 {
-  return __builtin_mve_vcmlaq_rot180v8hf (__a, __b, __c);
+  return __builtin_mve_vcmlaq_rot180_fv8hf (__a, __b, __c);
 }
 
 __extension__ extern __inline float16x8_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 __arm_vcmlaq_rot270_f16 (float16x8_t __a, float16x8_t __b, float16x8_t __c)
 {
-  return __builtin_mve_vcmlaq_rot270v8hf (__a, __b, __c);
+  return __builtin_mve_vcmlaq_rot270_fv8hf (__a, __b, __c);
 }
 
 __extension__ extern __inline float16x8_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 __arm_vcmlaq_rot90_f16 (float16x8_t __a, float16x8_t __b, float16x8_t __c)
 {
-  return __builtin_mve_vcmlaq_rot90v8hf (__a, __b, __c);
+  return __builtin_mve_vcmlaq_rot90_fv8hf (__a, __b, __c);
 }
 
 __extension__ extern __inline float16x8_t
@@ -18098,28 +18092,28 @@ __extension__ extern __inline float32x4_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 __arm_vcmlaq_f32 (float32x4_t __a, float32x4_t __b, float32x4_t __c)
 {
-  return __builtin_mve_vcmlaqv4sf (__a, __b, __c);
+  return __builtin_mve_vcmlaq_fv4sf (__a, __b, __c);
 }
 
 __extension__ extern __inline float32x4_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 __arm_vcmlaq_rot180_f32 (float32x4_t __a, float32x4_t __b, float32x4_t __c)
 {
-  return __builtin_mve_vcmlaq_rot180v4sf (__a, __b, __c);
+  return __builtin_mve_vcmlaq_rot180_fv4sf (__a, __b, __c);
 }
 
 __extension__ extern __inline float32x4_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 __arm_vcmlaq_rot270_f32 (float32x4_t __a, float32x4_t __b, float32x4_t __c)
 {
-  return __builtin_mve_vcmlaq_rot270v4sf (__a, __b, __c);
+  return __builtin_mve_vcmlaq_rot270_fv4sf (__a, __b, __c);
 }
 
 __extension__ extern __inline float32x4_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 __arm_vcmlaq_rot90_f32 (float32x4_t __a, float32x4_t __b, float32x4_t __c)
 {
-  return __builtin_mve_vcmlaq_rot90v4sf (__a, __b, __c);
+  return __builtin_mve_vcmlaq_rot90_fv4sf (__a, __b, __c);
 }
 
 __extension__ extern __inline float32x4_t
diff --git a/gcc/config/arm/arm_mve_builtins.def b/gcc/config/arm/arm_mve_builtins.def
index 56b652fff0a6729d04982cc13a479587180b0208..f38926ffd8e44f63d25a8fb9bf8f7d8680570ef0 100644
@@ -125,6 +125,8 @@ VAR3 (BINOP_UNONE_UNONE_UNONE, vcmpeqq_u, v16qi, v8hi, v4si)
 VAR3 (BINOP_UNONE_UNONE_UNONE, vcmpeqq_n_u, v16qi, v8hi, v4si)
 VAR3 (BINOP_UNONE_UNONE_UNONE, vcmpcsq_u, v16qi, v8hi, v4si)
 VAR3 (BINOP_UNONE_UNONE_UNONE, vcmpcsq_n_u, v16qi, v8hi, v4si)
+VAR3 (BINOP_UNONE_UNONE_UNONE, vcaddq_rot90_u, v16qi, v8hi, v4si)
+VAR3 (BINOP_UNONE_UNONE_UNONE, vcaddq_rot270_u, v16qi, v8hi, v4si)
 VAR3 (BINOP_UNONE_UNONE_UNONE, vbicq_u, v16qi, v8hi, v4si)
 VAR3 (BINOP_UNONE_UNONE_UNONE, vandq_u, v16qi, v8hi, v4si)
 VAR3 (BINOP_UNONE_UNONE_UNONE, vaddvq_p_u, v16qi, v8hi, v4si)
@@ -200,6 +202,8 @@ VAR3 (BINOP_NONE_NONE_NONE, vhcaddq_rot270_s, v16qi, v8hi, v4si)
 VAR3 (BINOP_NONE_NONE_NONE, vhaddq_s, v16qi, v8hi, v4si)
 VAR3 (BINOP_NONE_NONE_NONE, vhaddq_n_s, v16qi, v8hi, v4si)
 VAR3 (BINOP_NONE_NONE_NONE, veorq_s, v16qi, v8hi, v4si)
+VAR3 (BINOP_NONE_NONE_NONE, vcaddq_rot90_s, v16qi, v8hi, v4si)
+VAR3 (BINOP_NONE_NONE_NONE, vcaddq_rot270_s, v16qi, v8hi, v4si)
 VAR3 (BINOP_NONE_NONE_NONE, vbrsrq_n_s, v16qi, v8hi, v4si)
 VAR3 (BINOP_NONE_NONE_NONE, vbicq_s, v16qi, v8hi, v4si)
 VAR3 (BINOP_NONE_NONE_NONE, vandq_s, v16qi, v8hi, v4si)
@@ -260,6 +264,12 @@ VAR2 (BINOP_NONE_NONE_NONE, vmaxnmq_f, v8hf, v4sf)
 VAR2 (BINOP_NONE_NONE_NONE, vmaxnmavq_f, v8hf, v4sf)
 VAR2 (BINOP_NONE_NONE_NONE, vmaxnmaq_f, v8hf, v4sf)
 VAR2 (BINOP_NONE_NONE_NONE, veorq_f, v8hf, v4sf)
+VAR2 (BINOP_NONE_NONE_NONE, vcmulq_rot90_f, v8hf, v4sf)
+VAR2 (BINOP_NONE_NONE_NONE, vcmulq_rot270_f, v8hf, v4sf)
+VAR2 (BINOP_NONE_NONE_NONE, vcmulq_rot180_f, v8hf, v4sf)
+VAR2 (BINOP_NONE_NONE_NONE, vcmulq_f, v8hf, v4sf)
+VAR2 (BINOP_NONE_NONE_NONE, vcaddq_rot90_f, v8hf, v4sf)
+VAR2 (BINOP_NONE_NONE_NONE, vcaddq_rot270_f, v8hf, v4sf)
 VAR2 (BINOP_NONE_NONE_NONE, vbicq_f, v8hf, v4sf)
 VAR2 (BINOP_NONE_NONE_NONE, vandq_f, v8hf, v4sf)
 VAR2 (BINOP_NONE_NONE_NONE, vaddq_n_f, v8hf, v4sf)
@@ -460,6 +470,10 @@ VAR2 (TERNOP_NONE_NONE_NONE_NONE, vfmsq_f, v8hf, v4sf)
 VAR2 (TERNOP_NONE_NONE_NONE_NONE, vfmasq_n_f, v8hf, v4sf)
 VAR2 (TERNOP_NONE_NONE_NONE_NONE, vfmaq_n_f, v8hf, v4sf)
 VAR2 (TERNOP_NONE_NONE_NONE_NONE, vfmaq_f, v8hf, v4sf)
+VAR2 (TERNOP_NONE_NONE_NONE_NONE, vcmlaq_rot90_f, v8hf, v4sf)
+VAR2 (TERNOP_NONE_NONE_NONE_NONE, vcmlaq_rot270_f, v8hf, v4sf)
+VAR2 (TERNOP_NONE_NONE_NONE_NONE, vcmlaq_rot180_f, v8hf, v4sf)
+VAR2 (TERNOP_NONE_NONE_NONE_NONE, vcmlaq_f, v8hf, v4sf)
 VAR2 (TERNOP_NONE_NONE_NONE_IMM, vshrntq_n_s, v8hi, v4si)
 VAR2 (TERNOP_NONE_NONE_NONE_IMM, vshrnbq_n_s, v8hi, v4si)
 VAR2 (TERNOP_NONE_NONE_NONE_IMM, vrshrntq_n_s, v8hi, v4si)
@@ -878,15 +892,3 @@ VAR3 (QUADOP_NONE_NONE_UNONE_IMM_UNONE, vshlcq_m_vec_s, v16qi, v8hi, v4si)
 VAR3 (QUADOP_NONE_NONE_UNONE_IMM_UNONE, vshlcq_m_carry_s, v16qi, v8hi, v4si)
 VAR3 (QUADOP_UNONE_UNONE_UNONE_IMM_UNONE, vshlcq_m_vec_u, v16qi, v8hi, v4si)
 VAR3 (QUADOP_UNONE_UNONE_UNONE_IMM_UNONE, vshlcq_m_carry_u, v16qi, v8hi, v4si)
-
-/* optabs without any suffixes.  */
-VAR5 (BINOP_NONE_NONE_NONE, vcaddq_rot90, v16qi, v8hi, v4si, v8hf, v4sf)
-VAR5 (BINOP_NONE_NONE_NONE, vcaddq_rot270, v16qi, v8hi, v4si, v8hf, v4sf)
-VAR2 (BINOP_NONE_NONE_NONE, vcmulq_rot90, v8hf, v4sf)
-VAR2 (BINOP_NONE_NONE_NONE, vcmulq_rot270, v8hf, v4sf)
-VAR2 (BINOP_NONE_NONE_NONE, vcmulq_rot180, v8hf, v4sf)
-VAR2 (BINOP_NONE_NONE_NONE, vcmulq, v8hf, v4sf)
-VAR2 (TERNOP_NONE_NONE_NONE_NONE, vcmlaq_rot90, v8hf, v4sf)
-VAR2 (TERNOP_NONE_NONE_NONE_NONE, vcmlaq_rot270, v8hf, v4sf)
-VAR2 (TERNOP_NONE_NONE_NONE_NONE, vcmlaq_rot180, v8hf, v4sf)
-VAR2 (TERNOP_NONE_NONE_NONE_NONE, vcmlaq, v8hf, v4sf)
diff --git a/gcc/config/arm/constraints.md b/gcc/config/arm/constraints.md
index 6ebddb95b4f9c835f10f5265573f27a06ccbd11f..789e3332abb7495b308509d03ed241d39498a8b6 100644
  "@internal
   In ARM/Thumb-2 state a vector of constant zeros."
  (and (match_code "const_vector")
-      (match_test "(TARGET_NEON || TARGET_HAVE_MVE) && op == CONST0_RTX (mode)")))
+      (match_test "TARGET_NEON && op == CONST0_RTX (mode)")))
 
 (define_constraint "Da"
  "@internal
diff --git a/gcc/config/arm/iterators.md b/gcc/config/arm/iterators.md
index 5fc75cb8d076e09febacc7ad53226c1aa584295b..5fcb7afe5657b6d373d7ea314f53206cdde93c8d 100644
 
 (define_int_attr rot [(UNSPEC_VCADD90 "90")
                      (UNSPEC_VCADD270 "270")
-                     (UNSPEC_VCMLS "0")
                      (UNSPEC_VCMLA "0")
                      (UNSPEC_VCMLA90 "90")
                      (UNSPEC_VCMLA180 "180")
                      (UNSPEC_VCMLA270 "270")])
 
-(define_int_attr mve_rotsplit1 [(UNSPEC_VCMLA "")
-                               (UNSPEC_VCMLA180 "")
-                               (UNSPEC_VCMUL "")
-                               (UNSPEC_VCMUL180 "")
-                               (UNSPEC_VCMLS "_rot270")
-                               (UNSPEC_VCMLS180 "_rot90")])
-
-(define_int_attr mve_rotsplit2 [(UNSPEC_VCMLA "_rot90")
-                               (UNSPEC_VCMLA180 "_rot270")
-                               (UNSPEC_VCMUL "_rot90")
-                               (UNSPEC_VCMUL180 "_rot270")
-                               (UNSPEC_VCMLS "_rot180")
-                               (UNSPEC_VCMLS180 "_rot180")])
-
-(define_int_attr mve_rot [(UNSPEC_VCADD90 "_rot90")
-                         (UNSPEC_VCADD270 "_rot270")
-                         (UNSPEC_VCMLA "")
-                         (UNSPEC_VCMLA90 "_rot90")
-                         (UNSPEC_VCMLA180 "_rot180")
-                         (UNSPEC_VCMLA270 "_rot270")
-                         (UNSPEC_VCMUL "")
-                         (UNSPEC_VCMUL90 "_rot90")
-                         (UNSPEC_VCMUL180 "_rot180")
-                         (UNSPEC_VCMUL270 "_rot270")])
-
-(define_int_iterator VCMUL [UNSPEC_VCMUL UNSPEC_VCMUL90
-                           UNSPEC_VCMUL180 UNSPEC_VCMUL270])
-
 (define_int_attr simd32_op [(UNSPEC_QADD8 "qadd8") (UNSPEC_QSUB8 "qsub8")
                            (UNSPEC_SHADD8 "shadd8") (UNSPEC_SHSUB8 "shsub8")
                            (UNSPEC_UHADD8 "uhadd8") (UNSPEC_UHSUB8 "uhsub8")
                       (VABDQ_M_S "s") (VABDQ_M_U "u") (VABDQ_S "s")
                       (VABDQ_U "u") (VADDQ_N_S "s") (VADDQ_N_U "u")
                       (VADDVQ_P_S "s") (VADDVQ_P_U "u") (VBICQ_S "s") (VBICQ_U "u")
-                      (VBRSRQ_N_S "s") (VBRSRQ_N_U "u")
-                      (VCMPEQQ_S "s") (VCMPEQQ_U "u")
+                      (VBRSRQ_N_S "s") (VBRSRQ_N_U "u") (VCADDQ_ROT270_S "s")
+                      (VCADDQ_ROT270_U "u") (VCADDQ_ROT90_S "s")
+                      (VCMPEQQ_S "s") (VCMPEQQ_U "u") (VCADDQ_ROT90_U "u")
                       (VCMPEQQ_N_S "s") (VCMPEQQ_N_U "u") (VCMPNEQ_N_S "s")
                       (VCMPNEQ_N_U "u") (VEORQ_S "s") (VEORQ_U "u")
                       (VHADDQ_N_S "s") (VHADDQ_N_U "u") (VHADDQ_S "s")
 (define_int_iterator VADDVQ_P [VADDVQ_P_U VADDVQ_P_S])
 (define_int_iterator VBICQ [VBICQ_S VBICQ_U])
 (define_int_iterator VBRSRQ_N [VBRSRQ_N_U VBRSRQ_N_S])
+(define_int_iterator VCADDQ_ROT270 [VCADDQ_ROT270_S VCADDQ_ROT270_U])
+(define_int_iterator VCADDQ_ROT90 [VCADDQ_ROT90_U VCADDQ_ROT90_S])
 (define_int_iterator VCMPEQQ [VCMPEQQ_U VCMPEQQ_S])
 (define_int_iterator VCMPEQQ_N [VCMPEQQ_N_S VCMPEQQ_N_U])
 (define_int_iterator VCMPNEQ_N [VCMPNEQ_N_U VCMPNEQ_N_S])
 (define_int_iterator UQRSHLLQ [UQRSHLL_64 UQRSHLL_48])
 (define_int_iterator SQRSHRLQ [SQRSHRL_64 SQRSHRL_48])
 (define_int_iterator VSHLCQ_M [VSHLCQ_M_S VSHLCQ_M_U])
-;; Define iterators for VCMLA operations
-(define_int_iterator VCMLA_OP [UNSPEC_VCMLA
-                              UNSPEC_VCMLA180
-                              UNSPEC_VCMLS])
-
-;; Define iterators for VCMLA operations as MUL
-(define_int_iterator VCMUL_OP [UNSPEC_VCMUL
-                              UNSPEC_VCMUL180])
diff --git a/gcc/config/arm/mve.md b/gcc/config/arm/mve.md
index d29f387b5bffc77c7ab24e0e95c6a730fe0e8822..4b2e46afc196df8133d81198e5e1d57a7a27ccae 100644
 ])
 
 ;;
-;; [vcaddq, vcaddq_rot90, vcadd_rot180, vcadd_rot270])
+;; [vcaddq_rot270_s, vcaddq_rot270_u])
 ;;
-(define_insn "mve_vcaddq<mve_rot><mode>"
+(define_insn "mve_vcaddq_rot270_<supf><mode>"
   [
    (set (match_operand:MVE_2 0 "s_register_operand" "<earlyclobber_32>")
        (unspec:MVE_2 [(match_operand:MVE_2 1 "s_register_operand" "w")
                       (match_operand:MVE_2 2 "s_register_operand" "w")]
-        VCADD))
+        VCADDQ_ROT270))
   ]
   "TARGET_HAVE_MVE"
-  "vcadd.i%#<V_sz_elem>        %q0, %q1, %q2, #<rot>"
+  "vcadd.i%#<V_sz_elem>        %q0, %q1, %q2, #270"
   [(set_attr "type" "mve_move")
 ])
 
-;; Auto vectorizer pattern for int vcadd
-(define_expand "cadd<rot><mode>3"
-  [(set (match_operand:MVE_2 0 "register_operand")
-       (unspec:MVE_2 [(match_operand:MVE_2 1 "register_operand")
-                      (match_operand:MVE_2 2 "register_operand")]
-         VCADD))]
-  "TARGET_HAVE_MVE && !BYTES_BIG_ENDIAN"
-)
+;;
+;; [vcaddq_rot90_u, vcaddq_rot90_s])
+;;
+(define_insn "mve_vcaddq_rot90_<supf><mode>"
+  [
+   (set (match_operand:MVE_2 0 "s_register_operand" "<earlyclobber_32>")
+       (unspec:MVE_2 [(match_operand:MVE_2 1 "s_register_operand" "w")
+                      (match_operand:MVE_2 2 "s_register_operand" "w")]
+        VCADDQ_ROT90))
+  ]
+  "TARGET_HAVE_MVE"
+  "vcadd.i%#<V_sz_elem>        %q0, %q1, %q2, #90"
+  [(set_attr "type" "mve_move")
+])
 
 ;;
 ;; [vcmpcsq_n_u])
 ])
 
 ;;
-;; [vcaddq, vcaddq_rot90, vcadd_rot180, vcadd_rot270])
+;; [vcaddq_rot270_f])
+;;
+(define_insn "mve_vcaddq_rot270_f<mode>"
+  [
+   (set (match_operand:MVE_0 0 "s_register_operand" "<earlyclobber_32>")
+       (unspec:MVE_0 [(match_operand:MVE_0 1 "s_register_operand" "w")
+                      (match_operand:MVE_0 2 "s_register_operand" "w")]
+        VCADDQ_ROT270_F))
+  ]
+  "TARGET_HAVE_MVE && TARGET_HAVE_MVE_FLOAT"
+  "vcadd.f%#<V_sz_elem>        %q0, %q1, %q2, #270"
+  [(set_attr "type" "mve_move")
+])
+
+;;
+;; [vcaddq_rot90_f])
 ;;
-(define_insn "mve_vcaddq<mve_rot><mode>"
+(define_insn "mve_vcaddq_rot90_f<mode>"
   [
    (set (match_operand:MVE_0 0 "s_register_operand" "<earlyclobber_32>")
        (unspec:MVE_0 [(match_operand:MVE_0 1 "s_register_operand" "w")
                       (match_operand:MVE_0 2 "s_register_operand" "w")]
-        VCADD))
+        VCADDQ_ROT90_F))
   ]
   "TARGET_HAVE_MVE && TARGET_HAVE_MVE_FLOAT"
-  "vcadd.f%#<V_sz_elem>        %q0, %q1, %q2, #<rot>"
+  "vcadd.f%#<V_sz_elem>        %q0, %q1, %q2, #90"
   [(set_attr "type" "mve_move")
 ])
 
 ])
 
 ;;
-;; [vcmulq, vcmulq_rot90, vcmulq_rot180, vcmulq_rot270])
+;; [vcmulq_f])
+;;
+(define_insn "mve_vcmulq_f<mode>"
+  [
+   (set (match_operand:MVE_0 0 "s_register_operand" "<earlyclobber_32>")
+       (unspec:MVE_0 [(match_operand:MVE_0 1 "s_register_operand" "w")
+                      (match_operand:MVE_0 2 "s_register_operand" "w")]
+        VCMULQ_F))
+  ]
+  "TARGET_HAVE_MVE && TARGET_HAVE_MVE_FLOAT"
+  "vcmul.f%#<V_sz_elem>        %q0, %q1, %q2, #0"
+  [(set_attr "type" "mve_move")
+])
+
+;;
+;; [vcmulq_rot180_f])
+;;
+(define_insn "mve_vcmulq_rot180_f<mode>"
+  [
+   (set (match_operand:MVE_0 0 "s_register_operand" "<earlyclobber_32>")
+       (unspec:MVE_0 [(match_operand:MVE_0 1 "s_register_operand" "w")
+                      (match_operand:MVE_0 2 "s_register_operand" "w")]
+        VCMULQ_ROT180_F))
+  ]
+  "TARGET_HAVE_MVE && TARGET_HAVE_MVE_FLOAT"
+  "vcmul.f%#<V_sz_elem>        %q0, %q1, %q2, #180"
+  [(set_attr "type" "mve_move")
+])
+
+;;
+;; [vcmulq_rot270_f])
+;;
+(define_insn "mve_vcmulq_rot270_f<mode>"
+  [
+   (set (match_operand:MVE_0 0 "s_register_operand" "<earlyclobber_32>")
+       (unspec:MVE_0 [(match_operand:MVE_0 1 "s_register_operand" "w")
+                      (match_operand:MVE_0 2 "s_register_operand" "w")]
+        VCMULQ_ROT270_F))
+  ]
+  "TARGET_HAVE_MVE && TARGET_HAVE_MVE_FLOAT"
+  "vcmul.f%#<V_sz_elem>        %q0, %q1, %q2, #270"
+  [(set_attr "type" "mve_move")
+])
+
+;;
+;; [vcmulq_rot90_f])
 ;;
-(define_insn "mve_vcmulq<mve_rot><mode>"
+(define_insn "mve_vcmulq_rot90_f<mode>"
   [
    (set (match_operand:MVE_0 0 "s_register_operand" "<earlyclobber_32>")
        (unspec:MVE_0 [(match_operand:MVE_0 1 "s_register_operand" "w")
                       (match_operand:MVE_0 2 "s_register_operand" "w")]
-        VCMUL))
+        VCMULQ_ROT90_F))
   ]
   "TARGET_HAVE_MVE && TARGET_HAVE_MVE_FLOAT"
-  "vcmul.f%#<V_sz_elem>        %q0, %q1, %q2, #<rot>"
+  "vcmul.f%#<V_sz_elem>        %q0, %q1, %q2, #90"
   [(set_attr "type" "mve_move")
 ])
 
   [(set_attr "type" "mve_move")
    (set_attr "length""8")])
 ;;
-;; [vcmlaq, vcmlaq_rot90, vcmlaq_rot180, vcmlaq_rot270])
+;; [vcmlaq_f])
 ;;
-(define_insn "mve_vcmlaq<mve_rot><mode>"
+(define_insn "mve_vcmlaq_f<mode>"
   [
-   (set (match_operand:MVE_0 0 "s_register_operand" "=w,w")
-       (unspec:MVE_0 [(match_operand:MVE_0 1 "s_register_operand" "0,Dz")
-                      (match_operand:MVE_0 2 "s_register_operand" "w,w")
-                      (match_operand:MVE_0 3 "s_register_operand" "w,w")]
-        VCMLA))
+   (set (match_operand:MVE_0 0 "s_register_operand" "=w")
+       (unspec:MVE_0 [(match_operand:MVE_0 1 "s_register_operand" "0")
+                      (match_operand:MVE_0 2 "s_register_operand" "w")
+                      (match_operand:MVE_0 3 "s_register_operand" "w")]
+        VCMLAQ_F))
   ]
   "TARGET_HAVE_MVE && TARGET_HAVE_MVE_FLOAT"
-  "@
-   vcmla.f%#<V_sz_elem>        %q0, %q2, %q3, #<rot>
-   vcmul.f%#<V_sz_elem>        %q0, %q2, %q3, #<rot>"
+  "vcmla.f%#<V_sz_elem>        %q0, %q2, %q3, #0"
+  [(set_attr "type" "mve_move")
+])
+
+;;
+;; [vcmlaq_rot180_f])
+;;
+(define_insn "mve_vcmlaq_rot180_f<mode>"
+  [
+   (set (match_operand:MVE_0 0 "s_register_operand" "=w")
+       (unspec:MVE_0 [(match_operand:MVE_0 1 "s_register_operand" "0")
+                      (match_operand:MVE_0 2 "s_register_operand" "w")
+                      (match_operand:MVE_0 3 "s_register_operand" "w")]
+        VCMLAQ_ROT180_F))
+  ]
+  "TARGET_HAVE_MVE && TARGET_HAVE_MVE_FLOAT"
+  "vcmla.f%#<V_sz_elem>        %q0, %q2, %q3, #180"
+  [(set_attr "type" "mve_move")
+])
+
+;;
+;; [vcmlaq_rot270_f])
+;;
+(define_insn "mve_vcmlaq_rot270_f<mode>"
+  [
+   (set (match_operand:MVE_0 0 "s_register_operand" "=w")
+       (unspec:MVE_0 [(match_operand:MVE_0 1 "s_register_operand" "0")
+                      (match_operand:MVE_0 2 "s_register_operand" "w")
+                      (match_operand:MVE_0 3 "s_register_operand" "w")]
+        VCMLAQ_ROT270_F))
+  ]
+  "TARGET_HAVE_MVE && TARGET_HAVE_MVE_FLOAT"
+  "vcmla.f%#<V_sz_elem>        %q0, %q2, %q3, #270"
+  [(set_attr "type" "mve_move")
+])
+
+;;
+;; [vcmlaq_rot90_f])
+;;
+(define_insn "mve_vcmlaq_rot90_f<mode>"
+  [
+   (set (match_operand:MVE_0 0 "s_register_operand" "=w")
+       (unspec:MVE_0 [(match_operand:MVE_0 1 "s_register_operand" "0")
+                      (match_operand:MVE_0 2 "s_register_operand" "w")
+                      (match_operand:MVE_0 3 "s_register_operand" "w")]
+        VCMLAQ_ROT90_F))
+  ]
+  "TARGET_HAVE_MVE && TARGET_HAVE_MVE_FLOAT"
+  "vcmla.f%#<V_sz_elem>        %q0, %q2, %q3, #90"
   [(set_attr "type" "mve_move")
 ])
 
diff --git a/gcc/config/arm/neon.md b/gcc/config/arm/neon.md
index 487c0a168b2536648338c52ce0778aeb79fd32d1..669c34da4e0c67d714455fbf07832029abc7d19d 100644
   [(set_attr "type" "neon_fcmla")]
 )
 
-;; The complex mul operations always need to expand to two instructions.
-;; The first operation does half the computation and the second does the
-;; remainder.  Because of this, expand early.
-(define_expand "cmul<rot_op><mode>3"
-  [(set (match_operand:VDF 0 "register_operand")
-       (unspec:VDF [(match_operand:VDF 1 "register_operand")
-                    (match_operand:VDF 2 "register_operand")]
-                   VCMUL_OP))]
-  "TARGET_COMPLEX && !BYTES_BIG_ENDIAN"
-{
-  rtx tmp = gen_reg_rtx (<MODE>mode);
-  rtx res1 = gen_reg_rtx (<MODE>mode);
-  emit_move_insn (tmp, CONST0_RTX (<MODE>mode));
-  emit_insn (gen_neon_vcmla<rotsplit1><mode> (res1, tmp,
-                                             operands[1], operands[2]));
-  emit_insn (gen_neon_vcmla<rotsplit2><mode> (operands[0], res1,
-                                             operands[1], operands[2]));
-  DONE;
-})
-
 
 ;; These instructions map to the __builtins for the Dot Product operations.
 (define_insn "neon_<sup>dot<vsi2qi>"
diff --git a/gcc/config/arm/unspecs.md b/gcc/config/arm/unspecs.md
index 8bb00602103c0cf336a2cdf6ca96bb314a4aaa5b..c2076c9ce6fcabdd2bb94d124670c39c18a30495 100644
   UNSPEC_VCMLA90
   UNSPEC_VCMLA180
   UNSPEC_VCMLA270
-  UNSPEC_VCMUL
-  UNSPEC_VCMUL90
-  UNSPEC_VCMUL180
-  UNSPEC_VCMUL270
-  UNSPEC_VCMLS
-  UNSPEC_VCMLS180
   UNSPEC_MATMUL_S
   UNSPEC_MATMUL_U
   UNSPEC_MATMUL_US
   VADDVQ_P_S
   VBICQ_S
   VBRSRQ_N_S
+  VCADDQ_ROT270_S
+  VCADDQ_ROT90_S
   VCMPEQQ_S
   VCMPEQQ_N_S
   VCMPNEQ_N_S
   VADDVQ_P_U
   VBICQ_U
   VBRSRQ_N_U
+  VCADDQ_ROT270_U
+  VCADDQ_ROT90_U
   VCMPEQQ_U
   VCMPEQQ_N_U
   VCMPNEQ_N_U
   VABDQ_F
   VADDQ_N_F
   VBICQ_F
+  VCADDQ_ROT270_F
+  VCADDQ_ROT90_F
   VCMPEQQ_F
   VCMPEQQ_N_F
   VCMPGEQ_F
   VCMPLTQ_N_F
   VCMPNEQ_F
   VCMPNEQ_N_F
+  VCMULQ_F
+  VCMULQ_ROT180_F
+  VCMULQ_ROT270_F
+  VCMULQ_ROT90_F
   VEORQ_F
   VMAXNMAQ_F
   VMAXNMAVQ_F
   VMLSLDAVAQ_S
   VQSHRUNBQ_N_S
   VQRSHRUNTQ_N_S
+  VCMLAQ_F
   VMINNMAQ_M_F
   VFMASQ_N_F
   VDUPQ_M_N_F
   VADDLVAQ_P_S
   VQMOVUNBQ_M_S
   VCMPLEQ_M_F
+  VCMLAQ_ROT180_F
   VMLSLDAVAXQ_S
   VRNDXQ_M_F
   VFMSQ_F
   VMINNMVQ_P_F
   VMAXNMVQ_P_F
   VPSELQ_F
+  VCMLAQ_ROT90_F
   VQMOVUNTQ_M_S
   VREV64Q_M_F
   VNEGQ_M_F
   VRMLALDAVHQ_P_S
   VRMLALDAVHXQ_P_S
   VCMPEQQ_M_N_F
+  VCMLAQ_ROT270_F
   VMAXNMAQ_M_F
   VRNDQ_M_F
   VMLALDAVQ_P_U
diff --git a/gcc/config/arm/vec-common.md b/gcc/config/arm/vec-common.md
index 784305955eeef3fc40ccc2c52eee74df572822c4..8d9c89c5b2bca2779c6920c46a34f2dcc2f66e54 100644
                 (match_operand:VDQ 2 "neon_logic_op2" "")))]
   "ARM_HAVE_<MODE>_ARITH"
 )
-
-(define_expand "cadd<rot><mode>3"
-  [(set (match_operand:VF 0 "register_operand")
-       (unspec:VF [(match_operand:VF 1 "register_operand")
-                   (match_operand:VF 2 "register_operand")]
-                  VCADD))]
-  "(TARGET_COMPLEX || (TARGET_HAVE_MVE && TARGET_HAVE_MVE_FLOAT
-                     && ARM_HAVE_<MODE>_ARITH)) && !BYTES_BIG_ENDIAN"
-)
-
-;; The complex mul operations always need to expand to two instructions.
-;; The first operation does half the computation and the second does the
-;; remainder.  Because of this, expand early.
-(define_expand "cmul<rot_op><mode>3"
-  [(set (match_operand:VQ_HSF 0 "register_operand")
-        (unspec:VQ_HSF [(match_operand:VQ_HSF 1 "register_operand")
-                       (match_operand:VQ_HSF 2 "register_operand")]
-                      VCMUL_OP))]
-  "(TARGET_COMPLEX || (TARGET_HAVE_MVE && TARGET_HAVE_MVE_FLOAT))
-   && !BYTES_BIG_ENDIAN"
-{
-  rtx res1 = gen_reg_rtx (<MODE>mode);
-  if (TARGET_COMPLEX)
-    {
-      rtx tmp = gen_reg_rtx (<MODE>mode);
-      emit_move_insn (tmp, CONST0_RTX (<MODE>mode));
-      emit_insn (gen_neon_vcmla<rotsplit1><mode> (res1, tmp,
-                                                  operands[1], operands[2]));
-      emit_insn (gen_neon_vcmla<rotsplit2><mode> (operands[0], res1,
-                                                  operands[1], operands[2]));
-    }
-  else
-    {
-      emit_insn (gen_mve_vcmulq<mve_rotsplit1><mode> (operands[0], operands[1],
-                                                      operands[2]));
-      emit_insn (gen_mve_vcmulq<mve_rotsplit2><mode> (operands[0], operands[1],
-                                                      operands[2]));
-    }
-  DONE;
-})
-
-(define_expand "arm_vcmla<rot><mode>"
-  [(set (match_operand:VF 0 "register_operand")
-       (plus:VF (match_operand:VF 1 "register_operand")
-                (unspec:VF [(match_operand:VF 2 "register_operand")
-                            (match_operand:VF 3 "register_operand")]
-                            VCMLA)))]
-  "(TARGET_COMPLEX || (TARGET_HAVE_MVE && TARGET_HAVE_MVE_FLOAT
-                     && ARM_HAVE_<MODE>_ARITH)) && !BYTES_BIG_ENDIAN"
-)
-
-;; The complex mla/mls operations always need to expand to two instructions.
-;; The first operation does half the computation and the second does the
-;; remainder.  Because of this, expand early.
-(define_expand "cml<fcmac1><rot_op><mode>4"
-  [(set (match_operand:VF 0 "register_operand")
-       (plus:VF (match_operand:VF 1 "register_operand")
-                (unspec:VF [(match_operand:VF 2 "register_operand")
-                            (match_operand:VF 3 "register_operand")]
-                           VCMLA_OP)))]
-  "(TARGET_COMPLEX || (TARGET_HAVE_MVE && TARGET_HAVE_MVE_FLOAT
-                     && ARM_HAVE_<MODE>_ARITH)) && !BYTES_BIG_ENDIAN"
-{
-  rtx tmp = gen_reg_rtx (<MODE>mode);
-  emit_insn (gen_arm_vcmla<rotsplit1><mode> (tmp, operands[1],
-                                            operands[2], operands[3]));
-  emit_insn (gen_arm_vcmla<rotsplit2><mode> (operands[0], tmp,
-                                            operands[2], operands[3]));
-  DONE;
-})