(set_attr "simd_mode" "<MODE>")]
)
-(define_insn "aarch64_dup_lane_scalar<mode>"
- [(set (match_operand:<VEL> 0 "register_operand" "=w, r")
- (vec_select:<VEL>
- (match_operand:VDQ 1 "register_operand" "w, w")
- (parallel [(match_operand:SI 2 "immediate_operand" "i, i")])
- ))]
- "TARGET_SIMD"
- "@
- dup\\t%<Vetype>0, %1.<Vetype>[%2]
- umov\\t%<vw>0, %1.<Vetype>[%2]"
- [(set_attr "simd_type" "simd_dup, simd_movgp")
- (set_attr "simd_mode" "<MODE>")]
-)
-
(define_insn "aarch64_simd_dup<mode>"
[(set (match_operand:VDQF 0 "register_operand" "=w")
(vec_duplicate:VDQF (match_operand:<VEL> 1 "register_operand" "w")))]
DONE;
})
-(define_insn "aarch64_get_lane_signed<mode>"
- [(set (match_operand:<VEL> 0 "register_operand" "=r")
- (sign_extend:<VEL>
+;; Lane extraction with sign extension to a general-purpose register.
+(define_insn "*aarch64_get_lane_extend<GPI:mode><VDQQH:mode>"
+ [(set (match_operand:GPI 0 "register_operand" "=r")
+ (sign_extend:GPI
(vec_select:<VEL>
- (match_operand:VQ_S 1 "register_operand" "w")
+ (match_operand:VDQQH 1 "register_operand" "w")
(parallel [(match_operand:SI 2 "immediate_operand" "i")]))))]
"TARGET_SIMD"
- "smov\\t%0, %1.<Vetype>[%2]"
+ "smov\\t%<GPI:w>0, %1.<VDQQH:Vetype>[%2]"
[(set_attr "simd_type" "simd_movgp")
- (set_attr "simd_mode" "<MODE>")]
+ (set_attr "simd_mode" "<VDQQH:MODE>")]
)
-(define_insn "aarch64_get_lane_unsigned<mode>"
- [(set (match_operand:<VEL> 0 "register_operand" "=r")
- (zero_extend:<VEL>
+(define_insn "*aarch64_get_lane_zero_extendsi<mode>"
+ [(set (match_operand:SI 0 "register_operand" "=r")
+ (zero_extend:SI
(vec_select:<VEL>
- (match_operand:VDQ 1 "register_operand" "w")
+ (match_operand:VDQQH 1 "register_operand" "w")
(parallel [(match_operand:SI 2 "immediate_operand" "i")]))))]
"TARGET_SIMD"
- "umov\\t%<vw>0, %1.<Vetype>[%2]"
+ "umov\\t%w0, %1.<Vetype>[%2]"
[(set_attr "simd_type" "simd_movgp")
(set_attr "simd_mode" "<MODE>")]
)
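
An illustration, not part of the patch: with the two extending patterns
above, combine can fold an implicit widening conversion into the lane
read itself.  The function names below are hypothetical, and this
assumes a compiler built with this patch.

    #include <arm_neon.h>

    /* The int16_t result is implicitly widened to int32_t; the
       *aarch64_get_lane_extend pattern should match the combined
       sign_extend (vec_select ...) and emit one "smov w0, v0.h[3]".  */
    int32_t
    lane_sext (int16x8_t v)
    {
      return vgetq_lane_s16 (v, 3);
    }

    /* The unsigned widening should be matched by the zero-extend
       pattern and emit a single "umov w0, v0.h[3]".  */
    uint32_t
    lane_zext (uint16x8_t v)
    {
      return vgetq_lane_u16 (v, 3);
    }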
+;; Lane extraction of a value; neither sign nor zero extension
+;; is guaranteed, so the upper bits should be considered undefined.
(define_insn "aarch64_get_lane<mode>"
- [(set (match_operand:<VEL> 0 "register_operand" "=w")
+ [(set (match_operand:<VEL> 0 "register_operand" "=r, w")
(vec_select:<VEL>
- (match_operand:VDQF 1 "register_operand" "w")
- (parallel [(match_operand:SI 2 "immediate_operand" "i")])))]
+ (match_operand:VALL 1 "register_operand" "w, w")
+ (parallel [(match_operand:SI 2 "immediate_operand" "i, i")])))]
"TARGET_SIMD"
- "mov\\t%0.<Vetype>[0], %1.<Vetype>[%2]"
- [(set_attr "simd_type" "simd_ins")
+ "@
+ umov\\t%<vwcore>0, %1.<Vetype>[%2]
+ dup\\t%<Vetype>0, %1.<Vetype>[%2]"
+ [(set_attr "simd_type" "simd_movgp, simd_dup")
(set_attr "simd_mode" "<MODE>")]
)
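
A sketch of the cases this plain pattern covers (illustrative only;
hypothetical function name): with no explicit extension the umov
alternative is used, and at the RTL level nothing is promised about the
upper bits, even though the umov encoding happens to zero them.  When
the result stays in a SIMD register, the dup alternative avoids a round
trip through the general-purpose register file:

    #include <arm_neon.h>

    /* Returned in s0, so register allocation should pick the "dup"
       alternative: dup s0, v0.s[1].  */
    float32_t
    lane_in_fpr (float32x4_t v)
    {
      return vgetq_lane_f32 (v, 1);
    }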
(define_expand "aarch64_get_lanedi"
- [(match_operand:DI 0 "register_operand" "=r")
- (match_operand:DI 1 "register_operand" "w")
- (match_operand:SI 2 "immediate_operand" "i")]
+ [(match_operand:DI 0 "register_operand")
+ (match_operand:DI 1 "register_operand")
+ (match_operand:SI 2 "immediate_operand")]
"TARGET_SIMD"
{
aarch64_simd_lane_bounds (operands[2], 0, 1);
poly16x8_t val[4];
} poly16x8x4_t;
-
+/* vget_lane internal macros. */
+
+#define __aarch64_vget_lane_any(__size, __cast_ret, __cast_a, __a, __b) \
+ (__cast_ret \
+ __builtin_aarch64_get_lane##__size (__cast_a __a, __b))
+
+#define __aarch64_vget_lane_f32(__a, __b) \
+ __aarch64_vget_lane_any (v2sf, , , __a, __b)
+#define __aarch64_vget_lane_f64(__a, __b) (__a)
+
+#define __aarch64_vget_lane_p8(__a, __b) \
+ __aarch64_vget_lane_any (v8qi, (poly8_t), (int8x8_t), __a, __b)
+#define __aarch64_vget_lane_p16(__a, __b) \
+ __aarch64_vget_lane_any (v4hi, (poly16_t), (int16x4_t), __a, __b)
+
+#define __aarch64_vget_lane_s8(__a, __b) \
+  __aarch64_vget_lane_any (v8qi, , , __a, __b)
+#define __aarch64_vget_lane_s16(__a, __b) \
+  __aarch64_vget_lane_any (v4hi, , , __a, __b)
+#define __aarch64_vget_lane_s32(__a, __b) \
+  __aarch64_vget_lane_any (v2si, , , __a, __b)
+#define __aarch64_vget_lane_s64(__a, __b) (__a)
+
+#define __aarch64_vget_lane_u8(__a, __b) \
+ __aarch64_vget_lane_any (v8qi, (uint8_t), (int8x8_t), __a, __b)
+#define __aarch64_vget_lane_u16(__a, __b) \
+ __aarch64_vget_lane_any (v4hi, (uint16_t), (int16x4_t), __a, __b)
+#define __aarch64_vget_lane_u32(__a, __b) \
+ __aarch64_vget_lane_any (v2si, (uint32_t), (int32x2_t), __a, __b)
+#define __aarch64_vget_lane_u64(__a, __b) (__a)
+
+#define __aarch64_vgetq_lane_f32(__a, __b) \
+ __aarch64_vget_lane_any (v4sf, , , __a, __b)
+#define __aarch64_vgetq_lane_f64(__a, __b) \
+ __aarch64_vget_lane_any (v2df, , , __a, __b)
+
+#define __aarch64_vgetq_lane_p8(__a, __b) \
+ __aarch64_vget_lane_any (v16qi, (poly8_t), (int8x16_t), __a, __b)
+#define __aarch64_vgetq_lane_p16(__a, __b) \
+ __aarch64_vget_lane_any (v8hi, (poly16_t), (int16x8_t), __a, __b)
+
+#define __aarch64_vgetq_lane_s8(__a, __b) \
+  __aarch64_vget_lane_any (v16qi, , , __a, __b)
+#define __aarch64_vgetq_lane_s16(__a, __b) \
+  __aarch64_vget_lane_any (v8hi, , , __a, __b)
+#define __aarch64_vgetq_lane_s32(__a, __b) \
+  __aarch64_vget_lane_any (v4si, , , __a, __b)
+#define __aarch64_vgetq_lane_s64(__a, __b) \
+  __aarch64_vget_lane_any (v2di, , , __a, __b)
+
+#define __aarch64_vgetq_lane_u8(__a, __b) \
+ __aarch64_vget_lane_any (v16qi, (uint8_t), (int8x16_t), __a, __b)
+#define __aarch64_vgetq_lane_u16(__a, __b) \
+ __aarch64_vget_lane_any (v8hi, (uint16_t), (int16x8_t), __a, __b)
+#define __aarch64_vgetq_lane_u32(__a, __b) \
+ __aarch64_vget_lane_any (v4si, (uint32_t), (int32x4_t), __a, __b)
+#define __aarch64_vgetq_lane_u64(__a, __b) \
+ __aarch64_vget_lane_any (v2di, (uint64_t), (int64x2_t), __a, __b)
+
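For reference, a hand expansion of one of these wrappers (illustrative
only; the builtin is internal to GCC and the function name is made up).
The __size token is pasted onto the builtin name, and the two cast
slots pun between the unsigned/poly vector types and the signed types
the builtins are declared with:

    #include <arm_neon.h>

    /* What vget_lane_u16 (a, 2) becomes once the macros above are
       expanded.  */
    uint16_t
    expanded_vget_lane_u16 (uint16x4_t a)
    {
      return (uint16_t) __builtin_aarch64_get_lanev4hi ((int16x4_t) a, 2);
    }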
+/* vadd */
__extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
vadd_s8 (int8x8_t __a, int8x8_t __b)
{
return (poly16x4_t) __a;
}
+/* vget_lane */
+
+__extension__ static __inline float32_t __attribute__ ((__always_inline__))
+vget_lane_f32 (float32x2_t __a, const int __b)
+{
+ return __aarch64_vget_lane_f32 (__a, __b);
+}
+
+__extension__ static __inline float64_t __attribute__ ((__always_inline__))
+vget_lane_f64 (float64x1_t __a, const int __b)
+{
+ return __aarch64_vget_lane_f64 (__a, __b);
+}
+
+__extension__ static __inline poly8_t __attribute__ ((__always_inline__))
+vget_lane_p8 (poly8x8_t __a, const int __b)
+{
+ return __aarch64_vget_lane_p8 (__a, __b);
+}
+
+__extension__ static __inline poly16_t __attribute__ ((__always_inline__))
+vget_lane_p16 (poly16x4_t __a, const int __b)
+{
+ return __aarch64_vget_lane_p16 (__a, __b);
+}
+
__extension__ static __inline int8_t __attribute__ ((__always_inline__))
vget_lane_s8 (int8x8_t __a, const int __b)
{
- return (int8_t) __builtin_aarch64_get_lane_signedv8qi (__a, __b);
+ return __aarch64_vget_lane_s8 (__a, __b);
}
__extension__ static __inline int16_t __attribute__ ((__always_inline__))
vget_lane_s16 (int16x4_t __a, const int __b)
{
- return (int16_t) __builtin_aarch64_get_lane_signedv4hi (__a, __b);
+ return __aarch64_vget_lane_s16 (__a, __b);
}
__extension__ static __inline int32_t __attribute__ ((__always_inline__))
vget_lane_s32 (int32x2_t __a, const int __b)
{
- return (int32_t) __builtin_aarch64_get_lane_signedv2si (__a, __b);
+ return __aarch64_vget_lane_s32 (__a, __b);
}
-__extension__ static __inline float32_t __attribute__ ((__always_inline__))
-vget_lane_f32 (float32x2_t __a, const int __b)
+__extension__ static __inline int64_t __attribute__ ((__always_inline__))
+vget_lane_s64 (int64x1_t __a, const int __b)
{
- return (float32_t) __builtin_aarch64_get_lanev2sf (__a, __b);
+ return __aarch64_vget_lane_s64 (__a, __b);
}
__extension__ static __inline uint8_t __attribute__ ((__always_inline__))
vget_lane_u8 (uint8x8_t __a, const int __b)
{
- return (uint8_t) __builtin_aarch64_get_lane_unsignedv8qi ((int8x8_t) __a,
- __b);
+ return __aarch64_vget_lane_u8 (__a, __b);
}
__extension__ static __inline uint16_t __attribute__ ((__always_inline__))
vget_lane_u16 (uint16x4_t __a, const int __b)
{
- return (uint16_t) __builtin_aarch64_get_lane_unsignedv4hi ((int16x4_t) __a,
- __b);
+ return __aarch64_vget_lane_u16 (__a, __b);
}
__extension__ static __inline uint32_t __attribute__ ((__always_inline__))
vget_lane_u32 (uint32x2_t __a, const int __b)
{
- return (uint32_t) __builtin_aarch64_get_lane_unsignedv2si ((int32x2_t) __a,
- __b);
+ return __aarch64_vget_lane_u32 (__a, __b);
}
-__extension__ static __inline poly8_t __attribute__ ((__always_inline__))
-vget_lane_p8 (poly8x8_t __a, const int __b)
+__extension__ static __inline uint64_t __attribute__ ((__always_inline__))
+vget_lane_u64 (uint64x1_t __a, const int __b)
{
- return (poly8_t) __builtin_aarch64_get_lane_unsignedv8qi ((int8x8_t) __a,
- __b);
+ return __aarch64_vget_lane_u64 (__a, __b);
}
-__extension__ static __inline poly16_t __attribute__ ((__always_inline__))
-vget_lane_p16 (poly16x4_t __a, const int __b)
+/* vgetq_lane */
+
+__extension__ static __inline float32_t __attribute__ ((__always_inline__))
+vgetq_lane_f32 (float32x4_t __a, const int __b)
{
- return (poly16_t) __builtin_aarch64_get_lane_unsignedv4hi ((int16x4_t) __a,
- __b);
+ return __aarch64_vgetq_lane_f32 (__a, __b);
}
-__extension__ static __inline int64_t __attribute__ ((__always_inline__))
-vget_lane_s64 (int64x1_t __a, const int __b)
+__extension__ static __inline float64_t __attribute__ ((__always_inline__))
+vgetq_lane_f64 (float64x2_t __a, const int __b)
{
- return (int64_t) __builtin_aarch64_get_lanedi (__a, __b);
+ return __aarch64_vgetq_lane_f64 (__a, __b);
}
-__extension__ static __inline uint64_t __attribute__ ((__always_inline__))
-vget_lane_u64 (uint64x1_t __a, const int __b)
+__extension__ static __inline poly8_t __attribute__ ((__always_inline__))
+vgetq_lane_p8 (poly8x16_t __a, const int __b)
+{
+ return __aarch64_vgetq_lane_p8 (__a, __b);
+}
+
+__extension__ static __inline poly16_t __attribute__ ((__always_inline__))
+vgetq_lane_p16 (poly16x8_t __a, const int __b)
{
- return (uint64_t) __builtin_aarch64_get_lanedi ((int64x1_t) __a, __b);
+ return __aarch64_vgetq_lane_p16 (__a, __b);
}
__extension__ static __inline int8_t __attribute__ ((__always_inline__))
vgetq_lane_s8 (int8x16_t __a, const int __b)
{
- return (int8_t) __builtin_aarch64_get_lane_signedv16qi (__a, __b);
+ return __aarch64_vgetq_lane_s8 (__a, __b);
}
__extension__ static __inline int16_t __attribute__ ((__always_inline__))
vgetq_lane_s16 (int16x8_t __a, const int __b)
{
- return (int16_t) __builtin_aarch64_get_lane_signedv8hi (__a, __b);
+ return __aarch64_vgetq_lane_s16 (__a, __b);
}
__extension__ static __inline int32_t __attribute__ ((__always_inline__))
vgetq_lane_s32 (int32x4_t __a, const int __b)
{
- return (int32_t) __builtin_aarch64_get_lane_signedv4si (__a, __b);
-}
-
-__extension__ static __inline float32_t __attribute__ ((__always_inline__))
-vgetq_lane_f32 (float32x4_t __a, const int __b)
-{
- return (float32_t) __builtin_aarch64_get_lanev4sf (__a, __b);
+ return __aarch64_vgetq_lane_s32 (__a, __b);
}
-__extension__ static __inline float64_t __attribute__ ((__always_inline__))
-vgetq_lane_f64 (float64x2_t __a, const int __b)
+__extension__ static __inline int64_t __attribute__ ((__always_inline__))
+vgetq_lane_s64 (int64x2_t __a, const int __b)
{
- return (float64_t) __builtin_aarch64_get_lanev2df (__a, __b);
+ return __aarch64_vgetq_lane_s64 (__a, __b);
}
__extension__ static __inline uint8_t __attribute__ ((__always_inline__))
vgetq_lane_u8 (uint8x16_t __a, const int __b)
{
- return (uint8_t) __builtin_aarch64_get_lane_unsignedv16qi ((int8x16_t) __a,
- __b);
+ return __aarch64_vgetq_lane_u8 (__a, __b);
}
__extension__ static __inline uint16_t __attribute__ ((__always_inline__))
vgetq_lane_u16 (uint16x8_t __a, const int __b)
{
- return (uint16_t) __builtin_aarch64_get_lane_unsignedv8hi ((int16x8_t) __a,
- __b);
+ return __aarch64_vgetq_lane_u16 (__a, __b);
}
__extension__ static __inline uint32_t __attribute__ ((__always_inline__))
vgetq_lane_u32 (uint32x4_t __a, const int __b)
{
- return (uint32_t) __builtin_aarch64_get_lane_unsignedv4si ((int32x4_t) __a,
- __b);
-}
-
-__extension__ static __inline poly8_t __attribute__ ((__always_inline__))
-vgetq_lane_p8 (poly8x16_t __a, const int __b)
-{
- return (poly8_t) __builtin_aarch64_get_lane_unsignedv16qi ((int8x16_t) __a,
- __b);
-}
-
-__extension__ static __inline poly16_t __attribute__ ((__always_inline__))
-vgetq_lane_p16 (poly16x8_t __a, const int __b)
-{
- return (poly16_t) __builtin_aarch64_get_lane_unsignedv8hi ((int16x8_t) __a,
- __b);
-}
-
-__extension__ static __inline int64_t __attribute__ ((__always_inline__))
-vgetq_lane_s64 (int64x2_t __a, const int __b)
-{
- return __builtin_aarch64_get_lane_unsignedv2di (__a, __b);
+ return __aarch64_vgetq_lane_u32 (__a, __b);
}
__extension__ static __inline uint64_t __attribute__ ((__always_inline__))
vgetq_lane_u64 (uint64x2_t __a, const int __b)
{
- return (uint64_t) __builtin_aarch64_get_lane_unsignedv2di ((int64x2_t) __a,
- __b);
+ return __aarch64_vgetq_lane_u64 (__a, __b);
}
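
A usage sketch for the rewritten intrinsics (hypothetical function
name): the lane index must be an integer constant expression, and it is
bounds-checked when the builtin is expanded.

    #include <arm_neon.h>

    uint32_t
    sum_lanes_0_1 (uint32x4_t v)
    {
      return vgetq_lane_u32 (v, 0) + vgetq_lane_u32 (v, 1);
    }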
+/* vreinterpret */
+
__extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
vreinterpret_p8_s8 (int8x8_t __a)
{
return result;
}
-#define vget_lane_f64(a, b) \
- __extension__ \
- ({ \
- float64x1_t a_ = (a); \
- float64_t result; \
- __asm__ ("umov %x0, %1.d[%2]" \
- : "=r"(result) \
- : "w"(a_), "i"(b) \
- : /* No clobbers */); \
- result; \
- })
-
__extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
vget_low_f32 (float32x4_t a)
{
__extension__ static __inline int8x1_t __attribute__ ((__always_inline__))
vdupb_lane_s8 (int8x16_t a, int const b)
{
- return __builtin_aarch64_dup_lane_scalarv16qi (a, b);
+ return __aarch64_vgetq_lane_s8 (a, b);
}
__extension__ static __inline uint8x1_t __attribute__ ((__always_inline__))
vdupb_lane_u8 (uint8x16_t a, int const b)
{
- return (uint8x1_t) __builtin_aarch64_dup_lane_scalarv16qi ((int8x16_t) a, b);
+ return __aarch64_vgetq_lane_u8 (a, b);
}
__extension__ static __inline int16x1_t __attribute__ ((__always_inline__))
vduph_lane_s16 (int16x8_t a, int const b)
{
- return __builtin_aarch64_dup_lane_scalarv8hi (a, b);
+ return __aarch64_vgetq_lane_s16 (a, b);
}
__extension__ static __inline uint16x1_t __attribute__ ((__always_inline__))
vduph_lane_u16 (uint16x8_t a, int const b)
{
- return (uint16x1_t) __builtin_aarch64_dup_lane_scalarv8hi ((int16x8_t) a, b);
+ return __aarch64_vgetq_lane_u16 (a, b);
}
__extension__ static __inline int32x1_t __attribute__ ((__always_inline__))
vdups_lane_s32 (int32x4_t a, int const b)
{
- return __builtin_aarch64_dup_lane_scalarv4si (a, b);
+ return __aarch64_vgetq_lane_s32 (a, b);
}
__extension__ static __inline uint32x1_t __attribute__ ((__always_inline__))
vdups_lane_u32 (uint32x4_t a, int const b)
{
- return (uint32x1_t) __builtin_aarch64_dup_lane_scalarv4si ((int32x4_t) a, b);
+ return __aarch64_vgetq_lane_u32 (a, b);
}
__extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
vdupd_lane_s64 (int64x2_t a, int const b)
{
- return __builtin_aarch64_dup_lane_scalarv2di (a, b);
+ return __aarch64_vgetq_lane_s64 (a, b);
}
__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
vdupd_lane_u64 (uint64x2_t a, int const b)
{
- return (uint64x1_t) __builtin_aarch64_dup_lane_scalarv2di ((int64x2_t) a, b);
+ return __aarch64_vgetq_lane_u64 (a, b);
}
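
The scalar vdup*_lane intrinsics above are now thin wrappers over the
same lane-extraction macros.  A usage sketch (hypothetical function
name); depending on where the result is consumed, register allocation
picks the umov or dup alternative of aarch64_get_lane<mode>:

    #include <arm_neon.h>

    int8x1_t
    take_lane_4 (int8x16_t v)
    {
      return vdupb_lane_s8 (v, 4);
    }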
/* vld1 */
/* End of optimal implementations in approved order. */
+#undef __aarch64_vget_lane_any
+#undef __aarch64_vget_lane_f32
+#undef __aarch64_vget_lane_f64
+#undef __aarch64_vget_lane_p8
+#undef __aarch64_vget_lane_p16
+#undef __aarch64_vget_lane_s8
+#undef __aarch64_vget_lane_s16
+#undef __aarch64_vget_lane_s32
+#undef __aarch64_vget_lane_s64
+#undef __aarch64_vget_lane_u8
+#undef __aarch64_vget_lane_u16
+#undef __aarch64_vget_lane_u32
+#undef __aarch64_vget_lane_u64
+
+#undef __aarch64_vgetq_lane_f32
+#undef __aarch64_vgetq_lane_f64
+#undef __aarch64_vgetq_lane_p8
+#undef __aarch64_vgetq_lane_p16
+#undef __aarch64_vgetq_lane_s8
+#undef __aarch64_vgetq_lane_s16
+#undef __aarch64_vgetq_lane_s32
+#undef __aarch64_vgetq_lane_s64
+#undef __aarch64_vgetq_lane_u8
+#undef __aarch64_vgetq_lane_u16
+#undef __aarch64_vgetq_lane_u32
+#undef __aarch64_vgetq_lane_u64
+
#endif