= { qualifier_none, qualifier_none, qualifier_maybe_immediate };
#define TYPES_BINOP (aarch64_types_binop_qualifiers)
static enum aarch64_type_qualifiers
-aarch64_types_binopv_qualifiers[SIMD_MAX_BUILTIN_ARGS]
- = { qualifier_void, qualifier_none, qualifier_none };
-#define TYPES_BINOPV (aarch64_types_binopv_qualifiers)
-static enum aarch64_type_qualifiers
aarch64_types_binopu_qualifiers[SIMD_MAX_BUILTIN_ARGS]
= { qualifier_unsigned, qualifier_unsigned, qualifier_unsigned };
#define TYPES_BINOPU (aarch64_types_binopu_qualifiers)
AARCH64_BUILTIN_SET_FPSR,
AARCH64_SIMD_BUILTIN_BASE,
+ AARCH64_SIMD_BUILTIN_LANE_CHECK,
#include "aarch64-simd-builtins.def"
- AARCH64_SIMD_BUILTIN_MAX = AARCH64_SIMD_BUILTIN_BASE
- + ARRAY_SIZE (aarch64_simd_builtin_data),
+ /* The first enum element which is based on an insn_data pattern. */
+ AARCH64_SIMD_PATTERN_START = AARCH64_SIMD_BUILTIN_LANE_CHECK + 1,
+ AARCH64_SIMD_BUILTIN_MAX = AARCH64_SIMD_PATTERN_START
+ + ARRAY_SIZE (aarch64_simd_builtin_data) - 1,
AARCH64_CRC32_BUILTIN_BASE,
AARCH64_CRC32_BUILTINS
AARCH64_CRC32_BUILTIN_MAX,
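A note on the enum arithmetic above: the enumerators expanded from aarch64-simd-builtins.def immediately follow AARCH64_SIMD_BUILTIN_LANE_CHECK, so AARCH64_SIMD_PATTERN_START aliases the first of them and AARCH64_SIMD_BUILTIN_MAX aliases the last one (hence the "- 1": MAX is now the last valid code, not one past the end). A minimal, self-contained model of that layout, using toy names rather than the real enumerators:

/* Toy model of the enum layout; the TOY_* names are illustrative only.  */
#include <assert.h>

enum toy_fcode
{
  TOY_BASE,
  TOY_LANE_CHECK,
  TOY_DEF_0,     /* stands in for the first .def-generated enumerator */
  TOY_DEF_1,
  TOY_DEF_2,     /* stands in for the last .def-generated enumerator */
  TOY_PATTERN_START = TOY_LANE_CHECK + 1,
  TOY_MAX = TOY_PATTERN_START + 3 - 1   /* 3 == number of .def entries here */
};

int
main (void)
{
  assert (TOY_PATTERN_START == TOY_DEF_0);      /* first pattern-based code */
  assert (TOY_MAX == TOY_DEF_2);                /* last code, not one past the end */
  assert (TOY_DEF_1 - TOY_PATTERN_START == 1);  /* index into the data table */
  return 0;
}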
static void
aarch64_init_simd_builtins (void)
{
- unsigned int i, fcode = AARCH64_SIMD_BUILTIN_BASE + 1;
+ unsigned int i, fcode = AARCH64_SIMD_PATTERN_START;
aarch64_init_simd_builtin_types ();
system. */
aarch64_init_simd_builtin_scalar_types ();
+ tree lane_check_fpr = build_function_type_list (void_type_node,
+ intSI_type_node,
+ intSI_type_node,
+ NULL);
+ aarch64_builtin_decls[AARCH64_SIMD_BUILTIN_LANE_CHECK] =
+ add_builtin_function ("__builtin_aarch64_im_lane_boundsi", lane_check_fpr,
+ AARCH64_SIMD_BUILTIN_LANE_CHECK, BUILT_IN_MD,
+ NULL, NULL_TREE);
+
for (i = 0; i < ARRAY_SIZE (aarch64_simd_builtin_data); i++, fcode++)
{
bool print_type_signature_p = false;
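Judging from the build_function_type_list call above together with the expander in the next hunk, the new builtin returns void and takes two int (SImode) arguments, with the total lane count first and the lane index second; the first must fold to an integer constant by expand time (the expander asserts on it), while a non-constant second argument is diagnosed with an error. A sketch of the prototype as user code would see it (parameter names are illustrative, not taken from GCC):

/* Sketch of the declared signature; only the name and the types are definitive.  */
void __builtin_aarch64_im_lane_boundsi (int __total_lanes, int __lane_index);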
rtx
aarch64_simd_expand_builtin (int fcode, tree exp, rtx target)
{
+ if (fcode == AARCH64_SIMD_BUILTIN_LANE_CHECK)
+ {
+ tree nlanes = CALL_EXPR_ARG (exp, 0);
+ gcc_assert (TREE_CODE (nlanes) == INTEGER_CST);
+ rtx lane_idx = expand_normal (CALL_EXPR_ARG (exp, 1));
+ if (CONST_INT_P (lane_idx))
+ aarch64_simd_lane_bounds (lane_idx, 0, TREE_INT_CST_LOW (nlanes), exp);
+ else
+ error ("%Klane index must be a constant immediate", exp);
+ /* Don't generate any RTL. */
+ return const0_rtx;
+ }
aarch64_simd_builtin_datum *d =
- &aarch64_simd_builtin_data[fcode - (AARCH64_SIMD_BUILTIN_BASE + 1)];
+ &aarch64_simd_builtin_data[fcode - AARCH64_SIMD_PATTERN_START];
enum insn_code icode = d->code;
builtin_simd_arg args[SIMD_MAX_BUILTIN_ARGS];
int num_args = insn_data[d->code].n_operands;
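Taken together with the arm_neon.h changes below, the effect at a call site is: a constant, in-range lane index expands to const0_rtx and generates no code; an out-of-range constant is rejected by aarch64_simd_lane_bounds; and a non-constant index hits the "lane index must be a constant immediate" error. A sketch of what user code would see (the guarded functions are there only to provoke the diagnostics and will not compile; exact wording may vary by compiler version):

#include <arm_neon.h>

int64_t
lane_ok (int64x1_t v)
{
  /* Constant, in-range index: the lane check folds away to nothing.  */
  return vdupd_lane_s64 (v, 0);
}

#ifdef SHOW_DIAGNOSTICS
int64_t
lane_out_of_range (int64x1_t v)
{
  /* 1 is out of range for a one-lane vector: aarch64_simd_lane_bounds rejects it.  */
  return vdupd_lane_s64 (v, 1);
}

int64_t
lane_not_constant (int64x1_t v, int i)
{
  /* Not a constant immediate: the expander emits an error.  */
  return vdupd_lane_s64 (v, i);
}
#endif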
__aarch64_vget_lane_any (v2sf, , , __a, __b)
#define __aarch64_vget_lane_f64(__a, __b) __extension__ \
({ \
- __builtin_aarch64_im_lane_boundsi (__b, 1); \
+ __AARCH64_LANE_CHECK (__a, __b); \
__a[0]; \
})
__aarch64_vget_lane_any (v2si, , ,__a, __b)
#define __aarch64_vget_lane_s64(__a, __b) __extension__ \
({ \
- __builtin_aarch64_im_lane_boundsi (__b, 1); \
+ __AARCH64_LANE_CHECK (__a, __b); \
__a[0]; \
})
__aarch64_vget_lane_any (v2si, (uint32_t), (int32x2_t), __a, __b)
#define __aarch64_vget_lane_u64(__a, __b) __extension__ \
({ \
- __builtin_aarch64_im_lane_boundsi (__b, 1); \
+ __AARCH64_LANE_CHECK (__a, __b); \
__a[0]; \
})
/* Internal macro for lane indices. */
#define __AARCH64_NUM_LANES(__v) (sizeof (__v) / sizeof (__v[0]))
+#define __AARCH64_LANE_CHECK(__vec, __idx) \
+ __builtin_aarch64_im_lane_boundsi (__AARCH64_NUM_LANES (__vec), __idx)
/* For big-endian, GCC's vector indices are the opposite way around
to the architectural lane indices used by Neon intrinsics. */
#define __aarch64_vset_lane_any(__elem, __vec, __index) \
__extension__ \
({ \
- __builtin_aarch64_im_lane_boundsi (__index, \
- __AARCH64_NUM_LANES (__vec)); \
+ __AARCH64_LANE_CHECK (__vec, __index); \
__vec[__aarch64_lane (__vec, __index)] = __elem; \
__vec; \
})
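To make the new macro plumbing concrete: __AARCH64_NUM_LANES is plain sizeof arithmetic, so for a two-lane vector __AARCH64_LANE_CHECK (v, idx) expands to __builtin_aarch64_im_lane_boundsi (2, idx), passing the lane count first and the index second. A small compile-time check of that lane-count arithmetic, assuming only the definitions above and the usual arm_neon.h vector types:

#include <arm_neon.h>

/* For uint32x2_t, sizeof (v) / sizeof (v[0]) is 8 / 4 == 2, which is the
   value __AARCH64_LANE_CHECK passes as the first builtin argument.  */
_Static_assert (sizeof (uint32x2_t) / sizeof (uint32_t) == 2,
                "uint32x2_t has two lanes");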
__extension__ static __inline float64_t __attribute__ ((__always_inline__))
vdupd_lane_f64 (float64x1_t __a, const int __b)
{
- __builtin_aarch64_im_lane_boundsi (__b, 1);
+ __AARCH64_LANE_CHECK (__a, __b);
return __a[0];
}
__extension__ static __inline int64_t __attribute__ ((__always_inline__))
vdupd_lane_s64 (int64x1_t __a, const int __b)
{
- __builtin_aarch64_im_lane_boundsi (__b, 1);
+ __AARCH64_LANE_CHECK (__a, __b);
return __a[0];
}
__extension__ static __inline uint64_t __attribute__ ((__always_inline__))
vdupd_lane_u64 (uint64x1_t __a, const int __b)
{
- __builtin_aarch64_im_lane_boundsi (__b, 1);
+ __AARCH64_LANE_CHECK (__a, __b);
return __a[0];
}
__extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
vext_f32 (float32x2_t __a, float32x2_t __b, __const int __c)
{
- __builtin_aarch64_im_lane_boundsi (__c, 2);
+ __AARCH64_LANE_CHECK (__a, __c);
#ifdef __AARCH64EB__
return __builtin_shuffle (__b, __a, (uint32x2_t) {2-__c, 3-__c});
#else
__extension__ static __inline float64x1_t __attribute__ ((__always_inline__))
vext_f64 (float64x1_t __a, float64x1_t __b, __const int __c)
{
+ __AARCH64_LANE_CHECK (__a, __c);
/* The only possible index to the assembler instruction returns element 0. */
- __builtin_aarch64_im_lane_boundsi (__c, 1);
return __a;
}
__extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
vext_p8 (poly8x8_t __a, poly8x8_t __b, __const int __c)
{
- __builtin_aarch64_im_lane_boundsi (__c, 8);
+ __AARCH64_LANE_CHECK (__a, __c);
#ifdef __AARCH64EB__
return __builtin_shuffle (__b, __a, (uint8x8_t)
{8-__c, 9-__c, 10-__c, 11-__c, 12-__c, 13-__c, 14-__c, 15-__c});
__extension__ static __inline poly16x4_t __attribute__ ((__always_inline__))
vext_p16 (poly16x4_t __a, poly16x4_t __b, __const int __c)
{
- __builtin_aarch64_im_lane_boundsi (__c, 4);
+ __AARCH64_LANE_CHECK (__a, __c);
#ifdef __AARCH64EB__
return __builtin_shuffle (__b, __a,
(uint16x4_t) {4-__c, 5-__c, 6-__c, 7-__c});
__extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
vext_s8 (int8x8_t __a, int8x8_t __b, __const int __c)
{
- __builtin_aarch64_im_lane_boundsi (__c, 8);
+ __AARCH64_LANE_CHECK (__a, __c);
#ifdef __AARCH64EB__
return __builtin_shuffle (__b, __a, (uint8x8_t)
{8-__c, 9-__c, 10-__c, 11-__c, 12-__c, 13-__c, 14-__c, 15-__c});
__extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
vext_s16 (int16x4_t __a, int16x4_t __b, __const int __c)
{
- __builtin_aarch64_im_lane_boundsi (__c, 4);
+ __AARCH64_LANE_CHECK (__a, __c);
#ifdef __AARCH64EB__
return __builtin_shuffle (__b, __a,
(uint16x4_t) {4-__c, 5-__c, 6-__c, 7-__c});
__extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
vext_s32 (int32x2_t __a, int32x2_t __b, __const int __c)
{
- __builtin_aarch64_im_lane_boundsi (__c, 2);
+ __AARCH64_LANE_CHECK (__a, __c);
#ifdef __AARCH64EB__
return __builtin_shuffle (__b, __a, (uint32x2_t) {2-__c, 3-__c});
#else
__extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
vext_s64 (int64x1_t __a, int64x1_t __b, __const int __c)
{
+ __AARCH64_LANE_CHECK (__a, __c);
/* The only possible index to the assembler instruction returns element 0. */
- __builtin_aarch64_im_lane_boundsi (__c, 1);
return __a;
}
__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
vext_u8 (uint8x8_t __a, uint8x8_t __b, __const int __c)
{
- __builtin_aarch64_im_lane_boundsi (__c, 8);
+ __AARCH64_LANE_CHECK (__a, __c);
#ifdef __AARCH64EB__
return __builtin_shuffle (__b, __a, (uint8x8_t)
{8-__c, 9-__c, 10-__c, 11-__c, 12-__c, 13-__c, 14-__c, 15-__c});
__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
vext_u16 (uint16x4_t __a, uint16x4_t __b, __const int __c)
{
- __builtin_aarch64_im_lane_boundsi (__c, 4);
+ __AARCH64_LANE_CHECK (__a, __c);
#ifdef __AARCH64EB__
return __builtin_shuffle (__b, __a,
(uint16x4_t) {4-__c, 5-__c, 6-__c, 7-__c});
__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
vext_u32 (uint32x2_t __a, uint32x2_t __b, __const int __c)
{
- __builtin_aarch64_im_lane_boundsi (__c, 2);
+ __AARCH64_LANE_CHECK (__a, __c);
#ifdef __AARCH64EB__
return __builtin_shuffle (__b, __a, (uint32x2_t) {2-__c, 3-__c});
#else
__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
vext_u64 (uint64x1_t __a, uint64x1_t __b, __const int __c)
{
+ __AARCH64_LANE_CHECK (__a, __c);
/* The only possible index to the assembler instruction returns element 0. */
- __builtin_aarch64_im_lane_boundsi (__c, 1);
return __a;
}
__extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
vextq_f32 (float32x4_t __a, float32x4_t __b, __const int __c)
{
- __builtin_aarch64_im_lane_boundsi (__c, 4);
+ __AARCH64_LANE_CHECK (__a, __c);
#ifdef __AARCH64EB__
return __builtin_shuffle (__b, __a,
(uint32x4_t) {4-__c, 5-__c, 6-__c, 7-__c});
__extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
vextq_f64 (float64x2_t __a, float64x2_t __b, __const int __c)
{
- __builtin_aarch64_im_lane_boundsi (__c, 2);
+ __AARCH64_LANE_CHECK (__a, __c);
#ifdef __AARCH64EB__
return __builtin_shuffle (__b, __a, (uint64x2_t) {2-__c, 3-__c});
#else
__extension__ static __inline poly8x16_t __attribute__ ((__always_inline__))
vextq_p8 (poly8x16_t __a, poly8x16_t __b, __const int __c)
{
- __builtin_aarch64_im_lane_boundsi (__c, 16);
+ __AARCH64_LANE_CHECK (__a, __c);
#ifdef __AARCH64EB__
return __builtin_shuffle (__b, __a, (uint8x16_t)
{16-__c, 17-__c, 18-__c, 19-__c, 20-__c, 21-__c, 22-__c, 23-__c,
__extension__ static __inline poly16x8_t __attribute__ ((__always_inline__))
vextq_p16 (poly16x8_t __a, poly16x8_t __b, __const int __c)
{
- __builtin_aarch64_im_lane_boundsi (__c, 8);
+ __AARCH64_LANE_CHECK (__a, __c);
#ifdef __AARCH64EB__
return __builtin_shuffle (__b, __a, (uint16x8_t)
{8-__c, 9-__c, 10-__c, 11-__c, 12-__c, 13-__c, 14-__c, 15-__c});
__extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
vextq_s8 (int8x16_t __a, int8x16_t __b, __const int __c)
{
- __builtin_aarch64_im_lane_boundsi (__c, 16);
+ __AARCH64_LANE_CHECK (__a, __c);
#ifdef __AARCH64EB__
return __builtin_shuffle (__b, __a, (uint8x16_t)
{16-__c, 17-__c, 18-__c, 19-__c, 20-__c, 21-__c, 22-__c, 23-__c,
__extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
vextq_s16 (int16x8_t __a, int16x8_t __b, __const int __c)
{
- __builtin_aarch64_im_lane_boundsi (__c, 8);
+ __AARCH64_LANE_CHECK (__a, __c);
#ifdef __AARCH64EB__
return __builtin_shuffle (__b, __a, (uint16x8_t)
{8-__c, 9-__c, 10-__c, 11-__c, 12-__c, 13-__c, 14-__c, 15-__c});
__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
vextq_s32 (int32x4_t __a, int32x4_t __b, __const int __c)
{
- __builtin_aarch64_im_lane_boundsi (__c, 4);
+ __AARCH64_LANE_CHECK (__a, __c);
#ifdef __AARCH64EB__
return __builtin_shuffle (__b, __a,
(uint32x4_t) {4-__c, 5-__c, 6-__c, 7-__c});
__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
vextq_s64 (int64x2_t __a, int64x2_t __b, __const int __c)
{
- __builtin_aarch64_im_lane_boundsi (__c, 2);
+ __AARCH64_LANE_CHECK (__a, __c);
#ifdef __AARCH64EB__
return __builtin_shuffle (__b, __a, (uint64x2_t) {2-__c, 3-__c});
#else
__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
vextq_u8 (uint8x16_t __a, uint8x16_t __b, __const int __c)
{
- __builtin_aarch64_im_lane_boundsi (__c, 16);
+ __AARCH64_LANE_CHECK (__a, __c);
#ifdef __AARCH64EB__
return __builtin_shuffle (__b, __a, (uint8x16_t)
{16-__c, 17-__c, 18-__c, 19-__c, 20-__c, 21-__c, 22-__c, 23-__c,
__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
vextq_u16 (uint16x8_t __a, uint16x8_t __b, __const int __c)
{
- __builtin_aarch64_im_lane_boundsi (__c, 8);
+ __AARCH64_LANE_CHECK (__a, __c);
#ifdef __AARCH64EB__
return __builtin_shuffle (__b, __a, (uint16x8_t)
{8-__c, 9-__c, 10-__c, 11-__c, 12-__c, 13-__c, 14-__c, 15-__c});
__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
vextq_u32 (uint32x4_t __a, uint32x4_t __b, __const int __c)
{
- __builtin_aarch64_im_lane_boundsi (__c, 4);
+ __AARCH64_LANE_CHECK (__a, __c);
#ifdef __AARCH64EB__
return __builtin_shuffle (__b, __a,
(uint32x4_t) {4-__c, 5-__c, 6-__c, 7-__c});
__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
vextq_u64 (uint64x2_t __a, uint64x2_t __b, __const int __c)
{
- __builtin_aarch64_im_lane_boundsi (__c, 2);
+ __AARCH64_LANE_CHECK (__a, __c);
#ifdef __AARCH64EB__
return __builtin_shuffle (__b, __a, (uint64x2_t) {2-__c, 3-__c});
#else
__extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
vmulq_lane_f64 (float64x2_t __a, float64x1_t __b, const int __lane)
{
- __builtin_aarch64_im_lane_boundsi (__lane, 1);
+ __AARCH64_LANE_CHECK (__a, __lane);
return __a * __b[0];
}