From: David Candler Date: Fri, 6 Nov 2020 17:53:03 +0000 (+0000) Subject: aarch64: Use intrinsics for upper saturating shift right X-Git-Url: https://git.libre-soc.org/?a=commitdiff_plain;h=05f1883cfd041adba5f595e97118100b6e00a524;p=gcc.git aarch64: Use intrinsics for upper saturating shift right The use of vqshrn_high_n_s32 was triggering an unneeded register move, because sqshrn2 is destructive but was declared as inline assembly in arm_neon.h. This patch implements sqshrn2 and uqshrn2 as actual intrinsics which do not trigger the unnecessary move, along with new tests to cover them. gcc/ChangeLog 2020-11-06 David Candler * config/aarch64/aarch64-builtins.c (TYPES_SHIFT2IMM): Add define. (TYPES_SHIFT2IMM_UUSS): Add define. (TYPES_USHIFT2IMM): Add define. * config/aarch64/aarch64-simd.md (aarch64_qshrn2_n): Add new insn for upper saturating shift right. * config/aarch64/aarch64-simd-builtins.def: Add intrinsics. * config/aarch64/arm_neon.h: (vqrshrn_high_n_s16): Expand using intrinsic rather than inline asm. (vqrshrn_high_n_s32): Likewise. (vqrshrn_high_n_s64): Likewise. (vqrshrn_high_n_u16): Likewise. (vqrshrn_high_n_u32): Likewise. (vqrshrn_high_n_u64): Likewise. (vqrshrun_high_n_s16): Likewise. (vqrshrun_high_n_s32): Likewise. (vqrshrun_high_n_s64): Likewise. (vqshrn_high_n_s16): Likewise. (vqshrn_high_n_s32): Likewise. (vqshrn_high_n_s64): Likewise. (vqshrn_high_n_u16): Likewise. (vqshrn_high_n_u32): Likewise. (vqshrn_high_n_u64): Likewise. (vqshrun_high_n_s16): Likewise. (vqshrun_high_n_s32): Likewise. (vqshrun_high_n_s64): Likewise. gcc/testsuite/ChangeLog 2020-11-06 David Candler * gcc.target/aarch64/advsimd-intrinsics/vqrshrn_high_n.c: New testcase. * gcc.target/aarch64/advsimd-intrinsics/vqrshrun_high_n.c: Likewise. * gcc.target/aarch64/advsimd-intrinsics/vqshrn_high_n.c: Likewise. * gcc.target/aarch64/advsimd-intrinsics/vqshrun_high_n.c: Likewise. 
* gcc.target/aarch64/narrow_high-intrinsics.c: Update expected assembler for sqshrun2, sqrshrun2, sqshrn2, uqshrn2, sqrshrn2 and uqrshrn2. --- diff --git a/gcc/config/aarch64/aarch64-builtins.c b/gcc/config/aarch64/aarch64-builtins.c index 9d5e8c75c55..5defdb050fa 100644 --- a/gcc/config/aarch64/aarch64-builtins.c +++ b/gcc/config/aarch64/aarch64-builtins.c @@ -266,6 +266,11 @@ static enum aarch64_type_qualifiers aarch64_types_unsigned_shift_qualifiers[SIMD_MAX_BUILTIN_ARGS] = { qualifier_unsigned, qualifier_unsigned, qualifier_immediate }; #define TYPES_USHIFTIMM (aarch64_types_unsigned_shift_qualifiers) +#define TYPES_USHIFT2IMM (aarch64_types_ternopu_imm_qualifiers) +static enum aarch64_type_qualifiers +aarch64_types_shift2_to_unsigned_qualifiers[SIMD_MAX_BUILTIN_ARGS] + = { qualifier_unsigned, qualifier_unsigned, qualifier_none, qualifier_immediate }; +#define TYPES_SHIFT2IMM_UUSS (aarch64_types_shift2_to_unsigned_qualifiers) static enum aarch64_type_qualifiers aarch64_types_ternop_s_imm_p_qualifiers[SIMD_MAX_BUILTIN_ARGS] @@ -277,6 +282,7 @@ aarch64_types_ternop_s_imm_qualifiers[SIMD_MAX_BUILTIN_ARGS] #define TYPES_SETREG (aarch64_types_ternop_s_imm_qualifiers) #define TYPES_SHIFTINSERT (aarch64_types_ternop_s_imm_qualifiers) #define TYPES_SHIFTACC (aarch64_types_ternop_s_imm_qualifiers) +#define TYPES_SHIFT2IMM (aarch64_types_ternop_s_imm_qualifiers) static enum aarch64_type_qualifiers aarch64_types_ternop_p_imm_qualifiers[SIMD_MAX_BUILTIN_ARGS] diff --git a/gcc/config/aarch64/aarch64-simd-builtins.def b/gcc/config/aarch64/aarch64-simd-builtins.def index 6d898d4503a..09f275cd4fc 100644 --- a/gcc/config/aarch64/aarch64-simd-builtins.def +++ b/gcc/config/aarch64/aarch64-simd-builtins.def @@ -285,6 +285,13 @@ BUILTIN_VSQN_HSDI (USHIFTIMM, uqshrn_n, 0, ALL) BUILTIN_VSQN_HSDI (SHIFTIMM, sqrshrn_n, 0, ALL) BUILTIN_VSQN_HSDI (USHIFTIMM, uqrshrn_n, 0, ALL) + /* Implemented by aarch64_qshrn2_n. 
*/ + BUILTIN_VQN (SHIFT2IMM_UUSS, sqshrun2_n, 0, NONE) + BUILTIN_VQN (SHIFT2IMM_UUSS, sqrshrun2_n, 0, NONE) + BUILTIN_VQN (SHIFT2IMM, sqshrn2_n, 0, NONE) + BUILTIN_VQN (USHIFT2IMM, uqshrn2_n, 0, NONE) + BUILTIN_VQN (SHIFT2IMM, sqrshrn2_n, 0, NONE) + BUILTIN_VQN (USHIFT2IMM, uqrshrn2_n, 0, NONE) /* Implemented by aarch64_si_n. */ BUILTIN_VSDQ_I_DI (SHIFTINSERT, ssri_n, 0, ALL) BUILTIN_VSDQ_I_DI (USHIFTACC, usri_n, 0, ALL) diff --git a/gcc/config/aarch64/aarch64-simd.md b/gcc/config/aarch64/aarch64-simd.md index a667efa15ed..2cf6fe9154a 100644 --- a/gcc/config/aarch64/aarch64-simd.md +++ b/gcc/config/aarch64/aarch64-simd.md @@ -4720,6 +4720,17 @@ [(set_attr "type" "neon_sat_shift_imm_narrow_q")] ) +(define_insn "aarch64_qshrn2_n" + [(set (match_operand: 0 "register_operand" "=w") + (unspec: [(match_operand: 1 "register_operand" "0") + (match_operand:VQN 2 "register_operand" "w") + (match_operand:SI 3 "aarch64_simd_shift_imm_offset_" "i")] + VQSHRN_N))] + "TARGET_SIMD" + "qshrn2\\t%0., %2., %3" + [(set_attr "type" "neon_sat_shift_imm_narrow_q")] +) + ;; cm(eq|ge|gt|lt|le) ;; Note, we have constraints for Dz and Z as different expanders diff --git a/gcc/config/aarch64/arm_neon.h b/gcc/config/aarch64/arm_neon.h index 0009e50f3fe..d79c1a27ece 100644 --- a/gcc/config/aarch64/arm_neon.h +++ b/gcc/config/aarch64/arm_neon.h @@ -9993,275 +9993,131 @@ vqrdmulhq_n_s32 (int32x4_t __a, int32_t __b) return __result; } -#define vqrshrn_high_n_s16(a, b, c) \ - __extension__ \ - ({ \ - int16x8_t b_ = (b); \ - int8x8_t a_ = (a); \ - int8x16_t result = vcombine_s8 \ - (a_, vcreate_s8 \ - (__AARCH64_UINT64_C (0x0))); \ - __asm__ ("sqrshrn2 %0.16b, %1.8h, #%2" \ - : "+w"(result) \ - : "w"(b_), "i"(c) \ - : /* No clobbers */); \ - result; \ - }) +__extension__ extern __inline int8x16_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vqrshrn_high_n_s16 (int8x8_t __a, int16x8_t __b, const int __c) +{ + return __builtin_aarch64_sqrshrn2_nv8hi (__a, __b, __c); +} 
-#define vqrshrn_high_n_s32(a, b, c) \ - __extension__ \ - ({ \ - int32x4_t b_ = (b); \ - int16x4_t a_ = (a); \ - int16x8_t result = vcombine_s16 \ - (a_, vcreate_s16 \ - (__AARCH64_UINT64_C (0x0))); \ - __asm__ ("sqrshrn2 %0.8h, %1.4s, #%2" \ - : "+w"(result) \ - : "w"(b_), "i"(c) \ - : /* No clobbers */); \ - result; \ - }) +__extension__ extern __inline int16x8_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vqrshrn_high_n_s32 (int16x4_t __a, int32x4_t __b, const int __c) +{ + return __builtin_aarch64_sqrshrn2_nv4si (__a, __b, __c); +} -#define vqrshrn_high_n_s64(a, b, c) \ - __extension__ \ - ({ \ - int64x2_t b_ = (b); \ - int32x2_t a_ = (a); \ - int32x4_t result = vcombine_s32 \ - (a_, vcreate_s32 \ - (__AARCH64_UINT64_C (0x0))); \ - __asm__ ("sqrshrn2 %0.4s, %1.2d, #%2" \ - : "+w"(result) \ - : "w"(b_), "i"(c) \ - : /* No clobbers */); \ - result; \ - }) +__extension__ extern __inline int32x4_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vqrshrn_high_n_s64 (int32x2_t __a, int64x2_t __b, const int __c) +{ + return __builtin_aarch64_sqrshrn2_nv2di (__a, __b, __c); +} -#define vqrshrn_high_n_u16(a, b, c) \ - __extension__ \ - ({ \ - uint16x8_t b_ = (b); \ - uint8x8_t a_ = (a); \ - uint8x16_t result = vcombine_u8 \ - (a_, vcreate_u8 \ - (__AARCH64_UINT64_C (0x0))); \ - __asm__ ("uqrshrn2 %0.16b, %1.8h, #%2" \ - : "+w"(result) \ - : "w"(b_), "i"(c) \ - : /* No clobbers */); \ - result; \ - }) +__extension__ extern __inline uint8x16_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vqrshrn_high_n_u16 (uint8x8_t __a, uint16x8_t __b, const int __c) +{ + return __builtin_aarch64_uqrshrn2_nv8hi_uuus (__a, __b, __c); +} -#define vqrshrn_high_n_u32(a, b, c) \ - __extension__ \ - ({ \ - uint32x4_t b_ = (b); \ - uint16x4_t a_ = (a); \ - uint16x8_t result = vcombine_u16 \ - (a_, vcreate_u16 \ - (__AARCH64_UINT64_C (0x0))); \ - __asm__ ("uqrshrn2 %0.8h, %1.4s, #%2" \ - : "+w"(result) \ - : "w"(b_), 
"i"(c) \ - : /* No clobbers */); \ - result; \ - }) +__extension__ extern __inline uint16x8_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vqrshrn_high_n_u32 (uint16x4_t __a, uint32x4_t __b, const int __c) +{ + return __builtin_aarch64_uqrshrn2_nv4si_uuus (__a, __b, __c); +} -#define vqrshrn_high_n_u64(a, b, c) \ - __extension__ \ - ({ \ - uint64x2_t b_ = (b); \ - uint32x2_t a_ = (a); \ - uint32x4_t result = vcombine_u32 \ - (a_, vcreate_u32 \ - (__AARCH64_UINT64_C (0x0))); \ - __asm__ ("uqrshrn2 %0.4s, %1.2d, #%2" \ - : "+w"(result) \ - : "w"(b_), "i"(c) \ - : /* No clobbers */); \ - result; \ - }) +__extension__ extern __inline uint32x4_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vqrshrn_high_n_u64 (uint32x2_t __a, uint64x2_t __b, const int __c) +{ + return __builtin_aarch64_uqrshrn2_nv2di_uuus (__a, __b, __c); +} -#define vqrshrun_high_n_s16(a, b, c) \ - __extension__ \ - ({ \ - int16x8_t b_ = (b); \ - uint8x8_t a_ = (a); \ - uint8x16_t result = vcombine_u8 \ - (a_, vcreate_u8 \ - (__AARCH64_UINT64_C (0x0))); \ - __asm__ ("sqrshrun2 %0.16b, %1.8h, #%2" \ - : "+w"(result) \ - : "w"(b_), "i"(c) \ - : /* No clobbers */); \ - result; \ - }) +__extension__ extern __inline uint8x16_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vqrshrun_high_n_s16 (uint8x8_t __a, int16x8_t __b, const int __c) +{ + return __builtin_aarch64_sqrshrun2_nv8hi_uuss (__a, __b, __c); +} -#define vqrshrun_high_n_s32(a, b, c) \ - __extension__ \ - ({ \ - int32x4_t b_ = (b); \ - uint16x4_t a_ = (a); \ - uint16x8_t result = vcombine_u16 \ - (a_, vcreate_u16 \ - (__AARCH64_UINT64_C (0x0))); \ - __asm__ ("sqrshrun2 %0.8h, %1.4s, #%2" \ - : "+w"(result) \ - : "w"(b_), "i"(c) \ - : /* No clobbers */); \ - result; \ - }) +__extension__ extern __inline uint16x8_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vqrshrun_high_n_s32 (uint16x4_t __a, int32x4_t __b, const int __c) +{ + return 
__builtin_aarch64_sqrshrun2_nv4si_uuss (__a, __b, __c); +} -#define vqrshrun_high_n_s64(a, b, c) \ - __extension__ \ - ({ \ - int64x2_t b_ = (b); \ - uint32x2_t a_ = (a); \ - uint32x4_t result = vcombine_u32 \ - (a_, vcreate_u32 \ - (__AARCH64_UINT64_C (0x0))); \ - __asm__ ("sqrshrun2 %0.4s, %1.2d, #%2" \ - : "+w"(result) \ - : "w"(b_), "i"(c) \ - : /* No clobbers */); \ - result; \ - }) +__extension__ extern __inline uint32x4_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vqrshrun_high_n_s64 (uint32x2_t __a, int64x2_t __b, const int __c) +{ + return __builtin_aarch64_sqrshrun2_nv2di_uuss (__a, __b, __c); +} -#define vqshrn_high_n_s16(a, b, c) \ - __extension__ \ - ({ \ - int16x8_t b_ = (b); \ - int8x8_t a_ = (a); \ - int8x16_t result = vcombine_s8 \ - (a_, vcreate_s8 \ - (__AARCH64_UINT64_C (0x0))); \ - __asm__ ("sqshrn2 %0.16b, %1.8h, #%2" \ - : "+w"(result) \ - : "w"(b_), "i"(c) \ - : /* No clobbers */); \ - result; \ - }) +__extension__ extern __inline int8x16_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vqshrn_high_n_s16 (int8x8_t __a, int16x8_t __b, const int __c) +{ + return __builtin_aarch64_sqshrn2_nv8hi (__a, __b, __c); +} -#define vqshrn_high_n_s32(a, b, c) \ - __extension__ \ - ({ \ - int32x4_t b_ = (b); \ - int16x4_t a_ = (a); \ - int16x8_t result = vcombine_s16 \ - (a_, vcreate_s16 \ - (__AARCH64_UINT64_C (0x0))); \ - __asm__ ("sqshrn2 %0.8h, %1.4s, #%2" \ - : "+w"(result) \ - : "w"(b_), "i"(c) \ - : /* No clobbers */); \ - result; \ - }) +__extension__ extern __inline int16x8_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vqshrn_high_n_s32 (int16x4_t __a, int32x4_t __b, const int __c) +{ + return __builtin_aarch64_sqshrn2_nv4si (__a, __b, __c); +} -#define vqshrn_high_n_s64(a, b, c) \ - __extension__ \ - ({ \ - int64x2_t b_ = (b); \ - int32x2_t a_ = (a); \ - int32x4_t result = vcombine_s32 \ - (a_, vcreate_s32 \ - (__AARCH64_UINT64_C (0x0))); \ - __asm__ ("sqshrn2 %0.4s, 
%1.2d, #%2" \ - : "+w"(result) \ - : "w"(b_), "i"(c) \ - : /* No clobbers */); \ - result; \ - }) +__extension__ extern __inline int32x4_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vqshrn_high_n_s64 (int32x2_t __a, int64x2_t __b, const int __c) +{ + return __builtin_aarch64_sqshrn2_nv2di (__a, __b, __c); +} -#define vqshrn_high_n_u16(a, b, c) \ - __extension__ \ - ({ \ - uint16x8_t b_ = (b); \ - uint8x8_t a_ = (a); \ - uint8x16_t result = vcombine_u8 \ - (a_, vcreate_u8 \ - (__AARCH64_UINT64_C (0x0))); \ - __asm__ ("uqshrn2 %0.16b, %1.8h, #%2" \ - : "+w"(result) \ - : "w"(b_), "i"(c) \ - : /* No clobbers */); \ - result; \ - }) +__extension__ extern __inline uint8x16_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vqshrn_high_n_u16 (uint8x8_t __a, uint16x8_t __b, const int __c) +{ + return __builtin_aarch64_uqshrn2_nv8hi_uuus (__a, __b, __c); +} -#define vqshrn_high_n_u32(a, b, c) \ - __extension__ \ - ({ \ - uint32x4_t b_ = (b); \ - uint16x4_t a_ = (a); \ - uint16x8_t result = vcombine_u16 \ - (a_, vcreate_u16 \ - (__AARCH64_UINT64_C (0x0))); \ - __asm__ ("uqshrn2 %0.8h, %1.4s, #%2" \ - : "+w"(result) \ - : "w"(b_), "i"(c) \ - : /* No clobbers */); \ - result; \ - }) +__extension__ extern __inline uint16x8_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vqshrn_high_n_u32 (uint16x4_t __a, uint32x4_t __b, const int __c) +{ + return __builtin_aarch64_uqshrn2_nv4si_uuus (__a, __b, __c); +} -#define vqshrn_high_n_u64(a, b, c) \ - __extension__ \ - ({ \ - uint64x2_t b_ = (b); \ - uint32x2_t a_ = (a); \ - uint32x4_t result = vcombine_u32 \ - (a_, vcreate_u32 \ - (__AARCH64_UINT64_C (0x0))); \ - __asm__ ("uqshrn2 %0.4s, %1.2d, #%2" \ - : "+w"(result) \ - : "w"(b_), "i"(c) \ - : /* No clobbers */); \ - result; \ - }) +__extension__ extern __inline uint32x4_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vqshrn_high_n_u64 (uint32x2_t __a, uint64x2_t __b, const int __c) +{ + 
return __builtin_aarch64_uqshrn2_nv2di_uuus (__a, __b, __c); +} -#define vqshrun_high_n_s16(a, b, c) \ - __extension__ \ - ({ \ - int16x8_t b_ = (b); \ - uint8x8_t a_ = (a); \ - uint8x16_t result = vcombine_u8 \ - (a_, vcreate_u8 \ - (__AARCH64_UINT64_C (0x0))); \ - __asm__ ("sqshrun2 %0.16b, %1.8h, #%2" \ - : "+w"(result) \ - : "w"(b_), "i"(c) \ - : /* No clobbers */); \ - result; \ - }) +__extension__ extern __inline uint8x16_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vqshrun_high_n_s16 (uint8x8_t __a, int16x8_t __b, const int __c) +{ + return __builtin_aarch64_sqshrun2_nv8hi_uuss (__a, __b, __c); +} -#define vqshrun_high_n_s32(a, b, c) \ - __extension__ \ - ({ \ - int32x4_t b_ = (b); \ - uint16x4_t a_ = (a); \ - uint16x8_t result = vcombine_u16 \ - (a_, vcreate_u16 \ - (__AARCH64_UINT64_C (0x0))); \ - __asm__ ("sqshrun2 %0.8h, %1.4s, #%2" \ - : "+w"(result) \ - : "w"(b_), "i"(c) \ - : /* No clobbers */); \ - result; \ - }) +__extension__ extern __inline uint16x8_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vqshrun_high_n_s32 (uint16x4_t __a, int32x4_t __b, const int __c) +{ + return __builtin_aarch64_sqshrun2_nv4si_uuss (__a, __b, __c); +} -#define vqshrun_high_n_s64(a, b, c) \ - __extension__ \ - ({ \ - int64x2_t b_ = (b); \ - uint32x2_t a_ = (a); \ - uint32x4_t result = vcombine_u32 \ - (a_, vcreate_u32 \ - (__AARCH64_UINT64_C (0x0))); \ - __asm__ ("sqshrun2 %0.4s, %1.2d, #%2" \ - : "+w"(result) \ - : "w"(b_), "i"(c) \ - : /* No clobbers */); \ - result; \ - }) +__extension__ extern __inline uint32x4_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vqshrun_high_n_s64 (uint32x2_t __a, int64x2_t __b, const int __c) +{ + return __builtin_aarch64_sqshrun2_nv2di_uuss (__a, __b, __c); +} #define vrshrn_high_n_s16(a, b, c) \ __extension__ \ diff --git a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vqrshrn_high_n.c 
b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vqrshrn_high_n.c new file mode 100644 index 00000000000..d9add2908d1 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vqrshrn_high_n.c @@ -0,0 +1,192 @@ +#include +#include "arm-neon-ref.h" +#include "compute-ref-data.h" + +/* Expected results. */ +VECT_VAR_DECL(expected,int,8,16) [] = { 0xf0, 0xf1, 0xf2, 0xf3, + 0xf4, 0xf5, 0xf6, 0xf7, + 0xf8, 0xf9, 0xf9, 0xfa, + 0xfa, 0xfb, 0xfb, 0xfc }; +VECT_VAR_DECL(expected,int,16,8) [] = { 0xfff0, 0xfff1, 0xfff2, 0xfff3, + 0xfff8, 0xfff9, 0xfff9, 0xfffa }; +VECT_VAR_DECL(expected,int,32,4) [] = { 0xfffffff0, 0xfffffff1, + 0xfffffffc, 0xfffffffc }; +VECT_VAR_DECL(expected,uint,8,16) [] = { 0xf0, 0xf1, 0xf2, 0xf3, + 0xf4, 0xf5, 0xf6, 0xf7, + 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff }; +VECT_VAR_DECL(expected,uint,16,8) [] = { 0xfff0, 0xfff1, 0xfff2, 0xfff3, + 0xffff, 0xffff, 0xffff, 0xffff }; +VECT_VAR_DECL(expected,uint,32,4) [] = { 0xfffffff0, 0xfffffff1, + 0xffffffff, 0xffffffff }; + +/* Expected results with shift by 3. */ +VECT_VAR_DECL(expected_sh3,int,8,16) [] = { 0x7f, 0x7f, 0x7f, 0x7f, + 0x7f, 0x7f, 0x7f, 0x7f, + 0x7f, 0x7f, 0x7f, 0x7f, + 0x7f, 0x7f, 0x7f, 0x7f }; +VECT_VAR_DECL(expected_sh3,int,16,8) [] = { 0x7fff, 0x7fff, 0x7fff, 0x7fff, + 0x7fff, 0x7fff, 0x7fff, 0x7fff }; +VECT_VAR_DECL(expected_sh3,int,32,4) [] = { 0x7fffffff, 0x7fffffff, + 0x7fffffff, 0x7fffffff }; +VECT_VAR_DECL(expected_sh3,uint,8,16) [] = { 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff }; +VECT_VAR_DECL(expected_sh3,uint,16,8) [] = { 0xffff, 0xffff, 0xffff, 0xffff, + 0xffff, 0xffff, 0xffff, 0xffff }; +VECT_VAR_DECL(expected_sh3,uint,32,4) [] = { 0xffffffff, 0xffffffff, + 0xffffffff, 0xffffffff }; + +/* Expected results with shift by max amount. 
*/ +VECT_VAR_DECL(expected_shmax,int,8,16) [] = { 0x7f, 0x7f, 0x7f, 0x7f, + 0x7f, 0x7f, 0x7f, 0x7f, + 0x7f, 0x7f, 0x7f, 0x7f, + 0x7f, 0x7f, 0x7f, 0x7f }; +VECT_VAR_DECL(expected_shmax,int,16,8) [] = { 0x7fff, 0x7fff, 0x7fff, 0x7fff, + 0x7fff, 0x7fff, 0x7fff, 0x7fff }; +VECT_VAR_DECL(expected_shmax,int,32,4) [] = { 0x7fffffff, 0x7fffffff, + 0x7fffffff, 0x7fffffff }; +VECT_VAR_DECL(expected_shmax,uint,8,16) [] = { 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff }; +VECT_VAR_DECL(expected_shmax,uint,16,8) [] = { 0xffff, 0xffff, 0xffff, 0xffff, + 0xffff, 0xffff, 0xffff, 0xffff }; +VECT_VAR_DECL(expected_shmax,uint,32,4) [] = { 0xffffffff, 0xffffffff, + 0xffffffff, 0xffffffff }; + +#define INSN vqrshrn_high_n +#define TEST_MSG "VQRSHRN_HIGH_N" + +#define FNNAME1(NAME) void exec_ ## NAME (void) +#define FNNAME(NAME) FNNAME1(NAME) + +FNNAME (INSN) +{ + /* Basic test: y=vqrshrn_high_n(x,v), then store the result. */ +#define TEST_VQRSHRN_HIGH_N2(INSN, T1, T2, W, W2, N, N2, V) \ + VECT_VAR(vector_res, T1, W2, N2) = \ + INSN##_##T2##W(VECT_VAR(vector1, T1, W2, N), \ + VECT_VAR(vector2, T1, W, N), V); \ + vst1q_##T2##W2(VECT_VAR(result, T1, W2, N2), \ + VECT_VAR(vector_res, T1, W2, N2)); \ + + /* Two auxliary macros are necessary to expand INSN */ +#define TEST_VQRSHRN_HIGH_N1(INSN, T1, T2, W, W2, N, N2, V) \ + TEST_VQRSHRN_HIGH_N2(INSN, T1, T2, W, W2, N, N2, V) + +#define TEST_VQRSHRN_HIGH_N(T1, T2, W, W2, N, N2, V) \ + TEST_VQRSHRN_HIGH_N1(INSN, T1, T2, W, W2, N, N2, V) + + + DECL_VARIABLE(vector1, int, 8, 8); + DECL_VARIABLE(vector1, int, 16, 4); + DECL_VARIABLE(vector1, int, 32, 2); + DECL_VARIABLE(vector1, uint, 8, 8); + DECL_VARIABLE(vector1, uint, 16, 4); + DECL_VARIABLE(vector1, uint, 32, 2); + + /* vector is twice as large as vector_res. 
*/ + DECL_VARIABLE(vector2, int, 16, 8); + DECL_VARIABLE(vector2, int, 32, 4); + DECL_VARIABLE(vector2, int, 64, 2); + DECL_VARIABLE(vector2, uint, 16, 8); + DECL_VARIABLE(vector2, uint, 32, 4); + DECL_VARIABLE(vector2, uint, 64, 2); + + DECL_VARIABLE(vector_res, int, 8, 16); + DECL_VARIABLE(vector_res, int, 16, 8); + DECL_VARIABLE(vector_res, int, 32, 4); + DECL_VARIABLE(vector_res, uint, 8, 16); + DECL_VARIABLE(vector_res, uint, 16, 8); + DECL_VARIABLE(vector_res, uint, 32, 4); + + clean_results (); + + VLOAD(vector1, buffer, , int, s, 8, 8); + VLOAD(vector1, buffer, , int, s, 16, 4); + VLOAD(vector1, buffer, , int, s, 32, 2); + VLOAD(vector1, buffer, , uint, u, 8, 8); + VLOAD(vector1, buffer, , uint, u, 16, 4); + VLOAD(vector1, buffer, , uint, u, 32, 2); + + VLOAD(vector2, buffer, q, int, s, 16, 8); + VLOAD(vector2, buffer, q, int, s, 32, 4); + VLOAD(vector2, buffer, q, int, s, 64, 2); + VLOAD(vector2, buffer, q, uint, u, 16, 8); + VLOAD(vector2, buffer, q, uint, u, 32, 4); + VLOAD(vector2, buffer, q, uint, u, 64, 2); + + /* Choose shift amount arbitrarily. */ +#define CMT "" + TEST_VQRSHRN_HIGH_N(int, s, 16, 8, 8, 16, 1); + TEST_VQRSHRN_HIGH_N(int, s, 32, 16, 4, 8, 1); + TEST_VQRSHRN_HIGH_N(int, s, 64, 32, 2, 4, 2); + TEST_VQRSHRN_HIGH_N(uint, u, 16, 8, 8, 16, 2); + TEST_VQRSHRN_HIGH_N(uint, u, 32, 16, 4, 8, 3); + TEST_VQRSHRN_HIGH_N(uint, u, 64, 32, 2, 4, 3); + + CHECK(TEST_MSG, int, 8, 16, PRIx8, expected, CMT); + CHECK(TEST_MSG, int, 16, 8, PRIx16, expected, CMT); + CHECK(TEST_MSG, int, 32, 4, PRIx32, expected, CMT); + CHECK(TEST_MSG, uint, 8, 16, PRIx8, expected, CMT); + CHECK(TEST_MSG, uint, 16, 8, PRIx16, expected, CMT); + CHECK(TEST_MSG, uint, 32, 4, PRIx32, expected, CMT); + + + /* Another set of tests, shifting max value by 3. 
*/ + VDUP(vector1, , int, s, 8, 8, 0x7F); + VDUP(vector1, , int, s, 16, 4, 0x7FFF); + VDUP(vector1, , int, s, 32, 2, 0x7FFFFFFFLL); + VDUP(vector1, , uint, u, 8, 8, 0xFF); + VDUP(vector1, , uint, u, 16, 4, 0xFFFF); + VDUP(vector1, , uint, u, 32, 2, 0xFFFFFFFFULL); + + VDUP(vector2, q, int, s, 16, 8, 0x7FFF); + VDUP(vector2, q, int, s, 32, 4, 0x7FFFFFFF); + VDUP(vector2, q, int, s, 64, 2, 0x7FFFFFFFFFFFFFFFLL); + VDUP(vector2, q, uint, u, 16, 8, 0xFFFF); + VDUP(vector2, q, uint, u, 32, 4, 0xFFFFFFFF); + VDUP(vector2, q, uint, u, 64, 2, 0xFFFFFFFFFFFFFFFFULL); + +#undef CMT +#define CMT " (check saturation: shift by 3)" + TEST_VQRSHRN_HIGH_N(int, s, 16, 8, 8, 16, 3); + TEST_VQRSHRN_HIGH_N(int, s, 32, 16, 4, 8, 3); + TEST_VQRSHRN_HIGH_N(int, s, 64, 32, 2, 4, 3); + TEST_VQRSHRN_HIGH_N(uint, u, 16, 8, 8, 16, 3); + TEST_VQRSHRN_HIGH_N(uint, u, 32, 16, 4, 8, 3); + TEST_VQRSHRN_HIGH_N(uint, u, 64, 32, 2, 4, 3); + + CHECK(TEST_MSG, int, 8, 16, PRIx8, expected_sh3, CMT); + CHECK(TEST_MSG, int, 16, 8, PRIx16, expected_sh3, CMT); + CHECK(TEST_MSG, int, 32, 4, PRIx32, expected_sh3, CMT); + CHECK(TEST_MSG, uint, 8, 16, PRIx8, expected_sh3, CMT); + CHECK(TEST_MSG, uint, 16, 8, PRIx16, expected_sh3, CMT); + CHECK(TEST_MSG, uint, 32, 4, PRIx32, expected_sh3, CMT); + + + /* Shift by max amount. 
*/ +#undef CMT +#define CMT " (check saturation: shift by max)" + TEST_VQRSHRN_HIGH_N(int, s, 16, 8, 8, 16, 8); + TEST_VQRSHRN_HIGH_N(int, s, 32, 16, 4, 8, 16); + TEST_VQRSHRN_HIGH_N(int, s, 64, 32, 2, 4, 32); + TEST_VQRSHRN_HIGH_N(uint, u, 16, 8, 8, 16, 8); + TEST_VQRSHRN_HIGH_N(uint, u, 32, 16, 4, 8, 16); + TEST_VQRSHRN_HIGH_N(uint, u, 64, 32, 2, 4, 32); + + CHECK(TEST_MSG, int, 8, 16, PRIx8, expected_shmax, CMT); + CHECK(TEST_MSG, int, 16, 8, PRIx16, expected_shmax, CMT); + CHECK(TEST_MSG, int, 32, 4, PRIx32, expected_shmax, CMT); + CHECK(TEST_MSG, uint, 8, 16, PRIx8, expected_shmax, CMT); + CHECK(TEST_MSG, uint, 16, 8, PRIx16, expected_shmax, CMT); + CHECK(TEST_MSG, uint, 32, 4, PRIx32, expected_shmax, CMT); +} + +int main (void) +{ + exec_vqrshrn_high_n (); + return 0; +} diff --git a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vqrshrun_high_n.c b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vqrshrun_high_n.c new file mode 100644 index 00000000000..1a3788cd14a --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vqrshrun_high_n.c @@ -0,0 +1,194 @@ +#include +#include "arm-neon-ref.h" +#include "compute-ref-data.h" + +/* Expected results with negative input. */ +VECT_VAR_DECL(expected_neg,uint,8,16) [] = { 0xfe, 0xfe, 0xfe, 0xfe, + 0xfe, 0xfe, 0xfe, 0xfe, + 0x0, 0x0, 0x0, 0x0, + 0x0, 0x0, 0x0, 0x0 }; +VECT_VAR_DECL(expected_neg,uint,16,8) [] = { 0xfffd, 0xfffd, 0xfffd, 0xfffd, + 0x0, 0x0, 0x0, 0x0 }; +VECT_VAR_DECL(expected_neg,uint,32,4) [] = { 0xfffffffc, 0xfffffffc, 0x0, 0x0 }; + +/* Expected results with max input value shifted by 1. 
*/ +VECT_VAR_DECL(expected_max_sh1,uint,8,16) [] = { 0x7f, 0x7f, 0x7f, 0x7f, + 0x7f, 0x7f, 0x7f, 0x7f, + 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff }; +VECT_VAR_DECL(expected_max_sh1,uint,16,8) [] = { 0x7fff, 0x7fff, 0x7fff, 0x7fff, + 0xffff, 0xffff, 0xffff, 0xffff }; +VECT_VAR_DECL(expected_max_sh1,uint,32,4) [] = { 0x7fffffff, 0x7fffffff, + 0xffffffff, 0xffffffff }; + +/* Expected results with max input value shifted by max amount. */ +VECT_VAR_DECL(expected_max_shmax,uint,8,16) [] = { 0x7f, 0x7f, 0x7f, 0x7f, + 0x7f, 0x7f, 0x7f, 0x7f, + 0x80, 0x80, 0x80, 0x80, + 0x80, 0x80, 0x80, 0x80 }; +VECT_VAR_DECL(expected_max_shmax,uint,16,8) [] = { 0x7fff, 0x7fff, 0x7fff, 0x7fff, + 0x8000, 0x8000, 0x8000, 0x8000 }; +VECT_VAR_DECL(expected_max_shmax,uint,32,4) [] = { 0x7fffffff, 0x7fffffff, + 0x80000000, 0x80000000 }; + +/* Expected results with min input value shifted by max amount. */ +VECT_VAR_DECL(expected_min_shmax,uint,8,16) [] = { 0x80, 0x80, 0x80, 0x80, + 0x80, 0x80, 0x80, 0x80, + 0x0, 0x0, 0x0, 0x0, + 0x0, 0x0, 0x0, 0x0 }; +VECT_VAR_DECL(expected_min_shmax,uint,16,8) [] = { 0x8000, 0x8000, 0x8000, 0x8000, + 0x0, 0x0, 0x0, 0x0 }; +VECT_VAR_DECL(expected_min_shmax,uint,32,4) [] = { 0x80000000, 0x80000000, + 0x0, 0x0 }; + +/* Expected results with inputs in usual range. */ +VECT_VAR_DECL(expected,uint,8,16) [] = { 0x12, 0x12, 0x12, 0x12, + 0x12, 0x12, 0x12, 0x12, + 0x49, 0x49, 0x49, 0x49, + 0x49, 0x49, 0x49, 0x49 }; +VECT_VAR_DECL(expected,uint,16,8) [] = { 0x4321, 0x4321, 0x4321, 0x4321, + 0x0, 0x0, 0x0, 0x0 }; +VECT_VAR_DECL(expected,uint,32,4) [] = { 0xdeadbeef, 0xdeadbeef, + 0xdeadbf, 0xdeadbf }; + +#define INSN vqrshrun_high_n +#define TEST_MSG "VQRSHRUN_HIGH_N" + +#define FNNAME1(NAME) void exec_ ## NAME (void) +#define FNNAME(NAME) FNNAME1(NAME) + +FNNAME (INSN) +{ + /* Basic test: y=vqrshrun_high_n(x,v), then store the result. 
*/ +#define TEST_VQRSHRUN_HIGH_N2(INSN, T1, T2, W, W2, N, N2, V) \ + VECT_VAR(vector_res, uint, W2, N2) = \ + INSN##_##T2##W(VECT_VAR(vector1, uint, W2, N), \ + VECT_VAR(vector2, T1, W, N), V); \ + vst1q_u##W2(VECT_VAR(result, uint, W2, N2), \ + VECT_VAR(vector_res, uint, W2, N2)); \ + + /* Two auxliary macros are necessary to expand INSN */ +#define TEST_VQRSHRUN_HIGH_N1(INSN, T1, T2, W, W2, N, N2, V) \ + TEST_VQRSHRUN_HIGH_N2(INSN, T1, T2, W, W2, N, N2, V) + +#define TEST_VQRSHRUN_HIGH_N(T1, T2, W, W2, N, N2, V) \ + TEST_VQRSHRUN_HIGH_N1(INSN, T1, T2, W, W2, N, N2, V) + + + DECL_VARIABLE(vector1, uint, 8, 8); + DECL_VARIABLE(vector1, uint, 16, 4); + DECL_VARIABLE(vector1, uint, 32, 2); + + /* vector is twice as large as vector_res. */ + DECL_VARIABLE(vector2, int, 16, 8); + DECL_VARIABLE(vector2, int, 32, 4); + DECL_VARIABLE(vector2, int, 64, 2); + + DECL_VARIABLE(vector_res, uint, 8, 16); + DECL_VARIABLE(vector_res, uint, 16, 8); + DECL_VARIABLE(vector_res, uint, 32, 4); + + clean_results (); + + /* Fill input vector with negative values, to check saturation on + limits. */ + VDUP(vector1, , uint, u, 8, 8, -2); + VDUP(vector1, , uint, u, 16, 4, -3); + VDUP(vector1, , uint, u, 32, 2, -4); + + VDUP(vector2, q, int, s, 16, 8, -2); + VDUP(vector2, q, int, s, 32, 4, -3); + VDUP(vector2, q, int, s, 64, 2, -4); + + /* Choose shift amount arbitrarily. */ +#define CMT " (negative input)" + TEST_VQRSHRUN_HIGH_N(int, s, 16, 8, 8, 16, 3); + TEST_VQRSHRUN_HIGH_N(int, s, 32, 16, 4, 8, 4); + TEST_VQRSHRUN_HIGH_N(int, s, 64, 32, 2, 4, 2); + + CHECK(TEST_MSG, uint, 8, 16, PRIx8, expected_neg, CMT); + CHECK(TEST_MSG, uint, 16, 8, PRIx16, expected_neg, CMT); + CHECK(TEST_MSG, uint, 32, 4, PRIx32, expected_neg, CMT); + + + /* Fill input vector with max value, to check saturation on + limits. 
*/ + VDUP(vector1, , uint, u, 8, 8, 0x7F); + VDUP(vector1, , uint, u, 16, 4, 0x7FFF); + VDUP(vector1, , uint, u, 32, 2, 0x7FFFFFFFLL); + + VDUP(vector2, q, int, s, 16, 8, 0x7FFF); + VDUP(vector2, q, int, s, 32, 4, 0x7FFFFFFF); + VDUP(vector2, q, int, s, 64, 2, 0x7FFFFFFFFFFFFFFFLL); + + /* shift by 1. */ +#undef CMT +#define CMT " (check cumulative saturation: shift by 1)" + TEST_VQRSHRUN_HIGH_N(int, s, 16, 8, 8, 16, 1); + TEST_VQRSHRUN_HIGH_N(int, s, 32, 16, 4, 8, 1); + TEST_VQRSHRUN_HIGH_N(int, s, 64, 32, 2, 4, 1); + + CHECK(TEST_MSG, uint, 8, 16, PRIx8, expected_max_sh1, CMT); + CHECK(TEST_MSG, uint, 16, 8, PRIx16, expected_max_sh1, CMT); + CHECK(TEST_MSG, uint, 32, 4, PRIx32, expected_max_sh1, CMT); + + + /* shift by max. */ +#undef CMT +#define CMT " (check cumulative saturation: shift by max, positive input)" + TEST_VQRSHRUN_HIGH_N(int, s, 16, 8, 8, 16, 8); + TEST_VQRSHRUN_HIGH_N(int, s, 32, 16, 4, 8, 16); + TEST_VQRSHRUN_HIGH_N(int, s, 64, 32, 2, 4, 32); + + CHECK(TEST_MSG, uint, 8, 16, PRIx8, expected_max_shmax, CMT); + CHECK(TEST_MSG, uint, 16, 8, PRIx16, expected_max_shmax, CMT); + CHECK(TEST_MSG, uint, 32, 4, PRIx32, expected_max_shmax, CMT); + + + /* Fill input vector with min value, to check saturation on limits. 
*/ + VDUP(vector1, , uint, u, 8, 8, 0x80); + VDUP(vector1, , uint, u, 16, 4, 0x8000); + VDUP(vector1, , uint, u, 32, 2, 0x80000000LL); + + VDUP(vector2, q, int, s, 16, 8, 0x8000); + VDUP(vector2, q, int, s, 32, 4, 0x80000000); + VDUP(vector2, q, int, s, 64, 2, 0x8000000000000000LL); + + /* shift by max */ +#undef CMT +#define CMT " (check cumulative saturation: shift by max, negative input)" + TEST_VQRSHRUN_HIGH_N(int, s, 16, 8, 8, 16, 8); + TEST_VQRSHRUN_HIGH_N(int, s, 32, 16, 4, 8, 16); + TEST_VQRSHRUN_HIGH_N(int, s, 64, 32, 2, 4, 32); + + CHECK(TEST_MSG, uint, 8, 16, PRIx8, expected_min_shmax, CMT); + CHECK(TEST_MSG, uint, 16, 8, PRIx16, expected_min_shmax, CMT); + CHECK(TEST_MSG, uint, 32, 4, PRIx32, expected_min_shmax, CMT); + + + /* Fill input vector with positive values, to check normal case. */ + VDUP(vector1, , uint, u, 8, 8, 0x12); + VDUP(vector1, , uint, u, 16, 4, 0x4321); + VDUP(vector1, , uint, u, 32, 2, 0xDEADBEEF); + + VDUP(vector2, q, int, s, 16, 8, 0x1234); + VDUP(vector2, q, int, s, 32, 4, 0x87654321); + VDUP(vector2, q, int, s, 64, 2, 0xDEADBEEF); + + /* shift arbitrary amount. */ +#undef CMT +#define CMT "" + TEST_VQRSHRUN_HIGH_N(int, s, 16, 8, 8, 16, 6); + TEST_VQRSHRUN_HIGH_N(int, s, 32, 16, 4, 8, 7); + TEST_VQRSHRUN_HIGH_N(int, s, 64, 32, 2, 4, 8); + + CHECK(TEST_MSG, uint, 8, 16, PRIx8, expected, CMT); + CHECK(TEST_MSG, uint, 16, 8, PRIx16, expected, CMT); + CHECK(TEST_MSG, uint, 32, 4, PRIx32, expected, CMT); +} + +int main (void) +{ + exec_vqrshrun_high_n (); + return 0; +} diff --git a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vqshrn_high_n.c b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vqshrn_high_n.c new file mode 100644 index 00000000000..72aecc15ba2 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vqshrn_high_n.c @@ -0,0 +1,190 @@ +#include +#include "arm-neon-ref.h" +#include "compute-ref-data.h" + +/* Expected results. 
*/ +VECT_VAR_DECL(expected,int,8,16) [] = { 0xf0, 0xf1, 0xf2, 0xf3, + 0xf4, 0xf5, 0xf6, 0xf7, + 0xf8, 0xf8, 0xf9, 0xf9, + 0xfa, 0xfa, 0xfb, 0xfb }; +VECT_VAR_DECL(expected,int,16,8) [] = { 0xfff0, 0xfff1, 0xfff2, 0xfff3, + 0xfff8, 0xfff8, 0xfff9, 0xfff9 }; +VECT_VAR_DECL(expected,int,32,4) [] = { 0xfffffff0, 0xfffffff1, + 0xfffffffc, 0xfffffffc }; +VECT_VAR_DECL(expected,uint,8,16) [] = { 0xf0, 0xf1, 0xf2, 0xf3, + 0xf4, 0xf5, 0xf6, 0xf7, + 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff }; +VECT_VAR_DECL(expected,uint,16,8) [] = { 0xfff0, 0xfff1, 0xfff2, 0xfff3, + 0xffff, 0xffff, 0xffff, 0xffff }; +VECT_VAR_DECL(expected,uint,32,4) [] = { 0xfffffff0, 0xfffffff1, + 0xffffffff, 0xffffffff }; + +/* Expected results with max input value shifted by 3. */ +VECT_VAR_DECL(expected_max_sh3,int,8,16) [] = { 0x7f, 0x7f, 0x7f, 0x7f, + 0x7f, 0x7f, 0x7f, 0x7f, + 0x7f, 0x7f, 0x7f, 0x7f, + 0x7f, 0x7f, 0x7f, 0x7f }; +VECT_VAR_DECL(expected_max_sh3,int,16,8) [] = { 0x7fff, 0x7fff, 0x7fff, 0x7fff, + 0x7fff, 0x7fff, 0x7fff, 0x7fff }; +VECT_VAR_DECL(expected_max_sh3,int,32,4) [] = { 0x7fffffff, 0x7fffffff, + 0x7fffffff, 0x7fffffff }; +VECT_VAR_DECL(expected_max_sh3,uint,8,16) [] = { 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff }; +VECT_VAR_DECL(expected_max_sh3,uint,16,8) [] = { 0xffff, 0xffff, 0xffff, 0xffff, + 0xffff, 0xffff, 0xffff, 0xffff }; +VECT_VAR_DECL(expected_max_sh3,uint,32,4) [] = { 0xffffffff, 0xffffffff, + 0xffffffff, 0xffffffff }; + +/* Expected results with max input value shifted by type size. 
*/ +VECT_VAR_DECL(expected_max_shmax,int,8,16) [] = { 0x7f, 0x7f, 0x7f, 0x7f, + 0x7f, 0x7f, 0x7f, 0x7f, + 0x7f, 0x7f, 0x7f, 0x7f, + 0x7f, 0x7f, 0x7f, 0x7f }; +VECT_VAR_DECL(expected_max_shmax,int,16,8) [] = { 0x7fff, 0x7fff, 0x7fff, 0x7fff, + 0x7fff, 0x7fff, 0x7fff, 0x7fff }; +VECT_VAR_DECL(expected_max_shmax,int,32,4) [] = { 0x7fffffff, 0x7fffffff, + 0x7fffffff, 0x7fffffff }; +VECT_VAR_DECL(expected_max_shmax,uint,8,16) [] = { 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff }; +VECT_VAR_DECL(expected_max_shmax,uint,16,8) [] = { 0xffff, 0xffff, 0xffff, 0xffff, + 0xffff, 0xffff, 0xffff, 0xffff }; +VECT_VAR_DECL(expected_max_shmax,uint,32,4) [] = { 0xffffffff, 0xffffffff, + 0xffffffff, 0xffffffff }; + +#define INSN vqshrn_high_n +#define TEST_MSG "VQSHRN_HIGH_N" + +#define FNNAME1(NAME) void exec_ ## NAME (void) +#define FNNAME(NAME) FNNAME1(NAME) + +FNNAME (INSN) +{ + /* Basic test: y=vqshrn_high_n(x1,x2,v), then store the result. */ +#define TEST_VQSHRN_HIGH_N2(INSN, T1, T2, W, W2, N, N2, V) \ + VECT_VAR(vector_res, T1, W2, N2) = \ + INSN##_##T2##W(VECT_VAR(vector1, T1, W2, N), \ + VECT_VAR(vector2, T1, W, N), V); \ + vst1q_##T2##W2(VECT_VAR(result, T1, W2, N2), \ + VECT_VAR(vector_res, T1, W2, N2)); + + /* Two auxliary macros are necessary to expand INSN */ +#define TEST_VQSHRN_HIGH_N1(INSN, T1, T2, W, W2, N, N2, V) \ + TEST_VQSHRN_HIGH_N2(INSN, T1, T2, W, W2, N, N2, V) + +#define TEST_VQSHRN_HIGH_N(T1, T2, W, W2, N, N2, V) \ + TEST_VQSHRN_HIGH_N1(INSN, T1, T2, W, W2, N, N2, V) + + + DECL_VARIABLE(vector1, int, 8, 8); + DECL_VARIABLE(vector1, int, 16, 4); + DECL_VARIABLE(vector1, int, 32, 2); + DECL_VARIABLE(vector1, uint, 8, 8); + DECL_VARIABLE(vector1, uint, 16, 4); + DECL_VARIABLE(vector1, uint, 32, 2); + + /* vector is twice as large as vector_res. 
*/ + DECL_VARIABLE(vector2, int, 16, 8); + DECL_VARIABLE(vector2, int, 32, 4); + DECL_VARIABLE(vector2, int, 64, 2); + DECL_VARIABLE(vector2, uint, 16, 8); + DECL_VARIABLE(vector2, uint, 32, 4); + DECL_VARIABLE(vector2, uint, 64, 2); + + DECL_VARIABLE(vector_res, int, 8, 16); + DECL_VARIABLE(vector_res, int, 16, 8); + DECL_VARIABLE(vector_res, int, 32, 4); + DECL_VARIABLE(vector_res, uint, 8, 16); + DECL_VARIABLE(vector_res, uint, 16, 8); + DECL_VARIABLE(vector_res, uint, 32, 4); + + clean_results (); + + VLOAD(vector1, buffer, , int, s, 8, 8); + VLOAD(vector1, buffer, , int, s, 16, 4); + VLOAD(vector1, buffer, , int, s, 32, 2); + VLOAD(vector1, buffer, , uint, u, 8, 8); + VLOAD(vector1, buffer, , uint, u, 16, 4); + VLOAD(vector1, buffer, , uint, u, 32, 2); + + VLOAD(vector2, buffer, q, int, s, 16, 8); + VLOAD(vector2, buffer, q, int, s, 32, 4); + VLOAD(vector2, buffer, q, int, s, 64, 2); + VLOAD(vector2, buffer, q, uint, u, 16, 8); + VLOAD(vector2, buffer, q, uint, u, 32, 4); + VLOAD(vector2, buffer, q, uint, u, 64, 2); + + /* Choose shift amount arbitrarily. */ +#define CMT "" + TEST_VQSHRN_HIGH_N(int, s, 16, 8, 8, 16, 1); + TEST_VQSHRN_HIGH_N(int, s, 32, 16, 4, 8, 1); + TEST_VQSHRN_HIGH_N(int, s, 64, 32, 2, 4, 2); + TEST_VQSHRN_HIGH_N(uint, u, 16, 8, 8, 16, 2); + TEST_VQSHRN_HIGH_N(uint, u, 32, 16, 4, 8, 3); + TEST_VQSHRN_HIGH_N(uint, u, 64, 32, 2, 4, 3); + + CHECK(TEST_MSG, int, 8, 16, PRIx8, expected, CMT); + CHECK(TEST_MSG, int, 16, 8, PRIx16, expected, CMT); + CHECK(TEST_MSG, int, 32, 4, PRIx32, expected, CMT); + CHECK(TEST_MSG, uint, 8, 16, PRIx8, expected, CMT); + CHECK(TEST_MSG, uint, 16, 8, PRIx16, expected, CMT); + CHECK(TEST_MSG, uint, 32, 4, PRIx32, expected, CMT); + + /* Use max possible value as input. 
*/ + VDUP(vector1, , int, s, 8, 8, 0x7F); + VDUP(vector1, , int, s, 16, 4, 0x7FFF); + VDUP(vector1, , int, s, 32, 2, 0x7FFFFFFFLL); + VDUP(vector1, , uint, u, 8, 8, 0xFF); + VDUP(vector1, , uint, u, 16, 4, 0xFFFF); + VDUP(vector1, , uint, u, 32, 2, 0xFFFFFFFFULL); + + VDUP(vector2, q, int, s, 16, 8, 0x7FFF); + VDUP(vector2, q, int, s, 32, 4, 0x7FFFFFFF); + VDUP(vector2, q, int, s, 64, 2, 0x7FFFFFFFFFFFFFFFLL); + VDUP(vector2, q, uint, u, 16, 8, 0xFFFF); + VDUP(vector2, q, uint, u, 32, 4, 0xFFFFFFFF); + VDUP(vector2, q, uint, u, 64, 2, 0xFFFFFFFFFFFFFFFFULL); + +#undef CMT +#define CMT " (check saturation: shift by 3)" + TEST_VQSHRN_HIGH_N(int, s, 16, 8, 8, 16, 3); + TEST_VQSHRN_HIGH_N(int, s, 32, 16, 4, 8, 3); + TEST_VQSHRN_HIGH_N(int, s, 64, 32, 2, 4, 3); + TEST_VQSHRN_HIGH_N(uint, u, 16, 8, 8, 16, 3); + TEST_VQSHRN_HIGH_N(uint, u, 32, 16, 4, 8, 3); + TEST_VQSHRN_HIGH_N(uint, u, 64, 32, 2, 4, 3); + + CHECK(TEST_MSG, int, 8, 16, PRIx8, expected_max_sh3, CMT); + CHECK(TEST_MSG, int, 16, 8, PRIx16, expected_max_sh3, CMT); + CHECK(TEST_MSG, int, 32, 4, PRIx32, expected_max_sh3, CMT); + CHECK(TEST_MSG, uint, 8, 16, PRIx8, expected_max_sh3, CMT); + CHECK(TEST_MSG, uint, 16, 8, PRIx16, expected_max_sh3, CMT); + CHECK(TEST_MSG, uint, 32, 4, PRIx32, expected_max_sh3, CMT); + + +#undef CMT +#define CMT " (check saturation: shift by max)" + TEST_VQSHRN_HIGH_N(int, s, 16, 8, 8, 16, 8); + TEST_VQSHRN_HIGH_N(int, s, 32, 16, 4, 8, 16); + TEST_VQSHRN_HIGH_N(int, s, 64, 32, 2, 4, 32); + TEST_VQSHRN_HIGH_N(uint, u, 16, 8, 8, 16, 8); + TEST_VQSHRN_HIGH_N(uint, u, 32, 16, 4, 8, 16); + TEST_VQSHRN_HIGH_N(uint, u, 64, 32, 2, 4, 32); + + CHECK(TEST_MSG, int, 8, 16, PRIx8, expected_max_shmax, CMT); + CHECK(TEST_MSG, int, 16, 8, PRIx16, expected_max_shmax, CMT); + CHECK(TEST_MSG, int, 32, 4, PRIx32, expected_max_shmax, CMT); + CHECK(TEST_MSG, uint, 8, 16, PRIx8, expected_max_shmax, CMT); + CHECK(TEST_MSG, uint, 16, 8, PRIx16, expected_max_shmax, CMT); + CHECK(TEST_MSG, uint, 32, 4, 
PRIx32, expected_max_shmax, CMT); +} + +int main (void) +{ + exec_vqshrn_high_n (); + return 0; +} diff --git a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vqshrun_high_n.c b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vqshrun_high_n.c new file mode 100644 index 00000000000..4885c029d1a --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vqshrun_high_n.c @@ -0,0 +1,140 @@ +#include <arm_neon.h> +#include "arm-neon-ref.h" +#include "compute-ref-data.h" + +/* Expected results with negative input. */ +VECT_VAR_DECL(expected_neg,uint,8,16) [] = { 0xfe, 0xfe, 0xfe, 0xfe, + 0xfe, 0xfe, 0xfe, 0xfe, + 0x0, 0x0, 0x0, 0x0, + 0x0, 0x0, 0x0, 0x0 }; +VECT_VAR_DECL(expected_neg,uint,16,8) [] = { 0xfffd, 0xfffd, 0xfffd, 0xfffd, + 0x0, 0x0, 0x0, 0x0 }; +VECT_VAR_DECL(expected_neg,uint,32,4) [] = { 0xfffffffc, 0xfffffffc, + 0x0, 0x0 }; + +/* Expected results with max input value shifted by 1. */ +VECT_VAR_DECL(expected_max_sh1,uint,8,16) [] = { 0x7f, 0x7f, 0x7f, 0x7f, + 0x7f, 0x7f, 0x7f, 0x7f, + 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff }; +VECT_VAR_DECL(expected_max_sh1,uint,16,8) [] = { 0x7fff, 0x7fff, 0x7fff, 0x7fff, + 0xffff, 0xffff, 0xffff, 0xffff }; +VECT_VAR_DECL(expected_max_sh1,uint,32,4) [] = { 0x7fffffff, 0x7fffffff, + 0xffffffff, 0xffffffff }; + +/* Expected results. */ +VECT_VAR_DECL(expected,uint,8,16) [] = { 0x12, 0x12, 0x12, 0x12, + 0x12, 0x12, 0x12, 0x12, + 0x48, 0x48, 0x48, 0x48, + 0x48, 0x48, 0x48, 0x48 }; +VECT_VAR_DECL(expected,uint,16,8) [] = { 0x4321, 0x4321, 0x4321, 0x4321, + 0x0, 0x0, 0x0, 0x0 }; +VECT_VAR_DECL(expected,uint,32,4) [] = { 0xdeadbeef, 0xdeadbeef, + 0xdeadbe, 0xdeadbe }; + +#define INSN vqshrun_high_n +#define TEST_MSG "VQSHRUN_HIGH_N" + +#define FNNAME1(NAME) void exec_ ## NAME (void) +#define FNNAME(NAME) FNNAME1(NAME) + +FNNAME (INSN) +{ + /* Basic test: y=vqshrun_high_n(x1,x2,v), then store the result.
*/ +#define TEST_VQSHRUN_HIGH_N2(INSN, T1, T2, W, W2, N, N2, V) \ + VECT_VAR(vector_res, uint, W2, N2) = \ + INSN##_##T2##W(VECT_VAR(vector1,uint, W2, N), \ + VECT_VAR(vector2, T1, W, N), V); \ + vst1q_u##W2(VECT_VAR(result, uint, W2, N2), \ + VECT_VAR(vector_res, uint, W2, N2)); \ + + /* Two auxliary macros are necessary to expand INSN */ +#define TEST_VQSHRUN_HIGH_N1(INSN, T1, T2, W, W2, N, N2, V) \ + TEST_VQSHRUN_HIGH_N2(INSN, T1, T2, W, W2, N, N2, V) + +#define TEST_VQSHRUN_HIGH_N(T1, T2, W, W2, N, N2, V) \ + TEST_VQSHRUN_HIGH_N1(INSN, T1, T2, W, W2, N, N2, V) + + + DECL_VARIABLE(vector1, uint, 8, 8); + DECL_VARIABLE(vector1, uint, 16, 4); + DECL_VARIABLE(vector1, uint, 32, 2); + + /* vector is twice as large as vector_res. */ + DECL_VARIABLE(vector2, int, 16, 8); + DECL_VARIABLE(vector2, int, 32, 4); + DECL_VARIABLE(vector2, int, 64, 2); + + DECL_VARIABLE(vector_res, uint, 8, 16); + DECL_VARIABLE(vector_res, uint, 16, 8); + DECL_VARIABLE(vector_res, uint, 32, 4); + + clean_results (); + + /* Fill input vector with negative values, to check saturation on + limits. */ + VDUP(vector1, , uint, u, 8, 8, -2); + VDUP(vector1, , uint, u, 16, 4, -3); + VDUP(vector1, , uint, u, 32, 2, -4); + + VDUP(vector2, q, int, s, 16, 8, -2); + VDUP(vector2, q, int, s, 32, 4, -3); + VDUP(vector2, q, int, s, 64, 2, -4); + + /* Choose shift amount arbitrarily. */ +#define CMT " (negative input)" + TEST_VQSHRUN_HIGH_N(int, s, 16, 8, 8, 16, 3); + TEST_VQSHRUN_HIGH_N(int, s, 32, 16, 4, 8, 4); + TEST_VQSHRUN_HIGH_N(int, s, 64, 32, 2, 4, 2); + + CHECK(TEST_MSG, uint, 8, 16, PRIx8, expected_neg, CMT); + CHECK(TEST_MSG, uint, 16, 8, PRIx16, expected_neg, CMT); + CHECK(TEST_MSG, uint, 32, 4, PRIx32, expected_neg, CMT); + + + /* Fill input vector with max value, to check saturation on + limits. 
*/ + VDUP(vector1, , uint, u, 8, 8, 0x7F); + VDUP(vector1, , uint, u, 16, 4, 0x7FFF); + VDUP(vector1, , uint, u, 32, 2, 0x7FFFFFFFLL); + + VDUP(vector2, q, int, s, 16, 8, 0x7FFF); + VDUP(vector2, q, int, s, 32, 4, 0x7FFFFFFF); + VDUP(vector2, q, int, s, 64, 2, 0x7FFFFFFFFFFFFFFFLL); + +#undef CMT +#define CMT " (check cumulative saturation)" + TEST_VQSHRUN_HIGH_N(int, s, 16, 8, 8, 16, 1); + TEST_VQSHRUN_HIGH_N(int, s, 32, 16, 4, 8, 1); + TEST_VQSHRUN_HIGH_N(int, s, 64, 32, 2, 4, 1); + + CHECK(TEST_MSG, uint, 8, 16, PRIx8, expected_max_sh1, CMT); + CHECK(TEST_MSG, uint, 16, 8, PRIx16, expected_max_sh1, CMT); + CHECK(TEST_MSG, uint, 32, 4, PRIx32, expected_max_sh1, CMT); + + + /* Fill input vector with positive values, to check normal case. */ + VDUP(vector1, , uint, u, 8, 8, 0x12); + VDUP(vector1, , uint, u, 16, 4, 0x4321); + VDUP(vector1, , uint, u, 32, 2, 0xDEADBEEF); + + VDUP(vector2, q, int, s, 16, 8, 0x1234); + VDUP(vector2, q, int, s, 32, 4, 0x87654321); + VDUP(vector2, q, int, s, 64, 2, 0xDEADBEEF); + +#undef CMT +#define CMT "" + TEST_VQSHRUN_HIGH_N(int, s, 16, 8, 8, 16, 6); + TEST_VQSHRUN_HIGH_N(int, s, 32, 16, 4, 8, 7); + TEST_VQSHRUN_HIGH_N(int, s, 64, 32, 2, 4, 8); + + CHECK(TEST_MSG, uint, 8, 16, PRIx8, expected, CMT); + CHECK(TEST_MSG, uint, 16, 8, PRIx16, expected, CMT); + CHECK(TEST_MSG, uint, 32, 4, PRIx32, expected, CMT); +} + +int main (void) +{ + exec_vqshrun_high_n (); + return 0; +} diff --git a/gcc/testsuite/gcc.target/aarch64/narrow_high-intrinsics.c b/gcc/testsuite/gcc.target/aarch64/narrow_high-intrinsics.c index 8b8a6302692..07d78030058 100644 --- a/gcc/testsuite/gcc.target/aarch64/narrow_high-intrinsics.c +++ b/gcc/testsuite/gcc.target/aarch64/narrow_high-intrinsics.c @@ -113,12 +113,12 @@ ONE (vmovn_high, uint32x4_t, uint32x2_t, uint64x2_t, u64) /* { dg-final { scan-assembler-times "raddhn2\\tv" 6} } */ /* { dg-final { scan-assembler-times "\\trshrn2 v" 6} } */ /* { dg-final { scan-assembler-times "\\tshrn2 v" 6} } */ -/* { dg-final { 
scan-assembler-times "sqshrun2 v" 3} } */ -/* { dg-final { scan-assembler-times "sqrshrun2 v" 3} } */ -/* { dg-final { scan-assembler-times "sqshrn2 v" 3} } */ -/* { dg-final { scan-assembler-times "uqshrn2 v" 3} } */ -/* { dg-final { scan-assembler-times "sqrshrn2 v" 3} } */ -/* { dg-final { scan-assembler-times "uqrshrn2 v" 3} } */ +/* { dg-final { scan-assembler-times "sqshrun2\\tv" 3} } */ +/* { dg-final { scan-assembler-times "sqrshrun2\\tv" 3} } */ +/* { dg-final { scan-assembler-times "sqshrn2\\tv" 3} } */ +/* { dg-final { scan-assembler-times "uqshrn2\\tv" 3} } */ +/* { dg-final { scan-assembler-times "sqrshrn2\\tv" 3} } */ +/* { dg-final { scan-assembler-times "uqrshrn2\\tv" 3} } */ /* { dg-final { scan-assembler-times "uqxtn2 v" 3} } */ /* { dg-final { scan-assembler-times "sqxtn2 v" 3} } */ /* { dg-final { scan-assembler-times "sqxtun2 v" 3} } */