From c46f9051654ff52ed083084c109e9247b7fce9aa Mon Sep 17 00:00:00 2001 From: Julia Koval Date: Thu, 8 Jun 2017 15:04:43 +0200 Subject: [PATCH] Add mov[us]wb store intrinsics. gcc/ * config/i386/avx512bwintrin.h (_mm512_mask_cvtepi16_storeu_epi8, _mm512_mask_cvtsepi16_storeu_epi8, _mm512_mask_cvtusepi16_storeu_epi8): New intrinsics. * config/i386/avx512vlbwintrin.h (_mm256_mask_cvtepi16_storeu_epi8, _mm_mask_cvtsepi16_storeu_epi8, _mm256_mask_cvtsepi16_storeu_epi8, _mm_mask_cvtusepi16_storeu_epi8, _mm256_mask_cvtusepi16_storeu_epi8, _mm_mask_cvtepi16_storeu_epi8): New intrinsics. * config/i386/i386-builtin-types.def (PV8Q, V8QI): New pointer type. (VOID_FTYPE_PV32QI_V32HI_USI, VOID_FTYPE_PV8QI_V8HI_UQI, VOID_FTYPE_PV16QI_V16HI_UHI): New function types. * config/i386/i386-builtin.def (__builtin_ia32_pmovwb128mem_mask, __builtin_ia32_pmovwb256mem_mask, __builtin_ia32_pmovswb128mem_mask, __builtin_ia32_pmovswb256mem_mask, __builtin_ia32_pmovuswb128mem_mask, __builtin_ia32_pmovuswb256mem_mask, __builtin_ia32_pmovuswb512mem_mask, __builtin_ia32_pmovswb512mem_mask) __builtin_ia32_pmovwb512mem_mask): New builtins. gcc/testsuite/ * gcc.target/i386/avx512bw-vpmovswb-1.c: Add new intrinsics to test. * gcc.target/i386/avx512bw-vpmovswb-2.c: Ditto. * gcc.target/i386/avx512bw-vpmovuswb-1.c: Ditto. * gcc.target/i386/avx512bw-vpmovuswb-2.c: Ditto. * gcc.target/i386/avx512bw-vpmovwb-1.c: Ditto. * gcc.target/i386/avx512bw-vpmovwb-2.c: Ditto. From-SVN: r249012 --- gcc/ChangeLog | 19 +++++++++ gcc/config/i386/avx512bwintrin.h | 21 ++++++++++ gcc/config/i386/avx512vlbwintrin.h | 42 +++++++++++++++++++ gcc/config/i386/i386-builtin-types.def | 4 ++ gcc/config/i386/i386-builtin.def | 9 ++++ gcc/config/i386/i386.c | 3 ++ gcc/testsuite/ChangeLog | 9 ++++ .../gcc.target/i386/avx512bw-vpmovswb-1.c | 12 ++++-- .../gcc.target/i386/avx512bw-vpmovswb-2.c | 10 +++++ .../gcc.target/i386/avx512bw-vpmovuswb-1.c | 12 ++++-- .../gcc.target/i386/avx512bw-vpmovuswb-2.c | 10 +++++ .../gcc.target/i386/avx512bw-vpmovwb-1.c | 12 ++++-- .../gcc.target/i386/avx512bw-vpmovwb-2.c | 10 +++++ 13 files changed, 164 insertions(+), 9 deletions(-) diff --git a/gcc/ChangeLog b/gcc/ChangeLog index 461c06b3623..d91f3841eff 100644 --- a/gcc/ChangeLog +++ b/gcc/ChangeLog @@ -1,3 +1,22 @@ +2017-08-08 Julia Koval + + * config/i386/avx512bwintrin.h (_mm512_mask_cvtepi16_storeu_epi8, + _mm512_mask_cvtsepi16_storeu_epi8, + _mm512_mask_cvtusepi16_storeu_epi8): New intrinsics. + * config/i386/avx512vlbwintrin.h (_mm256_mask_cvtepi16_storeu_epi8, + _mm_mask_cvtsepi16_storeu_epi8, _mm256_mask_cvtsepi16_storeu_epi8, + _mm_mask_cvtusepi16_storeu_epi8, _mm256_mask_cvtusepi16_storeu_epi8, + _mm_mask_cvtepi16_storeu_epi8): New intrinsics. + * config/i386/i386-builtin-types.def (PV8Q, V8QI): New pointer type. + (VOID_FTYPE_PV32QI_V32HI_USI, VOID_FTYPE_PV8QI_V8HI_UQI, + VOID_FTYPE_PV16QI_V16HI_UHI): New function types. + * config/i386/i386-builtin.def (__builtin_ia32_pmovwb128mem_mask, + __builtin_ia32_pmovwb256mem_mask, __builtin_ia32_pmovswb128mem_mask, + __builtin_ia32_pmovswb256mem_mask, __builtin_ia32_pmovuswb128mem_mask, + __builtin_ia32_pmovuswb256mem_mask, + __builtin_ia32_pmovuswb512mem_mask, __builtin_ia32_pmovswb512mem_mask) + __builtin_ia32_pmovwb512mem_mask): New builtins. + 2017-08-08 Julia Koval PR target/73350,80862 diff --git a/gcc/config/i386/avx512bwintrin.h b/gcc/config/i386/avx512bwintrin.h index 2b0fb6b5f08..71a75ee54df 100644 --- a/gcc/config/i386/avx512bwintrin.h +++ b/gcc/config/i386/avx512bwintrin.h @@ -425,6 +425,13 @@ _mm512_cvtepi16_epi8 (__m512i __A) (__mmask32) -1); } +extern __inline void +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_mask_cvtepi16_storeu_epi8 (void * __P, __mmask32 __M, __m512i __A) +{ + __builtin_ia32_pmovwb512mem_mask ((__v32qi *) __P, (__v32hi) __A, __M); +} + extern __inline __m256i __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm512_mask_cvtepi16_epi8 (__m256i __O, __mmask32 __M, __m512i __A) @@ -452,6 +459,13 @@ _mm512_cvtsepi16_epi8 (__m512i __A) (__mmask32) -1); } +extern __inline void +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_mask_cvtsepi16_storeu_epi8 (void * __P, __mmask32 __M, __m512i __A) +{ + __builtin_ia32_pmovswb512mem_mask ((__v32qi *) __P, (__v32hi) __A, __M); +} + extern __inline __m256i __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm512_mask_cvtsepi16_epi8 (__m256i __O, __mmask32 __M, __m512i __A) @@ -489,6 +503,13 @@ _mm512_mask_cvtusepi16_epi8 (__m256i __O, __mmask32 __M, __m512i __A) __M); } +extern __inline void +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_mask_cvtusepi16_storeu_epi8 (void * __P, __mmask32 __M, __m512i __A) +{ + __builtin_ia32_pmovuswb512mem_mask ((__v32qi *) __P, (__v32hi) __A, __M); +} + extern __inline __m256i __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm512_maskz_cvtusepi16_epi8 (__mmask32 __M, __m512i __A) diff --git a/gcc/config/i386/avx512vlbwintrin.h b/gcc/config/i386/avx512vlbwintrin.h index 0dc7d7a2cb5..a81180102f6 100644 --- a/gcc/config/i386/avx512vlbwintrin.h +++ b/gcc/config/i386/avx512vlbwintrin.h @@ -216,6 +216,13 @@ _mm256_cvtepi16_epi8 (__m256i __A) (__mmask16) -1); } +extern __inline void +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm256_mask_cvtepi16_storeu_epi8 (void * __P, __mmask16 __M,__m256i __A) +{ + __builtin_ia32_pmovwb256mem_mask ((__v16qi *) __P , (__v16hi) __A, __M); +} + extern __inline __m128i __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm256_mask_cvtepi16_epi8 (__m128i __O, __mmask16 __M, __m256i __A) @@ -244,6 +251,13 @@ _mm_cvtsepi16_epi8 (__m128i __A) (__mmask8) -1); } +extern __inline void +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_mask_cvtsepi16_storeu_epi8 (void * __P, __mmask8 __M,__m128i __A) +{ + __builtin_ia32_pmovswb128mem_mask ((__v8qi *) __P , (__v8hi) __A, __M); +} + extern __inline __m128i __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm_mask_cvtsepi16_epi8 (__m128i __O, __mmask8 __M, __m128i __A) @@ -272,6 +286,13 @@ _mm256_cvtsepi16_epi8 (__m256i __A) (__mmask16) -1); } +extern __inline void +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm256_mask_cvtsepi16_storeu_epi8 (void * __P, __mmask16 __M,__m256i __A) +{ + __builtin_ia32_pmovswb256mem_mask ((__v16qi *) __P , (__v16hi) __A, __M); +} + extern __inline __m128i __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm256_mask_cvtsepi16_epi8 (__m128i __O, __mmask16 __M, __m256i __A) @@ -300,6 +321,13 @@ _mm_cvtusepi16_epi8 (__m128i __A) (__mmask8) -1); } +extern __inline void +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_mask_cvtusepi16_storeu_epi8 (void * __P, __mmask8 __M,__m128i __A) +{ + __builtin_ia32_pmovuswb128mem_mask ((__v8qi *) __P , (__v8hi) __A, __M); +} + extern __inline __m128i __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm_mask_cvtusepi16_epi8 (__m128i __O, __mmask8 __M, __m128i __A) @@ -329,6 +357,13 @@ _mm256_cvtusepi16_epi8 (__m256i __A) (__mmask16) -1); } +extern __inline void +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm256_mask_cvtusepi16_storeu_epi8 (void * __P, __mmask16 __M,__m256i __A) +{ + __builtin_ia32_pmovuswb256mem_mask ((__v16qi *) __P , (__v16hi) __A, __M); +} + extern __inline __m128i __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm256_mask_cvtusepi16_epi8 (__m128i __O, __mmask16 __M, __m256i __A) @@ -4009,6 +4044,13 @@ _mm_cvtepi16_epi8 (__m128i __A) (__mmask8) -1); } +extern __inline void +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_mask_cvtepi16_storeu_epi8 (void * __P, __mmask8 __M,__m128i __A) +{ + __builtin_ia32_pmovwb128mem_mask ((__v8qi *) __P , (__v8hi) __A, __M); +} + extern __inline __m128i __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm_mask_cvtepi16_epi8 (__m128i __O, __mmask8 __M, __m128i __A) diff --git a/gcc/config/i386/i386-builtin-types.def b/gcc/config/i386/i386-builtin-types.def index 8de30862c58..19d876d73f2 100644 --- a/gcc/config/i386/i386-builtin-types.def +++ b/gcc/config/i386/i386-builtin-types.def @@ -155,6 +155,7 @@ DEF_POINTER_TYPE (PV4SF, V4SF) DEF_POINTER_TYPE (PV8DF, V8DF) DEF_POINTER_TYPE (PV8SF, V8SF) DEF_POINTER_TYPE (PV4SI, V4SI) +DEF_POINTER_TYPE (PV8QI, V8QI) DEF_POINTER_TYPE (PV8HI, V8HI) DEF_POINTER_TYPE (PV8SI, V8SI) DEF_POINTER_TYPE (PV8DI, V8DI) @@ -964,6 +965,7 @@ DEF_FUNCTION_TYPE (QI, V2DF, INT, UQI) DEF_FUNCTION_TYPE (HI, V16SF, INT, UHI) DEF_FUNCTION_TYPE (QI, V8SF, INT, UQI) DEF_FUNCTION_TYPE (QI, V4SF, INT, UQI) +DEF_FUNCTION_TYPE (VOID, PV32QI, V32HI, USI) DEF_FUNCTION_TYPE (V2DI, V2DI, V2DI, UINT, UINT) DEF_FUNCTION_TYPE (V4HI, HI, HI, HI, HI) @@ -1106,6 +1108,8 @@ DEF_FUNCTION_TYPE (VOID, PVOID, QI, V2DI, V2DI, INT) DEF_FUNCTION_TYPE (VOID, QI, V8SI, PCVOID, INT, INT) DEF_FUNCTION_TYPE (VOID, HI, V16SI, PCVOID, INT, INT) DEF_FUNCTION_TYPE (VOID, QI, V8DI, PCVOID, INT, INT) +DEF_FUNCTION_TYPE (VOID, PV8QI, V8HI, UQI) +DEF_FUNCTION_TYPE (VOID, PV16QI, V16HI, UHI) DEF_FUNCTION_TYPE_ALIAS (V2DF_FTYPE_V2DF, ROUND) DEF_FUNCTION_TYPE_ALIAS (V4DF_FTYPE_V4DF, ROUND) diff --git a/gcc/config/i386/i386-builtin.def b/gcc/config/i386/i386-builtin.def index 2663cb9faa7..23e88839048 100644 --- a/gcc/config/i386/i386-builtin.def +++ b/gcc/config/i386/i386-builtin.def @@ -378,6 +378,15 @@ BDESC (OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ss_truncatev8siv8hi2_mask_sto BDESC (OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ss_truncatev4siv4hi2_mask_store, "__builtin_ia32_pmovsdw128mem_mask", IX86_BUILTIN_PMOVSDW128_MEM, UNKNOWN, (int) VOID_FTYPE_PV8HI_V4SI_UQI) BDESC (OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_us_truncatev8siv8hi2_mask_store, "__builtin_ia32_pmovusdw256mem_mask", IX86_BUILTIN_PMOVUSDW256_MEM, UNKNOWN, (int) VOID_FTYPE_PV8HI_V8SI_UQI) BDESC (OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_us_truncatev4siv4hi2_mask_store, "__builtin_ia32_pmovusdw128mem_mask", IX86_BUILTIN_PMOVUSDW128_MEM, UNKNOWN, (int) VOID_FTYPE_PV8HI_V4SI_UQI) +BDESC (OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_truncatev8hiv8qi2_mask_store, "__builtin_ia32_pmovwb128mem_mask", IX86_BUILTIN_PMOVWB128_MEM, UNKNOWN, (int) VOID_FTYPE_PV8QI_V8HI_UQI) +BDESC (OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_truncatev16hiv16qi2_mask_store, "__builtin_ia32_pmovwb256mem_mask", IX86_BUILTIN_PMOVWB256_MEM, UNKNOWN, (int) VOID_FTYPE_PV16QI_V16HI_UHI) +BDESC (OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ss_truncatev8hiv8qi2_mask_store, "__builtin_ia32_pmovswb128mem_mask", IX86_BUILTIN_PMOVSWB128_MEM, UNKNOWN, (int) VOID_FTYPE_PV8QI_V8HI_UQI) +BDESC (OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ss_truncatev16hiv16qi2_mask_store, "__builtin_ia32_pmovswb256mem_mask", IX86_BUILTIN_PMOVSWB256_MEM, UNKNOWN, (int) VOID_FTYPE_PV16QI_V16HI_UHI) +BDESC (OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_us_truncatev8hiv8qi2_mask_store, "__builtin_ia32_pmovuswb128mem_mask", IX86_BUILTIN_PMOVUSWB128_MEM, UNKNOWN, (int) VOID_FTYPE_PV8QI_V8HI_UQI) +BDESC (OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_us_truncatev16hiv16qi2_mask_store, "__builtin_ia32_pmovuswb256mem_mask", IX86_BUILTIN_PMOVUSWB256_MEM, UNKNOWN, (int) VOID_FTYPE_PV16QI_V16HI_UHI) +BDESC (OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_us_truncatev32hiv32qi2_mask_store, "__builtin_ia32_pmovuswb512mem_mask", IX86_BUILTIN_PMOVUSWB512_MEM, UNKNOWN, (int) VOID_FTYPE_PV32QI_V32HI_USI) +BDESC (OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_ss_truncatev32hiv32qi2_mask_store, "__builtin_ia32_pmovswb512mem_mask", IX86_BUILTIN_PMOVSWB512_MEM, UNKNOWN, (int) VOID_FTYPE_PV32QI_V32HI_USI) +BDESC (OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_truncatev32hiv32qi2_mask_store, "__builtin_ia32_pmovwb512mem_mask", IX86_BUILTIN_PMOVWB512_MEM, UNKNOWN, (int) VOID_FTYPE_PV32QI_V32HI_USI) /* RDPKRU and WRPKRU. */ BDESC (OPTION_MASK_ISA_PKU, CODE_FOR_rdpkru, "__builtin_ia32_rdpkru", IX86_BUILTIN_RDPKRU, UNKNOWN, (int) UNSIGNED_FTYPE_VOID) diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c index 94265b0b471..d5c2d46bf5e 100644 --- a/gcc/config/i386/i386.c +++ b/gcc/config/i386/i386.c @@ -37033,6 +37033,9 @@ ix86_expand_special_args_builtin (const struct builtin_description *d, case VOID_FTYPE_PFLOAT_V16SF_UHI: case VOID_FTYPE_PFLOAT_V8SF_UQI: case VOID_FTYPE_PFLOAT_V4SF_UQI: + case VOID_FTYPE_PV32QI_V32HI_USI: + case VOID_FTYPE_PV16QI_V16HI_UHI: + case VOID_FTYPE_PV8QI_V8HI_UQI: nargs = 2; klass = store; /* Reserve memory operand for target. */ diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog index 59b25749f4b..64acda40d74 100644 --- a/gcc/testsuite/ChangeLog +++ b/gcc/testsuite/ChangeLog @@ -1,3 +1,12 @@ +2017-08-08 Julia Koval + + * gcc.target/i386/avx512bw-vpmovswb-1.c: Add new intrinsics to test. + * gcc.target/i386/avx512bw-vpmovswb-2.c: Ditto. + * gcc.target/i386/avx512bw-vpmovuswb-1.c: Ditto. + * gcc.target/i386/avx512bw-vpmovuswb-2.c: Ditto. + * gcc.target/i386/avx512bw-vpmovwb-1.c: Ditto. + * gcc.target/i386/avx512bw-vpmovwb-2.c: Ditto. + 2017-06-08 Marek Polacek PR sanitize/80932 diff --git a/gcc/testsuite/gcc.target/i386/avx512bw-vpmovswb-1.c b/gcc/testsuite/gcc.target/i386/avx512bw-vpmovswb-1.c index b5b6ef79a87..6bb87d25f4d 100644 --- a/gcc/testsuite/gcc.target/i386/avx512bw-vpmovswb-1.c +++ b/gcc/testsuite/gcc.target/i386/avx512bw-vpmovswb-1.c @@ -3,18 +3,21 @@ /* { dg-final { scan-assembler-times "vpmovswb\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\[^\n\]*%xmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */ /* { dg-final { scan-assembler-times "vpmovswb\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\[^\n\]*%xmm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */ /* { dg-final { scan-assembler-times "vpmovswb\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\[^\n\]*%xmm\[0-9\]+\{%k\[1-7\]\}{z}(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vpmovswb\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\[^\n\]*res1\[^\n\]*\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */ /* { dg-final { scan-assembler-times "vpmovswb\[ \\t\]+\[^\{\n\]*%ymm\[0-9\]+\[^\n\]*%xmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */ /* { dg-final { scan-assembler-times "vpmovswb\[ \\t\]+\[^\{\n\]*%ymm\[0-9\]+\[^\n\]*%xmm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */ /* { dg-final { scan-assembler-times "vpmovswb\[ \\t\]+\[^\{\n\]*%ymm\[0-9\]+\[^\n\]*%xmm\[0-9\]+\{%k\[1-7\]\}{z}(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vpmovswb\[ \\t\]+\[^\{\n\]*%ymm\[0-9\]+\[^\n\]*res2\[^\n\]*\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */ /* { dg-final { scan-assembler-times "vpmovswb\[ \\t\]+\[^\{\n\]*%zmm\[0-9\]+\[^\n\]*%ymm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */ /* { dg-final { scan-assembler-times "vpmovswb\[ \\t\]+\[^\{\n\]*%zmm\[0-9\]+\[^\n\]*%ymm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */ /* { dg-final { scan-assembler-times "vpmovswb\[ \\t\]+\[^\{\n\]*%zmm\[0-9\]+\[^\n\]*%ymm\[0-9\]+\{%k\[1-7\]\}{z}(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vpmovswb\[ \\t\]+\[^\{\n\]*%zmm\[0-9\]+\[^\n\]*res3\[^\n\]*\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */ #include -volatile __m128i x, z; -volatile __m256i y; -volatile __m512i u; +volatile __m128i x, z, res1; +volatile __m256i y, res2; +volatile __m512i u, res3; volatile __mmask8 m1; volatile __mmask16 m2; volatile __mmask32 m3; @@ -25,10 +28,13 @@ avx512bw_test (void) z = _mm_cvtsepi16_epi8 (x); z = _mm_mask_cvtsepi16_epi8 (z, m1, x); z = _mm_maskz_cvtsepi16_epi8 (m1, x); + _mm_mask_cvtsepi16_storeu_epi8 ((void *) &res1, m1, x); z = _mm256_cvtsepi16_epi8 (y); z = _mm256_mask_cvtsepi16_epi8 (z, m2, y); z = _mm256_maskz_cvtsepi16_epi8 (m2, y); + _mm256_mask_cvtsepi16_storeu_epi8 ((void *) &res2, m2, y); y = _mm512_cvtsepi16_epi8 (u); y = _mm512_mask_cvtsepi16_epi8 (y, m3, u); y = _mm512_maskz_cvtsepi16_epi8 (m3, u); + _mm512_mask_cvtsepi16_storeu_epi8 ((void *) &res3, m3, u); } diff --git a/gcc/testsuite/gcc.target/i386/avx512bw-vpmovswb-2.c b/gcc/testsuite/gcc.target/i386/avx512bw-vpmovswb-2.c index 69e25a27c4d..fd9cb4c681e 100644 --- a/gcc/testsuite/gcc.target/i386/avx512bw-vpmovswb-2.c +++ b/gcc/testsuite/gcc.target/i386/avx512bw-vpmovswb-2.c @@ -31,9 +31,11 @@ TEST (void) { int i, sign; UNION_TYPE (AVX512F_LEN_HALF, i_b) res1, res2, res3; + char res4[SIZE]; UNION_TYPE (AVX512F_LEN, i_w) src; MASK_TYPE mask = MASK_VALUE; char res_ref[32]; + char res_ref2[SIZE_HALF]; sign = -1; for (i = 0; i < SIZE; i++) @@ -41,6 +43,7 @@ TEST (void) src.a[i] = 1 + 34 * i * sign; sign = sign * -1; res2.a[i] = DEFAULT_VALUE; + res4[i] = DEFAULT_VALUE; } res1.x = INTRINSIC (_cvtsepi16_epi8) (src.x); @@ -59,4 +62,11 @@ TEST (void) MASK_ZERO (i_b) (res_ref, mask, SIZE); if (UNION_CHECK (AVX512F_LEN_HALF, i_b) (res3, res_ref)) abort (); + + INTRINSIC (_mask_cvtsepi16_storeu_epi8) (res4, mask, src.x); + CALC (res_ref2, src.a); + MASK_MERGE (i_b) (res_ref2, mask, SIZE); + + if (checkVc (res4, res_ref2, SIZE)) + abort (); } diff --git a/gcc/testsuite/gcc.target/i386/avx512bw-vpmovuswb-1.c b/gcc/testsuite/gcc.target/i386/avx512bw-vpmovuswb-1.c index e1c62bf56e3..7825e46a944 100644 --- a/gcc/testsuite/gcc.target/i386/avx512bw-vpmovuswb-1.c +++ b/gcc/testsuite/gcc.target/i386/avx512bw-vpmovuswb-1.c @@ -3,18 +3,21 @@ /* { dg-final { scan-assembler-times "vpmovuswb\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\[^\n\]*%xmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */ /* { dg-final { scan-assembler-times "vpmovuswb\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\[^\n\]*%xmm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */ /* { dg-final { scan-assembler-times "vpmovuswb\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\[^\n\]*%xmm\[0-9\]+\{%k\[1-7\]\}{z}(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vpmovuswb\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\[^\n\]*res1\[^\n\]*\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */ /* { dg-final { scan-assembler-times "vpmovuswb\[ \\t\]+\[^\{\n\]*%ymm\[0-9\]+\[^\n\]*%xmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */ /* { dg-final { scan-assembler-times "vpmovuswb\[ \\t\]+\[^\{\n\]*%ymm\[0-9\]+\[^\n\]*%xmm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */ /* { dg-final { scan-assembler-times "vpmovuswb\[ \\t\]+\[^\{\n\]*%ymm\[0-9\]+\[^\n\]*%xmm\[0-9\]+\{%k\[1-7\]\}{z}(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vpmovuswb\[ \\t\]+\[^\{\n\]*%ymm\[0-9\]+\[^\n\]*res2\[^\n\]*\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */ /* { dg-final { scan-assembler-times "vpmovuswb\[ \\t\]+\[^\{\n\]*%zmm\[0-9\]+\[^\n\]*%ymm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */ /* { dg-final { scan-assembler-times "vpmovuswb\[ \\t\]+\[^\{\n\]*%zmm\[0-9\]+\[^\n\]*%ymm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */ /* { dg-final { scan-assembler-times "vpmovuswb\[ \\t\]+\[^\{\n\]*%zmm\[0-9\]+\[^\n\]*%ymm\[0-9\]+\{%k\[1-7\]\}{z}(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vpmovuswb\[ \\t\]+\[^\{\n\]*%zmm\[0-9\]+\[^\n\]*res3\[^\n\]*\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */ #include -volatile __m128i x, z; -volatile __m256i y; -volatile __m512i u; +volatile __m128i x, z, res1; +volatile __m256i y, res2; +volatile __m512i u, res3; volatile __mmask8 m1; volatile __mmask16 m2; volatile __mmask32 m3; @@ -25,10 +28,13 @@ avx512bw_test (void) z = _mm_cvtusepi16_epi8 (x); z = _mm_mask_cvtusepi16_epi8 (z, m1, x); z = _mm_maskz_cvtusepi16_epi8 (m1, x); + _mm_mask_cvtusepi16_storeu_epi8 ((void *) &res1, m1, x); z = _mm256_cvtusepi16_epi8 (y); z = _mm256_mask_cvtusepi16_epi8 (z, m2, y); z = _mm256_maskz_cvtusepi16_epi8 (m2, y); + _mm256_mask_cvtusepi16_storeu_epi8 ((void *) &res2, m2, y); y = _mm512_cvtusepi16_epi8 (u); y = _mm512_mask_cvtusepi16_epi8 (y, m3, u); y = _mm512_maskz_cvtusepi16_epi8 (m3, u); + _mm512_mask_cvtusepi16_storeu_epi8 ((void *) &res3, m3, u); } diff --git a/gcc/testsuite/gcc.target/i386/avx512bw-vpmovuswb-2.c b/gcc/testsuite/gcc.target/i386/avx512bw-vpmovuswb-2.c index 094d4329ab0..d5198f39eb6 100644 --- a/gcc/testsuite/gcc.target/i386/avx512bw-vpmovuswb-2.c +++ b/gcc/testsuite/gcc.target/i386/avx512bw-vpmovuswb-2.c @@ -23,14 +23,17 @@ TEST (void) { int i; UNION_TYPE (AVX512F_LEN_HALF, i_b) res1, res2, res3; + unsigned char res4[SIZE]; UNION_TYPE (AVX512F_LEN, i_w) src; MASK_TYPE mask = MASK_VALUE; unsigned char res_ref[32]; + unsigned char res_ref2[SIZE]; for (i = 0; i < SIZE; i++) { src.a[i] = 1 + 34 * i; res2.a[i] = DEFAULT_VALUE; + res4[i] = DEFAULT_VALUE; } res1.x = INTRINSIC (_cvtusepi16_epi8) (src.x); @@ -49,4 +52,11 @@ TEST (void) MASK_ZERO (i_b) (res_ref, mask, SIZE); if (UNION_CHECK (AVX512F_LEN_HALF, i_b) (res3, res_ref)) abort (); + + INTRINSIC (_mask_cvtusepi16_storeu_epi8) (res4, mask, src.x); + CALC (res_ref2, src.a); + + MASK_MERGE (i_b) (res_ref2, mask, SIZE); + if (checkVc (res4, res_ref2, SIZE)) + abort (); } diff --git a/gcc/testsuite/gcc.target/i386/avx512bw-vpmovwb-1.c b/gcc/testsuite/gcc.target/i386/avx512bw-vpmovwb-1.c index e7adbb5a351..a1ad551392b 100644 --- a/gcc/testsuite/gcc.target/i386/avx512bw-vpmovwb-1.c +++ b/gcc/testsuite/gcc.target/i386/avx512bw-vpmovwb-1.c @@ -3,18 +3,21 @@ /* { dg-final { scan-assembler-times "vpmovwb\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\[^\n\]*%xmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */ /* { dg-final { scan-assembler-times "vpmovwb\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\[^\n\]*%xmm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */ /* { dg-final { scan-assembler-times "vpmovwb\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\[^\n\]*%xmm\[0-9\]+\{%k\[1-7\]\}{z}(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vpmovwb\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\[^\n\]*res1\[^\n\]*\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */ /* { dg-final { scan-assembler-times "vpmovwb\[ \\t\]+\[^\{\n\]*%ymm\[0-9\]+\[^\n\]*%xmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */ /* { dg-final { scan-assembler-times "vpmovwb\[ \\t\]+\[^\{\n\]*%ymm\[0-9\]+\[^\n\]*%xmm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */ /* { dg-final { scan-assembler-times "vpmovwb\[ \\t\]+\[^\{\n\]*%ymm\[0-9\]+\[^\n\]*%xmm\[0-9\]+\{%k\[1-7\]\}{z}(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vpmovwb\[ \\t\]+\[^\{\n\]*%ymm\[0-9\]+\[^\n\]*res2\[^\n\]*\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */ /* { dg-final { scan-assembler-times "vpmovwb\[ \\t\]+\[^\{\n\]*%zmm\[0-9\]+\[^\n\]*%ymm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */ /* { dg-final { scan-assembler-times "vpmovwb\[ \\t\]+\[^\{\n\]*%zmm\[0-9\]+\[^\n\]*%ymm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */ /* { dg-final { scan-assembler-times "vpmovwb\[ \\t\]+\[^\{\n\]*%zmm\[0-9\]+\[^\n\]*%ymm\[0-9\]+\{%k\[1-7\]\}{z}(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vpmovwb\[ \\t\]+\[^\{\n\]*%zmm\[0-9\]+\[^\n\]*res3\[^\n\]*\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */ #include -volatile __m128i x, z; -volatile __m256i y; -volatile __m512i u; +volatile __m128i x, z, res1; +volatile __m256i y, res2; +volatile __m512i u, res3; volatile __mmask8 m1; volatile __mmask16 m2; volatile __mmask32 m3; @@ -25,10 +28,13 @@ avx512bw_test (void) z = _mm_cvtepi16_epi8 (x); z = _mm_mask_cvtepi16_epi8 (z, m1, x); z = _mm_maskz_cvtepi16_epi8 (m1, x); + _mm_mask_cvtepi16_storeu_epi8 ((void *) &res1, m1, x); z = _mm256_cvtepi16_epi8 (y); z = _mm256_mask_cvtepi16_epi8 (z, m2, y); z = _mm256_maskz_cvtepi16_epi8 (m2, y); + _mm256_mask_cvtepi16_storeu_epi8 ((void *) &res2, m2, y); y = _mm512_cvtepi16_epi8 (u); y = _mm512_mask_cvtepi16_epi8 (y, m3, u); y = _mm512_maskz_cvtepi16_epi8 (m3, u); + _mm512_mask_cvtepi16_storeu_epi8 ((void *) &res3, m3, u); } diff --git a/gcc/testsuite/gcc.target/i386/avx512bw-vpmovwb-2.c b/gcc/testsuite/gcc.target/i386/avx512bw-vpmovwb-2.c index 32857dae551..6b0f86f57d8 100644 --- a/gcc/testsuite/gcc.target/i386/avx512bw-vpmovwb-2.c +++ b/gcc/testsuite/gcc.target/i386/avx512bw-vpmovwb-2.c @@ -24,9 +24,11 @@ TEST (void) { int i, sign; UNION_TYPE (AVX512F_LEN_HALF, i_b) res1, res2, res3; + char res4[SIZE]; UNION_TYPE (AVX512F_LEN, i_w) src; MASK_TYPE mask = MASK_VALUE; char res_ref[32]; + char res_ref2[SIZE]; sign = -1; for (i = 0; i < SIZE; i++) @@ -34,6 +36,7 @@ TEST (void) src.a[i] = 1 + 34 * i * sign; sign = sign * -1; res2.a[i] = DEFAULT_VALUE; + res4[i] = DEFAULT_VALUE; } res1.x = INTRINSIC (_cvtepi16_epi8) (src.x); @@ -52,4 +55,11 @@ TEST (void) MASK_ZERO (i_b) (res_ref, mask, SIZE); if (UNION_CHECK (AVX512F_LEN_HALF, i_b) (res3, res_ref)) abort (); + + INTRINSIC (_mask_cvtepi16_storeu_epi8) (res4, mask, src.x); + CALC (res_ref2, src.a); + + MASK_MERGE (i_b) (res_ref2, mask, SIZE); + if (checkVc (res4, res_ref2, SIZE)) + abort (); } -- 2.30.2