From 4a948703db483c3ba3df67816425d951c0268623 Mon Sep 17 00:00:00 2001 From: liuhongt Date: Mon, 25 May 2020 16:10:06 +0800 Subject: [PATCH] Fix nonconforming memory_operand for vpmovq{d,w,b}/vpmovd{w,b}/vpmovwb. According to Intel SDM, VPMOVQB xmm1/m16 {k1}{z}, xmm2 has 16-bit memory_operand instead of 128-bit one which existed in current implementation. Also for other vpmov instructions which have memory_operand narrower than 128bits. 2020-05-25 Hongtao Liu gcc/ChangeLog * config/i386/sse.md (*avx512vl_v2div2qi2_store_1): Rename from *avx512vl_v2div2qi_store and refine memory size of the pattern. (*avx512vl_v2div2qi2_mask_store_1): Ditto. (*avx512vl_v4qi2_store_1): Ditto. (*avx512vl_v4qi2_mask_store_1): Ditto. (*avx512vl_v8qi2_store_1): Ditto. (*avx512vl_v8qi2_mask_store_1): Ditto. (*avx512vl_v4hi2_store_1): Ditto. (*avx512vl_v4hi2_mask_store_1): Ditto. (*avx512vl_v2div2hi2_store_1): Ditto. (*avx512vl_v2div2hi2_mask_store_1): Ditto. (*avx512vl_v2div2si2_store_1): Ditto. (*avx512vl_v2div2si2_mask_store_1): Ditto. (*avx512f_v8div16qi2_store_1): Ditto. (*avx512f_v8div16qi2_mask_store_1): Ditto. (*avx512vl_v2div2qi2_store_2): New define_insn_and_split. (*avx512vl_v2div2qi2_mask_store_2): Ditto. (*avx512vl_v4qi2_store_2): Ditto. (*avx512vl_v4qi2_mask_store_2): Ditto. (*avx512vl_v8qi2_store_2): Ditto. (*avx512vl_v8qi2_mask_store_2): Ditto. (*avx512vl_v4hi2_store_2): Ditto. (*avx512vl_v4hi2_mask_store_2): Ditto. (*avx512vl_v2div2hi2_store_2): Ditto. (*avx512vl_v2div2hi2_mask_store_2): Ditto. (*avx512vl_v2div2si2_store_2): Ditto. (*avx512vl_v2div2si2_mask_store_2): Ditto. (*avx512f_v8div16qi2_store_2): Ditto. (*avx512f_v8div16qi2_mask_store_2): Ditto. * config/i386/i386-builtin-types.def: Adjust builtin type. * config/i386/i386-expand.c: Ditto. * config/i386/i386-builtin.def: Adjust builtin. * config/i386/avx512fintrin.h: Ditto. * config/i386/avx512vlbwintrin.h: Ditto. * config/i386/avx512vlintrin.h: Ditto. --- gcc/config/i386/avx512fintrin.h | 7 +- gcc/config/i386/avx512vlbwintrin.h | 6 +- gcc/config/i386/avx512vlintrin.h | 49 +-- gcc/config/i386/i386-builtin-types.def | 20 +- gcc/config/i386/i386-builtin.def | 60 +-- gcc/config/i386/i386-expand.c | 20 +- gcc/config/i386/sse.md | 542 ++++++++++++++++--------- 7 files changed, 421 insertions(+), 283 deletions(-) diff --git a/gcc/config/i386/avx512fintrin.h b/gcc/config/i386/avx512fintrin.h index 012cf4eb31e..4bcd697387a 100644 --- a/gcc/config/i386/avx512fintrin.h +++ b/gcc/config/i386/avx512fintrin.h @@ -5613,7 +5613,8 @@ extern __inline void __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm512_mask_cvtepi64_storeu_epi8 (void * __P, __mmask8 __M, __m512i __A) { - __builtin_ia32_pmovqb512mem_mask ((__v16qi *) __P, (__v8di) __A, __M); + __builtin_ia32_pmovqb512mem_mask ((unsigned long long *) __P, + (__v8di) __A, __M); } extern __inline __m128i @@ -5648,7 +5649,7 @@ extern __inline void __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm512_mask_cvtsepi64_storeu_epi8 (void * __P, __mmask8 __M, __m512i __A) { - __builtin_ia32_pmovsqb512mem_mask ((__v16qi *) __P, (__v8di) __A, __M); + __builtin_ia32_pmovsqb512mem_mask ((unsigned long long *) __P, (__v8di) __A, __M); } extern __inline __m128i @@ -5683,7 +5684,7 @@ extern __inline void __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm512_mask_cvtusepi64_storeu_epi8 (void * __P, __mmask8 __M, __m512i __A) { - __builtin_ia32_pmovusqb512mem_mask ((__v16qi *) __P, (__v8di) __A, __M); + __builtin_ia32_pmovusqb512mem_mask ((unsigned long long *) __P, (__v8di) __A, __M); } extern __inline __m128i diff --git a/gcc/config/i386/avx512vlbwintrin.h b/gcc/config/i386/avx512vlbwintrin.h index bee2639d60a..cd4275e0781 100644 --- a/gcc/config/i386/avx512vlbwintrin.h +++ b/gcc/config/i386/avx512vlbwintrin.h @@ -255,7 +255,7 @@ extern __inline void __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm_mask_cvtsepi16_storeu_epi8 (void * __P, __mmask8 __M,__m128i __A) { - __builtin_ia32_pmovswb128mem_mask ((__v8qi *) __P , (__v8hi) __A, __M); + __builtin_ia32_pmovswb128mem_mask ((unsigned long long *) __P , (__v8hi) __A, __M); } extern __inline __m128i @@ -325,7 +325,7 @@ extern __inline void __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm_mask_cvtusepi16_storeu_epi8 (void * __P, __mmask8 __M,__m128i __A) { - __builtin_ia32_pmovuswb128mem_mask ((__v8qi *) __P , (__v8hi) __A, __M); + __builtin_ia32_pmovuswb128mem_mask ((unsigned long long *) __P , (__v8hi) __A, __M); } extern __inline __m128i @@ -4048,7 +4048,7 @@ extern __inline void __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm_mask_cvtepi16_storeu_epi8 (void * __P, __mmask8 __M,__m128i __A) { - __builtin_ia32_pmovwb128mem_mask ((__v8qi *) __P , (__v8hi) __A, __M); + __builtin_ia32_pmovwb128mem_mask ((unsigned long long *) __P , (__v8hi) __A, __M); } extern __inline __m128i diff --git a/gcc/config/i386/avx512vlintrin.h b/gcc/config/i386/avx512vlintrin.h index cb6cc0ce782..7abd6018f4f 100644 --- a/gcc/config/i386/avx512vlintrin.h +++ b/gcc/config/i386/avx512vlintrin.h @@ -1485,7 +1485,7 @@ extern __inline void __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm_mask_cvtepi32_storeu_epi8 (void * __P, __mmask8 __M, __m128i __A) { - __builtin_ia32_pmovdb128mem_mask ((__v16qi *) __P, (__v4si) __A, __M); + __builtin_ia32_pmovdb128mem_mask ((unsigned int *) __P, (__v4si) __A, __M); } extern __inline __m128i @@ -1528,7 +1528,7 @@ extern __inline void __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm256_mask_cvtepi32_storeu_epi8 (void * __P, __mmask8 __M, __m256i __A) { - __builtin_ia32_pmovdb256mem_mask ((__v16qi *) __P, (__v8si) __A, __M); + __builtin_ia32_pmovdb256mem_mask ((unsigned long long *) __P, (__v8si) __A, __M); } extern __inline __m128i @@ -1555,7 +1555,7 @@ extern __inline void __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm_mask_cvtsepi32_storeu_epi8 (void * __P, __mmask8 __M, __m128i __A) { - __builtin_ia32_pmovsdb128mem_mask ((__v16qi *) __P, (__v4si) __A, __M); + __builtin_ia32_pmovsdb128mem_mask ((unsigned int *) __P, (__v4si) __A, __M); } extern __inline __m128i @@ -1590,7 +1590,7 @@ extern __inline void __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm256_mask_cvtsepi32_storeu_epi8 (void * __P, __mmask8 __M, __m256i __A) { - __builtin_ia32_pmovsdb256mem_mask ((__v16qi *) __P, (__v8si) __A, __M); + __builtin_ia32_pmovsdb256mem_mask ((unsigned long long *) __P, (__v8si) __A, __M); } extern __inline __m128i @@ -1625,7 +1625,7 @@ extern __inline void __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm_mask_cvtusepi32_storeu_epi8 (void * __P, __mmask8 __M, __m128i __A) { - __builtin_ia32_pmovusdb128mem_mask ((__v16qi *) __P, (__v4si) __A, __M); + __builtin_ia32_pmovusdb128mem_mask ((unsigned int *) __P, (__v4si) __A, __M); } extern __inline __m128i @@ -1661,7 +1661,7 @@ extern __inline void __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm256_mask_cvtusepi32_storeu_epi8 (void * __P, __mmask8 __M, __m256i __A) { - __builtin_ia32_pmovusdb256mem_mask ((__v16qi*) __P, (__v8si) __A, __M); + __builtin_ia32_pmovusdb256mem_mask ((unsigned long long *) __P, (__v8si) __A, __M); } extern __inline __m128i @@ -1697,7 +1697,7 @@ extern __inline void __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm_mask_cvtepi32_storeu_epi16 (void * __P, __mmask8 __M, __m128i __A) { - __builtin_ia32_pmovdw128mem_mask ((__v8hi *) __P, (__v4si) __A, __M); + __builtin_ia32_pmovdw128mem_mask ((unsigned long long *) __P, (__v4si) __A, __M); } extern __inline __m128i @@ -1767,7 +1767,7 @@ extern __inline void __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm_mask_cvtsepi32_storeu_epi16 (void * __P, __mmask8 __M, __m128i __A) { - __builtin_ia32_pmovsdw128mem_mask ((__v8hi *) __P, (__v4si) __A, __M); + __builtin_ia32_pmovsdw128mem_mask ((unsigned long long *) __P, (__v4si) __A, __M); } extern __inline __m128i @@ -1838,7 +1838,7 @@ extern __inline void __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm_mask_cvtusepi32_storeu_epi16 (void * __P, __mmask8 __M, __m128i __A) { - __builtin_ia32_pmovusdw128mem_mask ((__v8hi *) __P, (__v4si) __A, __M); + __builtin_ia32_pmovusdw128mem_mask ((unsigned long long *) __P, (__v4si) __A, __M); } extern __inline __m128i @@ -1908,7 +1908,7 @@ extern __inline void __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm_mask_cvtepi64_storeu_epi8 (void * __P, __mmask8 __M, __m128i __A) { - __builtin_ia32_pmovqb128mem_mask ((__v16qi *) __P, (__v2di) __A, __M); + __builtin_ia32_pmovqb128mem_mask ((unsigned short *) __P, (__v2di) __A, __M); } extern __inline __m128i @@ -1943,7 +1943,7 @@ extern __inline void __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm256_mask_cvtepi64_storeu_epi8 (void * __P, __mmask8 __M, __m256i __A) { - __builtin_ia32_pmovqb256mem_mask ((__v16qi *) __P, (__v4di) __A, __M); + __builtin_ia32_pmovqb256mem_mask ((unsigned int *) __P, (__v4di) __A, __M); } extern __inline __m128i @@ -1978,7 +1978,7 @@ extern __inline void __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm_mask_cvtsepi64_storeu_epi8 (void * __P, __mmask8 __M, __m128i __A) { - __builtin_ia32_pmovsqb128mem_mask ((__v16qi *) __P, (__v2di) __A, __M); + __builtin_ia32_pmovsqb128mem_mask ((unsigned short *) __P, (__v2di) __A, __M); } extern __inline __m128i @@ -2013,7 +2013,7 @@ extern __inline void __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm256_mask_cvtsepi64_storeu_epi8 (void * __P, __mmask8 __M, __m256i __A) { - __builtin_ia32_pmovsqb256mem_mask ((__v16qi *) __P, (__v4di) __A, __M); + __builtin_ia32_pmovsqb256mem_mask ((unsigned int *) __P, (__v4di) __A, __M); } extern __inline __m128i @@ -2048,7 +2048,7 @@ extern __inline void __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm_mask_cvtusepi64_storeu_epi8 (void * __P, __mmask8 __M, __m128i __A) { - __builtin_ia32_pmovusqb128mem_mask ((__v16qi *) __P, (__v2di) __A, __M); + __builtin_ia32_pmovusqb128mem_mask ((unsigned short *) __P, (__v2di) __A, __M); } extern __inline __m128i @@ -2084,7 +2084,7 @@ extern __inline void __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm256_mask_cvtusepi64_storeu_epi8 (void * __P, __mmask8 __M, __m256i __A) { - __builtin_ia32_pmovusqb256mem_mask ((__v16qi *) __P, (__v4di) __A, __M); + __builtin_ia32_pmovusqb256mem_mask ((unsigned int *) __P, (__v4di) __A, __M); } extern __inline __m128i @@ -2120,7 +2120,7 @@ extern __inline void __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm_mask_cvtepi64_storeu_epi16 (void * __P, __mmask8 __M, __m128i __A) { - __builtin_ia32_pmovqw128mem_mask ((__v8hi *) __P, (__v2di) __A, __M); + __builtin_ia32_pmovqw128mem_mask ((unsigned int *) __P, (__v2di) __A, __M); } extern __inline __m128i @@ -2156,7 +2156,7 @@ extern __inline void __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm256_mask_cvtepi64_storeu_epi16 (void * __P, __mmask8 __M, __m256i __A) { - __builtin_ia32_pmovqw256mem_mask ((__v8hi *) __P, (__v4di) __A, __M); + __builtin_ia32_pmovqw256mem_mask ((unsigned long long *) __P, (__v4di) __A, __M); } extern __inline __m128i @@ -2191,7 +2191,7 @@ extern __inline void __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm_mask_cvtsepi64_storeu_epi16 (void * __P, __mmask8 __M, __m128i __A) { - __builtin_ia32_pmovsqw128mem_mask ((__v8hi *) __P, (__v2di) __A, __M); + __builtin_ia32_pmovsqw128mem_mask ((unsigned int *) __P, (__v2di) __A, __M); } extern __inline __m128i @@ -2226,7 +2226,7 @@ extern __inline void __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm256_mask_cvtsepi64_storeu_epi16 (void * __P, __mmask8 __M, __m256i __A) { - __builtin_ia32_pmovsqw256mem_mask ((__v8hi *) __P, (__v4di) __A, __M); + __builtin_ia32_pmovsqw256mem_mask ((unsigned long long *) __P, (__v4di) __A, __M); } extern __inline __m128i @@ -2261,7 +2261,7 @@ extern __inline void __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm_mask_cvtusepi64_storeu_epi16 (void * __P, __mmask8 __M, __m128i __A) { - __builtin_ia32_pmovusqw128mem_mask ((__v8hi *) __P, (__v2di) __A, __M); + __builtin_ia32_pmovusqw128mem_mask ((unsigned int *) __P, (__v2di) __A, __M); } extern __inline __m128i @@ -2296,7 +2296,7 @@ extern __inline void __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm256_mask_cvtusepi64_storeu_epi16 (void * __P, __mmask8 __M, __m256i __A) { - __builtin_ia32_pmovusqw256mem_mask ((__v8hi *) __P, (__v4di) __A, __M); + __builtin_ia32_pmovusqw256mem_mask ((unsigned long long *) __P, (__v4di) __A, __M); } extern __inline __m128i @@ -2331,7 +2331,8 @@ extern __inline void __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm_mask_cvtepi64_storeu_epi32 (void * __P, __mmask8 __M, __m128i __A) { - __builtin_ia32_pmovqd128mem_mask ((__v4si *) __P, (__v2di) __A, __M); + __builtin_ia32_pmovqd128mem_mask ((unsigned long long *) __P, + (__v2di) __A, __M); } extern __inline __m128i @@ -2401,7 +2402,7 @@ extern __inline void __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm_mask_cvtsepi64_storeu_epi32 (void * __P, __mmask8 __M, __m128i __A) { - __builtin_ia32_pmovsqd128mem_mask ((__v4si *) __P, (__v2di) __A, __M); + __builtin_ia32_pmovsqd128mem_mask ((unsigned long long *) __P, (__v2di) __A, __M); } extern __inline __m128i @@ -2472,7 +2473,7 @@ extern __inline void __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm_mask_cvtusepi64_storeu_epi32 (void * __P, __mmask8 __M, __m128i __A) { - __builtin_ia32_pmovusqd128mem_mask ((__v4si *) __P, (__v2di) __A, __M); + __builtin_ia32_pmovusqd128mem_mask ((unsigned long long *) __P, (__v2di) __A, __M); } extern __inline __m128i diff --git a/gcc/config/i386/i386-builtin-types.def b/gcc/config/i386/i386-builtin-types.def index 38fea5cc5be..1adf7c44f4a 100644 --- a/gcc/config/i386/i386-builtin-types.def +++ b/gcc/config/i386/i386-builtin-types.def @@ -870,12 +870,12 @@ DEF_FUNCTION_TYPE (V4SF, V4SF, V4SF, INT, V4SF, UQI) DEF_FUNCTION_TYPE (VOID, PV8DF, V8DF, UQI) DEF_FUNCTION_TYPE (VOID, PV8SI, V8DI, UQI) DEF_FUNCTION_TYPE (VOID, PV8HI, V8DI, UQI) -DEF_FUNCTION_TYPE (VOID, PV8HI, V4DI, UQI) -DEF_FUNCTION_TYPE (VOID, PV8HI, V2DI, UQI) +DEF_FUNCTION_TYPE (VOID, PUDI, V4DI, UQI) +DEF_FUNCTION_TYPE (VOID, PUSI, V2DI, UQI) DEF_FUNCTION_TYPE (VOID, PV4SI, V4DI, UQI) -DEF_FUNCTION_TYPE (VOID, PV4SI, V2DI, UQI) +DEF_FUNCTION_TYPE (VOID, PUDI, V2DI, UQI) DEF_FUNCTION_TYPE (VOID, PV8HI, V8SI, UQI) -DEF_FUNCTION_TYPE (VOID, PV8HI, V4SI, UQI) +DEF_FUNCTION_TYPE (VOID, PUDI, V4SI, UQI) DEF_FUNCTION_TYPE (VOID, PV4DF, V4DF, UQI) DEF_FUNCTION_TYPE (VOID, PV2DF, V2DF, UQI) DEF_FUNCTION_TYPE (VOID, PV16SF, V16SF, UHI) @@ -887,11 +887,11 @@ DEF_FUNCTION_TYPE (VOID, PV2DI, V2DI, UQI) DEF_FUNCTION_TYPE (VOID, PV16SI, V16SI, UHI) DEF_FUNCTION_TYPE (VOID, PV16HI, V16SI, UHI) DEF_FUNCTION_TYPE (VOID, PV16QI, V16SI, UHI) -DEF_FUNCTION_TYPE (VOID, PV16QI, V8SI, UQI) -DEF_FUNCTION_TYPE (VOID, PV16QI, V4SI, UQI) -DEF_FUNCTION_TYPE (VOID, PV16QI, V8DI, UQI) -DEF_FUNCTION_TYPE (VOID, PV16QI, V4DI, UQI) -DEF_FUNCTION_TYPE (VOID, PV16QI, V2DI, UQI) +DEF_FUNCTION_TYPE (VOID, PUDI, V8SI, UQI) +DEF_FUNCTION_TYPE (VOID, PUSI, V4SI, UQI) +DEF_FUNCTION_TYPE (VOID, PUDI, V8DI, UQI) +DEF_FUNCTION_TYPE (VOID, PUSI, V4DI, UQI) +DEF_FUNCTION_TYPE (VOID, PUHI, V2DI, UQI) DEF_FUNCTION_TYPE (VOID, PV8SI, V8SI, UQI) DEF_FUNCTION_TYPE (VOID, PV4SI, V4SI, UQI) DEF_FUNCTION_TYPE (VOID, PDOUBLE, V8DF, UQI) @@ -1130,7 +1130,7 @@ DEF_FUNCTION_TYPE (VOID, PVOID, QI, V2DI, V2DI, INT) DEF_FUNCTION_TYPE (VOID, QI, V8SI, PCVOID, INT, INT) DEF_FUNCTION_TYPE (VOID, HI, V16SI, PCVOID, INT, INT) DEF_FUNCTION_TYPE (VOID, QI, V8DI, PCVOID, INT, INT) -DEF_FUNCTION_TYPE (VOID, PV8QI, V8HI, UQI) +DEF_FUNCTION_TYPE (VOID, PUDI, V8HI, UQI) DEF_FUNCTION_TYPE (VOID, PV16QI, V16HI, UHI) DEF_FUNCTION_TYPE_ALIAS (V2DF_FTYPE_V2DF, ROUND) diff --git a/gcc/config/i386/i386-builtin.def b/gcc/config/i386/i386-builtin.def index 134981a798f..6270068fba1 100644 --- a/gcc/config/i386/i386-builtin.def +++ b/gcc/config/i386/i386-builtin.def @@ -244,9 +244,9 @@ BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_truncatev8div8hi2_mask_store BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_us_truncatev16siv16hi2_mask_store, "__builtin_ia32_pmovusdw512mem_mask", IX86_BUILTIN_PMOVUSDW512_MEM, UNKNOWN, (int) VOID_FTYPE_PV16HI_V16SI_UHI) BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_ss_truncatev16siv16hi2_mask_store, "__builtin_ia32_pmovsdw512mem_mask", IX86_BUILTIN_PMOVSDW512_MEM, UNKNOWN, (int) VOID_FTYPE_PV16HI_V16SI_UHI) BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_truncatev16siv16hi2_mask_store, "__builtin_ia32_pmovdw512mem_mask", IX86_BUILTIN_PMOVDW512_MEM, UNKNOWN, (int) VOID_FTYPE_PV16HI_V16SI_UHI) -BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_truncatev8div16qi2_mask_store, "__builtin_ia32_pmovqb512mem_mask", IX86_BUILTIN_PMOVQB512_MEM, UNKNOWN, (int) VOID_FTYPE_PV16QI_V8DI_UQI) -BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_us_truncatev8div16qi2_mask_store, "__builtin_ia32_pmovusqb512mem_mask", IX86_BUILTIN_PMOVUSQB512_MEM, UNKNOWN, (int) VOID_FTYPE_PV16QI_V8DI_UQI) -BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_ss_truncatev8div16qi2_mask_store, "__builtin_ia32_pmovsqb512mem_mask", IX86_BUILTIN_PMOVSQB512_MEM, UNKNOWN, (int) VOID_FTYPE_PV16QI_V8DI_UQI) +BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_truncatev8div16qi2_mask_store_2, "__builtin_ia32_pmovqb512mem_mask", IX86_BUILTIN_PMOVQB512_MEM, UNKNOWN, (int) VOID_FTYPE_PUDI_V8DI_UQI) +BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_us_truncatev8div16qi2_mask_store_2, "__builtin_ia32_pmovusqb512mem_mask", IX86_BUILTIN_PMOVUSQB512_MEM, UNKNOWN, (int) VOID_FTYPE_PUDI_V8DI_UQI) +BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_ss_truncatev8div16qi2_mask_store_2, "__builtin_ia32_pmovsqb512mem_mask", IX86_BUILTIN_PMOVSQB512_MEM, UNKNOWN, (int) VOID_FTYPE_PUDI_V8DI_UQI) BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_us_truncatev16siv16qi2_mask_store, "__builtin_ia32_pmovusdb512mem_mask", IX86_BUILTIN_PMOVUSDB512_MEM, UNKNOWN, (int) VOID_FTYPE_PV16QI_V16SI_UHI) BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_ss_truncatev16siv16qi2_mask_store, "__builtin_ia32_pmovsdb512mem_mask", IX86_BUILTIN_PMOVSDB512_MEM, UNKNOWN, (int) VOID_FTYPE_PV16QI_V16SI_UHI) BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_truncatev16siv16qi2_mask_store, "__builtin_ia32_pmovdb512mem_mask", IX86_BUILTIN_PMOVDB512_MEM, UNKNOWN, (int) VOID_FTYPE_PV16QI_V16SI_UHI) @@ -362,40 +362,40 @@ BDESC (OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_avx512vl_expandv2di_maskz, "__built BDESC (OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_avx512vl_expandv8si_maskz, "__builtin_ia32_expandloadsi256_maskz", IX86_BUILTIN_PEXPANDDLOAD256Z, UNKNOWN, (int) V8SI_FTYPE_PCV8SI_V8SI_UQI) BDESC (OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_avx512vl_expandv4si_maskz, "__builtin_ia32_expandloadsi128_maskz", IX86_BUILTIN_PEXPANDDLOAD128Z, UNKNOWN, (int) V4SI_FTYPE_PCV4SI_V4SI_UQI) BDESC (OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_avx512vl_truncatev4div4si2_mask_store, "__builtin_ia32_pmovqd256mem_mask", IX86_BUILTIN_PMOVQD256_MEM, UNKNOWN, (int) VOID_FTYPE_PV4SI_V4DI_UQI) -BDESC (OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_avx512vl_truncatev2div2si2_mask_store, "__builtin_ia32_pmovqd128mem_mask", IX86_BUILTIN_PMOVQD128_MEM, UNKNOWN, (int) VOID_FTYPE_PV4SI_V2DI_UQI) +BDESC (OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_avx512vl_truncatev2div2si2_mask_store_2, "__builtin_ia32_pmovqd128mem_mask", IX86_BUILTIN_PMOVQD128_MEM, UNKNOWN, (int) VOID_FTYPE_PUDI_V2DI_UQI) BDESC (OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_avx512vl_ss_truncatev4div4si2_mask_store, "__builtin_ia32_pmovsqd256mem_mask", IX86_BUILTIN_PMOVSQD256_MEM, UNKNOWN, (int) VOID_FTYPE_PV4SI_V4DI_UQI) -BDESC (OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_avx512vl_ss_truncatev2div2si2_mask_store, "__builtin_ia32_pmovsqd128mem_mask", IX86_BUILTIN_PMOVSQD128_MEM, UNKNOWN, (int) VOID_FTYPE_PV4SI_V2DI_UQI) +BDESC (OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_avx512vl_ss_truncatev2div2si2_mask_store_2, "__builtin_ia32_pmovsqd128mem_mask", IX86_BUILTIN_PMOVSQD128_MEM, UNKNOWN, (int) VOID_FTYPE_PUDI_V2DI_UQI) BDESC (OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_avx512vl_us_truncatev4div4si2_mask_store, "__builtin_ia32_pmovusqd256mem_mask", IX86_BUILTIN_PMOVUSQD256_MEM, UNKNOWN, (int) VOID_FTYPE_PV4SI_V4DI_UQI) -BDESC (OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_avx512vl_us_truncatev2div2si2_mask_store, "__builtin_ia32_pmovusqd128mem_mask", IX86_BUILTIN_PMOVUSQD128_MEM, UNKNOWN, (int) VOID_FTYPE_PV4SI_V2DI_UQI) -BDESC (OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_avx512vl_truncatev4div4hi2_mask_store, "__builtin_ia32_pmovqw256mem_mask", IX86_BUILTIN_PMOVQW256_MEM, UNKNOWN, (int) VOID_FTYPE_PV8HI_V4DI_UQI) -BDESC (OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_avx512vl_truncatev2div2hi2_mask_store, "__builtin_ia32_pmovqw128mem_mask", IX86_BUILTIN_PMOVQW128_MEM, UNKNOWN, (int) VOID_FTYPE_PV8HI_V2DI_UQI) -BDESC (OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_avx512vl_ss_truncatev4div4hi2_mask_store, "__builtin_ia32_pmovsqw256mem_mask", IX86_BUILTIN_PMOVSQW256_MEM, UNKNOWN, (int) VOID_FTYPE_PV8HI_V4DI_UQI) -BDESC (OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_avx512vl_ss_truncatev2div2hi2_mask_store, "__builtin_ia32_pmovsqw128mem_mask", IX86_BUILTIN_PMOVSQW128_MEM, UNKNOWN, (int) VOID_FTYPE_PV8HI_V2DI_UQI) -BDESC (OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_avx512vl_us_truncatev4div4hi2_mask_store, "__builtin_ia32_pmovusqw256mem_mask", IX86_BUILTIN_PMOVUSQW256_MEM, UNKNOWN, (int) VOID_FTYPE_PV8HI_V4DI_UQI) -BDESC (OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_avx512vl_us_truncatev2div2hi2_mask_store, "__builtin_ia32_pmovusqw128mem_mask", IX86_BUILTIN_PMOVUSQW128_MEM, UNKNOWN, (int) VOID_FTYPE_PV8HI_V2DI_UQI) -BDESC (OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_avx512vl_truncatev4div4qi2_mask_store, "__builtin_ia32_pmovqb256mem_mask", IX86_BUILTIN_PMOVQB256_MEM, UNKNOWN, (int) VOID_FTYPE_PV16QI_V4DI_UQI) -BDESC (OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_avx512vl_truncatev2div2qi2_mask_store, "__builtin_ia32_pmovqb128mem_mask", IX86_BUILTIN_PMOVQB128_MEM, UNKNOWN, (int) VOID_FTYPE_PV16QI_V2DI_UQI) -BDESC (OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_avx512vl_ss_truncatev4div4qi2_mask_store, "__builtin_ia32_pmovsqb256mem_mask", IX86_BUILTIN_PMOVSQB256_MEM, UNKNOWN, (int) VOID_FTYPE_PV16QI_V4DI_UQI) -BDESC (OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_avx512vl_ss_truncatev2div2qi2_mask_store, "__builtin_ia32_pmovsqb128mem_mask", IX86_BUILTIN_PMOVSQB128_MEM, UNKNOWN, (int) VOID_FTYPE_PV16QI_V2DI_UQI) -BDESC (OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_avx512vl_us_truncatev4div4qi2_mask_store, "__builtin_ia32_pmovusqb256mem_mask", IX86_BUILTIN_PMOVUSQB256_MEM, UNKNOWN, (int) VOID_FTYPE_PV16QI_V4DI_UQI) -BDESC (OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_avx512vl_us_truncatev2div2qi2_mask_store, "__builtin_ia32_pmovusqb128mem_mask", IX86_BUILTIN_PMOVUSQB128_MEM, UNKNOWN, (int) VOID_FTYPE_PV16QI_V2DI_UQI) -BDESC (OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_avx512vl_truncatev8siv8qi2_mask_store, "__builtin_ia32_pmovdb256mem_mask", IX86_BUILTIN_PMOVDB256_MEM, UNKNOWN, (int) VOID_FTYPE_PV16QI_V8SI_UQI) -BDESC (OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_avx512vl_truncatev4siv4qi2_mask_store, "__builtin_ia32_pmovdb128mem_mask", IX86_BUILTIN_PMOVDB128_MEM, UNKNOWN, (int) VOID_FTYPE_PV16QI_V4SI_UQI) -BDESC (OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_avx512vl_ss_truncatev8siv8qi2_mask_store, "__builtin_ia32_pmovsdb256mem_mask", IX86_BUILTIN_PMOVSDB256_MEM, UNKNOWN, (int) VOID_FTYPE_PV16QI_V8SI_UQI) -BDESC (OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_avx512vl_ss_truncatev4siv4qi2_mask_store, "__builtin_ia32_pmovsdb128mem_mask", IX86_BUILTIN_PMOVSDB128_MEM, UNKNOWN, (int) VOID_FTYPE_PV16QI_V4SI_UQI) -BDESC (OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_avx512vl_us_truncatev8siv8qi2_mask_store, "__builtin_ia32_pmovusdb256mem_mask", IX86_BUILTIN_PMOVUSDB256_MEM, UNKNOWN, (int) VOID_FTYPE_PV16QI_V8SI_UQI) -BDESC (OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_avx512vl_us_truncatev4siv4qi2_mask_store, "__builtin_ia32_pmovusdb128mem_mask", IX86_BUILTIN_PMOVUSDB128_MEM, UNKNOWN, (int) VOID_FTYPE_PV16QI_V4SI_UQI) +BDESC (OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_avx512vl_us_truncatev2div2si2_mask_store_2, "__builtin_ia32_pmovusqd128mem_mask", IX86_BUILTIN_PMOVUSQD128_MEM, UNKNOWN, (int) VOID_FTYPE_PUDI_V2DI_UQI) +BDESC (OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_avx512vl_truncatev4div4hi2_mask_store_2, "__builtin_ia32_pmovqw256mem_mask", IX86_BUILTIN_PMOVQW256_MEM, UNKNOWN, (int) VOID_FTYPE_PUDI_V4DI_UQI) +BDESC (OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_avx512vl_truncatev2div2hi2_mask_store_2, "__builtin_ia32_pmovqw128mem_mask", IX86_BUILTIN_PMOVQW128_MEM, UNKNOWN, (int) VOID_FTYPE_PUSI_V2DI_UQI) +BDESC (OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_avx512vl_ss_truncatev4div4hi2_mask_store_2, "__builtin_ia32_pmovsqw256mem_mask", IX86_BUILTIN_PMOVSQW256_MEM, UNKNOWN, (int) VOID_FTYPE_PUDI_V4DI_UQI) +BDESC (OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_avx512vl_ss_truncatev2div2hi2_mask_store_2, "__builtin_ia32_pmovsqw128mem_mask", IX86_BUILTIN_PMOVSQW128_MEM, UNKNOWN, (int) VOID_FTYPE_PUSI_V2DI_UQI) +BDESC (OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_avx512vl_us_truncatev4div4hi2_mask_store_2, "__builtin_ia32_pmovusqw256mem_mask", IX86_BUILTIN_PMOVUSQW256_MEM, UNKNOWN, (int) VOID_FTYPE_PUDI_V4DI_UQI) +BDESC (OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_avx512vl_us_truncatev2div2hi2_mask_store_2, "__builtin_ia32_pmovusqw128mem_mask", IX86_BUILTIN_PMOVUSQW128_MEM, UNKNOWN, (int) VOID_FTYPE_PUSI_V2DI_UQI) +BDESC (OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_avx512vl_truncatev4div4qi2_mask_store_2, "__builtin_ia32_pmovqb256mem_mask", IX86_BUILTIN_PMOVQB256_MEM, UNKNOWN, (int) VOID_FTYPE_PUSI_V4DI_UQI) +BDESC (OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_avx512vl_truncatev2div2qi2_mask_store_2, "__builtin_ia32_pmovqb128mem_mask", IX86_BUILTIN_PMOVQB128_MEM, UNKNOWN, (int) VOID_FTYPE_PUHI_V2DI_UQI) +BDESC (OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_avx512vl_ss_truncatev4div4qi2_mask_store_2, "__builtin_ia32_pmovsqb256mem_mask", IX86_BUILTIN_PMOVSQB256_MEM, UNKNOWN, (int) VOID_FTYPE_PUSI_V4DI_UQI) +BDESC (OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_avx512vl_ss_truncatev2div2qi2_mask_store_2, "__builtin_ia32_pmovsqb128mem_mask", IX86_BUILTIN_PMOVSQB128_MEM, UNKNOWN, (int) VOID_FTYPE_PUHI_V2DI_UQI) +BDESC (OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_avx512vl_us_truncatev4div4qi2_mask_store_2, "__builtin_ia32_pmovusqb256mem_mask", IX86_BUILTIN_PMOVUSQB256_MEM, UNKNOWN, (int) VOID_FTYPE_PUSI_V4DI_UQI) +BDESC (OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_avx512vl_us_truncatev2div2qi2_mask_store_2, "__builtin_ia32_pmovusqb128mem_mask", IX86_BUILTIN_PMOVUSQB128_MEM, UNKNOWN, (int) VOID_FTYPE_PUHI_V2DI_UQI) +BDESC (OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_avx512vl_truncatev8siv8qi2_mask_store_2, "__builtin_ia32_pmovdb256mem_mask", IX86_BUILTIN_PMOVDB256_MEM, UNKNOWN, (int) VOID_FTYPE_PUDI_V8SI_UQI) +BDESC (OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_avx512vl_truncatev4siv4qi2_mask_store_2, "__builtin_ia32_pmovdb128mem_mask", IX86_BUILTIN_PMOVDB128_MEM, UNKNOWN, (int) VOID_FTYPE_PUSI_V4SI_UQI) +BDESC (OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_avx512vl_ss_truncatev8siv8qi2_mask_store_2, "__builtin_ia32_pmovsdb256mem_mask", IX86_BUILTIN_PMOVSDB256_MEM, UNKNOWN, (int) VOID_FTYPE_PUDI_V8SI_UQI) +BDESC (OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_avx512vl_ss_truncatev4siv4qi2_mask_store_2, "__builtin_ia32_pmovsdb128mem_mask", IX86_BUILTIN_PMOVSDB128_MEM, UNKNOWN, (int) VOID_FTYPE_PUSI_V4SI_UQI) +BDESC (OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_avx512vl_us_truncatev8siv8qi2_mask_store_2, "__builtin_ia32_pmovusdb256mem_mask", IX86_BUILTIN_PMOVUSDB256_MEM, UNKNOWN, (int) VOID_FTYPE_PUDI_V8SI_UQI) +BDESC (OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_avx512vl_us_truncatev4siv4qi2_mask_store_2, "__builtin_ia32_pmovusdb128mem_mask", IX86_BUILTIN_PMOVUSDB128_MEM, UNKNOWN, (int) VOID_FTYPE_PUSI_V4SI_UQI) BDESC (OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_avx512vl_truncatev8siv8hi2_mask_store, "__builtin_ia32_pmovdw256mem_mask", IX86_BUILTIN_PMOVDW256_MEM, UNKNOWN, (int) VOID_FTYPE_PV8HI_V8SI_UQI) -BDESC (OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_avx512vl_truncatev4siv4hi2_mask_store, "__builtin_ia32_pmovdw128mem_mask", IX86_BUILTIN_PMOVDW128_MEM, UNKNOWN, (int) VOID_FTYPE_PV8HI_V4SI_UQI) +BDESC (OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_avx512vl_truncatev4siv4hi2_mask_store_2, "__builtin_ia32_pmovdw128mem_mask", IX86_BUILTIN_PMOVDW128_MEM, UNKNOWN, (int) VOID_FTYPE_PUDI_V4SI_UQI) BDESC (OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_avx512vl_ss_truncatev8siv8hi2_mask_store, "__builtin_ia32_pmovsdw256mem_mask", IX86_BUILTIN_PMOVSDW256_MEM, UNKNOWN, (int) VOID_FTYPE_PV8HI_V8SI_UQI) -BDESC (OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_avx512vl_ss_truncatev4siv4hi2_mask_store, "__builtin_ia32_pmovsdw128mem_mask", IX86_BUILTIN_PMOVSDW128_MEM, UNKNOWN, (int) VOID_FTYPE_PV8HI_V4SI_UQI) +BDESC (OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_avx512vl_ss_truncatev4siv4hi2_mask_store_2, "__builtin_ia32_pmovsdw128mem_mask", IX86_BUILTIN_PMOVSDW128_MEM, UNKNOWN, (int) VOID_FTYPE_PUDI_V4SI_UQI) BDESC (OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_avx512vl_us_truncatev8siv8hi2_mask_store, "__builtin_ia32_pmovusdw256mem_mask", IX86_BUILTIN_PMOVUSDW256_MEM, UNKNOWN, (int) VOID_FTYPE_PV8HI_V8SI_UQI) -BDESC (OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_avx512vl_us_truncatev4siv4hi2_mask_store, "__builtin_ia32_pmovusdw128mem_mask", IX86_BUILTIN_PMOVUSDW128_MEM, UNKNOWN, (int) VOID_FTYPE_PV8HI_V4SI_UQI) -BDESC (OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_avx512vl_truncatev8hiv8qi2_mask_store, "__builtin_ia32_pmovwb128mem_mask", IX86_BUILTIN_PMOVWB128_MEM, UNKNOWN, (int) VOID_FTYPE_PV8QI_V8HI_UQI) +BDESC (OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_avx512vl_us_truncatev4siv4hi2_mask_store_2, "__builtin_ia32_pmovusdw128mem_mask", IX86_BUILTIN_PMOVUSDW128_MEM, UNKNOWN, (int) VOID_FTYPE_PUDI_V4SI_UQI) +BDESC (OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_avx512vl_truncatev8hiv8qi2_mask_store_2, "__builtin_ia32_pmovwb128mem_mask", IX86_BUILTIN_PMOVWB128_MEM, UNKNOWN, (int) VOID_FTYPE_PUDI_V8HI_UQI) BDESC (OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_avx512vl_truncatev16hiv16qi2_mask_store, "__builtin_ia32_pmovwb256mem_mask", IX86_BUILTIN_PMOVWB256_MEM, UNKNOWN, (int) VOID_FTYPE_PV16QI_V16HI_UHI) -BDESC (OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_avx512vl_ss_truncatev8hiv8qi2_mask_store, "__builtin_ia32_pmovswb128mem_mask", IX86_BUILTIN_PMOVSWB128_MEM, UNKNOWN, (int) VOID_FTYPE_PV8QI_V8HI_UQI) +BDESC (OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_avx512vl_ss_truncatev8hiv8qi2_mask_store_2, "__builtin_ia32_pmovswb128mem_mask", IX86_BUILTIN_PMOVSWB128_MEM, UNKNOWN, (int) VOID_FTYPE_PUDI_V8HI_UQI) BDESC (OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_avx512vl_ss_truncatev16hiv16qi2_mask_store, "__builtin_ia32_pmovswb256mem_mask", IX86_BUILTIN_PMOVSWB256_MEM, UNKNOWN, (int) VOID_FTYPE_PV16QI_V16HI_UHI) -BDESC (OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_avx512vl_us_truncatev8hiv8qi2_mask_store, "__builtin_ia32_pmovuswb128mem_mask", IX86_BUILTIN_PMOVUSWB128_MEM, UNKNOWN, (int) VOID_FTYPE_PV8QI_V8HI_UQI) +BDESC (OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_avx512vl_us_truncatev8hiv8qi2_mask_store_2, "__builtin_ia32_pmovuswb128mem_mask", IX86_BUILTIN_PMOVUSWB128_MEM, UNKNOWN, (int) VOID_FTYPE_PUDI_V8HI_UQI) BDESC (OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_avx512vl_us_truncatev16hiv16qi2_mask_store, "__builtin_ia32_pmovuswb256mem_mask", IX86_BUILTIN_PMOVUSWB256_MEM, UNKNOWN, (int) VOID_FTYPE_PV16QI_V16HI_UHI) BDESC (OPTION_MASK_ISA_AVX512BW, 0, CODE_FOR_avx512bw_us_truncatev32hiv32qi2_mask_store, "__builtin_ia32_pmovuswb512mem_mask", IX86_BUILTIN_PMOVUSWB512_MEM, UNKNOWN, (int) VOID_FTYPE_PV32QI_V32HI_USI) BDESC (OPTION_MASK_ISA_AVX512BW, 0, CODE_FOR_avx512bw_ss_truncatev32hiv32qi2_mask_store, "__builtin_ia32_pmovswb512mem_mask", IX86_BUILTIN_PMOVSWB512_MEM, UNKNOWN, (int) VOID_FTYPE_PV32QI_V32HI_USI) diff --git a/gcc/config/i386/i386-expand.c b/gcc/config/i386/i386-expand.c index 96f70ae5aaa..270585decb2 100644 --- a/gcc/config/i386/i386-expand.c +++ b/gcc/config/i386/i386-expand.c @@ -10556,18 +10556,18 @@ ix86_expand_special_args_builtin (const struct builtin_description *d, case VOID_FTYPE_PV8SI_V8DI_UQI: case VOID_FTYPE_PV8HI_V8DI_UQI: case VOID_FTYPE_PV16HI_V16SI_UHI: - case VOID_FTYPE_PV16QI_V8DI_UQI: + case VOID_FTYPE_PUDI_V8DI_UQI: case VOID_FTYPE_PV16QI_V16SI_UHI: case VOID_FTYPE_PV4SI_V4DI_UQI: - case VOID_FTYPE_PV4SI_V2DI_UQI: - case VOID_FTYPE_PV8HI_V4DI_UQI: - case VOID_FTYPE_PV8HI_V2DI_UQI: + case VOID_FTYPE_PUDI_V2DI_UQI: + case VOID_FTYPE_PUDI_V4DI_UQI: + case VOID_FTYPE_PUSI_V2DI_UQI: case VOID_FTYPE_PV8HI_V8SI_UQI: - case VOID_FTYPE_PV8HI_V4SI_UQI: - case VOID_FTYPE_PV16QI_V4DI_UQI: - case VOID_FTYPE_PV16QI_V2DI_UQI: - case VOID_FTYPE_PV16QI_V8SI_UQI: - case VOID_FTYPE_PV16QI_V4SI_UQI: + case VOID_FTYPE_PUDI_V4SI_UQI: + case VOID_FTYPE_PUSI_V4DI_UQI: + case VOID_FTYPE_PUHI_V2DI_UQI: + case VOID_FTYPE_PUDI_V8SI_UQI: + case VOID_FTYPE_PUSI_V4SI_UQI: case VOID_FTYPE_PCHAR_V64QI_UDI: case VOID_FTYPE_PCHAR_V32QI_USI: case VOID_FTYPE_PCHAR_V16QI_UHI: @@ -10588,7 +10588,7 @@ ix86_expand_special_args_builtin (const struct builtin_description *d, case VOID_FTYPE_PFLOAT_V4SF_UQI: case VOID_FTYPE_PV32QI_V32HI_USI: case VOID_FTYPE_PV16QI_V16HI_UHI: - case VOID_FTYPE_PV8QI_V8HI_UQI: + case VOID_FTYPE_PUDI_V8HI_UQI: nargs = 2; klass = store; /* Reserve memory operand for target. */ diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md index 1cf1b8cea3b..f2a5a34284d 100644 --- a/gcc/config/i386/sse.md +++ b/gcc/config/i386/sse.md @@ -10720,27 +10720,29 @@ (set_attr "prefix" "evex") (set_attr "mode" "TI")]) -(define_insn "*avx512vl_v2div2qi2_store" - [(set (match_operand:V16QI 0 "memory_operand" "=m") - (vec_concat:V16QI - (any_truncate:V2QI - (match_operand:V2DI 1 "register_operand" "v")) - (vec_select:V14QI - (match_dup 0) - (parallel [(const_int 2) (const_int 3) - (const_int 4) (const_int 5) - (const_int 6) (const_int 7) - (const_int 8) (const_int 9) - (const_int 10) (const_int 11) - (const_int 12) (const_int 13) - (const_int 14) (const_int 15)]))))] +(define_insn "*avx512vl_v2div2qi2_store_1" + [(set (match_operand:V2QI 0 "memory_operand" "=m") + (any_truncate:V2QI + (match_operand:V2DI 1 "register_operand" "v")))] "TARGET_AVX512VL" - "vpmovqb\t{%1, %0|%w0, %1}" + "vpmovqb\t{%1, %0|%0, %1}" [(set_attr "type" "ssemov") (set_attr "memory" "store") (set_attr "prefix" "evex") (set_attr "mode" "TI")]) +(define_insn_and_split "*avx512vl_v2div2qi2_store_2" + [(set (match_operand:HI 0 "memory_operand") + (subreg:HI + (any_truncate:V2QI + (match_operand:V2DI 1 "register_operand")) 0))] + "TARGET_AVX512VL && ix86_pre_reload_split ()" + "#" + "&& 1" + [(set (match_dup 0) + (any_truncate:V2QI (match_dup 1)))] + "operands[0] = adjust_address_nv (operands[0], V2QImode, 0);") + (define_insn "avx512vl_v2div2qi2_mask" [(set (match_operand:V16QI 0 "register_operand" "=v") (vec_concat:V16QI @@ -10785,52 +10787,66 @@ (set_attr "prefix" "evex") (set_attr "mode" "TI")]) -(define_insn "avx512vl_v2div2qi2_mask_store" - [(set (match_operand:V16QI 0 "memory_operand" "=m") - (vec_concat:V16QI - (vec_merge:V2QI - (any_truncate:V2QI - (match_operand:V2DI 1 "register_operand" "v")) - (vec_select:V2QI - (match_dup 0) - (parallel [(const_int 0) (const_int 1)])) - (match_operand:QI 2 "register_operand" "Yk")) - (vec_select:V14QI - (match_dup 0) - (parallel [(const_int 2) (const_int 3) - (const_int 4) (const_int 5) - (const_int 6) (const_int 7) - (const_int 8) (const_int 9) - (const_int 10) (const_int 11) - (const_int 12) (const_int 13) - (const_int 14) (const_int 15)]))))] +(define_insn "*avx512vl_v2div2qi2_mask_store_1" + [(set (match_operand:V2QI 0 "memory_operand" "=m") + (vec_merge:V2QI + (any_truncate:V2QI + (match_operand:V2DI 1 "register_operand" "v")) + (match_dup 0) + (match_operand:QI 2 "register_operand" "Yk")))] "TARGET_AVX512VL" - "vpmovqb\t{%1, %0%{%2%}|%w0%{%2%}, %1}" + "vpmovqb\t{%1, %0%{%2%}|%0%{%2%}, %1}" [(set_attr "type" "ssemov") (set_attr "memory" "store") (set_attr "prefix" "evex") (set_attr "mode" "TI")]) -(define_insn "*avx512vl_v4qi2_store" - [(set (match_operand:V16QI 0 "memory_operand" "=m") - (vec_concat:V16QI - (any_truncate:V4QI - (match_operand:VI4_128_8_256 1 "register_operand" "v")) - (vec_select:V12QI - (match_dup 0) - (parallel [(const_int 4) (const_int 5) - (const_int 6) (const_int 7) - (const_int 8) (const_int 9) - (const_int 10) (const_int 11) - (const_int 12) (const_int 13) - (const_int 14) (const_int 15)]))))] +(define_insn_and_split "avx512vl_v2div2qi2_mask_store_2" + [(set (match_operand:HI 0 "memory_operand") + (subreg:HI + (vec_merge:V2QI + (any_truncate:V2QI + (match_operand:V2DI 1 "register_operand")) + (vec_select:V2QI + (subreg:V4QI + (vec_concat:V2HI + (match_dup 0) + (const_int 0)) 0) + (parallel [(const_int 0) (const_int 1)])) + (match_operand:QI 2 "register_operand")) 0))] + "TARGET_AVX512VL && ix86_pre_reload_split ()" + "#" + "&& 1" + [(set (match_dup 0) + (vec_merge:V2QI + (any_truncate:V2QI (match_dup 1)) + (match_dup 0) + (match_dup 2)))] + "operands[0] = adjust_address_nv (operands[0], V2QImode, 0);") + +(define_insn "*avx512vl_v4qi2_store_1" + [(set (match_operand:V4QI 0 "memory_operand" "=m") + (any_truncate:V4QI + (match_operand:VI4_128_8_256 1 "register_operand" "v")))] "TARGET_AVX512VL" - "vpmov\t{%1, %0|%k0, %1}" + "vpmov\t{%1, %0|%0, %1}" [(set_attr "type" "ssemov") (set_attr "memory" "store") (set_attr "prefix" "evex") (set_attr "mode" "TI")]) +(define_insn_and_split "*avx512vl_v4qi2_store_2" + [(set (match_operand:SI 0 "memory_operand") + (subreg:SI + (any_truncate:V4QI + (match_operand:VI4_128_8_256 1 "register_operand")) 0))] + "TARGET_AVX512VL && ix86_pre_reload_split ()" + "#" + "&& 1" + [(set (match_dup 0) + (any_truncate:V4QI (match_dup 1)))] + "operands[0] = adjust_address_nv (operands[0], V4QImode, 0);") + (define_insn "avx512vl_v4qi2_mask" [(set (match_operand:V16QI 0 "register_operand" "=v") (vec_concat:V16QI @@ -10875,53 +10891,70 @@ (set_attr "prefix" "evex") (set_attr "mode" "TI")]) -(define_insn "avx512vl_v4qi2_mask_store" - [(set (match_operand:V16QI 0 "memory_operand" "=m") - (vec_concat:V16QI - (vec_merge:V4QI - (any_truncate:V4QI - (match_operand:VI4_128_8_256 1 "register_operand" "v")) - (vec_select:V4QI - (match_dup 0) - (parallel [(const_int 0) (const_int 1) - (const_int 2) (const_int 3)])) - (match_operand:QI 2 "register_operand" "Yk")) - (vec_select:V12QI - (match_dup 0) - (parallel [(const_int 4) (const_int 5) - (const_int 6) (const_int 7) - (const_int 8) (const_int 9) - (const_int 10) (const_int 11) - (const_int 12) (const_int 13) - (const_int 14) (const_int 15)]))))] +(define_insn "*avx512vl_v4qi2_mask_store_1" + [(set (match_operand:V4QI 0 "memory_operand" "=m") + (vec_merge:V4QI + (any_truncate:V4QI + (match_operand:VI4_128_8_256 1 "register_operand" "v")) + (match_dup 0) + (match_operand:QI 2 "register_operand" "Yk")))] "TARGET_AVX512VL" - "vpmov\t{%1, %0%{%2%}|%k0%{%2%}, %1}" + "vpmov\t{%1, %0%{%2%}|%0%{%2%}, %1}" [(set_attr "type" "ssemov") (set_attr "memory" "store") (set_attr "prefix" "evex") (set_attr "mode" "TI")]) +(define_insn_and_split "avx512vl_v4qi2_mask_store_2" + [(set (match_operand:SI 0 "memory_operand") + (subreg:SI + (vec_merge:V4QI + (any_truncate:V4QI + (match_operand:VI4_128_8_256 1 "register_operand")) + (vec_select:V4QI + (subreg:V8QI + (vec_concat:V2SI + (match_dup 0) + (const_int 0)) 0) + (parallel [(const_int 0) (const_int 1) + (const_int 2) (const_int 3)])) + (match_operand:QI 2 "register_operand")) 0))] + "TARGET_AVX512VL && ix86_pre_reload_split ()" + "#" + "&& 1" + [(set (match_dup 0) + (vec_merge:V4QI + (any_truncate:V4QI (match_dup 1)) + (match_dup 0) + (match_dup 2)))] + "operands[0] = adjust_address_nv (operands[0], V4QImode, 0);") + (define_mode_iterator VI2_128_BW_4_256 [(V8HI "TARGET_AVX512BW") V8SI]) -(define_insn "*avx512vl_v8qi2_store" - [(set (match_operand:V16QI 0 "memory_operand" "=m") - (vec_concat:V16QI - (any_truncate:V8QI - (match_operand:VI2_128_BW_4_256 1 "register_operand" "v")) - (vec_select:V8QI - (match_dup 0) - (parallel [(const_int 8) (const_int 9) - (const_int 10) (const_int 11) - (const_int 12) (const_int 13) - (const_int 14) (const_int 15)]))))] +(define_insn "*avx512vl_v8qi2_store_1" + [(set (match_operand:V8QI 0 "memory_operand" "=m") + (any_truncate:V8QI + (match_operand:VI2_128_BW_4_256 1 "register_operand" "v")))] "TARGET_AVX512VL" - "vpmov\t{%1, %0|%q0, %1}" + "vpmov\t{%1, %0|%0, %1}" [(set_attr "type" "ssemov") (set_attr "memory" "store") (set_attr "prefix" "evex") (set_attr "mode" "TI")]) +(define_insn_and_split "*avx512vl_v8qi2_store_2" + [(set (match_operand:DI 0 "memory_operand" "=m") + (subreg:DI + (any_truncate:V8QI + (match_operand:VI2_128_BW_4_256 1 "register_operand" "v")) 0))] + "TARGET_AVX512VL && ix86_pre_reload_split ()" + "#" + "&& 1" + [(set (match_dup 0) + (any_truncate:V8QI (match_dup 1)))] + "operands[0] = adjust_address_nv (operands[0], V8QImode, 0);") + (define_insn "avx512vl_v8qi2_mask" [(set (match_operand:V16QI 0 "register_operand" "=v") (vec_concat:V16QI @@ -10966,32 +10999,46 @@ (set_attr "prefix" "evex") (set_attr "mode" "TI")]) -(define_insn "avx512vl_v8qi2_mask_store" - [(set (match_operand:V16QI 0 "memory_operand" "=m") - (vec_concat:V16QI - (vec_merge:V8QI - (any_truncate:V8QI - (match_operand:VI2_128_BW_4_256 1 "register_operand" "v")) - (vec_select:V8QI - (match_dup 0) - (parallel [(const_int 0) (const_int 1) - (const_int 2) (const_int 3) - (const_int 4) (const_int 5) - (const_int 6) (const_int 7)])) - (match_operand:QI 2 "register_operand" "Yk")) - (vec_select:V8QI - (match_dup 0) - (parallel [(const_int 8) (const_int 9) - (const_int 10) (const_int 11) - (const_int 12) (const_int 13) - (const_int 14) (const_int 15)]))))] +(define_insn "*avx512vl_v8qi2_mask_store_1" + [(set (match_operand:V8QI 0 "memory_operand" "=m") + (vec_merge:V8QI + (any_truncate:V8QI + (match_operand:VI2_128_BW_4_256 1 "register_operand" "v")) + (match_dup 0) + (match_operand:QI 2 "register_operand" "Yk")))] "TARGET_AVX512VL" - "vpmov\t{%1, %0%{%2%}|%q0%{%2%}, %1}" + "vpmov\t{%1, %0%{%2%}|%0%{%2%}, %1}" [(set_attr "type" "ssemov") (set_attr "memory" "store") (set_attr "prefix" "evex") (set_attr "mode" "TI")]) +(define_insn_and_split "avx512vl_v8qi2_mask_store_2" + [(set (match_operand:DI 0 "memory_operand") + (subreg:DI + (vec_merge:V8QI + (any_truncate:V8QI + (match_operand:VI2_128_BW_4_256 1 "register_operand")) + (vec_select:V8QI + (subreg:V16QI + (vec_concat:V2DI + (match_dup 0) + (const_int 0)) 0) + (parallel [(const_int 0) (const_int 1) + (const_int 2) (const_int 3) + (const_int 4) (const_int 5) + (const_int 6) (const_int 7)])) + (match_operand:QI 2 "register_operand")) 0))] + "TARGET_AVX512VL && ix86_pre_reload_split ()" + "#" + "&& 1" + [(set (match_dup 0) + (vec_merge:V8QI + (any_truncate:V8QI (match_dup 1)) + (match_dup 0) + (match_dup 2)))] + "operands[0] = adjust_address_nv (operands[0], V8QImode, 0);") + (define_mode_iterator PMOV_SRC_MODE_4 [V4DI V2DI V4SI]) (define_mode_attr pmov_dst_4 [(V4DI "V4HI") (V2DI "V2HI") (V4SI "V4HI")]) @@ -11026,15 +11073,10 @@ (set_attr "prefix" "evex") (set_attr "mode" "TI")]) -(define_insn "*avx512vl_v4hi2_store" - [(set (match_operand:V8HI 0 "memory_operand" "=m") - (vec_concat:V8HI - (any_truncate:V4HI - (match_operand:VI4_128_8_256 1 "register_operand" "v")) - (vec_select:V4HI - (match_dup 0) - (parallel [(const_int 4) (const_int 5) - (const_int 6) (const_int 7)]))))] +(define_insn "*avx512vl_v4hi2_store_1" + [(set (match_operand:V4HI 0 "memory_operand" "=m") + (any_truncate:V4HI + (match_operand:VI4_128_8_256 1 "register_operand" "v")))] "TARGET_AVX512VL" "vpmov\t{%1, %0|%0, %1}" [(set_attr "type" "ssemov") @@ -11042,6 +11084,18 @@ (set_attr "prefix" "evex") (set_attr "mode" "TI")]) +(define_insn_and_split "*avx512vl_v4hi2_store_2" + [(set (match_operand:DI 0 "memory_operand") + (subreg:DI + (any_truncate:V4HI + (match_operand:VI4_128_8_256 1 "register_operand")) 0))] + "TARGET_AVX512VL && ix86_pre_reload_split ()" + "#" + "&& 1" + [(set (match_dup 0) + (any_truncate:V4HI (match_dup 1)))] + "operands[0] = adjust_address_nv (operands[0], V4HImode, 0);") + (define_insn "avx512vl_v4hi2_mask" [(set (match_operand:V8HI 0 "register_operand" "=v") (vec_concat:V8HI @@ -11078,21 +11132,13 @@ (set_attr "prefix" "evex") (set_attr "mode" "TI")]) -(define_insn "avx512vl_v4hi2_mask_store" - [(set (match_operand:V8HI 0 "memory_operand" "=m") - (vec_concat:V8HI - (vec_merge:V4HI - (any_truncate:V4HI - (match_operand:VI4_128_8_256 1 "register_operand" "v")) - (vec_select:V4HI - (match_dup 0) - (parallel [(const_int 0) (const_int 1) - (const_int 2) (const_int 3)])) - (match_operand:QI 2 "register_operand" "Yk")) - (vec_select:V4HI - (match_dup 0) - (parallel [(const_int 4) (const_int 5) - (const_int 6) (const_int 7)]))))] +(define_insn "*avx512vl_v4hi2_mask_store_1" + [(set (match_operand:V4HI 0 "memory_operand" "=m") + (vec_merge:V4HI + (any_truncate:V4HI + (match_operand:VI4_128_8_256 1 "register_operand" "v")) + (match_dup 0) + (match_operand:QI 2 "register_operand" "Yk")))] "TARGET_AVX512VL" { if (GET_MODE_SIZE (GET_MODE_INNER (mode)) == 4) @@ -11104,16 +11150,35 @@ (set_attr "prefix" "evex") (set_attr "mode" "TI")]) -(define_insn "*avx512vl_v2div2hi2_store" - [(set (match_operand:V8HI 0 "memory_operand" "=m") - (vec_concat:V8HI - (any_truncate:V2HI - (match_operand:V2DI 1 "register_operand" "v")) - (vec_select:V6HI - (match_dup 0) - (parallel [(const_int 2) (const_int 3) - (const_int 4) (const_int 5) - (const_int 6) (const_int 7)]))))] +(define_insn_and_split "avx512vl_v4hi2_mask_store_2" + [(set (match_operand:DI 0 "memory_operand") + (subreg:DI + (vec_merge:V4HI + (any_truncate:V4HI + (match_operand:VI4_128_8_256 1 "register_operand")) + (vec_select:V4HI + (subreg:V8HI + (vec_concat:V2DI + (match_dup 0) + (const_int 0)) 0) + (parallel [(const_int 0) (const_int 1) + (const_int 2) (const_int 3)])) + (match_operand:QI 2 "register_operand")) 0))] + "TARGET_AVX512VL && ix86_pre_reload_split ()" + "#" + "&& 1" + [(set (match_dup 0) + (vec_merge:V4HI + (any_truncate:V4HI (match_dup 1)) + (match_dup 0) + (match_dup 2)))] + "operands[0] = adjust_address_nv (operands[0], V4HImode, 0);") + + +(define_insn "*avx512vl_v2div2hi2_store_1" + [(set (match_operand:V2HI 0 "memory_operand" "=m") + (any_truncate:V2HI + (match_operand:V2DI 1 "register_operand" "v")))] "TARGET_AVX512VL" "vpmovqw\t{%1, %0|%0, %1}" [(set_attr "type" "ssemov") @@ -11121,6 +11186,18 @@ (set_attr "prefix" "evex") (set_attr "mode" "TI")]) +(define_insn_and_split "*avx512vl_v2div2hi2_store_2" + [(set (match_operand:SI 0 "memory_operand") + (subreg:SI + (any_truncate:V2HI + (match_operand:V2DI 1 "register_operand")) 0))] + "TARGET_AVX512VL && ix86_pre_reload_split ()" + "#" + "&& 1" + [(set (match_dup 0) + (any_truncate:V2HI (match_dup 1)))] + "operands[0] = adjust_address_nv (operands[0], V2HImode, 0);") + (define_insn "avx512vl_v2div2hi2_mask" [(set (match_operand:V8HI 0 "register_operand" "=v") (vec_concat:V8HI @@ -11157,21 +11234,13 @@ (set_attr "prefix" "evex") (set_attr "mode" "TI")]) -(define_insn "avx512vl_v2div2hi2_mask_store" - [(set (match_operand:V8HI 0 "memory_operand" "=m") - (vec_concat:V8HI - (vec_merge:V2HI - (any_truncate:V2HI - (match_operand:V2DI 1 "register_operand" "v")) - (vec_select:V2HI - (match_dup 0) - (parallel [(const_int 0) (const_int 1)])) - (match_operand:QI 2 "register_operand" "Yk")) - (vec_select:V6HI - (match_dup 0) - (parallel [(const_int 2) (const_int 3) - (const_int 4) (const_int 5) - (const_int 6) (const_int 7)]))))] +(define_insn "*avx512vl_v2div2hi2_mask_store_1" + [(set (match_operand:V2HI 0 "memory_operand" "=m") + (vec_merge:V2HI + (any_truncate:V2HI + (match_operand:V2DI 1 "register_operand" "v")) + (match_dup 0) + (match_operand:QI 2 "register_operand" "Yk")))] "TARGET_AVX512VL" "vpmovqw\t{%1, %0%{%2%}|%0%{%2%}, %g1}" [(set_attr "type" "ssemov") @@ -11179,6 +11248,29 @@ (set_attr "prefix" "evex") (set_attr "mode" "TI")]) +(define_insn_and_split "avx512vl_v2div2hi2_mask_store_2" + [(set (match_operand:SI 0 "memory_operand") + (subreg:SI + (vec_merge:V2HI + (any_truncate:V2HI + (match_operand:V2DI 1 "register_operand")) + (vec_select:V2HI + (subreg:V4HI + (vec_concat:V2SI + (match_dup 0) + (const_int 0)) 0) + (parallel [(const_int 0) (const_int 1)])) + (match_operand:QI 2 "register_operand")) 0))] + "TARGET_AVX512VL && ix86_pre_reload_split ()" + "#" + "&& 1" + [(set (match_dup 0) + (vec_merge:V2HI + (any_truncate:V2HI (match_dup 1)) + (match_dup 0) + (match_dup 2)))] + "operands[0] = adjust_address_nv (operands[0], V2HImode, 0);") + (define_expand "truncv2div2si2" [(set (match_operand:V2SI 0 "register_operand") (truncate:V2SI @@ -11204,14 +11296,10 @@ (set_attr "prefix" "evex") (set_attr "mode" "TI")]) -(define_insn "*avx512vl_v2div2si2_store" - [(set (match_operand:V4SI 0 "memory_operand" "=m") - (vec_concat:V4SI - (any_truncate:V2SI - (match_operand:V2DI 1 "register_operand" "v")) - (vec_select:V2SI - (match_dup 0) - (parallel [(const_int 2) (const_int 3)]))))] +(define_insn "*avx512vl_v2div2si2_store_1" + [(set (match_operand:V2SI 0 "memory_operand" "=m") + (any_truncate:V2SI + (match_operand:V2DI 1 "register_operand" "v")))] "TARGET_AVX512VL" "vpmovqd\t{%1, %0|%0, %1}" [(set_attr "type" "ssemov") @@ -11219,6 +11307,18 @@ (set_attr "prefix" "evex") (set_attr "mode" "TI")]) +(define_insn_and_split "*avx512vl_v2div2si2_store_2" + [(set (match_operand:DI 0 "memory_operand") + (subreg:DI + (any_truncate:V2SI + (match_operand:V2DI 1 "register_operand")) 0))] + "TARGET_AVX512VL && ix86_pre_reload_split ()" + "#" + "&& 1" + [(set (match_dup 0) + (any_truncate:V2SI (match_dup 1)))] + "operands[0] = adjust_address_nv (operands[0], V2SImode, 0);") + (define_insn "avx512vl_v2div2si2_mask" [(set (match_operand:V4SI 0 "register_operand" "=v") (vec_concat:V4SI @@ -11251,26 +11351,43 @@ (set_attr "prefix" "evex") (set_attr "mode" "TI")]) -(define_insn "avx512vl_v2div2si2_mask_store" - [(set (match_operand:V4SI 0 "memory_operand" "=m") - (vec_concat:V4SI - (vec_merge:V2SI - (any_truncate:V2SI - (match_operand:V2DI 1 "register_operand" "v")) - (vec_select:V2SI - (match_dup 0) - (parallel [(const_int 0) (const_int 1)])) - (match_operand:QI 2 "register_operand" "Yk")) - (vec_select:V2SI - (match_dup 0) - (parallel [(const_int 2) (const_int 3)]))))] +(define_insn "*avx512vl_v2div2si2_mask_store_1" + [(set (match_operand:V2SI 0 "memory_operand" "=m") + (vec_merge:V2SI + (any_truncate:V2SI + (match_operand:V2DI 1 "register_operand" "v")) + (match_dup 0) + (match_operand:QI 2 "register_operand" "Yk")))] "TARGET_AVX512VL" - "vpmovqd\t{%1, %0%{%2%}|%0%{%2%}, %t1}" + "vpmovqd\t{%1, %0%{%2%}|%0%{%2%}, %1}" [(set_attr "type" "ssemov") (set_attr "memory" "store") (set_attr "prefix" "evex") (set_attr "mode" "TI")]) +(define_insn_and_split "avx512vl_v2div2si2_mask_store_2" + [(set (match_operand:DI 0 "memory_operand") + (subreg:DI + (vec_merge:V2SI + (any_truncate:V2SI + (match_operand:V2DI 1 "register_operand")) + (vec_select:V2SI + (subreg:V4SI + (vec_concat:V2DI + (match_dup 0) + (const_int 0)) 0) + (parallel [(const_int 0) (const_int 1)])) + (match_operand:QI 2 "register_operand")) 0))] + "TARGET_AVX512VL && ix86_pre_reload_split ()" + "#" + "&& 1" + [(set (match_dup 0) + (vec_merge:V2SI + (any_truncate:V2SI (match_dup 1)) + (match_dup 0) + (match_dup 2)))] + "operands[0] = adjust_address_nv (operands[0], V2SImode, 0);") + (define_expand "truncv8div8qi2" [(set (match_operand:V8QI 0 "register_operand") (truncate:V8QI @@ -11297,17 +11414,10 @@ (set_attr "prefix" "evex") (set_attr "mode" "TI")]) -(define_insn "*avx512f_v8div16qi2_store" - [(set (match_operand:V16QI 0 "memory_operand" "=m") - (vec_concat:V16QI - (any_truncate:V8QI - (match_operand:V8DI 1 "register_operand" "v")) - (vec_select:V8QI - (match_dup 0) - (parallel [(const_int 8) (const_int 9) - (const_int 10) (const_int 11) - (const_int 12) (const_int 13) - (const_int 14) (const_int 15)]))))] +(define_insn "*avx512f_v8div16qi2_store_1" + [(set (match_operand:V8QI 0 "memory_operand" "=m") + (any_truncate:V8QI + (match_operand:V8DI 1 "register_operand" "v")))] "TARGET_AVX512F" "vpmovqb\t{%1, %0|%0, %1}" [(set_attr "type" "ssemov") @@ -11315,6 +11425,18 @@ (set_attr "prefix" "evex") (set_attr "mode" "TI")]) +(define_insn_and_split "*avx512f_v8div16qi2_store_2" + [(set (match_operand:DI 0 "memory_operand") + (subreg:DI + (any_truncate:V8QI + (match_operand:V8DI 1 "register_operand")) 0))] + "TARGET_AVX512F && ix86_pre_reload_split ()" + "#" + "&& 1" + [(set (match_dup 0) + (any_truncate:V8QI (match_dup 1)))] + "operands[0] = adjust_address_nv (operands[0], V8QImode, 0);") + (define_insn "avx512f_v8div16qi2_mask" [(set (match_operand:V16QI 0 "register_operand" "=v") (vec_concat:V16QI @@ -11359,32 +11481,46 @@ (set_attr "prefix" "evex") (set_attr "mode" "TI")]) -(define_insn "avx512f_v8div16qi2_mask_store" - [(set (match_operand:V16QI 0 "memory_operand" "=m") - (vec_concat:V16QI - (vec_merge:V8QI - (any_truncate:V8QI - (match_operand:V8DI 1 "register_operand" "v")) - (vec_select:V8QI - (match_dup 0) - (parallel [(const_int 0) (const_int 1) - (const_int 2) (const_int 3) - (const_int 4) (const_int 5) - (const_int 6) (const_int 7)])) - (match_operand:QI 2 "register_operand" "Yk")) - (vec_select:V8QI - (match_dup 0) - (parallel [(const_int 8) (const_int 9) - (const_int 10) (const_int 11) - (const_int 12) (const_int 13) - (const_int 14) (const_int 15)]))))] +(define_insn "*avx512f_v8div16qi2_mask_store_1" + [(set (match_operand:V8QI 0 "memory_operand" "=m") + (vec_merge:V8QI + (any_truncate:V8QI + (match_operand:V8DI 1 "register_operand" "v")) + (match_dup 0) + (match_operand:QI 2 "register_operand" "Yk")))] "TARGET_AVX512F" - "vpmovqb\t{%1, %0%{%2%}|%q0%{%2%}, %1}" + "vpmovqb\t{%1, %0%{%2%}|%0%{%2%}, %1}" [(set_attr "type" "ssemov") (set_attr "memory" "store") (set_attr "prefix" "evex") (set_attr "mode" "TI")]) +(define_insn_and_split "avx512f_v8div16qi2_mask_store_2" + [(set (match_operand:DI 0 "memory_operand") + (subreg:DI + (vec_merge:V8QI + (any_truncate:V8QI + (match_operand:V8DI 1 "register_operand")) + (vec_select:V8QI + (subreg:V16QI + (vec_concat:V2DI + (match_dup 0) + (const_int 0)) 0) + (parallel [(const_int 0) (const_int 1) + (const_int 2) (const_int 3) + (const_int 4) (const_int 5) + (const_int 6) (const_int 7)])) + (match_operand:QI 2 "register_operand")) 0))] + "TARGET_AVX512F && ix86_pre_reload_split ()" + "#" + "&& 1" + [(set (match_dup 0) + (vec_merge:V8QI + (any_truncate:V8QI (match_dup 1)) + (match_dup 0) + (match_dup 2)))] + "operands[0] = adjust_address_nv (operands[0], V8QImode, 0);") + ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ;; ;; Parallel integral arithmetic -- 2.30.2