From: Uros Bizjak Date: Wed, 25 Mar 2015 21:56:11 +0000 (+0100) Subject: sse-13.c: Include x86intrin.h and adjust #defines. X-Git-Url: https://git.libre-soc.org/?a=commitdiff_plain;h=9c8129f563e1788f5ff6ad041d8ece56d575a05c;p=gcc.git sse-13.c: Include x86intrin.h and adjust #defines. * gcc.target/i386/sse-13.c: Include x86intrin.h and adjust #defines. * gcc.target/i386/sse-23.c: Do not explicitly include wmmintrin.h, smmintrin.h and mm3dnow.h. From-SVN: r221674 --- diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog index 68534fc4e6a..605f6fcfc35 100644 --- a/gcc/testsuite/ChangeLog +++ b/gcc/testsuite/ChangeLog @@ -1,3 +1,9 @@ +2015-03-25 Uros Bizjak + + * gcc.target/i386/sse-13.c: Include x86intrin.h and adjust #defines. + * gcc.target/i386/sse-23.c: Do not explicitly include wmmintrin.h, + smmintrin.h and mm3dnow.h. + 2015-03-25 Marek Polacek PR c++/65558 diff --git a/gcc/testsuite/gcc.target/i386/sse-13.c b/gcc/testsuite/gcc.target/i386/sse-13.c index 104c63ec7fc..b8ee0f2abb6 100644 --- a/gcc/testsuite/gcc.target/i386/sse-13.c +++ b/gcc/testsuite/gcc.target/i386/sse-13.c @@ -55,20 +55,6 @@ #define __builtin_ia32_vcvtps2ph(A, I) __builtin_ia32_vcvtps2ph(A, 1) #define __builtin_ia32_vcvtps2ph256(A, I) __builtin_ia32_vcvtps2ph256(A, 1) -/* avx512pfintrin.h */ -#define __builtin_ia32_gatherpfdps(A, B, C, D, E) __builtin_ia32_gatherpfdps (A, B, C, 1, 1) -#define __builtin_ia32_gatherpfqps(A, B, C, D, E) __builtin_ia32_gatherpfqps (A, B, C, 1, 1) -#define __builtin_ia32_scatterpfdps(A, B, C, D, E) __builtin_ia32_scatterpfdps (A, B, C, 1, 1) -#define __builtin_ia32_scatterpfqps(A, B, C, D, E) __builtin_ia32_scatterpfqps (A, B, C, 1, 1) - -/* avx512erintrin.h */ -#define __builtin_ia32_exp2pd_mask(A, B, C, D) __builtin_ia32_exp2pd_mask (A, B, C, 1) -#define __builtin_ia32_exp2ps_mask(A, B, C, D) __builtin_ia32_exp2ps_mask (A, B, C, 1) -#define __builtin_ia32_rcp28pd_mask(A, B, C, D) __builtin_ia32_rcp28pd_mask (A, B, C, 1) -#define __builtin_ia32_rcp28ps_mask(A, B, C, D) __builtin_ia32_rcp28ps_mask (A, B, C, 1) -#define __builtin_ia32_rsqrt28pd_mask(A, B, C, D) __builtin_ia32_rsqrt28pd_mask (A, B, C, 1) -#define __builtin_ia32_rsqrt28ps_mask(A, B, C, D) __builtin_ia32_rsqrt28ps_mask (A, B, C, 1) - /* wmmintrin.h */ #define __builtin_ia32_aeskeygenassist128(X, C) __builtin_ia32_aeskeygenassist128(X, 1) #define __builtin_ia32_pclmulqdq128(X, Y, I) __builtin_ia32_pclmulqdq128(X, Y, 1) @@ -195,13 +181,13 @@ #define __builtin_ia32_gatherdiv4si256(X, Y, Z, K, M) __builtin_ia32_gatherdiv4si256(X, Y, Z, K, 1) /* rtmintrin.h */ -#define __builtin_ia32_xabort (N) __builtin_ia32_xabort (1) +#define __builtin_ia32_xabort(N) __builtin_ia32_xabort(1) /* avx512fintrin.h */ #define __builtin_ia32_addpd512_mask(A, B, C, D, E) __builtin_ia32_addpd512_mask(A, B, C, D, 8) #define __builtin_ia32_addps512_mask(A, B, C, D, E) __builtin_ia32_addps512_mask(A, B, C, D, 8) -#define __builtin_ia32_addsd_mask(A, B, C, D, E) __builtin_ia32_addsd_mask(A, B, C, D, 8) -#define __builtin_ia32_addss_mask(A, B, C, D, E) __builtin_ia32_addss_mask(A, B, C, D, 8) +#define __builtin_ia32_addsd_round(A, B, C) __builtin_ia32_addsd_round(A, B, 8) +#define __builtin_ia32_addss_round(A, B, C) __builtin_ia32_addss_round(A, B, 8) #define __builtin_ia32_alignd512_mask(A, B, F, D, E) __builtin_ia32_alignd512_mask(A, B, 1, D, E) #define __builtin_ia32_alignq512_mask(A, B, F, D, E) __builtin_ia32_alignq512_mask(A, B, 1, D, E) #define __builtin_ia32_cmpd512_mask(A, B, E, D) __builtin_ia32_cmpd512_mask(A, B, 1, D) @@ -217,11 +203,11 @@ #define __builtin_ia32_cvtps2dq512_mask(A, B, C, D) __builtin_ia32_cvtps2dq512_mask(A, B, C, 8) #define __builtin_ia32_cvtps2pd512_mask(A, B, C, D) __builtin_ia32_cvtps2pd512_mask(A, B, C, 8) #define __builtin_ia32_cvtps2udq512_mask(A, B, C, D) __builtin_ia32_cvtps2udq512_mask(A, B, C, 8) -#define __builtin_ia32_cvtsd2ss_mask(A, B, C, D, E) __builtin_ia32_cvtsd2ss_mask(A, B, C, D, 8) +#define __builtin_ia32_cvtsd2ss_round(A, B, C) __builtin_ia32_cvtsd2ss_round(A, B, 8) +#define __builtin_ia32_cvtss2sd_round(A, B, C) __builtin_ia32_cvtss2sd_round(A, B, 4) #define __builtin_ia32_cvtsi2sd64(A, B, C) __builtin_ia32_cvtsi2sd64(A, B, 8) #define __builtin_ia32_cvtsi2ss32(A, B, C) __builtin_ia32_cvtsi2ss32(A, B, 8) #define __builtin_ia32_cvtsi2ss64(A, B, C) __builtin_ia32_cvtsi2ss64(A, B, 8) -#define __builtin_ia32_cvtss2sd_mask(A, B, C, D, E) __builtin_ia32_cvtss2sd_mask(A, B, C, D, 8) #define __builtin_ia32_cvttpd2dq512_mask(A, B, C, D) __builtin_ia32_cvttpd2dq512_mask(A, B, C, 8) #define __builtin_ia32_cvttpd2udq512_mask(A, B, C, D) __builtin_ia32_cvttpd2udq512_mask(A, B, C, 8) #define __builtin_ia32_cvttps2dq512_mask(A, B, C, D) __builtin_ia32_cvttps2dq512_mask(A, B, C, 8) @@ -232,8 +218,8 @@ #define __builtin_ia32_cvtusi2ss64(A, B, C) __builtin_ia32_cvtusi2ss64(A, B, 8) #define __builtin_ia32_divpd512_mask(A, B, C, D, E) __builtin_ia32_divpd512_mask(A, B, C, D, 8) #define __builtin_ia32_divps512_mask(A, B, C, D, E) __builtin_ia32_divps512_mask(A, B, C, D, 8) -#define __builtin_ia32_divsd_mask(A, B, C, D, E) __builtin_ia32_divsd_mask(A, B, C, D, 8) -#define __builtin_ia32_divss_mask(A, B, C, D, E) __builtin_ia32_divss_mask(A, B, C, D, 8) +#define __builtin_ia32_divsd_round(A, B, C) __builtin_ia32_divsd_round(A, B, 8) +#define __builtin_ia32_divss_round(A, B, C) __builtin_ia32_divss_round(A, B, 8) #define __builtin_ia32_extractf32x4_mask(A, E, C, D) __builtin_ia32_extractf32x4_mask(A, 1, C, D) #define __builtin_ia32_extractf64x4_mask(A, E, C, D) __builtin_ia32_extractf64x4_mask(A, 1, C, D) #define __builtin_ia32_extracti32x4_mask(A, E, C, D) __builtin_ia32_extracti32x4_mask(A, 1, C, D) @@ -256,28 +242,28 @@ #define __builtin_ia32_gathersiv8di(A, B, C, D, F) __builtin_ia32_gathersiv8di(A, B, C, D, 8) #define __builtin_ia32_getexppd512_mask(A, B, C, D) __builtin_ia32_getexppd512_mask(A, B, C, 8) #define __builtin_ia32_getexpps512_mask(A, B, C, D) __builtin_ia32_getexpps512_mask(A, B, C, 8) -#define __builtin_ia32_getexpsd128_mask(A, B, C, D, E) __builtin_ia32_getexpsd128_mask(A, B, C, D, 8) -#define __builtin_ia32_getexpss128_mask(A, B, C, D, E) __builtin_ia32_getexpss128_mask(A, B, C, D, 8) +#define __builtin_ia32_getexpsd128_round(A, B, C) __builtin_ia32_getexpsd128_round(A, B, 4) +#define __builtin_ia32_getexpss128_round(A, B, C) __builtin_ia32_getexpss128_round(A, B, 4) #define __builtin_ia32_getmantpd512_mask(A, F, C, D, E) __builtin_ia32_getmantpd512_mask(A, 1, C, D, 8) #define __builtin_ia32_getmantps512_mask(A, F, C, D, E) __builtin_ia32_getmantps512_mask(A, 1, C, D, 8) -#define __builtin_ia32_getmantsd_mask(A, B, I, D, E, F) __builtin_ia32_getmantsd_mask(A, B, 1, D, E, 8) -#define __builtin_ia32_getmantss_mask(A, B, I, D, E, F) __builtin_ia32_getmantss_mask(A, B, 1, D, E, 8) +#define __builtin_ia32_getmantsd_round(A, B, C, D) __builtin_ia32_getmantsd_round(A, B, 1, 4) +#define __builtin_ia32_getmantss_round(A, B, C, D) __builtin_ia32_getmantss_round(A, B, 1, 4) #define __builtin_ia32_insertf32x4_mask(A, B, F, D, E) __builtin_ia32_insertf32x4_mask(A, B, 1, D, E) #define __builtin_ia32_insertf64x4_mask(A, B, F, D, E) __builtin_ia32_insertf64x4_mask(A, B, 1, D, E) #define __builtin_ia32_inserti32x4_mask(A, B, F, D, E) __builtin_ia32_inserti32x4_mask(A, B, 1, D, E) #define __builtin_ia32_inserti64x4_mask(A, B, F, D, E) __builtin_ia32_inserti64x4_mask(A, B, 1, D, E) #define __builtin_ia32_maxpd512_mask(A, B, C, D, E) __builtin_ia32_maxpd512_mask(A, B, C, D, 8) #define __builtin_ia32_maxps512_mask(A, B, C, D, E) __builtin_ia32_maxps512_mask(A, B, C, D, 8) -#define __builtin_ia32_maxsd_mask(A, B, C, D, E) __builtin_ia32_maxsd_mask(A, B, C, D, 8) -#define __builtin_ia32_maxss_mask(A, B, C, D, E) __builtin_ia32_maxss_mask(A, B, C, D, 8) +#define __builtin_ia32_maxsd_round(A, B, C) __builtin_ia32_maxsd_round(A, B, 4) +#define __builtin_ia32_maxss_round(A, B, C) __builtin_ia32_maxss_round(A, B, 4) #define __builtin_ia32_minpd512_mask(A, B, C, D, E) __builtin_ia32_minpd512_mask(A, B, C, D, 8) #define __builtin_ia32_minps512_mask(A, B, C, D, E) __builtin_ia32_minps512_mask(A, B, C, D, 8) -#define __builtin_ia32_minsd_mask(A, B, C, D, E) __builtin_ia32_minsd_mask(A, B, C, D, 8) -#define __builtin_ia32_minss_mask(A, B, C, D, E) __builtin_ia32_minss_mask(A, B, C, D, 8) +#define __builtin_ia32_minsd_round(A, B, C) __builtin_ia32_minsd_round(A, B, 4) +#define __builtin_ia32_minss_round(A, B, C) __builtin_ia32_minss_round(A, B, 4) #define __builtin_ia32_mulpd512_mask(A, B, C, D, E) __builtin_ia32_mulpd512_mask(A, B, C, D, 8) #define __builtin_ia32_mulps512_mask(A, B, C, D, E) __builtin_ia32_mulps512_mask(A, B, C, D, 8) -#define __builtin_ia32_mulsd_mask(A, B, C, D, E) __builtin_ia32_mulsd_mask(A, B, C, D, 8) -#define __builtin_ia32_mulss_mask(A, B, C, D, E) __builtin_ia32_mulss_mask(A, B, C, D, 8) +#define __builtin_ia32_mulsd_round(A, B, C) __builtin_ia32_mulsd_round(A, B, 8) +#define __builtin_ia32_mulss_round(A, B, C) __builtin_ia32_mulss_round(A, B, 8) #define __builtin_ia32_permdf512_mask(A, E, C, D) __builtin_ia32_permdf512_mask(A, 1, C, D) #define __builtin_ia32_permdi512_mask(A, E, C, D) __builtin_ia32_permdi512_mask(A, 1, C, D) #define __builtin_ia32_prold512_mask(A, E, C, D) __builtin_ia32_prold512_mask(A, 1, C, D) @@ -297,12 +283,12 @@ #define __builtin_ia32_pternlogq512_maskz(A, B, C, F, E) __builtin_ia32_pternlogq512_maskz(A, B, C, 1, E) #define __builtin_ia32_rndscalepd_mask(A, F, C, D, E) __builtin_ia32_rndscalepd_mask(A, 1, C, D, 8) #define __builtin_ia32_rndscaleps_mask(A, F, C, D, E) __builtin_ia32_rndscaleps_mask(A, 1, C, D, 8) -#define __builtin_ia32_rndscalesd_mask(A, B, I, D, E, F) __builtin_ia32_rndscalesd_mask(A, B, 1, D, E, 8) -#define __builtin_ia32_rndscaless_mask(A, B, I, D, E, F) __builtin_ia32_rndscaless_mask(A, B, 1, D, E, 8) +#define __builtin_ia32_rndscalesd_round(A, B, C, D) __builtin_ia32_rndscalesd_round(A, B, 1, 4) +#define __builtin_ia32_rndscaless_round(A, B, C, D) __builtin_ia32_rndscaless_round(A, B, 1, 4) #define __builtin_ia32_scalefpd512_mask(A, B, C, D, E) __builtin_ia32_scalefpd512_mask(A, B, C, D, 8) #define __builtin_ia32_scalefps512_mask(A, B, C, D, E) __builtin_ia32_scalefps512_mask(A, B, C, D, 8) -#define __builtin_ia32_scalefsd_mask(A, B, C, D, E) __builtin_ia32_scalefsd_mask(A, B, C, D, 8) -#define __builtin_ia32_scalefss_mask(A, B, C, D, E) __builtin_ia32_scalefss_mask(A, B, C, D, 8) +#define __builtin_ia32_scalefsd_round(A, B, C) __builtin_ia32_scalefsd_round(A, B, 8) +#define __builtin_ia32_scalefss_round(A, B, C) __builtin_ia32_scalefss_round(A, B, 8) #define __builtin_ia32_scatterdiv8df(A, B, C, D, F) __builtin_ia32_scatterdiv8df(A, B, C, D, 8) #define __builtin_ia32_scatterdiv8di(A, B, C, D, F) __builtin_ia32_scatterdiv8di(A, B, C, D, 8) #define __builtin_ia32_scatterdiv16sf(A, B, C, D, F) __builtin_ia32_scatterdiv16sf(A, B, C, D, 8) @@ -319,12 +305,12 @@ #define __builtin_ia32_shufps512_mask(A, B, F, D, E) __builtin_ia32_shufps512_mask(A, B, 1, D, E) #define __builtin_ia32_sqrtpd512_mask(A, B, C, D) __builtin_ia32_sqrtpd512_mask(A, B, C, 8) #define __builtin_ia32_sqrtps512_mask(A, B, C, D) __builtin_ia32_sqrtps512_mask(A, B, C, 8) -#define __builtin_ia32_sqrtsd_mask(A, B, C, D, E) __builtin_ia32_sqrtsd_mask(A, B, C, D, 8) -#define __builtin_ia32_sqrtss_mask(A, B, C, D, E) __builtin_ia32_sqrtss_mask(A, B, C, D, 8) +#define __builtin_ia32_sqrtss_round(A, B, C) __builtin_ia32_sqrtss_round(A, B, 8) +#define __builtin_ia32_sqrtsd_round(A, B, C) __builtin_ia32_sqrtsd_round(A, B, 8) #define __builtin_ia32_subpd512_mask(A, B, C, D, E) __builtin_ia32_subpd512_mask(A, B, C, D, 8) #define __builtin_ia32_subps512_mask(A, B, C, D, E) __builtin_ia32_subps512_mask(A, B, C, D, 8) -#define __builtin_ia32_subsd_mask(A, B, C, D, E) __builtin_ia32_subsd_mask(A, B, C, D, 8) -#define __builtin_ia32_subss_mask(A, B, C, D, E) __builtin_ia32_subss_mask(A, B, C, D, 8) +#define __builtin_ia32_subsd_round(A, B, C) __builtin_ia32_subsd_round(A, B, 8) +#define __builtin_ia32_subss_round(A, B, C) __builtin_ia32_subss_round(A, B, 8) #define __builtin_ia32_ucmpd512_mask(A, B, E, D) __builtin_ia32_ucmpd512_mask(A, B, 1, D) #define __builtin_ia32_ucmpq512_mask(A, B, E, D) __builtin_ia32_ucmpq512_mask(A, B, 1, D) #define __builtin_ia32_vcomisd(A, B, C, D) __builtin_ia32_vcomisd(A, B, 1, 8) @@ -353,12 +339,8 @@ #define __builtin_ia32_vfmaddps512_mask(A, B, C, D, E) __builtin_ia32_vfmaddps512_mask(A, B, C, D, 8) #define __builtin_ia32_vfmaddps512_mask3(A, B, C, D, E) __builtin_ia32_vfmaddps512_mask3(A, B, C, D, 8) #define __builtin_ia32_vfmaddps512_maskz(A, B, C, D, E) __builtin_ia32_vfmaddps512_maskz(A, B, C, D, 8) -#define __builtin_ia32_vfmaddsd3_mask(A, B, C, D, E) __builtin_ia32_vfmaddsd3_mask(A, B, C, D, 8) -#define __builtin_ia32_vfmaddsd3_mask3(A, B, C, D, E) __builtin_ia32_vfmaddsd3_mask3(A, B, C, D, 8) -#define __builtin_ia32_vfmaddsd3_maskz(A, B, C, D, E) __builtin_ia32_vfmaddsd3_maskz(A, B, C, D, 8) -#define __builtin_ia32_vfmaddss3_mask(A, B, C, D, E) __builtin_ia32_vfmaddss3_mask(A, B, C, D, 8) -#define __builtin_ia32_vfmaddss3_mask3(A, B, C, D, E) __builtin_ia32_vfmaddss3_mask3(A, B, C, D, 8) -#define __builtin_ia32_vfmaddss3_maskz(A, B, C, D, E) __builtin_ia32_vfmaddss3_maskz(A, B, C, D, 8) +#define __builtin_ia32_vfmaddsd3_round(A, B, C, D) __builtin_ia32_vfmaddsd3_round(A, B, C, 8) +#define __builtin_ia32_vfmaddss3_round(A, B, C, D) __builtin_ia32_vfmaddss3_round(A, B, C, 8) #define __builtin_ia32_vfmaddsubpd512_mask(A, B, C, D, E) __builtin_ia32_vfmaddsubpd512_mask(A, B, C, D, 8) #define __builtin_ia32_vfmaddsubpd512_mask3(A, B, C, D, E) __builtin_ia32_vfmaddsubpd512_mask3(A, B, C, D, 8) #define __builtin_ia32_vfmaddsubpd512_maskz(A, B, C, D, E) __builtin_ia32_vfmaddsubpd512_maskz(A, B, C, D, 8) @@ -380,10 +362,31 @@ #define __builtin_ia32_vpermilpd512_mask(A, E, C, D) __builtin_ia32_vpermilpd512_mask(A, 1, C, D) #define __builtin_ia32_vpermilps512_mask(A, E, C, D) __builtin_ia32_vpermilps512_mask(A, 1, C, D) +/* avx512erintrin.h */ +#define __builtin_ia32_exp2ps_mask(A, B, C, D) __builtin_ia32_exp2ps_mask(A, B, C, 8) +#define __builtin_ia32_exp2pd_mask(A, B, C, D) __builtin_ia32_exp2pd_mask(A, B, C, 8) +#define __builtin_ia32_rcp28ps_mask(A, B, C, D) __builtin_ia32_rcp28ps_mask(A, B, C, 8) +#define __builtin_ia32_rcp28pd_mask(A, B, C, D) __builtin_ia32_rcp28pd_mask(A, B, C, 8) +#define __builtin_ia32_rsqrt28ps_mask(A, B, C, D) __builtin_ia32_rsqrt28ps_mask(A, B, C, 8) +#define __builtin_ia32_rsqrt28pd_mask(A, B, C, D) __builtin_ia32_rsqrt28pd_mask(A, B, C, 8) +#define __builtin_ia32_rcp28ss_round(A, B, C) __builtin_ia32_rcp28ss_round(A, B, 8) +#define __builtin_ia32_rcp28sd_round(A, B, C) __builtin_ia32_rcp28sd_round(A, B, 8) +#define __builtin_ia32_rsqrt28ss_round(A, B, C) __builtin_ia32_rsqrt28ss_round(A, B, 8) +#define __builtin_ia32_rsqrt28sd_round(A, B, C) __builtin_ia32_rsqrt28sd_round(A, B, 8) + +/* avx512pfintrin.h */ +#define __builtin_ia32_gatherpfdps(A, B, C, D, E) __builtin_ia32_gatherpfdps(A, B, C, 1, _MM_HINT_T0) +#define __builtin_ia32_gatherpfqps(A, B, C, D, E) __builtin_ia32_gatherpfqps(A, B, C, 1, _MM_HINT_T0) +#define __builtin_ia32_scatterpfdps(A, B, C, D, E) __builtin_ia32_scatterpfdps(A, B, C, 1, _MM_HINT_T0) +#define __builtin_ia32_scatterpfqps(A, B, C, D, E) __builtin_ia32_scatterpfqps(A, B, C, 1, _MM_HINT_T0) +#define __builtin_ia32_gatherpfdpd(A, B, C, D, E) __builtin_ia32_gatherpfdpd(A, B, C, 1, _MM_HINT_T0) +#define __builtin_ia32_gatherpfqpd(A, B, C, D, E) __builtin_ia32_gatherpfqpd(A, B, C, 1, _MM_HINT_T0) +#define __builtin_ia32_scatterpfdpd(A, B, C, D, E) __builtin_ia32_scatterpfdpd(A, B, C, 1, _MM_HINT_T0) +#define __builtin_ia32_scatterpfqpd(A, B, C, D, E) __builtin_ia32_scatterpfqpd(A, B, C, 1, _MM_HINT_T0) + /* shaintrin.h */ #define __builtin_ia32_sha1rnds4(A, B, C) __builtin_ia32_sha1rnds4(A, B, 1) - /* avx512bwintrin.h */ #define __builtin_ia32_ucmpw512_mask(A, B, E, D) __builtin_ia32_ucmpw512_mask(A, B, 1, D) #define __builtin_ia32_ucmpb512_mask(A, B, E, D) __builtin_ia32_ucmpb512_mask(A, B, 1, D) @@ -591,3 +594,5 @@ #define __builtin_ia32_fpclasspd128_mask(A, D, C) __builtin_ia32_fpclasspd128_mask(A, 1, C) #define __builtin_ia32_extracti64x2_256_mask(A, E, C, D) __builtin_ia32_extracti64x2_256_mask(A, 1, C, D) #define __builtin_ia32_extractf64x2_256_mask(A, E, C, D) __builtin_ia32_extractf64x2_256_mask(A, 1, C, D) + +#include diff --git a/gcc/testsuite/gcc.target/i386/sse-23.c b/gcc/testsuite/gcc.target/i386/sse-23.c index 9f81a8ac558..0671d57ac19 100644 --- a/gcc/testsuite/gcc.target/i386/sse-23.c +++ b/gcc/testsuite/gcc.target/i386/sse-23.c @@ -595,7 +595,5 @@ #define __builtin_ia32_extractf64x2_256_mask(A, E, C, D) __builtin_ia32_extractf64x2_256_mask(A, 1, C, D) #pragma GCC target ("sse4a,3dnow,avx,avx2,fma4,xop,aes,pclmul,popcnt,abm,lzcnt,bmi,bmi2,tbm,lwp,fsgsbase,rdrnd,f16c,fma,rtm,rdseed,prfchw,adx,fxsr,xsaveopt,avx512f,avx512er,avx512cd,avx512pf,sha,prefetchwt1,xsavec,xsaves,clflushopt,avx512bw,avx512dq,avx512vl,avx512vbmi,avx512ifma,clwb,pcommit") -#include -#include -#include + #include