From: Jakub Jelinek
Date: Mon, 12 Aug 2019 15:55:56 +0000 (+0200)
Subject: re PR target/83250 (_mm256_zextsi128_si256 missing for AVX2 zero extension)
X-Git-Url: https://git.libre-soc.org/?a=commitdiff_plain;h=e6b2dc248df351be58ecaa8bb5af8ec523d2530e;p=gcc.git

re PR target/83250 (_mm256_zextsi128_si256 missing for AVX2 zero extension)

	PR target/83250
	PR target/91340
	* config/i386/avxintrin.h (_mm256_zextpd128_pd256,
	_mm256_zextps128_ps256, _mm256_zextsi128_si256): New intrinsics.
	* config/i386/avx512fintrin.h (_mm512_zextpd128_pd512,
	_mm512_zextps128_ps512, _mm512_zextsi128_si512, _mm512_zextpd256_pd512,
	_mm512_zextps256_ps512, _mm512_zextsi256_si512): Likewise.

	* gcc.target/i386/avx-typecast-1.c: New test.
	* gcc.target/i386/avx-typecast-2.c: New test.
	* gcc.target/i386/avx512f-typecast-2.c: New test.

From-SVN: r274313
---

diff --git a/gcc/ChangeLog b/gcc/ChangeLog
index 9f767127a7d..93e600d68f2 100644
--- a/gcc/ChangeLog
+++ b/gcc/ChangeLog
@@ -1,3 +1,13 @@
+2019-08-12  Jakub Jelinek
+
+	PR target/83250
+	PR target/91340
+	* config/i386/avxintrin.h (_mm256_zextpd128_pd256,
+	_mm256_zextps128_ps256, _mm256_zextsi128_si256): New intrinsics.
+	* config/i386/avx512fintrin.h (_mm512_zextpd128_pd512,
+	_mm512_zextps128_ps512, _mm512_zextsi128_si512, _mm512_zextpd256_pd512,
+	_mm512_zextps256_ps512, _mm512_zextsi256_si512): Likewise.
+
 2019-08-12  Richard Biener
 
 	PR lto/91375
diff --git a/gcc/config/i386/avx512fintrin.h b/gcc/config/i386/avx512fintrin.h
index 454fd3d247d..c2ca4e15acd 100644
--- a/gcc/config/i386/avx512fintrin.h
+++ b/gcc/config/i386/avx512fintrin.h
@@ -15437,6 +15437,48 @@ _mm512_castsi256_si512 (__m256i __A)
   return (__m512i)__builtin_ia32_si512_256si ((__v8si)__A);
 }
 
+extern __inline __m512d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_zextpd128_pd512 (__m128d __A)
+{
+  return (__m512d) _mm512_insertf32x4 (_mm512_setzero_ps (), (__m128) __A, 0);
+}
+
+extern __inline __m512
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_zextps128_ps512 (__m128 __A)
+{
+  return _mm512_insertf32x4 (_mm512_setzero_ps (), __A, 0);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_zextsi128_si512 (__m128i __A)
+{
+  return _mm512_inserti32x4 (_mm512_setzero_si512 (), __A, 0);
+}
+
+extern __inline __m512d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_zextpd256_pd512 (__m256d __A)
+{
+  return _mm512_insertf64x4 (_mm512_setzero_pd (), __A, 0);
+}
+
+extern __inline __m512
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_zextps256_ps512 (__m256 __A)
+{
+  return (__m512) _mm512_insertf64x4 (_mm512_setzero_pd (), (__m256d) __A, 0);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_zextsi256_si512 (__m256i __A)
+{
+  return _mm512_inserti64x4 (_mm512_setzero_si512 (), __A, 0);
+}
+
 extern __inline __mmask16
 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
 _mm512_cmpeq_epu32_mask (__m512i __A, __m512i __B)
diff --git a/gcc/config/i386/avxintrin.h b/gcc/config/i386/avxintrin.h
index 18c0711a8ad..c8f8d53c86f 100644
--- a/gcc/config/i386/avxintrin.h
+++ b/gcc/config/i386/avxintrin.h
@@ -1484,6 +1484,26 @@ _mm256_castsi128_si256 (__m128i __A)
   return (__m256i) __builtin_ia32_si256_si ((__v4si)__A);
 }
 
+/* Similarly, but with zero extension instead of undefined values.  */
+
+extern __inline __m256d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_zextpd128_pd256 (__m128d __A)
+{
+  return _mm256_insertf128_pd (_mm256_setzero_pd (), __A, 0);
+}
+
+extern __inline __m256 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_zextps128_ps256 (__m128 __A)
+{
+  return _mm256_insertf128_ps (_mm256_setzero_ps (), __A, 0);
+}
+
+extern __inline __m256i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_zextsi128_si256 (__m128i __A)
+{
+  return _mm256_insertf128_si256 (_mm256_setzero_si256 (), __A, 0);
+}
+
 extern __inline __m256 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
 _mm256_set_m128 ( __m128 __H, __m128 __L)
 {
diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog
index d5267a84cfa..fc1175ff7bb 100644
--- a/gcc/testsuite/ChangeLog
+++ b/gcc/testsuite/ChangeLog
@@ -1,3 +1,11 @@
+2019-08-12  Jakub Jelinek
+
+	PR target/83250
+	PR target/91340
+	* gcc.target/i386/avx-typecast-1.c: New test.
+	* gcc.target/i386/avx-typecast-2.c: New test.
+	* gcc.target/i386/avx512f-typecast-2.c: New test.
+
 2019-08-12  Ed Schonberg
 
 	* gnat.dg/null_check.adb: New testcase.
diff --git a/gcc/testsuite/gcc.target/i386/avx-typecast-1.c b/gcc/testsuite/gcc.target/i386/avx-typecast-1.c
new file mode 100644
index 00000000000..3b5932644c5
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx-typecast-1.c
@@ -0,0 +1,83 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -mavx" } */
+/* { dg-require-effective-target avx } */
+
+#include "avx-check.h"
+
+extern int memcmp (const void *, const void *, __SIZE_TYPE__);
+
+void
+avx_test (void)
+{
+  union256i_d a, ad;
+  union256 b, bd;
+  union256d c, cd;
+  union128i_d d, dd;
+  union128 e, ed;
+  union128d f, fd;
+  int i;
+
+  for (i = 0; i < 8; i++)
+    {
+      a.a[i] = 7146908634 + i;
+      b.a[i] = 45.12f + i;
+    }
+
+  for (i = 0; i < 4; i++)
+    {
+      c.a[i] = 41234512513451345.0905 + i;
+      d.a[i] = 109534 + i;
+      e.a[i] = 85034.095f + i;
+    }
+
+  for (i = 0; i < 2; i++)
+    f.a[i] = 41234512451345.0905 + i;
+
+  bd.x = _mm256_castpd_ps (c.x);
+  if (memcmp (bd.a, c.a, 32))
+    abort ();
+
+  ad.x = _mm256_castpd_si256 (c.x);
+  if (memcmp (ad.a, c.a, 32))
+    abort ();
+
+  cd.x = _mm256_castps_pd (b.x);
+  if (memcmp (cd.a, b.a, 32))
+    abort ();
+
+  ad.x = _mm256_castps_si256 (b.x);
+  if (memcmp (ad.a, b.a, 32))
+    abort ();
+
+  bd.x = _mm256_castsi256_ps (a.x);
+  if (memcmp (bd.a, a.a, 32))
+    abort ();
+
+  cd.x = _mm256_castsi256_pd (a.x);
+  if (memcmp (cd.a, a.a, 32))
+    abort ();
+
+  fd.x = _mm256_castpd256_pd128 (c.x);
+  if (memcmp (fd.a, c.a, 16))
+    abort ();
+
+  ed.x = _mm256_castps256_ps128 (b.x);
+  if (memcmp (ed.a, b.a, 16))
+    abort ();
+
+  dd.x = _mm256_castsi256_si128 (a.x);
+  if (memcmp (dd.a, a.a, 16))
+    abort ();
+
+  cd.x = _mm256_castpd128_pd256 (f.x);
+  if (memcmp (cd.a, f.a, 16))
+    abort ();
+
+  bd.x = _mm256_castps128_ps256 (e.x);
+  if (memcmp (bd.a, e.a, 16))
+    abort ();
+
+  ad.x = _mm256_castsi128_si256 (d.x);
+  if (memcmp (ad.a, d.a, 16))
+    abort ();
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx-typecast-2.c b/gcc/testsuite/gcc.target/i386/avx-typecast-2.c
new file mode 100644
index 00000000000..679906781ae
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx-typecast-2.c
@@ -0,0 +1,46 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -mavx" } */
+/* { dg-require-effective-target avx } */
+
+#include "avx-check.h"
+
+extern int memcmp (const void *, const void *, __SIZE_TYPE__);
+
+void
+avx_test (void)
+{
+  union256i_d ad, zero;
+  union256 bd;
+  union256d cd;
+  union128i_d d;
+  union128 e;
+  union128d f;
+  int i;
+
+  for (i = 0; i < 8; i++)
+    zero.a[i] = 0;
+
+  for (i = 0; i < 4; i++)
+    {
+      d.a[i] = 109534 + i;
+      e.a[i] = 85034.095f + i;
+    }
+
+  for (i = 0; i < 2; i++)
+    f.a[i] = 41234512451345.0905 + i;
+
+  cd.x = _mm256_zextpd128_pd256 (f.x);
+  if (memcmp (cd.a, f.a, 16)
+      || memcmp (&cd.a[2], &zero.a, 16))
+    abort ();
+
+  bd.x = _mm256_zextps128_ps256 (e.x);
+  if (memcmp (bd.a, e.a, 16)
+      || memcmp (&bd.a[4], &zero.a, 16))
+    abort ();
+
+  ad.x = _mm256_zextsi128_si256 (d.x);
+  if (memcmp (ad.a, d.a, 16)
+      || memcmp (&ad.a[4], &zero.a, 16))
+    abort ();
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512f-typecast-2.c b/gcc/testsuite/gcc.target/i386/avx512f-typecast-2.c
new file mode 100644
index 00000000000..a048342959c
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512f-typecast-2.c
@@ -0,0 +1,71 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -mavx512f" } */
+/* { dg-require-effective-target avx512f } */
+
+#include "avx512f-check.h"
+
+extern int memcmp (const void *, const void *, __SIZE_TYPE__);
+
+void
+avx512f_test (void)
+{
+  union512i_d ad, zero;
+  union512 bd;
+  union512d cd;
+  union256i_d d;
+  union256 e;
+  union256d f;
+  union128i_d g;
+  union128 h;
+  union128d k;
+  int i;
+
+  for (i = 0; i < 16; i++)
+    zero.a[i] = 0;
+
+  for (i = 0; i < 8; i++)
+    {
+      d.a[i] = 109534 + i;
+      e.a[i] = 85034.095f + i;
+    }
+
+  for (i = 0; i < 4; i++)
+    {
+      f.a[i] = 41234512451345.0905 + i;
+      g.a[i] = 71469086341 + i;
+      h.a[i] = 45.1264f + i;
+    }
+
+  for (i = 0; i < 2; i++)
+    k.a[i] = 7146908634.576 + i;
+
+  cd.x = _mm512_zextpd128_pd512 (k.x);
+  if (memcmp (cd.a, k.a, 16)
+      || memcmp (&cd.a[2], &zero.a, 48))
+    abort ();
+
+  bd.x = _mm512_zextps128_ps512 (h.x);
+  if (memcmp (bd.a, h.a, 16)
+      || memcmp (&bd.a[4], &zero.a, 48))
+    abort ();
+
+  ad.x = _mm512_zextsi128_si512 (g.x);
+  if (memcmp (ad.a, g.a, 16)
+      || memcmp (&ad.a[4], &zero.a, 48))
+    abort ();
+
+  cd.x = _mm512_zextpd256_pd512 (f.x);
+  if (memcmp (cd.a, f.a, 32)
+      || memcmp (&cd.a[4], &zero.a, 32))
+    abort ();
+
+  bd.x = _mm512_zextps256_ps512 (e.x);
+  if (memcmp (bd.a, e.a, 32)
+      || memcmp (&bd.a[8], &zero.a, 32))
+    abort ();
+
+  ad.x = _mm512_zextsi256_si512 (d.x);
+  if (memcmp (ad.a, d.a, 32)
+      || memcmp (&ad.a[8], &zero.a, 32))
+    abort ();
+}