From: Paul A. Clarke
Date: Mon, 22 Jul 2019 14:07:10 +0000 (+0000)
Subject: [rs6000] Add _mm_blend_epi16 and _mm_blendv_epi8
X-Git-Url: https://git.libre-soc.org/?a=commitdiff_plain;h=1c4547f16138b69f955fdc24c9523e7f8d28aa15;p=gcc.git

[rs6000] Add _mm_blend_epi16 and _mm_blendv_epi8

Add compatibility implementations of _mm_blend_epi16 and _mm_blendv_epi8
intrinsics.

Respective test cases are copied almost verbatim (minor changes to
the dejagnu head lines) from i386.

2019-07-22  Paul A. Clarke

[gcc]

	* config/rs6000/smmintrin.h (_mm_blend_epi16): New.
	(_mm_blendv_epi8): New.

[gcc/testsuite]

	* gcc.target/powerpc/sse4_1-check.h: New.
	* gcc.target/powerpc/sse4_1-pblendvb.c: New.
	* gcc.target/powerpc/sse4_1-pblendw.c: New.
	* gcc.target/powerpc/sse4_1-pblendw-2.c: New.

From-SVN: r273698
---

diff --git a/gcc/ChangeLog b/gcc/ChangeLog
index e58dfe153c0..eb786a072ec 100644
--- a/gcc/ChangeLog
+++ b/gcc/ChangeLog
@@ -1,3 +1,8 @@
+2019-07-22  Paul A. Clarke
+
+	* config/rs6000/smmintrin.h (_mm_blend_epi16): New.
+	(_mm_blendv_epi8): New.
+
 2019-07-22  Richard Biener
 
 	PR tree-optimization/91221
diff --git a/gcc/config/rs6000/smmintrin.h b/gcc/config/rs6000/smmintrin.h
index 914dade70d0..5ef0822febc 100644
--- a/gcc/config/rs6000/smmintrin.h
+++ b/gcc/config/rs6000/smmintrin.h
@@ -66,4 +66,24 @@ _mm_extract_ps (__m128 __X, const int __N)
   return ((__v4si)__X)[__N & 3];
 }
 
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_blend_epi16 (__m128i __A, __m128i __B, const int __imm8)
+{ /* Blend the 16-bit elements of __A and __B under control of __imm8; the accompanying tests expect element j to come from __B when bit j of __imm8 is set, else from __A.  */
+  __v16qi __charmask = vec_splats ((signed char) __imm8); /* Low 8 bits of __imm8 copied into every byte.  */
+  __charmask = vec_gb (__charmask); /* NOTE(review): presumably expands each immediate bit into a whole 0x00/0xff byte -- confirm against the vec_gb definition.  */
+  __v8hu __shortmask = (__v8hu) vec_unpackh (__charmask); /* Widen half of the byte mask into eight 16-bit lanes.  */
+  #ifdef __BIG_ENDIAN__
+  __shortmask = vec_reve (__shortmask); /* Big-endian only: reverse the lane order of the mask.  */
+  #endif
+  return (__m128i) vec_sel ((__v8hu) __A, (__v8hu) __B, __shortmask); /* Mask bits set -> __B, clear -> __A.  */
+}
+
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_blendv_epi8 (__m128i __A, __m128i __B, __m128i __mask)
+{ /* Byte-wise blend of __A and __B; the accompanying test expects byte j to come from __B when bit 0x80 of mask byte j is set, else from __A.  */
+  const __v16qu __seven = vec_splats ((unsigned char) 0x07); /* Shift count of 7 in every byte.  */
+  __v16qu __lmask = vec_sra ((__v16qu) __mask, __seven); /* Shift each mask byte right by 7.  NOTE(review): relies on vec_sra replicating the top bit so each byte becomes all-ones or all-zeros -- confirm for unsigned operands.  */
+  return (__m128i) vec_sel ((__v16qu) __A, (__v16qu) __B, __lmask); /* Mask bits set -> __B, clear -> __A.  */
+}
+
 #endif
diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog
index 9d20101b427..d005ca6f750 100644
--- a/gcc/testsuite/ChangeLog
+++ b/gcc/testsuite/ChangeLog
@@ -1,3 +1,10 @@
+2019-07-22  Paul A. Clarke
+
+	* gcc.target/powerpc/sse4_1-check.h: New.
+	* gcc.target/powerpc/sse4_1-pblendvb.c: New.
+	* gcc.target/powerpc/sse4_1-pblendw.c: New.
+	* gcc.target/powerpc/sse4_1-pblendw-2.c: New.
+
 2019-07-22  Eric Botcazou
 
 	* gnat.dg/fixedpnt6.adb: New testcase.
diff --git a/gcc/testsuite/gcc.target/powerpc/sse4_1-check.h b/gcc/testsuite/gcc.target/powerpc/sse4_1-check.h
new file mode 100644
index 00000000000..5f855b9fd53
--- /dev/null
+++ b/gcc/testsuite/gcc.target/powerpc/sse4_1-check.h
@@ -0,0 +1,27 @@
+#include /* NOTE(review): header name is missing here -- apparently lost in extraction; restore from the upstream file.  */
+#include /* NOTE(review): header name is missing here -- apparently lost in extraction; restore from the upstream file.  */
+
+#include "m128-check.h"
+
+//#define DEBUG 1
+
+#define TEST sse4_1_test /* Name of the test body that the including file must define.  */
+
+static void sse4_1_test (void);
+
+static void
+__attribute__ ((noinline))
+do_test (void)
+{ /* Non-inlined trampoline that runs the test body.  */
+  sse4_1_test ();
+}
+
+int
+main ()
+{ /* Run the test; failures abort inside the test body, so reaching the end means success.  */
+  do_test ();
+#ifdef DEBUG
+  printf ("PASSED\n");
+#endif
+  return 0;
+}
diff --git a/gcc/testsuite/gcc.target/powerpc/sse4_1-pblendvb.c b/gcc/testsuite/gcc.target/powerpc/sse4_1-pblendvb.c
new file mode 100644
index 00000000000..6aa77fea1ae
--- /dev/null
+++ b/gcc/testsuite/gcc.target/powerpc/sse4_1-pblendvb.c
@@ -0,0 +1,71 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -mpower8-vector -Wno-psabi" } */
+/* { dg-require-effective-target p8vector_hw } */
+
+#define NO_WARN_X86_INTRINSICS 1
+#ifndef CHECK_H
+#define CHECK_H "sse4_1-check.h"
+#endif
+
+#ifndef TEST
+#define TEST sse4_1_test
+#endif
+
+#include CHECK_H
+
+#include /* NOTE(review): header name is missing here -- apparently lost in extraction; restore from the upstream file.  */
+#include /* NOTE(review): header name is missing here -- apparently lost in extraction; restore from the upstream file.  */
+
+#define NUM 20 /* Number of 16-byte vectors exercised.  */
+
+static void
+init_pblendvb (unsigned char *src1, unsigned char *src2,
+               unsigned char *mask)
+{ /* Fill NUM * 16 bytes of src1, src2 and mask with index-derived patterns.  */
+  int i, sign = 1;
+
+  for (i = 0; i < NUM * 16; i++)
+    {
+      src1[i] = i* i * sign;
+      src2[i] = (i + 20) * sign;
+      mask[i] = (i % 3) + ((i * (14 + sign))
+                ^ (src1[i] | src2[i] | (i*3)));
+      sign = -sign; /* Alternate the sign on every element.  */
+    }
+}
+
+static int
+check_pblendvb (__m128i *dst, unsigned char *src1,
+                unsigned char *src2, unsigned char *mask)
+{ /* Scalar reference: returns 0 when the 16 bytes at dst match the expected blend, non-zero otherwise.  */
+  unsigned char tmp[16];
+  int j;
+
+  memcpy (&tmp[0], src1, sizeof (tmp)); /* Start from src1 ...  */
+  for (j = 0; j < 16; j++)
+    if (mask [j] & 0x80) /* ... and take src2 wherever the mask byte has its top bit set.  */
+      tmp[j] = src2[j];
+
+  return memcmp (dst, &tmp[0], sizeof (tmp));
+}
+
+static void
+TEST (void)
+{ /* Blend NUM vectors with _mm_blendv_epi8 and abort on the first mismatch against the scalar reference.  */
+  union /* Same storage viewed as vectors and as bytes.  */
+    {
+      __m128i x[NUM];
+      unsigned char c[NUM * 16];
+    } dst, src1, src2, mask;
+  int i;
+
+  init_pblendvb (src1.c, src2.c, mask.c);
+
+  for (i = 0; i < NUM; i++)
+    {
+      dst.x[i] = _mm_blendv_epi8 (src1.x[i], src2.x[i], mask.x[i]);
+      if (check_pblendvb (&dst.x[i], &src1.c[i * 16], &src2.c[i * 16],
+                          &mask.c[i * 16]))
+        abort ();
+    }
+}
diff --git a/gcc/testsuite/gcc.target/powerpc/sse4_1-pblendw-2.c b/gcc/testsuite/gcc.target/powerpc/sse4_1-pblendw-2.c
new file mode 100644
index 00000000000..d3f96e823c0
--- /dev/null
+++ b/gcc/testsuite/gcc.target/powerpc/sse4_1-pblendw-2.c
@@ -0,0 +1,80 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -mpower8-vector -Wno-psabi" } */
+/* { dg-require-effective-target p8vector_hw } */
+
+#define NO_WARN_X86_INTRINSICS 1
+#include "sse4_1-check.h"
+
+#include /* NOTE(review): header name is missing here -- apparently lost in extraction; restore from the upstream file.  */
+#include /* NOTE(review): header name is missing here -- apparently lost in extraction; restore from the upstream file.  */
+
+#define NUM 20 /* Number of 8-element vectors exercised.  */
+
+#undef MASK
+#define MASK 0xfe /* Bits 1-7 set: the reference check takes elements 1..7 from the second source.  */
+
+static void
+init_pblendw (short *src1, short *src2)
+{ /* Fill NUM * 8 shorts of src1 and src2 with index-derived values of alternating sign.  */
+  int i, sign = 1;
+
+  for (i = 0; i < NUM * 8; i++)
+    {
+      src1[i] = i * i * sign;
+      src2[i] = (i + 20) * sign;
+      sign = -sign;
+    }
+}
+
+static int
+check_pblendw (__m128i *dst, short *src1, short *src2)
+{ /* Scalar reference: returns 0 when the 8 shorts at dst match the expected blend, non-zero otherwise.  */
+  short tmp[8];
+  int j;
+
+  memcpy (&tmp[0], src1, sizeof (tmp)); /* Start from src1 ...  */
+  for (j = 0; j < 8; j++)
+    if ((MASK & (1 << j))) /* ... and take src2 for every element whose MASK bit is set.  */
+      tmp[j] = src2[j];
+
+  return memcmp (dst, &tmp[0], sizeof (tmp));
+}
+
+static void
+sse4_1_test (void)
+{ /* Exercise _mm_blend_epi16 with MASK and abort on any mismatch against the scalar reference.  */
+  __m128i x, y;
+  union /* Same storage viewed as vectors and as shorts.  */
+    {
+      __m128i x[NUM];
+      short s[NUM * 8];
+    } dst, src1, src2;
+  union
+    {
+      __m128i x;
+      short s[8];
+    } src3;
+  int i;
+
+  init_pblendw (src1.s, src2.s);
+
+  /* Check pblendw imm8, m128, xmm */
+  for (i = 0; i < NUM; i++)
+    {
+      dst.x[i] = _mm_blend_epi16 (src1.x[i], src2.x[i], MASK);
+      if (check_pblendw (&dst.x[i], &src1.s[i * 8], &src2.s[i * 8]))
+        abort ();
+    }
+
+  /* Check pblendw imm8, xmm, xmm */
+  src3.x = _mm_setzero_si128 ();
+
+  x = _mm_blend_epi16 (dst.x[2], src3.x, MASK); /* Blend a computed vector with src3, in both operand orders.  */
+  y = _mm_blend_epi16 (src3.x, dst.x[2], MASK);
+
+  if (check_pblendw (&x, &dst.s[16], &src3.s[0])) /* dst.s[16] is the first short of dst.x[2] (8 shorts per vector).  */
+    abort ();
+
+  if (check_pblendw (&y, &src3.s[0], &dst.s[16]))
+    abort ();
+}
diff --git a/gcc/testsuite/gcc.target/powerpc/sse4_1-pblendw.c b/gcc/testsuite/gcc.target/powerpc/sse4_1-pblendw.c
new file mode 100644
index
00000000000..1c48c7621ba
--- /dev/null
+++ b/gcc/testsuite/gcc.target/powerpc/sse4_1-pblendw.c
@@ -0,0 +1,89 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -mpower8-vector -Wno-psabi" } */
+/* { dg-require-effective-target p8vector_hw } */
+
+#define NO_WARN_X86_INTRINSICS 1
+#ifndef CHECK_H
+#define CHECK_H "sse4_1-check.h"
+#endif
+
+#ifndef TEST
+#define TEST sse4_1_test
+#endif
+
+#include CHECK_H
+
+#include /* NOTE(review): header name is missing here -- apparently lost in extraction; restore from the upstream file.  */
+#include /* NOTE(review): header name is missing here -- apparently lost in extraction; restore from the upstream file.  */
+
+#define NUM 20 /* Number of 8-element vectors exercised.  */
+
+#ifndef MASK
+#define MASK 0x0f /* Default: bits 0-3 set, so the reference check takes elements 0..3 from the second source.  */
+#endif
+
+static void
+init_pblendw (short *src1, short *src2)
+{ /* Fill NUM * 8 shorts of src1 and src2 with index-derived values of alternating sign.  */
+  int i, sign = 1;
+
+  for (i = 0; i < NUM * 8; i++)
+    {
+      src1[i] = i * i * sign;
+      src2[i] = (i + 20) * sign;
+      sign = -sign;
+    }
+}
+
+static int
+check_pblendw (__m128i *dst, short *src1, short *src2)
+{ /* Scalar reference: returns 0 when the 8 shorts at dst match the expected blend, non-zero otherwise.  */
+  short tmp[8];
+  int j;
+
+  memcpy (&tmp[0], src1, sizeof (tmp)); /* Start from src1 ...  */
+  for (j = 0; j < 8; j++)
+    if ((MASK & (1 << j))) /* ... and take src2 for every element whose MASK bit is set.  */
+      tmp[j] = src2[j];
+
+  return memcmp (dst, &tmp[0], sizeof (tmp));
+}
+
+static void
+TEST (void)
+{ /* Exercise _mm_blend_epi16 with MASK and abort on any mismatch against the scalar reference.  */
+  __m128i x, y;
+  union /* Same storage viewed as vectors and as shorts.  */
+    {
+      __m128i x[NUM];
+      short s[NUM * 8];
+    } dst, src1, src2;
+  union
+    {
+      __m128i x;
+      short s[8];
+    } src3;
+  int i;
+
+  init_pblendw (src1.s, src2.s);
+
+  /* Check pblendw imm8, m128, xmm */
+  for (i = 0; i < NUM; i++)
+    {
+      dst.x[i] = _mm_blend_epi16 (src1.x[i], src2.x[i], MASK);
+      if (check_pblendw (&dst.x[i], &src1.s[i * 8], &src2.s[i * 8]))
+        abort ();
+    }
+
+  /* Check pblendw imm8, xmm, xmm */
+  src3.x = _mm_setzero_si128 ();
+
+  x = _mm_blend_epi16 (dst.x[2], src3.x, MASK); /* Blend a computed vector with src3, in both operand orders.  */
+  y = _mm_blend_epi16 (src3.x, dst.x[2], MASK);
+
+  if (check_pblendw (&x, &dst.s[16], &src3.s[0])) /* dst.s[16] is the first short of dst.x[2] (8 shorts per vector).  */
+    abort ();
+
+  if (check_pblendw (&y, &src3.s[0], &dst.s[16]))
+    abort ();
+}