From d0208f9b647dc20ca5e1cff958f81c063ff25a17 Mon Sep 17 00:00:00 2001 From: "H.J. Lu" Date: Fri, 16 May 2008 06:19:39 +0000 Subject: [PATCH] i386.c (ix86_expand_vector_init_general): Optimize V8HImode for SSE2 and V16QImode for SSE4.1. gcc/ 2008-05-15 H.J. Lu * config/i386/i386.c (ix86_expand_vector_init_general): Optimize V8HImode for SSE2 and V16QImode for SSE4.1. gcc/testsuite/ 2008-05-15 H.J. Lu * gcc.target/i386/m128-check.h: New. * gcc.target/i386/set-v16qi-1.h: Likewise. * gcc.target/i386/set-v16qi-2.h: Likewise. * gcc.target/i386/set-v8hi-1.h: Likewise. * gcc.target/i386/set-v8hi-2.h: Likewise. * gcc.target/i386/sse2-set-v16qi-1.c: Likewise. * gcc.target/i386/sse2-set-v16qi-2.c: Likewise. * gcc.target/i386/sse2-set-v8hi-1.c: Likewise. * gcc.target/i386/sse2-set-v8hi-2.c: Likewise. * gcc.target/i386/sse4_1-set-v16qi-1.c: Likewise. * gcc.target/i386/sse4_1-set-v16qi-2.c: Likewise. * gcc.target/i386/sse2-check.h: Include m128-check.h. Don't include <stdio.h>. * gcc.target/i386/sse4_1-check.h: Likewise. 
From-SVN: r135409 --- gcc/ChangeLog | 5 + gcc/config/i386/i386.c | 135 ++++++++++++++++++ gcc/testsuite/ChangeLog | 18 +++ gcc/testsuite/gcc.target/i386/m128-check.h | 69 +++++++++ gcc/testsuite/gcc.target/i386/set-v16qi-1.h | 30 ++++ gcc/testsuite/gcc.target/i386/set-v16qi-2.h | 30 ++++ gcc/testsuite/gcc.target/i386/set-v8hi-1.h | 19 +++ gcc/testsuite/gcc.target/i386/set-v8hi-2.h | 21 +++ gcc/testsuite/gcc.target/i386/sse2-check.h | 3 +- .../gcc.target/i386/sse2-set-v16qi-1.c | 7 + .../gcc.target/i386/sse2-set-v16qi-2.c | 7 + .../gcc.target/i386/sse2-set-v8hi-1.c | 7 + .../gcc.target/i386/sse2-set-v8hi-2.c | 7 + gcc/testsuite/gcc.target/i386/sse4_1-check.h | 2 +- .../gcc.target/i386/sse4_1-set-v16qi-1.c | 8 ++ .../gcc.target/i386/sse4_1-set-v16qi-2.c | 8 ++ 16 files changed, 373 insertions(+), 3 deletions(-) create mode 100644 gcc/testsuite/gcc.target/i386/m128-check.h create mode 100644 gcc/testsuite/gcc.target/i386/set-v16qi-1.h create mode 100644 gcc/testsuite/gcc.target/i386/set-v16qi-2.h create mode 100644 gcc/testsuite/gcc.target/i386/set-v8hi-1.h create mode 100644 gcc/testsuite/gcc.target/i386/set-v8hi-2.h create mode 100644 gcc/testsuite/gcc.target/i386/sse2-set-v16qi-1.c create mode 100644 gcc/testsuite/gcc.target/i386/sse2-set-v16qi-2.c create mode 100644 gcc/testsuite/gcc.target/i386/sse2-set-v8hi-1.c create mode 100644 gcc/testsuite/gcc.target/i386/sse2-set-v8hi-2.c create mode 100644 gcc/testsuite/gcc.target/i386/sse4_1-set-v16qi-1.c create mode 100644 gcc/testsuite/gcc.target/i386/sse4_1-set-v16qi-2.c diff --git a/gcc/ChangeLog b/gcc/ChangeLog index 6d9adc3cef9..77b9620b059 100644 --- a/gcc/ChangeLog +++ b/gcc/ChangeLog @@ -1,3 +1,8 @@ +2008-05-15 H.J. Lu + + * config/i386/i386.c (ix86_expand_vector_init_general): Optimize + V8HImode for SSE2 and V16QImode for SSE4.1. + 2008-05-15 Kenneth Zadeck * cgraph.h (compute_inline_parameters): Made public. 
diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c index d8fdc22226c..af1e6c60b55 100644 --- a/gcc/config/i386/i386.c +++ b/gcc/config/i386/i386.c @@ -23892,7 +23892,142 @@ ix86_expand_vector_init_general (bool mmx_ok, enum machine_mode mode, break; case V8HImode: + if (TARGET_SSE2) + { + rtx ops[4]; + unsigned int i, j; + + for (i = 0; i < ARRAY_SIZE (ops); i++) + { + /* Extend the odd element from HImode to SImode using + a paradoxical SUBREG. */ + op0 = gen_reg_rtx (SImode); + emit_move_insn (op0, gen_lowpart (SImode, + XVECEXP (vals, 0, + i + i))); + + /* Insert the SImode value as low element of V4SImode + vector. */ + op1 = gen_reg_rtx (V4SImode); + op0 = gen_rtx_VEC_MERGE (V4SImode, + gen_rtx_VEC_DUPLICATE (V4SImode, + op0), + CONST0_RTX (V4SImode), + const1_rtx); + emit_insn (gen_rtx_SET (VOIDmode, op1, op0)); + + /* Cast the V4SImode vector back to a V8HImode vector. */ + op0 = gen_reg_rtx (mode); + emit_move_insn (op0, gen_lowpart (mode, op1)); + + /* Load even HI elements into the second position. */ + emit_insn (gen_vec_setv8hi (op0, XVECEXP (vals, 0, + i + i + 1), + const1_rtx)); + + /* Cast V8HImode vector to V4SImode vector. */ + ops[i] = gen_reg_rtx (V4SImode); + emit_move_insn (ops[i], gen_lowpart (V4SImode, op0)); + } + + /* Interleave low V4SIs. */ + for (i = j = 0; i < ARRAY_SIZE (ops); i += 2, j++) + { + op0 = gen_reg_rtx (V4SImode); + emit_insn (gen_vec_interleave_lowv4si (op0, ops[i], + ops[i + 1])); + + /* Cast V4SImode vectors to V2DImode vectors. */ + op1 = gen_reg_rtx (V2DImode); + emit_move_insn (op1, gen_lowpart (V2DImode, op0)); + ops[j] = op1; + } + + /* Interleave low V2DIs. */ + op0 = gen_reg_rtx (V2DImode); + emit_insn (gen_vec_interleave_lowv2di (op0, ops[0], ops[1])); + + /* Cast the V2DImode vector back to a V8HImode vector. 
*/ + emit_insn (gen_rtx_SET (VOIDmode, target, + gen_lowpart (mode, op0))); + return; + } + case V16QImode: + if (TARGET_SSE4_1) + { + rtx ops[8]; + unsigned int i, j; + + for (i = 0; i < ARRAY_SIZE (ops); i++) + { + /* Extend the odd element from QImode to SImode using + a paradoxical SUBREG. */ + op0 = gen_reg_rtx (SImode); + emit_move_insn (op0, gen_lowpart (SImode, + XVECEXP (vals, 0, + i + i))); + + /* Insert the SImode value as low element of V4SImode + vector. */ + op1 = gen_reg_rtx (V4SImode); + op0 = gen_rtx_VEC_MERGE (V4SImode, + gen_rtx_VEC_DUPLICATE (V4SImode, + op0), + CONST0_RTX (V4SImode), + const1_rtx); + emit_insn (gen_rtx_SET (VOIDmode, op1, op0)); + + /* Cast the V4SImode vector back to a V16QImode vector. */ + op0 = gen_reg_rtx (mode); + emit_move_insn (op0, gen_lowpart (mode, op1)); + + /* Load even QI elements into the second position. */ + emit_insn (gen_vec_setv16qi (op0, XVECEXP (vals, 0, + i + i + 1), + const1_rtx)); + + /* Cast V16QImode vector to V8HImode vector. */ + ops[i] = gen_reg_rtx (V8HImode); + emit_move_insn (ops[i], gen_lowpart (V8HImode, op0)); + } + + /* Interleave low V8HIs. */ + for (i = j = 0; i < ARRAY_SIZE (ops); i += 2, j++) + { + op0 = gen_reg_rtx (V8HImode); + emit_insn (gen_vec_interleave_lowv8hi (op0, ops[i], + ops[i + 1])); + + /* Cast V8HImode vector to V4SImode vector. */ + op1 = gen_reg_rtx (V4SImode); + emit_move_insn (op1, gen_lowpart (V4SImode, op0)); + ops[j] = op1; + } + + /* Interleave low V4SIs. */ + for (i = j = 0; i < ARRAY_SIZE (ops) / 2; i += 2, j++) + { + op0 = gen_reg_rtx (V4SImode); + emit_insn (gen_vec_interleave_lowv4si (op0, ops[i], + ops[i + 1])); + + /* Cast V4SImode vectors to V2DImode vectors. */ + op1 = gen_reg_rtx (V2DImode); + emit_move_insn (op1, gen_lowpart (V2DImode, op0)); + ops[j] = op1; + } + + /* Interleave low V2DIs. */ + op0 = gen_reg_rtx (V2DImode); + emit_insn (gen_vec_interleave_lowv2di (op0, ops[0], ops[1])); + + /* Cast the V2DImode vector back to a V16QImode vector. 
*/ + emit_insn (gen_rtx_SET (VOIDmode, target, + gen_lowpart (mode, op0))); + return; + } + case V4HImode: case V8QImode: break; diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog index a91d3c5644e..45ea5223609 100644 --- a/gcc/testsuite/ChangeLog +++ b/gcc/testsuite/ChangeLog @@ -1,3 +1,21 @@ +2008-05-15 H.J. Lu + + * gcc.target/i386/m128-check.h: New. + * gcc.target/i386/set-v16qi-1.h: Likewise. + * gcc.target/i386/set-v16qi-2.h: Likewise. + * gcc.target/i386/set-v8hi-1.h: Likewise. + * gcc.target/i386/set-v8hi-2.h: Likewise. + * gcc.target/i386/sse2-set-v16qi-1.c: Likewise. + * gcc.target/i386/sse2-set-v16qi-2.c: Likewise. + * gcc.target/i386/sse2-set-v8hi-1.c: Likewise. + * gcc.target/i386/sse2-set-v8hi-2.c: Likewise. + * gcc.target/i386/sse4_1-set-v16qi-1.c: Likewise. + * gcc.target/i386/sse4_1-set-v16qi-2.c: Likewise. + + * gcc.target/i386/sse2-check.h: Include m128-check.h. Don't + include <stdio.h>. + * gcc.target/i386/sse4_1-check.h: Likewise. + 2008-05-15 Adam Nemet PR middle-end/36194 diff --git a/gcc/testsuite/gcc.target/i386/m128-check.h b/gcc/testsuite/gcc.target/i386/m128-check.h new file mode 100644 index 00000000000..3231c079009 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/m128-check.h @@ -0,0 +1,69 @@ +#include <stdio.h> +#include <emmintrin.h> + +typedef union +{ + __m128i x; + char a[16]; +} union128i_b; + +typedef union +{ + __m128i x; + short a[8]; +} union128i_w; + +typedef union +{ + __m128i x; + int a[4]; +} union128i_d; + +typedef union +{ + __m128i x; + long long a[2]; +} union128i_q; + +typedef union +{ + __m128 x; + float a[4]; +} union128; + +typedef union +{ + __m128d x; + double a[2]; +} union128d; + +#ifdef DEBUG +#define PRINTF printf +#else +#define PRINTF(...) 
+#endif + +#define CHECK_EXP(UINON_TYPE, VALUE_TYPE, FMT) \ +static int \ +__attribute__((noinline, unused)) \ +check_##UINON_TYPE (UINON_TYPE u, const VALUE_TYPE *v) \ +{ \ + int i; \ + int err = 0; \ + \ + for (i = 0; i < sizeof (u.a) / sizeof (u.a[0]); i++) \ + if (u.a[i] != v[i]) \ + { \ + err++; \ + PRINTF ("%i: " FMT " != " FMT "\n", \ + i, v[i], u.a[i]); \ + } \ + return err; \ +} + +CHECK_EXP (union128i_b, char, "%d") +CHECK_EXP (union128i_w, short, "%d") +CHECK_EXP (union128i_d, int, "0x%x") +CHECK_EXP (union128i_q, long long, "0x%llx") +CHECK_EXP (union128, float, "%f") +CHECK_EXP (union128d, double, "%f") diff --git a/gcc/testsuite/gcc.target/i386/set-v16qi-1.h b/gcc/testsuite/gcc.target/i386/set-v16qi-1.h new file mode 100644 index 00000000000..79556e87460 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/set-v16qi-1.h @@ -0,0 +1,30 @@ +/* { dg-do run } */ +/* { dg-require-effective-target sse4 } */ +/* { dg-options "-O2 -msse4.1" } */ + +#include CHECK_H + +static __m128i +__attribute__((noinline)) +foo (char *v) +{ + return _mm_set_epi8 (v[15], v[14], v[13], v[12], + v[11], v[10], v[9], v[8], + v[7], v[6], v[5], v[4], + v[3], v[2], v[1], v[0]); +} + +static void +TEST (void) +{ + char v[16] = + { + -3, 60, 48, 104, -90, 37, -48, 78, + 4, 33, 81, 4, -89, 17, 8, 68 + }; + union128i_b u; + + u.x = foo (v); + if (check_union128i_b (u, v)) + abort (); +} diff --git a/gcc/testsuite/gcc.target/i386/set-v16qi-2.h b/gcc/testsuite/gcc.target/i386/set-v16qi-2.h new file mode 100644 index 00000000000..9768806c50c --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/set-v16qi-2.h @@ -0,0 +1,30 @@ +#include CHECK_H + +static __m128i +__attribute__((noinline)) +foo (char x1, char x2, char x3, char x4, + char x5, char x6, char x7, char x8, + char x9, char x10, char x11, char x12, + char x13, char x14, char x15, char x16) +{ + return _mm_set_epi8 (x1, x2, x3, x4, x5, x6, x7, x8, + x9, x10, x11, x12, x13, x14, x15, x16); +} + +static void +TEST (void) +{ + char v[16] = + 
{ + -3, 60, 48, 104, -90, 37, -48, 78, + 4, 33, 81, 4, -89, 17, 8, 68 + }; + union128i_b u; + + u.x = foo (v[15], v[14], v[13], v[12], + v[11], v[10], v[9], v[8], + v[7], v[6], v[5], v[4], + v[3], v[2], v[1], v[0]); + if (check_union128i_b (u, v)) + abort (); +} diff --git a/gcc/testsuite/gcc.target/i386/set-v8hi-1.h b/gcc/testsuite/gcc.target/i386/set-v8hi-1.h new file mode 100644 index 00000000000..87762b82e99 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/set-v8hi-1.h @@ -0,0 +1,19 @@ +#include CHECK_H + +static __m128i +__attribute__((noinline)) +foo (short *v) +{ + return _mm_set_epi16 (v[7], v[6], v[5], v[4], v[3], v[2], v[1], v[0]); +} + +static void +TEST (void) +{ + short v[8] = { -3, 6000, 48, 104, -90, 34567, -1248, 34678 }; + union128i_w u; + + u.x = foo (v); + if (check_union128i_w (u, v)) + abort (); +} diff --git a/gcc/testsuite/gcc.target/i386/set-v8hi-2.h b/gcc/testsuite/gcc.target/i386/set-v8hi-2.h new file mode 100644 index 00000000000..835e7b4d696 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/set-v8hi-2.h @@ -0,0 +1,21 @@ +#include CHECK_H + +__m128i +__attribute__((noinline)) +foo (short x1, short x2, short x3, short x4, + short x5, short x6, short x7, short x8) +{ + return _mm_set_epi16 (x1, x2, x3, x4, x5, x6, x7, x8); +} + +static void +TEST (void) +{ + short v[8] = { -3, 2, 1, 9, 23, -173, -13, 69 }; + union128i_w u; + + u.x = foo (v[7], v[6], v[5], v[4], v[3], v[2], v[1], v[0]); + + if (check_union128i_w (u, v)) + abort (); +} diff --git a/gcc/testsuite/gcc.target/i386/sse2-check.h b/gcc/testsuite/gcc.target/i386/sse2-check.h index 007ff543228..a69333e391a 100644 --- a/gcc/testsuite/gcc.target/i386/sse2-check.h +++ b/gcc/testsuite/gcc.target/i386/sse2-check.h @@ -1,7 +1,6 @@ -#include #include - #include "cpuid.h" +#include "m128-check.h" static void sse2_test (void); diff --git a/gcc/testsuite/gcc.target/i386/sse2-set-v16qi-1.c b/gcc/testsuite/gcc.target/i386/sse2-set-v16qi-1.c new file mode 100644 index 00000000000..61f19cb2b49 
--- /dev/null +++ b/gcc/testsuite/gcc.target/i386/sse2-set-v16qi-1.c @@ -0,0 +1,7 @@ +/* { dg-do run } */ +/* { dg-options "-O2 -msse2" } */ + +#define CHECK_H "sse2-check.h" +#define TEST sse2_test + +#include "set-v16qi-1.h" diff --git a/gcc/testsuite/gcc.target/i386/sse2-set-v16qi-2.c b/gcc/testsuite/gcc.target/i386/sse2-set-v16qi-2.c new file mode 100644 index 00000000000..918fa5c9155 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/sse2-set-v16qi-2.c @@ -0,0 +1,7 @@ +/* { dg-do run } */ +/* { dg-options "-O2 -msse2" } */ + +#define CHECK_H "sse2-check.h" +#define TEST sse2_test + +#include "set-v16qi-2.h" diff --git a/gcc/testsuite/gcc.target/i386/sse2-set-v8hi-1.c b/gcc/testsuite/gcc.target/i386/sse2-set-v8hi-1.c new file mode 100644 index 00000000000..cab461e3e38 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/sse2-set-v8hi-1.c @@ -0,0 +1,7 @@ +/* { dg-do run } */ +/* { dg-options "-O2 -msse2" } */ + +#define CHECK_H "sse2-check.h" +#define TEST sse2_test + +#include "set-v8hi-1.h" diff --git a/gcc/testsuite/gcc.target/i386/sse2-set-v8hi-2.c b/gcc/testsuite/gcc.target/i386/sse2-set-v8hi-2.c new file mode 100644 index 00000000000..2b4a8be728d --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/sse2-set-v8hi-2.c @@ -0,0 +1,7 @@ +/* { dg-do run } */ +/* { dg-options "-O2 -msse2" } */ + +#define CHECK_H "sse2-check.h" +#define TEST sse2_test + +#include "set-v8hi-2.h" diff --git a/gcc/testsuite/gcc.target/i386/sse4_1-check.h b/gcc/testsuite/gcc.target/i386/sse4_1-check.h index bac37cb56ef..2d1c4e835a3 100644 --- a/gcc/testsuite/gcc.target/i386/sse4_1-check.h +++ b/gcc/testsuite/gcc.target/i386/sse4_1-check.h @@ -1,7 +1,7 @@ -#include <stdio.h> #include <stdlib.h> #include "cpuid.h" +#include "m128-check.h" static void sse4_1_test (void); diff --git a/gcc/testsuite/gcc.target/i386/sse4_1-set-v16qi-1.c b/gcc/testsuite/gcc.target/i386/sse4_1-set-v16qi-1.c new file mode 100644 index 00000000000..23c0903300a --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/sse4_1-set-v16qi-1.c @@ -0,0 
+1,8 @@ +/* { dg-do run } */ +/* { dg-require-effective-target sse4 } */ +/* { dg-options "-O2 -msse4.1" } */ + +#define CHECK_H "sse4_1-check.h" +#define TEST sse4_1_test + +#include "set-v16qi-1.h" diff --git a/gcc/testsuite/gcc.target/i386/sse4_1-set-v16qi-2.c b/gcc/testsuite/gcc.target/i386/sse4_1-set-v16qi-2.c new file mode 100644 index 00000000000..5245870824b --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/sse4_1-set-v16qi-2.c @@ -0,0 +1,8 @@ +/* { dg-do run } */ +/* { dg-require-effective-target sse4 } */ +/* { dg-options "-O2 -msse4.1" } */ + +#define CHECK_H "sse4_1-check.h" +#define TEST sse4_1_test + +#include "set-v16qi-2.h" -- 2.30.2