i386.c (ix86_expand_vector_init_general): Optimize V8HImode for SSE2 and V16QImode...
author H.J. Lu <hongjiu.lu@intel.com>
Fri, 16 May 2008 06:19:39 +0000 (06:19 +0000)
committer H.J. Lu <hjl@gcc.gnu.org>
Fri, 16 May 2008 06:19:39 +0000 (23:19 -0700)
gcc/

2008-05-15  H.J. Lu  <hongjiu.lu@intel.com>

* config/i386/i386.c (ix86_expand_vector_init_general): Optimize
V8HImode for SSE2 and V16QImode for SSE4.1.

gcc/testsuite/

2008-05-15  H.J. Lu  <hongjiu.lu@intel.com>

* gcc.target/i386/m128-check.h: New.
* gcc.target/i386/set-v16qi-1.h: Likewise.
* gcc.target/i386/set-v16qi-2.h: Likewise.
* gcc.target/i386/set-v8hi-1.h: Likewise.
* gcc.target/i386/set-v8hi-2.h: Likewise.
* gcc.target/i386/sse2-set-v16qi-1.c: Likewise.
* gcc.target/i386/sse2-set-v16qi-2.c: Likewise.
* gcc.target/i386/sse2-set-v8hi-1.c: Likewise.
* gcc.target/i386/sse2-set-v8hi-2.c: Likewise.
* gcc.target/i386/sse4_1-set-v16qi-1.c: Likewise.
* gcc.target/i386/sse4_1-set-v16qi-2.c: Likewise.

* gcc.target/i386/sse2-check.h: Include m128-check.h. Don't
include <stdio.h>.
* gcc.target/i386/sse4_1-check.h: Likewise.

From-SVN: r135409

16 files changed:
gcc/ChangeLog
gcc/config/i386/i386.c
gcc/testsuite/ChangeLog
gcc/testsuite/gcc.target/i386/m128-check.h [new file with mode: 0644]
gcc/testsuite/gcc.target/i386/set-v16qi-1.h [new file with mode: 0644]
gcc/testsuite/gcc.target/i386/set-v16qi-2.h [new file with mode: 0644]
gcc/testsuite/gcc.target/i386/set-v8hi-1.h [new file with mode: 0644]
gcc/testsuite/gcc.target/i386/set-v8hi-2.h [new file with mode: 0644]
gcc/testsuite/gcc.target/i386/sse2-check.h
gcc/testsuite/gcc.target/i386/sse2-set-v16qi-1.c [new file with mode: 0644]
gcc/testsuite/gcc.target/i386/sse2-set-v16qi-2.c [new file with mode: 0644]
gcc/testsuite/gcc.target/i386/sse2-set-v8hi-1.c [new file with mode: 0644]
gcc/testsuite/gcc.target/i386/sse2-set-v8hi-2.c [new file with mode: 0644]
gcc/testsuite/gcc.target/i386/sse4_1-check.h
gcc/testsuite/gcc.target/i386/sse4_1-set-v16qi-1.c [new file with mode: 0644]
gcc/testsuite/gcc.target/i386/sse4_1-set-v16qi-2.c [new file with mode: 0644]

index 6d9adc3cef9706fae7073ead2ca8fcf75bbd53d9..77b9620b0590e285dc8098904dae107c5da0d815 100644 (file)
@@ -1,3 +1,8 @@
+2008-05-15  H.J. Lu  <hongjiu.lu@intel.com>
+
+       * config/i386/i386.c (ix86_expand_vector_init_general): Optimize
+       V8HImode for SSE2 and V16QImode for SSE4.1.
+
 2008-05-15  Kenneth Zadeck <zadeck@naturalbridge.com>
 
        * cgraph.h (compute_inline_parameters): Made public.
index d8fdc22226c73f239d7c122e5512b7108934afc6..af1e6c60b55ea8c058fb6f125d7c6ed7e919cc2b 100644 (file)
@@ -23892,7 +23892,142 @@ ix86_expand_vector_init_general (bool mmx_ok, enum machine_mode mode,
       break;
 
     case V8HImode:
+      if (TARGET_SSE2)
+       {
+         rtx ops[4];
+         unsigned int i, j;
+
+         for (i = 0; i < ARRAY_SIZE (ops); i++)
+           {
+             /* Extend the odd element from HImode to SImode using
+                a paradoxical SUBREG.  */
+             op0 = gen_reg_rtx (SImode);
+             emit_move_insn (op0, gen_lowpart (SImode,
+                                               XVECEXP (vals, 0,
+                                                        i + i)));
+
+             /* Insert the SImode value as low element of V4SImode
+                vector. */
+             op1 = gen_reg_rtx (V4SImode);
+             op0 = gen_rtx_VEC_MERGE (V4SImode,
+                                      gen_rtx_VEC_DUPLICATE (V4SImode,
+                                                             op0),
+                                      CONST0_RTX (V4SImode),
+                                      const1_rtx);
+             emit_insn (gen_rtx_SET (VOIDmode, op1, op0));
+
+             /* Cast the V4SImode vector back to a V8HImode vector.  */
+             op0 = gen_reg_rtx (mode);
+             emit_move_insn (op0, gen_lowpart (mode, op1));
+
+             /* Load even HI elements into the second position.  */
+             emit_insn (gen_vec_setv8hi (op0, XVECEXP (vals, 0,
+                                                       i + i + 1),
+                                         const1_rtx));
+
+             /* Cast V8HImode vector to V4SImode vector.  */
+             ops[i] = gen_reg_rtx (V4SImode);
+             emit_move_insn (ops[i], gen_lowpart (V4SImode, op0));
+           }
+
+         /* Interleave low V4SIs.  */
+         for (i = j = 0; i < ARRAY_SIZE (ops); i += 2, j++)
+           {
+             op0 = gen_reg_rtx (V4SImode);
+             emit_insn (gen_vec_interleave_lowv4si (op0, ops[i],
+                                                    ops[i + 1]));
+
+             /* Cast V4SImode vectors to V2DImode vectors.  */
+             op1 = gen_reg_rtx (V2DImode);
+             emit_move_insn (op1, gen_lowpart (V2DImode, op0));
+             ops[j] = op1;
+           }
+
+         /* Interleave low V2DIs.  */
+         op0 = gen_reg_rtx (V2DImode);
+         emit_insn (gen_vec_interleave_lowv2di (op0, ops[0], ops[1]));
+
+         /* Cast the V2DImode vector back to a V8HImode vector.  */
+         emit_insn (gen_rtx_SET (VOIDmode, target,
+                                 gen_lowpart (mode, op0)));
+         return;
+       }
+
     case V16QImode:
+      if (TARGET_SSE4_1)
+       {
+         rtx ops[8];
+         unsigned int i, j;
+
+         for (i = 0; i < ARRAY_SIZE (ops); i++)
+           {
+             /* Extend the odd element from QImode to SImode using
+                a paradoxical SUBREG.  */
+             op0 = gen_reg_rtx (SImode);
+             emit_move_insn (op0, gen_lowpart (SImode,
+                                               XVECEXP (vals, 0,
+                                                        i + i)));
+
+             /* Insert the SImode value as low element of V4SImode
+                vector. */
+             op1 = gen_reg_rtx (V4SImode);
+             op0 = gen_rtx_VEC_MERGE (V4SImode,
+                                      gen_rtx_VEC_DUPLICATE (V4SImode,
+                                                             op0),
+                                      CONST0_RTX (V4SImode),
+                                      const1_rtx);
+             emit_insn (gen_rtx_SET (VOIDmode, op1, op0));
+
+             /* Cast the V4SImode vector back to a V16QImode vector.  */
+             op0 = gen_reg_rtx (mode);
+             emit_move_insn (op0, gen_lowpart (mode, op1));
+
+             /* Load even QI elements into the second position.  */
+             emit_insn (gen_vec_setv16qi (op0, XVECEXP (vals, 0,
+                                                        i + i + 1),
+                                          const1_rtx));
+
+             /* Cast V16QImode vector to V8HImode vector.  */
+             ops[i] = gen_reg_rtx (V8HImode);
+             emit_move_insn (ops[i], gen_lowpart (V8HImode, op0));
+           }
+
+         /* Interleave low V8HIs.  */
+         for (i = j = 0; i < ARRAY_SIZE (ops); i += 2, j++)
+           {
+             op0 = gen_reg_rtx (V8HImode);
+             emit_insn (gen_vec_interleave_lowv8hi (op0, ops[i],
+                                                    ops[i + 1]));
+
+             /* Cast V8HImode vector to V4SImode vector.  */
+             op1 = gen_reg_rtx (V4SImode);
+             emit_move_insn (op1, gen_lowpart (V4SImode, op0));
+             ops[j] = op1;
+           }
+
+         /* Interleave low V4SIs.  */
+         for (i = j = 0; i < ARRAY_SIZE (ops) / 2; i += 2, j++)
+           {
+             op0 = gen_reg_rtx (V4SImode);
+             emit_insn (gen_vec_interleave_lowv4si (op0, ops[i],
+                                                    ops[i + 1]));
+
+             /* Cast V4SImode vectors to V2DImode vectors.  */
+             op1 = gen_reg_rtx (V2DImode);
+             emit_move_insn (op1, gen_lowpart (V2DImode, op0));
+             ops[j] = op1;
+           }
+
+         /* Interleave low V2DIs.  */
+         op0 = gen_reg_rtx (V2DImode);
+         emit_insn (gen_vec_interleave_lowv2di (op0, ops[0], ops[1]));
+
+         /* Cast the V2DImode vector back to a V16QImode vector.  */
+         emit_insn (gen_rtx_SET (VOIDmode, target,
+                                 gen_lowpart (mode, op0)));
+         return;
+       }
+
     case V4HImode:
     case V8QImode:
       break;
index a91d3c5644ef006c1b4a060abd60efe350e236d6..45ea522360956014bfe36264bbd65e342b34a331 100644 (file)
@@ -1,3 +1,21 @@
+2008-05-15  H.J. Lu  <hongjiu.lu@intel.com>
+
+       * gcc.target/i386/m128-check.h: New.
+       * gcc.target/i386/set-v16qi-1.h: Likewise.
+       * gcc.target/i386/set-v16qi-2.h: Likewise.
+       * gcc.target/i386/set-v8hi-1.h: Likewise.
+       * gcc.target/i386/set-v8hi-2.h: Likewise.
+       * gcc.target/i386/sse2-set-v16qi-1.c: Likewise.
+       * gcc.target/i386/sse2-set-v16qi-2.c: Likewise.
+       * gcc.target/i386/sse2-set-v8hi-1.c: Likewise.
+       * gcc.target/i386/sse2-set-v8hi-2.c: Likewise.
+       * gcc.target/i386/sse4_1-set-v16qi-1.c: Likewise.
+       * gcc.target/i386/sse4_1-set-v16qi-2.c: Likewise.
+
+       * gcc.target/i386/sse2-check.h: Include m128-check.h. Don't
+       include <stdio.h>.
+       * gcc.target/i386/sse4_1-check.h: Likewise.
+
 2008-05-15  Adam Nemet  <anemet@caviumnetworks.com>
 
        PR middle-end/36194
diff --git a/gcc/testsuite/gcc.target/i386/m128-check.h b/gcc/testsuite/gcc.target/i386/m128-check.h
new file mode 100644 (file)
index 0000000..3231c07
--- /dev/null
@@ -0,0 +1,69 @@
+#include <stdio.h>
+#include <emmintrin.h>
+
+typedef union
+{
+  __m128i x;
+  char a[16];
+} union128i_b;
+
+typedef union
+{
+  __m128i x;
+  short a[8];
+} union128i_w;
+
+typedef union
+{
+  __m128i x;
+  int a[4];
+} union128i_d;
+
+typedef union
+{
+  __m128i x;
+  long long a[2];
+} union128i_q;
+
+typedef union
+{
+  __m128  x;
+  float a[4];
+} union128;
+
+typedef union
+{
+  __m128d x;
+  double a[2];
+} union128d;
+
+#ifdef DEBUG
+#define PRINTF printf
+#else
+#define PRINTF(...)    
+#endif
+
+#define CHECK_EXP(UINON_TYPE, VALUE_TYPE, FMT)         \
+static int                                             \
+__attribute__((noinline, unused))                      \
+check_##UINON_TYPE (UINON_TYPE u, const VALUE_TYPE *v) \
+{                                                      \
+  int i;                                               \
+  int err = 0;                                         \
+                                                       \
+  for (i = 0; i < sizeof (u.a) / sizeof (u.a[0]); i++) \
+    if (u.a[i] != v[i])                                        \
+      {                                                        \
+       err++;                                          \
+       PRINTF ("%i: " FMT " != " FMT "\n",             \
+               i, v[i], u.a[i]);                       \
+      }                                                        \
+  return err;                                          \
+}
+
+CHECK_EXP (union128i_b, char, "%d")
+CHECK_EXP (union128i_w, short, "%d")
+CHECK_EXP (union128i_d, int, "0x%x")
+CHECK_EXP (union128i_q, long long, "0x%llx")
+CHECK_EXP (union128, float, "%f")
+CHECK_EXP (union128d, double, "%f")
diff --git a/gcc/testsuite/gcc.target/i386/set-v16qi-1.h b/gcc/testsuite/gcc.target/i386/set-v16qi-1.h
new file mode 100644 (file)
index 0000000..79556e8
--- /dev/null
@@ -0,0 +1,30 @@
+/* { dg-do run } */
+/* { dg-require-effective-target sse4 } */
+/* { dg-options "-O2 -msse4.1" } */
+
+#include CHECK_H
+
+static __m128i
+__attribute__((noinline))
+foo (char *v)
+{
+  return _mm_set_epi8 (v[15], v[14], v[13], v[12],
+                      v[11], v[10], v[9], v[8],
+                      v[7], v[6], v[5], v[4],
+                      v[3], v[2], v[1], v[0]);
+}
+
+static void
+TEST (void)
+{
+  char v[16] =
+    { 
+      -3, 60, 48, 104, -90, 37, -48, 78,
+      4, 33, 81, 4, -89, 17, 8, 68
+    };
+  union128i_b u;
+
+  u.x = foo (v);
+  if (check_union128i_b (u, v))
+    abort ();
+}
diff --git a/gcc/testsuite/gcc.target/i386/set-v16qi-2.h b/gcc/testsuite/gcc.target/i386/set-v16qi-2.h
new file mode 100644 (file)
index 0000000..9768806
--- /dev/null
@@ -0,0 +1,30 @@
+#include CHECK_H
+
+static __m128i
+__attribute__((noinline))
+foo (char x1, char x2, char x3, char x4,
+     char x5, char x6, char x7, char x8,
+     char x9, char x10, char x11, char x12,
+     char x13, char x14, char x15, char x16)
+{
+  return _mm_set_epi8 (x1, x2, x3, x4, x5, x6, x7, x8,
+                      x9, x10, x11, x12, x13, x14, x15, x16);
+}
+
+static void
+TEST (void)
+{
+  char v[16] =
+    { 
+      -3, 60, 48, 104, -90, 37, -48, 78,
+      4, 33, 81, 4, -89, 17, 8, 68
+    };
+  union128i_b u;
+
+  u.x = foo (v[15], v[14], v[13], v[12],
+            v[11], v[10], v[9], v[8],
+            v[7], v[6], v[5], v[4],
+            v[3], v[2], v[1], v[0]);
+  if (check_union128i_b (u, v))
+    abort ();
+}
diff --git a/gcc/testsuite/gcc.target/i386/set-v8hi-1.h b/gcc/testsuite/gcc.target/i386/set-v8hi-1.h
new file mode 100644 (file)
index 0000000..87762b8
--- /dev/null
@@ -0,0 +1,19 @@
+#include CHECK_H
+
+static __m128i
+__attribute__((noinline))
+foo (short *v)
+{
+  return _mm_set_epi16 (v[7], v[6], v[5], v[4], v[3], v[2], v[1], v[0]);
+}
+
+static void
+TEST (void)
+{
+  short v[8] = { -3, 6000, 48, 104, -90, 34567, -1248, 34678 };
+  union128i_w u;
+
+  u.x = foo (v);
+  if (check_union128i_w (u, v))
+    abort ();
+}
diff --git a/gcc/testsuite/gcc.target/i386/set-v8hi-2.h b/gcc/testsuite/gcc.target/i386/set-v8hi-2.h
new file mode 100644 (file)
index 0000000..835e7b4
--- /dev/null
@@ -0,0 +1,21 @@
+#include CHECK_H
+
+__m128i
+__attribute__((noinline))
+foo (short x1, short x2, short x3, short x4,
+     short x5, short x6, short x7, short x8)
+{
+  return _mm_set_epi16 (x1, x2, x3, x4, x5, x6, x7, x8);
+}
+
+static void
+TEST (void)
+{
+  short v[8] = { -3, 2, 1, 9, 23, -173, -13, 69 };
+  union128i_w u;
+
+  u.x = foo (v[7], v[6], v[5], v[4], v[3], v[2], v[1], v[0]);
+
+  if (check_union128i_w (u, v))
+     abort ();
+}
index 007ff543228791194c6cd795afa957f2151c05fc..a69333e391a09ed05e90482b778cd3f06cdbe6b7 100644 (file)
@@ -1,7 +1,6 @@
-#include <stdio.h>
 #include <stdlib.h>
-
 #include "cpuid.h"
+#include "m128-check.h"
 
 static void sse2_test (void);
 
diff --git a/gcc/testsuite/gcc.target/i386/sse2-set-v16qi-1.c b/gcc/testsuite/gcc.target/i386/sse2-set-v16qi-1.c
new file mode 100644 (file)
index 0000000..61f19cb
--- /dev/null
@@ -0,0 +1,7 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -msse2" } */
+
+#define CHECK_H "sse2-check.h"
+#define TEST sse2_test
+
+#include "set-v16qi-1.h"
diff --git a/gcc/testsuite/gcc.target/i386/sse2-set-v16qi-2.c b/gcc/testsuite/gcc.target/i386/sse2-set-v16qi-2.c
new file mode 100644 (file)
index 0000000..918fa5c
--- /dev/null
@@ -0,0 +1,7 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -msse2" } */
+
+#define CHECK_H "sse2-check.h"
+#define TEST sse2_test
+
+#include "set-v16qi-2.h"
diff --git a/gcc/testsuite/gcc.target/i386/sse2-set-v8hi-1.c b/gcc/testsuite/gcc.target/i386/sse2-set-v8hi-1.c
new file mode 100644 (file)
index 0000000..cab461e
--- /dev/null
@@ -0,0 +1,7 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -msse2" } */
+
+#define CHECK_H "sse2-check.h"
+#define TEST sse2_test
+
+#include "set-v8hi-1.h"
diff --git a/gcc/testsuite/gcc.target/i386/sse2-set-v8hi-2.c b/gcc/testsuite/gcc.target/i386/sse2-set-v8hi-2.c
new file mode 100644 (file)
index 0000000..2b4a8be
--- /dev/null
@@ -0,0 +1,7 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -msse2" } */
+
+#define CHECK_H "sse2-check.h"
+#define TEST sse2_test
+
+#include "set-v8hi-2.h"
index bac37cb56efa5dc50a03c1e1e120f9e217bf99aa..2d1c4e835a343b7a46c8353401b34dc6351a2c8d 100644 (file)
@@ -1,7 +1,7 @@
-#include <stdio.h>
 #include <stdlib.h>
 
 #include "cpuid.h"
+#include "m128-check.h"
 
 static void sse4_1_test (void);
 
diff --git a/gcc/testsuite/gcc.target/i386/sse4_1-set-v16qi-1.c b/gcc/testsuite/gcc.target/i386/sse4_1-set-v16qi-1.c
new file mode 100644 (file)
index 0000000..23c0903
--- /dev/null
@@ -0,0 +1,8 @@
+/* { dg-do run } */
+/* { dg-require-effective-target sse4 } */
+/* { dg-options "-O2 -msse4.1" } */
+
+#define CHECK_H "sse4_1-check.h"
+#define TEST sse4_1_test
+
+#include "set-v16qi-1.h"
diff --git a/gcc/testsuite/gcc.target/i386/sse4_1-set-v16qi-2.c b/gcc/testsuite/gcc.target/i386/sse4_1-set-v16qi-2.c
new file mode 100644 (file)
index 0000000..5245870
--- /dev/null
@@ -0,0 +1,8 @@
+/* { dg-do run } */
+/* { dg-require-effective-target sse4 } */
+/* { dg-options "-O2 -msse4.1" } */
+
+#define CHECK_H "sse4_1-check.h"
+#define TEST sse4_1_test
+
+#include "set-v16qi-2.h"