Properly handle 256bit load cast.
author: H.J. Lu <hongjiu.lu@intel.com>
Wed, 18 May 2011 22:12:28 +0000 (22:12 +0000)
committer: H.J. Lu <hjl@gcc.gnu.org>
Wed, 18 May 2011 22:12:28 +0000 (15:12 -0700)
gcc/

2011-05-18  H.J. Lu  <hongjiu.lu@intel.com>

PR target/49002
* config/i386/sse.md (avx_<ssemodesuffix><avxsizesuffix>_<ssemodesuffix>):
Properly handle load cast.

gcc/testsuite/

2011-05-18  H.J. Lu  <hongjiu.lu@intel.com>

PR target/49002
* gcc.target/i386/pr49002-1.c: New test.
* gcc.target/i386/pr49002-2.c: Likewise.

From-SVN: r173880

gcc/ChangeLog
gcc/config/i386/sse.md
gcc/testsuite/ChangeLog
gcc/testsuite/gcc.target/i386/pr49002-1.c [new file with mode: 0644]
gcc/testsuite/gcc.target/i386/pr49002-2.c [new file with mode: 0644]

index 0850640b837f257a740c12ae86fc920ba7ec5f2e..c695f9b7998723033a9bb43a2bf8de02beb4b642 100644 (file)
@@ -1,3 +1,9 @@
+2011-05-18  H.J. Lu  <hongjiu.lu@intel.com>
+
+       PR target/49002
+       * config/i386/sse.md (avx_<ssemodesuffix><avxsizesuffix>_<ssemodesuffix>):
+       Properly handle load cast.
+
 2011-05-18  Jakub Jelinek  <jakub@redhat.com>
 
        PR tree-optimization/49039
index 291bffb636e8cae448850f14b84b51e1fd6b48a3..cf12a6d0c045fa75b2b43ac64c07dc3e46a9c929 100644 (file)
   "&& reload_completed"
   [(const_int 0)]
 {
+  rtx op0 = operands[0];
   rtx op1 = operands[1];
-  if (REG_P (op1))
+  if (REG_P (op0))
+    op0 = gen_rtx_REG (<ssehalfvecmode>mode, REGNO (op0));
+  else 
     op1 = gen_rtx_REG (<MODE>mode, REGNO (op1));
-  else
-    op1 = gen_lowpart (<MODE>mode, op1);
-  emit_move_insn (operands[0], op1);
+  emit_move_insn (op0, op1);
   DONE;
 })
 
index bf4107cc42d38594b0df89b9e76a410aa15d425b..649ac62f6895c97754e12f997dc8ddc15df0a279 100644 (file)
@@ -1,3 +1,9 @@
+2011-05-18  H.J. Lu  <hongjiu.lu@intel.com>
+
+       PR target/49002
+       * gcc.target/i386/pr49002-1.c: New test.
+       * gcc.target/i386/pr49002-2.c: Likewise.
+
 2011-05-18  Jakub Jelinek  <jakub@redhat.com>
 
        PR tree-optimization/49039
diff --git a/gcc/testsuite/gcc.target/i386/pr49002-1.c b/gcc/testsuite/gcc.target/i386/pr49002-1.c
new file mode 100644 (file)
index 0000000..7553e82
--- /dev/null
@@ -0,0 +1,16 @@
+/* PR target/49002 */
+/* { dg-do compile } */
+/* { dg-options "-O -mavx" } */
+
+#include <immintrin.h>
+
+void foo(const __m128d *from, __m256d *to, int s)
+{
+  __m256d var = _mm256_castpd128_pd256(from[0]);
+  var = _mm256_insertf128_pd(var, from[s], 1);
+  to[0] = var;
+}
+
+/* Ensure we load into xmm, not ymm.  */
+/* { dg-final { scan-assembler-not "vmovapd\[\t \]*\[^,\]*,\[\t \]*%ymm" } } */
+/* { dg-final { scan-assembler "vmovapd\[\t \]*\[^,\]*,\[\t \]*%xmm" } } */
diff --git a/gcc/testsuite/gcc.target/i386/pr49002-2.c b/gcc/testsuite/gcc.target/i386/pr49002-2.c
new file mode 100644 (file)
index 0000000..b0e1009
--- /dev/null
@@ -0,0 +1,14 @@
+/* PR target/49002 */
+/* { dg-do compile } */
+/* { dg-options "-O -mavx" } */
+
+#include <immintrin.h>
+
+void foo(const __m128d from, __m256d *to)
+{
+  *to = _mm256_castpd128_pd256(from);
+}
+
+/* Ensure we store ymm, not xmm.  */
+/* { dg-final { scan-assembler-not "vmovapd\[\t \]*%xmm\[0-9\]\+,\[^,\]*" } } */
+/* { dg-final { scan-assembler "vmovapd\[\t \]*%ymm\[0-9\]\+,\[^,\]*" } } */