re PR target/46716 (wrong code generated with -mno-sse2 -m64)
author Uros Bizjak <uros@gcc.gnu.org>
Fri, 2 Mar 2012 17:03:36 +0000 (18:03 +0100)
committer Uros Bizjak <uros@gcc.gnu.org>
Fri, 2 Mar 2012 17:03:36 +0000 (18:03 +0100)
PR target/46716
* config/i386/i386.c (construct_container): Use gen_reg_or_parallel
to pass the argument in the register of "natural" mode.

testsuite/ChangeLog:

PR target/46716
* gcc.target/i386/pr46716.c: New test.

From-SVN: r184802

gcc/ChangeLog
gcc/config/i386/i386.c
gcc/testsuite/ChangeLog
gcc/testsuite/gcc.target/i386/pr46716.c [new file with mode: 0644]

index d43069a8c1940ad4d64c556e9b975a3a8fe94e2b..d1b971bed3a52b5ac8296df6278b86ea67e57ec7 100644 (file)
@@ -1,3 +1,9 @@
+2012-03-02  Uros Bizjak  <ubizjak@gmail.com>
+
+       PR target/46716
+       * config/i386/i386.c (construct_container): Use gen_reg_or_parallel
+       to pass the argument in the register of "natural" mode.
+
 2012-03-02  Richard Guenther  <rguenther@suse.de>
 
        PR tree-optimization/52406
 
 2012-03-02  Greta Yorsh  <Greta.Yorsh@arm.com>
 
-       * config/arm/arm-ldmstm.ml (write_ldm_commutative_peephole):
+       * config/arm/arm-ldmstm.ml (write_ldm_commutative_peephole):
        Improve conditions for peepholes of loads followed by commutative
        operators.
-       * config/arm/ldmstm.md: Regenerated.
+       * config/arm/ldmstm.md: Regenerated.
 
 2012-03-02  Richard Guenther  <rguenther@suse.de>
 
 2012-03-01  Georg-Johann Lay  <avr@gjlay.de>
 
        * config/avr/avr-c.c (avr_cpu_cpp_builtins): Restore built-in
-       defines for __UINT24_MAX__, __INT24_MAX__, __INT24_MIN__ 
+       defines for __UINT24_MAX__, __INT24_MAX__, __INT24_MIN__
        unintentionally removed in r184616.
 
 2012-03-01  Venkataramanan Kumar  <venkataramanan.kumar@amd.com>
        (neon_vcgeu): New insn.
        (neon_vcgtu): Likewise.
        * config/arm/neon.ml (s_8_32, u_8_32): New lists.
-       (ops): Unsigned comparison intrinsics call a different
-       builtin.
+       (ops): Unsigned comparison intrinsics call a different builtin.
 
 2012-02-28  Richard Guenther  <rguenther@suse.de>
 
 
        * config/avr/avr-devices.c (avr_mcu_type): Adjust NULL part
        of initializer to changes from r184614.
-       
+
 2012-02-28  Richard Guenther  <rguenther@suse.de>
 
        PR tree-optimization/52395
 2012-02-27  H.J. Lu  <hongjiu.lu@intel.com>
 
        PR target/52352
-       * config/i386/i386.md (*movabs<mode>_1): Enable only for
-       TARGET_LP64.
+       * config/i386/i386.md (*movabs<mode>_1): Enable only for TARGET_LP64.
        (*movabs<mode>_2): Likewise.
 
 2012-02-27  Jakub Jelinek  <jakub@redhat.com>
index f93583f90463f8a314de8feb40b675ff0ca446a2..65c4c421d1f8fdcc6720f0682bea60310623eb0c 100644 (file)
@@ -5829,7 +5829,8 @@ classify_argument (enum machine_mode mode, const_tree type,
 {
   HOST_WIDE_INT bytes =
     (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
-  int words = (bytes + (bit_offset % 64) / 8 + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
+  int words
+    = (bytes + (bit_offset % 64) / 8 + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
 
   /* Variable sized entities are always passed/returned in memory.  */
   if (bytes < 0)
@@ -5879,7 +5880,8 @@ classify_argument (enum machine_mode mode, const_tree type,
                     misaligned integers.  */
                  if (DECL_BIT_FIELD (field))
                    {
-                     for (i = (int_bit_position (field) + (bit_offset % 64)) / 8 / 8;
+                     for (i = (int_bit_position (field)
+                               + (bit_offset % 64)) / 8 / 8;
                           i < ((int_bit_position (field) + (bit_offset % 64))
                                + tree_low_cst (DECL_SIZE (field), 0)
                                + 63) / 8 / 8; i++)
@@ -5919,7 +5921,8 @@ classify_argument (enum machine_mode mode, const_tree type,
                                                + bit_offset) % 256);
                      if (!num)
                        return 0;
-                     pos = (int_bit_position (field) + (bit_offset % 64)) / 8 / 8;
+                     pos = (int_bit_position (field)
+                            + (bit_offset % 64)) / 8 / 8;
                      for (i = 0; i < num && (i + pos) < words; i++)
                        classes[i + pos] =
                          merge_classes (subclasses[i], classes[i + pos]);
@@ -6336,21 +6339,27 @@ construct_container (enum machine_mode mode, enum machine_mode orig_mode,
       default:
        gcc_unreachable ();
       }
-  if (n == 2 && regclass[0] == X86_64_SSE_CLASS
-      && regclass[1] == X86_64_SSEUP_CLASS && mode != BLKmode)
-    return gen_rtx_REG (mode, SSE_REGNO (sse_regno));
+  if (n == 2
+      && regclass[0] == X86_64_SSE_CLASS
+      && regclass[1] == X86_64_SSEUP_CLASS
+      && mode != BLKmode)
+    return gen_reg_or_parallel (mode, orig_mode,
+                               SSE_REGNO (sse_regno));
   if (n == 4
       && regclass[0] == X86_64_SSE_CLASS
       && regclass[1] == X86_64_SSEUP_CLASS
       && regclass[2] == X86_64_SSEUP_CLASS
       && regclass[3] == X86_64_SSEUP_CLASS
       && mode != BLKmode)
-    return gen_rtx_REG (mode, SSE_REGNO (sse_regno));
-
+    return gen_reg_or_parallel (mode, orig_mode,
+                               SSE_REGNO (sse_regno));
   if (n == 2
-      && regclass[0] == X86_64_X87_CLASS && regclass[1] == X86_64_X87UP_CLASS)
+      && regclass[0] == X86_64_X87_CLASS
+      && regclass[1] == X86_64_X87UP_CLASS)
     return gen_rtx_REG (XFmode, FIRST_STACK_REG);
-  if (n == 2 && regclass[0] == X86_64_INTEGER_CLASS
+
+  if (n == 2
+      && regclass[0] == X86_64_INTEGER_CLASS
       && regclass[1] == X86_64_INTEGER_CLASS
       && (mode == CDImode || mode == TImode || mode == TFmode)
       && intreg[0] + 1 == intreg[1])
@@ -6369,31 +6378,36 @@ construct_container (enum machine_mode mode, enum machine_mode orig_mode,
          case X86_64_INTEGERSI_CLASS:
            /* Merge TImodes on aligned occasions here too.  */
            if (i * 8 + 8 > bytes)
-             tmpmode = mode_for_size ((bytes - i * 8) * BITS_PER_UNIT, MODE_INT, 0);
+             tmpmode
+               = mode_for_size ((bytes - i * 8) * BITS_PER_UNIT, MODE_INT, 0);
            else if (regclass[i] == X86_64_INTEGERSI_CLASS)
              tmpmode = SImode;
            else
              tmpmode = DImode;
-           /* We've requested 24 bytes we don't have mode for.  Use DImode.  */
+           /* We've requested 24 bytes we
+              don't have mode for.  Use DImode.  */
            if (tmpmode == BLKmode)
              tmpmode = DImode;
-           exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
-                                              gen_rtx_REG (tmpmode, *intreg),
-                                              GEN_INT (i*8));
+           exp [nexps++]
+             = gen_rtx_EXPR_LIST (VOIDmode,
+                                  gen_rtx_REG (tmpmode, *intreg),
+                                  GEN_INT (i*8));
            intreg++;
            break;
          case X86_64_SSESF_CLASS:
-           exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
-                                              gen_rtx_REG (SFmode,
-                                                           SSE_REGNO (sse_regno)),
-                                              GEN_INT (i*8));
+           exp [nexps++]
+             = gen_rtx_EXPR_LIST (VOIDmode,
+                                  gen_rtx_REG (SFmode,
+                                               SSE_REGNO (sse_regno)),
+                                  GEN_INT (i*8));
            sse_regno++;
            break;
          case X86_64_SSEDF_CLASS:
-           exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
-                                              gen_rtx_REG (DFmode,
-                                                           SSE_REGNO (sse_regno)),
-                                              GEN_INT (i*8));
+           exp [nexps++]
+             = gen_rtx_EXPR_LIST (VOIDmode,
+                                  gen_rtx_REG (DFmode,
+                                               SSE_REGNO (sse_regno)),
+                                  GEN_INT (i*8));
            sse_regno++;
            break;
          case X86_64_SSE_CLASS:
@@ -6423,10 +6437,11 @@ construct_container (enum machine_mode mode, enum machine_mode orig_mode,
              default:
                gcc_unreachable ();
              }
-           exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
-                                              gen_rtx_REG (tmpmode,
-                                                           SSE_REGNO (sse_regno)),
-                                              GEN_INT (pos*8));
+           exp [nexps++]
+             = gen_rtx_EXPR_LIST (VOIDmode,
+                                  gen_rtx_REG (tmpmode,
+                                               SSE_REGNO (sse_regno)),
+                                  GEN_INT (pos*8));
            sse_regno++;
            break;
          default:
index 9efc01be3b3b76d1c8befc9c7e8f49cd31bfe440..108fd363455f9e994baa76ae9fd34676e2a961fc 100644 (file)
@@ -1,3 +1,8 @@
+2012-03-02  Uros Bizjak  <ubizjak@gmail.com>
+
+       PR target/46716
+       * gcc.target/i386/pr46716.c: New test.
+
 2012-03-02  Paolo Carlini  <paolo.carlini@oracle.com>
 
        PR c++/51989
@@ -30,8 +35,7 @@
 
        * gcc.dg/torture/pr47917.c: Make test using POSIX-printf
        routines on mingw targets.
-       * gcc.dg/vect/pr46126.c (uintptr_t): Add support for
-       LLP64 target.
+       * gcc.dg/vect/pr46126.c (uintptr_t): Add support for LLP64 target.
 
 2012-03-01  Jakub Jelinek  <jakub@redhat.com>
 
 
 2012-02-28  Kai Tietz  <ktietz@redhat.com>
 
-       * gcc.target/i386/pr46939.c (long): Fix LP64 vs LLP64
-       issue.
+       * gcc.target/i386/pr46939.c (long): Fix LP64 vs LLP64 issue.
        * gcc.target/i386/pr45352-2.c: Likewise.
-       * gcc.target/i386/bitfield3.c: Add -mno-ms-bitfields for
-       mingw targets.
-       * gcc.target/i386/xop-vshift-1.c(random): Use on mingw
+       * gcc.target/i386/bitfield3.c: Add -mno-ms-bitfields for mingw targets.
+       * gcc.target/i386/xop-vshift-1.c (random): Use on mingw
        targets instead rand.
        * gcc.target/i386/sse4_1-blendps-2.c: Likewise.
        * gcc.target/i386/sse2-mul-1.c: Likewise.
        * gcc.target/i386/sse4_1-blendps.c: Likewise.
-       * gcc.target/i386/pad-6b.c: Adjust test for x64 mingw
-       target.
+       * gcc.target/i386/pad-6b.c: Adjust test for x64 mingw target.
        * gcc.target/i386/pad-1.c: Likewise.
        * gcc.target/i386/pad-9.c: Likewise.
        * gcc.target/i386/pad-2.c: Likewise.
 
 2012-02-23  Kai Tietz  <ktietz@redhat.com>
 
-       * gcc.dg/pack-test-5.c: Add -mno-ms-bitfields option
-       for mingw-targets.
+       * gcc.dg/pack-test-5.c: Add -mno-ms-bitfields option for mingw-targets.
        * gcc.dg/Wpadded.c: Likewise.
        * gcc.dg/bf-ms-layout-2.c: Adjust offsets to fit ms-bitfield
        structure-layout.
        targets.
        * gcc.dg/stack-usage-1.c (SIZE): Provide proper SIZE for x64 mingw
        target.
-       * gcc.dg/tls/thr-cse-1.c: Provide proper pattern for x64 mingw
-       target.
+       * gcc.dg/tls/thr-cse-1.c: Provide proper pattern for x64 mingw target.
        * gcc.dg/tls/opt-11.c (memset): Use __extension__ to avoid fail
        on x64 mingw target.
        * gcc.dg/bf-ms-attrib.c: Adjust expected size for ms_struct layout.
diff --git a/gcc/testsuite/gcc.target/i386/pr46716.c b/gcc/testsuite/gcc.target/i386/pr46716.c
new file mode 100644 (file)
index 0000000..29c5e1e
--- /dev/null
@@ -0,0 +1,33 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -msse -mno-sse2" } */
+/* { dg-require-effective-target sse } */
+
+#include "sse-check.h"
+
+typedef double V __attribute__ ((__vector_size__ (16), __may_alias__));
+typedef union
+{
+  V x;
+  double a[2];
+} u;
+
+#define EMM_FLT8(a) ((double *)&(a))
+
+void __attribute__ ((noinline))
+test (V s1, V s2)
+{
+  if (EMM_FLT8(s1)[0] != EMM_FLT8(s2)[0]
+      || EMM_FLT8(s1)[1] != EMM_FLT8(s2)[1])
+    abort ();
+}
+
+static void
+sse_test (void)
+{
+  u s1;
+
+  s1.a[0] = 1.0;
+  s1.a[1] = 2.0;
+
+  test (s1.x, s1.x);
+}