re PR target/80286 (AVX2 _mm_cvtsi128_si32 doesn't return a proper 32bits int)
author: Jakub Jelinek <jakub@redhat.com>
Tue, 4 Apr 2017 10:45:55 +0000 (12:45 +0200)
committer: Jakub Jelinek <jakub@gcc.gnu.org>
Tue, 4 Apr 2017 10:45:55 +0000 (12:45 +0200)
PR target/80286
* config/i386/i386.c (ix86_expand_args_builtin): If op has scalar
int mode, convert_modes it to mode as unsigned, otherwise use
lowpart_subreg to mode rather than SImode.
* config/i386/sse.md (<mask_codefor>ashr<mode>3<mask_name>,
ashr<mode>3, ashr<mode>3<mask_name>, <shift_insn><mode>3<mask_name>):
Use DImode instead of SImode for the shift count operand.
* config/i386/mmx.md (mmx_ashr<mode>3, mmx_<shift_insn><mode>3):
Likewise.
testsuite/
* gcc.target/i386/avx-pr80286.c: New test.
* gcc.dg/pr80286.c: New test.

From-SVN: r246676

gcc/ChangeLog
gcc/config/i386/i386.c
gcc/config/i386/mmx.md
gcc/config/i386/sse.md
gcc/testsuite/ChangeLog
gcc/testsuite/gcc.dg/pr80286.c [new file with mode: 0644]
gcc/testsuite/gcc.target/i386/avx-pr80286.c [new file with mode: 0644]

index eceada15c3663f7f77705ac73790a1ef456c6df1..66d6cb719b30ba82975a2f085a7a12e93fb2d4c0 100644 (file)
@@ -1,3 +1,15 @@
+2017-04-04  Jakub Jelinek  <jakub@redhat.com>
+
+       PR target/80286
+       * config/i386/i386.c (ix86_expand_args_builtin): If op has scalar
+       int mode, convert_modes it to mode as unsigned, otherwise use
+       lowpart_subreg to mode rather than SImode.
+       * config/i386/sse.md (<mask_codefor>ashr<mode>3<mask_name>,
+       ashr<mode>3, ashr<mode>3<mask_name>, <shift_insn><mode>3<mask_name>):
+       Use DImode instead of SImode for the shift count operand.
+       * config/i386/mmx.md (mmx_ashr<mode>3, mmx_<shift_insn><mode>3):
+       Likewise.
+
 2017-04-04  Richard Biener  <rguenther@suse.de>
 
        PR middle-end/80281
index 80b8773649460d8e1fabfd7c79c27209edbca302..118ab50b6bb29afdf9a220d0b78d6cb2b5d2ede4 100644 (file)
@@ -35582,10 +35582,17 @@ ix86_expand_args_builtin (const struct builtin_description *d,
        {
          /* SIMD shift insns take either an 8-bit immediate or
             register as count.  But builtin functions take int as
-            count.  If count doesn't match, we put it in register.  */
+            count.  If count doesn't match, we put it in register.
+            The instructions are using 64-bit count, if op is just
+            32-bit, zero-extend it, as negative shift counts
+            are undefined behavior and zero-extension is more
+            efficient.  */
          if (!match)
            {
-             op = lowpart_subreg (SImode, op, GET_MODE (op));
+             if (SCALAR_INT_MODE_P (GET_MODE (op)))
+               op = convert_modes (mode, GET_MODE (op), op, 1);
+             else
+               op = lowpart_subreg (mode, op, GET_MODE (op));
              if (!insn_p->operand[i + 1].predicate (op, mode))
                op = copy_to_reg (op);
            }
index 1e2e6c26eaa6d6c01c9f4026c79c6d1d19664e52..68972bb45c90506ec0d208ef6b414e257ba51d2f 100644 (file)
   [(set (match_operand:MMXMODE24 0 "register_operand" "=y")
         (ashiftrt:MMXMODE24
          (match_operand:MMXMODE24 1 "register_operand" "0")
-         (match_operand:SI 2 "nonmemory_operand" "yN")))]
+         (match_operand:DI 2 "nonmemory_operand" "yN")))]
   "TARGET_MMX"
   "psra<mmxvecsize>\t{%2, %0|%0, %2}"
   [(set_attr "type" "mmxshft")
   [(set (match_operand:MMXMODE248 0 "register_operand" "=y")
         (any_lshift:MMXMODE248
          (match_operand:MMXMODE248 1 "register_operand" "0")
-         (match_operand:SI 2 "nonmemory_operand" "yN")))]
+         (match_operand:DI 2 "nonmemory_operand" "yN")))]
   "TARGET_MMX"
   "p<vshift><mmxvecsize>\t{%2, %0|%0, %2}"
   [(set_attr "type" "mmxshft")
index 1a9b332cc29ed32dbe8e8a030a18ddd2d8aae059..15ced880504b5fce94e6d8b17eec8b0e68332211 100644 (file)
   [(set (match_operand:VI24_AVX512BW_1 0 "register_operand" "=v,v")
        (ashiftrt:VI24_AVX512BW_1
          (match_operand:VI24_AVX512BW_1 1 "nonimmediate_operand" "v,vm")
-         (match_operand:SI 2 "nonmemory_operand" "v,N")))]
+         (match_operand:DI 2 "nonmemory_operand" "v,N")))]
   "TARGET_AVX512VL"
   "vpsra<ssemodesuffix>\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
   [(set_attr "type" "sseishft")
   [(set (match_operand:VI24_AVX2 0 "register_operand" "=x,x")
        (ashiftrt:VI24_AVX2
          (match_operand:VI24_AVX2 1 "register_operand" "0,x")
-         (match_operand:SI 2 "nonmemory_operand" "xN,xN")))]
+         (match_operand:DI 2 "nonmemory_operand" "xN,xN")))]
   "TARGET_SSE2"
   "@
    psra<ssemodesuffix>\t{%2, %0|%0, %2}
   [(set (match_operand:VI248_AVX512BW_AVX512VL 0 "register_operand" "=v,v")
        (ashiftrt:VI248_AVX512BW_AVX512VL
          (match_operand:VI248_AVX512BW_AVX512VL 1 "nonimmediate_operand" "v,vm")
-         (match_operand:SI 2 "nonmemory_operand" "v,N")))]
+         (match_operand:DI 2 "nonmemory_operand" "v,N")))]
   "TARGET_AVX512F"
   "vpsra<ssemodesuffix>\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
   [(set_attr "type" "sseishft")
   [(set (match_operand:VI2_AVX2_AVX512BW 0 "register_operand" "=x,v")
        (any_lshift:VI2_AVX2_AVX512BW
          (match_operand:VI2_AVX2_AVX512BW 1 "register_operand" "0,v")
-         (match_operand:SI 2 "nonmemory_operand" "xN,vN")))]
+         (match_operand:DI 2 "nonmemory_operand" "xN,vN")))]
   "TARGET_SSE2 && <mask_mode512bit_condition> && <mask_avx512bw_condition>"
   "@
    p<vshift><ssemodesuffix>\t{%2, %0|%0, %2}
   [(set (match_operand:VI48_AVX2 0 "register_operand" "=x,x,v")
        (any_lshift:VI48_AVX2
          (match_operand:VI48_AVX2 1 "register_operand" "0,x,v")
-         (match_operand:SI 2 "nonmemory_operand" "xN,xN,vN")))]
+         (match_operand:DI 2 "nonmemory_operand" "xN,xN,vN")))]
   "TARGET_SSE2 && <mask_mode512bit_condition>"
   "@
    p<vshift><ssemodesuffix>\t{%2, %0|%0, %2}
   [(set (match_operand:VI48_512 0 "register_operand" "=v,v")
        (any_lshift:VI48_512
          (match_operand:VI48_512 1 "nonimmediate_operand" "v,m")
-         (match_operand:SI 2 "nonmemory_operand" "vN,N")))]
+         (match_operand:DI 2 "nonmemory_operand" "vN,N")))]
   "TARGET_AVX512F && <mask_mode512bit_condition>"
   "vp<vshift><ssemodesuffix>\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
   [(set_attr "isa" "avx512f")
index 8d7418cd36642702dfa2cdcf6a8b9513cd3ab208..e14af29e2b94fb40c3dcbba19d54a9c680c814d6 100644 (file)
@@ -1,3 +1,9 @@
+2017-04-04  Jakub Jelinek  <jakub@redhat.com>
+
+       PR target/80286
+       * gcc.target/i386/avx-pr80286.c: New test.
+       * gcc.dg/pr80286.c: New test.
+
 2017-04-04  Richard Biener  <rguenther@suse.de>
 
        PR middle-end/80281
diff --git a/gcc/testsuite/gcc.dg/pr80286.c b/gcc/testsuite/gcc.dg/pr80286.c
new file mode 100644 (file)
index 0000000..82f35b5
--- /dev/null
@@ -0,0 +1,23 @@
+/* PR target/80286 */
+/* { dg-do run } */
+/* { dg-options "-O2 -Wno-psabi" } */
+
+typedef int V __attribute__((vector_size (4 * sizeof (int))));
+
+__attribute__((noinline, noclone)) V
+foo (V x, V y)
+{
+  return x << y[0];
+}
+
+int
+main ()
+{
+  V x = { 1, 2, 3, 4 };
+  V y = { 5, 6, 7, 8 };
+  V z = foo (x, y);
+  V e = { 1 << 5, 2 << 5, 3 << 5, 4 << 5 };
+  if (__builtin_memcmp (&z, &e, sizeof (V)))
+    __builtin_abort ();
+  return 0;
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx-pr80286.c b/gcc/testsuite/gcc.target/i386/avx-pr80286.c
new file mode 100644 (file)
index 0000000..2e1881d
--- /dev/null
@@ -0,0 +1,26 @@
+/* PR target/80286 */
+/* { dg-do run { target avx } } */
+/* { dg-options "-O2 -mavx" } */
+
+#include "avx-check.h"
+#include <immintrin.h>
+
+__m256i m;
+
+__attribute__((noinline, noclone)) __m128i
+foo (__m128i x)
+{
+  int s = _mm_cvtsi128_si32 (_mm256_castsi256_si128 (m));
+  return _mm_srli_epi16 (x, s);
+}
+
+static void
+avx_test (void)
+{
+  __m128i a = (__m128i) (__v8hi) { 1 << 7, 2 << 8, 3 << 9, 4 << 10, 5 << 11, 6 << 12, 7 << 13, 8 << 12 };
+  m = (__m256i) (__v8si) { 7, 8, 9, 10, 11, 12, 13, 14 };
+  __m128i c = foo (a);
+  __m128i b = (__m128i) (__v8hi) { 1, 2 << 1, 3 << 2, 4 << 3, 5 << 4, 6 << 5, 7 << 6, 8 << 5 };
+  if (__builtin_memcmp (&c, &b, sizeof (__m128i)))
+    __builtin_abort ();
+}