Simplify vec_select of a subreg of X to just a vec_select of X.
author liuhongt <hongtao.liu@intel.com>
Tue, 13 Oct 2020 07:35:29 +0000 (15:35 +0800)
committer liuhongt <hongtao.liu@intel.com>
Thu, 22 Oct 2020 03:37:11 +0000 (11:37 +0800)
gcc/ChangeLog
PR rtl-optimization/97249
* simplify-rtx.c (simplify_binary_operation_1): Simplify
vec_select of a subreg of X to a vec_select of X.

gcc/testsuite/ChangeLog

* gcc.target/i386/pr97249-1.c: New test.

gcc/simplify-rtx.c
gcc/testsuite/gcc.target/i386/pr97249-1.c [new file with mode: 0644]

index 869f0d11b2e363d1224418cde7ffb15ac73fb78c..47e7aebda8ab37ae6abe1ad5178b7377be8bef31 100644 (file)
@@ -4170,6 +4170,47 @@ simplify_binary_operation_1 (enum rtx_code code, machine_mode mode,
                    return subop1;
                }
            }
+
+         /* Simplify vec_select of a subreg of X to just a vec_select of X
+            when X has same component mode as vec_select.  */
+         unsigned HOST_WIDE_INT subreg_offset = 0;
+         if (GET_CODE (trueop0) == SUBREG
+             && GET_MODE_INNER (mode)
+                == GET_MODE_INNER (GET_MODE (SUBREG_REG (trueop0)))
+             && GET_MODE_NUNITS (mode).is_constant (&l1)
+             && constant_multiple_p (subreg_memory_offset (trueop0),
+                                     GET_MODE_UNIT_BITSIZE (mode),
+                                     &subreg_offset))
+           {
+             poly_uint64 nunits
+               = GET_MODE_NUNITS (GET_MODE (SUBREG_REG (trueop0)));
+             bool success = true;
+             for (int i = 0; i != l1; i++)
+               {
+                 rtx idx = XVECEXP (trueop1, 0, i);
+                 if (!CONST_INT_P (idx)
+                     || maybe_ge (UINTVAL (idx) + subreg_offset, nunits))
+                   {
+                     success = false;
+                     break;
+                   }
+               }
+
+             if (success)
+               {
+                 rtx par = trueop1;
+                 if (subreg_offset)
+                   {
+                     rtvec vec = rtvec_alloc (l1);
+                     for (int i = 0; i < l1; i++)
+                       RTVEC_ELT (vec, i)
+                         = GEN_INT (INTVAL (XVECEXP (trueop1, 0, i))
+                                    + subreg_offset);
+                     par = gen_rtx_PARALLEL (VOIDmode, vec);
+                   }
+                 return gen_rtx_VEC_SELECT (mode, SUBREG_REG (trueop0), par);
+               }
+           }
        }
 
       if (XVECLEN (trueop1, 0) == 1
diff --git a/gcc/testsuite/gcc.target/i386/pr97249-1.c b/gcc/testsuite/gcc.target/i386/pr97249-1.c
new file mode 100644 (file)
index 0000000..4478a34
--- /dev/null
@@ -0,0 +1,30 @@
+/* PR target/97249  */
+/* { dg-do compile } */
+/* { dg-options "-mavx2 -O3 -masm=att" } */
+/* { dg-final { scan-assembler-times {(?n)vpmovzxbw[ \t]+\(.*%xmm[0-9]} 2 } } */
+/* { dg-final { scan-assembler-times {(?n)vpmovzxwd[ \t]+\(.*%xmm[0-9]} 2 } } */
+/* { dg-final { scan-assembler-times {(?n)vpmovzxdq[ \t]+\(.*%xmm[0-9]} 2 } } */
+/* foo: p3[i] = p1[i] + p2[i] for 8 elements, unsigned char inputs, short output.  */
+void
+foo (unsigned char* p1, unsigned char* p2, short* __restrict p3)
+{
+    for (int i = 0 ; i != 8; i++) /* Fixed trip count of 8.  */
+     p3[i] = p1[i] + p2[i]; /* Operands promote to int; sum is converted to short on store.  */
+     return; /* Not part of the loop despite the indentation; runs once after it.  */
+}
+/* foo1: p3[i] = p1[i] + p2[i] for 4 elements, unsigned short inputs, int output.  */
+void
+foo1 (unsigned short* p1, unsigned short* p2, int* __restrict p3)
+{
+    for (int i = 0 ; i != 4; i++) /* Fixed trip count of 4.  */
+     p3[i] = p1[i] + p2[i]; /* Operands undergo integer promotion before the add.  */
+     return; /* Not part of the loop despite the indentation; runs once after it.  */
+}
+/* foo2: p3[i] = p1[i] + p2[i] for 2 elements, unsigned int inputs, long long output.  */
+void
+foo2 (unsigned int* p1, unsigned int* p2, long long* __restrict p3)
+{
+    for (int i = 0 ; i != 2; i++) /* Fixed trip count of 2.  */
+      p3[i] = (long long)p1[i] + (long long)p2[i]; /* Casts widen each operand before the add.  */
+     return; /* Not part of the loop despite the indentation; runs once after it.  */
+}