re PR tree-optimization/91201 (SIMD not generated for horizontal sum of bytes in array)
author Jakub Jelinek <jakub@redhat.com>
Wed, 31 Jul 2019 09:22:48 +0000 (11:22 +0200)
committer Jakub Jelinek <jakub@gcc.gnu.org>
Wed, 31 Jul 2019 09:22:48 +0000 (11:22 +0200)
PR tree-optimization/91201
* config/i386/sse.md (reduc_plus_scal_v16qi): New expander.
(REDUC_PLUS_MODE): Add V32QImode for TARGET_AVX and V64QImode for
TARGET_AVX512F.
(reduc_plus_scal_<mode>): Improve formatting by introducing
a temporary.

* gcc.target/i386/sse2-pr91201.c: New test.
* gcc.target/i386/avx2-pr91201.c: New test.
* gcc.target/i386/avx512bw-pr91201.c: New test.

From-SVN: r273927

gcc/ChangeLog
gcc/config/i386/sse.md
gcc/testsuite/ChangeLog
gcc/testsuite/gcc.target/i386/avx2-pr91201.c [new file with mode: 0644]
gcc/testsuite/gcc.target/i386/avx512bw-pr91201.c [new file with mode: 0644]
gcc/testsuite/gcc.target/i386/sse2-pr91201.c [new file with mode: 0644]

index 3e166c3f3afc919108095ebee23b253b436206a8..c0b86881f4afc49d6dab7837068687c3f7ab124c 100644 (file)
@@ -1,3 +1,12 @@
+2019-07-31  Jakub Jelinek  <jakub@redhat.com>
+
+       PR tree-optimization/91201
+       * config/i386/sse.md (reduc_plus_scal_v16qi): New expander.
+       (REDUC_PLUS_MODE): Add V32QImode for TARGET_AVX and V64QImode for
+       TARGET_AVX512F.
+       (reduc_plus_scal_<mode>): Improve formatting by introducing
+       a temporary.
+
 2019-07-31  Sudakshina Das  <sudi.das@arm.com>
 
        * config/aarch64/aarch64-builtins.c (enum aarch64_builtins): Add
index fa8f13f5796a0c513269169f15286e528162adac..56a89154e54e3a0643b73a1fb8789e08e504e839 100644 (file)
   DONE;
 })
 
+;; Plus-reduction of a V16QI vector (operand 1) into a QImode scalar
+;; (operand 0), available when TARGET_SSE2 holds.  The preparation
+;; statements emit the whole sequence themselves and finish with DONE,
+;; so the RTL template below is never emitted as an insn.
+(define_expand "reduc_plus_scal_v16qi"
+ [(plus:V16QI
+    (match_operand:QI 0 "register_operand")
+    (match_operand:V16QI 1 "register_operand"))]
+ "TARGET_SSE2"
+{
+  /* View the input as a single V1TI element and shift it right by 64
+     (presumably a bit count), so the upper half of the vector ends up
+     in the lower half of tmp.  */
+  rtx tmp = gen_reg_rtx (V1TImode);
+  emit_insn (gen_sse2_lshrv1ti3 (tmp, gen_lowpart (V1TImode, operands[1]),
+                                GEN_INT (64)));
+  /* Element-wise V16QI add of the original vector and the shifted copy.  */
+  rtx tmp2 = gen_reg_rtx (V16QImode);
+  emit_insn (gen_addv16qi3 (tmp2, operands[1], gen_lowpart (V16QImode, tmp)));
+  /* All-zero V16QI vector used as the second psadbw input.  */
+  rtx tmp3 = gen_reg_rtx (V16QImode);
+  emit_move_insn (tmp3, CONST0_RTX (V16QImode));
+  /* psadbw of tmp2 against zero, producing a V2DI result.
+     NOTE(review): per the Intel PSADBW description this should leave the
+     sum of the low eight bytes of tmp2 in the low element - confirm.  */
+  rtx tmp4 = gen_reg_rtx (V2DImode);
+  emit_insn (gen_sse2_psadbw (tmp4, tmp2, tmp3));
+  /* Reinterpret the result as V16QI and extract element 0 into the
+     QImode destination.  */
+  tmp4 = gen_lowpart (V16QImode, tmp4);
+  emit_insn (gen_vec_extractv16qiqi (operands[0], tmp4, const0_rtx));
+  DONE;
+})
+
 (define_mode_iterator REDUC_PLUS_MODE
  [(V4DF "TARGET_AVX") (V8SF "TARGET_AVX")
-  (V8DF "TARGET_AVX512F") (V16SF "TARGET_AVX512F")])
+  (V8DF "TARGET_AVX512F") (V16SF "TARGET_AVX512F")
+  (V32QI "TARGET_AVX") (V64QI "TARGET_AVX512F")])
 
 (define_expand "reduc_plus_scal_<mode>"
  [(plus:REDUC_PLUS_MODE
   rtx tmp = gen_reg_rtx (<ssehalfvecmode>mode);
   emit_insn (gen_vec_extract_hi_<mode> (tmp, operands[1]));
   rtx tmp2 = gen_reg_rtx (<ssehalfvecmode>mode);
-  emit_insn (gen_add<ssehalfvecmodelower>3
-    (tmp2, tmp, gen_lowpart (<ssehalfvecmode>mode, operands[1])));
+  rtx tmp3 = gen_lowpart (<ssehalfvecmode>mode, operands[1]);
+  emit_insn (gen_add<ssehalfvecmodelower>3 (tmp2, tmp, tmp3));
   emit_insn (gen_reduc_plus_scal_<ssehalfvecmodelower> (operands[0], tmp2));
   DONE;
 })
index 632164ab3308107a741e2ab1e34c5492c60dc720..e09f3bf4e50093fbf0c85abc30ac9f56ad64aef5 100644 (file)
@@ -1,3 +1,10 @@
+2019-07-31  Jakub Jelinek  <jakub@redhat.com>
+
+       PR tree-optimization/91201
+       * gcc.target/i386/sse2-pr91201.c: New test.
+       * gcc.target/i386/avx2-pr91201.c: New test.
+       * gcc.target/i386/avx512bw-pr91201.c: New test.
+
 2019-07-31  Sudakshina Das  <sudi.das@arm.com>
 
        * gcc.target/aarch64/acle/tme.c: New test.
diff --git a/gcc/testsuite/gcc.target/i386/avx2-pr91201.c b/gcc/testsuite/gcc.target/i386/avx2-pr91201.c
new file mode 100644 (file)
index 0000000..4cf0a3a
--- /dev/null
@@ -0,0 +1,6 @@
+/* PR tree-optimization/91201 */
+/* { dg-do compile } */
+/* { dg-options "-O3 -mavx2 -mno-avx512f" } */
+/* { dg-final { scan-assembler "\tvpsadbw\t" } } */
+
+#include "sse2-pr91201.c"
diff --git a/gcc/testsuite/gcc.target/i386/avx512bw-pr91201.c b/gcc/testsuite/gcc.target/i386/avx512bw-pr91201.c
new file mode 100644 (file)
index 0000000..9829a5c
--- /dev/null
@@ -0,0 +1,6 @@
+/* PR tree-optimization/91201 */
+/* { dg-do compile } */
+/* { dg-options "-O3 -mavx512bw -mprefer-vector-width=512" } */
+/* { dg-final { scan-assembler "\tvpsadbw\t" } } */
+
+#include "sse2-pr91201.c"
diff --git a/gcc/testsuite/gcc.target/i386/sse2-pr91201.c b/gcc/testsuite/gcc.target/i386/sse2-pr91201.c
new file mode 100644 (file)
index 0000000..016b187
--- /dev/null
@@ -0,0 +1,18 @@
+/* PR tree-optimization/91201 */
+/* { dg-do compile } */
+/* { dg-options "-O3 -msse2 -mno-sse3" } */
+/* { dg-final { scan-assembler "\tpsadbw\t" } } */
+
+unsigned char bytes[1024];  /* Input array that sum () adds up.  */
+
+unsigned char
+sum (void)  /* Returns the sum of all elements of bytes[], truncated to unsigned char.  */
+{
+  unsigned char r = 0;  /* Accumulator; same narrow type as the elements.  */
+  unsigned char *p = (unsigned char *) bytes;
+  int n;
+
+  for (n = 0; n < sizeof (bytes); ++n)  /* sizeof (bytes) equals the element count for unsigned char.  */
+    r += p[n];
+  return r;
+}