re PR tree-optimization/91201 (SIMD not generated for horizontal sum of bytes in...
author Jakub Jelinek <jakub@redhat.com>
Wed, 31 Jul 2019 13:49:26 +0000 (15:49 +0200)
committer Jakub Jelinek <jakub@gcc.gnu.org>
Wed, 31 Jul 2019 13:49:26 +0000 (15:49 +0200)
PR tree-optimization/91201
* config/i386/mmx.md (reduc_plus_scal_v8qi): New expander.

* gcc.target/i386/sse2-pr91201-2.c: New test.

From-SVN: r273932

gcc/ChangeLog
gcc/config/i386/mmx.md
gcc/testsuite/ChangeLog
gcc/testsuite/gcc.target/i386/sse2-pr91201-2.c [new file with mode: 0644]

index 5bfe7bc09b19261e365d1ea2079356221758d269..a883b23cc21d2ab69c07af35feb0e6037952b4ed 100644 (file)
@@ -1,3 +1,8 @@
+2019-07-31  Jakub Jelinek  <jakub@redhat.com>
+
+       PR tree-optimization/91201
+       * config/i386/mmx.md (reduc_plus_scal_v8qi): New expander.
+
 2019-07-31  Andrew Stubbs  <ams@codesourcery.com>
 
        * config/gcn/gcn-valu.md
index c78b33b510a6ce36600c375a2272a1cdec1031b5..b4738ae5ba3c786acbed2472dfed4f27ca633b7d 100644 (file)
    (set_attr "type" "mmxshft,sseiadd,sseiadd")
    (set_attr "mode" "DI,TI,TI")])
 
+(define_expand "reduc_plus_scal_v8qi"
+ [(plus:V8QI
+    (match_operand:QI 0 "register_operand")
+    (match_operand:V8QI 1 "register_operand"))]
+ "TARGET_MMX_WITH_SSE"
+{ /* Expand a plus-reduction of the V8QI vector in operand 1 into the QImode
+     scalar in operand 0, using a single PSADBW.  All RTL is emitted by the
+     statements below, which finish with DONE.  NOTE(review): the plus:V8QI
+     template above presumably only declares the operands and is not itself
+     emitted -- confirm against the define_expand documentation.  */
+  rtx tmp = gen_reg_rtx (V8QImode);
+  emit_move_insn (tmp, CONST0_RTX (V8QImode)); /* tmp = all-zero V8QI.  */
+  rtx tmp2 = gen_reg_rtx (V1DImode);
+  emit_insn (gen_mmx_psadbw (tmp2, operands[1], tmp)); /* PSADBW against zero:
+     the sum of absolute differences of the unsigned input bytes with 0 is
+     simply the sum of the input bytes, produced in the V1DI result.  */
+  tmp2 = gen_lowpart (V8QImode, tmp2); /* Reinterpret the V1DI sum as V8QI.  */
+  emit_insn (gen_vec_extractv8qiqi (operands[0], tmp2, const0_rtx)); /* Take
+     element 0, i.e. the low 8 bits of the sum, as the QImode result.  */
+  DONE;
+})
+
 (define_insn_and_split "mmx_pmovmskb"
   [(set (match_operand:SI 0 "register_operand" "=r,r")
        (unspec:SI [(match_operand:V8QI 1 "register_operand" "y,x")]
index 44166d1385eff280e33c47636818fbb57e548a6e..91378f2cff60a8ab0d0fb57ae9867159fb56b50b 100644 (file)
@@ -1,3 +1,8 @@
+2019-07-31  Jakub Jelinek  <jakub@redhat.com>
+
+       PR tree-optimization/91201
+       * gcc.target/i386/sse2-pr91201-2.c: New test.
+
 2019-07-31  Richard Biener  <rguenther@suse.de>
 
        PR tree-optimization/91178
diff --git a/gcc/testsuite/gcc.target/i386/sse2-pr91201-2.c b/gcc/testsuite/gcc.target/i386/sse2-pr91201-2.c
new file mode 100644 (file)
index 0000000..d711ee0
--- /dev/null
@@ -0,0 +1,21 @@
+/* PR tree-optimization/91201 */
+/* { dg-do compile { target lp64 } } */
+/* { dg-options "-O3 -msse2 -mno-sse3" } */
+/* { dg-final { scan-assembler "\tpsadbw\t" } } */
+
+unsigned char bytes[1024]; /* Buffer that sum reads and updates in place.  */
+
+unsigned char
+sum (void) /* Returns the wrapping unsigned char sum of bytes[8..1023].  */
+{
+  unsigned char r = 0;
+  unsigned char *p = (unsigned char *) bytes;
+  int n;
+
+  for (n = 8; n < sizeof (bytes); ++n)
+    {
+      p[n - 8] += p[n]; /* Side effect: add each byte into the one 8 back.  */
+      r += p[n]; /* Byte-wide reduction; the dg-final scan expects psadbw.  */
+    }
+  return r;
+}