+2018-07-03 Richard Sandiford <richard.sandiford@arm.com>
+
+ PR tree-optimization/85694
+ * doc/md.texi (avgM3_floor, uavgM3_floor, avgM3_ceil)
+ (uavgM3_ceil): Document new optabs.
+ * doc/sourcebuild.texi (vect_avg_qi): Document new target selector.
+ * internal-fn.def (IFN_AVG_FLOOR, IFN_AVG_CEIL): New internal
+ functions.
+ * optabs.def (savg_floor_optab, uavg_floor_optab, savg_ceil_optab)
+	(uavg_ceil_optab): New optabs.
+ * tree-vect-patterns.c (vect_recog_average_pattern): New function.
+ (vect_vect_recog_func_ptrs): Add it.
+ * tree-vect-stmts.c (vectorizable_call): Get the type of the zero
+ constant directly from the associated lhs.
+
2018-07-03 Richard Sandiford <richard.sandiford@arm.com>
* tree-vect-patterns.c (vect_split_statement): New function.
Vector shift and rotate instructions that take vectors as operand 2
instead of a scalar type.
+@cindex @code{avg@var{m}3_floor} instruction pattern
+@cindex @code{uavg@var{m}3_floor} instruction pattern
+@item @samp{avg@var{m}3_floor}
+@itemx @samp{uavg@var{m}3_floor}
+Signed and unsigned average instructions. These instructions add
+operands 1 and 2 without truncation, divide the result by 2,
+round towards -Inf, and store the result in operand 0. This is
+equivalent to the C code:
+@smallexample
+narrow op0, op1, op2;
+@dots{}
+op0 = (narrow) (((wide) op1 + (wide) op2) >> 1);
+@end smallexample
+where the signedness of @samp{narrow} determines whether this is a signed
+or unsigned operation.
+
+@cindex @code{avg@var{m}3_ceil} instruction pattern
+@cindex @code{uavg@var{m}3_ceil} instruction pattern
+@item @samp{avg@var{m}3_ceil}
+@itemx @samp{uavg@var{m}3_ceil}
+Like @samp{avg@var{m}3_floor} and @samp{uavg@var{m}3_floor}, but round
+towards +Inf. This is equivalent to the C code:
+@smallexample
+narrow op0, op1, op2;
+@dots{}
+op0 = (narrow) (((wide) op1 + (wide) op2 + 1) >> 1);
+@end smallexample
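+
+For example, with element values 5 and 8:
+@smallexample
+floor:  (5 + 8) >> 1      /* 6 */
+ceil:   (5 + 8 + 1) >> 1  /* 7 */
+@end smallexample
+With signed values -5 and -8 the results are -7 and -6 respectively,
+matching the rounding towards -Inf and +Inf.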
+
@cindex @code{bswap@var{m}2} instruction pattern
@item @samp{bswap@var{m}2}
Reverse the order of bytes of operand 1 and store the result in operand 0.
The target's ABI allows stack variables to be aligned to the preferred
vector alignment.
+@item vect_avg_qi
+Target supports both signed and unsigned averaging operations on vectors
+of bytes.
+
@item vect_condition
Target supports vector conditional operations.
DEF_INTERNAL_OPTAB_FN (FNMA, ECF_CONST, fnma, ternary)
DEF_INTERNAL_OPTAB_FN (FNMS, ECF_CONST, fnms, ternary)
+DEF_INTERNAL_SIGNED_OPTAB_FN (AVG_FLOOR, ECF_CONST | ECF_NOTHROW, first,
+ savg_floor, uavg_floor, binary)
+DEF_INTERNAL_SIGNED_OPTAB_FN (AVG_CEIL, ECF_CONST | ECF_NOTHROW, first,
+ savg_ceil, uavg_ceil, binary)
+
DEF_INTERNAL_OPTAB_FN (COND_ADD, ECF_CONST, cond_add, cond_binary)
DEF_INTERNAL_OPTAB_FN (COND_SUB, ECF_CONST, cond_sub, cond_binary)
DEF_INTERNAL_OPTAB_FN (COND_MUL, ECF_CONST, cond_smul, cond_binary)
OPTAB_D (extract_last_optab, "extract_last_$a")
OPTAB_D (fold_extract_last_optab, "fold_extract_last_$a")
+OPTAB_D (savg_floor_optab, "avg$a3_floor")
+OPTAB_D (uavg_floor_optab, "uavg$a3_floor")
+OPTAB_D (savg_ceil_optab, "avg$a3_ceil")
+OPTAB_D (uavg_ceil_optab, "uavg$a3_ceil")
OPTAB_D (sdot_prod_optab, "sdot_prod$I$a")
OPTAB_D (ssum_widen_optab, "widen_ssum$I$a3")
OPTAB_D (udot_prod_optab, "udot_prod$I$a")
+2018-07-03 Richard Sandiford <richard.sandiford@arm.com>
+
+ PR tree-optimization/85694
+ * lib/target-supports.exp (check_effective_target_vect_avg_qi): New
+ proc.
+ * gcc.dg/vect/vect-avg-1.c: New test.
+ * gcc.dg/vect/vect-avg-2.c: Likewise.
+ * gcc.dg/vect/vect-avg-3.c: Likewise.
+ * gcc.dg/vect/vect-avg-4.c: Likewise.
+ * gcc.dg/vect/vect-avg-5.c: Likewise.
+ * gcc.dg/vect/vect-avg-6.c: Likewise.
+ * gcc.dg/vect/vect-avg-7.c: Likewise.
+ * gcc.dg/vect/vect-avg-8.c: Likewise.
+ * gcc.dg/vect/vect-avg-9.c: Likewise.
+ * gcc.dg/vect/vect-avg-10.c: Likewise.
+ * gcc.dg/vect/vect-avg-11.c: Likewise.
+ * gcc.dg/vect/vect-avg-12.c: Likewise.
+ * gcc.dg/vect/vect-avg-13.c: Likewise.
+ * gcc.dg/vect/vect-avg-14.c: Likewise.
+
2018-07-03 Richard Sandiford <richard.sandiford@arm.com>
* gcc.dg/vect/vect-over-widen-5.c: Test that the extensions
--- /dev/null
+/* { dg-require-effective-target vect_int } */
+
+#include "tree-vect.h"
+
+#define N 50
+
+#ifndef SIGNEDNESS
+#define SIGNEDNESS unsigned
+#endif
+#ifndef BIAS
+#define BIAS 0
+#endif
+
+void __attribute__ ((noipa))
+f (SIGNEDNESS char *restrict a, SIGNEDNESS char *restrict b,
+ SIGNEDNESS char *restrict c)
+{
+ for (__INTPTR_TYPE__ i = 0; i < N; ++i)
+ a[i] = (b[i] + c[i] + BIAS) >> 1;
+}
+
+#define BASE1 ((SIGNEDNESS int) -1 < 0 ? -126 : 4)
+#define BASE2 ((SIGNEDNESS int) -1 < 0 ? -101 : 26)
+
+int
+main (void)
+{
+ check_vect ();
+
+ SIGNEDNESS char a[N], b[N], c[N];
+ for (int i = 0; i < N; ++i)
+ {
+ b[i] = BASE1 + i * 5;
+ c[i] = BASE2 + i * 4;
+ asm volatile ("" ::: "memory");
+ }
+ f (a, b, c);
+ for (int i = 0; i < N; ++i)
+ if (a[i] != ((BASE1 + BASE2 + i * 9 + BIAS) >> 1))
+ __builtin_abort ();
+ return 0;
+}
+
+/* { dg-final { scan-tree-dump "vect_recog_average_pattern: detected" "vect" } } */
+/* { dg-final { scan-tree-dump {\.AVG_FLOOR} "vect" { target vect_avg_qi } } } */
+/* { dg-final { scan-tree-dump-not {vector\([^\n]*short} "vect" { target vect_avg_qi } } } */
+/* { dg-final { scan-tree-dump-times "vectorized 1 loop" 1 "vect" { target vect_avg_qi } } } */
--- /dev/null
+/* { dg-require-effective-target vect_int } */
+
+#define SIGNEDNESS signed
+#define BIAS 2
+
+#include "vect-avg-5.c"
+
+/* { dg-final { scan-tree-dump-not "vect_recog_average_pattern: detected" "vect" } } */
--- /dev/null
+/* { dg-require-effective-target vect_int } */
+
+#include "tree-vect.h"
+
+#define N 50
+
+#ifndef SIGNEDNESS
+#define SIGNEDNESS unsigned
+#endif
+#ifndef BIAS
+#define BIAS 0
+#endif
+
+void __attribute__ ((noipa))
+f (SIGNEDNESS char *restrict a, SIGNEDNESS char *restrict b,
+ SIGNEDNESS char *restrict c)
+{
+ for (__INTPTR_TYPE__ i = 0; i < N; ++i)
+ {
+ int tmp = b[i];
+ tmp ^= 0x55;
+ tmp += BIAS;
+ tmp += c[i];
+ tmp >>= 1;
+ tmp |= 0x40;
+ a[i] = tmp;
+ }
+}
+
+#define BASE1 ((SIGNEDNESS int) -1 < 0 ? -126 : 4)
+#define BASE2 ((SIGNEDNESS int) -1 < 0 ? -101 : 26)
+
+int
+main (void)
+{
+ check_vect ();
+
+ SIGNEDNESS char a[N], b[N], c[N];
+ for (int i = 0; i < N; ++i)
+ {
+ b[i] = BASE1 + i * 5;
+ c[i] = BASE2 + i * 4;
+ asm volatile ("" ::: "memory");
+ }
+ f (a, b, c);
+ for (int i = 0; i < N; ++i)
+ if (a[i] != (((((BASE1 + i * 5) ^ 0x55)
+ + (BASE2 + i * 4)
+ + BIAS) >> 1) | 0x40))
+ __builtin_abort ();
+ return 0;
+}
+
+/* { dg-final { scan-tree-dump "vect_recog_average_pattern: detected" "vect" } } */
+/* { dg-final { scan-tree-dump {\.AVG_FLOOR} "vect" { target vect_avg_qi } } } */
+/* { dg-final { scan-tree-dump-not {vector\([^\n]*short} "vect" { target vect_avg_qi } } } */
+/* { dg-final { scan-tree-dump-times "vectorized 1 loop" 1 "vect" { target vect_avg_qi } } } */
--- /dev/null
+/* { dg-require-effective-target vect_int } */
+
+#define SIGNEDNESS signed
+
+#include "vect-avg-11.c"
+
+/* { dg-final { scan-tree-dump "vect_recog_average_pattern: detected" "vect" } } */
+/* { dg-final { scan-tree-dump {\.AVG_FLOOR} "vect" { target vect_avg_qi } } } */
+/* { dg-final { scan-tree-dump-not {vector\([^\n]*short} "vect" { target vect_avg_qi } } } */
+/* { dg-final { scan-tree-dump-times "vectorized 1 loop" 1 "vect" { target vect_avg_qi } } } */
--- /dev/null
+/* { dg-require-effective-target vect_int } */
+
+#define SIGNEDNESS unsigned
+#define BIAS 1
+
+#include "vect-avg-11.c"
+
+/* { dg-final { scan-tree-dump "vect_recog_average_pattern: detected" "vect" } } */
+/* { dg-final { scan-tree-dump {\.AVG_CEIL} "vect" { target vect_avg_qi } } } */
+/* { dg-final { scan-tree-dump-not {vector\([^\n]*short} "vect" { target vect_avg_qi } } } */
+/* { dg-final { scan-tree-dump-times "vectorized 1 loop" 1 "vect" { target vect_avg_qi } } } */
--- /dev/null
+/* { dg-require-effective-target vect_int } */
+
+#define SIGNEDNESS signed
+#define BIAS 1
+
+#include "vect-avg-11.c"
+
+/* { dg-final { scan-tree-dump "vect_recog_average_pattern: detected" "vect" } } */
+/* { dg-final { scan-tree-dump {\.AVG_CEIL} "vect" { target vect_avg_qi } } } */
+/* { dg-final { scan-tree-dump-not {vector\([^\n]*short} "vect" { target vect_avg_qi } } } */
+/* { dg-final { scan-tree-dump-times "vectorized 1 loop" 1 "vect" { target vect_avg_qi } } } */
--- /dev/null
+/* { dg-require-effective-target vect_int } */
+
+#define SIGNEDNESS signed
+
+#include "vect-avg-1.c"
+
+/* { dg-final { scan-tree-dump "vect_recog_average_pattern: detected" "vect" } } */
+/* { dg-final { scan-tree-dump {\.AVG_FLOOR} "vect" { target vect_avg_qi } } } */
+/* { dg-final { scan-tree-dump-not {vector\([^\n]*short} "vect" { target vect_avg_qi } } } */
+/* { dg-final { scan-tree-dump-times "vectorized 1 loop" 1 "vect" { target vect_avg_qi } } } */
--- /dev/null
+/* { dg-require-effective-target vect_int } */
+
+#define SIGNEDNESS unsigned
+#define BIAS 1
+
+#include "vect-avg-1.c"
+
+/* { dg-final { scan-tree-dump "vect_recog_average_pattern: detected" "vect" } } */
+/* { dg-final { scan-tree-dump {\.AVG_CEIL} "vect" { target vect_avg_qi } } } */
+/* { dg-final { scan-tree-dump-not {vector\([^\n]*short} "vect" { target vect_avg_qi } } } */
+/* { dg-final { scan-tree-dump-times "vectorized 1 loop" 1 "vect" { target vect_avg_qi } } } */
--- /dev/null
+/* { dg-require-effective-target vect_int } */
+
+#define SIGNEDNESS signed
+#define BIAS 1
+
+#include "vect-avg-1.c"
+
+/* { dg-final { scan-tree-dump "vect_recog_average_pattern: detected" "vect" } } */
+/* { dg-final { scan-tree-dump {\.AVG_CEIL} "vect" { target vect_avg_qi } } } */
+/* { dg-final { scan-tree-dump-not {vector\([^\n]*short} "vect" { target vect_avg_qi } } } */
+/* { dg-final { scan-tree-dump-times "vectorized 1 loop" 1 "vect" { target vect_avg_qi } } } */
--- /dev/null
+/* { dg-require-effective-target vect_int } */
+
+#include "tree-vect.h"
+
+#define N 50
+
+#ifndef SIGNEDNESS
+#define SIGNEDNESS unsigned
+#endif
+#ifndef BIAS
+#define BIAS 0
+#endif
+
+void __attribute__ ((noipa))
+f (SIGNEDNESS char *restrict a, SIGNEDNESS char *restrict b,
+ SIGNEDNESS char *restrict c)
+{
+ for (__INTPTR_TYPE__ i = 0; i < N; ++i)
+ {
+ int tmp1 = b[i] + BIAS;
+ int tmp2 = tmp1 + c[i];
+ a[i] = tmp2 >> 1;
+ }
+}
+
+#define BASE1 ((SIGNEDNESS int) -1 < 0 ? -126 : 4)
+#define BASE2 ((SIGNEDNESS int) -1 < 0 ? -101 : 26)
+
+int
+main (void)
+{
+ check_vect ();
+
+ SIGNEDNESS char a[N], b[N], c[N];
+ for (int i = 0; i < N; ++i)
+ {
+ b[i] = BASE1 + i * 5;
+ c[i] = BASE2 + i * 4;
+ asm volatile ("" ::: "memory");
+ }
+ f (a, b, c);
+ for (int i = 0; i < N; ++i)
+ if (a[i] != ((BASE1 + BASE2 + i * 9 + BIAS) >> 1))
+ __builtin_abort ();
+ return 0;
+}
+
+/* { dg-final { scan-tree-dump "vect_recog_average_pattern: detected" "vect" } } */
+/* { dg-final { scan-tree-dump {\.AVG_FLOOR} "vect" { target vect_avg_qi } } } */
+/* { dg-final { scan-tree-dump-not {vector\([^\n]*short} "vect" { target vect_avg_qi } } } */
+/* { dg-final { scan-tree-dump-times "vectorized 1 loop" 1 "vect" { target vect_avg_qi } } } */
--- /dev/null
+/* { dg-require-effective-target vect_int } */
+
+#define SIGNEDNESS signed
+
+#include "vect-avg-5.c"
+
+/* { dg-final { scan-tree-dump "vect_recog_average_pattern: detected" "vect" } } */
+/* { dg-final { scan-tree-dump {\.AVG_FLOOR} "vect" { target vect_avg_qi } } } */
+/* { dg-final { scan-tree-dump-not {vector\([^\n]*short} "vect" { target vect_avg_qi } } } */
+/* { dg-final { scan-tree-dump-times "vectorized 1 loop" 1 "vect" { target vect_avg_qi } } } */
--- /dev/null
+/* { dg-require-effective-target vect_int } */
+
+#define SIGNEDNESS unsigned
+#define BIAS 1
+
+#include "vect-avg-5.c"
+
+/* { dg-final { scan-tree-dump "vect_recog_average_pattern: detected" "vect" } } */
+/* { dg-final { scan-tree-dump {\.AVG_CEIL} "vect" { target vect_avg_qi } } } */
+/* { dg-final { scan-tree-dump-not {vector\([^\n]*short} "vect" { target vect_avg_qi } } } */
+/* { dg-final { scan-tree-dump-times "vectorized 1 loop" 1 "vect" { target vect_avg_qi } } } */
--- /dev/null
+/* { dg-require-effective-target vect_int } */
+
+#define SIGNEDNESS signed
+#define BIAS 1
+
+#include "vect-avg-5.c"
+
+/* { dg-final { scan-tree-dump "vect_recog_average_pattern: detected" "vect" } } */
+/* { dg-final { scan-tree-dump {\.AVG_CEIL} "vect" { target vect_avg_qi } } } */
+/* { dg-final { scan-tree-dump-not {vector\([^\n]*short} "vect" { target vect_avg_qi } } } */
+/* { dg-final { scan-tree-dump-times "vectorized 1 loop" 1 "vect" { target vect_avg_qi } } } */
--- /dev/null
+/* { dg-require-effective-target vect_int } */
+
+#define SIGNEDNESS unsigned
+#define BIAS 2
+
+#include "vect-avg-5.c"
+
+/* { dg-final { scan-tree-dump-not "vect_recog_average_pattern: detected" "vect" } } */
return $et_vect_usad_char_saved($et_index)
}
+# Return 1 if the target plus current options supports both signed
+# and unsigned average operations on vectors of bytes.
+
+proc check_effective_target_vect_avg_qi {} {
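+    # No target reports support yet; this is expected to return 1 for a
+    # target once it provides the new avg optabs for QImode vectors.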
+ return 0
+}
+
# Return 1 if the target plus current options supports a vector
# demotion (packing) of shorts (to chars) and ints (to shorts)
# using modulo arithmetic, 0 otherwise.
return pattern_stmt;
}
+/* Recognize the patterns:
+
+ ATYPE a; // narrower than TYPE
+ BTYPE b; // narrower than TYPE
+ (1) TYPE avg = ((TYPE) a + (TYPE) b) >> 1;
+ or (2) TYPE avg = ((TYPE) a + (TYPE) b + 1) >> 1;
+
+ where only the bottom half of avg is used. Try to transform them into:
+
+ (1) NTYPE avg' = .AVG_FLOOR ((NTYPE) a, (NTYPE) b);
+ or (2) NTYPE avg' = .AVG_CEIL ((NTYPE) a, (NTYPE) b);
+
+ followed by:
+
+ TYPE avg = (TYPE) avg';
+
+ where NTYPE is no wider than half of TYPE. Since only the bottom half
+ of avg is used, all or part of the cast of avg' should become redundant. */
+
+static gimple *
+vect_recog_average_pattern (vec<gimple *> *stmts, tree *type_out)
+{
+ /* Check for a shift right by one bit. */
+ gassign *last_stmt = dyn_cast <gassign *> (stmts->pop ());
+ if (!last_stmt
+ || gimple_assign_rhs_code (last_stmt) != RSHIFT_EXPR
+ || !integer_onep (gimple_assign_rhs2 (last_stmt)))
+ return NULL;
+
+ stmt_vec_info last_stmt_info = vinfo_for_stmt (last_stmt);
+ vec_info *vinfo = last_stmt_info->vinfo;
+
+ /* Check that the shift result is wider than the users of the
+ result need (i.e. that narrowing would be a natural choice). */
+ tree lhs = gimple_assign_lhs (last_stmt);
+ tree type = TREE_TYPE (lhs);
+ unsigned int target_precision
+ = vect_element_precision (last_stmt_info->min_output_precision);
+ if (!INTEGRAL_TYPE_P (type) || target_precision >= TYPE_PRECISION (type))
+ return NULL;
+
+ /* Get the definition of the shift input. */
+ tree rshift_rhs = gimple_assign_rhs1 (last_stmt);
+ stmt_vec_info plus_stmt_info = vect_get_internal_def (vinfo, rshift_rhs);
+ if (!plus_stmt_info)
+ return NULL;
+
+ /* Check whether the shift input can be seen as a tree of additions on
+ 2 or 3 widened inputs.
+
+ Note that the pattern should be a win even if the result of one or
+ more additions is reused elsewhere: if the pattern matches, we'd be
+ replacing 2N RSHIFT_EXPRs and N VEC_PACK_*s with N IFN_AVG_*s. */
+ internal_fn ifn = IFN_AVG_FLOOR;
+ vect_unpromoted_value unprom[3];
+ tree new_type;
+ unsigned int nops = vect_widened_op_tree (plus_stmt_info, PLUS_EXPR,
+ PLUS_EXPR, false, 3,
+ unprom, &new_type);
+ if (nops == 0)
+ return NULL;
+ if (nops == 3)
+ {
+ /* Check that one operand is 1. */
+ unsigned int i;
+ for (i = 0; i < 3; ++i)
+ if (integer_onep (unprom[i].op))
+ break;
+ if (i == 3)
+ return NULL;
+ /* Throw away the 1 operand and keep the other two. */
+ if (i < 2)
+ unprom[i] = unprom[2];
+ ifn = IFN_AVG_CEIL;
+ }
+
+ vect_pattern_detected ("vect_recog_average_pattern", last_stmt);
+
+ /* We know that:
+
+ (a) the operation can be viewed as:
+
+ TYPE widened0 = (TYPE) UNPROM[0];
+ TYPE widened1 = (TYPE) UNPROM[1];
+ TYPE tmp1 = widened0 + widened1 {+ 1};
+ TYPE tmp2 = tmp1 >> 1; // LAST_STMT_INFO
+
+ (b) the first two statements are equivalent to:
+
+ TYPE widened0 = (TYPE) (NEW_TYPE) UNPROM[0];
+ TYPE widened1 = (TYPE) (NEW_TYPE) UNPROM[1];
+
+ (c) vect_recog_over_widening_pattern has already tried to narrow TYPE
+ where sensible;
+
+ (d) all the operations can be performed correctly at twice the width of
+ NEW_TYPE, due to the nature of the average operation; and
+
+ (e) users of the result of the right shift need only TARGET_PRECISION
+ bits, where TARGET_PRECISION is no more than half of TYPE's
+ precision.
+
+ Under these circumstances, the only situation in which NEW_TYPE
+ could be narrower than TARGET_PRECISION is if widened0, widened1
+ and an addition result are all used more than once. Thus we can
+ treat any widening of UNPROM[0] and UNPROM[1] to TARGET_PRECISION
+ as "free", whereas widening the result of the average instruction
+ from NEW_TYPE to TARGET_PRECISION would be a new operation. It's
+ therefore better not to go narrower than TARGET_PRECISION. */
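+  /* For example, if the inputs are unsigned chars (so NEW_TYPE has
+     precision 8) but TARGET_PRECISION is 16, use a 16-bit NEW_TYPE here
+     so that the average result never needs a separate widening
+     statement.  */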
+ if (TYPE_PRECISION (new_type) < target_precision)
+ new_type = build_nonstandard_integer_type (target_precision,
+ TYPE_UNSIGNED (new_type));
+
+ /* Check for target support. */
+ tree new_vectype = get_vectype_for_scalar_type (new_type);
+ if (!new_vectype
+ || !direct_internal_fn_supported_p (ifn, new_vectype,
+ OPTIMIZE_FOR_SPEED))
+ return NULL;
+
+ /* The IR requires a valid vector type for the cast result, even though
+ it's likely to be discarded. */
+ *type_out = get_vectype_for_scalar_type (type);
+ if (!*type_out)
+ return NULL;
+
+ /* Generate the IFN_AVG* call. */
+ tree new_var = vect_recog_temp_ssa_var (new_type, NULL);
+ tree new_ops[2];
+ vect_convert_inputs (last_stmt_info, 2, new_ops, new_type,
+ unprom, new_vectype);
+ gcall *average_stmt = gimple_build_call_internal (ifn, 2, new_ops[0],
+ new_ops[1]);
+ gimple_call_set_lhs (average_stmt, new_var);
+ gimple_set_location (average_stmt, gimple_location (last_stmt));
+
+ if (dump_enabled_p ())
+ {
+ dump_printf_loc (MSG_NOTE, vect_location,
+ "created pattern stmt: ");
+ dump_gimple_stmt (MSG_NOTE, TDF_SLIM, average_stmt, 0);
+ }
+
+ stmts->safe_push (last_stmt);
+ return vect_convert_output (last_stmt_info, type, average_stmt, new_vectype);
+}
+
/* Recognize cases in which the input to a cast is wider than its
output, and the input is fed by a widening operation. Fold this
by removing the unnecessary intermediate widening. E.g.:
   less complex ones (widen_sum only after dot_prod or sad for example). */
static vect_recog_func vect_vect_recog_func_ptrs[] = {
{ vect_recog_over_widening_pattern, "over_widening" },
+ /* Must come after over_widening, which narrows the shift as much as
+ possible beforehand. */
+ { vect_recog_average_pattern, "average" },
{ vect_recog_cast_forwprop_pattern, "cast_forwprop" },
{ vect_recog_widen_mult_pattern, "widen_mult" },
{ vect_recog_dot_prod_pattern, "dot_prod" },
gcall *stmt;
tree vec_dest;
tree scalar_dest;
- tree op, type;
+ tree op;
tree vec_oprnd0 = NULL_TREE, vec_oprnd1 = NULL_TREE;
stmt_vec_info stmt_info = vinfo_for_stmt (gs), prev_stmt_info;
tree vectype_out, vectype_in;
if (slp_node)
return true;
- type = TREE_TYPE (scalar_dest);
if (is_pattern_stmt_p (stmt_info))
stmt_info = vinfo_for_stmt (STMT_VINFO_RELATED_STMT (stmt_info));
lhs = gimple_get_lhs (stmt_info->stmt);
- new_stmt = gimple_build_assign (lhs, build_zero_cst (type));
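+  /* Build the zero in the type of LHS: with pattern statements, LHS is
+     the lhs of the original scalar statement, whose type can differ
+     from that of SCALAR_DEST.  */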
+ new_stmt = gimple_build_assign (lhs, build_zero_cst (TREE_TYPE (lhs)));
set_vinfo_for_stmt (new_stmt, stmt_info);
set_vinfo_for_stmt (stmt_info->stmt, NULL);
STMT_VINFO_STMT (stmt_info) = new_stmt;