re PR tree-optimization/89007 ([SVE] Implement generic vector average expansion)
author Prathamesh Kulkarni <prathamesh.kulkarni@linaro.org>
Mon, 9 Dec 2019 09:59:42 +0000 (09:59 +0000)
committer Prathamesh Kulkarni <prathamesh3492@gcc.gnu.org>
Mon, 9 Dec 2019 09:59:42 +0000 (09:59 +0000)
2019-12-09  Prathamesh Kulkarni  <prathamesh.kulkarni@linaro.org>

PR tree-optimization/89007
* tree-vect-patterns.c (vect_recog_average_pattern): If there is no
target support available, generate code to distribute rshift over plus
and add a carry.

testsuite/
* gcc.target/aarch64/sve/pr89007-1.c: New test.
* gcc.target/aarch64/sve/pr89007-2.c: Likewise.

From-SVN: r279112

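For reference, the fallback relies on the fact that, for unsigned operands, the average can be computed without a wider type by distributing the right shift over the addition and adding back the carry bit that the two shifts drop. Below is a minimal scalar sketch of the two identities the generated sequence uses; the avg_floor/avg_ceil helper names are illustrative only and not part of the patch.

/* Scalar sketch of the identities behind the vector fallback:
   floor: (a + b) >> 1      == (a >> 1) + (b >> 1) + ((a & b) & 1)
   ceil:  (a + b + 1) >> 1  == (a >> 1) + (b >> 1) + ((a | b) & 1)
   Neither right-hand side needs a type wider than that of a and b.  */
static inline unsigned char
avg_floor (unsigned char a, unsigned char b)
{
  return (a >> 1) + (b >> 1) + ((a & b) & 1);  /* carry recovered with AND.  */
}

static inline unsigned char
avg_ceil (unsigned char a, unsigned char b)
{
  return (a >> 1) + (b >> 1) + ((a | b) & 1);  /* carry recovered with OR.  */
}

These correspond to the lsr/add/and-or-orr/and/add sequences that the new SVE tests below expect.
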
gcc/ChangeLog
gcc/testsuite/ChangeLog
gcc/testsuite/gcc.target/aarch64/sve/pr89007-1.c [new file with mode: 0644]
gcc/testsuite/gcc.target/aarch64/sve/pr89007-2.c [new file with mode: 0644]
gcc/tree-vect-patterns.c

diff --git a/gcc/ChangeLog b/gcc/ChangeLog
index 0d421ca00b3a2c0e77936236dc1c1370bb728c0c..cee65513d00c75cb2276bb2e1a3030754ea3ee9b 100644 (file)
@@ -1,3 +1,10 @@
+2019-12-09  Prathamesh Kulkarni  <prathamesh.kulkarni@linaro.org>
+
+       PR tree-optimization/89007
+       * tree-vect-patterns.c (vect_recog_average_pattern): If there is no
+       target support available, generate code to distribute rshift over plus
+       and add a carry.
+
 2019-12-09  Martin Liska  <mliska@suse.cz>
 
        PR ipa/92737
diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog
index c6094cc3913f084c80082e14c116450316d39ed8..0706b05b417e901007cb75a1fea3b8f8d25de202 100644 (file)
@@ -1,3 +1,9 @@
+2019-12-09  Prathamesh Kulkarni  <prathamesh.kulkarni@linaro.org>
+
+       PR tree-optimization/89007
+       * gcc.target/aarch64/sve/pr89007-1.c: New test.
+       * gcc.target/aarch64/sve/pr89007-2.c: Likewise.
+
 2019-12-09  Hongtao Liu  <hongtao@intel.com>
 
        * gcc.target/i386/pr92686.inc: New file.
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/pr89007-1.c b/gcc/testsuite/gcc.target/aarch64/sve/pr89007-1.c
new file mode 100644 (file)
index 0000000..af4aff4
--- /dev/null
@@ -0,0 +1,29 @@
+/* { dg-do assemble { target aarch64_asm_sve_ok } } */
+/* { dg-options "-O -ftree-vectorize -march=armv8.2-a+sve --save-temps" } */
+/* { dg-final { check-function-bodies "**" "" } } */
+
+#define N 1024
+unsigned char dst[N];
+unsigned char in1[N];
+unsigned char in2[N];
+
+/*
+**  foo:
+**     ...
+**     lsr     (z[0-9]+\.b), z[0-9]+\.b, #1
+**     lsr     (z[0-9]+\.b), z[0-9]+\.b, #1
+**     add     (z[0-9]+\.b), (\1, \2|\2, \1)
+**     orr     (z[0-9]+)\.d, z[0-9]+\.d, z[0-9]+\.d
+**     and     (z[0-9]+\.b), \5\.b, #0x1
+**     add     z0\.b, (\3, \6|\6, \3)
+**     ...
+*/
+void
+foo ()
+{
+  for( int x = 0; x < N; x++ )
+    dst[x] = (in1[x] + in2[x] + 1) >> 1;
+}
+
+/* { dg-final { scan-assembler-not {\tuunpklo\t} } } */
+/* { dg-final { scan-assembler-not {\tuunpkhi\t} } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/pr89007-2.c b/gcc/testsuite/gcc.target/aarch64/sve/pr89007-2.c
new file mode 100644 (file)
index 0000000..2ccdd0d
--- /dev/null
@@ -0,0 +1,29 @@
+/* { dg-do assemble { target aarch64_asm_sve_ok } } */
+/* { dg-options "-O -ftree-vectorize -march=armv8.2-a+sve --save-temps" } */
+/* { dg-final { check-function-bodies "**" "" } } */
+
+#define N 1024
+unsigned char dst[N];
+unsigned char in1[N];
+unsigned char in2[N];
+
+/*
+**  foo:
+**     ...
+**     lsr     (z[0-9]+\.b), z[0-9]+\.b, #1
+**     lsr     (z[0-9]+\.b), z[0-9]+\.b, #1
+**     add     (z[0-9]+\.b), (\1, \2|\2, \1)
+**     and     (z[0-9]+)\.d, z[0-9]+\.d, z[0-9]+\.d
+**     and     (z[0-9]+\.b), \5\.b, #0x1
+**     add     z0\.b, (\3, \6|\6, \3)
+**     ...
+*/
+void
+foo ()
+{
+  for( int x = 0; x < N; x++ )
+    dst[x] = (in1[x] + in2[x]) >> 1;
+}
+
+/* { dg-final { scan-assembler-not {\tuunpklo\t} } } */
+/* { dg-final { scan-assembler-not {\tuunpkhi\t} } } */
diff --git a/gcc/tree-vect-patterns.c b/gcc/tree-vect-patterns.c
index 2e021287b4efc120aa933e5404e07a22962a3e61..c9ad9e0eb9455901120c82917f08843eb21d9e73 100644 (file)
@@ -1928,7 +1928,10 @@ vect_recog_mulhs_pattern (stmt_vec_info last_stmt_info, tree *type_out)
            TYPE avg = (TYPE) avg';
 
   where NTYPE is no wider than half of TYPE.  Since only the bottom half
-  of avg is used, all or part of the cast of avg' should become redundant.  */
+  of avg is used, all or part of the cast of avg' should become redundant.
+
+  If there is no target support available, generate code to distribute rshift
+  over plus and add a carry.  */
 
 static gimple *
 vect_recog_average_pattern (stmt_vec_info last_stmt_info, tree *type_out)
@@ -2032,9 +2035,20 @@ vect_recog_average_pattern (stmt_vec_info last_stmt_info, tree *type_out)
 
   /* Check for target support.  */
   tree new_vectype = get_vectype_for_scalar_type (vinfo, new_type);
-  if (!new_vectype
-      || !direct_internal_fn_supported_p (ifn, new_vectype,
-                                         OPTIMIZE_FOR_SPEED))
+  if (!new_vectype)
+    return NULL;
+
+  bool fallback_p = false;
+
+  if (direct_internal_fn_supported_p (ifn, new_vectype, OPTIMIZE_FOR_SPEED))
+    ;
+  else if (TYPE_UNSIGNED (new_type)
+          && optab_for_tree_code (RSHIFT_EXPR, new_vectype, optab_scalar)
+          && optab_for_tree_code (PLUS_EXPR, new_vectype, optab_default)
+          && optab_for_tree_code (BIT_IOR_EXPR, new_vectype, optab_default)
+          && optab_for_tree_code (BIT_AND_EXPR, new_vectype, optab_default))
+    fallback_p = true;
+  else
     return NULL;
 
   /* The IR requires a valid vector type for the cast result, even though
@@ -2043,11 +2057,53 @@ vect_recog_average_pattern (stmt_vec_info last_stmt_info, tree *type_out)
   if (!*type_out)
     return NULL;
 
-  /* Generate the IFN_AVG* call.  */
   tree new_var = vect_recog_temp_ssa_var (new_type, NULL);
   tree new_ops[2];
   vect_convert_inputs (last_stmt_info, 2, new_ops, new_type,
                       unprom, new_vectype);
+
+  if (fallback_p)
+    {
+      /* As a fallback, generate code for the following sequence:
+
+        shifted_op0 = new_ops[0] >> 1;
+        shifted_op1 = new_ops[1] >> 1;
+        sum_of_shifted = shifted_op0 + shifted_op1;
+        unmasked_carry = new_ops[0] and/or new_ops[1];
+        carry = unmasked_carry & 1;
+        new_var = sum_of_shifted + carry;
+      */
+
+      tree one_cst = build_one_cst (new_type);
+      gassign *g;
+
+      tree shifted_op0 = vect_recog_temp_ssa_var (new_type, NULL);
+      g = gimple_build_assign (shifted_op0, RSHIFT_EXPR, new_ops[0], one_cst);
+      append_pattern_def_seq (last_stmt_info, g, new_vectype);
+
+      tree shifted_op1 = vect_recog_temp_ssa_var (new_type, NULL);
+      g = gimple_build_assign (shifted_op1, RSHIFT_EXPR, new_ops[1], one_cst);
+      append_pattern_def_seq (last_stmt_info, g, new_vectype);
+
+      tree sum_of_shifted = vect_recog_temp_ssa_var (new_type, NULL);
+      g = gimple_build_assign (sum_of_shifted, PLUS_EXPR,
+                              shifted_op0, shifted_op1);
+      append_pattern_def_seq (last_stmt_info, g, new_vectype);
+
+      tree unmasked_carry = vect_recog_temp_ssa_var (new_type, NULL);
+      tree_code c = (ifn == IFN_AVG_CEIL) ? BIT_IOR_EXPR : BIT_AND_EXPR;
+      g = gimple_build_assign (unmasked_carry, c, new_ops[0], new_ops[1]);
+      append_pattern_def_seq (last_stmt_info, g, new_vectype);
+      tree carry = vect_recog_temp_ssa_var (new_type, NULL);
+      g = gimple_build_assign (carry, BIT_AND_EXPR, unmasked_carry, one_cst);
+      append_pattern_def_seq (last_stmt_info, g, new_vectype);
+
+      g = gimple_build_assign (new_var, PLUS_EXPR, sum_of_shifted, carry);
+      return vect_convert_output (last_stmt_info, type, g, new_vectype);
+    }
+
+  /* Generate the IFN_AVG* call.  */
   gcall *average_stmt = gimple_build_call_internal (ifn, 2, new_ops[0],
                                                    new_ops[1]);
   gimple_call_set_lhs (average_stmt, new_var);