From c0cbe5260fab673f7cd755df2226422b88b28837 Mon Sep 17 00:00:00 2001 From: Jakub Jelinek Date: Wed, 12 Sep 2018 20:28:20 +0200 Subject: [PATCH] re PR middle-end/82853 (Optimize x % 3 == 0 without modulo) PR middle-end/82853 * expr.h (maybe_optimize_mod_cmp): Declare. * expr.c (mod_inv): New function. (maybe_optimize_mod_cmp): New function. (do_store_flag): Use it. * cfgexpand.c (expand_gimple_cond): Likewise. * gcc.target/i386/pr82853-1.c: New test. * gcc.target/i386/pr82853-2.c: New test. From-SVN: r264248 --- gcc/ChangeLog | 9 + gcc/cfgexpand.c | 7 + gcc/expr.c | 257 +++++++++++++++++++++- gcc/expr.h | 2 + gcc/testsuite/ChangeLog | 6 + gcc/testsuite/gcc.target/i386/pr82853-1.c | 15 ++ gcc/testsuite/gcc.target/i386/pr82853-2.c | 7 + 7 files changed, 302 insertions(+), 1 deletion(-) create mode 100644 gcc/testsuite/gcc.target/i386/pr82853-1.c create mode 100644 gcc/testsuite/gcc.target/i386/pr82853-2.c diff --git a/gcc/ChangeLog b/gcc/ChangeLog index 06911461a12..6314c60aa24 100644 --- a/gcc/ChangeLog +++ b/gcc/ChangeLog @@ -1,3 +1,12 @@ +2018-09-12 Jakub Jelinek + + PR middle-end/82853 + * expr.h (maybe_optimize_mod_cmp): Declare. + * expr.c (mod_inv): New function. + (maybe_optimize_mod_cmp): New function. + (do_store_flag): Use it. + * cfgexpand.c (expand_gimple_cond): Likewise. + 2018-09-09 Cesar Philippidis Julian Brown diff --git a/gcc/cfgexpand.c b/gcc/cfgexpand.c index 2d3111da25d..697b238669f 100644 --- a/gcc/cfgexpand.c +++ b/gcc/cfgexpand.c @@ -2477,6 +2477,13 @@ expand_gimple_cond (basic_block bb, gcond *stmt) } } + /* Optimize (x % C1) == C2 or (x % C1) != C2 if it is beneficial + into (x - C2) * C3 < C4. */ + if ((code == EQ_EXPR || code == NE_EXPR) + && TREE_CODE (op0) == SSA_NAME + && TREE_CODE (op1) == INTEGER_CST) + code = maybe_optimize_mod_cmp (code, &op0, &op1); + last2 = last = get_last_insn (); extract_true_false_edges_from_block (bb, &true_edge, &false_edge); diff --git a/gcc/expr.c b/gcc/expr.c index cd5cf12fca6..3b59f96f539 100644 --- a/gcc/expr.c +++ b/gcc/expr.c @@ -11491,6 +11491,243 @@ string_constant (tree arg, tree *ptr_offset, tree *mem_size, tree *nonstr) return init; } +/* Compute the modular multiplicative inverse of A modulo M + using extended Euclid's algorithm. Assumes A and M are coprime. */ +static wide_int +mod_inv (const wide_int &a, const wide_int &b) +{ + /* Verify the assumption. */ + gcc_checking_assert (wi::eq_p (wi::gcd (a, b), 1)); + + unsigned int p = a.get_precision () + 1; + gcc_checking_assert (b.get_precision () + 1 == p); + wide_int c = wide_int::from (a, p, UNSIGNED); + wide_int d = wide_int::from (b, p, UNSIGNED); + wide_int x0 = wide_int::from (0, p, UNSIGNED); + wide_int x1 = wide_int::from (1, p, UNSIGNED); + + if (wi::eq_p (b, 1)) + return wide_int::from (1, p, UNSIGNED); + + while (wi::gt_p (c, 1, UNSIGNED)) + { + wide_int t = d; + wide_int q = wi::divmod_trunc (c, d, UNSIGNED, &d); + c = t; + wide_int s = x0; + x0 = wi::sub (x1, wi::mul (q, x0)); + x1 = s; + } + if (wi::lt_p (x1, 0, SIGNED)) + x1 += d; + return x1; +} + +/* Attempt to optimize unsigned (X % C1) == C2 (or (X % C1) != C2). + If C1 is odd to: + (X - C2) * C3 <= C4 (or >), where + C3 is modular multiplicative inverse of C1 and 1< ((1<> S) <= C4, where C3 is modular multiplicative + inverse of C1>>S and 1<>S)) >> S. + + For signed (X % C1) == 0 if C1 is odd to (all operations in it + unsigned): + (X * C3) + C4 <= 2 * C4, where + C3 is modular multiplicative inverse of (unsigned) C1 and 1<> S <= (C4 >> (S - 1)) + where C3 is modular multiplicative inverse of (unsigned)(C1>>S) and 1<>S)) & (-1<= c should be always false. */ + || tree_int_cst_le (treeop1, *arg1)) + return code; + + tree type = TREE_TYPE (*arg0); + scalar_int_mode mode; + if (!is_a (TYPE_MODE (type), &mode)) + return code; + if (GET_MODE_BITSIZE (mode) != TYPE_PRECISION (type) + || TYPE_PRECISION (type) <= 1) + return code; + + signop sgn = UNSIGNED; + /* If both operands are known to have the sign bit clear, handle + even the signed modulo case as unsigned. treeop1 is always + positive >= 2, checked above. */ + if (!TYPE_UNSIGNED (type) && get_range_pos_neg (treeop0) != 1) + sgn = SIGNED; + + if (!TYPE_UNSIGNED (type)) + { + if (tree_int_cst_sgn (*arg1) == -1) + return code; + type = unsigned_type_for (type); + if (!type || TYPE_MODE (type) != TYPE_MODE (TREE_TYPE (*arg0))) + return code; + } + + int prec = TYPE_PRECISION (type); + wide_int w = wi::to_wide (treeop1); + int shift = wi::ctz (w); + /* Unsigned (X % C1) == C2 is equivalent to (X - C2) % C1 == 0 if + C2 <= -1U % C1, because for any Z >= 0U - C2 in that case (Z % C1) != 0. + If C1 is odd, we can handle all cases by subtracting + C4 below. We could handle even the even C1 and C2 > -1U % C1 cases + e.g. by testing for overflow on the subtraction, punt on that for now + though. */ + if ((sgn == SIGNED || shift) && !integer_zerop (*arg1)) + { + if (sgn == SIGNED) + return code; + wide_int x = wi::umod_trunc (wi::mask (prec, false, prec), w); + if (wi::gtu_p (wi::to_wide (*arg1), x)) + return code; + } + + imm_use_iterator imm_iter; + use_operand_p use_p; + FOR_EACH_IMM_USE_FAST (use_p, imm_iter, treeop0) + { + gimple *use_stmt = USE_STMT (use_p); + /* Punt if treeop0 is used in the same bb in a division + or another modulo with the same divisor. We should expect + the division and modulo combined together. */ + if (use_stmt == stmt + || gimple_bb (use_stmt) != gimple_bb (stmt)) + continue; + if (!is_gimple_assign (use_stmt) + || (gimple_assign_rhs_code (use_stmt) != TRUNC_DIV_EXPR + && gimple_assign_rhs_code (use_stmt) != TRUNC_MOD_EXPR)) + continue; + if (gimple_assign_rhs1 (use_stmt) != treeop0 + || !operand_equal_p (gimple_assign_rhs2 (use_stmt), treeop1, 0)) + continue; + return code; + } + + w = wi::lrshift (w, shift); + wide_int a = wide_int::from (w, prec + 1, UNSIGNED); + wide_int b = wi::shifted_mask (prec, 1, false, prec + 1); + wide_int m = wide_int::from (mod_inv (a, b), prec, UNSIGNED); + tree c3 = wide_int_to_tree (type, m); + tree c5 = NULL_TREE; + wide_int d, e; + if (sgn == UNSIGNED) + { + d = wi::divmod_trunc (wi::mask (prec, false, prec), w, UNSIGNED, &e); + /* Use <= floor ((1<code == EQ_EXPR || ops->code == NE_EXPR) + && TREE_CODE (arg0) == SSA_NAME + && TREE_CODE (arg1) == INTEGER_CST) + { + enum tree_code code = maybe_optimize_mod_cmp (ops->code, &arg0, &arg1); + if (code != ops->code) + { + struct separate_ops nops = *ops; + nops.code = ops->code = code; + nops.op0 = arg0; + nops.op1 = arg1; + nops.type = TREE_TYPE (arg0); + return do_store_flag (&nops, target, mode); + } + } + /* Get the rtx comparison code to use. We know that EXP is a comparison operation of some type. Some comparisons against 1 and -1 can be converted to comparisons with zero. Do so here so that the tests diff --git a/gcc/expr.h b/gcc/expr.h index 4177de8060b..19028f0e6a4 100644 --- a/gcc/expr.h +++ b/gcc/expr.h @@ -290,6 +290,8 @@ expand_normal (tree exp) a string constant. */ extern tree string_constant (tree, tree *, tree *, tree *); +extern enum tree_code maybe_optimize_mod_cmp (enum tree_code, tree *, tree *); + /* Two different ways of generating switch statements. */ extern int try_casesi (tree, tree, tree, tree, rtx, rtx, rtx, profile_probability); extern int try_tablejump (tree, tree, tree, tree, rtx, rtx, profile_probability); diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog index f1b9fe231db..76ede606984 100644 --- a/gcc/testsuite/ChangeLog +++ b/gcc/testsuite/ChangeLog @@ -1,3 +1,9 @@ +2018-09-12 Jakub Jelinek + + PR middle-end/82853 + * gcc.target/i386/pr82853-1.c: New test. + * gcc.target/i386/pr82853-2.c: New test. + 2018-09-12 Richard Biener PR tree-optimization/87280 diff --git a/gcc/testsuite/gcc.target/i386/pr82853-1.c b/gcc/testsuite/gcc.target/i386/pr82853-1.c new file mode 100644 index 00000000000..822fe77591c --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/pr82853-1.c @@ -0,0 +1,15 @@ +/* PR middle-end/82853 */ +/* { dg-do compile } */ +/* { dg-options "-O2 -mno-bmi2 -mtune=generic" } */ +/* { dg-final { scan-assembler-times "mul\[lq]\t" 7 } } */ +/* { dg-final { scan-assembler-not "div\[lq]\t" } } */ +/* { dg-final { scan-assembler-not "lea\[lq]\t\[^\n\r]*,\[^\n\r]*,2\\)" } } */ + +unsigned f1 (unsigned x) { return (x % 679U) == 0; } +unsigned f2 (unsigned x) { return (x % 1738U) == 0; } +void bar (void); +void f3 (unsigned x) { if (x % 3 == 0) bar (); } +void f4 (unsigned x) { if (x % 3 == 1) bar (); } +void f5 (unsigned x) { if (x % 3 == 2) bar (); } +int f6 (int x) { return x % 3 == 0; } +int f7 (int x) { return x % 6 == 0; } diff --git a/gcc/testsuite/gcc.target/i386/pr82853-2.c b/gcc/testsuite/gcc.target/i386/pr82853-2.c new file mode 100644 index 00000000000..0c19e9d272a --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/pr82853-2.c @@ -0,0 +1,7 @@ +/* PR middle-end/82853 */ +/* { dg-do compile } */ +/* { dg-options "-O2 -mno-bmi2 -mtune=generic" } */ +/* { dg-final { scan-assembler-times "mul\[lq]\t" 2 } } */ +/* { dg-final { scan-assembler-not "div\[lq]\t" } } */ + +unsigned f1 (unsigned x, unsigned *y) { *y = x / 679U; return (x % 679U) == 0; } -- 2.30.2