return init;
}
\f
+/* Compute the modular multiplicative inverse of A modulo B
+ using the extended Euclidean algorithm. Assumes A and B are coprime. */
+static wide_int
+mod_inv (const wide_int &a, const wide_int &b)
+{
+ /* Verify the assumption. */
+ gcc_checking_assert (wi::eq_p (wi::gcd (a, b), 1));
+
+ unsigned int p = a.get_precision () + 1;
+ gcc_checking_assert (b.get_precision () + 1 == p);
+ wide_int c = wide_int::from (a, p, UNSIGNED);
+ wide_int d = wide_int::from (b, p, UNSIGNED);
+ wide_int x0 = wide_int::from (0, p, UNSIGNED);
+ wide_int x1 = wide_int::from (1, p, UNSIGNED);
+
+ if (wi::eq_p (b, 1))
+ return wide_int::from (1, p, UNSIGNED);
+
+ while (wi::gt_p (c, 1, UNSIGNED))
+ {
+ wide_int t = d;
+ wide_int q = wi::divmod_trunc (c, d, UNSIGNED, &d);
+ c = t;
+ wide_int s = x0;
+ x0 = wi::sub (x1, wi::mul (q, x0));
+ x1 = s;
+ }
+ if (wi::lt_p (x1, 0, SIGNED))
+ x1 += d;
+ return x1;
+}
+
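
As a cross-check of mod_inv above, the following standalone sketch runs the same extended-Euclid computation for the only shape the patch uses (modulus 1 << prec, with prec == 32), using 64-bit host integers in place of wide_int. It is an illustration only, not part of the patch; the name mod_inv_2_32 is made up for this example.

    #include <assert.h>
    #include <stdint.h>

    /* Illustration only: textbook extended-Euclid inverse for the fixed
       modulus 2^32.  The 64-bit width plays the role of the
       "precision + 1" widening done by mod_inv above.  */
    static uint32_t
    mod_inv_2_32 (uint32_t a)
    {
      int64_t m = (int64_t) 1 << 32, c = a, x0 = 0, x1 = 1;
      while (c > 1)
        {
          int64_t q = c / m, t = m;
          m = c % m;
          c = t;
          t = x0;
          x0 = x1 - q * x0;
          x1 = t;
        }
      /* Reduce a negative coefficient into [0, 2^32); the wide_int version
         gets the same effect from the caller truncating the result back to
         prec bits.  */
      return (uint32_t) (x1 < 0 ? x1 + ((int64_t) 1 << 32) : x1);
    }

    int
    main (void)
    {
      assert (mod_inv_2_32 (3) == 0xaaaaaaabu);  /* 3 * 0xaaaaaaab == 2^33 + 1.  */
      assert ((uint32_t) (3u * mod_inv_2_32 (3)) == 1u);
      assert ((uint32_t) (7u * mod_inv_2_32 (7)) == 1u);
      assert ((uint32_t) (641u * mod_inv_2_32 (641)) == 1u);
      return 0;
    }
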
+/* Attempt to optimize unsigned (X % C1) == C2 (or (X % C1) != C2).
+ If C1 is odd, transform it to:
+ (X - C2) * C3 <= C4 (or >), where
+ C3 is the modular multiplicative inverse of C1 modulo 1<<prec and
+ C4 is ((1<<prec) - 1) / C1 or ((1<<prec) - 1) / C1 - 1 (the latter
+ if C2 > ((1<<prec) - 1) % C1).
+ If C1 is even, S = ctz (C1), and C2 is 0, use
+ ((X * C3) r>> S) <= C4, where C3 is the modular multiplicative
+ inverse of C1>>S modulo 1<<prec and C4 is (((1<<prec) - 1) / (C1>>S)) >> S.
+
+ For signed (X % C1) == 0, if C1 is odd, transform it to (all operations
+ in it unsigned):
+ (X * C3) + C4 <= 2 * C4, where
+ C3 is the modular multiplicative inverse of (unsigned) C1 modulo 1<<prec
+ and C4 is ((1<<(prec - 1)) - 1) / C1.
+ If C1 is even, S = ctz (C1), use
+ ((X * C3) + C4) r>> S <= (C4 >> (S - 1))
+ where C3 is the modular multiplicative inverse of (unsigned)(C1>>S)
+ modulo 1<<prec and C4 is (((1<<(prec - 1)) - 1) / (C1>>S)) & (-1<<S).
+
+ See the Hacker's Delight book, section 10-17.
+
+ Return the comparison code to use against the possibly updated *ARG0
+ and *ARG1: LE_EXPR or GT_EXPR if the transformation is applied,
+ otherwise CODE unchanged. */
+enum tree_code
+maybe_optimize_mod_cmp (enum tree_code code, tree *arg0, tree *arg1)
+{
+ gcc_checking_assert (code == EQ_EXPR || code == NE_EXPR);
+ gcc_checking_assert (TREE_CODE (*arg1) == INTEGER_CST);
+
+ if (optimize < 2)
+ return code;
+
+ gimple *stmt = get_def_for_expr (*arg0, TRUNC_MOD_EXPR);
+ if (stmt == NULL)
+ return code;
+
+ tree treeop0 = gimple_assign_rhs1 (stmt);
+ tree treeop1 = gimple_assign_rhs2 (stmt);
+ if (TREE_CODE (treeop0) != SSA_NAME
+ || TREE_CODE (treeop1) != INTEGER_CST
+ /* x % pow2 is already handled well elsewhere. */
+ || integer_pow2p (treeop1)
+ /* Don't optimize the undefined behavior case x % 0;
+ x % 1 should have been optimized into zero, punt if
+ it makes it here for whatever reason;
+ x % -c should have been optimized into x % c. */
+ || compare_tree_int (treeop1, 2) <= 0
+ /* Likewise, x % c == d where d >= c is always false. */
+ || tree_int_cst_le (treeop1, *arg1))
+ return code;
+
+ tree type = TREE_TYPE (*arg0);
+ scalar_int_mode mode;
+ if (!is_a <scalar_int_mode> (TYPE_MODE (type), &mode))
+ return code;
+ if (GET_MODE_BITSIZE (mode) != TYPE_PRECISION (type)
+ || TYPE_PRECISION (type) <= 1)
+ return code;
+
+ signop sgn = UNSIGNED;
+ /* If both operands are known to have the sign bit clear, handle
+ even the signed modulo case as unsigned. treeop1 is always
+ positive >= 2, checked above. */
+ if (!TYPE_UNSIGNED (type) && get_range_pos_neg (treeop0) != 1)
+ sgn = SIGNED;
+
+ if (!TYPE_UNSIGNED (type))
+ {
+ if (tree_int_cst_sgn (*arg1) == -1)
+ return code;
+ type = unsigned_type_for (type);
+ if (!type || TYPE_MODE (type) != TYPE_MODE (TREE_TYPE (*arg0)))
+ return code;
+ }
+
+ int prec = TYPE_PRECISION (type);
+ wide_int w = wi::to_wide (treeop1);
+ int shift = wi::ctz (w);
+ /* Unsigned (X % C1) == C2 is equivalent to (X - C2) % C1 == 0 if
+ C2 <= -1U % C1, because any wrapped difference Z >= 0U - C2 then
+ satisfies (Z % C1) != 0. If C1 is odd, we can handle all cases
+ by subtracting one from C4 below when C2 > -1U % C1. We could
+ also handle the even C1 and C2 > -1U % C1 cases, e.g. by testing
+ for overflow on the subtraction, but punt on that for now. */
+ if ((sgn == SIGNED || shift) && !integer_zerop (*arg1))
+ {
+ if (sgn == SIGNED)
+ return code;
+ wide_int x = wi::umod_trunc (wi::mask (prec, false, prec), w);
+ if (wi::gtu_p (wi::to_wide (*arg1), x))
+ return code;
+ }
+
+ imm_use_iterator imm_iter;
+ use_operand_p use_p;
+ FOR_EACH_IMM_USE_FAST (use_p, imm_iter, treeop0)
+ {
+ gimple *use_stmt = USE_STMT (use_p);
+ /* Punt if treeop0 is used in the same bb by a division
+ or another modulo with the same divisor; in that case we
+ expect the division and modulo to be computed together
+ (e.g. as a divmod), so the transformation would not be
+ beneficial. */
+ if (use_stmt == stmt
+ || gimple_bb (use_stmt) != gimple_bb (stmt))
+ continue;
+ if (!is_gimple_assign (use_stmt)
+ || (gimple_assign_rhs_code (use_stmt) != TRUNC_DIV_EXPR
+ && gimple_assign_rhs_code (use_stmt) != TRUNC_MOD_EXPR))
+ continue;
+ if (gimple_assign_rhs1 (use_stmt) != treeop0
+ || !operand_equal_p (gimple_assign_rhs2 (use_stmt), treeop1, 0))
+ continue;
+ return code;
+ }
+
+ w = wi::lrshift (w, shift);
+ wide_int a = wide_int::from (w, prec + 1, UNSIGNED);
+ wide_int b = wi::shifted_mask (prec, 1, false, prec + 1);
+ wide_int m = wide_int::from (mod_inv (a, b), prec, UNSIGNED);
+ tree c3 = wide_int_to_tree (type, m);
+ tree c5 = NULL_TREE;
+ wide_int d, e;
+ if (sgn == UNSIGNED)
+ {
+ d = wi::divmod_trunc (wi::mask (prec, false, prec), w, UNSIGNED, &e);
+ /* Use <= floor ((1<<prec) - 1) / C1 only if C2 <= ((1<<prec) - 1) % C1,
+ otherwise use < or subtract one from C4. E.g. for
+ x % 3U == 0 we transform this into x * 0xaaaaaaab <= 0x55555555, but
+ x % 3U == 1 already needs to be
+ (x - 1) * 0xaaaaaaabU <= 0x55555554. */
+ if (!shift && wi::gtu_p (wi::to_wide (*arg1), e))
+ d -= 1;
+ if (shift)
+ d = wi::lrshift (d, shift);
+ }
+ else
+ {
+ e = wi::udiv_trunc (wi::mask (prec - 1, false, prec), w);
+ if (!shift)
+ d = wi::lshift (e, 1);
+ else
+ {
+ e = wi::bit_and (e, wi::mask (shift, true, prec));
+ d = wi::lrshift (e, shift - 1);
+ }
+ c5 = wide_int_to_tree (type, e);
+ }
+ tree c4 = wide_int_to_tree (type, d);
+
+ rtx op0 = expand_normal (treeop0);
+ treeop0 = make_tree (TREE_TYPE (treeop0), op0);
+
+ bool speed_p = optimize_insn_for_speed_p ();
+
+ do_pending_stack_adjust ();
+
+ location_t loc = gimple_location (stmt);
+ struct separate_ops ops;
+ ops.code = TRUNC_MOD_EXPR;
+ ops.location = loc;
+ ops.type = TREE_TYPE (treeop0);
+ ops.op0 = treeop0;
+ ops.op1 = treeop1;
+ ops.op2 = NULL_TREE;
+ start_sequence ();
+ rtx mor = expand_expr_real_2 (&ops, NULL_RTX, TYPE_MODE (ops.type),
+ EXPAND_NORMAL);
+ rtx_insn *moinsns = get_insns ();
+ end_sequence ();
+
+ unsigned mocost = seq_cost (moinsns, speed_p);
+ mocost += rtx_cost (mor, mode, EQ, 0, speed_p);
+ mocost += rtx_cost (expand_normal (*arg1), mode, EQ, 1, speed_p);
+
+ tree t = fold_convert_loc (loc, type, treeop0);
+ if (!integer_zerop (*arg1))
+ t = fold_build2_loc (loc, MINUS_EXPR, type, t, fold_convert (type, *arg1));
+ t = fold_build2_loc (loc, MULT_EXPR, type, t, c3);
+ if (sgn == SIGNED)
+ t = fold_build2_loc (loc, PLUS_EXPR, type, t, c5);
+ if (shift)
+ {
+ tree s = build_int_cst (NULL_TREE, shift);
+ t = fold_build2_loc (loc, RROTATE_EXPR, type, t, s);
+ }
+
+ start_sequence ();
+ rtx mur = expand_normal (t);
+ rtx_insn *muinsns = get_insns ();
+ end_sequence ();
+
+ unsigned mucost = seq_cost (muinsns, speed_p);
+ mucost += rtx_cost (mur, mode, LE, 0, speed_p);
+ mucost += rtx_cost (expand_normal (c4), mode, LE, 1, speed_p);
+
+ if (mocost <= mucost)
+ {
+ emit_insn (moinsns);
+ *arg0 = make_tree (TREE_TYPE (*arg0), mor);
+ return code;
+ }
+
+ emit_insn (muinsns);
+ *arg0 = make_tree (type, mur);
+ *arg1 = c4;
+ return code == EQ_EXPR ? LE_EXPR : GT_EXPR;
+}
+\f
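
To make the formulas in the block comment above concrete, here is a small standalone check of the two odd-divisor cases for prec == 32 and C1 == 3, where C3 == 0xaaaaaaab (the inverse of 3 modulo 2^32) and C4 is 0x55555554 (unsigned case, C2 == 1) or 0x2aaaaaaa (signed case, C2 == 0). Illustration only, not part of the patch; the helper names are made up for this example.

    #include <assert.h>
    #include <stdint.h>

    /* Unsigned case, C1 == 3, C2 == 1:  x % 3u == 1  becomes
       (x - 1) * 0xaaaaaaab <= 0x55555554, i.e. C4 is (2^32 - 1) / 3 minus
       one because C2 == 1 > (2^32 - 1) % 3 == 0.  */
    static int
    umod3_is_1 (uint32_t x)
    {
      return (uint32_t) ((x - 1u) * 0xaaaaaaabu) <= 0x55555554u;
    }

    /* Signed case, C1 == 3, C2 == 0:  x % 3 == 0  becomes (all unsigned)
       x * 0xaaaaaaab + 0x2aaaaaaa <= 2 * 0x2aaaaaaa,
       with C4 == (2^31 - 1) / 3 == 0x2aaaaaaa.  */
    static int
    smod3_is_0 (int32_t x)
    {
      return (uint32_t) ((uint32_t) x * 0xaaaaaaabu + 0x2aaaaaaau) <= 0x55555554u;
    }

    int
    main (void)
    {
      static const int64_t tests[] = {
        0, 1, 2, 3, 4, 5, 6, 7, 0x55555554, 0x55555555, 0x7ffffffe,
        0x7fffffff, -1, -2, -3, -0x80000000LL
      };
      for (unsigned i = 0; i < sizeof (tests) / sizeof (tests[0]); i++)
        {
          uint32_t u = (uint32_t) tests[i];
          int32_t s = (int32_t) tests[i];
          assert (umod3_is_1 (u) == (u % 3u == 1u));
          assert (smod3_is_0 (s) == (s % 3 == 0));
        }
      return 0;
    }
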
/* Generate code to calculate OPS, and exploded expression
using a store-flag instruction and return an rtx for the result.
OPS reflects a comparison.
STRIP_NOPS (arg0);
STRIP_NOPS (arg1);
-
+
/* For vector typed comparisons emit code to generate the desired
all-ones or all-zeros mask. Conveniently use the VEC_COND_EXPR
expander for this. */
}
}
+ /* Optimize (x % C1) == C2 or (x % C1) != C2 into
+ (x - C2) * C3 <= C4 if it is beneficial. */
+ if ((ops->code == EQ_EXPR || ops->code == NE_EXPR)
+ && TREE_CODE (arg0) == SSA_NAME
+ && TREE_CODE (arg1) == INTEGER_CST)
+ {
+ enum tree_code code = maybe_optimize_mod_cmp (ops->code, &arg0, &arg1);
+ if (code != ops->code)
+ {
+ struct separate_ops nops = *ops;
+ nops.code = ops->code = code;
+ nops.op0 = arg0;
+ nops.op1 = arg1;
+ nops.type = TREE_TYPE (arg0);
+ return do_store_flag (&nops, target, mode);
+ }
+ }
+
/* Get the rtx comparison code to use. We know that EXP is a comparison
operation of some type. Some comparisons against 1 and -1 can be
converted to comparisons with zero. Do so here so that the tests