#include "expr.h"
#include "optabs-tree.h"
#include "libfuncs.h"
+#include "internal-fn.h"
+#include "langhooks.h"
static void prepare_float_lib_cmp (rtx, rtx, enum rtx_code, rtx *,
machine_mode *);
emit_move_insn (product_high, adjust);
return product;
}
+
+/* Subroutine of expand_binop.  Optimize unsigned double-word OP0 % OP1 for
+   constant OP1.  If for some bit in [BITS_PER_WORD / 2, BITS_PER_WORD] range
+   (prefer higher bits) ((1w << bit) % OP1) == 1, then the modulo can be
+   computed in word-mode as
+     ((OP0 & ((1w << bit) - 1)) + ((OP0 >> bit) & ((1w << bit) - 1))
+      + (OP0 >> (2 * bit))) % OP1.
+   Whether we need to sum 2, 3 or 4 values depends on the bit value, if 2,
+   then carry from the addition needs to be added too, i.e. like:
+     sum += __builtin_add_overflow (low, high, &sum)
+
+   Optimize signed double-word OP0 % OP1 similarly, just apply some correction
+   factor to the sum before doing unsigned remainder, in the form of
+     sum += (((signed) OP0 >> (2 * BITS_PER_WORD - 1)) & const);
+   then perform unsigned
+     remainder = sum % OP1;
+   and finally
+     remainder += ((signed) OP0 >> (2 * BITS_PER_WORD - 1)) & (1 - OP1);
+
+   MODE is the double-word mode of OP0, OP1 is read with INTVAL and
+   UNSIGNEDP is true for unsigned modulo.  Return the remainder converted
+   to MODE, or NULL_RTX if no suitable bit exists, some step can't be
+   expanded directly, or the expansion would need a library call.  This
+   function does not remove instructions it has already emitted when it
+   returns NULL_RTX; that is left to the caller.  */
+
+static rtx
+expand_doubleword_mod (machine_mode mode, rtx op0, rtx op1, bool unsignedp)
+{
+  /* Divisors <= 1 are not handled here.  */
+  if (INTVAL (op1) <= 1)
+    return NULL_RTX;
+
+  /* Remember where we started, so that the instructions emitted below
+     can be scanned for library calls at the end.  */
+  rtx_insn *last = get_last_insn ();
+  for (int bit = BITS_PER_WORD; bit >= BITS_PER_WORD / 2; bit--)
+    {
+      /* W is 1 << BIT in double-word precision; only bits for which
+         (1 << BIT) % OP1 == 1 are usable.  */
+      wide_int w = wi::shifted_mask (bit, 1, false, 2 * BITS_PER_WORD);
+      if (wi::ne_p (wi::umod_trunc (w, INTVAL (op1)), 1))
+        continue;
+      rtx sum = NULL_RTX, mask = NULL_RTX;
+      if (bit == BITS_PER_WORD)
+        {
+          /* For signed modulo we need to add correction to the sum
+             and that might again overflow.  */
+          if (!unsignedp)
+            continue;
+          if (optab_handler (uaddv4_optab, word_mode) == CODE_FOR_nothing)
+            continue;
+          tree wtype = lang_hooks.types.type_for_mode (word_mode, 1);
+          if (wtype == NULL_TREE)
+            continue;
+          tree ctype = build_complex_type (wtype);
+          if (TYPE_MODE (ctype) != GET_MODE_COMPLEX_MODE (word_mode))
+            continue;
+          machine_mode cmode = TYPE_MODE (ctype);
+          /* Add the two words of OP0 with overflow detection; the result
+             and the overflow flag land in the two halves of CRES.  */
+          rtx op00 = operand_subword_force (op0, 0, mode);
+          rtx op01 = operand_subword_force (op0, 1, mode);
+          rtx cres = gen_rtx_CONCAT (cmode, gen_reg_rtx (word_mode),
+                                     gen_reg_rtx (word_mode));
+          tree lhs = make_tree (ctype, cres);
+          tree arg0 = make_tree (wtype, op00);
+          tree arg1 = make_tree (wtype, op01);
+          expand_addsub_overflow (UNKNOWN_LOCATION, PLUS_EXPR, lhs, arg0,
+                                  arg1, true, true, true, false, NULL);
+          /* sum += __builtin_add_overflow (low, high, &sum).  */
+          sum = expand_simple_binop (word_mode, PLUS, XEXP (cres, 0),
+                                     XEXP (cres, 1), NULL_RTX, 1,
+                                     OPTAB_DIRECT);
+          if (sum == NULL_RTX)
+            return NULL_RTX;
+        }
+      else
+        {
+          /* Code below uses GEN_INT, so we need the masks to be representable
+             in HOST_WIDE_INTs.  */
+          if (bit >= HOST_BITS_PER_WIDE_INT)
+            continue;
+          /* If op0 is e.g. -1 or -2 unsigned, then the 2 additions might
+             overflow.  Consider 64-bit -1ULL for word size 32, if we add
+             0x7fffffffU + 0x7fffffffU + 3U, it wraps around to 1.  */
+          if (bit == BITS_PER_WORD - 1)
+            continue;
+
+          /* Number of BIT-sized pieces needed to cover the double word.  */
+          int count = (2 * BITS_PER_WORD + bit - 1) / bit;
+          rtx sum_corr = NULL_RTX;
+
+          if (!unsignedp)
+            {
+              /* For signed modulo, compute it as unsigned modulo of
+                 sum with a correction added to it if OP0 is negative,
+                 such that the result can be computed as unsigned
+                 remainder
+                 + (((signed) OP0 >> (2 * BITS_PER_WORD - 1)) & (1 - OP1)).  */
+              w = wi::min_value (2 * BITS_PER_WORD, SIGNED);
+              wide_int wmod1 = wi::umod_trunc (w, INTVAL (op1));
+              wide_int wmod2 = wi::smod_trunc (w, INTVAL (op1));
+              /* wmod2 == -wmod1.  */
+              wmod2 = wmod2 + (INTVAL (op1) - 1);
+              if (wi::ne_p (wmod1, wmod2))
+                {
+                  wide_int wcorr = wmod2 - wmod1;
+                  if (wi::neg_p (w))
+                    wcorr = wcorr + INTVAL (op1);
+                  /* Now verify if the count sums can't overflow, and punt
+                     if they could.  */
+                  w = wi::mask (bit, false, 2 * BITS_PER_WORD);
+                  w = w * (count - 1);
+                  w = w + wi::mask (2 * BITS_PER_WORD - (count - 1) * bit,
+                                    false, 2 * BITS_PER_WORD);
+                  w = w + wcorr;
+                  w = wi::lrshift (w, BITS_PER_WORD);
+                  if (wi::ne_p (w, 0))
+                    continue;
+
+                  /* MASK is the most significant word of OP0 shifted
+                     arithmetically right by BITS_PER_WORD - 1.  */
+                  mask = operand_subword_force (op0, WORDS_BIG_ENDIAN ? 0 : 1,
+                                                mode);
+                  mask = expand_simple_binop (word_mode, ASHIFTRT, mask,
+                                              GEN_INT (BITS_PER_WORD - 1),
+                                              NULL_RTX, 0, OPTAB_DIRECT);
+                  if (mask == NULL_RTX)
+                    return NULL_RTX;
+                  sum_corr = immed_wide_int_const (wcorr, word_mode);
+                  sum_corr = expand_simple_binop (word_mode, AND, mask,
+                                                  sum_corr, NULL_RTX, 1,
+                                                  OPTAB_DIRECT);
+                  if (sum_corr == NULL_RTX)
+                    return NULL_RTX;
+                }
+            }
+
+          /* Sum the COUNT pieces of OP0, each BIT bits wide; the last
+             piece holds whatever bits remain and needs no masking.  */
+          for (int i = 0; i < count; i++)
+            {
+              rtx v = op0;
+              if (i)
+                v = expand_simple_binop (mode, LSHIFTRT, v, GEN_INT (i * bit),
+                                         NULL_RTX, 1, OPTAB_DIRECT);
+              if (v == NULL_RTX)
+                return NULL_RTX;
+              v = lowpart_subreg (word_mode, v, mode);
+              if (v == NULL_RTX)
+                return NULL_RTX;
+              if (i != count - 1)
+                v = expand_simple_binop (word_mode, AND, v,
+                                         GEN_INT ((HOST_WIDE_INT_1U << bit)
+                                                  - 1), NULL_RTX, 1,
+                                         OPTAB_DIRECT);
+              if (v == NULL_RTX)
+                return NULL_RTX;
+              if (sum == NULL_RTX)
+                sum = v;
+              else
+                sum = expand_simple_binop (word_mode, PLUS, sum, v, NULL_RTX,
+                                           1, OPTAB_DIRECT);
+              if (sum == NULL_RTX)
+                return NULL_RTX;
+            }
+          if (sum_corr)
+            {
+              sum = expand_simple_binop (word_mode, PLUS, sum, sum_corr,
+                                         NULL_RTX, 1, OPTAB_DIRECT);
+              if (sum == NULL_RTX)
+                return NULL_RTX;
+            }
+        }
+      /* OP1 is a constant for the double-word MODE; re-create it for
+         word_mode so that it is in canonical form for that mode (e.g.
+         a divisor with the word's most significant bit set).  */
+      rtx remainder = expand_divmod (1, TRUNC_MOD_EXPR, word_mode, sum,
+                                     gen_int_mode (INTVAL (op1), word_mode),
+                                     NULL_RTX, 1);
+      if (remainder == NULL_RTX)
+        return NULL_RTX;
+
+      if (!unsignedp)
+        {
+          if (mask == NULL_RTX)
+            {
+              mask = operand_subword_force (op0, WORDS_BIG_ENDIAN ? 0 : 1,
+                                            mode);
+              mask = expand_simple_binop (word_mode, ASHIFTRT, mask,
+                                          GEN_INT (BITS_PER_WORD - 1),
+                                          NULL_RTX, 0, OPTAB_DIRECT);
+              if (mask == NULL_RTX)
+                return NULL_RTX;
+            }
+          /* remainder += mask & (1 - OP1).  */
+          mask = expand_simple_binop (word_mode, AND, mask,
+                                      GEN_INT (1 - INTVAL (op1)),
+                                      NULL_RTX, 1, OPTAB_DIRECT);
+          if (mask == NULL_RTX)
+            return NULL_RTX;
+          remainder = expand_simple_binop (word_mode, PLUS, remainder,
+                                           mask, NULL_RTX, 1, OPTAB_DIRECT);
+          if (remainder == NULL_RTX)
+            return NULL_RTX;
+        }
+
+      remainder = convert_modes (mode, word_mode, remainder, unsignedp);
+      /* Punt if we need any library calls.  Only the instructions emitted
+         by this function are of interest: start after LAST, or from the
+         first instruction if nothing had been emitted before we started
+         (LAST is NULL then and the scan would otherwise be skipped).  */
+      if (last)
+        last = NEXT_INSN (last);
+      else
+        last = get_insns ();
+      for (; last; last = NEXT_INSN (last))
+        if (CALL_P (last))
+          return NULL_RTX;
+      return remainder;
+    }
+  return NULL_RTX;
+}
\f
/* Wrapper around expand_binop which takes an rtx code to specify
the operation to perform, not an optab pointer. All other
}
}
+ /* Attempt to synthetize double word modulo by constant divisor. */
+ if ((binoptab == umod_optab || binoptab == smod_optab)
+ && optimize
+ && CONST_INT_P (op1)
+ && is_int_mode (mode, &int_mode)
+ && GET_MODE_SIZE (int_mode) == 2 * UNITS_PER_WORD
+ && optab_handler (lshr_optab, int_mode) != CODE_FOR_nothing
+ && optab_handler (and_optab, word_mode) != CODE_FOR_nothing
+ && optab_handler (add_optab, word_mode) != CODE_FOR_nothing
+ && optimize_insn_for_speed_p ())
+ {
+ rtx remainder = expand_doubleword_mod (int_mode, op0, op1,
+ binoptab == umod_optab);
+ if (remainder != NULL_RTX)
+ {
+ if (optab_handler (mov_optab, int_mode) != CODE_FOR_nothing)
+ {
+ rtx_insn *move = emit_move_insn (target ? target : remainder,
+ remainder);
+ set_dst_reg_note (move,
+ REG_EQUAL,
+ gen_rtx_fmt_ee (UMOD, int_mode,
+ copy_rtx (op0), op1),
+ target ? target : remainder);
+ }
+ return remainder;
+ }
+ else
+ delete_insns_since (last);
+ }
+
/* It can't be open-coded in this mode.
Use a library call if one is available and caller says that's ok. */