From f48b51496d08de2aabc5ed1deb45e5517333662c Mon Sep 17 00:00:00 2001 From: Richard Sandiford Date: Thu, 17 Dec 2020 00:15:05 +0000 Subject: [PATCH] simplify-rtx: Put simplify routines into a class MIME-Version: 1.0 Content-Type: text/plain; charset=utf8 Content-Transfer-Encoding: 8bit One of the recurring warts of RTL is that multiplication by a power of 2 is represented as a MULT inside a MEM but as an ASHIFT outside a MEM. It would obviously be better if we didn't have this kind of context sensitivity, but it would be difficult to remove. Currently the simplify-rtx.c routines are hard-coded for the ASHIFT form. This means that some callers have to convert the ASHIFTs “back” into MULTs after calling the simplify-rtx.c routines; see fwprop.c:canonicalize_address for an example. I think we can relieve some of the pain by wrapping the simplify-rtx.c routines in a simple class that tracks whether the expression occurs in a MEM or not, so that no post-processing is needed. An obvious concern is whether passing the “this” pointer around will slow things down or bloat the code. I can't measure any increase in compile time after applying the patch. Sizewise, simplify-rtx.o text increases by 2.3% in default-checking builds and 4.1% in release-checking builds. I realise the MULT/ASHIFT thing isn't the most palatable reason for doing this, but I think it might be useful for other things in future, such as using local nonzero_bits hooks/virtual functions instead of the global hooks. The obvious alternative would be to add a static variable and hope that it is always updated correctly. Later patches make use of this. gcc/ * rtl.h (simplify_context): New class. (simplify_unary_operation, simplify_binary_operation): Use it. (simplify_ternary_operation, simplify_relational_operation): Likewise. (simplify_subreg, simplify_gen_unary, simplify_gen_binary): Likewise. (simplify_gen_ternary, simplify_gen_relational): Likewise. (simplify_gen_subreg, lowpart_subreg): Likewise. 
* simplify-rtx.c (simplify_gen_binary): Turn into a member function of simplify_context. (simplify_gen_unary, simplify_gen_ternary, simplify_gen_relational) (simplify_truncation, simplify_unary_operation): Likewise. (simplify_unary_operation_1, simplify_byte_swapping_operation) (simplify_associative_operation, simplify_logical_relational_operation) (simplify_binary_operation, simplify_binary_operation_series) (simplify_distributive_operation, simplify_plus_minus): Likewise. (simplify_relational_operation, simplify_relational_operation_1) (simplify_cond_clz_ctz, simplify_merge_mask): Likewise. (simplify_ternary_operation, simplify_subreg, simplify_gen_subreg) (lowpart_subreg): Likewise. (simplify_binary_operation_1): Likewise. Test mem_depth when deciding whether the ASHIFT or MULT form is canonical. (simplify_merge_mask): Use simplify_context. --- gcc/rtl.h | 149 ++++++++++++++++++++++++++++++++++++++------ gcc/simplify-rtx.c | 152 ++++++++++++++++++++++++++------------------- 2 files changed, 220 insertions(+), 81 deletions(-) diff --git a/gcc/rtl.h b/gcc/rtl.h index 5a1670f295c..e9df95b02c4 100644 --- a/gcc/rtl.h +++ b/gcc/rtl.h @@ -3375,30 +3375,143 @@ extern rtx_insn *try_split (rtx, rtx_insn *, int); extern rtx_insn *split_insns (rtx, rtx_insn *); /* In simplify-rtx.c */ + +/* A class that records the context in which a simplification + is being made. 
*/ +class simplify_context +{ +public: + rtx simplify_unary_operation (rtx_code, machine_mode, rtx, machine_mode); + rtx simplify_binary_operation (rtx_code, machine_mode, rtx, rtx); + rtx simplify_ternary_operation (rtx_code, machine_mode, machine_mode, + rtx, rtx, rtx); + rtx simplify_relational_operation (rtx_code, machine_mode, machine_mode, + rtx, rtx); + rtx simplify_subreg (machine_mode, rtx, machine_mode, poly_uint64); + + rtx lowpart_subreg (machine_mode, rtx, machine_mode); + + rtx simplify_merge_mask (rtx, rtx, int); + + rtx simplify_gen_unary (rtx_code, machine_mode, rtx, machine_mode); + rtx simplify_gen_binary (rtx_code, machine_mode, rtx, rtx); + rtx simplify_gen_ternary (rtx_code, machine_mode, machine_mode, + rtx, rtx, rtx); + rtx simplify_gen_relational (rtx_code, machine_mode, machine_mode, rtx, rtx); + rtx simplify_gen_subreg (machine_mode, rtx, machine_mode, poly_uint64); + + /* Tracks the level of MEM nesting for the value being simplified: + 0 means the value is not in a MEM, >0 means it is. This is needed + because the canonical representation of multiplication is different + inside a MEM than outside. 
*/ + unsigned int mem_depth = 0; + +private: + rtx simplify_truncation (machine_mode, rtx, machine_mode); + rtx simplify_byte_swapping_operation (rtx_code, machine_mode, rtx, rtx); + rtx simplify_associative_operation (rtx_code, machine_mode, rtx, rtx); + rtx simplify_distributive_operation (rtx_code, machine_mode, rtx, rtx); + rtx simplify_logical_relational_operation (rtx_code, machine_mode, rtx, rtx); + rtx simplify_binary_operation_series (rtx_code, machine_mode, rtx, rtx); + rtx simplify_distribute_over_subregs (rtx_code, machine_mode, rtx, rtx); + rtx simplify_shift_const_int (rtx_code, machine_mode, rtx, unsigned int); + rtx simplify_plus_minus (rtx_code, machine_mode, rtx, rtx); + rtx simplify_cond_clz_ctz (rtx, rtx_code, rtx, rtx); + + rtx simplify_unary_operation_1 (rtx_code, machine_mode, rtx); + rtx simplify_binary_operation_1 (rtx_code, machine_mode, rtx, rtx, rtx, rtx); + rtx simplify_ternary_operation_1 (rtx_code, machine_mode, machine_mode, + rtx, rtx, rtx); + rtx simplify_relational_operation_1 (rtx_code, machine_mode, machine_mode, + rtx, rtx); +}; + +inline rtx +simplify_unary_operation (rtx_code code, machine_mode mode, rtx op, + machine_mode op_mode) +{ + return simplify_context ().simplify_unary_operation (code, mode, op, + op_mode); +} + +inline rtx +simplify_binary_operation (rtx_code code, machine_mode mode, rtx op0, rtx op1) +{ + return simplify_context ().simplify_binary_operation (code, mode, op0, op1); +} + +inline rtx +simplify_ternary_operation (rtx_code code, machine_mode mode, + machine_mode op0_mode, rtx op0, rtx op1, rtx op2) +{ + return simplify_context ().simplify_ternary_operation (code, mode, op0_mode, + op0, op1, op2); +} + +inline rtx +simplify_relational_operation (rtx_code code, machine_mode mode, + machine_mode op_mode, rtx op0, rtx op1) +{ + return simplify_context ().simplify_relational_operation (code, mode, + op_mode, op0, op1); +} + +inline rtx +simplify_subreg (machine_mode outermode, rtx op, machine_mode innermode, 
+ poly_uint64 byte) +{ + return simplify_context ().simplify_subreg (outermode, op, innermode, byte); +} + +inline rtx +simplify_gen_unary (rtx_code code, machine_mode mode, rtx op, + machine_mode op_mode) +{ + return simplify_context ().simplify_gen_unary (code, mode, op, op_mode); +} + +inline rtx +simplify_gen_binary (rtx_code code, machine_mode mode, rtx op0, rtx op1) +{ + return simplify_context ().simplify_gen_binary (code, mode, op0, op1); +} + +inline rtx +simplify_gen_ternary (rtx_code code, machine_mode mode, machine_mode op0_mode, + rtx op0, rtx op1, rtx op2) +{ + return simplify_context ().simplify_gen_ternary (code, mode, op0_mode, + op0, op1, op2); +} + +inline rtx +simplify_gen_relational (rtx_code code, machine_mode mode, + machine_mode op_mode, rtx op0, rtx op1) +{ + return simplify_context ().simplify_gen_relational (code, mode, op_mode, + op0, op1); +} + +inline rtx +simplify_gen_subreg (machine_mode outermode, rtx op, machine_mode innermode, + poly_uint64 byte) +{ + return simplify_context ().simplify_gen_subreg (outermode, op, + innermode, byte); +} + +inline rtx +lowpart_subreg (machine_mode outermode, rtx op, machine_mode innermode) +{ + return simplify_context ().lowpart_subreg (outermode, op, innermode); +} + extern rtx simplify_const_unary_operation (enum rtx_code, machine_mode, rtx, machine_mode); -extern rtx simplify_unary_operation (enum rtx_code, machine_mode, rtx, - machine_mode); extern rtx simplify_const_binary_operation (enum rtx_code, machine_mode, rtx, rtx); -extern rtx simplify_binary_operation (enum rtx_code, machine_mode, rtx, - rtx); -extern rtx simplify_ternary_operation (enum rtx_code, machine_mode, - machine_mode, rtx, rtx, rtx); extern rtx simplify_const_relational_operation (enum rtx_code, machine_mode, rtx, rtx); -extern rtx simplify_relational_operation (enum rtx_code, machine_mode, - machine_mode, rtx, rtx); -extern rtx simplify_gen_binary (enum rtx_code, machine_mode, rtx, rtx); -extern rtx simplify_gen_unary (enum 
rtx_code, machine_mode, rtx, - machine_mode); -extern rtx simplify_gen_ternary (enum rtx_code, machine_mode, - machine_mode, rtx, rtx, rtx); -extern rtx simplify_gen_relational (enum rtx_code, machine_mode, - machine_mode, rtx, rtx); -extern rtx simplify_subreg (machine_mode, rtx, machine_mode, poly_uint64); -extern rtx simplify_gen_subreg (machine_mode, rtx, machine_mode, poly_uint64); -extern rtx lowpart_subreg (machine_mode, rtx, machine_mode); extern rtx simplify_replace_fn_rtx (rtx, const_rtx, rtx (*fn) (rtx, const_rtx, void *), void *); extern rtx simplify_replace_rtx (rtx, const_rtx, rtx); diff --git a/gcc/simplify-rtx.c b/gcc/simplify-rtx.c index 47e7aebda8a..ff331e1a17f 100644 --- a/gcc/simplify-rtx.c +++ b/gcc/simplify-rtx.c @@ -47,14 +47,6 @@ along with GCC; see the file COPYING3. If not see ((((HOST_WIDE_INT) low) < 0) ? HOST_WIDE_INT_M1 : HOST_WIDE_INT_0) static bool plus_minus_operand_p (const_rtx); -static rtx simplify_plus_minus (enum rtx_code, machine_mode, rtx, rtx); -static rtx simplify_associative_operation (enum rtx_code, machine_mode, - rtx, rtx); -static rtx simplify_relational_operation_1 (enum rtx_code, machine_mode, - machine_mode, rtx, rtx); -static rtx simplify_unary_operation_1 (enum rtx_code, machine_mode, rtx); -static rtx simplify_binary_operation_1 (enum rtx_code, machine_mode, - rtx, rtx, rtx, rtx); /* Negate I, which satisfies poly_int_rtx_p. MODE is the mode of I. */ @@ -180,8 +172,8 @@ val_signbit_known_clear_p (machine_mode mode, unsigned HOST_WIDE_INT val) seeing if the expression folds. */ rtx -simplify_gen_binary (enum rtx_code code, machine_mode mode, rtx op0, - rtx op1) +simplify_context::simplify_gen_binary (rtx_code code, machine_mode mode, + rtx op0, rtx op1) { rtx tem; @@ -358,8 +350,8 @@ delegitimize_mem_from_attrs (rtx x) the specified operation. 
*/ rtx -simplify_gen_unary (enum rtx_code code, machine_mode mode, rtx op, - machine_mode op_mode) +simplify_context::simplify_gen_unary (rtx_code code, machine_mode mode, rtx op, + machine_mode op_mode) { rtx tem; @@ -373,8 +365,9 @@ simplify_gen_unary (enum rtx_code code, machine_mode mode, rtx op, /* Likewise for ternary operations. */ rtx -simplify_gen_ternary (enum rtx_code code, machine_mode mode, - machine_mode op0_mode, rtx op0, rtx op1, rtx op2) +simplify_context::simplify_gen_ternary (rtx_code code, machine_mode mode, + machine_mode op0_mode, + rtx op0, rtx op1, rtx op2) { rtx tem; @@ -390,8 +383,9 @@ simplify_gen_ternary (enum rtx_code code, machine_mode mode, CMP_MODE specifies mode comparison is done in. */ rtx -simplify_gen_relational (enum rtx_code code, machine_mode mode, - machine_mode cmp_mode, rtx op0, rtx op1) +simplify_context::simplify_gen_relational (rtx_code code, machine_mode mode, + machine_mode cmp_mode, + rtx op0, rtx op1) { rtx tem; @@ -614,9 +608,9 @@ simplify_replace_rtx (rtx x, const_rtx old_rtx, rtx new_rtx) However, X is still an arbitrary 64-bit number and so we cannot assume that truncating it too is a no-op. */ -static rtx -simplify_truncation (machine_mode mode, rtx op, - machine_mode op_mode) +rtx +simplify_context::simplify_truncation (machine_mode mode, rtx op, + machine_mode op_mode) { unsigned int precision = GET_MODE_UNIT_PRECISION (mode); unsigned int op_precision = GET_MODE_UNIT_PRECISION (op_mode); @@ -858,8 +852,8 @@ simplify_truncation (machine_mode mode, rtx op, MODE with input operand OP whose mode was originally OP_MODE. Return zero if no simplification can be made. 
*/ rtx -simplify_unary_operation (enum rtx_code code, machine_mode mode, - rtx op, machine_mode op_mode) +simplify_context::simplify_unary_operation (rtx_code code, machine_mode mode, + rtx op, machine_mode op_mode) { rtx trueop, tem; @@ -900,8 +894,9 @@ exact_int_to_float_conversion_p (const_rtx op) /* Perform some simplifications we can do even if the operands aren't constant. */ -static rtx -simplify_unary_operation_1 (enum rtx_code code, machine_mode mode, rtx op) +rtx +simplify_context::simplify_unary_operation_1 (rtx_code code, machine_mode mode, + rtx op) { enum rtx_code reversed; rtx temp, elt, base, step; @@ -2091,9 +2086,10 @@ simplify_const_unary_operation (enum rtx_code code, machine_mode mode, operating on OP0 and OP1. CODE is currently one of AND, IOR or XOR. Return zero if no simplification or canonicalization is possible. */ -static rtx -simplify_byte_swapping_operation (enum rtx_code code, machine_mode mode, - rtx op0, rtx op1) +rtx +simplify_context::simplify_byte_swapping_operation (rtx_code code, + machine_mode mode, + rtx op0, rtx op1) { rtx tem; @@ -2121,9 +2117,10 @@ simplify_byte_swapping_operation (enum rtx_code code, machine_mode mode, SMIN, SMAX, UMIN or UMAX. Return zero if no simplification or canonicalization is possible. */ -static rtx -simplify_associative_operation (enum rtx_code code, machine_mode mode, - rtx op0, rtx op1) +rtx +simplify_context::simplify_associative_operation (rtx_code code, + machine_mode mode, + rtx op0, rtx op1) { rtx tem; @@ -2302,8 +2299,9 @@ comparison_code_valid_for_mode (enum rtx_code code, enum machine_mode mode) and OP1, which should be both relational operations. Return 0 if no such simplification is possible. */ rtx -simplify_logical_relational_operation (enum rtx_code code, machine_mode mode, - rtx op0, rtx op1) +simplify_context::simplify_logical_relational_operation (rtx_code code, + machine_mode mode, + rtx op0, rtx op1) { /* We only handle IOR of two relational operations. 
*/ if (code != IOR) @@ -2351,8 +2349,8 @@ simplify_logical_relational_operation (enum rtx_code code, machine_mode mode, Don't use this for relational operations such as EQ or LT. Use simplify_relational_operation instead. */ rtx -simplify_binary_operation (enum rtx_code code, machine_mode mode, - rtx op0, rtx op1) +simplify_context::simplify_binary_operation (rtx_code code, machine_mode mode, + rtx op0, rtx op1) { rtx trueop0, trueop1; rtx tem; @@ -2397,9 +2395,10 @@ simplify_binary_operation (enum rtx_code code, machine_mode mode, MODE is the mode of the operation and is known to be a vector integer mode. */ -static rtx -simplify_binary_operation_series (rtx_code code, machine_mode mode, - rtx op0, rtx op1) +rtx +simplify_context::simplify_binary_operation_series (rtx_code code, + machine_mode mode, + rtx op0, rtx op1) { rtx base0, step0; if (vec_duplicate_p (op0, &base0)) @@ -2433,9 +2432,10 @@ simplify_binary_operation_series (rtx_code code, machine_mode mode, e.g. simplify (xor (and A C) (and (B C)) to (and (xor (A B) C). Returns NULL_RTX if no simplification is possible. */ -static rtx -simplify_distributive_operation (enum rtx_code code, machine_mode mode, - rtx op0, rtx op1) +rtx +simplify_context::simplify_distributive_operation (rtx_code code, + machine_mode mode, + rtx op0, rtx op1) { enum rtx_code op = GET_CODE (op0); gcc_assert (GET_CODE (op1) == op); @@ -2481,9 +2481,11 @@ simplify_distributive_operation (enum rtx_code code, machine_mode mode, OP1 are constant pool references, TRUEOP0 and TRUEOP1 represent the actual constants. 
*/ -static rtx -simplify_binary_operation_1 (enum rtx_code code, machine_mode mode, - rtx op0, rtx op1, rtx trueop0, rtx trueop1) +rtx +simplify_context::simplify_binary_operation_1 (rtx_code code, + machine_mode mode, + rtx op0, rtx op1, + rtx trueop0, rtx trueop1) { rtx tem, reversed, opleft, opright, elt0, elt1; HOST_WIDE_INT val; @@ -2948,7 +2950,7 @@ simplify_binary_operation_1 (enum rtx_code code, machine_mode mode, return op0; /* Convert multiply by constant power of two into shift. */ - if (CONST_SCALAR_INT_P (trueop1)) + if (mem_depth == 0 && CONST_SCALAR_INT_P (trueop1)) { val = wi::exact_log2 (rtx_mode_t (trueop1, mode)); if (val >= 0) @@ -3863,6 +3865,18 @@ simplify_binary_operation_1 (enum rtx_code code, machine_mode mode, return op0; if (trueop0 == CONST0_RTX (mode) && ! side_effects_p (op1)) return op0; + if (mem_depth + && code == ASHIFT + && CONST_INT_P (trueop1) + && is_a <scalar_int_mode> (mode, &int_mode) + && IN_RANGE (UINTVAL (trueop1), + 1, GET_MODE_PRECISION (int_mode) - 1)) + { + auto c = (wi::one (GET_MODE_PRECISION (int_mode)) + << UINTVAL (trueop1)); + rtx new_op1 = immed_wide_int_const (c, int_mode); + return simplify_gen_binary (MULT, int_mode, op0, new_op1); + } goto canonicalize_shift; case LSHIFTRT: @@ -4887,9 +4901,9 @@ simplify_plus_minus_op_data_cmp (rtx x, rtx y) May return NULL_RTX when no changes were made. */ -static rtx -simplify_plus_minus (enum rtx_code code, machine_mode mode, rtx op0, - rtx op1) +rtx +simplify_context::simplify_plus_minus (rtx_code code, machine_mode mode, + rtx op0, rtx op1) { struct simplify_plus_minus_op_data { @@ -5234,8 +5248,10 @@ plus_minus_operand_p (const_rtx x) the operands or, if both are VOIDmode, the operands are compared in "infinite precision". 
*/ rtx -simplify_relational_operation (enum rtx_code code, machine_mode mode, - machine_mode cmp_mode, rtx op0, rtx op1) +simplify_context::simplify_relational_operation (rtx_code code, + machine_mode mode, + machine_mode cmp_mode, + rtx op0, rtx op1) { rtx tem, trueop0, trueop1; @@ -5318,9 +5334,11 @@ simplify_relational_operation (enum rtx_code code, machine_mode mode, MODE is the mode of the result, while CMP_MODE specifies in which mode the comparison is done in, so it is the mode of the operands. */ -static rtx -simplify_relational_operation_1 (enum rtx_code code, machine_mode mode, - machine_mode cmp_mode, rtx op0, rtx op1) +rtx +simplify_context::simplify_relational_operation_1 (rtx_code code, + machine_mode mode, + machine_mode cmp_mode, + rtx op0, rtx op1) { enum rtx_code op0code = GET_CODE (op0); @@ -5988,8 +6006,9 @@ simplify_const_relational_operation (enum rtx_code code, Assume X is compared against zero with CMP_CODE and the true arm is TRUE_VAL and the false arm is FALSE_VAL. */ -static rtx -simplify_cond_clz_ctz (rtx x, rtx_code cmp_code, rtx true_val, rtx false_val) +rtx +simplify_context::simplify_cond_clz_ctz (rtx x, rtx_code cmp_code, + rtx true_val, rtx false_val) { if (cmp_code != EQ && cmp_code != NE) return NULL_RTX; @@ -6032,7 +6051,7 @@ simplify_cond_clz_ctz (rtx x, rtx_code cmp_code, rtx true_val, rtx false_val) Return the simplified X on success, otherwise return NULL_RTX. */ rtx -simplify_merge_mask (rtx x, rtx mask, int op) +simplify_context::simplify_merge_mask (rtx x, rtx mask, int op) { gcc_assert (VECTOR_MODE_P (GET_MODE (x))); poly_uint64 nunits = GET_MODE_NUNITS (GET_MODE (x)); @@ -6102,9 +6121,9 @@ simplify_merge_mask (rtx x, rtx mask, int op) a constant. Return 0 if no simplifications is possible. 
*/ rtx -simplify_ternary_operation (enum rtx_code code, machine_mode mode, - machine_mode op0_mode, rtx op0, rtx op1, - rtx op2) +simplify_context::simplify_ternary_operation (rtx_code code, machine_mode mode, + machine_mode op0_mode, + rtx op0, rtx op1, rtx op2) { bool any_change = false; rtx tem, trueop2; @@ -6971,8 +6990,8 @@ simplify_immed_subreg (fixed_size_mode outermode, rtx x, /* Simplify SUBREG:OUTERMODE(OP:INNERMODE, BYTE) Return 0 if no simplifications are possible. */ rtx -simplify_subreg (machine_mode outermode, rtx op, - machine_mode innermode, poly_uint64 byte) +simplify_context::simplify_subreg (machine_mode outermode, rtx op, + machine_mode innermode, poly_uint64 byte) { /* Little bit of sanity checking. */ gcc_assert (innermode != VOIDmode); @@ -7246,8 +7265,9 @@ simplify_subreg (machine_mode outermode, rtx op, /* Make a SUBREG operation or equivalent if it folds. */ rtx -simplify_gen_subreg (machine_mode outermode, rtx op, - machine_mode innermode, poly_uint64 byte) +simplify_context::simplify_gen_subreg (machine_mode outermode, rtx op, + machine_mode innermode, + poly_uint64 byte) { rtx newx; @@ -7270,8 +7290,8 @@ simplify_gen_subreg (machine_mode outermode, rtx op, INNER_MODE) to OUTER_MODE. */ rtx -lowpart_subreg (machine_mode outer_mode, rtx expr, - machine_mode inner_mode) +simplify_context::lowpart_subreg (machine_mode outer_mode, rtx expr, + machine_mode inner_mode) { return simplify_gen_subreg (outer_mode, expr, inner_mode, subreg_lowpart_offset (outer_mode, inner_mode)); @@ -7685,6 +7705,12 @@ test_vector_ops_series (machine_mode mode, rtx scalar_reg) series_0_1)); } +static rtx +simplify_merge_mask (rtx x, rtx mask, int op) +{ + return simplify_context ().simplify_merge_mask (x, mask, op); +} + /* Verify simplify_merge_mask works correctly. */ static void -- 2.30.2