From 8b287aea3561278b4002864c0d1c29b769493448 Mon Sep 17 00:00:00 2001 From: Eric Botcazou Date: Fri, 11 Nov 2016 22:38:33 +0000 Subject: [PATCH] re PR rtl-optimization/59461 (missed zero-extension elimination in the combiner) PR rtl-optimization/59461 * doc/rtl.texi (paradoxical subregs): Add missing word. * combine.c (reg_nonzero_bits_for_combine): Do not discard results in modes with precision larger than that of last_set_mode. * rtlanal.c (nonzero_bits1) <SUBREG>: If WORD_REGISTER_OPERATIONS is set and LOAD_EXTEND_OP is appropriate, propagate results from inner REGs to paradoxical SUBREGs. (num_sign_bit_copies1) <SUBREG>: Likewise. Check that the mode is not larger than a word before invoking LOAD_EXTEND_OP on it. From-SVN: r242326 --- gcc/ChangeLog | 12 ++++++ gcc/combine.c | 18 ++++----- gcc/doc/rtl.texi | 2 +- gcc/rtlanal.c | 43 ++++++++++----------- gcc/testsuite/ChangeLog | 4 ++ gcc/testsuite/gcc.target/sparc/20161111-1.c | 17 ++++++++ 6 files changed, 64 insertions(+), 32 deletions(-) create mode 100644 gcc/testsuite/gcc.target/sparc/20161111-1.c diff --git a/gcc/ChangeLog b/gcc/ChangeLog index fc568ee6db8..de764a3e912 100644 --- a/gcc/ChangeLog +++ b/gcc/ChangeLog @@ -1,3 +1,15 @@ +2016-11-11 Eric Botcazou + + PR rtl-optimization/59461 + * doc/rtl.texi (paradoxical subregs): Add missing word. + * combine.c (reg_nonzero_bits_for_combine): Do not discard results + in modes with precision larger than that of last_set_mode. + * rtlanal.c (nonzero_bits1) <SUBREG>: If WORD_REGISTER_OPERATIONS is + set and LOAD_EXTEND_OP is appropriate, propagate results from inner + REGs to paradoxical SUBREGs. + (num_sign_bit_copies1) <SUBREG>: Likewise. Check that the mode is not + larger than a word before invoking LOAD_EXTEND_OP on it.
+ 2016-11-11 Michael Meissner PR target/78243 diff --git a/gcc/combine.c b/gcc/combine.c index 6ffa387a95c..6b7bdd0f687 100644 --- a/gcc/combine.c +++ b/gcc/combine.c @@ -9895,18 +9895,17 @@ reg_nonzero_bits_for_combine (const_rtx x, machine_mode mode, (DF_LR_IN (ENTRY_BLOCK_PTR_FOR_FN (cfun)->next_bb), REGNO (x))))) { - unsigned HOST_WIDE_INT mask = rsp->last_set_nonzero_bits; - - if (GET_MODE_PRECISION (rsp->last_set_mode) < GET_MODE_PRECISION (mode)) - /* We don't know anything about the upper bits. */ - mask |= GET_MODE_MASK (mode) ^ GET_MODE_MASK (rsp->last_set_mode); - - *nonzero &= mask; + /* Note that, even if the precision of last_set_mode is lower than that + of mode, record_value_for_reg invoked nonzero_bits on the register + with nonzero_bits_mode (because last_set_mode is necessarily integral + and HWI_COMPUTABLE_MODE_P in this case) so bits in nonzero_bits_mode + are all valid, hence in mode too since nonzero_bits_mode is defined + to the largest HWI_COMPUTABLE_MODE_P mode. */ + *nonzero &= rsp->last_set_nonzero_bits; return NULL; } tem = get_last_value (x); - if (tem) { if (SHORT_IMMEDIATES_SIGN_EXTEND) @@ -9915,7 +9914,8 @@ reg_nonzero_bits_for_combine (const_rtx x, machine_mode mode, return tem; } - else if (nonzero_sign_valid && rsp->nonzero_bits) + + if (nonzero_sign_valid && rsp->nonzero_bits) { unsigned HOST_WIDE_INT mask = rsp->nonzero_bits; diff --git a/gcc/doc/rtl.texi b/gcc/doc/rtl.texi index 692d9b51b63..175f60dcf08 100644 --- a/gcc/doc/rtl.texi +++ b/gcc/doc/rtl.texi @@ -1882,7 +1882,7 @@ When used as an rvalue, the low-order bits of the @code{subreg} are taken from @var{reg} while the high-order bits may or may not be defined. 
-The high-order bits of rvalues are in the following circumstances: +The high-order bits of rvalues are defined in the following circumstances: @itemize @item @code{subreg}s of @code{mem} diff --git a/gcc/rtlanal.c b/gcc/rtlanal.c index 4d7aad0dff0..4617e8ee449 100644 --- a/gcc/rtlanal.c +++ b/gcc/rtlanal.c @@ -4256,7 +4256,7 @@ cached_nonzero_bits (const_rtx x, machine_mode mode, const_rtx known_x, /* Given an expression, X, compute which bits in X can be nonzero. We don't care about bits outside of those defined in MODE. - For most X this is simply GET_MODE_MASK (GET_MODE (MODE)), but if X is + For most X this is simply GET_MODE_MASK (GET_MODE (X)), but if X is an arithmetic operation, we can do better. */ static unsigned HOST_WIDE_INT @@ -4563,18 +4563,17 @@ nonzero_bits1 (const_rtx x, machine_mode mode, const_rtx known_x, /* If this is a SUBREG formed for a promoted variable that has been zero-extended, we know that at least the high-order bits are zero, though others might be too. */ - if (SUBREG_PROMOTED_VAR_P (x) && SUBREG_PROMOTED_UNSIGNED_P (x)) nonzero = GET_MODE_MASK (GET_MODE (x)) & cached_nonzero_bits (SUBREG_REG (x), GET_MODE (x), known_x, known_mode, known_ret); - inner_mode = GET_MODE (SUBREG_REG (x)); /* If the inner mode is a single word for both the host and target machines, we can compute this from which bits of the inner object might be nonzero. */ + inner_mode = GET_MODE (SUBREG_REG (x)); if (GET_MODE_PRECISION (inner_mode) <= BITS_PER_WORD - && (GET_MODE_PRECISION (inner_mode) <= HOST_BITS_PER_WIDE_INT)) + && GET_MODE_PRECISION (inner_mode) <= HOST_BITS_PER_WIDE_INT) { nonzero &= cached_nonzero_bits (SUBREG_REG (x), mode, known_x, known_mode, known_ret); @@ -4582,19 +4581,17 @@ nonzero_bits1 (const_rtx x, machine_mode mode, const_rtx known_x, /* On many CISC machines, accessing an object in a wider mode causes the high-order bits to become undefined. So they are not known to be zero. 
*/ - if (!WORD_REGISTER_OPERATIONS - /* If this is a typical RISC machine, we only have to worry - about the way loads are extended. */ - || ((LOAD_EXTEND_OP (inner_mode) == SIGN_EXTEND - ? val_signbit_known_set_p (inner_mode, nonzero) - : LOAD_EXTEND_OP (inner_mode) != ZERO_EXTEND) - || !MEM_P (SUBREG_REG (x)))) - { - if (GET_MODE_PRECISION (GET_MODE (x)) + if ((!WORD_REGISTER_OPERATIONS + /* If this is a typical RISC machine, we only have to worry + about the way loads are extended. */ + || (LOAD_EXTEND_OP (inner_mode) == SIGN_EXTEND + ? val_signbit_known_set_p (inner_mode, nonzero) + : LOAD_EXTEND_OP (inner_mode) != ZERO_EXTEND) + || (!MEM_P (SUBREG_REG (x)) && !REG_P (SUBREG_REG (x)))) + && GET_MODE_PRECISION (GET_MODE (x)) > GET_MODE_PRECISION (inner_mode)) - nonzero |= (GET_MODE_MASK (GET_MODE (x)) - & ~GET_MODE_MASK (inner_mode)); - } + nonzero + |= (GET_MODE_MASK (GET_MODE (x)) & ~GET_MODE_MASK (inner_mode)); } break; @@ -4799,6 +4796,7 @@ num_sign_bit_copies1 (const_rtx x, machine_mode mode, const_rtx known_x, { enum rtx_code code = GET_CODE (x); unsigned int bitwidth = GET_MODE_PRECISION (mode); + machine_mode inner_mode; int num0, num1, result; unsigned HOST_WIDE_INT nonzero; @@ -4906,13 +4904,13 @@ num_sign_bit_copies1 (const_rtx x, machine_mode mode, const_rtx known_x, } /* For a smaller object, just ignore the high bits. */ - if (bitwidth <= GET_MODE_PRECISION (GET_MODE (SUBREG_REG (x)))) + inner_mode = GET_MODE (SUBREG_REG (x)); + if (bitwidth <= GET_MODE_PRECISION (inner_mode)) { num0 = cached_num_sign_bit_copies (SUBREG_REG (x), VOIDmode, known_x, known_mode, known_ret); - return MAX (1, (num0 - - (int) (GET_MODE_PRECISION (GET_MODE (SUBREG_REG (x))) - - bitwidth))); + return + MAX (1, num0 - (int) (GET_MODE_PRECISION (inner_mode) - bitwidth)); } /* For paradoxical SUBREGs on machines where all register operations @@ -4926,9 +4924,10 @@ num_sign_bit_copies1 (const_rtx x, machine_mode mode, const_rtx known_x, to the stack. 
*/ if (WORD_REGISTER_OPERATIONS + && GET_MODE_PRECISION (inner_mode) <= BITS_PER_WORD + && LOAD_EXTEND_OP (inner_mode) == SIGN_EXTEND && paradoxical_subreg_p (x) - && LOAD_EXTEND_OP (GET_MODE (SUBREG_REG (x))) == SIGN_EXTEND - && MEM_P (SUBREG_REG (x))) + && (MEM_P (SUBREG_REG (x)) || REG_P (SUBREG_REG (x)))) return cached_num_sign_bit_copies (SUBREG_REG (x), mode, known_x, known_mode, known_ret); break; diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog index 16a2597a5a0..b8ee57d5fe3 100644 --- a/gcc/testsuite/ChangeLog +++ b/gcc/testsuite/ChangeLog @@ -1,3 +1,7 @@ +2016-11-11 Eric Botcazou + + * gcc.target/sparc/20161111-1.c: New test. + 2016-11-11 Uros Bizjak PR target/78310 diff --git a/gcc/testsuite/gcc.target/sparc/20161111-1.c b/gcc/testsuite/gcc.target/sparc/20161111-1.c new file mode 100644 index 00000000000..eda8b0a9f12 --- /dev/null +++ b/gcc/testsuite/gcc.target/sparc/20161111-1.c @@ -0,0 +1,17 @@ +/* PR rtl-optimization/59461 */ + +/* { dg-do compile } */ +/* { dg-options "-O2" } */ + +extern char zeb_test_array[10]; + +unsigned char ee_isdigit2(unsigned int i) +{ + unsigned char c = zeb_test_array[i]; + unsigned char retval; + + retval = ((c>='0') & (c<='9')) ? 1 : 0; + return retval; +} + +/* { dg-final { scan-assembler-not "and\t%" } } */ -- 2.30.2