From 8aba425f4ebc5e2c054776d3cdddf13f7c1918f8 Mon Sep 17 00:00:00 2001 From: Jakub Jelinek Date: Thu, 13 Feb 2020 10:04:11 +0100 Subject: [PATCH] sccvn: Handle bitfields in vn_reference_lookup_3 [PR93582] The following patch is first step towards fixing PR93582. vn_reference_lookup_3 right now punts on anything that isn't byte aligned, so to be able to lookup a constant bitfield store, one needs to use the exact same COMPONENT_REF, otherwise it isn't found. This patch lifts up that that restriction if the bits to be loaded are covered by a single store of a constant (keeps the restriction so far for the multiple store case, can tweak that incrementally, but I think for bisection etc. it is worth to do it one step at a time). 2020-02-13 Jakub Jelinek PR tree-optimization/93582 * fold-const.h (shift_bytes_in_array_left, shift_bytes_in_array_right): Declare. * fold-const.c (shift_bytes_in_array_left, shift_bytes_in_array_right): New function, moved from gimple-ssa-store-merging.c, no longer static. * gimple-ssa-store-merging.c (shift_bytes_in_array): Move to gimple-ssa-store-merging.c and rename to shift_bytes_in_array_left. (shift_bytes_in_array_right): Move to gimple-ssa-store-merging.c. (encode_tree_to_bitpos): Use shift_bytes_in_array_left instead of shift_bytes_in_array. (verify_shift_bytes_in_array): Rename to ... (verify_shift_bytes_in_array_left): ... this. Use shift_bytes_in_array_left instead of shift_bytes_in_array. (store_merging_c_tests): Call verify_shift_bytes_in_array_left instead of verify_shift_bytes_in_array. * tree-ssa-sccvn.c (vn_reference_lookup_3): For native_encode_expr / native_interpret_expr where the store covers all needed bits, punt on PDP-endian, otherwise allow all involved offsets and sizes not to be byte-aligned. * gcc.dg/tree-ssa/pr93582-1.c: New test. * gcc.dg/tree-ssa/pr93582-2.c: New test. * gcc.dg/tree-ssa/pr93582-3.c: New test. --- gcc/ChangeLog | 21 ++++++ gcc/fold-const.c | 64 ++++++++++++++++ gcc/fold-const.h | 4 + gcc/gimple-ssa-store-merging.c | 72 ++---------------- gcc/testsuite/ChangeLog | 7 ++ gcc/testsuite/gcc.dg/tree-ssa/pr93582-1.c | 17 +++++ gcc/testsuite/gcc.dg/tree-ssa/pr93582-2.c | 17 +++++ gcc/testsuite/gcc.dg/tree-ssa/pr93582-3.c | 18 +++++ gcc/tree-ssa-sccvn.c | 90 +++++++++++++++++------ 9 files changed, 222 insertions(+), 88 deletions(-) create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/pr93582-1.c create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/pr93582-2.c create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/pr93582-3.c diff --git a/gcc/ChangeLog b/gcc/ChangeLog index 252e820bce4..36a8956d8da 100644 --- a/gcc/ChangeLog +++ b/gcc/ChangeLog @@ -1,5 +1,26 @@ 2020-02-13 Jakub Jelinek + PR tree-optimization/93582 + * fold-const.h (shift_bytes_in_array_left, + shift_bytes_in_array_right): Declare. + * fold-const.c (shift_bytes_in_array_left, + shift_bytes_in_array_right): New function, moved from + gimple-ssa-store-merging.c, no longer static. + * gimple-ssa-store-merging.c (shift_bytes_in_array): Move + to gimple-ssa-store-merging.c and rename to shift_bytes_in_array_left. + (shift_bytes_in_array_right): Move to gimple-ssa-store-merging.c. + (encode_tree_to_bitpos): Use shift_bytes_in_array_left instead of + shift_bytes_in_array. + (verify_shift_bytes_in_array): Rename to ... + (verify_shift_bytes_in_array_left): ... this. Use + shift_bytes_in_array_left instead of shift_bytes_in_array. + (store_merging_c_tests): Call verify_shift_bytes_in_array_left + instead of verify_shift_bytes_in_array. + * tree-ssa-sccvn.c (vn_reference_lookup_3): For native_encode_expr + / native_interpret_expr where the store covers all needed bits, + punt on PDP-endian, otherwise allow all involved offsets and sizes + not to be byte-aligned. + PR target/93673 * config/i386/sse.md (k): Drop mode from last operand and use const_0_to_255_operand predicate instead of immediate_operand. diff --git a/gcc/fold-const.c b/gcc/fold-const.c index aefa91666e2..71a1d3eb735 100644 --- a/gcc/fold-const.c +++ b/gcc/fold-const.c @@ -8354,6 +8354,70 @@ can_native_interpret_type_p (tree type) } } +/* Routines for manipulation of native_encode_expr encoded data if the encoded + or extracted constant positions and/or sizes aren't byte aligned. */ + +/* Shift left the bytes in PTR of SZ elements by AMNT bits, carrying over the + bits between adjacent elements. AMNT should be within + [0, BITS_PER_UNIT). + Example, AMNT = 2: + 00011111|11100000 << 2 = 01111111|10000000 + PTR[1] | PTR[0] PTR[1] | PTR[0]. */ + +void +shift_bytes_in_array_left (unsigned char *ptr, unsigned int sz, + unsigned int amnt) +{ + if (amnt == 0) + return; + + unsigned char carry_over = 0U; + unsigned char carry_mask = (~0U) << (unsigned char) (BITS_PER_UNIT - amnt); + unsigned char clear_mask = (~0U) << amnt; + + for (unsigned int i = 0; i < sz; i++) + { + unsigned prev_carry_over = carry_over; + carry_over = (ptr[i] & carry_mask) >> (BITS_PER_UNIT - amnt); + + ptr[i] <<= amnt; + if (i != 0) + { + ptr[i] &= clear_mask; + ptr[i] |= prev_carry_over; + } + } +} + +/* Like shift_bytes_in_array_left but for big-endian. + Shift right the bytes in PTR of SZ elements by AMNT bits, carrying over the + bits between adjacent elements. AMNT should be within + [0, BITS_PER_UNIT). + Example, AMNT = 2: + 00011111|11100000 >> 2 = 00000111|11111000 + PTR[0] | PTR[1] PTR[0] | PTR[1]. */ + +void +shift_bytes_in_array_right (unsigned char *ptr, unsigned int sz, + unsigned int amnt) +{ + if (amnt == 0) + return; + + unsigned char carry_over = 0U; + unsigned char carry_mask = ~(~0U << amnt); + + for (unsigned int i = 0; i < sz; i++) + { + unsigned prev_carry_over = carry_over; + carry_over = ptr[i] & carry_mask; + + carry_over <<= (unsigned char) BITS_PER_UNIT - amnt; + ptr[i] >>= amnt; + ptr[i] |= prev_carry_over; + } +} + /* Try to view-convert VECTOR_CST EXPR to VECTOR_TYPE TYPE by operating directly on the VECTOR_CST encoding, in a way that works for variable- length vectors. Return the resulting VECTOR_CST on success or null diff --git a/gcc/fold-const.h b/gcc/fold-const.h index 7ac792f16a8..0f788a458f2 100644 --- a/gcc/fold-const.h +++ b/gcc/fold-const.h @@ -30,6 +30,10 @@ extern int native_encode_initializer (tree, unsigned char *, int, int off = -1); extern tree native_interpret_expr (tree, const unsigned char *, int); extern bool can_native_interpret_type_p (tree); +extern void shift_bytes_in_array_left (unsigned char *, unsigned int, + unsigned int); +extern void shift_bytes_in_array_right (unsigned char *, unsigned int, + unsigned int); /* Fold constants as much as possible in an expression. Returns the simplified expression. diff --git a/gcc/gimple-ssa-store-merging.c b/gcc/gimple-ssa-store-merging.c index 8371323ef4a..4bcafef4878 100644 --- a/gcc/gimple-ssa-store-merging.c +++ b/gcc/gimple-ssa-store-merging.c @@ -1475,66 +1475,6 @@ dump_char_array (FILE *fd, unsigned char *ptr, unsigned int len) fprintf (fd, "\n"); } -/* Shift left the bytes in PTR of SZ elements by AMNT bits, carrying over the - bits between adjacent elements. AMNT should be within - [0, BITS_PER_UNIT). - Example, AMNT = 2: - 00011111|11100000 << 2 = 01111111|10000000 - PTR[1] | PTR[0] PTR[1] | PTR[0]. */ - -static void -shift_bytes_in_array (unsigned char *ptr, unsigned int sz, unsigned int amnt) -{ - if (amnt == 0) - return; - - unsigned char carry_over = 0U; - unsigned char carry_mask = (~0U) << (unsigned char) (BITS_PER_UNIT - amnt); - unsigned char clear_mask = (~0U) << amnt; - - for (unsigned int i = 0; i < sz; i++) - { - unsigned prev_carry_over = carry_over; - carry_over = (ptr[i] & carry_mask) >> (BITS_PER_UNIT - amnt); - - ptr[i] <<= amnt; - if (i != 0) - { - ptr[i] &= clear_mask; - ptr[i] |= prev_carry_over; - } - } -} - -/* Like shift_bytes_in_array but for big-endian. - Shift right the bytes in PTR of SZ elements by AMNT bits, carrying over the - bits between adjacent elements. AMNT should be within - [0, BITS_PER_UNIT). - Example, AMNT = 2: - 00011111|11100000 >> 2 = 00000111|11111000 - PTR[0] | PTR[1] PTR[0] | PTR[1]. */ - -static void -shift_bytes_in_array_right (unsigned char *ptr, unsigned int sz, - unsigned int amnt) -{ - if (amnt == 0) - return; - - unsigned char carry_over = 0U; - unsigned char carry_mask = ~(~0U << amnt); - - for (unsigned int i = 0; i < sz; i++) - { - unsigned prev_carry_over = carry_over; - carry_over = ptr[i] & carry_mask; - - carry_over <<= (unsigned char) BITS_PER_UNIT - amnt; - ptr[i] >>= amnt; - ptr[i] |= prev_carry_over; - } -} - /* Clear out LEN bits starting from bit START in the byte array PTR. This clears the bits to the *right* from START. START must be within [0, BITS_PER_UNIT) and counts starting from @@ -1793,7 +1733,7 @@ encode_tree_to_bitpos (tree expr, unsigned char *ptr, int bitlen, int bitpos, /* Create the shifted version of EXPR. */ if (!BYTES_BIG_ENDIAN) { - shift_bytes_in_array (tmpbuf, byte_size, shift_amnt); + shift_bytes_in_array_left (tmpbuf, byte_size, shift_amnt); if (shift_amnt == 0) byte_size--; } @@ -5092,11 +5032,11 @@ verify_array_eq (unsigned char *x, unsigned char *y, unsigned int n) } } -/* Test shift_bytes_in_array and that it carries bits across between +/* Test shift_bytes_in_array_left and that it carries bits across between bytes correctly. */ static void -verify_shift_bytes_in_array (void) +verify_shift_bytes_in_array_left (void) { /* byte 1 | byte 0 00011111 | 11100000. */ @@ -5105,13 +5045,13 @@ verify_shift_bytes_in_array (void) memcpy (in, orig, sizeof orig); unsigned char expected[2] = { 0x80, 0x7f }; - shift_bytes_in_array (in, sizeof (in), 2); + shift_bytes_in_array_left (in, sizeof (in), 2); verify_array_eq (in, expected, sizeof (in)); memcpy (in, orig, sizeof orig); memcpy (expected, orig, sizeof orig); /* Check that shifting by zero doesn't change anything. */ - shift_bytes_in_array (in, sizeof (in), 0); + shift_bytes_in_array_left (in, sizeof (in), 0); verify_array_eq (in, expected, sizeof (in)); } @@ -5196,7 +5136,7 @@ verify_clear_bit_region_be (void) void store_merging_c_tests (void) { - verify_shift_bytes_in_array (); + verify_shift_bytes_in_array_left (); verify_shift_bytes_in_array_right (); verify_clear_bit_region (); verify_clear_bit_region_be (); diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog index 63d3ff56996..25413537503 100644 --- a/gcc/testsuite/ChangeLog +++ b/gcc/testsuite/ChangeLog @@ -1,3 +1,10 @@ +2020-02-13 Jakub Jelinek + + PR tree-optimization/93582 + * gcc.dg/tree-ssa/pr93582-1.c: New test. + * gcc.dg/tree-ssa/pr93582-2.c: New test. + * gcc.dg/tree-ssa/pr93582-3.c: New test. + 2020-02-13 Richard Biener PR testsuite/93717 diff --git a/gcc/testsuite/gcc.dg/tree-ssa/pr93582-1.c b/gcc/testsuite/gcc.dg/tree-ssa/pr93582-1.c new file mode 100644 index 00000000000..a9f0e34ab07 --- /dev/null +++ b/gcc/testsuite/gcc.dg/tree-ssa/pr93582-1.c @@ -0,0 +1,17 @@ +/* PR tree-optimization/93582 */ +/* { dg-do compile { target int32 } } */ +/* { dg-options "-O2 -fdump-tree-fre1" } */ +/* { dg-final { scan-tree-dump "return 1;" "fre1" } } */ + +union U { + struct S { int a : 1, b : 4, c : 27; } s; + struct T { int d : 2; int e : 2; int f : 28; } t; +}; + +int +foo (void) +{ + union U u; + u.s.b = 10; + return u.t.e; +} diff --git a/gcc/testsuite/gcc.dg/tree-ssa/pr93582-2.c b/gcc/testsuite/gcc.dg/tree-ssa/pr93582-2.c new file mode 100644 index 00000000000..76d03ece43c --- /dev/null +++ b/gcc/testsuite/gcc.dg/tree-ssa/pr93582-2.c @@ -0,0 +1,17 @@ +/* PR tree-optimization/93582 */ +/* { dg-do compile { target int32 } } */ +/* { dg-options "-O2 -fdump-tree-fre1" } */ +/* { dg-final { scan-tree-dump "return 593;" "fre1" } } */ + +union U { + struct S { int a : 1, b : 14, c : 17; } s; + struct T { int d : 2; int e : 12; int f : 18; } t; +}; + +int +foo (void) +{ + union U u; + u.s.b = -7005; + return u.t.e; +} diff --git a/gcc/testsuite/gcc.dg/tree-ssa/pr93582-3.c b/gcc/testsuite/gcc.dg/tree-ssa/pr93582-3.c new file mode 100644 index 00000000000..84f54fc3142 --- /dev/null +++ b/gcc/testsuite/gcc.dg/tree-ssa/pr93582-3.c @@ -0,0 +1,18 @@ +/* PR tree-optimization/93582 */ +/* { dg-do compile { target int32 } } */ +/* { dg-options "-O2 -fdump-tree-fre1" } */ +/* { dg-final { scan-tree-dump "return 1;" "fre1" { target be } } } */ +/* { dg-final { scan-tree-dump "return 2;" "fre1" { target le } } } */ + +union U { + struct S { int a : 1, b : 14, c : 17; } s; + struct T { int d : 10; int e : 4; int f : 18; } t; +}; + +int +foo (void) +{ + union U u; + u.s.b = -7005; + return u.t.e; +} diff --git a/gcc/tree-ssa-sccvn.c b/gcc/tree-ssa-sccvn.c index 33cd12b202f..30518abfddd 100644 --- a/gcc/tree-ssa-sccvn.c +++ b/gcc/tree-ssa-sccvn.c @@ -2586,13 +2586,13 @@ vn_reference_lookup_3 (ao_ref *ref, tree vuse, void *data_, && is_gimple_reg_type (vr->type) && !contains_storage_order_barrier_p (vr->operands) && gimple_assign_single_p (def_stmt) - && CHAR_BIT == 8 && BITS_PER_UNIT == 8 + && CHAR_BIT == 8 + && BITS_PER_UNIT == 8 + && BYTES_BIG_ENDIAN == WORDS_BIG_ENDIAN /* native_encode and native_decode operate on arrays of bytes and so fundamentally need a compile-time size and offset. */ && maxsize.is_constant (&maxsizei) - && maxsizei % BITS_PER_UNIT == 0 && offset.is_constant (&offseti) - && offseti % BITS_PER_UNIT == 0 && (is_gimple_min_invariant (gimple_assign_rhs1 (def_stmt)) || (TREE_CODE (gimple_assign_rhs1 (def_stmt)) == SSA_NAME && is_gimple_min_invariant (SSA_VAL (gimple_assign_rhs1 (def_stmt)))))) @@ -2617,8 +2617,6 @@ vn_reference_lookup_3 (ao_ref *ref, tree vuse, void *data_, && !reverse && !storage_order_barrier_p (lhs) && known_eq (maxsize2, size2) - && multiple_p (size2, BITS_PER_UNIT) - && multiple_p (offset2, BITS_PER_UNIT) && adjust_offsets_for_equal_base_address (base, &offset, base2, &offset2) && offset.is_constant (&offseti) @@ -2629,37 +2627,80 @@ vn_reference_lookup_3 (ao_ref *ref, tree vuse, void *data_, && known_subrange_p (offseti, maxsizei, offset2, size2)) { /* We support up to 512-bit values (for V8DFmode). */ - unsigned char buffer[64]; + unsigned char buffer[65]; int len; tree rhs = gimple_assign_rhs1 (def_stmt); if (TREE_CODE (rhs) == SSA_NAME) rhs = SSA_VAL (rhs); - unsigned pad = 0; - if (BYTES_BIG_ENDIAN - && is_a (TYPE_MODE (TREE_TYPE (rhs)))) - { - /* On big-endian the padding is at the 'front' so - just skip the initial bytes. */ - fixed_size_mode mode - = as_a (TYPE_MODE (TREE_TYPE (rhs))); - pad = GET_MODE_SIZE (mode) - size2i / BITS_PER_UNIT; - } len = native_encode_expr (rhs, - buffer, sizeof (buffer), - ((offseti - offset2i) / BITS_PER_UNIT - + pad)); + buffer, sizeof (buffer) - 1, + (offseti - offset2i) / BITS_PER_UNIT); if (len > 0 && len * BITS_PER_UNIT >= maxsizei) { tree type = vr->type; + unsigned char *buf = buffer; + unsigned int amnt = 0; /* Make sure to interpret in a type that has a range covering the whole access size. */ if (INTEGRAL_TYPE_P (vr->type) && maxsizei != TYPE_PRECISION (vr->type)) type = build_nonstandard_integer_type (maxsizei, TYPE_UNSIGNED (type)); - tree val = native_interpret_expr (type, buffer, - maxsizei / BITS_PER_UNIT); + if (BYTES_BIG_ENDIAN) + { + /* For big-endian native_encode_expr stored the rhs + such that the LSB of it is the LSB of buffer[len - 1]. + That bit is stored into memory at position + offset2 + size2 - 1, i.e. in byte + base + (offset2 + size2 - 1) / BITS_PER_UNIT. + E.g. for offset2 1 and size2 14, rhs -1 and memory + previously cleared that is: + 0 1 + 01111111|11111110 + Now, if we want to extract offset 2 and size 12 from + it using native_interpret_expr (which actually works + for integral bitfield types in terms of byte size of + the mode), the native_encode_expr stored the value + into buffer as + XX111111|11111111 + and returned len 2 (the X bits are outside of + precision). + Let sz be maxsize / BITS_PER_UNIT if not extracting + a bitfield, and GET_MODE_SIZE otherwise. + We need to align the LSB of the value we want to + extract as the LSB of buf[sz - 1]. + The LSB from memory we need to read is at position + offset + maxsize - 1. */ + HOST_WIDE_INT sz = maxsizei / BITS_PER_UNIT; + if (INTEGRAL_TYPE_P (type)) + sz = GET_MODE_SIZE (SCALAR_INT_TYPE_MODE (type)); + amnt = ((unsigned HOST_WIDE_INT) offset2i + size2i + - offseti - maxsizei) % BITS_PER_UNIT; + if (amnt) + shift_bytes_in_array_right (buffer, len, amnt); + amnt = ((unsigned HOST_WIDE_INT) offset2i + size2i + - offseti - maxsizei - amnt) / BITS_PER_UNIT; + if ((unsigned HOST_WIDE_INT) sz + amnt > (unsigned) len) + len = 0; + else + { + buf = buffer + len - sz - amnt; + len -= (buf - buffer); + } + } + else + { + amnt = ((unsigned HOST_WIDE_INT) offset2i + - offseti) % BITS_PER_UNIT; + if (amnt) + { + buffer[len] = 0; + shift_bytes_in_array_left (buffer, len + 1, amnt); + buf = buffer + 1; + } + } + tree val = native_interpret_expr (type, buf, len); /* If we chop off bits because the types precision doesn't match the memory access size this is ok when optimizing reads but not when called from the DSE code during @@ -2677,7 +2718,12 @@ vn_reference_lookup_3 (ao_ref *ref, tree vuse, void *data_, return data->finish (get_alias_set (lhs), val); } } - else if (ranges_known_overlap_p (offseti, maxsizei, offset2i, size2i)) + else if (ranges_known_overlap_p (offseti, maxsizei, offset2i, + size2i) + && maxsizei % BITS_PER_UNIT == 0 + && offseti % BITS_PER_UNIT == 0 + && size2i % BITS_PER_UNIT == 0 + && offset2i % BITS_PER_UNIT == 0) { pd_data pd; tree rhs = gimple_assign_rhs1 (def_stmt); -- 2.30.2