}
}
bool terminate_and_process_chain ();
- bool try_coalesce_bswap (merged_store_group *, unsigned int, unsigned int);
+ bool try_coalesce_bswap (merged_store_group *, unsigned int, unsigned int,
+ unsigned int);
bool coalesce_immediate_stores ();
bool output_merged_store (merged_store_group *);
bool output_merged_stores ();
into the group. That way it will be its own store group and will
not be touched. If ALL_INTEGER_CST_P and there are overlapping
INTEGER_CST stores, those are mergeable using merge_overlapping,
- so don't return false for those. */
+ so don't return false for those.
+
+ Similarly, check that none of the stores from FIRST_EARLIER (inclusive)
+ to END_EARLIER (exclusive) overlaps the bitrange START to END while
+ having an order in between FIRST_ORDER and LAST_ORDER. This is to
+ prevent merging in cases like:
+ MEM <char[12]> [&b + 8B] = {};
+ MEM[(short *) &b] = 5;
+ _5 = *x_4(D);
+ MEM <long long unsigned int> [&b + 2B] = _5;
+ MEM[(char *)&b + 16B] = 88;
+ MEM[(int *)&b + 20B] = 1;
+ The = {} store comes in sort_by_bitpos before the = 88 store, and can't
+ be merged with it, because the = _5 store overlaps these and is in between
+ them in sort_by_order ordering. If it were merged, the merged store would
+ go after the = _5 store and thus change behavior. */
static bool
check_no_overlap (vec<store_immediate_info *> m_store_info, unsigned int i,
- bool all_integer_cst_p, unsigned int last_order,
- unsigned HOST_WIDE_INT end)
+ bool all_integer_cst_p, unsigned int first_order,
+ unsigned int last_order, unsigned HOST_WIDE_INT start,
+ unsigned HOST_WIDE_INT end, unsigned int first_earlier,
+ unsigned end_earlier)
{
unsigned int len = m_store_info.length ();
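+ /* Fail if any of the earlier stores overlaps the bitrange START to END
+    and is ordered in between FIRST_ORDER and LAST_ORDER.  */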
+ for (unsigned int j = first_earlier; j < end_earlier; j++)
+ {
+ store_immediate_info *info = m_store_info[j];
+ if (info->order > first_order
+ && info->order < last_order
+ && info->bitpos + info->bitsize > start)
+ return false;
+ }
for (++i; i < len; ++i)
{
store_immediate_info *info = m_store_info[i];
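
For illustration only, here is a C sketch of a store sequence with the same
shape as the GIMPLE quoted in the comment above; the struct layout, names and
statements are assumptions of this note, not the PR97053 reproducer:

  struct Z { char z[12]; };
  struct B { short s; char c2[6]; struct Z zeroed; int i; } b;

  void
  f (const unsigned long long *x)
  {
    b.zeroed = (struct Z) { 0 };               /* wide zeroing store, bytes 8..19  */
    b.s = 5;                                   /* bytes 0..1                       */
    __builtin_memcpy ((char *) &b + 2, x, 8);  /* overlapping copy, bytes 2..9     */
    b.zeroed.z[8] = 88;                        /* byte 16                          */
    b.i = 1;                                   /* bytes 20..23                     */
  }

The zeroing store sorts by bit position before the byte-16 store, but the
8-byte copy at offset 2 overlaps it and sits between the two in execution
order, so merging the zeroing store with the later constant stores would move
it past the copy and change the bytes left in b.
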
bool
imm_store_chain_info::try_coalesce_bswap (merged_store_group *merged_store,
unsigned int first,
- unsigned int try_size)
+ unsigned int try_size,
+ unsigned int first_earlier)
{
unsigned int len = m_store_info.length (), last = first;
unsigned HOST_WIDE_INT width = m_store_info[first]->bitsize;
if (n.base_addr == NULL_TREE && !is_gimple_val (n.src))
return false;
- if (!check_no_overlap (m_store_info, last, false, last_order, end))
+ if (!check_no_overlap (m_store_info, last, false, first_order, last_order,
+ merged_store->start, end, first_earlier, first))
return false;
/* Don't handle memory copy this way if normal non-bswap processing
store_immediate_info *info;
unsigned int i, ignore = 0;
+ unsigned int first_earlier = 0;
+ unsigned int end_earlier = 0;
/* Order the stores by the bitposition they write to. */
m_store_info.qsort (sort_by_bitpos);
if (i <= ignore)
goto done;
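+ /* Skip stores in the earlier window that end at or before the start of
+    the current group; they cannot overlap it.  */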
+ while (first_earlier < end_earlier
+ && (m_store_info[first_earlier]->bitpos
+ + m_store_info[first_earlier]->bitsize
+ <= merged_store->start))
+ first_earlier++;
+
/* First try to handle group of stores like:
p[0] = data >> 24;
p[1] = data >> 16;
{
unsigned int try_size;
for (try_size = 64; try_size >= 16; try_size >>= 1)
- if (try_coalesce_bswap (merged_store, i - 1, try_size))
+ if (try_coalesce_bswap (merged_store, i - 1, try_size,
+ first_earlier))
break;
if (try_size >= 16)
ignore = i + merged_store->stores.length () - 1;
m_merged_store_groups.safe_push (merged_store);
if (ignore < m_store_info.length ())
- merged_store = new merged_store_group (m_store_info[ignore]);
+ {
+ merged_store = new merged_store_group (m_store_info[ignore]);
+ end_earlier = ignore;
+ }
else
merged_store = NULL;
goto done;
&& merged_store->only_constants
&& info->lp_nr == merged_store->lp_nr)
{
+ unsigned int first_order
+ = MIN (merged_store->first_order, info->order);
unsigned int last_order
= MAX (merged_store->last_order, info->order);
unsigned HOST_WIDE_INT end
= MAX (merged_store->start + merged_store->width,
info->bitpos + info->bitsize);
- if (check_no_overlap (m_store_info, i, true, last_order, end))
+ if (check_no_overlap (m_store_info, i, true, first_order,
+ last_order, merged_store->start, end,
+ first_earlier, end_earlier))
{
/* check_no_overlap call above made sure there are no
overlapping stores with non-INTEGER_CST rhs_code
do
{
unsigned int max_order = 0;
+ unsigned int min_order = first_order;
unsigned first_nonmergeable_int_order = ~0U;
unsigned HOST_WIDE_INT this_end = end;
k = i;
break;
}
k = j;
+ min_order = MIN (min_order, info2->order);
this_end = MAX (this_end,
info2->bitpos + info2->bitsize);
}
first_nonmergeable_order
= MIN (first_nonmergeable_order, info2->order);
}
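+ /* Re-check the earlier stores against the widened bitrange and order
+    range; punt (k = 0) if one of them would end up ordered in between.  */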
+ if (k > i
+ && !check_no_overlap (m_store_info, len - 1, true,
+ min_order, try_order,
+ merged_store->start, this_end,
+ first_earlier, end_earlier))
+ k = 0;
if (k == 0)
{
if (last_order == try_order)
info->ops_swapped_p = true;
}
if (check_no_overlap (m_store_info, i, false,
+ MIN (merged_store->first_order, info->order),
MAX (merged_store->last_order, info->order),
+ merged_store->start,
MAX (merged_store->start + merged_store->width,
- info->bitpos + info->bitsize)))
+ info->bitpos + info->bitsize),
+ first_earlier, end_earlier))
{
/* Turn MEM_REF into BIT_INSERT_EXPR for bit-field stores. */
if (info->rhs_code == MEM_REF && infof->rhs_code != MEM_REF)
delete merged_store;
merged_store = new merged_store_group (info);
+ end_earlier = i;
if (dump_file && (dump_flags & TDF_DETAILS))
fputs ("New store group\n", dump_file);
--- /dev/null
+/* PR tree-optimization/97053 */
+/* { dg-do run } */
+/* { dg-options "-O2 -fno-tree-dse" } */
+
+struct __attribute__((packed, may_alias)) S { long long s; };
+struct __attribute__((packed, may_alias)) T { short t; };
+
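+/* foo () through garply () below each perform the same store sequence twice:
+   once into the local array A, where store merging may apply, and once
+   through the pointer parameters Q/R/S/T, which may alias each other and so
+   stay separate stores; test () then verifies the two copies agree.  */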
+__attribute__((noipa)) void
+test (char *p, char *q, int s)
+{
+ if ((s & 1) == 0)
+ {
+ if (*(short __attribute__((may_alias)) *) &p[sizeof (short)]
+ != *(short __attribute__((may_alias)) *) &q[sizeof (short)]
+ || (((struct S __attribute__((may_alias)) *) &p[1])->s
+ != ((struct S __attribute__((may_alias)) *) &q[1])->s)
+ || (*(short __attribute__((may_alias)) *) &p[2 * sizeof (short)]
+ != *(short __attribute__((may_alias)) *) &q[2 * sizeof (short)]))
+ __builtin_abort ();
+ }
+ else
+ {
+ if (*(short __attribute__((may_alias)) *) &p[sizeof (short)]
+ != *(short __attribute__((may_alias)) *) &q[sizeof (short)]
+ || (((struct S __attribute__((may_alias)) *) &p[1])->s
+ != ((struct S __attribute__((may_alias)) *) &q[1])->s)
+ || (((struct T __attribute__((may_alias)) *) &p[2 * sizeof (short) - 1])->t
+ != ((struct T __attribute__((may_alias)) *) &q[2 * sizeof (short) - 1])->t)
+ || p[3 * sizeof (short) - 2] != q[3 * sizeof (short) - 2])
+ __builtin_abort ();
+ }
+}
+
+__attribute__((noipa)) void
+foo (long long *p, char *q, char *r, char *s)
+{
+ char a[64] __attribute__((aligned (__alignof (short))));
+ *(short __attribute__((may_alias)) *) &a[sizeof (short)] = 1;
+ ((struct S __attribute__((may_alias)) *) &a[1])->s = p[0];
+ *(short __attribute__((may_alias)) *) &a[2 * sizeof (short)] = 2;
+ *(short __attribute__((may_alias)) *) &q[sizeof (short)] = 1;
+ ((struct S __attribute__((may_alias)) *) &r[1])->s = p[0];
+ *(short __attribute__((may_alias)) *) &s[2 * sizeof (short)] = 2;
+ test (a, q, 0);
+}
+
+__attribute__((noipa)) void
+bar (long long *p, char *q, char *r, char *s, char *t)
+{
+ char a[64] __attribute__((aligned (__alignof (short))));
+ *(short __attribute__((may_alias)) *) &a[sizeof (short)] = 1;
+ ((struct S __attribute__((may_alias)) *) &a[1])->s = p[0];
+ ((struct T __attribute__((may_alias)) *) &a[2 * sizeof (short) - 1])->t = 2;
+ a[3 * sizeof (short) - 2] = 3;
+ *(short __attribute__((may_alias)) *) &q[sizeof (short)] = 1;
+ ((struct S __attribute__((may_alias)) *) &r[1])->s = p[0];
+ ((struct T __attribute__((may_alias)) *) &s[2 * sizeof (short) - 1])->t = 2;
+ t[3 * sizeof (short) - 2] = 3;
+ test (a, q, 1);
+}
+
+__attribute__((noipa)) void
+baz (long long *p, char *q, char *r, char *s)
+{
+ char a[64] __attribute__((aligned (__alignof (short))));
+ *(short __attribute__((may_alias)) *) &a[2 * sizeof (short)] = 2;
+ ((struct S __attribute__((may_alias)) *) &a[1])->s = p[0];
+ *(short __attribute__((may_alias)) *) &a[sizeof (short)] = 1;
+ *(short __attribute__((may_alias)) *) &q[2 * sizeof (short)] = 2;
+ ((struct S __attribute__((may_alias)) *) &r[1])->s = p[0];
+ *(short __attribute__((may_alias)) *) &s[sizeof (short)] = 1;
+ test (a, q, 2);
+}
+
+__attribute__((noipa)) void
+qux (long long *p, char *q, char *r, char *s, char *t)
+{
+ char a[64] __attribute__((aligned (__alignof (short))));
+ ((struct T __attribute__((may_alias)) *) &a[2 * sizeof (short) - 1])->t = 2;
+ ((struct S __attribute__((may_alias)) *) &a[1])->s = p[0];
+ a[3 * sizeof (short) - 2] = 3;
+ *(short __attribute__((may_alias)) *) &a[sizeof (short)] = 1;
+ ((struct T __attribute__((may_alias)) *) &q[2 * sizeof (short) - 1])->t = 2;
+ ((struct S __attribute__((may_alias)) *) &r[1])->s = p[0];
+ s[3 * sizeof (short) - 2] = 3;
+ ((struct T __attribute__((may_alias)) *) &t[sizeof (short)])->t = 1;
+ test (a, q, 3);
+}
+
+__attribute__((noipa)) void
+corge (long long *p, char *q, char *r, char *s, short u[3])
+{
+ char a[64] __attribute__((aligned (__alignof (short))));
+ *(short __attribute__((may_alias)) *) &a[2 * sizeof (short)] = u[2];
+ ((struct S __attribute__((may_alias)) *) &a[1])->s = p[0];
+ *(short __attribute__((may_alias)) *) &a[sizeof (short)] = u[1];
+ *(short __attribute__((may_alias)) *) &q[2 * sizeof (short)] = u[2];
+ ((struct S __attribute__((may_alias)) *) &r[1])->s = p[0];
+ *(short __attribute__((may_alias)) *) &s[sizeof (short)] = u[1];
+ test (a, q, 4);
+}
+
+__attribute__((noipa)) void
+garply (long long *p, char *q, char *r, char *s, short u[3])
+{
+ char a[64] __attribute__((aligned (__alignof (short))));
+ *(short __attribute__((may_alias)) *) &a[sizeof (short)] = u[1];
+ ((struct S __attribute__((may_alias)) *) &a[1])->s = p[0];
+ *(short __attribute__((may_alias)) *) &a[2 * sizeof (short)] = u[2];
+ *(short __attribute__((may_alias)) *) &s[sizeof (short)] = u[1];
+ ((struct S __attribute__((may_alias)) *) &r[1])->s = p[0];
+ *(short __attribute__((may_alias)) *) &q[2 * sizeof (short)] = u[2];
+ test (a, q, 6);
+}
+
+int
+main ()
+{
+ char a[64] __attribute__((aligned (__alignof (short))));
+ long long p = -1LL;
+ short u[] = { 1, 2, 3 };
+ foo (&p, &a[0], &a[0], &a[0]);
+ bar (&p, &a[0], &a[0], &a[0], &a[0]);
+ baz (&p, &a[0], &a[0], &a[0]);
+ qux (&p, &a[0], &a[0], &a[0], &a[0]);
+ corge (&p, &a[0], &a[0], &a[0], u);
+ garply (&p, &a[0], &a[0], &a[0], u);
+ return 0;
+}