/* GIMPLE store merging and byte swapping passes.
- Copyright (C) 2009-2018 Free Software Foundation, Inc.
+ Copyright (C) 2009-2020 Free Software Foundation, Inc.
Contributed by ARM Ltd.
This file is part of GCC.
record the surrounding bit region, i.e. bits that could be stored in
a read-modify-write operation when storing the bit-field. Record store
chains to different bases in a hash_map (m_stores) and make sure to
- terminate such chains when appropriate (for example when when the stored
+ terminate such chains when appropriate (for example when the stored
values get used subsequently).
These stores can be a result of structure element initializers, array stores
etc. A store_immediate_info object is recorded for every such store.
#include "gimple-pretty-print.h"
#include "alias.h"
#include "fold-const.h"
-#include "params.h"
#include "print-tree.h"
#include "tree-hash-traits.h"
#include "gimple-iterator.h"
#include "gimple-fold.h"
#include "stor-layout.h"
#include "timevar.h"
+#include "cfganal.h"
+#include "cfgcleanup.h"
#include "tree-cfg.h"
+#include "except.h"
#include "tree-eh.h"
#include "target.h"
#include "gimplify-me.h"
#include "rtl.h"
#include "expr.h" /* For get_bit_range. */
#include "optabs-tree.h"
+#include "dbgcnt.h"
#include "selftest.h"
/* The maximum size (in bits) of the stores this pass should generate. */
lhs_type = gimple_expr_type (stmt);
- if (TREE_CODE (lhs_type) != INTEGER_TYPE)
+ if (TREE_CODE (lhs_type) != INTEGER_TYPE
+ && TREE_CODE (lhs_type) != ENUMERAL_TYPE)
return false;
if (TYPE_PRECISION (lhs_type) != TYPE_PRECISION (n->type))
if (TREE_CODE (rhs1) == BIT_FIELD_REF
&& TREE_CODE (TREE_OPERAND (rhs1, 0)) == SSA_NAME)
{
+ if (!tree_fits_uhwi_p (TREE_OPERAND (rhs1, 1))
+ || !tree_fits_uhwi_p (TREE_OPERAND (rhs1, 2)))
+ return NULL;
+
unsigned HOST_WIDE_INT bitsize = tree_to_uhwi (TREE_OPERAND (rhs1, 1));
unsigned HOST_WIDE_INT bitpos = tree_to_uhwi (TREE_OPERAND (rhs1, 2));
if (bitpos % BITS_PER_UNIT == 0
{
/* The last parameter determines the depth search limit. It usually
correlates directly to the number n of bytes to be touched. We
- increase that number by log2(n) + 1 here in order to also
+ increase that number by 2 * (log2(n) + 1) here in order to also
cover signed -> unsigned conversions of the src operand as can be seen
in libgcc, and for initial shift/and operation of the src operand. */
int limit = TREE_INT_CST_LOW (TYPE_SIZE_UNIT (gimple_expr_type (stmt)));
- limit += 1 + (int) ceil_log2 ((unsigned HOST_WIDE_INT) limit);
+ limit += 2 * (1 + (int) ceil_log2 ((unsigned HOST_WIDE_INT) limit));
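+  /* Worked example (numbers invented): for a 4-byte store the old
+     limit was 4 + (1 + ceil_log2 (4)) = 7 statements, the new one is
+     4 + 2 * (1 + ceil_log2 (4)) = 10, leaving room for one extra
+     conversion and one shift/and of the source operand.  */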
gimple *ins_stmt = find_bswap_or_nop_1 (stmt, n, limit);
if (!ins_stmt)
and the other fields also reflect the memory load, or an SSA name, then
VAL represents the SSA name and all the other fields are zero. */
-struct store_operand_info
+class store_operand_info
{
+public:
tree val;
tree base_addr;
poly_uint64 bitsize;
to memory. These are created in the first phase and coalesced into
merged_store_group objects in the second phase. */
-struct store_immediate_info
+class store_immediate_info
{
+public:
unsigned HOST_WIDE_INT bitsize;
unsigned HOST_WIDE_INT bitpos;
unsigned HOST_WIDE_INT bitregion_start;
unsigned HOST_WIDE_INT bitregion_end;
gimple *stmt;
unsigned int order;
- /* INTEGER_CST for constant stores, MEM_REF for memory copy,
- BIT_*_EXPR for logical bitwise operation, BIT_INSERT_EXPR
- for bit insertion.
+ /* INTEGER_CST for constant store, STRING_CST for string store,
+ MEM_REF for memory copy, BIT_*_EXPR for logical bitwise operation,
+ BIT_INSERT_EXPR for bit insertion.
LROTATE_EXPR if it can be only bswap optimized and
ops are not really meaningful.
NOP_EXPR if bswap optimization detected identity, ops
/* True if ops have been swapped and thus ops[1] represents
rhs1 of BIT_{AND,IOR,XOR}_EXPR and ops[0] represents rhs2. */
bool ops_swapped_p;
+ /* The index number of the landing pad, or 0 if there is none. */
+ int lp_nr;
/* Operands. For BIT_*_EXPR rhs_code both operands are used, otherwise
just the first one. */
store_operand_info ops[2];
store_immediate_info (unsigned HOST_WIDE_INT, unsigned HOST_WIDE_INT,
unsigned HOST_WIDE_INT, unsigned HOST_WIDE_INT,
gimple *, unsigned int, enum tree_code,
- struct symbolic_number &, gimple *, bool,
+ struct symbolic_number &, gimple *, bool, int,
const store_operand_info &,
const store_operand_info &);
};
struct symbolic_number &nr,
gimple *ins_stmtp,
bool bitnotp,
+ int nr2,
const store_operand_info &op0r,
const store_operand_info &op1r)
: bitsize (bs), bitpos (bp), bitregion_start (brs), bitregion_end (bre),
stmt (st), order (ord), rhs_code (rhscode), n (nr),
- ins_stmt (ins_stmtp), bit_not_p (bitnotp), ops_swapped_p (false)
+ ins_stmt (ins_stmtp), bit_not_p (bitnotp), ops_swapped_p (false),
+ lp_nr (nr2)
#if __cplusplus >= 201103L
, ops { op0r, op1r }
{
These are produced by the second phase (coalescing) and consumed in the
third phase that outputs the widened stores. */
-struct merged_store_group
+class merged_store_group
{
+public:
unsigned HOST_WIDE_INT start;
unsigned HOST_WIDE_INT width;
unsigned HOST_WIDE_INT bitregion_start;
unsigned int first_order;
unsigned int last_order;
bool bit_insertion;
+ bool string_concatenation;
bool only_constants;
unsigned int first_nonmergeable_order;
+ int lp_nr;
auto_vec<store_immediate_info *> stores;
/* We record the first and last original statements in the sequence because
fprintf (fd, "\n");
}
-/* Shift left the bytes in PTR of SZ elements by AMNT bits, carrying over the
- bits between adjacent elements. AMNT should be within
- [0, BITS_PER_UNIT).
- Example, AMNT = 2:
- 00011111|11100000 << 2 = 01111111|10000000
- PTR[1] | PTR[0] PTR[1] | PTR[0]. */
-
-static void
-shift_bytes_in_array (unsigned char *ptr, unsigned int sz, unsigned int amnt)
-{
- if (amnt == 0)
- return;
-
- unsigned char carry_over = 0U;
- unsigned char carry_mask = (~0U) << (unsigned char) (BITS_PER_UNIT - amnt);
- unsigned char clear_mask = (~0U) << amnt;
-
- for (unsigned int i = 0; i < sz; i++)
- {
- unsigned prev_carry_over = carry_over;
- carry_over = (ptr[i] & carry_mask) >> (BITS_PER_UNIT - amnt);
-
- ptr[i] <<= amnt;
- if (i != 0)
- {
- ptr[i] &= clear_mask;
- ptr[i] |= prev_carry_over;
- }
- }
-}
-
-/* Like shift_bytes_in_array but for big-endian.
- Shift right the bytes in PTR of SZ elements by AMNT bits, carrying over the
- bits between adjacent elements. AMNT should be within
- [0, BITS_PER_UNIT).
- Example, AMNT = 2:
- 00011111|11100000 >> 2 = 00000111|11111000
- PTR[0] | PTR[1] PTR[0] | PTR[1]. */
-
-static void
-shift_bytes_in_array_right (unsigned char *ptr, unsigned int sz,
- unsigned int amnt)
-{
- if (amnt == 0)
- return;
-
- unsigned char carry_over = 0U;
- unsigned char carry_mask = ~(~0U << amnt);
-
- for (unsigned int i = 0; i < sz; i++)
- {
- unsigned prev_carry_over = carry_over;
- carry_over = ptr[i] & carry_mask;
-
- carry_over <<= (unsigned char) BITS_PER_UNIT - amnt;
- ptr[i] >>= amnt;
- ptr[i] |= prev_carry_over;
- }
-}
-
/* Clear out LEN bits starting from bit START in the byte array
PTR. This clears the bits to the *right* from START.
START must be within [0, BITS_PER_UNIT) and counts starting from
unsigned int total_bytes)
{
unsigned int first_byte = bitpos / BITS_PER_UNIT;
- tree tmp_int = expr;
bool sub_byte_op_p = ((bitlen % BITS_PER_UNIT)
|| (bitpos % BITS_PER_UNIT)
|| !int_mode_for_size (bitlen, 0).exists ());
+ bool empty_ctor_p
+ = (TREE_CODE (expr) == CONSTRUCTOR
+ && CONSTRUCTOR_NELTS (expr) == 0
+ && TYPE_SIZE_UNIT (TREE_TYPE (expr))
+ && tree_fits_uhwi_p (TYPE_SIZE_UNIT (TREE_TYPE (expr))));
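+  /* An empty CONSTRUCTOR typically comes from zero-initializing an
+     aggregate, e.g. "s = {}" in GIMPLE (hypothetical example);
+     native_encode_expr does not handle CONSTRUCTORs, so such a store
+     is encoded by hand below as TYPE_SIZE_UNIT zero bytes.  */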
if (!sub_byte_op_p)
- return native_encode_expr (tmp_int, ptr + first_byte, total_bytes) != 0;
+ {
+ if (first_byte >= total_bytes)
+ return false;
+ total_bytes -= first_byte;
+ if (empty_ctor_p)
+ {
+ unsigned HOST_WIDE_INT rhs_bytes
+ = tree_to_uhwi (TYPE_SIZE_UNIT (TREE_TYPE (expr)));
+ if (rhs_bytes > total_bytes)
+ return false;
+ memset (ptr + first_byte, '\0', rhs_bytes);
+ return true;
+ }
+ return native_encode_expr (expr, ptr + first_byte, total_bytes) != 0;
+ }
/* LITTLE-ENDIAN
We are writing a non byte-sized quantity or at a position that is not
/* We must be dealing with fixed-size data at this point, since the
total size is also fixed. */
- fixed_size_mode mode = as_a <fixed_size_mode> (TYPE_MODE (TREE_TYPE (expr)));
+ unsigned int byte_size;
+ if (empty_ctor_p)
+ {
+ unsigned HOST_WIDE_INT rhs_bytes
+ = tree_to_uhwi (TYPE_SIZE_UNIT (TREE_TYPE (expr)));
+ if (rhs_bytes > total_bytes)
+ return false;
+ byte_size = rhs_bytes;
+ }
+ else
+ {
+ fixed_size_mode mode
+ = as_a <fixed_size_mode> (TYPE_MODE (TREE_TYPE (expr)));
+ byte_size
+ = mode == BLKmode
+ ? tree_to_uhwi (TYPE_SIZE_UNIT (TREE_TYPE (expr)))
+ : GET_MODE_SIZE (mode);
+ }
/* Allocate an extra byte so that we have space to shift into. */
- unsigned int byte_size = GET_MODE_SIZE (mode) + 1;
+ byte_size++;
unsigned char *tmpbuf = XALLOCAVEC (unsigned char, byte_size);
memset (tmpbuf, '\0', byte_size);
/* The store detection code should only have allowed constants that are
- accepted by native_encode_expr. */
- if (native_encode_expr (expr, tmpbuf, byte_size - 1) == 0)
+ accepted by native_encode_expr or empty ctors. */
+ if (!empty_ctor_p
+ && native_encode_expr (expr, tmpbuf, byte_size - 1) == 0)
gcc_unreachable ();
/* The native_encode_expr machinery uses TYPE_MODE to determine how many
/* Create the shifted version of EXPR. */
if (!BYTES_BIG_ENDIAN)
{
- shift_bytes_in_array (tmpbuf, byte_size, shift_amnt);
+ shift_bytes_in_array_left (tmpbuf, byte_size, shift_amnt);
if (shift_amnt == 0)
byte_size--;
}
width has been finalized. */
val = NULL;
mask = NULL;
- bit_insertion = false;
+ bit_insertion = info->rhs_code == BIT_INSERT_EXPR;
+ string_concatenation = info->rhs_code == STRING_CST;
only_constants = info->rhs_code == INTEGER_CST;
first_nonmergeable_order = ~0U;
+ lp_nr = info->lp_nr;
unsigned HOST_WIDE_INT align_bitpos = 0;
get_object_alignment_1 (gimple_assign_lhs (info->stmt),
&align, &align_bitpos);
if (info->rhs_code == LROTATE_EXPR)
return false;
+ if (info->lp_nr != lp_nr)
+ return false;
+
/* The canonical case. */
if (info->rhs_code == stores[0]->rhs_code)
return true;
- /* BIT_INSERT_EXPR is compatible with INTEGER_CST. */
+ /* BIT_INSERT_EXPR is compatible with INTEGER_CST if no STRING_CST. */
if (info->rhs_code == BIT_INSERT_EXPR && stores[0]->rhs_code == INTEGER_CST)
- return true;
+ return !string_concatenation;
if (stores[0]->rhs_code == BIT_INSERT_EXPR && info->rhs_code == INTEGER_CST)
- return true;
+ return !string_concatenation;
- /* We can turn MEM_REF into BIT_INSERT_EXPR for bit-field stores. */
+ /* We can turn MEM_REF into BIT_INSERT_EXPR for bit-field stores, but do it
+ only for small regions since this can generate a lot of instructions. */
if (info->rhs_code == MEM_REF
&& (stores[0]->rhs_code == INTEGER_CST
|| stores[0]->rhs_code == BIT_INSERT_EXPR)
&& info->bitregion_start == stores[0]->bitregion_start
- && info->bitregion_end == stores[0]->bitregion_end)
- return true;
+ && info->bitregion_end == stores[0]->bitregion_end
+ && info->bitregion_end - info->bitregion_start <= MAX_FIXED_MODE_SIZE)
+ return !string_concatenation;
if (stores[0]->rhs_code == MEM_REF
&& (info->rhs_code == INTEGER_CST
|| info->rhs_code == BIT_INSERT_EXPR)
&& info->bitregion_start == stores[0]->bitregion_start
- && info->bitregion_end == stores[0]->bitregion_end)
- return true;
+ && info->bitregion_end == stores[0]->bitregion_end
+ && info->bitregion_end - info->bitregion_start <= MAX_FIXED_MODE_SIZE)
+ return !string_concatenation;
+
+ /* STRING_CST is compatible with INTEGER_CST if no BIT_INSERT_EXPR. */
+ if (info->rhs_code == STRING_CST
+ && stores[0]->rhs_code == INTEGER_CST
+ && stores[0]->bitsize == CHAR_BIT)
+ return !bit_insertion;
+
+ if (stores[0]->rhs_code == STRING_CST
+ && info->rhs_code == INTEGER_CST
+ && info->bitsize == CHAR_BIT)
+ return !bit_insertion;
return false;
}
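/* A concrete illustration of the new STRING_CST rows (editorial; the
   names are invented and the memcpy is assumed to have been folded to
   a single string store):

     struct S { char buf[6]; char c1, c2; } s;

     void
     init (void)
     {
       __builtin_memcpy (s.buf, "hello", 6);  // STRING_CST store
       s.c1 = 'x';                            // 8-bit INTEGER_CST
       s.c2 = 'y';                            // 8-bit INTEGER_CST
     }

   The three stores are mutually compatible, so the whole group can be
   emitted at once, unless bit insertion is already required.  */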
first_order = info->order;
first_stmt = stmt;
}
+
+ /* We need to use extraction if there is any bit-field. */
+ if (info->rhs_code == BIT_INSERT_EXPR)
+ {
+ bit_insertion = true;
+ gcc_assert (!string_concatenation);
+ }
+
+ /* We need to use concatenation if there is any string. */
+ if (info->rhs_code == STRING_CST)
+ {
+ string_concatenation = true;
+ gcc_assert (!bit_insertion);
+ }
+
if (info->rhs_code != INTEGER_CST)
only_constants = false;
}
bool
merged_store_group::apply_stores ()
{
+ store_immediate_info *info;
+ unsigned int i;
+
/* Make sure we have more than one store in the group, otherwise we cannot
merge anything. */
if (bitregion_start % BITS_PER_UNIT != 0
|| stores.length () == 1)
return false;
- stores.qsort (sort_by_order);
- store_immediate_info *info;
- unsigned int i;
+ buf_size = (bitregion_end - bitregion_start) / BITS_PER_UNIT;
+
+ /* Really do string concatenation for large strings only. */
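+  /* E.g. (illustrative, assuming a target where MOVE_MAX is 8): an
+     8-byte group is still emitted as integer stores, while a 16-byte
+     group keeps the string form and becomes a single array store.  */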
+ if (buf_size <= MOVE_MAX)
+ string_concatenation = false;
+
/* Create a power-of-2-sized buffer for native_encode_expr. */
- buf_size = 1 << ceil_log2 ((bitregion_end - bitregion_start) / BITS_PER_UNIT);
+ if (!string_concatenation)
+ buf_size = 1 << ceil_log2 (buf_size);
+
val = XNEWVEC (unsigned char, 2 * buf_size);
mask = val + buf_size;
memset (val, 0, buf_size);
memset (mask, ~0U, buf_size);
+ stores.qsort (sort_by_order);
+
FOR_EACH_VEC_ELT (stores, i, info)
{
unsigned int pos_in_buffer = info->bitpos - bitregion_start;
else
cst = NULL_TREE;
bool ret = true;
- if (cst)
- {
- if (info->rhs_code == BIT_INSERT_EXPR)
- bit_insertion = true;
- else
- ret = encode_tree_to_bitpos (cst, val, info->bitsize,
- pos_in_buffer, buf_size);
- }
+ if (cst && info->rhs_code != BIT_INSERT_EXPR)
+ ret = encode_tree_to_bitpos (cst, val, info->bitsize, pos_in_buffer,
+ buf_size);
unsigned char *m = mask + (pos_in_buffer / BITS_PER_UNIT);
if (BYTES_BIG_ENDIAN)
clear_bit_region_be (m, (BITS_PER_UNIT - 1
dump_char_array (dump_file, mask, buf_size);
if (bit_insertion)
fputs (" bit insertion is required\n", dump_file);
+ if (string_concatenation)
+ fputs (" string concatenation is required\n", dump_file);
}
else
fprintf (dump_file, "Failed to merge stores\n");
/* Structure describing the store chain. */
-struct imm_store_chain_info
+class imm_store_chain_info
{
+public:
/* Doubly-linked list that imposes an order on chain processing.
PNXP (prev's next pointer) points to the head of a list, or to
the next field in the previous chain in the list.
virtual unsigned int execute (function *);
private:
- hash_map<tree_operand_hash, struct imm_store_chain_info *> m_stores;
+ hash_map<tree_operand_hash, class imm_store_chain_info *> m_stores;
/* Form a doubly-linked stack of the elements of m_stores, so that
we can iterate over them in a predictable way. Using this order
decisions when going out of SSA). */
imm_store_chain_info *m_stores_head;
- void process_store (gimple *);
- bool terminate_and_process_all_chains ();
+ bool process_store (gimple *);
+ bool terminate_and_process_chain (imm_store_chain_info *);
bool terminate_all_aliasing_chains (imm_store_chain_info **, gimple *);
- bool terminate_and_release_chain (imm_store_chain_info *);
+ bool terminate_and_process_all_chains ();
}; // class pass_store_merging
/* Terminate and process all recorded chains. Return true if any changes
{
bool ret = false;
while (m_stores_head)
- ret |= terminate_and_release_chain (m_stores_head);
- gcc_assert (m_stores.elements () == 0);
- gcc_assert (m_stores_head == NULL);
-
+ ret |= terminate_and_process_chain (m_stores_head);
+ gcc_assert (m_stores.is_empty ());
return ret;
}
/* Terminate all chains that are affected by the statement STMT.
CHAIN_INFO is the chain we should ignore from the checks if
- non-NULL. */
+ non-NULL. Return true if any changes were made. */
bool
pass_store_merging::terminate_all_aliasing_chains (imm_store_chain_info
return false;
tree store_lhs = gimple_store_p (stmt) ? gimple_get_lhs (stmt) : NULL_TREE;
+ ao_ref store_lhs_ref;
+ ao_ref_init (&store_lhs_ref, store_lhs);
for (imm_store_chain_info *next = m_stores_head, *cur = next; cur; cur = next)
{
next = cur->next;
FOR_EACH_VEC_ELT (cur->m_store_info, i, info)
{
tree lhs = gimple_assign_lhs (info->stmt);
- if (ref_maybe_used_by_stmt_p (stmt, lhs)
- || stmt_may_clobber_ref_p (stmt, lhs)
- || (store_lhs && refs_output_dependent_p (store_lhs, lhs)))
+ ao_ref lhs_ref;
+ ao_ref_init (&lhs_ref, lhs);
+ if (ref_maybe_used_by_stmt_p (stmt, &lhs_ref)
+ || stmt_may_clobber_ref_p_1 (stmt, &lhs_ref)
+ || (store_lhs && refs_may_alias_p_1 (&store_lhs_ref,
+ &lhs_ref, false)))
{
if (dump_file && (dump_flags & TDF_DETAILS))
{
fprintf (dump_file, "stmt causes chain termination:\n");
print_gimple_stmt (dump_file, stmt, 0);
}
- terminate_and_release_chain (cur);
- ret = true;
+ ret |= terminate_and_process_chain (cur);
break;
}
}
entry is removed after the processing in any case. */
bool
-pass_store_merging::terminate_and_release_chain (imm_store_chain_info *chain_info)
+pass_store_merging::terminate_and_process_chain (imm_store_chain_info *chain_info)
{
bool ret = chain_info->terminate_and_process_chain ();
m_stores.remove (chain_info->base_addr);
}
/* Return true if stmts in between FIRST (inclusive) and LAST (exclusive)
- may clobber REF. FIRST and LAST must be in the same basic block and
- have non-NULL vdef. We want to be able to sink load of REF across
- stores between FIRST and LAST, up to right before LAST. */
+ may clobber REF. FIRST and LAST must have non-NULL vdef. We want to
+ be able to sink load of REF across stores between FIRST and LAST, up
+ to right before LAST. */
bool
stmts_may_clobber_ref_p (gimple *first, gimple *last, tree ref)
tree vop = gimple_vdef (last);
gimple *stmt;
- gcc_checking_assert (gimple_bb (first) == gimple_bb (last));
+ /* Return true conservatively if the basic blocks are different. */
+ if (gimple_bb (first) != gimple_bb (last))
+ return true;
+
do
{
stmt = SSA_NAME_DEF_STMT (vop);
vop = gimple_vuse (stmt);
}
while (stmt != first);
+
return false;
}
/* Check if there are any stores in M_STORE_INFO after index I
(where M_STORE_INFO must be sorted by sort_by_bitpos) that overlap
a potential group ending with END that have their order
- smaller than LAST_ORDER. RHS_CODE is the kind of store in the
- group. Return true if there are no such stores.
+ smaller than LAST_ORDER. ALL_INTEGER_CST_P is true if
+ all the stores already merged and the one under consideration
+ have rhs_code of INTEGER_CST. Return true if there are no such stores.
Consider:
MEM[(long long int *)p_28] = 0;
MEM[(long long int *)p_28 + 8B] = 0;
the MEM[(long long int *)p_28 + 8B] = 0; would now be before it,
so we need to refuse merging MEM[(long long int *)p_28 + 8B] = 0;
into the group. That way it will be its own store group and will
- not be touched. If RHS_CODE is INTEGER_CST and there are overlapping
+ not be touched. If ALL_INTEGER_CST_P and there are overlapping
INTEGER_CST stores, those are mergeable using merge_overlapping,
so don't return false for those. */
static bool
check_no_overlap (vec<store_immediate_info *> m_store_info, unsigned int i,
- enum tree_code rhs_code, unsigned int last_order,
+ bool all_integer_cst_p, unsigned int last_order,
unsigned HOST_WIDE_INT end)
{
unsigned int len = m_store_info.length ();
if (info->bitpos >= end)
break;
if (info->order < last_order
- && (rhs_code != INTEGER_CST || info->rhs_code != INTEGER_CST))
+ && (!all_integer_cst_p || info->rhs_code != INTEGER_CST))
return false;
}
return true;
for (unsigned int i = first + 1; i < len; ++i)
{
if (m_store_info[i]->bitpos != m_store_info[first]->bitpos + width
+ || m_store_info[i]->lp_nr != merged_store->lp_nr
|| m_store_info[i]->ins_stmt == NULL)
return false;
width += m_store_info[i]->bitsize;
return false;
bool allow_unaligned
- = !STRICT_ALIGNMENT && PARAM_VALUE (PARAM_STORE_MERGING_ALLOW_UNALIGNED);
+ = !STRICT_ALIGNMENT && param_store_merging_allow_unaligned;
/* Punt if the combined store would not be aligned and we need alignment. */
if (!allow_unaligned)
{
if (n.base_addr == NULL_TREE && !is_gimple_val (n.src))
return false;
- if (!check_no_overlap (m_store_info, last, LROTATE_EXPR, last_order, end))
+ if (!check_no_overlap (m_store_info, last, false, last_order, end))
return false;
/* Don't handle memory copy this way if normal non-bswap processing
FOR_EACH_VEC_ELT (m_store_info, i, info)
{
+ unsigned HOST_WIDE_INT new_bitregion_start, new_bitregion_end;
+
if (i <= ignore)
goto done;
if (info->bitpos == merged_store->start + merged_store->width
&& merged_store->stores.length () == 1
&& merged_store->stores[0]->ins_stmt != NULL
+ && info->lp_nr == merged_store->lp_nr
&& info->ins_stmt != NULL)
{
unsigned int try_size;
}
}
- if (info->order >= merged_store->first_nonmergeable_order)
+ new_bitregion_start
+ = MIN (merged_store->bitregion_start, info->bitregion_start);
+ new_bitregion_end
+ = MAX (merged_store->bitregion_end, info->bitregion_end);
+
+ if (info->order >= merged_store->first_nonmergeable_order
+ || (((new_bitregion_end - new_bitregion_start + 1) / BITS_PER_UNIT)
+ > (unsigned) param_store_merging_max_size))
;
/* |---store 1---|
|---store 2---|
Overlapping stores. */
else if (IN_RANGE (info->bitpos, merged_store->start,
- merged_store->start + merged_store->width - 1))
+ merged_store->start + merged_store->width - 1)
+ /* |---store 1---||---store 2---|
+ Handle also the consecutive INTEGER_CST stores case here,
+ as we have here the code to deal with overlaps. */
+ || (info->bitregion_start <= merged_store->bitregion_end
+ && info->rhs_code == INTEGER_CST
+ && merged_store->only_constants
+ && merged_store->can_be_merged_into (info)))
{
/* Only allow overlapping stores of constants. */
- if (info->rhs_code == INTEGER_CST && merged_store->only_constants)
+ if (info->rhs_code == INTEGER_CST
+ && merged_store->only_constants
+ && info->lp_nr == merged_store->lp_nr)
{
unsigned int last_order
= MAX (merged_store->last_order, info->order);
unsigned HOST_WIDE_INT end
= MAX (merged_store->start + merged_store->width,
info->bitpos + info->bitsize);
- if (check_no_overlap (m_store_info, i, INTEGER_CST,
- last_order, end))
+ if (check_no_overlap (m_store_info, i, true, last_order, end))
{
/* check_no_overlap call above made sure there are no
overlapping stores with non-INTEGER_CST rhs_code
break;
if (info2->order < try_order)
{
- if (info2->rhs_code != INTEGER_CST)
+ if (info2->rhs_code != INTEGER_CST
+ || info2->lp_nr != merged_store->lp_nr)
{
/* Normally check_no_overlap makes sure this
doesn't happen, but if end grows below,
info2->bitpos + info2->bitsize);
}
else if (info2->rhs_code == INTEGER_CST
+ && info2->lp_nr == merged_store->lp_nr
&& !last_iter)
{
max_order = MAX (max_order, info2->order + 1);
std::swap (info->ops[0], info->ops[1]);
info->ops_swapped_p = true;
}
- if (check_no_overlap (m_store_info, i, info->rhs_code,
+ if (check_no_overlap (m_store_info, i, false,
MAX (merged_store->last_order, info->order),
MAX (merged_store->start + merged_store->width,
info->bitpos + info->bitsize)))
infoj->ops[0].val = gimple_assign_rhs1 (infoj->stmt);
infoj->ops[0].base_addr = NULL_TREE;
}
+ merged_store->bit_insertion = true;
}
if ((infof->ops[0].base_addr
? compatible_load_p (merged_store, info, base_addr, 0)
/* Used to describe a store resulting from splitting a wide store in smaller
regularly-sized stores in split_group. */
-struct split_store
+class split_store
{
+public:
unsigned HOST_WIDE_INT bytepos;
unsigned HOST_WIDE_INT size;
unsigned HOST_WIDE_INT align;
/* Record all stores in GROUP that write to the region starting at BITPOS and
is of size BITSIZE. Record infos for such statements in STORES if
non-NULL. The stores in GROUP must be sorted by bitposition. Return INFO
- if there is exactly one original store in the range. */
+ if there is exactly one original store in the range (in that case ignore
+ clobber stmts, unless there are only clobber stmts). */
static store_immediate_info *
-find_constituent_stores (struct merged_store_group *group,
+find_constituent_stores (class merged_store_group *group,
vec<store_immediate_info *> *stores,
unsigned int *first,
unsigned HOST_WIDE_INT bitpos,
if (stmt_start >= end)
return ret;
+ if (gimple_clobber_p (info->stmt))
+ {
+ if (stores)
+ stores->safe_push (info);
+ if (ret == NULL)
+ ret = info;
+ continue;
+ }
if (stores)
{
stores->safe_push (info);
- if (ret)
+ if (ret && !gimple_clobber_p (ret->stmt))
{
ret = NULL;
second = true;
}
}
- else if (ret)
+ else if (ret && !gimple_clobber_p (ret->stmt))
return NULL;
if (!second)
ret = info;
switch (info->rhs_code)
{
case INTEGER_CST:
+ case STRING_CST:
return 0;
case BIT_AND_EXPR:
case BIT_IOR_EXPR:
Return number of new stores.
If ALLOW_UNALIGNED_STORE is false, then all stores must be aligned.
If ALLOW_UNALIGNED_LOAD is false, then all loads must be aligned.
+ BZERO_FIRST may be true only when the first store covers the whole group
+ and clears it; if BZERO_FIRST is true, keep that first store in the set
+ unmodified and emit further stores for the overrides only.
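+    For instance (hypothetical example): for "struct S s = {}; s.b = 1;"
+    where the clearing store covers the whole of s, that store is kept
+    and only the assignment to s.b is re-expanded.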
If SPLIT_STORES is NULL, it is just a dry run to count number of
new stores. */
static unsigned int
split_group (merged_store_group *group, bool allow_unaligned_store,
- bool allow_unaligned_load,
- vec<struct split_store *> *split_stores,
+ bool allow_unaligned_load, bool bzero_first,
+ vec<split_store *> *split_stores,
unsigned *total_orig,
unsigned *total_new)
{
gcc_assert ((size % BITS_PER_UNIT == 0) && (pos % BITS_PER_UNIT == 0));
+ /* For bswap framework using sets of stores, all the checking has been done
+ earlier in try_coalesce_bswap and the result always needs to be emitted
+ as a single store. Likewise for string concatenation. */
if (group->stores[0]->rhs_code == LROTATE_EXPR
- || group->stores[0]->rhs_code == NOP_EXPR)
+ || group->stores[0]->rhs_code == NOP_EXPR
+ || group->string_concatenation)
{
- /* For bswap framework using sets of stores, all the checking
- has been done earlier in try_coalesce_bswap and needs to be
- emitted as a single store. */
+ gcc_assert (!bzero_first);
if (total_orig)
{
/* Avoid the old/new stmt count heuristics. It should be
if (align_bitpos)
align = least_bit_hwi (align_bitpos);
bytepos = group->start / BITS_PER_UNIT;
- struct split_store *store
+ split_store *store
= new split_store (bytepos, group->width, align);
unsigned int first = 0;
find_constituent_stores (group, &store->orig_stores,
if (group->load_align[i])
group_load_align = MIN (group_load_align, group->load_align[i]);
+ if (bzero_first)
+ {
+ store_immediate_info *gstore;
+ FOR_EACH_VEC_ELT (group->stores, first, gstore)
+ if (!gimple_clobber_p (gstore->stmt))
+ break;
+ ++first;
+ ret = 1;
+ if (split_stores)
+ {
+ split_store *store
+ = new split_store (bytepos, gstore->bitsize, align_base);
+ store->orig_stores.safe_push (gstore);
+ store->orig = true;
+ any_orig = true;
+ split_stores->safe_push (store);
+ }
+ }
+
while (size > 0)
{
if ((allow_unaligned_store || group_align <= BITS_PER_UNIT)
- && group->mask[try_pos - bytepos] == (unsigned char) ~0U)
+ && (group->mask[try_pos - bytepos] == (unsigned char) ~0U
+ || (bzero_first && group->val[try_pos - bytepos] == 0)))
{
/* Skip padding bytes. */
++try_pos;
unsigned HOST_WIDE_INT align_bitpos
= (try_bitpos - align_base) & (group_align - 1);
unsigned HOST_WIDE_INT align = group_align;
+ bool found_orig = false;
if (align_bitpos)
align = least_bit_hwi (align_bitpos);
if (!allow_unaligned_store)
}
store_immediate_info *info
= find_constituent_stores (group, NULL, &first, try_bitpos, try_size);
- if (info)
+ if (info && !gimple_clobber_p (info->stmt))
{
/* If there is just one original statement for the range, see if
we can just reuse the original store which could be even larger
stmt_end - try_bitpos);
if (info && info->bitpos >= try_bitpos)
{
- try_size = stmt_end - try_bitpos;
- goto found;
+ store_immediate_info *info2 = NULL;
+ unsigned int first_copy = first;
+ if (info->bitpos > try_bitpos
+ && stmt_end - try_bitpos <= try_size)
+ {
+ info2 = find_constituent_stores (group, NULL, &first_copy,
+ try_bitpos,
+ info->bitpos - try_bitpos);
+ gcc_assert (info2 == NULL || gimple_clobber_p (info2->stmt));
+ }
+ if (info2 == NULL && stmt_end - try_bitpos < try_size)
+ {
+ info2 = find_constituent_stores (group, NULL, &first_copy,
+ stmt_end,
+ (try_bitpos + try_size)
+ - stmt_end);
+ gcc_assert (info2 == NULL || gimple_clobber_p (info2->stmt));
+ }
+ if (info2 == NULL)
+ {
+ try_size = stmt_end - try_bitpos;
+ found_orig = true;
+ goto found;
+ }
}
}
/* Now look for whole padding bytes at the end of that bitsize. */
for (nonmasked = try_size / BITS_PER_UNIT; nonmasked > 0; --nonmasked)
if (group->mask[try_pos - bytepos + nonmasked - 1]
- != (unsigned char) ~0U)
+ != (unsigned char) ~0U
+ && (!bzero_first
+ || group->val[try_pos - bytepos + nonmasked - 1] != 0))
break;
- if (nonmasked == 0)
+ if (nonmasked == 0 || (info && gimple_clobber_p (info->stmt)))
{
/* If entire try_size range is padding, skip it. */
try_pos += try_size / BITS_PER_UNIT;
/* Now look for whole padding bytes at the start of that bitsize. */
unsigned int try_bytesize = try_size / BITS_PER_UNIT, masked;
for (masked = 0; masked < try_bytesize; ++masked)
- if (group->mask[try_pos - bytepos + masked] != (unsigned char) ~0U)
+ if (group->mask[try_pos - bytepos + masked] != (unsigned char) ~0U
+ && (!bzero_first
+ || group->val[try_pos - bytepos + masked] != 0))
break;
masked *= BITS_PER_UNIT;
gcc_assert (masked < try_size);
if (split_stores)
{
- struct split_store *store
+ split_store *store
= new split_store (try_pos, try_size, align);
info = find_constituent_stores (group, &store->orig_stores,
&first, try_bitpos, try_size);
if (info
+ && !gimple_clobber_p (info->stmt)
&& info->bitpos >= try_bitpos
- && info->bitpos + info->bitsize <= try_bitpos + try_size)
+ && info->bitpos + info->bitsize <= try_bitpos + try_size
+ && (store->orig_stores.length () == 1
+ || found_orig
+ || (info->bitpos == try_bitpos
+ && (info->bitpos + info->bitsize
+ == try_bitpos + try_size))))
{
store->orig = true;
any_orig = true;
if (total_orig)
{
unsigned int i;
- struct split_store *store;
+ split_store *store;
/* If we are reusing some original stores and any of the
original SSA_NAMEs had multiple uses, we need to subtract
those now before we add the new ones. */
bool
imm_store_chain_info::output_merged_store (merged_store_group *group)
{
- split_store *split_store;
- unsigned int i;
- unsigned HOST_WIDE_INT start_byte_pos
+ const unsigned HOST_WIDE_INT start_byte_pos
= group->bitregion_start / BITS_PER_UNIT;
-
unsigned int orig_num_stmts = group->stores.length ();
if (orig_num_stmts < 2)
return false;
- auto_vec<struct split_store *, 32> split_stores;
bool allow_unaligned_store
- = !STRICT_ALIGNMENT && PARAM_VALUE (PARAM_STORE_MERGING_ALLOW_UNALIGNED);
+ = !STRICT_ALIGNMENT && param_store_merging_allow_unaligned;
bool allow_unaligned_load = allow_unaligned_store;
- if (allow_unaligned_store)
+ bool bzero_first = false;
+ store_immediate_info *store;
+ unsigned int num_clobber_stmts = 0;
+ if (group->stores[0]->rhs_code == INTEGER_CST)
+ {
+ unsigned int i;
+ FOR_EACH_VEC_ELT (group->stores, i, store)
+ if (gimple_clobber_p (store->stmt))
+ num_clobber_stmts++;
+ else if (TREE_CODE (gimple_assign_rhs1 (store->stmt)) == CONSTRUCTOR
+ && CONSTRUCTOR_NELTS (gimple_assign_rhs1 (store->stmt)) == 0
+ && group->start == store->bitpos
+ && group->width == store->bitsize
+ && (group->start % BITS_PER_UNIT) == 0
+ && (group->width % BITS_PER_UNIT) == 0)
+ {
+ bzero_first = true;
+ break;
+ }
+ else
+ break;
+ FOR_EACH_VEC_ELT_FROM (group->stores, i, store, i)
+ if (gimple_clobber_p (store->stmt))
+ num_clobber_stmts++;
+ if (num_clobber_stmts == orig_num_stmts)
+ return false;
+ orig_num_stmts -= num_clobber_stmts;
+ }
+ if (allow_unaligned_store || bzero_first)
{
/* If unaligned stores are allowed, see how many stores we'd emit
for unaligned and how many stores we'd emit for aligned stores.
- Only use unaligned stores if it allows fewer stores than aligned. */
- unsigned aligned_cnt
- = split_group (group, false, allow_unaligned_load, NULL, NULL, NULL);
- unsigned unaligned_cnt
- = split_group (group, true, allow_unaligned_load, NULL, NULL, NULL);
- if (aligned_cnt <= unaligned_cnt)
+ Only use unaligned stores if it allows fewer stores than aligned.
+ Similarly, if there is a whole region clear first, prefer expanding
+ it together compared to expanding clear first followed by merged
+ further stores. */
+ unsigned cnt[4] = { ~0, ~0, ~0, ~0 };
+ int pass_min = 0;
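+      /* Each pass encodes a candidate strategy: bit 0 of PASS selects
+         unaligned stores, bit 1 selects expanding the initial
+         whole-group clearing store separately; disabled combinations
+         are skipped just below.  */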
+ for (int pass = 0; pass < 4; ++pass)
+ {
+ if (!allow_unaligned_store && (pass & 1) != 0)
+ continue;
+ if (!bzero_first && (pass & 2) != 0)
+ continue;
+ cnt[pass] = split_group (group, (pass & 1) != 0,
+ allow_unaligned_load, (pass & 2) != 0,
+ NULL, NULL, NULL);
+ if (cnt[pass] < cnt[pass_min])
+ pass_min = pass;
+ }
+ if ((pass_min & 1) == 0)
allow_unaligned_store = false;
+ if ((pass_min & 2) == 0)
+ bzero_first = false;
}
- unsigned total_orig, total_new;
- split_group (group, allow_unaligned_store, allow_unaligned_load,
+
+ auto_vec<class split_store *, 32> split_stores;
+ split_store *split_store;
+ unsigned total_orig, total_new, i;
+ split_group (group, allow_unaligned_store, allow_unaligned_load, bzero_first,
&split_stores, &total_orig, &total_new);
- if (split_stores.length () >= orig_num_stmts)
+ /* Determine if there is a clobber covering the whole group at the start,
+ followed by proposed split stores that cover the whole group. In that
+ case, prefer the transformation even if
+ split_stores.length () == orig_num_stmts. */
+ bool clobber_first = false;
+ if (num_clobber_stmts
+ && gimple_clobber_p (group->stores[0]->stmt)
+ && group->start == group->stores[0]->bitpos
+ && group->width == group->stores[0]->bitsize
+ && (group->start % BITS_PER_UNIT) == 0
+ && (group->width % BITS_PER_UNIT) == 0)
+ {
+ clobber_first = true;
+ unsigned HOST_WIDE_INT pos = group->start / BITS_PER_UNIT;
+ FOR_EACH_VEC_ELT (split_stores, i, split_store)
+ if (split_store->bytepos != pos)
+ {
+ clobber_first = false;
+ break;
+ }
+ else
+ pos += split_store->size / BITS_PER_UNIT;
+ if (pos != (group->start + group->width) / BITS_PER_UNIT)
+ clobber_first = false;
+ }
+
+ if (split_stores.length () >= orig_num_stmts + clobber_first)
{
/* We didn't manage to reduce the number of statements. Bail out. */
if (dump_file && (dump_flags & TDF_DETAILS))
fprintf (dump_file, "Exceeded original number of stmts (%u)."
delete split_store;
return false;
}
+ if (group->stores[0]->rhs_code == INTEGER_CST)
+ {
+ bool all_orig = true;
+ FOR_EACH_VEC_ELT (split_stores, i, split_store)
+ if (!split_store->orig)
+ {
+ all_orig = false;
+ break;
+ }
+ if (all_orig)
+ {
+ unsigned int cnt = split_stores.length ();
+ store_immediate_info *store;
+ FOR_EACH_VEC_ELT (group->stores, i, store)
+ if (gimple_clobber_p (store->stmt))
+ ++cnt;
+ /* Punt if we wouldn't make any real changes, i.e. keep all
+ orig stmts + all clobbers. */
+ if (cnt == group->stores.length ())
+ {
+ if (dump_file && (dump_flags & TDF_DETAILS))
+ fprintf (dump_file, "Exceeded original number of stmts (%u)."
+ " Not profitable to emit new sequence.\n",
+ orig_num_stmts);
+ FOR_EACH_VEC_ELT (split_stores, i, split_store)
+ delete split_store;
+ return false;
+ }
+ }
+ }
gimple_stmt_iterator last_gsi = gsi_for_stmt (group->last_stmt);
gimple_seq seq = NULL;
new_vuse = gimple_vuse (group->last_stmt);
tree bswap_res = NULL_TREE;
+ /* Clobbers are not removed. */
+ if (gimple_clobber_p (group->last_stmt))
+ {
+ new_vuse = make_ssa_name (gimple_vop (cfun), group->last_stmt);
+ gimple_set_vdef (group->last_stmt, new_vuse);
+ }
+
if (group->stores[0]->rhs_code == LROTATE_EXPR
|| group->stores[0]->rhs_code == NOP_EXPR)
{
FOR_EACH_VEC_ELT (split_stores, i, split_store)
{
- unsigned HOST_WIDE_INT try_size = split_store->size;
- unsigned HOST_WIDE_INT try_pos = split_store->bytepos;
- unsigned HOST_WIDE_INT try_bitpos = try_pos * BITS_PER_UNIT;
- unsigned HOST_WIDE_INT align = split_store->align;
+ const unsigned HOST_WIDE_INT try_size = split_store->size;
+ const unsigned HOST_WIDE_INT try_pos = split_store->bytepos;
+ const unsigned HOST_WIDE_INT try_bitpos = try_pos * BITS_PER_UNIT;
+ const unsigned HOST_WIDE_INT try_align = split_store->align;
+ const unsigned HOST_WIDE_INT try_offset = try_pos - start_byte_pos;
tree dest, src;
location_t loc;
+
if (split_store->orig)
{
- /* If there is just a single constituent store which covers
- the whole area, just reuse the lhs and rhs. */
- gimple *orig_stmt = split_store->orig_stores[0]->stmt;
+ /* If there is just a single non-clobber constituent store
+ which covers the whole area, just reuse the lhs and rhs. */
+ gimple *orig_stmt = NULL;
+ store_immediate_info *store;
+ unsigned int j;
+ FOR_EACH_VEC_ELT (split_store->orig_stores, j, store)
+ if (!gimple_clobber_p (store->stmt))
+ {
+ orig_stmt = store->stmt;
+ break;
+ }
dest = gimple_assign_lhs (orig_stmt);
src = gimple_assign_rhs1 (orig_stmt);
loc = gimple_location (orig_stmt);
orig_stmts.safe_push (info->stmt);
tree offset_type
= get_alias_type_for_stmts (orig_stmts, false, &clique, &base);
+ tree dest_type;
loc = get_location_for_stmts (orig_stmts);
orig_stmts.truncate (0);
- tree int_type = build_nonstandard_integer_type (try_size, UNSIGNED);
- int_type = build_aligned_type (int_type, align);
- dest = fold_build2 (MEM_REF, int_type, addr,
+ if (group->string_concatenation)
+ dest_type
+ = build_array_type_nelts (char_type_node,
+ try_size / BITS_PER_UNIT);
+ else
+ {
+ dest_type = build_nonstandard_integer_type (try_size, UNSIGNED);
+ dest_type = build_aligned_type (dest_type, try_align);
+ }
+ dest = fold_build2 (MEM_REF, dest_type, addr,
build_int_cst (offset_type, try_pos));
if (TREE_CODE (dest) == MEM_REF)
{
}
tree mask;
- if (bswap_res)
+ if (bswap_res || group->string_concatenation)
mask = integer_zero_node;
else
- mask = native_interpret_expr (int_type,
- group->mask + try_pos
- - start_byte_pos,
+ mask = native_interpret_expr (dest_type,
+ group->mask + try_offset,
group->buf_size);
tree ops[2];
store_operand_info &op = split_store->orig_stores[0]->ops[j];
if (bswap_res)
ops[j] = bswap_res;
+ else if (group->string_concatenation)
+ {
+ ops[j] = build_string (try_size / BITS_PER_UNIT,
+ (const char *) group->val + try_offset);
+ TREE_TYPE (ops[j]) = dest_type;
+ }
else if (op.base_addr)
{
FOR_EACH_VEC_ELT (split_store->orig_stores, k, info)
warnings in that case. */
TREE_NO_WARNING (ops[j]) = 1;
- stmt = gimple_build_assign (make_ssa_name (int_type),
- ops[j]);
+ stmt = gimple_build_assign (make_ssa_name (dest_type), ops[j]);
gimple_set_location (stmt, load_loc);
if (gsi_bb (load_gsi[j]))
{
ops[j] = gimple_assign_lhs (stmt);
tree xor_mask;
enum tree_code inv_op
- = invert_op (split_store, j, int_type, xor_mask);
+ = invert_op (split_store, j, dest_type, xor_mask);
if (inv_op != NOP_EXPR)
{
- stmt = gimple_build_assign (make_ssa_name (int_type),
+ stmt = gimple_build_assign (make_ssa_name (dest_type),
inv_op, ops[j], xor_mask);
gimple_set_location (stmt, load_loc);
ops[j] = gimple_assign_lhs (stmt);
}
}
else
- ops[j] = native_interpret_expr (int_type,
- group->val + try_pos
- - start_byte_pos,
+ ops[j] = native_interpret_expr (dest_type,
+ group->val + try_offset,
group->buf_size);
}
orig_stmts.truncate (0);
stmt
- = gimple_build_assign (make_ssa_name (int_type),
+ = gimple_build_assign (make_ssa_name (dest_type),
split_store->orig_stores[0]->rhs_code,
ops[0], ops[1]);
gimple_set_location (stmt, bit_loc);
src = gimple_assign_lhs (stmt);
tree xor_mask;
enum tree_code inv_op;
- inv_op = invert_op (split_store, 2, int_type, xor_mask);
+ inv_op = invert_op (split_store, 2, dest_type, xor_mask);
if (inv_op != NOP_EXPR)
{
- stmt = gimple_build_assign (make_ssa_name (int_type),
+ stmt = gimple_build_assign (make_ssa_name (dest_type),
inv_op, src, xor_mask);
gimple_set_location (stmt, bit_loc);
if (load_addr[1] == NULL_TREE && gsi_bb (load_gsi[0]))
gimple_seq_add_stmt_without_update (&seq, stmt);
src = gimple_assign_lhs (stmt);
}
- if (!useless_type_conversion_p (int_type, TREE_TYPE (src)))
+ if (!useless_type_conversion_p (dest_type, TREE_TYPE (src)))
{
- stmt = gimple_build_assign (make_ssa_name (int_type),
+ stmt = gimple_build_assign (make_ssa_name (dest_type),
NOP_EXPR, src);
gimple_seq_add_stmt_without_update (&seq, stmt);
src = gimple_assign_lhs (stmt);
}
- inv_op = invert_op (split_store, 2, int_type, xor_mask);
+ inv_op = invert_op (split_store, 2, dest_type, xor_mask);
if (inv_op != NOP_EXPR)
{
- stmt = gimple_build_assign (make_ssa_name (int_type),
+ stmt = gimple_build_assign (make_ssa_name (dest_type),
inv_op, src, xor_mask);
gimple_set_location (stmt, loc);
gimple_seq_add_stmt_without_update (&seq, stmt);
const HOST_WIDE_INT end_gap
= (try_bitpos + try_size) - (info->bitpos + info->bitsize);
tree tem = info->ops[0].val;
+ if (!INTEGRAL_TYPE_P (TREE_TYPE (tem)))
+ {
+ const unsigned HOST_WIDE_INT size
+ = tree_to_uhwi (TYPE_SIZE (TREE_TYPE (tem)));
+ tree integer_type
+ = build_nonstandard_integer_type (size, UNSIGNED);
+ tem = gimple_build (&seq, loc, VIEW_CONVERT_EXPR,
+ integer_type, tem);
+ }
if (TYPE_PRECISION (TREE_TYPE (tem)) <= info->bitsize)
{
tree bitfield_type
tem = gimple_build (&seq, loc,
RSHIFT_EXPR, TREE_TYPE (tem), tem,
build_int_cst (NULL_TREE, -shift));
- tem = gimple_convert (&seq, loc, int_type, tem);
+ tem = gimple_convert (&seq, loc, dest_type, tem);
if (shift > 0)
tem = gimple_build (&seq, loc,
- LSHIFT_EXPR, int_type, tem,
+ LSHIFT_EXPR, dest_type, tem,
build_int_cst (NULL_TREE, shift));
src = gimple_build (&seq, loc,
- BIT_IOR_EXPR, int_type, tem, src);
+ BIT_IOR_EXPR, dest_type, tem, src);
}
if (!integer_zerop (mask))
{
- tree tem = make_ssa_name (int_type);
+ tree tem = make_ssa_name (dest_type);
tree load_src = unshare_expr (dest);
/* The load might load some or all bits uninitialized,
avoid -W*uninitialized warnings in that case.
/* FIXME: If there is a single chunk of zero bits in mask,
perhaps use BIT_INSERT_EXPR instead? */
- stmt = gimple_build_assign (make_ssa_name (int_type),
+ stmt = gimple_build_assign (make_ssa_name (dest_type),
BIT_AND_EXPR, tem, mask);
gimple_set_location (stmt, loc);
gimple_seq_add_stmt_without_update (&seq, stmt);
tem = gimple_assign_lhs (stmt);
if (TREE_CODE (src) == INTEGER_CST)
- src = wide_int_to_tree (int_type,
+ src = wide_int_to_tree (dest_type,
wi::bit_and_not (wi::to_wide (src),
wi::to_wide (mask)));
else
{
tree nmask
- = wide_int_to_tree (int_type,
+ = wide_int_to_tree (dest_type,
wi::bit_not (wi::to_wide (mask)));
- stmt = gimple_build_assign (make_ssa_name (int_type),
+ stmt = gimple_build_assign (make_ssa_name (dest_type),
BIT_AND_EXPR, src, nmask);
gimple_set_location (stmt, loc);
gimple_seq_add_stmt_without_update (&seq, stmt);
src = gimple_assign_lhs (stmt);
}
- stmt = gimple_build_assign (make_ssa_name (int_type),
+ stmt = gimple_build_assign (make_ssa_name (dest_type),
BIT_IOR_EXPR, tem, src);
gimple_set_location (stmt, loc);
gimple_seq_add_stmt_without_update (&seq, stmt);
gimple_set_vuse (stmt, new_vuse);
gimple_seq_add_stmt_without_update (&seq, stmt);
+ if (group->lp_nr && stmt_could_throw_p (cfun, stmt))
+ add_stmt_to_eh_lp (stmt, group->lp_nr);
+
tree new_vdef;
if (i < split_stores.length () - 1)
new_vdef = make_ssa_name (gimple_vop (cfun), stmt);
if (dump_flags & TDF_DETAILS)
print_gimple_seq (dump_file, seq, 0, TDF_VOPS | TDF_MEMSYMS);
}
- gsi_insert_seq_after (&last_gsi, seq, GSI_SAME_STMT);
+
+ if (gimple_clobber_p (group->last_stmt))
+ update_stmt (group->last_stmt);
+
+ if (group->lp_nr > 0)
+ {
+ /* We're going to insert a sequence of (potentially) throwing stores
+ into an active EH region. This means that we're going to create
+ new basic blocks with EH edges pointing to the post landing pad
+ and, therefore, to have to update its PHI nodes, if any. For the
+ virtual PHI node, we're going to use the VDEFs created above, but
+ for the other nodes, we need to record the original reaching defs. */
+ eh_landing_pad lp = get_eh_landing_pad_from_number (group->lp_nr);
+ basic_block lp_bb = label_to_block (cfun, lp->post_landing_pad);
+ basic_block last_bb = gimple_bb (group->last_stmt);
+ edge last_edge = find_edge (last_bb, lp_bb);
+ auto_vec<tree, 16> last_defs;
+ gphi_iterator gpi;
+ for (gpi = gsi_start_phis (lp_bb); !gsi_end_p (gpi); gsi_next (&gpi))
+ {
+ gphi *phi = gpi.phi ();
+ tree last_def;
+ if (virtual_operand_p (gimple_phi_result (phi)))
+ last_def = NULL_TREE;
+ else
+ last_def = gimple_phi_arg_def (phi, last_edge->dest_idx);
+ last_defs.safe_push (last_def);
+ }
+
+ /* Do the insertion. Then, if new basic blocks have been created in the
+ process, rewind the chain of VDEFs created above to walk the new basic
+ blocks and update the corresponding arguments of the PHI nodes. */
+ update_modified_stmts (seq);
+ if (gimple_find_sub_bbs (seq, &last_gsi))
+ while (last_vdef != gimple_vuse (group->last_stmt))
+ {
+ gimple *stmt = SSA_NAME_DEF_STMT (last_vdef);
+ if (stmt_could_throw_p (cfun, stmt))
+ {
+ edge new_edge = find_edge (gimple_bb (stmt), lp_bb);
+ unsigned int i;
+ for (gpi = gsi_start_phis (lp_bb), i = 0;
+ !gsi_end_p (gpi);
+ gsi_next (&gpi), i++)
+ {
+ gphi *phi = gpi.phi ();
+ tree new_def;
+ if (virtual_operand_p (gimple_phi_result (phi)))
+ new_def = last_vdef;
+ else
+ new_def = last_defs[i];
+ add_phi_arg (phi, new_def, new_edge, UNKNOWN_LOCATION);
+ }
+ }
+ last_vdef = gimple_vuse (stmt);
+ }
+ }
+ else
+ gsi_insert_seq_after (&last_gsi, seq, GSI_SAME_STMT);
+
for (int j = 0; j < 2; ++j)
if (load_seq[j])
gsi_insert_seq_after (&load_gsi[j], load_seq[j], GSI_SAME_STMT);
bool ret = false;
FOR_EACH_VEC_ELT (m_merged_store_groups, i, merged_store)
{
- if (output_merged_store (merged_store))
+ if (dbg_cnt (store_merging)
+ && output_merged_store (merged_store))
{
unsigned int j;
store_immediate_info *store;
{
gimple *stmt = store->stmt;
gimple_stmt_iterator gsi = gsi_for_stmt (stmt);
+ /* Don't remove clobbers, they are still useful even if
+ everything is overwritten afterwards. */
+ if (gimple_clobber_p (stmt))
+ continue;
gsi_remove (&gsi, true);
+ if (store->lp_nr)
+ remove_stmt_from_eh_lp (stmt);
if (stmt != merged_store->last_stmt)
{
unlink_stmt_vdef (stmt);
static bool
lhs_valid_for_store_merging_p (tree lhs)
{
- tree_code code = TREE_CODE (lhs);
-
- if (code == ARRAY_REF || code == ARRAY_RANGE_REF || code == MEM_REF
- || code == COMPONENT_REF || code == BIT_FIELD_REF)
+ if (DECL_P (lhs))
return true;
- return false;
+ switch (TREE_CODE (lhs))
+ {
+ case ARRAY_REF:
+ case ARRAY_RANGE_REF:
+ case BIT_FIELD_REF:
+ case COMPONENT_REF:
+ case MEM_REF:
+ case VIEW_CONVERT_EXPR:
+ return true;
+ default:
+ return false;
+ }
+
+ gcc_unreachable ();
}
/* Return true if the tree RHS is a constant we want to consider
rhs_valid_for_store_merging_p (tree rhs)
{
unsigned HOST_WIDE_INT size;
+ if (TREE_CODE (rhs) == CONSTRUCTOR
+ && CONSTRUCTOR_NELTS (rhs) == 0
+ && TYPE_SIZE_UNIT (TREE_TYPE (rhs))
+ && tree_fits_uhwi_p (TYPE_SIZE_UNIT (TREE_TYPE (rhs))))
+ return true;
return (GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (rhs))).is_constant (&size)
&& native_encode_expr (rhs, NULL, size) != 0);
}
+/* Adjust *PBITPOS, *PBITREGION_START and *PBITREGION_END by BYTE_OFF bytes
+ and return true on success or false on failure. */
+
+static bool
+adjust_bit_pos (poly_offset_int byte_off,
+ poly_int64 *pbitpos,
+ poly_uint64 *pbitregion_start,
+ poly_uint64 *pbitregion_end)
+{
+ poly_offset_int bit_off = byte_off << LOG2_BITS_PER_UNIT;
+ bit_off += *pbitpos;
+
+ if (known_ge (bit_off, 0) && bit_off.to_shwi (pbitpos))
+ {
+ if (maybe_ne (*pbitregion_end, 0U))
+ {
+ bit_off = byte_off << LOG2_BITS_PER_UNIT;
+ bit_off += *pbitregion_start;
+ if (bit_off.to_uhwi (pbitregion_start))
+ {
+ bit_off = byte_off << LOG2_BITS_PER_UNIT;
+ bit_off += *pbitregion_end;
+ if (!bit_off.to_uhwi (pbitregion_end))
+ *pbitregion_end = 0;
+ }
+ else
+ *pbitregion_end = 0;
+ }
+ return true;
+ }
+ else
+ return false;
+}
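+/* Usage sketch for the helper above (editorial; values invented):
+
+     poly_int64 bitpos = 5;
+     poly_uint64 bitregion_start = 0, bitregion_end = 0;
+     // Fold a 2-byte offset into the bit position: 5 + 2 * 8 == 21.
+     if (adjust_bit_pos (2, &bitpos, &bitregion_start, &bitregion_end))
+       gcc_checking_assert (known_eq (bitpos, 21));
+
+   A zero bitregion_end means no region was recorded and is left as is;
+   a negative resulting bit offset makes the helper return false.  */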
+
/* If MEM is a memory reference usable for store merging (either as
store destination or for loads), return the non-NULL base_addr
and set *PBITSIZE, *PBITPOS, *PBITREGION_START and *PBITREGION_END.
PR 23684 and this way we can catch more chains. */
else if (TREE_CODE (base_addr) == MEM_REF)
{
- poly_offset_int byte_off = mem_ref_offset (base_addr);
- poly_offset_int bit_off = byte_off << LOG2_BITS_PER_UNIT;
- bit_off += bitpos;
- if (known_ge (bit_off, 0) && bit_off.to_shwi (&bitpos))
- {
- if (maybe_ne (bitregion_end, 0U))
- {
- bit_off = byte_off << LOG2_BITS_PER_UNIT;
- bit_off += bitregion_start;
- if (bit_off.to_uhwi (&bitregion_start))
- {
- bit_off = byte_off << LOG2_BITS_PER_UNIT;
- bit_off += bitregion_end;
- if (!bit_off.to_uhwi (&bitregion_end))
- bitregion_end = 0;
- }
- else
- bitregion_end = 0;
- }
- }
- else
+ if (!adjust_bit_pos (mem_ref_offset (base_addr), &bitpos,
+ &bitregion_start, &bitregion_end))
return NULL_TREE;
base_addr = TREE_OPERAND (base_addr, 0);
}
base_addr = build_fold_addr_expr (base_addr);
}
- if (known_eq (bitregion_end, 0U))
- {
- bitregion_start = round_down_to_byte_boundary (bitpos);
- bitregion_end = bitpos;
- bitregion_end = round_up_to_byte_boundary (bitregion_end + bitsize);
- }
-
- if (offset != NULL_TREE)
+ if (offset)
{
/* If the access is variable offset then a base decl has to be
address-taken to be able to emit pointer-based stores to it.
base up to the first variable part and then wrapping that inside
a BIT_FIELD_REF. */
tree base = get_base_address (base_addr);
- if (! base
- || (DECL_P (base) && ! TREE_ADDRESSABLE (base)))
+ if (!base || (DECL_P (base) && !TREE_ADDRESSABLE (base)))
return NULL_TREE;
+ /* Similarly to above for the base, remove constant from the offset. */
+ if (TREE_CODE (offset) == PLUS_EXPR
+ && TREE_CODE (TREE_OPERAND (offset, 1)) == INTEGER_CST
+ && adjust_bit_pos (wi::to_poly_offset (TREE_OPERAND (offset, 1)),
+ &bitpos, &bitregion_start, &bitregion_end))
+ offset = TREE_OPERAND (offset, 0);
+
base_addr = build2 (POINTER_PLUS_EXPR, TREE_TYPE (base_addr),
base_addr, offset);
}
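+  /* E.g. (illustration): a 7-bit store at bit position 13 gets the
+     byte-aligned default region [8, 24).  */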
+ if (known_eq (bitregion_end, 0U))
+ {
+ bitregion_start = round_down_to_byte_boundary (bitpos);
+ bitregion_end = round_up_to_byte_boundary (bitpos + bitsize);
+ }
+
*pbitsize = bitsize;
*pbitpos = bitpos;
*pbitregion_start = bitregion_start;
return false;
}
+/* Return the index number of the landing pad for STMT, if any. */
+
+static int
+lp_nr_for_store (gimple *stmt)
+{
+ if (!cfun->can_throw_non_call_exceptions || !cfun->eh)
+ return 0;
+
+ if (!stmt_could_throw_p (cfun, stmt))
+ return 0;
+
+ return lookup_stmt_eh_lp (stmt);
+}
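+/* Editorial note: lookup_stmt_eh_lp returns a positive landing-pad
+   number, 0 outside any EH region, and a negative region number inside
+   MUST_NOT_THROW regions; chains only merge stores with equal lp_nr,
+   and the PHI rewiring above is done for lp_nr > 0 only.  */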
+
/* Record the store STMT for store merging optimization if it can be
- optimized. */
+ optimized. Return true if any changes were made. */
-void
+bool
pass_store_merging::process_store (gimple *stmt)
{
tree lhs = gimple_assign_lhs (stmt);
tree rhs = gimple_assign_rhs1 (stmt);
- poly_uint64 bitsize, bitpos;
- poly_uint64 bitregion_start, bitregion_end;
+ poly_uint64 bitsize, bitpos = 0;
+ poly_uint64 bitregion_start = 0, bitregion_end = 0;
tree base_addr
= mem_valid_for_store_merging (lhs, &bitsize, &bitpos,
&bitregion_start, &bitregion_end);
if (known_eq (bitsize, 0U))
- return;
+ return false;
bool invalid = (base_addr == NULL_TREE
|| (maybe_gt (bitsize,
(unsigned int) MAX_BITSIZE_MODE_ANY_INT)
- && (TREE_CODE (rhs) != INTEGER_CST)));
+ && TREE_CODE (rhs) != INTEGER_CST
+ && (TREE_CODE (rhs) != CONSTRUCTOR
+ || CONSTRUCTOR_NELTS (rhs) != 0)));
enum tree_code rhs_code = ERROR_MARK;
bool bit_not_p = false;
struct symbolic_number n;
store_operand_info ops[2];
if (invalid)
;
+ else if (TREE_CODE (rhs) == STRING_CST)
+ {
+ rhs_code = STRING_CST;
+ ops[0].val = rhs;
+ }
else if (rhs_valid_for_store_merging_p (rhs))
{
rhs_code = INTEGER_CST;
ops[0].val = rhs;
}
- else if (TREE_CODE (rhs) != SSA_NAME)
- invalid = true;
- else
+ else if (TREE_CODE (rhs) == SSA_NAME)
{
gimple *def_stmt = SSA_NAME_DEF_STMT (rhs), *def_stmt1, *def_stmt2;
if (!is_gimple_assign (def_stmt))
&& bitsize.is_constant (&const_bitsize)
&& ((const_bitsize % BITS_PER_UNIT) != 0
|| !multiple_p (bitpos, BITS_PER_UNIT))
- && const_bitsize <= 64)
+ && const_bitsize <= MAX_FIXED_MODE_SIZE)
{
/* Bypass a conversion to the bit-field type. */
if (!bit_not_p
invalid = false;
}
}
+ else
+ invalid = true;
unsigned HOST_WIDE_INT const_bitsize, const_bitpos;
unsigned HOST_WIDE_INT const_bitregion_start, const_bitregion_end;
|| !bitpos.is_constant (&const_bitpos)
|| !bitregion_start.is_constant (&const_bitregion_start)
|| !bitregion_end.is_constant (&const_bitregion_end))
- {
- terminate_all_aliasing_chains (NULL, stmt);
- return;
- }
+ return terminate_all_aliasing_chains (NULL, stmt);
if (!ins_stmt)
memset (&n, 0, sizeof (n));
- struct imm_store_chain_info **chain_info = NULL;
+ class imm_store_chain_info **chain_info = NULL;
+ bool ret = false;
if (base_addr)
chain_info = m_stores.get (base_addr);
const_bitregion_start,
const_bitregion_end,
stmt, ord, rhs_code, n, ins_stmt,
- bit_not_p, ops[0], ops[1]);
+ bit_not_p, lp_nr_for_store (stmt),
+ ops[0], ops[1]);
if (dump_file && (dump_flags & TDF_DETAILS))
{
fprintf (dump_file, "Recording immediate store from stmt:\n");
print_gimple_stmt (dump_file, stmt, 0);
}
(*chain_info)->m_store_info.safe_push (info);
- terminate_all_aliasing_chains (chain_info, stmt);
+ ret |= terminate_all_aliasing_chains (chain_info, stmt);
/* If we reach the limit of stores to merge in a chain terminate and
process the chain now. */
if ((*chain_info)->m_store_info.length ()
- == (unsigned int) PARAM_VALUE (PARAM_MAX_STORES_TO_MERGE))
+ == (unsigned int) param_max_stores_to_merge)
{
if (dump_file && (dump_flags & TDF_DETAILS))
fprintf (dump_file,
"Reached maximum number of statements to merge:\n");
- terminate_and_release_chain (*chain_info);
+ ret |= terminate_and_process_chain (*chain_info);
}
- return;
+ return ret;
}
/* Store aliases any existing chain? */
- terminate_all_aliasing_chains (NULL, stmt);
+ ret |= terminate_all_aliasing_chains (NULL, stmt);
/* Start a new chain. */
- struct imm_store_chain_info *new_chain
+ class imm_store_chain_info *new_chain
= new imm_store_chain_info (m_stores_head, base_addr);
info = new store_immediate_info (const_bitsize, const_bitpos,
const_bitregion_start,
const_bitregion_end,
stmt, 0, rhs_code, n, ins_stmt,
- bit_not_p, ops[0], ops[1]);
+ bit_not_p, lp_nr_for_store (stmt),
+ ops[0], ops[1]);
new_chain->m_store_info.safe_push (info);
m_stores.put (base_addr, new_chain);
if (dump_file && (dump_flags & TDF_DETAILS))
print_generic_expr (dump_file, base_addr);
fprintf (dump_file, "\n");
}
+ return ret;
+}
+
+/* Return true if STMT is a store valid for store merging. */
+
+static bool
+store_valid_for_store_merging_p (gimple *stmt)
+{
+ return gimple_assign_single_p (stmt)
+ && gimple_vdef (stmt)
+ && lhs_valid_for_store_merging_p (gimple_assign_lhs (stmt))
+ && (!gimple_has_volatile_ops (stmt) || gimple_clobber_p (stmt));
+}
+
+enum basic_block_status { BB_INVALID, BB_VALID, BB_EXTENDED_VALID };
+
+/* Return the status of basic block BB wrt store merging. */
+
+static enum basic_block_status
+get_status_for_store_merging (basic_block bb)
+{
+ unsigned int num_statements = 0;
+ gimple_stmt_iterator gsi;
+ edge e;
+
+ for (gsi = gsi_after_labels (bb); !gsi_end_p (gsi); gsi_next (&gsi))
+ {
+ gimple *stmt = gsi_stmt (gsi);
+
+ if (is_gimple_debug (stmt))
+ continue;
+
+ if (store_valid_for_store_merging_p (stmt) && ++num_statements >= 2)
+ break;
+ }
+
+ if (num_statements == 0)
+ return BB_INVALID;
+
+ if (cfun->can_throw_non_call_exceptions && cfun->eh
+ && store_valid_for_store_merging_p (gimple_seq_last_stmt (bb_seq (bb)))
+ && (e = find_fallthru_edge (bb->succs))
+ && e->dest == bb->next_bb)
+ return BB_EXTENDED_VALID;
+
+ return num_statements >= 2 ? BB_VALID : BB_INVALID;
}
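/* An illustration of BB_EXTENDED_VALID (editorial; invented function,
   assumes -fnon-call-exceptions so each trapping store ends its block):

     void
     f (struct S *p)
     {
       p->a = 1;  // may trap: last stmt of its BB, BB_EXTENDED_VALID
       p->b = 2;  // single-pred fallthru successor: chain stays open
     }
*/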
/* Entry point for the pass. Go over each basic block recording chains of
{
basic_block bb;
hash_set<gimple *> orig_stmts;
+ bool changed = false, open_chains = false;
+
+ /* If the function can throw and catch non-call exceptions, we'll be trying
+ to merge stores across different basic blocks so we need to first unsplit
+ the EH edges in order to streamline the CFG of the function. */
+ if (cfun->can_throw_non_call_exceptions && cfun->eh)
+ unsplit_eh_edges ();
calculate_dominance_info (CDI_DOMINATORS);
FOR_EACH_BB_FN (bb, fun)
{
+ const basic_block_status bb_status = get_status_for_store_merging (bb);
gimple_stmt_iterator gsi;
- unsigned HOST_WIDE_INT num_statements = 0;
- /* Record the original statements so that we can keep track of
- statements emitted in this pass and not re-process new
- statements. */
- for (gsi = gsi_after_labels (bb); !gsi_end_p (gsi); gsi_next (&gsi))
- {
- if (is_gimple_debug (gsi_stmt (gsi)))
- continue;
- if (++num_statements >= 2)
- break;
+ if (open_chains && (bb_status == BB_INVALID || !single_pred_p (bb)))
+ {
+ changed |= terminate_and_process_all_chains ();
+ open_chains = false;
}
- if (num_statements < 2)
+ if (bb_status == BB_INVALID)
continue;
if (dump_file && (dump_flags & TDF_DETAILS))
if (is_gimple_debug (stmt))
continue;
- if (gimple_has_volatile_ops (stmt))
+ if (gimple_has_volatile_ops (stmt) && !gimple_clobber_p (stmt))
{
/* Terminate all chains. */
if (dump_file && (dump_flags & TDF_DETAILS))
fprintf (dump_file, "Volatile access terminates "
"all chains\n");
- terminate_and_process_all_chains ();
+ changed |= terminate_and_process_all_chains ();
+ open_chains = false;
continue;
}
- if (gimple_assign_single_p (stmt) && gimple_vdef (stmt)
- && !stmt_can_throw_internal (cfun, stmt)
- && lhs_valid_for_store_merging_p (gimple_assign_lhs (stmt)))
- process_store (stmt);
+ if (store_valid_for_store_merging_p (stmt))
+ changed |= process_store (stmt);
else
- terminate_all_aliasing_chains (NULL, stmt);
+ changed |= terminate_all_aliasing_chains (NULL, stmt);
}
- terminate_and_process_all_chains ();
+
+ if (bb_status == BB_EXTENDED_VALID)
+ open_chains = true;
+ else
+ {
+ changed |= terminate_and_process_all_chains ();
+ open_chains = false;
+ }
+ }
+
+ if (open_chains)
+ changed |= terminate_and_process_all_chains ();
+
+ /* If the function can throw and catch non-call exceptions and something
+ changed during the pass, then the CFG has (very likely) changed too. */
+ if (cfun->can_throw_non_call_exceptions && cfun->eh && changed)
+ {
+ free_dominance_info (CDI_DOMINATORS);
+ return TODO_cleanup_cfg;
}
+
return 0;
}
}
}
-/* Test shift_bytes_in_array and that it carries bits across between
+/* Test shift_bytes_in_array_left and that it carries bits across between
bytes correctly. */
static void
-verify_shift_bytes_in_array (void)
+verify_shift_bytes_in_array_left (void)
{
/* byte 1 | byte 0
00011111 | 11100000. */
memcpy (in, orig, sizeof orig);
unsigned char expected[2] = { 0x80, 0x7f };
- shift_bytes_in_array (in, sizeof (in), 2);
+ shift_bytes_in_array_left (in, sizeof (in), 2);
verify_array_eq (in, expected, sizeof (in));
memcpy (in, orig, sizeof orig);
memcpy (expected, orig, sizeof orig);
/* Check that shifting by zero doesn't change anything. */
- shift_bytes_in_array (in, sizeof (in), 0);
+ shift_bytes_in_array_left (in, sizeof (in), 0);
verify_array_eq (in, expected, sizeof (in));
}
verify_array_eq (in, expected, sizeof in);
}
-/* Test verify_clear_bit_region_be that it clears exactly the bits asked and
+/* Test clear_bit_region_be that it clears exactly the bits asked and
nothing more. */
static void
void
store_merging_c_tests (void)
{
- verify_shift_bytes_in_array ();
+ verify_shift_bytes_in_array_left ();
verify_shift_bytes_in_array_right ();
verify_clear_bit_region ();
verify_clear_bit_region_be ();