poly_int: store merging
author Richard Sandiford <richard.sandiford@linaro.org>
Wed, 20 Dec 2017 12:56:50 +0000 (12:56 +0000)
committer Richard Sandiford <rsandifo@gcc.gnu.org>
Wed, 20 Dec 2017 12:56:50 +0000 (12:56 +0000)
This patch makes pass_store_merging track polynomial sizes
and offsets.  store_immediate_info remains restricted to stores
with a constant offset and size.

2017-12-20  Richard Sandiford  <richard.sandiford@linaro.org>
    Alan Hayward  <alan.hayward@arm.com>
    David Sherwood  <david.sherwood@arm.com>

gcc/
* poly-int-types.h (round_down_to_byte_boundary): New macro.
(round_up_to_byte_boundary): Likewise.
* expr.h (get_bit_range): Add temporary shim.
* gimple-ssa-store-merging.c (store_operand_info): Change the
bitsize, bitpos, bitregion_start and bitregion_end fields from
unsigned HOST_WIDE_INT to poly_uint64.
(merged_store_group): Likewise load_align_base.
(compatible_load_p): Update accordingly.
(imm_store_chain_info::coalesce_immediate_stores): Likewise.
(split_group, imm_store_chain_info::output_merged_store): Likewise.
(mem_valid_for_store_merging): Return the bitsize, bitpos,
bitregion_start and bitregion_end as poly_uint64s rather than
unsigned HOST_WIDE_INTs.  Track polynomial offsets internally.
(handled_load): Take the bitsize, bitpos,
bitregion_start and bitregion_end as poly_uint64s rather than
unsigned HOST_WIDE_INTs.
(pass_store_merging::process_store): Update call to
mem_valid_for_store_merging.

Co-Authored-By: Alan Hayward <alan.hayward@arm.com>
Co-Authored-By: David Sherwood <david.sherwood@arm.com>
From-SVN: r255894

gcc/ChangeLog
gcc/expr.h
gcc/gimple-ssa-store-merging.c
gcc/poly-int-types.h

index c0c6c81b572da1a464e55ad17c52ef47f42b2d27..5ccbe5d3b026d31d526d2619a6334bb316b53898 100644 (file)
@@ -1,3 +1,26 @@
+2017-12-20  Richard Sandiford  <richard.sandiford@linaro.org>
+           Alan Hayward  <alan.hayward@arm.com>
+           David Sherwood  <david.sherwood@arm.com>
+
+       * poly-int-types.h (round_down_to_byte_boundary): New macro.
+       (round_up_to_byte_boundary): Likewise.
+       * expr.h (get_bit_range): Add temporary shim.
+       * gimple-ssa-store-merging.c (store_operand_info): Change the
+       bitsize, bitpos, bitregion_start and bitregion_end fields from
+       unsigned HOST_WIDE_INT to poly_uint64.
+       (merged_store_group): Likewise load_align_base.
+       (compatible_load_p): Update accordingly.
+       (imm_store_chain_info::coalesce_immediate_stores): Likewise.
+       (split_group, imm_store_chain_info::output_merged_store): Likewise.
+       (mem_valid_for_store_merging): Return the bitsize, bitpos,
+       bitregion_start and bitregion_end as poly_uint64s rather than
+       unsigned HOST_WIDE_INTs.  Track polynomial offsets internally.
+       (handled_load): Take the bitsize, bitpos,
+       bitregion_start and bitregion_end as poly_uint64s rather than
+       unsigned HOST_WIDE_INTs.
+       (pass_store_merging::process_store): Update call to
+       mem_valid_for_store_merging.
+
 2017-12-20  Richard Sandiford  <richard.sandiford@linaro.org>
            Alan Hayward  <alan.hayward@arm.com>
            David Sherwood  <david.sherwood@arm.com>
index 0603797d8670b311620973f5b6f260a8a4568871..9b0927197d57e71465fcaf7dfdb060874f82c26d 100644 (file)
@@ -243,6 +243,15 @@ extern bool emit_push_insn (rtx, machine_mode, tree, rtx, unsigned int,
 extern void get_bit_range (unsigned HOST_WIDE_INT *, unsigned HOST_WIDE_INT *,
                           tree, HOST_WIDE_INT *, tree *);
 
+/* Temporary.  */
+inline void
+get_bit_range (poly_uint64_pod *bitstart, poly_uint64_pod *bitend, tree exp,
+              poly_int64_pod *bitpos, tree *offset)
+{
+  get_bit_range (&bitstart->coeffs[0], &bitend->coeffs[0], exp,
+                &bitpos->coeffs[0], offset);
+}
+
 /* Expand an assignment that stores the value of FROM into TO.  */
 extern void expand_assignment (tree, tree, bool);
 
index 9c1d97ac657a3af305995ac0ca01fc3423245584..078acca82122df084932c7744b0c5b87a079184f 100644 (file)
@@ -1321,10 +1321,10 @@ struct store_operand_info
 {
   tree val;
   tree base_addr;
-  unsigned HOST_WIDE_INT bitsize;
-  unsigned HOST_WIDE_INT bitpos;
-  unsigned HOST_WIDE_INT bitregion_start;
-  unsigned HOST_WIDE_INT bitregion_end;
+  poly_uint64 bitsize;
+  poly_uint64 bitpos;
+  poly_uint64 bitregion_start;
+  poly_uint64 bitregion_end;
   gimple *stmt;
   bool bit_not_p;
   store_operand_info ();
@@ -1414,7 +1414,7 @@ struct merged_store_group
   /* The size of the allocated memory for val and mask.  */
   unsigned HOST_WIDE_INT buf_size;
   unsigned HOST_WIDE_INT align_base;
-  unsigned HOST_WIDE_INT load_align_base[2];
+  poly_uint64 load_align_base[2];
 
   unsigned int align;
   unsigned int load_align[2];
@@ -2198,8 +2198,8 @@ compatible_load_p (merged_store_group *merged_store,
 {
   store_immediate_info *infof = merged_store->stores[0];
   if (!info->ops[idx].base_addr
-      || (info->ops[idx].bitpos - infof->ops[idx].bitpos
-         != info->bitpos - infof->bitpos)
+      || maybe_ne (info->ops[idx].bitpos - infof->ops[idx].bitpos,
+                  info->bitpos - infof->bitpos)
       || !operand_equal_p (info->ops[idx].base_addr,
                           infof->ops[idx].base_addr, 0))
     return false;
@@ -2229,7 +2229,7 @@ compatible_load_p (merged_store_group *merged_store,
      the construction of the immediate chain info guarantees no intervening
      stores, so no further checks are needed.  Example:
      _1 = s.a; _2 = _1 & -7; s.a = _2; _3 = s.b; _4 = _3 & -7; s.b = _4;  */
-  if (info->ops[idx].bitpos == info->bitpos
+  if (known_eq (info->ops[idx].bitpos, info->bitpos)
       && operand_equal_p (info->ops[idx].base_addr, base_addr, 0))
     return true;
 
@@ -2624,8 +2624,8 @@ imm_store_chain_info::coalesce_immediate_stores ()
              && infof->ops[1].base_addr
              && info->ops[0].base_addr
              && info->ops[1].base_addr
-             && (info->ops[1].bitpos - infof->ops[0].bitpos
-                 == info->bitpos - infof->bitpos)
+             && known_eq (info->ops[1].bitpos - infof->ops[0].bitpos,
+                          info->bitpos - infof->bitpos)
              && operand_equal_p (info->ops[1].base_addr,
                                  infof->ops[0].base_addr, 0))
            {
@@ -3031,11 +3031,12 @@ split_group (merged_store_group *group, bool allow_unaligned_store,
          for (int i = 0; i < 2; ++i)
            if (group->load_align[i])
              {
-               align_bitpos = try_bitpos - group->stores[0]->bitpos;
-               align_bitpos += group->stores[0]->ops[i].bitpos;
-               align_bitpos -= group->load_align_base[i];
-               align_bitpos &= (group_load_align - 1);
-               if (align_bitpos)
+               align_bitpos
+                 = known_alignment (try_bitpos
+                                    - group->stores[0]->bitpos
+                                    + group->stores[0]->ops[i].bitpos
+                                    - group->load_align_base[i]);
+               if (align_bitpos & (group_load_align - 1))
                  {
                    unsigned HOST_WIDE_INT a = least_bit_hwi (align_bitpos);
                    load_align = MIN (load_align, a);
@@ -3491,10 +3492,10 @@ imm_store_chain_info::output_merged_store (merged_store_group *group)
 
                  unsigned HOST_WIDE_INT load_align = group->load_align[j];
                  unsigned HOST_WIDE_INT align_bitpos
-                   = (try_pos * BITS_PER_UNIT
-                      - split_store->orig_stores[0]->bitpos
-                      + op.bitpos) & (load_align - 1);
-                 if (align_bitpos)
+                   = known_alignment (try_pos * BITS_PER_UNIT
+                                      - split_store->orig_stores[0]->bitpos
+                                      + op.bitpos);
+                 if (align_bitpos & (load_align - 1))
                    load_align = least_bit_hwi (align_bitpos);
 
                  tree load_int_type
@@ -3502,10 +3503,11 @@ imm_store_chain_info::output_merged_store (merged_store_group *group)
                  load_int_type
                    = build_aligned_type (load_int_type, load_align);
 
-                 unsigned HOST_WIDE_INT load_pos
-                   = (try_pos * BITS_PER_UNIT
-                      - split_store->orig_stores[0]->bitpos
-                      + op.bitpos) / BITS_PER_UNIT;
+                 poly_uint64 load_pos
+                   = exact_div (try_pos * BITS_PER_UNIT
+                                - split_store->orig_stores[0]->bitpos
+                                + op.bitpos,
+                                BITS_PER_UNIT);
                  ops[j] = fold_build2 (MEM_REF, load_int_type, load_addr[j],
                                        build_int_cst (offset_type, load_pos));
                  if (TREE_CODE (ops[j]) == MEM_REF)
@@ -3811,30 +3813,28 @@ rhs_valid_for_store_merging_p (tree rhs)
    case.  */
 
 static tree
-mem_valid_for_store_merging (tree mem, unsigned HOST_WIDE_INT *pbitsize,
-                            unsigned HOST_WIDE_INT *pbitpos,
-                            unsigned HOST_WIDE_INT *pbitregion_start,
-                            unsigned HOST_WIDE_INT *pbitregion_end)
+mem_valid_for_store_merging (tree mem, poly_uint64 *pbitsize,
+                            poly_uint64 *pbitpos,
+                            poly_uint64 *pbitregion_start,
+                            poly_uint64 *pbitregion_end)
 {
-  HOST_WIDE_INT bitsize;
-  HOST_WIDE_INT bitpos;
-  unsigned HOST_WIDE_INT bitregion_start = 0;
-  unsigned HOST_WIDE_INT bitregion_end = 0;
+  poly_int64 bitsize, bitpos;
+  poly_uint64 bitregion_start = 0, bitregion_end = 0;
   machine_mode mode;
   int unsignedp = 0, reversep = 0, volatilep = 0;
   tree offset;
   tree base_addr = get_inner_reference (mem, &bitsize, &bitpos, &offset, &mode,
                                        &unsignedp, &reversep, &volatilep);
   *pbitsize = bitsize;
-  if (bitsize == 0)
+  if (known_eq (bitsize, 0))
     return NULL_TREE;
 
   if (TREE_CODE (mem) == COMPONENT_REF
       && DECL_BIT_FIELD_TYPE (TREE_OPERAND (mem, 1)))
     {
       get_bit_range (&bitregion_start, &bitregion_end, mem, &bitpos, &offset);
-      if (bitregion_end)
-       ++bitregion_end;
+      if (maybe_ne (bitregion_end, 0U))
+       bitregion_end += 1;
     }
 
   if (reversep)
@@ -3850,24 +3850,20 @@ mem_valid_for_store_merging (tree mem, unsigned HOST_WIDE_INT *pbitsize,
      PR 23684 and this way we can catch more chains.  */
   else if (TREE_CODE (base_addr) == MEM_REF)
     {
-      offset_int bit_off, byte_off = mem_ref_offset (base_addr);
-      bit_off = byte_off << LOG2_BITS_PER_UNIT;
+      poly_offset_int byte_off = mem_ref_offset (base_addr);
+      poly_offset_int bit_off = byte_off << LOG2_BITS_PER_UNIT;
       bit_off += bitpos;
-      if (!wi::neg_p (bit_off) && wi::fits_shwi_p (bit_off))
+      if (known_ge (bit_off, 0) && bit_off.to_shwi (&bitpos))
        {
-         bitpos = bit_off.to_shwi ();
-         if (bitregion_end)
+         if (maybe_ne (bitregion_end, 0U))
            {
              bit_off = byte_off << LOG2_BITS_PER_UNIT;
              bit_off += bitregion_start;
-             if (wi::fits_uhwi_p (bit_off))
+             if (bit_off.to_uhwi (&bitregion_start))
                {
-                 bitregion_start = bit_off.to_uhwi ();
                  bit_off = byte_off << LOG2_BITS_PER_UNIT;
                  bit_off += bitregion_end;
-                 if (wi::fits_uhwi_p (bit_off))
-                   bitregion_end = bit_off.to_uhwi ();
-                 else
+                 if (!bit_off.to_uhwi (&bitregion_end))
                    bitregion_end = 0;
                }
              else
@@ -3882,15 +3878,15 @@ mem_valid_for_store_merging (tree mem, unsigned HOST_WIDE_INT *pbitsize,
      address now.  */
   else
     {
-      if (bitpos < 0)
+      if (maybe_lt (bitpos, 0))
        return NULL_TREE;
       base_addr = build_fold_addr_expr (base_addr);
     }
 
-  if (!bitregion_end)
+  if (known_eq (bitregion_end, 0U))
     {
-      bitregion_start = ROUND_DOWN (bitpos, BITS_PER_UNIT);
-      bitregion_end = ROUND_UP (bitpos + bitsize, BITS_PER_UNIT);
+      bitregion_start = round_down_to_byte_boundary (bitpos);
+      bitregion_end = round_up_to_byte_boundary (bitpos + bitsize);
     }
 
   if (offset != NULL_TREE)
@@ -3922,9 +3918,8 @@ mem_valid_for_store_merging (tree mem, unsigned HOST_WIDE_INT *pbitsize,
 
 static bool
 handled_load (gimple *stmt, store_operand_info *op,
-             unsigned HOST_WIDE_INT bitsize, unsigned HOST_WIDE_INT bitpos,
-             unsigned HOST_WIDE_INT bitregion_start,
-             unsigned HOST_WIDE_INT bitregion_end)
+             poly_uint64 bitsize, poly_uint64 bitpos,
+             poly_uint64 bitregion_start, poly_uint64 bitregion_end)
 {
   if (!is_gimple_assign (stmt))
     return false;
@@ -3956,10 +3951,12 @@ handled_load (gimple *stmt, store_operand_info *op,
                                       &op->bitregion_start,
                                       &op->bitregion_end);
       if (op->base_addr != NULL_TREE
-         && op->bitsize == bitsize
-         && ((op->bitpos - bitpos) % BITS_PER_UNIT) == 0
-         && op->bitpos - op->bitregion_start >= bitpos - bitregion_start
-         && op->bitregion_end - op->bitpos >= bitregion_end - bitpos)
+         && known_eq (op->bitsize, bitsize)
+         && multiple_p (op->bitpos - bitpos, BITS_PER_UNIT)
+         && known_ge (op->bitpos - op->bitregion_start,
+                      bitpos - bitregion_start)
+         && known_ge (op->bitregion_end - op->bitpos,
+                      bitregion_end - bitpos))
        {
          op->stmt = stmt;
          op->val = mem;
@@ -3978,18 +3975,18 @@ pass_store_merging::process_store (gimple *stmt)
 {
   tree lhs = gimple_assign_lhs (stmt);
   tree rhs = gimple_assign_rhs1 (stmt);
-  unsigned HOST_WIDE_INT bitsize, bitpos;
-  unsigned HOST_WIDE_INT bitregion_start;
-  unsigned HOST_WIDE_INT bitregion_end;
+  poly_uint64 bitsize, bitpos;
+  poly_uint64 bitregion_start, bitregion_end;
   tree base_addr
     = mem_valid_for_store_merging (lhs, &bitsize, &bitpos,
                                   &bitregion_start, &bitregion_end);
-  if (bitsize == 0)
+  if (known_eq (bitsize, 0U))
     return;
 
   bool invalid = (base_addr == NULL_TREE
-                 || ((bitsize > MAX_BITSIZE_MODE_ANY_INT)
-                      && (TREE_CODE (rhs) != INTEGER_CST)));
+                 || (maybe_gt (bitsize,
+                               (unsigned int) MAX_BITSIZE_MODE_ANY_INT)
+                     && (TREE_CODE (rhs) != INTEGER_CST)));
   enum tree_code rhs_code = ERROR_MARK;
   bool bit_not_p = false;
   struct symbolic_number n;
@@ -4058,9 +4055,11 @@ pass_store_merging::process_store (gimple *stmt)
            invalid = true;
            break;
          }
-      if ((bitsize % BITS_PER_UNIT) == 0
-         && (bitpos % BITS_PER_UNIT) == 0
-         && bitsize <= 64
+      unsigned HOST_WIDE_INT const_bitsize;
+      if (bitsize.is_constant (&const_bitsize)
+         && multiple_p (const_bitsize, BITS_PER_UNIT)
+         && multiple_p (bitpos, BITS_PER_UNIT)
+         && const_bitsize <= 64
          && BYTES_BIG_ENDIAN == WORDS_BIG_ENDIAN)
        {
          ins_stmt = find_bswap_or_nop_1 (def_stmt, &n, 12);
@@ -4068,7 +4067,8 @@ pass_store_merging::process_store (gimple *stmt)
            {
              uint64_t nn = n.n;
              for (unsigned HOST_WIDE_INT i = 0;
-                  i < bitsize; i += BITS_PER_UNIT, nn >>= BITS_PER_MARKER)
+                  i < const_bitsize;
+                  i += BITS_PER_UNIT, nn >>= BITS_PER_MARKER)
                if ((nn & MARKER_MASK) == 0
                    || (nn & MARKER_MASK) == MARKER_BYTE_UNKNOWN)
                  {
@@ -4089,7 +4089,13 @@ pass_store_merging::process_store (gimple *stmt)
        }
     }
 
-  if (invalid)
+  unsigned HOST_WIDE_INT const_bitsize, const_bitpos;
+  unsigned HOST_WIDE_INT const_bitregion_start, const_bitregion_end;
+  if (invalid
+      || !bitsize.is_constant (&const_bitsize)
+      || !bitpos.is_constant (&const_bitpos)
+      || !bitregion_start.is_constant (&const_bitregion_start)
+      || !bitregion_end.is_constant (&const_bitregion_end))
     {
       terminate_all_aliasing_chains (NULL, stmt);
       return;
@@ -4106,9 +4112,10 @@ pass_store_merging::process_store (gimple *stmt)
   if (chain_info)
     {
       unsigned int ord = (*chain_info)->m_store_info.length ();
-      info = new store_immediate_info (bitsize, bitpos, bitregion_start,
-                                      bitregion_end, stmt, ord, rhs_code,
-                                      n, ins_stmt,
+      info = new store_immediate_info (const_bitsize, const_bitpos,
+                                      const_bitregion_start,
+                                      const_bitregion_end,
+                                      stmt, ord, rhs_code, n, ins_stmt,
                                       bit_not_p, ops[0], ops[1]);
       if (dump_file && (dump_flags & TDF_DETAILS))
        {
@@ -4135,9 +4142,10 @@ pass_store_merging::process_store (gimple *stmt)
   /* Start a new chain.  */
   struct imm_store_chain_info *new_chain
     = new imm_store_chain_info (m_stores_head, base_addr);
-  info = new store_immediate_info (bitsize, bitpos, bitregion_start,
-                                  bitregion_end, stmt, 0, rhs_code,
-                                  n, ins_stmt,
+  info = new store_immediate_info (const_bitsize, const_bitpos,
+                                  const_bitregion_start,
+                                  const_bitregion_end,
+                                  stmt, 0, rhs_code, n, ins_stmt,
                                   bit_not_p, ops[0], ops[1]);
   new_chain->m_store_info.safe_push (info);
   m_stores.put (base_addr, new_chain);
index 1c62ff88ff7bcc8e2f43923900ae817b388511ae..d681c374fc1be945e34fcf435bdcf02eb5c68440 100644 (file)
@@ -60,6 +60,18 @@ typedef poly_int<NUM_POLY_INT_COEFFS, widest_int> poly_widest_int;
    of bytes in size.  */
 #define num_trailing_bits(X) force_get_misalignment (X, BITS_PER_UNIT)
 
+/* Round bit quantity X down to the nearest byte boundary.
+
+   This is safe because non-constant mode sizes must be a whole number
+   of bytes in size.  */
+#define round_down_to_byte_boundary(X) force_align_down (X, BITS_PER_UNIT)
+
+/* Round bit quantity X up to the nearest byte boundary.
+
+   This is safe because non-constant mode sizes must be a whole number
+   of bytes in size.  */
+#define round_up_to_byte_boundary(X) force_align_up (X, BITS_PER_UNIT)
+
 /* Return the size of an element in a vector of size SIZE, given that
    the vector has NELTS elements.  The return value is in the same units
    as SIZE (either bits or bytes).