/* Loop distribution.
- Copyright (C) 2006, 2007, 2008, 2009, 2010, 2011
- Free Software Foundation, Inc.
+ Copyright (C) 2006-2014 Free Software Foundation, Inc.
Contributed by Georges-Andre Silber <Georges-Andre.Silber@ensmp.fr>
and Sebastian Pop <sebastian.pop@amd.com>.
#include "config.h"
#include "system.h"
#include "coretypes.h"
-#include "tree-flow.h"
+#include "tree.h"
+#include "basic-block.h"
+#include "tree-ssa-alias.h"
+#include "internal-fn.h"
+#include "gimple-expr.h"
+#include "is-a.h"
+#include "gimple.h"
+#include "gimple-iterator.h"
+#include "gimplify-me.h"
+#include "stor-layout.h"
+#include "gimple-ssa.h"
+#include "tree-cfg.h"
+#include "tree-phinodes.h"
+#include "ssa-iterators.h"
+#include "stringpool.h"
+#include "tree-ssanames.h"
+#include "tree-ssa-loop-manip.h"
+#include "tree-ssa-loop.h"
+#include "tree-into-ssa.h"
+#include "tree-ssa.h"
#include "cfgloop.h"
#include "tree-chrec.h"
#include "tree-data-ref.h"
#include "tree-scalar-evolution.h"
#include "tree-pass.h"
+#include "gimple-pretty-print.h"
+#include "tree-vectorizer.h"
+
+
+/* A Reduced Dependence Graph (RDG) vertex representing a statement. */
+typedef struct rdg_vertex
+{
+ /* The statement represented by this vertex. */
+ gimple stmt;
+
+ /* Vector of data-references in this statement. */
+ vec<data_reference_p> datarefs;
+
+ /* True when the statement contains a write to memory. */
+ bool has_mem_write;
+
+ /* True when the statement contains a read from memory. */
+ bool has_mem_reads;
+} *rdg_vertex_p;
+
+/* Accessors for the rdg_vertex payload hanging off graph vertex V,
+ and convenience forms addressing vertex number I of graph RDG. */
+#define RDGV_STMT(V) ((struct rdg_vertex *) ((V)->data))->stmt
+#define RDGV_DATAREFS(V) ((struct rdg_vertex *) ((V)->data))->datarefs
+#define RDGV_HAS_MEM_WRITE(V) ((struct rdg_vertex *) ((V)->data))->has_mem_write
+#define RDGV_HAS_MEM_READS(V) ((struct rdg_vertex *) ((V)->data))->has_mem_reads
+#define RDG_STMT(RDG, I) RDGV_STMT (&(RDG->vertices[I]))
+#define RDG_DATAREFS(RDG, I) RDGV_DATAREFS (&(RDG->vertices[I]))
+#define RDG_MEM_WRITE_STMT(RDG, I) RDGV_HAS_MEM_WRITE (&(RDG->vertices[I]))
+#define RDG_MEM_READS_STMT(RDG, I) RDGV_HAS_MEM_READS (&(RDG->vertices[I]))
+
+/* Data dependence type. Only flow and control dependences are
+ represented as RDG edges in this file. */
+
+enum rdg_dep_type
+{
+ /* Read After Write (RAW). */
+ flow_dd = 'f',
+
+ /* Control dependence (execute conditional on). */
+ control_dd = 'c'
+};
+
+/* Dependence information attached to an edge of the RDG. */
+
+typedef struct rdg_edge
+{
+ /* Type of the dependence. */
+ enum rdg_dep_type type;
+} *rdg_edge_p;
+
+/* Accessor for the rdg_edge payload hanging off graph edge E. */
+#define RDGE_TYPE(E) ((struct rdg_edge *) ((E)->data))->type
+
+/* Dump vertex I in RDG to FILE: its index, its memory write ("w") and
+ read ("r") flags, the indices of its predecessor and successor
+ vertices, and the GIMPLE statement it represents. */
+
+static void
+dump_rdg_vertex (FILE *file, struct graph *rdg, int i)
+{
+ struct vertex *v = &(rdg->vertices[i]);
+ struct graph_edge *e;
+
+ fprintf (file, "(vertex %d: (%s%s) (in:", i,
+ RDG_MEM_WRITE_STMT (rdg, i) ? "w" : "",
+ RDG_MEM_READS_STMT (rdg, i) ? "r" : "");
+
+ /* List the vertex numbers of all predecessors ... */
+ if (v->pred)
+ for (e = v->pred; e; e = e->pred_next)
+ fprintf (file, " %d", e->src);
+
+ fprintf (file, ") (out:");
+
+ /* ... and of all successors. */
+ if (v->succ)
+ for (e = v->succ; e; e = e->succ_next)
+ fprintf (file, " %d", e->dest);
+
+ fprintf (file, ")\n");
+ print_gimple_stmt (file, RDGV_STMT (v), 0, TDF_VOPS|TDF_MEMSYMS);
+ fprintf (file, ")\n");
+}
+
+/* Dump vertex I of RDG to stderr; convenient to call from a debugger. */
+
+DEBUG_FUNCTION void
+debug_rdg_vertex (struct graph *rdg, int i)
+{
+ dump_rdg_vertex (stderr, rdg, i);
+}
+
+/* Dump the reduced dependence graph RDG to FILE, one vertex at a time
+ in index order. */
+
+static void
+dump_rdg (FILE *file, struct graph *rdg)
+{
+ fprintf (file, "(rdg\n");
+ for (int i = 0; i < rdg->n_vertices; i++)
+ dump_rdg_vertex (file, rdg, i);
+ fprintf (file, ")\n");
+}
+
+/* Dump the whole RDG to stderr; convenient to call from a debugger. */
+
+DEBUG_FUNCTION void
+debug_rdg (struct graph *rdg)
+{
+ dump_rdg (stderr, rdg);
+}
+
+/* Dump RDG to FILE in GraphViz "dot" syntax: one node per vertex,
+ labeled with its statement; nodes reading memory are filled green,
+ nodes writing memory red; one arc per dependence edge, with control
+ dependences labeled. */
+
+static void
+dot_rdg_1 (FILE *file, struct graph *rdg)
+{
+ int i;
+ pretty_printer buffer;
+ pp_needs_newline (&buffer) = false;
+ /* Make the pretty-printer emit directly into FILE. */
+ buffer.buffer->stream = file;
+
+ fprintf (file, "digraph RDG {\n");
+
+ for (i = 0; i < rdg->n_vertices; i++)
+ {
+ struct vertex *v = &(rdg->vertices[i]);
+ struct graph_edge *e;
+
+ fprintf (file, "%d [label=\"[%d] ", i, i);
+ pp_gimple_stmt_1 (&buffer, RDGV_STMT (v), 0, TDF_SLIM);
+ pp_flush (&buffer);
+ fprintf (file, "\"]\n");
+
+ /* Highlight reads from memory. */
+ if (RDG_MEM_READS_STMT (rdg, i))
+ fprintf (file, "%d [style=filled, fillcolor=green]\n", i);
+
+ /* Highlight stores to memory. */
+ if (RDG_MEM_WRITE_STMT (rdg, i))
+ fprintf (file, "%d [style=filled, fillcolor=red]\n", i);
+
+ if (v->succ)
+ for (e = v->succ; e; e = e->succ_next)
+ switch (RDGE_TYPE (e))
+ {
+ case flow_dd:
+ /* Flow dependences are the common case: emit the arc
+ without a label. */
+ fprintf (file, "%d -> %d \n", i, e->dest);
+ break;
+
+ case control_dd:
+ fprintf (file, "%d -> %d [label=control] \n", i, e->dest);
+ break;
+
+ default:
+ gcc_unreachable ();
+ }
+ }
+
+ fprintf (file, "}\n\n");
+}
+
+/* Display the Reduced Dependence Graph using dotty. */
+
+DEBUG_FUNCTION void
+dot_rdg (struct graph *rdg)
+{
+ /* With "#if 1" (the current setting) the graph is piped through
+ "dot -Tx11" for interactive display; flip to the #else branch to
+ dump plain dot text to stderr instead. */
+#if 1
+ FILE *file = popen ("dot -Tx11", "w");
+ if (!file)
+ return;
+ dot_rdg_1 (file, rdg);
+ fflush (file);
+ /* NOTE(review): closing the underlying descriptor here presumably
+ lets the viewer see EOF before pclose waits on it, but calling
+ pclose after close (fileno ()) is unusual -- confirm intent. */
+ close (fileno (file));
+ pclose (file);
+#else
+ dot_rdg_1 (stderr, rdg);
+#endif
+}
+
+/* Returns the index of STMT in RDG, or -1 when STMT is not part of
+ the graph. The mapping lives in the statement's uid (set up by
+ create_rdg_vertices); RDG itself is only consulted by the checking
+ assert, hence ATTRIBUTE_UNUSED. */
+
+static int
+rdg_vertex_for_stmt (struct graph *rdg ATTRIBUTE_UNUSED, gimple stmt)
+{
+ int index = gimple_uid (stmt);
+ gcc_checking_assert (index == -1 || RDG_STMT (rdg, index) == stmt);
+ return index;
+}
+
+/* Creates flow dependence edges in RDG for all the uses of DEF. IDEF
+ is the index of DEF's defining statement in RDG. */
+
+static void
+create_rdg_edges_for_scalar (struct graph *rdg, tree def, int idef)
+{
+ use_operand_p imm_use_p;
+ imm_use_iterator iterator;
+
+ FOR_EACH_IMM_USE_FAST (imm_use_p, iterator, def)
+ {
+ struct graph_edge *e;
+ int use = rdg_vertex_for_stmt (rdg, USE_STMT (imm_use_p));
+
+ /* Skip uses in statements that are not part of the graph. */
+ if (use < 0)
+ continue;
+
+ e = add_edge (rdg, idef, use);
+ e->data = XNEW (struct rdg_edge);
+ RDGE_TYPE (e) = flow_dd;
+ }
+}
+
+/* Creates an edge in RDG to vertex V from each control statement that
+ BB is control dependent on, according to CD. */
+
+static void
+create_edge_for_control_dependence (struct graph *rdg, basic_block bb,
+ int v, control_dependences *cd)
+{
+ bitmap_iterator bi;
+ unsigned edge_n;
+ /* Walk the CFG edges BB is control dependent on. */
+ EXECUTE_IF_SET_IN_BITMAP (cd->get_edges_dependent_on (bb->index),
+ 0, edge_n, bi)
+ {
+ basic_block cond_bb = cd->get_edge (edge_n)->src;
+ gimple stmt = last_stmt (cond_bb);
+ if (stmt && is_ctrl_stmt (stmt))
+ {
+ struct graph_edge *e;
+ int c = rdg_vertex_for_stmt (rdg, stmt);
+ /* Skip control statements that are not part of the graph. */
+ if (c < 0)
+ continue;
+
+ e = add_edge (rdg, c, v);
+ e->data = XNEW (struct rdg_edge);
+ RDGE_TYPE (e) = control_dd;
+ }
+ }
+}
+
+/* Creates the flow dependence edges of RDG: for every SSA name
+ defined by a vertex's statement (including PHI results), an edge to
+ each vertex whose statement uses that name. */
+
+static void
+create_rdg_flow_edges (struct graph *rdg)
+{
+ int i;
+ def_operand_p def_p;
+ ssa_op_iter iter;
+
+ for (i = 0; i < rdg->n_vertices; i++)
+ FOR_EACH_PHI_OR_STMT_DEF (def_p, RDG_STMT (rdg, i),
+ iter, SSA_OP_DEF)
+ create_rdg_edges_for_scalar (rdg, DEF_FROM_PTR (def_p), i);
+}
+
+/* Creates the control dependence edges of RDG using CD. For a PHI
+ node the control dependences of every incoming edge's source block
+ are used; for any other statement those of its own block. */
+
+static void
+create_rdg_cd_edges (struct graph *rdg, control_dependences *cd)
+{
+ int i;
+
+ for (i = 0; i < rdg->n_vertices; i++)
+ {
+ gimple stmt = RDG_STMT (rdg, i);
+ if (gimple_code (stmt) == GIMPLE_PHI)
+ {
+ edge_iterator ei;
+ edge e;
+ FOR_EACH_EDGE (e, ei, gimple_bb (stmt)->preds)
+ create_edge_for_control_dependence (rdg, e->src, i, cd);
+ }
+ else
+ create_edge_for_control_dependence (rdg, gimple_bb (stmt), i, cd);
+ }
+}
+
+/* Build the vertices of the reduced dependence graph RDG, one per
+ statement in STMTS, analyzing data references of LOOP's statements
+ into *DATAREFS. Return false if that failed (some vertices may
+ then be partially initialized; the caller is expected to free the
+ graph and *DATAREFS). */
+
+static bool
+create_rdg_vertices (struct graph *rdg, vec<gimple> stmts, loop_p loop,
+ vec<data_reference_p> *datarefs)
+{
+ int i;
+ gimple stmt;
+
+ FOR_EACH_VEC_ELT (stmts, i, stmt)
+ {
+ struct vertex *v = &(rdg->vertices[i]);
+
+ /* Record statement to vertex mapping. */
+ gimple_set_uid (stmt, i);
+
+ v->data = XNEW (struct rdg_vertex);
+ RDGV_STMT (v) = stmt;
+ RDGV_DATAREFS (v).create (0);
+ RDGV_HAS_MEM_WRITE (v) = false;
+ RDGV_HAS_MEM_READS (v) = false;
+ /* PHI nodes carry no data references. */
+ if (gimple_code (stmt) == GIMPLE_PHI)
+ continue;
+
+ /* Remember where *DATAREFS ended so that only the references
+ discovered for STMT are attached to its vertex below. */
+ unsigned drp = datarefs->length ();
+ if (!find_data_references_in_stmt (loop, stmt, datarefs))
+ return false;
+ for (unsigned j = drp; j < datarefs->length (); ++j)
+ {
+ data_reference_p dr = (*datarefs)[j];
+ if (DR_IS_READ (dr))
+ RDGV_HAS_MEM_READS (v) = true;
+ else
+ RDGV_HAS_MEM_WRITE (v) = true;
+ RDGV_DATAREFS (v).safe_push (dr);
+ }
+ }
+ return true;
+}
+
+/* Initialize STMTS with all the statements of LOOP. The order in
+ which we discover statements is important as
+ generate_loops_for_partition is using the same traversal for
+ identifying statements in loop copies. Virtual PHI nodes, labels
+ and debug statements are not collected. */
+
+static void
+stmts_from_loop (struct loop *loop, vec<gimple> *stmts)
+{
+ unsigned int i;
+ basic_block *bbs = get_loop_body_in_dom_order (loop);
+
+ for (i = 0; i < loop->num_nodes; i++)
+ {
+ basic_block bb = bbs[i];
+ gimple_stmt_iterator bsi;
+ gimple stmt;
+
+ /* Non-virtual PHIs first, as in the basic block layout. */
+ for (bsi = gsi_start_phis (bb); !gsi_end_p (bsi); gsi_next (&bsi))
+ if (!virtual_operand_p (gimple_phi_result (gsi_stmt (bsi))))
+ stmts->safe_push (gsi_stmt (bsi));
+
+ for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi))
+ {
+ stmt = gsi_stmt (bsi);
+ if (gimple_code (stmt) != GIMPLE_LABEL && !is_gimple_debug (stmt))
+ stmts->safe_push (stmt);
+ }
+ }
+
+ free (bbs);
+}
+
+/* Free the reduced dependence graph RDG: the per-edge and per-vertex
+ payloads (undoing the stmt-to-vertex uid mapping and releasing the
+ collected data references), then the graph itself. */
+
+static void
+free_rdg (struct graph *rdg)
+{
+ int i;
+
+ for (i = 0; i < rdg->n_vertices; i++)
+ {
+ struct vertex *v = &(rdg->vertices[i]);
+ struct graph_edge *e;
+
+ /* Each edge is on exactly one successor list, so every edge
+ payload is freed exactly once. */
+ for (e = v->succ; e; e = e->succ_next)
+ free (e->data);
+
+ /* v->data may be NULL when create_rdg_vertices failed midway. */
+ if (v->data)
+ {
+ gimple_set_uid (RDGV_STMT (v), -1);
+ free_data_refs (RDGV_DATAREFS (v));
+ free (v->data);
+ }
+ }
+
+ free_graph (rdg);
+}
+
+/* Build the Reduced Dependence Graph (RDG) with one vertex per
+ statement of the loop nest LOOP_NEST, and one edge per data dependence or
+ scalar dependence. Control dependence edges are added only when CD
+ is non-NULL. Returns NULL when a statement's data references could
+ not be analyzed. */
+
+static struct graph *
+build_rdg (vec<loop_p> loop_nest, control_dependences *cd)
+{
+ struct graph *rdg;
+ vec<data_reference_p> datarefs;
+
+ /* Create the RDG vertices from the stmts of the loop nest. */
+ auto_vec<gimple, 10> stmts;
+ stmts_from_loop (loop_nest[0], &stmts);
+ rdg = new_graph (stmts.length ());
+ datarefs.create (10);
+ if (!create_rdg_vertices (rdg, stmts, loop_nest[0], &datarefs))
+ {
+ datarefs.release ();
+ free_rdg (rdg);
+ return NULL;
+ }
+ stmts.release ();
+
+ create_rdg_flow_edges (rdg);
+ if (cd)
+ create_rdg_cd_edges (rdg, cd);
+
+ datarefs.release ();
+
+ return rdg;
+}
+
+
+
+enum partition_kind {
+ PKIND_NORMAL, PKIND_MEMSET, PKIND_MEMCPY
+};
typedef struct partition_s
{
bitmap stmts;
+ bitmap loops;
+ bool reduction_p;
+ enum partition_kind kind;
+ /* data-references a kind != PKIND_NORMAL partition is about. */
+ data_reference_p main_dr;
+ data_reference_p secondary_dr;
+ tree niter;
+ bool plus_one;
} *partition_t;
-DEF_VEC_P (partition_t);
-DEF_VEC_ALLOC_P (partition_t, heap);
/* Allocate and initialize a partition from BITMAP. */
static partition_t
-partition_alloc (bitmap stmts)
+partition_alloc (bitmap stmts, bitmap loops)
{
partition_t partition = XCNEW (struct partition_s);
partition->stmts = stmts ? stmts : BITMAP_ALLOC (NULL);
+ partition->loops = loops ? loops : BITMAP_ALLOC (NULL);
+ partition->reduction_p = false;
+ partition->kind = PKIND_NORMAL;
return partition;
}
partition_free (partition_t partition)
{
BITMAP_FREE (partition->stmts);
+ BITMAP_FREE (partition->loops);
free (partition);
}
+/* Returns true if the partition can be generated as a library builtin
+ call (any kind other than PKIND_NORMAL) instead of a loop. */
+
+static bool
+partition_builtin_p (partition_t partition)
+{
+ return partition->kind != PKIND_NORMAL;
+}
+
+/* Returns true if the partition contains a reduction. */
-/* If bit I is not set, it means that this node represents an
- operation that has already been performed, and that should not be
- performed again. This is the subgraph of remaining important
- computations that is passed to the DFS algorithm for avoiding to
- include several times the same stores in different loops. */
-static bitmap remaining_stmts;
+static bool
+partition_reduction_p (partition_t partition)
+{
+ return partition->reduction_p;
+}
+
+/* Merge PARTITION into the partition DEST: union the statement
+ bitmaps and propagate the reduction flag. DEST is demoted to
+ PKIND_NORMAL, so it is no longer generated as a builtin.
+ NOTE(review): the per-partition LOOPS bitmap is not unioned here --
+ confirm that is intentional. */
+
+static void
+partition_merge_into (partition_t dest, partition_t partition)
+{
+ dest->kind = PKIND_NORMAL;
+ bitmap_ior_into (dest->stmts, partition->stmts);
+ if (partition_reduction_p (partition))
+ dest->reduction_p = true;
+}
-/* A node of the RDG is marked in this bitmap when it has as a
- predecessor a node that writes to memory. */
-static bitmap upstream_mem_writes;
/* Returns true when DEF is an SSA_NAME defined in LOOP and used after
the LOOP. */
use_operand_p use_p;
FOR_EACH_IMM_USE_FAST (use_p, imm_iter, def)
- if (loop != loop_containing_stmt (USE_STMT (use_p)))
- return true;
+ {
+ gimple use_stmt = USE_STMT (use_p);
+ if (!is_gimple_debug (use_stmt)
+ && loop != loop_containing_stmt (use_stmt))
+ return true;
+ }
return false;
}
def_operand_p def_p;
ssa_op_iter op_iter;
+ if (gimple_code (stmt) == GIMPLE_PHI)
+ return ssa_name_has_uses_outside_loop_p (gimple_phi_result (stmt), loop);
+
FOR_EACH_SSA_DEF_OPERAND (def_p, stmt, op_iter, SSA_OP_DEF)
if (ssa_name_has_uses_outside_loop_p (DEF_FROM_PTR (def_p), loop))
return true;
return false;
}
-/* Update the PHI nodes of NEW_LOOP. NEW_LOOP is a duplicate of
- ORIG_LOOP. */
-
-static void
-update_phis_for_loop_copy (struct loop *orig_loop, struct loop *new_loop)
-{
- tree new_ssa_name;
- gimple_stmt_iterator si_new, si_orig;
- edge orig_loop_latch = loop_latch_edge (orig_loop);
- edge orig_entry_e = loop_preheader_edge (orig_loop);
- edge new_loop_entry_e = loop_preheader_edge (new_loop);
-
- /* Scan the phis in the headers of the old and new loops
- (they are organized in exactly the same order). */
- for (si_new = gsi_start_phis (new_loop->header),
- si_orig = gsi_start_phis (orig_loop->header);
- !gsi_end_p (si_new) && !gsi_end_p (si_orig);
- gsi_next (&si_new), gsi_next (&si_orig))
- {
- tree def;
- source_location locus;
- gimple phi_new = gsi_stmt (si_new);
- gimple phi_orig = gsi_stmt (si_orig);
-
- /* Add the first phi argument for the phi in NEW_LOOP (the one
- associated with the entry of NEW_LOOP) */
- def = PHI_ARG_DEF_FROM_EDGE (phi_orig, orig_entry_e);
- locus = gimple_phi_arg_location_from_edge (phi_orig, orig_entry_e);
- add_phi_arg (phi_new, def, new_loop_entry_e, locus);
-
- /* Add the second phi argument for the phi in NEW_LOOP (the one
- associated with the latch of NEW_LOOP) */
- def = PHI_ARG_DEF_FROM_EDGE (phi_orig, orig_loop_latch);
- locus = gimple_phi_arg_location_from_edge (phi_orig, orig_loop_latch);
-
- if (TREE_CODE (def) == SSA_NAME)
- {
- new_ssa_name = get_current_def (def);
-
- if (!new_ssa_name)
- /* This only happens if there are no definitions inside the
- loop. Use the the invariant in the new loop as is. */
- new_ssa_name = def;
- }
- else
- /* Could be an integer. */
- new_ssa_name = def;
-
- add_phi_arg (phi_new, new_ssa_name, loop_latch_edge (new_loop), locus);
- }
-}
-
/* Return a copy of LOOP placed before LOOP. */
static struct loop *
struct loop *res;
edge preheader = loop_preheader_edge (loop);
- if (!single_exit (loop))
- return NULL;
-
initialize_original_copy_tables ();
- res = slpeel_tree_duplicate_loop_to_edge_cfg (loop, preheader);
+ res = slpeel_tree_duplicate_loop_to_edge_cfg (loop, NULL, preheader);
+ gcc_assert (res != NULL);
free_original_copy_tables ();
-
- if (!res)
- return NULL;
-
- update_phis_for_loop_copy (loop, res);
- rename_variables_in_loop (res);
+ delete_update_ssa ();
return res;
}
copied when COPY_P is true. All the statements not flagged in the
PARTITION bitmap are removed from the loop or from its copy. The
statements are indexed in sequence inside a basic block, and the
- basic blocks of a loop are taken in dom order. Returns true when
- the code gen succeeded. */
+ basic blocks of a loop are taken in dom order. */
-static bool
+static void
generate_loops_for_partition (struct loop *loop, partition_t partition,
bool copy_p)
{
- unsigned i, x;
+ unsigned i;
gimple_stmt_iterator bsi;
basic_block *bbs;
if (copy_p)
{
loop = copy_loop_before (loop);
+ gcc_assert (loop != NULL);
create_preheader (loop, CP_SIMPLE_PREHEADERS);
create_bb_after_loop (loop);
}
- if (loop == NULL)
- return false;
-
- /* Remove stmts not in the PARTITION bitmap. The order in which we
- visit the phi nodes and the statements is exactly as in
- stmts_from_loop. */
+ /* Remove stmts not in the PARTITION bitmap. */
bbs = get_loop_body_in_dom_order (loop);
if (MAY_HAVE_DEBUG_STMTS)
- for (x = 0, i = 0; i < loop->num_nodes; i++)
+ for (i = 0; i < loop->num_nodes; i++)
{
basic_block bb = bbs[i];
for (bsi = gsi_start_phis (bb); !gsi_end_p (bsi); gsi_next (&bsi))
- if (!bitmap_bit_p (partition->stmts, x++))
- reset_debug_uses (gsi_stmt (bsi));
+ {
+ gimple phi = gsi_stmt (bsi);
+ if (!virtual_operand_p (gimple_phi_result (phi))
+ && !bitmap_bit_p (partition->stmts, gimple_uid (phi)))
+ reset_debug_uses (phi);
+ }
for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi))
{
gimple stmt = gsi_stmt (bsi);
if (gimple_code (stmt) != GIMPLE_LABEL
&& !is_gimple_debug (stmt)
- && !bitmap_bit_p (partition->stmts, x++))
+ && !bitmap_bit_p (partition->stmts, gimple_uid (stmt)))
reset_debug_uses (stmt);
}
}
- for (x = 0, i = 0; i < loop->num_nodes; i++)
+ for (i = 0; i < loop->num_nodes; i++)
{
basic_block bb = bbs[i];
for (bsi = gsi_start_phis (bb); !gsi_end_p (bsi);)
- if (!bitmap_bit_p (partition->stmts, x++))
- {
- gimple phi = gsi_stmt (bsi);
- if (!is_gimple_reg (gimple_phi_result (phi)))
- mark_virtual_phi_result_for_renaming (phi);
+ {
+ gimple phi = gsi_stmt (bsi);
+ if (!virtual_operand_p (gimple_phi_result (phi))
+ && !bitmap_bit_p (partition->stmts, gimple_uid (phi)))
remove_phi_node (&bsi, true);
- }
- else
- gsi_next (&bsi);
+ else
+ gsi_next (&bsi);
+ }
for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi);)
{
gimple stmt = gsi_stmt (bsi);
if (gimple_code (stmt) != GIMPLE_LABEL
&& !is_gimple_debug (stmt)
- && !bitmap_bit_p (partition->stmts, x++))
+ && !bitmap_bit_p (partition->stmts, gimple_uid (stmt)))
{
- unlink_stmt_vdef (stmt);
- gsi_remove (&bsi, true);
- release_defs (stmt);
+ /* Choose an arbitrary path through the empty CFG part
+ that this unnecessary control stmt controls. */
+ if (gimple_code (stmt) == GIMPLE_COND)
+ {
+ gimple_cond_make_false (stmt);
+ update_stmt (stmt);
+ }
+ else if (gimple_code (stmt) == GIMPLE_SWITCH)
+ {
+ gimple_switch_set_index
+ (stmt, CASE_LOW (gimple_switch_label (stmt, 1)));
+ update_stmt (stmt);
+ }
+ else
+ {
+ unlink_stmt_vdef (stmt);
+ gsi_remove (&bsi, true);
+ release_defs (stmt);
+ continue;
+ }
}
- else
- gsi_next (&bsi);
+ gsi_next (&bsi);
}
}
free (bbs);
- return true;
}
-/* Build the size argument for a memset call. */
+/* Build the size argument for a memory operation call. */
-static inline tree
-build_size_arg_loc (location_t loc, tree nb_iter, tree op,
- gimple_seq *stmt_list)
+static tree
+build_size_arg_loc (location_t loc, data_reference_p dr, tree nb_iter,
+ bool plus_one)
{
- gimple_seq stmts;
- tree x = fold_build2_loc (loc, MULT_EXPR, size_type_node,
- fold_convert_loc (loc, size_type_node, nb_iter),
- fold_convert_loc (loc, size_type_node,
- TYPE_SIZE_UNIT (TREE_TYPE (op))));
- x = force_gimple_operand (x, &stmts, true, NULL);
- gimple_seq_add_seq (stmt_list, stmts);
-
- return x;
+ tree size = fold_convert_loc (loc, sizetype, nb_iter);
+ if (plus_one)
+ size = size_binop (PLUS_EXPR, size, size_one_node);
+ size = fold_build2_loc (loc, MULT_EXPR, sizetype, size,
+ TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (dr))));
+ size = fold_convert_loc (loc, size_type_node, size);
+ return size;
}
-/* Generate a call to memset. Return true when the operation succeeded. */
+/* Build an address argument for a memory operation call. */
-static void
-generate_memset_zero (gimple stmt, tree op0, tree nb_iter,
- gimple_stmt_iterator bsi)
+static tree
+build_addr_arg_loc (location_t loc, data_reference_p dr, tree nb_bytes)
{
- tree addr_base, nb_bytes;
- bool res = false;
- gimple_seq stmt_list = NULL, stmts;
- gimple fn_call;
- tree mem, fn;
- struct data_reference *dr = XCNEW (struct data_reference);
- location_t loc = gimple_location (stmt);
-
- DR_STMT (dr) = stmt;
- DR_REF (dr) = op0;
- res = dr_analyze_innermost (dr, loop_containing_stmt (stmt));
- gcc_assert (res && stride_of_unit_type_p (DR_STEP (dr), TREE_TYPE (op0)));
-
- nb_bytes = build_size_arg_loc (loc, nb_iter, op0, &stmt_list);
+ tree addr_base;
+
addr_base = size_binop_loc (loc, PLUS_EXPR, DR_OFFSET (dr), DR_INIT (dr));
addr_base = fold_convert_loc (loc, sizetype, addr_base);
addr_base = size_binop_loc (loc, MINUS_EXPR, addr_base,
fold_convert_loc (loc, sizetype, nb_bytes));
addr_base = size_binop_loc (loc, PLUS_EXPR, addr_base,
- TYPE_SIZE_UNIT (TREE_TYPE (op0)));
- }
-
- addr_base = fold_build_pointer_plus_loc (loc,
- DR_BASE_ADDRESS (dr), addr_base);
- mem = force_gimple_operand (addr_base, &stmts, true, NULL);
- gimple_seq_add_seq (&stmt_list, stmts);
-
- fn = build_fold_addr_expr (builtin_decl_implicit (BUILT_IN_MEMSET));
- fn_call = gimple_build_call (fn, 3, mem, integer_zero_node, nb_bytes);
- gimple_seq_add_stmt (&stmt_list, fn_call);
- gsi_insert_seq_after (&bsi, stmt_list, GSI_CONTINUE_LINKING);
-
- if (dump_file && (dump_flags & TDF_DETAILS))
- fprintf (dump_file, "generated memset zero\n");
-
- free_data_ref (dr);
-}
-
-/* Tries to generate a builtin function for the instructions of LOOP
- pointed to by the bits set in PARTITION. Returns true when the
- operation succeeded. */
-
-static bool
-generate_builtin (struct loop *loop, partition_t partition, bool copy_p)
-{
- bool res = false;
- unsigned i, x = 0;
- basic_block *bbs;
- gimple write = NULL;
- gimple_stmt_iterator bsi;
- tree nb_iter = number_of_exit_cond_executions (loop);
-
- if (!nb_iter || nb_iter == chrec_dont_know)
- return false;
-
- bbs = get_loop_body_in_dom_order (loop);
-
- for (i = 0; i < loop->num_nodes; i++)
- {
- basic_block bb = bbs[i];
-
- for (bsi = gsi_start_phis (bb); !gsi_end_p (bsi); gsi_next (&bsi))
- x++;
-
- for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi))
- {
- gimple stmt = gsi_stmt (bsi);
-
- if (gimple_code (stmt) == GIMPLE_LABEL
- || is_gimple_debug (stmt))
- continue;
-
- if (!bitmap_bit_p (partition->stmts, x++))
- continue;
-
- /* If the stmt has uses outside of the loop fail.
- ??? If the stmt is generated in another partition that
- is not created as builtin we can ignore this. */
- if (stmt_has_scalar_dependences_outside_loop (loop, stmt))
- {
- if (dump_file && (dump_flags & TDF_DETAILS))
- fprintf (dump_file, "not generating builtin, partition has "
- "scalar uses outside of the loop\n");
- goto end;
- }
-
- if (is_gimple_assign (stmt)
- && !is_gimple_reg (gimple_assign_lhs (stmt)))
- {
- /* Don't generate the builtins when there are more than
- one memory write. */
- if (write != NULL)
- goto end;
-
- write = stmt;
- if (bb == loop->latch)
- nb_iter = number_of_latch_executions (loop);
- }
- }
+ TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (dr))));
}
- if (!stmt_with_adjacent_zero_store_dr_p (write))
- goto end;
-
- /* The new statements will be placed before LOOP. */
- bsi = gsi_last_bb (loop_preheader_edge (loop)->src);
- generate_memset_zero (write, gimple_assign_lhs (write), nb_iter, bsi);
- res = true;
-
- /* If this is the last partition for which we generate code, we have
- to destroy the loop. */
- if (!copy_p)
- {
- unsigned nbbs = loop->num_nodes;
- edge exit = single_exit (loop);
- basic_block src = loop_preheader_edge (loop)->src, dest = exit->dest;
- redirect_edge_pred (exit, src);
- exit->flags &= ~(EDGE_TRUE_VALUE|EDGE_FALSE_VALUE);
- exit->flags |= EDGE_FALLTHRU;
- cancel_loop_tree (loop);
- rescan_loop_exit (exit, false, true);
-
- for (i = 0; i < nbbs; i++)
- delete_basic_block (bbs[i]);
-
- set_immediate_dominator (CDI_DOMINATORS, dest,
- recompute_dominator (CDI_DOMINATORS, dest));
- }
-
- end:
- free (bbs);
- return res;
-}
-
-/* Generates code for PARTITION. For simple loops, this function can
- generate a built-in. */
-
-static bool
-generate_code_for_partition (struct loop *loop, partition_t partition,
- bool copy_p)
-{
- if (generate_builtin (loop, partition, copy_p))
- return true;
-
- return generate_loops_for_partition (loop, partition, copy_p);
-}
-
-
-/* Returns true if the node V of RDG cannot be recomputed. */
-
-static bool
-rdg_cannot_recompute_vertex_p (struct graph *rdg, int v)
-{
- if (RDG_MEM_WRITE_STMT (rdg, v))
- return true;
-
- return false;
+ return fold_build_pointer_plus_loc (loc, DR_BASE_ADDRESS (dr), addr_base);
}
-/* Returns true when the vertex V has already been generated in the
- current partition (V is in PROCESSED), or when V belongs to another
- partition and cannot be recomputed (V is not in REMAINING_STMTS). */
-
-static inline bool
-already_processed_vertex_p (bitmap processed, int v)
-{
- return (bitmap_bit_p (processed, v)
- || !bitmap_bit_p (remaining_stmts, v));
-}
-
-/* Returns NULL when there is no anti-dependence among the successors
- of vertex V, otherwise returns the edge with the anti-dep. */
-
-static struct graph_edge *
-has_anti_dependence (struct vertex *v)
-{
- struct graph_edge *e;
-
- if (v->succ)
- for (e = v->succ; e; e = e->succ_next)
- if (RDGE_TYPE (e) == anti_dd)
- return e;
-
- return NULL;
-}
-
-/* Returns true when V has an anti-dependence edge among its successors. */
-
-static bool
-predecessor_has_mem_write (struct graph *rdg, struct vertex *v)
-{
- struct graph_edge *e;
+/* If VAL memory representation contains the same value in all bytes,
+ return that value, otherwise return -1.
+ E.g. for 0x24242424 return 0x24, for IEEE double
+ 747708026454360457216.0 return 0x44, etc. */
- if (v->pred)
- for (e = v->pred; e; e = e->pred_next)
- if (bitmap_bit_p (upstream_mem_writes, e->src)
- /* Don't consider flow channels: a write to memory followed
- by a read from memory. These channels allow the split of
- the RDG in different partitions. */
- && !RDG_MEM_WRITE_STMT (rdg, e->src))
- return true;
-
- return false;
-}
-
-/* Initializes the upstream_mem_writes bitmap following the
- information from RDG. */
-
-static void
-mark_nodes_having_upstream_mem_writes (struct graph *rdg)
-{
- int v, x;
- bitmap seen = BITMAP_ALLOC (NULL);
-
- for (v = rdg->n_vertices - 1; v >= 0; v--)
- if (!bitmap_bit_p (seen, v))
- {
- unsigned i;
- VEC (int, heap) *nodes = VEC_alloc (int, heap, 3);
-
- graphds_dfs (rdg, &v, 1, &nodes, false, NULL);
-
- FOR_EACH_VEC_ELT (int, nodes, i, x)
- {
- if (!bitmap_set_bit (seen, x))
- continue;
-
- if (RDG_MEM_WRITE_STMT (rdg, x)
- || predecessor_has_mem_write (rdg, &(rdg->vertices[x]))
- /* In anti dependences the read should occur before
- the write, this is why both the read and the write
- should be placed in the same partition. */
- || has_anti_dependence (&(rdg->vertices[x])))
- {
- bitmap_set_bit (upstream_mem_writes, x);
- }
- }
-
- VEC_free (int, heap, nodes);
- }
-}
-
-/* Returns true when vertex u has a memory write node as a predecessor
- in RDG. */
-
-static bool
-has_upstream_mem_writes (int u)
+static int
+const_with_all_bytes_same (tree val)
{
- return bitmap_bit_p (upstream_mem_writes, u);
+ unsigned char buf[64];
+ int i, len;
+
+ if (integer_zerop (val)
+ || real_zerop (val)
+ || (TREE_CODE (val) == CONSTRUCTOR
+ && !TREE_CLOBBER_P (val)
+ && CONSTRUCTOR_NELTS (val) == 0))
+ return 0;
+
+ if (CHAR_BIT != 8 || BITS_PER_UNIT != 8)
+ return -1;
+
+ len = native_encode_expr (val, buf, sizeof (buf));
+ if (len == 0)
+ return -1;
+ for (i = 1; i < len; i++)
+ if (buf[i] != buf[0])
+ return -1;
+ return buf[0];
}
-static void rdg_flag_vertex_and_dependent (struct graph *, int, partition_t,
- bitmap, bitmap, bool *);
-
-/* Flag the uses of U stopping following the information from
- upstream_mem_writes. */
+/* Generate a call to memset for PARTITION in LOOP. */
static void
-rdg_flag_uses (struct graph *rdg, int u, partition_t partition, bitmap loops,
- bitmap processed, bool *part_has_writes)
+generate_memset_builtin (struct loop *loop, partition_t partition)
{
- use_operand_p use_p;
- struct vertex *x = &(rdg->vertices[u]);
- gimple stmt = RDGV_STMT (x);
- struct graph_edge *anti_dep = has_anti_dependence (x);
-
- /* Keep in the same partition the destination of an antidependence,
- because this is a store to the exact same location. Putting this
- in another partition is bad for cache locality. */
- if (anti_dep)
- {
- int v = anti_dep->dest;
+ gimple_stmt_iterator gsi;
+ gimple stmt, fn_call;
+ tree mem, fn, nb_bytes;
+ location_t loc;
+ tree val;
- if (!already_processed_vertex_p (processed, v))
- rdg_flag_vertex_and_dependent (rdg, v, partition, loops,
- processed, part_has_writes);
- }
+ stmt = DR_STMT (partition->main_dr);
+ loc = gimple_location (stmt);
- if (gimple_code (stmt) != GIMPLE_PHI)
+ /* The new statements will be placed before LOOP. */
+ gsi = gsi_last_bb (loop_preheader_edge (loop)->src);
+
+ nb_bytes = build_size_arg_loc (loc, partition->main_dr, partition->niter,
+ partition->plus_one);
+ nb_bytes = force_gimple_operand_gsi (&gsi, nb_bytes, true, NULL_TREE,
+ false, GSI_CONTINUE_LINKING);
+ mem = build_addr_arg_loc (loc, partition->main_dr, nb_bytes);
+ mem = force_gimple_operand_gsi (&gsi, mem, true, NULL_TREE,
+ false, GSI_CONTINUE_LINKING);
+
+ /* This exactly matches the pattern recognition in classify_partition. */
+ val = gimple_assign_rhs1 (stmt);
+ /* Handle constants like 0x15151515 and similarly
+ floating point constants etc. where all bytes are the same. */
+ int bytev = const_with_all_bytes_same (val);
+ if (bytev != -1)
+ val = build_int_cst (integer_type_node, bytev);
+ else if (TREE_CODE (val) == INTEGER_CST)
+ val = fold_convert (integer_type_node, val);
+ else if (!useless_type_conversion_p (integer_type_node, TREE_TYPE (val)))
{
- if ((use_p = gimple_vuse_op (stmt)) != NULL_USE_OPERAND_P)
- {
- tree use = USE_FROM_PTR (use_p);
-
- if (TREE_CODE (use) == SSA_NAME)
- {
- gimple def_stmt = SSA_NAME_DEF_STMT (use);
- int v = rdg_vertex_for_stmt (rdg, def_stmt);
-
- if (v >= 0
- && !already_processed_vertex_p (processed, v))
- rdg_flag_vertex_and_dependent (rdg, v, partition, loops,
- processed, part_has_writes);
- }
- }
+ gimple cstmt;
+ tree tem = make_ssa_name (integer_type_node, NULL);
+ cstmt = gimple_build_assign_with_ops (NOP_EXPR, tem, val, NULL_TREE);
+ gsi_insert_after (&gsi, cstmt, GSI_CONTINUE_LINKING);
+ val = tem;
}
- if (is_gimple_assign (stmt) && has_upstream_mem_writes (u))
- {
- tree op0 = gimple_assign_lhs (stmt);
-
- /* Scalar channels don't have enough space for transmitting data
- between tasks, unless we add more storage by privatizing. */
- if (is_gimple_reg (op0))
- {
- use_operand_p use_p;
- imm_use_iterator iter;
-
- FOR_EACH_IMM_USE_FAST (use_p, iter, op0)
- {
- int v = rdg_vertex_for_stmt (rdg, USE_STMT (use_p));
+ fn = build_fold_addr_expr (builtin_decl_implicit (BUILT_IN_MEMSET));
+ fn_call = gimple_build_call (fn, 3, mem, val, nb_bytes);
+ gsi_insert_after (&gsi, fn_call, GSI_CONTINUE_LINKING);
- if (!already_processed_vertex_p (processed, v))
- rdg_flag_vertex_and_dependent (rdg, v, partition, loops,
- processed, part_has_writes);
- }
- }
+ if (dump_file && (dump_flags & TDF_DETAILS))
+ {
+ fprintf (dump_file, "generated memset");
+ if (bytev == 0)
+ fprintf (dump_file, " zero\n");
+ else
+ fprintf (dump_file, "\n");
}
}
-/* Flag V from RDG as part of PARTITION, and also flag its loop number
- in LOOPS. */
+/* Generate a call to memcpy for PARTITION in LOOP. */
static void
-rdg_flag_vertex (struct graph *rdg, int v, partition_t partition, bitmap loops,
- bool *part_has_writes)
+generate_memcpy_builtin (struct loop *loop, partition_t partition)
{
- struct loop *loop;
+ gimple_stmt_iterator gsi;
+ gimple stmt, fn_call;
+ tree dest, src, fn, nb_bytes;
+ location_t loc;
+ enum built_in_function kind;
- if (!bitmap_set_bit (partition->stmts, v))
- return;
+ stmt = DR_STMT (partition->main_dr);
+ loc = gimple_location (stmt);
- loop = loop_containing_stmt (RDG_STMT (rdg, v));
- bitmap_set_bit (loops, loop->num);
+ /* The new statements will be placed before LOOP. */
+ gsi = gsi_last_bb (loop_preheader_edge (loop)->src);
+
+ nb_bytes = build_size_arg_loc (loc, partition->main_dr, partition->niter,
+ partition->plus_one);
+ nb_bytes = force_gimple_operand_gsi (&gsi, nb_bytes, true, NULL_TREE,
+ false, GSI_CONTINUE_LINKING);
+ dest = build_addr_arg_loc (loc, partition->main_dr, nb_bytes);
+ src = build_addr_arg_loc (loc, partition->secondary_dr, nb_bytes);
+ if (ptr_derefs_may_alias_p (dest, src))
+ kind = BUILT_IN_MEMMOVE;
+ else
+ kind = BUILT_IN_MEMCPY;
- if (rdg_cannot_recompute_vertex_p (rdg, v))
+ dest = force_gimple_operand_gsi (&gsi, dest, true, NULL_TREE,
+ false, GSI_CONTINUE_LINKING);
+ src = force_gimple_operand_gsi (&gsi, src, true, NULL_TREE,
+ false, GSI_CONTINUE_LINKING);
+ fn = build_fold_addr_expr (builtin_decl_implicit (kind));
+ fn_call = gimple_build_call (fn, 3, dest, src, nb_bytes);
+ gsi_insert_after (&gsi, fn_call, GSI_CONTINUE_LINKING);
+
+ if (dump_file && (dump_flags & TDF_DETAILS))
{
- *part_has_writes = true;
- bitmap_clear_bit (remaining_stmts, v);
+ if (kind == BUILT_IN_MEMCPY)
+ fprintf (dump_file, "generated memcpy\n");
+ else
+ fprintf (dump_file, "generated memmove\n");
}
}
-/* Flag in the bitmap PARTITION the vertex V and all its predecessors.
- Also flag their loop number in LOOPS. */
+/* Remove and destroy the loop LOOP. */
static void
-rdg_flag_vertex_and_dependent (struct graph *rdg, int v, partition_t partition,
- bitmap loops, bitmap processed,
- bool *part_has_writes)
+destroy_loop (struct loop *loop)
{
+ unsigned nbbs = loop->num_nodes;
+ edge exit = single_exit (loop);
+ basic_block src = loop_preheader_edge (loop)->src, dest = exit->dest;
+ basic_block *bbs;
unsigned i;
- VEC (int, heap) *nodes = VEC_alloc (int, heap, 3);
- int x;
-
- bitmap_set_bit (processed, v);
- rdg_flag_uses (rdg, v, partition, loops, processed, part_has_writes);
- graphds_dfs (rdg, &v, 1, &nodes, false, remaining_stmts);
- rdg_flag_vertex (rdg, v, partition, loops, part_has_writes);
-
- FOR_EACH_VEC_ELT (int, nodes, i, x)
- if (!already_processed_vertex_p (processed, x))
- rdg_flag_vertex_and_dependent (rdg, x, partition, loops, processed,
- part_has_writes);
-
- VEC_free (int, heap, nodes);
-}
-/* Initialize CONDS with all the condition statements from the basic
- blocks of LOOP. */
+ bbs = get_loop_body_in_dom_order (loop);
-static void
-collect_condition_stmts (struct loop *loop, VEC (gimple, heap) **conds)
-{
- unsigned i;
- edge e;
- VEC (edge, heap) *exits = get_loop_exit_edges (loop);
+ redirect_edge_pred (exit, src);
+ exit->flags &= ~(EDGE_TRUE_VALUE|EDGE_FALSE_VALUE);
+ exit->flags |= EDGE_FALLTHRU;
+ cancel_loop_tree (loop);
+ rescan_loop_exit (exit, false, true);
- FOR_EACH_VEC_ELT (edge, exits, i, e)
+ for (i = 0; i < nbbs; i++)
{
- gimple cond = last_stmt (e->src);
-
- if (cond)
- VEC_safe_push (gimple, heap, *conds, cond);
+ /* We have made sure to not leave any dangling uses of SSA
+ names defined in the loop. With the exception of virtuals.
+ Make sure we replace all uses of virtual defs that will remain
+ outside of the loop with the bare symbol as delete_basic_block
+ will release them. */
+ gimple_stmt_iterator gsi;
+ for (gsi = gsi_start_phis (bbs[i]); !gsi_end_p (gsi); gsi_next (&gsi))
+ {
+ gimple phi = gsi_stmt (gsi);
+ if (virtual_operand_p (gimple_phi_result (phi)))
+ mark_virtual_phi_result_for_renaming (phi);
+ }
+ for (gsi = gsi_start_bb (bbs[i]); !gsi_end_p (gsi); gsi_next (&gsi))
+ {
+ gimple stmt = gsi_stmt (gsi);
+ tree vdef = gimple_vdef (stmt);
+ if (vdef && TREE_CODE (vdef) == SSA_NAME)
+ mark_virtual_operand_for_renaming (vdef);
+ }
+ delete_basic_block (bbs[i]);
}
+ free (bbs);
- VEC_free (edge, heap, exits);
+ set_immediate_dominator (CDI_DOMINATORS, dest,
+ recompute_dominator (CDI_DOMINATORS, dest));
}
-/* Add to PARTITION all the exit condition statements for LOOPS
- together with all their dependent statements determined from
- RDG. */
+/* Generates code for PARTITION. */
static void
-rdg_flag_loop_exits (struct graph *rdg, bitmap loops, partition_t partition,
- bitmap processed, bool *part_has_writes)
+generate_code_for_partition (struct loop *loop,
+ partition_t partition, bool copy_p)
{
- unsigned i;
- bitmap_iterator bi;
- VEC (gimple, heap) *conds = VEC_alloc (gimple, heap, 3);
-
- EXECUTE_IF_SET_IN_BITMAP (loops, 0, i, bi)
- collect_condition_stmts (get_loop (i), &conds);
-
- while (!VEC_empty (gimple, conds))
+ switch (partition->kind)
{
- gimple cond = VEC_pop (gimple, conds);
- int v = rdg_vertex_for_stmt (rdg, cond);
- bitmap new_loops = BITMAP_ALLOC (NULL);
-
- if (!already_processed_vertex_p (processed, v))
- rdg_flag_vertex_and_dependent (rdg, v, partition, new_loops, processed,
- part_has_writes);
-
- EXECUTE_IF_SET_IN_BITMAP (new_loops, 0, i, bi)
- if (bitmap_set_bit (loops, i))
- collect_condition_stmts (get_loop (i), &conds);
-
- BITMAP_FREE (new_loops);
+ case PKIND_NORMAL:
+ /* Reductions all have to be in the last partition. */
+ gcc_assert (!partition_reduction_p (partition)
+ || !copy_p);
+ generate_loops_for_partition (loop, partition, copy_p);
+ return;
+
+ case PKIND_MEMSET:
+ generate_memset_builtin (loop, partition);
+ break;
+
+ case PKIND_MEMCPY:
+ generate_memcpy_builtin (loop, partition);
+ break;
+
+ default:
+ gcc_unreachable ();
}
- VEC_free (gimple, heap, conds);
+ /* Common tail for partitions we turn into a call. If this was the last
+ partition for which we generate code, we have to destroy the loop. */
+ if (!copy_p)
+ destroy_loop (loop);
}
-/* Returns a bitmap in which all the statements needed for computing
- the strongly connected component C of the RDG are flagged, also
- including the loop exit conditions. */
+
+/* Returns a partition with all the statements needed for computing
+ the vertex V of the RDG, also including the loop exit conditions. */
static partition_t
-build_rdg_partition_for_component (struct graph *rdg, rdgc c,
- bool *part_has_writes)
+build_rdg_partition_for_vertex (struct graph *rdg, int v)
{
- int i, v;
- partition_t partition = partition_alloc (NULL);
- bitmap loops = BITMAP_ALLOC (NULL);
- bitmap processed = BITMAP_ALLOC (NULL);
+ partition_t partition = partition_alloc (NULL, NULL);
+ auto_vec<int, 3> nodes;
+ unsigned i;
+ int x;
- FOR_EACH_VEC_ELT (int, c->vertices, i, v)
- if (!already_processed_vertex_p (processed, v))
- rdg_flag_vertex_and_dependent (rdg, v, partition, loops, processed,
- part_has_writes);
+ graphds_dfs (rdg, &v, 1, &nodes, false, NULL);
- rdg_flag_loop_exits (rdg, loops, partition, processed, part_has_writes);
+ FOR_EACH_VEC_ELT (nodes, i, x)
+ {
+ bitmap_set_bit (partition->stmts, x);
+ bitmap_set_bit (partition->loops,
+ loop_containing_stmt (RDG_STMT (rdg, x))->num);
+ }
- BITMAP_FREE (processed);
- BITMAP_FREE (loops);
return partition;
}
-/* Free memory for COMPONENTS. */
+/* Classifies the builtin kind we can generate for PARTITION of RDG and LOOP.
+   For the moment we detect memset and memcpy patterns.  */
static void
-free_rdg_components (VEC (rdgc, heap) *components)
+classify_partition (loop_p loop, struct graph *rdg, partition_t partition)
{
- int i;
- rdgc x;
+ bitmap_iterator bi;
+ unsigned i;
+ tree nb_iter;
+ data_reference_p single_load, single_store;
+ bool volatiles_p = false;
+ bool plus_one = false;
- FOR_EACH_VEC_ELT (rdgc, components, i, x)
+ partition->kind = PKIND_NORMAL;
+ partition->main_dr = NULL;
+ partition->secondary_dr = NULL;
+ partition->niter = NULL_TREE;
+ partition->plus_one = false;
+
+ EXECUTE_IF_SET_IN_BITMAP (partition->stmts, 0, i, bi)
{
- VEC_free (int, heap, x->vertices);
- free (x);
- }
+ gimple stmt = RDG_STMT (rdg, i);
- VEC_free (rdgc, heap, components);
-}
+ if (gimple_has_volatile_ops (stmt))
+ volatiles_p = true;
-/* Build the COMPONENTS vector with the strongly connected components
- of RDG in which the STARTING_VERTICES occur. */
+ /* If the stmt has uses outside of the loop mark it as reduction. */
+ if (stmt_has_scalar_dependences_outside_loop (loop, stmt))
+ {
+ partition->reduction_p = true;
+ return;
+ }
+ }
-static void
-rdg_build_components (struct graph *rdg, VEC (int, heap) *starting_vertices,
- VEC (rdgc, heap) **components)
-{
- int i, v;
- bitmap saved_components = BITMAP_ALLOC (NULL);
- int n_components = graphds_scc (rdg, NULL);
- VEC (int, heap) **all_components = XNEWVEC (VEC (int, heap) *, n_components);
+ /* Perform general partition disqualification for builtins. */
+ if (volatiles_p
+ || !flag_tree_loop_distribute_patterns)
+ return;
+
+ /* Detect memset and memcpy. */
+ single_load = NULL;
+ single_store = NULL;
+ EXECUTE_IF_SET_IN_BITMAP (partition->stmts, 0, i, bi)
+ {
+ gimple stmt = RDG_STMT (rdg, i);
+ data_reference_p dr;
+ unsigned j;
- for (i = 0; i < n_components; i++)
- all_components[i] = VEC_alloc (int, heap, 3);
+ if (gimple_code (stmt) == GIMPLE_PHI)
+ continue;
- for (i = 0; i < rdg->n_vertices; i++)
- VEC_safe_push (int, heap, all_components[rdg->vertices[i].component], i);
+ /* Any scalar stmts are ok. */
+ if (!gimple_vuse (stmt))
+ continue;
- FOR_EACH_VEC_ELT (int, starting_vertices, i, v)
- {
- int c = rdg->vertices[v].component;
+ /* Otherwise just regular loads/stores. */
+ if (!gimple_assign_single_p (stmt))
+ return;
- if (bitmap_set_bit (saved_components, c))
+ /* But exactly one store and/or load. */
+ for (j = 0; RDG_DATAREFS (rdg, i).iterate (j, &dr); ++j)
{
- rdgc x = XCNEW (struct rdg_component);
- x->num = c;
- x->vertices = all_components[c];
-
- VEC_safe_push (rdgc, heap, *components, x);
+ if (DR_IS_READ (dr))
+ {
+ if (single_load != NULL)
+ return;
+ single_load = dr;
+ }
+ else
+ {
+ if (single_store != NULL)
+ return;
+ single_store = dr;
+ }
}
}
- for (i = 0; i < n_components; i++)
- if (!bitmap_bit_p (saved_components, i))
- VEC_free (int, heap, all_components[i]);
+ if (!single_store)
+ return;
+
+ nb_iter = number_of_latch_executions (loop);
+ if (!nb_iter || nb_iter == chrec_dont_know)
+ return;
+ if (dominated_by_p (CDI_DOMINATORS, single_exit (loop)->src,
+ gimple_bb (DR_STMT (single_store))))
+ plus_one = true;
- free (all_components);
- BITMAP_FREE (saved_components);
+ if (single_store && !single_load)
+ {
+ gimple stmt = DR_STMT (single_store);
+ tree rhs = gimple_assign_rhs1 (stmt);
+ if (const_with_all_bytes_same (rhs) == -1
+ && (!INTEGRAL_TYPE_P (TREE_TYPE (rhs))
+ || (TYPE_MODE (TREE_TYPE (rhs))
+ != TYPE_MODE (unsigned_char_type_node))))
+ return;
+ if (TREE_CODE (rhs) == SSA_NAME
+ && !SSA_NAME_IS_DEFAULT_DEF (rhs)
+ && flow_bb_inside_loop_p (loop, gimple_bb (SSA_NAME_DEF_STMT (rhs))))
+ return;
+ if (!adjacent_dr_p (single_store)
+ || !dominated_by_p (CDI_DOMINATORS,
+ loop->latch, gimple_bb (stmt)))
+ return;
+ partition->kind = PKIND_MEMSET;
+ partition->main_dr = single_store;
+ partition->niter = nb_iter;
+ partition->plus_one = plus_one;
+ }
+ else if (single_store && single_load)
+ {
+ gimple store = DR_STMT (single_store);
+ gimple load = DR_STMT (single_load);
+ /* Direct aggregate copy or via an SSA name temporary. */
+ if (load != store
+ && gimple_assign_lhs (load) != gimple_assign_rhs1 (store))
+ return;
+ if (!adjacent_dr_p (single_store)
+ || !adjacent_dr_p (single_load)
+ || !operand_equal_p (DR_STEP (single_store),
+ DR_STEP (single_load), 0)
+ || !dominated_by_p (CDI_DOMINATORS,
+ loop->latch, gimple_bb (store)))
+ return;
+ /* Now check that if there is a dependence this dependence is
+ of a suitable form for memmove. */
+ vec<loop_p> loops = vNULL;
+ ddr_p ddr;
+ loops.safe_push (loop);
+ ddr = initialize_data_dependence_relation (single_load, single_store,
+ loops);
+ compute_affine_dependence (ddr, loop);
+ if (DDR_ARE_DEPENDENT (ddr) == chrec_dont_know)
+ {
+ free_dependence_relation (ddr);
+ loops.release ();
+ return;
+ }
+ if (DDR_ARE_DEPENDENT (ddr) != chrec_known)
+ {
+ if (DDR_NUM_DIST_VECTS (ddr) == 0)
+ {
+ free_dependence_relation (ddr);
+ loops.release ();
+ return;
+ }
+ lambda_vector dist_v;
+ FOR_EACH_VEC_ELT (DDR_DIST_VECTS (ddr), i, dist_v)
+ {
+ int dist = dist_v[index_in_loop_nest (loop->num,
+ DDR_LOOP_NEST (ddr))];
+ if (dist > 0 && !DDR_REVERSED_P (ddr))
+ {
+ free_dependence_relation (ddr);
+ loops.release ();
+ return;
+ }
+ }
+ }
+ free_dependence_relation (ddr);
+ loops.release ();
+ partition->kind = PKIND_MEMCPY;
+ partition->main_dr = single_store;
+ partition->secondary_dr = single_load;
+ partition->niter = nb_iter;
+ partition->plus_one = plus_one;
+ }
}
-/* Returns true when it is possible to generate a builtin pattern for
- the PARTITION of RDG. For the moment we detect only the memset
- zero pattern. */
+/* For a data reference DR, return the declaration of its base
+   address or NULL_TREE if the base is not determined.  */
-static bool
-can_generate_builtin (struct graph *rdg, partition_t partition)
+static tree
+ref_base_address (data_reference_p dr)
{
- unsigned i;
- bitmap_iterator bi;
- int nb_reads = 0;
- int nb_writes = 0;
- int stores_zero = 0;
-
- EXECUTE_IF_SET_IN_BITMAP (partition->stmts, 0, i, bi)
- if (RDG_MEM_READS_STMT (rdg, i))
- nb_reads++;
- else if (RDG_MEM_WRITE_STMT (rdg, i))
- {
- gimple stmt = RDG_STMT (rdg, i);
- nb_writes++;
- if (!gimple_has_volatile_ops (stmt)
- && stmt_with_adjacent_zero_store_dr_p (stmt))
- stores_zero++;
- }
+ tree base_address = DR_BASE_ADDRESS (dr);
+ if (base_address
+ && TREE_CODE (base_address) == ADDR_EXPR)
+ return TREE_OPERAND (base_address, 0);
- return stores_zero == 1 && nb_writes == 1 && nb_reads == 0;
+ return base_address;
}
/* Returns true when PARTITION1 and PARTITION2 have similar memory
similar_memory_accesses (struct graph *rdg, partition_t partition1,
partition_t partition2)
{
- unsigned i, j;
+ unsigned i, j, k, l;
bitmap_iterator bi, bj;
+ data_reference_p ref1, ref2;
+
+ /* First check whether in the intersection of the two partitions are
+ any loads or stores. Common loads are the situation that happens
+ most often. */
+ EXECUTE_IF_AND_IN_BITMAP (partition1->stmts, partition2->stmts, 0, i, bi)
+ if (RDG_MEM_WRITE_STMT (rdg, i)
+ || RDG_MEM_READS_STMT (rdg, i))
+ return true;
+ /* Then check all data-references against each other. */
EXECUTE_IF_SET_IN_BITMAP (partition1->stmts, 0, i, bi)
if (RDG_MEM_WRITE_STMT (rdg, i)
|| RDG_MEM_READS_STMT (rdg, i))
EXECUTE_IF_SET_IN_BITMAP (partition2->stmts, 0, j, bj)
if (RDG_MEM_WRITE_STMT (rdg, j)
|| RDG_MEM_READS_STMT (rdg, j))
- if (rdg_has_similar_memory_accesses (rdg, i, j))
- return true;
-
- return false;
-}
-
-/* Fuse all the partitions from PARTITIONS that contain similar memory
- references, i.e., we're taking care of cache locality. This
- function does not fuse those partitions that contain patterns that
- can be code generated with builtins. */
-
-static void
-fuse_partitions_with_similar_memory_accesses (struct graph *rdg,
- VEC (partition_t, heap) **partitions)
-{
- int p1, p2;
- partition_t partition1, partition2;
-
- FOR_EACH_VEC_ELT (partition_t, *partitions, p1, partition1)
- if (!can_generate_builtin (rdg, partition1))
- FOR_EACH_VEC_ELT (partition_t, *partitions, p2, partition2)
- if (p1 != p2
- && !can_generate_builtin (rdg, partition2)
- && similar_memory_accesses (rdg, partition1, partition2))
{
- bitmap_ior_into (partition1->stmts, partition2->stmts);
- VEC_ordered_remove (partition_t, *partitions, p2);
- p2--;
+ FOR_EACH_VEC_ELT (RDG_DATAREFS (rdg, i), k, ref1)
+ {
+ tree base1 = ref_base_address (ref1);
+ if (base1)
+ FOR_EACH_VEC_ELT (RDG_DATAREFS (rdg, j), l, ref2)
+ if (base1 == ref_base_address (ref2))
+ return true;
+ }
}
+
+ return false;
}
/* Aggregate several components into a useful partition that is
distributed in different loops. */
static void
-rdg_build_partitions (struct graph *rdg, VEC (rdgc, heap) *components,
- VEC (int, heap) **other_stores,
- VEC (partition_t, heap) **partitions, bitmap processed)
+rdg_build_partitions (struct graph *rdg,
+ vec<gimple> starting_stmts,
+ vec<partition_t> *partitions)
{
+ bitmap processed = BITMAP_ALLOC (NULL);
int i;
- rdgc x;
- partition_t partition = partition_alloc (NULL);
+ gimple stmt;
- FOR_EACH_VEC_ELT (rdgc, components, i, x)
+ FOR_EACH_VEC_ELT (starting_stmts, i, stmt)
{
- partition_t np;
- bool part_has_writes = false;
- int v = VEC_index (int, x->vertices, 0);
+ int v = rdg_vertex_for_stmt (rdg, stmt);
+
+ if (dump_file && (dump_flags & TDF_DETAILS))
+ fprintf (dump_file,
+ "ldist asked to generate code for vertex %d\n", v);
+ /* If the vertex is already contained in another partition so
+ is the partition rooted at it. */
if (bitmap_bit_p (processed, v))
continue;
- np = build_rdg_partition_for_component (rdg, x, &part_has_writes);
- bitmap_ior_into (partition->stmts, np->stmts);
- bitmap_ior_into (processed, np->stmts);
- partition_free (np);
+ partition_t partition = build_rdg_partition_for_vertex (rdg, v);
+ bitmap_ior_into (processed, partition->stmts);
- if (part_has_writes)
+ if (dump_file && (dump_flags & TDF_DETAILS))
{
- if (dump_file && (dump_flags & TDF_DETAILS))
- {
- fprintf (dump_file, "ldist useful partition:\n");
- dump_bitmap (dump_file, partition->stmts);
- }
-
- VEC_safe_push (partition_t, heap, *partitions, partition);
- partition = partition_alloc (NULL);
+ fprintf (dump_file, "ldist useful partition:\n");
+ dump_bitmap (dump_file, partition->stmts);
}
- }
-
- /* Add the nodes from the RDG that were not marked as processed, and
- that are used outside the current loop. These are scalar
- computations that are not yet part of previous partitions. */
- for (i = 0; i < rdg->n_vertices; i++)
- if (!bitmap_bit_p (processed, i)
- && rdg_defs_used_in_other_loops_p (rdg, i))
- VEC_safe_push (int, heap, *other_stores, i);
-
- /* If there are still statements left in the OTHER_STORES array,
- create other components and partitions with these stores and
- their dependences. */
- if (VEC_length (int, *other_stores) > 0)
- {
- VEC (rdgc, heap) *comps = VEC_alloc (rdgc, heap, 3);
- VEC (int, heap) *foo = VEC_alloc (int, heap, 3);
-
- rdg_build_components (rdg, *other_stores, &comps);
- rdg_build_partitions (rdg, comps, &foo, partitions, processed);
- VEC_free (int, heap, foo);
- free_rdg_components (comps);
+ partitions->safe_push (partition);
}
- /* If there is something left in the last partition, save it. */
- if (bitmap_count_bits (partition->stmts) > 0)
- VEC_safe_push (partition_t, heap, *partitions, partition);
- else
- partition_free (partition);
+ /* All vertices should have been assigned to at least one partition now,
+ other than vertices belonging to dead code. */
- fuse_partitions_with_similar_memory_accesses (rdg, partitions);
+ BITMAP_FREE (processed);
}
/* Dump to FILE the PARTITIONS. */
static void
-dump_rdg_partitions (FILE *file, VEC (partition_t, heap) *partitions)
+dump_rdg_partitions (FILE *file, vec<partition_t> partitions)
{
int i;
partition_t partition;
- FOR_EACH_VEC_ELT (partition_t, partitions, i, partition)
+ FOR_EACH_VEC_ELT (partitions, i, partition)
debug_bitmap_file (file, partition->stmts);
}
/* Debug PARTITIONS. */
-extern void debug_rdg_partitions (VEC (partition_t, heap) *);
+extern void debug_rdg_partitions (vec<partition_t> );
DEBUG_FUNCTION void
-debug_rdg_partitions (VEC (partition_t, heap) *partitions)
+debug_rdg_partitions (vec<partition_t> partitions)
{
dump_rdg_partitions (stderr, partitions);
}
write operations of RDG. */
static bool
-partition_contains_all_rw (struct graph *rdg, VEC (partition_t, heap) *partitions)
+partition_contains_all_rw (struct graph *rdg,
+ vec<partition_t> partitions)
{
int i;
partition_t partition;
int nrw = number_of_rw_in_rdg (rdg);
- FOR_EACH_VEC_ELT (partition_t, partitions, i, partition)
+ FOR_EACH_VEC_ELT (partitions, i, partition)
if (nrw == number_of_rw_in_partition (rdg, partition))
return true;
return false;
}
-/* Generate code from STARTING_VERTICES in RDG. Returns the number of
- distributed loops. */
+/* Compute partition dependence created by the data references in DRS1
+ and DRS2 and modify and return DIR according to that. */
static int
-ldist_gen (struct loop *loop, struct graph *rdg,
- VEC (int, heap) *starting_vertices)
+pg_add_dependence_edges (struct graph *rdg, vec<loop_p> loops, int dir,
+ vec<data_reference_p> drs1,
+ vec<data_reference_p> drs2)
{
- int i, nbp;
- VEC (rdgc, heap) *components = VEC_alloc (rdgc, heap, 3);
- VEC (partition_t, heap) *partitions = VEC_alloc (partition_t, heap, 3);
- VEC (int, heap) *other_stores = VEC_alloc (int, heap, 3);
- partition_t partition;
- bitmap processed = BITMAP_ALLOC (NULL);
-
- remaining_stmts = BITMAP_ALLOC (NULL);
- upstream_mem_writes = BITMAP_ALLOC (NULL);
-
- for (i = 0; i < rdg->n_vertices; i++)
- {
- bitmap_set_bit (remaining_stmts, i);
+ data_reference_p dr1, dr2;
- /* Save in OTHER_STORES all the memory writes that are not in
- STARTING_VERTICES. */
- if (RDG_MEM_WRITE_STMT (rdg, i))
- {
- int v;
- unsigned j;
- bool found = false;
-
- FOR_EACH_VEC_ELT (int, starting_vertices, j, v)
- if (i == v)
+ /* dependence direction - 0 is no dependence, -1 is back,
+ 1 is forth, 2 is both (we can stop then, merging will occur). */
+ for (int ii = 0; drs1.iterate (ii, &dr1); ++ii)
+ for (int jj = 0; drs2.iterate (jj, &dr2); ++jj)
+ {
+ int this_dir = 1;
+ ddr_p ddr;
+ /* Re-shuffle data-refs to be in dominator order. */
+ if (rdg_vertex_for_stmt (rdg, DR_STMT (dr1))
+ > rdg_vertex_for_stmt (rdg, DR_STMT (dr2)))
+ {
+ data_reference_p tem = dr1;
+ dr1 = dr2;
+ dr2 = tem;
+ this_dir = -this_dir;
+ }
+ ddr = initialize_data_dependence_relation (dr1, dr2, loops);
+ compute_affine_dependence (ddr, loops[0]);
+ if (DDR_ARE_DEPENDENT (ddr) == chrec_dont_know)
+ this_dir = 2;
+ else if (DDR_ARE_DEPENDENT (ddr) == NULL_TREE)
+ {
+ if (DDR_REVERSED_P (ddr))
{
- found = true;
- break;
+ data_reference_p tem = dr1;
+ dr1 = dr2;
+ dr2 = tem;
+ this_dir = -this_dir;
}
+	    /* Known dependences can still be unordered throughout the
+	       iteration space, see gcc.dg/tree-ssa/ldist-16.c.  */
+ if (DDR_NUM_DIST_VECTS (ddr) != 1)
+ this_dir = 2;
+ /* If the overlap is exact preserve stmt order. */
+ else if (lambda_vector_zerop (DDR_DIST_VECT (ddr, 0), 1))
+ ;
+ else
+ {
+ /* Else as the distance vector is lexicographic positive
+ swap the dependence direction. */
+ this_dir = -this_dir;
+ }
+ }
+ else
+ this_dir = 0;
+ free_dependence_relation (ddr);
+ if (dir == 0)
+ dir = this_dir;
+ else if (dir != this_dir)
+ return 2;
+ }
+ return dir;
+}
- if (!found)
- VEC_safe_push (int, heap, other_stores, i);
- }
- }
-
- mark_nodes_having_upstream_mem_writes (rdg);
- rdg_build_components (rdg, starting_vertices, &components);
- rdg_build_partitions (rdg, components, &other_stores, &partitions,
- processed);
- BITMAP_FREE (processed);
-
- nbp = VEC_length (partition_t, partitions);
- if (nbp == 0
- || (nbp == 1
- && !can_generate_builtin (rdg,
- VEC_index (partition_t, partitions, 0)))
- || (nbp > 1
- && partition_contains_all_rw (rdg, partitions)))
- goto ldist_done;
-
- if (dump_file && (dump_flags & TDF_DETAILS))
- dump_rdg_partitions (dump_file, partitions);
-
- FOR_EACH_VEC_ELT (partition_t, partitions, i, partition)
- if (!generate_code_for_partition (loop, partition, i < nbp - 1))
- goto ldist_done;
-
- rewrite_into_loop_closed_ssa (NULL, TODO_update_ssa);
- mark_sym_for_renaming (gimple_vop (cfun));
- update_ssa (TODO_update_ssa_only_virtuals);
-
- ldist_done:
-
- BITMAP_FREE (remaining_stmts);
- BITMAP_FREE (upstream_mem_writes);
-
- FOR_EACH_VEC_ELT (partition_t, partitions, i, partition)
- partition_free (partition);
+/* Compare postorder number of the partition graph vertices V1 and V2. */
- VEC_free (int, heap, other_stores);
- VEC_free (partition_t, heap, partitions);
- free_rdg_components (components);
- return nbp;
+static int
+pgcmp (const void *v1_, const void *v2_)
+{
+ const vertex *v1 = (const vertex *)v1_;
+ const vertex *v2 = (const vertex *)v2_;
+ return v2->post - v1->post;
}
/* Distributes the code from LOOP in such a way that producer
- statements are placed before consumer statements. When STMTS is
- NULL, performs the maximal distribution, if STMTS is not NULL,
- tries to separate only these statements from the LOOP's body.
+ statements are placed before consumer statements. Tries to separate
+ only the statements from STMTS into separate loops.
Returns the number of distributed loops. */
static int
-distribute_loop (struct loop *loop, VEC (gimple, heap) *stmts)
+distribute_loop (struct loop *loop, vec<gimple> stmts,
+ control_dependences *cd, int *nb_calls)
{
- int res = 0;
struct graph *rdg;
- gimple s;
- unsigned i;
- VEC (int, heap) *vertices;
- VEC (ddr_p, heap) *dependence_relations;
- VEC (data_reference_p, heap) *datarefs;
- VEC (loop_p, heap) *loop_nest;
+ partition_t partition;
+ bool any_builtin;
+ int i, nbp;
+ graph *pg = NULL;
+ int num_sccs = 1;
+
+ *nb_calls = 0;
+ auto_vec<loop_p, 3> loop_nest;
+ if (!find_loop_nest (loop, &loop_nest))
+ return 0;
- if (loop->num_nodes > 2)
+ rdg = build_rdg (loop_nest, cd);
+ if (!rdg)
{
if (dump_file && (dump_flags & TDF_DETAILS))
fprintf (dump_file,
- "FIXME: Loop %d not distributed: it has more than two basic blocks.\n",
+ "Loop %d not distributed: failed to build the RDG.\n",
loop->num);
- return res;
+ return 0;
}
- datarefs = VEC_alloc (data_reference_p, heap, 10);
- dependence_relations = VEC_alloc (ddr_p, heap, 100);
- loop_nest = VEC_alloc (loop_p, heap, 3);
- rdg = build_rdg (loop, &loop_nest, &dependence_relations, &datarefs);
+ if (dump_file && (dump_flags & TDF_DETAILS))
+ dump_rdg (dump_file, rdg);
+
+ auto_vec<partition_t, 3> partitions;
+ rdg_build_partitions (rdg, stmts, &partitions);
- if (!rdg)
+ any_builtin = false;
+ FOR_EACH_VEC_ELT (partitions, i, partition)
{
- if (dump_file && (dump_flags & TDF_DETAILS))
- fprintf (dump_file,
- "FIXME: Loop %d not distributed: failed to build the RDG.\n",
- loop->num);
+ classify_partition (loop, rdg, partition);
+ any_builtin |= partition_builtin_p (partition);
+ }
+
+ /* If we are only distributing patterns but did not detect any,
+ simply bail out. */
+ if (!flag_tree_loop_distribution
+ && !any_builtin)
+ {
+ nbp = 0;
+ goto ldist_done;
+ }
- free_dependence_relations (dependence_relations);
- free_data_refs (datarefs);
- VEC_free (loop_p, heap, loop_nest);
- return res;
+ /* If we are only distributing patterns fuse all partitions that
+ were not classified as builtins. This also avoids chopping
+ a loop into pieces, separated by builtin calls. That is, we
+ only want no or a single loop body remaining. */
+ partition_t into;
+ if (!flag_tree_loop_distribution)
+ {
+ for (i = 0; partitions.iterate (i, &into); ++i)
+ if (!partition_builtin_p (into))
+ break;
+ for (++i; partitions.iterate (i, &partition); ++i)
+ if (!partition_builtin_p (partition))
+ {
+ if (dump_file && (dump_flags & TDF_DETAILS))
+ {
+ fprintf (dump_file, "fusing non-builtin partitions\n");
+ dump_bitmap (dump_file, into->stmts);
+ dump_bitmap (dump_file, partition->stmts);
+ }
+ partition_merge_into (into, partition);
+ partitions.unordered_remove (i);
+ partition_free (partition);
+ i--;
+ }
}
- vertices = VEC_alloc (int, heap, 3);
+ /* Due to limitations in the transform phase we have to fuse all
+ reduction partitions into the last partition so the existing
+ loop will contain all loop-closed PHI nodes. */
+ for (i = 0; partitions.iterate (i, &into); ++i)
+ if (partition_reduction_p (into))
+ break;
+ for (i = i + 1; partitions.iterate (i, &partition); ++i)
+ if (partition_reduction_p (partition))
+ {
+ if (dump_file && (dump_flags & TDF_DETAILS))
+ {
+ fprintf (dump_file, "fusing partitions\n");
+ dump_bitmap (dump_file, into->stmts);
+ dump_bitmap (dump_file, partition->stmts);
+ fprintf (dump_file, "because they have reductions\n");
+ }
+ partition_merge_into (into, partition);
+ partitions.unordered_remove (i);
+ partition_free (partition);
+ i--;
+ }
- if (dump_file && (dump_flags & TDF_DETAILS))
- dump_rdg (dump_file, rdg);
+ /* Apply our simple cost model - fuse partitions with similar
+ memory accesses. */
+ for (i = 0; partitions.iterate (i, &into); ++i)
+ {
+ if (partition_builtin_p (into))
+ continue;
+ for (int j = i + 1;
+ partitions.iterate (j, &partition); ++j)
+ {
+ if (!partition_builtin_p (partition)
+ && similar_memory_accesses (rdg, into, partition))
+ {
+ if (dump_file && (dump_flags & TDF_DETAILS))
+ {
+ fprintf (dump_file, "fusing partitions\n");
+ dump_bitmap (dump_file, into->stmts);
+ dump_bitmap (dump_file, partition->stmts);
+ fprintf (dump_file, "because they have similar "
+ "memory accesses\n");
+ }
+ partition_merge_into (into, partition);
+ partitions.unordered_remove (j);
+ partition_free (partition);
+ j--;
+ }
+ }
+ }
- FOR_EACH_VEC_ELT (gimple, stmts, i, s)
+ /* Build the partition dependency graph. */
+ if (partitions.length () > 1)
{
- int v = rdg_vertex_for_stmt (rdg, s);
+ pg = new_graph (partitions.length ());
+ struct pgdata {
+ partition_t partition;
+ vec<data_reference_p> writes;
+ vec<data_reference_p> reads;
+ };
+#define PGDATA(i) ((pgdata *)(pg->vertices[i].data))
+ for (i = 0; partitions.iterate (i, &partition); ++i)
+ {
+ vertex *v = &pg->vertices[i];
+ pgdata *data = new pgdata;
+ data_reference_p dr;
+ /* FIXME - leaks. */
+ v->data = data;
+ bitmap_iterator bi;
+ unsigned j;
+ data->partition = partition;
+ data->reads = vNULL;
+ data->writes = vNULL;
+ EXECUTE_IF_SET_IN_BITMAP (partition->stmts, 0, j, bi)
+ for (int k = 0; RDG_DATAREFS (rdg, j).iterate (k, &dr); ++k)
+ if (DR_IS_READ (dr))
+ data->reads.safe_push (dr);
+ else
+ data->writes.safe_push (dr);
+ }
+ partition_t partition1, partition2;
+ for (i = 0; partitions.iterate (i, &partition1); ++i)
+ for (int j = i + 1; partitions.iterate (j, &partition2); ++j)
+ {
+ /* dependence direction - 0 is no dependence, -1 is back,
+ 1 is forth, 2 is both (we can stop then, merging will occur). */
+ int dir = 0;
+ dir = pg_add_dependence_edges (rdg, loop_nest, dir,
+ PGDATA(i)->writes,
+ PGDATA(j)->reads);
+ if (dir != 2)
+ dir = pg_add_dependence_edges (rdg, loop_nest, dir,
+ PGDATA(i)->reads,
+ PGDATA(j)->writes);
+ if (dir != 2)
+ dir = pg_add_dependence_edges (rdg, loop_nest, dir,
+ PGDATA(i)->writes,
+ PGDATA(j)->writes);
+ if (dir == 1 || dir == 2)
+ add_edge (pg, i, j);
+ if (dir == -1 || dir == 2)
+ add_edge (pg, j, i);
+ }
+
+ /* Add edges to the reduction partition (if any) to force it last. */
+ unsigned j;
+ for (j = 0; partitions.iterate (j, &partition); ++j)
+ if (partition_reduction_p (partition))
+ break;
+ if (j < partitions.length ())
+ {
+ for (unsigned i = 0; partitions.iterate (i, &partition); ++i)
+ if (i != j)
+ add_edge (pg, i, j);
+ }
- if (v >= 0)
+ /* Compute partitions we cannot separate and fuse them. */
+ num_sccs = graphds_scc (pg, NULL);
+ for (i = 0; i < num_sccs; ++i)
{
- VEC_safe_push (int, heap, vertices, v);
+ partition_t first;
+ int j;
+ for (j = 0; partitions.iterate (j, &first); ++j)
+ if (pg->vertices[j].component == i)
+ break;
+ for (j = j + 1; partitions.iterate (j, &partition); ++j)
+ if (pg->vertices[j].component == i)
+ {
+ if (dump_file && (dump_flags & TDF_DETAILS))
+ {
+ fprintf (dump_file, "fusing partitions\n");
+ dump_bitmap (dump_file, first->stmts);
+ dump_bitmap (dump_file, partition->stmts);
+ fprintf (dump_file, "because they are in the same "
+ "dependence SCC\n");
+ }
+ partition_merge_into (first, partition);
+ partitions[j] = NULL;
+ partition_free (partition);
+ PGDATA (j)->partition = NULL;
+ }
+ }
- if (dump_file && (dump_flags & TDF_DETAILS))
- fprintf (dump_file,
- "ldist asked to generate code for vertex %d\n", v);
+ /* Now order the remaining nodes in postorder. */
+ qsort (pg->vertices, pg->n_vertices, sizeof (vertex), pgcmp);
+ partitions.truncate (0);
+ for (i = 0; i < pg->n_vertices; ++i)
+ {
+ pgdata *data = PGDATA (i);
+ if (data->partition)
+ partitions.safe_push (data->partition);
+ data->reads.release ();
+ data->writes.release ();
+ delete data;
}
+ gcc_assert (partitions.length () == (unsigned)num_sccs);
+ free_graph (pg);
+ }
+
+ nbp = partitions.length ();
+ if (nbp == 0
+ || (nbp == 1 && !partition_builtin_p (partitions[0]))
+ || (nbp > 1 && partition_contains_all_rw (rdg, partitions)))
+ {
+ nbp = 0;
+ goto ldist_done;
+ }
+
+ if (dump_file && (dump_flags & TDF_DETAILS))
+ dump_rdg_partitions (dump_file, partitions);
+
+ FOR_EACH_VEC_ELT (partitions, i, partition)
+ {
+ if (partition_builtin_p (partition))
+ (*nb_calls)++;
+ generate_code_for_partition (loop, partition, i < nbp - 1);
}
- res = ldist_gen (loop, rdg, vertices);
- VEC_free (int, heap, vertices);
+ ldist_done:
+
+ FOR_EACH_VEC_ELT (partitions, i, partition)
+ partition_free (partition);
+
free_rdg (rdg);
- free_dependence_relations (dependence_relations);
- free_data_refs (datarefs);
- VEC_free (loop_p, heap, loop_nest);
- return res;
+ return nbp - *nb_calls;
}
/* Distribute all loops in the current function. */
tree_loop_distribution (void)
{
 struct loop *loop;
- loop_iterator li;
- int nb_generated_loops = 0;
+ bool changed = false;
+ basic_block bb;
+ control_dependences *cd = NULL;
+
+ /* Clear the UID of every PHI node and statement to -1, marking them
+ as "not yet numbered".  (NOTE(review): presumably re-assigned while
+ building the RDG inside distribute_loop -- confirm.)  */
+ FOR_ALL_BB_FN (bb, cfun)
+ {
+ gimple_stmt_iterator gsi;
+ for (gsi = gsi_start_phis (bb); !gsi_end_p (gsi); gsi_next (&gsi))
+ gimple_set_uid (gsi_stmt (gsi), -1);
+ for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
+ gimple_set_uid (gsi_stmt (gsi), -1);
+ }
- FOR_EACH_LOOP (li, loop, 0)
+ /* We can at the moment only distribute non-nested loops, thus restrict
+ walking to innermost loops. */
+ FOR_EACH_LOOP (loop, LI_ONLY_INNERMOST)
 {
- VEC (gimple, heap) *work_list = NULL;
+ auto_vec<gimple> work_list;
+ basic_block *bbs;
 int num = loop->num;
+ unsigned int i;
 /* If the loop doesn't have a single exit we will fail anyway,
 so do that early. */
 if (!single_exit (loop))
 continue;
- /* If both flag_tree_loop_distribute_patterns and
- flag_tree_loop_distribution are set, then only
- distribute_patterns is executed. */
- if (flag_tree_loop_distribute_patterns)
+ /* Only optimize hot loops. */
+ if (!optimize_loop_for_speed_p (loop))
+ continue;
+
+ /* Initialize the worklist with stmts we seed the partitions with. */
+ bbs = get_loop_body_in_dom_order (loop);
+ for (i = 0; i < loop->num_nodes; ++i)
 {
- /* With the following working list, we're asking
- distribute_loop to separate from the rest of the loop the
- stores of the form "A[i] = 0". */
- stores_zero_from_loop (loop, &work_list);
-
- /* Do nothing if there are no patterns to be distributed. */
- if (VEC_length (gimple, work_list) > 0)
- nb_generated_loops = distribute_loop (loop, work_list);
+ gimple_stmt_iterator gsi;
+ for (gsi = gsi_start_phis (bbs[i]); !gsi_end_p (gsi); gsi_next (&gsi))
+ {
+ gimple phi = gsi_stmt (gsi);
+ /* Virtual PHIs do not seed partitions.  */
+ if (virtual_operand_p (gimple_phi_result (phi)))
+ continue;
+ /* Distribute stmts which have defs that are used outside of
+ the loop. */
+ if (!stmt_has_scalar_dependences_outside_loop (loop, phi))
+ continue;
+ work_list.safe_push (phi);
+ }
+ for (gsi = gsi_start_bb (bbs[i]); !gsi_end_p (gsi); gsi_next (&gsi))
+ {
+ gimple stmt = gsi_stmt (gsi);
+
+ /* If there is a stmt with side-effects bail out - we
+ cannot and should not distribute this loop. */
+ if (gimple_has_side_effects (stmt))
+ {
+ work_list.truncate (0);
+ goto out;
+ }
+
+ /* Distribute stmts which have defs that are used outside of
+ the loop. */
+ if (stmt_has_scalar_dependences_outside_loop (loop, stmt))
+ ;
+ /* Otherwise only distribute stores for now. */
+ else if (!gimple_vdef (stmt))
+ continue;
+
+ work_list.safe_push (stmt);
+ }
 }
- else if (flag_tree_loop_distribution)
+out:
+ free (bbs);
+
+ int nb_generated_loops = 0;
+ int nb_generated_calls = 0;
+ location_t loc = find_loop_location (loop);
+ if (work_list.length () > 0)
 {
- /* With the following working list, we're asking
- distribute_loop to separate the stores of the loop: when
- dependences allow, it will end on having one store per
- loop. */
- stores_from_loop (loop, &work_list);
-
- /* A simple heuristic for cache locality is to not split
- stores to the same array. Without this call, an unrolled
- loop would be split into as many loops as unroll factor,
- each loop storing in the same array. */
- remove_similar_memory_refs (&work_list);
-
- nb_generated_loops = distribute_loop (loop, work_list);
+ /* Control dependences are computed lazily on the first loop that
+ needs them and then shared across all remaining loops; the
+ post-dominator info is only required while building them.  */
+ if (!cd)
+ {
+ calculate_dominance_info (CDI_DOMINATORS);
+ calculate_dominance_info (CDI_POST_DOMINATORS);
+ cd = new control_dependences (create_edge_list ());
+ free_dominance_info (CDI_POST_DOMINATORS);
+ }
+ nb_generated_loops = distribute_loop (loop, work_list, cd,
+ &nb_generated_calls);
 }
- if (dump_file && (dump_flags & TDF_DETAILS))
+ if (nb_generated_loops + nb_generated_calls > 0)
 {
- if (nb_generated_loops > 1)
- fprintf (dump_file, "Loop %d distributed: split to %d loops.\n",
- num, nb_generated_loops);
- else
- fprintf (dump_file, "Loop %d is the same.\n", num);
+ changed = true;
+ dump_printf_loc (MSG_OPTIMIZED_LOCATIONS,
+ loc, "Loop %d distributed: split to %d loops "
+ "and %d library calls.\n",
+ num, nb_generated_loops, nb_generated_calls);
 }
+ else if (dump_file && (dump_flags & TDF_DETAILS))
+ fprintf (dump_file, "Loop %d is the same.\n", num);
+ }
-#ifdef ENABLE_CHECKING
- verify_loop_structure ();
-#endif
+ /* NOTE(review): the guard is redundant; 'delete NULL' is a no-op.  */
+ if (cd)
+ delete cd;
- VEC_free (gimple, heap, work_list);
+ /* Some loop was rewritten: virtual SSA operands and loop-closed SSA
+ form must be reconstructed.  */
+ if (changed)
+ {
+ mark_virtual_operands_for_renaming (cfun);
+ rewrite_into_loop_closed_ssa (NULL, TODO_update_ssa);
 }
+#ifdef ENABLE_CHECKING
+ verify_loop_structure ();
+#endif
+
 return 0;
}
|| flag_tree_loop_distribute_patterns;
}
-struct gimple_opt_pass pass_loop_distribution =
+namespace {
+
+/* Static metadata describing the "ldist" pass to the pass manager.  */
+const pass_data pass_data_loop_distribution =
{
- {
- GIMPLE_PASS,
- "ldist", /* name */
- gate_tree_loop_distribution, /* gate */
- tree_loop_distribution, /* execute */
- NULL, /* sub */
- NULL, /* next */
- 0, /* static_pass_number */
- TV_TREE_LOOP_DISTRIBUTION, /* tv_id */
- PROP_cfg | PROP_ssa, /* properties_required */
- 0, /* properties_provided */
- 0, /* properties_destroyed */
- 0, /* todo_flags_start */
- TODO_ggc_collect
- | TODO_verify_ssa /* todo_flags_finish */
- }
+ GIMPLE_PASS, /* type */
+ "ldist", /* name */
+ OPTGROUP_LOOP, /* optinfo_flags */
+ true, /* has_gate */
+ true, /* has_execute */
+ TV_TREE_LOOP_DISTRIBUTION, /* tv_id */
+ ( PROP_cfg | PROP_ssa ), /* properties_required */
+ 0, /* properties_provided */
+ 0, /* properties_destroyed */
+ 0, /* todo_flags_start */
+ TODO_verify_ssa, /* todo_flags_finish */
};
+
+/* Pass object for loop distribution; gate () and execute () forward to
+ gate_tree_loop_distribution and tree_loop_distribution.  */
+class pass_loop_distribution : public gimple_opt_pass
+{
+public:
+ pass_loop_distribution (gcc::context *ctxt)
+ : gimple_opt_pass (pass_data_loop_distribution, ctxt)
+ {}
+
+ /* opt_pass methods: */
+ bool gate () { return gate_tree_loop_distribution (); }
+ unsigned int execute () { return tree_loop_distribution (); }
+
+}; // class pass_loop_distribution
+
+} // anon namespace
+
+/* Create a new instance of the loop distribution pass.  */
+gimple_opt_pass *
+make_pass_loop_distribution (gcc::context *ctxt)
+{
+ return new pass_loop_distribution (ctxt);
+}