/* Loop distribution.
- Copyright (C) 2006-2013 Free Software Foundation, Inc.
+ Copyright (C) 2006-2014 Free Software Foundation, Inc.
Contributed by Georges-Andre Silber <Georges-Andre.Silber@ensmp.fr>
and Sebastian Pop <sebastian.pop@amd.com>.
#include "config.h"
#include "system.h"
#include "coretypes.h"
-#include "tree-flow.h"
+#include "tree.h"
+#include "basic-block.h"
+#include "tree-ssa-alias.h"
+#include "internal-fn.h"
+#include "gimple-expr.h"
+#include "is-a.h"
+#include "gimple.h"
+#include "gimple-iterator.h"
+#include "gimplify-me.h"
+#include "stor-layout.h"
+#include "gimple-ssa.h"
+#include "tree-cfg.h"
+#include "tree-phinodes.h"
+#include "ssa-iterators.h"
+#include "stringpool.h"
+#include "tree-ssanames.h"
+#include "tree-ssa-loop-manip.h"
+#include "tree-ssa-loop.h"
+#include "tree-into-ssa.h"
+#include "tree-ssa.h"
#include "cfgloop.h"
#include "tree-chrec.h"
#include "tree-data-ref.h"
#include "tree-scalar-evolution.h"
#include "tree-pass.h"
#include "gimple-pretty-print.h"
+#include "tree-vectorizer.h"
/* A Reduced Dependence Graph (RDG) vertex representing a statement. */
/* Read After Write (RAW). */
flow_dd = 'f',
- /* Write After Read (WAR). */
- anti_dd = 'a',
-
- /* Write After Write (WAW). */
- output_dd = 'o',
-
- /* Read After Read (RAR). */
- input_dd = 'i'
+ /* Control dependence (execute conditional on). */
+ control_dd = 'c'
};
/* Dependence information attached to an edge of the RDG. */
{
/* Type of the dependence. */
enum rdg_dep_type type;
-
- /* Levels of the dependence: the depth of the loops that carry the
- dependence. */
- unsigned level;
-
- /* Dependence relation between data dependences, NULL when one of
- the vertices is a scalar. */
- ddr_p relation;
} *rdg_edge_p;
#define RDGE_TYPE(E) ((struct rdg_edge *) ((E)->data))->type
-#define RDGE_LEVEL(E) ((struct rdg_edge *) ((E)->data))->level
-#define RDGE_RELATION(E) ((struct rdg_edge *) ((E)->data))->relation
-
-/* Strongly connected components of the reduced data dependence graph. */
-
-typedef struct rdg_component
-{
- int num;
- vec<int> vertices;
-} *rdgc;
/* Dump vertex I in RDG to FILE. */
dump_rdg_vertex (stderr, rdg, i);
}
-/* Dump component C of RDG to FILE. If DUMPED is non-null, set the
- dumped vertices to that bitmap. */
-
-static void
-dump_rdg_component (FILE *file, struct graph *rdg, int c, bitmap dumped)
-{
- int i;
-
- fprintf (file, "(%d\n", c);
-
- for (i = 0; i < rdg->n_vertices; i++)
- if (rdg->vertices[i].component == c)
- {
- if (dumped)
- bitmap_set_bit (dumped, i);
-
- dump_rdg_vertex (file, rdg, i);
- }
-
- fprintf (file, ")\n");
-}
-
-/* Call dump_rdg_vertex on stderr. */
-
-DEBUG_FUNCTION void
-debug_rdg_component (struct graph *rdg, int c)
-{
- dump_rdg_component (stderr, rdg, c, NULL);
-}
-
/* Dump the reduced dependence graph RDG to FILE. */
static void
dump_rdg (FILE *file, struct graph *rdg)
{
- int i;
- bitmap dumped = BITMAP_ALLOC (NULL);
-
fprintf (file, "(rdg\n");
-
- for (i = 0; i < rdg->n_vertices; i++)
- if (!bitmap_bit_p (dumped, i))
- dump_rdg_component (file, rdg, rdg->vertices[i].component, dumped);
-
+ for (int i = 0; i < rdg->n_vertices; i++)
+ dump_rdg_vertex (file, rdg, i);
fprintf (file, ")\n");
- BITMAP_FREE (dumped);
}
/* Call dump_rdg on stderr. */
for (e = v->succ; e; e = e->succ_next)
switch (RDGE_TYPE (e))
{
- case input_dd:
- fprintf (file, "%d -> %d [label=input] \n", i, e->dest);
- break;
-
- case output_dd:
- fprintf (file, "%d -> %d [label=output] \n", i, e->dest);
- break;
-
case flow_dd:
/* These are the most common dependences: don't print these. */
fprintf (file, "%d -> %d \n", i, e->dest);
break;
- case anti_dd:
- fprintf (file, "%d -> %d [label=anti] \n", i, e->dest);
+ case control_dd:
+ fprintf (file, "%d -> %d [label=control] \n", i, e->dest);
break;
default:
{
/* When debugging, you may want to enable the following code. */
#if 1
- FILE *file = popen("dot -Tx11", "w");
+ FILE *file = popen ("dot -Tx11", "w");
if (!file)
return;
dot_rdg_1 (file, rdg);
return index;
}
-/* Creates an edge in RDG for each distance vector from DDR. The
- order that we keep track of in the RDG is the order in which
- statements have to be executed. */
-
-static void
-create_rdg_edge_for_ddr (struct graph *rdg, ddr_p ddr)
-{
- struct graph_edge *e;
- int va, vb;
- data_reference_p dra = DDR_A (ddr);
- data_reference_p drb = DDR_B (ddr);
- unsigned level = ddr_dependence_level (ddr);
-
- /* For non scalar dependences, when the dependence is REVERSED,
- statement B has to be executed before statement A. */
- if (level > 0
- && !DDR_REVERSED_P (ddr))
- {
- data_reference_p tmp = dra;
- dra = drb;
- drb = tmp;
- }
-
- va = rdg_vertex_for_stmt (rdg, DR_STMT (dra));
- vb = rdg_vertex_for_stmt (rdg, DR_STMT (drb));
-
- if (va < 0 || vb < 0)
- return;
-
- e = add_edge (rdg, va, vb);
- e->data = XNEW (struct rdg_edge);
-
- RDGE_LEVEL (e) = level;
- RDGE_RELATION (e) = ddr;
-
- /* Determines the type of the data dependence. */
- if (DR_IS_READ (dra) && DR_IS_READ (drb))
- RDGE_TYPE (e) = input_dd;
- else if (DR_IS_WRITE (dra) && DR_IS_WRITE (drb))
- RDGE_TYPE (e) = output_dd;
- else if (DR_IS_WRITE (dra) && DR_IS_READ (drb))
- RDGE_TYPE (e) = flow_dd;
- else if (DR_IS_READ (dra) && DR_IS_WRITE (drb))
- RDGE_TYPE (e) = anti_dd;
-}
-
/* Creates dependence edges in RDG for all the uses of DEF. IDEF is
the index of DEF in RDG. */
e = add_edge (rdg, idef, use);
e->data = XNEW (struct rdg_edge);
RDGE_TYPE (e) = flow_dd;
- RDGE_RELATION (e) = NULL;
+ }
+}
+
+/* Creates an edge for the control dependences of BB to the vertex V. */
+
+static void
+create_edge_for_control_dependence (struct graph *rdg, basic_block bb,
+ int v, control_dependences *cd)
+{
+ bitmap_iterator bi;
+ unsigned edge_n;
+ EXECUTE_IF_SET_IN_BITMAP (cd->get_edges_dependent_on (bb->index),
+ 0, edge_n, bi)
+ {
+ basic_block cond_bb = cd->get_edge (edge_n)->src;
+ gimple stmt = last_stmt (cond_bb);
+ if (stmt && is_ctrl_stmt (stmt))
+ {
+ struct graph_edge *e;
+ int c = rdg_vertex_for_stmt (rdg, stmt);
+ if (c < 0)
+ continue;
+
+ e = add_edge (rdg, c, v);
+ e->data = XNEW (struct rdg_edge);
+ RDGE_TYPE (e) = control_dd;
+ }
}
}
/* Creates the edges of the reduced dependence graph RDG. */
static void
-create_rdg_edges (struct graph *rdg, vec<ddr_p> ddrs)
+create_rdg_flow_edges (struct graph *rdg)
{
int i;
- struct data_dependence_relation *ddr;
def_operand_p def_p;
ssa_op_iter iter;
- FOR_EACH_VEC_ELT (ddrs, i, ddr)
- if (DDR_ARE_DEPENDENT (ddr) == NULL_TREE)
- create_rdg_edge_for_ddr (rdg, ddr);
- else
- free_dependence_relation (ddr);
-
for (i = 0; i < rdg->n_vertices; i++)
FOR_EACH_PHI_OR_STMT_DEF (def_p, RDG_STMT (rdg, i),
iter, SSA_OP_DEF)
create_rdg_edges_for_scalar (rdg, DEF_FROM_PTR (def_p), i);
}
+/* Creates the edges of the reduced dependence graph RDG. */
+
+static void
+create_rdg_cd_edges (struct graph *rdg, control_dependences *cd)
+{
+ int i;
+
+ for (i = 0; i < rdg->n_vertices; i++)
+ {
+ gimple stmt = RDG_STMT (rdg, i);
+ if (gimple_code (stmt) == GIMPLE_PHI)
+ {
+ edge_iterator ei;
+ edge e;
+ FOR_EACH_EDGE (e, ei, gimple_bb (stmt)->preds)
+ create_edge_for_control_dependence (rdg, e->src, i, cd);
+ }
+ else
+ create_edge_for_control_dependence (rdg, gimple_bb (stmt), i, cd);
+ }
+}
+
/* Build the vertices of the reduced dependence graph RDG. Return false
if that failed. */
return true;
}
-/* Initialize STMTS with all the statements of LOOP. When
- INCLUDE_PHIS is true, include also the PHI nodes. The order in
+/* Initialize STMTS with all the statements of LOOP. The order in
which we discover statements is important as
generate_loops_for_partition is using the same traversal for
- identifying statements. */
+ identifying statements in loop copies. */
static void
stmts_from_loop (struct loop *loop, vec<gimple> *stmts)
gimple stmt;
for (bsi = gsi_start_phis (bb); !gsi_end_p (bsi); gsi_next (&bsi))
- stmts->safe_push (gsi_stmt (bsi));
+ if (!virtual_operand_p (gimple_phi_result (gsi_stmt (bsi))))
+ stmts->safe_push (gsi_stmt (bsi));
for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi))
{
free (bbs);
}
-/* Returns true when all the dependences are computable. */
-
-static bool
-known_dependences_p (vec<ddr_p> dependence_relations)
-{
- ddr_p ddr;
- unsigned int i;
-
- FOR_EACH_VEC_ELT (dependence_relations, i, ddr)
- if (DDR_ARE_DEPENDENT (ddr) == chrec_dont_know)
- return false;
-
- return true;
-}
-
-/* Build the Reduced Dependence Graph (RDG) with one vertex per
- statement of the loop nest, and one edge per data dependence or
- scalar dependence. */
-
-struct graph *
-build_empty_rdg (int n_stmts)
-{
- struct graph *rdg = new_graph (n_stmts);
- return rdg;
-}
-
/* Free the reduced dependence graph RDG. */
static void
struct graph_edge *e;
for (e = v->succ; e; e = e->succ_next)
- {
- free_dependence_relation (RDGE_RELATION (e));
- free (e->data);
- }
+ free (e->data);
if (v->data)
{
scalar dependence. */
static struct graph *
-build_rdg (vec<loop_p> loop_nest)
+build_rdg (vec<loop_p> loop_nest, control_dependences *cd)
{
struct graph *rdg;
- vec<gimple> stmts;
vec<data_reference_p> datarefs;
- vec<ddr_p> dependence_relations;
/* Create the RDG vertices from the stmts of the loop nest. */
- stmts.create (10);
+ auto_vec<gimple, 10> stmts;
stmts_from_loop (loop_nest[0], &stmts);
- rdg = build_empty_rdg (stmts.length ());
+ rdg = new_graph (stmts.length ());
datarefs.create (10);
if (!create_rdg_vertices (rdg, stmts, loop_nest[0], &datarefs))
{
- stmts.release ();
datarefs.release ();
free_rdg (rdg);
return NULL;
}
stmts.release ();
- /* Create the RDG edges from the data dependences in the loop nest. */
- dependence_relations.create (100);
- if (!compute_all_dependences (datarefs, &dependence_relations, loop_nest,
- false)
- || !known_dependences_p (dependence_relations))
- {
- free_dependence_relations (dependence_relations);
- datarefs.release ();
- free_rdg (rdg);
- return NULL;
- }
- create_rdg_edges (rdg, dependence_relations);
- dependence_relations.release ();
+ create_rdg_flow_edges (rdg);
+ if (cd)
+ create_rdg_cd_edges (rdg, cd);
+
datarefs.release ();
return rdg;
}
-/* Determines whether the statement from vertex V of the RDG has a
- definition used outside the loop that contains this statement. */
-
-static bool
-rdg_defs_used_in_other_loops_p (struct graph *rdg, int v)
-{
- gimple stmt = RDG_STMT (rdg, v);
- struct loop *loop = loop_containing_stmt (stmt);
- use_operand_p imm_use_p;
- imm_use_iterator iterator;
- ssa_op_iter it;
- def_operand_p def_p;
-
- if (!loop)
- return true;
-
- FOR_EACH_PHI_OR_STMT_DEF (def_p, stmt, it, SSA_OP_DEF)
- {
- FOR_EACH_IMM_USE_FAST (imm_use_p, iterator, DEF_FROM_PTR (def_p))
- {
- if (loop_containing_stmt (USE_STMT (imm_use_p)) != loop)
- return true;
- }
- }
-
- return false;
-}
-
enum partition_kind {
- PKIND_NORMAL, PKIND_REDUCTION, PKIND_MEMSET, PKIND_MEMCPY
+ PKIND_NORMAL, PKIND_MEMSET, PKIND_MEMCPY
};
typedef struct partition_s
{
bitmap stmts;
bitmap loops;
- bool has_writes;
+ bool reduction_p;
enum partition_kind kind;
/* data-references a kind != PKIND_NORMAL partition is about. */
data_reference_p main_dr;
data_reference_p secondary_dr;
+ tree niter;
+ bool plus_one;
} *partition_t;
partition_t partition = XCNEW (struct partition_s);
partition->stmts = stmts ? stmts : BITMAP_ALLOC (NULL);
partition->loops = loops ? loops : BITMAP_ALLOC (NULL);
- partition->has_writes = false;
+ partition->reduction_p = false;
partition->kind = PKIND_NORMAL;
return partition;
}
static bool
partition_builtin_p (partition_t partition)
{
- return partition->kind > PKIND_REDUCTION;
+ return partition->kind != PKIND_NORMAL;
}
-/* Returns true if the partition has an writes. */
+/* Returns true if the partition contains a reduction. */
static bool
-partition_has_writes (partition_t partition)
+partition_reduction_p (partition_t partition)
+{
+ return partition->reduction_p;
+}
+
+/* Merge PARTITION into the partition DEST. */
+
+static void
+partition_merge_into (partition_t dest, partition_t partition)
{
- return partition->has_writes;
+ dest->kind = PKIND_NORMAL;
+ bitmap_ior_into (dest->stmts, partition->stmts);
+ if (partition_reduction_p (partition))
+ dest->reduction_p = true;
}
+
/* Returns true when DEF is an SSA_NAME defined in LOOP and used after
the LOOP. */
edge preheader = loop_preheader_edge (loop);
initialize_original_copy_tables ();
- res = slpeel_tree_duplicate_loop_to_edge_cfg (loop, preheader);
+ res = slpeel_tree_duplicate_loop_to_edge_cfg (loop, NULL, preheader);
gcc_assert (res != NULL);
free_original_copy_tables ();
delete_update_ssa ();
generate_loops_for_partition (struct loop *loop, partition_t partition,
bool copy_p)
{
- unsigned i, x;
+ unsigned i;
gimple_stmt_iterator bsi;
basic_block *bbs;
create_bb_after_loop (loop);
}
- /* Remove stmts not in the PARTITION bitmap. The order in which we
- visit the phi nodes and the statements is exactly as in
- stmts_from_loop. */
+ /* Remove stmts not in the PARTITION bitmap. */
bbs = get_loop_body_in_dom_order (loop);
if (MAY_HAVE_DEBUG_STMTS)
- for (x = 0, i = 0; i < loop->num_nodes; i++)
+ for (i = 0; i < loop->num_nodes; i++)
{
basic_block bb = bbs[i];
for (bsi = gsi_start_phis (bb); !gsi_end_p (bsi); gsi_next (&bsi))
- if (!bitmap_bit_p (partition->stmts, x++))
- reset_debug_uses (gsi_stmt (bsi));
+ {
+ gimple phi = gsi_stmt (bsi);
+ if (!virtual_operand_p (gimple_phi_result (phi))
+ && !bitmap_bit_p (partition->stmts, gimple_uid (phi)))
+ reset_debug_uses (phi);
+ }
for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi))
{
gimple stmt = gsi_stmt (bsi);
if (gimple_code (stmt) != GIMPLE_LABEL
&& !is_gimple_debug (stmt)
- && !bitmap_bit_p (partition->stmts, x++))
+ && !bitmap_bit_p (partition->stmts, gimple_uid (stmt)))
reset_debug_uses (stmt);
}
}
- for (x = 0, i = 0; i < loop->num_nodes; i++)
+ for (i = 0; i < loop->num_nodes; i++)
{
basic_block bb = bbs[i];
for (bsi = gsi_start_phis (bb); !gsi_end_p (bsi);)
- if (!bitmap_bit_p (partition->stmts, x++))
- {
- gimple phi = gsi_stmt (bsi);
- if (virtual_operand_p (gimple_phi_result (phi)))
- mark_virtual_phi_result_for_renaming (phi);
+ {
+ gimple phi = gsi_stmt (bsi);
+ if (!virtual_operand_p (gimple_phi_result (phi))
+ && !bitmap_bit_p (partition->stmts, gimple_uid (phi)))
remove_phi_node (&bsi, true);
- }
- else
- gsi_next (&bsi);
+ else
+ gsi_next (&bsi);
+ }
for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi);)
{
gimple stmt = gsi_stmt (bsi);
if (gimple_code (stmt) != GIMPLE_LABEL
&& !is_gimple_debug (stmt)
- && !bitmap_bit_p (partition->stmts, x++))
+ && !bitmap_bit_p (partition->stmts, gimple_uid (stmt)))
{
- unlink_stmt_vdef (stmt);
- gsi_remove (&bsi, true);
- release_defs (stmt);
+ /* Choose an arbitrary path through the empty CFG part
+ that this unnecessary control stmt controls. */
+ if (gimple_code (stmt) == GIMPLE_COND)
+ {
+ gimple_cond_make_false (stmt);
+ update_stmt (stmt);
+ }
+ else if (gimple_code (stmt) == GIMPLE_SWITCH)
+ {
+ gimple_switch_set_index
+ (stmt, CASE_LOW (gimple_switch_label (stmt, 1)));
+ update_stmt (stmt);
+ }
+ else
+ {
+ unlink_stmt_vdef (stmt);
+ gsi_remove (&bsi, true);
+ release_defs (stmt);
+ continue;
+ }
}
- else
- gsi_next (&bsi);
+ gsi_next (&bsi);
}
}
/* Build the size argument for a memory operation call. */
static tree
-build_size_arg_loc (location_t loc, data_reference_p dr, tree nb_iter)
+build_size_arg_loc (location_t loc, data_reference_p dr, tree nb_iter,
+ bool plus_one)
{
- tree size;
- size = fold_build2_loc (loc, MULT_EXPR, sizetype,
- fold_convert_loc (loc, sizetype, nb_iter),
+ tree size = fold_convert_loc (loc, sizetype, nb_iter);
+ if (plus_one)
+ size = size_binop (PLUS_EXPR, size, size_one_node);
+ size = fold_build2_loc (loc, MULT_EXPR, sizetype, size,
TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (dr))));
- return fold_convert_loc (loc, size_type_node, size);
+ size = fold_convert_loc (loc, size_type_node, size);
+ return size;
}
/* Build an address argument for a memory operation call. */
{
gimple_stmt_iterator gsi;
gimple stmt, fn_call;
- tree nb_iter, mem, fn, nb_bytes;
+ tree mem, fn, nb_bytes;
location_t loc;
tree val;
stmt = DR_STMT (partition->main_dr);
loc = gimple_location (stmt);
- if (gimple_bb (stmt) == loop->latch)
- nb_iter = number_of_latch_executions (loop);
- else
- nb_iter = number_of_exit_cond_executions (loop);
/* The new statements will be placed before LOOP. */
gsi = gsi_last_bb (loop_preheader_edge (loop)->src);
- nb_bytes = build_size_arg_loc (loc, partition->main_dr, nb_iter);
+ nb_bytes = build_size_arg_loc (loc, partition->main_dr, partition->niter,
+ partition->plus_one);
nb_bytes = force_gimple_operand_gsi (&gsi, nb_bytes, true, NULL_TREE,
false, GSI_CONTINUE_LINKING);
mem = build_addr_arg_loc (loc, partition->main_dr, nb_bytes);
{
gimple_stmt_iterator gsi;
gimple stmt, fn_call;
- tree nb_iter, dest, src, fn, nb_bytes;
+ tree dest, src, fn, nb_bytes;
location_t loc;
enum built_in_function kind;
stmt = DR_STMT (partition->main_dr);
loc = gimple_location (stmt);
- if (gimple_bb (stmt) == loop->latch)
- nb_iter = number_of_latch_executions (loop);
- else
- nb_iter = number_of_exit_cond_executions (loop);
/* The new statements will be placed before LOOP. */
gsi = gsi_last_bb (loop_preheader_edge (loop)->src);
- nb_bytes = build_size_arg_loc (loc, partition->main_dr, nb_iter);
+ nb_bytes = build_size_arg_loc (loc, partition->main_dr, partition->niter,
+ partition->plus_one);
nb_bytes = force_gimple_operand_gsi (&gsi, nb_bytes, true, NULL_TREE,
false, GSI_CONTINUE_LINKING);
dest = build_addr_arg_loc (loc, partition->main_dr, nb_bytes);
{
switch (partition->kind)
{
+ case PKIND_NORMAL:
+ /* Reductions all have to be in the last partition. */
+ gcc_assert (!partition_reduction_p (partition)
+ || !copy_p);
+ generate_loops_for_partition (loop, partition, copy_p);
+ return;
+
case PKIND_MEMSET:
generate_memset_builtin (loop, partition);
- /* If this is the last partition for which we generate code, we have
- to destroy the loop. */
- if (!copy_p)
- destroy_loop (loop);
break;
case PKIND_MEMCPY:
generate_memcpy_builtin (loop, partition);
- /* If this is the last partition for which we generate code, we have
- to destroy the loop. */
- if (!copy_p)
- destroy_loop (loop);
- break;
-
- case PKIND_REDUCTION:
- /* Reductions all have to be in the last partition. */
- gcc_assert (!copy_p);
- case PKIND_NORMAL:
- generate_loops_for_partition (loop, partition, copy_p);
break;
default:
gcc_unreachable ();
}
-}
-
-
-/* Returns true if the node V of RDG cannot be recomputed. */
-
-static bool
-rdg_cannot_recompute_vertex_p (struct graph *rdg, int v)
-{
- if (RDG_MEM_WRITE_STMT (rdg, v))
- return true;
- return false;
+ /* Common tail for partitions we turn into a call. If this was the last
+ partition for which we generate code, we have to destroy the loop. */
+ if (!copy_p)
+ destroy_loop (loop);
}
-/* Returns true when the vertex V has already been generated in the
- current partition (V is in PROCESSED), or when V belongs to another
- partition and cannot be recomputed (V is not in REMAINING_STMTS). */
-
-static inline bool
-already_processed_vertex_p (bitmap processed, int v)
-{
- return bitmap_bit_p (processed, v);
-}
-
-static void rdg_flag_vertex_and_dependent (struct graph *, int, partition_t,
- bitmap);
-
-/* Flag V from RDG as part of PARTITION, and also flag its loop number
- in LOOPS. */
-
-static void
-rdg_flag_vertex (struct graph *rdg, int v, partition_t partition)
-{
- struct loop *loop;
- if (!bitmap_set_bit (partition->stmts, v))
- return;
-
- loop = loop_containing_stmt (RDG_STMT (rdg, v));
- bitmap_set_bit (partition->loops, loop->num);
-
- if (rdg_cannot_recompute_vertex_p (rdg, v))
- partition->has_writes = true;
-}
+/* Returns a partition with all the statements needed for computing
+ the vertex V of the RDG, also including the loop exit conditions. */
-/* Flag in the bitmap PARTITION the vertex V and all its predecessors.
- Also flag their loop number in LOOPS. */
-
-static void
-rdg_flag_vertex_and_dependent (struct graph *rdg, int v, partition_t partition,
- bitmap processed)
+static partition_t
+build_rdg_partition_for_vertex (struct graph *rdg, int v)
{
+ partition_t partition = partition_alloc (NULL, NULL);
+ auto_vec<int, 3> nodes;
unsigned i;
- vec<int> nodes;
- nodes.create (3);
int x;
graphds_dfs (rdg, &v, 1, &nodes, false, NULL);
FOR_EACH_VEC_ELT (nodes, i, x)
- if (bitmap_set_bit (processed, x))
- rdg_flag_vertex (rdg, x, partition);
-
- nodes.release ();
-}
-
-/* Initialize CONDS with all the condition statements from the basic
- blocks of LOOP. */
-
-static void
-collect_condition_stmts (struct loop *loop, vec<gimple> *conds)
-{
- unsigned i;
- edge e;
- vec<edge> exits = get_loop_exit_edges (loop);
-
- FOR_EACH_VEC_ELT (exits, i, e)
- {
- gimple cond = last_stmt (e->src);
-
- if (cond)
- conds->safe_push (cond);
- }
-
- exits.release ();
-}
-
-/* Add to PARTITION all the exit condition statements for LOOPS
- together with all their dependent statements determined from
- RDG. */
-
-static void
-rdg_flag_loop_exits (struct graph *rdg, partition_t partition,
- bitmap processed)
-{
- unsigned i;
- bitmap_iterator bi;
- vec<gimple> conds;
- conds.create (3);
-
- EXECUTE_IF_SET_IN_BITMAP (partition->loops, 0, i, bi)
- collect_condition_stmts (get_loop (cfun, i), &conds);
-
- while (!conds.is_empty ())
{
- gimple cond = conds.pop ();
- int v = rdg_vertex_for_stmt (rdg, cond);
- if (!already_processed_vertex_p (processed, v))
- {
- bitmap saved_loops = BITMAP_ALLOC (NULL);
- bitmap_copy (saved_loops, partition->loops);
- rdg_flag_vertex_and_dependent (rdg, v, partition, processed);
- EXECUTE_IF_AND_COMPL_IN_BITMAP (partition->loops, saved_loops,
- 0, i, bi)
- collect_condition_stmts (get_loop (cfun, i), &conds);
- BITMAP_FREE (saved_loops);
- }
+ bitmap_set_bit (partition->stmts, x);
+ bitmap_set_bit (partition->loops,
+ loop_containing_stmt (RDG_STMT (rdg, x))->num);
}
- conds.release ();
-}
-
-/* Returns a bitmap in which all the statements needed for computing
- the strongly connected component C of the RDG are flagged, also
- including the loop exit conditions. */
-
-static partition_t
-build_rdg_partition_for_component (struct graph *rdg, rdgc c)
-{
- partition_t partition = partition_alloc (NULL, NULL);
- bitmap processed = BITMAP_ALLOC (NULL);
-
- /* Flag the first vertex of the component and its dependent nodes.
- Other members of the component are included in its dependencies.
- ??? What do we need components for again? To early merge initial
- vertices that are in a SCC of the RDG? */
- rdg_flag_vertex_and_dependent (rdg, c->vertices[0], partition, processed);
-
- rdg_flag_loop_exits (rdg, partition, processed);
-
- BITMAP_FREE (processed);
return partition;
}
-/* Free memory for COMPONENTS. */
-
-static void
-free_rdg_components (vec<rdgc> components)
-{
- int i;
- rdgc x;
-
- FOR_EACH_VEC_ELT (components, i, x)
- {
- x->vertices.release ();
- free (x);
- }
-
- components.release ();
-}
-
-/* Build the COMPONENTS vector with the strongly connected components
- of RDG in which the STARTING_VERTICES occur. */
-
-static void
-rdg_build_components (struct graph *rdg, vec<int> starting_vertices,
- vec<rdgc> *components)
-{
- int i, v;
- bitmap saved_components = BITMAP_ALLOC (NULL);
- int n_components = graphds_scc (rdg, NULL);
- /* ??? Macros cannot process template types with more than one
- argument, so we need this typedef. */
- typedef vec<int> vec_int_heap;
- vec<int> *all_components = XNEWVEC (vec_int_heap, n_components);
-
- for (i = 0; i < n_components; i++)
- all_components[i].create (3);
-
- for (i = 0; i < rdg->n_vertices; i++)
- all_components[rdg->vertices[i].component].safe_push (i);
-
- FOR_EACH_VEC_ELT (starting_vertices, i, v)
- {
- int c = rdg->vertices[v].component;
-
- if (bitmap_set_bit (saved_components, c))
- {
- rdgc x = XCNEW (struct rdg_component);
- x->num = c;
- x->vertices = all_components[c];
-
- components->safe_push (x);
- }
- }
-
- for (i = 0; i < n_components; i++)
- if (!bitmap_bit_p (saved_components, i))
- all_components[i].release ();
-
- free (all_components);
- BITMAP_FREE (saved_components);
-}
-
/* Classifies the builtin kind we can generate for PARTITION of RDG and LOOP.
For the moment we detect only the memset zero pattern. */
tree nb_iter;
data_reference_p single_load, single_store;
bool volatiles_p = false;
+ bool plus_one = false;
partition->kind = PKIND_NORMAL;
partition->main_dr = NULL;
partition->secondary_dr = NULL;
+ partition->niter = NULL_TREE;
+ partition->plus_one = false;
EXECUTE_IF_SET_IN_BITMAP (partition->stmts, 0, i, bi)
{
if (gimple_has_volatile_ops (stmt))
volatiles_p = true;
- /* If the stmt has uses outside of the loop fail.
- ??? If the stmt is generated in another partition that
- is not created as builtin we can ignore this. */
+ /* If the stmt has uses outside of the loop mark it as reduction. */
if (stmt_has_scalar_dependences_outside_loop (loop, stmt))
{
- if (dump_file && (dump_flags & TDF_DETAILS))
- fprintf (dump_file, "not generating builtin, partition has "
- "scalar uses outside of the loop\n");
- partition->kind = PKIND_REDUCTION;
+ partition->reduction_p = true;
return;
}
}
|| !flag_tree_loop_distribute_patterns)
return;
- nb_iter = number_of_exit_cond_executions (loop);
- if (!nb_iter || nb_iter == chrec_dont_know)
- return;
-
/* Detect memset and memcpy. */
single_load = NULL;
single_store = NULL;
}
}
+ if (!single_store)
+ return;
+
+ nb_iter = number_of_latch_executions (loop);
+ if (!nb_iter || nb_iter == chrec_dont_know)
+ return;
+ if (dominated_by_p (CDI_DOMINATORS, single_exit (loop)->src,
+ gimple_bb (DR_STMT (single_store))))
+ plus_one = true;
+
if (single_store && !single_load)
{
gimple stmt = DR_STMT (single_store);
&& !SSA_NAME_IS_DEFAULT_DEF (rhs)
&& flow_bb_inside_loop_p (loop, gimple_bb (SSA_NAME_DEF_STMT (rhs))))
return;
- if (!adjacent_dr_p (single_store))
+ if (!adjacent_dr_p (single_store)
+ || !dominated_by_p (CDI_DOMINATORS,
+ loop->latch, gimple_bb (stmt)))
return;
partition->kind = PKIND_MEMSET;
partition->main_dr = single_store;
+ partition->niter = nb_iter;
+ partition->plus_one = plus_one;
}
else if (single_store && single_load)
{
if (!adjacent_dr_p (single_store)
|| !adjacent_dr_p (single_load)
|| !operand_equal_p (DR_STEP (single_store),
- DR_STEP (single_load), 0))
+ DR_STEP (single_load), 0)
+ || !dominated_by_p (CDI_DOMINATORS,
+ loop->latch, gimple_bb (store)))
return;
/* Now check that if there is a dependence this dependence is
of a suitable form for memmove. */
partition->kind = PKIND_MEMCPY;
partition->main_dr = single_store;
partition->secondary_dr = single_load;
+ partition->niter = nb_iter;
+ partition->plus_one = plus_one;
}
}
distributed in different loops. */
static void
-rdg_build_partitions (struct graph *rdg, vec<rdgc> components,
- vec<int> *other_stores,
- vec<partition_t> *partitions, bitmap processed)
+rdg_build_partitions (struct graph *rdg,
+ vec<gimple> starting_stmts,
+ vec<partition_t> *partitions)
{
+ bitmap processed = BITMAP_ALLOC (NULL);
int i;
- rdgc x;
- partition_t partition = partition_alloc (NULL, NULL);
+ gimple stmt;
- FOR_EACH_VEC_ELT (components, i, x)
+ FOR_EACH_VEC_ELT (starting_stmts, i, stmt)
{
- partition_t np;
- int v = x->vertices[0];
+ int v = rdg_vertex_for_stmt (rdg, stmt);
+ if (dump_file && (dump_flags & TDF_DETAILS))
+ fprintf (dump_file,
+ "ldist asked to generate code for vertex %d\n", v);
+
+ /* If the vertex is already contained in another partition so
+ is the partition rooted at it. */
if (bitmap_bit_p (processed, v))
continue;
- np = build_rdg_partition_for_component (rdg, x);
- bitmap_ior_into (partition->stmts, np->stmts);
- partition->has_writes = partition_has_writes (np);
- bitmap_ior_into (processed, np->stmts);
- partition_free (np);
+ partition_t partition = build_rdg_partition_for_vertex (rdg, v);
+ bitmap_ior_into (processed, partition->stmts);
- if (partition_has_writes (partition))
+ if (dump_file && (dump_flags & TDF_DETAILS))
{
- if (dump_file && (dump_flags & TDF_DETAILS))
- {
- fprintf (dump_file, "ldist useful partition:\n");
- dump_bitmap (dump_file, partition->stmts);
- }
-
- partitions->safe_push (partition);
- partition = partition_alloc (NULL, NULL);
+ fprintf (dump_file, "ldist useful partition:\n");
+ dump_bitmap (dump_file, partition->stmts);
}
- }
-
- /* Add the nodes from the RDG that were not marked as processed, and
- that are used outside the current loop. These are scalar
- computations that are not yet part of previous partitions. */
- for (i = 0; i < rdg->n_vertices; i++)
- if (!bitmap_bit_p (processed, i)
- && rdg_defs_used_in_other_loops_p (rdg, i))
- other_stores->safe_push (i);
-
- /* If there are still statements left in the OTHER_STORES array,
- create other components and partitions with these stores and
- their dependences. */
- if (other_stores->length () > 0)
- {
- vec<rdgc> comps;
- comps.create (3);
- vec<int> foo;
- foo.create (3);
-
- rdg_build_components (rdg, *other_stores, &comps);
- rdg_build_partitions (rdg, comps, &foo, partitions, processed);
- foo.release ();
- free_rdg_components (comps);
+ partitions->safe_push (partition);
}
- /* If there is something left in the last partition, save it. */
- if (bitmap_count_bits (partition->stmts) > 0)
- partitions->safe_push (partition);
- else
- partition_free (partition);
+ /* All vertices should have been assigned to at least one partition now,
+ other than vertices belonging to dead code. */
+
+ BITMAP_FREE (processed);
}
/* Dump to FILE the PARTITIONS. */
return false;
}
-/* Generate code from STARTING_VERTICES in RDG. Returns the number of
- distributed loops. */
+/* Compute partition dependence created by the data references in DRS1
+ and DRS2 and modify and return DIR according to that. */
static int
-ldist_gen (struct loop *loop, struct graph *rdg,
- vec<int> starting_vertices)
+pg_add_dependence_edges (struct graph *rdg, vec<loop_p> loops, int dir,
+ vec<data_reference_p> drs1,
+ vec<data_reference_p> drs2)
{
- int i, nbp;
- vec<rdgc> components;
- components.create (3);
- vec<partition_t> partitions;
- partitions.create (3);
- vec<int> other_stores;
- other_stores.create (3);
+ data_reference_p dr1, dr2;
+
+ /* dependence direction - 0 is no dependence, -1 is back,
+ 1 is forth, 2 is both (we can stop then, merging will occur). */
+ for (int ii = 0; drs1.iterate (ii, &dr1); ++ii)
+ for (int jj = 0; drs2.iterate (jj, &dr2); ++jj)
+ {
+ int this_dir = 1;
+ ddr_p ddr;
+ /* Re-shuffle data-refs to be in dominator order. */
+ if (rdg_vertex_for_stmt (rdg, DR_STMT (dr1))
+ > rdg_vertex_for_stmt (rdg, DR_STMT (dr2)))
+ {
+ data_reference_p tem = dr1;
+ dr1 = dr2;
+ dr2 = tem;
+ this_dir = -this_dir;
+ }
+ ddr = initialize_data_dependence_relation (dr1, dr2, loops);
+ compute_affine_dependence (ddr, loops[0]);
+ if (DDR_ARE_DEPENDENT (ddr) == chrec_dont_know)
+ this_dir = 2;
+ else if (DDR_ARE_DEPENDENT (ddr) == NULL_TREE)
+ {
+ if (DDR_REVERSED_P (ddr))
+ {
+ data_reference_p tem = dr1;
+ dr1 = dr2;
+ dr2 = tem;
+ this_dir = -this_dir;
+ }
+ /* Known dependences can still be unordered througout the
+ iteration space, see gcc.dg/tree-ssa/ldist-16.c. */
+ if (DDR_NUM_DIST_VECTS (ddr) != 1)
+ this_dir = 2;
+ /* If the overlap is exact preserve stmt order. */
+ else if (lambda_vector_zerop (DDR_DIST_VECT (ddr, 0), 1))
+ ;
+ else
+ {
+ /* Else as the distance vector is lexicographic positive
+ swap the dependence direction. */
+ this_dir = -this_dir;
+ }
+ }
+ else
+ this_dir = 0;
+ free_dependence_relation (ddr);
+ if (dir == 0)
+ dir = this_dir;
+ else if (dir != this_dir)
+ return 2;
+ }
+ return dir;
+}
+
+/* Compare postorder number of the partition graph vertices V1 and V2. */
+
+static int
+pgcmp (const void *v1_, const void *v2_)
+{
+ const vertex *v1 = (const vertex *)v1_;
+ const vertex *v2 = (const vertex *)v2_;
+ return v2->post - v1->post;
+}
+
+/* Distributes the code from LOOP in such a way that producer
+ statements are placed before consumer statements. Tries to separate
+ only the statements from STMTS into separate loops.
+ Returns the number of distributed loops. */
+
+static int
+distribute_loop (struct loop *loop, vec<gimple> stmts,
+ control_dependences *cd, int *nb_calls)
+{
+ struct graph *rdg;
partition_t partition;
- bitmap processed = BITMAP_ALLOC (NULL);
bool any_builtin;
+ int i, nbp;
+ graph *pg = NULL;
+ int num_sccs = 1;
- for (i = 0; i < rdg->n_vertices; i++)
- {
- /* Save in OTHER_STORES all the memory writes that are not in
- STARTING_VERTICES. */
- if (RDG_MEM_WRITE_STMT (rdg, i))
- {
- int v;
- unsigned j;
- bool found = false;
+ *nb_calls = 0;
+ auto_vec<loop_p, 3> loop_nest;
+ if (!find_loop_nest (loop, &loop_nest))
+ return 0;
- FOR_EACH_VEC_ELT (starting_vertices, j, v)
- if (i == v)
- {
- found = true;
- break;
- }
+ rdg = build_rdg (loop_nest, cd);
+ if (!rdg)
+ {
+ if (dump_file && (dump_flags & TDF_DETAILS))
+ fprintf (dump_file,
+ "Loop %d not distributed: failed to build the RDG.\n",
+ loop->num);
- if (!found)
- other_stores.safe_push (i);
- }
+ return 0;
}
- rdg_build_components (rdg, starting_vertices, &components);
- rdg_build_partitions (rdg, components, &other_stores, &partitions,
- processed);
- BITMAP_FREE (processed);
+ if (dump_file && (dump_flags & TDF_DETAILS))
+ dump_rdg (dump_file, rdg);
+
+ auto_vec<partition_t, 3> partitions;
+ rdg_build_partitions (rdg, stmts, &partitions);
any_builtin = false;
FOR_EACH_VEC_ELT (partitions, i, partition)
any_builtin |= partition_builtin_p (partition);
}
+ /* If we are only distributing patterns but did not detect any,
+ simply bail out. */
+ if (!flag_tree_loop_distribution
+ && !any_builtin)
+ {
+ nbp = 0;
+ goto ldist_done;
+ }
+
/* If we are only distributing patterns fuse all partitions that
- were not properly classified as builtins. Else fuse partitions
- with similar memory accesses. */
+ were not classified as builtins. This also avoids chopping
+ a loop into pieces, separated by builtin calls. That is, we
+ only want no or a single loop body remaining. */
+ partition_t into;
if (!flag_tree_loop_distribution)
{
- partition_t into;
- /* If we did not detect any builtin simply bail out. */
- if (!any_builtin)
- {
- nbp = 0;
- goto ldist_done;
- }
- /* Only fuse adjacent non-builtin partitions, see PR53616.
- ??? Use dependence information to improve partition ordering. */
- i = 0;
- do
- {
- for (; partitions.iterate (i, &into); ++i)
- if (!partition_builtin_p (into))
- break;
- for (++i; partitions.iterate (i, &partition); ++i)
- if (!partition_builtin_p (partition))
+ for (i = 0; partitions.iterate (i, &into); ++i)
+ if (!partition_builtin_p (into))
+ break;
+ for (++i; partitions.iterate (i, &partition); ++i)
+ if (!partition_builtin_p (partition))
+ {
+ if (dump_file && (dump_flags & TDF_DETAILS))
{
- bitmap_ior_into (into->stmts, partition->stmts);
- if (partition->kind == PKIND_REDUCTION)
- into->kind = PKIND_REDUCTION;
- partitions.ordered_remove (i);
- partition_free (partition);
- i--;
+ fprintf (dump_file, "fusing non-builtin partitions\n");
+ dump_bitmap (dump_file, into->stmts);
+ dump_bitmap (dump_file, partition->stmts);
}
- else
- break;
- }
- while ((unsigned) i < partitions.length ());
+ partition_merge_into (into, partition);
+ partitions.unordered_remove (i);
+ partition_free (partition);
+ i--;
+ }
}
- else
+
+ /* Due to limitations in the transform phase we have to fuse all
+ reduction partitions into the last partition so the existing
+ loop will contain all loop-closed PHI nodes. */
+ for (i = 0; partitions.iterate (i, &into); ++i)
+ if (partition_reduction_p (into))
+ break;
+ for (i = i + 1; partitions.iterate (i, &partition); ++i)
+ if (partition_reduction_p (partition))
+ {
+ if (dump_file && (dump_flags & TDF_DETAILS))
+ {
+ fprintf (dump_file, "fusing partitions\n");
+ dump_bitmap (dump_file, into->stmts);
+ dump_bitmap (dump_file, partition->stmts);
+ fprintf (dump_file, "because they have reductions\n");
+ }
+ partition_merge_into (into, partition);
+ partitions.unordered_remove (i);
+ partition_free (partition);
+ i--;
+ }
+
+ /* Apply our simple cost model - fuse partitions with similar
+ memory accesses. */
+ for (i = 0; partitions.iterate (i, &into); ++i)
{
- partition_t into;
- int j;
- for (i = 0; partitions.iterate (i, &into); ++i)
+ if (partition_builtin_p (into))
+ continue;
+ for (int j = i + 1;
+ partitions.iterate (j, &partition); ++j)
{
- if (partition_builtin_p (into))
- continue;
- for (j = i + 1;
- partitions.iterate (j, &partition); ++j)
+ if (!partition_builtin_p (partition)
+ && similar_memory_accesses (rdg, into, partition))
{
- if (!partition_builtin_p (partition)
- /* ??? The following is horribly inefficient,
- we are re-computing and analyzing data-references
- of the stmts in the partitions all the time. */
- && similar_memory_accesses (rdg, into, partition))
+ if (dump_file && (dump_flags & TDF_DETAILS))
{
- if (dump_file && (dump_flags & TDF_DETAILS))
- {
- fprintf (dump_file, "fusing partitions\n");
- dump_bitmap (dump_file, into->stmts);
- dump_bitmap (dump_file, partition->stmts);
- fprintf (dump_file, "because they have similar "
- "memory accesses\n");
- }
- bitmap_ior_into (into->stmts, partition->stmts);
- if (partition->kind == PKIND_REDUCTION)
- into->kind = PKIND_REDUCTION;
- partitions.ordered_remove (j);
- partition_free (partition);
- j--;
+ fprintf (dump_file, "fusing partitions\n");
+ dump_bitmap (dump_file, into->stmts);
+ dump_bitmap (dump_file, partition->stmts);
+ fprintf (dump_file, "because they have similar "
+ "memory accesses\n");
}
+ partition_merge_into (into, partition);
+ partitions.unordered_remove (j);
+ partition_free (partition);
+ j--;
}
}
}
- /* Fuse all reduction partitions into the last. */
+ /* Build the partition dependency graph. */
if (partitions.length () > 1)
{
- partition_t into = partitions.last ();
- for (i = partitions.length () - 2; i >= 0; --i)
+ pg = new_graph (partitions.length ());
+ struct pgdata {
+ partition_t partition;
+ vec<data_reference_p> writes;
+ vec<data_reference_p> reads;
+ };
+#define PGDATA(i) ((pgdata *)(pg->vertices[i].data))
+ for (i = 0; partitions.iterate (i, &partition); ++i)
{
- partition_t what = partitions[i];
- if (what->kind == PKIND_REDUCTION)
- {
- if (dump_file && (dump_flags & TDF_DETAILS))
- {
- fprintf (dump_file, "fusing partitions\n");
- dump_bitmap (dump_file, into->stmts);
- dump_bitmap (dump_file, what->stmts);
- fprintf (dump_file, "because the latter has reductions\n");
- }
- bitmap_ior_into (into->stmts, what->stmts);
- into->kind = PKIND_REDUCTION;
- partitions.ordered_remove (i);
- partition_free (what);
- }
+ vertex *v = &pg->vertices[i];
+ pgdata *data = new pgdata;
+ data_reference_p dr;
+ /* FIXME - leaks. */
+ v->data = data;
+ bitmap_iterator bi;
+ unsigned j;
+ data->partition = partition;
+ data->reads = vNULL;
+ data->writes = vNULL;
+ EXECUTE_IF_SET_IN_BITMAP (partition->stmts, 0, j, bi)
+ for (int k = 0; RDG_DATAREFS (rdg, j).iterate (k, &dr); ++k)
+ if (DR_IS_READ (dr))
+ data->reads.safe_push (dr);
+ else
+ data->writes.safe_push (dr);
+ }
+ partition_t partition1, partition2;
+ for (i = 0; partitions.iterate (i, &partition1); ++i)
+ for (int j = i + 1; partitions.iterate (j, &partition2); ++j)
+ {
+ /* dependence direction - 0 is no dependence, -1 is back,
+ 1 is forth, 2 is both (we can stop then, merging will occur). */
+ int dir = 0;
+ dir = pg_add_dependence_edges (rdg, loop_nest, dir,
+ PGDATA(i)->writes,
+ PGDATA(j)->reads);
+ if (dir != 2)
+ dir = pg_add_dependence_edges (rdg, loop_nest, dir,
+ PGDATA(i)->reads,
+ PGDATA(j)->writes);
+ if (dir != 2)
+ dir = pg_add_dependence_edges (rdg, loop_nest, dir,
+ PGDATA(i)->writes,
+ PGDATA(j)->writes);
+ if (dir == 1 || dir == 2)
+ add_edge (pg, i, j);
+ if (dir == -1 || dir == 2)
+ add_edge (pg, j, i);
+ }
+
+ /* Add edges to the reduction partition (if any) to force it last. */
+ unsigned j;
+ for (j = 0; partitions.iterate (j, &partition); ++j)
+ if (partition_reduction_p (partition))
+ break;
+ if (j < partitions.length ())
+ {
+ for (unsigned i = 0; partitions.iterate (i, &partition); ++i)
+ if (i != j)
+ add_edge (pg, i, j);
}
+
+ /* Compute partitions we cannot separate and fuse them. */
+ num_sccs = graphds_scc (pg, NULL);
+ for (i = 0; i < num_sccs; ++i)
+ {
+ partition_t first;
+ int j;
+ for (j = 0; partitions.iterate (j, &first); ++j)
+ if (pg->vertices[j].component == i)
+ break;
+ for (j = j + 1; partitions.iterate (j, &partition); ++j)
+ if (pg->vertices[j].component == i)
+ {
+ if (dump_file && (dump_flags & TDF_DETAILS))
+ {
+ fprintf (dump_file, "fusing partitions\n");
+ dump_bitmap (dump_file, first->stmts);
+ dump_bitmap (dump_file, partition->stmts);
+ fprintf (dump_file, "because they are in the same "
+ "dependence SCC\n");
+ }
+ partition_merge_into (first, partition);
+ partitions[j] = NULL;
+ partition_free (partition);
+ PGDATA (j)->partition = NULL;
+ }
+ }
+
+ /* Now order the remaining nodes in postorder. */
+ qsort (pg->vertices, pg->n_vertices, sizeof (vertex), pgcmp);
+ partitions.truncate (0);
+ for (i = 0; i < pg->n_vertices; ++i)
+ {
+ pgdata *data = PGDATA (i);
+ if (data->partition)
+ partitions.safe_push (data->partition);
+ data->reads.release ();
+ data->writes.release ();
+ delete data;
+ }
+ gcc_assert (partitions.length () == (unsigned)num_sccs);
+ free_graph (pg);
}
nbp = partitions.length ();
dump_rdg_partitions (dump_file, partitions);
FOR_EACH_VEC_ELT (partitions, i, partition)
- generate_code_for_partition (loop, partition, i < nbp - 1);
+ {
+ if (partition_builtin_p (partition))
+ (*nb_calls)++;
+ generate_code_for_partition (loop, partition, i < nbp - 1);
+ }
ldist_done:
FOR_EACH_VEC_ELT (partitions, i, partition)
partition_free (partition);
- other_stores.release ();
- partitions.release ();
- free_rdg_components (components);
- return nbp;
-}
-
-/* Distributes the code from LOOP in such a way that producer
- statements are placed before consumer statements. When STMTS is
- NULL, performs the maximal distribution, if STMTS is not NULL,
- tries to separate only these statements from the LOOP's body.
- Returns the number of distributed loops. */
-
-static int
-distribute_loop (struct loop *loop, vec<gimple> stmts)
-{
- int res = 0;
- struct graph *rdg;
- gimple s;
- unsigned i;
- vec<int> vertices;
- vec<loop_p> loop_nest;
-
- loop_nest.create (3);
- if (!find_loop_nest (loop, &loop_nest))
- {
- loop_nest.release ();
- return 0;
- }
-
- rdg = build_rdg (loop_nest);
- if (!rdg)
- {
- if (dump_file && (dump_flags & TDF_DETAILS))
- fprintf (dump_file,
- "FIXME: Loop %d not distributed: failed to build the RDG.\n",
- loop->num);
-
- loop_nest.release ();
- return res;
- }
-
- vertices.create (3);
-
- if (dump_file && (dump_flags & TDF_DETAILS))
- dump_rdg (dump_file, rdg);
-
- FOR_EACH_VEC_ELT (stmts, i, s)
- {
- int v = rdg_vertex_for_stmt (rdg, s);
-
- if (v >= 0)
- {
- vertices.safe_push (v);
-
- if (dump_file && (dump_flags & TDF_DETAILS))
- fprintf (dump_file,
- "ldist asked to generate code for vertex %d\n", v);
- }
- }
-
- res = ldist_gen (loop, rdg, vertices);
- vertices.release ();
free_rdg (rdg);
- loop_nest.release ();
- return res;
+ return nbp - *nb_calls;
}
/* Distribute all loops in the current function. */
tree_loop_distribution (void)
{
struct loop *loop;
- loop_iterator li;
bool changed = false;
basic_block bb;
+ control_dependences *cd = NULL;
- FOR_ALL_BB (bb)
+ FOR_ALL_BB_FN (bb, cfun)
{
gimple_stmt_iterator gsi;
for (gsi = gsi_start_phis (bb); !gsi_end_p (gsi); gsi_next (&gsi))
/* We can at the moment only distribute non-nested loops, thus restrict
walking to innermost loops. */
- FOR_EACH_LOOP (li, loop, LI_ONLY_INNERMOST)
+ FOR_EACH_LOOP (loop, LI_ONLY_INNERMOST)
{
- vec<gimple> work_list = vNULL;
+ auto_vec<gimple> work_list;
basic_block *bbs;
int num = loop->num;
- int nb_generated_loops = 0;
unsigned int i;
/* If the loop doesn't have a single exit we will fail anyway,
if (!optimize_loop_for_speed_p (loop))
continue;
- /* Only distribute loops with a header and latch for now. */
- if (loop->num_nodes > 2)
- continue;
-
/* Initialize the worklist with stmts we seed the partitions with. */
bbs = get_loop_body_in_dom_order (loop);
for (i = 0; i < loop->num_nodes; ++i)
{
gimple_stmt_iterator gsi;
+ for (gsi = gsi_start_phis (bbs[i]); !gsi_end_p (gsi); gsi_next (&gsi))
+ {
+ gimple phi = gsi_stmt (gsi);
+ if (virtual_operand_p (gimple_phi_result (phi)))
+ continue;
+ /* Distribute stmts which have defs that are used outside of
+ the loop. */
+ if (!stmt_has_scalar_dependences_outside_loop (loop, phi))
+ continue;
+ work_list.safe_push (phi);
+ }
for (gsi = gsi_start_bb (bbs[i]); !gsi_end_p (gsi); gsi_next (&gsi))
{
gimple stmt = gsi_stmt (gsi);
+
+ /* If there is a stmt with side-effects bail out - we
+ cannot and should not distribute this loop. */
+ if (gimple_has_side_effects (stmt))
+ {
+ work_list.truncate (0);
+ goto out;
+ }
+
/* Distribute stmts which have defs that are used outside of
the loop. */
if (stmt_has_scalar_dependences_outside_loop (loop, stmt))
;
/* Otherwise only distribute stores for now. */
- else if (!gimple_assign_single_p (stmt)
- || is_gimple_reg (gimple_assign_lhs (stmt)))
+ else if (!gimple_vdef (stmt))
continue;
work_list.safe_push (stmt);
}
}
+out:
free (bbs);
+ int nb_generated_loops = 0;
+ int nb_generated_calls = 0;
+ location_t loc = find_loop_location (loop);
if (work_list.length () > 0)
- nb_generated_loops = distribute_loop (loop, work_list);
-
- if (nb_generated_loops > 0)
- changed = true;
-
- if (dump_file && (dump_flags & TDF_DETAILS))
{
- if (nb_generated_loops > 1)
- fprintf (dump_file, "Loop %d distributed: split to %d loops.\n",
- num, nb_generated_loops);
- else
- fprintf (dump_file, "Loop %d is the same.\n", num);
+ if (!cd)
+ {
+ calculate_dominance_info (CDI_DOMINATORS);
+ calculate_dominance_info (CDI_POST_DOMINATORS);
+ cd = new control_dependences (create_edge_list ());
+ free_dominance_info (CDI_POST_DOMINATORS);
+ }
+ nb_generated_loops = distribute_loop (loop, work_list, cd,
+ &nb_generated_calls);
}
- work_list.release ();
+ if (nb_generated_loops + nb_generated_calls > 0)
+ {
+ changed = true;
+ dump_printf_loc (MSG_OPTIMIZED_LOCATIONS,
+ loc, "Loop %d distributed: split to %d loops "
+ "and %d library calls.\n",
+ num, nb_generated_loops, nb_generated_calls);
+ }
+ else if (dump_file && (dump_flags & TDF_DETAILS))
+ fprintf (dump_file, "Loop %d is the same.\n", num);
}
+ if (cd)
+ delete cd;
+
if (changed)
{
mark_virtual_operands_for_renaming (cfun);
class pass_loop_distribution : public gimple_opt_pass
{
public:
- pass_loop_distribution(gcc::context *ctxt)
- : gimple_opt_pass(pass_data_loop_distribution, ctxt)
+ pass_loop_distribution (gcc::context *ctxt)
+ : gimple_opt_pass (pass_data_loop_distribution, ctxt)
{}
/* opt_pass methods: */