tree-vect-slp.c (vect_get_and_check_slp_defs): Remove code dealing with cost.
authorRichard Biener <rguenther@suse.de>
Tue, 9 Apr 2013 14:12:34 +0000 (14:12 +0000)
committerRichard Biener <rguenth@gcc.gnu.org>
Tue, 9 Apr 2013 14:12:34 +0000 (14:12 +0000)
2013-04-09  Richard Biener  <rguenther@suse.de>

* tree-vect-slp.c (vect_get_and_check_slp_defs): Remove code
dealing with cost.
(vect_build_slp_tree): Likewise.
(vect_analyze_slp_cost_1, vect_analyze_slp_cost): New functions
calculating the cost of a SLP instance.
(vect_analyze_slp_instance): Use it from here, after building
the SLP tree.

From-SVN: r197635

gcc/ChangeLog
gcc/tree-vect-slp.c

index 69cefc06f204ded1504e408adf4c1b6868dbce39..42823116e0468ff07ecfc6b9a2c2aed2b23eb7b9 100644 (file)
@@ -1,3 +1,13 @@
+2013-04-09  Richard Biener  <rguenther@suse.de>
+
+       * tree-vect-slp.c (vect_get_and_check_slp_defs): Remove code
+       dealing with cost.
+       (vect_build_slp_tree): Likewise.
+       (vect_analyze_slp_cost_1, vect_analyze_slp_cost): New functions
+       calculating the cost of a SLP instance.
+       (vect_analyze_slp_instance): Use it from here, after building
+       the SLP tree.
+
 2013-04-09  Jakub Jelinek  <jakub@redhat.com>
 
        PR middle-end/56883
index 34131060ee5e4fc4cc4f9253918ee5c43e0daf2d..28f86b3955010bc0f4ae0a3e9bdd3bdede4845a5 100644 (file)
@@ -199,11 +199,8 @@ vect_get_place_in_interleaving_chain (gimple stmt, gimple first_stmt)
 
 static bool
 vect_get_and_check_slp_defs (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo,
-                             slp_tree slp_node, gimple stmt,
-                            int ncopies_for_cost, bool first,
-                             vec<slp_oprnd_info> *oprnds_info,
-                            stmt_vector_for_cost *prologue_cost_vec,
-                            stmt_vector_for_cost *body_cost_vec)
+                             gimple stmt, bool first,
+                             vec<slp_oprnd_info> *oprnds_info)
 {
   tree oprnd;
   unsigned int i, number_of_oprnds;
@@ -211,8 +208,6 @@ vect_get_and_check_slp_defs (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo,
   gimple def_stmt;
   enum vect_def_type dt = vect_uninitialized_def;
   enum vect_def_type dt_op0 = vect_uninitialized_def;
-  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
-  tree lhs = gimple_get_lhs (stmt);
   struct loop *loop = NULL;
   enum tree_code rhs_code;
   bool different_types = false;
@@ -344,22 +339,6 @@ vect_get_and_check_slp_defs (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo,
            {
              def_op0 = def;
              dt_op0 = dt;
-             /* Analyze costs (for the first stmt of the group only).  */
-             if (REFERENCE_CLASS_P (lhs))
-               /* Store.  */
-                vect_model_store_cost (stmt_info, ncopies_for_cost, false,
-                                      dt, slp_node, prologue_cost_vec,
-                                      body_cost_vec);
-             else
-               {
-                 enum vect_def_type dts[2];
-                 dts[0] = dt;
-                 dts[1] = vect_uninitialized_def;
-                 /* Not memory operation (we don't call this function for
-                    loads).  */
-                 vect_model_simple_cost (stmt_info, ncopies_for_cost, dts,
-                                         prologue_cost_vec, body_cost_vec);
-               }
            }
        }
       else
@@ -479,13 +458,11 @@ vect_get_and_check_slp_defs (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo,
 
 static bool
 vect_build_slp_tree (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo,
-                     slp_tree *node, unsigned int group_size, int *outside_cost,
-                     int ncopies_for_cost, unsigned int *max_nunits,
+                     slp_tree *node, unsigned int group_size,
+                     unsigned int *max_nunits,
                      vec<int> *load_permutation,
                      vec<slp_tree> *loads,
-                     unsigned int vectorization_factor, bool *loads_permuted,
-                    stmt_vector_for_cost *prologue_cost_vec,
-                    stmt_vector_for_cost *body_cost_vec)
+                     unsigned int vectorization_factor, bool *loads_permuted)
 {
   unsigned int i;
   vec<gimple> stmts = SLP_TREE_SCALAR_STMTS (*node);
@@ -750,11 +727,8 @@ vect_build_slp_tree (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo,
          if (REFERENCE_CLASS_P (lhs))
            {
              /* Store.  */
-             if (!vect_get_and_check_slp_defs (loop_vinfo, bb_vinfo, *node,
-                                               stmt, ncopies_for_cost,
-                                               (i == 0), &oprnds_info,
-                                               prologue_cost_vec,
-                                               body_cost_vec))
+             if (!vect_get_and_check_slp_defs (loop_vinfo, bb_vinfo,
+                                               stmt, (i == 0), &oprnds_info))
                {
                  vect_free_oprnd_info (oprnds_info);
                  return false;
@@ -863,11 +837,6 @@ vect_build_slp_tree (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo,
                      vect_free_oprnd_info (oprnds_info);
                       return false;
                     }
-
-                  /* Analyze costs (for the first stmt in the group).  */
-                  vect_model_load_cost (vinfo_for_stmt (stmt),
-                                        ncopies_for_cost, false, *node,
-                                       prologue_cost_vec, body_cost_vec);
                 }
 
               /* Store the place of this load in the interleaving chain.  In
@@ -943,10 +912,8 @@ vect_build_slp_tree (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo,
             }
 
          /* Find the def-stmts.  */
-         if (!vect_get_and_check_slp_defs (loop_vinfo, bb_vinfo, *node, stmt,
-                                           ncopies_for_cost, (i == 0),
-                                           &oprnds_info, prologue_cost_vec,
-                                           body_cost_vec))
+         if (!vect_get_and_check_slp_defs (loop_vinfo, bb_vinfo, stmt,
+                                           (i == 0), &oprnds_info))
            {
              vect_free_oprnd_info (oprnds_info);
              return false;
@@ -959,12 +926,7 @@ vect_build_slp_tree (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo,
     {
       loads->safe_push (*node);
       if (permutation)
-        {
-         gimple first_stmt = stmts[0];
-          *loads_permuted = true;
-         (void) record_stmt_cost (body_cost_vec, group_size, vec_perm, 
-                                  vinfo_for_stmt (first_stmt), 0, vect_body);
-        }
+       *loads_permuted = true;
       else
         {
           /* We don't check here complex numbers chains, so we set
@@ -989,10 +951,8 @@ vect_build_slp_tree (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo,
       child = vect_create_new_slp_node (oprnd_info->def_stmts);
       if (!child
           || !vect_build_slp_tree (loop_vinfo, bb_vinfo, &child, group_size,
-                                  outside_cost, ncopies_for_cost,
                                   max_nunits, load_permutation, loads,
-                                  vectorization_factor, loads_permuted,
-                                  prologue_cost_vec, body_cost_vec))
+                                  vectorization_factor, loads_permuted))
         {
          if (child)
            oprnd_info->def_stmts = vNULL;
@@ -1510,6 +1470,122 @@ vect_find_last_store_in_slp_instance (slp_instance instance)
   return last_store;
 }
 
+/* Compute the cost for the SLP node NODE in the SLP instance INSTANCE.  */
+
+static void
+vect_analyze_slp_cost_1 (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo,
+                        slp_instance instance, slp_tree node,
+                        stmt_vector_for_cost *prologue_cost_vec,
+                        unsigned ncopies_for_cost)
+{
+  stmt_vector_for_cost *body_cost_vec = &SLP_INSTANCE_BODY_COST_VEC (instance);
+
+  unsigned i;
+  slp_tree child;
+  gimple stmt, s;
+  stmt_vec_info stmt_info;
+  tree lhs;
+  unsigned group_size = SLP_INSTANCE_GROUP_SIZE (instance);
+
+  /* Recurse down the SLP tree.  */
+  FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
+    vect_analyze_slp_cost_1 (loop_vinfo, bb_vinfo,
+                            instance, child, prologue_cost_vec,
+                            ncopies_for_cost);
+
+  /* Look at the first scalar stmt to determine the cost.  */
+  stmt = SLP_TREE_SCALAR_STMTS (node)[0];
+  stmt_info = vinfo_for_stmt (stmt);
+  if (STMT_VINFO_GROUPED_ACCESS (stmt_info))
+    {
+      if (DR_IS_WRITE (STMT_VINFO_DATA_REF (stmt_info)))
+       vect_model_store_cost (stmt_info, ncopies_for_cost, false,
+                              vect_uninitialized_def,
+                              node, prologue_cost_vec, body_cost_vec);
+      else
+       {
+         int i;
+         gcc_checking_assert (DR_IS_READ (STMT_VINFO_DATA_REF (stmt_info)));
+         vect_model_load_cost (stmt_info, ncopies_for_cost, false,
+                               node, prologue_cost_vec, body_cost_vec);
+         /* If the load is permuted record the cost for the permutation.
+            ???  Loads from multiple chains are let through here only
+            for a single special case involving complex numbers where
+            in the end no permutation is necessary.  */
+         FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, s)
+           if ((STMT_VINFO_GROUP_FIRST_ELEMENT (vinfo_for_stmt (s))
+                == STMT_VINFO_GROUP_FIRST_ELEMENT (stmt_info))
+               && vect_get_place_in_interleaving_chain
+                    (s, STMT_VINFO_GROUP_FIRST_ELEMENT (stmt_info)) != i)
+             {
+               record_stmt_cost (body_cost_vec, group_size, vec_perm,
+                                 stmt_info, 0, vect_body);
+               break;
+             }
+       }
+    }
+  else
+    record_stmt_cost (body_cost_vec, ncopies_for_cost, vector_stmt,
+                     stmt_info, 0, vect_body);
+
+  /* Scan operands and account for prologue cost of constants/externals.
+     ???  This over-estimates cost for multiple uses and should be
+     re-engineered.  */
+  lhs = gimple_get_lhs (stmt);
+  for (i = 0; i < gimple_num_ops (stmt); ++i)
+    {
+      tree def, op = gimple_op (stmt, i);
+      gimple def_stmt;
+      enum vect_def_type dt;
+      if (!op || op == lhs)
+       continue;
+      if (vect_is_simple_use (op, NULL, loop_vinfo, bb_vinfo,
+                             &def_stmt, &def, &dt)
+         && (dt == vect_constant_def || dt == vect_external_def))
+       record_stmt_cost (prologue_cost_vec, 1, vector_stmt,
+                         stmt_info, 0, vect_prologue);
+    }
+}
+
+/* Compute the cost for the SLP instance INSTANCE.  */
+
+static void
+vect_analyze_slp_cost (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo,
+                      slp_instance instance, unsigned nunits)
+{
+  stmt_vector_for_cost body_cost_vec, prologue_cost_vec;
+  unsigned ncopies_for_cost;
+  stmt_info_for_cost *si;
+  unsigned i;
+
+  /* Calculate the number of vector stmts to create based on the unrolling
+     factor (number of vectors is 1 if NUNITS >= GROUP_SIZE, and is
+     GROUP_SIZE / NUNITS otherwise.  */
+  unsigned group_size = SLP_INSTANCE_GROUP_SIZE (instance);
+  ncopies_for_cost = least_common_multiple (nunits, group_size) / nunits;
+
+  prologue_cost_vec.create (10);
+  body_cost_vec.create (10);
+  SLP_INSTANCE_BODY_COST_VEC (instance) = body_cost_vec;
+  vect_analyze_slp_cost_1 (loop_vinfo, bb_vinfo,
+                          instance, SLP_INSTANCE_TREE (instance),
+                          &prologue_cost_vec, ncopies_for_cost);
+
+  /* Record the prologue costs, which were delayed until we were
+     sure that SLP was successful.  Unlike the body costs, we know
+     the final values now regardless of the loop vectorization factor.  */
+  void *data = (loop_vinfo ? LOOP_VINFO_TARGET_COST_DATA (loop_vinfo)
+               : BB_VINFO_TARGET_COST_DATA (bb_vinfo));
+  FOR_EACH_VEC_ELT (prologue_cost_vec, i, si)
+    {
+      struct _stmt_vec_info *stmt_info
+       = si->stmt ? vinfo_for_stmt (si->stmt) : NULL;
+      (void) add_stmt_cost (data, si->count, si->kind, stmt_info,
+                           si->misalign, vect_prologue);
+    }
+
+  prologue_cost_vec.release ();
+}
 
 /* Analyze an SLP instance starting from a group of grouped stores.  Call
    vect_build_slp_tree to build a tree of packed stmts if possible.
@@ -1526,15 +1602,13 @@ vect_analyze_slp_instance (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo,
   tree vectype, scalar_type = NULL_TREE;
   gimple next;
   unsigned int vectorization_factor = 0;
-  int outside_cost = 0, ncopies_for_cost, i;
+  int i;
   unsigned int max_nunits = 0;
   vec<int> load_permutation;
   vec<slp_tree> loads;
   struct data_reference *dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (stmt));
   bool loads_permuted = false;
   vec<gimple> scalar_stmts;
-  stmt_vector_for_cost body_cost_vec, prologue_cost_vec;
-  stmt_info_for_cost *si;
 
   if (GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)))
     {
@@ -1615,26 +1689,14 @@ vect_analyze_slp_instance (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo,
 
   node = vect_create_new_slp_node (scalar_stmts);
 
-  /* Calculate the number of vector stmts to create based on the unrolling
-     factor (number of vectors is 1 if NUNITS >= GROUP_SIZE, and is
-     GROUP_SIZE / NUNITS otherwise.  */
-  ncopies_for_cost = unrolling_factor * group_size / nunits;
-
   load_permutation.create (group_size * group_size);
   loads.create (group_size);
-  prologue_cost_vec.create (10);
-  body_cost_vec.create (10);
 
   /* Build the tree for the SLP instance.  */
   if (vect_build_slp_tree (loop_vinfo, bb_vinfo, &node, group_size,
-                           &outside_cost, ncopies_for_cost,
                           &max_nunits, &load_permutation, &loads,
-                          vectorization_factor, &loads_permuted,
-                          &prologue_cost_vec, &body_cost_vec))
+                          vectorization_factor, &loads_permuted))
     {
-      void *data = (loop_vinfo ? LOOP_VINFO_TARGET_COST_DATA (loop_vinfo)
-                   : BB_VINFO_TARGET_COST_DATA (bb_vinfo));
-
       /* Calculate the unrolling factor based on the smallest type.  */
       if (max_nunits > nunits)
         unrolling_factor = least_common_multiple (max_nunits, group_size)
@@ -1647,8 +1709,6 @@ vect_analyze_slp_instance (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo,
                             "Build SLP failed: unrolling required in basic"
                             " block SLP");
          vect_free_slp_tree (node);
-         body_cost_vec.release ();
-         prologue_cost_vec.release ();
          load_permutation.release ();
          loads.release ();
           return false;
@@ -1659,7 +1719,7 @@ vect_analyze_slp_instance (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo,
       SLP_INSTANCE_TREE (new_instance) = node;
       SLP_INSTANCE_GROUP_SIZE (new_instance) = group_size;
       SLP_INSTANCE_UNROLLING_FACTOR (new_instance) = unrolling_factor;
-      SLP_INSTANCE_BODY_COST_VEC (new_instance) = body_cost_vec;
+      SLP_INSTANCE_BODY_COST_VEC (new_instance) = vNULL;
       SLP_INSTANCE_LOADS (new_instance) = loads;
       SLP_INSTANCE_FIRST_LOAD_STMT (new_instance) = NULL;
       SLP_INSTANCE_LOAD_PERMUTATION (new_instance) = load_permutation;
@@ -1678,7 +1738,6 @@ vect_analyze_slp_instance (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo,
                 }
 
               vect_free_slp_instance (new_instance);
-             prologue_cost_vec.release ();
               return false;
             }
 
@@ -1688,18 +1747,9 @@ vect_analyze_slp_instance (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo,
       else
         SLP_INSTANCE_LOAD_PERMUTATION (new_instance).release ();
 
-      /* Record the prologue costs, which were delayed until we were
-        sure that SLP was successful.  Unlike the body costs, we know
-        the final values now regardless of the loop vectorization factor.  */
-      FOR_EACH_VEC_ELT (prologue_cost_vec, i, si)
-       {
-         struct _stmt_vec_info *stmt_info
-           = si->stmt ? vinfo_for_stmt (si->stmt) : NULL;
-         (void) add_stmt_cost (data, si->count, si->kind, stmt_info,
-                               si->misalign, vect_prologue);
-       }
-
-      prologue_cost_vec.release ();
+      /* Compute the costs of this SLP instance.  */
+      vect_analyze_slp_cost (loop_vinfo, bb_vinfo,
+                            new_instance, TYPE_VECTOR_SUBPARTS (vectype));
 
       if (loop_vinfo)
         LOOP_VINFO_SLP_INSTANCES (loop_vinfo).safe_push (new_instance);
@@ -1711,11 +1761,6 @@ vect_analyze_slp_instance (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo,
 
       return true;
     }
-  else
-    {
-      body_cost_vec.release ();
-      prologue_cost_vec.release ();
-    }
 
   /* Failed to SLP.  */
   /* Free the allocated memory.  */