From 437f4a0024d9829a138ad1f5b0311cbf4289d5e3 Mon Sep 17 00:00:00 2001 From: Ira Rosen Date: Thu, 28 Apr 2011 19:50:28 +0000 Subject: [PATCH] re PR tree-optimization/48765 (ICE in vect_transform_stmt) PR tree-optimization/48765 * tree-vectorizer.h (vect_make_slp_decision): Return bool. * tree-vect-loop.c (vect_analyze_loop_operations): Add new argument to indicate if loop aware SLP is being used. Scan the statements and update the vectorization factor according to the type of vectorization before statement analysis. (vect_analyze_loop_2): Get a return value from vect_make_slp_decision, pass it to vect_analyze_loop_operations. (vectorizable_reduction): Set number of copies to 1 in case of pure SLP statement. * tree-vect-stmts.c (vectorizable_conversion, vectorizable_assignment, vectorizable_shift, vectorizable_operation, vectorizable_type_demotion, vectorizable_type_promotion, vectorizable_store, vectorizable_load): Likewise. (vectorizable_condition): Move the check that it is not SLP vectorization before the number of copies check. * tree-vect-slp.c (vect_make_slp_decision): Return TRUE if decided to vectorize the loop using SLP. From-SVN: r173132 --- gcc/ChangeLog | 22 ++++++++ gcc/testsuite/ChangeLog | 5 ++ gcc/testsuite/gcc.dg/vect/pr48765.c | 82 +++++++++++++++++++++++++++++ gcc/tree-vect-loop.c | 66 +++++++++++++---------- gcc/tree-vect-slp.c | 7 ++- gcc/tree-vect-stmts.c | 24 ++++----- gcc/tree-vectorizer.h | 2 +- 7 files changed, 166 insertions(+), 42 deletions(-) create mode 100644 gcc/testsuite/gcc.dg/vect/pr48765.c diff --git a/gcc/ChangeLog b/gcc/ChangeLog index 819d689a730..459923e4686 100644 --- a/gcc/ChangeLog +++ b/gcc/ChangeLog @@ -1,3 +1,25 @@ +2011-04-28 Ira Rosen + + PR tree-optimization/48765 + * tree-vectorizer.h (vect_make_slp_decision): Return bool. + * tree-vect-loop.c (vect_analyze_loop_operations): Add new argument + to indicate if loop aware SLP is being used. Scan the statements + and update the vectorization factor according to the type of + vectorization before statement analysis. + (vect_analyze_loop_2): Get a return value from vect_make_slp_decision, + pass it to vect_analyze_loop_operations. + (vectorizable_reduction): Set number of copies to 1 in case of pure + SLP statement. + * tree-vect-stmts.c (vectorizable_conversion, + vectorizable_assignment, vectorizable_shift, + vectorizable_operation, vectorizable_type_demotion, + vectorizable_type_promotion, vectorizable_store, vectorizable_load): + Likewise. + (vectorizable_condition): Move the check that it is not SLP + vectorization before the number of copies check. + * tree-vect-slp.c (vect_make_slp_decision): Return TRUE if decided + to vectorize the loop using SLP. + 2011-04-28 Jakub Jelinek PR middle-end/48597 diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog index ab8d621ae05..6e17672032d 100644 --- a/gcc/testsuite/ChangeLog +++ b/gcc/testsuite/ChangeLog @@ -1,3 +1,8 @@ +2011-04-28 Ira Rosen + + PR tree-optimization/48765 + * gcc.dg/vect/pr48765.c: New. + 2011-04-28 Rainer Orth PR tree-optimization/48775 diff --git a/gcc/testsuite/gcc.dg/vect/pr48765.c b/gcc/testsuite/gcc.dg/vect/pr48765.c new file mode 100644 index 00000000000..469c4f423ce --- /dev/null +++ b/gcc/testsuite/gcc.dg/vect/pr48765.c @@ -0,0 +1,82 @@ +/* { dg-do compile { target powerpc*-*-* } } */ +/* { dg-options "-m64 -O3 -mcpu=power6" } */ + +enum reg_class +{ + NO_REGS, AP_REG, XRF_REGS, GENERAL_REGS, AGRF_REGS, XGRF_REGS, ALL_REGS, + LIM_REG_CLASSES +}; +enum machine_mode +{ + VOIDmode, QImode, HImode, PSImode, SImode, PDImode, DImode, TImode, OImode, + QFmode, HFmode, TQFmode, SFmode, DFmode, XFmode, TFmode, SCmode, DCmode, + XCmode, TCmode, CQImode, CHImode, CSImode, CDImode, CTImode, COImode, + BLKmode, CCmode, CCEVENmode, MAX_MACHINE_MODE +}; +typedef struct rtx_def +{ + int mode:8; +} + *rtx; +extern rtx *regno_reg_rtx; +typedef unsigned int HARD_REG_ELT_TYPE; +typedef HARD_REG_ELT_TYPE HARD_REG_SET[((64 + 32 - 1) / 32)]; +extern int reg_alloc_order[64]; +extern int max_regno; +extern int *reg_n_calls_crossed; +extern short *reg_renumber; +static int *reg_where_dead; +static int *reg_where_born; +static int *reg_order; +static char *regs_change_size; +static HARD_REG_SET *after_insn_hard_regs; +static int stupid_find_reg (int, enum reg_class, enum machine_mode, int, int, + int); +void +stupid_life_analysis (f, nregs, file) + rtx f; +{ + register int i; + for (i = (((64)) + 3) + 1; i < max_regno; i++) + { + register int r = reg_order[i]; + if ((int) LIM_REG_CLASSES > 1) + reg_renumber[r] = + stupid_find_reg (reg_n_calls_crossed[r], reg_preferred_class (r), + ((regno_reg_rtx[r])->mode), reg_where_born[r], + reg_where_dead[r], regs_change_size[r]); + } +} + +static int +stupid_find_reg (call_preserved, class, mode, born_insn, dead_insn, + changes_size) + int call_preserved; + enum reg_class class; + enum machine_mode mode; +{ + register int i, ins; + HARD_REG_SET used, this_reg; + for (ins = born_insn; ins < dead_insn; ins++) + do + { + register HARD_REG_ELT_TYPE *scan_tp_ = (used), *scan_fp_ = + (after_insn_hard_regs[ins]); + for (i = 0; i < ((64 + 32 - 1) / 32); i++) + *scan_tp_++ |= *scan_fp_++; + } + while (0); + for (i = 0; i < 64; i++) + { + int regno = reg_alloc_order[i]; + if (((used)[(regno) / ((unsigned) 32)] & + (((HARD_REG_ELT_TYPE) (1)) << ((regno) % ((unsigned) 32))))) + { + register int j; + if (j == regno) + return regno; + } + } +} + +/* { dg-final { cleanup-tree-dump "vect" } } */ diff --git a/gcc/tree-vect-loop.c b/gcc/tree-vect-loop.c index 5fecf2a0524..e05c324d0b9 100644 --- a/gcc/tree-vect-loop.c +++ b/gcc/tree-vect-loop.c @@ -1146,7 +1146,7 @@ vect_get_cost (enum vect_cost_for_stmt type_of_cost) Scan the loop stmts and make sure they are all vectorizable. */ static bool -vect_analyze_loop_operations (loop_vec_info loop_vinfo) +vect_analyze_loop_operations (loop_vec_info loop_vinfo, bool slp) { struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo); basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo); @@ -1167,6 +1167,40 @@ vect_analyze_loop_operations (loop_vec_info loop_vinfo) gcc_assert (LOOP_VINFO_VECT_FACTOR (loop_vinfo)); vectorization_factor = LOOP_VINFO_VECT_FACTOR (loop_vinfo); + if (slp) + { + /* If all the stmts in the loop can be SLPed, we perform only SLP, and + vectorization factor of the loop is the unrolling factor required by + the SLP instances. If that unrolling factor is 1, we say, that we + perform pure SLP on loop - cross iteration parallelism is not + exploited. */ + for (i = 0; i < nbbs; i++) + { + basic_block bb = bbs[i]; + for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si)) + { + gimple stmt = gsi_stmt (si); + stmt_vec_info stmt_info = vinfo_for_stmt (stmt); + gcc_assert (stmt_info); + if ((STMT_VINFO_RELEVANT_P (stmt_info) + || VECTORIZABLE_CYCLE_DEF (STMT_VINFO_DEF_TYPE (stmt_info))) + && !PURE_SLP_STMT (stmt_info)) + /* STMT needs both SLP and loop-based vectorization. */ + only_slp_in_loop = false; + } + } + + if (only_slp_in_loop) + vectorization_factor = LOOP_VINFO_SLP_UNROLLING_FACTOR (loop_vinfo); + else + vectorization_factor = least_common_multiple (vectorization_factor, + LOOP_VINFO_SLP_UNROLLING_FACTOR (loop_vinfo)); + + LOOP_VINFO_VECT_FACTOR (loop_vinfo) = vectorization_factor; + if (vect_print_dump_info (REPORT_DETAILS)) + fprintf (vect_dump, "Updating vectorization factor to %d ", + vectorization_factor); + } for (i = 0; i < nbbs; i++) { @@ -1272,18 +1306,8 @@ vect_analyze_loop_operations (loop_vec_info loop_vinfo) for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si)) { gimple stmt = gsi_stmt (si); - stmt_vec_info stmt_info = vinfo_for_stmt (stmt); - - gcc_assert (stmt_info); - if (!vect_analyze_stmt (stmt, &need_to_vectorize, NULL)) return false; - - if ((STMT_VINFO_RELEVANT_P (stmt_info) - || VECTORIZABLE_CYCLE_DEF (STMT_VINFO_DEF_TYPE (stmt_info))) - && !PURE_SLP_STMT (stmt_info)) - /* STMT needs both SLP and loop-based vectorization. */ - only_slp_in_loop = false; } } /* bbs */ @@ -1303,18 +1327,6 @@ vect_analyze_loop_operations (loop_vec_info loop_vinfo) return false; } - /* If all the stmts in the loop can be SLPed, we perform only SLP, and - vectorization factor of the loop is the unrolling factor required by the - SLP instances. If that unrolling factor is 1, we say, that we perform - pure SLP on loop - cross iteration parallelism is not exploited. */ - if (only_slp_in_loop) - vectorization_factor = LOOP_VINFO_SLP_UNROLLING_FACTOR (loop_vinfo); - else - vectorization_factor = least_common_multiple (vectorization_factor, - LOOP_VINFO_SLP_UNROLLING_FACTOR (loop_vinfo)); - - LOOP_VINFO_VECT_FACTOR (loop_vinfo) = vectorization_factor; - if (LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo) && vect_print_dump_info (REPORT_DETAILS)) fprintf (vect_dump, @@ -1410,7 +1422,7 @@ vect_analyze_loop_operations (loop_vec_info loop_vinfo) static bool vect_analyze_loop_2 (loop_vec_info loop_vinfo) { - bool ok, dummy; + bool ok, dummy, slp = false; int max_vf = MAX_VECTORIZATION_FACTOR; int min_vf = 2; @@ -1524,7 +1536,7 @@ vect_analyze_loop_2 (loop_vec_info loop_vinfo) if (ok) { /* Decide which possible SLP instances to SLP. */ - vect_make_slp_decision (loop_vinfo); + slp = vect_make_slp_decision (loop_vinfo); /* Find stmts that need to be both vectorized and SLPed. */ vect_detect_hybrid_slp (loop_vinfo); @@ -1533,7 +1545,7 @@ vect_analyze_loop_2 (loop_vec_info loop_vinfo) /* Scan all the operations in the loop and make sure they are vectorizable. */ - ok = vect_analyze_loop_operations (loop_vinfo); + ok = vect_analyze_loop_operations (loop_vinfo, slp); if (!ok) { if (vect_print_dump_info (REPORT_DETAILS)) @@ -4136,7 +4148,7 @@ vectorizable_reduction (gimple stmt, gimple_stmt_iterator *gsi, if (STMT_VINFO_LIVE_P (vinfo_for_stmt (reduc_def_stmt))) return false; - if (slp_node) + if (slp_node || PURE_SLP_STMT (stmt_info)) ncopies = 1; else ncopies = (LOOP_VINFO_VECT_FACTOR (loop_vinfo) diff --git a/gcc/tree-vect-slp.c b/gcc/tree-vect-slp.c index 6eb67ae5a75..c4464d52ee2 100644 --- a/gcc/tree-vect-slp.c +++ b/gcc/tree-vect-slp.c @@ -1351,9 +1351,10 @@ vect_analyze_slp (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo) /* For each possible SLP instance decide whether to SLP it and calculate overall - unrolling factor needed to SLP the loop. */ + unrolling factor needed to SLP the loop. Return TRUE if decided to SLP at + least one instance. */ -void +bool vect_make_slp_decision (loop_vec_info loop_vinfo) { unsigned int i, unrolling_factor = 1; @@ -1382,6 +1383,8 @@ vect_make_slp_decision (loop_vec_info loop_vinfo) if (decided_to_slp && vect_print_dump_info (REPORT_SLP)) fprintf (vect_dump, "Decided to SLP %d instances. Unrolling factor %d", decided_to_slp, unrolling_factor); + + return (decided_to_slp > 0); } diff --git a/gcc/tree-vect-stmts.c b/gcc/tree-vect-stmts.c index 879153d8031..bf6f36290fb 100644 --- a/gcc/tree-vect-stmts.c +++ b/gcc/tree-vect-stmts.c @@ -1747,7 +1747,7 @@ vectorizable_conversion (gimple stmt, gimple_stmt_iterator *gsi, /* Multiple types in SLP are handled by creating the appropriate number of vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in case of SLP. */ - if (slp_node) + if (slp_node || PURE_SLP_STMT (stmt_info)) ncopies = 1; /* Sanity check: make sure that at least one copy of the vectorized stmt @@ -1940,7 +1940,7 @@ vectorizable_assignment (gimple stmt, gimple_stmt_iterator *gsi, /* Multiple types in SLP are handled by creating the appropriate number of vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in case of SLP. */ - if (slp_node) + if (slp_node || PURE_SLP_STMT (stmt_info)) ncopies = 1; else ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits; @@ -2149,7 +2149,7 @@ vectorizable_shift (gimple stmt, gimple_stmt_iterator *gsi, /* Multiple types in SLP are handled by creating the appropriate number of vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in case of SLP. */ - if (slp_node) + if (slp_node || PURE_SLP_STMT (stmt_info)) ncopies = 1; else ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_in; @@ -2497,7 +2497,7 @@ vectorizable_operation (gimple stmt, gimple_stmt_iterator *gsi, /* Multiple types in SLP are handled by creating the appropriate number of vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in case of SLP. */ - if (slp_node) + if (slp_node || PURE_SLP_STMT (stmt_info)) ncopies = 1; else ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_in; @@ -2895,7 +2895,7 @@ vectorizable_type_demotion (gimple stmt, gimple_stmt_iterator *gsi, /* Multiple types in SLP are handled by creating the appropriate number of vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in case of SLP. */ - if (slp_node) + if (slp_node || PURE_SLP_STMT (stmt_info)) ncopies = 1; else ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_out; @@ -3175,7 +3175,7 @@ vectorizable_type_promotion (gimple stmt, gimple_stmt_iterator *gsi, /* Multiple types in SLP are handled by creating the appropriate number of vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in case of SLP. */ - if (slp_node) + if (slp_node || PURE_SLP_STMT (stmt_info)) ncopies = 1; else ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_in; @@ -3358,7 +3358,7 @@ vectorizable_store (gimple stmt, gimple_stmt_iterator *gsi, gimple *vec_stmt, /* Multiple types in SLP are handled by creating the appropriate number of vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in case of SLP. */ - if (slp) + if (slp || PURE_SLP_STMT (stmt_info)) ncopies = 1; else ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits; @@ -3851,7 +3851,7 @@ vectorizable_load (gimple stmt, gimple_stmt_iterator *gsi, gimple *vec_stmt, /* Multiple types in SLP are handled by creating the appropriate number of vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in case of SLP. */ - if (slp) + if (slp || PURE_SLP_STMT (stmt_info)) ncopies = 1; else ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits; @@ -4457,6 +4457,10 @@ vectorizable_condition (gimple stmt, gimple_stmt_iterator *gsi, /* FORNOW: unsupported in basic block SLP. */ gcc_assert (loop_vinfo); + /* FORNOW: SLP not supported. */ + if (STMT_SLP_TYPE (stmt_info)) + return false; + gcc_assert (ncopies >= 1); if (reduc_index && ncopies > 1) return false; /* FORNOW */ @@ -4469,10 +4473,6 @@ vectorizable_condition (gimple stmt, gimple_stmt_iterator *gsi, && reduc_def)) return false; - /* FORNOW: SLP not supported. */ - if (STMT_SLP_TYPE (stmt_info)) - return false; - /* FORNOW: not yet supported. */ if (STMT_VINFO_LIVE_P (stmt_info)) { diff --git a/gcc/tree-vectorizer.h b/gcc/tree-vectorizer.h index 6e63afcc716..9181f673830 100644 --- a/gcc/tree-vectorizer.h +++ b/gcc/tree-vectorizer.h @@ -870,7 +870,7 @@ extern bool vect_transform_slp_perm_load (gimple, VEC (tree, heap) *, extern bool vect_schedule_slp (loop_vec_info, bb_vec_info); extern void vect_update_slp_costs_according_to_vf (loop_vec_info); extern bool vect_analyze_slp (loop_vec_info, bb_vec_info); -extern void vect_make_slp_decision (loop_vec_info); +extern bool vect_make_slp_decision (loop_vec_info); extern void vect_detect_hybrid_slp (loop_vec_info); extern void vect_get_slp_defs (tree, tree, slp_tree, VEC (tree,heap) **, VEC (tree,heap) **, int); -- 2.30.2