From: Richard Biener
Date: Fri, 6 Nov 2020 10:11:42 +0000 (+0100)
Subject: tree-optimization/97706 - part one, refactor vect_determine_mask_precision
X-Git-Url: https://git.libre-soc.org/?a=commitdiff_plain;h=7307d8e10727aea8069c0e47e64a7a9b8588a22e;p=gcc.git

tree-optimization/97706 - part one, refactor vect_determine_mask_precision

This computes vect_determine_mask_precision in an RPO forward walk rather
than in a backward walk using a worklist.  It will make fixing PR97706
easier, but for bisecting I wanted it to be a separate change.

2020-11-06  Richard Biener

	PR tree-optimization/97706
	* tree-vect-patterns.c (vect_determine_mask_precision): Remove
	worklist operation.
	(vect_determine_stmt_precisions): Do not call
	vect_determine_mask_precision here.
	(vect_determine_precisions): Compute mask precision in a
	forward walk.
---

diff --git a/gcc/tree-vect-patterns.c b/gcc/tree-vect-patterns.c
index ac56acebe01..47d9fce594f 100644
--- a/gcc/tree-vect-patterns.c
+++ b/gcc/tree-vect-patterns.c
@@ -5017,104 +5017,88 @@ possible_vector_mask_operation_p (stmt_vec_info stmt_info)
 static void
 vect_determine_mask_precision (vec_info *vinfo, stmt_vec_info stmt_info)
 {
-  if (!possible_vector_mask_operation_p (stmt_info)
-      || stmt_info->mask_precision)
+  if (!possible_vector_mask_operation_p (stmt_info))
     return;
 
-  auto_vec worklist;
-  worklist.quick_push (stmt_info);
-  while (!worklist.is_empty ())
-    {
-      stmt_info = worklist.last ();
-      unsigned int orig_length = worklist.length ();
-
-      /* If at least one boolean input uses a vector mask type,
-         pick the mask type with the narrowest elements.
-
-         ??? This is the traditional behavior.  It should always produce
-         the smallest number of operations, but isn't necessarily the
-         optimal choice.  For example, if we have:
+  /* If at least one boolean input uses a vector mask type,
+     pick the mask type with the narrowest elements.
 
-           a = b & c
+     ??? This is the traditional behavior.  It should always produce
+     the smallest number of operations, but isn't necessarily the
+     optimal choice.  For example, if we have:
 
-         where:
+       a = b & c
 
-         - the user of a wants it to have a mask type for 16-bit elements (M16)
-         - b also uses M16
-         - c uses a mask type for 8-bit elements (M8)
+     where:
 
-         then picking M8 gives:
+     - the user of a wants it to have a mask type for 16-bit elements (M16)
+     - b also uses M16
+     - c uses a mask type for 8-bit elements (M8)
 
-         - 1 M16->M8 pack for b
-         - 1 M8 AND for a
-         - 2 M8->M16 unpacks for the user of a
+     then picking M8 gives:
 
-         whereas picking M16 would have given:
+     - 1 M16->M8 pack for b
+     - 1 M8 AND for a
+     - 2 M8->M16 unpacks for the user of a
 
-         - 2 M8->M16 unpacks for c
-         - 2 M16 ANDs for a
-
-         The number of operations are equal, but M16 would have given
-         a shorter dependency chain and allowed more ILP.  */
-      unsigned int precision = ~0U;
-      gassign *assign = as_a <gassign *> (stmt_info->stmt);
-      unsigned int nops = gimple_num_ops (assign);
-      for (unsigned int i = 1; i < nops; ++i)
-        {
-          tree rhs = gimple_op (assign, i);
-          if (!VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (rhs)))
-            continue;
+     whereas picking M16 would have given:
 
-          stmt_vec_info def_stmt_info = vinfo->lookup_def (rhs);
-          if (!def_stmt_info)
-            /* Don't let external or constant operands influence the choice.
-               We can convert them to whichever vector type we pick.  */
-            continue;
+     - 2 M8->M16 unpacks for c
+     - 2 M16 ANDs for a
 
-          if (def_stmt_info->mask_precision)
-            {
-              if (precision > def_stmt_info->mask_precision)
-                precision = def_stmt_info->mask_precision;
-            }
-          else if (possible_vector_mask_operation_p (def_stmt_info))
-            worklist.safe_push (def_stmt_info);
-        }
+     The number of operations are equal, but M16 would have given
+     a shorter dependency chain and allowed more ILP.  */
+  unsigned int precision = ~0U;
+  gassign *assign = as_a <gassign *> (stmt_info->stmt);
+  unsigned int nops = gimple_num_ops (assign);
+  for (unsigned int i = 1; i < nops; ++i)
+    {
+      tree rhs = gimple_op (assign, i);
+      if (!VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (rhs)))
+        continue;
 
-      /* Defer the choice if we need to visit operands first.  */
-      if (orig_length != worklist.length ())
+      stmt_vec_info def_stmt_info = vinfo->lookup_def (rhs);
+      if (!def_stmt_info)
+        /* Don't let external or constant operands influence the choice.
+           We can convert them to whichever vector type we pick.  */
         continue;
 
-      /* If the statement compares two values that shouldn't use vector masks,
-         try comparing the values as normal scalars instead.  */
-      tree_code rhs_code = gimple_assign_rhs_code (assign);
-      if (precision == ~0U
-          && TREE_CODE_CLASS (rhs_code) == tcc_comparison)
+      if (def_stmt_info->mask_precision)
         {
-          tree rhs1_type = TREE_TYPE (gimple_assign_rhs1 (assign));
-          scalar_mode mode;
-          tree vectype, mask_type;
-          if (is_a <scalar_mode> (TYPE_MODE (rhs1_type), &mode)
-              && (vectype = get_vectype_for_scalar_type (vinfo, rhs1_type))
-              && (mask_type = get_mask_type_for_scalar_type (vinfo, rhs1_type))
-              && expand_vec_cmp_expr_p (vectype, mask_type, rhs_code))
-            precision = GET_MODE_BITSIZE (mode);
+          if (precision > def_stmt_info->mask_precision)
+            precision = def_stmt_info->mask_precision;
         }
+    }
 
-      if (dump_enabled_p ())
-        {
-          if (precision == ~0U)
-            dump_printf_loc (MSG_NOTE, vect_location,
-                             "using normal nonmask vectors for %G",
-                             stmt_info->stmt);
-          else
-            dump_printf_loc (MSG_NOTE, vect_location,
-                             "using boolean precision %d for %G",
-                             precision, stmt_info->stmt);
-        }
+  /* If the statement compares two values that shouldn't use vector masks,
+     try comparing the values as normal scalars instead.  */
+  tree_code rhs_code = gimple_assign_rhs_code (assign);
+  if (precision == ~0U
+      && TREE_CODE_CLASS (rhs_code) == tcc_comparison)
+    {
+      tree rhs1_type = TREE_TYPE (gimple_assign_rhs1 (assign));
+      scalar_mode mode;
+      tree vectype, mask_type;
+      if (is_a <scalar_mode> (TYPE_MODE (rhs1_type), &mode)
+          && (vectype = get_vectype_for_scalar_type (vinfo, rhs1_type))
+          && (mask_type = get_mask_type_for_scalar_type (vinfo, rhs1_type))
+          && expand_vec_cmp_expr_p (vectype, mask_type, rhs_code))
+        precision = GET_MODE_BITSIZE (mode);
+    }
 
-      stmt_info->mask_precision = precision;
-      worklist.pop ();
+  if (dump_enabled_p ())
+    {
+      if (precision == ~0U)
+        dump_printf_loc (MSG_NOTE, vect_location,
+                         "using normal nonmask vectors for %G",
+                         stmt_info->stmt);
+      else
+        dump_printf_loc (MSG_NOTE, vect_location,
+                         "using boolean precision %d for %G",
+                         precision, stmt_info->stmt);
     }
+
+  stmt_info->mask_precision = precision;
 }
 
 /* Handle vect_determine_precisions for STMT_INFO, given that we
@@ -5129,7 +5113,6 @@ vect_determine_stmt_precisions (vec_info *vinfo, stmt_vec_info stmt_info)
       vect_determine_precisions_from_range (stmt_info, stmt);
       vect_determine_precisions_from_users (stmt_info, stmt);
     }
-  vect_determine_mask_precision (vinfo, stmt_info);
 }
 
 /* Walk backwards through the vectorizable region to determine the
@@ -5151,6 +5134,14 @@ vect_determine_precisions (vec_info *vinfo)
       basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
       unsigned int nbbs = loop->num_nodes;
 
+      for (unsigned int i = 0; i < nbbs; i++)
+        {
+          basic_block bb = bbs[i];
+          for (auto si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
+            if (!is_gimple_debug (gsi_stmt (si)))
+              vect_determine_mask_precision
+                (vinfo, vinfo->lookup_stmt (gsi_stmt (si)));
+        }
       for (unsigned int i = 0; i < nbbs; i++)
         {
           basic_block bb = bbs[nbbs - i - 1];
@@ -5164,6 +5155,16 @@ vect_determine_precisions (vec_info *vinfo)
   else
     {
       bb_vec_info bb_vinfo = as_a <bb_vec_info> (vinfo);
+      for (unsigned i = 0; i < bb_vinfo->bbs.length (); ++i)
+        {
+          basic_block bb = bb_vinfo->bbs[i];
+          for (auto gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
+            {
+              stmt_vec_info stmt_info = vinfo->lookup_stmt (gsi_stmt (gsi));
+              if (stmt_info && STMT_VINFO_VECTORIZABLE (stmt_info))
+                vect_determine_mask_precision (vinfo, stmt_info);
+            }
+        }
       for (int i = bb_vinfo->bbs.length () - 1; i != -1; --i)
         for (gimple_stmt_iterator gsi = gsi_last_bb (bb_vinfo->bbs[i]);
              !gsi_end_p (gsi); gsi_prev (&gsi))