From abe93733a265f8a8b56dbdd307380f8c83dd3ab5 Mon Sep 17 00:00:00 2001 From: Yang Yang Date: Tue, 3 Nov 2020 16:13:47 +0000 Subject: [PATCH] PR target/96342 Change field "simdlen" into poly_uint64 This is the first patch of PR96342. In order to add support for "omp declare simd", change the type of the field "simdlen" of struct cgraph_simd_clone from unsigned int to poly_uint64 and make the related adaptations, since the length might be variable for the SVE cases. 2020-11-03 Yang Yang gcc/ChangeLog: * cgraph.h (struct cgraph_simd_clone): Change field "simdlen" of struct cgraph_simd_clone from unsigned int to poly_uint64. * config/aarch64/aarch64.c (aarch64_simd_clone_compute_vecsize_and_simdlen): Adapt operations on "simdlen". * config/i386/i386.c (ix86_simd_clone_compute_vecsize_and_simdlen): Printf formats update. * gengtype.c (main): Handle poly_uint64. * omp-simd-clone.c (simd_clone_mangle): Likewise. (simd_clone_adjust_return_type): Likewise. (create_tmp_simd_array): Likewise. (simd_clone_adjust_argument_types): Likewise. (simd_clone_init_simd_arrays): Likewise. (ipa_simd_modify_function_body): Likewise. (simd_clone_adjust): Likewise. (expand_simd_clones): Likewise. * poly-int-types.h (vector_unroll_factor): New macro. * poly-int.h (constant_multiple_p): Add two-argument versions. * tree-vect-stmts.c (vectorizable_simd_clone_call): Likewise. 
--- gcc/cgraph.h | 6 ++-- gcc/config/aarch64/aarch64.c | 30 ++++++++++------ gcc/config/i386/i386.c | 8 +++-- gcc/gengtype.c | 1 + gcc/omp-simd-clone.c | 70 +++++++++++++++++++++--------------- gcc/poly-int-types.h | 8 +++++ gcc/poly-int.h | 57 +++++++++++++++++++++++++++++ gcc/tree-vect-stmts.c | 43 ++++++++++++---------- 8 files changed, 159 insertions(+), 64 deletions(-) diff --git a/gcc/cgraph.h b/gcc/cgraph.h index cf543705e13..cd22676ff9e 100644 --- a/gcc/cgraph.h +++ b/gcc/cgraph.h @@ -759,17 +759,17 @@ struct GTY(()) cgraph_simd_clone_arg { struct GTY(()) cgraph_simd_clone { /* Number of words in the SIMD lane associated with this clone. */ - unsigned int simdlen; + poly_uint64 simdlen; /* Number of annotated function arguments in `args'. This is usually the number of named arguments in FNDECL. */ unsigned int nargs; /* Max hardware vector size in bits for integral vectors. */ - unsigned int vecsize_int; + poly_uint64 vecsize_int; /* Max hardware vector size in bits for floating point vectors. */ - unsigned int vecsize_float; + poly_uint64 vecsize_float; /* Machine mode of the mask argument(s), if they are to be passed as bitmasks in integer argument(s). VOIDmode if masks are passed diff --git a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c index db991e59cbe..27f587be7e7 100644 --- a/gcc/config/aarch64/aarch64.c +++ b/gcc/config/aarch64/aarch64.c @@ -22960,18 +22960,23 @@ aarch64_simd_clone_compute_vecsize_and_simdlen (struct cgraph_node *node, tree base_type, int num) { tree t, ret_type, arg_type; - unsigned int elt_bits, vec_bits, count; + unsigned int elt_bits, count; + unsigned HOST_WIDE_INT const_simdlen; + poly_uint64 vec_bits; if (!TARGET_SIMD) return 0; - if (clonei->simdlen - && (clonei->simdlen < 2 - || clonei->simdlen > 1024 - || (clonei->simdlen & (clonei->simdlen - 1)) != 0)) + /* For now, SVE simdclones won't produce illegal simdlen, So only check + const simdlens here. 
*/ + if (maybe_ne (clonei->simdlen, 0U) + && clonei->simdlen.is_constant (&const_simdlen) + && (const_simdlen < 2 + || const_simdlen > 1024 + || (const_simdlen & (const_simdlen - 1)) != 0)) { warning_at (DECL_SOURCE_LOCATION (node->decl), 0, - "unsupported simdlen %d", clonei->simdlen); + "unsupported simdlen %wd", const_simdlen); return 0; } @@ -23015,21 +23020,24 @@ aarch64_simd_clone_compute_vecsize_and_simdlen (struct cgraph_node *node, clonei->vecsize_mangle = 'n'; clonei->mask_mode = VOIDmode; elt_bits = GET_MODE_BITSIZE (SCALAR_TYPE_MODE (base_type)); - if (clonei->simdlen == 0) + if (known_eq (clonei->simdlen, 0U)) { count = 2; vec_bits = (num == 0 ? 64 : 128); - clonei->simdlen = vec_bits / elt_bits; + clonei->simdlen = exact_div (vec_bits, elt_bits); } else { count = 1; vec_bits = clonei->simdlen * elt_bits; - if (vec_bits != 64 && vec_bits != 128) + /* For now, SVE simdclones won't produce illegal simdlen, So only check + const simdlens here. */ + if (clonei->simdlen.is_constant (&const_simdlen) + && maybe_ne (vec_bits, 64U) && maybe_ne (vec_bits, 128U)) { warning_at (DECL_SOURCE_LOCATION (node->decl), 0, - "GCC does not currently support simdlen %d for type %qT", - clonei->simdlen, base_type); + "GCC does not currently support simdlen %wd for type %qT", + const_simdlen, base_type); return 0; } } diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c index caa9b9d5ac1..df47a53d701 100644 --- a/gcc/config/i386/i386.c +++ b/gcc/config/i386/i386.c @@ -22485,7 +22485,7 @@ ix86_simd_clone_compute_vecsize_and_simdlen (struct cgraph_node *node, || (clonei->simdlen & (clonei->simdlen - 1)) != 0)) { warning_at (DECL_SOURCE_LOCATION (node->decl), 0, - "unsupported simdlen %d", clonei->simdlen); + "unsupported simdlen %wd", clonei->simdlen.to_constant ()); return 0; } @@ -22590,7 +22590,8 @@ ix86_simd_clone_compute_vecsize_and_simdlen (struct cgraph_node *node, clonei->simdlen = clonei->vecsize_int; else clonei->simdlen = clonei->vecsize_float; - 
clonei->simdlen /= GET_MODE_BITSIZE (TYPE_MODE (base_type)); + clonei->simdlen = clonei->simdlen + / GET_MODE_BITSIZE (TYPE_MODE (base_type)); } else if (clonei->simdlen > 16) { @@ -22612,7 +22613,8 @@ ix86_simd_clone_compute_vecsize_and_simdlen (struct cgraph_node *node, if (cnt > (TARGET_64BIT ? 16 : 8)) { warning_at (DECL_SOURCE_LOCATION (node->decl), 0, - "unsupported simdlen %d", clonei->simdlen); + "unsupported simdlen %wd", + clonei->simdlen.to_constant ()); return 0; } } diff --git a/gcc/gengtype.c b/gcc/gengtype.c index a7cf5c25893..b21eeacdbe2 100644 --- a/gcc/gengtype.c +++ b/gcc/gengtype.c @@ -5200,6 +5200,7 @@ main (int argc, char **argv) POS_HERE (do_scalar_typedef ("widest_int", &pos)); POS_HERE (do_scalar_typedef ("int64_t", &pos)); POS_HERE (do_scalar_typedef ("poly_int64", &pos)); + POS_HERE (do_scalar_typedef ("poly_uint64", &pos)); POS_HERE (do_scalar_typedef ("uint64_t", &pos)); POS_HERE (do_scalar_typedef ("uint8", &pos)); POS_HERE (do_scalar_typedef ("uintptr_t", &pos)); diff --git a/gcc/omp-simd-clone.c b/gcc/omp-simd-clone.c index 942fb971cb7..cbd58c8987b 100644 --- a/gcc/omp-simd-clone.c +++ b/gcc/omp-simd-clone.c @@ -338,16 +338,18 @@ simd_clone_mangle (struct cgraph_node *node, { char vecsize_mangle = clone_info->vecsize_mangle; char mask = clone_info->inbranch ? 'M' : 'N'; - unsigned int simdlen = clone_info->simdlen; + poly_uint64 simdlen = clone_info->simdlen; unsigned int n; pretty_printer pp; - gcc_assert (vecsize_mangle && simdlen); + gcc_assert (vecsize_mangle && maybe_ne (simdlen, 0U)); pp_string (&pp, "_ZGV"); pp_character (&pp, vecsize_mangle); pp_character (&pp, mask); - pp_decimal_int (&pp, simdlen); + /* For now, simdlen is always constant, while variable simdlen pp 'n'. 
*/ + unsigned int len = simdlen.to_constant (); + pp_decimal_int (&pp, (len)); for (n = 0; n < clone_info->nargs; ++n) { @@ -491,7 +493,7 @@ simd_clone_adjust_return_type (struct cgraph_node *node) { tree fndecl = node->decl; tree orig_rettype = TREE_TYPE (TREE_TYPE (fndecl)); - unsigned int veclen; + poly_uint64 veclen; tree t; /* Adjust the function return type. */ @@ -502,17 +504,18 @@ simd_clone_adjust_return_type (struct cgraph_node *node) veclen = node->simdclone->vecsize_int; else veclen = node->simdclone->vecsize_float; - veclen /= GET_MODE_BITSIZE (SCALAR_TYPE_MODE (t)); - if (veclen > node->simdclone->simdlen) + veclen = exact_div (veclen, GET_MODE_BITSIZE (SCALAR_TYPE_MODE (t))); + if (multiple_p (veclen, node->simdclone->simdlen)) veclen = node->simdclone->simdlen; if (POINTER_TYPE_P (t)) t = pointer_sized_int_node; - if (veclen == node->simdclone->simdlen) + if (known_eq (veclen, node->simdclone->simdlen)) t = build_vector_type (t, node->simdclone->simdlen); else { t = build_vector_type (t, veclen); - t = build_array_type_nelts (t, node->simdclone->simdlen / veclen); + t = build_array_type_nelts (t, exact_div (node->simdclone->simdlen, + veclen)); } TREE_TYPE (TREE_TYPE (fndecl)) = t; if (!node->definition) @@ -526,7 +529,7 @@ simd_clone_adjust_return_type (struct cgraph_node *node) tree atype = build_array_type_nelts (orig_rettype, node->simdclone->simdlen); - if (veclen != node->simdclone->simdlen) + if (maybe_ne (veclen, node->simdclone->simdlen)) return build1 (VIEW_CONVERT_EXPR, atype, t); /* Set up a SIMD array to use as the return value. */ @@ -546,7 +549,7 @@ simd_clone_adjust_return_type (struct cgraph_node *node) SIMDLEN is the number of elements. 
*/ static tree -create_tmp_simd_array (const char *prefix, tree type, int simdlen) +create_tmp_simd_array (const char *prefix, tree type, poly_uint64 simdlen) { tree atype = build_array_type_nelts (type, simdlen); tree avar = create_tmp_var_raw (atype, prefix); @@ -578,7 +581,8 @@ simd_clone_adjust_argument_types (struct cgraph_node *node) struct cgraph_simd_clone *sc = node->simdclone; vec *new_params = NULL; vec_safe_reserve (new_params, sc->nargs); - unsigned i, j, veclen; + unsigned i, j, k; + poly_uint64 veclen; for (i = 0; i < sc->nargs; ++i) { @@ -614,8 +618,9 @@ simd_clone_adjust_argument_types (struct cgraph_node *node) veclen = sc->vecsize_int; else veclen = sc->vecsize_float; - veclen /= GET_MODE_BITSIZE (SCALAR_TYPE_MODE (parm_type)); - if (veclen > sc->simdlen) + veclen = exact_div (veclen, + GET_MODE_BITSIZE (SCALAR_TYPE_MODE (parm_type))); + if (multiple_p (veclen, sc->simdlen)) veclen = sc->simdlen; adj.op = IPA_PARAM_OP_NEW; adj.param_prefix_index = IPA_PARAM_PREFIX_SIMD; @@ -624,10 +629,11 @@ simd_clone_adjust_argument_types (struct cgraph_node *node) else adj.type = build_vector_type (parm_type, veclen); sc->args[i].vector_type = adj.type; - for (j = veclen; j < sc->simdlen; j += veclen) + k = vector_unroll_factor (sc->simdlen, veclen); + for (j = 1; j < k; j++) { vec_safe_push (new_params, adj); - if (j == veclen) + if (j == 1) { memset (&adj, 0, sizeof (adj)); adj.op = IPA_PARAM_OP_NEW; @@ -663,8 +669,9 @@ simd_clone_adjust_argument_types (struct cgraph_node *node) veclen = sc->vecsize_int; else veclen = sc->vecsize_float; - veclen /= GET_MODE_BITSIZE (SCALAR_TYPE_MODE (base_type)); - if (veclen > sc->simdlen) + veclen = exact_div (veclen, + GET_MODE_BITSIZE (SCALAR_TYPE_MODE (base_type))); + if (multiple_p (veclen, sc->simdlen)) veclen = sc->simdlen; if (sc->mask_mode != VOIDmode) adj.type @@ -675,7 +682,8 @@ simd_clone_adjust_argument_types (struct cgraph_node *node) adj.type = build_vector_type (base_type, veclen); vec_safe_push (new_params, 
adj); - for (j = veclen; j < sc->simdlen; j += veclen) + k = vector_unroll_factor (sc->simdlen, veclen); + for (j = 1; j < k; j++) vec_safe_push (new_params, adj); /* We have previously allocated one extra entry for the mask. Use @@ -690,9 +698,9 @@ simd_clone_adjust_argument_types (struct cgraph_node *node) if (sc->mask_mode == VOIDmode) sc->args[i].simd_array = create_tmp_simd_array ("mask", base_type, sc->simdlen); - else if (veclen < sc->simdlen) + else if (k > 1) sc->args[i].simd_array - = create_tmp_simd_array ("mask", adj.type, sc->simdlen / veclen); + = create_tmp_simd_array ("mask", adj.type, k); else sc->args[i].simd_array = NULL_TREE; } @@ -783,7 +791,8 @@ simd_clone_init_simd_arrays (struct cgraph_node *node, } continue; } - if (simd_clone_subparts (TREE_TYPE (arg)) == node->simdclone->simdlen) + if (known_eq (simd_clone_subparts (TREE_TYPE (arg)), + node->simdclone->simdlen)) { tree ptype = build_pointer_type (TREE_TYPE (TREE_TYPE (array))); tree ptr = build_fold_addr_expr (array); @@ -795,8 +804,10 @@ simd_clone_init_simd_arrays (struct cgraph_node *node, else { unsigned int simdlen = simd_clone_subparts (TREE_TYPE (arg)); + unsigned int times = vector_unroll_factor (node->simdclone->simdlen, + simdlen); tree ptype = build_pointer_type (TREE_TYPE (TREE_TYPE (array))); - for (k = 0; k < node->simdclone->simdlen; k += simdlen) + for (k = 0; k < times; k++) { tree ptr = build_fold_addr_expr (array); int elemsize; @@ -808,7 +819,7 @@ simd_clone_init_simd_arrays (struct cgraph_node *node, tree elemtype = TREE_TYPE (TREE_TYPE (arg)); elemsize = GET_MODE_SIZE (SCALAR_TYPE_MODE (elemtype)); tree t = build2 (MEM_REF, TREE_TYPE (arg), ptr, - build_int_cst (ptype, k * elemsize)); + build_int_cst (ptype, k * elemsize * simdlen)); t = build2 (MODIFY_EXPR, TREE_TYPE (t), t, arg); gimplify_and_add (t, &seq); } @@ -981,8 +992,9 @@ ipa_simd_modify_function_body (struct cgraph_node *node, iter, NULL_TREE, NULL_TREE); adjustments->register_replacement 
(&(*adjustments->m_adj_params)[j], r); - if (simd_clone_subparts (vectype) < node->simdclone->simdlen) - j += node->simdclone->simdlen / simd_clone_subparts (vectype) - 1; + if (multiple_p (node->simdclone->simdlen, simd_clone_subparts (vectype))) + j += vector_unroll_factor (node->simdclone->simdlen, + simd_clone_subparts (vectype)) - 1; } tree name; @@ -1249,7 +1261,8 @@ simd_clone_adjust (struct cgraph_node *node) below). */ loop = alloc_loop (); cfun->has_force_vectorize_loops = true; - loop->safelen = node->simdclone->simdlen; + /* For now, simlen is always constant. */ + loop->safelen = node->simdclone->simdlen.to_constant (); loop->force_vectorize = true; loop->header = body_bb; } @@ -1275,7 +1288,8 @@ simd_clone_adjust (struct cgraph_node *node) { tree maskt = TREE_TYPE (mask_array); int c = tree_to_uhwi (TYPE_MAX_VALUE (TYPE_DOMAIN (maskt))); - c = node->simdclone->simdlen / (c + 1); + /* For now, c must be constant here. */ + c = exact_div (node->simdclone->simdlen, c + 1).to_constant (); int s = exact_log2 (c); gcc_assert (s > 0); c--; @@ -1683,7 +1697,7 @@ expand_simd_clones (struct cgraph_node *node) if (clone_info == NULL) continue; - int orig_simdlen = clone_info->simdlen; + poly_uint64 orig_simdlen = clone_info->simdlen; tree base_type = simd_clone_compute_base_data_type (node, clone_info); /* The target can return 0 (no simd clones should be created), 1 (just one ISA of simd clones should be created) or higher diff --git a/gcc/poly-int-types.h b/gcc/poly-int-types.h index 5e04e63ebf2..302e5bfd97d 100644 --- a/gcc/poly-int-types.h +++ b/gcc/poly-int-types.h @@ -81,6 +81,14 @@ typedef poly_int poly_widest_int; #define vector_element_size(SIZE, NELTS) \ (exact_div (SIZE, NELTS).to_constant ()) +/* Return the number of unroll times when a vector that has NELTS1 elements + is unrolled to vectors that have NELTS2 elements. + + to_constant () is safe in this situation because the multiples of the + NELTS of two vectors are always constant-size scalars. 
*/ +#define vector_unroll_factor(NELTS1, NELTS2) \ + (exact_div (NELTS1, NELTS2).to_constant ()) + /* Wrapper for poly_int arguments to target macros, so that if a target doesn't need polynomial-sized modes, its header file can continue to treat the argument as a normal constant. This should go away once diff --git a/gcc/poly-int.h b/gcc/poly-int.h index b953ffacec4..96a763daedf 100644 --- a/gcc/poly-int.h +++ b/gcc/poly-int.h @@ -2044,6 +2044,63 @@ constant_multiple_p (const poly_int_pod &a, return true; } +/* Return true if A is a constant multiple of B. */ + +template +inline typename if_nonpoly::type +constant_multiple_p (const poly_int_pod &a, Cb b) +{ + typedef POLY_CAST (Ca, Cb) NCa; + typedef POLY_CAST (Cb, Ca) NCb; + + /* Do the modulus before the constant check, to catch divide by + zero errors. */ + if (NCa (a.coeffs[0]) % NCb (b) != 0 || !a.is_constant ()) + return false; + return true; +} + +template +inline typename if_nonpoly::type +constant_multiple_p (Ca a, const poly_int_pod &b) +{ + typedef POLY_CAST (Ca, Cb) NCa; + typedef POLY_CAST (Cb, Ca) NCb; + typedef POLY_INT_TYPE (Ca) int_type; + + /* Do the modulus before the constant check, to catch divide by + zero errors. */ + if (NCa (a) % NCb (b.coeffs[0]) != 0 + || (a != int_type (0) && !b.is_constant ())) + return false; + return true; +} + +template +inline bool +constant_multiple_p (const poly_int_pod &a, + const poly_int_pod &b) +{ + typedef POLY_CAST (Ca, Cb) NCa; + typedef POLY_CAST (Cb, Ca) NCb; + typedef POLY_INT_TYPE (Ca) ICa; + typedef POLY_INT_TYPE (Cb) ICb; + typedef POLY_BINARY_COEFF (Ca, Cb) C; + + if (NCa (a.coeffs[0]) % NCb (b.coeffs[0]) != 0) + return false; + + C r = NCa (a.coeffs[0]) / NCb (b.coeffs[0]); + for (unsigned int i = 1; i < N; ++i) + if (b.coeffs[i] == ICb (0) + ? a.coeffs[i] != ICa (0) + : (NCa (a.coeffs[i]) % NCb (b.coeffs[i]) != 0 + || NCa (a.coeffs[i]) / NCb (b.coeffs[i]) != r)) + return false; + return true; +} + + /* Return true if A is a multiple of B. 
*/ template diff --git a/gcc/tree-vect-stmts.c b/gcc/tree-vect-stmts.c index 1a0da0e84cc..9cf85a0cd51 100644 --- a/gcc/tree-vect-stmts.c +++ b/gcc/tree-vect-stmts.c @@ -3707,7 +3707,7 @@ vectorizable_simd_clone_call (vec_info *vinfo, stmt_vec_info stmt_info, tree op, type; tree vec_oprnd0 = NULL_TREE; tree vectype; - unsigned int nunits; + poly_uint64 nunits; loop_vec_info loop_vinfo = dyn_cast (vinfo); bb_vec_info bb_vinfo = dyn_cast (vinfo); class loop *loop = loop_vinfo ? LOOP_VINFO_LOOP (loop_vinfo) : NULL; @@ -3859,8 +3859,8 @@ vectorizable_simd_clone_call (vec_info *vinfo, stmt_vec_info stmt_info, arginfo.quick_push (thisarginfo); } - unsigned HOST_WIDE_INT vf; - if (!LOOP_VINFO_VECT_FACTOR (loop_vinfo).is_constant (&vf)) + poly_uint64 vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo); + if (!vf.is_constant ()) { if (dump_enabled_p ()) dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, @@ -3878,12 +3878,12 @@ vectorizable_simd_clone_call (vec_info *vinfo, stmt_vec_info stmt_info, n = n->simdclone->next_clone) { unsigned int this_badness = 0; - if (n->simdclone->simdlen > vf + unsigned int num_calls; + if (!constant_multiple_p (vf, n->simdclone->simdlen, &num_calls) || n->simdclone->nargs != nargs) continue; - if (n->simdclone->simdlen < vf) - this_badness += (exact_log2 (vf) - - exact_log2 (n->simdclone->simdlen)) * 1024; + if (num_calls != 1) + this_badness += exact_log2 (num_calls) * 1024; if (n->simdclone->inbranch) this_badness += 2048; int target_badness = targetm.simd_clone.usable (n); @@ -3964,19 +3964,19 @@ vectorizable_simd_clone_call (vec_info *vinfo, stmt_vec_info stmt_info, arginfo[i].vectype = get_vectype_for_scalar_type (vinfo, arg_type, slp_node); if (arginfo[i].vectype == NULL - || (simd_clone_subparts (arginfo[i].vectype) - > bestn->simdclone->simdlen)) + || !constant_multiple_p (bestn->simdclone->simdlen, + simd_clone_subparts (arginfo[i].vectype))) return false; } fndecl = bestn->decl; nunits = bestn->simdclone->simdlen; - ncopies = vf / 
nunits; + ncopies = vector_unroll_factor (vf, nunits); /* If the function isn't const, only allow it in simd loops where user has asserted that at least nunits consecutive iterations can be performed using SIMD instructions. */ - if ((loop == NULL || (unsigned) loop->safelen < nunits) + if ((loop == NULL || maybe_lt ((unsigned) loop->safelen, nunits)) && gimple_vuse (stmt)) return false; @@ -4054,7 +4054,8 @@ vectorizable_simd_clone_call (vec_info *vinfo, stmt_vec_info stmt_info, { case SIMD_CLONE_ARG_TYPE_VECTOR: atype = bestn->simdclone->args[i].vector_type; - o = nunits / simd_clone_subparts (atype); + o = vector_unroll_factor (nunits, + simd_clone_subparts (atype)); for (m = j * o; m < (j + 1) * o; m++) { if (simd_clone_subparts (atype) @@ -4179,7 +4180,7 @@ vectorizable_simd_clone_call (vec_info *vinfo, stmt_vec_info stmt_info, ? POINTER_PLUS_EXPR : PLUS_EXPR; tree type = POINTER_TYPE_P (TREE_TYPE (op)) ? sizetype : TREE_TYPE (op); - widest_int cst + poly_widest_int cst = wi::mul (bestn->simdclone->args[i].linear_step, ncopies * nunits); tree tcst = wide_int_to_tree (type, cst); @@ -4200,7 +4201,7 @@ vectorizable_simd_clone_call (vec_info *vinfo, stmt_vec_info stmt_info, ? POINTER_PLUS_EXPR : PLUS_EXPR; tree type = POINTER_TYPE_P (TREE_TYPE (op)) ? 
sizetype : TREE_TYPE (op); - widest_int cst + poly_widest_int cst = wi::mul (bestn->simdclone->args[i].linear_step, j * nunits); tree tcst = wide_int_to_tree (type, cst); @@ -4226,7 +4227,8 @@ vectorizable_simd_clone_call (vec_info *vinfo, stmt_vec_info stmt_info, gcall *new_call = gimple_build_call_vec (fndecl, vargs); if (vec_dest) { - gcc_assert (ratype || simd_clone_subparts (rtype) == nunits); + gcc_assert (ratype + || known_eq (simd_clone_subparts (rtype), nunits)); if (ratype) new_temp = create_tmp_var (ratype); else if (useless_type_conversion_p (vectype, rtype)) @@ -4240,12 +4242,13 @@ vectorizable_simd_clone_call (vec_info *vinfo, stmt_vec_info stmt_info, if (vec_dest) { - if (simd_clone_subparts (vectype) < nunits) + if (!multiple_p (simd_clone_subparts (vectype), nunits)) { unsigned int k, l; poly_uint64 prec = GET_MODE_BITSIZE (TYPE_MODE (vectype)); poly_uint64 bytes = GET_MODE_SIZE (TYPE_MODE (vectype)); - k = nunits / simd_clone_subparts (vectype); + k = vector_unroll_factor (nunits, + simd_clone_subparts (vectype)); gcc_assert ((k & (k - 1)) == 0); for (l = 0; l < k; l++) { @@ -4271,7 +4274,7 @@ vectorizable_simd_clone_call (vec_info *vinfo, stmt_vec_info stmt_info, vect_clobber_variable (vinfo, stmt_info, gsi, new_temp); continue; } - else if (simd_clone_subparts (vectype) > nunits) + else if (!multiple_p (nunits, simd_clone_subparts (vectype))) { unsigned int k = (simd_clone_subparts (vectype) / simd_clone_subparts (rtype)); @@ -4280,7 +4283,9 @@ vectorizable_simd_clone_call (vec_info *vinfo, stmt_vec_info stmt_info, vec_alloc (ret_ctor_elts, k); if (ratype) { - unsigned int m, o = nunits / simd_clone_subparts (rtype); + unsigned int m, o; + o = vector_unroll_factor (nunits, + simd_clone_subparts (rtype)); for (m = 0; m < o; m++) { tree tem = build4 (ARRAY_REF, rtype, new_temp, -- 2.30.2