add vect_get_slp_vect_def
[gcc.git] / gcc / tree-vect-slp.c
1 /* SLP - Basic Block Vectorization
2 Copyright (C) 2007-2020 Free Software Foundation, Inc.
3 Contributed by Dorit Naishlos <dorit@il.ibm.com>
4 and Ira Rosen <irar@il.ibm.com>
5
6 This file is part of GCC.
7
8 GCC is free software; you can redistribute it and/or modify it under
9 the terms of the GNU General Public License as published by the Free
10 Software Foundation; either version 3, or (at your option) any later
11 version.
12
13 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
14 WARRANTY; without even the implied warranty of MERCHANTABILITY or
15 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
16 for more details.
17
18 You should have received a copy of the GNU General Public License
19 along with GCC; see the file COPYING3. If not see
20 <http://www.gnu.org/licenses/>. */
21
22 #include "config.h"
23 #include "system.h"
24 #include "coretypes.h"
25 #include "backend.h"
26 #include "target.h"
27 #include "rtl.h"
28 #include "tree.h"
29 #include "gimple.h"
30 #include "tree-pass.h"
31 #include "ssa.h"
32 #include "optabs-tree.h"
33 #include "insn-config.h"
34 #include "recog.h" /* FIXME: for insn_data */
35 #include "fold-const.h"
36 #include "stor-layout.h"
37 #include "gimple-iterator.h"
38 #include "cfgloop.h"
39 #include "tree-vectorizer.h"
40 #include "langhooks.h"
41 #include "gimple-walk.h"
42 #include "dbgcnt.h"
43 #include "tree-vector-builder.h"
44 #include "vec-perm-indices.h"
45 #include "gimple-fold.h"
46 #include "internal-fn.h"
47 #include "dump-context.h"
48
49
50 /* Initialize a SLP node. */
51
52 _slp_tree::_slp_tree ()
53 {
54 SLP_TREE_SCALAR_STMTS (this) = vNULL;
55 SLP_TREE_SCALAR_OPS (this) = vNULL;
56 SLP_TREE_VEC_STMTS (this) = vNULL;
57 SLP_TREE_VEC_DEFS (this) = vNULL;
58 SLP_TREE_NUMBER_OF_VEC_STMTS (this) = 0;
59 SLP_TREE_CHILDREN (this) = vNULL;
60 SLP_TREE_LOAD_PERMUTATION (this) = vNULL;
61 SLP_TREE_TWO_OPERATORS (this) = false;
62 SLP_TREE_DEF_TYPE (this) = vect_uninitialized_def;
63 SLP_TREE_VECTYPE (this) = NULL_TREE;
64 SLP_TREE_REPRESENTATIVE (this) = NULL;
65 this->refcnt = 1;
66 this->max_nunits = 1;
67 this->lanes = 0;
68 }
69
70 /* Tear down a SLP node. */
71
72 _slp_tree::~_slp_tree ()
73 {
74 SLP_TREE_CHILDREN (this).release ();
75 SLP_TREE_SCALAR_STMTS (this).release ();
76 SLP_TREE_SCALAR_OPS (this).release ();
77 SLP_TREE_VEC_STMTS (this).release ();
78 SLP_TREE_VEC_DEFS (this).release ();
79 SLP_TREE_LOAD_PERMUTATION (this).release ();
80 }
81
82 /* Recursively free the memory allocated for the SLP tree rooted at NODE.
83 FINAL_P is true if we have vectorized the instance or if we have
84 made a final decision not to vectorize the statements in any way. */
85
86 static void
87 vect_free_slp_tree (slp_tree node, bool final_p)
88 {
89 int i;
90 slp_tree child;
91
92 if (--node->refcnt != 0)
93 return;
94
95 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
96 vect_free_slp_tree (child, final_p);
97
98 /* Don't update STMT_VINFO_NUM_SLP_USES if it isn't relevant.
99 Some statements might no longer exist, after having been
100 removed by vect_transform_stmt. Updating the remaining
101 statements would be redundant. */
102 if (!final_p)
103 {
104 stmt_vec_info stmt_info;
105 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, stmt_info)
106 {
107 gcc_assert (STMT_VINFO_NUM_SLP_USES (stmt_info) > 0);
108 STMT_VINFO_NUM_SLP_USES (stmt_info)--;
109 }
110 }
111
112 delete node;
113 }
114
115 /* Free the memory allocated for the SLP instance. FINAL_P is true if we
116 have vectorized the instance or if we have made a final decision not
117 to vectorize the statements in any way. */
118
119 void
120 vect_free_slp_instance (slp_instance instance, bool final_p)
121 {
122 vect_free_slp_tree (SLP_INSTANCE_TREE (instance), final_p);
123 SLP_INSTANCE_LOADS (instance).release ();
124 free (instance);
125 }
126
127
128 /* Create an SLP node for SCALAR_STMTS. */
129
130 static slp_tree
131 vect_create_new_slp_node (vec<stmt_vec_info> scalar_stmts, unsigned nops)
132 {
133 slp_tree node = new _slp_tree;
134 SLP_TREE_SCALAR_STMTS (node) = scalar_stmts;
135 SLP_TREE_CHILDREN (node).create (nops);
136 SLP_TREE_DEF_TYPE (node) = vect_internal_def;
137 SLP_TREE_REPRESENTATIVE (node) = scalar_stmts[0];
138 SLP_TREE_LANES (node) = scalar_stmts.length ();
139
140 unsigned i;
141 stmt_vec_info stmt_info;
142 FOR_EACH_VEC_ELT (scalar_stmts, i, stmt_info)
143 STMT_VINFO_NUM_SLP_USES (stmt_info)++;
144
145 return node;
146 }
147
148 /* Create an SLP node for OPS. */
149
150 static slp_tree
151 vect_create_new_slp_node (vec<tree> ops)
152 {
153 slp_tree node = new _slp_tree;
154 SLP_TREE_SCALAR_OPS (node) = ops;
155 SLP_TREE_DEF_TYPE (node) = vect_external_def;
156 SLP_TREE_LANES (node) = ops.length ();
157 return node;
158 }
159
160
161 /* This structure is used in creation of an SLP tree. Each instance
162 corresponds to the same operand in a group of scalar stmts in an SLP
163 node. */
164 typedef struct _slp_oprnd_info
165 {
166 /* Def-stmts for the operands. */
167 vec<stmt_vec_info> def_stmts;
168 /* Operands. */
169 vec<tree> ops;
 170    /* Information about the first statement: its vector def-type, the type
 171       of its operand, and an indication whether any def stmt in the group
 172       is a pattern stmt.  */
173 tree first_op_type;
174 enum vect_def_type first_dt;
175 bool any_pattern;
176 } *slp_oprnd_info;
177
178
179 /* Allocate operands info for NOPS operands, and GROUP_SIZE def-stmts for each
180 operand. */
181 static vec<slp_oprnd_info>
182 vect_create_oprnd_info (int nops, int group_size)
183 {
184 int i;
185 slp_oprnd_info oprnd_info;
186 vec<slp_oprnd_info> oprnds_info;
187
188 oprnds_info.create (nops);
189 for (i = 0; i < nops; i++)
190 {
191 oprnd_info = XNEW (struct _slp_oprnd_info);
192 oprnd_info->def_stmts.create (group_size);
193 oprnd_info->ops.create (group_size);
194 oprnd_info->first_dt = vect_uninitialized_def;
195 oprnd_info->first_op_type = NULL_TREE;
196 oprnd_info->any_pattern = false;
197 oprnds_info.quick_push (oprnd_info);
198 }
199
200 return oprnds_info;
201 }
202
203
204 /* Free operands info. */
205
206 static void
207 vect_free_oprnd_info (vec<slp_oprnd_info> &oprnds_info)
208 {
209 int i;
210 slp_oprnd_info oprnd_info;
211
212 FOR_EACH_VEC_ELT (oprnds_info, i, oprnd_info)
213 {
214 oprnd_info->def_stmts.release ();
215 oprnd_info->ops.release ();
216 XDELETE (oprnd_info);
217 }
218
219 oprnds_info.release ();
220 }
221
222
223 /* Return true if STMTS contains a pattern statement. */
224
225 static bool
226 vect_contains_pattern_stmt_p (vec<stmt_vec_info> stmts)
227 {
228 stmt_vec_info stmt_info;
229 unsigned int i;
230 FOR_EACH_VEC_ELT (stmts, i, stmt_info)
231 if (is_pattern_stmt_p (stmt_info))
232 return true;
233 return false;
234 }
235
236 /* Find the place of the data-ref in STMT_INFO in the interleaving chain
237 that starts from FIRST_STMT_INFO. Return -1 if the data-ref is not a part
238 of the chain. */
239
240 int
241 vect_get_place_in_interleaving_chain (stmt_vec_info stmt_info,
242 stmt_vec_info first_stmt_info)
243 {
244 stmt_vec_info next_stmt_info = first_stmt_info;
245 int result = 0;
246
247 if (first_stmt_info != DR_GROUP_FIRST_ELEMENT (stmt_info))
248 return -1;
249
250 do
251 {
252 if (next_stmt_info == stmt_info)
253 return result;
254 next_stmt_info = DR_GROUP_NEXT_ELEMENT (next_stmt_info);
255 if (next_stmt_info)
256 result += DR_GROUP_GAP (next_stmt_info);
257 }
258 while (next_stmt_info);
259
260 return -1;
261 }
262
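/* An illustrative example (accesses assumed): for an interleaved read
   group a[0], a[1], a[3], each non-first element's DR_GROUP_GAP holds
   its distance from the previous element, so the a[3] element carries a
   gap of 2.  The function above therefore returns 0 for a[0], 1 for
   a[1], and 1 + 2 == 3 for a[3], i.e. the element's index within the
   gappy access pattern.  */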
263 /* Check whether it is possible to load COUNT elements of type ELT_TYPE
264 using the method implemented by duplicate_and_interleave. Return true
265 if so, returning the number of intermediate vectors in *NVECTORS_OUT
266 (if nonnull) and the type of each intermediate vector in *VECTOR_TYPE_OUT
267 (if nonnull). */
268
269 bool
270 can_duplicate_and_interleave_p (vec_info *vinfo, unsigned int count,
271 tree elt_type, unsigned int *nvectors_out,
272 tree *vector_type_out,
273 tree *permutes)
274 {
275 tree base_vector_type = get_vectype_for_scalar_type (vinfo, elt_type, count);
276 if (!base_vector_type || !VECTOR_MODE_P (TYPE_MODE (base_vector_type)))
277 return false;
278
279 machine_mode base_vector_mode = TYPE_MODE (base_vector_type);
280 poly_int64 elt_bytes = count * GET_MODE_UNIT_SIZE (base_vector_mode);
281 unsigned int nvectors = 1;
282 for (;;)
283 {
284 scalar_int_mode int_mode;
285 poly_int64 elt_bits = elt_bytes * BITS_PER_UNIT;
286 if (int_mode_for_size (elt_bits, 1).exists (&int_mode))
287 {
288 /* Get the natural vector type for this SLP group size. */
289 tree int_type = build_nonstandard_integer_type
290 (GET_MODE_BITSIZE (int_mode), 1);
291 tree vector_type
292 = get_vectype_for_scalar_type (vinfo, int_type, count);
293 if (vector_type
294 && VECTOR_MODE_P (TYPE_MODE (vector_type))
295 && known_eq (GET_MODE_SIZE (TYPE_MODE (vector_type)),
296 GET_MODE_SIZE (base_vector_mode)))
297 {
298 /* Try fusing consecutive sequences of COUNT / NVECTORS elements
299 together into elements of type INT_TYPE and using the result
300 to build NVECTORS vectors. */
301 poly_uint64 nelts = GET_MODE_NUNITS (TYPE_MODE (vector_type));
302 vec_perm_builder sel1 (nelts, 2, 3);
303 vec_perm_builder sel2 (nelts, 2, 3);
304 poly_int64 half_nelts = exact_div (nelts, 2);
305 for (unsigned int i = 0; i < 3; ++i)
306 {
307 sel1.quick_push (i);
308 sel1.quick_push (i + nelts);
309 sel2.quick_push (half_nelts + i);
310 sel2.quick_push (half_nelts + i + nelts);
311 }
312 vec_perm_indices indices1 (sel1, 2, nelts);
313 vec_perm_indices indices2 (sel2, 2, nelts);
314 if (can_vec_perm_const_p (TYPE_MODE (vector_type), indices1)
315 && can_vec_perm_const_p (TYPE_MODE (vector_type), indices2))
316 {
317 if (nvectors_out)
318 *nvectors_out = nvectors;
319 if (vector_type_out)
320 *vector_type_out = vector_type;
321 if (permutes)
322 {
323 permutes[0] = vect_gen_perm_mask_checked (vector_type,
324 indices1);
325 permutes[1] = vect_gen_perm_mask_checked (vector_type,
326 indices2);
327 }
328 return true;
329 }
330 }
331 }
332 if (!multiple_p (elt_bytes, 2, &elt_bytes))
333 return false;
334 nvectors *= 2;
335 }
336 }
337
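/* A worked illustration of the search above (sizes assumed): for
   COUNT == 4 and a 16-bit ELT_TYPE, ELT_BYTES starts at 8, so the first
   iteration tries to view each group of four elements as one 64-bit
   integer and build a single vector of those (NVECTORS == 1).  If no
   suitable integer mode, vector type or permutation exists, ELT_BYTES
   is halved to 4 and NVECTORS doubled to 2, i.e. two vectors of 32-bit
   chunks, and so on until halving is no longer possible.  */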
 338 /* Get the defs for the rhs of STMTS[STMT_NUM] (collect them in OPRNDS_INFO),
 339    check that they are of a valid type and that they match the defs of the
 340    first stmt of the SLP group (stored in OPRNDS_INFO).  This function tries
 341    to match stmts by swapping operands of STMTS[STMT_NUM] when possible.
 342    On entry, a non-zero *SWAP indicates that a swap is required for cond_expr
 343    stmts: *SWAP is 1 if STMT is a cond and the operands of the comparison
 344    need to be swapped; *SWAP is 2 if STMT is a cond and the code of the
 345    comparison needs to be inverted.  If this function performs any operand
 346    swap, *SWAP is set to a non-zero value.
 347    Return -1 on a fatal error; return 1 if the error could be corrected by
 348    swapping operands of this node's parent; return 0 if everything is
 349    OK.  */
350 static int
351 vect_get_and_check_slp_defs (vec_info *vinfo, unsigned char *swap,
352 vec<stmt_vec_info> stmts, unsigned stmt_num,
353 vec<slp_oprnd_info> *oprnds_info)
354 {
355 stmt_vec_info stmt_info = stmts[stmt_num];
356 tree oprnd;
357 unsigned int i, number_of_oprnds;
358 enum vect_def_type dt = vect_uninitialized_def;
359 slp_oprnd_info oprnd_info;
360 int first_op_idx = 1;
361 unsigned int commutative_op = -1U;
362 bool first_op_cond = false;
363 bool first = stmt_num == 0;
364
365 if (gcall *stmt = dyn_cast <gcall *> (stmt_info->stmt))
366 {
367 number_of_oprnds = gimple_call_num_args (stmt);
368 first_op_idx = 3;
369 if (gimple_call_internal_p (stmt))
370 {
371 internal_fn ifn = gimple_call_internal_fn (stmt);
372 commutative_op = first_commutative_argument (ifn);
373
374 /* Masked load, only look at mask. */
375 if (ifn == IFN_MASK_LOAD)
376 {
377 number_of_oprnds = 1;
378 /* Mask operand index. */
379 first_op_idx = 5;
380 }
381 }
382 }
383 else if (gassign *stmt = dyn_cast <gassign *> (stmt_info->stmt))
384 {
385 enum tree_code code = gimple_assign_rhs_code (stmt);
386 number_of_oprnds = gimple_num_ops (stmt) - 1;
 387       /* A swap can only be done for cond_expr if asked to; otherwise we
 388 	 could end up with a comparison code different from the first stmt's.  */
389 if (code == COND_EXPR
390 && COMPARISON_CLASS_P (gimple_assign_rhs1 (stmt)))
391 {
392 first_op_cond = true;
393 number_of_oprnds++;
394 }
395 else
396 commutative_op = commutative_tree_code (code) ? 0U : -1U;
397 }
398 else
399 return -1;
400
401 bool swapped = (*swap != 0);
402 gcc_assert (!swapped || first_op_cond);
403 for (i = 0; i < number_of_oprnds; i++)
404 {
405 again:
406 if (first_op_cond)
407 {
408 /* Map indicating how operands of cond_expr should be swapped. */
409 int maps[3][4] = {{0, 1, 2, 3}, {1, 0, 2, 3}, {0, 1, 3, 2}};
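	  /* Row 0 is the identity; row 1 (for *swap == 1) exchanges the
	     two comparison operands; row 2 (for *swap == 2) keeps the
	     comparison operands and exchanges the two arms instead,
	     which matches an inverted comparison code.  */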
410 int *map = maps[*swap];
411
412 if (i < 2)
413 oprnd = TREE_OPERAND (gimple_op (stmt_info->stmt,
414 first_op_idx), map[i]);
415 else
416 oprnd = gimple_op (stmt_info->stmt, map[i]);
417 }
418 else
419 oprnd = gimple_op (stmt_info->stmt, first_op_idx + (swapped ? !i : i));
420 if (TREE_CODE (oprnd) == VIEW_CONVERT_EXPR)
421 oprnd = TREE_OPERAND (oprnd, 0);
422
423 oprnd_info = (*oprnds_info)[i];
424
425 stmt_vec_info def_stmt_info;
426 if (!vect_is_simple_use (oprnd, vinfo, &dt, &def_stmt_info))
427 {
428 if (dump_enabled_p ())
429 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
430 "Build SLP failed: can't analyze def for %T\n",
431 oprnd);
432
433 return -1;
434 }
435
436 if (def_stmt_info && is_pattern_stmt_p (def_stmt_info))
437 oprnd_info->any_pattern = true;
438
439 if (first)
440 {
441 /* For the swapping logic below force vect_reduction_def
442 for the reduction op in a SLP reduction group. */
443 if (!STMT_VINFO_DATA_REF (stmt_info)
444 && REDUC_GROUP_FIRST_ELEMENT (stmt_info)
445 && (int)i == STMT_VINFO_REDUC_IDX (stmt_info)
446 && def_stmt_info)
447 dt = vect_reduction_def;
448 oprnd_info->first_dt = dt;
449 oprnd_info->first_op_type = TREE_TYPE (oprnd);
450 }
451 else
452 {
 453 	  /* Not the first stmt of the group; check that the def-stmt/s match
 454 	     the def-stmt/s of the first stmt.  Allow different definition
 455 	     types for reduction chains: the first stmt must be a
 456 	     vect_reduction_def (a phi node), and the rest must belong
 457 	     to the reduction chain.  */
458 tree type = TREE_TYPE (oprnd);
459 if ((oprnd_info->first_dt != dt
460 && !(oprnd_info->first_dt == vect_reduction_def
461 && !STMT_VINFO_DATA_REF (stmt_info)
462 && REDUC_GROUP_FIRST_ELEMENT (stmt_info)
463 && def_stmt_info
464 && !STMT_VINFO_DATA_REF (def_stmt_info)
465 && (REDUC_GROUP_FIRST_ELEMENT (def_stmt_info)
466 == REDUC_GROUP_FIRST_ELEMENT (stmt_info)))
467 && !((oprnd_info->first_dt == vect_external_def
468 || oprnd_info->first_dt == vect_constant_def)
469 && (dt == vect_external_def
470 || dt == vect_constant_def)))
471 || !types_compatible_p (oprnd_info->first_op_type, type)
472 || (!STMT_VINFO_DATA_REF (stmt_info)
473 && REDUC_GROUP_FIRST_ELEMENT (stmt_info)
474 && ((!def_stmt_info
475 || STMT_VINFO_DATA_REF (def_stmt_info)
476 || (REDUC_GROUP_FIRST_ELEMENT (def_stmt_info)
477 != REDUC_GROUP_FIRST_ELEMENT (stmt_info)))
478 != (oprnd_info->first_dt != vect_reduction_def))))
479 {
480 /* Try swapping operands if we got a mismatch. */
481 if (i == commutative_op && !swapped)
482 {
483 if (dump_enabled_p ())
484 dump_printf_loc (MSG_NOTE, vect_location,
485 "trying swapped operands\n");
486 swapped = true;
487 goto again;
488 }
489
490 if (dump_enabled_p ())
491 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
492 "Build SLP failed: different types\n");
493
494 return 1;
495 }
496 if ((dt == vect_constant_def
497 || dt == vect_external_def)
498 && !GET_MODE_SIZE (vinfo->vector_mode).is_constant ()
499 && (TREE_CODE (type) == BOOLEAN_TYPE
500 || !can_duplicate_and_interleave_p (vinfo, stmts.length (),
501 type)))
502 {
503 if (dump_enabled_p ())
504 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
505 "Build SLP failed: invalid type of def "
506 "for variable-length SLP %T\n", oprnd);
507 return -1;
508 }
509 }
510
511 /* Check the types of the definitions. */
512 switch (dt)
513 {
514 case vect_external_def:
515 /* Make sure to demote the overall operand to external. */
516 oprnd_info->first_dt = vect_external_def;
517 /* Fallthru. */
518 case vect_constant_def:
519 oprnd_info->def_stmts.quick_push (NULL);
520 oprnd_info->ops.quick_push (oprnd);
521 break;
522
523 case vect_internal_def:
524 case vect_reduction_def:
525 if (oprnd_info->first_dt == vect_reduction_def
526 && !STMT_VINFO_DATA_REF (stmt_info)
527 && REDUC_GROUP_FIRST_ELEMENT (stmt_info)
528 && !STMT_VINFO_DATA_REF (def_stmt_info)
529 && (REDUC_GROUP_FIRST_ELEMENT (def_stmt_info)
530 == REDUC_GROUP_FIRST_ELEMENT (stmt_info)))
531 {
532 /* For a SLP reduction chain we want to duplicate the
533 reduction to each of the chain members. That gets
534 us a sane SLP graph (still the stmts are not 100%
535 correct wrt the initial values). */
536 gcc_assert (!first);
537 oprnd_info->def_stmts.quick_push (oprnd_info->def_stmts[0]);
538 oprnd_info->ops.quick_push (oprnd_info->ops[0]);
539 break;
540 }
541 /* Fallthru. */
542 case vect_induction_def:
543 oprnd_info->def_stmts.quick_push (def_stmt_info);
544 oprnd_info->ops.quick_push (oprnd);
545 break;
546
547 default:
548 /* FORNOW: Not supported. */
549 if (dump_enabled_p ())
550 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
551 "Build SLP failed: illegal type of def %T\n",
552 oprnd);
553
554 return -1;
555 }
556 }
557
 558   /* If we swapped operands in the defs above, report it in the dump.  */
559 if (swapped)
560 {
561 if (dump_enabled_p ())
562 dump_printf_loc (MSG_NOTE, vect_location,
563 "swapped operands to match def types in %G",
564 stmt_info->stmt);
565 }
566
567 *swap = swapped;
568 return 0;
569 }
570
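/* An illustration of the swap retry above (stmt group assumed): if the
   first stmt is x = a + b with a defined by an in-loop stmt and b
   external, while the second stmt is y = c + d with c external and d
   defined in-loop, the defs of the second stmt mismatch operand by
   operand.  Since PLUS_EXPR is commutative, the "again" path re-reads
   the second stmt's operands in swapped order, d + c, which then
   matches, and *SWAP reports the swap to the caller.  */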
571 /* Try to assign vector type VECTYPE to STMT_INFO for BB vectorization.
572 Return true if we can, meaning that this choice doesn't conflict with
573 existing SLP nodes that use STMT_INFO. */
574
575 static bool
576 vect_update_shared_vectype (stmt_vec_info stmt_info, tree vectype)
577 {
578 tree old_vectype = STMT_VINFO_VECTYPE (stmt_info);
579 if (old_vectype && useless_type_conversion_p (vectype, old_vectype))
580 return true;
581
582 if (STMT_VINFO_GROUPED_ACCESS (stmt_info)
583 && DR_IS_READ (STMT_VINFO_DATA_REF (stmt_info)))
584 {
585 /* We maintain the invariant that if any statement in the group is
586 used, all other members of the group have the same vector type. */
587 stmt_vec_info first_info = DR_GROUP_FIRST_ELEMENT (stmt_info);
588 stmt_vec_info member_info = first_info;
589 for (; member_info; member_info = DR_GROUP_NEXT_ELEMENT (member_info))
590 if (STMT_VINFO_NUM_SLP_USES (member_info) > 0
591 || is_pattern_stmt_p (member_info))
592 break;
593
594 if (!member_info)
595 {
596 for (member_info = first_info; member_info;
597 member_info = DR_GROUP_NEXT_ELEMENT (member_info))
598 STMT_VINFO_VECTYPE (member_info) = vectype;
599 return true;
600 }
601 }
602 else if (STMT_VINFO_NUM_SLP_USES (stmt_info) == 0
603 && !is_pattern_stmt_p (stmt_info))
604 {
605 STMT_VINFO_VECTYPE (stmt_info) = vectype;
606 return true;
607 }
608
609 if (dump_enabled_p ())
610 {
611 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
612 "Build SLP failed: incompatible vector"
613 " types for: %G", stmt_info->stmt);
614 dump_printf_loc (MSG_NOTE, vect_location,
615 " old vector type: %T\n", old_vectype);
616 dump_printf_loc (MSG_NOTE, vect_location,
617 " new vector type: %T\n", vectype);
618 }
619 return false;
620 }
621
622 /* Return true if call statements CALL1 and CALL2 are similar enough
623 to be combined into the same SLP group. */
624
625 static bool
626 compatible_calls_p (gcall *call1, gcall *call2)
627 {
628 unsigned int nargs = gimple_call_num_args (call1);
629 if (nargs != gimple_call_num_args (call2))
630 return false;
631
632 if (gimple_call_combined_fn (call1) != gimple_call_combined_fn (call2))
633 return false;
634
635 if (gimple_call_internal_p (call1))
636 {
637 if (!types_compatible_p (TREE_TYPE (gimple_call_lhs (call1)),
638 TREE_TYPE (gimple_call_lhs (call2))))
639 return false;
640 for (unsigned int i = 0; i < nargs; ++i)
641 if (!types_compatible_p (TREE_TYPE (gimple_call_arg (call1, i)),
642 TREE_TYPE (gimple_call_arg (call2, i))))
643 return false;
644 }
645 else
646 {
647 if (!operand_equal_p (gimple_call_fn (call1),
648 gimple_call_fn (call2), 0))
649 return false;
650
651 if (gimple_call_fntype (call1) != gimple_call_fntype (call2))
652 return false;
653 }
654 return true;
655 }
656
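/* For instance (calls assumed): two calls to the same internal function,
   say IFN_SQRT, with compatible lhs and argument types can be grouped;
   calls to sqrtf and sqrt map to different combined functions and are
   rejected, as are calls to the same decl through different fntypes.  */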
657 /* A subroutine of vect_build_slp_tree for checking VECTYPE, which is the
658 caller's attempt to find the vector type in STMT_INFO with the narrowest
659 element type. Return true if VECTYPE is nonnull and if it is valid
660 for STMT_INFO. When returning true, update MAX_NUNITS to reflect the
661 number of units in VECTYPE. GROUP_SIZE and MAX_NUNITS are as for
662 vect_build_slp_tree. */
663
664 static bool
665 vect_record_max_nunits (vec_info *vinfo, stmt_vec_info stmt_info,
666 unsigned int group_size,
667 tree vectype, poly_uint64 *max_nunits)
668 {
669 if (!vectype)
670 {
671 if (dump_enabled_p ())
672 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
673 "Build SLP failed: unsupported data-type in %G\n",
674 stmt_info->stmt);
675 /* Fatal mismatch. */
676 return false;
677 }
678
679 /* If populating the vector type requires unrolling then fail
680 before adjusting *max_nunits for basic-block vectorization. */
681 poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
682 unsigned HOST_WIDE_INT const_nunits;
683 if (is_a <bb_vec_info> (vinfo)
684 && (!nunits.is_constant (&const_nunits)
685 || const_nunits > group_size))
686 {
687 if (dump_enabled_p ())
688 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
689 "Build SLP failed: unrolling required "
690 "in basic block SLP\n");
691 /* Fatal mismatch. */
692 return false;
693 }
694
695 /* In case of multiple types we need to detect the smallest type. */
696 vect_update_max_nunits (max_nunits, vectype);
697 return true;
698 }
699
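/* For example (numbers assumed): in basic-block vectorization a group of
   two int stores cannot use a V4SI vector type, since filling all four
   lanes would require unrolling, which only loop vectorization can do;
   the check above therefore rejects nunits > group_size (and any
   variable-length nunits) when VINFO is a bb_vec_info.  */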
700 /* STMTS is a group of GROUP_SIZE SLP statements in which some
701 statements do the same operation as the first statement and in which
702 the others do ALT_STMT_CODE. Return true if we can take one vector
703 of the first operation and one vector of the second and permute them
704 to get the required result. VECTYPE is the type of the vector that
705 would be permuted. */
706
707 static bool
708 vect_two_operations_perm_ok_p (vec<stmt_vec_info> stmts,
709 unsigned int group_size, tree vectype,
710 tree_code alt_stmt_code)
711 {
712 unsigned HOST_WIDE_INT count;
713 if (!TYPE_VECTOR_SUBPARTS (vectype).is_constant (&count))
714 return false;
715
716 vec_perm_builder sel (count, count, 1);
717 for (unsigned int i = 0; i < count; ++i)
718 {
719 unsigned int elt = i;
720 gassign *stmt = as_a <gassign *> (stmts[i % group_size]->stmt);
721 if (gimple_assign_rhs_code (stmt) == alt_stmt_code)
722 elt += count;
723 sel.quick_push (elt);
724 }
725 vec_perm_indices indices (sel, 2, count);
726 return can_vec_perm_const_p (TYPE_MODE (vectype), indices);
727 }
728
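/* A concrete instance of the mask built above (sizes assumed): for a
   group { a0 + b0, a1 - b1 } and a 4-lane vector type, the loop visits
   the four lanes with stmts alternating PLUS/MINUS, giving
   SEL == { 0, 5, 2, 7 }: even lanes come from the all-PLUS vector
   (elements 0..3 of the concatenation) and odd lanes from the all-MINUS
   vector (elements 4..7).  The target has to support that constant
   permutation for the two-operator node to be usable.  */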
 729 /* Verify whether the scalar stmts STMTS are isomorphic, do not require
 730    data permutation and are not of unsupported operation types.  Return
 731    true if so; otherwise return false and indicate in *MATCHES
 732    which stmts are not isomorphic to the first one.  If MATCHES[0]
 733    is false then this indicates that the comparison could not be
 734    carried out or that the stmts will never be vectorized by SLP.
 735
 736    Note a COND_EXPR is possibly isomorphic to another one after swapping its
 737    operands.  Set SWAP[i] to 1 if stmt I is a COND_EXPR and isomorphic to
 738    the first stmt by swapping the two operands of its comparison; set SWAP[i]
 739    to 2 if stmt I is isomorphic to the first stmt by inverting the code
 740    of the comparison.  Take A1 >= B1 ? X1 : Y1 as an example: it can be
 741    swapped to B1 <= A1 ? X1 : Y1, or inverted to A1 < B1 ? Y1 : X1.  */
742
743 static bool
744 vect_build_slp_tree_1 (vec_info *vinfo, unsigned char *swap,
745 vec<stmt_vec_info> stmts, unsigned int group_size,
746 poly_uint64 *max_nunits, bool *matches,
747 bool *two_operators)
748 {
749 unsigned int i;
750 stmt_vec_info first_stmt_info = stmts[0];
751 enum tree_code first_stmt_code = ERROR_MARK;
752 enum tree_code alt_stmt_code = ERROR_MARK;
753 enum tree_code rhs_code = ERROR_MARK;
754 enum tree_code first_cond_code = ERROR_MARK;
755 tree lhs;
756 bool need_same_oprnds = false;
757 tree vectype = NULL_TREE, first_op1 = NULL_TREE;
758 optab optab;
759 int icode;
760 machine_mode optab_op2_mode;
761 machine_mode vec_mode;
762 stmt_vec_info first_load = NULL, prev_first_load = NULL;
763 bool load_p = false;
764
765 /* For every stmt in NODE find its def stmt/s. */
766 stmt_vec_info stmt_info;
767 FOR_EACH_VEC_ELT (stmts, i, stmt_info)
768 {
769 gimple *stmt = stmt_info->stmt;
770 swap[i] = 0;
771 matches[i] = false;
772
773 if (dump_enabled_p ())
774 dump_printf_loc (MSG_NOTE, vect_location, "Build SLP for %G", stmt);
775
776 /* Fail to vectorize statements marked as unvectorizable. */
777 if (!STMT_VINFO_VECTORIZABLE (stmt_info))
778 {
779 if (dump_enabled_p ())
780 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
781 "Build SLP failed: unvectorizable statement %G",
782 stmt);
783 /* Fatal mismatch. */
784 matches[0] = false;
785 return false;
786 }
787
788 lhs = gimple_get_lhs (stmt);
789 if (lhs == NULL_TREE)
790 {
791 if (dump_enabled_p ())
792 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
793 "Build SLP failed: not GIMPLE_ASSIGN nor "
794 "GIMPLE_CALL %G", stmt);
795 /* Fatal mismatch. */
796 matches[0] = false;
797 return false;
798 }
799
800 tree nunits_vectype;
801 if (!vect_get_vector_types_for_stmt (vinfo, stmt_info, &vectype,
802 &nunits_vectype, group_size)
803 || (nunits_vectype
804 && !vect_record_max_nunits (vinfo, stmt_info, group_size,
805 nunits_vectype, max_nunits)))
806 {
807 /* Fatal mismatch. */
808 matches[0] = false;
809 return false;
810 }
811
812 gcc_assert (vectype);
813
814 if (is_a <bb_vec_info> (vinfo)
815 && !vect_update_shared_vectype (stmt_info, vectype))
816 continue;
817
818 gcall *call_stmt = dyn_cast <gcall *> (stmt);
819 if (call_stmt)
820 {
821 rhs_code = CALL_EXPR;
822
823 if (gimple_call_internal_p (stmt, IFN_MASK_LOAD))
824 load_p = true;
825 else if ((gimple_call_internal_p (call_stmt)
826 && (!vectorizable_internal_fn_p
827 (gimple_call_internal_fn (call_stmt))))
828 || gimple_call_tail_p (call_stmt)
829 || gimple_call_noreturn_p (call_stmt)
830 || !gimple_call_nothrow_p (call_stmt)
831 || gimple_call_chain (call_stmt))
832 {
833 if (dump_enabled_p ())
834 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
835 "Build SLP failed: unsupported call type %G",
836 call_stmt);
837 /* Fatal mismatch. */
838 matches[0] = false;
839 return false;
840 }
841 }
842 else
843 {
844 rhs_code = gimple_assign_rhs_code (stmt);
845 load_p = TREE_CODE_CLASS (rhs_code) == tcc_reference;
846 }
847
848 /* Check the operation. */
849 if (i == 0)
850 {
851 first_stmt_code = rhs_code;
852
853 /* Shift arguments should be equal in all the packed stmts for a
854 vector shift with scalar shift operand. */
855 if (rhs_code == LSHIFT_EXPR || rhs_code == RSHIFT_EXPR
856 || rhs_code == LROTATE_EXPR
857 || rhs_code == RROTATE_EXPR)
858 {
859 vec_mode = TYPE_MODE (vectype);
860
861 /* First see if we have a vector/vector shift. */
862 optab = optab_for_tree_code (rhs_code, vectype,
863 optab_vector);
864
865 if (!optab
866 || optab_handler (optab, vec_mode) == CODE_FOR_nothing)
867 {
868 /* No vector/vector shift, try for a vector/scalar shift. */
869 optab = optab_for_tree_code (rhs_code, vectype,
870 optab_scalar);
871
872 if (!optab)
873 {
874 if (dump_enabled_p ())
875 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
876 "Build SLP failed: no optab.\n");
877 /* Fatal mismatch. */
878 matches[0] = false;
879 return false;
880 }
881 icode = (int) optab_handler (optab, vec_mode);
882 if (icode == CODE_FOR_nothing)
883 {
884 if (dump_enabled_p ())
885 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
886 "Build SLP failed: "
887 "op not supported by target.\n");
888 /* Fatal mismatch. */
889 matches[0] = false;
890 return false;
891 }
892 optab_op2_mode = insn_data[icode].operand[2].mode;
893 if (!VECTOR_MODE_P (optab_op2_mode))
894 {
895 need_same_oprnds = true;
896 first_op1 = gimple_assign_rhs2 (stmt);
897 }
898 }
899 }
900 else if (rhs_code == WIDEN_LSHIFT_EXPR)
901 {
902 need_same_oprnds = true;
903 first_op1 = gimple_assign_rhs2 (stmt);
904 }
905 else if (call_stmt
906 && gimple_call_internal_p (call_stmt, IFN_DIV_POW2))
907 {
908 need_same_oprnds = true;
909 first_op1 = gimple_call_arg (call_stmt, 1);
910 }
911 }
912 else
913 {
914 if (first_stmt_code != rhs_code
915 && alt_stmt_code == ERROR_MARK)
916 alt_stmt_code = rhs_code;
917 if (first_stmt_code != rhs_code
918 && (first_stmt_code != IMAGPART_EXPR
919 || rhs_code != REALPART_EXPR)
920 && (first_stmt_code != REALPART_EXPR
921 || rhs_code != IMAGPART_EXPR)
922 /* Handle mismatches in plus/minus by computing both
923 and merging the results. */
924 && !((first_stmt_code == PLUS_EXPR
925 || first_stmt_code == MINUS_EXPR)
926 && (alt_stmt_code == PLUS_EXPR
927 || alt_stmt_code == MINUS_EXPR)
928 && rhs_code == alt_stmt_code)
929 && !(STMT_VINFO_GROUPED_ACCESS (stmt_info)
930 && (first_stmt_code == ARRAY_REF
931 || first_stmt_code == BIT_FIELD_REF
932 || first_stmt_code == INDIRECT_REF
933 || first_stmt_code == COMPONENT_REF
934 || first_stmt_code == MEM_REF)))
935 {
936 if (dump_enabled_p ())
937 {
938 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
939 "Build SLP failed: different operation "
940 "in stmt %G", stmt);
941 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
942 "original stmt %G", first_stmt_info->stmt);
943 }
944 /* Mismatch. */
945 continue;
946 }
947
948 if (need_same_oprnds)
949 {
950 tree other_op1 = (call_stmt
951 ? gimple_call_arg (call_stmt, 1)
952 : gimple_assign_rhs2 (stmt));
953 if (!operand_equal_p (first_op1, other_op1, 0))
954 {
955 if (dump_enabled_p ())
956 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
957 "Build SLP failed: different shift "
958 "arguments in %G", stmt);
959 /* Mismatch. */
960 continue;
961 }
962 }
963
964 if (!load_p && rhs_code == CALL_EXPR)
965 {
966 if (!compatible_calls_p (as_a <gcall *> (stmts[0]->stmt),
967 as_a <gcall *> (stmt)))
968 {
969 if (dump_enabled_p ())
970 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
971 "Build SLP failed: different calls in %G",
972 stmt);
973 /* Mismatch. */
974 continue;
975 }
976 }
977 }
978
979 /* Grouped store or load. */
980 if (STMT_VINFO_GROUPED_ACCESS (stmt_info))
981 {
982 if (REFERENCE_CLASS_P (lhs))
983 {
984 /* Store. */
985 ;
986 }
987 else
988 {
989 /* Load. */
990 first_load = DR_GROUP_FIRST_ELEMENT (stmt_info);
991 if (prev_first_load)
992 {
993 /* Check that there are no loads from different interleaving
994 chains in the same node. */
995 if (prev_first_load != first_load)
996 {
997 if (dump_enabled_p ())
998 dump_printf_loc (MSG_MISSED_OPTIMIZATION,
999 vect_location,
1000 "Build SLP failed: different "
1001 "interleaving chains in one node %G",
1002 stmt);
1003 /* Mismatch. */
1004 continue;
1005 }
1006 }
1007 else
1008 prev_first_load = first_load;
1009 }
1010 } /* Grouped access. */
1011 else
1012 {
1013 if (load_p)
1014 {
1015 /* Not grouped load. */
1016 if (dump_enabled_p ())
1017 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1018 "Build SLP failed: not grouped load %G", stmt);
1019
 1020 	      /* FORNOW: Non-grouped loads are not supported.  */
1021 /* Fatal mismatch. */
1022 matches[0] = false;
1023 return false;
1024 }
1025
 1026 	  /* Not a memory operation.  */
1027 if (TREE_CODE_CLASS (rhs_code) != tcc_binary
1028 && TREE_CODE_CLASS (rhs_code) != tcc_unary
1029 && TREE_CODE_CLASS (rhs_code) != tcc_expression
1030 && TREE_CODE_CLASS (rhs_code) != tcc_comparison
1031 && rhs_code != VIEW_CONVERT_EXPR
1032 && rhs_code != CALL_EXPR)
1033 {
1034 if (dump_enabled_p ())
1035 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1036 "Build SLP failed: operation unsupported %G",
1037 stmt);
1038 /* Fatal mismatch. */
1039 matches[0] = false;
1040 return false;
1041 }
1042
1043 if (rhs_code == COND_EXPR)
1044 {
1045 tree cond_expr = gimple_assign_rhs1 (stmt);
1046 enum tree_code cond_code = TREE_CODE (cond_expr);
1047 enum tree_code swap_code = ERROR_MARK;
1048 enum tree_code invert_code = ERROR_MARK;
1049
1050 if (i == 0)
1051 first_cond_code = TREE_CODE (cond_expr);
1052 else if (TREE_CODE_CLASS (cond_code) == tcc_comparison)
1053 {
1054 bool honor_nans = HONOR_NANS (TREE_OPERAND (cond_expr, 0));
1055 swap_code = swap_tree_comparison (cond_code);
1056 invert_code = invert_tree_comparison (cond_code, honor_nans);
1057 }
1058
1059 if (first_cond_code == cond_code)
1060 ;
 1061 	  /* Isomorphism can be achieved by swapping.  */
1062 else if (first_cond_code == swap_code)
1063 swap[i] = 1;
 1064 	  /* Isomorphism can be achieved by inverting.  */
1065 else if (first_cond_code == invert_code)
1066 swap[i] = 2;
1067 else
1068 {
1069 if (dump_enabled_p ())
1070 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1071 "Build SLP failed: different"
1072 " operation %G", stmt);
1073 /* Mismatch. */
1074 continue;
1075 }
1076 }
1077 }
1078
1079 matches[i] = true;
1080 }
1081
1082 for (i = 0; i < group_size; ++i)
1083 if (!matches[i])
1084 return false;
1085
1086 /* If we allowed a two-operation SLP node verify the target can cope
1087 with the permute we are going to use. */
1088 if (alt_stmt_code != ERROR_MARK
1089 && TREE_CODE_CLASS (alt_stmt_code) != tcc_reference)
1090 {
1091 if (!vect_two_operations_perm_ok_p (stmts, group_size,
1092 vectype, alt_stmt_code))
1093 {
1094 for (i = 0; i < group_size; ++i)
1095 if (gimple_assign_rhs_code (stmts[i]->stmt) == alt_stmt_code)
1096 {
1097 matches[i] = false;
1098 if (dump_enabled_p ())
1099 {
1100 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1101 "Build SLP failed: different operation "
1102 "in stmt %G", stmts[i]->stmt);
1103 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1104 "original stmt %G", first_stmt_info->stmt);
1105 }
1106 }
1107 return false;
1108 }
1109 *two_operators = true;
1110 }
1111
1112 return true;
1113 }
1114
 1115 /* Traits for the hash map used to record SLP build results for a stmt
 1116    set.  Note we never remove entries apart from at destruction time so
 1117    we do not need a special value for deleted that differs from empty.  */
1118 struct bst_traits
1119 {
1120 typedef vec <stmt_vec_info> value_type;
1121 typedef vec <stmt_vec_info> compare_type;
1122 static inline hashval_t hash (value_type);
1123 static inline bool equal (value_type existing, value_type candidate);
1124 static inline bool is_empty (value_type x) { return !x.exists (); }
1125 static inline bool is_deleted (value_type x) { return !x.exists (); }
1126 static const bool empty_zero_p = true;
1127 static inline void mark_empty (value_type &x) { x.release (); }
1128 static inline void mark_deleted (value_type &x) { x.release (); }
1129 static inline void remove (value_type &x) { x.release (); }
1130 };
1131 inline hashval_t
1132 bst_traits::hash (value_type x)
1133 {
1134 inchash::hash h;
1135 for (unsigned i = 0; i < x.length (); ++i)
1136 h.add_int (gimple_uid (x[i]->stmt));
1137 return h.end ();
1138 }
1139 inline bool
1140 bst_traits::equal (value_type existing, value_type candidate)
1141 {
1142 if (existing.length () != candidate.length ())
1143 return false;
1144 for (unsigned i = 0; i < existing.length (); ++i)
1145 if (existing[i] != candidate[i])
1146 return false;
1147 return true;
1148 }
1149
1150 typedef hash_map <vec <gimple *>, slp_tree,
1151 simple_hashmap_traits <bst_traits, slp_tree> >
1152 scalar_stmts_to_slp_tree_map_t;
1153
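/* Note the map records failures as well: a NULL slp_tree value for a key
   means an earlier attempt to build an SLP tree for exactly that stmt
   sequence failed, so vect_build_slp_tree below can avoid re-trying it.
   Keys hash the gimple_uid of each stmt and are therefore only stable
   within a single vectorization analysis.  */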
1154 static slp_tree
1155 vect_build_slp_tree_2 (vec_info *vinfo,
1156 vec<stmt_vec_info> stmts, unsigned int group_size,
1157 poly_uint64 *max_nunits,
1158 bool *matches, unsigned *npermutes, unsigned *tree_size,
1159 scalar_stmts_to_slp_tree_map_t *bst_map);
1160
1161 static slp_tree
1162 vect_build_slp_tree (vec_info *vinfo,
1163 vec<stmt_vec_info> stmts, unsigned int group_size,
1164 poly_uint64 *max_nunits,
1165 bool *matches, unsigned *npermutes, unsigned *tree_size,
1166 scalar_stmts_to_slp_tree_map_t *bst_map)
1167 {
1168 if (slp_tree *leader = bst_map->get (stmts))
1169 {
1170 if (dump_enabled_p ())
1171 dump_printf_loc (MSG_NOTE, vect_location, "re-using %sSLP tree %p\n",
1172 *leader ? "" : "failed ", *leader);
1173 if (*leader)
1174 {
1175 (*leader)->refcnt++;
1176 vect_update_max_nunits (max_nunits, (*leader)->max_nunits);
1177 }
1178 return *leader;
1179 }
1180 poly_uint64 this_max_nunits = 1;
1181 slp_tree res = vect_build_slp_tree_2 (vinfo, stmts, group_size,
1182 &this_max_nunits,
1183 matches, npermutes, tree_size, bst_map);
1184 if (res)
1185 {
1186 res->max_nunits = this_max_nunits;
1187 vect_update_max_nunits (max_nunits, this_max_nunits);
1188 /* Keep a reference for the bst_map use. */
1189 res->refcnt++;
1190 }
1191 bst_map->put (stmts.copy (), res);
1192 return res;
1193 }
1194
 1195 /* Recursively build an SLP tree for the group of stmts STMTS.
 1196    Fail (and return NULL) if the def-stmts are not isomorphic,
 1197    require data permutation or are of unsupported types of
 1198    operation.  On failure, MATCHES indicates which stmts failed
 1199    to match the first one; MATCHES[0] being false signals a
 1200    fatal, uncorrectable mismatch.  */
1201
1202 static slp_tree
1203 vect_build_slp_tree_2 (vec_info *vinfo,
1204 vec<stmt_vec_info> stmts, unsigned int group_size,
1205 poly_uint64 *max_nunits,
1206 bool *matches, unsigned *npermutes, unsigned *tree_size,
1207 scalar_stmts_to_slp_tree_map_t *bst_map)
1208 {
1209 unsigned nops, i, this_tree_size = 0;
1210 poly_uint64 this_max_nunits = *max_nunits;
1211 slp_tree node;
1212
1213 matches[0] = false;
1214
1215 stmt_vec_info stmt_info = stmts[0];
1216 if (gcall *stmt = dyn_cast <gcall *> (stmt_info->stmt))
1217 nops = gimple_call_num_args (stmt);
1218 else if (gassign *stmt = dyn_cast <gassign *> (stmt_info->stmt))
1219 {
1220 nops = gimple_num_ops (stmt) - 1;
1221 if (gimple_assign_rhs_code (stmt) == COND_EXPR)
1222 nops++;
1223 }
1224 else if (is_a <gphi *> (stmt_info->stmt))
1225 nops = 0;
1226 else
1227 return NULL;
1228
1229 /* If the SLP node is a PHI (induction or reduction), terminate
1230 the recursion. */
1231 if (gphi *stmt = dyn_cast <gphi *> (stmt_info->stmt))
1232 {
1233 tree scalar_type = TREE_TYPE (PHI_RESULT (stmt));
1234 tree vectype = get_vectype_for_scalar_type (vinfo, scalar_type);
1235 if (!vect_record_max_nunits (vinfo, stmt_info, group_size, vectype,
1236 max_nunits))
1237 return NULL;
1238
1239 vect_def_type def_type = STMT_VINFO_DEF_TYPE (stmt_info);
1240 /* Induction from different IVs is not supported. */
1241 if (def_type == vect_induction_def)
1242 {
1243 stmt_vec_info other_info;
1244 FOR_EACH_VEC_ELT (stmts, i, other_info)
1245 if (stmt_info != other_info)
1246 return NULL;
1247 }
1248 else if (def_type == vect_reduction_def
1249 || def_type == vect_double_reduction_def
1250 || def_type == vect_nested_cycle)
1251 {
1252 /* Else def types have to match. */
1253 stmt_vec_info other_info;
1254 FOR_EACH_VEC_ELT (stmts, i, other_info)
1255 if (STMT_VINFO_DEF_TYPE (other_info) != def_type)
1256 return NULL;
1257 }
1258 else
1259 return NULL;
1260 (*tree_size)++;
1261 node = vect_create_new_slp_node (stmts, 0);
1262 return node;
1263 }
1264
1265
1266 bool two_operators = false;
1267 unsigned char *swap = XALLOCAVEC (unsigned char, group_size);
1268 if (!vect_build_slp_tree_1 (vinfo, swap, stmts, group_size,
1269 &this_max_nunits, matches, &two_operators))
1270 return NULL;
1271
1272 /* If the SLP node is a load, terminate the recursion unless masked. */
1273 if (STMT_VINFO_GROUPED_ACCESS (stmt_info)
1274 && DR_IS_READ (STMT_VINFO_DATA_REF (stmt_info)))
1275 {
1276 if (gcall *stmt = dyn_cast <gcall *> (stmt_info->stmt))
1277 {
1278 /* Masked load. */
1279 gcc_assert (gimple_call_internal_p (stmt, IFN_MASK_LOAD));
1280 nops = 1;
1281 }
1282 else
1283 {
1284 *max_nunits = this_max_nunits;
1285 (*tree_size)++;
1286 node = vect_create_new_slp_node (stmts, 0);
1287 /* And compute the load permutation. Whether it is actually
1288 a permutation depends on the unrolling factor which is
1289 decided later. */
1290 vec<unsigned> load_permutation;
1291 int j;
1292 stmt_vec_info load_info;
1293 load_permutation.create (group_size);
1294 stmt_vec_info first_stmt_info
1295 = DR_GROUP_FIRST_ELEMENT (SLP_TREE_SCALAR_STMTS (node)[0]);
1296 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), j, load_info)
1297 {
1298 int load_place = vect_get_place_in_interleaving_chain
1299 (load_info, first_stmt_info);
1300 gcc_assert (load_place != -1);
1301 load_permutation.safe_push (load_place);
1302 }
1303 SLP_TREE_LOAD_PERMUTATION (node) = load_permutation;
1304 return node;
1305 }
1306 }
1307
1308 /* Get at the operands, verifying they are compatible. */
1309 vec<slp_oprnd_info> oprnds_info = vect_create_oprnd_info (nops, group_size);
1310 slp_oprnd_info oprnd_info;
1311 FOR_EACH_VEC_ELT (stmts, i, stmt_info)
1312 {
1313 int res = vect_get_and_check_slp_defs (vinfo, &swap[i],
1314 stmts, i, &oprnds_info);
1315 if (res != 0)
1316 matches[(res == -1) ? 0 : i] = false;
1317 if (!matches[0])
1318 break;
1319 }
1320 for (i = 0; i < group_size; ++i)
1321 if (!matches[i])
1322 {
1323 vect_free_oprnd_info (oprnds_info);
1324 return NULL;
1325 }
1326
1327 auto_vec<slp_tree, 4> children;
1328
1329 stmt_info = stmts[0];
1330
1331 /* Create SLP_TREE nodes for the definition node/s. */
1332 FOR_EACH_VEC_ELT (oprnds_info, i, oprnd_info)
1333 {
1334 slp_tree child;
1335 unsigned int j;
1336
1337 if (oprnd_info->first_dt == vect_uninitialized_def)
1338 {
 1339 	  /* COND_EXPRs end up with one operand too many when the
 1340 	     condition is an SSA name.  */
1341 gcc_assert (i == 3 && nops == 4);
1342 continue;
1343 }
1344
1345 if (oprnd_info->first_dt != vect_internal_def
1346 && oprnd_info->first_dt != vect_reduction_def
1347 && oprnd_info->first_dt != vect_induction_def)
1348 {
1349 slp_tree invnode = vect_create_new_slp_node (oprnd_info->ops);
1350 SLP_TREE_DEF_TYPE (invnode) = oprnd_info->first_dt;
1351 oprnd_info->ops = vNULL;
1352 children.safe_push (invnode);
1353 continue;
1354 }
1355
1356 if ((child = vect_build_slp_tree (vinfo, oprnd_info->def_stmts,
1357 group_size, &this_max_nunits,
1358 matches, npermutes,
1359 &this_tree_size, bst_map)) != NULL)
1360 {
1361 oprnd_info->def_stmts = vNULL;
1362 children.safe_push (child);
1363 continue;
1364 }
1365
 1366       /* If the SLP build failed fatally and we are analyzing a
 1367 	 basic block, simply treat nodes we failed to build as
 1368 	 externally defined (and thus build vectors from the scalar defs).
 1369 	 The cost model will reject outright expensive cases.
 1370 	 ???  This doesn't treat cases where permutation ultimately
 1371 	 fails (or we don't try permutation below).  Ideally we'd
 1372 	 even compute a permutation that will end up with the maximum
 1373 	 SLP tree size...  */
1374 if (is_a <bb_vec_info> (vinfo)
1375 && !matches[0]
1376 /* ??? Rejecting patterns this way doesn't work. We'd have to
1377 do extra work to cancel the pattern so the uses see the
1378 scalar version. */
1379 && !is_pattern_stmt_p (stmt_info)
1380 && !oprnd_info->any_pattern)
1381 {
1382 if (dump_enabled_p ())
1383 dump_printf_loc (MSG_NOTE, vect_location,
1384 "Building vector operands from scalars\n");
1385 this_tree_size++;
1386 child = vect_create_new_slp_node (oprnd_info->ops);
1387 children.safe_push (child);
1388 oprnd_info->ops = vNULL;
1389 oprnd_info->def_stmts = vNULL;
1390 continue;
1391 }
1392
 1393       /* If the SLP build for operand zero failed and operands zero
 1394 	 and one can be commuted, try that for the scalar stmts
 1395 	 that failed the match.  */
1396 if (i == 0
1397 /* A first scalar stmt mismatch signals a fatal mismatch. */
1398 && matches[0]
1399 /* ??? For COND_EXPRs we can swap the comparison operands
1400 as well as the arms under some constraints. */
1401 && nops == 2
1402 && oprnds_info[1]->first_dt == vect_internal_def
1403 && is_gimple_assign (stmt_info->stmt)
1404 /* Swapping operands for reductions breaks assumptions later on. */
1405 && STMT_VINFO_DEF_TYPE (stmt_info) != vect_reduction_def
1406 && STMT_VINFO_DEF_TYPE (stmt_info) != vect_double_reduction_def
 1407 	  /* Do so only if the number of unsuccessful permutes was no more
 1408 	     than a cut-off, as re-trying the recursive match on
 1409 	     possibly each level of the tree would expose exponential
 1410 	     behavior.  */
1411 && *npermutes < 4)
1412 {
1413 /* See whether we can swap the matching or the non-matching
1414 stmt operands. */
1415 bool swap_not_matching = true;
1416 do
1417 {
1418 for (j = 0; j < group_size; ++j)
1419 {
1420 if (matches[j] != !swap_not_matching)
1421 continue;
1422 stmt_vec_info stmt_info = stmts[j];
1423 /* Verify if we can swap operands of this stmt. */
1424 gassign *stmt = dyn_cast <gassign *> (stmt_info->stmt);
1425 if (!stmt
1426 || !commutative_tree_code (gimple_assign_rhs_code (stmt)))
1427 {
1428 if (!swap_not_matching)
1429 goto fail;
1430 swap_not_matching = false;
1431 break;
1432 }
1433 }
1434 }
1435 while (j != group_size);
1436
1437 /* Swap mismatched definition stmts. */
1438 if (dump_enabled_p ())
1439 dump_printf_loc (MSG_NOTE, vect_location,
1440 "Re-trying with swapped operands of stmts ");
1441 for (j = 0; j < group_size; ++j)
1442 if (matches[j] == !swap_not_matching)
1443 {
1444 std::swap (oprnds_info[0]->def_stmts[j],
1445 oprnds_info[1]->def_stmts[j]);
1446 std::swap (oprnds_info[0]->ops[j],
1447 oprnds_info[1]->ops[j]);
1448 if (dump_enabled_p ())
1449 dump_printf (MSG_NOTE, "%d ", j);
1450 }
1451 if (dump_enabled_p ())
1452 dump_printf (MSG_NOTE, "\n");
1453 /* And try again with scratch 'matches' ... */
1454 bool *tem = XALLOCAVEC (bool, group_size);
1455 if ((child = vect_build_slp_tree (vinfo, oprnd_info->def_stmts,
1456 group_size, &this_max_nunits,
1457 tem, npermutes,
1458 &this_tree_size, bst_map)) != NULL)
1459 {
1460 oprnd_info->def_stmts = vNULL;
1461 children.safe_push (child);
1462 continue;
1463 }
1464
1465 ++*npermutes;
1466 }
1467
1468 fail:
1469 gcc_assert (child == NULL);
1470 FOR_EACH_VEC_ELT (children, j, child)
1471 vect_free_slp_tree (child, false);
1472 vect_free_oprnd_info (oprnds_info);
1473 return NULL;
1474 }
1475
1476 vect_free_oprnd_info (oprnds_info);
1477
 1478   /* If all children of this non-unary node have been built up from
 1479      scalars then just throw the node away, causing it to be built
 1480      up from scalars by the caller instead.  */
1481 if (nops > 1
1482 && is_a <bb_vec_info> (vinfo)
1483 /* ??? Rejecting patterns this way doesn't work. We'd have to
1484 do extra work to cancel the pattern so the uses see the
1485 scalar version. */
1486 && !is_pattern_stmt_p (stmt_info))
1487 {
1488 slp_tree child;
1489 unsigned j;
1490 FOR_EACH_VEC_ELT (children, j, child)
1491 if (SLP_TREE_DEF_TYPE (child) != vect_external_def)
1492 break;
1493 if (!child)
1494 {
1495 /* Roll back. */
1496 FOR_EACH_VEC_ELT (children, j, child)
1497 vect_free_slp_tree (child, false);
1498
1499 if (dump_enabled_p ())
1500 dump_printf_loc (MSG_NOTE, vect_location,
1501 "Building parent vector operands from "
1502 "scalars instead\n");
1503 return NULL;
1504 }
1505 }
1506
1507 *tree_size += this_tree_size + 1;
1508 *max_nunits = this_max_nunits;
1509
1510 node = vect_create_new_slp_node (stmts, nops);
1511 SLP_TREE_TWO_OPERATORS (node) = two_operators;
1512 SLP_TREE_CHILDREN (node).splice (children);
1513 return node;
1514 }
1515
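/* To make the recursion above concrete (stmts assumed): for the group
   { x0 = a0 + b0, x1 = a1 + b1 } vect_build_slp_tree_2 first checks via
   vect_build_slp_tree_1 that the two PLUS stmts are isomorphic, then
   collects the per-operand defs and recurses to build one child node
   for { a0, a1 } and one for { b0, b1 }.  Grouped loads become leaf
   nodes carrying a load permutation, while constant and external defs
   become vect_constant_def/vect_external_def leaves holding the scalar
   operands themselves.  */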
1516 /* Dump a single SLP tree NODE. */
1517
1518 static void
1519 vect_print_slp_tree (dump_flags_t dump_kind, dump_location_t loc,
1520 slp_tree node)
1521 {
1522 unsigned i, j;
1523 slp_tree child;
1524 stmt_vec_info stmt_info;
1525 tree op;
1526
1527 dump_metadata_t metadata (dump_kind, loc.get_impl_location ());
1528 dump_user_location_t user_loc = loc.get_user_location ();
1529 dump_printf_loc (metadata, user_loc, "node%s %p (max_nunits=%u, refcnt=%u)\n",
1530 SLP_TREE_DEF_TYPE (node) == vect_external_def
1531 ? " (external)"
1532 : (SLP_TREE_DEF_TYPE (node) == vect_constant_def
1533 ? " (constant)"
1534 : ""), node,
1535 estimated_poly_value (node->max_nunits), node->refcnt);
1536 if (SLP_TREE_SCALAR_STMTS (node).exists ())
1537 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, stmt_info)
1538 dump_printf_loc (metadata, user_loc, "\tstmt %u %G", i, stmt_info->stmt);
1539 else
1540 {
1541 dump_printf_loc (metadata, user_loc, "\t{ ");
1542 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_OPS (node), i, op)
1543 dump_printf (metadata, "%T%s ", op,
1544 i < SLP_TREE_SCALAR_OPS (node).length () - 1 ? "," : "");
1545 dump_printf (metadata, "}\n");
1546 }
1547 if (SLP_TREE_LOAD_PERMUTATION (node).exists ())
1548 {
1549 dump_printf_loc (metadata, user_loc, "\tload permutation {");
1550 FOR_EACH_VEC_ELT (SLP_TREE_LOAD_PERMUTATION (node), i, j)
1551 dump_printf (dump_kind, " %u", j);
1552 dump_printf (dump_kind, " }\n");
1553 }
1554 if (SLP_TREE_CHILDREN (node).is_empty ())
1555 return;
1556 dump_printf_loc (metadata, user_loc, "\tchildren");
1557 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
1558 dump_printf (dump_kind, " %p", (void *)child);
1559 dump_printf (dump_kind, "\n");
1560 }
1561
1562 DEBUG_FUNCTION void
1563 debug (slp_tree node)
1564 {
1565 debug_dump_context ctx;
1566 vect_print_slp_tree (MSG_NOTE,
1567 dump_location_t::from_location_t (UNKNOWN_LOCATION),
1568 node);
1569 }
1570
 1571 /* Dump the SLP graph rooted at NODE using flags specified in DUMP_KIND.  */
1572
1573 static void
1574 vect_print_slp_graph (dump_flags_t dump_kind, dump_location_t loc,
1575 slp_tree node, hash_set<slp_tree> &visited)
1576 {
1577 unsigned i;
1578 slp_tree child;
1579
1580 if (visited.add (node))
1581 return;
1582
1583 vect_print_slp_tree (dump_kind, loc, node);
1584
1585 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
1586 vect_print_slp_graph (dump_kind, loc, child, visited);
1587 }
1588
1589 static void
1590 vect_print_slp_graph (dump_flags_t dump_kind, dump_location_t loc,
1591 slp_tree entry)
1592 {
1593 hash_set<slp_tree> visited;
1594 vect_print_slp_graph (dump_kind, loc, entry, visited);
1595 }
1596
1597 /* Mark the tree rooted at NODE with PURE_SLP. */
1598
1599 static void
1600 vect_mark_slp_stmts (slp_tree node, hash_set<slp_tree> &visited)
1601 {
1602 int i;
1603 stmt_vec_info stmt_info;
1604 slp_tree child;
1605
1606 if (SLP_TREE_DEF_TYPE (node) != vect_internal_def)
1607 return;
1608
1609 if (visited.add (node))
1610 return;
1611
1612 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, stmt_info)
1613 STMT_SLP_TYPE (stmt_info) = pure_slp;
1614
1615 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
1616 vect_mark_slp_stmts (child, visited);
1617 }
1618
1619 static void
1620 vect_mark_slp_stmts (slp_tree node)
1621 {
1622 hash_set<slp_tree> visited;
1623 vect_mark_slp_stmts (node, visited);
1624 }
1625
1626 /* Mark the statements of the tree rooted at NODE as relevant (vect_used). */
1627
1628 static void
1629 vect_mark_slp_stmts_relevant (slp_tree node, hash_set<slp_tree> &visited)
1630 {
1631 int i;
1632 stmt_vec_info stmt_info;
1633 slp_tree child;
1634
1635 if (SLP_TREE_DEF_TYPE (node) != vect_internal_def)
1636 return;
1637
1638 if (visited.add (node))
1639 return;
1640
1641 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, stmt_info)
1642 {
1643 gcc_assert (!STMT_VINFO_RELEVANT (stmt_info)
1644 || STMT_VINFO_RELEVANT (stmt_info) == vect_used_in_scope);
1645 STMT_VINFO_RELEVANT (stmt_info) = vect_used_in_scope;
1646 }
1647
1648 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
1649 vect_mark_slp_stmts_relevant (child, visited);
1650 }
1651
1652 static void
1653 vect_mark_slp_stmts_relevant (slp_tree node)
1654 {
1655 hash_set<slp_tree> visited;
1656 vect_mark_slp_stmts_relevant (node, visited);
1657 }
1658
1659 /* Copy the SLP subtree rooted at NODE. */
1660
1661 static slp_tree
1662 slp_copy_subtree (slp_tree node, hash_map<slp_tree, slp_tree> &map)
1663 {
1664 unsigned i;
1665
1666 bool existed_p;
1667 slp_tree &copy_ref = map.get_or_insert (node, &existed_p);
1668 if (existed_p)
1669 return copy_ref;
1670
1671 copy_ref = new _slp_tree;
1672 slp_tree copy = copy_ref;
1673 SLP_TREE_DEF_TYPE (copy) = SLP_TREE_DEF_TYPE (node);
1674 SLP_TREE_VECTYPE (copy) = SLP_TREE_VECTYPE (node);
1675 SLP_TREE_REPRESENTATIVE (copy) = SLP_TREE_REPRESENTATIVE (node);
1676 SLP_TREE_LANES (copy) = SLP_TREE_LANES (node);
1677 copy->max_nunits = node->max_nunits;
1678 copy->refcnt = 0;
1679 if (SLP_TREE_SCALAR_STMTS (node).exists ())
1680 {
1681 SLP_TREE_SCALAR_STMTS (copy) = SLP_TREE_SCALAR_STMTS (node).copy ();
1682 stmt_vec_info stmt_info;
1683 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, stmt_info)
1684 STMT_VINFO_NUM_SLP_USES (stmt_info)++;
1685 }
1686 if (SLP_TREE_SCALAR_OPS (node).exists ())
1687 SLP_TREE_SCALAR_OPS (copy) = SLP_TREE_SCALAR_OPS (node).copy ();
1688 if (SLP_TREE_LOAD_PERMUTATION (node).exists ())
1689 SLP_TREE_LOAD_PERMUTATION (copy) = SLP_TREE_LOAD_PERMUTATION (node).copy ();
1690 if (SLP_TREE_CHILDREN (node).exists ())
1691 SLP_TREE_CHILDREN (copy) = SLP_TREE_CHILDREN (node).copy ();
1692 gcc_assert (!SLP_TREE_VEC_STMTS (node).exists ());
1693
1694 slp_tree child;
1695 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (copy), i, child)
1696 {
1697 SLP_TREE_CHILDREN (copy)[i] = slp_copy_subtree (child, map);
1698 SLP_TREE_CHILDREN (copy)[i]->refcnt++;
1699 }
1700 return copy;
1701 }
1702
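/* Note the reference-counting discipline of the copy above: a fresh copy
   starts with refcnt 0 and only gains references from the parents that
   link to it (the caller is expected to take its own reference for the
   root, as vect_attempt_slp_rearrange_stmts below does), and the scalar
   stmts' SLP use counts are bumped so the copy behaves like any other
   user of those stmts.  */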
1703 /* Rearrange the statements of NODE according to PERMUTATION. */
1704
1705 static void
1706 vect_slp_rearrange_stmts (slp_tree node, unsigned int group_size,
1707 vec<unsigned> permutation,
1708 hash_set<slp_tree> &visited)
1709 {
1710 unsigned int i;
1711 slp_tree child;
1712
1713 if (visited.add (node))
1714 return;
1715
1716 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
1717 vect_slp_rearrange_stmts (child, group_size, permutation, visited);
1718
1719 if (SLP_TREE_SCALAR_STMTS (node).exists ())
1720 {
1721 gcc_assert (group_size == SLP_TREE_SCALAR_STMTS (node).length ());
1722 vec<stmt_vec_info> tmp_stmts;
1723 tmp_stmts.create (group_size);
1724 tmp_stmts.quick_grow (group_size);
1725 stmt_vec_info stmt_info;
1726 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, stmt_info)
1727 tmp_stmts[permutation[i]] = stmt_info;
1728 SLP_TREE_SCALAR_STMTS (node).release ();
1729 SLP_TREE_SCALAR_STMTS (node) = tmp_stmts;
1730 }
1731 if (SLP_TREE_SCALAR_OPS (node).exists ())
1732 {
1733 gcc_assert (group_size == SLP_TREE_SCALAR_OPS (node).length ());
1734 vec<tree> tmp_ops;
1735 tmp_ops.create (group_size);
1736 tmp_ops.quick_grow (group_size);
1737 tree op;
1738 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_OPS (node), i, op)
1739 tmp_ops[permutation[i]] = op;
1740 SLP_TREE_SCALAR_OPS (node).release ();
1741 SLP_TREE_SCALAR_OPS (node) = tmp_ops;
1742 }
1743 }
1744
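/* Note the direction of the permutation applied above: the stmt (or op)
   in lane I moves to lane PERMUTATION[I].  E.g. for three lanes and
   PERMUTATION == { 2, 0, 1 }, the old lane order { s0, s1, s2 } becomes
   { s1, s2, s0 }.  */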
1745
1746 /* Attempt to reorder stmts in a reduction chain so that we don't
1747 require any load permutation. Return true if that was possible,
1748 otherwise return false. */
1749
1750 static bool
1751 vect_attempt_slp_rearrange_stmts (slp_instance slp_instn)
1752 {
1753 unsigned int i, j;
1754 unsigned int lidx;
1755 slp_tree node, load;
1756
1757 if (SLP_INSTANCE_LOADS (slp_instn).is_empty ())
1758 return false;
1759
1760 /* Compare all the permutation sequences to the first one. We know
1761 that at least one load is permuted. */
1762 node = SLP_INSTANCE_LOADS (slp_instn)[0];
1763 if (!SLP_TREE_LOAD_PERMUTATION (node).exists ())
1764 return false;
1765 unsigned int group_size = SLP_TREE_LOAD_PERMUTATION (node).length ();
1766 for (i = 1; SLP_INSTANCE_LOADS (slp_instn).iterate (i, &load); ++i)
1767 {
1768 if (!SLP_TREE_LOAD_PERMUTATION (load).exists ()
1769 || SLP_TREE_LOAD_PERMUTATION (load).length () != group_size)
1770 return false;
1771 FOR_EACH_VEC_ELT (SLP_TREE_LOAD_PERMUTATION (load), j, lidx)
1772 if (lidx != SLP_TREE_LOAD_PERMUTATION (node)[j])
1773 return false;
1774 }
1775
1776 /* Check that the loads in the first sequence are different and there
1777 are no gaps between them. */
1778 auto_sbitmap load_index (group_size);
1779 bitmap_clear (load_index);
1780 FOR_EACH_VEC_ELT (node->load_permutation, i, lidx)
1781 {
1782 if (lidx >= group_size)
1783 return false;
1784 if (bitmap_bit_p (load_index, lidx))
1785 return false;
1786
1787 bitmap_set_bit (load_index, lidx);
1788 }
1789 for (i = 0; i < group_size; i++)
1790 if (!bitmap_bit_p (load_index, i))
1791 return false;
1792
1793 /* This permutation is valid for reduction. Since the order of the
1794 statements in the nodes is not important unless they are memory
1795 accesses, we can rearrange the statements in all the nodes
1796 according to the order of the loads. */
1797
1798 /* We have to unshare the SLP tree we modify. */
1799 hash_map<slp_tree, slp_tree> map;
1800 slp_tree unshared = slp_copy_subtree (SLP_INSTANCE_TREE (slp_instn), map);
1801 vect_free_slp_tree (SLP_INSTANCE_TREE (slp_instn), false);
1802 unshared->refcnt++;
1803 SLP_INSTANCE_TREE (slp_instn) = unshared;
1804 FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (slp_instn), i, node)
1805 SLP_INSTANCE_LOADS (slp_instn)[i] = *map.get (node);
1806 node = SLP_INSTANCE_LOADS (slp_instn)[0];
1807
1808 /* Do the actual re-arrangement. */
1809 hash_set<slp_tree> visited;
1810 vect_slp_rearrange_stmts (SLP_INSTANCE_TREE (slp_instn), group_size,
1811 node->load_permutation, visited);
1812
1813 /* We are done, no actual permutations need to be generated. */
1814 poly_uint64 unrolling_factor = SLP_INSTANCE_UNROLLING_FACTOR (slp_instn);
1815 FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (slp_instn), i, node)
1816 {
1817 stmt_vec_info first_stmt_info = SLP_TREE_SCALAR_STMTS (node)[0];
1818 first_stmt_info = DR_GROUP_FIRST_ELEMENT (first_stmt_info);
1819 /* But we have to keep those permutations that are required because
1820 of handling of gaps. */
1821 if (known_eq (unrolling_factor, 1U)
1822 || (group_size == DR_GROUP_SIZE (first_stmt_info)
1823 && DR_GROUP_GAP (first_stmt_info) == 0))
1824 SLP_TREE_LOAD_PERMUTATION (node).release ();
1825 else
1826 for (j = 0; j < SLP_TREE_LOAD_PERMUTATION (node).length (); ++j)
1827 SLP_TREE_LOAD_PERMUTATION (node)[j] = j;
1828 }
1829
1830 return true;
1831 }
1832
1833 /* Gather the load nodes in the SLP graph rooted at NODE into LOADS.  */
1834
1835 static void
1836 vect_gather_slp_loads (vec<slp_tree> &loads, slp_tree node,
1837 hash_set<slp_tree> &visited)
1838 {
1839 if (visited.add (node))
1840 return;
1841
1842 if (SLP_TREE_CHILDREN (node).length () == 0)
1843 {
1844 if (SLP_TREE_DEF_TYPE (node) != vect_internal_def)
1845 return;
1846 stmt_vec_info stmt_info = SLP_TREE_SCALAR_STMTS (node)[0];
1847 if (STMT_VINFO_GROUPED_ACCESS (stmt_info)
1848 && DR_IS_READ (STMT_VINFO_DATA_REF (stmt_info)))
1849 loads.safe_push (node);
1850 }
1851 else
1852 {
1853 unsigned i;
1854 slp_tree child;
1855 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
1856 vect_gather_slp_loads (loads, child, visited);
1857 }
1858 }
1859
1860 static void
1861 vect_gather_slp_loads (slp_instance inst, slp_tree node)
1862 {
1863 hash_set<slp_tree> visited;
1864 vect_gather_slp_loads (SLP_INSTANCE_LOADS (inst), node, visited);
1865 }
1866
1867
1868 /* Find the last scalar statement in SLP node NODE.  */
1869
1870 stmt_vec_info
1871 vect_find_last_scalar_stmt_in_slp (slp_tree node)
1872 {
1873 stmt_vec_info last = NULL;
1874 stmt_vec_info stmt_vinfo;
1875
1876 for (int i = 0; SLP_TREE_SCALAR_STMTS (node).iterate (i, &stmt_vinfo); i++)
1877 {
1878 stmt_vinfo = vect_orig_stmt (stmt_vinfo);
1879 last = last ? get_later_stmt (stmt_vinfo, last) : stmt_vinfo;
1880 }
1881
1882 return last;
1883 }
1884
1885 /* Splits a group of stores, currently beginning at FIRST_VINFO, into
1886 two groups: one (still beginning at FIRST_VINFO) of size GROUP1_SIZE
1887 (also containing the first GROUP1_SIZE stmts, since stores are
1888 consecutive), the second containing the remainder.
1889 Return the first stmt in the second group. */
1890
1891 static stmt_vec_info
1892 vect_split_slp_store_group (stmt_vec_info first_vinfo, unsigned group1_size)
1893 {
1894 gcc_assert (DR_GROUP_FIRST_ELEMENT (first_vinfo) == first_vinfo);
1895 gcc_assert (group1_size > 0);
1896 int group2_size = DR_GROUP_SIZE (first_vinfo) - group1_size;
1897 gcc_assert (group2_size > 0);
1898 DR_GROUP_SIZE (first_vinfo) = group1_size;
1899
1900 stmt_vec_info stmt_info = first_vinfo;
1901 for (unsigned i = group1_size; i > 1; i--)
1902 {
1903 stmt_info = DR_GROUP_NEXT_ELEMENT (stmt_info);
1904 gcc_assert (DR_GROUP_GAP (stmt_info) == 1);
1905 }
1906 /* STMT_INFO is now the last element of the first group.  */
1907 stmt_vec_info group2 = DR_GROUP_NEXT_ELEMENT (stmt_info);
1908 DR_GROUP_NEXT_ELEMENT (stmt_info) = 0;
1909
1910 DR_GROUP_SIZE (group2) = group2_size;
1911 for (stmt_info = group2; stmt_info;
1912 stmt_info = DR_GROUP_NEXT_ELEMENT (stmt_info))
1913 {
1914 DR_GROUP_FIRST_ELEMENT (stmt_info) = group2;
1915 gcc_assert (DR_GROUP_GAP (stmt_info) == 1);
1916 }
1917
1918 /* For the second group, the DR_GROUP_GAP is that before the original group,
1919 plus skipping over the first group.  */
1920 DR_GROUP_GAP (group2) = DR_GROUP_GAP (first_vinfo) + group1_size;
1921
1922 /* DR_GROUP_GAP of the first group now has to skip over the second group too. */
1923 DR_GROUP_GAP (first_vinfo) += group2_size;
1924
1925 if (dump_enabled_p ())
1926 dump_printf_loc (MSG_NOTE, vect_location, "Split group into %d and %d\n",
1927 group1_size, group2_size);
1928
1929 return group2;
1930 }
1931
1932 /* Calculate the unrolling factor for an SLP instance with GROUP_SIZE
1933 statements and a vector of NUNITS elements. */
1934
1935 static poly_uint64
1936 calculate_unrolling_factor (poly_uint64 nunits, unsigned int group_size)
1937 {
1938 return exact_div (common_multiple (nunits, group_size), group_size);
1939 }
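
/* For instance (numbers illustrative): with NUNITS = 4 and
   GROUP_SIZE = 6, common_multiple (4, 6) is 12 and the unrolling
   factor is 12 / 6 = 2; two copies of the six-statement group fill
   exactly three four-element vectors.  */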
1940
1941 /* Analyze an SLP instance starting from a group of grouped stores. Call
1942 vect_build_slp_tree to build a tree of packed stmts if possible.
1943 Return FALSE if it's impossible to SLP any stmt in the loop. */
1944
1945 static bool
1946 vect_analyze_slp_instance (vec_info *vinfo,
1947 scalar_stmts_to_slp_tree_map_t *bst_map,
1948 stmt_vec_info stmt_info, unsigned max_tree_size)
1949 {
1950 slp_instance new_instance;
1951 slp_tree node;
1952 unsigned int group_size;
1953 tree vectype, scalar_type = NULL_TREE;
1954 unsigned int i;
1955 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
1956 vec<stmt_vec_info> scalar_stmts;
1957 bool constructor = false;
1958
1959 if (STMT_VINFO_GROUPED_ACCESS (stmt_info))
1960 {
1961 scalar_type = TREE_TYPE (DR_REF (dr));
1962 group_size = DR_GROUP_SIZE (stmt_info);
1963 vectype = get_vectype_for_scalar_type (vinfo, scalar_type, group_size);
1964 }
1965 else if (!dr && REDUC_GROUP_FIRST_ELEMENT (stmt_info))
1966 {
1967 gcc_assert (is_a <loop_vec_info> (vinfo));
1968 vectype = STMT_VINFO_VECTYPE (stmt_info);
1969 group_size = REDUC_GROUP_SIZE (stmt_info);
1970 }
1971 else if (is_gimple_assign (stmt_info->stmt)
1972 && gimple_assign_rhs_code (stmt_info->stmt) == CONSTRUCTOR)
1973 {
1974 vectype = TREE_TYPE (gimple_assign_rhs1 (stmt_info->stmt));
1975 group_size = CONSTRUCTOR_NELTS (gimple_assign_rhs1 (stmt_info->stmt));
1976 constructor = true;
1977 }
1978 else
1979 {
1980 gcc_assert (is_a <loop_vec_info> (vinfo));
1981 vectype = STMT_VINFO_VECTYPE (stmt_info);
1982 group_size = as_a <loop_vec_info> (vinfo)->reductions.length ();
1983 }
1984
1985 if (!vectype)
1986 {
1987 if (dump_enabled_p ())
1988 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1989 "Build SLP failed: unsupported data-type %T\n",
1990 scalar_type);
1991
1992 return false;
1993 }
1994 poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
1995
1996 /* Create a node (a root of the SLP tree) for the packed grouped stores. */
1997 scalar_stmts.create (group_size);
1998 stmt_vec_info next_info = stmt_info;
1999 if (STMT_VINFO_GROUPED_ACCESS (stmt_info))
2000 {
2001 /* Collect the stores and store them in SLP_TREE_SCALAR_STMTS. */
2002 while (next_info)
2003 {
2004 scalar_stmts.safe_push (vect_stmt_to_vectorize (next_info));
2005 next_info = DR_GROUP_NEXT_ELEMENT (next_info);
2006 }
2007 }
2008 else if (!dr && REDUC_GROUP_FIRST_ELEMENT (stmt_info))
2009 {
2010 /* Collect the reduction stmts and store them in
2011 SLP_TREE_SCALAR_STMTS. */
2012 while (next_info)
2013 {
2014 scalar_stmts.safe_push (vect_stmt_to_vectorize (next_info));
2015 next_info = REDUC_GROUP_NEXT_ELEMENT (next_info);
2016 }
2017 /* Mark the first element of the reduction chain as reduction to properly
2018 transform the node. In the reduction analysis phase only the last
2019 element of the chain is marked as reduction. */
2020 STMT_VINFO_DEF_TYPE (stmt_info)
2021 = STMT_VINFO_DEF_TYPE (scalar_stmts.last ());
2022 STMT_VINFO_REDUC_DEF (vect_orig_stmt (stmt_info))
2023 = STMT_VINFO_REDUC_DEF (vect_orig_stmt (scalar_stmts.last ()));
2024 }
2025 else if (constructor)
2026 {
2027 tree rhs = gimple_assign_rhs1 (stmt_info->stmt);
2028 tree val;
2029 FOR_EACH_CONSTRUCTOR_VALUE (CONSTRUCTOR_ELTS (rhs), i, val)
2030 {
2031 if (TREE_CODE (val) == SSA_NAME)
2032 {
2033 gimple* def = SSA_NAME_DEF_STMT (val);
2034 stmt_vec_info def_info = vinfo->lookup_stmt (def);
2035 /* Value is defined in another basic block. */
2036 if (!def_info)
2037 return false;
2038 def_info = vect_stmt_to_vectorize (def_info);
2039 scalar_stmts.safe_push (def_info);
2040 }
2041 else
2042 return false;
2043 }
2044 if (dump_enabled_p ())
2045 dump_printf_loc (MSG_NOTE, vect_location,
2046 "Analyzing vectorizable constructor: %G\n",
2047 stmt_info->stmt);
2048 }
2049 else
2050 {
2051 /* Collect reduction statements. */
2052 vec<stmt_vec_info> reductions = as_a <loop_vec_info> (vinfo)->reductions;
2053 for (i = 0; reductions.iterate (i, &next_info); i++)
2054 scalar_stmts.safe_push (next_info);
2055 }
2056
2057 /* Build the tree for the SLP instance. */
2058 bool *matches = XALLOCAVEC (bool, group_size);
2059 unsigned npermutes = 0;
2060 poly_uint64 max_nunits = nunits;
2061 unsigned tree_size = 0;
2062 node = vect_build_slp_tree (vinfo, scalar_stmts, group_size,
2063 &max_nunits, matches, &npermutes,
2064 &tree_size, bst_map);
2065 if (node != NULL)
2066 {
2067 /* Calculate the unrolling factor based on the smallest type. */
2068 poly_uint64 unrolling_factor
2069 = calculate_unrolling_factor (max_nunits, group_size);
2070
2071 if (maybe_ne (unrolling_factor, 1U)
2072 && is_a <bb_vec_info> (vinfo))
2073 {
2074 unsigned HOST_WIDE_INT const_max_nunits;
2075 if (!max_nunits.is_constant (&const_max_nunits)
2076 || const_max_nunits > group_size)
2077 {
2078 if (dump_enabled_p ())
2079 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2080 "Build SLP failed: store group "
2081 "size not a multiple of the vector size "
2082 "in basic block SLP\n");
2083 vect_free_slp_tree (node, false);
2084 return false;
2085 }
2086 /* Fatal mismatch. */
2087 matches[group_size / const_max_nunits * const_max_nunits] = false;
2088 vect_free_slp_tree (node, false);
2089 }
2090 else
2091 {
2092 /* Create a new SLP instance. */
2093 new_instance = XNEW (class _slp_instance);
2094 SLP_INSTANCE_TREE (new_instance) = node;
2095 SLP_INSTANCE_UNROLLING_FACTOR (new_instance) = unrolling_factor;
2096 SLP_INSTANCE_LOADS (new_instance) = vNULL;
2097 SLP_INSTANCE_ROOT_STMT (new_instance) = constructor ? stmt_info : NULL;
2098
2099 vect_gather_slp_loads (new_instance, node);
2100 if (dump_enabled_p ())
2101 dump_printf_loc (MSG_NOTE, vect_location,
2102 "SLP size %u vs. limit %u.\n",
2103 tree_size, max_tree_size);
2104
2105 /* Check whether any load is possibly permuted. */
2106 slp_tree load_node;
2107 bool loads_permuted = false;
2108 FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (new_instance), i, load_node)
2109 {
2110 if (!SLP_TREE_LOAD_PERMUTATION (load_node).exists ())
2111 continue;
2112 unsigned j;
2113 stmt_vec_info load_info;
2114 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (load_node), j, load_info)
2115 if (SLP_TREE_LOAD_PERMUTATION (load_node)[j] != j)
2116 {
2117 loads_permuted = true;
2118 break;
2119 }
2120 }
2121
2122 /* If the loads and stores can be handled with load/store-lane
2123 instructions, do not generate this SLP instance.  */
2124 if (is_a <loop_vec_info> (vinfo)
2125 && loads_permuted
2126 && dr && vect_store_lanes_supported (vectype, group_size, false))
2127 {
2128 slp_tree load_node;
2129 FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (new_instance), i, load_node)
2130 {
2131 stmt_vec_info stmt_vinfo = DR_GROUP_FIRST_ELEMENT
2132 (SLP_TREE_SCALAR_STMTS (load_node)[0]);
2133 /* Use SLP for strided accesses (or if we can't load-lanes). */
2134 if (STMT_VINFO_STRIDED_P (stmt_vinfo)
2135 || ! vect_load_lanes_supported
2136 (STMT_VINFO_VECTYPE (stmt_vinfo),
2137 DR_GROUP_SIZE (stmt_vinfo), false))
2138 break;
2139 }
2140 if (i == SLP_INSTANCE_LOADS (new_instance).length ())
2141 {
2142 if (dump_enabled_p ())
2143 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2144 "Built SLP cancelled: can use "
2145 "load/store-lanes\n");
2146 vect_free_slp_instance (new_instance, false);
2147 return false;
2148 }
2149 }
2150
2151 /* If this is a reduction chain with a conversion in front,
2152 amend the SLP tree with a node for that. */
2153 if (!dr
2154 && REDUC_GROUP_FIRST_ELEMENT (stmt_info)
2155 && STMT_VINFO_DEF_TYPE (stmt_info) != vect_reduction_def)
2156 {
2157 /* Get at the conversion stmt - we know it's the single use
2158 of the last stmt of the reduction chain. */
2159 gimple *tem = vect_orig_stmt (scalar_stmts[group_size - 1])->stmt;
2160 use_operand_p use_p;
2161 gimple *use_stmt;
2162 bool r = single_imm_use (gimple_assign_lhs (tem),
2163 &use_p, &use_stmt);
2164 gcc_assert (r);
2165 next_info = vinfo->lookup_stmt (use_stmt);
2166 next_info = vect_stmt_to_vectorize (next_info);
2167 scalar_stmts = vNULL;
2168 scalar_stmts.create (group_size);
2169 for (unsigned i = 0; i < group_size; ++i)
2170 scalar_stmts.quick_push (next_info);
2171 slp_tree conv = vect_create_new_slp_node (scalar_stmts, 1);
2172 SLP_TREE_CHILDREN (conv).quick_push (node);
2173 SLP_INSTANCE_TREE (new_instance) = conv;
2174 /* We also have to fake this conversion stmt as an SLP reduction
2175 group so we don't have to mess with too much code
2176 elsewhere. */
2177 REDUC_GROUP_FIRST_ELEMENT (next_info) = next_info;
2178 REDUC_GROUP_NEXT_ELEMENT (next_info) = NULL;
2179 }
2180
2181 vinfo->slp_instances.safe_push (new_instance);
2182
2183 /* ??? We've replaced the old SLP_INSTANCE_GROUP_SIZE with
2184 the number of scalar stmts in the root in a few places.
2185 Verify that assumption holds. */
2186 gcc_assert (SLP_TREE_SCALAR_STMTS (SLP_INSTANCE_TREE (new_instance))
2187 .length () == group_size);
2188
2189 if (dump_enabled_p ())
2190 {
2191 dump_printf_loc (MSG_NOTE, vect_location,
2192 "Final SLP tree for instance:\n");
2193 vect_print_slp_graph (MSG_NOTE, vect_location,
2194 SLP_INSTANCE_TREE (new_instance));
2195 }
2196
2197 return true;
2198 }
2199 }
2200 else
2201 {
2202 /* Failed to SLP. */
2203 /* Free the allocated memory. */
2204 scalar_stmts.release ();
2205 }
2206
2207 /* For basic block SLP, try to break the group up into multiples of the
2208 vector size. */
2209 unsigned HOST_WIDE_INT const_nunits;
2210 if (is_a <bb_vec_info> (vinfo)
2211 && STMT_VINFO_GROUPED_ACCESS (stmt_info)
2212 && DR_GROUP_FIRST_ELEMENT (stmt_info)
2213 && nunits.is_constant (&const_nunits))
2214 {
2215 /* We consider breaking the group only on VF boundaries from the existing
2216 start. */
2217 for (i = 0; i < group_size; i++)
2218 if (!matches[i]) break;
2219
2220 if (i >= const_nunits && i < group_size)
2221 {
2222 /* Split into two groups at the first vector boundary before i. */
2223 gcc_assert ((const_nunits & (const_nunits - 1)) == 0);
2224 unsigned group1_size = i & ~(const_nunits - 1);
2225
2226 stmt_vec_info rest = vect_split_slp_store_group (stmt_info,
2227 group1_size);
2228 bool res = vect_analyze_slp_instance (vinfo, bst_map, stmt_info,
2229 max_tree_size);
2230 /* If the first non-match was in the middle of a vector,
2231 skip the rest of that vector. */
2232 if (group1_size < i)
2233 {
2234 i = group1_size + const_nunits;
2235 if (i < group_size)
2236 rest = vect_split_slp_store_group (rest, const_nunits);
2237 }
2238 if (i < group_size)
2239 res |= vect_analyze_slp_instance (vinfo, bst_map,
2240 rest, max_tree_size);
2241 return res;
2242 }
2243 /* Even though the first vector did not all match, we might be able to SLP
2244 (some of) the remainder.  FORNOW ignore this possibility.  */
2245 }
2246
2247 return false;
2248 }
2249
2250
2251 /* Check if there are stmts in the loop that can be vectorized using SLP.  Build SLP
2252 trees of packed scalar stmts if SLP is possible. */
2253
2254 opt_result
2255 vect_analyze_slp (vec_info *vinfo, unsigned max_tree_size)
2256 {
2257 unsigned int i;
2258 stmt_vec_info first_element;
2259
2260 DUMP_VECT_SCOPE ("vect_analyze_slp");
2261
2262 scalar_stmts_to_slp_tree_map_t *bst_map
2263 = new scalar_stmts_to_slp_tree_map_t ();
2264
2265 /* Find SLP sequences starting from groups of grouped stores. */
2266 FOR_EACH_VEC_ELT (vinfo->grouped_stores, i, first_element)
2267 vect_analyze_slp_instance (vinfo, bst_map, first_element, max_tree_size);
2268
2269 if (loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo))
2270 {
2271 if (loop_vinfo->reduction_chains.length () > 0)
2272 {
2273 /* Find SLP sequences starting from reduction chains. */
2274 FOR_EACH_VEC_ELT (loop_vinfo->reduction_chains, i, first_element)
2275 if (! vect_analyze_slp_instance (vinfo, bst_map, first_element,
2276 max_tree_size))
2277 {
2278 /* Dissolve reduction chain group. */
2279 stmt_vec_info vinfo = first_element;
2280 stmt_vec_info last = NULL;
2281 while (vinfo)
2282 {
2283 stmt_vec_info next = REDUC_GROUP_NEXT_ELEMENT (vinfo);
2284 REDUC_GROUP_FIRST_ELEMENT (vinfo) = NULL;
2285 REDUC_GROUP_NEXT_ELEMENT (vinfo) = NULL;
2286 last = vinfo;
2287 vinfo = next;
2288 }
2289 STMT_VINFO_DEF_TYPE (first_element) = vect_internal_def;
2290 /* It can still be vectorized as part of an SLP reduction.  */
2291 loop_vinfo->reductions.safe_push (last);
2292 }
2293 }
2294
2295 /* Find SLP sequences starting from groups of reductions. */
2296 if (loop_vinfo->reductions.length () > 1)
2297 vect_analyze_slp_instance (vinfo, bst_map, loop_vinfo->reductions[0],
2298 max_tree_size);
2299 }
2300
2301 /* The map keeps a reference on SLP nodes built, release that. */
2302 for (scalar_stmts_to_slp_tree_map_t::iterator it = bst_map->begin ();
2303 it != bst_map->end (); ++it)
2304 if ((*it).second)
2305 vect_free_slp_tree ((*it).second, false);
2306 delete bst_map;
2307
2308 /* Optimize permutations in SLP reductions. */
2309 slp_instance instance;
2310 FOR_EACH_VEC_ELT (vinfo->slp_instances, i, instance)
2311 {
2312 slp_tree node = SLP_INSTANCE_TREE (instance);
2313 stmt_vec_info stmt_info = SLP_TREE_SCALAR_STMTS (node)[0];
2314 /* Reduction (there are no data-refs in the root).
2315 In a reduction chain the order of the loads is not important.  */
2316 if (!STMT_VINFO_DATA_REF (stmt_info)
2317 && !REDUC_GROUP_FIRST_ELEMENT (stmt_info))
2318 vect_attempt_slp_rearrange_stmts (instance);
2319 }
2320
2321 /* Gather all loads in the SLP graph. */
2322 hash_set<slp_tree> visited;
2323 FOR_EACH_VEC_ELT (vinfo->slp_instances, i, instance)
2324 vect_gather_slp_loads (vinfo->slp_loads, SLP_INSTANCE_TREE (instance),
2325 visited);
2326
2327 return opt_result::success ();
2328 }
2329
2330 void
2331 vect_optimize_slp (vec_info *vinfo)
2332 {
2333 slp_tree node;
2334 unsigned i;
2335 FOR_EACH_VEC_ELT (vinfo->slp_loads, i, node)
2336 {
2337 if (!SLP_TREE_LOAD_PERMUTATION (node).exists ())
2338 continue;
2339
2340 /* In basic block vectorization we allow any subchain of an interleaving
2341 chain.
2342 FORNOW: not in loop SLP because of realignment complications. */
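      /* For example (illustrative): a node loading {b[2], b[3]} out of
	 an interleaving chain b[0..3] is a gap-free subchain, so its
	 load permutation can simply be dropped here.  */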
2343 if (is_a <bb_vec_info> (vinfo))
2344 {
2345 bool subchain_p = true;
2346 stmt_vec_info next_load_info = NULL;
2347 stmt_vec_info load_info;
2348 unsigned j;
2349 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), j, load_info)
2350 {
2351 if (j != 0
2352 && (next_load_info != load_info
2353 || DR_GROUP_GAP (load_info) != 1))
2354 {
2355 subchain_p = false;
2356 break;
2357 }
2358 next_load_info = DR_GROUP_NEXT_ELEMENT (load_info);
2359 }
2360 if (subchain_p)
2361 {
2362 SLP_TREE_LOAD_PERMUTATION (node).release ();
2363 continue;
2364 }
2365 }
2366 else
2367 {
2368 stmt_vec_info load_info;
2369 bool this_load_permuted = false;
2370 unsigned j;
2371 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), j, load_info)
2372 if (SLP_TREE_LOAD_PERMUTATION (node)[j] != j)
2373 {
2374 this_load_permuted = true;
2375 break;
2376 }
2377 stmt_vec_info first_stmt_info
2378 = DR_GROUP_FIRST_ELEMENT (SLP_TREE_SCALAR_STMTS (node)[0]);
2379 if (!this_load_permuted
2380 /* The load requires permutation when unrolling exposes
2381 a gap either because the group is larger than the SLP
2382 group-size or because there is a gap between the groups. */
2383 && (known_eq (LOOP_VINFO_VECT_FACTOR (as_a <loop_vec_info> (vinfo)), 1U)
2384 || ((SLP_TREE_LANES (node) == DR_GROUP_SIZE (first_stmt_info))
2385 && DR_GROUP_GAP (first_stmt_info) == 0)))
2386 {
2387 SLP_TREE_LOAD_PERMUTATION (node).release ();
2388 continue;
2389 }
2390 }
2391 }
2392 }
2393
2394
2395 /* For each possible SLP instance decide whether to SLP it and calculate the
2396 overall unrolling factor needed to SLP the loop.  Return TRUE if decided to SLP at
2397 least one instance. */
2398
2399 bool
2400 vect_make_slp_decision (loop_vec_info loop_vinfo)
2401 {
2402 unsigned int i;
2403 poly_uint64 unrolling_factor = 1;
2404 vec<slp_instance> slp_instances = LOOP_VINFO_SLP_INSTANCES (loop_vinfo);
2405 slp_instance instance;
2406 int decided_to_slp = 0;
2407
2408 DUMP_VECT_SCOPE ("vect_make_slp_decision");
2409
2410 FOR_EACH_VEC_ELT (slp_instances, i, instance)
2411 {
2412 /* FORNOW: SLP if you can. */
2413 /* All unroll factors have the form:
2414
2415 GET_MODE_SIZE (vinfo->vector_mode) * X
2416
2417 for some rational X, so they must have a common multiple. */
2418 unrolling_factor
2419 = force_common_multiple (unrolling_factor,
2420 SLP_INSTANCE_UNROLLING_FACTOR (instance));
2421
2422 /* Mark all the stmts that belong to INSTANCE as PURE_SLP stmts. Later we
2423 call vect_detect_hybrid_slp () to find stmts that need hybrid SLP and
2424 loop-based vectorization. Such stmts will be marked as HYBRID. */
2425 vect_mark_slp_stmts (SLP_INSTANCE_TREE (instance));
2426 decided_to_slp++;
2427 }
2428
2429 LOOP_VINFO_SLP_UNROLLING_FACTOR (loop_vinfo) = unrolling_factor;
2430
2431 if (decided_to_slp && dump_enabled_p ())
2432 {
2433 dump_printf_loc (MSG_NOTE, vect_location,
2434 "Decided to SLP %d instances. Unrolling factor ",
2435 decided_to_slp);
2436 dump_dec (MSG_NOTE, unrolling_factor);
2437 dump_printf (MSG_NOTE, "\n");
2438 }
2439
2440 return (decided_to_slp > 0);
2441 }
2442
2443 /* Private data for vect_detect_hybrid_slp. */
2444 struct vdhs_data
2445 {
2446 loop_vec_info loop_vinfo;
2447 vec<stmt_vec_info> *worklist;
2448 };
2449
2450 /* Walker for walk_gimple_op. */
2451
2452 static tree
2453 vect_detect_hybrid_slp (tree *tp, int *, void *data)
2454 {
2455 walk_stmt_info *wi = (walk_stmt_info *)data;
2456 vdhs_data *dat = (vdhs_data *)wi->info;
2457
2458 if (wi->is_lhs)
2459 return NULL_TREE;
2460
2461 stmt_vec_info def_stmt_info = dat->loop_vinfo->lookup_def (*tp);
2462 if (!def_stmt_info)
2463 return NULL_TREE;
2464 def_stmt_info = vect_stmt_to_vectorize (def_stmt_info);
2465 if (PURE_SLP_STMT (def_stmt_info))
2466 {
2467 if (dump_enabled_p ())
2468 dump_printf_loc (MSG_NOTE, vect_location, "marking hybrid: %G",
2469 def_stmt_info->stmt);
2470 STMT_SLP_TYPE (def_stmt_info) = hybrid;
2471 dat->worklist->safe_push (def_stmt_info);
2472 }
2473
2474 return NULL_TREE;
2475 }
2476
2477 /* Find stmts that must be both vectorized and SLPed. */
2478
2479 void
2480 vect_detect_hybrid_slp (loop_vec_info loop_vinfo)
2481 {
2482 DUMP_VECT_SCOPE ("vect_detect_hybrid_slp");
2483
2484 /* All stmts participating in SLP are marked pure_slp, all other
2485 stmts are loop_vect.
2486 First collect all loop_vect stmts into a worklist. */
2487 auto_vec<stmt_vec_info> worklist;
2488 for (unsigned i = 0; i < LOOP_VINFO_LOOP (loop_vinfo)->num_nodes; ++i)
2489 {
2490 basic_block bb = LOOP_VINFO_BBS (loop_vinfo)[i];
2491 for (gphi_iterator gsi = gsi_start_phis (bb); !gsi_end_p (gsi);
2492 gsi_next (&gsi))
2493 {
2494 gphi *phi = gsi.phi ();
2495 stmt_vec_info stmt_info = loop_vinfo->lookup_stmt (phi);
2496 if (!STMT_SLP_TYPE (stmt_info) && STMT_VINFO_RELEVANT (stmt_info))
2497 worklist.safe_push (stmt_info);
2498 }
2499 for (gimple_stmt_iterator gsi = gsi_start_bb (bb); !gsi_end_p (gsi);
2500 gsi_next (&gsi))
2501 {
2502 gimple *stmt = gsi_stmt (gsi);
2503 stmt_vec_info stmt_info = loop_vinfo->lookup_stmt (stmt);
2504 if (STMT_VINFO_IN_PATTERN_P (stmt_info))
2505 {
2506 for (gimple_stmt_iterator gsi2
2507 = gsi_start (STMT_VINFO_PATTERN_DEF_SEQ (stmt_info));
2508 !gsi_end_p (gsi2); gsi_next (&gsi2))
2509 {
2510 stmt_vec_info patt_info
2511 = loop_vinfo->lookup_stmt (gsi_stmt (gsi2));
2512 if (!STMT_SLP_TYPE (patt_info)
2513 && STMT_VINFO_RELEVANT (patt_info))
2514 worklist.safe_push (patt_info);
2515 }
2516 stmt_info = STMT_VINFO_RELATED_STMT (stmt_info);
2517 }
2518 if (!STMT_SLP_TYPE (stmt_info) && STMT_VINFO_RELEVANT (stmt_info))
2519 worklist.safe_push (stmt_info);
2520 }
2521 }
2522
2523 /* Now we have a worklist of non-SLP stmts, follow use->def chains and
2524 mark any SLP vectorized stmt as hybrid.
2525 ??? We're visiting def stmts N times (once for each non-SLP and
2526 once for each hybrid-SLP use). */
2527 walk_stmt_info wi;
2528 vdhs_data dat;
2529 dat.worklist = &worklist;
2530 dat.loop_vinfo = loop_vinfo;
2531 memset (&wi, 0, sizeof (wi));
2532 wi.info = (void *)&dat;
2533 while (!worklist.is_empty ())
2534 {
2535 stmt_vec_info stmt_info = worklist.pop ();
2536 /* Since SSA operands are not set up for pattern stmts we need
2537 to use walk_gimple_op. */
2538 wi.is_lhs = 0;
2539 walk_gimple_op (stmt_info->stmt, vect_detect_hybrid_slp, &wi);
2540 }
2541 }
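
/* Illustrative sketch (hypothetical GIMPLE): given

     _1 = a[i_1] + b[i_1];     <-- pure SLP
     sum_2 = sum_1 + _1;       <-- non-SLP (loop-vect) use of _1

   popping the non-SLP statement off the worklist walks its operands,
   finds the pure-SLP def _1, downgrades it to hybrid and queues it so
   that its own operands are processed the same way.  */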
2542
2543
2544 /* Initialize a bb_vec_info struct for the statements between
2545 REGION_BEGIN_IN (inclusive) and REGION_END_IN (exclusive). */
2546
2547 _bb_vec_info::_bb_vec_info (gimple_stmt_iterator region_begin_in,
2548 gimple_stmt_iterator region_end_in,
2549 vec_info_shared *shared)
2550 : vec_info (vec_info::bb, init_cost (NULL), shared),
2551 bb (gsi_bb (region_begin_in)),
2552 region_begin (region_begin_in),
2553 region_end (region_end_in)
2554 {
2555 gimple_stmt_iterator gsi;
2556
2557 for (gsi = region_begin; gsi_stmt (gsi) != gsi_stmt (region_end);
2558 gsi_next (&gsi))
2559 {
2560 gimple *stmt = gsi_stmt (gsi);
2561 gimple_set_uid (stmt, 0);
2562 add_stmt (stmt);
2563 }
2564
2565 bb->aux = this;
2566 }
2567
2568
2569 /* Free BB_VINFO struct, as well as all the stmt_vec_info structs of all the
2570 stmts in the basic block. */
2571
2572 _bb_vec_info::~_bb_vec_info ()
2573 {
2574 for (gimple_stmt_iterator si = region_begin;
2575 gsi_stmt (si) != gsi_stmt (region_end); gsi_next (&si))
2576 /* Reset region marker. */
2577 gimple_set_uid (gsi_stmt (si), -1);
2578
2579 bb->aux = NULL;
2580 }
2581
2582 /* Subroutine of vect_slp_analyze_node_operations. Handle the root of NODE,
2583 given that child nodes have already been processed, and that
2584 their def types currently match their SLP node's def type. */
2585
2586 static bool
2587 vect_slp_analyze_node_operations_1 (vec_info *vinfo, slp_tree node,
2588 slp_instance node_instance,
2589 stmt_vector_for_cost *cost_vec)
2590 {
2591 stmt_vec_info stmt_info = SLP_TREE_REPRESENTATIVE (node);
2592 gcc_assert (STMT_SLP_TYPE (stmt_info) != loop_vect);
2593
2594 /* Calculate the number of vector statements to be created for the
2595 scalar stmts in this node. For SLP reductions it is equal to the
2596 number of vector statements in the children (which has already been
2597 calculated by the recursive call). Otherwise it is the number of
2598 scalar elements in one scalar iteration (DR_GROUP_SIZE) multiplied by
2599 VF divided by the number of elements in a vector. */
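  /* For example (values illustrative): with VF = 4, two lanes in the
     node and a four-lane vector type, this gives 4 * 2 / 4 = 2 vector
     statements.  */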
2600 if (!STMT_VINFO_GROUPED_ACCESS (stmt_info)
2601 && REDUC_GROUP_FIRST_ELEMENT (stmt_info))
2602 {
2603 for (unsigned i = 0; i < SLP_TREE_CHILDREN (node).length (); ++i)
2604 if (SLP_TREE_DEF_TYPE (SLP_TREE_CHILDREN (node)[i]) == vect_internal_def)
2605 {
2606 SLP_TREE_NUMBER_OF_VEC_STMTS (node)
2607 = SLP_TREE_NUMBER_OF_VEC_STMTS (SLP_TREE_CHILDREN (node)[i]);
2608 break;
2609 }
2610 }
2611 else
2612 {
2613 poly_uint64 vf;
2614 if (loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo))
2615 vf = loop_vinfo->vectorization_factor;
2616 else
2617 vf = 1;
2618 unsigned int group_size = SLP_TREE_LANES (node);
2619 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
2620 SLP_TREE_NUMBER_OF_VEC_STMTS (node)
2621 = vect_get_num_vectors (vf * group_size, vectype);
2622 }
2623
2624 bool dummy;
2625 return vect_analyze_stmt (vinfo, stmt_info, &dummy,
2626 node, node_instance, cost_vec);
2627 }
2628
2629 /* Try to build NODE from scalars, returning true on success.
2630 NODE_INSTANCE is the SLP instance that contains NODE. */
2631
2632 static bool
2633 vect_slp_convert_to_external (vec_info *vinfo, slp_tree node,
2634 slp_instance node_instance)
2635 {
2636 stmt_vec_info stmt_info;
2637 unsigned int i;
2638
2639 if (!is_a <bb_vec_info> (vinfo)
2640 || node == SLP_INSTANCE_TREE (node_instance)
2641 || vect_contains_pattern_stmt_p (SLP_TREE_SCALAR_STMTS (node)))
2642 return false;
2643
2644 if (dump_enabled_p ())
2645 dump_printf_loc (MSG_NOTE, vect_location,
2646 "Building vector operands from scalars instead\n");
2647
2648 /* Don't remove and free the child nodes here, since they could be
2649 referenced by other structures. The analysis and scheduling phases
2650 (need to) ignore child nodes of anything that isn't vect_internal_def. */
2651 unsigned int group_size = SLP_TREE_LANES (node);
2652 SLP_TREE_DEF_TYPE (node) = vect_external_def;
2653 SLP_TREE_SCALAR_OPS (node).safe_grow (group_size);
2654 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, stmt_info)
2655 {
2656 tree lhs = gimple_get_lhs (vect_orig_stmt (stmt_info)->stmt);
2657 SLP_TREE_SCALAR_OPS (node)[i] = lhs;
2658 }
2659 return true;
2660 }
2661
2662 /* Compute the prologue cost for invariant or constant operands represented
2663 by NODE. */
2664
2665 static void
2666 vect_prologue_cost_for_slp (slp_tree node,
2667 stmt_vector_for_cost *cost_vec)
2668 {
2669 /* Without looking at the actual initializer a vector of
2670 constants can be implemented as a load from the constant pool.
2671 When all elements are the same we can use a splat. */
2672 tree vectype = SLP_TREE_VECTYPE (node);
2673 unsigned group_size = SLP_TREE_SCALAR_OPS (node).length ();
2674 unsigned num_vects_to_check;
2675 unsigned HOST_WIDE_INT const_nunits;
2676 unsigned nelt_limit;
2677 if (TYPE_VECTOR_SUBPARTS (vectype).is_constant (&const_nunits)
2678 && ! multiple_p (const_nunits, group_size))
2679 {
2680 num_vects_to_check = SLP_TREE_NUMBER_OF_VEC_STMTS (node);
2681 nelt_limit = const_nunits;
2682 }
2683 else
2684 {
2685 /* If either the vector has variable length or the vectors
2686 are composed of repeated whole groups we only need to
2687 cost construction once. All vectors will be the same. */
2688 num_vects_to_check = 1;
2689 nelt_limit = group_size;
2690 }
2691 tree elt = NULL_TREE;
2692 unsigned nelt = 0;
2693 for (unsigned j = 0; j < num_vects_to_check * nelt_limit; ++j)
2694 {
2695 unsigned si = j % group_size;
2696 if (nelt == 0)
2697 elt = SLP_TREE_SCALAR_OPS (node)[si];
2698 /* ??? We're just tracking whether all operands of a single
2699 vector initializer are the same, ideally we'd check if
2700 we emitted the same one already. */
2701 else if (elt != SLP_TREE_SCALAR_OPS (node)[si])
2702 elt = NULL_TREE;
2703 nelt++;
2704 if (nelt == nelt_limit)
2705 {
2706 record_stmt_cost (cost_vec, 1,
2707 SLP_TREE_DEF_TYPE (node) == vect_external_def
2708 ? (elt ? scalar_to_vec : vec_construct)
2709 : vector_load,
2710 NULL, vectype, 0, vect_prologue);
2711 nelt = 0;
2712 }
2713 }
2714 }
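
/* As an illustrative example: with GROUP_SIZE = 2 and four-element
   vectors every vector is the repeated whole group {s1, s2, s1, s2},
   so a single construction is costed; with GROUP_SIZE = 3 the vectors
   {s1, s2, s3, s1}, {s2, s3, s1, s2}, ... all differ, so each of the
   SLP_TREE_NUMBER_OF_VEC_STMTS vectors is costed separately.  */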
2715
2716 /* Analyze statements contained in SLP tree NODE after recursively analyzing
2717 the subtree. NODE_INSTANCE contains NODE and VINFO contains INSTANCE.
2718
2719 Return true if the operations are supported. */
2720
2721 static bool
2722 vect_slp_analyze_node_operations (vec_info *vinfo, slp_tree node,
2723 slp_instance node_instance,
2724 hash_set<slp_tree> &visited,
2725 hash_set<slp_tree> &lvisited,
2726 stmt_vector_for_cost *cost_vec)
2727 {
2728 int i, j;
2729 slp_tree child;
2730
2731 /* Assume we can code-generate all invariants. */
2732 if (SLP_TREE_DEF_TYPE (node) != vect_internal_def)
2733 return true;
2734
2735 /* If we already analyzed the exact same set of scalar stmts we're done.
2736 We share the generated vector stmts for those.
2737 The SLP graph is acyclic, so it is safe not to cache whether we
2738 failed or succeeded: the lvisited set is simply thrown away
2739 when we fail.  */
2740 if (visited.contains (node)
2741 || lvisited.add (node))
2742 return true;
2743
2744 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
2745 if (!vect_slp_analyze_node_operations (vinfo, child, node_instance,
2746 visited, lvisited, cost_vec))
2747 return false;
2748
2749 bool res = vect_slp_analyze_node_operations_1 (vinfo, node, node_instance,
2750 cost_vec);
2751
2752 /* When the node can be vectorized, cost the invariant nodes it references.
2753 This is not done in DFS order to allow the referring node's
2754 vectorizable_* calls to nail down the invariant node's vector type
2755 and possibly unshare it if it needs a different vector type than
2756 other referrers. */
2757 if (res)
2758 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), j, child)
2759 if ((SLP_TREE_DEF_TYPE (child) == vect_constant_def
2760 || SLP_TREE_DEF_TYPE (child) == vect_external_def)
2761 /* Perform usual caching, note code-generation still
2762 code-gens these nodes multiple times but we expect
2763 to CSE them later. */
2764 && !visited.contains (child)
2765 && !lvisited.add (child))
2766 {
2767 /* ??? After auditing more code paths make a "default"
2768 and push the vector type from NODE to all children
2769 if it is not already set. */
2770 /* Compute the number of vectors to be generated. */
2771 tree vector_type = SLP_TREE_VECTYPE (child);
2772 if (!vector_type)
2773 {
2774 /* For shifts with a scalar argument we don't need
2775 to cost or code-generate anything.
2776 ??? Represent this more explicitly.  */
2777 gcc_assert ((STMT_VINFO_TYPE (SLP_TREE_REPRESENTATIVE (node))
2778 == shift_vec_info_type)
2779 && j == 1);
2780 continue;
2781 }
2782 unsigned group_size = SLP_TREE_SCALAR_OPS (child).length ();
2783 poly_uint64 vf = 1;
2784 if (loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo))
2785 vf = loop_vinfo->vectorization_factor;
2786 SLP_TREE_NUMBER_OF_VEC_STMTS (child)
2787 = vect_get_num_vectors (vf * group_size, vector_type);
2788 /* And cost them. */
2789 vect_prologue_cost_for_slp (child, cost_vec);
2790 }
2791
2792 /* If this node can't be vectorized, try pruning the tree here rather
2793 than felling the whole thing. */
2794 if (!res && vect_slp_convert_to_external (vinfo, node, node_instance))
2795 {
2796 /* We'll need to revisit this for invariant costing and for
2797 setting the number of vectorized stmts.  */
2798 lvisited.remove (node);
2799 res = true;
2800 }
2801
2802 return res;
2803 }
2804
2805
2806 /* Analyze statements in SLP instances of VINFO. Return true if the
2807 operations are supported. */
2808
2809 bool
2810 vect_slp_analyze_operations (vec_info *vinfo)
2811 {
2812 slp_instance instance;
2813 int i;
2814
2815 DUMP_VECT_SCOPE ("vect_slp_analyze_operations");
2816
2817 hash_set<slp_tree> visited;
2818 for (i = 0; vinfo->slp_instances.iterate (i, &instance); )
2819 {
2820 hash_set<slp_tree> lvisited;
2821 stmt_vector_for_cost cost_vec;
2822 cost_vec.create (2);
2823 if (!vect_slp_analyze_node_operations (vinfo,
2824 SLP_INSTANCE_TREE (instance),
2825 instance, visited, lvisited,
2826 &cost_vec)
2827 /* Instances with a root stmt require vectorized defs for the
2828 SLP tree root. */
2829 || (SLP_INSTANCE_ROOT_STMT (instance)
2830 && (SLP_TREE_DEF_TYPE (SLP_INSTANCE_TREE (instance))
2831 != vect_internal_def)))
2832 {
2833 slp_tree node = SLP_INSTANCE_TREE (instance);
2834 stmt_vec_info stmt_info = SLP_TREE_SCALAR_STMTS (node)[0];
2835 if (dump_enabled_p ())
2836 dump_printf_loc (MSG_NOTE, vect_location,
2837 "removing SLP instance operations starting from: %G",
2838 stmt_info->stmt);
2839 vect_free_slp_instance (instance, false);
2840 vinfo->slp_instances.ordered_remove (i);
2841 cost_vec.release ();
2842 }
2843 else
2844 {
2845 for (hash_set<slp_tree>::iterator x = lvisited.begin();
2846 x != lvisited.end(); ++x)
2847 visited.add (*x);
2848 i++;
2849
2850 add_stmt_costs (vinfo, vinfo->target_cost_data, &cost_vec);
2851 cost_vec.release ();
2852 }
2853 }
2854
2855 return !vinfo->slp_instances.is_empty ();
2856 }
2857
2858
2859 /* Compute the scalar cost of the SLP node NODE and its children
2860 and record it in COST_VEC.  Do not account defs that are marked
2861 in LIFE and update LIFE according to uses of NODE.  */
2862
2863 static void
2864 vect_bb_slp_scalar_cost (vec_info *vinfo,
2865 slp_tree node, vec<bool, va_heap> *life,
2866 stmt_vector_for_cost *cost_vec,
2867 hash_set<slp_tree> &visited)
2868 {
2869 unsigned i;
2870 stmt_vec_info stmt_info;
2871 slp_tree child;
2872
2873 if (visited.add (node))
2874 return;
2875
2876 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, stmt_info)
2877 {
2878 gimple *stmt = stmt_info->stmt;
2879 ssa_op_iter op_iter;
2880 def_operand_p def_p;
2881
2882 if ((*life)[i])
2883 continue;
2884
2885 /* If there is a non-vectorized use of the defs then the scalar
2886 stmt is kept live in which case we do not account it or any
2887 required defs in the SLP children in the scalar cost. This
2888 way we make the vectorization more costly when compared to
2889 the scalar cost. */
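      /* For instance (illustrative): if an SLP def _1 = a_2 + b_3 also
	 feeds a statement outside the SLP tree, _1 must stay live in
	 scalar form, so it is left out of the scalar cost computed
	 here.  */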
2890 FOR_EACH_SSA_DEF_OPERAND (def_p, stmt, op_iter, SSA_OP_DEF)
2891 {
2892 imm_use_iterator use_iter;
2893 gimple *use_stmt;
2894 FOR_EACH_IMM_USE_STMT (use_stmt, use_iter, DEF_FROM_PTR (def_p))
2895 if (!is_gimple_debug (use_stmt))
2896 {
2897 stmt_vec_info use_stmt_info = vinfo->lookup_stmt (use_stmt);
2898 if (!use_stmt_info || !PURE_SLP_STMT (use_stmt_info))
2899 {
2900 (*life)[i] = true;
2901 BREAK_FROM_IMM_USE_STMT (use_iter);
2902 }
2903 }
2904 }
2905 if ((*life)[i])
2906 continue;
2907
2908 /* Count scalar stmts only once. */
2909 if (gimple_visited_p (stmt))
2910 continue;
2911 gimple_set_visited (stmt, true);
2912
2913 vect_cost_for_stmt kind;
2914 if (STMT_VINFO_DATA_REF (stmt_info))
2915 {
2916 if (DR_IS_READ (STMT_VINFO_DATA_REF (stmt_info)))
2917 kind = scalar_load;
2918 else
2919 kind = scalar_store;
2920 }
2921 else if (vect_nop_conversion_p (stmt_info))
2922 continue;
2923 else
2924 kind = scalar_stmt;
2925 record_stmt_cost (cost_vec, 1, kind, stmt_info, 0, vect_body);
2926 }
2927
2928 auto_vec<bool, 20> subtree_life;
2929 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
2930 {
2931 if (SLP_TREE_DEF_TYPE (child) == vect_internal_def)
2932 {
2933 /* Do not directly pass LIFE to the recursive call, copy it to
2934 confine changes in the callee to the current child/subtree. */
2935 subtree_life.safe_splice (*life);
2936 vect_bb_slp_scalar_cost (vinfo, child, &subtree_life, cost_vec,
2937 visited);
2938 subtree_life.truncate (0);
2939 }
2940 }
2941 }
2942
2943 /* Check if vectorization of the basic block is profitable. */
2944
2945 static bool
2946 vect_bb_vectorization_profitable_p (bb_vec_info bb_vinfo)
2947 {
2948 vec<slp_instance> slp_instances = BB_VINFO_SLP_INSTANCES (bb_vinfo);
2949 slp_instance instance;
2950 int i;
2951 unsigned int vec_inside_cost = 0, vec_outside_cost = 0, scalar_cost = 0;
2952 unsigned int vec_prologue_cost = 0, vec_epilogue_cost = 0;
2953
2954 /* Calculate scalar cost. */
2955 stmt_vector_for_cost scalar_costs;
2956 scalar_costs.create (0);
2957 hash_set<slp_tree> visited;
2958 FOR_EACH_VEC_ELT (slp_instances, i, instance)
2959 {
2960 auto_vec<bool, 20> life;
2961 life.safe_grow_cleared (SLP_TREE_LANES (SLP_INSTANCE_TREE (instance)));
2962 vect_bb_slp_scalar_cost (bb_vinfo,
2963 SLP_INSTANCE_TREE (instance),
2964 &life, &scalar_costs, visited);
2965 }
2966 /* Unset visited flag. */
2967 stmt_info_for_cost *si;
2968 FOR_EACH_VEC_ELT (scalar_costs, i, si)
2969 gimple_set_visited (si->stmt_info->stmt, false);
2970
2971 void *target_cost_data = init_cost (NULL);
2972 add_stmt_costs (bb_vinfo, target_cost_data, &scalar_costs);
2973 scalar_costs.release ();
2974 unsigned dummy;
2975 finish_cost (target_cost_data, &dummy, &scalar_cost, &dummy);
2976 destroy_cost_data (target_cost_data);
2977
2978 /* Complete the target-specific cost calculation. */
2979 finish_cost (BB_VINFO_TARGET_COST_DATA (bb_vinfo), &vec_prologue_cost,
2980 &vec_inside_cost, &vec_epilogue_cost);
2981
2982 vec_outside_cost = vec_prologue_cost + vec_epilogue_cost;
2983
2984 if (dump_enabled_p ())
2985 {
2986 dump_printf_loc (MSG_NOTE, vect_location, "Cost model analysis: \n");
2987 dump_printf (MSG_NOTE, " Vector inside of basic block cost: %d\n",
2988 vec_inside_cost);
2989 dump_printf (MSG_NOTE, " Vector prologue cost: %d\n", vec_prologue_cost);
2990 dump_printf (MSG_NOTE, " Vector epilogue cost: %d\n", vec_epilogue_cost);
2991 dump_printf (MSG_NOTE, " Scalar cost of basic block: %d\n", scalar_cost);
2992 }
2993
2994 /* Vectorization is profitable if its cost is less than the cost of the
2995 scalar version.  Note that we err on the vector side for equal cost because
2996 the cost estimate is otherwise quite pessimistic (constant uses are
2997 free on the scalar side but cost a load on the vector side for
2998 example). */
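  /* For instance (numbers illustrative): a prologue cost of 1, an
     epilogue cost of 1 and an inside cost of 5 give a total vector
     cost of 7, which does not exceed a scalar cost of 7, so the
     block would still be vectorized.  */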
2999 if (vec_outside_cost + vec_inside_cost > scalar_cost)
3000 return false;
3001
3002 return true;
3003 }
3004
3005 /* Find any vectorizable constructors and add them to the grouped_stores
3006 array. */
3007
3008 static void
3009 vect_slp_check_for_constructors (bb_vec_info bb_vinfo)
3010 {
3011 gimple_stmt_iterator gsi;
3012
3013 for (gsi = bb_vinfo->region_begin;
3014 gsi_stmt (gsi) != gsi_stmt (bb_vinfo->region_end); gsi_next (&gsi))
3015 {
3016 gassign *stmt = dyn_cast <gassign *> (gsi_stmt (gsi));
3017 if (!stmt || gimple_assign_rhs_code (stmt) != CONSTRUCTOR)
3018 continue;
3019
3020 tree rhs = gimple_assign_rhs1 (stmt);
3021 if (!VECTOR_TYPE_P (TREE_TYPE (rhs))
3022 || maybe_ne (TYPE_VECTOR_SUBPARTS (TREE_TYPE (rhs)),
3023 CONSTRUCTOR_NELTS (rhs))
3024 || VECTOR_TYPE_P (TREE_TYPE (CONSTRUCTOR_ELT (rhs, 0)->value))
3025 || uniform_vector_p (rhs))
3026 continue;
3027
3028 stmt_vec_info stmt_info = bb_vinfo->lookup_stmt (stmt);
3029 BB_VINFO_GROUPED_STORES (bb_vinfo).safe_push (stmt_info);
3030 }
3031 }
3032
3033 /* Check if the region described by BB_VINFO can be vectorized, returning
3034 true if so. When returning false, set FATAL to true if the same failure
3035 would prevent vectorization at other vector sizes, false if it is still
3036 worth trying other sizes. N_STMTS is the number of statements in the
3037 region. */
3038
3039 static bool
3040 vect_slp_analyze_bb_1 (bb_vec_info bb_vinfo, int n_stmts, bool &fatal)
3041 {
3042 DUMP_VECT_SCOPE ("vect_slp_analyze_bb");
3043
3044 slp_instance instance;
3045 int i;
3046 poly_uint64 min_vf = 2;
3047
3048 /* The first group of checks is independent of the vector size. */
3049 fatal = true;
3050
3051 /* Analyze the data references. */
3052
3053 if (!vect_analyze_data_refs (bb_vinfo, &min_vf, NULL))
3054 {
3055 if (dump_enabled_p ())
3056 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3057 "not vectorized: unhandled data-ref in basic "
3058 "block.\n");
3059 return false;
3060 }
3061
3062 if (BB_VINFO_DATAREFS (bb_vinfo).length () < 2)
3063 {
3064 if (dump_enabled_p ())
3065 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3066 "not vectorized: not enough data-refs in "
3067 "basic block.\n");
3068 return false;
3069 }
3070
3071 if (!vect_analyze_data_ref_accesses (bb_vinfo))
3072 {
3073 if (dump_enabled_p ())
3074 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3075 "not vectorized: unhandled data access in "
3076 "basic block.\n");
3077 return false;
3078 }
3079
3080 vect_slp_check_for_constructors (bb_vinfo);
3081
3082 /* If there are no grouped stores in the region there is no need
3083 to continue with pattern recog as vect_analyze_slp will fail
3084 anyway. */
3085 if (bb_vinfo->grouped_stores.is_empty ())
3086 {
3087 if (dump_enabled_p ())
3088 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3089 "not vectorized: no grouped stores in "
3090 "basic block.\n");
3091 return false;
3092 }
3093
3094 /* In contrast, the rest of the analysis below depends on the vector size in some way.  */
3095 fatal = false;
3096
3097 vect_pattern_recog (bb_vinfo);
3098
3099 /* Check the SLP opportunities in the basic block, analyze and build SLP
3100 trees. */
3101 if (!vect_analyze_slp (bb_vinfo, n_stmts))
3102 {
3103 if (dump_enabled_p ())
3104 {
3105 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3106 "Failed to SLP the basic block.\n");
3107 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3108 "not vectorized: failed to find SLP opportunities "
3109 "in basic block.\n");
3110 }
3111 return false;
3112 }
3113
3114 /* Optimize permutations. */
3115 vect_optimize_slp (bb_vinfo);
3116
3117 vect_record_base_alignments (bb_vinfo);
3118
3119 /* Analyze and verify the alignment of data references and the
3120 dependence in the SLP instances. */
3121 for (i = 0; BB_VINFO_SLP_INSTANCES (bb_vinfo).iterate (i, &instance); )
3122 {
3123 if (! vect_slp_analyze_and_verify_instance_alignment (bb_vinfo, instance)
3124 || ! vect_slp_analyze_instance_dependence (bb_vinfo, instance))
3125 {
3126 slp_tree node = SLP_INSTANCE_TREE (instance);
3127 stmt_vec_info stmt_info = SLP_TREE_SCALAR_STMTS (node)[0];
3128 if (dump_enabled_p ())
3129 dump_printf_loc (MSG_NOTE, vect_location,
3130 "removing SLP instance operations starting from: %G",
3131 stmt_info->stmt);
3132 vect_free_slp_instance (instance, false);
3133 BB_VINFO_SLP_INSTANCES (bb_vinfo).ordered_remove (i);
3134 continue;
3135 }
3136
3137 /* Mark all the statements that we want to vectorize as pure SLP and
3138 relevant. */
3139 vect_mark_slp_stmts (SLP_INSTANCE_TREE (instance));
3140 vect_mark_slp_stmts_relevant (SLP_INSTANCE_TREE (instance));
3141 if (SLP_INSTANCE_ROOT_STMT (instance))
3142 STMT_SLP_TYPE (SLP_INSTANCE_ROOT_STMT (instance)) = pure_slp;
3143
3144 i++;
3145 }
3146 if (! BB_VINFO_SLP_INSTANCES (bb_vinfo).length ())
3147 return false;
3148
3149 if (!vect_slp_analyze_operations (bb_vinfo))
3150 {
3151 if (dump_enabled_p ())
3152 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3153 "not vectorized: bad operation in basic block.\n");
3154 return false;
3155 }
3156
3157 /* Cost model: check if the vectorization is worthwhile. */
3158 if (!unlimited_cost_model (NULL)
3159 && !vect_bb_vectorization_profitable_p (bb_vinfo))
3160 {
3161 if (dump_enabled_p ())
3162 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3163 "not vectorized: vectorization is not "
3164 "profitable.\n");
3165 return false;
3166 }
3167
3168 if (dump_enabled_p ())
3169 dump_printf_loc (MSG_NOTE, vect_location,
3170 "Basic block will be vectorized using SLP\n");
3171 return true;
3172 }
3173
3174 /* Subroutine of vect_slp_bb. Try to vectorize the statements between
3175 REGION_BEGIN (inclusive) and REGION_END (exclusive), returning true
3176 on success. The region has N_STMTS statements and has the datarefs
3177 given by DATAREFS. */
3178
3179 static bool
3180 vect_slp_bb_region (gimple_stmt_iterator region_begin,
3181 gimple_stmt_iterator region_end,
3182 vec<data_reference_p> datarefs,
3183 unsigned int n_stmts)
3184 {
3185 bb_vec_info bb_vinfo;
3186 auto_vector_modes vector_modes;
3187
3188 /* Autodetect first vector size we try. */
3189 machine_mode next_vector_mode = VOIDmode;
3190 targetm.vectorize.autovectorize_vector_modes (&vector_modes, false);
3191 unsigned int mode_i = 0;
3192
3193 vec_info_shared shared;
3194
3195 machine_mode autodetected_vector_mode = VOIDmode;
3196 while (1)
3197 {
3198 bool vectorized = false;
3199 bool fatal = false;
3200 bb_vinfo = new _bb_vec_info (region_begin, region_end, &shared);
3201
3202 bool first_time_p = shared.datarefs.is_empty ();
3203 BB_VINFO_DATAREFS (bb_vinfo) = datarefs;
3204 if (first_time_p)
3205 bb_vinfo->shared->save_datarefs ();
3206 else
3207 bb_vinfo->shared->check_datarefs ();
3208 bb_vinfo->vector_mode = next_vector_mode;
3209
3210 if (vect_slp_analyze_bb_1 (bb_vinfo, n_stmts, fatal)
3211 && dbg_cnt (vect_slp))
3212 {
3213 if (dump_enabled_p ())
3214 {
3215 dump_printf_loc (MSG_NOTE, vect_location,
3216 "***** Analysis succeeded with vector mode"
3217 " %s\n", GET_MODE_NAME (bb_vinfo->vector_mode));
3218 dump_printf_loc (MSG_NOTE, vect_location, "SLPing BB part\n");
3219 }
3220
3221 bb_vinfo->shared->check_datarefs ();
3222 vect_schedule_slp (bb_vinfo);
3223
3224 unsigned HOST_WIDE_INT bytes;
3225 if (dump_enabled_p ())
3226 {
3227 if (GET_MODE_SIZE (bb_vinfo->vector_mode).is_constant (&bytes))
3228 dump_printf_loc (MSG_OPTIMIZED_LOCATIONS, vect_location,
3229 "basic block part vectorized using %wu byte "
3230 "vectors\n", bytes);
3231 else
3232 dump_printf_loc (MSG_OPTIMIZED_LOCATIONS, vect_location,
3233 "basic block part vectorized using variable "
3234 "length vectors\n");
3235 }
3236
3237 vectorized = true;
3238 }
3239 else
3240 {
3241 if (dump_enabled_p ())
3242 dump_printf_loc (MSG_NOTE, vect_location,
3243 "***** Analysis failed with vector mode %s\n",
3244 GET_MODE_NAME (bb_vinfo->vector_mode));
3245 }
3246
3247 if (mode_i == 0)
3248 autodetected_vector_mode = bb_vinfo->vector_mode;
3249
3250 if (!fatal)
3251 while (mode_i < vector_modes.length ()
3252 && vect_chooses_same_modes_p (bb_vinfo, vector_modes[mode_i]))
3253 {
3254 if (dump_enabled_p ())
3255 dump_printf_loc (MSG_NOTE, vect_location,
3256 "***** The result for vector mode %s would"
3257 " be the same\n",
3258 GET_MODE_NAME (vector_modes[mode_i]));
3259 mode_i += 1;
3260 }
3261
3262 delete bb_vinfo;
3263
3264 if (mode_i < vector_modes.length ()
3265 && VECTOR_MODE_P (autodetected_vector_mode)
3266 && (related_vector_mode (vector_modes[mode_i],
3267 GET_MODE_INNER (autodetected_vector_mode))
3268 == autodetected_vector_mode)
3269 && (related_vector_mode (autodetected_vector_mode,
3270 GET_MODE_INNER (vector_modes[mode_i]))
3271 == vector_modes[mode_i]))
3272 {
3273 if (dump_enabled_p ())
3274 dump_printf_loc (MSG_NOTE, vect_location,
3275 "***** Skipping vector mode %s, which would"
3276 " repeat the analysis for %s\n",
3277 GET_MODE_NAME (vector_modes[mode_i]),
3278 GET_MODE_NAME (autodetected_vector_mode));
3279 mode_i += 1;
3280 }
3281
3282 if (vectorized
3283 || mode_i == vector_modes.length ()
3284 || autodetected_vector_mode == VOIDmode
3285 /* If vect_slp_analyze_bb_1 signaled that analysis for all
3286 vector sizes will fail do not bother iterating. */
3287 || fatal)
3288 return vectorized;
3289
3290 /* Try the next vector mode in the list.  */
3291 next_vector_mode = vector_modes[mode_i++];
3292 if (dump_enabled_p ())
3293 dump_printf_loc (MSG_NOTE, vect_location,
3294 "***** Re-trying analysis with vector mode %s\n",
3295 GET_MODE_NAME (next_vector_mode));
3296 }
3297 }
3298
3299 /* Main entry for the BB vectorizer. Analyze and transform BB, returns
3300 true if anything in the basic-block was vectorized. */
3301
3302 bool
3303 vect_slp_bb (basic_block bb)
3304 {
3305 gimple_stmt_iterator gsi;
3306 bool any_vectorized = false;
3307
3308 gsi = gsi_after_labels (bb);
3309 while (!gsi_end_p (gsi))
3310 {
3311 gimple_stmt_iterator region_begin = gsi;
3312 vec<data_reference_p> datarefs = vNULL;
3313 int insns = 0;
3314
3315 for (; !gsi_end_p (gsi); gsi_next (&gsi))
3316 {
3317 gimple *stmt = gsi_stmt (gsi);
3318 if (is_gimple_debug (stmt))
3319 continue;
3320 insns++;
3321
3322 if (gimple_location (stmt) != UNKNOWN_LOCATION)
3323 vect_location = stmt;
3324
3325 if (!vect_find_stmt_data_reference (NULL, stmt, &datarefs))
3326 break;
3327 }
3328
3329 /* Skip leading unhandled stmts. */
3330 if (gsi_stmt (region_begin) == gsi_stmt (gsi))
3331 {
3332 gsi_next (&gsi);
3333 continue;
3334 }
3335
3336 gimple_stmt_iterator region_end = gsi;
3337
3338 if (insns > param_slp_max_insns_in_bb)
3339 {
3340 if (dump_enabled_p ())
3341 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3342 "not vectorized: too many instructions in "
3343 "basic block.\n");
3344 }
3345 else if (vect_slp_bb_region (region_begin, region_end, datarefs, insns))
3346 any_vectorized = true;
3347
3348 if (gsi_end_p (region_end))
3349 break;
3350
3351 /* Skip the unhandled stmt. */
3352 gsi_next (&gsi);
3353 }
3354
3355 return any_vectorized;
3356 }
3357
3358
3359 /* Build a variable-length vector in which the elements in ELTS are repeated
3360 to fill NRESULTS vectors of type VECTOR_TYPE.  Store the vectors in
3361 RESULTS and add any new instructions to SEQ.
3362
3363 The approach we use is:
3364
3365 (1) Find a vector mode VM with integer elements of mode IM.
3366
3367 (2) Replace ELTS[0:NELTS] with ELTS'[0:NELTS'], where each element of
3368 ELTS' has mode IM. This involves creating NELTS' VIEW_CONVERT_EXPRs
3369 from small vectors to IM.
3370
3371 (3) Duplicate each ELTS'[I] into a vector of mode VM.
3372
3373 (4) Use a tree of interleaving VEC_PERM_EXPRs to create VMs with the
3374 correct byte contents.
3375
3376 (5) Use VIEW_CONVERT_EXPR to cast the final VMs to the required type.
3377
3378 We try to find the largest IM for which this sequence works, in order
3379 to cut down on the number of interleaves. */
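
/* As an illustrative example: for ELTS = {a, b, c, d} and NVECTORS = 2,
   each pair {a, b} and {c, d} is view-converted to a single IM-mode
   scalar and duplicated, giving pieces with the byte patterns of
   {a, b, a, b, ...} and {c, d, c, d, ...}; one lo/hi interleave then
   produces the byte pattern of {a, b, c, d, a, b, c, d, ...}, which
   step (5) view-converts back to VECTOR_TYPE.  */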
3380
3381 void
3382 duplicate_and_interleave (vec_info *vinfo, gimple_seq *seq, tree vector_type,
3383 vec<tree> elts, unsigned int nresults,
3384 vec<tree> &results)
3385 {
3386 unsigned int nelts = elts.length ();
3387 tree element_type = TREE_TYPE (vector_type);
3388
3389 /* (1) Find a vector mode VM with integer elements of mode IM. */
3390 unsigned int nvectors = 1;
3391 tree new_vector_type;
3392 tree permutes[2];
3393 if (!can_duplicate_and_interleave_p (vinfo, nelts, element_type,
3394 &nvectors, &new_vector_type,
3395 permutes))
3396 gcc_unreachable ();
3397
3398 /* Get a vector type that holds ELTS[0:NELTS/NELTS']. */
3399 unsigned int partial_nelts = nelts / nvectors;
3400 tree partial_vector_type = build_vector_type (element_type, partial_nelts);
3401
3402 tree_vector_builder partial_elts;
3403 auto_vec<tree, 32> pieces (nvectors * 2);
3404 pieces.quick_grow (nvectors * 2);
3405 for (unsigned int i = 0; i < nvectors; ++i)
3406 {
3407 /* (2) Replace ELTS[0:NELTS] with ELTS'[0:NELTS'], where each element of
3408 ELTS' has mode IM. */
3409 partial_elts.new_vector (partial_vector_type, partial_nelts, 1);
3410 for (unsigned int j = 0; j < partial_nelts; ++j)
3411 partial_elts.quick_push (elts[i * partial_nelts + j]);
3412 tree t = gimple_build_vector (seq, &partial_elts);
3413 t = gimple_build (seq, VIEW_CONVERT_EXPR,
3414 TREE_TYPE (new_vector_type), t);
3415
3416 /* (3) Duplicate each ELTS'[I] into a vector of mode VM. */
3417 pieces[i] = gimple_build_vector_from_val (seq, new_vector_type, t);
3418 }
3419
3420 /* (4) Use a tree of VEC_PERM_EXPRs to create a single VM with the
3421 correct byte contents.
3422
3423 We need to repeat the following operation log2(nvectors) times:
3424
3425 out[i * 2] = VEC_PERM_EXPR (in[i], in[i + hi_start], lo_permute);
3426 out[i * 2 + 1] = VEC_PERM_EXPR (in[i], in[i + hi_start], hi_permute);
3427
3428 However, if each input repeats every N elements and the VF is
3429 a multiple of N * 2, the HI result is the same as the LO. */
3430 unsigned int in_start = 0;
3431 unsigned int out_start = nvectors;
3432 unsigned int hi_start = nvectors / 2;
3433 /* A bound on the number of outputs needed to produce NRESULTS results
3434 in the final iteration. */
3435 unsigned int noutputs_bound = nvectors * nresults;
3436 for (unsigned int in_repeat = 1; in_repeat < nvectors; in_repeat *= 2)
3437 {
3438 noutputs_bound /= 2;
3439 unsigned int limit = MIN (noutputs_bound, nvectors);
3440 for (unsigned int i = 0; i < limit; ++i)
3441 {
3442 if ((i & 1) != 0
3443 && multiple_p (TYPE_VECTOR_SUBPARTS (new_vector_type),
3444 2 * in_repeat))
3445 {
3446 pieces[out_start + i] = pieces[out_start + i - 1];
3447 continue;
3448 }
3449
3450 tree output = make_ssa_name (new_vector_type);
3451 tree input1 = pieces[in_start + (i / 2)];
3452 tree input2 = pieces[in_start + (i / 2) + hi_start];
3453 gassign *stmt = gimple_build_assign (output, VEC_PERM_EXPR,
3454 input1, input2,
3455 permutes[i & 1]);
3456 gimple_seq_add_stmt (seq, stmt);
3457 pieces[out_start + i] = output;
3458 }
3459 std::swap (in_start, out_start);
3460 }
3461
3462 /* (5) Use VIEW_CONVERT_EXPR to cast the final VM to the required type. */
3463 results.reserve (nresults);
3464 for (unsigned int i = 0; i < nresults; ++i)
3465 if (i < nvectors)
3466 results.quick_push (gimple_build (seq, VIEW_CONVERT_EXPR, vector_type,
3467 pieces[in_start + i]));
3468 else
3469 results.quick_push (results[i - nvectors]);
3470 }
3471
3472
3473 /* For constant and loop invariant defs in OP_NODE this function creates
3474 vector defs that will be used in the vectorized stmts and stores them
3475 to SLP_TREE_VEC_DEFS of OP_NODE. */
3476
3477 static void
3478 vect_create_constant_vectors (vec_info *vinfo, slp_tree op_node)
3479 {
3480 unsigned HOST_WIDE_INT nunits;
3481 tree vec_cst;
3482 unsigned j, number_of_places_left_in_vector;
3483 tree vector_type;
3484 tree vop;
3485 int group_size = op_node->ops.length ();
3486 unsigned int vec_num, i;
3487 unsigned number_of_copies = 1;
3488 bool constant_p;
3489 gimple_seq ctor_seq = NULL;
3490 auto_vec<tree, 16> permute_results;
3491
3492 /* SLP_TREE_VECTYPE (op_node) must already be set correctly at this point. */
3493 vector_type = SLP_TREE_VECTYPE (op_node);
3494
3495 unsigned int number_of_vectors = SLP_TREE_NUMBER_OF_VEC_STMTS (op_node);
3496 SLP_TREE_VEC_DEFS (op_node).create (number_of_vectors);
3497 auto_vec<tree> voprnds (number_of_vectors);
3498
3499 /* NUMBER_OF_COPIES is the number of times we need to use the same values in
3500 the created vectors. It is greater than 1 if unrolling is performed.
3501
3502 For example, we have two scalar operands, s1 and s2 (e.g., group of
3503 strided accesses of size two), while NUNITS is four (i.e., four scalars
3504 of this type can be packed in a vector). The output vector will contain
3505 two copies of each scalar operand: {s1, s2, s1, s2}. (NUMBER_OF_COPIES
3506 will be 2).
3507
3508 If GROUP_SIZE > NUNITS, the scalars will be split into several vectors
3509 containing the operands.
3510
3511 For example, NUNITS is four as before, and the group size is 8
3512 (s1, s2, ..., s8). We will create two vectors {s1, s2, s3, s4} and
3513 {s5, s6, s7, s8}. */
3514
3515 /* When using duplicate_and_interleave, we just need one element for
3516 each scalar statement. */
3517 if (!TYPE_VECTOR_SUBPARTS (vector_type).is_constant (&nunits))
3518 nunits = group_size;
3519
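/* For example, with GROUP_SIZE = 2, NUNITS = 4 and NUMBER_OF_VECTORS = 1
   the formula below gives 4 * 1 / 2 = 2 copies ({ s1, s2, s1, s2 }),
   while with GROUP_SIZE = 8, NUNITS = 4 and NUMBER_OF_VECTORS = 2 it
   gives 4 * 2 / 8 = 1 copy of each scalar. */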
3520 number_of_copies = nunits * number_of_vectors / group_size;
3521
3522 number_of_places_left_in_vector = nunits;
3523 constant_p = true;
3524 tree_vector_builder elts (vector_type, nunits, 1);
3525 elts.quick_grow (nunits);
3526 stmt_vec_info insert_after = NULL;
3527 for (j = 0; j < number_of_copies; j++)
3528 {
3529 tree op;
3530 for (i = group_size - 1; op_node->ops.iterate (i, &op); i--)
3531 {
3532 /* Create 'vect_ = {op0,op1,...,opn}'. */
3533 number_of_places_left_in_vector--;
3534 tree orig_op = op;
3535 if (!types_compatible_p (TREE_TYPE (vector_type), TREE_TYPE (op)))
3536 {
3537 if (CONSTANT_CLASS_P (op))
3538 {
3539 if (VECTOR_BOOLEAN_TYPE_P (vector_type))
3540 {
3541 /* Can't use VIEW_CONVERT_EXPR for booleans because
3542 the scalar value and the vector element may have
3543 different sizes. */
3544 if (integer_zerop (op))
3545 op = build_int_cst (TREE_TYPE (vector_type), 0);
3546 else if (integer_onep (op))
3547 op = build_all_ones_cst (TREE_TYPE (vector_type));
3548 else
3549 gcc_unreachable ();
3550 }
3551 else
3552 op = fold_unary (VIEW_CONVERT_EXPR,
3553 TREE_TYPE (vector_type), op);
3554 gcc_assert (op && CONSTANT_CLASS_P (op));
3555 }
3556 else
3557 {
3558 tree new_temp = make_ssa_name (TREE_TYPE (vector_type));
3559 gimple *init_stmt;
3560 if (VECTOR_BOOLEAN_TYPE_P (vector_type))
3561 {
3562 tree true_val
3563 = build_all_ones_cst (TREE_TYPE (vector_type));
3564 tree false_val
3565 = build_zero_cst (TREE_TYPE (vector_type));
3566 gcc_assert (INTEGRAL_TYPE_P (TREE_TYPE (op)));
3567 init_stmt = gimple_build_assign (new_temp, COND_EXPR,
3568 op, true_val,
3569 false_val);
3570 }
3571 else
3572 {
3573 op = build1 (VIEW_CONVERT_EXPR, TREE_TYPE (vector_type),
3574 op);
3575 init_stmt
3576 = gimple_build_assign (new_temp, VIEW_CONVERT_EXPR,
3577 op);
3578 }
3579 gimple_seq_add_stmt (&ctor_seq, init_stmt);
3580 op = new_temp;
3581 }
3582 }
3583 elts[number_of_places_left_in_vector] = op;
3584 if (!CONSTANT_CLASS_P (op))
3585 constant_p = false;
3586 /* For BB vectorization we have to compute an insert location
3587 when a def is inside the analyzed region since we cannot
3588 simply insert at the BB start in this case. */
3589 stmt_vec_info opdef;
3590 if (TREE_CODE (orig_op) == SSA_NAME
3591 && !SSA_NAME_IS_DEFAULT_DEF (orig_op)
3592 && is_a <bb_vec_info> (vinfo)
3593 && (opdef = vinfo->lookup_def (orig_op)))
3594 {
3595 if (!insert_after)
3596 insert_after = opdef;
3597 else
3598 insert_after = get_later_stmt (insert_after, opdef);
3599 }
3600
3601 if (number_of_places_left_in_vector == 0)
3602 {
3603 if (constant_p
3604 ? multiple_p (TYPE_VECTOR_SUBPARTS (vector_type), nunits)
3605 : known_eq (TYPE_VECTOR_SUBPARTS (vector_type), nunits))
3606 vec_cst = gimple_build_vector (&ctor_seq, &elts);
3607 else
3608 {
3609 if (permute_results.is_empty ())
3610 duplicate_and_interleave (vinfo, &ctor_seq, vector_type,
3611 elts, number_of_vectors,
3612 permute_results);
3613 vec_cst = permute_results[number_of_vectors - j - 1];
3614 }
3615 tree init;
3616 if (insert_after)
3617 {
3618 gimple_stmt_iterator gsi = gsi_for_stmt (insert_after->stmt);
3619 /* vect_init_vector inserts before. */
3620 gsi_next (&gsi);
3621 init = vect_init_vector (vinfo, NULL, vec_cst,
3622 vector_type, &gsi);
3623 }
3624 else
3625 init = vect_init_vector (vinfo, NULL, vec_cst,
3626 vector_type, NULL);
3627 if (ctor_seq != NULL)
3628 {
3629 gimple_stmt_iterator gsi
3630 = gsi_for_stmt (SSA_NAME_DEF_STMT (init));
3631 gsi_insert_seq_before (&gsi, ctor_seq, GSI_SAME_STMT);
3632 ctor_seq = NULL;
3633 }
3634 voprnds.quick_push (init);
3635 insert_after = NULL;
3636 number_of_places_left_in_vector = nunits;
3637 constant_p = true;
3638 elts.new_vector (vector_type, nunits, 1);
3639 elts.quick_grow (nunits);
3640 }
3641 }
3642 }
3643
3644 /* Since the vectors were created in reverse order, reverse them when
3645 pushing them to SLP_TREE_VEC_DEFS. */
3646 vec_num = voprnds.length ();
3647 for (j = vec_num; j != 0; j--)
3648 {
3649 vop = voprnds[j - 1];
3650 SLP_TREE_VEC_DEFS (op_node).quick_push (vop);
3651 }
3652
3653 /* If the VF is greater than the unrolling factor needed for the SLP
3654 group of stmts, NUMBER_OF_VECTORS to be created is greater than
3655 NUMBER_OF_SCALARS/NUNITS or NUNITS/NUMBER_OF_SCALARS, and hence we have
3656 to replicate the vectors. */
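/* For example, if NUMBER_OF_VECTORS is 4 but only two defs v0 and v1
   were created above, the defs are replicated to { v0, v1, v0, v1 }. */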
3657 while (number_of_vectors > SLP_TREE_VEC_DEFS (op_node).length ())
3658 for (i = 0; SLP_TREE_VEC_DEFS (op_node).iterate (i, &vop) && i < vec_num;
3659 i++)
3660 SLP_TREE_VEC_DEFS (op_node).quick_push (vop);
3661 }
3662
3663 /* Get the Ith vectorized definition from SLP_NODE. */
3664
3665 tree
3666 vect_get_slp_vect_def (slp_tree slp_node, unsigned i)
3667 {
3668 if (SLP_TREE_VEC_STMTS (slp_node).exists ())
3669 return gimple_get_lhs (SLP_TREE_VEC_STMTS (slp_node)[i]->stmt);
3670 else
3671 return SLP_TREE_VEC_DEFS (slp_node)[i];
3672 }
3673
3674 /* Get N vectorized definitions for SLP_NODE. */
3675
3676 void
3677 vect_get_slp_defs (vec_info *,
3678 slp_tree slp_node, vec<vec<tree> > *vec_oprnds, unsigned n)
3679 {
3680 if (n == -1U)
3681 n = SLP_TREE_CHILDREN (slp_node).length ();
3682
3683 for (unsigned i = 0; i < n; ++i)
3684 {
3685 slp_tree child = SLP_TREE_CHILDREN (slp_node)[i];
3686
3687 vec<tree> vec_defs = vNULL;
3688
3689 /* For each operand we check whether it has vectorized definitions in a
3690 child node or we need to create them (for invariants and constants). */
3691 vec_defs.create (SLP_TREE_NUMBER_OF_VEC_STMTS (child));
3692 if (SLP_TREE_DEF_TYPE (child) == vect_internal_def)
3693 {
3694 unsigned j;
3695 stmt_vec_info vec_def_stmt_info;
3696 FOR_EACH_VEC_ELT (SLP_TREE_VEC_STMTS (child), j, vec_def_stmt_info)
3697 vec_defs.quick_push (gimple_get_lhs (vec_def_stmt_info->stmt));
3698 }
3699 else
3700 vec_defs.splice (SLP_TREE_VEC_DEFS (child));
3701
3702 vec_oprnds->quick_push (vec_defs);
3703 }
3704 }
3705
3706 /* Generate vector permute statements from a list of loads in DR_CHAIN.
3707 If ANALYZE_ONLY is TRUE, only check that it is possible to create valid
3708 permute statements for the SLP node NODE. */
3709
3710 bool
3711 vect_transform_slp_perm_load (vec_info *vinfo,
3712 slp_tree node, vec<tree> dr_chain,
3713 gimple_stmt_iterator *gsi, poly_uint64 vf,
3714 bool analyze_only, unsigned *n_perms)
3715 {
3716 stmt_vec_info stmt_info = SLP_TREE_SCALAR_STMTS (node)[0];
3717 int vec_index = 0;
3718 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
3719 unsigned int group_size = SLP_TREE_SCALAR_STMTS (node).length ();
3720 unsigned int mask_element;
3721 machine_mode mode;
3722
3723 if (!STMT_VINFO_GROUPED_ACCESS (stmt_info))
3724 return false;
3725
3726 stmt_info = DR_GROUP_FIRST_ELEMENT (stmt_info);
3727
3728 mode = TYPE_MODE (vectype);
3729 poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
3730
3731 /* Initialize the vect stmts of NODE to properly insert the generated
3732 stmts later. */
3733 if (! analyze_only)
3734 for (unsigned i = SLP_TREE_VEC_STMTS (node).length ();
3735 i < SLP_TREE_NUMBER_OF_VEC_STMTS (node); i++)
3736 SLP_TREE_VEC_STMTS (node).quick_push (NULL);
3737
3738 /* Generate permutation masks for every NODE. Number of masks for each NODE
3739 is equal to GROUP_SIZE.
3740 E.g., we have a group of three nodes with three loads from the same
3741 location in each node, and the vector size is 4. I.e., we have an
3742 a0b0c0a1b1c1... sequence and we need to create the following vectors:
3743 for a's: a0a0a0a1 a1a1a2a2 a2a3a3a3
3744 for b's: b0b0b0b1 b1b1b2b2 b2b3b3b3
3745 ...
3746
3747 The masks for a's should be: {0,0,0,3} {3,3,6,6} {6,9,9,9}.
3748 The last mask is illegal since we assume two operands for a permute
3749 operation, and mask elements can't index beyond those two vectors.
3750 Hence, the last mask must be converted into {2,5,5,5}.
3751 For the first two permutations we need the first and the second input
3752 vectors: {a0,b0,c0,a1} and {b1,c1,a2,b2}, and for the last permutation
3753 we need the second and the third vectors: {b1,c1,a2,b2} and
3754 {c2,a3,b3,c3}. */
3755
3756 int vect_stmts_counter = 0;
3757 unsigned int index = 0;
3758 int first_vec_index = -1;
3759 int second_vec_index = -1;
3760 bool noop_p = true;
3761 *n_perms = 0;
3762
3763 vec_perm_builder mask;
3764 unsigned int nelts_to_build;
3765 unsigned int nvectors_per_build;
3766 bool repeating_p = (group_size == DR_GROUP_SIZE (stmt_info)
3767 && multiple_p (nunits, group_size));
3768 if (repeating_p)
3769 {
3770 /* A single vector contains a whole number of copies of the node, so:
3771 (a) all permutes can use the same mask; and
3772 (b) the permutes only need a single vector input. */
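/* For example, with GROUP_SIZE = 4 and NUNITS = 8 each vector holds
   two whole copies of the node, so one encoded mask (four patterns of
   three elements each) serves every permute, and both VEC_PERM_EXPR
   operands can be the same input vector. */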
3773 mask.new_vector (nunits, group_size, 3);
3774 nelts_to_build = mask.encoded_nelts ();
3775 nvectors_per_build = SLP_TREE_VEC_STMTS (node).length ();
3776 }
3777 else
3778 {
3779 /* We need to construct a separate mask for each vector statement. */
3780 unsigned HOST_WIDE_INT const_nunits, const_vf;
3781 if (!nunits.is_constant (&const_nunits)
3782 || !vf.is_constant (&const_vf))
3783 return false;
3784 mask.new_vector (const_nunits, const_nunits, 1);
3785 nelts_to_build = const_vf * group_size;
3786 nvectors_per_build = 1;
3787 }
3788
3789 unsigned int count = mask.encoded_nelts ();
3790 mask.quick_grow (count);
3791 vec_perm_indices indices;
3792
3793 for (unsigned int j = 0; j < nelts_to_build; j++)
3794 {
3795 unsigned int iter_num = j / group_size;
3796 unsigned int stmt_num = j % group_size;
3797 unsigned int i = (iter_num * DR_GROUP_SIZE (stmt_info)
3798 + SLP_TREE_LOAD_PERMUTATION (node)[stmt_num]);
3799 if (repeating_p)
3800 {
3801 first_vec_index = 0;
3802 mask_element = i;
3803 }
3804 else
3805 {
3806 /* Enforced before the loop when !repeating_p. */
3807 unsigned int const_nunits = nunits.to_constant ();
3808 vec_index = i / const_nunits;
3809 mask_element = i % const_nunits;
3810 if (vec_index == first_vec_index
3811 || first_vec_index == -1)
3812 {
3813 first_vec_index = vec_index;
3814 }
3815 else if (vec_index == second_vec_index
3816 || second_vec_index == -1)
3817 {
3818 second_vec_index = vec_index;
3819 mask_element += const_nunits;
3820 }
3821 else
3822 {
3823 if (dump_enabled_p ())
3824 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3825 "permutation requires at "
3826 "least three vectors %G",
3827 stmt_info->stmt);
3828 gcc_assert (analyze_only);
3829 return false;
3830 }
3831
3832 gcc_assert (mask_element < 2 * const_nunits);
3833 }
3834
3835 if (mask_element != index)
3836 noop_p = false;
3837 mask[index++] = mask_element;
3838
3839 if (index == count && !noop_p)
3840 {
3841 indices.new_vector (mask, second_vec_index == -1 ? 1 : 2, nunits);
3842 if (!can_vec_perm_const_p (mode, indices))
3843 {
3844 if (dump_enabled_p ())
3845 {
3846 dump_printf_loc (MSG_MISSED_OPTIMIZATION,
3847 vect_location,
3848 "unsupported vect permute { ");
3849 for (i = 0; i < count; ++i)
3850 {
3851 dump_dec (MSG_MISSED_OPTIMIZATION, mask[i]);
3852 dump_printf (MSG_MISSED_OPTIMIZATION, " ");
3853 }
3854 dump_printf (MSG_MISSED_OPTIMIZATION, "}\n");
3855 }
3856 gcc_assert (analyze_only);
3857 return false;
3858 }
3859
3860 ++*n_perms;
3861 }
3862
3863 if (index == count)
3864 {
3865 if (!analyze_only)
3866 {
3867 tree mask_vec = NULL_TREE;
3868
3869 if (! noop_p)
3870 mask_vec = vect_gen_perm_mask_checked (vectype, indices);
3871
3872 if (second_vec_index == -1)
3873 second_vec_index = first_vec_index;
3874
3875 for (unsigned int ri = 0; ri < nvectors_per_build; ++ri)
3876 {
3877 /* Generate the permute statement if necessary. */
3878 tree first_vec = dr_chain[first_vec_index + ri];
3879 tree second_vec = dr_chain[second_vec_index + ri];
3880 stmt_vec_info perm_stmt_info;
3881 if (! noop_p)
3882 {
3883 gassign *stmt = as_a <gassign *> (stmt_info->stmt);
3884 tree perm_dest
3885 = vect_create_destination_var (gimple_assign_lhs (stmt),
3886 vectype);
3887 perm_dest = make_ssa_name (perm_dest);
3888 gassign *perm_stmt
3889 = gimple_build_assign (perm_dest, VEC_PERM_EXPR,
3890 first_vec, second_vec,
3891 mask_vec);
3892 perm_stmt_info
3893 = vect_finish_stmt_generation (vinfo,
3894 stmt_info, perm_stmt,
3895 gsi);
3896 }
3897 else
3898 /* If the mask was NULL_TREE, generate the requested
3899 identity transform. */
3900 perm_stmt_info = vinfo->lookup_def (first_vec);
3901
3902 /* Store the vector statement in NODE. */
3903 SLP_TREE_VEC_STMTS (node)[vect_stmts_counter++]
3904 = perm_stmt_info;
3905 }
3906 }
3907
3908 index = 0;
3909 first_vec_index = -1;
3910 second_vec_index = -1;
3911 noop_p = true;
3912 }
3913 }
3914
3915 return true;
3916 }
3917
3918 /* Vectorize SLP instance tree in postorder. */
3919
3920 static void
3921 vect_schedule_slp_instance (vec_info *vinfo,
3922 slp_tree node, slp_instance instance)
3923 {
3924 gimple_stmt_iterator si;
3925 int i;
3926 slp_tree child;
3927
3928 /* See if we have already vectorized the node in the graph of the
3929 SLP instance. */
3930 if ((SLP_TREE_DEF_TYPE (node) == vect_internal_def
3931 && SLP_TREE_VEC_STMTS (node).exists ())
3932 || SLP_TREE_VEC_DEFS (node).exists ())
3933 return;
3934
3935 /* Vectorize externals and constants. */
3936 if (SLP_TREE_DEF_TYPE (node) == vect_constant_def
3937 || SLP_TREE_DEF_TYPE (node) == vect_external_def)
3938 {
3939 /* ??? vectorizable_shift can end up using a scalar operand which is
3940 currently denoted as !SLP_TREE_VECTYPE. No need to vectorize the
3941 node in this case. */
3942 if (!SLP_TREE_VECTYPE (node))
3943 return;
3944
3945 vect_create_constant_vectors (vinfo, node);
3946 return;
3947 }
3948
3949 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
3950 vect_schedule_slp_instance (vinfo, child, instance);
3951
3952 stmt_vec_info stmt_info = SLP_TREE_REPRESENTATIVE (node);
3953
3954 /* VECTYPE is the type of the destination. */
3955 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
3956 poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
3957 unsigned group_size = SLP_TREE_SCALAR_STMTS (node).length ();
3958
3959 gcc_assert (SLP_TREE_NUMBER_OF_VEC_STMTS (node) != 0);
3960 SLP_TREE_VEC_STMTS (node).create (SLP_TREE_NUMBER_OF_VEC_STMTS (node));
3961
3962 if (dump_enabled_p ())
3963 dump_printf_loc (MSG_NOTE, vect_location,
3964 "------>vectorizing SLP node starting from: %G",
3965 stmt_info->stmt);
3966
3967 /* Vectorized stmts go before the last scalar stmt, which is where
3968 all uses are ready. */
3969 stmt_vec_info last_stmt_info = vect_find_last_scalar_stmt_in_slp (node);
3970 si = gsi_for_stmt (last_stmt_info->stmt);
3971
3972 /* Handle two-operation SLP nodes by vectorizing the group with
3973 both operations and then performing a merge. */
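/* For example, for the scalar group { a0 + b0, a1 - b1, a2 + b2, a3 - b3 }
   the node is transformed once with PLUS_EXPR and once with MINUS_EXPR,
   and the two vector results are merged by a VEC_PERM_EXPR whose mask
   picks lane I from the first result when scalar stmt I used the
   representative's code and lane NUNITS + I from the second result
   otherwise; for V4SI that mask would be { 0, 5, 2, 7 }. */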
3974 bool done_p = false;
3975 if (SLP_TREE_TWO_OPERATORS (node))
3976 {
3977 gassign *stmt = as_a <gassign *> (stmt_info->stmt);
3978 enum tree_code code0 = gimple_assign_rhs_code (stmt);
3979 enum tree_code ocode = ERROR_MARK;
3980 stmt_vec_info ostmt_info;
3981 vec_perm_builder mask (group_size, group_size, 1);
3982 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, ostmt_info)
3983 {
3984 gassign *ostmt = as_a <gassign *> (ostmt_info->stmt);
3985 if (gimple_assign_rhs_code (ostmt) != code0)
3986 {
3987 mask.quick_push (1);
3988 ocode = gimple_assign_rhs_code (ostmt);
3989 }
3990 else
3991 mask.quick_push (0);
3992 }
3993 if (ocode != ERROR_MARK)
3994 {
3995 vec<stmt_vec_info> v0;
3996 vec<stmt_vec_info> v1;
3997 unsigned j;
3998 tree tmask = NULL_TREE;
3999 vect_transform_stmt (vinfo, stmt_info, &si, node, instance);
4000 v0 = SLP_TREE_VEC_STMTS (node).copy ();
4001 SLP_TREE_VEC_STMTS (node).truncate (0);
4002 gimple_assign_set_rhs_code (stmt, ocode);
4003 vect_transform_stmt (vinfo, stmt_info, &si, node, instance);
4004 gimple_assign_set_rhs_code (stmt, code0);
4005 v1 = SLP_TREE_VEC_STMTS (node).copy ();
4006 SLP_TREE_VEC_STMTS (node).truncate (0);
4007 tree meltype = build_nonstandard_integer_type
4008 (GET_MODE_BITSIZE (SCALAR_TYPE_MODE (TREE_TYPE (vectype))), 1);
4009 tree mvectype = get_same_sized_vectype (meltype, vectype);
4010 unsigned k = 0, l;
4011 for (j = 0; j < v0.length (); ++j)
4012 {
4013 /* Enforced by vect_build_slp_tree, which rejects variable-length
4014 vectors for SLP_TREE_TWO_OPERATORS. */
4015 unsigned int const_nunits = nunits.to_constant ();
4016 tree_vector_builder melts (mvectype, const_nunits, 1);
4017 for (l = 0; l < const_nunits; ++l)
4018 {
4019 if (k >= group_size)
4020 k = 0;
4021 tree t = build_int_cst (meltype,
4022 mask[k++] * const_nunits + l);
4023 melts.quick_push (t);
4024 }
4025 tmask = melts.build ();
4026
4027 /* ??? Not all targets support a VEC_PERM_EXPR with a
4028 constant mask that would translate to a vec_merge RTX
4029 (with their vec_perm_const_ok). We can either not
4030 vectorize in that case or let veclower do its job.
4031 Unfortunately that isn't too great and at least for
4032 plus/minus we'd eventually like to match targets'
4033 vector addsub instructions. */
4034 gimple *vstmt;
4035 vstmt = gimple_build_assign (make_ssa_name (vectype),
4036 VEC_PERM_EXPR,
4037 gimple_assign_lhs (v0[j]->stmt),
4038 gimple_assign_lhs (v1[j]->stmt),
4039 tmask);
4040 SLP_TREE_VEC_STMTS (node).quick_push
4041 (vect_finish_stmt_generation (vinfo, stmt_info, vstmt, &si));
4042 }
4043 v0.release ();
4044 v1.release ();
4045 done_p = true;
4046 }
4047 }
4048 if (!done_p)
4049 vect_transform_stmt (vinfo, stmt_info, &si, node, instance);
4050 }
4051
4052 /* Replace scalar calls from SLP node NODE with setting of their lhs to zero.
4053 For loop vectorization this is done in vectorizable_call, but for SLP
4054 it needs to be deferred until the end of vect_schedule_slp, because multiple
4055 SLP instances may refer to the same scalar stmt. */
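/* For example, once a pure-SLP scalar call such as
   x_1 = __builtin_sqrtf (a_2) has been vectorized in every SLP instance
   that uses it, the call is rewritten to x_1 = 0.0f, turning it into a
   trivially dead assignment. */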
4056
4057 static void
4058 vect_remove_slp_scalar_calls (vec_info *vinfo,
4059 slp_tree node, hash_set<slp_tree> &visited)
4060 {
4061 gimple *new_stmt;
4062 gimple_stmt_iterator gsi;
4063 int i;
4064 slp_tree child;
4065 tree lhs;
4066 stmt_vec_info stmt_info;
4067
4068 if (SLP_TREE_DEF_TYPE (node) != vect_internal_def)
4069 return;
4070
4071 if (visited.add (node))
4072 return;
4073
4074 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
4075 vect_remove_slp_scalar_calls (vinfo, child, visited);
4076
4077 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, stmt_info)
4078 {
4079 gcall *stmt = dyn_cast <gcall *> (stmt_info->stmt);
4080 if (!stmt || gimple_bb (stmt) == NULL)
4081 continue;
4082 if (is_pattern_stmt_p (stmt_info)
4083 || !PURE_SLP_STMT (stmt_info))
4084 continue;
4085 lhs = gimple_call_lhs (stmt);
4086 new_stmt = gimple_build_assign (lhs, build_zero_cst (TREE_TYPE (lhs)));
4087 gsi = gsi_for_stmt (stmt);
4088 vinfo->replace_stmt (&gsi, stmt_info, new_stmt);
4089 SSA_NAME_DEF_STMT (gimple_assign_lhs (new_stmt)) = new_stmt;
4090 }
4091 }
4092
4093 static void
4094 vect_remove_slp_scalar_calls (vec_info *vinfo, slp_tree node)
4095 {
4096 hash_set<slp_tree> visited;
4097 vect_remove_slp_scalar_calls (vinfo, node, visited);
4098 }
4099
4100 /* Vectorize the instance root. */
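/* For example, an SLP instance rooted at the CONSTRUCTOR statement
   x = { a, b, c, d } is finished by replacing that statement either
   with x = v, where v is the single vectorized def (wrapped in a
   VIEW_CONVERT_EXPR if the types differ), or with a CONSTRUCTOR of
   several vectorized defs. */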
4101
4102 void
4103 vectorize_slp_instance_root_stmt (slp_tree node, slp_instance instance)
4104 {
4105 gassign *rstmt = NULL;
4106
4107 if (SLP_TREE_NUMBER_OF_VEC_STMTS (node) == 1)
4108 {
4109 stmt_vec_info child_stmt_info;
4110 int j;
4111
4112 FOR_EACH_VEC_ELT (SLP_TREE_VEC_STMTS (node), j, child_stmt_info)
4113 {
4114 tree vect_lhs = gimple_get_lhs (child_stmt_info->stmt);
4115 tree root_lhs = gimple_get_lhs (instance->root_stmt->stmt);
4116 if (!useless_type_conversion_p (TREE_TYPE (root_lhs),
4117 TREE_TYPE (vect_lhs)))
4118 vect_lhs = build1 (VIEW_CONVERT_EXPR, TREE_TYPE (root_lhs),
4119 vect_lhs);
4120 rstmt = gimple_build_assign (root_lhs, vect_lhs);
4121 break;
4122 }
4123 }
4124 else if (SLP_TREE_NUMBER_OF_VEC_STMTS (node) > 1)
4125 {
4126 int nelts = SLP_TREE_NUMBER_OF_VEC_STMTS (node);
4127 stmt_vec_info child_stmt_info;
4128 int j;
4129 vec<constructor_elt, va_gc> *v;
4130 vec_alloc (v, nelts);
4131
4132 FOR_EACH_VEC_ELT (SLP_TREE_VEC_STMTS (node), j, child_stmt_info)
4133 {
4134 CONSTRUCTOR_APPEND_ELT (v,
4135 NULL_TREE,
4136 gimple_get_lhs (child_stmt_info->stmt));
4137 }
4138 tree lhs = gimple_get_lhs (instance->root_stmt->stmt);
4139 tree rtype = TREE_TYPE (gimple_assign_rhs1 (instance->root_stmt->stmt));
4140 tree r_constructor = build_constructor (rtype, v);
4141 rstmt = gimple_build_assign (lhs, r_constructor);
4142 }
4143
4144 gcc_assert (rstmt);
4145
4146 gimple_stmt_iterator rgsi = gsi_for_stmt (instance->root_stmt->stmt);
4147 gsi_replace (&rgsi, rstmt, true);
4148 }
4149
4150 /* Generate vector code for all SLP instances in the loop/basic block. */
4151
4152 void
4153 vect_schedule_slp (vec_info *vinfo)
4154 {
4155 vec<slp_instance> slp_instances;
4156 slp_instance instance;
4157 unsigned int i;
4158
4159 slp_instances = vinfo->slp_instances;
4160 FOR_EACH_VEC_ELT (slp_instances, i, instance)
4161 {
4162 slp_tree node = SLP_INSTANCE_TREE (instance);
4163 /* Schedule the tree of INSTANCE. */
4164 vect_schedule_slp_instance (vinfo, node, instance);
4165
4166 if (SLP_INSTANCE_ROOT_STMT (instance))
4167 vectorize_slp_instance_root_stmt (node, instance);
4168
4169 if (dump_enabled_p ())
4170 dump_printf_loc (MSG_NOTE, vect_location,
4171 "vectorizing stmts using SLP.\n");
4172 }
4173
4174 FOR_EACH_VEC_ELT (slp_instances, i, instance)
4175 {
4176 slp_tree root = SLP_INSTANCE_TREE (instance);
4177 stmt_vec_info store_info;
4178 unsigned int j;
4179
4180 /* Remove scalar call stmts. Do not do this for basic-block
4181 vectorization as not all uses may be vectorized.
4182 ??? Why should this be necessary? DCE should be able to
4183 remove the stmts itself.
4184 ??? For BB vectorization we could just as well remove scalar
4185 stmts starting from the SLP tree root if they have no
4186 uses. */
4187 if (is_a <loop_vec_info> (vinfo))
4188 vect_remove_slp_scalar_calls (vinfo, root);
4189
4190 for (j = 0; SLP_TREE_SCALAR_STMTS (root).iterate (j, &store_info); j++)
4191 {
4192 if (!STMT_VINFO_DATA_REF (store_info))
4193 break;
4194
4195 if (SLP_INSTANCE_ROOT_STMT (instance))
4196 continue;
4197
4198 store_info = vect_orig_stmt (store_info);
4199 /* Free the attached stmt_vec_info and remove the stmt. */
4200 vinfo->remove_stmt (store_info);
4201 }
4202 }
4203 }