ea8c202d187a6377876a84cee395cc1ff65a1747
[gcc.git] / gcc / tree-vect-slp.c
1 /* SLP - Basic Block Vectorization
2 Copyright (C) 2007-2013 Free Software Foundation, Inc.
3 Contributed by Dorit Naishlos <dorit@il.ibm.com>
4 and Ira Rosen <irar@il.ibm.com>
5
6 This file is part of GCC.
7
8 GCC is free software; you can redistribute it and/or modify it under
9 the terms of the GNU General Public License as published by the Free
10 Software Foundation; either version 3, or (at your option) any later
11 version.
12
13 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
14 WARRANTY; without even the implied warranty of MERCHANTABILITY or
15 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
16 for more details.
17
18 You should have received a copy of the GNU General Public License
19 along with GCC; see the file COPYING3. If not see
20 <http://www.gnu.org/licenses/>. */
21
22 #include "config.h"
23 #include "system.h"
24 #include "coretypes.h"
25 #include "dumpfile.h"
26 #include "tm.h"
27 #include "ggc.h"
28 #include "tree.h"
29 #include "target.h"
30 #include "basic-block.h"
31 #include "gimple-pretty-print.h"
32 #include "tree-flow.h"
33 #include "tree-pass.h"
34 #include "cfgloop.h"
35 #include "expr.h"
36 #include "recog.h" /* FIXME: for insn_data */
37 #include "optabs.h"
38 #include "tree-vectorizer.h"
39 #include "langhooks.h"
40
41 /* Extract the location of the basic block in the source code.
42 Return the basic block location if succeed and NULL if not. */
43
44 LOC
45 find_bb_location (basic_block bb)
46 {
47 gimple stmt = NULL;
48 gimple_stmt_iterator si;
49
50 if (!bb)
51 return UNKNOWN_LOC;
52
53 for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
54 {
55 stmt = gsi_stmt (si);
56 if (gimple_location (stmt) != UNKNOWN_LOC)
57 return gimple_location (stmt);
58 }
59
60 return UNKNOWN_LOC;
61 }
62
63
64 /* Recursively free the memory allocated for the SLP tree rooted at NODE. */
65
66 static void
67 vect_free_slp_tree (slp_tree node)
68 {
69 int i;
70 slp_tree child;
71
72 if (!node)
73 return;
74
75 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
76 vect_free_slp_tree (child);
77
78 SLP_TREE_CHILDREN (node).release ();
79 SLP_TREE_SCALAR_STMTS (node).release ();
80 SLP_TREE_VEC_STMTS (node).release ();
81 SLP_TREE_LOAD_PERMUTATION (node).release ();
82
83 free (node);
84 }
85
86
87 /* Free the memory allocated for the SLP instance. */
88
89 void
90 vect_free_slp_instance (slp_instance instance)
91 {
92 vect_free_slp_tree (SLP_INSTANCE_TREE (instance));
93 SLP_INSTANCE_LOADS (instance).release ();
94 SLP_INSTANCE_BODY_COST_VEC (instance).release ();
95 free (instance);
96 }
97
98
99 /* Create an SLP node for SCALAR_STMTS. */
100
101 static slp_tree
102 vect_create_new_slp_node (vec<gimple> scalar_stmts)
103 {
104 slp_tree node;
105 gimple stmt = scalar_stmts[0];
106 unsigned int nops;
107
108 if (is_gimple_call (stmt))
109 nops = gimple_call_num_args (stmt);
110 else if (is_gimple_assign (stmt))
111 {
112 nops = gimple_num_ops (stmt) - 1;
113 if (gimple_assign_rhs_code (stmt) == COND_EXPR)
114 nops++;
115 }
116 else
117 return NULL;
118
119 node = XNEW (struct _slp_tree);
120 SLP_TREE_SCALAR_STMTS (node) = scalar_stmts;
121 SLP_TREE_VEC_STMTS (node).create (0);
122 SLP_TREE_CHILDREN (node).create (nops);
123 SLP_TREE_LOAD_PERMUTATION (node) = vNULL;
124
125 return node;
126 }
127
128
129 /* Allocate operands info for NOPS operands, and GROUP_SIZE def-stmts for each
130 operand. */
131 static vec<slp_oprnd_info>
132 vect_create_oprnd_info (int nops, int group_size)
133 {
134 int i;
135 slp_oprnd_info oprnd_info;
136 vec<slp_oprnd_info> oprnds_info;
137
138 oprnds_info.create (nops);
139 for (i = 0; i < nops; i++)
140 {
141 oprnd_info = XNEW (struct _slp_oprnd_info);
142 oprnd_info->def_stmts.create (group_size);
143 oprnd_info->first_dt = vect_uninitialized_def;
144 oprnd_info->first_op_type = NULL_TREE;
145 oprnd_info->first_pattern = false;
146 oprnds_info.quick_push (oprnd_info);
147 }
148
149 return oprnds_info;
150 }
151
152
153 /* Free operands info. */
154
155 static void
156 vect_free_oprnd_info (vec<slp_oprnd_info> &oprnds_info)
157 {
158 int i;
159 slp_oprnd_info oprnd_info;
160
161 FOR_EACH_VEC_ELT (oprnds_info, i, oprnd_info)
162 {
163 oprnd_info->def_stmts.release ();
164 XDELETE (oprnd_info);
165 }
166
167 oprnds_info.release ();
168 }
169
170
171 /* Find the place of the data-ref in STMT in the interleaving chain that starts
172 from FIRST_STMT. Return -1 if the data-ref is not a part of the chain. */
173
174 static int
175 vect_get_place_in_interleaving_chain (gimple stmt, gimple first_stmt)
176 {
177 gimple next_stmt = first_stmt;
178 int result = 0;
179
180 if (first_stmt != GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)))
181 return -1;
182
183 do
184 {
185 if (next_stmt == stmt)
186 return result;
187 result++;
188 next_stmt = GROUP_NEXT_ELEMENT (vinfo_for_stmt (next_stmt));
189 }
190 while (next_stmt);
191
192 return -1;
193 }
194
195
/* Get the defs for the rhs of STMT (collect them in OPRNDS_INFO), check that
   they are of a valid type and that they match the defs of the first stmt of
   the SLP group (stored in OPRNDS_INFO).  FIRST is true when STMT is the
   first statement of the group, in which case the defs seen here become the
   reference the rest of the group is checked against.  Return false on any
   mismatch or unsupported def.  */

static bool
vect_get_and_check_slp_defs (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo,
                             gimple stmt, bool first,
                             vec<slp_oprnd_info> *oprnds_info)
{
  tree oprnd;
  unsigned int i, number_of_oprnds;
  tree def;
  gimple def_stmt;
  enum vect_def_type dt = vect_uninitialized_def;
  struct loop *loop = NULL;
  bool pattern = false;
  slp_oprnd_info oprnd_info;
  int op_idx = 1;
  tree compare_rhs = NULL_TREE;

  if (loop_vinfo)
    loop = LOOP_VINFO_LOOP (loop_vinfo);

  if (is_gimple_call (stmt))
    {
      number_of_oprnds = gimple_call_num_args (stmt);
      /* For a GIMPLE_CALL the argument operands start at index 3.  */
      op_idx = 3;
    }
  else if (is_gimple_assign (stmt))
    {
      number_of_oprnds = gimple_num_ops (stmt) - 1;
      /* A COND_EXPR carries an embedded comparison in rhs1; treat its
	 two operands as separate operands, hence one extra.  */
      if (gimple_assign_rhs_code (stmt) == COND_EXPR)
        number_of_oprnds++;
    }
  else
    return false;

  for (i = 0; i < number_of_oprnds; i++)
    {
      /* COMPARE_RHS, when set by the previous iteration, is the second
	 operand of an embedded comparison that still needs processing.  */
      if (compare_rhs)
	{
	  oprnd = compare_rhs;
	  compare_rhs = NULL_TREE;
	}
      else
        oprnd = gimple_op (stmt, op_idx++);

      oprnd_info = (*oprnds_info)[i];

      /* Split an embedded comparison into its two operands; the second
	 one is handled on the next loop iteration.  */
      if (COMPARISON_CLASS_P (oprnd))
        {
	  compare_rhs = TREE_OPERAND (oprnd, 1);
	  oprnd = TREE_OPERAND (oprnd, 0);
	}

      if (!vect_is_simple_use (oprnd, NULL, loop_vinfo, bb_vinfo, &def_stmt,
			       &def, &dt)
	  || (!def_stmt && dt != vect_constant_def))
	{
	  if (dump_enabled_p ())
	    {
	      dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			       "Build SLP failed: can't find def for ");
	      dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM, oprnd);
	    }

	  return false;
	}

      /* Check if DEF_STMT is a part of a pattern in LOOP and get the def stmt
         from the pattern.  Check that all the stmts of the node are in the
         pattern.  */
      if (def_stmt && gimple_bb (def_stmt)
          && ((loop && flow_bb_inside_loop_p (loop, gimple_bb (def_stmt)))
	      || (!loop && gimple_bb (def_stmt) == BB_VINFO_BB (bb_vinfo)
		  && gimple_code (def_stmt) != GIMPLE_PHI))
          && vinfo_for_stmt (def_stmt)
          && STMT_VINFO_IN_PATTERN_P (vinfo_for_stmt (def_stmt))
	  && !STMT_VINFO_RELEVANT (vinfo_for_stmt (def_stmt))
	  && !STMT_VINFO_LIVE_P (vinfo_for_stmt (def_stmt)))
        {
          pattern = true;
	  /* All stmts in the group must agree on whether this operand's
	     def is inside a pattern; the first stmt set the expectation.  */
          if (!first && !oprnd_info->first_pattern)
	    {
	      if (dump_enabled_p ())
		{
		  dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
				   "Build SLP failed: some of the stmts"
				   " are in a pattern, and others are not ");
		  dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM, oprnd);
		}

	      return false;
            }

	  /* Use the pattern replacement stmt as the def from here on.  */
          def_stmt = STMT_VINFO_RELATED_STMT (vinfo_for_stmt (def_stmt));
          dt = STMT_VINFO_DEF_TYPE (vinfo_for_stmt (def_stmt));

          if (dt == vect_unknown_def_type)
            {
              if (dump_enabled_p ())
                dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
				 "Unsupported pattern.");
              return false;
            }

          switch (gimple_code (def_stmt))
            {
              case GIMPLE_PHI:
                def = gimple_phi_result (def_stmt);
                break;

              case GIMPLE_ASSIGN:
                def = gimple_assign_lhs (def_stmt);
                break;

              default:
                if (dump_enabled_p ())
                  dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
				   "unsupported defining stmt: ");
                return false;
            }
        }

      if (first)
	{
	  /* Record the reference def-type/op-type/pattern state that the
	     remaining stmts of the group must match.  */
	  oprnd_info->first_dt = dt;
	  oprnd_info->first_pattern = pattern;
	  oprnd_info->first_op_type = TREE_TYPE (oprnd);
	}
      else
	{
	  /* Not first stmt of the group, check that the def-stmt/s match
	     the def-stmt/s of the first stmt.  Allow different definition
	     types for reduction chains: the first stmt must be a
	     vect_reduction_def (a phi node), and the rest
	     vect_internal_def.  */
	  if (((oprnd_info->first_dt != dt
                && !(oprnd_info->first_dt == vect_reduction_def
                     && dt == vect_internal_def)
		&& !((oprnd_info->first_dt == vect_external_def
		      || oprnd_info->first_dt == vect_constant_def)
		     && (dt == vect_external_def
			 || dt == vect_constant_def)))
	       || !types_compatible_p (oprnd_info->first_op_type,
				       TREE_TYPE (oprnd))))
	    {
	      if (dump_enabled_p ())
		dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
				 "Build SLP failed: different types ");

	      return false;
	    }
	}

      /* Check the types of the definitions.  */
      switch (dt)
	{
	case vect_constant_def:
	case vect_external_def:
        case vect_reduction_def:
	  break;

	case vect_internal_def:
	  /* Only internal defs spawn a child SLP node; collect them.  */
	  oprnd_info->def_stmts.quick_push (def_stmt);
	  break;

	default:
	  /* FORNOW: Not supported.  */
	  if (dump_enabled_p ())
	    {
	      dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			       "Build SLP failed: illegal type of def ");
	      dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM, def);
	    }

	  return false;
	}
    }

  return true;
}
378
379
/* Verify if the scalar stmts STMTS are isomorphic, require data
   permutation or are of unsupported types of operation.  Return
   true if they are, otherwise return false and indicate in *MATCHES
   which stmts are not isomorphic to the first one.  If MATCHES[0]
   is false then this indicates the comparison could not be
   carried out or the stmts will never be vectorized by SLP.

   NOTE: two failure modes are distinguished below: "Fatal mismatch"
   (MATCHES[0] = false, return immediately — no permutation can help)
   and plain "Mismatch" (MATCHES[i] stays false, keep scanning — the
   caller may retry with commuted operands).  */

static bool
vect_build_slp_tree_1 (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo,
		       vec<gimple> stmts, unsigned int group_size,
		       unsigned nops, unsigned int *max_nunits,
		       unsigned int vectorization_factor, bool *matches)
{
  unsigned int i;
  gimple stmt = stmts[0];
  enum tree_code first_stmt_code = ERROR_MARK, rhs_code = ERROR_MARK;
  enum tree_code first_cond_code = ERROR_MARK;
  tree lhs;
  bool need_same_oprnds = false;
  tree vectype, scalar_type, first_op1 = NULL_TREE;
  optab optab;
  int icode;
  enum machine_mode optab_op2_mode;
  enum machine_mode vec_mode;
  struct data_reference *first_dr;
  HOST_WIDE_INT dummy;
  gimple first_load = NULL, prev_first_load = NULL, old_first_load = NULL;
  tree cond;

  /* For every stmt in NODE find its def stmt/s.  */
  FOR_EACH_VEC_ELT (stmts, i, stmt)
    {
      matches[i] = false;

      if (dump_enabled_p ())
	{
	  dump_printf_loc (MSG_NOTE, vect_location, "Build SLP for ");
	  dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
	}

      /* Fail to vectorize statements marked as unvectorizable.  */
      if (!STMT_VINFO_VECTORIZABLE (vinfo_for_stmt (stmt)))
        {
          if (dump_enabled_p ())
            {
              dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			       "Build SLP failed: unvectorizable statement ");
              dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
            }
	  /* Fatal mismatch.  */
	  matches[0] = false;
          return false;
        }

      lhs = gimple_get_lhs (stmt);
      if (lhs == NULL_TREE)
	{
	  if (dump_enabled_p ())
	    {
	      dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			       "Build SLP failed: not GIMPLE_ASSIGN nor "
			       "GIMPLE_CALL ");
	      dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
	    }
	  /* Fatal mismatch.  */
	  matches[0] = false;
	  return false;
	}

      /* A COND_EXPR whose rhs1 is not an embedded comparison is not
	 handled by the SLP COND_EXPR support below.  */
      if (is_gimple_assign (stmt)
	  && gimple_assign_rhs_code (stmt) == COND_EXPR
          && (cond = gimple_assign_rhs1 (stmt))
          && !COMPARISON_CLASS_P (cond))
        {
          if (dump_enabled_p ())
            {
              dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			       "Build SLP failed: condition is not "
			       "comparison ");
              dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
            }
	  /* Fatal mismatch.  */
	  matches[0] = false;
          return false;
        }

      scalar_type = vect_get_smallest_scalar_type (stmt, &dummy, &dummy);
      vectype = get_vectype_for_scalar_type (scalar_type);
      if (!vectype)
        {
          if (dump_enabled_p ())
            {
              dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			       "Build SLP failed: unsupported data-type ");
              dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
				 scalar_type);
            }
	  /* Fatal mismatch.  */
	  matches[0] = false;
          return false;
        }

      /* In case of multiple types we need to detect the smallest type.  */
      if (*max_nunits < TYPE_VECTOR_SUBPARTS (vectype))
        {
          *max_nunits = TYPE_VECTOR_SUBPARTS (vectype);
          if (bb_vinfo)
            vectorization_factor = *max_nunits;
        }

      if (is_gimple_call (stmt))
	{
	  rhs_code = CALL_EXPR;
	  /* Only plain, nothrow, direct calls with no static chain are
	     supported for SLP.  */
	  if (gimple_call_internal_p (stmt)
	      || gimple_call_tail_p (stmt)
	      || gimple_call_noreturn_p (stmt)
	      || !gimple_call_nothrow_p (stmt)
	      || gimple_call_chain (stmt))
	    {
	      if (dump_enabled_p ())
		{
		  dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
				   "Build SLP failed: unsupported call type ");
		  dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
		}
	      /* Fatal mismatch.  */
	      matches[0] = false;
	      return false;
	    }
	}
      else
	rhs_code = gimple_assign_rhs_code (stmt);

      /* Check the operation.  */
      if (i == 0)
	{
	  first_stmt_code = rhs_code;

	  /* Shift arguments should be equal in all the packed stmts for a
	     vector shift with scalar shift operand.  */
	  if (rhs_code == LSHIFT_EXPR || rhs_code == RSHIFT_EXPR
	      || rhs_code == LROTATE_EXPR
	      || rhs_code == RROTATE_EXPR)
	    {
	      vec_mode = TYPE_MODE (vectype);

	      /* First see if we have a vector/vector shift.  */
	      optab = optab_for_tree_code (rhs_code, vectype,
					   optab_vector);

	      if (!optab
		  || optab_handler (optab, vec_mode) == CODE_FOR_nothing)
		{
		  /* No vector/vector shift, try for a vector/scalar shift.  */
		  optab = optab_for_tree_code (rhs_code, vectype,
					       optab_scalar);

		  if (!optab)
		    {
		      if (dump_enabled_p ())
			dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
					 "Build SLP failed: no optab.");
		      /* Fatal mismatch.  */
		      matches[0] = false;
		      return false;
		    }
		  icode = (int) optab_handler (optab, vec_mode);
		  if (icode == CODE_FOR_nothing)
		    {
		      if (dump_enabled_p ())
			dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
					 "Build SLP failed: "
					 "op not supported by target.");
		      /* Fatal mismatch.  */
		      matches[0] = false;
		      return false;
		    }
		  optab_op2_mode = insn_data[icode].operand[2].mode;
		  if (!VECTOR_MODE_P (optab_op2_mode))
		    {
		      /* Vector/scalar shift: every stmt in the group must
			 use the same shift amount.  */
		      need_same_oprnds = true;
		      first_op1 = gimple_assign_rhs2 (stmt);
		    }
		}
	    }
	  else if (rhs_code == WIDEN_LSHIFT_EXPR)
            {
              need_same_oprnds = true;
              first_op1 = gimple_assign_rhs2 (stmt);
            }
	}
      else
	{
	  /* Subsequent stmts must use the same operation as the first,
	     modulo REALPART/IMAGPART pairing and differing reference
	     forms within one grouped access.  */
	  if (first_stmt_code != rhs_code
	      && (first_stmt_code != IMAGPART_EXPR
		  || rhs_code != REALPART_EXPR)
	      && (first_stmt_code != REALPART_EXPR
		  || rhs_code != IMAGPART_EXPR)
	      && !(STMT_VINFO_GROUPED_ACCESS (vinfo_for_stmt (stmt))
		   && (first_stmt_code == ARRAY_REF
		       || first_stmt_code == BIT_FIELD_REF
		       || first_stmt_code == INDIRECT_REF
		       || first_stmt_code == COMPONENT_REF
		       || first_stmt_code == MEM_REF)))
	    {
	      if (dump_enabled_p ())
		{
		  dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
				   "Build SLP failed: different operation "
				   "in stmt ");
		  dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
		}
	      /* Mismatch.  */
	      continue;
	    }

	  if (need_same_oprnds
	      && !operand_equal_p (first_op1, gimple_assign_rhs2 (stmt), 0))
	    {
	      if (dump_enabled_p ())
		{
		  dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
				   "Build SLP failed: different shift "
				   "arguments in ");
		  dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
		}
	      /* Mismatch.  */
	      continue;
	    }

	  if (rhs_code == CALL_EXPR)
	    {
	      /* Calls must target the same function with the same type
		 and arity as the first stmt.  */
	      gimple first_stmt = stmts[0];
	      if (gimple_call_num_args (stmt) != nops
		  || !operand_equal_p (gimple_call_fn (first_stmt),
				       gimple_call_fn (stmt), 0)
		  || gimple_call_fntype (first_stmt)
		     != gimple_call_fntype (stmt))
		{
		  if (dump_enabled_p ())
		    {
		      dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
				       "Build SLP failed: different calls in ");
		      dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
					stmt, 0);
		    }
		  /* Mismatch.  */
		  continue;
		}
	    }
	}

      /* Grouped store or load.  */
      if (STMT_VINFO_GROUPED_ACCESS (vinfo_for_stmt (stmt)))
	{
	  if (REFERENCE_CLASS_P (lhs))
	    {
	      /* Store.  */
	      ;
	    }
	  else
	    {
	      /* Load.  */
	      unsigned unrolling_factor
		= least_common_multiple
		    (*max_nunits, group_size) / group_size;
	      /* FORNOW: Check that there is no gap between the loads
		 and no gap between the groups when we need to load
		 multiple groups at once.
		 ???  We should enhance this to only disallow gaps
		 inside vectors.  */
	      if ((unrolling_factor > 1
		   && GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)) == stmt
		   && GROUP_GAP (vinfo_for_stmt (stmt)) != 0)
		  || (GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)) != stmt
		      && GROUP_GAP (vinfo_for_stmt (stmt)) != 1))
		{
		  if (dump_enabled_p ())
		    {
		      dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
				       "Build SLP failed: grouped "
				       "loads have gaps ");
		      dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
					stmt, 0);
		    }
		  /* Fatal mismatch.  */
		  matches[0] = false;
		  return false;
		}

	      /* Check that the size of interleaved loads group is not
		 greater than the SLP group size.  */
	      unsigned ncopies
		= vectorization_factor / TYPE_VECTOR_SUBPARTS (vectype);
	      if (loop_vinfo
		  && GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)) == stmt
		  && ((GROUP_SIZE (vinfo_for_stmt (stmt))
		       - GROUP_GAP (vinfo_for_stmt (stmt)))
		      > ncopies * group_size))
		{
		  if (dump_enabled_p ())
		    {
		      dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
				       "Build SLP failed: the number "
				       "of interleaved loads is greater than "
				       "the SLP group size ");
		      dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
					stmt, 0);
		    }
		  /* Fatal mismatch.  */
		  matches[0] = false;
		  return false;
		}

	      old_first_load = first_load;
	      first_load = GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt));
	      if (prev_first_load)
		{
		  /* Check that there are no loads from different interleaving
		     chains in the same node.  */
		  if (prev_first_load != first_load)
		    {
		      if (dump_enabled_p ())
			{
			  dump_printf_loc (MSG_MISSED_OPTIMIZATION,
					   vect_location,
					   "Build SLP failed: different "
					   "interleaving chains in one node ");
			  dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
					    stmt, 0);
			}
		      /* Mismatch.  */
		      continue;
		    }
		}
	      else
		prev_first_load = first_load;

	      /* In some cases a group of loads is just the same load
		 repeated N times.  Only analyze its cost once.  */
	      if (first_load == stmt && old_first_load != first_load)
		{
		  first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (stmt));
		  if (vect_supportable_dr_alignment (first_dr, false)
		      == dr_unaligned_unsupported)
		    {
		      if (dump_enabled_p ())
			{
			  dump_printf_loc (MSG_MISSED_OPTIMIZATION,
					   vect_location,
					   "Build SLP failed: unsupported "
					   "unaligned load ");
			  dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
					    stmt, 0);
			}
		      /* Fatal mismatch.  */
		      matches[0] = false;
		      return false;
		    }
		}
	    }
	} /* Grouped access.  */
      else
	{
	  if (TREE_CODE_CLASS (rhs_code) == tcc_reference)
	    {
	      /* Not grouped load.  */
	      if (dump_enabled_p ())
		{
		  dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
				   "Build SLP failed: not grouped load ");
		  dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
		}

	      /* FORNOW: Not grouped loads are not supported.  */
	      /* Fatal mismatch.  */
	      matches[0] = false;
	      return false;
	    }

	  /* Not memory operation.  */
	  if (TREE_CODE_CLASS (rhs_code) != tcc_binary
	      && TREE_CODE_CLASS (rhs_code) != tcc_unary
	      && rhs_code != COND_EXPR
	      && rhs_code != CALL_EXPR)
	    {
	      if (dump_enabled_p ())
		{
		  dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
				   "Build SLP failed: operation");
		  dump_printf (MSG_MISSED_OPTIMIZATION, " unsupported ");
		  dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
		}
	      /* Fatal mismatch.  */
	      matches[0] = false;
	      return false;
	    }

	  if (rhs_code == COND_EXPR)
	    {
	      /* All COND_EXPRs in the group must use the same comparison
		 code.  */
	      tree cond_expr = gimple_assign_rhs1 (stmt);

	      if (i == 0)
		first_cond_code = TREE_CODE (cond_expr);
	      else if (first_cond_code != TREE_CODE (cond_expr))
		{
		  if (dump_enabled_p ())
		    {
		      dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
				       "Build SLP failed: different"
				       " operation");
		      dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
					stmt, 0);
		    }
		  /* Mismatch.  */
		  continue;
		}
	    }
	}

      matches[i] = true;
    }

  for (i = 0; i < group_size; ++i)
    if (!matches[i])
      return false;

  return true;
}
809
/* Recursively build an SLP tree starting from NODE.
   Return false if the stmts are not isomorphic, require data
   permutation or are of unsupported types of operation; otherwise
   return true.  MATCHES (if non-NULL, of length GROUP_SIZE) receives
   which stmts failed to match; NPERMUTES (if non-NULL) counts the
   unsuccessful operand-swap retries performed so far.  */

static bool
vect_build_slp_tree (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo,
                     slp_tree *node, unsigned int group_size,
                     unsigned int *max_nunits,
                     vec<slp_tree> *loads,
                     unsigned int vectorization_factor,
		     bool *matches, unsigned *npermutes)
{
  unsigned nops, i, this_npermutes = 0;
  gimple stmt;

  /* Top-level calls may pass NULL for MATCHES/NPERMUTES; provide
     local storage in that case.  */
  if (!matches)
    matches = XALLOCAVEC (bool, group_size);
  if (!npermutes)
    npermutes = &this_npermutes;

  matches[0] = false;

  stmt = SLP_TREE_SCALAR_STMTS (*node)[0];
  if (is_gimple_call (stmt))
    nops = gimple_call_num_args (stmt);
  else if (is_gimple_assign (stmt))
    {
      nops = gimple_num_ops (stmt) - 1;
      /* COND_EXPR embeds a comparison in rhs1, adding one operand.  */
      if (gimple_assign_rhs_code (stmt) == COND_EXPR)
	nops++;
    }
  else
    return false;

  if (!vect_build_slp_tree_1 (loop_vinfo, bb_vinfo,
			      SLP_TREE_SCALAR_STMTS (*node), group_size, nops,
			      max_nunits, vectorization_factor, matches))
    return false;

  /* If the SLP node is a load, terminate the recursion.  */
  if (STMT_VINFO_GROUPED_ACCESS (vinfo_for_stmt (stmt))
      && DR_IS_READ (STMT_VINFO_DATA_REF (vinfo_for_stmt (stmt))))
    {
      loads->safe_push (*node);
      return true;
    }

  /* Get at the operands, verifying they are compatible.  */
  vec<slp_oprnd_info> oprnds_info = vect_create_oprnd_info (nops, group_size);
  slp_oprnd_info oprnd_info;
  FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (*node), i, stmt)
    {
      if (!vect_get_and_check_slp_defs (loop_vinfo, bb_vinfo,
					stmt, (i == 0), &oprnds_info))
	{
	  vect_free_oprnd_info (oprnds_info);
	  return false;
	}
    }

  stmt = SLP_TREE_SCALAR_STMTS (*node)[0];

  /* Create SLP_TREE nodes for the definition node/s.  */
  FOR_EACH_VEC_ELT (oprnds_info, i, oprnd_info)
    {
      slp_tree child;
      unsigned old_nloads = loads->length ();
      unsigned old_max_nunits = *max_nunits;

      if (oprnd_info->first_dt != vect_internal_def)
        continue;

      child = vect_create_new_slp_node (oprnd_info->def_stmts);
      if (!child)
	{
	  vect_free_oprnd_info (oprnds_info);
	  return false;
	}

      bool *matches = XALLOCAVEC (bool, group_size);
      if (vect_build_slp_tree (loop_vinfo, bb_vinfo, &child,
			       group_size, max_nunits, loads,
			       vectorization_factor, matches, npermutes))
	{
	  /* CHILD took ownership of the def-stmts vector; forget it.  */
	  oprnd_info->def_stmts = vNULL;
	  SLP_TREE_CHILDREN (*node).quick_push (child);
	  continue;
	}

      /* If the SLP build for operand zero failed and operand zero
	 and one can be commutated try that for the scalar stmts
	 that failed the match.  */
      if (i == 0
	  /* A first scalar stmt mismatch signals a fatal mismatch.  */
	  && matches[0]
	  /* ???  For COND_EXPRs we can swap the comparison operands
	     as well as the arms under some constraints.  */
	  && nops == 2
	  && oprnds_info[1]->first_dt == vect_internal_def
	  && is_gimple_assign (stmt)
	  && commutative_tree_code (gimple_assign_rhs_code (stmt))
	  /* Do so only if the number of not successful permutes was
	     not more than a cut-off as re-trying the recursive match on
	     possibly each level of the tree would expose exponential
	     behavior.  */
	  && *npermutes < 4)
	{
	  /* Roll back.  */
	  *max_nunits = old_max_nunits;
	  loads->truncate (old_nloads);
	  /* Swap mismatched definition stmts.  */
	  for (unsigned j = 0; j < group_size; ++j)
	    if (!matches[j])
	      {
		gimple tem = oprnds_info[0]->def_stmts[j];
		oprnds_info[0]->def_stmts[j] = oprnds_info[1]->def_stmts[j];
		oprnds_info[1]->def_stmts[j] = tem;
	      }
	  /* And try again ... */
	  if (vect_build_slp_tree (loop_vinfo, bb_vinfo, &child,
				   group_size, max_nunits, loads,
				   vectorization_factor,
				   matches, npermutes))
	    {
	      oprnd_info->def_stmts = vNULL;
	      SLP_TREE_CHILDREN (*node).quick_push (child);
	      continue;
	    }

	  ++*npermutes;
	}

      oprnd_info->def_stmts = vNULL;
      vect_free_slp_tree (child);
      vect_free_oprnd_info (oprnds_info);
      return false;
    }

  vect_free_oprnd_info (oprnds_info);
  return true;
}
954
955 /* Dump a slp tree NODE using flags specified in DUMP_KIND. */
956
957 static void
958 vect_print_slp_tree (int dump_kind, slp_tree node)
959 {
960 int i;
961 gimple stmt;
962 slp_tree child;
963
964 if (!node)
965 return;
966
967 dump_printf (dump_kind, "node ");
968 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, stmt)
969 {
970 dump_printf (dump_kind, "\n\tstmt %d ", i);
971 dump_gimple_stmt (dump_kind, TDF_SLIM, stmt, 0);
972 }
973 dump_printf (dump_kind, "\n");
974
975 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
976 vect_print_slp_tree (dump_kind, child);
977 }
978
979
980 /* Mark the tree rooted at NODE with MARK (PURE_SLP or HYBRID).
981 If MARK is HYBRID, it refers to a specific stmt in NODE (the stmt at index
982 J). Otherwise, MARK is PURE_SLP and J is -1, which indicates that all the
983 stmts in NODE are to be marked. */
984
985 static void
986 vect_mark_slp_stmts (slp_tree node, enum slp_vect_type mark, int j)
987 {
988 int i;
989 gimple stmt;
990 slp_tree child;
991
992 if (!node)
993 return;
994
995 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, stmt)
996 if (j < 0 || i == j)
997 STMT_SLP_TYPE (vinfo_for_stmt (stmt)) = mark;
998
999 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
1000 vect_mark_slp_stmts (child, mark, j);
1001 }
1002
1003
1004 /* Mark the statements of the tree rooted at NODE as relevant (vect_used). */
1005
1006 static void
1007 vect_mark_slp_stmts_relevant (slp_tree node)
1008 {
1009 int i;
1010 gimple stmt;
1011 stmt_vec_info stmt_info;
1012 slp_tree child;
1013
1014 if (!node)
1015 return;
1016
1017 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, stmt)
1018 {
1019 stmt_info = vinfo_for_stmt (stmt);
1020 gcc_assert (!STMT_VINFO_RELEVANT (stmt_info)
1021 || STMT_VINFO_RELEVANT (stmt_info) == vect_used_in_scope);
1022 STMT_VINFO_RELEVANT (stmt_info) = vect_used_in_scope;
1023 }
1024
1025 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
1026 vect_mark_slp_stmts_relevant (child);
1027 }
1028
1029
1030 /* Rearrange the statements of NODE according to PERMUTATION. */
1031
1032 static void
1033 vect_slp_rearrange_stmts (slp_tree node, unsigned int group_size,
1034 vec<unsigned> permutation)
1035 {
1036 gimple stmt;
1037 vec<gimple> tmp_stmts;
1038 unsigned int i;
1039 slp_tree child;
1040
1041 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
1042 vect_slp_rearrange_stmts (child, group_size, permutation);
1043
1044 gcc_assert (group_size == SLP_TREE_SCALAR_STMTS (node).length ());
1045 tmp_stmts.create (group_size);
1046 tmp_stmts.quick_grow_cleared (group_size);
1047
1048 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, stmt)
1049 tmp_stmts[permutation[i]] = stmt;
1050
1051 SLP_TREE_SCALAR_STMTS (node).release ();
1052 SLP_TREE_SCALAR_STMTS (node) = tmp_stmts;
1053 }
1054
1055
1056 /* Check if the required load permutations in the SLP instance
1057 SLP_INSTN are supported. */
1058
1059 static bool
1060 vect_supported_load_permutation_p (slp_instance slp_instn)
1061 {
1062 unsigned int group_size = SLP_INSTANCE_GROUP_SIZE (slp_instn);
1063 unsigned int i, j, k, next;
1064 sbitmap load_index;
1065 slp_tree node;
1066 gimple stmt, load, next_load, first_load;
1067 struct data_reference *dr;
1068
1069 if (dump_enabled_p ())
1070 {
1071 dump_printf_loc (MSG_NOTE, vect_location, "Load permutation ");
1072 FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (slp_instn), i, node)
1073 if (node->load_permutation.exists ())
1074 FOR_EACH_VEC_ELT (node->load_permutation, j, next)
1075 dump_printf (MSG_NOTE, "%d ", next);
1076 else
1077 for (i = 0; i < group_size; ++i)
1078 dump_printf (MSG_NOTE, "%d ", i);
1079 }
1080
1081 /* In case of reduction every load permutation is allowed, since the order
1082 of the reduction statements is not important (as opposed to the case of
1083 grouped stores). The only condition we need to check is that all the
1084 load nodes are of the same size and have the same permutation (and then
1085 rearrange all the nodes of the SLP instance according to this
1086 permutation). */
1087
1088 /* Check that all the load nodes are of the same size. */
1089 /* ??? Can't we assert this? */
1090 FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (slp_instn), i, node)
1091 if (SLP_TREE_SCALAR_STMTS (node).length () != (unsigned) group_size)
1092 return false;
1093
1094 node = SLP_INSTANCE_TREE (slp_instn);
1095 stmt = SLP_TREE_SCALAR_STMTS (node)[0];
1096
1097 /* Reduction (there are no data-refs in the root).
1098 In reduction chain the order of the loads is important. */
1099 if (!STMT_VINFO_DATA_REF (vinfo_for_stmt (stmt))
1100 && !GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)))
1101 {
1102 slp_tree load;
1103 unsigned int lidx;
1104
1105 /* Compare all the permutation sequences to the first one. We know
1106 that at least one load is permuted. */
1107 node = SLP_INSTANCE_LOADS (slp_instn)[0];
1108 if (!node->load_permutation.exists ())
1109 return false;
1110 for (i = 1; SLP_INSTANCE_LOADS (slp_instn).iterate (i, &load); ++i)
1111 {
1112 if (!load->load_permutation.exists ())
1113 return false;
1114 FOR_EACH_VEC_ELT (load->load_permutation, j, lidx)
1115 if (lidx != node->load_permutation[j])
1116 return false;
1117 }
1118
1119 /* Check that the loads in the first sequence are different and there
1120 are no gaps between them. */
1121 load_index = sbitmap_alloc (group_size);
1122 bitmap_clear (load_index);
1123 FOR_EACH_VEC_ELT (node->load_permutation, i, lidx)
1124 {
1125 if (bitmap_bit_p (load_index, lidx))
1126 {
1127 sbitmap_free (load_index);
1128 return false;
1129 }
1130 bitmap_set_bit (load_index, lidx);
1131 }
1132 for (i = 0; i < group_size; i++)
1133 if (!bitmap_bit_p (load_index, i))
1134 {
1135 sbitmap_free (load_index);
1136 return false;
1137 }
1138 sbitmap_free (load_index);
1139
1140 /* This permutation is valid for reduction. Since the order of the
1141 statements in the nodes is not important unless they are memory
1142 accesses, we can rearrange the statements in all the nodes
1143 according to the order of the loads. */
1144 vect_slp_rearrange_stmts (SLP_INSTANCE_TREE (slp_instn), group_size,
1145 node->load_permutation);
1146
1147 /* We are done, no actual permutations need to be generated. */
1148 FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (slp_instn), i, node)
1149 SLP_TREE_LOAD_PERMUTATION (node).release ();
1150 return true;
1151 }
1152
1153 /* In basic block vectorization we allow any subchain of an interleaving
1154 chain.
1155 FORNOW: not supported in loop SLP because of realignment compications. */
1156 if (STMT_VINFO_BB_VINFO (vinfo_for_stmt (stmt)))
1157 {
1158 /* Check that for every node in the instance the loads
1159 form a subchain. */
1160 FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (slp_instn), i, node)
1161 {
1162 next_load = NULL;
1163 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), j, load)
1164 {
1165 if (j != 0 && next_load != load)
1166 return false;
1167 next_load = GROUP_NEXT_ELEMENT (vinfo_for_stmt (load));
1168 }
1169 }
1170
1171 /* Check that the alignment of the first load in every subchain, i.e.,
1172 the first statement in every load node, is supported.
1173 ??? This belongs in alignment checking. */
1174 FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (slp_instn), i, node)
1175 {
1176 first_load = SLP_TREE_SCALAR_STMTS (node)[0];
1177 if (first_load != GROUP_FIRST_ELEMENT (vinfo_for_stmt (first_load)))
1178 {
1179 dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_load));
1180 if (vect_supportable_dr_alignment (dr, false)
1181 == dr_unaligned_unsupported)
1182 {
1183 if (dump_enabled_p ())
1184 {
1185 dump_printf_loc (MSG_MISSED_OPTIMIZATION,
1186 vect_location,
1187 "unsupported unaligned load ");
1188 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
1189 first_load, 0);
1190 }
1191 return false;
1192 }
1193 }
1194 }
1195
1196 /* We are done, no actual permutations need to be generated. */
1197 FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (slp_instn), i, node)
1198 SLP_TREE_LOAD_PERMUTATION (node).release ();
1199 return true;
1200 }
1201
1202 /* FORNOW: the only supported permutation is 0..01..1.. of length equal to
1203 GROUP_SIZE and where each sequence of same drs is of GROUP_SIZE length as
1204 well (unless it's reduction). */
1205 if (SLP_INSTANCE_LOADS (slp_instn).length () != group_size)
1206 return false;
1207 FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (slp_instn), i, node)
1208 if (!node->load_permutation.exists ())
1209 return false;
1210
1211 load_index = sbitmap_alloc (group_size);
1212 bitmap_clear (load_index);
1213 FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (slp_instn), i, node)
1214 {
1215 unsigned int lidx = node->load_permutation[0];
1216 if (bitmap_bit_p (load_index, lidx))
1217 {
1218 sbitmap_free (load_index);
1219 return false;
1220 }
1221 bitmap_set_bit (load_index, lidx);
1222 FOR_EACH_VEC_ELT (node->load_permutation, j, k)
1223 if (k != lidx)
1224 {
1225 sbitmap_free (load_index);
1226 return false;
1227 }
1228 }
1229 for (i = 0; i < group_size; i++)
1230 if (!bitmap_bit_p (load_index, i))
1231 {
1232 sbitmap_free (load_index);
1233 return false;
1234 }
1235 sbitmap_free (load_index);
1236
1237 FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (slp_instn), i, node)
1238 if (node->load_permutation.exists ()
1239 && !vect_transform_slp_perm_load
1240 (node, vNULL, NULL,
1241 SLP_INSTANCE_UNROLLING_FACTOR (slp_instn), slp_instn, true))
1242 return false;
1243 return true;
1244 }
1245
1246
1247 /* Find the first load in the loop that belongs to INSTANCE.
1248 When loads are in several SLP nodes, there can be a case in which the first
1249 load does not appear in the first SLP node to be transformed, causing
1250 incorrect order of statements. Since we generate all the loads together,
1251 they must be inserted before the first load of the SLP instance and not
1252 before the first load of the first node of the instance. */
1253
1254 static gimple
1255 vect_find_first_load_in_slp_instance (slp_instance instance)
1256 {
1257 int i, j;
1258 slp_tree load_node;
1259 gimple first_load = NULL, load;
1260
1261 FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (instance), i, load_node)
1262 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (load_node), j, load)
1263 first_load = get_earlier_stmt (load, first_load);
1264
1265 return first_load;
1266 }
1267
1268
1269 /* Find the last store in SLP INSTANCE. */
1270
1271 static gimple
1272 vect_find_last_store_in_slp_instance (slp_instance instance)
1273 {
1274 int i;
1275 slp_tree node;
1276 gimple last_store = NULL, store;
1277
1278 node = SLP_INSTANCE_TREE (instance);
1279 for (i = 0; SLP_TREE_SCALAR_STMTS (node).iterate (i, &store); i++)
1280 last_store = get_later_stmt (store, last_store);
1281
1282 return last_store;
1283 }
1284
/* Compute the cost for the SLP node NODE in the SLP instance INSTANCE.
   Body costs are accumulated into INSTANCE's body cost vector; costs for
   constant/invariant operands are delayed into PROLOGUE_COST_VEC (they are
   only committed once SLP analysis succeeds).  NCOPIES_FOR_COST is the
   number of vector stmts that will be generated for this group.  */

static void
vect_analyze_slp_cost_1 (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo,
                         slp_instance instance, slp_tree node,
                         stmt_vector_for_cost *prologue_cost_vec,
                         unsigned ncopies_for_cost)
{
  stmt_vector_for_cost *body_cost_vec = &SLP_INSTANCE_BODY_COST_VEC (instance);

  unsigned i;
  slp_tree child;
  gimple stmt, s;
  stmt_vec_info stmt_info;
  tree lhs;
  unsigned group_size = SLP_INSTANCE_GROUP_SIZE (instance);

  /* Recurse down the SLP tree.  */
  FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
    vect_analyze_slp_cost_1 (loop_vinfo, bb_vinfo,
                             instance, child, prologue_cost_vec,
                             ncopies_for_cost);

  /* Look at the first scalar stmt to determine the cost.  */
  stmt = SLP_TREE_SCALAR_STMTS (node)[0];
  stmt_info = vinfo_for_stmt (stmt);
  if (STMT_VINFO_GROUPED_ACCESS (stmt_info))
    {
      if (DR_IS_WRITE (STMT_VINFO_DATA_REF (stmt_info)))
        vect_model_store_cost (stmt_info, ncopies_for_cost, false,
                               vect_uninitialized_def,
                               node, prologue_cost_vec, body_cost_vec);
      else
        {
          /* NOTE: this inner I intentionally shadows the outer unsigned I
             for the permutation scan below.  */
          int i;
          gcc_checking_assert (DR_IS_READ (STMT_VINFO_DATA_REF (stmt_info)));
          vect_model_load_cost (stmt_info, ncopies_for_cost, false,
                                node, prologue_cost_vec, body_cost_vec);
          /* If the load is permuted record the cost for the permutation.
             ??? Loads from multiple chains are let through here only
             for a single special case involving complex numbers where
             in the end no permutation is necessary.  */
          FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, s)
            if ((STMT_VINFO_GROUP_FIRST_ELEMENT (vinfo_for_stmt (s))
                 == STMT_VINFO_GROUP_FIRST_ELEMENT (stmt_info))
                && vect_get_place_in_interleaving_chain
                     (s, STMT_VINFO_GROUP_FIRST_ELEMENT (stmt_info)) != i)
              {
                /* One permutation cost covers the whole group; stop at the
                   first out-of-place element.  */
                record_stmt_cost (body_cost_vec, group_size, vec_perm,
                                  stmt_info, 0, vect_body);
                break;
              }
        }
    }
  else
    record_stmt_cost (body_cost_vec, ncopies_for_cost, vector_stmt,
                      stmt_info, 0, vect_body);

  /* Scan operands and account for prologue cost of constants/externals.
     ??? This over-estimates cost for multiple uses and should be
     re-engineered.  */
  lhs = gimple_get_lhs (stmt);
  for (i = 0; i < gimple_num_ops (stmt); ++i)
    {
      tree def, op = gimple_op (stmt, i);
      gimple def_stmt;
      enum vect_def_type dt;
      /* Skip absent operands and the definition itself.  */
      if (!op || op == lhs)
        continue;
      if (vect_is_simple_use (op, NULL, loop_vinfo, bb_vinfo,
                              &def_stmt, &def, &dt)
          && (dt == vect_constant_def || dt == vect_external_def))
        record_stmt_cost (prologue_cost_vec, 1, vector_stmt,
                          stmt_info, 0, vect_prologue);
    }
}
1361
1362 /* Compute the cost for the SLP instance INSTANCE. */
1363
1364 static void
1365 vect_analyze_slp_cost (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo,
1366 slp_instance instance, unsigned nunits)
1367 {
1368 stmt_vector_for_cost body_cost_vec, prologue_cost_vec;
1369 unsigned ncopies_for_cost;
1370 stmt_info_for_cost *si;
1371 unsigned i;
1372
1373 /* Calculate the number of vector stmts to create based on the unrolling
1374 factor (number of vectors is 1 if NUNITS >= GROUP_SIZE, and is
1375 GROUP_SIZE / NUNITS otherwise. */
1376 unsigned group_size = SLP_INSTANCE_GROUP_SIZE (instance);
1377 ncopies_for_cost = least_common_multiple (nunits, group_size) / nunits;
1378
1379 prologue_cost_vec.create (10);
1380 body_cost_vec.create (10);
1381 SLP_INSTANCE_BODY_COST_VEC (instance) = body_cost_vec;
1382 vect_analyze_slp_cost_1 (loop_vinfo, bb_vinfo,
1383 instance, SLP_INSTANCE_TREE (instance),
1384 &prologue_cost_vec, ncopies_for_cost);
1385
1386 /* Record the prologue costs, which were delayed until we were
1387 sure that SLP was successful. Unlike the body costs, we know
1388 the final values now regardless of the loop vectorization factor. */
1389 void *data = (loop_vinfo ? LOOP_VINFO_TARGET_COST_DATA (loop_vinfo)
1390 : BB_VINFO_TARGET_COST_DATA (bb_vinfo));
1391 FOR_EACH_VEC_ELT (prologue_cost_vec, i, si)
1392 {
1393 struct _stmt_vec_info *stmt_info
1394 = si->stmt ? vinfo_for_stmt (si->stmt) : NULL;
1395 (void) add_stmt_cost (data, si->count, si->kind, stmt_info,
1396 si->misalign, vect_prologue);
1397 }
1398
1399 prologue_cost_vec.release ();
1400 }
1401
/* Analyze an SLP instance starting from a group of grouped stores.  Call
   vect_build_slp_tree to build a tree of packed stmts if possible.
   Return FALSE if it's impossible to SLP any stmt in the loop.  */

static bool
vect_analyze_slp_instance (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo,
                           gimple stmt)
{
  slp_instance new_instance;
  slp_tree node;
  unsigned int group_size = GROUP_SIZE (vinfo_for_stmt (stmt));
  unsigned int unrolling_factor = 1, nunits;
  tree vectype, scalar_type = NULL_TREE;
  gimple next;
  unsigned int vectorization_factor = 0;
  int i;
  unsigned int max_nunits = 0;
  vec<slp_tree> loads;
  struct data_reference *dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (stmt));
  vec<gimple> scalar_stmts;

  /* STMT is either the head of a load/store group or of a reduction
     chain (GROUP_FIRST_ELEMENT set), or stands for the set of loop
     reductions (plain SLP reduction).  */
  if (GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)))
    {
      if (dr)
        {
          /* Grouped access: derive the vector type from the data-ref.  */
          scalar_type = TREE_TYPE (DR_REF (dr));
          vectype = get_vectype_for_scalar_type (scalar_type);
        }
      else
        {
          /* Reduction chain (no data-ref) - only valid in loop SLP.  */
          gcc_assert (loop_vinfo);
          vectype = STMT_VINFO_VECTYPE (vinfo_for_stmt (stmt));
        }

      group_size = GROUP_SIZE (vinfo_for_stmt (stmt));
    }
  else
    {
      /* Plain SLP reduction group: one lane per loop reduction.  */
      gcc_assert (loop_vinfo);
      vectype = STMT_VINFO_VECTYPE (vinfo_for_stmt (stmt));
      group_size = LOOP_VINFO_REDUCTIONS (loop_vinfo).length ();
    }

  if (!vectype)
    {
      if (dump_enabled_p ())
        {
          dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                           "Build SLP failed: unsupported data-type ");
          dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM, scalar_type);
        }

      return false;
    }

  nunits = TYPE_VECTOR_SUBPARTS (vectype);
  if (loop_vinfo)
    vectorization_factor = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
  else
    /* In basic-block SLP a single vector covers the group.  */
    vectorization_factor = nunits;

  /* Calculate the unrolling factor.  */
  unrolling_factor = least_common_multiple (nunits, group_size) / group_size;
  if (unrolling_factor != 1 && !loop_vinfo)
    {
      if (dump_enabled_p ())
        dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                         "Build SLP failed: unrolling required in basic"
                         " block SLP");

      return false;
    }

  /* Create a node (a root of the SLP tree) for the packed grouped stores.  */
  scalar_stmts.create (group_size);
  next = stmt;
  if (GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)))
    {
      /* Collect the stores and store them in SLP_TREE_SCALAR_STMTS.  */
      while (next)
        {
          /* Prefer the pattern stmt when pattern recognition replaced
             the original statement.  */
          if (STMT_VINFO_IN_PATTERN_P (vinfo_for_stmt (next))
              && STMT_VINFO_RELATED_STMT (vinfo_for_stmt (next)))
            scalar_stmts.safe_push (
                  STMT_VINFO_RELATED_STMT (vinfo_for_stmt (next)));
          else
            scalar_stmts.safe_push (next);
          next = GROUP_NEXT_ELEMENT (vinfo_for_stmt (next));
        }
    }
  else
    {
      /* Collect reduction statements.  */
      vec<gimple> reductions = LOOP_VINFO_REDUCTIONS (loop_vinfo);
      for (i = 0; reductions.iterate (i, &next); i++)
        scalar_stmts.safe_push (next);
    }

  node = vect_create_new_slp_node (scalar_stmts);

  loads.create (group_size);

  /* Build the tree for the SLP instance.  */
  if (vect_build_slp_tree (loop_vinfo, bb_vinfo, &node, group_size,
                           &max_nunits, &loads,
                           vectorization_factor, NULL, NULL))
    {
      /* Calculate the unrolling factor based on the smallest type.  */
      if (max_nunits > nunits)
        unrolling_factor = least_common_multiple (max_nunits, group_size)
                           / group_size;

      /* Re-check: the smallest type may have raised the unroll factor.  */
      if (unrolling_factor != 1 && !loop_vinfo)
        {
          if (dump_enabled_p ())
            dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                             "Build SLP failed: unrolling required in basic"
                             " block SLP");
          vect_free_slp_tree (node);
          loads.release ();
          return false;
        }

      /* Create a new SLP instance.  */
      new_instance = XNEW (struct _slp_instance);
      SLP_INSTANCE_TREE (new_instance) = node;
      SLP_INSTANCE_GROUP_SIZE (new_instance) = group_size;
      SLP_INSTANCE_UNROLLING_FACTOR (new_instance) = unrolling_factor;
      SLP_INSTANCE_BODY_COST_VEC (new_instance) = vNULL;
      SLP_INSTANCE_LOADS (new_instance) = loads;
      SLP_INSTANCE_FIRST_LOAD_STMT (new_instance) = NULL;

      /* Compute the load permutation.  */
      slp_tree load_node;
      bool loads_permuted = false;
      FOR_EACH_VEC_ELT (loads, i, load_node)
        {
          vec<unsigned> load_permutation;
          int j;
          gimple load, first_stmt;
          bool this_load_permuted = false;
          load_permutation.create (group_size);
          first_stmt = GROUP_FIRST_ELEMENT
              (vinfo_for_stmt (SLP_TREE_SCALAR_STMTS (load_node)[0]));
          FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (load_node), j, load)
            {
              int load_place
                = vect_get_place_in_interleaving_chain (load, first_stmt);
              gcc_assert (load_place != -1);
              if (load_place != j)
                this_load_permuted = true;
              load_permutation.safe_push (load_place);
            }
          if (!this_load_permuted)
            {
              /* Identity permutation - record nothing.  */
              load_permutation.release ();
              continue;
            }
          SLP_TREE_LOAD_PERMUTATION (load_node) = load_permutation;
          loads_permuted = true;
        }

      if (loads_permuted)
        {
          if (!vect_supported_load_permutation_p (new_instance))
            {
              if (dump_enabled_p ())
                {
                  dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                                   "Build SLP failed: unsupported load "
                                   "permutation ");
                  dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
                }
              /* Frees NODE and LOADS as well.  */
              vect_free_slp_instance (new_instance);
              return false;
            }

          SLP_INSTANCE_FIRST_LOAD_STMT (new_instance)
            = vect_find_first_load_in_slp_instance (new_instance);
        }

      /* Compute the costs of this SLP instance.  */
      vect_analyze_slp_cost (loop_vinfo, bb_vinfo,
                             new_instance, TYPE_VECTOR_SUBPARTS (vectype));

      if (loop_vinfo)
        LOOP_VINFO_SLP_INSTANCES (loop_vinfo).safe_push (new_instance);
      else
        BB_VINFO_SLP_INSTANCES (bb_vinfo).safe_push (new_instance);

      if (dump_enabled_p ())
        vect_print_slp_tree (MSG_NOTE, node);

      return true;
    }

  /* Failed to SLP.  */
  /* Free the allocated memory.  */
  vect_free_slp_tree (node);
  loads.release ();

  return false;
}
1605
1606
/* Check if there are stmts in the loop can be vectorized using SLP.  Build SLP
   trees of packed scalar stmts if SLP is possible.  */

bool
vect_analyze_slp (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo)
{
  unsigned int i;
  vec<gimple> grouped_stores;
  vec<gimple> reductions = vNULL;
  vec<gimple> reduc_chains = vNULL;
  gimple first_element;
  /* Set once any SLP instance has been built successfully.  */
  bool ok = false;

  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location, "=== vect_analyze_slp ===");

  if (loop_vinfo)
    {
      grouped_stores = LOOP_VINFO_GROUPED_STORES (loop_vinfo);
      reduc_chains = LOOP_VINFO_REDUCTION_CHAINS (loop_vinfo);
      reductions = LOOP_VINFO_REDUCTIONS (loop_vinfo);
    }
  else
    grouped_stores = BB_VINFO_GROUPED_STORES (bb_vinfo);

  /* Find SLP sequences starting from groups of grouped stores.  */
  FOR_EACH_VEC_ELT (grouped_stores, i, first_element)
    if (vect_analyze_slp_instance (loop_vinfo, bb_vinfo, first_element))
      ok = true;

  /* For a basic block SLP is the only vectorization strategy, so at
     least one instance must have been found.  */
  if (bb_vinfo && !ok)
    {
      if (dump_enabled_p ())
        dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                         "Failed to SLP the basic block.");

      return false;
    }

  if (loop_vinfo
      && LOOP_VINFO_REDUCTION_CHAINS (loop_vinfo).length () > 0)
    {
      /* Find SLP sequences starting from reduction chains.  */
      FOR_EACH_VEC_ELT (reduc_chains, i, first_element)
        if (vect_analyze_slp_instance (loop_vinfo, bb_vinfo, first_element))
          ok = true;
        else
          /* NOTE(review): a single failed reduction chain aborts the whole
             analysis here - presumably because the chain cannot be handled
             another way; confirm against vectorizable_reduction.  */
          return false;

      /* Don't try to vectorize SLP reductions if reduction chain was
         detected.  */
      return ok;
    }

  /* Find SLP sequences starting from groups of reductions.  */
  if (loop_vinfo && LOOP_VINFO_REDUCTIONS (loop_vinfo).length () > 1
      && vect_analyze_slp_instance (loop_vinfo, bb_vinfo, reductions[0]))
    ok = true;

  /* In the loop case SLP is optional, so the analysis succeeds even when
     no instance was built (OK is intentionally unused on this path).  */
  return true;
}
1668
1669
1670 /* For each possible SLP instance decide whether to SLP it and calculate overall
1671 unrolling factor needed to SLP the loop. Return TRUE if decided to SLP at
1672 least one instance. */
1673
1674 bool
1675 vect_make_slp_decision (loop_vec_info loop_vinfo)
1676 {
1677 unsigned int i, unrolling_factor = 1;
1678 vec<slp_instance> slp_instances = LOOP_VINFO_SLP_INSTANCES (loop_vinfo);
1679 slp_instance instance;
1680 int decided_to_slp = 0;
1681
1682 if (dump_enabled_p ())
1683 dump_printf_loc (MSG_NOTE, vect_location, "=== vect_make_slp_decision ===");
1684
1685 FOR_EACH_VEC_ELT (slp_instances, i, instance)
1686 {
1687 /* FORNOW: SLP if you can. */
1688 if (unrolling_factor < SLP_INSTANCE_UNROLLING_FACTOR (instance))
1689 unrolling_factor = SLP_INSTANCE_UNROLLING_FACTOR (instance);
1690
1691 /* Mark all the stmts that belong to INSTANCE as PURE_SLP stmts. Later we
1692 call vect_detect_hybrid_slp () to find stmts that need hybrid SLP and
1693 loop-based vectorization. Such stmts will be marked as HYBRID. */
1694 vect_mark_slp_stmts (SLP_INSTANCE_TREE (instance), pure_slp, -1);
1695 decided_to_slp++;
1696 }
1697
1698 LOOP_VINFO_SLP_UNROLLING_FACTOR (loop_vinfo) = unrolling_factor;
1699
1700 if (decided_to_slp && dump_enabled_p ())
1701 dump_printf_loc (MSG_NOTE, vect_location,
1702 "Decided to SLP %d instances. Unrolling factor %d",
1703 decided_to_slp, unrolling_factor);
1704
1705 return (decided_to_slp > 0);
1706 }
1707
1708
1709 /* Find stmts that must be both vectorized and SLPed (since they feed stmts that
1710 can't be SLPed) in the tree rooted at NODE. Mark such stmts as HYBRID. */
1711
1712 static void
1713 vect_detect_hybrid_slp_stmts (slp_tree node)
1714 {
1715 int i;
1716 vec<gimple> stmts = SLP_TREE_SCALAR_STMTS (node);
1717 gimple stmt = stmts[0];
1718 imm_use_iterator imm_iter;
1719 gimple use_stmt;
1720 stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
1721 slp_tree child;
1722 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
1723 struct loop *loop = NULL;
1724 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_vinfo);
1725 basic_block bb = NULL;
1726
1727 if (!node)
1728 return;
1729
1730 if (loop_vinfo)
1731 loop = LOOP_VINFO_LOOP (loop_vinfo);
1732 else
1733 bb = BB_VINFO_BB (bb_vinfo);
1734
1735 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, stmt)
1736 if (PURE_SLP_STMT (vinfo_for_stmt (stmt))
1737 && TREE_CODE (gimple_op (stmt, 0)) == SSA_NAME)
1738 FOR_EACH_IMM_USE_STMT (use_stmt, imm_iter, gimple_op (stmt, 0))
1739 if (gimple_bb (use_stmt)
1740 && ((loop && flow_bb_inside_loop_p (loop, gimple_bb (use_stmt)))
1741 || bb == gimple_bb (use_stmt))
1742 && (stmt_vinfo = vinfo_for_stmt (use_stmt))
1743 && !STMT_SLP_TYPE (stmt_vinfo)
1744 && (STMT_VINFO_RELEVANT (stmt_vinfo)
1745 || VECTORIZABLE_CYCLE_DEF (STMT_VINFO_DEF_TYPE (stmt_vinfo)))
1746 && !(gimple_code (use_stmt) == GIMPLE_PHI
1747 && STMT_VINFO_DEF_TYPE (stmt_vinfo)
1748 == vect_reduction_def))
1749 vect_mark_slp_stmts (node, hybrid, i);
1750
1751 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
1752 vect_detect_hybrid_slp_stmts (child);
1753 }
1754
1755
1756 /* Find stmts that must be both vectorized and SLPed. */
1757
1758 void
1759 vect_detect_hybrid_slp (loop_vec_info loop_vinfo)
1760 {
1761 unsigned int i;
1762 vec<slp_instance> slp_instances = LOOP_VINFO_SLP_INSTANCES (loop_vinfo);
1763 slp_instance instance;
1764
1765 if (dump_enabled_p ())
1766 dump_printf_loc (MSG_NOTE, vect_location, "=== vect_detect_hybrid_slp ===");
1767
1768 FOR_EACH_VEC_ELT (slp_instances, i, instance)
1769 vect_detect_hybrid_slp_stmts (SLP_INSTANCE_TREE (instance));
1770 }
1771
1772
1773 /* Create and initialize a new bb_vec_info struct for BB, as well as
1774 stmt_vec_info structs for all the stmts in it. */
1775
1776 static bb_vec_info
1777 new_bb_vec_info (basic_block bb)
1778 {
1779 bb_vec_info res = NULL;
1780 gimple_stmt_iterator gsi;
1781
1782 res = (bb_vec_info) xcalloc (1, sizeof (struct _bb_vec_info));
1783 BB_VINFO_BB (res) = bb;
1784
1785 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
1786 {
1787 gimple stmt = gsi_stmt (gsi);
1788 gimple_set_uid (stmt, 0);
1789 set_vinfo_for_stmt (stmt, new_stmt_vec_info (stmt, NULL, res));
1790 }
1791
1792 BB_VINFO_GROUPED_STORES (res).create (10);
1793 BB_VINFO_SLP_INSTANCES (res).create (2);
1794 BB_VINFO_TARGET_COST_DATA (res) = init_cost (NULL);
1795
1796 bb->aux = res;
1797 return res;
1798 }
1799
1800
1801 /* Free BB_VINFO struct, as well as all the stmt_vec_info structs of all the
1802 stmts in the basic block. */
1803
1804 static void
1805 destroy_bb_vec_info (bb_vec_info bb_vinfo)
1806 {
1807 vec<slp_instance> slp_instances;
1808 slp_instance instance;
1809 basic_block bb;
1810 gimple_stmt_iterator si;
1811 unsigned i;
1812
1813 if (!bb_vinfo)
1814 return;
1815
1816 bb = BB_VINFO_BB (bb_vinfo);
1817
1818 for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
1819 {
1820 gimple stmt = gsi_stmt (si);
1821 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1822
1823 if (stmt_info)
1824 /* Free stmt_vec_info. */
1825 free_stmt_vec_info (stmt);
1826 }
1827
1828 free_data_refs (BB_VINFO_DATAREFS (bb_vinfo));
1829 free_dependence_relations (BB_VINFO_DDRS (bb_vinfo));
1830 BB_VINFO_GROUPED_STORES (bb_vinfo).release ();
1831 slp_instances = BB_VINFO_SLP_INSTANCES (bb_vinfo);
1832 FOR_EACH_VEC_ELT (slp_instances, i, instance)
1833 vect_free_slp_instance (instance);
1834 BB_VINFO_SLP_INSTANCES (bb_vinfo).release ();
1835 destroy_cost_data (BB_VINFO_TARGET_COST_DATA (bb_vinfo));
1836 free (bb_vinfo);
1837 bb->aux = NULL;
1838 }
1839
1840
1841 /* Analyze statements contained in SLP tree node after recursively analyzing
1842 the subtree. Return TRUE if the operations are supported. */
1843
1844 static bool
1845 vect_slp_analyze_node_operations (bb_vec_info bb_vinfo, slp_tree node)
1846 {
1847 bool dummy;
1848 int i;
1849 gimple stmt;
1850 slp_tree child;
1851
1852 if (!node)
1853 return true;
1854
1855 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
1856 if (!vect_slp_analyze_node_operations (bb_vinfo, child))
1857 return false;
1858
1859 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, stmt)
1860 {
1861 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1862 gcc_assert (stmt_info);
1863 gcc_assert (PURE_SLP_STMT (stmt_info));
1864
1865 if (!vect_analyze_stmt (stmt, &dummy, node))
1866 return false;
1867 }
1868
1869 return true;
1870 }
1871
1872
1873 /* Analyze statements in SLP instances of the basic block. Return TRUE if the
1874 operations are supported. */
1875
1876 static bool
1877 vect_slp_analyze_operations (bb_vec_info bb_vinfo)
1878 {
1879 vec<slp_instance> slp_instances = BB_VINFO_SLP_INSTANCES (bb_vinfo);
1880 slp_instance instance;
1881 int i;
1882
1883 for (i = 0; slp_instances.iterate (i, &instance); )
1884 {
1885 if (!vect_slp_analyze_node_operations (bb_vinfo,
1886 SLP_INSTANCE_TREE (instance)))
1887 {
1888 vect_free_slp_instance (instance);
1889 slp_instances.ordered_remove (i);
1890 }
1891 else
1892 i++;
1893 }
1894
1895 if (!slp_instances.length ())
1896 return false;
1897
1898 return true;
1899 }
1900
1901
/* Compute the scalar cost of the SLP node NODE and its children
   and return it.  Do not account defs that are marked in LIFE and
   update LIFE according to uses of NODE.  LIFE[I] set means the I-th
   scalar stmt stays live (has a use outside the vectorized region) and
   its cost is excluded.  */

static unsigned
vect_bb_slp_scalar_cost (basic_block bb,
                         slp_tree node, vec<bool, va_stack> life)
{
  unsigned scalar_cost = 0;
  unsigned i;
  gimple stmt;
  slp_tree child;

  FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, stmt)
    {
      unsigned stmt_cost;
      ssa_op_iter op_iter;
      def_operand_p def_p;
      stmt_vec_info stmt_info;

      /* Already known to be kept live - never accounted.  */
      if (life[i])
        continue;

      /* If there is a non-vectorized use of the defs then the scalar
         stmt is kept live in which case we do not account it or any
         required defs in the SLP children in the scalar cost.  This
         way we make the vectorization more costly when compared to
         the scalar cost.  */
      FOR_EACH_SSA_DEF_OPERAND (def_p, stmt, op_iter, SSA_OP_DEF)
        {
          imm_use_iterator use_iter;
          gimple use_stmt;
          FOR_EACH_IMM_USE_STMT (use_stmt, use_iter, DEF_FROM_PTR (def_p))
            if (gimple_code (use_stmt) == GIMPLE_PHI
                || gimple_bb (use_stmt) != bb
                || !STMT_VINFO_VECTORIZABLE (vinfo_for_stmt (use_stmt)))
              {
                life[i] = true;
                BREAK_FROM_IMM_USE_STMT (use_iter);
              }
        }
      if (life[i])
        continue;

      /* Pick the cost kind matching the stmt: load, store or plain stmt.  */
      stmt_info = vinfo_for_stmt (stmt);
      if (STMT_VINFO_DATA_REF (stmt_info))
        {
          if (DR_IS_READ (STMT_VINFO_DATA_REF (stmt_info)))
            stmt_cost = vect_get_stmt_cost (scalar_load);
          else
            stmt_cost = vect_get_stmt_cost (scalar_store);
        }
      else
        stmt_cost = vect_get_stmt_cost (scalar_stmt);

      scalar_cost += stmt_cost;
    }

  /* Add the cost of the child nodes.  */
  FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
    scalar_cost += vect_bb_slp_scalar_cost (bb, child, life);

  return scalar_cost;
}
1965
/* Check if vectorization of the basic block is profitable.  Returns true
   iff the estimated vector cost (inside + outside) is strictly less than
   the estimated scalar cost.  */

static bool
vect_bb_vectorization_profitable_p (bb_vec_info bb_vinfo)
{
  vec<slp_instance> slp_instances = BB_VINFO_SLP_INSTANCES (bb_vinfo);
  slp_instance instance;
  int i, j;
  unsigned int vec_inside_cost = 0, vec_outside_cost = 0, scalar_cost = 0;
  unsigned int vec_prologue_cost = 0, vec_epilogue_cost = 0;
  void *target_cost_data = BB_VINFO_TARGET_COST_DATA (bb_vinfo);
  stmt_vec_info stmt_info = NULL;
  stmt_vector_for_cost body_cost_vec;
  stmt_info_for_cost *ci;

  /* Calculate vector costs.  Feed the per-instance body costs recorded
     during analysis into the target cost model.  */
  FOR_EACH_VEC_ELT (slp_instances, i, instance)
    {
      body_cost_vec = SLP_INSTANCE_BODY_COST_VEC (instance);

      FOR_EACH_VEC_ELT (body_cost_vec, j, ci)
        {
          stmt_info = ci->stmt ? vinfo_for_stmt (ci->stmt) : NULL;
          (void) add_stmt_cost (target_cost_data, ci->count, ci->kind,
                                stmt_info, ci->misalign, vect_body);
        }
    }

  /* Calculate scalar cost.  */
  FOR_EACH_VEC_ELT (slp_instances, i, instance)
    {
      /* One liveness flag per scalar stmt of the instance's group,
         initially all false.  */
      vec<bool, va_stack> life;
      vec_stack_alloc (bool, life, SLP_INSTANCE_GROUP_SIZE (instance));
      life.quick_grow_cleared (SLP_INSTANCE_GROUP_SIZE (instance));
      scalar_cost += vect_bb_slp_scalar_cost (BB_VINFO_BB (bb_vinfo),
                                              SLP_INSTANCE_TREE (instance),
                                              life);
      life.release ();
    }

  /* Complete the target-specific cost calculation.  */
  finish_cost (BB_VINFO_TARGET_COST_DATA (bb_vinfo), &vec_prologue_cost,
               &vec_inside_cost, &vec_epilogue_cost);

  vec_outside_cost = vec_prologue_cost + vec_epilogue_cost;

  if (dump_enabled_p ())
    {
      dump_printf_loc (MSG_NOTE, vect_location, "Cost model analysis: \n");
      dump_printf (MSG_NOTE, " Vector inside of basic block cost: %d\n",
                   vec_inside_cost);
      dump_printf (MSG_NOTE, " Vector prologue cost: %d\n", vec_prologue_cost);
      dump_printf (MSG_NOTE, " Vector epilogue cost: %d\n", vec_epilogue_cost);
      dump_printf (MSG_NOTE, " Scalar cost of basic block: %d", scalar_cost);
    }

  /* Vectorization is profitable if its cost is less than the cost of scalar
     version.  */
  if (vec_outside_cost + vec_inside_cost >= scalar_cost)
    return false;

  return true;
}
2029
/* Check if the basic block can be vectorized.  Runs the full analysis
   pipeline - data-refs, accesses, pattern recog, dependences, alignment,
   SLP discovery, operation support and the cost model - and returns the
   populated bb_vec_info on success, or NULL (after freeing it) on any
   failure.  */

static bb_vec_info
vect_slp_analyze_bb_1 (basic_block bb)
{
  bb_vec_info bb_vinfo;
  vec<slp_instance> slp_instances;
  slp_instance instance;
  int i;
  int min_vf = 2;

  bb_vinfo = new_bb_vec_info (bb);
  if (!bb_vinfo)
    return NULL;

  if (!vect_analyze_data_refs (NULL, bb_vinfo, &min_vf))
    {
      if (dump_enabled_p ())
        dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                         "not vectorized: unhandled data-ref in basic "
                         "block.\n");

      destroy_bb_vec_info (bb_vinfo);
      return NULL;
    }

  /* A single data-ref cannot form an SLP group.  */
  if (BB_VINFO_DATAREFS (bb_vinfo).length () < 2)
    {
      if (dump_enabled_p ())
        dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                         "not vectorized: not enough data-refs in "
                         "basic block.\n");

      destroy_bb_vec_info (bb_vinfo);
      return NULL;
    }

  if (!vect_analyze_data_ref_accesses (NULL, bb_vinfo))
    {
     if (dump_enabled_p ())
       dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                        "not vectorized: unhandled data access in "
                        "basic block.\n");

      destroy_bb_vec_info (bb_vinfo);
      return NULL;
    }

  vect_pattern_recog (NULL, bb_vinfo);

  if (!vect_slp_analyze_data_ref_dependences (bb_vinfo))
    {
      if (dump_enabled_p ())
        dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                         "not vectorized: unhandled data dependence "
                         "in basic block.\n");

      destroy_bb_vec_info (bb_vinfo);
      return NULL;
    }

  if (!vect_analyze_data_refs_alignment (NULL, bb_vinfo))
    {
      if (dump_enabled_p ())
        dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                         "not vectorized: bad data alignment in basic "
                         "block.\n");

      destroy_bb_vec_info (bb_vinfo);
      return NULL;
    }

  /* Check the SLP opportunities in the basic block, analyze and build SLP
     trees.  */
  if (!vect_analyze_slp (NULL, bb_vinfo))
    {
      if (dump_enabled_p ())
        dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                         "not vectorized: failed to find SLP opportunities "
                         "in basic block.\n");

      destroy_bb_vec_info (bb_vinfo);
      return NULL;
    }

  slp_instances = BB_VINFO_SLP_INSTANCES (bb_vinfo);

  /* Mark all the statements that we want to vectorize as pure SLP and
     relevant.  */
  FOR_EACH_VEC_ELT (slp_instances, i, instance)
    {
      vect_mark_slp_stmts (SLP_INSTANCE_TREE (instance), pure_slp, -1);
      vect_mark_slp_stmts_relevant (SLP_INSTANCE_TREE (instance));
    }

  if (!vect_verify_datarefs_alignment (NULL, bb_vinfo))
    {
      if (dump_enabled_p ())
        dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                         "not vectorized: unsupported alignment in basic "
                         "block.\n");
      destroy_bb_vec_info (bb_vinfo);
      return NULL;
    }

  if (!vect_slp_analyze_operations (bb_vinfo))
    {
      if (dump_enabled_p ())
        dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                         "not vectorized: bad operation in basic block.\n");

      destroy_bb_vec_info (bb_vinfo);
      return NULL;
    }

  /* Cost model: check if the vectorization is worthwhile.  */
  if (flag_vect_cost_model
      && !vect_bb_vectorization_profitable_p (bb_vinfo))
    {
      if (dump_enabled_p ())
        dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                         "not vectorized: vectorization is not "
                         "profitable.\n");

      destroy_bb_vec_info (bb_vinfo);
      return NULL;
    }

  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location,
                     "Basic block will be vectorized using SLP\n");

  return bb_vinfo;
}
2164
2165
2166 bb_vec_info
2167 vect_slp_analyze_bb (basic_block bb)
2168 {
2169 bb_vec_info bb_vinfo;
2170 int insns = 0;
2171 gimple_stmt_iterator gsi;
2172 unsigned int vector_sizes;
2173
2174 if (dump_enabled_p ())
2175 dump_printf_loc (MSG_NOTE, vect_location, "===vect_slp_analyze_bb===\n");
2176
2177 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
2178 {
2179 gimple stmt = gsi_stmt (gsi);
2180 if (!is_gimple_debug (stmt)
2181 && !gimple_nop_p (stmt)
2182 && gimple_code (stmt) != GIMPLE_LABEL)
2183 insns++;
2184 }
2185
2186 if (insns > PARAM_VALUE (PARAM_SLP_MAX_INSNS_IN_BB))
2187 {
2188 if (dump_enabled_p ())
2189 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2190 "not vectorized: too many instructions in "
2191 "basic block.\n");
2192
2193 return NULL;
2194 }
2195
2196 /* Autodetect first vector size we try. */
2197 current_vector_size = 0;
2198 vector_sizes = targetm.vectorize.autovectorize_vector_sizes ();
2199
2200 while (1)
2201 {
2202 bb_vinfo = vect_slp_analyze_bb_1 (bb);
2203 if (bb_vinfo)
2204 return bb_vinfo;
2205
2206 destroy_bb_vec_info (bb_vinfo);
2207
2208 vector_sizes &= ~current_vector_size;
2209 if (vector_sizes == 0
2210 || current_vector_size == 0)
2211 return NULL;
2212
2213 /* Try the next biggest vector size. */
2214 current_vector_size = 1 << floor_log2 (vector_sizes);
2215 if (dump_enabled_p ())
2216 dump_printf_loc (MSG_NOTE, vect_location,
2217 "***** Re-trying analysis with "
2218 "vector size %d\n", current_vector_size);
2219 }
2220 }
2221
2222
2223 /* SLP costs are calculated according to SLP instance unrolling factor (i.e.,
2224 the number of created vector stmts depends on the unrolling factor).
2225 However, the actual number of vector stmts for every SLP node depends on
2226 VF which is set later in vect_analyze_operations (). Hence, SLP costs
2227 should be updated. In this function we assume that the inside costs
2228 calculated in vect_model_xxx_cost are linear in ncopies. */
2229
2230 void
2231 vect_update_slp_costs_according_to_vf (loop_vec_info loop_vinfo)
2232 {
2233 unsigned int i, j, vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
2234 vec<slp_instance> slp_instances = LOOP_VINFO_SLP_INSTANCES (loop_vinfo);
2235 slp_instance instance;
2236 stmt_vector_for_cost body_cost_vec;
2237 stmt_info_for_cost *si;
2238 void *data = LOOP_VINFO_TARGET_COST_DATA (loop_vinfo);
2239
2240 if (dump_enabled_p ())
2241 dump_printf_loc (MSG_NOTE, vect_location,
2242 "=== vect_update_slp_costs_according_to_vf ===");
2243
2244 FOR_EACH_VEC_ELT (slp_instances, i, instance)
2245 {
2246 /* We assume that costs are linear in ncopies. */
2247 int ncopies = vf / SLP_INSTANCE_UNROLLING_FACTOR (instance);
2248
2249 /* Record the instance's instructions in the target cost model.
2250 This was delayed until here because the count of instructions
2251 isn't known beforehand. */
2252 body_cost_vec = SLP_INSTANCE_BODY_COST_VEC (instance);
2253
2254 FOR_EACH_VEC_ELT (body_cost_vec, j, si)
2255 (void) add_stmt_cost (data, si->count * ncopies, si->kind,
2256 vinfo_for_stmt (si->stmt), si->misalign,
2257 vect_body);
2258 }
2259 }
2260
2261
/* For constant and loop invariant defs of SLP_NODE this function returns
   (vector) defs (VEC_OPRNDS) that will be used in the vectorized stmts.
   OP_NUM determines if we gather defs for operand 0 or operand 1 of the RHS of
   scalar stmts.  NUMBER_OF_VECTORS is the number of vector defs to create.
   REDUC_INDEX is the index of the reduction operand in the statements, unless
   it is -1.  */

static void
vect_get_constant_vectors (tree op, slp_tree slp_node,
                           vec<tree> *vec_oprnds,
			   unsigned int op_num, unsigned int number_of_vectors,
                           int reduc_index)
{
  vec<gimple> stmts = SLP_TREE_SCALAR_STMTS (slp_node);
  gimple stmt = stmts[0];
  stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
  unsigned nunits;
  tree vec_cst;
  tree *elts;
  unsigned j, number_of_places_left_in_vector;
  tree vector_type;
  tree vop;
  int group_size = stmts.length ();
  unsigned int vec_num, i;
  unsigned number_of_copies = 1;
  vec<tree> voprnds;
  voprnds.create (number_of_vectors);
  bool constant_p, is_store;
  tree neutral_op = NULL;
  enum tree_code code = gimple_expr_code (stmt);
  gimple def_stmt;
  struct loop *loop;
  /* Conversion stmts generated for type-mismatched elements; flushed
     before the vector they feed.  */
  gimple_seq ctor_seq = NULL;

  /* For a reduction, select the reduced operand and compute the neutral
     element of the reduction operation, if it has one.  */
  if (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_reduction_def
      && reduc_index != -1)
    {
      op_num = reduc_index - 1;
      op = gimple_op (stmt, reduc_index);
      /* For additional copies (see the explanation of NUMBER_OF_COPIES below)
         we need either neutral operands or the original operands.  See
         get_initial_def_for_reduction() for details.  */
      switch (code)
        {
          case WIDEN_SUM_EXPR:
          case DOT_PROD_EXPR:
          case PLUS_EXPR:
          case MINUS_EXPR:
          case BIT_IOR_EXPR:
          case BIT_XOR_EXPR:
             if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (op)))
               neutral_op = build_real (TREE_TYPE (op), dconst0);
             else
               neutral_op = build_int_cst (TREE_TYPE (op), 0);

             break;

          case MULT_EXPR:
             if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (op)))
               neutral_op = build_real (TREE_TYPE (op), dconst1);
             else
               neutral_op = build_int_cst (TREE_TYPE (op), 1);

             break;

          case BIT_AND_EXPR:
            /* All-ones is the neutral element of bitwise AND.  */
            neutral_op = build_int_cst (TREE_TYPE (op), -1);
            break;

          case MAX_EXPR:
          case MIN_EXPR:
            /* MIN/MAX have no neutral element; use the initial value
               coming into the loop instead.  */
            def_stmt = SSA_NAME_DEF_STMT (op);
            loop = (gimple_bb (stmt))->loop_father;
            neutral_op = PHI_ARG_DEF_FROM_EDGE (def_stmt,
                                                loop_preheader_edge (loop));
            break;

          default:
            neutral_op = NULL;
        }
    }

  if (STMT_VINFO_DATA_REF (stmt_vinfo))
    {
      is_store = true;
      op = gimple_assign_rhs1 (stmt);
    }
  else
    is_store = false;

  gcc_assert (op);

  /* Track whether every element stays a compile-time constant; if so we
     can build a VECTOR_CST instead of a CONSTRUCTOR.  */
  if (CONSTANT_CLASS_P (op))
    constant_p = true;
  else
    constant_p = false;

  vector_type = get_vectype_for_scalar_type (TREE_TYPE (op));
  gcc_assert (vector_type);
  nunits = TYPE_VECTOR_SUBPARTS (vector_type);

  /* NUMBER_OF_COPIES is the number of times we need to use the same values in
     created vectors.  It is greater than 1 if unrolling is performed.

     For example, we have two scalar operands, s1 and s2 (e.g., group of
     strided accesses of size two), while NUNITS is four (i.e., four scalars
     of this type can be packed in a vector).  The output vector will contain
     two copies of each scalar operand: {s1, s2, s1, s2}.  (NUMBER_OF_COPIES
     will be 2).

     If GROUP_SIZE > NUNITS, the scalars will be split into several vectors
     containing the operands.

     For example, NUNITS is four as before, and the group size is 8
     (s1, s2, ..., s8).  We will create two vectors {s1, s2, s3, s4} and
     {s5, s6, s7, s8}.  */

  number_of_copies = least_common_multiple (nunits, group_size) / group_size;

  /* ELTS is filled from the highest slot downwards while the stmts are
     walked from the last group member to the first, so each finished
     vector ends up in source order.  */
  number_of_places_left_in_vector = nunits;
  elts = XALLOCAVEC (tree, nunits);
  for (j = 0; j < number_of_copies; j++)
    {
      for (i = group_size - 1; stmts.iterate (i, &stmt); i--)
        {
          if (is_store)
            op = gimple_assign_rhs1 (stmt);
          else
	    {
	      /* Pick the requested operand out of the scalar stmt; the
		 RHS layout depends on the stmt's code.  */
	      switch (code)
		{
		  case COND_EXPR:
		    if (op_num == 0 || op_num == 1)
		      {
			tree cond = gimple_assign_rhs1 (stmt);
			op = TREE_OPERAND (cond, op_num);
		      }
		    else
		      {
			if (op_num == 2)
			  op = gimple_assign_rhs2 (stmt);
			else
			  op = gimple_assign_rhs3 (stmt);
		      }
		    break;

		  case CALL_EXPR:
		    op = gimple_call_arg (stmt, op_num);
		    break;

		  case LSHIFT_EXPR:
		  case RSHIFT_EXPR:
		  case LROTATE_EXPR:
		  case RROTATE_EXPR:
		    op = gimple_op (stmt, op_num + 1);
		    /* Unlike the other binary operators, shifts/rotates have
		       the shift count being int, instead of the same type as
		       the lhs, so make sure the scalar is the right type if
		       we are dealing with vectors of
		       long long/long/short/char.  */
		    if (op_num == 1 && TREE_CODE (op) == INTEGER_CST)
		      op = fold_convert (TREE_TYPE (vector_type), op);
		    break;

		  default:
		    op = gimple_op (stmt, op_num + 1);
		    break;
		}
	    }

          if (reduc_index != -1)
            {
              loop = (gimple_bb (stmt))->loop_father;
              def_stmt = SSA_NAME_DEF_STMT (op);

              gcc_assert (loop);

              /* Get the def before the loop.  In reduction chain we have only
                 one initial value.  */
              if ((j != (number_of_copies - 1)
                   || (GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt))
                       && i != 0))
                  && neutral_op)
                op = neutral_op;
              else
                op = PHI_ARG_DEF_FROM_EDGE (def_stmt,
                                            loop_preheader_edge (loop));
            }

          /* Create 'vect_ = {op0,op1,...,opn}'.  */
          number_of_places_left_in_vector--;
	  if (!types_compatible_p (TREE_TYPE (vector_type), TREE_TYPE (op)))
	    {
	      /* Element type differs from the vector's element type:
		 view-convert it, either by folding (constants) or by
		 emitting a conversion stmt into CTOR_SEQ.  */
	      if (CONSTANT_CLASS_P (op))
		{
		  op = fold_unary (VIEW_CONVERT_EXPR,
				   TREE_TYPE (vector_type), op);
		  gcc_assert (op && CONSTANT_CLASS_P (op));
		}
	      else
		{
		  tree new_temp
		    = make_ssa_name (TREE_TYPE (vector_type), NULL);
		  gimple init_stmt;
		  op = build1 (VIEW_CONVERT_EXPR, TREE_TYPE (vector_type),
			       op);
		  init_stmt
		    = gimple_build_assign_with_ops (VIEW_CONVERT_EXPR,
						    new_temp, op, NULL_TREE);
		  gimple_seq_add_stmt (&ctor_seq, init_stmt);
		  op = new_temp;
		}
	    }
	  elts[number_of_places_left_in_vector] = op;
	  if (!CONSTANT_CLASS_P (op))
	    constant_p = false;

	  /* A vector's worth of elements collected: materialize it.  */
          if (number_of_places_left_in_vector == 0)
            {
              number_of_places_left_in_vector = nunits;

	      if (constant_p)
		vec_cst = build_vector (vector_type, elts);
	      else
		{
		  vec<constructor_elt, va_gc> *v;
		  unsigned k;
		  vec_alloc (v, nunits);
		  for (k = 0; k < nunits; ++k)
		    CONSTRUCTOR_APPEND_ELT (v, NULL_TREE, elts[k]);
		  vec_cst = build_constructor (vector_type, v);
		}
              voprnds.quick_push (vect_init_vector (stmt, vec_cst,
						    vector_type, NULL));
	      /* Insert any pending conversion stmts right before the
		 initializer that uses their results.  */
	      if (ctor_seq != NULL)
		{
		  gimple init_stmt = SSA_NAME_DEF_STMT (voprnds.last ());
		  gimple_stmt_iterator gsi = gsi_for_stmt (init_stmt);
		  gsi_insert_seq_before_without_update (&gsi, ctor_seq,
							GSI_SAME_STMT);
		  ctor_seq = NULL;
		}
            }
        }
    }

  /* Since the vectors are created in the reverse order, we should invert
     them.  */
  vec_num = voprnds.length ();
  for (j = vec_num; j != 0; j--)
    {
      vop = voprnds[j - 1];
      vec_oprnds->quick_push (vop);
    }

  voprnds.release ();

  /* In case that VF is greater than the unrolling factor needed for the SLP
     group of stmts, NUMBER_OF_VECTORS to be created is greater than
     NUMBER_OF_SCALARS/NUNITS or NUNITS/NUMBER_OF_SCALARS, and hence we have
     to replicate the vectors.  */
  while (number_of_vectors > vec_oprnds->length ())
    {
      tree neutral_vec = NULL;

      if (neutral_op)
        {
          if (!neutral_vec)
	    neutral_vec = build_vector_from_val (vector_type, neutral_op);

          vec_oprnds->quick_push (neutral_vec);
        }
      else
        {
	  /* No neutral element: repeat the vectors built above.  */
          for (i = 0; vec_oprnds->iterate (i, &vop) && i < vec_num; i++)
            vec_oprnds->quick_push (vop);
        }
    }
}
2541
2542
2543 /* Get vectorized definitions from SLP_NODE that contains corresponding
2544 vectorized def-stmts. */
2545
2546 static void
2547 vect_get_slp_vect_defs (slp_tree slp_node, vec<tree> *vec_oprnds)
2548 {
2549 tree vec_oprnd;
2550 gimple vec_def_stmt;
2551 unsigned int i;
2552
2553 gcc_assert (SLP_TREE_VEC_STMTS (slp_node).exists ());
2554
2555 FOR_EACH_VEC_ELT (SLP_TREE_VEC_STMTS (slp_node), i, vec_def_stmt)
2556 {
2557 gcc_assert (vec_def_stmt);
2558 vec_oprnd = gimple_get_lhs (vec_def_stmt);
2559 vec_oprnds->quick_push (vec_oprnd);
2560 }
2561 }
2562
2563
/* Get vectorized definitions for SLP_NODE.
   If the scalar definitions are loop invariants or constants, collect them and
   call vect_get_constant_vectors() to create vector stmts.
   Otherwise, the def-stmts must be already vectorized and the vectorized stmts
   must be stored in the corresponding child of SLP_NODE, and we call
   vect_get_slp_vect_defs () to retrieve them.

   OPS holds the scalar operands to produce vector defs for; one vec<tree>
   of defs is pushed onto VEC_OPRNDS per operand.  REDUC_INDEX is the index
   of the reduction operand, or -1; for reductions only the defs for the
   first operand are created.  */

void
vect_get_slp_defs (vec<tree> ops, slp_tree slp_node,
		   vec<vec<tree> > *vec_oprnds, int reduc_index)
{
  gimple first_stmt;
  int number_of_vects = 0, i;
  unsigned int child_index = 0;
  HOST_WIDE_INT lhs_size_unit, rhs_size_unit;
  slp_tree child = NULL;
  vec<tree> vec_defs;
  tree oprnd;
  bool vectorized_defs;

  first_stmt = SLP_TREE_SCALAR_STMTS (slp_node)[0];
  FOR_EACH_VEC_ELT (ops, i, oprnd)
    {
      /* For each operand we check if it has vectorized definitions in a child
	 node or we need to create them (for invariants and constants).  We
	 check if the LHS of the first stmt of the next child matches OPRND.
	 If it does, we found the correct child.  Otherwise, we call
	 vect_get_constant_vectors (), and not advance CHILD_INDEX in order
	 to check this child node for the next operand.  */
      vectorized_defs = false;
      if (SLP_TREE_CHILDREN (slp_node).length () > child_index)
        {
          child = SLP_TREE_CHILDREN (slp_node)[child_index];

	  /* We have to check both pattern and original def, if available.  */
	  gimple first_def = SLP_TREE_SCALAR_STMTS (child)[0];
	  gimple related = STMT_VINFO_RELATED_STMT (vinfo_for_stmt (first_def));

	  if (operand_equal_p (oprnd, gimple_get_lhs (first_def), 0)
	      || (related
		  && operand_equal_p (oprnd, gimple_get_lhs (related), 0)))
	    {
	      /* The number of vector defs is determined by the number of
		 vector statements in the node from which we get those
		 statements.  */
	      number_of_vects = SLP_TREE_NUMBER_OF_VEC_STMTS (child);
	      vectorized_defs = true;
	      child_index++;
	    }
        }

      if (!vectorized_defs)
        {
	  /* Only computed once: subsequent operands reuse the count.  */
          if (i == 0)
            {
              number_of_vects = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
              /* Number of vector stmts was calculated according to LHS in
                 vect_schedule_slp_instance (), fix it by replacing LHS with
                 RHS, if necessary.  See vect_get_smallest_scalar_type () for
                 details.  */
              vect_get_smallest_scalar_type (first_stmt, &lhs_size_unit,
                                             &rhs_size_unit);
              if (rhs_size_unit != lhs_size_unit)
                {
                  number_of_vects *= rhs_size_unit;
                  number_of_vects /= lhs_size_unit;
                }
            }
        }

      /* Allocate memory for vectorized defs.  */
      vec_defs = vNULL;
      vec_defs.create (number_of_vects);

      /* For reduction defs we call vect_get_constant_vectors (), since we are
         looking for initial loop invariant values.  */
      if (vectorized_defs && reduc_index == -1)
        /* The defs are already vectorized.  */
	vect_get_slp_vect_defs (child, &vec_defs);
      else
	/* Build vectors from scalar defs.  */
	vect_get_constant_vectors (oprnd, slp_node, &vec_defs, i,
				   number_of_vects, reduc_index);

      vec_oprnds->quick_push (vec_defs);

      /* For reductions, we only need initial values.  */
      if (reduc_index != -1)
	return;
    }
}
2655
2656
/* Create NCOPIES permutation statements using the mask MASK_BYTES (by
   building a vector of type MASK_TYPE from it) and two input vectors placed in
   DR_CHAIN at FIRST_VEC_INDX and SECOND_VEC_INDX for the first copy and
   shifting by STRIDE elements of DR_CHAIN for every copy.
   (STRIDE is the number of vectorized stmts for NODE divided by the number of
   copies).
   VECT_STMTS_COUNTER specifies the index in the vectorized stmts of NODE, where
   the created stmts must be inserted.  */

static inline void
vect_create_mask_and_perm (gimple stmt, gimple next_scalar_stmt,
                           tree mask, int first_vec_indx, int second_vec_indx,
                           gimple_stmt_iterator *gsi, slp_tree node,
                           tree vectype, vec<tree> dr_chain,
                           int ncopies, int vect_stmts_counter)
{
  tree perm_dest;
  gimple perm_stmt = NULL;
  stmt_vec_info next_stmt_info;
  int i, stride;
  tree first_vec, second_vec, data_ref;

  stride = SLP_TREE_NUMBER_OF_VEC_STMTS (node) / ncopies;

  /* Initialize the vect stmts of NODE to properly insert the generated
     stmts later.  */
  for (i = SLP_TREE_VEC_STMTS (node).length ();
       i < (int) SLP_TREE_NUMBER_OF_VEC_STMTS (node); i++)
    SLP_TREE_VEC_STMTS (node).quick_push (NULL);

  perm_dest = vect_create_destination_var (gimple_assign_lhs (stmt), vectype);
  for (i = 0; i < ncopies; i++)
    {
      first_vec = dr_chain[first_vec_indx];
      second_vec = dr_chain[second_vec_indx];

      /* Generate the permute statement: a VEC_PERM_EXPR selecting from
	 the two input vectors according to MASK.  */
      perm_stmt = gimple_build_assign_with_ops (VEC_PERM_EXPR, perm_dest,
						first_vec, second_vec, mask);
      data_ref = make_ssa_name (perm_dest, perm_stmt);
      gimple_set_lhs (perm_stmt, data_ref);
      vect_finish_stmt_generation (stmt, perm_stmt, gsi);

      /* Store the vector statement in NODE.  */
      SLP_TREE_VEC_STMTS (node)[stride * i + vect_stmts_counter] = perm_stmt;

      /* Advance both inputs by STRIDE for the next copy.  */
      first_vec_indx += stride;
      second_vec_indx += stride;
    }

  /* Mark the scalar stmt as vectorized; the last generated permute
     becomes its vectorized counterpart.  */
  next_stmt_info = vinfo_for_stmt (next_scalar_stmt);
  STMT_VINFO_VEC_STMT (next_stmt_info) = perm_stmt;
}
2711
2712
/* Given FIRST_MASK_ELEMENT - the mask element in element representation,
   return in CURRENT_MASK_ELEMENT its equivalent in target specific
   representation.  Check that the mask is valid and return FALSE if not.
   Return TRUE in NEED_NEXT_VECTOR if the permutation requires to move to
   the next vector, i.e., the current first vector is not needed.

   STMT is used only for diagnostics.  MASK_NUNITS is the number of
   elements per vector; INDEX is the position being filled in MASK (the
   partially-built mask for the current vector, which this function may
   rewrite in place).  NUMBER_OF_MASK_FIXES, MASK_FIXED and
   NEEDS_FIRST_VECTOR carry state between successive calls for the same
   mask and are reset when the mask is complete.  */

static bool
vect_get_mask_element (gimple stmt, int first_mask_element, int m,
                       int mask_nunits, bool only_one_vec, int index,
                       unsigned char *mask, int *current_mask_element,
                       bool *need_next_vector, int *number_of_mask_fixes,
                       bool *mask_fixed, bool *needs_first_vector)
{
  int i;

  /* Convert to target specific representation.  */
  *current_mask_element = first_mask_element + m;
  /* Adjust the value in case it's a mask for second and third vectors.  */
  *current_mask_element -= mask_nunits * (*number_of_mask_fixes - 1);

  if (*current_mask_element < mask_nunits)
    *needs_first_vector = true;

  /* We have only one input vector to permute but the mask accesses values in
     the next vector as well.  */
  if (only_one_vec && *current_mask_element >= mask_nunits)
    {
      if (dump_enabled_p ())
        {
          dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			   "permutation requires at least two vectors ");
          dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
        }

      return false;
    }

  /* The mask requires the next vector.  */
  if (*current_mask_element >= mask_nunits * 2)
    {
      if (*needs_first_vector || *mask_fixed)
        {
          /* We either need the first vector too or have already moved to the
             next vector.  In both cases, this permutation needs three
             vectors.  */
          if (dump_enabled_p ())
            {
              dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			       "permutation requires at "
			       "least three vectors ");
              dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
            }

          return false;
        }

      /* We move to the next vector, dropping the first one and working with
         the second and the third - we need to adjust the values of the mask
         accordingly.  */
      *current_mask_element -= mask_nunits * *number_of_mask_fixes;

      /* Rebase the mask elements already emitted for this vector too.  */
      for (i = 0; i < index; i++)
        mask[i] -= mask_nunits * *number_of_mask_fixes;

      (*number_of_mask_fixes)++;
      *mask_fixed = true;
    }

  *need_next_vector = *mask_fixed;

  /* This was the last element of this mask.  Start a new one.  */
  if (index == mask_nunits - 1)
    {
      *number_of_mask_fixes = 1;
      *mask_fixed = false;
      *needs_first_vector = false;
    }

  return true;
}
2793
2794
/* Generate vector permute statements from a list of loads in DR_CHAIN.
   If ANALYZE_ONLY is TRUE, only check that it is possible to create valid
   permute statements for the SLP node NODE of the SLP instance
   SLP_NODE_INSTANCE.  VF is the final vectorization factor; GSI is the
   insertion point for the generated stmts.  Returns false if the target
   cannot perform some required permutation.  */

bool
vect_transform_slp_perm_load (slp_tree node, vec<tree> dr_chain,
                              gimple_stmt_iterator *gsi, int vf,
                              slp_instance slp_node_instance, bool analyze_only)
{
  gimple stmt = SLP_TREE_SCALAR_STMTS (node)[0];
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
  tree mask_element_type = NULL_TREE, mask_type;
  int i, j, k, nunits, vec_index = 0, scalar_index;
  tree vectype = STMT_VINFO_VECTYPE (stmt_info);
  gimple next_scalar_stmt;
  int group_size = SLP_INSTANCE_GROUP_SIZE (slp_node_instance);
  int first_mask_element;
  int index, unroll_factor, current_mask_element, ncopies;
  unsigned char *mask;
  bool only_one_vec = false, need_next_vector = false;
  int first_vec_index, second_vec_index, orig_vec_stmts_num, vect_stmts_counter;
  int number_of_mask_fixes = 1;
  bool mask_fixed = false;
  bool needs_first_vector = false;
  enum machine_mode mode;

  mode = TYPE_MODE (vectype);

  /* Bail out early if the target has no vector permute at all for this
     mode.  Individual masks are checked below.  */
  if (!can_vec_perm_p (mode, false, NULL))
    {
      if (dump_enabled_p ())
        {
          dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			   "no vect permute for ");
          dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
        }
      return false;
    }

  /* The generic VEC_PERM_EXPR code always uses an integral type of the
     same size as the vector element being permuted.  */
  mask_element_type = lang_hooks.types.type_for_mode
                (int_mode_for_mode (TYPE_MODE (TREE_TYPE (vectype))), 1);
  mask_type = get_vectype_for_scalar_type (mask_element_type);
  nunits = TYPE_VECTOR_SUBPARTS (vectype);
  mask = XALLOCAVEC (unsigned char, nunits);
  unroll_factor = SLP_INSTANCE_UNROLLING_FACTOR (slp_node_instance);

  /* The number of vector stmts to generate based only on SLP_NODE_INSTANCE
     unrolling factor.  */
  orig_vec_stmts_num = group_size *
                SLP_INSTANCE_UNROLLING_FACTOR (slp_node_instance) / nunits;
  if (orig_vec_stmts_num == 1)
    only_one_vec = true;

  /* Number of copies is determined by the final vectorization factor
     relatively to SLP_NODE_INSTANCE unrolling factor.  */
  ncopies = vf / SLP_INSTANCE_UNROLLING_FACTOR (slp_node_instance);

  if (!STMT_VINFO_GROUPED_ACCESS (stmt_info))
    return false;

  /* Generate permutation masks for every NODE. Number of masks for each NODE
     is equal to GROUP_SIZE.
     E.g., we have a group of three nodes with three loads from the same
     location in each node, and the vector size is 4. I.e., we have a
     a0b0c0a1b1c1... sequence and we need to create the following vectors:
     for a's: a0a0a0a1 a1a1a2a2 a2a3a3a3
     for b's: b0b0b0b1 b1b1b2b2 b2b3b3b3
     ...

     The masks for a's should be: {0,0,0,3} {3,3,6,6} {6,9,9,9}.
     The last mask is illegal since we assume two operands for permute
     operation, and the mask element values can't be outside that range.
     Hence, the last mask must be converted into {2,5,5,5}.
     For the first two permutations we need the first and the second input
     vectors: {a0,b0,c0,a1} and {b1,c1,a2,b2}, and for the last permutation
     we need the second and the third vectors: {b1,c1,a2,b2} and
     {c2,a3,b3,c3}.  */

  {
      scalar_index = 0;
      index = 0;
      vect_stmts_counter = 0;
      vec_index = 0;
      first_vec_index = vec_index++;
      if (only_one_vec)
        second_vec_index = first_vec_index;
      else
        second_vec_index =  vec_index++;

      for (j = 0; j < unroll_factor; j++)
        {
          for (k = 0; k < group_size; k++)
            {
	      /* Element K of the load permutation, offset by the copy
		 number, gives the desired source element.  */
	      i = SLP_TREE_LOAD_PERMUTATION (node)[k];
	      first_mask_element = i + j * group_size;
	      if (!vect_get_mask_element (stmt, first_mask_element, 0,
					  nunits, only_one_vec, index,
					  mask, &current_mask_element,
					  &need_next_vector,
					  &number_of_mask_fixes, &mask_fixed,
					  &needs_first_vector))
		return false;
	      mask[index++] = current_mask_element;

              if (index == nunits)
                {
		  /* A full mask is assembled; validate it and (unless only
		     analyzing) emit the permute stmt.  */
		  index = 0;
		  if (!can_vec_perm_p (mode, false, mask))
		    {
		      if (dump_enabled_p ())
			{
			  dump_printf_loc (MSG_MISSED_OPTIMIZATION,
					   vect_location, 
					   "unsupported vect permute { ");
			  for (i = 0; i < nunits; ++i)
			    dump_printf (MSG_MISSED_OPTIMIZATION, "%d ",
					 mask[i]);
			  dump_printf (MSG_MISSED_OPTIMIZATION, "}\n");
			}
		      return false;
		    }

                  if (!analyze_only)
                    {
		      int l;
                      tree mask_vec, *mask_elts;
		      mask_elts = XALLOCAVEC (tree, nunits);
		      for (l = 0; l < nunits; ++l)
			mask_elts[l] = build_int_cst (mask_element_type,
						      mask[l]);
		      mask_vec = build_vector (mask_type, mask_elts);

		      if (need_next_vector)
                        {
                          first_vec_index = second_vec_index;
                          second_vec_index = vec_index;
                        }

                      next_scalar_stmt
			= SLP_TREE_SCALAR_STMTS (node)[scalar_index++];

                      vect_create_mask_and_perm (stmt, next_scalar_stmt,
                               mask_vec, first_vec_index, second_vec_index,
			       gsi, node, vectype, dr_chain,
			       ncopies, vect_stmts_counter++);
                    }
                }
            }
        }
  }

  return true;
}
2951
2952
2953
/* Vectorize SLP instance tree in postorder.  Children are vectorized
   first so their defs are available; then the stmts of NODE itself are
   transformed.  Returns true if the node's stmt is a store.  */

static bool
vect_schedule_slp_instance (slp_tree node, slp_instance instance,
                            unsigned int vectorization_factor)
{
  gimple stmt;
  bool grouped_store, is_store;
  gimple_stmt_iterator si;
  stmt_vec_info stmt_info;
  unsigned int vec_stmts_size, nunits, group_size;
  tree vectype;
  int i;
  slp_tree child;

  if (!node)
    return false;

  /* Postorder: vectorize all children first.  */
  FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
    vect_schedule_slp_instance (child, instance, vectorization_factor);

  stmt = SLP_TREE_SCALAR_STMTS (node)[0];
  stmt_info = vinfo_for_stmt (stmt);

  /* VECTYPE is the type of the destination.  */
  vectype = STMT_VINFO_VECTYPE (stmt_info);
  nunits = (unsigned int) TYPE_VECTOR_SUBPARTS (vectype);
  group_size = SLP_INSTANCE_GROUP_SIZE (instance);

  /* For each SLP instance calculate number of vector stmts to be created
     for the scalar stmts in each node of the SLP tree.  Number of vector
     elements in one vector iteration is the number of scalar elements in
     one scalar iteration (GROUP_SIZE) multiplied by VF divided by vector
     size.  */
  vec_stmts_size = (vectorization_factor * group_size) / nunits;

  if (!SLP_TREE_VEC_STMTS (node).exists ())
    {
      SLP_TREE_VEC_STMTS (node).create (vec_stmts_size);
      SLP_TREE_NUMBER_OF_VEC_STMTS (node) = vec_stmts_size;
    }

  if (dump_enabled_p ())
    {
      dump_printf_loc (MSG_NOTE,vect_location,
		       "------>vectorizing SLP node starting from: ");
      dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
    }

  /* Loads should be inserted before the first load.  */
  if (SLP_INSTANCE_FIRST_LOAD_STMT (instance)
      && STMT_VINFO_GROUPED_ACCESS (stmt_info)
      && !REFERENCE_CLASS_P (gimple_get_lhs (stmt))
      && SLP_TREE_LOAD_PERMUTATION (node).exists ())
    si = gsi_for_stmt (SLP_INSTANCE_FIRST_LOAD_STMT (instance));
  else if (is_pattern_stmt_p (stmt_info))
    /* Pattern stmts are not in the IL; insert at the original stmt.  */
    si = gsi_for_stmt (STMT_VINFO_RELATED_STMT (stmt_info));
  else
    si = gsi_for_stmt (stmt);

  /* Stores should be inserted just before the last store.  */
  if (STMT_VINFO_GROUPED_ACCESS (stmt_info)
      && REFERENCE_CLASS_P (gimple_get_lhs (stmt)))
    { 
      gimple last_store = vect_find_last_store_in_slp_instance (instance);
      if (is_pattern_stmt_p (vinfo_for_stmt (last_store)))
       last_store = STMT_VINFO_RELATED_STMT (vinfo_for_stmt (last_store));
      si = gsi_for_stmt (last_store);
    }

  /* Mark the first element of the reduction chain as reduction to properly
     transform the node.  In the analysis phase only the last element of the
     chain is marked as reduction.  */
  if (GROUP_FIRST_ELEMENT (stmt_info) && !STMT_VINFO_GROUPED_ACCESS (stmt_info)
      && GROUP_FIRST_ELEMENT (stmt_info) == stmt)
    {
      STMT_VINFO_DEF_TYPE (stmt_info) = vect_reduction_def;
      STMT_VINFO_TYPE (stmt_info) = reduc_vec_info_type;
    }

  is_store = vect_transform_stmt (stmt, &si, &grouped_store, node, instance);
  return is_store;
}
3037
3038 /* Replace scalar calls from SLP node NODE with setting of their lhs to zero.
3039 For loop vectorization this is done in vectorizable_call, but for SLP
3040 it needs to be deferred until end of vect_schedule_slp, because multiple
3041 SLP instances may refer to the same scalar stmt. */
3042
3043 static void
3044 vect_remove_slp_scalar_calls (slp_tree node)
3045 {
3046 gimple stmt, new_stmt;
3047 gimple_stmt_iterator gsi;
3048 int i;
3049 slp_tree child;
3050 tree lhs;
3051 stmt_vec_info stmt_info;
3052
3053 if (!node)
3054 return;
3055
3056 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
3057 vect_remove_slp_scalar_calls (child);
3058
3059 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, stmt)
3060 {
3061 if (!is_gimple_call (stmt) || gimple_bb (stmt) == NULL)
3062 continue;
3063 stmt_info = vinfo_for_stmt (stmt);
3064 if (stmt_info == NULL
3065 || is_pattern_stmt_p (stmt_info)
3066 || !PURE_SLP_STMT (stmt_info))
3067 continue;
3068 lhs = gimple_call_lhs (stmt);
3069 new_stmt = gimple_build_assign (lhs, build_zero_cst (TREE_TYPE (lhs)));
3070 set_vinfo_for_stmt (new_stmt, stmt_info);
3071 set_vinfo_for_stmt (stmt, NULL);
3072 STMT_VINFO_STMT (stmt_info) = new_stmt;
3073 gsi = gsi_for_stmt (stmt);
3074 gsi_replace (&gsi, new_stmt, false);
3075 SSA_NAME_DEF_STMT (gimple_assign_lhs (new_stmt)) = new_stmt;
3076 }
3077 }
3078
3079 /* Generate vector code for all SLP instances in the loop/basic block. */
3080
3081 bool
3082 vect_schedule_slp (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo)
3083 {
3084 vec<slp_instance> slp_instances;
3085 slp_instance instance;
3086 unsigned int i, vf;
3087 bool is_store = false;
3088
3089 if (loop_vinfo)
3090 {
3091 slp_instances = LOOP_VINFO_SLP_INSTANCES (loop_vinfo);
3092 vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
3093 }
3094 else
3095 {
3096 slp_instances = BB_VINFO_SLP_INSTANCES (bb_vinfo);
3097 vf = 1;
3098 }
3099
3100 FOR_EACH_VEC_ELT (slp_instances, i, instance)
3101 {
3102 /* Schedule the tree of INSTANCE. */
3103 is_store = vect_schedule_slp_instance (SLP_INSTANCE_TREE (instance),
3104 instance, vf);
3105 if (dump_enabled_p ())
3106 dump_printf_loc (MSG_NOTE, vect_location,
3107 "vectorizing stmts using SLP.");
3108 }
3109
3110 FOR_EACH_VEC_ELT (slp_instances, i, instance)
3111 {
3112 slp_tree root = SLP_INSTANCE_TREE (instance);
3113 gimple store;
3114 unsigned int j;
3115 gimple_stmt_iterator gsi;
3116
3117 /* Remove scalar call stmts. Do not do this for basic-block
3118 vectorization as not all uses may be vectorized.
3119 ??? Why should this be necessary? DCE should be able to
3120 remove the stmts itself.
3121 ??? For BB vectorization we can as well remove scalar
3122 stmts starting from the SLP tree root if they have no
3123 uses. */
3124 if (loop_vinfo)
3125 vect_remove_slp_scalar_calls (root);
3126
3127 for (j = 0; SLP_TREE_SCALAR_STMTS (root).iterate (j, &store)
3128 && j < SLP_INSTANCE_GROUP_SIZE (instance); j++)
3129 {
3130 if (!STMT_VINFO_DATA_REF (vinfo_for_stmt (store)))
3131 break;
3132
3133 if (is_pattern_stmt_p (vinfo_for_stmt (store)))
3134 store = STMT_VINFO_RELATED_STMT (vinfo_for_stmt (store));
3135 /* Free the attached stmt_vec_info and remove the stmt. */
3136 gsi = gsi_for_stmt (store);
3137 unlink_stmt_vdef (store);
3138 gsi_remove (&gsi, true);
3139 release_defs (store);
3140 free_stmt_vec_info (store);
3141 }
3142 }
3143
3144 return is_store;
3145 }
3146
3147
3148 /* Vectorize the basic block. */
3149
3150 void
3151 vect_slp_transform_bb (basic_block bb)
3152 {
3153 bb_vec_info bb_vinfo = vec_info_for_bb (bb);
3154 gimple_stmt_iterator si;
3155
3156 gcc_assert (bb_vinfo);
3157
3158 if (dump_enabled_p ())
3159 dump_printf_loc (MSG_NOTE, vect_location, "SLPing BB\n");
3160
3161 for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
3162 {
3163 gimple stmt = gsi_stmt (si);
3164 stmt_vec_info stmt_info;
3165
3166 if (dump_enabled_p ())
3167 {
3168 dump_printf_loc (MSG_NOTE, vect_location,
3169 "------>SLPing statement: ");
3170 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
3171 }
3172
3173 stmt_info = vinfo_for_stmt (stmt);
3174 gcc_assert (stmt_info);
3175
3176 /* Schedule all the SLP instances when the first SLP stmt is reached. */
3177 if (STMT_SLP_TYPE (stmt_info))
3178 {
3179 vect_schedule_slp (NULL, bb_vinfo);
3180 break;
3181 }
3182 }
3183
3184 if (dump_enabled_p ())
3185 dump_printf_loc (MSG_NOTE, vect_location,
3186 "BASIC BLOCK VECTORIZED\n");
3187
3188 destroy_bb_vec_info (bb_vinfo);
3189 }