tree-vect-slp.c (vect_get_and_check_slp_defs): Remove broken code swapping operands.
1 /* SLP - Basic Block Vectorization
2 Copyright (C) 2007-2013 Free Software Foundation, Inc.
3 Contributed by Dorit Naishlos <dorit@il.ibm.com>
4 and Ira Rosen <irar@il.ibm.com>
5
6 This file is part of GCC.
7
8 GCC is free software; you can redistribute it and/or modify it under
9 the terms of the GNU General Public License as published by the Free
10 Software Foundation; either version 3, or (at your option) any later
11 version.
12
13 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
14 WARRANTY; without even the implied warranty of MERCHANTABILITY or
15 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
16 for more details.
17
18 You should have received a copy of the GNU General Public License
19 along with GCC; see the file COPYING3. If not see
20 <http://www.gnu.org/licenses/>. */
21
22 #include "config.h"
23 #include "system.h"
24 #include "coretypes.h"
25 #include "dumpfile.h"
26 #include "tm.h"
27 #include "ggc.h"
28 #include "tree.h"
29 #include "target.h"
30 #include "basic-block.h"
31 #include "gimple-pretty-print.h"
32 #include "tree-flow.h"
33 #include "tree-pass.h"
34 #include "cfgloop.h"
35 #include "expr.h"
36 #include "recog.h" /* FIXME: for insn_data */
37 #include "optabs.h"
38 #include "tree-vectorizer.h"
39 #include "langhooks.h"
40
41 /* Extract the location of the basic block in the source code.
42 Return the basic block location if one is found and UNKNOWN_LOC otherwise. */
43
44 LOC
45 find_bb_location (basic_block bb)
46 {
47 gimple stmt = NULL;
48 gimple_stmt_iterator si;
49
50 if (!bb)
51 return UNKNOWN_LOC;
52
53 for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
54 {
55 stmt = gsi_stmt (si);
56 if (gimple_location (stmt) != UNKNOWN_LOC)
57 return gimple_location (stmt);
58 }
59
60 return UNKNOWN_LOC;
61 }
62
63
64 /* Recursively free the memory allocated for the SLP tree rooted at NODE. */
65
66 static void
67 vect_free_slp_tree (slp_tree node)
68 {
69 int i;
70 slp_tree child;
71
72 if (!node)
73 return;
74
75 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
76 vect_free_slp_tree (child);
77
78 SLP_TREE_CHILDREN (node).release ();
79 SLP_TREE_SCALAR_STMTS (node).release ();
80 SLP_TREE_VEC_STMTS (node).release ();
81
82 free (node);
83 }
84
85
86 /* Free the memory allocated for the SLP instance. */
87
88 void
89 vect_free_slp_instance (slp_instance instance)
90 {
91 vect_free_slp_tree (SLP_INSTANCE_TREE (instance));
92 SLP_INSTANCE_LOAD_PERMUTATION (instance).release ();
93 SLP_INSTANCE_LOADS (instance).release ();
94 SLP_INSTANCE_BODY_COST_VEC (instance).release ();
95 free (instance);
96 }
97
98
99 /* Create an SLP node for SCALAR_STMTS. */
100
101 static slp_tree
102 vect_create_new_slp_node (vec<gimple> scalar_stmts)
103 {
104 slp_tree node;
105 gimple stmt = scalar_stmts[0];
106 unsigned int nops;
107
108 if (is_gimple_call (stmt))
109 nops = gimple_call_num_args (stmt);
110 else if (is_gimple_assign (stmt))
111 {
112 nops = gimple_num_ops (stmt) - 1;
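   /* The condition of a COND_EXPR is an embedded comparison that supplies
      two scalar operands of its own, so reserve one extra child slot.  */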
113 if (gimple_assign_rhs_code (stmt) == COND_EXPR)
114 nops++;
115 }
116 else
117 return NULL;
118
119 node = XNEW (struct _slp_tree);
120 SLP_TREE_SCALAR_STMTS (node) = scalar_stmts;
121 SLP_TREE_VEC_STMTS (node).create (0);
122 SLP_TREE_CHILDREN (node).create (nops);
123
124 return node;
125 }
126
127
128 /* Allocate operands info for NOPS operands, and GROUP_SIZE def-stmts for each
129 operand. */
130 static vec<slp_oprnd_info>
131 vect_create_oprnd_info (int nops, int group_size)
132 {
133 int i;
134 slp_oprnd_info oprnd_info;
135 vec<slp_oprnd_info> oprnds_info;
136
137 oprnds_info.create (nops);
138 for (i = 0; i < nops; i++)
139 {
140 oprnd_info = XNEW (struct _slp_oprnd_info);
141 oprnd_info->def_stmts.create (group_size);
142 oprnd_info->first_dt = vect_uninitialized_def;
143 oprnd_info->first_def_type = NULL_TREE;
144 oprnd_info->first_const_oprnd = NULL_TREE;
145 oprnd_info->first_pattern = false;
146 oprnds_info.quick_push (oprnd_info);
147 }
148
149 return oprnds_info;
150 }
151
152
153 /* Free operands info. */
154
155 static void
156 vect_free_oprnd_info (vec<slp_oprnd_info> &oprnds_info)
157 {
158 int i;
159 slp_oprnd_info oprnd_info;
160
161 FOR_EACH_VEC_ELT (oprnds_info, i, oprnd_info)
162 {
163 oprnd_info->def_stmts.release ();
164 XDELETE (oprnd_info);
165 }
166
167 oprnds_info.release ();
168 }
169
170
171 /* Find the place of the data-ref in STMT in the interleaving chain that starts
172 from FIRST_STMT. Return -1 if the data-ref is not a part of the chain. */
173
174 static int
175 vect_get_place_in_interleaving_chain (gimple stmt, gimple first_stmt)
176 {
177 gimple next_stmt = first_stmt;
178 int result = 0;
179
180 if (first_stmt != GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)))
181 return -1;
182
183 do
184 {
185 if (next_stmt == stmt)
186 return result;
187 result++;
188 next_stmt = GROUP_NEXT_ELEMENT (vinfo_for_stmt (next_stmt));
189 }
190 while (next_stmt);
191
192 return -1;
193 }
194
195
196 /* Get the defs for the rhs of STMT (collect them in OPRNDS_INFO), check that
197 they are of a valid type and that they match the defs of the first stmt of
198 the SLP group (stored in OPRNDS_INFO). */
199
200 static bool
201 vect_get_and_check_slp_defs (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo,
202 gimple stmt, bool first,
203 vec<slp_oprnd_info> *oprnds_info)
204 {
205 tree oprnd;
206 unsigned int i, number_of_oprnds;
207 tree def;
208 gimple def_stmt;
209 enum vect_def_type dt = vect_uninitialized_def;
210 struct loop *loop = NULL;
211 bool pattern = false;
212 slp_oprnd_info oprnd_info;
213 int op_idx = 1;
214 tree compare_rhs = NULL_TREE;
215
216 if (loop_vinfo)
217 loop = LOOP_VINFO_LOOP (loop_vinfo);
218
219 if (is_gimple_call (stmt))
220 {
221 number_of_oprnds = gimple_call_num_args (stmt);
222 op_idx = 3;
223 }
224 else if (is_gimple_assign (stmt))
225 {
226 number_of_oprnds = gimple_num_ops (stmt) - 1;
227 if (gimple_assign_rhs_code (stmt) == COND_EXPR)
228 number_of_oprnds++;
229 }
230 else
231 return false;
232
233 for (i = 0; i < number_of_oprnds; i++)
234 {
235 if (compare_rhs)
236 {
237 oprnd = compare_rhs;
238 compare_rhs = NULL_TREE;
239 }
240 else
241 oprnd = gimple_op (stmt, op_idx++);
242
243 oprnd_info = (*oprnds_info)[i];
244
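      /* A comparison embedded in a COND_EXPR is split over two iterations of
	 this loop: its first operand is analyzed now, while the second one is
	 stashed in COMPARE_RHS and picked up on the next iteration.  */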
245 if (COMPARISON_CLASS_P (oprnd))
246 {
247 compare_rhs = TREE_OPERAND (oprnd, 1);
248 oprnd = TREE_OPERAND (oprnd, 0);
249 }
250
251 if (!vect_is_simple_use (oprnd, NULL, loop_vinfo, bb_vinfo, &def_stmt,
252 &def, &dt)
253 || (!def_stmt && dt != vect_constant_def))
254 {
255 if (dump_enabled_p ())
256 {
257 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
258 "Build SLP failed: can't find def for ");
259 dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM, oprnd);
260 }
261
262 return false;
263 }
264
265 /* Check if DEF_STMT is a part of a pattern in LOOP and get the def stmt
266 from the pattern. Check that all the stmts of the node are in the
267 pattern. */
268 if (def_stmt && gimple_bb (def_stmt)
269 && ((loop && flow_bb_inside_loop_p (loop, gimple_bb (def_stmt)))
270 || (!loop && gimple_bb (def_stmt) == BB_VINFO_BB (bb_vinfo)
271 && gimple_code (def_stmt) != GIMPLE_PHI))
272 && vinfo_for_stmt (def_stmt)
273 && STMT_VINFO_IN_PATTERN_P (vinfo_for_stmt (def_stmt))
274 && !STMT_VINFO_RELEVANT (vinfo_for_stmt (def_stmt))
275 && !STMT_VINFO_LIVE_P (vinfo_for_stmt (def_stmt)))
276 {
277 pattern = true;
278 if (!first && !oprnd_info->first_pattern)
279 {
280 if (dump_enabled_p ())
281 {
282 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
283 "Build SLP failed: some of the stmts"
284 " are in a pattern, and others are not ");
285 dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM, oprnd);
286 }
287
288 return false;
289 }
290
291 def_stmt = STMT_VINFO_RELATED_STMT (vinfo_for_stmt (def_stmt));
292 dt = STMT_VINFO_DEF_TYPE (vinfo_for_stmt (def_stmt));
293
294 if (dt == vect_unknown_def_type)
295 {
296 if (dump_enabled_p ())
297 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
298 "Unsupported pattern.");
299 return false;
300 }
301
302 switch (gimple_code (def_stmt))
303 {
304 case GIMPLE_PHI:
305 def = gimple_phi_result (def_stmt);
306 break;
307
308 case GIMPLE_ASSIGN:
309 def = gimple_assign_lhs (def_stmt);
310 break;
311
312 default:
313 if (dump_enabled_p ())
314 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
315 "unsupported defining stmt: ");
316 return false;
317 }
318 }
319
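      /* The first stmt of the group records the expected definition kind and
	 type here; every later stmt is checked against this record in the
	 else branch below.  */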
320 if (first)
321 {
322 oprnd_info->first_dt = dt;
323 oprnd_info->first_pattern = pattern;
324 if (def)
325 {
326 oprnd_info->first_def_type = TREE_TYPE (def);
327 oprnd_info->first_const_oprnd = NULL_TREE;
328 }
329 else
330 {
331 oprnd_info->first_def_type = NULL_TREE;
332 oprnd_info->first_const_oprnd = oprnd;
333 }
334 }
335 else
336 {
337 /* Not first stmt of the group, check that the def-stmt/s match
338 the def-stmt/s of the first stmt. Allow different definition
339 types for reduction chains: the first stmt must be a
340 vect_reduction_def (a phi node), and the rest
341 vect_internal_def. */
342 if (((oprnd_info->first_dt != dt
343 && !(oprnd_info->first_dt == vect_reduction_def
344 && dt == vect_internal_def))
345 || (oprnd_info->first_def_type != NULL_TREE
346 && def
347 && !types_compatible_p (oprnd_info->first_def_type,
348 TREE_TYPE (def))))
349 || (!def
350 && !types_compatible_p (TREE_TYPE (oprnd_info->first_const_oprnd),
351 TREE_TYPE (oprnd))))
352 {
353 if (dump_enabled_p ())
354 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
355 "Build SLP failed: different types ");
356
357 return false;
358 }
359 }
360
361 /* Check the types of the definitions. */
362 switch (dt)
363 {
364 case vect_constant_def:
365 case vect_external_def:
366 case vect_reduction_def:
367 break;
368
369 case vect_internal_def:
370 oprnd_info->def_stmts.quick_push (def_stmt);
371 break;
372
373 default:
374 /* FORNOW: Not supported. */
375 if (dump_enabled_p ())
376 {
377 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
378 "Build SLP failed: illegal type of def ");
379 dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM, def);
380 }
381
382 return false;
383 }
384 }
385
386 return true;
387 }
388
389
390 /* Recursively build an SLP tree starting from NODE.
391 Fail (and return FALSE) if the def-stmts are not isomorphic, require a
392 data permutation, or use unsupported operations. Otherwise, return
393 TRUE. */
394
395 static bool
396 vect_build_slp_tree (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo,
397 slp_tree *node, unsigned int group_size,
398 unsigned int *max_nunits,
399 vec<slp_tree> *loads,
400 unsigned int vectorization_factor)
401 {
402 unsigned int i;
403 vec<gimple> stmts = SLP_TREE_SCALAR_STMTS (*node);
404 gimple stmt = stmts[0];
405 enum tree_code first_stmt_code = ERROR_MARK, rhs_code = ERROR_MARK;
406 enum tree_code first_cond_code = ERROR_MARK;
407 tree lhs;
408 bool stop_recursion = false, need_same_oprnds = false;
409 tree vectype, scalar_type, first_op1 = NULL_TREE;
410 optab optab;
411 int icode;
412 enum machine_mode optab_op2_mode;
413 enum machine_mode vec_mode;
414 struct data_reference *first_dr;
415 HOST_WIDE_INT dummy;
416 gimple first_load = NULL, prev_first_load = NULL, old_first_load = NULL;
417 vec<slp_oprnd_info> oprnds_info;
418 unsigned int nops;
419 slp_oprnd_info oprnd_info;
420 tree cond;
421
422 if (is_gimple_call (stmt))
423 nops = gimple_call_num_args (stmt);
424 else if (is_gimple_assign (stmt))
425 {
426 nops = gimple_num_ops (stmt) - 1;
427 if (gimple_assign_rhs_code (stmt) == COND_EXPR)
428 nops++;
429 }
430 else
431 return false;
432
433 oprnds_info = vect_create_oprnd_info (nops, group_size);
434
435 /* For every stmt in NODE find its def stmt/s. */
436 FOR_EACH_VEC_ELT (stmts, i, stmt)
437 {
438 if (dump_enabled_p ())
439 {
440 dump_printf_loc (MSG_NOTE, vect_location, "Build SLP for ");
441 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
442 }
443
444 /* Fail to vectorize statements marked as unvectorizable. */
445 if (!STMT_VINFO_VECTORIZABLE (vinfo_for_stmt (stmt)))
446 {
447 if (dump_enabled_p ())
448 {
449 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
450 "Build SLP failed: unvectorizable statement ");
451 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
452 }
453
454 vect_free_oprnd_info (oprnds_info);
455 return false;
456 }
457
458 lhs = gimple_get_lhs (stmt);
459 if (lhs == NULL_TREE)
460 {
461 if (dump_enabled_p ())
462 {
463 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
464 "Build SLP failed: not GIMPLE_ASSIGN nor "
465 "GIMPLE_CALL ");
466 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
467 }
468
469 vect_free_oprnd_info (oprnds_info);
470 return false;
471 }
472
473 if (is_gimple_assign (stmt)
474 && gimple_assign_rhs_code (stmt) == COND_EXPR
475 && (cond = gimple_assign_rhs1 (stmt))
476 && !COMPARISON_CLASS_P (cond))
477 {
478 if (dump_enabled_p ())
479 {
480 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
481 "Build SLP failed: condition is not "
482 "comparison ");
483 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
484 }
485
486 vect_free_oprnd_info (oprnds_info);
487 return false;
488 }
489
490 scalar_type = vect_get_smallest_scalar_type (stmt, &dummy, &dummy);
491 vectype = get_vectype_for_scalar_type (scalar_type);
492 if (!vectype)
493 {
494 if (dump_enabled_p ())
495 {
496 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
497 "Build SLP failed: unsupported data-type ");
498 dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
499 scalar_type);
500 }
501
502 vect_free_oprnd_info (oprnds_info);
503 return false;
504 }
505
506 /* In case of multiple types we need to detect the smallest type. */
507 if (*max_nunits < TYPE_VECTOR_SUBPARTS (vectype))
508 {
509 *max_nunits = TYPE_VECTOR_SUBPARTS (vectype);
510 if (bb_vinfo)
511 vectorization_factor = *max_nunits;
512 }
513
514 if (is_gimple_call (stmt))
515 {
516 rhs_code = CALL_EXPR;
517 if (gimple_call_internal_p (stmt)
518 || gimple_call_tail_p (stmt)
519 || gimple_call_noreturn_p (stmt)
520 || !gimple_call_nothrow_p (stmt)
521 || gimple_call_chain (stmt))
522 {
523 if (dump_enabled_p ())
524 {
525 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
526 "Build SLP failed: unsupported call type ");
527 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
528 }
529
530 vect_free_oprnd_info (oprnds_info);
531 return false;
532 }
533 }
534 else
535 rhs_code = gimple_assign_rhs_code (stmt);
536
537 /* Check the operation. */
538 if (i == 0)
539 {
540 first_stmt_code = rhs_code;
541
542 /* Shift arguments should be equal in all the packed stmts for a
543 vector shift with scalar shift operand. */
544 if (rhs_code == LSHIFT_EXPR || rhs_code == RSHIFT_EXPR
545 || rhs_code == LROTATE_EXPR
546 || rhs_code == RROTATE_EXPR)
547 {
548 vec_mode = TYPE_MODE (vectype);
549
550 /* First see if we have a vector/vector shift. */
551 optab = optab_for_tree_code (rhs_code, vectype,
552 optab_vector);
553
554 if (!optab
555 || optab_handler (optab, vec_mode) == CODE_FOR_nothing)
556 {
557 /* No vector/vector shift, try for a vector/scalar shift. */
558 optab = optab_for_tree_code (rhs_code, vectype,
559 optab_scalar);
560
561 if (!optab)
562 {
563 if (dump_enabled_p ())
564 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
565 "Build SLP failed: no optab.");
566 vect_free_oprnd_info (oprnds_info);
567 return false;
568 }
569 icode = (int) optab_handler (optab, vec_mode);
570 if (icode == CODE_FOR_nothing)
571 {
572 if (dump_enabled_p ())
573 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
574 "Build SLP failed: "
575 "op not supported by target.");
576 vect_free_oprnd_info (oprnds_info);
577 return false;
578 }
579 optab_op2_mode = insn_data[icode].operand[2].mode;
580 if (!VECTOR_MODE_P (optab_op2_mode))
581 {
582 need_same_oprnds = true;
583 first_op1 = gimple_assign_rhs2 (stmt);
584 }
585 }
586 }
587 else if (rhs_code == WIDEN_LSHIFT_EXPR)
588 {
589 need_same_oprnds = true;
590 first_op1 = gimple_assign_rhs2 (stmt);
591 }
592 }
593 else
594 {
595 if (first_stmt_code != rhs_code
596 && (first_stmt_code != IMAGPART_EXPR
597 || rhs_code != REALPART_EXPR)
598 && (first_stmt_code != REALPART_EXPR
599 || rhs_code != IMAGPART_EXPR)
600 && !(STMT_VINFO_GROUPED_ACCESS (vinfo_for_stmt (stmt))
601 && (first_stmt_code == ARRAY_REF
602 || first_stmt_code == BIT_FIELD_REF
603 || first_stmt_code == INDIRECT_REF
604 || first_stmt_code == COMPONENT_REF
605 || first_stmt_code == MEM_REF)))
606 {
607 if (dump_enabled_p ())
608 {
609 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
610 "Build SLP failed: different operation "
611 "in stmt ");
612 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
613 }
614
615 vect_free_oprnd_info (oprnds_info);
616 return false;
617 }
618
619 if (need_same_oprnds
620 && !operand_equal_p (first_op1, gimple_assign_rhs2 (stmt), 0))
621 {
622 if (dump_enabled_p ())
623 {
624 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
625 "Build SLP failed: different shift "
626 "arguments in ");
627 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
628 }
629
630 vect_free_oprnd_info (oprnds_info);
631 return false;
632 }
633
634 if (rhs_code == CALL_EXPR)
635 {
636 gimple first_stmt = stmts[0];
637 if (gimple_call_num_args (stmt) != nops
638 || !operand_equal_p (gimple_call_fn (first_stmt),
639 gimple_call_fn (stmt), 0)
640 || gimple_call_fntype (first_stmt)
641 != gimple_call_fntype (stmt))
642 {
643 if (dump_enabled_p ())
644 {
645 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
646 "Build SLP failed: different calls in ");
647 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
648 stmt, 0);
649 }
650
651 vect_free_oprnd_info (oprnds_info);
652 return false;
653 }
654 }
655 }
656
657 /* Grouped store or load. */
658 if (STMT_VINFO_GROUPED_ACCESS (vinfo_for_stmt (stmt)))
659 {
660 if (REFERENCE_CLASS_P (lhs))
661 {
662 /* Store. */
663 if (!vect_get_and_check_slp_defs (loop_vinfo, bb_vinfo,
664 stmt, (i == 0), &oprnds_info))
665 {
666 vect_free_oprnd_info (oprnds_info);
667 return false;
668 }
669 }
670 else
671 {
672 /* Load. */
673 unsigned unrolling_factor
674 = least_common_multiple
675 (*max_nunits, group_size) / group_size;
676 /* FORNOW: Check that there is no gap between the loads
677 and no gap between the groups when we need to load
678 multiple groups at once.
679 ??? We should enhance this to only disallow gaps
680 inside vectors. */
681 if ((unrolling_factor > 1
682 && GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)) == stmt
683 && GROUP_GAP (vinfo_for_stmt (stmt)) != 0)
684 || (GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)) != stmt
685 && GROUP_GAP (vinfo_for_stmt (stmt)) != 1))
686 {
687 if (dump_enabled_p ())
688 {
689 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
690 "Build SLP failed: grouped "
691 "loads have gaps ");
692 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
693 stmt, 0);
694 }
695
696 vect_free_oprnd_info (oprnds_info);
697 return false;
698 }
699
700 /* Check that the size of the interleaved load group is not
701 greater than the SLP group size. */
702 unsigned ncopies
703 = vectorization_factor / TYPE_VECTOR_SUBPARTS (vectype);
704 if (loop_vinfo
705 && GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)) == stmt
706 && ((GROUP_SIZE (vinfo_for_stmt (stmt))
707 - GROUP_GAP (vinfo_for_stmt (stmt)))
708 > ncopies * group_size))
709 {
710 if (dump_enabled_p ())
711 {
712 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
713 "Build SLP failed: the number "
714 "of interleaved loads is greater than "
715 "the SLP group size ");
716 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
717 stmt, 0);
718 }
719
720 vect_free_oprnd_info (oprnds_info);
721 return false;
722 }
723
724 old_first_load = first_load;
725 first_load = GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt));
726 if (prev_first_load)
727 {
728 /* Check that there are no loads from different interleaving
729 chains in the same node. The only exception is complex
730 numbers. */
731 if (prev_first_load != first_load
732 && rhs_code != REALPART_EXPR
733 && rhs_code != IMAGPART_EXPR)
734 {
735 if (dump_enabled_p ())
736 {
737 dump_printf_loc (MSG_MISSED_OPTIMIZATION,
738 vect_location,
739 "Build SLP failed: different "
740 "interleaving chains in one node ");
741 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
742 stmt, 0);
743 }
744
745 vect_free_oprnd_info (oprnds_info);
746 return false;
747 }
748 }
749 else
750 prev_first_load = first_load;
751
752 /* In some cases a group of loads is just the same load
753 repeated N times. Only analyze its cost once. */
754 if (first_load == stmt && old_first_load != first_load)
755 {
756 first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (stmt));
757 if (vect_supportable_dr_alignment (first_dr, false)
758 == dr_unaligned_unsupported)
759 {
760 if (dump_enabled_p ())
761 {
762 dump_printf_loc (MSG_MISSED_OPTIMIZATION,
763 vect_location,
764 "Build SLP failed: unsupported "
765 "unaligned load ");
766 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
767 stmt, 0);
768 }
769
770 vect_free_oprnd_info (oprnds_info);
771 return false;
772 }
773 }
774
775 /* We stop the tree when we reach a group of loads. */
776 stop_recursion = true;
777 continue;
778 }
779 } /* Grouped access. */
780 else
781 {
782 if (TREE_CODE_CLASS (rhs_code) == tcc_reference)
783 {
784 /* Not a grouped load. */
785 if (dump_enabled_p ())
786 {
787 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
788 "Build SLP failed: not grouped load ");
789 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
790 }
791
792 /* FORNOW: non-grouped loads are not supported. */
793 vect_free_oprnd_info (oprnds_info);
794 return false;
795 }
796
797 /* Not a memory operation. */
798 if (TREE_CODE_CLASS (rhs_code) != tcc_binary
799 && TREE_CODE_CLASS (rhs_code) != tcc_unary
800 && rhs_code != COND_EXPR
801 && rhs_code != CALL_EXPR)
802 {
803 if (dump_enabled_p ())
804 {
805 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
806 "Build SLP failed: operation");
807 dump_printf (MSG_MISSED_OPTIMIZATION, " unsupported ");
808 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
809 }
810
811 vect_free_oprnd_info (oprnds_info);
812 return false;
813 }
814
815 if (rhs_code == COND_EXPR)
816 {
817 tree cond_expr = gimple_assign_rhs1 (stmt);
818
819 if (i == 0)
820 first_cond_code = TREE_CODE (cond_expr);
821 else if (first_cond_code != TREE_CODE (cond_expr))
822 {
823 if (dump_enabled_p ())
824 {
825 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
826 "Build SLP failed: different"
827 " operation");
828 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
829 stmt, 0);
830 }
831
832 vect_free_oprnd_info (oprnds_info);
833 return false;
834 }
835 }
836
837 /* Find the def-stmts. */
838 if (!vect_get_and_check_slp_defs (loop_vinfo, bb_vinfo, stmt,
839 (i == 0), &oprnds_info))
840 {
841 vect_free_oprnd_info (oprnds_info);
842 return false;
843 }
844 }
845 }
846
847 /* Grouped loads were reached - stop the recursion. */
848 if (stop_recursion)
849 {
850 loads->safe_push (*node);
851 vect_free_oprnd_info (oprnds_info);
852 return true;
853 }
854
855 /* Create SLP_TREE nodes for the definition node/s. */
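  /* Only operands defined by statements inside the vectorized region
     (vect_internal_def) get a child SLP node built for them; constant and
     external operands need no subtree of their own.  */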
856 FOR_EACH_VEC_ELT (oprnds_info, i, oprnd_info)
857 {
858 slp_tree child;
859
860 if (oprnd_info->first_dt != vect_internal_def)
861 continue;
862
863 child = vect_create_new_slp_node (oprnd_info->def_stmts);
864 if (!child
865 || !vect_build_slp_tree (loop_vinfo, bb_vinfo, &child, group_size,
866 max_nunits, loads,
867 vectorization_factor))
868 {
869 if (child)
870 oprnd_info->def_stmts = vNULL;
871 vect_free_slp_tree (child);
872 vect_free_oprnd_info (oprnds_info);
873 return false;
874 }
875
876 oprnd_info->def_stmts.create (0);
877 SLP_TREE_CHILDREN (*node).quick_push (child);
878 }
879
880 vect_free_oprnd_info (oprnds_info);
881 return true;
882 }
883
884 /* Dump a slp tree NODE using flags specified in DUMP_KIND. */
885
886 static void
887 vect_print_slp_tree (int dump_kind, slp_tree node)
888 {
889 int i;
890 gimple stmt;
891 slp_tree child;
892
893 if (!node)
894 return;
895
896 dump_printf (dump_kind, "node ");
897 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, stmt)
898 {
899 dump_printf (dump_kind, "\n\tstmt %d ", i);
900 dump_gimple_stmt (dump_kind, TDF_SLIM, stmt, 0);
901 }
902 dump_printf (dump_kind, "\n");
903
904 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
905 vect_print_slp_tree (dump_kind, child);
906 }
907
908
909 /* Mark the tree rooted at NODE with MARK (PURE_SLP or HYBRID).
910 If MARK is HYBRID, it refers to a specific stmt in NODE (the stmt at index
911 J). Otherwise, MARK is PURE_SLP and J is -1, which indicates that all the
912 stmts in NODE are to be marked. */
913
914 static void
915 vect_mark_slp_stmts (slp_tree node, enum slp_vect_type mark, int j)
916 {
917 int i;
918 gimple stmt;
919 slp_tree child;
920
921 if (!node)
922 return;
923
924 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, stmt)
925 if (j < 0 || i == j)
926 STMT_SLP_TYPE (vinfo_for_stmt (stmt)) = mark;
927
928 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
929 vect_mark_slp_stmts (child, mark, j);
930 }
931
932
933 /* Mark the statements of the tree rooted at NODE as relevant (vect_used). */
934
935 static void
936 vect_mark_slp_stmts_relevant (slp_tree node)
937 {
938 int i;
939 gimple stmt;
940 stmt_vec_info stmt_info;
941 slp_tree child;
942
943 if (!node)
944 return;
945
946 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, stmt)
947 {
948 stmt_info = vinfo_for_stmt (stmt);
949 gcc_assert (!STMT_VINFO_RELEVANT (stmt_info)
950 || STMT_VINFO_RELEVANT (stmt_info) == vect_used_in_scope);
951 STMT_VINFO_RELEVANT (stmt_info) = vect_used_in_scope;
952 }
953
954 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
955 vect_mark_slp_stmts_relevant (child);
956 }
957
958
959 /* Check if the permutation required by the SLP INSTANCE is supported.
960 Reorganize the SLP nodes stored in SLP_INSTANCE_LOADS if needed. */
961
962 static bool
963 vect_supported_slp_permutation_p (slp_instance instance)
964 {
965 slp_tree node = SLP_INSTANCE_LOADS (instance)[0];
966 gimple stmt = SLP_TREE_SCALAR_STMTS (node)[0];
967 gimple first_load = GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt));
968 vec<slp_tree> sorted_loads = vNULL;
969 int index;
970 slp_tree *tmp_loads = NULL;
971 int group_size = SLP_INSTANCE_GROUP_SIZE (instance), i, j;
972 slp_tree load;
973
974 /* FORNOW: The only supported load permutation is the one in which all
975 the loads in a node access the same location, while the data-refs in
976 the nodes of LOADS constitute an interleaving chain.
977 Sort the nodes according to the order of accesses in the chain. */
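  /* For example, with GROUP_SIZE == 2 and LOAD_PERMUTATION == {1, 1, 0, 0}
     the first load node is placed at position 1 and the second one at
     position 0.  */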
978 tmp_loads = (slp_tree *) xmalloc (sizeof (slp_tree) * group_size);
979 for (i = 0, j = 0;
980 SLP_INSTANCE_LOAD_PERMUTATION (instance).iterate (i, &index)
981 && SLP_INSTANCE_LOADS (instance).iterate (j, &load);
982 i += group_size, j++)
983 {
984 gimple scalar_stmt = SLP_TREE_SCALAR_STMTS (load)[0];
985 /* Check that the loads are all in the same interleaving chain. */
986 if (GROUP_FIRST_ELEMENT (vinfo_for_stmt (scalar_stmt)) != first_load)
987 {
988 if (dump_enabled_p ())
989 {
990 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
991 "Build SLP failed: unsupported data "
992 "permutation ");
993 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
994 scalar_stmt, 0);
995 }
996
997 free (tmp_loads);
998 return false;
999 }
1000
1001 tmp_loads[index] = load;
1002 }
1003
1004 sorted_loads.create (group_size);
1005 for (i = 0; i < group_size; i++)
1006 sorted_loads.safe_push (tmp_loads[i]);
1007
1008 SLP_INSTANCE_LOADS (instance).release ();
1009 SLP_INSTANCE_LOADS (instance) = sorted_loads;
1010 free (tmp_loads);
1011
1012 if (!vect_transform_slp_perm_load (stmt, vNULL, NULL,
1013 SLP_INSTANCE_UNROLLING_FACTOR (instance),
1014 instance, true))
1015 return false;
1016
1017 return true;
1018 }
1019
1020
1021 /* Rearrange the statements of NODE according to PERMUTATION. */
1022
1023 static void
1024 vect_slp_rearrange_stmts (slp_tree node, unsigned int group_size,
1025 vec<int> permutation)
1026 {
1027 gimple stmt;
1028 vec<gimple> tmp_stmts;
1029 unsigned int i;
1030 slp_tree child;
1031
1032 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
1033 vect_slp_rearrange_stmts (child, group_size, permutation);
1034
1035 gcc_assert (group_size == SLP_TREE_SCALAR_STMTS (node).length ());
1036 tmp_stmts.create (group_size);
1037 tmp_stmts.quick_grow_cleared (group_size);
1038
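  /* Scatter the statements: the stmt currently at position I is moved to
     position PERMUTATION[I] of the new vector.  */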
1039 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, stmt)
1040 tmp_stmts[permutation[i]] = stmt;
1041
1042 SLP_TREE_SCALAR_STMTS (node).release ();
1043 SLP_TREE_SCALAR_STMTS (node) = tmp_stmts;
1044 }
1045
1046
1047 /* Check if the required load permutation is supported.
1048 LOAD_PERMUTATION contains a list of indices of the loads.
1049 In SLP this permutation is relative to the order of grouped stores that are
1050 the base of the SLP instance. */
1051
1052 static bool
1053 vect_supported_load_permutation_p (slp_instance slp_instn, int group_size,
1054 vec<int> load_permutation)
1055 {
1056 int i = 0, j, prev = -1, next, k, number_of_groups;
1057 bool supported, bad_permutation = false;
1058 sbitmap load_index;
1059 slp_tree node, other_complex_node;
1060 gimple stmt, first = NULL, other_node_first, load, next_load, first_load;
1061 unsigned complex_numbers = 0;
1062 struct data_reference *dr;
1063 bb_vec_info bb_vinfo;
1064
1065 /* FORNOW: permutations are only supported in SLP. */
1066 if (!slp_instn)
1067 return false;
1068
1069 if (dump_enabled_p ())
1070 {
1071 dump_printf_loc (MSG_NOTE, vect_location, "Load permutation ");
1072 FOR_EACH_VEC_ELT (load_permutation, i, next)
1073 dump_printf (MSG_NOTE, "%d ", next);
1074 }
1075
1076 /* In case of reduction every load permutation is allowed, since the order
1077 of the reduction statements is not important (as opposed to the case of
1078 grouped stores). The only condition we need to check is that all the
1079 load nodes are of the same size and have the same permutation (and then
1080 rearrange all the nodes of the SLP instance according to this
1081 permutation). */
1082
1083 /* Check that all the load nodes are of the same size. */
1084 FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (slp_instn), i, node)
1085 {
1086 if (SLP_TREE_SCALAR_STMTS (node).length () != (unsigned) group_size)
1087 return false;
1088
1089 stmt = SLP_TREE_SCALAR_STMTS (node)[0];
1090 if (is_gimple_assign (stmt)
1091 && (gimple_assign_rhs_code (stmt) == REALPART_EXPR
1092 || gimple_assign_rhs_code (stmt) == IMAGPART_EXPR))
1093 complex_numbers++;
1094 }
1095
1096 /* Complex operands can be swapped as following:
1097 real_c = real_b + real_a;
1098 imag_c = imag_a + imag_b;
1099 i.e., we have {real_b, imag_a} and {real_a, imag_b} instead of
1100 {real_a, imag_a} and {real_b, imag_b}. We check here that if interleaving
1101 chains are mixed, they match the above pattern. */
1102 if (complex_numbers)
1103 {
1104 FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (slp_instn), i, node)
1105 {
1106 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), j, stmt)
1107 {
1108 if (j == 0)
1109 first = stmt;
1110 else
1111 {
1112 if (GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)) != first)
1113 {
1114 if (complex_numbers != 2)
1115 return false;
1116
1117 if (i == 0)
1118 k = 1;
1119 else
1120 k = 0;
1121
1122 other_complex_node = SLP_INSTANCE_LOADS (slp_instn)[k];
1123 other_node_first =
1124 SLP_TREE_SCALAR_STMTS (other_complex_node)[0];
1125
1126 if (GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt))
1127 != other_node_first)
1128 return false;
1129 }
1130 }
1131 }
1132 }
1133 }
1134
1135 /* We checked that this case is ok, so there is no need to proceed with
1136 permutation tests. */
1137 if (complex_numbers == 2
1138 && SLP_INSTANCE_LOADS (slp_instn).length () == 2)
1139 {
1140 SLP_INSTANCE_LOADS (slp_instn).release ();
1141 SLP_INSTANCE_LOAD_PERMUTATION (slp_instn).release ();
1142 return true;
1143 }
1144
1145 node = SLP_INSTANCE_TREE (slp_instn);
1146 stmt = SLP_TREE_SCALAR_STMTS (node)[0];
1147 /* LOAD_PERMUTATION is a list of indices of all the loads of the SLP
1148 instance, not all the loads belong to the same node or interleaving
1149 group. Hence, we need to divide them into groups according to
1150 GROUP_SIZE. */
1151 number_of_groups = load_permutation.length () / group_size;
1152
1153 /* Reduction (there are no data-refs in the root).
1154 In reduction chain the order of the loads is important. */
1155 if (!STMT_VINFO_DATA_REF (vinfo_for_stmt (stmt))
1156 && !GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)))
1157 {
1158 int first_group_load_index;
1159
1160 /* Compare all the permutation sequences to the first one. */
1161 for (i = 1; i < number_of_groups; i++)
1162 {
1163 k = 0;
1164 for (j = i * group_size; j < i * group_size + group_size; j++)
1165 {
1166 next = load_permutation[j];
1167 first_group_load_index = load_permutation[k];
1168
1169 if (next != first_group_load_index)
1170 {
1171 bad_permutation = true;
1172 break;
1173 }
1174
1175 k++;
1176 }
1177
1178 if (bad_permutation)
1179 break;
1180 }
1181
1182 if (!bad_permutation)
1183 {
1184 /* Check that the loads in the first sequence are different and there
1185 are no gaps between them. */
1186 load_index = sbitmap_alloc (group_size);
1187 bitmap_clear (load_index);
1188 for (k = 0; k < group_size; k++)
1189 {
1190 first_group_load_index = load_permutation[k];
1191 if (bitmap_bit_p (load_index, first_group_load_index))
1192 {
1193 bad_permutation = true;
1194 break;
1195 }
1196
1197 bitmap_set_bit (load_index, first_group_load_index);
1198 }
1199
1200 if (!bad_permutation)
1201 for (k = 0; k < group_size; k++)
1202 if (!bitmap_bit_p (load_index, k))
1203 {
1204 bad_permutation = true;
1205 break;
1206 }
1207
1208 sbitmap_free (load_index);
1209 }
1210
1211 if (!bad_permutation)
1212 {
1213 /* This permutation is valid for reduction. Since the order of the
1214 statements in the nodes is not important unless they are memory
1215 accesses, we can rearrange the statements in all the nodes
1216 according to the order of the loads. */
1217 vect_slp_rearrange_stmts (SLP_INSTANCE_TREE (slp_instn), group_size,
1218 load_permutation);
1219 SLP_INSTANCE_LOAD_PERMUTATION (slp_instn).release ();
1220 return true;
1221 }
1222 }
1223
1224 /* In basic block vectorization we allow any subchain of an interleaving
1225 chain.
1226 FORNOW: not supported in loop SLP because of realignment complications. */
1227 bb_vinfo = STMT_VINFO_BB_VINFO (vinfo_for_stmt (stmt));
1228 bad_permutation = false;
1229 /* Check that for every node in the instance the loads form a subchain. */
1230 if (bb_vinfo)
1231 {
1232 FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (slp_instn), i, node)
1233 {
1234 next_load = NULL;
1235 first_load = NULL;
1236 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), j, load)
1237 {
1238 if (!first_load)
1239 first_load = GROUP_FIRST_ELEMENT (vinfo_for_stmt (load));
1240 else if (first_load
1241 != GROUP_FIRST_ELEMENT (vinfo_for_stmt (load)))
1242 {
1243 bad_permutation = true;
1244 break;
1245 }
1246
1247 if (j != 0 && next_load != load)
1248 {
1249 bad_permutation = true;
1250 break;
1251 }
1252
1253 next_load = GROUP_NEXT_ELEMENT (vinfo_for_stmt (load));
1254 }
1255
1256 if (bad_permutation)
1257 break;
1258 }
1259
1260 /* Check that the alignment of the first load in every subchain, i.e.,
1261 the first statement in every load node, is supported. */
1262 if (!bad_permutation)
1263 {
1264 FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (slp_instn), i, node)
1265 {
1266 first_load = SLP_TREE_SCALAR_STMTS (node)[0];
1267 if (first_load
1268 != GROUP_FIRST_ELEMENT (vinfo_for_stmt (first_load)))
1269 {
1270 dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_load));
1271 if (vect_supportable_dr_alignment (dr, false)
1272 == dr_unaligned_unsupported)
1273 {
1274 if (dump_enabled_p ())
1275 {
1276 dump_printf_loc (MSG_MISSED_OPTIMIZATION,
1277 vect_location,
1278 "unsupported unaligned load ");
1279 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
1280 first_load, 0);
1281 }
1282 bad_permutation = true;
1283 break;
1284 }
1285 }
1286 }
1287
1288 if (!bad_permutation)
1289 {
1290 SLP_INSTANCE_LOAD_PERMUTATION (slp_instn).release ();
1291 return true;
1292 }
1293 }
1294 }
1295
1296 /* FORNOW: the only supported permutation is 0..01..1.. of length equal to
1297 GROUP_SIZE and where each sequence of same drs is of GROUP_SIZE length as
1298 well (unless it's reduction). */
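  /* For GROUP_SIZE == 2 this means the only accepted permutations are
     {0, 0, 1, 1} and {1, 1, 0, 0}.  */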
1299 if (load_permutation.length ()
1300 != (unsigned int) (group_size * group_size))
1301 return false;
1302
1303 supported = true;
1304 load_index = sbitmap_alloc (group_size);
1305 bitmap_clear (load_index);
1306 for (j = 0; j < group_size; j++)
1307 {
1308 for (i = j * group_size, k = 0;
1309 load_permutation.iterate (i, &next) && k < group_size;
1310 i++, k++)
1311 {
1312 if (i != j * group_size && next != prev)
1313 {
1314 supported = false;
1315 break;
1316 }
1317
1318 prev = next;
1319 }
1320
1321 if (bitmap_bit_p (load_index, prev))
1322 {
1323 supported = false;
1324 break;
1325 }
1326
1327 bitmap_set_bit (load_index, prev);
1328 }
1329
1330 for (j = 0; j < group_size; j++)
1331 if (!bitmap_bit_p (load_index, j))
1332 {
1333 sbitmap_free (load_index);
1334 return false;
1335 }
1336
1337 sbitmap_free (load_index);
1338
1339 if (supported && i == group_size * group_size
1340 && vect_supported_slp_permutation_p (slp_instn))
1341 return true;
1342
1343 return false;
1344 }
1345
1346
1347 /* Find the first load in the loop that belongs to INSTANCE.
1348 When loads are in several SLP nodes, there can be a case in which the first
1349 load does not appear in the first SLP node to be transformed, causing
1350 incorrect order of statements. Since we generate all the loads together,
1351 they must be inserted before the first load of the SLP instance and not
1352 before the first load of the first node of the instance. */
1353
1354 static gimple
1355 vect_find_first_load_in_slp_instance (slp_instance instance)
1356 {
1357 int i, j;
1358 slp_tree load_node;
1359 gimple first_load = NULL, load;
1360
1361 FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (instance), i, load_node)
1362 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (load_node), j, load)
1363 first_load = get_earlier_stmt (load, first_load);
1364
1365 return first_load;
1366 }
1367
1368
1369 /* Find the last store in SLP INSTANCE. */
1370
1371 static gimple
1372 vect_find_last_store_in_slp_instance (slp_instance instance)
1373 {
1374 int i;
1375 slp_tree node;
1376 gimple last_store = NULL, store;
1377
1378 node = SLP_INSTANCE_TREE (instance);
1379 for (i = 0; SLP_TREE_SCALAR_STMTS (node).iterate (i, &store); i++)
1380 last_store = get_later_stmt (store, last_store);
1381
1382 return last_store;
1383 }
1384
1385 /* Compute the cost for the SLP node NODE in the SLP instance INSTANCE. */
1386
1387 static void
1388 vect_analyze_slp_cost_1 (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo,
1389 slp_instance instance, slp_tree node,
1390 stmt_vector_for_cost *prologue_cost_vec,
1391 unsigned ncopies_for_cost)
1392 {
1393 stmt_vector_for_cost *body_cost_vec = &SLP_INSTANCE_BODY_COST_VEC (instance);
1394
1395 unsigned i;
1396 slp_tree child;
1397 gimple stmt, s;
1398 stmt_vec_info stmt_info;
1399 tree lhs;
1400 unsigned group_size = SLP_INSTANCE_GROUP_SIZE (instance);
1401
1402 /* Recurse down the SLP tree. */
1403 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
1404 vect_analyze_slp_cost_1 (loop_vinfo, bb_vinfo,
1405 instance, child, prologue_cost_vec,
1406 ncopies_for_cost);
1407
1408 /* Look at the first scalar stmt to determine the cost. */
1409 stmt = SLP_TREE_SCALAR_STMTS (node)[0];
1410 stmt_info = vinfo_for_stmt (stmt);
1411 if (STMT_VINFO_GROUPED_ACCESS (stmt_info))
1412 {
1413 if (DR_IS_WRITE (STMT_VINFO_DATA_REF (stmt_info)))
1414 vect_model_store_cost (stmt_info, ncopies_for_cost, false,
1415 vect_uninitialized_def,
1416 node, prologue_cost_vec, body_cost_vec);
1417 else
1418 {
1419 int i;
1420 gcc_checking_assert (DR_IS_READ (STMT_VINFO_DATA_REF (stmt_info)));
1421 vect_model_load_cost (stmt_info, ncopies_for_cost, false,
1422 node, prologue_cost_vec, body_cost_vec);
1423 /* If the load is permuted record the cost for the permutation.
1424 ??? Loads from multiple chains are let through here only
1425 for a single special case involving complex numbers where
1426 in the end no permutation is necessary. */
1427 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, s)
1428 if ((STMT_VINFO_GROUP_FIRST_ELEMENT (vinfo_for_stmt (s))
1429 == STMT_VINFO_GROUP_FIRST_ELEMENT (stmt_info))
1430 && vect_get_place_in_interleaving_chain
1431 (s, STMT_VINFO_GROUP_FIRST_ELEMENT (stmt_info)) != i)
1432 {
1433 record_stmt_cost (body_cost_vec, group_size, vec_perm,
1434 stmt_info, 0, vect_body);
1435 break;
1436 }
1437 }
1438 }
1439 else
1440 record_stmt_cost (body_cost_vec, ncopies_for_cost, vector_stmt,
1441 stmt_info, 0, vect_body);
1442
1443 /* Scan operands and account for prologue cost of constants/externals.
1444 ??? This over-estimates cost for multiple uses and should be
1445 re-engineered. */
1446 lhs = gimple_get_lhs (stmt);
1447 for (i = 0; i < gimple_num_ops (stmt); ++i)
1448 {
1449 tree def, op = gimple_op (stmt, i);
1450 gimple def_stmt;
1451 enum vect_def_type dt;
1452 if (!op || op == lhs)
1453 continue;
1454 if (vect_is_simple_use (op, NULL, loop_vinfo, bb_vinfo,
1455 &def_stmt, &def, &dt)
1456 && (dt == vect_constant_def || dt == vect_external_def))
1457 record_stmt_cost (prologue_cost_vec, 1, vector_stmt,
1458 stmt_info, 0, vect_prologue);
1459 }
1460 }
1461
1462 /* Compute the cost for the SLP instance INSTANCE. */
1463
1464 static void
1465 vect_analyze_slp_cost (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo,
1466 slp_instance instance, unsigned nunits)
1467 {
1468 stmt_vector_for_cost body_cost_vec, prologue_cost_vec;
1469 unsigned ncopies_for_cost;
1470 stmt_info_for_cost *si;
1471 unsigned i;
1472
1473 /* Calculate the number of vector stmts to create based on the unrolling
1474 factor (number of vectors is 1 if NUNITS >= GROUP_SIZE, and is
1475 GROUP_SIZE / NUNITS otherwise). */
1476 unsigned group_size = SLP_INSTANCE_GROUP_SIZE (instance);
1477 ncopies_for_cost = least_common_multiple (nunits, group_size) / nunits;
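  /* E.g. NUNITS == 4 and GROUP_SIZE == 6 give lcm (4, 6) / 4 == 3, i.e.
     three vector statements cover the twelve scalars of the unrolled
     group.  */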
1478
1479 prologue_cost_vec.create (10);
1480 body_cost_vec.create (10);
1481 SLP_INSTANCE_BODY_COST_VEC (instance) = body_cost_vec;
1482 vect_analyze_slp_cost_1 (loop_vinfo, bb_vinfo,
1483 instance, SLP_INSTANCE_TREE (instance),
1484 &prologue_cost_vec, ncopies_for_cost);
1485
1486 /* Record the prologue costs, which were delayed until we were
1487 sure that SLP was successful. Unlike the body costs, we know
1488 the final values now regardless of the loop vectorization factor. */
1489 void *data = (loop_vinfo ? LOOP_VINFO_TARGET_COST_DATA (loop_vinfo)
1490 : BB_VINFO_TARGET_COST_DATA (bb_vinfo));
1491 FOR_EACH_VEC_ELT (prologue_cost_vec, i, si)
1492 {
1493 struct _stmt_vec_info *stmt_info
1494 = si->stmt ? vinfo_for_stmt (si->stmt) : NULL;
1495 (void) add_stmt_cost (data, si->count, si->kind, stmt_info,
1496 si->misalign, vect_prologue);
1497 }
1498
1499 prologue_cost_vec.release ();
1500 }
1501
1502 /* Analyze an SLP instance starting from a group of grouped stores. Call
1503 vect_build_slp_tree to build a tree of packed stmts if possible.
1504 Return FALSE if it's impossible to SLP any stmt in the loop. */
1505
1506 static bool
1507 vect_analyze_slp_instance (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo,
1508 gimple stmt)
1509 {
1510 slp_instance new_instance;
1511 slp_tree node;
1512 unsigned int group_size = GROUP_SIZE (vinfo_for_stmt (stmt));
1513 unsigned int unrolling_factor = 1, nunits;
1514 tree vectype, scalar_type = NULL_TREE;
1515 gimple next;
1516 unsigned int vectorization_factor = 0;
1517 int i;
1518 unsigned int max_nunits = 0;
1519 vec<slp_tree> loads;
1520 struct data_reference *dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (stmt));
1521 vec<gimple> scalar_stmts;
1522
1523 if (GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)))
1524 {
1525 if (dr)
1526 {
1527 scalar_type = TREE_TYPE (DR_REF (dr));
1528 vectype = get_vectype_for_scalar_type (scalar_type);
1529 }
1530 else
1531 {
1532 gcc_assert (loop_vinfo);
1533 vectype = STMT_VINFO_VECTYPE (vinfo_for_stmt (stmt));
1534 }
1535
1536 group_size = GROUP_SIZE (vinfo_for_stmt (stmt));
1537 }
1538 else
1539 {
1540 gcc_assert (loop_vinfo);
1541 vectype = STMT_VINFO_VECTYPE (vinfo_for_stmt (stmt));
1542 group_size = LOOP_VINFO_REDUCTIONS (loop_vinfo).length ();
1543 }
1544
1545 if (!vectype)
1546 {
1547 if (dump_enabled_p ())
1548 {
1549 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1550 "Build SLP failed: unsupported data-type ");
1551 dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM, scalar_type);
1552 }
1553
1554 return false;
1555 }
1556
1557 nunits = TYPE_VECTOR_SUBPARTS (vectype);
1558 if (loop_vinfo)
1559 vectorization_factor = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
1560 else
1561 vectorization_factor = nunits;
1562
1563 /* Calculate the unrolling factor. */
1564 unrolling_factor = least_common_multiple (nunits, group_size) / group_size;
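  /* E.g. a group of 6 scalars with 4-element vectors gives
     lcm (4, 6) / 6 == 2: two copies of the group are needed before it
     fills whole vectors.  */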
1565 if (unrolling_factor != 1 && !loop_vinfo)
1566 {
1567 if (dump_enabled_p ())
1568 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1569 "Build SLP failed: unrolling required in basic"
1570 " block SLP");
1571
1572 return false;
1573 }
1574
1575 /* Create a node (a root of the SLP tree) for the packed grouped stores. */
1576 scalar_stmts.create (group_size);
1577 next = stmt;
1578 if (GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)))
1579 {
1580 /* Collect the stores and store them in SLP_TREE_SCALAR_STMTS. */
1581 while (next)
1582 {
1583 if (STMT_VINFO_IN_PATTERN_P (vinfo_for_stmt (next))
1584 && STMT_VINFO_RELATED_STMT (vinfo_for_stmt (next)))
1585 scalar_stmts.safe_push (
1586 STMT_VINFO_RELATED_STMT (vinfo_for_stmt (next)));
1587 else
1588 scalar_stmts.safe_push (next);
1589 next = GROUP_NEXT_ELEMENT (vinfo_for_stmt (next));
1590 }
1591 }
1592 else
1593 {
1594 /* Collect reduction statements. */
1595 vec<gimple> reductions = LOOP_VINFO_REDUCTIONS (loop_vinfo);
1596 for (i = 0; reductions.iterate (i, &next); i++)
1597 scalar_stmts.safe_push (next);
1598 }
1599
1600 node = vect_create_new_slp_node (scalar_stmts);
1601
1602 loads.create (group_size);
1603
1604 /* Build the tree for the SLP instance. */
1605 if (vect_build_slp_tree (loop_vinfo, bb_vinfo, &node, group_size,
1606 &max_nunits, &loads,
1607 vectorization_factor))
1608 {
1609 /* Calculate the unrolling factor based on the smallest type. */
1610 if (max_nunits > nunits)
1611 unrolling_factor = least_common_multiple (max_nunits, group_size)
1612 / group_size;
1613
1614 if (unrolling_factor != 1 && !loop_vinfo)
1615 {
1616 if (dump_enabled_p ())
1617 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1618 "Build SLP failed: unrolling required in basic"
1619 " block SLP");
1620 vect_free_slp_tree (node);
1621 loads.release ();
1622 return false;
1623 }
1624
1625 /* Create a new SLP instance. */
1626 new_instance = XNEW (struct _slp_instance);
1627 SLP_INSTANCE_TREE (new_instance) = node;
1628 SLP_INSTANCE_GROUP_SIZE (new_instance) = group_size;
1629 SLP_INSTANCE_UNROLLING_FACTOR (new_instance) = unrolling_factor;
1630 SLP_INSTANCE_BODY_COST_VEC (new_instance) = vNULL;
1631 SLP_INSTANCE_LOADS (new_instance) = loads;
1632 SLP_INSTANCE_FIRST_LOAD_STMT (new_instance) = NULL;
1633 SLP_INSTANCE_LOAD_PERMUTATION (new_instance) = vNULL;
1634
1635 /* Compute the load permutation. */
1636 slp_tree load_node;
1637 bool loads_permuted = false;
1638 vec<int> load_permutation;
1639 load_permutation.create (group_size * group_size);
1640 FOR_EACH_VEC_ELT (loads, i, load_node)
1641 {
1642 int j;
1643 gimple load;
1644 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (load_node), j, load)
1645 {
1646 int load_place;
1647 load_place = vect_get_place_in_interleaving_chain
1648 (load, GROUP_FIRST_ELEMENT (vinfo_for_stmt (load)));
1649 if (load_place != j
1650 /* ??? We allow loads from different groups to
1651 get to here for a special case handled in
1652 the permutation code. Make sure we get to that. */
1653 || (GROUP_FIRST_ELEMENT
1654 (vinfo_for_stmt (SLP_TREE_SCALAR_STMTS (load_node)[0]))
1655 != GROUP_FIRST_ELEMENT (vinfo_for_stmt (load))))
1656 loads_permuted = true;
1657 load_permutation.safe_push (load_place);
1658 }
1659 }
1660
1661 if (loads_permuted)
1662 {
1663 SLP_INSTANCE_LOAD_PERMUTATION (new_instance) = load_permutation;
1664 if (!vect_supported_load_permutation_p (new_instance, group_size,
1665 load_permutation))
1666 {
1667 if (dump_enabled_p ())
1668 {
1669 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1670 "Build SLP failed: unsupported load "
1671 "permutation ");
1672 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
1673 }
1674
1675 vect_free_slp_instance (new_instance);
1676 return false;
1677 }
1678
1679 SLP_INSTANCE_FIRST_LOAD_STMT (new_instance)
1680 = vect_find_first_load_in_slp_instance (new_instance);
1681 }
1682 else
1683 load_permutation.release ();
1684
1685 /* Compute the costs of this SLP instance. */
1686 vect_analyze_slp_cost (loop_vinfo, bb_vinfo,
1687 new_instance, TYPE_VECTOR_SUBPARTS (vectype));
1688
1689 if (loop_vinfo)
1690 LOOP_VINFO_SLP_INSTANCES (loop_vinfo).safe_push (new_instance);
1691 else
1692 BB_VINFO_SLP_INSTANCES (bb_vinfo).safe_push (new_instance);
1693
1694 if (dump_enabled_p ())
1695 vect_print_slp_tree (MSG_NOTE, node);
1696
1697 return true;
1698 }
1699
1700 /* Failed to SLP. */
1701 /* Free the allocated memory. */
1702 vect_free_slp_tree (node);
1703 loads.release ();
1704
1705 return false;
1706 }
1707
1708
1709 /* Check whether there are stmts in the loop that can be vectorized using SLP.
1710 Build SLP trees of packed scalar stmts if SLP is possible. */
1711
1712 bool
1713 vect_analyze_slp (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo)
1714 {
1715 unsigned int i;
1716 vec<gimple> grouped_stores;
1717 vec<gimple> reductions = vNULL;
1718 vec<gimple> reduc_chains = vNULL;
1719 gimple first_element;
1720 bool ok = false;
1721
1722 if (dump_enabled_p ())
1723 dump_printf_loc (MSG_NOTE, vect_location, "=== vect_analyze_slp ===");
1724
1725 if (loop_vinfo)
1726 {
1727 grouped_stores = LOOP_VINFO_GROUPED_STORES (loop_vinfo);
1728 reduc_chains = LOOP_VINFO_REDUCTION_CHAINS (loop_vinfo);
1729 reductions = LOOP_VINFO_REDUCTIONS (loop_vinfo);
1730 }
1731 else
1732 grouped_stores = BB_VINFO_GROUPED_STORES (bb_vinfo);
1733
1734 /* Find SLP sequences starting from groups of grouped stores. */
1735 FOR_EACH_VEC_ELT (grouped_stores, i, first_element)
1736 if (vect_analyze_slp_instance (loop_vinfo, bb_vinfo, first_element))
1737 ok = true;
1738
1739 if (bb_vinfo && !ok)
1740 {
1741 if (dump_enabled_p ())
1742 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1743 "Failed to SLP the basic block.");
1744
1745 return false;
1746 }
1747
1748 if (loop_vinfo
1749 && LOOP_VINFO_REDUCTION_CHAINS (loop_vinfo).length () > 0)
1750 {
1751 /* Find SLP sequences starting from reduction chains. */
1752 FOR_EACH_VEC_ELT (reduc_chains, i, first_element)
1753 if (vect_analyze_slp_instance (loop_vinfo, bb_vinfo, first_element))
1754 ok = true;
1755 else
1756 return false;
1757
1758 /* Don't try to vectorize SLP reductions if a reduction chain was
1759 detected. */
1760 return ok;
1761 }
1762
1763 /* Find SLP sequences starting from groups of reductions. */
1764 if (loop_vinfo && LOOP_VINFO_REDUCTIONS (loop_vinfo).length () > 1
1765 && vect_analyze_slp_instance (loop_vinfo, bb_vinfo, reductions[0]))
1766 ok = true;
1767
1768 return true;
1769 }
1770
1771
1772 /* For each possible SLP instance decide whether to SLP it and calculate overall
1773 unrolling factor needed to SLP the loop. Return TRUE if decided to SLP at
1774 least one instance. */
1775
1776 bool
1777 vect_make_slp_decision (loop_vec_info loop_vinfo)
1778 {
1779 unsigned int i, unrolling_factor = 1;
1780 vec<slp_instance> slp_instances = LOOP_VINFO_SLP_INSTANCES (loop_vinfo);
1781 slp_instance instance;
1782 int decided_to_slp = 0;
1783
1784 if (dump_enabled_p ())
1785 dump_printf_loc (MSG_NOTE, vect_location, "=== vect_make_slp_decision ===");
1786
1787 FOR_EACH_VEC_ELT (slp_instances, i, instance)
1788 {
1789 /* FORNOW: SLP if you can. */
1790 if (unrolling_factor < SLP_INSTANCE_UNROLLING_FACTOR (instance))
1791 unrolling_factor = SLP_INSTANCE_UNROLLING_FACTOR (instance);
1792
1793 /* Mark all the stmts that belong to INSTANCE as PURE_SLP stmts. Later we
1794 call vect_detect_hybrid_slp () to find stmts that need hybrid SLP and
1795 loop-based vectorization. Such stmts will be marked as HYBRID. */
1796 vect_mark_slp_stmts (SLP_INSTANCE_TREE (instance), pure_slp, -1);
1797 decided_to_slp++;
1798 }
1799
1800 LOOP_VINFO_SLP_UNROLLING_FACTOR (loop_vinfo) = unrolling_factor;
1801
1802 if (decided_to_slp && dump_enabled_p ())
1803 dump_printf_loc (MSG_OPTIMIZED_LOCATIONS, vect_location,
1804 "Decided to SLP %d instances. Unrolling factor %d",
1805 decided_to_slp, unrolling_factor);
1806
1807 return (decided_to_slp > 0);
1808 }
1809
1810
1811 /* Find stmts that must be both vectorized and SLPed (since they feed stmts that
1812 can't be SLPed) in the tree rooted at NODE. Mark such stmts as HYBRID. */
1813
1814 static void
1815 vect_detect_hybrid_slp_stmts (slp_tree node)
1816 {
1817 int i;
1818 vec<gimple> stmts = SLP_TREE_SCALAR_STMTS (node);
1819 gimple stmt = stmts[0];
1820 imm_use_iterator imm_iter;
1821 gimple use_stmt;
1822 stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
1823 slp_tree child;
1824 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
1825 struct loop *loop = NULL;
1826 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_vinfo);
1827 basic_block bb = NULL;
1828
1829 if (!node)
1830 return;
1831
1832 if (loop_vinfo)
1833 loop = LOOP_VINFO_LOOP (loop_vinfo);
1834 else
1835 bb = BB_VINFO_BB (bb_vinfo);
1836
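  /* A pure-SLP stmt becomes hybrid when its scalar result is also consumed
     by a relevant stmt outside any SLP instance (other than the reduction
     PHI that closes a reduction cycle).  */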
1837 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, stmt)
1838 if (PURE_SLP_STMT (vinfo_for_stmt (stmt))
1839 && TREE_CODE (gimple_op (stmt, 0)) == SSA_NAME)
1840 FOR_EACH_IMM_USE_STMT (use_stmt, imm_iter, gimple_op (stmt, 0))
1841 if (gimple_bb (use_stmt)
1842 && ((loop && flow_bb_inside_loop_p (loop, gimple_bb (use_stmt)))
1843 || bb == gimple_bb (use_stmt))
1844 && (stmt_vinfo = vinfo_for_stmt (use_stmt))
1845 && !STMT_SLP_TYPE (stmt_vinfo)
1846 && (STMT_VINFO_RELEVANT (stmt_vinfo)
1847 || VECTORIZABLE_CYCLE_DEF (STMT_VINFO_DEF_TYPE (stmt_vinfo)))
1848 && !(gimple_code (use_stmt) == GIMPLE_PHI
1849 && STMT_VINFO_DEF_TYPE (stmt_vinfo)
1850 == vect_reduction_def))
1851 vect_mark_slp_stmts (node, hybrid, i);
1852
1853 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
1854 vect_detect_hybrid_slp_stmts (child);
1855 }
1856
1857
1858 /* Find stmts that must be both vectorized and SLPed. */
1859
1860 void
1861 vect_detect_hybrid_slp (loop_vec_info loop_vinfo)
1862 {
1863 unsigned int i;
1864 vec<slp_instance> slp_instances = LOOP_VINFO_SLP_INSTANCES (loop_vinfo);
1865 slp_instance instance;
1866
1867 if (dump_enabled_p ())
1868 dump_printf_loc (MSG_NOTE, vect_location, "=== vect_detect_hybrid_slp ===");
1869
1870 FOR_EACH_VEC_ELT (slp_instances, i, instance)
1871 vect_detect_hybrid_slp_stmts (SLP_INSTANCE_TREE (instance));
1872 }
1873
1874
1875 /* Create and initialize a new bb_vec_info struct for BB, as well as
1876 stmt_vec_info structs for all the stmts in it. */
1877
1878 static bb_vec_info
1879 new_bb_vec_info (basic_block bb)
1880 {
1881 bb_vec_info res = NULL;
1882 gimple_stmt_iterator gsi;
1883
1884 res = (bb_vec_info) xcalloc (1, sizeof (struct _bb_vec_info));
1885 BB_VINFO_BB (res) = bb;
1886
1887 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
1888 {
1889 gimple stmt = gsi_stmt (gsi);
1890 gimple_set_uid (stmt, 0);
1891 set_vinfo_for_stmt (stmt, new_stmt_vec_info (stmt, NULL, res));
1892 }
1893
1894 BB_VINFO_GROUPED_STORES (res).create (10);
1895 BB_VINFO_SLP_INSTANCES (res).create (2);
1896 BB_VINFO_TARGET_COST_DATA (res) = init_cost (NULL);
1897
1898 bb->aux = res;
1899 return res;
1900 }
1901
1902
1903 /* Free BB_VINFO struct, as well as all the stmt_vec_info structs of all the
1904 stmts in the basic block. */
1905
1906 static void
1907 destroy_bb_vec_info (bb_vec_info bb_vinfo)
1908 {
1909 vec<slp_instance> slp_instances;
1910 slp_instance instance;
1911 basic_block bb;
1912 gimple_stmt_iterator si;
1913 unsigned i;
1914
1915 if (!bb_vinfo)
1916 return;
1917
1918 bb = BB_VINFO_BB (bb_vinfo);
1919
1920 for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
1921 {
1922 gimple stmt = gsi_stmt (si);
1923 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1924
1925 if (stmt_info)
1926 /* Free stmt_vec_info. */
1927 free_stmt_vec_info (stmt);
1928 }
1929
1930 free_data_refs (BB_VINFO_DATAREFS (bb_vinfo));
1931 free_dependence_relations (BB_VINFO_DDRS (bb_vinfo));
1932 BB_VINFO_GROUPED_STORES (bb_vinfo).release ();
1933 slp_instances = BB_VINFO_SLP_INSTANCES (bb_vinfo);
1934 FOR_EACH_VEC_ELT (slp_instances, i, instance)
1935 vect_free_slp_instance (instance);
1936 BB_VINFO_SLP_INSTANCES (bb_vinfo).release ();
1937 destroy_cost_data (BB_VINFO_TARGET_COST_DATA (bb_vinfo));
1938 free (bb_vinfo);
1939 bb->aux = NULL;
1940 }
1941
1942
1943 /* Analyze statements contained in SLP tree node after recursively analyzing
1944 the subtree. Return TRUE if the operations are supported. */
1945
1946 static bool
1947 vect_slp_analyze_node_operations (bb_vec_info bb_vinfo, slp_tree node)
1948 {
1949 bool dummy;
1950 int i;
1951 gimple stmt;
1952 slp_tree child;
1953
1954 if (!node)
1955 return true;
1956
1957 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
1958 if (!vect_slp_analyze_node_operations (bb_vinfo, child))
1959 return false;
1960
1961 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, stmt)
1962 {
1963 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1964 gcc_assert (stmt_info);
1965 gcc_assert (PURE_SLP_STMT (stmt_info));
1966
1967 if (!vect_analyze_stmt (stmt, &dummy, node))
1968 return false;
1969 }
1970
1971 return true;
1972 }
1973
1974
1975 /* Analyze statements in SLP instances of the basic block. Return TRUE if the
1976 operations are supported. */
1977
1978 static bool
1979 vect_slp_analyze_operations (bb_vec_info bb_vinfo)
1980 {
1981 vec<slp_instance> slp_instances = BB_VINFO_SLP_INSTANCES (bb_vinfo);
1982 slp_instance instance;
1983 int i;
1984
1985 for (i = 0; slp_instances.iterate (i, &instance); )
1986 {
1987 if (!vect_slp_analyze_node_operations (bb_vinfo,
1988 SLP_INSTANCE_TREE (instance)))
1989 {
1990 vect_free_slp_instance (instance);
1991 slp_instances.ordered_remove (i);
1992 }
1993 else
1994 i++;
1995 }
1996
1997 if (!slp_instances.length ())
1998 return false;
1999
2000 return true;
2001 }
2002
2003 /* Check if vectorization of the basic block is profitable. */
2004
2005 static bool
2006 vect_bb_vectorization_profitable_p (bb_vec_info bb_vinfo)
2007 {
2008 vec<slp_instance> slp_instances = BB_VINFO_SLP_INSTANCES (bb_vinfo);
2009 slp_instance instance;
2010 int i, j;
2011 unsigned int vec_inside_cost = 0, vec_outside_cost = 0, scalar_cost = 0;
2012 unsigned int vec_prologue_cost = 0, vec_epilogue_cost = 0;
2013 unsigned int stmt_cost;
2014 gimple stmt;
2015 gimple_stmt_iterator si;
2016 basic_block bb = BB_VINFO_BB (bb_vinfo);
2017 void *target_cost_data = BB_VINFO_TARGET_COST_DATA (bb_vinfo);
2018 stmt_vec_info stmt_info = NULL;
2019 stmt_vector_for_cost body_cost_vec;
2020 stmt_info_for_cost *ci;
2021
2022 /* Calculate vector costs. */
2023 FOR_EACH_VEC_ELT (slp_instances, i, instance)
2024 {
2025 body_cost_vec = SLP_INSTANCE_BODY_COST_VEC (instance);
2026
2027 FOR_EACH_VEC_ELT (body_cost_vec, j, ci)
2028 {
2029 stmt_info = ci->stmt ? vinfo_for_stmt (ci->stmt) : NULL;
2030 (void) add_stmt_cost (target_cost_data, ci->count, ci->kind,
2031 stmt_info, ci->misalign, vect_body);
2032 }
2033 }
2034
2035 /* Calculate scalar cost. */
2036 for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
2037 {
2038 stmt = gsi_stmt (si);
2039 stmt_info = vinfo_for_stmt (stmt);
2040
2041 if (!stmt_info || !STMT_VINFO_VECTORIZABLE (stmt_info)
2042 || !PURE_SLP_STMT (stmt_info))
2043 continue;
2044
2045 if (STMT_VINFO_DATA_REF (stmt_info))
2046 {
2047 if (DR_IS_READ (STMT_VINFO_DATA_REF (stmt_info)))
2048 stmt_cost = vect_get_stmt_cost (scalar_load);
2049 else
2050 stmt_cost = vect_get_stmt_cost (scalar_store);
2051 }
2052 else
2053 stmt_cost = vect_get_stmt_cost (scalar_stmt);
2054
2055 scalar_cost += stmt_cost;
2056 }
2057
2058 /* Complete the target-specific cost calculation. */
2059 finish_cost (BB_VINFO_TARGET_COST_DATA (bb_vinfo), &vec_prologue_cost,
2060 &vec_inside_cost, &vec_epilogue_cost);
2061
2062 vec_outside_cost = vec_prologue_cost + vec_epilogue_cost;
2063
2064 if (dump_enabled_p ())
2065 {
2066 dump_printf_loc (MSG_NOTE, vect_location, "Cost model analysis: \n");
2067 dump_printf (MSG_NOTE, " Vector inside of basic block cost: %d\n",
2068 vec_inside_cost);
2069 dump_printf (MSG_NOTE, " Vector prologue cost: %d\n", vec_prologue_cost);
2070 dump_printf (MSG_NOTE, " Vector epilogue cost: %d\n", vec_epilogue_cost);
2071 dump_printf (MSG_NOTE, " Scalar cost of basic block: %d\n", scalar_cost);
2072 }
2073
2074 /* Vectorization is profitable if its cost is less than the cost of the
2075 scalar version. */
2076 if (vec_outside_cost + vec_inside_cost >= scalar_cost)
2077 return false;
2078
2079 return true;
2080 }
2081
2082 /* Check if the basic block can be vectorized. */
2083
2084 static bb_vec_info
2085 vect_slp_analyze_bb_1 (basic_block bb)
2086 {
2087 bb_vec_info bb_vinfo;
2088 vec<slp_instance> slp_instances;
2089 slp_instance instance;
2090 int i;
2091 int min_vf = 2;
2092
2093 bb_vinfo = new_bb_vec_info (bb);
2094 if (!bb_vinfo)
2095 return NULL;
2096
2097 if (!vect_analyze_data_refs (NULL, bb_vinfo, &min_vf))
2098 {
2099 if (dump_enabled_p ())
2100 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2101 "not vectorized: unhandled data-ref in basic "
2102 "block.\n");
2103
2104 destroy_bb_vec_info (bb_vinfo);
2105 return NULL;
2106 }
2107
2108 if (BB_VINFO_DATAREFS (bb_vinfo).length () < 2)
2109 {
2110 if (dump_enabled_p ())
2111 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2112 "not vectorized: not enough data-refs in "
2113 "basic block.\n");
2114
2115 destroy_bb_vec_info (bb_vinfo);
2116 return NULL;
2117 }
2118
2119 if (!vect_analyze_data_ref_accesses (NULL, bb_vinfo))
2120 {
2121 if (dump_enabled_p ())
2122 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2123 "not vectorized: unhandled data access in "
2124 "basic block.\n");
2125
2126 destroy_bb_vec_info (bb_vinfo);
2127 return NULL;
2128 }
2129
2130 vect_pattern_recog (NULL, bb_vinfo);
2131
2132 if (!vect_slp_analyze_data_ref_dependences (bb_vinfo))
2133 {
2134 if (dump_enabled_p ())
2135 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2136 "not vectorized: unhandled data dependence "
2137 "in basic block.\n");
2138
2139 destroy_bb_vec_info (bb_vinfo);
2140 return NULL;
2141 }
2142
2143 if (!vect_analyze_data_refs_alignment (NULL, bb_vinfo))
2144 {
2145 if (dump_enabled_p ())
2146 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2147 "not vectorized: bad data alignment in basic "
2148 "block.\n");
2149
2150 destroy_bb_vec_info (bb_vinfo);
2151 return NULL;
2152 }
2153
2154 /* Check the SLP opportunities in the basic block, analyze and build SLP
2155 trees. */
2156 if (!vect_analyze_slp (NULL, bb_vinfo))
2157 {
2158 if (dump_enabled_p ())
2159 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2160 "not vectorized: failed to find SLP opportunities "
2161 "in basic block.\n");
2162
2163 destroy_bb_vec_info (bb_vinfo);
2164 return NULL;
2165 }
2166
2167 slp_instances = BB_VINFO_SLP_INSTANCES (bb_vinfo);
2168
2169 /* Mark all the statements that we want to vectorize as pure SLP and
2170 relevant. */
2171 FOR_EACH_VEC_ELT (slp_instances, i, instance)
2172 {
2173 vect_mark_slp_stmts (SLP_INSTANCE_TREE (instance), pure_slp, -1);
2174 vect_mark_slp_stmts_relevant (SLP_INSTANCE_TREE (instance));
2175 }
2176
2177 if (!vect_verify_datarefs_alignment (NULL, bb_vinfo))
2178 {
2179 if (dump_enabled_p ())
2180 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2181 "not vectorized: unsupported alignment in basic "
2182 "block.\n");
2183 destroy_bb_vec_info (bb_vinfo);
2184 return NULL;
2185 }
2186
2187 if (!vect_slp_analyze_operations (bb_vinfo))
2188 {
2189 if (dump_enabled_p ())
2190 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2191 "not vectorized: bad operation in basic block.\n");
2192
2193 destroy_bb_vec_info (bb_vinfo);
2194 return NULL;
2195 }
2196
2197 /* Cost model: check if the vectorization is worthwhile. */
2198 if (flag_vect_cost_model
2199 && !vect_bb_vectorization_profitable_p (bb_vinfo))
2200 {
2201 if (dump_enabled_p ())
2202 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2203 "not vectorized: vectorization is not "
2204 "profitable.\n");
2205
2206 destroy_bb_vec_info (bb_vinfo);
2207 return NULL;
2208 }
2209
2210 if (dump_enabled_p ())
2211 dump_printf_loc (MSG_NOTE, vect_location,
2212 "Basic block will be vectorized using SLP\n");
2213
2214 return bb_vinfo;
2215 }
2216
2217
2218 bb_vec_info
2219 vect_slp_analyze_bb (basic_block bb)
2220 {
2221 bb_vec_info bb_vinfo;
2222 int insns = 0;
2223 gimple_stmt_iterator gsi;
2224 unsigned int vector_sizes;
2225
2226 if (dump_enabled_p ())
2227 dump_printf_loc (MSG_NOTE, vect_location, "===vect_slp_analyze_bb===\n");
2228
2229 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
2230 {
2231 gimple stmt = gsi_stmt (gsi);
2232 if (!is_gimple_debug (stmt)
2233 && !gimple_nop_p (stmt)
2234 && gimple_code (stmt) != GIMPLE_LABEL)
2235 insns++;
2236 }
2237
2238 if (insns > PARAM_VALUE (PARAM_SLP_MAX_INSNS_IN_BB))
2239 {
2240 if (dump_enabled_p ())
2241 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2242 "not vectorized: too many instructions in "
2243 "basic block.\n");
2244
2245 return NULL;
2246 }
2247
2248 /* Autodetect first vector size we try. */
2249 current_vector_size = 0;
2250 vector_sizes = targetm.vectorize.autovectorize_vector_sizes ();
2251
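  /* For example, assuming a target that advertises 32-byte and 16-byte
     vectors, VECTOR_SIZES is the bit mask 32 | 16; whenever analysis with
     the current size fails, the loop below clears that size from the mask
     and retries with the largest remaining one.  */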
2252 while (1)
2253 {
2254 bb_vinfo = vect_slp_analyze_bb_1 (bb);
2255 if (bb_vinfo)
2256 return bb_vinfo;
2257
2258 destroy_bb_vec_info (bb_vinfo);
2259
2260 vector_sizes &= ~current_vector_size;
2261 if (vector_sizes == 0
2262 || current_vector_size == 0)
2263 return NULL;
2264
2265 /* Try the next biggest vector size. */
2266 current_vector_size = 1 << floor_log2 (vector_sizes);
2267 if (dump_enabled_p ())
2268 dump_printf_loc (MSG_NOTE, vect_location,
2269 "***** Re-trying analysis with "
2270 "vector size %d\n", current_vector_size);
2271 }
2272 }
2273
2274
2275 /* SLP costs are calculated according to the SLP instance unrolling factor
2276 (i.e., the number of created vector stmts depends on the unrolling factor).
2277 However, the actual number of vector stmts for every SLP node depends on
2278 VF, which is set later in vect_analyze_operations (). Hence, SLP costs
2279 should be updated. In this function we assume that the inside costs
2280 calculated in vect_model_xxx_cost are linear in ncopies. */
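/* For example, assuming the final VF is 8 and an instance was built with an
   unrolling factor of 2, NCOPIES below is 8 / 2 = 4, so each cost entry
   recorded for that instance is added to the target cost data with four
   times its original count.  */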
2281
2282 void
2283 vect_update_slp_costs_according_to_vf (loop_vec_info loop_vinfo)
2284 {
2285 unsigned int i, j, vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
2286 vec<slp_instance> slp_instances = LOOP_VINFO_SLP_INSTANCES (loop_vinfo);
2287 slp_instance instance;
2288 stmt_vector_for_cost body_cost_vec;
2289 stmt_info_for_cost *si;
2290 void *data = LOOP_VINFO_TARGET_COST_DATA (loop_vinfo);
2291
2292 if (dump_enabled_p ())
2293 dump_printf_loc (MSG_NOTE, vect_location,
2294 "=== vect_update_slp_costs_according_to_vf ===");
2295
2296 FOR_EACH_VEC_ELT (slp_instances, i, instance)
2297 {
2298 /* We assume that costs are linear in ncopies. */
2299 int ncopies = vf / SLP_INSTANCE_UNROLLING_FACTOR (instance);
2300
2301 /* Record the instance's instructions in the target cost model.
2302 This was delayed until here because the count of instructions
2303 isn't known beforehand. */
2304 body_cost_vec = SLP_INSTANCE_BODY_COST_VEC (instance);
2305
2306 FOR_EACH_VEC_ELT (body_cost_vec, j, si)
2307 (void) add_stmt_cost (data, si->count * ncopies, si->kind,
2308 vinfo_for_stmt (si->stmt), si->misalign,
2309 vect_body);
2310 }
2311 }
2312
2313
2314 /* For constant and loop invariant defs of SLP_NODE this function returns
2315 (vector) defs (VEC_OPRNDS) that will be used in the vectorized stmts.
2316 OP_NUM determines which operand of the RHS of the scalar stmts we gather
2317 defs for. NUMBER_OF_VECTORS is the number of vector defs to create.
2318 REDUC_INDEX is the index of the reduction operand in the statements, unless
2319 it is -1. */
2320
2321 static void
2322 vect_get_constant_vectors (tree op, slp_tree slp_node,
2323 vec<tree> *vec_oprnds,
2324 unsigned int op_num, unsigned int number_of_vectors,
2325 int reduc_index)
2326 {
2327 vec<gimple> stmts = SLP_TREE_SCALAR_STMTS (slp_node);
2328 gimple stmt = stmts[0];
2329 stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
2330 unsigned nunits;
2331 tree vec_cst;
2332 tree *elts;
2333 unsigned j, number_of_places_left_in_vector;
2334 tree vector_type;
2335 tree vop;
2336 int group_size = stmts.length ();
2337 unsigned int vec_num, i;
2338 unsigned number_of_copies = 1;
2339 vec<tree> voprnds;
2340 voprnds.create (number_of_vectors);
2341 bool constant_p, is_store;
2342 tree neutral_op = NULL;
2343 enum tree_code code = gimple_expr_code (stmt);
2344 gimple def_stmt;
2345 struct loop *loop;
2346 gimple_seq ctor_seq = NULL;
2347
2348 if (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_reduction_def
2349 && reduc_index != -1)
2350 {
2351 op_num = reduc_index - 1;
2352 op = gimple_op (stmt, reduc_index);
2353 /* For additional copies (see the explanation of NUMBER_OF_COPIES below)
2354 we need either neutral operands or the original operands. See
2355 get_initial_def_for_reduction() for details. */
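      /* For example, assuming a PLUS_EXPR reduction over doubles, the
         neutral operand built below is 0.0, and for a MULT_EXPR it is 1.0,
         so the additional copies do not change the reduction result.  */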
2356 switch (code)
2357 {
2358 case WIDEN_SUM_EXPR:
2359 case DOT_PROD_EXPR:
2360 case PLUS_EXPR:
2361 case MINUS_EXPR:
2362 case BIT_IOR_EXPR:
2363 case BIT_XOR_EXPR:
2364 if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (op)))
2365 neutral_op = build_real (TREE_TYPE (op), dconst0);
2366 else
2367 neutral_op = build_int_cst (TREE_TYPE (op), 0);
2368
2369 break;
2370
2371 case MULT_EXPR:
2372 if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (op)))
2373 neutral_op = build_real (TREE_TYPE (op), dconst1);
2374 else
2375 neutral_op = build_int_cst (TREE_TYPE (op), 1);
2376
2377 break;
2378
2379 case BIT_AND_EXPR:
2380 neutral_op = build_int_cst (TREE_TYPE (op), -1);
2381 break;
2382
2383 case MAX_EXPR:
2384 case MIN_EXPR:
2385 def_stmt = SSA_NAME_DEF_STMT (op);
2386 loop = (gimple_bb (stmt))->loop_father;
2387 neutral_op = PHI_ARG_DEF_FROM_EDGE (def_stmt,
2388 loop_preheader_edge (loop));
2389 break;
2390
2391 default:
2392 neutral_op = NULL;
2393 }
2394 }
2395
2396 if (STMT_VINFO_DATA_REF (stmt_vinfo))
2397 {
2398 is_store = true;
2399 op = gimple_assign_rhs1 (stmt);
2400 }
2401 else
2402 is_store = false;
2403
2404 gcc_assert (op);
2405
2406 if (CONSTANT_CLASS_P (op))
2407 constant_p = true;
2408 else
2409 constant_p = false;
2410
2411 vector_type = get_vectype_for_scalar_type (TREE_TYPE (op));
2412 gcc_assert (vector_type);
2413 nunits = TYPE_VECTOR_SUBPARTS (vector_type);
2414
2415 /* NUMBER_OF_COPIES is the number of times we need to use the same values in
2416 created vectors. It is greater than 1 if unrolling is performed.
2417
2418 For example, we have two scalar operands, s1 and s2 (e.g., group of
2419 strided accesses of size two), while NUNITS is four (i.e., four scalars
2420 of this type can be packed in a vector). The output vector will contain
2421 two copies of each scalar operand: {s1, s2, s1, s2}. (NUMBER_OF_COPIES
2422 will be 2).
2423
2424 If GROUP_SIZE > NUNITS, the scalars will be split into several vectors
2425 containing the operands.
2426
2427 For example, NUNITS is four as before, and the group size is 8
2428 (s1, s2, ..., s8). We will create two vectors {s1, s2, s3, s4} and
2429 {s5, s6, s7, s8}. */
2430
2431 number_of_copies = least_common_multiple (nunits, group_size) / group_size;
2432
2433 number_of_places_left_in_vector = nunits;
2434 elts = XALLOCAVEC (tree, nunits);
2435 for (j = 0; j < number_of_copies; j++)
2436 {
2437 for (i = group_size - 1; stmts.iterate (i, &stmt); i--)
2438 {
2439 if (is_store)
2440 op = gimple_assign_rhs1 (stmt);
2441 else
2442 {
2443 switch (code)
2444 {
2445 case COND_EXPR:
2446 if (op_num == 0 || op_num == 1)
2447 {
2448 tree cond = gimple_assign_rhs1 (stmt);
2449 op = TREE_OPERAND (cond, op_num);
2450 }
2451 else
2452 {
2453 if (op_num == 2)
2454 op = gimple_assign_rhs2 (stmt);
2455 else
2456 op = gimple_assign_rhs3 (stmt);
2457 }
2458 break;
2459
2460 case CALL_EXPR:
2461 op = gimple_call_arg (stmt, op_num);
2462 break;
2463
2464 case LSHIFT_EXPR:
2465 case RSHIFT_EXPR:
2466 case LROTATE_EXPR:
2467 case RROTATE_EXPR:
2468 op = gimple_op (stmt, op_num + 1);
2469 /* Unlike the other binary operators, shifts/rotates take
2470 an int shift count instead of an operand of the same
2471 type as the lhs, so make sure the scalar has the right
2472 type if we are dealing with vectors of
2473 long long/long/short/char. */
2474 if (op_num == 1 && constant_p)
2475 op = fold_convert (TREE_TYPE (vector_type), op);
2476 break;
2477
2478 default:
2479 op = gimple_op (stmt, op_num + 1);
2480 break;
2481 }
2482 }
2483
2484 if (reduc_index != -1)
2485 {
2486 loop = (gimple_bb (stmt))->loop_father;
2487 def_stmt = SSA_NAME_DEF_STMT (op);
2488
2489 gcc_assert (loop);
2490
2491 /* Get the def before the loop. In reduction chain we have only
2492 one initial value. */
2493 if ((j != (number_of_copies - 1)
2494 || (GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt))
2495 && i != 0))
2496 && neutral_op)
2497 op = neutral_op;
2498 else
2499 op = PHI_ARG_DEF_FROM_EDGE (def_stmt,
2500 loop_preheader_edge (loop));
2501 }
2502
2503 /* Create 'vect_ = {op0,op1,...,opn}'. */
2504 number_of_places_left_in_vector--;
2505 if (!types_compatible_p (TREE_TYPE (vector_type), TREE_TYPE (op)))
2506 {
2507 if (constant_p)
2508 {
2509 op = fold_unary (VIEW_CONVERT_EXPR,
2510 TREE_TYPE (vector_type), op);
2511 gcc_assert (op && CONSTANT_CLASS_P (op));
2512 }
2513 else
2514 {
2515 tree new_temp
2516 = make_ssa_name (TREE_TYPE (vector_type), NULL);
2517 gimple init_stmt;
2518 op = build1 (VIEW_CONVERT_EXPR, TREE_TYPE (vector_type),
2519 op);
2520 init_stmt
2521 = gimple_build_assign_with_ops (VIEW_CONVERT_EXPR,
2522 new_temp, op, NULL_TREE);
2523 gimple_seq_add_stmt (&ctor_seq, init_stmt);
2524 op = new_temp;
2525 }
2526 }
2527 elts[number_of_places_left_in_vector] = op;
2528
2529 if (number_of_places_left_in_vector == 0)
2530 {
2531 number_of_places_left_in_vector = nunits;
2532
2533 if (constant_p)
2534 vec_cst = build_vector (vector_type, elts);
2535 else
2536 {
2537 vec<constructor_elt, va_gc> *v;
2538 unsigned k;
2539 vec_alloc (v, nunits);
2540 for (k = 0; k < nunits; ++k)
2541 CONSTRUCTOR_APPEND_ELT (v, NULL_TREE, elts[k]);
2542 vec_cst = build_constructor (vector_type, v);
2543 }
2544 voprnds.quick_push (vect_init_vector (stmt, vec_cst,
2545 vector_type, NULL));
2546 if (ctor_seq != NULL)
2547 {
2548 gimple init_stmt = SSA_NAME_DEF_STMT (voprnds.last ());
2549 gimple_stmt_iterator gsi = gsi_for_stmt (init_stmt);
2550 gsi_insert_seq_before_without_update (&gsi, ctor_seq,
2551 GSI_SAME_STMT);
2552 ctor_seq = NULL;
2553 }
2554 }
2555 }
2556 }
2557
2558 /* Since the vectors were created in reverse order, push them onto
2559 VEC_OPRNDS in reverse to restore the original order. */
2560 vec_num = voprnds.length ();
2561 for (j = vec_num; j != 0; j--)
2562 {
2563 vop = voprnds[j - 1];
2564 vec_oprnds->quick_push (vop);
2565 }
2566
2567 voprnds.release ();
2568
2569 /* If VF is greater than the unrolling factor needed for the SLP group of
2570 stmts, the NUMBER_OF_VECTORS to be created is greater than
2571 NUMBER_OF_SCALARS/NUNITS or NUNITS/NUMBER_OF_SCALARS, and hence we have
2572 to replicate the vectors. */
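  /* For example, assuming two vector defs v0 and v1 were built above but
     NUMBER_OF_VECTORS is 4 and there is no neutral operand, the existing
     defs are pushed again, yielding {v0, v1, v0, v1}.  */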
2573 while (number_of_vectors > vec_oprnds->length ())
2574 {
2575 tree neutral_vec = NULL;
2576
2577 if (neutral_op)
2578 {
2579 if (!neutral_vec)
2580 neutral_vec = build_vector_from_val (vector_type, neutral_op);
2581
2582 vec_oprnds->quick_push (neutral_vec);
2583 }
2584 else
2585 {
2586 for (i = 0; vec_oprnds->iterate (i, &vop) && i < vec_num; i++)
2587 vec_oprnds->quick_push (vop);
2588 }
2589 }
2590 }
2591
2592
2593 /* Get vectorized definitions from SLP_NODE that contains corresponding
2594 vectorized def-stmts. */
2595
2596 static void
2597 vect_get_slp_vect_defs (slp_tree slp_node, vec<tree> *vec_oprnds)
2598 {
2599 tree vec_oprnd;
2600 gimple vec_def_stmt;
2601 unsigned int i;
2602
2603 gcc_assert (SLP_TREE_VEC_STMTS (slp_node).exists ());
2604
2605 FOR_EACH_VEC_ELT (SLP_TREE_VEC_STMTS (slp_node), i, vec_def_stmt)
2606 {
2607 gcc_assert (vec_def_stmt);
2608 vec_oprnd = gimple_get_lhs (vec_def_stmt);
2609 vec_oprnds->quick_push (vec_oprnd);
2610 }
2611 }
2612
2613
2614 /* Get vectorized definitions for SLP_NODE.
2615 If the scalar definitions are loop invariants or constants, collect them and
2616 call vect_get_constant_vectors() to create vector stmts.
2617 Otherwise, the def-stmts must be already vectorized and the vectorized stmts
2618 must be stored in the corresponding child of SLP_NODE, and we call
2619 vect_get_slp_vect_defs () to retrieve them. */
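/* For example, assuming the scalar stmts of SLP_NODE have the form
   a[i] = b[i] + 5, the defs for the operand b[i] are taken already
   vectorized from the child node holding the loads, while for the constant
   5 vector defs are built by vect_get_constant_vectors ().  */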
2620
2621 void
2622 vect_get_slp_defs (vec<tree> ops, slp_tree slp_node,
2623 vec<vec<tree> > *vec_oprnds, int reduc_index)
2624 {
2625 gimple first_stmt;
2626 int number_of_vects = 0, i;
2627 unsigned int child_index = 0;
2628 HOST_WIDE_INT lhs_size_unit, rhs_size_unit;
2629 slp_tree child = NULL;
2630 vec<tree> vec_defs;
2631 tree oprnd;
2632 bool vectorized_defs;
2633
2634 first_stmt = SLP_TREE_SCALAR_STMTS (slp_node)[0];
2635 FOR_EACH_VEC_ELT (ops, i, oprnd)
2636 {
2637 /* For each operand we check if it has vectorized definitions in a child
2638 node or we need to create them (for invariants and constants). We
2639 check if the LHS of the first stmt of the next child matches OPRND.
2640 If it does, we found the correct child. Otherwise, we call
2641 vect_get_constant_vectors () and do not advance CHILD_INDEX, in order
2642 to check this child node again for the next operand. */
2643 vectorized_defs = false;
2644 if (SLP_TREE_CHILDREN (slp_node).length () > child_index)
2645 {
2646 child = (slp_tree) SLP_TREE_CHILDREN (slp_node)[child_index];
2647
2648 /* We have to check both pattern and original def, if available. */
2649 gimple first_def = SLP_TREE_SCALAR_STMTS (child)[0];
2650 gimple related = STMT_VINFO_RELATED_STMT (vinfo_for_stmt (first_def));
2651
2652 if (operand_equal_p (oprnd, gimple_get_lhs (first_def), 0)
2653 || (related
2654 && operand_equal_p (oprnd, gimple_get_lhs (related), 0)))
2655 {
2656 /* The number of vector defs is determined by the number of
2657 vector statements in the node from which we get those
2658 statements. */
2659 number_of_vects = SLP_TREE_NUMBER_OF_VEC_STMTS (child);
2660 vectorized_defs = true;
2661 child_index++;
2662 }
2663 }
2664
2665 if (!vectorized_defs)
2666 {
2667 if (i == 0)
2668 {
2669 number_of_vects = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
2670 /* Number of vector stmts was calculated according to LHS in
2671 vect_schedule_slp_instance (), fix it by replacing LHS with
2672 RHS, if necessary. See vect_get_smallest_scalar_type () for
2673 details. */
2674 vect_get_smallest_scalar_type (first_stmt, &lhs_size_unit,
2675 &rhs_size_unit);
2676 if (rhs_size_unit != lhs_size_unit)
2677 {
2678 number_of_vects *= rhs_size_unit;
2679 number_of_vects /= lhs_size_unit;
2680 }
2681 }
2682 }
2683
2684 /* Allocate memory for vectorized defs. */
2685 vec_defs = vNULL;
2686 vec_defs.create (number_of_vects);
2687
2688 /* For reduction defs we call vect_get_constant_vectors (), since we are
2689 looking for initial loop invariant values. */
2690 if (vectorized_defs && reduc_index == -1)
2691 /* The defs are already vectorized. */
2692 vect_get_slp_vect_defs (child, &vec_defs);
2693 else
2694 /* Build vectors from scalar defs. */
2695 vect_get_constant_vectors (oprnd, slp_node, &vec_defs, i,
2696 number_of_vects, reduc_index);
2697
2698 vec_oprnds->quick_push (vec_defs);
2699
2700 /* For reductions, we only need initial values. */
2701 if (reduc_index != -1)
2702 return;
2703 }
2704 }
2705
2706
2707 /* Create NCOPIES permutation statements using the (vector constant) mask
2708 MASK and two input vectors placed in DR_CHAIN at FIRST_VEC_INDX and
2709 SECOND_VEC_INDX for the first copy, shifting by STRIDE elements of
2710 DR_CHAIN for every further copy.
2711 (STRIDE is the number of vectorized stmts for NODE divided by the number of
2712 copies).
2713 VECT_STMTS_COUNTER specifies the index in the vectorized stmts of NODE where
2714 the created stmts must be inserted. */
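/* For example, assuming NODE needs four vector stmts and NCOPIES is 2,
   STRIDE is 2: copy I of the permutation reads DR_CHAIN entries
   FIRST_VEC_INDX + 2 * I and SECOND_VEC_INDX + 2 * I, and its result is
   stored in slot 2 * I + VECT_STMTS_COUNTER of the node's vector stmts.  */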
2715
2716 static inline void
2717 vect_create_mask_and_perm (gimple stmt, gimple next_scalar_stmt,
2718 tree mask, int first_vec_indx, int second_vec_indx,
2719 gimple_stmt_iterator *gsi, slp_tree node,
2720 tree vectype, vec<tree> dr_chain,
2721 int ncopies, int vect_stmts_counter)
2722 {
2723 tree perm_dest;
2724 gimple perm_stmt = NULL;
2725 stmt_vec_info next_stmt_info;
2726 int i, stride;
2727 tree first_vec, second_vec, data_ref;
2728
2729 stride = SLP_TREE_NUMBER_OF_VEC_STMTS (node) / ncopies;
2730
2731 /* Initialize the vect stmts of NODE to properly insert the generated
2732 stmts later. */
2733 for (i = SLP_TREE_VEC_STMTS (node).length ();
2734 i < (int) SLP_TREE_NUMBER_OF_VEC_STMTS (node); i++)
2735 SLP_TREE_VEC_STMTS (node).quick_push (NULL);
2736
2737 perm_dest = vect_create_destination_var (gimple_assign_lhs (stmt), vectype);
2738 for (i = 0; i < ncopies; i++)
2739 {
2740 first_vec = dr_chain[first_vec_indx];
2741 second_vec = dr_chain[second_vec_indx];
2742
2743 /* Generate the permute statement. */
2744 perm_stmt = gimple_build_assign_with_ops (VEC_PERM_EXPR, perm_dest,
2745 first_vec, second_vec, mask);
2746 data_ref = make_ssa_name (perm_dest, perm_stmt);
2747 gimple_set_lhs (perm_stmt, data_ref);
2748 vect_finish_stmt_generation (stmt, perm_stmt, gsi);
2749
2750 /* Store the vector statement in NODE. */
2751 SLP_TREE_VEC_STMTS (node)[stride * i + vect_stmts_counter] = perm_stmt;
2752
2753 first_vec_indx += stride;
2754 second_vec_indx += stride;
2755 }
2756
2757 /* Mark the scalar stmt as vectorized. */
2758 next_stmt_info = vinfo_for_stmt (next_scalar_stmt);
2759 STMT_VINFO_VEC_STMT (next_stmt_info) = perm_stmt;
2760 }
2761
2762
2763 /* Given FIRST_MASK_ELEMENT, the mask element in element representation,
2764 return in CURRENT_MASK_ELEMENT its equivalent in the target-specific
2765 representation. Check that the mask is valid and return FALSE if not.
2766 Set NEED_NEXT_VECTOR to TRUE if the permutation requires moving to the
2767 next vector, i.e., the current first vector is not needed. */
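/* For instance, in the a0b0c0... example in vect_transform_slp_perm_load
   below, the raw mask {6,9,9,9} for the last a-vector is rewritten here to
   {2,5,5,5}: once element 9 shows that the first input vector is no longer
   needed, MASK_NUNITS (4) is subtracted from the element already emitted
   and from the remaining ones.  */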
2768
2769 static bool
2770 vect_get_mask_element (gimple stmt, int first_mask_element, int m,
2771 int mask_nunits, bool only_one_vec, int index,
2772 unsigned char *mask, int *current_mask_element,
2773 bool *need_next_vector, int *number_of_mask_fixes,
2774 bool *mask_fixed, bool *needs_first_vector)
2775 {
2776 int i;
2777
2778 /* Convert to target specific representation. */
2779 *current_mask_element = first_mask_element + m;
2780 /* Adjust the value in case it's a mask for the second and third vectors. */
2781 *current_mask_element -= mask_nunits * (*number_of_mask_fixes - 1);
2782
2783 if (*current_mask_element < mask_nunits)
2784 *needs_first_vector = true;
2785
2786 /* We have only one input vector to permute but the mask accesses values in
2787 the next vector as well. */
2788 if (only_one_vec && *current_mask_element >= mask_nunits)
2789 {
2790 if (dump_enabled_p ())
2791 {
2792 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2793 "permutation requires at least two vectors ");
2794 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
2795 }
2796
2797 return false;
2798 }
2799
2800 /* The mask requires the next vector. */
2801 if (*current_mask_element >= mask_nunits * 2)
2802 {
2803 if (*needs_first_vector || *mask_fixed)
2804 {
2805 /* We either need the first vector too or have already moved to the
2806 next vector. In both cases, this permutation needs three
2807 vectors. */
2808 if (dump_enabled_p ())
2809 {
2810 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2811 "permutation requires at "
2812 "least three vectors ");
2813 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
2814 }
2815
2816 return false;
2817 }
2818
2819 /* We move to the next vector, dropping the first one and working with
2820 the second and the third - we need to adjust the values of the mask
2821 accordingly. */
2822 *current_mask_element -= mask_nunits * *number_of_mask_fixes;
2823
2824 for (i = 0; i < index; i++)
2825 mask[i] -= mask_nunits * *number_of_mask_fixes;
2826
2827 (*number_of_mask_fixes)++;
2828 *mask_fixed = true;
2829 }
2830
2831 *need_next_vector = *mask_fixed;
2832
2833 /* This was the last element of this mask. Start a new one. */
2834 if (index == mask_nunits - 1)
2835 {
2836 *number_of_mask_fixes = 1;
2837 *mask_fixed = false;
2838 *needs_first_vector = false;
2839 }
2840
2841 return true;
2842 }
2843
2844
2845 /* Generate vector permute statements from a list of loads in DR_CHAIN.
2846 If ANALYZE_ONLY is TRUE, only check that it is possible to create valid
2847 permute statements for SLP_NODE_INSTANCE. */
2848 bool
2849 vect_transform_slp_perm_load (gimple stmt, vec<tree> dr_chain,
2850 gimple_stmt_iterator *gsi, int vf,
2851 slp_instance slp_node_instance, bool analyze_only)
2852 {
2853 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2854 tree mask_element_type = NULL_TREE, mask_type;
2855 int i, j, k, nunits, vec_index = 0, scalar_index;
2856 slp_tree node;
2857 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
2858 gimple next_scalar_stmt;
2859 int group_size = SLP_INSTANCE_GROUP_SIZE (slp_node_instance);
2860 int first_mask_element;
2861 int index, unroll_factor, current_mask_element, ncopies;
2862 unsigned char *mask;
2863 bool only_one_vec = false, need_next_vector = false;
2864 int first_vec_index, second_vec_index, orig_vec_stmts_num, vect_stmts_counter;
2865 int number_of_mask_fixes = 1;
2866 bool mask_fixed = false;
2867 bool needs_first_vector = false;
2868 enum machine_mode mode;
2869
2870 mode = TYPE_MODE (vectype);
2871
2872 if (!can_vec_perm_p (mode, false, NULL))
2873 {
2874 if (dump_enabled_p ())
2875 {
2876 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2877 "no vect permute for ");
2878 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
2879 }
2880 return false;
2881 }
2882
2883 /* The generic VEC_PERM_EXPR code always uses an integral type of the
2884 same size as the vector element being permuted. */
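  /* For example, assuming a 16-byte V4SF VECTYPE, MASK_ELEMENT_TYPE below
     is a 32-bit integer type and MASK_TYPE a four-element vector of it.  */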
2885 mask_element_type = lang_hooks.types.type_for_mode
2886 (int_mode_for_mode (TYPE_MODE (TREE_TYPE (vectype))), 1);
2887 mask_type = get_vectype_for_scalar_type (mask_element_type);
2888 nunits = TYPE_VECTOR_SUBPARTS (vectype);
2889 mask = XALLOCAVEC (unsigned char, nunits);
2890 unroll_factor = SLP_INSTANCE_UNROLLING_FACTOR (slp_node_instance);
2891
2892 /* The number of vector stmts to generate based only on the
2893 SLP_NODE_INSTANCE unrolling factor. */
2894 orig_vec_stmts_num = group_size *
2895 SLP_INSTANCE_UNROLLING_FACTOR (slp_node_instance) / nunits;
2896 if (orig_vec_stmts_num == 1)
2897 only_one_vec = true;
2898
2899 /* The number of copies is determined by the final vectorization factor
2900 relative to the SLP_NODE_INSTANCE unrolling factor. */
2901 ncopies = vf / SLP_INSTANCE_UNROLLING_FACTOR (slp_node_instance);
2902
2903 /* Generate permutation masks for every NODE. Number of masks for each NODE
2904 is equal to GROUP_SIZE.
2905 E.g., we have a group of three nodes with three loads from the same
2906 location in each node, and the vector size is 4. I.e., we have an
2907 a0b0c0a1b1c1... sequence and we need to create the following vectors:
2908 for a's: a0a0a0a1 a1a1a2a2 a2a3a3a3
2909 for b's: b0b0b0b1 b1b1b2b2 b2b3b3b3
2910 ...
2911
2912 The masks for a's should be: {0,0,0,3} {3,3,6,6} {6,9,9,9}.
2913 The last mask is illegal since we assume two operands for permute
2914 operation, and the mask element values can't be outside that range.
2915 Hence, the last mask must be converted into {2,5,5,5}.
2916 For the first two permutations we need the first and the second input
2917 vectors: {a0,b0,c0,a1} and {b1,c1,a2,b2}, and for the last permutation
2918 we need the second and the third vectors: {b1,c1,a2,b2} and
2919 {c2,a3,b3,c3}. */
2920
2921 FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (slp_node_instance), i, node)
2922 {
2923 scalar_index = 0;
2924 index = 0;
2925 vect_stmts_counter = 0;
2926 vec_index = 0;
2927 first_vec_index = vec_index++;
2928 if (only_one_vec)
2929 second_vec_index = first_vec_index;
2930 else
2931 second_vec_index = vec_index++;
2932
2933 for (j = 0; j < unroll_factor; j++)
2934 {
2935 for (k = 0; k < group_size; k++)
2936 {
2937 first_mask_element = i + j * group_size;
2938 if (!vect_get_mask_element (stmt, first_mask_element, 0,
2939 nunits, only_one_vec, index,
2940 mask, &current_mask_element,
2941 &need_next_vector,
2942 &number_of_mask_fixes, &mask_fixed,
2943 &needs_first_vector))
2944 return false;
2945 mask[index++] = current_mask_element;
2946
2947 if (index == nunits)
2948 {
2949 tree mask_vec, *mask_elts;
2950 int l;
2951
2952 if (!can_vec_perm_p (mode, false, mask))
2953 {
2954 if (dump_enabled_p ())
2955 {
2956 dump_printf_loc (MSG_MISSED_OPTIMIZATION,
2957 vect_location,
2958 "unsupported vect permute { ");
2959 for (i = 0; i < nunits; ++i)
2960 dump_printf (MSG_MISSED_OPTIMIZATION, "%d ",
2961 mask[i]);
2962 dump_printf (MSG_MISSED_OPTIMIZATION, "}\n");
2963 }
2964 return false;
2965 }
2966
2967 mask_elts = XALLOCAVEC (tree, nunits);
2968 for (l = 0; l < nunits; ++l)
2969 mask_elts[l] = build_int_cst (mask_element_type, mask[l]);
2970 mask_vec = build_vector (mask_type, mask_elts);
2971 index = 0;
2972
2973 if (!analyze_only)
2974 {
2975 if (need_next_vector)
2976 {
2977 first_vec_index = second_vec_index;
2978 second_vec_index = vec_index;
2979 }
2980
2981 next_scalar_stmt
2982 = SLP_TREE_SCALAR_STMTS (node)[scalar_index++];
2983
2984 vect_create_mask_and_perm (stmt, next_scalar_stmt,
2985 mask_vec, first_vec_index, second_vec_index,
2986 gsi, node, vectype, dr_chain,
2987 ncopies, vect_stmts_counter++);
2988 }
2989 }
2990 }
2991 }
2992 }
2993
2994 return true;
2995 }
2996
2997
2998
2999 /* Vectorize SLP instance tree in postorder. */
3000
3001 static bool
3002 vect_schedule_slp_instance (slp_tree node, slp_instance instance,
3003 unsigned int vectorization_factor)
3004 {
3005 gimple stmt;
3006 bool grouped_store, is_store;
3007 gimple_stmt_iterator si;
3008 stmt_vec_info stmt_info;
3009 unsigned int vec_stmts_size, nunits, group_size;
3010 tree vectype;
3011 int i;
3012 slp_tree loads_node;
3013 slp_tree child;
3014
3015 if (!node)
3016 return false;
3017
3018 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
3019 vect_schedule_slp_instance (child, instance, vectorization_factor);
3020
3021 stmt = SLP_TREE_SCALAR_STMTS (node)[0];
3022 stmt_info = vinfo_for_stmt (stmt);
3023
3024 /* VECTYPE is the type of the destination. */
3025 vectype = STMT_VINFO_VECTYPE (stmt_info);
3026 nunits = (unsigned int) TYPE_VECTOR_SUBPARTS (vectype);
3027 group_size = SLP_INSTANCE_GROUP_SIZE (instance);
3028
3029 /* For each SLP instance calculate number of vector stmts to be created
3030 for the scalar stmts in each node of the SLP tree. Number of vector
3031 elements in one vector iteration is the number of scalar elements in
3032 one scalar iteration (GROUP_SIZE) multiplied by VF divided by vector
3033 size. */
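  /* For example, assuming GROUP_SIZE is 2, the vectorization factor is 4
     and NUNITS is 4, two vector stmts are created per node:
     (4 * 2) / 4 == 2.  */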
3034 vec_stmts_size = (vectorization_factor * group_size) / nunits;
3035
3036 /* In case of load permutation we have to allocate vectorized statements for
3037 all the nodes that participate in that permutation. */
3038 if (SLP_INSTANCE_LOAD_PERMUTATION (instance).exists ())
3039 {
3040 FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (instance), i, loads_node)
3041 {
3042 if (!SLP_TREE_VEC_STMTS (loads_node).exists ())
3043 {
3044 SLP_TREE_VEC_STMTS (loads_node).create (vec_stmts_size);
3045 SLP_TREE_NUMBER_OF_VEC_STMTS (loads_node) = vec_stmts_size;
3046 }
3047 }
3048 }
3049
3050 if (!SLP_TREE_VEC_STMTS (node).exists ())
3051 {
3052 SLP_TREE_VEC_STMTS (node).create (vec_stmts_size);
3053 SLP_TREE_NUMBER_OF_VEC_STMTS (node) = vec_stmts_size;
3054 }
3055
3056 if (dump_enabled_p ())
3057 {
3058 dump_printf_loc (MSG_NOTE,vect_location,
3059 "------>vectorizing SLP node starting from: ");
3060 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
3061 }
3062
3063 /* Loads should be inserted before the first load. */
3064 if (SLP_INSTANCE_FIRST_LOAD_STMT (instance)
3065 && STMT_VINFO_GROUPED_ACCESS (stmt_info)
3066 && !REFERENCE_CLASS_P (gimple_get_lhs (stmt))
3067 && SLP_INSTANCE_LOAD_PERMUTATION (instance).exists ())
3068 si = gsi_for_stmt (SLP_INSTANCE_FIRST_LOAD_STMT (instance));
3069 else if (is_pattern_stmt_p (stmt_info))
3070 si = gsi_for_stmt (STMT_VINFO_RELATED_STMT (stmt_info));
3071 else
3072 si = gsi_for_stmt (stmt);
3073
3074 /* Stores should be inserted just before the last store. */
3075 if (STMT_VINFO_GROUPED_ACCESS (stmt_info)
3076 && REFERENCE_CLASS_P (gimple_get_lhs (stmt)))
3077 {
3078 gimple last_store = vect_find_last_store_in_slp_instance (instance);
3079 if (is_pattern_stmt_p (vinfo_for_stmt (last_store)))
3080 last_store = STMT_VINFO_RELATED_STMT (vinfo_for_stmt (last_store));
3081 si = gsi_for_stmt (last_store);
3082 }
3083
3084 /* Mark the first element of the reduction chain as reduction to properly
3085 transform the node. In the analysis phase only the last element of the
3086 chain is marked as reduction. */
3087 if (GROUP_FIRST_ELEMENT (stmt_info) && !STMT_VINFO_GROUPED_ACCESS (stmt_info)
3088 && GROUP_FIRST_ELEMENT (stmt_info) == stmt)
3089 {
3090 STMT_VINFO_DEF_TYPE (stmt_info) = vect_reduction_def;
3091 STMT_VINFO_TYPE (stmt_info) = reduc_vec_info_type;
3092 }
3093
3094 is_store = vect_transform_stmt (stmt, &si, &grouped_store, node, instance);
3095 return is_store;
3096 }
3097
3098 /* Replace the scalar calls in SLP node NODE with assignments setting their
3099 lhs to zero. For loop vectorization this is done in vectorizable_call, but
3100 for SLP it needs to be deferred until the end of vect_schedule_slp, because
3101 multiple SLP instances may refer to the same scalar stmt. */
3102
3103 static void
3104 vect_remove_slp_scalar_calls (slp_tree node)
3105 {
3106 gimple stmt, new_stmt;
3107 gimple_stmt_iterator gsi;
3108 int i;
3109 slp_tree child;
3110 tree lhs;
3111 stmt_vec_info stmt_info;
3112
3113 if (!node)
3114 return;
3115
3116 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
3117 vect_remove_slp_scalar_calls (child);
3118
3119 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, stmt)
3120 {
3121 if (!is_gimple_call (stmt) || gimple_bb (stmt) == NULL)
3122 continue;
3123 stmt_info = vinfo_for_stmt (stmt);
3124 if (stmt_info == NULL
3125 || is_pattern_stmt_p (stmt_info)
3126 || !PURE_SLP_STMT (stmt_info))
3127 continue;
3128 lhs = gimple_call_lhs (stmt);
3129 new_stmt = gimple_build_assign (lhs, build_zero_cst (TREE_TYPE (lhs)));
3130 set_vinfo_for_stmt (new_stmt, stmt_info);
3131 set_vinfo_for_stmt (stmt, NULL);
3132 STMT_VINFO_STMT (stmt_info) = new_stmt;
3133 gsi = gsi_for_stmt (stmt);
3134 gsi_replace (&gsi, new_stmt, false);
3135 SSA_NAME_DEF_STMT (gimple_assign_lhs (new_stmt)) = new_stmt;
3136 }
3137 }
3138
3139 /* Generate vector code for all SLP instances in the loop/basic block. */
3140
3141 bool
3142 vect_schedule_slp (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo)
3143 {
3144 vec<slp_instance> slp_instances;
3145 slp_instance instance;
3146 slp_tree loads_node;
3147 unsigned int i, j, vf;
3148 bool is_store = false;
3149
3150 if (loop_vinfo)
3151 {
3152 slp_instances = LOOP_VINFO_SLP_INSTANCES (loop_vinfo);
3153 vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
3154 }
3155 else
3156 {
3157 slp_instances = BB_VINFO_SLP_INSTANCES (bb_vinfo);
3158 vf = 1;
3159 }
3160
3161 FOR_EACH_VEC_ELT (slp_instances, i, instance)
3162 {
3163 /* Schedule the tree of INSTANCE. */
3164 is_store = vect_schedule_slp_instance (SLP_INSTANCE_TREE (instance),
3165 instance, vf);
3166
3167 /* Clear STMT_VINFO_VEC_STMT of all loads. Otherwise, with loads
3168 shared between SLP instances we would fail to properly initialize
3169 the vectorized SLP stmts and confuse different load permutations. */
3170 FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (instance), j, loads_node)
3171 STMT_VINFO_VEC_STMT
3172 (vinfo_for_stmt (SLP_TREE_SCALAR_STMTS (loads_node)[0])) = NULL;
3173
3174 if (dump_enabled_p ())
3175 dump_printf_loc (MSG_NOTE, vect_location,
3176 "vectorizing stmts using SLP.");
3177 }
3178
3179 FOR_EACH_VEC_ELT (slp_instances, i, instance)
3180 {
3181 slp_tree root = SLP_INSTANCE_TREE (instance);
3182 gimple store;
3183 unsigned int j;
3184 gimple_stmt_iterator gsi;
3185
3186 /* Remove scalar call stmts. Do not do this for basic-block
3187 vectorization as not all uses may be vectorized.
3188 ??? Why should this be necessary? DCE should be able to
3189 remove the stmts itself.
3190 ??? For BB vectorization we can as well remove scalar
3191 stmts starting from the SLP tree root if they have no
3192 uses. */
3193 if (loop_vinfo)
3194 vect_remove_slp_scalar_calls (root);
3195
3196 for (j = 0; SLP_TREE_SCALAR_STMTS (root).iterate (j, &store)
3197 && j < SLP_INSTANCE_GROUP_SIZE (instance); j++)
3198 {
3199 if (!STMT_VINFO_DATA_REF (vinfo_for_stmt (store)))
3200 break;
3201
3202 if (is_pattern_stmt_p (vinfo_for_stmt (store)))
3203 store = STMT_VINFO_RELATED_STMT (vinfo_for_stmt (store));
3204 /* Free the attached stmt_vec_info and remove the stmt. */
3205 gsi = gsi_for_stmt (store);
3206 unlink_stmt_vdef (store);
3207 gsi_remove (&gsi, true);
3208 release_defs (store);
3209 free_stmt_vec_info (store);
3210 }
3211 }
3212
3213 return is_store;
3214 }
3215
3216
3217 /* Vectorize the basic block. */
3218
3219 void
3220 vect_slp_transform_bb (basic_block bb)
3221 {
3222 bb_vec_info bb_vinfo = vec_info_for_bb (bb);
3223 gimple_stmt_iterator si;
3224
3225 gcc_assert (bb_vinfo);
3226
3227 if (dump_enabled_p ())
3228 dump_printf_loc (MSG_NOTE, vect_location, "SLPing BB\n");
3229
3230 for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
3231 {
3232 gimple stmt = gsi_stmt (si);
3233 stmt_vec_info stmt_info;
3234
3235 if (dump_enabled_p ())
3236 {
3237 dump_printf_loc (MSG_NOTE, vect_location,
3238 "------>SLPing statement: ");
3239 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
3240 }
3241
3242 stmt_info = vinfo_for_stmt (stmt);
3243 gcc_assert (stmt_info);
3244
3245 /* Schedule all the SLP instances when the first SLP stmt is reached. */
3246 if (STMT_SLP_TYPE (stmt_info))
3247 {
3248 vect_schedule_slp (NULL, bb_vinfo);
3249 break;
3250 }
3251 }
3252
3253 if (dump_enabled_p ())
3254 dump_printf (MSG_OPTIMIZED_LOCATIONS, "BASIC BLOCK VECTORIZED\n");
3255
3256 destroy_bb_vec_info (bb_vinfo);
3257 }