This patch rewrites the old VEC macro-based interface into a new one based on the...
gcc.git: gcc/tree-vect-slp.c
1 /* SLP - Basic Block Vectorization
2 Copyright (C) 2007, 2008, 2009, 2010, 2011, 2012
3 Free Software Foundation, Inc.
4 Contributed by Dorit Naishlos <dorit@il.ibm.com>
5 and Ira Rosen <irar@il.ibm.com>
6
7 This file is part of GCC.
8
9 GCC is free software; you can redistribute it and/or modify it under
10 the terms of the GNU General Public License as published by the Free
11 Software Foundation; either version 3, or (at your option) any later
12 version.
13
14 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
15 WARRANTY; without even the implied warranty of MERCHANTABILITY or
16 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
17 for more details.
18
19 You should have received a copy of the GNU General Public License
20 along with GCC; see the file COPYING3. If not see
21 <http://www.gnu.org/licenses/>. */
22
23 #include "config.h"
24 #include "system.h"
25 #include "coretypes.h"
26 #include "dumpfile.h"
27 #include "tm.h"
28 #include "ggc.h"
29 #include "tree.h"
30 #include "target.h"
31 #include "basic-block.h"
32 #include "gimple-pretty-print.h"
33 #include "tree-flow.h"
34 #include "tree-pass.h"
35 #include "cfgloop.h"
36 #include "expr.h"
37 #include "recog.h" /* FIXME: for insn_data */
38 #include "optabs.h"
39 #include "tree-vectorizer.h"
40 #include "langhooks.h"
41
42 /* Extract the location of the basic block in the source code.
43    Return the basic block location if one is found, and UNKNOWN_LOC otherwise.  */
44
45 LOC
46 find_bb_location (basic_block bb)
47 {
48 gimple stmt = NULL;
49 gimple_stmt_iterator si;
50
51 if (!bb)
52 return UNKNOWN_LOC;
53
54 for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
55 {
56 stmt = gsi_stmt (si);
57 if (gimple_location (stmt) != UNKNOWN_LOC)
58 return gimple_location (stmt);
59 }
60
61 return UNKNOWN_LOC;
62 }
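
/* Usage sketch (hypothetical caller, shown only for illustration): the BB
   vectorizer anchors its dump messages on the location this returns, along
   the lines of

     vect_location = find_bb_location (bb);

   UNKNOWN_LOC simply means that no stmt in BB carries source-location
   information.  */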
63
64
65 /* Recursively free the memory allocated for the SLP tree rooted at NODE. */
66
67 static void
68 vect_free_slp_tree (slp_tree node)
69 {
70 int i;
71 slp_void_p child;
72
73 if (!node)
74 return;
75
76 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
77 vect_free_slp_tree ((slp_tree) child);
78
79 SLP_TREE_CHILDREN (node).release ();
80 SLP_TREE_SCALAR_STMTS (node).release ();
81 SLP_TREE_VEC_STMTS (node).release ();
82
83 free (node);
84 }
85
86
87 /* Free the memory allocated for the SLP instance. */
88
89 void
90 vect_free_slp_instance (slp_instance instance)
91 {
92 vect_free_slp_tree (SLP_INSTANCE_TREE (instance));
93 SLP_INSTANCE_LOAD_PERMUTATION (instance).release ();
94 SLP_INSTANCE_LOADS (instance).release ();
95 SLP_INSTANCE_BODY_COST_VEC (instance).release ();
96 free (instance);
97 }
98
99
100 /* Create an SLP node for SCALAR_STMTS. */
101
102 static slp_tree
103 vect_create_new_slp_node (vec<gimple> scalar_stmts)
104 {
105 slp_tree node;
106 gimple stmt = scalar_stmts[0];
107 unsigned int nops;
108
109 if (is_gimple_call (stmt))
110 nops = gimple_call_num_args (stmt);
111 else if (is_gimple_assign (stmt))
112 {
113 nops = gimple_num_ops (stmt) - 1;
114 if (gimple_assign_rhs_code (stmt) == COND_EXPR)
115 nops++;
116 }
117 else
118 return NULL;
119
120 node = XNEW (struct _slp_tree);
121 SLP_TREE_SCALAR_STMTS (node) = scalar_stmts;
122 SLP_TREE_VEC_STMTS (node).create (0);
123 SLP_TREE_CHILDREN (node).create (nops);
124
125 return node;
126 }
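
/* Worked example (illustrative GIMPLE, not taken from this file): for

     x_1 = a_2 < b_3 ? c_4 : d_5;

   gimple_num_ops is 4 (the lhs plus three rhs operands), so nops starts at
   3 and the COND_EXPR adjustment bumps it to 4: the embedded comparison
   contributes two operands (a_2 and b_3) that are tracked separately from
   c_4 and d_5, matching the operand walk in vect_get_and_check_slp_defs
   below.  */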
127
128
129 /* Allocate operands info for NOPS operands, and GROUP_SIZE def-stmts for each
130 operand. */
131 static vec<slp_oprnd_info>
132 vect_create_oprnd_info (int nops, int group_size)
133 {
134 int i;
135 slp_oprnd_info oprnd_info;
136 vec<slp_oprnd_info> oprnds_info;
137
138 oprnds_info.create (nops);
139 for (i = 0; i < nops; i++)
140 {
141 oprnd_info = XNEW (struct _slp_oprnd_info);
142 oprnd_info->def_stmts.create (group_size);
143 oprnd_info->first_dt = vect_uninitialized_def;
144 oprnd_info->first_def_type = NULL_TREE;
145 oprnd_info->first_const_oprnd = NULL_TREE;
146 oprnd_info->first_pattern = false;
147 oprnds_info.quick_push (oprnd_info);
148 }
149
150 return oprnds_info;
151 }
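
/* Illustration (hypothetical node): for a group of four isomorphic
   additions t_i = b_i + c_i, NOPS is 2 and GROUP_SIZE is 4, so this
   allocates two slp_oprnd_info slots, each with room for the four def
   stmts that feed the corresponding operand position.  */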
152
153
154 /* Free operands info. */
155
156 static void
157 vect_free_oprnd_info (vec<slp_oprnd_info> &oprnds_info)
158 {
159 int i;
160 slp_oprnd_info oprnd_info;
161
162 FOR_EACH_VEC_ELT (oprnds_info, i, oprnd_info)
163 {
164 oprnd_info->def_stmts.release ();
165 XDELETE (oprnd_info);
166 }
167
168 oprnds_info.release ();
169 }
170
171
172 /* Get the defs for the rhs of STMT (collect them in OPRNDS_INFO), check that
173 they are of a valid type and that they match the defs of the first stmt of
174 the SLP group (stored in OPRNDS_INFO). */
175
176 static bool
177 vect_get_and_check_slp_defs (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo,
178 slp_tree slp_node, gimple stmt,
179 int ncopies_for_cost, bool first,
180 vec<slp_oprnd_info> *oprnds_info,
181 stmt_vector_for_cost *prologue_cost_vec,
182 stmt_vector_for_cost *body_cost_vec)
183 {
184 tree oprnd;
185 unsigned int i, number_of_oprnds;
186 tree def, def_op0 = NULL_TREE;
187 gimple def_stmt;
188 enum vect_def_type dt = vect_uninitialized_def;
189 enum vect_def_type dt_op0 = vect_uninitialized_def;
190 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
191 tree lhs = gimple_get_lhs (stmt);
192 struct loop *loop = NULL;
193 enum tree_code rhs_code;
194 bool different_types = false;
195 bool pattern = false;
196 slp_oprnd_info oprnd_info, oprnd0_info, oprnd1_info;
197 int op_idx = 1;
198 tree compare_rhs = NULL_TREE;
199
200 if (loop_vinfo)
201 loop = LOOP_VINFO_LOOP (loop_vinfo);
202
203 if (is_gimple_call (stmt))
204 {
205 number_of_oprnds = gimple_call_num_args (stmt);
206 op_idx = 3;
207 }
208 else if (is_gimple_assign (stmt))
209 {
210 number_of_oprnds = gimple_num_ops (stmt) - 1;
211 if (gimple_assign_rhs_code (stmt) == COND_EXPR)
212 number_of_oprnds++;
213 }
214 else
215 return false;
216
217 for (i = 0; i < number_of_oprnds; i++)
218 {
219 if (compare_rhs)
220 {
221 oprnd = compare_rhs;
222 compare_rhs = NULL_TREE;
223 }
224 else
225 oprnd = gimple_op (stmt, op_idx++);
226
227 oprnd_info = (*oprnds_info)[i];
228
229 if (COMPARISON_CLASS_P (oprnd))
230 {
231 compare_rhs = TREE_OPERAND (oprnd, 1);
232 oprnd = TREE_OPERAND (oprnd, 0);
233 }
234
235 if (!vect_is_simple_use (oprnd, NULL, loop_vinfo, bb_vinfo, &def_stmt,
236 &def, &dt)
237 || (!def_stmt && dt != vect_constant_def))
238 {
239 if (dump_enabled_p ())
240 {
241 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
242 "Build SLP failed: can't find def for ");
243 dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM, oprnd);
244 }
245
246 return false;
247 }
248
249 /* Check if DEF_STMT is a part of a pattern in LOOP and get the def stmt
250 from the pattern. Check that all the stmts of the node are in the
251 pattern. */
252 if (def_stmt && gimple_bb (def_stmt)
253 && ((loop && flow_bb_inside_loop_p (loop, gimple_bb (def_stmt)))
254 || (!loop && gimple_bb (def_stmt) == BB_VINFO_BB (bb_vinfo)
255 && gimple_code (def_stmt) != GIMPLE_PHI))
256 && vinfo_for_stmt (def_stmt)
257 && STMT_VINFO_IN_PATTERN_P (vinfo_for_stmt (def_stmt))
258 && !STMT_VINFO_RELEVANT (vinfo_for_stmt (def_stmt))
259 && !STMT_VINFO_LIVE_P (vinfo_for_stmt (def_stmt)))
260 {
261 pattern = true;
262 if (!first && !oprnd_info->first_pattern)
263 {
264 if (dump_enabled_p ())
265 {
266 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
267 "Build SLP failed: some of the stmts"
268 " are in a pattern, and others are not ");
269 dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM, oprnd);
270 }
271
272 return false;
273 }
274
275 def_stmt = STMT_VINFO_RELATED_STMT (vinfo_for_stmt (def_stmt));
276 dt = STMT_VINFO_DEF_TYPE (vinfo_for_stmt (def_stmt));
277
278 if (dt == vect_unknown_def_type)
279 {
280 if (dump_enabled_p ())
281 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
282 "Unsupported pattern.");
283 return false;
284 }
285
286 switch (gimple_code (def_stmt))
287 {
288 case GIMPLE_PHI:
289 def = gimple_phi_result (def_stmt);
290 break;
291
292 case GIMPLE_ASSIGN:
293 def = gimple_assign_lhs (def_stmt);
294 break;
295
296 default:
297 if (dump_enabled_p ())
298 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
299 "unsupported defining stmt: ");
300 return false;
301 }
302 }
303
304 if (first)
305 {
306 oprnd_info->first_dt = dt;
307 oprnd_info->first_pattern = pattern;
308 if (def)
309 {
310 oprnd_info->first_def_type = TREE_TYPE (def);
311 oprnd_info->first_const_oprnd = NULL_TREE;
312 }
313 else
314 {
315 oprnd_info->first_def_type = NULL_TREE;
316 oprnd_info->first_const_oprnd = oprnd;
317 }
318
319 if (i == 0)
320 {
321 def_op0 = def;
322 dt_op0 = dt;
323 /* Analyze costs (for the first stmt of the group only). */
324 if (REFERENCE_CLASS_P (lhs))
325 /* Store. */
326 vect_model_store_cost (stmt_info, ncopies_for_cost, false,
327 dt, slp_node, prologue_cost_vec,
328 body_cost_vec);
329 else
330 {
331 enum vect_def_type dts[2];
332 dts[0] = dt;
333 dts[1] = vect_uninitialized_def;
334 /* Not memory operation (we don't call this function for
335 loads). */
336 vect_model_simple_cost (stmt_info, ncopies_for_cost, dts,
337 prologue_cost_vec, body_cost_vec);
338 }
339 }
340 }
341 else
342 {
343 /* Not first stmt of the group, check that the def-stmt/s match
344 the def-stmt/s of the first stmt. Allow different definition
345 types for reduction chains: the first stmt must be a
346 vect_reduction_def (a phi node), and the rest
347 vect_internal_def. */
348 if (((oprnd_info->first_dt != dt
349 && !(oprnd_info->first_dt == vect_reduction_def
350 && dt == vect_internal_def))
351 || (oprnd_info->first_def_type != NULL_TREE
352 && def
353 && !types_compatible_p (oprnd_info->first_def_type,
354 TREE_TYPE (def))))
355 || (!def
356 && !types_compatible_p (TREE_TYPE (oprnd_info->first_const_oprnd),
357 TREE_TYPE (oprnd)))
358 || different_types)
359 {
360 if (number_of_oprnds != 2)
361 {
362 if (dump_enabled_p ())
363 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
364 "Build SLP failed: different types ");
365
366 return false;
367 }
368
369 /* Try to swap operands in case of binary operation. */
370 if (i == 0)
371 different_types = true;
372 else
373 {
374 oprnd0_info = (*oprnds_info)[0];
375 if (is_gimple_assign (stmt)
376 && (rhs_code = gimple_assign_rhs_code (stmt))
377 && TREE_CODE_CLASS (rhs_code) == tcc_binary
378 && commutative_tree_code (rhs_code)
379 && oprnd0_info->first_dt == dt
380 && oprnd_info->first_dt == dt_op0
381 && def_op0 && def
382 && !(oprnd0_info->first_def_type
383 && !types_compatible_p (oprnd0_info->first_def_type,
384 TREE_TYPE (def)))
385 && !(oprnd_info->first_def_type
386 && !types_compatible_p (oprnd_info->first_def_type,
387 TREE_TYPE (def_op0))))
388 {
389 if (dump_enabled_p ())
390 {
391 dump_printf_loc (MSG_NOTE, vect_location,
392 "Swapping operands of ");
393 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
394 }
395
396 swap_tree_operands (stmt, gimple_assign_rhs1_ptr (stmt),
397 gimple_assign_rhs2_ptr (stmt));
398 }
399 else
400 {
401 if (dump_enabled_p ())
402 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
403 "Build SLP failed: different types ");
404
405 return false;
406 }
407 }
408 }
409 }
410
411 /* Check the types of the definitions. */
412 switch (dt)
413 {
414 case vect_constant_def:
415 case vect_external_def:
416 case vect_reduction_def:
417 break;
418
419 case vect_internal_def:
420 if (different_types)
421 {
422 oprnd0_info = (*oprnds_info)[0];
423 oprnd1_info = (*oprnds_info)[1];
424 if (i == 0)
425 oprnd1_info->def_stmts.quick_push (def_stmt);
426 else
427 oprnd0_info->def_stmts.quick_push (def_stmt);
428 }
429 else
430 oprnd_info->def_stmts.quick_push (def_stmt);
431
432 break;
433
434 default:
435 /* FORNOW: Not supported. */
436 if (dump_enabled_p ())
437 {
438 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
439 "Build SLP failed: illegal type of def ");
440 dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM, def);
441 }
442
443 return false;
444 }
445 }
446
447 return true;
448 }
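
/* Illustration of the operand swap above (hypothetical scalar code, with c
   loop-invariant, i.e. vect_external_def, and y0/y1 defined inside the
   loop, i.e. vect_internal_def):

     x0 = y0 + c;
     x1 = c + y1;

   Position-wise, the def types of the second stmt mismatch those of the
   first (external vs. internal and vice versa), but PLUS_EXPR is
   commutative, so rhs1 and rhs2 of the second stmt are swapped and both
   operand positions then see matching definition types.  */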
449
450
451 /* Recursively build an SLP tree starting from NODE.
452    Fail (and return FALSE) if def-stmts are not isomorphic, require data
453    permutation, or are of unsupported types of operation.  Otherwise, return
454    TRUE.  */
455
456 static bool
457 vect_build_slp_tree (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo,
458 slp_tree *node, unsigned int group_size, int *outside_cost,
459 int ncopies_for_cost, unsigned int *max_nunits,
460 vec<int> *load_permutation,
461 vec<slp_tree> *loads,
462 unsigned int vectorization_factor, bool *loads_permuted,
463 stmt_vector_for_cost *prologue_cost_vec,
464 stmt_vector_for_cost *body_cost_vec)
465 {
466 unsigned int i;
467 vec<gimple> stmts = SLP_TREE_SCALAR_STMTS (*node);
468 gimple stmt = stmts[0];
469 enum tree_code first_stmt_code = ERROR_MARK, rhs_code = ERROR_MARK;
470 enum tree_code first_cond_code = ERROR_MARK;
471 tree lhs;
472 bool stop_recursion = false, need_same_oprnds = false;
473 tree vectype, scalar_type, first_op1 = NULL_TREE;
474 unsigned int ncopies;
475 optab optab;
476 int icode;
477 enum machine_mode optab_op2_mode;
478 enum machine_mode vec_mode;
479 struct data_reference *first_dr;
480 HOST_WIDE_INT dummy;
481 bool permutation = false;
482 unsigned int load_place;
483 gimple first_load = NULL, prev_first_load = NULL, old_first_load = NULL;
484 vec<slp_oprnd_info> oprnds_info;
485 unsigned int nops;
486 slp_oprnd_info oprnd_info;
487 tree cond;
488
489 if (is_gimple_call (stmt))
490 nops = gimple_call_num_args (stmt);
491 else if (is_gimple_assign (stmt))
492 {
493 nops = gimple_num_ops (stmt) - 1;
494 if (gimple_assign_rhs_code (stmt) == COND_EXPR)
495 nops++;
496 }
497 else
498 return false;
499
500 oprnds_info = vect_create_oprnd_info (nops, group_size);
501
502 /* For every stmt in NODE find its def stmt/s. */
503 FOR_EACH_VEC_ELT (stmts, i, stmt)
504 {
505 if (dump_enabled_p ())
506 {
507 dump_printf_loc (MSG_NOTE, vect_location, "Build SLP for ");
508 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
509 }
510
511 /* Fail to vectorize statements marked as unvectorizable. */
512 if (!STMT_VINFO_VECTORIZABLE (vinfo_for_stmt (stmt)))
513 {
514 if (dump_enabled_p ())
515 {
516 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
517 "Build SLP failed: unvectorizable statement ");
518 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
519 }
520
521 vect_free_oprnd_info (oprnds_info);
522 return false;
523 }
524
525 lhs = gimple_get_lhs (stmt);
526 if (lhs == NULL_TREE)
527 {
528 if (dump_enabled_p ())
529 {
530 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
531 "Build SLP failed: not GIMPLE_ASSIGN nor "
532 "GIMPLE_CALL ");
533 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
534 }
535
536 vect_free_oprnd_info (oprnds_info);
537 return false;
538 }
539
540 if (is_gimple_assign (stmt)
541 && gimple_assign_rhs_code (stmt) == COND_EXPR
542 && (cond = gimple_assign_rhs1 (stmt))
543 && !COMPARISON_CLASS_P (cond))
544 {
545 if (dump_enabled_p ())
546 {
547 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
548 "Build SLP failed: condition is not "
549 "comparison ");
550 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
551 }
552
553 vect_free_oprnd_info (oprnds_info);
554 return false;
555 }
556
557 scalar_type = vect_get_smallest_scalar_type (stmt, &dummy, &dummy);
558 vectype = get_vectype_for_scalar_type (scalar_type);
559 if (!vectype)
560 {
561 if (dump_enabled_p ())
562 {
563 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
564 "Build SLP failed: unsupported data-type ");
565 dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
566 scalar_type);
567 }
568
569 vect_free_oprnd_info (oprnds_info);
570 return false;
571 }
572
573 /* In case of multiple types we need to detect the smallest type. */
574 if (*max_nunits < TYPE_VECTOR_SUBPARTS (vectype))
575 {
576 *max_nunits = TYPE_VECTOR_SUBPARTS (vectype);
577 if (bb_vinfo)
578 vectorization_factor = *max_nunits;
579 }
580
581 ncopies = vectorization_factor / TYPE_VECTOR_SUBPARTS (vectype);
582
583 if (is_gimple_call (stmt))
584 {
585 rhs_code = CALL_EXPR;
586 if (gimple_call_internal_p (stmt)
587 || gimple_call_tail_p (stmt)
588 || gimple_call_noreturn_p (stmt)
589 || !gimple_call_nothrow_p (stmt)
590 || gimple_call_chain (stmt))
591 {
592 if (dump_enabled_p ())
593 {
594 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
595 "Build SLP failed: unsupported call type ");
596 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
597 }
598
599 vect_free_oprnd_info (oprnds_info);
600 return false;
601 }
602 }
603 else
604 rhs_code = gimple_assign_rhs_code (stmt);
605
606 /* Check the operation. */
607 if (i == 0)
608 {
609 first_stmt_code = rhs_code;
610
611 /* Shift arguments should be equal in all the packed stmts for a
612 vector shift with scalar shift operand. */
613 if (rhs_code == LSHIFT_EXPR || rhs_code == RSHIFT_EXPR
614 || rhs_code == LROTATE_EXPR
615 || rhs_code == RROTATE_EXPR)
616 {
617 vec_mode = TYPE_MODE (vectype);
618
619 /* First see if we have a vector/vector shift. */
620 optab = optab_for_tree_code (rhs_code, vectype,
621 optab_vector);
622
623 if (!optab
624 || optab_handler (optab, vec_mode) == CODE_FOR_nothing)
625 {
626 /* No vector/vector shift, try for a vector/scalar shift. */
627 optab = optab_for_tree_code (rhs_code, vectype,
628 optab_scalar);
629
630 if (!optab)
631 {
632 if (dump_enabled_p ())
633 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
634 "Build SLP failed: no optab.");
635 vect_free_oprnd_info (oprnds_info);
636 return false;
637 }
638 icode = (int) optab_handler (optab, vec_mode);
639 if (icode == CODE_FOR_nothing)
640 {
641 if (dump_enabled_p ())
642 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
643 "Build SLP failed: "
644 "op not supported by target.");
645 vect_free_oprnd_info (oprnds_info);
646 return false;
647 }
648 optab_op2_mode = insn_data[icode].operand[2].mode;
649 if (!VECTOR_MODE_P (optab_op2_mode))
650 {
651 need_same_oprnds = true;
652 first_op1 = gimple_assign_rhs2 (stmt);
653 }
654 }
655 }
656 else if (rhs_code == WIDEN_LSHIFT_EXPR)
657 {
658 need_same_oprnds = true;
659 first_op1 = gimple_assign_rhs2 (stmt);
660 }
661 }
662 else
663 {
664 if (first_stmt_code != rhs_code
665 && (first_stmt_code != IMAGPART_EXPR
666 || rhs_code != REALPART_EXPR)
667 && (first_stmt_code != REALPART_EXPR
668 || rhs_code != IMAGPART_EXPR)
669 && !(STMT_VINFO_GROUPED_ACCESS (vinfo_for_stmt (stmt))
670 && (first_stmt_code == ARRAY_REF
671 || first_stmt_code == INDIRECT_REF
672 || first_stmt_code == COMPONENT_REF
673 || first_stmt_code == MEM_REF)))
674 {
675 if (dump_enabled_p ())
676 {
677 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
678 "Build SLP failed: different operation "
679 "in stmt ");
680 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
681 }
682
683 vect_free_oprnd_info (oprnds_info);
684 return false;
685 }
686
687 if (need_same_oprnds
688 && !operand_equal_p (first_op1, gimple_assign_rhs2 (stmt), 0))
689 {
690 if (dump_enabled_p ())
691 {
692 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
693 "Build SLP failed: different shift "
694 "arguments in ");
695 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
696 }
697
698 vect_free_oprnd_info (oprnds_info);
699 return false;
700 }
701
702 if (rhs_code == CALL_EXPR)
703 {
704 gimple first_stmt = stmts[0];
705 if (gimple_call_num_args (stmt) != nops
706 || !operand_equal_p (gimple_call_fn (first_stmt),
707 gimple_call_fn (stmt), 0)
708 || gimple_call_fntype (first_stmt)
709 != gimple_call_fntype (stmt))
710 {
711 if (dump_enabled_p ())
712 {
713 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
714 "Build SLP failed: different calls in ");
715 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
716 stmt, 0);
717 }
718
719 vect_free_oprnd_info (oprnds_info);
720 return false;
721 }
722 }
723 }
724
725 /* Grouped store or load. */
726 if (STMT_VINFO_GROUPED_ACCESS (vinfo_for_stmt (stmt)))
727 {
728 if (REFERENCE_CLASS_P (lhs))
729 {
730 /* Store. */
731 if (!vect_get_and_check_slp_defs (loop_vinfo, bb_vinfo, *node,
732 stmt, ncopies_for_cost,
733 (i == 0), &oprnds_info,
734 prologue_cost_vec,
735 body_cost_vec))
736 {
737 vect_free_oprnd_info (oprnds_info);
738 return false;
739 }
740 }
741 else
742 {
743 /* Load. */
744 /* FORNOW: Check that there is no gap between the loads. */
745 if ((GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)) == stmt
746 && GROUP_GAP (vinfo_for_stmt (stmt)) != 0)
747 || (GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)) != stmt
748 && GROUP_GAP (vinfo_for_stmt (stmt)) != 1))
749 {
750 if (dump_enabled_p ())
751 {
752 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
753 "Build SLP failed: grouped "
754 "loads have gaps ");
755 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
756 stmt, 0);
757 }
758
759 vect_free_oprnd_info (oprnds_info);
760 return false;
761 }
762
763 /* Check that the size of the interleaved loads group is not
764    greater than the SLP group size.  */
765 if (loop_vinfo
766 && GROUP_SIZE (vinfo_for_stmt (stmt)) > ncopies * group_size)
767 {
768 if (dump_enabled_p ())
769 {
770 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
771 "Build SLP failed: the number "
772 "of interleaved loads is greater than "
773 "the SLP group size ");
774 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
775 stmt, 0);
776 }
777
778 vect_free_oprnd_info (oprnds_info);
779 return false;
780 }
781
782 old_first_load = first_load;
783 first_load = GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt));
784 if (prev_first_load)
785 {
786 /* Check that there are no loads from different interleaving
787 chains in the same node. The only exception is complex
788 numbers. */
789 if (prev_first_load != first_load
790 && rhs_code != REALPART_EXPR
791 && rhs_code != IMAGPART_EXPR)
792 {
793 if (dump_enabled_p ())
794 {
795 dump_printf_loc (MSG_MISSED_OPTIMIZATION,
796 vect_location,
797 "Build SLP failed: different "
798 "interleaving chains in one node ");
799 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
800 stmt, 0);
801 }
802
803 vect_free_oprnd_info (oprnds_info);
804 return false;
805 }
806 }
807 else
808 prev_first_load = first_load;
809
810 /* In some cases a group of loads is just the same load
811 repeated N times. Only analyze its cost once. */
812 if (first_load == stmt && old_first_load != first_load)
813 {
814 first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (stmt));
815 if (vect_supportable_dr_alignment (first_dr, false)
816 == dr_unaligned_unsupported)
817 {
818 if (dump_enabled_p ())
819 {
820 dump_printf_loc (MSG_MISSED_OPTIMIZATION,
821 vect_location,
822 "Build SLP failed: unsupported "
823 "unaligned load ");
824 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
825 stmt, 0);
826 }
827
828 vect_free_oprnd_info (oprnds_info);
829 return false;
830 }
831
832 /* Analyze costs (for the first stmt in the group). */
833 vect_model_load_cost (vinfo_for_stmt (stmt),
834 ncopies_for_cost, false, *node,
835 prologue_cost_vec, body_cost_vec);
836 }
837
838 /* Store the place of this load in the interleaving chain.  In
839    case permutation is needed we later decide whether a specific
840    permutation is supported.  */
841 load_place = vect_get_place_in_interleaving_chain (stmt,
842 first_load);
843 if (load_place != i)
844 permutation = true;
845
846 load_permutation->safe_push (load_place);
847
848 /* We stop building the tree when we reach a group of loads.  */
849 stop_recursion = true;
850 continue;
851 }
852 } /* Grouped access. */
853 else
854 {
855 if (TREE_CODE_CLASS (rhs_code) == tcc_reference)
856 {
857 /* Not grouped load. */
858 if (dump_enabled_p ())
859 {
860 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
861 "Build SLP failed: not grouped load ");
862 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
863 }
864
865 /* FORNOW: Non-grouped loads are not supported.  */
866 vect_free_oprnd_info (oprnds_info);
867 return false;
868 }
869
870 /* Not memory operation. */
871 if (TREE_CODE_CLASS (rhs_code) != tcc_binary
872 && TREE_CODE_CLASS (rhs_code) != tcc_unary
873 && rhs_code != COND_EXPR
874 && rhs_code != CALL_EXPR)
875 {
876 if (dump_enabled_p ())
877 {
878 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
879 "Build SLP failed: operation");
880 dump_printf (MSG_MISSED_OPTIMIZATION, " unsupported ");
881 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
882 }
883
884 vect_free_oprnd_info (oprnds_info);
885 return false;
886 }
887
888 if (rhs_code == COND_EXPR)
889 {
890 tree cond_expr = gimple_assign_rhs1 (stmt);
891
892 if (i == 0)
893 first_cond_code = TREE_CODE (cond_expr);
894 else if (first_cond_code != TREE_CODE (cond_expr))
895 {
896 if (dump_enabled_p ())
897 {
898 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
899 "Build SLP failed: different"
900 " operation");
901 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
902 stmt, 0);
903 }
904
905 vect_free_oprnd_info (oprnds_info);
906 return false;
907 }
908 }
909
910 /* Find the def-stmts. */
911 if (!vect_get_and_check_slp_defs (loop_vinfo, bb_vinfo, *node, stmt,
912 ncopies_for_cost, (i == 0),
913 &oprnds_info, prologue_cost_vec,
914 body_cost_vec))
915 {
916 vect_free_oprnd_info (oprnds_info);
917 return false;
918 }
919 }
920 }
921
922 /* Grouped loads were reached - stop the recursion. */
923 if (stop_recursion)
924 {
925 loads->safe_push (*node);
926 if (permutation)
927 {
928 gimple first_stmt = stmts[0];
929 *loads_permuted = true;
930 (void) record_stmt_cost (body_cost_vec, group_size, vec_perm,
931 vinfo_for_stmt (first_stmt), 0, vect_body);
932 }
933 else
934 {
935 /* We don't check complex-number chains here, so we set
936    LOADS_PERMUTED for a further check in
937    vect_supported_load_permutation_p.  */
938 if (rhs_code == REALPART_EXPR || rhs_code == IMAGPART_EXPR)
939 *loads_permuted = true;
940 }
941
942 vect_free_oprnd_info (oprnds_info);
943 return true;
944 }
945
946 /* Create SLP_TREE nodes for the definition node/s. */
947 FOR_EACH_VEC_ELT (oprnds_info, i, oprnd_info)
948 {
949 slp_tree child;
950
951 if (oprnd_info->first_dt != vect_internal_def)
952 continue;
953
954 child = vect_create_new_slp_node (oprnd_info->def_stmts);
955 if (!child
956 || !vect_build_slp_tree (loop_vinfo, bb_vinfo, &child, group_size,
957 outside_cost, ncopies_for_cost,
958 max_nunits, load_permutation, loads,
959 vectorization_factor, loads_permuted,
960 prologue_cost_vec, body_cost_vec))
961 {
962 if (child)
963 oprnd_info->def_stmts = vec<gimple>();
964 vect_free_slp_tree (child);
965 vect_free_oprnd_info (oprnds_info);
966 return false;
967 }
968
969 oprnd_info->def_stmts.create (0);
970 SLP_TREE_CHILDREN (*node).quick_push (child);
971 }
972
973 vect_free_oprnd_info (oprnds_info);
974 return true;
975 }
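
/* End-to-end illustration (hypothetical basic block):

     a[0] = b[0] + c[0];
     a[1] = b[1] + c[1];
     a[2] = b[2] + c[2];
     a[3] = b[3] + c[3];

   The root node holds the four grouped stores; its single child holds the
   four isomorphic PLUS_EXPR stmts; recursion then stops at the two
   grouped-load nodes {b[0..3]} and {c[0..3]}, which are pushed onto LOADS
   with load places 0,1,2,3 each, i.e. no permutation is needed.  */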
976
977 /* Dump a slp tree NODE using flags specified in DUMP_KIND. */
978
979 static void
980 vect_print_slp_tree (int dump_kind, slp_tree node)
981 {
982 int i;
983 gimple stmt;
984 slp_void_p child;
985
986 if (!node)
987 return;
988
989 dump_printf (dump_kind, "node ");
990 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, stmt)
991 {
992 dump_printf (dump_kind, "\n\tstmt %d ", i);
993 dump_gimple_stmt (dump_kind, TDF_SLIM, stmt, 0);
994 }
995 dump_printf (dump_kind, "\n");
996
997 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
998 vect_print_slp_tree (dump_kind, (slp_tree) child);
999 }
1000
1001
1002 /* Mark the tree rooted at NODE with MARK (PURE_SLP or HYBRID).
1003 If MARK is HYBRID, it refers to a specific stmt in NODE (the stmt at index
1004 J). Otherwise, MARK is PURE_SLP and J is -1, which indicates that all the
1005 stmts in NODE are to be marked. */
1006
1007 static void
1008 vect_mark_slp_stmts (slp_tree node, enum slp_vect_type mark, int j)
1009 {
1010 int i;
1011 gimple stmt;
1012 slp_void_p child;
1013
1014 if (!node)
1015 return;
1016
1017 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, stmt)
1018 if (j < 0 || i == j)
1019 STMT_SLP_TYPE (vinfo_for_stmt (stmt)) = mark;
1020
1021 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
1022 vect_mark_slp_stmts ((slp_tree) child, mark, j);
1023 }
1024
1025
1026 /* Mark the statements of the tree rooted at NODE as relevant (vect_used). */
1027
1028 static void
1029 vect_mark_slp_stmts_relevant (slp_tree node)
1030 {
1031 int i;
1032 gimple stmt;
1033 stmt_vec_info stmt_info;
1034 slp_void_p child;
1035
1036 if (!node)
1037 return;
1038
1039 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, stmt)
1040 {
1041 stmt_info = vinfo_for_stmt (stmt);
1042 gcc_assert (!STMT_VINFO_RELEVANT (stmt_info)
1043 || STMT_VINFO_RELEVANT (stmt_info) == vect_used_in_scope);
1044 STMT_VINFO_RELEVANT (stmt_info) = vect_used_in_scope;
1045 }
1046
1047 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
1048 vect_mark_slp_stmts_relevant ((slp_tree) child);
1049 }
1050
1051
1052 /* Check if the permutation required by the SLP INSTANCE is supported.
1053 Reorganize the SLP nodes stored in SLP_INSTANCE_LOADS if needed. */
1054
1055 static bool
1056 vect_supported_slp_permutation_p (slp_instance instance)
1057 {
1058 slp_tree node = SLP_INSTANCE_LOADS (instance)[0];
1059 gimple stmt = SLP_TREE_SCALAR_STMTS (node)[0];
1060 gimple first_load = GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt));
1061 vec<slp_tree> sorted_loads = vec<slp_tree>();
1062 int index;
1063 slp_tree *tmp_loads = NULL;
1064 int group_size = SLP_INSTANCE_GROUP_SIZE (instance), i, j;
1065 slp_tree load;
1066
1067 /* FORNOW: The only supported load permutation is one in which all the
1068    loads in a node access the same location, when the data-refs in the
1069    nodes of LOADS constitute an interleaving chain.
1070    Sort the nodes according to the order of accesses in the chain.  */
1071 tmp_loads = (slp_tree *) xmalloc (sizeof (slp_tree) * group_size);
1072 for (i = 0, j = 0;
1073 SLP_INSTANCE_LOAD_PERMUTATION (instance).iterate (i, &index)
1074 && SLP_INSTANCE_LOADS (instance).iterate (j, &load);
1075 i += group_size, j++)
1076 {
1077 gimple scalar_stmt = SLP_TREE_SCALAR_STMTS (load)[0];
1078 /* Check that the loads are all in the same interleaving chain. */
1079 if (GROUP_FIRST_ELEMENT (vinfo_for_stmt (scalar_stmt)) != first_load)
1080 {
1081 if (dump_enabled_p ())
1082 {
1083 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1084 "Build SLP failed: unsupported data "
1085 "permutation ");
1086 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
1087 scalar_stmt, 0);
1088 }
1089
1090 free (tmp_loads);
1091 return false;
1092 }
1093
1094 tmp_loads[index] = load;
1095 }
1096
1097 sorted_loads.create (group_size);
1098 for (i = 0; i < group_size; i++)
1099 sorted_loads.safe_push (tmp_loads[i]);
1100
1101 SLP_INSTANCE_LOADS (instance).release ();
1102 SLP_INSTANCE_LOADS (instance) = sorted_loads;
1103 free (tmp_loads);
1104
1105 if (!vect_transform_slp_perm_load (stmt, vec<tree>(), NULL,
1106 SLP_INSTANCE_UNROLLING_FACTOR (instance),
1107 instance, true))
1108 return false;
1109
1110 return true;
1111 }
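
/* Worked example of the sort above: with GROUP_SIZE 2 and
   LOAD_PERMUTATION {1, 1, 0, 0}, the first load node's leading index is 1
   and the second's is 0, so the walk (which advances I by GROUP_SIZE per
   node) stores them into tmp_loads[1] and tmp_loads[0] respectively,
   leaving SLP_INSTANCE_LOADS sorted by position in the interleaving
   chain.  */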
1112
1113
1114 /* Rearrange the statements of NODE according to PERMUTATION. */
1115
1116 static void
1117 vect_slp_rearrange_stmts (slp_tree node, unsigned int group_size,
1118 vec<int> permutation)
1119 {
1120 gimple stmt;
1121 vec<gimple> tmp_stmts;
1122 unsigned int index, i;
1123 slp_void_p child;
1124
1125 if (!node)
1126 return;
1127
1128 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
1129 vect_slp_rearrange_stmts ((slp_tree) child, group_size, permutation);
1130
1131 gcc_assert (group_size == SLP_TREE_SCALAR_STMTS (node).length ());
1132 tmp_stmts.create (group_size);
1133
1134 for (i = 0; i < group_size; i++)
1135 tmp_stmts.safe_push (NULL);
1136
1137 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, stmt)
1138 {
1139 index = permutation[i];
1140 tmp_stmts[index] = stmt;
1141 }
1142
1143 SLP_TREE_SCALAR_STMTS (node).release ();
1144 SLP_TREE_SCALAR_STMTS (node) = tmp_stmts;
1145 }
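
/* Worked example: GROUP_SIZE 2 with PERMUTATION {1, 0}: the stmt at index
   0 moves to slot 1 and the stmt at index 1 to slot 0, so {s0, s1} becomes
   {s1, s0} in every node of the tree.  */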
1146
1147
1148 /* Check if the required load permutation is supported.
1149 LOAD_PERMUTATION contains a list of indices of the loads.
1150 In SLP this permutation is relative to the order of grouped stores that are
1151 the base of the SLP instance. */
1152
1153 static bool
1154 vect_supported_load_permutation_p (slp_instance slp_instn, int group_size,
1155 vec<int> load_permutation)
1156 {
1157 int i = 0, j, prev = -1, next, k, number_of_groups;
1158 bool supported, bad_permutation = false;
1159 sbitmap load_index;
1160 slp_tree node, other_complex_node;
1161 gimple stmt, first = NULL, other_node_first, load, next_load, first_load;
1162 unsigned complex_numbers = 0;
1163 struct data_reference *dr;
1164 bb_vec_info bb_vinfo;
1165
1166 /* FORNOW: permutations are only supported in SLP. */
1167 if (!slp_instn)
1168 return false;
1169
1170 if (dump_enabled_p ())
1171 {
1172 dump_printf_loc (MSG_NOTE, vect_location, "Load permutation ");
1173 FOR_EACH_VEC_ELT (load_permutation, i, next)
1174 dump_printf (MSG_NOTE, "%d ", next);
1175 }
1176
1177 /* In case of reduction every load permutation is allowed, since the order
1178 of the reduction statements is not important (as opposed to the case of
1179 grouped stores). The only condition we need to check is that all the
1180 load nodes are of the same size and have the same permutation (and then
1181 rearrange all the nodes of the SLP instance according to this
1182 permutation). */
1183
1184 /* Check that all the load nodes are of the same size. */
1185 FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (slp_instn), i, node)
1186 {
1187 if (SLP_TREE_SCALAR_STMTS (node).length () != (unsigned) group_size)
1188 return false;
1189
1190 stmt = SLP_TREE_SCALAR_STMTS (node)[0];
1191 if (is_gimple_assign (stmt)
1192 && (gimple_assign_rhs_code (stmt) == REALPART_EXPR
1193 || gimple_assign_rhs_code (stmt) == IMAGPART_EXPR))
1194 complex_numbers++;
1195 }
1196
1197 /* Complex operands can be swapped as follows:
1198 real_c = real_b + real_a;
1199 imag_c = imag_a + imag_b;
1200 i.e., we have {real_b, imag_a} and {real_a, imag_b} instead of
1201 {real_a, imag_a} and {real_b, imag_b}. We check here that if interleaving
1202 chains are mixed, they match the above pattern. */
1203 if (complex_numbers)
1204 {
1205 FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (slp_instn), i, node)
1206 {
1207 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), j, stmt)
1208 {
1209 if (j == 0)
1210 first = stmt;
1211 else
1212 {
1213 if (GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)) != first)
1214 {
1215 if (complex_numbers != 2)
1216 return false;
1217
1218 if (i == 0)
1219 k = 1;
1220 else
1221 k = 0;
1222
1223 other_complex_node = SLP_INSTANCE_LOADS (slp_instn)[k];
1224 other_node_first =
1225 SLP_TREE_SCALAR_STMTS (other_complex_node)[0];
1226
1227 if (GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt))
1228 != other_node_first)
1229 return false;
1230 }
1231 }
1232 }
1233 }
1234 }
1235
1236 /* We checked that this case is OK, so there is no need to proceed with
1237    permutation tests.  */
1238 if (complex_numbers == 2
1239 && SLP_INSTANCE_LOADS (slp_instn).length () == 2)
1240 {
1241 SLP_INSTANCE_LOADS (slp_instn).release ();
1242 SLP_INSTANCE_LOAD_PERMUTATION (slp_instn).release ();
1243 return true;
1244 }
1245
1246 node = SLP_INSTANCE_TREE (slp_instn);
1247 stmt = SLP_TREE_SCALAR_STMTS (node)[0];
1248 /* LOAD_PERMUTATION is a list of indices of all the loads of the SLP
1249    instance; not all the loads belong to the same node or interleaving
1250    group.  Hence, we need to divide them into groups according to
1251    GROUP_SIZE.  */
1252 number_of_groups = load_permutation.length () / group_size;
1253
1254 /* Reduction (there are no data-refs in the root).
1255    In a reduction chain the order of the loads is important.  */
1256 if (!STMT_VINFO_DATA_REF (vinfo_for_stmt (stmt))
1257 && !GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)))
1258 {
1259 int first_group_load_index;
1260
1261 /* Compare all the permutation sequences to the first one. */
1262 for (i = 1; i < number_of_groups; i++)
1263 {
1264 k = 0;
1265 for (j = i * group_size; j < i * group_size + group_size; j++)
1266 {
1267 next = load_permutation[j];
1268 first_group_load_index = load_permutation[k];
1269
1270 if (next != first_group_load_index)
1271 {
1272 bad_permutation = true;
1273 break;
1274 }
1275
1276 k++;
1277 }
1278
1279 if (bad_permutation)
1280 break;
1281 }
1282
1283 if (!bad_permutation)
1284 {
1285 /* Check that the loads in the first sequence are different and there
1286 are no gaps between them. */
1287 load_index = sbitmap_alloc (group_size);
1288 bitmap_clear (load_index);
1289 for (k = 0; k < group_size; k++)
1290 {
1291 first_group_load_index = load_permutation[k];
1292 if (bitmap_bit_p (load_index, first_group_load_index))
1293 {
1294 bad_permutation = true;
1295 break;
1296 }
1297
1298 bitmap_set_bit (load_index, first_group_load_index);
1299 }
1300
1301 if (!bad_permutation)
1302 for (k = 0; k < group_size; k++)
1303 if (!bitmap_bit_p (load_index, k))
1304 {
1305 bad_permutation = true;
1306 break;
1307 }
1308
1309 sbitmap_free (load_index);
1310 }
1311
1312 if (!bad_permutation)
1313 {
1314 /* This permutation is valid for reduction. Since the order of the
1315 statements in the nodes is not important unless they are memory
1316 accesses, we can rearrange the statements in all the nodes
1317 according to the order of the loads. */
1318 vect_slp_rearrange_stmts (SLP_INSTANCE_TREE (slp_instn), group_size,
1319 load_permutation);
1320 SLP_INSTANCE_LOAD_PERMUTATION (slp_instn).release ();
1321 return true;
1322 }
1323 }
1324
1325 /* In basic block vectorization we allow any subchain of an interleaving
1326 chain.
1327    FORNOW: not supported in loop SLP because of realignment complications.  */
1328 bb_vinfo = STMT_VINFO_BB_VINFO (vinfo_for_stmt (stmt));
1329 bad_permutation = false;
1330 /* Check that for every node in the instance the loads form a subchain. */
1331 if (bb_vinfo)
1332 {
1333 FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (slp_instn), i, node)
1334 {
1335 next_load = NULL;
1336 first_load = NULL;
1337 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), j, load)
1338 {
1339 if (!first_load)
1340 first_load = GROUP_FIRST_ELEMENT (vinfo_for_stmt (load));
1341 else if (first_load
1342 != GROUP_FIRST_ELEMENT (vinfo_for_stmt (load)))
1343 {
1344 bad_permutation = true;
1345 break;
1346 }
1347
1348 if (j != 0 && next_load != load)
1349 {
1350 bad_permutation = true;
1351 break;
1352 }
1353
1354 next_load = GROUP_NEXT_ELEMENT (vinfo_for_stmt (load));
1355 }
1356
1357 if (bad_permutation)
1358 break;
1359 }
1360
1361 /* Check that the alignment of the first load in every subchain, i.e.,
1362 the first statement in every load node, is supported. */
1363 if (!bad_permutation)
1364 {
1365 FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (slp_instn), i, node)
1366 {
1367 first_load = SLP_TREE_SCALAR_STMTS (node)[0];
1368 if (first_load
1369 != GROUP_FIRST_ELEMENT (vinfo_for_stmt (first_load)))
1370 {
1371 dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_load));
1372 if (vect_supportable_dr_alignment (dr, false)
1373 == dr_unaligned_unsupported)
1374 {
1375 if (dump_enabled_p ())
1376 {
1377 dump_printf_loc (MSG_MISSED_OPTIMIZATION,
1378 vect_location,
1379 "unsupported unaligned load ");
1380 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
1381 first_load, 0);
1382 }
1383 bad_permutation = true;
1384 break;
1385 }
1386 }
1387 }
1388
1389 if (!bad_permutation)
1390 {
1391 SLP_INSTANCE_LOAD_PERMUTATION (slp_instn).release ();
1392 return true;
1393 }
1394 }
1395 }
1396
1397 /* FORNOW: the only supported permutation is 0..01..1..: GROUP_SIZE
1398    sequences of identical indices, each sequence GROUP_SIZE long, for a
1399    total length of GROUP_SIZE * GROUP_SIZE (unless it's a reduction).  */
1400 if (load_permutation.length ()
1401 != (unsigned int) (group_size * group_size))
1402 return false;
1403
1404 supported = true;
1405 load_index = sbitmap_alloc (group_size);
1406 bitmap_clear (load_index);
1407 for (j = 0; j < group_size; j++)
1408 {
1409 for (i = j * group_size, k = 0;
1410 load_permutation.iterate (i, &next) && k < group_size;
1411 i++, k++)
1412 {
1413 if (i != j * group_size && next != prev)
1414 {
1415 supported = false;
1416 break;
1417 }
1418
1419 prev = next;
1420 }
1421
1422 if (bitmap_bit_p (load_index, prev))
1423 {
1424 supported = false;
1425 break;
1426 }
1427
1428 bitmap_set_bit (load_index, prev);
1429 }
1430
1431 for (j = 0; j < group_size; j++)
1432 if (!bitmap_bit_p (load_index, j))
1433 return false;
1434
1435 sbitmap_free (load_index);
1436
1437 if (supported && i == group_size * group_size
1438 && vect_supported_slp_permutation_p (slp_instn))
1439 return true;
1440
1441 return false;
1442 }
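
/* Illustration of the supported 0..01..1 pattern (hypothetical basic block
   with a store group of size 2):

     a[0] = b[0] + b[1];
     a[1] = b[0] + b[1];

   Each load node repeats a single location ({b[0], b[0]} and
   {b[1], b[1]}), giving LOAD_PERMUTATION {0, 0, 1, 1}: length
   GROUP_SIZE * GROUP_SIZE, each run of identical indices exactly
   GROUP_SIZE long, and every index in 0..GROUP_SIZE-1 used once.  */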
1443
1444
1445 /* Find the first load in the loop that belongs to INSTANCE.
1446 When loads are in several SLP nodes, there can be a case in which the first
1447 load does not appear in the first SLP node to be transformed, causing
1448 incorrect order of statements. Since we generate all the loads together,
1449 they must be inserted before the first load of the SLP instance and not
1450 before the first load of the first node of the instance. */
1451
1452 static gimple
1453 vect_find_first_load_in_slp_instance (slp_instance instance)
1454 {
1455 int i, j;
1456 slp_tree load_node;
1457 gimple first_load = NULL, load;
1458
1459 FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (instance), i, load_node)
1460 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (load_node), j, load)
1461 first_load = get_earlier_stmt (load, first_load);
1462
1463 return first_load;
1464 }
1465
1466
1467 /* Find the last store in SLP INSTANCE. */
1468
1469 static gimple
1470 vect_find_last_store_in_slp_instance (slp_instance instance)
1471 {
1472 int i;
1473 slp_tree node;
1474 gimple last_store = NULL, store;
1475
1476 node = SLP_INSTANCE_TREE (instance);
1477 for (i = 0; SLP_TREE_SCALAR_STMTS (node).iterate (i, &store); i++)
1478 last_store = get_later_stmt (store, last_store);
1479
1480 return last_store;
1481 }
1482
1483
1484 /* Analyze an SLP instance starting from a group of grouped stores. Call
1485 vect_build_slp_tree to build a tree of packed stmts if possible.
1486 Return FALSE if it's impossible to SLP any stmt in the loop. */
1487
1488 static bool
1489 vect_analyze_slp_instance (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo,
1490 gimple stmt)
1491 {
1492 slp_instance new_instance;
1493 slp_tree node;
1494 unsigned int group_size = GROUP_SIZE (vinfo_for_stmt (stmt));
1495 unsigned int unrolling_factor = 1, nunits;
1496 tree vectype, scalar_type = NULL_TREE;
1497 gimple next;
1498 unsigned int vectorization_factor = 0;
1499 int outside_cost = 0, ncopies_for_cost, i;
1500 unsigned int max_nunits = 0;
1501 vec<int> load_permutation;
1502 vec<slp_tree> loads;
1503 struct data_reference *dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (stmt));
1504 bool loads_permuted = false;
1505 vec<gimple> scalar_stmts;
1506 stmt_vector_for_cost body_cost_vec, prologue_cost_vec;
1507 stmt_info_for_cost *si;
1508
1509 if (GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)))
1510 {
1511 if (dr)
1512 {
1513 scalar_type = TREE_TYPE (DR_REF (dr));
1514 vectype = get_vectype_for_scalar_type (scalar_type);
1515 }
1516 else
1517 {
1518 gcc_assert (loop_vinfo);
1519 vectype = STMT_VINFO_VECTYPE (vinfo_for_stmt (stmt));
1520 }
1521
1522 group_size = GROUP_SIZE (vinfo_for_stmt (stmt));
1523 }
1524 else
1525 {
1526 gcc_assert (loop_vinfo);
1527 vectype = STMT_VINFO_VECTYPE (vinfo_for_stmt (stmt));
1528 group_size = LOOP_VINFO_REDUCTIONS (loop_vinfo).length ();
1529 }
1530
1531 if (!vectype)
1532 {
1533 if (dump_enabled_p ())
1534 {
1535 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1536 "Build SLP failed: unsupported data-type ");
1537 dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM, scalar_type);
1538 }
1539
1540 return false;
1541 }
1542
1543 nunits = TYPE_VECTOR_SUBPARTS (vectype);
1544 if (loop_vinfo)
1545 vectorization_factor = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
1546 else
1547 vectorization_factor = nunits;
1548
1549 /* Calculate the unrolling factor. */
1550 unrolling_factor = least_common_multiple (nunits, group_size) / group_size;
1551 if (unrolling_factor != 1 && !loop_vinfo)
1552 {
1553 if (dump_enabled_p ())
1554 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1555 "Build SLP failed: unrolling required in basic"
1556 " block SLP");
1557
1558 return false;
1559 }
1560
1561 /* Create a node (a root of the SLP tree) for the packed grouped stores. */
1562 scalar_stmts.create (group_size);
1563 next = stmt;
1564 if (GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)))
1565 {
1566 /* Collect the stores and store them in SLP_TREE_SCALAR_STMTS. */
1567 while (next)
1568 {
1569 if (STMT_VINFO_IN_PATTERN_P (vinfo_for_stmt (next))
1570 && STMT_VINFO_RELATED_STMT (vinfo_for_stmt (next)))
1571 scalar_stmts.safe_push (
1572 STMT_VINFO_RELATED_STMT (vinfo_for_stmt (next)));
1573 else
1574 scalar_stmts.safe_push (next);
1575 next = GROUP_NEXT_ELEMENT (vinfo_for_stmt (next));
1576 }
1577 }
1578 else
1579 {
1580 /* Collect reduction statements. */
1581 vec<gimple> reductions = LOOP_VINFO_REDUCTIONS (loop_vinfo);
1582 for (i = 0; reductions.iterate (i, &next); i++)
1583 scalar_stmts.safe_push (next);
1584 }
1585
1586 node = vect_create_new_slp_node (scalar_stmts);
1587
1588 /* Calculate the number of vector stmts to create based on the unrolling
1589 factor (number of vectors is 1 if NUNITS >= GROUP_SIZE, and is
1590    GROUP_SIZE / NUNITS otherwise).  */
1591 ncopies_for_cost = unrolling_factor * group_size / nunits;
1592
1593 load_permutation.create (group_size * group_size);
1594 loads.create (group_size);
1595 prologue_cost_vec.create (10);
1596 body_cost_vec.create (10);
1597
1598 /* Build the tree for the SLP instance. */
1599 if (vect_build_slp_tree (loop_vinfo, bb_vinfo, &node, group_size,
1600 &outside_cost, ncopies_for_cost,
1601 &max_nunits, &load_permutation, &loads,
1602 vectorization_factor, &loads_permuted,
1603 &prologue_cost_vec, &body_cost_vec))
1604 {
1605 void *data = (loop_vinfo ? LOOP_VINFO_TARGET_COST_DATA (loop_vinfo)
1606 : BB_VINFO_TARGET_COST_DATA (bb_vinfo));
1607
1608 /* Calculate the unrolling factor based on the smallest type. */
1609 if (max_nunits > nunits)
1610 unrolling_factor = least_common_multiple (max_nunits, group_size)
1611 / group_size;
1612
1613 if (unrolling_factor != 1 && !loop_vinfo)
1614 {
1615 if (dump_enabled_p ())
1616 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1617 "Build SLP failed: unrolling required in basic"
1618 " block SLP");
1619 vect_free_slp_tree (node);
1620 body_cost_vec.release ();
1621 prologue_cost_vec.release ();
1622 load_permutation.release ();
1623 loads.release ();
1624 return false;
1625 }
1626
1627 /* Create a new SLP instance. */
1628 new_instance = XNEW (struct _slp_instance);
1629 SLP_INSTANCE_TREE (new_instance) = node;
1630 SLP_INSTANCE_GROUP_SIZE (new_instance) = group_size;
1631 SLP_INSTANCE_UNROLLING_FACTOR (new_instance) = unrolling_factor;
1632 SLP_INSTANCE_BODY_COST_VEC (new_instance) = body_cost_vec;
1633 SLP_INSTANCE_LOADS (new_instance) = loads;
1634 SLP_INSTANCE_FIRST_LOAD_STMT (new_instance) = NULL;
1635 SLP_INSTANCE_LOAD_PERMUTATION (new_instance) = load_permutation;
1636
1637 if (loads_permuted)
1638 {
1639 if (!vect_supported_load_permutation_p (new_instance, group_size,
1640 load_permutation))
1641 {
1642 if (dump_enabled_p ())
1643 {
1644 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1645 "Build SLP failed: unsupported load "
1646 "permutation ");
1647 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
1648 }
1649
1650 vect_free_slp_instance (new_instance);
1651 prologue_cost_vec.release ();
1652 return false;
1653 }
1654
1655 SLP_INSTANCE_FIRST_LOAD_STMT (new_instance)
1656 = vect_find_first_load_in_slp_instance (new_instance);
1657 }
1658 else
1659 SLP_INSTANCE_LOAD_PERMUTATION (new_instance).release ();
1660
1661 /* Record the prologue costs, which were delayed until we were
1662 sure that SLP was successful. Unlike the body costs, we know
1663 the final values now regardless of the loop vectorization factor. */
1664 FOR_EACH_VEC_ELT (prologue_cost_vec, i, si)
1665 {
1666 struct _stmt_vec_info *stmt_info
1667 = si->stmt ? vinfo_for_stmt (si->stmt) : NULL;
1668 (void) add_stmt_cost (data, si->count, si->kind, stmt_info,
1669 si->misalign, vect_prologue);
1670 }
1671
1672 prologue_cost_vec.release ();
1673
1674 if (loop_vinfo)
1675 LOOP_VINFO_SLP_INSTANCES (loop_vinfo).safe_push (new_instance);
1676 else
1677 BB_VINFO_SLP_INSTANCES (bb_vinfo).safe_push (new_instance);
1678
1679 if (dump_enabled_p ())
1680 vect_print_slp_tree (MSG_NOTE, node);
1681
1682 return true;
1683 }
1684 else
1685 {
1686 body_cost_vec.release ();
1687 prologue_cost_vec.release ();
1688 }
1689
1690 /* Failed to SLP. */
1691 /* Free the allocated memory. */
1692 vect_free_slp_tree (node);
1693 load_permutation.release ();
1694 loads.release ();
1695
1696 return false;
1697 }
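
/* Worked example of the sizing math above (assuming a V4SI vector type,
   i.e. NUNITS 4): a store group of size 8 gives
   unrolling_factor = lcm (4, 8) / 8 = 1 and
   ncopies_for_cost = 1 * 8 / 4 = 2 vector stmts per group.  A group of
   size 2 instead gives unrolling_factor = lcm (4, 2) / 2 = 2, which is
   rejected for basic block SLP since a basic block cannot be unrolled.  */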
1698
1699
1700 /* Check if there are stmts in the loop that can be vectorized using SLP.  Build SLP
1701 trees of packed scalar stmts if SLP is possible. */
1702
1703 bool
1704 vect_analyze_slp (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo)
1705 {
1706 unsigned int i;
1707 vec<gimple> grouped_stores;
1708 vec<gimple> reductions = vec<gimple>();
1709 vec<gimple> reduc_chains = vec<gimple>();
1710 gimple first_element;
1711 bool ok = false;
1712
1713 if (dump_enabled_p ())
1714 dump_printf_loc (MSG_NOTE, vect_location, "=== vect_analyze_slp ===");
1715
1716 if (loop_vinfo)
1717 {
1718 grouped_stores = LOOP_VINFO_GROUPED_STORES (loop_vinfo);
1719 reduc_chains = LOOP_VINFO_REDUCTION_CHAINS (loop_vinfo);
1720 reductions = LOOP_VINFO_REDUCTIONS (loop_vinfo);
1721 }
1722 else
1723 grouped_stores = BB_VINFO_GROUPED_STORES (bb_vinfo);
1724
1725 /* Find SLP sequences starting from groups of grouped stores. */
1726 FOR_EACH_VEC_ELT (grouped_stores, i, first_element)
1727 if (vect_analyze_slp_instance (loop_vinfo, bb_vinfo, first_element))
1728 ok = true;
1729
1730 if (bb_vinfo && !ok)
1731 {
1732 if (dump_enabled_p ())
1733 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1734 "Failed to SLP the basic block.");
1735
1736 return false;
1737 }
1738
1739 if (loop_vinfo
1740 && LOOP_VINFO_REDUCTION_CHAINS (loop_vinfo).length () > 0)
1741 {
1742 /* Find SLP sequences starting from reduction chains. */
1743 FOR_EACH_VEC_ELT (reduc_chains, i, first_element)
1744 if (vect_analyze_slp_instance (loop_vinfo, bb_vinfo, first_element))
1745 ok = true;
1746 else
1747 return false;
1748
1749 /* Don't try to vectorize SLP reductions if reduction chain was
1750 detected. */
1751 return ok;
1752 }
1753
1754 /* Find SLP sequences starting from groups of reductions. */
1755 if (loop_vinfo && LOOP_VINFO_REDUCTIONS (loop_vinfo).length () > 1
1756 && vect_analyze_slp_instance (loop_vinfo, bb_vinfo, reductions[0]))
1757 ok = true;
1758
1759 return true;
1760 }
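
/* Scalar sketch (hypothetical input, assuming the loop vectorizer has
   already detected the chain): the four accumulations below form a
   reduction chain recorded in LOOP_VINFO_REDUCTION_CHAINS, from which the
   code above can start an SLP instance:

     int s = 0;
     for (int i = 0; i < n; i += 4)
       {
         s += b[i];
         s += b[i + 1];
         s += b[i + 2];
         s += b[i + 3];
       }
 */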
1761
1762
1763 /* For each possible SLP instance decide whether to SLP it and calculate
1764    the overall unrolling factor needed to SLP the loop.  Return TRUE if we decide to SLP at
1765 least one instance. */
1766
1767 bool
1768 vect_make_slp_decision (loop_vec_info loop_vinfo)
1769 {
1770 unsigned int i, unrolling_factor = 1;
1771 vec<slp_instance> slp_instances = LOOP_VINFO_SLP_INSTANCES (loop_vinfo);
1772 slp_instance instance;
1773 int decided_to_slp = 0;
1774
1775 if (dump_enabled_p ())
1776 dump_printf_loc (MSG_NOTE, vect_location, "=== vect_make_slp_decision ===");
1777
1778 FOR_EACH_VEC_ELT (slp_instances, i, instance)
1779 {
1780 /* FORNOW: SLP if you can. */
1781 if (unrolling_factor < SLP_INSTANCE_UNROLLING_FACTOR (instance))
1782 unrolling_factor = SLP_INSTANCE_UNROLLING_FACTOR (instance);
1783
1784 /* Mark all the stmts that belong to INSTANCE as PURE_SLP stmts. Later we
1785 call vect_detect_hybrid_slp () to find stmts that need hybrid SLP and
1786 loop-based vectorization. Such stmts will be marked as HYBRID. */
1787 vect_mark_slp_stmts (SLP_INSTANCE_TREE (instance), pure_slp, -1);
1788 decided_to_slp++;
1789 }
1790
1791 LOOP_VINFO_SLP_UNROLLING_FACTOR (loop_vinfo) = unrolling_factor;
1792
1793 if (decided_to_slp && dump_enabled_p ())
1794 dump_printf_loc (MSG_OPTIMIZED_LOCATIONS, vect_location,
1795 "Decided to SLP %d instances. Unrolling factor %d",
1796 decided_to_slp, unrolling_factor);
1797
1798 return (decided_to_slp > 0);
1799 }
1800
1801
1802 /* Find stmts that must be both vectorized and SLPed (since they feed stmts that
1803 can't be SLPed) in the tree rooted at NODE. Mark such stmts as HYBRID. */
1804
1805 static void
1806 vect_detect_hybrid_slp_stmts (slp_tree node)
1807 {
1808 int i;
1809 vec<gimple> stmts = SLP_TREE_SCALAR_STMTS (node);
1810 gimple stmt = stmts[0];
1811 imm_use_iterator imm_iter;
1812 gimple use_stmt;
1813 stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
1814 slp_void_p child;
1815 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
1816 struct loop *loop = NULL;
1817 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_vinfo);
1818 basic_block bb = NULL;
1819
1820 if (!node)
1821 return;
1822
1823 if (loop_vinfo)
1824 loop = LOOP_VINFO_LOOP (loop_vinfo);
1825 else
1826 bb = BB_VINFO_BB (bb_vinfo);
1827
1828 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, stmt)
1829 if (PURE_SLP_STMT (vinfo_for_stmt (stmt))
1830 && TREE_CODE (gimple_op (stmt, 0)) == SSA_NAME)
1831 FOR_EACH_IMM_USE_STMT (use_stmt, imm_iter, gimple_op (stmt, 0))
1832 if (gimple_bb (use_stmt)
1833 && ((loop && flow_bb_inside_loop_p (loop, gimple_bb (use_stmt)))
1834 || bb == gimple_bb (use_stmt))
1835 && (stmt_vinfo = vinfo_for_stmt (use_stmt))
1836 && !STMT_SLP_TYPE (stmt_vinfo)
1837 && (STMT_VINFO_RELEVANT (stmt_vinfo)
1838 || VECTORIZABLE_CYCLE_DEF (STMT_VINFO_DEF_TYPE (stmt_vinfo)))
1839 && !(gimple_code (use_stmt) == GIMPLE_PHI
1840 && STMT_VINFO_DEF_TYPE (stmt_vinfo)
1841 == vect_reduction_def))
1842 vect_mark_slp_stmts (node, hybrid, i);
1843
1844 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
1845 vect_detect_hybrid_slp_stmts ((slp_tree) child);
1846 }
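
/* Illustration (hypothetical loop body): the two adds below are PURE_SLP
   through the store group, but t0 also feeds the scalar reduction 'sum',
   a relevant non-SLP use, so the stmt defining t0 is re-marked HYBRID and
   must additionally be handled by loop-based vectorization:

     t0 = b[2*i]     + 1;  a[2*i]     = t0;
     t1 = b[2*i + 1] + 1;  a[2*i + 1] = t1;
     sum += t0;  */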
1847
1848
1849 /* Find stmts that must be both vectorized and SLPed. */
1850
1851 void
1852 vect_detect_hybrid_slp (loop_vec_info loop_vinfo)
1853 {
1854 unsigned int i;
1855 vec<slp_instance> slp_instances = LOOP_VINFO_SLP_INSTANCES (loop_vinfo);
1856 slp_instance instance;
1857
1858 if (dump_enabled_p ())
1859 dump_printf_loc (MSG_NOTE, vect_location, "=== vect_detect_hybrid_slp ===");
1860
1861 FOR_EACH_VEC_ELT (slp_instances, i, instance)
1862 vect_detect_hybrid_slp_stmts (SLP_INSTANCE_TREE (instance));
1863 }
1864
1865
1866 /* Create and initialize a new bb_vec_info struct for BB, as well as
1867 stmt_vec_info structs for all the stmts in it. */
1868
1869 static bb_vec_info
1870 new_bb_vec_info (basic_block bb)
1871 {
1872 bb_vec_info res = NULL;
1873 gimple_stmt_iterator gsi;
1874
1875 res = (bb_vec_info) xcalloc (1, sizeof (struct _bb_vec_info));
1876 BB_VINFO_BB (res) = bb;
1877
1878 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
1879 {
1880 gimple stmt = gsi_stmt (gsi);
1881 gimple_set_uid (stmt, 0);
1882 set_vinfo_for_stmt (stmt, new_stmt_vec_info (stmt, NULL, res));
1883 }
1884
1885 BB_VINFO_GROUPED_STORES (res).create (10);
1886 BB_VINFO_SLP_INSTANCES (res).create (2);
1887 BB_VINFO_TARGET_COST_DATA (res) = init_cost (NULL);
1888
1889 bb->aux = res;
1890 return res;
1891 }
1892
1893
1894 /* Free BB_VINFO struct, as well as all the stmt_vec_info structs of all the
1895 stmts in the basic block. */
1896
1897 static void
1898 destroy_bb_vec_info (bb_vec_info bb_vinfo)
1899 {
1900 vec<slp_instance> slp_instances;
1901 slp_instance instance;
1902 basic_block bb;
1903 gimple_stmt_iterator si;
1904 unsigned i;
1905
1906 if (!bb_vinfo)
1907 return;
1908
1909 bb = BB_VINFO_BB (bb_vinfo);
1910
1911 for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
1912 {
1913 gimple stmt = gsi_stmt (si);
1914 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1915
1916 if (stmt_info)
1917 /* Free stmt_vec_info. */
1918 free_stmt_vec_info (stmt);
1919 }
1920
1921 free_data_refs (BB_VINFO_DATAREFS (bb_vinfo));
1922 free_dependence_relations (BB_VINFO_DDRS (bb_vinfo));
1923 BB_VINFO_GROUPED_STORES (bb_vinfo).release ();
1924 slp_instances = BB_VINFO_SLP_INSTANCES (bb_vinfo);
1925 FOR_EACH_VEC_ELT (slp_instances, i, instance)
1926 vect_free_slp_instance (instance);
1927 BB_VINFO_SLP_INSTANCES (bb_vinfo).release ();
1928 destroy_cost_data (BB_VINFO_TARGET_COST_DATA (bb_vinfo));
1929 free (bb_vinfo);
1930 bb->aux = NULL;
1931 }
1932
1933
1934 /* Analyze statements contained in SLP tree node after recursively analyzing
1935 the subtree. Return TRUE if the operations are supported. */
1936
1937 static bool
1938 vect_slp_analyze_node_operations (bb_vec_info bb_vinfo, slp_tree node)
1939 {
1940 bool dummy;
1941 int i;
1942 gimple stmt;
1943 slp_void_p child;
1944
1945 if (!node)
1946 return true;
1947
1948 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
1949 if (!vect_slp_analyze_node_operations (bb_vinfo, (slp_tree) child))
1950 return false;
1951
1952 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, stmt)
1953 {
1954 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1955 gcc_assert (stmt_info);
1956 gcc_assert (PURE_SLP_STMT (stmt_info));
1957
1958 if (!vect_analyze_stmt (stmt, &dummy, node))
1959 return false;
1960 }
1961
1962 return true;
1963 }
1964
1965
1966 /* Analyze statements in SLP instances of the basic block. Return TRUE if the
1967 operations are supported. */
1968
1969 static bool
1970 vect_slp_analyze_operations (bb_vec_info bb_vinfo)
1971 {
1972 vec<slp_instance> slp_instances = BB_VINFO_SLP_INSTANCES (bb_vinfo);
1973 slp_instance instance;
1974 int i;
1975
1976 for (i = 0; slp_instances.iterate (i, &instance); )
1977 {
1978 if (!vect_slp_analyze_node_operations (bb_vinfo,
1979 SLP_INSTANCE_TREE (instance)))
1980 {
1981 vect_free_slp_instance (instance);
1982 slp_instances.ordered_remove (i);
1983 }
1984 else
1985 i++;
1986 }
1987
1988 if (!slp_instances.length ())
1989 return false;
1990
1991 return true;
1992 }
1993
1994 /* Check if vectorization of the basic block is profitable. */
1995
1996 static bool
1997 vect_bb_vectorization_profitable_p (bb_vec_info bb_vinfo)
1998 {
1999 vec<slp_instance> slp_instances = BB_VINFO_SLP_INSTANCES (bb_vinfo);
2000 slp_instance instance;
2001 int i, j;
2002 unsigned int vec_inside_cost = 0, vec_outside_cost = 0, scalar_cost = 0;
2003 unsigned int vec_prologue_cost = 0, vec_epilogue_cost = 0;
2004 unsigned int stmt_cost;
2005 gimple stmt;
2006 gimple_stmt_iterator si;
2007 basic_block bb = BB_VINFO_BB (bb_vinfo);
2008 void *target_cost_data = BB_VINFO_TARGET_COST_DATA (bb_vinfo);
2009 stmt_vec_info stmt_info = NULL;
2010 stmt_vector_for_cost body_cost_vec;
2011 stmt_info_for_cost *ci;
2012
2013 /* Calculate vector costs. */
2014 FOR_EACH_VEC_ELT (slp_instances, i, instance)
2015 {
2016 body_cost_vec = SLP_INSTANCE_BODY_COST_VEC (instance);
2017
2018 FOR_EACH_VEC_ELT (body_cost_vec, j, ci)
2019 {
2020 stmt_info = ci->stmt ? vinfo_for_stmt (ci->stmt) : NULL;
2021 (void) add_stmt_cost (target_cost_data, ci->count, ci->kind,
2022 stmt_info, ci->misalign, vect_body);
2023 }
2024 }
2025
2026 /* Calculate scalar cost. */
2027 for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
2028 {
2029 stmt = gsi_stmt (si);
2030 stmt_info = vinfo_for_stmt (stmt);
2031
2032 if (!stmt_info || !STMT_VINFO_VECTORIZABLE (stmt_info)
2033 || !PURE_SLP_STMT (stmt_info))
2034 continue;
2035
2036 if (STMT_VINFO_DATA_REF (stmt_info))
2037 {
2038 if (DR_IS_READ (STMT_VINFO_DATA_REF (stmt_info)))
2039 stmt_cost = vect_get_stmt_cost (scalar_load);
2040 else
2041 stmt_cost = vect_get_stmt_cost (scalar_store);
2042 }
2043 else
2044 stmt_cost = vect_get_stmt_cost (scalar_stmt);
2045
2046 scalar_cost += stmt_cost;
2047 }
2048
2049 /* Complete the target-specific cost calculation. */
2050 finish_cost (BB_VINFO_TARGET_COST_DATA (bb_vinfo), &vec_prologue_cost,
2051 &vec_inside_cost, &vec_epilogue_cost);
2052
2053 vec_outside_cost = vec_prologue_cost + vec_epilogue_cost;
2054
2055 if (dump_enabled_p ())
2056 {
2057 dump_printf_loc (MSG_NOTE, vect_location, "Cost model analysis:\n");
2058 dump_printf (MSG_NOTE, " Vector inside of basic block cost: %d\n",
2059 vec_inside_cost);
2060 dump_printf (MSG_NOTE, " Vector prologue cost: %d\n", vec_prologue_cost);
2061 dump_printf (MSG_NOTE, " Vector epilogue cost: %d\n", vec_epilogue_cost);
2062 dump_printf (MSG_NOTE, "  Scalar cost of basic block: %d\n", scalar_cost);
2063 }
2064
2065 /* Vectorization is profitable if its cost is less than the cost of the
2066 scalar version. */
2067 if (vec_outside_cost + vec_inside_cost >= scalar_cost)
2068 return false;
2069
2070 return true;
2071 }
2072
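/* A worked instance of the comparison above, with assumed costs: if the
   target reports vec_inside_cost == 8, vec_prologue_cost == 2 and
   vec_epilogue_cost == 0, then vec_outside_cost == 2 and the vector total
   is 10; with scalar_cost == 12 (e.g. four scalar loads, four stores and
   four other stmts, each of cost 1), 10 < 12 and the block is considered
   profitable to vectorize.  */
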
2073 /* Check if the basic block can be vectorized. */
2074
2075 static bb_vec_info
2076 vect_slp_analyze_bb_1 (basic_block bb)
2077 {
2078 bb_vec_info bb_vinfo;
2079 vec<ddr_p> ddrs;
2080 vec<slp_instance> slp_instances;
2081 slp_instance instance;
2082 int i;
2083 int min_vf = 2;
2084 int max_vf = MAX_VECTORIZATION_FACTOR;
2085
2086 bb_vinfo = new_bb_vec_info (bb);
2087 if (!bb_vinfo)
2088 return NULL;
2089
2090 if (!vect_analyze_data_refs (NULL, bb_vinfo, &min_vf))
2091 {
2092 if (dump_enabled_p ())
2093 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2094 "not vectorized: unhandled data-ref in basic "
2095 "block.\n");
2096
2097 destroy_bb_vec_info (bb_vinfo);
2098 return NULL;
2099 }
2100
2101 ddrs = BB_VINFO_DDRS (bb_vinfo);
2102 if (!ddrs.length ())
2103 {
2104 if (dump_enabled_p ())
2105 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2106 "not vectorized: not enough data-refs in "
2107 "basic block.\n");
2108
2109 destroy_bb_vec_info (bb_vinfo);
2110 return NULL;
2111 }
2112
2113 vect_pattern_recog (NULL, bb_vinfo);
2114
2115 if (!vect_analyze_data_ref_dependences (NULL, bb_vinfo, &max_vf)
2116 || min_vf > max_vf)
2117 {
2118 if (dump_enabled_p ())
2119 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2120 "not vectorized: unhandled data dependence "
2121 "in basic block.\n");
2122
2123 destroy_bb_vec_info (bb_vinfo);
2124 return NULL;
2125 }
2126
2127 if (!vect_analyze_data_refs_alignment (NULL, bb_vinfo))
2128 {
2129 if (dump_enabled_p ())
2130 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2131 "not vectorized: bad data alignment in basic "
2132 "block.\n");
2133
2134 destroy_bb_vec_info (bb_vinfo);
2135 return NULL;
2136 }
2137
2138 if (!vect_analyze_data_ref_accesses (NULL, bb_vinfo))
2139 {
2140 if (dump_enabled_p ())
2141 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2142 "not vectorized: unhandled data access in "
2143 "basic block.\n");
2144
2145 destroy_bb_vec_info (bb_vinfo);
2146 return NULL;
2147 }
2148
2149 /* Check the SLP opportunities in the basic block, analyze and build SLP
2150 trees. */
2151 if (!vect_analyze_slp (NULL, bb_vinfo))
2152 {
2153 if (dump_enabled_p ())
2154 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2155 "not vectorized: failed to find SLP opportunities "
2156 "in basic block.\n");
2157
2158 destroy_bb_vec_info (bb_vinfo);
2159 return NULL;
2160 }
2161
2162 slp_instances = BB_VINFO_SLP_INSTANCES (bb_vinfo);
2163
2164 /* Mark all the statements that we want to vectorize as pure SLP and
2165 relevant. */
2166 FOR_EACH_VEC_ELT (slp_instances, i, instance)
2167 {
2168 vect_mark_slp_stmts (SLP_INSTANCE_TREE (instance), pure_slp, -1);
2169 vect_mark_slp_stmts_relevant (SLP_INSTANCE_TREE (instance));
2170 }
2171
2172 if (!vect_verify_datarefs_alignment (NULL, bb_vinfo))
2173 {
2174 if (dump_enabled_p ())
2175 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2176 "not vectorized: unsupported alignment in basic "
2177 "block.\n");
2178 destroy_bb_vec_info (bb_vinfo);
2179 return NULL;
2180 }
2181
2182 if (!vect_slp_analyze_operations (bb_vinfo))
2183 {
2184 if (dump_enabled_p ())
2185 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2186 "not vectorized: bad operation in basic block.\n");
2187
2188 destroy_bb_vec_info (bb_vinfo);
2189 return NULL;
2190 }
2191
2192 /* Cost model: check if the vectorization is worthwhile. */
2193 if (flag_vect_cost_model
2194 && !vect_bb_vectorization_profitable_p (bb_vinfo))
2195 {
2196 if (dump_enabled_p ())
2197 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2198 "not vectorized: vectorization is not "
2199 "profitable.\n");
2200
2201 destroy_bb_vec_info (bb_vinfo);
2202 return NULL;
2203 }
2204
2205 if (dump_enabled_p ())
2206 dump_printf_loc (MSG_NOTE, vect_location,
2207 "Basic block will be vectorized using SLP\n");
2208
2209 return bb_vinfo;
2210 }
2211
2212
2213 bb_vec_info
2214 vect_slp_analyze_bb (basic_block bb)
2215 {
2216 bb_vec_info bb_vinfo;
2217 int insns = 0;
2218 gimple_stmt_iterator gsi;
2219 unsigned int vector_sizes;
2220
2221 if (dump_enabled_p ())
2222 dump_printf_loc (MSG_NOTE, vect_location, "=== vect_slp_analyze_bb ===\n");
2223
2224 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
2225 {
2226 gimple stmt = gsi_stmt (gsi);
2227 if (!is_gimple_debug (stmt)
2228 && !gimple_nop_p (stmt)
2229 && gimple_code (stmt) != GIMPLE_LABEL)
2230 insns++;
2231 }
2232
2233 if (insns > PARAM_VALUE (PARAM_SLP_MAX_INSNS_IN_BB))
2234 {
2235 if (dump_enabled_p ())
2236 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2237 "not vectorized: too many instructions in "
2238 "basic block.\n");
2239
2240 return NULL;
2241 }
2242
2243 /* Autodetect first vector size we try. */
2244 current_vector_size = 0;
2245 vector_sizes = targetm.vectorize.autovectorize_vector_sizes ();
2246
2247 while (1)
2248 {
2249 bb_vinfo = vect_slp_analyze_bb_1 (bb);
2250 if (bb_vinfo)
2251 return bb_vinfo;
2252
2253 destroy_bb_vec_info (bb_vinfo);
2254
2255 vector_sizes &= ~current_vector_size;
2256 if (vector_sizes == 0
2257 || current_vector_size == 0)
2258 return NULL;
2259
2260 /* Try the next biggest vector size. */
2261 current_vector_size = 1 << floor_log2 (vector_sizes);
2262 if (dump_enabled_p ())
2263 dump_printf_loc (MSG_NOTE, vect_location,
2264 "***** Re-trying analysis with "
2265 "vector size %d\n", current_vector_size);
2266 }
2267 }
2268
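/* A hedged walk-through of the retry loop above, assuming a target whose
   autovectorize_vector_sizes hook returns 32 | 16:

     pass 1: current_vector_size == 0, so the target autodetects (say 32
             bytes); if analysis fails, vector_sizes becomes 16.
     pass 2: current_vector_size = 1 << floor_log2 (16) == 16; if that
             fails too, vector_sizes becomes 0 and NULL is returned.  */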
2269
2270 /* SLP costs are calculated according to SLP instance unrolling factor (i.e.,
2271 the number of created vector stmts depends on the unrolling factor).
2272 However, the actual number of vector stmts for every SLP node depends on
2273 VF which is set later in vect_analyze_loop_operations (). Hence, SLP costs
2274 should be updated. In this function we assume that the inside costs
2275 calculated in vect_model_xxx_cost are linear in ncopies. */
2276
2277 void
2278 vect_update_slp_costs_according_to_vf (loop_vec_info loop_vinfo)
2279 {
2280 unsigned int i, j, vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
2281 vec<slp_instance> slp_instances = LOOP_VINFO_SLP_INSTANCES (loop_vinfo);
2282 slp_instance instance;
2283 stmt_vector_for_cost body_cost_vec;
2284 stmt_info_for_cost *si;
2285 void *data = LOOP_VINFO_TARGET_COST_DATA (loop_vinfo);
2286
2287 if (dump_enabled_p ())
2288 dump_printf_loc (MSG_NOTE, vect_location,
2289 "=== vect_update_slp_costs_according_to_vf ===");
2290
2291 FOR_EACH_VEC_ELT (slp_instances, i, instance)
2292 {
2293 /* We assume that costs are linear in ncopies. */
2294 int ncopies = vf / SLP_INSTANCE_UNROLLING_FACTOR (instance);
2295
2296 /* Record the instance's instructions in the target cost model.
2297 This was delayed until here because the count of instructions
2298 isn't known beforehand. */
2299 body_cost_vec = SLP_INSTANCE_BODY_COST_VEC (instance);
2300
2301 FOR_EACH_VEC_ELT (body_cost_vec, j, si)
2302 (void) add_stmt_cost (data, si->count * ncopies, si->kind,
2303 vinfo_for_stmt (si->stmt), si->misalign,
2304 vect_body);
2305 }
2306 }
2307
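/* A worked instance of the update above, with assumed numbers: for an SLP
   instance with unrolling factor 2 in a loop with VF == 8, ncopies is
   8 / 2 == 4, so a body-cost entry recorded with count 3 is added to the
   target cost model as 3 * 4 == 12 statements of the same kind.  */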
2308
2309 /* For constant and loop invariant defs of SLP_NODE this function returns
2310 (vector) defs (VEC_OPRNDS) that will be used in the vectorized stmts.
2311 OP_NUM determines if we gather defs for operand 0 or operand 1 of the RHS of
2312 scalar stmts. NUMBER_OF_VECTORS is the number of vector defs to create.
2313 REDUC_INDEX is the index of the reduction operand in the statements, or -1
2314 if there is no reduction. */
2315
2316 static void
2317 vect_get_constant_vectors (tree op, slp_tree slp_node,
2318 vec<tree> *vec_oprnds,
2319 unsigned int op_num, unsigned int number_of_vectors,
2320 int reduc_index)
2321 {
2322 vec<gimple> stmts = SLP_TREE_SCALAR_STMTS (slp_node);
2323 gimple stmt = stmts[0];
2324 stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
2325 unsigned nunits;
2326 tree vec_cst;
2327 tree *elts;
2328 unsigned j, number_of_places_left_in_vector;
2329 tree vector_type;
2330 tree vop;
2331 int group_size = stmts.length ();
2332 unsigned int vec_num, i;
2333 unsigned number_of_copies = 1;
2334 vec<tree> voprnds;
2335 voprnds.create (number_of_vectors);
2336 bool constant_p, is_store;
2337 tree neutral_op = NULL;
2338 enum tree_code code = gimple_expr_code (stmt);
2339 gimple def_stmt;
2340 struct loop *loop;
2341 gimple_seq ctor_seq = NULL;
2342
2343 if (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_reduction_def
2344 && reduc_index != -1)
2345 {
2346 op_num = reduc_index - 1;
2347 op = gimple_op (stmt, reduc_index);
2348 /* For additional copies (see the explanation of NUMBER_OF_COPIES below)
2349 we need either neutral operands or the original operands. See
2350 get_initial_def_for_reduction() for details. */
2351 switch (code)
2352 {
2353 case WIDEN_SUM_EXPR:
2354 case DOT_PROD_EXPR:
2355 case PLUS_EXPR:
2356 case MINUS_EXPR:
2357 case BIT_IOR_EXPR:
2358 case BIT_XOR_EXPR:
2359 if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (op)))
2360 neutral_op = build_real (TREE_TYPE (op), dconst0);
2361 else
2362 neutral_op = build_int_cst (TREE_TYPE (op), 0);
2363
2364 break;
2365
2366 case MULT_EXPR:
2367 if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (op)))
2368 neutral_op = build_real (TREE_TYPE (op), dconst1);
2369 else
2370 neutral_op = build_int_cst (TREE_TYPE (op), 1);
2371
2372 break;
2373
2374 case BIT_AND_EXPR:
2375 neutral_op = build_int_cst (TREE_TYPE (op), -1);
2376 break;
2377
2378 case MAX_EXPR:
2379 case MIN_EXPR:
2380 def_stmt = SSA_NAME_DEF_STMT (op);
2381 loop = (gimple_bb (stmt))->loop_father;
2382 neutral_op = PHI_ARG_DEF_FROM_EDGE (def_stmt,
2383 loop_preheader_edge (loop));
2384 break;
2385
2386 default:
2387 neutral_op = NULL;
2388 }
2389 }
2390
2391 if (STMT_VINFO_DATA_REF (stmt_vinfo))
2392 {
2393 is_store = true;
2394 op = gimple_assign_rhs1 (stmt);
2395 }
2396 else
2397 is_store = false;
2398
2399 gcc_assert (op);
2400
2401 if (CONSTANT_CLASS_P (op))
2402 constant_p = true;
2403 else
2404 constant_p = false;
2405
2406 vector_type = get_vectype_for_scalar_type (TREE_TYPE (op));
2407 gcc_assert (vector_type);
2408 nunits = TYPE_VECTOR_SUBPARTS (vector_type);
2409
2410 /* NUMBER_OF_COPIES is the number of times we need to use the same values in
2411 created vectors. It is greater than 1 if unrolling is performed.
2412
2413 For example, we have two scalar operands, s1 and s2 (e.g., group of
2414 strided accesses of size two), while NUNITS is four (i.e., four scalars
2415 of this type can be packed in a vector). The output vector will contain
2416 two copies of each scalar operand: {s1, s2, s1, s2}. (NUMBER_OF_COPIES
2417 will be 2).
2418
2419 If GROUP_SIZE > NUNITS, the scalars will be split into several vectors
2420 containing the operands.
2421
2422 For example, NUNITS is four as before, and the group size is 8
2423 (s1, s2, ..., s8). We will create two vectors {s1, s2, s3, s4} and
2424 {s5, s6, s7, s8}. */
2425
2426 number_of_copies = least_common_multiple (nunits, group_size) / group_size;
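  /* E.g., plugging the numbers from the comment above into this formula:
     NUNITS == 4 and GROUP_SIZE == 2 give least_common_multiple (4, 2) / 2
     == 2 copies of {s1, s2}; NUNITS == 4 and GROUP_SIZE == 8 give
     least_common_multiple (4, 8) / 8 == 1 copy, spread over two vectors.  */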
2427
2428 number_of_places_left_in_vector = nunits;
2429 elts = XALLOCAVEC (tree, nunits);
2430 for (j = 0; j < number_of_copies; j++)
2431 {
2432 for (i = group_size - 1; stmts.iterate (i, &stmt); i--)
2433 {
2434 if (is_store)
2435 op = gimple_assign_rhs1 (stmt);
2436 else
2437 {
2438 switch (code)
2439 {
2440 case COND_EXPR:
2441 if (op_num == 0 || op_num == 1)
2442 {
2443 tree cond = gimple_assign_rhs1 (stmt);
2444 op = TREE_OPERAND (cond, op_num);
2445 }
2446 else
2447 {
2448 if (op_num == 2)
2449 op = gimple_assign_rhs2 (stmt);
2450 else
2451 op = gimple_assign_rhs3 (stmt);
2452 }
2453 break;
2454
2455 case CALL_EXPR:
2456 op = gimple_call_arg (stmt, op_num);
2457 break;
2458
2459 case LSHIFT_EXPR:
2460 case RSHIFT_EXPR:
2461 case LROTATE_EXPR:
2462 case RROTATE_EXPR:
2463 op = gimple_op (stmt, op_num + 1);
2464 /* Unlike the other binary operators, shifts/rotates take
2465 a shift count of type int, instead of the same type as
2466 the lhs, so make sure the scalar is the right type if
2467 we are dealing with vectors of
2468 long long/long/short/char. */
2469 if (op_num == 1 && constant_p)
2470 op = fold_convert (TREE_TYPE (vector_type), op);
2471 break;
2472
2473 default:
2474 op = gimple_op (stmt, op_num + 1);
2475 break;
2476 }
2477 }
2478
2479 if (reduc_index != -1)
2480 {
2481 loop = (gimple_bb (stmt))->loop_father;
2482 def_stmt = SSA_NAME_DEF_STMT (op);
2483
2484 gcc_assert (loop);
2485
2486 /* Get the def before the loop. In a reduction chain we have only
2487 one initial value. */
2488 if ((j != (number_of_copies - 1)
2489 || (GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt))
2490 && i != 0))
2491 && neutral_op)
2492 op = neutral_op;
2493 else
2494 op = PHI_ARG_DEF_FROM_EDGE (def_stmt,
2495 loop_preheader_edge (loop));
2496 }
2497
2498 /* Create 'vect_ = {op0,op1,...,opn}'. */
2499 number_of_places_left_in_vector--;
2500 if (!types_compatible_p (TREE_TYPE (vector_type), TREE_TYPE (op)))
2501 {
2502 if (constant_p)
2503 {
2504 op = fold_unary (VIEW_CONVERT_EXPR,
2505 TREE_TYPE (vector_type), op);
2506 gcc_assert (op && CONSTANT_CLASS_P (op));
2507 }
2508 else
2509 {
2510 tree new_temp
2511 = make_ssa_name (TREE_TYPE (vector_type), NULL);
2512 gimple init_stmt;
2513 op = build1 (VIEW_CONVERT_EXPR, TREE_TYPE (vector_type),
2514 op);
2515 init_stmt
2516 = gimple_build_assign_with_ops (VIEW_CONVERT_EXPR,
2517 new_temp, op, NULL_TREE);
2518 gimple_seq_add_stmt (&ctor_seq, init_stmt);
2519 op = new_temp;
2520 }
2521 }
2522 elts[number_of_places_left_in_vector] = op;
2523
2524 if (number_of_places_left_in_vector == 0)
2525 {
2526 number_of_places_left_in_vector = nunits;
2527
2528 if (constant_p)
2529 vec_cst = build_vector (vector_type, elts);
2530 else
2531 {
2532 vec<constructor_elt, va_gc> *v;
2533 unsigned k;
2534 vec_alloc (v, nunits);
2535 for (k = 0; k < nunits; ++k)
2536 CONSTRUCTOR_APPEND_ELT (v, NULL_TREE, elts[k]);
2537 vec_cst = build_constructor (vector_type, v);
2538 }
2539 voprnds.quick_push (vect_init_vector (stmt, vec_cst,
2540 vector_type, NULL));
2541 if (ctor_seq != NULL)
2542 {
2543 gimple init_stmt = SSA_NAME_DEF_STMT (voprnds.last ());
2544 gimple_stmt_iterator gsi = gsi_for_stmt (init_stmt);
2545 gsi_insert_seq_before_without_update (&gsi, ctor_seq,
2546 GSI_SAME_STMT);
2547 ctor_seq = NULL;
2548 }
2549 }
2550 }
2551 }
2552
2553 /* Since the vectors are created in the reverse order, we should reverse
2554 their order. */
2555 vec_num = voprnds.length ();
2556 for (j = vec_num; j != 0; j--)
2557 {
2558 vop = voprnds[j - 1];
2559 vec_oprnds->quick_push (vop);
2560 }
2561
2562 voprnds.release ();
2563
2564 /* If VF is greater than the unrolling factor needed for the SLP group of
2565 stmts, NUMBER_OF_VECTORS is greater than the number of vectors that hold
2566 the scalar operands (NUMBER_OF_SCALARS/NUNITS or NUNITS/NUMBER_OF_SCALARS),
2567 and hence we have to replicate the vectors. */
2568 while (number_of_vectors > vec_oprnds->length ())
2569 {
2570 tree neutral_vec = NULL;
2571
2572 if (neutral_op)
2573 {
2574 if (!neutral_vec)
2575 neutral_vec = build_vector_from_val (vector_type, neutral_op);
2576
2577 vec_oprnds->quick_push (neutral_vec);
2578 }
2579 else
2580 {
2581 for (i = 0; vec_oprnds->iterate (i, &vop) && i < vec_num; i++)
2582 vec_oprnds->quick_push (vop);
2583 }
2584 }
2585 }
2586
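/* E.g. (assumed numbers): if NUMBER_OF_VECTORS is 4 but only two vectors
   {v0, v1} were built above, the replication loop at the end extends the
   result to {v0, v1, v0, v1}; when a neutral op exists (say 0 for
   PLUS_EXPR) neutral vectors are pushed instead, as the extra copies of a
   reduction must not change its result.  */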
2587
2588 /* Get the vectorized definitions of SLP_NODE from the vectorized def-stmts
2589 that it contains. */
2590
2591 static void
2592 vect_get_slp_vect_defs (slp_tree slp_node, vec<tree> *vec_oprnds)
2593 {
2594 tree vec_oprnd;
2595 gimple vec_def_stmt;
2596 unsigned int i;
2597
2598 gcc_assert (SLP_TREE_VEC_STMTS (slp_node).exists ());
2599
2600 FOR_EACH_VEC_ELT (SLP_TREE_VEC_STMTS (slp_node), i, vec_def_stmt)
2601 {
2602 gcc_assert (vec_def_stmt);
2603 vec_oprnd = gimple_get_lhs (vec_def_stmt);
2604 vec_oprnds->quick_push (vec_oprnd);
2605 }
2606 }
2607
2608
2609 /* Get vectorized definitions for SLP_NODE.
2610 If the scalar definitions are loop invariants or constants, collect them and
2611 call vect_get_constant_vectors() to create vector stmts.
2612 Otherwise, the def-stmts must already be vectorized and the vectorized stmts
2613 must be stored in the corresponding child of SLP_NODE, and we call
2614 vect_get_slp_vect_defs () to retrieve them. */
2615
2616 void
2617 vect_get_slp_defs (vec<tree> ops, slp_tree slp_node,
2618 vec<slp_void_p> *vec_oprnds, int reduc_index)
2619 {
2620 gimple first_stmt, first_def;
2621 int number_of_vects = 0, i;
2622 unsigned int child_index = 0;
2623 HOST_WIDE_INT lhs_size_unit, rhs_size_unit;
2624 slp_tree child = NULL;
2625 vec<tree> *vec_defs;
2626 tree oprnd, def_lhs;
2627 bool vectorized_defs;
2628
2629 first_stmt = SLP_TREE_SCALAR_STMTS (slp_node)[0];
2630 FOR_EACH_VEC_ELT (ops, i, oprnd)
2631 {
2632 /* For each operand we check if it has vectorized definitions in a child
2633 node or we need to create them (for invariants and constants). We
2634 check if the LHS of the first stmt of the next child matches OPRND.
2635 If it does, we found the correct child. Otherwise, we call
2636 vect_get_constant_vectors () and do not advance CHILD_INDEX, so that
2637 this child node is checked again for the next operand. */
2638 vectorized_defs = false;
2639 if (SLP_TREE_CHILDREN (slp_node).length () > child_index)
2640 {
2641 child = (slp_tree) SLP_TREE_CHILDREN (slp_node)[child_index];
2642 first_def = SLP_TREE_SCALAR_STMTS (child)[0];
2643
2644 /* At the end of a pattern sequence we have a use of the original stmt,
2645 so we need to compare OPRND with the original def. */
2646 if (is_pattern_stmt_p (vinfo_for_stmt (first_def))
2647 && !STMT_VINFO_IN_PATTERN_P (vinfo_for_stmt (first_stmt))
2648 && !is_pattern_stmt_p (vinfo_for_stmt (first_stmt)))
2649 first_def = STMT_VINFO_RELATED_STMT (vinfo_for_stmt (first_def));
2650
2651 if (is_gimple_call (first_def))
2652 def_lhs = gimple_call_lhs (first_def);
2653 else
2654 def_lhs = gimple_assign_lhs (first_def);
2655
2656 if (operand_equal_p (oprnd, def_lhs, 0))
2657 {
2658 /* The number of vector defs is determined by the number of
2659 vector statements in the node from which we get those
2660 statements. */
2661 number_of_vects = SLP_TREE_NUMBER_OF_VEC_STMTS (child);
2662 vectorized_defs = true;
2663 child_index++;
2664 }
2665 }
2666
2667 if (!vectorized_defs)
2668 {
2669 if (i == 0)
2670 {
2671 number_of_vects = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
2672 /* Number of vector stmts was calculated according to LHS in
2673 vect_schedule_slp_instance (), fix it by replacing LHS with
2674 RHS, if necessary. See vect_get_smallest_scalar_type () for
2675 details. */
2676 vect_get_smallest_scalar_type (first_stmt, &lhs_size_unit,
2677 &rhs_size_unit);
2678 if (rhs_size_unit != lhs_size_unit)
2679 {
2680 number_of_vects *= rhs_size_unit;
2681 number_of_vects /= lhs_size_unit;
2682 }
2683 }
2684 }
2685
2686 /* Allocate memory for vectorized defs. */
2687 vec_alloc (vec_defs, number_of_vects);
2688
2689 /* For reduction defs we call vect_get_constant_vectors (), since we are
2690 looking for initial loop invariant values. */
2691 if (vectorized_defs && reduc_index == -1)
2692 /* The defs are already vectorized. */
2693 vect_get_slp_vect_defs (child, vec_defs);
2694 else
2695 /* Build vectors from scalar defs. */
2696 vect_get_constant_vectors (oprnd, slp_node, vec_defs, i,
2697 number_of_vects, reduc_index);
2698
2699 vec_oprnds->quick_push ((slp_void_p) vec_defs);
2700
2701 /* For reductions, we only need initial values. */
2702 if (reduc_index != -1)
2703 return;
2704 }
2705 }
2706
2707
2708 /* Create NCOPIES permutation statements using the vector permutation mask
2709 MASK and two input vectors placed in
2710 DR_CHAIN at FIRST_VEC_INDX and SECOND_VEC_INDX for the first copy and
2711 shifting by STRIDE elements of DR_CHAIN for every copy.
2712 (STRIDE is the number of vectorized stmts for NODE divided by the number of
2713 copies).
2714 VECT_STMTS_COUNTER specifies the index in the vectorized stmts of NODE, where
2715 the created stmts must be inserted. */
2716
2717 static inline void
2718 vect_create_mask_and_perm (gimple stmt, gimple next_scalar_stmt,
2719 tree mask, int first_vec_indx, int second_vec_indx,
2720 gimple_stmt_iterator *gsi, slp_tree node,
2721 tree vectype, vec<tree> dr_chain,
2722 int ncopies, int vect_stmts_counter)
2723 {
2724 tree perm_dest;
2725 gimple perm_stmt = NULL;
2726 stmt_vec_info next_stmt_info;
2727 int i, stride;
2728 tree first_vec, second_vec, data_ref;
2729
2730 stride = SLP_TREE_NUMBER_OF_VEC_STMTS (node) / ncopies;
2731
2732 /* Initialize the vect stmts of NODE to properly insert the generated
2733 stmts later. */
2734 for (i = SLP_TREE_VEC_STMTS (node).length ();
2735 i < (int) SLP_TREE_NUMBER_OF_VEC_STMTS (node); i++)
2736 SLP_TREE_VEC_STMTS (node).quick_push (NULL);
2737
2738 perm_dest = vect_create_destination_var (gimple_assign_lhs (stmt), vectype);
2739 for (i = 0; i < ncopies; i++)
2740 {
2741 first_vec = dr_chain[first_vec_indx];
2742 second_vec = dr_chain[second_vec_indx];
2743
2744 /* Generate the permute statement. */
2745 perm_stmt = gimple_build_assign_with_ops (VEC_PERM_EXPR, perm_dest,
2746 first_vec, second_vec, mask);
2747 data_ref = make_ssa_name (perm_dest, perm_stmt);
2748 gimple_set_lhs (perm_stmt, data_ref);
2749 vect_finish_stmt_generation (stmt, perm_stmt, gsi);
2750
2751 /* Store the vector statement in NODE. */
2752 SLP_TREE_VEC_STMTS (node)[stride * i + vect_stmts_counter] = perm_stmt;
2753
2754 first_vec_indx += stride;
2755 second_vec_indx += stride;
2756 }
2757
2758 /* Mark the scalar stmt as vectorized. */
2759 next_stmt_info = vinfo_for_stmt (next_scalar_stmt);
2760 STMT_VINFO_VEC_STMT (next_stmt_info) = perm_stmt;
2761 }
2762
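/* A small worked example of the STRIDE logic above (assumed numbers):
   with SLP_TREE_NUMBER_OF_VEC_STMTS == 8 and NCOPIES == 4, STRIDE is 2;
   copy I permutes DR_CHAIN[FIRST_VEC_INDX + 2 * I] with
   DR_CHAIN[SECOND_VEC_INDX + 2 * I] and stores the result in slot
   2 * I + VECT_STMTS_COUNTER of the node's vector stmts.  */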
2763
2764 /* Given FIRST_MASK_ELEMENT - the mask element in element representation,
2765 return in CURRENT_MASK_ELEMENT its equivalent in target specific
2766 representation. Check that the mask is valid and return FALSE if not.
2767 Set *NEED_NEXT_VECTOR to TRUE if the permutation requires moving to the
2768 next vector, i.e., the current first vector is not needed. */
2769
2770 static bool
2771 vect_get_mask_element (gimple stmt, int first_mask_element, int m,
2772 int mask_nunits, bool only_one_vec, int index,
2773 unsigned char *mask, int *current_mask_element,
2774 bool *need_next_vector, int *number_of_mask_fixes,
2775 bool *mask_fixed, bool *needs_first_vector)
2776 {
2777 int i;
2778
2779 /* Convert to target specific representation. */
2780 *current_mask_element = first_mask_element + m;
2781 /* Adjust the value in case it's a mask for second and third vectors. */
2782 *current_mask_element -= mask_nunits * (*number_of_mask_fixes - 1);
2783
2784 if (*current_mask_element < mask_nunits)
2785 *needs_first_vector = true;
2786
2787 /* We have only one input vector to permute but the mask accesses values in
2788 the next vector as well. */
2789 if (only_one_vec && *current_mask_element >= mask_nunits)
2790 {
2791 if (dump_enabled_p ())
2792 {
2793 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2794 "permutation requires at least two vectors ");
2795 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
2796 }
2797
2798 return false;
2799 }
2800
2801 /* The mask requires the next vector. */
2802 if (*current_mask_element >= mask_nunits * 2)
2803 {
2804 if (*needs_first_vector || *mask_fixed)
2805 {
2806 /* We either need the first vector too or have already moved to the
2807 next vector. In both cases, this permutation needs three
2808 vectors. */
2809 if (dump_enabled_p ())
2810 {
2811 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2812 "permutation requires at "
2813 "least three vectors ");
2814 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
2815 }
2816
2817 return false;
2818 }
2819
2820 /* We move to the next vector, dropping the first one and working with
2821 the second and the third - we need to adjust the values of the mask
2822 accordingly. */
2823 *current_mask_element -= mask_nunits * *number_of_mask_fixes;
2824
2825 for (i = 0; i < index; i++)
2826 mask[i] -= mask_nunits * *number_of_mask_fixes;
2827
2828 (*number_of_mask_fixes)++;
2829 *mask_fixed = true;
2830 }
2831
2832 *need_next_vector = *mask_fixed;
2833
2834 /* This was the last element of this mask. Start a new one. */
2835 if (index == mask_nunits - 1)
2836 {
2837 *number_of_mask_fixes = 1;
2838 *mask_fixed = false;
2839 *needs_first_vector = false;
2840 }
2841
2842 return true;
2843 }
2844
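/* A hedged trace of the helper above, assuming MASK_NUNITS == 4 and a
   mask element FIRST_MASK_ELEMENT + M == 9 on the first fix
   (*NUMBER_OF_MASK_FIXES == 1): since 9 >= 2 * 4 the first vector is
   dropped (unless it is still needed, which fails the check), the element
   is rewritten as 9 - 4 == 5, the already-emitted mask elements are
   adjusted the same way, and *MASK_FIXED is set so that *NEED_NEXT_VECTOR
   tells the caller to advance in DR_CHAIN.  */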
2845
2846 /* Generate vector permute statements from a list of loads in DR_CHAIN.
2847 If ANALYZE_ONLY is TRUE, only check that it is possible to create valid
2848 permute statements for SLP_NODE_INSTANCE. */
2849 bool
2850 vect_transform_slp_perm_load (gimple stmt, vec<tree> dr_chain,
2851 gimple_stmt_iterator *gsi, int vf,
2852 slp_instance slp_node_instance, bool analyze_only)
2853 {
2854 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2855 tree mask_element_type = NULL_TREE, mask_type;
2856 int i, j, k, nunits, vec_index = 0, scalar_index;
2857 slp_tree node;
2858 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
2859 gimple next_scalar_stmt;
2860 int group_size = SLP_INSTANCE_GROUP_SIZE (slp_node_instance);
2861 int first_mask_element;
2862 int index, unroll_factor, current_mask_element, ncopies;
2863 unsigned char *mask;
2864 bool only_one_vec = false, need_next_vector = false;
2865 int first_vec_index, second_vec_index, orig_vec_stmts_num, vect_stmts_counter;
2866 int number_of_mask_fixes = 1;
2867 bool mask_fixed = false;
2868 bool needs_first_vector = false;
2869 enum machine_mode mode;
2870
2871 mode = TYPE_MODE (vectype);
2872
2873 if (!can_vec_perm_p (mode, false, NULL))
2874 {
2875 if (dump_enabled_p ())
2876 {
2877 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2878 "no vect permute for ");
2879 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
2880 }
2881 return false;
2882 }
2883
2884 /* The generic VEC_PERM_EXPR code always uses an integral type of the
2885 same size as the vector element being permuted. */
2886 mask_element_type = lang_hooks.types.type_for_mode
2887 (int_mode_for_mode (TYPE_MODE (TREE_TYPE (vectype))), 1);
2888 mask_type = get_vectype_for_scalar_type (mask_element_type);
2889 nunits = TYPE_VECTOR_SUBPARTS (vectype);
2890 mask = XALLOCAVEC (unsigned char, nunits);
2891 unroll_factor = SLP_INSTANCE_UNROLLING_FACTOR (slp_node_instance);
2892
2893 /* The number of vector stmts to generate based only on the SLP_NODE_INSTANCE
2894 unrolling factor. */
2895 orig_vec_stmts_num = group_size *
2896 SLP_INSTANCE_UNROLLING_FACTOR (slp_node_instance) / nunits;
2897 if (orig_vec_stmts_num == 1)
2898 only_one_vec = true;
2899
2900 /* The number of copies is determined by the final vectorization factor
2901 relative to the SLP_NODE_INSTANCE unrolling factor. */
2902 ncopies = vf / SLP_INSTANCE_UNROLLING_FACTOR (slp_node_instance);
2903
2904 /* Generate permutation masks for every NODE. Number of masks for each NODE
2905 is equal to GROUP_SIZE.
2906 E.g., we have a group of three nodes with three loads from the same
2907 location in each node, and the vector size is 4. I.e., we have a
2908 a0b0c0a1b1c1... sequence and we need to create the following vectors:
2909 for a's: a0a0a0a1 a1a1a2a2 a2a3a3a3
2910 for b's: b0b0b0b1 b1b1b2b2 b2b3b3b3
2911 ...
2912
2913 The masks for a's should be: {0,0,0,3} {3,3,6,6} {6,9,9,9}.
2914 The last mask is invalid since we assume two operands for the permute
2915 operation, and the mask element values can't be outside that range.
2916 Hence, the last mask must be converted into {2,5,5,5}.
2917 For the first two permutations we need the first and the second input
2918 vectors: {a0,b0,c0,a1} and {b1,c1,a2,b2}, and for the last permutation
2919 we need the second and the third vectors: {b1,c1,a2,b2} and
2920 {c2,a3,b3,c3}. */
2921
2922 FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (slp_node_instance), i, node)
2923 {
2924 scalar_index = 0;
2925 index = 0;
2926 vect_stmts_counter = 0;
2927 vec_index = 0;
2928 first_vec_index = vec_index++;
2929 if (only_one_vec)
2930 second_vec_index = first_vec_index;
2931 else
2932 second_vec_index = vec_index++;
2933
2934 for (j = 0; j < unroll_factor; j++)
2935 {
2936 for (k = 0; k < group_size; k++)
2937 {
2938 first_mask_element = i + j * group_size;
2939 if (!vect_get_mask_element (stmt, first_mask_element, 0,
2940 nunits, only_one_vec, index,
2941 mask, &current_mask_element,
2942 &need_next_vector,
2943 &number_of_mask_fixes, &mask_fixed,
2944 &needs_first_vector))
2945 return false;
2946 mask[index++] = current_mask_element;
2947
2948 if (index == nunits)
2949 {
2950 tree mask_vec, *mask_elts;
2951 int l;
2952
2953 if (!can_vec_perm_p (mode, false, mask))
2954 {
2955 if (dump_enabled_p ())
2956 {
2957 dump_printf_loc (MSG_MISSED_OPTIMIZATION,
2958 vect_location,
2959 "unsupported vect permute { ");
2960 for (i = 0; i < nunits; ++i)
2961 dump_printf (MSG_MISSED_OPTIMIZATION, "%d ",
2962 mask[i]);
2963 dump_printf (MSG_MISSED_OPTIMIZATION, "}\n");
2964 }
2965 return false;
2966 }
2967
2968 mask_elts = XALLOCAVEC (tree, nunits);
2969 for (l = 0; l < nunits; ++l)
2970 mask_elts[l] = build_int_cst (mask_element_type, mask[l]);
2971 mask_vec = build_vector (mask_type, mask_elts);
2972 index = 0;
2973
2974 if (!analyze_only)
2975 {
2976 if (need_next_vector)
2977 {
2978 first_vec_index = second_vec_index;
2979 second_vec_index = vec_index;
2980 }
2981
2982 next_scalar_stmt
2983 = SLP_TREE_SCALAR_STMTS (node)[scalar_index++];
2984
2985 vect_create_mask_and_perm (stmt, next_scalar_stmt,
2986 mask_vec, first_vec_index, second_vec_index,
2987 gsi, node, vectype, dr_chain,
2988 ncopies, vect_stmts_counter++);
2989 }
2990 }
2991 }
2992 }
2993 }
2994
2995 return true;
2996 }
2997
2998
2999
3000 /* Vectorize SLP instance tree in postorder. */
3001
3002 static bool
3003 vect_schedule_slp_instance (slp_tree node, slp_instance instance,
3004 unsigned int vectorization_factor)
3005 {
3006 gimple stmt;
3007 bool grouped_store, is_store;
3008 gimple_stmt_iterator si;
3009 stmt_vec_info stmt_info;
3010 unsigned int vec_stmts_size, nunits, group_size;
3011 tree vectype;
3012 int i;
3013 slp_tree loads_node;
3014 slp_void_p child;
3015
3016 if (!node)
3017 return false;
3018
3019 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
3020 vect_schedule_slp_instance ((slp_tree) child, instance,
3021 vectorization_factor);
3022
3023 stmt = SLP_TREE_SCALAR_STMTS (node)[0];
3024 stmt_info = vinfo_for_stmt (stmt);
3025
3026 /* VECTYPE is the type of the destination. */
3027 vectype = STMT_VINFO_VECTYPE (stmt_info);
3028 nunits = (unsigned int) TYPE_VECTOR_SUBPARTS (vectype);
3029 group_size = SLP_INSTANCE_GROUP_SIZE (instance);
3030
3031 /* For each SLP instance calculate number of vector stmts to be created
3032 for the scalar stmts in each node of the SLP tree. Number of vector
3033 elements in one vector iteration is the number of scalar elements in
3034 one scalar iteration (GROUP_SIZE) multiplied by VF divided by vector
3035 size. */
3036 vec_stmts_size = (vectorization_factor * group_size) / nunits;
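  /* For instance (assumed numbers): GROUP_SIZE == 4, VECTORIZATION_FACTOR
     == 4 and NUNITS == 8 yield (4 * 4) / 8 == 2 vector stmts per node.  */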
3037
3038 /* In case of load permutation we have to allocate vectorized statements for
3039 all the nodes that participate in that permutation. */
3040 if (SLP_INSTANCE_LOAD_PERMUTATION (instance).exists ())
3041 {
3042 FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (instance), i, loads_node)
3043 {
3044 if (!SLP_TREE_VEC_STMTS (loads_node).exists ())
3045 {
3046 SLP_TREE_VEC_STMTS (loads_node).create (vec_stmts_size);
3047 SLP_TREE_NUMBER_OF_VEC_STMTS (loads_node) = vec_stmts_size;
3048 }
3049 }
3050 }
3051
3052 if (!SLP_TREE_VEC_STMTS (node).exists ())
3053 {
3054 SLP_TREE_VEC_STMTS (node).create (vec_stmts_size);
3055 SLP_TREE_NUMBER_OF_VEC_STMTS (node) = vec_stmts_size;
3056 }
3057
3058 if (dump_enabled_p ())
3059 {
3060 dump_printf_loc (MSG_NOTE, vect_location,
3061 "------>vectorizing SLP node starting from: ");
3062 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
3063 }
3064
3065 /* Loads should be inserted before the first load. */
3066 if (SLP_INSTANCE_FIRST_LOAD_STMT (instance)
3067 && STMT_VINFO_GROUPED_ACCESS (stmt_info)
3068 && !REFERENCE_CLASS_P (gimple_get_lhs (stmt))
3069 && SLP_INSTANCE_LOAD_PERMUTATION (instance).exists ())
3070 si = gsi_for_stmt (SLP_INSTANCE_FIRST_LOAD_STMT (instance));
3071 else if (is_pattern_stmt_p (stmt_info))
3072 si = gsi_for_stmt (STMT_VINFO_RELATED_STMT (stmt_info));
3073 else
3074 si = gsi_for_stmt (stmt);
3075
3076 /* Stores should be inserted just before the last store. */
3077 if (STMT_VINFO_GROUPED_ACCESS (stmt_info)
3078 && REFERENCE_CLASS_P (gimple_get_lhs (stmt)))
3079 {
3080 gimple last_store = vect_find_last_store_in_slp_instance (instance);
3081 if (is_pattern_stmt_p (vinfo_for_stmt (last_store)))
3082 last_store = STMT_VINFO_RELATED_STMT (vinfo_for_stmt (last_store));
3083 si = gsi_for_stmt (last_store);
3084 }
3085
3086 /* Mark the first element of the reduction chain as reduction to properly
3087 transform the node. In the analysis phase only the last element of the
3088 chain is marked as reduction. */
3089 if (GROUP_FIRST_ELEMENT (stmt_info) && !STMT_VINFO_GROUPED_ACCESS (stmt_info)
3090 && GROUP_FIRST_ELEMENT (stmt_info) == stmt)
3091 {
3092 STMT_VINFO_DEF_TYPE (stmt_info) = vect_reduction_def;
3093 STMT_VINFO_TYPE (stmt_info) = reduc_vec_info_type;
3094 }
3095
3096 is_store = vect_transform_stmt (stmt, &si, &grouped_store, node, instance);
3097 return is_store;
3098 }
3099
3100 /* Replace the scalar calls from SLP node NODE with assignments setting
3101 their lhs to zero. For loop vectorization this is done in
3102 vectorizable_call, but for SLP it must be deferred until the end of
3103 vect_schedule_slp, since multiple SLP instances may share a scalar stmt. */
3104
3105 static void
3106 vect_remove_slp_scalar_calls (slp_tree node)
3107 {
3108 gimple stmt, new_stmt;
3109 gimple_stmt_iterator gsi;
3110 int i;
3111 slp_void_p child;
3112 tree lhs;
3113 stmt_vec_info stmt_info;
3114
3115 if (!node)
3116 return;
3117
3118 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
3119 vect_remove_slp_scalar_calls ((slp_tree) child);
3120
3121 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, stmt)
3122 {
3123 if (!is_gimple_call (stmt) || gimple_bb (stmt) == NULL)
3124 continue;
3125 stmt_info = vinfo_for_stmt (stmt);
3126 if (stmt_info == NULL
3127 || is_pattern_stmt_p (stmt_info)
3128 || !PURE_SLP_STMT (stmt_info))
3129 continue;
3130 lhs = gimple_call_lhs (stmt);
3131 new_stmt = gimple_build_assign (lhs, build_zero_cst (TREE_TYPE (lhs)));
3132 set_vinfo_for_stmt (new_stmt, stmt_info);
3133 set_vinfo_for_stmt (stmt, NULL);
3134 STMT_VINFO_STMT (stmt_info) = new_stmt;
3135 gsi = gsi_for_stmt (stmt);
3136 gsi_replace (&gsi, new_stmt, false);
3137 SSA_NAME_DEF_STMT (gimple_assign_lhs (new_stmt)) = new_stmt;
3138 }
3139 }
3140
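/* For illustration (hypothetical GIMPLE): after the replacement above, a
   scalar call that survives only because several SLP instances shared it,
   such as

     x_1 = __builtin_sqrtf (a_2);

   becomes

     x_1 = 0.0;

   which keeps the SSA web consistent until dead code elimination removes
   it; the real computation is done by the vectorized call emitted for the
   SLP node.  */
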
3141 /* Generate vector code for all SLP instances in the loop/basic block. */
3142
3143 bool
3144 vect_schedule_slp (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo)
3145 {
3146 vec<slp_instance> slp_instances;
3147 slp_instance instance;
3148 unsigned int i, vf;
3149 bool is_store = false;
3150
3151 if (loop_vinfo)
3152 {
3153 slp_instances = LOOP_VINFO_SLP_INSTANCES (loop_vinfo);
3154 vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
3155 }
3156 else
3157 {
3158 slp_instances = BB_VINFO_SLP_INSTANCES (bb_vinfo);
3159 vf = 1;
3160 }
3161
3162 FOR_EACH_VEC_ELT (slp_instances, i, instance)
3163 {
3164 /* Schedule the tree of INSTANCE. */
3165 is_store = vect_schedule_slp_instance (SLP_INSTANCE_TREE (instance),
3166 instance, vf);
3167 if (dump_enabled_p ())
3168 dump_printf_loc (MSG_NOTE, vect_location,
3169 "vectorizing stmts using SLP.");
3170 }
3171
3172 FOR_EACH_VEC_ELT (slp_instances, i, instance)
3173 {
3174 slp_tree root = SLP_INSTANCE_TREE (instance);
3175 gimple store;
3176 unsigned int j;
3177 gimple_stmt_iterator gsi;
3178
3179 vect_remove_slp_scalar_calls (root);
3180
3181 for (j = 0; SLP_TREE_SCALAR_STMTS (root).iterate (j, &store)
3182 && j < SLP_INSTANCE_GROUP_SIZE (instance); j++)
3183 {
3184 if (!STMT_VINFO_DATA_REF (vinfo_for_stmt (store)))
3185 break;
3186
3187 if (is_pattern_stmt_p (vinfo_for_stmt (store)))
3188 store = STMT_VINFO_RELATED_STMT (vinfo_for_stmt (store));
3189 /* Free the attached stmt_vec_info and remove the stmt. */
3190 gsi = gsi_for_stmt (store);
3191 unlink_stmt_vdef (store);
3192 gsi_remove (&gsi, true);
3193 release_defs (store);
3194 free_stmt_vec_info (store);
3195 }
3196 }
3197
3198 return is_store;
3199 }
3200
3201
3202 /* Vectorize the basic block. */
3203
3204 void
3205 vect_slp_transform_bb (basic_block bb)
3206 {
3207 bb_vec_info bb_vinfo = vec_info_for_bb (bb);
3208 gimple_stmt_iterator si;
3209
3210 gcc_assert (bb_vinfo);
3211
3212 if (dump_enabled_p ())
3213 dump_printf_loc (MSG_NOTE, vect_location, "SLPing BB\n");
3214
3215 for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
3216 {
3217 gimple stmt = gsi_stmt (si);
3218 stmt_vec_info stmt_info;
3219
3220 if (dump_enabled_p ())
3221 {
3222 dump_printf_loc (MSG_NOTE, vect_location,
3223 "------>SLPing statement: ");
3224 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
3225 }
3226
3227 stmt_info = vinfo_for_stmt (stmt);
3228 gcc_assert (stmt_info);
3229
3230 /* Schedule all the SLP instances when the first SLP stmt is reached. */
3231 if (STMT_SLP_TYPE (stmt_info))
3232 {
3233 vect_schedule_slp (NULL, bb_vinfo);
3234 break;
3235 }
3236 }
3237
3238 if (dump_enabled_p ())
3239 dump_printf (MSG_OPTIMIZED_LOCATIONS, "BASIC BLOCK VECTORIZED\n");
3240
3241 destroy_bb_vec_info (bb_vinfo);
3242 }