/* SLP - Basic Block Vectorization
   Copyright (C) 2007-2013 Free Software Foundation, Inc.
   Contributed by Dorit Naishlos <dorit@il.ibm.com>
   and Ira Rosen <irar@il.ibm.com>

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free
Software Foundation; either version 3, or (at your option) any later
version.

GCC is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */

#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "dumpfile.h"
#include "tm.h"
#include "ggc.h"
#include "tree.h"
#include "target.h"
#include "basic-block.h"
#include "gimple-pretty-print.h"
#include "tree-flow.h"
#include "tree-pass.h"
#include "cfgloop.h"
#include "expr.h"
#include "recog.h"		/* FIXME: for insn_data */
#include "optabs.h"
#include "tree-vectorizer.h"
#include "langhooks.h"

/* Extract the location of the basic block in the source code.
   Return the basic block location if it succeeds and UNKNOWN_LOC if not.  */

LOC
find_bb_location (basic_block bb)
{
  gimple stmt = NULL;
  gimple_stmt_iterator si;

  if (!bb)
    return UNKNOWN_LOC;

  for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
    {
      stmt = gsi_stmt (si);
      if (gimple_location (stmt) != UNKNOWN_LOC)
        return gimple_location (stmt);
    }

  return UNKNOWN_LOC;
}


/* Recursively free the memory allocated for the SLP tree rooted at NODE.  */

static void
vect_free_slp_tree (slp_tree node)
{
  int i;
  slp_tree child;

  if (!node)
    return;

  FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
    vect_free_slp_tree (child);

  SLP_TREE_CHILDREN (node).release ();
  SLP_TREE_SCALAR_STMTS (node).release ();
  SLP_TREE_VEC_STMTS (node).release ();

  free (node);
}


/* Free the memory allocated for the SLP instance.  */

void
vect_free_slp_instance (slp_instance instance)
{
  vect_free_slp_tree (SLP_INSTANCE_TREE (instance));
  SLP_INSTANCE_LOAD_PERMUTATION (instance).release ();
  SLP_INSTANCE_LOADS (instance).release ();
  SLP_INSTANCE_BODY_COST_VEC (instance).release ();
  free (instance);
}


/* Create an SLP node for SCALAR_STMTS.  */

static slp_tree
vect_create_new_slp_node (vec<gimple> scalar_stmts)
{
  slp_tree node;
  gimple stmt = scalar_stmts[0];
  unsigned int nops;

  if (is_gimple_call (stmt))
    nops = gimple_call_num_args (stmt);
  else if (is_gimple_assign (stmt))
    {
      nops = gimple_num_ops (stmt) - 1;
      if (gimple_assign_rhs_code (stmt) == COND_EXPR)
        nops++;
    }
  else
    return NULL;

  node = XNEW (struct _slp_tree);
  SLP_TREE_SCALAR_STMTS (node) = scalar_stmts;
  SLP_TREE_VEC_STMTS (node).create (0);
  SLP_TREE_CHILDREN (node).create (nops);

  return node;
}


/* Allocate operands info for NOPS operands, and GROUP_SIZE def-stmts for each
   operand.  */
static vec<slp_oprnd_info>
vect_create_oprnd_info (int nops, int group_size)
{
  int i;
  slp_oprnd_info oprnd_info;
  vec<slp_oprnd_info> oprnds_info;

  oprnds_info.create (nops);
  for (i = 0; i < nops; i++)
    {
      oprnd_info = XNEW (struct _slp_oprnd_info);
      oprnd_info->def_stmts.create (group_size);
      oprnd_info->first_dt = vect_uninitialized_def;
      oprnd_info->first_def_type = NULL_TREE;
      oprnd_info->first_const_oprnd = NULL_TREE;
      oprnd_info->first_pattern = false;
      oprnds_info.quick_push (oprnd_info);
    }

  return oprnds_info;
}


/* Free operands info.  */

static void
vect_free_oprnd_info (vec<slp_oprnd_info> &oprnds_info)
{
  int i;
  slp_oprnd_info oprnd_info;

  FOR_EACH_VEC_ELT (oprnds_info, i, oprnd_info)
    {
      oprnd_info->def_stmts.release ();
      XDELETE (oprnd_info);
    }

  oprnds_info.release ();
}


/* Find the place of the data-ref in STMT in the interleaving chain that starts
   from FIRST_STMT.  Return -1 if the data-ref is not a part of the chain.  */
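/* For example, if FIRST_STMT is the load of a[0] in the interleaving chain
   of loads of a[0], a[1] and a[2], then calling this function on the load
   of a[2] returns 2, and calling it on a load that is not part of the
   chain returns -1.  */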

static int
vect_get_place_in_interleaving_chain (gimple stmt, gimple first_stmt)
{
  gimple next_stmt = first_stmt;
  int result = 0;

  if (first_stmt != GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)))
    return -1;

  do
    {
      if (next_stmt == stmt)
        return result;
      result++;
      next_stmt = GROUP_NEXT_ELEMENT (vinfo_for_stmt (next_stmt));
    }
  while (next_stmt);

  return -1;
}


/* Get the defs for the rhs of STMT (collect them in OPRNDS_INFO), check that
   they are of a valid type and that they match the defs of the first stmt of
   the SLP group (stored in OPRNDS_INFO).  */

static bool
vect_get_and_check_slp_defs (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo,
                             slp_tree slp_node, gimple stmt,
                             int ncopies_for_cost, bool first,
                             vec<slp_oprnd_info> *oprnds_info,
                             stmt_vector_for_cost *prologue_cost_vec,
                             stmt_vector_for_cost *body_cost_vec)
{
  tree oprnd;
  unsigned int i, number_of_oprnds;
  tree def, def_op0 = NULL_TREE;
  gimple def_stmt;
  enum vect_def_type dt = vect_uninitialized_def;
  enum vect_def_type dt_op0 = vect_uninitialized_def;
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
  tree lhs = gimple_get_lhs (stmt);
  struct loop *loop = NULL;
  enum tree_code rhs_code;
  bool different_types = false;
  bool pattern = false;
  slp_oprnd_info oprnd_info, oprnd0_info, oprnd1_info;
  int op_idx = 1;
  tree compare_rhs = NULL_TREE;

  if (loop_vinfo)
    loop = LOOP_VINFO_LOOP (loop_vinfo);

  if (is_gimple_call (stmt))
    {
      number_of_oprnds = gimple_call_num_args (stmt);
      op_idx = 3;
    }
  else if (is_gimple_assign (stmt))
    {
      number_of_oprnds = gimple_num_ops (stmt) - 1;
      if (gimple_assign_rhs_code (stmt) == COND_EXPR)
        number_of_oprnds++;
    }
  else
    return false;

  for (i = 0; i < number_of_oprnds; i++)
    {
      if (compare_rhs)
        {
          oprnd = compare_rhs;
          compare_rhs = NULL_TREE;
        }
      else
        oprnd = gimple_op (stmt, op_idx++);

      oprnd_info = (*oprnds_info)[i];

      if (COMPARISON_CLASS_P (oprnd))
        {
          compare_rhs = TREE_OPERAND (oprnd, 1);
          oprnd = TREE_OPERAND (oprnd, 0);
        }

      if (!vect_is_simple_use (oprnd, NULL, loop_vinfo, bb_vinfo, &def_stmt,
                               &def, &dt)
          || (!def_stmt && dt != vect_constant_def))
        {
          if (dump_enabled_p ())
            {
              dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                               "Build SLP failed: can't find def for ");
              dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM, oprnd);
            }

          return false;
        }

      /* Check if DEF_STMT is a part of a pattern in LOOP and get the def stmt
         from the pattern.  Check that all the stmts of the node are in the
         pattern.  */
      if (def_stmt && gimple_bb (def_stmt)
          && ((loop && flow_bb_inside_loop_p (loop, gimple_bb (def_stmt)))
              || (!loop && gimple_bb (def_stmt) == BB_VINFO_BB (bb_vinfo)
                  && gimple_code (def_stmt) != GIMPLE_PHI))
          && vinfo_for_stmt (def_stmt)
          && STMT_VINFO_IN_PATTERN_P (vinfo_for_stmt (def_stmt))
          && !STMT_VINFO_RELEVANT (vinfo_for_stmt (def_stmt))
          && !STMT_VINFO_LIVE_P (vinfo_for_stmt (def_stmt)))
        {
          pattern = true;
          if (!first && !oprnd_info->first_pattern)
            {
              if (dump_enabled_p ())
                {
                  dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                                   "Build SLP failed: some of the stmts"
                                   " are in a pattern, and others are not ");
                  dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM, oprnd);
                }

              return false;
            }

          def_stmt = STMT_VINFO_RELATED_STMT (vinfo_for_stmt (def_stmt));
          dt = STMT_VINFO_DEF_TYPE (vinfo_for_stmt (def_stmt));

          if (dt == vect_unknown_def_type)
            {
              if (dump_enabled_p ())
                dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                                 "Unsupported pattern.");
              return false;
            }

          switch (gimple_code (def_stmt))
            {
              case GIMPLE_PHI:
                def = gimple_phi_result (def_stmt);
                break;

              case GIMPLE_ASSIGN:
                def = gimple_assign_lhs (def_stmt);
                break;

              default:
                if (dump_enabled_p ())
                  dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                                   "unsupported defining stmt: ");
                return false;
            }
        }

      if (first)
        {
          oprnd_info->first_dt = dt;
          oprnd_info->first_pattern = pattern;
          if (def)
            {
              oprnd_info->first_def_type = TREE_TYPE (def);
              oprnd_info->first_const_oprnd = NULL_TREE;
            }
          else
            {
              oprnd_info->first_def_type = NULL_TREE;
              oprnd_info->first_const_oprnd = oprnd;
            }

          if (i == 0)
            {
              def_op0 = def;
              dt_op0 = dt;
              /* Analyze costs (for the first stmt of the group only).  */
              if (REFERENCE_CLASS_P (lhs))
                /* Store.  */
                vect_model_store_cost (stmt_info, ncopies_for_cost, false,
                                       dt, slp_node, prologue_cost_vec,
                                       body_cost_vec);
              else
                {
                  enum vect_def_type dts[2];
                  dts[0] = dt;
                  dts[1] = vect_uninitialized_def;
                  /* Not memory operation (we don't call this function for
                     loads).  */
                  vect_model_simple_cost (stmt_info, ncopies_for_cost, dts,
                                          prologue_cost_vec, body_cost_vec);
                }
            }
        }
      else
        {
          /* Not first stmt of the group, check that the def-stmt/s match
             the def-stmt/s of the first stmt.  Allow different definition
             types for reduction chains: the first stmt must be a
             vect_reduction_def (a phi node), and the rest
             vect_internal_def.  */
          if (((oprnd_info->first_dt != dt
                && !(oprnd_info->first_dt == vect_reduction_def
                     && dt == vect_internal_def))
               || (oprnd_info->first_def_type != NULL_TREE
                   && def
                   && !types_compatible_p (oprnd_info->first_def_type,
                                           TREE_TYPE (def))))
              || (!def
                  && !types_compatible_p (TREE_TYPE (oprnd_info->first_const_oprnd),
                                          TREE_TYPE (oprnd)))
              || different_types)
            {
              if (number_of_oprnds != 2)
                {
                  if (dump_enabled_p ())
                    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                                     "Build SLP failed: different types ");

                  return false;
                }

              /* Try to swap operands in case of binary operation.  */
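              /* For instance, for a commutative operation the group
                 { x0 = a0 + b0;  x1 = b1 + a1; } can still be built by
                 swapping rhs1 and rhs2 of the second statement.  */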
              if (i == 0)
                different_types = true;
              else
                {
                  oprnd0_info = (*oprnds_info)[0];
                  if (is_gimple_assign (stmt)
                      && (rhs_code = gimple_assign_rhs_code (stmt))
                      && TREE_CODE_CLASS (rhs_code) == tcc_binary
                      && commutative_tree_code (rhs_code)
                      && oprnd0_info->first_dt == dt
                      && oprnd_info->first_dt == dt_op0
                      && def_op0 && def
                      && !(oprnd0_info->first_def_type
                           && !types_compatible_p (oprnd0_info->first_def_type,
                                                   TREE_TYPE (def)))
                      && !(oprnd_info->first_def_type
                           && !types_compatible_p (oprnd_info->first_def_type,
                                                   TREE_TYPE (def_op0))))
                    {
                      if (dump_enabled_p ())
                        {
                          dump_printf_loc (MSG_NOTE, vect_location,
                                           "Swapping operands of ");
                          dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
                        }

                      swap_tree_operands (stmt, gimple_assign_rhs1_ptr (stmt),
                                          gimple_assign_rhs2_ptr (stmt));
                    }
                  else
                    {
                      if (dump_enabled_p ())
                        dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                                         "Build SLP failed: different types ");

                      return false;
                    }
                }
            }
        }

      /* Check the types of the definitions.  */
      switch (dt)
        {
        case vect_constant_def:
        case vect_external_def:
        case vect_reduction_def:
          break;

        case vect_internal_def:
          if (different_types)
            {
              oprnd0_info = (*oprnds_info)[0];
              oprnd1_info = (*oprnds_info)[1];
              if (i == 0)
                oprnd1_info->def_stmts.quick_push (def_stmt);
              else
                oprnd0_info->def_stmts.quick_push (def_stmt);
            }
          else
            oprnd_info->def_stmts.quick_push (def_stmt);

          break;

        default:
          /* FORNOW: Not supported.  */
          if (dump_enabled_p ())
            {
              dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                               "Build SLP failed: illegal type of def ");
              dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM, def);
            }

          return false;
        }
    }

  return true;
}


/* Recursively build an SLP tree starting from NODE.
   Fail (and return FALSE) if def-stmts are not isomorphic, require data
   permutation, or are of unsupported operation types.  Otherwise, return
   TRUE.  */
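/* The recursion follows the use-def chains from the group of stores at the
   root towards the statements defining their operands; it stops at groups
   of loads and at operands that are not internal defs (constants and
   external defs get no child node).  */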

static bool
vect_build_slp_tree (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo,
                     slp_tree *node, unsigned int group_size, int *outside_cost,
                     int ncopies_for_cost, unsigned int *max_nunits,
                     vec<int> *load_permutation,
                     vec<slp_tree> *loads,
                     unsigned int vectorization_factor, bool *loads_permuted,
                     stmt_vector_for_cost *prologue_cost_vec,
                     stmt_vector_for_cost *body_cost_vec)
{
  unsigned int i;
  vec<gimple> stmts = SLP_TREE_SCALAR_STMTS (*node);
  gimple stmt = stmts[0];
  enum tree_code first_stmt_code = ERROR_MARK, rhs_code = ERROR_MARK;
  enum tree_code first_cond_code = ERROR_MARK;
  tree lhs;
  bool stop_recursion = false, need_same_oprnds = false;
  tree vectype, scalar_type, first_op1 = NULL_TREE;
  optab optab;
  int icode;
  enum machine_mode optab_op2_mode;
  enum machine_mode vec_mode;
  struct data_reference *first_dr;
  HOST_WIDE_INT dummy;
  bool permutation = false;
  unsigned int load_place;
  gimple first_load = NULL, prev_first_load = NULL, old_first_load = NULL;
  vec<slp_oprnd_info> oprnds_info;
  unsigned int nops;
  slp_oprnd_info oprnd_info;
  tree cond;

  if (is_gimple_call (stmt))
    nops = gimple_call_num_args (stmt);
  else if (is_gimple_assign (stmt))
    {
      nops = gimple_num_ops (stmt) - 1;
      if (gimple_assign_rhs_code (stmt) == COND_EXPR)
        nops++;
    }
  else
    return false;

  oprnds_info = vect_create_oprnd_info (nops, group_size);

  /* For every stmt in NODE find its def stmt/s.  */
  FOR_EACH_VEC_ELT (stmts, i, stmt)
    {
      if (dump_enabled_p ())
        {
          dump_printf_loc (MSG_NOTE, vect_location, "Build SLP for ");
          dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
        }

      /* Fail to vectorize statements marked as unvectorizable.  */
      if (!STMT_VINFO_VECTORIZABLE (vinfo_for_stmt (stmt)))
        {
          if (dump_enabled_p ())
            {
              dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                               "Build SLP failed: unvectorizable statement ");
              dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
            }

          vect_free_oprnd_info (oprnds_info);
          return false;
        }

      lhs = gimple_get_lhs (stmt);
      if (lhs == NULL_TREE)
        {
          if (dump_enabled_p ())
            {
              dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                               "Build SLP failed: not GIMPLE_ASSIGN nor "
                               "GIMPLE_CALL ");
              dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
            }

          vect_free_oprnd_info (oprnds_info);
          return false;
        }

      if (is_gimple_assign (stmt)
          && gimple_assign_rhs_code (stmt) == COND_EXPR
          && (cond = gimple_assign_rhs1 (stmt))
          && !COMPARISON_CLASS_P (cond))
        {
          if (dump_enabled_p ())
            {
              dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                               "Build SLP failed: condition is not "
                               "comparison ");
              dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
            }

          vect_free_oprnd_info (oprnds_info);
          return false;
        }

      scalar_type = vect_get_smallest_scalar_type (stmt, &dummy, &dummy);
      vectype = get_vectype_for_scalar_type (scalar_type);
      if (!vectype)
        {
          if (dump_enabled_p ())
            {
              dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                               "Build SLP failed: unsupported data-type ");
              dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
                                 scalar_type);
            }

          vect_free_oprnd_info (oprnds_info);
          return false;
        }

      /* In case of multiple types we need to detect the smallest type.  */
      if (*max_nunits < TYPE_VECTOR_SUBPARTS (vectype))
        {
          *max_nunits = TYPE_VECTOR_SUBPARTS (vectype);
          if (bb_vinfo)
            vectorization_factor = *max_nunits;
        }

      if (is_gimple_call (stmt))
        {
          rhs_code = CALL_EXPR;
          if (gimple_call_internal_p (stmt)
              || gimple_call_tail_p (stmt)
              || gimple_call_noreturn_p (stmt)
              || !gimple_call_nothrow_p (stmt)
              || gimple_call_chain (stmt))
            {
              if (dump_enabled_p ())
                {
                  dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                                   "Build SLP failed: unsupported call type ");
                  dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
                }

              vect_free_oprnd_info (oprnds_info);
              return false;
            }
        }
      else
        rhs_code = gimple_assign_rhs_code (stmt);

      /* Check the operation.  */
      if (i == 0)
        {
          first_stmt_code = rhs_code;

          /* Shift arguments should be equal in all the packed stmts for a
             vector shift with scalar shift operand.  */
          if (rhs_code == LSHIFT_EXPR || rhs_code == RSHIFT_EXPR
              || rhs_code == LROTATE_EXPR
              || rhs_code == RROTATE_EXPR)
            {
              vec_mode = TYPE_MODE (vectype);

              /* First see if we have a vector/vector shift.  */
              optab = optab_for_tree_code (rhs_code, vectype,
                                           optab_vector);

              if (!optab
                  || optab_handler (optab, vec_mode) == CODE_FOR_nothing)
                {
                  /* No vector/vector shift, try for a vector/scalar shift.  */
                  optab = optab_for_tree_code (rhs_code, vectype,
                                               optab_scalar);

                  if (!optab)
                    {
                      if (dump_enabled_p ())
                        dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                                         "Build SLP failed: no optab.");
                      vect_free_oprnd_info (oprnds_info);
                      return false;
                    }
                  icode = (int) optab_handler (optab, vec_mode);
                  if (icode == CODE_FOR_nothing)
                    {
                      if (dump_enabled_p ())
                        dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                                         "Build SLP failed: "
                                         "op not supported by target.");
                      vect_free_oprnd_info (oprnds_info);
                      return false;
                    }
                  optab_op2_mode = insn_data[icode].operand[2].mode;
                  if (!VECTOR_MODE_P (optab_op2_mode))
                    {
                      need_same_oprnds = true;
                      first_op1 = gimple_assign_rhs2 (stmt);
                    }
                }
            }
          else if (rhs_code == WIDEN_LSHIFT_EXPR)
            {
              need_same_oprnds = true;
              first_op1 = gimple_assign_rhs2 (stmt);
            }
        }
      else
        {
          if (first_stmt_code != rhs_code
              && (first_stmt_code != IMAGPART_EXPR
                  || rhs_code != REALPART_EXPR)
              && (first_stmt_code != REALPART_EXPR
                  || rhs_code != IMAGPART_EXPR)
              && !(STMT_VINFO_GROUPED_ACCESS (vinfo_for_stmt (stmt))
                   && (first_stmt_code == ARRAY_REF
                       || first_stmt_code == BIT_FIELD_REF
                       || first_stmt_code == INDIRECT_REF
                       || first_stmt_code == COMPONENT_REF
                       || first_stmt_code == MEM_REF)))
            {
              if (dump_enabled_p ())
                {
                  dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                                   "Build SLP failed: different operation "
                                   "in stmt ");
                  dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
                }

              vect_free_oprnd_info (oprnds_info);
              return false;
            }

          if (need_same_oprnds
              && !operand_equal_p (first_op1, gimple_assign_rhs2 (stmt), 0))
            {
              if (dump_enabled_p ())
                {
                  dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                                   "Build SLP failed: different shift "
                                   "arguments in ");
                  dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
                }

              vect_free_oprnd_info (oprnds_info);
              return false;
            }

          if (rhs_code == CALL_EXPR)
            {
              gimple first_stmt = stmts[0];
              if (gimple_call_num_args (stmt) != nops
                  || !operand_equal_p (gimple_call_fn (first_stmt),
                                       gimple_call_fn (stmt), 0)
                  || gimple_call_fntype (first_stmt)
                     != gimple_call_fntype (stmt))
                {
                  if (dump_enabled_p ())
                    {
                      dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                                       "Build SLP failed: different calls in ");
                      dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
                                        stmt, 0);
                    }

                  vect_free_oprnd_info (oprnds_info);
                  return false;
                }
            }
        }

      /* Grouped store or load.  */
      if (STMT_VINFO_GROUPED_ACCESS (vinfo_for_stmt (stmt)))
        {
          if (REFERENCE_CLASS_P (lhs))
            {
              /* Store.  */
              if (!vect_get_and_check_slp_defs (loop_vinfo, bb_vinfo, *node,
                                                stmt, ncopies_for_cost,
                                                (i == 0), &oprnds_info,
                                                prologue_cost_vec,
                                                body_cost_vec))
                {
                  vect_free_oprnd_info (oprnds_info);
                  return false;
                }
            }
          else
            {
              /* Load.  */
              unsigned unrolling_factor
                = least_common_multiple
                    (*max_nunits, group_size) / group_size;
              /* FORNOW: Check that there is no gap between the loads
                 and no gap between the groups when we need to load
                 multiple groups at once.
                 ???  We should enhance this to only disallow gaps
                 inside vectors.  */
              if ((unrolling_factor > 1
                   && GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)) == stmt
                   && GROUP_GAP (vinfo_for_stmt (stmt)) != 0)
                  || (GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)) != stmt
                      && GROUP_GAP (vinfo_for_stmt (stmt)) != 1))
                {
                  if (dump_enabled_p ())
                    {
                      dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                                       "Build SLP failed: grouped "
                                       "loads have gaps ");
                      dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
                                        stmt, 0);
                    }

                  vect_free_oprnd_info (oprnds_info);
                  return false;
                }

              /* Check that the size of interleaved loads group is not
                 greater than the SLP group size.  */
              unsigned ncopies
                = vectorization_factor / TYPE_VECTOR_SUBPARTS (vectype);
              if (loop_vinfo
                  && GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)) == stmt
                  && ((GROUP_SIZE (vinfo_for_stmt (stmt))
                       - GROUP_GAP (vinfo_for_stmt (stmt)))
                      > ncopies * group_size))
                {
                  if (dump_enabled_p ())
                    {
                      dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                                       "Build SLP failed: the number "
                                       "of interleaved loads is greater than "
                                       "the SLP group size ");
                      dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
                                        stmt, 0);
                    }

                  vect_free_oprnd_info (oprnds_info);
                  return false;
                }

              old_first_load = first_load;
              first_load = GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt));
              if (prev_first_load)
                {
                  /* Check that there are no loads from different interleaving
                     chains in the same node.  The only exception is complex
                     numbers.  */
                  if (prev_first_load != first_load
                      && rhs_code != REALPART_EXPR
                      && rhs_code != IMAGPART_EXPR)
                    {
                      if (dump_enabled_p ())
                        {
                          dump_printf_loc (MSG_MISSED_OPTIMIZATION,
                                           vect_location,
                                           "Build SLP failed: different "
                                           "interleaving chains in one node ");
                          dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
                                            stmt, 0);
                        }

                      vect_free_oprnd_info (oprnds_info);
                      return false;
                    }
                }
              else
                prev_first_load = first_load;

              /* In some cases a group of loads is just the same load
                 repeated N times.  Only analyze its cost once.  */
              if (first_load == stmt && old_first_load != first_load)
                {
                  first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (stmt));
                  if (vect_supportable_dr_alignment (first_dr, false)
                      == dr_unaligned_unsupported)
                    {
                      if (dump_enabled_p ())
                        {
                          dump_printf_loc (MSG_MISSED_OPTIMIZATION,
                                           vect_location,
                                           "Build SLP failed: unsupported "
                                           "unaligned load ");
                          dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
                                            stmt, 0);
                        }

                      vect_free_oprnd_info (oprnds_info);
                      return false;
                    }

                  /* Analyze costs (for the first stmt in the group).  */
                  vect_model_load_cost (vinfo_for_stmt (stmt),
                                        ncopies_for_cost, false, *node,
                                        prologue_cost_vec, body_cost_vec);
                }

              /* Store the place of this load in the interleaving chain.  In
                 case that permutation is needed we later decide if a specific
                 permutation is supported.  */
              load_place = vect_get_place_in_interleaving_chain (stmt,
                                                                 first_load);
              if (load_place != i)
                permutation = true;

              load_permutation->safe_push (load_place);

              /* We stop the tree when we reach a group of loads.  */
              stop_recursion = true;
              continue;
            }
        } /* Grouped access.  */
      else
        {
          if (TREE_CODE_CLASS (rhs_code) == tcc_reference)
            {
              /* Not a grouped load.  */
              if (dump_enabled_p ())
                {
                  dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                                   "Build SLP failed: not grouped load ");
                  dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
                }

              /* FORNOW: Non-grouped loads are not supported.  */
              vect_free_oprnd_info (oprnds_info);
              return false;
            }

          /* Not a memory operation.  */
          if (TREE_CODE_CLASS (rhs_code) != tcc_binary
              && TREE_CODE_CLASS (rhs_code) != tcc_unary
              && rhs_code != COND_EXPR
              && rhs_code != CALL_EXPR)
            {
              if (dump_enabled_p ())
                {
                  dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                                   "Build SLP failed: operation");
                  dump_printf (MSG_MISSED_OPTIMIZATION, " unsupported ");
                  dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
                }

              vect_free_oprnd_info (oprnds_info);
              return false;
            }

          if (rhs_code == COND_EXPR)
            {
              tree cond_expr = gimple_assign_rhs1 (stmt);

              if (i == 0)
                first_cond_code = TREE_CODE (cond_expr);
              else if (first_cond_code != TREE_CODE (cond_expr))
                {
                  if (dump_enabled_p ())
                    {
                      dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                                       "Build SLP failed: different"
                                       " operation");
                      dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
                                        stmt, 0);
                    }

                  vect_free_oprnd_info (oprnds_info);
                  return false;
                }
            }

          /* Find the def-stmts.  */
          if (!vect_get_and_check_slp_defs (loop_vinfo, bb_vinfo, *node, stmt,
                                            ncopies_for_cost, (i == 0),
                                            &oprnds_info, prologue_cost_vec,
                                            body_cost_vec))
            {
              vect_free_oprnd_info (oprnds_info);
              return false;
            }
        }
    }

  /* Grouped loads were reached - stop the recursion.  */
  if (stop_recursion)
    {
      loads->safe_push (*node);
      if (permutation)
        {
          gimple first_stmt = stmts[0];
          *loads_permuted = true;
          (void) record_stmt_cost (body_cost_vec, group_size, vec_perm,
                                   vinfo_for_stmt (first_stmt), 0, vect_body);
        }
      else
        {
          /* We don't check complex-number chains here; set LOADS_PERMUTED
             so they are checked later in
             vect_supported_load_permutation_p.  */
          if (rhs_code == REALPART_EXPR || rhs_code == IMAGPART_EXPR)
            *loads_permuted = true;
        }

      vect_free_oprnd_info (oprnds_info);
      return true;
    }

  /* Create SLP_TREE nodes for the definition node/s.  */
  FOR_EACH_VEC_ELT (oprnds_info, i, oprnd_info)
    {
      slp_tree child;

      if (oprnd_info->first_dt != vect_internal_def)
        continue;

      child = vect_create_new_slp_node (oprnd_info->def_stmts);
      if (!child
          || !vect_build_slp_tree (loop_vinfo, bb_vinfo, &child, group_size,
                                   outside_cost, ncopies_for_cost,
                                   max_nunits, load_permutation, loads,
                                   vectorization_factor, loads_permuted,
                                   prologue_cost_vec, body_cost_vec))
        {
          if (child)
            oprnd_info->def_stmts = vNULL;
          vect_free_slp_tree (child);
          vect_free_oprnd_info (oprnds_info);
          return false;
        }

      oprnd_info->def_stmts.create (0);
      SLP_TREE_CHILDREN (*node).quick_push (child);
    }

  vect_free_oprnd_info (oprnds_info);
  return true;
}

/* Dump a slp tree NODE using flags specified in DUMP_KIND.  */

static void
vect_print_slp_tree (int dump_kind, slp_tree node)
{
  int i;
  gimple stmt;
  slp_tree child;

  if (!node)
    return;

  dump_printf (dump_kind, "node ");
  FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, stmt)
    {
      dump_printf (dump_kind, "\n\tstmt %d ", i);
      dump_gimple_stmt (dump_kind, TDF_SLIM, stmt, 0);
    }
  dump_printf (dump_kind, "\n");

  FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
    vect_print_slp_tree (dump_kind, child);
}


/* Mark the tree rooted at NODE with MARK (PURE_SLP or HYBRID).
   If MARK is HYBRID, it refers to a specific stmt in NODE (the stmt at index
   J).  Otherwise, MARK is PURE_SLP and J is -1, which indicates that all the
   stmts in NODE are to be marked.  */

static void
vect_mark_slp_stmts (slp_tree node, enum slp_vect_type mark, int j)
{
  int i;
  gimple stmt;
  slp_tree child;

  if (!node)
    return;

  FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, stmt)
    if (j < 0 || i == j)
      STMT_SLP_TYPE (vinfo_for_stmt (stmt)) = mark;

  FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
    vect_mark_slp_stmts (child, mark, j);
}


/* Mark the statements of the tree rooted at NODE as relevant (vect_used).  */

static void
vect_mark_slp_stmts_relevant (slp_tree node)
{
  int i;
  gimple stmt;
  stmt_vec_info stmt_info;
  slp_tree child;

  if (!node)
    return;

  FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, stmt)
    {
      stmt_info = vinfo_for_stmt (stmt);
      gcc_assert (!STMT_VINFO_RELEVANT (stmt_info)
                  || STMT_VINFO_RELEVANT (stmt_info) == vect_used_in_scope);
      STMT_VINFO_RELEVANT (stmt_info) = vect_used_in_scope;
    }

  FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
    vect_mark_slp_stmts_relevant (child);
}


/* Check if the permutation required by the SLP INSTANCE is supported.
   Reorganize the SLP nodes stored in SLP_INSTANCE_LOADS if needed.  */

static bool
vect_supported_slp_permutation_p (slp_instance instance)
{
  slp_tree node = SLP_INSTANCE_LOADS (instance)[0];
  gimple stmt = SLP_TREE_SCALAR_STMTS (node)[0];
  gimple first_load = GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt));
  vec<slp_tree> sorted_loads = vNULL;
  int index;
  slp_tree *tmp_loads = NULL;
  int group_size = SLP_INSTANCE_GROUP_SIZE (instance), i, j;
  slp_tree load;

  /* FORNOW: The only supported loads permutation is loads from the same
     location in all the loads in the node, when the data-refs in
     nodes of LOADS constitute an interleaving chain.
     Sort the nodes according to the order of accesses in the chain.  */
  tmp_loads = (slp_tree *) xmalloc (sizeof (slp_tree) * group_size);
  for (i = 0, j = 0;
       SLP_INSTANCE_LOAD_PERMUTATION (instance).iterate (i, &index)
       && SLP_INSTANCE_LOADS (instance).iterate (j, &load);
       i += group_size, j++)
    {
      gimple scalar_stmt = SLP_TREE_SCALAR_STMTS (load)[0];
      /* Check that the loads are all in the same interleaving chain.  */
      if (GROUP_FIRST_ELEMENT (vinfo_for_stmt (scalar_stmt)) != first_load)
        {
          if (dump_enabled_p ())
            {
              dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                               "Build SLP failed: unsupported data "
                               "permutation ");
              dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
                                scalar_stmt, 0);
            }

          free (tmp_loads);
          return false;
        }

      tmp_loads[index] = load;
    }

  sorted_loads.create (group_size);
  for (i = 0; i < group_size; i++)
    sorted_loads.safe_push (tmp_loads[i]);

  SLP_INSTANCE_LOADS (instance).release ();
  SLP_INSTANCE_LOADS (instance) = sorted_loads;
  free (tmp_loads);

  if (!vect_transform_slp_perm_load (stmt, vNULL, NULL,
                                     SLP_INSTANCE_UNROLLING_FACTOR (instance),
                                     instance, true))
    return false;

  return true;
}


/* Rearrange the statements of NODE according to PERMUTATION.  */

static void
vect_slp_rearrange_stmts (slp_tree node, unsigned int group_size,
                          vec<int> permutation)
{
  gimple stmt;
  vec<gimple> tmp_stmts;
  unsigned int i;
  slp_tree child;

  FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
    vect_slp_rearrange_stmts (child, group_size, permutation);

  gcc_assert (group_size == SLP_TREE_SCALAR_STMTS (node).length ());
  tmp_stmts.create (group_size);
  tmp_stmts.quick_grow_cleared (group_size);

  FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, stmt)
    tmp_stmts[permutation[i]] = stmt;

  SLP_TREE_SCALAR_STMTS (node).release ();
  SLP_TREE_SCALAR_STMTS (node) = tmp_stmts;
}


/* Check if the required load permutation is supported.
   LOAD_PERMUTATION contains a list of indices of the loads.
   In SLP this permutation is relative to the order of grouped stores that are
   the base of the SLP instance.  */
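/* For example, for a group of two stores fed by elements 1 and 0 of the
   same interleaving chain of loads, LOAD_PERMUTATION is {1, 0}: element 1
   of the chain feeds the first store and element 0 the second.  */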

static bool
vect_supported_load_permutation_p (slp_instance slp_instn, int group_size,
                                   vec<int> load_permutation)
{
  int i = 0, j, prev = -1, next, k, number_of_groups;
  bool supported, bad_permutation = false;
  sbitmap load_index;
  slp_tree node, other_complex_node;
  gimple stmt, first = NULL, other_node_first, load, next_load, first_load;
  unsigned complex_numbers = 0;
  struct data_reference *dr;
  bb_vec_info bb_vinfo;

  /* FORNOW: permutations are only supported in SLP.  */
  if (!slp_instn)
    return false;

  if (dump_enabled_p ())
    {
      dump_printf_loc (MSG_NOTE, vect_location, "Load permutation ");
      FOR_EACH_VEC_ELT (load_permutation, i, next)
        dump_printf (MSG_NOTE, "%d ", next);
    }

  /* In case of reduction every load permutation is allowed, since the order
     of the reduction statements is not important (as opposed to the case of
     grouped stores).  The only condition we need to check is that all the
     load nodes are of the same size and have the same permutation (and then
     rearrange all the nodes of the SLP instance according to this
     permutation).  */

  /* Check that all the load nodes are of the same size.  */
  FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (slp_instn), i, node)
    {
      if (SLP_TREE_SCALAR_STMTS (node).length () != (unsigned) group_size)
        return false;

      stmt = SLP_TREE_SCALAR_STMTS (node)[0];
      if (is_gimple_assign (stmt)
          && (gimple_assign_rhs_code (stmt) == REALPART_EXPR
              || gimple_assign_rhs_code (stmt) == IMAGPART_EXPR))
        complex_numbers++;
    }

  /* Complex operands can be swapped as follows:
      real_c = real_b + real_a;
      imag_c = imag_a + imag_b;
     i.e., we have {real_b, imag_a} and {real_a, imag_b} instead of
     {real_a, imag_a} and {real_b, imag_b}.  We check here that if interleaving
     chains are mixed, they match the above pattern.  */
  if (complex_numbers)
    {
      FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (slp_instn), i, node)
        {
          FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), j, stmt)
            {
              if (j == 0)
                first = stmt;
              else
                {
                  if (GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)) != first)
                    {
                      if (complex_numbers != 2)
                        return false;

                      if (i == 0)
                        k = 1;
                      else
                        k = 0;

                      other_complex_node = SLP_INSTANCE_LOADS (slp_instn)[k];
                      other_node_first =
                        SLP_TREE_SCALAR_STMTS (other_complex_node)[0];

                      if (GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt))
                          != other_node_first)
                        return false;
                    }
                }
            }
        }
    }

  /* We checked that this case is ok, so there is no need to proceed with
     permutation tests.  */
  if (complex_numbers == 2
      && SLP_INSTANCE_LOADS (slp_instn).length () == 2)
    {
      SLP_INSTANCE_LOADS (slp_instn).release ();
      SLP_INSTANCE_LOAD_PERMUTATION (slp_instn).release ();
      return true;
    }

  node = SLP_INSTANCE_TREE (slp_instn);
  stmt = SLP_TREE_SCALAR_STMTS (node)[0];
  /* LOAD_PERMUTATION is a list of indices of all the loads of the SLP
     instance, not all the loads belong to the same node or interleaving
     group.  Hence, we need to divide them into groups according to
     GROUP_SIZE.  */
  number_of_groups = load_permutation.length () / group_size;

  /* Reduction (there are no data-refs in the root).
     In reduction chain the order of the loads is important.  */
  if (!STMT_VINFO_DATA_REF (vinfo_for_stmt (stmt))
      && !GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)))
    {
      int first_group_load_index;

      /* Compare all the permutation sequences to the first one.  */
      for (i = 1; i < number_of_groups; i++)
        {
          k = 0;
          for (j = i * group_size; j < i * group_size + group_size; j++)
            {
              next = load_permutation[j];
              first_group_load_index = load_permutation[k];

              if (next != first_group_load_index)
                {
                  bad_permutation = true;
                  break;
                }

              k++;
            }

          if (bad_permutation)
            break;
        }

      if (!bad_permutation)
        {
          /* Check that the loads in the first sequence are different and there
             are no gaps between them.  */
          load_index = sbitmap_alloc (group_size);
          bitmap_clear (load_index);
          for (k = 0; k < group_size; k++)
            {
              first_group_load_index = load_permutation[k];
              if (bitmap_bit_p (load_index, first_group_load_index))
                {
                  bad_permutation = true;
                  break;
                }

              bitmap_set_bit (load_index, first_group_load_index);
            }

          if (!bad_permutation)
            for (k = 0; k < group_size; k++)
              if (!bitmap_bit_p (load_index, k))
                {
                  bad_permutation = true;
                  break;
                }

          sbitmap_free (load_index);
        }

      if (!bad_permutation)
        {
          /* This permutation is valid for reduction.  Since the order of the
             statements in the nodes is not important unless they are memory
             accesses, we can rearrange the statements in all the nodes
             according to the order of the loads.  */
          vect_slp_rearrange_stmts (SLP_INSTANCE_TREE (slp_instn), group_size,
                                    load_permutation);
          SLP_INSTANCE_LOAD_PERMUTATION (slp_instn).release ();
          return true;
        }
    }

  /* In basic block vectorization we allow any subchain of an interleaving
     chain.
     FORNOW: not supported in loop SLP because of realignment complications.  */
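  /* For example, a node loading a[2] and a[3] out of the interleaving
     chain a[0], a[1], a[2], a[3] forms a valid subchain.  */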
  bb_vinfo = STMT_VINFO_BB_VINFO (vinfo_for_stmt (stmt));
  bad_permutation = false;
  /* Check that for every node in the instance the loads form a subchain.  */
  if (bb_vinfo)
    {
      FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (slp_instn), i, node)
        {
          next_load = NULL;
          first_load = NULL;
          FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), j, load)
            {
              if (!first_load)
                first_load = GROUP_FIRST_ELEMENT (vinfo_for_stmt (load));
              else if (first_load
                       != GROUP_FIRST_ELEMENT (vinfo_for_stmt (load)))
                {
                  bad_permutation = true;
                  break;
                }

              if (j != 0 && next_load != load)
                {
                  bad_permutation = true;
                  break;
                }

              next_load = GROUP_NEXT_ELEMENT (vinfo_for_stmt (load));
            }

          if (bad_permutation)
            break;
        }

      /* Check that the alignment of the first load in every subchain, i.e.,
         the first statement in every load node, is supported.  */
      if (!bad_permutation)
        {
          FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (slp_instn), i, node)
            {
              first_load = SLP_TREE_SCALAR_STMTS (node)[0];
              if (first_load
                  != GROUP_FIRST_ELEMENT (vinfo_for_stmt (first_load)))
                {
                  dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_load));
                  if (vect_supportable_dr_alignment (dr, false)
                      == dr_unaligned_unsupported)
                    {
                      if (dump_enabled_p ())
                        {
                          dump_printf_loc (MSG_MISSED_OPTIMIZATION,
                                           vect_location,
                                           "unsupported unaligned load ");
                          dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
                                            first_load, 0);
                        }
                      bad_permutation = true;
                      break;
                    }
                }
            }

          if (!bad_permutation)
            {
              SLP_INSTANCE_LOAD_PERMUTATION (slp_instn).release ();
              return true;
            }
        }
    }

  /* FORNOW: the only supported permutation is 0..01..1.., i.e. each of the
     GROUP_SIZE load indices repeated GROUP_SIZE times in a row, giving a
     permutation of length GROUP_SIZE * GROUP_SIZE (unless it's a
     reduction).  */
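  /* For example, for GROUP_SIZE = 2 the permutation {0, 0, 1, 1} is
     supported, while {0, 1, 1, 0} is not.  */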
  if (load_permutation.length ()
      != (unsigned int) (group_size * group_size))
    return false;

  supported = true;
  load_index = sbitmap_alloc (group_size);
  bitmap_clear (load_index);
  for (j = 0; j < group_size; j++)
    {
      for (i = j * group_size, k = 0;
           load_permutation.iterate (i, &next) && k < group_size;
           i++, k++)
        {
          if (i != j * group_size && next != prev)
            {
              supported = false;
              break;
            }

          prev = next;
        }

      if (bitmap_bit_p (load_index, prev))
        {
          supported = false;
          break;
        }

      bitmap_set_bit (load_index, prev);
    }

  for (j = 0; j < group_size; j++)
    if (!bitmap_bit_p (load_index, j))
      {
        sbitmap_free (load_index);
        return false;
      }

  sbitmap_free (load_index);

  if (supported && i == group_size * group_size
      && vect_supported_slp_permutation_p (slp_instn))
    return true;

  return false;
}


/* Find the first load in the loop that belongs to INSTANCE.
   When loads are in several SLP nodes, there can be a case in which the first
   load does not appear in the first SLP node to be transformed, causing
   incorrect order of statements.  Since we generate all the loads together,
   they must be inserted before the first load of the SLP instance and not
   before the first load of the first node of the instance.  */

static gimple
vect_find_first_load_in_slp_instance (slp_instance instance)
{
  int i, j;
  slp_tree load_node;
  gimple first_load = NULL, load;

  FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (instance), i, load_node)
    FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (load_node), j, load)
      first_load = get_earlier_stmt (load, first_load);

  return first_load;
}


/* Find the last store in SLP INSTANCE.  */

static gimple
vect_find_last_store_in_slp_instance (slp_instance instance)
{
  int i;
  slp_tree node;
  gimple last_store = NULL, store;

  node = SLP_INSTANCE_TREE (instance);
  for (i = 0; SLP_TREE_SCALAR_STMTS (node).iterate (i, &store); i++)
    last_store = get_later_stmt (store, last_store);

  return last_store;
}


/* Analyze an SLP instance starting from a group of grouped stores.  Call
   vect_build_slp_tree to build a tree of packed stmts if possible.
   Return FALSE if it's impossible to SLP any stmt in the loop.  */

static bool
vect_analyze_slp_instance (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo,
                           gimple stmt)
{
  slp_instance new_instance;
  slp_tree node;
  unsigned int group_size = GROUP_SIZE (vinfo_for_stmt (stmt));
  unsigned int unrolling_factor = 1, nunits;
  tree vectype, scalar_type = NULL_TREE;
  gimple next;
  unsigned int vectorization_factor = 0;
  int outside_cost = 0, ncopies_for_cost, i;
  unsigned int max_nunits = 0;
  vec<int> load_permutation;
  vec<slp_tree> loads;
  struct data_reference *dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (stmt));
  bool loads_permuted = false;
  vec<gimple> scalar_stmts;
  stmt_vector_for_cost body_cost_vec, prologue_cost_vec;
  stmt_info_for_cost *si;

  if (GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)))
    {
      if (dr)
        {
          scalar_type = TREE_TYPE (DR_REF (dr));
          vectype = get_vectype_for_scalar_type (scalar_type);
        }
      else
        {
          gcc_assert (loop_vinfo);
          vectype = STMT_VINFO_VECTYPE (vinfo_for_stmt (stmt));
        }

      group_size = GROUP_SIZE (vinfo_for_stmt (stmt));
    }
  else
    {
      gcc_assert (loop_vinfo);
      vectype = STMT_VINFO_VECTYPE (vinfo_for_stmt (stmt));
      group_size = LOOP_VINFO_REDUCTIONS (loop_vinfo).length ();
    }

  if (!vectype)
    {
      if (dump_enabled_p ())
        {
          dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                           "Build SLP failed: unsupported data-type ");
          dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM, scalar_type);
        }

      return false;
    }

  nunits = TYPE_VECTOR_SUBPARTS (vectype);
  if (loop_vinfo)
    vectorization_factor = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
  else
    vectorization_factor = nunits;

  /* Calculate the unrolling factor.  */
  unrolling_factor = least_common_multiple (nunits, group_size) / group_size;
  if (unrolling_factor != 1 && !loop_vinfo)
    {
      if (dump_enabled_p ())
        dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                         "Build SLP failed: unrolling required in basic"
                         " block SLP");

      return false;
    }

  /* Create a node (a root of the SLP tree) for the packed grouped stores.  */
  scalar_stmts.create (group_size);
  next = stmt;
  if (GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)))
    {
      /* Collect the stores and store them in SLP_TREE_SCALAR_STMTS.  */
      while (next)
        {
          if (STMT_VINFO_IN_PATTERN_P (vinfo_for_stmt (next))
              && STMT_VINFO_RELATED_STMT (vinfo_for_stmt (next)))
            scalar_stmts.safe_push (
                  STMT_VINFO_RELATED_STMT (vinfo_for_stmt (next)));
          else
            scalar_stmts.safe_push (next);
          next = GROUP_NEXT_ELEMENT (vinfo_for_stmt (next));
        }
    }
  else
    {
      /* Collect reduction statements.  */
      vec<gimple> reductions = LOOP_VINFO_REDUCTIONS (loop_vinfo);
      for (i = 0; reductions.iterate (i, &next); i++)
        scalar_stmts.safe_push (next);
    }

  node = vect_create_new_slp_node (scalar_stmts);

  /* Calculate the number of vector stmts to create based on the unrolling
     factor (number of vectors is 1 if NUNITS >= GROUP_SIZE, and is
     GROUP_SIZE / NUNITS otherwise).  */
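  /* For example, with GROUP_SIZE = 8 and NUNITS = 4 the unrolling factor
     is 1 and NCOPIES_FOR_COST is 2; with GROUP_SIZE = 2 and NUNITS = 4 the
     unrolling factor is 2 and NCOPIES_FOR_COST is 1.  */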
  ncopies_for_cost = unrolling_factor * group_size / nunits;

  load_permutation.create (group_size * group_size);
  loads.create (group_size);
  prologue_cost_vec.create (10);
  body_cost_vec.create (10);

  /* Build the tree for the SLP instance.  */
  if (vect_build_slp_tree (loop_vinfo, bb_vinfo, &node, group_size,
                           &outside_cost, ncopies_for_cost,
                           &max_nunits, &load_permutation, &loads,
                           vectorization_factor, &loads_permuted,
                           &prologue_cost_vec, &body_cost_vec))
    {
      void *data = (loop_vinfo ? LOOP_VINFO_TARGET_COST_DATA (loop_vinfo)
                    : BB_VINFO_TARGET_COST_DATA (bb_vinfo));

      /* Calculate the unrolling factor based on the smallest type.  */
      if (max_nunits > nunits)
        unrolling_factor = least_common_multiple (max_nunits, group_size)
                           / group_size;

      if (unrolling_factor != 1 && !loop_vinfo)
        {
          if (dump_enabled_p ())
            dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                             "Build SLP failed: unrolling required in basic"
                             " block SLP");
          vect_free_slp_tree (node);
          body_cost_vec.release ();
          prologue_cost_vec.release ();
          load_permutation.release ();
          loads.release ();
          return false;
        }

      /* Create a new SLP instance.  */
      new_instance = XNEW (struct _slp_instance);
      SLP_INSTANCE_TREE (new_instance) = node;
      SLP_INSTANCE_GROUP_SIZE (new_instance) = group_size;
      SLP_INSTANCE_UNROLLING_FACTOR (new_instance) = unrolling_factor;
      SLP_INSTANCE_BODY_COST_VEC (new_instance) = body_cost_vec;
      SLP_INSTANCE_LOADS (new_instance) = loads;
      SLP_INSTANCE_FIRST_LOAD_STMT (new_instance) = NULL;
      SLP_INSTANCE_LOAD_PERMUTATION (new_instance) = load_permutation;

      if (loads_permuted)
        {
          if (!vect_supported_load_permutation_p (new_instance, group_size,
                                                  load_permutation))
            {
              if (dump_enabled_p ())
                {
                  dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                                   "Build SLP failed: unsupported load "
                                   "permutation ");
                  dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
                }

              vect_free_slp_instance (new_instance);
              prologue_cost_vec.release ();
              return false;
            }

          SLP_INSTANCE_FIRST_LOAD_STMT (new_instance)
            = vect_find_first_load_in_slp_instance (new_instance);
        }
      else
        SLP_INSTANCE_LOAD_PERMUTATION (new_instance).release ();

      /* Record the prologue costs, which were delayed until we were
         sure that SLP was successful.  Unlike the body costs, we know
         the final values now regardless of the loop vectorization factor.  */
      FOR_EACH_VEC_ELT (prologue_cost_vec, i, si)
        {
          struct _stmt_vec_info *stmt_info
            = si->stmt ? vinfo_for_stmt (si->stmt) : NULL;
          (void) add_stmt_cost (data, si->count, si->kind, stmt_info,
                                si->misalign, vect_prologue);
        }

      prologue_cost_vec.release ();

      if (loop_vinfo)
        LOOP_VINFO_SLP_INSTANCES (loop_vinfo).safe_push (new_instance);
      else
        BB_VINFO_SLP_INSTANCES (bb_vinfo).safe_push (new_instance);

      if (dump_enabled_p ())
        vect_print_slp_tree (MSG_NOTE, node);

      return true;
    }
  else
    {
      body_cost_vec.release ();
      prologue_cost_vec.release ();
    }

  /* Failed to SLP.  */
  /* Free the allocated memory.  */
  vect_free_slp_tree (node);
  load_permutation.release ();
  loads.release ();

  return false;
}


/* Check if there are stmts in the loop that can be vectorized using SLP.
   Build SLP trees of packed scalar stmts if SLP is possible.  */

bool
vect_analyze_slp (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo)
{
  unsigned int i;
  vec<gimple> grouped_stores;
  vec<gimple> reductions = vNULL;
  vec<gimple> reduc_chains = vNULL;
  gimple first_element;
  bool ok = false;

  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location, "=== vect_analyze_slp ===");

  if (loop_vinfo)
    {
      grouped_stores = LOOP_VINFO_GROUPED_STORES (loop_vinfo);
      reduc_chains = LOOP_VINFO_REDUCTION_CHAINS (loop_vinfo);
      reductions = LOOP_VINFO_REDUCTIONS (loop_vinfo);
    }
  else
    grouped_stores = BB_VINFO_GROUPED_STORES (bb_vinfo);

  /* Find SLP sequences starting from groups of grouped stores.  */
  FOR_EACH_VEC_ELT (grouped_stores, i, first_element)
    if (vect_analyze_slp_instance (loop_vinfo, bb_vinfo, first_element))
      ok = true;

  if (bb_vinfo && !ok)
    {
      if (dump_enabled_p ())
        dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                         "Failed to SLP the basic block.");

      return false;
    }

  if (loop_vinfo
      && LOOP_VINFO_REDUCTION_CHAINS (loop_vinfo).length () > 0)
    {
      /* Find SLP sequences starting from reduction chains.  */
      FOR_EACH_VEC_ELT (reduc_chains, i, first_element)
        if (vect_analyze_slp_instance (loop_vinfo, bb_vinfo, first_element))
          ok = true;
        else
          return false;

      /* Don't try to vectorize SLP reductions if reduction chain was
         detected.  */
      return ok;
    }

  /* Find SLP sequences starting from groups of reductions.  */
  if (loop_vinfo && LOOP_VINFO_REDUCTIONS (loop_vinfo).length () > 1
      && vect_analyze_slp_instance (loop_vinfo, bb_vinfo, reductions[0]))
    ok = true;

  return true;
}


/* For each possible SLP instance decide whether to SLP it and calculate overall
   unrolling factor needed to SLP the loop.  Return TRUE if decided to SLP at
   least one instance.  */

bool
vect_make_slp_decision (loop_vec_info loop_vinfo)
{
  unsigned int i, unrolling_factor = 1;
  vec<slp_instance> slp_instances = LOOP_VINFO_SLP_INSTANCES (loop_vinfo);
  slp_instance instance;
  int decided_to_slp = 0;

  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location, "=== vect_make_slp_decision ===");

  FOR_EACH_VEC_ELT (slp_instances, i, instance)
    {
      /* FORNOW: SLP if you can.  */
      if (unrolling_factor < SLP_INSTANCE_UNROLLING_FACTOR (instance))
        unrolling_factor = SLP_INSTANCE_UNROLLING_FACTOR (instance);

      /* Mark all the stmts that belong to INSTANCE as PURE_SLP stmts.  Later we
         call vect_detect_hybrid_slp () to find stmts that need hybrid SLP and
         loop-based vectorization.  Such stmts will be marked as HYBRID.  */
      vect_mark_slp_stmts (SLP_INSTANCE_TREE (instance), pure_slp, -1);
      decided_to_slp++;
    }

  LOOP_VINFO_SLP_UNROLLING_FACTOR (loop_vinfo) = unrolling_factor;

  if (decided_to_slp && dump_enabled_p ())
    dump_printf_loc (MSG_OPTIMIZED_LOCATIONS, vect_location,
                     "Decided to SLP %d instances. Unrolling factor %d",
                     decided_to_slp, unrolling_factor);

  return (decided_to_slp > 0);
}


/* Find stmts that must be both vectorized and SLPed (since they feed stmts that
   can't be SLPed) in the tree rooted at NODE.  Mark such stmts as HYBRID.  */
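/* For example, a statement in an SLP node whose result is also used by a
   relevant statement outside the SLP tree (such as one feeding a
   reduction) must be vectorized both as part of the SLP instance and by
   loop-based vectorization, so it is marked HYBRID.  */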

static void
vect_detect_hybrid_slp_stmts (slp_tree node)
{
  int i;
  vec<gimple> stmts = SLP_TREE_SCALAR_STMTS (node);
  gimple stmt = stmts[0];
  imm_use_iterator imm_iter;
  gimple use_stmt;
  stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
  slp_tree child;
  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
  struct loop *loop = NULL;
  bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_vinfo);
  basic_block bb = NULL;

  if (!node)
    return;

  if (loop_vinfo)
    loop = LOOP_VINFO_LOOP (loop_vinfo);
  else
    bb = BB_VINFO_BB (bb_vinfo);

  FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, stmt)
    if (PURE_SLP_STMT (vinfo_for_stmt (stmt))
        && TREE_CODE (gimple_op (stmt, 0)) == SSA_NAME)
      FOR_EACH_IMM_USE_STMT (use_stmt, imm_iter, gimple_op (stmt, 0))
        if (gimple_bb (use_stmt)
            && ((loop && flow_bb_inside_loop_p (loop, gimple_bb (use_stmt)))
                || bb == gimple_bb (use_stmt))
            && (stmt_vinfo = vinfo_for_stmt (use_stmt))
            && !STMT_SLP_TYPE (stmt_vinfo)
            && (STMT_VINFO_RELEVANT (stmt_vinfo)
                || VECTORIZABLE_CYCLE_DEF (STMT_VINFO_DEF_TYPE (stmt_vinfo)))
            && !(gimple_code (use_stmt) == GIMPLE_PHI
                 && STMT_VINFO_DEF_TYPE (stmt_vinfo)
                    == vect_reduction_def))
          vect_mark_slp_stmts (node, hybrid, i);

  FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
    vect_detect_hybrid_slp_stmts (child);
}


/* Find stmts that must be both vectorized and SLPed.  */

void
vect_detect_hybrid_slp (loop_vec_info loop_vinfo)
{
  unsigned int i;
  vec<slp_instance> slp_instances = LOOP_VINFO_SLP_INSTANCES (loop_vinfo);
  slp_instance instance;

  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location, "=== vect_detect_hybrid_slp ===");

  FOR_EACH_VEC_ELT (slp_instances, i, instance)
    vect_detect_hybrid_slp_stmts (SLP_INSTANCE_TREE (instance));
}


/* Create and initialize a new bb_vec_info struct for BB, as well as
   stmt_vec_info structs for all the stmts in it.  */

static bb_vec_info
new_bb_vec_info (basic_block bb)
{
  bb_vec_info res = NULL;
  gimple_stmt_iterator gsi;

  res = (bb_vec_info) xcalloc (1, sizeof (struct _bb_vec_info));
  BB_VINFO_BB (res) = bb;

  for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
    {
      gimple stmt = gsi_stmt (gsi);
      gimple_set_uid (stmt, 0);
      set_vinfo_for_stmt (stmt, new_stmt_vec_info (stmt, NULL, res));
    }

  BB_VINFO_GROUPED_STORES (res).create (10);
  BB_VINFO_SLP_INSTANCES (res).create (2);
  BB_VINFO_TARGET_COST_DATA (res) = init_cost (NULL);

  bb->aux = res;
  return res;
}


/* Free BB_VINFO struct, as well as all the stmt_vec_info structs of all the
   stmts in the basic block.  */

static void
destroy_bb_vec_info (bb_vec_info bb_vinfo)
{
  vec<slp_instance> slp_instances;
  slp_instance instance;
  basic_block bb;
  gimple_stmt_iterator si;
  unsigned i;

  if (!bb_vinfo)
    return;

  bb = BB_VINFO_BB (bb_vinfo);

  for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
    {
      gimple stmt = gsi_stmt (si);
      stmt_vec_info stmt_info = vinfo_for_stmt (stmt);

      if (stmt_info)
        /* Free stmt_vec_info.  */
        free_stmt_vec_info (stmt);
    }

  free_data_refs (BB_VINFO_DATAREFS (bb_vinfo));
  free_dependence_relations (BB_VINFO_DDRS (bb_vinfo));
  BB_VINFO_GROUPED_STORES (bb_vinfo).release ();
  slp_instances = BB_VINFO_SLP_INSTANCES (bb_vinfo);
  FOR_EACH_VEC_ELT (slp_instances, i, instance)
    vect_free_slp_instance (instance);
  BB_VINFO_SLP_INSTANCES (bb_vinfo).release ();
  destroy_cost_data (BB_VINFO_TARGET_COST_DATA (bb_vinfo));
  free (bb_vinfo);
  bb->aux = NULL;
}


/* Analyze statements contained in SLP tree node after recursively analyzing
   the subtree.  Return TRUE if the operations are supported.  */

static bool
vect_slp_analyze_node_operations (bb_vec_info bb_vinfo, slp_tree node)
{
  bool dummy;
  int i;
  gimple stmt;
  slp_tree child;

  if (!node)
    return true;

  FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
    if (!vect_slp_analyze_node_operations (bb_vinfo, child))
      return false;

  FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, stmt)
    {
      stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
      gcc_assert (stmt_info);
      gcc_assert (PURE_SLP_STMT (stmt_info));

      if (!vect_analyze_stmt (stmt, &dummy, node))
        return false;
    }

  return true;
}


/* Analyze statements in SLP instances of the basic block.  Return TRUE if the
   operations are supported.  */

static bool
vect_slp_analyze_operations (bb_vec_info bb_vinfo)
{
  vec<slp_instance> slp_instances = BB_VINFO_SLP_INSTANCES (bb_vinfo);
  slp_instance instance;
  int i;

  for (i = 0; slp_instances.iterate (i, &instance); )
    {
      if (!vect_slp_analyze_node_operations (bb_vinfo,
                                             SLP_INSTANCE_TREE (instance)))
        {
          vect_free_slp_instance (instance);
          slp_instances.ordered_remove (i);
        }
      else
        i++;
    }

  if (!slp_instances.length ())
    return false;

  return true;
}

/* Check if vectorization of the basic block is profitable.  */

static bool
vect_bb_vectorization_profitable_p (bb_vec_info bb_vinfo)
{
  vec<slp_instance> slp_instances = BB_VINFO_SLP_INSTANCES (bb_vinfo);
  slp_instance instance;
  int i, j;
  unsigned int vec_inside_cost = 0, vec_outside_cost = 0, scalar_cost = 0;
  unsigned int vec_prologue_cost = 0, vec_epilogue_cost = 0;
  unsigned int stmt_cost;
  gimple stmt;
  gimple_stmt_iterator si;
  basic_block bb = BB_VINFO_BB (bb_vinfo);
  void *target_cost_data = BB_VINFO_TARGET_COST_DATA (bb_vinfo);
  stmt_vec_info stmt_info = NULL;
  stmt_vector_for_cost body_cost_vec;
  stmt_info_for_cost *ci;

  /* Calculate vector costs.  */
  FOR_EACH_VEC_ELT (slp_instances, i, instance)
    {
      body_cost_vec = SLP_INSTANCE_BODY_COST_VEC (instance);

      FOR_EACH_VEC_ELT (body_cost_vec, j, ci)
        {
          stmt_info = ci->stmt ? vinfo_for_stmt (ci->stmt) : NULL;
          (void) add_stmt_cost (target_cost_data, ci->count, ci->kind,
                                stmt_info, ci->misalign, vect_body);
        }
    }

  /* Calculate scalar cost.  */
  for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
    {
      stmt = gsi_stmt (si);
      stmt_info = vinfo_for_stmt (stmt);

      if (!stmt_info || !STMT_VINFO_VECTORIZABLE (stmt_info)
          || !PURE_SLP_STMT (stmt_info))
        continue;

      if (STMT_VINFO_DATA_REF (stmt_info))
        {
          if (DR_IS_READ (STMT_VINFO_DATA_REF (stmt_info)))
            stmt_cost = vect_get_stmt_cost (scalar_load);
          else
            stmt_cost = vect_get_stmt_cost (scalar_store);
        }
      else
        stmt_cost = vect_get_stmt_cost (scalar_stmt);
2075
2076 scalar_cost += stmt_cost;
2077 }
2078
2079 /* Complete the target-specific cost calculation. */
2080 finish_cost (BB_VINFO_TARGET_COST_DATA (bb_vinfo), &vec_prologue_cost,
2081 &vec_inside_cost, &vec_epilogue_cost);
2082
2083 vec_outside_cost = vec_prologue_cost + vec_epilogue_cost;
2084
2085 if (dump_enabled_p ())
2086 {
2087 dump_printf_loc (MSG_NOTE, vect_location, "Cost model analysis: \n");
2088 dump_printf (MSG_NOTE, " Vector inside of basic block cost: %d\n",
2089 vec_inside_cost);
2090 dump_printf (MSG_NOTE, " Vector prologue cost: %d\n", vec_prologue_cost);
2091 dump_printf (MSG_NOTE, " Vector epilogue cost: %d\n", vec_epilogue_cost);
2092 dump_printf (MSG_NOTE, " Scalar cost of basic block: %d\n", scalar_cost);
2093 }
2094
2095 /* Vectorization is profitable if its cost is less than the cost of the
2096 scalar version. */
2097 if (vec_outside_cost + vec_inside_cost >= scalar_cost)
2098 return false;
2099
2100 return true;
2101 }
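
/* Worked example (illustrative numbers only): with vec_prologue_cost
   = 1, vec_epilogue_cost = 0 and vec_inside_cost = 4, the threshold
   is 1 + 0 + 4 = 5, so a block whose scalar stmts sum to 6 is kept
   while one summing to 5 or less is rejected by the test above.  */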
2102
2103 /* Check if the basic block can be vectorized. */
2104
2105 static bb_vec_info
2106 vect_slp_analyze_bb_1 (basic_block bb)
2107 {
2108 bb_vec_info bb_vinfo;
2109 vec<slp_instance> slp_instances;
2110 slp_instance instance;
2111 int i;
2112 int min_vf = 2;
2113
2114 bb_vinfo = new_bb_vec_info (bb);
2115 if (!bb_vinfo)
2116 return NULL;
2117
2118 if (!vect_analyze_data_refs (NULL, bb_vinfo, &min_vf))
2119 {
2120 if (dump_enabled_p ())
2121 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2122 "not vectorized: unhandled data-ref in basic "
2123 "block.\n");
2124
2125 destroy_bb_vec_info (bb_vinfo);
2126 return NULL;
2127 }
2128
2129 if (BB_VINFO_DATAREFS (bb_vinfo).length () < 2)
2130 {
2131 if (dump_enabled_p ())
2132 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2133 "not vectorized: not enough data-refs in "
2134 "basic block.\n");
2135
2136 destroy_bb_vec_info (bb_vinfo);
2137 return NULL;
2138 }
2139
2140 if (!vect_analyze_data_ref_accesses (NULL, bb_vinfo))
2141 {
2142 if (dump_enabled_p ())
2143 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2144 "not vectorized: unhandled data access in "
2145 "basic block.\n");
2146
2147 destroy_bb_vec_info (bb_vinfo);
2148 return NULL;
2149 }
2150
2151 vect_pattern_recog (NULL, bb_vinfo);
2152
2153 if (!vect_slp_analyze_data_ref_dependences (bb_vinfo))
2154 {
2155 if (dump_enabled_p ())
2156 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2157 "not vectorized: unhandled data dependence "
2158 "in basic block.\n");
2159
2160 destroy_bb_vec_info (bb_vinfo);
2161 return NULL;
2162 }
2163
2164 if (!vect_analyze_data_refs_alignment (NULL, bb_vinfo))
2165 {
2166 if (dump_enabled_p ())
2167 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2168 "not vectorized: bad data alignment in basic "
2169 "block.\n");
2170
2171 destroy_bb_vec_info (bb_vinfo);
2172 return NULL;
2173 }
2174
2175 /* Check the SLP opportunities in the basic block, analyze and build SLP
2176 trees. */
2177 if (!vect_analyze_slp (NULL, bb_vinfo))
2178 {
2179 if (dump_enabled_p ())
2180 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2181 "not vectorized: failed to find SLP opportunities "
2182 "in basic block.\n");
2183
2184 destroy_bb_vec_info (bb_vinfo);
2185 return NULL;
2186 }
2187
2188 slp_instances = BB_VINFO_SLP_INSTANCES (bb_vinfo);
2189
2190 /* Mark all the statements that we want to vectorize as pure SLP and
2191 relevant. */
2192 FOR_EACH_VEC_ELT (slp_instances, i, instance)
2193 {
2194 vect_mark_slp_stmts (SLP_INSTANCE_TREE (instance), pure_slp, -1);
2195 vect_mark_slp_stmts_relevant (SLP_INSTANCE_TREE (instance));
2196 }
2197
2198 if (!vect_verify_datarefs_alignment (NULL, bb_vinfo))
2199 {
2200 if (dump_enabled_p ())
2201 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2202 "not vectorized: unsupported alignment in basic "
2203 "block.\n");
2204 destroy_bb_vec_info (bb_vinfo);
2205 return NULL;
2206 }
2207
2208 if (!vect_slp_analyze_operations (bb_vinfo))
2209 {
2210 if (dump_enabled_p ())
2211 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2212 "not vectorized: bad operation in basic block.\n");
2213
2214 destroy_bb_vec_info (bb_vinfo);
2215 return NULL;
2216 }
2217
2218 /* Cost model: check if the vectorization is worthwhile. */
2219 if (flag_vect_cost_model
2220 && !vect_bb_vectorization_profitable_p (bb_vinfo))
2221 {
2222 if (dump_enabled_p ())
2223 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2224 "not vectorized: vectorization is not "
2225 "profitable.\n");
2226
2227 destroy_bb_vec_info (bb_vinfo);
2228 return NULL;
2229 }
2230
2231 if (dump_enabled_p ())
2232 dump_printf_loc (MSG_NOTE, vect_location,
2233 "Basic block will be vectorized using SLP\n");
2234
2235 return bb_vinfo;
2236 }
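
/* Illustrative candidate (hypothetical code, not from the sources): a
   straight-line block such as

     a[0] = b[0] + c[0];
     a[1] = b[1] + c[1];
     a[2] = b[2] + c[2];
     a[3] = b[3] + c[3];

   supplies a group of four adjacent stores that can pass the data-ref,
   dependence, alignment, SLP and operation checks above, assuming the
   target provides a suitable four-element vector type.  */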
2237
2238
2239 bb_vec_info
2240 vect_slp_analyze_bb (basic_block bb)
2241 {
2242 bb_vec_info bb_vinfo;
2243 int insns = 0;
2244 gimple_stmt_iterator gsi;
2245 unsigned int vector_sizes;
2246
2247 if (dump_enabled_p ())
2248 dump_printf_loc (MSG_NOTE, vect_location, "===vect_slp_analyze_bb===\n");
2249
2250 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
2251 {
2252 gimple stmt = gsi_stmt (gsi);
2253 if (!is_gimple_debug (stmt)
2254 && !gimple_nop_p (stmt)
2255 && gimple_code (stmt) != GIMPLE_LABEL)
2256 insns++;
2257 }
2258
2259 if (insns > PARAM_VALUE (PARAM_SLP_MAX_INSNS_IN_BB))
2260 {
2261 if (dump_enabled_p ())
2262 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2263 "not vectorized: too many instructions in "
2264 "basic block.\n");
2265
2266 return NULL;
2267 }
2268
2269 /* Autodetect the first vector size we try. */
2270 current_vector_size = 0;
2271 vector_sizes = targetm.vectorize.autovectorize_vector_sizes ();
2272
2273 while (1)
2274 {
2275 bb_vinfo = vect_slp_analyze_bb_1 (bb);
2276 if (bb_vinfo)
2277 return bb_vinfo;
2278
2279 destroy_bb_vec_info (bb_vinfo);
2280
2281 vector_sizes &= ~current_vector_size;
2282 if (vector_sizes == 0
2283 || current_vector_size == 0)
2284 return NULL;
2285
2286 /* Try the largest remaining vector size. */
2287 current_vector_size = 1 << floor_log2 (vector_sizes);
2288 if (dump_enabled_p ())
2289 dump_printf_loc (MSG_NOTE, vect_location,
2290 "***** Re-trying analysis with "
2291 "vector size %d\n", current_vector_size);
2292 }
2293 }
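
/* Worked example (target-dependent, illustrative): if the target
   advertises 32-byte and 16-byte vectors,
   targetm.vectorize.autovectorize_vector_sizes () returns the bitmask
   32 | 16 = 48.  The first attempt lets the vector size be
   autodetected; if analysis fails with the 32-byte size, that bit is
   cleared and 1 << floor_log2 (16) = 16 is tried next, until no
   sizes remain.  */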
2294
2295
2296 /* SLP costs are calculated according to the SLP instance unrolling factor
2297 (i.e., the number of created vector stmts depends on the unrolling factor).
2298 However, the actual number of vector stmts for every SLP node depends on
2299 the VF, which is set later in vect_analyze_operations (). Hence, SLP costs
2300 should be updated. In this function we assume that the inside costs
2301 calculated in vect_model_xxx_cost are linear in ncopies. */
2302
2303 void
2304 vect_update_slp_costs_according_to_vf (loop_vec_info loop_vinfo)
2305 {
2306 unsigned int i, j, vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
2307 vec<slp_instance> slp_instances = LOOP_VINFO_SLP_INSTANCES (loop_vinfo);
2308 slp_instance instance;
2309 stmt_vector_for_cost body_cost_vec;
2310 stmt_info_for_cost *si;
2311 void *data = LOOP_VINFO_TARGET_COST_DATA (loop_vinfo);
2312
2313 if (dump_enabled_p ())
2314 dump_printf_loc (MSG_NOTE, vect_location,
2315 "=== vect_update_slp_costs_according_to_vf ===");
2316
2317 FOR_EACH_VEC_ELT (slp_instances, i, instance)
2318 {
2319 /* We assume that costs are linear in ncopies. */
2320 int ncopies = vf / SLP_INSTANCE_UNROLLING_FACTOR (instance);
2321
2322 /* Record the instance's instructions in the target cost model.
2323 This was delayed until here because the count of instructions
2324 isn't known beforehand. */
2325 body_cost_vec = SLP_INSTANCE_BODY_COST_VEC (instance);
2326
2327 FOR_EACH_VEC_ELT (body_cost_vec, j, si)
2328 (void) add_stmt_cost (data, si->count * ncopies, si->kind,
2329 vinfo_for_stmt (si->stmt), si->misalign,
2330 vect_body);
2331 }
2332 }
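
/* Worked example (illustrative): with a final VF of 8 and an SLP
   instance whose unrolling factor is 2, ncopies is 8 / 2 = 4, so a
   body-cost entry recorded with count 3 reaches add_stmt_cost with
   count 3 * 4 = 12.  */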
2333
2334
2335 /* For constant and loop-invariant defs of SLP_NODE this function returns
2336 (vector) defs (VEC_OPRNDS) that will be used in the vectorized stmts.
2337 OP_NUM determines if we gather defs for operand 0 or operand 1 of the RHS
2338 of the scalar stmts. NUMBER_OF_VECTORS is the number of vector defs to
2339 create. REDUC_INDEX is the index of the reduction operand in the
2340 statements, or -1 if there is no reduction. */
2341
2342 static void
2343 vect_get_constant_vectors (tree op, slp_tree slp_node,
2344 vec<tree> *vec_oprnds,
2345 unsigned int op_num, unsigned int number_of_vectors,
2346 int reduc_index)
2347 {
2348 vec<gimple> stmts = SLP_TREE_SCALAR_STMTS (slp_node);
2349 gimple stmt = stmts[0];
2350 stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
2351 unsigned nunits;
2352 tree vec_cst;
2353 tree *elts;
2354 unsigned j, number_of_places_left_in_vector;
2355 tree vector_type;
2356 tree vop;
2357 int group_size = stmts.length ();
2358 unsigned int vec_num, i;
2359 unsigned number_of_copies = 1;
2360 vec<tree> voprnds;
2361 voprnds.create (number_of_vectors);
2362 bool constant_p, is_store;
2363 tree neutral_op = NULL;
2364 enum tree_code code = gimple_expr_code (stmt);
2365 gimple def_stmt;
2366 struct loop *loop;
2367 gimple_seq ctor_seq = NULL;
2368
2369 if (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_reduction_def
2370 && reduc_index != -1)
2371 {
2372 op_num = reduc_index - 1;
2373 op = gimple_op (stmt, reduc_index);
2374 /* For additional copies (see the explanation of NUMBER_OF_COPIES below)
2375 we need either neutral operands or the original operands. See
2376 get_initial_def_for_reduction() for details. */
2377 switch (code)
2378 {
2379 case WIDEN_SUM_EXPR:
2380 case DOT_PROD_EXPR:
2381 case PLUS_EXPR:
2382 case MINUS_EXPR:
2383 case BIT_IOR_EXPR:
2384 case BIT_XOR_EXPR:
2385 if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (op)))
2386 neutral_op = build_real (TREE_TYPE (op), dconst0);
2387 else
2388 neutral_op = build_int_cst (TREE_TYPE (op), 0);
2389
2390 break;
2391
2392 case MULT_EXPR:
2393 if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (op)))
2394 neutral_op = build_real (TREE_TYPE (op), dconst1);
2395 else
2396 neutral_op = build_int_cst (TREE_TYPE (op), 1);
2397
2398 break;
2399
2400 case BIT_AND_EXPR:
2401 neutral_op = build_int_cst (TREE_TYPE (op), -1);
2402 break;
2403
2404 case MAX_EXPR:
2405 case MIN_EXPR:
2406 def_stmt = SSA_NAME_DEF_STMT (op);
2407 loop = (gimple_bb (stmt))->loop_father;
2408 neutral_op = PHI_ARG_DEF_FROM_EDGE (def_stmt,
2409 loop_preheader_edge (loop));
2410 break;
2411
2412 default:
2413 neutral_op = NULL;
2414 }
2415 }
2416
2417 if (STMT_VINFO_DATA_REF (stmt_vinfo))
2418 {
2419 is_store = true;
2420 op = gimple_assign_rhs1 (stmt);
2421 }
2422 else
2423 is_store = false;
2424
2425 gcc_assert (op);
2426
2427 if (CONSTANT_CLASS_P (op))
2428 constant_p = true;
2429 else
2430 constant_p = false;
2431
2432 vector_type = get_vectype_for_scalar_type (TREE_TYPE (op));
2433 gcc_assert (vector_type);
2434 nunits = TYPE_VECTOR_SUBPARTS (vector_type);
2435
2436 /* NUMBER_OF_COPIES is the number of times we need to use the same values in
2437 created vectors. It is greater than 1 if unrolling is performed.
2438
2439 For example, we have two scalar operands, s1 and s2 (e.g., group of
2440 strided accesses of size two), while NUNITS is four (i.e., four scalars
2441 of this type can be packed in a vector). The output vector will contain
2442 two copies of each scalar operand: {s1, s2, s1, s2}. (NUMBER_OF_COPIES
2443 will be 2).
2444
2445 If GROUP_SIZE > NUNITS, the scalars will be split into several vectors
2446 containing the operands.
2447
2448 For example, NUNITS is four as before, and the group size is 8
2449 (s1, s2, ..., s8). We will create two vectors {s1, s2, s3, s4} and
2450 {s5, s6, s7, s8}. */
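
/* A further illustrative case (an addition to the comment above):
   if GROUP_SIZE is 3 and NUNITS is 4, least_common_multiple (4, 3)
   is 12, so NUMBER_OF_COPIES is 12 / 3 = 4 and three vectors are
   built: {s1, s2, s3, s1} {s2, s3, s1, s2} {s3, s1, s2, s3}.  */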
2451
2452 number_of_copies = least_common_multiple (nunits, group_size) / group_size;
2453
2454 number_of_places_left_in_vector = nunits;
2455 elts = XALLOCAVEC (tree, nunits);
2456 for (j = 0; j < number_of_copies; j++)
2457 {
2458 for (i = group_size - 1; stmts.iterate (i, &stmt); i--)
2459 {
2460 if (is_store)
2461 op = gimple_assign_rhs1 (stmt);
2462 else
2463 {
2464 switch (code)
2465 {
2466 case COND_EXPR:
2467 if (op_num == 0 || op_num == 1)
2468 {
2469 tree cond = gimple_assign_rhs1 (stmt);
2470 op = TREE_OPERAND (cond, op_num);
2471 }
2472 else
2473 {
2474 if (op_num == 2)
2475 op = gimple_assign_rhs2 (stmt);
2476 else
2477 op = gimple_assign_rhs3 (stmt);
2478 }
2479 break;
2480
2481 case CALL_EXPR:
2482 op = gimple_call_arg (stmt, op_num);
2483 break;
2484
2485 case LSHIFT_EXPR:
2486 case RSHIFT_EXPR:
2487 case LROTATE_EXPR:
2488 case RROTATE_EXPR:
2489 op = gimple_op (stmt, op_num + 1);
2490 /* Unlike the other binary operators, shifts/rotates take
2491 an int shift count instead of an operand of the same
2492 type as the lhs, so make sure the scalar has the right
2493 type if we are dealing with vectors of
2494 long long/long/short/char. */
2495 if (op_num == 1 && constant_p)
2496 op = fold_convert (TREE_TYPE (vector_type), op);
2497 break;
2498
2499 default:
2500 op = gimple_op (stmt, op_num + 1);
2501 break;
2502 }
2503 }
2504
2505 if (reduc_index != -1)
2506 {
2507 loop = (gimple_bb (stmt))->loop_father;
2508 def_stmt = SSA_NAME_DEF_STMT (op);
2509
2510 gcc_assert (loop);
2511
2512 /* Get the def before the loop. In a reduction chain we have
2513 only one initial value. */
2514 if ((j != (number_of_copies - 1)
2515 || (GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt))
2516 && i != 0))
2517 && neutral_op)
2518 op = neutral_op;
2519 else
2520 op = PHI_ARG_DEF_FROM_EDGE (def_stmt,
2521 loop_preheader_edge (loop));
2522 }
2523
2524 /* Create 'vect_ = {op0,op1,...,opn}'. */
2525 number_of_places_left_in_vector--;
2526 if (!types_compatible_p (TREE_TYPE (vector_type), TREE_TYPE (op)))
2527 {
2528 if (constant_p)
2529 {
2530 op = fold_unary (VIEW_CONVERT_EXPR,
2531 TREE_TYPE (vector_type), op);
2532 gcc_assert (op && CONSTANT_CLASS_P (op));
2533 }
2534 else
2535 {
2536 tree new_temp
2537 = make_ssa_name (TREE_TYPE (vector_type), NULL);
2538 gimple init_stmt;
2539 op = build1 (VIEW_CONVERT_EXPR, TREE_TYPE (vector_type),
2540 op);
2541 init_stmt
2542 = gimple_build_assign_with_ops (VIEW_CONVERT_EXPR,
2543 new_temp, op, NULL_TREE);
2544 gimple_seq_add_stmt (&ctor_seq, init_stmt);
2545 op = new_temp;
2546 }
2547 }
2548 elts[number_of_places_left_in_vector] = op;
2549
2550 if (number_of_places_left_in_vector == 0)
2551 {
2552 number_of_places_left_in_vector = nunits;
2553
2554 if (constant_p)
2555 vec_cst = build_vector (vector_type, elts);
2556 else
2557 {
2558 vec<constructor_elt, va_gc> *v;
2559 unsigned k;
2560 vec_alloc (v, nunits);
2561 for (k = 0; k < nunits; ++k)
2562 CONSTRUCTOR_APPEND_ELT (v, NULL_TREE, elts[k]);
2563 vec_cst = build_constructor (vector_type, v);
2564 }
2565 voprnds.quick_push (vect_init_vector (stmt, vec_cst,
2566 vector_type, NULL));
2567 if (ctor_seq != NULL)
2568 {
2569 gimple init_stmt = SSA_NAME_DEF_STMT (voprnds.last ());
2570 gimple_stmt_iterator gsi = gsi_for_stmt (init_stmt);
2571 gsi_insert_seq_before_without_update (&gsi, ctor_seq,
2572 GSI_SAME_STMT);
2573 ctor_seq = NULL;
2574 }
2575 }
2576 }
2577 }
2578
2579 /* Since the vectors are created in reverse order, reverse them to
2580 restore the original order. */
2581 vec_num = voprnds.length ();
2582 for (j = vec_num; j != 0; j--)
2583 {
2584 vop = voprnds[j - 1];
2585 vec_oprnds->quick_push (vop);
2586 }
2587
2588 voprnds.release ();
2589
2590 /* In case the VF is greater than the unrolling factor needed for the SLP
2591 group of stmts, the NUMBER_OF_VECTORS to be created is greater than
2592 NUMBER_OF_SCALARS/NUNITS or NUNITS/NUMBER_OF_SCALARS, and hence we have
2593 to replicate the vectors. */
2594 while (number_of_vectors > vec_oprnds->length ())
2595 {
2596 tree neutral_vec = NULL;
2597
2598 if (neutral_op)
2599 {
2600 if (!neutral_vec)
2601 neutral_vec = build_vector_from_val (vector_type, neutral_op);
2602
2603 vec_oprnds->quick_push (neutral_vec);
2604 }
2605 else
2606 {
2607 for (i = 0; vec_oprnds->iterate (i, &vop) && i < vec_num; i++)
2608 vec_oprnds->quick_push (vop);
2609 }
2610 }
2611 }
2612
2613
2614 /* Get vectorized definitions from SLP_NODE, which contains the
2615 corresponding vectorized def-stmts. */
2616
2617 static void
2618 vect_get_slp_vect_defs (slp_tree slp_node, vec<tree> *vec_oprnds)
2619 {
2620 tree vec_oprnd;
2621 gimple vec_def_stmt;
2622 unsigned int i;
2623
2624 gcc_assert (SLP_TREE_VEC_STMTS (slp_node).exists ());
2625
2626 FOR_EACH_VEC_ELT (SLP_TREE_VEC_STMTS (slp_node), i, vec_def_stmt)
2627 {
2628 gcc_assert (vec_def_stmt);
2629 vec_oprnd = gimple_get_lhs (vec_def_stmt);
2630 vec_oprnds->quick_push (vec_oprnd);
2631 }
2632 }
2633
2634
2635 /* Get vectorized definitions for SLP_NODE.
2636 If the scalar definitions are loop invariants or constants, collect them
2637 and call vect_get_constant_vectors() to create vector stmts.
2638 Otherwise, the def-stmts must already be vectorized and the vectorized
2639 stmts must be stored in the corresponding child of SLP_NODE, and we call
2640 vect_get_slp_vect_defs () to retrieve them. */
2641
2642 void
2643 vect_get_slp_defs (vec<tree> ops, slp_tree slp_node,
2644 vec<vec<tree> > *vec_oprnds, int reduc_index)
2645 {
2646 gimple first_stmt;
2647 int number_of_vects = 0, i;
2648 unsigned int child_index = 0;
2649 HOST_WIDE_INT lhs_size_unit, rhs_size_unit;
2650 slp_tree child = NULL;
2651 vec<tree> vec_defs;
2652 tree oprnd;
2653 bool vectorized_defs;
2654
2655 first_stmt = SLP_TREE_SCALAR_STMTS (slp_node)[0];
2656 FOR_EACH_VEC_ELT (ops, i, oprnd)
2657 {
2658 /* For each operand we check if it has vectorized definitions in a child
2659 node or we need to create them (for invariants and constants). We
2660 check if the LHS of the first stmt of the next child matches OPRND.
2661 If it does, we found the correct child. Otherwise, we call
2662 vect_get_constant_vectors () and do not advance CHILD_INDEX, so that
2663 this child node is checked again for the next operand. */
2664 vectorized_defs = false;
2665 if (SLP_TREE_CHILDREN (slp_node).length () > child_index)
2666 {
2667 child = (slp_tree) SLP_TREE_CHILDREN (slp_node)[child_index];
2668
2669 /* We have to check both pattern and original def, if available. */
2670 gimple first_def = SLP_TREE_SCALAR_STMTS (child)[0];
2671 gimple related = STMT_VINFO_RELATED_STMT (vinfo_for_stmt (first_def));
2672
2673 if (operand_equal_p (oprnd, gimple_get_lhs (first_def), 0)
2674 || (related
2675 && operand_equal_p (oprnd, gimple_get_lhs (related), 0)))
2676 {
2677 /* The number of vector defs is determined by the number of
2678 vector statements in the node from which we get those
2679 statements. */
2680 number_of_vects = SLP_TREE_NUMBER_OF_VEC_STMTS (child);
2681 vectorized_defs = true;
2682 child_index++;
2683 }
2684 }
2685
2686 if (!vectorized_defs)
2687 {
2688 if (i == 0)
2689 {
2690 number_of_vects = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
2691 /* Number of vector stmts was calculated according to LHS in
2692 vect_schedule_slp_instance (), fix it by replacing LHS with
2693 RHS, if necessary. See vect_get_smallest_scalar_type () for
2694 details. */
2695 vect_get_smallest_scalar_type (first_stmt, &lhs_size_unit,
2696 &rhs_size_unit);
2697 if (rhs_size_unit != lhs_size_unit)
2698 {
2699 number_of_vects *= rhs_size_unit;
2700 number_of_vects /= lhs_size_unit;
2701 }
2702 }
2703 }
2704
2705 /* Allocate memory for vectorized defs. */
2706 vec_defs = vNULL;
2707 vec_defs.create (number_of_vects);
2708
2709 /* For reduction defs we call vect_get_constant_vectors (), since we are
2710 looking for initial loop invariant values. */
2711 if (vectorized_defs && reduc_index == -1)
2712 /* The defs are already vectorized. */
2713 vect_get_slp_vect_defs (child, &vec_defs);
2714 else
2715 /* Build vectors from scalar defs. */
2716 vect_get_constant_vectors (oprnd, slp_node, &vec_defs, i,
2717 number_of_vects, reduc_index);
2718
2719 vec_oprnds->quick_push (vec_defs);
2720
2721 /* For reductions, we only need initial values. */
2722 if (reduc_index != -1)
2723 return;
2724 }
2725 }
2726
2727
2728 /* Create NCOPIES permutation statements using the mask vector MASK and two
2729 input vectors placed in DR_CHAIN at FIRST_VEC_INDX and SECOND_VEC_INDX for
2730 the first copy, shifting by STRIDE elements of DR_CHAIN for every
2731 subsequent copy.
2732 (STRIDE is the number of vectorized stmts for NODE divided by the number of
2733 copies.)
2734 VECT_STMTS_COUNTER specifies the index in the vectorized stmts of NODE where
2735 the created stmts must be inserted. */
2736
2737 static inline void
2738 vect_create_mask_and_perm (gimple stmt, gimple next_scalar_stmt,
2739 tree mask, int first_vec_indx, int second_vec_indx,
2740 gimple_stmt_iterator *gsi, slp_tree node,
2741 tree vectype, vec<tree> dr_chain,
2742 int ncopies, int vect_stmts_counter)
2743 {
2744 tree perm_dest;
2745 gimple perm_stmt = NULL;
2746 stmt_vec_info next_stmt_info;
2747 int i, stride;
2748 tree first_vec, second_vec, data_ref;
2749
2750 stride = SLP_TREE_NUMBER_OF_VEC_STMTS (node) / ncopies;
2751
2752 /* Initialize the vect stmts of NODE to properly insert the generated
2753 stmts later. */
2754 for (i = SLP_TREE_VEC_STMTS (node).length ();
2755 i < (int) SLP_TREE_NUMBER_OF_VEC_STMTS (node); i++)
2756 SLP_TREE_VEC_STMTS (node).quick_push (NULL);
2757
2758 perm_dest = vect_create_destination_var (gimple_assign_lhs (stmt), vectype);
2759 for (i = 0; i < ncopies; i++)
2760 {
2761 first_vec = dr_chain[first_vec_indx];
2762 second_vec = dr_chain[second_vec_indx];
2763
2764 /* Generate the permute statement. */
2765 perm_stmt = gimple_build_assign_with_ops (VEC_PERM_EXPR, perm_dest,
2766 first_vec, second_vec, mask);
2767 data_ref = make_ssa_name (perm_dest, perm_stmt);
2768 gimple_set_lhs (perm_stmt, data_ref);
2769 vect_finish_stmt_generation (stmt, perm_stmt, gsi);
2770
2771 /* Store the vector statement in NODE. */
2772 SLP_TREE_VEC_STMTS (node)[stride * i + vect_stmts_counter] = perm_stmt;
2773
2774 first_vec_indx += stride;
2775 second_vec_indx += stride;
2776 }
2777
2778 /* Mark the scalar stmt as vectorized. */
2779 next_stmt_info = vinfo_for_stmt (next_scalar_stmt);
2780 STMT_VINFO_VEC_STMT (next_stmt_info) = perm_stmt;
2781 }
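
/* Illustrative output (SSA names are made up): for a four-element
   vector type, one generated copy takes the form

     perm_dest_3 = VEC_PERM_EXPR <first_vec, second_vec,
                                  { 0, 3, 4, 7 }>;

   where indices 0-3 select elements of FIRST_VEC and indices 4-7
   select elements of SECOND_VEC.  */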
2782
2783
2784 /* Given FIRST_MASK_ELEMENT - the mask element in element representation -
2785 return in CURRENT_MASK_ELEMENT its equivalent in the target-specific
2786 representation. Check that the mask is valid and return FALSE if not.
2787 Return TRUE in NEED_NEXT_VECTOR if the permutation requires moving to
2788 the next vector, i.e., the current first vector is not needed. */
2789
2790 static bool
2791 vect_get_mask_element (gimple stmt, int first_mask_element, int m,
2792 int mask_nunits, bool only_one_vec, int index,
2793 unsigned char *mask, int *current_mask_element,
2794 bool *need_next_vector, int *number_of_mask_fixes,
2795 bool *mask_fixed, bool *needs_first_vector)
2796 {
2797 int i;
2798
2799 /* Convert to target specific representation. */
2800 *current_mask_element = first_mask_element + m;
2801 /* Adjust the value in case it's a mask for the second and third vectors. */
2802 *current_mask_element -= mask_nunits * (*number_of_mask_fixes - 1);
2803
2804 if (*current_mask_element < mask_nunits)
2805 *needs_first_vector = true;
2806
2807 /* We have only one input vector to permute but the mask accesses values in
2808 the next vector as well. */
2809 if (only_one_vec && *current_mask_element >= mask_nunits)
2810 {
2811 if (dump_enabled_p ())
2812 {
2813 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2814 "permutation requires at least two vectors ");
2815 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
2816 }
2817
2818 return false;
2819 }
2820
2821 /* The mask requires the next vector. */
2822 if (*current_mask_element >= mask_nunits * 2)
2823 {
2824 if (*needs_first_vector || *mask_fixed)
2825 {
2826 /* We either need the first vector too or have already moved to the
2827 next vector. In both cases, this permutation needs three
2828 vectors. */
2829 if (dump_enabled_p ())
2830 {
2831 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2832 "permutation requires at "
2833 "least three vectors ");
2834 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
2835 }
2836
2837 return false;
2838 }
2839
2840 /* We move to the next vector, dropping the first one and working with
2841 the second and the third - we need to adjust the values of the mask
2842 accordingly. */
2843 *current_mask_element -= mask_nunits * *number_of_mask_fixes;
2844
2845 for (i = 0; i < index; i++)
2846 mask[i] -= mask_nunits * *number_of_mask_fixes;
2847
2848 (*number_of_mask_fixes)++;
2849 *mask_fixed = true;
2850 }
2851
2852 *need_next_vector = *mask_fixed;
2853
2854 /* This was the last element of this mask. Start a new one. */
2855 if (index == mask_nunits - 1)
2856 {
2857 *number_of_mask_fixes = 1;
2858 *mask_fixed = false;
2859 *needs_first_vector = false;
2860 }
2861
2862 return true;
2863 }
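
/* Worked example (restating the {6,9,9,9} -> {2,5,5,5} conversion
   described in vect_transform_slp_perm_load below): with MASK_NUNITS
   4 and NUMBER_OF_MASK_FIXES 1, an element 9 is >= 2 * 4, so 4 is
   subtracted from it and from the already-emitted entry 6, giving
   {2, 5, ...}; NUMBER_OF_MASK_FIXES becomes 2, the remaining 9s are
   reduced to 5 on entry, and NEED_NEXT_VECTOR tells the caller to
   shift to the next pair of input vectors.  */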
2864
2865
2866 /* Generate vector permute statements from a list of loads in DR_CHAIN.
2867 If ANALYZE_ONLY is TRUE, only check that it is possible to create valid
2868 permute statements for SLP_NODE_INSTANCE. */
2869 bool
2870 vect_transform_slp_perm_load (gimple stmt, vec<tree> dr_chain,
2871 gimple_stmt_iterator *gsi, int vf,
2872 slp_instance slp_node_instance, bool analyze_only)
2873 {
2874 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2875 tree mask_element_type = NULL_TREE, mask_type;
2876 int i, j, k, nunits, vec_index = 0, scalar_index;
2877 slp_tree node;
2878 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
2879 gimple next_scalar_stmt;
2880 int group_size = SLP_INSTANCE_GROUP_SIZE (slp_node_instance);
2881 int first_mask_element;
2882 int index, unroll_factor, current_mask_element, ncopies;
2883 unsigned char *mask;
2884 bool only_one_vec = false, need_next_vector = false;
2885 int first_vec_index, second_vec_index, orig_vec_stmts_num, vect_stmts_counter;
2886 int number_of_mask_fixes = 1;
2887 bool mask_fixed = false;
2888 bool needs_first_vector = false;
2889 enum machine_mode mode;
2890
2891 mode = TYPE_MODE (vectype);
2892
2893 if (!can_vec_perm_p (mode, false, NULL))
2894 {
2895 if (dump_enabled_p ())
2896 {
2897 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2898 "no vect permute for ");
2899 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
2900 }
2901 return false;
2902 }
2903
2904 /* The generic VEC_PERM_EXPR code always uses an integral type of the
2905 same size as the vector element being permuted. */
2906 mask_element_type = lang_hooks.types.type_for_mode
2907 (int_mode_for_mode (TYPE_MODE (TREE_TYPE (vectype))), 1);
2908 mask_type = get_vectype_for_scalar_type (mask_element_type);
2909 nunits = TYPE_VECTOR_SUBPARTS (vectype);
2910 mask = XALLOCAVEC (unsigned char, nunits);
2911 unroll_factor = SLP_INSTANCE_UNROLLING_FACTOR (slp_node_instance);
2912
2913 /* The number of vector stmts to generate, based only on the
2914 SLP_NODE_INSTANCE unrolling factor. */
2915 orig_vec_stmts_num = group_size *
2916 SLP_INSTANCE_UNROLLING_FACTOR (slp_node_instance) / nunits;
2917 if (orig_vec_stmts_num == 1)
2918 only_one_vec = true;
2919
2920 /* The number of copies is determined by the final vectorization factor
2921 relative to the SLP_NODE_INSTANCE unrolling factor. */
2922 ncopies = vf / SLP_INSTANCE_UNROLLING_FACTOR (slp_node_instance);
2923
2924 /* Generate permutation masks for every NODE. Number of masks for each NODE
2925 is equal to GROUP_SIZE.
2926 E.g., we have a group of three nodes with three loads from the same
2927 location in each node, and the vector size is 4. I.e., we have an
2928 a0b0c0a1b1c1... sequence and we need to create the following vectors:
2929 for a's: a0a0a0a1 a1a1a2a2 a2a3a3a3
2930 for b's: b0b0b0b1 b1b1b2b2 b2b3b3b3
2931 ...
2932
2933 The masks for a's should be: {0,0,0,3} {3,3,6,6} {6,9,9,9}.
2934 The last mask is illegal since we assume two operands for the permute
2935 operation, and the mask element values can't be outside that range.
2936 Hence, the last mask must be converted into {2,5,5,5}.
2937 For the first two permutations we need the first and the second input
2938 vectors: {a0,b0,c0,a1} and {b1,c1,a2,b2}, and for the last permutation
2939 we need the second and the third vectors: {b1,c1,a2,b2} and
2940 {c2,a3,b3,c3}. */
2941
2942 FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (slp_node_instance), i, node)
2943 {
2944 scalar_index = 0;
2945 index = 0;
2946 vect_stmts_counter = 0;
2947 vec_index = 0;
2948 first_vec_index = vec_index++;
2949 if (only_one_vec)
2950 second_vec_index = first_vec_index;
2951 else
2952 second_vec_index = vec_index++;
2953
2954 for (j = 0; j < unroll_factor; j++)
2955 {
2956 for (k = 0; k < group_size; k++)
2957 {
2958 first_mask_element = i + j * group_size;
2959 if (!vect_get_mask_element (stmt, first_mask_element, 0,
2960 nunits, only_one_vec, index,
2961 mask, &current_mask_element,
2962 &need_next_vector,
2963 &number_of_mask_fixes, &mask_fixed,
2964 &needs_first_vector))
2965 return false;
2966 mask[index++] = current_mask_element;
2967
2968 if (index == nunits)
2969 {
2970 tree mask_vec, *mask_elts;
2971 int l;
2972
2973 if (!can_vec_perm_p (mode, false, mask))
2974 {
2975 if (dump_enabled_p ())
2976 {
2977 dump_printf_loc (MSG_MISSED_OPTIMIZATION,
2978 vect_location,
2979 "unsupported vect permute { ");
2980 for (i = 0; i < nunits; ++i)
2981 dump_printf (MSG_MISSED_OPTIMIZATION, "%d ",
2982 mask[i]);
2983 dump_printf (MSG_MISSED_OPTIMIZATION, "}\n");
2984 }
2985 return false;
2986 }
2987
2988 mask_elts = XALLOCAVEC (tree, nunits);
2989 for (l = 0; l < nunits; ++l)
2990 mask_elts[l] = build_int_cst (mask_element_type, mask[l]);
2991 mask_vec = build_vector (mask_type, mask_elts);
2992 index = 0;
2993
2994 if (!analyze_only)
2995 {
2996 if (need_next_vector)
2997 {
2998 first_vec_index = second_vec_index;
2999 second_vec_index = vec_index;
3000 }
3001
3002 next_scalar_stmt
3003 = SLP_TREE_SCALAR_STMTS (node)[scalar_index++];
3004
3005 vect_create_mask_and_perm (stmt, next_scalar_stmt,
3006 mask_vec, first_vec_index, second_vec_index,
3007 gsi, node, vectype, dr_chain,
3008 ncopies, vect_stmts_counter++);
3009 }
3010 }
3011 }
3012 }
3013 }
3014
3015 return true;
3016 }
3017
3018
3019
3020 /* Vectorize SLP instance tree in postorder. */
3021
3022 static bool
3023 vect_schedule_slp_instance (slp_tree node, slp_instance instance,
3024 unsigned int vectorization_factor)
3025 {
3026 gimple stmt;
3027 bool grouped_store, is_store;
3028 gimple_stmt_iterator si;
3029 stmt_vec_info stmt_info;
3030 unsigned int vec_stmts_size, nunits, group_size;
3031 tree vectype;
3032 int i;
3033 slp_tree loads_node;
3034 slp_tree child;
3035
3036 if (!node)
3037 return false;
3038
3039 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
3040 vect_schedule_slp_instance (child, instance, vectorization_factor);
3041
3042 stmt = SLP_TREE_SCALAR_STMTS (node)[0];
3043 stmt_info = vinfo_for_stmt (stmt);
3044
3045 /* VECTYPE is the type of the destination. */
3046 vectype = STMT_VINFO_VECTYPE (stmt_info);
3047 nunits = (unsigned int) TYPE_VECTOR_SUBPARTS (vectype);
3048 group_size = SLP_INSTANCE_GROUP_SIZE (instance);
3049
3050 /* For each SLP instance calculate number of vector stmts to be created
3051 for the scalar stmts in each node of the SLP tree. Number of vector
3052 elements in one vector iteration is the number of scalar elements in
3053 one scalar iteration (GROUP_SIZE) multiplied by VF divided by vector
3054 size. */
3055 vec_stmts_size = (vectorization_factor * group_size) / nunits;
3056
3057 /* In case of load permutation we have to allocate vectorized statements for
3058 all the nodes that participate in that permutation. */
3059 if (SLP_INSTANCE_LOAD_PERMUTATION (instance).exists ())
3060 {
3061 FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (instance), i, loads_node)
3062 {
3063 if (!SLP_TREE_VEC_STMTS (loads_node).exists ())
3064 {
3065 SLP_TREE_VEC_STMTS (loads_node).create (vec_stmts_size);
3066 SLP_TREE_NUMBER_OF_VEC_STMTS (loads_node) = vec_stmts_size;
3067 }
3068 }
3069 }
3070
3071 if (!SLP_TREE_VEC_STMTS (node).exists ())
3072 {
3073 SLP_TREE_VEC_STMTS (node).create (vec_stmts_size);
3074 SLP_TREE_NUMBER_OF_VEC_STMTS (node) = vec_stmts_size;
3075 }
3076
3077 if (dump_enabled_p ())
3078 {
3079 dump_printf_loc (MSG_NOTE,vect_location,
3080 "------>vectorizing SLP node starting from: ");
3081 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
3082 }
3083
3084 /* Loads should be inserted before the first load. */
3085 if (SLP_INSTANCE_FIRST_LOAD_STMT (instance)
3086 && STMT_VINFO_GROUPED_ACCESS (stmt_info)
3087 && !REFERENCE_CLASS_P (gimple_get_lhs (stmt))
3088 && SLP_INSTANCE_LOAD_PERMUTATION (instance).exists ())
3089 si = gsi_for_stmt (SLP_INSTANCE_FIRST_LOAD_STMT (instance));
3090 else if (is_pattern_stmt_p (stmt_info))
3091 si = gsi_for_stmt (STMT_VINFO_RELATED_STMT (stmt_info));
3092 else
3093 si = gsi_for_stmt (stmt);
3094
3095 /* Stores should be inserted just before the last store. */
3096 if (STMT_VINFO_GROUPED_ACCESS (stmt_info)
3097 && REFERENCE_CLASS_P (gimple_get_lhs (stmt)))
3098 {
3099 gimple last_store = vect_find_last_store_in_slp_instance (instance);
3100 if (is_pattern_stmt_p (vinfo_for_stmt (last_store)))
3101 last_store = STMT_VINFO_RELATED_STMT (vinfo_for_stmt (last_store));
3102 si = gsi_for_stmt (last_store);
3103 }
3104
3105 /* Mark the first element of the reduction chain as a reduction to properly
3106 transform the node. In the analysis phase only the last element of the
3107 chain is marked as a reduction. */
3108 if (GROUP_FIRST_ELEMENT (stmt_info) && !STMT_VINFO_GROUPED_ACCESS (stmt_info)
3109 && GROUP_FIRST_ELEMENT (stmt_info) == stmt)
3110 {
3111 STMT_VINFO_DEF_TYPE (stmt_info) = vect_reduction_def;
3112 STMT_VINFO_TYPE (stmt_info) = reduc_vec_info_type;
3113 }
3114
3115 is_store = vect_transform_stmt (stmt, &si, &grouped_store, node, instance);
3116 return is_store;
3117 }
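
/* Worked example (illustrative): for a store group of size 2, a
   vectorization factor of 4 and a four-element vector type,
   vec_stmts_size is (4 * 2) / 4 = 2, i.e. two vector stmts are
   created for each node of the instance.  */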
3118
3119 /* Replace scalar calls from SLP node NODE with setting of their lhs to zero.
3120 For loop vectorization this is done in vectorizable_call, but for SLP
3121 it needs to be deferred until end of vect_schedule_slp, because multiple
3122 SLP instances may refer to the same scalar stmt. */
3123
3124 static void
3125 vect_remove_slp_scalar_calls (slp_tree node)
3126 {
3127 gimple stmt, new_stmt;
3128 gimple_stmt_iterator gsi;
3129 int i;
3130 slp_tree child;
3131 tree lhs;
3132 stmt_vec_info stmt_info;
3133
3134 if (!node)
3135 return;
3136
3137 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
3138 vect_remove_slp_scalar_calls (child);
3139
3140 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, stmt)
3141 {
3142 if (!is_gimple_call (stmt) || gimple_bb (stmt) == NULL)
3143 continue;
3144 stmt_info = vinfo_for_stmt (stmt);
3145 if (stmt_info == NULL
3146 || is_pattern_stmt_p (stmt_info)
3147 || !PURE_SLP_STMT (stmt_info))
3148 continue;
3149 lhs = gimple_call_lhs (stmt);
3150 new_stmt = gimple_build_assign (lhs, build_zero_cst (TREE_TYPE (lhs)));
3151 set_vinfo_for_stmt (new_stmt, stmt_info);
3152 set_vinfo_for_stmt (stmt, NULL);
3153 STMT_VINFO_STMT (stmt_info) = new_stmt;
3154 gsi = gsi_for_stmt (stmt);
3155 gsi_replace (&gsi, new_stmt, false);
3156 SSA_NAME_DEF_STMT (gimple_assign_lhs (new_stmt)) = new_stmt;
3157 }
3158 }
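
/* Illustrative effect (hypothetical stmt): a scalar call such as

     x_1 = sqrtf (a_2);

   whose uses were all vectorized is rewritten as

     x_1 = 0.0;

   leaving it to later DCE to remove the dead assignment.  */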
3159
3160 /* Generate vector code for all SLP instances in the loop/basic block. */
3161
3162 bool
3163 vect_schedule_slp (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo)
3164 {
3165 vec<slp_instance> slp_instances;
3166 slp_instance instance;
3167 slp_tree loads_node;
3168 unsigned int i, j, vf;
3169 bool is_store = false;
3170
3171 if (loop_vinfo)
3172 {
3173 slp_instances = LOOP_VINFO_SLP_INSTANCES (loop_vinfo);
3174 vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
3175 }
3176 else
3177 {
3178 slp_instances = BB_VINFO_SLP_INSTANCES (bb_vinfo);
3179 vf = 1;
3180 }
3181
3182 FOR_EACH_VEC_ELT (slp_instances, i, instance)
3183 {
3184 /* Schedule the tree of INSTANCE. */
3185 is_store = vect_schedule_slp_instance (SLP_INSTANCE_TREE (instance),
3186 instance, vf);
3187
3188 /* Clear STMT_VINFO_VEC_STMT of all loads. With loads shared between
3189 SLP instances we would otherwise fail to properly initialize the
3190 vectorized SLP stmts and confuse different load permutations. */
3191 FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (instance), j, loads_node)
3192 STMT_VINFO_VEC_STMT
3193 (vinfo_for_stmt (SLP_TREE_SCALAR_STMTS (loads_node)[0])) = NULL;
3194
3195 if (dump_enabled_p ())
3196 dump_printf_loc (MSG_NOTE, vect_location,
3197 "vectorizing stmts using SLP.");
3198 }
3199
3200 FOR_EACH_VEC_ELT (slp_instances, i, instance)
3201 {
3202 slp_tree root = SLP_INSTANCE_TREE (instance);
3203 gimple store;
3204 unsigned int j;
3205 gimple_stmt_iterator gsi;
3206
3207 /* Remove scalar call stmts. Do not do this for basic-block
3208 vectorization as not all uses may be vectorized.
3209 ??? Why should this be necessary? DCE should be able to
3210 remove the stmts itself.
3211 ??? For BB vectorization we can as well remove scalar
3212 stmts starting from the SLP tree root if they have no
3213 uses. */
3214 if (loop_vinfo)
3215 vect_remove_slp_scalar_calls (root);
3216
3217 for (j = 0; SLP_TREE_SCALAR_STMTS (root).iterate (j, &store)
3218 && j < SLP_INSTANCE_GROUP_SIZE (instance); j++)
3219 {
3220 if (!STMT_VINFO_DATA_REF (vinfo_for_stmt (store)))
3221 break;
3222
3223 if (is_pattern_stmt_p (vinfo_for_stmt (store)))
3224 store = STMT_VINFO_RELATED_STMT (vinfo_for_stmt (store));
3225 /* Free the attached stmt_vec_info and remove the stmt. */
3226 gsi = gsi_for_stmt (store);
3227 unlink_stmt_vdef (store);
3228 gsi_remove (&gsi, true);
3229 release_defs (store);
3230 free_stmt_vec_info (store);
3231 }
3232 }
3233
3234 return is_store;
3235 }
3236
3237
3238 /* Vectorize the basic block. */
3239
3240 void
3241 vect_slp_transform_bb (basic_block bb)
3242 {
3243 bb_vec_info bb_vinfo = vec_info_for_bb (bb);
3244 gimple_stmt_iterator si;
3245
3246 gcc_assert (bb_vinfo);
3247
3248 if (dump_enabled_p ())
3249 dump_printf_loc (MSG_NOTE, vect_location, "SLPing BB\n");
3250
3251 for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
3252 {
3253 gimple stmt = gsi_stmt (si);
3254 stmt_vec_info stmt_info;
3255
3256 if (dump_enabled_p ())
3257 {
3258 dump_printf_loc (MSG_NOTE, vect_location,
3259 "------>SLPing statement: ");
3260 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
3261 }
3262
3263 stmt_info = vinfo_for_stmt (stmt);
3264 gcc_assert (stmt_info);
3265
3266 /* Schedule all the SLP instances when the first SLP stmt is reached. */
3267 if (STMT_SLP_TYPE (stmt_info))
3268 {
3269 vect_schedule_slp (NULL, bb_vinfo);
3270 break;
3271 }
3272 }
3273
3274 if (dump_enabled_p ())
3275 dump_printf (MSG_OPTIMIZED_LOCATIONS, "BASIC BLOCK VECTORIZED\n");
3276
3277 destroy_bb_vec_info (bb_vinfo);
3278 }