re PR fortran/54107 ([F03] Memory hog with abstract interface)
[gcc.git] / gcc / tree-vect-slp.c
1 /* SLP - Basic Block Vectorization
2 Copyright (C) 2007-2013 Free Software Foundation, Inc.
3 Contributed by Dorit Naishlos <dorit@il.ibm.com>
4 and Ira Rosen <irar@il.ibm.com>
5
6 This file is part of GCC.
7
8 GCC is free software; you can redistribute it and/or modify it under
9 the terms of the GNU General Public License as published by the Free
10 Software Foundation; either version 3, or (at your option) any later
11 version.
12
13 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
14 WARRANTY; without even the implied warranty of MERCHANTABILITY or
15 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
16 for more details.
17
18 You should have received a copy of the GNU General Public License
19 along with GCC; see the file COPYING3. If not see
20 <http://www.gnu.org/licenses/>. */
21
22 #include "config.h"
23 #include "system.h"
24 #include "coretypes.h"
25 #include "dumpfile.h"
26 #include "tm.h"
27 #include "ggc.h"
28 #include "tree.h"
29 #include "target.h"
30 #include "basic-block.h"
31 #include "gimple-pretty-print.h"
32 #include "tree-flow.h"
33 #include "tree-pass.h"
34 #include "cfgloop.h"
35 #include "expr.h"
36 #include "recog.h" /* FIXME: for insn_data */
37 #include "optabs.h"
38 #include "tree-vectorizer.h"
39 #include "langhooks.h"
40
41 /* Extract the location of the basic block in the source code.
42 Return the basic block location if succeed and NULL if not. */
43
44 LOC
45 find_bb_location (basic_block bb)
46 {
47 gimple stmt = NULL;
48 gimple_stmt_iterator si;
49
50 if (!bb)
51 return UNKNOWN_LOC;
52
53 for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
54 {
55 stmt = gsi_stmt (si);
56 if (gimple_location (stmt) != UNKNOWN_LOC)
57 return gimple_location (stmt);
58 }
59
60 return UNKNOWN_LOC;
61 }
62
63
64 /* Recursively free the memory allocated for the SLP tree rooted at NODE. */
65
66 static void
67 vect_free_slp_tree (slp_tree node)
68 {
69 int i;
70 slp_void_p child;
71
72 if (!node)
73 return;
74
75 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
76 vect_free_slp_tree ((slp_tree) child);
77
78 SLP_TREE_CHILDREN (node).release ();
79 SLP_TREE_SCALAR_STMTS (node).release ();
80 SLP_TREE_VEC_STMTS (node).release ();
81
82 free (node);
83 }
84
85
86 /* Free the memory allocated for the SLP instance. */
87
88 void
89 vect_free_slp_instance (slp_instance instance)
90 {
91 vect_free_slp_tree (SLP_INSTANCE_TREE (instance));
92 SLP_INSTANCE_LOAD_PERMUTATION (instance).release ();
93 SLP_INSTANCE_LOADS (instance).release ();
94 SLP_INSTANCE_BODY_COST_VEC (instance).release ();
95 free (instance);
96 }
97
98
99 /* Create an SLP node for SCALAR_STMTS. */
100
101 static slp_tree
102 vect_create_new_slp_node (vec<gimple> scalar_stmts)
103 {
104 slp_tree node;
105 gimple stmt = scalar_stmts[0];
106 unsigned int nops;
107
108 if (is_gimple_call (stmt))
109 nops = gimple_call_num_args (stmt);
110 else if (is_gimple_assign (stmt))
111 {
112 nops = gimple_num_ops (stmt) - 1;
113 if (gimple_assign_rhs_code (stmt) == COND_EXPR)
114 nops++;
115 }
116 else
117 return NULL;
118
119 node = XNEW (struct _slp_tree);
120 SLP_TREE_SCALAR_STMTS (node) = scalar_stmts;
121 SLP_TREE_VEC_STMTS (node).create (0);
122 SLP_TREE_CHILDREN (node).create (nops);
123
124 return node;
125 }
126
127
128 /* Allocate operands info for NOPS operands, and GROUP_SIZE def-stmts for each
129 operand. */
130 static vec<slp_oprnd_info>
131 vect_create_oprnd_info (int nops, int group_size)
132 {
133 int i;
134 slp_oprnd_info oprnd_info;
135 vec<slp_oprnd_info> oprnds_info;
136
137 oprnds_info.create (nops);
138 for (i = 0; i < nops; i++)
139 {
140 oprnd_info = XNEW (struct _slp_oprnd_info);
141 oprnd_info->def_stmts.create (group_size);
142 oprnd_info->first_dt = vect_uninitialized_def;
143 oprnd_info->first_def_type = NULL_TREE;
144 oprnd_info->first_const_oprnd = NULL_TREE;
145 oprnd_info->first_pattern = false;
146 oprnds_info.quick_push (oprnd_info);
147 }
148
149 return oprnds_info;
150 }
151
152
153 /* Free operands info. */
154
155 static void
156 vect_free_oprnd_info (vec<slp_oprnd_info> &oprnds_info)
157 {
158 int i;
159 slp_oprnd_info oprnd_info;
160
161 FOR_EACH_VEC_ELT (oprnds_info, i, oprnd_info)
162 {
163 oprnd_info->def_stmts.release ();
164 XDELETE (oprnd_info);
165 }
166
167 oprnds_info.release ();
168 }
169
170
171 /* Get the defs for the rhs of STMT (collect them in OPRNDS_INFO), check that
172 they are of a valid type and that they match the defs of the first stmt of
173 the SLP group (stored in OPRNDS_INFO). */
174
175 static bool
176 vect_get_and_check_slp_defs (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo,
177 slp_tree slp_node, gimple stmt,
178 int ncopies_for_cost, bool first,
179 vec<slp_oprnd_info> *oprnds_info,
180 stmt_vector_for_cost *prologue_cost_vec,
181 stmt_vector_for_cost *body_cost_vec)
182 {
183 tree oprnd;
184 unsigned int i, number_of_oprnds;
185 tree def, def_op0 = NULL_TREE;
186 gimple def_stmt;
187 enum vect_def_type dt = vect_uninitialized_def;
188 enum vect_def_type dt_op0 = vect_uninitialized_def;
189 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
190 tree lhs = gimple_get_lhs (stmt);
191 struct loop *loop = NULL;
192 enum tree_code rhs_code;
193 bool different_types = false;
194 bool pattern = false;
195 slp_oprnd_info oprnd_info, oprnd0_info, oprnd1_info;
196 int op_idx = 1;
197 tree compare_rhs = NULL_TREE;
198
199 if (loop_vinfo)
200 loop = LOOP_VINFO_LOOP (loop_vinfo);
201
202 if (is_gimple_call (stmt))
203 {
204 number_of_oprnds = gimple_call_num_args (stmt);
205 op_idx = 3;
206 }
207 else if (is_gimple_assign (stmt))
208 {
209 number_of_oprnds = gimple_num_ops (stmt) - 1;
210 if (gimple_assign_rhs_code (stmt) == COND_EXPR)
211 number_of_oprnds++;
212 }
213 else
214 return false;
215
216 for (i = 0; i < number_of_oprnds; i++)
217 {
218 if (compare_rhs)
219 {
220 oprnd = compare_rhs;
221 compare_rhs = NULL_TREE;
222 }
223 else
224 oprnd = gimple_op (stmt, op_idx++);
225
226 oprnd_info = (*oprnds_info)[i];
227
228 if (COMPARISON_CLASS_P (oprnd))
229 {
230 compare_rhs = TREE_OPERAND (oprnd, 1);
231 oprnd = TREE_OPERAND (oprnd, 0);
232 }
233
234 if (!vect_is_simple_use (oprnd, NULL, loop_vinfo, bb_vinfo, &def_stmt,
235 &def, &dt)
236 || (!def_stmt && dt != vect_constant_def))
237 {
238 if (dump_enabled_p ())
239 {
240 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
241 "Build SLP failed: can't find def for ");
242 dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM, oprnd);
243 }
244
245 return false;
246 }
247
248 /* Check if DEF_STMT is a part of a pattern in LOOP and get the def stmt
249 from the pattern. Check that all the stmts of the node are in the
250 pattern. */
251 if (def_stmt && gimple_bb (def_stmt)
252 && ((loop && flow_bb_inside_loop_p (loop, gimple_bb (def_stmt)))
253 || (!loop && gimple_bb (def_stmt) == BB_VINFO_BB (bb_vinfo)
254 && gimple_code (def_stmt) != GIMPLE_PHI))
255 && vinfo_for_stmt (def_stmt)
256 && STMT_VINFO_IN_PATTERN_P (vinfo_for_stmt (def_stmt))
257 && !STMT_VINFO_RELEVANT (vinfo_for_stmt (def_stmt))
258 && !STMT_VINFO_LIVE_P (vinfo_for_stmt (def_stmt)))
259 {
260 pattern = true;
261 if (!first && !oprnd_info->first_pattern)
262 {
263 if (dump_enabled_p ())
264 {
265 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
266 "Build SLP failed: some of the stmts"
267 " are in a pattern, and others are not ");
268 dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM, oprnd);
269 }
270
271 return false;
272 }
273
274 def_stmt = STMT_VINFO_RELATED_STMT (vinfo_for_stmt (def_stmt));
275 dt = STMT_VINFO_DEF_TYPE (vinfo_for_stmt (def_stmt));
276
277 if (dt == vect_unknown_def_type)
278 {
279 if (dump_enabled_p ())
280 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
281 "Unsupported pattern.");
282 return false;
283 }
284
285 switch (gimple_code (def_stmt))
286 {
287 case GIMPLE_PHI:
288 def = gimple_phi_result (def_stmt);
289 break;
290
291 case GIMPLE_ASSIGN:
292 def = gimple_assign_lhs (def_stmt);
293 break;
294
295 default:
296 if (dump_enabled_p ())
297 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
298 "unsupported defining stmt: ");
299 return false;
300 }
301 }
302
303 if (first)
304 {
305 oprnd_info->first_dt = dt;
306 oprnd_info->first_pattern = pattern;
307 if (def)
308 {
309 oprnd_info->first_def_type = TREE_TYPE (def);
310 oprnd_info->first_const_oprnd = NULL_TREE;
311 }
312 else
313 {
314 oprnd_info->first_def_type = NULL_TREE;
315 oprnd_info->first_const_oprnd = oprnd;
316 }
317
318 if (i == 0)
319 {
320 def_op0 = def;
321 dt_op0 = dt;
322 /* Analyze costs (for the first stmt of the group only). */
323 if (REFERENCE_CLASS_P (lhs))
324 /* Store. */
325 vect_model_store_cost (stmt_info, ncopies_for_cost, false,
326 dt, slp_node, prologue_cost_vec,
327 body_cost_vec);
328 else
329 {
330 enum vect_def_type dts[2];
331 dts[0] = dt;
332 dts[1] = vect_uninitialized_def;
333 /* Not memory operation (we don't call this function for
334 loads). */
335 vect_model_simple_cost (stmt_info, ncopies_for_cost, dts,
336 prologue_cost_vec, body_cost_vec);
337 }
338 }
339 }
340 else
341 {
342 /* Not first stmt of the group, check that the def-stmt/s match
343 the def-stmt/s of the first stmt. Allow different definition
344 types for reduction chains: the first stmt must be a
345 vect_reduction_def (a phi node), and the rest
346 vect_internal_def. */
347 if (((oprnd_info->first_dt != dt
348 && !(oprnd_info->first_dt == vect_reduction_def
349 && dt == vect_internal_def))
350 || (oprnd_info->first_def_type != NULL_TREE
351 && def
352 && !types_compatible_p (oprnd_info->first_def_type,
353 TREE_TYPE (def))))
354 || (!def
355 && !types_compatible_p (TREE_TYPE (oprnd_info->first_const_oprnd),
356 TREE_TYPE (oprnd)))
357 || different_types)
358 {
359 if (number_of_oprnds != 2)
360 {
361 if (dump_enabled_p ())
362 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
363 "Build SLP failed: different types ");
364
365 return false;
366 }
367
368 /* Try to swap operands in case of binary operation. */
369 if (i == 0)
370 different_types = true;
371 else
372 {
373 oprnd0_info = (*oprnds_info)[0];
374 if (is_gimple_assign (stmt)
375 && (rhs_code = gimple_assign_rhs_code (stmt))
376 && TREE_CODE_CLASS (rhs_code) == tcc_binary
377 && commutative_tree_code (rhs_code)
378 && oprnd0_info->first_dt == dt
379 && oprnd_info->first_dt == dt_op0
380 && def_op0 && def
381 && !(oprnd0_info->first_def_type
382 && !types_compatible_p (oprnd0_info->first_def_type,
383 TREE_TYPE (def)))
384 && !(oprnd_info->first_def_type
385 && !types_compatible_p (oprnd_info->first_def_type,
386 TREE_TYPE (def_op0))))
387 {
388 if (dump_enabled_p ())
389 {
390 dump_printf_loc (MSG_NOTE, vect_location,
391 "Swapping operands of ");
392 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
393 }
394
395 swap_tree_operands (stmt, gimple_assign_rhs1_ptr (stmt),
396 gimple_assign_rhs2_ptr (stmt));
397 }
398 else
399 {
400 if (dump_enabled_p ())
401 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
402 "Build SLP failed: different types ");
403
404 return false;
405 }
406 }
407 }
408 }
409
410 /* Check the types of the definitions. */
411 switch (dt)
412 {
413 case vect_constant_def:
414 case vect_external_def:
415 case vect_reduction_def:
416 break;
417
418 case vect_internal_def:
419 if (different_types)
420 {
421 oprnd0_info = (*oprnds_info)[0];
422 oprnd1_info = (*oprnds_info)[0];
423 if (i == 0)
424 oprnd1_info->def_stmts.quick_push (def_stmt);
425 else
426 oprnd0_info->def_stmts.quick_push (def_stmt);
427 }
428 else
429 oprnd_info->def_stmts.quick_push (def_stmt);
430
431 break;
432
433 default:
434 /* FORNOW: Not supported. */
435 if (dump_enabled_p ())
436 {
437 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
438 "Build SLP failed: illegal type of def ");
439 dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM, def);
440 }
441
442 return false;
443 }
444 }
445
446 return true;
447 }
448
449
/* Recursively build an SLP tree starting from NODE.
   Fail (and return FALSE) if def-stmts are not isomorphic, require data
   permutation or are of unsupported types of operation.  Otherwise, return
   TRUE.

   GROUP_SIZE is the number of scalar stmts per node.  MAX_NUNITS is updated
   with the largest number of vector elements seen so far; for basic-block
   vectorization VECTORIZATION_FACTOR tracks it.  Loads reached at the
   leaves are pushed onto LOADS, their positions onto LOAD_PERMUTATION, and
   *LOADS_PERMUTED is set if any load is out of order.  Costs are recorded
   into PROLOGUE_COST_VEC/BODY_COST_VEC (OUTSIDE_COST is only threaded
   through the recursion here).  */

static bool
vect_build_slp_tree (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo,
                     slp_tree *node, unsigned int group_size, int *outside_cost,
                     int ncopies_for_cost, unsigned int *max_nunits,
                     vec<int> *load_permutation,
                     vec<slp_tree> *loads,
                     unsigned int vectorization_factor, bool *loads_permuted,
                     stmt_vector_for_cost *prologue_cost_vec,
                     stmt_vector_for_cost *body_cost_vec)
{
  unsigned int i;
  vec<gimple> stmts = SLP_TREE_SCALAR_STMTS (*node);
  gimple stmt = stmts[0];
  enum tree_code first_stmt_code = ERROR_MARK, rhs_code = ERROR_MARK;
  enum tree_code first_cond_code = ERROR_MARK;
  tree lhs;
  bool stop_recursion = false, need_same_oprnds = false;
  tree vectype, scalar_type, first_op1 = NULL_TREE;
  unsigned int ncopies;
  optab optab;
  int icode;
  enum machine_mode optab_op2_mode;
  enum machine_mode vec_mode;
  struct data_reference *first_dr;
  HOST_WIDE_INT dummy;
  bool permutation = false;
  unsigned int load_place;
  gimple first_load = NULL, prev_first_load = NULL, old_first_load = NULL;
  vec<slp_oprnd_info> oprnds_info;
  unsigned int nops;
  slp_oprnd_info oprnd_info;
  tree cond;

  /* Determine the operand count from the first stmt; all stmts in the
     node must agree with it (checked below).  */
  if (is_gimple_call (stmt))
    nops = gimple_call_num_args (stmt);
  else if (is_gimple_assign (stmt))
    {
      nops = gimple_num_ops (stmt) - 1;
      if (gimple_assign_rhs_code (stmt) == COND_EXPR)
	nops++;
    }
  else
    return false;

  oprnds_info = vect_create_oprnd_info (nops, group_size);

  /* For every stmt in NODE find its def stmt/s.  */
  FOR_EACH_VEC_ELT (stmts, i, stmt)
    {
      if (dump_enabled_p ())
	{
	  dump_printf_loc (MSG_NOTE, vect_location, "Build SLP for ");
	  dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
	}

      /* Fail to vectorize statements marked as unvectorizable.  */
      if (!STMT_VINFO_VECTORIZABLE (vinfo_for_stmt (stmt)))
        {
          if (dump_enabled_p ())
            {
              dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			       "Build SLP failed: unvectorizable statement ");
              dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
            }

	  vect_free_oprnd_info (oprnds_info);
          return false;
        }

      lhs = gimple_get_lhs (stmt);
      if (lhs == NULL_TREE)
	{
	  if (dump_enabled_p ())
	    {
	      dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			       "Build SLP failed: not GIMPLE_ASSIGN nor "
			       "GIMPLE_CALL ");
	      dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
	    }

	  vect_free_oprnd_info (oprnds_info);
	  return false;
	}

      /* COND_EXPRs are only handled when the condition is an embedded
	 comparison.  */
      if (is_gimple_assign (stmt)
	  && gimple_assign_rhs_code (stmt) == COND_EXPR
          && (cond = gimple_assign_rhs1 (stmt))
          && !COMPARISON_CLASS_P (cond))
	{
	  if (dump_enabled_p ())
	    {
	      dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			       "Build SLP failed: condition is not "
			       "comparison ");
	      dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
	    }

	  vect_free_oprnd_info (oprnds_info);
	  return false;
	}

      scalar_type = vect_get_smallest_scalar_type (stmt, &dummy, &dummy);
      vectype = get_vectype_for_scalar_type (scalar_type);
      if (!vectype)
        {
          if (dump_enabled_p ())
            {
              dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			       "Build SLP failed: unsupported data-type ");
              dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
				 scalar_type);
            }

	  vect_free_oprnd_info (oprnds_info);
          return false;
        }

      /* In case of multiple types we need to detect the smallest type.  */
      if (*max_nunits < TYPE_VECTOR_SUBPARTS (vectype))
        {
          *max_nunits = TYPE_VECTOR_SUBPARTS (vectype);
          if (bb_vinfo)
            vectorization_factor = *max_nunits;
        }

      ncopies = vectorization_factor / TYPE_VECTOR_SUBPARTS (vectype);

      if (is_gimple_call (stmt))
	{
	  rhs_code = CALL_EXPR;
	  /* Only plain, nothrow, argument-only calls are supported.  */
	  if (gimple_call_internal_p (stmt)
	      || gimple_call_tail_p (stmt)
	      || gimple_call_noreturn_p (stmt)
	      || !gimple_call_nothrow_p (stmt)
	      || gimple_call_chain (stmt))
	    {
	      if (dump_enabled_p ())
		{
		  dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
				   "Build SLP failed: unsupported call type ");
		  dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
		}

	      vect_free_oprnd_info (oprnds_info);
	      return false;
	    }
	}
      else
	rhs_code = gimple_assign_rhs_code (stmt);

      /* Check the operation.  */
      if (i == 0)
	{
	  first_stmt_code = rhs_code;

	  /* Shift arguments should be equal in all the packed stmts for a
	     vector shift with scalar shift operand.  */
	  if (rhs_code == LSHIFT_EXPR || rhs_code == RSHIFT_EXPR
	      || rhs_code == LROTATE_EXPR
	      || rhs_code == RROTATE_EXPR)
	    {
	      vec_mode = TYPE_MODE (vectype);

	      /* First see if we have a vector/vector shift.  */
	      optab = optab_for_tree_code (rhs_code, vectype,
					   optab_vector);

	      if (!optab
		  || optab_handler (optab, vec_mode) == CODE_FOR_nothing)
		{
		  /* No vector/vector shift, try for a vector/scalar shift.  */
		  optab = optab_for_tree_code (rhs_code, vectype,
					       optab_scalar);

		  if (!optab)
		    {
		      if (dump_enabled_p ())
			dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
					 "Build SLP failed: no optab.");
		      vect_free_oprnd_info (oprnds_info);
		      return false;
		    }
		  icode = (int) optab_handler (optab, vec_mode);
		  if (icode == CODE_FOR_nothing)
		    {
		      if (dump_enabled_p ())
			dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
					 "Build SLP failed: "
					 "op not supported by target.");
		      vect_free_oprnd_info (oprnds_info);
		      return false;
		    }
		  optab_op2_mode = insn_data[icode].operand[2].mode;
		  if (!VECTOR_MODE_P (optab_op2_mode))
		    {
		      need_same_oprnds = true;
		      first_op1 = gimple_assign_rhs2 (stmt);
		    }
		}
	    }
	  else if (rhs_code == WIDEN_LSHIFT_EXPR)
            {
              need_same_oprnds = true;
              first_op1 = gimple_assign_rhs2 (stmt);
            }
	}
      else
	{
	  /* Subsequent stmts must use the same operation as the first,
	     except REALPART/IMAGPART pairs and grouped memory refs.  */
	  if (first_stmt_code != rhs_code
	      && (first_stmt_code != IMAGPART_EXPR
		  || rhs_code != REALPART_EXPR)
	      && (first_stmt_code != REALPART_EXPR
		  || rhs_code != IMAGPART_EXPR)
	      && !(STMT_VINFO_GROUPED_ACCESS (vinfo_for_stmt (stmt))
		   && (first_stmt_code == ARRAY_REF
		       || first_stmt_code == INDIRECT_REF
		       || first_stmt_code == COMPONENT_REF
		       || first_stmt_code == MEM_REF)))
	    {
	      if (dump_enabled_p ())
		{
		  dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
				   "Build SLP failed: different operation "
				   "in stmt ");
		  dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
		}

	      vect_free_oprnd_info (oprnds_info);
	      return false;
	    }

	  if (need_same_oprnds
	      && !operand_equal_p (first_op1, gimple_assign_rhs2 (stmt), 0))
	    {
	      if (dump_enabled_p ())
		{
		  dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
				   "Build SLP failed: different shift "
				   "arguments in ");
		  dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
		}

	      vect_free_oprnd_info (oprnds_info);
	      return false;
	    }

	  if (rhs_code == CALL_EXPR)
	    {
	      /* Calls must target the same function with the same type
		 and arity throughout the node.  */
	      gimple first_stmt = stmts[0];
	      if (gimple_call_num_args (stmt) != nops
		  || !operand_equal_p (gimple_call_fn (first_stmt),
				       gimple_call_fn (stmt), 0)
		  || gimple_call_fntype (first_stmt)
		     != gimple_call_fntype (stmt))
		{
		  if (dump_enabled_p ())
		    {
		      dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
				       "Build SLP failed: different calls in ");
		      dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
					stmt, 0);
		    }

		  vect_free_oprnd_info (oprnds_info);
		  return false;
		}
	    }
	}

      /* Grouped store or load.  */
      if (STMT_VINFO_GROUPED_ACCESS (vinfo_for_stmt (stmt)))
	{
	  if (REFERENCE_CLASS_P (lhs))
	    {
              /* Store.  */
	      if (!vect_get_and_check_slp_defs (loop_vinfo, bb_vinfo, *node,
						stmt, ncopies_for_cost,
						(i == 0), &oprnds_info,
						prologue_cost_vec,
						body_cost_vec))
		{
	  	  vect_free_oprnd_info (oprnds_info);
 		  return false;
		}
	    }
	  else
	    {
	      /* Load.  */
              /* FORNOW: Check that there is no gap between the loads.  */
	      if ((GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)) == stmt
		   && GROUP_GAP (vinfo_for_stmt (stmt)) != 0)
		  || (GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)) != stmt
		      && GROUP_GAP (vinfo_for_stmt (stmt)) != 1))
		{
		  if (dump_enabled_p ())
		    {
		      dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
				       "Build SLP failed: grouped "
				       "loads have gaps ");
		      dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
					stmt, 0);
		    }

		  vect_free_oprnd_info (oprnds_info);
		  return false;
		}

	      /* Check that the size of interleaved loads group is not
		 greater than the SLP group size.  */
	      if (loop_vinfo
		  && GROUP_SIZE (vinfo_for_stmt (stmt)) > ncopies * group_size)
		{
		  if (dump_enabled_p ())
		    {
		      dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
				       "Build SLP failed: the number "
				       "of interleaved loads is greater than "
				       "the SLP group size ");
		      dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
					stmt, 0);
		    }

		  vect_free_oprnd_info (oprnds_info);
		  return false;
		}

	      old_first_load = first_load;
	      first_load = GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt));
	      if (prev_first_load)
		{
		  /* Check that there are no loads from different interleaving
		     chains in the same node.  The only exception is complex
		     numbers.  */
		  if (prev_first_load != first_load
		      && rhs_code != REALPART_EXPR
		      && rhs_code != IMAGPART_EXPR)
		    {
		      if (dump_enabled_p ())
			{
			  dump_printf_loc (MSG_MISSED_OPTIMIZATION,
					   vect_location,
					   "Build SLP failed: different "
					   "interleaving chains in one node ");
			  dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
					    stmt, 0);
			}

		      vect_free_oprnd_info (oprnds_info);
		      return false;
		    }
		}
	      else
		prev_first_load = first_load;

	      /* In some cases a group of loads is just the same load
		 repeated N times.  Only analyze its cost once.  */
	      if (first_load == stmt && old_first_load != first_load)
		{
		  first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (stmt));
		  if (vect_supportable_dr_alignment (first_dr, false)
		      == dr_unaligned_unsupported)
		    {
		      if (dump_enabled_p ())
			{
			  dump_printf_loc (MSG_MISSED_OPTIMIZATION,
					   vect_location,
					   "Build SLP failed: unsupported "
					   "unaligned load ");
			  dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
					    stmt, 0);
			}

		      vect_free_oprnd_info (oprnds_info);
		      return false;
		    }

		  /* Analyze costs (for the first stmt in the group).  */
		  vect_model_load_cost (vinfo_for_stmt (stmt),
					ncopies_for_cost, false, *node,
					prologue_cost_vec, body_cost_vec);
		}

	      /* Store the place of this load in the interleaving chain.  In
		 case that permutation is needed we later decide if a specific
		 permutation is supported.  */
	      load_place = vect_get_place_in_interleaving_chain (stmt,
								 first_load);
	      if (load_place != i)
		permutation = true;

	      load_permutation->safe_push (load_place);

	      /* We stop the tree when we reach a group of loads.  */
	      stop_recursion = true;
	      continue;
	    }
	} /* Grouped access.  */
      else
	{
	  if (TREE_CODE_CLASS (rhs_code) == tcc_reference)
	    {
	      /* Not grouped load.  */
	      if (dump_enabled_p ())
		{
		  dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
				   "Build SLP failed: not grouped load ");
		  dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
		}

	      /* FORNOW: Not grouped loads are not supported.  */
	      vect_free_oprnd_info (oprnds_info);
	      return false;
	    }

	  /* Not memory operation.  */
	  if (TREE_CODE_CLASS (rhs_code) != tcc_binary
	      && TREE_CODE_CLASS (rhs_code) != tcc_unary
	      && rhs_code != COND_EXPR
	      && rhs_code != CALL_EXPR)
	    {
	      if (dump_enabled_p ())
		{
		  dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
				   "Build SLP failed: operation");
		  dump_printf (MSG_MISSED_OPTIMIZATION, " unsupported ");
		  dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
		}

	      vect_free_oprnd_info (oprnds_info);
	      return false;
	    }

	  if (rhs_code == COND_EXPR)
	    {
	      /* All COND_EXPRs in the node must use the same comparison
		 code.  */
	      tree cond_expr = gimple_assign_rhs1 (stmt);

	      if (i == 0)
		first_cond_code = TREE_CODE (cond_expr);
	      else if (first_cond_code != TREE_CODE (cond_expr))
		{
		  if (dump_enabled_p ())
		    {
		      dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
				       "Build SLP failed: different"
				       " operation");
		      dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
					stmt, 0);
		    }

		  vect_free_oprnd_info (oprnds_info);
		  return false;
		}
	    }

	  /* Find the def-stmts.  */
	  if (!vect_get_and_check_slp_defs (loop_vinfo, bb_vinfo, *node, stmt,
					    ncopies_for_cost, (i == 0),
					    &oprnds_info, prologue_cost_vec,
					    body_cost_vec))
	    {
	      vect_free_oprnd_info (oprnds_info);
	      return false;
	    }
	}
    }

  /* Grouped loads were reached - stop the recursion.  */
  if (stop_recursion)
    {
      loads->safe_push (*node);
      if (permutation)
        {
	  gimple first_stmt = stmts[0];
          *loads_permuted = true;
	  (void) record_stmt_cost (body_cost_vec, group_size, vec_perm,
				   vinfo_for_stmt (first_stmt), 0, vect_body);
        }
      else
        {
          /* We don't check here complex numbers chains, so we set
             LOADS_PERMUTED for further check in
             vect_supported_load_permutation_p.  */
	  if (rhs_code == REALPART_EXPR || rhs_code == IMAGPART_EXPR)
	    *loads_permuted = true;
	}

      vect_free_oprnd_info (oprnds_info);
      return true;
    }

  /* Create SLP_TREE nodes for the definition node/s.  */
  FOR_EACH_VEC_ELT (oprnds_info, i, oprnd_info)
    {
      slp_tree child;

      if (oprnd_info->first_dt != vect_internal_def)
        continue;

      child = vect_create_new_slp_node (oprnd_info->def_stmts);
      if (!child
          || !vect_build_slp_tree (loop_vinfo, bb_vinfo, &child, group_size,
				   outside_cost, ncopies_for_cost,
				   max_nunits, load_permutation, loads,
				   vectorization_factor, loads_permuted,
				   prologue_cost_vec, body_cost_vec))
        {
	  /* On failure the child owns the def_stmts vector; clear our
	     reference before freeing it to avoid a double release.  */
	  if (child)
	    oprnd_info->def_stmts = vNULL;
	  vect_free_slp_tree (child);
	  vect_free_oprnd_info (oprnds_info);
   	  return false;
	}

      /* Ownership of def_stmts moved into CHILD; give oprnd_info a fresh
	 empty vector so vect_free_oprnd_info does not free the moved one.  */
      oprnd_info->def_stmts.create (0);
      SLP_TREE_CHILDREN (*node).quick_push (child);
    }

  vect_free_oprnd_info (oprnds_info);
  return true;
}
975
976 /* Dump a slp tree NODE using flags specified in DUMP_KIND. */
977
978 static void
979 vect_print_slp_tree (int dump_kind, slp_tree node)
980 {
981 int i;
982 gimple stmt;
983 slp_void_p child;
984
985 if (!node)
986 return;
987
988 dump_printf (dump_kind, "node ");
989 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, stmt)
990 {
991 dump_printf (dump_kind, "\n\tstmt %d ", i);
992 dump_gimple_stmt (dump_kind, TDF_SLIM, stmt, 0);
993 }
994 dump_printf (dump_kind, "\n");
995
996 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
997 vect_print_slp_tree (dump_kind, (slp_tree) child);
998 }
999
1000
1001 /* Mark the tree rooted at NODE with MARK (PURE_SLP or HYBRID).
1002 If MARK is HYBRID, it refers to a specific stmt in NODE (the stmt at index
1003 J). Otherwise, MARK is PURE_SLP and J is -1, which indicates that all the
1004 stmts in NODE are to be marked. */
1005
1006 static void
1007 vect_mark_slp_stmts (slp_tree node, enum slp_vect_type mark, int j)
1008 {
1009 int i;
1010 gimple stmt;
1011 slp_void_p child;
1012
1013 if (!node)
1014 return;
1015
1016 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, stmt)
1017 if (j < 0 || i == j)
1018 STMT_SLP_TYPE (vinfo_for_stmt (stmt)) = mark;
1019
1020 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
1021 vect_mark_slp_stmts ((slp_tree) child, mark, j);
1022 }
1023
1024
1025 /* Mark the statements of the tree rooted at NODE as relevant (vect_used). */
1026
1027 static void
1028 vect_mark_slp_stmts_relevant (slp_tree node)
1029 {
1030 int i;
1031 gimple stmt;
1032 stmt_vec_info stmt_info;
1033 slp_void_p child;
1034
1035 if (!node)
1036 return;
1037
1038 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, stmt)
1039 {
1040 stmt_info = vinfo_for_stmt (stmt);
1041 gcc_assert (!STMT_VINFO_RELEVANT (stmt_info)
1042 || STMT_VINFO_RELEVANT (stmt_info) == vect_used_in_scope);
1043 STMT_VINFO_RELEVANT (stmt_info) = vect_used_in_scope;
1044 }
1045
1046 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
1047 vect_mark_slp_stmts_relevant ((slp_tree) child);
1048 }
1049
1050
/* Check if the permutation required by the SLP INSTANCE is supported.
   Reorganize the SLP nodes stored in SLP_INSTANCE_LOADS if needed.
   Return false if the loads come from different interleaving chains or
   the permutation cannot be generated.  */

static bool
vect_supported_slp_permutation_p (slp_instance instance)
{
  slp_tree node = SLP_INSTANCE_LOADS (instance)[0];
  gimple stmt = SLP_TREE_SCALAR_STMTS (node)[0];
  gimple first_load = GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt));
  vec<slp_tree> sorted_loads = vNULL;
  int index;
  slp_tree *tmp_loads = NULL;
  int group_size = SLP_INSTANCE_GROUP_SIZE (instance), i, j;
  slp_tree load;

  /* FORNOW: The only supported loads permutation is loads from the same
     location in all the loads in the node, when the data-refs in
     nodes of LOADS constitute an interleaving chain.
     Sort the nodes according to the order of accesses in the chain.  */
  /* Two cursors advance in lockstep: I steps through LOAD_PERMUTATION
     in strides of GROUP_SIZE (one index per load node), J steps through
     the load nodes one at a time.  */
  tmp_loads = (slp_tree *) xmalloc (sizeof (slp_tree) * group_size);
  for (i = 0, j = 0;
       SLP_INSTANCE_LOAD_PERMUTATION (instance).iterate (i, &index)
       && SLP_INSTANCE_LOADS (instance).iterate (j, &load);
       i += group_size, j++)
    {
      gimple scalar_stmt = SLP_TREE_SCALAR_STMTS (load)[0];
      /* Check that the loads are all in the same interleaving chain.  */
      if (GROUP_FIRST_ELEMENT (vinfo_for_stmt (scalar_stmt)) != first_load)
        {
          if (dump_enabled_p ())
            {
              dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			       "Build SLP failed: unsupported data "
			       "permutation ");
              dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
				scalar_stmt, 0);
            }

	  free (tmp_loads);
          return false;
        }

      /* Place the node at its position in the interleaving chain.  */
      tmp_loads[index] = load;
    }

  sorted_loads.create (group_size);
  for (i = 0; i < group_size; i++)
    sorted_loads.safe_push (tmp_loads[i]);

  /* Replace the loads vector with the chain-ordered one.  */
  SLP_INSTANCE_LOADS (instance).release ();
  SLP_INSTANCE_LOADS (instance) = sorted_loads;
  free (tmp_loads);

  /* Dry-run (analysis-only) the permutation generation.  */
  if (!vect_transform_slp_perm_load (stmt, vNULL, NULL,
                                     SLP_INSTANCE_UNROLLING_FACTOR (instance),
                                     instance, true))
    return false;

  return true;
}
1111
1112
1113 /* Rearrange the statements of NODE according to PERMUTATION. */
1114
1115 static void
1116 vect_slp_rearrange_stmts (slp_tree node, unsigned int group_size,
1117 vec<int> permutation)
1118 {
1119 gimple stmt;
1120 vec<gimple> tmp_stmts;
1121 unsigned int index, i;
1122 slp_void_p child;
1123
1124 if (!node)
1125 return;
1126
1127 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
1128 vect_slp_rearrange_stmts ((slp_tree) child, group_size, permutation);
1129
1130 gcc_assert (group_size == SLP_TREE_SCALAR_STMTS (node).length ());
1131 tmp_stmts.create (group_size);
1132
1133 for (i = 0; i < group_size; i++)
1134 tmp_stmts.safe_push (NULL);
1135
1136 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, stmt)
1137 {
1138 index = permutation[i];
1139 tmp_stmts[index] = stmt;
1140 }
1141
1142 SLP_TREE_SCALAR_STMTS (node).release ();
1143 SLP_TREE_SCALAR_STMTS (node) = tmp_stmts;
1144 }
1145
1146
1147 /* Check if the required load permutation is supported.
1148 LOAD_PERMUTATION contains a list of indices of the loads.
1149 In SLP this permutation is relative to the order of grouped stores that are
1150 the base of the SLP instance. */
1151
1152 static bool
1153 vect_supported_load_permutation_p (slp_instance slp_instn, int group_size,
1154 vec<int> load_permutation)
1155 {
1156 int i = 0, j, prev = -1, next, k, number_of_groups;
1157 bool supported, bad_permutation = false;
1158 sbitmap load_index;
1159 slp_tree node, other_complex_node;
1160 gimple stmt, first = NULL, other_node_first, load, next_load, first_load;
1161 unsigned complex_numbers = 0;
1162 struct data_reference *dr;
1163 bb_vec_info bb_vinfo;
1164
1165 /* FORNOW: permutations are only supported in SLP. */
1166 if (!slp_instn)
1167 return false;
1168
1169 if (dump_enabled_p ())
1170 {
1171 dump_printf_loc (MSG_NOTE, vect_location, "Load permutation ");
1172 FOR_EACH_VEC_ELT (load_permutation, i, next)
1173 dump_printf (MSG_NOTE, "%d ", next);
1174 }
1175
1176 /* In case of reduction every load permutation is allowed, since the order
1177 of the reduction statements is not important (as opposed to the case of
1178 grouped stores). The only condition we need to check is that all the
1179 load nodes are of the same size and have the same permutation (and then
1180 rearrange all the nodes of the SLP instance according to this
1181 permutation). */
1182
1183 /* Check that all the load nodes are of the same size. */
1184 FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (slp_instn), i, node)
1185 {
1186 if (SLP_TREE_SCALAR_STMTS (node).length () != (unsigned) group_size)
1187 return false;
1188
1189 stmt = SLP_TREE_SCALAR_STMTS (node)[0];
1190 if (is_gimple_assign (stmt)
1191 && (gimple_assign_rhs_code (stmt) == REALPART_EXPR
1192 || gimple_assign_rhs_code (stmt) == IMAGPART_EXPR))
1193 complex_numbers++;
1194 }
1195
1196 /* Complex operands can be swapped as following:
1197 real_c = real_b + real_a;
1198 imag_c = imag_a + imag_b;
1199 i.e., we have {real_b, imag_a} and {real_a, imag_b} instead of
1200 {real_a, imag_a} and {real_b, imag_b}. We check here that if interleaving
1201 chains are mixed, they match the above pattern. */
1202 if (complex_numbers)
1203 {
1204 FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (slp_instn), i, node)
1205 {
1206 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), j, stmt)
1207 {
1208 if (j == 0)
1209 first = stmt;
1210 else
1211 {
1212 if (GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)) != first)
1213 {
1214 if (complex_numbers != 2)
1215 return false;
1216
1217 if (i == 0)
1218 k = 1;
1219 else
1220 k = 0;
1221
1222 other_complex_node = SLP_INSTANCE_LOADS (slp_instn)[k];
1223 other_node_first =
1224 SLP_TREE_SCALAR_STMTS (other_complex_node)[0];
1225
1226 if (GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt))
1227 != other_node_first)
1228 return false;
1229 }
1230 }
1231 }
1232 }
1233 }
1234
1235 /* We checked that this case ok, so there is no need to proceed with
1236 permutation tests. */
1237 if (complex_numbers == 2
1238 && SLP_INSTANCE_LOADS (slp_instn).length () == 2)
1239 {
1240 SLP_INSTANCE_LOADS (slp_instn).release ();
1241 SLP_INSTANCE_LOAD_PERMUTATION (slp_instn).release ();
1242 return true;
1243 }
1244
1245 node = SLP_INSTANCE_TREE (slp_instn);
1246 stmt = SLP_TREE_SCALAR_STMTS (node)[0];
1247 /* LOAD_PERMUTATION is a list of indices of all the loads of the SLP
1248 instance, not all the loads belong to the same node or interleaving
1249 group. Hence, we need to divide them into groups according to
1250 GROUP_SIZE. */
1251 number_of_groups = load_permutation.length () / group_size;
1252
1253 /* Reduction (there are no data-refs in the root).
1254 In reduction chain the order of the loads is important. */
1255 if (!STMT_VINFO_DATA_REF (vinfo_for_stmt (stmt))
1256 && !GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)))
1257 {
1258 int first_group_load_index;
1259
1260 /* Compare all the permutation sequences to the first one. */
1261 for (i = 1; i < number_of_groups; i++)
1262 {
1263 k = 0;
1264 for (j = i * group_size; j < i * group_size + group_size; j++)
1265 {
1266 next = load_permutation[j];
1267 first_group_load_index = load_permutation[k];
1268
1269 if (next != first_group_load_index)
1270 {
1271 bad_permutation = true;
1272 break;
1273 }
1274
1275 k++;
1276 }
1277
1278 if (bad_permutation)
1279 break;
1280 }
1281
1282 if (!bad_permutation)
1283 {
1284 /* Check that the loads in the first sequence are different and there
1285 are no gaps between them. */
1286 load_index = sbitmap_alloc (group_size);
1287 bitmap_clear (load_index);
1288 for (k = 0; k < group_size; k++)
1289 {
1290 first_group_load_index = load_permutation[k];
1291 if (bitmap_bit_p (load_index, first_group_load_index))
1292 {
1293 bad_permutation = true;
1294 break;
1295 }
1296
1297 bitmap_set_bit (load_index, first_group_load_index);
1298 }
1299
1300 if (!bad_permutation)
1301 for (k = 0; k < group_size; k++)
1302 if (!bitmap_bit_p (load_index, k))
1303 {
1304 bad_permutation = true;
1305 break;
1306 }
1307
1308 sbitmap_free (load_index);
1309 }
1310
1311 if (!bad_permutation)
1312 {
1313 /* This permutation is valid for reduction. Since the order of the
1314 statements in the nodes is not important unless they are memory
1315 accesses, we can rearrange the statements in all the nodes
1316 according to the order of the loads. */
1317 vect_slp_rearrange_stmts (SLP_INSTANCE_TREE (slp_instn), group_size,
1318 load_permutation);
1319 SLP_INSTANCE_LOAD_PERMUTATION (slp_instn).release ();
1320 return true;
1321 }
1322 }
1323
1324 /* In basic block vectorization we allow any subchain of an interleaving
1325 chain.
1326 FORNOW: not supported in loop SLP because of realignment compications. */
1327 bb_vinfo = STMT_VINFO_BB_VINFO (vinfo_for_stmt (stmt));
1328 bad_permutation = false;
1329 /* Check that for every node in the instance the loads form a subchain. */
1330 if (bb_vinfo)
1331 {
1332 FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (slp_instn), i, node)
1333 {
1334 next_load = NULL;
1335 first_load = NULL;
1336 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), j, load)
1337 {
1338 if (!first_load)
1339 first_load = GROUP_FIRST_ELEMENT (vinfo_for_stmt (load));
1340 else if (first_load
1341 != GROUP_FIRST_ELEMENT (vinfo_for_stmt (load)))
1342 {
1343 bad_permutation = true;
1344 break;
1345 }
1346
1347 if (j != 0 && next_load != load)
1348 {
1349 bad_permutation = true;
1350 break;
1351 }
1352
1353 next_load = GROUP_NEXT_ELEMENT (vinfo_for_stmt (load));
1354 }
1355
1356 if (bad_permutation)
1357 break;
1358 }
1359
1360 /* Check that the alignment of the first load in every subchain, i.e.,
1361 the first statement in every load node, is supported. */
1362 if (!bad_permutation)
1363 {
1364 FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (slp_instn), i, node)
1365 {
1366 first_load = SLP_TREE_SCALAR_STMTS (node)[0];
1367 if (first_load
1368 != GROUP_FIRST_ELEMENT (vinfo_for_stmt (first_load)))
1369 {
1370 dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_load));
1371 if (vect_supportable_dr_alignment (dr, false)
1372 == dr_unaligned_unsupported)
1373 {
1374 if (dump_enabled_p ())
1375 {
1376 dump_printf_loc (MSG_MISSED_OPTIMIZATION,
1377 vect_location,
1378 "unsupported unaligned load ");
1379 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
1380 first_load, 0);
1381 }
1382 bad_permutation = true;
1383 break;
1384 }
1385 }
1386 }
1387
1388 if (!bad_permutation)
1389 {
1390 SLP_INSTANCE_LOAD_PERMUTATION (slp_instn).release ();
1391 return true;
1392 }
1393 }
1394 }
1395
1396 /* FORNOW: the only supported permutation is 0..01..1.. of length equal to
1397 GROUP_SIZE and where each sequence of same drs is of GROUP_SIZE length as
1398 well (unless it's reduction). */
1399 if (load_permutation.length ()
1400 != (unsigned int) (group_size * group_size))
1401 return false;
1402
1403 supported = true;
1404 load_index = sbitmap_alloc (group_size);
1405 bitmap_clear (load_index);
1406 for (j = 0; j < group_size; j++)
1407 {
1408 for (i = j * group_size, k = 0;
1409 load_permutation.iterate (i, &next) && k < group_size;
1410 i++, k++)
1411 {
1412 if (i != j * group_size && next != prev)
1413 {
1414 supported = false;
1415 break;
1416 }
1417
1418 prev = next;
1419 }
1420
1421 if (bitmap_bit_p (load_index, prev))
1422 {
1423 supported = false;
1424 break;
1425 }
1426
1427 bitmap_set_bit (load_index, prev);
1428 }
1429
1430 for (j = 0; j < group_size; j++)
1431 if (!bitmap_bit_p (load_index, j))
1432 return false;
1433
1434 sbitmap_free (load_index);
1435
1436 if (supported && i == group_size * group_size
1437 && vect_supported_slp_permutation_p (slp_instn))
1438 return true;
1439
1440 return false;
1441 }
1442
1443
1444 /* Find the first load in the loop that belongs to INSTANCE.
1445 When loads are in several SLP nodes, there can be a case in which the first
1446 load does not appear in the first SLP node to be transformed, causing
1447 incorrect order of statements. Since we generate all the loads together,
1448 they must be inserted before the first load of the SLP instance and not
1449 before the first load of the first node of the instance. */
1450
1451 static gimple
1452 vect_find_first_load_in_slp_instance (slp_instance instance)
1453 {
1454 int i, j;
1455 slp_tree load_node;
1456 gimple first_load = NULL, load;
1457
1458 FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (instance), i, load_node)
1459 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (load_node), j, load)
1460 first_load = get_earlier_stmt (load, first_load);
1461
1462 return first_load;
1463 }
1464
1465
1466 /* Find the last store in SLP INSTANCE. */
1467
1468 static gimple
1469 vect_find_last_store_in_slp_instance (slp_instance instance)
1470 {
1471 int i;
1472 slp_tree node;
1473 gimple last_store = NULL, store;
1474
1475 node = SLP_INSTANCE_TREE (instance);
1476 for (i = 0; SLP_TREE_SCALAR_STMTS (node).iterate (i, &store); i++)
1477 last_store = get_later_stmt (store, last_store);
1478
1479 return last_store;
1480 }
1481
1482
/* Analyze an SLP instance starting from a group of grouped stores.  Call
   vect_build_slp_tree to build a tree of packed stmts if possible.
   Return FALSE if it's impossible to SLP any stmt in the loop.
   STMT is either the first store of an interleaving group or (in loop
   vectorization) a reduction statement.  On success the new instance is
   pushed onto LOOP_VINFO's or BB_VINFO's SLP instance vector.  */

static bool
vect_analyze_slp_instance (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo,
                           gimple stmt)
{
  slp_instance new_instance;
  slp_tree node;
  unsigned int group_size = GROUP_SIZE (vinfo_for_stmt (stmt));
  unsigned int unrolling_factor = 1, nunits;
  tree vectype, scalar_type = NULL_TREE;
  gimple next;
  unsigned int vectorization_factor = 0;
  int outside_cost = 0, ncopies_for_cost, i;
  unsigned int max_nunits = 0;
  vec<int> load_permutation;
  vec<slp_tree> loads;
  struct data_reference *dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (stmt));
  bool loads_permuted = false;
  vec<gimple> scalar_stmts;
  stmt_vector_for_cost body_cost_vec, prologue_cost_vec;
  stmt_info_for_cost *si;

  /* STMT heads an interleaving chain: derive the vector type from the
     data reference if there is one, otherwise from the stmt itself.  */
  if (GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)))
    {
      if (dr)
        {
          scalar_type = TREE_TYPE (DR_REF (dr));
          vectype = get_vectype_for_scalar_type (scalar_type);
        }
      else
        {
          gcc_assert (loop_vinfo);
          vectype = STMT_VINFO_VECTYPE (vinfo_for_stmt (stmt));
        }

      group_size = GROUP_SIZE (vinfo_for_stmt (stmt));
    }
  else
    {
      /* No interleaving chain: STMT is a reduction, and the group is the
         whole set of the loop's reductions.  */
      gcc_assert (loop_vinfo);
      vectype = STMT_VINFO_VECTYPE (vinfo_for_stmt (stmt));
      group_size = LOOP_VINFO_REDUCTIONS (loop_vinfo).length ();
    }

  if (!vectype)
    {
      if (dump_enabled_p ())
        {
          dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                           "Build SLP failed: unsupported data-type ");
          dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM, scalar_type);
        }

      return false;
    }

  nunits = TYPE_VECTOR_SUBPARTS (vectype);
  if (loop_vinfo)
    vectorization_factor = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
  else
    vectorization_factor = nunits;

  /* Calculate the unrolling factor.  A basic block cannot be unrolled, so
     any factor other than 1 means failure there.  */
  unrolling_factor = least_common_multiple (nunits, group_size) / group_size;
  if (unrolling_factor != 1 && !loop_vinfo)
    {
      if (dump_enabled_p ())
        dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                         "Build SLP failed: unrolling required in basic"
                         " block SLP");

      return false;
    }

  /* Create a node (a root of the SLP tree) for the packed grouped stores.  */
  scalar_stmts.create (group_size);
  next = stmt;
  if (GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)))
    {
      /* Collect the stores and store them in SLP_TREE_SCALAR_STMTS.
         When a stmt was replaced by a pattern stmt, use the pattern
         stmt instead.  */
      while (next)
        {
          if (STMT_VINFO_IN_PATTERN_P (vinfo_for_stmt (next))
              && STMT_VINFO_RELATED_STMT (vinfo_for_stmt (next)))
            scalar_stmts.safe_push (
                  STMT_VINFO_RELATED_STMT (vinfo_for_stmt (next)));
          else
            scalar_stmts.safe_push (next);
          next = GROUP_NEXT_ELEMENT (vinfo_for_stmt (next));
        }
    }
  else
    {
      /* Collect reduction statements.  */
      vec<gimple> reductions = LOOP_VINFO_REDUCTIONS (loop_vinfo);
      for (i = 0; reductions.iterate (i, &next); i++)
        scalar_stmts.safe_push (next);
    }

  node = vect_create_new_slp_node (scalar_stmts);

  /* Calculate the number of vector stmts to create based on the unrolling
     factor (number of vectors is 1 if NUNITS >= GROUP_SIZE, and is
     GROUP_SIZE / NUNITS otherwise.  */
  ncopies_for_cost = unrolling_factor * group_size / nunits;

  load_permutation.create (group_size * group_size);
  loads.create (group_size);
  prologue_cost_vec.create (10);
  body_cost_vec.create (10);

  /* Build the tree for the SLP instance.  */
  if (vect_build_slp_tree (loop_vinfo, bb_vinfo, &node, group_size,
                           &outside_cost, ncopies_for_cost,
                           &max_nunits, &load_permutation, &loads,
                           vectorization_factor, &loads_permuted,
                           &prologue_cost_vec, &body_cost_vec))
    {
      void *data = (loop_vinfo ? LOOP_VINFO_TARGET_COST_DATA (loop_vinfo)
                    : BB_VINFO_TARGET_COST_DATA (bb_vinfo));

      /* Calculate the unrolling factor based on the smallest type.  */
      if (max_nunits > nunits)
        unrolling_factor = least_common_multiple (max_nunits, group_size)
                           / group_size;

      /* Re-check the unrolling constraint: the smallest type seen while
         building the tree may have raised the factor.  */
      if (unrolling_factor != 1 && !loop_vinfo)
        {
          if (dump_enabled_p ())
            dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                             "Build SLP failed: unrolling required in basic"
                             " block SLP");
          vect_free_slp_tree (node);
          body_cost_vec.release ();
          prologue_cost_vec.release ();
          load_permutation.release ();
          loads.release ();
          return false;
        }

      /* Create a new SLP instance.  It takes ownership of NODE,
         BODY_COST_VEC, LOADS and LOAD_PERMUTATION.  */
      new_instance = XNEW (struct _slp_instance);
      SLP_INSTANCE_TREE (new_instance) = node;
      SLP_INSTANCE_GROUP_SIZE (new_instance) = group_size;
      SLP_INSTANCE_UNROLLING_FACTOR (new_instance) = unrolling_factor;
      SLP_INSTANCE_BODY_COST_VEC (new_instance) = body_cost_vec;
      SLP_INSTANCE_LOADS (new_instance) = loads;
      SLP_INSTANCE_FIRST_LOAD_STMT (new_instance) = NULL;
      SLP_INSTANCE_LOAD_PERMUTATION (new_instance) = load_permutation;

      if (loads_permuted)
        {
          if (!vect_supported_load_permutation_p (new_instance, group_size,
                                                  load_permutation))
            {
              if (dump_enabled_p ())
                {
                  dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                                   "Build SLP failed: unsupported load "
                                   "permutation ");
                  dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
                }

              /* Freeing the instance also frees NODE and the vectors it
                 took ownership of above.  */
              vect_free_slp_instance (new_instance);
              prologue_cost_vec.release ();
              return false;
            }

          SLP_INSTANCE_FIRST_LOAD_STMT (new_instance)
            = vect_find_first_load_in_slp_instance (new_instance);
        }
      else
        SLP_INSTANCE_LOAD_PERMUTATION (new_instance).release ();

      /* Record the prologue costs, which were delayed until we were
         sure that SLP was successful.  Unlike the body costs, we know
         the final values now regardless of the loop vectorization factor.  */
      FOR_EACH_VEC_ELT (prologue_cost_vec, i, si)
        {
          struct _stmt_vec_info *stmt_info
            = si->stmt ? vinfo_for_stmt (si->stmt) : NULL;
          (void) add_stmt_cost (data, si->count, si->kind, stmt_info,
                                si->misalign, vect_prologue);
        }

      prologue_cost_vec.release ();

      if (loop_vinfo)
        LOOP_VINFO_SLP_INSTANCES (loop_vinfo).safe_push (new_instance);
      else
        BB_VINFO_SLP_INSTANCES (bb_vinfo).safe_push (new_instance);

      if (dump_enabled_p ())
        vect_print_slp_tree (MSG_NOTE, node);

      return true;
    }
  else
    {
      body_cost_vec.release ();
      prologue_cost_vec.release ();
    }

  /* Failed to SLP.  */
  /* Free the allocated memory.  */
  vect_free_slp_tree (node);
  load_permutation.release ();
  loads.release ();

  return false;
}
1697
1698
/* Check if there are stmts in the loop can be vectorized using SLP.  Build SLP
   trees of packed scalar stmts if SLP is possible.
   Exactly one of LOOP_VINFO / BB_VINFO is expected to be non-null.  Returns
   false only when basic-block SLP fails entirely or a reduction chain cannot
   be SLPed; for plain loops the return value is always true, since a loop can
   still be vectorized without SLP.  */

bool
vect_analyze_slp (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo)
{
  unsigned int i;
  vec<gimple> grouped_stores;
  vec<gimple> reductions = vNULL;
  vec<gimple> reduc_chains = vNULL;
  gimple first_element;
  bool ok = false;

  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location, "=== vect_analyze_slp ===");

  /* Reductions and reduction chains only exist in loop vectorization.  */
  if (loop_vinfo)
    {
      grouped_stores = LOOP_VINFO_GROUPED_STORES (loop_vinfo);
      reduc_chains = LOOP_VINFO_REDUCTION_CHAINS (loop_vinfo);
      reductions = LOOP_VINFO_REDUCTIONS (loop_vinfo);
    }
  else
    grouped_stores = BB_VINFO_GROUPED_STORES (bb_vinfo);

  /* Find SLP sequences starting from groups of grouped stores.  */
  FOR_EACH_VEC_ELT (grouped_stores, i, first_element)
    if (vect_analyze_slp_instance (loop_vinfo, bb_vinfo, first_element))
      ok = true;

  /* In a basic block the grouped stores are the only SLP seeds, so finding
     no instance at all means BB vectorization fails.  */
  if (bb_vinfo && !ok)
    {
      if (dump_enabled_p ())
        dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                         "Failed to SLP the basic block.");

      return false;
    }

  if (loop_vinfo
      && LOOP_VINFO_REDUCTION_CHAINS (loop_vinfo).length () > 0)
    {
      /* Find SLP sequences starting from reduction chains.  */
      FOR_EACH_VEC_ELT (reduc_chains, i, first_element)
        if (vect_analyze_slp_instance (loop_vinfo, bb_vinfo, first_element))
          ok = true;
        else
          return false;

      /* Don't try to vectorize SLP reductions if reduction chain was
         detected.  */
      return ok;
    }

  /* Find SLP sequences starting from groups of reductions.  */
  if (loop_vinfo && LOOP_VINFO_REDUCTIONS (loop_vinfo).length () > 1
      && vect_analyze_slp_instance (loop_vinfo, bb_vinfo, reductions[0]))
    ok = true;

  /* Deliberately true even if OK is false: loop vectorization can proceed
     without any SLP instances.  */
  return true;
}
1760
1761
1762 /* For each possible SLP instance decide whether to SLP it and calculate overall
1763 unrolling factor needed to SLP the loop. Return TRUE if decided to SLP at
1764 least one instance. */
1765
1766 bool
1767 vect_make_slp_decision (loop_vec_info loop_vinfo)
1768 {
1769 unsigned int i, unrolling_factor = 1;
1770 vec<slp_instance> slp_instances = LOOP_VINFO_SLP_INSTANCES (loop_vinfo);
1771 slp_instance instance;
1772 int decided_to_slp = 0;
1773
1774 if (dump_enabled_p ())
1775 dump_printf_loc (MSG_NOTE, vect_location, "=== vect_make_slp_decision ===");
1776
1777 FOR_EACH_VEC_ELT (slp_instances, i, instance)
1778 {
1779 /* FORNOW: SLP if you can. */
1780 if (unrolling_factor < SLP_INSTANCE_UNROLLING_FACTOR (instance))
1781 unrolling_factor = SLP_INSTANCE_UNROLLING_FACTOR (instance);
1782
1783 /* Mark all the stmts that belong to INSTANCE as PURE_SLP stmts. Later we
1784 call vect_detect_hybrid_slp () to find stmts that need hybrid SLP and
1785 loop-based vectorization. Such stmts will be marked as HYBRID. */
1786 vect_mark_slp_stmts (SLP_INSTANCE_TREE (instance), pure_slp, -1);
1787 decided_to_slp++;
1788 }
1789
1790 LOOP_VINFO_SLP_UNROLLING_FACTOR (loop_vinfo) = unrolling_factor;
1791
1792 if (decided_to_slp && dump_enabled_p ())
1793 dump_printf_loc (MSG_OPTIMIZED_LOCATIONS, vect_location,
1794 "Decided to SLP %d instances. Unrolling factor %d",
1795 decided_to_slp, unrolling_factor);
1796
1797 return (decided_to_slp > 0);
1798 }
1799
1800
1801 /* Find stmts that must be both vectorized and SLPed (since they feed stmts that
1802 can't be SLPed) in the tree rooted at NODE. Mark such stmts as HYBRID. */
1803
1804 static void
1805 vect_detect_hybrid_slp_stmts (slp_tree node)
1806 {
1807 int i;
1808 vec<gimple> stmts = SLP_TREE_SCALAR_STMTS (node);
1809 gimple stmt = stmts[0];
1810 imm_use_iterator imm_iter;
1811 gimple use_stmt;
1812 stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
1813 slp_void_p child;
1814 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
1815 struct loop *loop = NULL;
1816 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_vinfo);
1817 basic_block bb = NULL;
1818
1819 if (!node)
1820 return;
1821
1822 if (loop_vinfo)
1823 loop = LOOP_VINFO_LOOP (loop_vinfo);
1824 else
1825 bb = BB_VINFO_BB (bb_vinfo);
1826
1827 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, stmt)
1828 if (PURE_SLP_STMT (vinfo_for_stmt (stmt))
1829 && TREE_CODE (gimple_op (stmt, 0)) == SSA_NAME)
1830 FOR_EACH_IMM_USE_STMT (use_stmt, imm_iter, gimple_op (stmt, 0))
1831 if (gimple_bb (use_stmt)
1832 && ((loop && flow_bb_inside_loop_p (loop, gimple_bb (use_stmt)))
1833 || bb == gimple_bb (use_stmt))
1834 && (stmt_vinfo = vinfo_for_stmt (use_stmt))
1835 && !STMT_SLP_TYPE (stmt_vinfo)
1836 && (STMT_VINFO_RELEVANT (stmt_vinfo)
1837 || VECTORIZABLE_CYCLE_DEF (STMT_VINFO_DEF_TYPE (stmt_vinfo)))
1838 && !(gimple_code (use_stmt) == GIMPLE_PHI
1839 && STMT_VINFO_DEF_TYPE (stmt_vinfo)
1840 == vect_reduction_def))
1841 vect_mark_slp_stmts (node, hybrid, i);
1842
1843 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
1844 vect_detect_hybrid_slp_stmts ((slp_tree) child);
1845 }
1846
1847
1848 /* Find stmts that must be both vectorized and SLPed. */
1849
1850 void
1851 vect_detect_hybrid_slp (loop_vec_info loop_vinfo)
1852 {
1853 unsigned int i;
1854 vec<slp_instance> slp_instances = LOOP_VINFO_SLP_INSTANCES (loop_vinfo);
1855 slp_instance instance;
1856
1857 if (dump_enabled_p ())
1858 dump_printf_loc (MSG_NOTE, vect_location, "=== vect_detect_hybrid_slp ===");
1859
1860 FOR_EACH_VEC_ELT (slp_instances, i, instance)
1861 vect_detect_hybrid_slp_stmts (SLP_INSTANCE_TREE (instance));
1862 }
1863
1864
1865 /* Create and initialize a new bb_vec_info struct for BB, as well as
1866 stmt_vec_info structs for all the stmts in it. */
1867
1868 static bb_vec_info
1869 new_bb_vec_info (basic_block bb)
1870 {
1871 bb_vec_info res = NULL;
1872 gimple_stmt_iterator gsi;
1873
1874 res = (bb_vec_info) xcalloc (1, sizeof (struct _bb_vec_info));
1875 BB_VINFO_BB (res) = bb;
1876
1877 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
1878 {
1879 gimple stmt = gsi_stmt (gsi);
1880 gimple_set_uid (stmt, 0);
1881 set_vinfo_for_stmt (stmt, new_stmt_vec_info (stmt, NULL, res));
1882 }
1883
1884 BB_VINFO_GROUPED_STORES (res).create (10);
1885 BB_VINFO_SLP_INSTANCES (res).create (2);
1886 BB_VINFO_TARGET_COST_DATA (res) = init_cost (NULL);
1887
1888 bb->aux = res;
1889 return res;
1890 }
1891
1892
1893 /* Free BB_VINFO struct, as well as all the stmt_vec_info structs of all the
1894 stmts in the basic block. */
1895
1896 static void
1897 destroy_bb_vec_info (bb_vec_info bb_vinfo)
1898 {
1899 vec<slp_instance> slp_instances;
1900 slp_instance instance;
1901 basic_block bb;
1902 gimple_stmt_iterator si;
1903 unsigned i;
1904
1905 if (!bb_vinfo)
1906 return;
1907
1908 bb = BB_VINFO_BB (bb_vinfo);
1909
1910 for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
1911 {
1912 gimple stmt = gsi_stmt (si);
1913 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1914
1915 if (stmt_info)
1916 /* Free stmt_vec_info. */
1917 free_stmt_vec_info (stmt);
1918 }
1919
1920 free_data_refs (BB_VINFO_DATAREFS (bb_vinfo));
1921 free_dependence_relations (BB_VINFO_DDRS (bb_vinfo));
1922 BB_VINFO_GROUPED_STORES (bb_vinfo).release ();
1923 slp_instances = BB_VINFO_SLP_INSTANCES (bb_vinfo);
1924 FOR_EACH_VEC_ELT (slp_instances, i, instance)
1925 vect_free_slp_instance (instance);
1926 BB_VINFO_SLP_INSTANCES (bb_vinfo).release ();
1927 destroy_cost_data (BB_VINFO_TARGET_COST_DATA (bb_vinfo));
1928 free (bb_vinfo);
1929 bb->aux = NULL;
1930 }
1931
1932
1933 /* Analyze statements contained in SLP tree node after recursively analyzing
1934 the subtree. Return TRUE if the operations are supported. */
1935
1936 static bool
1937 vect_slp_analyze_node_operations (bb_vec_info bb_vinfo, slp_tree node)
1938 {
1939 bool dummy;
1940 int i;
1941 gimple stmt;
1942 slp_void_p child;
1943
1944 if (!node)
1945 return true;
1946
1947 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
1948 if (!vect_slp_analyze_node_operations (bb_vinfo, (slp_tree) child))
1949 return false;
1950
1951 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, stmt)
1952 {
1953 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1954 gcc_assert (stmt_info);
1955 gcc_assert (PURE_SLP_STMT (stmt_info));
1956
1957 if (!vect_analyze_stmt (stmt, &dummy, node))
1958 return false;
1959 }
1960
1961 return true;
1962 }
1963
1964
1965 /* Analyze statements in SLP instances of the basic block. Return TRUE if the
1966 operations are supported. */
1967
1968 static bool
1969 vect_slp_analyze_operations (bb_vec_info bb_vinfo)
1970 {
1971 vec<slp_instance> slp_instances = BB_VINFO_SLP_INSTANCES (bb_vinfo);
1972 slp_instance instance;
1973 int i;
1974
1975 for (i = 0; slp_instances.iterate (i, &instance); )
1976 {
1977 if (!vect_slp_analyze_node_operations (bb_vinfo,
1978 SLP_INSTANCE_TREE (instance)))
1979 {
1980 vect_free_slp_instance (instance);
1981 slp_instances.ordered_remove (i);
1982 }
1983 else
1984 i++;
1985 }
1986
1987 if (!slp_instances.length ())
1988 return false;
1989
1990 return true;
1991 }
1992
/* Check if vectorization of the basic block is profitable.  Returns true
   when the target-estimated vector cost (inside + prologue + epilogue) is
   strictly below the scalar cost of the stmts that would be replaced.  */

static bool
vect_bb_vectorization_profitable_p (bb_vec_info bb_vinfo)
{
  vec<slp_instance> slp_instances = BB_VINFO_SLP_INSTANCES (bb_vinfo);
  slp_instance instance;
  int i, j;
  unsigned int vec_inside_cost = 0, vec_outside_cost = 0, scalar_cost = 0;
  unsigned int vec_prologue_cost = 0, vec_epilogue_cost = 0;
  unsigned int stmt_cost;
  gimple stmt;
  gimple_stmt_iterator si;
  basic_block bb = BB_VINFO_BB (bb_vinfo);
  void *target_cost_data = BB_VINFO_TARGET_COST_DATA (bb_vinfo);
  stmt_vec_info stmt_info = NULL;
  stmt_vector_for_cost body_cost_vec;
  stmt_info_for_cost *ci;

  /* Calculate vector costs: feed each instance's recorded body-cost
     entries into the target cost model.  */
  FOR_EACH_VEC_ELT (slp_instances, i, instance)
    {
      body_cost_vec = SLP_INSTANCE_BODY_COST_VEC (instance);

      FOR_EACH_VEC_ELT (body_cost_vec, j, ci)
        {
          stmt_info = ci->stmt ? vinfo_for_stmt (ci->stmt) : NULL;
          (void) add_stmt_cost (target_cost_data, ci->count, ci->kind,
                                stmt_info, ci->misalign, vect_body);
        }
    }

  /* Calculate scalar cost.  Only pure-SLP, vectorizable stmts count --
     they are the ones the vector code replaces.  */
  for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
    {
      stmt = gsi_stmt (si);
      stmt_info = vinfo_for_stmt (stmt);

      if (!stmt_info || !STMT_VINFO_VECTORIZABLE (stmt_info)
          || !PURE_SLP_STMT (stmt_info))
        continue;

      if (STMT_VINFO_DATA_REF (stmt_info))
        {
          if (DR_IS_READ (STMT_VINFO_DATA_REF (stmt_info)))
            stmt_cost = vect_get_stmt_cost (scalar_load);
          else
            stmt_cost = vect_get_stmt_cost (scalar_store);
        }
      else
        stmt_cost = vect_get_stmt_cost (scalar_stmt);

      scalar_cost += stmt_cost;
    }

  /* Complete the target-specific cost calculation.  */
  finish_cost (BB_VINFO_TARGET_COST_DATA (bb_vinfo), &vec_prologue_cost,
               &vec_inside_cost, &vec_epilogue_cost);

  vec_outside_cost = vec_prologue_cost + vec_epilogue_cost;

  if (dump_enabled_p ())
    {
      dump_printf_loc (MSG_NOTE, vect_location, "Cost model analysis: \n");
      dump_printf (MSG_NOTE, "  Vector inside of basic block cost: %d\n",
                   vec_inside_cost);
      dump_printf (MSG_NOTE, "  Vector prologue cost: %d\n", vec_prologue_cost);
      dump_printf (MSG_NOTE, "  Vector epilogue cost: %d\n", vec_epilogue_cost);
      dump_printf (MSG_NOTE, "  Scalar cost of basic block: %d", scalar_cost);
    }

  /* Vectorization is profitable if its cost is less than the cost of scalar
     version.  */
  if (vec_outside_cost + vec_inside_cost >= scalar_cost)
    return false;

  return true;
}
2071
/* Check if the basic block can be vectorized.  Runs the full analysis
   pipeline (data-refs, dependences, alignment, accesses, SLP discovery,
   operation analysis, cost model) under the current CURRENT_VECTOR_SIZE.
   Returns the populated bb_vec_info on success, NULL on any failure
   (the partially built bb_vec_info is destroyed before returning).  */

static bb_vec_info
vect_slp_analyze_bb_1 (basic_block bb)
{
  bb_vec_info bb_vinfo;
  vec<ddr_p> ddrs;
  vec<slp_instance> slp_instances;
  slp_instance instance;
  int i;
  int min_vf = 2;
  int max_vf = MAX_VECTORIZATION_FACTOR;

  bb_vinfo = new_bb_vec_info (bb);
  if (!bb_vinfo)
    return NULL;

  /* Find and analyze the data references in the block; MIN_VF is raised
     to the smallest useful vectorization factor.  */
  if (!vect_analyze_data_refs (NULL, bb_vinfo, &min_vf))
    {
      if (dump_enabled_p ())
        dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                         "not vectorized: unhandled data-ref in basic "
                         "block.\n");

      destroy_bb_vec_info (bb_vinfo);
      return NULL;
    }

  ddrs = BB_VINFO_DDRS (bb_vinfo);
  if (!ddrs.length ())
    {
      if (dump_enabled_p ())
        dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                         "not vectorized: not enough data-refs in "
                         "basic block.\n");

      destroy_bb_vec_info (bb_vinfo);
      return NULL;
    }

  vect_pattern_recog (NULL, bb_vinfo);

  /* Dependences may cap MAX_VF below MIN_VF, making vectorization
     impossible.  */
  if (!vect_analyze_data_ref_dependences (NULL, bb_vinfo, &max_vf)
      || min_vf > max_vf)
    {
      if (dump_enabled_p ())
        dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                         "not vectorized: unhandled data dependence "
                         "in basic block.\n");

      destroy_bb_vec_info (bb_vinfo);
      return NULL;
    }

  if (!vect_analyze_data_refs_alignment (NULL, bb_vinfo))
    {
      if (dump_enabled_p ())
        dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                         "not vectorized: bad data alignment in basic "
                         "block.\n");

      destroy_bb_vec_info (bb_vinfo);
      return NULL;
    }

  if (!vect_analyze_data_ref_accesses (NULL, bb_vinfo))
    {
      if (dump_enabled_p ())
        dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                         "not vectorized: unhandled data access in "
                         "basic block.\n");

      destroy_bb_vec_info (bb_vinfo);
      return NULL;
    }

  /* Check the SLP opportunities in the basic block, analyze and build SLP
     trees.  */
  if (!vect_analyze_slp (NULL, bb_vinfo))
    {
      if (dump_enabled_p ())
        dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                         "not vectorized: failed to find SLP opportunities "
                         "in basic block.\n");

      destroy_bb_vec_info (bb_vinfo);
      return NULL;
    }

  slp_instances = BB_VINFO_SLP_INSTANCES (bb_vinfo);

  /* Mark all the statements that we want to vectorize as pure SLP and
     relevant.  */
  FOR_EACH_VEC_ELT (slp_instances, i, instance)
    {
      vect_mark_slp_stmts (SLP_INSTANCE_TREE (instance), pure_slp, -1);
      vect_mark_slp_stmts_relevant (SLP_INSTANCE_TREE (instance));
    }

  if (!vect_verify_datarefs_alignment (NULL, bb_vinfo))
    {
      if (dump_enabled_p ())
        dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                         "not vectorized: unsupported alignment in basic "
                         "block.\n");
      destroy_bb_vec_info (bb_vinfo);
      return NULL;
    }

  if (!vect_slp_analyze_operations (bb_vinfo))
    {
      if (dump_enabled_p ())
        dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                         "not vectorized: bad operation in basic block.\n");

      destroy_bb_vec_info (bb_vinfo);
      return NULL;
    }

  /* Cost model: check if the vectorization is worthwhile.  */
  if (flag_vect_cost_model
      && !vect_bb_vectorization_profitable_p (bb_vinfo))
    {
      if (dump_enabled_p ())
        dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                         "not vectorized: vectorization is not "
                         "profitable.\n");

      destroy_bb_vec_info (bb_vinfo);
      return NULL;
    }

  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location,
                     "Basic block will be vectorized using SLP\n");

  return bb_vinfo;
}
2210
2211
/* Main entry for the BB vectorizer.  Analyze basic block BB and return a
   filled-in bb_vec_info ready for transformation, or NULL if the block
   cannot be vectorized.  Starts with the autodetected (biggest) vector
   size and retries the analysis with each smaller supported size until
   one succeeds or all have been tried.  */

bb_vec_info
vect_slp_analyze_bb (basic_block bb)
{
  bb_vec_info bb_vinfo;
  int insns = 0;
  gimple_stmt_iterator gsi;
  unsigned int vector_sizes;   /* Bitmask of target vector sizes left to try.  */

  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location, "===vect_slp_analyze_bb===\n");

  /* Count only "real" statements; debug stmts, nops and labels must not
     count against the insn limit checked below.  */
  for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
    {
      gimple stmt = gsi_stmt (gsi);
      if (!is_gimple_debug (stmt)
          && !gimple_nop_p (stmt)
          && gimple_code (stmt) != GIMPLE_LABEL)
        insns++;
    }

  /* Refuse huge blocks up front to bound compile time.  */
  if (insns > PARAM_VALUE (PARAM_SLP_MAX_INSNS_IN_BB))
    {
      if (dump_enabled_p ())
        dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                         "not vectorized: too many instructions in "
                         "basic block.\n");

      return NULL;
    }

  /* Autodetect first vector size we try.  */
  current_vector_size = 0;
  vector_sizes = targetm.vectorize.autovectorize_vector_sizes ();

  while (1)
    {
      bb_vinfo = vect_slp_analyze_bb_1 (bb);
      if (bb_vinfo)
        return bb_vinfo;

      /* bb_vinfo is necessarily NULL here.  NOTE(review): assumes
         destroy_bb_vec_info tolerates a NULL argument -- confirm.  */
      destroy_bb_vec_info (bb_vinfo);

      /* Clear the size just tried from the candidate mask.  */
      vector_sizes &= ~current_vector_size;
      if (vector_sizes == 0
          || current_vector_size == 0)
        return NULL;

      /* Try the next biggest vector size.  */
      current_vector_size = 1 << floor_log2 (vector_sizes);
      if (dump_enabled_p ())
        dump_printf_loc (MSG_NOTE, vect_location,
                         "***** Re-trying analysis with "
                         "vector size %d\n", current_vector_size);
    }
}
2267
2268
2269 /* SLP costs are calculated according to SLP instance unrolling factor (i.e.,
2270 the number of created vector stmts depends on the unrolling factor).
2271 However, the actual number of vector stmts for every SLP node depends on
2272 VF which is set later in vect_analyze_operations (). Hence, SLP costs
2273 should be updated. In this function we assume that the inside costs
2274 calculated in vect_model_xxx_cost are linear in ncopies. */
2275
2276 void
2277 vect_update_slp_costs_according_to_vf (loop_vec_info loop_vinfo)
2278 {
2279 unsigned int i, j, vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
2280 vec<slp_instance> slp_instances = LOOP_VINFO_SLP_INSTANCES (loop_vinfo);
2281 slp_instance instance;
2282 stmt_vector_for_cost body_cost_vec;
2283 stmt_info_for_cost *si;
2284 void *data = LOOP_VINFO_TARGET_COST_DATA (loop_vinfo);
2285
2286 if (dump_enabled_p ())
2287 dump_printf_loc (MSG_NOTE, vect_location,
2288 "=== vect_update_slp_costs_according_to_vf ===");
2289
2290 FOR_EACH_VEC_ELT (slp_instances, i, instance)
2291 {
2292 /* We assume that costs are linear in ncopies. */
2293 int ncopies = vf / SLP_INSTANCE_UNROLLING_FACTOR (instance);
2294
2295 /* Record the instance's instructions in the target cost model.
2296 This was delayed until here because the count of instructions
2297 isn't known beforehand. */
2298 body_cost_vec = SLP_INSTANCE_BODY_COST_VEC (instance);
2299
2300 FOR_EACH_VEC_ELT (body_cost_vec, j, si)
2301 (void) add_stmt_cost (data, si->count * ncopies, si->kind,
2302 vinfo_for_stmt (si->stmt), si->misalign,
2303 vect_body);
2304 }
2305 }
2306
2307
/* For constant and loop invariant defs of SLP_NODE this function returns
   (vector) defs (VEC_OPRNDS) that will be used in the vectorized stmts.
   OP_NUM determines if we gather defs for operand 0 or operand 1 of the RHS of
   scalar stmts.  NUMBER_OF_VECTORS is the number of vector defs to create.
   REDUC_INDEX is the index of the reduction operand in the statements, unless
   it is -1.  */

static void
vect_get_constant_vectors (tree op, slp_tree slp_node,
                           vec<tree> *vec_oprnds,
                           unsigned int op_num, unsigned int number_of_vectors,
                           int reduc_index)
{
  vec<gimple> stmts = SLP_TREE_SCALAR_STMTS (slp_node);
  gimple stmt = stmts[0];
  stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
  unsigned nunits;
  tree vec_cst;
  tree *elts;
  unsigned j, number_of_places_left_in_vector;
  tree vector_type;
  tree vop;
  int group_size = stmts.length ();
  unsigned int vec_num, i;
  unsigned number_of_copies = 1;
  vec<tree> voprnds;
  voprnds.create (number_of_vectors);
  bool constant_p, is_store;
  tree neutral_op = NULL;
  enum tree_code code = gimple_expr_code (stmt);
  gimple def_stmt;
  struct loop *loop;
  /* Conversion stmts generated for non-constant elements whose scalar type
     differs from the vector element type; flushed before each vector def.  */
  gimple_seq ctor_seq = NULL;

  if (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_reduction_def
      && reduc_index != -1)
    {
      op_num = reduc_index - 1;
      op = gimple_op (stmt, reduc_index);
      /* For additional copies (see the explanation of NUMBER_OF_COPIES below)
         we need either neutral operands or the original operands.  See
         get_initial_def_for_reduction() for details.  */
      switch (code)
        {
          case WIDEN_SUM_EXPR:
          case DOT_PROD_EXPR:
          case PLUS_EXPR:
          case MINUS_EXPR:
          case BIT_IOR_EXPR:
          case BIT_XOR_EXPR:
             /* Additive-style reductions: neutral element is zero.  */
             if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (op)))
               neutral_op = build_real (TREE_TYPE (op), dconst0);
             else
               neutral_op = build_int_cst (TREE_TYPE (op), 0);

             break;

          case MULT_EXPR:
             /* Multiplicative reduction: neutral element is one.  */
             if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (op)))
               neutral_op = build_real (TREE_TYPE (op), dconst1);
             else
               neutral_op = build_int_cst (TREE_TYPE (op), 1);

             break;

          case BIT_AND_EXPR:
            /* AND reduction: neutral element is all-ones.  */
            neutral_op = build_int_cst (TREE_TYPE (op), -1);
            break;

          case MAX_EXPR:
          case MIN_EXPR:
            /* MIN/MAX have no constant neutral element; use the initial
               value taken from the loop-preheader edge of the PHI.  */
            def_stmt = SSA_NAME_DEF_STMT (op);
            loop = (gimple_bb (stmt))->loop_father;
            neutral_op = PHI_ARG_DEF_FROM_EDGE (def_stmt,
                                                loop_preheader_edge (loop));
            break;

          default:
            neutral_op = NULL;
        }
    }

  if (STMT_VINFO_DATA_REF (stmt_vinfo))
    {
      is_store = true;
      op = gimple_assign_rhs1 (stmt);
    }
  else
    is_store = false;

  gcc_assert (op);

  if (CONSTANT_CLASS_P (op))
    constant_p = true;
  else
    constant_p = false;

  vector_type = get_vectype_for_scalar_type (TREE_TYPE (op));
  gcc_assert (vector_type);
  nunits = TYPE_VECTOR_SUBPARTS (vector_type);

  /* NUMBER_OF_COPIES is the number of times we need to use the same values in
     created vectors.  It is greater than 1 if unrolling is performed.

     For example, we have two scalar operands, s1 and s2 (e.g., group of
     strided accesses of size two), while NUNITS is four (i.e., four scalars
     of this type can be packed in a vector).  The output vector will contain
     two copies of each scalar operand: {s1, s2, s1, s2}.  (NUMBER_OF_COPIES
     will be 2).

     If GROUP_SIZE > NUNITS, the scalars will be split into several vectors
     containing the operands.

     For example, NUNITS is four as before, and the group size is 8
     (s1, s2, ..., s8).  We will create two vectors {s1, s2, s3, s4} and
     {s5, s6, s7, s8}.  */

  number_of_copies = least_common_multiple (nunits, group_size) / group_size;

  number_of_places_left_in_vector = nunits;
  elts = XALLOCAVEC (tree, nunits);
  for (j = 0; j < number_of_copies; j++)
    {
      /* Walk the scalar stmts backwards: vectors are filled from the last
	 element down, and the finished list is reversed further below.  */
      for (i = group_size - 1; stmts.iterate (i, &stmt); i--)
        {
          if (is_store)
            op = gimple_assign_rhs1 (stmt);
          else
	    {
	      /* Pick the requested operand; its position in the stmt
		 depends on the statement kind.  */
	      switch (code)
		{
		  case COND_EXPR:
		    if (op_num == 0 || op_num == 1)
		      {
			tree cond = gimple_assign_rhs1 (stmt);
			op = TREE_OPERAND (cond, op_num);
		      }
		    else
		      {
			if (op_num == 2)
			  op = gimple_assign_rhs2 (stmt);
			else
			  op = gimple_assign_rhs3 (stmt);
		      }
		    break;

		  case CALL_EXPR:
		    op = gimple_call_arg (stmt, op_num);
		    break;

		  case LSHIFT_EXPR:
		  case RSHIFT_EXPR:
		  case LROTATE_EXPR:
		  case RROTATE_EXPR:
		    op = gimple_op (stmt, op_num + 1);
		    /* Unlike the other binary operators, shifts/rotates have
		       the shift count being int, instead of the same type as
		       the lhs, so make sure the scalar is the right type if
		       we are dealing with vectors of
		       long long/long/short/char.  */
		    if (op_num == 1 && constant_p)
		      op = fold_convert (TREE_TYPE (vector_type), op);
		    break;

		  default:
		    op = gimple_op (stmt, op_num + 1);
		    break;
		}
	    }

          if (reduc_index != -1)
            {
              loop = (gimple_bb (stmt))->loop_father;
              def_stmt = SSA_NAME_DEF_STMT (op);

              gcc_assert (loop);

              /* Get the def before the loop.  In reduction chain we have only
                 one initial value.  */
              if ((j != (number_of_copies - 1)
                   || (GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt))
                       && i != 0))
                  && neutral_op)
                op = neutral_op;
              else
                op = PHI_ARG_DEF_FROM_EDGE (def_stmt,
                                            loop_preheader_edge (loop));
            }

          /* Create 'vect_ = {op0,op1,...,opn}'.  */
          number_of_places_left_in_vector--;
	  if (!types_compatible_p (TREE_TYPE (vector_type), TREE_TYPE (op)))
	    {
	      /* Scalar type differs from the vector element type: fold
		 constants directly, otherwise queue a VIEW_CONVERT_EXPR
		 stmt in CTOR_SEQ.  */
	      if (constant_p)
		{
		  op = fold_unary (VIEW_CONVERT_EXPR,
				   TREE_TYPE (vector_type), op);
		  gcc_assert (op && CONSTANT_CLASS_P (op));
		}
	      else
		{
		  tree new_temp
		    = make_ssa_name (TREE_TYPE (vector_type), NULL);
		  gimple init_stmt;
		  op = build1 (VIEW_CONVERT_EXPR, TREE_TYPE (vector_type),
			       op);
		  init_stmt
		    = gimple_build_assign_with_ops (VIEW_CONVERT_EXPR,
						    new_temp, op, NULL_TREE);
		  gimple_seq_add_stmt (&ctor_seq, init_stmt);
		  op = new_temp;
		}
	    }
	  elts[number_of_places_left_in_vector] = op;

          if (number_of_places_left_in_vector == 0)
            {
	      /* The current vector is full -- materialize it.  */
              number_of_places_left_in_vector = nunits;

	      if (constant_p)
		vec_cst = build_vector (vector_type, elts);
	      else
		{
		  vec<constructor_elt, va_gc> *v;
		  unsigned k;
		  vec_alloc (v, nunits);
		  for (k = 0; k < nunits; ++k)
		    CONSTRUCTOR_APPEND_ELT (v, NULL_TREE, elts[k]);
		  vec_cst = build_constructor (vector_type, v);
		}
              voprnds.quick_push (vect_init_vector (stmt, vec_cst,
						    vector_type, NULL));
	      /* Insert the queued conversion stmts right before the
		 vector def just created.  */
	      if (ctor_seq != NULL)
		{
		  gimple init_stmt = SSA_NAME_DEF_STMT (voprnds.last ());
		  gimple_stmt_iterator gsi = gsi_for_stmt (init_stmt);
		  gsi_insert_seq_before_without_update (&gsi, ctor_seq,
							GSI_SAME_STMT);
		  ctor_seq = NULL;
		}
            }
        }
    }

  /* Since the vectors are created in the reverse order, we should invert
     them.  */
  vec_num = voprnds.length ();
  for (j = vec_num; j != 0; j--)
    {
      vop = voprnds[j - 1];
      vec_oprnds->quick_push (vop);
    }

  voprnds.release ();

  /* In case that VF is greater than the unrolling factor needed for the SLP
     group of stmts, NUMBER_OF_VECTORS to be created is greater than
     NUMBER_OF_SCALARS/NUNITS or NUNITS/NUMBER_OF_SCALARS, and hence we have
     to replicate the vectors.  */
  while (number_of_vectors > vec_oprnds->length ())
    {
      tree neutral_vec = NULL;

      if (neutral_op)
        {
          if (!neutral_vec)
	    neutral_vec = build_vector_from_val (vector_type, neutral_op);

          vec_oprnds->quick_push (neutral_vec);
        }
      else
        {
	  /* No neutral element: repeat the vectors already created.  */
          for (i = 0; vec_oprnds->iterate (i, &vop) && i < vec_num; i++)
            vec_oprnds->quick_push (vop);
        }
    }
}
2585
2586
2587 /* Get vectorized definitions from SLP_NODE that contains corresponding
2588 vectorized def-stmts. */
2589
2590 static void
2591 vect_get_slp_vect_defs (slp_tree slp_node, vec<tree> *vec_oprnds)
2592 {
2593 tree vec_oprnd;
2594 gimple vec_def_stmt;
2595 unsigned int i;
2596
2597 gcc_assert (SLP_TREE_VEC_STMTS (slp_node).exists ());
2598
2599 FOR_EACH_VEC_ELT (SLP_TREE_VEC_STMTS (slp_node), i, vec_def_stmt)
2600 {
2601 gcc_assert (vec_def_stmt);
2602 vec_oprnd = gimple_get_lhs (vec_def_stmt);
2603 vec_oprnds->quick_push (vec_oprnd);
2604 }
2605 }
2606
2607
/* Get vectorized definitions for SLP_NODE.
   If the scalar definitions are loop invariants or constants, collect them and
   call vect_get_constant_vectors() to create vector stmts.
   Otherwise, the def-stmts must be already vectorized and the vectorized stmts
   must be stored in the corresponding child of SLP_NODE, and we call
   vect_get_slp_vect_defs () to retrieve them.

   OPS holds the scalar operands of the first scalar stmt; one vec<tree> of
   vector defs is pushed onto VEC_OPRNDS per operand.  REDUC_INDEX is the
   index of the reduction operand, or -1; for reductions only the defs of
   the first operand (the initial values) are produced.  */

void
vect_get_slp_defs (vec<tree> ops, slp_tree slp_node,
                   vec<slp_void_p> *vec_oprnds, int reduc_index)
{
  gimple first_stmt;
  int number_of_vects = 0, i;
  unsigned int child_index = 0;
  HOST_WIDE_INT lhs_size_unit, rhs_size_unit;
  slp_tree child = NULL;
  vec<tree> *vec_defs;
  tree oprnd;
  bool vectorized_defs;

  first_stmt = SLP_TREE_SCALAR_STMTS (slp_node)[0];
  FOR_EACH_VEC_ELT (ops, i, oprnd)
    {
      /* For each operand we check if it has vectorized definitions in a child
	 node or we need to create them (for invariants and constants).  We
	 check if the LHS of the first stmt of the next child matches OPRND.
	 If it does, we found the correct child.  Otherwise, we call
	 vect_get_constant_vectors (), and not advance CHILD_INDEX in order
	 to check this child node for the next operand.  */
      vectorized_defs = false;
      if (SLP_TREE_CHILDREN (slp_node).length () > child_index)
        {
          child = (slp_tree) SLP_TREE_CHILDREN (slp_node)[child_index];

	  /* We have to check both pattern and original def, if available.  */
	  gimple first_def = SLP_TREE_SCALAR_STMTS (child)[0];
	  gimple related = STMT_VINFO_RELATED_STMT (vinfo_for_stmt (first_def));

	  if (operand_equal_p (oprnd, gimple_get_lhs (first_def), 0)
	      || (related
		  && operand_equal_p (oprnd, gimple_get_lhs (related), 0)))
	    {
	      /* The number of vector defs is determined by the number of
		 vector statements in the node from which we get those
		 statements.  */
	      number_of_vects = SLP_TREE_NUMBER_OF_VEC_STMTS (child);
	      vectorized_defs = true;
	      child_index++;
	    }
        }

      if (!vectorized_defs)
        {
          if (i == 0)
            {
              number_of_vects = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
              /* Number of vector stmts was calculated according to LHS in
                 vect_schedule_slp_instance (), fix it by replacing LHS with
                 RHS, if necessary.  See vect_get_smallest_scalar_type () for
                 details.  */
              vect_get_smallest_scalar_type (first_stmt, &lhs_size_unit,
                                             &rhs_size_unit);
              if (rhs_size_unit != lhs_size_unit)
                {
                  number_of_vects *= rhs_size_unit;
                  number_of_vects /= lhs_size_unit;
                }
            }
        }

      /* Allocate memory for vectorized defs.  */
      vec_alloc (vec_defs, number_of_vects);

      /* For reduction defs we call vect_get_constant_vectors (), since we are
         looking for initial loop invariant values.  */
      if (vectorized_defs && reduc_index == -1)
        /* The defs are already vectorized.  */
	vect_get_slp_vect_defs (child, vec_defs);
      else
        /* Build vectors from scalar defs.  */
	vect_get_constant_vectors (oprnd, slp_node, vec_defs, i,
				   number_of_vects, reduc_index);

      vec_oprnds->quick_push ((slp_void_p) vec_defs);

      /* For reductions, we only need initial values.  */
      if (reduc_index != -1)
        return;
    }
}
2698
2699
/* Create NCOPIES permutation statements using the mask MASK_BYTES (by
   building a vector of type MASK_TYPE from it) and two input vectors placed in
   DR_CHAIN at FIRST_VEC_INDX and SECOND_VEC_INDX for the first copy and
   shifting by STRIDE elements of DR_CHAIN for every copy.
   (STRIDE is the number of vectorized stmts for NODE divided by the number of
   copies).
   VECT_STMTS_COUNTER specifies the index in the vectorized stmts of NODE, where
   the created stmts must be inserted.  */

static inline void
vect_create_mask_and_perm (gimple stmt, gimple next_scalar_stmt,
                           tree mask, int first_vec_indx, int second_vec_indx,
                           gimple_stmt_iterator *gsi, slp_tree node,
                           tree vectype, vec<tree> dr_chain,
                           int ncopies, int vect_stmts_counter)
{
  tree perm_dest;
  gimple perm_stmt = NULL;
  stmt_vec_info next_stmt_info;
  int i, stride;
  tree first_vec, second_vec, data_ref;

  stride = SLP_TREE_NUMBER_OF_VEC_STMTS (node) / ncopies;

  /* Initialize the vect stmts of NODE to properly insert the generated
     stmts later.  Pad with NULLs so the indexed stores below are within
     the vector's bounds.  */
  for (i = SLP_TREE_VEC_STMTS (node).length ();
       i < (int) SLP_TREE_NUMBER_OF_VEC_STMTS (node); i++)
    SLP_TREE_VEC_STMTS (node).quick_push (NULL);

  perm_dest = vect_create_destination_var (gimple_assign_lhs (stmt), vectype);
  for (i = 0; i < ncopies; i++)
    {
      first_vec = dr_chain[first_vec_indx];
      second_vec = dr_chain[second_vec_indx];

      /* Generate the permute statement.  */
      perm_stmt = gimple_build_assign_with_ops (VEC_PERM_EXPR, perm_dest,
						first_vec, second_vec, mask);
      data_ref = make_ssa_name (perm_dest, perm_stmt);
      gimple_set_lhs (perm_stmt, data_ref);
      vect_finish_stmt_generation (stmt, perm_stmt, gsi);

      /* Store the vector statement in NODE.  */
      SLP_TREE_VEC_STMTS (node)[stride * i + vect_stmts_counter] = perm_stmt;

      /* Advance to the pair of input vectors for the next copy.  */
      first_vec_indx += stride;
      second_vec_indx += stride;
    }

  /* Mark the scalar stmt as vectorized (record the last permute stmt).  */
  next_stmt_info = vinfo_for_stmt (next_scalar_stmt);
  STMT_VINFO_VEC_STMT (next_stmt_info) = perm_stmt;
}
2754
2755
/* Given FIRST_MASK_ELEMENT - the mask element in element representation,
   return in CURRENT_MASK_ELEMENT its equivalent in target specific
   representation.  Check that the mask is valid and return FALSE if not.
   Return TRUE in NEED_NEXT_VECTOR if the permutation requires to move to
   the next vector, i.e., the current first vector is not needed.

   M is an offset added to FIRST_MASK_ELEMENT; INDEX is the number of mask
   elements already stored in MASK for the vector being built.
   *NUMBER_OF_MASK_FIXES, *MASK_FIXED and *NEEDS_FIRST_VECTOR carry state
   across successive calls for one mask and are reset when the mask is
   complete (INDEX == MASK_NUNITS - 1).  */

static bool
vect_get_mask_element (gimple stmt, int first_mask_element, int m,
                       int mask_nunits, bool only_one_vec, int index,
                       unsigned char *mask, int *current_mask_element,
                       bool *need_next_vector, int *number_of_mask_fixes,
                       bool *mask_fixed, bool *needs_first_vector)
{
  int i;

  /* Convert to target specific representation.  */
  *current_mask_element = first_mask_element + m;
  /* Adjust the value in case it's a mask for second and third vectors.  */
  *current_mask_element -= mask_nunits * (*number_of_mask_fixes - 1);

  if (*current_mask_element < mask_nunits)
    *needs_first_vector = true;

  /* We have only one input vector to permute but the mask accesses values in
     the next vector as well.  */
  if (only_one_vec && *current_mask_element >= mask_nunits)
    {
      if (dump_enabled_p ())
        {
          dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			   "permutation requires at least two vectors ");
          dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
        }

      return false;
    }

  /* The mask requires the next vector.  */
  if (*current_mask_element >= mask_nunits * 2)
    {
      if (*needs_first_vector || *mask_fixed)
        {
          /* We either need the first vector too or have already moved to the
             next vector. In both cases, this permutation needs three
             vectors.  */
          if (dump_enabled_p ())
            {
              dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			       "permutation requires at "
			       "least three vectors ");
              dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
            }

          return false;
        }

      /* We move to the next vector, dropping the first one and working with
         the second and the third - we need to adjust the values of the mask
         accordingly.  */
      *current_mask_element -= mask_nunits * *number_of_mask_fixes;

      /* Re-base the elements already emitted for this mask as well.  */
      for (i = 0; i < index; i++)
        mask[i] -= mask_nunits * *number_of_mask_fixes;

      (*number_of_mask_fixes)++;
      *mask_fixed = true;
    }

  *need_next_vector = *mask_fixed;

  /* This was the last element of this mask. Start a new one.  */
  if (index == mask_nunits - 1)
    {
      *number_of_mask_fixes = 1;
      *mask_fixed = false;
      *needs_first_vector = false;
    }

  return true;
}
2836
2837
/* Generate vector permute statements from a list of loads in DR_CHAIN.
   If ANALYZE_ONLY is TRUE, only check that it is possible to create valid
   permute statements for SLP_NODE_INSTANCE.

   STMT is the representative load stmt, GSI the insertion point, and VF
   the final vectorization factor used to derive the number of copies.
   Returns false if the target cannot permute with the required masks.  */
bool
vect_transform_slp_perm_load (gimple stmt, vec<tree> dr_chain,
                              gimple_stmt_iterator *gsi, int vf,
                              slp_instance slp_node_instance, bool analyze_only)
{
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
  tree mask_element_type = NULL_TREE, mask_type;
  int i, j, k, nunits, vec_index = 0, scalar_index;
  slp_tree node;
  tree vectype = STMT_VINFO_VECTYPE (stmt_info);
  gimple next_scalar_stmt;
  int group_size = SLP_INSTANCE_GROUP_SIZE (slp_node_instance);
  int first_mask_element;
  int index, unroll_factor, current_mask_element, ncopies;
  unsigned char *mask;
  bool only_one_vec = false, need_next_vector = false;
  int first_vec_index, second_vec_index, orig_vec_stmts_num, vect_stmts_counter;
  int number_of_mask_fixes = 1;
  bool mask_fixed = false;
  bool needs_first_vector = false;
  enum machine_mode mode;

  mode = TYPE_MODE (vectype);

  /* Bail out early if the target has no variable permute at all for
     this mode.  */
  if (!can_vec_perm_p (mode, false, NULL))
    {
      if (dump_enabled_p ())
        {
          dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			   "no vect permute for ");
          dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
        }
      return false;
    }

  /* The generic VEC_PERM_EXPR code always uses an integral type of the
     same size as the vector element being permuted.  */
  mask_element_type = lang_hooks.types.type_for_mode
                (int_mode_for_mode (TYPE_MODE (TREE_TYPE (vectype))), 1);
  mask_type = get_vectype_for_scalar_type (mask_element_type);
  nunits = TYPE_VECTOR_SUBPARTS (vectype);
  mask = XALLOCAVEC (unsigned char, nunits);
  unroll_factor = SLP_INSTANCE_UNROLLING_FACTOR (slp_node_instance);

  /* The number of vector stmts to generate based only on SLP_NODE_INSTANCE
     unrolling factor.  */
  orig_vec_stmts_num = group_size *
                SLP_INSTANCE_UNROLLING_FACTOR (slp_node_instance) / nunits;
  if (orig_vec_stmts_num == 1)
    only_one_vec = true;

  /* Number of copies is determined by the final vectorization factor
     relatively to SLP_NODE_INSTANCE unrolling factor.  */
  ncopies = vf / SLP_INSTANCE_UNROLLING_FACTOR (slp_node_instance);

  /* Generate permutation masks for every NODE. Number of masks for each NODE
     is equal to GROUP_SIZE.
     E.g., we have a group of three nodes with three loads from the same
     location in each node, and the vector size is 4. I.e., we have a
     a0b0c0a1b1c1... sequence and we need to create the following vectors:
     for a's: a0a0a0a1 a1a1a2a2 a2a3a3a3
     for b's: b0b0b0b1 b1b1b2b2 b2b3b3b3
     ...

     The masks for a's should be: {0,0,0,3} {3,3,6,6} {6,9,9,9}.
     The last mask is illegal since we assume two operands for permute
     operation, and the mask element values can't be outside that range.
     Hence, the last mask must be converted into {2,5,5,5}.
     For the first two permutations we need the first and the second input
     vectors: {a0,b0,c0,a1} and {b1,c1,a2,b2}, and for the last permutation
     we need the second and the third vectors: {b1,c1,a2,b2} and
     {c2,a3,b3,c3}.  */

  FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (slp_node_instance), i, node)
    {
      scalar_index = 0;
      index = 0;
      vect_stmts_counter = 0;
      vec_index = 0;
      first_vec_index = vec_index++;
      if (only_one_vec)
        second_vec_index = first_vec_index;
      else
        second_vec_index =  vec_index++;

      for (j = 0; j < unroll_factor; j++)
        {
          for (k = 0; k < group_size; k++)
            {
              first_mask_element = i + j * group_size;
              if (!vect_get_mask_element (stmt, first_mask_element, 0,
					  nunits, only_one_vec, index,
					  mask, &current_mask_element,
					  &need_next_vector,
					  &number_of_mask_fixes, &mask_fixed,
					  &needs_first_vector))
		return false;
	      mask[index++] = current_mask_element;

              if (index == nunits)
                {
		  tree mask_vec, *mask_elts;
		  int l;

		  /* Check the target supports this particular mask.  */
		  if (!can_vec_perm_p (mode, false, mask))
		    {
		      if (dump_enabled_p ())
			{
			  dump_printf_loc (MSG_MISSED_OPTIMIZATION,
					   vect_location, 
					   "unsupported vect permute { ");
			  /* NOTE(review): this loop reuses the outer
			     FOR_EACH_VEC_ELT index I; harmless only
			     because of the immediate return below.  */
			  for (i = 0; i < nunits; ++i)
			    dump_printf (MSG_MISSED_OPTIMIZATION, "%d ",
					 mask[i]);
			  dump_printf (MSG_MISSED_OPTIMIZATION, "}\n");
			}
		      return false;
		    }

		  /* Build the mask constant vector and reset for the
		     next mask.  */
		  mask_elts = XALLOCAVEC (tree, nunits);
		  for (l = 0; l < nunits; ++l)
		    mask_elts[l] = build_int_cst (mask_element_type, mask[l]);
		  mask_vec = build_vector (mask_type, mask_elts);
		  index = 0;

		  if (!analyze_only)
		    {
		      if (need_next_vector)
			{
			  first_vec_index = second_vec_index;
			  second_vec_index = vec_index;
			}

		      next_scalar_stmt
			= SLP_TREE_SCALAR_STMTS (node)[scalar_index++];

		      vect_create_mask_and_perm (stmt, next_scalar_stmt,
			       mask_vec, first_vec_index, second_vec_index,
			       gsi, node, vectype, dr_chain,
			       ncopies, vect_stmts_counter++);
		    }
                }
            }
        }
    }

  return true;
}
2989
2990
2991
/* Vectorize SLP instance tree in postorder.  Returns the value returned by
   vect_transform_stmt for the root stmt (IS_STORE).  */

static bool
vect_schedule_slp_instance (slp_tree node, slp_instance instance,
                            unsigned int vectorization_factor)
{
  gimple stmt;
  bool grouped_store, is_store;
  gimple_stmt_iterator si;
  stmt_vec_info stmt_info;
  unsigned int vec_stmts_size, nunits, group_size;
  tree vectype;
  int i;
  slp_tree loads_node;
  slp_void_p child;

  if (!node)
    return false;

  /* Postorder: vectorize all children before this node.  */
  FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
    vect_schedule_slp_instance ((slp_tree) child, instance,
                                vectorization_factor);

  stmt = SLP_TREE_SCALAR_STMTS (node)[0];
  stmt_info = vinfo_for_stmt (stmt);

  /* VECTYPE is the type of the destination.  */
  vectype = STMT_VINFO_VECTYPE (stmt_info);
  nunits = (unsigned int) TYPE_VECTOR_SUBPARTS (vectype);
  group_size = SLP_INSTANCE_GROUP_SIZE (instance);

  /* For each SLP instance calculate number of vector stmts to be created
     for the scalar stmts in each node of the SLP tree.  Number of vector
     elements in one vector iteration is the number of scalar elements in
     one scalar iteration (GROUP_SIZE) multiplied by VF divided by vector
     size.  */
  vec_stmts_size = (vectorization_factor * group_size) / nunits;

  /* In case of load permutation we have to allocate vectorized statements for
     all the nodes that participate in that permutation.  */
  if (SLP_INSTANCE_LOAD_PERMUTATION (instance).exists ())
    {
      FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (instance), i, loads_node)
	{
	  if (!SLP_TREE_VEC_STMTS (loads_node).exists ())
	    {
	      SLP_TREE_VEC_STMTS (loads_node).create (vec_stmts_size);
	      SLP_TREE_NUMBER_OF_VEC_STMTS (loads_node) = vec_stmts_size;
	    }
	}
    }

  if (!SLP_TREE_VEC_STMTS (node).exists ())
    {
      SLP_TREE_VEC_STMTS (node).create (vec_stmts_size);
      SLP_TREE_NUMBER_OF_VEC_STMTS (node) = vec_stmts_size;
    }

  if (dump_enabled_p ())
    {
      dump_printf_loc (MSG_NOTE,vect_location,
		       "------>vectorizing SLP node starting from: ");
      dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
    }

  /* Loads should be inserted before the first load.  */
  if (SLP_INSTANCE_FIRST_LOAD_STMT (instance)
      && STMT_VINFO_GROUPED_ACCESS (stmt_info)
      && !REFERENCE_CLASS_P (gimple_get_lhs (stmt))
      && SLP_INSTANCE_LOAD_PERMUTATION (instance).exists ())
    si = gsi_for_stmt (SLP_INSTANCE_FIRST_LOAD_STMT (instance));
  else if (is_pattern_stmt_p (stmt_info))
    si = gsi_for_stmt (STMT_VINFO_RELATED_STMT (stmt_info));
  else
    si = gsi_for_stmt (stmt);

  /* Stores should be inserted just before the last store.  */
  if (STMT_VINFO_GROUPED_ACCESS (stmt_info)
      && REFERENCE_CLASS_P (gimple_get_lhs (stmt)))
    {
      gimple last_store = vect_find_last_store_in_slp_instance (instance);
      if (is_pattern_stmt_p (vinfo_for_stmt (last_store)))
       last_store = STMT_VINFO_RELATED_STMT (vinfo_for_stmt (last_store));
      si = gsi_for_stmt (last_store);
    }

  /* Mark the first element of the reduction chain as reduction to properly
     transform the node.  In the analysis phase only the last element of the
     chain is marked as reduction.  */
  if (GROUP_FIRST_ELEMENT (stmt_info) && !STMT_VINFO_GROUPED_ACCESS (stmt_info)
      && GROUP_FIRST_ELEMENT (stmt_info) == stmt)
    {
      STMT_VINFO_DEF_TYPE (stmt_info) = vect_reduction_def;
      STMT_VINFO_TYPE (stmt_info) = reduc_vec_info_type;
    }

  is_store = vect_transform_stmt (stmt, &si, &grouped_store, node, instance);
  return is_store;
}
3091
3092 /* Replace scalar calls from SLP node NODE with setting of their lhs to zero.
3093 For loop vectorization this is done in vectorizable_call, but for SLP
3094 it needs to be deferred until end of vect_schedule_slp, because multiple
3095 SLP instances may refer to the same scalar stmt. */
3096
3097 static void
3098 vect_remove_slp_scalar_calls (slp_tree node)
3099 {
3100 gimple stmt, new_stmt;
3101 gimple_stmt_iterator gsi;
3102 int i;
3103 slp_void_p child;
3104 tree lhs;
3105 stmt_vec_info stmt_info;
3106
3107 if (!node)
3108 return;
3109
3110 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
3111 vect_remove_slp_scalar_calls ((slp_tree) child);
3112
3113 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, stmt)
3114 {
3115 if (!is_gimple_call (stmt) || gimple_bb (stmt) == NULL)
3116 continue;
3117 stmt_info = vinfo_for_stmt (stmt);
3118 if (stmt_info == NULL
3119 || is_pattern_stmt_p (stmt_info)
3120 || !PURE_SLP_STMT (stmt_info))
3121 continue;
3122 lhs = gimple_call_lhs (stmt);
3123 new_stmt = gimple_build_assign (lhs, build_zero_cst (TREE_TYPE (lhs)));
3124 set_vinfo_for_stmt (new_stmt, stmt_info);
3125 set_vinfo_for_stmt (stmt, NULL);
3126 STMT_VINFO_STMT (stmt_info) = new_stmt;
3127 gsi = gsi_for_stmt (stmt);
3128 gsi_replace (&gsi, new_stmt, false);
3129 SSA_NAME_DEF_STMT (gimple_assign_lhs (new_stmt)) = new_stmt;
3130 }
3131 }
3132
3133 /* Generate vector code for all SLP instances in the loop/basic block. */
3134
3135 bool
3136 vect_schedule_slp (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo)
3137 {
3138 vec<slp_instance> slp_instances;
3139 slp_instance instance;
3140 unsigned int i, vf;
3141 bool is_store = false;
3142
3143 if (loop_vinfo)
3144 {
3145 slp_instances = LOOP_VINFO_SLP_INSTANCES (loop_vinfo);
3146 vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
3147 }
3148 else
3149 {
3150 slp_instances = BB_VINFO_SLP_INSTANCES (bb_vinfo);
3151 vf = 1;
3152 }
3153
3154 FOR_EACH_VEC_ELT (slp_instances, i, instance)
3155 {
3156 /* Schedule the tree of INSTANCE. */
3157 is_store = vect_schedule_slp_instance (SLP_INSTANCE_TREE (instance),
3158 instance, vf);
3159 if (dump_enabled_p ())
3160 dump_printf_loc (MSG_NOTE, vect_location,
3161 "vectorizing stmts using SLP.");
3162 }
3163
3164 FOR_EACH_VEC_ELT (slp_instances, i, instance)
3165 {
3166 slp_tree root = SLP_INSTANCE_TREE (instance);
3167 gimple store;
3168 unsigned int j;
3169 gimple_stmt_iterator gsi;
3170
3171 vect_remove_slp_scalar_calls (root);
3172
3173 for (j = 0; SLP_TREE_SCALAR_STMTS (root).iterate (j, &store)
3174 && j < SLP_INSTANCE_GROUP_SIZE (instance); j++)
3175 {
3176 if (!STMT_VINFO_DATA_REF (vinfo_for_stmt (store)))
3177 break;
3178
3179 if (is_pattern_stmt_p (vinfo_for_stmt (store)))
3180 store = STMT_VINFO_RELATED_STMT (vinfo_for_stmt (store));
3181 /* Free the attached stmt_vec_info and remove the stmt. */
3182 gsi = gsi_for_stmt (store);
3183 unlink_stmt_vdef (store);
3184 gsi_remove (&gsi, true);
3185 release_defs (store);
3186 free_stmt_vec_info (store);
3187 }
3188 }
3189
3190 return is_store;
3191 }
3192
3193
/* Vectorize the basic block.  BB must already carry a bb_vec_info
   (asserted below).  Walks the block until the first statement marked
   for SLP, schedules all SLP instances at that point, then releases
   the bb_vec_info.  */

void
vect_slp_transform_bb (basic_block bb)
{
  bb_vec_info bb_vinfo = vec_info_for_bb (bb);
  gimple_stmt_iterator si;

  gcc_assert (bb_vinfo);

  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location, "SLPing BB\n");

  for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
    {
      gimple stmt = gsi_stmt (si);
      stmt_vec_info stmt_info;

      if (dump_enabled_p ())
        {
          dump_printf_loc (MSG_NOTE, vect_location,
                           "------>SLPing statement: ");
          dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
        }

      stmt_info = vinfo_for_stmt (stmt);
      gcc_assert (stmt_info);

      /* Schedule all the SLP instances when the first SLP stmt is reached.  */
      if (STMT_SLP_TYPE (stmt_info))
        {
          vect_schedule_slp (NULL, bb_vinfo);
          break;
        }
    }

  if (dump_enabled_p ())
    dump_printf (MSG_OPTIMIZED_LOCATIONS, "BASIC BLOCK VECTORIZED\n");

  destroy_bb_vec_info (bb_vinfo);
}