/* SLP - Basic Block Vectorization
   Copyright (C) 2007-2013 Free Software Foundation, Inc.
   Contributed by Dorit Naishlos <dorit@il.ibm.com>
   and Ira Rosen <irar@il.ibm.com>

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free
Software Foundation; either version 3, or (at your option) any later
version.

GCC is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */

#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "dumpfile.h"
#include "tm.h"
#include "ggc.h"
#include "tree.h"
#include "target.h"
#include "basic-block.h"
#include "gimple-pretty-print.h"
#include "gimple.h"
#include "gimple-iterator.h"
#include "gimple-ssa.h"
#include "tree-phinodes.h"
#include "ssa-iterators.h"
#include "tree-ssanames.h"
#include "tree-pass.h"
#include "cfgloop.h"
#include "expr.h"
#include "recog.h"   /* FIXME: for insn_data */
#include "optabs.h"
#include "tree-vectorizer.h"
#include "langhooks.h"

/* Extract the location of the basic block in the source code.
   Return the location of the first statement in BB that has one,
   and UNKNOWN_LOC if no statement in the block has a location.  */

LOC
find_bb_location (basic_block bb)
{
  gimple stmt = NULL;
  gimple_stmt_iterator si;

  if (!bb)
    return UNKNOWN_LOC;

  for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
    {
      stmt = gsi_stmt (si);
      if (gimple_location (stmt) != UNKNOWN_LOC)
        return gimple_location (stmt);
    }

  return UNKNOWN_LOC;
}


/* Recursively free the memory allocated for the SLP tree rooted at NODE.  */

static void
vect_free_slp_tree (slp_tree node)
{
  int i;
  slp_tree child;

  if (!node)
    return;

  FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
    vect_free_slp_tree (child);

  SLP_TREE_CHILDREN (node).release ();
  SLP_TREE_SCALAR_STMTS (node).release ();
  SLP_TREE_VEC_STMTS (node).release ();
  SLP_TREE_LOAD_PERMUTATION (node).release ();

  free (node);
}


/* Free the memory allocated for the SLP instance.  */

void
vect_free_slp_instance (slp_instance instance)
{
  vect_free_slp_tree (SLP_INSTANCE_TREE (instance));
  SLP_INSTANCE_LOADS (instance).release ();
  SLP_INSTANCE_BODY_COST_VEC (instance).release ();
  free (instance);
}


/* Create an SLP node for SCALAR_STMTS.  */

static slp_tree
vect_create_new_slp_node (vec<gimple> scalar_stmts)
{
  slp_tree node;
  gimple stmt = scalar_stmts[0];
  unsigned int nops;

  if (is_gimple_call (stmt))
    nops = gimple_call_num_args (stmt);
  else if (is_gimple_assign (stmt))
    {
      nops = gimple_num_ops (stmt) - 1;
      if (gimple_assign_rhs_code (stmt) == COND_EXPR)
        nops++;
    }
  else
    return NULL;

  node = XNEW (struct _slp_tree);
  SLP_TREE_SCALAR_STMTS (node) = scalar_stmts;
  SLP_TREE_VEC_STMTS (node).create (0);
  SLP_TREE_CHILDREN (node).create (nops);
  SLP_TREE_LOAD_PERMUTATION (node) = vNULL;

  return node;
}


/* Allocate operands info for NOPS operands, and GROUP_SIZE def-stmts for each
   operand.  */
static vec<slp_oprnd_info>
vect_create_oprnd_info (int nops, int group_size)
{
  int i;
  slp_oprnd_info oprnd_info;
  vec<slp_oprnd_info> oprnds_info;

  oprnds_info.create (nops);
  for (i = 0; i < nops; i++)
    {
      oprnd_info = XNEW (struct _slp_oprnd_info);
      oprnd_info->def_stmts.create (group_size);
      oprnd_info->first_dt = vect_uninitialized_def;
      oprnd_info->first_op_type = NULL_TREE;
      oprnd_info->first_pattern = false;
      oprnds_info.quick_push (oprnd_info);
    }

  return oprnds_info;
}


/* Free operands info.  */

static void
vect_free_oprnd_info (vec<slp_oprnd_info> &oprnds_info)
{
  int i;
  slp_oprnd_info oprnd_info;

  FOR_EACH_VEC_ELT (oprnds_info, i, oprnd_info)
    {
      oprnd_info->def_stmts.release ();
      XDELETE (oprnd_info);
    }

  oprnds_info.release ();
}


/* Find the place of the data-ref in STMT in the interleaving chain that starts
   from FIRST_STMT.  Return -1 if the data-ref is not a part of the chain.  */

static int
vect_get_place_in_interleaving_chain (gimple stmt, gimple first_stmt)
{
  gimple next_stmt = first_stmt;
  int result = 0;

  if (first_stmt != GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)))
    return -1;

  do
    {
      if (next_stmt == stmt)
        return result;
      result++;
      next_stmt = GROUP_NEXT_ELEMENT (vinfo_for_stmt (next_stmt));
    }
  while (next_stmt);

  return -1;
}
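
/* Illustrative example (not part of the original sources): for a grouped
   store chain built from

     a[0] = x;  a[1] = y;  a[2] = z;  a[3] = w;

   FIRST_STMT is the "a[0] = x" statement; the function above returns
   0, 1, 2 and 3 for the four stores respectively, and -1 for any stmt
   that is not part of this chain.  */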


/* Get the defs for the rhs of STMT (collect them in OPRNDS_INFO), check that
   they are of a valid type and that they match the defs of the first stmt of
   the SLP group (stored in OPRNDS_INFO).  */

static bool
vect_get_and_check_slp_defs (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo,
                             gimple stmt, bool first,
                             vec<slp_oprnd_info> *oprnds_info)
{
  tree oprnd;
  unsigned int i, number_of_oprnds;
  tree def;
  gimple def_stmt;
  enum vect_def_type dt = vect_uninitialized_def;
  struct loop *loop = NULL;
  bool pattern = false;
  slp_oprnd_info oprnd_info;
  int op_idx = 1;
  tree compare_rhs = NULL_TREE;

  if (loop_vinfo)
    loop = LOOP_VINFO_LOOP (loop_vinfo);

  if (is_gimple_call (stmt))
    {
      number_of_oprnds = gimple_call_num_args (stmt);
      op_idx = 3;
    }
  else if (is_gimple_assign (stmt))
    {
      number_of_oprnds = gimple_num_ops (stmt) - 1;
      if (gimple_assign_rhs_code (stmt) == COND_EXPR)
        number_of_oprnds++;
    }
  else
    return false;

  for (i = 0; i < number_of_oprnds; i++)
    {
      if (compare_rhs)
        {
          oprnd = compare_rhs;
          compare_rhs = NULL_TREE;
        }
      else
        oprnd = gimple_op (stmt, op_idx++);

      oprnd_info = (*oprnds_info)[i];

      if (COMPARISON_CLASS_P (oprnd))
        {
          compare_rhs = TREE_OPERAND (oprnd, 1);
          oprnd = TREE_OPERAND (oprnd, 0);
        }

      if (!vect_is_simple_use (oprnd, NULL, loop_vinfo, bb_vinfo, &def_stmt,
                               &def, &dt)
          || (!def_stmt && dt != vect_constant_def))
        {
          if (dump_enabled_p ())
            {
              dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                               "Build SLP failed: can't find def for ");
              dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM, oprnd);
              dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
            }

          return false;
        }

      /* Check if DEF_STMT is a part of a pattern in LOOP and get the def stmt
         from the pattern.  Check that all the stmts of the node are in the
         pattern.  */
      if (def_stmt && gimple_bb (def_stmt)
          && ((loop && flow_bb_inside_loop_p (loop, gimple_bb (def_stmt)))
              || (!loop && gimple_bb (def_stmt) == BB_VINFO_BB (bb_vinfo)
                  && gimple_code (def_stmt) != GIMPLE_PHI))
          && vinfo_for_stmt (def_stmt)
          && STMT_VINFO_IN_PATTERN_P (vinfo_for_stmt (def_stmt))
          && !STMT_VINFO_RELEVANT (vinfo_for_stmt (def_stmt))
          && !STMT_VINFO_LIVE_P (vinfo_for_stmt (def_stmt)))
        {
          pattern = true;
          if (!first && !oprnd_info->first_pattern)
            {
              if (dump_enabled_p ())
                {
                  dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                                   "Build SLP failed: some of the stmts"
                                   " are in a pattern, and others are not ");
                  dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM, oprnd);
                  dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
                }

              return false;
            }

          def_stmt = STMT_VINFO_RELATED_STMT (vinfo_for_stmt (def_stmt));
          dt = STMT_VINFO_DEF_TYPE (vinfo_for_stmt (def_stmt));

          if (dt == vect_unknown_def_type)
            {
              if (dump_enabled_p ())
                dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                                 "Unsupported pattern.\n");
              return false;
            }

          switch (gimple_code (def_stmt))
            {
            case GIMPLE_PHI:
              def = gimple_phi_result (def_stmt);
              break;

            case GIMPLE_ASSIGN:
              def = gimple_assign_lhs (def_stmt);
              break;

            default:
              if (dump_enabled_p ())
                dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                                 "unsupported defining stmt:\n");
              return false;
            }
        }

      if (first)
        {
          oprnd_info->first_dt = dt;
          oprnd_info->first_pattern = pattern;
          oprnd_info->first_op_type = TREE_TYPE (oprnd);
        }
      else
        {
          /* Not first stmt of the group, check that the def-stmt/s match
             the def-stmt/s of the first stmt.  Allow different definition
             types for reduction chains: the first stmt must be a
             vect_reduction_def (a phi node), and the rest
             vect_internal_def.  */
          if (((oprnd_info->first_dt != dt
                && !(oprnd_info->first_dt == vect_reduction_def
                     && dt == vect_internal_def)
                && !((oprnd_info->first_dt == vect_external_def
                      || oprnd_info->first_dt == vect_constant_def)
                     && (dt == vect_external_def
                         || dt == vect_constant_def)))
               || !types_compatible_p (oprnd_info->first_op_type,
                                       TREE_TYPE (oprnd))))
            {
              if (dump_enabled_p ())
                dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                                 "Build SLP failed: different types\n");

              return false;
            }
        }

      /* Check the types of the definitions.  */
      switch (dt)
        {
        case vect_constant_def:
        case vect_external_def:
        case vect_reduction_def:
          break;

        case vect_internal_def:
          oprnd_info->def_stmts.quick_push (def_stmt);
          break;

        default:
          /* FORNOW: Not supported.  */
          if (dump_enabled_p ())
            {
              dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                               "Build SLP failed: illegal type of def ");
              dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM, def);
              dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
            }

          return false;
        }
    }

  return true;
}

/* Verify that the scalar stmts STMTS are isomorphic, do not require a
   data permutation, and are not of an unsupported operation type.
   Return true if so; otherwise return false and indicate in *MATCHES
   which stmts are not isomorphic to the first one.  If MATCHES[0]
   is false, the comparison could not be carried out or the stmts
   will never be vectorized by SLP.  */

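/* Illustrative example (assumed, not taken from the original sources):
   the group

     a[0] = b[0] + c[0];
     a[1] = b[1] + c[1];

   is isomorphic (same operation on compatible types).  If the second
   stmt were "a[1] = b[1] - c[1]" instead, MATCHES[1] would be set to
   false while MATCHES[0] stays true, so the caller may still try
   e.g. operand swapping before giving up.  */
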
static bool
vect_build_slp_tree_1 (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo,
                       vec<gimple> stmts, unsigned int group_size,
                       unsigned nops, unsigned int *max_nunits,
                       unsigned int vectorization_factor, bool *matches)
{
  unsigned int i;
  gimple stmt = stmts[0];
  enum tree_code first_stmt_code = ERROR_MARK, rhs_code = ERROR_MARK;
  enum tree_code first_cond_code = ERROR_MARK;
  tree lhs;
  bool need_same_oprnds = false;
  tree vectype, scalar_type, first_op1 = NULL_TREE;
  optab optab;
  int icode;
  enum machine_mode optab_op2_mode;
  enum machine_mode vec_mode;
  struct data_reference *first_dr;
  HOST_WIDE_INT dummy;
  gimple first_load = NULL, prev_first_load = NULL, old_first_load = NULL;
  tree cond;

  /* For every stmt in NODE find its def stmt/s.  */
  FOR_EACH_VEC_ELT (stmts, i, stmt)
    {
      matches[i] = false;

      if (dump_enabled_p ())
        {
          dump_printf_loc (MSG_NOTE, vect_location, "Build SLP for ");
          dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
          dump_printf (MSG_NOTE, "\n");
        }

      /* Fail to vectorize statements marked as unvectorizable.  */
      if (!STMT_VINFO_VECTORIZABLE (vinfo_for_stmt (stmt)))
        {
          if (dump_enabled_p ())
            {
              dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                               "Build SLP failed: unvectorizable statement ");
              dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
              dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
            }
          /* Fatal mismatch.  */
          matches[0] = false;
          return false;
        }

      lhs = gimple_get_lhs (stmt);
      if (lhs == NULL_TREE)
        {
          if (dump_enabled_p ())
            {
              dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                               "Build SLP failed: not GIMPLE_ASSIGN nor "
                               "GIMPLE_CALL ");
              dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
              dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
            }
          /* Fatal mismatch.  */
          matches[0] = false;
          return false;
        }

      if (is_gimple_assign (stmt)
          && gimple_assign_rhs_code (stmt) == COND_EXPR
          && (cond = gimple_assign_rhs1 (stmt))
          && !COMPARISON_CLASS_P (cond))
        {
          if (dump_enabled_p ())
            {
              dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                               "Build SLP failed: condition is not "
                               "comparison ");
              dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
              dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
            }
          /* Fatal mismatch.  */
          matches[0] = false;
          return false;
        }

      scalar_type = vect_get_smallest_scalar_type (stmt, &dummy, &dummy);
      vectype = get_vectype_for_scalar_type (scalar_type);
      if (!vectype)
        {
          if (dump_enabled_p ())
            {
              dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                               "Build SLP failed: unsupported data-type ");
              dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
                                 scalar_type);
              dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
            }
          /* Fatal mismatch.  */
          matches[0] = false;
          return false;
        }

      /* In case of multiple types we need to detect the smallest type.  */
      if (*max_nunits < TYPE_VECTOR_SUBPARTS (vectype))
        {
          *max_nunits = TYPE_VECTOR_SUBPARTS (vectype);
          if (bb_vinfo)
            vectorization_factor = *max_nunits;
        }

      if (is_gimple_call (stmt))
        {
          rhs_code = CALL_EXPR;
          if (gimple_call_internal_p (stmt)
              || gimple_call_tail_p (stmt)
              || gimple_call_noreturn_p (stmt)
              || !gimple_call_nothrow_p (stmt)
              || gimple_call_chain (stmt))
            {
              if (dump_enabled_p ())
                {
                  dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                                   "Build SLP failed: unsupported call type ");
                  dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
                  dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
                }
              /* Fatal mismatch.  */
              matches[0] = false;
              return false;
            }
        }
      else
        rhs_code = gimple_assign_rhs_code (stmt);

      /* Check the operation.  */
      if (i == 0)
        {
          first_stmt_code = rhs_code;

          /* Shift arguments should be equal in all the packed stmts for a
             vector shift with scalar shift operand.  */
          if (rhs_code == LSHIFT_EXPR || rhs_code == RSHIFT_EXPR
              || rhs_code == LROTATE_EXPR
              || rhs_code == RROTATE_EXPR)
            {
              vec_mode = TYPE_MODE (vectype);

              /* First see if we have a vector/vector shift.  */
              optab = optab_for_tree_code (rhs_code, vectype,
                                           optab_vector);

              if (!optab
                  || optab_handler (optab, vec_mode) == CODE_FOR_nothing)
                {
                  /* No vector/vector shift, try for a vector/scalar shift.  */
                  optab = optab_for_tree_code (rhs_code, vectype,
                                               optab_scalar);

                  if (!optab)
                    {
                      if (dump_enabled_p ())
                        dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                                         "Build SLP failed: no optab.\n");
                      /* Fatal mismatch.  */
                      matches[0] = false;
                      return false;
                    }
                  icode = (int) optab_handler (optab, vec_mode);
                  if (icode == CODE_FOR_nothing)
                    {
                      if (dump_enabled_p ())
                        dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                                         "Build SLP failed: "
                                         "op not supported by target.\n");
                      /* Fatal mismatch.  */
                      matches[0] = false;
                      return false;
                    }
                  optab_op2_mode = insn_data[icode].operand[2].mode;
                  if (!VECTOR_MODE_P (optab_op2_mode))
                    {
                      need_same_oprnds = true;
                      first_op1 = gimple_assign_rhs2 (stmt);
                    }
                }
            }
          else if (rhs_code == WIDEN_LSHIFT_EXPR)
            {
              need_same_oprnds = true;
              first_op1 = gimple_assign_rhs2 (stmt);
            }
        }
      else
        {
          if (first_stmt_code != rhs_code
              && (first_stmt_code != IMAGPART_EXPR
                  || rhs_code != REALPART_EXPR)
              && (first_stmt_code != REALPART_EXPR
                  || rhs_code != IMAGPART_EXPR)
              && !(STMT_VINFO_GROUPED_ACCESS (vinfo_for_stmt (stmt))
                   && (first_stmt_code == ARRAY_REF
                       || first_stmt_code == BIT_FIELD_REF
                       || first_stmt_code == INDIRECT_REF
                       || first_stmt_code == COMPONENT_REF
                       || first_stmt_code == MEM_REF)))
            {
              if (dump_enabled_p ())
                {
                  dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                                   "Build SLP failed: different operation "
                                   "in stmt ");
                  dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
                  dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
                }
              /* Mismatch.  */
              continue;
            }

          if (need_same_oprnds
              && !operand_equal_p (first_op1, gimple_assign_rhs2 (stmt), 0))
            {
              if (dump_enabled_p ())
                {
                  dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                                   "Build SLP failed: different shift "
                                   "arguments in ");
                  dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
                  dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
                }
              /* Mismatch.  */
              continue;
            }

          if (rhs_code == CALL_EXPR)
            {
              gimple first_stmt = stmts[0];
              if (gimple_call_num_args (stmt) != nops
                  || !operand_equal_p (gimple_call_fn (first_stmt),
                                       gimple_call_fn (stmt), 0)
                  || gimple_call_fntype (first_stmt)
                     != gimple_call_fntype (stmt))
                {
                  if (dump_enabled_p ())
                    {
                      dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                                       "Build SLP failed: different calls in ");
                      dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
                                        stmt, 0);
                      dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
                    }
                  /* Mismatch.  */
                  continue;
                }
            }
        }

      /* Grouped store or load.  */
      if (STMT_VINFO_GROUPED_ACCESS (vinfo_for_stmt (stmt)))
        {
          if (REFERENCE_CLASS_P (lhs))
            {
              /* Store.  */
              ;
            }
          else
            {
              /* Load.  */
              unsigned unrolling_factor
                = least_common_multiple
                    (*max_nunits, group_size) / group_size;
              /* FORNOW: Check that there is no gap between the loads
                 and no gap between the groups when we need to load
                 multiple groups at once.
                 ??? We should enhance this to only disallow gaps
                 inside vectors.  */
              if ((unrolling_factor > 1
                   && GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)) == stmt
                   && GROUP_GAP (vinfo_for_stmt (stmt)) != 0)
                  || (GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)) != stmt
                      && GROUP_GAP (vinfo_for_stmt (stmt)) != 1))
                {
                  if (dump_enabled_p ())
                    {
                      dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                                       "Build SLP failed: grouped "
                                       "loads have gaps ");
                      dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
                                        stmt, 0);
                      dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
                    }
                  /* Fatal mismatch.  */
                  matches[0] = false;
                  return false;
                }

              /* Check that the size of interleaved loads group is not
                 greater than the SLP group size.  */
              unsigned ncopies
                = vectorization_factor / TYPE_VECTOR_SUBPARTS (vectype);
              if (loop_vinfo
                  && GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)) == stmt
                  && ((GROUP_SIZE (vinfo_for_stmt (stmt))
                       - GROUP_GAP (vinfo_for_stmt (stmt)))
                      > ncopies * group_size))
                {
                  if (dump_enabled_p ())
                    {
                      dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                                       "Build SLP failed: the number "
                                       "of interleaved loads is greater than "
                                       "the SLP group size ");
                      dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
                                        stmt, 0);
                      dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
                    }
                  /* Fatal mismatch.  */
                  matches[0] = false;
                  return false;
                }

              old_first_load = first_load;
              first_load = GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt));
              if (prev_first_load)
                {
                  /* Check that there are no loads from different interleaving
                     chains in the same node.  */
                  if (prev_first_load != first_load)
                    {
                      if (dump_enabled_p ())
                        {
                          dump_printf_loc (MSG_MISSED_OPTIMIZATION,
                                           vect_location,
                                           "Build SLP failed: different "
                                           "interleaving chains in one node ");
                          dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
                                            stmt, 0);
                          dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
                        }
                      /* Mismatch.  */
                      continue;
                    }
                }
              else
                prev_first_load = first_load;

              /* In some cases a group of loads is just the same load
                 repeated N times.  Only analyze its cost once.  */
              if (first_load == stmt && old_first_load != first_load)
                {
                  first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (stmt));
                  if (vect_supportable_dr_alignment (first_dr, false)
                      == dr_unaligned_unsupported)
                    {
                      if (dump_enabled_p ())
                        {
                          dump_printf_loc (MSG_MISSED_OPTIMIZATION,
                                           vect_location,
                                           "Build SLP failed: unsupported "
                                           "unaligned load ");
                          dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
                                            stmt, 0);
                          dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
                        }
                      /* Fatal mismatch.  */
                      matches[0] = false;
                      return false;
                    }
                }
            }
        } /* Grouped access.  */
      else
        {
          if (TREE_CODE_CLASS (rhs_code) == tcc_reference)
            {
              /* Not grouped load.  */
              if (dump_enabled_p ())
                {
                  dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                                   "Build SLP failed: not grouped load ");
                  dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
                  dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
                }

              /* FORNOW: Not grouped loads are not supported.  */
              /* Fatal mismatch.  */
              matches[0] = false;
              return false;
            }

          /* Not memory operation.  */
          if (TREE_CODE_CLASS (rhs_code) != tcc_binary
              && TREE_CODE_CLASS (rhs_code) != tcc_unary
              && rhs_code != COND_EXPR
              && rhs_code != CALL_EXPR)
            {
              if (dump_enabled_p ())
                {
                  dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                                   "Build SLP failed: operation");
                  dump_printf (MSG_MISSED_OPTIMIZATION, " unsupported ");
                  dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
                  dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
                }
              /* Fatal mismatch.  */
              matches[0] = false;
              return false;
            }

          if (rhs_code == COND_EXPR)
            {
              tree cond_expr = gimple_assign_rhs1 (stmt);

              if (i == 0)
                first_cond_code = TREE_CODE (cond_expr);
              else if (first_cond_code != TREE_CODE (cond_expr))
                {
                  if (dump_enabled_p ())
                    {
                      dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                                       "Build SLP failed: different"
                                       " operation");
                      dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
                                        stmt, 0);
                      dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
                    }
                  /* Mismatch.  */
                  continue;
                }
            }
        }

      matches[i] = true;
    }

  for (i = 0; i < group_size; ++i)
    if (!matches[i])
      return false;

  return true;
}

/* Recursively build an SLP tree starting from NODE.
   Fail (and return false) if def-stmts are not isomorphic, require a
   data permutation, or are of unsupported types of operation.
   Otherwise return true, with MATCHES recording which scalar stmts
   (if any) failed to match the first one.  */

static bool
vect_build_slp_tree (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo,
                     slp_tree *node, unsigned int group_size,
                     unsigned int *max_nunits,
                     vec<slp_tree> *loads,
                     unsigned int vectorization_factor,
                     bool *matches, unsigned *npermutes)
{
  unsigned nops, i, this_npermutes = 0;
  gimple stmt;

  if (!matches)
    matches = XALLOCAVEC (bool, group_size);
  if (!npermutes)
    npermutes = &this_npermutes;

  matches[0] = false;

  stmt = SLP_TREE_SCALAR_STMTS (*node)[0];
  if (is_gimple_call (stmt))
    nops = gimple_call_num_args (stmt);
  else if (is_gimple_assign (stmt))
    {
      nops = gimple_num_ops (stmt) - 1;
      if (gimple_assign_rhs_code (stmt) == COND_EXPR)
        nops++;
    }
  else
    return false;

  if (!vect_build_slp_tree_1 (loop_vinfo, bb_vinfo,
                              SLP_TREE_SCALAR_STMTS (*node), group_size, nops,
                              max_nunits, vectorization_factor, matches))
    return false;

  /* If the SLP node is a load, terminate the recursion.  */
  if (STMT_VINFO_GROUPED_ACCESS (vinfo_for_stmt (stmt))
      && DR_IS_READ (STMT_VINFO_DATA_REF (vinfo_for_stmt (stmt))))
    {
      loads->safe_push (*node);
      return true;
    }

  /* Get at the operands, verifying they are compatible.  */
  vec<slp_oprnd_info> oprnds_info = vect_create_oprnd_info (nops, group_size);
  slp_oprnd_info oprnd_info;
  FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (*node), i, stmt)
    {
      if (!vect_get_and_check_slp_defs (loop_vinfo, bb_vinfo,
                                        stmt, (i == 0), &oprnds_info))
        {
          vect_free_oprnd_info (oprnds_info);
          return false;
        }
    }

  stmt = SLP_TREE_SCALAR_STMTS (*node)[0];

  /* Create SLP_TREE nodes for the definition node/s.  */
  FOR_EACH_VEC_ELT (oprnds_info, i, oprnd_info)
    {
      slp_tree child;
      unsigned old_nloads = loads->length ();
      unsigned old_max_nunits = *max_nunits;

      if (oprnd_info->first_dt != vect_internal_def)
        continue;

      child = vect_create_new_slp_node (oprnd_info->def_stmts);
      if (!child)
        {
          vect_free_oprnd_info (oprnds_info);
          return false;
        }

      bool *matches = XALLOCAVEC (bool, group_size);
      if (vect_build_slp_tree (loop_vinfo, bb_vinfo, &child,
                               group_size, max_nunits, loads,
                               vectorization_factor, matches, npermutes))
        {
          oprnd_info->def_stmts = vNULL;
          SLP_TREE_CHILDREN (*node).quick_push (child);
          continue;
        }

      /* If the SLP build for operand zero failed and operand zero
         and one can be commutated try that for the scalar stmts
         that failed the match.  */
      if (i == 0
          /* A first scalar stmt mismatch signals a fatal mismatch.  */
          && matches[0]
          /* ??? For COND_EXPRs we can swap the comparison operands
             as well as the arms under some constraints.  */
          && nops == 2
          && oprnds_info[1]->first_dt == vect_internal_def
          && is_gimple_assign (stmt)
          && commutative_tree_code (gimple_assign_rhs_code (stmt))
          /* Do so only if the number of unsuccessful permutes was not
             more than a cut-off, as re-trying the recursive match on
             possibly each level of the tree would expose exponential
             behavior.  */
          && *npermutes < 4)
        {
          /* Roll back.  */
          *max_nunits = old_max_nunits;
          loads->truncate (old_nloads);
          /* Swap mismatched definition stmts.  */
          for (unsigned j = 0; j < group_size; ++j)
            if (!matches[j])
              {
                gimple tem = oprnds_info[0]->def_stmts[j];
                oprnds_info[0]->def_stmts[j] = oprnds_info[1]->def_stmts[j];
                oprnds_info[1]->def_stmts[j] = tem;
              }
          /* And try again ...  */
          if (vect_build_slp_tree (loop_vinfo, bb_vinfo, &child,
                                   group_size, max_nunits, loads,
                                   vectorization_factor,
                                   matches, npermutes))
            {
              oprnd_info->def_stmts = vNULL;
              SLP_TREE_CHILDREN (*node).quick_push (child);
              continue;
            }

          ++*npermutes;
        }

      oprnd_info->def_stmts = vNULL;
      vect_free_slp_tree (child);
      vect_free_oprnd_info (oprnds_info);
      return false;
    }

  vect_free_oprnd_info (oprnds_info);
  return true;
}

/* Dump a slp tree NODE using flags specified in DUMP_KIND.  */

static void
vect_print_slp_tree (int dump_kind, slp_tree node)
{
  int i;
  gimple stmt;
  slp_tree child;

  if (!node)
    return;

  dump_printf (dump_kind, "node ");
  FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, stmt)
    {
      dump_printf (dump_kind, "\n\tstmt %d ", i);
      dump_gimple_stmt (dump_kind, TDF_SLIM, stmt, 0);
    }
  dump_printf (dump_kind, "\n");

  FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
    vect_print_slp_tree (dump_kind, child);
}


/* Mark the tree rooted at NODE with MARK (PURE_SLP or HYBRID).
   If MARK is HYBRID, it refers to a specific stmt in NODE (the stmt at index
   J).  Otherwise, MARK is PURE_SLP and J is -1, which indicates that all the
   stmts in NODE are to be marked.  */

static void
vect_mark_slp_stmts (slp_tree node, enum slp_vect_type mark, int j)
{
  int i;
  gimple stmt;
  slp_tree child;

  if (!node)
    return;

  FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, stmt)
    if (j < 0 || i == j)
      STMT_SLP_TYPE (vinfo_for_stmt (stmt)) = mark;

  FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
    vect_mark_slp_stmts (child, mark, j);
}


/* Mark the statements of the tree rooted at NODE as relevant (vect_used).  */

static void
vect_mark_slp_stmts_relevant (slp_tree node)
{
  int i;
  gimple stmt;
  stmt_vec_info stmt_info;
  slp_tree child;

  if (!node)
    return;

  FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, stmt)
    {
      stmt_info = vinfo_for_stmt (stmt);
      gcc_assert (!STMT_VINFO_RELEVANT (stmt_info)
                  || STMT_VINFO_RELEVANT (stmt_info) == vect_used_in_scope);
      STMT_VINFO_RELEVANT (stmt_info) = vect_used_in_scope;
    }

  FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
    vect_mark_slp_stmts_relevant (child);
}


/* Rearrange the statements of NODE according to PERMUTATION.  */

static void
vect_slp_rearrange_stmts (slp_tree node, unsigned int group_size,
                          vec<unsigned> permutation)
{
  gimple stmt;
  vec<gimple> tmp_stmts;
  unsigned int i;
  slp_tree child;

  FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
    vect_slp_rearrange_stmts (child, group_size, permutation);

  gcc_assert (group_size == SLP_TREE_SCALAR_STMTS (node).length ());
  tmp_stmts.create (group_size);
  tmp_stmts.quick_grow_cleared (group_size);

  FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, stmt)
    tmp_stmts[permutation[i]] = stmt;

  SLP_TREE_SCALAR_STMTS (node).release ();
  SLP_TREE_SCALAR_STMTS (node) = tmp_stmts;
}


/* Check if the required load permutations in the SLP instance
   SLP_INSTN are supported.  */

static bool
vect_supported_load_permutation_p (slp_instance slp_instn)
{
  unsigned int group_size = SLP_INSTANCE_GROUP_SIZE (slp_instn);
  unsigned int i, j, k, next;
  sbitmap load_index;
  slp_tree node;
  gimple stmt, load, next_load, first_load;
  struct data_reference *dr;

  if (dump_enabled_p ())
    {
      dump_printf_loc (MSG_NOTE, vect_location, "Load permutation ");
      FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (slp_instn), i, node)
        if (node->load_permutation.exists ())
          FOR_EACH_VEC_ELT (node->load_permutation, j, next)
            dump_printf (MSG_NOTE, "%d ", next);
        else
          for (i = 0; i < group_size; ++i)
            dump_printf (MSG_NOTE, "%d ", i);
      dump_printf (MSG_NOTE, "\n");
    }

  /* In case of reduction every load permutation is allowed, since the order
     of the reduction statements is not important (as opposed to the case of
     grouped stores).  The only condition we need to check is that all the
     load nodes are of the same size and have the same permutation (and then
     rearrange all the nodes of the SLP instance according to this
     permutation).  */

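  /* E.g. (illustrative): for a reduction computed from two load nodes
     that both use the permutation { 1, 0 }, the stmts of the whole
     instance can simply be rearranged in that order and no runtime
     permutation is needed.  */
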
  /* Check that all the load nodes are of the same size.  */
  /* ??? Can't we assert this?  */
  FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (slp_instn), i, node)
    if (SLP_TREE_SCALAR_STMTS (node).length () != (unsigned) group_size)
      return false;

  node = SLP_INSTANCE_TREE (slp_instn);
  stmt = SLP_TREE_SCALAR_STMTS (node)[0];

  /* Reduction (there are no data-refs in the root).
     In reduction chain the order of the loads is important.  */
  if (!STMT_VINFO_DATA_REF (vinfo_for_stmt (stmt))
      && !GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)))
    {
      slp_tree load;
      unsigned int lidx;

      /* Compare all the permutation sequences to the first one.  We know
         that at least one load is permuted.  */
      node = SLP_INSTANCE_LOADS (slp_instn)[0];
      if (!node->load_permutation.exists ())
        return false;
      for (i = 1; SLP_INSTANCE_LOADS (slp_instn).iterate (i, &load); ++i)
        {
          if (!load->load_permutation.exists ())
            return false;
          FOR_EACH_VEC_ELT (load->load_permutation, j, lidx)
            if (lidx != node->load_permutation[j])
              return false;
        }

      /* Check that the loads in the first sequence are different and there
         are no gaps between them.  */
      load_index = sbitmap_alloc (group_size);
      bitmap_clear (load_index);
      FOR_EACH_VEC_ELT (node->load_permutation, i, lidx)
        {
          if (bitmap_bit_p (load_index, lidx))
            {
              sbitmap_free (load_index);
              return false;
            }
          bitmap_set_bit (load_index, lidx);
        }
      for (i = 0; i < group_size; i++)
        if (!bitmap_bit_p (load_index, i))
          {
            sbitmap_free (load_index);
            return false;
          }
      sbitmap_free (load_index);

      /* This permutation is valid for reduction.  Since the order of the
         statements in the nodes is not important unless they are memory
         accesses, we can rearrange the statements in all the nodes
         according to the order of the loads.  */
      vect_slp_rearrange_stmts (SLP_INSTANCE_TREE (slp_instn), group_size,
                                node->load_permutation);

      /* We are done, no actual permutations need to be generated.  */
      FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (slp_instn), i, node)
        SLP_TREE_LOAD_PERMUTATION (node).release ();
      return true;
    }

  /* In basic block vectorization we allow any subchain of an interleaving
     chain.
     FORNOW: not supported in loop SLP because of realignment complications.  */
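  /* Illustrative example (not from the original sources): given an
     interleaving chain a[0], a[1], a[2], a[3], a basic-block SLP node
     may use the subchain { a[2], a[3] }; the loads only have to be
     consecutive elements of the chain.  */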
  if (STMT_VINFO_BB_VINFO (vinfo_for_stmt (stmt)))
    {
      /* Check that for every node in the instance the loads
         form a subchain.  */
      FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (slp_instn), i, node)
        {
          next_load = NULL;
          FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), j, load)
            {
              if (j != 0 && next_load != load)
                return false;
              next_load = GROUP_NEXT_ELEMENT (vinfo_for_stmt (load));
            }
        }

      /* Check that the alignment of the first load in every subchain, i.e.,
         the first statement in every load node, is supported.
         ??? This belongs in alignment checking.  */
      FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (slp_instn), i, node)
        {
          first_load = SLP_TREE_SCALAR_STMTS (node)[0];
          if (first_load != GROUP_FIRST_ELEMENT (vinfo_for_stmt (first_load)))
            {
              dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_load));
              if (vect_supportable_dr_alignment (dr, false)
                  == dr_unaligned_unsupported)
                {
                  if (dump_enabled_p ())
                    {
                      dump_printf_loc (MSG_MISSED_OPTIMIZATION,
                                       vect_location,
                                       "unsupported unaligned load ");
                      dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
                                        first_load, 0);
                      dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
                    }
                  return false;
                }
            }
        }

      /* We are done, no actual permutations need to be generated.  */
      FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (slp_instn), i, node)
        SLP_TREE_LOAD_PERMUTATION (node).release ();
      return true;
    }

  /* FORNOW: the only supported permutation is 0..01..1.. of length equal to
     GROUP_SIZE, where each sequence of the same dr is also of length
     GROUP_SIZE (unless it's a reduction).  */
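  /* For example (illustrative only): with GROUP_SIZE == 2, the two load
     nodes { 0, 0 } and { 1, 1 } -- each reading one dr GROUP_SIZE
     times -- are accepted here, whereas { 0, 1 } and { 1, 0 } are not.  */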
  if (SLP_INSTANCE_LOADS (slp_instn).length () != group_size)
    return false;
  FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (slp_instn), i, node)
    if (!node->load_permutation.exists ())
      return false;

  load_index = sbitmap_alloc (group_size);
  bitmap_clear (load_index);
  FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (slp_instn), i, node)
    {
      unsigned int lidx = node->load_permutation[0];
      if (bitmap_bit_p (load_index, lidx))
        {
          sbitmap_free (load_index);
          return false;
        }
      bitmap_set_bit (load_index, lidx);
      FOR_EACH_VEC_ELT (node->load_permutation, j, k)
        if (k != lidx)
          {
            sbitmap_free (load_index);
            return false;
          }
    }
  for (i = 0; i < group_size; i++)
    if (!bitmap_bit_p (load_index, i))
      {
        sbitmap_free (load_index);
        return false;
      }
  sbitmap_free (load_index);

  FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (slp_instn), i, node)
    if (node->load_permutation.exists ()
        && !vect_transform_slp_perm_load
              (node, vNULL, NULL,
               SLP_INSTANCE_UNROLLING_FACTOR (slp_instn), slp_instn, true))
      return false;
  return true;
}


/* Find the first load in the loop that belongs to INSTANCE.
   When loads are in several SLP nodes, there can be a case in which the first
   load does not appear in the first SLP node to be transformed, causing
   incorrect order of statements.  Since we generate all the loads together,
   they must be inserted before the first load of the SLP instance and not
   before the first load of the first node of the instance.  */

static gimple
vect_find_first_load_in_slp_instance (slp_instance instance)
{
  int i, j;
  slp_tree load_node;
  gimple first_load = NULL, load;

  FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (instance), i, load_node)
    FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (load_node), j, load)
      first_load = get_earlier_stmt (load, first_load);

  return first_load;
}


/* Find the last store in SLP INSTANCE.  */

static gimple
vect_find_last_store_in_slp_instance (slp_instance instance)
{
  int i;
  slp_tree node;
  gimple last_store = NULL, store;

  node = SLP_INSTANCE_TREE (instance);
  for (i = 0; SLP_TREE_SCALAR_STMTS (node).iterate (i, &store); i++)
    last_store = get_later_stmt (store, last_store);

  return last_store;
}

/* Compute the cost for the SLP node NODE in the SLP instance INSTANCE.  */

static void
vect_analyze_slp_cost_1 (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo,
                         slp_instance instance, slp_tree node,
                         stmt_vector_for_cost *prologue_cost_vec,
                         unsigned ncopies_for_cost)
{
  stmt_vector_for_cost *body_cost_vec = &SLP_INSTANCE_BODY_COST_VEC (instance);

  unsigned i;
  slp_tree child;
  gimple stmt, s;
  stmt_vec_info stmt_info;
  tree lhs;
  unsigned group_size = SLP_INSTANCE_GROUP_SIZE (instance);

  /* Recurse down the SLP tree.  */
  FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
    vect_analyze_slp_cost_1 (loop_vinfo, bb_vinfo,
                             instance, child, prologue_cost_vec,
                             ncopies_for_cost);

  /* Look at the first scalar stmt to determine the cost.  */
  stmt = SLP_TREE_SCALAR_STMTS (node)[0];
  stmt_info = vinfo_for_stmt (stmt);
  if (STMT_VINFO_GROUPED_ACCESS (stmt_info))
    {
      if (DR_IS_WRITE (STMT_VINFO_DATA_REF (stmt_info)))
        vect_model_store_cost (stmt_info, ncopies_for_cost, false,
                               vect_uninitialized_def,
                               node, prologue_cost_vec, body_cost_vec);
      else
        {
          int i;
          gcc_checking_assert (DR_IS_READ (STMT_VINFO_DATA_REF (stmt_info)));
          vect_model_load_cost (stmt_info, ncopies_for_cost, false,
                                node, prologue_cost_vec, body_cost_vec);
          /* If the load is permuted record the cost for the permutation.
             ??? Loads from multiple chains are let through here only
             for a single special case involving complex numbers where
             in the end no permutation is necessary.  */
          FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, s)
            if ((STMT_VINFO_GROUP_FIRST_ELEMENT (vinfo_for_stmt (s))
                 == STMT_VINFO_GROUP_FIRST_ELEMENT (stmt_info))
                && vect_get_place_in_interleaving_chain
                     (s, STMT_VINFO_GROUP_FIRST_ELEMENT (stmt_info)) != i)
              {
                record_stmt_cost (body_cost_vec, group_size, vec_perm,
                                  stmt_info, 0, vect_body);
                break;
              }
        }
    }
  else
    record_stmt_cost (body_cost_vec, ncopies_for_cost, vector_stmt,
                      stmt_info, 0, vect_body);

  /* Scan operands and account for prologue cost of constants/externals.
     ??? This over-estimates cost for multiple uses and should be
     re-engineered.  */
  lhs = gimple_get_lhs (stmt);
  for (i = 0; i < gimple_num_ops (stmt); ++i)
    {
      tree def, op = gimple_op (stmt, i);
      gimple def_stmt;
      enum vect_def_type dt;
      if (!op || op == lhs)
        continue;
      if (vect_is_simple_use (op, NULL, loop_vinfo, bb_vinfo,
                              &def_stmt, &def, &dt)
          && (dt == vect_constant_def || dt == vect_external_def))
        record_stmt_cost (prologue_cost_vec, 1, vector_stmt,
                          stmt_info, 0, vect_prologue);
    }
}

/* Compute the cost for the SLP instance INSTANCE.  */

static void
vect_analyze_slp_cost (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo,
                       slp_instance instance, unsigned nunits)
{
  stmt_vector_for_cost body_cost_vec, prologue_cost_vec;
  unsigned ncopies_for_cost;
  stmt_info_for_cost *si;
  unsigned i;

  /* Calculate the number of vector stmts to create based on the unrolling
     factor (number of vectors is 1 if NUNITS >= GROUP_SIZE, and is
     GROUP_SIZE / NUNITS otherwise).  */
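  /* Worked example (illustrative): NUNITS == 4 and GROUP_SIZE == 6 give
     least_common_multiple (4, 6) / 4 == 12 / 4 == 3 copies.  */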
  unsigned group_size = SLP_INSTANCE_GROUP_SIZE (instance);
  ncopies_for_cost = least_common_multiple (nunits, group_size) / nunits;

  prologue_cost_vec.create (10);
  body_cost_vec.create (10);
  SLP_INSTANCE_BODY_COST_VEC (instance) = body_cost_vec;
  vect_analyze_slp_cost_1 (loop_vinfo, bb_vinfo,
                           instance, SLP_INSTANCE_TREE (instance),
                           &prologue_cost_vec, ncopies_for_cost);

  /* Record the prologue costs, which were delayed until we were
     sure that SLP was successful.  Unlike the body costs, we know
     the final values now regardless of the loop vectorization factor.  */
  void *data = (loop_vinfo ? LOOP_VINFO_TARGET_COST_DATA (loop_vinfo)
                : BB_VINFO_TARGET_COST_DATA (bb_vinfo));
  FOR_EACH_VEC_ELT (prologue_cost_vec, i, si)
    {
      struct _stmt_vec_info *stmt_info
        = si->stmt ? vinfo_for_stmt (si->stmt) : NULL;
      (void) add_stmt_cost (data, si->count, si->kind, stmt_info,
                            si->misalign, vect_prologue);
    }

  prologue_cost_vec.release ();
}

/* Analyze an SLP instance starting from a group of grouped stores.  Call
   vect_build_slp_tree to build a tree of packed stmts if possible.
   Return FALSE if it's impossible to SLP any stmt in the loop.  */

static bool
vect_analyze_slp_instance (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo,
                           gimple stmt)
{
  slp_instance new_instance;
  slp_tree node;
  unsigned int group_size = GROUP_SIZE (vinfo_for_stmt (stmt));
  unsigned int unrolling_factor = 1, nunits;
  tree vectype, scalar_type = NULL_TREE;
  gimple next;
  unsigned int vectorization_factor = 0;
  int i;
  unsigned int max_nunits = 0;
  vec<slp_tree> loads;
  struct data_reference *dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (stmt));
  vec<gimple> scalar_stmts;

  if (GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)))
    {
      if (dr)
        {
          scalar_type = TREE_TYPE (DR_REF (dr));
          vectype = get_vectype_for_scalar_type (scalar_type);
        }
      else
        {
          gcc_assert (loop_vinfo);
          vectype = STMT_VINFO_VECTYPE (vinfo_for_stmt (stmt));
        }

      group_size = GROUP_SIZE (vinfo_for_stmt (stmt));
    }
  else
    {
      gcc_assert (loop_vinfo);
      vectype = STMT_VINFO_VECTYPE (vinfo_for_stmt (stmt));
      group_size = LOOP_VINFO_REDUCTIONS (loop_vinfo).length ();
    }

  if (!vectype)
    {
      if (dump_enabled_p ())
        {
          dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                           "Build SLP failed: unsupported data-type ");
          dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM, scalar_type);
          dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
        }

      return false;
    }

  nunits = TYPE_VECTOR_SUBPARTS (vectype);
  if (loop_vinfo)
    vectorization_factor = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
  else
    vectorization_factor = nunits;

  /* Calculate the unrolling factor.  */
  unrolling_factor = least_common_multiple (nunits, group_size) / group_size;
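  /* E.g. (illustrative): NUNITS == 4 and GROUP_SIZE == 2 give
     least_common_multiple (4, 2) / 2 == 2, i.e. the group must be
     unrolled twice to fill a vector.  */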
  if (unrolling_factor != 1 && !loop_vinfo)
    {
      if (dump_enabled_p ())
        dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                         "Build SLP failed: unrolling required in basic"
                         " block SLP\n");

      return false;
    }

  /* Create a node (a root of the SLP tree) for the packed grouped stores.  */
  scalar_stmts.create (group_size);
  next = stmt;
  if (GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)))
    {
      /* Collect the stores and store them in SLP_TREE_SCALAR_STMTS.  */
      while (next)
        {
          if (STMT_VINFO_IN_PATTERN_P (vinfo_for_stmt (next))
              && STMT_VINFO_RELATED_STMT (vinfo_for_stmt (next)))
            scalar_stmts.safe_push (
                  STMT_VINFO_RELATED_STMT (vinfo_for_stmt (next)));
          else
            scalar_stmts.safe_push (next);
          next = GROUP_NEXT_ELEMENT (vinfo_for_stmt (next));
        }
    }
  else
    {
      /* Collect reduction statements.  */
      vec<gimple> reductions = LOOP_VINFO_REDUCTIONS (loop_vinfo);
      for (i = 0; reductions.iterate (i, &next); i++)
        scalar_stmts.safe_push (next);
    }

  node = vect_create_new_slp_node (scalar_stmts);

  loads.create (group_size);

  /* Build the tree for the SLP instance.  */
  if (vect_build_slp_tree (loop_vinfo, bb_vinfo, &node, group_size,
                           &max_nunits, &loads,
                           vectorization_factor, NULL, NULL))
    {
      /* Calculate the unrolling factor based on the smallest type.  */
      if (max_nunits > nunits)
        unrolling_factor = least_common_multiple (max_nunits, group_size)
                           / group_size;

      if (unrolling_factor != 1 && !loop_vinfo)
        {
          if (dump_enabled_p ())
            dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                             "Build SLP failed: unrolling required in basic"
                             " block SLP\n");
          vect_free_slp_tree (node);
          loads.release ();
          return false;
        }

      /* Create a new SLP instance.  */
      new_instance = XNEW (struct _slp_instance);
      SLP_INSTANCE_TREE (new_instance) = node;
      SLP_INSTANCE_GROUP_SIZE (new_instance) = group_size;
      SLP_INSTANCE_UNROLLING_FACTOR (new_instance) = unrolling_factor;
      SLP_INSTANCE_BODY_COST_VEC (new_instance) = vNULL;
      SLP_INSTANCE_LOADS (new_instance) = loads;
      SLP_INSTANCE_FIRST_LOAD_STMT (new_instance) = NULL;

      /* Compute the load permutation.  */
      slp_tree load_node;
      bool loads_permuted = false;
      FOR_EACH_VEC_ELT (loads, i, load_node)
        {
          vec<unsigned> load_permutation;
          int j;
          gimple load, first_stmt;
          bool this_load_permuted = false;
          load_permutation.create (group_size);
          first_stmt = GROUP_FIRST_ELEMENT
              (vinfo_for_stmt (SLP_TREE_SCALAR_STMTS (load_node)[0]));
          FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (load_node), j, load)
            {
              int load_place
                = vect_get_place_in_interleaving_chain (load, first_stmt);
              gcc_assert (load_place != -1);
              if (load_place != j)
                this_load_permuted = true;
              load_permutation.safe_push (load_place);
            }
          if (!this_load_permuted)
            {
              load_permutation.release ();
              continue;
            }
          SLP_TREE_LOAD_PERMUTATION (load_node) = load_permutation;
          loads_permuted = true;
        }

      if (loads_permuted)
        {
          if (!vect_supported_load_permutation_p (new_instance))
            {
              if (dump_enabled_p ())
                {
                  dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                                   "Build SLP failed: unsupported load "
                                   "permutation ");
                  dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
                  dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
                }
              vect_free_slp_instance (new_instance);
              return false;
            }

          SLP_INSTANCE_FIRST_LOAD_STMT (new_instance)
            = vect_find_first_load_in_slp_instance (new_instance);
        }

      /* Compute the costs of this SLP instance.  */
      vect_analyze_slp_cost (loop_vinfo, bb_vinfo,
                             new_instance, TYPE_VECTOR_SUBPARTS (vectype));

      if (loop_vinfo)
        LOOP_VINFO_SLP_INSTANCES (loop_vinfo).safe_push (new_instance);
      else
        BB_VINFO_SLP_INSTANCES (bb_vinfo).safe_push (new_instance);

      if (dump_enabled_p ())
        vect_print_slp_tree (MSG_NOTE, node);

      return true;
    }

  /* Failed to SLP.  */
  /* Free the allocated memory.  */
  vect_free_slp_tree (node);
  loads.release ();

  return false;
}


/* Check if there are stmts in the loop that can be vectorized using SLP.
   Build SLP trees of packed scalar stmts if SLP is possible.  */

bool
vect_analyze_slp (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo)
{
  unsigned int i;
  vec<gimple> grouped_stores;
  vec<gimple> reductions = vNULL;
  vec<gimple> reduc_chains = vNULL;
  gimple first_element;
  bool ok = false;

  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location, "=== vect_analyze_slp ===\n");

  if (loop_vinfo)
    {
      grouped_stores = LOOP_VINFO_GROUPED_STORES (loop_vinfo);
      reduc_chains = LOOP_VINFO_REDUCTION_CHAINS (loop_vinfo);
      reductions = LOOP_VINFO_REDUCTIONS (loop_vinfo);
    }
  else
    grouped_stores = BB_VINFO_GROUPED_STORES (bb_vinfo);

  /* Find SLP sequences starting from groups of grouped stores.  */
  FOR_EACH_VEC_ELT (grouped_stores, i, first_element)
    if (vect_analyze_slp_instance (loop_vinfo, bb_vinfo, first_element))
      ok = true;

  if (bb_vinfo && !ok)
    {
      if (dump_enabled_p ())
        dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                         "Failed to SLP the basic block.\n");

      return false;
    }

  if (loop_vinfo
      && LOOP_VINFO_REDUCTION_CHAINS (loop_vinfo).length () > 0)
    {
      /* Find SLP sequences starting from reduction chains.  */
      FOR_EACH_VEC_ELT (reduc_chains, i, first_element)
        if (vect_analyze_slp_instance (loop_vinfo, bb_vinfo, first_element))
          ok = true;
        else
          return false;

      /* Don't try to vectorize SLP reductions if reduction chain was
         detected.  */
      return ok;
    }

  /* Find SLP sequences starting from groups of reductions.  */
  if (loop_vinfo && LOOP_VINFO_REDUCTIONS (loop_vinfo).length () > 1
      && vect_analyze_slp_instance (loop_vinfo, bb_vinfo, reductions[0]))
    ok = true;

  return true;
}


/* For each possible SLP instance decide whether to SLP it and calculate the
   overall unrolling factor needed to SLP the loop.  Return TRUE if we decided
   to SLP at least one instance.  */

bool
vect_make_slp_decision (loop_vec_info loop_vinfo)
{
  unsigned int i, unrolling_factor = 1;
  vec<slp_instance> slp_instances = LOOP_VINFO_SLP_INSTANCES (loop_vinfo);
  slp_instance instance;
  int decided_to_slp = 0;

  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location, "=== vect_make_slp_decision ==="
                     "\n");

  FOR_EACH_VEC_ELT (slp_instances, i, instance)
    {
      /* FORNOW: SLP if you can.  */
      if (unrolling_factor < SLP_INSTANCE_UNROLLING_FACTOR (instance))
        unrolling_factor = SLP_INSTANCE_UNROLLING_FACTOR (instance);

      /* Mark all the stmts that belong to INSTANCE as PURE_SLP stmts.  Later
         we call vect_detect_hybrid_slp () to find stmts that need hybrid SLP
         and loop-based vectorization.  Such stmts will be marked as HYBRID.  */
      vect_mark_slp_stmts (SLP_INSTANCE_TREE (instance), pure_slp, -1);
      decided_to_slp++;
    }

  LOOP_VINFO_SLP_UNROLLING_FACTOR (loop_vinfo) = unrolling_factor;

  if (decided_to_slp && dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location,
                     "Decided to SLP %d instances.  Unrolling factor %d\n",
                     decided_to_slp, unrolling_factor);

  return (decided_to_slp > 0);
}


/* Find stmts that must be both vectorized and SLPed (since they feed stmts that
   can't be SLPed) in the tree rooted at NODE.  Mark such stmts as HYBRID.  */

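/* Illustrative example (not from the original sources): if a PURE_SLP
   stmt computes t_1 = x + y and a scalar (non-SLP but still relevant)
   stmt in the loop also uses t_1, the defining stmt must be vectorized
   by SLP and kept for loop-based vectorization as well, i.e. marked
   HYBRID.  */
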
1741 static void
1742 vect_detect_hybrid_slp_stmts (slp_tree node)
1743 {
1744 int i;
1745 vec<gimple> stmts = SLP_TREE_SCALAR_STMTS (node);
1746 gimple stmt = stmts[0];
1747 imm_use_iterator imm_iter;
1748 gimple use_stmt;
1749 stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
1750 slp_tree child;
1751 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
1752 struct loop *loop = NULL;
1753 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_vinfo);
1754 basic_block bb = NULL;
1755
1756 if (!node)
1757 return;
1758
1759 if (loop_vinfo)
1760 loop = LOOP_VINFO_LOOP (loop_vinfo);
1761 else
1762 bb = BB_VINFO_BB (bb_vinfo);
1763
1764 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, stmt)
1765 if (PURE_SLP_STMT (vinfo_for_stmt (stmt))
1766 && TREE_CODE (gimple_op (stmt, 0)) == SSA_NAME)
1767 FOR_EACH_IMM_USE_STMT (use_stmt, imm_iter, gimple_op (stmt, 0))
1768 if (gimple_bb (use_stmt)
1769 && ((loop && flow_bb_inside_loop_p (loop, gimple_bb (use_stmt)))
1770 || bb == gimple_bb (use_stmt))
1771 && (stmt_vinfo = vinfo_for_stmt (use_stmt))
1772 && !STMT_SLP_TYPE (stmt_vinfo)
1773 && (STMT_VINFO_RELEVANT (stmt_vinfo)
1774 || VECTORIZABLE_CYCLE_DEF (STMT_VINFO_DEF_TYPE (stmt_vinfo)))
1775 && !(gimple_code (use_stmt) == GIMPLE_PHI
1776 && STMT_VINFO_DEF_TYPE (stmt_vinfo)
1777 == vect_reduction_def))
1778 vect_mark_slp_stmts (node, hybrid, i);
1779
1780 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
1781 vect_detect_hybrid_slp_stmts (child);
1782 }
1783
1784
1785 /* Find stmts that must be both vectorized and SLPed. */
1786
1787 void
1788 vect_detect_hybrid_slp (loop_vec_info loop_vinfo)
1789 {
1790 unsigned int i;
1791 vec<slp_instance> slp_instances = LOOP_VINFO_SLP_INSTANCES (loop_vinfo);
1792 slp_instance instance;
1793
1794 if (dump_enabled_p ())
1795 dump_printf_loc (MSG_NOTE, vect_location, "=== vect_detect_hybrid_slp ==="
1796 "\n");
1797
1798 FOR_EACH_VEC_ELT (slp_instances, i, instance)
1799 vect_detect_hybrid_slp_stmts (SLP_INSTANCE_TREE (instance));
1800 }
1801
1802
1803 /* Create and initialize a new bb_vec_info struct for BB, as well as
1804 stmt_vec_info structs for all the stmts in it. */
1805
1806 static bb_vec_info
1807 new_bb_vec_info (basic_block bb)
1808 {
1809 bb_vec_info res = NULL;
1810 gimple_stmt_iterator gsi;
1811
1812 res = (bb_vec_info) xcalloc (1, sizeof (struct _bb_vec_info));
1813 BB_VINFO_BB (res) = bb;
1814
1815 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
1816 {
1817 gimple stmt = gsi_stmt (gsi);
1818 gimple_set_uid (stmt, 0);
1819 set_vinfo_for_stmt (stmt, new_stmt_vec_info (stmt, NULL, res));
1820 }
1821
1822 BB_VINFO_GROUPED_STORES (res).create (10);
1823 BB_VINFO_SLP_INSTANCES (res).create (2);
1824 BB_VINFO_TARGET_COST_DATA (res) = init_cost (NULL);
1825
1826 bb->aux = res;
1827 return res;
1828 }
1829
1830
1831 /* Free BB_VINFO struct, as well as all the stmt_vec_info structs of all the
1832 stmts in the basic block. */
1833
1834 static void
1835 destroy_bb_vec_info (bb_vec_info bb_vinfo)
1836 {
1837 vec<slp_instance> slp_instances;
1838 slp_instance instance;
1839 basic_block bb;
1840 gimple_stmt_iterator si;
1841 unsigned i;
1842
1843 if (!bb_vinfo)
1844 return;
1845
1846 bb = BB_VINFO_BB (bb_vinfo);
1847
1848 for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
1849 {
1850 gimple stmt = gsi_stmt (si);
1851 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1852
1853 if (stmt_info)
1854 /* Free stmt_vec_info. */
1855 free_stmt_vec_info (stmt);
1856 }
1857
1858 vect_destroy_datarefs (NULL, bb_vinfo);
1859 free_dependence_relations (BB_VINFO_DDRS (bb_vinfo));
1860 BB_VINFO_GROUPED_STORES (bb_vinfo).release ();
1861 slp_instances = BB_VINFO_SLP_INSTANCES (bb_vinfo);
1862 FOR_EACH_VEC_ELT (slp_instances, i, instance)
1863 vect_free_slp_instance (instance);
1864 BB_VINFO_SLP_INSTANCES (bb_vinfo).release ();
1865 destroy_cost_data (BB_VINFO_TARGET_COST_DATA (bb_vinfo));
1866 free (bb_vinfo);
1867 bb->aux = NULL;
1868 }
1869
1870
1871 /* Analyze statements contained in SLP tree node after recursively analyzing
1872 the subtree. Return TRUE if the operations are supported. */
1873
1874 static bool
1875 vect_slp_analyze_node_operations (bb_vec_info bb_vinfo, slp_tree node)
1876 {
1877 bool dummy;
1878 int i;
1879 gimple stmt;
1880 slp_tree child;
1881
1882 if (!node)
1883 return true;
1884
1885 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
1886 if (!vect_slp_analyze_node_operations (bb_vinfo, child))
1887 return false;
1888
1889 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, stmt)
1890 {
1891 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1892 gcc_assert (stmt_info);
1893 gcc_assert (PURE_SLP_STMT (stmt_info));
1894
1895 if (!vect_analyze_stmt (stmt, &dummy, node))
1896 return false;
1897 }
1898
1899 return true;
1900 }
1901
1902
1903 /* Analyze statements in SLP instances of the basic block. Return TRUE if the
1904 operations are supported. */
1905
1906 static bool
1907 vect_slp_analyze_operations (bb_vec_info bb_vinfo)
1908 {
1909 vec<slp_instance> slp_instances = BB_VINFO_SLP_INSTANCES (bb_vinfo);
1910 slp_instance instance;
1911 int i;
1912
1913 for (i = 0; slp_instances.iterate (i, &instance); )
1914 {
1915 if (!vect_slp_analyze_node_operations (bb_vinfo,
1916 SLP_INSTANCE_TREE (instance)))
1917 {
1918 vect_free_slp_instance (instance);
1919 slp_instances.ordered_remove (i);
1920 }
1921 else
1922 i++;
1923 }
1924
1925 if (!slp_instances.length ())
1926 return false;
1927
1928 return true;
1929 }
1930
1931
1932 /* Compute the scalar cost of the SLP node NODE and its children
1933 and return it. Do not account stmts that are marked in LIFE,
1934 and update LIFE according to uses of NODE. */
1935
1936 static unsigned
1937 vect_bb_slp_scalar_cost (basic_block bb,
1938 slp_tree node, vec<bool, va_heap> *life)
1939 {
1940 unsigned scalar_cost = 0;
1941 unsigned i;
1942 gimple stmt;
1943 slp_tree child;
1944
1945 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, stmt)
1946 {
1947 unsigned stmt_cost;
1948 ssa_op_iter op_iter;
1949 def_operand_p def_p;
1950 stmt_vec_info stmt_info;
1951
1952 if ((*life)[i])
1953 continue;
1954
1955 /* If there is a non-vectorized use of the defs then the scalar
1956 stmt is kept live, in which case we do not account it or any
1957 required defs in the SLP children in the scalar cost. This
1958 way vectorization is made more costly when compared to
1959 the scalar cost. */
1960 FOR_EACH_SSA_DEF_OPERAND (def_p, stmt, op_iter, SSA_OP_DEF)
1961 {
1962 imm_use_iterator use_iter;
1963 gimple use_stmt;
1964 FOR_EACH_IMM_USE_STMT (use_stmt, use_iter, DEF_FROM_PTR (def_p))
1965 if (gimple_code (use_stmt) == GIMPLE_PHI
1966 || gimple_bb (use_stmt) != bb
1967 || !STMT_VINFO_VECTORIZABLE (vinfo_for_stmt (use_stmt)))
1968 {
1969 (*life)[i] = true;
1970 BREAK_FROM_IMM_USE_STMT (use_iter);
1971 }
1972 }
1973 if ((*life)[i])
1974 continue;
1975
1976 stmt_info = vinfo_for_stmt (stmt);
1977 if (STMT_VINFO_DATA_REF (stmt_info))
1978 {
1979 if (DR_IS_READ (STMT_VINFO_DATA_REF (stmt_info)))
1980 stmt_cost = vect_get_stmt_cost (scalar_load);
1981 else
1982 stmt_cost = vect_get_stmt_cost (scalar_store);
1983 }
1984 else
1985 stmt_cost = vect_get_stmt_cost (scalar_stmt);
1986
1987 scalar_cost += stmt_cost;
1988 }
1989
1990 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
1991 scalar_cost += vect_bb_slp_scalar_cost (bb, child, life);
1992
1993 return scalar_cost;
1994 }
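
/* For illustration (assumed situation): in an SLP group of two scalar
   stmts where the first stmt's def also has a use outside the basic
   block, LIFE becomes {true, false}; only the second stmt is accounted,
   e.g. scalar_cost = vect_get_stmt_cost (scalar_stmt).  Leaving the
   live stmt out of the scalar cost deliberately makes vectorization
   look relatively more expensive.  */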
1995
1996 /* Check if vectorization of the basic block is profitable. */
1997
1998 static bool
1999 vect_bb_vectorization_profitable_p (bb_vec_info bb_vinfo)
2000 {
2001 vec<slp_instance> slp_instances = BB_VINFO_SLP_INSTANCES (bb_vinfo);
2002 slp_instance instance;
2003 int i, j;
2004 unsigned int vec_inside_cost = 0, vec_outside_cost = 0, scalar_cost = 0;
2005 unsigned int vec_prologue_cost = 0, vec_epilogue_cost = 0;
2006 void *target_cost_data = BB_VINFO_TARGET_COST_DATA (bb_vinfo);
2007 stmt_vec_info stmt_info = NULL;
2008 stmt_vector_for_cost body_cost_vec;
2009 stmt_info_for_cost *ci;
2010
2011 /* Calculate vector costs. */
2012 FOR_EACH_VEC_ELT (slp_instances, i, instance)
2013 {
2014 body_cost_vec = SLP_INSTANCE_BODY_COST_VEC (instance);
2015
2016 FOR_EACH_VEC_ELT (body_cost_vec, j, ci)
2017 {
2018 stmt_info = ci->stmt ? vinfo_for_stmt (ci->stmt) : NULL;
2019 (void) add_stmt_cost (target_cost_data, ci->count, ci->kind,
2020 stmt_info, ci->misalign, vect_body);
2021 }
2022 }
2023
2024 /* Calculate scalar cost. */
2025 FOR_EACH_VEC_ELT (slp_instances, i, instance)
2026 {
2027 stack_vec<bool, 20> life;
2028 life.safe_grow_cleared (SLP_INSTANCE_GROUP_SIZE (instance));
2029 scalar_cost += vect_bb_slp_scalar_cost (BB_VINFO_BB (bb_vinfo),
2030 SLP_INSTANCE_TREE (instance),
2031 &life);
2032 }
2033
2034 /* Complete the target-specific cost calculation. */
2035 finish_cost (BB_VINFO_TARGET_COST_DATA (bb_vinfo), &vec_prologue_cost,
2036 &vec_inside_cost, &vec_epilogue_cost);
2037
2038 vec_outside_cost = vec_prologue_cost + vec_epilogue_cost;
2039
2040 if (dump_enabled_p ())
2041 {
2042 dump_printf_loc (MSG_NOTE, vect_location, "Cost model analysis: \n");
2043 dump_printf (MSG_NOTE, " Vector inside of basic block cost: %d\n",
2044 vec_inside_cost);
2045 dump_printf (MSG_NOTE, " Vector prologue cost: %d\n", vec_prologue_cost);
2046 dump_printf (MSG_NOTE, " Vector epilogue cost: %d\n", vec_epilogue_cost);
2047 dump_printf (MSG_NOTE, " Scalar cost of basic block: %d\n", scalar_cost);
2048 }
2049
2050 /* Vectorization is profitable if its cost is less than the cost of the
2051 scalar version. */
2052 if (vec_outside_cost + vec_inside_cost >= scalar_cost)
2053 return false;
2054
2055 return true;
2056 }
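
/* A minimal worked example of the test above (costs assumed, in target
   cost units): vec_prologue_cost = 1, vec_epilogue_cost = 0 and
   vec_inside_cost = 4 give vec_outside_cost + vec_inside_cost = 5;
   with scalar_cost = 6 the block is profitable (5 < 6), while with
   scalar_cost = 5 the test 5 >= 5 rejects it.  */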
2057
2058 /* Check if the basic block can be vectorized. */
2059
2060 static bb_vec_info
2061 vect_slp_analyze_bb_1 (basic_block bb)
2062 {
2063 bb_vec_info bb_vinfo;
2064 vec<slp_instance> slp_instances;
2065 slp_instance instance;
2066 int i;
2067 int min_vf = 2;
2068
2069 bb_vinfo = new_bb_vec_info (bb);
2070 if (!bb_vinfo)
2071 return NULL;
2072
2073 if (!vect_analyze_data_refs (NULL, bb_vinfo, &min_vf))
2074 {
2075 if (dump_enabled_p ())
2076 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2077 "not vectorized: unhandled data-ref in basic "
2078 "block.\n");
2079
2080 destroy_bb_vec_info (bb_vinfo);
2081 return NULL;
2082 }
2083
2084 if (BB_VINFO_DATAREFS (bb_vinfo).length () < 2)
2085 {
2086 if (dump_enabled_p ())
2087 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2088 "not vectorized: not enough data-refs in "
2089 "basic block.\n");
2090
2091 destroy_bb_vec_info (bb_vinfo);
2092 return NULL;
2093 }
2094
2095 if (!vect_analyze_data_ref_accesses (NULL, bb_vinfo))
2096 {
2097 if (dump_enabled_p ())
2098 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2099 "not vectorized: unhandled data access in "
2100 "basic block.\n");
2101
2102 destroy_bb_vec_info (bb_vinfo);
2103 return NULL;
2104 }
2105
2106 vect_pattern_recog (NULL, bb_vinfo);
2107
2108 if (!vect_slp_analyze_data_ref_dependences (bb_vinfo))
2109 {
2110 if (dump_enabled_p ())
2111 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2112 "not vectorized: unhandled data dependence "
2113 "in basic block.\n");
2114
2115 destroy_bb_vec_info (bb_vinfo);
2116 return NULL;
2117 }
2118
2119 if (!vect_analyze_data_refs_alignment (NULL, bb_vinfo))
2120 {
2121 if (dump_enabled_p ())
2122 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2123 "not vectorized: bad data alignment in basic "
2124 "block.\n");
2125
2126 destroy_bb_vec_info (bb_vinfo);
2127 return NULL;
2128 }
2129
2130 /* Check the SLP opportunities in the basic block, analyze and build SLP
2131 trees. */
2132 if (!vect_analyze_slp (NULL, bb_vinfo))
2133 {
2134 if (dump_enabled_p ())
2135 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2136 "not vectorized: failed to find SLP opportunities "
2137 "in basic block.\n");
2138
2139 destroy_bb_vec_info (bb_vinfo);
2140 return NULL;
2141 }
2142
2143 slp_instances = BB_VINFO_SLP_INSTANCES (bb_vinfo);
2144
2145 /* Mark all the statements that we want to vectorize as pure SLP and
2146 relevant. */
2147 FOR_EACH_VEC_ELT (slp_instances, i, instance)
2148 {
2149 vect_mark_slp_stmts (SLP_INSTANCE_TREE (instance), pure_slp, -1);
2150 vect_mark_slp_stmts_relevant (SLP_INSTANCE_TREE (instance));
2151 }
2152
2153 if (!vect_verify_datarefs_alignment (NULL, bb_vinfo))
2154 {
2155 if (dump_enabled_p ())
2156 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2157 "not vectorized: unsupported alignment in basic "
2158 "block.\n");
2159 destroy_bb_vec_info (bb_vinfo);
2160 return NULL;
2161 }
2162
2163 if (!vect_slp_analyze_operations (bb_vinfo))
2164 {
2165 if (dump_enabled_p ())
2166 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2167 "not vectorized: bad operation in basic block.\n");
2168
2169 destroy_bb_vec_info (bb_vinfo);
2170 return NULL;
2171 }
2172
2173 /* Cost model: check if the vectorization is worthwhile. */
2174 if (!unlimited_cost_model ()
2175 && !vect_bb_vectorization_profitable_p (bb_vinfo))
2176 {
2177 if (dump_enabled_p ())
2178 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2179 "not vectorized: vectorization is not "
2180 "profitable.\n");
2181
2182 destroy_bb_vec_info (bb_vinfo);
2183 return NULL;
2184 }
2185
2186 if (dump_enabled_p ())
2187 dump_printf_loc (MSG_NOTE, vect_location,
2188 "Basic block will be vectorized using SLP\n");
2189
2190 return bb_vinfo;
2191 }
2192
2193
2194 bb_vec_info
2195 vect_slp_analyze_bb (basic_block bb)
2196 {
2197 bb_vec_info bb_vinfo;
2198 int insns = 0;
2199 gimple_stmt_iterator gsi;
2200 unsigned int vector_sizes;
2201
2202 if (dump_enabled_p ())
2203 dump_printf_loc (MSG_NOTE, vect_location, "=== vect_slp_analyze_bb ===\n");
2204
2205 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
2206 {
2207 gimple stmt = gsi_stmt (gsi);
2208 if (!is_gimple_debug (stmt)
2209 && !gimple_nop_p (stmt)
2210 && gimple_code (stmt) != GIMPLE_LABEL)
2211 insns++;
2212 }
2213
2214 if (insns > PARAM_VALUE (PARAM_SLP_MAX_INSNS_IN_BB))
2215 {
2216 if (dump_enabled_p ())
2217 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2218 "not vectorized: too many instructions in "
2219 "basic block.\n");
2220
2221 return NULL;
2222 }
2223
2224 /* Autodetect first vector size we try. */
2225 current_vector_size = 0;
2226 vector_sizes = targetm.vectorize.autovectorize_vector_sizes ();
2227
2228 while (1)
2229 {
2230 bb_vinfo = vect_slp_analyze_bb_1 (bb);
2231 if (bb_vinfo)
2232 return bb_vinfo;
2233
2234 destroy_bb_vec_info (bb_vinfo);
2235
2236 vector_sizes &= ~current_vector_size;
2237 if (vector_sizes == 0
2238 || current_vector_size == 0)
2239 return NULL;
2240
2241 /* Try the largest vector size remaining. */
2242 current_vector_size = 1 << floor_log2 (vector_sizes);
2243 if (dump_enabled_p ())
2244 dump_printf_loc (MSG_NOTE, vect_location,
2245 "***** Re-trying analysis with "
2246 "vector size %d\n", current_vector_size);
2247 }
2248 }
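
/* Illustrative walk of the retry loop above, for a hypothetical target
   whose autovectorize_vector_sizes () returns 32 | 16 (bytes):

     vector_sizes = 0x30; current_vector_size = 0 (autodetect, say 32)
     analysis fails -> vector_sizes &= ~32 -> 0x10
     current_vector_size = 1 << floor_log2 (0x10) = 16; re-try
     analysis fails -> vector_sizes = 0 -> give up and return NULL.  */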
2249
2250
2251 /* SLP costs are calculated according to the SLP instance unrolling factor
2252 (i.e., the number of created vector stmts depends on the unrolling factor).
2253 However, the actual number of vector stmts for every SLP node depends on
2254 VF, which is set later in vect_analyze_operations (). Hence, SLP costs
2255 should be updated. In this function we assume that the inside costs
2256 calculated in vect_model_xxx_cost are linear in ncopies. */
2257
2258 void
2259 vect_update_slp_costs_according_to_vf (loop_vec_info loop_vinfo)
2260 {
2261 unsigned int i, j, vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
2262 vec<slp_instance> slp_instances = LOOP_VINFO_SLP_INSTANCES (loop_vinfo);
2263 slp_instance instance;
2264 stmt_vector_for_cost body_cost_vec;
2265 stmt_info_for_cost *si;
2266 void *data = LOOP_VINFO_TARGET_COST_DATA (loop_vinfo);
2267
2268 if (dump_enabled_p ())
2269 dump_printf_loc (MSG_NOTE, vect_location,
2270 "=== vect_update_slp_costs_according_to_vf ===\n");
2271
2272 FOR_EACH_VEC_ELT (slp_instances, i, instance)
2273 {
2274 /* We assume that costs are linear in ncopies. */
2275 int ncopies = vf / SLP_INSTANCE_UNROLLING_FACTOR (instance);
2276
2277 /* Record the instance's instructions in the target cost model.
2278 This was delayed until here because the count of instructions
2279 isn't known beforehand. */
2280 body_cost_vec = SLP_INSTANCE_BODY_COST_VEC (instance);
2281
2282 FOR_EACH_VEC_ELT (body_cost_vec, j, si)
2283 (void) add_stmt_cost (data, si->count * ncopies, si->kind,
2284 vinfo_for_stmt (si->stmt), si->misalign,
2285 vect_body);
2286 }
2287 }
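
/* E.g. (assumed factors): with VF = 8 and an SLP instance whose
   unrolling factor is 2, ncopies = 8 / 2 = 4, so a body cost entry
   recorded with count 1 at analysis time is added to the target cost
   model as 1 * 4 = 4 copies here.  */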
2288
2289
2290 /* For constant and loop invariant defs of SLP_NODE this function returns
2291 (vector) defs (VEC_OPRNDS) that will be used in the vectorized stmts.
2292 OP_NUM determines if we gather defs for operand 0 or operand 1 of the RHS of
2293 scalar stmts. NUMBER_OF_VECTORS is the number of vector defs to create.
2294 REDUC_INDEX is the index of the reduction operand in the statements, or
2295 -1 if there is no reduction operand. */
2296
2297 static void
2298 vect_get_constant_vectors (tree op, slp_tree slp_node,
2299 vec<tree> *vec_oprnds,
2300 unsigned int op_num, unsigned int number_of_vectors,
2301 int reduc_index)
2302 {
2303 vec<gimple> stmts = SLP_TREE_SCALAR_STMTS (slp_node);
2304 gimple stmt = stmts[0];
2305 stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
2306 unsigned nunits;
2307 tree vec_cst;
2308 tree *elts;
2309 unsigned j, number_of_places_left_in_vector;
2310 tree vector_type;
2311 tree vop;
2312 int group_size = stmts.length ();
2313 unsigned int vec_num, i;
2314 unsigned number_of_copies = 1;
2315 vec<tree> voprnds;
2316 voprnds.create (number_of_vectors);
2317 bool constant_p, is_store;
2318 tree neutral_op = NULL;
2319 enum tree_code code = gimple_expr_code (stmt);
2320 gimple def_stmt;
2321 struct loop *loop;
2322 gimple_seq ctor_seq = NULL;
2323
2324 if (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_reduction_def
2325 && reduc_index != -1)
2326 {
2327 op_num = reduc_index - 1;
2328 op = gimple_op (stmt, reduc_index);
2329 /* For additional copies (see the explanation of NUMBER_OF_COPIES below)
2330 we need either neutral operands or the original operands. See
2331 get_initial_def_for_reduction() for details. */
2332 switch (code)
2333 {
2334 case WIDEN_SUM_EXPR:
2335 case DOT_PROD_EXPR:
2336 case PLUS_EXPR:
2337 case MINUS_EXPR:
2338 case BIT_IOR_EXPR:
2339 case BIT_XOR_EXPR:
2340 if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (op)))
2341 neutral_op = build_real (TREE_TYPE (op), dconst0);
2342 else
2343 neutral_op = build_int_cst (TREE_TYPE (op), 0);
2344
2345 break;
2346
2347 case MULT_EXPR:
2348 if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (op)))
2349 neutral_op = build_real (TREE_TYPE (op), dconst1);
2350 else
2351 neutral_op = build_int_cst (TREE_TYPE (op), 1);
2352
2353 break;
2354
2355 case BIT_AND_EXPR:
2356 neutral_op = build_int_cst (TREE_TYPE (op), -1);
2357 break;
2358
2359 case MAX_EXPR:
2360 case MIN_EXPR:
2361 def_stmt = SSA_NAME_DEF_STMT (op);
2362 loop = (gimple_bb (stmt))->loop_father;
2363 neutral_op = PHI_ARG_DEF_FROM_EDGE (def_stmt,
2364 loop_preheader_edge (loop));
2365 break;
2366
2367 default:
2368 neutral_op = NULL;
2369 }
2370 }
2371
2372 if (STMT_VINFO_DATA_REF (stmt_vinfo))
2373 {
2374 is_store = true;
2375 op = gimple_assign_rhs1 (stmt);
2376 }
2377 else
2378 is_store = false;
2379
2380 gcc_assert (op);
2381
2382 if (CONSTANT_CLASS_P (op))
2383 constant_p = true;
2384 else
2385 constant_p = false;
2386
2387 vector_type = get_vectype_for_scalar_type (TREE_TYPE (op));
2388 gcc_assert (vector_type);
2389 nunits = TYPE_VECTOR_SUBPARTS (vector_type);
2390
2391 /* NUMBER_OF_COPIES is the number of times we need to use the same values in
2392 created vectors. It is greater than 1 if unrolling is performed.
2393
2394 For example, we have two scalar operands, s1 and s2 (e.g., group of
2395 strided accesses of size two), while NUNITS is four (i.e., four scalars
2396 of this type can be packed in a vector). The output vector will contain
2397 two copies of each scalar operand: {s1, s2, s1, s2}. (NUMBER_OF_COPIES
2398 will be 2).
2399
2400 If GROUP_SIZE > NUNITS, the scalars will be split into several vectors
2401 containing the operands.
2402
2403 For example, NUNITS is four as before, and the group size is 8
2404 (s1, s2, ..., s8). We will create two vectors {s1, s2, s3, s4} and
2405 {s5, s6, s7, s8}. */
2406
2407 number_of_copies = least_common_multiple (nunits, group_size) / group_size;
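/* Worked example (values assumed): nunits = 4 and group_size = 6 give
   least_common_multiple (4, 6) = 12, so NUMBER_OF_COPIES = 12 / 6 = 2:
   the six scalars, each used twice, fill exactly three four-element
   vectors {s1,s2,s3,s4} {s5,s6,s1,s2} {s3,s4,s5,s6}.  */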
2408
2409 number_of_places_left_in_vector = nunits;
2410 elts = XALLOCAVEC (tree, nunits);
2411 for (j = 0; j < number_of_copies; j++)
2412 {
2413 for (i = group_size - 1; stmts.iterate (i, &stmt); i--)
2414 {
2415 if (is_store)
2416 op = gimple_assign_rhs1 (stmt);
2417 else
2418 {
2419 switch (code)
2420 {
2421 case COND_EXPR:
2422 if (op_num == 0 || op_num == 1)
2423 {
2424 tree cond = gimple_assign_rhs1 (stmt);
2425 op = TREE_OPERAND (cond, op_num);
2426 }
2427 else
2428 {
2429 if (op_num == 2)
2430 op = gimple_assign_rhs2 (stmt);
2431 else
2432 op = gimple_assign_rhs3 (stmt);
2433 }
2434 break;
2435
2436 case CALL_EXPR:
2437 op = gimple_call_arg (stmt, op_num);
2438 break;
2439
2440 case LSHIFT_EXPR:
2441 case RSHIFT_EXPR:
2442 case LROTATE_EXPR:
2443 case RROTATE_EXPR:
2444 op = gimple_op (stmt, op_num + 1);
2445 /* Unlike the other binary operators, shifts/rotates have
2446 the shift count being int, instead of the same type as
2447 the lhs, so make sure the scalar is the right type if
2448 we are dealing with vectors of
2449 long long/long/short/char. */
2450 if (op_num == 1 && TREE_CODE (op) == INTEGER_CST)
2451 op = fold_convert (TREE_TYPE (vector_type), op);
2452 break;
2453
2454 default:
2455 op = gimple_op (stmt, op_num + 1);
2456 break;
2457 }
2458 }
2459
2460 if (reduc_index != -1)
2461 {
2462 loop = (gimple_bb (stmt))->loop_father;
2463 def_stmt = SSA_NAME_DEF_STMT (op);
2464
2465 gcc_assert (loop);
2466
2467 /* Get the def before the loop. In a reduction chain we have only
2468 one initial value. */
2469 if ((j != (number_of_copies - 1)
2470 || (GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt))
2471 && i != 0))
2472 && neutral_op)
2473 op = neutral_op;
2474 else
2475 op = PHI_ARG_DEF_FROM_EDGE (def_stmt,
2476 loop_preheader_edge (loop));
2477 }
2478
2479 /* Create 'vect_ = {op0,op1,...,opn}'. */
2480 number_of_places_left_in_vector--;
2481 if (!types_compatible_p (TREE_TYPE (vector_type), TREE_TYPE (op)))
2482 {
2483 if (CONSTANT_CLASS_P (op))
2484 {
2485 op = fold_unary (VIEW_CONVERT_EXPR,
2486 TREE_TYPE (vector_type), op);
2487 gcc_assert (op && CONSTANT_CLASS_P (op));
2488 }
2489 else
2490 {
2491 tree new_temp
2492 = make_ssa_name (TREE_TYPE (vector_type), NULL);
2493 gimple init_stmt;
2494 op = build1 (VIEW_CONVERT_EXPR, TREE_TYPE (vector_type),
2495 op);
2496 init_stmt
2497 = gimple_build_assign_with_ops (VIEW_CONVERT_EXPR,
2498 new_temp, op, NULL_TREE);
2499 gimple_seq_add_stmt (&ctor_seq, init_stmt);
2500 op = new_temp;
2501 }
2502 }
2503 elts[number_of_places_left_in_vector] = op;
2504 if (!CONSTANT_CLASS_P (op))
2505 constant_p = false;
2506
2507 if (number_of_places_left_in_vector == 0)
2508 {
2509 number_of_places_left_in_vector = nunits;
2510
2511 if (constant_p)
2512 vec_cst = build_vector (vector_type, elts);
2513 else
2514 {
2515 vec<constructor_elt, va_gc> *v;
2516 unsigned k;
2517 vec_alloc (v, nunits);
2518 for (k = 0; k < nunits; ++k)
2519 CONSTRUCTOR_APPEND_ELT (v, NULL_TREE, elts[k]);
2520 vec_cst = build_constructor (vector_type, v);
2521 }
2522 voprnds.quick_push (vect_init_vector (stmt, vec_cst,
2523 vector_type, NULL));
2524 if (ctor_seq != NULL)
2525 {
2526 gimple init_stmt = SSA_NAME_DEF_STMT (voprnds.last ());
2527 gimple_stmt_iterator gsi = gsi_for_stmt (init_stmt);
2528 gsi_insert_seq_before_without_update (&gsi, ctor_seq,
2529 GSI_SAME_STMT);
2530 ctor_seq = NULL;
2531 }
2532 }
2533 }
2534 }
2535
2536 /* Since the vectors were created in reverse order, reverse them back
2537 here. */
2538 vec_num = voprnds.length ();
2539 for (j = vec_num; j != 0; j--)
2540 {
2541 vop = voprnds[j - 1];
2542 vec_oprnds->quick_push (vop);
2543 }
2544
2545 voprnds.release ();
2546
2547 /* If VF is greater than the unrolling factor needed for the SLP group
2548 of stmts, the NUMBER_OF_VECTORS to be created is greater than
2549 NUMBER_OF_SCALARS/NUNITS or NUNITS/NUMBER_OF_SCALARS, and hence we have
2550 to replicate the vectors. */
2551 while (number_of_vectors > vec_oprnds->length ())
2552 {
2553 tree neutral_vec = NULL;
2554
2555 if (neutral_op)
2556 {
2557 if (!neutral_vec)
2558 neutral_vec = build_vector_from_val (vector_type, neutral_op);
2559
2560 vec_oprnds->quick_push (neutral_vec);
2561 }
2562 else
2563 {
2564 for (i = 0; vec_oprnds->iterate (i, &vop) && i < vec_num; i++)
2565 vec_oprnds->quick_push (vop);
2566 }
2567 }
2568 }
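
/* Replication example for the loop above (values assumed): if
   NUMBER_OF_VECTORS is 4 but only two vector defs were built, a
   PLUS_EXPR reduction pads with two copies of the neutral vector
   {0,0,0,0}, while a plain invariant operand just repeats the two
   existing vectors.  */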
2569
2570
2571 /* Get vectorized definitions from SLP_NODE that contains corresponding
2572 vectorized def-stmts. */
2573
2574 static void
2575 vect_get_slp_vect_defs (slp_tree slp_node, vec<tree> *vec_oprnds)
2576 {
2577 tree vec_oprnd;
2578 gimple vec_def_stmt;
2579 unsigned int i;
2580
2581 gcc_assert (SLP_TREE_VEC_STMTS (slp_node).exists ());
2582
2583 FOR_EACH_VEC_ELT (SLP_TREE_VEC_STMTS (slp_node), i, vec_def_stmt)
2584 {
2585 gcc_assert (vec_def_stmt);
2586 vec_oprnd = gimple_get_lhs (vec_def_stmt);
2587 vec_oprnds->quick_push (vec_oprnd);
2588 }
2589 }
2590
2591
2592 /* Get vectorized definitions for SLP_NODE.
2593 If the scalar definitions are loop invariants or constants, collect them and
2594 call vect_get_constant_vectors() to create vector stmts.
2595 Otherwise, the def-stmts must be already vectorized and the vectorized stmts
2596 must be stored in the corresponding child of SLP_NODE, and we call
2597 vect_get_slp_vect_defs () to retrieve them. */
2598
2599 void
2600 vect_get_slp_defs (vec<tree> ops, slp_tree slp_node,
2601 vec<vec<tree> > *vec_oprnds, int reduc_index)
2602 {
2603 gimple first_stmt;
2604 int number_of_vects = 0, i;
2605 unsigned int child_index = 0;
2606 HOST_WIDE_INT lhs_size_unit, rhs_size_unit;
2607 slp_tree child = NULL;
2608 vec<tree> vec_defs;
2609 tree oprnd;
2610 bool vectorized_defs;
2611
2612 first_stmt = SLP_TREE_SCALAR_STMTS (slp_node)[0];
2613 FOR_EACH_VEC_ELT (ops, i, oprnd)
2614 {
2615 /* For each operand we check if it has vectorized definitions in a child
2616 node or we need to create them (for invariants and constants). We
2617 check if the LHS of the first stmt of the next child matches OPRND.
2618 If it does, we found the correct child. Otherwise, we call
2619 vect_get_constant_vectors () and do not advance CHILD_INDEX, so that
2620 this child node is checked again for the next operand. */
2621 vectorized_defs = false;
2622 if (SLP_TREE_CHILDREN (slp_node).length () > child_index)
2623 {
2624 child = SLP_TREE_CHILDREN (slp_node)[child_index];
2625
2626 /* We have to check both pattern and original def, if available. */
2627 gimple first_def = SLP_TREE_SCALAR_STMTS (child)[0];
2628 gimple related = STMT_VINFO_RELATED_STMT (vinfo_for_stmt (first_def));
2629
2630 if (operand_equal_p (oprnd, gimple_get_lhs (first_def), 0)
2631 || (related
2632 && operand_equal_p (oprnd, gimple_get_lhs (related), 0)))
2633 {
2634 /* The number of vector defs is determined by the number of
2635 vector statements in the node from which we get those
2636 statements. */
2637 number_of_vects = SLP_TREE_NUMBER_OF_VEC_STMTS (child);
2638 vectorized_defs = true;
2639 child_index++;
2640 }
2641 }
2642
2643 if (!vectorized_defs)
2644 {
2645 if (i == 0)
2646 {
2647 number_of_vects = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
2648 /* Number of vector stmts was calculated according to LHS in
2649 vect_schedule_slp_instance (), fix it by replacing LHS with
2650 RHS, if necessary. See vect_get_smallest_scalar_type () for
2651 details. */
2652 vect_get_smallest_scalar_type (first_stmt, &lhs_size_unit,
2653 &rhs_size_unit);
2654 if (rhs_size_unit != lhs_size_unit)
2655 {
2656 number_of_vects *= rhs_size_unit;
2657 number_of_vects /= lhs_size_unit;
2658 }
2659 }
2660 }
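/* E.g. (assumed types): computing a float (4-byte) LHS from double
   (8-byte) operands fits half as many operand elements per vector,
   so number_of_vects is scaled by 8 / 4 = 2 above.  */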
2661
2662 /* Allocate memory for vectorized defs. */
2663 vec_defs = vNULL;
2664 vec_defs.create (number_of_vects);
2665
2666 /* For reduction defs we call vect_get_constant_vectors (), since we are
2667 looking for initial loop invariant values. */
2668 if (vectorized_defs && reduc_index == -1)
2669 /* The defs are already vectorized. */
2670 vect_get_slp_vect_defs (child, &vec_defs);
2671 else
2672 /* Build vectors from scalar defs. */
2673 vect_get_constant_vectors (oprnd, slp_node, &vec_defs, i,
2674 number_of_vects, reduc_index);
2675
2676 vec_oprnds->quick_push (vec_defs);
2677
2678 /* For reductions, we only need initial values. */
2679 if (reduc_index != -1)
2680 return;
2681 }
2682 }
2683
2684
2685 /* Create NCOPIES permutation statements using the vector mask MASK (of
2686 vector type MASK_TYPE) and two input vectors placed in DR_CHAIN at
2687 FIRST_VEC_INDX and SECOND_VEC_INDX for the first copy, shifting by
2688 STRIDE elements of DR_CHAIN for every further copy.
2689 (STRIDE is the number of vectorized stmts for NODE divided by the number of
2690 copies).
2691 VECT_STMTS_COUNTER specifies the index in the vectorized stmts of NODE
2692 where the created stmts must be stored. */
2693
2694 static inline void
2695 vect_create_mask_and_perm (gimple stmt, gimple next_scalar_stmt,
2696 tree mask, int first_vec_indx, int second_vec_indx,
2697 gimple_stmt_iterator *gsi, slp_tree node,
2698 tree vectype, vec<tree> dr_chain,
2699 int ncopies, int vect_stmts_counter)
2700 {
2701 tree perm_dest;
2702 gimple perm_stmt = NULL;
2703 stmt_vec_info next_stmt_info;
2704 int i, stride;
2705 tree first_vec, second_vec, data_ref;
2706
2707 stride = SLP_TREE_NUMBER_OF_VEC_STMTS (node) / ncopies;
2708
2709 /* Initialize the vect stmts of NODE to properly insert the generated
2710 stmts later. */
2711 for (i = SLP_TREE_VEC_STMTS (node).length ();
2712 i < (int) SLP_TREE_NUMBER_OF_VEC_STMTS (node); i++)
2713 SLP_TREE_VEC_STMTS (node).quick_push (NULL);
2714
2715 perm_dest = vect_create_destination_var (gimple_assign_lhs (stmt), vectype);
2716 for (i = 0; i < ncopies; i++)
2717 {
2718 first_vec = dr_chain[first_vec_indx];
2719 second_vec = dr_chain[second_vec_indx];
2720
2721 /* Generate the permute statement. */
2722 perm_stmt = gimple_build_assign_with_ops (VEC_PERM_EXPR, perm_dest,
2723 first_vec, second_vec, mask);
2724 data_ref = make_ssa_name (perm_dest, perm_stmt);
2725 gimple_set_lhs (perm_stmt, data_ref);
2726 vect_finish_stmt_generation (stmt, perm_stmt, gsi);
2727
2728 /* Store the vector statement in NODE. */
2729 SLP_TREE_VEC_STMTS (node)[stride * i + vect_stmts_counter] = perm_stmt;
2730
2731 first_vec_indx += stride;
2732 second_vec_indx += stride;
2733 }
2734
2735 /* Mark the scalar stmt as vectorized. */
2736 next_stmt_info = vinfo_for_stmt (next_scalar_stmt);
2737 STMT_VINFO_VEC_STMT (next_stmt_info) = perm_stmt;
2738 }
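
/* Sketch of the VEC_PERM_EXPR semantics used above (illustrative C,
   not the actual tree-code implementation): each mask element selects
   from the concatenation of the two input vectors,

     for (i = 0; i < nunits; i++)
       result[i] = mask[i] < nunits ? first_vec[mask[i]]
                                    : second_vec[mask[i] - nunits];

   so mask {0,0,0,3} over {a0,b0,c0,a1} and any second vector yields
   {a0,a0,a0,a1}.  */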
2739
2740
2741 /* Given FIRST_MASK_ELEMENT - the mask element in element representation,
2742 return in CURRENT_MASK_ELEMENT its equivalent in target specific
2743 representation. Check that the mask is valid and return FALSE if not.
2744 Return TRUE in NEED_NEXT_VECTOR if the permutation requires moving to
2745 the next vector, i.e., the current first vector is not needed. */
2746
2747 static bool
2748 vect_get_mask_element (gimple stmt, int first_mask_element, int m,
2749 int mask_nunits, bool only_one_vec, int index,
2750 unsigned char *mask, int *current_mask_element,
2751 bool *need_next_vector, int *number_of_mask_fixes,
2752 bool *mask_fixed, bool *needs_first_vector)
2753 {
2754 int i;
2755
2756 /* Convert to target specific representation. */
2757 *current_mask_element = first_mask_element + m;
2758 /* Adjust the value in case it's a mask for second and third vectors. */
2759 *current_mask_element -= mask_nunits * (*number_of_mask_fixes - 1);
2760
2761 if (*current_mask_element < mask_nunits)
2762 *needs_first_vector = true;
2763
2764 /* We have only one input vector to permute but the mask accesses values in
2765 the next vector as well. */
2766 if (only_one_vec && *current_mask_element >= mask_nunits)
2767 {
2768 if (dump_enabled_p ())
2769 {
2770 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2771 "permutation requires at least two vectors ");
2772 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
2773 dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
2774 }
2775
2776 return false;
2777 }
2778
2779 /* The mask requires the next vector. */
2780 if (*current_mask_element >= mask_nunits * 2)
2781 {
2782 if (*needs_first_vector || *mask_fixed)
2783 {
2784 /* We either need the first vector too or have already moved to the
2785 next vector. In both cases, this permutation needs three
2786 vectors. */
2787 if (dump_enabled_p ())
2788 {
2789 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2790 "permutation requires at "
2791 "least three vectors ");
2792 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
2793 dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
2794 }
2795
2796 return false;
2797 }
2798
2799 /* We move to the next vector, dropping the first one and working with
2800 the second and the third - we need to adjust the values of the mask
2801 accordingly. */
2802 *current_mask_element -= mask_nunits * *number_of_mask_fixes;
2803
2804 for (i = 0; i < index; i++)
2805 mask[i] -= mask_nunits * *number_of_mask_fixes;
2806
2807 (*number_of_mask_fixes)++;
2808 *mask_fixed = true;
2809 }
2810
2811 *need_next_vector = *mask_fixed;
2812
2813 /* This was the last element of this mask. Start a new one. */
2814 if (index == mask_nunits - 1)
2815 {
2816 *number_of_mask_fixes = 1;
2817 *mask_fixed = false;
2818 *needs_first_vector = false;
2819 }
2820
2821 return true;
2822 }
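
/* Worked example of the fix-up above, continuing the a0b0c0... case
   described before vect_transform_slp_perm_load: with mask_nunits = 4
   the raw third mask {6,9,9,9} contains elements >= 2 * 4, so each
   element ends up reduced by mask_nunits, giving the legal two-operand
   mask {2,5,5,5}, and NEED_NEXT_VECTOR is set so the inputs shift to
   the second and third vectors.  */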
2823
2824
2825 /* Generate vector permute statements from a list of loads in DR_CHAIN.
2826 If ANALYZE_ONLY is TRUE, only check that it is possible to create valid
2827 permute statements for the SLP node NODE of the SLP instance
2828 SLP_NODE_INSTANCE. */
2829
2830 bool
2831 vect_transform_slp_perm_load (slp_tree node, vec<tree> dr_chain,
2832 gimple_stmt_iterator *gsi, int vf,
2833 slp_instance slp_node_instance, bool analyze_only)
2834 {
2835 gimple stmt = SLP_TREE_SCALAR_STMTS (node)[0];
2836 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2837 tree mask_element_type = NULL_TREE, mask_type;
2838 int i, j, k, nunits, vec_index = 0, scalar_index;
2839 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
2840 gimple next_scalar_stmt;
2841 int group_size = SLP_INSTANCE_GROUP_SIZE (slp_node_instance);
2842 int first_mask_element;
2843 int index, unroll_factor, current_mask_element, ncopies;
2844 unsigned char *mask;
2845 bool only_one_vec = false, need_next_vector = false;
2846 int first_vec_index, second_vec_index, orig_vec_stmts_num, vect_stmts_counter;
2847 int number_of_mask_fixes = 1;
2848 bool mask_fixed = false;
2849 bool needs_first_vector = false;
2850 enum machine_mode mode;
2851
2852 mode = TYPE_MODE (vectype);
2853
2854 if (!can_vec_perm_p (mode, false, NULL))
2855 {
2856 if (dump_enabled_p ())
2857 {
2858 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2859 "no vect permute for ");
2860 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
2861 dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
2862 }
2863 return false;
2864 }
2865
2866 /* The generic VEC_PERM_EXPR code always uses an integral type of the
2867 same size as the vector element being permuted. */
2868 mask_element_type = lang_hooks.types.type_for_mode
2869 (int_mode_for_mode (TYPE_MODE (TREE_TYPE (vectype))), 1);
2870 mask_type = get_vectype_for_scalar_type (mask_element_type);
2871 nunits = TYPE_VECTOR_SUBPARTS (vectype);
2872 mask = XALLOCAVEC (unsigned char, nunits);
2873 unroll_factor = SLP_INSTANCE_UNROLLING_FACTOR (slp_node_instance);
2874
2875 /* The number of vector stmts to generate based only on SLP_NODE_INSTANCE
2876 unrolling factor. */
2877 orig_vec_stmts_num = group_size *
2878 SLP_INSTANCE_UNROLLING_FACTOR (slp_node_instance) / nunits;
2879 if (orig_vec_stmts_num == 1)
2880 only_one_vec = true;
2881
2882 /* The number of copies is determined by the final vectorization factor
2883 relative to the SLP_NODE_INSTANCE unrolling factor. */
2884 ncopies = vf / SLP_INSTANCE_UNROLLING_FACTOR (slp_node_instance);
2885
2886 if (!STMT_VINFO_GROUPED_ACCESS (stmt_info))
2887 return false;
2888
2889 /* Generate permutation masks for every NODE. Number of masks for each NODE
2890 is equal to GROUP_SIZE.
2891 E.g., we have a group of three nodes with three loads from the same
2892 location in each node, and the vector size is 4. I.e., we have an
2893 a0b0c0a1b1c1... sequence and we need to create the following vectors:
2894 for a's: a0a0a0a1 a1a1a2a2 a2a3a3a3
2895 for b's: b0b0b0b1 b1b1b2b2 b2b3b3b3
2896 ...
2897
2898 The masks for a's should be: {0,0,0,3} {3,3,6,6} {6,9,9,9}.
2899 The last mask is illegal since we assume two operands for the permute
2900 operation, and the mask element values can't be outside that range.
2901 Hence, the last mask must be converted into {2,5,5,5}.
2902 For the first two permutations we need the first and the second input
2903 vectors: {a0,b0,c0,a1} and {b1,c1,a2,b2}, and for the last permutation
2904 we need the second and the third vectors: {b1,c1,a2,b2} and
2905 {c2,a3,b3,c3}. */
2906
2907 {
2908 scalar_index = 0;
2909 index = 0;
2910 vect_stmts_counter = 0;
2911 vec_index = 0;
2912 first_vec_index = vec_index++;
2913 if (only_one_vec)
2914 second_vec_index = first_vec_index;
2915 else
2916 second_vec_index = vec_index++;
2917
2918 for (j = 0; j < unroll_factor; j++)
2919 {
2920 for (k = 0; k < group_size; k++)
2921 {
2922 i = SLP_TREE_LOAD_PERMUTATION (node)[k];
2923 first_mask_element = i + j * group_size;
2924 if (!vect_get_mask_element (stmt, first_mask_element, 0,
2925 nunits, only_one_vec, index,
2926 mask, &current_mask_element,
2927 &need_next_vector,
2928 &number_of_mask_fixes, &mask_fixed,
2929 &needs_first_vector))
2930 return false;
2931 mask[index++] = current_mask_element;
2932
2933 if (index == nunits)
2934 {
2935 index = 0;
2936 if (!can_vec_perm_p (mode, false, mask))
2937 {
2938 if (dump_enabled_p ())
2939 {
2940 dump_printf_loc (MSG_MISSED_OPTIMIZATION,
2941 vect_location,
2942 "unsupported vect permute { ");
2943 for (i = 0; i < nunits; ++i)
2944 dump_printf (MSG_MISSED_OPTIMIZATION, "%d ",
2945 mask[i]);
2946 dump_printf (MSG_MISSED_OPTIMIZATION, "}\n");
2947 }
2948 return false;
2949 }
2950
2951 if (!analyze_only)
2952 {
2953 int l;
2954 tree mask_vec, *mask_elts;
2955 mask_elts = XALLOCAVEC (tree, nunits);
2956 for (l = 0; l < nunits; ++l)
2957 mask_elts[l] = build_int_cst (mask_element_type,
2958 mask[l]);
2959 mask_vec = build_vector (mask_type, mask_elts);
2960
2961 if (need_next_vector)
2962 {
2963 first_vec_index = second_vec_index;
2964 second_vec_index = vec_index;
2965 }
2966
2967 next_scalar_stmt
2968 = SLP_TREE_SCALAR_STMTS (node)[scalar_index++];
2969
2970 vect_create_mask_and_perm (stmt, next_scalar_stmt,
2971 mask_vec, first_vec_index, second_vec_index,
2972 gsi, node, vectype, dr_chain,
2973 ncopies, vect_stmts_counter++);
2974 }
2975 }
2976 }
2977 }
2978 }
2979
2980 return true;
2981 }
2982
2983
2984
2985 /* Vectorize SLP instance tree in postorder. */
2986
2987 static bool
2988 vect_schedule_slp_instance (slp_tree node, slp_instance instance,
2989 unsigned int vectorization_factor)
2990 {
2991 gimple stmt;
2992 bool grouped_store, is_store;
2993 gimple_stmt_iterator si;
2994 stmt_vec_info stmt_info;
2995 unsigned int vec_stmts_size, nunits, group_size;
2996 tree vectype;
2997 int i;
2998 slp_tree child;
2999
3000 if (!node)
3001 return false;
3002
3003 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
3004 vect_schedule_slp_instance (child, instance, vectorization_factor);
3005
3006 stmt = SLP_TREE_SCALAR_STMTS (node)[0];
3007 stmt_info = vinfo_for_stmt (stmt);
3008
3009 /* VECTYPE is the type of the destination. */
3010 vectype = STMT_VINFO_VECTYPE (stmt_info);
3011 nunits = (unsigned int) TYPE_VECTOR_SUBPARTS (vectype);
3012 group_size = SLP_INSTANCE_GROUP_SIZE (instance);
3013
3014 /* For each SLP instance calculate number of vector stmts to be created
3015 for the scalar stmts in each node of the SLP tree. Number of vector
3016 elements in one vector iteration is the number of scalar elements in
3017 one scalar iteration (GROUP_SIZE) multiplied by VF divided by vector
3018 size. */
3019 vec_stmts_size = (vectorization_factor * group_size) / nunits;
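/* E.g. (assumed values): VECTORIZATION_FACTOR = 4, GROUP_SIZE = 2 and
   NUNITS = 4 give vec_stmts_size = (4 * 2) / 4 = 2 vector stmts for
   this node.  */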
3020
3021 if (!SLP_TREE_VEC_STMTS (node).exists ())
3022 {
3023 SLP_TREE_VEC_STMTS (node).create (vec_stmts_size);
3024 SLP_TREE_NUMBER_OF_VEC_STMTS (node) = vec_stmts_size;
3025 }
3026
3027 if (dump_enabled_p ())
3028 {
3029 dump_printf_loc (MSG_NOTE, vect_location,
3030 "------>vectorizing SLP node starting from: ");
3031 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
3032 dump_printf (MSG_NOTE, "\n");
3033 }
3034
3035 /* Loads should be inserted before the first load. */
3036 if (SLP_INSTANCE_FIRST_LOAD_STMT (instance)
3037 && STMT_VINFO_GROUPED_ACCESS (stmt_info)
3038 && !REFERENCE_CLASS_P (gimple_get_lhs (stmt))
3039 && SLP_TREE_LOAD_PERMUTATION (node).exists ())
3040 si = gsi_for_stmt (SLP_INSTANCE_FIRST_LOAD_STMT (instance));
3041 else if (is_pattern_stmt_p (stmt_info))
3042 si = gsi_for_stmt (STMT_VINFO_RELATED_STMT (stmt_info));
3043 else
3044 si = gsi_for_stmt (stmt);
3045
3046 /* Stores should be inserted just before the last store. */
3047 if (STMT_VINFO_GROUPED_ACCESS (stmt_info)
3048 && REFERENCE_CLASS_P (gimple_get_lhs (stmt)))
3049 {
3050 gimple last_store = vect_find_last_store_in_slp_instance (instance);
3051 if (is_pattern_stmt_p (vinfo_for_stmt (last_store)))
3052 last_store = STMT_VINFO_RELATED_STMT (vinfo_for_stmt (last_store));
3053 si = gsi_for_stmt (last_store);
3054 }
3055
3056 /* Mark the first element of the reduction chain as reduction to properly
3057 transform the node. In the analysis phase only the last element of the
3058 chain is marked as reduction. */
3059 if (GROUP_FIRST_ELEMENT (stmt_info) && !STMT_VINFO_GROUPED_ACCESS (stmt_info)
3060 && GROUP_FIRST_ELEMENT (stmt_info) == stmt)
3061 {
3062 STMT_VINFO_DEF_TYPE (stmt_info) = vect_reduction_def;
3063 STMT_VINFO_TYPE (stmt_info) = reduc_vec_info_type;
3064 }
3065
3066 is_store = vect_transform_stmt (stmt, &si, &grouped_store, node, instance);
3067 return is_store;
3068 }
3069
3070 /* Replace scalar calls from SLP node NODE with setting of their lhs to zero.
3071 For loop vectorization this is done in vectorizable_call, but for SLP
3072 it needs to be deferred until the end of vect_schedule_slp, because multiple
3073 SLP instances may refer to the same scalar stmt. */
3074
3075 static void
3076 vect_remove_slp_scalar_calls (slp_tree node)
3077 {
3078 gimple stmt, new_stmt;
3079 gimple_stmt_iterator gsi;
3080 int i;
3081 slp_tree child;
3082 tree lhs;
3083 stmt_vec_info stmt_info;
3084
3085 if (!node)
3086 return;
3087
3088 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
3089 vect_remove_slp_scalar_calls (child);
3090
3091 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, stmt)
3092 {
3093 if (!is_gimple_call (stmt) || gimple_bb (stmt) == NULL)
3094 continue;
3095 stmt_info = vinfo_for_stmt (stmt);
3096 if (stmt_info == NULL
3097 || is_pattern_stmt_p (stmt_info)
3098 || !PURE_SLP_STMT (stmt_info))
3099 continue;
3100 lhs = gimple_call_lhs (stmt);
3101 new_stmt = gimple_build_assign (lhs, build_zero_cst (TREE_TYPE (lhs)));
3102 set_vinfo_for_stmt (new_stmt, stmt_info);
3103 set_vinfo_for_stmt (stmt, NULL);
3104 STMT_VINFO_STMT (stmt_info) = new_stmt;
3105 gsi = gsi_for_stmt (stmt);
3106 gsi_replace (&gsi, new_stmt, false);
3107 SSA_NAME_DEF_STMT (gimple_assign_lhs (new_stmt)) = new_stmt;
3108 }
3109 }
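
/* Illustrative effect (hypothetical stmt): a pure-SLP scalar call like

     x_1 = __builtin_sqrtf (a_2);

   is rewritten to

     x_1 = 0.0f;

   The dead zero assignment keeps the stmt and its stmt_vec_info valid
   while the remaining SLP instances are scheduled; DCE removes it
   afterwards.  */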
3110
3111 /* Generate vector code for all SLP instances in the loop/basic block. */
3112
3113 bool
3114 vect_schedule_slp (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo)
3115 {
3116 vec<slp_instance> slp_instances;
3117 slp_instance instance;
3118 unsigned int i, vf;
3119 bool is_store = false;
3120
3121 if (loop_vinfo)
3122 {
3123 slp_instances = LOOP_VINFO_SLP_INSTANCES (loop_vinfo);
3124 vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
3125 }
3126 else
3127 {
3128 slp_instances = BB_VINFO_SLP_INSTANCES (bb_vinfo);
3129 vf = 1;
3130 }
3131
3132 FOR_EACH_VEC_ELT (slp_instances, i, instance)
3133 {
3134 /* Schedule the tree of INSTANCE. */
3135 is_store = vect_schedule_slp_instance (SLP_INSTANCE_TREE (instance),
3136 instance, vf);
3137 if (dump_enabled_p ())
3138 dump_printf_loc (MSG_NOTE, vect_location,
3139 "vectorizing stmts using SLP.\n");
3140 }
3141
3142 FOR_EACH_VEC_ELT (slp_instances, i, instance)
3143 {
3144 slp_tree root = SLP_INSTANCE_TREE (instance);
3145 gimple store;
3146 unsigned int j;
3147 gimple_stmt_iterator gsi;
3148
3149 /* Remove scalar call stmts. Do not do this for basic-block
3150 vectorization as not all uses may be vectorized.
3151 ??? Why should this be necessary? DCE should be able to
3152 remove the stmts itself.
3153 ??? For BB vectorization we can as well remove scalar
3154 stmts starting from the SLP tree root if they have no
3155 uses. */
3156 if (loop_vinfo)
3157 vect_remove_slp_scalar_calls (root);
3158
3159 for (j = 0; SLP_TREE_SCALAR_STMTS (root).iterate (j, &store)
3160 && j < SLP_INSTANCE_GROUP_SIZE (instance); j++)
3161 {
3162 if (!STMT_VINFO_DATA_REF (vinfo_for_stmt (store)))
3163 break;
3164
3165 if (is_pattern_stmt_p (vinfo_for_stmt (store)))
3166 store = STMT_VINFO_RELATED_STMT (vinfo_for_stmt (store));
3167 /* Free the attached stmt_vec_info and remove the stmt. */
3168 gsi = gsi_for_stmt (store);
3169 unlink_stmt_vdef (store);
3170 gsi_remove (&gsi, true);
3171 release_defs (store);
3172 free_stmt_vec_info (store);
3173 }
3174 }
3175
3176 return is_store;
3177 }
3178
3179
3180 /* Vectorize the basic block. */
3181
3182 void
3183 vect_slp_transform_bb (basic_block bb)
3184 {
3185 bb_vec_info bb_vinfo = vec_info_for_bb (bb);
3186 gimple_stmt_iterator si;
3187
3188 gcc_assert (bb_vinfo);
3189
3190 if (dump_enabled_p ())
3191 dump_printf_loc (MSG_NOTE, vect_location, "SLPing BB\n");
3192
3193 for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
3194 {
3195 gimple stmt = gsi_stmt (si);
3196 stmt_vec_info stmt_info;
3197
3198 if (dump_enabled_p ())
3199 {
3200 dump_printf_loc (MSG_NOTE, vect_location,
3201 "------>SLPing statement: ");
3202 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
3203 dump_printf (MSG_NOTE, "\n");
3204 }
3205
3206 stmt_info = vinfo_for_stmt (stmt);
3207 gcc_assert (stmt_info);
3208
3209 /* Schedule all the SLP instances when the first SLP stmt is reached. */
3210 if (STMT_SLP_TYPE (stmt_info))
3211 {
3212 vect_schedule_slp (NULL, bb_vinfo);
3213 break;
3214 }
3215 }
3216
3217 if (dump_enabled_p ())
3218 dump_printf_loc (MSG_NOTE, vect_location,
3219 "BASIC BLOCK VECTORIZED\n");
3220
3221 destroy_bb_vec_info (bb_vinfo);
3222 }