1 /* SLP - Basic Block Vectorization
2 Copyright (C) 2007-2013 Free Software Foundation, Inc.
3 Contributed by Dorit Naishlos <dorit@il.ibm.com>
4 and Ira Rosen <irar@il.ibm.com>
5
6 This file is part of GCC.
7
8 GCC is free software; you can redistribute it and/or modify it under
9 the terms of the GNU General Public License as published by the Free
10 Software Foundation; either version 3, or (at your option) any later
11 version.
12
13 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
14 WARRANTY; without even the implied warranty of MERCHANTABILITY or
15 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
16 for more details.
17
18 You should have received a copy of the GNU General Public License
19 along with GCC; see the file COPYING3. If not see
20 <http://www.gnu.org/licenses/>. */
21
22 #include "config.h"
23 #include "system.h"
24 #include "coretypes.h"
25 #include "dumpfile.h"
26 #include "tm.h"
27 #include "ggc.h"
28 #include "tree.h"
29 #include "target.h"
30 #include "basic-block.h"
31 #include "gimple-pretty-print.h"
32 #include "tree-flow.h"
33 #include "tree-pass.h"
34 #include "cfgloop.h"
35 #include "expr.h"
36 #include "recog.h" /* FIXME: for insn_data */
37 #include "optabs.h"
38 #include "tree-vectorizer.h"
39 #include "langhooks.h"
40
41 /* Extract the location of the basic block in the source code.
42 Return the basic block location if one is found, and UNKNOWN_LOC if not. */
43
44 LOC
45 find_bb_location (basic_block bb)
46 {
47 gimple stmt = NULL;
48 gimple_stmt_iterator si;
49
50 if (!bb)
51 return UNKNOWN_LOC;
52
53 for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
54 {
55 stmt = gsi_stmt (si);
56 if (gimple_location (stmt) != UNKNOWN_LOC)
57 return gimple_location (stmt);
58 }
59
60 return UNKNOWN_LOC;
61 }
62
63
64 /* Recursively free the memory allocated for the SLP tree rooted at NODE. */
65
66 static void
67 vect_free_slp_tree (slp_tree node)
68 {
69 int i;
70 slp_tree child;
71
72 if (!node)
73 return;
74
75 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
76 vect_free_slp_tree (child);
77
78 SLP_TREE_CHILDREN (node).release ();
79 SLP_TREE_SCALAR_STMTS (node).release ();
80 SLP_TREE_VEC_STMTS (node).release ();
81
82 free (node);
83 }
84
85
86 /* Free the memory allocated for the SLP instance. */
87
88 void
89 vect_free_slp_instance (slp_instance instance)
90 {
91 vect_free_slp_tree (SLP_INSTANCE_TREE (instance));
92 SLP_INSTANCE_LOAD_PERMUTATION (instance).release ();
93 SLP_INSTANCE_LOADS (instance).release ();
94 SLP_INSTANCE_BODY_COST_VEC (instance).release ();
95 free (instance);
96 }
97
98
99 /* Create an SLP node for SCALAR_STMTS. */
100
101 static slp_tree
102 vect_create_new_slp_node (vec<gimple> scalar_stmts)
103 {
104 slp_tree node;
105 gimple stmt = scalar_stmts[0];
106 unsigned int nops;
107
108 if (is_gimple_call (stmt))
109 nops = gimple_call_num_args (stmt);
110 else if (is_gimple_assign (stmt))
111 {
112 nops = gimple_num_ops (stmt) - 1;
113 if (gimple_assign_rhs_code (stmt) == COND_EXPR)
114 nops++;
115 }
116 else
117 return NULL;
118
119 node = XNEW (struct _slp_tree);
120 SLP_TREE_SCALAR_STMTS (node) = scalar_stmts;
121 SLP_TREE_VEC_STMTS (node).create (0);
122 SLP_TREE_CHILDREN (node).create (nops);
123
124 return node;
125 }
126
127
128 /* Allocate operands info for NOPS operands, and GROUP_SIZE def-stmts for each
129 operand. */
130 static vec<slp_oprnd_info>
131 vect_create_oprnd_info (int nops, int group_size)
132 {
133 int i;
134 slp_oprnd_info oprnd_info;
135 vec<slp_oprnd_info> oprnds_info;
136
137 oprnds_info.create (nops);
138 for (i = 0; i < nops; i++)
139 {
140 oprnd_info = XNEW (struct _slp_oprnd_info);
141 oprnd_info->def_stmts.create (group_size);
142 oprnd_info->first_dt = vect_uninitialized_def;
143 oprnd_info->first_def_type = NULL_TREE;
144 oprnd_info->first_const_oprnd = NULL_TREE;
145 oprnd_info->first_pattern = false;
146 oprnds_info.quick_push (oprnd_info);
147 }
148
149 return oprnds_info;
150 }
151
152
153 /* Free operands info. */
154
155 static void
156 vect_free_oprnd_info (vec<slp_oprnd_info> &oprnds_info)
157 {
158 int i;
159 slp_oprnd_info oprnd_info;
160
161 FOR_EACH_VEC_ELT (oprnds_info, i, oprnd_info)
162 {
163 oprnd_info->def_stmts.release ();
164 XDELETE (oprnd_info);
165 }
166
167 oprnds_info.release ();
168 }
169
170
171 /* Find the place of the data-ref in STMT in the interleaving chain that starts
172 from FIRST_STMT. Return -1 if the data-ref is not a part of the chain. */
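/* For example (hypothetical accesses, for illustration only): if the
   chain starting at FIRST_STMT accesses a[0], a[1], a[2], a[3] in that
   order, the stmt that loads a[2] is at place 2, and a load belonging
   to a different chain yields -1.  */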
173
174 static int
175 vect_get_place_in_interleaving_chain (gimple stmt, gimple first_stmt)
176 {
177 gimple next_stmt = first_stmt;
178 int result = 0;
179
180 if (first_stmt != GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)))
181 return -1;
182
183 do
184 {
185 if (next_stmt == stmt)
186 return result;
187 result++;
188 next_stmt = GROUP_NEXT_ELEMENT (vinfo_for_stmt (next_stmt));
189 }
190 while (next_stmt);
191
192 return -1;
193 }
194
195
196 /* Get the defs for the rhs of STMT (collect them in OPRNDS_INFO), check that
197 they are of a valid type and that they match the defs of the first stmt of
198 the SLP group (stored in OPRNDS_INFO). */
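/* For instance (hypothetical stmts, for illustration only): given the group
     a[0] = b[0] + c[0];
     a[1] = c[1] + b[1];
   the second stmt's defs do not match the first stmt slot by slot, but
   because PLUS_EXPR is commutative its operands can be swapped below so
   that every operand slot sees defs of the same kind and type.  */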
199
200 static bool
201 vect_get_and_check_slp_defs (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo,
202 gimple stmt, bool first,
203 vec<slp_oprnd_info> *oprnds_info)
204 {
205 tree oprnd;
206 unsigned int i, number_of_oprnds;
207 tree def, def_op0 = NULL_TREE;
208 gimple def_stmt;
209 enum vect_def_type dt = vect_uninitialized_def;
210 enum vect_def_type dt_op0 = vect_uninitialized_def;
211 struct loop *loop = NULL;
212 enum tree_code rhs_code;
213 bool different_types = false;
214 bool pattern = false;
215 slp_oprnd_info oprnd_info, oprnd0_info, oprnd1_info;
216 int op_idx = 1;
217 tree compare_rhs = NULL_TREE;
218
219 if (loop_vinfo)
220 loop = LOOP_VINFO_LOOP (loop_vinfo);
221
222 if (is_gimple_call (stmt))
223 {
224 number_of_oprnds = gimple_call_num_args (stmt);
225 op_idx = 3;
226 }
227 else if (is_gimple_assign (stmt))
228 {
229 number_of_oprnds = gimple_num_ops (stmt) - 1;
230 if (gimple_assign_rhs_code (stmt) == COND_EXPR)
231 number_of_oprnds++;
232 }
233 else
234 return false;
235
236 for (i = 0; i < number_of_oprnds; i++)
237 {
238 if (compare_rhs)
239 {
240 oprnd = compare_rhs;
241 compare_rhs = NULL_TREE;
242 }
243 else
244 oprnd = gimple_op (stmt, op_idx++);
245
246 oprnd_info = (*oprnds_info)[i];
247
248 if (COMPARISON_CLASS_P (oprnd))
249 {
250 compare_rhs = TREE_OPERAND (oprnd, 1);
251 oprnd = TREE_OPERAND (oprnd, 0);
252 }
253
254 if (!vect_is_simple_use (oprnd, NULL, loop_vinfo, bb_vinfo, &def_stmt,
255 &def, &dt)
256 || (!def_stmt && dt != vect_constant_def))
257 {
258 if (dump_enabled_p ())
259 {
260 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
261 "Build SLP failed: can't find def for ");
262 dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM, oprnd);
263 }
264
265 return false;
266 }
267
268 /* Check if DEF_STMT is a part of a pattern in LOOP and get the def stmt
269 from the pattern. Check that all the stmts of the node are in the
270 pattern. */
271 if (def_stmt && gimple_bb (def_stmt)
272 && ((loop && flow_bb_inside_loop_p (loop, gimple_bb (def_stmt)))
273 || (!loop && gimple_bb (def_stmt) == BB_VINFO_BB (bb_vinfo)
274 && gimple_code (def_stmt) != GIMPLE_PHI))
275 && vinfo_for_stmt (def_stmt)
276 && STMT_VINFO_IN_PATTERN_P (vinfo_for_stmt (def_stmt))
277 && !STMT_VINFO_RELEVANT (vinfo_for_stmt (def_stmt))
278 && !STMT_VINFO_LIVE_P (vinfo_for_stmt (def_stmt)))
279 {
280 pattern = true;
281 if (!first && !oprnd_info->first_pattern)
282 {
283 if (dump_enabled_p ())
284 {
285 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
286 "Build SLP failed: some of the stmts"
287 " are in a pattern, and others are not ");
288 dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM, oprnd);
289 }
290
291 return false;
292 }
293
294 def_stmt = STMT_VINFO_RELATED_STMT (vinfo_for_stmt (def_stmt));
295 dt = STMT_VINFO_DEF_TYPE (vinfo_for_stmt (def_stmt));
296
297 if (dt == vect_unknown_def_type)
298 {
299 if (dump_enabled_p ())
300 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
301 "Unsupported pattern.");
302 return false;
303 }
304
305 switch (gimple_code (def_stmt))
306 {
307 case GIMPLE_PHI:
308 def = gimple_phi_result (def_stmt);
309 break;
310
311 case GIMPLE_ASSIGN:
312 def = gimple_assign_lhs (def_stmt);
313 break;
314
315 default:
316 if (dump_enabled_p ())
317 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
318 "unsupported defining stmt: ");
319 return false;
320 }
321 }
322
323 if (first)
324 {
325 oprnd_info->first_dt = dt;
326 oprnd_info->first_pattern = pattern;
327 if (def)
328 {
329 oprnd_info->first_def_type = TREE_TYPE (def);
330 oprnd_info->first_const_oprnd = NULL_TREE;
331 }
332 else
333 {
334 oprnd_info->first_def_type = NULL_TREE;
335 oprnd_info->first_const_oprnd = oprnd;
336 }
337
338 if (i == 0)
339 {
340 def_op0 = def;
341 dt_op0 = dt;
342 }
343 }
344 else
345 {
346 /* Not first stmt of the group, check that the def-stmt/s match
347 the def-stmt/s of the first stmt. Allow different definition
348 types for reduction chains: the first stmt must be a
349 vect_reduction_def (a phi node), and the rest
350 vect_internal_def. */
351 if (((oprnd_info->first_dt != dt
352 && !(oprnd_info->first_dt == vect_reduction_def
353 && dt == vect_internal_def))
354 || (oprnd_info->first_def_type != NULL_TREE
355 && def
356 && !types_compatible_p (oprnd_info->first_def_type,
357 TREE_TYPE (def))))
358 || (!def
359 && !types_compatible_p (TREE_TYPE (oprnd_info->first_const_oprnd),
360 TREE_TYPE (oprnd)))
361 || different_types)
362 {
363 if (number_of_oprnds != 2)
364 {
365 if (dump_enabled_p ())
366 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
367 "Build SLP failed: different types ");
368
369 return false;
370 }
371
372 /* Try to swap operands in case of binary operation. */
373 if (i == 0)
374 different_types = true;
375 else
376 {
377 oprnd0_info = (*oprnds_info)[0];
378 if (is_gimple_assign (stmt)
379 && (rhs_code = gimple_assign_rhs_code (stmt))
380 && TREE_CODE_CLASS (rhs_code) == tcc_binary
381 && commutative_tree_code (rhs_code)
382 && oprnd0_info->first_dt == dt
383 && oprnd_info->first_dt == dt_op0
384 && def_op0 && def
385 && !(oprnd0_info->first_def_type
386 && !types_compatible_p (oprnd0_info->first_def_type,
387 TREE_TYPE (def)))
388 && !(oprnd_info->first_def_type
389 && !types_compatible_p (oprnd_info->first_def_type,
390 TREE_TYPE (def_op0))))
391 {
392 if (dump_enabled_p ())
393 {
394 dump_printf_loc (MSG_NOTE, vect_location,
395 "Swapping operands of ");
396 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
397 }
398
399 swap_tree_operands (stmt, gimple_assign_rhs1_ptr (stmt),
400 gimple_assign_rhs2_ptr (stmt));
401 }
402 else
403 {
404 if (dump_enabled_p ())
405 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
406 "Build SLP failed: different types ");
407
408 return false;
409 }
410 }
411 }
412 }
413
414 /* Check the types of the definitions. */
415 switch (dt)
416 {
417 case vect_constant_def:
418 case vect_external_def:
419 case vect_reduction_def:
420 break;
421
422 case vect_internal_def:
423 if (different_types)
424 {
425 oprnd0_info = (*oprnds_info)[0];
426 oprnd1_info = (*oprnds_info)[1];
427 if (i == 0)
428 oprnd1_info->def_stmts.quick_push (def_stmt);
429 else
430 oprnd0_info->def_stmts.quick_push (def_stmt);
431 }
432 else
433 oprnd_info->def_stmts.quick_push (def_stmt);
434
435 break;
436
437 default:
438 /* FORNOW: Not supported. */
439 if (dump_enabled_p ())
440 {
441 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
442 "Build SLP failed: illegal type of def ");
443 dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM, def);
444 }
445
446 return false;
447 }
448 }
449
450 return true;
451 }
452
453
454 /* Recursively build an SLP tree starting from NODE.
455 Fail (and return FALSE) if def-stmts are not isomorphic, require data
456 permutation, or are of unsupported operation types. Otherwise, return
457 TRUE. */
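/* For example (hypothetical stmts, for illustration only): the group
     a[0] = b[0] + c[0];
     a[1] = b[1] + c[1];
   is isomorphic (same operation, compatible types), so the resulting
   tree has the packed stores at the root, a node for the two additions
   below it, and groups of loads from b[] and c[] as leaves.  */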
458
459 static bool
460 vect_build_slp_tree (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo,
461 slp_tree *node, unsigned int group_size,
462 unsigned int *max_nunits,
463 vec<int> *load_permutation,
464 vec<slp_tree> *loads,
465 unsigned int vectorization_factor, bool *loads_permuted)
466 {
467 unsigned int i;
468 vec<gimple> stmts = SLP_TREE_SCALAR_STMTS (*node);
469 gimple stmt = stmts[0];
470 enum tree_code first_stmt_code = ERROR_MARK, rhs_code = ERROR_MARK;
471 enum tree_code first_cond_code = ERROR_MARK;
472 tree lhs;
473 bool stop_recursion = false, need_same_oprnds = false;
474 tree vectype, scalar_type, first_op1 = NULL_TREE;
475 optab optab;
476 int icode;
477 enum machine_mode optab_op2_mode;
478 enum machine_mode vec_mode;
479 struct data_reference *first_dr;
480 HOST_WIDE_INT dummy;
481 bool permutation = false;
482 unsigned int load_place;
483 gimple first_load = NULL, prev_first_load = NULL, old_first_load = NULL;
484 vec<slp_oprnd_info> oprnds_info;
485 unsigned int nops;
486 slp_oprnd_info oprnd_info;
487 tree cond;
488
489 if (is_gimple_call (stmt))
490 nops = gimple_call_num_args (stmt);
491 else if (is_gimple_assign (stmt))
492 {
493 nops = gimple_num_ops (stmt) - 1;
494 if (gimple_assign_rhs_code (stmt) == COND_EXPR)
495 nops++;
496 }
497 else
498 return false;
499
500 oprnds_info = vect_create_oprnd_info (nops, group_size);
501
502 /* For every stmt in NODE find its def stmt/s. */
503 FOR_EACH_VEC_ELT (stmts, i, stmt)
504 {
505 if (dump_enabled_p ())
506 {
507 dump_printf_loc (MSG_NOTE, vect_location, "Build SLP for ");
508 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
509 }
510
511 /* Fail to vectorize statements marked as unvectorizable. */
512 if (!STMT_VINFO_VECTORIZABLE (vinfo_for_stmt (stmt)))
513 {
514 if (dump_enabled_p ())
515 {
516 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
517 "Build SLP failed: unvectorizable statement ");
518 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
519 }
520
521 vect_free_oprnd_info (oprnds_info);
522 return false;
523 }
524
525 lhs = gimple_get_lhs (stmt);
526 if (lhs == NULL_TREE)
527 {
528 if (dump_enabled_p ())
529 {
530 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
531 "Build SLP failed: not GIMPLE_ASSIGN nor "
532 "GIMPLE_CALL ");
533 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
534 }
535
536 vect_free_oprnd_info (oprnds_info);
537 return false;
538 }
539
540 if (is_gimple_assign (stmt)
541 && gimple_assign_rhs_code (stmt) == COND_EXPR
542 && (cond = gimple_assign_rhs1 (stmt))
543 && !COMPARISON_CLASS_P (cond))
544 {
545 if (dump_enabled_p ())
546 {
547 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
548 "Build SLP failed: condition is not "
549 "comparison ");
550 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
551 }
552
553 vect_free_oprnd_info (oprnds_info);
554 return false;
555 }
556
557 scalar_type = vect_get_smallest_scalar_type (stmt, &dummy, &dummy);
558 vectype = get_vectype_for_scalar_type (scalar_type);
559 if (!vectype)
560 {
561 if (dump_enabled_p ())
562 {
563 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
564 "Build SLP failed: unsupported data-type ");
565 dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
566 scalar_type);
567 }
568
569 vect_free_oprnd_info (oprnds_info);
570 return false;
571 }
572
573 /* In case of multiple types we need to detect the smallest type. */
574 if (*max_nunits < TYPE_VECTOR_SUBPARTS (vectype))
575 {
576 *max_nunits = TYPE_VECTOR_SUBPARTS (vectype);
577 if (bb_vinfo)
578 vectorization_factor = *max_nunits;
579 }
580
581 if (is_gimple_call (stmt))
582 {
583 rhs_code = CALL_EXPR;
584 if (gimple_call_internal_p (stmt)
585 || gimple_call_tail_p (stmt)
586 || gimple_call_noreturn_p (stmt)
587 || !gimple_call_nothrow_p (stmt)
588 || gimple_call_chain (stmt))
589 {
590 if (dump_enabled_p ())
591 {
592 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
593 "Build SLP failed: unsupported call type ");
594 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
595 }
596
597 vect_free_oprnd_info (oprnds_info);
598 return false;
599 }
600 }
601 else
602 rhs_code = gimple_assign_rhs_code (stmt);
603
604 /* Check the operation. */
605 if (i == 0)
606 {
607 first_stmt_code = rhs_code;
608
609 /* Shift arguments should be equal in all the packed stmts for a
610 vector shift with a scalar shift operand. */
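/* E.g. (illustration only):
     x[0] = y[0] << 3;
     x[1] = y[1] << 3;
   can use a vector/scalar shift, whereas two different shift amounts
   would require a vector/vector shift.  */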
611 if (rhs_code == LSHIFT_EXPR || rhs_code == RSHIFT_EXPR
612 || rhs_code == LROTATE_EXPR
613 || rhs_code == RROTATE_EXPR)
614 {
615 vec_mode = TYPE_MODE (vectype);
616
617 /* First see if we have a vector/vector shift. */
618 optab = optab_for_tree_code (rhs_code, vectype,
619 optab_vector);
620
621 if (!optab
622 || optab_handler (optab, vec_mode) == CODE_FOR_nothing)
623 {
624 /* No vector/vector shift, try for a vector/scalar shift. */
625 optab = optab_for_tree_code (rhs_code, vectype,
626 optab_scalar);
627
628 if (!optab)
629 {
630 if (dump_enabled_p ())
631 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
632 "Build SLP failed: no optab.");
633 vect_free_oprnd_info (oprnds_info);
634 return false;
635 }
636 icode = (int) optab_handler (optab, vec_mode);
637 if (icode == CODE_FOR_nothing)
638 {
639 if (dump_enabled_p ())
640 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
641 "Build SLP failed: "
642 "op not supported by target.");
643 vect_free_oprnd_info (oprnds_info);
644 return false;
645 }
646 optab_op2_mode = insn_data[icode].operand[2].mode;
647 if (!VECTOR_MODE_P (optab_op2_mode))
648 {
649 need_same_oprnds = true;
650 first_op1 = gimple_assign_rhs2 (stmt);
651 }
652 }
653 }
654 else if (rhs_code == WIDEN_LSHIFT_EXPR)
655 {
656 need_same_oprnds = true;
657 first_op1 = gimple_assign_rhs2 (stmt);
658 }
659 }
660 else
661 {
662 if (first_stmt_code != rhs_code
663 && (first_stmt_code != IMAGPART_EXPR
664 || rhs_code != REALPART_EXPR)
665 && (first_stmt_code != REALPART_EXPR
666 || rhs_code != IMAGPART_EXPR)
667 && !(STMT_VINFO_GROUPED_ACCESS (vinfo_for_stmt (stmt))
668 && (first_stmt_code == ARRAY_REF
669 || first_stmt_code == BIT_FIELD_REF
670 || first_stmt_code == INDIRECT_REF
671 || first_stmt_code == COMPONENT_REF
672 || first_stmt_code == MEM_REF)))
673 {
674 if (dump_enabled_p ())
675 {
676 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
677 "Build SLP failed: different operation "
678 "in stmt ");
679 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
680 }
681
682 vect_free_oprnd_info (oprnds_info);
683 return false;
684 }
685
686 if (need_same_oprnds
687 && !operand_equal_p (first_op1, gimple_assign_rhs2 (stmt), 0))
688 {
689 if (dump_enabled_p ())
690 {
691 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
692 "Build SLP failed: different shift "
693 "arguments in ");
694 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
695 }
696
697 vect_free_oprnd_info (oprnds_info);
698 return false;
699 }
700
701 if (rhs_code == CALL_EXPR)
702 {
703 gimple first_stmt = stmts[0];
704 if (gimple_call_num_args (stmt) != nops
705 || !operand_equal_p (gimple_call_fn (first_stmt),
706 gimple_call_fn (stmt), 0)
707 || gimple_call_fntype (first_stmt)
708 != gimple_call_fntype (stmt))
709 {
710 if (dump_enabled_p ())
711 {
712 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
713 "Build SLP failed: different calls in ");
714 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
715 stmt, 0);
716 }
717
718 vect_free_oprnd_info (oprnds_info);
719 return false;
720 }
721 }
722 }
723
724 /* Grouped store or load. */
725 if (STMT_VINFO_GROUPED_ACCESS (vinfo_for_stmt (stmt)))
726 {
727 if (REFERENCE_CLASS_P (lhs))
728 {
729 /* Store. */
730 if (!vect_get_and_check_slp_defs (loop_vinfo, bb_vinfo,
731 stmt, (i == 0), &oprnds_info))
732 {
733 vect_free_oprnd_info (oprnds_info);
734 return false;
735 }
736 }
737 else
738 {
739 /* Load. */
740 unsigned unrolling_factor
741 = least_common_multiple
742 (*max_nunits, group_size) / group_size;
743 /* FORNOW: Check that there is no gap between the loads
744 and no gap between the groups when we need to load
745 multiple groups at once.
746 ??? We should enhance this to only disallow gaps
747 inside vectors. */
748 if ((unrolling_factor > 1
749 && GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)) == stmt
750 && GROUP_GAP (vinfo_for_stmt (stmt)) != 0)
751 || (GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)) != stmt
752 && GROUP_GAP (vinfo_for_stmt (stmt)) != 1))
753 {
754 if (dump_enabled_p ())
755 {
756 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
757 "Build SLP failed: grouped "
758 "loads have gaps ");
759 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
760 stmt, 0);
761 }
762
763 vect_free_oprnd_info (oprnds_info);
764 return false;
765 }
766
767 /* Check that the size of the interleaved loads group is not
768 greater than the SLP group size. */
769 unsigned ncopies
770 = vectorization_factor / TYPE_VECTOR_SUBPARTS (vectype);
771 if (loop_vinfo
772 && GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)) == stmt
773 && ((GROUP_SIZE (vinfo_for_stmt (stmt))
774 - GROUP_GAP (vinfo_for_stmt (stmt)))
775 > ncopies * group_size))
776 {
777 if (dump_enabled_p ())
778 {
779 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
780 "Build SLP failed: the number "
781 "of interleaved loads is greater than "
782 "the SLP group size ");
783 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
784 stmt, 0);
785 }
786
787 vect_free_oprnd_info (oprnds_info);
788 return false;
789 }
790
791 old_first_load = first_load;
792 first_load = GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt));
793 if (prev_first_load)
794 {
795 /* Check that there are no loads from different interleaving
796 chains in the same node. The only exception is complex
797 numbers. */
798 if (prev_first_load != first_load
799 && rhs_code != REALPART_EXPR
800 && rhs_code != IMAGPART_EXPR)
801 {
802 if (dump_enabled_p ())
803 {
804 dump_printf_loc (MSG_MISSED_OPTIMIZATION,
805 vect_location,
806 "Build SLP failed: different "
807 "interleaving chains in one node ");
808 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
809 stmt, 0);
810 }
811
812 vect_free_oprnd_info (oprnds_info);
813 return false;
814 }
815 }
816 else
817 prev_first_load = first_load;
818
819 /* In some cases a group of loads is just the same load
820 repeated N times. Only analyze its cost once. */
821 if (first_load == stmt && old_first_load != first_load)
822 {
823 first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (stmt));
824 if (vect_supportable_dr_alignment (first_dr, false)
825 == dr_unaligned_unsupported)
826 {
827 if (dump_enabled_p ())
828 {
829 dump_printf_loc (MSG_MISSED_OPTIMIZATION,
830 vect_location,
831 "Build SLP failed: unsupported "
832 "unaligned load ");
833 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
834 stmt, 0);
835 }
836
837 vect_free_oprnd_info (oprnds_info);
838 return false;
839 }
840 }
841
842 /* Store the place of this load in the interleaving chain. In
843 case that permutation is needed we later decide if a specific
844 permutation is supported. */
845 load_place = vect_get_place_in_interleaving_chain (stmt,
846 first_load);
847 if (load_place != i)
848 permutation = true;
849
850 load_permutation->safe_push (load_place);
851
852 /* We stop the tree when we reach a group of loads. */
853 stop_recursion = true;
854 continue;
855 }
856 } /* Grouped access. */
857 else
858 {
859 if (TREE_CODE_CLASS (rhs_code) == tcc_reference)
860 {
861 /* Non-grouped load. */
862 if (dump_enabled_p ())
863 {
864 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
865 "Build SLP failed: not grouped load ");
866 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
867 }
868
869 /* FORNOW: Non-grouped loads are not supported. */
870 vect_free_oprnd_info (oprnds_info);
871 return false;
872 }
873
874 /* Not a memory operation. */
875 if (TREE_CODE_CLASS (rhs_code) != tcc_binary
876 && TREE_CODE_CLASS (rhs_code) != tcc_unary
877 && rhs_code != COND_EXPR
878 && rhs_code != CALL_EXPR)
879 {
880 if (dump_enabled_p ())
881 {
882 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
883 "Build SLP failed: operation");
884 dump_printf (MSG_MISSED_OPTIMIZATION, " unsupported ");
885 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
886 }
887
888 vect_free_oprnd_info (oprnds_info);
889 return false;
890 }
891
892 if (rhs_code == COND_EXPR)
893 {
894 tree cond_expr = gimple_assign_rhs1 (stmt);
895
896 if (i == 0)
897 first_cond_code = TREE_CODE (cond_expr);
898 else if (first_cond_code != TREE_CODE (cond_expr))
899 {
900 if (dump_enabled_p ())
901 {
902 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
903 "Build SLP failed: different"
904 " operation");
905 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
906 stmt, 0);
907 }
908
909 vect_free_oprnd_info (oprnds_info);
910 return false;
911 }
912 }
913
914 /* Find the def-stmts. */
915 if (!vect_get_and_check_slp_defs (loop_vinfo, bb_vinfo, stmt,
916 (i == 0), &oprnds_info))
917 {
918 vect_free_oprnd_info (oprnds_info);
919 return false;
920 }
921 }
922 }
923
924 /* Grouped loads were reached - stop the recursion. */
925 if (stop_recursion)
926 {
927 loads->safe_push (*node);
928 if (permutation)
929 *loads_permuted = true;
930 else
931 {
932 /* We don't check complex-number chains here, so we set
933 LOADS_PERMUTED for a further check in
934 vect_supported_load_permutation_p. */
935 if (rhs_code == REALPART_EXPR || rhs_code == IMAGPART_EXPR)
936 *loads_permuted = true;
937 }
938
939 vect_free_oprnd_info (oprnds_info);
940 return true;
941 }
942
943 /* Create SLP_TREE nodes for the definition node/s. */
944 FOR_EACH_VEC_ELT (oprnds_info, i, oprnd_info)
945 {
946 slp_tree child;
947
948 if (oprnd_info->first_dt != vect_internal_def)
949 continue;
950
951 child = vect_create_new_slp_node (oprnd_info->def_stmts);
952 if (!child
953 || !vect_build_slp_tree (loop_vinfo, bb_vinfo, &child, group_size,
954 max_nunits, load_permutation, loads,
955 vectorization_factor, loads_permuted))
956 {
957 if (child)
958 oprnd_info->def_stmts = vNULL;
959 vect_free_slp_tree (child);
960 vect_free_oprnd_info (oprnds_info);
961 return false;
962 }
963
964 oprnd_info->def_stmts.create (0);
965 SLP_TREE_CHILDREN (*node).quick_push (child);
966 }
967
968 vect_free_oprnd_info (oprnds_info);
969 return true;
970 }
971
972 /* Dump a slp tree NODE using flags specified in DUMP_KIND. */
973
974 static void
975 vect_print_slp_tree (int dump_kind, slp_tree node)
976 {
977 int i;
978 gimple stmt;
979 slp_tree child;
980
981 if (!node)
982 return;
983
984 dump_printf (dump_kind, "node ");
985 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, stmt)
986 {
987 dump_printf (dump_kind, "\n\tstmt %d ", i);
988 dump_gimple_stmt (dump_kind, TDF_SLIM, stmt, 0);
989 }
990 dump_printf (dump_kind, "\n");
991
992 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
993 vect_print_slp_tree (dump_kind, child);
994 }
995
996
997 /* Mark the tree rooted at NODE with MARK (PURE_SLP or HYBRID).
998 If MARK is HYBRID, it refers to a specific stmt in NODE (the stmt at index
999 J). Otherwise, MARK is PURE_SLP and J is -1, which indicates that all the
1000 stmts in NODE are to be marked. */
1001
1002 static void
1003 vect_mark_slp_stmts (slp_tree node, enum slp_vect_type mark, int j)
1004 {
1005 int i;
1006 gimple stmt;
1007 slp_tree child;
1008
1009 if (!node)
1010 return;
1011
1012 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, stmt)
1013 if (j < 0 || i == j)
1014 STMT_SLP_TYPE (vinfo_for_stmt (stmt)) = mark;
1015
1016 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
1017 vect_mark_slp_stmts (child, mark, j);
1018 }
1019
1020
1021 /* Mark the statements of the tree rooted at NODE as relevant (vect_used). */
1022
1023 static void
1024 vect_mark_slp_stmts_relevant (slp_tree node)
1025 {
1026 int i;
1027 gimple stmt;
1028 stmt_vec_info stmt_info;
1029 slp_tree child;
1030
1031 if (!node)
1032 return;
1033
1034 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, stmt)
1035 {
1036 stmt_info = vinfo_for_stmt (stmt);
1037 gcc_assert (!STMT_VINFO_RELEVANT (stmt_info)
1038 || STMT_VINFO_RELEVANT (stmt_info) == vect_used_in_scope);
1039 STMT_VINFO_RELEVANT (stmt_info) = vect_used_in_scope;
1040 }
1041
1042 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
1043 vect_mark_slp_stmts_relevant (child);
1044 }
1045
1046
1047 /* Check if the permutation required by the SLP INSTANCE is supported.
1048 Reorganize the SLP nodes stored in SLP_INSTANCE_LOADS if needed. */
1049
1050 static bool
1051 vect_supported_slp_permutation_p (slp_instance instance)
1052 {
1053 slp_tree node = SLP_INSTANCE_LOADS (instance)[0];
1054 gimple stmt = SLP_TREE_SCALAR_STMTS (node)[0];
1055 gimple first_load = GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt));
1056 vec<slp_tree> sorted_loads = vNULL;
1057 int index;
1058 slp_tree *tmp_loads = NULL;
1059 int group_size = SLP_INSTANCE_GROUP_SIZE (instance), i, j;
1060 slp_tree load;
1061
1062 /* FORNOW: The only supported load permutation is one where, in each
1063 node, all the loads are from the same location, when the data-refs
1064 in the nodes of LOADS constitute an interleaving chain.
1065 Sort the nodes according to the order of accesses in the chain. */
1066 tmp_loads = (slp_tree *) xmalloc (sizeof (slp_tree) * group_size);
1067 for (i = 0, j = 0;
1068 SLP_INSTANCE_LOAD_PERMUTATION (instance).iterate (i, &index)
1069 && SLP_INSTANCE_LOADS (instance).iterate (j, &load);
1070 i += group_size, j++)
1071 {
1072 gimple scalar_stmt = SLP_TREE_SCALAR_STMTS (load)[0];
1073 /* Check that the loads are all in the same interleaving chain. */
1074 if (GROUP_FIRST_ELEMENT (vinfo_for_stmt (scalar_stmt)) != first_load)
1075 {
1076 if (dump_enabled_p ())
1077 {
1078 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1079 "Build SLP failed: unsupported data "
1080 "permutation ");
1081 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
1082 scalar_stmt, 0);
1083 }
1084
1085 free (tmp_loads);
1086 return false;
1087 }
1088
1089 tmp_loads[index] = load;
1090 }
1091
1092 sorted_loads.create (group_size);
1093 for (i = 0; i < group_size; i++)
1094 sorted_loads.safe_push (tmp_loads[i]);
1095
1096 SLP_INSTANCE_LOADS (instance).release ();
1097 SLP_INSTANCE_LOADS (instance) = sorted_loads;
1098 free (tmp_loads);
1099
1100 if (!vect_transform_slp_perm_load (stmt, vNULL, NULL,
1101 SLP_INSTANCE_UNROLLING_FACTOR (instance),
1102 instance, true))
1103 return false;
1104
1105 return true;
1106 }
1107
1108
1109 /* Rearrange the statements of NODE according to PERMUTATION. */
1110
1111 static void
1112 vect_slp_rearrange_stmts (slp_tree node, unsigned int group_size,
1113 vec<int> permutation)
1114 {
1115 gimple stmt;
1116 vec<gimple> tmp_stmts;
1117 unsigned int i;
1118 slp_tree child;
1119
1120 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
1121 vect_slp_rearrange_stmts (child, group_size, permutation);
1122
1123 gcc_assert (group_size == SLP_TREE_SCALAR_STMTS (node).length ());
1124 tmp_stmts.create (group_size);
1125 tmp_stmts.quick_grow_cleared (group_size);
1126
1127 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, stmt)
1128 tmp_stmts[permutation[i]] = stmt;
1129
1130 SLP_TREE_SCALAR_STMTS (node).release ();
1131 SLP_TREE_SCALAR_STMTS (node) = tmp_stmts;
1132 }
1133
1134
1135 /* Check if the required load permutation is supported.
1136 LOAD_PERMUTATION contains a list of indices of the loads.
1137 In SLP this permutation is relative to the order of grouped stores that are
1138 the base of the SLP instance. */
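/* For example (illustration only): with two load nodes of group size 2
   reading from a chain a[0], a[1], a[2], a[3], a LOAD_PERMUTATION of
   {1, 0, 3, 2} says the first node's stmts use a[1] and a[0] and the
   second node's use a[3] and a[2], relative to the store order.  */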
1139
1140 static bool
1141 vect_supported_load_permutation_p (slp_instance slp_instn, int group_size,
1142 vec<int> load_permutation)
1143 {
1144 int i = 0, j, prev = -1, next, k, number_of_groups;
1145 bool supported, bad_permutation = false;
1146 sbitmap load_index;
1147 slp_tree node, other_complex_node;
1148 gimple stmt, first = NULL, other_node_first, load, next_load, first_load;
1149 unsigned complex_numbers = 0;
1150 struct data_reference *dr;
1151 bb_vec_info bb_vinfo;
1152
1153 /* FORNOW: permutations are only supported in SLP. */
1154 if (!slp_instn)
1155 return false;
1156
1157 if (dump_enabled_p ())
1158 {
1159 dump_printf_loc (MSG_NOTE, vect_location, "Load permutation ");
1160 FOR_EACH_VEC_ELT (load_permutation, i, next)
1161 dump_printf (MSG_NOTE, "%d ", next);
1162 }
1163
1164 /* In case of reduction every load permutation is allowed, since the order
1165 of the reduction statements is not important (as opposed to the case of
1166 grouped stores). The only condition we need to check is that all the
1167 load nodes are of the same size and have the same permutation (and then
1168 rearrange all the nodes of the SLP instance according to this
1169 permutation). */
1170
1171 /* Check that all the load nodes are of the same size. */
1172 FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (slp_instn), i, node)
1173 {
1174 if (SLP_TREE_SCALAR_STMTS (node).length () != (unsigned) group_size)
1175 return false;
1176
1177 stmt = SLP_TREE_SCALAR_STMTS (node)[0];
1178 if (is_gimple_assign (stmt)
1179 && (gimple_assign_rhs_code (stmt) == REALPART_EXPR
1180 || gimple_assign_rhs_code (stmt) == IMAGPART_EXPR))
1181 complex_numbers++;
1182 }
1183
1184 /* Complex operands can be swapped as follows:
1185 real_c = real_b + real_a;
1186 imag_c = imag_a + imag_b;
1187 i.e., we have {real_b, imag_a} and {real_a, imag_b} instead of
1188 {real_a, imag_a} and {real_b, imag_b}. We check here that if interleaving
1189 chains are mixed, they match the above pattern. */
1190 if (complex_numbers)
1191 {
1192 FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (slp_instn), i, node)
1193 {
1194 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), j, stmt)
1195 {
1196 if (j == 0)
1197 first = stmt;
1198 else
1199 {
1200 if (GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)) != first)
1201 {
1202 if (complex_numbers != 2)
1203 return false;
1204
1205 if (i == 0)
1206 k = 1;
1207 else
1208 k = 0;
1209
1210 other_complex_node = SLP_INSTANCE_LOADS (slp_instn)[k];
1211 other_node_first =
1212 SLP_TREE_SCALAR_STMTS (other_complex_node)[0];
1213
1214 if (GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt))
1215 != other_node_first)
1216 return false;
1217 }
1218 }
1219 }
1220 }
1221 }
1222
1223 /* We checked that this case is ok, so there is no need to proceed with
1224 the permutation tests. */
1225 if (complex_numbers == 2
1226 && SLP_INSTANCE_LOADS (slp_instn).length () == 2)
1227 {
1228 SLP_INSTANCE_LOADS (slp_instn).release ();
1229 SLP_INSTANCE_LOAD_PERMUTATION (slp_instn).release ();
1230 return true;
1231 }
1232
1233 node = SLP_INSTANCE_TREE (slp_instn);
1234 stmt = SLP_TREE_SCALAR_STMTS (node)[0];
1235 /* LOAD_PERMUTATION is a list of indices of all the loads of the SLP
1236 instance, not all the loads belong to the same node or interleaving
1237 group. Hence, we need to divide them into groups according to
1238 GROUP_SIZE. */
1239 number_of_groups = load_permutation.length () / group_size;
1240
1241 /* Reduction (there are no data-refs in the root).
1242 In a reduction chain the order of the loads is important. */
1243 if (!STMT_VINFO_DATA_REF (vinfo_for_stmt (stmt))
1244 && !GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)))
1245 {
1246 int first_group_load_index;
1247
1248 /* Compare all the permutation sequences to the first one. */
1249 for (i = 1; i < number_of_groups; i++)
1250 {
1251 k = 0;
1252 for (j = i * group_size; j < i * group_size + group_size; j++)
1253 {
1254 next = load_permutation[j];
1255 first_group_load_index = load_permutation[k];
1256
1257 if (next != first_group_load_index)
1258 {
1259 bad_permutation = true;
1260 break;
1261 }
1262
1263 k++;
1264 }
1265
1266 if (bad_permutation)
1267 break;
1268 }
1269
1270 if (!bad_permutation)
1271 {
1272 /* Check that the loads in the first sequence are different and there
1273 are no gaps between them. */
1274 load_index = sbitmap_alloc (group_size);
1275 bitmap_clear (load_index);
1276 for (k = 0; k < group_size; k++)
1277 {
1278 first_group_load_index = load_permutation[k];
1279 if (bitmap_bit_p (load_index, first_group_load_index))
1280 {
1281 bad_permutation = true;
1282 break;
1283 }
1284
1285 bitmap_set_bit (load_index, first_group_load_index);
1286 }
1287
1288 if (!bad_permutation)
1289 for (k = 0; k < group_size; k++)
1290 if (!bitmap_bit_p (load_index, k))
1291 {
1292 bad_permutation = true;
1293 break;
1294 }
1295
1296 sbitmap_free (load_index);
1297 }
1298
1299 if (!bad_permutation)
1300 {
1301 /* This permutation is valid for reduction. Since the order of the
1302 statements in the nodes is not important unless they are memory
1303 accesses, we can rearrange the statements in all the nodes
1304 according to the order of the loads. */
1305 vect_slp_rearrange_stmts (SLP_INSTANCE_TREE (slp_instn), group_size,
1306 load_permutation);
1307 SLP_INSTANCE_LOAD_PERMUTATION (slp_instn).release ();
1308 return true;
1309 }
1310 }
1311
1312 /* In basic block vectorization we allow any subchain of an interleaving
1313 chain.
1314 FORNOW: not supported in loop SLP because of realignment complications. */
1315 bb_vinfo = STMT_VINFO_BB_VINFO (vinfo_for_stmt (stmt));
1316 bad_permutation = false;
1317 /* Check that for every node in the instance the loads form a subchain. */
1318 if (bb_vinfo)
1319 {
1320 FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (slp_instn), i, node)
1321 {
1322 next_load = NULL;
1323 first_load = NULL;
1324 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), j, load)
1325 {
1326 if (!first_load)
1327 first_load = GROUP_FIRST_ELEMENT (vinfo_for_stmt (load));
1328 else if (first_load
1329 != GROUP_FIRST_ELEMENT (vinfo_for_stmt (load)))
1330 {
1331 bad_permutation = true;
1332 break;
1333 }
1334
1335 if (j != 0 && next_load != load)
1336 {
1337 bad_permutation = true;
1338 break;
1339 }
1340
1341 next_load = GROUP_NEXT_ELEMENT (vinfo_for_stmt (load));
1342 }
1343
1344 if (bad_permutation)
1345 break;
1346 }
1347
1348 /* Check that the alignment of the first load in every subchain, i.e.,
1349 the first statement in every load node, is supported. */
1350 if (!bad_permutation)
1351 {
1352 FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (slp_instn), i, node)
1353 {
1354 first_load = SLP_TREE_SCALAR_STMTS (node)[0];
1355 if (first_load
1356 != GROUP_FIRST_ELEMENT (vinfo_for_stmt (first_load)))
1357 {
1358 dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_load));
1359 if (vect_supportable_dr_alignment (dr, false)
1360 == dr_unaligned_unsupported)
1361 {
1362 if (dump_enabled_p ())
1363 {
1364 dump_printf_loc (MSG_MISSED_OPTIMIZATION,
1365 vect_location,
1366 "unsupported unaligned load ");
1367 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
1368 first_load, 0);
1369 }
1370 bad_permutation = true;
1371 break;
1372 }
1373 }
1374 }
1375
1376 if (!bad_permutation)
1377 {
1378 SLP_INSTANCE_LOAD_PERMUTATION (slp_instn).release ();
1379 return true;
1380 }
1381 }
1382 }
1383
1384 /* FORNOW: the only supported permutation is 0..01..1.. of length equal to
1385 GROUP_SIZE and where each sequence of same drs is of GROUP_SIZE length as
1386 well (unless it's a reduction). */
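/* E.g. (illustration only): for GROUP_SIZE == 2 the permutation
   {0, 0, 1, 1} passes the checks below (each index repeated GROUP_SIZE
   times, all indices covered), while {0, 1, 0, 1} does not.  */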
1387 if (load_permutation.length ()
1388 != (unsigned int) (group_size * group_size))
1389 return false;
1390
1391 supported = true;
1392 load_index = sbitmap_alloc (group_size);
1393 bitmap_clear (load_index);
1394 for (j = 0; j < group_size; j++)
1395 {
1396 for (i = j * group_size, k = 0;
1397 load_permutation.iterate (i, &next) && k < group_size;
1398 i++, k++)
1399 {
1400 if (i != j * group_size && next != prev)
1401 {
1402 supported = false;
1403 break;
1404 }
1405
1406 prev = next;
1407 }
1408
1409 if (bitmap_bit_p (load_index, prev))
1410 {
1411 supported = false;
1412 break;
1413 }
1414
1415 bitmap_set_bit (load_index, prev);
1416 }
1417
1418 for (j = 0; j < group_size; j++)
1419 if (!bitmap_bit_p (load_index, j))
1420 {
1421 sbitmap_free (load_index);
1422 return false;
1423 }
1424
1425 sbitmap_free (load_index);
1426
1427 if (supported && i == group_size * group_size
1428 && vect_supported_slp_permutation_p (slp_instn))
1429 return true;
1430
1431 return false;
1432 }
1433
1434
1435 /* Find the first load in the loop that belongs to INSTANCE.
1436 When loads are in several SLP nodes, there can be a case in which the first
1437 load does not appear in the first SLP node to be transformed, causing
1438 incorrect order of statements. Since we generate all the loads together,
1439 they must be inserted before the first load of the SLP instance and not
1440 before the first load of the first node of the instance. */
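/* For example (illustration only): if the instance has load nodes
   {a[2], a[3]} and {a[0], a[1]}, and the node for a[2]/a[3] is
   transformed first, the generated loads must still be inserted before
   the load of a[0], the first load of the whole instance.  */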
1441
1442 static gimple
1443 vect_find_first_load_in_slp_instance (slp_instance instance)
1444 {
1445 int i, j;
1446 slp_tree load_node;
1447 gimple first_load = NULL, load;
1448
1449 FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (instance), i, load_node)
1450 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (load_node), j, load)
1451 first_load = get_earlier_stmt (load, first_load);
1452
1453 return first_load;
1454 }
1455
1456
1457 /* Find the last store in SLP INSTANCE. */
1458
1459 static gimple
1460 vect_find_last_store_in_slp_instance (slp_instance instance)
1461 {
1462 int i;
1463 slp_tree node;
1464 gimple last_store = NULL, store;
1465
1466 node = SLP_INSTANCE_TREE (instance);
1467 for (i = 0; SLP_TREE_SCALAR_STMTS (node).iterate (i, &store); i++)
1468 last_store = get_later_stmt (store, last_store);
1469
1470 return last_store;
1471 }
1472
1473 /* Compute the cost for the SLP node NODE in the SLP instance INSTANCE. */
1474
1475 static void
1476 vect_analyze_slp_cost_1 (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo,
1477 slp_instance instance, slp_tree node,
1478 stmt_vector_for_cost *prologue_cost_vec,
1479 unsigned ncopies_for_cost)
1480 {
1481 stmt_vector_for_cost *body_cost_vec = &SLP_INSTANCE_BODY_COST_VEC (instance);
1482
1483 unsigned i;
1484 slp_tree child;
1485 gimple stmt, s;
1486 stmt_vec_info stmt_info;
1487 tree lhs;
1488 unsigned group_size = SLP_INSTANCE_GROUP_SIZE (instance);
1489
1490 /* Recurse down the SLP tree. */
1491 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
1492 vect_analyze_slp_cost_1 (loop_vinfo, bb_vinfo,
1493 instance, child, prologue_cost_vec,
1494 ncopies_for_cost);
1495
1496 /* Look at the first scalar stmt to determine the cost. */
1497 stmt = SLP_TREE_SCALAR_STMTS (node)[0];
1498 stmt_info = vinfo_for_stmt (stmt);
1499 if (STMT_VINFO_GROUPED_ACCESS (stmt_info))
1500 {
1501 if (DR_IS_WRITE (STMT_VINFO_DATA_REF (stmt_info)))
1502 vect_model_store_cost (stmt_info, ncopies_for_cost, false,
1503 vect_uninitialized_def,
1504 node, prologue_cost_vec, body_cost_vec);
1505 else
1506 {
1507 int i;
1508 gcc_checking_assert (DR_IS_READ (STMT_VINFO_DATA_REF (stmt_info)));
1509 vect_model_load_cost (stmt_info, ncopies_for_cost, false,
1510 node, prologue_cost_vec, body_cost_vec);
1511 /* If the load is permuted record the cost for the permutation.
1512 ??? Loads from multiple chains are let through here only
1513 for a single special case involving complex numbers where
1514 in the end no permutation is necessary. */
1515 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, s)
1516 if ((STMT_VINFO_GROUP_FIRST_ELEMENT (vinfo_for_stmt (s))
1517 == STMT_VINFO_GROUP_FIRST_ELEMENT (stmt_info))
1518 && vect_get_place_in_interleaving_chain
1519 (s, STMT_VINFO_GROUP_FIRST_ELEMENT (stmt_info)) != i)
1520 {
1521 record_stmt_cost (body_cost_vec, group_size, vec_perm,
1522 stmt_info, 0, vect_body);
1523 break;
1524 }
1525 }
1526 }
1527 else
1528 record_stmt_cost (body_cost_vec, ncopies_for_cost, vector_stmt,
1529 stmt_info, 0, vect_body);
1530
1531 /* Scan operands and account for prologue cost of constants/externals.
1532 ??? This over-estimates cost for multiple uses and should be
1533 re-engineered. */
1534 lhs = gimple_get_lhs (stmt);
1535 for (i = 0; i < gimple_num_ops (stmt); ++i)
1536 {
1537 tree def, op = gimple_op (stmt, i);
1538 gimple def_stmt;
1539 enum vect_def_type dt;
1540 if (!op || op == lhs)
1541 continue;
1542 if (vect_is_simple_use (op, NULL, loop_vinfo, bb_vinfo,
1543 &def_stmt, &def, &dt)
1544 && (dt == vect_constant_def || dt == vect_external_def))
1545 record_stmt_cost (prologue_cost_vec, 1, vector_stmt,
1546 stmt_info, 0, vect_prologue);
1547 }
1548 }
1549
1550 /* Compute the cost for the SLP instance INSTANCE. */
1551
1552 static void
1553 vect_analyze_slp_cost (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo,
1554 slp_instance instance, unsigned nunits)
1555 {
1556 stmt_vector_for_cost body_cost_vec, prologue_cost_vec;
1557 unsigned ncopies_for_cost;
1558 stmt_info_for_cost *si;
1559 unsigned i;
1560
1561 /* Calculate the number of vector stmts to create based on the unrolling
1562 factor (number of vectors is 1 if NUNITS >= GROUP_SIZE, and is
1563 GROUP_SIZE / NUNITS otherwise). */
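/* Worked example: for NUNITS == 4 and GROUP_SIZE == 6,
   ncopies_for_cost = least_common_multiple (4, 6) / 4 = 12 / 4 = 3.  */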
1564 unsigned group_size = SLP_INSTANCE_GROUP_SIZE (instance);
1565 ncopies_for_cost = least_common_multiple (nunits, group_size) / nunits;
1566
1567 prologue_cost_vec.create (10);
1568 body_cost_vec.create (10);
1569 SLP_INSTANCE_BODY_COST_VEC (instance) = body_cost_vec;
1570 vect_analyze_slp_cost_1 (loop_vinfo, bb_vinfo,
1571 instance, SLP_INSTANCE_TREE (instance),
1572 &prologue_cost_vec, ncopies_for_cost);
1573
1574 /* Record the prologue costs, which were delayed until we were
1575 sure that SLP was successful. Unlike the body costs, we know
1576 the final values now regardless of the loop vectorization factor. */
1577 void *data = (loop_vinfo ? LOOP_VINFO_TARGET_COST_DATA (loop_vinfo)
1578 : BB_VINFO_TARGET_COST_DATA (bb_vinfo));
1579 FOR_EACH_VEC_ELT (prologue_cost_vec, i, si)
1580 {
1581 struct _stmt_vec_info *stmt_info
1582 = si->stmt ? vinfo_for_stmt (si->stmt) : NULL;
1583 (void) add_stmt_cost (data, si->count, si->kind, stmt_info,
1584 si->misalign, vect_prologue);
1585 }
1586
1587 prologue_cost_vec.release ();
1588 }
1589
1590 /* Analyze an SLP instance starting from a group of grouped stores. Call
1591 vect_build_slp_tree to build a tree of packed stmts if possible.
1592 Return FALSE if it's impossible to SLP any stmt in the loop. */
1593
1594 static bool
1595 vect_analyze_slp_instance (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo,
1596 gimple stmt)
1597 {
1598 slp_instance new_instance;
1599 slp_tree node;
1600 unsigned int group_size = GROUP_SIZE (vinfo_for_stmt (stmt));
1601 unsigned int unrolling_factor = 1, nunits;
1602 tree vectype, scalar_type = NULL_TREE;
1603 gimple next;
1604 unsigned int vectorization_factor = 0;
1605 int i;
1606 unsigned int max_nunits = 0;
1607 vec<int> load_permutation;
1608 vec<slp_tree> loads;
1609 struct data_reference *dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (stmt));
1610 bool loads_permuted = false;
1611 vec<gimple> scalar_stmts;
1612
1613 if (GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)))
1614 {
1615 if (dr)
1616 {
1617 scalar_type = TREE_TYPE (DR_REF (dr));
1618 vectype = get_vectype_for_scalar_type (scalar_type);
1619 }
1620 else
1621 {
1622 gcc_assert (loop_vinfo);
1623 vectype = STMT_VINFO_VECTYPE (vinfo_for_stmt (stmt));
1624 }
1625
1626 group_size = GROUP_SIZE (vinfo_for_stmt (stmt));
1627 }
1628 else
1629 {
1630 gcc_assert (loop_vinfo);
1631 vectype = STMT_VINFO_VECTYPE (vinfo_for_stmt (stmt));
1632 group_size = LOOP_VINFO_REDUCTIONS (loop_vinfo).length ();
1633 }
1634
1635 if (!vectype)
1636 {
1637 if (dump_enabled_p ())
1638 {
1639 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1640 "Build SLP failed: unsupported data-type ");
1641 dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM, scalar_type);
1642 }
1643
1644 return false;
1645 }
1646
1647 nunits = TYPE_VECTOR_SUBPARTS (vectype);
1648 if (loop_vinfo)
1649 vectorization_factor = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
1650 else
1651 vectorization_factor = nunits;
1652
1653 /* Calculate the unrolling factor. */
1654 unrolling_factor = least_common_multiple (nunits, group_size) / group_size;
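/* Worked example: for nunits == 4 and group_size == 6 this yields
   least_common_multiple (4, 6) / 6 = 2, i.e. the group must be unrolled
   twice to fill whole vectors.  */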
1655 if (unrolling_factor != 1 && !loop_vinfo)
1656 {
1657 if (dump_enabled_p ())
1658 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1659 "Build SLP failed: unrolling required in basic"
1660 " block SLP");
1661
1662 return false;
1663 }
1664
1665 /* Create a node (a root of the SLP tree) for the packed grouped stores. */
1666 scalar_stmts.create (group_size);
1667 next = stmt;
1668 if (GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)))
1669 {
1670 /* Collect the stores and store them in SLP_TREE_SCALAR_STMTS. */
1671 while (next)
1672 {
1673 if (STMT_VINFO_IN_PATTERN_P (vinfo_for_stmt (next))
1674 && STMT_VINFO_RELATED_STMT (vinfo_for_stmt (next)))
1675 scalar_stmts.safe_push (
1676 STMT_VINFO_RELATED_STMT (vinfo_for_stmt (next)));
1677 else
1678 scalar_stmts.safe_push (next);
1679 next = GROUP_NEXT_ELEMENT (vinfo_for_stmt (next));
1680 }
1681 }
1682 else
1683 {
1684 /* Collect reduction statements. */
1685 vec<gimple> reductions = LOOP_VINFO_REDUCTIONS (loop_vinfo);
1686 for (i = 0; reductions.iterate (i, &next); i++)
1687 scalar_stmts.safe_push (next);
1688 }
1689
1690 node = vect_create_new_slp_node (scalar_stmts);
1691
1692 load_permutation.create (group_size * group_size);
1693 loads.create (group_size);
1694
1695 /* Build the tree for the SLP instance. */
1696 if (vect_build_slp_tree (loop_vinfo, bb_vinfo, &node, group_size,
1697 &max_nunits, &load_permutation, &loads,
1698 vectorization_factor, &loads_permuted))
1699 {
1700 /* Calculate the unrolling factor based on the smallest type. */
1701 if (max_nunits > nunits)
1702 unrolling_factor = least_common_multiple (max_nunits, group_size)
1703 / group_size;
1704
1705 if (unrolling_factor != 1 && !loop_vinfo)
1706 {
1707 if (dump_enabled_p ())
1708 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1709 "Build SLP failed: unrolling required in basic"
1710 " block SLP");
1711 vect_free_slp_tree (node);
1712 load_permutation.release ();
1713 loads.release ();
1714 return false;
1715 }
1716
1717 /* Create a new SLP instance. */
1718 new_instance = XNEW (struct _slp_instance);
1719 SLP_INSTANCE_TREE (new_instance) = node;
1720 SLP_INSTANCE_GROUP_SIZE (new_instance) = group_size;
1721 SLP_INSTANCE_UNROLLING_FACTOR (new_instance) = unrolling_factor;
1722 SLP_INSTANCE_BODY_COST_VEC (new_instance) = vNULL;
1723 SLP_INSTANCE_LOADS (new_instance) = loads;
1724 SLP_INSTANCE_FIRST_LOAD_STMT (new_instance) = NULL;
1725 SLP_INSTANCE_LOAD_PERMUTATION (new_instance) = load_permutation;
1726
1727 if (loads_permuted)
1728 {
1729 if (!vect_supported_load_permutation_p (new_instance, group_size,
1730 load_permutation))
1731 {
1732 if (dump_enabled_p ())
1733 {
1734 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1735 "Build SLP failed: unsupported load "
1736 "permutation ");
1737 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
1738 }
1739
1740 vect_free_slp_instance (new_instance);
1741 return false;
1742 }
1743
1744 SLP_INSTANCE_FIRST_LOAD_STMT (new_instance)
1745 = vect_find_first_load_in_slp_instance (new_instance);
1746 }
1747 else
1748 SLP_INSTANCE_LOAD_PERMUTATION (new_instance).release ();
1749
1750 /* Compute the costs of this SLP instance. */
1751 vect_analyze_slp_cost (loop_vinfo, bb_vinfo,
1752 new_instance, TYPE_VECTOR_SUBPARTS (vectype));
1753
1754 if (loop_vinfo)
1755 LOOP_VINFO_SLP_INSTANCES (loop_vinfo).safe_push (new_instance);
1756 else
1757 BB_VINFO_SLP_INSTANCES (bb_vinfo).safe_push (new_instance);
1758
1759 if (dump_enabled_p ())
1760 vect_print_slp_tree (MSG_NOTE, node);
1761
1762 return true;
1763 }
1764
1765 /* Failed to SLP. */
1766 /* Free the allocated memory. */
1767 vect_free_slp_tree (node);
1768 load_permutation.release ();
1769 loads.release ();
1770
1771 return false;
1772 }
1773
1774
1775 /* Check if there are stmts in the loop that can be vectorized using SLP.
1776 Build SLP trees of packed scalar stmts if SLP is possible. */
1777
1778 bool
1779 vect_analyze_slp (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo)
1780 {
1781 unsigned int i;
1782 vec<gimple> grouped_stores;
1783 vec<gimple> reductions = vNULL;
1784 vec<gimple> reduc_chains = vNULL;
1785 gimple first_element;
1786 bool ok = false;
1787
1788 if (dump_enabled_p ())
1789 dump_printf_loc (MSG_NOTE, vect_location, "=== vect_analyze_slp ===");
1790
1791 if (loop_vinfo)
1792 {
1793 grouped_stores = LOOP_VINFO_GROUPED_STORES (loop_vinfo);
1794 reduc_chains = LOOP_VINFO_REDUCTION_CHAINS (loop_vinfo);
1795 reductions = LOOP_VINFO_REDUCTIONS (loop_vinfo);
1796 }
1797 else
1798 grouped_stores = BB_VINFO_GROUPED_STORES (bb_vinfo);
1799
1800 /* Find SLP sequences starting from groups of grouped stores. */
1801 FOR_EACH_VEC_ELT (grouped_stores, i, first_element)
1802 if (vect_analyze_slp_instance (loop_vinfo, bb_vinfo, first_element))
1803 ok = true;
1804
1805 if (bb_vinfo && !ok)
1806 {
1807 if (dump_enabled_p ())
1808 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1809 "Failed to SLP the basic block.");
1810
1811 return false;
1812 }
1813
1814 if (loop_vinfo
1815 && LOOP_VINFO_REDUCTION_CHAINS (loop_vinfo).length () > 0)
1816 {
1817 /* Find SLP sequences starting from reduction chains. */
1818 FOR_EACH_VEC_ELT (reduc_chains, i, first_element)
1819 if (vect_analyze_slp_instance (loop_vinfo, bb_vinfo, first_element))
1820 ok = true;
1821 else
1822 return false;
1823
1824 /* Don't try to vectorize SLP reductions if a reduction chain was
1825 detected. */
1826 return ok;
1827 }
1828
1829 /* Find SLP sequences starting from groups of reductions. */
1830 if (loop_vinfo && LOOP_VINFO_REDUCTIONS (loop_vinfo).length () > 1
1831 && vect_analyze_slp_instance (loop_vinfo, bb_vinfo, reductions[0]))
1832 ok = true;
1833
1834 return true;
1835 }
1836
1837
1838 /* For each possible SLP instance decide whether to SLP it and calculate the
1839 overall unrolling factor needed to SLP the loop. Return TRUE if decided
1840 to SLP at least one instance. */
1841
1842 bool
1843 vect_make_slp_decision (loop_vec_info loop_vinfo)
1844 {
1845 unsigned int i, unrolling_factor = 1;
1846 vec<slp_instance> slp_instances = LOOP_VINFO_SLP_INSTANCES (loop_vinfo);
1847 slp_instance instance;
1848 int decided_to_slp = 0;
1849
1850 if (dump_enabled_p ())
1851 dump_printf_loc (MSG_NOTE, vect_location, "=== vect_make_slp_decision ===");
1852
1853 FOR_EACH_VEC_ELT (slp_instances, i, instance)
1854 {
1855 /* FORNOW: SLP if you can. */
1856 if (unrolling_factor < SLP_INSTANCE_UNROLLING_FACTOR (instance))
1857 unrolling_factor = SLP_INSTANCE_UNROLLING_FACTOR (instance);
1858
1859 /* Mark all the stmts that belong to INSTANCE as PURE_SLP stmts. Later we
1860 call vect_detect_hybrid_slp () to find stmts that need hybrid SLP and
1861 loop-based vectorization. Such stmts will be marked as HYBRID. */
1862 vect_mark_slp_stmts (SLP_INSTANCE_TREE (instance), pure_slp, -1);
1863 decided_to_slp++;
1864 }
1865
1866 LOOP_VINFO_SLP_UNROLLING_FACTOR (loop_vinfo) = unrolling_factor;
1867
1868 if (decided_to_slp && dump_enabled_p ())
1869 dump_printf_loc (MSG_OPTIMIZED_LOCATIONS, vect_location,
1870 "Decided to SLP %d instances. Unrolling factor %d",
1871 decided_to_slp, unrolling_factor);
1872
1873 return (decided_to_slp > 0);
1874 }
1875
1876
1877 /* Find stmts that must be both vectorized and SLPed (since they feed stmts that
1878 can't be SLPed) in the tree rooted at NODE. Mark such stmts as HYBRID. */
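/* For example (illustration only): if a stmt in an SLP node defines an
   SSA name that is also used by a stmt vectorized with loop-based
   vectorization, the defining stmt must be marked HYBRID: it needs both
   an SLP copy and a loop-vectorized copy.  */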
1879
1880 static void
1881 vect_detect_hybrid_slp_stmts (slp_tree node)
1882 {
1883 int i;
1884 vec<gimple> stmts = SLP_TREE_SCALAR_STMTS (node);
1885 gimple stmt = stmts[0];
1886 imm_use_iterator imm_iter;
1887 gimple use_stmt;
1888 stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
1889 slp_tree child;
1890 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
1891 struct loop *loop = NULL;
1892 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_vinfo);
1893 basic_block bb = NULL;
1894
1895 if (!node)
1896 return;
1897
1898 if (loop_vinfo)
1899 loop = LOOP_VINFO_LOOP (loop_vinfo);
1900 else
1901 bb = BB_VINFO_BB (bb_vinfo);
1902
1903 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, stmt)
1904 if (PURE_SLP_STMT (vinfo_for_stmt (stmt))
1905 && TREE_CODE (gimple_op (stmt, 0)) == SSA_NAME)
1906 FOR_EACH_IMM_USE_STMT (use_stmt, imm_iter, gimple_op (stmt, 0))
1907 if (gimple_bb (use_stmt)
1908 && ((loop && flow_bb_inside_loop_p (loop, gimple_bb (use_stmt)))
1909 || bb == gimple_bb (use_stmt))
1910 && (stmt_vinfo = vinfo_for_stmt (use_stmt))
1911 && !STMT_SLP_TYPE (stmt_vinfo)
1912 && (STMT_VINFO_RELEVANT (stmt_vinfo)
1913 || VECTORIZABLE_CYCLE_DEF (STMT_VINFO_DEF_TYPE (stmt_vinfo)))
1914 && !(gimple_code (use_stmt) == GIMPLE_PHI
1915 && STMT_VINFO_DEF_TYPE (stmt_vinfo)
1916 == vect_reduction_def))
1917 vect_mark_slp_stmts (node, hybrid, i);
1918
1919 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
1920 vect_detect_hybrid_slp_stmts (child);
1921 }
1922
1923
1924 /* Find stmts that must be both vectorized and SLPed. */
1925
1926 void
1927 vect_detect_hybrid_slp (loop_vec_info loop_vinfo)
1928 {
1929 unsigned int i;
1930 vec<slp_instance> slp_instances = LOOP_VINFO_SLP_INSTANCES (loop_vinfo);
1931 slp_instance instance;
1932
1933 if (dump_enabled_p ())
1934 dump_printf_loc (MSG_NOTE, vect_location, "=== vect_detect_hybrid_slp ===");
1935
1936 FOR_EACH_VEC_ELT (slp_instances, i, instance)
1937 vect_detect_hybrid_slp_stmts (SLP_INSTANCE_TREE (instance));
1938 }
1939
1940
1941 /* Create and initialize a new bb_vec_info struct for BB, as well as
1942 stmt_vec_info structs for all the stmts in it. */
1943
1944 static bb_vec_info
1945 new_bb_vec_info (basic_block bb)
1946 {
1947 bb_vec_info res = NULL;
1948 gimple_stmt_iterator gsi;
1949
1950 res = (bb_vec_info) xcalloc (1, sizeof (struct _bb_vec_info));
1951 BB_VINFO_BB (res) = bb;
1952
1953 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
1954 {
1955 gimple stmt = gsi_stmt (gsi);
1956 gimple_set_uid (stmt, 0);
1957 set_vinfo_for_stmt (stmt, new_stmt_vec_info (stmt, NULL, res));
1958 }
1959
1960 BB_VINFO_GROUPED_STORES (res).create (10);
1961 BB_VINFO_SLP_INSTANCES (res).create (2);
1962 BB_VINFO_TARGET_COST_DATA (res) = init_cost (NULL);
1963
1964 bb->aux = res;
1965 return res;
1966 }
1967
1968
1969 /* Free BB_VINFO struct, as well as all the stmt_vec_info structs of all the
1970 stmts in the basic block. */
1971
1972 static void
1973 destroy_bb_vec_info (bb_vec_info bb_vinfo)
1974 {
1975 vec<slp_instance> slp_instances;
1976 slp_instance instance;
1977 basic_block bb;
1978 gimple_stmt_iterator si;
1979 unsigned i;
1980
1981 if (!bb_vinfo)
1982 return;
1983
1984 bb = BB_VINFO_BB (bb_vinfo);
1985
1986 for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
1987 {
1988 gimple stmt = gsi_stmt (si);
1989 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1990
1991 if (stmt_info)
1992 /* Free stmt_vec_info. */
1993 free_stmt_vec_info (stmt);
1994 }
1995
1996 free_data_refs (BB_VINFO_DATAREFS (bb_vinfo));
1997 free_dependence_relations (BB_VINFO_DDRS (bb_vinfo));
1998 BB_VINFO_GROUPED_STORES (bb_vinfo).release ();
1999 slp_instances = BB_VINFO_SLP_INSTANCES (bb_vinfo);
2000 FOR_EACH_VEC_ELT (slp_instances, i, instance)
2001 vect_free_slp_instance (instance);
2002 BB_VINFO_SLP_INSTANCES (bb_vinfo).release ();
2003 destroy_cost_data (BB_VINFO_TARGET_COST_DATA (bb_vinfo));
2004 free (bb_vinfo);
2005 bb->aux = NULL;
2006 }
2007
2008
2009 /* Analyze statements contained in SLP tree node after recursively analyzing
2010 the subtree. Return TRUE if the operations are supported. */
2011
2012 static bool
2013 vect_slp_analyze_node_operations (bb_vec_info bb_vinfo, slp_tree node)
2014 {
2015 bool dummy;
2016 int i;
2017 gimple stmt;
2018 slp_tree child;
2019
2020 if (!node)
2021 return true;
2022
2023 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
2024 if (!vect_slp_analyze_node_operations (bb_vinfo, child))
2025 return false;
2026
2027 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, stmt)
2028 {
2029 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2030 gcc_assert (stmt_info);
2031 gcc_assert (PURE_SLP_STMT (stmt_info));
2032
2033 if (!vect_analyze_stmt (stmt, &dummy, node))
2034 return false;
2035 }
2036
2037 return true;
2038 }
2039
2040
2041 /* Analyze statements in SLP instances of the basic block. Return TRUE if the
2042 operations are supported. */
2043
2044 static bool
2045 vect_slp_analyze_operations (bb_vec_info bb_vinfo)
2046 {
2047 vec<slp_instance> slp_instances = BB_VINFO_SLP_INSTANCES (bb_vinfo);
2048 slp_instance instance;
2049 int i;
2050
2051 for (i = 0; slp_instances.iterate (i, &instance); )
2052 {
2053 if (!vect_slp_analyze_node_operations (bb_vinfo,
2054 SLP_INSTANCE_TREE (instance)))
2055 {
2056 vect_free_slp_instance (instance);
2057 slp_instances.ordered_remove (i);
2058 }
2059 else
2060 i++;
2061 }
2062
2063 if (!slp_instances.length ())
2064 return false;
2065
2066 return true;
2067 }
2068
2069 /* Check if vectorization of the basic block is profitable. */
2070
2071 static bool
2072 vect_bb_vectorization_profitable_p (bb_vec_info bb_vinfo)
2073 {
2074 vec<slp_instance> slp_instances = BB_VINFO_SLP_INSTANCES (bb_vinfo);
2075 slp_instance instance;
2076 int i, j;
2077 unsigned int vec_inside_cost = 0, vec_outside_cost = 0, scalar_cost = 0;
2078 unsigned int vec_prologue_cost = 0, vec_epilogue_cost = 0;
2079 unsigned int stmt_cost;
2080 gimple stmt;
2081 gimple_stmt_iterator si;
2082 basic_block bb = BB_VINFO_BB (bb_vinfo);
2083 void *target_cost_data = BB_VINFO_TARGET_COST_DATA (bb_vinfo);
2084 stmt_vec_info stmt_info = NULL;
2085 stmt_vector_for_cost body_cost_vec;
2086 stmt_info_for_cost *ci;
2087
2088 /* Calculate vector costs. */
2089 FOR_EACH_VEC_ELT (slp_instances, i, instance)
2090 {
2091 body_cost_vec = SLP_INSTANCE_BODY_COST_VEC (instance);
2092
2093 FOR_EACH_VEC_ELT (body_cost_vec, j, ci)
2094 {
2095 stmt_info = ci->stmt ? vinfo_for_stmt (ci->stmt) : NULL;
2096 (void) add_stmt_cost (target_cost_data, ci->count, ci->kind,
2097 stmt_info, ci->misalign, vect_body);
2098 }
2099 }
2100
2101 /* Calculate scalar cost. */
2102 for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
2103 {
2104 stmt = gsi_stmt (si);
2105 stmt_info = vinfo_for_stmt (stmt);
2106
2107 if (!stmt_info || !STMT_VINFO_VECTORIZABLE (stmt_info)
2108 || !PURE_SLP_STMT (stmt_info))
2109 continue;
2110
2111 if (STMT_VINFO_DATA_REF (stmt_info))
2112 {
2113 if (DR_IS_READ (STMT_VINFO_DATA_REF (stmt_info)))
2114 stmt_cost = vect_get_stmt_cost (scalar_load);
2115 else
2116 stmt_cost = vect_get_stmt_cost (scalar_store);
2117 }
2118 else
2119 stmt_cost = vect_get_stmt_cost (scalar_stmt);
2120
2121 scalar_cost += stmt_cost;
2122 }
2123
2124 /* Complete the target-specific cost calculation. */
2125 finish_cost (BB_VINFO_TARGET_COST_DATA (bb_vinfo), &vec_prologue_cost,
2126 &vec_inside_cost, &vec_epilogue_cost);
2127
2128 vec_outside_cost = vec_prologue_cost + vec_epilogue_cost;
2129
2130 if (dump_enabled_p ())
2131 {
2132 dump_printf_loc (MSG_NOTE, vect_location, "Cost model analysis: \n");
2133 dump_printf (MSG_NOTE, " Vector inside of basic block cost: %d\n",
2134 vec_inside_cost);
2135 dump_printf (MSG_NOTE, " Vector prologue cost: %d\n", vec_prologue_cost);
2136 dump_printf (MSG_NOTE, " Vector epilogue cost: %d\n", vec_epilogue_cost);
2137 dump_printf (MSG_NOTE, " Scalar cost of basic block: %d\n", scalar_cost);
2138 }
2139
2140 /* Vectorization is profitable if its cost is less than the cost of the
2141 scalar version. */
2142 if (vec_outside_cost + vec_inside_cost >= scalar_cost)
2143 return false;
2144
2145 return true;
2146 }
2147
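/* A numeric sketch of the comparison above, with invented costs: for
vec_inside_cost == 6, vec_prologue_cost == 1, vec_epilogue_cost == 1
and scalar_cost == 10 we get 1 + 1 + 6 == 8 < 10, so vectorizing the
block is considered profitable; with scalar_cost == 8 or less the
function would return false. */
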
2148 /* Check if the basic block can be vectorized. */
2149
2150 static bb_vec_info
2151 vect_slp_analyze_bb_1 (basic_block bb)
2152 {
2153 bb_vec_info bb_vinfo;
2154 vec<slp_instance> slp_instances;
2155 slp_instance instance;
2156 int i;
2157 int min_vf = 2;
2158
2159 bb_vinfo = new_bb_vec_info (bb);
2160 if (!bb_vinfo)
2161 return NULL;
2162
2163 if (!vect_analyze_data_refs (NULL, bb_vinfo, &min_vf))
2164 {
2165 if (dump_enabled_p ())
2166 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2167 "not vectorized: unhandled data-ref in basic "
2168 "block.\n");
2169
2170 destroy_bb_vec_info (bb_vinfo);
2171 return NULL;
2172 }
2173
2174 if (BB_VINFO_DATAREFS (bb_vinfo).length () < 2)
2175 {
2176 if (dump_enabled_p ())
2177 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2178 "not vectorized: not enough data-refs in "
2179 "basic block.\n");
2180
2181 destroy_bb_vec_info (bb_vinfo);
2182 return NULL;
2183 }
2184
2185 if (!vect_analyze_data_ref_accesses (NULL, bb_vinfo))
2186 {
2187 if (dump_enabled_p ())
2188 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2189 "not vectorized: unhandled data access in "
2190 "basic block.\n");
2191
2192 destroy_bb_vec_info (bb_vinfo);
2193 return NULL;
2194 }
2195
2196 vect_pattern_recog (NULL, bb_vinfo);
2197
2198 if (!vect_slp_analyze_data_ref_dependences (bb_vinfo))
2199 {
2200 if (dump_enabled_p ())
2201 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2202 "not vectorized: unhandled data dependence "
2203 "in basic block.\n");
2204
2205 destroy_bb_vec_info (bb_vinfo);
2206 return NULL;
2207 }
2208
2209 if (!vect_analyze_data_refs_alignment (NULL, bb_vinfo))
2210 {
2211 if (dump_enabled_p ())
2212 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2213 "not vectorized: bad data alignment in basic "
2214 "block.\n");
2215
2216 destroy_bb_vec_info (bb_vinfo);
2217 return NULL;
2218 }
2219
2220 /* Check the SLP opportunities in the basic block, analyze and build SLP
2221 trees. */
2222 if (!vect_analyze_slp (NULL, bb_vinfo))
2223 {
2224 if (dump_enabled_p ())
2225 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2226 "not vectorized: failed to find SLP opportunities "
2227 "in basic block.\n");
2228
2229 destroy_bb_vec_info (bb_vinfo);
2230 return NULL;
2231 }
2232
2233 slp_instances = BB_VINFO_SLP_INSTANCES (bb_vinfo);
2234
2235 /* Mark all the statements that we want to vectorize as pure SLP and
2236 relevant. */
2237 FOR_EACH_VEC_ELT (slp_instances, i, instance)
2238 {
2239 vect_mark_slp_stmts (SLP_INSTANCE_TREE (instance), pure_slp, -1);
2240 vect_mark_slp_stmts_relevant (SLP_INSTANCE_TREE (instance));
2241 }
2242
2243 if (!vect_verify_datarefs_alignment (NULL, bb_vinfo))
2244 {
2245 if (dump_enabled_p ())
2246 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2247 "not vectorized: unsupported alignment in basic "
2248 "block.\n");
2249 destroy_bb_vec_info (bb_vinfo);
2250 return NULL;
2251 }
2252
2253 if (!vect_slp_analyze_operations (bb_vinfo))
2254 {
2255 if (dump_enabled_p ())
2256 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2257 "not vectorized: bad operation in basic block.\n");
2258
2259 destroy_bb_vec_info (bb_vinfo);
2260 return NULL;
2261 }
2262
2263 /* Cost model: check if the vectorization is worthwhile. */
2264 if (flag_vect_cost_model
2265 && !vect_bb_vectorization_profitable_p (bb_vinfo))
2266 {
2267 if (dump_enabled_p ())
2268 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2269 "not vectorized: vectorization is not "
2270 "profitable.\n");
2271
2272 destroy_bb_vec_info (bb_vinfo);
2273 return NULL;
2274 }
2275
2276 if (dump_enabled_p ())
2277 dump_printf_loc (MSG_NOTE, vect_location,
2278 "Basic block will be vectorized using SLP\n");
2279
2280 return bb_vinfo;
2281 }
2282
2283
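/* Check if the basic block BB can be vectorized, trying the supported
vector sizes in decreasing order. Return the bb_vec_info on success
and NULL otherwise. */
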
2284 bb_vec_info
2285 vect_slp_analyze_bb (basic_block bb)
2286 {
2287 bb_vec_info bb_vinfo;
2288 int insns = 0;
2289 gimple_stmt_iterator gsi;
2290 unsigned int vector_sizes;
2291
2292 if (dump_enabled_p ())
2293 dump_printf_loc (MSG_NOTE, vect_location, "===vect_slp_analyze_bb===\n");
2294
2295 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
2296 {
2297 gimple stmt = gsi_stmt (gsi);
2298 if (!is_gimple_debug (stmt)
2299 && !gimple_nop_p (stmt)
2300 && gimple_code (stmt) != GIMPLE_LABEL)
2301 insns++;
2302 }
2303
2304 if (insns > PARAM_VALUE (PARAM_SLP_MAX_INSNS_IN_BB))
2305 {
2306 if (dump_enabled_p ())
2307 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2308 "not vectorized: too many instructions in "
2309 "basic block.\n");
2310
2311 return NULL;
2312 }
2313
2314 /* Autodetect the first vector size we try. */
2315 current_vector_size = 0;
2316 vector_sizes = targetm.vectorize.autovectorize_vector_sizes ();
2317
2318 while (1)
2319 {
2320 bb_vinfo = vect_slp_analyze_bb_1 (bb);
2321 if (bb_vinfo)
2322 return bb_vinfo;
2323
2324 destroy_bb_vec_info (bb_vinfo);
2325
2326 vector_sizes &= ~current_vector_size;
2327 if (vector_sizes == 0
2328 || current_vector_size == 0)
2329 return NULL;
2330
2331 /* Try the largest remaining vector size. */
2332 current_vector_size = 1 << floor_log2 (vector_sizes);
2333 if (dump_enabled_p ())
2334 dump_printf_loc (MSG_NOTE, vect_location,
2335 "***** Re-trying analysis with "
2336 "vector size %d\n", current_vector_size);
2337 }
2338 }
2339
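/* A sketch of the retry logic above, assuming an AVX-like target whose
autovectorize_vector_sizes () returns 32 | 16: if analysis with the
autodetected 32-byte vectors fails, then

     vector_sizes &= ~current_vector_size;        => 16
     current_vector_size = 1 << floor_log2 (16);  => 16

so the largest remaining size is tried next; once the mask runs out,
the function gives up and returns NULL. */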
2340
2341 /* SLP costs are calculated according to the SLP instance unrolling factor
2342 (i.e., the number of created vector stmts depends on the unrolling factor).
2343 However, the actual number of vector stmts for every SLP node depends on
2344 VF, which is set later in vect_analyze_operations (). Hence, SLP costs
2345 should be updated. In this function we assume that the inside costs
2346 calculated in vect_model_xxx_cost are linear in ncopies. */
2347
2348 void
2349 vect_update_slp_costs_according_to_vf (loop_vec_info loop_vinfo)
2350 {
2351 unsigned int i, j, vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
2352 vec<slp_instance> slp_instances = LOOP_VINFO_SLP_INSTANCES (loop_vinfo);
2353 slp_instance instance;
2354 stmt_vector_for_cost body_cost_vec;
2355 stmt_info_for_cost *si;
2356 void *data = LOOP_VINFO_TARGET_COST_DATA (loop_vinfo);
2357
2358 if (dump_enabled_p ())
2359 dump_printf_loc (MSG_NOTE, vect_location,
2360 "=== vect_update_slp_costs_according_to_vf ===");
2361
2362 FOR_EACH_VEC_ELT (slp_instances, i, instance)
2363 {
2364 /* We assume that costs are linear in ncopies. */
2365 int ncopies = vf / SLP_INSTANCE_UNROLLING_FACTOR (instance);
2366
2367 /* Record the instance's instructions in the target cost model.
2368 This was delayed until here because the count of instructions
2369 isn't known beforehand. */
2370 body_cost_vec = SLP_INSTANCE_BODY_COST_VEC (instance);
2371
2372 FOR_EACH_VEC_ELT (body_cost_vec, j, si)
2373 (void) add_stmt_cost (data, si->count * ncopies, si->kind,
2374 vinfo_for_stmt (si->stmt), si->misalign,
2375 vect_body);
2376 }
2377 }
2378
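/* A worked example with assumed numbers: if the final vectorization
factor is vf == 8 but an instance only required
SLP_INSTANCE_UNROLLING_FACTOR == 2, then ncopies == 8 / 2 == 4 and
each recorded body cost is charged four times (si->count * ncopies),
relying on the linearity assumption stated above. */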
2379
2380 /* For constant and loop invariant defs of SLP_NODE this function returns
2381 (vector) defs (VEC_OPRNDS) that will be used in the vectorized stmts.
2382 OP_NUM determines which operand of the RHS of the scalar stmts we gather
2383 defs for. NUMBER_OF_VECTORS is the number of vector defs to create.
2384 REDUC_INDEX is the index of the reduction operand in the stmts, or -1 if
2385 the stmts do not form a reduction. */
2386
2387 static void
2388 vect_get_constant_vectors (tree op, slp_tree slp_node,
2389 vec<tree> *vec_oprnds,
2390 unsigned int op_num, unsigned int number_of_vectors,
2391 int reduc_index)
2392 {
2393 vec<gimple> stmts = SLP_TREE_SCALAR_STMTS (slp_node);
2394 gimple stmt = stmts[0];
2395 stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
2396 unsigned nunits;
2397 tree vec_cst;
2398 tree *elts;
2399 unsigned j, number_of_places_left_in_vector;
2400 tree vector_type;
2401 tree vop;
2402 int group_size = stmts.length ();
2403 unsigned int vec_num, i;
2404 unsigned number_of_copies = 1;
2405 vec<tree> voprnds;
2406 voprnds.create (number_of_vectors);
2407 bool constant_p, is_store;
2408 tree neutral_op = NULL;
2409 enum tree_code code = gimple_expr_code (stmt);
2410 gimple def_stmt;
2411 struct loop *loop;
2412 gimple_seq ctor_seq = NULL;
2413
2414 if (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_reduction_def
2415 && reduc_index != -1)
2416 {
2417 op_num = reduc_index - 1;
2418 op = gimple_op (stmt, reduc_index);
2419 /* For additional copies (see the explanation of NUMBER_OF_COPIES below)
2420 we need either neutral operands or the original operands. See
2421 get_initial_def_for_reduction() for details. */
2422 switch (code)
2423 {
2424 case WIDEN_SUM_EXPR:
2425 case DOT_PROD_EXPR:
2426 case PLUS_EXPR:
2427 case MINUS_EXPR:
2428 case BIT_IOR_EXPR:
2429 case BIT_XOR_EXPR:
2430 if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (op)))
2431 neutral_op = build_real (TREE_TYPE (op), dconst0);
2432 else
2433 neutral_op = build_int_cst (TREE_TYPE (op), 0);
2434
2435 break;
2436
2437 case MULT_EXPR:
2438 if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (op)))
2439 neutral_op = build_real (TREE_TYPE (op), dconst1);
2440 else
2441 neutral_op = build_int_cst (TREE_TYPE (op), 1);
2442
2443 break;
2444
2445 case BIT_AND_EXPR:
2446 neutral_op = build_int_cst (TREE_TYPE (op), -1);
2447 break;
2448
2449 case MAX_EXPR:
2450 case MIN_EXPR:
2451 def_stmt = SSA_NAME_DEF_STMT (op);
2452 loop = (gimple_bb (stmt))->loop_father;
2453 neutral_op = PHI_ARG_DEF_FROM_EDGE (def_stmt,
2454 loop_preheader_edge (loop));
2455 break;
2456
2457 default:
2458 neutral_op = NULL;
2459 }
2460 }
2461
2462 if (STMT_VINFO_DATA_REF (stmt_vinfo))
2463 {
2464 is_store = true;
2465 op = gimple_assign_rhs1 (stmt);
2466 }
2467 else
2468 is_store = false;
2469
2470 gcc_assert (op);
2471
2472 if (CONSTANT_CLASS_P (op))
2473 constant_p = true;
2474 else
2475 constant_p = false;
2476
2477 vector_type = get_vectype_for_scalar_type (TREE_TYPE (op));
2478 gcc_assert (vector_type);
2479 nunits = TYPE_VECTOR_SUBPARTS (vector_type);
2480
2481 /* NUMBER_OF_COPIES is the number of times we need to use the same values in
2482 created vectors. It is greater than 1 if unrolling is performed.
2483
2484 For example, we have two scalar operands, s1 and s2 (e.g., group of
2485 strided accesses of size two), while NUNITS is four (i.e., four scalars
2486 of this type can be packed in a vector). The output vector will contain
2487 two copies of each scalar operand: {s1, s2, s1, s2}. (NUMBER_OF_COPIES
2488 will be 2).
2489
2490 If GROUP_SIZE > NUNITS, the scalars will be split into several vectors
2491 containing the operands.
2492
2493 For example, NUNITS is four as before, and the group size is 8
2494 (s1, s2, ..., s8). We will create two vectors {s1, s2, s3, s4} and
2495 {s5, s6, s7, s8}. */
2496
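/* A further hypothetical example for the computation below, where
neither size divides the other: nunits == 4 and group_size == 6 give
least_common_multiple (4, 6) == 12, hence NUMBER_OF_COPIES == 12 / 6
== 2 and 12 / 4 == 3 vectors are built:
{s1,s2,s3,s4} {s5,s6,s1,s2} {s3,s4,s5,s6}. */
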
2497 number_of_copies = least_common_multiple (nunits, group_size) / group_size;
2498
2499 number_of_places_left_in_vector = nunits;
2500 elts = XALLOCAVEC (tree, nunits);
2501 for (j = 0; j < number_of_copies; j++)
2502 {
2503 for (i = group_size - 1; stmts.iterate (i, &stmt); i--)
2504 {
2505 if (is_store)
2506 op = gimple_assign_rhs1 (stmt);
2507 else
2508 {
2509 switch (code)
2510 {
2511 case COND_EXPR:
2512 if (op_num == 0 || op_num == 1)
2513 {
2514 tree cond = gimple_assign_rhs1 (stmt);
2515 op = TREE_OPERAND (cond, op_num);
2516 }
2517 else
2518 {
2519 if (op_num == 2)
2520 op = gimple_assign_rhs2 (stmt);
2521 else
2522 op = gimple_assign_rhs3 (stmt);
2523 }
2524 break;
2525
2526 case CALL_EXPR:
2527 op = gimple_call_arg (stmt, op_num);
2528 break;
2529
2530 case LSHIFT_EXPR:
2531 case RSHIFT_EXPR:
2532 case LROTATE_EXPR:
2533 case RROTATE_EXPR:
2534 op = gimple_op (stmt, op_num + 1);
2535 /* Unlike the other binary operators, shifts/rotates have
2536 the shift count being int, instead of the same type as
2537 the lhs, so make sure the scalar is the right type if
2538 we are dealing with vectors of
2539 long long/long/short/char. */
2540 if (op_num == 1 && constant_p)
2541 op = fold_convert (TREE_TYPE (vector_type), op);
2542 break;
2543
2544 default:
2545 op = gimple_op (stmt, op_num + 1);
2546 break;
2547 }
2548 }
2549
2550 if (reduc_index != -1)
2551 {
2552 loop = (gimple_bb (stmt))->loop_father;
2553 def_stmt = SSA_NAME_DEF_STMT (op);
2554
2555 gcc_assert (loop);
2556
2557 /* Get the def before the loop. In a reduction chain we have only
2558 one initial value. */
2559 if ((j != (number_of_copies - 1)
2560 || (GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt))
2561 && i != 0))
2562 && neutral_op)
2563 op = neutral_op;
2564 else
2565 op = PHI_ARG_DEF_FROM_EDGE (def_stmt,
2566 loop_preheader_edge (loop));
2567 }
2568
2569 /* Create 'vect_ = {op0,op1,...,opn}'. */
2570 number_of_places_left_in_vector--;
2571 if (!types_compatible_p (TREE_TYPE (vector_type), TREE_TYPE (op)))
2572 {
2573 if (constant_p)
2574 {
2575 op = fold_unary (VIEW_CONVERT_EXPR,
2576 TREE_TYPE (vector_type), op);
2577 gcc_assert (op && CONSTANT_CLASS_P (op));
2578 }
2579 else
2580 {
2581 tree new_temp
2582 = make_ssa_name (TREE_TYPE (vector_type), NULL);
2583 gimple init_stmt;
2584 op = build1 (VIEW_CONVERT_EXPR, TREE_TYPE (vector_type),
2585 op);
2586 init_stmt
2587 = gimple_build_assign_with_ops (VIEW_CONVERT_EXPR,
2588 new_temp, op, NULL_TREE);
2589 gimple_seq_add_stmt (&ctor_seq, init_stmt);
2590 op = new_temp;
2591 }
2592 }
2593 elts[number_of_places_left_in_vector] = op;
2594
2595 if (number_of_places_left_in_vector == 0)
2596 {
2597 number_of_places_left_in_vector = nunits;
2598
2599 if (constant_p)
2600 vec_cst = build_vector (vector_type, elts);
2601 else
2602 {
2603 vec<constructor_elt, va_gc> *v;
2604 unsigned k;
2605 vec_alloc (v, nunits);
2606 for (k = 0; k < nunits; ++k)
2607 CONSTRUCTOR_APPEND_ELT (v, NULL_TREE, elts[k]);
2608 vec_cst = build_constructor (vector_type, v);
2609 }
2610 voprnds.quick_push (vect_init_vector (stmt, vec_cst,
2611 vector_type, NULL));
2612 if (ctor_seq != NULL)
2613 {
2614 gimple init_stmt = SSA_NAME_DEF_STMT (voprnds.last ());
2615 gimple_stmt_iterator gsi = gsi_for_stmt (init_stmt);
2616 gsi_insert_seq_before_without_update (&gsi, ctor_seq,
2617 GSI_SAME_STMT);
2618 ctor_seq = NULL;
2619 }
2620 }
2621 }
2622 }
2623
2624 /* Since the vectors are created in reverse order, we should reverse
2625 them. */
2626 vec_num = voprnds.length ();
2627 for (j = vec_num; j != 0; j--)
2628 {
2629 vop = voprnds[j - 1];
2630 vec_oprnds->quick_push (vop);
2631 }
2632
2633 voprnds.release ();
2634
2635 /* In case VF is greater than the unrolling factor needed for the SLP
2636 group of stmts, NUMBER_OF_VECTORS to be created is greater than
2637 NUMBER_OF_SCALARS/NUNITS or NUNITS/NUMBER_OF_SCALARS, and hence we have
2638 to replicate the vectors. */
2639 while (number_of_vectors > vec_oprnds->length ())
2640 {
2641 tree neutral_vec = NULL;
2642
2643 if (neutral_op)
2644 {
2645 if (!neutral_vec)
2646 neutral_vec = build_vector_from_val (vector_type, neutral_op);
2647
2648 vec_oprnds->quick_push (neutral_vec);
2649 }
2650 else
2651 {
2652 for (i = 0; vec_oprnds->iterate (i, &vop) && i < vec_num; i++)
2653 vec_oprnds->quick_push (vop);
2654 }
2655 }
2656 }
2657
2658
2659 /* Get vectorized definitions from SLP_NODE, which contains the
2660 corresponding vectorized def-stmts. */
2661
2662 static void
2663 vect_get_slp_vect_defs (slp_tree slp_node, vec<tree> *vec_oprnds)
2664 {
2665 tree vec_oprnd;
2666 gimple vec_def_stmt;
2667 unsigned int i;
2668
2669 gcc_assert (SLP_TREE_VEC_STMTS (slp_node).exists ());
2670
2671 FOR_EACH_VEC_ELT (SLP_TREE_VEC_STMTS (slp_node), i, vec_def_stmt)
2672 {
2673 gcc_assert (vec_def_stmt);
2674 vec_oprnd = gimple_get_lhs (vec_def_stmt);
2675 vec_oprnds->quick_push (vec_oprnd);
2676 }
2677 }
2678
2679
2680 /* Get vectorized definitions for SLP_NODE.
2681 If the scalar definitions are loop invariants or constants, collect them and
2682 call vect_get_constant_vectors() to create vector stmts.
2683 Otherwise, the def-stmts must be already vectorized and the vectorized stmts
2684 must be stored in the corresponding child of SLP_NODE, and we call
2685 vect_get_slp_vect_defs () to retrieve them. */
2686
2687 void
2688 vect_get_slp_defs (vec<tree> ops, slp_tree slp_node,
2689 vec<vec<tree> > *vec_oprnds, int reduc_index)
2690 {
2691 gimple first_stmt;
2692 int number_of_vects = 0, i;
2693 unsigned int child_index = 0;
2694 HOST_WIDE_INT lhs_size_unit, rhs_size_unit;
2695 slp_tree child = NULL;
2696 vec<tree> vec_defs;
2697 tree oprnd;
2698 bool vectorized_defs;
2699
2700 first_stmt = SLP_TREE_SCALAR_STMTS (slp_node)[0];
2701 FOR_EACH_VEC_ELT (ops, i, oprnd)
2702 {
2703 /* For each operand we check if it has vectorized definitions in a child
2704 node or we need to create them (for invariants and constants). We
2705 check if the LHS of the first stmt of the next child matches OPRND.
2706 If it does, we have found the correct child. Otherwise, we call
2707 vect_get_constant_vectors () and do not advance CHILD_INDEX, so that
2708 the same child node is checked again for the next operand. */
2709 vectorized_defs = false;
2710 if (SLP_TREE_CHILDREN (slp_node).length () > child_index)
2711 {
2712 child = (slp_tree) SLP_TREE_CHILDREN (slp_node)[child_index];
2713
2714 /* We have to check both pattern and original def, if available. */
2715 gimple first_def = SLP_TREE_SCALAR_STMTS (child)[0];
2716 gimple related = STMT_VINFO_RELATED_STMT (vinfo_for_stmt (first_def));
2717
2718 if (operand_equal_p (oprnd, gimple_get_lhs (first_def), 0)
2719 || (related
2720 && operand_equal_p (oprnd, gimple_get_lhs (related), 0)))
2721 {
2722 /* The number of vector defs is determined by the number of
2723 vector statements in the node from which we get those
2724 statements. */
2725 number_of_vects = SLP_TREE_NUMBER_OF_VEC_STMTS (child);
2726 vectorized_defs = true;
2727 child_index++;
2728 }
2729 }
2730
2731 if (!vectorized_defs)
2732 {
2733 if (i == 0)
2734 {
2735 number_of_vects = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
2736 /* Number of vector stmts was calculated according to LHS in
2737 vect_schedule_slp_instance (), fix it by replacing LHS with
2738 RHS, if necessary. See vect_get_smallest_scalar_type () for
2739 details. */
2740 vect_get_smallest_scalar_type (first_stmt, &lhs_size_unit,
2741 &rhs_size_unit);
2742 if (rhs_size_unit != lhs_size_unit)
2743 {
2744 number_of_vects *= rhs_size_unit;
2745 number_of_vects /= lhs_size_unit;
2746 }
2747 }
2748 }
2749
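/* A sketch of the adjustment above with assumed types: for a group of
stmts like char_x = (char) int_y the count was derived from the char
LHS, but the int RHS packs four times fewer elements per vector, so
number_of_vects is scaled by rhs_size_unit / lhs_size_unit
== 4 / 1 == 4. */
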
2750 /* Allocate memory for vectorized defs. */
2751 vec_defs = vNULL;
2752 vec_defs.create (number_of_vects);
2753
2754 /* For reduction defs we call vect_get_constant_vectors (), since we are
2755 looking for initial loop invariant values. */
2756 if (vectorized_defs && reduc_index == -1)
2757 /* The defs are already vectorized. */
2758 vect_get_slp_vect_defs (child, &vec_defs);
2759 else
2760 /* Build vectors from scalar defs. */
2761 vect_get_constant_vectors (oprnd, slp_node, &vec_defs, i,
2762 number_of_vects, reduc_index);
2763
2764 vec_oprnds->quick_push (vec_defs);
2765
2766 /* For reductions, we only need initial values. */
2767 if (reduc_index != -1)
2768 return;
2769 }
2770 }
2771
2772
2773 /* Create NCOPIES permutation statements using the vector mask MASK and two
2774 input vectors placed in DR_CHAIN at FIRST_VEC_INDX and SECOND_VEC_INDX
2775 for the first copy, shifting both indices by STRIDE elements of DR_CHAIN
2776 for every following copy.
2777 (STRIDE is the number of vectorized stmts for NODE divided by the number of
2778 copies.)
2779 VECT_STMTS_COUNTER specifies the index in the vectorized stmts of NODE where
2780 the created stmts must be inserted. */
2781
2782 static inline void
2783 vect_create_mask_and_perm (gimple stmt, gimple next_scalar_stmt,
2784 tree mask, int first_vec_indx, int second_vec_indx,
2785 gimple_stmt_iterator *gsi, slp_tree node,
2786 tree vectype, vec<tree> dr_chain,
2787 int ncopies, int vect_stmts_counter)
2788 {
2789 tree perm_dest;
2790 gimple perm_stmt = NULL;
2791 stmt_vec_info next_stmt_info;
2792 int i, stride;
2793 tree first_vec, second_vec, data_ref;
2794
2795 stride = SLP_TREE_NUMBER_OF_VEC_STMTS (node) / ncopies;
2796
2797 /* Initialize the vect stmts of NODE to properly insert the generated
2798 stmts later. */
2799 for (i = SLP_TREE_VEC_STMTS (node).length ();
2800 i < (int) SLP_TREE_NUMBER_OF_VEC_STMTS (node); i++)
2801 SLP_TREE_VEC_STMTS (node).quick_push (NULL);
2802
2803 perm_dest = vect_create_destination_var (gimple_assign_lhs (stmt), vectype);
2804 for (i = 0; i < ncopies; i++)
2805 {
2806 first_vec = dr_chain[first_vec_indx];
2807 second_vec = dr_chain[second_vec_indx];
2808
2809 /* Generate the permute statement. */
2810 perm_stmt = gimple_build_assign_with_ops (VEC_PERM_EXPR, perm_dest,
2811 first_vec, second_vec, mask);
2812 data_ref = make_ssa_name (perm_dest, perm_stmt);
2813 gimple_set_lhs (perm_stmt, data_ref);
2814 vect_finish_stmt_generation (stmt, perm_stmt, gsi);
2815
2816 /* Store the vector statement in NODE. */
2817 SLP_TREE_VEC_STMTS (node)[stride * i + vect_stmts_counter] = perm_stmt;
2818
2819 first_vec_indx += stride;
2820 second_vec_indx += stride;
2821 }
2822
2823 /* Mark the scalar stmt as vectorized. */
2824 next_stmt_info = vinfo_for_stmt (next_scalar_stmt);
2825 STMT_VINFO_VEC_STMT (next_stmt_info) = perm_stmt;
2826 }
2827
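/* An illustration of the STRIDE bookkeeping above, with invented
numbers: SLP_TREE_NUMBER_OF_VEC_STMTS (node) == 4 and ncopies == 2
give stride == 2; copy 0 permutes dr_chain[0] with dr_chain[1], copy 1
permutes dr_chain[2] with dr_chain[3], and the results land in slots
0 + VECT_STMTS_COUNTER and 2 + VECT_STMTS_COUNTER of
SLP_TREE_VEC_STMTS (node). */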
2828
2829 /* Given FIRST_MASK_ELEMENT - the mask element in element representation,
2830 return in CURRENT_MASK_ELEMENT its equivalent in target-specific
2831 representation. Check that the mask is valid and return FALSE if not.
2832 Set NEED_NEXT_VECTOR to TRUE if the permutation requires moving on to
2833 the next vector, i.e., the current first vector is not needed. */
2834
2835 static bool
2836 vect_get_mask_element (gimple stmt, int first_mask_element, int m,
2837 int mask_nunits, bool only_one_vec, int index,
2838 unsigned char *mask, int *current_mask_element,
2839 bool *need_next_vector, int *number_of_mask_fixes,
2840 bool *mask_fixed, bool *needs_first_vector)
2841 {
2842 int i;
2843
2844 /* Convert to target specific representation. */
2845 *current_mask_element = first_mask_element + m;
2846 /* Adjust the value in case it's a mask for the second and third vectors. */
2847 *current_mask_element -= mask_nunits * (*number_of_mask_fixes - 1);
2848
2849 if (*current_mask_element < mask_nunits)
2850 *needs_first_vector = true;
2851
2852 /* We have only one input vector to permute but the mask accesses values in
2853 the next vector as well. */
2854 if (only_one_vec && *current_mask_element >= mask_nunits)
2855 {
2856 if (dump_enabled_p ())
2857 {
2858 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2859 "permutation requires at least two vectors ");
2860 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
2861 }
2862
2863 return false;
2864 }
2865
2866 /* The mask requires the next vector. */
2867 if (*current_mask_element >= mask_nunits * 2)
2868 {
2869 if (*needs_first_vector || *mask_fixed)
2870 {
2871 /* We either need the first vector too or have already moved to the
2872 next vector. In both cases, this permutation needs three
2873 vectors. */
2874 if (dump_enabled_p ())
2875 {
2876 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2877 "permutation requires at "
2878 "least three vectors ");
2879 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
2880 }
2881
2882 return false;
2883 }
2884
2885 /* We move to the next vector, dropping the first one and working with
2886 the second and the third - we need to adjust the values of the mask
2887 accordingly. */
2888 *current_mask_element -= mask_nunits * *number_of_mask_fixes;
2889
2890 for (i = 0; i < index; i++)
2891 mask[i] -= mask_nunits * *number_of_mask_fixes;
2892
2893 (*number_of_mask_fixes)++;
2894 *mask_fixed = true;
2895 }
2896
2897 *need_next_vector = *mask_fixed;
2898
2899 /* This was the last element of this mask. Start a new one. */
2900 if (index == mask_nunits - 1)
2901 {
2902 *number_of_mask_fixes = 1;
2903 *mask_fixed = false;
2904 *needs_first_vector = false;
2905 }
2906
2907 return true;
2908 }
2909
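/* A small example of the conversion above, using the mask from the
example further down: with mask_nunits == 4, the element
representation {6,9,9,9} reaches into a third vector, so the first
vector is dropped, every element already recorded is shifted down by
mask_nunits, and the mask becomes the legal two-operand mask
{2,5,5,5}. */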
2910
2911 /* Generate vector permute statements from a list of loads in DR_CHAIN.
2912 If ANALYZE_ONLY is TRUE, only check that it is possible to create valid
2913 permute statements for SLP_NODE_INSTANCE. */
2914 bool
2915 vect_transform_slp_perm_load (gimple stmt, vec<tree> dr_chain,
2916 gimple_stmt_iterator *gsi, int vf,
2917 slp_instance slp_node_instance, bool analyze_only)
2918 {
2919 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2920 tree mask_element_type = NULL_TREE, mask_type;
2921 int i, j, k, nunits, vec_index = 0, scalar_index;
2922 slp_tree node;
2923 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
2924 gimple next_scalar_stmt;
2925 int group_size = SLP_INSTANCE_GROUP_SIZE (slp_node_instance);
2926 int first_mask_element;
2927 int index, unroll_factor, current_mask_element, ncopies;
2928 unsigned char *mask;
2929 bool only_one_vec = false, need_next_vector = false;
2930 int first_vec_index, second_vec_index, orig_vec_stmts_num, vect_stmts_counter;
2931 int number_of_mask_fixes = 1;
2932 bool mask_fixed = false;
2933 bool needs_first_vector = false;
2934 enum machine_mode mode;
2935
2936 mode = TYPE_MODE (vectype);
2937
2938 if (!can_vec_perm_p (mode, false, NULL))
2939 {
2940 if (dump_enabled_p ())
2941 {
2942 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2943 "no vect permute for ");
2944 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
2945 }
2946 return false;
2947 }
2948
2949 /* The generic VEC_PERM_EXPR code always uses an integral type of the
2950 same size as the vector element being permuted. */
2951 mask_element_type = lang_hooks.types.type_for_mode
2952 (int_mode_for_mode (TYPE_MODE (TREE_TYPE (vectype))), 1);
2953 mask_type = get_vectype_for_scalar_type (mask_element_type);
2954 nunits = TYPE_VECTOR_SUBPARTS (vectype);
2955 mask = XALLOCAVEC (unsigned char, nunits);
2956 unroll_factor = SLP_INSTANCE_UNROLLING_FACTOR (slp_node_instance);
2957
2958 /* The number of vector stmts to generate based only on the SLP_NODE_INSTANCE
2959 unrolling factor. */
2960 orig_vec_stmts_num = (group_size
2961 * SLP_INSTANCE_UNROLLING_FACTOR (slp_node_instance) / nunits);
2962 if (orig_vec_stmts_num == 1)
2963 only_one_vec = true;
2964
2965 /* The number of copies is determined by the final vectorization factor
2966 relative to the SLP_NODE_INSTANCE unrolling factor. */
2967 ncopies = vf / SLP_INSTANCE_UNROLLING_FACTOR (slp_node_instance);
2968
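/* Plugging the numbers of the example below into the two statements
above: group_size == 3, an unrolling factor of 4 and nunits == 4 give
orig_vec_stmts_num == 3 * 4 / 4 == 3, so more than one input vector
exists; if the final vf is also 4, ncopies == 1 and every mask is used
exactly once. */
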
2969 /* Generate permutation masks for every NODE. Number of masks for each NODE
2970 is equal to GROUP_SIZE.
2971 E.g., we have a group of three nodes with three loads from the same
2972 location in each node, and the vector size is 4. I.e., we have an
2973 a0b0c0a1b1c1... sequence and we need to create the following vectors:
2974 for a's: a0a0a0a1 a1a1a2a2 a2a3a3a3
2975 for b's: b0b0b0b1 b1b1b2b2 b2b3b3b3
2976 ...
2977
2978 The masks for a's should be: {0,0,0,3} {3,3,6,6} {6,9,9,9}.
2979 The last mask is illegal since a permute operation takes only two input
2980 operands, so mask element values cannot index outside that range.
2981 Hence, the last mask must be converted into {2,5,5,5}.
2982 For the first two permutations we need the first and the second input
2983 vectors: {a0,b0,c0,a1} and {b1,c1,a2,b2}, and for the last permutation
2984 we need the second and the third vectors: {b1,c1,a2,b2} and
2985 {c2,a3,b3,c3}. */
2986
2987 FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (slp_node_instance), i, node)
2988 {
2989 scalar_index = 0;
2990 index = 0;
2991 vect_stmts_counter = 0;
2992 vec_index = 0;
2993 first_vec_index = vec_index++;
2994 if (only_one_vec)
2995 second_vec_index = first_vec_index;
2996 else
2997 second_vec_index = vec_index++;
2998
2999 for (j = 0; j < unroll_factor; j++)
3000 {
3001 for (k = 0; k < group_size; k++)
3002 {
3003 first_mask_element = i + j * group_size;
3004 if (!vect_get_mask_element (stmt, first_mask_element, 0,
3005 nunits, only_one_vec, index,
3006 mask, &current_mask_element,
3007 &need_next_vector,
3008 &number_of_mask_fixes, &mask_fixed,
3009 &needs_first_vector))
3010 return false;
3011 mask[index++] = current_mask_element;
3012
3013 if (index == nunits)
3014 {
3015 tree mask_vec, *mask_elts;
3016 int l;
3017
3018 if (!can_vec_perm_p (mode, false, mask))
3019 {
3020 if (dump_enabled_p ())
3021 {
3022 dump_printf_loc (MSG_MISSED_OPTIMIZATION,
3023 vect_location,
3024 "unsupported vect permute { ");
3025 for (i = 0; i < nunits; ++i)
3026 dump_printf (MSG_MISSED_OPTIMIZATION, "%d ",
3027 mask[i]);
3028 dump_printf (MSG_MISSED_OPTIMIZATION, "}\n");
3029 }
3030 return false;
3031 }
3032
3033 mask_elts = XALLOCAVEC (tree, nunits);
3034 for (l = 0; l < nunits; ++l)
3035 mask_elts[l] = build_int_cst (mask_element_type, mask[l]);
3036 mask_vec = build_vector (mask_type, mask_elts);
3037 index = 0;
3038
3039 if (!analyze_only)
3040 {
3041 if (need_next_vector)
3042 {
3043 first_vec_index = second_vec_index;
3044 second_vec_index = vec_index;
3045 }
3046
3047 next_scalar_stmt
3048 = SLP_TREE_SCALAR_STMTS (node)[scalar_index++];
3049
3050 vect_create_mask_and_perm (stmt, next_scalar_stmt,
3051 mask_vec, first_vec_index, second_vec_index,
3052 gsi, node, vectype, dr_chain,
3053 ncopies, vect_stmts_counter++);
3054 }
3055 }
3056 }
3057 }
3058 }
3059
3060 return true;
3061 }
3062
3063
3064
3065 /* Vectorize SLP instance tree in postorder. */
3066
3067 static bool
3068 vect_schedule_slp_instance (slp_tree node, slp_instance instance,
3069 unsigned int vectorization_factor)
3070 {
3071 gimple stmt;
3072 bool grouped_store, is_store;
3073 gimple_stmt_iterator si;
3074 stmt_vec_info stmt_info;
3075 unsigned int vec_stmts_size, nunits, group_size;
3076 tree vectype;
3077 int i;
3078 slp_tree loads_node;
3079 slp_tree child;
3080
3081 if (!node)
3082 return false;
3083
3084 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
3085 vect_schedule_slp_instance (child, instance, vectorization_factor);
3086
3087 stmt = SLP_TREE_SCALAR_STMTS (node)[0];
3088 stmt_info = vinfo_for_stmt (stmt);
3089
3090 /* VECTYPE is the type of the destination. */
3091 vectype = STMT_VINFO_VECTYPE (stmt_info);
3092 nunits = (unsigned int) TYPE_VECTOR_SUBPARTS (vectype);
3093 group_size = SLP_INSTANCE_GROUP_SIZE (instance);
3094
3095 /* For each SLP instance calculate the number of vector stmts to be created
3096 for the scalar stmts in each node of the SLP tree. The number of vector
3097 elements in one vector iteration is the number of scalar elements in
3098 one scalar iteration (GROUP_SIZE) multiplied by VF divided by the vector
3099 size. */
3100 vec_stmts_size = (vectorization_factor * group_size) / nunits;
3101
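/* E.g., with assumed numbers vectorization_factor == 4, group_size == 2
and nunits == 4, vec_stmts_size == 4 * 2 / 4 == 2, i.e. two vector
stmts are created for the scalar stmts of each node. */
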
3102 /* In case of load permutation we have to allocate vectorized statements for
3103 all the nodes that participate in that permutation. */
3104 if (SLP_INSTANCE_LOAD_PERMUTATION (instance).exists ())
3105 {
3106 FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (instance), i, loads_node)
3107 {
3108 if (!SLP_TREE_VEC_STMTS (loads_node).exists ())
3109 {
3110 SLP_TREE_VEC_STMTS (loads_node).create (vec_stmts_size);
3111 SLP_TREE_NUMBER_OF_VEC_STMTS (loads_node) = vec_stmts_size;
3112 }
3113 }
3114 }
3115
3116 if (!SLP_TREE_VEC_STMTS (node).exists ())
3117 {
3118 SLP_TREE_VEC_STMTS (node).create (vec_stmts_size);
3119 SLP_TREE_NUMBER_OF_VEC_STMTS (node) = vec_stmts_size;
3120 }
3121
3122 if (dump_enabled_p ())
3123 {
3124 dump_printf_loc (MSG_NOTE, vect_location,
3125 "------>vectorizing SLP node starting from: ");
3126 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
3127 }
3128
3129 /* Loads should be inserted before the first load. */
3130 if (SLP_INSTANCE_FIRST_LOAD_STMT (instance)
3131 && STMT_VINFO_GROUPED_ACCESS (stmt_info)
3132 && !REFERENCE_CLASS_P (gimple_get_lhs (stmt))
3133 && SLP_INSTANCE_LOAD_PERMUTATION (instance).exists ())
3134 si = gsi_for_stmt (SLP_INSTANCE_FIRST_LOAD_STMT (instance));
3135 else if (is_pattern_stmt_p (stmt_info))
3136 si = gsi_for_stmt (STMT_VINFO_RELATED_STMT (stmt_info));
3137 else
3138 si = gsi_for_stmt (stmt);
3139
3140 /* Stores should be inserted just before the last store. */
3141 if (STMT_VINFO_GROUPED_ACCESS (stmt_info)
3142 && REFERENCE_CLASS_P (gimple_get_lhs (stmt)))
3143 {
3144 gimple last_store = vect_find_last_store_in_slp_instance (instance);
3145 if (is_pattern_stmt_p (vinfo_for_stmt (last_store)))
3146 last_store = STMT_VINFO_RELATED_STMT (vinfo_for_stmt (last_store));
3147 si = gsi_for_stmt (last_store);
3148 }
3149
3150 /* Mark the first element of the reduction chain as a reduction to properly
3151 transform the node. In the analysis phase only the last element of the
3152 chain is marked as a reduction. */
3153 if (GROUP_FIRST_ELEMENT (stmt_info) && !STMT_VINFO_GROUPED_ACCESS (stmt_info)
3154 && GROUP_FIRST_ELEMENT (stmt_info) == stmt)
3155 {
3156 STMT_VINFO_DEF_TYPE (stmt_info) = vect_reduction_def;
3157 STMT_VINFO_TYPE (stmt_info) = reduc_vec_info_type;
3158 }
3159
3160 is_store = vect_transform_stmt (stmt, &si, &grouped_store, node, instance);
3161 return is_store;
3162 }
3163
3164 /* Replace the scalar calls from SLP node NODE with assignments setting their
3165 lhs to zero. For loop vectorization this is done in vectorizable_call, but
3166 for SLP it needs to be deferred until the end of vect_schedule_slp, because
3167 multiple SLP instances may refer to the same scalar stmt. */
3168
3169 static void
3170 vect_remove_slp_scalar_calls (slp_tree node)
3171 {
3172 gimple stmt, new_stmt;
3173 gimple_stmt_iterator gsi;
3174 int i;
3175 slp_tree child;
3176 tree lhs;
3177 stmt_vec_info stmt_info;
3178
3179 if (!node)
3180 return;
3181
3182 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
3183 vect_remove_slp_scalar_calls (child);
3184
3185 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, stmt)
3186 {
3187 if (!is_gimple_call (stmt) || gimple_bb (stmt) == NULL)
3188 continue;
3189 stmt_info = vinfo_for_stmt (stmt);
3190 if (stmt_info == NULL
3191 || is_pattern_stmt_p (stmt_info)
3192 || !PURE_SLP_STMT (stmt_info))
3193 continue;
3194 lhs = gimple_call_lhs (stmt);
3195 new_stmt = gimple_build_assign (lhs, build_zero_cst (TREE_TYPE (lhs)));
3196 set_vinfo_for_stmt (new_stmt, stmt_info);
3197 set_vinfo_for_stmt (stmt, NULL);
3198 STMT_VINFO_STMT (stmt_info) = new_stmt;
3199 gsi = gsi_for_stmt (stmt);
3200 gsi_replace (&gsi, new_stmt, false);
3201 SSA_NAME_DEF_STMT (gimple_assign_lhs (new_stmt)) = new_stmt;
3202 }
3203 }
3204
3205 /* Generate vector code for all SLP instances in the loop/basic block. */
3206
3207 bool
3208 vect_schedule_slp (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo)
3209 {
3210 vec<slp_instance> slp_instances;
3211 slp_instance instance;
3212 slp_tree loads_node;
3213 unsigned int i, j, vf;
3214 bool is_store = false;
3215
3216 if (loop_vinfo)
3217 {
3218 slp_instances = LOOP_VINFO_SLP_INSTANCES (loop_vinfo);
3219 vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
3220 }
3221 else
3222 {
3223 slp_instances = BB_VINFO_SLP_INSTANCES (bb_vinfo);
3224 vf = 1;
3225 }
3226
3227 FOR_EACH_VEC_ELT (slp_instances, i, instance)
3228 {
3229 /* Schedule the tree of INSTANCE. */
3230 is_store = vect_schedule_slp_instance (SLP_INSTANCE_TREE (instance),
3231 instance, vf);
3232
3233 /* Clear STMT_VINFO_VEC_STMT of all loads. Otherwise, with loads
3234 shared between SLP instances, we would fail to properly initialize
3235 the vectorized SLP stmts and confuse different load permutations. */
3236 FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (instance), j, loads_node)
3237 STMT_VINFO_VEC_STMT
3238 (vinfo_for_stmt (SLP_TREE_SCALAR_STMTS (loads_node)[0])) = NULL;
3239
3240 if (dump_enabled_p ())
3241 dump_printf_loc (MSG_NOTE, vect_location,
3242 "vectorizing stmts using SLP.");
3243 }
3244
3245 FOR_EACH_VEC_ELT (slp_instances, i, instance)
3246 {
3247 slp_tree root = SLP_INSTANCE_TREE (instance);
3248 gimple store;
3249 unsigned int j;
3250 gimple_stmt_iterator gsi;
3251
3252 /* Remove scalar call stmts. Do not do this for basic-block
3253 vectorization as not all uses may be vectorized.
3254 ??? Why should this be necessary? DCE should be able to
3255 remove the stmts itself.
3256 ??? For BB vectorization we can as well remove scalar
3257 stmts starting from the SLP tree root if they have no
3258 uses. */
3259 if (loop_vinfo)
3260 vect_remove_slp_scalar_calls (root);
3261
3262 for (j = 0; SLP_TREE_SCALAR_STMTS (root).iterate (j, &store)
3263 && j < SLP_INSTANCE_GROUP_SIZE (instance); j++)
3264 {
3265 if (!STMT_VINFO_DATA_REF (vinfo_for_stmt (store)))
3266 break;
3267
3268 if (is_pattern_stmt_p (vinfo_for_stmt (store)))
3269 store = STMT_VINFO_RELATED_STMT (vinfo_for_stmt (store));
3270 /* Free the attached stmt_vec_info and remove the stmt. */
3271 gsi = gsi_for_stmt (store);
3272 unlink_stmt_vdef (store);
3273 gsi_remove (&gsi, true);
3274 release_defs (store);
3275 free_stmt_vec_info (store);
3276 }
3277 }
3278
3279 return is_store;
3280 }
3281
3282
3283 /* Vectorize the basic block. */
3284
3285 void
3286 vect_slp_transform_bb (basic_block bb)
3287 {
3288 bb_vec_info bb_vinfo = vec_info_for_bb (bb);
3289 gimple_stmt_iterator si;
3290
3291 gcc_assert (bb_vinfo);
3292
3293 if (dump_enabled_p ())
3294 dump_printf_loc (MSG_NOTE, vect_location, "SLPing BB\n");
3295
3296 for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
3297 {
3298 gimple stmt = gsi_stmt (si);
3299 stmt_vec_info stmt_info;
3300
3301 if (dump_enabled_p ())
3302 {
3303 dump_printf_loc (MSG_NOTE, vect_location,
3304 "------>SLPing statement: ");
3305 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
3306 }
3307
3308 stmt_info = vinfo_for_stmt (stmt);
3309 gcc_assert (stmt_info);
3310
3311 /* Schedule all the SLP instances when the first SLP stmt is reached. */
3312 if (STMT_SLP_TYPE (stmt_info))
3313 {
3314 vect_schedule_slp (NULL, bb_vinfo);
3315 break;
3316 }
3317 }
3318
3319 if (dump_enabled_p ())
3320 dump_printf (MSG_OPTIMIZED_LOCATIONS, "BASIC BLOCK VECTORIZED\n");
3321
3322 destroy_bb_vec_info (bb_vinfo);
3323 }