gcc/tree-vect-slp.c
/* SLP - Basic Block Vectorization
   Copyright (C) 2007-2013 Free Software Foundation, Inc.
   Contributed by Dorit Naishlos <dorit@il.ibm.com>
   and Ira Rosen <irar@il.ibm.com>

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free
Software Foundation; either version 3, or (at your option) any later
version.

GCC is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */

#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "dumpfile.h"
#include "tm.h"
#include "ggc.h"
#include "tree.h"
#include "stor-layout.h"
#include "target.h"
#include "basic-block.h"
#include "gimple-pretty-print.h"
#include "gimple.h"
#include "gimple-iterator.h"
#include "gimple-ssa.h"
#include "tree-phinodes.h"
#include "ssa-iterators.h"
#include "stringpool.h"
#include "tree-ssanames.h"
#include "tree-pass.h"
#include "cfgloop.h"
#include "expr.h"
#include "recog.h"   /* FIXME: for insn_data */
#include "optabs.h"
#include "tree-vectorizer.h"
#include "langhooks.h"

/* Extract the location of the basic block in the source code.
   Return the location of the first statement in BB that has one,
   and UNKNOWN_LOCATION if no statement does (or if BB is NULL).  */

source_location
find_bb_location (basic_block bb)
{
  gimple stmt = NULL;
  gimple_stmt_iterator si;

  if (!bb)
    return UNKNOWN_LOCATION;

  for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
    {
      stmt = gsi_stmt (si);
      if (gimple_location (stmt) != UNKNOWN_LOCATION)
        return gimple_location (stmt);
    }

  return UNKNOWN_LOCATION;
}


/* Recursively free the memory allocated for the SLP tree rooted at NODE.  */

static void
vect_free_slp_tree (slp_tree node)
{
  int i;
  slp_tree child;

  if (!node)
    return;

  FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
    vect_free_slp_tree (child);

  SLP_TREE_CHILDREN (node).release ();
  SLP_TREE_SCALAR_STMTS (node).release ();
  SLP_TREE_VEC_STMTS (node).release ();
  SLP_TREE_LOAD_PERMUTATION (node).release ();

  free (node);
}


/* Free the memory allocated for the SLP instance.  */

void
vect_free_slp_instance (slp_instance instance)
{
  vect_free_slp_tree (SLP_INSTANCE_TREE (instance));
  SLP_INSTANCE_LOADS (instance).release ();
  SLP_INSTANCE_BODY_COST_VEC (instance).release ();
  free (instance);
}


/* Create an SLP node for SCALAR_STMTS.  */

static slp_tree
vect_create_new_slp_node (vec<gimple> scalar_stmts)
{
  slp_tree node;
  gimple stmt = scalar_stmts[0];
  unsigned int nops;

  if (is_gimple_call (stmt))
    nops = gimple_call_num_args (stmt);
  else if (is_gimple_assign (stmt))
    {
      nops = gimple_num_ops (stmt) - 1;
      if (gimple_assign_rhs_code (stmt) == COND_EXPR)
        nops++;
    }
  else
    return NULL;

  node = XNEW (struct _slp_tree);
  SLP_TREE_SCALAR_STMTS (node) = scalar_stmts;
  SLP_TREE_VEC_STMTS (node).create (0);
  SLP_TREE_CHILDREN (node).create (nops);
  SLP_TREE_LOAD_PERMUTATION (node) = vNULL;

  return node;
}
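
/* As an illustrative sketch (not part of the original sources): for a
   plain binary assignment

     x_1 = a_2 + b_3;

   gimple_num_ops is 3 (the lhs plus two rhs operands), so NOPS is 2.
   For a COND_EXPR assignment

     x_1 = a_2 < b_3 ? c_4 : d_5;

   gimple_num_ops is 4, so NOPS starts at 3 and is bumped to 4 because
   vect_get_and_check_slp_defs below splits the embedded comparison
   a_2 < b_3 into its two operands.  */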


/* Allocate operands info for NOPS operands, and GROUP_SIZE def-stmts for each
   operand.  */
static vec<slp_oprnd_info>
vect_create_oprnd_info (int nops, int group_size)
{
  int i;
  slp_oprnd_info oprnd_info;
  vec<slp_oprnd_info> oprnds_info;

  oprnds_info.create (nops);
  for (i = 0; i < nops; i++)
    {
      oprnd_info = XNEW (struct _slp_oprnd_info);
      oprnd_info->def_stmts.create (group_size);
      oprnd_info->first_dt = vect_uninitialized_def;
      oprnd_info->first_op_type = NULL_TREE;
      oprnd_info->first_pattern = false;
      oprnds_info.quick_push (oprnd_info);
    }

  return oprnds_info;
}


/* Free operands info.  */

static void
vect_free_oprnd_info (vec<slp_oprnd_info> &oprnds_info)
{
  int i;
  slp_oprnd_info oprnd_info;

  FOR_EACH_VEC_ELT (oprnds_info, i, oprnd_info)
    {
      oprnd_info->def_stmts.release ();
      XDELETE (oprnd_info);
    }

  oprnds_info.release ();
}


/* Find the place of the data-ref in STMT in the interleaving chain that starts
   from FIRST_STMT.  Return -1 if the data-ref is not a part of the chain.  */

static int
vect_get_place_in_interleaving_chain (gimple stmt, gimple first_stmt)
{
  gimple next_stmt = first_stmt;
  int result = 0;

  if (first_stmt != GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)))
    return -1;

  do
    {
      if (next_stmt == stmt)
        return result;
      result++;
      next_stmt = GROUP_NEXT_ELEMENT (vinfo_for_stmt (next_stmt));
    }
  while (next_stmt);

  return -1;
}
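
/* For example (illustrative only): in the interleaved load chain built
   from

     x0 = a[i];     place 0 (FIRST_STMT)
     x1 = a[i+1];   place 1
     x2 = a[i+2];   place 2
     x3 = a[i+3];   place 3

   passing the stmt that loads a[i+2] returns 2, while passing a load
   that belongs to a different chain returns -1.  */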


/* Get the defs for the rhs of STMT (collect them in OPRNDS_INFO), check that
   they are of a valid type and that they match the defs of the first stmt of
   the SLP group (stored in OPRNDS_INFO).  */

static bool
vect_get_and_check_slp_defs (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo,
                             gimple stmt, bool first,
                             vec<slp_oprnd_info> *oprnds_info)
{
  tree oprnd;
  unsigned int i, number_of_oprnds;
  tree def;
  gimple def_stmt;
  enum vect_def_type dt = vect_uninitialized_def;
  struct loop *loop = NULL;
  bool pattern = false;
  slp_oprnd_info oprnd_info;
  int op_idx = 1;
  tree compare_rhs = NULL_TREE;

  if (loop_vinfo)
    loop = LOOP_VINFO_LOOP (loop_vinfo);

  if (is_gimple_call (stmt))
    {
      number_of_oprnds = gimple_call_num_args (stmt);
      op_idx = 3;
    }
  else if (is_gimple_assign (stmt))
    {
      number_of_oprnds = gimple_num_ops (stmt) - 1;
      if (gimple_assign_rhs_code (stmt) == COND_EXPR)
        number_of_oprnds++;
    }
  else
    return false;

  for (i = 0; i < number_of_oprnds; i++)
    {
      if (compare_rhs)
        {
          oprnd = compare_rhs;
          compare_rhs = NULL_TREE;
        }
      else
        oprnd = gimple_op (stmt, op_idx++);

      oprnd_info = (*oprnds_info)[i];

      if (COMPARISON_CLASS_P (oprnd))
        {
          compare_rhs = TREE_OPERAND (oprnd, 1);
          oprnd = TREE_OPERAND (oprnd, 0);
        }

      if (!vect_is_simple_use (oprnd, NULL, loop_vinfo, bb_vinfo, &def_stmt,
                               &def, &dt)
          || (!def_stmt && dt != vect_constant_def))
        {
          if (dump_enabled_p ())
            {
              dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                               "Build SLP failed: can't find def for ");
              dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM, oprnd);
              dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
            }

          return false;
        }

      /* Check if DEF_STMT is a part of a pattern in LOOP and get the def stmt
         from the pattern.  Check that all the stmts of the node are in the
         pattern.  */
      if (def_stmt && gimple_bb (def_stmt)
          && ((loop && flow_bb_inside_loop_p (loop, gimple_bb (def_stmt)))
              || (!loop && gimple_bb (def_stmt) == BB_VINFO_BB (bb_vinfo)
                  && gimple_code (def_stmt) != GIMPLE_PHI))
          && vinfo_for_stmt (def_stmt)
          && STMT_VINFO_IN_PATTERN_P (vinfo_for_stmt (def_stmt))
          && !STMT_VINFO_RELEVANT (vinfo_for_stmt (def_stmt))
          && !STMT_VINFO_LIVE_P (vinfo_for_stmt (def_stmt)))
        {
          pattern = true;
          if (!first && !oprnd_info->first_pattern)
            {
              if (dump_enabled_p ())
                {
                  dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                                   "Build SLP failed: some of the stmts"
                                   " are in a pattern, and others are not ");
                  dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM, oprnd);
                  dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
                }

              return false;
            }

          def_stmt = STMT_VINFO_RELATED_STMT (vinfo_for_stmt (def_stmt));
          dt = STMT_VINFO_DEF_TYPE (vinfo_for_stmt (def_stmt));

          if (dt == vect_unknown_def_type)
            {
              if (dump_enabled_p ())
                dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                                 "Unsupported pattern.\n");
              return false;
            }

          switch (gimple_code (def_stmt))
            {
              case GIMPLE_PHI:
                def = gimple_phi_result (def_stmt);
                break;

              case GIMPLE_ASSIGN:
                def = gimple_assign_lhs (def_stmt);
                break;

              default:
                if (dump_enabled_p ())
                  dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                                   "unsupported defining stmt:\n");
                return false;
            }
        }

      if (first)
        {
          oprnd_info->first_dt = dt;
          oprnd_info->first_pattern = pattern;
          oprnd_info->first_op_type = TREE_TYPE (oprnd);
        }
      else
        {
          /* Not first stmt of the group, check that the def-stmt/s match
             the def-stmt/s of the first stmt.  Allow different definition
             types for reduction chains: the first stmt must be a
             vect_reduction_def (a phi node), and the rest
             vect_internal_def.  */
          if (((oprnd_info->first_dt != dt
                && !(oprnd_info->first_dt == vect_reduction_def
                     && dt == vect_internal_def)
                && !((oprnd_info->first_dt == vect_external_def
                      || oprnd_info->first_dt == vect_constant_def)
                     && (dt == vect_external_def
                         || dt == vect_constant_def)))
               || !types_compatible_p (oprnd_info->first_op_type,
                                       TREE_TYPE (oprnd))))
            {
              if (dump_enabled_p ())
                dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                                 "Build SLP failed: different types\n");

              return false;
            }
        }

      /* Check the types of the definitions.  */
      switch (dt)
        {
        case vect_constant_def:
        case vect_external_def:
        case vect_reduction_def:
          break;

        case vect_internal_def:
          oprnd_info->def_stmts.quick_push (def_stmt);
          break;

        default:
          /* FORNOW: Not supported.  */
          if (dump_enabled_p ())
            {
              dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                               "Build SLP failed: illegal type of def ");
              dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM, def);
              dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
            }

          return false;
        }
    }

  return true;
}


/* Verify that the scalar stmts STMTS are isomorphic, do not require any
   data permutation, and are not of unsupported types of operation.
   Return true if so; otherwise return false and indicate in *MATCHES
   which stmts are not isomorphic to the first one.  If MATCHES[0] is
   false then this indicates that the comparison could not be carried
   out or that the stmts will never be vectorized by SLP.  */

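/* For example (an illustrative sketch only): the four stmts

     a[0] = b[0] + c[0];
     a[1] = b[1] + c[1];
     a[2] = b[2] + c[2];
     a[3] = b[3] - c[3];

   fail the check with MATCHES = {true, true, true, false}: the first
   three are isomorphic PLUS_EXPR assignments but the last one is a
   MINUS_EXPR.  Because MATCHES[0] stays true the mismatch is not fatal,
   and the caller may still try to salvage the group.  */
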
static bool
vect_build_slp_tree_1 (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo,
                       vec<gimple> stmts, unsigned int group_size,
                       unsigned nops, unsigned int *max_nunits,
                       unsigned int vectorization_factor, bool *matches)
{
  unsigned int i;
  gimple stmt = stmts[0];
  enum tree_code first_stmt_code = ERROR_MARK, rhs_code = ERROR_MARK;
  enum tree_code first_cond_code = ERROR_MARK;
  tree lhs;
  bool need_same_oprnds = false;
  tree vectype, scalar_type, first_op1 = NULL_TREE;
  optab optab;
  int icode;
  enum machine_mode optab_op2_mode;
  enum machine_mode vec_mode;
  struct data_reference *first_dr;
  HOST_WIDE_INT dummy;
  gimple first_load = NULL, prev_first_load = NULL, old_first_load = NULL;
  tree cond;

  /* For every stmt in NODE find its def stmt/s.  */
  FOR_EACH_VEC_ELT (stmts, i, stmt)
    {
      matches[i] = false;

      if (dump_enabled_p ())
        {
          dump_printf_loc (MSG_NOTE, vect_location, "Build SLP for ");
          dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
          dump_printf (MSG_NOTE, "\n");
        }

      /* Fail to vectorize statements marked as unvectorizable.  */
      if (!STMT_VINFO_VECTORIZABLE (vinfo_for_stmt (stmt)))
        {
          if (dump_enabled_p ())
            {
              dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                               "Build SLP failed: unvectorizable statement ");
              dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
              dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
            }
          /* Fatal mismatch.  */
          matches[0] = false;
          return false;
        }

      lhs = gimple_get_lhs (stmt);
      if (lhs == NULL_TREE)
        {
          if (dump_enabled_p ())
            {
              dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                               "Build SLP failed: not GIMPLE_ASSIGN nor "
                               "GIMPLE_CALL ");
              dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
              dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
            }
          /* Fatal mismatch.  */
          matches[0] = false;
          return false;
        }

      if (is_gimple_assign (stmt)
          && gimple_assign_rhs_code (stmt) == COND_EXPR
          && (cond = gimple_assign_rhs1 (stmt))
          && !COMPARISON_CLASS_P (cond))
        {
          if (dump_enabled_p ())
            {
              dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                               "Build SLP failed: condition is not "
                               "comparison ");
              dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
              dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
            }
          /* Fatal mismatch.  */
          matches[0] = false;
          return false;
        }

      scalar_type = vect_get_smallest_scalar_type (stmt, &dummy, &dummy);
      vectype = get_vectype_for_scalar_type (scalar_type);
      if (!vectype)
        {
          if (dump_enabled_p ())
            {
              dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                               "Build SLP failed: unsupported data-type ");
              dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
                                 scalar_type);
              dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
            }
          /* Fatal mismatch.  */
          matches[0] = false;
          return false;
        }

      /* In case of multiple types we need to detect the smallest type.  */
      if (*max_nunits < TYPE_VECTOR_SUBPARTS (vectype))
        {
          *max_nunits = TYPE_VECTOR_SUBPARTS (vectype);
          if (bb_vinfo)
            vectorization_factor = *max_nunits;
        }

      if (is_gimple_call (stmt))
        {
          rhs_code = CALL_EXPR;
          if (gimple_call_internal_p (stmt)
              || gimple_call_tail_p (stmt)
              || gimple_call_noreturn_p (stmt)
              || !gimple_call_nothrow_p (stmt)
              || gimple_call_chain (stmt))
            {
              if (dump_enabled_p ())
                {
                  dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                                   "Build SLP failed: unsupported call type ");
                  dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
                  dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
                }
              /* Fatal mismatch.  */
              matches[0] = false;
              return false;
            }
        }
      else
        rhs_code = gimple_assign_rhs_code (stmt);

      /* Check the operation.  */
      if (i == 0)
        {
          first_stmt_code = rhs_code;

          /* Shift arguments should be equal in all the packed stmts for a
             vector shift with scalar shift operand.  */
          if (rhs_code == LSHIFT_EXPR || rhs_code == RSHIFT_EXPR
              || rhs_code == LROTATE_EXPR
              || rhs_code == RROTATE_EXPR)
            {
              vec_mode = TYPE_MODE (vectype);

              /* First see if we have a vector/vector shift.  */
              optab = optab_for_tree_code (rhs_code, vectype,
                                           optab_vector);

              if (!optab
                  || optab_handler (optab, vec_mode) == CODE_FOR_nothing)
                {
                  /* No vector/vector shift, try for a vector/scalar shift.  */
                  optab = optab_for_tree_code (rhs_code, vectype,
                                               optab_scalar);

                  if (!optab)
                    {
                      if (dump_enabled_p ())
                        dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                                         "Build SLP failed: no optab.\n");
                      /* Fatal mismatch.  */
                      matches[0] = false;
                      return false;
                    }
                  icode = (int) optab_handler (optab, vec_mode);
                  if (icode == CODE_FOR_nothing)
                    {
                      if (dump_enabled_p ())
                        dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                                         "Build SLP failed: "
                                         "op not supported by target.\n");
                      /* Fatal mismatch.  */
                      matches[0] = false;
                      return false;
                    }
                  optab_op2_mode = insn_data[icode].operand[2].mode;
                  if (!VECTOR_MODE_P (optab_op2_mode))
                    {
                      need_same_oprnds = true;
                      first_op1 = gimple_assign_rhs2 (stmt);
                    }
                }
            }
          else if (rhs_code == WIDEN_LSHIFT_EXPR)
            {
              need_same_oprnds = true;
              first_op1 = gimple_assign_rhs2 (stmt);
            }
        }
      else
        {
          if (first_stmt_code != rhs_code
              && (first_stmt_code != IMAGPART_EXPR
                  || rhs_code != REALPART_EXPR)
              && (first_stmt_code != REALPART_EXPR
                  || rhs_code != IMAGPART_EXPR)
              && !(STMT_VINFO_GROUPED_ACCESS (vinfo_for_stmt (stmt))
                   && (first_stmt_code == ARRAY_REF
                       || first_stmt_code == BIT_FIELD_REF
                       || first_stmt_code == INDIRECT_REF
                       || first_stmt_code == COMPONENT_REF
                       || first_stmt_code == MEM_REF)))
            {
              if (dump_enabled_p ())
                {
                  dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                                   "Build SLP failed: different operation "
                                   "in stmt ");
                  dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
                  dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
                }
              /* Mismatch.  */
              continue;
            }

          if (need_same_oprnds
              && !operand_equal_p (first_op1, gimple_assign_rhs2 (stmt), 0))
            {
              if (dump_enabled_p ())
                {
                  dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                                   "Build SLP failed: different shift "
                                   "arguments in ");
                  dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
                  dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
                }
              /* Mismatch.  */
              continue;
            }

          if (rhs_code == CALL_EXPR)
            {
              gimple first_stmt = stmts[0];
              if (gimple_call_num_args (stmt) != nops
                  || !operand_equal_p (gimple_call_fn (first_stmt),
                                       gimple_call_fn (stmt), 0)
                  || gimple_call_fntype (first_stmt)
                     != gimple_call_fntype (stmt))
                {
                  if (dump_enabled_p ())
                    {
                      dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                                       "Build SLP failed: different calls in ");
                      dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
                                        stmt, 0);
                      dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
                    }
                  /* Mismatch.  */
                  continue;
                }
            }
        }

      /* Grouped store or load.  */
      if (STMT_VINFO_GROUPED_ACCESS (vinfo_for_stmt (stmt)))
        {
          if (REFERENCE_CLASS_P (lhs))
            {
              /* Store.  */
              ;
            }
          else
            {
              /* Load.  */
              unsigned unrolling_factor
                = least_common_multiple
                    (*max_nunits, group_size) / group_size;
              /* FORNOW: Check that there is no gap between the loads
                 and no gap between the groups when we need to load
                 multiple groups at once.
                 ???  We should enhance this to only disallow gaps
                 inside vectors.  */
              if ((unrolling_factor > 1
                   && GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)) == stmt
                   && GROUP_GAP (vinfo_for_stmt (stmt)) != 0)
                  || (GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)) != stmt
                      && GROUP_GAP (vinfo_for_stmt (stmt)) != 1))
                {
                  if (dump_enabled_p ())
                    {
                      dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                                       "Build SLP failed: grouped "
                                       "loads have gaps ");
                      dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
                                        stmt, 0);
                      dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
                    }
                  /* Fatal mismatch.  */
                  matches[0] = false;
                  return false;
                }

              /* Check that the size of interleaved loads group is not
                 greater than the SLP group size.  */
              unsigned ncopies
                = vectorization_factor / TYPE_VECTOR_SUBPARTS (vectype);
              if (loop_vinfo
                  && GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)) == stmt
                  && ((GROUP_SIZE (vinfo_for_stmt (stmt))
                       - GROUP_GAP (vinfo_for_stmt (stmt)))
                      > ncopies * group_size))
                {
                  if (dump_enabled_p ())
                    {
                      dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                                       "Build SLP failed: the number "
                                       "of interleaved loads is greater than "
                                       "the SLP group size ");
                      dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
                                        stmt, 0);
                      dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
                    }
                  /* Fatal mismatch.  */
                  matches[0] = false;
                  return false;
                }

              old_first_load = first_load;
              first_load = GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt));
              if (prev_first_load)
                {
                  /* Check that there are no loads from different interleaving
                     chains in the same node.  */
                  if (prev_first_load != first_load)
                    {
                      if (dump_enabled_p ())
                        {
                          dump_printf_loc (MSG_MISSED_OPTIMIZATION,
                                           vect_location,
                                           "Build SLP failed: different "
                                           "interleaving chains in one node ");
                          dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
                                            stmt, 0);
                          dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
                        }
                      /* Mismatch.  */
                      continue;
                    }
                }
              else
                prev_first_load = first_load;

              /* In some cases a group of loads is just the same load
                 repeated N times.  Only analyze its cost once.  */
              if (first_load == stmt && old_first_load != first_load)
                {
                  first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (stmt));
                  if (vect_supportable_dr_alignment (first_dr, false)
                      == dr_unaligned_unsupported)
                    {
                      if (dump_enabled_p ())
                        {
                          dump_printf_loc (MSG_MISSED_OPTIMIZATION,
                                           vect_location,
                                           "Build SLP failed: unsupported "
                                           "unaligned load ");
                          dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
                                            stmt, 0);
                          dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
                        }
                      /* Fatal mismatch.  */
                      matches[0] = false;
                      return false;
                    }
                }
            }
        } /* Grouped access.  */
      else
        {
          if (TREE_CODE_CLASS (rhs_code) == tcc_reference)
            {
              /* Not grouped load.  */
              if (dump_enabled_p ())
                {
                  dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                                   "Build SLP failed: not grouped load ");
                  dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
                  dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
                }

              /* FORNOW: Not grouped loads are not supported.  */
              /* Fatal mismatch.  */
              matches[0] = false;
              return false;
            }

          /* Not memory operation.  */
          if (TREE_CODE_CLASS (rhs_code) != tcc_binary
              && TREE_CODE_CLASS (rhs_code) != tcc_unary
              && rhs_code != COND_EXPR
              && rhs_code != CALL_EXPR)
            {
              if (dump_enabled_p ())
                {
                  dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                                   "Build SLP failed: operation");
                  dump_printf (MSG_MISSED_OPTIMIZATION, " unsupported ");
                  dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
                  dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
                }
              /* Fatal mismatch.  */
              matches[0] = false;
              return false;
            }

          if (rhs_code == COND_EXPR)
            {
              tree cond_expr = gimple_assign_rhs1 (stmt);

              if (i == 0)
                first_cond_code = TREE_CODE (cond_expr);
              else if (first_cond_code != TREE_CODE (cond_expr))
                {
                  if (dump_enabled_p ())
                    {
                      dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                                       "Build SLP failed: different"
                                       " operation");
                      dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
                                        stmt, 0);
                      dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
                    }
                  /* Mismatch.  */
                  continue;
                }
            }
        }

      matches[i] = true;
    }

  for (i = 0; i < group_size; ++i)
    if (!matches[i])
      return false;

  return true;
}

/* Recursively build an SLP tree starting from NODE.
   Fail (and return FALSE) if def-stmts are not isomorphic, require data
   permutation or are of unsupported types of operation.  Otherwise,
   return TRUE.  */

static bool
vect_build_slp_tree (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo,
                     slp_tree *node, unsigned int group_size,
                     unsigned int *max_nunits,
                     vec<slp_tree> *loads,
                     unsigned int vectorization_factor,
                     bool *matches, unsigned *npermutes)
{
  unsigned nops, i, this_npermutes = 0;
  gimple stmt;

  if (!matches)
    matches = XALLOCAVEC (bool, group_size);
  if (!npermutes)
    npermutes = &this_npermutes;

  matches[0] = false;

  stmt = SLP_TREE_SCALAR_STMTS (*node)[0];
  if (is_gimple_call (stmt))
    nops = gimple_call_num_args (stmt);
  else if (is_gimple_assign (stmt))
    {
      nops = gimple_num_ops (stmt) - 1;
      if (gimple_assign_rhs_code (stmt) == COND_EXPR)
        nops++;
    }
  else
    return false;

  if (!vect_build_slp_tree_1 (loop_vinfo, bb_vinfo,
                              SLP_TREE_SCALAR_STMTS (*node), group_size, nops,
                              max_nunits, vectorization_factor, matches))
    return false;

  /* If the SLP node is a load, terminate the recursion.  */
  if (STMT_VINFO_GROUPED_ACCESS (vinfo_for_stmt (stmt))
      && DR_IS_READ (STMT_VINFO_DATA_REF (vinfo_for_stmt (stmt))))
    {
      loads->safe_push (*node);
      return true;
    }

  /* Get at the operands, verifying they are compatible.  */
  vec<slp_oprnd_info> oprnds_info = vect_create_oprnd_info (nops, group_size);
  slp_oprnd_info oprnd_info;
  FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (*node), i, stmt)
    {
      if (!vect_get_and_check_slp_defs (loop_vinfo, bb_vinfo,
                                        stmt, (i == 0), &oprnds_info))
        {
          vect_free_oprnd_info (oprnds_info);
          return false;
        }
    }

  stmt = SLP_TREE_SCALAR_STMTS (*node)[0];

  /* Create SLP_TREE nodes for the definition node/s.  */
  FOR_EACH_VEC_ELT (oprnds_info, i, oprnd_info)
    {
      slp_tree child;
      unsigned old_nloads = loads->length ();
      unsigned old_max_nunits = *max_nunits;

      if (oprnd_info->first_dt != vect_internal_def)
        continue;

      child = vect_create_new_slp_node (oprnd_info->def_stmts);
      if (!child)
        {
          vect_free_oprnd_info (oprnds_info);
          return false;
        }

      bool *matches = XALLOCAVEC (bool, group_size);
      if (vect_build_slp_tree (loop_vinfo, bb_vinfo, &child,
                               group_size, max_nunits, loads,
                               vectorization_factor, matches, npermutes))
        {
          oprnd_info->def_stmts = vNULL;
          SLP_TREE_CHILDREN (*node).quick_push (child);
          continue;
        }

      /* If the SLP build for operand zero failed and operand zero
         and one can be commuted, try that for the scalar stmts
         that failed the match.  */
      if (i == 0
          /* A first scalar stmt mismatch signals a fatal mismatch.  */
          && matches[0]
          /* ???  For COND_EXPRs we can swap the comparison operands
             as well as the arms under some constraints.  */
          && nops == 2
          && oprnds_info[1]->first_dt == vect_internal_def
          && is_gimple_assign (stmt)
          && commutative_tree_code (gimple_assign_rhs_code (stmt))
          /* Do so only if the number of not successful permutes was
             no more than a cut-off, as re-trying the recursive match on
             possibly each level of the tree would expose exponential
             behavior.  */
          && *npermutes < 4)
        {
          /* Roll back.  */
          *max_nunits = old_max_nunits;
          loads->truncate (old_nloads);
          /* Swap mismatched definition stmts.  */
          for (unsigned j = 0; j < group_size; ++j)
            if (!matches[j])
              {
                gimple tem = oprnds_info[0]->def_stmts[j];
                oprnds_info[0]->def_stmts[j] = oprnds_info[1]->def_stmts[j];
                oprnds_info[1]->def_stmts[j] = tem;
              }
          /* And try again ...  */
          if (vect_build_slp_tree (loop_vinfo, bb_vinfo, &child,
                                   group_size, max_nunits, loads,
                                   vectorization_factor,
                                   matches, npermutes))
            {
              oprnd_info->def_stmts = vNULL;
              SLP_TREE_CHILDREN (*node).quick_push (child);
              continue;
            }

          ++*npermutes;
        }

      oprnd_info->def_stmts = vNULL;
      vect_free_slp_tree (child);
      vect_free_oprnd_info (oprnds_info);
      return false;
    }

  vect_free_oprnd_info (oprnds_info);
  return true;
}
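
/* For example (an illustrative sketch only): given the group

     x0 = a0 + b0;
     x1 = b1 + a1;

   the build of operand zero fails because a0 and b1 have non-matching
   defs, but MATCHES[0] is true and PLUS_EXPR is commutative, so the
   def-stmts of the mismatched stmt x1 are swapped between operand zero
   and operand one and the build is retried, succeeding with children
   {a0, a1} and {b0, b1}.  NPERMUTES caps how often this retry may
   happen across the whole tree.  */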

/* Dump a slp tree NODE using flags specified in DUMP_KIND.  */

static void
vect_print_slp_tree (int dump_kind, slp_tree node)
{
  int i;
  gimple stmt;
  slp_tree child;

  if (!node)
    return;

  dump_printf (dump_kind, "node ");
  FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, stmt)
    {
      dump_printf (dump_kind, "\n\tstmt %d ", i);
      dump_gimple_stmt (dump_kind, TDF_SLIM, stmt, 0);
    }
  dump_printf (dump_kind, "\n");

  FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
    vect_print_slp_tree (dump_kind, child);
}


/* Mark the tree rooted at NODE with MARK (PURE_SLP or HYBRID).
   If MARK is HYBRID, it refers to a specific stmt in NODE (the stmt at index
   J).  Otherwise, MARK is PURE_SLP and J is -1, which indicates that all the
   stmts in NODE are to be marked.  */

static void
vect_mark_slp_stmts (slp_tree node, enum slp_vect_type mark, int j)
{
  int i;
  gimple stmt;
  slp_tree child;

  if (!node)
    return;

  FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, stmt)
    if (j < 0 || i == j)
      STMT_SLP_TYPE (vinfo_for_stmt (stmt)) = mark;

  FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
    vect_mark_slp_stmts (child, mark, j);
}


/* Mark the statements of the tree rooted at NODE as relevant (vect_used).  */

static void
vect_mark_slp_stmts_relevant (slp_tree node)
{
  int i;
  gimple stmt;
  stmt_vec_info stmt_info;
  slp_tree child;

  if (!node)
    return;

  FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, stmt)
    {
      stmt_info = vinfo_for_stmt (stmt);
      gcc_assert (!STMT_VINFO_RELEVANT (stmt_info)
                  || STMT_VINFO_RELEVANT (stmt_info) == vect_used_in_scope);
      STMT_VINFO_RELEVANT (stmt_info) = vect_used_in_scope;
    }

  FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
    vect_mark_slp_stmts_relevant (child);
}


/* Rearrange the statements of NODE according to PERMUTATION.  */

static void
vect_slp_rearrange_stmts (slp_tree node, unsigned int group_size,
                          vec<unsigned> permutation)
{
  gimple stmt;
  vec<gimple> tmp_stmts;
  unsigned int i;
  slp_tree child;

  FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
    vect_slp_rearrange_stmts (child, group_size, permutation);

  gcc_assert (group_size == SLP_TREE_SCALAR_STMTS (node).length ());
  tmp_stmts.create (group_size);
  tmp_stmts.quick_grow_cleared (group_size);

  FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, stmt)
    tmp_stmts[permutation[i]] = stmt;

  SLP_TREE_SCALAR_STMTS (node).release ();
  SLP_TREE_SCALAR_STMTS (node) = tmp_stmts;
}
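
/* For example (illustrative only): with scalar stmts {s0, s1, s2, s3}
   and PERMUTATION {2, 0, 3, 1}, each stmt I is moved to slot
   PERMUTATION[I], giving {s1, s3, s0, s2}.  */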


/* Check if the required load permutations in the SLP instance
   SLP_INSTN are supported.  */

static bool
vect_supported_load_permutation_p (slp_instance slp_instn)
{
  unsigned int group_size = SLP_INSTANCE_GROUP_SIZE (slp_instn);
  unsigned int i, j, k, next;
  sbitmap load_index;
  slp_tree node;
  gimple stmt, load, next_load, first_load;
  struct data_reference *dr;

  if (dump_enabled_p ())
    {
      dump_printf_loc (MSG_NOTE, vect_location, "Load permutation ");
      FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (slp_instn), i, node)
        if (node->load_permutation.exists ())
          FOR_EACH_VEC_ELT (node->load_permutation, j, next)
            dump_printf (MSG_NOTE, "%d ", next);
        else
          for (k = 0; k < group_size; ++k)
            dump_printf (MSG_NOTE, "%d ", k);
      dump_printf (MSG_NOTE, "\n");
    }

  /* In case of reduction every load permutation is allowed, since the order
     of the reduction statements is not important (as opposed to the case of
     grouped stores).  The only condition we need to check is that all the
     load nodes are of the same size and have the same permutation (and then
     rearrange all the nodes of the SLP instance according to this
     permutation).  */

  /* Check that all the load nodes are of the same size.  */
  /* ???  Can't we assert this?  */
  FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (slp_instn), i, node)
    if (SLP_TREE_SCALAR_STMTS (node).length () != (unsigned) group_size)
      return false;

  node = SLP_INSTANCE_TREE (slp_instn);
  stmt = SLP_TREE_SCALAR_STMTS (node)[0];

  /* Reduction (there are no data-refs in the root).
     In reduction chain the order of the loads is important.  */
  if (!STMT_VINFO_DATA_REF (vinfo_for_stmt (stmt))
      && !GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)))
    {
      slp_tree load;
      unsigned int lidx;

      /* Compare all the permutation sequences to the first one.  We know
         that at least one load is permuted.  */
      node = SLP_INSTANCE_LOADS (slp_instn)[0];
      if (!node->load_permutation.exists ())
        return false;
      for (i = 1; SLP_INSTANCE_LOADS (slp_instn).iterate (i, &load); ++i)
        {
          if (!load->load_permutation.exists ())
            return false;
          FOR_EACH_VEC_ELT (load->load_permutation, j, lidx)
            if (lidx != node->load_permutation[j])
              return false;
        }

      /* Check that the loads in the first sequence are different and there
         are no gaps between them.  */
      load_index = sbitmap_alloc (group_size);
      bitmap_clear (load_index);
      FOR_EACH_VEC_ELT (node->load_permutation, i, lidx)
        {
          if (bitmap_bit_p (load_index, lidx))
            {
              sbitmap_free (load_index);
              return false;
            }
          bitmap_set_bit (load_index, lidx);
        }
      for (i = 0; i < group_size; i++)
        if (!bitmap_bit_p (load_index, i))
          {
            sbitmap_free (load_index);
            return false;
          }
      sbitmap_free (load_index);

      /* This permutation is valid for reduction.  Since the order of the
         statements in the nodes is not important unless they are memory
         accesses, we can rearrange the statements in all the nodes
         according to the order of the loads.  */
      vect_slp_rearrange_stmts (SLP_INSTANCE_TREE (slp_instn), group_size,
                                node->load_permutation);

      /* We are done, no actual permutations need to be generated.  */
      FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (slp_instn), i, node)
        SLP_TREE_LOAD_PERMUTATION (node).release ();
      return true;
    }

  /* In basic block vectorization we allow any subchain of an interleaving
     chain.
     FORNOW: not supported in loop SLP because of realignment complications.  */
  if (STMT_VINFO_BB_VINFO (vinfo_for_stmt (stmt)))
    {
      /* Check that for every node in the instance the loads
         form a subchain.  */
      FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (slp_instn), i, node)
        {
          next_load = NULL;
          FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), j, load)
            {
              if (j != 0 && next_load != load)
                return false;
              next_load = GROUP_NEXT_ELEMENT (vinfo_for_stmt (load));
            }
        }

      /* Check that the alignment of the first load in every subchain, i.e.,
         the first statement in every load node, is supported.
         ???  This belongs in alignment checking.  */
      FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (slp_instn), i, node)
        {
          first_load = SLP_TREE_SCALAR_STMTS (node)[0];
          if (first_load != GROUP_FIRST_ELEMENT (vinfo_for_stmt (first_load)))
            {
              dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_load));
              if (vect_supportable_dr_alignment (dr, false)
                  == dr_unaligned_unsupported)
                {
                  if (dump_enabled_p ())
                    {
                      dump_printf_loc (MSG_MISSED_OPTIMIZATION,
                                       vect_location,
                                       "unsupported unaligned load ");
                      dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
                                        first_load, 0);
                      dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
                    }
                  return false;
                }
            }
        }

      /* We are done, no actual permutations need to be generated.  */
      FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (slp_instn), i, node)
        SLP_TREE_LOAD_PERMUTATION (node).release ();
      return true;
    }

  /* FORNOW: the only supported permutation is 0..01..1.. of length equal to
     GROUP_SIZE and where each sequence of same drs is of GROUP_SIZE length as
     well (unless it's reduction).  */
  if (SLP_INSTANCE_LOADS (slp_instn).length () != group_size)
    return false;
  FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (slp_instn), i, node)
    if (!node->load_permutation.exists ())
      return false;

  load_index = sbitmap_alloc (group_size);
  bitmap_clear (load_index);
  FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (slp_instn), i, node)
    {
      unsigned int lidx = node->load_permutation[0];
      if (bitmap_bit_p (load_index, lidx))
        {
          sbitmap_free (load_index);
          return false;
        }
      bitmap_set_bit (load_index, lidx);
      FOR_EACH_VEC_ELT (node->load_permutation, j, k)
        if (k != lidx)
          {
            sbitmap_free (load_index);
            return false;
          }
    }
  for (i = 0; i < group_size; i++)
    if (!bitmap_bit_p (load_index, i))
      {
        sbitmap_free (load_index);
        return false;
      }
  sbitmap_free (load_index);

  FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (slp_instn), i, node)
    if (node->load_permutation.exists ()
        && !vect_transform_slp_perm_load
              (node, vNULL, NULL,
               SLP_INSTANCE_UNROLLING_FACTOR (slp_instn), slp_instn, true))
      return false;
  return true;
}
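
/* For example (illustrative only): with GROUP_SIZE 2, the accepted
   0..01..1.. shape corresponds to one load node {b[0], b[0]} with
   permutation {0, 0} and another {b[1], b[1]} with permutation {1, 1},
   e.g. for the group

     a[0] = b[0] + b[1];
     a[1] = b[0] + b[1];

   where each node repeats a single element of the interleaving chain,
   and the analysis-only call to vect_transform_slp_perm_load above
   checks that the target can materialize it.  */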


/* Find the first load in the loop that belongs to INSTANCE.
   When loads are in several SLP nodes, there can be a case in which the first
   load does not appear in the first SLP node to be transformed, causing
   incorrect order of statements.  Since we generate all the loads together,
   they must be inserted before the first load of the SLP instance and not
   before the first load of the first node of the instance.  */

static gimple
vect_find_first_load_in_slp_instance (slp_instance instance)
{
  int i, j;
  slp_tree load_node;
  gimple first_load = NULL, load;

  FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (instance), i, load_node)
    FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (load_node), j, load)
      first_load = get_earlier_stmt (load, first_load);

  return first_load;
}


/* Find the last store in SLP INSTANCE.  */

static gimple
vect_find_last_store_in_slp_instance (slp_instance instance)
{
  int i;
  slp_tree node;
  gimple last_store = NULL, store;

  node = SLP_INSTANCE_TREE (instance);
  for (i = 0; SLP_TREE_SCALAR_STMTS (node).iterate (i, &store); i++)
    last_store = get_later_stmt (store, last_store);

  return last_store;
}

/* Compute the cost for the SLP node NODE in the SLP instance INSTANCE.  */

static void
vect_analyze_slp_cost_1 (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo,
                         slp_instance instance, slp_tree node,
                         stmt_vector_for_cost *prologue_cost_vec,
                         unsigned ncopies_for_cost)
{
  stmt_vector_for_cost *body_cost_vec = &SLP_INSTANCE_BODY_COST_VEC (instance);

  unsigned i;
  slp_tree child;
  gimple stmt, s;
  stmt_vec_info stmt_info;
  tree lhs;
  unsigned group_size = SLP_INSTANCE_GROUP_SIZE (instance);

  /* Recurse down the SLP tree.  */
  FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
    vect_analyze_slp_cost_1 (loop_vinfo, bb_vinfo,
                             instance, child, prologue_cost_vec,
                             ncopies_for_cost);

  /* Look at the first scalar stmt to determine the cost.  */
  stmt = SLP_TREE_SCALAR_STMTS (node)[0];
  stmt_info = vinfo_for_stmt (stmt);
  if (STMT_VINFO_GROUPED_ACCESS (stmt_info))
    {
      if (DR_IS_WRITE (STMT_VINFO_DATA_REF (stmt_info)))
        vect_model_store_cost (stmt_info, ncopies_for_cost, false,
                               vect_uninitialized_def,
                               node, prologue_cost_vec, body_cost_vec);
      else
        {
          int i;
          gcc_checking_assert (DR_IS_READ (STMT_VINFO_DATA_REF (stmt_info)));
          vect_model_load_cost (stmt_info, ncopies_for_cost, false,
                                node, prologue_cost_vec, body_cost_vec);
          /* If the load is permuted record the cost for the permutation.
             ???  Loads from multiple chains are let through here only
             for a single special case involving complex numbers where
             in the end no permutation is necessary.  */
          FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, s)
            if ((STMT_VINFO_GROUP_FIRST_ELEMENT (vinfo_for_stmt (s))
                 == STMT_VINFO_GROUP_FIRST_ELEMENT (stmt_info))
                && vect_get_place_in_interleaving_chain
                     (s, STMT_VINFO_GROUP_FIRST_ELEMENT (stmt_info)) != i)
              {
                record_stmt_cost (body_cost_vec, group_size, vec_perm,
                                  stmt_info, 0, vect_body);
                break;
              }
        }
    }
  else
    record_stmt_cost (body_cost_vec, ncopies_for_cost, vector_stmt,
                      stmt_info, 0, vect_body);

  /* Scan operands and account for prologue cost of constants/externals.
     ???  This over-estimates cost for multiple uses and should be
     re-engineered.  */
  lhs = gimple_get_lhs (stmt);
  for (i = 0; i < gimple_num_ops (stmt); ++i)
    {
      tree def, op = gimple_op (stmt, i);
      gimple def_stmt;
      enum vect_def_type dt;
      if (!op || op == lhs)
        continue;
      if (vect_is_simple_use (op, NULL, loop_vinfo, bb_vinfo,
                              &def_stmt, &def, &dt)
          && (dt == vect_constant_def || dt == vect_external_def))
        record_stmt_cost (prologue_cost_vec, 1, vector_stmt,
                          stmt_info, 0, vect_prologue);
    }
}

/* Compute the cost for the SLP instance INSTANCE.  */

static void
vect_analyze_slp_cost (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo,
                       slp_instance instance, unsigned nunits)
{
  stmt_vector_for_cost body_cost_vec, prologue_cost_vec;
  unsigned ncopies_for_cost;
  stmt_info_for_cost *si;
  unsigned i;

  /* Calculate the number of vector stmts to create based on the unrolling
     factor (number of vectors is 1 if NUNITS >= GROUP_SIZE, and is
     GROUP_SIZE / NUNITS otherwise).  */
  unsigned group_size = SLP_INSTANCE_GROUP_SIZE (instance);
  ncopies_for_cost = least_common_multiple (nunits, group_size) / nunits;

  prologue_cost_vec.create (10);
  body_cost_vec.create (10);
  SLP_INSTANCE_BODY_COST_VEC (instance) = body_cost_vec;
  vect_analyze_slp_cost_1 (loop_vinfo, bb_vinfo,
                           instance, SLP_INSTANCE_TREE (instance),
                           &prologue_cost_vec, ncopies_for_cost);

  /* Record the prologue costs, which were delayed until we were
     sure that SLP was successful.  Unlike the body costs, we know
     the final values now regardless of the loop vectorization factor.  */
  void *data = (loop_vinfo ? LOOP_VINFO_TARGET_COST_DATA (loop_vinfo)
                : BB_VINFO_TARGET_COST_DATA (bb_vinfo));
  FOR_EACH_VEC_ELT (prologue_cost_vec, i, si)
    {
      struct _stmt_vec_info *stmt_info
        = si->stmt ? vinfo_for_stmt (si->stmt) : NULL;
      (void) add_stmt_cost (data, si->count, si->kind, stmt_info,
                            si->misalign, vect_prologue);
    }

  prologue_cost_vec.release ();
}
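
/* For example (illustrative arithmetic only): with NUNITS 4 and
   GROUP_SIZE 8, least_common_multiple (4, 8) is 8, so NCOPIES_FOR_COST
   is 8 / 4 = 2 vector stmts to cover the group of 8 scalars; with
   GROUP_SIZE 2 it is lcm (4, 2) / 4 = 1.  */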

/* Analyze an SLP instance starting from a group of grouped stores.  Call
   vect_build_slp_tree to build a tree of packed stmts if possible.
   Return FALSE if it's impossible to SLP any stmt in the loop.  */

static bool
vect_analyze_slp_instance (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo,
                           gimple stmt)
{
  slp_instance new_instance;
  slp_tree node;
  unsigned int group_size = GROUP_SIZE (vinfo_for_stmt (stmt));
  unsigned int unrolling_factor = 1, nunits;
  tree vectype, scalar_type = NULL_TREE;
  gimple next;
  unsigned int vectorization_factor = 0;
  int i;
  unsigned int max_nunits = 0;
  vec<slp_tree> loads;
  struct data_reference *dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (stmt));
  vec<gimple> scalar_stmts;

  if (GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)))
    {
      if (dr)
        {
          scalar_type = TREE_TYPE (DR_REF (dr));
          vectype = get_vectype_for_scalar_type (scalar_type);
        }
      else
        {
          gcc_assert (loop_vinfo);
          vectype = STMT_VINFO_VECTYPE (vinfo_for_stmt (stmt));
        }

      group_size = GROUP_SIZE (vinfo_for_stmt (stmt));
    }
  else
    {
      gcc_assert (loop_vinfo);
      vectype = STMT_VINFO_VECTYPE (vinfo_for_stmt (stmt));
      group_size = LOOP_VINFO_REDUCTIONS (loop_vinfo).length ();
    }

  if (!vectype)
    {
      if (dump_enabled_p ())
        {
          dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                           "Build SLP failed: unsupported data-type ");
          dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM, scalar_type);
          dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
        }

      return false;
    }

  nunits = TYPE_VECTOR_SUBPARTS (vectype);
  if (loop_vinfo)
    vectorization_factor = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
  else
    vectorization_factor = nunits;

  /* Calculate the unrolling factor.  */
  unrolling_factor = least_common_multiple (nunits, group_size) / group_size;
  if (unrolling_factor != 1 && !loop_vinfo)
    {
      if (dump_enabled_p ())
        dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                         "Build SLP failed: unrolling required in basic"
                         " block SLP\n");

      return false;
    }

  /* Create a node (a root of the SLP tree) for the packed grouped stores.  */
  scalar_stmts.create (group_size);
  next = stmt;
  if (GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)))
    {
      /* Collect the stores and store them in SLP_TREE_SCALAR_STMTS.  */
      while (next)
        {
          if (STMT_VINFO_IN_PATTERN_P (vinfo_for_stmt (next))
              && STMT_VINFO_RELATED_STMT (vinfo_for_stmt (next)))
            scalar_stmts.safe_push (
                  STMT_VINFO_RELATED_STMT (vinfo_for_stmt (next)));
          else
            scalar_stmts.safe_push (next);
          next = GROUP_NEXT_ELEMENT (vinfo_for_stmt (next));
        }
    }
  else
    {
      /* Collect reduction statements.  */
      vec<gimple> reductions = LOOP_VINFO_REDUCTIONS (loop_vinfo);
      for (i = 0; reductions.iterate (i, &next); i++)
        scalar_stmts.safe_push (next);
    }

  node = vect_create_new_slp_node (scalar_stmts);

  loads.create (group_size);

  /* Build the tree for the SLP instance.  */
  if (vect_build_slp_tree (loop_vinfo, bb_vinfo, &node, group_size,
                           &max_nunits, &loads,
                           vectorization_factor, NULL, NULL))
    {
      /* Calculate the unrolling factor based on the smallest type.  */
      if (max_nunits > nunits)
        unrolling_factor = least_common_multiple (max_nunits, group_size)
                           / group_size;

      if (unrolling_factor != 1 && !loop_vinfo)
        {
          if (dump_enabled_p ())
            dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                             "Build SLP failed: unrolling required in basic"
                             " block SLP\n");
          vect_free_slp_tree (node);
          loads.release ();
          return false;
        }

      /* Create a new SLP instance.  */
      new_instance = XNEW (struct _slp_instance);
      SLP_INSTANCE_TREE (new_instance) = node;
      SLP_INSTANCE_GROUP_SIZE (new_instance) = group_size;
      SLP_INSTANCE_UNROLLING_FACTOR (new_instance) = unrolling_factor;
      SLP_INSTANCE_BODY_COST_VEC (new_instance) = vNULL;
      SLP_INSTANCE_LOADS (new_instance) = loads;
      SLP_INSTANCE_FIRST_LOAD_STMT (new_instance) = NULL;

      /* Compute the load permutation.  */
      slp_tree load_node;
      bool loads_permuted = false;
      FOR_EACH_VEC_ELT (loads, i, load_node)
        {
          vec<unsigned> load_permutation;
          int j;
          gimple load, first_stmt;
          bool this_load_permuted = false;
          load_permutation.create (group_size);
          first_stmt = GROUP_FIRST_ELEMENT
              (vinfo_for_stmt (SLP_TREE_SCALAR_STMTS (load_node)[0]));
          FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (load_node), j, load)
            {
              int load_place
                = vect_get_place_in_interleaving_chain (load, first_stmt);
              gcc_assert (load_place != -1);
              if (load_place != j)
                this_load_permuted = true;
              load_permutation.safe_push (load_place);
            }
          if (!this_load_permuted)
            {
              load_permutation.release ();
              continue;
            }
          SLP_TREE_LOAD_PERMUTATION (load_node) = load_permutation;
          loads_permuted = true;
        }

      if (loads_permuted)
        {
          if (!vect_supported_load_permutation_p (new_instance))
            {
              if (dump_enabled_p ())
                {
                  dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                                   "Build SLP failed: unsupported load "
                                   "permutation ");
                  dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
                  dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
                }
              vect_free_slp_instance (new_instance);
              return false;
            }

          SLP_INSTANCE_FIRST_LOAD_STMT (new_instance)
            = vect_find_first_load_in_slp_instance (new_instance);
        }

      /* Compute the costs of this SLP instance.  */
      vect_analyze_slp_cost (loop_vinfo, bb_vinfo,
                             new_instance, TYPE_VECTOR_SUBPARTS (vectype));

      if (loop_vinfo)
        LOOP_VINFO_SLP_INSTANCES (loop_vinfo).safe_push (new_instance);
      else
        BB_VINFO_SLP_INSTANCES (bb_vinfo).safe_push (new_instance);

      if (dump_enabled_p ())
        vect_print_slp_tree (MSG_NOTE, node);

      return true;
    }

  /* Failed to SLP.  */
  /* Free the allocated memory.  */
  vect_free_slp_tree (node);
  loads.release ();

  return false;
}
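
/* For example (illustrative arithmetic only): in loop SLP with a store
   group of size 2 and vectors of 4 elements, the unrolling factor is
   lcm (4, 2) / 2 = 2, i.e. two loop iterations supply the 4 scalar
   stores packed into one vector store.  In basic-block SLP there is no
   loop to unroll, which is why any factor other than 1 fails above.  */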


/* Check if there are stmts in the loop that can be vectorized using SLP.
   Build SLP trees of packed scalar stmts if SLP is possible.  */

bool
vect_analyze_slp (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo)
{
  unsigned int i;
  vec<gimple> grouped_stores;
  vec<gimple> reductions = vNULL;
  vec<gimple> reduc_chains = vNULL;
  gimple first_element;
  bool ok = false;

  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location, "=== vect_analyze_slp ===\n");

  if (loop_vinfo)
    {
      grouped_stores = LOOP_VINFO_GROUPED_STORES (loop_vinfo);
      reduc_chains = LOOP_VINFO_REDUCTION_CHAINS (loop_vinfo);
      reductions = LOOP_VINFO_REDUCTIONS (loop_vinfo);
    }
  else
    grouped_stores = BB_VINFO_GROUPED_STORES (bb_vinfo);

  /* Find SLP sequences starting from groups of grouped stores.  */
  FOR_EACH_VEC_ELT (grouped_stores, i, first_element)
    if (vect_analyze_slp_instance (loop_vinfo, bb_vinfo, first_element))
      ok = true;

  if (bb_vinfo && !ok)
    {
      if (dump_enabled_p ())
        dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                         "Failed to SLP the basic block.\n");

      return false;
    }

  if (loop_vinfo
      && LOOP_VINFO_REDUCTION_CHAINS (loop_vinfo).length () > 0)
    {
      /* Find SLP sequences starting from reduction chains.  */
      FOR_EACH_VEC_ELT (reduc_chains, i, first_element)
        if (vect_analyze_slp_instance (loop_vinfo, bb_vinfo, first_element))
          ok = true;
        else
          return false;

      /* Don't try to vectorize SLP reductions if reduction chain was
         detected.  */
      return ok;
    }

  /* Find SLP sequences starting from groups of reductions.  */
  if (loop_vinfo && LOOP_VINFO_REDUCTIONS (loop_vinfo).length () > 1
      && vect_analyze_slp_instance (loop_vinfo, bb_vinfo, reductions[0]))
    ok = true;

  return true;
}


/* For each possible SLP instance decide whether to SLP it and calculate overall
   unrolling factor needed to SLP the loop.  Return TRUE if decided to SLP at
   least one instance.  */

bool
vect_make_slp_decision (loop_vec_info loop_vinfo)
{
  unsigned int i, unrolling_factor = 1;
  vec<slp_instance> slp_instances = LOOP_VINFO_SLP_INSTANCES (loop_vinfo);
  slp_instance instance;
  int decided_to_slp = 0;

  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location, "=== vect_make_slp_decision ==="
                     "\n");

  FOR_EACH_VEC_ELT (slp_instances, i, instance)
    {
      /* FORNOW: SLP if you can.  */
      if (unrolling_factor < SLP_INSTANCE_UNROLLING_FACTOR (instance))
        unrolling_factor = SLP_INSTANCE_UNROLLING_FACTOR (instance);

      /* Mark all the stmts that belong to INSTANCE as PURE_SLP stmts.  Later we
         call vect_detect_hybrid_slp () to find stmts that need hybrid SLP and
         loop-based vectorization.  Such stmts will be marked as HYBRID.  */
      vect_mark_slp_stmts (SLP_INSTANCE_TREE (instance), pure_slp, -1);
      decided_to_slp++;
    }

  LOOP_VINFO_SLP_UNROLLING_FACTOR (loop_vinfo) = unrolling_factor;

  if (decided_to_slp && dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location,
                     "Decided to SLP %d instances. Unrolling factor %d\n",
                     decided_to_slp, unrolling_factor);

  return (decided_to_slp > 0);
}


/* Find stmts that must be both vectorized and SLPed (since they feed stmts that
   can't be SLPed) in the tree rooted at NODE.  Mark such stmts as HYBRID.  */

static void
vect_detect_hybrid_slp_stmts (slp_tree node)
{
  int i;
  vec<gimple> stmts = SLP_TREE_SCALAR_STMTS (node);
  gimple stmt = stmts[0];
  imm_use_iterator imm_iter;
  gimple use_stmt;
  stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
  slp_tree child;
  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
  struct loop *loop = NULL;
  bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_vinfo);
  basic_block bb = NULL;

  if (!node)
    return;

  if (loop_vinfo)
    loop = LOOP_VINFO_LOOP (loop_vinfo);
  else
    bb = BB_VINFO_BB (bb_vinfo);

  FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, stmt)
    if (PURE_SLP_STMT (vinfo_for_stmt (stmt))
        && TREE_CODE (gimple_op (stmt, 0)) == SSA_NAME)
      FOR_EACH_IMM_USE_STMT (use_stmt, imm_iter, gimple_op (stmt, 0))
        if (gimple_bb (use_stmt)
            && ((loop && flow_bb_inside_loop_p (loop, gimple_bb (use_stmt)))
                || bb == gimple_bb (use_stmt))
            && (stmt_vinfo = vinfo_for_stmt (use_stmt))
            && !STMT_SLP_TYPE (stmt_vinfo)
            && (STMT_VINFO_RELEVANT (stmt_vinfo)
                || VECTORIZABLE_CYCLE_DEF (STMT_VINFO_DEF_TYPE (stmt_vinfo)))
            && !(gimple_code (use_stmt) == GIMPLE_PHI
                 && STMT_VINFO_DEF_TYPE (stmt_vinfo)
                   == vect_reduction_def))
          vect_mark_slp_stmts (node, hybrid, i);

  FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
    vect_detect_hybrid_slp_stmts (child);
}
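
/* For example (illustrative only): if a PURE_SLP stmt x_1 = a_2 + b_3
   also feeds a relevant stmt outside the SLP instance, say
   s_4 = x_1 * c_5 where s_4 is not SLPed, then x_1 is needed both as a
   lane of an SLP vector and by the loop-based vectorization of s_4, so
   it is re-marked as HYBRID.  */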


/* Find stmts that must be both vectorized and SLPed.  */

void
vect_detect_hybrid_slp (loop_vec_info loop_vinfo)
{
  unsigned int i;
  vec<slp_instance> slp_instances = LOOP_VINFO_SLP_INSTANCES (loop_vinfo);
  slp_instance instance;

  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location, "=== vect_detect_hybrid_slp ==="
                     "\n");

  FOR_EACH_VEC_ELT (slp_instances, i, instance)
    vect_detect_hybrid_slp_stmts (SLP_INSTANCE_TREE (instance));
}


/* Create and initialize a new bb_vec_info struct for BB, as well as
   stmt_vec_info structs for all the stmts in it.  */

static bb_vec_info
new_bb_vec_info (basic_block bb)
{
  bb_vec_info res = NULL;
  gimple_stmt_iterator gsi;

  res = (bb_vec_info) xcalloc (1, sizeof (struct _bb_vec_info));
  BB_VINFO_BB (res) = bb;

  for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
    {
      gimple stmt = gsi_stmt (gsi);
      gimple_set_uid (stmt, 0);
      set_vinfo_for_stmt (stmt, new_stmt_vec_info (stmt, NULL, res));
    }

  BB_VINFO_GROUPED_STORES (res).create (10);
  BB_VINFO_SLP_INSTANCES (res).create (2);
  BB_VINFO_TARGET_COST_DATA (res) = init_cost (NULL);

  bb->aux = res;
  return res;
}


/* Free BB_VINFO struct, as well as all the stmt_vec_info structs of all the
   stmts in the basic block.  */

static void
destroy_bb_vec_info (bb_vec_info bb_vinfo)
{
  vec<slp_instance> slp_instances;
  slp_instance instance;
  basic_block bb;
  gimple_stmt_iterator si;
  unsigned i;

  if (!bb_vinfo)
    return;

  bb = BB_VINFO_BB (bb_vinfo);

  for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
    {
      gimple stmt = gsi_stmt (si);
      stmt_vec_info stmt_info = vinfo_for_stmt (stmt);

      if (stmt_info)
        /* Free stmt_vec_info.  */
        free_stmt_vec_info (stmt);
    }

  vect_destroy_datarefs (NULL, bb_vinfo);
  free_dependence_relations (BB_VINFO_DDRS (bb_vinfo));
  BB_VINFO_GROUPED_STORES (bb_vinfo).release ();
  slp_instances = BB_VINFO_SLP_INSTANCES (bb_vinfo);
  FOR_EACH_VEC_ELT (slp_instances, i, instance)
    vect_free_slp_instance (instance);
  BB_VINFO_SLP_INSTANCES (bb_vinfo).release ();
  destroy_cost_data (BB_VINFO_TARGET_COST_DATA (bb_vinfo));
  free (bb_vinfo);
  bb->aux = NULL;
}


/* Analyze statements contained in SLP tree node after recursively analyzing
   the subtree.  Return TRUE if the operations are supported.  */

static bool
vect_slp_analyze_node_operations (bb_vec_info bb_vinfo, slp_tree node)
{
  bool dummy;
  int i;
  gimple stmt;
  slp_tree child;

  if (!node)
    return true;

  FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
    if (!vect_slp_analyze_node_operations (bb_vinfo, child))
      return false;

  FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, stmt)
    {
      stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
      gcc_assert (stmt_info);
      gcc_assert (PURE_SLP_STMT (stmt_info));

      if (!vect_analyze_stmt (stmt, &dummy, node))
        return false;
    }

  return true;
}
1903
1904
1905 /* Analyze statements in SLP instances of the basic block. Return TRUE if the
1906 operations are supported. */
1907
1908 static bool
1909 vect_slp_analyze_operations (bb_vec_info bb_vinfo)
1910 {
1911 vec<slp_instance> slp_instances = BB_VINFO_SLP_INSTANCES (bb_vinfo);
1912 slp_instance instance;
1913 int i;
1914
1915 for (i = 0; slp_instances.iterate (i, &instance); )
1916 {
1917 if (!vect_slp_analyze_node_operations (bb_vinfo,
1918 SLP_INSTANCE_TREE (instance)))
1919 {
1920 vect_free_slp_instance (instance);
1921 slp_instances.ordered_remove (i);
1922 }
1923 else
1924 i++;
1925 }
1926
1927 if (!slp_instances.length ())
1928 return false;
1929
1930 return true;
1931 }
1932
1933
1934 /* Compute the scalar cost of the SLP node NODE and its children
1935 and return it. Do not account defs that are marked in LIFE, and
1936 update LIFE according to uses of NODE. */
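/* LIFE[I] is set when the I-th scalar stmt of the node has a use that
   survives vectorization - a PHI, a use in another basic block, or a
   use in a non-vectorizable stmt - in which case the scalar stmt stays
   live and its cost must not be counted as saved.  */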
1937
1938 static unsigned
1939 vect_bb_slp_scalar_cost (basic_block bb,
1940 slp_tree node, vec<bool, va_heap> *life)
1941 {
1942 unsigned scalar_cost = 0;
1943 unsigned i;
1944 gimple stmt;
1945 slp_tree child;
1946
1947 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, stmt)
1948 {
1949 unsigned stmt_cost;
1950 ssa_op_iter op_iter;
1951 def_operand_p def_p;
1952 stmt_vec_info stmt_info;
1953
1954 if ((*life)[i])
1955 continue;
1956
1957 /* If there is a non-vectorized use of the defs then the scalar
1958 stmt is kept live, in which case we account neither it nor any
1959 required defs in the SLP children in the scalar cost. This
1960 way we make the vectorization more costly when compared to
1961 the scalar cost. */
1962 FOR_EACH_SSA_DEF_OPERAND (def_p, stmt, op_iter, SSA_OP_DEF)
1963 {
1964 imm_use_iterator use_iter;
1965 gimple use_stmt;
1966 FOR_EACH_IMM_USE_STMT (use_stmt, use_iter, DEF_FROM_PTR (def_p))
1967 if (gimple_code (use_stmt) == GIMPLE_PHI
1968 || gimple_bb (use_stmt) != bb
1969 || !STMT_VINFO_VECTORIZABLE (vinfo_for_stmt (use_stmt)))
1970 {
1971 (*life)[i] = true;
1972 BREAK_FROM_IMM_USE_STMT (use_iter);
1973 }
1974 }
1975 if ((*life)[i])
1976 continue;
1977
1978 stmt_info = vinfo_for_stmt (stmt);
1979 if (STMT_VINFO_DATA_REF (stmt_info))
1980 {
1981 if (DR_IS_READ (STMT_VINFO_DATA_REF (stmt_info)))
1982 stmt_cost = vect_get_stmt_cost (scalar_load);
1983 else
1984 stmt_cost = vect_get_stmt_cost (scalar_store);
1985 }
1986 else
1987 stmt_cost = vect_get_stmt_cost (scalar_stmt);
1988
1989 scalar_cost += stmt_cost;
1990 }
1991
1992 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
1993 scalar_cost += vect_bb_slp_scalar_cost (bb, child, life);
1994
1995 return scalar_cost;
1996 }
1997
1998 /* Check if vectorization of the basic block is profitable. */
1999
2000 static bool
2001 vect_bb_vectorization_profitable_p (bb_vec_info bb_vinfo)
2002 {
2003 vec<slp_instance> slp_instances = BB_VINFO_SLP_INSTANCES (bb_vinfo);
2004 slp_instance instance;
2005 int i, j;
2006 unsigned int vec_inside_cost = 0, vec_outside_cost = 0, scalar_cost = 0;
2007 unsigned int vec_prologue_cost = 0, vec_epilogue_cost = 0;
2008 void *target_cost_data = BB_VINFO_TARGET_COST_DATA (bb_vinfo);
2009 stmt_vec_info stmt_info = NULL;
2010 stmt_vector_for_cost body_cost_vec;
2011 stmt_info_for_cost *ci;
2012
2013 /* Calculate vector costs. */
2014 FOR_EACH_VEC_ELT (slp_instances, i, instance)
2015 {
2016 body_cost_vec = SLP_INSTANCE_BODY_COST_VEC (instance);
2017
2018 FOR_EACH_VEC_ELT (body_cost_vec, j, ci)
2019 {
2020 stmt_info = ci->stmt ? vinfo_for_stmt (ci->stmt) : NULL;
2021 (void) add_stmt_cost (target_cost_data, ci->count, ci->kind,
2022 stmt_info, ci->misalign, vect_body);
2023 }
2024 }
2025
2026 /* Calculate scalar cost. */
2027 FOR_EACH_VEC_ELT (slp_instances, i, instance)
2028 {
2029 stack_vec<bool, 20> life;
2030 life.safe_grow_cleared (SLP_INSTANCE_GROUP_SIZE (instance));
2031 scalar_cost += vect_bb_slp_scalar_cost (BB_VINFO_BB (bb_vinfo),
2032 SLP_INSTANCE_TREE (instance),
2033 &life);
2034 }
2035
2036 /* Complete the target-specific cost calculation. */
2037 finish_cost (BB_VINFO_TARGET_COST_DATA (bb_vinfo), &vec_prologue_cost,
2038 &vec_inside_cost, &vec_epilogue_cost);
2039
2040 vec_outside_cost = vec_prologue_cost + vec_epilogue_cost;
2041
2042 if (dump_enabled_p ())
2043 {
2044 dump_printf_loc (MSG_NOTE, vect_location, "Cost model analysis: \n");
2045 dump_printf (MSG_NOTE, " Vector inside of basic block cost: %d\n",
2046 vec_inside_cost);
2047 dump_printf (MSG_NOTE, " Vector prologue cost: %d\n", vec_prologue_cost);
2048 dump_printf (MSG_NOTE, " Vector epilogue cost: %d\n", vec_epilogue_cost);
2049 dump_printf (MSG_NOTE, " Scalar cost of basic block: %d\n", scalar_cost);
2050 }
2051
2052 /* Vectorization is profitable if its cost is less than the cost of the
2053 scalar version. */
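  /* E.g., with a vector prologue cost of 1, an inside cost of 8 and an
     epilogue cost of 0 against a scalar cost of 12, 1 + 8 + 0 = 9 < 12,
     so the block is considered profitable to vectorize.  */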
2054 if (vec_outside_cost + vec_inside_cost >= scalar_cost)
2055 return false;
2056
2057 return true;
2058 }
2059
2060 /* Check if the basic block can be vectorized. */
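/* The checks below broadly mirror the loop vectorizer's analysis:
   analyze the data references and their accesses, run pattern
   recognition, check dependences and alignment, build the SLP trees,
   verify that all operations are supported, and finally consult the
   cost model.  */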
2061
2062 static bb_vec_info
2063 vect_slp_analyze_bb_1 (basic_block bb)
2064 {
2065 bb_vec_info bb_vinfo;
2066 vec<slp_instance> slp_instances;
2067 slp_instance instance;
2068 int i;
2069 int min_vf = 2;
2070
2071 bb_vinfo = new_bb_vec_info (bb);
2072 if (!bb_vinfo)
2073 return NULL;
2074
2075 if (!vect_analyze_data_refs (NULL, bb_vinfo, &min_vf))
2076 {
2077 if (dump_enabled_p ())
2078 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2079 "not vectorized: unhandled data-ref in basic "
2080 "block.\n");
2081
2082 destroy_bb_vec_info (bb_vinfo);
2083 return NULL;
2084 }
2085
2086 if (BB_VINFO_DATAREFS (bb_vinfo).length () < 2)
2087 {
2088 if (dump_enabled_p ())
2089 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2090 "not vectorized: not enough data-refs in "
2091 "basic block.\n");
2092
2093 destroy_bb_vec_info (bb_vinfo);
2094 return NULL;
2095 }
2096
2097 if (!vect_analyze_data_ref_accesses (NULL, bb_vinfo))
2098 {
2099 if (dump_enabled_p ())
2100 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2101 "not vectorized: unhandled data access in "
2102 "basic block.\n");
2103
2104 destroy_bb_vec_info (bb_vinfo);
2105 return NULL;
2106 }
2107
2108 vect_pattern_recog (NULL, bb_vinfo);
2109
2110 if (!vect_slp_analyze_data_ref_dependences (bb_vinfo))
2111 {
2112 if (dump_enabled_p ())
2113 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2114 "not vectorized: unhandled data dependence "
2115 "in basic block.\n");
2116
2117 destroy_bb_vec_info (bb_vinfo);
2118 return NULL;
2119 }
2120
2121 if (!vect_analyze_data_refs_alignment (NULL, bb_vinfo))
2122 {
2123 if (dump_enabled_p ())
2124 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2125 "not vectorized: bad data alignment in basic "
2126 "block.\n");
2127
2128 destroy_bb_vec_info (bb_vinfo);
2129 return NULL;
2130 }
2131
2132 /* Check the SLP opportunities in the basic block, analyze and build SLP
2133 trees. */
2134 if (!vect_analyze_slp (NULL, bb_vinfo))
2135 {
2136 if (dump_enabled_p ())
2137 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2138 "not vectorized: failed to find SLP opportunities "
2139 "in basic block.\n");
2140
2141 destroy_bb_vec_info (bb_vinfo);
2142 return NULL;
2143 }
2144
2145 slp_instances = BB_VINFO_SLP_INSTANCES (bb_vinfo);
2146
2147 /* Mark all the statements that we want to vectorize as pure SLP and
2148 relevant. */
2149 FOR_EACH_VEC_ELT (slp_instances, i, instance)
2150 {
2151 vect_mark_slp_stmts (SLP_INSTANCE_TREE (instance), pure_slp, -1);
2152 vect_mark_slp_stmts_relevant (SLP_INSTANCE_TREE (instance));
2153 }
2154
2155 if (!vect_verify_datarefs_alignment (NULL, bb_vinfo))
2156 {
2157 if (dump_enabled_p ())
2158 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2159 "not vectorized: unsupported alignment in basic "
2160 "block.\n");
2161 destroy_bb_vec_info (bb_vinfo);
2162 return NULL;
2163 }
2164
2165 if (!vect_slp_analyze_operations (bb_vinfo))
2166 {
2167 if (dump_enabled_p ())
2168 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2169 "not vectorized: bad operation in basic block.\n");
2170
2171 destroy_bb_vec_info (bb_vinfo);
2172 return NULL;
2173 }
2174
2175 /* Cost model: check if the vectorization is worthwhile. */
2176 if (!unlimited_cost_model ()
2177 && !vect_bb_vectorization_profitable_p (bb_vinfo))
2178 {
2179 if (dump_enabled_p ())
2180 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2181 "not vectorized: vectorization is not "
2182 "profitable.\n");
2183
2184 destroy_bb_vec_info (bb_vinfo);
2185 return NULL;
2186 }
2187
2188 if (dump_enabled_p ())
2189 dump_printf_loc (MSG_NOTE, vect_location,
2190 "Basic block will be vectorized using SLP\n");
2191
2192 return bb_vinfo;
2193 }
2194
2195
2196 bb_vec_info
2197 vect_slp_analyze_bb (basic_block bb)
2198 {
2199 bb_vec_info bb_vinfo;
2200 int insns = 0;
2201 gimple_stmt_iterator gsi;
2202 unsigned int vector_sizes;
2203
2204 if (dump_enabled_p ())
2205 dump_printf_loc (MSG_NOTE, vect_location, "===vect_slp_analyze_bb===\n");
2206
2207 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
2208 {
2209 gimple stmt = gsi_stmt (gsi);
2210 if (!is_gimple_debug (stmt)
2211 && !gimple_nop_p (stmt)
2212 && gimple_code (stmt) != GIMPLE_LABEL)
2213 insns++;
2214 }
2215
2216 if (insns > PARAM_VALUE (PARAM_SLP_MAX_INSNS_IN_BB))
2217 {
2218 if (dump_enabled_p ())
2219 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2220 "not vectorized: too many instructions in "
2221 "basic block.\n");
2222
2223 return NULL;
2224 }
2225
2226 /* Autodetect the first vector size we try. */
2227 current_vector_size = 0;
2228 vector_sizes = targetm.vectorize.autovectorize_vector_sizes ();
2229
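  /* VECTOR_SIZES is a bitmask of the vector sizes (in bytes) the target
     can autovectorize with, e.g. 16 | 32 when both 128-bit and 256-bit
     vectors are available.  Each failed attempt clears the size just
     tried, and the analysis is retried with the next largest remaining
     size.  */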
2230 while (1)
2231 {
2232 bb_vinfo = vect_slp_analyze_bb_1 (bb);
2233 if (bb_vinfo)
2234 return bb_vinfo;
2235
2236 destroy_bb_vec_info (bb_vinfo);
2237
2238 vector_sizes &= ~current_vector_size;
2239 if (vector_sizes == 0
2240 || current_vector_size == 0)
2241 return NULL;
2242
2243 /* Try the next largest vector size. */
2244 current_vector_size = 1 << floor_log2 (vector_sizes);
2245 if (dump_enabled_p ())
2246 dump_printf_loc (MSG_NOTE, vect_location,
2247 "***** Re-trying analysis with "
2248 "vector size %d\n", current_vector_size);
2249 }
2250 }
2251
2252
2253 /* SLP costs are calculated according to the SLP instance unrolling factor
2254 (i.e., the number of created vector stmts depends on the unrolling factor).
2255 However, the actual number of vector stmts for every SLP node depends on
2256 VF, which is set later in vect_analyze_operations (). Hence, SLP costs
2257 should be updated. In this function we assume that the inside costs
2258 calculated in vect_model_xxx_cost are linear in ncopies. */
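/* E.g., an SLP instance with unrolling factor 2 in a loop with VF 8
   needs ncopies = 8 / 2 = 4 copies of each of its vector stmts, so
   every cost recorded for the instance body is scaled by 4.  */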
2259
2260 void
2261 vect_update_slp_costs_according_to_vf (loop_vec_info loop_vinfo)
2262 {
2263 unsigned int i, j, vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
2264 vec<slp_instance> slp_instances = LOOP_VINFO_SLP_INSTANCES (loop_vinfo);
2265 slp_instance instance;
2266 stmt_vector_for_cost body_cost_vec;
2267 stmt_info_for_cost *si;
2268 void *data = LOOP_VINFO_TARGET_COST_DATA (loop_vinfo);
2269
2270 if (dump_enabled_p ())
2271 dump_printf_loc (MSG_NOTE, vect_location,
2272 "=== vect_update_slp_costs_according_to_vf ===\n");
2273
2274 FOR_EACH_VEC_ELT (slp_instances, i, instance)
2275 {
2276 /* We assume that costs are linear in ncopies. */
2277 int ncopies = vf / SLP_INSTANCE_UNROLLING_FACTOR (instance);
2278
2279 /* Record the instance's instructions in the target cost model.
2280 This was delayed until here because the count of instructions
2281 isn't known beforehand. */
2282 body_cost_vec = SLP_INSTANCE_BODY_COST_VEC (instance);
2283
2284 FOR_EACH_VEC_ELT (body_cost_vec, j, si)
2285 (void) add_stmt_cost (data, si->count * ncopies, si->kind,
2286 vinfo_for_stmt (si->stmt), si->misalign,
2287 vect_body);
2288 }
2289 }
2290
2291
2292 /* For constant and loop invariant defs of SLP_NODE this function returns
2293 (vector) defs (VEC_OPRNDS) that will be used in the vectorized stmts.
2294 OP_NUM determines whether we gather defs for operand 0 or operand 1 of the
2295 RHS of the scalar stmts. NUMBER_OF_VECTORS is the number of vector defs to
2296 create. REDUC_INDEX is the index of the reduction operand in the
2297 statements, or -1 if there is none. */
2298
2299 static void
2300 vect_get_constant_vectors (tree op, slp_tree slp_node,
2301 vec<tree> *vec_oprnds,
2302 unsigned int op_num, unsigned int number_of_vectors,
2303 int reduc_index)
2304 {
2305 vec<gimple> stmts = SLP_TREE_SCALAR_STMTS (slp_node);
2306 gimple stmt = stmts[0];
2307 stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
2308 unsigned nunits;
2309 tree vec_cst;
2310 tree *elts;
2311 unsigned j, number_of_places_left_in_vector;
2312 tree vector_type;
2313 tree vop;
2314 int group_size = stmts.length ();
2315 unsigned int vec_num, i;
2316 unsigned number_of_copies = 1;
2317 vec<tree> voprnds;
2318 voprnds.create (number_of_vectors);
2319 bool constant_p, is_store;
2320 tree neutral_op = NULL;
2321 enum tree_code code = gimple_expr_code (stmt);
2322 gimple def_stmt;
2323 struct loop *loop;
2324 gimple_seq ctor_seq = NULL;
2325
2326 if (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_reduction_def
2327 && reduc_index != -1)
2328 {
2329 op_num = reduc_index - 1;
2330 op = gimple_op (stmt, reduc_index);
2331 /* For additional copies (see the explanation of NUMBER_OF_COPIES below)
2332 we need either neutral operands or the original operands. See
2333 get_initial_def_for_reduction() for details. */
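      /* E.g., a PLUS_EXPR reduction uses 0 as the neutral element, so
         extra vector copies can be filled with {0,0,...} without
         changing the result; MULT_EXPR uses 1 and BIT_AND_EXPR uses
         all-ones.  */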
2334 switch (code)
2335 {
2336 case WIDEN_SUM_EXPR:
2337 case DOT_PROD_EXPR:
2338 case PLUS_EXPR:
2339 case MINUS_EXPR:
2340 case BIT_IOR_EXPR:
2341 case BIT_XOR_EXPR:
2342 if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (op)))
2343 neutral_op = build_real (TREE_TYPE (op), dconst0);
2344 else
2345 neutral_op = build_int_cst (TREE_TYPE (op), 0);
2346
2347 break;
2348
2349 case MULT_EXPR:
2350 if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (op)))
2351 neutral_op = build_real (TREE_TYPE (op), dconst1);
2352 else
2353 neutral_op = build_int_cst (TREE_TYPE (op), 1);
2354
2355 break;
2356
2357 case BIT_AND_EXPR:
2358 neutral_op = build_int_cst (TREE_TYPE (op), -1);
2359 break;
2360
2361 case MAX_EXPR:
2362 case MIN_EXPR:
2363 def_stmt = SSA_NAME_DEF_STMT (op);
2364 loop = (gimple_bb (stmt))->loop_father;
2365 neutral_op = PHI_ARG_DEF_FROM_EDGE (def_stmt,
2366 loop_preheader_edge (loop));
2367 break;
2368
2369 default:
2370 neutral_op = NULL;
2371 }
2372 }
2373
2374 if (STMT_VINFO_DATA_REF (stmt_vinfo))
2375 {
2376 is_store = true;
2377 op = gimple_assign_rhs1 (stmt);
2378 }
2379 else
2380 is_store = false;
2381
2382 gcc_assert (op);
2383
2384 if (CONSTANT_CLASS_P (op))
2385 constant_p = true;
2386 else
2387 constant_p = false;
2388
2389 vector_type = get_vectype_for_scalar_type (TREE_TYPE (op));
2390 gcc_assert (vector_type);
2391 nunits = TYPE_VECTOR_SUBPARTS (vector_type);
2392
2393 /* NUMBER_OF_COPIES is the number of times we need to use the same values in
2394 created vectors. It is greater than 1 if unrolling is performed.
2395
2396 For example, we have two scalar operands, s1 and s2 (e.g., group of
2397 strided accesses of size two), while NUNITS is four (i.e., four scalars
2398 of this type can be packed in a vector). The output vector will contain
2399 two copies of each scalar operand: {s1, s2, s1, s2}. (NUMBER_OF_COPIES
2400 will be 2).
2401
2402 If GROUP_SIZE > NUNITS, the scalars will be split into several vectors
2403 containing the operands.
2404
2405 For example, NUNITS is four as before, and the group size is 8
2406 (s1, s2, ..., s8). We will create two vectors {s1, s2, s3, s4} and
2407 {s5, s6, s7, s8}. */
2408
2409 number_of_copies = least_common_multiple (nunits, group_size) / group_size;
2410
2411 number_of_places_left_in_vector = nunits;
2412 elts = XALLOCAVEC (tree, nunits);
2413 for (j = 0; j < number_of_copies; j++)
2414 {
2415 for (i = group_size - 1; stmts.iterate (i, &stmt); i--)
2416 {
2417 if (is_store)
2418 op = gimple_assign_rhs1 (stmt);
2419 else
2420 {
2421 switch (code)
2422 {
2423 case COND_EXPR:
2424 if (op_num == 0 || op_num == 1)
2425 {
2426 tree cond = gimple_assign_rhs1 (stmt);
2427 op = TREE_OPERAND (cond, op_num);
2428 }
2429 else
2430 {
2431 if (op_num == 2)
2432 op = gimple_assign_rhs2 (stmt);
2433 else
2434 op = gimple_assign_rhs3 (stmt);
2435 }
2436 break;
2437
2438 case CALL_EXPR:
2439 op = gimple_call_arg (stmt, op_num);
2440 break;
2441
2442 case LSHIFT_EXPR:
2443 case RSHIFT_EXPR:
2444 case LROTATE_EXPR:
2445 case RROTATE_EXPR:
2446 op = gimple_op (stmt, op_num + 1);
2447 /* Unlike the other binary operators, shifts/rotates take
2448 an int shift count instead of one of the same type as
2449 the lhs, so make sure the scalar is of the right type
2450 when we are dealing with vectors of
2451 long long/long/short/char. */
2452 if (op_num == 1 && TREE_CODE (op) == INTEGER_CST)
2453 op = fold_convert (TREE_TYPE (vector_type), op);
2454 break;
2455
2456 default:
2457 op = gimple_op (stmt, op_num + 1);
2458 break;
2459 }
2460 }
2461
2462 if (reduc_index != -1)
2463 {
2464 loop = (gimple_bb (stmt))->loop_father;
2465 def_stmt = SSA_NAME_DEF_STMT (op);
2466
2467 gcc_assert (loop);
2468
2469 /* Get the def before the loop. In a reduction chain we have
2470 only one initial value. */
2471 if ((j != (number_of_copies - 1)
2472 || (GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt))
2473 && i != 0))
2474 && neutral_op)
2475 op = neutral_op;
2476 else
2477 op = PHI_ARG_DEF_FROM_EDGE (def_stmt,
2478 loop_preheader_edge (loop));
2479 }
2480
2481 /* Create 'vect_ = {op0,op1,...,opn}'. */
2482 number_of_places_left_in_vector--;
2483 if (!types_compatible_p (TREE_TYPE (vector_type), TREE_TYPE (op)))
2484 {
2485 if (CONSTANT_CLASS_P (op))
2486 {
2487 op = fold_unary (VIEW_CONVERT_EXPR,
2488 TREE_TYPE (vector_type), op);
2489 gcc_assert (op && CONSTANT_CLASS_P (op));
2490 }
2491 else
2492 {
2493 tree new_temp
2494 = make_ssa_name (TREE_TYPE (vector_type), NULL);
2495 gimple init_stmt;
2496 op = build1 (VIEW_CONVERT_EXPR, TREE_TYPE (vector_type),
2497 op);
2498 init_stmt
2499 = gimple_build_assign_with_ops (VIEW_CONVERT_EXPR,
2500 new_temp, op, NULL_TREE);
2501 gimple_seq_add_stmt (&ctor_seq, init_stmt);
2502 op = new_temp;
2503 }
2504 }
2505 elts[number_of_places_left_in_vector] = op;
2506 if (!CONSTANT_CLASS_P (op))
2507 constant_p = false;
2508
2509 if (number_of_places_left_in_vector == 0)
2510 {
2511 number_of_places_left_in_vector = nunits;
2512
2513 if (constant_p)
2514 vec_cst = build_vector (vector_type, elts);
2515 else
2516 {
2517 vec<constructor_elt, va_gc> *v;
2518 unsigned k;
2519 vec_alloc (v, nunits);
2520 for (k = 0; k < nunits; ++k)
2521 CONSTRUCTOR_APPEND_ELT (v, NULL_TREE, elts[k]);
2522 vec_cst = build_constructor (vector_type, v);
2523 }
2524 voprnds.quick_push (vect_init_vector (stmt, vec_cst,
2525 vector_type, NULL));
2526 if (ctor_seq != NULL)
2527 {
2528 gimple init_stmt = SSA_NAME_DEF_STMT (voprnds.last ());
2529 gimple_stmt_iterator gsi = gsi_for_stmt (init_stmt);
2530 gsi_insert_seq_before_without_update (&gsi, ctor_seq,
2531 GSI_SAME_STMT);
2532 ctor_seq = NULL;
2533 }
2534 }
2535 }
2536 }
2537
2538 /* Since the vectors are created in reverse order, we have to reverse
2539 them here. */
2540 vec_num = voprnds.length ();
2541 for (j = vec_num; j != 0; j--)
2542 {
2543 vop = voprnds[j - 1];
2544 vec_oprnds->quick_push (vop);
2545 }
2546
2547 voprnds.release ();
2548
2549 /* If VF is greater than the unrolling factor needed for the SLP group of
2550 stmts, the NUMBER_OF_VECTORS to be created is greater than
2551 NUMBER_OF_SCALARS/NUNITS or NUNITS/NUMBER_OF_SCALARS, and hence we have
2552 to replicate the vectors. */
2553 while (number_of_vectors > vec_oprnds->length ())
2554 {
2555 tree neutral_vec = NULL;
2556
2557 if (neutral_op)
2558 {
2559 if (!neutral_vec)
2560 neutral_vec = build_vector_from_val (vector_type, neutral_op);
2561
2562 vec_oprnds->quick_push (neutral_vec);
2563 }
2564 else
2565 {
2566 for (i = 0; vec_oprnds->iterate (i, &vop) && i < vec_num; i++)
2567 vec_oprnds->quick_push (vop);
2568 }
2569 }
2570 }
2571
2572
2573 /* Get vectorized definitions from SLP_NODE, which contains the
2574 corresponding vectorized def-stmts. */
2575
2576 static void
2577 vect_get_slp_vect_defs (slp_tree slp_node, vec<tree> *vec_oprnds)
2578 {
2579 tree vec_oprnd;
2580 gimple vec_def_stmt;
2581 unsigned int i;
2582
2583 gcc_assert (SLP_TREE_VEC_STMTS (slp_node).exists ());
2584
2585 FOR_EACH_VEC_ELT (SLP_TREE_VEC_STMTS (slp_node), i, vec_def_stmt)
2586 {
2587 gcc_assert (vec_def_stmt);
2588 vec_oprnd = gimple_get_lhs (vec_def_stmt);
2589 vec_oprnds->quick_push (vec_oprnd);
2590 }
2591 }
2592
2593
2594 /* Get vectorized definitions for SLP_NODE.
2595 If the scalar definitions are loop invariants or constants, collect them and
2596 call vect_get_constant_vectors() to create vector stmts.
2597 Otherwise, the def-stmts must already be vectorized and the vectorized stmts
2598 must be stored in the corresponding child of SLP_NODE, and we call
2599 vect_get_slp_vect_defs () to retrieve them. */
2600
2601 void
2602 vect_get_slp_defs (vec<tree> ops, slp_tree slp_node,
2603 vec<vec<tree> > *vec_oprnds, int reduc_index)
2604 {
2605 gimple first_stmt;
2606 int number_of_vects = 0, i;
2607 unsigned int child_index = 0;
2608 HOST_WIDE_INT lhs_size_unit, rhs_size_unit;
2609 slp_tree child = NULL;
2610 vec<tree> vec_defs;
2611 tree oprnd;
2612 bool vectorized_defs;
2613
2614 first_stmt = SLP_TREE_SCALAR_STMTS (slp_node)[0];
2615 FOR_EACH_VEC_ELT (ops, i, oprnd)
2616 {
2617 /* For each operand we check whether it has vectorized definitions in a
2618 child node or whether we need to create them (for invariants and
2619 constants). We check whether the LHS of the first stmt of the next
2620 child matches OPRND. If it does, we have found the correct child.
2621 Otherwise, we call vect_get_constant_vectors () and do not advance
2622 CHILD_INDEX, in order to check this child node for the next operand. */
2623 vectorized_defs = false;
2624 if (SLP_TREE_CHILDREN (slp_node).length () > child_index)
2625 {
2626 child = SLP_TREE_CHILDREN (slp_node)[child_index];
2627
2628 /* We have to check both pattern and original def, if available. */
2629 gimple first_def = SLP_TREE_SCALAR_STMTS (child)[0];
2630 gimple related = STMT_VINFO_RELATED_STMT (vinfo_for_stmt (first_def));
2631
2632 if (operand_equal_p (oprnd, gimple_get_lhs (first_def), 0)
2633 || (related
2634 && operand_equal_p (oprnd, gimple_get_lhs (related), 0)))
2635 {
2636 /* The number of vector defs is determined by the number of
2637 vector statements in the node from which we get those
2638 statements. */
2639 number_of_vects = SLP_TREE_NUMBER_OF_VEC_STMTS (child);
2640 vectorized_defs = true;
2641 child_index++;
2642 }
2643 }
2644
2645 if (!vectorized_defs)
2646 {
2647 if (i == 0)
2648 {
2649 number_of_vects = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
2650 /* Number of vector stmts was calculated according to LHS in
2651 vect_schedule_slp_instance (), fix it by replacing LHS with
2652 RHS, if necessary. See vect_get_smallest_scalar_type () for
2653 details. */
2654 vect_get_smallest_scalar_type (first_stmt, &lhs_size_unit,
2655 &rhs_size_unit);
2656 if (rhs_size_unit != lhs_size_unit)
2657 {
2658 number_of_vects *= rhs_size_unit;
2659 number_of_vects /= lhs_size_unit;
2660 }
2661 }
2662 }
2663
2664 /* Allocate memory for vectorized defs. */
2665 vec_defs = vNULL;
2666 vec_defs.create (number_of_vects);
2667
2668 /* For reduction defs we call vect_get_constant_vectors (), since we are
2669 looking for initial loop invariant values. */
2670 if (vectorized_defs && reduc_index == -1)
2671 /* The defs are already vectorized. */
2672 vect_get_slp_vect_defs (child, &vec_defs);
2673 else
2674 /* Build vectors from scalar defs. */
2675 vect_get_constant_vectors (oprnd, slp_node, &vec_defs, i,
2676 number_of_vects, reduc_index);
2677
2678 vec_oprnds->quick_push (vec_defs);
2679
2680 /* For reductions, we only need initial values. */
2681 if (reduc_index != -1)
2682 return;
2683 }
2684 }
2685
2686
2687 /* Create NCOPIES permutation statements using the vector mask MASK and two
2688 input vectors placed in DR_CHAIN at FIRST_VEC_INDX and SECOND_VEC_INDX
2689 for the first copy, shifting by STRIDE elements of DR_CHAIN for every
2690 further copy.
2691 (STRIDE is the number of vectorized stmts for NODE divided by the number of
2692 copies).
2693 VECT_STMTS_COUNTER specifies the index in the vectorized stmts of NODE, where
2694 the created stmts must be inserted. */
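/* Each generated permutation stmt has the form

     lhs = VEC_PERM_EXPR <first_vec, second_vec, mask>;

   where element I of MASK selects element MASK[I] from the virtual
   concatenation of FIRST_VEC and SECOND_VEC, i.e. mask values range
   over 0 .. 2 * nunits - 1.  */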
2695
2696 static inline void
2697 vect_create_mask_and_perm (gimple stmt, gimple next_scalar_stmt,
2698 tree mask, int first_vec_indx, int second_vec_indx,
2699 gimple_stmt_iterator *gsi, slp_tree node,
2700 tree vectype, vec<tree> dr_chain,
2701 int ncopies, int vect_stmts_counter)
2702 {
2703 tree perm_dest;
2704 gimple perm_stmt = NULL;
2705 stmt_vec_info next_stmt_info;
2706 int i, stride;
2707 tree first_vec, second_vec, data_ref;
2708
2709 stride = SLP_TREE_NUMBER_OF_VEC_STMTS (node) / ncopies;
2710
2711 /* Initialize the vect stmts of NODE to properly insert the generated
2712 stmts later. */
2713 for (i = SLP_TREE_VEC_STMTS (node).length ();
2714 i < (int) SLP_TREE_NUMBER_OF_VEC_STMTS (node); i++)
2715 SLP_TREE_VEC_STMTS (node).quick_push (NULL);
2716
2717 perm_dest = vect_create_destination_var (gimple_assign_lhs (stmt), vectype);
2718 for (i = 0; i < ncopies; i++)
2719 {
2720 first_vec = dr_chain[first_vec_indx];
2721 second_vec = dr_chain[second_vec_indx];
2722
2723 /* Generate the permute statement. */
2724 perm_stmt = gimple_build_assign_with_ops (VEC_PERM_EXPR, perm_dest,
2725 first_vec, second_vec, mask);
2726 data_ref = make_ssa_name (perm_dest, perm_stmt);
2727 gimple_set_lhs (perm_stmt, data_ref);
2728 vect_finish_stmt_generation (stmt, perm_stmt, gsi);
2729
2730 /* Store the vector statement in NODE. */
2731 SLP_TREE_VEC_STMTS (node)[stride * i + vect_stmts_counter] = perm_stmt;
2732
2733 first_vec_indx += stride;
2734 second_vec_indx += stride;
2735 }
2736
2737 /* Mark the scalar stmt as vectorized. */
2738 next_stmt_info = vinfo_for_stmt (next_scalar_stmt);
2739 STMT_VINFO_VEC_STMT (next_stmt_info) = perm_stmt;
2740 }
2741
2742
2743 /* Given FIRST_MASK_ELEMENT - the mask element in element representation,
2744 return in CURRENT_MASK_ELEMENT its equivalent in target specific
2745 representation. Check that the mask is valid and return FALSE if not.
2746 Return TRUE in NEED_NEXT_VECTOR if the permutation requires moving to
2747 the next vector, i.e., the current first vector is not needed. */
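/* E.g., with MASK_NUNITS 4, a FIRST_MASK_ELEMENT of 5 selects element 1
   of the second input vector.  As soon as an element >= 2 * MASK_NUNITS
   is requested, the two-vector window is shifted and the mask values
   collected so far are adjusted downwards to match.  */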
2748
2749 static bool
2750 vect_get_mask_element (gimple stmt, int first_mask_element, int m,
2751 int mask_nunits, bool only_one_vec, int index,
2752 unsigned char *mask, int *current_mask_element,
2753 bool *need_next_vector, int *number_of_mask_fixes,
2754 bool *mask_fixed, bool *needs_first_vector)
2755 {
2756 int i;
2757
2758 /* Convert to target specific representation. */
2759 *current_mask_element = first_mask_element + m;
2760 /* Adjust the value in case it's a mask for the second and third vectors. */
2761 *current_mask_element -= mask_nunits * (*number_of_mask_fixes - 1);
2762
2763 if (*current_mask_element < mask_nunits)
2764 *needs_first_vector = true;
2765
2766 /* We have only one input vector to permute but the mask accesses values in
2767 the next vector as well. */
2768 if (only_one_vec && *current_mask_element >= mask_nunits)
2769 {
2770 if (dump_enabled_p ())
2771 {
2772 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2773 "permutation requires at least two vectors ");
2774 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
2775 dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
2776 }
2777
2778 return false;
2779 }
2780
2781 /* The mask requires the next vector. */
2782 if (*current_mask_element >= mask_nunits * 2)
2783 {
2784 if (*needs_first_vector || *mask_fixed)
2785 {
2786 /* We either need the first vector too or have already moved to the
2787 next vector. In both cases, this permutation needs three
2788 vectors. */
2789 if (dump_enabled_p ())
2790 {
2791 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2792 "permutation requires at "
2793 "least three vectors ");
2794 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
2795 dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
2796 }
2797
2798 return false;
2799 }
2800
2801 /* We move to the next vector, dropping the first one and working with
2802 the second and the third - we need to adjust the values of the mask
2803 accordingly. */
2804 *current_mask_element -= mask_nunits * *number_of_mask_fixes;
2805
2806 for (i = 0; i < index; i++)
2807 mask[i] -= mask_nunits * *number_of_mask_fixes;
2808
2809 (*number_of_mask_fixes)++;
2810 *mask_fixed = true;
2811 }
2812
2813 *need_next_vector = *mask_fixed;
2814
2815 /* This was the last element of this mask. Start a new one. */
2816 if (index == mask_nunits - 1)
2817 {
2818 *number_of_mask_fixes = 1;
2819 *mask_fixed = false;
2820 *needs_first_vector = false;
2821 }
2822
2823 return true;
2824 }
2825
2826
2827 /* Generate vector permute statements from a list of loads in DR_CHAIN.
2828 If ANALYZE_ONLY is TRUE, only check that it is possible to create valid
2829 permute statements for the SLP node NODE of the SLP instance
2830 SLP_NODE_INSTANCE. */
2831
2832 bool
2833 vect_transform_slp_perm_load (slp_tree node, vec<tree> dr_chain,
2834 gimple_stmt_iterator *gsi, int vf,
2835 slp_instance slp_node_instance, bool analyze_only)
2836 {
2837 gimple stmt = SLP_TREE_SCALAR_STMTS (node)[0];
2838 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2839 tree mask_element_type = NULL_TREE, mask_type;
2840 int i, j, k, nunits, vec_index = 0, scalar_index;
2841 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
2842 gimple next_scalar_stmt;
2843 int group_size = SLP_INSTANCE_GROUP_SIZE (slp_node_instance);
2844 int first_mask_element;
2845 int index, unroll_factor, current_mask_element, ncopies;
2846 unsigned char *mask;
2847 bool only_one_vec = false, need_next_vector = false;
2848 int first_vec_index, second_vec_index, orig_vec_stmts_num, vect_stmts_counter;
2849 int number_of_mask_fixes = 1;
2850 bool mask_fixed = false;
2851 bool needs_first_vector = false;
2852 enum machine_mode mode;
2853
2854 mode = TYPE_MODE (vectype);
2855
2856 if (!can_vec_perm_p (mode, false, NULL))
2857 {
2858 if (dump_enabled_p ())
2859 {
2860 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2861 "no vect permute for ");
2862 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
2863 dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
2864 }
2865 return false;
2866 }
2867
2868 /* The generic VEC_PERM_EXPR code always uses an integral type of the
2869 same size as the vector element being permuted. */
2870 mask_element_type = lang_hooks.types.type_for_mode
2871 (int_mode_for_mode (TYPE_MODE (TREE_TYPE (vectype))), 1);
2872 mask_type = get_vectype_for_scalar_type (mask_element_type);
2873 nunits = TYPE_VECTOR_SUBPARTS (vectype);
2874 mask = XALLOCAVEC (unsigned char, nunits);
2875 unroll_factor = SLP_INSTANCE_UNROLLING_FACTOR (slp_node_instance);
2876
2877 /* The number of vector stmts to generate, based only on the
2878 SLP_NODE_INSTANCE unrolling factor. */
2879 orig_vec_stmts_num = group_size *
2880 SLP_INSTANCE_UNROLLING_FACTOR (slp_node_instance) / nunits;
2881 if (orig_vec_stmts_num == 1)
2882 only_one_vec = true;
2883
2884 /* The number of copies is determined by the final vectorization factor
2885 relative to the SLP_NODE_INSTANCE unrolling factor. */
2886 ncopies = vf / SLP_INSTANCE_UNROLLING_FACTOR (slp_node_instance);
2887
2888 if (!STMT_VINFO_GROUPED_ACCESS (stmt_info))
2889 return false;
2890
2891 /* Generate permutation masks for every NODE. The number of masks for
2892 each NODE is equal to GROUP_SIZE.
2893 E.g., we have a group of three nodes with three loads from the same
2894 location in each node, and the vector size is 4. I.e., we have an
2895 a0b0c0a1b1c1... sequence and we need to create the following vectors:
2896 for a's: a0a0a0a1 a1a1a2a2 a2a3a3a3
2897 for b's: b0b0b0b1 b1b1b2b2 b2b3b3b3
2898 ...
2899
2900 The masks for a's should be: {0,0,0,3} {3,3,6,6} {6,9,9,9}.
2901 The last mask is illegal since we assume two operands for permute
2902 operation, and the mask element values can't be outside that range.
2903 Hence, the last mask must be converted into {2,5,5,5}.
2904 For the first two permutations we need the first and the second input
2905 vectors: {a0,b0,c0,a1} and {b1,c1,a2,b2}, and for the last permutation
2906 we need the second and the third vectors: {b1,c1,a2,b2} and
2907 {c2,a3,b3,c3}. */
2908
2909 {
2910 scalar_index = 0;
2911 index = 0;
2912 vect_stmts_counter = 0;
2913 vec_index = 0;
2914 first_vec_index = vec_index++;
2915 if (only_one_vec)
2916 second_vec_index = first_vec_index;
2917 else
2918 second_vec_index = vec_index++;
2919
2920 for (j = 0; j < unroll_factor; j++)
2921 {
2922 for (k = 0; k < group_size; k++)
2923 {
2924 i = SLP_TREE_LOAD_PERMUTATION (node)[k];
2925 first_mask_element = i + j * group_size;
2926 if (!vect_get_mask_element (stmt, first_mask_element, 0,
2927 nunits, only_one_vec, index,
2928 mask, &current_mask_element,
2929 &need_next_vector,
2930 &number_of_mask_fixes, &mask_fixed,
2931 &needs_first_vector))
2932 return false;
2933 mask[index++] = current_mask_element;
2934
2935 if (index == nunits)
2936 {
2937 index = 0;
2938 if (!can_vec_perm_p (mode, false, mask))
2939 {
2940 if (dump_enabled_p ())
2941 {
2942 dump_printf_loc (MSG_MISSED_OPTIMIZATION,
2943 vect_location,
2944 "unsupported vect permute { ");
2945 for (i = 0; i < nunits; ++i)
2946 dump_printf (MSG_MISSED_OPTIMIZATION, "%d ",
2947 mask[i]);
2948 dump_printf (MSG_MISSED_OPTIMIZATION, "}\n");
2949 }
2950 return false;
2951 }
2952
2953 if (!analyze_only)
2954 {
2955 int l;
2956 tree mask_vec, *mask_elts;
2957 mask_elts = XALLOCAVEC (tree, nunits);
2958 for (l = 0; l < nunits; ++l)
2959 mask_elts[l] = build_int_cst (mask_element_type,
2960 mask[l]);
2961 mask_vec = build_vector (mask_type, mask_elts);
2962
2963 if (need_next_vector)
2964 {
2965 first_vec_index = second_vec_index;
2966 second_vec_index = vec_index;
2967 }
2968
2969 next_scalar_stmt
2970 = SLP_TREE_SCALAR_STMTS (node)[scalar_index++];
2971
2972 vect_create_mask_and_perm (stmt, next_scalar_stmt,
2973 mask_vec, first_vec_index, second_vec_index,
2974 gsi, node, vectype, dr_chain,
2975 ncopies, vect_stmts_counter++);
2976 }
2977 }
2978 }
2979 }
2980 }
2981
2982 return true;
2983 }
2984
2985
2986
2987 /* Vectorize SLP instance tree in postorder. */
2988
2989 static bool
2990 vect_schedule_slp_instance (slp_tree node, slp_instance instance,
2991 unsigned int vectorization_factor)
2992 {
2993 gimple stmt;
2994 bool grouped_store, is_store;
2995 gimple_stmt_iterator si;
2996 stmt_vec_info stmt_info;
2997 unsigned int vec_stmts_size, nunits, group_size;
2998 tree vectype;
2999 int i;
3000 slp_tree child;
3001
3002 if (!node)
3003 return false;
3004
3005 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
3006 vect_schedule_slp_instance (child, instance, vectorization_factor);
3007
3008 stmt = SLP_TREE_SCALAR_STMTS (node)[0];
3009 stmt_info = vinfo_for_stmt (stmt);
3010
3011 /* VECTYPE is the type of the destination. */
3012 vectype = STMT_VINFO_VECTYPE (stmt_info);
3013 nunits = (unsigned int) TYPE_VECTOR_SUBPARTS (vectype);
3014 group_size = SLP_INSTANCE_GROUP_SIZE (instance);
3015
3016 /* For each SLP instance calculate the number of vector stmts to be created
3017 for the scalar stmts in each node of the SLP tree. The number of vector
3018 elements in one vector iteration is the number of scalar elements in
3019 one scalar iteration (GROUP_SIZE) multiplied by VF and divided by the
3020 vector size. */
3021 vec_stmts_size = (vectorization_factor * group_size) / nunits;
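  /* E.g., with GROUP_SIZE 2, a vectorization factor of 4 and 4-element
     vectors this is (4 * 2) / 4 = 2 vector stmts for the node.  */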
3022
3023 if (!SLP_TREE_VEC_STMTS (node).exists ())
3024 {
3025 SLP_TREE_VEC_STMTS (node).create (vec_stmts_size);
3026 SLP_TREE_NUMBER_OF_VEC_STMTS (node) = vec_stmts_size;
3027 }
3028
3029 if (dump_enabled_p ())
3030 {
3031 dump_printf_loc (MSG_NOTE,vect_location,
3032 "------>vectorizing SLP node starting from: ");
3033 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
3034 dump_printf (MSG_NOTE, "\n");
3035 }
3036
3037 /* Loads should be inserted before the first load. */
3038 if (SLP_INSTANCE_FIRST_LOAD_STMT (instance)
3039 && STMT_VINFO_GROUPED_ACCESS (stmt_info)
3040 && !REFERENCE_CLASS_P (gimple_get_lhs (stmt))
3041 && SLP_TREE_LOAD_PERMUTATION (node).exists ())
3042 si = gsi_for_stmt (SLP_INSTANCE_FIRST_LOAD_STMT (instance));
3043 else if (is_pattern_stmt_p (stmt_info))
3044 si = gsi_for_stmt (STMT_VINFO_RELATED_STMT (stmt_info));
3045 else
3046 si = gsi_for_stmt (stmt);
3047
3048 /* Stores should be inserted just before the last store. */
3049 if (STMT_VINFO_GROUPED_ACCESS (stmt_info)
3050 && REFERENCE_CLASS_P (gimple_get_lhs (stmt)))
3051 {
3052 gimple last_store = vect_find_last_store_in_slp_instance (instance);
3053 if (is_pattern_stmt_p (vinfo_for_stmt (last_store)))
3054 last_store = STMT_VINFO_RELATED_STMT (vinfo_for_stmt (last_store));
3055 si = gsi_for_stmt (last_store);
3056 }
3057
3058 /* Mark the first element of the reduction chain as a reduction to properly
3059 transform the node. In the analysis phase only the last element of the
3060 chain is marked as a reduction. */
3061 if (GROUP_FIRST_ELEMENT (stmt_info) && !STMT_VINFO_GROUPED_ACCESS (stmt_info)
3062 && GROUP_FIRST_ELEMENT (stmt_info) == stmt)
3063 {
3064 STMT_VINFO_DEF_TYPE (stmt_info) = vect_reduction_def;
3065 STMT_VINFO_TYPE (stmt_info) = reduc_vec_info_type;
3066 }
3067
3068 is_store = vect_transform_stmt (stmt, &si, &grouped_store, node, instance);
3069 return is_store;
3070 }
3071
3072 /* Replace the scalar calls in SLP node NODE with assignments setting their
3073 lhs to zero. For loop vectorization this is done in vectorizable_call,
3074 but for SLP it needs to be deferred until the end of vect_schedule_slp,
3075 because multiple SLP instances may refer to the same scalar stmt. */
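/* E.g., a call "x_1 = foo (a, b)" whose result has been vectorized is
   replaced by "x_1 = 0" of the appropriate type; the dead assignment
   and any remaining scalar uses are left for later DCE to clean up.  */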
3076
3077 static void
3078 vect_remove_slp_scalar_calls (slp_tree node)
3079 {
3080 gimple stmt, new_stmt;
3081 gimple_stmt_iterator gsi;
3082 int i;
3083 slp_tree child;
3084 tree lhs;
3085 stmt_vec_info stmt_info;
3086
3087 if (!node)
3088 return;
3089
3090 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
3091 vect_remove_slp_scalar_calls (child);
3092
3093 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, stmt)
3094 {
3095 if (!is_gimple_call (stmt) || gimple_bb (stmt) == NULL)
3096 continue;
3097 stmt_info = vinfo_for_stmt (stmt);
3098 if (stmt_info == NULL
3099 || is_pattern_stmt_p (stmt_info)
3100 || !PURE_SLP_STMT (stmt_info))
3101 continue;
3102 lhs = gimple_call_lhs (stmt);
3103 new_stmt = gimple_build_assign (lhs, build_zero_cst (TREE_TYPE (lhs)));
3104 set_vinfo_for_stmt (new_stmt, stmt_info);
3105 set_vinfo_for_stmt (stmt, NULL);
3106 STMT_VINFO_STMT (stmt_info) = new_stmt;
3107 gsi = gsi_for_stmt (stmt);
3108 gsi_replace (&gsi, new_stmt, false);
3109 SSA_NAME_DEF_STMT (gimple_assign_lhs (new_stmt)) = new_stmt;
3110 }
3111 }
3112
3113 /* Generate vector code for all SLP instances in the loop/basic block. */
3114
3115 bool
3116 vect_schedule_slp (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo)
3117 {
3118 vec<slp_instance> slp_instances;
3119 slp_instance instance;
3120 unsigned int i, vf;
3121 bool is_store = false;
3122
3123 if (loop_vinfo)
3124 {
3125 slp_instances = LOOP_VINFO_SLP_INSTANCES (loop_vinfo);
3126 vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
3127 }
3128 else
3129 {
3130 slp_instances = BB_VINFO_SLP_INSTANCES (bb_vinfo);
3131 vf = 1;
3132 }
3133
3134 FOR_EACH_VEC_ELT (slp_instances, i, instance)
3135 {
3136 /* Schedule the tree of INSTANCE. */
3137 is_store = vect_schedule_slp_instance (SLP_INSTANCE_TREE (instance),
3138 instance, vf);
3139 if (dump_enabled_p ())
3140 dump_printf_loc (MSG_NOTE, vect_location,
3141 "vectorizing stmts using SLP.\n");
3142 }
3143
3144 FOR_EACH_VEC_ELT (slp_instances, i, instance)
3145 {
3146 slp_tree root = SLP_INSTANCE_TREE (instance);
3147 gimple store;
3148 unsigned int j;
3149 gimple_stmt_iterator gsi;
3150
3151 /* Remove scalar call stmts. Do not do this for basic-block
3152 vectorization as not all uses may be vectorized.
3153 ??? Why should this be necessary? DCE should be able to
3154 remove the stmts itself.
3155 ??? For BB vectorization we can as well remove scalar
3156 stmts starting from the SLP tree root if they have no
3157 uses. */
3158 if (loop_vinfo)
3159 vect_remove_slp_scalar_calls (root);
3160
3161 for (j = 0; SLP_TREE_SCALAR_STMTS (root).iterate (j, &store)
3162 && j < SLP_INSTANCE_GROUP_SIZE (instance); j++)
3163 {
3164 if (!STMT_VINFO_DATA_REF (vinfo_for_stmt (store)))
3165 break;
3166
3167 if (is_pattern_stmt_p (vinfo_for_stmt (store)))
3168 store = STMT_VINFO_RELATED_STMT (vinfo_for_stmt (store));
3169 /* Free the attached stmt_vec_info and remove the stmt. */
3170 gsi = gsi_for_stmt (store);
3171 unlink_stmt_vdef (store);
3172 gsi_remove (&gsi, true);
3173 release_defs (store);
3174 free_stmt_vec_info (store);
3175 }
3176 }
3177
3178 return is_store;
3179 }
3180
3181
3182 /* Vectorize the basic block. */
3183
3184 void
3185 vect_slp_transform_bb (basic_block bb)
3186 {
3187 bb_vec_info bb_vinfo = vec_info_for_bb (bb);
3188 gimple_stmt_iterator si;
3189
3190 gcc_assert (bb_vinfo);
3191
3192 if (dump_enabled_p ())
3193 dump_printf_loc (MSG_NOTE, vect_location, "SLPing BB\n");
3194
3195 for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
3196 {
3197 gimple stmt = gsi_stmt (si);
3198 stmt_vec_info stmt_info;
3199
3200 if (dump_enabled_p ())
3201 {
3202 dump_printf_loc (MSG_NOTE, vect_location,
3203 "------>SLPing statement: ");
3204 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
3205 dump_printf (MSG_NOTE, "\n");
3206 }
3207
3208 stmt_info = vinfo_for_stmt (stmt);
3209 gcc_assert (stmt_info);
3210
3211 /* Schedule all the SLP instances when the first SLP stmt is reached. */
3212 if (STMT_SLP_TYPE (stmt_info))
3213 {
3214 vect_schedule_slp (NULL, bb_vinfo);
3215 break;
3216 }
3217 }
3218
3219 if (dump_enabled_p ())
3220 dump_printf_loc (MSG_NOTE, vect_location,
3221 "BASIC BLOCK VECTORIZED\n");
3222
3223 destroy_bb_vec_info (bb_vinfo);
3224 }