Merger of git branch "gimple-classes-v2-option-3"
[gcc.git] / gcc / tree-vect-slp.c
1 /* SLP - Basic Block Vectorization
2 Copyright (C) 2007-2014 Free Software Foundation, Inc.
3 Contributed by Dorit Naishlos <dorit@il.ibm.com>
4 and Ira Rosen <irar@il.ibm.com>
5
6 This file is part of GCC.
7
8 GCC is free software; you can redistribute it and/or modify it under
9 the terms of the GNU General Public License as published by the Free
10 Software Foundation; either version 3, or (at your option) any later
11 version.
12
13 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
14 WARRANTY; without even the implied warranty of MERCHANTABILITY or
15 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
16 for more details.
17
18 You should have received a copy of the GNU General Public License
19 along with GCC; see the file COPYING3. If not see
20 <http://www.gnu.org/licenses/>. */
21
22 #include "config.h"
23 #include "system.h"
24 #include "coretypes.h"
25 #include "dumpfile.h"
26 #include "tm.h"
27 #include "tree.h"
28 #include "stor-layout.h"
29 #include "target.h"
30 #include "predict.h"
31 #include "vec.h"
32 #include "hashtab.h"
33 #include "hash-set.h"
34 #include "machmode.h"
35 #include "hard-reg-set.h"
36 #include "input.h"
37 #include "function.h"
38 #include "basic-block.h"
39 #include "gimple-pretty-print.h"
40 #include "tree-ssa-alias.h"
41 #include "internal-fn.h"
42 #include "gimple-expr.h"
43 #include "is-a.h"
44 #include "gimple.h"
45 #include "gimple-iterator.h"
46 #include "gimple-ssa.h"
47 #include "tree-phinodes.h"
48 #include "ssa-iterators.h"
49 #include "stringpool.h"
50 #include "tree-ssanames.h"
51 #include "tree-pass.h"
52 #include "cfgloop.h"
53 #include "expr.h"
54 #include "recog.h" /* FIXME: for insn_data */
55 #include "insn-codes.h"
56 #include "optabs.h"
57 #include "tree-vectorizer.h"
58 #include "langhooks.h"
59
60 /* Extract the location of the basic block in the source code.
61 Return the basic block location if succeed and NULL if not. */
62
63 source_location
64 find_bb_location (basic_block bb)
65 {
66 gimple stmt = NULL;
67 gimple_stmt_iterator si;
68
69 if (!bb)
70 return UNKNOWN_LOCATION;
71
72 for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
73 {
74 stmt = gsi_stmt (si);
75 if (gimple_location (stmt) != UNKNOWN_LOCATION)
76 return gimple_location (stmt);
77 }
78
79 return UNKNOWN_LOCATION;
80 }
81
82
83 /* Recursively free the memory allocated for the SLP tree rooted at NODE. */
84
85 static void
86 vect_free_slp_tree (slp_tree node)
87 {
88 int i;
89 slp_tree child;
90
91 if (!node)
92 return;
93
94 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
95 vect_free_slp_tree (child);
96
97 SLP_TREE_CHILDREN (node).release ();
98 SLP_TREE_SCALAR_STMTS (node).release ();
99 SLP_TREE_VEC_STMTS (node).release ();
100 SLP_TREE_LOAD_PERMUTATION (node).release ();
101
102 free (node);
103 }
104
105
/* Free the memory allocated for the SLP instance INSTANCE: the SLP
   tree it roots, the per-instance load and cost vectors, and the
   instance structure itself.  */

void
vect_free_slp_instance (slp_instance instance)
{
  /* The tree and the vectors are owned by the instance, so release
     them before freeing the instance itself.  */
  vect_free_slp_tree (SLP_INSTANCE_TREE (instance));
  SLP_INSTANCE_LOADS (instance).release ();
  SLP_INSTANCE_BODY_COST_VEC (instance).release ();
  free (instance);
}
116
117
118 /* Create an SLP node for SCALAR_STMTS. */
119
120 static slp_tree
121 vect_create_new_slp_node (vec<gimple> scalar_stmts)
122 {
123 slp_tree node;
124 gimple stmt = scalar_stmts[0];
125 unsigned int nops;
126
127 if (is_gimple_call (stmt))
128 nops = gimple_call_num_args (stmt);
129 else if (is_gimple_assign (stmt))
130 {
131 nops = gimple_num_ops (stmt) - 1;
132 if (gimple_assign_rhs_code (stmt) == COND_EXPR)
133 nops++;
134 }
135 else
136 return NULL;
137
138 node = XNEW (struct _slp_tree);
139 SLP_TREE_SCALAR_STMTS (node) = scalar_stmts;
140 SLP_TREE_VEC_STMTS (node).create (0);
141 SLP_TREE_CHILDREN (node).create (nops);
142 SLP_TREE_LOAD_PERMUTATION (node) = vNULL;
143
144 return node;
145 }
146
147
148 /* Allocate operands info for NOPS operands, and GROUP_SIZE def-stmts for each
149 operand. */
150 static vec<slp_oprnd_info>
151 vect_create_oprnd_info (int nops, int group_size)
152 {
153 int i;
154 slp_oprnd_info oprnd_info;
155 vec<slp_oprnd_info> oprnds_info;
156
157 oprnds_info.create (nops);
158 for (i = 0; i < nops; i++)
159 {
160 oprnd_info = XNEW (struct _slp_oprnd_info);
161 oprnd_info->def_stmts.create (group_size);
162 oprnd_info->first_dt = vect_uninitialized_def;
163 oprnd_info->first_op_type = NULL_TREE;
164 oprnd_info->first_pattern = false;
165 oprnds_info.quick_push (oprnd_info);
166 }
167
168 return oprnds_info;
169 }
170
171
172 /* Free operands info. */
173
174 static void
175 vect_free_oprnd_info (vec<slp_oprnd_info> &oprnds_info)
176 {
177 int i;
178 slp_oprnd_info oprnd_info;
179
180 FOR_EACH_VEC_ELT (oprnds_info, i, oprnd_info)
181 {
182 oprnd_info->def_stmts.release ();
183 XDELETE (oprnd_info);
184 }
185
186 oprnds_info.release ();
187 }
188
189
190 /* Find the place of the data-ref in STMT in the interleaving chain that starts
191 from FIRST_STMT. Return -1 if the data-ref is not a part of the chain. */
192
193 static int
194 vect_get_place_in_interleaving_chain (gimple stmt, gimple first_stmt)
195 {
196 gimple next_stmt = first_stmt;
197 int result = 0;
198
199 if (first_stmt != GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)))
200 return -1;
201
202 do
203 {
204 if (next_stmt == stmt)
205 return result;
206 result++;
207 next_stmt = GROUP_NEXT_ELEMENT (vinfo_for_stmt (next_stmt));
208 }
209 while (next_stmt);
210
211 return -1;
212 }
213
214
/* Get the defs for the rhs of STMT (collect them in OPRNDS_INFO), check that
   they are of a valid type and that they match the defs of the first stmt of
   the SLP group (stored in OPRNDS_INFO).  If there was a fatal error
   return -1, if the error could be corrected by swapping operands of the
   operation return 1, if everything is ok return 0.  As a side effect,
   when the commutative-swap heuristic fires, the operands of STMT itself
   are swapped in place at the end.  */

static int
vect_get_and_check_slp_defs (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo,
			     gimple stmt, bool first,
                             vec<slp_oprnd_info> *oprnds_info)
{
  tree oprnd;
  unsigned int i, number_of_oprnds;
  tree def;
  gimple def_stmt;
  enum vect_def_type dt = vect_uninitialized_def;
  struct loop *loop = NULL;
  bool pattern = false;
  slp_oprnd_info oprnd_info;
  /* Index of the first rhs operand within STMT's gimple ops.  */
  int first_op_idx = 1;
  bool commutative = false;
  /* True when operand 0/1 live inside the COND_EXPR's comparison.  */
  bool first_op_cond = false;

  if (loop_vinfo)
    loop = LOOP_VINFO_LOOP (loop_vinfo);

  if (is_gimple_call (stmt))
    {
      number_of_oprnds = gimple_call_num_args (stmt);
      /* Call arguments start at gimple op 3 (ops 0-2 are lhs, callee
	 and static chain — see gimple.h; TODO confirm).  */
      first_op_idx = 3;
    }
  else if (is_gimple_assign (stmt))
    {
      enum tree_code code = gimple_assign_rhs_code (stmt);
      number_of_oprnds = gimple_num_ops (stmt) - 1;
      if (gimple_assign_rhs_code (stmt) == COND_EXPR)
	{
	  /* The embedded comparison contributes an extra operand; treat
	     its two sides as swappable.  */
	  first_op_cond = true;
	  commutative = true;
	  number_of_oprnds++;
	}
      else
	commutative = commutative_tree_code (code);
    }
  else
    return -1;

  bool swapped = false;
  for (i = 0; i < number_of_oprnds; i++)
    {
again:
      /* Fetch operand I, honoring a pending swap of operands 0 and 1
	 (SWAPPED turns index 0 into 1 and vice versa via !i).  */
      if (first_op_cond)
	{
	  if (i == 0 || i == 1)
	    oprnd = TREE_OPERAND (gimple_op (stmt, first_op_idx),
				  swapped ? !i : i);
	  else
	    oprnd = gimple_op (stmt, first_op_idx + i - 1);
	}
      else
        oprnd = gimple_op (stmt, first_op_idx + (swapped ? !i : i));

      oprnd_info = (*oprnds_info)[i];

      if (!vect_is_simple_use (oprnd, NULL, loop_vinfo, bb_vinfo, &def_stmt,
			       &def, &dt)
	  || (!def_stmt && dt != vect_constant_def))
	{
	  if (dump_enabled_p ())
	    {
	      dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			       "Build SLP failed: can't find def for ");
	      dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM, oprnd);
              dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
	    }

	  return -1;
	}

      /* Check if DEF_STMT is a part of a pattern in LOOP and get the def stmt
         from the pattern.  Check that all the stmts of the node are in the
         pattern.  */
      if (def_stmt && gimple_bb (def_stmt)
          && ((loop && flow_bb_inside_loop_p (loop, gimple_bb (def_stmt)))
	      || (!loop && gimple_bb (def_stmt) == BB_VINFO_BB (bb_vinfo)
		  && gimple_code (def_stmt) != GIMPLE_PHI))
          && vinfo_for_stmt (def_stmt)
          && STMT_VINFO_IN_PATTERN_P (vinfo_for_stmt (def_stmt))
	  && !STMT_VINFO_RELEVANT (vinfo_for_stmt (def_stmt))
	  && !STMT_VINFO_LIVE_P (vinfo_for_stmt (def_stmt)))
        {
          pattern = true;
          if (!first && !oprnd_info->first_pattern)
	    {
	      /* Operand 0 mismatched on the pattern-ness of its def;
		 retry once with operands 0 and 1 exchanged.  */
	      if (i == 0
		  && !swapped
		  && commutative)
		{
		  swapped = true;
		  goto again;
		}

	      if (dump_enabled_p ())
		{
		  dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
				   "Build SLP failed: some of the stmts"
				   " are in a pattern, and others are not ");
		  dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM, oprnd);
                  dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
		}

	      return 1;
            }

	  /* Continue the analysis with the pattern replacement stmt.  */
          def_stmt = STMT_VINFO_RELATED_STMT (vinfo_for_stmt (def_stmt));
          dt = STMT_VINFO_DEF_TYPE (vinfo_for_stmt (def_stmt));

          if (dt == vect_unknown_def_type)
            {
              if (dump_enabled_p ())
                dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
				 "Unsupported pattern.\n");
              return -1;
            }

          switch (gimple_code (def_stmt))
            {
              case GIMPLE_PHI:
                def = gimple_phi_result (def_stmt);
                break;

              case GIMPLE_ASSIGN:
                def = gimple_assign_lhs (def_stmt);
                break;

              default:
                if (dump_enabled_p ())
                  dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
				   "unsupported defining stmt:\n");
                return -1;
            }
        }

      if (first)
	{
	  /* First stmt of the group: record what the other group members
	     must match.  */
	  oprnd_info->first_dt = dt;
	  oprnd_info->first_pattern = pattern;
	  oprnd_info->first_op_type = TREE_TYPE (oprnd);
	}
      else
	{
	  /* Not first stmt of the group, check that the def-stmt/s match
	     the def-stmt/s of the first stmt.  Allow different definition
	     types for reduction chains: the first stmt must be a
	     vect_reduction_def (a phi node), and the rest
	     vect_internal_def.  */
	  if (((oprnd_info->first_dt != dt
                && !(oprnd_info->first_dt == vect_reduction_def
                     && dt == vect_internal_def)
		&& !((oprnd_info->first_dt == vect_external_def
		      || oprnd_info->first_dt == vect_constant_def)
		     && (dt == vect_external_def
			 || dt == vect_constant_def)))
	       || !types_compatible_p (oprnd_info->first_op_type,
				       TREE_TYPE (oprnd))))
	    {
	      /* Try swapping operands if we got a mismatch.  */
	      if (i == 0
		  && !swapped
		  && commutative)
		{
		  swapped = true;
		  goto again;
		}

	      if (dump_enabled_p ())
		dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
				 "Build SLP failed: different types\n");

	      return 1;
	    }
	}

      /* Check the types of the definitions.  */
      switch (dt)
	{
	case vect_constant_def:
	case vect_external_def:
	case vect_reduction_def:
	  /* No def-stmt to record for these.  */
	  break;

	case vect_internal_def:
	  oprnd_info->def_stmts.quick_push (def_stmt);
	  break;

	default:
	  /* FORNOW: Not supported.  */
	  if (dump_enabled_p ())
	    {
	      dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			       "Build SLP failed: illegal type of def ");
	      dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM, def);
              dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
	    }

	  return -1;
	}
    }

  /* Swap operands.  The swap was only applied to our local view of the
     operands above; mirror it into STMT itself now.  */
  if (swapped)
    {
      if (first_op_cond)
	{
	  tree cond = gimple_assign_rhs1 (stmt);
	  swap_ssa_operands (stmt, &TREE_OPERAND (cond, 0),
			     &TREE_OPERAND (cond, 1));
	  TREE_SET_CODE (cond, swap_tree_comparison (TREE_CODE (cond)));
	}
      else
	swap_ssa_operands (stmt, gimple_assign_rhs1_ptr (stmt),
			   gimple_assign_rhs2_ptr (stmt));
    }

  return 0;
}
441
442
/* Verify if the scalar stmts STMTS are isomorphic, require data
   permutation or are of unsupported types of operation.  Return
   true if they are, otherwise return false and indicate in *MATCHES
   which stmts are not isomorphic to the first one.  If MATCHES[0]
   is false then this indicates the comparison could not be
   carried out or the stmts will never be vectorized by SLP.
   *MAX_NUNITS is updated to the largest number of vector subparts
   seen among the stmts' vector types.  */

static bool
vect_build_slp_tree_1 (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo,
		       vec<gimple> stmts, unsigned int group_size,
		       unsigned nops, unsigned int *max_nunits,
		       unsigned int vectorization_factor, bool *matches)
{
  unsigned int i;
  gimple stmt = stmts[0];
  enum tree_code first_stmt_code = ERROR_MARK, rhs_code = ERROR_MARK;
  enum tree_code first_cond_code = ERROR_MARK;
  tree lhs;
  bool need_same_oprnds = false;
  tree vectype, scalar_type, first_op1 = NULL_TREE;
  optab optab;
  int icode;
  machine_mode optab_op2_mode;
  machine_mode vec_mode;
  struct data_reference *first_dr;
  HOST_WIDE_INT dummy;
  gimple first_load = NULL, prev_first_load = NULL, old_first_load = NULL;
  tree cond;

  /* For every stmt in NODE find its def stmt/s.  */
  FOR_EACH_VEC_ELT (stmts, i, stmt)
    {
      /* Assume mismatch until the stmt survives all the checks below.  */
      matches[i] = false;

      if (dump_enabled_p ())
	{
	  dump_printf_loc (MSG_NOTE, vect_location, "Build SLP for ");
	  dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
	  dump_printf (MSG_NOTE, "\n");
	}

      /* Fail to vectorize statements marked as unvectorizable.  */
      if (!STMT_VINFO_VECTORIZABLE (vinfo_for_stmt (stmt)))
        {
          if (dump_enabled_p ())
            {
              dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			       "Build SLP failed: unvectorizable statement ");
              dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
              dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
            }
	  /* Fatal mismatch.  */
	  matches[0] = false;
          return false;
        }

      lhs = gimple_get_lhs (stmt);
      if (lhs == NULL_TREE)
	{
	  if (dump_enabled_p ())
	    {
	      dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			       "Build SLP failed: not GIMPLE_ASSIGN nor "
			       "GIMPLE_CALL ");
	      dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
	      dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
	    }
	  /* Fatal mismatch.  */
	  matches[0] = false;
	  return false;
	}

      /* A vectorizable COND_EXPR must have its condition be an embedded
	 comparison, not an SSA name.  */
      if (is_gimple_assign (stmt)
	  && gimple_assign_rhs_code (stmt) == COND_EXPR
          && (cond = gimple_assign_rhs1 (stmt))
          && !COMPARISON_CLASS_P (cond))
        {
          if (dump_enabled_p ())
            {
              dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			       "Build SLP failed: condition is not "
			       "comparison ");
              dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
              dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
            }
	  /* Fatal mismatch.  */
	  matches[0] = false;
          return false;
        }

      scalar_type = vect_get_smallest_scalar_type (stmt, &dummy, &dummy);
      vectype = get_vectype_for_scalar_type (scalar_type);
      if (!vectype)
        {
          if (dump_enabled_p ())
            {
              dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			       "Build SLP failed: unsupported data-type ");
              dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
				 scalar_type);
              dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
            }
	  /* Fatal mismatch.  */
	  matches[0] = false;
          return false;
        }

      /* In case of multiple types we need to detect the smallest type.  */
      if (*max_nunits < TYPE_VECTOR_SUBPARTS (vectype))
        {
          *max_nunits = TYPE_VECTOR_SUBPARTS (vectype);
          if (bb_vinfo)
            vectorization_factor = *max_nunits;
        }

      if (gcall *call_stmt = dyn_cast <gcall *> (stmt))
	{
	  rhs_code = CALL_EXPR;
	  /* Only plain, nothrow, chain-less calls can be vectorized.  */
	  if (gimple_call_internal_p (call_stmt)
	      || gimple_call_tail_p (call_stmt)
	      || gimple_call_noreturn_p (call_stmt)
	      || !gimple_call_nothrow_p (call_stmt)
	      || gimple_call_chain (call_stmt))
	    {
	      if (dump_enabled_p ())
		{
		  dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
				   "Build SLP failed: unsupported call type ");
		  dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
				    call_stmt, 0);
		  dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
		}
	      /* Fatal mismatch.  */
	      matches[0] = false;
	      return false;
	    }
	}
      else
	rhs_code = gimple_assign_rhs_code (stmt);

      /* Check the operation.  The first stmt fixes the operation every
	 later stmt has to match.  */
      if (i == 0)
	{
	  first_stmt_code = rhs_code;

	  /* Shift arguments should be equal in all the packed stmts for a
	     vector shift with scalar shift operand.  */
	  if (rhs_code == LSHIFT_EXPR || rhs_code == RSHIFT_EXPR
	      || rhs_code == LROTATE_EXPR
	      || rhs_code == RROTATE_EXPR)
	    {
	      vec_mode = TYPE_MODE (vectype);

	      /* First see if we have a vector/vector shift.  */
	      optab = optab_for_tree_code (rhs_code, vectype,
					   optab_vector);

	      if (!optab
		  || optab_handler (optab, vec_mode) == CODE_FOR_nothing)
		{
		  /* No vector/vector shift, try for a vector/scalar shift.  */
		  optab = optab_for_tree_code (rhs_code, vectype,
					       optab_scalar);

		  if (!optab)
		    {
		      if (dump_enabled_p ())
			dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
					 "Build SLP failed: no optab.\n");
		      /* Fatal mismatch.  */
		      matches[0] = false;
		      return false;
		    }
		  icode = (int) optab_handler (optab, vec_mode);
		  if (icode == CODE_FOR_nothing)
		    {
		      if (dump_enabled_p ())
			dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
					 "Build SLP failed: "
					 "op not supported by target.\n");
		      /* Fatal mismatch.  */
		      matches[0] = false;
		      return false;
		    }
		  /* A scalar shift operand must be identical across the
		     group; remember it for the checks below.  */
		  optab_op2_mode = insn_data[icode].operand[2].mode;
		  if (!VECTOR_MODE_P (optab_op2_mode))
		    {
		      need_same_oprnds = true;
		      first_op1 = gimple_assign_rhs2 (stmt);
		    }
		}
	    }
	  else if (rhs_code == WIDEN_LSHIFT_EXPR)
            {
              need_same_oprnds = true;
              first_op1 = gimple_assign_rhs2 (stmt);
            }
	}
      else
	{
	  /* Allow REALPART/IMAGPART pairs and mixed reference codes for
	     grouped accesses; everything else must match exactly.  */
	  if (first_stmt_code != rhs_code
	      && (first_stmt_code != IMAGPART_EXPR
		  || rhs_code != REALPART_EXPR)
	      && (first_stmt_code != REALPART_EXPR
		  || rhs_code != IMAGPART_EXPR)
	      && !(STMT_VINFO_GROUPED_ACCESS (vinfo_for_stmt (stmt))
		   && (first_stmt_code == ARRAY_REF
		       || first_stmt_code == BIT_FIELD_REF
		       || first_stmt_code == INDIRECT_REF
		       || first_stmt_code == COMPONENT_REF
		       || first_stmt_code == MEM_REF)))
	    {
	      if (dump_enabled_p ())
		{
		  dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
				   "Build SLP failed: different operation "
				   "in stmt ");
		  dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
		  dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
		}
	      /* Mismatch.  */
	      continue;
	    }

	  if (need_same_oprnds
	      && !operand_equal_p (first_op1, gimple_assign_rhs2 (stmt), 0))
	    {
	      if (dump_enabled_p ())
		{
		  dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
				   "Build SLP failed: different shift "
				   "arguments in ");
		  dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
		  dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
		}
	      /* Mismatch.  */
	      continue;
	    }

	  if (rhs_code == CALL_EXPR)
	    {
	      /* All calls in the group must call the same function with
		 the same signature and argument count.  */
	      gimple first_stmt = stmts[0];
	      if (gimple_call_num_args (stmt) != nops
		  || !operand_equal_p (gimple_call_fn (first_stmt),
				       gimple_call_fn (stmt), 0)
		  || gimple_call_fntype (first_stmt)
		     != gimple_call_fntype (stmt))
		{
		  if (dump_enabled_p ())
		    {
		      dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
				       "Build SLP failed: different calls in ");
		      dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
					stmt, 0);
		      dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
		    }
		  /* Mismatch.  */
		  continue;
		}
	    }
	}

      /* Grouped store or load.  */
      if (STMT_VINFO_GROUPED_ACCESS (vinfo_for_stmt (stmt)))
	{
	  if (REFERENCE_CLASS_P (lhs))
	    {
	      /* Store.  */
	      ;
	    }
	  else
	    {
	      /* Load.  */
	      unsigned unrolling_factor
		= least_common_multiple
		    (*max_nunits, group_size) / group_size;
	      /* FORNOW: Check that there is no gap between the loads
		 and no gap between the groups when we need to load
		 multiple groups at once.
		 ???  We should enhance this to only disallow gaps
		 inside vectors.  */
	      if ((unrolling_factor > 1
		   && GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)) == stmt
		   && GROUP_GAP (vinfo_for_stmt (stmt)) != 0)
		  || (GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)) != stmt
		      && GROUP_GAP (vinfo_for_stmt (stmt)) != 1))
		{
		  if (dump_enabled_p ())
		    {
		      dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
				       "Build SLP failed: grouped "
				       "loads have gaps ");
		      dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
					stmt, 0);
		      dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
		    }
		  /* Fatal mismatch.  */
		  matches[0] = false;
		  return false;
		}

	      /* Check that the size of interleaved loads group is not
		 greater than the SLP group size.  */
	      unsigned ncopies
		= vectorization_factor / TYPE_VECTOR_SUBPARTS (vectype);
	      if (loop_vinfo
		  && GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)) == stmt
		  && ((GROUP_SIZE (vinfo_for_stmt (stmt))
		       - GROUP_GAP (vinfo_for_stmt (stmt)))
		      > ncopies * group_size))
		{
		  if (dump_enabled_p ())
		    {
		      dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
				       "Build SLP failed: the number "
				       "of interleaved loads is greater than "
				       "the SLP group size ");
		      dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
					stmt, 0);
		      dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
		    }
		  /* Fatal mismatch.  */
		  matches[0] = false;
		  return false;
		}

	      old_first_load = first_load;
	      first_load = GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt));
	      if (prev_first_load)
		{
		  /* Check that there are no loads from different interleaving
		     chains in the same node.  */
		  if (prev_first_load != first_load)
		    {
		      if (dump_enabled_p ())
			{
			  dump_printf_loc (MSG_MISSED_OPTIMIZATION,
					   vect_location,
					   "Build SLP failed: different "
					   "interleaving chains in one node ");
			  dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
					    stmt, 0);
			  dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
			}
		      /* Mismatch.  */
		      continue;
		    }
		}
	      else
		prev_first_load = first_load;

	      /* In some cases a group of loads is just the same load
		 repeated N times.  Only analyze its cost once.  */
	      if (first_load == stmt && old_first_load != first_load)
		{
		  first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (stmt));
		  if (vect_supportable_dr_alignment (first_dr, false)
		      == dr_unaligned_unsupported)
		    {
		      if (dump_enabled_p ())
			{
			  dump_printf_loc (MSG_MISSED_OPTIMIZATION,
					   vect_location,
					   "Build SLP failed: unsupported "
					   "unaligned load ");
			  dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
					    stmt, 0);
			  dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
			}
		      /* Fatal mismatch.  */
		      matches[0] = false;
		      return false;
		    }
		}
	    }
	} /* Grouped access.  */
      else
	{
	  if (TREE_CODE_CLASS (rhs_code) == tcc_reference)
	    {
	      /* Not grouped load.  */
	      if (dump_enabled_p ())
		{
		  dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
				   "Build SLP failed: not grouped load ");
		  dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
		  dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
		}

	      /* FORNOW: Not grouped loads are not supported.  */
	      /* Fatal mismatch.  */
	      matches[0] = false;
	      return false;
	    }

	  /* Not memory operation.  */
	  if (TREE_CODE_CLASS (rhs_code) != tcc_binary
	      && TREE_CODE_CLASS (rhs_code) != tcc_unary
	      && rhs_code != COND_EXPR
	      && rhs_code != CALL_EXPR)
	    {
	      if (dump_enabled_p ())
		{
		  dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
				   "Build SLP failed: operation");
		  dump_printf (MSG_MISSED_OPTIMIZATION, " unsupported ");
		  dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
		  dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
		}
	      /* Fatal mismatch.  */
	      matches[0] = false;
	      return false;
	    }

	  if (rhs_code == COND_EXPR)
	    {
	      /* All COND_EXPRs must use the same comparison code.  */
	      tree cond_expr = gimple_assign_rhs1 (stmt);

	      if (i == 0)
		first_cond_code = TREE_CODE (cond_expr);
	      else if (first_cond_code != TREE_CODE (cond_expr))
		{
		  if (dump_enabled_p ())
		    {
		      dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
				       "Build SLP failed: different"
				       " operation");
		      dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
					stmt, 0);
		      dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
		    }
		  /* Mismatch.  */
		  continue;
		}
	    }
	}

      matches[i] = true;
    }

  for (i = 0; i < group_size; ++i)
    if (!matches[i])
      return false;

  return true;
}
889
890 /* Recursively build an SLP tree starting from NODE.
891 Fail (and return a value not equal to zero) if def-stmts are not
892 isomorphic, require data permutation or are of unsupported types of
893 operation. Otherwise, return 0.
894 The value returned is the depth in the SLP tree where a mismatch
895 was found. */
896
897 static bool
898 vect_build_slp_tree (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo,
899 slp_tree *node, unsigned int group_size,
900 unsigned int *max_nunits,
901 vec<slp_tree> *loads,
902 unsigned int vectorization_factor,
903 bool *matches, unsigned *npermutes, unsigned *tree_size,
904 unsigned max_tree_size)
905 {
906 unsigned nops, i, this_npermutes = 0, this_tree_size = 0;
907 gimple stmt;
908
909 if (!matches)
910 matches = XALLOCAVEC (bool, group_size);
911 if (!npermutes)
912 npermutes = &this_npermutes;
913
914 matches[0] = false;
915
916 stmt = SLP_TREE_SCALAR_STMTS (*node)[0];
917 if (is_gimple_call (stmt))
918 nops = gimple_call_num_args (stmt);
919 else if (is_gimple_assign (stmt))
920 {
921 nops = gimple_num_ops (stmt) - 1;
922 if (gimple_assign_rhs_code (stmt) == COND_EXPR)
923 nops++;
924 }
925 else
926 return false;
927
928 if (!vect_build_slp_tree_1 (loop_vinfo, bb_vinfo,
929 SLP_TREE_SCALAR_STMTS (*node), group_size, nops,
930 max_nunits, vectorization_factor, matches))
931 return false;
932
933 /* If the SLP node is a load, terminate the recursion. */
934 if (STMT_VINFO_GROUPED_ACCESS (vinfo_for_stmt (stmt))
935 && DR_IS_READ (STMT_VINFO_DATA_REF (vinfo_for_stmt (stmt))))
936 {
937 loads->safe_push (*node);
938 return true;
939 }
940
941 /* Get at the operands, verifying they are compatible. */
942 vec<slp_oprnd_info> oprnds_info = vect_create_oprnd_info (nops, group_size);
943 slp_oprnd_info oprnd_info;
944 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (*node), i, stmt)
945 {
946 switch (vect_get_and_check_slp_defs (loop_vinfo, bb_vinfo,
947 stmt, (i == 0), &oprnds_info))
948 {
949 case 0:
950 break;
951 case -1:
952 matches[0] = false;
953 vect_free_oprnd_info (oprnds_info);
954 return false;
955 case 1:
956 matches[i] = false;
957 break;
958 }
959 }
960 for (i = 0; i < group_size; ++i)
961 if (!matches[i])
962 {
963 vect_free_oprnd_info (oprnds_info);
964 return false;
965 }
966
967 stmt = SLP_TREE_SCALAR_STMTS (*node)[0];
968
969 /* Create SLP_TREE nodes for the definition node/s. */
970 FOR_EACH_VEC_ELT (oprnds_info, i, oprnd_info)
971 {
972 slp_tree child;
973 unsigned old_nloads = loads->length ();
974 unsigned old_max_nunits = *max_nunits;
975
976 if (oprnd_info->first_dt != vect_internal_def)
977 continue;
978
979 if (++this_tree_size > max_tree_size)
980 {
981 vect_free_oprnd_info (oprnds_info);
982 return false;
983 }
984
985 child = vect_create_new_slp_node (oprnd_info->def_stmts);
986 if (!child)
987 {
988 vect_free_oprnd_info (oprnds_info);
989 return false;
990 }
991
992 bool *matches = XALLOCAVEC (bool, group_size);
993 if (vect_build_slp_tree (loop_vinfo, bb_vinfo, &child,
994 group_size, max_nunits, loads,
995 vectorization_factor, matches,
996 npermutes, &this_tree_size, max_tree_size))
997 {
998 oprnd_info->def_stmts = vNULL;
999 SLP_TREE_CHILDREN (*node).quick_push (child);
1000 continue;
1001 }
1002
1003 /* If the SLP build for operand zero failed and operand zero
1004 and one can be commutated try that for the scalar stmts
1005 that failed the match. */
1006 if (i == 0
1007 /* A first scalar stmt mismatch signals a fatal mismatch. */
1008 && matches[0]
1009 /* ??? For COND_EXPRs we can swap the comparison operands
1010 as well as the arms under some constraints. */
1011 && nops == 2
1012 && oprnds_info[1]->first_dt == vect_internal_def
1013 && is_gimple_assign (stmt)
1014 && commutative_tree_code (gimple_assign_rhs_code (stmt))
1015 /* Do so only if the number of not successful permutes was nor more
1016 than a cut-ff as re-trying the recursive match on
1017 possibly each level of the tree would expose exponential
1018 behavior. */
1019 && *npermutes < 4)
1020 {
1021 /* Roll back. */
1022 *max_nunits = old_max_nunits;
1023 loads->truncate (old_nloads);
1024 /* Swap mismatched definition stmts. */
1025 dump_printf_loc (MSG_NOTE, vect_location,
1026 "Re-trying with swapped operands of stmts ");
1027 for (unsigned j = 0; j < group_size; ++j)
1028 if (!matches[j])
1029 {
1030 gimple tem = oprnds_info[0]->def_stmts[j];
1031 oprnds_info[0]->def_stmts[j] = oprnds_info[1]->def_stmts[j];
1032 oprnds_info[1]->def_stmts[j] = tem;
1033 dump_printf (MSG_NOTE, "%d ", j);
1034 }
1035 dump_printf (MSG_NOTE, "\n");
1036 /* And try again ... */
1037 if (vect_build_slp_tree (loop_vinfo, bb_vinfo, &child,
1038 group_size, max_nunits, loads,
1039 vectorization_factor,
1040 matches, npermutes, &this_tree_size,
1041 max_tree_size))
1042 {
1043 oprnd_info->def_stmts = vNULL;
1044 SLP_TREE_CHILDREN (*node).quick_push (child);
1045 continue;
1046 }
1047
1048 ++*npermutes;
1049 }
1050
1051 oprnd_info->def_stmts = vNULL;
1052 vect_free_slp_tree (child);
1053 vect_free_oprnd_info (oprnds_info);
1054 return false;
1055 }
1056
1057 if (tree_size)
1058 *tree_size += this_tree_size;
1059
1060 vect_free_oprnd_info (oprnds_info);
1061 return true;
1062 }
1063
1064 /* Dump a slp tree NODE using flags specified in DUMP_KIND. */
1065
1066 static void
1067 vect_print_slp_tree (int dump_kind, slp_tree node)
1068 {
1069 int i;
1070 gimple stmt;
1071 slp_tree child;
1072
1073 if (!node)
1074 return;
1075
1076 dump_printf (dump_kind, "node ");
1077 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, stmt)
1078 {
1079 dump_printf (dump_kind, "\n\tstmt %d ", i);
1080 dump_gimple_stmt (dump_kind, TDF_SLIM, stmt, 0);
1081 }
1082 dump_printf (dump_kind, "\n");
1083
1084 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
1085 vect_print_slp_tree (dump_kind, child);
1086 }
1087
1088
1089 /* Mark the tree rooted at NODE with MARK (PURE_SLP or HYBRID).
1090 If MARK is HYBRID, it refers to a specific stmt in NODE (the stmt at index
1091 J). Otherwise, MARK is PURE_SLP and J is -1, which indicates that all the
1092 stmts in NODE are to be marked. */
1093
1094 static void
1095 vect_mark_slp_stmts (slp_tree node, enum slp_vect_type mark, int j)
1096 {
1097 int i;
1098 gimple stmt;
1099 slp_tree child;
1100
1101 if (!node)
1102 return;
1103
1104 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, stmt)
1105 if (j < 0 || i == j)
1106 STMT_SLP_TYPE (vinfo_for_stmt (stmt)) = mark;
1107
1108 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
1109 vect_mark_slp_stmts (child, mark, j);
1110 }
1111
1112
1113 /* Mark the statements of the tree rooted at NODE as relevant (vect_used). */
1114
1115 static void
1116 vect_mark_slp_stmts_relevant (slp_tree node)
1117 {
1118 int i;
1119 gimple stmt;
1120 stmt_vec_info stmt_info;
1121 slp_tree child;
1122
1123 if (!node)
1124 return;
1125
1126 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, stmt)
1127 {
1128 stmt_info = vinfo_for_stmt (stmt);
1129 gcc_assert (!STMT_VINFO_RELEVANT (stmt_info)
1130 || STMT_VINFO_RELEVANT (stmt_info) == vect_used_in_scope);
1131 STMT_VINFO_RELEVANT (stmt_info) = vect_used_in_scope;
1132 }
1133
1134 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
1135 vect_mark_slp_stmts_relevant (child);
1136 }
1137
1138
1139 /* Rearrange the statements of NODE according to PERMUTATION. */
1140
1141 static void
1142 vect_slp_rearrange_stmts (slp_tree node, unsigned int group_size,
1143 vec<unsigned> permutation)
1144 {
1145 gimple stmt;
1146 vec<gimple> tmp_stmts;
1147 unsigned int i;
1148 slp_tree child;
1149
1150 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
1151 vect_slp_rearrange_stmts (child, group_size, permutation);
1152
1153 gcc_assert (group_size == SLP_TREE_SCALAR_STMTS (node).length ());
1154 tmp_stmts.create (group_size);
1155 tmp_stmts.quick_grow_cleared (group_size);
1156
1157 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, stmt)
1158 tmp_stmts[permutation[i]] = stmt;
1159
1160 SLP_TREE_SCALAR_STMTS (node).release ();
1161 SLP_TREE_SCALAR_STMTS (node) = tmp_stmts;
1162 }
1163
1164
/* Check if the required load permutations in the SLP instance
   SLP_INSTN are supported.

   Handles three cases in turn:
     1. reductions (no data-ref in the root, not a reduction chain):
	any uniform permutation is allowed and is folded away by
	rearranging the whole tree;
     2. basic-block SLP: any subchain of an interleaving chain is allowed;
     3. otherwise only the 0..01..1.. pattern is supported and is checked
	by actually dry-running vect_transform_slp_perm_load.

   Returns true if the permutations are supported (possibly after
   rewriting the instance in place).  */

static bool
vect_supported_load_permutation_p (slp_instance slp_instn)
{
  unsigned int group_size = SLP_INSTANCE_GROUP_SIZE (slp_instn);
  unsigned int i, j, k, next;
  sbitmap load_index;
  slp_tree node;
  gimple stmt, load, next_load, first_load;
  struct data_reference *dr;

  /* Dump every load node's permutation; a node without an explicit
     permutation vector is the identity.  */
  if (dump_enabled_p ())
    {
      dump_printf_loc (MSG_NOTE, vect_location, "Load permutation ");
      FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (slp_instn), i, node)
	if (node->load_permutation.exists ())
	  FOR_EACH_VEC_ELT (node->load_permutation, j, next)
	    dump_printf (MSG_NOTE, "%d ", next);
	else
	  for (k = 0; k < group_size; ++k)
	    dump_printf (MSG_NOTE, "%d ", k);
      dump_printf (MSG_NOTE, "\n");
    }

  /* In case of reduction every load permutation is allowed, since the order
     of the reduction statements is not important (as opposed to the case of
     grouped stores).  The only condition we need to check is that all the
     load nodes are of the same size and have the same permutation (and then
     rearrange all the nodes of the SLP instance according to this
     permutation).  */

  /* Check that all the load nodes are of the same size.  */
  /* ???  Can't we assert this? */
  FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (slp_instn), i, node)
    if (SLP_TREE_SCALAR_STMTS (node).length () != (unsigned) group_size)
      return false;

  node = SLP_INSTANCE_TREE (slp_instn);
  stmt = SLP_TREE_SCALAR_STMTS (node)[0];

  /* Reduction (there are no data-refs in the root).
     In reduction chain the order of the loads is important.  */
  if (!STMT_VINFO_DATA_REF (vinfo_for_stmt (stmt))
      && !GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)))
    {
      slp_tree load;
      unsigned int lidx;

      /* Compare all the permutation sequences to the first one.  We know
         that at least one load is permuted.  */
      node = SLP_INSTANCE_LOADS (slp_instn)[0];
      if (!node->load_permutation.exists ())
	return false;
      for (i = 1; SLP_INSTANCE_LOADS (slp_instn).iterate (i, &load); ++i)
	{
	  if (!load->load_permutation.exists ())
	    return false;
	  FOR_EACH_VEC_ELT (load->load_permutation, j, lidx)
	    if (lidx != node->load_permutation[j])
	      return false;
	}

      /* Check that the loads in the first sequence are different and there
	 are no gaps between them.  The sbitmap tracks which group indices
	 have been seen; a repeat or a hole rejects the permutation.  */
      load_index = sbitmap_alloc (group_size);
      bitmap_clear (load_index);
      FOR_EACH_VEC_ELT (node->load_permutation, i, lidx)
	{
	  if (bitmap_bit_p (load_index, lidx))
	    {
	      sbitmap_free (load_index);
	      return false;
	    }
	  bitmap_set_bit (load_index, lidx);
	}
      for (i = 0; i < group_size; i++)
	if (!bitmap_bit_p (load_index, i))
	  {
	    sbitmap_free (load_index);
	    return false;
	  }
      sbitmap_free (load_index);

      /* This permutation is valid for reduction.  Since the order of the
	 statements in the nodes is not important unless they are memory
	 accesses, we can rearrange the statements in all the nodes
	 according to the order of the loads.  */
      vect_slp_rearrange_stmts (SLP_INSTANCE_TREE (slp_instn), group_size,
				node->load_permutation);

      /* We are done, no actual permutations need to be generated.  */
      FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (slp_instn), i, node)
	SLP_TREE_LOAD_PERMUTATION (node).release ();
      return true;
    }

  /* In basic block vectorization we allow any subchain of an interleaving
     chain.
     FORNOW: not supported in loop SLP because of realignment compications.  */
  if (STMT_VINFO_BB_VINFO (vinfo_for_stmt (stmt)))
    {
      /* Check that for every node in the instance the loads
	 form a subchain, i.e. each stmt is the GROUP_NEXT_ELEMENT of
	 its predecessor in the node.  */
      FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (slp_instn), i, node)
	{
	  next_load = NULL;
	  FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), j, load)
	    {
	      if (j != 0 && next_load != load)
		return false;
	      next_load = GROUP_NEXT_ELEMENT (vinfo_for_stmt (load));
	    }
	}

      /* Check that the alignment of the first load in every subchain, i.e.,
         the first statement in every load node, is supported.
	 ???  This belongs in alignment checking.  */
      FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (slp_instn), i, node)
	{
	  first_load = SLP_TREE_SCALAR_STMTS (node)[0];
	  if (first_load != GROUP_FIRST_ELEMENT (vinfo_for_stmt (first_load)))
	    {
	      /* Subchain does not start at the chain head, so the load
		 may be misaligned relative to the analyzed alignment.  */
	      dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_load));
	      if (vect_supportable_dr_alignment (dr, false)
		  == dr_unaligned_unsupported)
		{
		  if (dump_enabled_p ())
		    {
		      dump_printf_loc (MSG_MISSED_OPTIMIZATION,
				       vect_location,
				       "unsupported unaligned load ");
		      dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
					first_load, 0);
		      dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
		    }
		  return false;
		}
	    }
	}

      /* We are done, no actual permutations need to be generated.  */
      FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (slp_instn), i, node)
	SLP_TREE_LOAD_PERMUTATION (node).release ();
      return true;
    }

  /* FORNOW: the only supported permutation is 0..01..1.. of length equal to
     GROUP_SIZE and where each sequence of same drs is of GROUP_SIZE length as
     well (unless it's reduction).  */
  if (SLP_INSTANCE_LOADS (slp_instn).length () != group_size)
    return false;
  FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (slp_instn), i, node)
    if (!node->load_permutation.exists ())
      return false;

  /* Verify each node's permutation is constant (all entries equal its
     first entry) and that the first entries together cover 0..group_size-1
     exactly once.  */
  load_index = sbitmap_alloc (group_size);
  bitmap_clear (load_index);
  FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (slp_instn), i, node)
    {
      unsigned int lidx = node->load_permutation[0];
      if (bitmap_bit_p (load_index, lidx))
	{
	  sbitmap_free (load_index);
	  return false;
	}
      bitmap_set_bit (load_index, lidx);
      FOR_EACH_VEC_ELT (node->load_permutation, j, k)
	if (k != lidx)
	  {
	    sbitmap_free (load_index);
	    return false;
	  }
    }
  for (i = 0; i < group_size; i++)
    if (!bitmap_bit_p (load_index, i))
      {
	sbitmap_free (load_index);
	return false;
      }
  sbitmap_free (load_index);

  /* Dry-run the permutation code generation (ANALYZE_ONLY == true) to
     make sure the target can actually perform it.  */
  FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (slp_instn), i, node)
    if (node->load_permutation.exists ()
	&& !vect_transform_slp_perm_load
	      (node, vNULL, NULL,
	       SLP_INSTANCE_UNROLLING_FACTOR (slp_instn), slp_instn, true))
      return false;
  return true;
}
1356
1357
1358 /* Find the first load in the loop that belongs to INSTANCE.
1359 When loads are in several SLP nodes, there can be a case in which the first
1360 load does not appear in the first SLP node to be transformed, causing
1361 incorrect order of statements. Since we generate all the loads together,
1362 they must be inserted before the first load of the SLP instance and not
1363 before the first load of the first node of the instance. */
1364
1365 static gimple
1366 vect_find_first_load_in_slp_instance (slp_instance instance)
1367 {
1368 int i, j;
1369 slp_tree load_node;
1370 gimple first_load = NULL, load;
1371
1372 FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (instance), i, load_node)
1373 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (load_node), j, load)
1374 first_load = get_earlier_stmt (load, first_load);
1375
1376 return first_load;
1377 }
1378
1379
1380 /* Find the last store in SLP INSTANCE. */
1381
1382 static gimple
1383 vect_find_last_store_in_slp_instance (slp_instance instance)
1384 {
1385 int i;
1386 slp_tree node;
1387 gimple last_store = NULL, store;
1388
1389 node = SLP_INSTANCE_TREE (instance);
1390 for (i = 0; SLP_TREE_SCALAR_STMTS (node).iterate (i, &store); i++)
1391 last_store = get_later_stmt (store, last_store);
1392
1393 return last_store;
1394 }
1395
1396 /* Compute the cost for the SLP node NODE in the SLP instance INSTANCE. */
1397
1398 static void
1399 vect_analyze_slp_cost_1 (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo,
1400 slp_instance instance, slp_tree node,
1401 stmt_vector_for_cost *prologue_cost_vec,
1402 unsigned ncopies_for_cost)
1403 {
1404 stmt_vector_for_cost *body_cost_vec = &SLP_INSTANCE_BODY_COST_VEC (instance);
1405
1406 unsigned i;
1407 slp_tree child;
1408 gimple stmt, s;
1409 stmt_vec_info stmt_info;
1410 tree lhs;
1411 unsigned group_size = SLP_INSTANCE_GROUP_SIZE (instance);
1412
1413 /* Recurse down the SLP tree. */
1414 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
1415 vect_analyze_slp_cost_1 (loop_vinfo, bb_vinfo,
1416 instance, child, prologue_cost_vec,
1417 ncopies_for_cost);
1418
1419 /* Look at the first scalar stmt to determine the cost. */
1420 stmt = SLP_TREE_SCALAR_STMTS (node)[0];
1421 stmt_info = vinfo_for_stmt (stmt);
1422 if (STMT_VINFO_GROUPED_ACCESS (stmt_info))
1423 {
1424 if (DR_IS_WRITE (STMT_VINFO_DATA_REF (stmt_info)))
1425 vect_model_store_cost (stmt_info, ncopies_for_cost, false,
1426 vect_uninitialized_def,
1427 node, prologue_cost_vec, body_cost_vec);
1428 else
1429 {
1430 int i;
1431 gcc_checking_assert (DR_IS_READ (STMT_VINFO_DATA_REF (stmt_info)));
1432 vect_model_load_cost (stmt_info, ncopies_for_cost, false,
1433 node, prologue_cost_vec, body_cost_vec);
1434 /* If the load is permuted record the cost for the permutation.
1435 ??? Loads from multiple chains are let through here only
1436 for a single special case involving complex numbers where
1437 in the end no permutation is necessary. */
1438 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, s)
1439 if ((STMT_VINFO_GROUP_FIRST_ELEMENT (vinfo_for_stmt (s))
1440 == STMT_VINFO_GROUP_FIRST_ELEMENT (stmt_info))
1441 && vect_get_place_in_interleaving_chain
1442 (s, STMT_VINFO_GROUP_FIRST_ELEMENT (stmt_info)) != i)
1443 {
1444 record_stmt_cost (body_cost_vec, group_size, vec_perm,
1445 stmt_info, 0, vect_body);
1446 break;
1447 }
1448 }
1449 }
1450 else
1451 record_stmt_cost (body_cost_vec, ncopies_for_cost, vector_stmt,
1452 stmt_info, 0, vect_body);
1453
1454 /* Scan operands and account for prologue cost of constants/externals.
1455 ??? This over-estimates cost for multiple uses and should be
1456 re-engineered. */
1457 lhs = gimple_get_lhs (stmt);
1458 for (i = 0; i < gimple_num_ops (stmt); ++i)
1459 {
1460 tree def, op = gimple_op (stmt, i);
1461 gimple def_stmt;
1462 enum vect_def_type dt;
1463 if (!op || op == lhs)
1464 continue;
1465 if (vect_is_simple_use (op, NULL, loop_vinfo, bb_vinfo,
1466 &def_stmt, &def, &dt)
1467 && (dt == vect_constant_def || dt == vect_external_def))
1468 record_stmt_cost (prologue_cost_vec, 1, vector_stmt,
1469 stmt_info, 0, vect_prologue);
1470 }
1471 }
1472
1473 /* Compute the cost for the SLP instance INSTANCE. */
1474
1475 static void
1476 vect_analyze_slp_cost (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo,
1477 slp_instance instance, unsigned nunits)
1478 {
1479 stmt_vector_for_cost body_cost_vec, prologue_cost_vec;
1480 unsigned ncopies_for_cost;
1481 stmt_info_for_cost *si;
1482 unsigned i;
1483
1484 /* Calculate the number of vector stmts to create based on the unrolling
1485 factor (number of vectors is 1 if NUNITS >= GROUP_SIZE, and is
1486 GROUP_SIZE / NUNITS otherwise. */
1487 unsigned group_size = SLP_INSTANCE_GROUP_SIZE (instance);
1488 ncopies_for_cost = least_common_multiple (nunits, group_size) / nunits;
1489
1490 prologue_cost_vec.create (10);
1491 body_cost_vec.create (10);
1492 SLP_INSTANCE_BODY_COST_VEC (instance) = body_cost_vec;
1493 vect_analyze_slp_cost_1 (loop_vinfo, bb_vinfo,
1494 instance, SLP_INSTANCE_TREE (instance),
1495 &prologue_cost_vec, ncopies_for_cost);
1496
1497 /* Record the prologue costs, which were delayed until we were
1498 sure that SLP was successful. Unlike the body costs, we know
1499 the final values now regardless of the loop vectorization factor. */
1500 void *data = (loop_vinfo ? LOOP_VINFO_TARGET_COST_DATA (loop_vinfo)
1501 : BB_VINFO_TARGET_COST_DATA (bb_vinfo));
1502 FOR_EACH_VEC_ELT (prologue_cost_vec, i, si)
1503 {
1504 struct _stmt_vec_info *stmt_info
1505 = si->stmt ? vinfo_for_stmt (si->stmt) : NULL;
1506 (void) add_stmt_cost (data, si->count, si->kind, stmt_info,
1507 si->misalign, vect_prologue);
1508 }
1509
1510 prologue_cost_vec.release ();
1511 }
1512
/* Analyze an SLP instance starting from a group of grouped stores.  Call
   vect_build_slp_tree to build a tree of packed stmts if possible.
   Return FALSE if it's impossible to SLP any stmt in the loop.

   STMT is either the first element of a store group / reduction chain
   (GROUP_FIRST_ELEMENT set) or an arbitrary reduction stmt.  On success
   the new instance is pushed onto LOOP_VINFO's or BB_VINFO's instance
   list, which takes ownership of it.  */

static bool
vect_analyze_slp_instance (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo,
                           gimple stmt, unsigned max_tree_size)
{
  slp_instance new_instance;
  slp_tree node;
  unsigned int group_size = GROUP_SIZE (vinfo_for_stmt (stmt));
  unsigned int unrolling_factor = 1, nunits;
  tree vectype, scalar_type = NULL_TREE;
  gimple next;
  unsigned int vectorization_factor = 0;
  int i;
  unsigned int max_nunits = 0;
  vec<slp_tree> loads;
  struct data_reference *dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (stmt));
  vec<gimple> scalar_stmts;

  /* Determine the vector type and group size for the instance root.  */
  if (GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)))
    {
      if (dr)
        {
          scalar_type = TREE_TYPE (DR_REF (dr));
          vectype = get_vectype_for_scalar_type (scalar_type);
        }
      else
        {
          /* No data-ref: a reduction chain, which only occurs in loops.  */
          gcc_assert (loop_vinfo);
          vectype = STMT_VINFO_VECTYPE (vinfo_for_stmt (stmt));
        }

      group_size = GROUP_SIZE (vinfo_for_stmt (stmt));
    }
  else
    {
      /* Plain reductions: the group is all reduction stmts of the loop.  */
      gcc_assert (loop_vinfo);
      vectype = STMT_VINFO_VECTYPE (vinfo_for_stmt (stmt));
      group_size = LOOP_VINFO_REDUCTIONS (loop_vinfo).length ();
    }

  if (!vectype)
    {
      if (dump_enabled_p ())
        {
          dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			   "Build SLP failed: unsupported data-type ");
          dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM, scalar_type);
          dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
        }

      return false;
    }

  nunits = TYPE_VECTOR_SUBPARTS (vectype);
  if (loop_vinfo)
    vectorization_factor = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
  else
    vectorization_factor = nunits;

  /* Calculate the unrolling factor.  Basic-block SLP cannot unroll,
     so any factor other than 1 fails immediately there.  */
  unrolling_factor = least_common_multiple (nunits, group_size) / group_size;
  if (unrolling_factor != 1 && !loop_vinfo)
    {
      if (dump_enabled_p ())
        dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			 "Build SLP failed: unrolling required in basic"
			 " block SLP\n");

      return false;
    }

  /* Create a node (a root of the SLP tree) for the packed grouped stores.  */
  scalar_stmts.create (group_size);
  next = stmt;
  if (GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)))
    {
      /* Collect the stores and store them in SLP_TREE_SCALAR_STMTS.
	 For pattern stmts, collect the pattern replacement instead of
	 the original.  */
      while (next)
        {
	  if (STMT_VINFO_IN_PATTERN_P (vinfo_for_stmt (next))
	      && STMT_VINFO_RELATED_STMT (vinfo_for_stmt (next)))
	    scalar_stmts.safe_push (
		  STMT_VINFO_RELATED_STMT (vinfo_for_stmt (next)));
	  else
            scalar_stmts.safe_push (next);
          next = GROUP_NEXT_ELEMENT (vinfo_for_stmt (next));
        }
    }
  else
    {
      /* Collect reduction statements.  */
      vec<gimple> reductions = LOOP_VINFO_REDUCTIONS (loop_vinfo);
      for (i = 0; reductions.iterate (i, &next); i++)
	scalar_stmts.safe_push (next);
    }

  node = vect_create_new_slp_node (scalar_stmts);

  loads.create (group_size);

  /* Build the tree for the SLP instance.  LOADS collects all load nodes
     of the tree; MAX_NUNITS tracks the widest vector type seen.  */
  if (vect_build_slp_tree (loop_vinfo, bb_vinfo, &node, group_size,
			   &max_nunits, &loads,
			   vectorization_factor, NULL, NULL, NULL,
			   max_tree_size))
    {
      /* Calculate the unrolling factor based on the smallest type.  */
      if (max_nunits > nunits)
        unrolling_factor = least_common_multiple (max_nunits, group_size)
                           / group_size;

      /* Re-check the unrolling constraint: a smaller scalar type inside
	 the tree may have raised the factor above 1.  */
      if (unrolling_factor != 1 && !loop_vinfo)
        {
          if (dump_enabled_p ())
            dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			     "Build SLP failed: unrolling required in basic"
			     " block SLP\n");
	  vect_free_slp_tree (node);
	  loads.release ();
          return false;
        }

      /* Create a new SLP instance.  */
      new_instance = XNEW (struct _slp_instance);
      SLP_INSTANCE_TREE (new_instance) = node;
      SLP_INSTANCE_GROUP_SIZE (new_instance) = group_size;
      SLP_INSTANCE_UNROLLING_FACTOR (new_instance) = unrolling_factor;
      SLP_INSTANCE_BODY_COST_VEC (new_instance) = vNULL;
      SLP_INSTANCE_LOADS (new_instance) = loads;
      SLP_INSTANCE_FIRST_LOAD_STMT (new_instance) = NULL;

      /* Compute the load permutation of each load node: the position of
	 each load in its interleaving chain.  Identity permutations are
	 dropped.  */
      slp_tree load_node;
      bool loads_permuted = false;
      FOR_EACH_VEC_ELT (loads, i, load_node)
	{
	  vec<unsigned> load_permutation;
	  int j;
	  gimple load, first_stmt;
	  bool this_load_permuted = false;
	  load_permutation.create (group_size);
	  first_stmt = GROUP_FIRST_ELEMENT
	      (vinfo_for_stmt (SLP_TREE_SCALAR_STMTS (load_node)[0]));
	  FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (load_node), j, load)
	    {
	      int load_place
		= vect_get_place_in_interleaving_chain (load, first_stmt);
	      gcc_assert (load_place != -1);
	      if (load_place != j)
		this_load_permuted = true;
	      load_permutation.safe_push (load_place);
	    }
	  if (!this_load_permuted)
	    {
	      load_permutation.release ();
	      continue;
	    }
	  SLP_TREE_LOAD_PERMUTATION (load_node) = load_permutation;
	  loads_permuted = true;
	}

      if (loads_permuted)
        {
          if (!vect_supported_load_permutation_p (new_instance))
            {
              if (dump_enabled_p ())
                {
                  dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
				   "Build SLP failed: unsupported load "
				   "permutation ");
                  dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
                  dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
                }
	      /* vect_free_slp_instance also frees the tree and LOADS.  */
              vect_free_slp_instance (new_instance);
              return false;
            }

          SLP_INSTANCE_FIRST_LOAD_STMT (new_instance)
	    = vect_find_first_load_in_slp_instance (new_instance);
        }

      /* Compute the costs of this SLP instance.  */
      vect_analyze_slp_cost (loop_vinfo, bb_vinfo,
			     new_instance, TYPE_VECTOR_SUBPARTS (vectype));

      if (loop_vinfo)
        LOOP_VINFO_SLP_INSTANCES (loop_vinfo).safe_push (new_instance);
      else
        BB_VINFO_SLP_INSTANCES (bb_vinfo).safe_push (new_instance);

      if (dump_enabled_p ())
	vect_print_slp_tree (MSG_NOTE, node);

      return true;
    }

  /* Failed to SLP.  */
  /* Free the allocated memory.  */
  vect_free_slp_tree (node);
  loads.release ();

  return false;
}
1719
1720
1721 /* Check if there are stmts in the loop can be vectorized using SLP. Build SLP
1722 trees of packed scalar stmts if SLP is possible. */
1723
1724 bool
1725 vect_analyze_slp (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo,
1726 unsigned max_tree_size)
1727 {
1728 unsigned int i;
1729 vec<gimple> grouped_stores;
1730 vec<gimple> reductions = vNULL;
1731 vec<gimple> reduc_chains = vNULL;
1732 gimple first_element;
1733 bool ok = false;
1734
1735 if (dump_enabled_p ())
1736 dump_printf_loc (MSG_NOTE, vect_location, "=== vect_analyze_slp ===\n");
1737
1738 if (loop_vinfo)
1739 {
1740 grouped_stores = LOOP_VINFO_GROUPED_STORES (loop_vinfo);
1741 reduc_chains = LOOP_VINFO_REDUCTION_CHAINS (loop_vinfo);
1742 reductions = LOOP_VINFO_REDUCTIONS (loop_vinfo);
1743 }
1744 else
1745 grouped_stores = BB_VINFO_GROUPED_STORES (bb_vinfo);
1746
1747 /* Find SLP sequences starting from groups of grouped stores. */
1748 FOR_EACH_VEC_ELT (grouped_stores, i, first_element)
1749 if (vect_analyze_slp_instance (loop_vinfo, bb_vinfo, first_element,
1750 max_tree_size))
1751 ok = true;
1752
1753 if (bb_vinfo && !ok)
1754 {
1755 if (dump_enabled_p ())
1756 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1757 "Failed to SLP the basic block.\n");
1758
1759 return false;
1760 }
1761
1762 if (loop_vinfo
1763 && LOOP_VINFO_REDUCTION_CHAINS (loop_vinfo).length () > 0)
1764 {
1765 /* Find SLP sequences starting from reduction chains. */
1766 FOR_EACH_VEC_ELT (reduc_chains, i, first_element)
1767 if (vect_analyze_slp_instance (loop_vinfo, bb_vinfo, first_element,
1768 max_tree_size))
1769 ok = true;
1770 else
1771 return false;
1772
1773 /* Don't try to vectorize SLP reductions if reduction chain was
1774 detected. */
1775 return ok;
1776 }
1777
1778 /* Find SLP sequences starting from groups of reductions. */
1779 if (loop_vinfo && LOOP_VINFO_REDUCTIONS (loop_vinfo).length () > 1
1780 && vect_analyze_slp_instance (loop_vinfo, bb_vinfo, reductions[0],
1781 max_tree_size))
1782 ok = true;
1783
1784 return true;
1785 }
1786
1787
1788 /* For each possible SLP instance decide whether to SLP it and calculate overall
1789 unrolling factor needed to SLP the loop. Return TRUE if decided to SLP at
1790 least one instance. */
1791
1792 bool
1793 vect_make_slp_decision (loop_vec_info loop_vinfo)
1794 {
1795 unsigned int i, unrolling_factor = 1;
1796 vec<slp_instance> slp_instances = LOOP_VINFO_SLP_INSTANCES (loop_vinfo);
1797 slp_instance instance;
1798 int decided_to_slp = 0;
1799
1800 if (dump_enabled_p ())
1801 dump_printf_loc (MSG_NOTE, vect_location, "=== vect_make_slp_decision ==="
1802 "\n");
1803
1804 FOR_EACH_VEC_ELT (slp_instances, i, instance)
1805 {
1806 /* FORNOW: SLP if you can. */
1807 if (unrolling_factor < SLP_INSTANCE_UNROLLING_FACTOR (instance))
1808 unrolling_factor = SLP_INSTANCE_UNROLLING_FACTOR (instance);
1809
1810 /* Mark all the stmts that belong to INSTANCE as PURE_SLP stmts. Later we
1811 call vect_detect_hybrid_slp () to find stmts that need hybrid SLP and
1812 loop-based vectorization. Such stmts will be marked as HYBRID. */
1813 vect_mark_slp_stmts (SLP_INSTANCE_TREE (instance), pure_slp, -1);
1814 decided_to_slp++;
1815 }
1816
1817 LOOP_VINFO_SLP_UNROLLING_FACTOR (loop_vinfo) = unrolling_factor;
1818
1819 if (decided_to_slp && dump_enabled_p ())
1820 dump_printf_loc (MSG_NOTE, vect_location,
1821 "Decided to SLP %d instances. Unrolling factor %d\n",
1822 decided_to_slp, unrolling_factor);
1823
1824 return (decided_to_slp > 0);
1825 }
1826
1827
1828 /* Find stmts that must be both vectorized and SLPed (since they feed stmts that
1829 can't be SLPed) in the tree rooted at NODE. Mark such stmts as HYBRID. */
1830
1831 static void
1832 vect_detect_hybrid_slp_stmts (slp_tree node)
1833 {
1834 int i;
1835 vec<gimple> stmts = SLP_TREE_SCALAR_STMTS (node);
1836 gimple stmt = stmts[0];
1837 imm_use_iterator imm_iter;
1838 gimple use_stmt;
1839 stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
1840 slp_tree child;
1841 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
1842 struct loop *loop = NULL;
1843 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_vinfo);
1844 basic_block bb = NULL;
1845
1846 if (!node)
1847 return;
1848
1849 if (loop_vinfo)
1850 loop = LOOP_VINFO_LOOP (loop_vinfo);
1851 else
1852 bb = BB_VINFO_BB (bb_vinfo);
1853
1854 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, stmt)
1855 if (PURE_SLP_STMT (vinfo_for_stmt (stmt))
1856 && TREE_CODE (gimple_op (stmt, 0)) == SSA_NAME)
1857 FOR_EACH_IMM_USE_STMT (use_stmt, imm_iter, gimple_op (stmt, 0))
1858 if (gimple_bb (use_stmt)
1859 && ((loop && flow_bb_inside_loop_p (loop, gimple_bb (use_stmt)))
1860 || bb == gimple_bb (use_stmt))
1861 && (stmt_vinfo = vinfo_for_stmt (use_stmt))
1862 && !STMT_SLP_TYPE (stmt_vinfo)
1863 && (STMT_VINFO_RELEVANT (stmt_vinfo)
1864 || VECTORIZABLE_CYCLE_DEF (STMT_VINFO_DEF_TYPE (stmt_vinfo))
1865 || (STMT_VINFO_IN_PATTERN_P (stmt_vinfo)
1866 && STMT_VINFO_RELATED_STMT (stmt_vinfo)
1867 && !STMT_SLP_TYPE (vinfo_for_stmt (STMT_VINFO_RELATED_STMT (stmt_vinfo)))))
1868 && !(gimple_code (use_stmt) == GIMPLE_PHI
1869 && STMT_VINFO_DEF_TYPE (stmt_vinfo)
1870 == vect_reduction_def))
1871 vect_mark_slp_stmts (node, hybrid, i);
1872
1873 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
1874 vect_detect_hybrid_slp_stmts (child);
1875 }
1876
1877
1878 /* Find stmts that must be both vectorized and SLPed. */
1879
1880 void
1881 vect_detect_hybrid_slp (loop_vec_info loop_vinfo)
1882 {
1883 unsigned int i;
1884 vec<slp_instance> slp_instances = LOOP_VINFO_SLP_INSTANCES (loop_vinfo);
1885 slp_instance instance;
1886
1887 if (dump_enabled_p ())
1888 dump_printf_loc (MSG_NOTE, vect_location, "=== vect_detect_hybrid_slp ==="
1889 "\n");
1890
1891 FOR_EACH_VEC_ELT (slp_instances, i, instance)
1892 vect_detect_hybrid_slp_stmts (SLP_INSTANCE_TREE (instance));
1893 }
1894
1895
1896 /* Create and initialize a new bb_vec_info struct for BB, as well as
1897 stmt_vec_info structs for all the stmts in it. */
1898
1899 static bb_vec_info
1900 new_bb_vec_info (basic_block bb)
1901 {
1902 bb_vec_info res = NULL;
1903 gimple_stmt_iterator gsi;
1904
1905 res = (bb_vec_info) xcalloc (1, sizeof (struct _bb_vec_info));
1906 BB_VINFO_BB (res) = bb;
1907
1908 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
1909 {
1910 gimple stmt = gsi_stmt (gsi);
1911 gimple_set_uid (stmt, 0);
1912 set_vinfo_for_stmt (stmt, new_stmt_vec_info (stmt, NULL, res));
1913 }
1914
1915 BB_VINFO_GROUPED_STORES (res).create (10);
1916 BB_VINFO_SLP_INSTANCES (res).create (2);
1917 BB_VINFO_TARGET_COST_DATA (res) = init_cost (NULL);
1918
1919 bb->aux = res;
1920 return res;
1921 }
1922
1923
1924 /* Free BB_VINFO struct, as well as all the stmt_vec_info structs of all the
1925 stmts in the basic block. */
1926
1927 static void
1928 destroy_bb_vec_info (bb_vec_info bb_vinfo)
1929 {
1930 vec<slp_instance> slp_instances;
1931 slp_instance instance;
1932 basic_block bb;
1933 gimple_stmt_iterator si;
1934 unsigned i;
1935
1936 if (!bb_vinfo)
1937 return;
1938
1939 bb = BB_VINFO_BB (bb_vinfo);
1940
1941 for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
1942 {
1943 gimple stmt = gsi_stmt (si);
1944 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1945
1946 if (stmt_info)
1947 /* Free stmt_vec_info. */
1948 free_stmt_vec_info (stmt);
1949 }
1950
1951 vect_destroy_datarefs (NULL, bb_vinfo);
1952 free_dependence_relations (BB_VINFO_DDRS (bb_vinfo));
1953 BB_VINFO_GROUPED_STORES (bb_vinfo).release ();
1954 slp_instances = BB_VINFO_SLP_INSTANCES (bb_vinfo);
1955 FOR_EACH_VEC_ELT (slp_instances, i, instance)
1956 vect_free_slp_instance (instance);
1957 BB_VINFO_SLP_INSTANCES (bb_vinfo).release ();
1958 destroy_cost_data (BB_VINFO_TARGET_COST_DATA (bb_vinfo));
1959 free (bb_vinfo);
1960 bb->aux = NULL;
1961 }
1962
1963
1964 /* Analyze statements contained in SLP tree node after recursively analyzing
1965 the subtree. Return TRUE if the operations are supported. */
1966
1967 static bool
1968 vect_slp_analyze_node_operations (bb_vec_info bb_vinfo, slp_tree node)
1969 {
1970 bool dummy;
1971 int i;
1972 gimple stmt;
1973 slp_tree child;
1974
1975 if (!node)
1976 return true;
1977
1978 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
1979 if (!vect_slp_analyze_node_operations (bb_vinfo, child))
1980 return false;
1981
1982 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, stmt)
1983 {
1984 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1985 gcc_assert (stmt_info);
1986 gcc_assert (PURE_SLP_STMT (stmt_info));
1987
1988 if (!vect_analyze_stmt (stmt, &dummy, node))
1989 return false;
1990 }
1991
1992 return true;
1993 }
1994
1995
1996 /* Analyze statements in SLP instances of the basic block. Return TRUE if the
1997 operations are supported. */
1998
1999 static bool
2000 vect_slp_analyze_operations (bb_vec_info bb_vinfo)
2001 {
2002 vec<slp_instance> slp_instances = BB_VINFO_SLP_INSTANCES (bb_vinfo);
2003 slp_instance instance;
2004 int i;
2005
2006 for (i = 0; slp_instances.iterate (i, &instance); )
2007 {
2008 if (!vect_slp_analyze_node_operations (bb_vinfo,
2009 SLP_INSTANCE_TREE (instance)))
2010 {
2011 vect_free_slp_instance (instance);
2012 slp_instances.ordered_remove (i);
2013 }
2014 else
2015 i++;
2016 }
2017
2018 if (!slp_instances.length ())
2019 return false;
2020
2021 return true;
2022 }
2023
2024
/* Compute the scalar cost of the SLP node NODE and its children
   and return it.  Do not account defs that are marked in LIFE and
   update LIFE according to uses of NODE.

   BB is the basic block being vectorized; LIFE has one entry per scalar
   stmt of the group and records which scalar stmts must stay live after
   vectorization (their defs have non-vectorizable uses).  */

static unsigned
vect_bb_slp_scalar_cost (basic_block bb,
			 slp_tree node, vec<bool, va_heap> *life)
{
  unsigned scalar_cost = 0;
  unsigned i;
  gimple stmt;
  slp_tree child;

  FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, stmt)
    {
      unsigned stmt_cost;
      ssa_op_iter op_iter;
      def_operand_p def_p;
      stmt_vec_info stmt_info;

      /* Already known to stay live — don't count it.  */
      if ((*life)[i])
	continue;

      /* If there is a non-vectorized use of the defs then the scalar
	 stmt is kept live in which case we do not account it or any
	 required defs in the SLP children in the scalar cost.  This
	 way we make the vectorization more costly when compared to
	 the scalar cost.  */
      FOR_EACH_SSA_DEF_OPERAND (def_p, stmt, op_iter, SSA_OP_DEF)
	{
	  imm_use_iterator use_iter;
	  gimple use_stmt;
	  FOR_EACH_IMM_USE_STMT (use_stmt, use_iter, DEF_FROM_PTR (def_p))
	    if (!is_gimple_debug (use_stmt)
		&& (gimple_code (use_stmt) == GIMPLE_PHI
		    || gimple_bb (use_stmt) != bb
		    || !STMT_VINFO_VECTORIZABLE (vinfo_for_stmt (use_stmt))))
	      {
		(*life)[i] = true;
		/* One such use is enough; stop iterating cleanly.  */
		BREAK_FROM_IMM_USE_STMT (use_iter);
	      }
	}
      if ((*life)[i])
	continue;

      /* Classify the scalar stmt as a load, store or plain stmt for
	 the target cost hooks.  */
      stmt_info = vinfo_for_stmt (stmt);
      if (STMT_VINFO_DATA_REF (stmt_info))
	{
	  if (DR_IS_READ (STMT_VINFO_DATA_REF (stmt_info)))
	    stmt_cost = vect_get_stmt_cost (scalar_load);
	  else
	    stmt_cost = vect_get_stmt_cost (scalar_store);
	}
      else
	stmt_cost = vect_get_stmt_cost (scalar_stmt);

      scalar_cost += stmt_cost;
    }

  /* Accumulate the cost of the whole subtree, sharing LIFE.  */
  FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
    scalar_cost += vect_bb_slp_scalar_cost (bb, child, life);

  return scalar_cost;
}
2089
/* Check if vectorization of the basic block is profitable.

   Sums the recorded vector body costs of all SLP instances into the
   target cost data, computes the scalar cost of the same stmts, and
   returns true iff the total vector cost (inside + prologue + epilogue)
   is strictly smaller than the scalar cost.  */

static bool
vect_bb_vectorization_profitable_p (bb_vec_info bb_vinfo)
{
  vec<slp_instance> slp_instances = BB_VINFO_SLP_INSTANCES (bb_vinfo);
  slp_instance instance;
  int i, j;
  unsigned int vec_inside_cost = 0, vec_outside_cost = 0, scalar_cost = 0;
  unsigned int vec_prologue_cost = 0, vec_epilogue_cost = 0;
  void *target_cost_data = BB_VINFO_TARGET_COST_DATA (bb_vinfo);
  stmt_vec_info stmt_info = NULL;
  stmt_vector_for_cost body_cost_vec;
  stmt_info_for_cost *ci;

  /* Calculate vector costs.  The per-instance body cost vectors were
     filled in by vect_analyze_slp_cost; commit them to the target
     cost model here.  */
  FOR_EACH_VEC_ELT (slp_instances, i, instance)
    {
      body_cost_vec = SLP_INSTANCE_BODY_COST_VEC (instance);

      FOR_EACH_VEC_ELT (body_cost_vec, j, ci)
	{
	  stmt_info = ci->stmt ? vinfo_for_stmt (ci->stmt) : NULL;
	  (void) add_stmt_cost (target_cost_data, ci->count, ci->kind,
				stmt_info, ci->misalign, vect_body);
	}
    }

  /* Calculate scalar cost.  LIFE tracks scalar stmts that stay live
     because of non-vectorizable uses; those are not counted.  */
  FOR_EACH_VEC_ELT (slp_instances, i, instance)
    {
      auto_vec<bool, 20> life;
      life.safe_grow_cleared (SLP_INSTANCE_GROUP_SIZE (instance));
      scalar_cost += vect_bb_slp_scalar_cost (BB_VINFO_BB (bb_vinfo),
					      SLP_INSTANCE_TREE (instance),
					      &life);
    }

  /* Complete the target-specific cost calculation.  */
  finish_cost (BB_VINFO_TARGET_COST_DATA (bb_vinfo), &vec_prologue_cost,
	       &vec_inside_cost, &vec_epilogue_cost);

  vec_outside_cost = vec_prologue_cost + vec_epilogue_cost;

  if (dump_enabled_p ())
    {
      dump_printf_loc (MSG_NOTE, vect_location, "Cost model analysis: \n");
      dump_printf (MSG_NOTE, "  Vector inside of basic block cost: %d\n",
		   vec_inside_cost);
      dump_printf (MSG_NOTE, "  Vector prologue cost: %d\n", vec_prologue_cost);
      dump_printf (MSG_NOTE, "  Vector epilogue cost: %d\n", vec_epilogue_cost);
      dump_printf (MSG_NOTE, "  Scalar cost of basic block: %d\n", scalar_cost);
    }

  /* Vectorization is profitable if its cost is less than the cost of scalar
     version.  */
  if (vec_outside_cost + vec_inside_cost >= scalar_cost)
    return false;

  return true;
}
2151
2152 /* Check if the basic block can be vectorized. */
2153
2154 static bb_vec_info
2155 vect_slp_analyze_bb_1 (basic_block bb)
2156 {
2157 bb_vec_info bb_vinfo;
2158 vec<slp_instance> slp_instances;
2159 slp_instance instance;
2160 int i;
2161 int min_vf = 2;
2162 unsigned n_stmts = 0;
2163
2164 bb_vinfo = new_bb_vec_info (bb);
2165 if (!bb_vinfo)
2166 return NULL;
2167
2168 if (!vect_analyze_data_refs (NULL, bb_vinfo, &min_vf, &n_stmts))
2169 {
2170 if (dump_enabled_p ())
2171 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2172 "not vectorized: unhandled data-ref in basic "
2173 "block.\n");
2174
2175 destroy_bb_vec_info (bb_vinfo);
2176 return NULL;
2177 }
2178
2179 if (BB_VINFO_DATAREFS (bb_vinfo).length () < 2)
2180 {
2181 if (dump_enabled_p ())
2182 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2183 "not vectorized: not enough data-refs in "
2184 "basic block.\n");
2185
2186 destroy_bb_vec_info (bb_vinfo);
2187 return NULL;
2188 }
2189
2190 if (!vect_analyze_data_ref_accesses (NULL, bb_vinfo))
2191 {
2192 if (dump_enabled_p ())
2193 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2194 "not vectorized: unhandled data access in "
2195 "basic block.\n");
2196
2197 destroy_bb_vec_info (bb_vinfo);
2198 return NULL;
2199 }
2200
2201 vect_pattern_recog (NULL, bb_vinfo);
2202
2203 if (!vect_analyze_data_refs_alignment (NULL, bb_vinfo))
2204 {
2205 if (dump_enabled_p ())
2206 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2207 "not vectorized: bad data alignment in basic "
2208 "block.\n");
2209
2210 destroy_bb_vec_info (bb_vinfo);
2211 return NULL;
2212 }
2213
2214 /* Check the SLP opportunities in the basic block, analyze and build SLP
2215 trees. */
2216 if (!vect_analyze_slp (NULL, bb_vinfo, n_stmts))
2217 {
2218 if (dump_enabled_p ())
2219 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2220 "not vectorized: failed to find SLP opportunities "
2221 "in basic block.\n");
2222
2223 destroy_bb_vec_info (bb_vinfo);
2224 return NULL;
2225 }
2226
2227 slp_instances = BB_VINFO_SLP_INSTANCES (bb_vinfo);
2228
2229 /* Mark all the statements that we want to vectorize as pure SLP and
2230 relevant. */
2231 FOR_EACH_VEC_ELT (slp_instances, i, instance)
2232 {
2233 vect_mark_slp_stmts (SLP_INSTANCE_TREE (instance), pure_slp, -1);
2234 vect_mark_slp_stmts_relevant (SLP_INSTANCE_TREE (instance));
2235 }
2236
2237 /* Mark all the statements that we do not want to vectorize. */
2238 for (gimple_stmt_iterator gsi = gsi_start_bb (BB_VINFO_BB (bb_vinfo));
2239 !gsi_end_p (gsi); gsi_next (&gsi))
2240 {
2241 stmt_vec_info vinfo = vinfo_for_stmt (gsi_stmt (gsi));
2242 if (STMT_SLP_TYPE (vinfo) != pure_slp)
2243 STMT_VINFO_VECTORIZABLE (vinfo) = false;
2244 }
2245
2246 /* Analyze dependences. At this point all stmts not participating in
2247 vectorization have to be marked. Dependence analysis assumes
2248 that we either vectorize all SLP instances or none at all. */
2249 if (!vect_slp_analyze_data_ref_dependences (bb_vinfo))
2250 {
2251 if (dump_enabled_p ())
2252 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2253 "not vectorized: unhandled data dependence "
2254 "in basic block.\n");
2255
2256 destroy_bb_vec_info (bb_vinfo);
2257 return NULL;
2258 }
2259
2260 if (!vect_verify_datarefs_alignment (NULL, bb_vinfo))
2261 {
2262 if (dump_enabled_p ())
2263 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2264 "not vectorized: unsupported alignment in basic "
2265 "block.\n");
2266 destroy_bb_vec_info (bb_vinfo);
2267 return NULL;
2268 }
2269
2270 if (!vect_slp_analyze_operations (bb_vinfo))
2271 {
2272 if (dump_enabled_p ())
2273 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2274 "not vectorized: bad operation in basic block.\n");
2275
2276 destroy_bb_vec_info (bb_vinfo);
2277 return NULL;
2278 }
2279
2280 /* Cost model: check if the vectorization is worthwhile. */
2281 if (!unlimited_cost_model (NULL)
2282 && !vect_bb_vectorization_profitable_p (bb_vinfo))
2283 {
2284 if (dump_enabled_p ())
2285 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2286 "not vectorized: vectorization is not "
2287 "profitable.\n");
2288
2289 destroy_bb_vec_info (bb_vinfo);
2290 return NULL;
2291 }
2292
2293 if (dump_enabled_p ())
2294 dump_printf_loc (MSG_NOTE, vect_location,
2295 "Basic block will be vectorized using SLP\n");
2296
2297 return bb_vinfo;
2298 }
2299
2300
2301 bb_vec_info
2302 vect_slp_analyze_bb (basic_block bb)
2303 {
2304 bb_vec_info bb_vinfo;
2305 int insns = 0;
2306 gimple_stmt_iterator gsi;
2307 unsigned int vector_sizes;
2308
2309 if (dump_enabled_p ())
2310 dump_printf_loc (MSG_NOTE, vect_location, "===vect_slp_analyze_bb===\n");
2311
2312 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
2313 {
2314 gimple stmt = gsi_stmt (gsi);
2315 if (!is_gimple_debug (stmt)
2316 && !gimple_nop_p (stmt)
2317 && gimple_code (stmt) != GIMPLE_LABEL)
2318 insns++;
2319 }
2320
2321 if (insns > PARAM_VALUE (PARAM_SLP_MAX_INSNS_IN_BB))
2322 {
2323 if (dump_enabled_p ())
2324 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2325 "not vectorized: too many instructions in "
2326 "basic block.\n");
2327
2328 return NULL;
2329 }
2330
2331 /* Autodetect first vector size we try. */
2332 current_vector_size = 0;
2333 vector_sizes = targetm.vectorize.autovectorize_vector_sizes ();
2334
2335 while (1)
2336 {
2337 bb_vinfo = vect_slp_analyze_bb_1 (bb);
2338 if (bb_vinfo)
2339 return bb_vinfo;
2340
2341 destroy_bb_vec_info (bb_vinfo);
2342
2343 vector_sizes &= ~current_vector_size;
2344 if (vector_sizes == 0
2345 || current_vector_size == 0)
2346 return NULL;
2347
2348 /* Try the next biggest vector size. */
2349 current_vector_size = 1 << floor_log2 (vector_sizes);
2350 if (dump_enabled_p ())
2351 dump_printf_loc (MSG_NOTE, vect_location,
2352 "***** Re-trying analysis with "
2353 "vector size %d\n", current_vector_size);
2354 }
2355 }
2356
2357
2358 /* SLP costs are calculated according to SLP instance unrolling factor (i.e.,
2359 the number of created vector stmts depends on the unrolling factor).
2360 However, the actual number of vector stmts for every SLP node depends on
2361 VF which is set later in vect_analyze_operations (). Hence, SLP costs
2362 should be updated. In this function we assume that the inside costs
2363 calculated in vect_model_xxx_cost are linear in ncopies. */
2364
2365 void
2366 vect_update_slp_costs_according_to_vf (loop_vec_info loop_vinfo)
2367 {
2368 unsigned int i, j, vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
2369 vec<slp_instance> slp_instances = LOOP_VINFO_SLP_INSTANCES (loop_vinfo);
2370 slp_instance instance;
2371 stmt_vector_for_cost body_cost_vec;
2372 stmt_info_for_cost *si;
2373 void *data = LOOP_VINFO_TARGET_COST_DATA (loop_vinfo);
2374
2375 if (dump_enabled_p ())
2376 dump_printf_loc (MSG_NOTE, vect_location,
2377 "=== vect_update_slp_costs_according_to_vf ===\n");
2378
2379 FOR_EACH_VEC_ELT (slp_instances, i, instance)
2380 {
2381 /* We assume that costs are linear in ncopies. */
2382 int ncopies = vf / SLP_INSTANCE_UNROLLING_FACTOR (instance);
2383
2384 /* Record the instance's instructions in the target cost model.
2385 This was delayed until here because the count of instructions
2386 isn't known beforehand. */
2387 body_cost_vec = SLP_INSTANCE_BODY_COST_VEC (instance);
2388
2389 FOR_EACH_VEC_ELT (body_cost_vec, j, si)
2390 (void) add_stmt_cost (data, si->count * ncopies, si->kind,
2391 vinfo_for_stmt (si->stmt), si->misalign,
2392 vect_body);
2393 }
2394 }
2395
2396
2397 /* For constant and loop invariant defs of SLP_NODE this function returns
2398 (vector) defs (VEC_OPRNDS) that will be used in the vectorized stmts.
2399 OP_NUM determines if we gather defs for operand 0 or operand 1 of the RHS of
2400 scalar stmts. NUMBER_OF_VECTORS is the number of vector defs to create.
2401 REDUC_INDEX is the index of the reduction operand in the statements, unless
2402 it is -1. */
2403
2404 static void
2405 vect_get_constant_vectors (tree op, slp_tree slp_node,
2406 vec<tree> *vec_oprnds,
2407 unsigned int op_num, unsigned int number_of_vectors,
2408 int reduc_index)
2409 {
2410 vec<gimple> stmts = SLP_TREE_SCALAR_STMTS (slp_node);
2411 gimple stmt = stmts[0];
2412 stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
2413 unsigned nunits;
2414 tree vec_cst;
2415 tree *elts;
2416 unsigned j, number_of_places_left_in_vector;
2417 tree vector_type;
2418 tree vop;
2419 int group_size = stmts.length ();
2420 unsigned int vec_num, i;
2421 unsigned number_of_copies = 1;
2422 vec<tree> voprnds;
2423 voprnds.create (number_of_vectors);
2424 bool constant_p, is_store;
2425 tree neutral_op = NULL;
2426 enum tree_code code = gimple_expr_code (stmt);
2427 gimple def_stmt;
2428 struct loop *loop;
2429 gimple_seq ctor_seq = NULL;
2430
2431 if (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_reduction_def
2432 && reduc_index != -1)
2433 {
2434 op_num = reduc_index - 1;
2435 op = gimple_op (stmt, reduc_index);
2436 /* For additional copies (see the explanation of NUMBER_OF_COPIES below)
2437 we need either neutral operands or the original operands. See
2438 get_initial_def_for_reduction() for details. */
2439 switch (code)
2440 {
2441 case WIDEN_SUM_EXPR:
2442 case DOT_PROD_EXPR:
2443 case PLUS_EXPR:
2444 case MINUS_EXPR:
2445 case BIT_IOR_EXPR:
2446 case BIT_XOR_EXPR:
2447 if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (op)))
2448 neutral_op = build_real (TREE_TYPE (op), dconst0);
2449 else
2450 neutral_op = build_int_cst (TREE_TYPE (op), 0);
2451
2452 break;
2453
2454 case MULT_EXPR:
2455 if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (op)))
2456 neutral_op = build_real (TREE_TYPE (op), dconst1);
2457 else
2458 neutral_op = build_int_cst (TREE_TYPE (op), 1);
2459
2460 break;
2461
2462 case BIT_AND_EXPR:
2463 neutral_op = build_int_cst (TREE_TYPE (op), -1);
2464 break;
2465
2466 /* For MIN/MAX we don't have an easy neutral operand but
2467 the initial values can be used fine here. Only for
2468 a reduction chain we have to force a neutral element. */
2469 case MAX_EXPR:
2470 case MIN_EXPR:
2471 if (!GROUP_FIRST_ELEMENT (stmt_vinfo))
2472 neutral_op = NULL;
2473 else
2474 {
2475 def_stmt = SSA_NAME_DEF_STMT (op);
2476 loop = (gimple_bb (stmt))->loop_father;
2477 neutral_op = PHI_ARG_DEF_FROM_EDGE (def_stmt,
2478 loop_preheader_edge (loop));
2479 }
2480 break;
2481
2482 default:
2483 neutral_op = NULL;
2484 }
2485 }
2486
2487 if (STMT_VINFO_DATA_REF (stmt_vinfo))
2488 {
2489 is_store = true;
2490 op = gimple_assign_rhs1 (stmt);
2491 }
2492 else
2493 is_store = false;
2494
2495 gcc_assert (op);
2496
2497 if (CONSTANT_CLASS_P (op))
2498 constant_p = true;
2499 else
2500 constant_p = false;
2501
2502 vector_type = get_vectype_for_scalar_type (TREE_TYPE (op));
2503 gcc_assert (vector_type);
2504 nunits = TYPE_VECTOR_SUBPARTS (vector_type);
2505
2506 /* NUMBER_OF_COPIES is the number of times we need to use the same values in
2507 created vectors. It is greater than 1 if unrolling is performed.
2508
2509 For example, we have two scalar operands, s1 and s2 (e.g., group of
2510 strided accesses of size two), while NUNITS is four (i.e., four scalars
2511 of this type can be packed in a vector). The output vector will contain
2512 two copies of each scalar operand: {s1, s2, s1, s2}. (NUMBER_OF_COPIES
2513 will be 2).
2514
2515 If GROUP_SIZE > NUNITS, the scalars will be split into several vectors
2516 containing the operands.
2517
2518 For example, NUNITS is four as before, and the group size is 8
2519 (s1, s2, ..., s8). We will create two vectors {s1, s2, s3, s4} and
2520 {s5, s6, s7, s8}. */
2521
2522 number_of_copies = least_common_multiple (nunits, group_size) / group_size;
2523
2524 number_of_places_left_in_vector = nunits;
2525 elts = XALLOCAVEC (tree, nunits);
2526 for (j = 0; j < number_of_copies; j++)
2527 {
2528 for (i = group_size - 1; stmts.iterate (i, &stmt); i--)
2529 {
2530 if (is_store)
2531 op = gimple_assign_rhs1 (stmt);
2532 else
2533 {
2534 switch (code)
2535 {
2536 case COND_EXPR:
2537 if (op_num == 0 || op_num == 1)
2538 {
2539 tree cond = gimple_assign_rhs1 (stmt);
2540 op = TREE_OPERAND (cond, op_num);
2541 }
2542 else
2543 {
2544 if (op_num == 2)
2545 op = gimple_assign_rhs2 (stmt);
2546 else
2547 op = gimple_assign_rhs3 (stmt);
2548 }
2549 break;
2550
2551 case CALL_EXPR:
2552 op = gimple_call_arg (stmt, op_num);
2553 break;
2554
2555 case LSHIFT_EXPR:
2556 case RSHIFT_EXPR:
2557 case LROTATE_EXPR:
2558 case RROTATE_EXPR:
2559 op = gimple_op (stmt, op_num + 1);
2560 /* Unlike the other binary operators, shifts/rotates have
2561 the shift count being int, instead of the same type as
2562 the lhs, so make sure the scalar is the right type if
2563 we are dealing with vectors of
2564 long long/long/short/char. */
2565 if (op_num == 1 && TREE_CODE (op) == INTEGER_CST)
2566 op = fold_convert (TREE_TYPE (vector_type), op);
2567 break;
2568
2569 default:
2570 op = gimple_op (stmt, op_num + 1);
2571 break;
2572 }
2573 }
2574
2575 if (reduc_index != -1)
2576 {
2577 loop = (gimple_bb (stmt))->loop_father;
2578 def_stmt = SSA_NAME_DEF_STMT (op);
2579
2580 gcc_assert (loop);
2581
2582 /* Get the def before the loop. In reduction chain we have only
2583 one initial value. */
2584 if ((j != (number_of_copies - 1)
2585 || (GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt))
2586 && i != 0))
2587 && neutral_op)
2588 op = neutral_op;
2589 else
2590 op = PHI_ARG_DEF_FROM_EDGE (def_stmt,
2591 loop_preheader_edge (loop));
2592 }
2593
2594 /* Create 'vect_ = {op0,op1,...,opn}'. */
2595 number_of_places_left_in_vector--;
2596 if (!types_compatible_p (TREE_TYPE (vector_type), TREE_TYPE (op)))
2597 {
2598 if (CONSTANT_CLASS_P (op))
2599 {
2600 op = fold_unary (VIEW_CONVERT_EXPR,
2601 TREE_TYPE (vector_type), op);
2602 gcc_assert (op && CONSTANT_CLASS_P (op));
2603 }
2604 else
2605 {
2606 tree new_temp
2607 = make_ssa_name (TREE_TYPE (vector_type), NULL);
2608 gimple init_stmt;
2609 op = build1 (VIEW_CONVERT_EXPR, TREE_TYPE (vector_type),
2610 op);
2611 init_stmt
2612 = gimple_build_assign_with_ops (VIEW_CONVERT_EXPR,
2613 new_temp, op);
2614 gimple_seq_add_stmt (&ctor_seq, init_stmt);
2615 op = new_temp;
2616 }
2617 }
2618 elts[number_of_places_left_in_vector] = op;
2619 if (!CONSTANT_CLASS_P (op))
2620 constant_p = false;
2621
2622 if (number_of_places_left_in_vector == 0)
2623 {
2624 number_of_places_left_in_vector = nunits;
2625
2626 if (constant_p)
2627 vec_cst = build_vector (vector_type, elts);
2628 else
2629 {
2630 vec<constructor_elt, va_gc> *v;
2631 unsigned k;
2632 vec_alloc (v, nunits);
2633 for (k = 0; k < nunits; ++k)
2634 CONSTRUCTOR_APPEND_ELT (v, NULL_TREE, elts[k]);
2635 vec_cst = build_constructor (vector_type, v);
2636 }
2637 voprnds.quick_push (vect_init_vector (stmt, vec_cst,
2638 vector_type, NULL));
2639 if (ctor_seq != NULL)
2640 {
2641 gimple init_stmt = SSA_NAME_DEF_STMT (voprnds.last ());
2642 gimple_stmt_iterator gsi = gsi_for_stmt (init_stmt);
2643 gsi_insert_seq_before_without_update (&gsi, ctor_seq,
2644 GSI_SAME_STMT);
2645 ctor_seq = NULL;
2646 }
2647 }
2648 }
2649 }
2650
2651 /* Since the vectors are created in the reverse order, we should invert
2652 them. */
2653 vec_num = voprnds.length ();
2654 for (j = vec_num; j != 0; j--)
2655 {
2656 vop = voprnds[j - 1];
2657 vec_oprnds->quick_push (vop);
2658 }
2659
2660 voprnds.release ();
2661
2662 /* In case that VF is greater than the unrolling factor needed for the SLP
2663 group of stmts, NUMBER_OF_VECTORS to be created is greater than
2664 NUMBER_OF_SCALARS/NUNITS or NUNITS/NUMBER_OF_SCALARS, and hence we have
2665 to replicate the vectors. */
2666 while (number_of_vectors > vec_oprnds->length ())
2667 {
2668 tree neutral_vec = NULL;
2669
2670 if (neutral_op)
2671 {
2672 if (!neutral_vec)
2673 neutral_vec = build_vector_from_val (vector_type, neutral_op);
2674
2675 vec_oprnds->quick_push (neutral_vec);
2676 }
2677 else
2678 {
2679 for (i = 0; vec_oprnds->iterate (i, &vop) && i < vec_num; i++)
2680 vec_oprnds->quick_push (vop);
2681 }
2682 }
2683 }
2684
2685
2686 /* Get vectorized definitions from SLP_NODE that contains corresponding
2687 vectorized def-stmts. */
2688
2689 static void
2690 vect_get_slp_vect_defs (slp_tree slp_node, vec<tree> *vec_oprnds)
2691 {
2692 tree vec_oprnd;
2693 gimple vec_def_stmt;
2694 unsigned int i;
2695
2696 gcc_assert (SLP_TREE_VEC_STMTS (slp_node).exists ());
2697
2698 FOR_EACH_VEC_ELT (SLP_TREE_VEC_STMTS (slp_node), i, vec_def_stmt)
2699 {
2700 gcc_assert (vec_def_stmt);
2701 vec_oprnd = gimple_get_lhs (vec_def_stmt);
2702 vec_oprnds->quick_push (vec_oprnd);
2703 }
2704 }
2705
2706
2707 /* Get vectorized definitions for SLP_NODE.
2708 If the scalar definitions are loop invariants or constants, collect them and
2709 call vect_get_constant_vectors() to create vector stmts.
2710 Otherwise, the def-stmts must be already vectorized and the vectorized stmts
2711 must be stored in the corresponding child of SLP_NODE, and we call
2712 vect_get_slp_vect_defs () to retrieve them. */
2713 
2714 void
2715 vect_get_slp_defs (vec<tree> ops, slp_tree slp_node,
2716 vec<vec<tree> > *vec_oprnds, int reduc_index)
2717 {
2718 gimple first_stmt;
2719 int number_of_vects = 0, i;
2720 unsigned int child_index = 0;
2721 HOST_WIDE_INT lhs_size_unit, rhs_size_unit;
2722 slp_tree child = NULL;
2723 vec<tree> vec_defs;
2724 tree oprnd;
2725 bool vectorized_defs;
2726 
2727 first_stmt = SLP_TREE_SCALAR_STMTS (slp_node)[0];
/* Walk each scalar operand of the node's first statement.  */
2728 FOR_EACH_VEC_ELT (ops, i, oprnd)
2729 {
2730 /* For each operand we check if it has vectorized definitions in a child
2731 node or we need to create them (for invariants and constants). We
2732 check if the LHS of the first stmt of the next child matches OPRND.
2733 If it does, we found the correct child. Otherwise, we call
2734 vect_get_constant_vectors (), and not advance CHILD_INDEX in order
2735 to check this child node for the next operand. */
2736 vectorized_defs = false;
2737 if (SLP_TREE_CHILDREN (slp_node).length () > child_index)
2738 {
2739 child = SLP_TREE_CHILDREN (slp_node)[child_index];
2740 
2741 /* We have to check both pattern and original def, if available. */
2742 gimple first_def = SLP_TREE_SCALAR_STMTS (child)[0];
2743 gimple related = STMT_VINFO_RELATED_STMT (vinfo_for_stmt (first_def));
2744 
2745 if (operand_equal_p (oprnd, gimple_get_lhs (first_def), 0)
2746 || (related
2747 && operand_equal_p (oprnd, gimple_get_lhs (related), 0)))
2748 {
2749 /* The number of vector defs is determined by the number of
2750 vector statements in the node from which we get those
2751 statements. */
2752 number_of_vects = SLP_TREE_NUMBER_OF_VEC_STMTS (child);
2753 vectorized_defs = true;
/* CHILD_INDEX only advances on a match, so an unmatched child is
   re-tested against the next operand.  */
2754 child_index++;
2755 }
2756 }
2757 
2758 if (!vectorized_defs)
2759 {
2760 if (i == 0)
2761 {
2762 number_of_vects = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
2763 /* Number of vector stmts was calculated according to LHS in
2764 vect_schedule_slp_instance (), fix it by replacing LHS with
2765 RHS, if necessary. See vect_get_smallest_scalar_type () for
2766 details. */
2767 vect_get_smallest_scalar_type (first_stmt, &lhs_size_unit,
2768 &rhs_size_unit);
2769 if (rhs_size_unit != lhs_size_unit)
2770 {
2771 number_of_vects *= rhs_size_unit;
2772 number_of_vects /= lhs_size_unit;
2773 }
2774 }
/* NOTE(review): for i > 0 with no matching child, NUMBER_OF_VECTS
   keeps the value computed for a previous operand — presumably all
   operands of one stmt need the same count; confirm against callers.  */
2775 }
2776 
2777 /* Allocate memory for vectorized defs. */
2778 vec_defs = vNULL;
2779 vec_defs.create (number_of_vects);
2780 
2781 /* For reduction defs we call vect_get_constant_vectors (), since we are
2782 looking for initial loop invariant values. */
2783 if (vectorized_defs && reduc_index == -1)
2784 /* The defs are already vectorized. */
2785 vect_get_slp_vect_defs (child, &vec_defs);
2786 else
2787 /* Build vectors from scalar defs. */
2788 vect_get_constant_vectors (oprnd, slp_node, &vec_defs, i,
2789 number_of_vects, reduc_index);
2790 
/* Ownership of VEC_DEFS transfers to VEC_OPRNDS.  */
2791 vec_oprnds->quick_push (vec_defs);
2792 
2793 /* For reductions, we only need initial values. */
2794 if (reduc_index != -1)
2795 return;
2796 }
2797 }
2798
2799
2800 /* Create NCOPIES permutation statements using the mask MASK_BYTES (by
2801 building a vector of type MASK_TYPE from it) and two input vectors placed in
2802 DR_CHAIN at FIRST_VEC_INDX and SECOND_VEC_INDX for the first copy and
2803 shifting by STRIDE elements of DR_CHAIN for every copy.
2804 (STRIDE is the number of vectorized stmts for NODE divided by the number of
2805 copies).
2806 VECT_STMTS_COUNTER specifies the index in the vectorized stmts of NODE, where
2807 the created stmts must be inserted. */
2808
2809 static inline void
2810 vect_create_mask_and_perm (gimple stmt, gimple next_scalar_stmt,
2811 tree mask, int first_vec_indx, int second_vec_indx,
2812 gimple_stmt_iterator *gsi, slp_tree node,
2813 tree vectype, vec<tree> dr_chain,
2814 int ncopies, int vect_stmts_counter)
2815 {
2816 tree perm_dest;
2817 gimple perm_stmt = NULL;
2818 stmt_vec_info next_stmt_info;
2819 int i, stride;
2820 tree first_vec, second_vec, data_ref;
2821
2822 stride = SLP_TREE_NUMBER_OF_VEC_STMTS (node) / ncopies;
2823
2824 /* Initialize the vect stmts of NODE to properly insert the generated
2825 stmts later. */
2826 for (i = SLP_TREE_VEC_STMTS (node).length ();
2827 i < (int) SLP_TREE_NUMBER_OF_VEC_STMTS (node); i++)
2828 SLP_TREE_VEC_STMTS (node).quick_push (NULL);
2829
2830 perm_dest = vect_create_destination_var (gimple_assign_lhs (stmt), vectype);
2831 for (i = 0; i < ncopies; i++)
2832 {
2833 first_vec = dr_chain[first_vec_indx];
2834 second_vec = dr_chain[second_vec_indx];
2835
2836 /* Generate the permute statement. */
2837 perm_stmt = gimple_build_assign_with_ops (VEC_PERM_EXPR, perm_dest,
2838 first_vec, second_vec, mask);
2839 data_ref = make_ssa_name (perm_dest, perm_stmt);
2840 gimple_set_lhs (perm_stmt, data_ref);
2841 vect_finish_stmt_generation (stmt, perm_stmt, gsi);
2842
2843 /* Store the vector statement in NODE. */
2844 SLP_TREE_VEC_STMTS (node)[stride * i + vect_stmts_counter] = perm_stmt;
2845
2846 first_vec_indx += stride;
2847 second_vec_indx += stride;
2848 }
2849
2850 /* Mark the scalar stmt as vectorized. */
2851 next_stmt_info = vinfo_for_stmt (next_scalar_stmt);
2852 STMT_VINFO_VEC_STMT (next_stmt_info) = perm_stmt;
2853 }
2854
2855
2856 /* Given FIRST_MASK_ELEMENT - the mask element in element representation,
2857 return in CURRENT_MASK_ELEMENT its equivalent in target specific
2858 representation. Check that the mask is valid and return FALSE if not.
2859 Return TRUE in NEED_NEXT_VECTOR if the permutation requires to move to
2860 the next vector, i.e., the current first vector is not needed. */
2861 
/* NUMBER_OF_MASK_FIXES, MASK_FIXED and NEEDS_FIRST_VECTOR carry state
   between successive calls while one mask (of MASK_NUNITS elements) is
   being filled; they are reset below once INDEX reaches the last slot.
   MASK is the partially built mask (entries 0..INDEX-1 are valid) and may
   be adjusted retroactively here.  M is an offset added to
   FIRST_MASK_ELEMENT.  */
2862 static bool
2863 vect_get_mask_element (gimple stmt, int first_mask_element, int m,
2864 int mask_nunits, bool only_one_vec, int index,
2865 unsigned char *mask, int *current_mask_element,
2866 bool *need_next_vector, int *number_of_mask_fixes,
2867 bool *mask_fixed, bool *needs_first_vector)
2868 {
2869 int i;
2870 
2871 /* Convert to target specific representation. */
2872 *current_mask_element = first_mask_element + m;
2873 /* Adjust the value in case it's a mask for second and third vectors. */
2874 *current_mask_element -= mask_nunits * (*number_of_mask_fixes - 1);
2875 
2876 if (*current_mask_element < mask_nunits)
2877 *needs_first_vector = true;
2878 
2879 /* We have only one input vector to permute but the mask accesses values in
2880 the next vector as well. */
2881 if (only_one_vec && *current_mask_element >= mask_nunits)
2882 {
2883 if (dump_enabled_p ())
2884 {
2885 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2886 "permutation requires at least two vectors ");
2887 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
2888 dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
2889 }
2890 
2891 return false;
2892 }
2893 
2894 /* The mask requires the next vector. */
2895 while (*current_mask_element >= mask_nunits * 2)
2896 {
2897 if (*needs_first_vector || *mask_fixed)
2898 {
2899 /* We either need the first vector too or have already moved to the
2900 next vector. In both cases, this permutation needs three
2901 vectors. */
2902 if (dump_enabled_p ())
2903 {
2904 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2905 "permutation requires at "
2906 "least three vectors ");
2907 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
2908 dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
2909 }
2910 
2911 return false;
2912 }
2913 
2914 /* We move to the next vector, dropping the first one and working with
2915 the second and the third - we need to adjust the values of the mask
2916 accordingly. */
2917 *current_mask_element -= mask_nunits * *number_of_mask_fixes;
2918 
/* Retroactively rebase the mask entries already emitted for this mask.  */
2919 for (i = 0; i < index; i++)
2920 mask[i] -= mask_nunits * *number_of_mask_fixes;
2921 
2922 (*number_of_mask_fixes)++;
2923 *mask_fixed = true;
2924 }
2925 
2926 *need_next_vector = *mask_fixed;
2927 
2928 /* This was the last element of this mask. Start a new one. */
2929 if (index == mask_nunits - 1)
2930 {
2931 *number_of_mask_fixes = 1;
2932 *mask_fixed = false;
2933 *needs_first_vector = false;
2934 }
2935 
2936 return true;
2937 }
2938
2939
2940 /* Generate vector permute statements from a list of loads in DR_CHAIN.
2941 If ANALYZE_ONLY is TRUE, only check that it is possible to create valid
2942 permute statements for the SLP node NODE of the SLP instance
2943 SLP_NODE_INSTANCE. */
2944 
/* Returns true on success; false if the target cannot perform one of the
   required permutations or the access is not a grouped access.  */
2945 bool
2946 vect_transform_slp_perm_load (slp_tree node, vec<tree> dr_chain,
2947 gimple_stmt_iterator *gsi, int vf,
2948 slp_instance slp_node_instance, bool analyze_only)
2949 {
2950 gimple stmt = SLP_TREE_SCALAR_STMTS (node)[0];
2951 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2952 tree mask_element_type = NULL_TREE, mask_type;
2953 int i, j, k, nunits, vec_index = 0, scalar_index;
2954 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
2955 gimple next_scalar_stmt;
2956 int group_size = SLP_INSTANCE_GROUP_SIZE (slp_node_instance);
2957 int first_mask_element;
2958 int index, unroll_factor, current_mask_element, ncopies;
2959 unsigned char *mask;
2960 bool only_one_vec = false, need_next_vector = false;
2961 int first_vec_index, second_vec_index, orig_vec_stmts_num, vect_stmts_counter;
/* State threaded through vect_get_mask_element across mask elements.  */
2962 int number_of_mask_fixes = 1;
2963 bool mask_fixed = false;
2964 bool needs_first_vector = false;
2965 machine_mode mode;
2966 
2967 mode = TYPE_MODE (vectype);
2968 
/* Bail out early if the target has no permute support at all for MODE.  */
2969 if (!can_vec_perm_p (mode, false, NULL))
2970 {
2971 if (dump_enabled_p ())
2972 {
2973 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2974 "no vect permute for ");
2975 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
2976 dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
2977 }
2978 return false;
2979 }
2980 
2981 /* The generic VEC_PERM_EXPR code always uses an integral type of the
2982 same size as the vector element being permuted. */
2983 mask_element_type = lang_hooks.types.type_for_mode
2984 (int_mode_for_mode (TYPE_MODE (TREE_TYPE (vectype))), 1);
2985 mask_type = get_vectype_for_scalar_type (mask_element_type);
2986 nunits = TYPE_VECTOR_SUBPARTS (vectype);
/* MASK is built incrementally, one element per inner-loop iteration,
   and flushed every time INDEX wraps to NUNITS.  XALLOCAVEC is
   stack-allocated, so the early returns below leak nothing.  */
2987 mask = XALLOCAVEC (unsigned char, nunits);
2988 unroll_factor = SLP_INSTANCE_UNROLLING_FACTOR (slp_node_instance);
2989 
2990 /* The number of vector stmts to generate based only on SLP_NODE_INSTANCE
2991 unrolling factor. */
2992 orig_vec_stmts_num = group_size *
2993 SLP_INSTANCE_UNROLLING_FACTOR (slp_node_instance) / nunits;
2994 if (orig_vec_stmts_num == 1)
2995 only_one_vec = true;
2996 
2997 /* Number of copies is determined by the final vectorization factor
2998 relatively to SLP_NODE_INSTANCE unrolling factor. */
2999 ncopies = vf / SLP_INSTANCE_UNROLLING_FACTOR (slp_node_instance);
3000 
/* Load permutation only makes sense for grouped accesses.  */
3001 if (!STMT_VINFO_GROUPED_ACCESS (stmt_info))
3002 return false;
3003 
3004 /* Generate permutation masks for every NODE. Number of masks for each NODE
3005 is equal to GROUP_SIZE.
3006 E.g., we have a group of three nodes with three loads from the same
3007 location in each node, and the vector size is 4. I.e., we have a
3008 a0b0c0a1b1c1... sequence and we need to create the following vectors:
3009 for a's: a0a0a0a1 a1a1a2a2 a2a3a3a3
3010 for b's: b0b0b0b1 b1b1b2b2 b2b3b3b3
3011 ...
3012 
3013 The masks for a's should be: {0,0,0,3} {3,3,6,6} {6,9,9,9}.
3014 The last mask is illegal since we assume two operands for permute
3015 operation, and the mask element values can't be outside that range.
3016 Hence, the last mask must be converted into {2,5,5,5}.
3017 For the first two permutations we need the first and the second input
3018 vectors: {a0,b0,c0,a1} and {b1,c1,a2,b2}, and for the last permutation
3019 we need the second and the third vectors: {b1,c1,a2,b2} and
3020 {c2,a3,b3,c3}. */
3021 
3022 {
3023 scalar_index = 0;
3024 index = 0;
3025 vect_stmts_counter = 0;
3026 vec_index = 0;
3027 first_vec_index = vec_index++;
3028 if (only_one_vec)
3029 second_vec_index = first_vec_index;
3030 else
3031 second_vec_index = vec_index++;
3032 
3033 for (j = 0; j < unroll_factor; j++)
3034 {
3035 for (k = 0; k < group_size; k++)
3036 {
/* The k-th element of this node comes from load permutation slot k.  */
3037 i = SLP_TREE_LOAD_PERMUTATION (node)[k];
3038 first_mask_element = i + j * group_size;
3039 if (!vect_get_mask_element (stmt, first_mask_element, 0,
3040 nunits, only_one_vec, index,
3041 mask, &current_mask_element,
3042 &need_next_vector,
3043 &number_of_mask_fixes, &mask_fixed,
3044 &needs_first_vector))
3045 return false;
3046 gcc_assert (current_mask_element < 2 * nunits);
3047 mask[index++] = current_mask_element;
3048 
/* A full mask has been assembled: validate it and, unless we are only
   analyzing, emit the permute statements for it.  */
3049 if (index == nunits)
3050 {
3051 index = 0;
3052 if (!can_vec_perm_p (mode, false, mask))
3053 {
3054 if (dump_enabled_p ())
3055 {
3056 dump_printf_loc (MSG_MISSED_OPTIMIZATION,
3057 vect_location,
3058 "unsupported vect permute { ");
3059 for (i = 0; i < nunits; ++i)
3060 dump_printf (MSG_MISSED_OPTIMIZATION, "%d ",
3061 mask[i]);
3062 dump_printf (MSG_MISSED_OPTIMIZATION, "}\n");
3063 }
3064 return false;
3065 }
3066 
3067 if (!analyze_only)
3068 {
3069 int l;
3070 tree mask_vec, *mask_elts;
3071 mask_elts = XALLOCAVEC (tree, nunits);
3072 for (l = 0; l < nunits; ++l)
3073 mask_elts[l] = build_int_cst (mask_element_type,
3074 mask[l]);
3075 mask_vec = build_vector (mask_type, mask_elts);
3076 
/* Slide the input window forward when the first vector is done with.  */
3077 if (need_next_vector)
3078 {
3079 first_vec_index = second_vec_index;
3080 second_vec_index = vec_index;
3081 }
3082 
3083 next_scalar_stmt
3084 = SLP_TREE_SCALAR_STMTS (node)[scalar_index++];
3085 
3086 vect_create_mask_and_perm (stmt, next_scalar_stmt,
3087 mask_vec, first_vec_index, second_vec_index,
3088 gsi, node, vectype, dr_chain,
3089 ncopies, vect_stmts_counter++);
3090 }
3091 }
3092 }
3093 }
3094 }
3095 
3096 return true;
3097 }
3098
3099
3100
3101 /* Vectorize SLP instance tree in postorder. */
3102
3103 static bool
3104 vect_schedule_slp_instance (slp_tree node, slp_instance instance,
3105 unsigned int vectorization_factor)
3106 {
3107 gimple stmt;
3108 bool grouped_store, is_store;
3109 gimple_stmt_iterator si;
3110 stmt_vec_info stmt_info;
3111 unsigned int vec_stmts_size, nunits, group_size;
3112 tree vectype;
3113 int i;
3114 slp_tree child;
3115
3116 if (!node)
3117 return false;
3118
3119 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
3120 vect_schedule_slp_instance (child, instance, vectorization_factor);
3121
3122 stmt = SLP_TREE_SCALAR_STMTS (node)[0];
3123 stmt_info = vinfo_for_stmt (stmt);
3124
3125 /* VECTYPE is the type of the destination. */
3126 vectype = STMT_VINFO_VECTYPE (stmt_info);
3127 nunits = (unsigned int) TYPE_VECTOR_SUBPARTS (vectype);
3128 group_size = SLP_INSTANCE_GROUP_SIZE (instance);
3129
3130 /* For each SLP instance calculate number of vector stmts to be created
3131 for the scalar stmts in each node of the SLP tree. Number of vector
3132 elements in one vector iteration is the number of scalar elements in
3133 one scalar iteration (GROUP_SIZE) multiplied by VF divided by vector
3134 size. */
3135 vec_stmts_size = (vectorization_factor * group_size) / nunits;
3136
3137 if (!SLP_TREE_VEC_STMTS (node).exists ())
3138 {
3139 SLP_TREE_VEC_STMTS (node).create (vec_stmts_size);
3140 SLP_TREE_NUMBER_OF_VEC_STMTS (node) = vec_stmts_size;
3141 }
3142
3143 if (dump_enabled_p ())
3144 {
3145 dump_printf_loc (MSG_NOTE,vect_location,
3146 "------>vectorizing SLP node starting from: ");
3147 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
3148 dump_printf (MSG_NOTE, "\n");
3149 }
3150
3151 /* Loads should be inserted before the first load. */
3152 if (SLP_INSTANCE_FIRST_LOAD_STMT (instance)
3153 && STMT_VINFO_GROUPED_ACCESS (stmt_info)
3154 && !REFERENCE_CLASS_P (gimple_get_lhs (stmt))
3155 && SLP_TREE_LOAD_PERMUTATION (node).exists ())
3156 si = gsi_for_stmt (SLP_INSTANCE_FIRST_LOAD_STMT (instance));
3157 else if (is_pattern_stmt_p (stmt_info))
3158 si = gsi_for_stmt (STMT_VINFO_RELATED_STMT (stmt_info));
3159 else
3160 si = gsi_for_stmt (stmt);
3161
3162 /* Stores should be inserted just before the last store. */
3163 if (STMT_VINFO_GROUPED_ACCESS (stmt_info)
3164 && REFERENCE_CLASS_P (gimple_get_lhs (stmt)))
3165 {
3166 gimple last_store = vect_find_last_store_in_slp_instance (instance);
3167 if (is_pattern_stmt_p (vinfo_for_stmt (last_store)))
3168 last_store = STMT_VINFO_RELATED_STMT (vinfo_for_stmt (last_store));
3169 si = gsi_for_stmt (last_store);
3170 }
3171
3172 /* Mark the first element of the reduction chain as reduction to properly
3173 transform the node. In the analysis phase only the last element of the
3174 chain is marked as reduction. */
3175 if (GROUP_FIRST_ELEMENT (stmt_info) && !STMT_VINFO_GROUPED_ACCESS (stmt_info)
3176 && GROUP_FIRST_ELEMENT (stmt_info) == stmt)
3177 {
3178 STMT_VINFO_DEF_TYPE (stmt_info) = vect_reduction_def;
3179 STMT_VINFO_TYPE (stmt_info) = reduc_vec_info_type;
3180 }
3181
3182 is_store = vect_transform_stmt (stmt, &si, &grouped_store, node, instance);
3183 return is_store;
3184 }
3185
3186 /* Replace scalar calls from SLP node NODE with setting of their lhs to zero.
3187 For loop vectorization this is done in vectorizable_call, but for SLP
3188 it needs to be deferred until end of vect_schedule_slp, because multiple
3189 SLP instances may refer to the same scalar stmt. */
3190
3191 static void
3192 vect_remove_slp_scalar_calls (slp_tree node)
3193 {
3194 gimple stmt, new_stmt;
3195 gimple_stmt_iterator gsi;
3196 int i;
3197 slp_tree child;
3198 tree lhs;
3199 stmt_vec_info stmt_info;
3200
3201 if (!node)
3202 return;
3203
3204 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
3205 vect_remove_slp_scalar_calls (child);
3206
3207 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, stmt)
3208 {
3209 if (!is_gimple_call (stmt) || gimple_bb (stmt) == NULL)
3210 continue;
3211 stmt_info = vinfo_for_stmt (stmt);
3212 if (stmt_info == NULL
3213 || is_pattern_stmt_p (stmt_info)
3214 || !PURE_SLP_STMT (stmt_info))
3215 continue;
3216 lhs = gimple_call_lhs (stmt);
3217 new_stmt = gimple_build_assign (lhs, build_zero_cst (TREE_TYPE (lhs)));
3218 set_vinfo_for_stmt (new_stmt, stmt_info);
3219 set_vinfo_for_stmt (stmt, NULL);
3220 STMT_VINFO_STMT (stmt_info) = new_stmt;
3221 gsi = gsi_for_stmt (stmt);
3222 gsi_replace (&gsi, new_stmt, false);
3223 SSA_NAME_DEF_STMT (gimple_assign_lhs (new_stmt)) = new_stmt;
3224 }
3225 }
3226
/* Generate vector code for all SLP instances in the loop/basic block.
   Exactly one of LOOP_VINFO and BB_VINFO is used: loop SLP uses the
   loop's vectorization factor, basic-block SLP uses VF = 1.
   Returns the is-store flag of the LAST scheduled instance (is_store is
   overwritten on each iteration of the first loop below).  */

bool
vect_schedule_slp (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo)
{
  vec<slp_instance> slp_instances;
  slp_instance instance;
  unsigned int i, vf;
  bool is_store = false;

  if (loop_vinfo)
    {
      slp_instances = LOOP_VINFO_SLP_INSTANCES (loop_vinfo);
      vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
    }
  else
    {
      slp_instances = BB_VINFO_SLP_INSTANCES (bb_vinfo);
      /* Basic-block vectorization has no unrolling; one vector stmt
	 covers one scalar iteration.  */
      vf = 1;
    }

  /* Phase 1: emit vector code for every instance.  */
  FOR_EACH_VEC_ELT (slp_instances, i, instance)
    {
      /* Schedule the tree of INSTANCE.  */
      is_store = vect_schedule_slp_instance (SLP_INSTANCE_TREE (instance),
                                             instance, vf);
      if (dump_enabled_p ())
	dump_printf_loc (MSG_NOTE, vect_location,
                         "vectorizing stmts using SLP.\n");
    }

  /* Phase 2: clean up the now-redundant scalar stmts.  */
  FOR_EACH_VEC_ELT (slp_instances, i, instance)
    {
      slp_tree root = SLP_INSTANCE_TREE (instance);
      gimple store;
      unsigned int j;
      gimple_stmt_iterator gsi;

      /* Remove scalar call stmts.  Do not do this for basic-block
	 vectorization as not all uses may be vectorized.
	 ??? Why should this be necessary?  DCE should be able to
	 remove the stmts itself.
	 ??? For BB vectorization we can as well remove scalar
	 stmts starting from the SLP tree root if they have no
	 uses.  */
      if (loop_vinfo)
	vect_remove_slp_scalar_calls (root);

      /* Remove up to GROUP_SIZE scalar stmts of the root node; stop at
	 the first stmt without a data reference (not a grouped store).  */
      for (j = 0; SLP_TREE_SCALAR_STMTS (root).iterate (j, &store)
                  && j < SLP_INSTANCE_GROUP_SIZE (instance); j++)
        {
          if (!STMT_VINFO_DATA_REF (vinfo_for_stmt (store)))
            break;

	  /* For pattern stmts, remove the original stmt instead.  */
         if (is_pattern_stmt_p (vinfo_for_stmt (store)))
           store = STMT_VINFO_RELATED_STMT (vinfo_for_stmt (store));
          /* Free the attached stmt_vec_info and remove the stmt.  */
          gsi = gsi_for_stmt (store);
          unlink_stmt_vdef (store);
          gsi_remove (&gsi, true);
          release_defs (store);
          free_stmt_vec_info (store);
        }
    }

  return is_store;
}
3294
3295
3296 /* Vectorize the basic block. */
3297
3298 void
3299 vect_slp_transform_bb (basic_block bb)
3300 {
3301 bb_vec_info bb_vinfo = vec_info_for_bb (bb);
3302 gimple_stmt_iterator si;
3303
3304 gcc_assert (bb_vinfo);
3305
3306 if (dump_enabled_p ())
3307 dump_printf_loc (MSG_NOTE, vect_location, "SLPing BB\n");
3308
3309 for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
3310 {
3311 gimple stmt = gsi_stmt (si);
3312 stmt_vec_info stmt_info;
3313
3314 if (dump_enabled_p ())
3315 {
3316 dump_printf_loc (MSG_NOTE, vect_location,
3317 "------>SLPing statement: ");
3318 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
3319 dump_printf (MSG_NOTE, "\n");
3320 }
3321
3322 stmt_info = vinfo_for_stmt (stmt);
3323 gcc_assert (stmt_info);
3324
3325 /* Schedule all the SLP instances when the first SLP stmt is reached. */
3326 if (STMT_SLP_TYPE (stmt_info))
3327 {
3328 vect_schedule_slp (NULL, bb_vinfo);
3329 break;
3330 }
3331 }
3332
3333 if (dump_enabled_p ())
3334 dump_printf_loc (MSG_NOTE, vect_location,
3335 "BASIC BLOCK VECTORIZED\n");
3336
3337 destroy_bb_vec_info (bb_vinfo);
3338 }