1 /* SLP - Basic Block Vectorization
2 Copyright (C) 2007-2014 Free Software Foundation, Inc.
3 Contributed by Dorit Naishlos <dorit@il.ibm.com>
4 and Ira Rosen <irar@il.ibm.com>
5
6 This file is part of GCC.
7
8 GCC is free software; you can redistribute it and/or modify it under
9 the terms of the GNU General Public License as published by the Free
10 Software Foundation; either version 3, or (at your option) any later
11 version.
12
13 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
14 WARRANTY; without even the implied warranty of MERCHANTABILITY or
15 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
16 for more details.
17
18 You should have received a copy of the GNU General Public License
19 along with GCC; see the file COPYING3. If not see
20 <http://www.gnu.org/licenses/>. */
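
/* Illustrative sketch, not taken from any particular testcase: basic block
   SLP looks for groups of isomorphic scalar statements such as

     a[0] = b[0] + c[0];
     a[1] = b[1] + c[1];
     a[2] = b[2] + c[2];
     a[3] = b[3] + c[3];

   and, when the target supports it, replaces the group with a single
   vector statement, conceptually  a[0:3] = b[0:3] + c[0:3].  */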
21
22 #include "config.h"
23 #include "system.h"
24 #include "coretypes.h"
25 #include "dumpfile.h"
26 #include "tm.h"
27 #include "tree.h"
28 #include "stor-layout.h"
29 #include "target.h"
30 #include "basic-block.h"
31 #include "gimple-pretty-print.h"
32 #include "tree-ssa-alias.h"
33 #include "internal-fn.h"
34 #include "gimple-expr.h"
35 #include "is-a.h"
36 #include "gimple.h"
37 #include "gimple-iterator.h"
38 #include "gimple-ssa.h"
39 #include "tree-phinodes.h"
40 #include "ssa-iterators.h"
41 #include "stringpool.h"
42 #include "tree-ssanames.h"
43 #include "tree-pass.h"
44 #include "cfgloop.h"
45 #include "expr.h"
46 #include "recog.h" /* FIXME: for insn_data */
47 #include "optabs.h"
48 #include "tree-vectorizer.h"
49 #include "langhooks.h"
50
51 /* Extract the location of the basic block in the source code.
52 Return a statement's location from BB if one is found, UNKNOWN_LOCATION otherwise. */
53
54 source_location
55 find_bb_location (basic_block bb)
56 {
57 gimple stmt = NULL;
58 gimple_stmt_iterator si;
59
60 if (!bb)
61 return UNKNOWN_LOCATION;
62
63 for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
64 {
65 stmt = gsi_stmt (si);
66 if (gimple_location (stmt) != UNKNOWN_LOCATION)
67 return gimple_location (stmt);
68 }
69
70 return UNKNOWN_LOCATION;
71 }
72
73
74 /* Recursively free the memory allocated for the SLP tree rooted at NODE. */
75
76 static void
77 vect_free_slp_tree (slp_tree node)
78 {
79 int i;
80 slp_tree child;
81
82 if (!node)
83 return;
84
85 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
86 vect_free_slp_tree (child);
87
88 SLP_TREE_CHILDREN (node).release ();
89 SLP_TREE_SCALAR_STMTS (node).release ();
90 SLP_TREE_VEC_STMTS (node).release ();
91 SLP_TREE_LOAD_PERMUTATION (node).release ();
92
93 free (node);
94 }
95
96
97 /* Free the memory allocated for the SLP instance. */
98
99 void
100 vect_free_slp_instance (slp_instance instance)
101 {
102 vect_free_slp_tree (SLP_INSTANCE_TREE (instance));
103 SLP_INSTANCE_LOADS (instance).release ();
104 SLP_INSTANCE_BODY_COST_VEC (instance).release ();
105 free (instance);
106 }
107
108
109 /* Create an SLP node for SCALAR_STMTS. */
110
111 static slp_tree
112 vect_create_new_slp_node (vec<gimple> scalar_stmts)
113 {
114 slp_tree node;
115 gimple stmt = scalar_stmts[0];
116 unsigned int nops;
117
118 if (is_gimple_call (stmt))
119 nops = gimple_call_num_args (stmt);
120 else if (is_gimple_assign (stmt))
121 {
122 nops = gimple_num_ops (stmt) - 1;
123 if (gimple_assign_rhs_code (stmt) == COND_EXPR)
124 nops++;
125 }
126 else
127 return NULL;
128
129 node = XNEW (struct _slp_tree);
130 SLP_TREE_SCALAR_STMTS (node) = scalar_stmts;
131 SLP_TREE_VEC_STMTS (node).create (0);
132 SLP_TREE_CHILDREN (node).create (nops);
133 SLP_TREE_LOAD_PERMUTATION (node) = vNULL;
134
135 return node;
136 }
137
138
139 /* Allocate operands info for NOPS operands, and GROUP_SIZE def-stmts for each
140 operand. */
141 static vec<slp_oprnd_info>
142 vect_create_oprnd_info (int nops, int group_size)
143 {
144 int i;
145 slp_oprnd_info oprnd_info;
146 vec<slp_oprnd_info> oprnds_info;
147
148 oprnds_info.create (nops);
149 for (i = 0; i < nops; i++)
150 {
151 oprnd_info = XNEW (struct _slp_oprnd_info);
152 oprnd_info->def_stmts.create (group_size);
153 oprnd_info->first_dt = vect_uninitialized_def;
154 oprnd_info->first_op_type = NULL_TREE;
155 oprnd_info->first_pattern = false;
156 oprnds_info.quick_push (oprnd_info);
157 }
158
159 return oprnds_info;
160 }
161
162
163 /* Free operands info. */
164
165 static void
166 vect_free_oprnd_info (vec<slp_oprnd_info> &oprnds_info)
167 {
168 int i;
169 slp_oprnd_info oprnd_info;
170
171 FOR_EACH_VEC_ELT (oprnds_info, i, oprnd_info)
172 {
173 oprnd_info->def_stmts.release ();
174 XDELETE (oprnd_info);
175 }
176
177 oprnds_info.release ();
178 }
179
180
181 /* Find the place of the data-ref in STMT in the interleaving chain that starts
182 from FIRST_STMT. Return -1 if the data-ref is not a part of the chain. */
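
/* For example (illustrative only): given the interleaving chain of loads

     x0 = a[i];  x1 = a[i+1];  x2 = a[i+2];  x3 = a[i+3];

   that starts at the load of a[i], the load of a[i+2] is at place 2,
   and a load outside the chain yields -1.  */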
183
184 static int
185 vect_get_place_in_interleaving_chain (gimple stmt, gimple first_stmt)
186 {
187 gimple next_stmt = first_stmt;
188 int result = 0;
189
190 if (first_stmt != GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)))
191 return -1;
192
193 do
194 {
195 if (next_stmt == stmt)
196 return result;
197 result++;
198 next_stmt = GROUP_NEXT_ELEMENT (vinfo_for_stmt (next_stmt));
199 }
200 while (next_stmt);
201
202 return -1;
203 }
204
205
206 /* Get the defs for the rhs of STMT (collect them in OPRNDS_INFO), check that
207 they are of a valid type and that they match the defs of the first stmt of
208 the SLP group (stored in OPRNDS_INFO). Return -1 on a fatal error, 1 if
209 the mismatch could be corrected by swapping operands of the operation,
210 and 0 if everything is OK. */
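
/* A hypothetical example of the operand swapping this function performs:
   in the group

     s0 = x + a[0];
     s1 = a[1] + x;

   the defs of s1 match those of s0 only after the operands of the
   commutative PLUS in s1 are swapped.  */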
211
212 static int
213 vect_get_and_check_slp_defs (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo,
214 gimple stmt, bool first,
215 vec<slp_oprnd_info> *oprnds_info)
216 {
217 tree oprnd;
218 unsigned int i, number_of_oprnds;
219 tree def;
220 gimple def_stmt;
221 enum vect_def_type dt = vect_uninitialized_def;
222 struct loop *loop = NULL;
223 bool pattern = false;
224 slp_oprnd_info oprnd_info;
225 int first_op_idx = 1;
226 bool commutative = false;
227 bool first_op_cond = false;
228
229 if (loop_vinfo)
230 loop = LOOP_VINFO_LOOP (loop_vinfo);
231
232 if (is_gimple_call (stmt))
233 {
234 number_of_oprnds = gimple_call_num_args (stmt);
235 first_op_idx = 3;
236 }
237 else if (is_gimple_assign (stmt))
238 {
239 enum tree_code code = gimple_assign_rhs_code (stmt);
240 number_of_oprnds = gimple_num_ops (stmt) - 1;
241 if (gimple_assign_rhs_code (stmt) == COND_EXPR)
242 {
243 first_op_cond = true;
244 commutative = true;
245 number_of_oprnds++;
246 }
247 else
248 commutative = commutative_tree_code (code);
249 }
250 else
251 return -1;
252
253 bool swapped = false;
254 for (i = 0; i < number_of_oprnds; i++)
255 {
256 again:
257 if (first_op_cond)
258 {
259 if (i == 0 || i == 1)
260 oprnd = TREE_OPERAND (gimple_op (stmt, first_op_idx),
261 swapped ? !i : i);
262 else
263 oprnd = gimple_op (stmt, first_op_idx + i - 1);
264 }
265 else
266 oprnd = gimple_op (stmt, first_op_idx + (swapped ? !i : i));
267
268 oprnd_info = (*oprnds_info)[i];
269
270 if (!vect_is_simple_use (oprnd, NULL, loop_vinfo, bb_vinfo, &def_stmt,
271 &def, &dt)
272 || (!def_stmt && dt != vect_constant_def))
273 {
274 if (dump_enabled_p ())
275 {
276 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
277 "Build SLP failed: can't find def for ");
278 dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM, oprnd);
279 dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
280 }
281
282 return -1;
283 }
284
285 /* Check if DEF_STMT is a part of a pattern in LOOP and get the def stmt
286 from the pattern. Check that all the stmts of the node are in the
287 pattern. */
288 if (def_stmt && gimple_bb (def_stmt)
289 && ((loop && flow_bb_inside_loop_p (loop, gimple_bb (def_stmt)))
290 || (!loop && gimple_bb (def_stmt) == BB_VINFO_BB (bb_vinfo)
291 && gimple_code (def_stmt) != GIMPLE_PHI))
292 && vinfo_for_stmt (def_stmt)
293 && STMT_VINFO_IN_PATTERN_P (vinfo_for_stmt (def_stmt))
294 && !STMT_VINFO_RELEVANT (vinfo_for_stmt (def_stmt))
295 && !STMT_VINFO_LIVE_P (vinfo_for_stmt (def_stmt)))
296 {
297 pattern = true;
298 if (!first && !oprnd_info->first_pattern)
299 {
300 if (i == 0
301 && !swapped
302 && commutative)
303 {
304 swapped = true;
305 goto again;
306 }
307
308 if (dump_enabled_p ())
309 {
310 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
311 "Build SLP failed: some of the stmts"
312 " are in a pattern, and others are not ");
313 dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM, oprnd);
314 dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
315 }
316
317 return 1;
318 }
319
320 def_stmt = STMT_VINFO_RELATED_STMT (vinfo_for_stmt (def_stmt));
321 dt = STMT_VINFO_DEF_TYPE (vinfo_for_stmt (def_stmt));
322
323 if (dt == vect_unknown_def_type)
324 {
325 if (dump_enabled_p ())
326 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
327 "Unsupported pattern.\n");
328 return -1;
329 }
330
331 switch (gimple_code (def_stmt))
332 {
333 case GIMPLE_PHI:
334 def = gimple_phi_result (def_stmt);
335 break;
336
337 case GIMPLE_ASSIGN:
338 def = gimple_assign_lhs (def_stmt);
339 break;
340
341 default:
342 if (dump_enabled_p ())
343 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
344 "unsupported defining stmt:\n");
345 return -1;
346 }
347 }
348
349 if (first)
350 {
351 oprnd_info->first_dt = dt;
352 oprnd_info->first_pattern = pattern;
353 oprnd_info->first_op_type = TREE_TYPE (oprnd);
354 }
355 else
356 {
357 /* Not first stmt of the group, check that the def-stmt/s match
358 the def-stmt/s of the first stmt. Allow different definition
359 types for reduction chains: the first stmt must be a
360 vect_reduction_def (a phi node), and the rest
361 vect_internal_def. */
362 if (((oprnd_info->first_dt != dt
363 && !(oprnd_info->first_dt == vect_reduction_def
364 && dt == vect_internal_def)
365 && !((oprnd_info->first_dt == vect_external_def
366 || oprnd_info->first_dt == vect_constant_def)
367 && (dt == vect_external_def
368 || dt == vect_constant_def)))
369 || !types_compatible_p (oprnd_info->first_op_type,
370 TREE_TYPE (oprnd))))
371 {
372 /* Try swapping operands if we got a mismatch. */
373 if (i == 0
374 && !swapped
375 && commutative)
376 {
377 swapped = true;
378 goto again;
379 }
380
381 if (dump_enabled_p ())
382 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
383 "Build SLP failed: different types\n");
384
385 return 1;
386 }
387 }
388
389 /* Check the types of the definitions. */
390 switch (dt)
391 {
392 case vect_constant_def:
393 case vect_external_def:
394 case vect_reduction_def:
395 break;
396
397 case vect_internal_def:
398 oprnd_info->def_stmts.quick_push (def_stmt);
399 break;
400
401 default:
402 /* FORNOW: Not supported. */
403 if (dump_enabled_p ())
404 {
405 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
406 "Build SLP failed: illegal type of def ");
407 dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM, def);
408 dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
409 }
410
411 return -1;
412 }
413 }
414
415 /* Swap operands. */
416 if (swapped)
417 {
418 if (first_op_cond)
419 {
420 tree cond = gimple_assign_rhs1 (stmt);
421 swap_ssa_operands (stmt, &TREE_OPERAND (cond, 0),
422 &TREE_OPERAND (cond, 1));
423 TREE_SET_CODE (cond, swap_tree_comparison (TREE_CODE (cond)));
424 }
425 else
426 swap_ssa_operands (stmt, gimple_assign_rhs1_ptr (stmt),
427 gimple_assign_rhs2_ptr (stmt));
428 }
429
430 return 0;
431 }
432
433
434 /* Verify that the scalar stmts STMTS are isomorphic, do not require data
435 permutation, and are not of unsupported types of operation. Return
436 true if so; otherwise return false and indicate in *MATCHES which
437 stmts are not isomorphic to the first one. If MATCHES[0]
438 is false, this indicates that the comparison could not be
439 carried out or the stmts will never be vectorized by SLP. */
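
/* Illustrative example: the pair

     a[0] = b[0] + c[0];
     a[1] = b[1] + c[1];

   is isomorphic (the same operation on compatible types), whereas replacing
   the second stmt by  a[1] = b[1] - c[1]  makes the operations differ,
   so MATCHES[1] would be false while MATCHES[0] stays true.  */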
440
441 static bool
442 vect_build_slp_tree_1 (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo,
443 vec<gimple> stmts, unsigned int group_size,
444 unsigned nops, unsigned int *max_nunits,
445 unsigned int vectorization_factor, bool *matches)
446 {
447 unsigned int i;
448 gimple stmt = stmts[0];
449 enum tree_code first_stmt_code = ERROR_MARK, rhs_code = ERROR_MARK;
450 enum tree_code first_cond_code = ERROR_MARK;
451 tree lhs;
452 bool need_same_oprnds = false;
453 tree vectype, scalar_type, first_op1 = NULL_TREE;
454 optab optab;
455 int icode;
456 enum machine_mode optab_op2_mode;
457 enum machine_mode vec_mode;
458 struct data_reference *first_dr;
459 HOST_WIDE_INT dummy;
460 gimple first_load = NULL, prev_first_load = NULL, old_first_load = NULL;
461 tree cond;
462
463 /* For every stmt in NODE find its def stmt/s. */
464 FOR_EACH_VEC_ELT (stmts, i, stmt)
465 {
466 matches[i] = false;
467
468 if (dump_enabled_p ())
469 {
470 dump_printf_loc (MSG_NOTE, vect_location, "Build SLP for ");
471 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
472 dump_printf (MSG_NOTE, "\n");
473 }
474
475 /* Fail to vectorize statements marked as unvectorizable. */
476 if (!STMT_VINFO_VECTORIZABLE (vinfo_for_stmt (stmt)))
477 {
478 if (dump_enabled_p ())
479 {
480 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
481 "Build SLP failed: unvectorizable statement ");
482 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
483 dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
484 }
485 /* Fatal mismatch. */
486 matches[0] = false;
487 return false;
488 }
489
490 lhs = gimple_get_lhs (stmt);
491 if (lhs == NULL_TREE)
492 {
493 if (dump_enabled_p ())
494 {
495 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
496 "Build SLP failed: not GIMPLE_ASSIGN nor "
497 "GIMPLE_CALL ");
498 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
499 dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
500 }
501 /* Fatal mismatch. */
502 matches[0] = false;
503 return false;
504 }
505
506 if (is_gimple_assign (stmt)
507 && gimple_assign_rhs_code (stmt) == COND_EXPR
508 && (cond = gimple_assign_rhs1 (stmt))
509 && !COMPARISON_CLASS_P (cond))
510 {
511 if (dump_enabled_p ())
512 {
513 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
514 "Build SLP failed: condition is not "
515 "comparison ");
516 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
517 dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
518 }
519 /* Fatal mismatch. */
520 matches[0] = false;
521 return false;
522 }
523
524 scalar_type = vect_get_smallest_scalar_type (stmt, &dummy, &dummy);
525 vectype = get_vectype_for_scalar_type (scalar_type);
526 if (!vectype)
527 {
528 if (dump_enabled_p ())
529 {
530 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
531 "Build SLP failed: unsupported data-type ");
532 dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
533 scalar_type);
534 dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
535 }
536 /* Fatal mismatch. */
537 matches[0] = false;
538 return false;
539 }
540
541 /* In case of multiple types we need to detect the smallest type. */
542 if (*max_nunits < TYPE_VECTOR_SUBPARTS (vectype))
543 {
544 *max_nunits = TYPE_VECTOR_SUBPARTS (vectype);
545 if (bb_vinfo)
546 vectorization_factor = *max_nunits;
547 }
548
549 if (is_gimple_call (stmt))
550 {
551 rhs_code = CALL_EXPR;
552 if (gimple_call_internal_p (stmt)
553 || gimple_call_tail_p (stmt)
554 || gimple_call_noreturn_p (stmt)
555 || !gimple_call_nothrow_p (stmt)
556 || gimple_call_chain (stmt))
557 {
558 if (dump_enabled_p ())
559 {
560 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
561 "Build SLP failed: unsupported call type ");
562 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
563 dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
564 }
565 /* Fatal mismatch. */
566 matches[0] = false;
567 return false;
568 }
569 }
570 else
571 rhs_code = gimple_assign_rhs_code (stmt);
572
573 /* Check the operation. */
574 if (i == 0)
575 {
576 first_stmt_code = rhs_code;
577
578 /* Shift arguments should be equal in all the packed stmts for a
579 vector shift with scalar shift operand. */
580 if (rhs_code == LSHIFT_EXPR || rhs_code == RSHIFT_EXPR
581 || rhs_code == LROTATE_EXPR
582 || rhs_code == RROTATE_EXPR)
583 {
584 vec_mode = TYPE_MODE (vectype);
585
586 /* First see if we have a vector/vector shift. */
587 optab = optab_for_tree_code (rhs_code, vectype,
588 optab_vector);
589
590 if (!optab
591 || optab_handler (optab, vec_mode) == CODE_FOR_nothing)
592 {
593 /* No vector/vector shift, try for a vector/scalar shift. */
594 optab = optab_for_tree_code (rhs_code, vectype,
595 optab_scalar);
596
597 if (!optab)
598 {
599 if (dump_enabled_p ())
600 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
601 "Build SLP failed: no optab.\n");
602 /* Fatal mismatch. */
603 matches[0] = false;
604 return false;
605 }
606 icode = (int) optab_handler (optab, vec_mode);
607 if (icode == CODE_FOR_nothing)
608 {
609 if (dump_enabled_p ())
610 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
611 "Build SLP failed: "
612 "op not supported by target.\n");
613 /* Fatal mismatch. */
614 matches[0] = false;
615 return false;
616 }
617 optab_op2_mode = insn_data[icode].operand[2].mode;
618 if (!VECTOR_MODE_P (optab_op2_mode))
619 {
620 need_same_oprnds = true;
621 first_op1 = gimple_assign_rhs2 (stmt);
622 }
623 }
624 }
625 else if (rhs_code == WIDEN_LSHIFT_EXPR)
626 {
627 need_same_oprnds = true;
628 first_op1 = gimple_assign_rhs2 (stmt);
629 }
630 }
631 else
632 {
633 if (first_stmt_code != rhs_code
634 && (first_stmt_code != IMAGPART_EXPR
635 || rhs_code != REALPART_EXPR)
636 && (first_stmt_code != REALPART_EXPR
637 || rhs_code != IMAGPART_EXPR)
638 && !(STMT_VINFO_GROUPED_ACCESS (vinfo_for_stmt (stmt))
639 && (first_stmt_code == ARRAY_REF
640 || first_stmt_code == BIT_FIELD_REF
641 || first_stmt_code == INDIRECT_REF
642 || first_stmt_code == COMPONENT_REF
643 || first_stmt_code == MEM_REF)))
644 {
645 if (dump_enabled_p ())
646 {
647 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
648 "Build SLP failed: different operation "
649 "in stmt ");
650 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
651 dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
652 }
653 /* Mismatch. */
654 continue;
655 }
656
657 if (need_same_oprnds
658 && !operand_equal_p (first_op1, gimple_assign_rhs2 (stmt), 0))
659 {
660 if (dump_enabled_p ())
661 {
662 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
663 "Build SLP failed: different shift "
664 "arguments in ");
665 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
666 dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
667 }
668 /* Mismatch. */
669 continue;
670 }
671
672 if (rhs_code == CALL_EXPR)
673 {
674 gimple first_stmt = stmts[0];
675 if (gimple_call_num_args (stmt) != nops
676 || !operand_equal_p (gimple_call_fn (first_stmt),
677 gimple_call_fn (stmt), 0)
678 || gimple_call_fntype (first_stmt)
679 != gimple_call_fntype (stmt))
680 {
681 if (dump_enabled_p ())
682 {
683 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
684 "Build SLP failed: different calls in ");
685 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
686 stmt, 0);
687 dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
688 }
689 /* Mismatch. */
690 continue;
691 }
692 }
693 }
694
695 /* Grouped store or load. */
696 if (STMT_VINFO_GROUPED_ACCESS (vinfo_for_stmt (stmt)))
697 {
698 if (REFERENCE_CLASS_P (lhs))
699 {
700 /* Store. */
701 ;
702 }
703 else
704 {
705 /* Load. */
706 unsigned unrolling_factor
707 = least_common_multiple
708 (*max_nunits, group_size) / group_size;
709 /* FORNOW: Check that there is no gap between the loads
710 and no gap between the groups when we need to load
711 multiple groups at once.
712 ??? We should enhance this to only disallow gaps
713 inside vectors. */
714 if ((unrolling_factor > 1
715 && GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)) == stmt
716 && GROUP_GAP (vinfo_for_stmt (stmt)) != 0)
717 || (GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)) != stmt
718 && GROUP_GAP (vinfo_for_stmt (stmt)) != 1))
719 {
720 if (dump_enabled_p ())
721 {
722 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
723 "Build SLP failed: grouped "
724 "loads have gaps ");
725 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
726 stmt, 0);
727 dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
728 }
729 /* Fatal mismatch. */
730 matches[0] = false;
731 return false;
732 }
733
734 /* Check that the size of interleaved loads group is not
735 greater than the SLP group size. */
736 unsigned ncopies
737 = vectorization_factor / TYPE_VECTOR_SUBPARTS (vectype);
738 if (loop_vinfo
739 && GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)) == stmt
740 && ((GROUP_SIZE (vinfo_for_stmt (stmt))
741 - GROUP_GAP (vinfo_for_stmt (stmt)))
742 > ncopies * group_size))
743 {
744 if (dump_enabled_p ())
745 {
746 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
747 "Build SLP failed: the number "
748 "of interleaved loads is greater than "
749 "the SLP group size ");
750 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
751 stmt, 0);
752 dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
753 }
754 /* Fatal mismatch. */
755 matches[0] = false;
756 return false;
757 }
758
759 old_first_load = first_load;
760 first_load = GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt));
761 if (prev_first_load)
762 {
763 /* Check that there are no loads from different interleaving
764 chains in the same node. */
765 if (prev_first_load != first_load)
766 {
767 if (dump_enabled_p ())
768 {
769 dump_printf_loc (MSG_MISSED_OPTIMIZATION,
770 vect_location,
771 "Build SLP failed: different "
772 "interleaving chains in one node ");
773 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
774 stmt, 0);
775 dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
776 }
777 /* Mismatch. */
778 continue;
779 }
780 }
781 else
782 prev_first_load = first_load;
783
784 /* In some cases a group of loads is just the same load
785 repeated N times. Only analyze its cost once. */
786 if (first_load == stmt && old_first_load != first_load)
787 {
788 first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (stmt));
789 if (vect_supportable_dr_alignment (first_dr, false)
790 == dr_unaligned_unsupported)
791 {
792 if (dump_enabled_p ())
793 {
794 dump_printf_loc (MSG_MISSED_OPTIMIZATION,
795 vect_location,
796 "Build SLP failed: unsupported "
797 "unaligned load ");
798 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
799 stmt, 0);
800 dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
801 }
802 /* Fatal mismatch. */
803 matches[0] = false;
804 return false;
805 }
806 }
807 }
808 } /* Grouped access. */
809 else
810 {
811 if (TREE_CODE_CLASS (rhs_code) == tcc_reference)
812 {
813 /* Non-grouped load. */
814 if (dump_enabled_p ())
815 {
816 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
817 "Build SLP failed: not grouped load ");
818 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
819 dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
820 }
821
822 /* FORNOW: Non-grouped loads are not supported. */
823 /* Fatal mismatch. */
824 matches[0] = false;
825 return false;
826 }
827
828 /* Not a memory operation. */
829 if (TREE_CODE_CLASS (rhs_code) != tcc_binary
830 && TREE_CODE_CLASS (rhs_code) != tcc_unary
831 && rhs_code != COND_EXPR
832 && rhs_code != CALL_EXPR)
833 {
834 if (dump_enabled_p ())
835 {
836 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
837 "Build SLP failed: operation");
838 dump_printf (MSG_MISSED_OPTIMIZATION, " unsupported ");
839 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
840 dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
841 }
842 /* Fatal mismatch. */
843 matches[0] = false;
844 return false;
845 }
846
847 if (rhs_code == COND_EXPR)
848 {
849 tree cond_expr = gimple_assign_rhs1 (stmt);
850
851 if (i == 0)
852 first_cond_code = TREE_CODE (cond_expr);
853 else if (first_cond_code != TREE_CODE (cond_expr))
854 {
855 if (dump_enabled_p ())
856 {
857 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
858 "Build SLP failed: different"
859 " operation");
860 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
861 stmt, 0);
862 dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
863 }
864 /* Mismatch. */
865 continue;
866 }
867 }
868 }
869
870 matches[i] = true;
871 }
872
873 for (i = 0; i < group_size; ++i)
874 if (!matches[i])
875 return false;
876
877 return true;
878 }
879
880 /* Recursively build an SLP tree starting from NODE.
881 Fail (and return false) if def-stmts are not isomorphic, require
882 data permutation, or are of unsupported types of operation.
883 Otherwise, return true.
884 MATCHES, NPERMUTES and TREE_SIZE may be NULL, in which case local
885 scratch storage is used for them. */
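
/* A possible shape of the resulting tree for a group of two stores,
   purely illustrative:

     store node  { a[0] = t0;   a[1] = t1 }
       child:  plus node  { t0 = x0 + y0;  t1 = x1 + y1 }
         child:  load node  { x0 = b[0];  x1 = b[1] }
         child:  load node  { y0 = c[0];  y1 = c[1] }

   Grouped loads terminate the recursion and are pushed onto LOADS.  */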
886
887 static bool
888 vect_build_slp_tree (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo,
889 slp_tree *node, unsigned int group_size,
890 unsigned int *max_nunits,
891 vec<slp_tree> *loads,
892 unsigned int vectorization_factor,
893 bool *matches, unsigned *npermutes, unsigned *tree_size,
894 unsigned max_tree_size)
895 {
896 unsigned nops, i, this_npermutes = 0, this_tree_size = 0;
897 gimple stmt;
898
899 if (!matches)
900 matches = XALLOCAVEC (bool, group_size);
901 if (!npermutes)
902 npermutes = &this_npermutes;
903
904 matches[0] = false;
905
906 stmt = SLP_TREE_SCALAR_STMTS (*node)[0];
907 if (is_gimple_call (stmt))
908 nops = gimple_call_num_args (stmt);
909 else if (is_gimple_assign (stmt))
910 {
911 nops = gimple_num_ops (stmt) - 1;
912 if (gimple_assign_rhs_code (stmt) == COND_EXPR)
913 nops++;
914 }
915 else
916 return false;
917
918 if (!vect_build_slp_tree_1 (loop_vinfo, bb_vinfo,
919 SLP_TREE_SCALAR_STMTS (*node), group_size, nops,
920 max_nunits, vectorization_factor, matches))
921 return false;
922
923 /* If the SLP node is a load, terminate the recursion. */
924 if (STMT_VINFO_GROUPED_ACCESS (vinfo_for_stmt (stmt))
925 && DR_IS_READ (STMT_VINFO_DATA_REF (vinfo_for_stmt (stmt))))
926 {
927 loads->safe_push (*node);
928 return true;
929 }
930
931 /* Get at the operands, verifying they are compatible. */
932 vec<slp_oprnd_info> oprnds_info = vect_create_oprnd_info (nops, group_size);
933 slp_oprnd_info oprnd_info;
934 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (*node), i, stmt)
935 {
936 switch (vect_get_and_check_slp_defs (loop_vinfo, bb_vinfo,
937 stmt, (i == 0), &oprnds_info))
938 {
939 case 0:
940 break;
941 case -1:
942 matches[0] = false;
943 vect_free_oprnd_info (oprnds_info);
944 return false;
945 case 1:
946 matches[i] = false;
947 break;
948 }
949 }
950 for (i = 0; i < group_size; ++i)
951 if (!matches[i])
952 {
953 vect_free_oprnd_info (oprnds_info);
954 return false;
955 }
956
957 stmt = SLP_TREE_SCALAR_STMTS (*node)[0];
958
959 /* Create SLP_TREE nodes for the definition node/s. */
960 FOR_EACH_VEC_ELT (oprnds_info, i, oprnd_info)
961 {
962 slp_tree child;
963 unsigned old_nloads = loads->length ();
964 unsigned old_max_nunits = *max_nunits;
965
966 if (oprnd_info->first_dt != vect_internal_def)
967 continue;
968
969 if (++this_tree_size > max_tree_size)
970 {
971 vect_free_oprnd_info (oprnds_info);
972 return false;
973 }
974
975 child = vect_create_new_slp_node (oprnd_info->def_stmts);
976 if (!child)
977 {
978 vect_free_oprnd_info (oprnds_info);
979 return false;
980 }
981
982 bool *matches = XALLOCAVEC (bool, group_size);
983 if (vect_build_slp_tree (loop_vinfo, bb_vinfo, &child,
984 group_size, max_nunits, loads,
985 vectorization_factor, matches,
986 npermutes, &this_tree_size, max_tree_size))
987 {
988 oprnd_info->def_stmts = vNULL;
989 SLP_TREE_CHILDREN (*node).quick_push (child);
990 continue;
991 }
992
993 /* If the SLP build for operand zero failed and operand zero
994 and one can be commuted, try that for the scalar stmts
995 that failed the match. */
996 if (i == 0
997 /* A first scalar stmt mismatch signals a fatal mismatch. */
998 && matches[0]
999 /* ??? For COND_EXPRs we can swap the comparison operands
1000 as well as the arms under some constraints. */
1001 && nops == 2
1002 && oprnds_info[1]->first_dt == vect_internal_def
1003 && is_gimple_assign (stmt)
1004 && commutative_tree_code (gimple_assign_rhs_code (stmt))
1005 /* Do so only if the number of unsuccessful permutes was no more
1006 than a cut-off, as re-trying the recursive match on
1007 possibly each level of the tree would expose exponential
1008 behavior. */
1009 && *npermutes < 4)
1010 {
1011 /* Roll back. */
1012 *max_nunits = old_max_nunits;
1013 loads->truncate (old_nloads);
1014 /* Swap mismatched definition stmts. */
1015 dump_printf_loc (MSG_NOTE, vect_location,
1016 "Re-trying with swapped operands of stmts ");
1017 for (unsigned j = 0; j < group_size; ++j)
1018 if (!matches[j])
1019 {
1020 gimple tem = oprnds_info[0]->def_stmts[j];
1021 oprnds_info[0]->def_stmts[j] = oprnds_info[1]->def_stmts[j];
1022 oprnds_info[1]->def_stmts[j] = tem;
1023 dump_printf (MSG_NOTE, "%d ", j);
1024 }
1025 dump_printf (MSG_NOTE, "\n");
1026 /* And try again ... */
1027 if (vect_build_slp_tree (loop_vinfo, bb_vinfo, &child,
1028 group_size, max_nunits, loads,
1029 vectorization_factor,
1030 matches, npermutes, &this_tree_size,
1031 max_tree_size))
1032 {
1033 oprnd_info->def_stmts = vNULL;
1034 SLP_TREE_CHILDREN (*node).quick_push (child);
1035 continue;
1036 }
1037
1038 ++*npermutes;
1039 }
1040
1041 oprnd_info->def_stmts = vNULL;
1042 vect_free_slp_tree (child);
1043 vect_free_oprnd_info (oprnds_info);
1044 return false;
1045 }
1046
1047 if (tree_size)
1048 *tree_size += this_tree_size;
1049
1050 vect_free_oprnd_info (oprnds_info);
1051 return true;
1052 }
1053
1054 /* Dump an SLP tree NODE using the flags specified in DUMP_KIND. */
1055
1056 static void
1057 vect_print_slp_tree (int dump_kind, slp_tree node)
1058 {
1059 int i;
1060 gimple stmt;
1061 slp_tree child;
1062
1063 if (!node)
1064 return;
1065
1066 dump_printf (dump_kind, "node ");
1067 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, stmt)
1068 {
1069 dump_printf (dump_kind, "\n\tstmt %d ", i);
1070 dump_gimple_stmt (dump_kind, TDF_SLIM, stmt, 0);
1071 }
1072 dump_printf (dump_kind, "\n");
1073
1074 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
1075 vect_print_slp_tree (dump_kind, child);
1076 }
1077
1078
1079 /* Mark the tree rooted at NODE with MARK (PURE_SLP or HYBRID).
1080 If MARK is HYBRID, it refers to a specific stmt in NODE (the stmt at index
1081 J). Otherwise, MARK is PURE_SLP and J is -1, which indicates that all the
1082 stmts in NODE are to be marked. */
1083
1084 static void
1085 vect_mark_slp_stmts (slp_tree node, enum slp_vect_type mark, int j)
1086 {
1087 int i;
1088 gimple stmt;
1089 slp_tree child;
1090
1091 if (!node)
1092 return;
1093
1094 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, stmt)
1095 if (j < 0 || i == j)
1096 STMT_SLP_TYPE (vinfo_for_stmt (stmt)) = mark;
1097
1098 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
1099 vect_mark_slp_stmts (child, mark, j);
1100 }
1101
1102
1103 /* Mark the statements of the tree rooted at NODE as relevant (vect_used). */
1104
1105 static void
1106 vect_mark_slp_stmts_relevant (slp_tree node)
1107 {
1108 int i;
1109 gimple stmt;
1110 stmt_vec_info stmt_info;
1111 slp_tree child;
1112
1113 if (!node)
1114 return;
1115
1116 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, stmt)
1117 {
1118 stmt_info = vinfo_for_stmt (stmt);
1119 gcc_assert (!STMT_VINFO_RELEVANT (stmt_info)
1120 || STMT_VINFO_RELEVANT (stmt_info) == vect_used_in_scope);
1121 STMT_VINFO_RELEVANT (stmt_info) = vect_used_in_scope;
1122 }
1123
1124 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
1125 vect_mark_slp_stmts_relevant (child);
1126 }
1127
1128
1129 /* Rearrange the statements of NODE according to PERMUTATION. */
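
/* E.g. (illustrative), with GROUP_SIZE 4 and PERMUTATION {2, 0, 3, 1},
   statement I is moved to slot PERMUTATION[I], so {s0, s1, s2, s3}
   becomes {s1, s3, s0, s2}; the rearrangement is applied recursively
   to all children of NODE.  */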
1130
1131 static void
1132 vect_slp_rearrange_stmts (slp_tree node, unsigned int group_size,
1133 vec<unsigned> permutation)
1134 {
1135 gimple stmt;
1136 vec<gimple> tmp_stmts;
1137 unsigned int i;
1138 slp_tree child;
1139
1140 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
1141 vect_slp_rearrange_stmts (child, group_size, permutation);
1142
1143 gcc_assert (group_size == SLP_TREE_SCALAR_STMTS (node).length ());
1144 tmp_stmts.create (group_size);
1145 tmp_stmts.quick_grow_cleared (group_size);
1146
1147 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, stmt)
1148 tmp_stmts[permutation[i]] = stmt;
1149
1150 SLP_TREE_SCALAR_STMTS (node).release ();
1151 SLP_TREE_SCALAR_STMTS (node) = tmp_stmts;
1152 }
1153
1154
1155 /* Check if the required load permutations in the SLP instance
1156 SLP_INSTN are supported. */
1157
1158 static bool
1159 vect_supported_load_permutation_p (slp_instance slp_instn)
1160 {
1161 unsigned int group_size = SLP_INSTANCE_GROUP_SIZE (slp_instn);
1162 unsigned int i, j, k, next;
1163 sbitmap load_index;
1164 slp_tree node;
1165 gimple stmt, load, next_load, first_load;
1166 struct data_reference *dr;
1167
1168 if (dump_enabled_p ())
1169 {
1170 dump_printf_loc (MSG_NOTE, vect_location, "Load permutation ");
1171 FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (slp_instn), i, node)
1172 if (node->load_permutation.exists ())
1173 FOR_EACH_VEC_ELT (node->load_permutation, j, next)
1174 dump_printf (MSG_NOTE, "%d ", next);
1175 else
1176 for (k = 0; k < group_size; ++k)
1177 dump_printf (MSG_NOTE, "%d ", k);
1178 dump_printf (MSG_NOTE, "\n");
1179 }
1180
1181 /* In case of reduction every load permutation is allowed, since the order
1182 of the reduction statements is not important (as opposed to the case of
1183 grouped stores). The only condition we need to check is that all the
1184 load nodes are of the same size and have the same permutation (and then
1185 rearrange all the nodes of the SLP instance according to this
1186 permutation). */
1187
1188 /* Check that all the load nodes are of the same size. */
1189 /* ??? Can't we assert this? */
1190 FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (slp_instn), i, node)
1191 if (SLP_TREE_SCALAR_STMTS (node).length () != (unsigned) group_size)
1192 return false;
1193
1194 node = SLP_INSTANCE_TREE (slp_instn);
1195 stmt = SLP_TREE_SCALAR_STMTS (node)[0];
1196
1197 /* Reduction (there are no data-refs in the root).
1198 In a reduction chain the order of the loads is important. */
1199 if (!STMT_VINFO_DATA_REF (vinfo_for_stmt (stmt))
1200 && !GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)))
1201 {
1202 slp_tree load;
1203 unsigned int lidx;
1204
1205 /* Compare all the permutation sequences to the first one. We know
1206 that at least one load is permuted. */
1207 node = SLP_INSTANCE_LOADS (slp_instn)[0];
1208 if (!node->load_permutation.exists ())
1209 return false;
1210 for (i = 1; SLP_INSTANCE_LOADS (slp_instn).iterate (i, &load); ++i)
1211 {
1212 if (!load->load_permutation.exists ())
1213 return false;
1214 FOR_EACH_VEC_ELT (load->load_permutation, j, lidx)
1215 if (lidx != node->load_permutation[j])
1216 return false;
1217 }
1218
1219 /* Check that the loads in the first sequence are different and there
1220 are no gaps between them. */
1221 load_index = sbitmap_alloc (group_size);
1222 bitmap_clear (load_index);
1223 FOR_EACH_VEC_ELT (node->load_permutation, i, lidx)
1224 {
1225 if (bitmap_bit_p (load_index, lidx))
1226 {
1227 sbitmap_free (load_index);
1228 return false;
1229 }
1230 bitmap_set_bit (load_index, lidx);
1231 }
1232 for (i = 0; i < group_size; i++)
1233 if (!bitmap_bit_p (load_index, i))
1234 {
1235 sbitmap_free (load_index);
1236 return false;
1237 }
1238 sbitmap_free (load_index);
1239
1240 /* This permutation is valid for reduction. Since the order of the
1241 statements in the nodes is not important unless they are memory
1242 accesses, we can rearrange the statements in all the nodes
1243 according to the order of the loads. */
1244 vect_slp_rearrange_stmts (SLP_INSTANCE_TREE (slp_instn), group_size,
1245 node->load_permutation);
1246
1247 /* We are done, no actual permutations need to be generated. */
1248 FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (slp_instn), i, node)
1249 SLP_TREE_LOAD_PERMUTATION (node).release ();
1250 return true;
1251 }
1252
1253 /* In basic block vectorization we allow any subchain of an interleaving
1254 chain.
1255 FORNOW: not supported in loop SLP because of realignment complications. */
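
/* E.g. (illustrative): from the interleaving chain a[0], a[1], a[2], a[3]
   a node loading { a[1], a[2] } is a valid subchain, whereas a node
   loading { a[0], a[2] } is not, because it skips a[1].  */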
1256 if (STMT_VINFO_BB_VINFO (vinfo_for_stmt (stmt)))
1257 {
1258 /* Check that for every node in the instance the loads
1259 form a subchain. */
1260 FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (slp_instn), i, node)
1261 {
1262 next_load = NULL;
1263 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), j, load)
1264 {
1265 if (j != 0 && next_load != load)
1266 return false;
1267 next_load = GROUP_NEXT_ELEMENT (vinfo_for_stmt (load));
1268 }
1269 }
1270
1271 /* Check that the alignment of the first load in every subchain, i.e.,
1272 the first statement in every load node, is supported.
1273 ??? This belongs in alignment checking. */
1274 FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (slp_instn), i, node)
1275 {
1276 first_load = SLP_TREE_SCALAR_STMTS (node)[0];
1277 if (first_load != GROUP_FIRST_ELEMENT (vinfo_for_stmt (first_load)))
1278 {
1279 dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_load));
1280 if (vect_supportable_dr_alignment (dr, false)
1281 == dr_unaligned_unsupported)
1282 {
1283 if (dump_enabled_p ())
1284 {
1285 dump_printf_loc (MSG_MISSED_OPTIMIZATION,
1286 vect_location,
1287 "unsupported unaligned load ");
1288 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
1289 first_load, 0);
1290 dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
1291 }
1292 return false;
1293 }
1294 }
1295 }
1296
1297 /* We are done, no actual permutations need to be generated. */
1298 FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (slp_instn), i, node)
1299 SLP_TREE_LOAD_PERMUTATION (node).release ();
1300 return true;
1301 }
1302
1303 /* FORNOW: the only supported permutation is 0..01..1.. of length equal to
1304 GROUP_SIZE, where each sequence of accesses to the same dr is also of
1305 GROUP_SIZE length (unless this is a reduction). */
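
/* E.g. (illustrative), for GROUP_SIZE 2 a load permutation of { 0, 0 } in
   one node and { 1, 1 } in the other is supported: each node repeats a
   single element of the chain GROUP_SIZE times.  */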
1306 if (SLP_INSTANCE_LOADS (slp_instn).length () != group_size)
1307 return false;
1308 FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (slp_instn), i, node)
1309 if (!node->load_permutation.exists ())
1310 return false;
1311
1312 load_index = sbitmap_alloc (group_size);
1313 bitmap_clear (load_index);
1314 FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (slp_instn), i, node)
1315 {
1316 unsigned int lidx = node->load_permutation[0];
1317 if (bitmap_bit_p (load_index, lidx))
1318 {
1319 sbitmap_free (load_index);
1320 return false;
1321 }
1322 bitmap_set_bit (load_index, lidx);
1323 FOR_EACH_VEC_ELT (node->load_permutation, j, k)
1324 if (k != lidx)
1325 {
1326 sbitmap_free (load_index);
1327 return false;
1328 }
1329 }
1330 for (i = 0; i < group_size; i++)
1331 if (!bitmap_bit_p (load_index, i))
1332 {
1333 sbitmap_free (load_index);
1334 return false;
1335 }
1336 sbitmap_free (load_index);
1337
1338 FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (slp_instn), i, node)
1339 if (node->load_permutation.exists ()
1340 && !vect_transform_slp_perm_load
1341 (node, vNULL, NULL,
1342 SLP_INSTANCE_UNROLLING_FACTOR (slp_instn), slp_instn, true))
1343 return false;
1344 return true;
1345 }
1346
1347
1348 /* Find the first load in the loop that belongs to INSTANCE.
1349 When loads are in several SLP nodes, there can be a case in which the first
1350 load does not appear in the first SLP node to be transformed, causing
1351 incorrect order of statements. Since we generate all the loads together,
1352 they must be inserted before the first load of the SLP instance and not
1353 before the first load of the first node of the instance. */
1354
1355 static gimple
1356 vect_find_first_load_in_slp_instance (slp_instance instance)
1357 {
1358 int i, j;
1359 slp_tree load_node;
1360 gimple first_load = NULL, load;
1361
1362 FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (instance), i, load_node)
1363 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (load_node), j, load)
1364 first_load = get_earlier_stmt (load, first_load);
1365
1366 return first_load;
1367 }
1368
1369
1370 /* Find the last store in SLP INSTANCE. */
1371
1372 static gimple
1373 vect_find_last_store_in_slp_instance (slp_instance instance)
1374 {
1375 int i;
1376 slp_tree node;
1377 gimple last_store = NULL, store;
1378
1379 node = SLP_INSTANCE_TREE (instance);
1380 for (i = 0; SLP_TREE_SCALAR_STMTS (node).iterate (i, &store); i++)
1381 last_store = get_later_stmt (store, last_store);
1382
1383 return last_store;
1384 }
1385
1386 /* Compute the cost for the SLP node NODE in the SLP instance INSTANCE. */
1387
1388 static void
1389 vect_analyze_slp_cost_1 (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo,
1390 slp_instance instance, slp_tree node,
1391 stmt_vector_for_cost *prologue_cost_vec,
1392 unsigned ncopies_for_cost)
1393 {
1394 stmt_vector_for_cost *body_cost_vec = &SLP_INSTANCE_BODY_COST_VEC (instance);
1395
1396 unsigned i;
1397 slp_tree child;
1398 gimple stmt, s;
1399 stmt_vec_info stmt_info;
1400 tree lhs;
1401 unsigned group_size = SLP_INSTANCE_GROUP_SIZE (instance);
1402
1403 /* Recurse down the SLP tree. */
1404 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
1405 vect_analyze_slp_cost_1 (loop_vinfo, bb_vinfo,
1406 instance, child, prologue_cost_vec,
1407 ncopies_for_cost);
1408
1409 /* Look at the first scalar stmt to determine the cost. */
1410 stmt = SLP_TREE_SCALAR_STMTS (node)[0];
1411 stmt_info = vinfo_for_stmt (stmt);
1412 if (STMT_VINFO_GROUPED_ACCESS (stmt_info))
1413 {
1414 if (DR_IS_WRITE (STMT_VINFO_DATA_REF (stmt_info)))
1415 vect_model_store_cost (stmt_info, ncopies_for_cost, false,
1416 vect_uninitialized_def,
1417 node, prologue_cost_vec, body_cost_vec);
1418 else
1419 {
1420 int i;
1421 gcc_checking_assert (DR_IS_READ (STMT_VINFO_DATA_REF (stmt_info)));
1422 vect_model_load_cost (stmt_info, ncopies_for_cost, false,
1423 node, prologue_cost_vec, body_cost_vec);
1424 /* If the load is permuted record the cost for the permutation.
1425 ??? Loads from multiple chains are let through here only
1426 for a single special case involving complex numbers where
1427 in the end no permutation is necessary. */
1428 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, s)
1429 if ((STMT_VINFO_GROUP_FIRST_ELEMENT (vinfo_for_stmt (s))
1430 == STMT_VINFO_GROUP_FIRST_ELEMENT (stmt_info))
1431 && vect_get_place_in_interleaving_chain
1432 (s, STMT_VINFO_GROUP_FIRST_ELEMENT (stmt_info)) != i)
1433 {
1434 record_stmt_cost (body_cost_vec, group_size, vec_perm,
1435 stmt_info, 0, vect_body);
1436 break;
1437 }
1438 }
1439 }
1440 else
1441 record_stmt_cost (body_cost_vec, ncopies_for_cost, vector_stmt,
1442 stmt_info, 0, vect_body);
1443
1444 /* Scan operands and account for prologue cost of constants/externals.
1445 ??? This over-estimates cost for multiple uses and should be
1446 re-engineered. */
1447 lhs = gimple_get_lhs (stmt);
1448 for (i = 0; i < gimple_num_ops (stmt); ++i)
1449 {
1450 tree def, op = gimple_op (stmt, i);
1451 gimple def_stmt;
1452 enum vect_def_type dt;
1453 if (!op || op == lhs)
1454 continue;
1455 if (vect_is_simple_use (op, NULL, loop_vinfo, bb_vinfo,
1456 &def_stmt, &def, &dt)
1457 && (dt == vect_constant_def || dt == vect_external_def))
1458 record_stmt_cost (prologue_cost_vec, 1, vector_stmt,
1459 stmt_info, 0, vect_prologue);
1460 }
1461 }
1462
1463 /* Compute the cost for the SLP instance INSTANCE. */
1464
1465 static void
1466 vect_analyze_slp_cost (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo,
1467 slp_instance instance, unsigned nunits)
1468 {
1469 stmt_vector_for_cost body_cost_vec, prologue_cost_vec;
1470 unsigned ncopies_for_cost;
1471 stmt_info_for_cost *si;
1472 unsigned i;
1473
1474 /* Calculate the number of vector stmts to create based on the unrolling
1475 factor (the number of vectors is 1 if NUNITS >= GROUP_SIZE, and is
1476 GROUP_SIZE / NUNITS otherwise). */
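  /* E.g. (illustrative), with NUNITS 4 and GROUP_SIZE 6 this yields
     least_common_multiple (4, 6) / 4 == 12 / 4 == 3 copies.  */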
1477 unsigned group_size = SLP_INSTANCE_GROUP_SIZE (instance);
1478 ncopies_for_cost = least_common_multiple (nunits, group_size) / nunits;
1479
1480 prologue_cost_vec.create (10);
1481 body_cost_vec.create (10);
1482 SLP_INSTANCE_BODY_COST_VEC (instance) = body_cost_vec;
1483 vect_analyze_slp_cost_1 (loop_vinfo, bb_vinfo,
1484 instance, SLP_INSTANCE_TREE (instance),
1485 &prologue_cost_vec, ncopies_for_cost);
1486
1487 /* Record the prologue costs, which were delayed until we were
1488 sure that SLP was successful. Unlike the body costs, we know
1489 the final values now regardless of the loop vectorization factor. */
1490 void *data = (loop_vinfo ? LOOP_VINFO_TARGET_COST_DATA (loop_vinfo)
1491 : BB_VINFO_TARGET_COST_DATA (bb_vinfo));
1492 FOR_EACH_VEC_ELT (prologue_cost_vec, i, si)
1493 {
1494 struct _stmt_vec_info *stmt_info
1495 = si->stmt ? vinfo_for_stmt (si->stmt) : NULL;
1496 (void) add_stmt_cost (data, si->count, si->kind, stmt_info,
1497 si->misalign, vect_prologue);
1498 }
1499
1500 prologue_cost_vec.release ();
1501 }
1502
1503 /* Analyze an SLP instance starting from a group of grouped stores. Call
1504 vect_build_slp_tree to build a tree of packed stmts if possible.
1505 Return FALSE if it's impossible to SLP any stmt in the loop. */
1506
1507 static bool
1508 vect_analyze_slp_instance (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo,
1509 gimple stmt, unsigned max_tree_size)
1510 {
1511 slp_instance new_instance;
1512 slp_tree node;
1513 unsigned int group_size = GROUP_SIZE (vinfo_for_stmt (stmt));
1514 unsigned int unrolling_factor = 1, nunits;
1515 tree vectype, scalar_type = NULL_TREE;
1516 gimple next;
1517 unsigned int vectorization_factor = 0;
1518 int i;
1519 unsigned int max_nunits = 0;
1520 vec<slp_tree> loads;
1521 struct data_reference *dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (stmt));
1522 vec<gimple> scalar_stmts;
1523
1524 if (GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)))
1525 {
1526 if (dr)
1527 {
1528 scalar_type = TREE_TYPE (DR_REF (dr));
1529 vectype = get_vectype_for_scalar_type (scalar_type);
1530 }
1531 else
1532 {
1533 gcc_assert (loop_vinfo);
1534 vectype = STMT_VINFO_VECTYPE (vinfo_for_stmt (stmt));
1535 }
1536
1537 group_size = GROUP_SIZE (vinfo_for_stmt (stmt));
1538 }
1539 else
1540 {
1541 gcc_assert (loop_vinfo);
1542 vectype = STMT_VINFO_VECTYPE (vinfo_for_stmt (stmt));
1543 group_size = LOOP_VINFO_REDUCTIONS (loop_vinfo).length ();
1544 }
1545
1546 if (!vectype)
1547 {
1548 if (dump_enabled_p ())
1549 {
1550 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1551 "Build SLP failed: unsupported data-type ");
1552 dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM, scalar_type);
1553 dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
1554 }
1555
1556 return false;
1557 }
1558
1559 nunits = TYPE_VECTOR_SUBPARTS (vectype);
1560 if (loop_vinfo)
1561 vectorization_factor = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
1562 else
1563 vectorization_factor = nunits;
1564
1565 /* Calculate the unrolling factor. */
1566 unrolling_factor = least_common_multiple (nunits, group_size) / group_size;
1567 if (unrolling_factor != 1 && !loop_vinfo)
1568 {
1569 if (dump_enabled_p ())
1570 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1571 "Build SLP failed: unrolling required in basic"
1572 " block SLP\n");
1573
1574 return false;
1575 }
1576
1577 /* Create a node (a root of the SLP tree) for the packed grouped stores. */
1578 scalar_stmts.create (group_size);
1579 next = stmt;
1580 if (GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)))
1581 {
1582 /* Collect the stores and store them in SLP_TREE_SCALAR_STMTS. */
1583 while (next)
1584 {
1585 if (STMT_VINFO_IN_PATTERN_P (vinfo_for_stmt (next))
1586 && STMT_VINFO_RELATED_STMT (vinfo_for_stmt (next)))
1587 scalar_stmts.safe_push (
1588 STMT_VINFO_RELATED_STMT (vinfo_for_stmt (next)));
1589 else
1590 scalar_stmts.safe_push (next);
1591 next = GROUP_NEXT_ELEMENT (vinfo_for_stmt (next));
1592 }
1593 }
1594 else
1595 {
1596 /* Collect reduction statements. */
1597 vec<gimple> reductions = LOOP_VINFO_REDUCTIONS (loop_vinfo);
1598 for (i = 0; reductions.iterate (i, &next); i++)
1599 scalar_stmts.safe_push (next);
1600 }
1601
1602 node = vect_create_new_slp_node (scalar_stmts);
1603
1604 loads.create (group_size);
1605
1606 /* Build the tree for the SLP instance. */
1607 if (vect_build_slp_tree (loop_vinfo, bb_vinfo, &node, group_size,
1608 &max_nunits, &loads,
1609 vectorization_factor, NULL, NULL, NULL,
1610 max_tree_size))
1611 {
1612 /* Calculate the unrolling factor based on the smallest type. */
1613 if (max_nunits > nunits)
1614 unrolling_factor = least_common_multiple (max_nunits, group_size)
1615 / group_size;
1616
1617 if (unrolling_factor != 1 && !loop_vinfo)
1618 {
1619 if (dump_enabled_p ())
1620 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1621 "Build SLP failed: unrolling required in basic"
1622 " block SLP\n");
1623 vect_free_slp_tree (node);
1624 loads.release ();
1625 return false;
1626 }
1627
1628 /* Create a new SLP instance. */
1629 new_instance = XNEW (struct _slp_instance);
1630 SLP_INSTANCE_TREE (new_instance) = node;
1631 SLP_INSTANCE_GROUP_SIZE (new_instance) = group_size;
1632 SLP_INSTANCE_UNROLLING_FACTOR (new_instance) = unrolling_factor;
1633 SLP_INSTANCE_BODY_COST_VEC (new_instance) = vNULL;
1634 SLP_INSTANCE_LOADS (new_instance) = loads;
1635 SLP_INSTANCE_FIRST_LOAD_STMT (new_instance) = NULL;
1636
1637 /* Compute the load permutation. */
1638 slp_tree load_node;
1639 bool loads_permuted = false;
1640 FOR_EACH_VEC_ELT (loads, i, load_node)
1641 {
1642 vec<unsigned> load_permutation;
1643 int j;
1644 gimple load, first_stmt;
1645 bool this_load_permuted = false;
1646 load_permutation.create (group_size);
1647 first_stmt = GROUP_FIRST_ELEMENT
1648 (vinfo_for_stmt (SLP_TREE_SCALAR_STMTS (load_node)[0]));
1649 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (load_node), j, load)
1650 {
1651 int load_place
1652 = vect_get_place_in_interleaving_chain (load, first_stmt);
1653 gcc_assert (load_place != -1);
1654 if (load_place != j)
1655 this_load_permuted = true;
1656 load_permutation.safe_push (load_place);
1657 }
1658 if (!this_load_permuted)
1659 {
1660 load_permutation.release ();
1661 continue;
1662 }
1663 SLP_TREE_LOAD_PERMUTATION (load_node) = load_permutation;
1664 loads_permuted = true;
1665 }
1666
1667 if (loads_permuted)
1668 {
1669 if (!vect_supported_load_permutation_p (new_instance))
1670 {
1671 if (dump_enabled_p ())
1672 {
1673 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1674 "Build SLP failed: unsupported load "
1675 "permutation ");
1676 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
1677 dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
1678 }
1679 vect_free_slp_instance (new_instance);
1680 return false;
1681 }
1682
1683 SLP_INSTANCE_FIRST_LOAD_STMT (new_instance)
1684 = vect_find_first_load_in_slp_instance (new_instance);
1685 }
1686
1687 /* Compute the costs of this SLP instance. */
1688 vect_analyze_slp_cost (loop_vinfo, bb_vinfo,
1689 new_instance, TYPE_VECTOR_SUBPARTS (vectype));
1690
1691 if (loop_vinfo)
1692 LOOP_VINFO_SLP_INSTANCES (loop_vinfo).safe_push (new_instance);
1693 else
1694 BB_VINFO_SLP_INSTANCES (bb_vinfo).safe_push (new_instance);
1695
1696 if (dump_enabled_p ())
1697 vect_print_slp_tree (MSG_NOTE, node);
1698
1699 return true;
1700 }
1701
1702 /* Failed to SLP. */
1703 /* Free the allocated memory. */
1704 vect_free_slp_tree (node);
1705 loads.release ();
1706
1707 return false;
1708 }
1709
1710
1711 /* Check if there are stmts in the loop that can be vectorized using SLP.
1712 Build SLP trees of packed scalar stmts if SLP is possible. */
1713
1714 bool
1715 vect_analyze_slp (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo,
1716 unsigned max_tree_size)
1717 {
1718 unsigned int i;
1719 vec<gimple> grouped_stores;
1720 vec<gimple> reductions = vNULL;
1721 vec<gimple> reduc_chains = vNULL;
1722 gimple first_element;
1723 bool ok = false;
1724
1725 if (dump_enabled_p ())
1726 dump_printf_loc (MSG_NOTE, vect_location, "=== vect_analyze_slp ===\n");
1727
1728 if (loop_vinfo)
1729 {
1730 grouped_stores = LOOP_VINFO_GROUPED_STORES (loop_vinfo);
1731 reduc_chains = LOOP_VINFO_REDUCTION_CHAINS (loop_vinfo);
1732 reductions = LOOP_VINFO_REDUCTIONS (loop_vinfo);
1733 }
1734 else
1735 grouped_stores = BB_VINFO_GROUPED_STORES (bb_vinfo);
1736
1737 /* Find SLP sequences starting from groups of grouped stores. */
1738 FOR_EACH_VEC_ELT (grouped_stores, i, first_element)
1739 if (vect_analyze_slp_instance (loop_vinfo, bb_vinfo, first_element,
1740 max_tree_size))
1741 ok = true;
1742
1743 if (bb_vinfo && !ok)
1744 {
1745 if (dump_enabled_p ())
1746 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1747 "Failed to SLP the basic block.\n");
1748
1749 return false;
1750 }
1751
1752 if (loop_vinfo
1753 && LOOP_VINFO_REDUCTION_CHAINS (loop_vinfo).length () > 0)
1754 {
1755 /* Find SLP sequences starting from reduction chains. */
1756 FOR_EACH_VEC_ELT (reduc_chains, i, first_element)
1757 if (vect_analyze_slp_instance (loop_vinfo, bb_vinfo, first_element,
1758 max_tree_size))
1759 ok = true;
1760 else
1761 return false;
1762
1763 /* Don't try to vectorize SLP reductions if reduction chain was
1764 detected. */
1765 return ok;
1766 }
1767
1768 /* Find SLP sequences starting from groups of reductions. */
1769 if (loop_vinfo && LOOP_VINFO_REDUCTIONS (loop_vinfo).length () > 1
1770 && vect_analyze_slp_instance (loop_vinfo, bb_vinfo, reductions[0],
1771 max_tree_size))
1772 ok = true;
1773
1774 return true;
1775 }
1776
1777
1778 /* For each possible SLP instance decide whether to SLP it and calculate the
1779 overall unrolling factor needed to SLP the loop. Return TRUE if we decided
1780 to SLP at least one instance. */
1781
1782 bool
1783 vect_make_slp_decision (loop_vec_info loop_vinfo)
1784 {
1785 unsigned int i, unrolling_factor = 1;
1786 vec<slp_instance> slp_instances = LOOP_VINFO_SLP_INSTANCES (loop_vinfo);
1787 slp_instance instance;
1788 int decided_to_slp = 0;
1789
1790 if (dump_enabled_p ())
1791 dump_printf_loc (MSG_NOTE, vect_location, "=== vect_make_slp_decision ==="
1792 "\n");
1793
1794 FOR_EACH_VEC_ELT (slp_instances, i, instance)
1795 {
1796 /* FORNOW: SLP if you can. */
1797 if (unrolling_factor < SLP_INSTANCE_UNROLLING_FACTOR (instance))
1798 unrolling_factor = SLP_INSTANCE_UNROLLING_FACTOR (instance);
1799
1800 /* Mark all the stmts that belong to INSTANCE as PURE_SLP stmts. Later we
1801 call vect_detect_hybrid_slp () to find stmts that need hybrid SLP and
1802 loop-based vectorization. Such stmts will be marked as HYBRID. */
1803 vect_mark_slp_stmts (SLP_INSTANCE_TREE (instance), pure_slp, -1);
1804 decided_to_slp++;
1805 }
1806
1807 LOOP_VINFO_SLP_UNROLLING_FACTOR (loop_vinfo) = unrolling_factor;
1808
1809 if (decided_to_slp && dump_enabled_p ())
1810 dump_printf_loc (MSG_NOTE, vect_location,
1811 "Decided to SLP %d instances. Unrolling factor %d\n",
1812 decided_to_slp, unrolling_factor);
1813
1814 return (decided_to_slp > 0);
1815 }
1816
1817
1818 /* Find stmts that must be both vectorized and SLPed (since they feed stmts that
1819 can't be SLPed) in the tree rooted at NODE. Mark such stmts as HYBRID. */
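
/* Illustrative example: if an SLP stmt such as  t = b[i] * 2  also feeds
   a stmt that cannot be SLPed, say a reduction update  sum += t, then the
   definition of t must be vectorized by loop-based vectorization as well,
   and is marked HYBRID.  */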
1820
1821 static void
1822 vect_detect_hybrid_slp_stmts (slp_tree node)
1823 {
1824 int i;
1825 vec<gimple> stmts = SLP_TREE_SCALAR_STMTS (node);
1826 gimple stmt = stmts[0];
1827 imm_use_iterator imm_iter;
1828 gimple use_stmt;
1829 stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
1830 slp_tree child;
1831 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
1832 struct loop *loop = NULL;
1833 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_vinfo);
1834 basic_block bb = NULL;
1835
1836 if (!node)
1837 return;
1838
1839 if (loop_vinfo)
1840 loop = LOOP_VINFO_LOOP (loop_vinfo);
1841 else
1842 bb = BB_VINFO_BB (bb_vinfo);
1843
1844 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, stmt)
1845 if (PURE_SLP_STMT (vinfo_for_stmt (stmt))
1846 && TREE_CODE (gimple_op (stmt, 0)) == SSA_NAME)
1847 FOR_EACH_IMM_USE_STMT (use_stmt, imm_iter, gimple_op (stmt, 0))
1848 if (gimple_bb (use_stmt)
1849 && ((loop && flow_bb_inside_loop_p (loop, gimple_bb (use_stmt)))
1850 || bb == gimple_bb (use_stmt))
1851 && (stmt_vinfo = vinfo_for_stmt (use_stmt))
1852 && !STMT_SLP_TYPE (stmt_vinfo)
1853 && (STMT_VINFO_RELEVANT (stmt_vinfo)
1854 || VECTORIZABLE_CYCLE_DEF (STMT_VINFO_DEF_TYPE (stmt_vinfo))
1855 || (STMT_VINFO_IN_PATTERN_P (stmt_vinfo)
1856 && STMT_VINFO_RELATED_STMT (stmt_vinfo)
1857 && !STMT_SLP_TYPE (vinfo_for_stmt (STMT_VINFO_RELATED_STMT (stmt_vinfo)))))
1858 && !(gimple_code (use_stmt) == GIMPLE_PHI
1859 && STMT_VINFO_DEF_TYPE (stmt_vinfo)
1860 == vect_reduction_def))
1861 vect_mark_slp_stmts (node, hybrid, i);
1862
1863 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
1864 vect_detect_hybrid_slp_stmts (child);
1865 }
1866
1867
1868 /* Find stmts that must be both vectorized and SLPed. */
1869
1870 void
1871 vect_detect_hybrid_slp (loop_vec_info loop_vinfo)
1872 {
1873 unsigned int i;
1874 vec<slp_instance> slp_instances = LOOP_VINFO_SLP_INSTANCES (loop_vinfo);
1875 slp_instance instance;
1876
1877 if (dump_enabled_p ())
1878 dump_printf_loc (MSG_NOTE, vect_location, "=== vect_detect_hybrid_slp ==="
1879 "\n");
1880
1881 FOR_EACH_VEC_ELT (slp_instances, i, instance)
1882 vect_detect_hybrid_slp_stmts (SLP_INSTANCE_TREE (instance));
1883 }
1884
1885
1886 /* Create and initialize a new bb_vec_info struct for BB, as well as
1887 stmt_vec_info structs for all the stmts in it. */
1888
1889 static bb_vec_info
1890 new_bb_vec_info (basic_block bb)
1891 {
1892 bb_vec_info res = NULL;
1893 gimple_stmt_iterator gsi;
1894
1895 res = (bb_vec_info) xcalloc (1, sizeof (struct _bb_vec_info));
1896 BB_VINFO_BB (res) = bb;
1897
1898 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
1899 {
1900 gimple stmt = gsi_stmt (gsi);
1901 gimple_set_uid (stmt, 0);
1902 set_vinfo_for_stmt (stmt, new_stmt_vec_info (stmt, NULL, res));
1903 }
1904
1905 BB_VINFO_GROUPED_STORES (res).create (10);
1906 BB_VINFO_SLP_INSTANCES (res).create (2);
1907 BB_VINFO_TARGET_COST_DATA (res) = init_cost (NULL);
1908
1909 bb->aux = res;
1910 return res;
1911 }
1912
1913
1914 /* Free BB_VINFO struct, as well as all the stmt_vec_info structs of all the
1915 stmts in the basic block. */
1916
1917 static void
1918 destroy_bb_vec_info (bb_vec_info bb_vinfo)
1919 {
1920 vec<slp_instance> slp_instances;
1921 slp_instance instance;
1922 basic_block bb;
1923 gimple_stmt_iterator si;
1924 unsigned i;
1925
1926 if (!bb_vinfo)
1927 return;
1928
1929 bb = BB_VINFO_BB (bb_vinfo);
1930
1931 for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
1932 {
1933 gimple stmt = gsi_stmt (si);
1934 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1935
1936 if (stmt_info)
1937 /* Free stmt_vec_info. */
1938 free_stmt_vec_info (stmt);
1939 }
1940
1941 vect_destroy_datarefs (NULL, bb_vinfo);
1942 free_dependence_relations (BB_VINFO_DDRS (bb_vinfo));
1943 BB_VINFO_GROUPED_STORES (bb_vinfo).release ();
1944 slp_instances = BB_VINFO_SLP_INSTANCES (bb_vinfo);
1945 FOR_EACH_VEC_ELT (slp_instances, i, instance)
1946 vect_free_slp_instance (instance);
1947 BB_VINFO_SLP_INSTANCES (bb_vinfo).release ();
1948 destroy_cost_data (BB_VINFO_TARGET_COST_DATA (bb_vinfo));
1949 free (bb_vinfo);
1950 bb->aux = NULL;
1951 }
1952
1953
1954 /* Analyze statements contained in SLP tree node after recursively analyzing
1955 the subtree. Return TRUE if the operations are supported. */
1956
1957 static bool
1958 vect_slp_analyze_node_operations (bb_vec_info bb_vinfo, slp_tree node)
1959 {
1960 bool dummy;
1961 int i;
1962 gimple stmt;
1963 slp_tree child;
1964
1965 if (!node)
1966 return true;
1967
1968 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
1969 if (!vect_slp_analyze_node_operations (bb_vinfo, child))
1970 return false;
1971
1972 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, stmt)
1973 {
1974 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1975 gcc_assert (stmt_info);
1976 gcc_assert (PURE_SLP_STMT (stmt_info));
1977
1978 if (!vect_analyze_stmt (stmt, &dummy, node))
1979 return false;
1980 }
1981
1982 return true;
1983 }
1984
1985
1986 /* Analyze statements in SLP instances of the basic block. Return TRUE if the
1987 operations are supported. */
1988
1989 static bool
1990 vect_slp_analyze_operations (bb_vec_info bb_vinfo)
1991 {
1992 vec<slp_instance> slp_instances = BB_VINFO_SLP_INSTANCES (bb_vinfo);
1993 slp_instance instance;
1994 int i;
1995
1996 for (i = 0; slp_instances.iterate (i, &instance); )
1997 {
1998 if (!vect_slp_analyze_node_operations (bb_vinfo,
1999 SLP_INSTANCE_TREE (instance)))
2000 {
2001 vect_free_slp_instance (instance);
2002 slp_instances.ordered_remove (i);
2003 }
2004 else
2005 i++;
2006 }
2007
2008 if (!slp_instances.length ())
2009 return false;
2010
2011 return true;
2012 }
2013
2014
2015 /* Compute the scalar cost of the SLP node NODE and its children
2016    and return it. Do not account for defs that are marked in LIFE,
2017    and update LIFE according to the uses of NODE. */
2018
2019 static unsigned
2020 vect_bb_slp_scalar_cost (basic_block bb,
2021 slp_tree node, vec<bool, va_heap> *life)
2022 {
2023 unsigned scalar_cost = 0;
2024 unsigned i;
2025 gimple stmt;
2026 slp_tree child;
2027
2028 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, stmt)
2029 {
2030 unsigned stmt_cost;
2031 ssa_op_iter op_iter;
2032 def_operand_p def_p;
2033 stmt_vec_info stmt_info;
2034
2035 if ((*life)[i])
2036 continue;
2037
2038       /* If there is a non-vectorized use of the defs then the scalar
2039          stmt is kept live, in which case we do not count it or any of
2040          the required defs in the SLP children towards the scalar cost.
2041          This way vectorization looks more costly when compared against
2042          the scalar code. */
2043 FOR_EACH_SSA_DEF_OPERAND (def_p, stmt, op_iter, SSA_OP_DEF)
2044 {
2045 imm_use_iterator use_iter;
2046 gimple use_stmt;
2047 FOR_EACH_IMM_USE_STMT (use_stmt, use_iter, DEF_FROM_PTR (def_p))
2048 if (!is_gimple_debug (use_stmt)
2049 && (gimple_code (use_stmt) == GIMPLE_PHI
2050 || gimple_bb (use_stmt) != bb
2051 || !STMT_VINFO_VECTORIZABLE (vinfo_for_stmt (use_stmt))))
2052 {
2053 (*life)[i] = true;
2054 BREAK_FROM_IMM_USE_STMT (use_iter);
2055 }
2056 }
2057 if ((*life)[i])
2058 continue;
2059
2060 stmt_info = vinfo_for_stmt (stmt);
2061 if (STMT_VINFO_DATA_REF (stmt_info))
2062 {
2063 if (DR_IS_READ (STMT_VINFO_DATA_REF (stmt_info)))
2064 stmt_cost = vect_get_stmt_cost (scalar_load);
2065 else
2066 stmt_cost = vect_get_stmt_cost (scalar_store);
2067 }
2068 else
2069 stmt_cost = vect_get_stmt_cost (scalar_stmt);
2070
2071 scalar_cost += stmt_cost;
2072 }
2073
2074 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
2075 scalar_cost += vect_bb_slp_scalar_cost (bb, child, life);
2076
2077 return scalar_cost;
2078 }
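
/* Worked example (hypothetical costs): for a group of two scalar stores
   fed by two scalar adds, with vect_get_stmt_cost returning 1 for both
   scalar_stmt and scalar_store, the recursion yields 2 + 2 = 4.  If one
   of the adds also feeds a stmt outside the vectorized region, it is
   marked live in LIFE and contributes nothing, giving 3 instead.  */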
2079
2080 /* Check if vectorization of the basic block is profitable. */
2081
2082 static bool
2083 vect_bb_vectorization_profitable_p (bb_vec_info bb_vinfo)
2084 {
2085 vec<slp_instance> slp_instances = BB_VINFO_SLP_INSTANCES (bb_vinfo);
2086 slp_instance instance;
2087 int i, j;
2088 unsigned int vec_inside_cost = 0, vec_outside_cost = 0, scalar_cost = 0;
2089 unsigned int vec_prologue_cost = 0, vec_epilogue_cost = 0;
2090 void *target_cost_data = BB_VINFO_TARGET_COST_DATA (bb_vinfo);
2091 stmt_vec_info stmt_info = NULL;
2092 stmt_vector_for_cost body_cost_vec;
2093 stmt_info_for_cost *ci;
2094
2095 /* Calculate vector costs. */
2096 FOR_EACH_VEC_ELT (slp_instances, i, instance)
2097 {
2098 body_cost_vec = SLP_INSTANCE_BODY_COST_VEC (instance);
2099
2100 FOR_EACH_VEC_ELT (body_cost_vec, j, ci)
2101 {
2102 stmt_info = ci->stmt ? vinfo_for_stmt (ci->stmt) : NULL;
2103 (void) add_stmt_cost (target_cost_data, ci->count, ci->kind,
2104 stmt_info, ci->misalign, vect_body);
2105 }
2106 }
2107
2108 /* Calculate scalar cost. */
2109 FOR_EACH_VEC_ELT (slp_instances, i, instance)
2110 {
2111 auto_vec<bool, 20> life;
2112 life.safe_grow_cleared (SLP_INSTANCE_GROUP_SIZE (instance));
2113 scalar_cost += vect_bb_slp_scalar_cost (BB_VINFO_BB (bb_vinfo),
2114 SLP_INSTANCE_TREE (instance),
2115 &life);
2116 }
2117
2118 /* Complete the target-specific cost calculation. */
2119 finish_cost (BB_VINFO_TARGET_COST_DATA (bb_vinfo), &vec_prologue_cost,
2120 &vec_inside_cost, &vec_epilogue_cost);
2121
2122 vec_outside_cost = vec_prologue_cost + vec_epilogue_cost;
2123
2124 if (dump_enabled_p ())
2125 {
2126 dump_printf_loc (MSG_NOTE, vect_location, "Cost model analysis: \n");
2127 dump_printf (MSG_NOTE, " Vector inside of basic block cost: %d\n",
2128 vec_inside_cost);
2129 dump_printf (MSG_NOTE, " Vector prologue cost: %d\n", vec_prologue_cost);
2130 dump_printf (MSG_NOTE, " Vector epilogue cost: %d\n", vec_epilogue_cost);
2131 dump_printf (MSG_NOTE, " Scalar cost of basic block: %d\n", scalar_cost);
2132 }
2133
2134   /* Vectorization is profitable if its cost is less than the cost of the
2135      scalar version. */
2136 if (vec_outside_cost + vec_inside_cost >= scalar_cost)
2137 return false;
2138
2139 return true;
2140 }
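
/* Worked example (hypothetical numbers): with vec_inside_cost = 4,
   vec_prologue_cost = 1 and vec_epilogue_cost = 0, vectorization is
   accepted only when the scalar cost exceeds 4 + 1 = 5; a scalar cost
   of exactly 5 is rejected, i.e. ties go to the scalar version.  */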
2141
2142 /* Check if the basic block can be vectorized. */
2143
2144 static bb_vec_info
2145 vect_slp_analyze_bb_1 (basic_block bb)
2146 {
2147 bb_vec_info bb_vinfo;
2148 vec<slp_instance> slp_instances;
2149 slp_instance instance;
2150 int i;
2151 int min_vf = 2;
2152 unsigned n_stmts = 0;
2153
2154 bb_vinfo = new_bb_vec_info (bb);
2155 if (!bb_vinfo)
2156 return NULL;
2157
2158 if (!vect_analyze_data_refs (NULL, bb_vinfo, &min_vf, &n_stmts))
2159 {
2160 if (dump_enabled_p ())
2161 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2162 "not vectorized: unhandled data-ref in basic "
2163 "block.\n");
2164
2165 destroy_bb_vec_info (bb_vinfo);
2166 return NULL;
2167 }
2168
2169 if (BB_VINFO_DATAREFS (bb_vinfo).length () < 2)
2170 {
2171 if (dump_enabled_p ())
2172 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2173 "not vectorized: not enough data-refs in "
2174 "basic block.\n");
2175
2176 destroy_bb_vec_info (bb_vinfo);
2177 return NULL;
2178 }
2179
2180 if (!vect_analyze_data_ref_accesses (NULL, bb_vinfo))
2181 {
2182 if (dump_enabled_p ())
2183 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2184 "not vectorized: unhandled data access in "
2185 "basic block.\n");
2186
2187 destroy_bb_vec_info (bb_vinfo);
2188 return NULL;
2189 }
2190
2191 vect_pattern_recog (NULL, bb_vinfo);
2192
2193 if (!vect_analyze_data_refs_alignment (NULL, bb_vinfo))
2194 {
2195 if (dump_enabled_p ())
2196 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2197 "not vectorized: bad data alignment in basic "
2198 "block.\n");
2199
2200 destroy_bb_vec_info (bb_vinfo);
2201 return NULL;
2202 }
2203
2204 /* Check the SLP opportunities in the basic block, analyze and build SLP
2205 trees. */
2206 if (!vect_analyze_slp (NULL, bb_vinfo, n_stmts))
2207 {
2208 if (dump_enabled_p ())
2209 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2210 "not vectorized: failed to find SLP opportunities "
2211 "in basic block.\n");
2212
2213 destroy_bb_vec_info (bb_vinfo);
2214 return NULL;
2215 }
2216
2217 slp_instances = BB_VINFO_SLP_INSTANCES (bb_vinfo);
2218
2219 /* Mark all the statements that we want to vectorize as pure SLP and
2220 relevant. */
2221 FOR_EACH_VEC_ELT (slp_instances, i, instance)
2222 {
2223 vect_mark_slp_stmts (SLP_INSTANCE_TREE (instance), pure_slp, -1);
2224 vect_mark_slp_stmts_relevant (SLP_INSTANCE_TREE (instance));
2225 }
2226
2227 /* Mark all the statements that we do not want to vectorize. */
2228 for (gimple_stmt_iterator gsi = gsi_start_bb (BB_VINFO_BB (bb_vinfo));
2229 !gsi_end_p (gsi); gsi_next (&gsi))
2230 {
2231 stmt_vec_info vinfo = vinfo_for_stmt (gsi_stmt (gsi));
2232 if (STMT_SLP_TYPE (vinfo) != pure_slp)
2233 STMT_VINFO_VECTORIZABLE (vinfo) = false;
2234 }
2235
2236 /* Analyze dependences. At this point all stmts not participating in
2237 vectorization have to be marked. Dependence analysis assumes
2238 that we either vectorize all SLP instances or none at all. */
2239 if (!vect_slp_analyze_data_ref_dependences (bb_vinfo))
2240 {
2241 if (dump_enabled_p ())
2242 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2243 "not vectorized: unhandled data dependence "
2244 "in basic block.\n");
2245
2246 destroy_bb_vec_info (bb_vinfo);
2247 return NULL;
2248 }
2249
2250 if (!vect_verify_datarefs_alignment (NULL, bb_vinfo))
2251 {
2252 if (dump_enabled_p ())
2253 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2254 "not vectorized: unsupported alignment in basic "
2255 "block.\n");
2256 destroy_bb_vec_info (bb_vinfo);
2257 return NULL;
2258 }
2259
2260 if (!vect_slp_analyze_operations (bb_vinfo))
2261 {
2262 if (dump_enabled_p ())
2263 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2264 "not vectorized: bad operation in basic block.\n");
2265
2266 destroy_bb_vec_info (bb_vinfo);
2267 return NULL;
2268 }
2269
2270 /* Cost model: check if the vectorization is worthwhile. */
2271 if (!unlimited_cost_model (NULL)
2272 && !vect_bb_vectorization_profitable_p (bb_vinfo))
2273 {
2274 if (dump_enabled_p ())
2275 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2276 "not vectorized: vectorization is not "
2277 "profitable.\n");
2278
2279 destroy_bb_vec_info (bb_vinfo);
2280 return NULL;
2281 }
2282
2283 if (dump_enabled_p ())
2284 dump_printf_loc (MSG_NOTE, vect_location,
2285 "Basic block will be vectorized using SLP\n");
2286
2287 return bb_vinfo;
2288 }
2289
2290
2291 bb_vec_info
2292 vect_slp_analyze_bb (basic_block bb)
2293 {
2294 bb_vec_info bb_vinfo;
2295 int insns = 0;
2296 gimple_stmt_iterator gsi;
2297 unsigned int vector_sizes;
2298
2299 if (dump_enabled_p ())
2300     dump_printf_loc (MSG_NOTE, vect_location, "=== vect_slp_analyze_bb ===\n");
2301
2302 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
2303 {
2304 gimple stmt = gsi_stmt (gsi);
2305 if (!is_gimple_debug (stmt)
2306 && !gimple_nop_p (stmt)
2307 && gimple_code (stmt) != GIMPLE_LABEL)
2308 insns++;
2309 }
2310
2311 if (insns > PARAM_VALUE (PARAM_SLP_MAX_INSNS_IN_BB))
2312 {
2313 if (dump_enabled_p ())
2314 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2315 "not vectorized: too many instructions in "
2316 "basic block.\n");
2317
2318 return NULL;
2319 }
2320
2321 /* Autodetect first vector size we try. */
2322 current_vector_size = 0;
2323 vector_sizes = targetm.vectorize.autovectorize_vector_sizes ();
2324
2325 while (1)
2326 {
2327 bb_vinfo = vect_slp_analyze_bb_1 (bb);
2328 if (bb_vinfo)
2329 return bb_vinfo;
2330
2331 destroy_bb_vec_info (bb_vinfo);
2332
2333 vector_sizes &= ~current_vector_size;
2334 if (vector_sizes == 0
2335 || current_vector_size == 0)
2336 return NULL;
2337
2338 /* Try the next biggest vector size. */
2339 current_vector_size = 1 << floor_log2 (vector_sizes);
2340 if (dump_enabled_p ())
2341 dump_printf_loc (MSG_NOTE, vect_location,
2342 "***** Re-trying analysis with "
2343 "vector size %d\n", current_vector_size);
2344 }
2345 }
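
/* A simplified sketch of the retry logic above, assuming a hypothetical
   target whose autovectorize_vector_sizes hook returns the bitmask
   16 | 8 and whose analysis sets current_vector_size to the size used:

       vector_sizes == 0x18               sizes 16 and 8 are available
       first attempt uses 16; on failure:
       vector_sizes &= ~16                == 0x8
       current_vector_size = 1 << floor_log2 (0x8)   == 8

   so the largest size not yet tried is always attempted next.  */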
2346
2347
2348 /* SLP costs are calculated according to SLP instance unrolling factor (i.e.,
2349 the number of created vector stmts depends on the unrolling factor).
2350 However, the actual number of vector stmts for every SLP node depends on
2351 VF which is set later in vect_analyze_operations (). Hence, SLP costs
2352 should be updated. In this function we assume that the inside costs
2353 calculated in vect_model_xxx_cost are linear in ncopies. */
2354
2355 void
2356 vect_update_slp_costs_according_to_vf (loop_vec_info loop_vinfo)
2357 {
2358 unsigned int i, j, vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
2359 vec<slp_instance> slp_instances = LOOP_VINFO_SLP_INSTANCES (loop_vinfo);
2360 slp_instance instance;
2361 stmt_vector_for_cost body_cost_vec;
2362 stmt_info_for_cost *si;
2363 void *data = LOOP_VINFO_TARGET_COST_DATA (loop_vinfo);
2364
2365 if (dump_enabled_p ())
2366 dump_printf_loc (MSG_NOTE, vect_location,
2367 "=== vect_update_slp_costs_according_to_vf ===\n");
2368
2369 FOR_EACH_VEC_ELT (slp_instances, i, instance)
2370 {
2371 /* We assume that costs are linear in ncopies. */
2372 int ncopies = vf / SLP_INSTANCE_UNROLLING_FACTOR (instance);
2373
2374 /* Record the instance's instructions in the target cost model.
2375 This was delayed until here because the count of instructions
2376 isn't known beforehand. */
2377 body_cost_vec = SLP_INSTANCE_BODY_COST_VEC (instance);
2378
2379 FOR_EACH_VEC_ELT (body_cost_vec, j, si)
2380 (void) add_stmt_cost (data, si->count * ncopies, si->kind,
2381 vinfo_for_stmt (si->stmt), si->misalign,
2382 vect_body);
2383 }
2384 }
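
/* For example (illustrative numbers): if the final VF is 8 and an
   instance's unrolling factor is 2, every body stmt of that instance is
   replicated ncopies = 8 / 2 = 4 times, so a cost entry recorded with
   count 3 enters the target cost model with count 12.  */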
2385
2386
2387 /* For constant and loop invariant defs of SLP_NODE this function returns
2388 (vector) defs (VEC_OPRNDS) that will be used in the vectorized stmts.
2389 OP_NUM determines if we gather defs for operand 0 or operand 1 of the RHS of
2390 scalar stmts. NUMBER_OF_VECTORS is the number of vector defs to create.
2391    REDUC_INDEX is the index of the reduction operand in the statements,
2392    or -1 if there is no reduction operand. */
2393
2394 static void
2395 vect_get_constant_vectors (tree op, slp_tree slp_node,
2396 vec<tree> *vec_oprnds,
2397 unsigned int op_num, unsigned int number_of_vectors,
2398 int reduc_index)
2399 {
2400 vec<gimple> stmts = SLP_TREE_SCALAR_STMTS (slp_node);
2401 gimple stmt = stmts[0];
2402 stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
2403 unsigned nunits;
2404 tree vec_cst;
2405 tree *elts;
2406 unsigned j, number_of_places_left_in_vector;
2407 tree vector_type;
2408 tree vop;
2409 int group_size = stmts.length ();
2410 unsigned int vec_num, i;
2411 unsigned number_of_copies = 1;
2412 vec<tree> voprnds;
2413 voprnds.create (number_of_vectors);
2414 bool constant_p, is_store;
2415 tree neutral_op = NULL;
2416 enum tree_code code = gimple_expr_code (stmt);
2417 gimple def_stmt;
2418 struct loop *loop;
2419 gimple_seq ctor_seq = NULL;
2420
2421 if (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_reduction_def
2422 && reduc_index != -1)
2423 {
2424 op_num = reduc_index - 1;
2425 op = gimple_op (stmt, reduc_index);
2426 /* For additional copies (see the explanation of NUMBER_OF_COPIES below)
2427 we need either neutral operands or the original operands. See
2428 get_initial_def_for_reduction() for details. */
2429 switch (code)
2430 {
2431 case WIDEN_SUM_EXPR:
2432 case DOT_PROD_EXPR:
2433 case PLUS_EXPR:
2434 case MINUS_EXPR:
2435 case BIT_IOR_EXPR:
2436 case BIT_XOR_EXPR:
2437 if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (op)))
2438 neutral_op = build_real (TREE_TYPE (op), dconst0);
2439 else
2440 neutral_op = build_int_cst (TREE_TYPE (op), 0);
2441
2442 break;
2443
2444 case MULT_EXPR:
2445 if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (op)))
2446 neutral_op = build_real (TREE_TYPE (op), dconst1);
2447 else
2448 neutral_op = build_int_cst (TREE_TYPE (op), 1);
2449
2450 break;
2451
2452 case BIT_AND_EXPR:
2453 neutral_op = build_int_cst (TREE_TYPE (op), -1);
2454 break;
2455
2456          /* For MIN/MAX we don't have an easy neutral operand but
2457             the initial values can be used fine here. Only for
2458             a reduction chain do we have to force a neutral element. */
2459 case MAX_EXPR:
2460 case MIN_EXPR:
2461 if (!GROUP_FIRST_ELEMENT (stmt_vinfo))
2462 neutral_op = NULL;
2463 else
2464 {
2465 def_stmt = SSA_NAME_DEF_STMT (op);
2466 loop = (gimple_bb (stmt))->loop_father;
2467 neutral_op = PHI_ARG_DEF_FROM_EDGE (def_stmt,
2468 loop_preheader_edge (loop));
2469 }
2470 break;
2471
2472 default:
2473 neutral_op = NULL;
2474 }
2475 }
2476
2477 if (STMT_VINFO_DATA_REF (stmt_vinfo))
2478 {
2479 is_store = true;
2480 op = gimple_assign_rhs1 (stmt);
2481 }
2482 else
2483 is_store = false;
2484
2485 gcc_assert (op);
2486
2487   constant_p = CONSTANT_CLASS_P (op);
2491
2492 vector_type = get_vectype_for_scalar_type (TREE_TYPE (op));
2493 gcc_assert (vector_type);
2494 nunits = TYPE_VECTOR_SUBPARTS (vector_type);
2495
2496 /* NUMBER_OF_COPIES is the number of times we need to use the same values in
2497 created vectors. It is greater than 1 if unrolling is performed.
2498
2499 For example, we have two scalar operands, s1 and s2 (e.g., group of
2500 strided accesses of size two), while NUNITS is four (i.e., four scalars
2501 of this type can be packed in a vector). The output vector will contain
2502 two copies of each scalar operand: {s1, s2, s1, s2}. (NUMBER_OF_COPIES
2503 will be 2).
2504
2505 If GROUP_SIZE > NUNITS, the scalars will be split into several vectors
2506 containing the operands.
2507
2508 For example, NUNITS is four as before, and the group size is 8
2509 (s1, s2, ..., s8). We will create two vectors {s1, s2, s3, s4} and
2510 {s5, s6, s7, s8}. */
2511
2512 number_of_copies = least_common_multiple (nunits, group_size) / group_size;
2513
2514 number_of_places_left_in_vector = nunits;
2515 elts = XALLOCAVEC (tree, nunits);
2516 for (j = 0; j < number_of_copies; j++)
2517 {
2518 for (i = group_size - 1; stmts.iterate (i, &stmt); i--)
2519 {
2520 if (is_store)
2521 op = gimple_assign_rhs1 (stmt);
2522 else
2523 {
2524 switch (code)
2525 {
2526 case COND_EXPR:
2527 if (op_num == 0 || op_num == 1)
2528 {
2529 tree cond = gimple_assign_rhs1 (stmt);
2530 op = TREE_OPERAND (cond, op_num);
2531 }
2532 else
2533 {
2534 if (op_num == 2)
2535 op = gimple_assign_rhs2 (stmt);
2536 else
2537 op = gimple_assign_rhs3 (stmt);
2538 }
2539 break;
2540
2541 case CALL_EXPR:
2542 op = gimple_call_arg (stmt, op_num);
2543 break;
2544
2545 case LSHIFT_EXPR:
2546 case RSHIFT_EXPR:
2547 case LROTATE_EXPR:
2548 case RROTATE_EXPR:
2549 op = gimple_op (stmt, op_num + 1);
2550               /* Unlike the other binary operators, shifts/rotates take a
2551                  shift count of type int instead of the same type as the
2552                  lhs, so make sure the scalar is the right type if we are
2553                  dealing with vectors of
2554                  long long/long/short/char. */
2555 if (op_num == 1 && TREE_CODE (op) == INTEGER_CST)
2556 op = fold_convert (TREE_TYPE (vector_type), op);
2557 break;
2558
2559 default:
2560 op = gimple_op (stmt, op_num + 1);
2561 break;
2562 }
2563 }
2564
2565 if (reduc_index != -1)
2566 {
2567 loop = (gimple_bb (stmt))->loop_father;
2568 def_stmt = SSA_NAME_DEF_STMT (op);
2569
2570 gcc_assert (loop);
2571
2572              /* Get the def before the loop. In a reduction chain we have only
2573                 one initial value. */
2574 if ((j != (number_of_copies - 1)
2575 || (GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt))
2576 && i != 0))
2577 && neutral_op)
2578 op = neutral_op;
2579 else
2580 op = PHI_ARG_DEF_FROM_EDGE (def_stmt,
2581 loop_preheader_edge (loop));
2582 }
2583
2584 /* Create 'vect_ = {op0,op1,...,opn}'. */
2585 number_of_places_left_in_vector--;
2586 if (!types_compatible_p (TREE_TYPE (vector_type), TREE_TYPE (op)))
2587 {
2588 if (CONSTANT_CLASS_P (op))
2589 {
2590 op = fold_unary (VIEW_CONVERT_EXPR,
2591 TREE_TYPE (vector_type), op);
2592 gcc_assert (op && CONSTANT_CLASS_P (op));
2593 }
2594 else
2595 {
2596 tree new_temp
2597 = make_ssa_name (TREE_TYPE (vector_type), NULL);
2598 gimple init_stmt;
2599 op = build1 (VIEW_CONVERT_EXPR, TREE_TYPE (vector_type),
2600 op);
2601 init_stmt
2602 = gimple_build_assign_with_ops (VIEW_CONVERT_EXPR,
2603 new_temp, op, NULL_TREE);
2604 gimple_seq_add_stmt (&ctor_seq, init_stmt);
2605 op = new_temp;
2606 }
2607 }
2608 elts[number_of_places_left_in_vector] = op;
2609 if (!CONSTANT_CLASS_P (op))
2610 constant_p = false;
2611
2612 if (number_of_places_left_in_vector == 0)
2613 {
2614 number_of_places_left_in_vector = nunits;
2615
2616 if (constant_p)
2617 vec_cst = build_vector (vector_type, elts);
2618 else
2619 {
2620 vec<constructor_elt, va_gc> *v;
2621 unsigned k;
2622 vec_alloc (v, nunits);
2623 for (k = 0; k < nunits; ++k)
2624 CONSTRUCTOR_APPEND_ELT (v, NULL_TREE, elts[k]);
2625 vec_cst = build_constructor (vector_type, v);
2626 }
2627 voprnds.quick_push (vect_init_vector (stmt, vec_cst,
2628 vector_type, NULL));
2629 if (ctor_seq != NULL)
2630 {
2631 gimple init_stmt = SSA_NAME_DEF_STMT (voprnds.last ());
2632 gimple_stmt_iterator gsi = gsi_for_stmt (init_stmt);
2633 gsi_insert_seq_before_without_update (&gsi, ctor_seq,
2634 GSI_SAME_STMT);
2635 ctor_seq = NULL;
2636 }
2637 }
2638 }
2639 }
2640
2641   /* Since the vectors were created in reverse order, reverse them back
2642      into the expected order. */
2643 vec_num = voprnds.length ();
2644 for (j = vec_num; j != 0; j--)
2645 {
2646 vop = voprnds[j - 1];
2647 vec_oprnds->quick_push (vop);
2648 }
2649
2650 voprnds.release ();
2651
2652   /* If VF is greater than the unrolling factor needed for the SLP group
2653      of stmts, the number of vectors to create (NUMBER_OF_VECTORS) is
2654      greater than NUMBER_OF_SCALARS/NUNITS or NUNITS/NUMBER_OF_SCALARS,
2655      and hence we have to replicate the vectors. */
2656   tree neutral_vec = NULL;
2657   while (number_of_vectors > vec_oprnds->length ())
2658     {
2659
2660 if (neutral_op)
2661 {
2662 if (!neutral_vec)
2663 neutral_vec = build_vector_from_val (vector_type, neutral_op);
2664
2665 vec_oprnds->quick_push (neutral_vec);
2666 }
2667 else
2668 {
2669 for (i = 0; vec_oprnds->iterate (i, &vop) && i < vec_num; i++)
2670 vec_oprnds->quick_push (vop);
2671 }
2672 }
2673 }
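
/* Worked example (illustrative values): for a group of three scalars
   {s1, s2, s3} and NUNITS == 4,

       number_of_copies = least_common_multiple (4, 3) / 3 == 4,

   i.e. four passes over the group yield 12 elements filling three
   vectors {s1,s2,s3,s1} {s2,s3,s1,s2} {s3,s1,s2,s3}.  Because the group
   is walked from its last element and ELTS is filled from the back, the
   vectors are produced in reverse order, which is why they are reversed
   again before being pushed onto VEC_OPRNDS.  */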
2674
2675
2676 /* Get the vectorized definitions from SLP_NODE, which already contains
2677    the corresponding vectorized def-stmts. */
2678
2679 static void
2680 vect_get_slp_vect_defs (slp_tree slp_node, vec<tree> *vec_oprnds)
2681 {
2682 tree vec_oprnd;
2683 gimple vec_def_stmt;
2684 unsigned int i;
2685
2686 gcc_assert (SLP_TREE_VEC_STMTS (slp_node).exists ());
2687
2688 FOR_EACH_VEC_ELT (SLP_TREE_VEC_STMTS (slp_node), i, vec_def_stmt)
2689 {
2690 gcc_assert (vec_def_stmt);
2691 vec_oprnd = gimple_get_lhs (vec_def_stmt);
2692 vec_oprnds->quick_push (vec_oprnd);
2693 }
2694 }
2695
2696
2697 /* Get vectorized definitions for SLP_NODE.
2698 If the scalar definitions are loop invariants or constants, collect them and
2699 call vect_get_constant_vectors() to create vector stmts.
2700    Otherwise, the def-stmts must already be vectorized and the vectorized stmts
2701 must be stored in the corresponding child of SLP_NODE, and we call
2702 vect_get_slp_vect_defs () to retrieve them. */
2703
2704 void
2705 vect_get_slp_defs (vec<tree> ops, slp_tree slp_node,
2706 vec<vec<tree> > *vec_oprnds, int reduc_index)
2707 {
2708 gimple first_stmt;
2709 int number_of_vects = 0, i;
2710 unsigned int child_index = 0;
2711 HOST_WIDE_INT lhs_size_unit, rhs_size_unit;
2712 slp_tree child = NULL;
2713 vec<tree> vec_defs;
2714 tree oprnd;
2715 bool vectorized_defs;
2716
2717 first_stmt = SLP_TREE_SCALAR_STMTS (slp_node)[0];
2718 FOR_EACH_VEC_ELT (ops, i, oprnd)
2719 {
2720 /* For each operand we check if it has vectorized definitions in a child
2721 node or we need to create them (for invariants and constants). We
2722 check if the LHS of the first stmt of the next child matches OPRND.
2723 If it does, we found the correct child. Otherwise, we call
2724          vect_get_constant_vectors (), and do not advance CHILD_INDEX, so
2725          that this child node is checked again for the next operand. */
2726 vectorized_defs = false;
2727 if (SLP_TREE_CHILDREN (slp_node).length () > child_index)
2728 {
2729 child = SLP_TREE_CHILDREN (slp_node)[child_index];
2730
2731 /* We have to check both pattern and original def, if available. */
2732 gimple first_def = SLP_TREE_SCALAR_STMTS (child)[0];
2733 gimple related = STMT_VINFO_RELATED_STMT (vinfo_for_stmt (first_def));
2734
2735 if (operand_equal_p (oprnd, gimple_get_lhs (first_def), 0)
2736 || (related
2737 && operand_equal_p (oprnd, gimple_get_lhs (related), 0)))
2738 {
2739 /* The number of vector defs is determined by the number of
2740 vector statements in the node from which we get those
2741 statements. */
2742 number_of_vects = SLP_TREE_NUMBER_OF_VEC_STMTS (child);
2743 vectorized_defs = true;
2744 child_index++;
2745 }
2746 }
2747
2748 if (!vectorized_defs)
2749 {
2750 if (i == 0)
2751 {
2752 number_of_vects = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
2753              /* The number of vector stmts was calculated according to the
2754                 LHS in vect_schedule_slp_instance (); fix it by replacing
2755                 the LHS with the RHS, if necessary. See
2756                 vect_get_smallest_scalar_type () for details. */
2757 vect_get_smallest_scalar_type (first_stmt, &lhs_size_unit,
2758 &rhs_size_unit);
2759 if (rhs_size_unit != lhs_size_unit)
2760 {
2761 number_of_vects *= rhs_size_unit;
2762 number_of_vects /= lhs_size_unit;
2763 }
2764 }
2765 }
2766
2767 /* Allocate memory for vectorized defs. */
2768 vec_defs = vNULL;
2769 vec_defs.create (number_of_vects);
2770
2771 /* For reduction defs we call vect_get_constant_vectors (), since we are
2772 looking for initial loop invariant values. */
2773 if (vectorized_defs && reduc_index == -1)
2774 /* The defs are already vectorized. */
2775 vect_get_slp_vect_defs (child, &vec_defs);
2776 else
2777 /* Build vectors from scalar defs. */
2778 vect_get_constant_vectors (oprnd, slp_node, &vec_defs, i,
2779 number_of_vects, reduc_index);
2780
2781 vec_oprnds->quick_push (vec_defs);
2782
2783 /* For reductions, we only need initial values. */
2784 if (reduc_index != -1)
2785 return;
2786 }
2787 }
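
/* For example (hypothetical operands): for ops = {a_1, 3} of a group of
   a_1 + 3 stmts, a_1 matches the LHS of the first stmt of the first
   child, so its vectorized defs are fetched from that child, while the
   constant 3 matches no child and is materialized by
   vect_get_constant_vectors () instead.  */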
2788
2789
2790 /* Create NCOPIES permutation statements using the vector mask MASK and two
2791    input vectors placed in DR_CHAIN at FIRST_VEC_INDX and SECOND_VEC_INDX
2792    for the first copy, shifting by STRIDE elements of DR_CHAIN for every
2793    further copy.
2794    (STRIDE is the number of vectorized stmts for NODE divided by the number
2795    of copies.)
2796    VECT_STMTS_COUNTER specifies the index in the vectorized stmts of NODE
2797    where the created stmts must be inserted. */
2798
2799 static inline void
2800 vect_create_mask_and_perm (gimple stmt, gimple next_scalar_stmt,
2801 tree mask, int first_vec_indx, int second_vec_indx,
2802 gimple_stmt_iterator *gsi, slp_tree node,
2803 tree vectype, vec<tree> dr_chain,
2804 int ncopies, int vect_stmts_counter)
2805 {
2806 tree perm_dest;
2807 gimple perm_stmt = NULL;
2808 stmt_vec_info next_stmt_info;
2809 int i, stride;
2810 tree first_vec, second_vec, data_ref;
2811
2812 stride = SLP_TREE_NUMBER_OF_VEC_STMTS (node) / ncopies;
2813
2814   /* Initialize the vect stmts of NODE so that the generated stmts can be
2815      inserted at the proper positions later. */
2816 for (i = SLP_TREE_VEC_STMTS (node).length ();
2817 i < (int) SLP_TREE_NUMBER_OF_VEC_STMTS (node); i++)
2818 SLP_TREE_VEC_STMTS (node).quick_push (NULL);
2819
2820 perm_dest = vect_create_destination_var (gimple_assign_lhs (stmt), vectype);
2821 for (i = 0; i < ncopies; i++)
2822 {
2823 first_vec = dr_chain[first_vec_indx];
2824 second_vec = dr_chain[second_vec_indx];
2825
2826 /* Generate the permute statement. */
2827 perm_stmt = gimple_build_assign_with_ops (VEC_PERM_EXPR, perm_dest,
2828 first_vec, second_vec, mask);
2829 data_ref = make_ssa_name (perm_dest, perm_stmt);
2830 gimple_set_lhs (perm_stmt, data_ref);
2831 vect_finish_stmt_generation (stmt, perm_stmt, gsi);
2832
2833 /* Store the vector statement in NODE. */
2834 SLP_TREE_VEC_STMTS (node)[stride * i + vect_stmts_counter] = perm_stmt;
2835
2836 first_vec_indx += stride;
2837 second_vec_indx += stride;
2838 }
2839
2840 /* Mark the scalar stmt as vectorized. */
2841 next_stmt_info = vinfo_for_stmt (next_scalar_stmt);
2842 STMT_VINFO_VEC_STMT (next_stmt_info) = perm_stmt;
2843 }
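
/* Indexing example (illustrative values): with four vector stmts for
   NODE and NCOPIES == 2, STRIDE is 4 / 2 == 2.  For
   VECT_STMTS_COUNTER == 0 the two generated permute stmts land in slots
   0 and 2 of SLP_TREE_VEC_STMTS, the first copy reading the DR_CHAIN
   entries at (FIRST_VEC_INDX, SECOND_VEC_INDX) and the second reading
   the entries two positions further on.  */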
2844
2845
2846 /* Given FIRST_MASK_ELEMENT - the mask element in element representation,
2847 return in CURRENT_MASK_ELEMENT its equivalent in target specific
2848 representation. Check that the mask is valid and return FALSE if not.
2849    Return TRUE in NEED_NEXT_VECTOR if the permutation requires moving to
2850    the next vector, i.e., the current first vector is no longer needed. */
2851
2852 static bool
2853 vect_get_mask_element (gimple stmt, int first_mask_element, int m,
2854 int mask_nunits, bool only_one_vec, int index,
2855 unsigned char *mask, int *current_mask_element,
2856 bool *need_next_vector, int *number_of_mask_fixes,
2857 bool *mask_fixed, bool *needs_first_vector)
2858 {
2859 int i;
2860
2861 /* Convert to target specific representation. */
2862 *current_mask_element = first_mask_element + m;
2863   /* Adjust the value in case it's a mask for the second and third vectors. */
2864 *current_mask_element -= mask_nunits * (*number_of_mask_fixes - 1);
2865
2866 if (*current_mask_element < mask_nunits)
2867 *needs_first_vector = true;
2868
2869 /* We have only one input vector to permute but the mask accesses values in
2870 the next vector as well. */
2871 if (only_one_vec && *current_mask_element >= mask_nunits)
2872 {
2873 if (dump_enabled_p ())
2874 {
2875 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2876 "permutation requires at least two vectors ");
2877 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
2878 dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
2879 }
2880
2881 return false;
2882 }
2883
2884 /* The mask requires the next vector. */
2885 if (*current_mask_element >= mask_nunits * 2)
2886 {
2887 if (*needs_first_vector || *mask_fixed)
2888 {
2889 /* We either need the first vector too or have already moved to the
2890 next vector. In both cases, this permutation needs three
2891 vectors. */
2892 if (dump_enabled_p ())
2893 {
2894 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2895 "permutation requires at "
2896 "least three vectors ");
2897 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
2898 dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
2899 }
2900
2901 return false;
2902 }
2903
2904 /* We move to the next vector, dropping the first one and working with
2905 the second and the third - we need to adjust the values of the mask
2906 accordingly. */
2907 *current_mask_element -= mask_nunits * *number_of_mask_fixes;
2908
2909 for (i = 0; i < index; i++)
2910 mask[i] -= mask_nunits * *number_of_mask_fixes;
2911
2912 (*number_of_mask_fixes)++;
2913 *mask_fixed = true;
2914 }
2915
2916 *need_next_vector = *mask_fixed;
2917
2918 /* This was the last element of this mask. Start a new one. */
2919 if (index == mask_nunits - 1)
2920 {
2921 *number_of_mask_fixes = 1;
2922 *mask_fixed = false;
2923 *needs_first_vector = false;
2924 }
2925
2926 return true;
2927 }
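
/* Worked example (illustrative values): with MASK_NUNITS == 4 and
   FIRST_MASK_ELEMENT + M == 6 on the first fix (*NUMBER_OF_MASK_FIXES
   == 1), *CURRENT_MASK_ELEMENT stays 6, i.e. element 2 of the second
   input vector.  A value of 9 instead would reach beyond
   MASK_NUNITS * 2, so the first vector is dropped and the current
   element as well as the mask elements collected so far are adjusted
   down by 4.  */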
2928
2929
2930 /* Generate vector permute statements from a list of loads in DR_CHAIN.
2931 If ANALYZE_ONLY is TRUE, only check that it is possible to create valid
2932 permute statements for the SLP node NODE of the SLP instance
2933 SLP_NODE_INSTANCE. */
2934
2935 bool
2936 vect_transform_slp_perm_load (slp_tree node, vec<tree> dr_chain,
2937 gimple_stmt_iterator *gsi, int vf,
2938 slp_instance slp_node_instance, bool analyze_only)
2939 {
2940 gimple stmt = SLP_TREE_SCALAR_STMTS (node)[0];
2941 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2942 tree mask_element_type = NULL_TREE, mask_type;
2943 int i, j, k, nunits, vec_index = 0, scalar_index;
2944 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
2945 gimple next_scalar_stmt;
2946 int group_size = SLP_INSTANCE_GROUP_SIZE (slp_node_instance);
2947 int first_mask_element;
2948 int index, unroll_factor, current_mask_element, ncopies;
2949 unsigned char *mask;
2950 bool only_one_vec = false, need_next_vector = false;
2951 int first_vec_index, second_vec_index, orig_vec_stmts_num, vect_stmts_counter;
2952 int number_of_mask_fixes = 1;
2953 bool mask_fixed = false;
2954 bool needs_first_vector = false;
2955 enum machine_mode mode;
2956
2957 mode = TYPE_MODE (vectype);
2958
2959 if (!can_vec_perm_p (mode, false, NULL))
2960 {
2961 if (dump_enabled_p ())
2962 {
2963 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2964 "no vect permute for ");
2965 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
2966 dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
2967 }
2968 return false;
2969 }
2970
2971 /* The generic VEC_PERM_EXPR code always uses an integral type of the
2972 same size as the vector element being permuted. */
2973 mask_element_type = lang_hooks.types.type_for_mode
2974 (int_mode_for_mode (TYPE_MODE (TREE_TYPE (vectype))), 1);
2975 mask_type = get_vectype_for_scalar_type (mask_element_type);
2976 nunits = TYPE_VECTOR_SUBPARTS (vectype);
2977 mask = XALLOCAVEC (unsigned char, nunits);
2978 unroll_factor = SLP_INSTANCE_UNROLLING_FACTOR (slp_node_instance);
2979
2980   /* The number of vector stmts to generate based only on the
2981      SLP_NODE_INSTANCE unrolling factor. */
2982 orig_vec_stmts_num = group_size *
2983 SLP_INSTANCE_UNROLLING_FACTOR (slp_node_instance) / nunits;
2984 if (orig_vec_stmts_num == 1)
2985 only_one_vec = true;
2986
2987   /* The number of copies is determined by the final vectorization factor
2988      relative to the SLP_NODE_INSTANCE unrolling factor. */
2989 ncopies = vf / SLP_INSTANCE_UNROLLING_FACTOR (slp_node_instance);
2990
2991 if (!STMT_VINFO_GROUPED_ACCESS (stmt_info))
2992 return false;
2993
2994   /* Generate permutation masks for every NODE. The number of masks for each
2995      NODE is equal to GROUP_SIZE.
2996      E.g., we have a group of three nodes with three loads from the same
2997      location in each node, and the vector size is 4. I.e., we have an
2998      a0b0c0a1b1c1... sequence and we need to create the following vectors:
2999      for a's: a0a0a0a1 a1a1a2a2 a2a3a3a3
3000      for b's: b0b0b0b1 b1b1b2b2 b2b3b3b3
3001      ...
3002
3003      The masks for a's should be: {0,0,0,3} {3,3,6,6} {6,9,9,9}.
3004      The last mask is illegal since we assume two operands for the permute
3005      operation, so the mask element values can't index outside those two
3006      vectors. Hence, the last mask must be converted into {2,5,5,5}.
3007 For the first two permutations we need the first and the second input
3008 vectors: {a0,b0,c0,a1} and {b1,c1,a2,b2}, and for the last permutation
3009 we need the second and the third vectors: {b1,c1,a2,b2} and
3010 {c2,a3,b3,c3}. */
3011
3012 {
3013 scalar_index = 0;
3014 index = 0;
3015 vect_stmts_counter = 0;
3016 vec_index = 0;
3017 first_vec_index = vec_index++;
3018 if (only_one_vec)
3019 second_vec_index = first_vec_index;
3020 else
3021 second_vec_index = vec_index++;
3022
3023 for (j = 0; j < unroll_factor; j++)
3024 {
3025 for (k = 0; k < group_size; k++)
3026 {
3027 i = SLP_TREE_LOAD_PERMUTATION (node)[k];
3028 first_mask_element = i + j * group_size;
3029 if (!vect_get_mask_element (stmt, first_mask_element, 0,
3030 nunits, only_one_vec, index,
3031 mask, &current_mask_element,
3032 &need_next_vector,
3033 &number_of_mask_fixes, &mask_fixed,
3034 &needs_first_vector))
3035 return false;
3036 mask[index++] = current_mask_element;
3037
3038 if (index == nunits)
3039 {
3040 index = 0;
3041 if (!can_vec_perm_p (mode, false, mask))
3042 {
3043 if (dump_enabled_p ())
3044 {
3045 dump_printf_loc (MSG_MISSED_OPTIMIZATION,
3046 vect_location,
3047 "unsupported vect permute { ");
3048 for (i = 0; i < nunits; ++i)
3049 dump_printf (MSG_MISSED_OPTIMIZATION, "%d ",
3050 mask[i]);
3051 dump_printf (MSG_MISSED_OPTIMIZATION, "}\n");
3052 }
3053 return false;
3054 }
3055
3056 if (!analyze_only)
3057 {
3058 int l;
3059 tree mask_vec, *mask_elts;
3060 mask_elts = XALLOCAVEC (tree, nunits);
3061 for (l = 0; l < nunits; ++l)
3062 mask_elts[l] = build_int_cst (mask_element_type,
3063 mask[l]);
3064 mask_vec = build_vector (mask_type, mask_elts);
3065
3066 if (need_next_vector)
3067 {
3068 first_vec_index = second_vec_index;
3069 second_vec_index = vec_index;
3070 }
3071
3072 next_scalar_stmt
3073 = SLP_TREE_SCALAR_STMTS (node)[scalar_index++];
3074
3075 vect_create_mask_and_perm (stmt, next_scalar_stmt,
3076 mask_vec, first_vec_index, second_vec_index,
3077 gsi, node, vectype, dr_chain,
3078 ncopies, vect_stmts_counter++);
3079 }
3080 }
3081 }
3082 }
3083 }
3084
3085 return true;
3086 }
3087
3088
3089
3090 /* Vectorize SLP instance tree in postorder. */
3091
3092 static bool
3093 vect_schedule_slp_instance (slp_tree node, slp_instance instance,
3094 unsigned int vectorization_factor)
3095 {
3096 gimple stmt;
3097 bool grouped_store, is_store;
3098 gimple_stmt_iterator si;
3099 stmt_vec_info stmt_info;
3100 unsigned int vec_stmts_size, nunits, group_size;
3101 tree vectype;
3102 int i;
3103 slp_tree child;
3104
3105 if (!node)
3106 return false;
3107
3108 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
3109 vect_schedule_slp_instance (child, instance, vectorization_factor);
3110
3111 stmt = SLP_TREE_SCALAR_STMTS (node)[0];
3112 stmt_info = vinfo_for_stmt (stmt);
3113
3114 /* VECTYPE is the type of the destination. */
3115 vectype = STMT_VINFO_VECTYPE (stmt_info);
3116 nunits = (unsigned int) TYPE_VECTOR_SUBPARTS (vectype);
3117 group_size = SLP_INSTANCE_GROUP_SIZE (instance);
3118
3119   /* For each SLP instance calculate the number of vector stmts to be created
3120      for the scalar stmts in each node of the SLP tree. The number of vector
3121      elements in one vector iteration is the number of scalar elements in
3122      one scalar iteration (GROUP_SIZE) multiplied by VF and divided by the
3123      vector size. */
3124 vec_stmts_size = (vectorization_factor * group_size) / nunits;
3125
3126 if (!SLP_TREE_VEC_STMTS (node).exists ())
3127 {
3128 SLP_TREE_VEC_STMTS (node).create (vec_stmts_size);
3129 SLP_TREE_NUMBER_OF_VEC_STMTS (node) = vec_stmts_size;
3130 }
3131
3132 if (dump_enabled_p ())
3133 {
3134       dump_printf_loc (MSG_NOTE, vect_location,
3135                        "------>vectorizing SLP node starting from: ");
3136 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
3137 dump_printf (MSG_NOTE, "\n");
3138 }
3139
3140 /* Loads should be inserted before the first load. */
3141 if (SLP_INSTANCE_FIRST_LOAD_STMT (instance)
3142 && STMT_VINFO_GROUPED_ACCESS (stmt_info)
3143 && !REFERENCE_CLASS_P (gimple_get_lhs (stmt))
3144 && SLP_TREE_LOAD_PERMUTATION (node).exists ())
3145 si = gsi_for_stmt (SLP_INSTANCE_FIRST_LOAD_STMT (instance));
3146 else if (is_pattern_stmt_p (stmt_info))
3147 si = gsi_for_stmt (STMT_VINFO_RELATED_STMT (stmt_info));
3148 else
3149 si = gsi_for_stmt (stmt);
3150
3151 /* Stores should be inserted just before the last store. */
3152 if (STMT_VINFO_GROUPED_ACCESS (stmt_info)
3153 && REFERENCE_CLASS_P (gimple_get_lhs (stmt)))
3154 {
3155 gimple last_store = vect_find_last_store_in_slp_instance (instance);
3156 if (is_pattern_stmt_p (vinfo_for_stmt (last_store)))
3157 last_store = STMT_VINFO_RELATED_STMT (vinfo_for_stmt (last_store));
3158 si = gsi_for_stmt (last_store);
3159 }
3160
3161   /* Mark the first element of the reduction chain as a reduction to properly
3162      transform the node. In the analysis phase only the last element of the
3163      chain is marked as a reduction. */
3164 if (GROUP_FIRST_ELEMENT (stmt_info) && !STMT_VINFO_GROUPED_ACCESS (stmt_info)
3165 && GROUP_FIRST_ELEMENT (stmt_info) == stmt)
3166 {
3167 STMT_VINFO_DEF_TYPE (stmt_info) = vect_reduction_def;
3168 STMT_VINFO_TYPE (stmt_info) = reduc_vec_info_type;
3169 }
3170
3171 is_store = vect_transform_stmt (stmt, &si, &grouped_store, node, instance);
3172 return is_store;
3173 }
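
/* Sizing example (illustrative values): with a vectorization factor of
   4, GROUP_SIZE == 2 and NUNITS == 4, VEC_STMTS_SIZE is (4 * 2) / 4
   == 2, i.e. each node of the instance produces two vector stmts per
   vectorized loop iteration.  */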
3174
3175 /* Replace the scalar calls in SLP node NODE with assignments of zero to
3176    their lhs. For loop vectorization this is done in vectorizable_call, but
3177    for SLP it needs to be deferred until the end of vect_schedule_slp,
3178    because multiple SLP instances may refer to the same scalar stmt. */
3179
3180 static void
3181 vect_remove_slp_scalar_calls (slp_tree node)
3182 {
3183 gimple stmt, new_stmt;
3184 gimple_stmt_iterator gsi;
3185 int i;
3186 slp_tree child;
3187 tree lhs;
3188 stmt_vec_info stmt_info;
3189
3190 if (!node)
3191 return;
3192
3193 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
3194 vect_remove_slp_scalar_calls (child);
3195
3196 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, stmt)
3197 {
3198 if (!is_gimple_call (stmt) || gimple_bb (stmt) == NULL)
3199 continue;
3200 stmt_info = vinfo_for_stmt (stmt);
3201 if (stmt_info == NULL
3202 || is_pattern_stmt_p (stmt_info)
3203 || !PURE_SLP_STMT (stmt_info))
3204 continue;
3205 lhs = gimple_call_lhs (stmt);
3206 new_stmt = gimple_build_assign (lhs, build_zero_cst (TREE_TYPE (lhs)));
3207 set_vinfo_for_stmt (new_stmt, stmt_info);
3208 set_vinfo_for_stmt (stmt, NULL);
3209 STMT_VINFO_STMT (stmt_info) = new_stmt;
3210 gsi = gsi_for_stmt (stmt);
3211 gsi_replace (&gsi, new_stmt, false);
3212 SSA_NAME_DEF_STMT (gimple_assign_lhs (new_stmt)) = new_stmt;
3213 }
3214 }
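
/* For example (hypothetical call): a pure-SLP scalar call

       x_1 = foo (a_2);       <-- foo is a hypothetical function

   is rewritten above to x_1 = 0 and its stmt_vec_info is transferred to
   the replacement assignment, so that DCE can remove it once the
   vectorized call has taken over all uses of x_1.  */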
3215
3216 /* Generate vector code for all SLP instances in the loop/basic block. */
3217
3218 bool
3219 vect_schedule_slp (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo)
3220 {
3221 vec<slp_instance> slp_instances;
3222 slp_instance instance;
3223 unsigned int i, vf;
3224 bool is_store = false;
3225
3226 if (loop_vinfo)
3227 {
3228 slp_instances = LOOP_VINFO_SLP_INSTANCES (loop_vinfo);
3229 vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
3230 }
3231 else
3232 {
3233 slp_instances = BB_VINFO_SLP_INSTANCES (bb_vinfo);
3234 vf = 1;
3235 }
3236
3237 FOR_EACH_VEC_ELT (slp_instances, i, instance)
3238 {
3239 /* Schedule the tree of INSTANCE. */
3240 is_store = vect_schedule_slp_instance (SLP_INSTANCE_TREE (instance),
3241 instance, vf);
3242 if (dump_enabled_p ())
3243 dump_printf_loc (MSG_NOTE, vect_location,
3244 "vectorizing stmts using SLP.\n");
3245 }
3246
3247 FOR_EACH_VEC_ELT (slp_instances, i, instance)
3248 {
3249 slp_tree root = SLP_INSTANCE_TREE (instance);
3250 gimple store;
3251 unsigned int j;
3252 gimple_stmt_iterator gsi;
3253
3254 /* Remove scalar call stmts. Do not do this for basic-block
3255 vectorization as not all uses may be vectorized.
3256 ??? Why should this be necessary? DCE should be able to
3257 remove the stmts itself.
3258 ??? For BB vectorization we can as well remove scalar
3259 stmts starting from the SLP tree root if they have no
3260 uses. */
3261 if (loop_vinfo)
3262 vect_remove_slp_scalar_calls (root);
3263
3264 for (j = 0; SLP_TREE_SCALAR_STMTS (root).iterate (j, &store)
3265 && j < SLP_INSTANCE_GROUP_SIZE (instance); j++)
3266 {
3267 if (!STMT_VINFO_DATA_REF (vinfo_for_stmt (store)))
3268 break;
3269
3270 if (is_pattern_stmt_p (vinfo_for_stmt (store)))
3271 store = STMT_VINFO_RELATED_STMT (vinfo_for_stmt (store));
3272 /* Free the attached stmt_vec_info and remove the stmt. */
3273 gsi = gsi_for_stmt (store);
3274 unlink_stmt_vdef (store);
3275 gsi_remove (&gsi, true);
3276 release_defs (store);
3277 free_stmt_vec_info (store);
3278 }
3279 }
3280
3281 return is_store;
3282 }
3283
3284
3285 /* Vectorize the basic block. */
3286
3287 void
3288 vect_slp_transform_bb (basic_block bb)
3289 {
3290 bb_vec_info bb_vinfo = vec_info_for_bb (bb);
3291 gimple_stmt_iterator si;
3292
3293 gcc_assert (bb_vinfo);
3294
3295 if (dump_enabled_p ())
3296 dump_printf_loc (MSG_NOTE, vect_location, "SLPing BB\n");
3297
3298 for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
3299 {
3300 gimple stmt = gsi_stmt (si);
3301 stmt_vec_info stmt_info;
3302
3303 if (dump_enabled_p ())
3304 {
3305 dump_printf_loc (MSG_NOTE, vect_location,
3306 "------>SLPing statement: ");
3307 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
3308 dump_printf (MSG_NOTE, "\n");
3309 }
3310
3311 stmt_info = vinfo_for_stmt (stmt);
3312 gcc_assert (stmt_info);
3313
3314 /* Schedule all the SLP instances when the first SLP stmt is reached. */
3315 if (STMT_SLP_TYPE (stmt_info))
3316 {
3317 vect_schedule_slp (NULL, bb_vinfo);
3318 break;
3319 }
3320 }
3321
3322 if (dump_enabled_p ())
3323 dump_printf_loc (MSG_NOTE, vect_location,
3324 "BASIC BLOCK VECTORIZED\n");
3325
3326 destroy_bb_vec_info (bb_vinfo);
3327 }