gcc/tree-vect-slp.c
/* SLP - Basic Block Vectorization
   Copyright (C) 2007, 2008, 2009, 2010, 2011, 2012
   Free Software Foundation, Inc.
   Contributed by Dorit Naishlos <dorit@il.ibm.com>
   and Ira Rosen <irar@il.ibm.com>

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free
Software Foundation; either version 3, or (at your option) any later
version.

GCC is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */

#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "tm.h"
#include "ggc.h"
#include "tree.h"
#include "target.h"
#include "basic-block.h"
#include "tree-pretty-print.h"
#include "gimple-pretty-print.h"
#include "tree-flow.h"
#include "tree-dump.h"
#include "cfgloop.h"
#include "cfglayout.h"
#include "expr.h"
#include "recog.h"
#include "optabs.h"
#include "tree-vectorizer.h"
#include "langhooks.h"
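
/* Basic block SLP packs isomorphic scalar statements from a straight-line
   sequence into vector statements, with no enclosing loop required.  For
   example, a block such as

       out[0] = a[0] + b[0];
       out[1] = a[1] + b[1];
       out[2] = a[2] + b[2];
       out[3] = a[3] + b[3];

   can become a single vector addition and a single vector store when a
   four-element vector type is available.  (Illustrative example.)  */
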
/* Extract the location of the basic block in the source code.
   Return the location of the basic block if successful, and UNKNOWN_LOC
   if not.  */

LOC
find_bb_location (basic_block bb)
{
  gimple stmt = NULL;
  gimple_stmt_iterator si;

  if (!bb)
    return UNKNOWN_LOC;

  for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
    {
      stmt = gsi_stmt (si);
      if (gimple_location (stmt) != UNKNOWN_LOC)
        return gimple_location (stmt);
    }

  return UNKNOWN_LOC;
}

/* Recursively free the memory allocated for the SLP tree rooted at NODE.  */

static void
vect_free_slp_tree (slp_tree node)
{
  int i;
  slp_void_p child;

  if (!node)
    return;

  FOR_EACH_VEC_ELT (slp_void_p, SLP_TREE_CHILDREN (node), i, child)
    vect_free_slp_tree ((slp_tree) child);

  VEC_free (slp_void_p, heap, SLP_TREE_CHILDREN (node));
  VEC_free (gimple, heap, SLP_TREE_SCALAR_STMTS (node));

  if (SLP_TREE_VEC_STMTS (node))
    VEC_free (gimple, heap, SLP_TREE_VEC_STMTS (node));

  free (node);
}


/* Free the memory allocated for the SLP instance.  */

void
vect_free_slp_instance (slp_instance instance)
{
  vect_free_slp_tree (SLP_INSTANCE_TREE (instance));
  VEC_free (int, heap, SLP_INSTANCE_LOAD_PERMUTATION (instance));
  VEC_free (slp_tree, heap, SLP_INSTANCE_LOADS (instance));
}


/* Create an SLP node for SCALAR_STMTS.  */

static slp_tree
vect_create_new_slp_node (VEC (gimple, heap) *scalar_stmts)
{
  slp_tree node;
  gimple stmt = VEC_index (gimple, scalar_stmts, 0);
  unsigned int nops;

  if (is_gimple_call (stmt))
    nops = gimple_call_num_args (stmt);
  else if (is_gimple_assign (stmt))
    {
      nops = gimple_num_ops (stmt) - 1;
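      /* The first operand of a COND_EXPR may be a comparison, which
         contributes an extra operand of its own (the comparison is split
         into its two operands in vect_get_and_check_slp_defs), so count
         one more.  */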
      if (gimple_assign_rhs_code (stmt) == COND_EXPR)
        nops++;
    }
  else
    return NULL;

  node = XNEW (struct _slp_tree);
  SLP_TREE_SCALAR_STMTS (node) = scalar_stmts;
  SLP_TREE_VEC_STMTS (node) = NULL;
  SLP_TREE_CHILDREN (node) = VEC_alloc (slp_void_p, heap, nops);
  SLP_TREE_OUTSIDE_OF_LOOP_COST (node) = 0;
  SLP_TREE_INSIDE_OF_LOOP_COST (node) = 0;

  return node;
}


/* Allocate operands info for NOPS operands, and GROUP_SIZE def-stmts for each
   operand.  */
static VEC (slp_oprnd_info, heap) *
vect_create_oprnd_info (int nops, int group_size)
{
  int i;
  slp_oprnd_info oprnd_info;
  VEC (slp_oprnd_info, heap) *oprnds_info;

  oprnds_info = VEC_alloc (slp_oprnd_info, heap, nops);
  for (i = 0; i < nops; i++)
    {
      oprnd_info = XNEW (struct _slp_oprnd_info);
      oprnd_info->def_stmts = VEC_alloc (gimple, heap, group_size);
      oprnd_info->first_dt = vect_uninitialized_def;
      oprnd_info->first_def_type = NULL_TREE;
      oprnd_info->first_const_oprnd = NULL_TREE;
      oprnd_info->first_pattern = false;
      VEC_quick_push (slp_oprnd_info, oprnds_info, oprnd_info);
    }

  return oprnds_info;
}


/* Free operands info.  */

static void
vect_free_oprnd_info (VEC (slp_oprnd_info, heap) **oprnds_info)
{
  int i;
  slp_oprnd_info oprnd_info;

  FOR_EACH_VEC_ELT (slp_oprnd_info, *oprnds_info, i, oprnd_info)
    {
      VEC_free (gimple, heap, oprnd_info->def_stmts);
      XDELETE (oprnd_info);
    }

  VEC_free (slp_oprnd_info, heap, *oprnds_info);
}

/* Get the defs for the rhs of STMT (collect them in OPRNDS_INFO), check that
   they are of a valid type and that they match the defs of the first stmt of
   the SLP group (stored in OPRNDS_INFO).  */

static bool
vect_get_and_check_slp_defs (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo,
                             slp_tree slp_node, gimple stmt,
                             int ncopies_for_cost, bool first,
                             VEC (slp_oprnd_info, heap) **oprnds_info)
{
  tree oprnd;
  unsigned int i, number_of_oprnds;
  tree def, def_op0 = NULL_TREE;
  gimple def_stmt;
  enum vect_def_type dt = vect_uninitialized_def;
  enum vect_def_type dt_op0 = vect_uninitialized_def;
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
  tree lhs = gimple_get_lhs (stmt);
  struct loop *loop = NULL;
  enum tree_code rhs_code;
  bool different_types = false;
  bool pattern = false;
  slp_oprnd_info oprnd_info, oprnd0_info, oprnd1_info;
  int op_idx = 1;
  tree compare_rhs = NULL_TREE;

  if (loop_vinfo)
    loop = LOOP_VINFO_LOOP (loop_vinfo);

  if (is_gimple_call (stmt))
    {
      number_of_oprnds = gimple_call_num_args (stmt);
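      /* In a GIMPLE_CALL the arguments are operands 3 onwards (operand 0
         is the LHS, 1 the function, 2 the static chain), so rhs operands
         are scanned starting at index 3.  */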
      op_idx = 3;
    }
  else if (is_gimple_assign (stmt))
    {
      number_of_oprnds = gimple_num_ops (stmt) - 1;
      if (gimple_assign_rhs_code (stmt) == COND_EXPR)
        number_of_oprnds++;
    }
  else
    return false;

  for (i = 0; i < number_of_oprnds; i++)
    {
      if (compare_rhs)
        {
          oprnd = compare_rhs;
          compare_rhs = NULL_TREE;
        }
      else
        oprnd = gimple_op (stmt, op_idx++);

      oprnd_info = VEC_index (slp_oprnd_info, *oprnds_info, i);

      if (COMPARISON_CLASS_P (oprnd))
        {
          compare_rhs = TREE_OPERAND (oprnd, 1);
          oprnd = TREE_OPERAND (oprnd, 0);
        }

      if (!vect_is_simple_use (oprnd, NULL, loop_vinfo, bb_vinfo, &def_stmt,
                               &def, &dt)
          || (!def_stmt && dt != vect_constant_def))
        {
          if (vect_print_dump_info (REPORT_SLP))
            {
              fprintf (vect_dump, "Build SLP failed: can't find def for ");
              print_generic_expr (vect_dump, oprnd, TDF_SLIM);
            }

          return false;
        }

      /* Check if DEF_STMT is a part of a pattern in LOOP and get the def stmt
         from the pattern.  Check that all the stmts of the node are in the
         pattern.  */
      if (loop && def_stmt && gimple_bb (def_stmt)
          && flow_bb_inside_loop_p (loop, gimple_bb (def_stmt))
          && vinfo_for_stmt (def_stmt)
          && STMT_VINFO_IN_PATTERN_P (vinfo_for_stmt (def_stmt))
          && !STMT_VINFO_RELEVANT (vinfo_for_stmt (def_stmt))
          && !STMT_VINFO_LIVE_P (vinfo_for_stmt (def_stmt)))
        {
          pattern = true;
          if (!first && !oprnd_info->first_pattern)
            {
              if (vect_print_dump_info (REPORT_DETAILS))
                {
                  fprintf (vect_dump, "Build SLP failed: some of the stmts"
                           " are in a pattern, and others are not ");
                  print_generic_expr (vect_dump, oprnd, TDF_SLIM);
                }

              return false;
            }

          def_stmt = STMT_VINFO_RELATED_STMT (vinfo_for_stmt (def_stmt));
          dt = STMT_VINFO_DEF_TYPE (vinfo_for_stmt (def_stmt));

          if (dt == vect_unknown_def_type)
            {
              if (vect_print_dump_info (REPORT_DETAILS))
                fprintf (vect_dump, "Unsupported pattern.");
              return false;
            }

          switch (gimple_code (def_stmt))
            {
            case GIMPLE_PHI:
              def = gimple_phi_result (def_stmt);
              break;

            case GIMPLE_ASSIGN:
              def = gimple_assign_lhs (def_stmt);
              break;

            default:
              if (vect_print_dump_info (REPORT_DETAILS))
                fprintf (vect_dump, "unsupported defining stmt: ");
              return false;
            }
        }

      if (first)
        {
          oprnd_info->first_dt = dt;
          oprnd_info->first_pattern = pattern;
          if (def)
            {
              oprnd_info->first_def_type = TREE_TYPE (def);
              oprnd_info->first_const_oprnd = NULL_TREE;
            }
          else
            {
              oprnd_info->first_def_type = NULL_TREE;
              oprnd_info->first_const_oprnd = oprnd;
            }

          if (i == 0)
            {
              def_op0 = def;
              dt_op0 = dt;
              /* Analyze costs (for the first stmt of the group only).  */
              if (REFERENCE_CLASS_P (lhs))
                /* Store.  */
                vect_model_store_cost (stmt_info, ncopies_for_cost, false,
                                       dt, slp_node);
              else
                {
                  enum vect_def_type dts[2];
                  dts[0] = dt;
                  dts[1] = vect_uninitialized_def;
                  /* Not a memory operation (we don't call this function for
                     loads).  */
                  vect_model_simple_cost (stmt_info, ncopies_for_cost, dts,
                                          slp_node);
                }
            }
        }
      else
        {
          /* Not first stmt of the group, check that the def-stmt/s match
             the def-stmt/s of the first stmt.  Allow different definition
             types for reduction chains: the first stmt must be a
             vect_reduction_def (a phi node), and the rest
             vect_internal_def.  */
          if (((oprnd_info->first_dt != dt
                && !(oprnd_info->first_dt == vect_reduction_def
                     && dt == vect_internal_def))
               || (oprnd_info->first_def_type != NULL_TREE
                   && def
                   && !types_compatible_p (oprnd_info->first_def_type,
                                           TREE_TYPE (def))))
              || (!def
                  && !types_compatible_p (TREE_TYPE (oprnd_info->first_const_oprnd),
                                          TREE_TYPE (oprnd)))
              || different_types)
            {
              if (number_of_oprnds != 2)
                {
                  if (vect_print_dump_info (REPORT_SLP))
                    fprintf (vect_dump, "Build SLP failed: different types ");

                  return false;
                }

              /* Try to swap operands in case of binary operation.  */
              if (i == 0)
                different_types = true;
              else
                {
                  oprnd0_info = VEC_index (slp_oprnd_info, *oprnds_info, 0);
                  if (is_gimple_assign (stmt)
                      && (rhs_code = gimple_assign_rhs_code (stmt))
                      && TREE_CODE_CLASS (rhs_code) == tcc_binary
                      && commutative_tree_code (rhs_code)
                      && oprnd0_info->first_dt == dt
                      && oprnd_info->first_dt == dt_op0
                      && def_op0 && def
                      && !(oprnd0_info->first_def_type
                           && !types_compatible_p (oprnd0_info->first_def_type,
                                                   TREE_TYPE (def)))
                      && !(oprnd_info->first_def_type
                           && !types_compatible_p (oprnd_info->first_def_type,
                                                   TREE_TYPE (def_op0))))
                    {
                      if (vect_print_dump_info (REPORT_SLP))
                        {
                          fprintf (vect_dump, "Swapping operands of ");
                          print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
                        }

                      swap_tree_operands (stmt, gimple_assign_rhs1_ptr (stmt),
                                          gimple_assign_rhs2_ptr (stmt));
                    }
                  else
                    {
                      if (vect_print_dump_info (REPORT_SLP))
                        fprintf (vect_dump, "Build SLP failed: different types ");

                      return false;
                    }
                }
            }
        }

      /* Check the types of the definitions.  */
      switch (dt)
        {
        case vect_constant_def:
        case vect_external_def:
        case vect_reduction_def:
          break;

        case vect_internal_def:
          if (different_types)
            {
              oprnd0_info = VEC_index (slp_oprnd_info, *oprnds_info, 0);
              oprnd1_info = VEC_index (slp_oprnd_info, *oprnds_info, 1);
              if (i == 0)
                VEC_quick_push (gimple, oprnd1_info->def_stmts, def_stmt);
              else
                VEC_quick_push (gimple, oprnd0_info->def_stmts, def_stmt);
            }
          else
            VEC_quick_push (gimple, oprnd_info->def_stmts, def_stmt);

          break;

        default:
          /* FORNOW: Not supported.  */
          if (vect_print_dump_info (REPORT_SLP))
            {
              fprintf (vect_dump, "Build SLP failed: illegal type of def ");
              print_generic_expr (vect_dump, def, TDF_SLIM);
            }

          return false;
        }
    }

  return true;
}

/* Recursively build an SLP tree starting from NODE.
   Fail (and return FALSE) if def-stmts are not isomorphic, require data
   permutation, or are of an unsupported operation type.  Otherwise, return
   TRUE.  */

static bool
vect_build_slp_tree (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo,
                     slp_tree *node, unsigned int group_size,
                     int *inside_cost, int *outside_cost,
                     int ncopies_for_cost, unsigned int *max_nunits,
                     VEC (int, heap) **load_permutation,
                     VEC (slp_tree, heap) **loads,
                     unsigned int vectorization_factor, bool *loads_permuted)
{
  unsigned int i;
  VEC (gimple, heap) *stmts = SLP_TREE_SCALAR_STMTS (*node);
  gimple stmt = VEC_index (gimple, stmts, 0);
  enum tree_code first_stmt_code = ERROR_MARK, rhs_code = ERROR_MARK;
  enum tree_code first_cond_code = ERROR_MARK;
  tree lhs;
  bool stop_recursion = false, need_same_oprnds = false;
  tree vectype, scalar_type, first_op1 = NULL_TREE;
  unsigned int ncopies;
  optab optab;
  int icode;
  enum machine_mode optab_op2_mode;
  enum machine_mode vec_mode;
  struct data_reference *first_dr;
  HOST_WIDE_INT dummy;
  bool permutation = false;
  unsigned int load_place;
  gimple first_load, prev_first_load = NULL;
  VEC (slp_oprnd_info, heap) *oprnds_info;
  unsigned int nops;
  slp_oprnd_info oprnd_info;
  tree cond;

  if (is_gimple_call (stmt))
    nops = gimple_call_num_args (stmt);
  else if (is_gimple_assign (stmt))
    {
      nops = gimple_num_ops (stmt) - 1;
      if (gimple_assign_rhs_code (stmt) == COND_EXPR)
        nops++;
    }
  else
    return false;

  oprnds_info = vect_create_oprnd_info (nops, group_size);

  /* For every stmt in NODE find its def stmt/s.  */
  FOR_EACH_VEC_ELT (gimple, stmts, i, stmt)
    {
      if (vect_print_dump_info (REPORT_SLP))
        {
          fprintf (vect_dump, "Build SLP for ");
          print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
        }

      /* Fail to vectorize statements marked as unvectorizable.  */
      if (!STMT_VINFO_VECTORIZABLE (vinfo_for_stmt (stmt)))
        {
          if (vect_print_dump_info (REPORT_SLP))
            {
              fprintf (vect_dump,
                       "Build SLP failed: unvectorizable statement ");
              print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
            }

          vect_free_oprnd_info (&oprnds_info);
          return false;
        }

      lhs = gimple_get_lhs (stmt);
      if (lhs == NULL_TREE)
        {
          if (vect_print_dump_info (REPORT_SLP))
            {
              fprintf (vect_dump,
                       "Build SLP failed: not GIMPLE_ASSIGN nor GIMPLE_CALL ");
              print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
            }

          vect_free_oprnd_info (&oprnds_info);
          return false;
        }

      if (is_gimple_assign (stmt)
          && gimple_assign_rhs_code (stmt) == COND_EXPR
          && (cond = gimple_assign_rhs1 (stmt))
          && !COMPARISON_CLASS_P (cond))
        {
          if (vect_print_dump_info (REPORT_SLP))
            {
              fprintf (vect_dump,
                       "Build SLP failed: condition is not comparison ");
              print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
            }

          vect_free_oprnd_info (&oprnds_info);
          return false;
        }

      scalar_type = vect_get_smallest_scalar_type (stmt, &dummy, &dummy);
      vectype = get_vectype_for_scalar_type (scalar_type);
      if (!vectype)
        {
          if (vect_print_dump_info (REPORT_SLP))
            {
              fprintf (vect_dump, "Build SLP failed: unsupported data-type ");
              print_generic_expr (vect_dump, scalar_type, TDF_SLIM);
            }

          vect_free_oprnd_info (&oprnds_info);
          return false;
        }

      /* In case of multiple types we need to detect the smallest type.  */
      if (*max_nunits < TYPE_VECTOR_SUBPARTS (vectype))
        {
          *max_nunits = TYPE_VECTOR_SUBPARTS (vectype);
          if (bb_vinfo)
            vectorization_factor = *max_nunits;
        }

      ncopies = vectorization_factor / TYPE_VECTOR_SUBPARTS (vectype);

      if (is_gimple_call (stmt))
        {
          rhs_code = CALL_EXPR;
          if (gimple_call_internal_p (stmt)
              || gimple_call_tail_p (stmt)
              || gimple_call_noreturn_p (stmt)
              || !gimple_call_nothrow_p (stmt)
              || gimple_call_chain (stmt))
            {
              if (vect_print_dump_info (REPORT_SLP))
                {
                  fprintf (vect_dump,
                           "Build SLP failed: unsupported call type ");
                  print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
                }

              vect_free_oprnd_info (&oprnds_info);
              return false;
            }
        }
      else
        rhs_code = gimple_assign_rhs_code (stmt);

      /* Check the operation.  */
      if (i == 0)
        {
          first_stmt_code = rhs_code;

          /* Shift arguments should be equal in all the packed stmts for a
             vector shift with scalar shift operand.  */
          if (rhs_code == LSHIFT_EXPR || rhs_code == RSHIFT_EXPR
              || rhs_code == LROTATE_EXPR
              || rhs_code == RROTATE_EXPR)
            {
              vec_mode = TYPE_MODE (vectype);

              /* First see if we have a vector/vector shift.  */
              optab = optab_for_tree_code (rhs_code, vectype,
                                           optab_vector);

              if (!optab
                  || optab_handler (optab, vec_mode) == CODE_FOR_nothing)
                {
                  /* No vector/vector shift, try for a vector/scalar shift.  */
                  optab = optab_for_tree_code (rhs_code, vectype,
                                               optab_scalar);

                  if (!optab)
                    {
                      if (vect_print_dump_info (REPORT_SLP))
                        fprintf (vect_dump, "Build SLP failed: no optab.");
                      vect_free_oprnd_info (&oprnds_info);
                      return false;
                    }
                  icode = (int) optab_handler (optab, vec_mode);
                  if (icode == CODE_FOR_nothing)
                    {
                      if (vect_print_dump_info (REPORT_SLP))
                        fprintf (vect_dump, "Build SLP failed: "
                                 "op not supported by target.");
                      vect_free_oprnd_info (&oprnds_info);
                      return false;
                    }
                  optab_op2_mode = insn_data[icode].operand[2].mode;
                  if (!VECTOR_MODE_P (optab_op2_mode))
                    {
                      need_same_oprnds = true;
                      first_op1 = gimple_assign_rhs2 (stmt);
                    }
                }
            }
          else if (rhs_code == WIDEN_LSHIFT_EXPR)
            {
              need_same_oprnds = true;
              first_op1 = gimple_assign_rhs2 (stmt);
            }
        }
      else
        {
          if (first_stmt_code != rhs_code
              && (first_stmt_code != IMAGPART_EXPR
                  || rhs_code != REALPART_EXPR)
              && (first_stmt_code != REALPART_EXPR
                  || rhs_code != IMAGPART_EXPR)
              && !(STMT_VINFO_STRIDED_ACCESS (vinfo_for_stmt (stmt))
                   && (first_stmt_code == ARRAY_REF
                       || first_stmt_code == INDIRECT_REF
                       || first_stmt_code == COMPONENT_REF
                       || first_stmt_code == MEM_REF)))
            {
              if (vect_print_dump_info (REPORT_SLP))
                {
                  fprintf (vect_dump,
                           "Build SLP failed: different operation in stmt ");
                  print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
                }

              vect_free_oprnd_info (&oprnds_info);
              return false;
            }

          if (need_same_oprnds
              && !operand_equal_p (first_op1, gimple_assign_rhs2 (stmt), 0))
            {
              if (vect_print_dump_info (REPORT_SLP))
                {
                  fprintf (vect_dump,
                           "Build SLP failed: different shift arguments in ");
                  print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
                }

              vect_free_oprnd_info (&oprnds_info);
              return false;
            }

          if (rhs_code == CALL_EXPR)
            {
              gimple first_stmt = VEC_index (gimple, stmts, 0);
              if (gimple_call_num_args (stmt) != nops
                  || !operand_equal_p (gimple_call_fn (first_stmt),
                                       gimple_call_fn (stmt), 0)
                  || gimple_call_fntype (first_stmt)
                     != gimple_call_fntype (stmt))
                {
                  if (vect_print_dump_info (REPORT_SLP))
                    {
                      fprintf (vect_dump,
                               "Build SLP failed: different calls in ");
                      print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
                    }

                  vect_free_oprnd_info (&oprnds_info);
                  return false;
                }
            }
        }

      /* Strided store or load.  */
      if (STMT_VINFO_STRIDED_ACCESS (vinfo_for_stmt (stmt)))
        {
          if (REFERENCE_CLASS_P (lhs))
            {
              /* Store.  */
              if (!vect_get_and_check_slp_defs (loop_vinfo, bb_vinfo, *node,
                                                stmt, ncopies_for_cost,
                                                (i == 0), &oprnds_info))
                {
                  vect_free_oprnd_info (&oprnds_info);
                  return false;
                }
            }
          else
            {
              /* Load.  */
              /* FORNOW: Check that there is no gap between the loads.  */
              if ((GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)) == stmt
                   && GROUP_GAP (vinfo_for_stmt (stmt)) != 0)
                  || (GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)) != stmt
                      && GROUP_GAP (vinfo_for_stmt (stmt)) != 1))
                {
                  if (vect_print_dump_info (REPORT_SLP))
                    {
                      fprintf (vect_dump, "Build SLP failed: strided "
                               "loads have gaps ");
                      print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
                    }

                  vect_free_oprnd_info (&oprnds_info);
                  return false;
                }

              /* Check that the size of the interleaved loads group is not
                 greater than the SLP group size.  */
              if (loop_vinfo
                  && GROUP_SIZE (vinfo_for_stmt (stmt)) > ncopies * group_size)
                {
                  if (vect_print_dump_info (REPORT_SLP))
                    {
                      fprintf (vect_dump, "Build SLP failed: the number of "
                               "interleaved loads is greater than"
                               " the SLP group size ");
                      print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
                    }

                  vect_free_oprnd_info (&oprnds_info);
                  return false;
                }

              first_load = GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt));
              if (prev_first_load)
                {
                  /* Check that there are no loads from different interleaving
                     chains in the same node.  The only exception is complex
                     numbers.  */
                  if (prev_first_load != first_load
                      && rhs_code != REALPART_EXPR
                      && rhs_code != IMAGPART_EXPR)
                    {
                      if (vect_print_dump_info (REPORT_SLP))
                        {
                          fprintf (vect_dump, "Build SLP failed: different "
                                   "interleaving chains in one node ");
                          print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
                        }

                      vect_free_oprnd_info (&oprnds_info);
                      return false;
                    }
                }
              else
                prev_first_load = first_load;

              if (first_load == stmt)
                {
                  first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (stmt));
                  if (vect_supportable_dr_alignment (first_dr, false)
                      == dr_unaligned_unsupported)
                    {
                      if (vect_print_dump_info (REPORT_SLP))
                        {
                          fprintf (vect_dump, "Build SLP failed: unsupported "
                                   "unaligned load ");
                          print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
                        }

                      vect_free_oprnd_info (&oprnds_info);
                      return false;
                    }

                  /* Analyze costs (for the first stmt in the group).  */
                  vect_model_load_cost (vinfo_for_stmt (stmt),
                                        ncopies_for_cost, false, *node);
                }

              /* Store the place of this load in the interleaving chain.  In
                 case that permutation is needed we later decide if a specific
                 permutation is supported.  */
              load_place = vect_get_place_in_interleaving_chain (stmt,
                                                                 first_load);
              if (load_place != i)
                permutation = true;

              VEC_safe_push (int, heap, *load_permutation, load_place);

              /* We stop the tree when we reach a group of loads.  */
              stop_recursion = true;
              continue;
            }
        } /* Strided access.  */
      else
        {
          if (TREE_CODE_CLASS (rhs_code) == tcc_reference)
            {
              /* Not a strided load.  */
              if (vect_print_dump_info (REPORT_SLP))
                {
                  fprintf (vect_dump, "Build SLP failed: not strided load ");
                  print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
                }

              /* FORNOW: Non-strided loads are not supported.  */
              vect_free_oprnd_info (&oprnds_info);
              return false;
            }

          /* Not a memory operation.  */
          if (TREE_CODE_CLASS (rhs_code) != tcc_binary
              && TREE_CODE_CLASS (rhs_code) != tcc_unary
              && rhs_code != COND_EXPR
              && rhs_code != CALL_EXPR)
            {
              if (vect_print_dump_info (REPORT_SLP))
                {
                  fprintf (vect_dump, "Build SLP failed: operation"
                           " unsupported ");
                  print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
                }

              vect_free_oprnd_info (&oprnds_info);
              return false;
            }

          if (rhs_code == COND_EXPR)
            {
              tree cond_expr = gimple_assign_rhs1 (stmt);

              if (i == 0)
                first_cond_code = TREE_CODE (cond_expr);
              else if (first_cond_code != TREE_CODE (cond_expr))
                {
                  if (vect_print_dump_info (REPORT_SLP))
                    {
                      fprintf (vect_dump, "Build SLP failed: different"
                               " operation");
                      print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
                    }

                  vect_free_oprnd_info (&oprnds_info);
                  return false;
                }
            }

          /* Find the def-stmts.  */
          if (!vect_get_and_check_slp_defs (loop_vinfo, bb_vinfo, *node, stmt,
                                            ncopies_for_cost, (i == 0),
                                            &oprnds_info))
            {
              vect_free_oprnd_info (&oprnds_info);
              return false;
            }
        }
    }

  /* Add the costs of the node to the overall instance costs.  */
  *inside_cost += SLP_TREE_INSIDE_OF_LOOP_COST (*node);
  *outside_cost += SLP_TREE_OUTSIDE_OF_LOOP_COST (*node);

  /* Strided loads were reached - stop the recursion.  */
  if (stop_recursion)
    {
      VEC_safe_push (slp_tree, heap, *loads, *node);
      if (permutation)
        {
          *loads_permuted = true;
          *inside_cost
            += targetm.vectorize.builtin_vectorization_cost (vec_perm, NULL, 0)
               * group_size;
        }
      else
        {
          /* We don't check complex-number chains here, so we set
             LOADS_PERMUTED for a further check in
             vect_supported_load_permutation_p.  */
          if (rhs_code == REALPART_EXPR || rhs_code == IMAGPART_EXPR)
            *loads_permuted = true;
        }

      vect_free_oprnd_info (&oprnds_info);
      return true;
    }

  /* Create SLP_TREE nodes for the definition node/s.  */
  FOR_EACH_VEC_ELT (slp_oprnd_info, oprnds_info, i, oprnd_info)
    {
      slp_tree child;

      if (oprnd_info->first_dt != vect_internal_def)
        continue;

      child = vect_create_new_slp_node (oprnd_info->def_stmts);
      if (!child
          || !vect_build_slp_tree (loop_vinfo, bb_vinfo, &child, group_size,
                                   inside_cost, outside_cost, ncopies_for_cost,
                                   max_nunits, load_permutation, loads,
                                   vectorization_factor, loads_permuted))
        {
          if (child)
            oprnd_info->def_stmts = NULL;
          vect_free_slp_tree (child);
          vect_free_oprnd_info (&oprnds_info);
          return false;
        }

      oprnd_info->def_stmts = NULL;
      VEC_quick_push (slp_void_p, SLP_TREE_CHILDREN (*node), child);
    }

  vect_free_oprnd_info (&oprnds_info);
  return true;
}

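/* Dump the SLP tree rooted at NODE, together with all its children, to the
   vectorizer dump file.  */
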
static void
vect_print_slp_tree (slp_tree node)
{
  int i;
  gimple stmt;
  slp_void_p child;

  if (!node)
    return;

  fprintf (vect_dump, "node ");
  FOR_EACH_VEC_ELT (gimple, SLP_TREE_SCALAR_STMTS (node), i, stmt)
    {
      fprintf (vect_dump, "\n\tstmt %d ", i);
      print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
    }
  fprintf (vect_dump, "\n");

  FOR_EACH_VEC_ELT (slp_void_p, SLP_TREE_CHILDREN (node), i, child)
    vect_print_slp_tree ((slp_tree) child);
}


/* Mark the tree rooted at NODE with MARK (PURE_SLP or HYBRID).
   If MARK is HYBRID, it refers to a specific stmt in NODE (the stmt at index
   J).  Otherwise, MARK is PURE_SLP and J is -1, which indicates that all the
   stmts in NODE are to be marked.  */

static void
vect_mark_slp_stmts (slp_tree node, enum slp_vect_type mark, int j)
{
  int i;
  gimple stmt;
  slp_void_p child;

  if (!node)
    return;

  FOR_EACH_VEC_ELT (gimple, SLP_TREE_SCALAR_STMTS (node), i, stmt)
    if (j < 0 || i == j)
      STMT_SLP_TYPE (vinfo_for_stmt (stmt)) = mark;

  FOR_EACH_VEC_ELT (slp_void_p, SLP_TREE_CHILDREN (node), i, child)
    vect_mark_slp_stmts ((slp_tree) child, mark, j);
}


/* Mark the statements of the tree rooted at NODE as relevant (vect_used).  */

static void
vect_mark_slp_stmts_relevant (slp_tree node)
{
  int i;
  gimple stmt;
  stmt_vec_info stmt_info;
  slp_void_p child;

  if (!node)
    return;

  FOR_EACH_VEC_ELT (gimple, SLP_TREE_SCALAR_STMTS (node), i, stmt)
    {
      stmt_info = vinfo_for_stmt (stmt);
      gcc_assert (!STMT_VINFO_RELEVANT (stmt_info)
                  || STMT_VINFO_RELEVANT (stmt_info) == vect_used_in_scope);
      STMT_VINFO_RELEVANT (stmt_info) = vect_used_in_scope;
    }

  FOR_EACH_VEC_ELT (slp_void_p, SLP_TREE_CHILDREN (node), i, child)
    vect_mark_slp_stmts_relevant ((slp_tree) child);
}


/* Check if the permutation required by the SLP INSTANCE is supported.
   Reorganize the SLP nodes stored in SLP_INSTANCE_LOADS if needed.  */

static bool
vect_supported_slp_permutation_p (slp_instance instance)
{
  slp_tree node = VEC_index (slp_tree, SLP_INSTANCE_LOADS (instance), 0);
  gimple stmt = VEC_index (gimple, SLP_TREE_SCALAR_STMTS (node), 0);
  gimple first_load = GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt));
  VEC (slp_tree, heap) *sorted_loads = NULL;
  int index;
  slp_tree *tmp_loads = NULL;
  int group_size = SLP_INSTANCE_GROUP_SIZE (instance), i, j;
  slp_tree load;

  /* FORNOW: The only supported load permutation is one where all the loads
     in a node access the same location, when the data-refs in the nodes of
     LOADS constitute an interleaving chain.
     Sort the nodes according to the order of accesses in the chain.  */
  tmp_loads = (slp_tree *) xmalloc (sizeof (slp_tree) * group_size);
  for (i = 0, j = 0;
       VEC_iterate (int, SLP_INSTANCE_LOAD_PERMUTATION (instance), i, index)
       && VEC_iterate (slp_tree, SLP_INSTANCE_LOADS (instance), j, load);
       i += group_size, j++)
    {
      gimple scalar_stmt = VEC_index (gimple, SLP_TREE_SCALAR_STMTS (load), 0);
      /* Check that the loads are all in the same interleaving chain.  */
      if (GROUP_FIRST_ELEMENT (vinfo_for_stmt (scalar_stmt)) != first_load)
        {
          if (vect_print_dump_info (REPORT_DETAILS))
            {
              fprintf (vect_dump, "Build SLP failed: unsupported data "
                       "permutation ");
              print_gimple_stmt (vect_dump, scalar_stmt, 0, TDF_SLIM);
            }

          free (tmp_loads);
          return false;
        }

      tmp_loads[index] = load;
    }

  sorted_loads = VEC_alloc (slp_tree, heap, group_size);
  for (i = 0; i < group_size; i++)
    VEC_safe_push (slp_tree, heap, sorted_loads, tmp_loads[i]);

  VEC_free (slp_tree, heap, SLP_INSTANCE_LOADS (instance));
  SLP_INSTANCE_LOADS (instance) = sorted_loads;
  free (tmp_loads);

  if (!vect_transform_slp_perm_load (stmt, NULL, NULL,
                                     SLP_INSTANCE_UNROLLING_FACTOR (instance),
                                     instance, true))
    return false;

  return true;
}


/* Rearrange the statements of NODE according to PERMUTATION.  */

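/* For instance, with GROUP_SIZE 2 and PERMUTATION {1, 0} the two scalar
   statements swap places: statement I ends up in slot PERMUTATION[I]
   (illustrative values).  */
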
static void
vect_slp_rearrange_stmts (slp_tree node, unsigned int group_size,
                          VEC (int, heap) *permutation)
{
  gimple stmt;
  VEC (gimple, heap) *tmp_stmts;
  unsigned int index, i;
  slp_void_p child;

  if (!node)
    return;

  FOR_EACH_VEC_ELT (slp_void_p, SLP_TREE_CHILDREN (node), i, child)
    vect_slp_rearrange_stmts ((slp_tree) child, group_size, permutation);

  gcc_assert (group_size == VEC_length (gimple, SLP_TREE_SCALAR_STMTS (node)));
  tmp_stmts = VEC_alloc (gimple, heap, group_size);

  for (i = 0; i < group_size; i++)
    VEC_safe_push (gimple, heap, tmp_stmts, NULL);

  FOR_EACH_VEC_ELT (gimple, SLP_TREE_SCALAR_STMTS (node), i, stmt)
    {
      index = VEC_index (int, permutation, i);
      VEC_replace (gimple, tmp_stmts, index, stmt);
    }

  VEC_free (gimple, heap, SLP_TREE_SCALAR_STMTS (node));
  SLP_TREE_SCALAR_STMTS (node) = tmp_stmts;
}


/* Check if the required load permutation is supported.
   LOAD_PERMUTATION contains a list of indices of the loads.
   In SLP this permutation is relative to the order of strided stores that are
   the base of the SLP instance.  */

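/* For example, with GROUP_SIZE 2 a LOAD_PERMUTATION of {0, 0, 1, 1}, i.e.
   one load node reading element 0 of the interleaving chain throughout and
   another reading element 1, matches the supported 0..01..1.. form checked
   below, whereas {1, 0, 0, 1} does not (illustrative values).  */
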
static bool
vect_supported_load_permutation_p (slp_instance slp_instn, int group_size,
                                   VEC (int, heap) *load_permutation)
{
  int i = 0, j, prev = -1, next, k, number_of_groups;
  bool supported, bad_permutation = false;
  sbitmap load_index;
  slp_tree node, other_complex_node;
  gimple stmt, first = NULL, other_node_first, load, next_load, first_load;
  unsigned complex_numbers = 0;
  struct data_reference *dr;
  bb_vec_info bb_vinfo;

  /* FORNOW: permutations are only supported in SLP.  */
  if (!slp_instn)
    return false;

  if (vect_print_dump_info (REPORT_SLP))
    {
      fprintf (vect_dump, "Load permutation ");
      FOR_EACH_VEC_ELT (int, load_permutation, i, next)
        fprintf (vect_dump, "%d ", next);
    }

  /* In case of reduction every load permutation is allowed, since the order
     of the reduction statements is not important (as opposed to the case of
     strided stores).  The only condition we need to check is that all the
     load nodes are of the same size and have the same permutation (and then
     rearrange all the nodes of the SLP instance according to this
     permutation).  */

  /* Check that all the load nodes are of the same size.  */
  FOR_EACH_VEC_ELT (slp_tree, SLP_INSTANCE_LOADS (slp_instn), i, node)
    {
      if (VEC_length (gimple, SLP_TREE_SCALAR_STMTS (node))
          != (unsigned) group_size)
        return false;

      stmt = VEC_index (gimple, SLP_TREE_SCALAR_STMTS (node), 0);
      if (is_gimple_assign (stmt)
          && (gimple_assign_rhs_code (stmt) == REALPART_EXPR
              || gimple_assign_rhs_code (stmt) == IMAGPART_EXPR))
        complex_numbers++;
    }

  /* Complex operands can be swapped as following:
      real_c = real_b + real_a;
      imag_c = imag_a + imag_b;
     i.e., we have {real_b, imag_a} and {real_a, imag_b} instead of
     {real_a, imag_a} and {real_b, imag_b}.  We check here that if interleaving
     chains are mixed, they match the above pattern.  */
  if (complex_numbers)
    {
      FOR_EACH_VEC_ELT (slp_tree, SLP_INSTANCE_LOADS (slp_instn), i, node)
        {
          FOR_EACH_VEC_ELT (gimple, SLP_TREE_SCALAR_STMTS (node), j, stmt)
            {
              if (j == 0)
                first = stmt;
              else
                {
                  if (GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)) != first)
                    {
                      if (complex_numbers != 2)
                        return false;

                      if (i == 0)
                        k = 1;
                      else
                        k = 0;

                      other_complex_node = VEC_index (slp_tree,
                                            SLP_INSTANCE_LOADS (slp_instn), k);
                      other_node_first = VEC_index (gimple,
                                SLP_TREE_SCALAR_STMTS (other_complex_node), 0);

                      if (GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt))
                          != other_node_first)
                        return false;
                    }
                }
            }
        }
    }

  /* We checked that this case is ok, so there is no need to proceed with
     permutation tests.  */
  if (complex_numbers == 2)
    {
      VEC_free (slp_tree, heap, SLP_INSTANCE_LOADS (slp_instn));
      VEC_free (int, heap, SLP_INSTANCE_LOAD_PERMUTATION (slp_instn));
      return true;
    }

  node = SLP_INSTANCE_TREE (slp_instn);
  stmt = VEC_index (gimple, SLP_TREE_SCALAR_STMTS (node), 0);
  /* LOAD_PERMUTATION is a list of indices of all the loads of the SLP
     instance, not all the loads belong to the same node or interleaving
     group.  Hence, we need to divide them into groups according to
     GROUP_SIZE.  */
  number_of_groups = VEC_length (int, load_permutation) / group_size;

  /* Reduction (there are no data-refs in the root).
     In reduction chain the order of the loads is important.  */
  if (!STMT_VINFO_DATA_REF (vinfo_for_stmt (stmt))
      && !GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)))
    {
      int first_group_load_index;

      /* Compare all the permutation sequences to the first one.  */
      for (i = 1; i < number_of_groups; i++)
        {
          k = 0;
          for (j = i * group_size; j < i * group_size + group_size; j++)
            {
              next = VEC_index (int, load_permutation, j);
              first_group_load_index = VEC_index (int, load_permutation, k);

              if (next != first_group_load_index)
                {
                  bad_permutation = true;
                  break;
                }

              k++;
            }

          if (bad_permutation)
            break;
        }

      if (!bad_permutation)
        {
          /* Check that the loads in the first sequence are different and there
             are no gaps between them.  */
          load_index = sbitmap_alloc (group_size);
          sbitmap_zero (load_index);
          for (k = 0; k < group_size; k++)
            {
              first_group_load_index = VEC_index (int, load_permutation, k);
              if (TEST_BIT (load_index, first_group_load_index))
                {
                  bad_permutation = true;
                  break;
                }

              SET_BIT (load_index, first_group_load_index);
            }

          if (!bad_permutation)
            for (k = 0; k < group_size; k++)
              if (!TEST_BIT (load_index, k))
                {
                  bad_permutation = true;
                  break;
                }

          sbitmap_free (load_index);
        }

      if (!bad_permutation)
        {
          /* This permutation is valid for reduction.  Since the order of the
             statements in the nodes is not important unless they are memory
             accesses, we can rearrange the statements in all the nodes
             according to the order of the loads.  */
          vect_slp_rearrange_stmts (SLP_INSTANCE_TREE (slp_instn), group_size,
                                    load_permutation);
          VEC_free (int, heap, SLP_INSTANCE_LOAD_PERMUTATION (slp_instn));
          return true;
        }
    }

  /* In basic block vectorization we allow any subchain of an interleaving
     chain.
     FORNOW: not supported in loop SLP because of realignment complications.  */
  bb_vinfo = STMT_VINFO_BB_VINFO (vinfo_for_stmt (stmt));
  bad_permutation = false;
  /* Check that for every node in the instance the loads form a subchain.  */
  if (bb_vinfo)
    {
      FOR_EACH_VEC_ELT (slp_tree, SLP_INSTANCE_LOADS (slp_instn), i, node)
        {
          next_load = NULL;
          first_load = NULL;
          FOR_EACH_VEC_ELT (gimple, SLP_TREE_SCALAR_STMTS (node), j, load)
            {
              if (!first_load)
                first_load = GROUP_FIRST_ELEMENT (vinfo_for_stmt (load));
              else if (first_load
                       != GROUP_FIRST_ELEMENT (vinfo_for_stmt (load)))
                {
                  bad_permutation = true;
                  break;
                }

              if (j != 0 && next_load != load)
                {
                  bad_permutation = true;
                  break;
                }

              next_load = GROUP_NEXT_ELEMENT (vinfo_for_stmt (load));
            }

          if (bad_permutation)
            break;
        }

      /* Check that the alignment of the first load in every subchain, i.e.,
         the first statement in every load node, is supported.  */
      if (!bad_permutation)
        {
          FOR_EACH_VEC_ELT (slp_tree, SLP_INSTANCE_LOADS (slp_instn), i, node)
            {
              first_load = VEC_index (gimple, SLP_TREE_SCALAR_STMTS (node), 0);
              if (first_load
                  != GROUP_FIRST_ELEMENT (vinfo_for_stmt (first_load)))
                {
                  dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_load));
                  if (vect_supportable_dr_alignment (dr, false)
                      == dr_unaligned_unsupported)
                    {
                      if (vect_print_dump_info (REPORT_SLP))
                        {
                          fprintf (vect_dump, "unsupported unaligned load ");
                          print_gimple_stmt (vect_dump, first_load, 0,
                                             TDF_SLIM);
                        }
                      bad_permutation = true;
                      break;
                    }
                }
            }

          if (!bad_permutation)
            {
              VEC_free (int, heap, SLP_INSTANCE_LOAD_PERMUTATION (slp_instn));
              return true;
            }
        }
    }

  /* FORNOW: the only supported permutation is 0..01..1.. of length equal to
     GROUP_SIZE and where each sequence of same drs is of GROUP_SIZE length as
     well (unless it's reduction).  */
  if (VEC_length (int, load_permutation)
      != (unsigned int) (group_size * group_size))
    return false;

  supported = true;
  load_index = sbitmap_alloc (group_size);
  sbitmap_zero (load_index);
  for (j = 0; j < group_size; j++)
    {
      for (i = j * group_size, k = 0;
           VEC_iterate (int, load_permutation, i, next) && k < group_size;
           i++, k++)
        {
          if (i != j * group_size && next != prev)
            {
              supported = false;
              break;
            }

          prev = next;
        }

      if (TEST_BIT (load_index, prev))
        {
          supported = false;
          break;
        }

      SET_BIT (load_index, prev);
    }

  for (j = 0; j < group_size; j++)
    if (!TEST_BIT (load_index, j))
      {
        sbitmap_free (load_index);
        return false;
      }

  sbitmap_free (load_index);

  if (supported && i == group_size * group_size
      && vect_supported_slp_permutation_p (slp_instn))
    return true;

  return false;
}


/* Find the first load in the loop that belongs to INSTANCE.
   When loads are in several SLP nodes, there can be a case in which the first
   load does not appear in the first SLP node to be transformed, causing
   incorrect order of statements.  Since we generate all the loads together,
   they must be inserted before the first load of the SLP instance and not
   before the first load of the first node of the instance.  */

static gimple
vect_find_first_load_in_slp_instance (slp_instance instance)
{
  int i, j;
  slp_tree load_node;
  gimple first_load = NULL, load;

  FOR_EACH_VEC_ELT (slp_tree, SLP_INSTANCE_LOADS (instance), i, load_node)
    FOR_EACH_VEC_ELT (gimple, SLP_TREE_SCALAR_STMTS (load_node), j, load)
      first_load = get_earlier_stmt (load, first_load);

  return first_load;
}


/* Find the last store in SLP INSTANCE.  */

static gimple
vect_find_last_store_in_slp_instance (slp_instance instance)
{
  int i;
  slp_tree node;
  gimple last_store = NULL, store;

  node = SLP_INSTANCE_TREE (instance);
  for (i = 0;
       VEC_iterate (gimple, SLP_TREE_SCALAR_STMTS (node), i, store);
       i++)
    last_store = get_later_stmt (store, last_store);

  return last_store;
}


/* Analyze an SLP instance starting from a group of strided stores.  Call
   vect_build_slp_tree to build a tree of packed stmts if possible.
   Return FALSE if it's impossible to SLP any stmt in the loop.  */

static bool
vect_analyze_slp_instance (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo,
                           gimple stmt)
{
  slp_instance new_instance;
  slp_tree node;
  unsigned int group_size = GROUP_SIZE (vinfo_for_stmt (stmt));
  unsigned int unrolling_factor = 1, nunits;
  tree vectype, scalar_type = NULL_TREE;
  gimple next;
  unsigned int vectorization_factor = 0;
  int inside_cost = 0, outside_cost = 0, ncopies_for_cost, i;
  unsigned int max_nunits = 0;
  VEC (int, heap) *load_permutation;
  VEC (slp_tree, heap) *loads;
  struct data_reference *dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (stmt));
  bool loads_permuted = false;
  VEC (gimple, heap) *scalar_stmts;

  if (GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)))
    {
      if (dr)
        {
          scalar_type = TREE_TYPE (DR_REF (dr));
          vectype = get_vectype_for_scalar_type (scalar_type);
        }
      else
        {
          gcc_assert (loop_vinfo);
          vectype = STMT_VINFO_VECTYPE (vinfo_for_stmt (stmt));
        }

      group_size = GROUP_SIZE (vinfo_for_stmt (stmt));
    }
  else
    {
      gcc_assert (loop_vinfo);
      vectype = STMT_VINFO_VECTYPE (vinfo_for_stmt (stmt));
      group_size = VEC_length (gimple, LOOP_VINFO_REDUCTIONS (loop_vinfo));
    }

  if (!vectype)
    {
      if (vect_print_dump_info (REPORT_SLP))
        {
          fprintf (vect_dump, "Build SLP failed: unsupported data-type ");
          print_generic_expr (vect_dump, scalar_type, TDF_SLIM);
        }

      return false;
    }

  nunits = TYPE_VECTOR_SUBPARTS (vectype);
  if (loop_vinfo)
    vectorization_factor = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
  else
    vectorization_factor = nunits;

  /* Calculate the unrolling factor.  */
  unrolling_factor = least_common_multiple (nunits, group_size) / group_size;
  if (unrolling_factor != 1 && !loop_vinfo)
    {
      if (vect_print_dump_info (REPORT_SLP))
        fprintf (vect_dump, "Build SLP failed: unrolling required in basic"
                 " block SLP");

      return false;
    }

  /* Create a node (a root of the SLP tree) for the packed strided stores.  */
  scalar_stmts = VEC_alloc (gimple, heap, group_size);
  next = stmt;
  if (GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)))
    {
      /* Collect the stores and store them in SLP_TREE_SCALAR_STMTS.  */
      while (next)
        {
          if (STMT_VINFO_IN_PATTERN_P (vinfo_for_stmt (next))
              && STMT_VINFO_RELATED_STMT (vinfo_for_stmt (next)))
            VEC_safe_push (gimple, heap, scalar_stmts,
                           STMT_VINFO_RELATED_STMT (vinfo_for_stmt (next)));
          else
            VEC_safe_push (gimple, heap, scalar_stmts, next);
          next = GROUP_NEXT_ELEMENT (vinfo_for_stmt (next));
        }
    }
  else
    {
      /* Collect reduction statements.  */
      VEC (gimple, heap) *reductions = LOOP_VINFO_REDUCTIONS (loop_vinfo);
      for (i = 0; VEC_iterate (gimple, reductions, i, next); i++)
        VEC_safe_push (gimple, heap, scalar_stmts, next);
    }

  node = vect_create_new_slp_node (scalar_stmts);

  /* Calculate the number of vector stmts to create based on the unrolling
     factor (number of vectors is 1 if NUNITS >= GROUP_SIZE, and is
     GROUP_SIZE / NUNITS otherwise).  */
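  /* For example, with GROUP_SIZE 8 and NUNITS 4 the unrolling factor is 1
     and two vector stmts are needed, while with GROUP_SIZE 2 and NUNITS 4
     the unrolling factor is 2 and one vector stmt suffices (illustrative
     values).  */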
  ncopies_for_cost = unrolling_factor * group_size / nunits;

  load_permutation = VEC_alloc (int, heap, group_size * group_size);
  loads = VEC_alloc (slp_tree, heap, group_size);

  /* Build the tree for the SLP instance.  */
  if (vect_build_slp_tree (loop_vinfo, bb_vinfo, &node, group_size,
                           &inside_cost, &outside_cost, ncopies_for_cost,
                           &max_nunits, &load_permutation, &loads,
                           vectorization_factor, &loads_permuted))
    {
      /* Calculate the unrolling factor based on the smallest type.  */
      if (max_nunits > nunits)
        unrolling_factor = least_common_multiple (max_nunits, group_size)
                           / group_size;

      if (unrolling_factor != 1 && !loop_vinfo)
        {
          if (vect_print_dump_info (REPORT_SLP))
            fprintf (vect_dump, "Build SLP failed: unrolling required in basic"
                     " block SLP");
          return false;
        }

      /* Create a new SLP instance.  */
      new_instance = XNEW (struct _slp_instance);
      SLP_INSTANCE_TREE (new_instance) = node;
      SLP_INSTANCE_GROUP_SIZE (new_instance) = group_size;
      SLP_INSTANCE_UNROLLING_FACTOR (new_instance) = unrolling_factor;
      SLP_INSTANCE_OUTSIDE_OF_LOOP_COST (new_instance) = outside_cost;
      SLP_INSTANCE_INSIDE_OF_LOOP_COST (new_instance) = inside_cost;
      SLP_INSTANCE_LOADS (new_instance) = loads;
      SLP_INSTANCE_FIRST_LOAD_STMT (new_instance) = NULL;
      SLP_INSTANCE_LOAD_PERMUTATION (new_instance) = load_permutation;

      if (loads_permuted)
        {
          if (!vect_supported_load_permutation_p (new_instance, group_size,
                                                  load_permutation))
            {
              if (vect_print_dump_info (REPORT_SLP))
                {
                  fprintf (vect_dump, "Build SLP failed: unsupported load "
                           "permutation ");
                  print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
                }

              vect_free_slp_instance (new_instance);
              return false;
            }

          SLP_INSTANCE_FIRST_LOAD_STMT (new_instance)
            = vect_find_first_load_in_slp_instance (new_instance);
        }
      else
        VEC_free (int, heap, SLP_INSTANCE_LOAD_PERMUTATION (new_instance));

      if (loop_vinfo)
        VEC_safe_push (slp_instance, heap,
                       LOOP_VINFO_SLP_INSTANCES (loop_vinfo),
                       new_instance);
      else
        VEC_safe_push (slp_instance, heap, BB_VINFO_SLP_INSTANCES (bb_vinfo),
                       new_instance);

      if (vect_print_dump_info (REPORT_SLP))
        vect_print_slp_tree (node);

      return true;
    }

  /* Failed to SLP.  */
  /* Free the allocated memory.  */
  vect_free_slp_tree (node);
  VEC_free (int, heap, load_permutation);
  VEC_free (slp_tree, heap, loads);

  return false;
}


/* Check if there are stmts in the loop that can be vectorized using SLP.
   Build SLP trees of packed scalar stmts if SLP is possible.  */

bool
vect_analyze_slp (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo)
{
  unsigned int i;
  VEC (gimple, heap) *strided_stores, *reductions = NULL, *reduc_chains = NULL;
  gimple first_element;
  bool ok = false;

  if (vect_print_dump_info (REPORT_SLP))
    fprintf (vect_dump, "=== vect_analyze_slp ===");

  if (loop_vinfo)
    {
      strided_stores = LOOP_VINFO_STRIDED_STORES (loop_vinfo);
      reduc_chains = LOOP_VINFO_REDUCTION_CHAINS (loop_vinfo);
      reductions = LOOP_VINFO_REDUCTIONS (loop_vinfo);
    }
  else
    strided_stores = BB_VINFO_STRIDED_STORES (bb_vinfo);

  /* Find SLP sequences starting from groups of strided stores.  */
  FOR_EACH_VEC_ELT (gimple, strided_stores, i, first_element)
    if (vect_analyze_slp_instance (loop_vinfo, bb_vinfo, first_element))
      ok = true;

  if (bb_vinfo && !ok)
    {
      if (vect_print_dump_info (REPORT_SLP))
        fprintf (vect_dump, "Failed to SLP the basic block.");

      return false;
    }

  if (loop_vinfo
      && VEC_length (gimple, LOOP_VINFO_REDUCTION_CHAINS (loop_vinfo)) > 0)
    {
      /* Find SLP sequences starting from reduction chains.  */
      FOR_EACH_VEC_ELT (gimple, reduc_chains, i, first_element)
        if (vect_analyze_slp_instance (loop_vinfo, bb_vinfo, first_element))
          ok = true;
        else
          return false;

      /* Don't try to vectorize SLP reductions if reduction chain was
         detected.  */
      return ok;
    }

  /* Find SLP sequences starting from groups of reductions.  */
  if (loop_vinfo && VEC_length (gimple, LOOP_VINFO_REDUCTIONS (loop_vinfo)) > 1
      && vect_analyze_slp_instance (loop_vinfo, bb_vinfo,
                                    VEC_index (gimple, reductions, 0)))
    ok = true;

  return true;
}


/* For each possible SLP instance decide whether to SLP it and calculate the
   overall unrolling factor needed to SLP the loop.  Return TRUE if it was
   decided to SLP at least one instance.  */

bool
vect_make_slp_decision (loop_vec_info loop_vinfo)
{
  unsigned int i, unrolling_factor = 1;
  VEC (slp_instance, heap) *slp_instances = LOOP_VINFO_SLP_INSTANCES (loop_vinfo);
  slp_instance instance;
  int decided_to_slp = 0;

  if (vect_print_dump_info (REPORT_SLP))
    fprintf (vect_dump, "=== vect_make_slp_decision ===");

  FOR_EACH_VEC_ELT (slp_instance, slp_instances, i, instance)
    {
      /* FORNOW: SLP if you can.  */
      if (unrolling_factor < SLP_INSTANCE_UNROLLING_FACTOR (instance))
        unrolling_factor = SLP_INSTANCE_UNROLLING_FACTOR (instance);

      /* Mark all the stmts that belong to INSTANCE as PURE_SLP stmts.  Later
         we call vect_detect_hybrid_slp () to find stmts that need hybrid SLP
         and loop-based vectorization.  Such stmts will be marked as HYBRID.  */
      vect_mark_slp_stmts (SLP_INSTANCE_TREE (instance), pure_slp, -1);
      decided_to_slp++;
    }

  LOOP_VINFO_SLP_UNROLLING_FACTOR (loop_vinfo) = unrolling_factor;

  if (decided_to_slp && vect_print_dump_info (REPORT_SLP))
    fprintf (vect_dump, "Decided to SLP %d instances. Unrolling factor %d",
             decided_to_slp, unrolling_factor);

  return (decided_to_slp > 0);
}


/* Find stmts that must be both vectorized and SLPed (since they feed stmts
   that can't be SLPed) in the tree rooted at NODE.  Mark such stmts as
   HYBRID.  */

static void
vect_detect_hybrid_slp_stmts (slp_tree node)
{
  int i;
  VEC (gimple, heap) *stmts = SLP_TREE_SCALAR_STMTS (node);
  gimple stmt = VEC_index (gimple, stmts, 0);
  imm_use_iterator imm_iter;
  gimple use_stmt;
  stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
  slp_void_p child;
  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
  struct loop *loop = NULL;
  bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_vinfo);
  basic_block bb = NULL;

  if (!node)
    return;

  if (loop_vinfo)
    loop = LOOP_VINFO_LOOP (loop_vinfo);
  else
    bb = BB_VINFO_BB (bb_vinfo);

  FOR_EACH_VEC_ELT (gimple, SLP_TREE_SCALAR_STMTS (node), i, stmt)
    if (PURE_SLP_STMT (vinfo_for_stmt (stmt))
        && TREE_CODE (gimple_op (stmt, 0)) == SSA_NAME)
      FOR_EACH_IMM_USE_STMT (use_stmt, imm_iter, gimple_op (stmt, 0))
        if (gimple_bb (use_stmt)
            && ((loop && flow_bb_inside_loop_p (loop, gimple_bb (use_stmt)))
                || bb == gimple_bb (use_stmt))
            && (stmt_vinfo = vinfo_for_stmt (use_stmt))
            && !STMT_SLP_TYPE (stmt_vinfo)
            && (STMT_VINFO_RELEVANT (stmt_vinfo)
                || VECTORIZABLE_CYCLE_DEF (STMT_VINFO_DEF_TYPE (stmt_vinfo)))
            && !(gimple_code (use_stmt) == GIMPLE_PHI
                 && STMT_VINFO_DEF_TYPE (stmt_vinfo)
                    == vect_reduction_def))
          vect_mark_slp_stmts (node, hybrid, i);

  FOR_EACH_VEC_ELT (slp_void_p, SLP_TREE_CHILDREN (node), i, child)
    vect_detect_hybrid_slp_stmts ((slp_tree) child);
}


/* Find stmts that must be both vectorized and SLPed.  */

void
vect_detect_hybrid_slp (loop_vec_info loop_vinfo)
{
  unsigned int i;
  VEC (slp_instance, heap) *slp_instances = LOOP_VINFO_SLP_INSTANCES (loop_vinfo);
  slp_instance instance;

  if (vect_print_dump_info (REPORT_SLP))
    fprintf (vect_dump, "=== vect_detect_hybrid_slp ===");

  FOR_EACH_VEC_ELT (slp_instance, slp_instances, i, instance)
    vect_detect_hybrid_slp_stmts (SLP_INSTANCE_TREE (instance));
}


/* Create and initialize a new bb_vec_info struct for BB, as well as
   stmt_vec_info structs for all the stmts in it.  */

static bb_vec_info
new_bb_vec_info (basic_block bb)
{
  bb_vec_info res = NULL;
  gimple_stmt_iterator gsi;

  res = (bb_vec_info) xcalloc (1, sizeof (struct _bb_vec_info));
  BB_VINFO_BB (res) = bb;

  for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
    {
      gimple stmt = gsi_stmt (gsi);
      gimple_set_uid (stmt, 0);
      set_vinfo_for_stmt (stmt, new_stmt_vec_info (stmt, NULL, res));
    }

  BB_VINFO_STRIDED_STORES (res) = VEC_alloc (gimple, heap, 10);
  BB_VINFO_SLP_INSTANCES (res) = VEC_alloc (slp_instance, heap, 2);

  bb->aux = res;
  return res;
}


/* Free BB_VINFO struct, as well as all the stmt_vec_info structs of all the
   stmts in the basic block.  */

static void
destroy_bb_vec_info (bb_vec_info bb_vinfo)
{
  basic_block bb;
  gimple_stmt_iterator si;

  if (!bb_vinfo)
    return;

  bb = BB_VINFO_BB (bb_vinfo);

  for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
    {
      gimple stmt = gsi_stmt (si);
      stmt_vec_info stmt_info = vinfo_for_stmt (stmt);

      if (stmt_info)
        /* Free stmt_vec_info.  */
        free_stmt_vec_info (stmt);
    }

  free_data_refs (BB_VINFO_DATAREFS (bb_vinfo));
  free_dependence_relations (BB_VINFO_DDRS (bb_vinfo));
  VEC_free (gimple, heap, BB_VINFO_STRIDED_STORES (bb_vinfo));
  VEC_free (slp_instance, heap, BB_VINFO_SLP_INSTANCES (bb_vinfo));
  free (bb_vinfo);
  bb->aux = NULL;
}


/* Analyze statements contained in SLP tree node after recursively analyzing
   the subtree.  Return TRUE if the operations are supported.  */

static bool
vect_slp_analyze_node_operations (bb_vec_info bb_vinfo, slp_tree node)
{
  bool dummy;
  int i;
  gimple stmt;
  slp_void_p child;

  if (!node)
    return true;

  FOR_EACH_VEC_ELT (slp_void_p, SLP_TREE_CHILDREN (node), i, child)
    if (!vect_slp_analyze_node_operations (bb_vinfo, (slp_tree) child))
      return false;

  FOR_EACH_VEC_ELT (gimple, SLP_TREE_SCALAR_STMTS (node), i, stmt)
    {
      stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
      gcc_assert (stmt_info);
      gcc_assert (PURE_SLP_STMT (stmt_info));

      if (!vect_analyze_stmt (stmt, &dummy, node))
        return false;
    }

  return true;
}


/* Analyze statements in SLP instances of the basic block.  Return TRUE if the
   operations are supported.  */

static bool
vect_slp_analyze_operations (bb_vec_info bb_vinfo)
{
  VEC (slp_instance, heap) *slp_instances = BB_VINFO_SLP_INSTANCES (bb_vinfo);
  slp_instance instance;
  int i;

  for (i = 0; VEC_iterate (slp_instance, slp_instances, i, instance); )
    {
      if (!vect_slp_analyze_node_operations (bb_vinfo,
                                             SLP_INSTANCE_TREE (instance)))
        {
          vect_free_slp_instance (instance);
          VEC_ordered_remove (slp_instance, slp_instances, i);
        }
      else
        i++;
    }

  if (!VEC_length (slp_instance, slp_instances))
    return false;

  return true;
}
1911
1912 /* Check if vectorization of the basic block is profitable. */
1913
1914 static bool
1915 vect_bb_vectorization_profitable_p (bb_vec_info bb_vinfo)
1916 {
1917 VEC (slp_instance, heap) *slp_instances = BB_VINFO_SLP_INSTANCES (bb_vinfo);
1918 slp_instance instance;
1919 int i;
1920 unsigned int vec_outside_cost = 0, vec_inside_cost = 0, scalar_cost = 0;
1921 unsigned int stmt_cost;
1922 gimple stmt;
1923 gimple_stmt_iterator si;
1924 basic_block bb = BB_VINFO_BB (bb_vinfo);
1925 stmt_vec_info stmt_info = NULL;
1926 tree dummy_type = NULL;
1927 int dummy = 0;
1928
1929 /* Calculate vector costs. */
1930 FOR_EACH_VEC_ELT (slp_instance, slp_instances, i, instance)
1931 {
1932 vec_outside_cost += SLP_INSTANCE_OUTSIDE_OF_LOOP_COST (instance);
1933 vec_inside_cost += SLP_INSTANCE_INSIDE_OF_LOOP_COST (instance);
1934 }
1935
1936 /* Calculate scalar cost. */
1937 for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
1938 {
1939 stmt = gsi_stmt (si);
1940 stmt_info = vinfo_for_stmt (stmt);
1941
1942 if (!stmt_info || !STMT_VINFO_VECTORIZABLE (stmt_info)
1943 || !PURE_SLP_STMT (stmt_info))
1944 continue;
1945
1946 if (STMT_VINFO_DATA_REF (stmt_info))
1947 {
1948 if (DR_IS_READ (STMT_VINFO_DATA_REF (stmt_info)))
1949 stmt_cost = targetm.vectorize.builtin_vectorization_cost
1950 (scalar_load, dummy_type, dummy);
1951 else
1952 stmt_cost = targetm.vectorize.builtin_vectorization_cost
1953 (scalar_store, dummy_type, dummy);
1954 }
1955 else
1956 stmt_cost = targetm.vectorize.builtin_vectorization_cost
1957 (scalar_stmt, dummy_type, dummy);
1958
1959 scalar_cost += stmt_cost;
1960 }
1961
1962 if (vect_print_dump_info (REPORT_COST))
1963 {
1964 fprintf (vect_dump, "Cost model analysis: \n");
1965 fprintf (vect_dump, " Vector inside of basic block cost: %d\n",
1966 vec_inside_cost);
1967 fprintf (vect_dump, " Vector outside of basic block cost: %d\n",
1968 vec_outside_cost);
1969 fprintf (vect_dump, " Scalar cost of basic block: %d", scalar_cost);
1970 }
1971
1972 /* Vectorization is profitable if its cost is less than the cost of the
1973 scalar version. */
1974 if (vec_outside_cost + vec_inside_cost >= scalar_cost)
1975 return false;
1976
1977 return true;
1978 }
1979
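/* A hypothetical worked example of the comparison above: if the block's
   two SLP instances report inside costs 3 and 2 and outside costs 1 and 0,
   and the scalar cost of the pure-SLP stmts adds up to 8, we compare
   (1 + 0) + (3 + 2) = 6 against 8 and conclude that vectorizing the block
   is profitable. */
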
1980 /* Check if the basic block can be vectorized. */
1981
1982 static bb_vec_info
1983 vect_slp_analyze_bb_1 (basic_block bb)
1984 {
1985 bb_vec_info bb_vinfo;
1986 VEC (ddr_p, heap) *ddrs;
1987 VEC (slp_instance, heap) *slp_instances;
1988 slp_instance instance;
1989 int i;
1990 int min_vf = 2;
1991 int max_vf = MAX_VECTORIZATION_FACTOR;
1992
1993 bb_vinfo = new_bb_vec_info (bb);
1994 if (!bb_vinfo)
1995 return NULL;
1996
1997 if (!vect_analyze_data_refs (NULL, bb_vinfo, &min_vf))
1998 {
1999 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
2000 fprintf (vect_dump, "not vectorized: unhandled data-ref in basic "
2001 "block.\n");
2002
2003 destroy_bb_vec_info (bb_vinfo);
2004 return NULL;
2005 }
2006
2007 ddrs = BB_VINFO_DDRS (bb_vinfo);
2008 if (!VEC_length (ddr_p, ddrs))
2009 {
2010 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
2011 fprintf (vect_dump, "not vectorized: not enough data-refs in basic "
2012 "block.\n");
2013
2014 destroy_bb_vec_info (bb_vinfo);
2015 return NULL;
2016 }
2017
2018 if (!vect_analyze_data_ref_dependences (NULL, bb_vinfo, &max_vf)
2019 || min_vf > max_vf)
2020 {
2021 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
2022 fprintf (vect_dump, "not vectorized: unhandled data dependence "
2023 "in basic block.\n");
2024
2025 destroy_bb_vec_info (bb_vinfo);
2026 return NULL;
2027 }
2028
2029 if (!vect_analyze_data_refs_alignment (NULL, bb_vinfo))
2030 {
2031 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
2032 fprintf (vect_dump, "not vectorized: bad data alignment in basic "
2033 "block.\n");
2034
2035 destroy_bb_vec_info (bb_vinfo);
2036 return NULL;
2037 }
2038
2039 if (!vect_analyze_data_ref_accesses (NULL, bb_vinfo))
2040 {
2041 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
2042 fprintf (vect_dump, "not vectorized: unhandled data access in basic "
2043 "block.\n");
2044
2045 destroy_bb_vec_info (bb_vinfo);
2046 return NULL;
2047 }
2048
2049 if (!vect_verify_datarefs_alignment (NULL, bb_vinfo))
2050 {
2051 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
2052 fprintf (vect_dump, "not vectorized: unsupported alignment in basic "
2053 "block.\n");
2054
2055 destroy_bb_vec_info (bb_vinfo);
2056 return NULL;
2057 }
2058
2059 /* Check the SLP opportunities in the basic block, analyze and build SLP
2060 trees. */
2061 if (!vect_analyze_slp (NULL, bb_vinfo))
2062 {
2063 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
2064 fprintf (vect_dump, "not vectorized: failed to find SLP opportunities "
2065 "in basic block.\n");
2066
2067 destroy_bb_vec_info (bb_vinfo);
2068 return NULL;
2069 }
2070
2071 slp_instances = BB_VINFO_SLP_INSTANCES (bb_vinfo);
2072
2073 /* Mark all the statements that we want to vectorize as pure SLP and
2074 relevant. */
2075 FOR_EACH_VEC_ELT (slp_instance, slp_instances, i, instance)
2076 {
2077 vect_mark_slp_stmts (SLP_INSTANCE_TREE (instance), pure_slp, -1);
2078 vect_mark_slp_stmts_relevant (SLP_INSTANCE_TREE (instance));
2079 }
2080
2081 if (!vect_slp_analyze_operations (bb_vinfo))
2082 {
2083 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
2084 fprintf (vect_dump, "not vectorized: bad operation in basic block.\n");
2085
2086 destroy_bb_vec_info (bb_vinfo);
2087 return NULL;
2088 }
2089
2090 /* Cost model: check if the vectorization is worthwhile. */
2091 if (flag_vect_cost_model
2092 && !vect_bb_vectorization_profitable_p (bb_vinfo))
2093 {
2094 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
2095 fprintf (vect_dump, "not vectorized: vectorization is not "
2096 "profitable.\n");
2097
2098 destroy_bb_vec_info (bb_vinfo);
2099 return NULL;
2100 }
2101
2102 if (vect_print_dump_info (REPORT_DETAILS))
2103 fprintf (vect_dump, "Basic block will be vectorized using SLP\n");
2104
2105 return bb_vinfo;
2106 }
2107
2108
2109 bb_vec_info
2110 vect_slp_analyze_bb (basic_block bb)
2111 {
2112 bb_vec_info bb_vinfo;
2113 int insns = 0;
2114 gimple_stmt_iterator gsi;
2115 unsigned int vector_sizes;
2116
2117 if (vect_print_dump_info (REPORT_DETAILS))
2118 fprintf (vect_dump, "===vect_slp_analyze_bb===\n");
2119
2120 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
2121 {
2122 gimple stmt = gsi_stmt (gsi);
2123 if (!is_gimple_debug (stmt)
2124 && !gimple_nop_p (stmt)
2125 && gimple_code (stmt) != GIMPLE_LABEL)
2126 insns++;
2127 }
2128
2129 if (insns > PARAM_VALUE (PARAM_SLP_MAX_INSNS_IN_BB))
2130 {
2131 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
2132 fprintf (vect_dump, "not vectorized: too many instructions in basic "
2133 "block.\n");
2134
2135 return NULL;
2136 }
2137
2138 /* Autodetect the first vector size we try. */
2139 current_vector_size = 0;
2140 vector_sizes = targetm.vectorize.autovectorize_vector_sizes ();
2141
2142 while (1)
2143 {
2144 bb_vinfo = vect_slp_analyze_bb_1 (bb);
2145 if (bb_vinfo)
2146 return bb_vinfo;
2147
2148 destroy_bb_vec_info (bb_vinfo);
2149
2150 vector_sizes &= ~current_vector_size;
2151 if (vector_sizes == 0
2152 || current_vector_size == 0)
2153 return NULL;
2154
2155 /* Try the next biggest vector size. */
2156 current_vector_size = 1 << floor_log2 (vector_sizes);
2157 if (vect_print_dump_info (REPORT_DETAILS))
2158 fprintf (vect_dump, "***** Re-trying analysis with "
2159 "vector size %d\n", current_vector_size);
2160 }
2161 }
2162
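/* A sketch of the retry loop above on a hypothetical target: if
   targetm.vectorize.autovectorize_vector_sizes () returns the bitmask
   32 | 16 (vector sizes in bytes), the first analysis runs with
   current_vector_size == 0, i.e., the size is autodetected. If that
   attempt fails with, say, 32 bytes autodetected, that size is cleared
   from vector_sizes and the analysis is retried with
   current_vector_size == 16 before giving up. */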
2163
2164 /* SLP costs are calculated according to the SLP instance unrolling factor
2165 (i.e., the number of created vector stmts depends on the unrolling factor).
2166 However, the actual number of vector stmts for every SLP node depends on
2167 the VF, which is set later in vect_analyze_operations (). Hence, the SLP
2168 costs should be updated. In this function we assume that the inside costs
2169 calculated in vect_model_xxx_cost are linear in ncopies. */
2170
2171 void
2172 vect_update_slp_costs_according_to_vf (loop_vec_info loop_vinfo)
2173 {
2174 unsigned int i, vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
2175 VEC (slp_instance, heap) *slp_instances = LOOP_VINFO_SLP_INSTANCES (loop_vinfo);
2176 slp_instance instance;
2177
2178 if (vect_print_dump_info (REPORT_SLP))
2179 fprintf (vect_dump, "=== vect_update_slp_costs_according_to_vf ===");
2180
2181 FOR_EACH_VEC_ELT (slp_instance, slp_instances, i, instance)
2182 /* We assume that costs are linear in ncopies. */
2183 SLP_INSTANCE_INSIDE_OF_LOOP_COST (instance) *= vf
2184 / SLP_INSTANCE_UNROLLING_FACTOR (instance);
2185 }
2186
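/* For instance (hypothetical numbers): an SLP instance built with
   unrolling factor 2 whose inside cost was computed as 10 will, for a
   final VF of 8, create 8/2 = 4 times as many vector stmts, so the loop
   below scales its inside cost to 40. */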
2187
2188 /* For constant and loop invariant defs of SLP_NODE this function returns
2189 (vector) defs (VEC_OPRNDS) that will be used in the vectorized stmts.
2190 OP_NUM determines which operand of the RHS of the scalar stmts we gather
2191 defs for. NUMBER_OF_VECTORS is the number of vector defs to create.
2192 REDUC_INDEX is the index of the reduction operand in the statements, or -1
2193 if there is no reduction. */
2194
2195 static void
2196 vect_get_constant_vectors (tree op, slp_tree slp_node,
2197 VEC (tree, heap) **vec_oprnds,
2198 unsigned int op_num, unsigned int number_of_vectors,
2199 int reduc_index)
2200 {
2201 VEC (gimple, heap) *stmts = SLP_TREE_SCALAR_STMTS (slp_node);
2202 gimple stmt = VEC_index (gimple, stmts, 0);
2203 stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
2204 int nunits;
2205 tree vec_cst;
2206 tree t = NULL_TREE;
2207 int j, number_of_places_left_in_vector;
2208 tree vector_type;
2209 tree vop;
2210 int group_size = VEC_length (gimple, stmts);
2211 unsigned int vec_num, i;
2212 int number_of_copies = 1;
2213 VEC (tree, heap) *voprnds = VEC_alloc (tree, heap, number_of_vectors);
2214 bool constant_p, is_store;
2215 tree neutral_op = NULL, neutral_vec = NULL;
2216 enum tree_code code = gimple_expr_code (stmt);
2217 gimple def_stmt;
2218 struct loop *loop;
2219
2220 if (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_reduction_def
2221 && reduc_index != -1)
2222 {
2223 op_num = reduc_index - 1;
2224 op = gimple_op (stmt, reduc_index);
2225 /* For additional copies (see the explanation of NUMBER_OF_COPIES below)
2226 we need either neutral operands or the original operands. See
2227 get_initial_def_for_reduction() for details. */
2228 switch (code)
2229 {
2230 case WIDEN_SUM_EXPR:
2231 case DOT_PROD_EXPR:
2232 case PLUS_EXPR:
2233 case MINUS_EXPR:
2234 case BIT_IOR_EXPR:
2235 case BIT_XOR_EXPR:
2236 if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (op)))
2237 neutral_op = build_real (TREE_TYPE (op), dconst0);
2238 else
2239 neutral_op = build_int_cst (TREE_TYPE (op), 0);
2240
2241 break;
2242
2243 case MULT_EXPR:
2244 if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (op)))
2245 neutral_op = build_real (TREE_TYPE (op), dconst1);
2246 else
2247 neutral_op = build_int_cst (TREE_TYPE (op), 1);
2248
2249 break;
2250
2251 case BIT_AND_EXPR:
2252 neutral_op = build_int_cst (TREE_TYPE (op), -1);
2253 break;
2254
2255 case MAX_EXPR:
2256 case MIN_EXPR:
2257 def_stmt = SSA_NAME_DEF_STMT (op);
2258 loop = (gimple_bb (stmt))->loop_father;
2259 neutral_op = PHI_ARG_DEF_FROM_EDGE (def_stmt,
2260 loop_preheader_edge (loop));
2261 break;
2262
2263 default:
2264 neutral_op = NULL;
2265 }
2266 }
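
/* As a hypothetical illustration of the neutral operands chosen above: for
   a PLUS_EXPR reduction over integers the neutral operand is 0, so extra
   vector copies are filled with zeros and do not change the reduction
   result; for MULT_EXPR it is 1, and for BIT_AND_EXPR it is all-ones. */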
2267
2268 if (STMT_VINFO_DATA_REF (stmt_vinfo))
2269 {
2270 is_store = true;
2271 op = gimple_assign_rhs1 (stmt);
2272 }
2273 else
2274 is_store = false;
2275
2276 gcc_assert (op);
2277
2278 if (CONSTANT_CLASS_P (op))
2279 constant_p = true;
2280 else
2281 constant_p = false;
2282
2283 vector_type = get_vectype_for_scalar_type (TREE_TYPE (op));
2284 gcc_assert (vector_type);
2285 nunits = TYPE_VECTOR_SUBPARTS (vector_type);
2286
2287 /* NUMBER_OF_COPIES is the number of times we need to use the same values in
2288 created vectors. It is greater than 1 if unrolling is performed.
2289
2290 For example, we have two scalar operands, s1 and s2 (e.g., group of
2291 strided accesses of size two), while NUNITS is four (i.e., four scalars
2292 of this type can be packed in a vector). The output vector will contain
2293 two copies of each scalar operand: {s1, s2, s1, s2}. (NUMBER_OF_COPIES
2294 will be 2).
2295
2296 If GROUP_SIZE > NUNITS, the scalars will be split into several vectors
2297 containing the operands.
2298
2299 For example, NUNITS is four as before, and the group size is 8
2300 (s1, s2, ..., s8). We will create two vectors {s1, s2, s3, s4} and
2301 {s5, s6, s7, s8}. */
2302
2303 number_of_copies = least_common_multiple (nunits, group_size) / group_size;
2304
2305 number_of_places_left_in_vector = nunits;
2306 for (j = 0; j < number_of_copies; j++)
2307 {
2308 for (i = group_size - 1; VEC_iterate (gimple, stmts, i, stmt); i--)
2309 {
2310 if (is_store)
2311 op = gimple_assign_rhs1 (stmt);
2312 else
2313 {
2314 switch (code)
2315 {
2316 case COND_EXPR:
2317 if (op_num == 0 || op_num == 1)
2318 {
2319 tree cond = gimple_assign_rhs1 (stmt);
2320 op = TREE_OPERAND (cond, op_num);
2321 }
2322 else
2323 {
2324 if (op_num == 2)
2325 op = gimple_assign_rhs2 (stmt);
2326 else
2327 op = gimple_assign_rhs3 (stmt);
2328 }
2329 break;
2330
2331 case CALL_EXPR:
2332 op = gimple_call_arg (stmt, op_num);
2333 break;
2334
2335 default:
2336 op = gimple_op (stmt, op_num + 1);
2337 }
2338 }
2339
2340 if (reduc_index != -1)
2341 {
2342 loop = (gimple_bb (stmt))->loop_father;
2343 def_stmt = SSA_NAME_DEF_STMT (op);
2344
2345 gcc_assert (loop);
2346
2347 /* Get the def before the loop. In a reduction chain we have only
2348 one initial value. */
2349 if ((j != (number_of_copies - 1)
2350 || (GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt))
2351 && i != 0))
2352 && neutral_op)
2353 op = neutral_op;
2354 else
2355 op = PHI_ARG_DEF_FROM_EDGE (def_stmt,
2356 loop_preheader_edge (loop));
2357 }
2358
2359 /* Create 'vect_ = {op0,op1,...,opn}'. */
2360 t = tree_cons (NULL_TREE, op, t);
2361
2362 number_of_places_left_in_vector--;
2363
2364 if (number_of_places_left_in_vector == 0)
2365 {
2366 number_of_places_left_in_vector = nunits;
2367
2368 if (constant_p)
2369 vec_cst = build_vector (vector_type, t);
2370 else
2371 vec_cst = build_constructor_from_list (vector_type, t);
2372 VEC_quick_push (tree, voprnds,
2373 vect_init_vector (stmt, vec_cst, vector_type, NULL));
2374 t = NULL_TREE;
2375 }
2376 }
2377 }
2378
2379 /* Since the vectors are created in reverse order, we reverse their order
2380 here. */
2381 vec_num = VEC_length (tree, voprnds);
2382 for (j = vec_num - 1; j >= 0; j--)
2383 {
2384 vop = VEC_index (tree, voprnds, j);
2385 VEC_quick_push (tree, *vec_oprnds, vop);
2386 }
2387
2388 VEC_free (tree, heap, voprnds);
2389
2390 /* In case the VF is greater than the unrolling factor needed for the SLP
2391 group of stmts, the NUMBER_OF_VECTORS to be created is greater than
2392 NUMBER_OF_SCALARS/NUNITS or NUNITS/NUMBER_OF_SCALARS, and hence we have
2393 to replicate the vectors. */
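/* Hypothetical example: with NUNITS == 4 and GROUP_SIZE == 2 the loop
   above builds a single vector {s1, s2, s1, s2}; if the final VF requires,
   say, NUMBER_OF_VECTORS == 4, the loop below pushes the three missing
   vectors, either copies of existing ones or neutral vectors for
   reductions. */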
2394 while (number_of_vectors > VEC_length (tree, *vec_oprnds))
2395 {
2396 /* NEUTRAL_VEC is declared above with the other locals so that it is
2397 built at most once and then reused for all missing copies. */
2398 if (neutral_op)
2399 {
2400 if (!neutral_vec)
2401 neutral_vec = build_vector_from_val (vector_type, neutral_op);
2402
2403 VEC_quick_push (tree, *vec_oprnds, neutral_vec);
2404 }
2405 else
2406 {
2407 for (i = 0; VEC_iterate (tree, *vec_oprnds, i, vop) && i < vec_num; i++)
2408 VEC_quick_push (tree, *vec_oprnds, vop);
2409 }
2410 }
2411 }
2412
2413
2414 /* Get the vectorized definitions from SLP_NODE, which contains the
2415 corresponding vectorized def-stmts. */
2416
2417 static void
2418 vect_get_slp_vect_defs (slp_tree slp_node, VEC (tree,heap) **vec_oprnds)
2419 {
2420 tree vec_oprnd;
2421 gimple vec_def_stmt;
2422 unsigned int i;
2423
2424 gcc_assert (SLP_TREE_VEC_STMTS (slp_node));
2425
2426 FOR_EACH_VEC_ELT (gimple, SLP_TREE_VEC_STMTS (slp_node), i, vec_def_stmt)
2427 {
2428 gcc_assert (vec_def_stmt);
2429 vec_oprnd = gimple_get_lhs (vec_def_stmt);
2430 VEC_quick_push (tree, *vec_oprnds, vec_oprnd);
2431 }
2432 }
2433
2434
2435 /* Get vectorized definitions for SLP_NODE.
2436 If the scalar definitions are loop invariants or constants, collect them and
2437 call vect_get_constant_vectors() to create vector stmts.
2438 Otherwise, the def-stmts must already be vectorized and the vectorized stmts
2439 must be stored in the corresponding child of SLP_NODE, and we call
2440 vect_get_slp_vect_defs () to retrieve them. */
2441
2442 void
2443 vect_get_slp_defs (VEC (tree, heap) *ops, slp_tree slp_node,
2444 VEC (slp_void_p, heap) **vec_oprnds, int reduc_index)
2445 {
2446 gimple first_stmt, first_def;
2447 int number_of_vects = 0, i;
2448 unsigned int child_index = 0;
2449 HOST_WIDE_INT lhs_size_unit, rhs_size_unit;
2450 slp_tree child = NULL;
2451 VEC (tree, heap) *vec_defs;
2452 tree oprnd, def_lhs;
2453 bool vectorized_defs;
2454
2455 first_stmt = VEC_index (gimple, SLP_TREE_SCALAR_STMTS (slp_node), 0);
2456 FOR_EACH_VEC_ELT (tree, ops, i, oprnd)
2457 {
2458 /* For each operand we check if it has vectorized definitions in a child
2459 node or whether we need to create them (for invariants and constants). We
2460 check if the LHS of the first stmt of the next child matches OPRND.
2461 If it does, we found the correct child. Otherwise, we call
2462 vect_get_constant_vectors () and do not advance CHILD_INDEX, so that
2463 this child node is checked again for the next operand. */
2464 vectorized_defs = false;
2465 if (VEC_length (slp_void_p, SLP_TREE_CHILDREN (slp_node)) > child_index)
2466 {
2467 child = (slp_tree) VEC_index (slp_void_p,
2468 SLP_TREE_CHILDREN (slp_node),
2469 child_index);
2470 first_def = VEC_index (gimple, SLP_TREE_SCALAR_STMTS (child), 0);
2471
2472 /* At the end of a pattern sequence we have a use of the original stmt,
2473 so we need to compare OPRND with the original def. */
2474 if (is_pattern_stmt_p (vinfo_for_stmt (first_def))
2475 && !STMT_VINFO_IN_PATTERN_P (vinfo_for_stmt (first_stmt))
2476 && !is_pattern_stmt_p (vinfo_for_stmt (first_stmt)))
2477 first_def = STMT_VINFO_RELATED_STMT (vinfo_for_stmt (first_def));
2478
2479 if (is_gimple_call (first_def))
2480 def_lhs = gimple_call_lhs (first_def);
2481 else
2482 def_lhs = gimple_assign_lhs (first_def);
2483
2484 if (operand_equal_p (oprnd, def_lhs, 0))
2485 {
2486 /* The number of vector defs is determined by the number of
2487 vector statements in the node from which we get those
2488 statements. */
2489 number_of_vects = SLP_TREE_NUMBER_OF_VEC_STMTS (child);
2490 vectorized_defs = true;
2491 child_index++;
2492 }
2493 }
2494
2495 if (!vectorized_defs)
2496 {
2497 if (i == 0)
2498 {
2499 number_of_vects = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
2500 /* The number of vector stmts was calculated according to the LHS in
2501 vect_schedule_slp_instance (); fix it by replacing the LHS with
2502 the RHS, if necessary. See vect_get_smallest_scalar_type () for
2503 details. */
2504 vect_get_smallest_scalar_type (first_stmt, &lhs_size_unit,
2505 &rhs_size_unit);
2506 if (rhs_size_unit != lhs_size_unit)
2507 {
2508 number_of_vects *= rhs_size_unit;
2509 number_of_vects /= lhs_size_unit;
2510 }
2511 }
2512 }
2513
2514 /* Allocate memory for vectorized defs. */
2515 vec_defs = VEC_alloc (tree, heap, number_of_vects);
2516
2517 /* For reduction defs we call vect_get_constant_vectors (), since we are
2518 looking for initial loop invariant values. */
2519 if (vectorized_defs && reduc_index == -1)
2520 /* The defs are already vectorized. */
2521 vect_get_slp_vect_defs (child, &vec_defs);
2522 else
2523 /* Build vectors from scalar defs. */
2524 vect_get_constant_vectors (oprnd, slp_node, &vec_defs, i,
2525 number_of_vects, reduc_index);
2526
2527 VEC_quick_push (slp_void_p, *vec_oprnds, (slp_void_p) vec_defs);
2528
2529 /* For reductions, we only need initial values. */
2530 if (reduc_index != -1)
2531 return;
2532 }
2533 }
2534
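/* A sketch of the matching logic in vect_get_slp_defs above (hypothetical
   stmts): for the group  a1 = b1 + c;  a2 = b2 + c;  where b1 and b2 are
   defined by another SLP node and c is loop invariant, operand 0 matches
   the LHS of that child's first def-stmt, so its vectorized defs are
   reused, while for operand 1 vect_get_constant_vectors () builds the
   invariant vectors. */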
2535
2536 /* Create NCOPIES permutation statements using the vector permute mask MASK
2537 and two input vectors placed in DR_CHAIN at FIRST_VEC_INDX and
2538 SECOND_VEC_INDX for the first copy, shifting by STRIDE elements of
2539 DR_CHAIN for every subsequent copy.
2540 (STRIDE is the number of vectorized stmts for NODE divided by the number
2541 of copies.)
2542 VECT_STMTS_COUNTER specifies the index in the vectorized stmts of NODE at
2543 which the created stmts must be inserted. */
2544
2545 static inline void
2546 vect_create_mask_and_perm (gimple stmt, gimple next_scalar_stmt,
2547 tree mask, int first_vec_indx, int second_vec_indx,
2548 gimple_stmt_iterator *gsi, slp_tree node,
2549 tree vectype, VEC(tree,heap) *dr_chain,
2550 int ncopies, int vect_stmts_counter)
2551 {
2552 tree perm_dest;
2553 gimple perm_stmt = NULL;
2554 stmt_vec_info next_stmt_info;
2555 int i, stride;
2556 tree first_vec, second_vec, data_ref;
2557
2558 stride = SLP_TREE_NUMBER_OF_VEC_STMTS (node) / ncopies;
2559
2560 /* Initialize the vect stmts of NODE to properly insert the generated
2561 stmts later. */
2562 for (i = VEC_length (gimple, SLP_TREE_VEC_STMTS (node));
2563 i < (int) SLP_TREE_NUMBER_OF_VEC_STMTS (node); i++)
2564 VEC_quick_push (gimple, SLP_TREE_VEC_STMTS (node), NULL);
2565
2566 perm_dest = vect_create_destination_var (gimple_assign_lhs (stmt), vectype);
2567 for (i = 0; i < ncopies; i++)
2568 {
2569 first_vec = VEC_index (tree, dr_chain, first_vec_indx);
2570 second_vec = VEC_index (tree, dr_chain, second_vec_indx);
2571
2572 /* Generate the permute statement. */
2573 perm_stmt = gimple_build_assign_with_ops3 (VEC_PERM_EXPR, perm_dest,
2574 first_vec, second_vec, mask);
2575 data_ref = make_ssa_name (perm_dest, perm_stmt);
2576 gimple_set_lhs (perm_stmt, data_ref);
2577 vect_finish_stmt_generation (stmt, perm_stmt, gsi);
2578
2579 /* Store the vector statement in NODE. */
2580 VEC_replace (gimple, SLP_TREE_VEC_STMTS (node),
2581 stride * i + vect_stmts_counter, perm_stmt);
2582
2583 first_vec_indx += stride;
2584 second_vec_indx += stride;
2585 }
2586
2587 /* Mark the scalar stmt as vectorized. */
2588 next_stmt_info = vinfo_for_stmt (next_scalar_stmt);
2589 STMT_VINFO_VEC_STMT (next_stmt_info) = perm_stmt;
2590 }
2591
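/* For instance (hypothetical numbers): with SLP_TREE_NUMBER_OF_VEC_STMTS
   == 4 and NCOPIES == 2, STRIDE is 2; assuming FIRST_VEC_INDX == 0 and
   SECOND_VEC_INDX == 1, the first copy permutes DR_CHAIN entries 0 and 1
   and is stored at index VECT_STMTS_COUNTER, while the second permutes
   entries 2 and 3 and is stored at index VECT_STMTS_COUNTER + 2. */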
2592
2593 /* Given FIRST_MASK_ELEMENT, the mask element in element representation,
2594 return in CURRENT_MASK_ELEMENT its equivalent in the target specific
2595 representation. Check that the mask is valid and return FALSE if not.
2596 Set NEED_NEXT_VECTOR to TRUE if the permutation requires moving to the
2597 next vector, i.e., the current first vector is not needed anymore. */
2598
2599 static bool
2600 vect_get_mask_element (gimple stmt, int first_mask_element, int m,
2601 int mask_nunits, bool only_one_vec, int index,
2602 unsigned char *mask, int *current_mask_element,
2603 bool *need_next_vector, int *number_of_mask_fixes,
2604 bool *mask_fixed, bool *needs_first_vector)
2605 {
2606 int i;
2607
2608 /* Convert to target specific representation. */
2609 *current_mask_element = first_mask_element + m;
2610 /* Adjust the value in case it's a mask for the second and third vectors. */
2611 *current_mask_element -= mask_nunits * (*number_of_mask_fixes - 1);
2612
2613 if (*current_mask_element < mask_nunits)
2614 *needs_first_vector = true;
2615
2616 /* We have only one input vector to permute but the mask accesses values in
2617 the next vector as well. */
2618 if (only_one_vec && *current_mask_element >= mask_nunits)
2619 {
2620 if (vect_print_dump_info (REPORT_DETAILS))
2621 {
2622 fprintf (vect_dump, "permutation requires at least two vectors ");
2623 print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
2624 }
2625
2626 return false;
2627 }
2628
2629 /* The mask requires the next vector. */
2630 if (*current_mask_element >= mask_nunits * 2)
2631 {
2632 if (*needs_first_vector || *mask_fixed)
2633 {
2634 /* We either need the first vector too or have already moved to the
2635 next vector. In both cases, this permutation needs three
2636 vectors. */
2637 if (vect_print_dump_info (REPORT_DETAILS))
2638 {
2639 fprintf (vect_dump, "permutation requires at "
2640 "least three vectors ");
2641 print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
2642 }
2643
2644 return false;
2645 }
2646
2647 /* We move to the next vector, dropping the first one and working with
2648 the second and the third - we need to adjust the values of the mask
2649 accordingly. */
2650 *current_mask_element -= mask_nunits * *number_of_mask_fixes;
2651
2652 for (i = 0; i < index; i++)
2653 mask[i] -= mask_nunits * *number_of_mask_fixes;
2654
2655 (*number_of_mask_fixes)++;
2656 *mask_fixed = true;
2657 }
2658
2659 *need_next_vector = *mask_fixed;
2660
2661 /* This was the last element of this mask. Start a new one. */
2662 if (index == mask_nunits - 1)
2663 {
2664 *number_of_mask_fixes = 1;
2665 *mask_fixed = false;
2666 *needs_first_vector = false;
2667 }
2668
2669 return true;
2670 }
2671
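/* Hypothetical illustration of the adjustment above, for MASK_NUNITS == 4:
   a mask {6,9,9,9} references only the second and third input vectors, so
   its elements are shifted down by MASK_NUNITS to {2,5,5,5} and
   NEED_NEXT_VECTOR is set so that the caller advances to the next vector
   pair (see also the example in vect_transform_slp_perm_load below). */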
2672
2673 /* Generate vector permute statements from a list of loads in DR_CHAIN.
2674 If ANALYZE_ONLY is TRUE, only check that it is possible to create valid
2675 permute statements for SLP_NODE_INSTANCE. */
2676 bool
2677 vect_transform_slp_perm_load (gimple stmt, VEC (tree, heap) *dr_chain,
2678 gimple_stmt_iterator *gsi, int vf,
2679 slp_instance slp_node_instance, bool analyze_only)
2680 {
2681 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2682 tree mask_element_type = NULL_TREE, mask_type;
2683 int i, j, k, nunits, vec_index = 0, scalar_index;
2684 slp_tree node;
2685 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
2686 gimple next_scalar_stmt;
2687 int group_size = SLP_INSTANCE_GROUP_SIZE (slp_node_instance);
2688 int first_mask_element;
2689 int index, unroll_factor, current_mask_element, ncopies;
2690 unsigned char *mask;
2691 bool only_one_vec = false, need_next_vector = false;
2692 int first_vec_index, second_vec_index, orig_vec_stmts_num, vect_stmts_counter;
2693 int number_of_mask_fixes = 1;
2694 bool mask_fixed = false;
2695 bool needs_first_vector = false;
2696 enum machine_mode mode;
2697
2698 mode = TYPE_MODE (vectype);
2699
2700 if (!can_vec_perm_p (mode, false, NULL))
2701 {
2702 if (vect_print_dump_info (REPORT_DETAILS))
2703 {
2704 fprintf (vect_dump, "no vect permute for ");
2705 print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
2706 }
2707 return false;
2708 }
2709
2710 /* The generic VEC_PERM_EXPR code always uses an integral type of the
2711 same size as the vector element being permuted. */
2712 mask_element_type
2713 = lang_hooks.types.type_for_size
2714 (TREE_INT_CST_LOW (TYPE_SIZE (TREE_TYPE (vectype))), 1);
2715 mask_type = get_vectype_for_scalar_type (mask_element_type);
2716 nunits = TYPE_VECTOR_SUBPARTS (vectype);
2717 mask = XALLOCAVEC (unsigned char, nunits);
2718 unroll_factor = SLP_INSTANCE_UNROLLING_FACTOR (slp_node_instance);
2719
2720 /* The number of vector stmts to generate based only on the unrolling
2721 factor of SLP_NODE_INSTANCE. */
2722 orig_vec_stmts_num = group_size *
2723 SLP_INSTANCE_UNROLLING_FACTOR (slp_node_instance) / nunits;
2724 if (orig_vec_stmts_num == 1)
2725 only_one_vec = true;
2726
2727 /* The number of copies is determined by the final vectorization factor
2728 relative to the unrolling factor of SLP_NODE_INSTANCE. */
2729 ncopies = vf / SLP_INSTANCE_UNROLLING_FACTOR (slp_node_instance);
2730
2731 /* Generate permutation masks for every NODE. Number of masks for each NODE
2732 is equal to GROUP_SIZE.
2733 E.g., we have a group of three nodes with three loads from the same
2734 location in each node, and the vector size is 4. I.e., we have an
2735 a0b0c0a1b1c1... sequence and we need to create the following vectors:
2736 for a's: a0a0a0a1 a1a1a2a2 a2a3a3a3
2737 for b's: b0b0b0b1 b1b1b2b2 b2b3b3b3
2738 ...
2739
2740 The masks for a's should be: {0,0,0,3} {3,3,6,6} {6,9,9,9}.
2741 The last mask is illegal since we assume two operands for the permute
2742 operation, and the mask element values can't be outside that range.
2743 Hence, the last mask must be converted into {2,5,5,5}.
2744 For the first two permutations we need the first and the second input
2745 vectors: {a0,b0,c0,a1} and {b1,c1,a2,b2}, and for the last permutation
2746 we need the second and the third vectors: {b1,c1,a2,b2} and
2747 {c2,a3,b3,c3}. */
2748
2749 FOR_EACH_VEC_ELT (slp_tree, SLP_INSTANCE_LOADS (slp_node_instance), i, node)
2750 {
2751 scalar_index = 0;
2752 index = 0;
2753 vect_stmts_counter = 0;
2754 vec_index = 0;
2755 first_vec_index = vec_index++;
2756 if (only_one_vec)
2757 second_vec_index = first_vec_index;
2758 else
2759 second_vec_index = vec_index++;
2760
2761 for (j = 0; j < unroll_factor; j++)
2762 {
2763 for (k = 0; k < group_size; k++)
2764 {
2765 first_mask_element = i + j * group_size;
2766 if (!vect_get_mask_element (stmt, first_mask_element, 0,
2767 nunits, only_one_vec, index,
2768 mask, &current_mask_element,
2769 &need_next_vector,
2770 &number_of_mask_fixes, &mask_fixed,
2771 &needs_first_vector))
2772 return false;
2773 mask[index++] = current_mask_element;
2774
2775 if (index == nunits)
2776 {
2777 tree mask_vec = NULL;
2778
2779 if (!can_vec_perm_p (mode, false, mask))
2780 {
2781 if (vect_print_dump_info (REPORT_DETAILS))
2782 {
2783 fprintf (vect_dump, "unsupported vect permute { ");
2784 for (i = 0; i < nunits; ++i)
2785 fprintf (vect_dump, "%d ", mask[i]);
2786 fprintf (vect_dump, "}\n");
2787 }
2788 return false;
2789 }
2790
2791 while (--index >= 0)
2792 {
2793 tree t = build_int_cst (mask_element_type, mask[index]);
2794 mask_vec = tree_cons (NULL, t, mask_vec);
2795 }
2796 mask_vec = build_vector (mask_type, mask_vec);
2797 index = 0;
2798
2799 if (!analyze_only)
2800 {
2801 if (need_next_vector)
2802 {
2803 first_vec_index = second_vec_index;
2804 second_vec_index = vec_index;
2805 }
2806
2807 next_scalar_stmt = VEC_index (gimple,
2808 SLP_TREE_SCALAR_STMTS (node), scalar_index++);
2809
2810 vect_create_mask_and_perm (stmt, next_scalar_stmt,
2811 mask_vec, first_vec_index, second_vec_index,
2812 gsi, node, vectype, dr_chain,
2813 ncopies, vect_stmts_counter++);
2814 }
2815 }
2816 }
2817 }
2818 }
2819
2820 return true;
2821 }
2822
2823
2825 /* Vectorize SLP instance tree in postorder. */
2826
2827 static bool
2828 vect_schedule_slp_instance (slp_tree node, slp_instance instance,
2829 unsigned int vectorization_factor)
2830 {
2831 gimple stmt;
2832 bool strided_store, is_store;
2833 gimple_stmt_iterator si;
2834 stmt_vec_info stmt_info;
2835 unsigned int vec_stmts_size, nunits, group_size;
2836 tree vectype;
2837 int i;
2838 slp_tree loads_node;
2839 slp_void_p child;
2840
2841 if (!node)
2842 return false;
2843
2844 FOR_EACH_VEC_ELT (slp_void_p, SLP_TREE_CHILDREN (node), i, child)
2845 vect_schedule_slp_instance ((slp_tree) child, instance,
2846 vectorization_factor);
2847
2848 stmt = VEC_index (gimple, SLP_TREE_SCALAR_STMTS (node), 0);
2849 stmt_info = vinfo_for_stmt (stmt);
2850
2851 /* VECTYPE is the type of the destination. */
2852 vectype = STMT_VINFO_VECTYPE (stmt_info);
2853 nunits = (unsigned int) TYPE_VECTOR_SUBPARTS (vectype);
2854 group_size = SLP_INSTANCE_GROUP_SIZE (instance);
2855
2856 /* For each SLP instance calculate the number of vector stmts to be created
2857 for the scalar stmts in each node of the SLP tree. The number of vector
2858 elements in one vector iteration is the number of scalar elements in
2859 one scalar iteration (GROUP_SIZE) multiplied by the VF divided by the
2860 vector size. */
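/* E.g., GROUP_SIZE == 4, VECTORIZATION_FACTOR == 4 and NUNITS == 8 give
   (4 * 4) / 8 == 2 vector stmts per node. */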
2861 vec_stmts_size = (vectorization_factor * group_size) / nunits;
2862
2863 /* In case of load permutation we have to allocate vectorized statements for
2864 all the nodes that participate in that permutation. */
2865 if (SLP_INSTANCE_LOAD_PERMUTATION (instance))
2866 {
2867 FOR_EACH_VEC_ELT (slp_tree, SLP_INSTANCE_LOADS (instance), i, loads_node)
2868 {
2869 if (!SLP_TREE_VEC_STMTS (loads_node))
2870 {
2871 SLP_TREE_VEC_STMTS (loads_node) = VEC_alloc (gimple, heap,
2872 vec_stmts_size);
2873 SLP_TREE_NUMBER_OF_VEC_STMTS (loads_node) = vec_stmts_size;
2874 }
2875 }
2876 }
2877
2878 if (!SLP_TREE_VEC_STMTS (node))
2879 {
2880 SLP_TREE_VEC_STMTS (node) = VEC_alloc (gimple, heap, vec_stmts_size);
2881 SLP_TREE_NUMBER_OF_VEC_STMTS (node) = vec_stmts_size;
2882 }
2883
2884 if (vect_print_dump_info (REPORT_DETAILS))
2885 {
2886 fprintf (vect_dump, "------>vectorizing SLP node starting from: ");
2887 print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
2888 }
2889
2890 /* Loads should be inserted before the first load. */
2891 if (SLP_INSTANCE_FIRST_LOAD_STMT (instance)
2892 && STMT_VINFO_STRIDED_ACCESS (stmt_info)
2893 && !REFERENCE_CLASS_P (gimple_get_lhs (stmt))
2894 && SLP_INSTANCE_LOAD_PERMUTATION (instance))
2895 si = gsi_for_stmt (SLP_INSTANCE_FIRST_LOAD_STMT (instance));
2896 else if (is_pattern_stmt_p (stmt_info))
2897 si = gsi_for_stmt (STMT_VINFO_RELATED_STMT (stmt_info));
2898 else
2899 si = gsi_for_stmt (stmt);
2900
2901 /* Stores should be inserted just before the last store. */
2902 if (STMT_VINFO_STRIDED_ACCESS (stmt_info)
2903 && REFERENCE_CLASS_P (gimple_get_lhs (stmt)))
2904 {
2905 gimple last_store = vect_find_last_store_in_slp_instance (instance);
2906 if (is_pattern_stmt_p (vinfo_for_stmt (last_store)))
2907 last_store = STMT_VINFO_RELATED_STMT (vinfo_for_stmt (last_store));
2908 si = gsi_for_stmt (last_store);
2909 }
2910
2911 /* Mark the first element of the reduction chain as a reduction to properly
2912 transform the node. In the analysis phase only the last element of the
2913 chain is marked as a reduction. */
2914 if (GROUP_FIRST_ELEMENT (stmt_info) && !STMT_VINFO_STRIDED_ACCESS (stmt_info)
2915 && GROUP_FIRST_ELEMENT (stmt_info) == stmt)
2916 {
2917 STMT_VINFO_DEF_TYPE (stmt_info) = vect_reduction_def;
2918 STMT_VINFO_TYPE (stmt_info) = reduc_vec_info_type;
2919 }
2920
2921 is_store = vect_transform_stmt (stmt, &si, &strided_store, node, instance);
2922 return is_store;
2923 }
2924
2925 /* Replace the scalar calls in SLP node NODE with assignments of zero to
2926 their LHS. For loop vectorization this is done in vectorizable_call, but
2927 for SLP it needs to be deferred until the end of vect_schedule_slp,
2928 because multiple SLP instances may refer to the same scalar stmt. */
2929
2930 static void
2931 vect_remove_slp_scalar_calls (slp_tree node)
2932 {
2933 gimple stmt, new_stmt;
2934 gimple_stmt_iterator gsi;
2935 int i;
2936 slp_void_p child;
2937 tree lhs;
2938 stmt_vec_info stmt_info;
2939
2940 if (!node)
2941 return;
2942
2943 FOR_EACH_VEC_ELT (slp_void_p, SLP_TREE_CHILDREN (node), i, child)
2944 vect_remove_slp_scalar_calls ((slp_tree) child);
2945
2946 FOR_EACH_VEC_ELT (gimple, SLP_TREE_SCALAR_STMTS (node), i, stmt)
2947 {
2948 if (!is_gimple_call (stmt) || gimple_bb (stmt) == NULL)
2949 continue;
2950 stmt_info = vinfo_for_stmt (stmt);
2951 if (stmt_info == NULL
2952 || is_pattern_stmt_p (stmt_info)
2953 || !PURE_SLP_STMT (stmt_info))
2954 continue;
2955 lhs = gimple_call_lhs (stmt);
2956 new_stmt = gimple_build_assign (lhs, build_zero_cst (TREE_TYPE (lhs)));
2957 set_vinfo_for_stmt (new_stmt, stmt_info);
2958 set_vinfo_for_stmt (stmt, NULL);
2959 STMT_VINFO_STMT (stmt_info) = new_stmt;
2960 gsi = gsi_for_stmt (stmt);
2961 gsi_replace (&gsi, new_stmt, false);
2962 SSA_NAME_DEF_STMT (gimple_assign_lhs (new_stmt)) = new_stmt;
2963 }
2964 }
2965
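/* E.g., a scalar call  x_1 = foo (y_2);  that was vectorized as part of an
   SLP instance is replaced above by  x_1 = 0;  (a zero constant of x_1's
   type), with the original stmt_vec_info re-attached to the new stmt. */
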
2966 /* Generate vector code for all SLP instances in the loop/basic block. */
2967
2968 bool
2969 vect_schedule_slp (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo)
2970 {
2971 VEC (slp_instance, heap) *slp_instances;
2972 slp_instance instance;
2973 unsigned int i, vf;
2974 bool is_store = false;
2975
2976 if (loop_vinfo)
2977 {
2978 slp_instances = LOOP_VINFO_SLP_INSTANCES (loop_vinfo);
2979 vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
2980 }
2981 else
2982 {
2983 slp_instances = BB_VINFO_SLP_INSTANCES (bb_vinfo);
2984 vf = 1;
2985 }
2986
2987 FOR_EACH_VEC_ELT (slp_instance, slp_instances, i, instance)
2988 {
2989 /* Schedule the tree of INSTANCE. */
2990 is_store = vect_schedule_slp_instance (SLP_INSTANCE_TREE (instance),
2991 instance, vf);
2992 if (vect_print_dump_info (REPORT_VECTORIZED_LOCATIONS)
2993 || vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
2994 fprintf (vect_dump, "vectorizing stmts using SLP.");
2995 }
2996
2997 FOR_EACH_VEC_ELT (slp_instance, slp_instances, i, instance)
2998 {
2999 slp_tree root = SLP_INSTANCE_TREE (instance);
3000 gimple store;
3001 unsigned int j;
3002 gimple_stmt_iterator gsi;
3003
3004 vect_remove_slp_scalar_calls (root);
3005
3006 for (j = 0; VEC_iterate (gimple, SLP_TREE_SCALAR_STMTS (root), j, store)
3007 && j < SLP_INSTANCE_GROUP_SIZE (instance); j++)
3008 {
3009 if (!STMT_VINFO_DATA_REF (vinfo_for_stmt (store)))
3010 break;
3011
3012 if (is_pattern_stmt_p (vinfo_for_stmt (store)))
3013 store = STMT_VINFO_RELATED_STMT (vinfo_for_stmt (store));
3014 /* Free the attached stmt_vec_info and remove the stmt. */
3015 gsi = gsi_for_stmt (store);
3016 gsi_remove (&gsi, true);
3017 free_stmt_vec_info (store);
3018 }
3019 }
3020
3021 return is_store;
3022 }
3023
3024
3025 /* Vectorize the basic block. */
3026
3027 void
3028 vect_slp_transform_bb (basic_block bb)
3029 {
3030 bb_vec_info bb_vinfo = vec_info_for_bb (bb);
3031 gimple_stmt_iterator si;
3032
3033 gcc_assert (bb_vinfo);
3034
3035 if (vect_print_dump_info (REPORT_DETAILS))
3036 fprintf (vect_dump, "SLPing BB\n");
3037
3038 for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
3039 {
3040 gimple stmt = gsi_stmt (si);
3041 stmt_vec_info stmt_info;
3042
3043 if (vect_print_dump_info (REPORT_DETAILS))
3044 {
3045 fprintf (vect_dump, "------>SLPing statement: ");
3046 print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
3047 }
3048
3049 stmt_info = vinfo_for_stmt (stmt);
3050 gcc_assert (stmt_info);
3051
3052 /* Schedule all the SLP instances when the first SLP stmt is reached. */
3053 if (STMT_SLP_TYPE (stmt_info))
3054 {
3055 vect_schedule_slp (NULL, bb_vinfo);
3056 break;
3057 }
3058 }
3059
3060 mark_sym_for_renaming (gimple_vop (cfun));
3061 /* The memory tags and pointers in vectorized statements need to
3062 have their SSA forms updated. FIXME, why can't this be delayed
3063 until all the loops have been transformed? */
3064 update_ssa (TODO_update_ssa);
3065
3066 if (vect_print_dump_info (REPORT_DETAILS))
3067 fprintf (vect_dump, "BASIC BLOCK VECTORIZED\n");
3068
3069 destroy_bb_vec_info (bb_vinfo);
3070 }
3071
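
/* Usage sketch: the driver in tree-vectorizer.c is expected to pair the
   two entry points above roughly as follows (hypothetical excerpt):

     vect_location = find_bb_location (bb);
     if (vect_slp_analyze_bb (bb))
       vect_slp_transform_bb (bb);

   vect_slp_transform_bb () asserts that the analysis info is attached to
   the block and destroys it once the transformation is done. */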