/* SLP - Basic Block Vectorization
   Copyright (C) 2007, 2008, 2009, 2010, 2011
   Free Software Foundation, Inc.
   Contributed by Dorit Naishlos <dorit@il.ibm.com>
   and Ira Rosen <irar@il.ibm.com>

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free
Software Foundation; either version 3, or (at your option) any later
version.

GCC is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */

#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "tm.h"
#include "ggc.h"
#include "tree.h"
#include "target.h"
#include "basic-block.h"
#include "tree-pretty-print.h"
#include "gimple-pretty-print.h"
#include "tree-flow.h"
#include "tree-dump.h"
#include "cfgloop.h"
#include "cfglayout.h"
#include "expr.h"
#include "recog.h"
#include "optabs.h"
#include "tree-vectorizer.h"
#include "langhooks.h"

/* Extract the location of the basic block in the source code.
   Return the location if one is found, UNKNOWN_LOC otherwise.  */

LOC
find_bb_location (basic_block bb)
{
  gimple stmt = NULL;
  gimple_stmt_iterator si;

  if (!bb)
    return UNKNOWN_LOC;

  for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
    {
      stmt = gsi_stmt (si);
      if (gimple_location (stmt) != UNKNOWN_LOC)
        return gimple_location (stmt);
    }

  return UNKNOWN_LOC;
}


/* Recursively free the memory allocated for the SLP tree rooted at NODE.  */

static void
vect_free_slp_tree (slp_tree node)
{
  int i;
  slp_void_p child;

  if (!node)
    return;

  FOR_EACH_VEC_ELT (slp_void_p, SLP_TREE_CHILDREN (node), i, child)
    vect_free_slp_tree ((slp_tree) child);

  VEC_free (slp_void_p, heap, SLP_TREE_CHILDREN (node));
  VEC_free (gimple, heap, SLP_TREE_SCALAR_STMTS (node));

  if (SLP_TREE_VEC_STMTS (node))
    VEC_free (gimple, heap, SLP_TREE_VEC_STMTS (node));

  free (node);
}


/* Free the memory allocated for the SLP instance.  */

void
vect_free_slp_instance (slp_instance instance)
{
  vect_free_slp_tree (SLP_INSTANCE_TREE (instance));
  VEC_free (int, heap, SLP_INSTANCE_LOAD_PERMUTATION (instance));
  VEC_free (slp_tree, heap, SLP_INSTANCE_LOADS (instance));
}


/* Create an SLP node for SCALAR_STMTS.  */

static slp_tree
vect_create_new_slp_node (VEC (gimple, heap) *scalar_stmts)
{
  slp_tree node;
  gimple stmt = VEC_index (gimple, scalar_stmts, 0);
  unsigned int nops;

  if (is_gimple_call (stmt))
    nops = gimple_call_num_args (stmt);
  else if (is_gimple_assign (stmt))
    {
      nops = gimple_num_ops (stmt) - 1;
      if (gimple_assign_rhs_code (stmt) == COND_EXPR)
        nops++;
    }
  else
    return NULL;

  node = XNEW (struct _slp_tree);
  SLP_TREE_SCALAR_STMTS (node) = scalar_stmts;
  SLP_TREE_VEC_STMTS (node) = NULL;
  SLP_TREE_CHILDREN (node) = VEC_alloc (slp_void_p, heap, nops);
  SLP_TREE_OUTSIDE_OF_LOOP_COST (node) = 0;
  SLP_TREE_INSIDE_OF_LOOP_COST (node) = 0;

  return node;
}


/* Allocate operands info for NOPS operands, and GROUP_SIZE def-stmts for each
   operand.  */
static VEC (slp_oprnd_info, heap) *
vect_create_oprnd_info (int nops, int group_size)
{
  int i;
  slp_oprnd_info oprnd_info;
  VEC (slp_oprnd_info, heap) *oprnds_info;

  oprnds_info = VEC_alloc (slp_oprnd_info, heap, nops);
  for (i = 0; i < nops; i++)
    {
      oprnd_info = XNEW (struct _slp_oprnd_info);
      oprnd_info->def_stmts = VEC_alloc (gimple, heap, group_size);
      oprnd_info->first_dt = vect_uninitialized_def;
      oprnd_info->first_def_type = NULL_TREE;
      oprnd_info->first_const_oprnd = NULL_TREE;
      oprnd_info->first_pattern = false;
      VEC_quick_push (slp_oprnd_info, oprnds_info, oprnd_info);
    }

  return oprnds_info;
}


/* Free operands info.  */

static void
vect_free_oprnd_info (VEC (slp_oprnd_info, heap) **oprnds_info)
{
  int i;
  slp_oprnd_info oprnd_info;

  FOR_EACH_VEC_ELT (slp_oprnd_info, *oprnds_info, i, oprnd_info)
    {
      VEC_free (gimple, heap, oprnd_info->def_stmts);
      XDELETE (oprnd_info);
    }

  VEC_free (slp_oprnd_info, heap, *oprnds_info);
}


/* Get the defs for the rhs of STMT (collect them in OPRNDS_INFO), check that
   they are of a valid type and that they match the defs of the first stmt of
   the SLP group (stored in OPRNDS_INFO).  */

static bool
vect_get_and_check_slp_defs (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo,
                             slp_tree slp_node, gimple stmt,
                             int ncopies_for_cost, bool first,
                             VEC (slp_oprnd_info, heap) **oprnds_info)
{
  tree oprnd;
  unsigned int i, number_of_oprnds;
  tree def, def_op0 = NULL_TREE;
  gimple def_stmt;
  enum vect_def_type dt = vect_uninitialized_def;
  enum vect_def_type dt_op0 = vect_uninitialized_def;
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
  tree lhs = gimple_get_lhs (stmt);
  struct loop *loop = NULL;
  enum tree_code rhs_code;
  bool different_types = false;
  bool pattern = false;
  slp_oprnd_info oprnd_info, oprnd0_info, oprnd1_info;
  int op_idx = 1;
  tree compare_rhs = NULL_TREE;

  if (loop_vinfo)
    loop = LOOP_VINFO_LOOP (loop_vinfo);

  if (is_gimple_call (stmt))
    {
      number_of_oprnds = gimple_call_num_args (stmt);
      op_idx = 3;
    }
  else if (is_gimple_assign (stmt))
    {
      number_of_oprnds = gimple_num_ops (stmt) - 1;
      if (gimple_assign_rhs_code (stmt) == COND_EXPR)
        number_of_oprnds++;
    }
  else
    return false;

  for (i = 0; i < number_of_oprnds; i++)
    {
      if (compare_rhs)
        {
          oprnd = compare_rhs;
          compare_rhs = NULL_TREE;
        }
      else
        oprnd = gimple_op (stmt, op_idx++);

      oprnd_info = VEC_index (slp_oprnd_info, *oprnds_info, i);

      if (COMPARISON_CLASS_P (oprnd))
        {
          compare_rhs = TREE_OPERAND (oprnd, 1);
          oprnd = TREE_OPERAND (oprnd, 0);
        }

      if (!vect_is_simple_use (oprnd, loop_vinfo, bb_vinfo, &def_stmt, &def,
                               &dt)
          || (!def_stmt && dt != vect_constant_def))
        {
          if (vect_print_dump_info (REPORT_SLP))
            {
              fprintf (vect_dump, "Build SLP failed: can't find def for ");
              print_generic_expr (vect_dump, oprnd, TDF_SLIM);
            }

          return false;
        }

      /* Check if DEF_STMT is a part of a pattern in LOOP and get the def stmt
         from the pattern.  Check that all the stmts of the node are in the
         pattern.  */
      if (loop && def_stmt && gimple_bb (def_stmt)
          && flow_bb_inside_loop_p (loop, gimple_bb (def_stmt))
          && vinfo_for_stmt (def_stmt)
          && STMT_VINFO_IN_PATTERN_P (vinfo_for_stmt (def_stmt))
          && !STMT_VINFO_RELEVANT (vinfo_for_stmt (def_stmt))
          && !STMT_VINFO_LIVE_P (vinfo_for_stmt (def_stmt)))
        {
          pattern = true;
          if (!first && !oprnd_info->first_pattern)
            {
              if (vect_print_dump_info (REPORT_DETAILS))
                {
                  fprintf (vect_dump, "Build SLP failed: some of the stmts"
                           " are in a pattern, and others are not ");
                  print_generic_expr (vect_dump, oprnd, TDF_SLIM);
                }

              return false;
            }

          def_stmt = STMT_VINFO_RELATED_STMT (vinfo_for_stmt (def_stmt));
          dt = STMT_VINFO_DEF_TYPE (vinfo_for_stmt (def_stmt));

          if (dt == vect_unknown_def_type)
            {
              if (vect_print_dump_info (REPORT_DETAILS))
                fprintf (vect_dump, "Unsupported pattern.");
              return false;
            }

          switch (gimple_code (def_stmt))
            {
            case GIMPLE_PHI:
              def = gimple_phi_result (def_stmt);
              break;

            case GIMPLE_ASSIGN:
              def = gimple_assign_lhs (def_stmt);
              break;

            default:
              if (vect_print_dump_info (REPORT_DETAILS))
                fprintf (vect_dump, "unsupported defining stmt: ");
              return false;
            }
        }

      if (first)
        {
          oprnd_info->first_dt = dt;
          oprnd_info->first_pattern = pattern;
          if (def)
            {
              oprnd_info->first_def_type = TREE_TYPE (def);
              oprnd_info->first_const_oprnd = NULL_TREE;
            }
          else
            {
              oprnd_info->first_def_type = NULL_TREE;
              oprnd_info->first_const_oprnd = oprnd;
            }

          if (i == 0)
            {
              def_op0 = def;
              dt_op0 = dt;
              /* Analyze costs (for the first stmt of the group only).  */
              if (REFERENCE_CLASS_P (lhs))
                /* Store.  */
                vect_model_store_cost (stmt_info, ncopies_for_cost, false,
                                       dt, slp_node);
              else
                /* Not memory operation (we don't call this function for
                   loads).  */
                vect_model_simple_cost (stmt_info, ncopies_for_cost, &dt,
                                        slp_node);
            }
        }
      else
        {
          /* Not first stmt of the group, check that the def-stmt/s match
             the def-stmt/s of the first stmt.  Allow different definition
             types for reduction chains: the first stmt must be a
             vect_reduction_def (a phi node), and the rest
             vect_internal_def.  */
          if (((oprnd_info->first_dt != dt
                && !(oprnd_info->first_dt == vect_reduction_def
                     && dt == vect_internal_def))
               || (oprnd_info->first_def_type != NULL_TREE
                   && def
                   && !types_compatible_p (oprnd_info->first_def_type,
                                           TREE_TYPE (def))))
              || (!def
                  && !types_compatible_p (TREE_TYPE (oprnd_info->first_const_oprnd),
                                          TREE_TYPE (oprnd)))
              || different_types)
            {
              if (number_of_oprnds != 2)
                {
                  if (vect_print_dump_info (REPORT_SLP))
                    fprintf (vect_dump, "Build SLP failed: different types ");

                  return false;
                }

              /* Try to swap operands in case of binary operation.  */
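              /* For instance (a hypothetical scalar group, not taken from
                 this file):
                   x0 = a0 + b0;
                   x1 = b1 + a1;
                 The second stmt can have its operands swapped so that both
                 stmts use the (a, b) order, provided the operation is
                 commutative and the definition types still match.  */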
              if (i == 0)
                different_types = true;
              else
                {
                  oprnd0_info = VEC_index (slp_oprnd_info, *oprnds_info, 0);
                  if (is_gimple_assign (stmt)
                      && (rhs_code = gimple_assign_rhs_code (stmt))
                      && TREE_CODE_CLASS (rhs_code) == tcc_binary
                      && commutative_tree_code (rhs_code)
                      && oprnd0_info->first_dt == dt
                      && oprnd_info->first_dt == dt_op0
                      && def_op0 && def
                      && !(oprnd0_info->first_def_type
                           && !types_compatible_p (oprnd0_info->first_def_type,
                                                   TREE_TYPE (def)))
                      && !(oprnd_info->first_def_type
                           && !types_compatible_p (oprnd_info->first_def_type,
                                                   TREE_TYPE (def_op0))))
                    {
                      if (vect_print_dump_info (REPORT_SLP))
                        {
                          fprintf (vect_dump, "Swapping operands of ");
                          print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
                        }

                      swap_tree_operands (stmt, gimple_assign_rhs1_ptr (stmt),
                                          gimple_assign_rhs2_ptr (stmt));
                    }
                  else
                    {
                      if (vect_print_dump_info (REPORT_SLP))
                        fprintf (vect_dump, "Build SLP failed: different types ");

                      return false;
                    }
                }
            }
        }

      /* Check the types of the definitions.  */
      switch (dt)
        {
        case vect_constant_def:
        case vect_external_def:
        case vect_reduction_def:
          break;

        case vect_internal_def:
          if (different_types)
            {
              oprnd0_info = VEC_index (slp_oprnd_info, *oprnds_info, 0);
              oprnd1_info = VEC_index (slp_oprnd_info, *oprnds_info, 1);
              if (i == 0)
                VEC_quick_push (gimple, oprnd1_info->def_stmts, def_stmt);
              else
                VEC_quick_push (gimple, oprnd0_info->def_stmts, def_stmt);
            }
          else
            VEC_quick_push (gimple, oprnd_info->def_stmts, def_stmt);

          break;

        default:
          /* FORNOW: Not supported.  */
          if (vect_print_dump_info (REPORT_SLP))
            {
              fprintf (vect_dump, "Build SLP failed: illegal type of def ");
              print_generic_expr (vect_dump, def, TDF_SLIM);
            }

          return false;
        }
    }

  return true;
}


/* Recursively build an SLP tree starting from NODE.
   Fail (and return FALSE) if def-stmts are not isomorphic, require data
   permutation or are of unsupported types of operation.  Otherwise, return
   TRUE.  */
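/* For illustration (a hypothetical scalar group, not taken from this file),
   two isomorphic stores such as
     a[0] = b[0] + c[0];
     a[1] = b[1] + c[1];
   yield a tree whose root holds the two stores and whose children hold the
   def-stmts of the b[i] and c[i] operands, built recursively by the same
   rules.  */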

static bool
vect_build_slp_tree (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo,
                     slp_tree *node, unsigned int group_size,
                     int *inside_cost, int *outside_cost,
                     int ncopies_for_cost, unsigned int *max_nunits,
                     VEC (int, heap) **load_permutation,
                     VEC (slp_tree, heap) **loads,
                     unsigned int vectorization_factor, bool *loads_permuted)
{
  unsigned int i;
  VEC (gimple, heap) *stmts = SLP_TREE_SCALAR_STMTS (*node);
  gimple stmt = VEC_index (gimple, stmts, 0);
  enum tree_code first_stmt_code = ERROR_MARK, rhs_code = ERROR_MARK;
  enum tree_code first_cond_code = ERROR_MARK;
  tree lhs;
  bool stop_recursion = false, need_same_oprnds = false;
  tree vectype, scalar_type, first_op1 = NULL_TREE;
  unsigned int ncopies;
  optab optab;
  int icode;
  enum machine_mode optab_op2_mode;
  enum machine_mode vec_mode;
  struct data_reference *first_dr;
  HOST_WIDE_INT dummy;
  bool permutation = false;
  unsigned int load_place;
  gimple first_load, prev_first_load = NULL;
  VEC (slp_oprnd_info, heap) *oprnds_info;
  unsigned int nops;
  slp_oprnd_info oprnd_info;
  tree cond;

  if (is_gimple_call (stmt))
    nops = gimple_call_num_args (stmt);
  else if (is_gimple_assign (stmt))
    {
      nops = gimple_num_ops (stmt) - 1;
      if (gimple_assign_rhs_code (stmt) == COND_EXPR)
        nops++;
    }
  else
    return false;

  oprnds_info = vect_create_oprnd_info (nops, group_size);

  /* For every stmt in NODE find its def stmt/s.  */
  FOR_EACH_VEC_ELT (gimple, stmts, i, stmt)
    {
      if (vect_print_dump_info (REPORT_SLP))
        {
          fprintf (vect_dump, "Build SLP for ");
          print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
        }

      /* Fail to vectorize statements marked as unvectorizable.  */
      if (!STMT_VINFO_VECTORIZABLE (vinfo_for_stmt (stmt)))
        {
          if (vect_print_dump_info (REPORT_SLP))
            {
              fprintf (vect_dump,
                       "Build SLP failed: unvectorizable statement ");
              print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
            }

          vect_free_oprnd_info (&oprnds_info);
          return false;
        }

      lhs = gimple_get_lhs (stmt);
      if (lhs == NULL_TREE)
        {
          if (vect_print_dump_info (REPORT_SLP))
            {
              fprintf (vect_dump,
                       "Build SLP failed: not GIMPLE_ASSIGN nor GIMPLE_CALL ");
              print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
            }

          vect_free_oprnd_info (&oprnds_info);
          return false;
        }

      if (is_gimple_assign (stmt)
          && gimple_assign_rhs_code (stmt) == COND_EXPR
          && (cond = gimple_assign_rhs1 (stmt))
          && !COMPARISON_CLASS_P (cond))
        {
          if (vect_print_dump_info (REPORT_SLP))
            {
              fprintf (vect_dump,
                       "Build SLP failed: condition is not comparison ");
              print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
            }

          vect_free_oprnd_info (&oprnds_info);
          return false;
        }

      scalar_type = vect_get_smallest_scalar_type (stmt, &dummy, &dummy);
      vectype = get_vectype_for_scalar_type (scalar_type);
      if (!vectype)
        {
          if (vect_print_dump_info (REPORT_SLP))
            {
              fprintf (vect_dump, "Build SLP failed: unsupported data-type ");
              print_generic_expr (vect_dump, scalar_type, TDF_SLIM);
            }

          vect_free_oprnd_info (&oprnds_info);
          return false;
        }

      /* In case of multiple types we need to detect the smallest type.  */
      if (*max_nunits < TYPE_VECTOR_SUBPARTS (vectype))
        {
          *max_nunits = TYPE_VECTOR_SUBPARTS (vectype);
          if (bb_vinfo)
            vectorization_factor = *max_nunits;
        }

      ncopies = vectorization_factor / TYPE_VECTOR_SUBPARTS (vectype);

      if (is_gimple_call (stmt))
        {
          rhs_code = CALL_EXPR;
          if (gimple_call_internal_p (stmt)
              || gimple_call_tail_p (stmt)
              || gimple_call_noreturn_p (stmt)
              || !gimple_call_nothrow_p (stmt)
              || gimple_call_chain (stmt))
            {
              if (vect_print_dump_info (REPORT_SLP))
                {
                  fprintf (vect_dump,
                           "Build SLP failed: unsupported call type ");
                  print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
                }

              vect_free_oprnd_info (&oprnds_info);
              return false;
            }
        }
      else
        rhs_code = gimple_assign_rhs_code (stmt);

      /* Check the operation.  */
      if (i == 0)
        {
          first_stmt_code = rhs_code;

          /* Shift arguments should be equal in all the packed stmts for a
             vector shift with scalar shift operand.  */
          if (rhs_code == LSHIFT_EXPR || rhs_code == RSHIFT_EXPR
              || rhs_code == LROTATE_EXPR
              || rhs_code == RROTATE_EXPR)
            {
              vec_mode = TYPE_MODE (vectype);

              /* First see if we have a vector/vector shift.  */
              optab = optab_for_tree_code (rhs_code, vectype,
                                           optab_vector);

              if (!optab
                  || optab_handler (optab, vec_mode) == CODE_FOR_nothing)
                {
                  /* No vector/vector shift, try for a vector/scalar shift.  */
                  optab = optab_for_tree_code (rhs_code, vectype,
                                               optab_scalar);

                  if (!optab)
                    {
                      if (vect_print_dump_info (REPORT_SLP))
                        fprintf (vect_dump, "Build SLP failed: no optab.");
                      vect_free_oprnd_info (&oprnds_info);
                      return false;
                    }
                  icode = (int) optab_handler (optab, vec_mode);
                  if (icode == CODE_FOR_nothing)
                    {
                      if (vect_print_dump_info (REPORT_SLP))
                        fprintf (vect_dump, "Build SLP failed: "
                                 "op not supported by target.");
                      vect_free_oprnd_info (&oprnds_info);
                      return false;
                    }
                  optab_op2_mode = insn_data[icode].operand[2].mode;
                  if (!VECTOR_MODE_P (optab_op2_mode))
                    {
                      need_same_oprnds = true;
                      first_op1 = gimple_assign_rhs2 (stmt);
                    }
                }
            }
          else if (rhs_code == WIDEN_LSHIFT_EXPR)
            {
              need_same_oprnds = true;
              first_op1 = gimple_assign_rhs2 (stmt);
            }
        }
      else
        {
          if (first_stmt_code != rhs_code
              && (first_stmt_code != IMAGPART_EXPR
                  || rhs_code != REALPART_EXPR)
              && (first_stmt_code != REALPART_EXPR
                  || rhs_code != IMAGPART_EXPR)
              && !(STMT_VINFO_STRIDED_ACCESS (vinfo_for_stmt (stmt))
                   && (first_stmt_code == ARRAY_REF
                       || first_stmt_code == INDIRECT_REF
                       || first_stmt_code == COMPONENT_REF
                       || first_stmt_code == MEM_REF)))
            {
              if (vect_print_dump_info (REPORT_SLP))
                {
                  fprintf (vect_dump,
                           "Build SLP failed: different operation in stmt ");
                  print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
                }

              vect_free_oprnd_info (&oprnds_info);
              return false;
            }

          if (need_same_oprnds
              && !operand_equal_p (first_op1, gimple_assign_rhs2 (stmt), 0))
            {
              if (vect_print_dump_info (REPORT_SLP))
                {
                  fprintf (vect_dump,
                           "Build SLP failed: different shift arguments in ");
                  print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
                }

              vect_free_oprnd_info (&oprnds_info);
              return false;
            }

          if (rhs_code == CALL_EXPR)
            {
              gimple first_stmt = VEC_index (gimple, stmts, 0);
              if (gimple_call_num_args (stmt) != nops
                  || !operand_equal_p (gimple_call_fn (first_stmt),
                                       gimple_call_fn (stmt), 0)
                  || gimple_call_fntype (first_stmt)
                     != gimple_call_fntype (stmt))
                {
                  if (vect_print_dump_info (REPORT_SLP))
                    {
                      fprintf (vect_dump,
                               "Build SLP failed: different calls in ");
                      print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
                    }

                  vect_free_oprnd_info (&oprnds_info);
                  return false;
                }
            }
        }

      /* Strided store or load.  */
      if (STMT_VINFO_STRIDED_ACCESS (vinfo_for_stmt (stmt)))
        {
          if (REFERENCE_CLASS_P (lhs))
            {
              /* Store.  */
              if (!vect_get_and_check_slp_defs (loop_vinfo, bb_vinfo, *node,
                                                stmt, ncopies_for_cost,
                                                (i == 0), &oprnds_info))
                {
                  vect_free_oprnd_info (&oprnds_info);
                  return false;
                }
            }
          else
            {
              /* Load.  */
              /* FORNOW: Check that there is no gap between the loads.  */
              if ((GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)) == stmt
                   && GROUP_GAP (vinfo_for_stmt (stmt)) != 0)
                  || (GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)) != stmt
                      && GROUP_GAP (vinfo_for_stmt (stmt)) != 1))
                {
                  if (vect_print_dump_info (REPORT_SLP))
                    {
                      fprintf (vect_dump, "Build SLP failed: strided "
                               "loads have gaps ");
                      print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
                    }

                  vect_free_oprnd_info (&oprnds_info);
                  return false;
                }

              /* Check that the size of interleaved loads group is not
                 greater than the SLP group size.  */
              if (loop_vinfo
                  && GROUP_SIZE (vinfo_for_stmt (stmt)) > ncopies * group_size)
                {
                  if (vect_print_dump_info (REPORT_SLP))
                    {
                      fprintf (vect_dump, "Build SLP failed: the number of "
                               "interleaved loads is greater than"
                               " the SLP group size ");
                      print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
                    }

                  vect_free_oprnd_info (&oprnds_info);
                  return false;
                }

              first_load = GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt));
              if (prev_first_load)
                {
                  /* Check that there are no loads from different interleaving
                     chains in the same node.  The only exception is complex
                     numbers.  */
                  if (prev_first_load != first_load
                      && rhs_code != REALPART_EXPR
                      && rhs_code != IMAGPART_EXPR)
                    {
                      if (vect_print_dump_info (REPORT_SLP))
                        {
                          fprintf (vect_dump, "Build SLP failed: different "
                                   "interleaving chains in one node ");
                          print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
                        }

                      vect_free_oprnd_info (&oprnds_info);
                      return false;
                    }
                }
              else
                prev_first_load = first_load;

              if (first_load == stmt)
                {
                  first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (stmt));
                  if (vect_supportable_dr_alignment (first_dr, false)
                      == dr_unaligned_unsupported)
                    {
                      if (vect_print_dump_info (REPORT_SLP))
                        {
                          fprintf (vect_dump, "Build SLP failed: unsupported "
                                   "unaligned load ");
                          print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
                        }

                      vect_free_oprnd_info (&oprnds_info);
                      return false;
                    }

                  /* Analyze costs (for the first stmt in the group).  */
                  vect_model_load_cost (vinfo_for_stmt (stmt),
                                        ncopies_for_cost, false, *node);
                }

              /* Store the place of this load in the interleaving chain.  In
                 case that permutation is needed we later decide if a specific
                 permutation is supported.  */
              load_place = vect_get_place_in_interleaving_chain (stmt,
                                                                 first_load);
              if (load_place != i)
                permutation = true;

              VEC_safe_push (int, heap, *load_permutation, load_place);

              /* We stop the tree when we reach a group of loads.  */
              stop_recursion = true;
              continue;
            }
        } /* Strided access.  */
      else
        {
          if (TREE_CODE_CLASS (rhs_code) == tcc_reference)
            {
              /* Not strided load.  */
              if (vect_print_dump_info (REPORT_SLP))
                {
                  fprintf (vect_dump, "Build SLP failed: not strided load ");
                  print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
                }

              /* FORNOW: non-strided loads are not supported.  */
              vect_free_oprnd_info (&oprnds_info);
              return false;
            }

          /* Not memory operation.  */
          if (TREE_CODE_CLASS (rhs_code) != tcc_binary
              && TREE_CODE_CLASS (rhs_code) != tcc_unary
              && rhs_code != COND_EXPR
              && rhs_code != CALL_EXPR)
            {
              if (vect_print_dump_info (REPORT_SLP))
                {
                  fprintf (vect_dump, "Build SLP failed: operation");
                  fprintf (vect_dump, " unsupported ");
                  print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
                }

              vect_free_oprnd_info (&oprnds_info);
              return false;
            }

          if (rhs_code == COND_EXPR)
            {
              tree cond_expr = gimple_assign_rhs1 (stmt);

              if (i == 0)
                first_cond_code = TREE_CODE (cond_expr);
              else if (first_cond_code != TREE_CODE (cond_expr))
                {
                  if (vect_print_dump_info (REPORT_SLP))
                    {
                      fprintf (vect_dump, "Build SLP failed: different"
                               " operation");
                      print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
                    }

                  vect_free_oprnd_info (&oprnds_info);
                  return false;
                }
            }

          /* Find the def-stmts.  */
          if (!vect_get_and_check_slp_defs (loop_vinfo, bb_vinfo, *node, stmt,
                                            ncopies_for_cost, (i == 0),
                                            &oprnds_info))
            {
              vect_free_oprnd_info (&oprnds_info);
              return false;
            }
        }
    }

  /* Add the costs of the node to the overall instance costs.  */
  *inside_cost += SLP_TREE_INSIDE_OF_LOOP_COST (*node);
  *outside_cost += SLP_TREE_OUTSIDE_OF_LOOP_COST (*node);

  /* Strided loads were reached - stop the recursion.  */
  if (stop_recursion)
    {
      VEC_safe_push (slp_tree, heap, *loads, *node);
      if (permutation)
        {
          *loads_permuted = true;
          *inside_cost
            += targetm.vectorize.builtin_vectorization_cost (vec_perm, NULL, 0)
               * group_size;
        }
      else
        {
          /* We do not check complex-number chains here, so set
             LOADS_PERMUTED for a further check in
             vect_supported_load_permutation_p.  */
          if (rhs_code == REALPART_EXPR || rhs_code == IMAGPART_EXPR)
            *loads_permuted = true;
        }

      vect_free_oprnd_info (&oprnds_info);
      return true;
    }

  /* Create SLP_TREE nodes for the definition node/s.  */
  FOR_EACH_VEC_ELT (slp_oprnd_info, oprnds_info, i, oprnd_info)
    {
      slp_tree child;

      if (oprnd_info->first_dt != vect_internal_def)
        continue;

      child = vect_create_new_slp_node (oprnd_info->def_stmts);
      if (!child
          || !vect_build_slp_tree (loop_vinfo, bb_vinfo, &child, group_size,
                                   inside_cost, outside_cost, ncopies_for_cost,
                                   max_nunits, load_permutation, loads,
                                   vectorization_factor, loads_permuted))
        {
          if (child)
            oprnd_info->def_stmts = NULL;
          vect_free_slp_tree (child);
          vect_free_oprnd_info (&oprnds_info);
          return false;
        }

      oprnd_info->def_stmts = NULL;
      VEC_quick_push (slp_void_p, SLP_TREE_CHILDREN (*node), child);
    }

  vect_free_oprnd_info (&oprnds_info);
  return true;
}


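/* Dump the SLP tree rooted at NODE to the vectorizer dump file.  */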
static void
vect_print_slp_tree (slp_tree node)
{
  int i;
  gimple stmt;
  slp_void_p child;

  if (!node)
    return;

  fprintf (vect_dump, "node ");
  FOR_EACH_VEC_ELT (gimple, SLP_TREE_SCALAR_STMTS (node), i, stmt)
    {
      fprintf (vect_dump, "\n\tstmt %d ", i);
      print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
    }
  fprintf (vect_dump, "\n");

  FOR_EACH_VEC_ELT (slp_void_p, SLP_TREE_CHILDREN (node), i, child)
    vect_print_slp_tree ((slp_tree) child);
}


/* Mark the tree rooted at NODE with MARK (PURE_SLP or HYBRID).
   If MARK is HYBRID, it refers to a specific stmt in NODE (the stmt at index
   J).  Otherwise, MARK is PURE_SLP and J is -1, which indicates that all the
   stmts in NODE are to be marked.  */

static void
vect_mark_slp_stmts (slp_tree node, enum slp_vect_type mark, int j)
{
  int i;
  gimple stmt;
  slp_void_p child;

  if (!node)
    return;

  FOR_EACH_VEC_ELT (gimple, SLP_TREE_SCALAR_STMTS (node), i, stmt)
    if (j < 0 || i == j)
      STMT_SLP_TYPE (vinfo_for_stmt (stmt)) = mark;

  FOR_EACH_VEC_ELT (slp_void_p, SLP_TREE_CHILDREN (node), i, child)
    vect_mark_slp_stmts ((slp_tree) child, mark, j);
}


/* Mark the statements of the tree rooted at NODE as relevant (vect_used).  */

static void
vect_mark_slp_stmts_relevant (slp_tree node)
{
  int i;
  gimple stmt;
  stmt_vec_info stmt_info;
  slp_void_p child;

  if (!node)
    return;

  FOR_EACH_VEC_ELT (gimple, SLP_TREE_SCALAR_STMTS (node), i, stmt)
    {
      stmt_info = vinfo_for_stmt (stmt);
      gcc_assert (!STMT_VINFO_RELEVANT (stmt_info)
                  || STMT_VINFO_RELEVANT (stmt_info) == vect_used_in_scope);
      STMT_VINFO_RELEVANT (stmt_info) = vect_used_in_scope;
    }

  FOR_EACH_VEC_ELT (slp_void_p, SLP_TREE_CHILDREN (node), i, child)
    vect_mark_slp_stmts_relevant ((slp_tree) child);
}


/* Check if the permutation required by the SLP INSTANCE is supported.
   Reorganize the SLP nodes stored in SLP_INSTANCE_LOADS if needed.  */

static bool
vect_supported_slp_permutation_p (slp_instance instance)
{
  slp_tree node = VEC_index (slp_tree, SLP_INSTANCE_LOADS (instance), 0);
  gimple stmt = VEC_index (gimple, SLP_TREE_SCALAR_STMTS (node), 0);
  gimple first_load = GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt));
  VEC (slp_tree, heap) *sorted_loads = NULL;
  int index;
  slp_tree *tmp_loads = NULL;
  int group_size = SLP_INSTANCE_GROUP_SIZE (instance), i, j;
  slp_tree load;

  /* FORNOW: The only supported loads permutation is loads from the same
     location in all the loads in the node, when the data-refs in
     nodes of LOADS constitute an interleaving chain.
     Sort the nodes according to the order of accesses in the chain.  */
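  /* E.g. (hypothetical): if LOADS holds a node accessing dr1 followed by a
     node accessing dr0, and dr0 precedes dr1 in the interleaving chain, the
     nodes are reordered so that the dr0 node comes first.  */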
  tmp_loads = (slp_tree *) xmalloc (sizeof (slp_tree) * group_size);
  for (i = 0, j = 0;
       VEC_iterate (int, SLP_INSTANCE_LOAD_PERMUTATION (instance), i, index)
       && VEC_iterate (slp_tree, SLP_INSTANCE_LOADS (instance), j, load);
       i += group_size, j++)
    {
      gimple scalar_stmt = VEC_index (gimple, SLP_TREE_SCALAR_STMTS (load), 0);
      /* Check that the loads are all in the same interleaving chain.  */
      if (GROUP_FIRST_ELEMENT (vinfo_for_stmt (scalar_stmt)) != first_load)
        {
          if (vect_print_dump_info (REPORT_DETAILS))
            {
              fprintf (vect_dump, "Build SLP failed: unsupported data "
                       "permutation ");
              print_gimple_stmt (vect_dump, scalar_stmt, 0, TDF_SLIM);
            }

          free (tmp_loads);
          return false;
        }

      tmp_loads[index] = load;
    }

  sorted_loads = VEC_alloc (slp_tree, heap, group_size);
  for (i = 0; i < group_size; i++)
    VEC_safe_push (slp_tree, heap, sorted_loads, tmp_loads[i]);

  VEC_free (slp_tree, heap, SLP_INSTANCE_LOADS (instance));
  SLP_INSTANCE_LOADS (instance) = sorted_loads;
  free (tmp_loads);

  if (!vect_transform_slp_perm_load (stmt, NULL, NULL,
                                     SLP_INSTANCE_UNROLLING_FACTOR (instance),
                                     instance, true))
    return false;

  return true;
}


/* Rearrange the statements of NODE according to PERMUTATION.  */

static void
vect_slp_rearrange_stmts (slp_tree node, unsigned int group_size,
                          VEC (int, heap) *permutation)
{
  gimple stmt;
  VEC (gimple, heap) *tmp_stmts;
  unsigned int index, i;
  slp_void_p child;

  if (!node)
    return;

  FOR_EACH_VEC_ELT (slp_void_p, SLP_TREE_CHILDREN (node), i, child)
    vect_slp_rearrange_stmts ((slp_tree) child, group_size, permutation);

  gcc_assert (group_size == VEC_length (gimple, SLP_TREE_SCALAR_STMTS (node)));
  tmp_stmts = VEC_alloc (gimple, heap, group_size);

  for (i = 0; i < group_size; i++)
    VEC_safe_push (gimple, heap, tmp_stmts, NULL);

  FOR_EACH_VEC_ELT (gimple, SLP_TREE_SCALAR_STMTS (node), i, stmt)
    {
      index = VEC_index (int, permutation, i);
      VEC_replace (gimple, tmp_stmts, index, stmt);
    }

  VEC_free (gimple, heap, SLP_TREE_SCALAR_STMTS (node));
  SLP_TREE_SCALAR_STMTS (node) = tmp_stmts;
}


/* Check if the required load permutation is supported.
   LOAD_PERMUTATION contains a list of indices of the loads.
   In SLP this permutation is relative to the order of strided stores that are
   the base of the SLP instance.  */
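/* E.g. (hypothetical): for a group of four loads fetched in the order
   {a[0], a[2], a[1], a[3]}, LOAD_PERMUTATION contains {0, 2, 1, 3}.  */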

static bool
vect_supported_load_permutation_p (slp_instance slp_instn, int group_size,
                                   VEC (int, heap) *load_permutation)
{
  int i = 0, j, prev = -1, next, k, number_of_groups;
  bool supported, bad_permutation = false;
  sbitmap load_index;
  slp_tree node, other_complex_node;
  gimple stmt, first = NULL, other_node_first, load, next_load, first_load;
  unsigned complex_numbers = 0;
  struct data_reference *dr;
  bb_vec_info bb_vinfo;

  /* FORNOW: permutations are only supported in SLP.  */
  if (!slp_instn)
    return false;

  if (vect_print_dump_info (REPORT_SLP))
    {
      fprintf (vect_dump, "Load permutation ");
      FOR_EACH_VEC_ELT (int, load_permutation, i, next)
        fprintf (vect_dump, "%d ", next);
    }

  /* In case of reduction every load permutation is allowed, since the order
     of the reduction statements is not important (as opposed to the case of
     strided stores).  The only condition we need to check is that all the
     load nodes are of the same size and have the same permutation (and then
     rearrange all the nodes of the SLP instance according to this
     permutation).  */

  /* Check that all the load nodes are of the same size.  */
  FOR_EACH_VEC_ELT (slp_tree, SLP_INSTANCE_LOADS (slp_instn), i, node)
    {
      if (VEC_length (gimple, SLP_TREE_SCALAR_STMTS (node))
          != (unsigned) group_size)
        return false;

      stmt = VEC_index (gimple, SLP_TREE_SCALAR_STMTS (node), 0);
      if (is_gimple_assign (stmt)
          && (gimple_assign_rhs_code (stmt) == REALPART_EXPR
              || gimple_assign_rhs_code (stmt) == IMAGPART_EXPR))
        complex_numbers++;
    }

  /* Complex operands can be swapped as following:
      real_c = real_b + real_a;
      imag_c = imag_a + imag_b;
     i.e., we have {real_b, imag_a} and {real_a, imag_b} instead of
     {real_a, imag_a} and {real_b, imag_b}.  We check here that if interleaving
     chains are mixed, they match the above pattern.  */
  if (complex_numbers)
    {
      FOR_EACH_VEC_ELT (slp_tree, SLP_INSTANCE_LOADS (slp_instn), i, node)
        {
          FOR_EACH_VEC_ELT (gimple, SLP_TREE_SCALAR_STMTS (node), j, stmt)
            {
              if (j == 0)
                first = stmt;
              else
                {
                  if (GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)) != first)
                    {
                      if (complex_numbers != 2)
                        return false;

                      if (i == 0)
                        k = 1;
                      else
                        k = 0;

                      other_complex_node = VEC_index (slp_tree,
                                         SLP_INSTANCE_LOADS (slp_instn), k);
                      other_node_first = VEC_index (gimple,
                               SLP_TREE_SCALAR_STMTS (other_complex_node), 0);

                      if (GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt))
                          != other_node_first)
                        return false;
                    }
                }
            }
        }
    }

  /* We verified that this case is OK, so there is no need to proceed with
     permutation tests.  */
  if (complex_numbers == 2)
    {
      VEC_free (slp_tree, heap, SLP_INSTANCE_LOADS (slp_instn));
      VEC_free (int, heap, SLP_INSTANCE_LOAD_PERMUTATION (slp_instn));
      return true;
    }

  node = SLP_INSTANCE_TREE (slp_instn);
  stmt = VEC_index (gimple, SLP_TREE_SCALAR_STMTS (node), 0);
  /* LOAD_PERMUTATION is a list of indices of all the loads of the SLP
     instance, not all the loads belong to the same node or interleaving
     group.  Hence, we need to divide them into groups according to
     GROUP_SIZE.  */
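  /* E.g. (illustrative numbers): eight indices with GROUP_SIZE == 4 form
     two groups of four indices each.  */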
  number_of_groups = VEC_length (int, load_permutation) / group_size;

  /* Reduction (there are no data-refs in the root).
     In a reduction chain the order of the loads is important.  */
  if (!STMT_VINFO_DATA_REF (vinfo_for_stmt (stmt))
      && !GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)))
    {
      int first_group_load_index;

      /* Compare all the permutation sequences to the first one.  */
      for (i = 1; i < number_of_groups; i++)
        {
          k = 0;
          for (j = i * group_size; j < i * group_size + group_size; j++)
            {
              next = VEC_index (int, load_permutation, j);
              first_group_load_index = VEC_index (int, load_permutation, k);

              if (next != first_group_load_index)
                {
                  bad_permutation = true;
                  break;
                }

              k++;
            }

          if (bad_permutation)
            break;
        }

      if (!bad_permutation)
        {
          /* Check that the loads in the first sequence are different and there
             are no gaps between them.  */
          load_index = sbitmap_alloc (group_size);
          sbitmap_zero (load_index);
          for (k = 0; k < group_size; k++)
            {
              first_group_load_index = VEC_index (int, load_permutation, k);
              if (TEST_BIT (load_index, first_group_load_index))
                {
                  bad_permutation = true;
                  break;
                }

              SET_BIT (load_index, first_group_load_index);
            }

          if (!bad_permutation)
            for (k = 0; k < group_size; k++)
              if (!TEST_BIT (load_index, k))
                {
                  bad_permutation = true;
                  break;
                }

          sbitmap_free (load_index);
        }

      if (!bad_permutation)
        {
          /* This permutation is valid for reduction.  Since the order of the
             statements in the nodes is not important unless they are memory
             accesses, we can rearrange the statements in all the nodes
             according to the order of the loads.  */
          vect_slp_rearrange_stmts (SLP_INSTANCE_TREE (slp_instn), group_size,
                                    load_permutation);
          VEC_free (int, heap, SLP_INSTANCE_LOAD_PERMUTATION (slp_instn));
          return true;
        }
    }

  /* In basic block vectorization we allow any subchain of an interleaving
     chain.
     FORNOW: not supported in loop SLP because of realignment complications.  */
  bb_vinfo = STMT_VINFO_BB_VINFO (vinfo_for_stmt (stmt));
  bad_permutation = false;
  /* Check that for every node in the instance the loads form a subchain.  */
  if (bb_vinfo)
    {
      FOR_EACH_VEC_ELT (slp_tree, SLP_INSTANCE_LOADS (slp_instn), i, node)
        {
          next_load = NULL;
          first_load = NULL;
          FOR_EACH_VEC_ELT (gimple, SLP_TREE_SCALAR_STMTS (node), j, load)
            {
              if (!first_load)
                first_load = GROUP_FIRST_ELEMENT (vinfo_for_stmt (load));
              else if (first_load
                       != GROUP_FIRST_ELEMENT (vinfo_for_stmt (load)))
                {
                  bad_permutation = true;
                  break;
                }

              if (j != 0 && next_load != load)
                {
                  bad_permutation = true;
                  break;
                }

              next_load = GROUP_NEXT_ELEMENT (vinfo_for_stmt (load));
            }

          if (bad_permutation)
            break;
        }

      /* Check that the alignment of the first load in every subchain, i.e.,
         the first statement in every load node, is supported.  */
      if (!bad_permutation)
        {
          FOR_EACH_VEC_ELT (slp_tree, SLP_INSTANCE_LOADS (slp_instn), i, node)
            {
              first_load = VEC_index (gimple, SLP_TREE_SCALAR_STMTS (node), 0);
              if (first_load
                  != GROUP_FIRST_ELEMENT (vinfo_for_stmt (first_load)))
                {
                  dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_load));
                  if (vect_supportable_dr_alignment (dr, false)
                      == dr_unaligned_unsupported)
                    {
                      if (vect_print_dump_info (REPORT_SLP))
                        {
                          fprintf (vect_dump, "unsupported unaligned load ");
                          print_gimple_stmt (vect_dump, first_load, 0,
                                             TDF_SLIM);
                        }
                      bad_permutation = true;
                      break;
                    }
                }
            }

          if (!bad_permutation)
            {
              VEC_free (int, heap, SLP_INSTANCE_LOAD_PERMUTATION (slp_instn));
              return true;
            }
        }
    }

  /* FORNOW: the only supported permutation is 0..01..1.., in which each run
     of identical drs has length GROUP_SIZE, giving a total length of
     GROUP_SIZE * GROUP_SIZE (unless this is a reduction).  */
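  /* E.g. (illustrative): for GROUP_SIZE == 2 a supported permutation is
     {0, 0, 1, 1}: both scalar stmts of the first group read from dr0, both
     of the second group from dr1.  */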
  if (VEC_length (int, load_permutation)
      != (unsigned int) (group_size * group_size))
    return false;

  supported = true;
  load_index = sbitmap_alloc (group_size);
  sbitmap_zero (load_index);
  for (j = 0; j < group_size; j++)
    {
      for (i = j * group_size, k = 0;
           VEC_iterate (int, load_permutation, i, next) && k < group_size;
           i++, k++)
        {
          if (i != j * group_size && next != prev)
            {
              supported = false;
              break;
            }

          prev = next;
        }

      if (TEST_BIT (load_index, prev))
        {
          supported = false;
          break;
        }

      SET_BIT (load_index, prev);
    }

  for (j = 0; j < group_size; j++)
    if (!TEST_BIT (load_index, j))
      {
        /* Free the bitmap before the early return to avoid leaking it.  */
        sbitmap_free (load_index);
        return false;
      }

  sbitmap_free (load_index);

  if (supported && i == group_size * group_size
      && vect_supported_slp_permutation_p (slp_instn))
    return true;

  return false;
}


/* Find the first load in the loop that belongs to INSTANCE.
   When loads are in several SLP nodes, there can be a case in which the first
   load does not appear in the first SLP node to be transformed, causing
   incorrect order of statements.  Since we generate all the loads together,
   they must be inserted before the first load of the SLP instance and not
   before the first load of the first node of the instance.  */

static gimple
vect_find_first_load_in_slp_instance (slp_instance instance)
{
  int i, j;
  slp_tree load_node;
  gimple first_load = NULL, load;

  FOR_EACH_VEC_ELT (slp_tree, SLP_INSTANCE_LOADS (instance), i, load_node)
    FOR_EACH_VEC_ELT (gimple, SLP_TREE_SCALAR_STMTS (load_node), j, load)
      first_load = get_earlier_stmt (load, first_load);

  return first_load;
}


/* Find the last store in SLP INSTANCE.  */

static gimple
vect_find_last_store_in_slp_instance (slp_instance instance)
{
  int i;
  slp_tree node;
  gimple last_store = NULL, store;

  node = SLP_INSTANCE_TREE (instance);
  for (i = 0;
       VEC_iterate (gimple, SLP_TREE_SCALAR_STMTS (node), i, store);
       i++)
    last_store = get_later_stmt (store, last_store);

  return last_store;
}


/* Analyze an SLP instance starting from a group of strided stores.  Call
   vect_build_slp_tree to build a tree of packed stmts if possible.
   Return FALSE if it's impossible to SLP any stmt in the loop.  */

static bool
vect_analyze_slp_instance (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo,
                           gimple stmt)
{
  slp_instance new_instance;
  slp_tree node;
  unsigned int group_size = GROUP_SIZE (vinfo_for_stmt (stmt));
  unsigned int unrolling_factor = 1, nunits;
  tree vectype, scalar_type = NULL_TREE;
  gimple next;
  unsigned int vectorization_factor = 0;
  int inside_cost = 0, outside_cost = 0, ncopies_for_cost, i;
  unsigned int max_nunits = 0;
  VEC (int, heap) *load_permutation;
  VEC (slp_tree, heap) *loads;
  struct data_reference *dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (stmt));
  bool loads_permuted = false;
  VEC (gimple, heap) *scalar_stmts;

  if (GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)))
    {
      if (dr)
        {
          scalar_type = TREE_TYPE (DR_REF (dr));
          vectype = get_vectype_for_scalar_type (scalar_type);
        }
      else
        {
          gcc_assert (loop_vinfo);
          vectype = STMT_VINFO_VECTYPE (vinfo_for_stmt (stmt));
        }

      group_size = GROUP_SIZE (vinfo_for_stmt (stmt));
    }
  else
    {
      gcc_assert (loop_vinfo);
      vectype = STMT_VINFO_VECTYPE (vinfo_for_stmt (stmt));
      group_size = VEC_length (gimple, LOOP_VINFO_REDUCTIONS (loop_vinfo));
    }

  if (!vectype)
    {
      if (vect_print_dump_info (REPORT_SLP))
        {
          fprintf (vect_dump, "Build SLP failed: unsupported data-type ");
          print_generic_expr (vect_dump, scalar_type, TDF_SLIM);
        }

      return false;
    }

  nunits = TYPE_VECTOR_SUBPARTS (vectype);
  if (loop_vinfo)
    vectorization_factor = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
  else
    vectorization_factor = nunits;

  /* Calculate the unrolling factor.  */
  unrolling_factor = least_common_multiple (nunits, group_size) / group_size;
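  /* E.g. (illustrative numbers): nunits == 4 and group_size == 6 give
     least_common_multiple == 12, so the instance must be unrolled by a
     factor of 12 / 6 == 2 to fill whole vectors.  */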
  if (unrolling_factor != 1 && !loop_vinfo)
    {
      if (vect_print_dump_info (REPORT_SLP))
        fprintf (vect_dump, "Build SLP failed: unrolling required in basic"
                 " block SLP");

      return false;
    }

  /* Create a node (a root of the SLP tree) for the packed strided stores.  */
  scalar_stmts = VEC_alloc (gimple, heap, group_size);
  next = stmt;
  if (GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)))
    {
      /* Collect the stores and store them in SLP_TREE_SCALAR_STMTS.  */
      while (next)
        {
          if (STMT_VINFO_IN_PATTERN_P (vinfo_for_stmt (next))
              && STMT_VINFO_RELATED_STMT (vinfo_for_stmt (next)))
            VEC_safe_push (gimple, heap, scalar_stmts,
                           STMT_VINFO_RELATED_STMT (vinfo_for_stmt (next)));
          else
            VEC_safe_push (gimple, heap, scalar_stmts, next);
          next = GROUP_NEXT_ELEMENT (vinfo_for_stmt (next));
        }
    }
  else
    {
      /* Collect reduction statements.  */
      VEC (gimple, heap) *reductions = LOOP_VINFO_REDUCTIONS (loop_vinfo);
      for (i = 0; VEC_iterate (gimple, reductions, i, next); i++)
        VEC_safe_push (gimple, heap, scalar_stmts, next);
    }

  node = vect_create_new_slp_node (scalar_stmts);

  /* Calculate the number of vector stmts to create based on the unrolling
     factor (number of vectors is 1 if NUNITS >= GROUP_SIZE, and is
     GROUP_SIZE / NUNITS otherwise).  */
  ncopies_for_cost = unrolling_factor * group_size / nunits;
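  /* E.g. (illustrative numbers): nunits == 4, group_size == 8 and
     unrolling_factor == 1 give ncopies_for_cost == 8 / 4 == 2 vector
     stmts per scalar group.  */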

  load_permutation = VEC_alloc (int, heap, group_size * group_size);
  loads = VEC_alloc (slp_tree, heap, group_size);

  /* Build the tree for the SLP instance.  */
  if (vect_build_slp_tree (loop_vinfo, bb_vinfo, &node, group_size,
                           &inside_cost, &outside_cost, ncopies_for_cost,
                           &max_nunits, &load_permutation, &loads,
                           vectorization_factor, &loads_permuted))
    {
      /* Calculate the unrolling factor based on the smallest type.  */
      if (max_nunits > nunits)
        unrolling_factor = least_common_multiple (max_nunits, group_size)
                           / group_size;

      if (unrolling_factor != 1 && !loop_vinfo)
        {
          if (vect_print_dump_info (REPORT_SLP))
            fprintf (vect_dump, "Build SLP failed: unrolling required in basic"
                     " block SLP");
          return false;
        }

      /* Create a new SLP instance.  */
      new_instance = XNEW (struct _slp_instance);
      SLP_INSTANCE_TREE (new_instance) = node;
      SLP_INSTANCE_GROUP_SIZE (new_instance) = group_size;
      SLP_INSTANCE_UNROLLING_FACTOR (new_instance) = unrolling_factor;
      SLP_INSTANCE_OUTSIDE_OF_LOOP_COST (new_instance) = outside_cost;
      SLP_INSTANCE_INSIDE_OF_LOOP_COST (new_instance) = inside_cost;
      SLP_INSTANCE_LOADS (new_instance) = loads;
      SLP_INSTANCE_FIRST_LOAD_STMT (new_instance) = NULL;
      SLP_INSTANCE_LOAD_PERMUTATION (new_instance) = load_permutation;

      if (loads_permuted)
        {
          if (!vect_supported_load_permutation_p (new_instance, group_size,
                                                  load_permutation))
            {
              if (vect_print_dump_info (REPORT_SLP))
                {
                  fprintf (vect_dump, "Build SLP failed: unsupported load "
                           "permutation ");
                  print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
                }

              vect_free_slp_instance (new_instance);
              return false;
            }

          SLP_INSTANCE_FIRST_LOAD_STMT (new_instance)
            = vect_find_first_load_in_slp_instance (new_instance);
        }
      else
        VEC_free (int, heap, SLP_INSTANCE_LOAD_PERMUTATION (new_instance));

      if (loop_vinfo)
        VEC_safe_push (slp_instance, heap,
                       LOOP_VINFO_SLP_INSTANCES (loop_vinfo),
                       new_instance);
      else
        VEC_safe_push (slp_instance, heap, BB_VINFO_SLP_INSTANCES (bb_vinfo),
                       new_instance);

      if (vect_print_dump_info (REPORT_SLP))
        vect_print_slp_tree (node);

      return true;
    }

  /* Failed to SLP.  */
  /* Free the allocated memory.  */
  vect_free_slp_tree (node);
  VEC_free (int, heap, load_permutation);
  VEC_free (slp_tree, heap, loads);

  return false;
}


/* Check if there are stmts in the loop that can be vectorized using SLP.
   Build SLP trees of packed scalar stmts if SLP is possible.  */

bool
vect_analyze_slp (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo)
{
  unsigned int i;
  VEC (gimple, heap) *strided_stores, *reductions = NULL, *reduc_chains = NULL;
  gimple first_element;
  bool ok = false;

  if (vect_print_dump_info (REPORT_SLP))
    fprintf (vect_dump, "=== vect_analyze_slp ===");

  if (loop_vinfo)
    {
      strided_stores = LOOP_VINFO_STRIDED_STORES (loop_vinfo);
      reduc_chains = LOOP_VINFO_REDUCTION_CHAINS (loop_vinfo);
      reductions = LOOP_VINFO_REDUCTIONS (loop_vinfo);
    }
  else
    strided_stores = BB_VINFO_STRIDED_STORES (bb_vinfo);

  /* Find SLP sequences starting from groups of strided stores.  */
  FOR_EACH_VEC_ELT (gimple, strided_stores, i, first_element)
    if (vect_analyze_slp_instance (loop_vinfo, bb_vinfo, first_element))
      ok = true;

  if (bb_vinfo && !ok)
    {
      if (vect_print_dump_info (REPORT_SLP))
        fprintf (vect_dump, "Failed to SLP the basic block.");

      return false;
    }

  if (loop_vinfo
      && VEC_length (gimple, LOOP_VINFO_REDUCTION_CHAINS (loop_vinfo)) > 0)
    {
      /* Find SLP sequences starting from reduction chains.  */
      FOR_EACH_VEC_ELT (gimple, reduc_chains, i, first_element)
        if (vect_analyze_slp_instance (loop_vinfo, bb_vinfo, first_element))
          ok = true;
        else
          return false;

      /* Don't try to vectorize SLP reductions if a reduction chain was
         detected.  */
      return ok;
    }

  /* Find SLP sequences starting from groups of reductions.  */
  if (loop_vinfo && VEC_length (gimple, LOOP_VINFO_REDUCTIONS (loop_vinfo)) > 1
      && vect_analyze_slp_instance (loop_vinfo, bb_vinfo,
                                    VEC_index (gimple, reductions, 0)))
    ok = true;

  return true;
}


/* For each possible SLP instance decide whether to SLP it and calculate the
   overall unrolling factor needed to SLP the loop.  Return TRUE if we decided
   to SLP at least one instance.  */

bool
vect_make_slp_decision (loop_vec_info loop_vinfo)
{
  unsigned int i, unrolling_factor = 1;
  VEC (slp_instance, heap) *slp_instances = LOOP_VINFO_SLP_INSTANCES (loop_vinfo);
  slp_instance instance;
  int decided_to_slp = 0;

  if (vect_print_dump_info (REPORT_SLP))
    fprintf (vect_dump, "=== vect_make_slp_decision ===");

  FOR_EACH_VEC_ELT (slp_instance, slp_instances, i, instance)
    {
      /* FORNOW: SLP if you can.  */
      if (unrolling_factor < SLP_INSTANCE_UNROLLING_FACTOR (instance))
        unrolling_factor = SLP_INSTANCE_UNROLLING_FACTOR (instance);

      /* Mark all the stmts that belong to INSTANCE as PURE_SLP stmts.  Later we
         call vect_detect_hybrid_slp () to find stmts that need hybrid SLP and
         loop-based vectorization.  Such stmts will be marked as HYBRID.  */
      vect_mark_slp_stmts (SLP_INSTANCE_TREE (instance), pure_slp, -1);
      decided_to_slp++;
    }

  LOOP_VINFO_SLP_UNROLLING_FACTOR (loop_vinfo) = unrolling_factor;

  if (decided_to_slp && vect_print_dump_info (REPORT_SLP))
    fprintf (vect_dump, "Decided to SLP %d instances. Unrolling factor %d",
             decided_to_slp, unrolling_factor);

  return (decided_to_slp > 0);
}


/* Find stmts that must be both vectorized and SLPed (since they feed stmts that
   can't be SLPed) in the tree rooted at NODE.  Mark such stmts as HYBRID.  */
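/* E.g. (hypothetical): if a PURE_SLP stmt computes a value that is also
   used by a stmt vectorized only with loop-based vectorization, the def
   stmt must be vectorized both ways and is therefore marked HYBRID.  */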
1725
1726 static void
1727 vect_detect_hybrid_slp_stmts (slp_tree node)
1728 {
1729 int i;
1730 gimple stmt;
1731 imm_use_iterator imm_iter;
1732 gimple use_stmt;
1733 stmt_vec_info stmt_vinfo;
1734 slp_void_p child;
1735
1736 if (!node)
1737 return;
1738
1739 FOR_EACH_VEC_ELT (gimple, SLP_TREE_SCALAR_STMTS (node), i, stmt)
1740 if (PURE_SLP_STMT (vinfo_for_stmt (stmt))
1741 && TREE_CODE (gimple_op (stmt, 0)) == SSA_NAME)
1742 FOR_EACH_IMM_USE_STMT (use_stmt, imm_iter, gimple_op (stmt, 0))
1743 if ((stmt_vinfo = vinfo_for_stmt (use_stmt))
1744 && !STMT_SLP_TYPE (stmt_vinfo)
1745 && (STMT_VINFO_RELEVANT (stmt_vinfo)
1746 || VECTORIZABLE_CYCLE_DEF (STMT_VINFO_DEF_TYPE (stmt_vinfo)))
1747 && !(gimple_code (use_stmt) == GIMPLE_PHI
1748 && STMT_VINFO_DEF_TYPE (vinfo_for_stmt (use_stmt))
1749 == vect_reduction_def))
1750 vect_mark_slp_stmts (node, hybrid, i);
1751
1752 FOR_EACH_VEC_ELT (slp_void_p, SLP_TREE_CHILDREN (node), i, child)
1753 vect_detect_hybrid_slp_stmts ((slp_tree) child);
1754 }
1755
1756
1757 /* Find stmts that must be both vectorized and SLPed. */
1758
1759 void
1760 vect_detect_hybrid_slp (loop_vec_info loop_vinfo)
1761 {
1762 unsigned int i;
1763 VEC (slp_instance, heap) *slp_instances = LOOP_VINFO_SLP_INSTANCES (loop_vinfo);
1764 slp_instance instance;
1765
1766 if (vect_print_dump_info (REPORT_SLP))
1767 fprintf (vect_dump, "=== vect_detect_hybrid_slp ===");
1768
1769 FOR_EACH_VEC_ELT (slp_instance, slp_instances, i, instance)
1770 vect_detect_hybrid_slp_stmts (SLP_INSTANCE_TREE (instance));
1771 }
1772
1773
1774 /* Create and initialize a new bb_vec_info struct for BB, as well as
1775 stmt_vec_info structs for all the stmts in it. */
1776
1777 static bb_vec_info
1778 new_bb_vec_info (basic_block bb)
1779 {
1780 bb_vec_info res = NULL;
1781 gimple_stmt_iterator gsi;
1782
1783 res = (bb_vec_info) xcalloc (1, sizeof (struct _bb_vec_info));
1784 BB_VINFO_BB (res) = bb;
1785
1786 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
1787 {
1788 gimple stmt = gsi_stmt (gsi);
1789 gimple_set_uid (stmt, 0);
1790 set_vinfo_for_stmt (stmt, new_stmt_vec_info (stmt, NULL, res));
1791 }
1792
1793 BB_VINFO_STRIDED_STORES (res) = VEC_alloc (gimple, heap, 10);
1794 BB_VINFO_SLP_INSTANCES (res) = VEC_alloc (slp_instance, heap, 2);
1795
1796 bb->aux = res;
1797 return res;
1798 }
1799
1800
1801 /* Free BB_VINFO struct, as well as all the stmt_vec_info structs of all the
1802 stmts in the basic block. */
1803
1804 static void
1805 destroy_bb_vec_info (bb_vec_info bb_vinfo)
1806 {
1807 basic_block bb;
1808 gimple_stmt_iterator si;
1809
1810 if (!bb_vinfo)
1811 return;
1812
1813 bb = BB_VINFO_BB (bb_vinfo);
1814
1815 for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
1816 {
1817 gimple stmt = gsi_stmt (si);
1818 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1819
1820 if (stmt_info)
1821 /* Free stmt_vec_info. */
1822 free_stmt_vec_info (stmt);
1823 }
1824
1825 free_data_refs (BB_VINFO_DATAREFS (bb_vinfo));
1826 free_dependence_relations (BB_VINFO_DDRS (bb_vinfo));
1827 VEC_free (gimple, heap, BB_VINFO_STRIDED_STORES (bb_vinfo));
1828 VEC_free (slp_instance, heap, BB_VINFO_SLP_INSTANCES (bb_vinfo));
1829 free (bb_vinfo);
1830 bb->aux = NULL;
1831 }
1832
1833
1834 /* Analyze statements contained in SLP tree node after recursively analyzing
1835 the subtree. Return TRUE if the operations are supported. */
1836
1837 static bool
1838 vect_slp_analyze_node_operations (bb_vec_info bb_vinfo, slp_tree node)
1839 {
1840 bool dummy;
1841 int i;
1842 gimple stmt;
1843 slp_void_p child;
1844
1845 if (!node)
1846 return true;
1847
1848 FOR_EACH_VEC_ELT (slp_void_p, SLP_TREE_CHILDREN (node), i, child)
1849 if (!vect_slp_analyze_node_operations (bb_vinfo, (slp_tree) child))
1850 return false;
1851
1852 FOR_EACH_VEC_ELT (gimple, SLP_TREE_SCALAR_STMTS (node), i, stmt)
1853 {
1854 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1855 gcc_assert (stmt_info);
1856 gcc_assert (PURE_SLP_STMT (stmt_info));
1857
1858 if (!vect_analyze_stmt (stmt, &dummy, node))
1859 return false;
1860 }
1861
1862 return true;
1863 }
1864
1865
1866 /* Analyze statements in SLP instances of the basic block. Return TRUE if the
1867 operations are supported. */
1868
1869 static bool
1870 vect_slp_analyze_operations (bb_vec_info bb_vinfo)
1871 {
1872 VEC (slp_instance, heap) *slp_instances = BB_VINFO_SLP_INSTANCES (bb_vinfo);
1873 slp_instance instance;
1874 int i;
1875
1876 for (i = 0; VEC_iterate (slp_instance, slp_instances, i, instance); )
1877 {
1878 if (!vect_slp_analyze_node_operations (bb_vinfo,
1879 SLP_INSTANCE_TREE (instance)))
1880 {
1881 vect_free_slp_instance (instance);
1882 VEC_ordered_remove (slp_instance, slp_instances, i);
1883 }
1884 else
1885 i++;
1886 }
1887
1888 if (!VEC_length (slp_instance, slp_instances))
1889 return false;
1890
1891 return true;
1892 }
1893
1894 /* Check if vectorization of the basic block is profitable. */
1895
1896 static bool
1897 vect_bb_vectorization_profitable_p (bb_vec_info bb_vinfo)
1898 {
1899 VEC (slp_instance, heap) *slp_instances = BB_VINFO_SLP_INSTANCES (bb_vinfo);
1900 slp_instance instance;
1901 int i;
1902 unsigned int vec_outside_cost = 0, vec_inside_cost = 0, scalar_cost = 0;
1903 unsigned int stmt_cost;
1904 gimple stmt;
1905 gimple_stmt_iterator si;
1906 basic_block bb = BB_VINFO_BB (bb_vinfo);
1907 stmt_vec_info stmt_info = NULL;
1908 tree dummy_type = NULL;
1909 int dummy = 0;
1910
1911 /* Calculate vector costs. */
1912 FOR_EACH_VEC_ELT (slp_instance, slp_instances, i, instance)
1913 {
1914 vec_outside_cost += SLP_INSTANCE_OUTSIDE_OF_LOOP_COST (instance);
1915 vec_inside_cost += SLP_INSTANCE_INSIDE_OF_LOOP_COST (instance);
1916 }
1917
1918 /* Calculate scalar cost. */
1919 for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
1920 {
1921 stmt = gsi_stmt (si);
1922 stmt_info = vinfo_for_stmt (stmt);
1923
1924 if (!stmt_info || !STMT_VINFO_VECTORIZABLE (stmt_info)
1925 || !PURE_SLP_STMT (stmt_info))
1926 continue;
1927
1928 if (STMT_VINFO_DATA_REF (stmt_info))
1929 {
1930 if (DR_IS_READ (STMT_VINFO_DATA_REF (stmt_info)))
1931 stmt_cost = targetm.vectorize.builtin_vectorization_cost
1932 (scalar_load, dummy_type, dummy);
1933 else
1934 stmt_cost = targetm.vectorize.builtin_vectorization_cost
1935 (scalar_store, dummy_type, dummy);
1936 }
1937 else
1938 stmt_cost = targetm.vectorize.builtin_vectorization_cost
1939 (scalar_stmt, dummy_type, dummy);
1940
1941 scalar_cost += stmt_cost;
1942 }
1943
1944 if (vect_print_dump_info (REPORT_COST))
1945 {
1946 fprintf (vect_dump, "Cost model analysis: \n");
1947 fprintf (vect_dump, " Vector inside of basic block cost: %d\n",
1948 vec_inside_cost);
1949 fprintf (vect_dump, " Vector outside of basic block cost: %d\n",
1950 vec_outside_cost);
1951 fprintf (vect_dump, " Scalar cost of basic block: %d", scalar_cost);
1952 }
1953
1954 /* Vectorization is profitable if its cost is less than the cost of the
1955 scalar version. */
1956 if (vec_outside_cost + vec_inside_cost >= scalar_cost)
1957 return false;
1958
1959 return true;
1960 }
1961
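/* An illustrative reading of the comparison above, with hypothetical
   numbers rather than real target costs: if the instances report
   vec_outside_cost == 2 and vec_inside_cost == 4 while the scalar
   stmts of the block sum to scalar_cost == 8, then 2 + 4 < 8 holds and
   the block is considered profitable; with scalar_cost == 6 it is not,
   since ties and losses both favor the scalar version.  */
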
1962 /* Check if the basic block can be vectorized. */
1963
1964 static bb_vec_info
1965 vect_slp_analyze_bb_1 (basic_block bb)
1966 {
1967 bb_vec_info bb_vinfo;
1968 VEC (ddr_p, heap) *ddrs;
1969 VEC (slp_instance, heap) *slp_instances;
1970 slp_instance instance;
1971 int i;
1972 int min_vf = 2;
1973 int max_vf = MAX_VECTORIZATION_FACTOR;
1974
1975 bb_vinfo = new_bb_vec_info (bb);
1976 if (!bb_vinfo)
1977 return NULL;
1978
1979 if (!vect_analyze_data_refs (NULL, bb_vinfo, &min_vf))
1980 {
1981 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
1982 fprintf (vect_dump, "not vectorized: unhandled data-ref in basic "
1983 "block.\n");
1984
1985 destroy_bb_vec_info (bb_vinfo);
1986 return NULL;
1987 }
1988
1989 ddrs = BB_VINFO_DDRS (bb_vinfo);
1990 if (!VEC_length (ddr_p, ddrs))
1991 {
1992 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
1993 fprintf (vect_dump, "not vectorized: not enough data-refs in basic "
1994 "block.\n");
1995
1996 destroy_bb_vec_info (bb_vinfo);
1997 return NULL;
1998 }
1999
2000 if (!vect_analyze_data_ref_dependences (NULL, bb_vinfo, &max_vf)
2001 || min_vf > max_vf)
2002 {
2003 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
2004 fprintf (vect_dump, "not vectorized: unhandled data dependence "
2005 "in basic block.\n");
2006
2007 destroy_bb_vec_info (bb_vinfo);
2008 return NULL;
2009 }
2010
2011 if (!vect_analyze_data_refs_alignment (NULL, bb_vinfo))
2012 {
2013 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
2014 fprintf (vect_dump, "not vectorized: bad data alignment in basic "
2015 "block.\n");
2016
2017 destroy_bb_vec_info (bb_vinfo);
2018 return NULL;
2019 }
2020
2021 if (!vect_analyze_data_ref_accesses (NULL, bb_vinfo))
2022 {
2023 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
2024 fprintf (vect_dump, "not vectorized: unhandled data access in basic "
2025 "block.\n");
2026
2027 destroy_bb_vec_info (bb_vinfo);
2028 return NULL;
2029 }
2030
2031 if (!vect_verify_datarefs_alignment (NULL, bb_vinfo))
2032 {
2033 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
2034 fprintf (vect_dump, "not vectorized: unsupported alignment in basic "
2035 "block.\n");
2036
2037 destroy_bb_vec_info (bb_vinfo);
2038 return NULL;
2039 }
2040
2041 /* Check the SLP opportunities in the basic block, analyze and build SLP
2042 trees. */
2043 if (!vect_analyze_slp (NULL, bb_vinfo))
2044 {
2045 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
2046 fprintf (vect_dump, "not vectorized: failed to find SLP opportunities "
2047 "in basic block.\n");
2048
2049 destroy_bb_vec_info (bb_vinfo);
2050 return NULL;
2051 }
2052
2053 slp_instances = BB_VINFO_SLP_INSTANCES (bb_vinfo);
2054
2055 /* Mark all the statements that we want to vectorize as pure SLP and
2056 relevant. */
2057 FOR_EACH_VEC_ELT (slp_instance, slp_instances, i, instance)
2058 {
2059 vect_mark_slp_stmts (SLP_INSTANCE_TREE (instance), pure_slp, -1);
2060 vect_mark_slp_stmts_relevant (SLP_INSTANCE_TREE (instance));
2061 }
2062
2063 if (!vect_slp_analyze_operations (bb_vinfo))
2064 {
2065 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
2066 fprintf (vect_dump, "not vectorized: bad operation in basic block.\n");
2067
2068 destroy_bb_vec_info (bb_vinfo);
2069 return NULL;
2070 }
2071
2072 /* Cost model: check if the vectorization is worthwhile. */
2073 if (flag_vect_cost_model
2074 && !vect_bb_vectorization_profitable_p (bb_vinfo))
2075 {
2076 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
2077 fprintf (vect_dump, "not vectorized: vectorization is not "
2078 "profitable.\n");
2079
2080 destroy_bb_vec_info (bb_vinfo);
2081 return NULL;
2082 }
2083
2084 if (vect_print_dump_info (REPORT_DETAILS))
2085 fprintf (vect_dump, "Basic block will be vectorized using SLP\n");
2086
2087 return bb_vinfo;
2088 }
2089
2090
2091 bb_vec_info
2092 vect_slp_analyze_bb (basic_block bb)
2093 {
2094 bb_vec_info bb_vinfo;
2095 int insns = 0;
2096 gimple_stmt_iterator gsi;
2097 unsigned int vector_sizes;
2098
2099 if (vect_print_dump_info (REPORT_DETAILS))
2100 fprintf (vect_dump, "===vect_slp_analyze_bb===\n");
2101
2102 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
2103 {
2104 gimple stmt = gsi_stmt (gsi);
2105 if (!is_gimple_debug (stmt)
2106 && !gimple_nop_p (stmt)
2107 && gimple_code (stmt) != GIMPLE_LABEL)
2108 insns++;
2109 }
2110
2111 if (insns > PARAM_VALUE (PARAM_SLP_MAX_INSNS_IN_BB))
2112 {
2113 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
2114 fprintf (vect_dump, "not vectorized: too many instructions in basic "
2115 "block.\n");
2116
2117 return NULL;
2118 }
2119
2120 /* Autodetect first vector size we try. */
2121 current_vector_size = 0;
2122 vector_sizes = targetm.vectorize.autovectorize_vector_sizes ();
2123
2124 while (1)
2125 {
2126 bb_vinfo = vect_slp_analyze_bb_1 (bb);
2127 if (bb_vinfo)
2128 return bb_vinfo;
2129
2130 destroy_bb_vec_info (bb_vinfo);
2131
2132 vector_sizes &= ~current_vector_size;
2133 if (vector_sizes == 0
2134 || current_vector_size == 0)
2135 return NULL;
2136
2137 /* Try the next biggest vector size. */
2138 current_vector_size = 1 << floor_log2 (vector_sizes);
2139 if (vect_print_dump_info (REPORT_DETAILS))
2140 fprintf (vect_dump, "***** Re-trying analysis with "
2141 "vector size %d\n", current_vector_size);
2142 }
2143 }
2144
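/* An illustrative run of the retry loop above, with a hypothetical
   target: if autovectorize_vector_sizes () returns 16 | 8 and the
   autodetected current_vector_size ends up as 16, a failed analysis
   clears that bit, leaving 8; 1 << floor_log2 (8) == 8 is tried next,
   and a further failure leaves no candidate sizes, so the function
   gives up and returns NULL.  */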
2145
2146 /* SLP costs are calculated according to the SLP instance unrolling factor
2147 (i.e., the number of created vector stmts depends on the unrolling factor).
2148 However, the actual number of vector stmts for every SLP node depends on
2149 the VF, which is set later in vect_analyze_operations (). Hence, the SLP
2150 costs should be updated. In this function we assume that the inside costs
2151 calculated in vect_model_xxx_cost are linear in ncopies. */
2152
2153 void
2154 vect_update_slp_costs_according_to_vf (loop_vec_info loop_vinfo)
2155 {
2156 unsigned int i, vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
2157 VEC (slp_instance, heap) *slp_instances = LOOP_VINFO_SLP_INSTANCES (loop_vinfo);
2158 slp_instance instance;
2159
2160 if (vect_print_dump_info (REPORT_SLP))
2161 fprintf (vect_dump, "=== vect_update_slp_costs_according_to_vf ===");
2162
2163 FOR_EACH_VEC_ELT (slp_instance, slp_instances, i, instance)
2164 /* We assume that costs are linear in ncopies. */
2165 SLP_INSTANCE_INSIDE_OF_LOOP_COST (instance) *= vf
2166 / SLP_INSTANCE_UNROLLING_FACTOR (instance);
2167 }
2168
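/* For example, with hypothetical numbers: an instance built with
   SLP_INSTANCE_UNROLLING_FACTOR == 2 and an inside-of-loop cost of 6,
   analyzed in a loop whose final VF turns out to be 8, has its cost
   scaled by 8 / 2 == 4 to give 24, since four times as many vector
   stmts are actually generated. The division is expected to be exact,
   as the VF is computed as a multiple of each instance's unrolling
   factor.  */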
2169
2170 /* For constant and loop-invariant defs of SLP_NODE this function returns
2171 (vector) defs (VEC_OPRNDS) that will be used in the vectorized stmts.
2172 OP_NUM determines whether we gather defs for operand 0 or operand 1 of the
2173 RHS of the scalar stmts. NUMBER_OF_VECTORS is the number of vector defs to
2174 create. REDUC_INDEX is the index of the reduction operand in the stmts,
2175 unless it is -1. */
2176
2177 static void
2178 vect_get_constant_vectors (tree op, slp_tree slp_node,
2179 VEC (tree, heap) **vec_oprnds,
2180 unsigned int op_num, unsigned int number_of_vectors,
2181 int reduc_index)
2182 {
2183 VEC (gimple, heap) *stmts = SLP_TREE_SCALAR_STMTS (slp_node);
2184 gimple stmt = VEC_index (gimple, stmts, 0);
2185 stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
2186 int nunits;
2187 tree vec_cst;
2188 tree t = NULL_TREE;
2189 int j, number_of_places_left_in_vector;
2190 tree vector_type;
2191 tree vop;
2192 int group_size = VEC_length (gimple, stmts);
2193 unsigned int vec_num, i;
2194 int number_of_copies = 1;
2195 VEC (tree, heap) *voprnds = VEC_alloc (tree, heap, number_of_vectors);
2196 bool constant_p, is_store;
2197 tree neutral_op = NULL, neutral_vec = NULL;
2198 enum tree_code code = gimple_expr_code (stmt);
2199 gimple def_stmt;
2200 struct loop *loop;
2201
2202 if (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_reduction_def
2203 && reduc_index != -1)
2204 {
2205 op_num = reduc_index - 1;
2206 op = gimple_op (stmt, reduc_index);
2207 /* For additional copies (see the explanation of NUMBER_OF_COPIES below)
2208 we need either neutral operands or the original operands. See
2209 get_initial_def_for_reduction() for details. */
2210 switch (code)
2211 {
2212 case WIDEN_SUM_EXPR:
2213 case DOT_PROD_EXPR:
2214 case PLUS_EXPR:
2215 case MINUS_EXPR:
2216 case BIT_IOR_EXPR:
2217 case BIT_XOR_EXPR:
2218 if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (op)))
2219 neutral_op = build_real (TREE_TYPE (op), dconst0);
2220 else
2221 neutral_op = build_int_cst (TREE_TYPE (op), 0);
2222
2223 break;
2224
2225 case MULT_EXPR:
2226 if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (op)))
2227 neutral_op = build_real (TREE_TYPE (op), dconst1);
2228 else
2229 neutral_op = build_int_cst (TREE_TYPE (op), 1);
2230
2231 break;
2232
2233 case BIT_AND_EXPR:
2234 neutral_op = build_int_cst (TREE_TYPE (op), -1);
2235 break;
2236
2237 case MAX_EXPR:
2238 case MIN_EXPR:
2239 def_stmt = SSA_NAME_DEF_STMT (op);
2240 loop = (gimple_bb (stmt))->loop_father;
2241 neutral_op = PHI_ARG_DEF_FROM_EDGE (def_stmt,
2242 loop_preheader_edge (loop));
2243 break;
2244
2245 default:
2246 neutral_op = NULL;
2247 }
2248 }
2249
2250 if (STMT_VINFO_DATA_REF (stmt_vinfo))
2251 {
2252 is_store = true;
2253 op = gimple_assign_rhs1 (stmt);
2254 }
2255 else
2256 is_store = false;
2257
2258 gcc_assert (op);
2259
2260 if (CONSTANT_CLASS_P (op))
2261 constant_p = true;
2262 else
2263 constant_p = false;
2264
2265 vector_type = get_vectype_for_scalar_type (TREE_TYPE (op));
2266 gcc_assert (vector_type);
2267 nunits = TYPE_VECTOR_SUBPARTS (vector_type);
2268
2269 /* NUMBER_OF_COPIES is the number of times we need to use the same values in
2270 created vectors. It is greater than 1 if unrolling is performed.
2271
2272 For example, we have two scalar operands, s1 and s2 (e.g., group of
2273 strided accesses of size two), while NUNITS is four (i.e., four scalars
2274 of this type can be packed in a vector). The output vector will contain
2275 two copies of each scalar operand: {s1, s2, s1, s2}. (NUMBER_OF_COPIES
2276 will be 2).
2277
2278 If GROUP_SIZE > NUNITS, the scalars will be split into several vectors
2279 containing the operands.
2280
2281 For example, NUNITS is four as before, and the group size is 8
2282 (s1, s2, ..., s8). We will create two vectors {s1, s2, s3, s4} and
2283 {s5, s6, s7, s8}. */
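
/* When GROUP_SIZE does not divide NUNITS both effects combine. An
   illustrative case: for GROUP_SIZE 3 and NUNITS 4,
   least_common_multiple (4, 3) / 3 == 4 copies are needed and the
   scalars wrap around the vector boundaries, conceptually yielding
   {s1, s2, s3, s1} {s2, s3, s1, s2} {s3, s1, s2, s3}.  */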
2284
2285 number_of_copies = least_common_multiple (nunits, group_size) / group_size;
2286
2287 number_of_places_left_in_vector = nunits;
2288 for (j = 0; j < number_of_copies; j++)
2289 {
2290 for (i = group_size - 1; VEC_iterate (gimple, stmts, i, stmt); i--)
2291 {
2292 if (is_store)
2293 op = gimple_assign_rhs1 (stmt);
2294 else
2295 {
2296 switch (code)
2297 {
2298 case COND_EXPR:
2299 if (op_num == 0 || op_num == 1)
2300 {
2301 tree cond = gimple_assign_rhs1 (stmt);
2302 op = TREE_OPERAND (cond, op_num);
2303 }
2304 else
2305 {
2306 if (op_num == 2)
2307 op = gimple_assign_rhs2 (stmt);
2308 else
2309 op = gimple_assign_rhs3 (stmt);
2310 }
2311 break;
2312
2313 case CALL_EXPR:
2314 op = gimple_call_arg (stmt, op_num);
2315 break;
2316
2317 default:
2318 op = gimple_op (stmt, op_num + 1);
2319 }
2320 }
2321
2322 if (reduc_index != -1)
2323 {
2324 loop = (gimple_bb (stmt))->loop_father;
2325 def_stmt = SSA_NAME_DEF_STMT (op);
2326
2327 gcc_assert (loop);
2328
2329 /* Get the def before the loop. In reduction chain we have only
2330 one initial value. */
2331 if ((j != (number_of_copies - 1)
2332 || (GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt))
2333 && i != 0))
2334 && neutral_op)
2335 op = neutral_op;
2336 else
2337 op = PHI_ARG_DEF_FROM_EDGE (def_stmt,
2338 loop_preheader_edge (loop));
2339 }
2340
2341 /* Create 'vect_ = {op0,op1,...,opn}'. */
2342 t = tree_cons (NULL_TREE, op, t);
2343
2344 number_of_places_left_in_vector--;
2345
2346 if (number_of_places_left_in_vector == 0)
2347 {
2348 number_of_places_left_in_vector = nunits;
2349
2350 if (constant_p)
2351 vec_cst = build_vector (vector_type, t);
2352 else
2353 vec_cst = build_constructor_from_list (vector_type, t);
2354 VEC_quick_push (tree, voprnds,
2355 vect_init_vector (stmt, vec_cst, vector_type, NULL));
2356 t = NULL_TREE;
2357 }
2358 }
2359 }
2360
2361 /* Since the vectors are created in reverse order, we have to reverse
2362 them here. */
2363 vec_num = VEC_length (tree, voprnds);
2364 for (j = vec_num - 1; j >= 0; j--)
2365 {
2366 vop = VEC_index (tree, voprnds, j);
2367 VEC_quick_push (tree, *vec_oprnds, vop);
2368 }
2369
2370 VEC_free (tree, heap, voprnds);
2371
2372 /* In case the VF is greater than the unrolling factor needed for the SLP
2373 group of stmts, the NUMBER_OF_VECTORS to be created is greater than
2374 NUMBER_OF_SCALARS/NUNITS or NUNITS/NUMBER_OF_SCALARS, and hence we have
2375 to replicate the vectors. NEUTRAL_VEC (declared above) is built at
2376 most once and then reused across iterations. */
2377 while (number_of_vectors > VEC_length (tree, *vec_oprnds))
2378 {
2379
2380 if (neutral_op)
2381 {
2382 if (!neutral_vec)
2383 neutral_vec = build_vector_from_val (vector_type, neutral_op);
2384
2385 VEC_quick_push (tree, *vec_oprnds, neutral_vec);
2386 }
2387 else
2388 {
2389 for (i = 0; VEC_iterate (tree, *vec_oprnds, i, vop) && i < vec_num; i++)
2390 VEC_quick_push (tree, *vec_oprnds, vop);
2391 }
2392 }
2393 }
2394
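/* An illustrative view of the replication above, with hypothetical
   numbers: if NUMBER_OF_VECTORS == 4 but the group only yields two
   distinct vectors v1 and v2, the result is {v1, v2, v1, v2} for an
   ordinary operand, or {v1, v2, n, n} for a reduction, where n is a
   splat of the neutral value (0 for PLUS_EXPR, 1 for MULT_EXPR,
   etc.).  */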
2395
2396 /* Get the vectorized definitions from SLP_NODE, which already contains
2397 the corresponding vectorized def-stmts. */
2398
2399 static void
2400 vect_get_slp_vect_defs (slp_tree slp_node, VEC (tree,heap) **vec_oprnds)
2401 {
2402 tree vec_oprnd;
2403 gimple vec_def_stmt;
2404 unsigned int i;
2405
2406 gcc_assert (SLP_TREE_VEC_STMTS (slp_node));
2407
2408 FOR_EACH_VEC_ELT (gimple, SLP_TREE_VEC_STMTS (slp_node), i, vec_def_stmt)
2409 {
2410 gcc_assert (vec_def_stmt);
2411 vec_oprnd = gimple_get_lhs (vec_def_stmt);
2412 VEC_quick_push (tree, *vec_oprnds, vec_oprnd);
2413 }
2414 }
2415
2416
2417 /* Get vectorized definitions for SLP_NODE.
2418 If the scalar definitions are loop invariants or constants, collect them and
2419 call vect_get_constant_vectors() to create vector stmts.
2420 Otherwise, the def-stmts must already be vectorized and the vectorized stmts
2421 must be stored in the corresponding child of SLP_NODE, and we call
2422 vect_get_slp_vect_defs () to retrieve them. */
2423
2424 void
2425 vect_get_slp_defs (VEC (tree, heap) *ops, slp_tree slp_node,
2426 VEC (slp_void_p, heap) **vec_oprnds, int reduc_index)
2427 {
2428 gimple first_stmt, first_def;
2429 int number_of_vects = 0, i;
2430 unsigned int child_index = 0;
2431 HOST_WIDE_INT lhs_size_unit, rhs_size_unit;
2432 slp_tree child = NULL;
2433 VEC (tree, heap) *vec_defs;
2434 tree oprnd, def_lhs;
2435 bool vectorized_defs;
2436
2437 first_stmt = VEC_index (gimple, SLP_TREE_SCALAR_STMTS (slp_node), 0);
2438 FOR_EACH_VEC_ELT (tree, ops, i, oprnd)
2439 {
2440 /* For each operand we check if it has vectorized definitions in a child
2441 node or whether we need to create them (for invariants and constants).
2442 We check if the LHS of the first stmt of the next child matches OPRND.
2443 If it does, we found the correct child. Otherwise, we call
2444 vect_get_constant_vectors () and do not advance CHILD_INDEX, so that
2445 this child node is checked against the next operand. */
2446 vectorized_defs = false;
2447 if (VEC_length (slp_void_p, SLP_TREE_CHILDREN (slp_node)) > child_index)
2448 {
2449 child = (slp_tree) VEC_index (slp_void_p,
2450 SLP_TREE_CHILDREN (slp_node),
2451 child_index);
2452 first_def = VEC_index (gimple, SLP_TREE_SCALAR_STMTS (child), 0);
2453
2454 /* At the end of a pattern sequence we have a use of the original stmt,
2455 so we need to compare OPRND with the original def. */
2456 if (is_pattern_stmt_p (vinfo_for_stmt (first_def))
2457 && !STMT_VINFO_IN_PATTERN_P (vinfo_for_stmt (first_stmt))
2458 && !is_pattern_stmt_p (vinfo_for_stmt (first_stmt)))
2459 first_def = STMT_VINFO_RELATED_STMT (vinfo_for_stmt (first_def));
2460
2461 if (is_gimple_call (first_def))
2462 def_lhs = gimple_call_lhs (first_def);
2463 else
2464 def_lhs = gimple_assign_lhs (first_def);
2465
2466 if (operand_equal_p (oprnd, def_lhs, 0))
2467 {
2468 /* The number of vector defs is determined by the number of
2469 vector statements in the node from which we get those
2470 statements. */
2471 number_of_vects = SLP_TREE_NUMBER_OF_VEC_STMTS (child);
2472 vectorized_defs = true;
2473 child_index++;
2474 }
2475 }
2476
2477 if (!vectorized_defs)
2478 {
2479 if (i == 0)
2480 {
2481 number_of_vects = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
2482 /* The number of vector stmts was calculated according to the LHS in
2483 vect_schedule_slp_instance (); fix it by replacing the LHS with
2484 the RHS, if necessary. See vect_get_smallest_scalar_type () for
2485 details. */
2486 vect_get_smallest_scalar_type (first_stmt, &lhs_size_unit,
2487 &rhs_size_unit);
2488 if (rhs_size_unit != lhs_size_unit)
2489 {
2490 number_of_vects *= rhs_size_unit;
2491 number_of_vects /= lhs_size_unit;
2492 }
2493 }
2494 }
2495
2496 /* Allocate memory for vectorized defs. */
2497 vec_defs = VEC_alloc (tree, heap, number_of_vects);
2498
2499 /* For reduction defs we call vect_get_constant_vectors (), since we are
2500 looking for initial loop invariant values. */
2501 if (vectorized_defs && reduc_index == -1)
2502 /* The defs are already vectorized. */
2503 vect_get_slp_vect_defs (child, &vec_defs);
2504 else
2505 /* Build vectors from scalar defs. */
2506 vect_get_constant_vectors (oprnd, slp_node, &vec_defs, i,
2507 number_of_vects, reduc_index);
2508
2509 VEC_quick_push (slp_void_p, *vec_oprnds, (slp_void_p) vec_defs);
2510
2511 /* For reductions, we only need initial values. */
2512 if (reduc_index != -1)
2513 return;
2514 }
2515 }
2516
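/* A hedged usage sketch of vect_get_slp_defs (variable names are
   hypothetical): a caller vectorizing a two-operand stmt collects the
   scalar operands, asks for their vector defs, and unpacks the
   per-operand vectors:

     VEC (tree, heap) *ops = VEC_alloc (tree, heap, 2);
     VEC (slp_void_p, heap) *defs = VEC_alloc (slp_void_p, heap, 2);
     VEC (tree, heap) *vec_oprnds0, *vec_oprnds1;
     VEC_quick_push (tree, ops, op0);
     VEC_quick_push (tree, ops, op1);
     vect_get_slp_defs (ops, slp_node, &defs, -1);
     vec_oprnds0 = (VEC (tree, heap) *) VEC_index (slp_void_p, defs, 0);
     vec_oprnds1 = (VEC (tree, heap) *) VEC_index (slp_void_p, defs, 1);

   For a reduction (REDUC_INDEX != -1) only the defs of the first
   operand are produced, since the function returns early.  */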
2517
2518 /* Create NCOPIES permutation statements using the vector mask MASK and two
2519 input vectors placed in DR_CHAIN at FIRST_VEC_INDX and SECOND_VEC_INDX for
2520 the first copy, shifting by STRIDE elements of DR_CHAIN for every
2521 subsequent copy.
2522 (STRIDE is the number of vectorized stmts for NODE divided by the number of
2523 copies.)
2524 VECT_STMTS_COUNTER specifies the index in the vectorized stmts of NODE at
2525 which the created stmts must be stored. */
2526
2527 static inline void
2528 vect_create_mask_and_perm (gimple stmt, gimple next_scalar_stmt,
2529 tree mask, int first_vec_indx, int second_vec_indx,
2530 gimple_stmt_iterator *gsi, slp_tree node,
2531 tree vectype, VEC(tree,heap) *dr_chain,
2532 int ncopies, int vect_stmts_counter)
2533 {
2534 tree perm_dest;
2535 gimple perm_stmt = NULL;
2536 stmt_vec_info next_stmt_info;
2537 int i, stride;
2538 tree first_vec, second_vec, data_ref;
2539
2540 stride = SLP_TREE_NUMBER_OF_VEC_STMTS (node) / ncopies;
2541
2542 /* Initialize the vect stmts of NODE to properly insert the generated
2543 stmts later. */
2544 for (i = VEC_length (gimple, SLP_TREE_VEC_STMTS (node));
2545 i < (int) SLP_TREE_NUMBER_OF_VEC_STMTS (node); i++)
2546 VEC_quick_push (gimple, SLP_TREE_VEC_STMTS (node), NULL);
2547
2548 perm_dest = vect_create_destination_var (gimple_assign_lhs (stmt), vectype);
2549 for (i = 0; i < ncopies; i++)
2550 {
2551 first_vec = VEC_index (tree, dr_chain, first_vec_indx);
2552 second_vec = VEC_index (tree, dr_chain, second_vec_indx);
2553
2554 /* Generate the permute statement. */
2555 perm_stmt = gimple_build_assign_with_ops3 (VEC_PERM_EXPR, perm_dest,
2556 first_vec, second_vec, mask);
2557 data_ref = make_ssa_name (perm_dest, perm_stmt);
2558 gimple_set_lhs (perm_stmt, data_ref);
2559 vect_finish_stmt_generation (stmt, perm_stmt, gsi);
2560
2561 /* Store the vector statement in NODE. */
2562 VEC_replace (gimple, SLP_TREE_VEC_STMTS (node),
2563 stride * i + vect_stmts_counter, perm_stmt);
2564
2565 first_vec_indx += stride;
2566 second_vec_indx += stride;
2567 }
2568
2569 /* Mark the scalar stmt as vectorized. */
2570 next_stmt_info = vinfo_for_stmt (next_scalar_stmt);
2571 STMT_VINFO_VEC_STMT (next_stmt_info) = perm_stmt;
2572 }
2573
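/* An illustrative instance of the indexing above, with hypothetical
   numbers: if SLP_TREE_NUMBER_OF_VEC_STMTS (node) == 4 and
   NCOPIES == 2, then STRIDE == 2, so the two copies generated for
   VECT_STMTS_COUNTER == 1 are stored in slots 1 and 3 of
   SLP_TREE_VEC_STMTS (node), while the indices into DR_CHAIN advance
   by 2 between the copies.  */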
2574
2575 /* Given FIRST_MASK_ELEMENT - the mask element in element representation -
2576 return in CURRENT_MASK_ELEMENT its equivalent in target-specific
2577 representation. Check that the mask is valid and return FALSE if not.
2578 Return TRUE in NEED_NEXT_VECTOR if the permutation requires moving to
2579 the next vector, i.e., the current first vector is no longer needed. */
2580
2581 static bool
2582 vect_get_mask_element (gimple stmt, int first_mask_element, int m,
2583 int mask_nunits, bool only_one_vec, int index,
2584 unsigned char *mask, int *current_mask_element,
2585 bool *need_next_vector, int *number_of_mask_fixes,
2586 bool *mask_fixed, bool *needs_first_vector)
2587 {
2588 int i;
2589
2590 /* Convert to target specific representation. */
2591 *current_mask_element = first_mask_element + m;
2592 /* Adjust the value in case it is a mask for the second and third vectors. */
2593 *current_mask_element -= mask_nunits * (*number_of_mask_fixes - 1);
2594
2595 if (*current_mask_element < mask_nunits)
2596 *needs_first_vector = true;
2597
2598 /* We have only one input vector to permute but the mask accesses values in
2599 the next vector as well. */
2600 if (only_one_vec && *current_mask_element >= mask_nunits)
2601 {
2602 if (vect_print_dump_info (REPORT_DETAILS))
2603 {
2604 fprintf (vect_dump, "permutation requires at least two vectors ");
2605 print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
2606 }
2607
2608 return false;
2609 }
2610
2611 /* The mask requires the next vector. */
2612 if (*current_mask_element >= mask_nunits * 2)
2613 {
2614 if (*needs_first_vector || *mask_fixed)
2615 {
2616 /* We either need the first vector too or have already moved to the
2617 next vector. In both cases, this permutation needs three
2618 vectors. */
2619 if (vect_print_dump_info (REPORT_DETAILS))
2620 {
2621 fprintf (vect_dump, "permutation requires at "
2622 "least three vectors ");
2623 print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
2624 }
2625
2626 return false;
2627 }
2628
2629 /* We move to the next vector, dropping the first one and working with
2630 the second and the third - we need to adjust the values of the mask
2631 accordingly. */
2632 *current_mask_element -= mask_nunits * *number_of_mask_fixes;
2633
2634 for (i = 0; i < index; i++)
2635 mask[i] -= mask_nunits * *number_of_mask_fixes;
2636
2637 (*number_of_mask_fixes)++;
2638 *mask_fixed = true;
2639 }
2640
2641 *need_next_vector = *mask_fixed;
2642
2643 /* This was the last element of this mask. Start a new one. */
2644 if (index == mask_nunits - 1)
2645 {
2646 *number_of_mask_fixes = 1;
2647 *mask_fixed = false;
2648 *needs_first_vector = false;
2649 }
2650
2651 return true;
2652 }
2653
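/* A worked instance of the adjustment above, using the {6, 9, 9, 9}
   mask from the comment before vect_transform_slp_perm_load (NUNITS
   == 4): element 6 is emitted as is; element 9 >= 2 * 4 triggers the
   move to the next vector, so it becomes 9 - 4 == 5 and the already
   emitted 6 is rewritten to 2; the remaining 9s are likewise mapped to
   5, giving the final mask {2, 5, 5, 5}.  */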
2654
2655 /* Generate vector permute statements from a list of loads in DR_CHAIN.
2656 If ANALYZE_ONLY is TRUE, only check that it is possible to create valid
2657 permute statements for SLP_NODE_INSTANCE. */
2658 bool
2659 vect_transform_slp_perm_load (gimple stmt, VEC (tree, heap) *dr_chain,
2660 gimple_stmt_iterator *gsi, int vf,
2661 slp_instance slp_node_instance, bool analyze_only)
2662 {
2663 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2664 tree mask_element_type = NULL_TREE, mask_type;
2665 int i, j, k, nunits, vec_index = 0, scalar_index;
2666 slp_tree node;
2667 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
2668 gimple next_scalar_stmt;
2669 int group_size = SLP_INSTANCE_GROUP_SIZE (slp_node_instance);
2670 int first_mask_element;
2671 int index, unroll_factor, current_mask_element, ncopies;
2672 unsigned char *mask;
2673 bool only_one_vec = false, need_next_vector = false;
2674 int first_vec_index, second_vec_index, orig_vec_stmts_num, vect_stmts_counter;
2675 int number_of_mask_fixes = 1;
2676 bool mask_fixed = false;
2677 bool needs_first_vector = false;
2678 enum machine_mode mode;
2679
2680 mode = TYPE_MODE (vectype);
2681
2682 if (!can_vec_perm_p (mode, false, NULL))
2683 {
2684 if (vect_print_dump_info (REPORT_DETAILS))
2685 {
2686 fprintf (vect_dump, "no vect permute for ");
2687 print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
2688 }
2689 return false;
2690 }
2691
2692 /* The generic VEC_PERM_EXPR code always uses an integral type of the
2693 same size as the vector element being permuted. */
2694 mask_element_type
2695 = lang_hooks.types.type_for_size
2696 (TREE_INT_CST_LOW (TYPE_SIZE (TREE_TYPE (vectype))), 1);
2697 mask_type = get_vectype_for_scalar_type (mask_element_type);
2698 nunits = TYPE_VECTOR_SUBPARTS (vectype);
2699 mask = XALLOCAVEC (unsigned char, nunits);
2700 unroll_factor = SLP_INSTANCE_UNROLLING_FACTOR (slp_node_instance);
2701
2702 /* The number of vector stmts to generate based only on the
2703 SLP_NODE_INSTANCE unrolling factor. */
2704 orig_vec_stmts_num = group_size
2705 * SLP_INSTANCE_UNROLLING_FACTOR (slp_node_instance) / nunits;
2706 if (orig_vec_stmts_num == 1)
2707 only_one_vec = true;
2708
2709 /* The number of copies is determined by the final vectorization factor
2710 relative to the SLP_NODE_INSTANCE unrolling factor. */
2711 ncopies = vf / SLP_INSTANCE_UNROLLING_FACTOR (slp_node_instance);
2712
2713 /* Generate permutation masks for every NODE. The number of masks for each
2714 NODE is equal to GROUP_SIZE.
2715 E.g., we have a group of three nodes with three loads from the same
2716 location in each node, and the vector size is 4. I.e., we have an
2717 a0b0c0a1b1c1... sequence and we need to create the following vectors:
2718 for a's: a0a0a0a1 a1a1a2a2 a2a3a3a3
2719 for b's: b0b0b0b1 b1b1b2b2 b2b3b3b3
2720 ...
2721
2722 The masks for a's should be: {0,0,0,3} {3,3,6,6} {6,9,9,9}.
2723 The last mask is illegal since we assume two operands for the permute
2724 operation, and the mask element values can't be outside that range.
2725 Hence, the last mask must be converted into {2,5,5,5}.
2726 For the first two permutations we need the first and the second input
2727 vectors: {a0,b0,c0,a1} and {b1,c1,a2,b2}, and for the last permutation
2728 we need the second and the third vectors: {b1,c1,a2,b2} and
2729 {c2,a3,b3,c3}. */
2730
2731 FOR_EACH_VEC_ELT (slp_tree, SLP_INSTANCE_LOADS (slp_node_instance), i, node)
2732 {
2733 scalar_index = 0;
2734 index = 0;
2735 vect_stmts_counter = 0;
2736 vec_index = 0;
2737 first_vec_index = vec_index++;
2738 if (only_one_vec)
2739 second_vec_index = first_vec_index;
2740 else
2741 second_vec_index = vec_index++;
2742
2743 for (j = 0; j < unroll_factor; j++)
2744 {
2745 for (k = 0; k < group_size; k++)
2746 {
2747 first_mask_element = i + j * group_size;
2748 if (!vect_get_mask_element (stmt, first_mask_element, 0,
2749 nunits, only_one_vec, index,
2750 mask, &current_mask_element,
2751 &need_next_vector,
2752 &number_of_mask_fixes, &mask_fixed,
2753 &needs_first_vector))
2754 return false;
2755 mask[index++] = current_mask_element;
2756
2757 if (index == nunits)
2758 {
2759 tree mask_vec = NULL;
2760
2761 if (!can_vec_perm_p (mode, false, mask))
2762 {
2763 if (vect_print_dump_info (REPORT_DETAILS))
2764 {
2765 fprintf (vect_dump, "unsupported vect permute { ");
2766 for (i = 0; i < nunits; ++i)
2767 fprintf (vect_dump, "%d ", mask[i]);
2768 fprintf (vect_dump, "}\n");
2769 }
2770 return false;
2771 }
2772
2773 while (--index >= 0)
2774 {
2775 tree t = build_int_cst (mask_element_type, mask[index]);
2776 mask_vec = tree_cons (NULL, t, mask_vec);
2777 }
2778 mask_vec = build_vector (mask_type, mask_vec);
2779 index = 0;
2780
2781 if (!analyze_only)
2782 {
2783 if (need_next_vector)
2784 {
2785 first_vec_index = second_vec_index;
2786 second_vec_index = vec_index;
2787 }
2788
2789 next_scalar_stmt = VEC_index (gimple,
2790 SLP_TREE_SCALAR_STMTS (node), scalar_index++);
2791
2792 vect_create_mask_and_perm (stmt, next_scalar_stmt,
2793 mask_vec, first_vec_index, second_vec_index,
2794 gsi, node, vectype, dr_chain,
2795 ncopies, vect_stmts_counter++);
2796 }
2797 }
2798 }
2799 }
2800 }
2801
2802 return true;
2803 }
2804
2805
2806
2807 /* Vectorize SLP instance tree in postorder. */
2808
2809 static bool
2810 vect_schedule_slp_instance (slp_tree node, slp_instance instance,
2811 unsigned int vectorization_factor)
2812 {
2813 gimple stmt;
2814 bool strided_store, is_store;
2815 gimple_stmt_iterator si;
2816 stmt_vec_info stmt_info;
2817 unsigned int vec_stmts_size, nunits, group_size;
2818 tree vectype;
2819 int i;
2820 slp_tree loads_node;
2821 slp_void_p child;
2822
2823 if (!node)
2824 return false;
2825
2826 FOR_EACH_VEC_ELT (slp_void_p, SLP_TREE_CHILDREN (node), i, child)
2827 vect_schedule_slp_instance ((slp_tree) child, instance,
2828 vectorization_factor);
2829
2830 stmt = VEC_index (gimple, SLP_TREE_SCALAR_STMTS (node), 0);
2831 stmt_info = vinfo_for_stmt (stmt);
2832
2833 /* VECTYPE is the type of the destination. */
2834 vectype = STMT_VINFO_VECTYPE (stmt_info);
2835 nunits = (unsigned int) TYPE_VECTOR_SUBPARTS (vectype);
2836 group_size = SLP_INSTANCE_GROUP_SIZE (instance);
2837
2838 /* For each SLP instance calculate the number of vector stmts to be created
2839 for the scalar stmts in each node of the SLP tree. The number of vector
2840 elements in one vector iteration is the number of scalar elements in
2841 one scalar iteration (GROUP_SIZE) multiplied by the VF divided by the
2842 vector size. */
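/* For example, with hypothetical numbers: GROUP_SIZE == 4, a VF of 4
   and NUNITS == 8 give (4 * 4) / 8 == 2 vector stmts per node.  */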
2843 vec_stmts_size = (vectorization_factor * group_size) / nunits;
2844
2845 /* In case of load permutation we have to allocate vectorized statements for
2846 all the nodes that participate in that permutation. */
2847 if (SLP_INSTANCE_LOAD_PERMUTATION (instance))
2848 {
2849 FOR_EACH_VEC_ELT (slp_tree, SLP_INSTANCE_LOADS (instance), i, loads_node)
2850 {
2851 if (!SLP_TREE_VEC_STMTS (loads_node))
2852 {
2853 SLP_TREE_VEC_STMTS (loads_node) = VEC_alloc (gimple, heap,
2854 vec_stmts_size);
2855 SLP_TREE_NUMBER_OF_VEC_STMTS (loads_node) = vec_stmts_size;
2856 }
2857 }
2858 }
2859
2860 if (!SLP_TREE_VEC_STMTS (node))
2861 {
2862 SLP_TREE_VEC_STMTS (node) = VEC_alloc (gimple, heap, vec_stmts_size);
2863 SLP_TREE_NUMBER_OF_VEC_STMTS (node) = vec_stmts_size;
2864 }
2865
2866 if (vect_print_dump_info (REPORT_DETAILS))
2867 {
2868 fprintf (vect_dump, "------>vectorizing SLP node starting from: ");
2869 print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
2870 }
2871
2872 /* Loads should be inserted before the first load. */
2873 if (SLP_INSTANCE_FIRST_LOAD_STMT (instance)
2874 && STMT_VINFO_STRIDED_ACCESS (stmt_info)
2875 && !REFERENCE_CLASS_P (gimple_get_lhs (stmt))
2876 && SLP_INSTANCE_LOAD_PERMUTATION (instance))
2877 si = gsi_for_stmt (SLP_INSTANCE_FIRST_LOAD_STMT (instance));
2878 else if (is_pattern_stmt_p (stmt_info))
2879 si = gsi_for_stmt (STMT_VINFO_RELATED_STMT (stmt_info));
2880 else
2881 si = gsi_for_stmt (stmt);
2882
2883 /* Stores should be inserted just before the last store. */
2884 if (STMT_VINFO_STRIDED_ACCESS (stmt_info)
2885 && REFERENCE_CLASS_P (gimple_get_lhs (stmt)))
2886 {
2887 gimple last_store = vect_find_last_store_in_slp_instance (instance);
2888 si = gsi_for_stmt (last_store);
2889 }
2890
2891 /* Mark the first element of the reduction chain as a reduction to properly
2892 transform the node. In the analysis phase only the last element of the
2893 chain is marked as a reduction. */
2894 if (GROUP_FIRST_ELEMENT (stmt_info) && !STMT_VINFO_STRIDED_ACCESS (stmt_info)
2895 && GROUP_FIRST_ELEMENT (stmt_info) == stmt)
2896 {
2897 STMT_VINFO_DEF_TYPE (stmt_info) = vect_reduction_def;
2898 STMT_VINFO_TYPE (stmt_info) = reduc_vec_info_type;
2899 }
2900
2901 is_store = vect_transform_stmt (stmt, &si, &strided_store, node, instance);
2902 return is_store;
2903 }
2904
2905
2906 /* Generate vector code for all SLP instances in the loop/basic block. */
2907
2908 bool
2909 vect_schedule_slp (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo)
2910 {
2911 VEC (slp_instance, heap) *slp_instances;
2912 slp_instance instance;
2913 unsigned int i, vf;
2914 bool is_store = false;
2915
2916 if (loop_vinfo)
2917 {
2918 slp_instances = LOOP_VINFO_SLP_INSTANCES (loop_vinfo);
2919 vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
2920 }
2921 else
2922 {
2923 slp_instances = BB_VINFO_SLP_INSTANCES (bb_vinfo);
2924 vf = 1;
2925 }
2926
2927 FOR_EACH_VEC_ELT (slp_instance, slp_instances, i, instance)
2928 {
2929 /* Schedule the tree of INSTANCE. */
2930 is_store = vect_schedule_slp_instance (SLP_INSTANCE_TREE (instance),
2931 instance, vf);
2932 if (vect_print_dump_info (REPORT_VECTORIZED_LOCATIONS)
2933 || vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
2934 fprintf (vect_dump, "vectorizing stmts using SLP.");
2935 }
2936
2937 FOR_EACH_VEC_ELT (slp_instance, slp_instances, i, instance)
2938 {
2939 slp_tree root = SLP_INSTANCE_TREE (instance);
2940 gimple store;
2941 unsigned int j;
2942 gimple_stmt_iterator gsi;
2943
2944 for (j = 0; VEC_iterate (gimple, SLP_TREE_SCALAR_STMTS (root), j, store)
2945 && j < SLP_INSTANCE_GROUP_SIZE (instance); j++)
2946 {
2947 if (!STMT_VINFO_DATA_REF (vinfo_for_stmt (store)))
2948 break;
2949
2950 /* Free the attached stmt_vec_info and remove the stmt. */
2951 gsi = gsi_for_stmt (store);
2952 gsi_remove (&gsi, true);
2953 free_stmt_vec_info (store);
2954 }
2955 }
2956
2957 return is_store;
2958 }
2959
2960
2961 /* Vectorize the basic block. */
2962
2963 void
2964 vect_slp_transform_bb (basic_block bb)
2965 {
2966 bb_vec_info bb_vinfo = vec_info_for_bb (bb);
2967 gimple_stmt_iterator si;
2968
2969 gcc_assert (bb_vinfo);
2970
2971 if (vect_print_dump_info (REPORT_DETAILS))
2972 fprintf (vect_dump, "SLPing BB\n");
2973
2974 for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
2975 {
2976 gimple stmt = gsi_stmt (si);
2977 stmt_vec_info stmt_info;
2978
2979 if (vect_print_dump_info (REPORT_DETAILS))
2980 {
2981 fprintf (vect_dump, "------>SLPing statement: ");
2982 print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
2983 }
2984
2985 stmt_info = vinfo_for_stmt (stmt);
2986 gcc_assert (stmt_info);
2987
2988 /* Schedule all the SLP instances when the first SLP stmt is reached. */
2989 if (STMT_SLP_TYPE (stmt_info))
2990 {
2991 vect_schedule_slp (NULL, bb_vinfo);
2992 break;
2993 }
2994 }
2995
2996 mark_sym_for_renaming (gimple_vop (cfun));
2997 /* The memory tags and pointers in vectorized statements need to
2998 have their SSA forms updated. FIXME, why can't this be delayed
2999 until all the loops have been transformed? */
3000 update_ssa (TODO_update_ssa);
3001
3002 if (vect_print_dump_info (REPORT_DETAILS))
3003 fprintf (vect_dump, "BASIC BLOCK VECTORIZED\n");
3004
3005 destroy_bb_vec_info (bb_vinfo);
3006 }
3007