inclhack.def (hpux_imaginary_i): Remove spaces.
[gcc.git] / gcc / tree-vect-slp.c
1 /* SLP - Basic Block Vectorization
2 Copyright (C) 2007, 2008, 2009 Free Software Foundation, Inc.
3 Foundation, Inc.
4 Contributed by Dorit Naishlos <dorit@il.ibm.com>
5 and Ira Rosen <irar@il.ibm.com>
6
7 This file is part of GCC.
8
9 GCC is free software; you can redistribute it and/or modify it under
10 the terms of the GNU General Public License as published by the Free
11 Software Foundation; either version 3, or (at your option) any later
12 version.
13
14 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
15 WARRANTY; without even the implied warranty of MERCHANTABILITY or
16 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
17 for more details.
18
19 You should have received a copy of the GNU General Public License
20 along with GCC; see the file COPYING3. If not see
21 <http://www.gnu.org/licenses/>. */
22
23 #include "config.h"
24 #include "system.h"
25 #include "coretypes.h"
26 #include "tm.h"
27 #include "ggc.h"
28 #include "tree.h"
29 #include "target.h"
30 #include "basic-block.h"
31 #include "diagnostic.h"
32 #include "tree-flow.h"
33 #include "tree-dump.h"
34 #include "cfgloop.h"
35 #include "cfglayout.h"
36 #include "expr.h"
37 #include "recog.h"
38 #include "optabs.h"
39 #include "tree-vectorizer.h"
40
41 /* Extract the location of the basic block in the source code.
42 Return the basic block location if succeed and NULL if not. */
43
44 LOC
45 find_bb_location (basic_block bb)
46 {
47 gimple stmt = NULL;
48 gimple_stmt_iterator si;
49
50 if (!bb)
51 return UNKNOWN_LOC;
52
53 for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
54 {
55 stmt = gsi_stmt (si);
56 if (gimple_location (stmt) != UNKNOWN_LOC)
57 return gimple_location (stmt);
58 }
59
60 return UNKNOWN_LOC;
61 }
62
63
64 /* Recursively free the memory allocated for the SLP tree rooted at NODE. */
65
66 static void
67 vect_free_slp_tree (slp_tree node)
68 {
69 if (!node)
70 return;
71
72 if (SLP_TREE_LEFT (node))
73 vect_free_slp_tree (SLP_TREE_LEFT (node));
74
75 if (SLP_TREE_RIGHT (node))
76 vect_free_slp_tree (SLP_TREE_RIGHT (node));
77
78 VEC_free (gimple, heap, SLP_TREE_SCALAR_STMTS (node));
79
80 if (SLP_TREE_VEC_STMTS (node))
81 VEC_free (gimple, heap, SLP_TREE_VEC_STMTS (node));
82
83 free (node);
84 }
85
86
87 /* Free the memory allocated for the SLP instance. */
88
89 void
90 vect_free_slp_instance (slp_instance instance)
91 {
92 vect_free_slp_tree (SLP_INSTANCE_TREE (instance));
93 VEC_free (int, heap, SLP_INSTANCE_LOAD_PERMUTATION (instance));
94 VEC_free (slp_tree, heap, SLP_INSTANCE_LOADS (instance));
95 }
96
97
98 /* Get the defs for the rhs of STMT (collect them in DEF_STMTS0/1), check that
99 they are of a legal type and that they match the defs of the first stmt of
100 the SLP group (stored in FIRST_STMT_...). */
101
102 static bool
103 vect_get_and_check_slp_defs (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo,
104 slp_tree slp_node, gimple stmt,
105 VEC (gimple, heap) **def_stmts0,
106 VEC (gimple, heap) **def_stmts1,
107 enum vect_def_type *first_stmt_dt0,
108 enum vect_def_type *first_stmt_dt1,
109 tree *first_stmt_def0_type,
110 tree *first_stmt_def1_type,
111 tree *first_stmt_const_oprnd,
112 int ncopies_for_cost,
113 bool *pattern0, bool *pattern1)
114 {
115 tree oprnd;
116 unsigned int i, number_of_oprnds;
117 tree def;
118 gimple def_stmt;
119 enum vect_def_type dt[2] = {vect_unknown_def_type, vect_unknown_def_type};
120 stmt_vec_info stmt_info =
121 vinfo_for_stmt (VEC_index (gimple, SLP_TREE_SCALAR_STMTS (slp_node), 0));
122 enum gimple_rhs_class rhs_class;
123 struct loop *loop = NULL;
124
125 if (loop_vinfo)
126 loop = LOOP_VINFO_LOOP (loop_vinfo);
127
128 rhs_class = get_gimple_rhs_class (gimple_assign_rhs_code (stmt));
129 number_of_oprnds = gimple_num_ops (stmt) - 1; /* RHS only */
130
131 for (i = 0; i < number_of_oprnds; i++)
132 {
133 oprnd = gimple_op (stmt, i + 1);
134
135 if (!vect_is_simple_use (oprnd, loop_vinfo, bb_vinfo, &def_stmt, &def,
136 &dt[i])
137 || (!def_stmt && dt[i] != vect_constant_def))
138 {
139 if (vect_print_dump_info (REPORT_SLP))
140 {
141 fprintf (vect_dump, "Build SLP failed: can't find def for ");
142 print_generic_expr (vect_dump, oprnd, TDF_SLIM);
143 }
144
145 return false;
146 }
147
148 /* Check if DEF_STMT is a part of a pattern in LOOP and get the def stmt
149 from the pattern. Check that all the stmts of the node are in the
150 pattern. */
151 if (loop && def_stmt && gimple_bb (def_stmt)
152 && flow_bb_inside_loop_p (loop, gimple_bb (def_stmt))
153 && vinfo_for_stmt (def_stmt)
154 && STMT_VINFO_IN_PATTERN_P (vinfo_for_stmt (def_stmt)))
155 {
156 if (!*first_stmt_dt0)
157 *pattern0 = true;
158 else
159 {
160 if (i == 1 && !*first_stmt_dt1)
161 *pattern1 = true;
162 else if ((i == 0 && !*pattern0) || (i == 1 && !*pattern1))
163 {
164 if (vect_print_dump_info (REPORT_DETAILS))
165 {
166 fprintf (vect_dump, "Build SLP failed: some of the stmts"
167 " are in a pattern, and others are not ");
168 print_generic_expr (vect_dump, oprnd, TDF_SLIM);
169 }
170
171 return false;
172 }
173 }
174
175 def_stmt = STMT_VINFO_RELATED_STMT (vinfo_for_stmt (def_stmt));
176 dt[i] = STMT_VINFO_DEF_TYPE (vinfo_for_stmt (def_stmt));
177
178 if (*dt == vect_unknown_def_type)
179 {
180 if (vect_print_dump_info (REPORT_DETAILS))
181 fprintf (vect_dump, "Unsupported pattern.");
182 return false;
183 }
184
185 switch (gimple_code (def_stmt))
186 {
187 case GIMPLE_PHI:
188 def = gimple_phi_result (def_stmt);
189 break;
190
191 case GIMPLE_ASSIGN:
192 def = gimple_assign_lhs (def_stmt);
193 break;
194
195 default:
196 if (vect_print_dump_info (REPORT_DETAILS))
197 fprintf (vect_dump, "unsupported defining stmt: ");
198 return false;
199 }
200 }
201
202 if (!*first_stmt_dt0)
203 {
204 /* op0 of the first stmt of the group - store its info. */
205 *first_stmt_dt0 = dt[i];
206 if (def)
207 *first_stmt_def0_type = TREE_TYPE (def);
208 else
209 *first_stmt_const_oprnd = oprnd;
210
211 /* Analyze costs (for the first stmt of the group only). */
212 if (rhs_class != GIMPLE_SINGLE_RHS)
213 /* Not memory operation (we don't call this functions for loads). */
214 vect_model_simple_cost (stmt_info, ncopies_for_cost, dt, slp_node);
215 else
216 /* Store. */
217 vect_model_store_cost (stmt_info, ncopies_for_cost, dt[0], slp_node);
218 }
219
220 else
221 {
222 if (!*first_stmt_dt1 && i == 1)
223 {
224 /* op1 of the first stmt of the group - store its info. */
225 *first_stmt_dt1 = dt[i];
226 if (def)
227 *first_stmt_def1_type = TREE_TYPE (def);
228 else
229 {
230 /* We assume that the stmt contains only one constant
231 operand. We fail otherwise, to be on the safe side. */
232 if (*first_stmt_const_oprnd)
233 {
234 if (vect_print_dump_info (REPORT_SLP))
235 fprintf (vect_dump, "Build SLP failed: two constant "
236 "oprnds in stmt");
237 return false;
238 }
239 *first_stmt_const_oprnd = oprnd;
240 }
241 }
242 else
243 {
244 /* Not first stmt of the group, check that the def-stmt/s match
245 the def-stmt/s of the first stmt. */
246 if ((i == 0
247 && (*first_stmt_dt0 != dt[i]
248 || (*first_stmt_def0_type && def
249 && *first_stmt_def0_type != TREE_TYPE (def))))
250 || (i == 1
251 && (*first_stmt_dt1 != dt[i]
252 || (*first_stmt_def1_type && def
253 && *first_stmt_def1_type != TREE_TYPE (def))))
254 || (!def
255 && TREE_TYPE (*first_stmt_const_oprnd)
256 != TREE_TYPE (oprnd)))
257 {
258 if (vect_print_dump_info (REPORT_SLP))
259 fprintf (vect_dump, "Build SLP failed: different types ");
260
261 return false;
262 }
263 }
264 }
265
266 /* Check the types of the definitions. */
267 switch (dt[i])
268 {
269 case vect_constant_def:
270 case vect_external_def:
271 break;
272
273 case vect_internal_def:
274 if (i == 0)
275 VEC_safe_push (gimple, heap, *def_stmts0, def_stmt);
276 else
277 VEC_safe_push (gimple, heap, *def_stmts1, def_stmt);
278 break;
279
280 default:
281 /* FORNOW: Not supported. */
282 if (vect_print_dump_info (REPORT_SLP))
283 {
284 fprintf (vect_dump, "Build SLP failed: illegal type of def ");
285 print_generic_expr (vect_dump, def, TDF_SLIM);
286 }
287
288 return false;
289 }
290 }
291
292 return true;
293 }
294
295
296 /* Recursively build an SLP tree starting from NODE.
297 Fail (and return FALSE) if def-stmts are not isomorphic, require data
298 permutation or are of unsupported types of operation. Otherwise, return
299 TRUE. */
300
301 static bool
302 vect_build_slp_tree (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo,
303 slp_tree *node, unsigned int group_size,
304 int *inside_cost, int *outside_cost,
305 int ncopies_for_cost, unsigned int *max_nunits,
306 VEC (int, heap) **load_permutation,
307 VEC (slp_tree, heap) **loads,
308 unsigned int vectorization_factor)
309 {
310 VEC (gimple, heap) *def_stmts0 = VEC_alloc (gimple, heap, group_size);
311 VEC (gimple, heap) *def_stmts1 = VEC_alloc (gimple, heap, group_size);
312 unsigned int i;
313 VEC (gimple, heap) *stmts = SLP_TREE_SCALAR_STMTS (*node);
314 gimple stmt = VEC_index (gimple, stmts, 0);
315 enum vect_def_type first_stmt_dt0 = vect_uninitialized_def;
316 enum vect_def_type first_stmt_dt1 = vect_uninitialized_def;
317 enum tree_code first_stmt_code = ERROR_MARK, rhs_code;
318 tree first_stmt_def1_type = NULL_TREE, first_stmt_def0_type = NULL_TREE;
319 tree lhs;
320 bool stop_recursion = false, need_same_oprnds = false;
321 tree vectype, scalar_type, first_op1 = NULL_TREE;
322 unsigned int ncopies;
323 optab optab;
324 int icode;
325 enum machine_mode optab_op2_mode;
326 enum machine_mode vec_mode;
327 tree first_stmt_const_oprnd = NULL_TREE;
328 struct data_reference *first_dr;
329 bool pattern0 = false, pattern1 = false;
330 HOST_WIDE_INT dummy;
331 bool permutation = false;
332 unsigned int load_place;
333 gimple first_load;
334
335 /* For every stmt in NODE find its def stmt/s. */
336 for (i = 0; VEC_iterate (gimple, stmts, i, stmt); i++)
337 {
338 if (vect_print_dump_info (REPORT_SLP))
339 {
340 fprintf (vect_dump, "Build SLP for ");
341 print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
342 }
343
344 lhs = gimple_get_lhs (stmt);
345 if (lhs == NULL_TREE)
346 {
347 if (vect_print_dump_info (REPORT_SLP))
348 {
349 fprintf (vect_dump,
350 "Build SLP failed: not GIMPLE_ASSIGN nor GIMPLE_CALL");
351 print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
352 }
353
354 return false;
355 }
356
357 scalar_type = vect_get_smallest_scalar_type (stmt, &dummy, &dummy);
358 vectype = get_vectype_for_scalar_type (scalar_type);
359 if (!vectype)
360 {
361 if (vect_print_dump_info (REPORT_SLP))
362 {
363 fprintf (vect_dump, "Build SLP failed: unsupported data-type ");
364 print_generic_expr (vect_dump, scalar_type, TDF_SLIM);
365 }
366 return false;
367 }
368
369 ncopies = vectorization_factor / TYPE_VECTOR_SUBPARTS (vectype);
370 if (ncopies != 1)
371 {
372 if (vect_print_dump_info (REPORT_SLP))
373 fprintf (vect_dump, "SLP with multiple types ");
374
375 /* FORNOW: multiple types are unsupported in BB SLP. */
376 if (bb_vinfo)
377 return false;
378 }
379
380 /* In case of multiple types we need to detect the smallest type. */
381 if (*max_nunits < TYPE_VECTOR_SUBPARTS (vectype))
382 *max_nunits = TYPE_VECTOR_SUBPARTS (vectype);
383
384 if (is_gimple_call (stmt))
385 rhs_code = CALL_EXPR;
386 else
387 rhs_code = gimple_assign_rhs_code (stmt);
388
389 /* Check the operation. */
390 if (i == 0)
391 {
392 first_stmt_code = rhs_code;
393
394 /* Shift arguments should be equal in all the packed stmts for a
395 vector shift with scalar shift operand. */
396 if (rhs_code == LSHIFT_EXPR || rhs_code == RSHIFT_EXPR
397 || rhs_code == LROTATE_EXPR
398 || rhs_code == RROTATE_EXPR)
399 {
400 vec_mode = TYPE_MODE (vectype);
401
402 /* First see if we have a vector/vector shift. */
403 optab = optab_for_tree_code (rhs_code, vectype,
404 optab_vector);
405
406 if (!optab
407 || (optab->handlers[(int) vec_mode].insn_code
408 == CODE_FOR_nothing))
409 {
410 /* No vector/vector shift, try for a vector/scalar shift. */
411 optab = optab_for_tree_code (rhs_code, vectype,
412 optab_scalar);
413
414 if (!optab)
415 {
416 if (vect_print_dump_info (REPORT_SLP))
417 fprintf (vect_dump, "Build SLP failed: no optab.");
418 return false;
419 }
420 icode = (int) optab->handlers[(int) vec_mode].insn_code;
421 if (icode == CODE_FOR_nothing)
422 {
423 if (vect_print_dump_info (REPORT_SLP))
424 fprintf (vect_dump, "Build SLP failed: "
425 "op not supported by target.");
426 return false;
427 }
428 optab_op2_mode = insn_data[icode].operand[2].mode;
429 if (!VECTOR_MODE_P (optab_op2_mode))
430 {
431 need_same_oprnds = true;
432 first_op1 = gimple_assign_rhs2 (stmt);
433 }
434 }
435 }
436 }
437 else
438 {
439 if (first_stmt_code != rhs_code
440 && (first_stmt_code != IMAGPART_EXPR
441 || rhs_code != REALPART_EXPR)
442 && (first_stmt_code != REALPART_EXPR
443 || rhs_code != IMAGPART_EXPR))
444 {
445 if (vect_print_dump_info (REPORT_SLP))
446 {
447 fprintf (vect_dump,
448 "Build SLP failed: different operation in stmt ");
449 print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
450 }
451
452 return false;
453 }
454
455 if (need_same_oprnds
456 && !operand_equal_p (first_op1, gimple_assign_rhs2 (stmt), 0))
457 {
458 if (vect_print_dump_info (REPORT_SLP))
459 {
460 fprintf (vect_dump,
461 "Build SLP failed: different shift arguments in ");
462 print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
463 }
464
465 return false;
466 }
467 }
468
469 /* Strided store or load. */
470 if (STMT_VINFO_STRIDED_ACCESS (vinfo_for_stmt (stmt)))
471 {
472 if (REFERENCE_CLASS_P (lhs))
473 {
474 /* Store. */
475 if (!vect_get_and_check_slp_defs (loop_vinfo, bb_vinfo, *node,
476 stmt, &def_stmts0, &def_stmts1,
477 &first_stmt_dt0,
478 &first_stmt_dt1,
479 &first_stmt_def0_type,
480 &first_stmt_def1_type,
481 &first_stmt_const_oprnd,
482 ncopies_for_cost,
483 &pattern0, &pattern1))
484 return false;
485 }
486 else
487 {
488 /* Load. */
489 /* FORNOW: Check that there is no gap between the loads. */
490 if ((DR_GROUP_FIRST_DR (vinfo_for_stmt (stmt)) == stmt
491 && DR_GROUP_GAP (vinfo_for_stmt (stmt)) != 0)
492 || (DR_GROUP_FIRST_DR (vinfo_for_stmt (stmt)) != stmt
493 && DR_GROUP_GAP (vinfo_for_stmt (stmt)) != 1))
494 {
495 if (vect_print_dump_info (REPORT_SLP))
496 {
497 fprintf (vect_dump, "Build SLP failed: strided "
498 "loads have gaps ");
499 print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
500 }
501
502 return false;
503 }
504
505 /* Check that the size of interleaved loads group is not
506 greater than the SLP group size. */
507 if (DR_GROUP_SIZE (vinfo_for_stmt (stmt))
508 > ncopies * group_size)
509 {
510 if (vect_print_dump_info (REPORT_SLP))
511 {
512 fprintf (vect_dump, "Build SLP failed: the number of "
513 "interleaved loads is greater than"
514 " the SLP group size ");
515 print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
516 }
517
518 return false;
519 }
520
521 first_load = DR_GROUP_FIRST_DR (vinfo_for_stmt (stmt));
522
523 if (first_load == stmt)
524 {
525 first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (stmt));
526 if (vect_supportable_dr_alignment (first_dr)
527 == dr_unaligned_unsupported)
528 {
529 if (vect_print_dump_info (REPORT_SLP))
530 {
531 fprintf (vect_dump, "Build SLP failed: unsupported "
532 "unaligned load ");
533 print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
534 }
535
536 return false;
537 }
538
539 /* Analyze costs (for the first stmt in the group). */
540 vect_model_load_cost (vinfo_for_stmt (stmt),
541 ncopies_for_cost, *node);
542 }
543
544 /* Store the place of this load in the interleaving chain. In
545 case that permutation is needed we later decide if a specific
546 permutation is supported. */
547 load_place = vect_get_place_in_interleaving_chain (stmt,
548 first_load);
549 if (load_place != i)
550 permutation = true;
551
552 VEC_safe_push (int, heap, *load_permutation, load_place);
553
554 /* We stop the tree when we reach a group of loads. */
555 stop_recursion = true;
556 continue;
557 }
558 } /* Strided access. */
559 else
560 {
561 if (TREE_CODE_CLASS (rhs_code) == tcc_reference)
562 {
563 /* Not strided load. */
564 if (vect_print_dump_info (REPORT_SLP))
565 {
566 fprintf (vect_dump, "Build SLP failed: not strided load ");
567 print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
568 }
569
570 /* FORNOW: Not strided loads are not supported. */
571 return false;
572 }
573
574 /* Not memory operation. */
575 if (TREE_CODE_CLASS (rhs_code) != tcc_binary
576 && TREE_CODE_CLASS (rhs_code) != tcc_unary)
577 {
578 if (vect_print_dump_info (REPORT_SLP))
579 {
580 fprintf (vect_dump, "Build SLP failed: operation");
581 fprintf (vect_dump, " unsupported ");
582 print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
583 }
584
585 return false;
586 }
587
588 /* Find the def-stmts. */
589 if (!vect_get_and_check_slp_defs (loop_vinfo, bb_vinfo, *node, stmt,
590 &def_stmts0, &def_stmts1,
591 &first_stmt_dt0, &first_stmt_dt1,
592 &first_stmt_def0_type,
593 &first_stmt_def1_type,
594 &first_stmt_const_oprnd,
595 ncopies_for_cost,
596 &pattern0, &pattern1))
597 return false;
598 }
599 }
600
601 /* Add the costs of the node to the overall instance costs. */
602 *inside_cost += SLP_TREE_INSIDE_OF_LOOP_COST (*node);
603 *outside_cost += SLP_TREE_OUTSIDE_OF_LOOP_COST (*node);
604
605 /* Strided loads were reached - stop the recursion. */
606 if (stop_recursion)
607 {
608 if (permutation)
609 {
610 VEC_safe_push (slp_tree, heap, *loads, *node);
611 *inside_cost += TARG_VEC_PERMUTE_COST * group_size;
612 }
613
614 return true;
615 }
616
617 /* Create SLP_TREE nodes for the definition node/s. */
618 if (first_stmt_dt0 == vect_internal_def)
619 {
620 slp_tree left_node = XNEW (struct _slp_tree);
621 SLP_TREE_SCALAR_STMTS (left_node) = def_stmts0;
622 SLP_TREE_VEC_STMTS (left_node) = NULL;
623 SLP_TREE_LEFT (left_node) = NULL;
624 SLP_TREE_RIGHT (left_node) = NULL;
625 SLP_TREE_OUTSIDE_OF_LOOP_COST (left_node) = 0;
626 SLP_TREE_INSIDE_OF_LOOP_COST (left_node) = 0;
627 if (!vect_build_slp_tree (loop_vinfo, bb_vinfo, &left_node, group_size,
628 inside_cost, outside_cost, ncopies_for_cost,
629 max_nunits, load_permutation, loads,
630 vectorization_factor))
631 return false;
632
633 SLP_TREE_LEFT (*node) = left_node;
634 }
635
636 if (first_stmt_dt1 == vect_internal_def)
637 {
638 slp_tree right_node = XNEW (struct _slp_tree);
639 SLP_TREE_SCALAR_STMTS (right_node) = def_stmts1;
640 SLP_TREE_VEC_STMTS (right_node) = NULL;
641 SLP_TREE_LEFT (right_node) = NULL;
642 SLP_TREE_RIGHT (right_node) = NULL;
643 SLP_TREE_OUTSIDE_OF_LOOP_COST (right_node) = 0;
644 SLP_TREE_INSIDE_OF_LOOP_COST (right_node) = 0;
645 if (!vect_build_slp_tree (loop_vinfo, bb_vinfo, &right_node, group_size,
646 inside_cost, outside_cost, ncopies_for_cost,
647 max_nunits, load_permutation, loads,
648 vectorization_factor))
649 return false;
650
651 SLP_TREE_RIGHT (*node) = right_node;
652 }
653
654 return true;
655 }
656
657
658 static void
659 vect_print_slp_tree (slp_tree node)
660 {
661 int i;
662 gimple stmt;
663
664 if (!node)
665 return;
666
667 fprintf (vect_dump, "node ");
668 for (i = 0; VEC_iterate (gimple, SLP_TREE_SCALAR_STMTS (node), i, stmt); i++)
669 {
670 fprintf (vect_dump, "\n\tstmt %d ", i);
671 print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
672 }
673 fprintf (vect_dump, "\n");
674
675 vect_print_slp_tree (SLP_TREE_LEFT (node));
676 vect_print_slp_tree (SLP_TREE_RIGHT (node));
677 }
678
679
680 /* Mark the tree rooted at NODE with MARK (PURE_SLP or HYBRID).
681 If MARK is HYBRID, it refers to a specific stmt in NODE (the stmt at index
682 J). Otherwise, MARK is PURE_SLP and J is -1, which indicates that all the
683 stmts in NODE are to be marked. */
684
685 static void
686 vect_mark_slp_stmts (slp_tree node, enum slp_vect_type mark, int j)
687 {
688 int i;
689 gimple stmt;
690
691 if (!node)
692 return;
693
694 for (i = 0; VEC_iterate (gimple, SLP_TREE_SCALAR_STMTS (node), i, stmt); i++)
695 if (j < 0 || i == j)
696 STMT_SLP_TYPE (vinfo_for_stmt (stmt)) = mark;
697
698 vect_mark_slp_stmts (SLP_TREE_LEFT (node), mark, j);
699 vect_mark_slp_stmts (SLP_TREE_RIGHT (node), mark, j);
700 }
701
702
703 /* Mark the statements of the tree rooted at NODE as relevant (vect_used). */
704
705 static void
706 vect_mark_slp_stmts_relevant (slp_tree node)
707 {
708 int i;
709 gimple stmt;
710 stmt_vec_info stmt_info;
711
712 if (!node)
713 return;
714
715 for (i = 0; VEC_iterate (gimple, SLP_TREE_SCALAR_STMTS (node), i, stmt); i++)
716 {
717 stmt_info = vinfo_for_stmt (stmt);
718 gcc_assert (!STMT_VINFO_RELEVANT (stmt_info)
719 || STMT_VINFO_RELEVANT (stmt_info) == vect_used_in_scope);
720 STMT_VINFO_RELEVANT (stmt_info) = vect_used_in_scope;
721 }
722
723 vect_mark_slp_stmts_relevant (SLP_TREE_LEFT (node));
724 vect_mark_slp_stmts_relevant (SLP_TREE_RIGHT (node));
725 }
726
727
728 /* Check if the permutation required by the SLP INSTANCE is supported.
729 Reorganize the SLP nodes stored in SLP_INSTANCE_LOADS if needed. */
730
731 static bool
732 vect_supported_slp_permutation_p (slp_instance instance)
733 {
734 slp_tree node = VEC_index (slp_tree, SLP_INSTANCE_LOADS (instance), 0);
735 gimple stmt = VEC_index (gimple, SLP_TREE_SCALAR_STMTS (node), 0);
736 gimple first_load = DR_GROUP_FIRST_DR (vinfo_for_stmt (stmt));
737 VEC (slp_tree, heap) *sorted_loads = NULL;
738 int index;
739 slp_tree *tmp_loads = NULL;
740 int group_size = SLP_INSTANCE_GROUP_SIZE (instance), i, j;
741 slp_tree load;
742
743 /* FORNOW: The only supported loads permutation is loads from the same
744 location in all the loads in the node, when the data-refs in
745 nodes of LOADS constitute an interleaving chain.
746 Sort the nodes according to the order of accesses in the chain. */
747 tmp_loads = (slp_tree *) xmalloc (sizeof (slp_tree) * group_size);
748 for (i = 0, j = 0;
749 VEC_iterate (int, SLP_INSTANCE_LOAD_PERMUTATION (instance), i, index)
750 && VEC_iterate (slp_tree, SLP_INSTANCE_LOADS (instance), j, load);
751 i += group_size, j++)
752 {
753 gimple scalar_stmt = VEC_index (gimple, SLP_TREE_SCALAR_STMTS (load), 0);
754 /* Check that the loads are all in the same interleaving chain. */
755 if (DR_GROUP_FIRST_DR (vinfo_for_stmt (scalar_stmt)) != first_load)
756 {
757 if (vect_print_dump_info (REPORT_DETAILS))
758 {
759 fprintf (vect_dump, "Build SLP failed: unsupported data "
760 "permutation ");
761 print_gimple_stmt (vect_dump, scalar_stmt, 0, TDF_SLIM);
762 }
763
764 free (tmp_loads);
765 return false;
766 }
767
768 tmp_loads[index] = load;
769 }
770
771 sorted_loads = VEC_alloc (slp_tree, heap, group_size);
772 for (i = 0; i < group_size; i++)
773 VEC_safe_push (slp_tree, heap, sorted_loads, tmp_loads[i]);
774
775 VEC_free (slp_tree, heap, SLP_INSTANCE_LOADS (instance));
776 SLP_INSTANCE_LOADS (instance) = sorted_loads;
777 free (tmp_loads);
778
779 if (!vect_transform_slp_perm_load (stmt, NULL, NULL,
780 SLP_INSTANCE_UNROLLING_FACTOR (instance),
781 instance, true))
782 return false;
783
784 return true;
785 }
786
787
788 /* Check if the required load permutation is supported.
789 LOAD_PERMUTATION contains a list of indices of the loads.
790 In SLP this permutation is relative to the order of strided stores that are
791 the base of the SLP instance. */
792
793 static bool
794 vect_supported_load_permutation_p (slp_instance slp_instn, int group_size,
795 VEC (int, heap) *load_permutation)
796 {
797 int i = 0, j, prev = -1, next, k;
798 bool supported;
799
800 /* FORNOW: permutations are only supported in SLP. */
801 if (!slp_instn)
802 return false;
803
804 if (vect_print_dump_info (REPORT_SLP))
805 {
806 fprintf (vect_dump, "Load permutation ");
807 for (i = 0; VEC_iterate (int, load_permutation, i, next); i++)
808 fprintf (vect_dump, "%d ", next);
809 }
810
811 /* FORNOW: the only supported permutation is 0..01..1.. of length equal to
812 GROUP_SIZE and where each sequence of same drs is of GROUP_SIZE length as
813 well. */
814 if (VEC_length (int, load_permutation)
815 != (unsigned int) (group_size * group_size))
816 return false;
817
818 supported = true;
819 for (j = 0; j < group_size; j++)
820 {
821 for (i = j * group_size, k = 0;
822 VEC_iterate (int, load_permutation, i, next) && k < group_size;
823 i++, k++)
824 {
825 if (i != j * group_size && next != prev)
826 {
827 supported = false;
828 break;
829 }
830
831 prev = next;
832 }
833 }
834
835 if (supported && i == group_size * group_size
836 && vect_supported_slp_permutation_p (slp_instn))
837 return true;
838
839 return false;
840 }
841
842
843 /* Find the first load in the loop that belongs to INSTANCE.
844 When loads are in several SLP nodes, there can be a case in which the first
845 load does not appear in the first SLP node to be transformed, causing
846 incorrect order of statements. Since we generate all the loads together,
847 they must be inserted before the first load of the SLP instance and not
848 before the first load of the first node of the instance. */
849 static gimple
850 vect_find_first_load_in_slp_instance (slp_instance instance)
851 {
852 int i, j;
853 slp_tree load_node;
854 gimple first_load = NULL, load;
855
856 for (i = 0;
857 VEC_iterate (slp_tree, SLP_INSTANCE_LOADS (instance), i, load_node);
858 i++)
859 for (j = 0;
860 VEC_iterate (gimple, SLP_TREE_SCALAR_STMTS (load_node), j, load);
861 j++)
862 first_load = get_earlier_stmt (load, first_load);
863
864 return first_load;
865 }
866
867
868 /* Analyze an SLP instance starting from a group of strided stores. Call
869 vect_build_slp_tree to build a tree of packed stmts if possible.
870 Return FALSE if it's impossible to SLP any stmt in the loop. */
871
872 static bool
873 vect_analyze_slp_instance (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo,
874 gimple stmt)
875 {
876 slp_instance new_instance;
877 slp_tree node = XNEW (struct _slp_tree);
878 unsigned int group_size = DR_GROUP_SIZE (vinfo_for_stmt (stmt));
879 unsigned int unrolling_factor = 1, nunits;
880 tree vectype, scalar_type;
881 gimple next;
882 unsigned int vectorization_factor = 0, ncopies;
883 int inside_cost = 0, outside_cost = 0, ncopies_for_cost;
884 unsigned int max_nunits = 0;
885 VEC (int, heap) *load_permutation;
886 VEC (slp_tree, heap) *loads;
887
888 scalar_type = TREE_TYPE (DR_REF (STMT_VINFO_DATA_REF (
889 vinfo_for_stmt (stmt))));
890 vectype = get_vectype_for_scalar_type (scalar_type);
891 if (!vectype)
892 {
893 if (vect_print_dump_info (REPORT_SLP))
894 {
895 fprintf (vect_dump, "Build SLP failed: unsupported data-type ");
896 print_generic_expr (vect_dump, scalar_type, TDF_SLIM);
897 }
898 return false;
899 }
900
901 nunits = TYPE_VECTOR_SUBPARTS (vectype);
902 if (loop_vinfo)
903 vectorization_factor = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
904 else
905 /* No multitypes in BB SLP. */
906 vectorization_factor = nunits;
907
908 ncopies = vectorization_factor / nunits;
909
910 /* Calculate the unrolling factor. */
911 unrolling_factor = least_common_multiple (nunits, group_size) / group_size;
912 if (unrolling_factor != 1 && !loop_vinfo)
913 {
914 if (vect_print_dump_info (REPORT_SLP))
915 fprintf (vect_dump, "Build SLP failed: unrolling required in basic"
916 " block SLP");
917
918 return false;
919 }
920
921 /* Create a node (a root of the SLP tree) for the packed strided stores. */
922 SLP_TREE_SCALAR_STMTS (node) = VEC_alloc (gimple, heap, group_size);
923 next = stmt;
924 /* Collect the stores and store them in SLP_TREE_SCALAR_STMTS. */
925 while (next)
926 {
927 VEC_safe_push (gimple, heap, SLP_TREE_SCALAR_STMTS (node), next);
928 next = DR_GROUP_NEXT_DR (vinfo_for_stmt (next));
929 }
930
931 SLP_TREE_VEC_STMTS (node) = NULL;
932 SLP_TREE_NUMBER_OF_VEC_STMTS (node) = 0;
933 SLP_TREE_LEFT (node) = NULL;
934 SLP_TREE_RIGHT (node) = NULL;
935 SLP_TREE_OUTSIDE_OF_LOOP_COST (node) = 0;
936 SLP_TREE_INSIDE_OF_LOOP_COST (node) = 0;
937
938 /* Calculate the number of vector stmts to create based on the unrolling
939 factor (number of vectors is 1 if NUNITS >= GROUP_SIZE, and is
940 GROUP_SIZE / NUNITS otherwise. */
941 ncopies_for_cost = unrolling_factor * group_size / nunits;
942
943 load_permutation = VEC_alloc (int, heap, group_size * group_size);
944 loads = VEC_alloc (slp_tree, heap, group_size);
945
946 /* Build the tree for the SLP instance. */
947 if (vect_build_slp_tree (loop_vinfo, bb_vinfo, &node, group_size,
948 &inside_cost, &outside_cost, ncopies_for_cost,
949 &max_nunits, &load_permutation, &loads,
950 vectorization_factor))
951 {
952 /* Create a new SLP instance. */
953 new_instance = XNEW (struct _slp_instance);
954 SLP_INSTANCE_TREE (new_instance) = node;
955 SLP_INSTANCE_GROUP_SIZE (new_instance) = group_size;
956 /* Calculate the unrolling factor based on the smallest type in the
957 loop. */
958 if (max_nunits > nunits)
959 unrolling_factor = least_common_multiple (max_nunits, group_size)
960 / group_size;
961
962 SLP_INSTANCE_UNROLLING_FACTOR (new_instance) = unrolling_factor;
963 SLP_INSTANCE_OUTSIDE_OF_LOOP_COST (new_instance) = outside_cost;
964 SLP_INSTANCE_INSIDE_OF_LOOP_COST (new_instance) = inside_cost;
965 SLP_INSTANCE_LOADS (new_instance) = loads;
966 SLP_INSTANCE_FIRST_LOAD_STMT (new_instance) = NULL;
967 SLP_INSTANCE_LOAD_PERMUTATION (new_instance) = load_permutation;
968 if (VEC_length (slp_tree, loads))
969 {
970 if (!vect_supported_load_permutation_p (new_instance, group_size,
971 load_permutation))
972 {
973 if (vect_print_dump_info (REPORT_SLP))
974 {
975 fprintf (vect_dump, "Build SLP failed: unsupported load "
976 "permutation ");
977 print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
978 }
979
980 vect_free_slp_instance (new_instance);
981 return false;
982 }
983
984 SLP_INSTANCE_FIRST_LOAD_STMT (new_instance)
985 = vect_find_first_load_in_slp_instance (new_instance);
986 }
987 else
988 VEC_free (int, heap, SLP_INSTANCE_LOAD_PERMUTATION (new_instance));
989
990 if (loop_vinfo)
991 VEC_safe_push (slp_instance, heap,
992 LOOP_VINFO_SLP_INSTANCES (loop_vinfo),
993 new_instance);
994 else
995 VEC_safe_push (slp_instance, heap, BB_VINFO_SLP_INSTANCES (bb_vinfo),
996 new_instance);
997
998 if (vect_print_dump_info (REPORT_SLP))
999 vect_print_slp_tree (node);
1000
1001 return true;
1002 }
1003
1004 /* Failed to SLP. */
1005 /* Free the allocated memory. */
1006 vect_free_slp_tree (node);
1007 VEC_free (int, heap, load_permutation);
1008 VEC_free (slp_tree, heap, loads);
1009
1010 return false;
1011 }
1012
1013
1014 /* Check if there are stmts in the loop can be vectorized using SLP. Build SLP
1015 trees of packed scalar stmts if SLP is possible. */
1016
1017 bool
1018 vect_analyze_slp (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo)
1019 {
1020 unsigned int i;
1021 VEC (gimple, heap) *strided_stores;
1022 gimple store;
1023 bool ok = false;
1024
1025 if (vect_print_dump_info (REPORT_SLP))
1026 fprintf (vect_dump, "=== vect_analyze_slp ===");
1027
1028 if (loop_vinfo)
1029 strided_stores = LOOP_VINFO_STRIDED_STORES (loop_vinfo);
1030 else
1031 strided_stores = BB_VINFO_STRIDED_STORES (bb_vinfo);
1032
1033 for (i = 0; VEC_iterate (gimple, strided_stores, i, store); i++)
1034 if (vect_analyze_slp_instance (loop_vinfo, bb_vinfo, store))
1035 ok = true;
1036
1037 if (bb_vinfo && !ok)
1038 {
1039 if (vect_print_dump_info (REPORT_SLP))
1040 fprintf (vect_dump, "Failed to SLP the basic block.");
1041
1042 return false;
1043 }
1044
1045 return true;
1046 }
1047
1048
1049 /* For each possible SLP instance decide whether to SLP it and calculate overall
1050 unrolling factor needed to SLP the loop. */
1051
1052 void
1053 vect_make_slp_decision (loop_vec_info loop_vinfo)
1054 {
1055 unsigned int i, unrolling_factor = 1;
1056 VEC (slp_instance, heap) *slp_instances = LOOP_VINFO_SLP_INSTANCES (loop_vinfo);
1057 slp_instance instance;
1058 int decided_to_slp = 0;
1059
1060 if (vect_print_dump_info (REPORT_SLP))
1061 fprintf (vect_dump, "=== vect_make_slp_decision ===");
1062
1063 for (i = 0; VEC_iterate (slp_instance, slp_instances, i, instance); i++)
1064 {
1065 /* FORNOW: SLP if you can. */
1066 if (unrolling_factor < SLP_INSTANCE_UNROLLING_FACTOR (instance))
1067 unrolling_factor = SLP_INSTANCE_UNROLLING_FACTOR (instance);
1068
1069 /* Mark all the stmts that belong to INSTANCE as PURE_SLP stmts. Later we
1070 call vect_detect_hybrid_slp () to find stmts that need hybrid SLP and
1071 loop-based vectorization. Such stmts will be marked as HYBRID. */
1072 vect_mark_slp_stmts (SLP_INSTANCE_TREE (instance), pure_slp, -1);
1073 decided_to_slp++;
1074 }
1075
1076 LOOP_VINFO_SLP_UNROLLING_FACTOR (loop_vinfo) = unrolling_factor;
1077
1078 if (decided_to_slp && vect_print_dump_info (REPORT_SLP))
1079 fprintf (vect_dump, "Decided to SLP %d instances. Unrolling factor %d",
1080 decided_to_slp, unrolling_factor);
1081 }
1082
1083
1084 /* Find stmts that must be both vectorized and SLPed (since they feed stmts that
1085 can't be SLPed) in the tree rooted at NODE. Mark such stmts as HYBRID. */
1086
1087 static void
1088 vect_detect_hybrid_slp_stmts (slp_tree node)
1089 {
1090 int i;
1091 gimple stmt;
1092 imm_use_iterator imm_iter;
1093 gimple use_stmt;
1094
1095 if (!node)
1096 return;
1097
1098 for (i = 0; VEC_iterate (gimple, SLP_TREE_SCALAR_STMTS (node), i, stmt); i++)
1099 if (PURE_SLP_STMT (vinfo_for_stmt (stmt))
1100 && TREE_CODE (gimple_op (stmt, 0)) == SSA_NAME)
1101 FOR_EACH_IMM_USE_STMT (use_stmt, imm_iter, gimple_op (stmt, 0))
1102 if (vinfo_for_stmt (use_stmt)
1103 && !STMT_SLP_TYPE (vinfo_for_stmt (use_stmt))
1104 && STMT_VINFO_RELEVANT (vinfo_for_stmt (use_stmt)))
1105 vect_mark_slp_stmts (node, hybrid, i);
1106
1107 vect_detect_hybrid_slp_stmts (SLP_TREE_LEFT (node));
1108 vect_detect_hybrid_slp_stmts (SLP_TREE_RIGHT (node));
1109 }
1110
1111
1112 /* Find stmts that must be both vectorized and SLPed. */
1113
1114 void
1115 vect_detect_hybrid_slp (loop_vec_info loop_vinfo)
1116 {
1117 unsigned int i;
1118 VEC (slp_instance, heap) *slp_instances = LOOP_VINFO_SLP_INSTANCES (loop_vinfo);
1119 slp_instance instance;
1120
1121 if (vect_print_dump_info (REPORT_SLP))
1122 fprintf (vect_dump, "=== vect_detect_hybrid_slp ===");
1123
1124 for (i = 0; VEC_iterate (slp_instance, slp_instances, i, instance); i++)
1125 vect_detect_hybrid_slp_stmts (SLP_INSTANCE_TREE (instance));
1126 }
1127
1128
1129 /* Create and initialize a new bb_vec_info struct for BB, as well as
1130 stmt_vec_info structs for all the stmts in it. */
1131
1132 static bb_vec_info
1133 new_bb_vec_info (basic_block bb)
1134 {
1135 bb_vec_info res = NULL;
1136 gimple_stmt_iterator gsi;
1137
1138 res = (bb_vec_info) xcalloc (1, sizeof (struct _bb_vec_info));
1139 BB_VINFO_BB (res) = bb;
1140
1141 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
1142 {
1143 gimple stmt = gsi_stmt (gsi);
1144 gimple_set_uid (stmt, 0);
1145 set_vinfo_for_stmt (stmt, new_stmt_vec_info (stmt, NULL, res));
1146 }
1147
1148 BB_VINFO_STRIDED_STORES (res) = VEC_alloc (gimple, heap, 10);
1149 BB_VINFO_SLP_INSTANCES (res) = VEC_alloc (slp_instance, heap, 2);
1150
1151 bb->aux = res;
1152 return res;
1153 }
1154
1155
1156 /* Free BB_VINFO struct, as well as all the stmt_vec_info structs of all the
1157 stmts in the basic block. */
1158
1159 static void
1160 destroy_bb_vec_info (bb_vec_info bb_vinfo)
1161 {
1162 basic_block bb;
1163 gimple_stmt_iterator si;
1164
1165 if (!bb_vinfo)
1166 return;
1167
1168 bb = BB_VINFO_BB (bb_vinfo);
1169
1170 for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
1171 {
1172 gimple stmt = gsi_stmt (si);
1173 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1174
1175 if (stmt_info)
1176 /* Free stmt_vec_info. */
1177 free_stmt_vec_info (stmt);
1178 }
1179
1180 VEC_free (gimple, heap, BB_VINFO_STRIDED_STORES (bb_vinfo));
1181 VEC_free (slp_instance, heap, BB_VINFO_SLP_INSTANCES (bb_vinfo));
1182 free (bb_vinfo);
1183 bb->aux = NULL;
1184 }
1185
1186
1187 /* Analyze statements contained in SLP tree node after recursively analyzing
1188 the subtree. Return TRUE if the operations are supported. */
1189
1190 static bool
1191 vect_slp_analyze_node_operations (bb_vec_info bb_vinfo, slp_tree node)
1192 {
1193 bool dummy;
1194 int i;
1195 gimple stmt;
1196
1197 if (!node)
1198 return true;
1199
1200 if (!vect_slp_analyze_node_operations (bb_vinfo, SLP_TREE_LEFT (node))
1201 || !vect_slp_analyze_node_operations (bb_vinfo, SLP_TREE_RIGHT (node)))
1202 return false;
1203
1204 for (i = 0; VEC_iterate (gimple, SLP_TREE_SCALAR_STMTS (node), i, stmt); i++)
1205 {
1206 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1207 gcc_assert (stmt_info);
1208 gcc_assert (PURE_SLP_STMT (stmt_info));
1209
1210 if (!vect_analyze_stmt (stmt, &dummy, node))
1211 return false;
1212 }
1213
1214 return true;
1215 }
1216
1217
1218 /* Analyze statements in SLP instances of the basic block. Return TRUE if the
1219 operations are supported. */
1220
1221 static bool
1222 vect_slp_analyze_operations (bb_vec_info bb_vinfo)
1223 {
1224 VEC (slp_instance, heap) *slp_instances = BB_VINFO_SLP_INSTANCES (bb_vinfo);
1225 slp_instance instance;
1226 int i;
1227
1228 for (i = 0; VEC_iterate (slp_instance, slp_instances, i, instance); )
1229 {
1230 if (!vect_slp_analyze_node_operations (bb_vinfo,
1231 SLP_INSTANCE_TREE (instance)))
1232 {
1233 vect_free_slp_instance (instance);
1234 VEC_ordered_remove (slp_instance, slp_instances, i);
1235 }
1236 else
1237 i++;
1238 }
1239
1240 if (!VEC_length (slp_instance, slp_instances))
1241 return false;
1242
1243 return true;
1244 }
1245
1246
1247 /* Cheick if the basic block can be vectorized. */
1248
1249 bb_vec_info
1250 vect_slp_analyze_bb (basic_block bb)
1251 {
1252 bb_vec_info bb_vinfo;
1253 VEC (ddr_p, heap) *ddrs;
1254 VEC (slp_instance, heap) *slp_instances;
1255 slp_instance instance;
1256 int i, insns = 0;
1257 gimple_stmt_iterator gsi;
1258
1259 if (vect_print_dump_info (REPORT_DETAILS))
1260 fprintf (vect_dump, "===vect_slp_analyze_bb===\n");
1261
1262 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
1263 insns++;
1264
1265 if (insns > PARAM_VALUE (PARAM_SLP_MAX_INSNS_IN_BB))
1266 {
1267 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
1268 fprintf (vect_dump, "not vectorized: too many instructions in basic "
1269 "block.\n");
1270
1271 return NULL;
1272 }
1273
1274 bb_vinfo = new_bb_vec_info (bb);
1275 if (!bb_vinfo)
1276 return NULL;
1277
1278 if (!vect_analyze_data_refs (NULL, bb_vinfo))
1279 {
1280 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
1281 fprintf (vect_dump, "not vectorized: unhandled data-ref in basic "
1282 "block.\n");
1283
1284 destroy_bb_vec_info (bb_vinfo);
1285 return NULL;
1286 }
1287
1288 ddrs = BB_VINFO_DDRS (bb_vinfo);
1289 if (!VEC_length (ddr_p, ddrs))
1290 {
1291 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
1292 fprintf (vect_dump, "not vectorized: not enough data-refs in basic "
1293 "block.\n");
1294
1295 destroy_bb_vec_info (bb_vinfo);
1296 return NULL;
1297 }
1298
1299 if (!vect_analyze_data_refs_alignment (NULL, bb_vinfo))
1300 {
1301 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
1302 fprintf (vect_dump, "not vectorized: bad data alignment in basic "
1303 "block.\n");
1304
1305 destroy_bb_vec_info (bb_vinfo);
1306 return NULL;
1307 }
1308
1309 if (!vect_analyze_data_ref_dependences (NULL, bb_vinfo))
1310 {
1311 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
1312 fprintf (vect_dump, "not vectorized: unhandled data dependence in basic"
1313 " block.\n");
1314
1315 destroy_bb_vec_info (bb_vinfo);
1316 return NULL;
1317 }
1318
1319 if (!vect_analyze_data_ref_accesses (NULL, bb_vinfo))
1320 {
1321 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
1322 fprintf (vect_dump, "not vectorized: unhandled data access in basic "
1323 "block.\n");
1324
1325 destroy_bb_vec_info (bb_vinfo);
1326 return NULL;
1327 }
1328
1329 if (!vect_verify_datarefs_alignment (NULL, bb_vinfo))
1330 {
1331 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
1332 fprintf (vect_dump, "not vectorized: unsupported alignment in basic "
1333 "block.\n");
1334
1335 destroy_bb_vec_info (bb_vinfo);
1336 return NULL;
1337 }
1338
1339 /* Check the SLP opportunities in the basic block, analyze and build SLP
1340 trees. */
1341 if (!vect_analyze_slp (NULL, bb_vinfo))
1342 {
1343 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
1344 fprintf (vect_dump, "not vectorized: failed to find SLP opportunities "
1345 "in basic block.\n");
1346
1347 destroy_bb_vec_info (bb_vinfo);
1348 return NULL;
1349 }
1350
1351 slp_instances = BB_VINFO_SLP_INSTANCES (bb_vinfo);
1352
1353 /* Mark all the statements that we want to vectorize as pure SLP and
1354 relevant. */
1355 for (i = 0; VEC_iterate (slp_instance, slp_instances, i, instance); i++)
1356 {
1357 vect_mark_slp_stmts (SLP_INSTANCE_TREE (instance), pure_slp, -1);
1358 vect_mark_slp_stmts_relevant (SLP_INSTANCE_TREE (instance));
1359 }
1360
1361 if (!vect_slp_analyze_operations (bb_vinfo))
1362 {
1363 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
1364 fprintf (vect_dump, "not vectorized: bad operation in basic block.\n");
1365
1366 destroy_bb_vec_info (bb_vinfo);
1367 return NULL;
1368 }
1369
1370 if (vect_print_dump_info (REPORT_DETAILS))
1371 fprintf (vect_dump, "Basic block will be vectorized using SLP\n");
1372
1373 return bb_vinfo;
1374 }
1375
1376
1377 /* SLP costs are calculated according to SLP instance unrolling factor (i.e.,
1378 the number of created vector stmts depends on the unrolling factor). However,
1379 the actual number of vector stmts for every SLP node depends on VF which is
1380 set later in vect_analyze_operations(). Hence, SLP costs should be updated.
1381 In this function we assume that the inside costs calculated in
1382 vect_model_xxx_cost are linear in ncopies. */
1383
1384 void
1385 vect_update_slp_costs_according_to_vf (loop_vec_info loop_vinfo)
1386 {
1387 unsigned int i, vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
1388 VEC (slp_instance, heap) *slp_instances = LOOP_VINFO_SLP_INSTANCES (loop_vinfo);
1389 slp_instance instance;
1390
1391 if (vect_print_dump_info (REPORT_SLP))
1392 fprintf (vect_dump, "=== vect_update_slp_costs_according_to_vf ===");
1393
1394 for (i = 0; VEC_iterate (slp_instance, slp_instances, i, instance); i++)
1395 /* We assume that costs are linear in ncopies. */
1396 SLP_INSTANCE_INSIDE_OF_LOOP_COST (instance) *= vf
1397 / SLP_INSTANCE_UNROLLING_FACTOR (instance);
1398 }
1399
1400
1401 /* For constant and loop invariant defs of SLP_NODE this function returns
1402 (vector) defs (VEC_OPRNDS) that will be used in the vectorized stmts.
1403 OP_NUM determines if we gather defs for operand 0 or operand 1 of the scalar
1404 stmts. NUMBER_OF_VECTORS is the number of vector defs to create. */
1405
1406 static void
1407 vect_get_constant_vectors (slp_tree slp_node, VEC(tree,heap) **vec_oprnds,
1408 unsigned int op_num, unsigned int number_of_vectors)
1409 {
1410 VEC (gimple, heap) *stmts = SLP_TREE_SCALAR_STMTS (slp_node);
1411 gimple stmt = VEC_index (gimple, stmts, 0);
1412 stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
1413 tree vectype = STMT_VINFO_VECTYPE (stmt_vinfo);
1414 int nunits;
1415 tree vec_cst;
1416 tree t = NULL_TREE;
1417 int j, number_of_places_left_in_vector;
1418 tree vector_type;
1419 tree op, vop;
1420 int group_size = VEC_length (gimple, stmts);
1421 unsigned int vec_num, i;
1422 int number_of_copies = 1;
1423 VEC (tree, heap) *voprnds = VEC_alloc (tree, heap, number_of_vectors);
1424 bool constant_p, is_store;
1425
1426 if (STMT_VINFO_DATA_REF (stmt_vinfo))
1427 {
1428 is_store = true;
1429 op = gimple_assign_rhs1 (stmt);
1430 }
1431 else
1432 {
1433 is_store = false;
1434 op = gimple_op (stmt, op_num + 1);
1435 }
1436
1437 if (CONSTANT_CLASS_P (op))
1438 {
1439 vector_type = vectype;
1440 constant_p = true;
1441 }
1442 else
1443 {
1444 vector_type = get_vectype_for_scalar_type (TREE_TYPE (op));
1445 gcc_assert (vector_type);
1446 constant_p = false;
1447 }
1448
1449 nunits = TYPE_VECTOR_SUBPARTS (vector_type);
1450
1451 /* NUMBER_OF_COPIES is the number of times we need to use the same values in
1452 created vectors. It is greater than 1 if unrolling is performed.
1453
1454 For example, we have two scalar operands, s1 and s2 (e.g., group of
1455 strided accesses of size two), while NUNITS is four (i.e., four scalars
1456 of this type can be packed in a vector). The output vector will contain
1457 two copies of each scalar operand: {s1, s2, s1, s2}. (NUMBER_OF_COPIES
1458 will be 2).
1459
1460 If GROUP_SIZE > NUNITS, the scalars will be split into several vectors
1461 containing the operands.
1462
1463 For example, NUNITS is four as before, and the group size is 8
1464 (s1, s2, ..., s8). We will create two vectors {s1, s2, s3, s4} and
1465 {s5, s6, s7, s8}. */
1466
1467 number_of_copies = least_common_multiple (nunits, group_size) / group_size;
1468
1469 number_of_places_left_in_vector = nunits;
1470 for (j = 0; j < number_of_copies; j++)
1471 {
1472 for (i = group_size - 1; VEC_iterate (gimple, stmts, i, stmt); i--)
1473 {
1474 if (is_store)
1475 op = gimple_assign_rhs1 (stmt);
1476 else
1477 op = gimple_op (stmt, op_num + 1);
1478
1479 /* Create 'vect_ = {op0,op1,...,opn}'. */
1480 t = tree_cons (NULL_TREE, op, t);
1481
1482 number_of_places_left_in_vector--;
1483
1484 if (number_of_places_left_in_vector == 0)
1485 {
1486 number_of_places_left_in_vector = nunits;
1487
1488 if (constant_p)
1489 vec_cst = build_vector (vector_type, t);
1490 else
1491 vec_cst = build_constructor_from_list (vector_type, t);
1492 VEC_quick_push (tree, voprnds,
1493 vect_init_vector (stmt, vec_cst, vector_type, NULL));
1494 t = NULL_TREE;
1495 }
1496 }
1497 }
1498
1499 /* Since the vectors are created in the reverse order, we should invert
1500 them. */
1501 vec_num = VEC_length (tree, voprnds);
1502 for (j = vec_num - 1; j >= 0; j--)
1503 {
1504 vop = VEC_index (tree, voprnds, j);
1505 VEC_quick_push (tree, *vec_oprnds, vop);
1506 }
1507
1508 VEC_free (tree, heap, voprnds);
1509
1510 /* In case that VF is greater than the unrolling factor needed for the SLP
1511 group of stmts, NUMBER_OF_VECTORS to be created is greater than
1512 NUMBER_OF_SCALARS/NUNITS or NUNITS/NUMBER_OF_SCALARS, and hence we have
1513 to replicate the vectors. */
1514 while (number_of_vectors > VEC_length (tree, *vec_oprnds))
1515 {
1516 for (i = 0; VEC_iterate (tree, *vec_oprnds, i, vop) && i < vec_num; i++)
1517 VEC_quick_push (tree, *vec_oprnds, vop);
1518 }
1519 }
1520
1521
1522 /* Get vectorized definitions from SLP_NODE that contains corresponding
1523 vectorized def-stmts. */
1524
1525 static void
1526 vect_get_slp_vect_defs (slp_tree slp_node, VEC (tree,heap) **vec_oprnds)
1527 {
1528 tree vec_oprnd;
1529 gimple vec_def_stmt;
1530 unsigned int i;
1531
1532 gcc_assert (SLP_TREE_VEC_STMTS (slp_node));
1533
1534 for (i = 0;
1535 VEC_iterate (gimple, SLP_TREE_VEC_STMTS (slp_node), i, vec_def_stmt);
1536 i++)
1537 {
1538 gcc_assert (vec_def_stmt);
1539 vec_oprnd = gimple_get_lhs (vec_def_stmt);
1540 VEC_quick_push (tree, *vec_oprnds, vec_oprnd);
1541 }
1542 }
1543
1544
1545 /* Get vectorized definitions for SLP_NODE.
1546 If the scalar definitions are loop invariants or constants, collect them and
1547 call vect_get_constant_vectors() to create vector stmts.
1548 Otherwise, the def-stmts must be already vectorized and the vectorized stmts
1549 must be stored in the LEFT/RIGHT node of SLP_NODE, and we call
1550 vect_get_slp_vect_defs() to retrieve them.
1551 If VEC_OPRNDS1 is NULL, don't get vector defs for the second operand (from
1552 the right node. This is used when the second operand must remain scalar. */
1553
1554 void
1555 vect_get_slp_defs (slp_tree slp_node, VEC (tree,heap) **vec_oprnds0,
1556 VEC (tree,heap) **vec_oprnds1)
1557 {
1558 gimple first_stmt;
1559 enum tree_code code;
1560 int number_of_vects;
1561 HOST_WIDE_INT lhs_size_unit, rhs_size_unit;
1562
1563 first_stmt = VEC_index (gimple, SLP_TREE_SCALAR_STMTS (slp_node), 0);
1564 /* The number of vector defs is determined by the number of vector statements
1565 in the node from which we get those statements. */
1566 if (SLP_TREE_LEFT (slp_node))
1567 number_of_vects = SLP_TREE_NUMBER_OF_VEC_STMTS (SLP_TREE_LEFT (slp_node));
1568 else
1569 {
1570 number_of_vects = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
1571 /* Number of vector stmts was calculated according to LHS in
1572 vect_schedule_slp_instance(), fix it by replacing LHS with RHS, if
1573 necessary. See vect_get_smallest_scalar_type() for details. */
1574 vect_get_smallest_scalar_type (first_stmt, &lhs_size_unit,
1575 &rhs_size_unit);
1576 if (rhs_size_unit != lhs_size_unit)
1577 {
1578 number_of_vects *= rhs_size_unit;
1579 number_of_vects /= lhs_size_unit;
1580 }
1581 }
1582
1583 /* Allocate memory for vectorized defs. */
1584 *vec_oprnds0 = VEC_alloc (tree, heap, number_of_vects);
1585
1586 /* SLP_NODE corresponds either to a group of stores or to a group of
1587 unary/binary operations. We don't call this function for loads. */
1588 if (SLP_TREE_LEFT (slp_node))
1589 /* The defs are already vectorized. */
1590 vect_get_slp_vect_defs (SLP_TREE_LEFT (slp_node), vec_oprnds0);
1591 else
1592 /* Build vectors from scalar defs. */
1593 vect_get_constant_vectors (slp_node, vec_oprnds0, 0, number_of_vects);
1594
1595 if (STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt)))
1596 /* Since we don't call this function with loads, this is a group of
1597 stores. */
1598 return;
1599
1600 code = gimple_assign_rhs_code (first_stmt);
1601 if (get_gimple_rhs_class (code) != GIMPLE_BINARY_RHS || !vec_oprnds1)
1602 return;
1603
1604 /* The number of vector defs is determined by the number of vector statements
1605 in the node from which we get those statements. */
1606 if (SLP_TREE_RIGHT (slp_node))
1607 number_of_vects = SLP_TREE_NUMBER_OF_VEC_STMTS (SLP_TREE_RIGHT (slp_node));
1608 else
1609 number_of_vects = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
1610
1611 *vec_oprnds1 = VEC_alloc (tree, heap, number_of_vects);
1612
1613 if (SLP_TREE_RIGHT (slp_node))
1614 /* The defs are already vectorized. */
1615 vect_get_slp_vect_defs (SLP_TREE_RIGHT (slp_node), vec_oprnds1);
1616 else
1617 /* Build vectors from scalar defs. */
1618 vect_get_constant_vectors (slp_node, vec_oprnds1, 1, number_of_vects);
1619 }
1620
1621
1622 /* Create NCOPIES permutation statements using the mask MASK_BYTES (by
1623 building a vector of type MASK_TYPE from it) and two input vectors placed in
1624 DR_CHAIN at FIRST_VEC_INDX and SECOND_VEC_INDX for the first copy and
1625 shifting by STRIDE elements of DR_CHAIN for every copy.
1626 (STRIDE is the number of vectorized stmts for NODE divided by the number of
1627 copies).
1628 VECT_STMTS_COUNTER specifies the index in the vectorized stmts of NODE, where
1629 the created stmts must be inserted. */
1630
1631 static inline void
1632 vect_create_mask_and_perm (gimple stmt, gimple next_scalar_stmt,
1633 int *mask_array, int mask_nunits,
1634 tree mask_element_type, tree mask_type,
1635 int first_vec_indx, int second_vec_indx,
1636 gimple_stmt_iterator *gsi, slp_tree node,
1637 tree builtin_decl, tree vectype,
1638 VEC(tree,heap) *dr_chain,
1639 int ncopies, int vect_stmts_counter)
1640 {
1641 tree t = NULL_TREE, mask_vec, mask, perm_dest;
1642 gimple perm_stmt = NULL;
1643 stmt_vec_info next_stmt_info;
1644 int i, group_size, stride, dr_chain_size;
1645 tree first_vec, second_vec, data_ref;
1646 VEC (tree, heap) *params = NULL;
1647
1648 /* Create a vector mask. */
1649 for (i = mask_nunits - 1; i >= 0; --i)
1650 t = tree_cons (NULL_TREE, build_int_cst (mask_element_type, mask_array[i]),
1651 t);
1652 mask_vec = build_vector (mask_type, t);
1653 mask = vect_init_vector (stmt, mask_vec, mask_type, NULL);
1654
1655 group_size = VEC_length (gimple, SLP_TREE_SCALAR_STMTS (node));
1656 stride = SLP_TREE_NUMBER_OF_VEC_STMTS (node) / ncopies;
1657 dr_chain_size = VEC_length (tree, dr_chain);
1658
1659 /* Initialize the vect stmts of NODE to properly insert the generated
1660 stmts later. */
1661 for (i = VEC_length (gimple, SLP_TREE_VEC_STMTS (node));
1662 i < (int) SLP_TREE_NUMBER_OF_VEC_STMTS (node); i++)
1663 VEC_quick_push (gimple, SLP_TREE_VEC_STMTS (node), NULL);
1664
1665 perm_dest = vect_create_destination_var (gimple_assign_lhs (stmt), vectype);
1666 for (i = 0; i < ncopies; i++)
1667 {
1668 first_vec = VEC_index (tree, dr_chain, first_vec_indx);
1669 second_vec = VEC_index (tree, dr_chain, second_vec_indx);
1670
1671 /* Build argument list for the vectorized call. */
1672 VEC_free (tree, heap, params);
1673 params = VEC_alloc (tree, heap, 3);
1674 VEC_quick_push (tree, params, first_vec);
1675 VEC_quick_push (tree, params, second_vec);
1676 VEC_quick_push (tree, params, mask);
1677
1678 /* Generate the permute statement. */
1679 perm_stmt = gimple_build_call_vec (builtin_decl, params);
1680 data_ref = make_ssa_name (perm_dest, perm_stmt);
1681 gimple_call_set_lhs (perm_stmt, data_ref);
1682 vect_finish_stmt_generation (stmt, perm_stmt, gsi);
1683
1684 /* Store the vector statement in NODE. */
1685 VEC_replace (gimple, SLP_TREE_VEC_STMTS (node),
1686 stride * i + vect_stmts_counter, perm_stmt);
1687
1688 first_vec_indx += stride;
1689 second_vec_indx += stride;
1690 }
1691
1692 /* Mark the scalar stmt as vectorized. */
1693 next_stmt_info = vinfo_for_stmt (next_scalar_stmt);
1694 STMT_VINFO_VEC_STMT (next_stmt_info) = perm_stmt;
1695 }
1696
1697
1698 /* Given FIRST_MASK_ELEMENT - the mask element in element representation,
1699 return in CURRENT_MASK_ELEMENT its equivalent in target specific
1700 representation. Check that the mask is valid and return FALSE if not.
1701 Return TRUE in NEED_NEXT_VECTOR if the permutation requires to move to
1702 the next vector, i.e., the current first vector is not needed. */
1703
1704 static bool
1705 vect_get_mask_element (gimple stmt, int first_mask_element, int m,
1706 int mask_nunits, bool only_one_vec, int index,
1707 int *mask, int *current_mask_element,
1708 bool *need_next_vector)
1709 {
1710 int i;
1711 static int number_of_mask_fixes = 1;
1712 static bool mask_fixed = false;
1713 static bool needs_first_vector = false;
1714
1715 /* Convert to target specific representation. */
1716 *current_mask_element = first_mask_element + m;
1717 /* Adjust the value in case it's a mask for second and third vectors. */
1718 *current_mask_element -= mask_nunits * (number_of_mask_fixes - 1);
1719
1720 if (*current_mask_element < mask_nunits)
1721 needs_first_vector = true;
1722
1723 /* We have only one input vector to permute but the mask accesses values in
1724 the next vector as well. */
1725 if (only_one_vec && *current_mask_element >= mask_nunits)
1726 {
1727 if (vect_print_dump_info (REPORT_DETAILS))
1728 {
1729 fprintf (vect_dump, "permutation requires at least two vectors ");
1730 print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
1731 }
1732
1733 return false;
1734 }
1735
1736 /* The mask requires the next vector. */
1737 if (*current_mask_element >= mask_nunits * 2)
1738 {
1739 if (needs_first_vector || mask_fixed)
1740 {
1741 /* We either need the first vector too or have already moved to the
1742 next vector. In both cases, this permutation needs three
1743 vectors. */
1744 if (vect_print_dump_info (REPORT_DETAILS))
1745 {
1746 fprintf (vect_dump, "permutation requires at "
1747 "least three vectors ");
1748 print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
1749 }
1750
1751 return false;
1752 }
1753
1754 /* We move to the next vector, dropping the first one and working with
1755 the second and the third - we need to adjust the values of the mask
1756 accordingly. */
1757 *current_mask_element -= mask_nunits * number_of_mask_fixes;
1758
1759 for (i = 0; i < index; i++)
1760 mask[i] -= mask_nunits * number_of_mask_fixes;
1761
1762 (number_of_mask_fixes)++;
1763 mask_fixed = true;
1764 }
1765
1766 *need_next_vector = mask_fixed;
1767
1768 /* This was the last element of this mask. Start a new one. */
1769 if (index == mask_nunits - 1)
1770 {
1771 number_of_mask_fixes = 1;
1772 mask_fixed = false;
1773 needs_first_vector = false;
1774 }
1775
1776 return true;
1777 }
1778
1779
1780 /* Generate vector permute statements from a list of loads in DR_CHAIN.
1781 If ANALYZE_ONLY is TRUE, only check that it is possible to create valid
1782 permute statements for SLP_NODE_INSTANCE. */
1783 bool
1784 vect_transform_slp_perm_load (gimple stmt, VEC (tree, heap) *dr_chain,
1785 gimple_stmt_iterator *gsi, int vf,
1786 slp_instance slp_node_instance, bool analyze_only)
1787 {
1788 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1789 tree mask_element_type = NULL_TREE, mask_type;
1790 int i, j, k, m, scale, mask_nunits, nunits, vec_index = 0, scalar_index;
1791 slp_tree node;
1792 tree vectype = STMT_VINFO_VECTYPE (stmt_info), builtin_decl;
1793 gimple next_scalar_stmt;
1794 int group_size = SLP_INSTANCE_GROUP_SIZE (slp_node_instance);
1795 int first_mask_element;
1796 int index, unroll_factor, *mask, current_mask_element, ncopies;
1797 bool only_one_vec = false, need_next_vector = false;
1798 int first_vec_index, second_vec_index, orig_vec_stmts_num, vect_stmts_counter;
1799
1800 if (!targetm.vectorize.builtin_vec_perm)
1801 {
1802 if (vect_print_dump_info (REPORT_DETAILS))
1803 {
1804 fprintf (vect_dump, "no builtin for vect permute for ");
1805 print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
1806 }
1807
1808 return false;
1809 }
1810
1811 builtin_decl = targetm.vectorize.builtin_vec_perm (vectype,
1812 &mask_element_type);
1813 if (!builtin_decl || !mask_element_type)
1814 {
1815 if (vect_print_dump_info (REPORT_DETAILS))
1816 {
1817 fprintf (vect_dump, "no builtin for vect permute for ");
1818 print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
1819 }
1820
1821 return false;
1822 }
1823
1824 mask_type = get_vectype_for_scalar_type (mask_element_type);
1825 mask_nunits = TYPE_VECTOR_SUBPARTS (mask_type);
1826 mask = (int *) xmalloc (sizeof (int) * mask_nunits);
1827 nunits = TYPE_VECTOR_SUBPARTS (vectype);
1828 scale = mask_nunits / nunits;
1829 unroll_factor = SLP_INSTANCE_UNROLLING_FACTOR (slp_node_instance);
1830
1831 /* The number of vector stmts to generate based only on SLP_NODE_INSTANCE
1832 unrolling factor. */
1833 orig_vec_stmts_num = group_size *
1834 SLP_INSTANCE_UNROLLING_FACTOR (slp_node_instance) / nunits;
1835 if (orig_vec_stmts_num == 1)
1836 only_one_vec = true;
1837
1838 /* Number of copies is determined by the final vectorization factor
1839 relatively to SLP_NODE_INSTANCE unrolling factor. */
1840 ncopies = vf / SLP_INSTANCE_UNROLLING_FACTOR (slp_node_instance);
1841
1842 /* Generate permutation masks for every NODE. Number of masks for each NODE
1843 is equal to GROUP_SIZE.
1844 E.g., we have a group of three nodes with three loads from the same
1845 location in each node, and the vector size is 4. I.e., we have a
1846 a0b0c0a1b1c1... sequence and we need to create the following vectors:
1847 for a's: a0a0a0a1 a1a1a2a2 a2a3a3a3
1848 for b's: b0b0b0b1 b1b1b2b2 b2b3b3b3
1849 ...
1850
1851 The masks for a's should be: {0,0,0,3} {3,3,6,6} {6,9,9,9} (in target
1852 scpecific type, e.g., in bytes for Altivec.
1853 The last mask is illegal since we assume two operands for permute
1854 operation, and the mask element values can't be outside that range. Hence,
1855 the last mask must be converted into {2,5,5,5}.
1856 For the first two permutations we need the first and the second input
1857 vectors: {a0,b0,c0,a1} and {b1,c1,a2,b2}, and for the last permutation
1858 we need the second and the third vectors: {b1,c1,a2,b2} and
1859 {c2,a3,b3,c3}. */
1860
1861 for (i = 0;
1862 VEC_iterate (slp_tree, SLP_INSTANCE_LOADS (slp_node_instance),
1863 i, node);
1864 i++)
1865 {
1866 scalar_index = 0;
1867 index = 0;
1868 vect_stmts_counter = 0;
1869 vec_index = 0;
1870 first_vec_index = vec_index++;
1871 if (only_one_vec)
1872 second_vec_index = first_vec_index;
1873 else
1874 second_vec_index = vec_index++;
1875
1876 for (j = 0; j < unroll_factor; j++)
1877 {
1878 for (k = 0; k < group_size; k++)
1879 {
1880 first_mask_element = (i + j * group_size) * scale;
1881 for (m = 0; m < scale; m++)
1882 {
1883 if (!vect_get_mask_element (stmt, first_mask_element, m,
1884 mask_nunits, only_one_vec, index, mask,
1885 &current_mask_element, &need_next_vector))
1886 return false;
1887
1888 mask[index++] = current_mask_element;
1889 }
1890
1891 if (index == mask_nunits)
1892 {
1893 index = 0;
1894 if (!analyze_only)
1895 {
1896 if (need_next_vector)
1897 {
1898 first_vec_index = second_vec_index;
1899 second_vec_index = vec_index;
1900 }
1901
1902 next_scalar_stmt = VEC_index (gimple,
1903 SLP_TREE_SCALAR_STMTS (node), scalar_index++);
1904
1905 vect_create_mask_and_perm (stmt, next_scalar_stmt,
1906 mask, mask_nunits, mask_element_type, mask_type,
1907 first_vec_index, second_vec_index, gsi, node,
1908 builtin_decl, vectype, dr_chain, ncopies,
1909 vect_stmts_counter++);
1910 }
1911 }
1912 }
1913 }
1914 }
1915
1916 free (mask);
1917 return true;
1918 }
1919
1920
1921
1922 /* Vectorize SLP instance tree in postorder. */
1923
1924 static bool
1925 vect_schedule_slp_instance (slp_tree node, slp_instance instance,
1926 unsigned int vectorization_factor)
1927 {
1928 gimple stmt;
1929 bool strided_store, is_store;
1930 gimple_stmt_iterator si;
1931 stmt_vec_info stmt_info;
1932 unsigned int vec_stmts_size, nunits, group_size;
1933 tree vectype;
1934 int i;
1935 slp_tree loads_node;
1936
1937 if (!node)
1938 return false;
1939
1940 vect_schedule_slp_instance (SLP_TREE_LEFT (node), instance,
1941 vectorization_factor);
1942 vect_schedule_slp_instance (SLP_TREE_RIGHT (node), instance,
1943 vectorization_factor);
1944
1945 stmt = VEC_index (gimple, SLP_TREE_SCALAR_STMTS (node), 0);
1946 stmt_info = vinfo_for_stmt (stmt);
1947
1948 /* VECTYPE is the type of the destination. */
1949 vectype = get_vectype_for_scalar_type (TREE_TYPE (gimple_assign_lhs (stmt)));
1950 nunits = (unsigned int) TYPE_VECTOR_SUBPARTS (vectype);
1951 group_size = SLP_INSTANCE_GROUP_SIZE (instance);
1952
1953 /* For each SLP instance calculate number of vector stmts to be created
1954 for the scalar stmts in each node of the SLP tree. Number of vector
1955 elements in one vector iteration is the number of scalar elements in
1956 one scalar iteration (GROUP_SIZE) multiplied by VF divided by vector
1957 size. */
1958 vec_stmts_size = (vectorization_factor * group_size) / nunits;
1959
1960 /* In case of load permutation we have to allocate vectorized statements for
1961 all the nodes that participate in that permutation. */
1962 if (SLP_INSTANCE_LOAD_PERMUTATION (instance))
1963 {
1964 for (i = 0;
1965 VEC_iterate (slp_tree, SLP_INSTANCE_LOADS (instance), i, loads_node);
1966 i++)
1967 {
1968 if (!SLP_TREE_VEC_STMTS (loads_node))
1969 {
1970 SLP_TREE_VEC_STMTS (loads_node) = VEC_alloc (gimple, heap,
1971 vec_stmts_size);
1972 SLP_TREE_NUMBER_OF_VEC_STMTS (loads_node) = vec_stmts_size;
1973 }
1974 }
1975 }
1976
1977 if (!SLP_TREE_VEC_STMTS (node))
1978 {
1979 SLP_TREE_VEC_STMTS (node) = VEC_alloc (gimple, heap, vec_stmts_size);
1980 SLP_TREE_NUMBER_OF_VEC_STMTS (node) = vec_stmts_size;
1981 }
1982
1983 if (vect_print_dump_info (REPORT_DETAILS))
1984 {
1985 fprintf (vect_dump, "------>vectorizing SLP node starting from: ");
1986 print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
1987 }
1988
1989 /* Loads should be inserted before the first load. */
1990 if (SLP_INSTANCE_FIRST_LOAD_STMT (instance)
1991 && STMT_VINFO_STRIDED_ACCESS (stmt_info)
1992 && !REFERENCE_CLASS_P (gimple_get_lhs (stmt)))
1993 si = gsi_for_stmt (SLP_INSTANCE_FIRST_LOAD_STMT (instance));
1994 else
1995 si = gsi_for_stmt (stmt);
1996
1997 is_store = vect_transform_stmt (stmt, &si, &strided_store, node, instance);
1998 if (is_store)
1999 {
2000 if (DR_GROUP_FIRST_DR (stmt_info))
2001 /* If IS_STORE is TRUE, the vectorization of the
2002 interleaving chain was completed - free all the stores in
2003 the chain. */
2004 vect_remove_stores (DR_GROUP_FIRST_DR (stmt_info));
2005 else
2006 /* FORNOW: SLP originates only from strided stores. */
2007 gcc_unreachable ();
2008
2009 return true;
2010 }
2011
2012 /* FORNOW: SLP originates only from strided stores. */
2013 return false;
2014 }
2015
2016
2017 bool
2018 vect_schedule_slp (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo)
2019 {
2020 VEC (slp_instance, heap) *slp_instances;
2021 slp_instance instance;
2022 unsigned int i, vf;
2023 bool is_store = false;
2024
2025 if (loop_vinfo)
2026 {
2027 slp_instances = LOOP_VINFO_SLP_INSTANCES (loop_vinfo);
2028 vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
2029 }
2030 else
2031 {
2032 slp_instances = BB_VINFO_SLP_INSTANCES (bb_vinfo);
2033 vf = 1;
2034 }
2035
2036 for (i = 0; VEC_iterate (slp_instance, slp_instances, i, instance); i++)
2037 {
2038 /* Schedule the tree of INSTANCE. */
2039 is_store = vect_schedule_slp_instance (SLP_INSTANCE_TREE (instance),
2040 instance, vf);
2041 if (vect_print_dump_info (REPORT_VECTORIZED_LOCATIONS)
2042 || vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
2043 fprintf (vect_dump, "vectorizing stmts using SLP.");
2044 }
2045
2046 return is_store;
2047 }
2048
2049
2050 /* Vectorize the basic block. */
2051
2052 void
2053 vect_slp_transform_bb (basic_block bb)
2054 {
2055 bb_vec_info bb_vinfo = vec_info_for_bb (bb);
2056 gimple_stmt_iterator si;
2057
2058 gcc_assert (bb_vinfo);
2059
2060 if (vect_print_dump_info (REPORT_DETAILS))
2061 fprintf (vect_dump, "SLPing BB\n");
2062
2063 for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
2064 {
2065 gimple stmt = gsi_stmt (si);
2066 stmt_vec_info stmt_info;
2067
2068 if (vect_print_dump_info (REPORT_DETAILS))
2069 {
2070 fprintf (vect_dump, "------>SLPing statement: ");
2071 print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
2072 }
2073
2074 stmt_info = vinfo_for_stmt (stmt);
2075 gcc_assert (stmt_info);
2076
2077 /* Schedule all the SLP instances when the first SLP stmt is reached. */
2078 if (STMT_SLP_TYPE (stmt_info))
2079 {
2080 vect_schedule_slp (NULL, bb_vinfo);
2081 break;
2082 }
2083 }
2084
2085 mark_sym_for_renaming (gimple_vop (cfun));
2086 /* The memory tags and pointers in vectorized statements need to
2087 have their SSA forms updated. FIXME, why can't this be delayed
2088 until all the loops have been transformed? */
2089 update_ssa (TODO_update_ssa);
2090
2091 if (vect_print_dump_info (REPORT_DETAILS))
2092 fprintf (vect_dump, "BASIC BLOCK VECTORIZED\n");
2093
2094 destroy_bb_vec_info (bb_vinfo);
2095 }
2096