re PR preprocessor/36674 (#include location is offset by one row in errors from prepr...
[gcc.git] / gcc / tree-vect-slp.c
1 /* SLP - Basic Block Vectorization
   Copyright (C) 2007, 2008, 2009 Free Software Foundation, Inc.
4 Contributed by Dorit Naishlos <dorit@il.ibm.com>
5 and Ira Rosen <irar@il.ibm.com>
6
7 This file is part of GCC.
8
9 GCC is free software; you can redistribute it and/or modify it under
10 the terms of the GNU General Public License as published by the Free
11 Software Foundation; either version 3, or (at your option) any later
12 version.
13
14 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
15 WARRANTY; without even the implied warranty of MERCHANTABILITY or
16 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
17 for more details.
18
19 You should have received a copy of the GNU General Public License
20 along with GCC; see the file COPYING3. If not see
21 <http://www.gnu.org/licenses/>. */
22
23 #include "config.h"
24 #include "system.h"
25 #include "coretypes.h"
26 #include "tm.h"
27 #include "ggc.h"
28 #include "tree.h"
29 #include "target.h"
30 #include "basic-block.h"
31 #include "diagnostic.h"
32 #include "tree-flow.h"
33 #include "tree-dump.h"
34 #include "cfgloop.h"
35 #include "cfglayout.h"
36 #include "expr.h"
37 #include "recog.h"
38 #include "optabs.h"
39 #include "tree-vectorizer.h"
40
41 /* Recursively free the memory allocated for the SLP tree rooted at NODE. */
42
43 static void
44 vect_free_slp_tree (slp_tree node)
45 {
46 if (!node)
47 return;
48
49 if (SLP_TREE_LEFT (node))
50 vect_free_slp_tree (SLP_TREE_LEFT (node));
51
52 if (SLP_TREE_RIGHT (node))
53 vect_free_slp_tree (SLP_TREE_RIGHT (node));
54
55 VEC_free (gimple, heap, SLP_TREE_SCALAR_STMTS (node));
56
57 if (SLP_TREE_VEC_STMTS (node))
58 VEC_free (gimple, heap, SLP_TREE_VEC_STMTS (node));
59
60 free (node);
61 }
62
63
64 /* Free the memory allocated for the SLP instance. */
65
66 void
67 vect_free_slp_instance (slp_instance instance)
68 {
69 vect_free_slp_tree (SLP_INSTANCE_TREE (instance));
70 VEC_free (int, heap, SLP_INSTANCE_LOAD_PERMUTATION (instance));
71 VEC_free (slp_tree, heap, SLP_INSTANCE_LOADS (instance));
72 }
73
74
/* Get the defs for the rhs of STMT (collect them in DEF_STMTS0/1), check that
   they are of a legal type and that they match the defs of the first stmt of
   the SLP group (stored in FIRST_STMT_...).

   On the first stmt of the group (detected by *FIRST_STMT_DT0 still being
   zero) the def types of the operands are recorded through the FIRST_STMT_*
   out-parameters and the stmt's cost is modeled; for subsequent stmts the
   operands are compared against the recorded values.  Internal defs are
   pushed onto DEF_STMTS0 (operand 0) or DEF_STMTS1 (operand 1) so the caller
   can recurse on them.  PATTERN0/PATTERN1 record whether the defs of operand
   0/1 come from stmts that are part of a recognized vectorizer pattern.
   Return FALSE if STMT does not match the group.  */

static bool
vect_get_and_check_slp_defs (loop_vec_info loop_vinfo, slp_tree slp_node,
                             gimple stmt, VEC (gimple, heap) **def_stmts0,
                             VEC (gimple, heap) **def_stmts1,
                             enum vect_def_type *first_stmt_dt0,
                             enum vect_def_type *first_stmt_dt1,
                             tree *first_stmt_def0_type,
                             tree *first_stmt_def1_type,
                             tree *first_stmt_const_oprnd,
                             int ncopies_for_cost,
                             bool *pattern0, bool *pattern1)
{
  tree oprnd;
  unsigned int i, number_of_oprnds;
  tree def;
  gimple def_stmt;
  enum vect_def_type dt[2] = {vect_unknown_def_type, vect_unknown_def_type};
  /* Stmt info of the first scalar stmt of the node - used for cost
     modeling below.  */
  stmt_vec_info stmt_info =
    vinfo_for_stmt (VEC_index (gimple, SLP_TREE_SCALAR_STMTS (slp_node), 0));
  enum gimple_rhs_class rhs_class;
  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);

  rhs_class = get_gimple_rhs_class (gimple_assign_rhs_code (stmt));
  number_of_oprnds = gimple_num_ops (stmt) - 1; /* RHS only */

  for (i = 0; i < number_of_oprnds; i++)
    {
      /* Operand I of the RHS (gimple op 0 is the LHS).  */
      oprnd = gimple_op (stmt, i + 1);

      if (!vect_is_simple_use (oprnd, loop_vinfo, &def_stmt, &def, &dt[i])
          || (!def_stmt && dt[i] != vect_constant_def))
        {
          if (vect_print_dump_info (REPORT_SLP))
            {
              fprintf (vect_dump, "Build SLP failed: can't find def for ");
              print_generic_expr (vect_dump, oprnd, TDF_SLIM);
            }

          return false;
        }

      /* Check if DEF_STMT is a part of a pattern and get the def stmt from
         the pattern.  Check that all the stmts of the node are in the
         pattern.  */
      if (def_stmt && gimple_bb (def_stmt)
          && flow_bb_inside_loop_p (loop, gimple_bb (def_stmt))
          && vinfo_for_stmt (def_stmt)
          && STMT_VINFO_IN_PATTERN_P (vinfo_for_stmt (def_stmt)))
        {
          /* NOTE(review): !*first_stmt_dt0 relies on
             vect_uninitialized_def being the zero enumerator - confirm
             against the enum definition in tree-vectorizer.h.  */
          if (!*first_stmt_dt0)
            *pattern0 = true;
          else
            {
              if (i == 1 && !*first_stmt_dt1)
                *pattern1 = true;
              else if ((i == 0 && !*pattern0) || (i == 1 && !*pattern1))
                {
                  /* Mixing in-pattern and out-of-pattern defs for the same
                     operand position is not supported.  */
                  if (vect_print_dump_info (REPORT_DETAILS))
                    {
                      fprintf (vect_dump, "Build SLP failed: some of the stmts"
                                     " are in a pattern, and others are not ");
                      print_generic_expr (vect_dump, oprnd, TDF_SLIM);
                    }

                  return false;
                }
            }

          /* Replace the def stmt by the pattern stmt it belongs to.  */
          def_stmt = STMT_VINFO_RELATED_STMT (vinfo_for_stmt (def_stmt));
          dt[i] = STMT_VINFO_DEF_TYPE (vinfo_for_stmt (def_stmt));

          /* NOTE(review): this tests dt[0] regardless of I; presumably
             intentional (the pattern def type check) - confirm.  */
          if (*dt == vect_unknown_def_type)
            {
              if (vect_print_dump_info (REPORT_DETAILS))
                fprintf (vect_dump, "Unsupported pattern.");
              return false;
            }

          switch (gimple_code (def_stmt))
            {
            case GIMPLE_PHI:
              def = gimple_phi_result (def_stmt);
              break;

            case GIMPLE_ASSIGN:
              def = gimple_assign_lhs (def_stmt);
              break;

            default:
              if (vect_print_dump_info (REPORT_DETAILS))
                fprintf (vect_dump, "unsupported defining stmt: ");
              return false;
            }
        }

      if (!*first_stmt_dt0)
        {
          /* op0 of the first stmt of the group - store its info.  */
          *first_stmt_dt0 = dt[i];
          if (def)
            *first_stmt_def0_type = TREE_TYPE (def);
          else
            *first_stmt_const_oprnd = oprnd;

          /* Analyze costs (for the first stmt of the group only).  */
          if (rhs_class != GIMPLE_SINGLE_RHS)
            /* Not memory operation (we don't call this functions for loads).  */
            vect_model_simple_cost (stmt_info, ncopies_for_cost, dt, slp_node);
          else
            /* Store.  */
            vect_model_store_cost (stmt_info, ncopies_for_cost, dt[0], slp_node);
        }

      else
        {
          if (!*first_stmt_dt1 && i == 1)
            {
              /* op1 of the first stmt of the group - store its info.  */
              *first_stmt_dt1 = dt[i];
              if (def)
                *first_stmt_def1_type = TREE_TYPE (def);
              else
                {
                  /* We assume that the stmt contains only one constant
                     operand.  We fail otherwise, to be on the safe side.  */
                  if (*first_stmt_const_oprnd)
                    {
                      if (vect_print_dump_info (REPORT_SLP))
                        fprintf (vect_dump, "Build SLP failed: two constant "
                                            "oprnds in stmt");
                      return false;
                    }
                  *first_stmt_const_oprnd = oprnd;
                }
            }
          else
            {
              /* Not first stmt of the group, check that the def-stmt/s match
                 the def-stmt/s of the first stmt.  */
              if ((i == 0
                   && (*first_stmt_dt0 != dt[i]
                       || (*first_stmt_def0_type && def
                           && *first_stmt_def0_type != TREE_TYPE (def))))
                  || (i == 1
                      && (*first_stmt_dt1 != dt[i]
                          || (*first_stmt_def1_type && def
                              && *first_stmt_def1_type != TREE_TYPE (def))))
                  || (!def
                      && TREE_TYPE (*first_stmt_const_oprnd)
                         != TREE_TYPE (oprnd)))
                {
                  if (vect_print_dump_info (REPORT_SLP))
                    fprintf (vect_dump, "Build SLP failed: different types ");

                  return false;
                }
            }
        }

      /* Check the types of the definitions.  */
      switch (dt[i])
        {
        case vect_constant_def:
        case vect_external_def:
          /* Constants and loop-invariant defs need no recursion.  */
          break;

        case vect_internal_def:
          /* Collect internal defs so the caller can recurse on them.  */
          if (i == 0)
            VEC_safe_push (gimple, heap, *def_stmts0, def_stmt);
          else
            VEC_safe_push (gimple, heap, *def_stmts1, def_stmt);
          break;

        default:
          /* FORNOW: Not supported.  */
          if (vect_print_dump_info (REPORT_SLP))
            {
              fprintf (vect_dump, "Build SLP failed: illegal type of def ");
              print_generic_expr (vect_dump, def, TDF_SLIM);
            }

          return false;
        }
    }

  return true;
}
266
267
/* Recursively build an SLP tree starting from NODE.
   Fail (and return FALSE) if def-stmts are not isomorphic, require data
   permutation or are of unsupported types of operation.  Otherwise, return
   TRUE.

   GROUP_SIZE is the number of scalar stmts in the node; INSIDE_COST and
   OUTSIDE_COST accumulate the instance costs; MAX_NUNITS tracks the largest
   number of vector elements seen (used by the caller to recompute the
   unrolling factor); LOAD_PERMUTATION collects the position of every load
   in its interleaving chain; LOADS collects the load nodes that need
   permutation.  */

static bool
vect_build_slp_tree (loop_vec_info loop_vinfo, slp_tree *node,
                     unsigned int group_size,
                     int *inside_cost, int *outside_cost,
                     int ncopies_for_cost, unsigned int *max_nunits,
                     VEC (int, heap) **load_permutation,
                     VEC (slp_tree, heap) **loads)
{
  VEC (gimple, heap) *def_stmts0 = VEC_alloc (gimple, heap, group_size);
  VEC (gimple, heap) *def_stmts1 = VEC_alloc (gimple, heap, group_size);
  unsigned int i;
  VEC (gimple, heap) *stmts = SLP_TREE_SCALAR_STMTS (*node);
  gimple stmt = VEC_index (gimple, stmts, 0);
  enum vect_def_type first_stmt_dt0 = vect_uninitialized_def;
  enum vect_def_type first_stmt_dt1 = vect_uninitialized_def;
  enum tree_code first_stmt_code = ERROR_MARK, rhs_code;
  tree first_stmt_def1_type = NULL_TREE, first_stmt_def0_type = NULL_TREE;
  tree lhs;
  bool stop_recursion = false, need_same_oprnds = false;
  tree vectype, scalar_type, first_op1 = NULL_TREE;
  unsigned int vectorization_factor = 0, ncopies;
  optab optab;
  int icode;
  enum machine_mode optab_op2_mode;
  enum machine_mode vec_mode;
  tree first_stmt_const_oprnd = NULL_TREE;
  struct data_reference *first_dr;
  bool pattern0 = false, pattern1 = false;
  HOST_WIDE_INT dummy;
  bool permutation = false;
  unsigned int load_place;
  gimple first_load;

  /* For every stmt in NODE find its def stmt/s.
     NOTE(review): DEF_STMTS0/1 are not freed on the early FALSE returns
     below - presumably a small leak on failure paths; confirm whether the
     caller cleans up.  */
  for (i = 0; VEC_iterate (gimple, stmts, i, stmt); i++)
    {
      if (vect_print_dump_info (REPORT_SLP))
        {
          fprintf (vect_dump, "Build SLP for ");
          print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
        }

      lhs = gimple_get_lhs (stmt);
      if (lhs == NULL_TREE)
        {
          if (vect_print_dump_info (REPORT_SLP))
            {
              fprintf (vect_dump,
                       "Build SLP failed: not GIMPLE_ASSIGN nor GIMPLE_CALL");
              print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
            }

          return false;
        }

      scalar_type = vect_get_smallest_scalar_type (stmt, &dummy, &dummy);
      vectype = get_vectype_for_scalar_type (scalar_type);
      if (!vectype)
        {
          if (vect_print_dump_info (REPORT_SLP))
            {
              fprintf (vect_dump, "Build SLP failed: unsupported data-type ");
              print_generic_expr (vect_dump, scalar_type, TDF_SLIM);
            }
          return false;
        }

      gcc_assert (LOOP_VINFO_VECT_FACTOR (loop_vinfo));
      vectorization_factor = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
      /* NCOPIES > 1 means several vector stmts per scalar stmt (multiple
         types in the loop).  */
      ncopies = vectorization_factor / TYPE_VECTOR_SUBPARTS (vectype);
      if (ncopies > 1 && vect_print_dump_info (REPORT_SLP))
        fprintf (vect_dump, "SLP with multiple types ");

      /* In case of multiple types we need to detect the smallest type.  */
      if (*max_nunits < TYPE_VECTOR_SUBPARTS (vectype))
        *max_nunits = TYPE_VECTOR_SUBPARTS (vectype);

      if (is_gimple_call (stmt))
        rhs_code = CALL_EXPR;
      else
        rhs_code = gimple_assign_rhs_code (stmt);

      /* Check the operation.  */
      if (i == 0)
        {
          first_stmt_code = rhs_code;

          /* Shift arguments should be equal in all the packed stmts for a
             vector shift with scalar shift operand.  */
          if (rhs_code == LSHIFT_EXPR || rhs_code == RSHIFT_EXPR
              || rhs_code == LROTATE_EXPR
              || rhs_code == RROTATE_EXPR)
            {
              vec_mode = TYPE_MODE (vectype);

              /* First see if we have a vector/vector shift.  */
              optab = optab_for_tree_code (rhs_code, vectype,
                                           optab_vector);

              if (!optab
                  || (optab->handlers[(int) vec_mode].insn_code
                      == CODE_FOR_nothing))
                {
                  /* No vector/vector shift, try for a vector/scalar shift.  */
                  optab = optab_for_tree_code (rhs_code, vectype,
                                               optab_scalar);

                  if (!optab)
                    {
                      if (vect_print_dump_info (REPORT_SLP))
                        fprintf (vect_dump, "Build SLP failed: no optab.");
                      return false;
                    }
                  icode = (int) optab->handlers[(int) vec_mode].insn_code;
                  if (icode == CODE_FOR_nothing)
                    {
                      if (vect_print_dump_info (REPORT_SLP))
                        fprintf (vect_dump, "Build SLP failed: "
                                            "op not supported by target.");
                      return false;
                    }
                  /* A scalar second operand forces all stmts in the group
                     to use the same shift amount.  */
                  optab_op2_mode = insn_data[icode].operand[2].mode;
                  if (!VECTOR_MODE_P (optab_op2_mode))
                    {
                      need_same_oprnds = true;
                      first_op1 = gimple_assign_rhs2 (stmt);
                    }
                }
            }
        }
      else
        {
          /* Subsequent stmts must perform the same operation as the first
             one (REALPART/IMAGPART are allowed to mix).  */
          if (first_stmt_code != rhs_code
              && (first_stmt_code != IMAGPART_EXPR
                  || rhs_code != REALPART_EXPR)
              && (first_stmt_code != REALPART_EXPR
                  || rhs_code != IMAGPART_EXPR))
            {
              if (vect_print_dump_info (REPORT_SLP))
                {
                  fprintf (vect_dump,
                           "Build SLP failed: different operation in stmt ");
                  print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
                }

              return false;
            }

          if (need_same_oprnds
              && !operand_equal_p (first_op1, gimple_assign_rhs2 (stmt), 0))
            {
              if (vect_print_dump_info (REPORT_SLP))
                {
                  fprintf (vect_dump,
                           "Build SLP failed: different shift arguments in ");
                  print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
                }

              return false;
            }
        }

      /* Strided store or load.  */
      if (STMT_VINFO_STRIDED_ACCESS (vinfo_for_stmt (stmt)))
        {
          if (REFERENCE_CLASS_P (lhs))
            {
              /* Store.  */
              if (!vect_get_and_check_slp_defs (loop_vinfo, *node, stmt,
                                                &def_stmts0, &def_stmts1,
                                                &first_stmt_dt0,
                                                &first_stmt_dt1,
                                                &first_stmt_def0_type,
                                                &first_stmt_def1_type,
                                                &first_stmt_const_oprnd,
                                                ncopies_for_cost,
                                                &pattern0, &pattern1))
                return false;
            }
          else
            {
              /* Load.  */
              /* FORNOW: Check that there is no gap between the loads.  */
              if ((DR_GROUP_FIRST_DR (vinfo_for_stmt (stmt)) == stmt
                   && DR_GROUP_GAP (vinfo_for_stmt (stmt)) != 0)
                  || (DR_GROUP_FIRST_DR (vinfo_for_stmt (stmt)) != stmt
                      && DR_GROUP_GAP (vinfo_for_stmt (stmt)) != 1))
                {
                  if (vect_print_dump_info (REPORT_SLP))
                    {
                      fprintf (vect_dump, "Build SLP failed: strided "
                                          "loads have gaps ");
                      print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
                    }

                  return false;
                }

              /* Check that the size of interleaved loads group is not
                 greater than the SLP group size.  */
              if (DR_GROUP_SIZE (vinfo_for_stmt (stmt))
                  > ncopies * group_size)
                {
                  if (vect_print_dump_info (REPORT_SLP))
                    {
                      fprintf (vect_dump, "Build SLP failed: the number of "
                                          "interleaved loads is greater than"
                                          " the SLP group size ");
                      print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
                    }

                  return false;
                }

              first_load = DR_GROUP_FIRST_DR (vinfo_for_stmt (stmt));

              if (first_load == stmt)
                {
                  /* Alignment and cost are checked/modeled only once, on
                     the first load of the interleaving chain.  */
                  first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (stmt));
                  if (vect_supportable_dr_alignment (first_dr)
                      == dr_unaligned_unsupported)
                    {
                      if (vect_print_dump_info (REPORT_SLP))
                        {
                          fprintf (vect_dump, "Build SLP failed: unsupported "
                                              "unaligned load ");
                          print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
                        }

                      return false;
                    }

                  /* Analyze costs (for the first stmt in the group).  */
                  vect_model_load_cost (vinfo_for_stmt (stmt),
                                        ncopies_for_cost, *node);
                }

              /* Store the place of this load in the interleaving chain.  In
                 case that permutation is needed we later decide if a specific
                 permutation is supported.  */
              load_place = vect_get_place_in_interleaving_chain (stmt,
                                                                 first_load);
              if (load_place != i)
                permutation = true;

              VEC_safe_push (int, heap, *load_permutation, load_place);

              /* We stop the tree when we reach a group of loads.  */
              stop_recursion = true;
              continue;
            }
        } /* Strided access.  */
      else
        {
          if (TREE_CODE_CLASS (rhs_code) == tcc_reference)
            {
              /* Not strided load.  */
              if (vect_print_dump_info (REPORT_SLP))
                {
                  fprintf (vect_dump, "Build SLP failed: not strided load ");
                  print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
                }

              /* FORNOW: Not strided loads are not supported.  */
              return false;
            }

          /* Not memory operation.  */
          if (TREE_CODE_CLASS (rhs_code) != tcc_binary
              && TREE_CODE_CLASS (rhs_code) != tcc_unary)
            {
              if (vect_print_dump_info (REPORT_SLP))
                {
                  fprintf (vect_dump, "Build SLP failed: operation");
                  fprintf (vect_dump, " unsupported ");
                  print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
                }

              return false;
            }

          /* Find the def-stmts.  */
          if (!vect_get_and_check_slp_defs (loop_vinfo, *node, stmt,
                                            &def_stmts0, &def_stmts1,
                                            &first_stmt_dt0, &first_stmt_dt1,
                                            &first_stmt_def0_type,
                                            &first_stmt_def1_type,
                                            &first_stmt_const_oprnd,
                                            ncopies_for_cost,
                                            &pattern0, &pattern1))
            return false;
        }
    }

  /* Add the costs of the node to the overall instance costs.  */
  *inside_cost += SLP_TREE_INSIDE_OF_LOOP_COST (*node);
  *outside_cost += SLP_TREE_OUTSIDE_OF_LOOP_COST (*node);

  /* Strided loads were reached - stop the recursion.  */
  if (stop_recursion)
    {
      if (permutation)
        {
          /* Record the node for the permutation check done later and
             account for the cost of permuting.  */
          VEC_safe_push (slp_tree, heap, *loads, *node);
          *inside_cost += TARG_VEC_PERMUTE_COST * group_size;
        }

      return true;
    }

  /* Create SLP_TREE nodes for the definition node/s.  */
  if (first_stmt_dt0 == vect_internal_def)
    {
      slp_tree left_node = XNEW (struct _slp_tree);
      SLP_TREE_SCALAR_STMTS (left_node) = def_stmts0;
      SLP_TREE_VEC_STMTS (left_node) = NULL;
      SLP_TREE_LEFT (left_node) = NULL;
      SLP_TREE_RIGHT (left_node) = NULL;
      SLP_TREE_OUTSIDE_OF_LOOP_COST (left_node) = 0;
      SLP_TREE_INSIDE_OF_LOOP_COST (left_node) = 0;
      if (!vect_build_slp_tree (loop_vinfo, &left_node, group_size,
                                inside_cost, outside_cost, ncopies_for_cost,
                                max_nunits, load_permutation, loads))
        return false;

      SLP_TREE_LEFT (*node) = left_node;
    }

  if (first_stmt_dt1 == vect_internal_def)
    {
      slp_tree right_node = XNEW (struct _slp_tree);
      SLP_TREE_SCALAR_STMTS (right_node) = def_stmts1;
      SLP_TREE_VEC_STMTS (right_node) = NULL;
      SLP_TREE_LEFT (right_node) = NULL;
      SLP_TREE_RIGHT (right_node) = NULL;
      SLP_TREE_OUTSIDE_OF_LOOP_COST (right_node) = 0;
      SLP_TREE_INSIDE_OF_LOOP_COST (right_node) = 0;
      if (!vect_build_slp_tree (loop_vinfo, &right_node, group_size,
                                inside_cost, outside_cost, ncopies_for_cost,
                                max_nunits, load_permutation, loads))
        return false;

      SLP_TREE_RIGHT (*node) = right_node;
    }

  return true;
}
620
621
622 static void
623 vect_print_slp_tree (slp_tree node)
624 {
625 int i;
626 gimple stmt;
627
628 if (!node)
629 return;
630
631 fprintf (vect_dump, "node ");
632 for (i = 0; VEC_iterate (gimple, SLP_TREE_SCALAR_STMTS (node), i, stmt); i++)
633 {
634 fprintf (vect_dump, "\n\tstmt %d ", i);
635 print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
636 }
637 fprintf (vect_dump, "\n");
638
639 vect_print_slp_tree (SLP_TREE_LEFT (node));
640 vect_print_slp_tree (SLP_TREE_RIGHT (node));
641 }
642
643
644 /* Mark the tree rooted at NODE with MARK (PURE_SLP or HYBRID).
645 If MARK is HYBRID, it refers to a specific stmt in NODE (the stmt at index
646 J). Otherwise, MARK is PURE_SLP and J is -1, which indicates that all the
647 stmts in NODE are to be marked. */
648
649 static void
650 vect_mark_slp_stmts (slp_tree node, enum slp_vect_type mark, int j)
651 {
652 int i;
653 gimple stmt;
654
655 if (!node)
656 return;
657
658 for (i = 0; VEC_iterate (gimple, SLP_TREE_SCALAR_STMTS (node), i, stmt); i++)
659 if (j < 0 || i == j)
660 STMT_SLP_TYPE (vinfo_for_stmt (stmt)) = mark;
661
662 vect_mark_slp_stmts (SLP_TREE_LEFT (node), mark, j);
663 vect_mark_slp_stmts (SLP_TREE_RIGHT (node), mark, j);
664 }
665
666
/* Check if the permutation required by the SLP INSTANCE is supported.
   Reorganize the SLP nodes stored in SLP_INSTANCE_LOADS if needed.

   Walks LOAD_PERMUTATION in steps of GROUP_SIZE (one entry per load node),
   verifies that every load node belongs to the same interleaving chain as
   the first one, sorts the load nodes by their position in that chain, and
   finally asks vect_transform_slp_perm_load (in analysis mode, last arg
   TRUE) whether the permutation can actually be generated.  */

static bool
vect_supported_slp_permutation_p (slp_instance instance)
{
  slp_tree node = VEC_index (slp_tree, SLP_INSTANCE_LOADS (instance), 0);
  gimple stmt = VEC_index (gimple, SLP_TREE_SCALAR_STMTS (node), 0);
  gimple first_load = DR_GROUP_FIRST_DR (vinfo_for_stmt (stmt));
  VEC (slp_tree, heap) *sorted_loads = NULL;
  int index;
  slp_tree *tmp_loads = NULL;
  int group_size = SLP_INSTANCE_GROUP_SIZE (instance), i, j;
  slp_tree load;

  /* FORNOW: The only supported loads permutation is loads from the same
     location in all the loads in the node, when the data-refs in
     nodes of LOADS constitute an interleaving chain.
     Sort the nodes according to the order of accesses in the chain.  */
  tmp_loads = (slp_tree *) xmalloc (sizeof (slp_tree) * group_size);
  /* I steps through LOAD_PERMUTATION by GROUP_SIZE (taking the first
     entry recorded for each load node), J steps through the load nodes
     one at a time.  */
  for (i = 0, j = 0;
       VEC_iterate (int, SLP_INSTANCE_LOAD_PERMUTATION (instance), i, index)
       && VEC_iterate (slp_tree, SLP_INSTANCE_LOADS (instance), j, load);
       i += group_size, j++)
    {
      gimple scalar_stmt = VEC_index (gimple, SLP_TREE_SCALAR_STMTS (load), 0);
      /* Check that the loads are all in the same interleaving chain.  */
      if (DR_GROUP_FIRST_DR (vinfo_for_stmt (scalar_stmt)) != first_load)
        {
          if (vect_print_dump_info (REPORT_DETAILS))
            {
              fprintf (vect_dump, "Build SLP failed: unsupported data "
                                  "permutation ");
              print_gimple_stmt (vect_dump, scalar_stmt, 0, TDF_SLIM);
            }

          free (tmp_loads);
          return false;
        }

      /* Place the node at its position in the interleaving chain.  */
      tmp_loads[index] = load;
    }

  sorted_loads = VEC_alloc (slp_tree, heap, group_size);
  for (i = 0; i < group_size; i++)
    VEC_safe_push (slp_tree, heap, sorted_loads, tmp_loads[i]);

  VEC_free (slp_tree, heap, SLP_INSTANCE_LOADS (instance));
  SLP_INSTANCE_LOADS (instance) = sorted_loads;
  free (tmp_loads);

  /* Analysis-only call (no code generation): last argument TRUE.  */
  if (!vect_transform_slp_perm_load (stmt, NULL, NULL,
                                     SLP_INSTANCE_UNROLLING_FACTOR (instance),
                                     instance, true))
    return false;

  return true;
}
725
726
/* Check if the required load permutation is supported.
   LOAD_PERMUTATION contains a list of indices of the loads.
   In SLP this permutation is relative to the order of strided stores that are
   the base of the SLP instance.

   Returns TRUE only when LOAD_PERMUTATION has exactly
   GROUP_SIZE * GROUP_SIZE entries forming GROUP_SIZE runs of identical
   indices (the 0..01..1.. shape) AND vect_supported_slp_permutation_p
   accepts the instance.  */

static bool
vect_supported_load_permutation_p (slp_instance slp_instn, int group_size,
                                   VEC (int, heap) *load_permutation)
{
  int i = 0, j, prev = -1, next, k;
  bool supported;

  /* FORNOW: permutations are only supported for loop-aware SLP.  */
  if (!slp_instn)
    return false;

  if (vect_print_dump_info (REPORT_SLP))
    {
      fprintf (vect_dump, "Load permutation ");
      for (i = 0; VEC_iterate (int, load_permutation, i, next); i++)
        fprintf (vect_dump, "%d ", next);
    }

  /* FORNOW: the only supported permutation is 0..01..1.. of length equal to
     GROUP_SIZE and where each sequence of same drs is of GROUP_SIZE length as
     well.  */
  if (VEC_length (int, load_permutation)
      != (unsigned int) (group_size * group_size))
    return false;

  supported = true;
  for (j = 0; j < group_size; j++)
    {
      /* Each run of GROUP_SIZE consecutive entries must hold one repeated
         index; PREV carries the previous entry for the comparison.  */
      for (i = j * group_size, k = 0;
           VEC_iterate (int, load_permutation, i, next) && k < group_size;
           i++, k++)
        {
          if (i != j * group_size && next != prev)
            {
              supported = false;
              break;
            }

          prev = next;
        }
    }

  /* I == GROUP_SIZE * GROUP_SIZE only when the inner loop never broke out
     early on the last run, i.e. the whole permutation was scanned.  */
  if (supported && i == group_size * group_size
      && vect_supported_slp_permutation_p (slp_instn))
    return true;

  return false;
}
780
781
782 /* Find the first load in the loop that belongs to INSTANCE.
783 When loads are in several SLP nodes, there can be a case in which the first
784 load does not appear in the first SLP node to be transformed, causing
785 incorrect order of statements. Since we generate all the loads together,
786 they must be inserted before the first load of the SLP instance and not
787 before the first load of the first node of the instance. */
788 static gimple
789 vect_find_first_load_in_slp_instance (slp_instance instance)
790 {
791 int i, j;
792 slp_tree load_node;
793 gimple first_load = NULL, load;
794
795 for (i = 0;
796 VEC_iterate (slp_tree, SLP_INSTANCE_LOADS (instance), i, load_node);
797 i++)
798 for (j = 0;
799 VEC_iterate (gimple, SLP_TREE_SCALAR_STMTS (load_node), j, load);
800 j++)
801 first_load = get_earlier_stmt (load, first_load);
802
803 return first_load;
804 }
805
806
/* Analyze an SLP instance starting from a group of strided stores.  Call
   vect_build_slp_tree to build a tree of packed stmts if possible.
   Return FALSE if it's impossible to SLP any stmt in the loop.

   STMT is the first store of an interleaving group.  On success the new
   instance is pushed onto LOOP_VINFO_SLP_INSTANCES; on an ordinary build
   failure TRUE is still returned (other groups may still be SLPable).  */

static bool
vect_analyze_slp_instance (loop_vec_info loop_vinfo, gimple stmt)
{
  slp_instance new_instance;
  slp_tree node = XNEW (struct _slp_tree);
  unsigned int group_size = DR_GROUP_SIZE (vinfo_for_stmt (stmt));
  unsigned int unrolling_factor = 1, nunits;
  tree vectype, scalar_type;
  gimple next;
  unsigned int vectorization_factor = 0, ncopies;
  /* NOTE(review): SLP_IMPOSSIBLE is never set to TRUE in this function,
     so the check near the end is dead - confirm whether a setter was
     removed or is planned.  */
  bool slp_impossible = false;
  int inside_cost = 0, outside_cost = 0, ncopies_for_cost;
  unsigned int max_nunits = 0;
  VEC (int, heap) *load_permutation;
  VEC (slp_tree, heap) *loads;

  scalar_type = TREE_TYPE (DR_REF (STMT_VINFO_DATA_REF (
                                             vinfo_for_stmt (stmt))));
  vectype = get_vectype_for_scalar_type (scalar_type);
  if (!vectype)
    {
      if (vect_print_dump_info (REPORT_SLP))
        {
          fprintf (vect_dump, "Build SLP failed: unsupported data-type ");
          print_generic_expr (vect_dump, scalar_type, TDF_SLIM);
        }
      return false;
    }

  nunits = TYPE_VECTOR_SUBPARTS (vectype);
  vectorization_factor = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
  /* NOTE(review): NCOPIES appears unused after this point - confirm.  */
  ncopies = vectorization_factor / nunits;

  /* Create a node (a root of the SLP tree) for the packed strided stores.  */
  SLP_TREE_SCALAR_STMTS (node) = VEC_alloc (gimple, heap, group_size);
  next = stmt;
  /* Collect the stores and store them in SLP_TREE_SCALAR_STMTS.  */
  while (next)
    {
      VEC_safe_push (gimple, heap, SLP_TREE_SCALAR_STMTS (node), next);
      next = DR_GROUP_NEXT_DR (vinfo_for_stmt (next));
    }

  SLP_TREE_VEC_STMTS (node) = NULL;
  SLP_TREE_NUMBER_OF_VEC_STMTS (node) = 0;
  SLP_TREE_LEFT (node) = NULL;
  SLP_TREE_RIGHT (node) = NULL;
  SLP_TREE_OUTSIDE_OF_LOOP_COST (node) = 0;
  SLP_TREE_INSIDE_OF_LOOP_COST (node) = 0;

  /* Calculate the unrolling factor.  */
  unrolling_factor = least_common_multiple (nunits, group_size) / group_size;

  /* Calculate the number of vector stmts to create based on the unrolling
     factor (number of vectors is 1 if NUNITS >= GROUP_SIZE, and is
     GROUP_SIZE / NUNITS otherwise.  */
  ncopies_for_cost = unrolling_factor * group_size / nunits;

  load_permutation = VEC_alloc (int, heap, group_size * group_size);
  loads = VEC_alloc (slp_tree, heap, group_size);

  /* Build the tree for the SLP instance.  */
  if (vect_build_slp_tree (loop_vinfo, &node, group_size, &inside_cost,
                           &outside_cost, ncopies_for_cost, &max_nunits,
                           &load_permutation, &loads))
    {
      /* Create a new SLP instance.  */
      new_instance = XNEW (struct _slp_instance);
      SLP_INSTANCE_TREE (new_instance) = node;
      SLP_INSTANCE_GROUP_SIZE (new_instance) = group_size;
      /* Calculate the unrolling factor based on the smallest type in the
         loop.  */
      if (max_nunits > nunits)
        unrolling_factor = least_common_multiple (max_nunits, group_size)
                           / group_size;

      SLP_INSTANCE_UNROLLING_FACTOR (new_instance) = unrolling_factor;
      SLP_INSTANCE_OUTSIDE_OF_LOOP_COST (new_instance) = outside_cost;
      SLP_INSTANCE_INSIDE_OF_LOOP_COST (new_instance) = inside_cost;
      SLP_INSTANCE_LOADS (new_instance) = loads;
      SLP_INSTANCE_FIRST_LOAD_STMT (new_instance) = NULL;
      SLP_INSTANCE_LOAD_PERMUTATION (new_instance) = load_permutation;
      if (VEC_length (slp_tree, loads))
        {
          /* Some loads need permutation - verify it is supported.  */
          if (!vect_supported_load_permutation_p (new_instance, group_size,
                                                  load_permutation))
            {
              if (vect_print_dump_info (REPORT_SLP))
                {
                  fprintf (vect_dump, "Build SLP failed: unsupported load "
                                      "permutation ");
                  print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
                }

              vect_free_slp_instance (new_instance);
              return false;
            }

          SLP_INSTANCE_FIRST_LOAD_STMT (new_instance)
            = vect_find_first_load_in_slp_instance (new_instance);
        }
      else
        VEC_free (int, heap, SLP_INSTANCE_LOAD_PERMUTATION (new_instance));

      VEC_safe_push (slp_instance, heap, LOOP_VINFO_SLP_INSTANCES (loop_vinfo),
                     new_instance);
      if (vect_print_dump_info (REPORT_SLP))
        vect_print_slp_tree (node);

      return true;
    }

  /* Failed to SLP.  */
  /* Free the allocated memory.  */
  vect_free_slp_tree (node);
  VEC_free (int, heap, load_permutation);
  VEC_free (slp_tree, heap, loads);

  if (slp_impossible)
    return false;

  /* SLP failed for this instance, but it is still possible to SLP other stmts
     in the loop.  */
  return true;
}
936
937
938 /* Check if there are stmts in the loop can be vectorized using SLP. Build SLP
939 trees of packed scalar stmts if SLP is possible. */
940
941 bool
942 vect_analyze_slp (loop_vec_info loop_vinfo)
943 {
944 unsigned int i;
945 VEC (gimple, heap) *strided_stores = LOOP_VINFO_STRIDED_STORES (loop_vinfo);
946 gimple store;
947
948 if (vect_print_dump_info (REPORT_SLP))
949 fprintf (vect_dump, "=== vect_analyze_slp ===");
950
951 for (i = 0; VEC_iterate (gimple, strided_stores, i, store); i++)
952 if (!vect_analyze_slp_instance (loop_vinfo, store))
953 {
954 /* SLP failed. No instance can be SLPed in the loop. */
955 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
956 fprintf (vect_dump, "SLP failed.");
957
958 return false;
959 }
960
961 return true;
962 }
963
964
965 /* For each possible SLP instance decide whether to SLP it and calculate overall
966 unrolling factor needed to SLP the loop. */
967
968 void
969 vect_make_slp_decision (loop_vec_info loop_vinfo)
970 {
971 unsigned int i, unrolling_factor = 1;
972 VEC (slp_instance, heap) *slp_instances = LOOP_VINFO_SLP_INSTANCES (loop_vinfo);
973 slp_instance instance;
974 int decided_to_slp = 0;
975
976 if (vect_print_dump_info (REPORT_SLP))
977 fprintf (vect_dump, "=== vect_make_slp_decision ===");
978
979 for (i = 0; VEC_iterate (slp_instance, slp_instances, i, instance); i++)
980 {
981 /* FORNOW: SLP if you can. */
982 if (unrolling_factor < SLP_INSTANCE_UNROLLING_FACTOR (instance))
983 unrolling_factor = SLP_INSTANCE_UNROLLING_FACTOR (instance);
984
985 /* Mark all the stmts that belong to INSTANCE as PURE_SLP stmts. Later we
986 call vect_detect_hybrid_slp () to find stmts that need hybrid SLP and
987 loop-based vectorization. Such stmts will be marked as HYBRID. */
988 vect_mark_slp_stmts (SLP_INSTANCE_TREE (instance), pure_slp, -1);
989 decided_to_slp++;
990 }
991
992 LOOP_VINFO_SLP_UNROLLING_FACTOR (loop_vinfo) = unrolling_factor;
993
994 if (decided_to_slp && vect_print_dump_info (REPORT_SLP))
995 fprintf (vect_dump, "Decided to SLP %d instances. Unrolling factor %d",
996 decided_to_slp, unrolling_factor);
997 }
998
999
/* Find stmts that must be both vectorized and SLPed (since they feed stmts
   that can't be SLPed) in the tree rooted at NODE.  Mark such stmts as HYBRID.

   A PURE_SLP stmt whose SSA result has an immediate use in a relevant stmt
   that is itself outside any SLP tree (STMT_SLP_TYPE == 0) must also be
   vectorized by the loop-based vectorizer; the whole node position I is
   re-marked HYBRID via vect_mark_slp_stmts.  */

static void
vect_detect_hybrid_slp_stmts (slp_tree node)
{
  int i;
  gimple stmt;
  imm_use_iterator imm_iter;
  gimple use_stmt;

  if (!node)
    return;

  for (i = 0; VEC_iterate (gimple, SLP_TREE_SCALAR_STMTS (node), i, stmt); i++)
    /* Only stmts defining an SSA name can have trackable immediate uses.  */
    if (PURE_SLP_STMT (vinfo_for_stmt (stmt))
        && TREE_CODE (gimple_op (stmt, 0)) == SSA_NAME)
      FOR_EACH_IMM_USE_STMT (use_stmt, imm_iter, gimple_op (stmt, 0))
        /* A use with no vinfo is outside the loop being vectorized.  */
        if (vinfo_for_stmt (use_stmt)
            && !STMT_SLP_TYPE (vinfo_for_stmt (use_stmt))
            && STMT_VINFO_RELEVANT (vinfo_for_stmt (use_stmt)))
          vect_mark_slp_stmts (node, hybrid, i);

  vect_detect_hybrid_slp_stmts (SLP_TREE_LEFT (node));
  vect_detect_hybrid_slp_stmts (SLP_TREE_RIGHT (node));
}
1026
1027
1028 /* Find stmts that must be both vectorized and SLPed. */
1029
1030 void
1031 vect_detect_hybrid_slp (loop_vec_info loop_vinfo)
1032 {
1033 unsigned int i;
1034 VEC (slp_instance, heap) *slp_instances = LOOP_VINFO_SLP_INSTANCES (loop_vinfo);
1035 slp_instance instance;
1036
1037 if (vect_print_dump_info (REPORT_SLP))
1038 fprintf (vect_dump, "=== vect_detect_hybrid_slp ===");
1039
1040 for (i = 0; VEC_iterate (slp_instance, slp_instances, i, instance); i++)
1041 vect_detect_hybrid_slp_stmts (SLP_INSTANCE_TREE (instance));
1042 }
1043
1044 /* SLP costs are calculated according to SLP instance unrolling factor (i.e.,
1045 the number of created vector stmts depends on the unrolling factor). However,
1046 the actual number of vector stmts for every SLP node depends on VF which is
1047 set later in vect_analyze_operations(). Hence, SLP costs should be updated.
1048 In this function we assume that the inside costs calculated in
1049 vect_model_xxx_cost are linear in ncopies. */
1050
1051 void
1052 vect_update_slp_costs_according_to_vf (loop_vec_info loop_vinfo)
1053 {
1054 unsigned int i, vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
1055 VEC (slp_instance, heap) *slp_instances = LOOP_VINFO_SLP_INSTANCES (loop_vinfo);
1056 slp_instance instance;
1057
1058 if (vect_print_dump_info (REPORT_SLP))
1059 fprintf (vect_dump, "=== vect_update_slp_costs_according_to_vf ===");
1060
1061 for (i = 0; VEC_iterate (slp_instance, slp_instances, i, instance); i++)
1062 /* We assume that costs are linear in ncopies. */
1063 SLP_INSTANCE_INSIDE_OF_LOOP_COST (instance) *= vf
1064 / SLP_INSTANCE_UNROLLING_FACTOR (instance);
1065 }
1066
/* For constant and loop invariant defs of SLP_NODE this function returns
   (vector) defs (VEC_OPRNDS) that will be used in the vectorized stmts.
   OP_NUM determines if we gather defs for operand 0 or operand 1 of the scalar
   stmts.  NUMBER_OF_VECTORS is the number of vector defs to create.  */

static void
vect_get_constant_vectors (slp_tree slp_node, VEC(tree,heap) **vec_oprnds,
                           unsigned int op_num, unsigned int number_of_vectors)
{
  VEC (gimple, heap) *stmts = SLP_TREE_SCALAR_STMTS (slp_node);
  gimple stmt = VEC_index (gimple, stmts, 0);
  stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
  tree vectype = STMT_VINFO_VECTYPE (stmt_vinfo);
  int nunits;
  tree vec_cst;
  tree t = NULL_TREE;
  int j, number_of_places_left_in_vector;
  tree vector_type;
  tree op, vop;
  int group_size = VEC_length (gimple, stmts);
  unsigned int vec_num, i;
  int number_of_copies = 1;
  VEC (tree, heap) *voprnds = VEC_alloc (tree, heap, number_of_vectors);
  bool constant_p, is_store;

  /* For a store (stmt with a data reference) the operand to vectorize is
     always the RHS; otherwise pick the requested operand (gimple operand 0
     is the LHS, hence OP_NUM + 1).  */
  if (STMT_VINFO_DATA_REF (stmt_vinfo))
    {
      is_store = true;
      op = gimple_assign_rhs1 (stmt);
    }
  else
    {
      is_store = false;
      op = gimple_op (stmt, op_num + 1);
    }

  /* Constants are emitted as a VECTOR_CST of the stmt's vectype; loop
     invariants as a CONSTRUCTOR of the vector type derived from the
     operand's scalar type.  */
  if (CONSTANT_CLASS_P (op))
    {
      vector_type = vectype;
      constant_p = true;
    }
  else
    {
      vector_type = get_vectype_for_scalar_type (TREE_TYPE (op));
      gcc_assert (vector_type);
      constant_p = false;
    }

  nunits = TYPE_VECTOR_SUBPARTS (vector_type);

  /* NUMBER_OF_COPIES is the number of times we need to use the same values in
     created vectors. It is greater than 1 if unrolling is performed.

     For example, we have two scalar operands, s1 and s2 (e.g., group of
     strided accesses of size two), while NUNITS is four (i.e., four scalars
     of this type can be packed in a vector). The output vector will contain
     two copies of each scalar operand: {s1, s2, s1, s2}. (NUMBER_OF_COPIES
     will be 2).

     If GROUP_SIZE > NUNITS, the scalars will be split into several vectors
     containing the operands.

     For example, NUNITS is four as before, and the group size is 8
     (s1, s2, ..., s8). We will create two vectors {s1, s2, s3, s4} and
     {s5, s6, s7, s8}. */

  number_of_copies = least_common_multiple (nunits, group_size) / group_size;

  number_of_places_left_in_vector = nunits;
  /* Walk the scalar stmts from last to first: tree_cons prepends, so the
     resulting element list comes out in source order.  I is unsigned; its
     wrap-around after index 0 makes VEC_iterate's bound check terminate
     the loop.  */
  for (j = 0; j < number_of_copies; j++)
    {
      for (i = group_size - 1; VEC_iterate (gimple, stmts, i, stmt); i--)
        {
          if (is_store)
            op = gimple_assign_rhs1 (stmt);
          else
            op = gimple_op (stmt, op_num + 1);

          /* Create 'vect_ = {op0,op1,...,opn}'. */
          t = tree_cons (NULL_TREE, op, t);

          number_of_places_left_in_vector--;

          /* A full vector's worth of operands was collected - emit it.  */
          if (number_of_places_left_in_vector == 0)
            {
              number_of_places_left_in_vector = nunits;

              if (constant_p)
                vec_cst = build_vector (vector_type, t);
              else
                vec_cst = build_constructor_from_list (vector_type, t);
              VEC_quick_push (tree, voprnds,
                              vect_init_vector (stmt, vec_cst, vector_type, NULL));
              t = NULL_TREE;
            }
        }
    }

  /* Since the vectors are created in the reverse order, we should invert
     them.  */
  vec_num = VEC_length (tree, voprnds);
  for (j = vec_num - 1; j >= 0; j--)
    {
      vop = VEC_index (tree, voprnds, j);
      VEC_quick_push (tree, *vec_oprnds, vop);
    }

  VEC_free (tree, heap, voprnds);

  /* In case that VF is greater than the unrolling factor needed for the SLP
     group of stmts, NUMBER_OF_VECTORS to be created is greater than
     NUMBER_OF_SCALARS/NUNITS or NUNITS/NUMBER_OF_SCALARS, and hence we have
     to replicate the vectors.  */
  while (number_of_vectors > VEC_length (tree, *vec_oprnds))
    {
      for (i = 0; VEC_iterate (tree, *vec_oprnds, i, vop) && i < vec_num; i++)
        VEC_quick_push (tree, *vec_oprnds, vop);
    }
}
1186
1187
1188 /* Get vectorized definitions from SLP_NODE that contains corresponding
1189 vectorized def-stmts. */
1190
1191 static void
1192 vect_get_slp_vect_defs (slp_tree slp_node, VEC (tree,heap) **vec_oprnds)
1193 {
1194 tree vec_oprnd;
1195 gimple vec_def_stmt;
1196 unsigned int i;
1197
1198 gcc_assert (SLP_TREE_VEC_STMTS (slp_node));
1199
1200 for (i = 0;
1201 VEC_iterate (gimple, SLP_TREE_VEC_STMTS (slp_node), i, vec_def_stmt);
1202 i++)
1203 {
1204 gcc_assert (vec_def_stmt);
1205 vec_oprnd = gimple_get_lhs (vec_def_stmt);
1206 VEC_quick_push (tree, *vec_oprnds, vec_oprnd);
1207 }
1208 }
1209
1210
/* Get vectorized definitions for SLP_NODE.
   If the scalar definitions are loop invariants or constants, collect them and
   call vect_get_constant_vectors() to create vector stmts.
   Otherwise, the def-stmts must be already vectorized and the vectorized stmts
   must be stored in the LEFT/RIGHT node of SLP_NODE, and we call
   vect_get_slp_vect_defs() to retrieve them.
   If VEC_OPRNDS1 is NULL, don't get vector defs for the second operand (from
   the right node. This is used when the second operand must remain scalar.  */

void
vect_get_slp_defs (slp_tree slp_node, VEC (tree,heap) **vec_oprnds0,
                   VEC (tree,heap) **vec_oprnds1)
{
  gimple first_stmt;
  enum tree_code code;
  int number_of_vects;
  HOST_WIDE_INT lhs_size_unit, rhs_size_unit;

  first_stmt = VEC_index (gimple, SLP_TREE_SCALAR_STMTS (slp_node), 0);
  /* The number of vector defs is determined by the number of vector statements
     in the node from which we get those statements.  */
  if (SLP_TREE_LEFT (slp_node))
    number_of_vects = SLP_TREE_NUMBER_OF_VEC_STMTS (SLP_TREE_LEFT (slp_node));
  else
    {
      number_of_vects = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
      /* Number of vector stmts was calculated according to LHS in
         vect_schedule_slp_instance(), fix it by replacing LHS with RHS, if
         necessary. See vect_get_smallest_scalar_type() for details.  */
      vect_get_smallest_scalar_type (first_stmt, &lhs_size_unit,
                                     &rhs_size_unit);
      if (rhs_size_unit != lhs_size_unit)
        {
          number_of_vects *= rhs_size_unit;
          number_of_vects /= lhs_size_unit;
        }
    }

  /* Allocate memory for vectorized defs.  */
  *vec_oprnds0 = VEC_alloc (tree, heap, number_of_vects);

  /* SLP_NODE corresponds either to a group of stores or to a group of
     unary/binary operations. We don't call this function for loads.  */
  if (SLP_TREE_LEFT (slp_node))
    /* The defs are already vectorized.  */
    vect_get_slp_vect_defs (SLP_TREE_LEFT (slp_node), vec_oprnds0);
  else
    /* Build vectors from scalar defs.  */
    vect_get_constant_vectors (slp_node, vec_oprnds0, 0, number_of_vects);

  if (STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt)))
    /* Since we don't call this function with loads, this is a group of
       stores.  */
    return;

  /* Only a binary RHS has a second operand to fetch; the caller may also
     suppress it by passing a NULL VEC_OPRNDS1.  */
  code = gimple_assign_rhs_code (first_stmt);
  if (get_gimple_rhs_class (code) != GIMPLE_BINARY_RHS || !vec_oprnds1)
    return;

  /* The number of vector defs is determined by the number of vector statements
     in the node from which we get those statements.  */
  if (SLP_TREE_RIGHT (slp_node))
    number_of_vects = SLP_TREE_NUMBER_OF_VEC_STMTS (SLP_TREE_RIGHT (slp_node));
  else
    number_of_vects = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);

  *vec_oprnds1 = VEC_alloc (tree, heap, number_of_vects);

  if (SLP_TREE_RIGHT (slp_node))
    /* The defs are already vectorized.  */
    vect_get_slp_vect_defs (SLP_TREE_RIGHT (slp_node), vec_oprnds1);
  else
    /* Build vectors from scalar defs.  */
    vect_get_constant_vectors (slp_node, vec_oprnds1, 1, number_of_vects);
}
1286
1287 /* Create NCOPIES permutation statements using the mask MASK_BYTES (by
1288 building a vector of type MASK_TYPE from it) and two input vectors placed in
1289 DR_CHAIN at FIRST_VEC_INDX and SECOND_VEC_INDX for the first copy and
1290 shifting by STRIDE elements of DR_CHAIN for every copy.
1291 (STRIDE is the number of vectorized stmts for NODE divided by the number of
1292 copies).
1293 VECT_STMTS_COUNTER specifies the index in the vectorized stmts of NODE, where
1294 the created stmts must be inserted. */
1295
1296 static inline void
1297 vect_create_mask_and_perm (gimple stmt, gimple next_scalar_stmt,
1298 int *mask_array, int mask_nunits,
1299 tree mask_element_type, tree mask_type,
1300 int first_vec_indx, int second_vec_indx,
1301 gimple_stmt_iterator *gsi, slp_tree node,
1302 tree builtin_decl, tree vectype,
1303 VEC(tree,heap) *dr_chain,
1304 int ncopies, int vect_stmts_counter)
1305 {
1306 tree t = NULL_TREE, mask_vec, mask, perm_dest;
1307 gimple perm_stmt = NULL;
1308 stmt_vec_info next_stmt_info;
1309 int i, group_size, stride, dr_chain_size;
1310 tree first_vec, second_vec, data_ref;
1311 VEC (tree, heap) *params = NULL;
1312
1313 /* Create a vector mask. */
1314 for (i = mask_nunits - 1; i >= 0; --i)
1315 t = tree_cons (NULL_TREE, build_int_cst (mask_element_type, mask_array[i]),
1316 t);
1317 mask_vec = build_vector (mask_type, t);
1318 mask = vect_init_vector (stmt, mask_vec, mask_type, NULL);
1319
1320 group_size = VEC_length (gimple, SLP_TREE_SCALAR_STMTS (node));
1321 stride = SLP_TREE_NUMBER_OF_VEC_STMTS (node) / ncopies;
1322 dr_chain_size = VEC_length (tree, dr_chain);
1323
1324 /* Initialize the vect stmts of NODE to properly insert the generated
1325 stmts later. */
1326 for (i = VEC_length (gimple, SLP_TREE_VEC_STMTS (node));
1327 i < (int) SLP_TREE_NUMBER_OF_VEC_STMTS (node); i++)
1328 VEC_quick_push (gimple, SLP_TREE_VEC_STMTS (node), NULL);
1329
1330 perm_dest = vect_create_destination_var (gimple_assign_lhs (stmt), vectype);
1331 for (i = 0; i < ncopies; i++)
1332 {
1333 first_vec = VEC_index (tree, dr_chain, first_vec_indx);
1334 second_vec = VEC_index (tree, dr_chain, second_vec_indx);
1335
1336 /* Build argument list for the vectorized call. */
1337 VEC_free (tree, heap, params);
1338 params = VEC_alloc (tree, heap, 3);
1339 VEC_quick_push (tree, params, first_vec);
1340 VEC_quick_push (tree, params, second_vec);
1341 VEC_quick_push (tree, params, mask);
1342
1343 /* Generate the permute statement. */
1344 perm_stmt = gimple_build_call_vec (builtin_decl, params);
1345 data_ref = make_ssa_name (perm_dest, perm_stmt);
1346 gimple_call_set_lhs (perm_stmt, data_ref);
1347 vect_finish_stmt_generation (stmt, perm_stmt, gsi);
1348
1349 /* Store the vector statement in NODE. */
1350 VEC_replace (gimple, SLP_TREE_VEC_STMTS (node),
1351 stride * i + vect_stmts_counter, perm_stmt);
1352
1353 first_vec_indx += stride;
1354 second_vec_indx += stride;
1355 }
1356
1357 /* Mark the scalar stmt as vectorized. */
1358 next_stmt_info = vinfo_for_stmt (next_scalar_stmt);
1359 STMT_VINFO_VEC_STMT (next_stmt_info) = perm_stmt;
1360 }
1361
1362
/* Given FIRST_MASK_ELEMENT - the mask element in element representation,
   return in CURRENT_MASK_ELEMENT its equivalent in target specific
   representation. Check that the mask is valid and return FALSE if not.
   Return TRUE in NEED_NEXT_VECTOR if the permutation requires to move to
   the next vector, i.e., the current first vector is not needed.  */

static bool
vect_get_mask_element (gimple stmt, int first_mask_element, int m,
                       int mask_nunits, bool only_one_vec, int index,
                       int *mask, int *current_mask_element,
                       bool *need_next_vector)
{
  int i;
  /* State carried across successive calls while a single mask is being
     filled; it is reset once the mask is complete (INDEX == MASK_NUNITS - 1).
     NOTE(review): as function-level statics these make the function
     non-reentrant, and an early FALSE return leaves them unreset for the
     next caller - presumably tolerated because failure aborts the whole
     permutation; verify against callers.  */
  static int number_of_mask_fixes = 1;
  static bool mask_fixed = false;
  static bool needs_first_vector = false;

  /* Convert to target specific representation.  */
  *current_mask_element = first_mask_element + m;
  /* Adjust the value in case it's a mask for second and third vectors.  */
  *current_mask_element -= mask_nunits * (number_of_mask_fixes - 1);

  if (*current_mask_element < mask_nunits)
    needs_first_vector = true;

  /* We have only one input vector to permute but the mask accesses values in
     the next vector as well.  */
  if (only_one_vec && *current_mask_element >= mask_nunits)
    {
      if (vect_print_dump_info (REPORT_DETAILS))
        {
          fprintf (vect_dump, "permutation requires at least two vectors ");
          print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
        }

      return false;
    }

  /* The mask requires the next vector.  */
  if (*current_mask_element >= mask_nunits * 2)
    {
      if (needs_first_vector || mask_fixed)
        {
          /* We either need the first vector too or have already moved to the
             next vector. In both cases, this permutation needs three
             vectors.  */
          if (vect_print_dump_info (REPORT_DETAILS))
            {
              fprintf (vect_dump, "permutation requires at "
                       "least three vectors ");
              print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
            }

          return false;
        }

      /* We move to the next vector, dropping the first one and working with
         the second and the third - we need to adjust the values of the mask
         accordingly.  */
      *current_mask_element -= mask_nunits * number_of_mask_fixes;

      /* Re-base the mask elements already recorded for this mask.  */
      for (i = 0; i < index; i++)
        mask[i] -= mask_nunits * number_of_mask_fixes;

      (number_of_mask_fixes)++;
      mask_fixed = true;
    }

  *need_next_vector = mask_fixed;

  /* This was the last element of this mask. Start a new one.  */
  if (index == mask_nunits - 1)
    {
      number_of_mask_fixes = 1;
      mask_fixed = false;
      needs_first_vector = false;
    }

  return true;
}
1443
1444
1445 /* Generate vector permute statements from a list of loads in DR_CHAIN.
1446 If ANALYZE_ONLY is TRUE, only check that it is possible to create valid
1447 permute statements for SLP_NODE_INSTANCE. */
1448 bool
1449 vect_transform_slp_perm_load (gimple stmt, VEC (tree, heap) *dr_chain,
1450 gimple_stmt_iterator *gsi, int vf,
1451 slp_instance slp_node_instance, bool analyze_only)
1452 {
1453 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1454 tree mask_element_type = NULL_TREE, mask_type;
1455 int i, j, k, m, scale, mask_nunits, nunits, vec_index = 0, scalar_index;
1456 slp_tree node;
1457 tree vectype = STMT_VINFO_VECTYPE (stmt_info), builtin_decl;
1458 gimple next_scalar_stmt;
1459 int group_size = SLP_INSTANCE_GROUP_SIZE (slp_node_instance);
1460 int first_mask_element;
1461 int index, unroll_factor, *mask, current_mask_element, ncopies;
1462 bool only_one_vec = false, need_next_vector = false;
1463 int first_vec_index, second_vec_index, orig_vec_stmts_num, vect_stmts_counter;
1464
1465 if (!targetm.vectorize.builtin_vec_perm)
1466 {
1467 if (vect_print_dump_info (REPORT_DETAILS))
1468 {
1469 fprintf (vect_dump, "no builtin for vect permute for ");
1470 print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
1471 }
1472
1473 return false;
1474 }
1475
1476 builtin_decl = targetm.vectorize.builtin_vec_perm (vectype,
1477 &mask_element_type);
1478 if (!builtin_decl || !mask_element_type)
1479 {
1480 if (vect_print_dump_info (REPORT_DETAILS))
1481 {
1482 fprintf (vect_dump, "no builtin for vect permute for ");
1483 print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
1484 }
1485
1486 return false;
1487 }
1488
1489 mask_type = get_vectype_for_scalar_type (mask_element_type);
1490 mask_nunits = TYPE_VECTOR_SUBPARTS (mask_type);
1491 mask = (int *) xmalloc (sizeof (int) * mask_nunits);
1492 nunits = TYPE_VECTOR_SUBPARTS (vectype);
1493 scale = mask_nunits / nunits;
1494 unroll_factor = SLP_INSTANCE_UNROLLING_FACTOR (slp_node_instance);
1495
1496 /* The number of vector stmts to generate based only on SLP_NODE_INSTANCE
1497 unrolling factor. */
1498 orig_vec_stmts_num = group_size *
1499 SLP_INSTANCE_UNROLLING_FACTOR (slp_node_instance) / nunits;
1500 if (orig_vec_stmts_num == 1)
1501 only_one_vec = true;
1502
1503 /* Number of copies is determined by the final vectorization factor
1504 relatively to SLP_NODE_INSTANCE unrolling factor. */
1505 ncopies = vf / SLP_INSTANCE_UNROLLING_FACTOR (slp_node_instance);
1506
1507 /* Generate permutation masks for every NODE. Number of masks for each NODE
1508 is equal to GROUP_SIZE.
1509 E.g., we have a group of three nodes with three loads from the same
1510 location in each node, and the vector size is 4. I.e., we have a
1511 a0b0c0a1b1c1... sequence and we need to create the following vectors:
1512 for a's: a0a0a0a1 a1a1a2a2 a2a3a3a3
1513 for b's: b0b0b0b1 b1b1b2b2 b2b3b3b3
1514 ...
1515
1516 The masks for a's should be: {0,0,0,3} {3,3,6,6} {6,9,9,9} (in target
1517 scpecific type, e.g., in bytes for Altivec.
1518 The last mask is illegal since we assume two operands for permute
1519 operation, and the mask element values can't be outside that range. Hence,
1520 the last mask must be converted into {2,5,5,5}.
1521 For the first two permutations we need the first and the second input
1522 vectors: {a0,b0,c0,a1} and {b1,c1,a2,b2}, and for the last permutation
1523 we need the second and the third vectors: {b1,c1,a2,b2} and
1524 {c2,a3,b3,c3}. */
1525
1526 for (i = 0;
1527 VEC_iterate (slp_tree, SLP_INSTANCE_LOADS (slp_node_instance),
1528 i, node);
1529 i++)
1530 {
1531 scalar_index = 0;
1532 index = 0;
1533 vect_stmts_counter = 0;
1534 vec_index = 0;
1535 first_vec_index = vec_index++;
1536 if (only_one_vec)
1537 second_vec_index = first_vec_index;
1538 else
1539 second_vec_index = vec_index++;
1540
1541 for (j = 0; j < unroll_factor; j++)
1542 {
1543 for (k = 0; k < group_size; k++)
1544 {
1545 first_mask_element = (i + j * group_size) * scale;
1546 for (m = 0; m < scale; m++)
1547 {
1548 if (!vect_get_mask_element (stmt, first_mask_element, m,
1549 mask_nunits, only_one_vec, index, mask,
1550 &current_mask_element, &need_next_vector))
1551 return false;
1552
1553 mask[index++] = current_mask_element;
1554 }
1555
1556 if (index == mask_nunits)
1557 {
1558 index = 0;
1559 if (!analyze_only)
1560 {
1561 if (need_next_vector)
1562 {
1563 first_vec_index = second_vec_index;
1564 second_vec_index = vec_index;
1565 }
1566
1567 next_scalar_stmt = VEC_index (gimple,
1568 SLP_TREE_SCALAR_STMTS (node), scalar_index++);
1569
1570 vect_create_mask_and_perm (stmt, next_scalar_stmt,
1571 mask, mask_nunits, mask_element_type, mask_type,
1572 first_vec_index, second_vec_index, gsi, node,
1573 builtin_decl, vectype, dr_chain, ncopies,
1574 vect_stmts_counter++);
1575 }
1576 }
1577 }
1578 }
1579 }
1580
1581 free (mask);
1582 return true;
1583 }
1584
1585
1586
/* Vectorize SLP instance tree in postorder.  */

static bool
vect_schedule_slp_instance (slp_tree node, slp_instance instance,
                            unsigned int vectorization_factor)
{
  gimple stmt;
  bool strided_store, is_store;
  gimple_stmt_iterator si;
  stmt_vec_info stmt_info;
  unsigned int vec_stmts_size, nunits, group_size;
  tree vectype;
  int i;
  slp_tree loads_node;

  if (!node)
    return false;

  /* Postorder: generate vector code for both operand subtrees first, so
     the defs this node consumes already exist.  */
  vect_schedule_slp_instance (SLP_TREE_LEFT (node), instance,
                              vectorization_factor);
  vect_schedule_slp_instance (SLP_TREE_RIGHT (node), instance,
                              vectorization_factor);

  stmt = VEC_index (gimple, SLP_TREE_SCALAR_STMTS (node), 0);
  stmt_info = vinfo_for_stmt (stmt);

  /* VECTYPE is the type of the destination.  */
  vectype = get_vectype_for_scalar_type (TREE_TYPE (gimple_assign_lhs (stmt)));
  nunits = (unsigned int) TYPE_VECTOR_SUBPARTS (vectype);
  group_size = SLP_INSTANCE_GROUP_SIZE (instance);

  /* For each SLP instance calculate number of vector stmts to be created
     for the scalar stmts in each node of the SLP tree. Number of vector
     elements in one vector iteration is the number of scalar elements in
     one scalar iteration (GROUP_SIZE) multiplied by VF divided by vector
     size.  */
  vec_stmts_size = (vectorization_factor * group_size) / nunits;

  /* In case of load permutation we have to allocate vectorized statements for
     all the nodes that participate in that permutation.  */
  if (SLP_INSTANCE_LOAD_PERMUTATION (instance))
    {
      for (i = 0;
           VEC_iterate (slp_tree, SLP_INSTANCE_LOADS (instance), i, loads_node);
           i++)
        {
          if (!SLP_TREE_VEC_STMTS (loads_node))
            {
              SLP_TREE_VEC_STMTS (loads_node) = VEC_alloc (gimple, heap,
                                                           vec_stmts_size);
              SLP_TREE_NUMBER_OF_VEC_STMTS (loads_node) = vec_stmts_size;
            }
        }
    }

  /* Allocate this node's vector-stmt slots unless a previous visit (e.g.
     via the load-permutation loop above) already did.  */
  if (!SLP_TREE_VEC_STMTS (node))
    {
      SLP_TREE_VEC_STMTS (node) = VEC_alloc (gimple, heap, vec_stmts_size);
      SLP_TREE_NUMBER_OF_VEC_STMTS (node) = vec_stmts_size;
    }

  if (vect_print_dump_info (REPORT_DETAILS))
    {
      fprintf (vect_dump, "------>vectorizing SLP node starting from: ");
      print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
    }

  /* Loads should be inserted before the first load.  */
  if (SLP_INSTANCE_FIRST_LOAD_STMT (instance)
      && STMT_VINFO_STRIDED_ACCESS (stmt_info)
      && !REFERENCE_CLASS_P (gimple_get_lhs (stmt)))
    si = gsi_for_stmt (SLP_INSTANCE_FIRST_LOAD_STMT (instance));
  else
    si = gsi_for_stmt (stmt);

  is_store = vect_transform_stmt (stmt, &si, &strided_store, node, instance);
  if (is_store)
    {
      if (DR_GROUP_FIRST_DR (stmt_info))
        /* If IS_STORE is TRUE, the vectorization of the
           interleaving chain was completed - free all the stores in
           the chain.  */
        vect_remove_stores (DR_GROUP_FIRST_DR (stmt_info));
      else
        /* FORNOW: SLP originates only from strided stores.  */
        gcc_unreachable ();

      return true;
    }

  /* FORNOW: SLP originates only from strided stores.  */
  return false;
}
1680
1681
1682 bool
1683 vect_schedule_slp (loop_vec_info loop_vinfo)
1684 {
1685 VEC (slp_instance, heap) *slp_instances =
1686 LOOP_VINFO_SLP_INSTANCES (loop_vinfo);
1687 slp_instance instance;
1688 unsigned int i;
1689 bool is_store = false;
1690
1691 for (i = 0; VEC_iterate (slp_instance, slp_instances, i, instance); i++)
1692 {
1693 /* Schedule the tree of INSTANCE. */
1694 is_store = vect_schedule_slp_instance (SLP_INSTANCE_TREE (instance),
1695 instance, LOOP_VINFO_VECT_FACTOR (loop_vinfo));
1696
1697 if (vect_print_dump_info (REPORT_VECTORIZED_LOCATIONS)
1698 || vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
1699 fprintf (vect_dump, "vectorizing stmts using SLP.");
1700 }
1701
1702 return is_store;
1703 }