gcc/tree-vect-slp.c
/* SLP - Basic Block Vectorization
   Copyright (C) 2007, 2008, 2009, 2010, 2011, 2012
   Free Software Foundation, Inc.
   Contributed by Dorit Naishlos <dorit@il.ibm.com>
   and Ira Rosen <irar@il.ibm.com>

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free
Software Foundation; either version 3, or (at your option) any later
version.

GCC is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */

#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "dumpfile.h"
#include "tm.h"
#include "ggc.h"
#include "tree.h"
#include "target.h"
#include "basic-block.h"
#include "gimple-pretty-print.h"
#include "tree-flow.h"
#include "tree-pass.h"
#include "cfgloop.h"
#include "expr.h"
#include "recog.h"		/* FIXME: for insn_data */
#include "optabs.h"
#include "tree-vectorizer.h"
#include "langhooks.h"

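/* An illustrative sketch (added for exposition, not part of the original
   sources): basic block SLP looks for groups of isomorphic statements
   such as

     a[0] = b[0] + c[0];
     a[1] = b[1] + c[1];
     a[2] = b[2] + c[2];
     a[3] = b[3] + c[3];

   and, when the target supports it, replaces the whole group with a
   single vector statement, conceptually

     a[0:3] = b[0:3] + c[0:3];

   without requiring an enclosing loop.  */
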
/* Extract the location of the basic block in the source code.
   Return the location of the first statement that has one, and
   UNKNOWN_LOC if no such statement is found.  */

LOC
find_bb_location (basic_block bb)
{
  gimple stmt = NULL;
  gimple_stmt_iterator si;

  if (!bb)
    return UNKNOWN_LOC;

  for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
    {
      stmt = gsi_stmt (si);
      if (gimple_location (stmt) != UNKNOWN_LOC)
        return gimple_location (stmt);
    }

  return UNKNOWN_LOC;
}


/* Recursively free the memory allocated for the SLP tree rooted at NODE.  */

static void
vect_free_slp_tree (slp_tree node)
{
  int i;
  slp_void_p child;

  if (!node)
    return;

  FOR_EACH_VEC_ELT (slp_void_p, SLP_TREE_CHILDREN (node), i, child)
    vect_free_slp_tree ((slp_tree) child);

  VEC_free (slp_void_p, heap, SLP_TREE_CHILDREN (node));
  VEC_free (gimple, heap, SLP_TREE_SCALAR_STMTS (node));

  if (SLP_TREE_VEC_STMTS (node))
    VEC_free (gimple, heap, SLP_TREE_VEC_STMTS (node));

  free (node);
}


/* Free the memory allocated for the SLP instance.  */

void
vect_free_slp_instance (slp_instance instance)
{
  vect_free_slp_tree (SLP_INSTANCE_TREE (instance));
  VEC_free (int, heap, SLP_INSTANCE_LOAD_PERMUTATION (instance));
  VEC_free (slp_tree, heap, SLP_INSTANCE_LOADS (instance));
  VEC_free (stmt_info_for_cost, heap, SLP_INSTANCE_BODY_COST_VEC (instance));
  free (instance);
}


/* Create an SLP node for SCALAR_STMTS.  */

static slp_tree
vect_create_new_slp_node (VEC (gimple, heap) *scalar_stmts)
{
  slp_tree node;
  gimple stmt = VEC_index (gimple, scalar_stmts, 0);
  unsigned int nops;

  if (is_gimple_call (stmt))
    nops = gimple_call_num_args (stmt);
  else if (is_gimple_assign (stmt))
    {
      nops = gimple_num_ops (stmt) - 1;
      if (gimple_assign_rhs_code (stmt) == COND_EXPR)
        nops++;
    }
  else
    return NULL;

  node = XNEW (struct _slp_tree);
  SLP_TREE_SCALAR_STMTS (node) = scalar_stmts;
  SLP_TREE_VEC_STMTS (node) = NULL;
  SLP_TREE_CHILDREN (node) = VEC_alloc (slp_void_p, heap, nops);

  return node;
}

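/* For example (an illustrative note, not from the original sources):
   for x_1 = a_2 + b_3 the node gets 2 operand slots, while for
   x_1 = p_4 < q_5 ? a_2 : b_3 it gets 4, because the two operands of
   the embedded comparison are counted separately.  */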

/* Allocate operands info for NOPS operands, and GROUP_SIZE def-stmts for each
   operand.  */
static VEC (slp_oprnd_info, heap) *
vect_create_oprnd_info (int nops, int group_size)
{
  int i;
  slp_oprnd_info oprnd_info;
  VEC (slp_oprnd_info, heap) *oprnds_info;

  oprnds_info = VEC_alloc (slp_oprnd_info, heap, nops);
  for (i = 0; i < nops; i++)
    {
      oprnd_info = XNEW (struct _slp_oprnd_info);
      oprnd_info->def_stmts = VEC_alloc (gimple, heap, group_size);
      oprnd_info->first_dt = vect_uninitialized_def;
      oprnd_info->first_def_type = NULL_TREE;
      oprnd_info->first_const_oprnd = NULL_TREE;
      oprnd_info->first_pattern = false;
      VEC_quick_push (slp_oprnd_info, oprnds_info, oprnd_info);
    }

  return oprnds_info;
}


/* Free operands info.  */

static void
vect_free_oprnd_info (VEC (slp_oprnd_info, heap) **oprnds_info)
{
  int i;
  slp_oprnd_info oprnd_info;

  FOR_EACH_VEC_ELT (slp_oprnd_info, *oprnds_info, i, oprnd_info)
    {
      VEC_free (gimple, heap, oprnd_info->def_stmts);
      XDELETE (oprnd_info);
    }

  VEC_free (slp_oprnd_info, heap, *oprnds_info);
}


/* Get the defs for the rhs of STMT (collect them in OPRNDS_INFO), check that
   they are of a valid type and that they match the defs of the first stmt of
   the SLP group (stored in OPRNDS_INFO).  */

static bool
vect_get_and_check_slp_defs (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo,
                             slp_tree slp_node, gimple stmt,
                             int ncopies_for_cost, bool first,
                             VEC (slp_oprnd_info, heap) **oprnds_info,
                             stmt_vector_for_cost *prologue_cost_vec,
                             stmt_vector_for_cost *body_cost_vec)
{
  tree oprnd;
  unsigned int i, number_of_oprnds;
  tree def, def_op0 = NULL_TREE;
  gimple def_stmt;
  enum vect_def_type dt = vect_uninitialized_def;
  enum vect_def_type dt_op0 = vect_uninitialized_def;
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
  tree lhs = gimple_get_lhs (stmt);
  struct loop *loop = NULL;
  enum tree_code rhs_code;
  bool different_types = false;
  bool pattern = false;
  slp_oprnd_info oprnd_info, oprnd0_info, oprnd1_info;
  int op_idx = 1;
  tree compare_rhs = NULL_TREE;

  if (loop_vinfo)
    loop = LOOP_VINFO_LOOP (loop_vinfo);

  if (is_gimple_call (stmt))
    {
      number_of_oprnds = gimple_call_num_args (stmt);
      op_idx = 3;
    }
  else if (is_gimple_assign (stmt))
    {
      number_of_oprnds = gimple_num_ops (stmt) - 1;
      if (gimple_assign_rhs_code (stmt) == COND_EXPR)
        number_of_oprnds++;
    }
  else
    return false;

  for (i = 0; i < number_of_oprnds; i++)
    {
      if (compare_rhs)
        {
          oprnd = compare_rhs;
          compare_rhs = NULL_TREE;
        }
      else
        oprnd = gimple_op (stmt, op_idx++);

      oprnd_info = VEC_index (slp_oprnd_info, *oprnds_info, i);

      if (COMPARISON_CLASS_P (oprnd))
        {
          compare_rhs = TREE_OPERAND (oprnd, 1);
          oprnd = TREE_OPERAND (oprnd, 0);
        }

      if (!vect_is_simple_use (oprnd, NULL, loop_vinfo, bb_vinfo, &def_stmt,
                               &def, &dt)
          || (!def_stmt && dt != vect_constant_def))
        {
          if (dump_enabled_p ())
            {
              dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                               "Build SLP failed: can't find def for ");
              dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM, oprnd);
            }

          return false;
        }

      /* Check if DEF_STMT is a part of a pattern in LOOP and get the def stmt
         from the pattern.  Check that all the stmts of the node are in the
         pattern.  */
      if (def_stmt && gimple_bb (def_stmt)
          && ((loop && flow_bb_inside_loop_p (loop, gimple_bb (def_stmt)))
              || (!loop && gimple_bb (def_stmt) == BB_VINFO_BB (bb_vinfo)
                  && gimple_code (def_stmt) != GIMPLE_PHI))
          && vinfo_for_stmt (def_stmt)
          && STMT_VINFO_IN_PATTERN_P (vinfo_for_stmt (def_stmt))
          && !STMT_VINFO_RELEVANT (vinfo_for_stmt (def_stmt))
          && !STMT_VINFO_LIVE_P (vinfo_for_stmt (def_stmt)))
        {
          pattern = true;
          if (!first && !oprnd_info->first_pattern)
            {
              if (dump_enabled_p ())
                {
                  dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                                   "Build SLP failed: some of the stmts"
                                   " are in a pattern, and others are not ");
                  dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM, oprnd);
                }

              return false;
            }

          def_stmt = STMT_VINFO_RELATED_STMT (vinfo_for_stmt (def_stmt));
          dt = STMT_VINFO_DEF_TYPE (vinfo_for_stmt (def_stmt));

          if (dt == vect_unknown_def_type)
            {
              if (dump_enabled_p ())
                dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                                 "Unsupported pattern.");
              return false;
            }

          switch (gimple_code (def_stmt))
            {
            case GIMPLE_PHI:
              def = gimple_phi_result (def_stmt);
              break;

            case GIMPLE_ASSIGN:
              def = gimple_assign_lhs (def_stmt);
              break;

            default:
              if (dump_enabled_p ())
                dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                                 "unsupported defining stmt: ");
              return false;
            }
        }

      if (first)
        {
          oprnd_info->first_dt = dt;
          oprnd_info->first_pattern = pattern;
          if (def)
            {
              oprnd_info->first_def_type = TREE_TYPE (def);
              oprnd_info->first_const_oprnd = NULL_TREE;
            }
          else
            {
              oprnd_info->first_def_type = NULL_TREE;
              oprnd_info->first_const_oprnd = oprnd;
            }

          if (i == 0)
            {
              def_op0 = def;
              dt_op0 = dt;
              /* Analyze costs (for the first stmt of the group only).  */
              if (REFERENCE_CLASS_P (lhs))
                /* Store.  */
                vect_model_store_cost (stmt_info, ncopies_for_cost, false,
                                       dt, slp_node, prologue_cost_vec,
                                       body_cost_vec);
              else
                {
                  enum vect_def_type dts[2];
                  dts[0] = dt;
                  dts[1] = vect_uninitialized_def;
                  /* Not memory operation (we don't call this function for
                     loads).  */
                  vect_model_simple_cost (stmt_info, ncopies_for_cost, dts,
                                          prologue_cost_vec, body_cost_vec);
                }
            }
        }
      else
        {
          /* Not first stmt of the group, check that the def-stmt/s match
             the def-stmt/s of the first stmt.  Allow different definition
             types for reduction chains: the first stmt must be a
             vect_reduction_def (a phi node), and the rest
             vect_internal_def.  */
          if (((oprnd_info->first_dt != dt
                && !(oprnd_info->first_dt == vect_reduction_def
                     && dt == vect_internal_def))
               || (oprnd_info->first_def_type != NULL_TREE
                   && def
                   && !types_compatible_p (oprnd_info->first_def_type,
                                           TREE_TYPE (def))))
              || (!def
                  && !types_compatible_p (TREE_TYPE (oprnd_info->first_const_oprnd),
                                          TREE_TYPE (oprnd)))
              || different_types)
            {
              if (number_of_oprnds != 2)
                {
                  if (dump_enabled_p ())
                    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                                     "Build SLP failed: different types ");

                  return false;
                }

              /* Try to swap operands in case of binary operation.  */
              if (i == 0)
                different_types = true;
              else
                {
                  oprnd0_info = VEC_index (slp_oprnd_info, *oprnds_info, 0);
                  if (is_gimple_assign (stmt)
                      && (rhs_code = gimple_assign_rhs_code (stmt))
                      && TREE_CODE_CLASS (rhs_code) == tcc_binary
                      && commutative_tree_code (rhs_code)
                      && oprnd0_info->first_dt == dt
                      && oprnd_info->first_dt == dt_op0
                      && def_op0 && def
                      && !(oprnd0_info->first_def_type
                           && !types_compatible_p (oprnd0_info->first_def_type,
                                                   TREE_TYPE (def)))
                      && !(oprnd_info->first_def_type
                           && !types_compatible_p (oprnd_info->first_def_type,
                                                   TREE_TYPE (def_op0))))
                    {
                      if (dump_enabled_p ())
                        {
                          dump_printf_loc (MSG_NOTE, vect_location,
                                           "Swapping operands of ");
                          dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
                        }

                      swap_tree_operands (stmt, gimple_assign_rhs1_ptr (stmt),
                                          gimple_assign_rhs2_ptr (stmt));
                    }
                  else
                    {
                      if (dump_enabled_p ())
                        dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                                         "Build SLP failed: different types ");

                      return false;
                    }
                }
            }
        }

      /* Check the types of the definitions.  */
      switch (dt)
        {
        case vect_constant_def:
        case vect_external_def:
        case vect_reduction_def:
          break;

        case vect_internal_def:
          if (different_types)
            {
              oprnd0_info = VEC_index (slp_oprnd_info, *oprnds_info, 0);
              oprnd1_info = VEC_index (slp_oprnd_info, *oprnds_info, 1);
              if (i == 0)
                VEC_quick_push (gimple, oprnd1_info->def_stmts, def_stmt);
              else
                VEC_quick_push (gimple, oprnd0_info->def_stmts, def_stmt);
            }
          else
            VEC_quick_push (gimple, oprnd_info->def_stmts, def_stmt);

          break;

        default:
          /* FORNOW: Not supported.  */
          if (dump_enabled_p ())
            {
              dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                               "Build SLP failed: illegal type of def ");
              dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM, def);
            }

          return false;
        }
    }

  return true;
}
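
/* An illustrative note (added for exposition, not from the original
   sources): the operand swapping above is what lets a group such as

     a[0] = b[0] + c[0];
     a[1] = c[1] + b[1];

   be SLPed: PLUS_EXPR is commutative, so the operands of the second
   statement can be swapped to match the operand order of the first.  */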


/* Recursively build an SLP tree starting from NODE.
   Fail (and return FALSE) if def-stmts are not isomorphic, require data
   permutation or are of unsupported types of operation.  Otherwise, return
   TRUE.  */

static bool
vect_build_slp_tree (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo,
                     slp_tree *node, unsigned int group_size, int *outside_cost,
                     int ncopies_for_cost, unsigned int *max_nunits,
                     VEC (int, heap) **load_permutation,
                     VEC (slp_tree, heap) **loads,
                     unsigned int vectorization_factor, bool *loads_permuted,
                     stmt_vector_for_cost *prologue_cost_vec,
                     stmt_vector_for_cost *body_cost_vec)
{
  unsigned int i;
  VEC (gimple, heap) *stmts = SLP_TREE_SCALAR_STMTS (*node);
  gimple stmt = VEC_index (gimple, stmts, 0);
  enum tree_code first_stmt_code = ERROR_MARK, rhs_code = ERROR_MARK;
  enum tree_code first_cond_code = ERROR_MARK;
  tree lhs;
  bool stop_recursion = false, need_same_oprnds = false;
  tree vectype, scalar_type, first_op1 = NULL_TREE;
  unsigned int ncopies;
  optab optab;
  int icode;
  enum machine_mode optab_op2_mode;
  enum machine_mode vec_mode;
  struct data_reference *first_dr;
  HOST_WIDE_INT dummy;
  bool permutation = false;
  unsigned int load_place;
  gimple first_load = NULL, prev_first_load = NULL, old_first_load = NULL;
  VEC (slp_oprnd_info, heap) *oprnds_info;
  unsigned int nops;
  slp_oprnd_info oprnd_info;
  tree cond;

  if (is_gimple_call (stmt))
    nops = gimple_call_num_args (stmt);
  else if (is_gimple_assign (stmt))
    {
      nops = gimple_num_ops (stmt) - 1;
      if (gimple_assign_rhs_code (stmt) == COND_EXPR)
        nops++;
    }
  else
    return false;

  oprnds_info = vect_create_oprnd_info (nops, group_size);

  /* For every stmt in NODE find its def stmt/s.  */
  FOR_EACH_VEC_ELT (gimple, stmts, i, stmt)
    {
      if (dump_enabled_p ())
        {
          dump_printf_loc (MSG_NOTE, vect_location, "Build SLP for ");
          dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
        }

      /* Fail to vectorize statements marked as unvectorizable.  */
      if (!STMT_VINFO_VECTORIZABLE (vinfo_for_stmt (stmt)))
        {
          if (dump_enabled_p ())
            {
              dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                               "Build SLP failed: unvectorizable statement ");
              dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
            }

          vect_free_oprnd_info (&oprnds_info);
          return false;
        }

      lhs = gimple_get_lhs (stmt);
      if (lhs == NULL_TREE)
        {
          if (dump_enabled_p ())
            {
              dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                               "Build SLP failed: not GIMPLE_ASSIGN nor "
                               "GIMPLE_CALL ");
              dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
            }

          vect_free_oprnd_info (&oprnds_info);
          return false;
        }

      if (is_gimple_assign (stmt)
          && gimple_assign_rhs_code (stmt) == COND_EXPR
          && (cond = gimple_assign_rhs1 (stmt))
          && !COMPARISON_CLASS_P (cond))
        {
          if (dump_enabled_p ())
            {
              dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                               "Build SLP failed: condition is not "
                               "comparison ");
              dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
            }

          vect_free_oprnd_info (&oprnds_info);
          return false;
        }

      scalar_type = vect_get_smallest_scalar_type (stmt, &dummy, &dummy);
      vectype = get_vectype_for_scalar_type (scalar_type);
      if (!vectype)
        {
          if (dump_enabled_p ())
            {
              dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                               "Build SLP failed: unsupported data-type ");
              dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
                                 scalar_type);
            }

          vect_free_oprnd_info (&oprnds_info);
          return false;
        }

      /* In case of multiple types we need to detect the smallest type.  */
      if (*max_nunits < TYPE_VECTOR_SUBPARTS (vectype))
        {
          *max_nunits = TYPE_VECTOR_SUBPARTS (vectype);
          if (bb_vinfo)
            vectorization_factor = *max_nunits;
        }

      ncopies = vectorization_factor / TYPE_VECTOR_SUBPARTS (vectype);

      if (is_gimple_call (stmt))
        {
          rhs_code = CALL_EXPR;
          if (gimple_call_internal_p (stmt)
              || gimple_call_tail_p (stmt)
              || gimple_call_noreturn_p (stmt)
              || !gimple_call_nothrow_p (stmt)
              || gimple_call_chain (stmt))
            {
              if (dump_enabled_p ())
                {
                  dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                                   "Build SLP failed: unsupported call type ");
                  dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
                }

              vect_free_oprnd_info (&oprnds_info);
              return false;
            }
        }
      else
        rhs_code = gimple_assign_rhs_code (stmt);

      /* Check the operation.  */
      if (i == 0)
        {
          first_stmt_code = rhs_code;

          /* Shift arguments should be equal in all the packed stmts for a
             vector shift with scalar shift operand.  */
          if (rhs_code == LSHIFT_EXPR || rhs_code == RSHIFT_EXPR
              || rhs_code == LROTATE_EXPR
              || rhs_code == RROTATE_EXPR)
            {
              vec_mode = TYPE_MODE (vectype);

              /* First see if we have a vector/vector shift.  */
              optab = optab_for_tree_code (rhs_code, vectype,
                                           optab_vector);

              if (!optab
                  || optab_handler (optab, vec_mode) == CODE_FOR_nothing)
                {
                  /* No vector/vector shift, try for a vector/scalar shift.  */
                  optab = optab_for_tree_code (rhs_code, vectype,
                                               optab_scalar);

                  if (!optab)
                    {
                      if (dump_enabled_p ())
                        dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                                         "Build SLP failed: no optab.");
                      vect_free_oprnd_info (&oprnds_info);
                      return false;
                    }
                  icode = (int) optab_handler (optab, vec_mode);
                  if (icode == CODE_FOR_nothing)
                    {
                      if (dump_enabled_p ())
                        dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                                         "Build SLP failed: "
                                         "op not supported by target.");
                      vect_free_oprnd_info (&oprnds_info);
                      return false;
                    }
                  optab_op2_mode = insn_data[icode].operand[2].mode;
                  if (!VECTOR_MODE_P (optab_op2_mode))
                    {
                      need_same_oprnds = true;
                      first_op1 = gimple_assign_rhs2 (stmt);
                    }
                }
            }
          else if (rhs_code == WIDEN_LSHIFT_EXPR)
            {
              need_same_oprnds = true;
              first_op1 = gimple_assign_rhs2 (stmt);
            }
        }
      else
        {
          if (first_stmt_code != rhs_code
              && (first_stmt_code != IMAGPART_EXPR
                  || rhs_code != REALPART_EXPR)
              && (first_stmt_code != REALPART_EXPR
                  || rhs_code != IMAGPART_EXPR)
              && !(STMT_VINFO_GROUPED_ACCESS (vinfo_for_stmt (stmt))
                   && (first_stmt_code == ARRAY_REF
                       || first_stmt_code == INDIRECT_REF
                       || first_stmt_code == COMPONENT_REF
                       || first_stmt_code == MEM_REF)))
            {
              if (dump_enabled_p ())
                {
                  dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                                   "Build SLP failed: different operation "
                                   "in stmt ");
                  dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
                }

              vect_free_oprnd_info (&oprnds_info);
              return false;
            }

          if (need_same_oprnds
              && !operand_equal_p (first_op1, gimple_assign_rhs2 (stmt), 0))
            {
              if (dump_enabled_p ())
                {
                  dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                                   "Build SLP failed: different shift "
                                   "arguments in ");
                  dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
                }

              vect_free_oprnd_info (&oprnds_info);
              return false;
            }

          if (rhs_code == CALL_EXPR)
            {
              gimple first_stmt = VEC_index (gimple, stmts, 0);
              if (gimple_call_num_args (stmt) != nops
                  || !operand_equal_p (gimple_call_fn (first_stmt),
                                       gimple_call_fn (stmt), 0)
                  || gimple_call_fntype (first_stmt)
                     != gimple_call_fntype (stmt))
                {
                  if (dump_enabled_p ())
                    {
                      dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                                       "Build SLP failed: different calls in ");
                      dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
                                        stmt, 0);
                    }

                  vect_free_oprnd_info (&oprnds_info);
                  return false;
                }
            }
        }

      /* Grouped store or load.  */
      if (STMT_VINFO_GROUPED_ACCESS (vinfo_for_stmt (stmt)))
        {
          if (REFERENCE_CLASS_P (lhs))
            {
              /* Store.  */
              if (!vect_get_and_check_slp_defs (loop_vinfo, bb_vinfo, *node,
                                                stmt, ncopies_for_cost,
                                                (i == 0), &oprnds_info,
                                                prologue_cost_vec,
                                                body_cost_vec))
                {
                  vect_free_oprnd_info (&oprnds_info);
                  return false;
                }
            }
          else
            {
              /* Load.  */
              /* FORNOW: Check that there is no gap between the loads.  */
              if ((GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)) == stmt
                   && GROUP_GAP (vinfo_for_stmt (stmt)) != 0)
                  || (GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)) != stmt
                      && GROUP_GAP (vinfo_for_stmt (stmt)) != 1))
                {
                  if (dump_enabled_p ())
                    {
                      dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                                       "Build SLP failed: grouped "
                                       "loads have gaps ");
                      dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
                                        stmt, 0);
                    }

                  vect_free_oprnd_info (&oprnds_info);
                  return false;
                }

              /* Check that the size of interleaved loads group is not
                 greater than the SLP group size.  */
              if (loop_vinfo
                  && GROUP_SIZE (vinfo_for_stmt (stmt)) > ncopies * group_size)
                {
                  if (dump_enabled_p ())
                    {
                      dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                                       "Build SLP failed: the number "
                                       "of interleaved loads is greater than "
                                       "the SLP group size ");
                      dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
                                        stmt, 0);
                    }

                  vect_free_oprnd_info (&oprnds_info);
                  return false;
                }

              old_first_load = first_load;
              first_load = GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt));
              if (prev_first_load)
                {
                  /* Check that there are no loads from different interleaving
                     chains in the same node.  The only exception is complex
                     numbers.  */
                  if (prev_first_load != first_load
                      && rhs_code != REALPART_EXPR
                      && rhs_code != IMAGPART_EXPR)
                    {
                      if (dump_enabled_p ())
                        {
                          dump_printf_loc (MSG_MISSED_OPTIMIZATION,
                                           vect_location,
                                           "Build SLP failed: different "
                                           "interleaving chains in one node ");
                          dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
                                            stmt, 0);
                        }

                      vect_free_oprnd_info (&oprnds_info);
                      return false;
                    }
                }
              else
                prev_first_load = first_load;

              /* In some cases a group of loads is just the same load
                 repeated N times.  Only analyze its cost once.  */
              if (first_load == stmt && old_first_load != first_load)
                {
                  first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (stmt));
                  if (vect_supportable_dr_alignment (first_dr, false)
                      == dr_unaligned_unsupported)
                    {
                      if (dump_enabled_p ())
                        {
                          dump_printf_loc (MSG_MISSED_OPTIMIZATION,
                                           vect_location,
                                           "Build SLP failed: unsupported "
                                           "unaligned load ");
                          dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
                                            stmt, 0);
                        }

                      vect_free_oprnd_info (&oprnds_info);
                      return false;
                    }

                  /* Analyze costs (for the first stmt in the group).  */
                  vect_model_load_cost (vinfo_for_stmt (stmt),
                                        ncopies_for_cost, false, *node,
                                        prologue_cost_vec, body_cost_vec);
                }

              /* Store the place of this load in the interleaving chain.  In
                 case that permutation is needed we later decide if a specific
                 permutation is supported.  */
              load_place = vect_get_place_in_interleaving_chain (stmt,
                                                                 first_load);
              if (load_place != i)
                permutation = true;

              VEC_safe_push (int, heap, *load_permutation, load_place);

              /* We stop the tree when we reach a group of loads.  */
              stop_recursion = true;
              continue;
            }
        } /* Grouped access.  */
      else
        {
          if (TREE_CODE_CLASS (rhs_code) == tcc_reference)
            {
              /* Not grouped load.  */
              if (dump_enabled_p ())
                {
                  dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                                   "Build SLP failed: not grouped load ");
                  dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
                }

              /* FORNOW: Not grouped loads are not supported.  */
              vect_free_oprnd_info (&oprnds_info);
              return false;
            }

          /* Not memory operation.  */
          if (TREE_CODE_CLASS (rhs_code) != tcc_binary
              && TREE_CODE_CLASS (rhs_code) != tcc_unary
              && rhs_code != COND_EXPR
              && rhs_code != CALL_EXPR)
            {
              if (dump_enabled_p ())
                {
                  dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                                   "Build SLP failed: operation");
                  dump_printf (MSG_MISSED_OPTIMIZATION, " unsupported ");
                  dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
                }

              vect_free_oprnd_info (&oprnds_info);
              return false;
            }

          if (rhs_code == COND_EXPR)
            {
              tree cond_expr = gimple_assign_rhs1 (stmt);

              if (i == 0)
                first_cond_code = TREE_CODE (cond_expr);
              else if (first_cond_code != TREE_CODE (cond_expr))
                {
                  if (dump_enabled_p ())
                    {
                      dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                                       "Build SLP failed: different"
                                       " operation");
                      dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
                                        stmt, 0);
                    }

                  vect_free_oprnd_info (&oprnds_info);
                  return false;
                }
            }

          /* Find the def-stmts.  */
          if (!vect_get_and_check_slp_defs (loop_vinfo, bb_vinfo, *node, stmt,
                                            ncopies_for_cost, (i == 0),
                                            &oprnds_info, prologue_cost_vec,
                                            body_cost_vec))
            {
              vect_free_oprnd_info (&oprnds_info);
              return false;
            }
        }
    }

  /* Grouped loads were reached - stop the recursion.  */
  if (stop_recursion)
    {
      VEC_safe_push (slp_tree, heap, *loads, *node);
      if (permutation)
        {
          gimple first_stmt = VEC_index (gimple, stmts, 0);
          *loads_permuted = true;
          (void) record_stmt_cost (body_cost_vec, group_size, vec_perm,
                                   vinfo_for_stmt (first_stmt), 0, vect_body);
        }
      else
        {
          /* We do not check complex-number chains here; instead we set
             LOADS_PERMUTED so that they are checked later in
             vect_supported_load_permutation_p.  */
          if (rhs_code == REALPART_EXPR || rhs_code == IMAGPART_EXPR)
            *loads_permuted = true;
        }

      vect_free_oprnd_info (&oprnds_info);
      return true;
    }

  /* Create SLP_TREE nodes for the definition node/s.  */
  FOR_EACH_VEC_ELT (slp_oprnd_info, oprnds_info, i, oprnd_info)
    {
      slp_tree child;

      if (oprnd_info->first_dt != vect_internal_def)
        continue;

      child = vect_create_new_slp_node (oprnd_info->def_stmts);
      if (!child
          || !vect_build_slp_tree (loop_vinfo, bb_vinfo, &child, group_size,
                                   outside_cost, ncopies_for_cost,
                                   max_nunits, load_permutation, loads,
                                   vectorization_factor, loads_permuted,
                                   prologue_cost_vec, body_cost_vec))
        {
          if (child)
            oprnd_info->def_stmts = NULL;
          vect_free_slp_tree (child);
          vect_free_oprnd_info (&oprnds_info);
          return false;
        }

      oprnd_info->def_stmts = NULL;
      VEC_quick_push (slp_void_p, SLP_TREE_CHILDREN (*node), child);
    }

  vect_free_oprnd_info (&oprnds_info);
  return true;
}
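
/* An illustrative sketch (added for exposition, not from the original
   sources) of the tree this recursion builds: for the group

     t0 = b[0] + c[0];  a[0] = t0;
     t1 = b[1] + c[1];  a[1] = t1;

   the root node holds the two stores, its child holds the two additions,
   and that child has two leaf load nodes, one for the loads from b[] and
   one for the loads from c[], at which the recursion stops.  */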

/* Dump a slp tree NODE using flags specified in DUMP_KIND.  */

static void
vect_print_slp_tree (int dump_kind, slp_tree node)
{
  int i;
  gimple stmt;
  slp_void_p child;

  if (!node)
    return;

  dump_printf (dump_kind, "node ");
  FOR_EACH_VEC_ELT (gimple, SLP_TREE_SCALAR_STMTS (node), i, stmt)
    {
      dump_printf (dump_kind, "\n\tstmt %d ", i);
      dump_gimple_stmt (dump_kind, TDF_SLIM, stmt, 0);
    }
  dump_printf (dump_kind, "\n");

  FOR_EACH_VEC_ELT (slp_void_p, SLP_TREE_CHILDREN (node), i, child)
    vect_print_slp_tree (dump_kind, (slp_tree) child);
}


/* Mark the tree rooted at NODE with MARK (PURE_SLP or HYBRID).
   If MARK is HYBRID, it refers to a specific stmt in NODE (the stmt at index
   J).  Otherwise, MARK is PURE_SLP and J is -1, which indicates that all the
   stmts in NODE are to be marked.  */

static void
vect_mark_slp_stmts (slp_tree node, enum slp_vect_type mark, int j)
{
  int i;
  gimple stmt;
  slp_void_p child;

  if (!node)
    return;

  FOR_EACH_VEC_ELT (gimple, SLP_TREE_SCALAR_STMTS (node), i, stmt)
    if (j < 0 || i == j)
      STMT_SLP_TYPE (vinfo_for_stmt (stmt)) = mark;

  FOR_EACH_VEC_ELT (slp_void_p, SLP_TREE_CHILDREN (node), i, child)
    vect_mark_slp_stmts ((slp_tree) child, mark, j);
}


/* Mark the statements of the tree rooted at NODE as relevant (vect_used).  */

static void
vect_mark_slp_stmts_relevant (slp_tree node)
{
  int i;
  gimple stmt;
  stmt_vec_info stmt_info;
  slp_void_p child;

  if (!node)
    return;

  FOR_EACH_VEC_ELT (gimple, SLP_TREE_SCALAR_STMTS (node), i, stmt)
    {
      stmt_info = vinfo_for_stmt (stmt);
      gcc_assert (!STMT_VINFO_RELEVANT (stmt_info)
                  || STMT_VINFO_RELEVANT (stmt_info) == vect_used_in_scope);
      STMT_VINFO_RELEVANT (stmt_info) = vect_used_in_scope;
    }

  FOR_EACH_VEC_ELT (slp_void_p, SLP_TREE_CHILDREN (node), i, child)
    vect_mark_slp_stmts_relevant ((slp_tree) child);
}


/* Check if the permutation required by the SLP INSTANCE is supported.
   Reorganize the SLP nodes stored in SLP_INSTANCE_LOADS if needed.  */

static bool
vect_supported_slp_permutation_p (slp_instance instance)
{
  slp_tree node = VEC_index (slp_tree, SLP_INSTANCE_LOADS (instance), 0);
  gimple stmt = VEC_index (gimple, SLP_TREE_SCALAR_STMTS (node), 0);
  gimple first_load = GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt));
  VEC (slp_tree, heap) *sorted_loads = NULL;
  int index;
  slp_tree *tmp_loads = NULL;
  int group_size = SLP_INSTANCE_GROUP_SIZE (instance), i, j;
  slp_tree load;

  /* FORNOW: The only supported loads permutation is loads from the same
     location in all the loads in the node, when the data-refs in
     nodes of LOADS constitute an interleaving chain.
     Sort the nodes according to the order of accesses in the chain.  */
  tmp_loads = (slp_tree *) xmalloc (sizeof (slp_tree) * group_size);
  for (i = 0, j = 0;
       VEC_iterate (int, SLP_INSTANCE_LOAD_PERMUTATION (instance), i, index)
       && VEC_iterate (slp_tree, SLP_INSTANCE_LOADS (instance), j, load);
       i += group_size, j++)
    {
      gimple scalar_stmt = VEC_index (gimple, SLP_TREE_SCALAR_STMTS (load), 0);
      /* Check that the loads are all in the same interleaving chain.  */
      if (GROUP_FIRST_ELEMENT (vinfo_for_stmt (scalar_stmt)) != first_load)
        {
          if (dump_enabled_p ())
            {
              dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                               "Build SLP failed: unsupported data "
                               "permutation ");
              dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
                                scalar_stmt, 0);
            }

          free (tmp_loads);
          return false;
        }

      tmp_loads[index] = load;
    }

  sorted_loads = VEC_alloc (slp_tree, heap, group_size);
  for (i = 0; i < group_size; i++)
    VEC_safe_push (slp_tree, heap, sorted_loads, tmp_loads[i]);

  VEC_free (slp_tree, heap, SLP_INSTANCE_LOADS (instance));
  SLP_INSTANCE_LOADS (instance) = sorted_loads;
  free (tmp_loads);

  if (!vect_transform_slp_perm_load (stmt, NULL, NULL,
                                     SLP_INSTANCE_UNROLLING_FACTOR (instance),
                                     instance, true))
    return false;

  return true;
}


/* Rearrange the statements of NODE according to PERMUTATION.  */

static void
vect_slp_rearrange_stmts (slp_tree node, unsigned int group_size,
                          VEC (int, heap) *permutation)
{
  gimple stmt;
  VEC (gimple, heap) *tmp_stmts;
  unsigned int index, i;
  slp_void_p child;

  if (!node)
    return;

  FOR_EACH_VEC_ELT (slp_void_p, SLP_TREE_CHILDREN (node), i, child)
    vect_slp_rearrange_stmts ((slp_tree) child, group_size, permutation);

  gcc_assert (group_size == VEC_length (gimple, SLP_TREE_SCALAR_STMTS (node)));
  tmp_stmts = VEC_alloc (gimple, heap, group_size);

  for (i = 0; i < group_size; i++)
    VEC_safe_push (gimple, heap, tmp_stmts, NULL);

  FOR_EACH_VEC_ELT (gimple, SLP_TREE_SCALAR_STMTS (node), i, stmt)
    {
      index = VEC_index (int, permutation, i);
      VEC_replace (gimple, tmp_stmts, index, stmt);
    }

  VEC_free (gimple, heap, SLP_TREE_SCALAR_STMTS (node));
  SLP_TREE_SCALAR_STMTS (node) = tmp_stmts;
}
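
/* For example (illustrative only): with GROUP_SIZE 3 and PERMUTATION
   {1, 2, 0}, the statements {s0, s1, s2} of every node are reordered
   into {s2, s0, s1}, since each stmt I is moved to position
   PERMUTATION[I].  */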


/* Check if the required load permutation is supported.
   LOAD_PERMUTATION contains a list of indices of the loads.
   In SLP this permutation is relative to the order of grouped stores that are
   the base of the SLP instance.  */

static bool
vect_supported_load_permutation_p (slp_instance slp_instn, int group_size,
                                   VEC (int, heap) *load_permutation)
{
  int i = 0, j, prev = -1, next, k, number_of_groups;
  bool supported, bad_permutation = false;
  sbitmap load_index;
  slp_tree node, other_complex_node;
  gimple stmt, first = NULL, other_node_first, load, next_load, first_load;
  unsigned complex_numbers = 0;
  struct data_reference *dr;
  bb_vec_info bb_vinfo;

  /* FORNOW: permutations are only supported in SLP.  */
  if (!slp_instn)
    return false;

  if (dump_enabled_p ())
    {
      dump_printf_loc (MSG_NOTE, vect_location, "Load permutation ");
      FOR_EACH_VEC_ELT (int, load_permutation, i, next)
        dump_printf (MSG_NOTE, "%d ", next);
    }

  /* In case of reduction every load permutation is allowed, since the order
     of the reduction statements is not important (as opposed to the case of
     grouped stores).  The only condition we need to check is that all the
     load nodes are of the same size and have the same permutation (and then
     rearrange all the nodes of the SLP instance according to this
     permutation).  */

  /* Check that all the load nodes are of the same size.  */
  FOR_EACH_VEC_ELT (slp_tree, SLP_INSTANCE_LOADS (slp_instn), i, node)
    {
      if (VEC_length (gimple, SLP_TREE_SCALAR_STMTS (node))
          != (unsigned) group_size)
        return false;

      stmt = VEC_index (gimple, SLP_TREE_SCALAR_STMTS (node), 0);
      if (is_gimple_assign (stmt)
          && (gimple_assign_rhs_code (stmt) == REALPART_EXPR
              || gimple_assign_rhs_code (stmt) == IMAGPART_EXPR))
        complex_numbers++;
    }

  /* Complex operands can be swapped as following:
      real_c = real_b + real_a;
      imag_c = imag_a + imag_b;
     i.e., we have {real_b, imag_a} and {real_a, imag_b} instead of
     {real_a, imag_a} and {real_b, imag_b}.  We check here that if interleaving
     chains are mixed, they match the above pattern.  */
  if (complex_numbers)
    {
      FOR_EACH_VEC_ELT (slp_tree, SLP_INSTANCE_LOADS (slp_instn), i, node)
        {
          FOR_EACH_VEC_ELT (gimple, SLP_TREE_SCALAR_STMTS (node), j, stmt)
            {
              if (j == 0)
                first = stmt;
              else
                {
                  if (GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)) != first)
                    {
                      if (complex_numbers != 2)
                        return false;

                      if (i == 0)
                        k = 1;
                      else
                        k = 0;

                      other_complex_node
                        = VEC_index (slp_tree,
                                     SLP_INSTANCE_LOADS (slp_instn), k);
                      other_node_first
                        = VEC_index (gimple,
                                     SLP_TREE_SCALAR_STMTS (other_complex_node),
                                     0);

                      if (GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt))
                          != other_node_first)
                        return false;
                    }
                }
            }
        }
    }

  /* We checked that this case is ok, so there is no need to proceed with
     permutation tests.  */
  if (complex_numbers == 2
      && VEC_length (slp_tree, SLP_INSTANCE_LOADS (slp_instn)) == 2)
    {
      VEC_free (slp_tree, heap, SLP_INSTANCE_LOADS (slp_instn));
      VEC_free (int, heap, SLP_INSTANCE_LOAD_PERMUTATION (slp_instn));
      return true;
    }

  node = SLP_INSTANCE_TREE (slp_instn);
  stmt = VEC_index (gimple, SLP_TREE_SCALAR_STMTS (node), 0);
  /* LOAD_PERMUTATION is a list of indices of all the loads of the SLP
     instance, not all the loads belong to the same node or interleaving
     group.  Hence, we need to divide them into groups according to
     GROUP_SIZE.  */
  number_of_groups = VEC_length (int, load_permutation) / group_size;

  /* Reduction (there are no data-refs in the root).
     In reduction chain the order of the loads is important.  */
  if (!STMT_VINFO_DATA_REF (vinfo_for_stmt (stmt))
      && !GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)))
    {
      int first_group_load_index;

      /* Compare all the permutation sequences to the first one.  */
      for (i = 1; i < number_of_groups; i++)
        {
          k = 0;
          for (j = i * group_size; j < i * group_size + group_size; j++)
            {
              next = VEC_index (int, load_permutation, j);
              first_group_load_index = VEC_index (int, load_permutation, k);

              if (next != first_group_load_index)
                {
                  bad_permutation = true;
                  break;
                }

              k++;
            }

          if (bad_permutation)
            break;
        }

      if (!bad_permutation)
        {
          /* Check that the loads in the first sequence are different and there
             are no gaps between them.  */
          load_index = sbitmap_alloc (group_size);
          bitmap_clear (load_index);
          for (k = 0; k < group_size; k++)
            {
              first_group_load_index = VEC_index (int, load_permutation, k);
              if (bitmap_bit_p (load_index, first_group_load_index))
                {
                  bad_permutation = true;
                  break;
                }

              bitmap_set_bit (load_index, first_group_load_index);
            }

          if (!bad_permutation)
            for (k = 0; k < group_size; k++)
              if (!bitmap_bit_p (load_index, k))
                {
                  bad_permutation = true;
                  break;
                }

          sbitmap_free (load_index);
        }

      if (!bad_permutation)
        {
          /* This permutation is valid for reduction.  Since the order of the
             statements in the nodes is not important unless they are memory
             accesses, we can rearrange the statements in all the nodes
             according to the order of the loads.  */
          vect_slp_rearrange_stmts (SLP_INSTANCE_TREE (slp_instn), group_size,
                                    load_permutation);
          VEC_free (int, heap, SLP_INSTANCE_LOAD_PERMUTATION (slp_instn));
          return true;
        }
    }

  /* In basic block vectorization we allow any subchain of an interleaving
     chain.
     FORNOW: not supported in loop SLP because of realignment complications.  */
  bb_vinfo = STMT_VINFO_BB_VINFO (vinfo_for_stmt (stmt));
  bad_permutation = false;
  /* Check that for every node in the instance the loads form a subchain.  */
  if (bb_vinfo)
    {
      FOR_EACH_VEC_ELT (slp_tree, SLP_INSTANCE_LOADS (slp_instn), i, node)
        {
          next_load = NULL;
          first_load = NULL;
          FOR_EACH_VEC_ELT (gimple, SLP_TREE_SCALAR_STMTS (node), j, load)
            {
              if (!first_load)
                first_load = GROUP_FIRST_ELEMENT (vinfo_for_stmt (load));
              else if (first_load
                       != GROUP_FIRST_ELEMENT (vinfo_for_stmt (load)))
                {
                  bad_permutation = true;
                  break;
                }

              if (j != 0 && next_load != load)
                {
                  bad_permutation = true;
                  break;
                }

              next_load = GROUP_NEXT_ELEMENT (vinfo_for_stmt (load));
            }

          if (bad_permutation)
            break;
        }

      /* Check that the alignment of the first load in every subchain, i.e.,
         the first statement in every load node, is supported.  */
      if (!bad_permutation)
        {
          FOR_EACH_VEC_ELT (slp_tree, SLP_INSTANCE_LOADS (slp_instn), i, node)
            {
              first_load = VEC_index (gimple, SLP_TREE_SCALAR_STMTS (node), 0);
              if (first_load
                  != GROUP_FIRST_ELEMENT (vinfo_for_stmt (first_load)))
                {
                  dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_load));
                  if (vect_supportable_dr_alignment (dr, false)
                      == dr_unaligned_unsupported)
                    {
                      if (dump_enabled_p ())
                        {
                          dump_printf_loc (MSG_MISSED_OPTIMIZATION,
                                           vect_location,
                                           "unsupported unaligned load ");
                          dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
                                            first_load, 0);
                        }
                      bad_permutation = true;
                      break;
                    }
                }
            }

          if (!bad_permutation)
            {
              VEC_free (int, heap, SLP_INSTANCE_LOAD_PERMUTATION (slp_instn));
              return true;
            }
        }
    }

  /* FORNOW: the only supported permutation is 0..01..1.., i.e. a sequence of
     GROUP_SIZE runs, each repeating one index GROUP_SIZE times (so the whole
     permutation has length GROUP_SIZE * GROUP_SIZE), unless this is a
     reduction.  */
  if (VEC_length (int, load_permutation)
      != (unsigned int) (group_size * group_size))
    return false;

  supported = true;
  load_index = sbitmap_alloc (group_size);
  bitmap_clear (load_index);
  for (j = 0; j < group_size; j++)
    {
      for (i = j * group_size, k = 0;
           VEC_iterate (int, load_permutation, i, next) && k < group_size;
           i++, k++)
        {
          if (i != j * group_size && next != prev)
            {
              supported = false;
              break;
            }

          prev = next;
        }

      if (bitmap_bit_p (load_index, prev))
        {
          supported = false;
          break;
        }

      bitmap_set_bit (load_index, prev);
    }

  for (j = 0; j < group_size; j++)
    if (!bitmap_bit_p (load_index, j))
      return false;

  sbitmap_free (load_index);

  if (supported && i == group_size * group_size
      && vect_supported_slp_permutation_p (slp_instn))
    return true;

  return false;
}
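
/* An illustrative example (not from the original sources): for
   GROUP_SIZE 2 the check above accepts {0, 0, 1, 1} and {1, 1, 0, 0},
   i.e. each run of GROUP_SIZE entries loads from a single position in
   the interleaving chain, and the runs together cover every position
   exactly once.  */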


/* Find the first load in the loop that belongs to INSTANCE.
   When loads are in several SLP nodes, there can be a case in which the first
   load does not appear in the first SLP node to be transformed, causing
   incorrect order of statements.  Since we generate all the loads together,
   they must be inserted before the first load of the SLP instance and not
   before the first load of the first node of the instance.  */

static gimple
vect_find_first_load_in_slp_instance (slp_instance instance)
{
  int i, j;
  slp_tree load_node;
  gimple first_load = NULL, load;

  FOR_EACH_VEC_ELT (slp_tree, SLP_INSTANCE_LOADS (instance), i, load_node)
    FOR_EACH_VEC_ELT (gimple, SLP_TREE_SCALAR_STMTS (load_node), j, load)
      first_load = get_earlier_stmt (load, first_load);

  return first_load;
}
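
/* For instance (an illustrative scenario, not from the original sources):
   if the first node to be transformed loads b[2] and b[3] while a later
   node loads b[0] and b[1], the generated vector loads must be emitted
   before the scalar load of b[0], which only a scan over all load nodes
   can find.  */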


/* Find the last store in SLP INSTANCE.  */

static gimple
vect_find_last_store_in_slp_instance (slp_instance instance)
{
  int i;
  slp_tree node;
  gimple last_store = NULL, store;

  node = SLP_INSTANCE_TREE (instance);
  for (i = 0;
       VEC_iterate (gimple, SLP_TREE_SCALAR_STMTS (node), i, store);
       i++)
    last_store = get_later_stmt (store, last_store);

  return last_store;
}


/* Analyze an SLP instance starting from a group of grouped stores.  Call
   vect_build_slp_tree to build a tree of packed stmts if possible.
   Return FALSE if it's impossible to SLP any stmt in the loop.  */

static bool
vect_analyze_slp_instance (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo,
                           gimple stmt)
{
  slp_instance new_instance;
  slp_tree node;
  unsigned int group_size = GROUP_SIZE (vinfo_for_stmt (stmt));
  unsigned int unrolling_factor = 1, nunits;
  tree vectype, scalar_type = NULL_TREE;
  gimple next;
  unsigned int vectorization_factor = 0;
  int outside_cost = 0, ncopies_for_cost, i;
  unsigned int max_nunits = 0;
  VEC (int, heap) *load_permutation;
  VEC (slp_tree, heap) *loads;
  struct data_reference *dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (stmt));
  bool loads_permuted = false;
  VEC (gimple, heap) *scalar_stmts;
  stmt_vector_for_cost body_cost_vec, prologue_cost_vec;
  stmt_info_for_cost *si;

  if (GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)))
    {
      if (dr)
        {
          scalar_type = TREE_TYPE (DR_REF (dr));
          vectype = get_vectype_for_scalar_type (scalar_type);
        }
      else
        {
          gcc_assert (loop_vinfo);
          vectype = STMT_VINFO_VECTYPE (vinfo_for_stmt (stmt));
        }

      group_size = GROUP_SIZE (vinfo_for_stmt (stmt));
    }
  else
    {
      gcc_assert (loop_vinfo);
      vectype = STMT_VINFO_VECTYPE (vinfo_for_stmt (stmt));
      group_size = VEC_length (gimple, LOOP_VINFO_REDUCTIONS (loop_vinfo));
    }

  if (!vectype)
    {
      if (dump_enabled_p ())
        {
          dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                           "Build SLP failed: unsupported data-type ");
          dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM, scalar_type);
        }

      return false;
    }

  nunits = TYPE_VECTOR_SUBPARTS (vectype);
  if (loop_vinfo)
    vectorization_factor = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
  else
    vectorization_factor = nunits;

  /* Calculate the unrolling factor.  */
  unrolling_factor = least_common_multiple (nunits, group_size) / group_size;
  if (unrolling_factor != 1 && !loop_vinfo)
    {
      if (dump_enabled_p ())
        dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                         "Build SLP failed: unrolling required in basic"
                         " block SLP");

      return false;
    }

  /* Create a node (a root of the SLP tree) for the packed grouped stores.  */
  scalar_stmts = VEC_alloc (gimple, heap, group_size);
  next = stmt;
  if (GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)))
    {
      /* Collect the stores and store them in SLP_TREE_SCALAR_STMTS.  */
      while (next)
        {
          if (STMT_VINFO_IN_PATTERN_P (vinfo_for_stmt (next))
              && STMT_VINFO_RELATED_STMT (vinfo_for_stmt (next)))
            VEC_safe_push (gimple, heap, scalar_stmts,
                           STMT_VINFO_RELATED_STMT (vinfo_for_stmt (next)));
          else
            VEC_safe_push (gimple, heap, scalar_stmts, next);
          next = GROUP_NEXT_ELEMENT (vinfo_for_stmt (next));
        }
    }
  else
    {
      /* Collect reduction statements.  */
      VEC (gimple, heap) *reductions = LOOP_VINFO_REDUCTIONS (loop_vinfo);
      for (i = 0; VEC_iterate (gimple, reductions, i, next); i++)
        VEC_safe_push (gimple, heap, scalar_stmts, next);
    }

  node = vect_create_new_slp_node (scalar_stmts);

  /* Calculate the number of vector stmts to create based on the unrolling
     factor (number of vectors is 1 if NUNITS >= GROUP_SIZE, and is
     GROUP_SIZE / NUNITS otherwise).  */
  ncopies_for_cost = unrolling_factor * group_size / nunits;

  load_permutation = VEC_alloc (int, heap, group_size * group_size);
  loads = VEC_alloc (slp_tree, heap, group_size);
  prologue_cost_vec = VEC_alloc (stmt_info_for_cost, heap, 10);
  body_cost_vec = VEC_alloc (stmt_info_for_cost, heap, 10);

  /* Build the tree for the SLP instance.  */
  if (vect_build_slp_tree (loop_vinfo, bb_vinfo, &node, group_size,
                           &outside_cost, ncopies_for_cost,
                           &max_nunits, &load_permutation, &loads,
                           vectorization_factor, &loads_permuted,
                           &prologue_cost_vec, &body_cost_vec))
    {
      void *data = (loop_vinfo ? LOOP_VINFO_TARGET_COST_DATA (loop_vinfo)
                    : BB_VINFO_TARGET_COST_DATA (bb_vinfo));

      /* Calculate the unrolling factor based on the smallest type.  */
      if (max_nunits > nunits)
        unrolling_factor = least_common_multiple (max_nunits, group_size)
                           / group_size;

      if (unrolling_factor != 1 && !loop_vinfo)
        {
          if (dump_enabled_p ())
            dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                             "Build SLP failed: unrolling required in basic"
                             " block SLP");
          vect_free_slp_tree (node);
          VEC_free (stmt_info_for_cost, heap, body_cost_vec);
          VEC_free (stmt_info_for_cost, heap, prologue_cost_vec);
          VEC_free (int, heap, load_permutation);
          VEC_free (slp_tree, heap, loads);
          return false;
        }

      /* Create a new SLP instance.  */
      new_instance = XNEW (struct _slp_instance);
      SLP_INSTANCE_TREE (new_instance) = node;
      SLP_INSTANCE_GROUP_SIZE (new_instance) = group_size;
      SLP_INSTANCE_UNROLLING_FACTOR (new_instance) = unrolling_factor;
      SLP_INSTANCE_BODY_COST_VEC (new_instance) = body_cost_vec;
      SLP_INSTANCE_LOADS (new_instance) = loads;
      SLP_INSTANCE_FIRST_LOAD_STMT (new_instance) = NULL;
      SLP_INSTANCE_LOAD_PERMUTATION (new_instance) = load_permutation;

      if (loads_permuted)
        {
          if (!vect_supported_load_permutation_p (new_instance, group_size,
                                                  load_permutation))
            {
              if (dump_enabled_p ())
                {
                  dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                                   "Build SLP failed: unsupported load "
                                   "permutation ");
                  dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
                }

              vect_free_slp_instance (new_instance);
              VEC_free (stmt_info_for_cost, heap, prologue_cost_vec);
              return false;
            }

          SLP_INSTANCE_FIRST_LOAD_STMT (new_instance)
            = vect_find_first_load_in_slp_instance (new_instance);
        }
      else
        VEC_free (int, heap, SLP_INSTANCE_LOAD_PERMUTATION (new_instance));

      /* Record the prologue costs, which were delayed until we were
         sure that SLP was successful.  Unlike the body costs, we know
         the final values now regardless of the loop vectorization factor.  */
      FOR_EACH_VEC_ELT (stmt_info_for_cost, prologue_cost_vec, i, si)
        {
          struct _stmt_vec_info *stmt_info
            = si->stmt ? vinfo_for_stmt (si->stmt) : NULL;
          (void) add_stmt_cost (data, si->count, si->kind, stmt_info,
                                si->misalign, vect_prologue);
        }

      VEC_free (stmt_info_for_cost, heap, prologue_cost_vec);

      if (loop_vinfo)
        VEC_safe_push (slp_instance, heap,
                       LOOP_VINFO_SLP_INSTANCES (loop_vinfo),
                       new_instance);
      else
        VEC_safe_push (slp_instance, heap, BB_VINFO_SLP_INSTANCES (bb_vinfo),
                       new_instance);

      if (dump_enabled_p ())
        vect_print_slp_tree (MSG_NOTE, node);

      return true;
    }
  else
    {
      VEC_free (stmt_info_for_cost, heap, body_cost_vec);
      VEC_free (stmt_info_for_cost, heap, prologue_cost_vec);
    }

  /* Failed to SLP.  */
  /* Free the allocated memory.  */
  vect_free_slp_tree (node);
  VEC_free (int, heap, load_permutation);
  VEC_free (slp_tree, heap, loads);

  return false;
}
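
/* A worked example of the unrolling computation above (illustrative
   only): for a group of 2 stores and a vector type with 4 subparts,
   least_common_multiple (4, 2) / 2 = 2, i.e. two copies of the group
   are needed to fill one vector.  In a loop this becomes the SLP
   unrolling factor; in basic block SLP it makes the build fail, since
   a basic block cannot be unrolled.  */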


/* Check if there are stmts in the loop that can be vectorized using SLP.
   Build SLP trees of packed scalar stmts if SLP is possible.  */

bool
vect_analyze_slp (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo)
{
  unsigned int i;
  VEC (gimple, heap) *grouped_stores, *reductions = NULL, *reduc_chains = NULL;
  gimple first_element;
  bool ok = false;

  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location, "=== vect_analyze_slp ===");

  if (loop_vinfo)
    {
      grouped_stores = LOOP_VINFO_GROUPED_STORES (loop_vinfo);
      reduc_chains = LOOP_VINFO_REDUCTION_CHAINS (loop_vinfo);
      reductions = LOOP_VINFO_REDUCTIONS (loop_vinfo);
    }
  else
    grouped_stores = BB_VINFO_GROUPED_STORES (bb_vinfo);

  /* Find SLP sequences starting from groups of grouped stores.  */
  FOR_EACH_VEC_ELT (gimple, grouped_stores, i, first_element)
    if (vect_analyze_slp_instance (loop_vinfo, bb_vinfo, first_element))
      ok = true;

  if (bb_vinfo && !ok)
    {
      if (dump_enabled_p ())
        dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                         "Failed to SLP the basic block.");

      return false;
    }

  if (loop_vinfo
      && VEC_length (gimple, LOOP_VINFO_REDUCTION_CHAINS (loop_vinfo)) > 0)
    {
      /* Find SLP sequences starting from reduction chains.  */
      FOR_EACH_VEC_ELT (gimple, reduc_chains, i, first_element)
        if (vect_analyze_slp_instance (loop_vinfo, bb_vinfo, first_element))
          ok = true;
        else
          return false;

      /* Don't try to vectorize SLP reductions if reduction chain was
         detected.  */
      return ok;
    }

  /* Find SLP sequences starting from groups of reductions.  */
  if (loop_vinfo && VEC_length (gimple, LOOP_VINFO_REDUCTIONS (loop_vinfo)) > 1
      && vect_analyze_slp_instance (loop_vinfo, bb_vinfo,
                                    VEC_index (gimple, reductions, 0)))
    ok = true;

  return true;
}


/* For each possible SLP instance decide whether to SLP it and calculate overall
   unrolling factor needed to SLP the loop.  Return TRUE if decided to SLP at
   least one instance.  */

bool
vect_make_slp_decision (loop_vec_info loop_vinfo)
{
  unsigned int i, unrolling_factor = 1;
  VEC (slp_instance, heap) *slp_instances = LOOP_VINFO_SLP_INSTANCES (loop_vinfo);
  slp_instance instance;
  int decided_to_slp = 0;

  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location, "=== vect_make_slp_decision ===");

  FOR_EACH_VEC_ELT (slp_instance, slp_instances, i, instance)
    {
      /* FORNOW: SLP if you can.  */
      if (unrolling_factor < SLP_INSTANCE_UNROLLING_FACTOR (instance))
        unrolling_factor = SLP_INSTANCE_UNROLLING_FACTOR (instance);

      /* Mark all the stmts that belong to INSTANCE as PURE_SLP stmts.  Later we
         call vect_detect_hybrid_slp () to find stmts that need hybrid SLP and
         loop-based vectorization.  Such stmts will be marked as HYBRID.  */
      vect_mark_slp_stmts (SLP_INSTANCE_TREE (instance), pure_slp, -1);
      decided_to_slp++;
    }

  LOOP_VINFO_SLP_UNROLLING_FACTOR (loop_vinfo) = unrolling_factor;

  if (decided_to_slp && dump_enabled_p ())
    dump_printf_loc (MSG_OPTIMIZED_LOCATIONS, vect_location,
                     "Decided to SLP %d instances. Unrolling factor %d",
                     decided_to_slp, unrolling_factor);

  return (decided_to_slp > 0);
}


/* Find stmts that must be both vectorized and SLPed (since they feed stmts that
   can't be SLPed) in the tree rooted at NODE.  Mark such stmts as HYBRID.  */

static void
vect_detect_hybrid_slp_stmts (slp_tree node)
{
  int i;
  VEC (gimple, heap) *stmts = SLP_TREE_SCALAR_STMTS (node);
  gimple stmt = VEC_index (gimple, stmts, 0);
  imm_use_iterator imm_iter;
  gimple use_stmt;
  stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
  slp_void_p child;
  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
  struct loop *loop = NULL;
  bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_vinfo);
  basic_block bb = NULL;

  if (!node)
    return;

  if (loop_vinfo)
    loop = LOOP_VINFO_LOOP (loop_vinfo);
  else
    bb = BB_VINFO_BB (bb_vinfo);

  FOR_EACH_VEC_ELT (gimple, SLP_TREE_SCALAR_STMTS (node), i, stmt)
    if (PURE_SLP_STMT (vinfo_for_stmt (stmt))
        && TREE_CODE (gimple_op (stmt, 0)) == SSA_NAME)
      FOR_EACH_IMM_USE_STMT (use_stmt, imm_iter, gimple_op (stmt, 0))
        if (gimple_bb (use_stmt)
            && ((loop && flow_bb_inside_loop_p (loop, gimple_bb (use_stmt)))
                || bb == gimple_bb (use_stmt))
            && (stmt_vinfo = vinfo_for_stmt (use_stmt))
            && !STMT_SLP_TYPE (stmt_vinfo)
            && (STMT_VINFO_RELEVANT (stmt_vinfo)
                || VECTORIZABLE_CYCLE_DEF (STMT_VINFO_DEF_TYPE (stmt_vinfo)))
            && !(gimple_code (use_stmt) == GIMPLE_PHI
                 && STMT_VINFO_DEF_TYPE (stmt_vinfo)
                    == vect_reduction_def))
          vect_mark_slp_stmts (node, hybrid, i);

  FOR_EACH_VEC_ELT (slp_void_p, SLP_TREE_CHILDREN (node), i, child)
    vect_detect_hybrid_slp_stmts ((slp_tree) child);
}
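
/* An illustrative case (added for exposition, not from the original
   sources): if an SLP'ed statement computes a value that is also
   consumed by a statement the SLP tree does not cover, e.g.

     x_1 = a[0] + b[0];    <-- in the SLP tree
     sum_2 = sum_3 + x_1;  <-- loop-based only

   then x_1 must be produced by both the SLP and the loop-based vector
   code, so its statement is marked HYBRID.  */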
1855
1856
1857 /* Find stmts that must be both vectorized and SLPed. */
1858
1859 void
1860 vect_detect_hybrid_slp (loop_vec_info loop_vinfo)
1861 {
1862 unsigned int i;
1863 VEC (slp_instance, heap) *slp_instances = LOOP_VINFO_SLP_INSTANCES (loop_vinfo);
1864 slp_instance instance;
1865
1866 if (dump_enabled_p ())
1867 dump_printf_loc (MSG_NOTE, vect_location, "=== vect_detect_hybrid_slp ===");
1868
1869 FOR_EACH_VEC_ELT (slp_instance, slp_instances, i, instance)
1870 vect_detect_hybrid_slp_stmts (SLP_INSTANCE_TREE (instance));
1871 }
1872
1873
1874 /* Create and initialize a new bb_vec_info struct for BB, as well as
1875 stmt_vec_info structs for all the stmts in it. */
1876
1877 static bb_vec_info
1878 new_bb_vec_info (basic_block bb)
1879 {
1880 bb_vec_info res = NULL;
1881 gimple_stmt_iterator gsi;
1882
1883 res = (bb_vec_info) xcalloc (1, sizeof (struct _bb_vec_info));
1884 BB_VINFO_BB (res) = bb;
1885
1886 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
1887 {
1888 gimple stmt = gsi_stmt (gsi);
1889 gimple_set_uid (stmt, 0);
1890 set_vinfo_for_stmt (stmt, new_stmt_vec_info (stmt, NULL, res));
1891 }
1892
1893 BB_VINFO_GROUPED_STORES (res) = VEC_alloc (gimple, heap, 10);
1894 BB_VINFO_SLP_INSTANCES (res) = VEC_alloc (slp_instance, heap, 2);
1895 BB_VINFO_TARGET_COST_DATA (res) = init_cost (NULL);
1896
1897 bb->aux = res;
1898 return res;
1899 }
1900
1901
1902 /* Free BB_VINFO struct, as well as all the stmt_vec_info structs of all the
1903 stmts in the basic block. */
1904
1905 static void
1906 destroy_bb_vec_info (bb_vec_info bb_vinfo)
1907 {
1908 VEC (slp_instance, heap) *slp_instances;
1909 slp_instance instance;
1910 basic_block bb;
1911 gimple_stmt_iterator si;
1912 unsigned i;
1913
1914 if (!bb_vinfo)
1915 return;
1916
1917 bb = BB_VINFO_BB (bb_vinfo);
1918
1919 for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
1920 {
1921 gimple stmt = gsi_stmt (si);
1922 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1923
1924 if (stmt_info)
1925 /* Free stmt_vec_info. */
1926 free_stmt_vec_info (stmt);
1927 }
1928
1929 free_data_refs (BB_VINFO_DATAREFS (bb_vinfo));
1930 free_dependence_relations (BB_VINFO_DDRS (bb_vinfo));
1931 VEC_free (gimple, heap, BB_VINFO_GROUPED_STORES (bb_vinfo));
1932 slp_instances = BB_VINFO_SLP_INSTANCES (bb_vinfo);
1933 FOR_EACH_VEC_ELT (slp_instance, slp_instances, i, instance)
1934 vect_free_slp_instance (instance);
1935 VEC_free (slp_instance, heap, BB_VINFO_SLP_INSTANCES (bb_vinfo));
1936 destroy_cost_data (BB_VINFO_TARGET_COST_DATA (bb_vinfo));
1937 free (bb_vinfo);
1938 bb->aux = NULL;
1939 }
1940
1941
1942 /* Analyze statements contained in SLP tree node after recursively analyzing
1943 the subtree. Return TRUE if the operations are supported. */
1944
1945 static bool
1946 vect_slp_analyze_node_operations (bb_vec_info bb_vinfo, slp_tree node)
1947 {
1948 bool dummy;
1949 int i;
1950 gimple stmt;
1951 slp_void_p child;
1952
1953 if (!node)
1954 return true;
1955
1956 FOR_EACH_VEC_ELT (slp_void_p, SLP_TREE_CHILDREN (node), i, child)
1957 if (!vect_slp_analyze_node_operations (bb_vinfo, (slp_tree) child))
1958 return false;
1959
1960 FOR_EACH_VEC_ELT (gimple, SLP_TREE_SCALAR_STMTS (node), i, stmt)
1961 {
1962 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1963 gcc_assert (stmt_info);
1964 gcc_assert (PURE_SLP_STMT (stmt_info));
1965
1966 if (!vect_analyze_stmt (stmt, &dummy, node))
1967 return false;
1968 }
1969
1970 return true;
1971 }
1972
1973
1974 /* Analyze statements in SLP instances of the basic block. Return TRUE if the
1975 operations are supported. */
1976
1977 static bool
1978 vect_slp_analyze_operations (bb_vec_info bb_vinfo)
1979 {
1980 VEC (slp_instance, heap) *slp_instances = BB_VINFO_SLP_INSTANCES (bb_vinfo);
1981 slp_instance instance;
1982 int i;
1983
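  /* Note the iteration scheme: I only advances when the current instance
     survives; after VEC_ordered_remove the next instance slides into slot
     I and is examined by the next VEC_iterate call.  */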
1984 for (i = 0; VEC_iterate (slp_instance, slp_instances, i, instance); )
1985 {
1986 if (!vect_slp_analyze_node_operations (bb_vinfo,
1987 SLP_INSTANCE_TREE (instance)))
1988 {
1989 vect_free_slp_instance (instance);
1990 VEC_ordered_remove (slp_instance, slp_instances, i);
1991 }
1992 else
1993 i++;
1994 }
1995
1996 if (!VEC_length (slp_instance, slp_instances))
1997 return false;
1998
1999 return true;
2000 }
2001
2002 /* Check if vectorization of the basic block is profitable. */
2003
2004 static bool
2005 vect_bb_vectorization_profitable_p (bb_vec_info bb_vinfo)
2006 {
2007 VEC (slp_instance, heap) *slp_instances = BB_VINFO_SLP_INSTANCES (bb_vinfo);
2008 slp_instance instance;
2009 int i, j;
2010 unsigned int vec_inside_cost = 0, vec_outside_cost = 0, scalar_cost = 0;
2011 unsigned int vec_prologue_cost = 0, vec_epilogue_cost = 0;
2012 unsigned int stmt_cost;
2013 gimple stmt;
2014 gimple_stmt_iterator si;
2015 basic_block bb = BB_VINFO_BB (bb_vinfo);
2016 void *target_cost_data = BB_VINFO_TARGET_COST_DATA (bb_vinfo);
2017 stmt_vec_info stmt_info = NULL;
2018 stmt_vector_for_cost body_cost_vec;
2019 stmt_info_for_cost *ci;
2020
2021 /* Calculate vector costs. */
2022 FOR_EACH_VEC_ELT (slp_instance, slp_instances, i, instance)
2023 {
2024 body_cost_vec = SLP_INSTANCE_BODY_COST_VEC (instance);
2025
2026 FOR_EACH_VEC_ELT (stmt_info_for_cost, body_cost_vec, j, ci)
2027 {
2028 stmt_info = ci->stmt ? vinfo_for_stmt (ci->stmt) : NULL;
2029 (void) add_stmt_cost (target_cost_data, ci->count, ci->kind,
2030 stmt_info, ci->misalign, vect_body);
2031 }
2032 }
2033
2034 /* Calculate scalar cost. */
2035 for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
2036 {
2037 stmt = gsi_stmt (si);
2038 stmt_info = vinfo_for_stmt (stmt);
2039
2040 if (!stmt_info || !STMT_VINFO_VECTORIZABLE (stmt_info)
2041 || !PURE_SLP_STMT (stmt_info))
2042 continue;
2043
2044 if (STMT_VINFO_DATA_REF (stmt_info))
2045 {
2046 if (DR_IS_READ (STMT_VINFO_DATA_REF (stmt_info)))
2047 stmt_cost = vect_get_stmt_cost (scalar_load);
2048 else
2049 stmt_cost = vect_get_stmt_cost (scalar_store);
2050 }
2051 else
2052 stmt_cost = vect_get_stmt_cost (scalar_stmt);
2053
2054 scalar_cost += stmt_cost;
2055 }
2056
2057 /* Complete the target-specific cost calculation. */
2058 finish_cost (BB_VINFO_TARGET_COST_DATA (bb_vinfo), &vec_prologue_cost,
2059 &vec_inside_cost, &vec_epilogue_cost);
2060
2061 vec_outside_cost = vec_prologue_cost + vec_epilogue_cost;
2062
2063 if (dump_enabled_p ())
2064 {
2065 dump_printf_loc (MSG_NOTE, vect_location, "Cost model analysis:\n");
2066 dump_printf (MSG_NOTE, " Vector inside of basic block cost: %d\n",
2067 vec_inside_cost);
2068 dump_printf (MSG_NOTE, " Vector prologue cost: %d\n", vec_prologue_cost);
2069 dump_printf (MSG_NOTE, " Vector epilogue cost: %d\n", vec_epilogue_cost);
2070 dump_printf (MSG_NOTE, " Scalar cost of basic block: %d\n", scalar_cost);
2071 }
2072
2073 /* Vectorization is profitable if its cost is less than the cost of the
2074 scalar version. */
2075 if (vec_outside_cost + vec_inside_cost >= scalar_cost)
2076 return false;
2077
2078 return true;
2079 }
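/* A minimal standalone sketch (not part of GCC) of the comparison made
   above, with made-up cost numbers; the helper name bb_profitable_p is
   hypothetical.  Kept under #if 0 so it is illustration only.  */
#if 0
#include <stdbool.h>

static bool
bb_profitable_p (unsigned vec_prologue_cost, unsigned vec_inside_cost,
                 unsigned vec_epilogue_cost, unsigned scalar_cost)
{
  unsigned vec_outside_cost = vec_prologue_cost + vec_epilogue_cost;
  /* E.g., 2 + 6 + 1 == 9 beats a scalar cost of 12; ties lose, matching
     the >= test above.  */
  return vec_outside_cost + vec_inside_cost < scalar_cost;
}
#endif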
2080
2081 /* Check if the basic block can be vectorized. */
2082
2083 static bb_vec_info
2084 vect_slp_analyze_bb_1 (basic_block bb)
2085 {
2086 bb_vec_info bb_vinfo;
2087 VEC (ddr_p, heap) *ddrs;
2088 VEC (slp_instance, heap) *slp_instances;
2089 slp_instance instance;
2090 int i;
2091 int min_vf = 2;
2092 int max_vf = MAX_VECTORIZATION_FACTOR;
2093
2094 bb_vinfo = new_bb_vec_info (bb);
2095 if (!bb_vinfo)
2096 return NULL;
2097
2098 if (!vect_analyze_data_refs (NULL, bb_vinfo, &min_vf))
2099 {
2100 if (dump_enabled_p ())
2101 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2102 "not vectorized: unhandled data-ref in basic "
2103 "block.\n");
2104
2105 destroy_bb_vec_info (bb_vinfo);
2106 return NULL;
2107 }
2108
2109 ddrs = BB_VINFO_DDRS (bb_vinfo);
2110 if (!VEC_length (ddr_p, ddrs))
2111 {
2112 if (dump_enabled_p ())
2113 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2114 "not vectorized: not enough data-refs in "
2115 "basic block.\n");
2116
2117 destroy_bb_vec_info (bb_vinfo);
2118 return NULL;
2119 }
2120
2121 vect_pattern_recog (NULL, bb_vinfo);
2122
2123 if (!vect_analyze_data_ref_dependences (NULL, bb_vinfo, &max_vf)
2124 || min_vf > max_vf)
2125 {
2126 if (dump_enabled_p ())
2127 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2128 "not vectorized: unhandled data dependence "
2129 "in basic block.\n");
2130
2131 destroy_bb_vec_info (bb_vinfo);
2132 return NULL;
2133 }
2134
2135 if (!vect_analyze_data_refs_alignment (NULL, bb_vinfo))
2136 {
2137 if (dump_enabled_p ())
2138 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2139 "not vectorized: bad data alignment in basic "
2140 "block.\n");
2141
2142 destroy_bb_vec_info (bb_vinfo);
2143 return NULL;
2144 }
2145
2146 if (!vect_analyze_data_ref_accesses (NULL, bb_vinfo))
2147 {
2148 if (dump_enabled_p ())
2149 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2150 "not vectorized: unhandled data access in "
2151 "basic block.\n");
2152
2153 destroy_bb_vec_info (bb_vinfo);
2154 return NULL;
2155 }
2156
2157 /* Check the SLP opportunities in the basic block, analyze and build SLP
2158 trees. */
2159 if (!vect_analyze_slp (NULL, bb_vinfo))
2160 {
2161 if (dump_enabled_p ())
2162 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2163 "not vectorized: failed to find SLP opportunities "
2164 "in basic block.\n");
2165
2166 destroy_bb_vec_info (bb_vinfo);
2167 return NULL;
2168 }
2169
2170 slp_instances = BB_VINFO_SLP_INSTANCES (bb_vinfo);
2171
2172 /* Mark all the statements that we want to vectorize as pure SLP and
2173 relevant. */
2174 FOR_EACH_VEC_ELT (slp_instance, slp_instances, i, instance)
2175 {
2176 vect_mark_slp_stmts (SLP_INSTANCE_TREE (instance), pure_slp, -1);
2177 vect_mark_slp_stmts_relevant (SLP_INSTANCE_TREE (instance));
2178 }
2179
2180 if (!vect_verify_datarefs_alignment (NULL, bb_vinfo))
2181 {
2182 if (dump_enabled_p ())
2183 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2184 "not vectorized: unsupported alignment in basic "
2185 "block.\n");
2186 destroy_bb_vec_info (bb_vinfo);
2187 return NULL;
2188 }
2189
2190 if (!vect_slp_analyze_operations (bb_vinfo))
2191 {
2192 if (dump_enabled_p ())
2193 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2194 "not vectorized: bad operation in basic block.\n");
2195
2196 destroy_bb_vec_info (bb_vinfo);
2197 return NULL;
2198 }
2199
2200 /* Cost model: check if the vectorization is worthwhile. */
2201 if (flag_vect_cost_model
2202 && !vect_bb_vectorization_profitable_p (bb_vinfo))
2203 {
2204 if (dump_enabled_p ())
2205 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2206 "not vectorized: vectorization is not "
2207 "profitable.\n");
2208
2209 destroy_bb_vec_info (bb_vinfo);
2210 return NULL;
2211 }
2212
2213 if (dump_enabled_p ())
2214 dump_printf_loc (MSG_NOTE, vect_location,
2215 "Basic block will be vectorized using SLP\n");
2216
2217 return bb_vinfo;
2218 }
2219
2220
2221 bb_vec_info
2222 vect_slp_analyze_bb (basic_block bb)
2223 {
2224 bb_vec_info bb_vinfo;
2225 int insns = 0;
2226 gimple_stmt_iterator gsi;
2227 unsigned int vector_sizes;
2228
2229 if (dump_enabled_p ())
2230 dump_printf_loc (MSG_NOTE, vect_location, "=== vect_slp_analyze_bb ===\n");
2231
2232 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
2233 {
2234 gimple stmt = gsi_stmt (gsi);
2235 if (!is_gimple_debug (stmt)
2236 && !gimple_nop_p (stmt)
2237 && gimple_code (stmt) != GIMPLE_LABEL)
2238 insns++;
2239 }
2240
2241 if (insns > PARAM_VALUE (PARAM_SLP_MAX_INSNS_IN_BB))
2242 {
2243 if (dump_enabled_p ())
2244 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2245 "not vectorized: too many instructions in "
2246 "basic block.\n");
2247
2248 return NULL;
2249 }
2250
2251 /* Autodetect the first vector size we try. */
2252 current_vector_size = 0;
2253 vector_sizes = targetm.vectorize.autovectorize_vector_sizes ();
2254
2255 while (1)
2256 {
2257 bb_vinfo = vect_slp_analyze_bb_1 (bb);
2258 if (bb_vinfo)
2259 return bb_vinfo;
2260
2261 destroy_bb_vec_info (bb_vinfo);
2262
2263 vector_sizes &= ~current_vector_size;
2264 if (vector_sizes == 0
2265 || current_vector_size == 0)
2266 return NULL;
2267
2268 /* Try the largest remaining vector size. */
2269 current_vector_size = 1 << floor_log2 (vector_sizes);
2270 if (dump_enabled_p ())
2271 dump_printf_loc (MSG_NOTE, vect_location,
2272 "***** Re-trying analysis with "
2273 "vector size %d\n", current_vector_size);
2274 }
2275 }
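/* A standalone sketch (not part of GCC) of the retry loop above.  The
   autodetected sizes form a bitmask of power-of-two sizes; each failed
   attempt clears the size just tried, and the largest remaining one is
   picked next, mirroring 1 << floor_log2 (vector_sizes).  The helper
   name next_vector_size is hypothetical.  */
#if 0
static unsigned
next_vector_size (unsigned vector_sizes, unsigned current_vector_size)
{
  unsigned size;

  vector_sizes &= ~current_vector_size;
  if (vector_sizes == 0)
    return 0;   /* Nothing left to try.  */

  /* Keep only the highest set bit, i.e., the largest remaining size.
     E.g., sizes 32|16 == 0x30: try 32 first, then 16, then give up.  */
  size = vector_sizes;
  while (size & (size - 1))
    size &= size - 1;   /* Clear the lowest set bit.  */
  return size;
}
#endif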
2276
2277
2278 /* SLP costs are calculated according to the SLP instance unrolling factor
2279 (i.e., the number of created vector stmts depends on the unrolling
2280 factor). However, the actual number of vector stmts for every SLP node
2281 depends on VF, which is set later in vect_analyze_operations (). Hence,
2282 SLP costs should be updated. In this function we assume that the inside
2283 costs calculated in vect_model_xxx_cost are linear in ncopies. */
2284
2285 void
2286 vect_update_slp_costs_according_to_vf (loop_vec_info loop_vinfo)
2287 {
2288 unsigned int i, j, vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
2289 VEC (slp_instance, heap) *slp_instances = LOOP_VINFO_SLP_INSTANCES (loop_vinfo);
2290 slp_instance instance;
2291 stmt_vector_for_cost body_cost_vec;
2292 stmt_info_for_cost *si;
2293 void *data = LOOP_VINFO_TARGET_COST_DATA (loop_vinfo);
2294
2295 if (dump_enabled_p ())
2296 dump_printf_loc (MSG_NOTE, vect_location,
2297 "=== vect_update_slp_costs_according_to_vf ===");
2298
2299 FOR_EACH_VEC_ELT (slp_instance, slp_instances, i, instance)
2300 {
2301 /* We assume that costs are linear in ncopies. */
2302 int ncopies = vf / SLP_INSTANCE_UNROLLING_FACTOR (instance);
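/* E.g. (hypothetical numbers): VF == 8 with an instance unrolling
   factor of 2 gives NCOPIES == 4, so each cost recorded below is
   scaled by 4.  */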
2303
2304 /* Record the instance's instructions in the target cost model.
2305 This was delayed until here because the count of instructions
2306 isn't known beforehand. */
2307 body_cost_vec = SLP_INSTANCE_BODY_COST_VEC (instance);
2308
2309 FOR_EACH_VEC_ELT (stmt_info_for_cost, body_cost_vec, j, si)
2310 (void) add_stmt_cost (data, si->count * ncopies, si->kind,
2311 vinfo_for_stmt (si->stmt), si->misalign,
2312 vect_body);
2313 }
2314 }
2315
2316
2317 /* For constant and loop invariant defs of SLP_NODE this function returns
2318 (vector) defs (VEC_OPRNDS) that will be used in the vectorized stmts.
2319 OP_NUM determines if we gather defs for operand 0 or operand 1 of the RHS of
2320 scalar stmts. NUMBER_OF_VECTORS is the number of vector defs to create.
2321 REDUC_INDEX is the index of the reduction operand in the statements, unless
2322 it is -1. */
2323
2324 static void
2325 vect_get_constant_vectors (tree op, slp_tree slp_node,
2326 VEC (tree, heap) **vec_oprnds,
2327 unsigned int op_num, unsigned int number_of_vectors,
2328 int reduc_index)
2329 {
2330 VEC (gimple, heap) *stmts = SLP_TREE_SCALAR_STMTS (slp_node);
2331 gimple stmt = VEC_index (gimple, stmts, 0);
2332 stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
2333 unsigned nunits;
2334 tree vec_cst;
2335 tree *elts;
2336 unsigned j, number_of_places_left_in_vector;
2337 tree vector_type;
2338 tree vop;
2339 int group_size = VEC_length (gimple, stmts);
2340 unsigned int vec_num, i;
2341 unsigned number_of_copies = 1;
2342 VEC (tree, heap) *voprnds = VEC_alloc (tree, heap, number_of_vectors);
2343 bool constant_p, is_store;
2344 tree neutral_op = NULL, neutral_vec = NULL;
2345 enum tree_code code = gimple_expr_code (stmt);
2346 gimple def_stmt;
2347 struct loop *loop;
2348 gimple_seq ctor_seq = NULL;
2349
2350 if (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_reduction_def
2351 && reduc_index != -1)
2352 {
2353 op_num = reduc_index - 1;
2354 op = gimple_op (stmt, reduc_index);
2355 /* For additional copies (see the explanation of NUMBER_OF_COPIES below)
2356 we need either neutral operands or the original operands. See
2357 get_initial_def_for_reduction() for details. */
2358 switch (code)
2359 {
2360 case WIDEN_SUM_EXPR:
2361 case DOT_PROD_EXPR:
2362 case PLUS_EXPR:
2363 case MINUS_EXPR:
2364 case BIT_IOR_EXPR:
2365 case BIT_XOR_EXPR:
2366 if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (op)))
2367 neutral_op = build_real (TREE_TYPE (op), dconst0);
2368 else
2369 neutral_op = build_int_cst (TREE_TYPE (op), 0);
2370
2371 break;
2372
2373 case MULT_EXPR:
2374 if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (op)))
2375 neutral_op = build_real (TREE_TYPE (op), dconst1);
2376 else
2377 neutral_op = build_int_cst (TREE_TYPE (op), 1);
2378
2379 break;
2380
2381 case BIT_AND_EXPR:
2382 neutral_op = build_int_cst (TREE_TYPE (op), -1);
2383 break;
2384
2385 case MAX_EXPR:
2386 case MIN_EXPR:
2387 def_stmt = SSA_NAME_DEF_STMT (op);
2388 loop = (gimple_bb (stmt))->loop_father;
2389 neutral_op = PHI_ARG_DEF_FROM_EDGE (def_stmt,
2390 loop_preheader_edge (loop));
2391 break;
2392
2393 default:
2394 neutral_op = NULL;
2395 }
2396 }
2397
2398 if (STMT_VINFO_DATA_REF (stmt_vinfo))
2399 {
2400 is_store = true;
2401 op = gimple_assign_rhs1 (stmt);
2402 }
2403 else
2404 is_store = false;
2405
2406 gcc_assert (op);
2407
2408 if (CONSTANT_CLASS_P (op))
2409 constant_p = true;
2410 else
2411 constant_p = false;
2412
2413 vector_type = get_vectype_for_scalar_type (TREE_TYPE (op));
2414 gcc_assert (vector_type);
2415 nunits = TYPE_VECTOR_SUBPARTS (vector_type);
2416
2417 /* NUMBER_OF_COPIES is the number of times we need to use the same values in
2418 created vectors. It is greater than 1 if unrolling is performed.
2419
2420 For example, we have two scalar operands, s1 and s2 (e.g., group of
2421 strided accesses of size two), while NUNITS is four (i.e., four scalars
2422 of this type can be packed in a vector). The output vector will contain
2423 two copies of each scalar operand: {s1, s2, s1, s2}. (NUMBER_OF_COPIES
2424 will be 2).
2425
2426 If GROUP_SIZE > NUNITS, the scalars will be split into several vectors
2427 containing the operands.
2428
2429 For example, NUNITS is four as before, and the group size is 8
2430 (s1, s2, ..., s8). We will create two vectors {s1, s2, s3, s4} and
2431 {s5, s6, s7, s8}. */
2432
2433 number_of_copies = least_common_multiple (nunits, group_size) / group_size;
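/* E.g. (hypothetical sizes): NUNITS == 4 and GROUP_SIZE == 6 give
   least_common_multiple (4, 6) == 12, so NUMBER_OF_COPIES == 2 and the
   six scalars are walked twice to fill three four-element vectors.  */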
2434
2435 number_of_places_left_in_vector = nunits;
2436 elts = XALLOCAVEC (tree, nunits);
2437 for (j = 0; j < number_of_copies; j++)
2438 {
2439 for (i = group_size - 1; VEC_iterate (gimple, stmts, i, stmt); i--)
2440 {
2441 if (is_store)
2442 op = gimple_assign_rhs1 (stmt);
2443 else
2444 {
2445 switch (code)
2446 {
2447 case COND_EXPR:
2448 if (op_num == 0 || op_num == 1)
2449 {
2450 tree cond = gimple_assign_rhs1 (stmt);
2451 op = TREE_OPERAND (cond, op_num);
2452 }
2453 else
2454 {
2455 if (op_num == 2)
2456 op = gimple_assign_rhs2 (stmt);
2457 else
2458 op = gimple_assign_rhs3 (stmt);
2459 }
2460 break;
2461
2462 case CALL_EXPR:
2463 op = gimple_call_arg (stmt, op_num);
2464 break;
2465
2466 case LSHIFT_EXPR:
2467 case RSHIFT_EXPR:
2468 case LROTATE_EXPR:
2469 case RROTATE_EXPR:
2470 op = gimple_op (stmt, op_num + 1);
2471 /* Unlike the other binary operators, shifts/rotates take
2472 an int shift count instead of an operand of the same
2473 type as the lhs, so make sure the scalar has the right
2474 type if we are dealing with vectors of
2475 long long/long/short/char. */
2476 if (op_num == 1 && constant_p)
2477 op = fold_convert (TREE_TYPE (vector_type), op);
2478 break;
2479
2480 default:
2481 op = gimple_op (stmt, op_num + 1);
2482 break;
2483 }
2484 }
2485
2486 if (reduc_index != -1)
2487 {
2488 loop = (gimple_bb (stmt))->loop_father;
2489 def_stmt = SSA_NAME_DEF_STMT (op);
2490
2491 gcc_assert (loop);
2492
2493 /* Get the def before the loop. In a reduction chain we have
2494 only one initial value. */
2495 if ((j != (number_of_copies - 1)
2496 || (GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt))
2497 && i != 0))
2498 && neutral_op)
2499 op = neutral_op;
2500 else
2501 op = PHI_ARG_DEF_FROM_EDGE (def_stmt,
2502 loop_preheader_edge (loop));
2503 }
2504
2505 /* Create 'vect_ = {op0,op1,...,opn}'. */
2506 number_of_places_left_in_vector--;
2507 if (!types_compatible_p (TREE_TYPE (vector_type), TREE_TYPE (op)))
2508 {
2509 if (constant_p)
2510 {
2511 op = fold_unary (VIEW_CONVERT_EXPR,
2512 TREE_TYPE (vector_type), op);
2513 gcc_assert (op && CONSTANT_CLASS_P (op));
2514 }
2515 else
2516 {
2517 tree new_temp
2518 = make_ssa_name (TREE_TYPE (vector_type), NULL);
2519 gimple init_stmt;
2520 op = build1 (VIEW_CONVERT_EXPR, TREE_TYPE (vector_type),
2521 op);
2522 init_stmt
2523 = gimple_build_assign_with_ops (VIEW_CONVERT_EXPR,
2524 new_temp, op, NULL_TREE);
2525 gimple_seq_add_stmt (&ctor_seq, init_stmt);
2526 op = new_temp;
2527 }
2528 }
2529 elts[number_of_places_left_in_vector] = op;
2530
2531 if (number_of_places_left_in_vector == 0)
2532 {
2533 number_of_places_left_in_vector = nunits;
2534
2535 if (constant_p)
2536 vec_cst = build_vector (vector_type, elts);
2537 else
2538 {
2539 VEC(constructor_elt,gc) *v;
2540 unsigned k;
2541 v = VEC_alloc (constructor_elt, gc, nunits);
2542 for (k = 0; k < nunits; ++k)
2543 CONSTRUCTOR_APPEND_ELT (v, NULL_TREE, elts[k]);
2544 vec_cst = build_constructor (vector_type, v);
2545 }
2546 VEC_quick_push (tree, voprnds,
2547 vect_init_vector (stmt, vec_cst,
2548 vector_type, NULL));
2549 if (ctor_seq != NULL)
2550 {
2551 gimple init_stmt
2552 = SSA_NAME_DEF_STMT (VEC_last (tree, voprnds));
2553 gimple_stmt_iterator gsi = gsi_for_stmt (init_stmt);
2554 gsi_insert_seq_before_without_update (&gsi, ctor_seq,
2555 GSI_SAME_STMT);
2556 ctor_seq = NULL;
2557 }
2558 }
2559 }
2560 }
2561
2562 /* Since the vectors are created in reverse order, we should reverse
2563 them here. */
2564 vec_num = VEC_length (tree, voprnds);
2565 for (j = vec_num; j != 0; j--)
2566 {
2567 vop = VEC_index (tree, voprnds, j - 1);
2568 VEC_quick_push (tree, *vec_oprnds, vop);
2569 }
2570
2571 VEC_free (tree, heap, voprnds);
2572
2573 /* In case VF is greater than the unrolling factor needed for the SLP
2574 group of stmts, the NUMBER_OF_VECTORS to be created is greater than
2575 NUMBER_OF_SCALARS/NUNITS or NUNITS/NUMBER_OF_SCALARS, and hence we have
2576 to replicate the vectors. */
2577 while (number_of_vectors > VEC_length (tree, *vec_oprnds))
2578 {
2579 if (neutral_op)
2580 {
2581 /* Build the neutral vector only once; NEUTRAL_VEC persists
2582 across iterations of this loop and is reused for every copy. */
2583 if (!neutral_vec)
2584 neutral_vec = build_vector_from_val (vector_type, neutral_op);
2585
2586 VEC_quick_push (tree, *vec_oprnds, neutral_vec);
2587 }
2588 else
2589 {
2590 for (i = 0; VEC_iterate (tree, *vec_oprnds, i, vop) && i < vec_num; i++)
2591 VEC_quick_push (tree, *vec_oprnds, vop);
2592 }
2593 }
2594 }
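/* A standalone sketch (not part of GCC) of the replication step above:
   once NVECS_BUILT vector defs exist, requests beyond that are satisfied
   either with the neutral vector (for reductions) or by cycling through
   the defs already built.  All names here are hypothetical.  */
#if 0
static void
replicate_defs (tree *defs, unsigned nvecs_built, unsigned nvecs_needed,
                tree neutral_vec)
{
  unsigned i, n = nvecs_built;

  while (n < nvecs_needed)
    {
      if (neutral_vec)
        defs[n++] = neutral_vec;
      else
        for (i = 0; i < nvecs_built && n < nvecs_needed; i++)
          defs[n++] = defs[i];
    }
}
#endif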
2595
2596
2597 /* Get vectorized definitions from SLP_NODE that contains corresponding
2598 vectorized def-stmts. */
2599
2600 static void
2601 vect_get_slp_vect_defs (slp_tree slp_node, VEC (tree,heap) **vec_oprnds)
2602 {
2603 tree vec_oprnd;
2604 gimple vec_def_stmt;
2605 unsigned int i;
2606
2607 gcc_assert (SLP_TREE_VEC_STMTS (slp_node));
2608
2609 FOR_EACH_VEC_ELT (gimple, SLP_TREE_VEC_STMTS (slp_node), i, vec_def_stmt)
2610 {
2611 gcc_assert (vec_def_stmt);
2612 vec_oprnd = gimple_get_lhs (vec_def_stmt);
2613 VEC_quick_push (tree, *vec_oprnds, vec_oprnd);
2614 }
2615 }
2616
2617
2618 /* Get vectorized definitions for SLP_NODE.
2619 If the scalar definitions are loop invariants or constants, collect them and
2620 call vect_get_constant_vectors() to create vector stmts.
2621 Otherwise, the def-stmts must be already vectorized and the vectorized stmts
2622 must be stored in the corresponding child of SLP_NODE, and we call
2623 vect_get_slp_vect_defs () to retrieve them. */
2624
2625 void
2626 vect_get_slp_defs (VEC (tree, heap) *ops, slp_tree slp_node,
2627 VEC (slp_void_p, heap) **vec_oprnds, int reduc_index)
2628 {
2629 gimple first_stmt, first_def;
2630 int number_of_vects = 0, i;
2631 unsigned int child_index = 0;
2632 HOST_WIDE_INT lhs_size_unit, rhs_size_unit;
2633 slp_tree child = NULL;
2634 VEC (tree, heap) *vec_defs;
2635 tree oprnd, def_lhs;
2636 bool vectorized_defs;
2637
2638 first_stmt = VEC_index (gimple, SLP_TREE_SCALAR_STMTS (slp_node), 0);
2639 FOR_EACH_VEC_ELT (tree, ops, i, oprnd)
2640 {
2641 /* For each operand we check if it has vectorized definitions in a child
2642 node or we need to create them (for invariants and constants). We
2643 check if the LHS of the first stmt of the next child matches OPRND.
2644 If it does, we found the correct child. Otherwise, we call
2645 vect_get_constant_vectors () and do not advance CHILD_INDEX, so that
2646 this child node is checked again for the next operand. */
2647 vectorized_defs = false;
2648 if (VEC_length (slp_void_p, SLP_TREE_CHILDREN (slp_node)) > child_index)
2649 {
2650 child = (slp_tree) VEC_index (slp_void_p,
2651 SLP_TREE_CHILDREN (slp_node),
2652 child_index);
2653 first_def = VEC_index (gimple, SLP_TREE_SCALAR_STMTS (child), 0);
2654
2655 /* At the end of a pattern sequence we have a use of the original
2656 stmt, so we need to compare OPRND with the original def. */
2657 if (is_pattern_stmt_p (vinfo_for_stmt (first_def))
2658 && !STMT_VINFO_IN_PATTERN_P (vinfo_for_stmt (first_stmt))
2659 && !is_pattern_stmt_p (vinfo_for_stmt (first_stmt)))
2660 first_def = STMT_VINFO_RELATED_STMT (vinfo_for_stmt (first_def));
2661
2662 if (is_gimple_call (first_def))
2663 def_lhs = gimple_call_lhs (first_def);
2664 else
2665 def_lhs = gimple_assign_lhs (first_def);
2666
2667 if (operand_equal_p (oprnd, def_lhs, 0))
2668 {
2669 /* The number of vector defs is determined by the number of
2670 vector statements in the node from which we get those
2671 statements. */
2672 number_of_vects = SLP_TREE_NUMBER_OF_VEC_STMTS (child);
2673 vectorized_defs = true;
2674 child_index++;
2675 }
2676 }
2677
2678 if (!vectorized_defs)
2679 {
2680 if (i == 0)
2681 {
2682 number_of_vects = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
2683 /* Number of vector stmts was calculated according to LHS in
2684 vect_schedule_slp_instance (), fix it by replacing LHS with
2685 RHS, if necessary. See vect_get_smallest_scalar_type () for
2686 details. */
2687 vect_get_smallest_scalar_type (first_stmt, &lhs_size_unit,
2688 &rhs_size_unit);
2689 if (rhs_size_unit != lhs_size_unit)
2690 {
2691 number_of_vects *= rhs_size_unit;
2692 number_of_vects /= lhs_size_unit;
2693 }
2694 }
2695 }
2696
2697 /* Allocate memory for vectorized defs. */
2698 vec_defs = VEC_alloc (tree, heap, number_of_vects);
2699
2700 /* For reduction defs we call vect_get_constant_vectors (), since we are
2701 looking for initial loop invariant values. */
2702 if (vectorized_defs && reduc_index == -1)
2703 /* The defs are already vectorized. */
2704 vect_get_slp_vect_defs (child, &vec_defs);
2705 else
2706 /* Build vectors from scalar defs. */
2707 vect_get_constant_vectors (oprnd, slp_node, &vec_defs, i,
2708 number_of_vects, reduc_index);
2709
2710 VEC_quick_push (slp_void_p, *vec_oprnds, (slp_void_p) vec_defs);
2711
2712 /* For reductions, we only need initial values. */
2713 if (reduc_index != -1)
2714 return;
2715 }
2716 }
2717
2718
2719 /* Create NCOPIES permutation statements using the vector mask MASK and two
2720 input vectors placed in DR_CHAIN at FIRST_VEC_INDX and SECOND_VEC_INDX
2721 for the first copy, shifting by STRIDE elements of DR_CHAIN for every
2722 further copy.
2723 (STRIDE is the number of vectorized stmts for NODE divided by the number
2724 of copies.)
2725 VECT_STMTS_COUNTER specifies the index in the vectorized stmts of NODE
2726 where the created stmts must be inserted. */
2727
2728 static inline void
2729 vect_create_mask_and_perm (gimple stmt, gimple next_scalar_stmt,
2730 tree mask, int first_vec_indx, int second_vec_indx,
2731 gimple_stmt_iterator *gsi, slp_tree node,
2732 tree vectype, VEC(tree,heap) *dr_chain,
2733 int ncopies, int vect_stmts_counter)
2734 {
2735 tree perm_dest;
2736 gimple perm_stmt = NULL;
2737 stmt_vec_info next_stmt_info;
2738 int i, stride;
2739 tree first_vec, second_vec, data_ref;
2740
2741 stride = SLP_TREE_NUMBER_OF_VEC_STMTS (node) / ncopies;
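/* E.g. (hypothetical counts): 8 vector stmts for NODE with NCOPIES == 2
   give STRIDE == 4, so copy I reads DR_CHAIN entries FIRST_VEC_INDX + 4*I
   and SECOND_VEC_INDX + 4*I.  */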
2742
2743 /* Initialize the vect stmts of NODE to properly insert the generated
2744 stmts later. */
2745 for (i = VEC_length (gimple, SLP_TREE_VEC_STMTS (node));
2746 i < (int) SLP_TREE_NUMBER_OF_VEC_STMTS (node); i++)
2747 VEC_quick_push (gimple, SLP_TREE_VEC_STMTS (node), NULL);
2748
2749 perm_dest = vect_create_destination_var (gimple_assign_lhs (stmt), vectype);
2750 for (i = 0; i < ncopies; i++)
2751 {
2752 first_vec = VEC_index (tree, dr_chain, first_vec_indx);
2753 second_vec = VEC_index (tree, dr_chain, second_vec_indx);
2754
2755 /* Generate the permute statement. */
2756 perm_stmt = gimple_build_assign_with_ops (VEC_PERM_EXPR, perm_dest,
2757 first_vec, second_vec, mask);
2758 data_ref = make_ssa_name (perm_dest, perm_stmt);
2759 gimple_set_lhs (perm_stmt, data_ref);
2760 vect_finish_stmt_generation (stmt, perm_stmt, gsi);
2761
2762 /* Store the vector statement in NODE. */
2763 VEC_replace (gimple, SLP_TREE_VEC_STMTS (node),
2764 stride * i + vect_stmts_counter, perm_stmt);
2765
2766 first_vec_indx += stride;
2767 second_vec_indx += stride;
2768 }
2769
2770 /* Mark the scalar stmt as vectorized. */
2771 next_stmt_info = vinfo_for_stmt (next_scalar_stmt);
2772 STMT_VINFO_VEC_STMT (next_stmt_info) = perm_stmt;
2773 }
2774
2775
2776 /* Given FIRST_MASK_ELEMENT - the mask element in element representation -
2777 return in CURRENT_MASK_ELEMENT its equivalent in the target-specific
2778 representation. Check that the mask is valid and return FALSE if not.
2779 Return TRUE in NEED_NEXT_VECTOR if the permutation requires moving to
2780 the next vector, i.e., the current first vector is not needed. */
2781
2782 static bool
2783 vect_get_mask_element (gimple stmt, int first_mask_element, int m,
2784 int mask_nunits, bool only_one_vec, int index,
2785 unsigned char *mask, int *current_mask_element,
2786 bool *need_next_vector, int *number_of_mask_fixes,
2787 bool *mask_fixed, bool *needs_first_vector)
2788 {
2789 int i;
2790
2791 /* Convert to target specific representation. */
2792 *current_mask_element = first_mask_element + m;
2793 /* Adjust the value in case it's a mask for the second and third vectors. */
2794 *current_mask_element -= mask_nunits * (*number_of_mask_fixes - 1);
2795
2796 if (*current_mask_element < mask_nunits)
2797 *needs_first_vector = true;
2798
2799 /* We have only one input vector to permute but the mask accesses values in
2800 the next vector as well. */
2801 if (only_one_vec && *current_mask_element >= mask_nunits)
2802 {
2803 if (dump_enabled_p ())
2804 {
2805 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2806 "permutation requires at least two vectors ");
2807 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
2808 }
2809
2810 return false;
2811 }
2812
2813 /* The mask requires the next vector. */
2814 if (*current_mask_element >= mask_nunits * 2)
2815 {
2816 if (*needs_first_vector || *mask_fixed)
2817 {
2818 /* We either need the first vector too or have already moved to the
2819 next vector. In both cases, this permutation needs three
2820 vectors. */
2821 if (dump_enabled_p ())
2822 {
2823 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2824 "permutation requires at "
2825 "least three vectors ");
2826 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
2827 }
2828
2829 return false;
2830 }
2831
2832 /* We move to the next vector, dropping the first one and working with
2833 the second and the third - we need to adjust the values of the mask
2834 accordingly. */
2835 *current_mask_element -= mask_nunits * *number_of_mask_fixes;
2836
2837 for (i = 0; i < index; i++)
2838 mask[i] -= mask_nunits * *number_of_mask_fixes;
2839
2840 (*number_of_mask_fixes)++;
2841 *mask_fixed = true;
2842 }
2843
2844 *need_next_vector = *mask_fixed;
2845
2846 /* This was the last element of this mask. Start a new one. */
2847 if (index == mask_nunits - 1)
2848 {
2849 *number_of_mask_fixes = 1;
2850 *mask_fixed = false;
2851 *needs_first_vector = false;
2852 }
2853
2854 return true;
2855 }
2856
2857
2858 /* Generate vector permute statements from a list of loads in DR_CHAIN.
2859 If ANALYZE_ONLY is TRUE, only check that it is possible to create valid
2860 permute statements for SLP_NODE_INSTANCE. */
2861 bool
2862 vect_transform_slp_perm_load (gimple stmt, VEC (tree, heap) *dr_chain,
2863 gimple_stmt_iterator *gsi, int vf,
2864 slp_instance slp_node_instance, bool analyze_only)
2865 {
2866 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2867 tree mask_element_type = NULL_TREE, mask_type;
2868 int i, j, k, nunits, vec_index = 0, scalar_index;
2869 slp_tree node;
2870 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
2871 gimple next_scalar_stmt;
2872 int group_size = SLP_INSTANCE_GROUP_SIZE (slp_node_instance);
2873 int first_mask_element;
2874 int index, unroll_factor, current_mask_element, ncopies;
2875 unsigned char *mask;
2876 bool only_one_vec = false, need_next_vector = false;
2877 int first_vec_index, second_vec_index, orig_vec_stmts_num, vect_stmts_counter;
2878 int number_of_mask_fixes = 1;
2879 bool mask_fixed = false;
2880 bool needs_first_vector = false;
2881 enum machine_mode mode;
2882
2883 mode = TYPE_MODE (vectype);
2884
2885 if (!can_vec_perm_p (mode, false, NULL))
2886 {
2887 if (dump_enabled_p ())
2888 {
2889 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2890 "no vect permute for ");
2891 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
2892 }
2893 return false;
2894 }
2895
2896 /* The generic VEC_PERM_EXPR code always uses an integral type of the
2897 same size as the vector element being permuted. */
2898 mask_element_type = lang_hooks.types.type_for_mode
2899 (int_mode_for_mode (TYPE_MODE (TREE_TYPE (vectype))), 1);
2900 mask_type = get_vectype_for_scalar_type (mask_element_type);
2901 nunits = TYPE_VECTOR_SUBPARTS (vectype);
2902 mask = XALLOCAVEC (unsigned char, nunits);
2903 unroll_factor = SLP_INSTANCE_UNROLLING_FACTOR (slp_node_instance);
2904
2905 /* The number of vector stmts to generate based only on SLP_NODE_INSTANCE
2906 unrolling factor. */
2907 orig_vec_stmts_num = group_size *
2908 SLP_INSTANCE_UNROLLING_FACTOR (slp_node_instance) / nunits;
2909 if (orig_vec_stmts_num == 1)
2910 only_one_vec = true;
2911
2912 /* Number of copies is determined by the final vectorization factor
2913 relatively to SLP_NODE_INSTANCE unrolling factor. */
2914 ncopies = vf / SLP_INSTANCE_UNROLLING_FACTOR (slp_node_instance);
2915
2916 /* Generate permutation masks for every NODE. Number of masks for each NODE
2917 is equal to GROUP_SIZE.
2918 E.g., we have a group of three nodes with three loads from the same
2919 location in each node, and the vector size is 4. I.e., we have an
2920 a0b0c0a1b1c1... sequence and we need to create the following vectors:
2921 for a's: a0a0a0a1 a1a1a2a2 a2a3a3a3
2922 for b's: b0b0b0b1 b1b1b2b2 b2b3b3b3
2923 ...
2924
2925 The masks for a's should be: {0,0,0,3} {3,3,6,6} {6,9,9,9}.
2926 The last mask is illegal since we assume two operands for the permute
2927 operation, and the mask element values can't be outside that range.
2928 Hence, the last mask must be converted into {2,5,5,5}.
2929 For the first two permutations we need the first and the second input
2930 vectors: {a0,b0,c0,a1} and {b1,c1,a2,b2}, and for the last permutation
2931 we need the second and the third vectors: {b1,c1,a2,b2} and
2932 {c2,a3,b3,c3}. */
2933
2934 FOR_EACH_VEC_ELT (slp_tree, SLP_INSTANCE_LOADS (slp_node_instance), i, node)
2935 {
2936 scalar_index = 0;
2937 index = 0;
2938 vect_stmts_counter = 0;
2939 vec_index = 0;
2940 first_vec_index = vec_index++;
2941 if (only_one_vec)
2942 second_vec_index = first_vec_index;
2943 else
2944 second_vec_index = vec_index++;
2945
2946 for (j = 0; j < unroll_factor; j++)
2947 {
2948 for (k = 0; k < group_size; k++)
2949 {
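/* For the a's of the example above (I == 0, GROUP_SIZE == 3), successive
   values of J contribute raw elements 0, 3, 6, 9, each repeated
   GROUP_SIZE times by the K loop; chunked into NUNITS-sized masks this
   yields {0,0,0,3} {3,3,6,6} {6,9,9,9} before range adjustment.  */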
2950 first_mask_element = i + j * group_size;
2951 if (!vect_get_mask_element (stmt, first_mask_element, 0,
2952 nunits, only_one_vec, index,
2953 mask, &current_mask_element,
2954 &need_next_vector,
2955 &number_of_mask_fixes, &mask_fixed,
2956 &needs_first_vector))
2957 return false;
2958 mask[index++] = current_mask_element;
2959
2960 if (index == nunits)
2961 {
2962 tree mask_vec, *mask_elts;
2963 int l;
2964
2965 if (!can_vec_perm_p (mode, false, mask))
2966 {
2967 if (dump_enabled_p ())
2968 {
2969 dump_printf_loc (MSG_MISSED_OPTIMIZATION,
2970 vect_location,
2971 "unsupported vect permute { ");
2972 for (i = 0; i < nunits; ++i)
2973 dump_printf (MSG_MISSED_OPTIMIZATION, "%d ",
2974 mask[i]);
2975 dump_printf (MSG_MISSED_OPTIMIZATION, "}\n");
2976 }
2977 return false;
2978 }
2979
2980 mask_elts = XALLOCAVEC (tree, nunits);
2981 for (l = 0; l < nunits; ++l)
2982 mask_elts[l] = build_int_cst (mask_element_type, mask[l]);
2983 mask_vec = build_vector (mask_type, mask_elts);
2984 index = 0;
2985
2986 if (!analyze_only)
2987 {
2988 if (need_next_vector)
2989 {
2990 first_vec_index = second_vec_index;
2991 second_vec_index = vec_index;
2992 }
2993
2994 next_scalar_stmt = VEC_index (gimple,
2995 SLP_TREE_SCALAR_STMTS (node), scalar_index++);
2996
2997 vect_create_mask_and_perm (stmt, next_scalar_stmt,
2998 mask_vec, first_vec_index, second_vec_index,
2999 gsi, node, vectype, dr_chain,
3000 ncopies, vect_stmts_counter++);
3001 }
3002 }
3003 }
3004 }
3005 }
3006
3007 return true;
3008 }
3009
3010
3011
3012 /* Vectorize SLP instance tree in postorder. */
3013
3014 static bool
3015 vect_schedule_slp_instance (slp_tree node, slp_instance instance,
3016 unsigned int vectorization_factor)
3017 {
3018 gimple stmt;
3019 bool grouped_store, is_store;
3020 gimple_stmt_iterator si;
3021 stmt_vec_info stmt_info;
3022 unsigned int vec_stmts_size, nunits, group_size;
3023 tree vectype;
3024 int i;
3025 slp_tree loads_node;
3026 slp_void_p child;
3027
3028 if (!node)
3029 return false;
3030
3031 FOR_EACH_VEC_ELT (slp_void_p, SLP_TREE_CHILDREN (node), i, child)
3032 vect_schedule_slp_instance ((slp_tree) child, instance,
3033 vectorization_factor);
3034
3035 stmt = VEC_index (gimple, SLP_TREE_SCALAR_STMTS (node), 0);
3036 stmt_info = vinfo_for_stmt (stmt);
3037
3038 /* VECTYPE is the type of the destination. */
3039 vectype = STMT_VINFO_VECTYPE (stmt_info);
3040 nunits = (unsigned int) TYPE_VECTOR_SUBPARTS (vectype);
3041 group_size = SLP_INSTANCE_GROUP_SIZE (instance);
3042
3043 /* For each SLP instance calculate number of vector stmts to be created
3044 for the scalar stmts in each node of the SLP tree. Number of vector
3045 elements in one vector iteration is the number of scalar elements in
3046 one scalar iteration (GROUP_SIZE) multiplied by VF divided by vector
3047 size. */
3048 vec_stmts_size = (vectorization_factor * group_size) / nunits;
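/* E.g. (hypothetical numbers): VF == 4, GROUP_SIZE == 2 and NUNITS == 4
   give VEC_STMTS_SIZE == 2: eight scalars per vectorized iteration packed
   into two four-element vectors.  */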
3049
3050 /* In case of load permutation we have to allocate vectorized statements for
3051 all the nodes that participate in that permutation. */
3052 if (SLP_INSTANCE_LOAD_PERMUTATION (instance))
3053 {
3054 FOR_EACH_VEC_ELT (slp_tree, SLP_INSTANCE_LOADS (instance), i, loads_node)
3055 {
3056 if (!SLP_TREE_VEC_STMTS (loads_node))
3057 {
3058 SLP_TREE_VEC_STMTS (loads_node) = VEC_alloc (gimple, heap,
3059 vec_stmts_size);
3060 SLP_TREE_NUMBER_OF_VEC_STMTS (loads_node) = vec_stmts_size;
3061 }
3062 }
3063 }
3064
3065 if (!SLP_TREE_VEC_STMTS (node))
3066 {
3067 SLP_TREE_VEC_STMTS (node) = VEC_alloc (gimple, heap, vec_stmts_size);
3068 SLP_TREE_NUMBER_OF_VEC_STMTS (node) = vec_stmts_size;
3069 }
3070
3071 if (dump_enabled_p ())
3072 {
3073 dump_printf_loc (MSG_NOTE, vect_location,
3074 "------>vectorizing SLP node starting from: ");
3075 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
3076 }
3077
3078 /* Loads should be inserted before the first load. */
3079 if (SLP_INSTANCE_FIRST_LOAD_STMT (instance)
3080 && STMT_VINFO_GROUPED_ACCESS (stmt_info)
3081 && !REFERENCE_CLASS_P (gimple_get_lhs (stmt))
3082 && SLP_INSTANCE_LOAD_PERMUTATION (instance))
3083 si = gsi_for_stmt (SLP_INSTANCE_FIRST_LOAD_STMT (instance));
3084 else if (is_pattern_stmt_p (stmt_info))
3085 si = gsi_for_stmt (STMT_VINFO_RELATED_STMT (stmt_info));
3086 else
3087 si = gsi_for_stmt (stmt);
3088
3089 /* Stores should be inserted just before the last store. */
3090 if (STMT_VINFO_GROUPED_ACCESS (stmt_info)
3091 && REFERENCE_CLASS_P (gimple_get_lhs (stmt)))
3092 {
3093 gimple last_store = vect_find_last_store_in_slp_instance (instance);
3094 if (is_pattern_stmt_p (vinfo_for_stmt (last_store)))
3095 last_store = STMT_VINFO_RELATED_STMT (vinfo_for_stmt (last_store));
3096 si = gsi_for_stmt (last_store);
3097 }
3098
3099 /* Mark the first element of the reduction chain as a reduction to properly
3100 transform the node. In the analysis phase only the last element of the
3101 chain is marked as a reduction. */
3102 if (GROUP_FIRST_ELEMENT (stmt_info) && !STMT_VINFO_GROUPED_ACCESS (stmt_info)
3103 && GROUP_FIRST_ELEMENT (stmt_info) == stmt)
3104 {
3105 STMT_VINFO_DEF_TYPE (stmt_info) = vect_reduction_def;
3106 STMT_VINFO_TYPE (stmt_info) = reduc_vec_info_type;
3107 }
3108
3109 is_store = vect_transform_stmt (stmt, &si, &grouped_store, node, instance);
3110 return is_store;
3111 }
3112
3113 /* Replace scalar calls from SLP node NODE with assignments of zero to their
3114 lhs. For loop vectorization this is done in vectorizable_call, but for
3115 SLP it needs to be deferred until the end of vect_schedule_slp, because
3116 multiple SLP instances may refer to the same scalar stmt. */
3117
3118 static void
3119 vect_remove_slp_scalar_calls (slp_tree node)
3120 {
3121 gimple stmt, new_stmt;
3122 gimple_stmt_iterator gsi;
3123 int i;
3124 slp_void_p child;
3125 tree lhs;
3126 stmt_vec_info stmt_info;
3127
3128 if (!node)
3129 return;
3130
3131 FOR_EACH_VEC_ELT (slp_void_p, SLP_TREE_CHILDREN (node), i, child)
3132 vect_remove_slp_scalar_calls ((slp_tree) child);
3133
3134 FOR_EACH_VEC_ELT (gimple, SLP_TREE_SCALAR_STMTS (node), i, stmt)
3135 {
3136 if (!is_gimple_call (stmt) || gimple_bb (stmt) == NULL)
3137 continue;
3138 stmt_info = vinfo_for_stmt (stmt);
3139 if (stmt_info == NULL
3140 || is_pattern_stmt_p (stmt_info)
3141 || !PURE_SLP_STMT (stmt_info))
3142 continue;
3143 lhs = gimple_call_lhs (stmt);
3144 new_stmt = gimple_build_assign (lhs, build_zero_cst (TREE_TYPE (lhs)));
3145 set_vinfo_for_stmt (new_stmt, stmt_info);
3146 set_vinfo_for_stmt (stmt, NULL);
3147 STMT_VINFO_STMT (stmt_info) = new_stmt;
3148 gsi = gsi_for_stmt (stmt);
3149 gsi_replace (&gsi, new_stmt, false);
3150 SSA_NAME_DEF_STMT (gimple_assign_lhs (new_stmt)) = new_stmt;
3151 }
3152 }
3153
3154 /* Generate vector code for all SLP instances in the loop/basic block. */
3155
3156 bool
3157 vect_schedule_slp (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo)
3158 {
3159 VEC (slp_instance, heap) *slp_instances;
3160 slp_instance instance;
3161 unsigned int i, vf;
3162 bool is_store = false;
3163
3164 if (loop_vinfo)
3165 {
3166 slp_instances = LOOP_VINFO_SLP_INSTANCES (loop_vinfo);
3167 vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
3168 }
3169 else
3170 {
3171 slp_instances = BB_VINFO_SLP_INSTANCES (bb_vinfo);
3172 vf = 1;
3173 }
3174
3175 FOR_EACH_VEC_ELT (slp_instance, slp_instances, i, instance)
3176 {
3177 /* Schedule the tree of INSTANCE. */
3178 is_store = vect_schedule_slp_instance (SLP_INSTANCE_TREE (instance),
3179 instance, vf);
3180 if (dump_enabled_p ())
3181 dump_printf_loc (MSG_NOTE, vect_location,
3182 "vectorizing stmts using SLP.");
3183 }
3184
3185 FOR_EACH_VEC_ELT (slp_instance, slp_instances, i, instance)
3186 {
3187 slp_tree root = SLP_INSTANCE_TREE (instance);
3188 gimple store;
3189 unsigned int j;
3190 gimple_stmt_iterator gsi;
3191
3192 vect_remove_slp_scalar_calls (root);
3193
3194 for (j = 0; VEC_iterate (gimple, SLP_TREE_SCALAR_STMTS (root), j, store)
3195 && j < SLP_INSTANCE_GROUP_SIZE (instance); j++)
3196 {
3197 if (!STMT_VINFO_DATA_REF (vinfo_for_stmt (store)))
3198 break;
3199
3200 if (is_pattern_stmt_p (vinfo_for_stmt (store)))
3201 store = STMT_VINFO_RELATED_STMT (vinfo_for_stmt (store));
3202 /* Free the attached stmt_vec_info and remove the stmt. */
3203 gsi = gsi_for_stmt (store);
3204 unlink_stmt_vdef (store);
3205 gsi_remove (&gsi, true);
3206 release_defs (store);
3207 free_stmt_vec_info (store);
3208 }
3209 }
3210
3211 return is_store;
3212 }
3213
3214
3215 /* Vectorize the basic block. */
3216
3217 void
3218 vect_slp_transform_bb (basic_block bb)
3219 {
3220 bb_vec_info bb_vinfo = vec_info_for_bb (bb);
3221 gimple_stmt_iterator si;
3222
3223 gcc_assert (bb_vinfo);
3224
3225 if (dump_enabled_p ())
3226 dump_printf_loc (MSG_NOTE, vect_location, "SLPing BB\n");
3227
3228 for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
3229 {
3230 gimple stmt = gsi_stmt (si);
3231 stmt_vec_info stmt_info;
3232
3233 if (dump_enabled_p ())
3234 {
3235 dump_printf_loc (MSG_NOTE, vect_location,
3236 "------>SLPing statement: ");
3237 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
3238 }
3239
3240 stmt_info = vinfo_for_stmt (stmt);
3241 gcc_assert (stmt_info);
3242
3243 /* Schedule all the SLP instances when the first SLP stmt is reached. */
3244 if (STMT_SLP_TYPE (stmt_info))
3245 {
3246 vect_schedule_slp (NULL, bb_vinfo);
3247 break;
3248 }
3249 }
3250
3251 if (dump_enabled_p ())
3252 dump_printf (MSG_OPTIMIZED_LOCATIONS, "BASIC BLOCK VECTORIZED\n");
3253
3254 destroy_bb_vec_info (bb_vinfo);
3255 }