/* SLP - Basic Block Vectorization
   Copyright (C) 2007-2018 Free Software Foundation, Inc.
   Contributed by Dorit Naishlos <dorit@il.ibm.com>
   and Ira Rosen <irar@il.ibm.com>

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free
Software Foundation; either version 3, or (at your option) any later
version.

GCC is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */

#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "backend.h"
#include "target.h"
#include "rtl.h"
#include "tree.h"
#include "gimple.h"
#include "tree-pass.h"
#include "ssa.h"
#include "optabs-tree.h"
#include "insn-config.h"
#include "recog.h"  /* FIXME: for insn_data */
#include "params.h"
#include "fold-const.h"
#include "stor-layout.h"
#include "gimple-iterator.h"
#include "cfgloop.h"
#include "tree-vectorizer.h"
#include "langhooks.h"
#include "gimple-walk.h"
#include "dbgcnt.h"
#include "tree-vector-builder.h"
#include "vec-perm-indices.h"
#include "gimple-fold.h"
#include "internal-fn.h"


/* Recursively free the memory allocated for the SLP tree rooted at NODE.
   FINAL_P is true if we have vectorized the instance or if we have
   made a final decision not to vectorize the statements in any way.  */

static void
vect_free_slp_tree (slp_tree node, bool final_p)
{
  int i;
  slp_tree child;

  FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
    vect_free_slp_tree (child, final_p);

  /* Don't update STMT_VINFO_NUM_SLP_USES if it isn't relevant.
     Some statements might no longer exist, after having been
     removed by vect_transform_stmt.  Updating the remaining
     statements would be redundant.  */
  if (!final_p)
    {
      gimple *stmt;
      FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, stmt)
        {
          gcc_assert (STMT_VINFO_NUM_SLP_USES (vinfo_for_stmt (stmt)) > 0);
          STMT_VINFO_NUM_SLP_USES (vinfo_for_stmt (stmt))--;
        }
    }

  SLP_TREE_CHILDREN (node).release ();
  SLP_TREE_SCALAR_STMTS (node).release ();
  SLP_TREE_VEC_STMTS (node).release ();
  SLP_TREE_LOAD_PERMUTATION (node).release ();

  free (node);
}


/* Free the memory allocated for the SLP instance.  FINAL_P is true if we
   have vectorized the instance or if we have made a final decision not
   to vectorize the statements in any way.  */

void
vect_free_slp_instance (slp_instance instance, bool final_p)
{
  vect_free_slp_tree (SLP_INSTANCE_TREE (instance), final_p);
  SLP_INSTANCE_LOADS (instance).release ();
  free (instance);
}


/* Create an SLP node for SCALAR_STMTS.  */

static slp_tree
vect_create_new_slp_node (vec<gimple *> scalar_stmts)
{
  slp_tree node;
  gimple *stmt = scalar_stmts[0];
  unsigned int nops;

  if (is_gimple_call (stmt))
    nops = gimple_call_num_args (stmt);
  else if (is_gimple_assign (stmt))
    {
      nops = gimple_num_ops (stmt) - 1;
      if (gimple_assign_rhs_code (stmt) == COND_EXPR)
        nops++;
    }
  else if (gimple_code (stmt) == GIMPLE_PHI)
    nops = 0;
  else
    return NULL;

  node = XNEW (struct _slp_tree);
  SLP_TREE_SCALAR_STMTS (node) = scalar_stmts;
  SLP_TREE_VEC_STMTS (node).create (0);
  SLP_TREE_NUMBER_OF_VEC_STMTS (node) = 0;
  SLP_TREE_CHILDREN (node).create (nops);
  SLP_TREE_LOAD_PERMUTATION (node) = vNULL;
  SLP_TREE_TWO_OPERATORS (node) = false;
  SLP_TREE_DEF_TYPE (node) = vect_internal_def;

  unsigned i;
  FOR_EACH_VEC_ELT (scalar_stmts, i, stmt)
    STMT_VINFO_NUM_SLP_USES (vinfo_for_stmt (stmt))++;

  return node;
}


/* This structure is used in creation of an SLP tree.  Each instance
   corresponds to the same operand in a group of scalar stmts in an SLP
   node.  */
typedef struct _slp_oprnd_info
{
  /* Def-stmts for the operands.  */
  vec<gimple *> def_stmts;
  /* Information about the first statement, its vector def-type, type, the
     operand itself in case it's constant, and an indication if it's a pattern
     stmt.  */
  tree first_op_type;
  enum vect_def_type first_dt;
  bool first_pattern;
  bool second_pattern;
} *slp_oprnd_info;


/* Allocate operands info for NOPS operands, and GROUP_SIZE def-stmts for each
   operand.  */
static vec<slp_oprnd_info>
vect_create_oprnd_info (int nops, int group_size)
{
  int i;
  slp_oprnd_info oprnd_info;
  vec<slp_oprnd_info> oprnds_info;

  oprnds_info.create (nops);
  for (i = 0; i < nops; i++)
    {
      oprnd_info = XNEW (struct _slp_oprnd_info);
      oprnd_info->def_stmts.create (group_size);
      oprnd_info->first_dt = vect_uninitialized_def;
      oprnd_info->first_op_type = NULL_TREE;
      oprnd_info->first_pattern = false;
      oprnd_info->second_pattern = false;
      oprnds_info.quick_push (oprnd_info);
    }

  return oprnds_info;
}


/* Free operands info.  */

static void
vect_free_oprnd_info (vec<slp_oprnd_info> &oprnds_info)
{
  int i;
  slp_oprnd_info oprnd_info;

  FOR_EACH_VEC_ELT (oprnds_info, i, oprnd_info)
    {
      oprnd_info->def_stmts.release ();
      XDELETE (oprnd_info);
    }

  oprnds_info.release ();
}


/* Find the place of the data-ref in STMT in the interleaving chain that starts
   from FIRST_STMT.  Return -1 if the data-ref is not a part of the chain.  */
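/* For example, for an interleaving chain of loads from A[0], A[1] and
   A[4], DR_GROUP_GAP records the distance from the previous element
   (here 1, 1 and 3), so the places returned for the three stmts would
   be 0, 1 and 4 respectively.  This is an illustrative sketch of the
   gap bookkeeping accumulated below.  */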

int
vect_get_place_in_interleaving_chain (gimple *stmt, gimple *first_stmt)
{
  gimple *next_stmt = first_stmt;
  int result = 0;

  if (first_stmt != DR_GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)))
    return -1;

  do
    {
      if (next_stmt == stmt)
        return result;
      next_stmt = DR_GROUP_NEXT_ELEMENT (vinfo_for_stmt (next_stmt));
      if (next_stmt)
        result += DR_GROUP_GAP (vinfo_for_stmt (next_stmt));
    }
  while (next_stmt);

  return -1;
}

/* Check whether it is possible to load COUNT elements of type ELT_MODE
   using the method implemented by duplicate_and_interleave.  Return true
   if so, returning the number of intermediate vectors in *NVECTORS_OUT
   (if nonnull), the type of each intermediate vector in *VECTOR_TYPE_OUT
   (if nonnull), and the two permute masks used to interleave them in
   PERMUTES (if nonnull).  */
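/* For instance, for four-element vectors the two masks built below
   encode the familiar interleave-low { 0, 4, 1, 5 } and interleave-high
   { 2, 6, 3, 7 } permutations; the stepped encoding used by the
   builders extends the same pattern to variable-length vectors
   (an illustrative note, shown for a fixed length).  */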

bool
can_duplicate_and_interleave_p (unsigned int count, machine_mode elt_mode,
                                unsigned int *nvectors_out,
                                tree *vector_type_out,
                                tree *permutes)
{
  poly_int64 elt_bytes = count * GET_MODE_SIZE (elt_mode);
  poly_int64 nelts;
  unsigned int nvectors = 1;
  for (;;)
    {
      scalar_int_mode int_mode;
      poly_int64 elt_bits = elt_bytes * BITS_PER_UNIT;
      if (multiple_p (current_vector_size, elt_bytes, &nelts)
          && int_mode_for_size (elt_bits, 0).exists (&int_mode))
        {
          tree int_type = build_nonstandard_integer_type
            (GET_MODE_BITSIZE (int_mode), 1);
          tree vector_type = build_vector_type (int_type, nelts);
          if (VECTOR_MODE_P (TYPE_MODE (vector_type)))
            {
              vec_perm_builder sel1 (nelts, 2, 3);
              vec_perm_builder sel2 (nelts, 2, 3);
              poly_int64 half_nelts = exact_div (nelts, 2);
              for (unsigned int i = 0; i < 3; ++i)
                {
                  sel1.quick_push (i);
                  sel1.quick_push (i + nelts);
                  sel2.quick_push (half_nelts + i);
                  sel2.quick_push (half_nelts + i + nelts);
                }
              vec_perm_indices indices1 (sel1, 2, nelts);
              vec_perm_indices indices2 (sel2, 2, nelts);
              if (can_vec_perm_const_p (TYPE_MODE (vector_type), indices1)
                  && can_vec_perm_const_p (TYPE_MODE (vector_type), indices2))
                {
                  if (nvectors_out)
                    *nvectors_out = nvectors;
                  if (vector_type_out)
                    *vector_type_out = vector_type;
                  if (permutes)
                    {
                      permutes[0] = vect_gen_perm_mask_checked (vector_type,
                                                                indices1);
                      permutes[1] = vect_gen_perm_mask_checked (vector_type,
                                                                indices2);
                    }
                  return true;
                }
            }
        }
      if (!multiple_p (elt_bytes, 2, &elt_bytes))
        return false;
      nvectors *= 2;
    }
}

/* Get the defs for the rhs of STMT (collect them in OPRNDS_INFO), check that
   they are of a valid type and that they match the defs of the first stmt of
   the SLP group (stored in OPRNDS_INFO).  This function tries to match stmts
   by swapping operands of STMTS[STMT_NUM] when possible.  A non-zero *SWAP
   on entry indicates that a swap is required for cond_expr stmts:
   specifically, *SWAP is 1 if STMT is a cond and the operands of the
   comparison need to be swapped; *SWAP is 2 if STMT is a cond and the code
   of the comparison needs to be inverted.  If this function swaps any
   operands, it sets *SWAP to a non-zero value on exit.
   Return -1 on a fatal error, 1 if the error could be corrected by
   swapping operands of this node's parent, and 0 if everything is ok.  */
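/* As a schematic example, given the two commutative stmts

       x0 = a0 + b0;
       x1 = b1 + a1;

   the defs of the second stmt only line up with those of the first
   after its operands are swapped, which the matching loop below
   attempts before giving up.  */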
static int
vect_get_and_check_slp_defs (vec_info *vinfo, unsigned char *swap,
                             vec<gimple *> stmts, unsigned stmt_num,
                             vec<slp_oprnd_info> *oprnds_info)
{
  gimple *stmt = stmts[stmt_num];
  tree oprnd;
  unsigned int i, number_of_oprnds;
  gimple *def_stmt;
  enum vect_def_type dt = vect_uninitialized_def;
  bool pattern = false;
  slp_oprnd_info oprnd_info;
  int first_op_idx = 1;
  bool commutative = false;
  bool first_op_cond = false;
  bool first = stmt_num == 0;
  bool second = stmt_num == 1;

  if (is_gimple_call (stmt))
    {
      number_of_oprnds = gimple_call_num_args (stmt);
      first_op_idx = 3;
    }
  else if (is_gimple_assign (stmt))
    {
      enum tree_code code = gimple_assign_rhs_code (stmt);
      number_of_oprnds = gimple_num_ops (stmt) - 1;
      /* Swapping can only be done for cond_expr if asked to, otherwise we
         could end up with a different comparison code from the first stmt.  */
      if (gimple_assign_rhs_code (stmt) == COND_EXPR
          && COMPARISON_CLASS_P (gimple_assign_rhs1 (stmt)))
        {
          first_op_cond = true;
          number_of_oprnds++;
        }
      else
        commutative = commutative_tree_code (code);
    }
  else
    return -1;

  bool swapped = (*swap != 0);
  gcc_assert (!swapped || first_op_cond);
  for (i = 0; i < number_of_oprnds; i++)
    {
again:
      if (first_op_cond)
        {
          /* Map indicating how operands of cond_expr should be swapped.  */
          int maps[3][4] = {{0, 1, 2, 3}, {1, 0, 2, 3}, {0, 1, 3, 2}};
          int *map = maps[*swap];

          if (i < 2)
            oprnd = TREE_OPERAND (gimple_op (stmt, first_op_idx), map[i]);
          else
            oprnd = gimple_op (stmt, map[i]);
        }
      else
        oprnd = gimple_op (stmt, first_op_idx + (swapped ? !i : i));

      oprnd_info = (*oprnds_info)[i];

      if (!vect_is_simple_use (oprnd, vinfo, &dt, &def_stmt))
        {
          if (dump_enabled_p ())
            {
              dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                               "Build SLP failed: can't analyze def for ");
              dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM, oprnd);
              dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
            }

          return -1;
        }

      /* Check if DEF_STMT is a part of a pattern in LOOP and get the def stmt
         from the pattern.  Check that all the stmts of the node are in the
         pattern.  */
      if (def_stmt && gimple_bb (def_stmt)
          && vect_stmt_in_region_p (vinfo, def_stmt)
          && vinfo_for_stmt (def_stmt)
          && is_pattern_stmt_p (vinfo_for_stmt (def_stmt)))
        {
          pattern = true;
          if (!first && !oprnd_info->first_pattern
              /* Allow different pattern state for the defs of the
                 first stmt in reduction chains.  */
              && (oprnd_info->first_dt != vect_reduction_def
                  || (!second && !oprnd_info->second_pattern)))
            {
              if (i == 0
                  && !swapped
                  && commutative)
                {
                  swapped = true;
                  goto again;
                }

              if (dump_enabled_p ())
                {
                  dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                                   "Build SLP failed: some of the stmts"
                                   " are in a pattern, and others are not ");
                  dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM, oprnd);
                  dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
                }

              return 1;
            }

          dt = STMT_VINFO_DEF_TYPE (vinfo_for_stmt (def_stmt));

          if (dt == vect_unknown_def_type)
            {
              if (dump_enabled_p ())
                dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                                 "Unsupported pattern.\n");
              return -1;
            }

          switch (gimple_code (def_stmt))
            {
            case GIMPLE_PHI:
            case GIMPLE_ASSIGN:
              break;

            default:
              if (dump_enabled_p ())
                dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                                 "unsupported defining stmt:\n");
              return -1;
            }
        }

      if (second)
        oprnd_info->second_pattern = pattern;

      if (first)
        {
          oprnd_info->first_dt = dt;
          oprnd_info->first_pattern = pattern;
          oprnd_info->first_op_type = TREE_TYPE (oprnd);
        }
      else
        {
          /* Not the first stmt of the group, check that the def-stmt/s match
             the def-stmt/s of the first stmt.  Allow different definition
             types for reduction chains: the first stmt must be a
             vect_reduction_def (a phi node), and the rest
             vect_internal_def.  */
          tree type = TREE_TYPE (oprnd);
          if ((oprnd_info->first_dt != dt
               && !(oprnd_info->first_dt == vect_reduction_def
                    && dt == vect_internal_def)
               && !((oprnd_info->first_dt == vect_external_def
                     || oprnd_info->first_dt == vect_constant_def)
                    && (dt == vect_external_def
                        || dt == vect_constant_def)))
              || !types_compatible_p (oprnd_info->first_op_type, type))
            {
              /* Try swapping operands if we got a mismatch.  */
              if (i == 0
                  && !swapped
                  && commutative)
                {
                  swapped = true;
                  goto again;
                }

              if (dump_enabled_p ())
                dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                                 "Build SLP failed: different types\n");

              return 1;
            }
          if ((dt == vect_constant_def
               || dt == vect_external_def)
              && !current_vector_size.is_constant ()
              && (TREE_CODE (type) == BOOLEAN_TYPE
                  || !can_duplicate_and_interleave_p (stmts.length (),
                                                      TYPE_MODE (type))))
            {
              if (dump_enabled_p ())
                {
                  dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                                   "Build SLP failed: invalid type of def "
                                   "for variable-length SLP ");
                  dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM, oprnd);
                  dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
                }
              return -1;
            }
        }

      /* Check the types of the definitions.  */
      switch (dt)
        {
        case vect_constant_def:
        case vect_external_def:
          break;

        case vect_reduction_def:
        case vect_induction_def:
        case vect_internal_def:
          oprnd_info->def_stmts.quick_push (def_stmt);
          break;

        default:
          /* FORNOW: Not supported.  */
          if (dump_enabled_p ())
            {
              dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                               "Build SLP failed: illegal type of def ");
              dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM, oprnd);
              dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
            }

          return -1;
        }
    }

  /* Swap operands.  */
  if (swapped)
    {
      /* If there are already uses of this stmt in an SLP instance then
         we've committed to the operand order and can't swap it.  */
      if (STMT_VINFO_NUM_SLP_USES (vinfo_for_stmt (stmt)) != 0)
        {
          if (dump_enabled_p ())
            {
              dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                               "Build SLP failed: cannot swap operands of "
                               "shared stmt ");
              dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
            }
          return -1;
        }

      if (first_op_cond)
        {
          tree cond = gimple_assign_rhs1 (stmt);
          enum tree_code code = TREE_CODE (cond);

          /* Swap.  */
          if (*swap == 1)
            {
              swap_ssa_operands (stmt, &TREE_OPERAND (cond, 0),
                                 &TREE_OPERAND (cond, 1));
              TREE_SET_CODE (cond, swap_tree_comparison (code));
            }
          /* Invert.  */
          else
            {
              swap_ssa_operands (stmt, gimple_assign_rhs2_ptr (stmt),
                                 gimple_assign_rhs3_ptr (stmt));
              bool honor_nans = HONOR_NANS (TREE_OPERAND (cond, 0));
              code = invert_tree_comparison (TREE_CODE (cond), honor_nans);
              gcc_assert (code != ERROR_MARK);
              TREE_SET_CODE (cond, code);
            }
        }
      else
        swap_ssa_operands (stmt, gimple_assign_rhs1_ptr (stmt),
                           gimple_assign_rhs2_ptr (stmt));
      if (dump_enabled_p ())
        {
          dump_printf_loc (MSG_NOTE, vect_location,
                           "swapped operands to match def types in ");
          dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
        }
    }

  *swap = swapped;
  return 0;
}

/* Return true if call statements CALL1 and CALL2 are similar enough
   to be combined into the same SLP group.  */

static bool
compatible_calls_p (gcall *call1, gcall *call2)
{
  unsigned int nargs = gimple_call_num_args (call1);
  if (nargs != gimple_call_num_args (call2))
    return false;

  if (gimple_call_combined_fn (call1) != gimple_call_combined_fn (call2))
    return false;

  if (gimple_call_internal_p (call1))
    {
      if (!types_compatible_p (TREE_TYPE (gimple_call_lhs (call1)),
                               TREE_TYPE (gimple_call_lhs (call2))))
        return false;
      for (unsigned int i = 0; i < nargs; ++i)
        if (!types_compatible_p (TREE_TYPE (gimple_call_arg (call1, i)),
                                 TREE_TYPE (gimple_call_arg (call2, i))))
          return false;
    }
  else
    {
      if (!operand_equal_p (gimple_call_fn (call1),
                            gimple_call_fn (call2), 0))
        return false;

      if (gimple_call_fntype (call1) != gimple_call_fntype (call2))
        return false;
    }
  return true;
}

/* A subroutine of vect_build_slp_tree for checking VECTYPE, which is the
   caller's attempt to find the vector type in STMT with the narrowest
   element type.  Return true if VECTYPE is nonnull and if it is valid
   for VINFO.  When returning true, update MAX_NUNITS to reflect the
   number of units in VECTYPE.  VINFO, GROUP_SIZE and MAX_NUNITS are
   as for vect_build_slp_tree.  */
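/* E.g. for basic-block SLP a group of two ints must not be given a
   four-int vector type: filling the vector would require elements from
   outside the group, i.e. unrolling, which only makes sense for loops
   (an illustrative note).  */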

static bool
vect_record_max_nunits (vec_info *vinfo, gimple *stmt, unsigned int group_size,
                        tree vectype, poly_uint64 *max_nunits)
{
  if (!vectype)
    {
      if (dump_enabled_p ())
        {
          dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                           "Build SLP failed: unsupported data-type in ");
          dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
          dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
        }
      /* Fatal mismatch.  */
      return false;
    }

  /* If populating the vector type requires unrolling then fail
     before adjusting *max_nunits for basic-block vectorization.  */
  poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
  unsigned HOST_WIDE_INT const_nunits;
  if (is_a <bb_vec_info> (vinfo)
      && (!nunits.is_constant (&const_nunits)
          || const_nunits > group_size))
    {
      dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                       "Build SLP failed: unrolling required "
                       "in basic block SLP\n");
      /* Fatal mismatch.  */
      return false;
    }

  /* In case of multiple types we need to detect the smallest type.  */
  vect_update_max_nunits (max_nunits, vectype);
  return true;
}

/* STMTS is a group of GROUP_SIZE SLP statements in which some
   statements do the same operation as the first statement and in which
   the others do ALT_STMT_CODE.  Return true if we can take one vector
   of the first operation and one vector of the second and permute them
   to get the required result.  VECTYPE is the type of the vector that
   would be permuted.  */
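/* For instance, for the group { a + b, a - b } and four-element vectors
   the permutation below would be { 0, 5, 2, 7 }, selecting the even
   elements from the PLUS vector and the odd elements from the MINUS
   vector (a worked example for a fixed vector length).  */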

static bool
vect_two_operations_perm_ok_p (vec<gimple *> stmts, unsigned int group_size,
                               tree vectype, tree_code alt_stmt_code)
{
  unsigned HOST_WIDE_INT count;
  if (!TYPE_VECTOR_SUBPARTS (vectype).is_constant (&count))
    return false;

  vec_perm_builder sel (count, count, 1);
  for (unsigned int i = 0; i < count; ++i)
    {
      unsigned int elt = i;
      if (gimple_assign_rhs_code (stmts[i % group_size]) == alt_stmt_code)
        elt += count;
      sel.quick_push (elt);
    }
  vec_perm_indices indices (sel, 2, count);
  return can_vec_perm_const_p (TYPE_MODE (vectype), indices);
}

/* Verify if the scalar stmts STMTS are isomorphic, require data
   permutation or are of unsupported types of operation.  Return
   true if they are, otherwise return false and indicate in *MATCHES
   which stmts are not isomorphic to the first one.  If MATCHES[0]
   is false then this indicates that the comparison could not be
   carried out or that the stmts will never be vectorized by SLP.

   Note that a COND_EXPR is possibly isomorphic to another one after
   swapping its operands.  Set SWAP[i] to 1 if stmt I is a COND_EXPR and
   is isomorphic to the first stmt by swapping the two operands of the
   comparison; set SWAP[i] to 2 if stmt I is isomorphic to the first stmt
   by inverting the code of the comparison.  Taking A1 >= B1 ? X1 : Y1 as
   an example, it can be swapped to (B1 <= A1 ? X1 : Y1), or be inverted
   to (A1 < B1) ? Y1 : X1.  */

static bool
vect_build_slp_tree_1 (vec_info *vinfo, unsigned char *swap,
                       vec<gimple *> stmts, unsigned int group_size,
                       poly_uint64 *max_nunits, bool *matches,
                       bool *two_operators)
{
  unsigned int i;
  gimple *first_stmt = stmts[0], *stmt = stmts[0];
  enum tree_code first_stmt_code = ERROR_MARK;
  enum tree_code alt_stmt_code = ERROR_MARK;
  enum tree_code rhs_code = ERROR_MARK;
  enum tree_code first_cond_code = ERROR_MARK;
  tree lhs;
  bool need_same_oprnds = false;
  tree vectype = NULL_TREE, first_op1 = NULL_TREE;
  optab optab;
  int icode;
  machine_mode optab_op2_mode;
  machine_mode vec_mode;
  gimple *first_load = NULL, *prev_first_load = NULL;

  /* For every stmt in NODE find its def stmt/s.  */
  FOR_EACH_VEC_ELT (stmts, i, stmt)
    {
      stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
      swap[i] = 0;
      matches[i] = false;

      if (dump_enabled_p ())
        {
          dump_printf_loc (MSG_NOTE, vect_location, "Build SLP for ");
          dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
        }

      /* Fail to vectorize statements marked as unvectorizable.  */
      if (!STMT_VINFO_VECTORIZABLE (vinfo_for_stmt (stmt)))
        {
          if (dump_enabled_p ())
            {
              dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                               "Build SLP failed: unvectorizable statement ");
              dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
            }
          /* Fatal mismatch.  */
          matches[0] = false;
          return false;
        }

      lhs = gimple_get_lhs (stmt);
      if (lhs == NULL_TREE)
        {
          if (dump_enabled_p ())
            {
              dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                               "Build SLP failed: not GIMPLE_ASSIGN nor "
                               "GIMPLE_CALL ");
              dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
            }
          /* Fatal mismatch.  */
          matches[0] = false;
          return false;
        }

      tree nunits_vectype;
      if (!vect_get_vector_types_for_stmt (stmt_info, &vectype,
                                           &nunits_vectype)
          || (nunits_vectype
              && !vect_record_max_nunits (vinfo, stmt, group_size,
                                          nunits_vectype, max_nunits)))
        {
          /* Fatal mismatch.  */
          matches[0] = false;
          return false;
        }

      gcc_assert (vectype);

      if (gcall *call_stmt = dyn_cast <gcall *> (stmt))
        {
          rhs_code = CALL_EXPR;
          if ((gimple_call_internal_p (call_stmt)
               && (!vectorizable_internal_fn_p
                     (gimple_call_internal_fn (call_stmt))))
              || gimple_call_tail_p (call_stmt)
              || gimple_call_noreturn_p (call_stmt)
              || !gimple_call_nothrow_p (call_stmt)
              || gimple_call_chain (call_stmt))
            {
              if (dump_enabled_p ())
                {
                  dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                                   "Build SLP failed: unsupported call type ");
                  dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
                                    call_stmt, 0);
                }
              /* Fatal mismatch.  */
              matches[0] = false;
              return false;
            }
        }
      else
        rhs_code = gimple_assign_rhs_code (stmt);

      /* Check the operation.  */
      if (i == 0)
        {
          first_stmt_code = rhs_code;

          /* Shift arguments should be equal in all the packed stmts for a
             vector shift with scalar shift operand.  */
          if (rhs_code == LSHIFT_EXPR || rhs_code == RSHIFT_EXPR
              || rhs_code == LROTATE_EXPR
              || rhs_code == RROTATE_EXPR)
            {
              if (vectype == boolean_type_node)
                {
                  if (dump_enabled_p ())
                    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                                     "Build SLP failed: shift of a"
                                     " boolean.\n");
                  /* Fatal mismatch.  */
                  matches[0] = false;
                  return false;
                }

              vec_mode = TYPE_MODE (vectype);

              /* First see if we have a vector/vector shift.  */
              optab = optab_for_tree_code (rhs_code, vectype,
                                           optab_vector);

              if (!optab
                  || optab_handler (optab, vec_mode) == CODE_FOR_nothing)
                {
                  /* No vector/vector shift, try for a vector/scalar shift.  */
                  optab = optab_for_tree_code (rhs_code, vectype,
                                               optab_scalar);

                  if (!optab)
                    {
                      if (dump_enabled_p ())
                        dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                                         "Build SLP failed: no optab.\n");
                      /* Fatal mismatch.  */
                      matches[0] = false;
                      return false;
                    }
                  icode = (int) optab_handler (optab, vec_mode);
                  if (icode == CODE_FOR_nothing)
                    {
                      if (dump_enabled_p ())
                        dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                                         "Build SLP failed: "
                                         "op not supported by target.\n");
                      /* Fatal mismatch.  */
                      matches[0] = false;
                      return false;
                    }
                  optab_op2_mode = insn_data[icode].operand[2].mode;
                  if (!VECTOR_MODE_P (optab_op2_mode))
                    {
                      need_same_oprnds = true;
                      first_op1 = gimple_assign_rhs2 (stmt);
                    }
                }
            }
          else if (rhs_code == WIDEN_LSHIFT_EXPR)
            {
              need_same_oprnds = true;
              first_op1 = gimple_assign_rhs2 (stmt);
            }
        }
      else
        {
          if (first_stmt_code != rhs_code
              && alt_stmt_code == ERROR_MARK)
            alt_stmt_code = rhs_code;
          if (first_stmt_code != rhs_code
              && (first_stmt_code != IMAGPART_EXPR
                  || rhs_code != REALPART_EXPR)
              && (first_stmt_code != REALPART_EXPR
                  || rhs_code != IMAGPART_EXPR)
              /* Handle mismatches in plus/minus by computing both
                 and merging the results.  */
              && !((first_stmt_code == PLUS_EXPR
                    || first_stmt_code == MINUS_EXPR)
                   && (alt_stmt_code == PLUS_EXPR
                       || alt_stmt_code == MINUS_EXPR)
                   && rhs_code == alt_stmt_code)
              && !(STMT_VINFO_GROUPED_ACCESS (vinfo_for_stmt (stmt))
                   && (first_stmt_code == ARRAY_REF
                       || first_stmt_code == BIT_FIELD_REF
                       || first_stmt_code == INDIRECT_REF
                       || first_stmt_code == COMPONENT_REF
                       || first_stmt_code == MEM_REF)))
            {
              if (dump_enabled_p ())
                {
                  dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                                   "Build SLP failed: different operation "
                                   "in stmt ");
                  dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
                  dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                                   "original stmt ");
                  dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
                                    first_stmt, 0);
                }
              /* Mismatch.  */
              continue;
            }

          if (need_same_oprnds
              && !operand_equal_p (first_op1, gimple_assign_rhs2 (stmt), 0))
            {
              if (dump_enabled_p ())
                {
                  dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                                   "Build SLP failed: different shift "
                                   "arguments in ");
                  dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
                }
              /* Mismatch.  */
              continue;
            }

          if (rhs_code == CALL_EXPR)
            {
              gimple *first_stmt = stmts[0];
              if (!compatible_calls_p (as_a <gcall *> (first_stmt),
                                       as_a <gcall *> (stmt)))
                {
                  if (dump_enabled_p ())
                    {
                      dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                                       "Build SLP failed: different calls in ");
                      dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
                                        stmt, 0);
                    }
                  /* Mismatch.  */
                  continue;
                }
            }
        }

      /* Grouped store or load.  */
      if (STMT_VINFO_GROUPED_ACCESS (vinfo_for_stmt (stmt)))
        {
          if (REFERENCE_CLASS_P (lhs))
            {
              /* Store.  */
              ;
            }
          else
            {
              /* Load.  */
              first_load = DR_GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt));
              if (prev_first_load)
                {
                  /* Check that there are no loads from different interleaving
                     chains in the same node.  */
                  if (prev_first_load != first_load)
                    {
                      if (dump_enabled_p ())
                        {
                          dump_printf_loc (MSG_MISSED_OPTIMIZATION,
                                           vect_location,
                                           "Build SLP failed: different "
                                           "interleaving chains in one node ");
                          dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
                                            stmt, 0);
                        }
                      /* Mismatch.  */
                      continue;
                    }
                }
              else
                prev_first_load = first_load;
            }
        } /* Grouped access.  */
      else
        {
          if (TREE_CODE_CLASS (rhs_code) == tcc_reference)
            {
              /* Non-grouped load.  */
              if (dump_enabled_p ())
                {
                  dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                                   "Build SLP failed: not grouped load ");
                  dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
                }

              /* FORNOW: Non-grouped loads are not supported.  */
              /* Fatal mismatch.  */
              matches[0] = false;
              return false;
            }

          /* Not a memory operation.  */
          if (TREE_CODE_CLASS (rhs_code) != tcc_binary
              && TREE_CODE_CLASS (rhs_code) != tcc_unary
              && TREE_CODE_CLASS (rhs_code) != tcc_expression
              && TREE_CODE_CLASS (rhs_code) != tcc_comparison
              && rhs_code != CALL_EXPR)
            {
              if (dump_enabled_p ())
                {
                  dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                                   "Build SLP failed: operation");
                  dump_printf (MSG_MISSED_OPTIMIZATION, " unsupported ");
                  dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
                }
              /* Fatal mismatch.  */
              matches[0] = false;
              return false;
            }

          if (rhs_code == COND_EXPR)
            {
              tree cond_expr = gimple_assign_rhs1 (stmt);
              enum tree_code cond_code = TREE_CODE (cond_expr);
              enum tree_code swap_code = ERROR_MARK;
              enum tree_code invert_code = ERROR_MARK;

              if (i == 0)
                first_cond_code = TREE_CODE (cond_expr);
              else if (TREE_CODE_CLASS (cond_code) == tcc_comparison)
                {
                  bool honor_nans = HONOR_NANS (TREE_OPERAND (cond_expr, 0));
                  swap_code = swap_tree_comparison (cond_code);
                  invert_code = invert_tree_comparison (cond_code, honor_nans);
                }

              if (first_cond_code == cond_code)
                ;
              /* Isomorphism can be achieved by swapping.  */
              else if (first_cond_code == swap_code)
                swap[i] = 1;
              /* Isomorphism can be achieved by inverting.  */
              else if (first_cond_code == invert_code)
                swap[i] = 2;
              else
                {
                  if (dump_enabled_p ())
                    {
                      dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                                       "Build SLP failed: different"
                                       " operation");
                      dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
                                        stmt, 0);
                    }
                  /* Mismatch.  */
                  continue;
                }
            }
        }

      matches[i] = true;
    }

  for (i = 0; i < group_size; ++i)
    if (!matches[i])
      return false;

  /* If we allowed a two-operation SLP node verify the target can cope
     with the permute we are going to use.  */
  if (alt_stmt_code != ERROR_MARK
      && TREE_CODE_CLASS (alt_stmt_code) != tcc_reference)
    {
      if (vectype == boolean_type_node
          || !vect_two_operations_perm_ok_p (stmts, group_size,
                                             vectype, alt_stmt_code))
        {
          for (i = 0; i < group_size; ++i)
            if (gimple_assign_rhs_code (stmts[i]) == alt_stmt_code)
              {
                matches[i] = false;
                if (dump_enabled_p ())
                  {
                    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                                     "Build SLP failed: different operation "
                                     "in stmt ");
                    dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
                                      stmts[i], 0);
                    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                                     "original stmt ");
                    dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
                                      first_stmt, 0);
                  }
              }
          return false;
        }
      *two_operators = true;
    }

  return true;
}

/* Traits for the hash_set to record failed SLP builds for a stmt set.
   Note that we never remove entries except at destruction time, so we
   do not need a special value for deleted entries that differs from
   empty.  */
struct bst_traits
{
  typedef vec <gimple *> value_type;
  typedef vec <gimple *> compare_type;
  static inline hashval_t hash (value_type);
  static inline bool equal (value_type existing, value_type candidate);
  static inline bool is_empty (value_type x) { return !x.exists (); }
  static inline bool is_deleted (value_type x) { return !x.exists (); }
  static inline void mark_empty (value_type &x) { x.release (); }
  static inline void mark_deleted (value_type &x) { x.release (); }
  static inline void remove (value_type &x) { x.release (); }
};
inline hashval_t
bst_traits::hash (value_type x)
{
  inchash::hash h;
  for (unsigned i = 0; i < x.length (); ++i)
    h.add_int (gimple_uid (x[i]));
  return h.end ();
}
inline bool
bst_traits::equal (value_type existing, value_type candidate)
{
  if (existing.length () != candidate.length ())
    return false;
  for (unsigned i = 0; i < existing.length (); ++i)
    if (existing[i] != candidate[i])
      return false;
  return true;
}

typedef hash_set <vec <gimple *>, bst_traits> scalar_stmts_set_t;
static scalar_stmts_set_t *bst_fail;

typedef hash_map <vec <gimple *>, slp_tree,
                  simple_hashmap_traits <bst_traits, slp_tree> >
  scalar_stmts_to_slp_tree_map_t;

static slp_tree
vect_build_slp_tree_2 (vec_info *vinfo,
                       vec<gimple *> stmts, unsigned int group_size,
                       poly_uint64 *max_nunits,
                       vec<slp_tree> *loads,
                       bool *matches, unsigned *npermutes, unsigned *tree_size,
                       unsigned max_tree_size);

static slp_tree
vect_build_slp_tree (vec_info *vinfo,
                     vec<gimple *> stmts, unsigned int group_size,
                     poly_uint64 *max_nunits, vec<slp_tree> *loads,
                     bool *matches, unsigned *npermutes, unsigned *tree_size,
                     unsigned max_tree_size)
{
  if (bst_fail->contains (stmts))
    return NULL;
  slp_tree res = vect_build_slp_tree_2 (vinfo, stmts, group_size, max_nunits,
                                        loads, matches, npermutes, tree_size,
                                        max_tree_size);
  /* When the SLP build fails for STMTS, record this; otherwise the SLP
     build can be exponential in time when we allow building parts from
     scalars, see PR81723.  */
  if (! res)
    {
      vec <gimple *> x;
      x.create (stmts.length ());
      x.splice (stmts);
      bst_fail->add (x);
    }
  return res;
}

/* Recursively build an SLP tree for the stmts in STMTS.  Fail (and
   return NULL) if the def-stmts are not isomorphic, require data
   permutation or are of unsupported types of operation.  On failure
   MATCHES indicates which stmts failed to match the first one; see
   vect_build_slp_tree_1 for details.  */

static slp_tree
vect_build_slp_tree_2 (vec_info *vinfo,
                       vec<gimple *> stmts, unsigned int group_size,
                       poly_uint64 *max_nunits,
                       vec<slp_tree> *loads,
                       bool *matches, unsigned *npermutes, unsigned *tree_size,
                       unsigned max_tree_size)
{
  unsigned nops, i, this_tree_size = 0;
  poly_uint64 this_max_nunits = *max_nunits;
  gimple *stmt;
  slp_tree node;

  matches[0] = false;

  stmt = stmts[0];
  if (is_gimple_call (stmt))
    nops = gimple_call_num_args (stmt);
  else if (is_gimple_assign (stmt))
    {
      nops = gimple_num_ops (stmt) - 1;
      if (gimple_assign_rhs_code (stmt) == COND_EXPR)
        nops++;
    }
  else if (gimple_code (stmt) == GIMPLE_PHI)
    nops = 0;
  else
    return NULL;

  /* If the SLP node is a PHI (induction or reduction), terminate
     the recursion.  */
  if (gimple_code (stmt) == GIMPLE_PHI)
    {
      tree scalar_type = TREE_TYPE (PHI_RESULT (stmt));
      tree vectype = get_vectype_for_scalar_type (scalar_type);
      if (!vect_record_max_nunits (vinfo, stmt, group_size, vectype,
                                   max_nunits))
        return NULL;

      vect_def_type def_type = STMT_VINFO_DEF_TYPE (vinfo_for_stmt (stmt));
      /* Induction from different IVs is not supported.  */
      if (def_type == vect_induction_def)
        {
          FOR_EACH_VEC_ELT (stmts, i, stmt)
            if (stmt != stmts[0])
              return NULL;
        }
      else
        {
          /* Else def types have to match.  */
          FOR_EACH_VEC_ELT (stmts, i, stmt)
            {
              /* But for reduction chains only check on the first stmt.  */
              if (REDUC_GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt))
                  && REDUC_GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)) != stmt)
                continue;
              if (STMT_VINFO_DEF_TYPE (vinfo_for_stmt (stmt)) != def_type)
                return NULL;
            }
        }
      node = vect_create_new_slp_node (stmts);
      return node;
    }


  bool two_operators = false;
  unsigned char *swap = XALLOCAVEC (unsigned char, group_size);
  if (!vect_build_slp_tree_1 (vinfo, swap, stmts, group_size,
                              &this_max_nunits, matches, &two_operators))
    return NULL;

  /* If the SLP node is a load, terminate the recursion.  */
  if (STMT_VINFO_GROUPED_ACCESS (vinfo_for_stmt (stmt))
      && DR_IS_READ (STMT_VINFO_DATA_REF (vinfo_for_stmt (stmt))))
    {
      *max_nunits = this_max_nunits;
      node = vect_create_new_slp_node (stmts);
      loads->safe_push (node);
      return node;
    }

  /* Get at the operands, verifying they are compatible.  */
  vec<slp_oprnd_info> oprnds_info = vect_create_oprnd_info (nops, group_size);
  slp_oprnd_info oprnd_info;
  FOR_EACH_VEC_ELT (stmts, i, stmt)
    {
      int res = vect_get_and_check_slp_defs (vinfo, &swap[i],
                                             stmts, i, &oprnds_info);
      if (res != 0)
        matches[(res == -1) ? 0 : i] = false;
      if (!matches[0])
        break;
    }
  for (i = 0; i < group_size; ++i)
    if (!matches[i])
      {
        vect_free_oprnd_info (oprnds_info);
        return NULL;
      }

  auto_vec<slp_tree, 4> children;
  auto_vec<slp_tree> this_loads;

  stmt = stmts[0];

  if (tree_size)
    max_tree_size -= *tree_size;

  /* Create SLP_TREE nodes for the definition node/s.  */
  FOR_EACH_VEC_ELT (oprnds_info, i, oprnd_info)
    {
      slp_tree child;
      unsigned old_nloads = this_loads.length ();
      unsigned old_tree_size = this_tree_size;
      unsigned int j;

      if (oprnd_info->first_dt != vect_internal_def
          && oprnd_info->first_dt != vect_reduction_def
          && oprnd_info->first_dt != vect_induction_def)
        continue;

      if (++this_tree_size > max_tree_size)
        {
          FOR_EACH_VEC_ELT (children, j, child)
            vect_free_slp_tree (child, false);
          vect_free_oprnd_info (oprnds_info);
          return NULL;
        }

      if ((child = vect_build_slp_tree (vinfo, oprnd_info->def_stmts,
                                        group_size, &this_max_nunits,
                                        &this_loads, matches, npermutes,
                                        &this_tree_size,
                                        max_tree_size)) != NULL)
        {
          /* If all children of this child were built up from scalars then
             just throw that away and build this node up from scalars
             instead.  */
          if (!SLP_TREE_CHILDREN (child).is_empty ()
              /* ??? Rejecting patterns this way doesn't work.  We'd have to
                 do extra work to cancel the pattern so the uses see the
                 scalar version.  */
              && !is_pattern_stmt_p
                    (vinfo_for_stmt (SLP_TREE_SCALAR_STMTS (child)[0])))
            {
              slp_tree grandchild;

              FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (child), j, grandchild)
                if (SLP_TREE_DEF_TYPE (grandchild) == vect_internal_def)
                  break;
              if (!grandchild)
                {
                  /* Roll back.  */
                  this_loads.truncate (old_nloads);
                  this_tree_size = old_tree_size;
                  FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (child), j, grandchild)
                    vect_free_slp_tree (grandchild, false);
                  SLP_TREE_CHILDREN (child).truncate (0);

                  dump_printf_loc (MSG_NOTE, vect_location,
                                   "Building parent vector operands from "
                                   "scalars instead\n");
                  oprnd_info->def_stmts = vNULL;
                  SLP_TREE_DEF_TYPE (child) = vect_external_def;
                  children.safe_push (child);
                  continue;
                }
            }

          oprnd_info->def_stmts = vNULL;
          children.safe_push (child);
          continue;
        }

      /* If the SLP build failed fatally and we analyze a basic-block
         simply treat nodes we fail to build as externally defined
         (and thus build vectors from the scalar defs).
         The cost model will reject outright expensive cases.
         ??? This doesn't treat cases where permutation ultimately
         fails (or we don't try permutation below).  Ideally we'd
         even compute a permutation that will end up with the maximum
         SLP tree size...  */
      if (is_a <bb_vec_info> (vinfo)
          && !matches[0]
          /* ??? Rejecting patterns this way doesn't work.  We'd have to
             do extra work to cancel the pattern so the uses see the
             scalar version.  */
          && !is_pattern_stmt_p (vinfo_for_stmt (stmt)))
        {
          dump_printf_loc (MSG_NOTE, vect_location,
                           "Building vector operands from scalars\n");
          child = vect_create_new_slp_node (oprnd_info->def_stmts);
          SLP_TREE_DEF_TYPE (child) = vect_external_def;
          children.safe_push (child);
          oprnd_info->def_stmts = vNULL;
          continue;
        }

      /* If the SLP build for operand zero failed and operand zero
         and one can be commutated try that for the scalar stmts
         that failed the match.  */
      if (i == 0
          /* A first scalar stmt mismatch signals a fatal mismatch.  */
          && matches[0]
          /* ??? For COND_EXPRs we can swap the comparison operands
             as well as the arms under some constraints.  */
          && nops == 2
          && oprnds_info[1]->first_dt == vect_internal_def
          && is_gimple_assign (stmt)
          /* Do so only if the number of unsuccessful permutes was no more
             than a cut-off, as re-trying the recursive match on
             possibly each level of the tree would expose exponential
             behavior.  */
          && *npermutes < 4)
        {
          /* See whether we can swap the matching or the non-matching
             stmt operands.  */
          bool swap_not_matching = true;
          do
            {
              for (j = 0; j < group_size; ++j)
                {
                  if (matches[j] != !swap_not_matching)
                    continue;
                  gimple *stmt = stmts[j];
                  /* Verify if we can swap operands of this stmt.  */
                  if (!is_gimple_assign (stmt)
                      || !commutative_tree_code (gimple_assign_rhs_code (stmt)))
                    {
                      if (!swap_not_matching)
                        goto fail;
                      swap_not_matching = false;
                      break;
                    }
                  /* Verify if we can safely swap or if we committed to a
                     specific operand order already.
                     ??? Instead of modifying GIMPLE stmts here we could
                     record whether we want to swap operands in the SLP
                     node and temporarily do that when processing it
                     (or wrap operand accessors in a helper).  */
                  else if (swap[j] != 0
                           || STMT_VINFO_NUM_SLP_USES (vinfo_for_stmt (stmt)))
                    {
                      if (!swap_not_matching)
                        {
                          if (dump_enabled_p ())
                            {
                              dump_printf_loc (MSG_MISSED_OPTIMIZATION,
                                               vect_location,
                                               "Build SLP failed: cannot swap "
                                               "operands of shared stmt ");
                              dump_gimple_stmt (MSG_MISSED_OPTIMIZATION,
                                                TDF_SLIM, stmts[j], 0);
                            }
                          goto fail;
                        }
                      swap_not_matching = false;
                      break;
                    }
                }
            }
          while (j != group_size);

          /* Swap mismatched definition stmts.  */
          dump_printf_loc (MSG_NOTE, vect_location,
                           "Re-trying with swapped operands of stmts ");
          for (j = 0; j < group_size; ++j)
            if (matches[j] == !swap_not_matching)
              {
                std::swap (oprnds_info[0]->def_stmts[j],
                           oprnds_info[1]->def_stmts[j]);
                dump_printf (MSG_NOTE, "%d ", j);
              }
          dump_printf (MSG_NOTE, "\n");
          /* And try again with scratch 'matches' ...  */
          bool *tem = XALLOCAVEC (bool, group_size);
          if ((child = vect_build_slp_tree (vinfo, oprnd_info->def_stmts,
                                            group_size, &this_max_nunits,
                                            &this_loads, tem, npermutes,
                                            &this_tree_size,
                                            max_tree_size)) != NULL)
            {
              /* ... so if successful we can apply the operand swapping
                 to the GIMPLE IL.  This is necessary because for example
                 vect_get_slp_defs uses operand indexes and thus expects
                 canonical operand order.  This is also necessary even
                 if we end up building the operand from scalars as
                 we'll continue to process swapped operand two.  */
              for (j = 0; j < group_size; ++j)
                {
                  gimple *stmt = stmts[j];
                  gimple_set_plf (stmt, GF_PLF_1, false);
                }
              for (j = 0; j < group_size; ++j)
                {
                  gimple *stmt = stmts[j];
                  if (matches[j] == !swap_not_matching)
                    {
                      /* Avoid swapping operands twice.  */
                      if (gimple_plf (stmt, GF_PLF_1))
                        continue;
                      swap_ssa_operands (stmt, gimple_assign_rhs1_ptr (stmt),
                                         gimple_assign_rhs2_ptr (stmt));
                      gimple_set_plf (stmt, GF_PLF_1, true);
                    }
                }
              /* Verify we swap all duplicates or none.  */
              if (flag_checking)
                for (j = 0; j < group_size; ++j)
                  {
                    gimple *stmt = stmts[j];
                    gcc_assert (gimple_plf (stmt, GF_PLF_1)
                                == (matches[j] == !swap_not_matching));
                  }

              /* If all children of this child were built up from scalars
                 then just throw that away and build this node up from
                 scalars instead.  */
              if (!SLP_TREE_CHILDREN (child).is_empty ()
                  /* ??? Rejecting patterns this way doesn't work.  We'd have
                     to do extra work to cancel the pattern so the uses see the
                     scalar version.  */
                  && !is_pattern_stmt_p
                        (vinfo_for_stmt (SLP_TREE_SCALAR_STMTS (child)[0])))
                {
                  unsigned int j;
                  slp_tree grandchild;

                  FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (child), j, grandchild)
                    if (SLP_TREE_DEF_TYPE (grandchild) == vect_internal_def)
                      break;
                  if (!grandchild)
                    {
                      /* Roll back.  */
                      this_loads.truncate (old_nloads);
                      this_tree_size = old_tree_size;
                      FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (child), j, grandchild)
                        vect_free_slp_tree (grandchild, false);
                      SLP_TREE_CHILDREN (child).truncate (0);

                      dump_printf_loc (MSG_NOTE, vect_location,
                                       "Building parent vector operands from "
                                       "scalars instead\n");
                      oprnd_info->def_stmts = vNULL;
                      SLP_TREE_DEF_TYPE (child) = vect_external_def;
                      children.safe_push (child);
                      continue;
                    }
                }

              oprnd_info->def_stmts = vNULL;
              children.safe_push (child);
              continue;
            }

          ++*npermutes;
        }

fail:
      gcc_assert (child == NULL);
      FOR_EACH_VEC_ELT (children, j, child)
        vect_free_slp_tree (child, false);
      vect_free_oprnd_info (oprnds_info);
      return NULL;
    }

  vect_free_oprnd_info (oprnds_info);

  if (tree_size)
    *tree_size += this_tree_size;
  *max_nunits = this_max_nunits;
  loads->safe_splice (this_loads);

  node = vect_create_new_slp_node (stmts);
  SLP_TREE_TWO_OPERATORS (node) = two_operators;
  SLP_TREE_CHILDREN (node).splice (children);
  return node;
}

/* Dump an SLP tree NODE using flags specified in DUMP_KIND.  */

static void
vect_print_slp_tree (dump_flags_t dump_kind, dump_location_t loc,
                     slp_tree node)
{
  int i;
  gimple *stmt;
  slp_tree child;

  dump_printf_loc (dump_kind, loc, "node%s\n",
                   SLP_TREE_DEF_TYPE (node) != vect_internal_def
                   ? " (external)" : "");
  FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, stmt)
    {
      dump_printf_loc (dump_kind, loc, "\tstmt %d ", i);
      dump_gimple_stmt (dump_kind, TDF_SLIM, stmt, 0);
    }
  FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
    vect_print_slp_tree (dump_kind, loc, child);
}


/* Mark the tree rooted at NODE with MARK (PURE_SLP or HYBRID).
   If MARK is HYBRID, it refers to a specific stmt in NODE (the stmt at index
   J).  Otherwise, MARK is PURE_SLP and J is -1, which indicates that all the
   stmts in NODE are to be marked.  */

static void
vect_mark_slp_stmts (slp_tree node, enum slp_vect_type mark, int j)
{
  int i;
  gimple *stmt;
  slp_tree child;

  if (SLP_TREE_DEF_TYPE (node) != vect_internal_def)
    return;

  FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, stmt)
    if (j < 0 || i == j)
      STMT_SLP_TYPE (vinfo_for_stmt (stmt)) = mark;

  FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
    vect_mark_slp_stmts (child, mark, j);
}


/* Mark the statements of the tree rooted at NODE as relevant (vect_used).  */

static void
vect_mark_slp_stmts_relevant (slp_tree node)
{
  int i;
  gimple *stmt;
  stmt_vec_info stmt_info;
  slp_tree child;

  if (SLP_TREE_DEF_TYPE (node) != vect_internal_def)
    return;

  FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, stmt)
    {
      stmt_info = vinfo_for_stmt (stmt);
      gcc_assert (!STMT_VINFO_RELEVANT (stmt_info)
                  || STMT_VINFO_RELEVANT (stmt_info) == vect_used_in_scope);
      STMT_VINFO_RELEVANT (stmt_info) = vect_used_in_scope;
    }

  FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
    vect_mark_slp_stmts_relevant (child);
}


/* Rearrange the statements of NODE according to PERMUTATION.  */

static void
vect_slp_rearrange_stmts (slp_tree node, unsigned int group_size,
                          vec<unsigned> permutation)
{
  gimple *stmt;
  vec<gimple *> tmp_stmts;
  unsigned int i;
  slp_tree child;

  FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
    vect_slp_rearrange_stmts (child, group_size, permutation);

  gcc_assert (group_size == SLP_TREE_SCALAR_STMTS (node).length ());
  tmp_stmts.create (group_size);
  tmp_stmts.quick_grow_cleared (group_size);

  FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, stmt)
    tmp_stmts[permutation[i]] = stmt;

  SLP_TREE_SCALAR_STMTS (node).release ();
  SLP_TREE_SCALAR_STMTS (node) = tmp_stmts;
}


/* Attempt to reorder stmts in a reduction chain so that we don't
   require any load permutation.  Return true if that was possible,
   otherwise return false.  */
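/* E.g. if every load node of a reduction instance carries the load
   permutation { 1, 0 }, the scalar stmts of all nodes can simply be
   reordered to match the memory order; since the order of reduction
   stmts is irrelevant, no runtime permutation is needed afterwards
   (an illustrative note).  */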

static bool
vect_attempt_slp_rearrange_stmts (slp_instance slp_instn)
{
  unsigned int group_size = SLP_INSTANCE_GROUP_SIZE (slp_instn);
  unsigned int i, j;
  unsigned int lidx;
  slp_tree node, load;

  /* Compare all the permutation sequences to the first one.  We know
     that at least one load is permuted.  */
  node = SLP_INSTANCE_LOADS (slp_instn)[0];
  if (!node->load_permutation.exists ())
    return false;
  for (i = 1; SLP_INSTANCE_LOADS (slp_instn).iterate (i, &load); ++i)
    {
      if (!load->load_permutation.exists ())
        return false;
      FOR_EACH_VEC_ELT (load->load_permutation, j, lidx)
        if (lidx != node->load_permutation[j])
          return false;
    }

  /* Check that the loads in the first sequence are different and there
     are no gaps between them.  */
  auto_sbitmap load_index (group_size);
  bitmap_clear (load_index);
  FOR_EACH_VEC_ELT (node->load_permutation, i, lidx)
    {
      if (lidx >= group_size)
        return false;
      if (bitmap_bit_p (load_index, lidx))
        return false;

      bitmap_set_bit (load_index, lidx);
    }
  for (i = 0; i < group_size; i++)
    if (!bitmap_bit_p (load_index, i))
      return false;

  /* This permutation is valid for reduction.  Since the order of the
     statements in the nodes is not important unless they are memory
     accesses, we can rearrange the statements in all the nodes
     according to the order of the loads.  */
  vect_slp_rearrange_stmts (SLP_INSTANCE_TREE (slp_instn), group_size,
                            node->load_permutation);

  /* We are done, no actual permutations need to be generated.  */
  poly_uint64 unrolling_factor = SLP_INSTANCE_UNROLLING_FACTOR (slp_instn);
  FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (slp_instn), i, node)
    {
      gimple *first_stmt = SLP_TREE_SCALAR_STMTS (node)[0];
      first_stmt = DR_GROUP_FIRST_ELEMENT (vinfo_for_stmt (first_stmt));
      /* But we have to keep those permutations that are required because
         of handling of gaps.  */
      if (known_eq (unrolling_factor, 1U)
          || (group_size == DR_GROUP_SIZE (vinfo_for_stmt (first_stmt))
              && DR_GROUP_GAP (vinfo_for_stmt (first_stmt)) == 0))
        SLP_TREE_LOAD_PERMUTATION (node).release ();
      else
        for (j = 0; j < SLP_TREE_LOAD_PERMUTATION (node).length (); ++j)
          SLP_TREE_LOAD_PERMUTATION (node)[j] = j;
    }

  return true;
}

/* Check if the required load permutations in the SLP instance
   SLP_INSTN are supported.  */

static bool
vect_supported_load_permutation_p (slp_instance slp_instn)
{
  unsigned int group_size = SLP_INSTANCE_GROUP_SIZE (slp_instn);
  unsigned int i, j, k, next;
  slp_tree node;
  gimple *stmt, *load, *next_load;

  if (dump_enabled_p ())
    {
      dump_printf_loc (MSG_NOTE, vect_location, "Load permutation ");
      FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (slp_instn), i, node)
        if (node->load_permutation.exists ())
          FOR_EACH_VEC_ELT (node->load_permutation, j, next)
            dump_printf (MSG_NOTE, "%d ", next);
        else
          for (k = 0; k < group_size; ++k)
            dump_printf (MSG_NOTE, "%d ", k);
      dump_printf (MSG_NOTE, "\n");
    }

  /* In case of reduction every load permutation is allowed, since the order
     of the reduction statements is not important (as opposed to the case of
     grouped stores).  The only condition we need to check is that all the
     load nodes are of the same size and have the same permutation (and then
     rearrange all the nodes of the SLP instance according to this
     permutation).  */

  /* Check that all the load nodes are of the same size.  */
  /* ??? Can't we assert this?  */
  FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (slp_instn), i, node)
    if (SLP_TREE_SCALAR_STMTS (node).length () != (unsigned) group_size)
      return false;

  node = SLP_INSTANCE_TREE (slp_instn);
  stmt = SLP_TREE_SCALAR_STMTS (node)[0];

  /* Reduction (there are no data-refs in the root).
     In a reduction chain the order of the loads is not important.  */
  if (!STMT_VINFO_DATA_REF (vinfo_for_stmt (stmt))
      && !REDUC_GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)))
    vect_attempt_slp_rearrange_stmts (slp_instn);

  /* In basic block vectorization we allow any subchain of an interleaving
     chain.
     FORNOW: not supported in loop SLP because of realignment
     complications.  */
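  /* E.g. a node whose loads access a[1] and a[2] out of a group of four
     consecutive loads forms such a subchain and needs no permutation
     (an illustrative note).  */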
1767 if (STMT_VINFO_BB_VINFO (vinfo_for_stmt (stmt)))
1768 {
1769 /* Check whether the loads in an instance form a subchain and thus
1770 no permutation is necessary. */
1771 FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (slp_instn), i, node)
1772 {
1773 if (!SLP_TREE_LOAD_PERMUTATION (node).exists ())
1774 continue;
1775 bool subchain_p = true;
1776 next_load = NULL;
1777 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), j, load)
1778 {
1779 if (j != 0
1780 && (next_load != load
1781 || DR_GROUP_GAP (vinfo_for_stmt (load)) != 1))
1782 {
1783 subchain_p = false;
1784 break;
1785 }
1786 next_load = DR_GROUP_NEXT_ELEMENT (vinfo_for_stmt (load));
1787 }
1788 if (subchain_p)
1789 SLP_TREE_LOAD_PERMUTATION (node).release ();
1790 else
1791 {
1792 stmt_vec_info group_info
1793 = vinfo_for_stmt (SLP_TREE_SCALAR_STMTS (node)[0]);
1794 group_info = vinfo_for_stmt (DR_GROUP_FIRST_ELEMENT (group_info));
1795 unsigned HOST_WIDE_INT nunits;
1796 unsigned k, maxk = 0;
1797 FOR_EACH_VEC_ELT (SLP_TREE_LOAD_PERMUTATION (node), j, k)
1798 if (k > maxk)
1799 maxk = k;
1800 /* In BB vectorization we may not actually use a loaded vector
1801 accessing elements in excess of DR_GROUP_SIZE. */
1802 tree vectype = STMT_VINFO_VECTYPE (group_info);
1803 if (!TYPE_VECTOR_SUBPARTS (vectype).is_constant (&nunits)
1804 || maxk >= (DR_GROUP_SIZE (group_info) & ~(nunits - 1)))
1805 {
1806 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1807 "BB vectorization with gaps at the end of "
1808 "a load is not supported\n");
1809 return false;
1810 }
1811
1812 /* Verify the permutation can be generated. */
1813 vec<tree> tem;
1814 unsigned n_perms;
1815 if (!vect_transform_slp_perm_load (node, tem, NULL,
1816 1, slp_instn, true, &n_perms))
1817 {
1818 dump_printf_loc (MSG_MISSED_OPTIMIZATION,
1819 vect_location,
1820 "unsupported load permutation\n");
1821 return false;
1822 }
1823 }
1824 }
1825 return true;
1826 }
1827
1828 /* For loop vectorization verify we can generate the permutation. Be
1829 conservative about the vectorization factor; there are permutations
1830 that will use three vector inputs only starting from a specific factor
1831 and the vectorization factor is not yet final.
1832 ??? The SLP instance unrolling factor might not be the maximum one. */
1833 unsigned n_perms;
1834 poly_uint64 test_vf
1835 = force_common_multiple (SLP_INSTANCE_UNROLLING_FACTOR (slp_instn),
1836 LOOP_VINFO_VECT_FACTOR
1837 (STMT_VINFO_LOOP_VINFO (vinfo_for_stmt (stmt))));
1838 FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (slp_instn), i, node)
1839 if (node->load_permutation.exists ()
1840 && !vect_transform_slp_perm_load (node, vNULL, NULL, test_vf,
1841 slp_instn, true, &n_perms))
1842 return false;
1843
1844 return true;
1845 }
1846
1847
1848 /* Find the last scalar statement in SLP tree NODE, using the position of the original statement for pattern statements. */
1849
1850 gimple *
1851 vect_find_last_scalar_stmt_in_slp (slp_tree node)
1852 {
1853 gimple *last = NULL, *stmt;
1854
1855 for (int i = 0; SLP_TREE_SCALAR_STMTS (node).iterate (i, &stmt); i++)
1856 {
1857 stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
1858 if (is_pattern_stmt_p (stmt_vinfo))
1859 last = get_later_stmt (STMT_VINFO_RELATED_STMT (stmt_vinfo), last);
1860 else
1861 last = get_later_stmt (stmt, last);
1862 }
1863
1864 return last;
1865 }
1866
1867 /* Splits a group of stores, currently beginning at FIRST_STMT, into two groups:
1868 one (still beginning at FIRST_STMT) of size GROUP1_SIZE (also containing
1869 the first GROUP1_SIZE stmts, since stores are consecutive), the second
1870 containing the remainder.
1871 Return the first stmt in the second group. */
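/* For example (illustrative): splitting a group of 8 consecutive stores
   S1..S8 with GROUP1_SIZE == 4 yields {S1,S2,S3,S4} and {S5,S6,S7,S8};
   the second group's gap then skips the first group's 4 elements and the
   first group's gap is bumped by 4, so both groups still advance by the
   original 8 elements per scalar iteration.  */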
1872
1873 static gimple *
1874 vect_split_slp_store_group (gimple *first_stmt, unsigned group1_size)
1875 {
1876 stmt_vec_info first_vinfo = vinfo_for_stmt (first_stmt);
1877 gcc_assert (DR_GROUP_FIRST_ELEMENT (first_vinfo) == first_stmt);
1878 gcc_assert (group1_size > 0);
1879 int group2_size = DR_GROUP_SIZE (first_vinfo) - group1_size;
1880 gcc_assert (group2_size > 0);
1881 DR_GROUP_SIZE (first_vinfo) = group1_size;
1882
1883 gimple *stmt = first_stmt;
1884 for (unsigned i = group1_size; i > 1; i--)
1885 {
1886 stmt = DR_GROUP_NEXT_ELEMENT (vinfo_for_stmt (stmt));
1887 gcc_assert (DR_GROUP_GAP (vinfo_for_stmt (stmt)) == 1);
1888 }
1889 /* STMT is now the last element of the first group. */
1890 gimple *group2 = DR_GROUP_NEXT_ELEMENT (vinfo_for_stmt (stmt));
1891 DR_GROUP_NEXT_ELEMENT (vinfo_for_stmt (stmt)) = 0;
1892
1893 DR_GROUP_SIZE (vinfo_for_stmt (group2)) = group2_size;
1894 for (stmt = group2; stmt; stmt = DR_GROUP_NEXT_ELEMENT (vinfo_for_stmt (stmt)))
1895 {
1896 DR_GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)) = group2;
1897 gcc_assert (DR_GROUP_GAP (vinfo_for_stmt (stmt)) == 1);
1898 }
1899
1900 /* For the second group, the DR_GROUP_GAP is that before the original group,
1901 plus skipping over the first group. */
1902 DR_GROUP_GAP (vinfo_for_stmt (group2))
1903 = DR_GROUP_GAP (first_vinfo) + group1_size;
1904
1905 /* DR_GROUP_GAP of the first group now has to skip over the second group too. */
1906 DR_GROUP_GAP (first_vinfo) += group2_size;
1907
1908 if (dump_enabled_p ())
1909 dump_printf_loc (MSG_NOTE, vect_location, "Split group into %d and %d\n",
1910 group1_size, group2_size);
1911
1912 return group2;
1913 }
1914
1915 /* Calculate the unrolling factor for an SLP instance with GROUP_SIZE
1916 statements and a vector of NUNITS elements. */
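/* E.g. (illustrative): with NUNITS == 4 and GROUP_SIZE == 6,
   common_multiple (4, 6) == 12 and the unrolling factor is 12 / 6 == 2,
   i.e. two scalar iterations are needed to fill whole vectors.  */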
1917
1918 static poly_uint64
1919 calculate_unrolling_factor (poly_uint64 nunits, unsigned int group_size)
1920 {
1921 return exact_div (common_multiple (nunits, group_size), group_size);
1922 }
1923
1924 /* Analyze an SLP instance starting from a group of grouped stores. Call
1925 vect_build_slp_tree to build a tree of packed stmts if possible.
1926 Return FALSE if it's impossible to SLP any stmt in the loop. */
1927
1928 static bool
1929 vect_analyze_slp_instance (vec_info *vinfo,
1930 gimple *stmt, unsigned max_tree_size)
1931 {
1932 slp_instance new_instance;
1933 slp_tree node;
1934 unsigned int group_size;
1935 tree vectype, scalar_type = NULL_TREE;
1936 gimple *next;
1937 unsigned int i;
1938 vec<slp_tree> loads;
1939 struct data_reference *dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (stmt));
1940 vec<gimple *> scalar_stmts;
1941
1942 if (STMT_VINFO_GROUPED_ACCESS (vinfo_for_stmt (stmt)))
1943 {
1944 scalar_type = TREE_TYPE (DR_REF (dr));
1945 vectype = get_vectype_for_scalar_type (scalar_type);
1946 group_size = DR_GROUP_SIZE (vinfo_for_stmt (stmt));
1947 }
1948 else if (!dr && REDUC_GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)))
1949 {
1950 gcc_assert (is_a <loop_vec_info> (vinfo));
1951 vectype = STMT_VINFO_VECTYPE (vinfo_for_stmt (stmt));
1952 group_size = REDUC_GROUP_SIZE (vinfo_for_stmt (stmt));
1953 }
1954 else
1955 {
1956 gcc_assert (is_a <loop_vec_info> (vinfo));
1957 vectype = STMT_VINFO_VECTYPE (vinfo_for_stmt (stmt));
1958 group_size = as_a <loop_vec_info> (vinfo)->reductions.length ();
1959 }
1960
1961 if (!vectype)
1962 {
1963 if (dump_enabled_p ())
1964 {
1965 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1966 "Build SLP failed: unsupported data-type ");
1967 dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM, scalar_type);
1968 dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
1969 }
1970
1971 return false;
1972 }
1973 poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
1974
1975 /* Create a node (a root of the SLP tree) for the packed grouped stores. */
1976 scalar_stmts.create (group_size);
1977 next = stmt;
1978 if (STMT_VINFO_GROUPED_ACCESS (vinfo_for_stmt (stmt)))
1979 {
1980 /* Collect the stores and store them in SLP_TREE_SCALAR_STMTS. */
1981 while (next)
1982 {
1983 if (STMT_VINFO_IN_PATTERN_P (vinfo_for_stmt (next))
1984 && STMT_VINFO_RELATED_STMT (vinfo_for_stmt (next)))
1985 scalar_stmts.safe_push (
1986 STMT_VINFO_RELATED_STMT (vinfo_for_stmt (next)));
1987 else
1988 scalar_stmts.safe_push (next);
1989 next = DR_GROUP_NEXT_ELEMENT (vinfo_for_stmt (next));
1990 }
1991 }
1992 else if (!dr && REDUC_GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)))
1993 {
1994 /* Collect the reduction stmts and store them in
1995 SLP_TREE_SCALAR_STMTS. */
1996 while (next)
1997 {
1998 if (STMT_VINFO_IN_PATTERN_P (vinfo_for_stmt (next))
1999 && STMT_VINFO_RELATED_STMT (vinfo_for_stmt (next)))
2000 scalar_stmts.safe_push (
2001 STMT_VINFO_RELATED_STMT (vinfo_for_stmt (next)));
2002 else
2003 scalar_stmts.safe_push (next);
2004 next = REDUC_GROUP_NEXT_ELEMENT (vinfo_for_stmt (next));
2005 }
2006 /* Mark the first element of the reduction chain as reduction to properly
2007 transform the node. In the reduction analysis phase only the last
2008 element of the chain is marked as reduction. */
2009 STMT_VINFO_DEF_TYPE (vinfo_for_stmt (stmt)) = vect_reduction_def;
2010 }
2011 else
2012 {
2013 /* Collect reduction statements. */
2014 vec<gimple *> reductions = as_a <loop_vec_info> (vinfo)->reductions;
2015 for (i = 0; reductions.iterate (i, &next); i++)
2016 scalar_stmts.safe_push (next);
2017 }
2018
2019 loads.create (group_size);
2020
2021 /* Build the tree for the SLP instance. */
2022 bool *matches = XALLOCAVEC (bool, group_size);
2023 unsigned npermutes = 0;
2024 bst_fail = new scalar_stmts_set_t ();
2025 poly_uint64 max_nunits = nunits;
2026 node = vect_build_slp_tree (vinfo, scalar_stmts, group_size,
2027 &max_nunits, &loads, matches, &npermutes,
2028 NULL, max_tree_size);
2029 delete bst_fail;
2030 if (node != NULL)
2031 {
2032 /* Calculate the unrolling factor based on the smallest type. */
2033 poly_uint64 unrolling_factor
2034 = calculate_unrolling_factor (max_nunits, group_size);
2035
2036 if (maybe_ne (unrolling_factor, 1U)
2037 && is_a <bb_vec_info> (vinfo))
2038 {
2039 unsigned HOST_WIDE_INT const_max_nunits;
2040 if (!max_nunits.is_constant (&const_max_nunits)
2041 || const_max_nunits > group_size)
2042 {
2043 if (dump_enabled_p ())
2044 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2045 "Build SLP failed: store group "
2046 "size not a multiple of the vector size "
2047 "in basic block SLP\n");
2048 vect_free_slp_tree (node, false);
2049 loads.release ();
2050 return false;
2051 }
2052 /* Fatal mismatch. */
2053 matches[group_size / const_max_nunits * const_max_nunits] = false;
2054 vect_free_slp_tree (node, false);
2055 loads.release ();
2056 }
2057 else
2058 {
2059 /* Create a new SLP instance. */
2060 new_instance = XNEW (struct _slp_instance);
2061 SLP_INSTANCE_TREE (new_instance) = node;
2062 SLP_INSTANCE_GROUP_SIZE (new_instance) = group_size;
2063 SLP_INSTANCE_UNROLLING_FACTOR (new_instance) = unrolling_factor;
2064 SLP_INSTANCE_LOADS (new_instance) = loads;
2065
2066 /* Compute the load permutation. */
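/* E.g. (illustrative): if a node's scalar loads appear as
   a[1], a[0], a[3], a[2] relative to the interleaving chain starting
   at a[0], its load permutation becomes {1, 0, 3, 2}.  */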
2067 slp_tree load_node;
2068 bool loads_permuted = false;
2069 FOR_EACH_VEC_ELT (loads, i, load_node)
2070 {
2071 vec<unsigned> load_permutation;
2072 int j;
2073 gimple *load, *first_stmt;
2074 bool this_load_permuted = false;
2075 load_permutation.create (group_size);
2076 first_stmt = DR_GROUP_FIRST_ELEMENT
2077 (vinfo_for_stmt (SLP_TREE_SCALAR_STMTS (load_node)[0]));
2078 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (load_node), j, load)
2079 {
2080 int load_place = vect_get_place_in_interleaving_chain
2081 (load, first_stmt);
2082 gcc_assert (load_place != -1);
2083 if (load_place != j)
2084 this_load_permuted = true;
2085 load_permutation.safe_push (load_place);
2086 }
2087 if (!this_load_permuted
2088 /* The load requires permutation when unrolling exposes
2089 a gap either because the group is larger than the SLP
2090 group-size or because there is a gap between the groups. */
2091 && (known_eq (unrolling_factor, 1U)
2092 || (group_size == DR_GROUP_SIZE (vinfo_for_stmt (first_stmt))
2093 && DR_GROUP_GAP (vinfo_for_stmt (first_stmt)) == 0)))
2094 {
2095 load_permutation.release ();
2096 continue;
2097 }
2098 SLP_TREE_LOAD_PERMUTATION (load_node) = load_permutation;
2099 loads_permuted = true;
2100 }
2101
2102 if (loads_permuted)
2103 {
2104 if (!vect_supported_load_permutation_p (new_instance))
2105 {
2106 if (dump_enabled_p ())
2107 {
2108 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2109 "Build SLP failed: unsupported load "
2110 "permutation ");
2111 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION,
2112 TDF_SLIM, stmt, 0);
2113 }
2114 vect_free_slp_instance (new_instance, false);
2115 return false;
2116 }
2117 }
2118
2119 /* If the loads and stores can be handled with load/store-lanes
2120 instructions, do not generate this SLP instance. */
2121 if (is_a <loop_vec_info> (vinfo)
2122 && loads_permuted
2123 && dr && vect_store_lanes_supported (vectype, group_size, false))
2124 {
2125 slp_tree load_node;
2126 FOR_EACH_VEC_ELT (loads, i, load_node)
2127 {
2128 gimple *first_stmt = DR_GROUP_FIRST_ELEMENT
2129 (vinfo_for_stmt (SLP_TREE_SCALAR_STMTS (load_node)[0]));
2130 stmt_vec_info stmt_vinfo = vinfo_for_stmt (first_stmt);
2131 /* Use SLP for strided accesses (or if we
2132 can't load-lanes). */
2133 if (STMT_VINFO_STRIDED_P (stmt_vinfo)
2134 || ! vect_load_lanes_supported
2135 (STMT_VINFO_VECTYPE (stmt_vinfo),
2136 DR_GROUP_SIZE (stmt_vinfo), false))
2137 break;
2138 }
2139 if (i == loads.length ())
2140 {
2141 if (dump_enabled_p ())
2142 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2143 "Built SLP cancelled: can use "
2144 "load/store-lanes\n");
2145 vect_free_slp_instance (new_instance, false);
2146 return false;
2147 }
2148 }
2149
2150 vinfo->slp_instances.safe_push (new_instance);
2151
2152 if (dump_enabled_p ())
2153 {
2154 dump_printf_loc (MSG_NOTE, vect_location,
2155 "Final SLP tree for instance:\n");
2156 vect_print_slp_tree (MSG_NOTE, vect_location, node);
2157 }
2158
2159 return true;
2160 }
2161 }
2162 else
2163 {
2164 /* Failed to SLP. */
2165 /* Free the allocated memory. */
2166 scalar_stmts.release ();
2167 loads.release ();
2168 }
2169
2170 /* For basic block SLP, try to break the group up into multiples of the
2171 vector size. */
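/* E.g. (illustrative): with const_nunits == 4 and the first mismatch at
   element 6, the first four elements are split off as one group, the
   vector containing the mismatch (elements 4-7) is split off and skipped,
   and analysis recurses on the remainder.  */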
2172 unsigned HOST_WIDE_INT const_nunits;
2173 if (is_a <bb_vec_info> (vinfo)
2174 && STMT_VINFO_GROUPED_ACCESS (vinfo_for_stmt (stmt))
2175 && DR_GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt))
2176 && nunits.is_constant (&const_nunits))
2177 {
2178 /* We consider breaking the group only on VF boundaries from the existing
2179 start. */
2180 for (i = 0; i < group_size; i++)
2181 if (!matches[i]) break;
2182
2183 if (i >= const_nunits && i < group_size)
2184 {
2185 /* Split into two groups at the first vector boundary before i. */
2186 gcc_assert ((const_nunits & (const_nunits - 1)) == 0);
2187 unsigned group1_size = i & ~(const_nunits - 1);
2188
2189 gimple *rest = vect_split_slp_store_group (stmt, group1_size);
2190 bool res = vect_analyze_slp_instance (vinfo, stmt, max_tree_size);
2191 /* If the first non-match was in the middle of a vector,
2192 skip the rest of that vector. */
2193 if (group1_size < i)
2194 {
2195 i = group1_size + const_nunits;
2196 if (i < group_size)
2197 rest = vect_split_slp_store_group (rest, const_nunits);
2198 }
2199 if (i < group_size)
2200 res |= vect_analyze_slp_instance (vinfo, rest, max_tree_size);
2201 return res;
2202 }
2203 /* Even though the first vector did not all match, we might be able to SLP
2204 (some of) the remainder. FORNOW ignore this possibility. */
2205 }
2206
2207 return false;
2208 }
2209
2210
2211 /* Check if there are stmts in the loop that can be vectorized using SLP. Build SLP
2212 trees of packed scalar stmts if SLP is possible. */
2213
2214 bool
2215 vect_analyze_slp (vec_info *vinfo, unsigned max_tree_size)
2216 {
2217 unsigned int i;
2218 gimple *first_element;
2219
2220 DUMP_VECT_SCOPE ("vect_analyze_slp");
2221
2222 /* Find SLP sequences starting from groups of grouped stores. */
2223 FOR_EACH_VEC_ELT (vinfo->grouped_stores, i, first_element)
2224 vect_analyze_slp_instance (vinfo, first_element, max_tree_size);
2225
2226 if (loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo))
2227 {
2228 if (loop_vinfo->reduction_chains.length () > 0)
2229 {
2230 /* Find SLP sequences starting from reduction chains. */
2231 FOR_EACH_VEC_ELT (loop_vinfo->reduction_chains, i, first_element)
2232 if (! vect_analyze_slp_instance (vinfo, first_element,
2233 max_tree_size))
2234 {
2235 /* Dissolve reduction chain group. */
2236 gimple *next, *stmt = first_element;
2237 while (stmt)
2238 {
2239 stmt_vec_info vinfo = vinfo_for_stmt (stmt);
2240 next = REDUC_GROUP_NEXT_ELEMENT (vinfo);
2241 REDUC_GROUP_FIRST_ELEMENT (vinfo) = NULL;
2242 REDUC_GROUP_NEXT_ELEMENT (vinfo) = NULL;
2243 stmt = next;
2244 }
2245 STMT_VINFO_DEF_TYPE (vinfo_for_stmt (first_element))
2246 = vect_internal_def;
2247 }
2248 }
2249
2250 /* Find SLP sequences starting from groups of reductions. */
2251 if (loop_vinfo->reductions.length () > 1)
2252 vect_analyze_slp_instance (vinfo, loop_vinfo->reductions[0],
2253 max_tree_size);
2254 }
2255
2256 return true;
2257 }
2258
2259
2260 /* For each possible SLP instance decide whether to SLP it and calculate the
2261 overall unrolling factor needed to SLP the loop. Return TRUE if we decided
2262 to SLP at least one instance. */
2263
2264 bool
2265 vect_make_slp_decision (loop_vec_info loop_vinfo)
2266 {
2267 unsigned int i;
2268 poly_uint64 unrolling_factor = 1;
2269 vec<slp_instance> slp_instances = LOOP_VINFO_SLP_INSTANCES (loop_vinfo);
2270 slp_instance instance;
2271 int decided_to_slp = 0;
2272
2273 DUMP_VECT_SCOPE ("vect_make_slp_decision");
2274
2275 FOR_EACH_VEC_ELT (slp_instances, i, instance)
2276 {
2277 /* FORNOW: SLP if you can. */
2278 /* All unroll factors have the form current_vector_size * X for some
2279 rational X, so they must have a common multiple. */
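/* E.g. (illustrative): instance unrolling factors of 2 and 3 combine
   to an overall unrolling factor of 6.  */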
2280 unrolling_factor
2281 = force_common_multiple (unrolling_factor,
2282 SLP_INSTANCE_UNROLLING_FACTOR (instance));
2283
2284 /* Mark all the stmts that belong to INSTANCE as PURE_SLP stmts. Later we
2285 call vect_detect_hybrid_slp () to find stmts that need hybrid SLP and
2286 loop-based vectorization. Such stmts will be marked as HYBRID. */
2287 vect_mark_slp_stmts (SLP_INSTANCE_TREE (instance), pure_slp, -1);
2288 decided_to_slp++;
2289 }
2290
2291 LOOP_VINFO_SLP_UNROLLING_FACTOR (loop_vinfo) = unrolling_factor;
2292
2293 if (decided_to_slp && dump_enabled_p ())
2294 {
2295 dump_printf_loc (MSG_NOTE, vect_location,
2296 "Decided to SLP %d instances. Unrolling factor ",
2297 decided_to_slp);
2298 dump_dec (MSG_NOTE, unrolling_factor);
2299 dump_printf (MSG_NOTE, "\n");
2300 }
2301
2302 return (decided_to_slp > 0);
2303 }
2304
2305
2306 /* Find stmts that must be both vectorized and SLPed (since they feed stmts that
2307 can't be SLPed) in the tree rooted at NODE. Mark such stmts as HYBRID. */
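/* For example (illustrative): if _1 = _2 + _3 is pure SLP but _1 also has
   a use in a relevant statement outside any SLP instance, the definition
   of _1 must be marked hybrid so it is also vectorized by the loop-based
   path.  */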
2308
2309 static void
2310 vect_detect_hybrid_slp_stmts (slp_tree node, unsigned i, slp_vect_type stype)
2311 {
2312 gimple *stmt = SLP_TREE_SCALAR_STMTS (node)[i];
2313 imm_use_iterator imm_iter;
2314 gimple *use_stmt;
2315 stmt_vec_info use_vinfo, stmt_vinfo = vinfo_for_stmt (stmt);
2316 slp_tree child;
2317 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
2318 int j;
2319
2320 /* Propagate hybrid down the SLP tree. */
2321 if (stype == hybrid)
2322 ;
2323 else if (HYBRID_SLP_STMT (stmt_vinfo))
2324 stype = hybrid;
2325 else
2326 {
2327 /* Check if a pure SLP stmt has uses in non-SLP stmts. */
2328 gcc_checking_assert (PURE_SLP_STMT (stmt_vinfo));
2329 /* If we get a pattern stmt here we have to use the LHS of the
2330 original stmt for immediate uses. */
2331 if (! STMT_VINFO_IN_PATTERN_P (stmt_vinfo)
2332 && STMT_VINFO_RELATED_STMT (stmt_vinfo))
2333 stmt = STMT_VINFO_RELATED_STMT (stmt_vinfo);
2334 tree def;
2335 if (gimple_code (stmt) == GIMPLE_PHI)
2336 def = gimple_phi_result (stmt);
2337 else
2338 def = SINGLE_SSA_TREE_OPERAND (stmt, SSA_OP_DEF);
2339 if (def)
2340 FOR_EACH_IMM_USE_STMT (use_stmt, imm_iter, def)
2341 {
2342 use_vinfo = loop_vinfo->lookup_stmt (use_stmt);
2343 if (!use_vinfo)
2344 continue;
2345 if (STMT_VINFO_IN_PATTERN_P (use_vinfo)
2346 && STMT_VINFO_RELATED_STMT (use_vinfo))
2347 use_vinfo = vinfo_for_stmt (STMT_VINFO_RELATED_STMT (use_vinfo));
2348 if (!STMT_SLP_TYPE (use_vinfo)
2349 && (STMT_VINFO_RELEVANT (use_vinfo)
2350 || VECTORIZABLE_CYCLE_DEF (STMT_VINFO_DEF_TYPE (use_vinfo)))
2351 && !(gimple_code (use_stmt) == GIMPLE_PHI
2352 && STMT_VINFO_DEF_TYPE (use_vinfo) == vect_reduction_def))
2353 {
2354 if (dump_enabled_p ())
2355 {
2356 dump_printf_loc (MSG_NOTE, vect_location, "use of SLP "
2357 "def in non-SLP stmt: ");
2358 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, use_stmt, 0);
2359 }
2360 stype = hybrid;
2361 }
2362 }
2363 }
2364
2365 if (stype == hybrid
2366 && !HYBRID_SLP_STMT (stmt_vinfo))
2367 {
2368 if (dump_enabled_p ())
2369 {
2370 dump_printf_loc (MSG_NOTE, vect_location, "marking hybrid: ");
2371 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
2372 }
2373 STMT_SLP_TYPE (stmt_vinfo) = hybrid;
2374 }
2375
2376 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), j, child)
2377 if (SLP_TREE_DEF_TYPE (child) != vect_external_def)
2378 vect_detect_hybrid_slp_stmts (child, i, stype);
2379 }
2380
2381 /* Helpers for vect_detect_hybrid_slp walking pattern stmt uses. */
2382
2383 static tree
2384 vect_detect_hybrid_slp_1 (tree *tp, int *, void *data)
2385 {
2386 walk_stmt_info *wi = (walk_stmt_info *)data;
2387 loop_vec_info loop_vinfo = (loop_vec_info) wi->info;
2388
2389 if (wi->is_lhs)
2390 return NULL_TREE;
2391
2392 stmt_vec_info def_stmt_info = loop_vinfo->lookup_def (*tp);
2393 if (def_stmt_info && PURE_SLP_STMT (def_stmt_info))
2394 {
2395 if (dump_enabled_p ())
2396 {
2397 dump_printf_loc (MSG_NOTE, vect_location, "marking hybrid: ");
2398 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, def_stmt_info->stmt, 0);
2399 }
2400 STMT_SLP_TYPE (def_stmt_info) = hybrid;
2401 }
2402
2403 return NULL_TREE;
2404 }
2405
2406 static tree
2407 vect_detect_hybrid_slp_2 (gimple_stmt_iterator *gsi, bool *handled,
2408 walk_stmt_info *wi)
2409 {
2410 loop_vec_info loop_vinfo = (loop_vec_info) wi->info;
2411 stmt_vec_info use_vinfo = loop_vinfo->lookup_stmt (gsi_stmt (*gsi));
2412 /* If the stmt is in an SLP instance then this isn't a reason
2413 to mark use definitions in other SLP instances as hybrid. */
2414 if (! STMT_SLP_TYPE (use_vinfo)
2415 && (STMT_VINFO_RELEVANT (use_vinfo)
2416 || VECTORIZABLE_CYCLE_DEF (STMT_VINFO_DEF_TYPE (use_vinfo)))
2417 && ! (gimple_code (gsi_stmt (*gsi)) == GIMPLE_PHI
2418 && STMT_VINFO_DEF_TYPE (use_vinfo) == vect_reduction_def))
2419 ;
2420 else
2421 *handled = true;
2422 return NULL_TREE;
2423 }
2424
2425 /* Find stmts that must be both vectorized and SLPed. */
2426
2427 void
2428 vect_detect_hybrid_slp (loop_vec_info loop_vinfo)
2429 {
2430 unsigned int i;
2431 vec<slp_instance> slp_instances = LOOP_VINFO_SLP_INSTANCES (loop_vinfo);
2432 slp_instance instance;
2433
2434 DUMP_VECT_SCOPE ("vect_detect_hybrid_slp");
2435
2436 /* First walk all pattern stmts in the loop and mark the defs of their uses
2437 as hybrid, because immediate uses in pattern stmts are not recorded. */
2438 for (i = 0; i < LOOP_VINFO_LOOP (loop_vinfo)->num_nodes; ++i)
2439 {
2440 basic_block bb = LOOP_VINFO_BBS (loop_vinfo)[i];
2441 for (gimple_stmt_iterator gsi = gsi_start_bb (bb); !gsi_end_p (gsi);
2442 gsi_next (&gsi))
2443 {
2444 gimple *stmt = gsi_stmt (gsi);
2445 stmt_vec_info stmt_info = loop_vinfo->lookup_stmt (stmt);
2446 if (STMT_VINFO_IN_PATTERN_P (stmt_info))
2447 {
2448 walk_stmt_info wi;
2449 memset (&wi, 0, sizeof (wi));
2450 wi.info = loop_vinfo;
2451 gimple_stmt_iterator gsi2
2452 = gsi_for_stmt (STMT_VINFO_RELATED_STMT (stmt_info));
2453 walk_gimple_stmt (&gsi2, vect_detect_hybrid_slp_2,
2454 vect_detect_hybrid_slp_1, &wi);
2455 walk_gimple_seq (STMT_VINFO_PATTERN_DEF_SEQ (stmt_info),
2456 vect_detect_hybrid_slp_2,
2457 vect_detect_hybrid_slp_1, &wi);
2458 }
2459 }
2460 }
2461
2462 /* Then walk the SLP instance trees marking stmts with uses in
2463 non-SLP stmts as hybrid, also propagating hybrid down the
2464 SLP tree, collecting the above info on-the-fly. */
2465 FOR_EACH_VEC_ELT (slp_instances, i, instance)
2466 {
2467 for (unsigned i = 0; i < SLP_INSTANCE_GROUP_SIZE (instance); ++i)
2468 vect_detect_hybrid_slp_stmts (SLP_INSTANCE_TREE (instance),
2469 i, pure_slp);
2470 }
2471 }
2472
2473
2474 /* Initialize a bb_vec_info struct for the statements between
2475 REGION_BEGIN_IN (inclusive) and REGION_END_IN (exclusive). */
2476
2477 _bb_vec_info::_bb_vec_info (gimple_stmt_iterator region_begin_in,
2478 gimple_stmt_iterator region_end_in,
2479 vec_info_shared *shared)
2480 : vec_info (vec_info::bb, init_cost (NULL), shared),
2481 bb (gsi_bb (region_begin_in)),
2482 region_begin (region_begin_in),
2483 region_end (region_end_in)
2484 {
2485 gimple_stmt_iterator gsi;
2486
2487 for (gsi = region_begin; gsi_stmt (gsi) != gsi_stmt (region_end);
2488 gsi_next (&gsi))
2489 {
2490 gimple *stmt = gsi_stmt (gsi);
2491 gimple_set_uid (stmt, 0);
2492 add_stmt (stmt);
2493 }
2494
2495 bb->aux = this;
2496 }
2497
2498
2499 /* Free BB_VINFO struct, as well as all the stmt_vec_info structs of all the
2500 stmts in the basic block. */
2501
2502 _bb_vec_info::~_bb_vec_info ()
2503 {
2504 for (gimple_stmt_iterator si = region_begin;
2505 gsi_stmt (si) != gsi_stmt (region_end); gsi_next (&si))
2506 {
2507 gimple *stmt = gsi_stmt (si);
2508 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2509
2510 if (stmt_info)
2511 /* Free stmt_vec_info. */
2512 free_stmt_vec_info (stmt);
2513
2514 /* Reset region marker. */
2515 gimple_set_uid (stmt, -1);
2516 }
2517
2518 bb->aux = NULL;
2519 }
2520
2521 /* Subroutine of vect_slp_analyze_node_operations. Handle the root of NODE,
2522 given that child nodes have already been processed, and that
2523 their def types currently match their SLP node's def type. */
2524
2525 static bool
2526 vect_slp_analyze_node_operations_1 (vec_info *vinfo, slp_tree node,
2527 slp_instance node_instance,
2528 stmt_vector_for_cost *cost_vec)
2529 {
2530 gimple *stmt = SLP_TREE_SCALAR_STMTS (node)[0];
2531 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2532 gcc_assert (stmt_info);
2533 gcc_assert (STMT_SLP_TYPE (stmt_info) != loop_vect);
2534
2535 /* For BB vectorization vector types are assigned here.
2536 Memory accesses already got their vector type assigned
2537 in vect_analyze_data_refs. */
2538 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
2539 if (bb_vinfo
2540 && ! STMT_VINFO_DATA_REF (stmt_info))
2541 {
2542 tree vectype, nunits_vectype;
2543 if (!vect_get_vector_types_for_stmt (stmt_info, &vectype,
2544 &nunits_vectype))
2545 /* We checked this when building the node. */
2546 gcc_unreachable ();
2547 if (vectype == boolean_type_node)
2548 {
2549 vectype = vect_get_mask_type_for_stmt (stmt_info);
2550 if (!vectype)
2551 /* vect_get_mask_type_for_stmt has already explained the
2552 failure. */
2553 return false;
2554 }
2555
2556 gimple *sstmt;
2557 unsigned int i;
2558 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, sstmt)
2559 STMT_VINFO_VECTYPE (vinfo_for_stmt (sstmt)) = vectype;
2560 }
2561
2562 /* Calculate the number of vector statements to be created for the
2563 scalar stmts in this node. For SLP reductions it is equal to the
2564 number of vector statements in the children (which has already been
2565 calculated by the recursive call). Otherwise it is the number of
2566 scalar elements in one scalar iteration (DR_GROUP_SIZE) multiplied by
2567 VF divided by the number of elements in a vector. */
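/* E.g. (illustrative): with VF == 8, a group size of 4 and vectors of
   8 elements, 8 * 4 / 8 == 4 vector statements are created.  */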
2568 if (!STMT_VINFO_GROUPED_ACCESS (stmt_info)
2569 && REDUC_GROUP_FIRST_ELEMENT (stmt_info))
2570 SLP_TREE_NUMBER_OF_VEC_STMTS (node)
2571 = SLP_TREE_NUMBER_OF_VEC_STMTS (SLP_TREE_CHILDREN (node)[0]);
2572 else
2573 {
2574 poly_uint64 vf;
2575 if (loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo))
2576 vf = loop_vinfo->vectorization_factor;
2577 else
2578 vf = 1;
2579 unsigned int group_size = SLP_INSTANCE_GROUP_SIZE (node_instance);
2580 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
2581 SLP_TREE_NUMBER_OF_VEC_STMTS (node)
2582 = vect_get_num_vectors (vf * group_size, vectype);
2583 }
2584
2585 bool dummy;
2586 return vect_analyze_stmt (stmt, &dummy, node, node_instance, cost_vec);
2587 }
2588
2589 /* Analyze statements contained in SLP tree NODE after recursively analyzing
2590 the subtree. NODE_INSTANCE contains NODE and VINFO contains INSTANCE.
2591
2592 Return true if the operations are supported. */
2593
2594 static bool
2595 vect_slp_analyze_node_operations (vec_info *vinfo, slp_tree node,
2596 slp_instance node_instance,
2597 scalar_stmts_to_slp_tree_map_t *visited,
2598 scalar_stmts_to_slp_tree_map_t *lvisited,
2599 stmt_vector_for_cost *cost_vec)
2600 {
2601 int i, j;
2602 slp_tree child;
2603
2604 if (SLP_TREE_DEF_TYPE (node) != vect_internal_def)
2605 return true;
2606
2607 /* If we already analyzed the exact same set of scalar stmts we're done.
2608 We share the generated vector stmts for those. */
2609 slp_tree *leader;
2610 if ((leader = visited->get (SLP_TREE_SCALAR_STMTS (node)))
2611 || (leader = lvisited->get (SLP_TREE_SCALAR_STMTS (node))))
2612 {
2613 SLP_TREE_NUMBER_OF_VEC_STMTS (node)
2614 = SLP_TREE_NUMBER_OF_VEC_STMTS (*leader);
2615 return true;
2616 }
2617
2618 /* The SLP graph is acyclic, so not caching whether we failed or succeeded
2619 causes no issue, since we throw away the lvisited set
2620 when we fail. */
2621 lvisited->put (SLP_TREE_SCALAR_STMTS (node).copy (), node);
2622
2623 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
2624 if (!vect_slp_analyze_node_operations (vinfo, child, node_instance,
2625 visited, lvisited, cost_vec))
2626 return false;
2627
2628 /* Push SLP node def-type to stmt operands. */
2629 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), j, child)
2630 if (SLP_TREE_DEF_TYPE (child) != vect_internal_def)
2631 STMT_VINFO_DEF_TYPE (vinfo_for_stmt (SLP_TREE_SCALAR_STMTS (child)[0]))
2632 = SLP_TREE_DEF_TYPE (child);
2633 bool res = vect_slp_analyze_node_operations_1 (vinfo, node, node_instance,
2634 cost_vec);
2635 /* Restore def-types. */
2636 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), j, child)
2637 if (SLP_TREE_DEF_TYPE (child) != vect_internal_def)
2638 STMT_VINFO_DEF_TYPE (vinfo_for_stmt (SLP_TREE_SCALAR_STMTS (child)[0]))
2639 = vect_internal_def;
2640 if (! res)
2641 return false;
2642
2643 return true;
2644 }
2645
2646
2647 /* Analyze statements in SLP instances of VINFO. Return true if the
2648 operations are supported. */
2649
2650 bool
2651 vect_slp_analyze_operations (vec_info *vinfo)
2652 {
2653 slp_instance instance;
2654 int i;
2655
2656 DUMP_VECT_SCOPE ("vect_slp_analyze_operations");
2657
2658 scalar_stmts_to_slp_tree_map_t *visited
2659 = new scalar_stmts_to_slp_tree_map_t ();
2660 for (i = 0; vinfo->slp_instances.iterate (i, &instance); )
2661 {
2662 scalar_stmts_to_slp_tree_map_t lvisited;
2663 stmt_vector_for_cost cost_vec;
2664 cost_vec.create (2);
2665 if (!vect_slp_analyze_node_operations (vinfo,
2666 SLP_INSTANCE_TREE (instance),
2667 instance, visited, &lvisited,
2668 &cost_vec))
2669 {
2670 dump_printf_loc (MSG_NOTE, vect_location,
2671 "removing SLP instance operations starting from: ");
2672 dump_gimple_stmt (MSG_NOTE, TDF_SLIM,
2673 SLP_TREE_SCALAR_STMTS
2674 (SLP_INSTANCE_TREE (instance))[0], 0);
2675 vect_free_slp_instance (instance, false);
2676 vinfo->slp_instances.ordered_remove (i);
2677 cost_vec.release ();
2678 }
2679 else
2680 {
2681 for (scalar_stmts_to_slp_tree_map_t::iterator x = lvisited.begin();
2682 x != lvisited.end(); ++x)
2683 visited->put ((*x).first.copy (), (*x).second);
2684 i++;
2685
2686 add_stmt_costs (vinfo->target_cost_data, &cost_vec);
2687 cost_vec.release ();
2688 }
2689 }
2690 delete visited;
2691
2692 return !vinfo->slp_instances.is_empty ();
2693 }
2694
2695
2696 /* Compute the scalar cost of the SLP node NODE and its children
2697 and record it in COST_VEC. Do not account defs that are marked in LIFE and
2698 update LIFE according to uses of NODE. */
2699
2700 static void
2701 vect_bb_slp_scalar_cost (basic_block bb,
2702 slp_tree node, vec<bool, va_heap> *life,
2703 stmt_vector_for_cost *cost_vec)
2704 {
2705 unsigned i;
2706 gimple *stmt;
2707 slp_tree child;
2708
2709 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, stmt)
2710 {
2711 ssa_op_iter op_iter;
2712 def_operand_p def_p;
2713 stmt_vec_info stmt_info;
2714
2715 if ((*life)[i])
2716 continue;
2717
2718 /* If there is a non-vectorized use of the defs then the scalar
2719 stmt is kept live in which case we do not account it or any
2720 required defs in the SLP children in the scalar cost. This
2721 way we make the vectorization more costly when compared to
2722 the scalar cost. */
2723 FOR_EACH_SSA_DEF_OPERAND (def_p, stmt, op_iter, SSA_OP_DEF)
2724 {
2725 imm_use_iterator use_iter;
2726 gimple *use_stmt;
2727 FOR_EACH_IMM_USE_STMT (use_stmt, use_iter, DEF_FROM_PTR (def_p))
2728 if (!is_gimple_debug (use_stmt)
2729 && (! vect_stmt_in_region_p (vinfo_for_stmt (stmt)->vinfo,
2730 use_stmt)
2731 || ! PURE_SLP_STMT (vinfo_for_stmt (use_stmt))))
2732 {
2733 (*life)[i] = true;
2734 BREAK_FROM_IMM_USE_STMT (use_iter);
2735 }
2736 }
2737 if ((*life)[i])
2738 continue;
2739
2740 /* Count scalar stmts only once. */
2741 if (gimple_visited_p (stmt))
2742 continue;
2743 gimple_set_visited (stmt, true);
2744
2745 stmt_info = vinfo_for_stmt (stmt);
2746 vect_cost_for_stmt kind;
2747 if (STMT_VINFO_DATA_REF (stmt_info))
2748 {
2749 if (DR_IS_READ (STMT_VINFO_DATA_REF (stmt_info)))
2750 kind = scalar_load;
2751 else
2752 kind = scalar_store;
2753 }
2754 else
2755 kind = scalar_stmt;
2756 record_stmt_cost (cost_vec, 1, kind, stmt_info, 0, vect_body);
2757 }
2758
2759 auto_vec<bool, 20> subtree_life;
2760 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
2761 {
2762 if (SLP_TREE_DEF_TYPE (child) == vect_internal_def)
2763 {
2764 /* Do not directly pass LIFE to the recursive call, copy it to
2765 confine changes in the callee to the current child/subtree. */
2766 subtree_life.safe_splice (*life);
2767 vect_bb_slp_scalar_cost (bb, child, &subtree_life, cost_vec);
2768 subtree_life.truncate (0);
2769 }
2770 }
2771 }
2772
2773 /* Check if vectorization of the basic block is profitable. */
2774
2775 static bool
2776 vect_bb_vectorization_profitable_p (bb_vec_info bb_vinfo)
2777 {
2778 vec<slp_instance> slp_instances = BB_VINFO_SLP_INSTANCES (bb_vinfo);
2779 slp_instance instance;
2780 int i;
2781 unsigned int vec_inside_cost = 0, vec_outside_cost = 0, scalar_cost = 0;
2782 unsigned int vec_prologue_cost = 0, vec_epilogue_cost = 0;
2783
2784 /* Calculate scalar cost. */
2785 stmt_vector_for_cost scalar_costs;
2786 scalar_costs.create (0);
2787 FOR_EACH_VEC_ELT (slp_instances, i, instance)
2788 {
2789 auto_vec<bool, 20> life;
2790 life.safe_grow_cleared (SLP_INSTANCE_GROUP_SIZE (instance));
2791 vect_bb_slp_scalar_cost (BB_VINFO_BB (bb_vinfo),
2792 SLP_INSTANCE_TREE (instance),
2793 &life, &scalar_costs);
2794 }
2795 void *target_cost_data = init_cost (NULL);
2796 add_stmt_costs (target_cost_data, &scalar_costs);
2797 scalar_costs.release ();
2798 unsigned dummy;
2799 finish_cost (target_cost_data, &dummy, &scalar_cost, &dummy);
2800 destroy_cost_data (target_cost_data);
2801
2802 /* Unset visited flag. */
2803 for (gimple_stmt_iterator gsi = bb_vinfo->region_begin;
2804 gsi_stmt (gsi) != gsi_stmt (bb_vinfo->region_end); gsi_next (&gsi))
2805 gimple_set_visited (gsi_stmt (gsi), false);
2806
2807 /* Complete the target-specific cost calculation. */
2808 finish_cost (BB_VINFO_TARGET_COST_DATA (bb_vinfo), &vec_prologue_cost,
2809 &vec_inside_cost, &vec_epilogue_cost);
2810
2811 vec_outside_cost = vec_prologue_cost + vec_epilogue_cost;
2812
2813 if (dump_enabled_p ())
2814 {
2815 dump_printf_loc (MSG_NOTE, vect_location, "Cost model analysis: \n");
2816 dump_printf (MSG_NOTE, " Vector inside of basic block cost: %d\n",
2817 vec_inside_cost);
2818 dump_printf (MSG_NOTE, " Vector prologue cost: %d\n", vec_prologue_cost);
2819 dump_printf (MSG_NOTE, " Vector epilogue cost: %d\n", vec_epilogue_cost);
2820 dump_printf (MSG_NOTE, " Scalar cost of basic block: %d\n", scalar_cost);
2821 }
2822
2823 /* Vectorization is profitable if its cost is no more than the cost of the
2824 scalar version. Note that we err on the vector side for equal cost because
2825 the cost estimate is otherwise quite pessimistic (constant uses are
2826 free on the scalar side but cost a load on the vector side for
2827 example). */
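/* E.g. (illustrative numbers): a prologue+epilogue cost of 2, a vector
   body cost of 10 and a scalar cost of 12 give 2 + 10 <= 12, so the
   block is still vectorized.  */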
2828 if (vec_outside_cost + vec_inside_cost > scalar_cost)
2829 return false;
2830
2831 return true;
2832 }
2833
2834 /* Check if the basic block can be vectorized. Returns a bb_vec_info
2835 if so and sets fatal to true if failure is independent of
2836 current_vector_size. */
2837
2838 static bb_vec_info
2839 vect_slp_analyze_bb_1 (gimple_stmt_iterator region_begin,
2840 gimple_stmt_iterator region_end,
2841 vec<data_reference_p> datarefs, int n_stmts,
2842 bool &fatal, vec_info_shared *shared)
2843 {
2844 bb_vec_info bb_vinfo;
2845 slp_instance instance;
2846 int i;
2847 poly_uint64 min_vf = 2;
2848
2849 /* The first group of checks is independent of the vector size. */
2850 fatal = true;
2851
2852 if (n_stmts > PARAM_VALUE (PARAM_SLP_MAX_INSNS_IN_BB))
2853 {
2854 if (dump_enabled_p ())
2855 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2856 "not vectorized: too many instructions in "
2857 "basic block.\n");
2858 free_data_refs (datarefs);
2859 return NULL;
2860 }
2861
2862 bb_vinfo = new _bb_vec_info (region_begin, region_end, shared);
2863 if (!bb_vinfo)
2864 return NULL;
2865
2866 BB_VINFO_DATAREFS (bb_vinfo) = datarefs;
2867 bb_vinfo->shared->save_datarefs ();
2868
2869 /* Analyze the data references. */
2870
2871 if (!vect_analyze_data_refs (bb_vinfo, &min_vf))
2872 {
2873 if (dump_enabled_p ())
2874 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2875 "not vectorized: unhandled data-ref in basic "
2876 "block.\n");
2877
2878 delete bb_vinfo;
2879 return NULL;
2880 }
2881
2882 if (BB_VINFO_DATAREFS (bb_vinfo).length () < 2)
2883 {
2884 if (dump_enabled_p ())
2885 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2886 "not vectorized: not enough data-refs in "
2887 "basic block.\n");
2888
2889 delete bb_vinfo;
2890 return NULL;
2891 }
2892
2893 if (!vect_analyze_data_ref_accesses (bb_vinfo))
2894 {
2895 if (dump_enabled_p ())
2896 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2897 "not vectorized: unhandled data access in "
2898 "basic block.\n");
2899
2900 delete bb_vinfo;
2901 return NULL;
2902 }
2903
2904 /* If there are no grouped stores in the region there is no need
2905 to continue with pattern recog as vect_analyze_slp will fail
2906 anyway. */
2907 if (bb_vinfo->grouped_stores.is_empty ())
2908 {
2909 if (dump_enabled_p ())
2910 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2911 "not vectorized: no grouped stores in "
2912 "basic block.\n");
2913
2914 delete bb_vinfo;
2915 return NULL;
2916 }
2917
2918 /* The rest of the analysis below depends on the vector size in some way. */
2919 fatal = false;
2920
2921 vect_pattern_recog (bb_vinfo);
2922
2923 /* Check the SLP opportunities in the basic block, analyze and build SLP
2924 trees. */
2925 if (!vect_analyze_slp (bb_vinfo, n_stmts))
2926 {
2927 if (dump_enabled_p ())
2928 {
2929 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2930 "Failed to SLP the basic block.\n");
2931 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2932 "not vectorized: failed to find SLP opportunities "
2933 "in basic block.\n");
2934 }
2935
2936 delete bb_vinfo;
2937 return NULL;
2938 }
2939
2940 vect_record_base_alignments (bb_vinfo);
2941
2942 /* Analyze and verify the alignment of data references and the
2943 dependence in the SLP instances. */
2944 for (i = 0; BB_VINFO_SLP_INSTANCES (bb_vinfo).iterate (i, &instance); )
2945 {
2946 if (! vect_slp_analyze_and_verify_instance_alignment (instance)
2947 || ! vect_slp_analyze_instance_dependence (instance))
2948 {
2949 dump_printf_loc (MSG_NOTE, vect_location,
2950 "removing SLP instance operations starting from: ");
2951 dump_gimple_stmt (MSG_NOTE, TDF_SLIM,
2952 SLP_TREE_SCALAR_STMTS
2953 (SLP_INSTANCE_TREE (instance))[0], 0);
2954 vect_free_slp_instance (instance, false);
2955 BB_VINFO_SLP_INSTANCES (bb_vinfo).ordered_remove (i);
2956 continue;
2957 }
2958
2959 /* Mark all the statements that we want to vectorize as pure SLP and
2960 relevant. */
2961 vect_mark_slp_stmts (SLP_INSTANCE_TREE (instance), pure_slp, -1);
2962 vect_mark_slp_stmts_relevant (SLP_INSTANCE_TREE (instance));
2963
2964 i++;
2965 }
2966 if (! BB_VINFO_SLP_INSTANCES (bb_vinfo).length ())
2967 {
2968 delete bb_vinfo;
2969 return NULL;
2970 }
2971
2972 if (!vect_slp_analyze_operations (bb_vinfo))
2973 {
2974 if (dump_enabled_p ())
2975 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2976 "not vectorized: bad operation in basic block.\n");
2977
2978 delete bb_vinfo;
2979 return NULL;
2980 }
2981
2982 /* Cost model: check if the vectorization is worthwhile. */
2983 if (!unlimited_cost_model (NULL)
2984 && !vect_bb_vectorization_profitable_p (bb_vinfo))
2985 {
2986 if (dump_enabled_p ())
2987 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2988 "not vectorized: vectorization is not "
2989 "profitable.\n");
2990
2991 delete bb_vinfo;
2992 return NULL;
2993 }
2994
2995 if (dump_enabled_p ())
2996 dump_printf_loc (MSG_NOTE, vect_location,
2997 "Basic block will be vectorized using SLP\n");
2998
2999 return bb_vinfo;
3000 }
3001
3002
3003 /* Main entry for the BB vectorizer. Analyze and transform BB, returns
3004 true if anything in the basic-block was vectorized. */
3005
3006 bool
3007 vect_slp_bb (basic_block bb)
3008 {
3009 bb_vec_info bb_vinfo;
3010 gimple_stmt_iterator gsi;
3011 bool any_vectorized = false;
3012 auto_vector_sizes vector_sizes;
3013
3014 DUMP_VECT_SCOPE ("vect_slp_analyze_bb");
3015
3016 /* Autodetect first vector size we try. */
3017 current_vector_size = 0;
3018 targetm.vectorize.autovectorize_vector_sizes (&vector_sizes);
3019 unsigned int next_size = 0;
3020
3021 gsi = gsi_start_bb (bb);
3022
3023 poly_uint64 autodetected_vector_size = 0;
3024 while (1)
3025 {
3026 if (gsi_end_p (gsi))
3027 break;
3028
3029 gimple_stmt_iterator region_begin = gsi;
3030 vec<data_reference_p> datarefs = vNULL;
3031 int insns = 0;
3032
3033 for (; !gsi_end_p (gsi); gsi_next (&gsi))
3034 {
3035 gimple *stmt = gsi_stmt (gsi);
3036 if (is_gimple_debug (stmt))
3037 continue;
3038 insns++;
3039
3040 if (gimple_location (stmt) != UNKNOWN_LOCATION)
3041 vect_location = stmt;
3042
3043 if (!vect_find_stmt_data_reference (NULL, stmt, &datarefs))
3044 break;
3045 }
3046
3047 /* Skip leading unhandled stmts. */
3048 if (gsi_stmt (region_begin) == gsi_stmt (gsi))
3049 {
3050 gsi_next (&gsi);
3051 continue;
3052 }
3053
3054 gimple_stmt_iterator region_end = gsi;
3055
3056 bool vectorized = false;
3057 bool fatal = false;
3058 vec_info_shared shared;
3059 bb_vinfo = vect_slp_analyze_bb_1 (region_begin, region_end,
3060 datarefs, insns, fatal, &shared);
3061 if (bb_vinfo
3062 && dbg_cnt (vect_slp))
3063 {
3064 if (dump_enabled_p ())
3065 dump_printf_loc (MSG_NOTE, vect_location, "SLPing BB part\n");
3066
3067 bb_vinfo->shared->check_datarefs ();
3068 vect_schedule_slp (bb_vinfo);
3069
3070 unsigned HOST_WIDE_INT bytes;
3071 if (current_vector_size.is_constant (&bytes))
3072 dump_printf_loc (MSG_OPTIMIZED_LOCATIONS, vect_location,
3073 "basic block part vectorized using "
3074 HOST_WIDE_INT_PRINT_UNSIGNED " byte "
3075 "vectors\n", bytes);
3076 else
3077 dump_printf_loc (MSG_OPTIMIZED_LOCATIONS, vect_location,
3078 "basic block part vectorized using variable "
3079 "length vectors\n");
3080
3081 vectorized = true;
3082 }
3083 delete bb_vinfo;
3084
3085 any_vectorized |= vectorized;
3086
3087 if (next_size == 0)
3088 autodetected_vector_size = current_vector_size;
3089
3090 if (next_size < vector_sizes.length ()
3091 && known_eq (vector_sizes[next_size], autodetected_vector_size))
3092 next_size += 1;
3093
3094 if (vectorized
3095 || next_size == vector_sizes.length ()
3096 || known_eq (current_vector_size, 0U)
3097 /* If vect_slp_analyze_bb_1 signaled that analysis for all
3098 vector sizes will fail do not bother iterating. */
3099 || fatal)
3100 {
3101 if (gsi_end_p (region_end))
3102 break;
3103
3104 /* Skip the unhandled stmt. */
3105 gsi_next (&gsi);
3106
3107 /* And reset vector sizes. */
3108 current_vector_size = 0;
3109 next_size = 0;
3110 }
3111 else
3112 {
3113 /* Try the next biggest vector size. */
3114 current_vector_size = vector_sizes[next_size++];
3115 if (dump_enabled_p ())
3116 {
3117 dump_printf_loc (MSG_NOTE, vect_location,
3118 "***** Re-trying analysis with "
3119 "vector size ");
3120 dump_dec (MSG_NOTE, current_vector_size);
3121 dump_printf (MSG_NOTE, "\n");
3122 }
3123
3124 /* Start over. */
3125 gsi = region_begin;
3126 }
3127 }
3128
3129 return any_vectorized;
3130 }
3131
3132
3133 /* Return true if the vector type of the boolean constant that is the OPNUM
3134 operand in statement STMT is a boolean vector. */
3135
3136 static bool
3137 vect_mask_constant_operand_p (gimple *stmt, int opnum)
3138 {
3139 stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
3140 enum tree_code code = gimple_expr_code (stmt);
3141 tree op, vectype;
3142 enum vect_def_type dt;
3143
3144 /* For comparisons and COND_EXPR the type is chosen depending
3145 on the other comparison operand. */
3146 if (TREE_CODE_CLASS (code) == tcc_comparison)
3147 {
3148 if (opnum)
3149 op = gimple_assign_rhs1 (stmt);
3150 else
3151 op = gimple_assign_rhs2 (stmt);
3152
3153 if (!vect_is_simple_use (op, stmt_vinfo->vinfo, &dt, &vectype))
3154 gcc_unreachable ();
3155
3156 return !vectype || VECTOR_BOOLEAN_TYPE_P (vectype);
3157 }
3158
3159 if (code == COND_EXPR)
3160 {
3161 tree cond = gimple_assign_rhs1 (stmt);
3162
3163 if (TREE_CODE (cond) == SSA_NAME)
3164 op = cond;
3165 else if (opnum)
3166 op = TREE_OPERAND (cond, 1);
3167 else
3168 op = TREE_OPERAND (cond, 0);
3169
3170 if (!vect_is_simple_use (op, stmt_vinfo->vinfo, &dt, &vectype))
3171 gcc_unreachable ();
3172
3173 return !vectype || VECTOR_BOOLEAN_TYPE_P (vectype);
3174 }
3175
3176 return VECTOR_BOOLEAN_TYPE_P (STMT_VINFO_VECTYPE (stmt_vinfo));
3177 }
3178
3179 /* Build a variable-length vector in which the elements in ELTS are repeated
3180 to fill NRESULTS vectors of type VECTOR_TYPE. Store the vectors in
3181 RESULTS and add any new instructions to SEQ.
3182
3183 The approach we use is:
3184
3185 (1) Find a vector mode VM with integer elements of mode IM.
3186
3187 (2) Replace ELTS[0:NELTS] with ELTS'[0:NELTS'], where each element of
3188 ELTS' has mode IM. This involves creating NELTS' VIEW_CONVERT_EXPRs
3189 from small vectors to IM.
3190
3191 (3) Duplicate each ELTS'[I] into a vector of mode VM.
3192
3193 (4) Use a tree of interleaving VEC_PERM_EXPRs to create VMs with the
3194 correct byte contents.
3195
3196 (5) Use VIEW_CONVERT_EXPR to cast the final VMs to the required type.
3197
3198 We try to find the largest IM for which this sequence works, in order
3199 to cut down on the number of interleaves. */
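/* For instance (an illustrative sketch; concrete modes depend on the
   target): to repeat four 32-bit elements {a, b, c, d}, step (1) may pick
   IM == DImode, giving NELTS' == 2; steps (2)-(3) build the DImode values
   ab and cd and duplicate each into a vector of mode VM; one interleaving
   VEC_PERM_EXPR in step (4) then yields the byte pattern
   {a, b, c, d, a, b, c, d, ...}, which step (5) view-converts to
   VECTOR_TYPE.  */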
3200
3201 void
3202 duplicate_and_interleave (gimple_seq *seq, tree vector_type, vec<tree> elts,
3203 unsigned int nresults, vec<tree> &results)
3204 {
3205 unsigned int nelts = elts.length ();
3206 tree element_type = TREE_TYPE (vector_type);
3207
3208 /* (1) Find a vector mode VM with integer elements of mode IM. */
3209 unsigned int nvectors = 1;
3210 tree new_vector_type;
3211 tree permutes[2];
3212 if (!can_duplicate_and_interleave_p (nelts, TYPE_MODE (element_type),
3213 &nvectors, &new_vector_type,
3214 permutes))
3215 gcc_unreachable ();
3216
3217 /* Get a vector type that holds ELTS[0:NELTS/NELTS']. */
3218 unsigned int partial_nelts = nelts / nvectors;
3219 tree partial_vector_type = build_vector_type (element_type, partial_nelts);
3220
3221 tree_vector_builder partial_elts;
3222 auto_vec<tree, 32> pieces (nvectors * 2);
3223 pieces.quick_grow (nvectors * 2);
3224 for (unsigned int i = 0; i < nvectors; ++i)
3225 {
3226 /* (2) Replace ELTS[0:NELTS] with ELTS'[0:NELTS'], where each element of
3227 ELTS' has mode IM. */
3228 partial_elts.new_vector (partial_vector_type, partial_nelts, 1);
3229 for (unsigned int j = 0; j < partial_nelts; ++j)
3230 partial_elts.quick_push (elts[i * partial_nelts + j]);
3231 tree t = gimple_build_vector (seq, &partial_elts);
3232 t = gimple_build (seq, VIEW_CONVERT_EXPR,
3233 TREE_TYPE (new_vector_type), t);
3234
3235 /* (3) Duplicate each ELTS'[I] into a vector of mode VM. */
3236 pieces[i] = gimple_build_vector_from_val (seq, new_vector_type, t);
3237 }
3238
3239 /* (4) Use a tree of VEC_PERM_EXPRs to create a single VM with the
3240 correct byte contents.
3241
3242 We need to repeat the following operation log2(nvectors) times:
3243
3244 out[i * 2] = VEC_PERM_EXPR (in[i], in[i + hi_start], lo_permute);
3245 out[i * 2 + 1] = VEC_PERM_EXPR (in[i], in[i + hi_start], hi_permute);
3246
3247 However, if each input repeats every N elements and the VF is
3248 a multiple of N * 2, the HI result is the same as the LO. */
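/* E.g. (illustrative): with NVECTORS == 4 the loop below runs twice
   (in_repeat == 1 and 2), each round pairing piece I with piece
   I + HI_START and then swapping the input and output halves of
   PIECES.  */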
3249 unsigned int in_start = 0;
3250 unsigned int out_start = nvectors;
3251 unsigned int hi_start = nvectors / 2;
3252 /* A bound on the number of outputs needed to produce NRESULTS results
3253 in the final iteration. */
3254 unsigned int noutputs_bound = nvectors * nresults;
3255 for (unsigned int in_repeat = 1; in_repeat < nvectors; in_repeat *= 2)
3256 {
3257 noutputs_bound /= 2;
3258 unsigned int limit = MIN (noutputs_bound, nvectors);
3259 for (unsigned int i = 0; i < limit; ++i)
3260 {
3261 if ((i & 1) != 0
3262 && multiple_p (TYPE_VECTOR_SUBPARTS (new_vector_type),
3263 2 * in_repeat))
3264 {
3265 pieces[out_start + i] = pieces[out_start + i - 1];
3266 continue;
3267 }
3268
3269 tree output = make_ssa_name (new_vector_type);
3270 tree input1 = pieces[in_start + (i / 2)];
3271 tree input2 = pieces[in_start + (i / 2) + hi_start];
3272 gassign *stmt = gimple_build_assign (output, VEC_PERM_EXPR,
3273 input1, input2,
3274 permutes[i & 1]);
3275 gimple_seq_add_stmt (seq, stmt);
3276 pieces[out_start + i] = output;
3277 }
3278 std::swap (in_start, out_start);
3279 }
3280
3281 /* (5) Use VIEW_CONVERT_EXPR to cast the final VM to the required type. */
3282 results.reserve (nresults);
3283 for (unsigned int i = 0; i < nresults; ++i)
3284 if (i < nvectors)
3285 results.quick_push (gimple_build (seq, VIEW_CONVERT_EXPR, vector_type,
3286 pieces[in_start + i]));
3287 else
3288 results.quick_push (results[i - nvectors]);
3289 }
3290
3291
3292 /* For constant and loop invariant defs of SLP_NODE this function returns
3293 (vector) defs (VEC_OPRNDS) that will be used in the vectorized stmts.
3294 OP_NUM determines if we gather defs for operand 0 or operand 1 of the RHS of
3295 scalar stmts. NUMBER_OF_VECTORS is the number of vector defs to create. */
3298
3299 static void
3300 vect_get_constant_vectors (tree op, slp_tree slp_node,
3301 vec<tree> *vec_oprnds,
3302 unsigned int op_num, unsigned int number_of_vectors)
3303 {
3304 vec<gimple *> stmts = SLP_TREE_SCALAR_STMTS (slp_node);
3305 gimple *stmt = stmts[0];
3306 stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
3307 unsigned HOST_WIDE_INT nunits;
3308 tree vec_cst;
3309 unsigned j, number_of_places_left_in_vector;
3310 tree vector_type;
3311 tree vop;
3312 int group_size = stmts.length ();
3313 unsigned int vec_num, i;
3314 unsigned number_of_copies = 1;
3315 vec<tree> voprnds;
3316 voprnds.create (number_of_vectors);
3317 bool constant_p, is_store;
3318 tree neutral_op = NULL;
3319 enum tree_code code = gimple_expr_code (stmt);
3320 gimple_seq ctor_seq = NULL;
3321 auto_vec<tree, 16> permute_results;
3322
3323 /* Check if vector type is a boolean vector. */
3324 if (VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (op))
3325 && vect_mask_constant_operand_p (stmt, op_num))
3326 vector_type
3327 = build_same_sized_truth_vector_type (STMT_VINFO_VECTYPE (stmt_vinfo));
3328 else
3329 vector_type = get_vectype_for_scalar_type (TREE_TYPE (op));
3330
3331 if (STMT_VINFO_DATA_REF (stmt_vinfo))
3332 {
3333 is_store = true;
3334 op = gimple_assign_rhs1 (stmt);
3335 }
3336 else
3337 is_store = false;
3338
3339 gcc_assert (op);
3340
3341 /* NUMBER_OF_COPIES is the number of times we need to use the same values in
3342 created vectors. It is greater than 1 if unrolling is performed.
3343
3344 For example, we have two scalar operands, s1 and s2 (e.g., group of
3345 strided accesses of size two), while NUNITS is four (i.e., four scalars
3346 of this type can be packed in a vector). The output vector will contain
3347 two copies of each scalar operand: {s1, s2, s1, s2}. (NUMBER_OF_COPIES
3348 will be 2).
3349
3350 If GROUP_SIZE > NUNITS, the scalars will be split into several vectors
3351 containing the operands.
3352
3353 For example, NUNITS is four as before, and the group size is 8
3354 (s1, s2, ..., s8). We will create two vectors {s1, s2, s3, s4} and
3355 {s5, s6, s7, s8}. */
3356
3357 /* When using duplicate_and_interleave, we just need one element for
3358 each scalar statement. */
3359 if (!TYPE_VECTOR_SUBPARTS (vector_type).is_constant (&nunits))
3360 nunits = group_size;
3361
3362 number_of_copies = nunits * number_of_vectors / group_size;
3363
3364 number_of_places_left_in_vector = nunits;
3365 constant_p = true;
3366 tree_vector_builder elts (vector_type, nunits, 1);
3367 elts.quick_grow (nunits);
3368 bool place_after_defs = false;
3369 for (j = 0; j < number_of_copies; j++)
3370 {
3371 for (i = group_size - 1; stmts.iterate (i, &stmt); i--)
3372 {
3373 if (is_store)
3374 op = gimple_assign_rhs1 (stmt);
3375 else
3376 {
3377 switch (code)
3378 {
3379 case COND_EXPR:
3380 {
3381 tree cond = gimple_assign_rhs1 (stmt);
3382 if (TREE_CODE (cond) == SSA_NAME)
3383 op = gimple_op (stmt, op_num + 1);
3384 else if (op_num == 0 || op_num == 1)
3385 op = TREE_OPERAND (cond, op_num);
3386 else
3387 {
3388 if (op_num == 2)
3389 op = gimple_assign_rhs2 (stmt);
3390 else
3391 op = gimple_assign_rhs3 (stmt);
3392 }
3393 }
3394 break;
3395
3396 case CALL_EXPR:
3397 op = gimple_call_arg (stmt, op_num);
3398 break;
3399
3400 case LSHIFT_EXPR:
3401 case RSHIFT_EXPR:
3402 case LROTATE_EXPR:
3403 case RROTATE_EXPR:
3404 op = gimple_op (stmt, op_num + 1);
3405 /* Unlike the other binary operators, the shift count of
3406 shifts/rotates is int rather than the same type as the
3407 lhs, so make sure the scalar has the right type if we
3408 are dealing with vectors of
3409 long long/long/short/char. */
3410 if (op_num == 1 && TREE_CODE (op) == INTEGER_CST)
3411 op = fold_convert (TREE_TYPE (vector_type), op);
3412 break;
3413
3414 default:
3415 op = gimple_op (stmt, op_num + 1);
3416 break;
3417 }
3418 }
3419
3420 /* Create 'vect_ = {op0,op1,...,opn}'. */
3421 number_of_places_left_in_vector--;
3422 tree orig_op = op;
3423 if (!types_compatible_p (TREE_TYPE (vector_type), TREE_TYPE (op)))
3424 {
3425 if (CONSTANT_CLASS_P (op))
3426 {
3427 if (VECTOR_BOOLEAN_TYPE_P (vector_type))
3428 {
3429 /* Can't use VIEW_CONVERT_EXPR for booleans because
3430 of possibly different sizes of scalar value and
3431 vector element. */
3432 if (integer_zerop (op))
3433 op = build_int_cst (TREE_TYPE (vector_type), 0);
3434 else if (integer_onep (op))
3435 op = build_all_ones_cst (TREE_TYPE (vector_type));
3436 else
3437 gcc_unreachable ();
3438 }
3439 else
3440 op = fold_unary (VIEW_CONVERT_EXPR,
3441 TREE_TYPE (vector_type), op);
3442 gcc_assert (op && CONSTANT_CLASS_P (op));
3443 }
3444 else
3445 {
3446 tree new_temp = make_ssa_name (TREE_TYPE (vector_type));
3447 gimple *init_stmt;
3448 if (VECTOR_BOOLEAN_TYPE_P (vector_type))
3449 {
3450 tree true_val
3451 = build_all_ones_cst (TREE_TYPE (vector_type));
3452 tree false_val
3453 = build_zero_cst (TREE_TYPE (vector_type));
3454 gcc_assert (INTEGRAL_TYPE_P (TREE_TYPE (op)));
3455 init_stmt = gimple_build_assign (new_temp, COND_EXPR,
3456 op, true_val,
3457 false_val);
3458 }
3459 else
3460 {
3461 op = build1 (VIEW_CONVERT_EXPR, TREE_TYPE (vector_type),
3462 op);
3463 init_stmt
3464 = gimple_build_assign (new_temp, VIEW_CONVERT_EXPR,
3465 op);
3466 }
3467 gimple_seq_add_stmt (&ctor_seq, init_stmt);
3468 op = new_temp;
3469 }
3470 }
3471 elts[number_of_places_left_in_vector] = op;
3472 if (!CONSTANT_CLASS_P (op))
3473 constant_p = false;
3474 if (TREE_CODE (orig_op) == SSA_NAME
3475 && !SSA_NAME_IS_DEFAULT_DEF (orig_op)
3476 && STMT_VINFO_BB_VINFO (stmt_vinfo)
3477 && (STMT_VINFO_BB_VINFO (stmt_vinfo)->bb
3478 == gimple_bb (SSA_NAME_DEF_STMT (orig_op))))
3479 place_after_defs = true;
3480
3481 if (number_of_places_left_in_vector == 0)
3482 {
3483 if (constant_p
3484 ? multiple_p (TYPE_VECTOR_SUBPARTS (vector_type), nunits)
3485 : known_eq (TYPE_VECTOR_SUBPARTS (vector_type), nunits))
3486 vec_cst = gimple_build_vector (&ctor_seq, &elts);
3487 else
3488 {
3489 if (vec_oprnds->is_empty ())
3490 duplicate_and_interleave (&ctor_seq, vector_type, elts,
3491 number_of_vectors,
3492 permute_results);
3493 vec_cst = permute_results[number_of_vectors - j - 1];
3494 }
3495 tree init;
3496 gimple_stmt_iterator gsi;
3497 if (place_after_defs)
3498 {
3499 gsi = gsi_for_stmt
3500 (vect_find_last_scalar_stmt_in_slp (slp_node));
3501 init = vect_init_vector (stmt, vec_cst, vector_type, &gsi);
3502 }
3503 else
3504 init = vect_init_vector (stmt, vec_cst, vector_type, NULL);
3505 if (ctor_seq != NULL)
3506 {
3507 gsi = gsi_for_stmt (SSA_NAME_DEF_STMT (init));
3508 gsi_insert_seq_before (&gsi, ctor_seq, GSI_SAME_STMT);
3509 ctor_seq = NULL;
3510 }
3511 voprnds.quick_push (init);
3512 place_after_defs = false;
3513 number_of_places_left_in_vector = nunits;
3514 constant_p = true;
3515 elts.new_vector (vector_type, nunits, 1);
3516 elts.quick_grow (nunits);
3517 }
3518 }
3519 }
3520
3521 /* Since the vectors are created in the reverse order, we should invert
3522 them. */
3523 vec_num = voprnds.length ();
3524 for (j = vec_num; j != 0; j--)
3525 {
3526 vop = voprnds[j - 1];
3527 vec_oprnds->quick_push (vop);
3528 }
3529
3530 voprnds.release ();
3531
3532 /* In case VF is greater than the unrolling factor needed for the SLP
3533 group of stmts, NUMBER_OF_VECTORS to be created is greater than
3534 NUMBER_OF_SCALARS/NUNITS or NUNITS/NUMBER_OF_SCALARS, and hence we have
3535 to replicate the vectors. */
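/* E.g. (illustrative): if two vectors were created but four are needed,
   the two are pushed again in their original order.  */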
3536 while (number_of_vectors > vec_oprnds->length ())
3537 {
3538 tree neutral_vec = NULL;
3539
3540 if (neutral_op)
3541 {
3542 if (!neutral_vec)
3543 neutral_vec = build_vector_from_val (vector_type, neutral_op);
3544
3545 vec_oprnds->quick_push (neutral_vec);
3546 }
3547 else
3548 {
3549 for (i = 0; vec_oprnds->iterate (i, &vop) && i < vec_num; i++)
3550 vec_oprnds->quick_push (vop);
3551 }
3552 }
3553 }
3554
3555
3556 /* Get vectorized definitions from SLP_NODE, which contains the corresponding
3557 vectorized def-stmts. */
3558
3559 static void
3560 vect_get_slp_vect_defs (slp_tree slp_node, vec<tree> *vec_oprnds)
3561 {
3562 tree vec_oprnd;
3563 gimple *vec_def_stmt;
3564 unsigned int i;
3565
3566 gcc_assert (SLP_TREE_VEC_STMTS (slp_node).exists ());
3567
3568 FOR_EACH_VEC_ELT (SLP_TREE_VEC_STMTS (slp_node), i, vec_def_stmt)
3569 {
3570 gcc_assert (vec_def_stmt);
3571 if (gimple_code (vec_def_stmt) == GIMPLE_PHI)
3572 vec_oprnd = gimple_phi_result (vec_def_stmt);
3573 else
3574 vec_oprnd = gimple_get_lhs (vec_def_stmt);
3575 vec_oprnds->quick_push (vec_oprnd);
3576 }
3577 }
3578
3579
3580 /* Get vectorized definitions for SLP_NODE.
3581 If the scalar definitions are loop invariants or constants, collect them and
3582 call vect_get_constant_vectors () to create vector stmts.
3583 Otherwise, the def-stmts must already have been vectorized, their vectorized
3584 stmts are stored in the corresponding child of SLP_NODE, and we call
3585 vect_get_slp_vect_defs () to retrieve them. */
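/* E.g., for the group { a0 = b0 + x, a1 = b1 + x } OPS contains
   { b0, x }: the definitions of the b's are taken already vectorized
   from a child node, while for the invariant x they are built by
   vect_get_constant_vectors ().  */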
3586
3587 void
3588 vect_get_slp_defs (vec<tree> ops, slp_tree slp_node,
3589 vec<vec<tree> > *vec_oprnds)
3590 {
3591 gimple *first_stmt;
3592 int number_of_vects = 0, i;
3593 unsigned int child_index = 0;
3594 HOST_WIDE_INT lhs_size_unit, rhs_size_unit;
3595 slp_tree child = NULL;
3596 vec<tree> vec_defs;
3597 tree oprnd;
3598 bool vectorized_defs;
3599
3600 first_stmt = SLP_TREE_SCALAR_STMTS (slp_node)[0];
3601 FOR_EACH_VEC_ELT (ops, i, oprnd)
3602 {
3603 /* For each operand we check whether it has vectorized definitions in a
3604 child node or whether we need to create them (for invariants and
3605 constants). We check if the LHS of the first stmt of the next child
3606 matches OPRND. If it does, we found the correct child. Otherwise, we
3607 call vect_get_constant_vectors () and do not advance CHILD_INDEX, so
3608 that this child node is checked again for the next operand. */
3609 vectorized_defs = false;
3610 if (SLP_TREE_CHILDREN (slp_node).length () > child_index)
3611 {
3612 child = SLP_TREE_CHILDREN (slp_node)[child_index];
3613
3614 /* We have to check both pattern and original def, if available. */
3615 if (SLP_TREE_DEF_TYPE (child) == vect_internal_def)
3616 {
3617 gimple *first_def = SLP_TREE_SCALAR_STMTS (child)[0];
3618 gimple *related
3619 = STMT_VINFO_RELATED_STMT (vinfo_for_stmt (first_def));
3620 tree first_def_op;
3621
3622 if (gimple_code (first_def) == GIMPLE_PHI)
3623 first_def_op = gimple_phi_result (first_def);
3624 else
3625 first_def_op = gimple_get_lhs (first_def);
3626 if (operand_equal_p (oprnd, first_def_op, 0)
3627 || (related
3628 && operand_equal_p (oprnd, gimple_get_lhs (related), 0)))
3629 {
3630 /* The number of vector defs is determined by the number of
3631 vector statements in the node from which we get those
3632 statements. */
3633 number_of_vects = SLP_TREE_NUMBER_OF_VEC_STMTS (child);
3634 vectorized_defs = true;
3635 child_index++;
3636 }
3637 }
3638 else
3639 child_index++;
3640 }
3641
3642 if (!vectorized_defs)
3643 {
3644 if (i == 0)
3645 {
3646 number_of_vects = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
3647 /* The number of vector stmts was calculated according to the LHS in
3648 vect_schedule_slp_instance (); fix it here by scaling with the
3649 ratio of the RHS to the LHS size, if necessary. See
3650 vect_get_smallest_scalar_type () for details. */
3651 vect_get_smallest_scalar_type (first_stmt, &lhs_size_unit,
3652 &rhs_size_unit);
3653 if (rhs_size_unit != lhs_size_unit)
3654 {
3655 number_of_vects *= rhs_size_unit;
3656 number_of_vects /= lhs_size_unit;
3657 }
3658 }
3659 }
3660
3661 /* Allocate memory for vectorized defs. */
3662 vec_defs = vNULL;
3663 vec_defs.create (number_of_vects);
3664
3665 /* For reduction defs we call vect_get_constant_vectors (), since we are
3666 looking for initial loop invariant values. */
3667 if (vectorized_defs)
3668 /* The defs are already vectorized. */
3669 vect_get_slp_vect_defs (child, &vec_defs);
3670 else
3671 /* Build vectors from scalar defs. */
3672 vect_get_constant_vectors (oprnd, slp_node, &vec_defs, i,
3673 number_of_vects);
3674
3675 vec_oprnds->quick_push (vec_defs);
3676 }
3677 }
3678
3679 /* Generate vector permute statements from a list of loads in DR_CHAIN.
3680 If ANALYZE_ONLY is TRUE, only check that it is possible to create valid
3681 permute statements for the SLP node NODE of the SLP instance
3682 SLP_NODE_INSTANCE. */
3683
3684 bool
3685 vect_transform_slp_perm_load (slp_tree node, vec<tree> dr_chain,
3686 gimple_stmt_iterator *gsi, poly_uint64 vf,
3687 slp_instance slp_node_instance, bool analyze_only,
3688 unsigned *n_perms)
3689 {
3690 gimple *stmt = SLP_TREE_SCALAR_STMTS (node)[0];
3691 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
3692 tree mask_element_type = NULL_TREE, mask_type;
3693 int vec_index = 0;
3694 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
3695 int group_size = SLP_INSTANCE_GROUP_SIZE (slp_node_instance);
3696 unsigned int mask_element;
3697 machine_mode mode;
3698 unsigned HOST_WIDE_INT nunits, const_vf;
3699
3700 if (!STMT_VINFO_GROUPED_ACCESS (stmt_info))
3701 return false;
3702
3703 stmt_info = vinfo_for_stmt (DR_GROUP_FIRST_ELEMENT (stmt_info));
3704
3705 mode = TYPE_MODE (vectype);
3706
3707 /* At the moment, all permutations are represented using per-element
3708 indices, so we can't cope with variable vector lengths or
3709 vectorization factors. */
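/* (This rejects e.g. variable-length SVE vectors, whose number of
   subparts is not a compile-time constant.)  */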
3710 if (!TYPE_VECTOR_SUBPARTS (vectype).is_constant (&nunits)
3711 || !vf.is_constant (&const_vf))
3712 return false;
3713
3714 /* The generic VEC_PERM_EXPR code always uses an integral type of the
3715 same size as the vector element being permuted. */
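/* For a V4SF vector, for example, the mask elements are 32-bit
   integers and MASK_TYPE is a four-element integer vector type.  */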
3716 mask_element_type = lang_hooks.types.type_for_mode
3717 (int_mode_for_mode (TYPE_MODE (TREE_TYPE (vectype))).require (), 1);
3718 mask_type = get_vectype_for_scalar_type (mask_element_type);
3719 vec_perm_builder mask (nunits, nunits, 1);
3720 mask.quick_grow (nunits);
3721 vec_perm_indices indices;
3722
3723 /* Initialize the vect stmts of NODE so that the generated stmts can be
3724 inserted at the proper positions later. */
3725 if (! analyze_only)
3726 for (unsigned i = SLP_TREE_VEC_STMTS (node).length ();
3727 i < SLP_TREE_NUMBER_OF_VEC_STMTS (node); i++)
3728 SLP_TREE_VEC_STMTS (node).quick_push (NULL);
3729
3730 /* Generate permutation masks for every NODE. The number of masks for
3731 each NODE is equal to GROUP_SIZE.
3732 E.g., suppose we have a group of three nodes with three loads from the
3733 same location in each node, and the vector size is 4. I.e., we have an
3734 a0b0c0a1b1c1... sequence and we need to create the following vectors:
3735 for a's: a0a0a0a1 a1a1a2a2 a2a3a3a3
3736 for b's: b0b0b0b1 b1b1b2b2 b2b3b3b3
3737 ...
3738
3739 The masks for a's should be: {0,0,0,3} {3,3,6,6} {6,9,9,9}.
3740 The last mask is illegal since we assume at most two input vectors per
3741 permute operation, so the mask element values can't reach 2 * NUNITS.
3742 Hence, the last mask must be converted into {2,5,5,5}.
3743 For the first two permutations we need the first and the second input
3744 vectors: {a0,b0,c0,a1} and {b1,c1,a2,b2}, and for the last permutation
3745 we need the second and the third vectors: {b1,c1,a2,b2} and
3746 {c2,a3,b3,c3}. */
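/* Following the a's above with NUNITS 4: the scalar element indices are
   I = {0,0,0,3, 3,3,6,6, 6,9,9,9}; VEC_INDEX = I / 4 selects the input
   vector, and MASK_ELEMENT = I % 4, plus 4 whenever the element comes
   from the second input vector, yields exactly the masks shown.  */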
3747
3748 int vect_stmts_counter = 0;
3749 unsigned int index = 0;
3750 int first_vec_index = -1;
3751 int second_vec_index = -1;
3752 bool noop_p = true;
3753 *n_perms = 0;
3754
3755 for (unsigned int j = 0; j < const_vf; j++)
3756 {
3757 for (int k = 0; k < group_size; k++)
3758 {
3759 unsigned int i = (SLP_TREE_LOAD_PERMUTATION (node)[k]
3760 + j * DR_GROUP_SIZE (stmt_info));
3761 vec_index = i / nunits;
3762 mask_element = i % nunits;
3763 if (vec_index == first_vec_index
3764 || first_vec_index == -1)
3765 {
3766 first_vec_index = vec_index;
3767 }
3768 else if (vec_index == second_vec_index
3769 || second_vec_index == -1)
3770 {
3771 second_vec_index = vec_index;
3772 mask_element += nunits;
3773 }
3774 else
3775 {
3776 if (dump_enabled_p ())
3777 {
3778 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3779 "permutation requires at "
3780 "least three vectors ");
3781 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
3782 stmt, 0);
3783 }
3784 gcc_assert (analyze_only);
3785 return false;
3786 }
3787
3788 gcc_assert (mask_element < 2 * nunits);
3789 if (mask_element != index)
3790 noop_p = false;
3791 mask[index++] = mask_element;
3792
3793 if (index == nunits && !noop_p)
3794 {
3795 indices.new_vector (mask, 2, nunits);
3796 if (!can_vec_perm_const_p (mode, indices))
3797 {
3798 if (dump_enabled_p ())
3799 {
3800 dump_printf_loc (MSG_MISSED_OPTIMIZATION,
3801 vect_location,
3802 "unsupported vect permute { ");
3803 for (i = 0; i < nunits; ++i)
3804 {
3805 dump_dec (MSG_MISSED_OPTIMIZATION, mask[i]);
3806 dump_printf (MSG_MISSED_OPTIMIZATION, " ");
3807 }
3808 dump_printf (MSG_MISSED_OPTIMIZATION, "}\n");
3809 }
3810 gcc_assert (analyze_only);
3811 return false;
3812 }
3813
3814 ++*n_perms;
3815 }
3816
3817 if (index == nunits)
3818 {
3819 if (!analyze_only)
3820 {
3821 tree mask_vec = NULL_TREE;
3822
3823 if (! noop_p)
3824 mask_vec = vec_perm_indices_to_tree (mask_type, indices);
3825
3826 if (second_vec_index == -1)
3827 second_vec_index = first_vec_index;
3828
3829 /* Generate the permute statement if necessary. */
3830 tree first_vec = dr_chain[first_vec_index];
3831 tree second_vec = dr_chain[second_vec_index];
3832 gimple *perm_stmt;
3833 if (! noop_p)
3834 {
3835 tree perm_dest
3836 = vect_create_destination_var (gimple_assign_lhs (stmt),
3837 vectype);
3838 perm_dest = make_ssa_name (perm_dest);
3839 perm_stmt = gimple_build_assign (perm_dest,
3840 VEC_PERM_EXPR,
3841 first_vec, second_vec,
3842 mask_vec);
3843 vect_finish_stmt_generation (stmt, perm_stmt, gsi);
3844 }
3845 else
3846 /* If mask was NULL_TREE generate the requested
3847 identity transform. */
3848 perm_stmt = SSA_NAME_DEF_STMT (first_vec);
3849
3850 /* Store the vector statement in NODE. */
3851 SLP_TREE_VEC_STMTS (node)[vect_stmts_counter++] = perm_stmt;
3852 }
3853
3854 index = 0;
3855 first_vec_index = -1;
3856 second_vec_index = -1;
3857 noop_p = true;
3858 }
3859 }
3860 }
3861
3862 return true;
3863 }
3864
3865 /* Vectorize SLP instance tree in postorder. */
3866
3867 static bool
3868 vect_schedule_slp_instance (slp_tree node, slp_instance instance,
3869 scalar_stmts_to_slp_tree_map_t *bst_map)
3870 {
3871 gimple *stmt;
3872 bool grouped_store, is_store;
3873 gimple_stmt_iterator si;
3874 stmt_vec_info stmt_info;
3875 unsigned int group_size;
3876 tree vectype;
3877 int i, j;
3878 slp_tree child;
3879
3880 if (SLP_TREE_DEF_TYPE (node) != vect_internal_def)
3881 return false;
3882
3883 /* See if we have already vectorized the same set of stmts and reuse their
3884 vectorized stmts. */
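/* This happens when SLP instances contain an identical subtree,
   e.g. the same group of loads; such a subtree is vectorized only once
   and its vector stmts are reused.  */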
3885 if (slp_tree *leader = bst_map->get (SLP_TREE_SCALAR_STMTS (node)))
3886 {
3887 SLP_TREE_VEC_STMTS (node).safe_splice (SLP_TREE_VEC_STMTS (*leader));
3888 return false;
3889 }
3890
3891 bst_map->put (SLP_TREE_SCALAR_STMTS (node).copy (), node);
3892 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
3893 vect_schedule_slp_instance (child, instance, bst_map);
3894
3895 /* Push the SLP node def-type down to the scalar stmts; restored below. */
3896 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
3897 if (SLP_TREE_DEF_TYPE (child) != vect_internal_def)
3898 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (child), j, stmt)
3899 STMT_VINFO_DEF_TYPE (vinfo_for_stmt (stmt)) = SLP_TREE_DEF_TYPE (child);
3900
3901 stmt = SLP_TREE_SCALAR_STMTS (node)[0];
3902 stmt_info = vinfo_for_stmt (stmt);
3903
3904 /* VECTYPE is the type of the destination. */
3905 vectype = STMT_VINFO_VECTYPE (stmt_info);
3906 poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
3907 group_size = SLP_INSTANCE_GROUP_SIZE (instance);
3908
3909 gcc_assert (SLP_TREE_NUMBER_OF_VEC_STMTS (node) != 0);
3910 if (!SLP_TREE_VEC_STMTS (node).exists ())
3911 SLP_TREE_VEC_STMTS (node).create (SLP_TREE_NUMBER_OF_VEC_STMTS (node));
3912
3913 if (dump_enabled_p ())
3914 {
3915 dump_printf_loc (MSG_NOTE, vect_location,
3916 "------>vectorizing SLP node starting from: ");
3917 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
3918 }
3919
3920 /* Vectorized stmts go before the last scalar stmt, which is where
3921 all uses are ready. */
3922 si = gsi_for_stmt (vect_find_last_scalar_stmt_in_slp (node));
3923
3924 /* Mark the first element of the reduction chain as a reduction so that
3925 the node is transformed properly. During the analysis phase only the
3926 last element of the chain is marked as a reduction. */
3927 if (!STMT_VINFO_GROUPED_ACCESS (stmt_info)
3928 && REDUC_GROUP_FIRST_ELEMENT (stmt_info)
3929 && REDUC_GROUP_FIRST_ELEMENT (stmt_info) == stmt)
3930 {
3931 STMT_VINFO_DEF_TYPE (stmt_info) = vect_reduction_def;
3932 STMT_VINFO_TYPE (stmt_info) = reduc_vec_info_type;
3933 }
3934
3935 /* Handle two-operation SLP nodes by vectorizing the group with
3936 both operations and then performing a merge. */
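/* E.g., for the group { a0 = b0 + c0, a1 = b1 - c1, a2 = b2 + c2,
   a3 = b3 - c3 } the whole group is vectorized once as an addition and
   once as a subtraction, and the two results are then blended with a
   VEC_PERM_EXPR whose mask selects lane-wise between them
   ({0, 5, 2, 7} for NUNITS 4).  */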
3937 if (SLP_TREE_TWO_OPERATORS (node))
3938 {
3939 enum tree_code code0 = gimple_assign_rhs_code (stmt);
3940 enum tree_code ocode = ERROR_MARK;
3941 gimple *ostmt;
3942 vec_perm_builder mask (group_size, group_size, 1);
3943 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, ostmt)
3944 if (gimple_assign_rhs_code (ostmt) != code0)
3945 {
3946 mask.quick_push (1);
3947 ocode = gimple_assign_rhs_code (ostmt);
3948 }
3949 else
3950 mask.quick_push (0);
3951 if (ocode != ERROR_MARK)
3952 {
3953 vec<gimple *> v0;
3954 vec<gimple *> v1;
3955 unsigned j;
3956 tree tmask = NULL_TREE;
3957 vect_transform_stmt (stmt, &si, &grouped_store, node, instance);
3958 v0 = SLP_TREE_VEC_STMTS (node).copy ();
3959 SLP_TREE_VEC_STMTS (node).truncate (0);
3960 gimple_assign_set_rhs_code (stmt, ocode);
3961 vect_transform_stmt (stmt, &si, &grouped_store, node, instance);
3962 gimple_assign_set_rhs_code (stmt, code0);
3963 v1 = SLP_TREE_VEC_STMTS (node).copy ();
3964 SLP_TREE_VEC_STMTS (node).truncate (0);
3965 tree meltype = build_nonstandard_integer_type
3966 (GET_MODE_BITSIZE (SCALAR_TYPE_MODE (TREE_TYPE (vectype))), 1);
3967 tree mvectype = get_same_sized_vectype (meltype, vectype);
3968 unsigned k = 0, l;
3969 for (j = 0; j < v0.length (); ++j)
3970 {
3971 /* Enforced by vect_build_slp_tree, which rejects variable-length
3972 vectors for SLP_TREE_TWO_OPERATORS. */
3973 unsigned int const_nunits = nunits.to_constant ();
3974 tree_vector_builder melts (mvectype, const_nunits, 1);
3975 for (l = 0; l < const_nunits; ++l)
3976 {
3977 if (k >= group_size)
3978 k = 0;
3979 tree t = build_int_cst (meltype,
3980 mask[k++] * const_nunits + l);
3981 melts.quick_push (t);
3982 }
3983 tmask = melts.build ();
3984
3985 /* ??? Not all targets support a VEC_PERM_EXPR with a
3986 constant mask that would translate to a vec_merge RTX
3987 (with their vec_perm_const_ok). We can either not
3988 vectorize in that case or let veclower do its job.
3989 Unfortunately that isn't too great and at least for
3990 plus/minus we'd eventually like to match targets'
3991 vector addsub instructions. */
3992 gimple *vstmt;
3993 vstmt = gimple_build_assign (make_ssa_name (vectype),
3994 VEC_PERM_EXPR,
3995 gimple_assign_lhs (v0[j]),
3996 gimple_assign_lhs (v1[j]), tmask);
3997 vect_finish_stmt_generation (stmt, vstmt, &si);
3998 SLP_TREE_VEC_STMTS (node).quick_push (vstmt);
3999 }
4000 v0.release ();
4001 v1.release ();
4002 return false;
4003 }
4004 }
4005 is_store = vect_transform_stmt (stmt, &si, &grouped_store, node, instance);
4006
4007 /* Restore stmt def-types. */
4008 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
4009 if (SLP_TREE_DEF_TYPE (child) != vect_internal_def)
4010 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (child), j, stmt)
4011 STMT_VINFO_DEF_TYPE (vinfo_for_stmt (stmt)) = vect_internal_def;
4012
4013 return is_store;
4014 }
4015
4016 /* Replace the scalar calls in SLP node NODE with assignments setting their
4017 lhs to zero. For loop vectorization this is done in vectorizable_call,
4018 but for SLP it needs to be deferred until the end of vect_schedule_slp,
4019 because multiple SLP instances may refer to the same scalar stmt. */
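/* E.g., a vectorized x = __builtin_sqrtf (y) whose scalar result is no
   longer needed is replaced by x = 0.0f; the dead assignment can then be
   removed by later cleanup passes.  */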
4020
4021 static void
4022 vect_remove_slp_scalar_calls (slp_tree node)
4023 {
4024 gimple *stmt, *new_stmt;
4025 gimple_stmt_iterator gsi;
4026 int i;
4027 slp_tree child;
4028 tree lhs;
4029 stmt_vec_info stmt_info;
4030
4031 if (SLP_TREE_DEF_TYPE (node) != vect_internal_def)
4032 return;
4033
4034 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
4035 vect_remove_slp_scalar_calls (child);
4036
4037 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, stmt)
4038 {
4039 if (!is_gimple_call (stmt) || gimple_bb (stmt) == NULL)
4040 continue;
4041 stmt_info = vinfo_for_stmt (stmt);
4042 if (stmt_info == NULL_STMT_VEC_INFO
4043 || is_pattern_stmt_p (stmt_info)
4044 || !PURE_SLP_STMT (stmt_info))
4045 continue;
4046 lhs = gimple_call_lhs (stmt);
4047 new_stmt = gimple_build_assign (lhs, build_zero_cst (TREE_TYPE (lhs)));
4048 set_vinfo_for_stmt (new_stmt, stmt_info);
4049 set_vinfo_for_stmt (stmt, NULL);
4050 STMT_VINFO_STMT (stmt_info) = new_stmt;
4051 gsi = gsi_for_stmt (stmt);
4052 gsi_replace (&gsi, new_stmt, false);
4053 SSA_NAME_DEF_STMT (gimple_assign_lhs (new_stmt)) = new_stmt;
4054 }
4055 }
4056
4057 /* Generate vector code for all SLP instances in the loop/basic block. */
4058
4059 bool
4060 vect_schedule_slp (vec_info *vinfo)
4061 {
4062 vec<slp_instance> slp_instances;
4063 slp_instance instance;
4064 unsigned int i;
4065 bool is_store = false;
4066
4068 scalar_stmts_to_slp_tree_map_t *bst_map
4069 = new scalar_stmts_to_slp_tree_map_t ();
4070 slp_instances = vinfo->slp_instances;
4071 FOR_EACH_VEC_ELT (slp_instances, i, instance)
4072 {
4073 /* Schedule the tree of INSTANCE. */
4074 is_store = vect_schedule_slp_instance (SLP_INSTANCE_TREE (instance),
4075 instance, bst_map);
4076 if (dump_enabled_p ())
4077 dump_printf_loc (MSG_NOTE, vect_location,
4078 "vectorizing stmts using SLP.\n");
4079 }
4080 delete bst_map;
4081
4082 FOR_EACH_VEC_ELT (slp_instances, i, instance)
4083 {
4084 slp_tree root = SLP_INSTANCE_TREE (instance);
4085 gimple *store;
4086 unsigned int j;
4087 gimple_stmt_iterator gsi;
4088
4089 /* Remove scalar call stmts. Do not do this for basic-block
4090 vectorization as not all uses may be vectorized.
4091 ??? Why should this be necessary? DCE should be able to
4092 remove the stmts itself.
4093 ??? For BB vectorization we can as well remove scalar
4094 stmts starting from the SLP tree root if they have no
4095 uses. */
4096 if (is_a <loop_vec_info> (vinfo))
4097 vect_remove_slp_scalar_calls (root);
4098
4099 for (j = 0; SLP_TREE_SCALAR_STMTS (root).iterate (j, &store)
4100 && j < SLP_INSTANCE_GROUP_SIZE (instance); j++)
4101 {
4102 if (!STMT_VINFO_DATA_REF (vinfo_for_stmt (store)))
4103 break;
4104
4105 if (is_pattern_stmt_p (vinfo_for_stmt (store)))
4106 store = STMT_VINFO_RELATED_STMT (vinfo_for_stmt (store));
4107 /* Free the attached stmt_vec_info and remove the stmt. */
4108 gsi = gsi_for_stmt (store);
4109 unlink_stmt_vdef (store);
4110 gsi_remove (&gsi, true);
4111 release_defs (store);
4112 free_stmt_vec_info (store);
4113 }
4114 }
4115
4116 return is_store;
4117 }