re PR middle-end/91272 ([SVE] Use fully-masked loops for CLASTB reductions)
gcc.git: gcc/tree-vect-stmts.c
1 /* Statement Analysis and Transformation for Vectorization
2 Copyright (C) 2003-2019 Free Software Foundation, Inc.
3 Contributed by Dorit Naishlos <dorit@il.ibm.com>
4 and Ira Rosen <irar@il.ibm.com>
5
6 This file is part of GCC.
7
8 GCC is free software; you can redistribute it and/or modify it under
9 the terms of the GNU General Public License as published by the Free
10 Software Foundation; either version 3, or (at your option) any later
11 version.
12
13 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
14 WARRANTY; without even the implied warranty of MERCHANTABILITY or
15 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
16 for more details.
17
18 You should have received a copy of the GNU General Public License
19 along with GCC; see the file COPYING3. If not see
20 <http://www.gnu.org/licenses/>. */
21
22 #include "config.h"
23 #include "system.h"
24 #include "coretypes.h"
25 #include "backend.h"
26 #include "target.h"
27 #include "rtl.h"
28 #include "tree.h"
29 #include "gimple.h"
30 #include "ssa.h"
31 #include "optabs-tree.h"
32 #include "insn-config.h"
33 #include "recog.h" /* FIXME: for insn_data */
34 #include "cgraph.h"
35 #include "dumpfile.h"
36 #include "alias.h"
37 #include "fold-const.h"
38 #include "stor-layout.h"
39 #include "tree-eh.h"
40 #include "gimplify.h"
41 #include "gimple-iterator.h"
42 #include "gimplify-me.h"
43 #include "tree-cfg.h"
44 #include "tree-ssa-loop-manip.h"
45 #include "cfgloop.h"
46 #include "explow.h"
47 #include "tree-ssa-loop.h"
48 #include "tree-scalar-evolution.h"
49 #include "tree-vectorizer.h"
50 #include "builtins.h"
51 #include "internal-fn.h"
52 #include "tree-vector-builder.h"
53 #include "vec-perm-indices.h"
54 #include "tree-ssa-loop-niter.h"
55 #include "gimple-fold.h"
56 #include "regs.h"
57 #include "attribs.h"
58
59 /* For lang_hooks.types.type_for_mode. */
60 #include "langhooks.h"
61
62 /* Return the vectorized type for the given statement. */
63
64 tree
65 stmt_vectype (class _stmt_vec_info *stmt_info)
66 {
67 return STMT_VINFO_VECTYPE (stmt_info);
68 }
69
70 /* Return TRUE iff the given statement is in an inner loop relative to
71 the loop being vectorized. */
72 bool
73 stmt_in_inner_loop_p (class _stmt_vec_info *stmt_info)
74 {
75 gimple *stmt = STMT_VINFO_STMT (stmt_info);
76 basic_block bb = gimple_bb (stmt);
77 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
78 class loop* loop;
79
80 if (!loop_vinfo)
81 return false;
82
83 loop = LOOP_VINFO_LOOP (loop_vinfo);
84
85 return (bb->loop_father == loop->inner);
86 }
87
88 /* Record the cost of a statement, either by directly informing the
89 target model or by saving it in a vector for later processing.
90 Return a preliminary estimate of the statement's cost. */
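/* A typical call, as used later in this file, costs the broadcast of one
   invariant operand in the prologue:
     record_stmt_cost (cost_vec, 1, scalar_to_vec, stmt_info, 0, vect_prologue);
   which pushes one stmt_info_for_cost entry and returns the target's
   preliminary estimate for that operation. */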
91
92 unsigned
93 record_stmt_cost (stmt_vector_for_cost *body_cost_vec, int count,
94 enum vect_cost_for_stmt kind, stmt_vec_info stmt_info,
95 int misalign, enum vect_cost_model_location where)
96 {
97 if ((kind == vector_load || kind == unaligned_load)
98 && STMT_VINFO_GATHER_SCATTER_P (stmt_info))
99 kind = vector_gather_load;
100 if ((kind == vector_store || kind == unaligned_store)
101 && STMT_VINFO_GATHER_SCATTER_P (stmt_info))
102 kind = vector_scatter_store;
103
104 stmt_info_for_cost si = { count, kind, where, stmt_info, misalign };
105 body_cost_vec->safe_push (si);
106
107 tree vectype = stmt_info ? stmt_vectype (stmt_info) : NULL_TREE;
108 return (unsigned)
109 (builtin_vectorization_cost (kind, vectype, misalign) * count);
110 }
111
112 /* Return a variable of type ELEM_TYPE[NELEMS]. */
113
114 static tree
115 create_vector_array (tree elem_type, unsigned HOST_WIDE_INT nelems)
116 {
117 return create_tmp_var (build_array_type_nelts (elem_type, nelems),
118 "vect_array");
119 }
120
121 /* ARRAY is an array of vectors created by create_vector_array.
122 Return an SSA_NAME for the vector in index N. The reference
123 is part of the vectorization of STMT_INFO and the vector is associated
124 with scalar destination SCALAR_DEST. */
125
126 static tree
127 read_vector_array (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
128 tree scalar_dest, tree array, unsigned HOST_WIDE_INT n)
129 {
130 tree vect_type, vect, vect_name, array_ref;
131 gimple *new_stmt;
132
133 gcc_assert (TREE_CODE (TREE_TYPE (array)) == ARRAY_TYPE);
134 vect_type = TREE_TYPE (TREE_TYPE (array));
135 vect = vect_create_destination_var (scalar_dest, vect_type);
136 array_ref = build4 (ARRAY_REF, vect_type, array,
137 build_int_cst (size_type_node, n),
138 NULL_TREE, NULL_TREE);
139
140 new_stmt = gimple_build_assign (vect, array_ref);
141 vect_name = make_ssa_name (vect, new_stmt);
142 gimple_assign_set_lhs (new_stmt, vect_name);
143 vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
144
145 return vect_name;
146 }
147
148 /* ARRAY is an array of vectors created by create_vector_array.
149 Emit code to store SSA_NAME VECT in index N of the array.
150 The store is part of the vectorization of STMT_INFO. */
151
152 static void
153 write_vector_array (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
154 tree vect, tree array, unsigned HOST_WIDE_INT n)
155 {
156 tree array_ref;
157 gimple *new_stmt;
158
159 array_ref = build4 (ARRAY_REF, TREE_TYPE (vect), array,
160 build_int_cst (size_type_node, n),
161 NULL_TREE, NULL_TREE);
162
163 new_stmt = gimple_build_assign (array_ref, vect);
164 vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
165 }
166
167 /* PTR is a pointer to an array of type TYPE. Return a representation
168 of *PTR. The memory reference replaces those in FIRST_DR
169 (and its group). */
170
171 static tree
172 create_array_ref (tree type, tree ptr, tree alias_ptr_type)
173 {
174 tree mem_ref;
175
176 mem_ref = build2 (MEM_REF, type, ptr, build_int_cst (alias_ptr_type, 0));
177 /* Arrays have the same alignment as their type. */
178 set_ptr_info_alignment (get_ptr_info (ptr), TYPE_ALIGN_UNIT (type), 0);
179 return mem_ref;
180 }
181
182 /* Add a clobber of variable VAR to the vectorization of STMT_INFO.
183 Emit the clobber before *GSI. */
184
185 static void
186 vect_clobber_variable (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
187 tree var)
188 {
189 tree clobber = build_clobber (TREE_TYPE (var));
190 gimple *new_stmt = gimple_build_assign (var, clobber);
191 vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
192 }
193
194 /* Utility functions used by vect_mark_stmts_to_be_vectorized. */
195
196 /* Function vect_mark_relevant.
197
198 Mark STMT_INFO as "relevant for vectorization" and add it to WORKLIST. */
199
200 static void
201 vect_mark_relevant (vec<stmt_vec_info> *worklist, stmt_vec_info stmt_info,
202 enum vect_relevant relevant, bool live_p)
203 {
204 enum vect_relevant save_relevant = STMT_VINFO_RELEVANT (stmt_info);
205 bool save_live_p = STMT_VINFO_LIVE_P (stmt_info);
206
207 if (dump_enabled_p ())
208 dump_printf_loc (MSG_NOTE, vect_location,
209 "mark relevant %d, live %d: %G", relevant, live_p,
210 stmt_info->stmt);
211
212 /* If this stmt is an original stmt in a pattern, we might need to mark its
213 related pattern stmt instead of the original stmt. However, such stmts
214 may have their own uses that are not in any pattern; in such cases the
215 stmt itself should be marked. */
216 if (STMT_VINFO_IN_PATTERN_P (stmt_info))
217 {
218 /* This is the last stmt in a sequence that was detected as a
219 pattern that can potentially be vectorized. Don't mark the stmt
220 as relevant/live because it's not going to be vectorized.
221 Instead mark the pattern-stmt that replaces it. */
222
223 if (dump_enabled_p ())
224 dump_printf_loc (MSG_NOTE, vect_location,
225 "last stmt in pattern. don't mark"
226 " relevant/live.\n");
227 stmt_vec_info old_stmt_info = stmt_info;
228 stmt_info = STMT_VINFO_RELATED_STMT (stmt_info);
229 gcc_assert (STMT_VINFO_RELATED_STMT (stmt_info) == old_stmt_info);
230 save_relevant = STMT_VINFO_RELEVANT (stmt_info);
231 save_live_p = STMT_VINFO_LIVE_P (stmt_info);
232 }
233
234 STMT_VINFO_LIVE_P (stmt_info) |= live_p;
235 if (relevant > STMT_VINFO_RELEVANT (stmt_info))
236 STMT_VINFO_RELEVANT (stmt_info) = relevant;
237
238 if (STMT_VINFO_RELEVANT (stmt_info) == save_relevant
239 && STMT_VINFO_LIVE_P (stmt_info) == save_live_p)
240 {
241 if (dump_enabled_p ())
242 dump_printf_loc (MSG_NOTE, vect_location,
243 "already marked relevant/live.\n");
244 return;
245 }
246
247 worklist->safe_push (stmt_info);
248 }
249
250
251 /* Function is_simple_and_all_uses_invariant
252
253 Return true if STMT_INFO is simple and all uses of it are invariant. */
254
255 bool
256 is_simple_and_all_uses_invariant (stmt_vec_info stmt_info,
257 loop_vec_info loop_vinfo)
258 {
259 tree op;
260 ssa_op_iter iter;
261
262 gassign *stmt = dyn_cast <gassign *> (stmt_info->stmt);
263 if (!stmt)
264 return false;
265
266 FOR_EACH_SSA_TREE_OPERAND (op, stmt, iter, SSA_OP_USE)
267 {
268 enum vect_def_type dt = vect_uninitialized_def;
269
270 if (!vect_is_simple_use (op, loop_vinfo, &dt))
271 {
272 if (dump_enabled_p ())
273 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
274 "use not simple.\n");
275 return false;
276 }
277
278 if (dt != vect_external_def && dt != vect_constant_def)
279 return false;
280 }
281 return true;
282 }
283
284 /* Function vect_stmt_relevant_p.
285
286 Return true if STMT_INFO, in the loop that is represented by LOOP_VINFO,
287 is "relevant for vectorization".
288
289 A stmt is considered "relevant for vectorization" if:
290 - it has uses outside the loop.
291 - it has vdefs (it alters memory).
292 - it is a control stmt in the loop (except for the exit condition).
293
294 CHECKME: what other side effects would the vectorizer allow? */
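/* As a rough illustration, in

     for (i = 0; i < n; i++) { a[i] = b[i] + 1; sum += b[i]; }
     ... = sum;

   the store to a[i] is relevant because it has a vdef, and the final value
   of sum is live because it is used after the loop. */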
295
296 static bool
297 vect_stmt_relevant_p (stmt_vec_info stmt_info, loop_vec_info loop_vinfo,
298 enum vect_relevant *relevant, bool *live_p)
299 {
300 class loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
301 ssa_op_iter op_iter;
302 imm_use_iterator imm_iter;
303 use_operand_p use_p;
304 def_operand_p def_p;
305
306 *relevant = vect_unused_in_scope;
307 *live_p = false;
308
309 /* cond stmt other than loop exit cond. */
310 if (is_ctrl_stmt (stmt_info->stmt)
311 && STMT_VINFO_TYPE (stmt_info) != loop_exit_ctrl_vec_info_type)
312 *relevant = vect_used_in_scope;
313
314 /* changing memory. */
315 if (gimple_code (stmt_info->stmt) != GIMPLE_PHI)
316 if (gimple_vdef (stmt_info->stmt)
317 && !gimple_clobber_p (stmt_info->stmt))
318 {
319 if (dump_enabled_p ())
320 dump_printf_loc (MSG_NOTE, vect_location,
321 "vec_stmt_relevant_p: stmt has vdefs.\n");
322 *relevant = vect_used_in_scope;
323 }
324
325 /* uses outside the loop. */
326 FOR_EACH_PHI_OR_STMT_DEF (def_p, stmt_info->stmt, op_iter, SSA_OP_DEF)
327 {
328 FOR_EACH_IMM_USE_FAST (use_p, imm_iter, DEF_FROM_PTR (def_p))
329 {
330 basic_block bb = gimple_bb (USE_STMT (use_p));
331 if (!flow_bb_inside_loop_p (loop, bb))
332 {
333 if (dump_enabled_p ())
334 dump_printf_loc (MSG_NOTE, vect_location,
335 "vec_stmt_relevant_p: used out of loop.\n");
336
337 if (is_gimple_debug (USE_STMT (use_p)))
338 continue;
339
340 /* We expect all such uses to be in the loop exit phis
341 (because of loop closed form). */
342 gcc_assert (gimple_code (USE_STMT (use_p)) == GIMPLE_PHI);
343 gcc_assert (bb == single_exit (loop)->dest);
344
345 *live_p = true;
346 }
347 }
348 }
349
350 if (*live_p && *relevant == vect_unused_in_scope
351 && !is_simple_and_all_uses_invariant (stmt_info, loop_vinfo))
352 {
353 if (dump_enabled_p ())
354 dump_printf_loc (MSG_NOTE, vect_location,
355 "vec_stmt_relevant_p: stmt live but not relevant.\n");
356 *relevant = vect_used_only_live;
357 }
358
359 return (*live_p || *relevant);
360 }
361
362
363 /* Function exist_non_indexing_operands_for_use_p
364
365 USE is one of the uses attached to STMT_INFO. Check if USE is
366 used in STMT_INFO for anything other than indexing an array. */
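/* For instance, in 'a[i_1] = x_2' the use of x_2 is a non-indexing use (it
   is the stored value), whereas a use of i_1 only feeds the address
   computation and makes this function return false. */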
367
368 static bool
369 exist_non_indexing_operands_for_use_p (tree use, stmt_vec_info stmt_info)
370 {
371 tree operand;
372
373 /* USE corresponds to some operand in STMT. If there is no data
374 reference in STMT, then any operand that corresponds to USE
375 is not indexing an array. */
376 if (!STMT_VINFO_DATA_REF (stmt_info))
377 return true;
378
379 /* STMT has a data_ref. FORNOW this means that it is one of
380 the following forms:
381 -1- ARRAY_REF = var
382 -2- var = ARRAY_REF
383 (This should have been verified in analyze_data_refs).
384
385 'var' in the second case corresponds to a def, not a use,
386 so USE cannot correspond to any operands that are not used
387 for array indexing.
388
389 Therefore, all we need to check is if STMT falls into the
390 first case, and whether var corresponds to USE. */
391
392 gassign *assign = dyn_cast <gassign *> (stmt_info->stmt);
393 if (!assign || !gimple_assign_copy_p (assign))
394 {
395 gcall *call = dyn_cast <gcall *> (stmt_info->stmt);
396 if (call && gimple_call_internal_p (call))
397 {
398 internal_fn ifn = gimple_call_internal_fn (call);
399 int mask_index = internal_fn_mask_index (ifn);
400 if (mask_index >= 0
401 && use == gimple_call_arg (call, mask_index))
402 return true;
403 int stored_value_index = internal_fn_stored_value_index (ifn);
404 if (stored_value_index >= 0
405 && use == gimple_call_arg (call, stored_value_index))
406 return true;
407 if (internal_gather_scatter_fn_p (ifn)
408 && use == gimple_call_arg (call, 1))
409 return true;
410 }
411 return false;
412 }
413
414 if (TREE_CODE (gimple_assign_lhs (assign)) == SSA_NAME)
415 return false;
416 operand = gimple_assign_rhs1 (assign);
417 if (TREE_CODE (operand) != SSA_NAME)
418 return false;
419
420 if (operand == use)
421 return true;
422
423 return false;
424 }
425
426
427 /*
428 Function process_use.
429
430 Inputs:
431 - a USE in STMT_VINFO in a loop represented by LOOP_VINFO
432 - RELEVANT - enum value to be set in the STMT_VINFO of the stmt
433 that defined USE. This is done by calling mark_relevant and passing it
434 the WORKLIST (to add DEF_STMT to the WORKLIST in case it is relevant).
435 - FORCE is true if exist_non_indexing_operands_for_use_p check shouldn't
436 be performed.
437
438 Outputs:
439 Generally, LIVE_P and RELEVANT are used to define the liveness and
440 relevance info of the DEF_STMT of this USE:
441 STMT_VINFO_LIVE_P (DEF_stmt_vinfo) <-- live_p
442 STMT_VINFO_RELEVANT (DEF_stmt_vinfo) <-- relevant
443 Exceptions:
444 - case 1: If USE is used only for address computations (e.g. array indexing),
445 which does not need to be directly vectorized, then the liveness/relevance
446 of the respective DEF_STMT is left unchanged.
447 - case 2: If STMT_VINFO is a reduction phi and DEF_STMT is a reduction stmt,
448 we skip DEF_STMT because it has already been processed.
449 - case 3: If DEF_STMT and STMT_VINFO are in different nests, then
450 "relevant" will be modified accordingly.
451
452 Return true if everything is as expected. Return false otherwise. */
453
454 static opt_result
455 process_use (stmt_vec_info stmt_vinfo, tree use, loop_vec_info loop_vinfo,
456 enum vect_relevant relevant, vec<stmt_vec_info> *worklist,
457 bool force)
458 {
459 stmt_vec_info dstmt_vinfo;
460 basic_block bb, def_bb;
461 enum vect_def_type dt;
462
463 /* case 1: we are only interested in uses that need to be vectorized. Uses
464 that are used for address computation are not considered relevant. */
465 if (!force && !exist_non_indexing_operands_for_use_p (use, stmt_vinfo))
466 return opt_result::success ();
467
468 if (!vect_is_simple_use (use, loop_vinfo, &dt, &dstmt_vinfo))
469 return opt_result::failure_at (stmt_vinfo->stmt,
470 "not vectorized:"
471 " unsupported use in stmt.\n");
472
473 if (!dstmt_vinfo)
474 return opt_result::success ();
475
476 def_bb = gimple_bb (dstmt_vinfo->stmt);
477
478 /* case 2: A reduction phi (STMT) defined by a reduction stmt (DSTMT_VINFO).
479 DSTMT_VINFO must have already been processed, because this should be the
480 only way that STMT, which is a reduction-phi, was put in the worklist,
481 as there should be no other uses for DSTMT_VINFO in the loop. So we just
482 check that everything is as expected, and we are done. */
483 bb = gimple_bb (stmt_vinfo->stmt);
484 if (gimple_code (stmt_vinfo->stmt) == GIMPLE_PHI
485 && STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_reduction_def
486 && gimple_code (dstmt_vinfo->stmt) != GIMPLE_PHI
487 && STMT_VINFO_DEF_TYPE (dstmt_vinfo) == vect_reduction_def
488 && bb->loop_father == def_bb->loop_father)
489 {
490 if (dump_enabled_p ())
491 dump_printf_loc (MSG_NOTE, vect_location,
492 "reduc-stmt defining reduc-phi in the same nest.\n");
493 gcc_assert (STMT_VINFO_RELEVANT (dstmt_vinfo) < vect_used_by_reduction);
494 gcc_assert (STMT_VINFO_LIVE_P (dstmt_vinfo)
495 || STMT_VINFO_RELEVANT (dstmt_vinfo) > vect_unused_in_scope);
496 return opt_result::success ();
497 }
498
499 /* case 3a: outer-loop stmt defining an inner-loop stmt:
500 outer-loop-header-bb:
501 d = dstmt_vinfo
502 inner-loop:
503 stmt # use (d)
504 outer-loop-tail-bb:
505 ... */
506 if (flow_loop_nested_p (def_bb->loop_father, bb->loop_father))
507 {
508 if (dump_enabled_p ())
509 dump_printf_loc (MSG_NOTE, vect_location,
510 "outer-loop def-stmt defining inner-loop stmt.\n");
511
512 switch (relevant)
513 {
514 case vect_unused_in_scope:
515 relevant = (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_nested_cycle) ?
516 vect_used_in_scope : vect_unused_in_scope;
517 break;
518
519 case vect_used_in_outer_by_reduction:
520 gcc_assert (STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_reduction_def);
521 relevant = vect_used_by_reduction;
522 break;
523
524 case vect_used_in_outer:
525 gcc_assert (STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_reduction_def);
526 relevant = vect_used_in_scope;
527 break;
528
529 case vect_used_in_scope:
530 break;
531
532 default:
533 gcc_unreachable ();
534 }
535 }
536
537 /* case 3b: inner-loop stmt defining an outer-loop stmt:
538 outer-loop-header-bb:
539 ...
540 inner-loop:
541 d = dstmt_vinfo
542 outer-loop-tail-bb (or outer-loop-exit-bb in double reduction):
543 stmt # use (d) */
544 else if (flow_loop_nested_p (bb->loop_father, def_bb->loop_father))
545 {
546 if (dump_enabled_p ())
547 dump_printf_loc (MSG_NOTE, vect_location,
548 "inner-loop def-stmt defining outer-loop stmt.\n");
549
550 switch (relevant)
551 {
552 case vect_unused_in_scope:
553 relevant = (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_reduction_def
554 || STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_double_reduction_def) ?
555 vect_used_in_outer_by_reduction : vect_unused_in_scope;
556 break;
557
558 case vect_used_by_reduction:
559 case vect_used_only_live:
560 relevant = vect_used_in_outer_by_reduction;
561 break;
562
563 case vect_used_in_scope:
564 relevant = vect_used_in_outer;
565 break;
566
567 default:
568 gcc_unreachable ();
569 }
570 }
571 /* We are also not interested in uses on loop PHI backedges that are
572 inductions. Otherwise we'll needlessly vectorize the IV increment
573 and cause hybrid SLP for SLP inductions. Unless the PHI is live
574 of course. */
575 else if (gimple_code (stmt_vinfo->stmt) == GIMPLE_PHI
576 && STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_induction_def
577 && ! STMT_VINFO_LIVE_P (stmt_vinfo)
578 && (PHI_ARG_DEF_FROM_EDGE (stmt_vinfo->stmt,
579 loop_latch_edge (bb->loop_father))
580 == use))
581 {
582 if (dump_enabled_p ())
583 dump_printf_loc (MSG_NOTE, vect_location,
584 "induction value on backedge.\n");
585 return opt_result::success ();
586 }
587
588
589 vect_mark_relevant (worklist, dstmt_vinfo, relevant, false);
590 return opt_result::success ();
591 }
592
593
594 /* Function vect_mark_stmts_to_be_vectorized.
595
596 Not all stmts in the loop need to be vectorized. For example:
597
598 for i...
599 for j...
600 1. T0 = i + j
601 2. T1 = a[T0]
602
603 3. j = j + 1
604
605 Stmts 1 and 3 do not need to be vectorized, because loop control and
606 addressing of vectorized data-refs are handled differently.
607
608 This pass detects such stmts. */
609
610 opt_result
611 vect_mark_stmts_to_be_vectorized (loop_vec_info loop_vinfo, bool *fatal)
612 {
613 class loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
614 basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
615 unsigned int nbbs = loop->num_nodes;
616 gimple_stmt_iterator si;
617 unsigned int i;
618 basic_block bb;
619 bool live_p;
620 enum vect_relevant relevant;
621
622 DUMP_VECT_SCOPE ("vect_mark_stmts_to_be_vectorized");
623
624 auto_vec<stmt_vec_info, 64> worklist;
625
626 /* 1. Init worklist. */
627 for (i = 0; i < nbbs; i++)
628 {
629 bb = bbs[i];
630 for (si = gsi_start_phis (bb); !gsi_end_p (si); gsi_next (&si))
631 {
632 stmt_vec_info phi_info = loop_vinfo->lookup_stmt (gsi_stmt (si));
633 if (dump_enabled_p ())
634 dump_printf_loc (MSG_NOTE, vect_location, "init: phi relevant? %G",
635 phi_info->stmt);
636
637 if (vect_stmt_relevant_p (phi_info, loop_vinfo, &relevant, &live_p))
638 vect_mark_relevant (&worklist, phi_info, relevant, live_p);
639 }
640 for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
641 {
642 stmt_vec_info stmt_info = loop_vinfo->lookup_stmt (gsi_stmt (si));
643 if (dump_enabled_p ())
644 dump_printf_loc (MSG_NOTE, vect_location,
645 "init: stmt relevant? %G", stmt_info->stmt);
646
647 if (vect_stmt_relevant_p (stmt_info, loop_vinfo, &relevant, &live_p))
648 vect_mark_relevant (&worklist, stmt_info, relevant, live_p);
649 }
650 }
651
652 /* 2. Process_worklist */
653 while (worklist.length () > 0)
654 {
655 use_operand_p use_p;
656 ssa_op_iter iter;
657
658 stmt_vec_info stmt_vinfo = worklist.pop ();
659 if (dump_enabled_p ())
660 dump_printf_loc (MSG_NOTE, vect_location,
661 "worklist: examine stmt: %G", stmt_vinfo->stmt);
662
663 /* Examine the USEs of STMT. For each USE, mark the stmt that defines it
664 (DEF_STMT) as relevant/irrelevant according to the relevance property
665 of STMT. */
666 relevant = STMT_VINFO_RELEVANT (stmt_vinfo);
667
668 /* Generally, the relevance property of STMT (in STMT_VINFO_RELEVANT) is
669 propagated as is to the DEF_STMTs of its USEs.
670
671 One exception is when STMT has been identified as defining a reduction
672 variable; in this case we set the relevance to vect_used_by_reduction.
673 This is because we distinguish between two kinds of relevant stmts -
674 those that are used by a reduction computation, and those that are
675 (also) used by a regular computation. This allows us later on to
676 identify stmts that are used solely by a reduction, and therefore the
677 order of the results that they produce does not have to be kept. */
678
679 switch (STMT_VINFO_DEF_TYPE (stmt_vinfo))
680 {
681 case vect_reduction_def:
682 gcc_assert (relevant != vect_unused_in_scope);
683 if (relevant != vect_unused_in_scope
684 && relevant != vect_used_in_scope
685 && relevant != vect_used_by_reduction
686 && relevant != vect_used_only_live)
687 return opt_result::failure_at
688 (stmt_vinfo->stmt, "unsupported use of reduction.\n");
689 break;
690
691 case vect_nested_cycle:
692 if (relevant != vect_unused_in_scope
693 && relevant != vect_used_in_outer_by_reduction
694 && relevant != vect_used_in_outer)
695 return opt_result::failure_at
696 (stmt_vinfo->stmt, "unsupported use of nested cycle.\n");
697 break;
698
699 case vect_double_reduction_def:
700 if (relevant != vect_unused_in_scope
701 && relevant != vect_used_by_reduction
702 && relevant != vect_used_only_live)
703 return opt_result::failure_at
704 (stmt_vinfo->stmt, "unsupported use of double reduction.\n");
705 break;
706
707 default:
708 break;
709 }
710
711 if (is_pattern_stmt_p (stmt_vinfo))
712 {
713 /* Pattern statements are not inserted into the code, so
714 FOR_EACH_PHI_OR_STMT_USE optimizes their operands out, and we
715 have to scan the RHS or function arguments instead. */
716 if (gassign *assign = dyn_cast <gassign *> (stmt_vinfo->stmt))
717 {
718 enum tree_code rhs_code = gimple_assign_rhs_code (assign);
719 tree op = gimple_assign_rhs1 (assign);
720
721 i = 1;
722 if (rhs_code == COND_EXPR && COMPARISON_CLASS_P (op))
723 {
724 opt_result res
725 = process_use (stmt_vinfo, TREE_OPERAND (op, 0),
726 loop_vinfo, relevant, &worklist, false);
727 if (!res)
728 return res;
729 res = process_use (stmt_vinfo, TREE_OPERAND (op, 1),
730 loop_vinfo, relevant, &worklist, false);
731 if (!res)
732 return res;
733 i = 2;
734 }
735 for (; i < gimple_num_ops (assign); i++)
736 {
737 op = gimple_op (assign, i);
738 if (TREE_CODE (op) == SSA_NAME)
739 {
740 opt_result res
741 = process_use (stmt_vinfo, op, loop_vinfo, relevant,
742 &worklist, false);
743 if (!res)
744 return res;
745 }
746 }
747 }
748 else if (gcall *call = dyn_cast <gcall *> (stmt_vinfo->stmt))
749 {
750 for (i = 0; i < gimple_call_num_args (call); i++)
751 {
752 tree arg = gimple_call_arg (call, i);
753 opt_result res
754 = process_use (stmt_vinfo, arg, loop_vinfo, relevant,
755 &worklist, false);
756 if (!res)
757 return res;
758 }
759 }
760 }
761 else
762 FOR_EACH_PHI_OR_STMT_USE (use_p, stmt_vinfo->stmt, iter, SSA_OP_USE)
763 {
764 tree op = USE_FROM_PTR (use_p);
765 opt_result res
766 = process_use (stmt_vinfo, op, loop_vinfo, relevant,
767 &worklist, false);
768 if (!res)
769 return res;
770 }
771
772 if (STMT_VINFO_GATHER_SCATTER_P (stmt_vinfo))
773 {
774 gather_scatter_info gs_info;
775 if (!vect_check_gather_scatter (stmt_vinfo, loop_vinfo, &gs_info))
776 gcc_unreachable ();
777 opt_result res
778 = process_use (stmt_vinfo, gs_info.offset, loop_vinfo, relevant,
779 &worklist, true);
780 if (!res)
781 {
782 if (fatal)
783 *fatal = false;
784 return res;
785 }
786 }
787 } /* while worklist */
788
789 return opt_result::success ();
790 }
791
792 /* Compute the prologue cost for invariant or constant operands. */
793
794 static unsigned
795 vect_prologue_cost_for_slp_op (slp_tree node, stmt_vec_info stmt_info,
796 unsigned opno, enum vect_def_type dt,
797 stmt_vector_for_cost *cost_vec)
798 {
799 vec_info *vinfo = stmt_info->vinfo;
800 gimple *stmt = SLP_TREE_SCALAR_STMTS (node)[0]->stmt;
801 tree op = gimple_op (stmt, opno);
802 unsigned prologue_cost = 0;
803
804 /* Without looking at the actual initializer a vector of
805 constants can be implemented as load from the constant pool.
806 When all elements are the same we can use a splat. */
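/* In the costing below this means: an external operand that is the same
   scalar in every lane is counted as a single scalar_to_vec (splat),
   differing external scalars as a vec_construct, and a vector of constants
   as a vector_load from the constant pool. */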
807 tree vectype = get_vectype_for_scalar_type (vinfo, TREE_TYPE (op));
808 unsigned group_size = SLP_TREE_SCALAR_STMTS (node).length ();
809 unsigned num_vects_to_check;
810 unsigned HOST_WIDE_INT const_nunits;
811 unsigned nelt_limit;
812 if (TYPE_VECTOR_SUBPARTS (vectype).is_constant (&const_nunits)
813 && ! multiple_p (const_nunits, group_size))
814 {
815 num_vects_to_check = SLP_TREE_NUMBER_OF_VEC_STMTS (node);
816 nelt_limit = const_nunits;
817 }
818 else
819 {
820 /* If either the vector has variable length or the vectors
821 are composed of repeated whole groups, we only need to
822 cost construction once. All vectors will be the same. */
823 num_vects_to_check = 1;
824 nelt_limit = group_size;
825 }
826 tree elt = NULL_TREE;
827 unsigned nelt = 0;
828 for (unsigned j = 0; j < num_vects_to_check * nelt_limit; ++j)
829 {
830 unsigned si = j % group_size;
831 if (nelt == 0)
832 elt = gimple_op (SLP_TREE_SCALAR_STMTS (node)[si]->stmt, opno);
833 /* ??? We're just tracking whether all operands of a single
834 vector initializer are the same; ideally we'd check if
835 we emitted the same one already. */
836 else if (elt != gimple_op (SLP_TREE_SCALAR_STMTS (node)[si]->stmt,
837 opno))
838 elt = NULL_TREE;
839 nelt++;
840 if (nelt == nelt_limit)
841 {
842 /* ??? We need to pass down stmt_info for a vector type
843 even if it points to the wrong stmt. */
844 prologue_cost += record_stmt_cost
845 (cost_vec, 1,
846 dt == vect_external_def
847 ? (elt ? scalar_to_vec : vec_construct)
848 : vector_load,
849 stmt_info, 0, vect_prologue);
850 nelt = 0;
851 }
852 }
853
854 return prologue_cost;
855 }
856
857 /* Function vect_model_simple_cost.
858
859 Models cost for simple operations, i.e. those that only emit ncopies of a
860 single op. Right now, this does not account for multiple insns that could
861 be generated for the single vector op. We will handle that shortly. */
862
863 static void
864 vect_model_simple_cost (stmt_vec_info stmt_info, int ncopies,
865 enum vect_def_type *dt,
866 int ndts,
867 slp_tree node,
868 stmt_vector_for_cost *cost_vec)
869 {
870 int inside_cost = 0, prologue_cost = 0;
871
872 gcc_assert (cost_vec != NULL);
873
874 /* ??? Somehow we need to fix this at the callers. */
875 if (node)
876 ncopies = SLP_TREE_NUMBER_OF_VEC_STMTS (node);
877
878 if (node)
879 {
880 /* Scan operands and account for prologue cost of constants/externals.
881 ??? This over-estimates cost for multiple uses and should be
882 re-engineered. */
883 gimple *stmt = SLP_TREE_SCALAR_STMTS (node)[0]->stmt;
884 tree lhs = gimple_get_lhs (stmt);
885 for (unsigned i = 0; i < gimple_num_ops (stmt); ++i)
886 {
887 tree op = gimple_op (stmt, i);
888 enum vect_def_type dt;
889 if (!op || op == lhs)
890 continue;
891 if (vect_is_simple_use (op, stmt_info->vinfo, &dt)
892 && (dt == vect_constant_def || dt == vect_external_def))
893 prologue_cost += vect_prologue_cost_for_slp_op (node, stmt_info,
894 i, dt, cost_vec);
895 }
896 }
897 else
898 /* Cost the "broadcast" of a scalar operand into a vector operand.
899 Use scalar_to_vec to cost the broadcast, as elsewhere in the vector
900 cost model. */
901 for (int i = 0; i < ndts; i++)
902 if (dt[i] == vect_constant_def || dt[i] == vect_external_def)
903 prologue_cost += record_stmt_cost (cost_vec, 1, scalar_to_vec,
904 stmt_info, 0, vect_prologue);
905
906 /* Adjust for two-operator SLP nodes. */
907 if (node && SLP_TREE_TWO_OPERATORS (node))
908 {
909 ncopies *= 2;
910 inside_cost += record_stmt_cost (cost_vec, ncopies, vec_perm,
911 stmt_info, 0, vect_body);
912 }
913
914 /* Pass the inside-of-loop statements to the target-specific cost model. */
915 inside_cost += record_stmt_cost (cost_vec, ncopies, vector_stmt,
916 stmt_info, 0, vect_body);
917
918 if (dump_enabled_p ())
919 dump_printf_loc (MSG_NOTE, vect_location,
920 "vect_model_simple_cost: inside_cost = %d, "
921 "prologue_cost = %d .\n", inside_cost, prologue_cost);
922 }
923
924
925 /* Model cost for type demotion and promotion operations. PWR is normally
926 zero for single-step promotions and demotions. It will be one if
927 two-step promotion/demotion is required, and so on. Each additional
928 step doubles the number of instructions required. */
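/* A rough worked example: promoting a vector of chars to ints is a two-step
   promotion (char -> short -> int), so PWR is 1 and the loop below records
   vect_pow2 (1) + vect_pow2 (2) = 6 vec_promote_demote stmts; the matching
   two-step demotion records 1 + 2 = 3. */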
929
930 static void
931 vect_model_promotion_demotion_cost (stmt_vec_info stmt_info,
932 enum vect_def_type *dt, int pwr,
933 stmt_vector_for_cost *cost_vec)
934 {
935 int i, tmp;
936 int inside_cost = 0, prologue_cost = 0;
937
938 for (i = 0; i < pwr + 1; i++)
939 {
940 tmp = (STMT_VINFO_TYPE (stmt_info) == type_promotion_vec_info_type) ?
941 (i + 1) : i;
942 inside_cost += record_stmt_cost (cost_vec, vect_pow2 (tmp),
943 vec_promote_demote, stmt_info, 0,
944 vect_body);
945 }
946
947 /* FORNOW: Assuming a maximum of 2 args per stmt. */
948 for (i = 0; i < 2; i++)
949 if (dt[i] == vect_constant_def || dt[i] == vect_external_def)
950 prologue_cost += record_stmt_cost (cost_vec, 1, vector_stmt,
951 stmt_info, 0, vect_prologue);
952
953 if (dump_enabled_p ())
954 dump_printf_loc (MSG_NOTE, vect_location,
955 "vect_model_promotion_demotion_cost: inside_cost = %d, "
956 "prologue_cost = %d .\n", inside_cost, prologue_cost);
957 }
958
959 /* Returns true if the current function returns DECL. */
960
961 static bool
962 cfun_returns (tree decl)
963 {
964 edge_iterator ei;
965 edge e;
966 FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR_FOR_FN (cfun)->preds)
967 {
968 greturn *ret = safe_dyn_cast <greturn *> (last_stmt (e->src));
969 if (!ret)
970 continue;
971 if (gimple_return_retval (ret) == decl)
972 return true;
973 /* We often end up with an aggregate copy to the result decl;
974 handle that case as well. First skip intermediate clobbers
975 though. */
976 gimple *def = ret;
977 do
978 {
979 def = SSA_NAME_DEF_STMT (gimple_vuse (def));
980 }
981 while (gimple_clobber_p (def));
982 if (is_a <gassign *> (def)
983 && gimple_assign_lhs (def) == gimple_return_retval (ret)
984 && gimple_assign_rhs1 (def) == decl)
985 return true;
986 }
987 return false;
988 }
989
990 /* Function vect_model_store_cost
991
992 Models cost for stores. In the case of grouped accesses, one access
993 has the overhead of the grouped access attributed to it. */
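/* For instance, for a group of four interleaved stores the once-per-group
   permutation cost computed below is attributed only to the first statement
   of the group (see first_stmt_p), not to every member. */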
994
995 static void
996 vect_model_store_cost (stmt_vec_info stmt_info, int ncopies,
997 enum vect_def_type dt,
998 vect_memory_access_type memory_access_type,
999 vec_load_store_type vls_type, slp_tree slp_node,
1000 stmt_vector_for_cost *cost_vec)
1001 {
1002 unsigned int inside_cost = 0, prologue_cost = 0;
1003 stmt_vec_info first_stmt_info = stmt_info;
1004 bool grouped_access_p = STMT_VINFO_GROUPED_ACCESS (stmt_info);
1005
1006 /* ??? Somehow we need to fix this at the callers. */
1007 if (slp_node)
1008 ncopies = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
1009
1010 if (vls_type == VLS_STORE_INVARIANT)
1011 {
1012 if (slp_node)
1013 prologue_cost += vect_prologue_cost_for_slp_op (slp_node, stmt_info,
1014 1, dt, cost_vec);
1015 else
1016 prologue_cost += record_stmt_cost (cost_vec, 1, scalar_to_vec,
1017 stmt_info, 0, vect_prologue);
1018 }
1019
1020 /* Grouped stores update all elements in the group at once,
1021 so we want the DR for the first statement. */
1022 if (!slp_node && grouped_access_p)
1023 first_stmt_info = DR_GROUP_FIRST_ELEMENT (stmt_info);
1024
1025 /* True if we should include any once-per-group costs as well as
1026 the cost of the statement itself. For SLP we only get called
1027 once per group anyhow. */
1028 bool first_stmt_p = (first_stmt_info == stmt_info);
1029
1030 /* We assume that the cost of a single store-lanes instruction is
1031 equivalent to the cost of DR_GROUP_SIZE separate stores. If a grouped
1032 access is instead being provided by a permute-and-store operation,
1033 include the cost of the permutes. */
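/* A worked example of the formula below: with ncopies = 1 and a group of
   four stores, nstmts = 1 * ceil_log2 (4) * 4 = 8 vec_perm stmts are
   costed for the interleaving. */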
1034 if (first_stmt_p
1035 && memory_access_type == VMAT_CONTIGUOUS_PERMUTE)
1036 {
1037 /* Uses high and low interleave or shuffle operations for each
1038 needed permute. */
1039 int group_size = DR_GROUP_SIZE (first_stmt_info);
1040 int nstmts = ncopies * ceil_log2 (group_size) * group_size;
1041 inside_cost = record_stmt_cost (cost_vec, nstmts, vec_perm,
1042 stmt_info, 0, vect_body);
1043
1044 if (dump_enabled_p ())
1045 dump_printf_loc (MSG_NOTE, vect_location,
1046 "vect_model_store_cost: strided group_size = %d .\n",
1047 group_size);
1048 }
1049
1050 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
1051 /* Costs of the stores. */
1052 if (memory_access_type == VMAT_ELEMENTWISE
1053 || memory_access_type == VMAT_GATHER_SCATTER)
1054 {
1055 /* N scalar stores plus extracting the elements. */
1056 unsigned int assumed_nunits = vect_nunits_for_cost (vectype);
1057 inside_cost += record_stmt_cost (cost_vec,
1058 ncopies * assumed_nunits,
1059 scalar_store, stmt_info, 0, vect_body);
1060 }
1061 else
1062 vect_get_store_cost (stmt_info, ncopies, &inside_cost, cost_vec);
1063
1064 if (memory_access_type == VMAT_ELEMENTWISE
1065 || memory_access_type == VMAT_STRIDED_SLP)
1066 {
1067 /* N scalar stores plus extracting the elements. */
1068 unsigned int assumed_nunits = vect_nunits_for_cost (vectype);
1069 inside_cost += record_stmt_cost (cost_vec,
1070 ncopies * assumed_nunits,
1071 vec_to_scalar, stmt_info, 0, vect_body);
1072 }
1073
1074 /* When vectorizing a store into the function result assign
1075 a penalty if the function returns in a multi-register location.
1076 In this case we assume we'll end up with having to spill the
1077 vector result and do piecewise loads as a conservative estimate. */
1078 tree base = get_base_address (STMT_VINFO_DATA_REF (stmt_info)->ref);
1079 if (base
1080 && (TREE_CODE (base) == RESULT_DECL
1081 || (DECL_P (base) && cfun_returns (base)))
1082 && !aggregate_value_p (base, cfun->decl))
1083 {
1084 rtx reg = hard_function_value (TREE_TYPE (base), cfun->decl, 0, 1);
1085 /* ??? Handle PARALLEL in some way. */
1086 if (REG_P (reg))
1087 {
1088 int nregs = hard_regno_nregs (REGNO (reg), GET_MODE (reg));
1089 /* Assume that a single reg-reg move is possible and cheap,
1090 do not account for vector to gp register move cost. */
1091 if (nregs > 1)
1092 {
1093 /* Spill. */
1094 prologue_cost += record_stmt_cost (cost_vec, ncopies,
1095 vector_store,
1096 stmt_info, 0, vect_epilogue);
1097 /* Loads. */
1098 prologue_cost += record_stmt_cost (cost_vec, ncopies * nregs,
1099 scalar_load,
1100 stmt_info, 0, vect_epilogue);
1101 }
1102 }
1103 }
1104
1105 if (dump_enabled_p ())
1106 dump_printf_loc (MSG_NOTE, vect_location,
1107 "vect_model_store_cost: inside_cost = %d, "
1108 "prologue_cost = %d .\n", inside_cost, prologue_cost);
1109 }
1110
1111
1112 /* Calculate cost of DR's memory access. */
1113 void
1114 vect_get_store_cost (stmt_vec_info stmt_info, int ncopies,
1115 unsigned int *inside_cost,
1116 stmt_vector_for_cost *body_cost_vec)
1117 {
1118 dr_vec_info *dr_info = STMT_VINFO_DR_INFO (stmt_info);
1119 int alignment_support_scheme
1120 = vect_supportable_dr_alignment (dr_info, false);
1121
1122 switch (alignment_support_scheme)
1123 {
1124 case dr_aligned:
1125 {
1126 *inside_cost += record_stmt_cost (body_cost_vec, ncopies,
1127 vector_store, stmt_info, 0,
1128 vect_body);
1129
1130 if (dump_enabled_p ())
1131 dump_printf_loc (MSG_NOTE, vect_location,
1132 "vect_model_store_cost: aligned.\n");
1133 break;
1134 }
1135
1136 case dr_unaligned_supported:
1137 {
1138 /* Here, we assign an additional cost for the unaligned store. */
1139 *inside_cost += record_stmt_cost (body_cost_vec, ncopies,
1140 unaligned_store, stmt_info,
1141 DR_MISALIGNMENT (dr_info),
1142 vect_body);
1143 if (dump_enabled_p ())
1144 dump_printf_loc (MSG_NOTE, vect_location,
1145 "vect_model_store_cost: unaligned supported by "
1146 "hardware.\n");
1147 break;
1148 }
1149
1150 case dr_unaligned_unsupported:
1151 {
1152 *inside_cost = VECT_MAX_COST;
1153
1154 if (dump_enabled_p ())
1155 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1156 "vect_model_store_cost: unsupported access.\n");
1157 break;
1158 }
1159
1160 default:
1161 gcc_unreachable ();
1162 }
1163 }
1164
1165
1166 /* Function vect_model_load_cost
1167
1168 Models cost for loads. In the case of grouped accesses, one access has
1169 the overhead of the grouped access attributed to it. Since unaligned
1170 accesses are supported for loads, we also account for the costs of the
1171 access scheme chosen. */
1172
1173 static void
1174 vect_model_load_cost (stmt_vec_info stmt_info, unsigned ncopies,
1175 vect_memory_access_type memory_access_type,
1176 slp_instance instance,
1177 slp_tree slp_node,
1178 stmt_vector_for_cost *cost_vec)
1179 {
1180 unsigned int inside_cost = 0, prologue_cost = 0;
1181 bool grouped_access_p = STMT_VINFO_GROUPED_ACCESS (stmt_info);
1182
1183 gcc_assert (cost_vec);
1184
1185 /* ??? Somehow we need to fix this at the callers. */
1186 if (slp_node)
1187 ncopies = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
1188
1189 if (slp_node && SLP_TREE_LOAD_PERMUTATION (slp_node).exists ())
1190 {
1191 /* If the load is permuted then the alignment is determined by
1192 the first group element not by the first scalar stmt DR. */
1193 stmt_vec_info first_stmt_info = DR_GROUP_FIRST_ELEMENT (stmt_info);
1194 /* Record the cost for the permutation. */
1195 unsigned n_perms;
1196 unsigned assumed_nunits
1197 = vect_nunits_for_cost (STMT_VINFO_VECTYPE (first_stmt_info));
1198 unsigned slp_vf = (ncopies * assumed_nunits) / instance->group_size;
1199 vect_transform_slp_perm_load (slp_node, vNULL, NULL,
1200 slp_vf, instance, true,
1201 &n_perms);
1202 inside_cost += record_stmt_cost (cost_vec, n_perms, vec_perm,
1203 first_stmt_info, 0, vect_body);
1204 /* And adjust the number of loads performed. This handles
1205 redundancies as well as loads that are later dead. */
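/* For example, with DR_GROUP_SIZE 8, ASSUMED_NUNITS 4 and a load
   permutation that only uses elements 0..3, only the first of the two
   vectors covering the group is needed, so NCOPIES is recomputed as 1. */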
1206 auto_sbitmap perm (DR_GROUP_SIZE (first_stmt_info));
1207 bitmap_clear (perm);
1208 for (unsigned i = 0;
1209 i < SLP_TREE_LOAD_PERMUTATION (slp_node).length (); ++i)
1210 bitmap_set_bit (perm, SLP_TREE_LOAD_PERMUTATION (slp_node)[i]);
1211 ncopies = 0;
1212 bool load_seen = false;
1213 for (unsigned i = 0; i < DR_GROUP_SIZE (first_stmt_info); ++i)
1214 {
1215 if (i % assumed_nunits == 0)
1216 {
1217 if (load_seen)
1218 ncopies++;
1219 load_seen = false;
1220 }
1221 if (bitmap_bit_p (perm, i))
1222 load_seen = true;
1223 }
1224 if (load_seen)
1225 ncopies++;
1226 gcc_assert (ncopies
1227 <= (DR_GROUP_SIZE (first_stmt_info)
1228 - DR_GROUP_GAP (first_stmt_info)
1229 + assumed_nunits - 1) / assumed_nunits);
1230 }
1231
1232 /* Grouped loads read all elements in the group at once,
1233 so we want the DR for the first statement. */
1234 stmt_vec_info first_stmt_info = stmt_info;
1235 if (!slp_node && grouped_access_p)
1236 first_stmt_info = DR_GROUP_FIRST_ELEMENT (stmt_info);
1237
1238 /* True if we should include any once-per-group costs as well as
1239 the cost of the statement itself. For SLP we only get called
1240 once per group anyhow. */
1241 bool first_stmt_p = (first_stmt_info == stmt_info);
1242
1243 /* We assume that the cost of a single load-lanes instruction is
1244 equivalent to the cost of DR_GROUP_SIZE separate loads. If a grouped
1245 access is instead being provided by a load-and-permute operation,
1246 include the cost of the permutes. */
1247 if (first_stmt_p
1248 && memory_access_type == VMAT_CONTIGUOUS_PERMUTE)
1249 {
1250 /* Uses even and odd extract operations or shuffle operations
1251 for each needed permute. */
1252 int group_size = DR_GROUP_SIZE (first_stmt_info);
1253 int nstmts = ncopies * ceil_log2 (group_size) * group_size;
1254 inside_cost += record_stmt_cost (cost_vec, nstmts, vec_perm,
1255 stmt_info, 0, vect_body);
1256
1257 if (dump_enabled_p ())
1258 dump_printf_loc (MSG_NOTE, vect_location,
1259 "vect_model_load_cost: strided group_size = %d .\n",
1260 group_size);
1261 }
1262
1263 /* The loads themselves. */
1264 if (memory_access_type == VMAT_ELEMENTWISE
1265 || memory_access_type == VMAT_GATHER_SCATTER)
1266 {
1267 /* N scalar loads plus gathering them into a vector. */
1268 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
1269 unsigned int assumed_nunits = vect_nunits_for_cost (vectype);
1270 inside_cost += record_stmt_cost (cost_vec,
1271 ncopies * assumed_nunits,
1272 scalar_load, stmt_info, 0, vect_body);
1273 }
1274 else
1275 vect_get_load_cost (stmt_info, ncopies, first_stmt_p,
1276 &inside_cost, &prologue_cost,
1277 cost_vec, cost_vec, true);
1278 if (memory_access_type == VMAT_ELEMENTWISE
1279 || memory_access_type == VMAT_STRIDED_SLP)
1280 inside_cost += record_stmt_cost (cost_vec, ncopies, vec_construct,
1281 stmt_info, 0, vect_body);
1282
1283 if (dump_enabled_p ())
1284 dump_printf_loc (MSG_NOTE, vect_location,
1285 "vect_model_load_cost: inside_cost = %d, "
1286 "prologue_cost = %d .\n", inside_cost, prologue_cost);
1287 }
1288
1289
1290 /* Calculate cost of DR's memory access. */
1291 void
1292 vect_get_load_cost (stmt_vec_info stmt_info, int ncopies,
1293 bool add_realign_cost, unsigned int *inside_cost,
1294 unsigned int *prologue_cost,
1295 stmt_vector_for_cost *prologue_cost_vec,
1296 stmt_vector_for_cost *body_cost_vec,
1297 bool record_prologue_costs)
1298 {
1299 dr_vec_info *dr_info = STMT_VINFO_DR_INFO (stmt_info);
1300 int alignment_support_scheme
1301 = vect_supportable_dr_alignment (dr_info, false);
1302
1303 switch (alignment_support_scheme)
1304 {
1305 case dr_aligned:
1306 {
1307 *inside_cost += record_stmt_cost (body_cost_vec, ncopies, vector_load,
1308 stmt_info, 0, vect_body);
1309
1310 if (dump_enabled_p ())
1311 dump_printf_loc (MSG_NOTE, vect_location,
1312 "vect_model_load_cost: aligned.\n");
1313
1314 break;
1315 }
1316 case dr_unaligned_supported:
1317 {
1318 /* Here, we assign an additional cost for the unaligned load. */
1319 *inside_cost += record_stmt_cost (body_cost_vec, ncopies,
1320 unaligned_load, stmt_info,
1321 DR_MISALIGNMENT (dr_info),
1322 vect_body);
1323
1324 if (dump_enabled_p ())
1325 dump_printf_loc (MSG_NOTE, vect_location,
1326 "vect_model_load_cost: unaligned supported by "
1327 "hardware.\n");
1328
1329 break;
1330 }
1331 case dr_explicit_realign:
1332 {
1333 *inside_cost += record_stmt_cost (body_cost_vec, ncopies * 2,
1334 vector_load, stmt_info, 0, vect_body);
1335 *inside_cost += record_stmt_cost (body_cost_vec, ncopies,
1336 vec_perm, stmt_info, 0, vect_body);
1337
1338 /* FIXME: If the misalignment remains fixed across the iterations of
1339 the containing loop, the following cost should be added to the
1340 prologue costs. */
1341 if (targetm.vectorize.builtin_mask_for_load)
1342 *inside_cost += record_stmt_cost (body_cost_vec, 1, vector_stmt,
1343 stmt_info, 0, vect_body);
1344
1345 if (dump_enabled_p ())
1346 dump_printf_loc (MSG_NOTE, vect_location,
1347 "vect_model_load_cost: explicit realign\n");
1348
1349 break;
1350 }
1351 case dr_explicit_realign_optimized:
1352 {
1353 if (dump_enabled_p ())
1354 dump_printf_loc (MSG_NOTE, vect_location,
1355 "vect_model_load_cost: unaligned software "
1356 "pipelined.\n");
1357
1358 /* Unaligned software pipeline has a load of an address, an initial
1359 load, and possibly a mask operation to "prime" the loop. However,
1360 if this is an access in a group of loads, which provide grouped
1361 access, then the above cost should only be considered for one
1362 access in the group. Inside the loop, there is a load op
1363 and a realignment op. */
1364
1365 if (add_realign_cost && record_prologue_costs)
1366 {
1367 *prologue_cost += record_stmt_cost (prologue_cost_vec, 2,
1368 vector_stmt, stmt_info,
1369 0, vect_prologue);
1370 if (targetm.vectorize.builtin_mask_for_load)
1371 *prologue_cost += record_stmt_cost (prologue_cost_vec, 1,
1372 vector_stmt, stmt_info,
1373 0, vect_prologue);
1374 }
1375
1376 *inside_cost += record_stmt_cost (body_cost_vec, ncopies, vector_load,
1377 stmt_info, 0, vect_body);
1378 *inside_cost += record_stmt_cost (body_cost_vec, ncopies, vec_perm,
1379 stmt_info, 0, vect_body);
1380
1381 if (dump_enabled_p ())
1382 dump_printf_loc (MSG_NOTE, vect_location,
1383 "vect_model_load_cost: explicit realign optimized"
1384 "\n");
1385
1386 break;
1387 }
1388
1389 case dr_unaligned_unsupported:
1390 {
1391 *inside_cost = VECT_MAX_COST;
1392
1393 if (dump_enabled_p ())
1394 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1395 "vect_model_load_cost: unsupported access.\n");
1396 break;
1397 }
1398
1399 default:
1400 gcc_unreachable ();
1401 }
1402 }
1403
1404 /* Insert the new stmt NEW_STMT at *GSI or at the appropriate place in
1405 the loop preheader for the vectorized stmt STMT_VINFO. */
1406
1407 static void
1408 vect_init_vector_1 (stmt_vec_info stmt_vinfo, gimple *new_stmt,
1409 gimple_stmt_iterator *gsi)
1410 {
1411 if (gsi)
1412 vect_finish_stmt_generation (stmt_vinfo, new_stmt, gsi);
1413 else
1414 {
1415 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
1416
1417 if (loop_vinfo)
1418 {
1419 class loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
1420 basic_block new_bb;
1421 edge pe;
1422
1423 if (nested_in_vect_loop_p (loop, stmt_vinfo))
1424 loop = loop->inner;
1425
1426 pe = loop_preheader_edge (loop);
1427 new_bb = gsi_insert_on_edge_immediate (pe, new_stmt);
1428 gcc_assert (!new_bb);
1429 }
1430 else
1431 {
1432 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_vinfo);
1433 basic_block bb;
1434 gimple_stmt_iterator gsi_bb_start;
1435
1436 gcc_assert (bb_vinfo);
1437 bb = BB_VINFO_BB (bb_vinfo);
1438 gsi_bb_start = gsi_after_labels (bb);
1439 gsi_insert_before (&gsi_bb_start, new_stmt, GSI_SAME_STMT);
1440 }
1441 }
1442
1443 if (dump_enabled_p ())
1444 dump_printf_loc (MSG_NOTE, vect_location,
1445 "created new init_stmt: %G", new_stmt);
1446 }
1447
1448 /* Function vect_init_vector.
1449
1450 Insert a new stmt (INIT_STMT) that initializes a new variable of type
1451 TYPE with the value VAL. If TYPE is a vector type and VAL does not have
1452 vector type a vector with all elements equal to VAL is created first.
1453 Place the initialization at GSI if it is not NULL. Otherwise, place the
1454 initialization at the loop preheader.
1455 Return the DEF of INIT_STMT.
1456 It will be used in the vectorization of STMT_INFO. */
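/* An illustrative call (hypothetical, not taken from this file):

     tree zero = vect_init_vector (stmt_info, integer_zero_node,
                                   vectype, NULL);

   builds a splat of zeros of type VECTYPE and, because GSI is NULL, emits
   the initialization on the loop preheader edge. */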
1457
1458 tree
1459 vect_init_vector (stmt_vec_info stmt_info, tree val, tree type,
1460 gimple_stmt_iterator *gsi)
1461 {
1462 gimple *init_stmt;
1463 tree new_temp;
1464
465 /* We abuse this function to push something to an SSA name with initial 'val'. */
1466 if (! useless_type_conversion_p (type, TREE_TYPE (val)))
1467 {
1468 gcc_assert (TREE_CODE (type) == VECTOR_TYPE);
1469 if (! types_compatible_p (TREE_TYPE (type), TREE_TYPE (val)))
1470 {
1471 /* Scalar boolean value should be transformed into
1472 all zeros or all ones value before building a vector. */
1473 if (VECTOR_BOOLEAN_TYPE_P (type))
1474 {
1475 tree true_val = build_all_ones_cst (TREE_TYPE (type));
1476 tree false_val = build_zero_cst (TREE_TYPE (type));
1477
1478 if (CONSTANT_CLASS_P (val))
1479 val = integer_zerop (val) ? false_val : true_val;
1480 else
1481 {
1482 new_temp = make_ssa_name (TREE_TYPE (type));
1483 init_stmt = gimple_build_assign (new_temp, COND_EXPR,
1484 val, true_val, false_val);
1485 vect_init_vector_1 (stmt_info, init_stmt, gsi);
1486 val = new_temp;
1487 }
1488 }
1489 else
1490 {
1491 gimple_seq stmts = NULL;
1492 if (! INTEGRAL_TYPE_P (TREE_TYPE (val)))
1493 val = gimple_build (&stmts, VIEW_CONVERT_EXPR,
1494 TREE_TYPE (type), val);
1495 else
1496 /* ??? Condition vectorization expects us to do
1497 promotion of invariant/external defs. */
1498 val = gimple_convert (&stmts, TREE_TYPE (type), val);
1499 for (gimple_stmt_iterator gsi2 = gsi_start (stmts);
1500 !gsi_end_p (gsi2); )
1501 {
1502 init_stmt = gsi_stmt (gsi2);
1503 gsi_remove (&gsi2, false);
1504 vect_init_vector_1 (stmt_info, init_stmt, gsi);
1505 }
1506 }
1507 }
1508 val = build_vector_from_val (type, val);
1509 }
1510
1511 new_temp = vect_get_new_ssa_name (type, vect_simple_var, "cst_");
1512 init_stmt = gimple_build_assign (new_temp, val);
1513 vect_init_vector_1 (stmt_info, init_stmt, gsi);
1514 return new_temp;
1515 }
1516
1517 /* Function vect_get_vec_def_for_operand_1.
1518
1519 For a defining stmt DEF_STMT_INFO of a scalar stmt, return a vector def
1520 with type DT that will be used in the vectorized stmt. */
1521
1522 tree
1523 vect_get_vec_def_for_operand_1 (stmt_vec_info def_stmt_info,
1524 enum vect_def_type dt)
1525 {
1526 tree vec_oprnd;
1527 stmt_vec_info vec_stmt_info;
1528
1529 switch (dt)
1530 {
1531 /* operand is a constant or a loop invariant. */
1532 case vect_constant_def:
1533 case vect_external_def:
1534 /* Code should use vect_get_vec_def_for_operand. */
1535 gcc_unreachable ();
1536
1537 /* Operand is defined by a loop header phi. In case of nested
1538 cycles we also may have uses of the backedge def. */
1539 case vect_reduction_def:
1540 case vect_double_reduction_def:
1541 case vect_nested_cycle:
1542 case vect_induction_def:
1543 gcc_assert (gimple_code (def_stmt_info->stmt) == GIMPLE_PHI
1544 || dt == vect_nested_cycle);
1545 /* Fallthru. */
1546
1547 /* operand is defined inside the loop. */
1548 case vect_internal_def:
1549 {
1550 /* Get the def from the vectorized stmt. */
1551 vec_stmt_info = STMT_VINFO_VEC_STMT (def_stmt_info);
1552 /* Get vectorized pattern statement. */
1553 if (!vec_stmt_info
1554 && STMT_VINFO_IN_PATTERN_P (def_stmt_info)
1555 && !STMT_VINFO_RELEVANT (def_stmt_info))
1556 vec_stmt_info = (STMT_VINFO_VEC_STMT
1557 (STMT_VINFO_RELATED_STMT (def_stmt_info)));
1558 gcc_assert (vec_stmt_info);
1559 if (gphi *phi = dyn_cast <gphi *> (vec_stmt_info->stmt))
1560 vec_oprnd = PHI_RESULT (phi);
1561 else
1562 vec_oprnd = gimple_get_lhs (vec_stmt_info->stmt);
1563 return vec_oprnd;
1564 }
1565
1566 default:
1567 gcc_unreachable ();
1568 }
1569 }
1570
1571
1572 /* Function vect_get_vec_def_for_operand.
1573
1574 OP is an operand in STMT_VINFO. This function returns a (vector) def
1575 that will be used in the vectorized stmt for STMT_VINFO.
1576
1577 In the case that OP is an SSA_NAME which is defined in the loop, then
1578 STMT_VINFO_VEC_STMT of the defining stmt holds the relevant def.
1579
1580 In case OP is an invariant or constant, a new stmt that creates a vector def
1581 needs to be introduced. VECTYPE may be used to specify a required type for
1582 vector invariant. */
1583
1584 tree
1585 vect_get_vec_def_for_operand (tree op, stmt_vec_info stmt_vinfo, tree vectype)
1586 {
1587 gimple *def_stmt;
1588 enum vect_def_type dt;
1589 bool is_simple_use;
1590 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
1591
1592 if (dump_enabled_p ())
1593 dump_printf_loc (MSG_NOTE, vect_location,
1594 "vect_get_vec_def_for_operand: %T\n", op);
1595
1596 stmt_vec_info def_stmt_info;
1597 is_simple_use = vect_is_simple_use (op, loop_vinfo, &dt,
1598 &def_stmt_info, &def_stmt);
1599 gcc_assert (is_simple_use);
1600 if (def_stmt && dump_enabled_p ())
1601 dump_printf_loc (MSG_NOTE, vect_location, " def_stmt = %G", def_stmt);
1602
1603 if (dt == vect_constant_def || dt == vect_external_def)
1604 {
1605 tree stmt_vectype = STMT_VINFO_VECTYPE (stmt_vinfo);
1606 tree vector_type;
1607
1608 if (vectype)
1609 vector_type = vectype;
1610 else if (VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (op))
1611 && VECTOR_BOOLEAN_TYPE_P (stmt_vectype))
1612 vector_type = build_same_sized_truth_vector_type (stmt_vectype);
1613 else
1614 vector_type = get_vectype_for_scalar_type (loop_vinfo, TREE_TYPE (op));
1615
1616 gcc_assert (vector_type);
1617 return vect_init_vector (stmt_vinfo, op, vector_type, NULL);
1618 }
1619 else
1620 return vect_get_vec_def_for_operand_1 (def_stmt_info, dt);
1621 }
1622
1623
1624 /* Function vect_get_vec_def_for_stmt_copy
1625
1626 Return a vector-def for an operand. This function is used when the
1627 vectorized stmt to be created (by the caller to this function) is a "copy"
1628 created in case the vectorized result cannot fit in one vector, and several
1629 copies of the vector-stmt are required. In this case the vector-def is
1630 retrieved from the vector stmt recorded in the STMT_VINFO_RELATED_STMT field
1631 of the stmt that defines VEC_OPRND. VINFO describes the vectorization.
1632
1633 Context:
1634 In case the vectorization factor (VF) is bigger than the number
1635 of elements that can fit in a vectype (nunits), we have to generate
1636 more than one vector stmt to vectorize the scalar stmt. This situation
1637 arises when there are multiple data-types operated upon in the loop; the
1638 smallest data-type determines the VF, and as a result, when vectorizing
1639 stmts operating on wider types we need to create 'VF/nunits' "copies" of the
1640 vector stmt (each computing a vector of 'nunits' results, and together
1641 computing 'VF' results in each iteration). This function is called when
1642 vectorizing such a stmt (e.g. vectorizing S2 in the illustration below, in
1643 which VF=16 and nunits=4, so the number of copies required is 4):
1644
1645 scalar stmt: vectorized into: STMT_VINFO_RELATED_STMT
1646
1647 S1: x = load VS1.0: vx.0 = memref0 VS1.1
1648 VS1.1: vx.1 = memref1 VS1.2
1649 VS1.2: vx.2 = memref2 VS1.3
1650 VS1.3: vx.3 = memref3
1651
1652 S2: z = x + ... VSnew.0: vz0 = vx.0 + ... VSnew.1
1653 VSnew.1: vz1 = vx.1 + ... VSnew.2
1654 VSnew.2: vz2 = vx.2 + ... VSnew.3
1655 VSnew.3: vz3 = vx.3 + ...
1656
1657 The vectorization of S1 is explained in vectorizable_load.
1658 The vectorization of S2:
1659 To create the first vector-stmt out of the 4 copies - VSnew.0 -
1660 the function 'vect_get_vec_def_for_operand' is called to
1661 get the relevant vector-def for each operand of S2. For operand x it
1662 returns the vector-def 'vx.0'.
1663
1664 To create the remaining copies of the vector-stmt (VSnew.j), this
1665 function is called to get the relevant vector-def for each operand. It is
1666 obtained from the respective VS1.j stmt, which is recorded in the
1667 STMT_VINFO_RELATED_STMT field of the stmt that defines VEC_OPRND.
1668
1669 For example, to obtain the vector-def 'vx.1' in order to create the
1670 vector stmt 'VSnew.1', this function is called with VEC_OPRND='vx.0'.
671 Given 'vx.0' we obtain the stmt that defines it ('VS1.0'); from the
1672 STMT_VINFO_RELATED_STMT field of 'VS1.0' we obtain the next copy - 'VS1.1',
1673 and return its def ('vx.1').
1674 Overall, to create the above sequence this function will be called 3 times:
1675 vx.1 = vect_get_vec_def_for_stmt_copy (vinfo, vx.0);
1676 vx.2 = vect_get_vec_def_for_stmt_copy (vinfo, vx.1);
1677 vx.3 = vect_get_vec_def_for_stmt_copy (vinfo, vx.2); */
1678
1679 tree
1680 vect_get_vec_def_for_stmt_copy (vec_info *vinfo, tree vec_oprnd)
1681 {
1682 stmt_vec_info def_stmt_info = vinfo->lookup_def (vec_oprnd);
1683 if (!def_stmt_info)
1684 /* Do nothing; can reuse same def. */
1685 return vec_oprnd;
1686
1687 def_stmt_info = STMT_VINFO_RELATED_STMT (def_stmt_info);
1688 gcc_assert (def_stmt_info);
1689 if (gphi *phi = dyn_cast <gphi *> (def_stmt_info->stmt))
1690 vec_oprnd = PHI_RESULT (phi);
1691 else
1692 vec_oprnd = gimple_get_lhs (def_stmt_info->stmt);
1693 return vec_oprnd;
1694 }
1695
1696
1697 /* Get vectorized definitions for the operands to create a copy of an original
1698 stmt. See vect_get_vec_def_for_stmt_copy () for details. */
1699
1700 void
1701 vect_get_vec_defs_for_stmt_copy (vec_info *vinfo,
1702 vec<tree> *vec_oprnds0,
1703 vec<tree> *vec_oprnds1)
1704 {
1705 tree vec_oprnd = vec_oprnds0->pop ();
1706
1707 vec_oprnd = vect_get_vec_def_for_stmt_copy (vinfo, vec_oprnd);
1708 vec_oprnds0->quick_push (vec_oprnd);
1709
1710 if (vec_oprnds1 && vec_oprnds1->length ())
1711 {
1712 vec_oprnd = vec_oprnds1->pop ();
1713 vec_oprnd = vect_get_vec_def_for_stmt_copy (vinfo, vec_oprnd);
1714 vec_oprnds1->quick_push (vec_oprnd);
1715 }
1716 }
1717
1718
1719 /* Get vectorized definitions for OP0 and OP1. */
1720
1721 void
1722 vect_get_vec_defs (tree op0, tree op1, stmt_vec_info stmt_info,
1723 vec<tree> *vec_oprnds0,
1724 vec<tree> *vec_oprnds1,
1725 slp_tree slp_node)
1726 {
1727 if (slp_node)
1728 {
1729 auto_vec<vec<tree> > vec_defs (SLP_TREE_CHILDREN (slp_node).length ());
1730 vect_get_slp_defs (slp_node, &vec_defs, op1 ? 2 : 1);
1731 *vec_oprnds0 = vec_defs[0];
1732 if (op1)
1733 *vec_oprnds1 = vec_defs[1];
1734 }
1735 else
1736 {
1737 tree vec_oprnd;
1738
1739 vec_oprnds0->create (1);
1740 vec_oprnd = vect_get_vec_def_for_operand (op0, stmt_info);
1741 vec_oprnds0->quick_push (vec_oprnd);
1742
1743 if (op1)
1744 {
1745 vec_oprnds1->create (1);
1746 vec_oprnd = vect_get_vec_def_for_operand (op1, stmt_info);
1747 vec_oprnds1->quick_push (vec_oprnd);
1748 }
1749 }
1750 }
1751
1752 /* Helper function called by vect_finish_replace_stmt and
1753 vect_finish_stmt_generation. Set the location of the new
1754 statement and create and return a stmt_vec_info for it. */
1755
1756 static stmt_vec_info
1757 vect_finish_stmt_generation_1 (stmt_vec_info stmt_info, gimple *vec_stmt)
1758 {
1759 vec_info *vinfo = stmt_info->vinfo;
1760
1761 stmt_vec_info vec_stmt_info = vinfo->add_stmt (vec_stmt);
1762
1763 if (dump_enabled_p ())
1764 dump_printf_loc (MSG_NOTE, vect_location, "add new stmt: %G", vec_stmt);
1765
1766 gimple_set_location (vec_stmt, gimple_location (stmt_info->stmt));
1767
1768 /* While EH edges will generally prevent vectorization, stmt might
1769 e.g. be in a must-not-throw region. Ensure newly created stmts
1770 that could throw are part of the same region. */
1771 int lp_nr = lookup_stmt_eh_lp (stmt_info->stmt);
1772 if (lp_nr != 0 && stmt_could_throw_p (cfun, vec_stmt))
1773 add_stmt_to_eh_lp (vec_stmt, lp_nr);
1774
1775 return vec_stmt_info;
1776 }
1777
1778 /* Replace the scalar statement STMT_INFO with a new vector statement VEC_STMT,
1779 which sets the same scalar result as STMT_INFO did. Create and return a
1780 stmt_vec_info for VEC_STMT. */
1781
1782 stmt_vec_info
1783 vect_finish_replace_stmt (stmt_vec_info stmt_info, gimple *vec_stmt)
1784 {
1785 gcc_assert (gimple_get_lhs (stmt_info->stmt) == gimple_get_lhs (vec_stmt));
1786
1787 gimple_stmt_iterator gsi = gsi_for_stmt (stmt_info->stmt);
1788 gsi_replace (&gsi, vec_stmt, true);
1789
1790 return vect_finish_stmt_generation_1 (stmt_info, vec_stmt);
1791 }
1792
1793 /* Add VEC_STMT to the vectorized implementation of STMT_INFO and insert it
1794 before *GSI. Create and return a stmt_vec_info for VEC_STMT. */
1795
1796 stmt_vec_info
1797 vect_finish_stmt_generation (stmt_vec_info stmt_info, gimple *vec_stmt,
1798 gimple_stmt_iterator *gsi)
1799 {
1800 gcc_assert (gimple_code (stmt_info->stmt) != GIMPLE_LABEL);
1801
1802 if (!gsi_end_p (*gsi)
1803 && gimple_has_mem_ops (vec_stmt))
1804 {
1805 gimple *at_stmt = gsi_stmt (*gsi);
1806 tree vuse = gimple_vuse (at_stmt);
1807 if (vuse && TREE_CODE (vuse) == SSA_NAME)
1808 {
1809 tree vdef = gimple_vdef (at_stmt);
1810 gimple_set_vuse (vec_stmt, gimple_vuse (at_stmt));
1811 /* If we have an SSA vuse and insert a store, update virtual
1812 SSA form to avoid triggering the renamer. Do so only
1813 if we can easily see all uses - which is what almost always
1814 happens with the way vectorized stmts are inserted. */
1815 if ((vdef && TREE_CODE (vdef) == SSA_NAME)
1816 && ((is_gimple_assign (vec_stmt)
1817 && !is_gimple_reg (gimple_assign_lhs (vec_stmt)))
1818 || (is_gimple_call (vec_stmt)
1819 && !(gimple_call_flags (vec_stmt)
1820 & (ECF_CONST|ECF_PURE|ECF_NOVOPS)))))
1821 {
1822 tree new_vdef = copy_ssa_name (vuse, vec_stmt);
1823 gimple_set_vdef (vec_stmt, new_vdef);
1824 SET_USE (gimple_vuse_op (at_stmt), new_vdef);
1825 }
1826 }
1827 }
1828 gsi_insert_before (gsi, vec_stmt, GSI_SAME_STMT);
1829 return vect_finish_stmt_generation_1 (stmt_info, vec_stmt);
1830 }
1831
1832 /* We want to vectorize a call to combined function CFN with function
1833 decl FNDECL, using VECTYPE_OUT as the type of the output and VECTYPE_IN
1834 as the types of all inputs. Check whether this is possible using
1835 an internal function, returning its code if so or IFN_LAST if not. */
1836
1837 static internal_fn
1838 vectorizable_internal_function (combined_fn cfn, tree fndecl,
1839 tree vectype_out, tree vectype_in)
1840 {
1841 internal_fn ifn;
1842 if (internal_fn_p (cfn))
1843 ifn = as_internal_fn (cfn);
1844 else
1845 ifn = associated_internal_fn (fndecl);
1846 if (ifn != IFN_LAST && direct_internal_fn_p (ifn))
1847 {
1848 const direct_internal_fn_info &info = direct_internal_fn (ifn);
1849 if (info.vectorizable)
1850 {
1851 tree type0 = (info.type0 < 0 ? vectype_out : vectype_in);
1852 tree type1 = (info.type1 < 0 ? vectype_out : vectype_in);
1853 if (direct_internal_fn_supported_p (ifn, tree_pair (type0, type1),
1854 OPTIMIZE_FOR_SPEED))
1855 return ifn;
1856 }
1857 }
1858 return IFN_LAST;
1859 }
1860
1861
1862 static tree permute_vec_elements (tree, tree, tree, stmt_vec_info,
1863 gimple_stmt_iterator *);
1864
1865 /* Check whether a load or store statement in the loop described by
1866 LOOP_VINFO is possible in a fully-masked loop. This is testing
1867 whether the vectorizer pass has the appropriate support, as well as
1868 whether the target does.
1869
1870 VLS_TYPE says whether the statement is a load or store and VECTYPE
1871 is the type of the vector being loaded or stored. MEMORY_ACCESS_TYPE
1872 says how the load or store is going to be implemented and GROUP_SIZE
1873 is the number of load or store statements in the containing group.
1874 If the access is a gather load or scatter store, GS_INFO describes
1875 its arguments. If the load or store is conditional, SCALAR_MASK is the
1876 condition under which it occurs.
1877
1878 Clear LOOP_VINFO_CAN_FULLY_MASK_P if a fully-masked loop is not
1879 supported, otherwise record the required mask types. */
1880
1881 static void
1882 check_load_store_masking (loop_vec_info loop_vinfo, tree vectype,
1883 vec_load_store_type vls_type, int group_size,
1884 vect_memory_access_type memory_access_type,
1885 gather_scatter_info *gs_info, tree scalar_mask)
1886 {
1887 /* Invariant loads need no special support. */
1888 if (memory_access_type == VMAT_INVARIANT)
1889 return;
1890
1891 vec_loop_masks *masks = &LOOP_VINFO_MASKS (loop_vinfo);
1892 machine_mode vecmode = TYPE_MODE (vectype);
1893 bool is_load = (vls_type == VLS_LOAD);
1894 if (memory_access_type == VMAT_LOAD_STORE_LANES)
1895 {
1896 if (is_load
1897 ? !vect_load_lanes_supported (vectype, group_size, true)
1898 : !vect_store_lanes_supported (vectype, group_size, true))
1899 {
1900 if (dump_enabled_p ())
1901 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1902 "can't use a fully-masked loop because the"
1903 " target doesn't have an appropriate masked"
1904 " load/store-lanes instruction.\n");
1905 LOOP_VINFO_CAN_FULLY_MASK_P (loop_vinfo) = false;
1906 return;
1907 }
1908 unsigned int ncopies = vect_get_num_copies (loop_vinfo, vectype);
1909 vect_record_loop_mask (loop_vinfo, masks, ncopies, vectype, scalar_mask);
1910 return;
1911 }
1912
1913 if (memory_access_type == VMAT_GATHER_SCATTER)
1914 {
1915 internal_fn ifn = (is_load
1916 ? IFN_MASK_GATHER_LOAD
1917 : IFN_MASK_SCATTER_STORE);
1918 tree offset_type = TREE_TYPE (gs_info->offset);
1919 if (!internal_gather_scatter_fn_supported_p (ifn, vectype,
1920 gs_info->memory_type,
1921 TYPE_SIGN (offset_type),
1922 gs_info->scale))
1923 {
1924 if (dump_enabled_p ())
1925 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1926 "can't use a fully-masked loop because the"
1927 " target doesn't have an appropriate masked"
1928 " gather load or scatter store instruction.\n");
1929 LOOP_VINFO_CAN_FULLY_MASK_P (loop_vinfo) = false;
1930 return;
1931 }
1932 unsigned int ncopies = vect_get_num_copies (loop_vinfo, vectype);
1933 vect_record_loop_mask (loop_vinfo, masks, ncopies, vectype, scalar_mask);
1934 return;
1935 }
1936
1937 if (memory_access_type != VMAT_CONTIGUOUS
1938 && memory_access_type != VMAT_CONTIGUOUS_PERMUTE)
1939 {
1940 /* Element X of the data must come from iteration i * VF + X of the
1941 scalar loop. We need more work to support other mappings. */
1942 if (dump_enabled_p ())
1943 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1944 "can't use a fully-masked loop because an access"
1945 " isn't contiguous.\n");
1946 LOOP_VINFO_CAN_FULLY_MASK_P (loop_vinfo) = false;
1947 return;
1948 }
1949
1950 machine_mode mask_mode;
1951 if (!(targetm.vectorize.get_mask_mode
1952 (GET_MODE_NUNITS (vecmode),
1953 GET_MODE_SIZE (vecmode)).exists (&mask_mode))
1954 || !can_vec_mask_load_store_p (vecmode, mask_mode, is_load))
1955 {
1956 if (dump_enabled_p ())
1957 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1958 "can't use a fully-masked loop because the target"
1959 " doesn't have the appropriate masked load or"
1960 " store.\n");
1961 LOOP_VINFO_CAN_FULLY_MASK_P (loop_vinfo) = false;
1962 return;
1963 }
1964 /* We might load more scalars than we need for permuting SLP loads.
1965 We checked in get_group_load_store_type that the extra elements
1966 don't leak into a new vector. */
1967 poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
1968 poly_uint64 vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
1969 unsigned int nvectors;
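     /* For example, a group of 2 accesses with VF == 8 and 4-element vectors
        needs ceil (2 * 8 / 4) == 4 masks, i.e. one mask per vector access.  */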
1970 if (can_div_away_from_zero_p (group_size * vf, nunits, &nvectors))
1971 vect_record_loop_mask (loop_vinfo, masks, nvectors, vectype, scalar_mask);
1972 else
1973 gcc_unreachable ();
1974 }
1975
1976 /* Return the mask input to a masked load or store. VEC_MASK is the vectorized
1977 form of the scalar mask condition and LOOP_MASK, if nonnull, is the mask
1978 that needs to be applied to all loads and stores in a vectorized loop.
1979 Return VEC_MASK if LOOP_MASK is null, otherwise return VEC_MASK & LOOP_MASK.
1980
1981 MASK_TYPE is the type of both masks. If new statements are needed,
1982 insert them before GSI. */
1983
1984 static tree
1985 prepare_load_store_mask (tree mask_type, tree loop_mask, tree vec_mask,
1986 gimple_stmt_iterator *gsi)
1987 {
1988 gcc_assert (useless_type_conversion_p (mask_type, TREE_TYPE (vec_mask)));
1989 if (!loop_mask)
1990 return vec_mask;
1991
1992 gcc_assert (TREE_TYPE (loop_mask) == mask_type);
1993 tree and_res = make_temp_ssa_name (mask_type, NULL, "vec_mask_and");
1994 gimple *and_stmt = gimple_build_assign (and_res, BIT_AND_EXPR,
1995 vec_mask, loop_mask);
1996 gsi_insert_before (gsi, and_stmt, GSI_SAME_STMT);
1997 return and_res;
1998 }
1999
2000 /* Determine whether we can use a gather load or scatter store to vectorize
2001 strided load or store STMT_INFO by truncating the current offset to a
2002 smaller width. We need to be able to construct an offset vector:
2003
2004 { 0, X, X*2, X*3, ... }
2005
2006 without loss of precision, where X is STMT_INFO's DR_STEP.
2007
2008 Return true if this is possible, describing the gather load or scatter
2009 store in GS_INFO. MASKED_P is true if the load or store is conditional. */
2010
2011 static bool
2012 vect_truncate_gather_scatter_offset (stmt_vec_info stmt_info,
2013 loop_vec_info loop_vinfo, bool masked_p,
2014 gather_scatter_info *gs_info)
2015 {
2016 dr_vec_info *dr_info = STMT_VINFO_DR_INFO (stmt_info);
2017 data_reference *dr = dr_info->dr;
2018 tree step = DR_STEP (dr);
2019 if (TREE_CODE (step) != INTEGER_CST)
2020 {
2021 /* ??? Perhaps we could use range information here? */
2022 if (dump_enabled_p ())
2023 dump_printf_loc (MSG_NOTE, vect_location,
2024 "cannot truncate variable step.\n");
2025 return false;
2026 }
2027
2028 /* Get the number of bits in an element. */
2029 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
2030 scalar_mode element_mode = SCALAR_TYPE_MODE (TREE_TYPE (vectype));
2031 unsigned int element_bits = GET_MODE_BITSIZE (element_mode);
2032
2033 /* Set COUNT to the upper limit on the number of elements - 1.
2034 Start with the maximum vectorization factor. */
2035 unsigned HOST_WIDE_INT count = vect_max_vf (loop_vinfo) - 1;
2036
2037 /* Try lowering COUNT to the number of scalar latch iterations. */
2038 class loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
2039 widest_int max_iters;
2040 if (max_loop_iterations (loop, &max_iters)
2041 && max_iters < count)
2042 count = max_iters.to_shwi ();
2043
2044 /* Try scales of 1 and the element size. */
2045 int scales[] = { 1, vect_get_scalar_dr_size (dr_info) };
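     /* For example, with 4-byte elements and DR_STEP == 8, scale 1 must
        represent multiples of 8 whereas scale 4 only needs multiples of 2,
        making it more likely that the offsets fit in ELEMENT_BITS bits.  */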
2046 wi::overflow_type overflow = wi::OVF_NONE;
2047 for (int i = 0; i < 2; ++i)
2048 {
2049 int scale = scales[i];
2050 widest_int factor;
2051 if (!wi::multiple_of_p (wi::to_widest (step), scale, SIGNED, &factor))
2052 continue;
2053
2054 /* See whether we can calculate COUNT * STEP / SCALE
2055 in ELEMENT_BITS bits. */
2056 widest_int range = wi::mul (count, factor, SIGNED, &overflow);
2057 if (overflow)
2058 continue;
2059 signop sign = range >= 0 ? UNSIGNED : SIGNED;
2060 if (wi::min_precision (range, sign) > element_bits)
2061 {
2062 overflow = wi::OVF_UNKNOWN;
2063 continue;
2064 }
2065
2066 /* See whether the target supports the operation. */
2067 tree memory_type = TREE_TYPE (DR_REF (dr));
2068 if (!vect_gather_scatter_fn_p (DR_IS_READ (dr), masked_p, vectype,
2069 memory_type, element_bits, sign, scale,
2070 &gs_info->ifn, &gs_info->element_type))
2071 continue;
2072
2073 tree offset_type = build_nonstandard_integer_type (element_bits,
2074 sign == UNSIGNED);
2075
2076 gs_info->decl = NULL_TREE;
2077 /* Logically the sum of DR_BASE_ADDRESS, DR_INIT and DR_OFFSET,
2078 but we don't need to store that here. */
2079 gs_info->base = NULL_TREE;
2080 gs_info->offset = fold_convert (offset_type, step);
2081 gs_info->offset_dt = vect_constant_def;
2082 gs_info->offset_vectype = NULL_TREE;
2083 gs_info->scale = scale;
2084 gs_info->memory_type = memory_type;
2085 return true;
2086 }
2087
2088 if (overflow && dump_enabled_p ())
2089 dump_printf_loc (MSG_NOTE, vect_location,
2090 "truncating gather/scatter offset to %d bits"
2091 " might change its value.\n", element_bits);
2092
2093 return false;
2094 }
2095
2096 /* Return true if we can use gather/scatter internal functions to
2097 vectorize STMT_INFO, which is a grouped or strided load or store.
2098 MASKED_P is true if the load or store is conditional. When returning
2099 true, fill in GS_INFO with the information required to perform the
2100 operation. */
2101
2102 static bool
2103 vect_use_strided_gather_scatters_p (stmt_vec_info stmt_info,
2104 loop_vec_info loop_vinfo, bool masked_p,
2105 gather_scatter_info *gs_info)
2106 {
2107 if (!vect_check_gather_scatter (stmt_info, loop_vinfo, gs_info)
2108 || gs_info->decl)
2109 return vect_truncate_gather_scatter_offset (stmt_info, loop_vinfo,
2110 masked_p, gs_info);
2111
2112 scalar_mode element_mode = SCALAR_TYPE_MODE (gs_info->element_type);
2113 unsigned int element_bits = GET_MODE_BITSIZE (element_mode);
2114 tree offset_type = TREE_TYPE (gs_info->offset);
2115 unsigned int offset_bits = TYPE_PRECISION (offset_type);
2116
2117 /* Enforced by vect_check_gather_scatter. */
2118 gcc_assert (element_bits >= offset_bits);
2119
2120 /* If the elements are wider than the offset, convert the offset to the
2121 same width, without changing its sign. */
2122 if (element_bits > offset_bits)
2123 {
2124 bool unsigned_p = TYPE_UNSIGNED (offset_type);
2125 offset_type = build_nonstandard_integer_type (element_bits, unsigned_p);
2126 gs_info->offset = fold_convert (offset_type, gs_info->offset);
2127 }
2128
2129 if (dump_enabled_p ())
2130 dump_printf_loc (MSG_NOTE, vect_location,
2131 "using gather/scatter for strided/grouped access,"
2132 " scale = %d\n", gs_info->scale);
2133
2134 return true;
2135 }
2136
2137 /* STMT_INFO is a non-strided load or store, meaning that it accesses
2138 elements with a known constant step. Return -1 if that step
2139 is negative, 0 if it is zero, and 1 if it is greater than zero. */
2140
2141 static int
2142 compare_step_with_zero (stmt_vec_info stmt_info)
2143 {
2144 dr_vec_info *dr_info = STMT_VINFO_DR_INFO (stmt_info);
2145 return tree_int_cst_compare (vect_dr_behavior (dr_info)->step,
2146 size_zero_node);
2147 }
2148
2149 /* If the target supports a permute mask that reverses the elements in
2150 a vector of type VECTYPE, return that mask, otherwise return null. */
2151
2152 static tree
2153 perm_mask_for_reverse (tree vectype)
2154 {
2155 poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
2156
2157 /* The encoding has a single stepped pattern. */
2158 vec_perm_builder sel (nunits, 1, 3);
2159 for (int i = 0; i < 3; ++i)
2160 sel.quick_push (nunits - 1 - i);
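     /* For example, with 8 elements the encoded series 7, 6, 5 extends to the
        full reversal { 7, 6, 5, 4, 3, 2, 1, 0 }.  */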
2161
2162 vec_perm_indices indices (sel, 1, nunits);
2163 if (!can_vec_perm_const_p (TYPE_MODE (vectype), indices))
2164 return NULL_TREE;
2165 return vect_gen_perm_mask_checked (vectype, indices);
2166 }
2167
2168 /* STMT_INFO is either a masked or unconditional store. Return the value
2169 being stored. */
2170
2171 tree
2172 vect_get_store_rhs (stmt_vec_info stmt_info)
2173 {
2174 if (gassign *assign = dyn_cast <gassign *> (stmt_info->stmt))
2175 {
2176 gcc_assert (gimple_assign_single_p (assign));
2177 return gimple_assign_rhs1 (assign);
2178 }
2179 if (gcall *call = dyn_cast <gcall *> (stmt_info->stmt))
2180 {
2181 internal_fn ifn = gimple_call_internal_fn (call);
2182 int index = internal_fn_stored_value_index (ifn);
2183 gcc_assert (index >= 0);
2184 return gimple_call_arg (call, index);
2185 }
2186 gcc_unreachable ();
2187 }
2188
2189 /* A subroutine of get_load_store_type, with a subset of the same
2190 arguments. Handle the case where STMT_INFO is part of a grouped load
2191 or store.
2192
2193 For stores, the statements in the group are all consecutive
2194 and there is no gap at the end. For loads, the statements in the
2195 group might not be consecutive; there can be gaps between statements
2196 as well as at the end. */
2197
2198 static bool
2199 get_group_load_store_type (stmt_vec_info stmt_info, tree vectype, bool slp,
2200 bool masked_p, vec_load_store_type vls_type,
2201 vect_memory_access_type *memory_access_type,
2202 gather_scatter_info *gs_info)
2203 {
2204 vec_info *vinfo = stmt_info->vinfo;
2205 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
2206 class loop *loop = loop_vinfo ? LOOP_VINFO_LOOP (loop_vinfo) : NULL;
2207 stmt_vec_info first_stmt_info = DR_GROUP_FIRST_ELEMENT (stmt_info);
2208 dr_vec_info *first_dr_info = STMT_VINFO_DR_INFO (first_stmt_info);
2209 unsigned int group_size = DR_GROUP_SIZE (first_stmt_info);
2210 bool single_element_p = (stmt_info == first_stmt_info
2211 && !DR_GROUP_NEXT_ELEMENT (stmt_info));
2212 unsigned HOST_WIDE_INT gap = DR_GROUP_GAP (first_stmt_info);
2213 poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
2214
2215 /* True if the vectorized statements would access beyond the last
2216 statement in the group. */
2217 bool overrun_p = false;
2218
2219 /* True if we can cope with such overrun by peeling for gaps, so that
2220 there is at least one final scalar iteration after the vector loop. */
2221 bool can_overrun_p = (!masked_p
2222 && vls_type == VLS_LOAD
2223 && loop_vinfo
2224 && !loop->inner);
2225
2226 /* There can only be a gap at the end of the group if the stride is
2227 known at compile time. */
2228 gcc_assert (!STMT_VINFO_STRIDED_P (first_stmt_info) || gap == 0);
2229
2230 /* Stores can't yet have gaps. */
2231 gcc_assert (slp || vls_type == VLS_LOAD || gap == 0);
2232
2233 if (slp)
2234 {
2235 if (STMT_VINFO_STRIDED_P (first_stmt_info))
2236 {
2237 /* Try to use consecutive accesses of DR_GROUP_SIZE elements,
2238 separated by the stride, until we have a complete vector.
2239 Fall back to scalar accesses if that isn't possible. */
2240 if (multiple_p (nunits, group_size))
2241 *memory_access_type = VMAT_STRIDED_SLP;
2242 else
2243 *memory_access_type = VMAT_ELEMENTWISE;
2244 }
2245 else
2246 {
2247 overrun_p = loop_vinfo && gap != 0;
2248 if (overrun_p && vls_type != VLS_LOAD)
2249 {
2250 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2251 "Grouped store with gaps requires"
2252 " non-consecutive accesses\n");
2253 return false;
2254 }
2255 /* An overrun is fine if the trailing elements are smaller
2256 than the alignment boundary B. Every vector access will
2257 be a multiple of B and so we are guaranteed to access a
2258 non-gap element in the same B-sized block. */
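     /* For example, with a known alignment of 16 bytes and 4-byte elements,
        a trailing gap of up to 3 elements is fine: every vector access still
        covers a non-gap element in the same 16-byte block.  */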
2259 if (overrun_p
2260 && gap < (vect_known_alignment_in_bytes (first_dr_info)
2261 / vect_get_scalar_dr_size (first_dr_info)))
2262 overrun_p = false;
2263
2264 /* If the gap splits the vector in half and the target
2265 can do half-vector operations avoid the epilogue peeling
2266 by simply loading half of the vector only. Usually
2267 the construction with an upper zero half will be elided. */
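     /* For example, a group of 8 ints with a trailing gap of 4 can be
        handled by loading only a 4-element vector and building the
        8-element vector with a zero upper half, provided the target
        supports the half-size mode and the corresponding vec_init.  */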
2268 dr_alignment_support alignment_support_scheme;
2269 scalar_mode elmode = SCALAR_TYPE_MODE (TREE_TYPE (vectype));
2270 machine_mode vmode;
2271 if (overrun_p
2272 && !masked_p
2273 && (((alignment_support_scheme
2274 = vect_supportable_dr_alignment (first_dr_info, false)))
2275 == dr_aligned
2276 || alignment_support_scheme == dr_unaligned_supported)
2277 && known_eq (nunits, (group_size - gap) * 2)
2278 && known_eq (nunits, group_size)
2279 && mode_for_vector (elmode, (group_size - gap)).exists (&vmode)
2280 && VECTOR_MODE_P (vmode)
2281 && targetm.vector_mode_supported_p (vmode)
2282 && (convert_optab_handler (vec_init_optab,
2283 TYPE_MODE (vectype), vmode)
2284 != CODE_FOR_nothing))
2285 overrun_p = false;
2286
2287 if (overrun_p && !can_overrun_p)
2288 {
2289 if (dump_enabled_p ())
2290 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2291 "Peeling for outer loop is not supported\n");
2292 return false;
2293 }
2294 *memory_access_type = VMAT_CONTIGUOUS;
2295 }
2296 }
2297 else
2298 {
2299 /* We can always handle this case using elementwise accesses,
2300 but see if something more efficient is available. */
2301 *memory_access_type = VMAT_ELEMENTWISE;
2302
2303 /* If there is a gap at the end of the group then these optimizations
2304 would access excess elements in the last iteration. */
2305 bool would_overrun_p = (gap != 0);
2306 /* An overrun is fine if the trailing elements are smaller than the
2307 alignment boundary B. Every vector access will be a multiple of B
2308 and so we are guaranteed to access a non-gap element in the
2309 same B-sized block. */
2310 if (would_overrun_p
2311 && !masked_p
2312 && gap < (vect_known_alignment_in_bytes (first_dr_info)
2313 / vect_get_scalar_dr_size (first_dr_info)))
2314 would_overrun_p = false;
2315
2316 if (!STMT_VINFO_STRIDED_P (first_stmt_info)
2317 && (can_overrun_p || !would_overrun_p)
2318 && compare_step_with_zero (stmt_info) > 0)
2319 {
2320 /* First cope with the degenerate case of a single-element
2321 vector. */
2322 if (known_eq (TYPE_VECTOR_SUBPARTS (vectype), 1U))
2323 *memory_access_type = VMAT_CONTIGUOUS;
2324
2325 /* Otherwise try using LOAD/STORE_LANES. */
2326 if (*memory_access_type == VMAT_ELEMENTWISE
2327 && (vls_type == VLS_LOAD
2328 ? vect_load_lanes_supported (vectype, group_size, masked_p)
2329 : vect_store_lanes_supported (vectype, group_size,
2330 masked_p)))
2331 {
2332 *memory_access_type = VMAT_LOAD_STORE_LANES;
2333 overrun_p = would_overrun_p;
2334 }
2335
2336 /* If that fails, try using permuting loads. */
2337 if (*memory_access_type == VMAT_ELEMENTWISE
2338 && (vls_type == VLS_LOAD
2339 ? vect_grouped_load_supported (vectype, single_element_p,
2340 group_size)
2341 : vect_grouped_store_supported (vectype, group_size)))
2342 {
2343 *memory_access_type = VMAT_CONTIGUOUS_PERMUTE;
2344 overrun_p = would_overrun_p;
2345 }
2346 }
2347
2348 /* As a last resort, try using a gather load or scatter store.
2349
2350 ??? Although the code can handle all group sizes correctly,
2351 it probably isn't a win to use separate strided accesses based
2352 on nearby locations. Or, even if it's a win over scalar code,
2353 it might not be a win over vectorizing at a lower VF, if that
2354 allows us to use contiguous accesses. */
2355 if (*memory_access_type == VMAT_ELEMENTWISE
2356 && single_element_p
2357 && loop_vinfo
2358 && vect_use_strided_gather_scatters_p (stmt_info, loop_vinfo,
2359 masked_p, gs_info))
2360 *memory_access_type = VMAT_GATHER_SCATTER;
2361 }
2362
2363 if (vls_type != VLS_LOAD && first_stmt_info == stmt_info)
2364 {
2365 /* STMT_INFO is the leader of the group. Check the operands of all the
2366 stmts of the group. */
2367 stmt_vec_info next_stmt_info = DR_GROUP_NEXT_ELEMENT (stmt_info);
2368 while (next_stmt_info)
2369 {
2370 tree op = vect_get_store_rhs (next_stmt_info);
2371 enum vect_def_type dt;
2372 if (!vect_is_simple_use (op, vinfo, &dt))
2373 {
2374 if (dump_enabled_p ())
2375 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2376 "use not simple.\n");
2377 return false;
2378 }
2379 next_stmt_info = DR_GROUP_NEXT_ELEMENT (next_stmt_info);
2380 }
2381 }
2382
2383 if (overrun_p)
2384 {
2385 gcc_assert (can_overrun_p);
2386 if (dump_enabled_p ())
2387 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2388 "Data access with gaps requires scalar "
2389 "epilogue loop\n");
2390 LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo) = true;
2391 }
2392
2393 return true;
2394 }
2395
2396 /* A subroutine of get_load_store_type, with a subset of the same
2397 arguments. Handle the case where STMT_INFO is a load or store that
2398 accesses consecutive elements with a negative step. */
2399
2400 static vect_memory_access_type
2401 get_negative_load_store_type (stmt_vec_info stmt_info, tree vectype,
2402 vec_load_store_type vls_type,
2403 unsigned int ncopies)
2404 {
2405 dr_vec_info *dr_info = STMT_VINFO_DR_INFO (stmt_info);
2406 dr_alignment_support alignment_support_scheme;
2407
2408 if (ncopies > 1)
2409 {
2410 if (dump_enabled_p ())
2411 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2412 "multiple types with negative step.\n");
2413 return VMAT_ELEMENTWISE;
2414 }
2415
2416 alignment_support_scheme = vect_supportable_dr_alignment (dr_info, false);
2417 if (alignment_support_scheme != dr_aligned
2418 && alignment_support_scheme != dr_unaligned_supported)
2419 {
2420 if (dump_enabled_p ())
2421 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2422 "negative step but alignment required.\n");
2423 return VMAT_ELEMENTWISE;
2424 }
2425
2426 if (vls_type == VLS_STORE_INVARIANT)
2427 {
2428 if (dump_enabled_p ())
2429 dump_printf_loc (MSG_NOTE, vect_location,
2430 "negative step with invariant source;"
2431 " no permute needed.\n");
2432 return VMAT_CONTIGUOUS_DOWN;
2433 }
2434
2435 if (!perm_mask_for_reverse (vectype))
2436 {
2437 if (dump_enabled_p ())
2438 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2439 "negative step and reversing not supported.\n");
2440 return VMAT_ELEMENTWISE;
2441 }
2442
2443 return VMAT_CONTIGUOUS_REVERSE;
2444 }
2445
2446 /* Analyze load or store statement STMT_INFO of type VLS_TYPE. Return true
2447 if there is a memory access type that the vectorized form can use,
2448 storing it in *MEMORY_ACCESS_TYPE if so. If we decide to use gathers
2449 or scatters, fill in GS_INFO accordingly.
2450
2451 SLP says whether we're performing SLP rather than loop vectorization.
2452 MASKED_P is true if the statement is conditional on a vectorized mask.
2453 VECTYPE is the vector type that the vectorized statements will use.
2454 NCOPIES is the number of vector statements that will be needed. */
2455
2456 static bool
2457 get_load_store_type (stmt_vec_info stmt_info, tree vectype, bool slp,
2458 bool masked_p, vec_load_store_type vls_type,
2459 unsigned int ncopies,
2460 vect_memory_access_type *memory_access_type,
2461 gather_scatter_info *gs_info)
2462 {
2463 vec_info *vinfo = stmt_info->vinfo;
2464 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
2465 poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
2466 if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
2467 {
2468 *memory_access_type = VMAT_GATHER_SCATTER;
2469 if (!vect_check_gather_scatter (stmt_info, loop_vinfo, gs_info))
2470 gcc_unreachable ();
2471 else if (!vect_is_simple_use (gs_info->offset, vinfo,
2472 &gs_info->offset_dt,
2473 &gs_info->offset_vectype))
2474 {
2475 if (dump_enabled_p ())
2476 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2477 "%s index use not simple.\n",
2478 vls_type == VLS_LOAD ? "gather" : "scatter");
2479 return false;
2480 }
2481 }
2482 else if (STMT_VINFO_GROUPED_ACCESS (stmt_info))
2483 {
2484 if (!get_group_load_store_type (stmt_info, vectype, slp, masked_p,
2485 vls_type, memory_access_type, gs_info))
2486 return false;
2487 }
2488 else if (STMT_VINFO_STRIDED_P (stmt_info))
2489 {
2490 gcc_assert (!slp);
2491 if (loop_vinfo
2492 && vect_use_strided_gather_scatters_p (stmt_info, loop_vinfo,
2493 masked_p, gs_info))
2494 *memory_access_type = VMAT_GATHER_SCATTER;
2495 else
2496 *memory_access_type = VMAT_ELEMENTWISE;
2497 }
2498 else
2499 {
2500 int cmp = compare_step_with_zero (stmt_info);
2501 if (cmp < 0)
2502 *memory_access_type = get_negative_load_store_type
2503 (stmt_info, vectype, vls_type, ncopies);
2504 else if (cmp == 0)
2505 {
2506 gcc_assert (vls_type == VLS_LOAD);
2507 *memory_access_type = VMAT_INVARIANT;
2508 }
2509 else
2510 *memory_access_type = VMAT_CONTIGUOUS;
2511 }
2512
2513 if ((*memory_access_type == VMAT_ELEMENTWISE
2514 || *memory_access_type == VMAT_STRIDED_SLP)
2515 && !nunits.is_constant ())
2516 {
2517 if (dump_enabled_p ())
2518 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2519 "Not using elementwise accesses due to variable "
2520 "vectorization factor.\n");
2521 return false;
2522 }
2523
2524 /* FIXME: At the moment the cost model seems to underestimate the
2525 cost of using elementwise accesses. This check preserves the
2526 traditional behavior until that can be fixed. */
2527 stmt_vec_info first_stmt_info = DR_GROUP_FIRST_ELEMENT (stmt_info);
2528 if (!first_stmt_info)
2529 first_stmt_info = stmt_info;
2530 if (*memory_access_type == VMAT_ELEMENTWISE
2531 && !STMT_VINFO_STRIDED_P (first_stmt_info)
2532 && !(stmt_info == DR_GROUP_FIRST_ELEMENT (stmt_info)
2533 && !DR_GROUP_NEXT_ELEMENT (stmt_info)
2534 && !pow2p_hwi (DR_GROUP_SIZE (stmt_info))))
2535 {
2536 if (dump_enabled_p ())
2537 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2538 "not falling back to elementwise accesses\n");
2539 return false;
2540 }
2541 return true;
2542 }
2543
2544 /* Return true if boolean argument MASK is suitable for vectorizing
2545 conditional load or store STMT_INFO. When returning true, store the type
2546 of the definition in *MASK_DT_OUT and the type of the vectorized mask
2547 in *MASK_VECTYPE_OUT. */
2548
2549 static bool
2550 vect_check_load_store_mask (stmt_vec_info stmt_info, tree mask,
2551 vect_def_type *mask_dt_out,
2552 tree *mask_vectype_out)
2553 {
2554 vec_info *vinfo = stmt_info->vinfo;
2555 if (!VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (mask)))
2556 {
2557 if (dump_enabled_p ())
2558 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2559 "mask argument is not a boolean.\n");
2560 return false;
2561 }
2562
2563 if (TREE_CODE (mask) != SSA_NAME)
2564 {
2565 if (dump_enabled_p ())
2566 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2567 "mask argument is not an SSA name.\n");
2568 return false;
2569 }
2570
2571 enum vect_def_type mask_dt;
2572 tree mask_vectype;
2573 if (!vect_is_simple_use (mask, stmt_info->vinfo, &mask_dt, &mask_vectype))
2574 {
2575 if (dump_enabled_p ())
2576 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2577 "mask use not simple.\n");
2578 return false;
2579 }
2580
2581 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
2582 if (!mask_vectype)
2583 mask_vectype = get_mask_type_for_scalar_type (vinfo, TREE_TYPE (vectype));
2584
2585 if (!mask_vectype || !VECTOR_BOOLEAN_TYPE_P (mask_vectype))
2586 {
2587 if (dump_enabled_p ())
2588 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2589 "could not find an appropriate vector mask type.\n");
2590 return false;
2591 }
2592
2593 if (maybe_ne (TYPE_VECTOR_SUBPARTS (mask_vectype),
2594 TYPE_VECTOR_SUBPARTS (vectype)))
2595 {
2596 if (dump_enabled_p ())
2597 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2598 "vector mask type %T"
2599 " does not match vector data type %T.\n",
2600 mask_vectype, vectype);
2601
2602 return false;
2603 }
2604
2605 *mask_dt_out = mask_dt;
2606 *mask_vectype_out = mask_vectype;
2607 return true;
2608 }
2609
2610 /* Return true if stored value RHS is suitable for vectorizing store
2611 statement STMT_INFO. When returning true, store the type of the
2612 definition in *RHS_DT_OUT, the type of the vectorized store value in
2613 *RHS_VECTYPE_OUT and the type of the store in *VLS_TYPE_OUT. */
2614
2615 static bool
2616 vect_check_store_rhs (stmt_vec_info stmt_info, tree rhs,
2617 vect_def_type *rhs_dt_out, tree *rhs_vectype_out,
2618 vec_load_store_type *vls_type_out)
2619 {
2620 /* In the case this is a store from a constant, make sure
2621 native_encode_expr can handle it. */
2622 if (CONSTANT_CLASS_P (rhs) && native_encode_expr (rhs, NULL, 64) == 0)
2623 {
2624 if (dump_enabled_p ())
2625 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2626 "cannot encode constant as a byte sequence.\n");
2627 return false;
2628 }
2629
2630 enum vect_def_type rhs_dt;
2631 tree rhs_vectype;
2632 if (!vect_is_simple_use (rhs, stmt_info->vinfo, &rhs_dt, &rhs_vectype))
2633 {
2634 if (dump_enabled_p ())
2635 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2636 "use not simple.\n");
2637 return false;
2638 }
2639
2640 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
2641 if (rhs_vectype && !useless_type_conversion_p (vectype, rhs_vectype))
2642 {
2643 if (dump_enabled_p ())
2644 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2645 "incompatible vector types.\n");
2646 return false;
2647 }
2648
2649 *rhs_dt_out = rhs_dt;
2650 *rhs_vectype_out = rhs_vectype;
2651 if (rhs_dt == vect_constant_def || rhs_dt == vect_external_def)
2652 *vls_type_out = VLS_STORE_INVARIANT;
2653 else
2654 *vls_type_out = VLS_STORE;
2655 return true;
2656 }
2657
2658 /* Build an all-ones vector mask of type MASKTYPE while vectorizing STMT_INFO.
2659 Note that we support masks with floating-point type, in which case the
2660 floats are interpreted as a bitmask. */
2661
2662 static tree
2663 vect_build_all_ones_mask (stmt_vec_info stmt_info, tree masktype)
2664 {
2665 if (TREE_CODE (masktype) == INTEGER_TYPE)
2666 return build_int_cst (masktype, -1);
2667 else if (TREE_CODE (TREE_TYPE (masktype)) == INTEGER_TYPE)
2668 {
2669 tree mask = build_int_cst (TREE_TYPE (masktype), -1);
2670 mask = build_vector_from_val (masktype, mask);
2671 return vect_init_vector (stmt_info, mask, masktype, NULL);
2672 }
2673 else if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (masktype)))
2674 {
2675 REAL_VALUE_TYPE r;
2676 long tmp[6];
2677 for (int j = 0; j < 6; ++j)
2678 tmp[j] = -1;
2679 real_from_target (&r, tmp, TYPE_MODE (TREE_TYPE (masktype)));
2680 tree mask = build_real (TREE_TYPE (masktype), r);
2681 mask = build_vector_from_val (masktype, mask);
2682 return vect_init_vector (stmt_info, mask, masktype, NULL);
2683 }
2684 gcc_unreachable ();
2685 }
2686
2687 /* Build an all-zero merge value of type VECTYPE while vectorizing
2688 STMT_INFO as a gather load. */
2689
2690 static tree
2691 vect_build_zero_merge_argument (stmt_vec_info stmt_info, tree vectype)
2692 {
2693 tree merge;
2694 if (TREE_CODE (TREE_TYPE (vectype)) == INTEGER_TYPE)
2695 merge = build_int_cst (TREE_TYPE (vectype), 0);
2696 else if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (vectype)))
2697 {
2698 REAL_VALUE_TYPE r;
2699 long tmp[6];
2700 for (int j = 0; j < 6; ++j)
2701 tmp[j] = 0;
2702 real_from_target (&r, tmp, TYPE_MODE (TREE_TYPE (vectype)));
2703 merge = build_real (TREE_TYPE (vectype), r);
2704 }
2705 else
2706 gcc_unreachable ();
2707 merge = build_vector_from_val (vectype, merge);
2708 return vect_init_vector (stmt_info, merge, vectype, NULL);
2709 }
2710
2711 /* Build a gather load call while vectorizing STMT_INFO. Insert new
2712 instructions before GSI and add them to VEC_STMT. GS_INFO describes
2713 the gather load operation. If the load is conditional, MASK is the
2714 unvectorized condition and MASK_DT is its definition type, otherwise
2715 MASK is null. */
2716
2717 static void
2718 vect_build_gather_load_calls (stmt_vec_info stmt_info,
2719 gimple_stmt_iterator *gsi,
2720 stmt_vec_info *vec_stmt,
2721 gather_scatter_info *gs_info,
2722 tree mask)
2723 {
2724 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
2725 class loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
2726 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
2727 poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
2728 int ncopies = vect_get_num_copies (loop_vinfo, vectype);
2729 edge pe = loop_preheader_edge (loop);
2730 enum { NARROW, NONE, WIDEN } modifier;
2731 poly_uint64 gather_off_nunits
2732 = TYPE_VECTOR_SUBPARTS (gs_info->offset_vectype);
2733
2734 tree arglist = TYPE_ARG_TYPES (TREE_TYPE (gs_info->decl));
2735 tree rettype = TREE_TYPE (TREE_TYPE (gs_info->decl));
2736 tree srctype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
2737 tree ptrtype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
2738 tree idxtype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
2739 tree masktype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
2740 tree scaletype = TREE_VALUE (arglist);
2741 tree real_masktype = masktype;
2742 gcc_checking_assert (types_compatible_p (srctype, rettype)
2743 && (!mask
2744 || TREE_CODE (masktype) == INTEGER_TYPE
2745 || types_compatible_p (srctype, masktype)));
2746 if (mask && TREE_CODE (masktype) == INTEGER_TYPE)
2747 masktype = build_same_sized_truth_vector_type (srctype);
2748
2749 tree mask_halftype = masktype;
2750 tree perm_mask = NULL_TREE;
2751 tree mask_perm_mask = NULL_TREE;
2752 if (known_eq (nunits, gather_off_nunits))
2753 modifier = NONE;
2754 else if (known_eq (nunits * 2, gather_off_nunits))
2755 {
2756 modifier = WIDEN;
2757
2758 /* Currently widening gathers and scatters are only supported for
2759 fixed-length vectors. */
2760 int count = gather_off_nunits.to_constant ();
2761 vec_perm_builder sel (count, count, 1);
2762 for (int i = 0; i < count; ++i)
2763 sel.quick_push (i | (count / 2));
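     /* For example, with count == 8 this builds { 4, 5, 6, 7, 4, 5, 6, 7 },
        which selects the upper half of the offset vector for each
        odd-numbered copy of the gather.  */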
2764
2765 vec_perm_indices indices (sel, 1, count);
2766 perm_mask = vect_gen_perm_mask_checked (gs_info->offset_vectype,
2767 indices);
2768 }
2769 else if (known_eq (nunits, gather_off_nunits * 2))
2770 {
2771 modifier = NARROW;
2772
2773 /* Currently narrowing gathers and scatters are only supported for
2774 fixed-length vectors. */
2775 int count = nunits.to_constant ();
2776 vec_perm_builder sel (count, count, 1);
2777 sel.quick_grow (count);
2778 for (int i = 0; i < count; ++i)
2779 sel[i] = i < count / 2 ? i : i + count / 2;
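     /* For example, with count == 8 this builds { 0, 1, 2, 3, 8, 9, 10, 11 },
        i.e. the low half of each of two input vectors; it is used below to
        combine the results of two consecutive gather calls.  */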
2780 vec_perm_indices indices (sel, 2, count);
2781 perm_mask = vect_gen_perm_mask_checked (vectype, indices);
2782
2783 ncopies *= 2;
2784
2785 if (mask && masktype == real_masktype)
2786 {
2787 for (int i = 0; i < count; ++i)
2788 sel[i] = i | (count / 2);
2789 indices.new_vector (sel, 2, count);
2790 mask_perm_mask = vect_gen_perm_mask_checked (masktype, indices);
2791 }
2792 else if (mask)
2793 mask_halftype
2794 = build_same_sized_truth_vector_type (gs_info->offset_vectype);
2795 }
2796 else
2797 gcc_unreachable ();
2798
2799 tree scalar_dest = gimple_get_lhs (stmt_info->stmt);
2800 tree vec_dest = vect_create_destination_var (scalar_dest, vectype);
2801
2802 tree ptr = fold_convert (ptrtype, gs_info->base);
2803 if (!is_gimple_min_invariant (ptr))
2804 {
2805 gimple_seq seq;
2806 ptr = force_gimple_operand (ptr, &seq, true, NULL_TREE);
2807 basic_block new_bb = gsi_insert_seq_on_edge_immediate (pe, seq);
2808 gcc_assert (!new_bb);
2809 }
2810
2811 tree scale = build_int_cst (scaletype, gs_info->scale);
2812
2813 tree vec_oprnd0 = NULL_TREE;
2814 tree vec_mask = NULL_TREE;
2815 tree src_op = NULL_TREE;
2816 tree mask_op = NULL_TREE;
2817 tree prev_res = NULL_TREE;
2818 stmt_vec_info prev_stmt_info = NULL;
2819
2820 if (!mask)
2821 {
2822 src_op = vect_build_zero_merge_argument (stmt_info, rettype);
2823 mask_op = vect_build_all_ones_mask (stmt_info, masktype);
2824 }
2825
2826 for (int j = 0; j < ncopies; ++j)
2827 {
2828 tree op, var;
2829 if (modifier == WIDEN && (j & 1))
2830 op = permute_vec_elements (vec_oprnd0, vec_oprnd0,
2831 perm_mask, stmt_info, gsi);
2832 else if (j == 0)
2833 op = vec_oprnd0
2834 = vect_get_vec_def_for_operand (gs_info->offset, stmt_info);
2835 else
2836 op = vec_oprnd0 = vect_get_vec_def_for_stmt_copy (loop_vinfo,
2837 vec_oprnd0);
2838
2839 if (!useless_type_conversion_p (idxtype, TREE_TYPE (op)))
2840 {
2841 gcc_assert (known_eq (TYPE_VECTOR_SUBPARTS (TREE_TYPE (op)),
2842 TYPE_VECTOR_SUBPARTS (idxtype)));
2843 var = vect_get_new_ssa_name (idxtype, vect_simple_var);
2844 op = build1 (VIEW_CONVERT_EXPR, idxtype, op);
2845 gassign *new_stmt = gimple_build_assign (var, VIEW_CONVERT_EXPR, op);
2846 vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
2847 op = var;
2848 }
2849
2850 if (mask)
2851 {
2852 if (mask_perm_mask && (j & 1))
2853 mask_op = permute_vec_elements (mask_op, mask_op,
2854 mask_perm_mask, stmt_info, gsi);
2855 else
2856 {
2857 if (j == 0)
2858 vec_mask = vect_get_vec_def_for_operand (mask, stmt_info);
2859 else if (modifier != NARROW || (j & 1) == 0)
2860 vec_mask = vect_get_vec_def_for_stmt_copy (loop_vinfo,
2861 vec_mask);
2862
2863 mask_op = vec_mask;
2864 if (!useless_type_conversion_p (masktype, TREE_TYPE (vec_mask)))
2865 {
2866 poly_uint64 sub1 = TYPE_VECTOR_SUBPARTS (TREE_TYPE (mask_op));
2867 poly_uint64 sub2 = TYPE_VECTOR_SUBPARTS (masktype);
2868 gcc_assert (known_eq (sub1, sub2));
2869 var = vect_get_new_ssa_name (masktype, vect_simple_var);
2870 mask_op = build1 (VIEW_CONVERT_EXPR, masktype, mask_op);
2871 gassign *new_stmt
2872 = gimple_build_assign (var, VIEW_CONVERT_EXPR, mask_op);
2873 vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
2874 mask_op = var;
2875 }
2876 }
2877 if (modifier == NARROW && masktype != real_masktype)
2878 {
2879 var = vect_get_new_ssa_name (mask_halftype, vect_simple_var);
2880 gassign *new_stmt
2881 = gimple_build_assign (var, (j & 1) ? VEC_UNPACK_HI_EXPR
2882 : VEC_UNPACK_LO_EXPR,
2883 mask_op);
2884 vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
2885 mask_op = var;
2886 }
2887 src_op = mask_op;
2888 }
2889
2890 tree mask_arg = mask_op;
2891 if (masktype != real_masktype)
2892 {
2893 tree utype, optype = TREE_TYPE (mask_op);
2894 if (TYPE_MODE (real_masktype) == TYPE_MODE (optype))
2895 utype = real_masktype;
2896 else
2897 utype = lang_hooks.types.type_for_mode (TYPE_MODE (optype), 1);
2898 var = vect_get_new_ssa_name (utype, vect_scalar_var);
2899 mask_arg = build1 (VIEW_CONVERT_EXPR, utype, mask_op);
2900 gassign *new_stmt
2901 = gimple_build_assign (var, VIEW_CONVERT_EXPR, mask_arg);
2902 vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
2903 mask_arg = var;
2904 if (!useless_type_conversion_p (real_masktype, utype))
2905 {
2906 gcc_assert (TYPE_PRECISION (utype)
2907 <= TYPE_PRECISION (real_masktype));
2908 var = vect_get_new_ssa_name (real_masktype, vect_scalar_var);
2909 new_stmt = gimple_build_assign (var, NOP_EXPR, mask_arg);
2910 vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
2911 mask_arg = var;
2912 }
2913 src_op = build_zero_cst (srctype);
2914 }
2915 gcall *new_call = gimple_build_call (gs_info->decl, 5, src_op, ptr, op,
2916 mask_arg, scale);
2917
2918 stmt_vec_info new_stmt_info;
2919 if (!useless_type_conversion_p (vectype, rettype))
2920 {
2921 gcc_assert (known_eq (TYPE_VECTOR_SUBPARTS (vectype),
2922 TYPE_VECTOR_SUBPARTS (rettype)));
2923 op = vect_get_new_ssa_name (rettype, vect_simple_var);
2924 gimple_call_set_lhs (new_call, op);
2925 vect_finish_stmt_generation (stmt_info, new_call, gsi);
2926 var = make_ssa_name (vec_dest);
2927 op = build1 (VIEW_CONVERT_EXPR, vectype, op);
2928 gassign *new_stmt = gimple_build_assign (var, VIEW_CONVERT_EXPR, op);
2929 new_stmt_info
2930 = vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
2931 }
2932 else
2933 {
2934 var = make_ssa_name (vec_dest, new_call);
2935 gimple_call_set_lhs (new_call, var);
2936 new_stmt_info
2937 = vect_finish_stmt_generation (stmt_info, new_call, gsi);
2938 }
2939
2940 if (modifier == NARROW)
2941 {
2942 if ((j & 1) == 0)
2943 {
2944 prev_res = var;
2945 continue;
2946 }
2947 var = permute_vec_elements (prev_res, var, perm_mask,
2948 stmt_info, gsi);
2949 new_stmt_info = loop_vinfo->lookup_def (var);
2950 }
2951
2952 if (prev_stmt_info == NULL)
2953 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt_info;
2954 else
2955 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt_info;
2956 prev_stmt_info = new_stmt_info;
2957 }
2958 }
2959
2960 /* Prepare the base and offset in GS_INFO for vectorization.
2961 Set *DATAREF_PTR to the loop-invariant base address and *VEC_OFFSET
2962 to the vectorized offset argument for the first copy of STMT_INFO.
2963 STMT_INFO is the statement described by GS_INFO and LOOP is the
2964 containing loop. */
2965
2966 static void
2967 vect_get_gather_scatter_ops (class loop *loop, stmt_vec_info stmt_info,
2968 gather_scatter_info *gs_info,
2969 tree *dataref_ptr, tree *vec_offset)
2970 {
2971 vec_info *vinfo = stmt_info->vinfo;
2972 gimple_seq stmts = NULL;
2973 *dataref_ptr = force_gimple_operand (gs_info->base, &stmts, true, NULL_TREE);
2974 if (stmts != NULL)
2975 {
2976 basic_block new_bb;
2977 edge pe = loop_preheader_edge (loop);
2978 new_bb = gsi_insert_seq_on_edge_immediate (pe, stmts);
2979 gcc_assert (!new_bb);
2980 }
2981 tree offset_type = TREE_TYPE (gs_info->offset);
2982 tree offset_vectype = get_vectype_for_scalar_type (vinfo, offset_type);
2983 *vec_offset = vect_get_vec_def_for_operand (gs_info->offset, stmt_info,
2984 offset_vectype);
2985 }
2986
2987 /* Prepare to implement a grouped or strided load or store using
2988 the gather load or scatter store operation described by GS_INFO.
2989 STMT_INFO is the load or store statement.
2990
2991 Set *DATAREF_BUMP to the amount that should be added to the base
2992 address after each copy of the vectorized statement. Set *VEC_OFFSET
2993 to an invariant offset vector in which element I has the value
2994 I * DR_STEP / SCALE. */
2995
2996 static void
2997 vect_get_strided_load_store_ops (stmt_vec_info stmt_info,
2998 loop_vec_info loop_vinfo,
2999 gather_scatter_info *gs_info,
3000 tree *dataref_bump, tree *vec_offset)
3001 {
3002 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
3003 class loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
3004 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
3005 gimple_seq stmts;
3006
3007 tree bump = size_binop (MULT_EXPR,
3008 fold_convert (sizetype, DR_STEP (dr)),
3009 size_int (TYPE_VECTOR_SUBPARTS (vectype)));
3010 *dataref_bump = force_gimple_operand (bump, &stmts, true, NULL_TREE);
3011 if (stmts)
3012 gsi_insert_seq_on_edge_immediate (loop_preheader_edge (loop), stmts);
3013
3014 /* The offset given in GS_INFO can have pointer type, so use the element
3015 type of the vector instead. */
3016 tree offset_type = TREE_TYPE (gs_info->offset);
3017 tree offset_vectype = get_vectype_for_scalar_type (loop_vinfo, offset_type);
3018 offset_type = TREE_TYPE (offset_vectype);
3019
3020 /* Calculate X = DR_STEP / SCALE and convert it to the appropriate type. */
3021 tree step = size_binop (EXACT_DIV_EXPR, DR_STEP (dr),
3022 ssize_int (gs_info->scale));
3023 step = fold_convert (offset_type, step);
3024 step = force_gimple_operand (step, &stmts, true, NULL_TREE);
3025
3026 /* Create {0, X, X*2, X*3, ...}. */
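     /* For example, with DR_STEP == 12 and a scale of 4, X is 3, giving the
        offset vector { 0, 3, 6, 9, ... }; the gather or scatter applies the
        scale again when forming the final addresses.  */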
3027 *vec_offset = gimple_build (&stmts, VEC_SERIES_EXPR, offset_vectype,
3028 build_zero_cst (offset_type), step);
3029 if (stmts)
3030 gsi_insert_seq_on_edge_immediate (loop_preheader_edge (loop), stmts);
3031 }
3032
3033 /* Return the amount that should be added to a vector pointer to move
3034 to the next or previous copy of AGGR_TYPE. DR_INFO is the data reference
3035 being vectorized and MEMORY_ACCESS_TYPE describes the type of
3036 vectorization. */
3037
3038 static tree
3039 vect_get_data_ptr_increment (dr_vec_info *dr_info, tree aggr_type,
3040 vect_memory_access_type memory_access_type)
3041 {
3042 if (memory_access_type == VMAT_INVARIANT)
3043 return size_zero_node;
3044
3045 tree iv_step = TYPE_SIZE_UNIT (aggr_type);
3046 tree step = vect_dr_behavior (dr_info)->step;
3047 if (tree_int_cst_sgn (step) == -1)
3048 iv_step = fold_build1 (NEGATE_EXPR, TREE_TYPE (iv_step), iv_step);
3049 return iv_step;
3050 }
3051
3052 /* Check and perform vectorization of BUILT_IN_BSWAP{16,32,64}. */
3053
3054 static bool
3055 vectorizable_bswap (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
3056 stmt_vec_info *vec_stmt, slp_tree slp_node,
3057 tree vectype_in, stmt_vector_for_cost *cost_vec)
3058 {
3059 tree op, vectype;
3060 gcall *stmt = as_a <gcall *> (stmt_info->stmt);
3061 vec_info *vinfo = stmt_info->vinfo;
3062 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
3063 unsigned ncopies;
3064
3065 op = gimple_call_arg (stmt, 0);
3066 vectype = STMT_VINFO_VECTYPE (stmt_info);
3067 poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
3068
3069 /* Multiple types in SLP are handled by creating the appropriate number of
3070 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
3071 case of SLP. */
3072 if (slp_node)
3073 ncopies = 1;
3074 else
3075 ncopies = vect_get_num_copies (loop_vinfo, vectype);
3076
3077 gcc_assert (ncopies >= 1);
3078
3079 tree char_vectype = get_same_sized_vectype (char_type_node, vectype_in);
3080 if (! char_vectype)
3081 return false;
3082
3083 poly_uint64 num_bytes = TYPE_VECTOR_SUBPARTS (char_vectype);
3084 unsigned word_bytes;
3085 if (!constant_multiple_p (num_bytes, nunits, &word_bytes))
3086 return false;
3087
3088 /* The encoding uses one stepped pattern for each byte in the word. */
3089 vec_perm_builder elts (num_bytes, word_bytes, 3);
3090 for (unsigned i = 0; i < 3; ++i)
3091 for (unsigned j = 0; j < word_bytes; ++j)
3092 elts.quick_push ((i + 1) * word_bytes - j - 1);
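     /* For example, with 4-byte words (word_bytes == 4) the first patterns
        are { 3, 2, 1, 0 }, { 7, 6, 5, 4 } and { 11, 10, 9, 8 }, which extend
        to a byte reversal of every word in the vector.  */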
3093
3094 vec_perm_indices indices (elts, 1, num_bytes);
3095 if (!can_vec_perm_const_p (TYPE_MODE (char_vectype), indices))
3096 return false;
3097
3098 if (! vec_stmt)
3099 {
3100 STMT_VINFO_TYPE (stmt_info) = call_vec_info_type;
3101 DUMP_VECT_SCOPE ("vectorizable_bswap");
3102 if (! slp_node)
3103 {
3104 record_stmt_cost (cost_vec,
3105 1, vector_stmt, stmt_info, 0, vect_prologue);
3106 record_stmt_cost (cost_vec,
3107 ncopies, vec_perm, stmt_info, 0, vect_body);
3108 }
3109 return true;
3110 }
3111
3112 tree bswap_vconst = vec_perm_indices_to_tree (char_vectype, indices);
3113
3114 /* Transform. */
3115 vec<tree> vec_oprnds = vNULL;
3116 stmt_vec_info new_stmt_info = NULL;
3117 stmt_vec_info prev_stmt_info = NULL;
3118 for (unsigned j = 0; j < ncopies; j++)
3119 {
3120 /* Handle uses. */
3121 if (j == 0)
3122 vect_get_vec_defs (op, NULL, stmt_info, &vec_oprnds, NULL, slp_node);
3123 else
3124 vect_get_vec_defs_for_stmt_copy (vinfo, &vec_oprnds, NULL);
3125
3126 /* Arguments are ready. Create the new vector stmt. */
3127 unsigned i;
3128 tree vop;
3129 FOR_EACH_VEC_ELT (vec_oprnds, i, vop)
3130 {
3131 gimple *new_stmt;
3132 tree tem = make_ssa_name (char_vectype);
3133 new_stmt = gimple_build_assign (tem, build1 (VIEW_CONVERT_EXPR,
3134 char_vectype, vop));
3135 vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
3136 tree tem2 = make_ssa_name (char_vectype);
3137 new_stmt = gimple_build_assign (tem2, VEC_PERM_EXPR,
3138 tem, tem, bswap_vconst);
3139 vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
3140 tem = make_ssa_name (vectype);
3141 new_stmt = gimple_build_assign (tem, build1 (VIEW_CONVERT_EXPR,
3142 vectype, tem2));
3143 new_stmt_info
3144 = vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
3145 if (slp_node)
3146 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt_info);
3147 }
3148
3149 if (slp_node)
3150 continue;
3151
3152 if (j == 0)
3153 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt_info;
3154 else
3155 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt_info;
3156
3157 prev_stmt_info = new_stmt_info;
3158 }
3159
3160 vec_oprnds.release ();
3161 return true;
3162 }
3163
3164 /* Return true if vector types VECTYPE_IN and VECTYPE_OUT have
3165 integer elements and if we can narrow VECTYPE_IN to VECTYPE_OUT
3166 in a single step. On success, store the binary pack code in
3167 *CONVERT_CODE. */
3168
3169 static bool
3170 simple_integer_narrowing (vec_info *vinfo, tree vectype_out, tree vectype_in,
3171 tree_code *convert_code)
3172 {
3173 if (!INTEGRAL_TYPE_P (TREE_TYPE (vectype_out))
3174 || !INTEGRAL_TYPE_P (TREE_TYPE (vectype_in)))
3175 return false;
3176
3177 tree_code code;
3178 int multi_step_cvt = 0;
3179 auto_vec <tree, 8> interm_types;
3180 if (!supportable_narrowing_operation (vinfo, NOP_EXPR, vectype_out,
3181 vectype_in, &code, &multi_step_cvt,
3182 &interm_types)
3183 || multi_step_cvt)
3184 return false;
3185
3186 *convert_code = code;
3187 return true;
3188 }
3189
3190 /* Function vectorizable_call.
3191
3192 Check if STMT_INFO performs a function call that can be vectorized.
3193 If VEC_STMT is also passed, vectorize STMT_INFO: create a vectorized
3194 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
3195 Return true if STMT_INFO is vectorizable in this way. */
3196
3197 static bool
3198 vectorizable_call (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
3199 stmt_vec_info *vec_stmt, slp_tree slp_node,
3200 stmt_vector_for_cost *cost_vec)
3201 {
3202 gcall *stmt;
3203 tree vec_dest;
3204 tree scalar_dest;
3205 tree op;
3206 tree vec_oprnd0 = NULL_TREE, vec_oprnd1 = NULL_TREE;
3207 stmt_vec_info prev_stmt_info;
3208 tree vectype_out, vectype_in;
3209 poly_uint64 nunits_in;
3210 poly_uint64 nunits_out;
3211 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
3212 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
3213 vec_info *vinfo = stmt_info->vinfo;
3214 tree fndecl, new_temp, rhs_type;
3215 enum vect_def_type dt[4]
3216 = { vect_unknown_def_type, vect_unknown_def_type, vect_unknown_def_type,
3217 vect_unknown_def_type };
3218 tree vectypes[ARRAY_SIZE (dt)] = {};
3219 int ndts = ARRAY_SIZE (dt);
3220 int ncopies, j;
3221 auto_vec<tree, 8> vargs;
3222 auto_vec<tree, 8> orig_vargs;
3223 enum { NARROW, NONE, WIDEN } modifier;
3224 size_t i, nargs;
3225 tree lhs;
3226
3227 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
3228 return false;
3229
3230 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
3231 && ! vec_stmt)
3232 return false;
3233
3234 /* Is STMT_INFO a vectorizable call? */
3235 stmt = dyn_cast <gcall *> (stmt_info->stmt);
3236 if (!stmt)
3237 return false;
3238
3239 if (gimple_call_internal_p (stmt)
3240 && (internal_load_fn_p (gimple_call_internal_fn (stmt))
3241 || internal_store_fn_p (gimple_call_internal_fn (stmt))))
3242 /* Handled by vectorizable_load and vectorizable_store. */
3243 return false;
3244
3245 if (gimple_call_lhs (stmt) == NULL_TREE
3246 || TREE_CODE (gimple_call_lhs (stmt)) != SSA_NAME)
3247 return false;
3248
3249 gcc_checking_assert (!stmt_can_throw_internal (cfun, stmt));
3250
3251 vectype_out = STMT_VINFO_VECTYPE (stmt_info);
3252
3253 /* Process function arguments. */
3254 rhs_type = NULL_TREE;
3255 vectype_in = NULL_TREE;
3256 nargs = gimple_call_num_args (stmt);
3257
3258 /* Bail out if the function has more than four arguments; we do not have
3259 interesting builtin functions to vectorize with more than two arguments
3260 except for fma. No arguments is also not good. */
3261 if (nargs == 0 || nargs > 4)
3262 return false;
3263
3264 /* Ignore the arguments of IFN_GOMP_SIMD_LANE, they are magic. */
3265 combined_fn cfn = gimple_call_combined_fn (stmt);
3266 if (cfn == CFN_GOMP_SIMD_LANE)
3267 {
3268 nargs = 0;
3269 rhs_type = unsigned_type_node;
3270 }
3271
3272 int mask_opno = -1;
3273 if (internal_fn_p (cfn))
3274 mask_opno = internal_fn_mask_index (as_internal_fn (cfn));
3275
3276 for (i = 0; i < nargs; i++)
3277 {
3278 op = gimple_call_arg (stmt, i);
3279 if (!vect_is_simple_use (op, vinfo, &dt[i], &vectypes[i]))
3280 {
3281 if (dump_enabled_p ())
3282 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3283 "use not simple.\n");
3284 return false;
3285 }
3286
3287 /* Skip the mask argument to an internal function. This operand
3288 has been converted via a pattern if necessary. */
3289 if ((int) i == mask_opno)
3290 continue;
3291
3292 /* We can only handle calls with arguments of the same type. */
3293 if (rhs_type
3294 && !types_compatible_p (rhs_type, TREE_TYPE (op)))
3295 {
3296 if (dump_enabled_p ())
3297 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3298 "argument types differ.\n");
3299 return false;
3300 }
3301 if (!rhs_type)
3302 rhs_type = TREE_TYPE (op);
3303
3304 if (!vectype_in)
3305 vectype_in = vectypes[i];
3306 else if (vectypes[i]
3307 && !types_compatible_p (vectypes[i], vectype_in))
3308 {
3309 if (dump_enabled_p ())
3310 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3311 "argument vector types differ.\n");
3312 return false;
3313 }
3314 }
3315 /* If all arguments are external or constant defs, use a vector type with
3316 the same size as the output vector type. */
3317 if (!vectype_in)
3318 vectype_in = get_same_sized_vectype (rhs_type, vectype_out);
3319 if (vec_stmt)
3320 gcc_assert (vectype_in);
3321 if (!vectype_in)
3322 {
3323 if (dump_enabled_p ())
3324 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3325 "no vectype for scalar type %T\n", rhs_type);
3326
3327 return false;
3328 }
3329
3330 /* FORNOW */
3331 nunits_in = TYPE_VECTOR_SUBPARTS (vectype_in);
3332 nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
3333 if (known_eq (nunits_in * 2, nunits_out))
3334 modifier = NARROW;
3335 else if (known_eq (nunits_out, nunits_in))
3336 modifier = NONE;
3337 else if (known_eq (nunits_out * 2, nunits_in))
3338 modifier = WIDEN;
3339 else
3340 return false;
3341
3342 /* We only handle functions that do not read or clobber memory. */
3343 if (gimple_vuse (stmt))
3344 {
3345 if (dump_enabled_p ())
3346 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3347 "function reads from or writes to memory.\n");
3348 return false;
3349 }
3350
3351 /* For now, we only vectorize functions if a target specific builtin
3352 is available. TODO -- in some cases, it might be profitable to
3353 insert the calls for pieces of the vector, in order to be able
3354 to vectorize other operations in the loop. */
3355 fndecl = NULL_TREE;
3356 internal_fn ifn = IFN_LAST;
3357 tree callee = gimple_call_fndecl (stmt);
3358
3359 /* First try using an internal function. */
3360 tree_code convert_code = ERROR_MARK;
3361 if (cfn != CFN_LAST
3362 && (modifier == NONE
3363 || (modifier == NARROW
3364 && simple_integer_narrowing (vinfo, vectype_out, vectype_in,
3365 &convert_code))))
3366 ifn = vectorizable_internal_function (cfn, callee, vectype_out,
3367 vectype_in);
3368
3369 /* If that fails, try asking for a target-specific built-in function. */
3370 if (ifn == IFN_LAST)
3371 {
3372 if (cfn != CFN_LAST)
3373 fndecl = targetm.vectorize.builtin_vectorized_function
3374 (cfn, vectype_out, vectype_in);
3375 else if (callee && fndecl_built_in_p (callee, BUILT_IN_MD))
3376 fndecl = targetm.vectorize.builtin_md_vectorized_function
3377 (callee, vectype_out, vectype_in);
3378 }
3379
3380 if (ifn == IFN_LAST && !fndecl)
3381 {
3382 if (cfn == CFN_GOMP_SIMD_LANE
3383 && !slp_node
3384 && loop_vinfo
3385 && LOOP_VINFO_LOOP (loop_vinfo)->simduid
3386 && TREE_CODE (gimple_call_arg (stmt, 0)) == SSA_NAME
3387 && LOOP_VINFO_LOOP (loop_vinfo)->simduid
3388 == SSA_NAME_VAR (gimple_call_arg (stmt, 0)))
3389 {
3390 /* We can handle IFN_GOMP_SIMD_LANE by returning a
3391 { 0, 1, 2, ... vf - 1 } vector. */
3392 gcc_assert (nargs == 0);
3393 }
3394 else if (modifier == NONE
3395 && (gimple_call_builtin_p (stmt, BUILT_IN_BSWAP16)
3396 || gimple_call_builtin_p (stmt, BUILT_IN_BSWAP32)
3397 || gimple_call_builtin_p (stmt, BUILT_IN_BSWAP64)))
3398 return vectorizable_bswap (stmt_info, gsi, vec_stmt, slp_node,
3399 vectype_in, cost_vec);
3400 else
3401 {
3402 if (dump_enabled_p ())
3403 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3404 "function is not vectorizable.\n");
3405 return false;
3406 }
3407 }
3408
3409 if (slp_node)
3410 ncopies = 1;
3411 else if (modifier == NARROW && ifn == IFN_LAST)
3412 ncopies = vect_get_num_copies (loop_vinfo, vectype_out);
3413 else
3414 ncopies = vect_get_num_copies (loop_vinfo, vectype_in);
3415
3416 /* Sanity check: make sure that at least one copy of the vectorized stmt
3417 needs to be generated. */
3418 gcc_assert (ncopies >= 1);
3419
3420 vec_loop_masks *masks = (loop_vinfo ? &LOOP_VINFO_MASKS (loop_vinfo) : NULL);
3421 if (!vec_stmt) /* transformation not required. */
3422 {
3423 STMT_VINFO_TYPE (stmt_info) = call_vec_info_type;
3424 DUMP_VECT_SCOPE ("vectorizable_call");
3425 vect_model_simple_cost (stmt_info, ncopies, dt, ndts, slp_node, cost_vec);
3426 if (ifn != IFN_LAST && modifier == NARROW && !slp_node)
3427 record_stmt_cost (cost_vec, ncopies / 2,
3428 vec_promote_demote, stmt_info, 0, vect_body);
3429
3430 if (loop_vinfo && mask_opno >= 0)
3431 {
3432 unsigned int nvectors = (slp_node
3433 ? SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node)
3434 : ncopies);
3435 tree scalar_mask = gimple_call_arg (stmt_info->stmt, mask_opno);
3436 vect_record_loop_mask (loop_vinfo, masks, nvectors,
3437 vectype_out, scalar_mask);
3438 }
3439 return true;
3440 }
3441
3442 /* Transform. */
3443
3444 if (dump_enabled_p ())
3445 dump_printf_loc (MSG_NOTE, vect_location, "transform call.\n");
3446
3447 /* Handle def. */
3448 scalar_dest = gimple_call_lhs (stmt);
3449 vec_dest = vect_create_destination_var (scalar_dest, vectype_out);
3450
3451 bool masked_loop_p = loop_vinfo && LOOP_VINFO_FULLY_MASKED_P (loop_vinfo);
3452
3453 stmt_vec_info new_stmt_info = NULL;
3454 prev_stmt_info = NULL;
3455 if (modifier == NONE || ifn != IFN_LAST)
3456 {
3457 tree prev_res = NULL_TREE;
3458 vargs.safe_grow (nargs);
3459 orig_vargs.safe_grow (nargs);
3460 for (j = 0; j < ncopies; ++j)
3461 {
3462 /* Build argument list for the vectorized call. */
3463 if (slp_node)
3464 {
3465 auto_vec<vec<tree> > vec_defs (nargs);
3466 vec<tree> vec_oprnds0;
3467
3468 vect_get_slp_defs (slp_node, &vec_defs);
3469 vec_oprnds0 = vec_defs[0];
3470
3471 /* Arguments are ready. Create the new vector stmt. */
3472 FOR_EACH_VEC_ELT (vec_oprnds0, i, vec_oprnd0)
3473 {
3474 size_t k;
3475 for (k = 0; k < nargs; k++)
3476 {
3477 vec<tree> vec_oprndsk = vec_defs[k];
3478 vargs[k] = vec_oprndsk[i];
3479 }
3480 if (modifier == NARROW)
3481 {
3482 /* We don't define any narrowing conditional functions
3483 at present. */
3484 gcc_assert (mask_opno < 0);
3485 tree half_res = make_ssa_name (vectype_in);
3486 gcall *call
3487 = gimple_build_call_internal_vec (ifn, vargs);
3488 gimple_call_set_lhs (call, half_res);
3489 gimple_call_set_nothrow (call, true);
3490 vect_finish_stmt_generation (stmt_info, call, gsi);
3491 if ((i & 1) == 0)
3492 {
3493 prev_res = half_res;
3494 continue;
3495 }
3496 new_temp = make_ssa_name (vec_dest);
3497 gimple *new_stmt
3498 = gimple_build_assign (new_temp, convert_code,
3499 prev_res, half_res);
3500 new_stmt_info
3501 = vect_finish_stmt_generation (stmt_info, new_stmt,
3502 gsi);
3503 }
3504 else
3505 {
3506 if (mask_opno >= 0 && masked_loop_p)
3507 {
3508 unsigned int vec_num = vec_oprnds0.length ();
3509 /* Always true for SLP. */
3510 gcc_assert (ncopies == 1);
3511 tree mask = vect_get_loop_mask (gsi, masks, vec_num,
3512 vectype_out, i);
3513 vargs[mask_opno] = prepare_load_store_mask
3514 (TREE_TYPE (mask), mask, vargs[mask_opno], gsi);
3515 }
3516
3517 gcall *call;
3518 if (ifn != IFN_LAST)
3519 call = gimple_build_call_internal_vec (ifn, vargs);
3520 else
3521 call = gimple_build_call_vec (fndecl, vargs);
3522 new_temp = make_ssa_name (vec_dest, call);
3523 gimple_call_set_lhs (call, new_temp);
3524 gimple_call_set_nothrow (call, true);
3525 new_stmt_info
3526 = vect_finish_stmt_generation (stmt_info, call, gsi);
3527 }
3528 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt_info);
3529 }
3530
3531 for (i = 0; i < nargs; i++)
3532 {
3533 vec<tree> vec_oprndsi = vec_defs[i];
3534 vec_oprndsi.release ();
3535 }
3536 continue;
3537 }
3538
3539 if (mask_opno >= 0 && !vectypes[mask_opno])
3540 {
3541 gcc_assert (modifier != WIDEN);
3542 vectypes[mask_opno]
3543 = build_same_sized_truth_vector_type (vectype_in);
3544 }
3545
3546 for (i = 0; i < nargs; i++)
3547 {
3548 op = gimple_call_arg (stmt, i);
3549 if (j == 0)
3550 vec_oprnd0
3551 = vect_get_vec_def_for_operand (op, stmt_info, vectypes[i]);
3552 else
3553 vec_oprnd0
3554 = vect_get_vec_def_for_stmt_copy (vinfo, orig_vargs[i]);
3555
3556 orig_vargs[i] = vargs[i] = vec_oprnd0;
3557 }
3558
3559 if (mask_opno >= 0 && masked_loop_p)
3560 {
3561 tree mask = vect_get_loop_mask (gsi, masks, ncopies,
3562 vectype_out, j);
3563 vargs[mask_opno]
3564 = prepare_load_store_mask (TREE_TYPE (mask), mask,
3565 vargs[mask_opno], gsi);
3566 }
3567
3568 if (cfn == CFN_GOMP_SIMD_LANE)
3569 {
3570 tree cst = build_index_vector (vectype_out, j * nunits_out, 1);
3571 tree new_var
3572 = vect_get_new_ssa_name (vectype_out, vect_simple_var, "cst_");
3573 gimple *init_stmt = gimple_build_assign (new_var, cst);
3574 vect_init_vector_1 (stmt_info, init_stmt, NULL);
3575 new_temp = make_ssa_name (vec_dest);
3576 gimple *new_stmt = gimple_build_assign (new_temp, new_var);
3577 new_stmt_info
3578 = vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
3579 }
3580 else if (modifier == NARROW)
3581 {
3582 /* We don't define any narrowing conditional functions at
3583 present. */
3584 gcc_assert (mask_opno < 0);
3585 tree half_res = make_ssa_name (vectype_in);
3586 gcall *call = gimple_build_call_internal_vec (ifn, vargs);
3587 gimple_call_set_lhs (call, half_res);
3588 gimple_call_set_nothrow (call, true);
3589 vect_finish_stmt_generation (stmt_info, call, gsi);
3590 if ((j & 1) == 0)
3591 {
3592 prev_res = half_res;
3593 continue;
3594 }
3595 new_temp = make_ssa_name (vec_dest);
3596 gassign *new_stmt = gimple_build_assign (new_temp, convert_code,
3597 prev_res, half_res);
3598 new_stmt_info
3599 = vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
3600 }
3601 else
3602 {
3603 gcall *call;
3604 if (ifn != IFN_LAST)
3605 call = gimple_build_call_internal_vec (ifn, vargs);
3606 else
3607 call = gimple_build_call_vec (fndecl, vargs);
3608 new_temp = make_ssa_name (vec_dest, call);
3609 gimple_call_set_lhs (call, new_temp);
3610 gimple_call_set_nothrow (call, true);
3611 new_stmt_info
3612 = vect_finish_stmt_generation (stmt_info, call, gsi);
3613 }
3614
3615 if (j == (modifier == NARROW ? 1 : 0))
3616 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt_info;
3617 else
3618 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt_info;
3619
3620 prev_stmt_info = new_stmt_info;
3621 }
3622 }
3623 else if (modifier == NARROW)
3624 {
3625 /* We don't define any narrowing conditional functions at present. */
3626 gcc_assert (mask_opno < 0);
3627 for (j = 0; j < ncopies; ++j)
3628 {
3629 /* Build argument list for the vectorized call. */
3630 if (j == 0)
3631 vargs.create (nargs * 2);
3632 else
3633 vargs.truncate (0);
3634
3635 if (slp_node)
3636 {
3637 auto_vec<vec<tree> > vec_defs (nargs);
3638 vec<tree> vec_oprnds0;
3639
3640 vect_get_slp_defs (slp_node, &vec_defs);
3641 vec_oprnds0 = vec_defs[0];
3642
3643 /* Arguments are ready. Create the new vector stmt. */
3644 for (i = 0; vec_oprnds0.iterate (i, &vec_oprnd0); i += 2)
3645 {
3646 size_t k;
3647 vargs.truncate (0);
3648 for (k = 0; k < nargs; k++)
3649 {
3650 vec<tree> vec_oprndsk = vec_defs[k];
3651 vargs.quick_push (vec_oprndsk[i]);
3652 vargs.quick_push (vec_oprndsk[i + 1]);
3653 }
3654 gcall *call;
3655 if (ifn != IFN_LAST)
3656 call = gimple_build_call_internal_vec (ifn, vargs);
3657 else
3658 call = gimple_build_call_vec (fndecl, vargs);
3659 new_temp = make_ssa_name (vec_dest, call);
3660 gimple_call_set_lhs (call, new_temp);
3661 gimple_call_set_nothrow (call, true);
3662 new_stmt_info
3663 = vect_finish_stmt_generation (stmt_info, call, gsi);
3664 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt_info);
3665 }
3666
3667 for (i = 0; i < nargs; i++)
3668 {
3669 vec<tree> vec_oprndsi = vec_defs[i];
3670 vec_oprndsi.release ();
3671 }
3672 continue;
3673 }
3674
3675 for (i = 0; i < nargs; i++)
3676 {
3677 op = gimple_call_arg (stmt, i);
3678 if (j == 0)
3679 {
3680 vec_oprnd0
3681 = vect_get_vec_def_for_operand (op, stmt_info,
3682 vectypes[i]);
3683 vec_oprnd1
3684 = vect_get_vec_def_for_stmt_copy (vinfo, vec_oprnd0);
3685 }
3686 else
3687 {
3688 vec_oprnd1 = gimple_call_arg (new_stmt_info->stmt,
3689 2 * i + 1);
3690 vec_oprnd0
3691 = vect_get_vec_def_for_stmt_copy (vinfo, vec_oprnd1);
3692 vec_oprnd1
3693 = vect_get_vec_def_for_stmt_copy (vinfo, vec_oprnd0);
3694 }
3695
3696 vargs.quick_push (vec_oprnd0);
3697 vargs.quick_push (vec_oprnd1);
3698 }
3699
3700 gcall *new_stmt = gimple_build_call_vec (fndecl, vargs);
3701 new_temp = make_ssa_name (vec_dest, new_stmt);
3702 gimple_call_set_lhs (new_stmt, new_temp);
3703 new_stmt_info
3704 = vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
3705
3706 if (j == 0)
3707 STMT_VINFO_VEC_STMT (stmt_info) = new_stmt_info;
3708 else
3709 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt_info;
3710
3711 prev_stmt_info = new_stmt_info;
3712 }
3713
3714 *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
3715 }
3716 else
3717 /* No current target implements this case. */
3718 return false;
3719
3720 vargs.release ();
3721
3722 /* The call in STMT might prevent it from being removed in dce.
3723 However, we cannot remove it here, due to the way the ssa name
3724 it defines is mapped to the new definition. So just replace
3725 the rhs of the statement with something harmless. */
3726
3727 if (slp_node)
3728 return true;
3729
3730 stmt_info = vect_orig_stmt (stmt_info);
3731 lhs = gimple_get_lhs (stmt_info->stmt);
3732
3733 gassign *new_stmt
3734 = gimple_build_assign (lhs, build_zero_cst (TREE_TYPE (lhs)));
3735 vinfo->replace_stmt (gsi, stmt_info, new_stmt);
3736
3737 return true;
3738 }
3739
3740
3741 struct simd_call_arg_info
3742 {
3743 tree vectype;
3744 tree op;
3745 HOST_WIDE_INT linear_step;
3746 enum vect_def_type dt;
3747 unsigned int align;
3748 bool simd_lane_linear;
3749 };
3750
3751 /* Helper function of vectorizable_simd_clone_call. If OP, an SSA_NAME,
3752 is linear within a simd lane (but not within the whole loop), note it in
3753 *ARGINFO. */
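/* For example, given a hypothetical GIMPLE fragment inside an OpenMP simd
   loop (SSA names are illustrative only):

	_1 = .GOMP_SIMD_LANE (simduid.0_5);
	_2 = (sizetype) _1;
	_3 = _2 * 4;
	op_4 = &a + _3;

   where OP_4 is defined by a POINTER_PLUS_EXPR, OP_4 is linear within a
   simd lane; this function records ARGINFO->op == &a,
   ARGINFO->linear_step == 4 and sets ARGINFO->simd_lane_linear.  */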
3754
3755 static void
3756 vect_simd_lane_linear (tree op, class loop *loop,
3757 struct simd_call_arg_info *arginfo)
3758 {
3759 gimple *def_stmt = SSA_NAME_DEF_STMT (op);
3760
3761 if (!is_gimple_assign (def_stmt)
3762 || gimple_assign_rhs_code (def_stmt) != POINTER_PLUS_EXPR
3763 || !is_gimple_min_invariant (gimple_assign_rhs1 (def_stmt)))
3764 return;
3765
3766 tree base = gimple_assign_rhs1 (def_stmt);
3767 HOST_WIDE_INT linear_step = 0;
3768 tree v = gimple_assign_rhs2 (def_stmt);
3769 while (TREE_CODE (v) == SSA_NAME)
3770 {
3771 tree t;
3772 def_stmt = SSA_NAME_DEF_STMT (v);
3773 if (is_gimple_assign (def_stmt))
3774 switch (gimple_assign_rhs_code (def_stmt))
3775 {
3776 case PLUS_EXPR:
3777 t = gimple_assign_rhs2 (def_stmt);
3778 if (linear_step || TREE_CODE (t) != INTEGER_CST)
3779 return;
3780 base = fold_build2 (POINTER_PLUS_EXPR, TREE_TYPE (base), base, t);
3781 v = gimple_assign_rhs1 (def_stmt);
3782 continue;
3783 case MULT_EXPR:
3784 t = gimple_assign_rhs2 (def_stmt);
3785 if (linear_step || !tree_fits_shwi_p (t) || integer_zerop (t))
3786 return;
3787 linear_step = tree_to_shwi (t);
3788 v = gimple_assign_rhs1 (def_stmt);
3789 continue;
3790 CASE_CONVERT:
3791 t = gimple_assign_rhs1 (def_stmt);
3792 if (TREE_CODE (TREE_TYPE (t)) != INTEGER_TYPE
3793 || (TYPE_PRECISION (TREE_TYPE (v))
3794 < TYPE_PRECISION (TREE_TYPE (t))))
3795 return;
3796 if (!linear_step)
3797 linear_step = 1;
3798 v = t;
3799 continue;
3800 default:
3801 return;
3802 }
3803 else if (gimple_call_internal_p (def_stmt, IFN_GOMP_SIMD_LANE)
3804 && loop->simduid
3805 && TREE_CODE (gimple_call_arg (def_stmt, 0)) == SSA_NAME
3806 && (SSA_NAME_VAR (gimple_call_arg (def_stmt, 0))
3807 == loop->simduid))
3808 {
3809 if (!linear_step)
3810 linear_step = 1;
3811 arginfo->linear_step = linear_step;
3812 arginfo->op = base;
3813 arginfo->simd_lane_linear = true;
3814 return;
3815 }
3816 }
3817 }
3818
3819 /* Return the number of elements in vector type VECTYPE, which is associated
3820 with a SIMD clone. At present these vectors always have a constant
3821 length. */
3822
3823 static unsigned HOST_WIDE_INT
3824 simd_clone_subparts (tree vectype)
3825 {
3826 return TYPE_VECTOR_SUBPARTS (vectype).to_constant ();
3827 }
3828
3829 /* Function vectorizable_simd_clone_call.
3830
3831 Check if STMT_INFO performs a function call that can be vectorized
3832 by calling a simd clone of the function.
3833 If VEC_STMT is also passed, vectorize STMT_INFO: create a vectorized
3834 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
3835 Return true if STMT_INFO is vectorizable in this way. */
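/* A sketch of the transformation, assuming a clone with simdlen 4 and two
   vector arguments is selected (the clone name below merely follows the
   usual vector-ABI mangling and is purely illustrative):

	scalar stmt:	  x_1 = foo (y_2, z_3);
	vectorized stmt:  vect_x.0_4 = _ZGVbN4vv_foo (vect_y.1_5, vect_z.2_6);

   With VF > simdlen the call is repeated NCOPIES == VF / simdlen times;
   uniform and linear arguments are passed as scalars, as dictated by the
   chosen clone's argument kinds.  */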
3836
3837 static bool
3838 vectorizable_simd_clone_call (stmt_vec_info stmt_info,
3839 gimple_stmt_iterator *gsi,
3840 stmt_vec_info *vec_stmt, slp_tree slp_node,
3841 stmt_vector_for_cost *)
3842 {
3843 tree vec_dest;
3844 tree scalar_dest;
3845 tree op, type;
3846 tree vec_oprnd0 = NULL_TREE;
3847 stmt_vec_info prev_stmt_info;
3848 tree vectype;
3849 unsigned int nunits;
3850 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
3851 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
3852 vec_info *vinfo = stmt_info->vinfo;
3853 class loop *loop = loop_vinfo ? LOOP_VINFO_LOOP (loop_vinfo) : NULL;
3854 tree fndecl, new_temp;
3855 int ncopies, j;
3856 auto_vec<simd_call_arg_info> arginfo;
3857 vec<tree> vargs = vNULL;
3858 size_t i, nargs;
3859 tree lhs, rtype, ratype;
3860 vec<constructor_elt, va_gc> *ret_ctor_elts = NULL;
3861
3862 /* Is STMT a vectorizable call? */
3863 gcall *stmt = dyn_cast <gcall *> (stmt_info->stmt);
3864 if (!stmt)
3865 return false;
3866
3867 fndecl = gimple_call_fndecl (stmt);
3868 if (fndecl == NULL_TREE)
3869 return false;
3870
3871 struct cgraph_node *node = cgraph_node::get (fndecl);
3872 if (node == NULL || node->simd_clones == NULL)
3873 return false;
3874
3875 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
3876 return false;
3877
3878 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
3879 && ! vec_stmt)
3880 return false;
3881
3882 if (gimple_call_lhs (stmt)
3883 && TREE_CODE (gimple_call_lhs (stmt)) != SSA_NAME)
3884 return false;
3885
3886 gcc_checking_assert (!stmt_can_throw_internal (cfun, stmt));
3887
3888 vectype = STMT_VINFO_VECTYPE (stmt_info);
3889
3890 if (loop_vinfo && nested_in_vect_loop_p (loop, stmt_info))
3891 return false;
3892
3893 /* FORNOW */
3894 if (slp_node)
3895 return false;
3896
3897 /* Process function arguments. */
3898 nargs = gimple_call_num_args (stmt);
3899
3900 /* Bail out if the function has zero arguments. */
3901 if (nargs == 0)
3902 return false;
3903
3904 arginfo.reserve (nargs, true);
3905
3906 for (i = 0; i < nargs; i++)
3907 {
3908 simd_call_arg_info thisarginfo;
3909 affine_iv iv;
3910
3911 thisarginfo.linear_step = 0;
3912 thisarginfo.align = 0;
3913 thisarginfo.op = NULL_TREE;
3914 thisarginfo.simd_lane_linear = false;
3915
3916 op = gimple_call_arg (stmt, i);
3917 if (!vect_is_simple_use (op, vinfo, &thisarginfo.dt,
3918 &thisarginfo.vectype)
3919 || thisarginfo.dt == vect_uninitialized_def)
3920 {
3921 if (dump_enabled_p ())
3922 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3923 "use not simple.\n");
3924 return false;
3925 }
3926
3927 if (thisarginfo.dt == vect_constant_def
3928 || thisarginfo.dt == vect_external_def)
3929 gcc_assert (thisarginfo.vectype == NULL_TREE);
3930 else
3931 gcc_assert (thisarginfo.vectype != NULL_TREE);
3932
3933 /* For linear arguments, the analyze phase should have saved
3934 the base and step in STMT_VINFO_SIMD_CLONE_INFO. */
3935 if (i * 3 + 4 <= STMT_VINFO_SIMD_CLONE_INFO (stmt_info).length ()
3936 && STMT_VINFO_SIMD_CLONE_INFO (stmt_info)[i * 3 + 2])
3937 {
3938 gcc_assert (vec_stmt);
3939 thisarginfo.linear_step
3940 = tree_to_shwi (STMT_VINFO_SIMD_CLONE_INFO (stmt_info)[i * 3 + 2]);
3941 thisarginfo.op
3942 = STMT_VINFO_SIMD_CLONE_INFO (stmt_info)[i * 3 + 1];
3943 thisarginfo.simd_lane_linear
3944 = (STMT_VINFO_SIMD_CLONE_INFO (stmt_info)[i * 3 + 3]
3945 == boolean_true_node);
3946 /* If the loop has been peeled for alignment, we need to adjust it. */
3947 tree n1 = LOOP_VINFO_NITERS_UNCHANGED (loop_vinfo);
3948 tree n2 = LOOP_VINFO_NITERS (loop_vinfo);
3949 if (n1 != n2 && !thisarginfo.simd_lane_linear)
3950 {
3951 tree bias = fold_build2 (MINUS_EXPR, TREE_TYPE (n1), n1, n2);
3952 tree step = STMT_VINFO_SIMD_CLONE_INFO (stmt_info)[i * 3 + 2];
3953 tree opt = TREE_TYPE (thisarginfo.op);
3954 bias = fold_convert (TREE_TYPE (step), bias);
3955 bias = fold_build2 (MULT_EXPR, TREE_TYPE (step), bias, step);
3956 thisarginfo.op
3957 = fold_build2 (POINTER_TYPE_P (opt)
3958 ? POINTER_PLUS_EXPR : PLUS_EXPR, opt,
3959 thisarginfo.op, bias);
3960 }
3961 }
3962 else if (!vec_stmt
3963 && thisarginfo.dt != vect_constant_def
3964 && thisarginfo.dt != vect_external_def
3965 && loop_vinfo
3966 && TREE_CODE (op) == SSA_NAME
3967 && simple_iv (loop, loop_containing_stmt (stmt), op,
3968 &iv, false)
3969 && tree_fits_shwi_p (iv.step))
3970 {
3971 thisarginfo.linear_step = tree_to_shwi (iv.step);
3972 thisarginfo.op = iv.base;
3973 }
3974 else if ((thisarginfo.dt == vect_constant_def
3975 || thisarginfo.dt == vect_external_def)
3976 && POINTER_TYPE_P (TREE_TYPE (op)))
3977 thisarginfo.align = get_pointer_alignment (op) / BITS_PER_UNIT;
3978 /* Addresses of array elements indexed by GOMP_SIMD_LANE are
3979 linear too. */
3980 if (POINTER_TYPE_P (TREE_TYPE (op))
3981 && !thisarginfo.linear_step
3982 && !vec_stmt
3983 && thisarginfo.dt != vect_constant_def
3984 && thisarginfo.dt != vect_external_def
3985 && loop_vinfo
3986 && !slp_node
3987 && TREE_CODE (op) == SSA_NAME)
3988 vect_simd_lane_linear (op, loop, &thisarginfo);
3989
3990 arginfo.quick_push (thisarginfo);
3991 }
3992
3993 unsigned HOST_WIDE_INT vf;
3994 if (!LOOP_VINFO_VECT_FACTOR (loop_vinfo).is_constant (&vf))
3995 {
3996 if (dump_enabled_p ())
3997 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3998 "not considering SIMD clones; not yet supported"
3999 " for variable-width vectors.\n");
4000 return false;
4001 }
4002
4003 unsigned int badness = 0;
4004 struct cgraph_node *bestn = NULL;
4005 if (STMT_VINFO_SIMD_CLONE_INFO (stmt_info).exists ())
4006 bestn = cgraph_node::get (STMT_VINFO_SIMD_CLONE_INFO (stmt_info)[0]);
4007 else
4008 for (struct cgraph_node *n = node->simd_clones; n != NULL;
4009 n = n->simdclone->next_clone)
4010 {
4011 unsigned int this_badness = 0;
4012 if (n->simdclone->simdlen > vf
4013 || n->simdclone->nargs != nargs)
4014 continue;
4015 if (n->simdclone->simdlen < vf)
4016 this_badness += (exact_log2 (vf)
4017 - exact_log2 (n->simdclone->simdlen)) * 1024;
4018 if (n->simdclone->inbranch)
4019 this_badness += 2048;
4020 int target_badness = targetm.simd_clone.usable (n);
4021 if (target_badness < 0)
4022 continue;
4023 this_badness += target_badness * 512;
4024 /* FORNOW: Have to add code to add the mask argument. */
4025 if (n->simdclone->inbranch)
4026 continue;
4027 for (i = 0; i < nargs; i++)
4028 {
4029 switch (n->simdclone->args[i].arg_type)
4030 {
4031 case SIMD_CLONE_ARG_TYPE_VECTOR:
4032 if (!useless_type_conversion_p
4033 (n->simdclone->args[i].orig_type,
4034 TREE_TYPE (gimple_call_arg (stmt, i))))
4035 i = -1;
4036 else if (arginfo[i].dt == vect_constant_def
4037 || arginfo[i].dt == vect_external_def
4038 || arginfo[i].linear_step)
4039 this_badness += 64;
4040 break;
4041 case SIMD_CLONE_ARG_TYPE_UNIFORM:
4042 if (arginfo[i].dt != vect_constant_def
4043 && arginfo[i].dt != vect_external_def)
4044 i = -1;
4045 break;
4046 case SIMD_CLONE_ARG_TYPE_LINEAR_CONSTANT_STEP:
4047 case SIMD_CLONE_ARG_TYPE_LINEAR_REF_CONSTANT_STEP:
4048 if (arginfo[i].dt == vect_constant_def
4049 || arginfo[i].dt == vect_external_def
4050 || (arginfo[i].linear_step
4051 != n->simdclone->args[i].linear_step))
4052 i = -1;
4053 break;
4054 case SIMD_CLONE_ARG_TYPE_LINEAR_VARIABLE_STEP:
4055 case SIMD_CLONE_ARG_TYPE_LINEAR_VAL_CONSTANT_STEP:
4056 case SIMD_CLONE_ARG_TYPE_LINEAR_UVAL_CONSTANT_STEP:
4057 case SIMD_CLONE_ARG_TYPE_LINEAR_REF_VARIABLE_STEP:
4058 case SIMD_CLONE_ARG_TYPE_LINEAR_VAL_VARIABLE_STEP:
4059 case SIMD_CLONE_ARG_TYPE_LINEAR_UVAL_VARIABLE_STEP:
4060 /* FORNOW */
4061 i = -1;
4062 break;
4063 case SIMD_CLONE_ARG_TYPE_MASK:
4064 gcc_unreachable ();
4065 }
4066 if (i == (size_t) -1)
4067 break;
4068 if (n->simdclone->args[i].alignment > arginfo[i].align)
4069 {
4070 i = -1;
4071 break;
4072 }
4073 if (arginfo[i].align)
4074 this_badness += (exact_log2 (arginfo[i].align)
4075 - exact_log2 (n->simdclone->args[i].alignment));
4076 }
4077 if (i == (size_t) -1)
4078 continue;
4079 if (bestn == NULL || this_badness < badness)
4080 {
4081 bestn = n;
4082 badness = this_badness;
4083 }
4084 }
4085
4086 if (bestn == NULL)
4087 return false;
4088
4089 for (i = 0; i < nargs; i++)
4090 if ((arginfo[i].dt == vect_constant_def
4091 || arginfo[i].dt == vect_external_def)
4092 && bestn->simdclone->args[i].arg_type == SIMD_CLONE_ARG_TYPE_VECTOR)
4093 {
4094 tree arg_type = TREE_TYPE (gimple_call_arg (stmt, i));
4095 arginfo[i].vectype = get_vectype_for_scalar_type (vinfo, arg_type);
4096 if (arginfo[i].vectype == NULL
4097 || (simd_clone_subparts (arginfo[i].vectype)
4098 > bestn->simdclone->simdlen))
4099 return false;
4100 }
4101
4102 fndecl = bestn->decl;
4103 nunits = bestn->simdclone->simdlen;
4104 ncopies = vf / nunits;
4105
4106 /* If the function isn't const, only allow it in simd loops where the user
4107 has asserted that at least nunits consecutive iterations can be
4108 performed using SIMD instructions. */
4109 if ((loop == NULL || (unsigned) loop->safelen < nunits)
4110 && gimple_vuse (stmt))
4111 return false;
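/* E.g. a clone with simdlen 8 that reads or writes memory can only be used
   if the source loop carried something along the lines of
   "#pragma omp simd safelen(8)" (or a larger safelen), which is what sets
   loop->safelen checked above.  */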
4112
4113 /* Sanity check: make sure that at least one copy of the vectorized stmt
4114 needs to be generated. */
4115 gcc_assert (ncopies >= 1);
4116
4117 if (!vec_stmt) /* transformation not required. */
4118 {
4119 STMT_VINFO_SIMD_CLONE_INFO (stmt_info).safe_push (bestn->decl);
4120 for (i = 0; i < nargs; i++)
4121 if ((bestn->simdclone->args[i].arg_type
4122 == SIMD_CLONE_ARG_TYPE_LINEAR_CONSTANT_STEP)
4123 || (bestn->simdclone->args[i].arg_type
4124 == SIMD_CLONE_ARG_TYPE_LINEAR_REF_CONSTANT_STEP))
4125 {
4126 STMT_VINFO_SIMD_CLONE_INFO (stmt_info).safe_grow_cleared (i * 3
4127 + 1);
4128 STMT_VINFO_SIMD_CLONE_INFO (stmt_info).safe_push (arginfo[i].op);
4129 tree lst = POINTER_TYPE_P (TREE_TYPE (arginfo[i].op))
4130 ? size_type_node : TREE_TYPE (arginfo[i].op);
4131 tree ls = build_int_cst (lst, arginfo[i].linear_step);
4132 STMT_VINFO_SIMD_CLONE_INFO (stmt_info).safe_push (ls);
4133 tree sll = arginfo[i].simd_lane_linear
4134 ? boolean_true_node : boolean_false_node;
4135 STMT_VINFO_SIMD_CLONE_INFO (stmt_info).safe_push (sll);
4136 }
4137 STMT_VINFO_TYPE (stmt_info) = call_simd_clone_vec_info_type;
4138 DUMP_VECT_SCOPE ("vectorizable_simd_clone_call");
4139 /* vect_model_simple_cost (stmt_info, ncopies, dt, slp_node, cost_vec); */
4140 return true;
4141 }
4142
4143 /* Transform. */
4144
4145 if (dump_enabled_p ())
4146 dump_printf_loc (MSG_NOTE, vect_location, "transform call.\n");
4147
4148 /* Handle def. */
4149 scalar_dest = gimple_call_lhs (stmt);
4150 vec_dest = NULL_TREE;
4151 rtype = NULL_TREE;
4152 ratype = NULL_TREE;
4153 if (scalar_dest)
4154 {
4155 vec_dest = vect_create_destination_var (scalar_dest, vectype);
4156 rtype = TREE_TYPE (TREE_TYPE (fndecl));
4157 if (TREE_CODE (rtype) == ARRAY_TYPE)
4158 {
4159 ratype = rtype;
4160 rtype = TREE_TYPE (ratype);
4161 }
4162 }
4163
4164 prev_stmt_info = NULL;
4165 for (j = 0; j < ncopies; ++j)
4166 {
4167 /* Build argument list for the vectorized call. */
4168 if (j == 0)
4169 vargs.create (nargs);
4170 else
4171 vargs.truncate (0);
4172
4173 for (i = 0; i < nargs; i++)
4174 {
4175 unsigned int k, l, m, o;
4176 tree atype;
4177 op = gimple_call_arg (stmt, i);
4178 switch (bestn->simdclone->args[i].arg_type)
4179 {
4180 case SIMD_CLONE_ARG_TYPE_VECTOR:
4181 atype = bestn->simdclone->args[i].vector_type;
4182 o = nunits / simd_clone_subparts (atype);
4183 for (m = j * o; m < (j + 1) * o; m++)
4184 {
4185 if (simd_clone_subparts (atype)
4186 < simd_clone_subparts (arginfo[i].vectype))
4187 {
4188 poly_uint64 prec = GET_MODE_BITSIZE (TYPE_MODE (atype));
4189 k = (simd_clone_subparts (arginfo[i].vectype)
4190 / simd_clone_subparts (atype));
4191 gcc_assert ((k & (k - 1)) == 0);
4192 if (m == 0)
4193 vec_oprnd0
4194 = vect_get_vec_def_for_operand (op, stmt_info);
4195 else
4196 {
4197 vec_oprnd0 = arginfo[i].op;
4198 if ((m & (k - 1)) == 0)
4199 vec_oprnd0
4200 = vect_get_vec_def_for_stmt_copy (vinfo,
4201 vec_oprnd0);
4202 }
4203 arginfo[i].op = vec_oprnd0;
4204 vec_oprnd0
4205 = build3 (BIT_FIELD_REF, atype, vec_oprnd0,
4206 bitsize_int (prec),
4207 bitsize_int ((m & (k - 1)) * prec));
4208 gassign *new_stmt
4209 = gimple_build_assign (make_ssa_name (atype),
4210 vec_oprnd0);
4211 vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
4212 vargs.safe_push (gimple_assign_lhs (new_stmt));
4213 }
4214 else
4215 {
4216 k = (simd_clone_subparts (atype)
4217 / simd_clone_subparts (arginfo[i].vectype));
4218 gcc_assert ((k & (k - 1)) == 0);
4219 vec<constructor_elt, va_gc> *ctor_elts;
4220 if (k != 1)
4221 vec_alloc (ctor_elts, k);
4222 else
4223 ctor_elts = NULL;
4224 for (l = 0; l < k; l++)
4225 {
4226 if (m == 0 && l == 0)
4227 vec_oprnd0
4228 = vect_get_vec_def_for_operand (op, stmt_info);
4229 else
4230 vec_oprnd0
4231 = vect_get_vec_def_for_stmt_copy (vinfo,
4232 arginfo[i].op);
4233 arginfo[i].op = vec_oprnd0;
4234 if (k == 1)
4235 break;
4236 CONSTRUCTOR_APPEND_ELT (ctor_elts, NULL_TREE,
4237 vec_oprnd0);
4238 }
4239 if (k == 1)
4240 vargs.safe_push (vec_oprnd0);
4241 else
4242 {
4243 vec_oprnd0 = build_constructor (atype, ctor_elts);
4244 gassign *new_stmt
4245 = gimple_build_assign (make_ssa_name (atype),
4246 vec_oprnd0);
4247 vect_finish_stmt_generation (stmt_info, new_stmt,
4248 gsi);
4249 vargs.safe_push (gimple_assign_lhs (new_stmt));
4250 }
4251 }
4252 }
4253 break;
4254 case SIMD_CLONE_ARG_TYPE_UNIFORM:
4255 vargs.safe_push (op);
4256 break;
4257 case SIMD_CLONE_ARG_TYPE_LINEAR_CONSTANT_STEP:
4258 case SIMD_CLONE_ARG_TYPE_LINEAR_REF_CONSTANT_STEP:
4259 if (j == 0)
4260 {
4261 gimple_seq stmts;
4262 arginfo[i].op
4263 = force_gimple_operand (arginfo[i].op, &stmts, true,
4264 NULL_TREE);
4265 if (stmts != NULL)
4266 {
4267 basic_block new_bb;
4268 edge pe = loop_preheader_edge (loop);
4269 new_bb = gsi_insert_seq_on_edge_immediate (pe, stmts);
4270 gcc_assert (!new_bb);
4271 }
4272 if (arginfo[i].simd_lane_linear)
4273 {
4274 vargs.safe_push (arginfo[i].op);
4275 break;
4276 }
4277 tree phi_res = copy_ssa_name (op);
4278 gphi *new_phi = create_phi_node (phi_res, loop->header);
4279 loop_vinfo->add_stmt (new_phi);
4280 add_phi_arg (new_phi, arginfo[i].op,
4281 loop_preheader_edge (loop), UNKNOWN_LOCATION);
4282 enum tree_code code
4283 = POINTER_TYPE_P (TREE_TYPE (op))
4284 ? POINTER_PLUS_EXPR : PLUS_EXPR;
4285 tree type = POINTER_TYPE_P (TREE_TYPE (op))
4286 ? sizetype : TREE_TYPE (op);
4287 widest_int cst
4288 = wi::mul (bestn->simdclone->args[i].linear_step,
4289 ncopies * nunits);
4290 tree tcst = wide_int_to_tree (type, cst);
4291 tree phi_arg = copy_ssa_name (op);
4292 gassign *new_stmt
4293 = gimple_build_assign (phi_arg, code, phi_res, tcst);
4294 gimple_stmt_iterator si = gsi_after_labels (loop->header);
4295 gsi_insert_after (&si, new_stmt, GSI_NEW_STMT);
4296 loop_vinfo->add_stmt (new_stmt);
4297 add_phi_arg (new_phi, phi_arg, loop_latch_edge (loop),
4298 UNKNOWN_LOCATION);
4299 arginfo[i].op = phi_res;
4300 vargs.safe_push (phi_res);
4301 }
4302 else
4303 {
4304 enum tree_code code
4305 = POINTER_TYPE_P (TREE_TYPE (op))
4306 ? POINTER_PLUS_EXPR : PLUS_EXPR;
4307 tree type = POINTER_TYPE_P (TREE_TYPE (op))
4308 ? sizetype : TREE_TYPE (op);
4309 widest_int cst
4310 = wi::mul (bestn->simdclone->args[i].linear_step,
4311 j * nunits);
4312 tree tcst = wide_int_to_tree (type, cst);
4313 new_temp = make_ssa_name (TREE_TYPE (op));
4314 gassign *new_stmt
4315 = gimple_build_assign (new_temp, code,
4316 arginfo[i].op, tcst);
4317 vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
4318 vargs.safe_push (new_temp);
4319 }
4320 break;
4321 case SIMD_CLONE_ARG_TYPE_LINEAR_VAL_CONSTANT_STEP:
4322 case SIMD_CLONE_ARG_TYPE_LINEAR_UVAL_CONSTANT_STEP:
4323 case SIMD_CLONE_ARG_TYPE_LINEAR_VARIABLE_STEP:
4324 case SIMD_CLONE_ARG_TYPE_LINEAR_REF_VARIABLE_STEP:
4325 case SIMD_CLONE_ARG_TYPE_LINEAR_VAL_VARIABLE_STEP:
4326 case SIMD_CLONE_ARG_TYPE_LINEAR_UVAL_VARIABLE_STEP:
4327 default:
4328 gcc_unreachable ();
4329 }
4330 }
4331
4332 gcall *new_call = gimple_build_call_vec (fndecl, vargs);
4333 if (vec_dest)
4334 {
4335 gcc_assert (ratype || simd_clone_subparts (rtype) == nunits);
4336 if (ratype)
4337 new_temp = create_tmp_var (ratype);
4338 else if (simd_clone_subparts (vectype)
4339 == simd_clone_subparts (rtype))
4340 new_temp = make_ssa_name (vec_dest, new_call);
4341 else
4342 new_temp = make_ssa_name (rtype, new_call);
4343 gimple_call_set_lhs (new_call, new_temp);
4344 }
4345 stmt_vec_info new_stmt_info
4346 = vect_finish_stmt_generation (stmt_info, new_call, gsi);
4347
4348 if (vec_dest)
4349 {
4350 if (simd_clone_subparts (vectype) < nunits)
4351 {
4352 unsigned int k, l;
4353 poly_uint64 prec = GET_MODE_BITSIZE (TYPE_MODE (vectype));
4354 poly_uint64 bytes = GET_MODE_SIZE (TYPE_MODE (vectype));
4355 k = nunits / simd_clone_subparts (vectype);
4356 gcc_assert ((k & (k - 1)) == 0);
4357 for (l = 0; l < k; l++)
4358 {
4359 tree t;
4360 if (ratype)
4361 {
4362 t = build_fold_addr_expr (new_temp);
4363 t = build2 (MEM_REF, vectype, t,
4364 build_int_cst (TREE_TYPE (t), l * bytes));
4365 }
4366 else
4367 t = build3 (BIT_FIELD_REF, vectype, new_temp,
4368 bitsize_int (prec), bitsize_int (l * prec));
4369 gimple *new_stmt
4370 = gimple_build_assign (make_ssa_name (vectype), t);
4371 new_stmt_info
4372 = vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
4373
4374 if (j == 0 && l == 0)
4375 STMT_VINFO_VEC_STMT (stmt_info)
4376 = *vec_stmt = new_stmt_info;
4377 else
4378 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt_info;
4379
4380 prev_stmt_info = new_stmt_info;
4381 }
4382
4383 if (ratype)
4384 vect_clobber_variable (stmt_info, gsi, new_temp);
4385 continue;
4386 }
4387 else if (simd_clone_subparts (vectype) > nunits)
4388 {
4389 unsigned int k = (simd_clone_subparts (vectype)
4390 / simd_clone_subparts (rtype));
4391 gcc_assert ((k & (k - 1)) == 0);
4392 if ((j & (k - 1)) == 0)
4393 vec_alloc (ret_ctor_elts, k);
4394 if (ratype)
4395 {
4396 unsigned int m, o = nunits / simd_clone_subparts (rtype);
4397 for (m = 0; m < o; m++)
4398 {
4399 tree tem = build4 (ARRAY_REF, rtype, new_temp,
4400 size_int (m), NULL_TREE, NULL_TREE);
4401 gimple *new_stmt
4402 = gimple_build_assign (make_ssa_name (rtype), tem);
4403 new_stmt_info
4404 = vect_finish_stmt_generation (stmt_info, new_stmt,
4405 gsi);
4406 CONSTRUCTOR_APPEND_ELT (ret_ctor_elts, NULL_TREE,
4407 gimple_assign_lhs (new_stmt));
4408 }
4409 vect_clobber_variable (stmt_info, gsi, new_temp);
4410 }
4411 else
4412 CONSTRUCTOR_APPEND_ELT (ret_ctor_elts, NULL_TREE, new_temp);
4413 if ((j & (k - 1)) != k - 1)
4414 continue;
4415 vec_oprnd0 = build_constructor (vectype, ret_ctor_elts);
4416 gimple *new_stmt
4417 = gimple_build_assign (make_ssa_name (vec_dest), vec_oprnd0);
4418 new_stmt_info
4419 = vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
4420
4421 if ((unsigned) j == k - 1)
4422 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt_info;
4423 else
4424 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt_info;
4425
4426 prev_stmt_info = new_stmt_info;
4427 continue;
4428 }
4429 else if (ratype)
4430 {
4431 tree t = build_fold_addr_expr (new_temp);
4432 t = build2 (MEM_REF, vectype, t,
4433 build_int_cst (TREE_TYPE (t), 0));
4434 gimple *new_stmt
4435 = gimple_build_assign (make_ssa_name (vec_dest), t);
4436 new_stmt_info
4437 = vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
4438 vect_clobber_variable (stmt_info, gsi, new_temp);
4439 }
4440 }
4441
4442 if (j == 0)
4443 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt_info;
4444 else
4445 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt_info;
4446
4447 prev_stmt_info = new_stmt_info;
4448 }
4449
4450 vargs.release ();
4451
4452 /* The call in STMT might prevent it from being removed in dce.
4453 However, we cannot remove it here, due to the way the ssa name
4454 it defines is mapped to the new definition. So just replace
4455 the rhs of the statement with something harmless. */
4456
4457 if (slp_node)
4458 return true;
4459
4460 gimple *new_stmt;
4461 if (scalar_dest)
4462 {
4463 type = TREE_TYPE (scalar_dest);
4464 lhs = gimple_call_lhs (vect_orig_stmt (stmt_info)->stmt);
4465 new_stmt = gimple_build_assign (lhs, build_zero_cst (type));
4466 }
4467 else
4468 new_stmt = gimple_build_nop ();
4469 vinfo->replace_stmt (gsi, vect_orig_stmt (stmt_info), new_stmt);
4470 unlink_stmt_vdef (stmt);
4471
4472 return true;
4473 }
4474
4475
4476 /* Function vect_gen_widened_results_half
4477
4478 Create a vector stmt whose code, number of operands, and result
4479 variable are CODE, OP_TYPE, and VEC_DEST, and whose operands are
4480 VEC_OPRND0 and VEC_OPRND1. The new vector stmt is to be inserted at GSI.
4481 In the case that CODE is a CALL_EXPR, this means that a call to DECL
4482 needs to be created (DECL is a function-decl of a target-builtin).
4483 STMT_INFO is the original scalar stmt that we are vectorizing. */
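/* For instance, for the low half of a widening from short to int elements,
   a single call to this function might emit (in tree notation, with
   illustrative names):

	vect_lo_1 = VEC_UNPACK_LO_EXPR <vect_in_2>;

   with CODE == VEC_UNPACK_LO_EXPR; the caller then asks for the matching
   _HI half, or, when CODE is CALL_EXPR, for a call to the target builtin
   DECL instead.  */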
4484
4485 static gimple *
4486 vect_gen_widened_results_half (enum tree_code code,
4487 tree decl,
4488 tree vec_oprnd0, tree vec_oprnd1, int op_type,
4489 tree vec_dest, gimple_stmt_iterator *gsi,
4490 stmt_vec_info stmt_info)
4491 {
4492 gimple *new_stmt;
4493 tree new_temp;
4494
4495 /* Generate half of the widened result: */
4496 if (code == CALL_EXPR)
4497 {
4498 /* Target specific support */
4499 if (op_type == binary_op)
4500 new_stmt = gimple_build_call (decl, 2, vec_oprnd0, vec_oprnd1);
4501 else
4502 new_stmt = gimple_build_call (decl, 1, vec_oprnd0);
4503 new_temp = make_ssa_name (vec_dest, new_stmt);
4504 gimple_call_set_lhs (new_stmt, new_temp);
4505 }
4506 else
4507 {
4508 /* Generic support */
4509 gcc_assert (op_type == TREE_CODE_LENGTH (code));
4510 if (op_type != binary_op)
4511 vec_oprnd1 = NULL;
4512 new_stmt = gimple_build_assign (vec_dest, code, vec_oprnd0, vec_oprnd1);
4513 new_temp = make_ssa_name (vec_dest, new_stmt);
4514 gimple_assign_set_lhs (new_stmt, new_temp);
4515 }
4516 vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
4517
4518 return new_stmt;
4519 }
4520
4521
4522 /* Get vectorized definitions for loop-based vectorization of STMT_INFO.
4523 For the first operand we call vect_get_vec_def_for_operand (with OPRND
4524 containing the scalar operand), and for the rest we get a copy with
4525 vect_get_vec_def_for_stmt_copy() using the previous vector definition
4526 (stored in OPRND). See vect_get_vec_def_for_stmt_copy() for details.
4527 The vectors are collected into VEC_OPRNDS. */
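/* E.g. with MULTI_STEP_CVT == 1 this pushes four vector defs into
   VEC_OPRNDS for *OPRND: the def for the scalar operand, a stmt copy of
   it, and two further copies (each a copy of the previous one), leaving
   *OPRND pointing at the last copy so a later call can continue the
   chain.  */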
4528
4529 static void
4530 vect_get_loop_based_defs (tree *oprnd, stmt_vec_info stmt_info,
4531 vec<tree> *vec_oprnds, int multi_step_cvt)
4532 {
4533 vec_info *vinfo = stmt_info->vinfo;
4534 tree vec_oprnd;
4535
4536 /* Get first vector operand. */
4537 /* All the vector operands except the very first one (that is scalar oprnd)
4538 are stmt copies. */
4539 if (TREE_CODE (TREE_TYPE (*oprnd)) != VECTOR_TYPE)
4540 vec_oprnd = vect_get_vec_def_for_operand (*oprnd, stmt_info);
4541 else
4542 vec_oprnd = vect_get_vec_def_for_stmt_copy (vinfo, *oprnd);
4543
4544 vec_oprnds->quick_push (vec_oprnd);
4545
4546 /* Get second vector operand. */
4547 vec_oprnd = vect_get_vec_def_for_stmt_copy (vinfo, vec_oprnd);
4548 vec_oprnds->quick_push (vec_oprnd);
4549
4550 *oprnd = vec_oprnd;
4551
4552 /* For conversion in multiple steps, continue to get operands
4553 recursively. */
4554 if (multi_step_cvt)
4555 vect_get_loop_based_defs (oprnd, stmt_info, vec_oprnds,
4556 multi_step_cvt - 1);
4557 }
4558
4559
4560 /* Create vectorized demotion statements for vector operands from VEC_OPRNDS.
4561 For multi-step conversions store the resulting vectors and call the function
4562 recursively. */
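/* A sketch of one invocation with CODE == VEC_PACK_TRUNC_EXPR and
   VEC_OPRNDS containing { v0, v1, v2, v3 }:

	d0 = VEC_PACK_TRUNC_EXPR <v0, v1>;
	d1 = VEC_PACK_TRUNC_EXPR <v2, v3>;

   For a single-step conversion d0 and d1 are the final vectorized stmts;
   for a multi-step one they are stored back into VEC_OPRNDS and packed
   again by the recursive call.  */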
4563
4564 static void
4565 vect_create_vectorized_demotion_stmts (vec<tree> *vec_oprnds,
4566 int multi_step_cvt,
4567 stmt_vec_info stmt_info,
4568 vec<tree> vec_dsts,
4569 gimple_stmt_iterator *gsi,
4570 slp_tree slp_node, enum tree_code code,
4571 stmt_vec_info *prev_stmt_info)
4572 {
4573 unsigned int i;
4574 tree vop0, vop1, new_tmp, vec_dest;
4575
4576 vec_dest = vec_dsts.pop ();
4577
4578 for (i = 0; i < vec_oprnds->length (); i += 2)
4579 {
4580 /* Create demotion operation. */
4581 vop0 = (*vec_oprnds)[i];
4582 vop1 = (*vec_oprnds)[i + 1];
4583 gassign *new_stmt = gimple_build_assign (vec_dest, code, vop0, vop1);
4584 new_tmp = make_ssa_name (vec_dest, new_stmt);
4585 gimple_assign_set_lhs (new_stmt, new_tmp);
4586 stmt_vec_info new_stmt_info
4587 = vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
4588
4589 if (multi_step_cvt)
4590 /* Store the resulting vector for next recursive call. */
4591 (*vec_oprnds)[i/2] = new_tmp;
4592 else
4593 {
4594 /* This is the last step of the conversion sequence. Store the
4595 vectors in SLP_NODE or in vector info of the scalar statement
4596 (or in STMT_VINFO_RELATED_STMT chain). */
4597 if (slp_node)
4598 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt_info);
4599 else
4600 {
4601 if (!*prev_stmt_info)
4602 STMT_VINFO_VEC_STMT (stmt_info) = new_stmt_info;
4603 else
4604 STMT_VINFO_RELATED_STMT (*prev_stmt_info) = new_stmt_info;
4605
4606 *prev_stmt_info = new_stmt_info;
4607 }
4608 }
4609 }
4610
4611 /* For multi-step demotion operations we first generate demotion operations
4612 from the source type to the intermediate types, and then combine the
4613 results (stored in VEC_OPRNDS) in a demotion operation to the destination
4614 type. */
4615 if (multi_step_cvt)
4616 {
4617 /* At each level of recursion we have half of the operands we had at the
4618 previous level. */
4619 vec_oprnds->truncate ((i+1)/2);
4620 vect_create_vectorized_demotion_stmts (vec_oprnds, multi_step_cvt - 1,
4621 stmt_info, vec_dsts, gsi,
4622 slp_node, VEC_PACK_TRUNC_EXPR,
4623 prev_stmt_info);
4624 }
4625
4626 vec_dsts.quick_push (vec_dest);
4627 }
4628
4629
4630 /* Create vectorized promotion statements for vector operands from VEC_OPRNDS0
4631 and VEC_OPRNDS1, for a binary operation associated with scalar statement
4632 STMT_INFO. For multi-step conversions store the resulting vectors and
4633 call the function recursively. */
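/* A sketch of one step of a widening multiplication, with CODE1 and CODE2
   being the lo/hi codes chosen by supportable_widening_operation:

	w0 = VEC_WIDEN_MULT_LO_EXPR <v0, u0>;
	w1 = VEC_WIDEN_MULT_HI_EXPR <v0, u0>;

   Both halves are pushed to *VEC_OPRNDS0, doubling its length, so that the
   next (outer) step sees them as its inputs.  */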
4634
4635 static void
4636 vect_create_vectorized_promotion_stmts (vec<tree> *vec_oprnds0,
4637 vec<tree> *vec_oprnds1,
4638 stmt_vec_info stmt_info, tree vec_dest,
4639 gimple_stmt_iterator *gsi,
4640 enum tree_code code1,
4641 enum tree_code code2, tree decl1,
4642 tree decl2, int op_type)
4643 {
4644 int i;
4645 tree vop0, vop1, new_tmp1, new_tmp2;
4646 gimple *new_stmt1, *new_stmt2;
4647 vec<tree> vec_tmp = vNULL;
4648
4649 vec_tmp.create (vec_oprnds0->length () * 2);
4650 FOR_EACH_VEC_ELT (*vec_oprnds0, i, vop0)
4651 {
4652 if (op_type == binary_op)
4653 vop1 = (*vec_oprnds1)[i];
4654 else
4655 vop1 = NULL_TREE;
4656
4657 /* Generate the two halves of the promotion operation. */
4658 new_stmt1 = vect_gen_widened_results_half (code1, decl1, vop0, vop1,
4659 op_type, vec_dest, gsi,
4660 stmt_info);
4661 new_stmt2 = vect_gen_widened_results_half (code2, decl2, vop0, vop1,
4662 op_type, vec_dest, gsi,
4663 stmt_info);
4664 if (is_gimple_call (new_stmt1))
4665 {
4666 new_tmp1 = gimple_call_lhs (new_stmt1);
4667 new_tmp2 = gimple_call_lhs (new_stmt2);
4668 }
4669 else
4670 {
4671 new_tmp1 = gimple_assign_lhs (new_stmt1);
4672 new_tmp2 = gimple_assign_lhs (new_stmt2);
4673 }
4674
4675 /* Store the results for the next step. */
4676 vec_tmp.quick_push (new_tmp1);
4677 vec_tmp.quick_push (new_tmp2);
4678 }
4679
4680 vec_oprnds0->release ();
4681 *vec_oprnds0 = vec_tmp;
4682 }
4683
4684
4685 /* Check if STMT_INFO performs a conversion operation that can be vectorized.
4686 If VEC_STMT is also passed, vectorize STMT_INFO: create a vectorized
4687 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
4688 Return true if STMT_INFO is vectorizable in this way. */
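/* For illustration, with 128-bit vectors on a hypothetical target:
   a float -> double conversion consumes V4SF inputs and is a WIDEN case,
   producing two V2DF results per input via the target's lo/hi widening
   conversion; an int -> short conversion is a NARROW case, packing two
   V4SI inputs into one V8HI result; and int -> float with equal lane
   counts (V4SI -> V4SF) is the NONE case, a single vector stmt per
   copy.  */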
4689
4690 static bool
4691 vectorizable_conversion (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
4692 stmt_vec_info *vec_stmt, slp_tree slp_node,
4693 stmt_vector_for_cost *cost_vec)
4694 {
4695 tree vec_dest;
4696 tree scalar_dest;
4697 tree op0, op1 = NULL_TREE;
4698 tree vec_oprnd0 = NULL_TREE, vec_oprnd1 = NULL_TREE;
4699 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
4700 enum tree_code code, code1 = ERROR_MARK, code2 = ERROR_MARK;
4701 enum tree_code codecvt1 = ERROR_MARK, codecvt2 = ERROR_MARK;
4702 tree decl1 = NULL_TREE, decl2 = NULL_TREE;
4703 tree new_temp;
4704 enum vect_def_type dt[2] = {vect_unknown_def_type, vect_unknown_def_type};
4705 int ndts = 2;
4706 stmt_vec_info prev_stmt_info;
4707 poly_uint64 nunits_in;
4708 poly_uint64 nunits_out;
4709 tree vectype_out, vectype_in;
4710 int ncopies, i, j;
4711 tree lhs_type, rhs_type;
4712 enum { NARROW, NONE, WIDEN } modifier;
4713 vec<tree> vec_oprnds0 = vNULL;
4714 vec<tree> vec_oprnds1 = vNULL;
4715 tree vop0;
4716 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
4717 vec_info *vinfo = stmt_info->vinfo;
4718 int multi_step_cvt = 0;
4719 vec<tree> interm_types = vNULL;
4720 tree last_oprnd, intermediate_type, cvt_type = NULL_TREE;
4721 int op_type;
4722 unsigned short fltsz;
4723
4724 /* Is STMT a vectorizable conversion? */
4725
4726 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
4727 return false;
4728
4729 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
4730 && ! vec_stmt)
4731 return false;
4732
4733 gassign *stmt = dyn_cast <gassign *> (stmt_info->stmt);
4734 if (!stmt)
4735 return false;
4736
4737 if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME)
4738 return false;
4739
4740 code = gimple_assign_rhs_code (stmt);
4741 if (!CONVERT_EXPR_CODE_P (code)
4742 && code != FIX_TRUNC_EXPR
4743 && code != FLOAT_EXPR
4744 && code != WIDEN_MULT_EXPR
4745 && code != WIDEN_LSHIFT_EXPR)
4746 return false;
4747
4748 op_type = TREE_CODE_LENGTH (code);
4749
4750 /* Check types of lhs and rhs. */
4751 scalar_dest = gimple_assign_lhs (stmt);
4752 lhs_type = TREE_TYPE (scalar_dest);
4753 vectype_out = STMT_VINFO_VECTYPE (stmt_info);
4754
4755 op0 = gimple_assign_rhs1 (stmt);
4756 rhs_type = TREE_TYPE (op0);
4757
4758 if ((code != FIX_TRUNC_EXPR && code != FLOAT_EXPR)
4759 && !((INTEGRAL_TYPE_P (lhs_type)
4760 && INTEGRAL_TYPE_P (rhs_type))
4761 || (SCALAR_FLOAT_TYPE_P (lhs_type)
4762 && SCALAR_FLOAT_TYPE_P (rhs_type))))
4763 return false;
4764
4765 if (!VECTOR_BOOLEAN_TYPE_P (vectype_out)
4766 && ((INTEGRAL_TYPE_P (lhs_type)
4767 && !type_has_mode_precision_p (lhs_type))
4768 || (INTEGRAL_TYPE_P (rhs_type)
4769 && !type_has_mode_precision_p (rhs_type))))
4770 {
4771 if (dump_enabled_p ())
4772 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4773 "type conversion to/from bit-precision unsupported."
4774 "\n");
4775 return false;
4776 }
4777
4778 /* Check the operands of the operation. */
4779 if (!vect_is_simple_use (op0, vinfo, &dt[0], &vectype_in))
4780 {
4781 if (dump_enabled_p ())
4782 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4783 "use not simple.\n");
4784 return false;
4785 }
4786 if (op_type == binary_op)
4787 {
4788 bool ok;
4789
4790 op1 = gimple_assign_rhs2 (stmt);
4791 gcc_assert (code == WIDEN_MULT_EXPR || code == WIDEN_LSHIFT_EXPR);
4792 /* For WIDEN_MULT_EXPR, if OP0 is a constant, use the type of
4793 OP1. */
4794 if (CONSTANT_CLASS_P (op0))
4795 ok = vect_is_simple_use (op1, vinfo, &dt[1], &vectype_in);
4796 else
4797 ok = vect_is_simple_use (op1, vinfo, &dt[1]);
4798
4799 if (!ok)
4800 {
4801 if (dump_enabled_p ())
4802 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4803 "use not simple.\n");
4804 return false;
4805 }
4806 }
4807
4808 /* If op0 is an external or constant def, use a vector type of
4809 the same size as the output vector type. */
4810 if (!vectype_in)
4811 vectype_in = get_same_sized_vectype (rhs_type, vectype_out);
4812 if (vec_stmt)
4813 gcc_assert (vectype_in);
4814 if (!vectype_in)
4815 {
4816 if (dump_enabled_p ())
4817 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4818 "no vectype for scalar type %T\n", rhs_type);
4819
4820 return false;
4821 }
4822
4823 if (VECTOR_BOOLEAN_TYPE_P (vectype_out)
4824 && !VECTOR_BOOLEAN_TYPE_P (vectype_in))
4825 {
4826 if (dump_enabled_p ())
4827 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4828 "can't convert between boolean and non "
4829 "boolean vectors %T\n", rhs_type);
4830
4831 return false;
4832 }
4833
4834 nunits_in = TYPE_VECTOR_SUBPARTS (vectype_in);
4835 nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
4836 if (known_eq (nunits_out, nunits_in))
4837 modifier = NONE;
4838 else if (multiple_p (nunits_out, nunits_in))
4839 modifier = NARROW;
4840 else
4841 {
4842 gcc_checking_assert (multiple_p (nunits_in, nunits_out));
4843 modifier = WIDEN;
4844 }
4845
4846 /* Multiple types in SLP are handled by creating the appropriate number of
4847 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
4848 case of SLP. */
4849 if (slp_node)
4850 ncopies = 1;
4851 else if (modifier == NARROW)
4852 ncopies = vect_get_num_copies (loop_vinfo, vectype_out);
4853 else
4854 ncopies = vect_get_num_copies (loop_vinfo, vectype_in);
4855
4856 /* Sanity check: make sure that at least one copy of the vectorized stmt
4857 needs to be generated. */
4858 gcc_assert (ncopies >= 1);
4859
4860 bool found_mode = false;
4861 scalar_mode lhs_mode = SCALAR_TYPE_MODE (lhs_type);
4862 scalar_mode rhs_mode = SCALAR_TYPE_MODE (rhs_type);
4863 opt_scalar_mode rhs_mode_iter;
4864
4865 /* Supportable by target? */
4866 switch (modifier)
4867 {
4868 case NONE:
4869 if (code != FIX_TRUNC_EXPR && code != FLOAT_EXPR)
4870 return false;
4871 if (supportable_convert_operation (code, vectype_out, vectype_in,
4872 &decl1, &code1))
4873 break;
4874 /* FALLTHRU */
4875 unsupported:
4876 if (dump_enabled_p ())
4877 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4878 "conversion not supported by target.\n");
4879 return false;
4880
4881 case WIDEN:
4882 if (supportable_widening_operation (code, stmt_info, vectype_out,
4883 vectype_in, &code1, &code2,
4884 &multi_step_cvt, &interm_types))
4885 {
4886 /* Binary widening operation can only be supported directly by the
4887 architecture. */
4888 gcc_assert (!(multi_step_cvt && op_type == binary_op));
4889 break;
4890 }
4891
4892 if (code != FLOAT_EXPR
4893 || GET_MODE_SIZE (lhs_mode) <= GET_MODE_SIZE (rhs_mode))
4894 goto unsupported;
4895
4896 fltsz = GET_MODE_SIZE (lhs_mode);
4897 FOR_EACH_2XWIDER_MODE (rhs_mode_iter, rhs_mode)
4898 {
4899 rhs_mode = rhs_mode_iter.require ();
4900 if (GET_MODE_SIZE (rhs_mode) > fltsz)
4901 break;
4902
4903 cvt_type
4904 = build_nonstandard_integer_type (GET_MODE_BITSIZE (rhs_mode), 0);
4905 cvt_type = get_same_sized_vectype (cvt_type, vectype_in);
4906 if (cvt_type == NULL_TREE)
4907 goto unsupported;
4908
4909 if (GET_MODE_SIZE (rhs_mode) == fltsz)
4910 {
4911 if (!supportable_convert_operation (code, vectype_out,
4912 cvt_type, &decl1, &codecvt1))
4913 goto unsupported;
4914 }
4915 else if (!supportable_widening_operation (code, stmt_info,
4916 vectype_out, cvt_type,
4917 &codecvt1, &codecvt2,
4918 &multi_step_cvt,
4919 &interm_types))
4920 continue;
4921 else
4922 gcc_assert (multi_step_cvt == 0);
4923
4924 if (supportable_widening_operation (NOP_EXPR, stmt_info, cvt_type,
4925 vectype_in, &code1, &code2,
4926 &multi_step_cvt, &interm_types))
4927 {
4928 found_mode = true;
4929 break;
4930 }
4931 }
4932
4933 if (!found_mode)
4934 goto unsupported;
4935
4936 if (GET_MODE_SIZE (rhs_mode) == fltsz)
4937 codecvt2 = ERROR_MARK;
4938 else
4939 {
4940 multi_step_cvt++;
4941 interm_types.safe_push (cvt_type);
4942 cvt_type = NULL_TREE;
4943 }
4944 break;
4945
4946 case NARROW:
4947 gcc_assert (op_type == unary_op);
4948 if (supportable_narrowing_operation (vinfo, code, vectype_out,
4949 vectype_in, &code1, &multi_step_cvt,
4950 &interm_types))
4951 break;
4952
4953 if (code != FIX_TRUNC_EXPR
4954 || GET_MODE_SIZE (lhs_mode) >= GET_MODE_SIZE (rhs_mode))
4955 goto unsupported;
4956
4957 cvt_type
4958 = build_nonstandard_integer_type (GET_MODE_BITSIZE (rhs_mode), 0);
4959 cvt_type = get_same_sized_vectype (cvt_type, vectype_in);
4960 if (cvt_type == NULL_TREE)
4961 goto unsupported;
4962 if (!supportable_convert_operation (code, cvt_type, vectype_in,
4963 &decl1, &codecvt1))
4964 goto unsupported;
4965 if (supportable_narrowing_operation (vinfo, NOP_EXPR, vectype_out,
4966 cvt_type, &code1, &multi_step_cvt,
4967 &interm_types))
4968 break;
4969 goto unsupported;
4970
4971 default:
4972 gcc_unreachable ();
4973 }
4974
4975 if (!vec_stmt) /* transformation not required. */
4976 {
4977 DUMP_VECT_SCOPE ("vectorizable_conversion");
4978 if (code == FIX_TRUNC_EXPR || code == FLOAT_EXPR)
4979 {
4980 STMT_VINFO_TYPE (stmt_info) = type_conversion_vec_info_type;
4981 vect_model_simple_cost (stmt_info, ncopies, dt, ndts, slp_node,
4982 cost_vec);
4983 }
4984 else if (modifier == NARROW)
4985 {
4986 STMT_VINFO_TYPE (stmt_info) = type_demotion_vec_info_type;
4987 vect_model_promotion_demotion_cost (stmt_info, dt, multi_step_cvt,
4988 cost_vec);
4989 }
4990 else
4991 {
4992 STMT_VINFO_TYPE (stmt_info) = type_promotion_vec_info_type;
4993 vect_model_promotion_demotion_cost (stmt_info, dt, multi_step_cvt,
4994 cost_vec);
4995 }
4996 interm_types.release ();
4997 return true;
4998 }
4999
5000 /* Transform. */
5001 if (dump_enabled_p ())
5002 dump_printf_loc (MSG_NOTE, vect_location,
5003 "transform conversion. ncopies = %d.\n", ncopies);
5004
5005 if (op_type == binary_op)
5006 {
5007 if (CONSTANT_CLASS_P (op0))
5008 op0 = fold_convert (TREE_TYPE (op1), op0);
5009 else if (CONSTANT_CLASS_P (op1))
5010 op1 = fold_convert (TREE_TYPE (op0), op1);
5011 }
5012
5013 /* In case of multi-step conversion, we first generate conversion operations
5014 to the intermediate types, and then from those types to the final one.
5015 We create vector destinations for the intermediate types (TYPES) received
5016 from supportable_*_operation, and store them in the correct order
5017 for future use in vect_create_vectorized_*_stmts (). */
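/* E.g. a NARROW conversion from int to char elements that goes through an
   intermediate short step ends up with two destination variables here:
   one with the char vector type (pushed first) and one with the
   intermediate short vector type; vect_create_vectorized_demotion_stmts
   then pops the one needed at each recursion level.  */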
5018 auto_vec<tree> vec_dsts (multi_step_cvt + 1);
5019 vec_dest = vect_create_destination_var (scalar_dest,
5020 (cvt_type && modifier == WIDEN)
5021 ? cvt_type : vectype_out);
5022 vec_dsts.quick_push (vec_dest);
5023
5024 if (multi_step_cvt)
5025 {
5026 for (i = interm_types.length () - 1;
5027 interm_types.iterate (i, &intermediate_type); i--)
5028 {
5029 vec_dest = vect_create_destination_var (scalar_dest,
5030 intermediate_type);
5031 vec_dsts.quick_push (vec_dest);
5032 }
5033 }
5034
5035 if (cvt_type)
5036 vec_dest = vect_create_destination_var (scalar_dest,
5037 modifier == WIDEN
5038 ? vectype_out : cvt_type);
5039
5040 if (!slp_node)
5041 {
5042 if (modifier == WIDEN)
5043 {
5044 vec_oprnds0.create (multi_step_cvt ? vect_pow2 (multi_step_cvt) : 1);
5045 if (op_type == binary_op)
5046 vec_oprnds1.create (1);
5047 }
5048 else if (modifier == NARROW)
5049 vec_oprnds0.create (
5050 2 * (multi_step_cvt ? vect_pow2 (multi_step_cvt) : 1));
5051 }
5052 else if (code == WIDEN_LSHIFT_EXPR)
5053 vec_oprnds1.create (slp_node->vec_stmts_size);
5054
5055 last_oprnd = op0;
5056 prev_stmt_info = NULL;
5057 switch (modifier)
5058 {
5059 case NONE:
5060 for (j = 0; j < ncopies; j++)
5061 {
5062 if (j == 0)
5063 vect_get_vec_defs (op0, NULL, stmt_info, &vec_oprnds0,
5064 NULL, slp_node);
5065 else
5066 vect_get_vec_defs_for_stmt_copy (vinfo, &vec_oprnds0, NULL);
5067
5068 FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
5069 {
5070 stmt_vec_info new_stmt_info;
5071 /* Arguments are ready, create the new vector stmt. */
5072 if (code1 == CALL_EXPR)
5073 {
5074 gcall *new_stmt = gimple_build_call (decl1, 1, vop0);
5075 new_temp = make_ssa_name (vec_dest, new_stmt);
5076 gimple_call_set_lhs (new_stmt, new_temp);
5077 new_stmt_info
5078 = vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
5079 }
5080 else
5081 {
5082 gcc_assert (TREE_CODE_LENGTH (code1) == unary_op);
5083 gassign *new_stmt
5084 = gimple_build_assign (vec_dest, code1, vop0);
5085 new_temp = make_ssa_name (vec_dest, new_stmt);
5086 gimple_assign_set_lhs (new_stmt, new_temp);
5087 new_stmt_info
5088 = vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
5089 }
5090
5091 if (slp_node)
5092 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt_info);
5093 else
5094 {
5095 if (!prev_stmt_info)
5096 STMT_VINFO_VEC_STMT (stmt_info)
5097 = *vec_stmt = new_stmt_info;
5098 else
5099 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt_info;
5100 prev_stmt_info = new_stmt_info;
5101 }
5102 }
5103 }
5104 break;
5105
5106 case WIDEN:
5107 /* In case the vectorization factor (VF) is bigger than the number
5108 of elements that we can fit in a vectype (nunits), we have to
5109 generate more than one vector stmt, i.e., we need to "unroll"
5110 the vector stmt by a factor of VF/nunits. */
5111 for (j = 0; j < ncopies; j++)
5112 {
5113 /* Handle uses. */
5114 if (j == 0)
5115 {
5116 if (slp_node)
5117 {
5118 if (code == WIDEN_LSHIFT_EXPR)
5119 {
5120 unsigned int k;
5121
5122 vec_oprnd1 = op1;
5123 /* Store vec_oprnd1 for every vector stmt to be created
5124 for SLP_NODE. We check during the analysis that all
5125 the shift arguments are the same. */
5126 for (k = 0; k < slp_node->vec_stmts_size - 1; k++)
5127 vec_oprnds1.quick_push (vec_oprnd1);
5128
5129 vect_get_vec_defs (op0, NULL_TREE, stmt_info,
5130 &vec_oprnds0, NULL, slp_node);
5131 }
5132 else
5133 vect_get_vec_defs (op0, op1, stmt_info, &vec_oprnds0,
5134 &vec_oprnds1, slp_node);
5135 }
5136 else
5137 {
5138 vec_oprnd0 = vect_get_vec_def_for_operand (op0, stmt_info);
5139 vec_oprnds0.quick_push (vec_oprnd0);
5140 if (op_type == binary_op)
5141 {
5142 if (code == WIDEN_LSHIFT_EXPR)
5143 vec_oprnd1 = op1;
5144 else
5145 vec_oprnd1
5146 = vect_get_vec_def_for_operand (op1, stmt_info);
5147 vec_oprnds1.quick_push (vec_oprnd1);
5148 }
5149 }
5150 }
5151 else
5152 {
5153 vec_oprnd0 = vect_get_vec_def_for_stmt_copy (vinfo, vec_oprnd0);
5154 vec_oprnds0.truncate (0);
5155 vec_oprnds0.quick_push (vec_oprnd0);
5156 if (op_type == binary_op)
5157 {
5158 if (code == WIDEN_LSHIFT_EXPR)
5159 vec_oprnd1 = op1;
5160 else
5161 vec_oprnd1 = vect_get_vec_def_for_stmt_copy (vinfo,
5162 vec_oprnd1);
5163 vec_oprnds1.truncate (0);
5164 vec_oprnds1.quick_push (vec_oprnd1);
5165 }
5166 }
5167
5168 /* Arguments are ready. Create the new vector stmts. */
5169 for (i = multi_step_cvt; i >= 0; i--)
5170 {
5171 tree this_dest = vec_dsts[i];
5172 enum tree_code c1 = code1, c2 = code2;
5173 if (i == 0 && codecvt2 != ERROR_MARK)
5174 {
5175 c1 = codecvt1;
5176 c2 = codecvt2;
5177 }
5178 vect_create_vectorized_promotion_stmts (&vec_oprnds0,
5179 &vec_oprnds1, stmt_info,
5180 this_dest, gsi,
5181 c1, c2, decl1, decl2,
5182 op_type);
5183 }
5184
5185 FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
5186 {
5187 stmt_vec_info new_stmt_info;
5188 if (cvt_type)
5189 {
5190 if (codecvt1 == CALL_EXPR)
5191 {
5192 gcall *new_stmt = gimple_build_call (decl1, 1, vop0);
5193 new_temp = make_ssa_name (vec_dest, new_stmt);
5194 gimple_call_set_lhs (new_stmt, new_temp);
5195 new_stmt_info
5196 = vect_finish_stmt_generation (stmt_info, new_stmt,
5197 gsi);
5198 }
5199 else
5200 {
5201 gcc_assert (TREE_CODE_LENGTH (codecvt1) == unary_op);
5202 new_temp = make_ssa_name (vec_dest);
5203 gassign *new_stmt
5204 = gimple_build_assign (new_temp, codecvt1, vop0);
5205 new_stmt_info
5206 = vect_finish_stmt_generation (stmt_info, new_stmt,
5207 gsi);
5208 }
5209 }
5210 else
5211 new_stmt_info = vinfo->lookup_def (vop0);
5212
5213 if (slp_node)
5214 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt_info);
5215 else
5216 {
5217 if (!prev_stmt_info)
5218 STMT_VINFO_VEC_STMT (stmt_info) = new_stmt_info;
5219 else
5220 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt_info;
5221 prev_stmt_info = new_stmt_info;
5222 }
5223 }
5224 }
5225
5226 *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
5227 break;
5228
5229 case NARROW:
5230 /* In case the vectorization factor (VF) is bigger than the number
5231 of elements that we can fit in a vectype (nunits), we have to
5232 generate more than one vector stmt, i.e., we need to "unroll"
5233 the vector stmt by a factor of VF/nunits. */
5234 for (j = 0; j < ncopies; j++)
5235 {
5236 /* Handle uses. */
5237 if (slp_node)
5238 vect_get_vec_defs (op0, NULL_TREE, stmt_info, &vec_oprnds0, NULL,
5239 slp_node);
5240 else
5241 {
5242 vec_oprnds0.truncate (0);
5243 vect_get_loop_based_defs (&last_oprnd, stmt_info, &vec_oprnds0,
5244 vect_pow2 (multi_step_cvt) - 1);
5245 }
5246
5247 /* Arguments are ready. Create the new vector stmts. */
5248 if (cvt_type)
5249 FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
5250 {
5251 if (codecvt1 == CALL_EXPR)
5252 {
5253 gcall *new_stmt = gimple_build_call (decl1, 1, vop0);
5254 new_temp = make_ssa_name (vec_dest, new_stmt);
5255 gimple_call_set_lhs (new_stmt, new_temp);
5256 vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
5257 }
5258 else
5259 {
5260 gcc_assert (TREE_CODE_LENGTH (codecvt1) == unary_op);
5261 new_temp = make_ssa_name (vec_dest);
5262 gassign *new_stmt
5263 = gimple_build_assign (new_temp, codecvt1, vop0);
5264 vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
5265 }
5266
5267 vec_oprnds0[i] = new_temp;
5268 }
5269
5270 vect_create_vectorized_demotion_stmts (&vec_oprnds0, multi_step_cvt,
5271 stmt_info, vec_dsts, gsi,
5272 slp_node, code1,
5273 &prev_stmt_info);
5274 }
5275
5276 *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
5277 break;
5278 }
5279
5280 vec_oprnds0.release ();
5281 vec_oprnds1.release ();
5282 interm_types.release ();
5283
5284 return true;
5285 }
5286
5287
5288 /* Function vectorizable_assignment.
5289
5290 Check if STMT_INFO performs an assignment (copy) that can be vectorized.
5291 If VEC_STMT is also passed, vectorize the STMT_INFO: create a vectorized
5292 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
5293 Return true if STMT_INFO is vectorizable in this way. */
5294
5295 static bool
5296 vectorizable_assignment (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
5297 stmt_vec_info *vec_stmt, slp_tree slp_node,
5298 stmt_vector_for_cost *cost_vec)
5299 {
5300 tree vec_dest;
5301 tree scalar_dest;
5302 tree op;
5303 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
5304 tree new_temp;
5305 enum vect_def_type dt[1] = {vect_unknown_def_type};
5306 int ndts = 1;
5307 int ncopies;
5308 int i, j;
5309 vec<tree> vec_oprnds = vNULL;
5310 tree vop;
5311 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
5312 vec_info *vinfo = stmt_info->vinfo;
5313 stmt_vec_info prev_stmt_info = NULL;
5314 enum tree_code code;
5315 tree vectype_in;
5316
5317 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
5318 return false;
5319
5320 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
5321 && ! vec_stmt)
5322 return false;
5323
5324 /* Is vectorizable assignment? */
5325 gassign *stmt = dyn_cast <gassign *> (stmt_info->stmt);
5326 if (!stmt)
5327 return false;
5328
5329 scalar_dest = gimple_assign_lhs (stmt);
5330 if (TREE_CODE (scalar_dest) != SSA_NAME)
5331 return false;
5332
5333 code = gimple_assign_rhs_code (stmt);
5334 if (gimple_assign_single_p (stmt)
5335 || code == PAREN_EXPR
5336 || CONVERT_EXPR_CODE_P (code))
5337 op = gimple_assign_rhs1 (stmt);
5338 else
5339 return false;
5340
5341 if (code == VIEW_CONVERT_EXPR)
5342 op = TREE_OPERAND (op, 0);
5343
5344 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
5345 poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
5346
5347 /* Multiple types in SLP are handled by creating the appropriate number of
5348 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
5349 case of SLP. */
5350 if (slp_node)
5351 ncopies = 1;
5352 else
5353 ncopies = vect_get_num_copies (loop_vinfo, vectype);
5354
5355 gcc_assert (ncopies >= 1);
5356
5357 if (!vect_is_simple_use (op, vinfo, &dt[0], &vectype_in))
5358 {
5359 if (dump_enabled_p ())
5360 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5361 "use not simple.\n");
5362 return false;
5363 }
5364
5365 /* We can handle NOP_EXPR conversions that do not change the number
5366 of elements or the vector size. */
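/* A minimal illustrative example: converting int to unsigned int keeps
   both the element count and the vector size, so the statement

     for (i = 0; i < N; i++) b[i] = (unsigned int) a[i];

   is vectorized as a plain copy through a VIEW_CONVERT_EXPR of the input
   vector. A conversion such as int to short changes the number of
   elements per vector and is rejected here; it is handled by
   vectorizable_conversion instead. */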
5367 if ((CONVERT_EXPR_CODE_P (code)
5368 || code == VIEW_CONVERT_EXPR)
5369 && (!vectype_in
5370 || maybe_ne (TYPE_VECTOR_SUBPARTS (vectype_in), nunits)
5371 || maybe_ne (GET_MODE_SIZE (TYPE_MODE (vectype)),
5372 GET_MODE_SIZE (TYPE_MODE (vectype_in)))))
5373 return false;
5374
5375 /* We do not handle bit-precision changes. */
5376 if ((CONVERT_EXPR_CODE_P (code)
5377 || code == VIEW_CONVERT_EXPR)
5378 && INTEGRAL_TYPE_P (TREE_TYPE (scalar_dest))
5379 && (!type_has_mode_precision_p (TREE_TYPE (scalar_dest))
5380 || !type_has_mode_precision_p (TREE_TYPE (op)))
5381 /* But a conversion that does not change the bit-pattern is ok. */
5382 && !((TYPE_PRECISION (TREE_TYPE (scalar_dest))
5383 > TYPE_PRECISION (TREE_TYPE (op)))
5384 && TYPE_UNSIGNED (TREE_TYPE (op)))
5385 /* Conversion between boolean types of different sizes is
5386 a simple assignment in case their vectypes are the same
5387 boolean vectors. */
5388 && (!VECTOR_BOOLEAN_TYPE_P (vectype)
5389 || !VECTOR_BOOLEAN_TYPE_P (vectype_in)))
5390 {
5391 if (dump_enabled_p ())
5392 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5393 "type conversion to/from bit-precision "
5394 "unsupported.\n");
5395 return false;
5396 }
5397
5398 if (!vec_stmt) /* transformation not required. */
5399 {
5400 STMT_VINFO_TYPE (stmt_info) = assignment_vec_info_type;
5401 DUMP_VECT_SCOPE ("vectorizable_assignment");
5402 vect_model_simple_cost (stmt_info, ncopies, dt, ndts, slp_node, cost_vec);
5403 return true;
5404 }
5405
5406 /* Transform. */
5407 if (dump_enabled_p ())
5408 dump_printf_loc (MSG_NOTE, vect_location, "transform assignment.\n");
5409
5410 /* Handle def. */
5411 vec_dest = vect_create_destination_var (scalar_dest, vectype);
5412
5413 /* Handle use. */
5414 for (j = 0; j < ncopies; j++)
5415 {
5416 /* Handle uses. */
5417 if (j == 0)
5418 vect_get_vec_defs (op, NULL, stmt_info, &vec_oprnds, NULL, slp_node);
5419 else
5420 vect_get_vec_defs_for_stmt_copy (vinfo, &vec_oprnds, NULL);
5421
5422 /* Arguments are ready. Create the new vector stmt. */
5423 stmt_vec_info new_stmt_info = NULL;
5424 FOR_EACH_VEC_ELT (vec_oprnds, i, vop)
5425 {
5426 if (CONVERT_EXPR_CODE_P (code)
5427 || code == VIEW_CONVERT_EXPR)
5428 vop = build1 (VIEW_CONVERT_EXPR, vectype, vop);
5429 gassign *new_stmt = gimple_build_assign (vec_dest, vop);
5430 new_temp = make_ssa_name (vec_dest, new_stmt);
5431 gimple_assign_set_lhs (new_stmt, new_temp);
5432 new_stmt_info
5433 = vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
5434 if (slp_node)
5435 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt_info);
5436 }
5437
5438 if (slp_node)
5439 continue;
5440
5441 if (j == 0)
5442 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt_info;
5443 else
5444 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt_info;
5445
5446 prev_stmt_info = new_stmt_info;
5447 }
5448
5449 vec_oprnds.release ();
5450 return true;
5451 }
5452
5453
5454 /* Return TRUE if CODE (a shift operation) is supported for SCALAR_TYPE
5455 either as shift by a scalar or by a vector. */
5456
5457 bool
5458 vect_supportable_shift (vec_info *vinfo, enum tree_code code, tree scalar_type)
5459 {
5460
5461 machine_mode vec_mode;
5462 optab optab;
5463 int icode;
5464 tree vectype;
5465
5466 vectype = get_vectype_for_scalar_type (vinfo, scalar_type);
5467 if (!vectype)
5468 return false;
5469
5470 optab = optab_for_tree_code (code, vectype, optab_scalar);
5471 if (!optab
5472 || optab_handler (optab, TYPE_MODE (vectype)) == CODE_FOR_nothing)
5473 {
5474 optab = optab_for_tree_code (code, vectype, optab_vector);
5475 if (!optab
5476 || (optab_handler (optab, TYPE_MODE (vectype))
5477 == CODE_FOR_nothing))
5478 return false;
5479 }
5480
5481 vec_mode = TYPE_MODE (vectype);
5482 icode = (int) optab_handler (optab, vec_mode);
5483 if (icode == CODE_FOR_nothing)
5484 return false;
5485
5486 return true;
5487 }
5488
5489
5490 /* Function vectorizable_shift.
5491
5492 Check if STMT_INFO performs a shift operation that can be vectorized.
5493 If VEC_STMT is also passed, vectorize the STMT_INFO: create a vectorized
5494 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
5495 Return true if STMT_INFO is vectorizable in this way. */
5496
5497 static bool
5498 vectorizable_shift (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
5499 stmt_vec_info *vec_stmt, slp_tree slp_node,
5500 stmt_vector_for_cost *cost_vec)
5501 {
5502 tree vec_dest;
5503 tree scalar_dest;
5504 tree op0, op1 = NULL;
5505 tree vec_oprnd1 = NULL_TREE;
5506 tree vectype;
5507 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
5508 enum tree_code code;
5509 machine_mode vec_mode;
5510 tree new_temp;
5511 optab optab;
5512 int icode;
5513 machine_mode optab_op2_mode;
5514 enum vect_def_type dt[2] = {vect_unknown_def_type, vect_unknown_def_type};
5515 int ndts = 2;
5516 stmt_vec_info prev_stmt_info;
5517 poly_uint64 nunits_in;
5518 poly_uint64 nunits_out;
5519 tree vectype_out;
5520 tree op1_vectype;
5521 int ncopies;
5522 int j, i;
5523 vec<tree> vec_oprnds0 = vNULL;
5524 vec<tree> vec_oprnds1 = vNULL;
5525 tree vop0, vop1;
5526 unsigned int k;
5527 bool scalar_shift_arg = true;
5528 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
5529 vec_info *vinfo = stmt_info->vinfo;
5530
5531 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
5532 return false;
5533
5534 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
5535 && STMT_VINFO_DEF_TYPE (stmt_info) != vect_nested_cycle
5536 && ! vec_stmt)
5537 return false;
5538
5539 /* Is STMT a vectorizable shift operation? */
5540 gassign *stmt = dyn_cast <gassign *> (stmt_info->stmt);
5541 if (!stmt)
5542 return false;
5543
5544 if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME)
5545 return false;
5546
5547 code = gimple_assign_rhs_code (stmt);
5548
5549 if (!(code == LSHIFT_EXPR || code == RSHIFT_EXPR || code == LROTATE_EXPR
5550 || code == RROTATE_EXPR))
5551 return false;
5552
5553 scalar_dest = gimple_assign_lhs (stmt);
5554 vectype_out = STMT_VINFO_VECTYPE (stmt_info);
5555 if (!type_has_mode_precision_p (TREE_TYPE (scalar_dest)))
5556 {
5557 if (dump_enabled_p ())
5558 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5559 "bit-precision shifts not supported.\n");
5560 return false;
5561 }
5562
5563 op0 = gimple_assign_rhs1 (stmt);
5564 if (!vect_is_simple_use (op0, vinfo, &dt[0], &vectype))
5565 {
5566 if (dump_enabled_p ())
5567 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5568 "use not simple.\n");
5569 return false;
5570 }
5571 /* If op0 is an external or constant def use a vector type with
5572 the same size as the output vector type. */
5573 if (!vectype)
5574 vectype = get_same_sized_vectype (TREE_TYPE (op0), vectype_out);
5575 if (vec_stmt)
5576 gcc_assert (vectype);
5577 if (!vectype)
5578 {
5579 if (dump_enabled_p ())
5580 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5581 "no vectype for scalar type\n");
5582 return false;
5583 }
5584
5585 nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
5586 nunits_in = TYPE_VECTOR_SUBPARTS (vectype);
5587 if (maybe_ne (nunits_out, nunits_in))
5588 return false;
5589
5590 op1 = gimple_assign_rhs2 (stmt);
5591 stmt_vec_info op1_def_stmt_info;
5592 if (!vect_is_simple_use (op1, vinfo, &dt[1], &op1_vectype,
5593 &op1_def_stmt_info))
5594 {
5595 if (dump_enabled_p ())
5596 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5597 "use not simple.\n");
5598 return false;
5599 }
5600
5601 /* Multiple types in SLP are handled by creating the appropriate number of
5602 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
5603 case of SLP. */
5604 if (slp_node)
5605 ncopies = 1;
5606 else
5607 ncopies = vect_get_num_copies (loop_vinfo, vectype);
5608
5609 gcc_assert (ncopies >= 1);
5610
5611 /* Determine whether the shift amount is a vector or a scalar. If the
5612 shift/rotate amount is a vector, use the vector/vector shift optabs. */
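/* For example (illustrative only):

     for (i = 0; i < N; i++) a[i] = b[i] << 3;      shift amount is scalar
     for (i = 0; i < N; i++) a[i] = b[i] << c[i];   shift amount is a vector

   The first loop can use a vector/scalar shift optab, which applies one
   count to all lanes; the second needs the vector/vector optab with a
   separate count per lane. */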
5613
5614 if ((dt[1] == vect_internal_def
5615 || dt[1] == vect_induction_def
5616 || dt[1] == vect_nested_cycle)
5617 && !slp_node)
5618 scalar_shift_arg = false;
5619 else if (dt[1] == vect_constant_def
5620 || dt[1] == vect_external_def
5621 || dt[1] == vect_internal_def)
5622 {
5623 /* In SLP, we need to check whether the shift count is the same
5624 in all the scalar stmts; in loops, if it is a constant or
5625 invariant, it is always a scalar shift. */
5626 if (slp_node)
5627 {
5628 vec<stmt_vec_info> stmts = SLP_TREE_SCALAR_STMTS (slp_node);
5629 stmt_vec_info slpstmt_info;
5630
5631 FOR_EACH_VEC_ELT (stmts, k, slpstmt_info)
5632 {
5633 gassign *slpstmt = as_a <gassign *> (slpstmt_info->stmt);
5634 if (!operand_equal_p (gimple_assign_rhs2 (slpstmt), op1, 0))
5635 scalar_shift_arg = false;
5636 }
5637
5638 /* For internal SLP defs we have to make sure we see scalar stmts
5639 for all vector elements.
5640 ??? For different vectors we could resort to a different
5641 scalar shift operand but code-generation below simply always
5642 takes the first. */
5643 if (dt[1] == vect_internal_def
5644 && maybe_ne (nunits_out * SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node),
5645 stmts.length ()))
5646 scalar_shift_arg = false;
5647 }
5648
5649 /* If the shift amount is computed by a pattern stmt we cannot
5650 use the scalar amount directly thus give up and use a vector
5651 shift. */
5652 if (op1_def_stmt_info && is_pattern_stmt_p (op1_def_stmt_info))
5653 scalar_shift_arg = false;
5654 }
5655 else
5656 {
5657 if (dump_enabled_p ())
5658 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5659 "operand mode requires invariant argument.\n");
5660 return false;
5661 }
5662
5663 /* Vector shifted by vector. */
5664 bool was_scalar_shift_arg = scalar_shift_arg;
5665 if (!scalar_shift_arg)
5666 {
5667 optab = optab_for_tree_code (code, vectype, optab_vector);
5668 if (dump_enabled_p ())
5669 dump_printf_loc (MSG_NOTE, vect_location,
5670 "vector/vector shift/rotate found.\n");
5671
5672 if (!op1_vectype)
5673 op1_vectype = get_same_sized_vectype (TREE_TYPE (op1), vectype_out);
5674 if ((op1_vectype == NULL_TREE
5675 || TYPE_MODE (op1_vectype) != TYPE_MODE (vectype))
5676 && (!slp_node
5677 || SLP_TREE_DEF_TYPE
5678 (SLP_TREE_CHILDREN (slp_node)[1]) != vect_constant_def))
5679 {
5680 if (dump_enabled_p ())
5681 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5682 "unusable type for last operand in"
5683 " vector/vector shift/rotate.\n");
5684 return false;
5685 }
5686 }
5687 /* See if the machine has a vector-shifted-by-scalar insn and, if not,
5688 whether it has a vector-shifted-by-vector insn. */
5689 else
5690 {
5691 optab = optab_for_tree_code (code, vectype, optab_scalar);
5692 if (optab
5693 && optab_handler (optab, TYPE_MODE (vectype)) != CODE_FOR_nothing)
5694 {
5695 if (dump_enabled_p ())
5696 dump_printf_loc (MSG_NOTE, vect_location,
5697 "vector/scalar shift/rotate found.\n");
5698 }
5699 else
5700 {
5701 optab = optab_for_tree_code (code, vectype, optab_vector);
5702 if (optab
5703 && (optab_handler (optab, TYPE_MODE (vectype))
5704 != CODE_FOR_nothing))
5705 {
5706 scalar_shift_arg = false;
5707
5708 if (dump_enabled_p ())
5709 dump_printf_loc (MSG_NOTE, vect_location,
5710 "vector/vector shift/rotate found.\n");
5711
5712 /* Unlike the other binary operators, shifts/rotates have
5713 an int rhs instead of one of the same type as the lhs,
5714 so make sure the scalar has the right type if we are
5715 dealing with vectors of long long/long/short/char. */
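/* E.g. (illustrative sketch) for

     long long a[N];
     for (i = 0; i < N; i++) a[i] = a[i] << 2;

   op1 is the int constant 2, but the vector/vector shift needs counts of
   type long long, so the scalar count is converted to long long before
   the vector of counts is built. */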
5716 if (dt[1] == vect_constant_def)
5717 {
5718 if (!slp_node)
5719 op1 = fold_convert (TREE_TYPE (vectype), op1);
5720 }
5721 else if (!useless_type_conversion_p (TREE_TYPE (vectype),
5722 TREE_TYPE (op1)))
5723 {
5724 if (vec_stmt && !slp_node)
5725 {
5726 op1 = fold_convert (TREE_TYPE (vectype), op1);
5727 op1 = vect_init_vector (stmt_info, op1,
5728 TREE_TYPE (vectype), NULL);
5729 }
5730 }
5731 }
5732 }
5733 }
5734
5735 /* Supportable by target? */
5736 if (!optab)
5737 {
5738 if (dump_enabled_p ())
5739 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5740 "no optab.\n");
5741 return false;
5742 }
5743 vec_mode = TYPE_MODE (vectype);
5744 icode = (int) optab_handler (optab, vec_mode);
5745 if (icode == CODE_FOR_nothing)
5746 {
5747 if (dump_enabled_p ())
5748 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5749 "op not supported by target.\n");
5750 /* Check only during analysis. */
5751 if (maybe_ne (GET_MODE_SIZE (vec_mode), UNITS_PER_WORD)
5752 || (!vec_stmt
5753 && !vect_worthwhile_without_simd_p (vinfo, code)))
5754 return false;
5755 if (dump_enabled_p ())
5756 dump_printf_loc (MSG_NOTE, vect_location,
5757 "proceeding using word mode.\n");
5758 }
5759
5760 /* Worthwhile without SIMD support? Check only during analysis. */
5761 if (!vec_stmt
5762 && !VECTOR_MODE_P (TYPE_MODE (vectype))
5763 && !vect_worthwhile_without_simd_p (vinfo, code))
5764 {
5765 if (dump_enabled_p ())
5766 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5767 "not worthwhile without SIMD support.\n");
5768 return false;
5769 }
5770
5771 if (!vec_stmt) /* transformation not required. */
5772 {
5773 STMT_VINFO_TYPE (stmt_info) = shift_vec_info_type;
5774 DUMP_VECT_SCOPE ("vectorizable_shift");
5775 vect_model_simple_cost (stmt_info, ncopies, dt, ndts, slp_node, cost_vec);
5776 return true;
5777 }
5778
5779 /* Transform. */
5780
5781 if (dump_enabled_p ())
5782 dump_printf_loc (MSG_NOTE, vect_location,
5783 "transform binary/unary operation.\n");
5784
5785 /* Handle def. */
5786 vec_dest = vect_create_destination_var (scalar_dest, vectype);
5787
5788 prev_stmt_info = NULL;
5789 for (j = 0; j < ncopies; j++)
5790 {
5791 /* Handle uses. */
5792 if (j == 0)
5793 {
5794 if (scalar_shift_arg)
5795 {
5796 /* Vector shl and shr insn patterns can be defined with scalar
5797 operand 2 (shift operand). In this case, use the constant or
5798 loop-invariant op1 directly, without extending it to vector mode
5799 first. */
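/* E.g. a target whose V4SI shift pattern takes an SImode count operand
   (a hypothetical (ashift:V4SI (reg:V4SI) (reg:SI)) insn) lets us pass
   the loop-invariant count directly instead of first building the
   vector { n, n, n, n }. */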
5800 optab_op2_mode = insn_data[icode].operand[2].mode;
5801 if (!VECTOR_MODE_P (optab_op2_mode))
5802 {
5803 if (dump_enabled_p ())
5804 dump_printf_loc (MSG_NOTE, vect_location,
5805 "operand 1 using scalar mode.\n");
5806 vec_oprnd1 = op1;
5807 vec_oprnds1.create (slp_node ? slp_node->vec_stmts_size : 1);
5808 vec_oprnds1.quick_push (vec_oprnd1);
5809 if (slp_node)
5810 {
5811 /* Store vec_oprnd1 for every vector stmt to be created
5812 for SLP_NODE. We check during the analysis that all
5813 the shift arguments are the same.
5814 TODO: Allow different constants for different vector
5815 stmts generated for an SLP instance. */
5816 for (k = 0; k < slp_node->vec_stmts_size - 1; k++)
5817 vec_oprnds1.quick_push (vec_oprnd1);
5818 }
5819 }
5820 }
5821 else if (slp_node
5822 && !useless_type_conversion_p (TREE_TYPE (vectype),
5823 TREE_TYPE (op1)))
5824 {
5825 if (was_scalar_shift_arg)
5826 {
5827 /* If the argument was the same in all lanes create
5828 the correctly typed vector shift amount directly. */
5829 op1 = fold_convert (TREE_TYPE (vectype), op1);
5830 op1 = vect_init_vector (stmt_info, op1, TREE_TYPE (vectype),
5831 !loop_vinfo ? gsi : NULL);
5832 vec_oprnd1 = vect_init_vector (stmt_info, op1, vectype,
5833 !loop_vinfo ? gsi : NULL);
5834 vec_oprnds1.create (slp_node->vec_stmts_size);
5835 for (k = 0; k < slp_node->vec_stmts_size; k++)
5836 vec_oprnds1.quick_push (vec_oprnd1);
5837 }
5838 else if (dt[1] == vect_constant_def)
5839 {
5840 /* Convert the scalar constant shift amounts in-place. */
5841 slp_tree shift = SLP_TREE_CHILDREN (slp_node)[1];
5842 gcc_assert (SLP_TREE_DEF_TYPE (shift) == vect_constant_def);
5843 for (unsigned i = 0;
5844 i < SLP_TREE_SCALAR_OPS (shift).length (); ++i)
5845 {
5846 SLP_TREE_SCALAR_OPS (shift)[i]
5847 = fold_convert (TREE_TYPE (vectype),
5848 SLP_TREE_SCALAR_OPS (shift)[i]);
5849 gcc_assert ((TREE_CODE (SLP_TREE_SCALAR_OPS (shift)[i])
5850 == INTEGER_CST));
5851 }
5852 }
5853 else
5854 gcc_assert (TYPE_MODE (op1_vectype) == TYPE_MODE (vectype));
5855 }
5856
5857 /* vec_oprnd1 is available if operand 1 should be of a scalar type
5858 (a special case for certain kinds of vector shifts); otherwise,
5859 operand 1 should be of a vector type (the usual case). */
5860 if (vec_oprnd1)
5861 vect_get_vec_defs (op0, NULL_TREE, stmt_info, &vec_oprnds0, NULL,
5862 slp_node);
5863 else
5864 vect_get_vec_defs (op0, op1, stmt_info, &vec_oprnds0, &vec_oprnds1,
5865 slp_node);
5866 }
5867 else
5868 vect_get_vec_defs_for_stmt_copy (vinfo, &vec_oprnds0, &vec_oprnds1);
5869
5870 /* Arguments are ready. Create the new vector stmt. */
5871 stmt_vec_info new_stmt_info = NULL;
5872 FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
5873 {
5874 vop1 = vec_oprnds1[i];
5875 gassign *new_stmt = gimple_build_assign (vec_dest, code, vop0, vop1);
5876 new_temp = make_ssa_name (vec_dest, new_stmt);
5877 gimple_assign_set_lhs (new_stmt, new_temp);
5878 new_stmt_info
5879 = vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
5880 if (slp_node)
5881 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt_info);
5882 }
5883
5884 if (slp_node)
5885 continue;
5886
5887 if (j == 0)
5888 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt_info;
5889 else
5890 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt_info;
5891 prev_stmt_info = new_stmt_info;
5892 }
5893
5894 vec_oprnds0.release ();
5895 vec_oprnds1.release ();
5896
5897 return true;
5898 }
5899
5900
5901 /* Function vectorizable_operation.
5902
5903 Check if STMT_INFO performs a binary, unary or ternary operation that can
5904 be vectorized.
5905 If VEC_STMT is also passed, vectorize STMT_INFO: create a vectorized
5906 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
5907 Return true if STMT_INFO is vectorizable in this way. */
5908
5909 static bool
5910 vectorizable_operation (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
5911 stmt_vec_info *vec_stmt, slp_tree slp_node,
5912 stmt_vector_for_cost *cost_vec)
5913 {
5914 tree vec_dest;
5915 tree scalar_dest;
5916 tree op0, op1 = NULL_TREE, op2 = NULL_TREE;
5917 tree vectype;
5918 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
5919 enum tree_code code, orig_code;
5920 machine_mode vec_mode;
5921 tree new_temp;
5922 int op_type;
5923 optab optab;
5924 bool target_support_p;
5925 enum vect_def_type dt[3]
5926 = {vect_unknown_def_type, vect_unknown_def_type, vect_unknown_def_type};
5927 int ndts = 3;
5928 stmt_vec_info prev_stmt_info;
5929 poly_uint64 nunits_in;
5930 poly_uint64 nunits_out;
5931 tree vectype_out;
5932 int ncopies, vec_num;
5933 int j, i;
5934 vec<tree> vec_oprnds0 = vNULL;
5935 vec<tree> vec_oprnds1 = vNULL;
5936 vec<tree> vec_oprnds2 = vNULL;
5937 tree vop0, vop1, vop2;
5938 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
5939 vec_info *vinfo = stmt_info->vinfo;
5940
5941 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
5942 return false;
5943
5944 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
5945 && ! vec_stmt)
5946 return false;
5947
5948 /* Is STMT a vectorizable binary/unary/ternary operation? */
5949 gassign *stmt = dyn_cast <gassign *> (stmt_info->stmt);
5950 if (!stmt)
5951 return false;
5952
5953 if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME)
5954 return false;
5955
5956 orig_code = code = gimple_assign_rhs_code (stmt);
5957
5958 /* For pointer addition and subtraction, we should use the normal
5959 plus and minus for the vector operation. */
5960 if (code == POINTER_PLUS_EXPR)
5961 code = PLUS_EXPR;
5962 if (code == POINTER_DIFF_EXPR)
5963 code = MINUS_EXPR;
5964
5965 /* Support only unary or binary operations. */
5966 op_type = TREE_CODE_LENGTH (code);
5967 if (op_type != unary_op && op_type != binary_op && op_type != ternary_op)
5968 {
5969 if (dump_enabled_p ())
5970 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5971 "num. args = %d (not unary/binary/ternary op).\n",
5972 op_type);
5973 return false;
5974 }
5975
5976 scalar_dest = gimple_assign_lhs (stmt);
5977 vectype_out = STMT_VINFO_VECTYPE (stmt_info);
5978
5979 /* Most operations cannot handle bit-precision types without extra
5980 truncations. */
5981 if (!VECTOR_BOOLEAN_TYPE_P (vectype_out)
5982 && !type_has_mode_precision_p (TREE_TYPE (scalar_dest))
5983 /* Exceptions are the bitwise binary operations. */
5984 && code != BIT_IOR_EXPR
5985 && code != BIT_XOR_EXPR
5986 && code != BIT_AND_EXPR)
5987 {
5988 if (dump_enabled_p ())
5989 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5990 "bit-precision arithmetic not supported.\n");
5991 return false;
5992 }
5993
5994 op0 = gimple_assign_rhs1 (stmt);
5995 if (!vect_is_simple_use (op0, vinfo, &dt[0], &vectype))
5996 {
5997 if (dump_enabled_p ())
5998 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5999 "use not simple.\n");
6000 return false;
6001 }
6002 /* If op0 is an external or constant def use a vector type with
6003 the same size as the output vector type. */
6004 if (!vectype)
6005 {
6006 /* For a boolean type we cannot determine the vectype from an
6007 invariant value (we don't know whether it is a vector
6008 of booleans or a vector of integers). We use the output
6009 vectype because operations on booleans don't change
6010 the type. */
6011 if (VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (op0)))
6012 {
6013 if (!VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (scalar_dest)))
6014 {
6015 if (dump_enabled_p ())
6016 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6017 "not supported operation on bool value.\n");
6018 return false;
6019 }
6020 vectype = vectype_out;
6021 }
6022 else
6023 vectype = get_same_sized_vectype (TREE_TYPE (op0), vectype_out);
6024 }
6025 if (vec_stmt)
6026 gcc_assert (vectype);
6027 if (!vectype)
6028 {
6029 if (dump_enabled_p ())
6030 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6031 "no vectype for scalar type %T\n",
6032 TREE_TYPE (op0));
6033
6034 return false;
6035 }
6036
6037 nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
6038 nunits_in = TYPE_VECTOR_SUBPARTS (vectype);
6039 if (maybe_ne (nunits_out, nunits_in))
6040 return false;
6041
6042 if (op_type == binary_op || op_type == ternary_op)
6043 {
6044 op1 = gimple_assign_rhs2 (stmt);
6045 if (!vect_is_simple_use (op1, vinfo, &dt[1]))
6046 {
6047 if (dump_enabled_p ())
6048 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6049 "use not simple.\n");
6050 return false;
6051 }
6052 }
6053 if (op_type == ternary_op)
6054 {
6055 op2 = gimple_assign_rhs3 (stmt);
6056 if (!vect_is_simple_use (op2, vinfo, &dt[2]))
6057 {
6058 if (dump_enabled_p ())
6059 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6060 "use not simple.\n");
6061 return false;
6062 }
6063 }
6064
6065 /* Multiple types in SLP are handled by creating the appropriate number of
6066 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
6067 case of SLP. */
6068 if (slp_node)
6069 {
6070 ncopies = 1;
6071 vec_num = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
6072 }
6073 else
6074 {
6075 ncopies = vect_get_num_copies (loop_vinfo, vectype);
6076 vec_num = 1;
6077 }
6078
6079 gcc_assert (ncopies >= 1);
6080
6081 /* Shifts are handled in vectorizable_shift (). */
6082 if (code == LSHIFT_EXPR || code == RSHIFT_EXPR || code == LROTATE_EXPR
6083 || code == RROTATE_EXPR)
6084 return false;
6085
6086 /* Supportable by target? */
6087
6088 vec_mode = TYPE_MODE (vectype);
6089 if (code == MULT_HIGHPART_EXPR)
6090 target_support_p = can_mult_highpart_p (vec_mode, TYPE_UNSIGNED (vectype));
6091 else
6092 {
6093 optab = optab_for_tree_code (code, vectype, optab_default);
6094 if (!optab)
6095 {
6096 if (dump_enabled_p ())
6097 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6098 "no optab.\n");
6099 return false;
6100 }
6101 target_support_p = (optab_handler (optab, vec_mode)
6102 != CODE_FOR_nothing);
6103 }
6104
6105 if (!target_support_p)
6106 {
6107 if (dump_enabled_p ())
6108 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6109 "op not supported by target.\n");
6110 /* Check only during analysis. */
6111 if (maybe_ne (GET_MODE_SIZE (vec_mode), UNITS_PER_WORD)
6112 || (!vec_stmt && !vect_worthwhile_without_simd_p (vinfo, code)))
6113 return false;
6114 if (dump_enabled_p ())
6115 dump_printf_loc (MSG_NOTE, vect_location,
6116 "proceeding using word mode.\n");
6117 }
6118
6119 /* Worthwhile without SIMD support? Check only during analysis. */
6120 if (!VECTOR_MODE_P (vec_mode)
6121 && !vec_stmt
6122 && !vect_worthwhile_without_simd_p (vinfo, code))
6123 {
6124 if (dump_enabled_p ())
6125 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6126 "not worthwhile without SIMD support.\n");
6127 return false;
6128 }
6129
6130 int reduc_idx = STMT_VINFO_REDUC_IDX (stmt_info);
6131 vec_loop_masks *masks = (loop_vinfo ? &LOOP_VINFO_MASKS (loop_vinfo) : NULL);
6132 internal_fn cond_fn = get_conditional_internal_fn (code);
6133
6134 if (!vec_stmt) /* transformation not required. */
6135 {
6136 /* If this operation is part of a reduction, a fully-masked loop
6137 should only change the active lanes of the reduction chain,
6138 keeping the inactive lanes as-is. */
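/* A rough sketch of the intent (the IL names are illustrative): the
   reduction

     for (i = 0; i < n; i++) res += a[i];

   in a fully-masked loop becomes

     res_v = .COND_ADD (loop_mask, res_v, a_v, res_v);

   so lanes where LOOP_MASK is false simply pass the previous value of
   the reduction vector through unchanged. */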
6139 if (loop_vinfo
6140 && LOOP_VINFO_CAN_FULLY_MASK_P (loop_vinfo)
6141 && reduc_idx >= 0)
6142 {
6143 if (cond_fn == IFN_LAST
6144 || !direct_internal_fn_supported_p (cond_fn, vectype,
6145 OPTIMIZE_FOR_SPEED))
6146 {
6147 if (dump_enabled_p ())
6148 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6149 "can't use a fully-masked loop because no"
6150 " conditional operation is available.\n");
6151 LOOP_VINFO_CAN_FULLY_MASK_P (loop_vinfo) = false;
6152 }
6153 else
6154 vect_record_loop_mask (loop_vinfo, masks, ncopies * vec_num,
6155 vectype, NULL);
6156 }
6157
6158 STMT_VINFO_TYPE (stmt_info) = op_vec_info_type;
6159 DUMP_VECT_SCOPE ("vectorizable_operation");
6160 vect_model_simple_cost (stmt_info, ncopies, dt, ndts, slp_node, cost_vec);
6161 return true;
6162 }
6163
6164 /* Transform. */
6165
6166 if (dump_enabled_p ())
6167 dump_printf_loc (MSG_NOTE, vect_location,
6168 "transform binary/unary operation.\n");
6169
6170 bool masked_loop_p = loop_vinfo && LOOP_VINFO_FULLY_MASKED_P (loop_vinfo);
6171
6172 /* POINTER_DIFF_EXPR has pointer arguments which are vectorized as
6173 vectors with unsigned elements, but the result is signed. So, we
6174 need to compute the MINUS_EXPR into a vectype temporary and
6175 VIEW_CONVERT_EXPR it into the final vectype_out result. */
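/* Roughly (illustrative names): for a scalar POINTER_DIFF_EXPR d = p - q
   the vectorized code computes

     tmp_v = p_v - q_v;   in VECTYPE, with unsigned elements
     d_v = VIEW_CONVERT_EXPR<signed VECTYPE_OUT>(tmp_v);

   which is what the vec_cvt_dest handling below emits. */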
6176 tree vec_cvt_dest = NULL_TREE;
6177 if (orig_code == POINTER_DIFF_EXPR)
6178 {
6179 vec_dest = vect_create_destination_var (scalar_dest, vectype);
6180 vec_cvt_dest = vect_create_destination_var (scalar_dest, vectype_out);
6181 }
6182 /* Handle def. */
6183 else
6184 vec_dest = vect_create_destination_var (scalar_dest, vectype_out);
6185
6186 /* In case the vectorization factor (VF) is bigger than the number
6187 of elements that we can fit in a vectype (nunits), we have to generate
6188 more than one vector stmt, i.e., we need to "unroll" the
6189 vector stmt by a factor of VF/nunits. In doing so, we record a pointer
6190 from one copy of the vector stmt to the next, in the field
6191 STMT_VINFO_RELATED_STMT. This is necessary in order to allow following
6192 stages to find the correct vector defs to be used when vectorizing
6193 stmts that use the defs of the current stmt. The example below
6194 illustrates the vectorization process when VF=16 and nunits=4 (i.e.,
6195 we need to create 4 vectorized stmts):
6196
6197 before vectorization:
6198 RELATED_STMT VEC_STMT
6199 S1: x = memref - -
6200 S2: z = x + 1 - -
6201
6202 step 1: vectorize stmt S1 (done in vectorizable_load. See more details
6203 there):
6204 RELATED_STMT VEC_STMT
6205 VS1_0: vx0 = memref0 VS1_1 -
6206 VS1_1: vx1 = memref1 VS1_2 -
6207 VS1_2: vx2 = memref2 VS1_3 -
6208 VS1_3: vx3 = memref3 - -
6209 S1: x = load - VS1_0
6210 S2: z = x + 1 - -
6211
6212 step2: vectorize stmt S2 (done here):
6213 To vectorize stmt S2 we first need to find the relevant vector
6214 def for the first operand 'x'. This is, as usual, obtained from
6215 the vector stmt recorded in the STMT_VINFO_VEC_STMT of the stmt
6216 that defines 'x' (S1). This way we find the stmt VS1_0, and the
6217 relevant vector def 'vx0'. Having found 'vx0' we can generate
6218 the vector stmt VS2_0, and as usual, record it in the
6219 STMT_VINFO_VEC_STMT of stmt S2.
6220 When creating the second copy (VS2_1), we obtain the relevant vector
6221 def from the vector stmt recorded in the STMT_VINFO_RELATED_STMT of
6222 stmt VS1_0. This way we find the stmt VS1_1 and the relevant
6223 vector def 'vx1'. Using 'vx1' we create stmt VS2_1 and record a
6224 pointer to it in the STMT_VINFO_RELATED_STMT of the vector stmt VS2_0.
6225 Similarly when creating stmts VS2_2 and VS2_3. This is the resulting
6226 chain of stmts and pointers:
6227 RELATED_STMT VEC_STMT
6228 VS1_0: vx0 = memref0 VS1_1 -
6229 VS1_1: vx1 = memref1 VS1_2 -
6230 VS1_2: vx2 = memref2 VS1_3 -
6231 VS1_3: vx3 = memref3 - -
6232 S1: x = load - VS1_0
6233 VS2_0: vz0 = vx0 + v1 VS2_1 -
6234 VS2_1: vz1 = vx1 + v1 VS2_2 -
6235 VS2_2: vz2 = vx2 + v1 VS2_3 -
6236 VS2_3: vz3 = vx3 + v1 - -
6237 S2: z = x + 1 - VS2_0 */
6238
6239 prev_stmt_info = NULL;
6240 for (j = 0; j < ncopies; j++)
6241 {
6242 /* Handle uses. */
6243 if (j == 0)
6244 {
6245 if (op_type == binary_op)
6246 vect_get_vec_defs (op0, op1, stmt_info, &vec_oprnds0, &vec_oprnds1,
6247 slp_node);
6248 else if (op_type == ternary_op)
6249 {
6250 if (slp_node)
6251 {
6252 auto_vec<vec<tree> > vec_defs(3);
6253 vect_get_slp_defs (slp_node, &vec_defs);
6254 vec_oprnds0 = vec_defs[0];
6255 vec_oprnds1 = vec_defs[1];
6256 vec_oprnds2 = vec_defs[2];
6257 }
6258 else
6259 {
6260 vect_get_vec_defs (op0, op1, stmt_info, &vec_oprnds0,
6261 &vec_oprnds1, NULL);
6262 vect_get_vec_defs (op2, NULL_TREE, stmt_info, &vec_oprnds2,
6263 NULL, NULL);
6264 }
6265 }
6266 else
6267 vect_get_vec_defs (op0, NULL_TREE, stmt_info, &vec_oprnds0, NULL,
6268 slp_node);
6269 }
6270 else
6271 {
6272 vect_get_vec_defs_for_stmt_copy (vinfo, &vec_oprnds0, &vec_oprnds1);
6273 if (op_type == ternary_op)
6274 {
6275 tree vec_oprnd = vec_oprnds2.pop ();
6276 vec_oprnds2.quick_push (vect_get_vec_def_for_stmt_copy (vinfo,
6277 vec_oprnd));
6278 }
6279 }
6280
6281 /* Arguments are ready. Create the new vector stmt. */
6282 stmt_vec_info new_stmt_info = NULL;
6283 FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
6284 {
6285 vop1 = ((op_type == binary_op || op_type == ternary_op)
6286 ? vec_oprnds1[i] : NULL_TREE);
6287 vop2 = ((op_type == ternary_op)
6288 ? vec_oprnds2[i] : NULL_TREE);
6289 if (masked_loop_p && reduc_idx >= 0)
6290 {
6291 /* Perform the operation on active elements only and take
6292 inactive elements from the reduction chain input. */
6293 gcc_assert (!vop2);
6294 vop2 = reduc_idx == 1 ? vop1 : vop0;
6295 tree mask = vect_get_loop_mask (gsi, masks, vec_num * ncopies,
6296 vectype, i * ncopies + j);
6297 gcall *call = gimple_build_call_internal (cond_fn, 4, mask,
6298 vop0, vop1, vop2);
6299 new_temp = make_ssa_name (vec_dest, call);
6300 gimple_call_set_lhs (call, new_temp);
6301 gimple_call_set_nothrow (call, true);
6302 new_stmt_info
6303 = vect_finish_stmt_generation (stmt_info, call, gsi);
6304 }
6305 else
6306 {
6307 gassign *new_stmt = gimple_build_assign (vec_dest, code,
6308 vop0, vop1, vop2);
6309 new_temp = make_ssa_name (vec_dest, new_stmt);
6310 gimple_assign_set_lhs (new_stmt, new_temp);
6311 new_stmt_info
6312 = vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
6313 if (vec_cvt_dest)
6314 {
6315 new_temp = build1 (VIEW_CONVERT_EXPR, vectype_out, new_temp);
6316 gassign *new_stmt
6317 = gimple_build_assign (vec_cvt_dest, VIEW_CONVERT_EXPR,
6318 new_temp);
6319 new_temp = make_ssa_name (vec_cvt_dest, new_stmt);
6320 gimple_assign_set_lhs (new_stmt, new_temp);
6321 new_stmt_info
6322 = vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
6323 }
6324 }
6325 if (slp_node)
6326 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt_info);
6327 }
6328
6329 if (slp_node)
6330 continue;
6331
6332 if (j == 0)
6333 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt_info;
6334 else
6335 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt_info;
6336 prev_stmt_info = new_stmt_info;
6337 }
6338
6339 vec_oprnds0.release ();
6340 vec_oprnds1.release ();
6341 vec_oprnds2.release ();
6342
6343 return true;
6344 }
6345
6346 /* A helper function to ensure data reference DR_INFO's base alignment. */
6347
6348 static void
6349 ensure_base_align (dr_vec_info *dr_info)
6350 {
6351 if (dr_info->misalignment == DR_MISALIGNMENT_UNINITIALIZED)
6352 return;
6353
6354 if (dr_info->base_misaligned)
6355 {
6356 tree base_decl = dr_info->base_decl;
6357
6358 /* We should only be able to increase the alignment of a base object
6359 if we know what its new alignment should be at compile time. */
6360 unsigned HOST_WIDE_INT align_base_to =
6361 DR_TARGET_ALIGNMENT (dr_info).to_constant () * BITS_PER_UNIT;
6362
6363 if (decl_in_symtab_p (base_decl))
6364 symtab_node::get (base_decl)->increase_alignment (align_base_to);
6365 else if (DECL_ALIGN (base_decl) < align_base_to)
6366 {
6367 SET_DECL_ALIGN (base_decl, align_base_to);
6368 DECL_USER_ALIGN (base_decl) = 1;
6369 }
6370 dr_info->base_misaligned = false;
6371 }
6372 }
6373
6374
6375 /* Function get_group_alias_ptr_type.
6376
6377 Return the alias type for the group starting at FIRST_STMT_INFO. */
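/* For example (hypothetical source), an interleaved store group writing
   both fields of

     struct { int i; float f; } *p;
     p[k].i = ...; p[k].f = ...;

   mixes int and float alias sets, so the group conservatively gets
   ptr_type_node (alias set 0); a group whose members all reference the
   same type keeps that type's alias pointer type. */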
6378
6379 static tree
6380 get_group_alias_ptr_type (stmt_vec_info first_stmt_info)
6381 {
6382 struct data_reference *first_dr, *next_dr;
6383
6384 first_dr = STMT_VINFO_DATA_REF (first_stmt_info);
6385 stmt_vec_info next_stmt_info = DR_GROUP_NEXT_ELEMENT (first_stmt_info);
6386 while (next_stmt_info)
6387 {
6388 next_dr = STMT_VINFO_DATA_REF (next_stmt_info);
6389 if (get_alias_set (DR_REF (first_dr))
6390 != get_alias_set (DR_REF (next_dr)))
6391 {
6392 if (dump_enabled_p ())
6393 dump_printf_loc (MSG_NOTE, vect_location,
6394 "conflicting alias set types.\n");
6395 return ptr_type_node;
6396 }
6397 next_stmt_info = DR_GROUP_NEXT_ELEMENT (next_stmt_info);
6398 }
6399 return reference_alias_ptr_type (DR_REF (first_dr));
6400 }
6401
6402
6403 /* Function scan_operand_equal_p.
6404
6405 Helper function for check_scan_store. Compare two references
6406 with .GOMP_SIMD_LANE bases. */
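/* E.g. (illustrative IL) the two references

     D.2042[_20]   and   MEM[(int *)&D.2042 + _20 * 4]

   decompose to the same base D.2042 and, after stripping the common
   step of 4, to the same offset _20, and are then considered equal. */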
6407
6408 static bool
6409 scan_operand_equal_p (tree ref1, tree ref2)
6410 {
6411 tree ref[2] = { ref1, ref2 };
6412 poly_int64 bitsize[2], bitpos[2];
6413 tree offset[2], base[2];
6414 for (int i = 0; i < 2; ++i)
6415 {
6416 machine_mode mode;
6417 int unsignedp, reversep, volatilep = 0;
6418 base[i] = get_inner_reference (ref[i], &bitsize[i], &bitpos[i],
6419 &offset[i], &mode, &unsignedp,
6420 &reversep, &volatilep);
6421 if (reversep || volatilep || maybe_ne (bitpos[i], 0))
6422 return false;
6423 if (TREE_CODE (base[i]) == MEM_REF
6424 && offset[i] == NULL_TREE
6425 && TREE_CODE (TREE_OPERAND (base[i], 0)) == SSA_NAME)
6426 {
6427 gimple *def_stmt = SSA_NAME_DEF_STMT (TREE_OPERAND (base[i], 0));
6428 if (is_gimple_assign (def_stmt)
6429 && gimple_assign_rhs_code (def_stmt) == POINTER_PLUS_EXPR
6430 && TREE_CODE (gimple_assign_rhs1 (def_stmt)) == ADDR_EXPR
6431 && TREE_CODE (gimple_assign_rhs2 (def_stmt)) == SSA_NAME)
6432 {
6433 if (maybe_ne (mem_ref_offset (base[i]), 0))
6434 return false;
6435 base[i] = TREE_OPERAND (gimple_assign_rhs1 (def_stmt), 0);
6436 offset[i] = gimple_assign_rhs2 (def_stmt);
6437 }
6438 }
6439 }
6440
6441 if (!operand_equal_p (base[0], base[1], 0))
6442 return false;
6443 if (maybe_ne (bitsize[0], bitsize[1]))
6444 return false;
6445 if (offset[0] != offset[1])
6446 {
6447 if (!offset[0] || !offset[1])
6448 return false;
6449 if (!operand_equal_p (offset[0], offset[1], 0))
6450 {
6451 tree step[2];
6452 for (int i = 0; i < 2; ++i)
6453 {
6454 step[i] = integer_one_node;
6455 if (TREE_CODE (offset[i]) == SSA_NAME)
6456 {
6457 gimple *def_stmt = SSA_NAME_DEF_STMT (offset[i]);
6458 if (is_gimple_assign (def_stmt)
6459 && gimple_assign_rhs_code (def_stmt) == MULT_EXPR
6460 && (TREE_CODE (gimple_assign_rhs2 (def_stmt))
6461 == INTEGER_CST))
6462 {
6463 step[i] = gimple_assign_rhs2 (def_stmt);
6464 offset[i] = gimple_assign_rhs1 (def_stmt);
6465 }
6466 }
6467 else if (TREE_CODE (offset[i]) == MULT_EXPR)
6468 {
6469 step[i] = TREE_OPERAND (offset[i], 1);
6470 offset[i] = TREE_OPERAND (offset[i], 0);
6471 }
6472 tree rhs1 = NULL_TREE;
6473 if (TREE_CODE (offset[i]) == SSA_NAME)
6474 {
6475 gimple *def_stmt = SSA_NAME_DEF_STMT (offset[i]);
6476 if (gimple_assign_cast_p (def_stmt))
6477 rhs1 = gimple_assign_rhs1 (def_stmt);
6478 }
6479 else if (CONVERT_EXPR_P (offset[i]))
6480 rhs1 = TREE_OPERAND (offset[i], 0);
6481 if (rhs1
6482 && INTEGRAL_TYPE_P (TREE_TYPE (rhs1))
6483 && INTEGRAL_TYPE_P (TREE_TYPE (offset[i]))
6484 && (TYPE_PRECISION (TREE_TYPE (offset[i]))
6485 >= TYPE_PRECISION (TREE_TYPE (rhs1))))
6486 offset[i] = rhs1;
6487 }
6488 if (!operand_equal_p (offset[0], offset[1], 0)
6489 || !operand_equal_p (step[0], step[1], 0))
6490 return false;
6491 }
6492 }
6493 return true;
6494 }
6495
6496
6497 enum scan_store_kind {
6498 /* Normal permutation. */
6499 scan_store_kind_perm,
6500
6501 /* Whole vector left shift permutation with zero init. */
6502 scan_store_kind_lshift_zero,
6503
6504 /* Whole vector left shift permutation and VEC_COND_EXPR. */
6505 scan_store_kind_lshift_cond
6506 };
6507
6508 /* Function scan_store_can_perm_p.
6509
6510 Verify if we can perform the needed permutations or whole vector shifts.
6511 Return -1 on failure, otherwise the exact log2 of vectype's nunits.
6512 USE_WHOLE_VECTOR is a vector of enum scan_store_kind recording which
6513 operation to do at each step. */
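/* For example, with a V4SI vectype (nunits == 4, units_log2 == 2) the
   permutation selectors tried are, step by step:

     i == 0: { 0, 4, 5, 6 }   two-input permutation
     i == 1: { 0, 1, 4, 5 }   two-input permutation
     i == 2: { 3, 3, 3, 3 }   broadcast of the last element

   If a two-input permutation is not supported, a whole-vector shift
   (vec_shl_optab) is tried instead, combined with a VEC_COND_EXPR when
   INIT is not an all-zero constant. */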
6514
6515 static int
6516 scan_store_can_perm_p (tree vectype, tree init,
6517 vec<enum scan_store_kind> *use_whole_vector = NULL)
6518 {
6519 enum machine_mode vec_mode = TYPE_MODE (vectype);
6520 unsigned HOST_WIDE_INT nunits;
6521 if (!TYPE_VECTOR_SUBPARTS (vectype).is_constant (&nunits))
6522 return -1;
6523 int units_log2 = exact_log2 (nunits);
6524 if (units_log2 <= 0)
6525 return -1;
6526
6527 int i;
6528 enum scan_store_kind whole_vector_shift_kind = scan_store_kind_perm;
6529 for (i = 0; i <= units_log2; ++i)
6530 {
6531 unsigned HOST_WIDE_INT j, k;
6532 enum scan_store_kind kind = scan_store_kind_perm;
6533 vec_perm_builder sel (nunits, nunits, 1);
6534 sel.quick_grow (nunits);
6535 if (i == units_log2)
6536 {
6537 for (j = 0; j < nunits; ++j)
6538 sel[j] = nunits - 1;
6539 }
6540 else
6541 {
6542 for (j = 0; j < (HOST_WIDE_INT_1U << i); ++j)
6543 sel[j] = j;
6544 for (k = 0; j < nunits; ++j, ++k)
6545 sel[j] = nunits + k;
6546 }
6547 vec_perm_indices indices (sel, i == units_log2 ? 1 : 2, nunits);
6548 if (!can_vec_perm_const_p (vec_mode, indices))
6549 {
6550 if (i == units_log2)
6551 return -1;
6552
6553 if (whole_vector_shift_kind == scan_store_kind_perm)
6554 {
6555 if (optab_handler (vec_shl_optab, vec_mode) == CODE_FOR_nothing)
6556 return -1;
6557 whole_vector_shift_kind = scan_store_kind_lshift_zero;
6558 /* Whole vector shifts shift in zeros, so if init is an all-zero
6559 constant, there is no need to do anything further. */
6560 if ((TREE_CODE (init) != INTEGER_CST
6561 && TREE_CODE (init) != REAL_CST)
6562 || !initializer_zerop (init))
6563 {
6564 tree masktype = build_same_sized_truth_vector_type (vectype);
6565 if (!expand_vec_cond_expr_p (vectype, masktype, VECTOR_CST))
6566 return -1;
6567 whole_vector_shift_kind = scan_store_kind_lshift_cond;
6568 }
6569 }
6570 kind = whole_vector_shift_kind;
6571 }
6572 if (use_whole_vector)
6573 {
6574 if (kind != scan_store_kind_perm && use_whole_vector->is_empty ())
6575 use_whole_vector->safe_grow_cleared (i);
6576 if (kind != scan_store_kind_perm || !use_whole_vector->is_empty ())
6577 use_whole_vector->safe_push (kind);
6578 }
6579 }
6580
6581 return units_log2;
6582 }
6583
6584
6585 /* Function check_scan_store.
6586
6587 Check magic stores for #pragma omp scan {in,ex}clusive reductions. */
6588
6589 static bool
6590 check_scan_store (stmt_vec_info stmt_info, tree vectype,
6591 enum vect_def_type rhs_dt, bool slp, tree mask,
6592 vect_memory_access_type memory_access_type)
6593 {
6594 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
6595 dr_vec_info *dr_info = STMT_VINFO_DR_INFO (stmt_info);
6596 tree ref_type;
6597
6598 gcc_assert (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) > 1);
6599 if (slp
6600 || mask
6601 || memory_access_type != VMAT_CONTIGUOUS
6602 || TREE_CODE (DR_BASE_ADDRESS (dr_info->dr)) != ADDR_EXPR
6603 || !VAR_P (TREE_OPERAND (DR_BASE_ADDRESS (dr_info->dr), 0))
6604 || loop_vinfo == NULL
6605 || LOOP_VINFO_FULLY_MASKED_P (loop_vinfo)
6606 || STMT_VINFO_GROUPED_ACCESS (stmt_info)
6607 || !integer_zerop (DR_OFFSET (dr_info->dr))
6608 || !integer_zerop (DR_INIT (dr_info->dr))
6609 || !(ref_type = reference_alias_ptr_type (DR_REF (dr_info->dr)))
6610 || !alias_sets_conflict_p (get_alias_set (vectype),
6611 get_alias_set (TREE_TYPE (ref_type))))
6612 {
6613 if (dump_enabled_p ())
6614 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6615 "unsupported OpenMP scan store.\n");
6616 return false;
6617 }
6618
6619 /* We need to pattern match code built by OpenMP lowering and simplified
6620 by following optimizations into something we can handle.
6621 #pragma omp simd reduction(inscan,+:r)
6622 for (...)
6623 {
6624 r += something ();
6625 #pragma omp scan inclusive (r)
6626 use (r);
6627 }
6628 shall have body with:
6629 // Initialization for input phase, store the reduction initializer:
6630 _20 = .GOMP_SIMD_LANE (simduid.3_14(D), 0);
6631 _21 = .GOMP_SIMD_LANE (simduid.3_14(D), 1);
6632 D.2042[_21] = 0;
6633 // Actual input phase:
6634 ...
6635 r.0_5 = D.2042[_20];
6636 _6 = _4 + r.0_5;
6637 D.2042[_20] = _6;
6638 // Initialization for scan phase:
6639 _25 = .GOMP_SIMD_LANE (simduid.3_14(D), 2);
6640 _26 = D.2043[_25];
6641 _27 = D.2042[_25];
6642 _28 = _26 + _27;
6643 D.2043[_25] = _28;
6644 D.2042[_25] = _28;
6645 // Actual scan phase:
6646 ...
6647 r.1_8 = D.2042[_20];
6648 ...
6649 The "omp simd array" variable D.2042 holds the privatized copy used
6650 inside of the loop and D.2043 is another one that holds copies of
6651 the current original list item. The separate GOMP_SIMD_LANE ifn
6652 kinds are there in order to allow optimizing the initializer store
6653 and combiner sequence, e.g. if it is originally some C++ish user
6654 defined reduction, but allow the vectorizer to pattern recognize it
6655 and turn into the appropriate vectorized scan.
6656
6657 For exclusive scan, this is slightly different:
6658 #pragma omp simd reduction(inscan,+:r)
6659 for (...)
6660 {
6661 use (r);
6662 #pragma omp scan exclusive (r)
6663 r += something ();
6664 }
6665 shall have body with:
6666 // Initialization for input phase, store the reduction initializer:
6667 _20 = .GOMP_SIMD_LANE (simduid.3_14(D), 0);
6668 _21 = .GOMP_SIMD_LANE (simduid.3_14(D), 1);
6669 D.2042[_21] = 0;
6670 // Actual input phase:
6671 ...
6672 r.0_5 = D.2042[_20];
6673 _6 = _4 + r.0_5;
6674 D.2042[_20] = _6;
6675 // Initialization for scan phase:
6676 _25 = .GOMP_SIMD_LANE (simduid.3_14(D), 3);
6677 _26 = D.2043[_25];
6678 D.2044[_25] = _26;
6679 _27 = D.2042[_25];
6680 _28 = _26 + _27;
6681 D.2043[_25] = _28;
6682 // Actual scan phase:
6683 ...
6684 r.1_8 = D.2044[_20];
6685 ... */
6686
6687 if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) == 2)
6688 {
6689 /* Match the D.2042[_21] = 0; store above. Just require that
6690 it is a constant or external definition store. */
6691 if (rhs_dt != vect_constant_def && rhs_dt != vect_external_def)
6692 {
6693 fail_init:
6694 if (dump_enabled_p ())
6695 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6696 "unsupported OpenMP scan initializer store.\n");
6697 return false;
6698 }
6699
6700 if (! loop_vinfo->scan_map)
6701 loop_vinfo->scan_map = new hash_map<tree, tree>;
6702 tree var = TREE_OPERAND (DR_BASE_ADDRESS (dr_info->dr), 0);
6703 tree &cached = loop_vinfo->scan_map->get_or_insert (var);
6704 if (cached)
6705 goto fail_init;
6706 cached = gimple_assign_rhs1 (STMT_VINFO_STMT (stmt_info));
6707
6708 /* These stores can be vectorized normally. */
6709 return true;
6710 }
6711
6712 if (rhs_dt != vect_internal_def)
6713 {
6714 fail:
6715 if (dump_enabled_p ())
6716 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6717 "unsupported OpenMP scan combiner pattern.\n");
6718 return false;
6719 }
6720
6721 gimple *stmt = STMT_VINFO_STMT (stmt_info);
6722 tree rhs = gimple_assign_rhs1 (stmt);
6723 if (TREE_CODE (rhs) != SSA_NAME)
6724 goto fail;
6725
6726 gimple *other_store_stmt = NULL;
6727 tree var = TREE_OPERAND (DR_BASE_ADDRESS (dr_info->dr), 0);
6728 bool inscan_var_store
6729 = lookup_attribute ("omp simd inscan", DECL_ATTRIBUTES (var)) != NULL;
6730
6731 if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) == 4)
6732 {
6733 if (!inscan_var_store)
6734 {
6735 use_operand_p use_p;
6736 imm_use_iterator iter;
6737 FOR_EACH_IMM_USE_FAST (use_p, iter, rhs)
6738 {
6739 gimple *use_stmt = USE_STMT (use_p);
6740 if (use_stmt == stmt || is_gimple_debug (use_stmt))
6741 continue;
6742 if (gimple_bb (use_stmt) != gimple_bb (stmt)
6743 || !is_gimple_assign (use_stmt)
6744 || gimple_assign_rhs_class (use_stmt) != GIMPLE_BINARY_RHS
6745 || other_store_stmt
6746 || TREE_CODE (gimple_assign_lhs (use_stmt)) != SSA_NAME)
6747 goto fail;
6748 other_store_stmt = use_stmt;
6749 }
6750 if (other_store_stmt == NULL)
6751 goto fail;
6752 rhs = gimple_assign_lhs (other_store_stmt);
6753 if (!single_imm_use (rhs, &use_p, &other_store_stmt))
6754 goto fail;
6755 }
6756 }
6757 else if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) == 3)
6758 {
6759 use_operand_p use_p;
6760 imm_use_iterator iter;
6761 FOR_EACH_IMM_USE_FAST (use_p, iter, rhs)
6762 {
6763 gimple *use_stmt = USE_STMT (use_p);
6764 if (use_stmt == stmt || is_gimple_debug (use_stmt))
6765 continue;
6766 if (other_store_stmt)
6767 goto fail;
6768 other_store_stmt = use_stmt;
6769 }
6770 }
6771 else
6772 goto fail;
6773
6774 gimple *def_stmt = SSA_NAME_DEF_STMT (rhs);
6775 if (gimple_bb (def_stmt) != gimple_bb (stmt)
6776 || !is_gimple_assign (def_stmt)
6777 || gimple_assign_rhs_class (def_stmt) != GIMPLE_BINARY_RHS)
6778 goto fail;
6779
6780 enum tree_code code = gimple_assign_rhs_code (def_stmt);
6781 /* For pointer addition, we should use the normal plus for the vector
6782 operation. */
6783 switch (code)
6784 {
6785 case POINTER_PLUS_EXPR:
6786 code = PLUS_EXPR;
6787 break;
6788 case MULT_HIGHPART_EXPR:
6789 goto fail;
6790 default:
6791 break;
6792 }
6793 if (TREE_CODE_LENGTH (code) != binary_op || !commutative_tree_code (code))
6794 goto fail;
6795
6796 tree rhs1 = gimple_assign_rhs1 (def_stmt);
6797 tree rhs2 = gimple_assign_rhs2 (def_stmt);
6798 if (TREE_CODE (rhs1) != SSA_NAME || TREE_CODE (rhs2) != SSA_NAME)
6799 goto fail;
6800
6801 gimple *load1_stmt = SSA_NAME_DEF_STMT (rhs1);
6802 gimple *load2_stmt = SSA_NAME_DEF_STMT (rhs2);
6803 if (gimple_bb (load1_stmt) != gimple_bb (stmt)
6804 || !gimple_assign_load_p (load1_stmt)
6805 || gimple_bb (load2_stmt) != gimple_bb (stmt)
6806 || !gimple_assign_load_p (load2_stmt))
6807 goto fail;
6808
6809 stmt_vec_info load1_stmt_info = loop_vinfo->lookup_stmt (load1_stmt);
6810 stmt_vec_info load2_stmt_info = loop_vinfo->lookup_stmt (load2_stmt);
6811 if (load1_stmt_info == NULL
6812 || load2_stmt_info == NULL
6813 || (STMT_VINFO_SIMD_LANE_ACCESS_P (load1_stmt_info)
6814 != STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info))
6815 || (STMT_VINFO_SIMD_LANE_ACCESS_P (load2_stmt_info)
6816 != STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info)))
6817 goto fail;
6818
6819 if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) == 4 && inscan_var_store)
6820 {
6821 dr_vec_info *load1_dr_info = STMT_VINFO_DR_INFO (load1_stmt_info);
6822 if (TREE_CODE (DR_BASE_ADDRESS (load1_dr_info->dr)) != ADDR_EXPR
6823 || !VAR_P (TREE_OPERAND (DR_BASE_ADDRESS (load1_dr_info->dr), 0)))
6824 goto fail;
6825 tree var1 = TREE_OPERAND (DR_BASE_ADDRESS (load1_dr_info->dr), 0);
6826 tree lrhs;
6827 if (lookup_attribute ("omp simd inscan", DECL_ATTRIBUTES (var1)))
6828 lrhs = rhs1;
6829 else
6830 lrhs = rhs2;
6831 use_operand_p use_p;
6832 imm_use_iterator iter;
6833 FOR_EACH_IMM_USE_FAST (use_p, iter, lrhs)
6834 {
6835 gimple *use_stmt = USE_STMT (use_p);
6836 if (use_stmt == def_stmt || is_gimple_debug (use_stmt))
6837 continue;
6838 if (other_store_stmt)
6839 goto fail;
6840 other_store_stmt = use_stmt;
6841 }
6842 }
6843
6844 if (other_store_stmt == NULL)
6845 goto fail;
6846 if (gimple_bb (other_store_stmt) != gimple_bb (stmt)
6847 || !gimple_store_p (other_store_stmt))
6848 goto fail;
6849
6850 stmt_vec_info other_store_stmt_info
6851 = loop_vinfo->lookup_stmt (other_store_stmt);
6852 if (other_store_stmt_info == NULL
6853 || (STMT_VINFO_SIMD_LANE_ACCESS_P (other_store_stmt_info)
6854 != STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info)))
6855 goto fail;
6856
6857 gimple *stmt1 = stmt;
6858 gimple *stmt2 = other_store_stmt;
6859 if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) == 4 && !inscan_var_store)
6860 std::swap (stmt1, stmt2);
6861 if (scan_operand_equal_p (gimple_assign_lhs (stmt1),
6862 gimple_assign_rhs1 (load2_stmt)))
6863 {
6864 std::swap (rhs1, rhs2);
6865 std::swap (load1_stmt, load2_stmt);
6866 std::swap (load1_stmt_info, load2_stmt_info);
6867 }
6868 if (!scan_operand_equal_p (gimple_assign_lhs (stmt1),
6869 gimple_assign_rhs1 (load1_stmt)))
6870 goto fail;
6871
6872 tree var3 = NULL_TREE;
6873 if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) == 3
6874 && !scan_operand_equal_p (gimple_assign_lhs (stmt2),
6875 gimple_assign_rhs1 (load2_stmt)))
6876 goto fail;
6877 else if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) == 4)
6878 {
6879 dr_vec_info *load2_dr_info = STMT_VINFO_DR_INFO (load2_stmt_info);
6880 if (TREE_CODE (DR_BASE_ADDRESS (load2_dr_info->dr)) != ADDR_EXPR
6881 || !VAR_P (TREE_OPERAND (DR_BASE_ADDRESS (load2_dr_info->dr), 0)))
6882 goto fail;
6883 var3 = TREE_OPERAND (DR_BASE_ADDRESS (load2_dr_info->dr), 0);
6884 if (!lookup_attribute ("omp simd array", DECL_ATTRIBUTES (var3))
6885 || lookup_attribute ("omp simd inscan", DECL_ATTRIBUTES (var3))
6886 || lookup_attribute ("omp simd inscan exclusive",
6887 DECL_ATTRIBUTES (var3)))
6888 goto fail;
6889 }
6890
6891 dr_vec_info *other_dr_info = STMT_VINFO_DR_INFO (other_store_stmt_info);
6892 if (TREE_CODE (DR_BASE_ADDRESS (other_dr_info->dr)) != ADDR_EXPR
6893 || !VAR_P (TREE_OPERAND (DR_BASE_ADDRESS (other_dr_info->dr), 0)))
6894 goto fail;
6895
6896 tree var1 = TREE_OPERAND (DR_BASE_ADDRESS (dr_info->dr), 0);
6897 tree var2 = TREE_OPERAND (DR_BASE_ADDRESS (other_dr_info->dr), 0);
6898 if (!lookup_attribute ("omp simd array", DECL_ATTRIBUTES (var1))
6899 || !lookup_attribute ("omp simd array", DECL_ATTRIBUTES (var2))
6900 || (!lookup_attribute ("omp simd inscan", DECL_ATTRIBUTES (var1)))
6901 == (!lookup_attribute ("omp simd inscan", DECL_ATTRIBUTES (var2))))
6902 goto fail;
6903
6904 if (lookup_attribute ("omp simd inscan", DECL_ATTRIBUTES (var1)))
6905 std::swap (var1, var2);
6906
6907 if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) == 4)
6908 {
6909 if (!lookup_attribute ("omp simd inscan exclusive",
6910 DECL_ATTRIBUTES (var1)))
6911 goto fail;
6912 var1 = var3;
6913 }
6914
6915 if (loop_vinfo->scan_map == NULL)
6916 goto fail;
6917 tree *init = loop_vinfo->scan_map->get (var1);
6918 if (init == NULL)
6919 goto fail;
6920
6921 /* The IL is as expected, now check if we can actually vectorize it.
6922 Inclusive scan:
6923 _26 = D.2043[_25];
6924 _27 = D.2042[_25];
6925 _28 = _26 + _27;
6926 D.2043[_25] = _28;
6927 D.2042[_25] = _28;
6928 should be vectorized as (where _40 is the vectorized rhs
6929 from the D.2042[_21] = 0; store):
6930 _30 = MEM <vector(8) int> [(int *)&D.2043];
6931 _31 = MEM <vector(8) int> [(int *)&D.2042];
6932 _32 = VEC_PERM_EXPR <_40, _31, { 0, 8, 9, 10, 11, 12, 13, 14 }>;
6933 _33 = _31 + _32;
6934 // _33 = { _31[0], _31[0]+_31[1], _31[1]+_31[2], ..., _31[6]+_31[7] };
6935 _34 = VEC_PERM_EXPR <_40, _33, { 0, 1, 8, 9, 10, 11, 12, 13 }>;
6936 _35 = _33 + _34;
6937 // _35 = { _31[0], _31[0]+_31[1], _31[0]+.._31[2], _31[0]+.._31[3],
6938 // _31[1]+.._31[4], ... _31[4]+.._31[7] };
6939 _36 = VEC_PERM_EXPR <_40, _35, { 0, 1, 2, 3, 8, 9, 10, 11 }>;
6940 _37 = _35 + _36;
6941 // _37 = { _31[0], _31[0]+_31[1], _31[0]+.._31[2], _31[0]+.._31[3],
6942 // _31[0]+.._31[4], ... _31[0]+.._31[7] };
6943 _38 = _30 + _37;
6944 _39 = VEC_PERM_EXPR <_38, _38, { 7, 7, 7, 7, 7, 7, 7, 7 }>;
6945 MEM <vector(8) int> [(int *)&D.2043] = _39;
6946 MEM <vector(8) int> [(int *)&D.2042] = _38;
6947 Exclusive scan:
6948 _26 = D.2043[_25];
6949 D.2044[_25] = _26;
6950 _27 = D.2042[_25];
6951 _28 = _26 + _27;
6952 D.2043[_25] = _28;
6953 should be vectorized as (where _40 is the vectorized rhs
6954 from the D.2042[_21] = 0; store):
6955 _30 = MEM <vector(8) int> [(int *)&D.2043];
6956 _31 = MEM <vector(8) int> [(int *)&D.2042];
6957 _32 = VEC_PERM_EXPR <_40, _31, { 0, 8, 9, 10, 11, 12, 13, 14 }>;
6958 _33 = VEC_PERM_EXPR <_40, _32, { 0, 8, 9, 10, 11, 12, 13, 14 }>;
6959 _34 = _32 + _33;
6960 // _34 = { 0, _31[0], _31[0]+_31[1], _31[1]+_31[2], _31[2]+_31[3],
6961 // _31[3]+_31[4], ... _31[5]+.._31[6] };
6962 _35 = VEC_PERM_EXPR <_40, _34, { 0, 1, 8, 9, 10, 11, 12, 13 }>;
6963 _36 = _34 + _35;
6964 // _36 = { 0, _31[0], _31[0]+_31[1], _31[0]+.._31[2], _31[0]+.._31[3],
6965 // _31[1]+.._31[4], ... _31[3]+.._31[6] };
6966 _37 = VEC_PERM_EXPR <_40, _36, { 0, 1, 2, 3, 8, 9, 10, 11 }>;
6967 _38 = _36 + _37;
6968 // _38 = { 0, _31[0], _31[0]+_31[1], _31[0]+.._31[2], _31[0]+.._31[3],
6969 // _31[0]+.._31[4], ... _31[0]+.._31[6] };
6970 _39 = _30 + _38;
6971 _50 = _31 + _39;
6972 _51 = VEC_PERM_EXPR <_50, _50, { 7, 7, 7, 7, 7, 7, 7, 7 }>;
6973 MEM <vector(8) int> [(int *)&D.2044] = _39;
6974 MEM <vector(8) int> [(int *)&D.2042] = _51; */
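 /* Editor's note (illustrative, not part of the original sources): the
    permute/add chains shown above implement the classic log2-step
    (Hillis-Steele) prefix sum.  Assuming the scan initializer (the
    vectorized rhs _40 above) is zero, one vector of N elements is
    transformed exactly like this scalar model:

      static void
      inclusive_scan_model (int *v, int n)
      {
        for (int shift = 1; shift < n; shift *= 2)
          for (int j = n - 1; j >= shift; --j)
            v[j] += v[j - shift];
      }

    After the log2(N) steps v[j] holds the sum of the original
    v[0] .. v[j]; the final VEC_PERM_EXPR with the all-last-lane mask
    broadcasts the running total for the store into the "omp simd inscan"
    array.  The exclusive variant applies the first permutation twice, so
    the permute/add chain yields the inclusive scan shifted by one lane
    with lane 0 left as the initializer.  */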
6975 enum machine_mode vec_mode = TYPE_MODE (vectype);
6976 optab optab = optab_for_tree_code (code, vectype, optab_default);
6977 if (!optab || optab_handler (optab, vec_mode) == CODE_FOR_nothing)
6978 goto fail;
6979
6980 int units_log2 = scan_store_can_perm_p (vectype, *init);
6981 if (units_log2 == -1)
6982 goto fail;
6983
6984 return true;
6985 }
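 /* Editor's note (illustrative assumption): the IL shape that
    check_scan_store accepts typically comes from an OpenMP inscan
    reduction such as

      #pragma omp simd reduction (inscan, +:r)
      for (i = 0; i < n; i++)
        {
          r += a[i];
          #pragma omp scan inclusive (r)
          b[i] = r;
        }

    which omp lowering turns into the loads, the addition and the stores
    of the "omp simd array" / "omp simd inscan" temporaries matched
    above.  */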
6986
6987
6988 /* Function vectorizable_scan_store.
6989
6990 Helper of vectorizable_store, arguments as for vectorizable_store.
6991 Handle only the transformation; checking is done in check_scan_store. */
6992
6993 static bool
6994 vectorizable_scan_store (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
6995 stmt_vec_info *vec_stmt, int ncopies)
6996 {
6997 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
6998 dr_vec_info *dr_info = STMT_VINFO_DR_INFO (stmt_info);
6999 tree ref_type = reference_alias_ptr_type (DR_REF (dr_info->dr));
7000 vec_info *vinfo = stmt_info->vinfo;
7001 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
7002
7003 if (dump_enabled_p ())
7004 dump_printf_loc (MSG_NOTE, vect_location,
7005 "transform scan store. ncopies = %d\n", ncopies);
7006
7007 gimple *stmt = STMT_VINFO_STMT (stmt_info);
7008 tree rhs = gimple_assign_rhs1 (stmt);
7009 gcc_assert (TREE_CODE (rhs) == SSA_NAME);
7010
7011 tree var = TREE_OPERAND (DR_BASE_ADDRESS (dr_info->dr), 0);
7012 bool inscan_var_store
7013 = lookup_attribute ("omp simd inscan", DECL_ATTRIBUTES (var)) != NULL;
7014
7015 if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) == 4 && !inscan_var_store)
7016 {
7017 use_operand_p use_p;
7018 imm_use_iterator iter;
7019 FOR_EACH_IMM_USE_FAST (use_p, iter, rhs)
7020 {
7021 gimple *use_stmt = USE_STMT (use_p);
7022 if (use_stmt == stmt || is_gimple_debug (use_stmt))
7023 continue;
7024 rhs = gimple_assign_lhs (use_stmt);
7025 break;
7026 }
7027 }
7028
7029 gimple *def_stmt = SSA_NAME_DEF_STMT (rhs);
7030 enum tree_code code = gimple_assign_rhs_code (def_stmt);
7031 if (code == POINTER_PLUS_EXPR)
7032 code = PLUS_EXPR;
7033 gcc_assert (TREE_CODE_LENGTH (code) == binary_op
7034 && commutative_tree_code (code));
7035 tree rhs1 = gimple_assign_rhs1 (def_stmt);
7036 tree rhs2 = gimple_assign_rhs2 (def_stmt);
7037 gcc_assert (TREE_CODE (rhs1) == SSA_NAME && TREE_CODE (rhs2) == SSA_NAME);
7038 gimple *load1_stmt = SSA_NAME_DEF_STMT (rhs1);
7039 gimple *load2_stmt = SSA_NAME_DEF_STMT (rhs2);
7040 stmt_vec_info load1_stmt_info = loop_vinfo->lookup_stmt (load1_stmt);
7041 stmt_vec_info load2_stmt_info = loop_vinfo->lookup_stmt (load2_stmt);
7042 dr_vec_info *load1_dr_info = STMT_VINFO_DR_INFO (load1_stmt_info);
7043 dr_vec_info *load2_dr_info = STMT_VINFO_DR_INFO (load2_stmt_info);
7044 tree var1 = TREE_OPERAND (DR_BASE_ADDRESS (load1_dr_info->dr), 0);
7045 tree var2 = TREE_OPERAND (DR_BASE_ADDRESS (load2_dr_info->dr), 0);
7046
7047 if (lookup_attribute ("omp simd inscan", DECL_ATTRIBUTES (var1)))
7048 {
7049 std::swap (rhs1, rhs2);
7050 std::swap (var1, var2);
7051 std::swap (load1_dr_info, load2_dr_info);
7052 }
7053
7054 tree *init = loop_vinfo->scan_map->get (var1);
7055 gcc_assert (init);
7056
7057 unsigned HOST_WIDE_INT nunits;
7058 if (!TYPE_VECTOR_SUBPARTS (vectype).is_constant (&nunits))
7059 gcc_unreachable ();
7060 auto_vec<enum scan_store_kind, 16> use_whole_vector;
7061 int units_log2 = scan_store_can_perm_p (vectype, *init, &use_whole_vector);
7062 gcc_assert (units_log2 > 0);
7063 auto_vec<tree, 16> perms;
7064 perms.quick_grow (units_log2 + 1);
7065 tree zero_vec = NULL_TREE, masktype = NULL_TREE;
7066 for (int i = 0; i <= units_log2; ++i)
7067 {
7068 unsigned HOST_WIDE_INT j, k;
7069 vec_perm_builder sel (nunits, nunits, 1);
7070 sel.quick_grow (nunits);
7071 if (i == units_log2)
7072 for (j = 0; j < nunits; ++j)
7073 sel[j] = nunits - 1;
7074 else
7075 {
7076 for (j = 0; j < (HOST_WIDE_INT_1U << i); ++j)
7077 sel[j] = j;
7078 for (k = 0; j < nunits; ++j, ++k)
7079 sel[j] = nunits + k;
7080 }
7081 vec_perm_indices indices (sel, i == units_log2 ? 1 : 2, nunits);
7082 if (!use_whole_vector.is_empty ()
7083 && use_whole_vector[i] != scan_store_kind_perm)
7084 {
7085 if (zero_vec == NULL_TREE)
7086 zero_vec = build_zero_cst (vectype);
7087 if (masktype == NULL_TREE
7088 && use_whole_vector[i] == scan_store_kind_lshift_cond)
7089 masktype = build_same_sized_truth_vector_type (vectype);
7090 perms[i] = vect_gen_perm_mask_any (vectype, indices);
7091 }
7092 else
7093 perms[i] = vect_gen_perm_mask_checked (vectype, indices);
7094 }
7095
7096 stmt_vec_info prev_stmt_info = NULL;
7097 tree vec_oprnd1 = NULL_TREE;
7098 tree vec_oprnd2 = NULL_TREE;
7099 tree vec_oprnd3 = NULL_TREE;
7100 tree dataref_ptr = DR_BASE_ADDRESS (dr_info->dr);
7101 tree dataref_offset = build_int_cst (ref_type, 0);
7102 tree bump = vect_get_data_ptr_increment (dr_info, vectype, VMAT_CONTIGUOUS);
7103 tree ldataref_ptr = NULL_TREE;
7104 tree orig = NULL_TREE;
7105 if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) == 4 && !inscan_var_store)
7106 ldataref_ptr = DR_BASE_ADDRESS (load1_dr_info->dr);
7107 for (int j = 0; j < ncopies; j++)
7108 {
7109 stmt_vec_info new_stmt_info;
7110 if (j == 0)
7111 {
7112 vec_oprnd1 = vect_get_vec_def_for_operand (*init, stmt_info);
7113 if (ldataref_ptr == NULL)
7114 vec_oprnd2 = vect_get_vec_def_for_operand (rhs1, stmt_info);
7115 vec_oprnd3 = vect_get_vec_def_for_operand (rhs2, stmt_info);
7116 orig = vec_oprnd3;
7117 }
7118 else
7119 {
7120 vec_oprnd1 = vect_get_vec_def_for_stmt_copy (vinfo, vec_oprnd1);
7121 if (ldataref_ptr == NULL)
7122 vec_oprnd2 = vect_get_vec_def_for_stmt_copy (vinfo, vec_oprnd2);
7123 vec_oprnd3 = vect_get_vec_def_for_stmt_copy (vinfo, vec_oprnd3);
7124 if (!inscan_var_store)
7125 dataref_offset = int_const_binop (PLUS_EXPR, dataref_offset, bump);
7126 }
7127
7128 if (ldataref_ptr)
7129 {
7130 vec_oprnd2 = make_ssa_name (vectype);
7131 tree data_ref = fold_build2 (MEM_REF, vectype,
7132 unshare_expr (ldataref_ptr),
7133 dataref_offset);
7134 vect_copy_ref_info (data_ref, DR_REF (load1_dr_info->dr));
7135 gimple *g = gimple_build_assign (vec_oprnd2, data_ref);
7136 new_stmt_info = vect_finish_stmt_generation (stmt_info, g, gsi);
7137 if (prev_stmt_info == NULL)
7138 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt_info;
7139 else
7140 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt_info;
7141 prev_stmt_info = new_stmt_info;
7142 }
7143
7144 tree v = vec_oprnd2;
7145 for (int i = 0; i < units_log2; ++i)
7146 {
7147 tree new_temp = make_ssa_name (vectype);
7148 gimple *g = gimple_build_assign (new_temp, VEC_PERM_EXPR,
7149 (zero_vec
7150 && (use_whole_vector[i]
7151 != scan_store_kind_perm))
7152 ? zero_vec : vec_oprnd1, v,
7153 perms[i]);
7154 new_stmt_info = vect_finish_stmt_generation (stmt_info, g, gsi);
7155 if (prev_stmt_info == NULL)
7156 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt_info;
7157 else
7158 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt_info;
7159 prev_stmt_info = new_stmt_info;
7160
7161 if (zero_vec && use_whole_vector[i] == scan_store_kind_lshift_cond)
7162 {
7163 /* Whole vector shift shifted in zero bits, but if *init
7164 is not initializer_zerop, we need to replace those elements
7165 with elements from vec_oprnd1. */
7166 tree_vector_builder vb (masktype, nunits, 1);
7167 for (unsigned HOST_WIDE_INT k = 0; k < nunits; ++k)
7168 vb.quick_push (k < (HOST_WIDE_INT_1U << i)
7169 ? boolean_false_node : boolean_true_node);
7170
7171 tree new_temp2 = make_ssa_name (vectype);
7172 g = gimple_build_assign (new_temp2, VEC_COND_EXPR, vb.build (),
7173 new_temp, vec_oprnd1);
7174 new_stmt_info = vect_finish_stmt_generation (stmt_info, g, gsi);
7175 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt_info;
7176 prev_stmt_info = new_stmt_info;
7177 new_temp = new_temp2;
7178 }
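 /* Editor's note (worked example, not GCC source): for nunits == 8 and
    i == 1 the mask built above is
      { false, false, true, true, true, true, true, true }
    so the two lanes that the whole-vector shift filled with zeros are
    taken from vec_oprnd1 (the vectorized *init) while the remaining
    lanes keep the shifted value in NEW_TEMP.  */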
7179
7180 /* For exclusive scan, perform the perms[i] permutation once
7181 more. */
7182 if (i == 0
7183 && STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) == 4
7184 && v == vec_oprnd2)
7185 {
7186 v = new_temp;
7187 --i;
7188 continue;
7189 }
7190
7191 tree new_temp2 = make_ssa_name (vectype);
7192 g = gimple_build_assign (new_temp2, code, v, new_temp);
7193 new_stmt_info = vect_finish_stmt_generation (stmt_info, g, gsi);
7194 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt_info;
7195 prev_stmt_info = new_stmt_info;
7196
7197 v = new_temp2;
7198 }
7199
7200 tree new_temp = make_ssa_name (vectype);
7201 gimple *g = gimple_build_assign (new_temp, code, orig, v);
7202 new_stmt_info = vect_finish_stmt_generation (stmt_info, g, gsi);
7203 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt_info;
7204 prev_stmt_info = new_stmt_info;
7205
7206 tree last_perm_arg = new_temp;
7207 /* For exclusive scan, new_temp computed above is the exclusive scan
7208 prefix sum. Turn it into an inclusive prefix sum for the broadcast
7209 of the last element into orig. */
7210 if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) == 4)
7211 {
7212 last_perm_arg = make_ssa_name (vectype);
7213 g = gimple_build_assign (last_perm_arg, code, new_temp, vec_oprnd2);
7214 new_stmt_info = vect_finish_stmt_generation (stmt_info, g, gsi);
7215 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt_info;
7216 prev_stmt_info = new_stmt_info;
7217 }
7218
7219 orig = make_ssa_name (vectype);
7220 g = gimple_build_assign (orig, VEC_PERM_EXPR, last_perm_arg,
7221 last_perm_arg, perms[units_log2]);
7222 new_stmt_info = vect_finish_stmt_generation (stmt_info, g, gsi);
7223 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt_info;
7224 prev_stmt_info = new_stmt_info;
7225
7226 if (!inscan_var_store)
7227 {
7228 tree data_ref = fold_build2 (MEM_REF, vectype,
7229 unshare_expr (dataref_ptr),
7230 dataref_offset);
7231 vect_copy_ref_info (data_ref, DR_REF (dr_info->dr));
7232 g = gimple_build_assign (data_ref, new_temp);
7233 new_stmt_info = vect_finish_stmt_generation (stmt_info, g, gsi);
7234 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt_info;
7235 prev_stmt_info = new_stmt_info;
7236 }
7237 }
7238
7239 if (inscan_var_store)
7240 for (int j = 0; j < ncopies; j++)
7241 {
7242 if (j != 0)
7243 dataref_offset = int_const_binop (PLUS_EXPR, dataref_offset, bump);
7244
7245 tree data_ref = fold_build2 (MEM_REF, vectype,
7246 unshare_expr (dataref_ptr),
7247 dataref_offset);
7248 vect_copy_ref_info (data_ref, DR_REF (dr_info->dr));
7249 gimple *g = gimple_build_assign (data_ref, orig);
7250 stmt_vec_info new_stmt_info
7251 = vect_finish_stmt_generation (stmt_info, g, gsi);
7252 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt_info;
7253 prev_stmt_info = new_stmt_info;
7254 }
7255 return true;
7256 }
7257
7258
7259 /* Function vectorizable_store.
7260
7261 Check if STMT_INFO defines a non scalar data-ref (array/pointer/structure)
7262 that can be vectorized.
7263 If VEC_STMT is also passed, vectorize STMT_INFO: create a vectorized
7264 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
7265 Return true if STMT_INFO is vectorizable in this way. */
7266
7267 static bool
7268 vectorizable_store (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
7269 stmt_vec_info *vec_stmt, slp_tree slp_node,
7270 stmt_vector_for_cost *cost_vec)
7271 {
7272 tree data_ref;
7273 tree op;
7274 tree vec_oprnd = NULL_TREE;
7275 tree elem_type;
7276 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
7277 class loop *loop = NULL;
7278 machine_mode vec_mode;
7279 tree dummy;
7280 enum dr_alignment_support alignment_support_scheme;
7281 enum vect_def_type rhs_dt = vect_unknown_def_type;
7282 enum vect_def_type mask_dt = vect_unknown_def_type;
7283 stmt_vec_info prev_stmt_info = NULL;
7284 tree dataref_ptr = NULL_TREE;
7285 tree dataref_offset = NULL_TREE;
7286 gimple *ptr_incr = NULL;
7287 int ncopies;
7288 int j;
7289 stmt_vec_info first_stmt_info;
7290 bool grouped_store;
7291 unsigned int group_size, i;
7292 vec<tree> oprnds = vNULL;
7293 vec<tree> result_chain = vNULL;
7294 tree offset = NULL_TREE;
7295 vec<tree> vec_oprnds = vNULL;
7296 bool slp = (slp_node != NULL);
7297 unsigned int vec_num;
7298 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
7299 vec_info *vinfo = stmt_info->vinfo;
7300 tree aggr_type;
7301 gather_scatter_info gs_info;
7302 poly_uint64 vf;
7303 vec_load_store_type vls_type;
7304 tree ref_type;
7305
7306 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
7307 return false;
7308
7309 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
7310 && ! vec_stmt)
7311 return false;
7312
7313 /* Is vectorizable store? */
7314
7315 tree mask = NULL_TREE, mask_vectype = NULL_TREE;
7316 if (gassign *assign = dyn_cast <gassign *> (stmt_info->stmt))
7317 {
7318 tree scalar_dest = gimple_assign_lhs (assign);
7319 if (TREE_CODE (scalar_dest) == VIEW_CONVERT_EXPR
7320 && is_pattern_stmt_p (stmt_info))
7321 scalar_dest = TREE_OPERAND (scalar_dest, 0);
7322 if (TREE_CODE (scalar_dest) != ARRAY_REF
7323 && TREE_CODE (scalar_dest) != BIT_FIELD_REF
7324 && TREE_CODE (scalar_dest) != INDIRECT_REF
7325 && TREE_CODE (scalar_dest) != COMPONENT_REF
7326 && TREE_CODE (scalar_dest) != IMAGPART_EXPR
7327 && TREE_CODE (scalar_dest) != REALPART_EXPR
7328 && TREE_CODE (scalar_dest) != MEM_REF)
7329 return false;
7330 }
7331 else
7332 {
7333 gcall *call = dyn_cast <gcall *> (stmt_info->stmt);
7334 if (!call || !gimple_call_internal_p (call))
7335 return false;
7336
7337 internal_fn ifn = gimple_call_internal_fn (call);
7338 if (!internal_store_fn_p (ifn))
7339 return false;
7340
7341 if (slp_node != NULL)
7342 {
7343 if (dump_enabled_p ())
7344 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
7345 "SLP of masked stores not supported.\n");
7346 return false;
7347 }
7348
7349 int mask_index = internal_fn_mask_index (ifn);
7350 if (mask_index >= 0)
7351 {
7352 mask = gimple_call_arg (call, mask_index);
7353 if (!vect_check_load_store_mask (stmt_info, mask, &mask_dt,
7354 &mask_vectype))
7355 return false;
7356 }
7357 }
7358
7359 op = vect_get_store_rhs (stmt_info);
7360
7361 /* Cannot have hybrid store SLP -- that would mean storing to the
7362 same location twice. */
7363 gcc_assert (slp == PURE_SLP_STMT (stmt_info));
7364
7365 tree vectype = STMT_VINFO_VECTYPE (stmt_info), rhs_vectype = NULL_TREE;
7366 poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
7367
7368 if (loop_vinfo)
7369 {
7370 loop = LOOP_VINFO_LOOP (loop_vinfo);
7371 vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
7372 }
7373 else
7374 vf = 1;
7375
7376 /* Multiple types in SLP are handled by creating the appropriate number of
7377 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
7378 case of SLP. */
7379 if (slp)
7380 ncopies = 1;
7381 else
7382 ncopies = vect_get_num_copies (loop_vinfo, vectype);
7383
7384 gcc_assert (ncopies >= 1);
7385
7386 /* FORNOW. This restriction should be relaxed. */
7387 if (loop && nested_in_vect_loop_p (loop, stmt_info) && ncopies > 1)
7388 {
7389 if (dump_enabled_p ())
7390 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
7391 "multiple types in nested loop.\n");
7392 return false;
7393 }
7394
7395 if (!vect_check_store_rhs (stmt_info, op, &rhs_dt, &rhs_vectype, &vls_type))
7396 return false;
7397
7398 elem_type = TREE_TYPE (vectype);
7399 vec_mode = TYPE_MODE (vectype);
7400
7401 if (!STMT_VINFO_DATA_REF (stmt_info))
7402 return false;
7403
7404 vect_memory_access_type memory_access_type;
7405 if (!get_load_store_type (stmt_info, vectype, slp, mask, vls_type, ncopies,
7406 &memory_access_type, &gs_info))
7407 return false;
7408
7409 if (mask)
7410 {
7411 if (memory_access_type == VMAT_CONTIGUOUS)
7412 {
7413 if (!VECTOR_MODE_P (vec_mode)
7414 || !can_vec_mask_load_store_p (vec_mode,
7415 TYPE_MODE (mask_vectype), false))
7416 return false;
7417 }
7418 else if (memory_access_type != VMAT_LOAD_STORE_LANES
7419 && (memory_access_type != VMAT_GATHER_SCATTER
7420 || (gs_info.decl && !VECTOR_BOOLEAN_TYPE_P (mask_vectype))))
7421 {
7422 if (dump_enabled_p ())
7423 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
7424 "unsupported access type for masked store.\n");
7425 return false;
7426 }
7427 }
7428 else
7429 {
7430 /* FORNOW. In some cases we can vectorize even if the data-type is not
7431 supported (e.g. array initialization with 0). */
7432 if (optab_handler (mov_optab, vec_mode) == CODE_FOR_nothing)
7433 return false;
7434 }
7435
7436 dr_vec_info *dr_info = STMT_VINFO_DR_INFO (stmt_info), *first_dr_info = NULL;
7437 grouped_store = (STMT_VINFO_GROUPED_ACCESS (stmt_info)
7438 && memory_access_type != VMAT_GATHER_SCATTER
7439 && (slp || memory_access_type != VMAT_CONTIGUOUS));
7440 if (grouped_store)
7441 {
7442 first_stmt_info = DR_GROUP_FIRST_ELEMENT (stmt_info);
7443 first_dr_info = STMT_VINFO_DR_INFO (first_stmt_info);
7444 group_size = DR_GROUP_SIZE (first_stmt_info);
7445 }
7446 else
7447 {
7448 first_stmt_info = stmt_info;
7449 first_dr_info = dr_info;
7450 group_size = vec_num = 1;
7451 }
7452
7453 if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) > 1 && !vec_stmt)
7454 {
7455 if (!check_scan_store (stmt_info, vectype, rhs_dt, slp, mask,
7456 memory_access_type))
7457 return false;
7458 }
7459
7460 if (!vec_stmt) /* transformation not required. */
7461 {
7462 STMT_VINFO_MEMORY_ACCESS_TYPE (stmt_info) = memory_access_type;
7463
7464 if (loop_vinfo
7465 && LOOP_VINFO_CAN_FULLY_MASK_P (loop_vinfo))
7466 check_load_store_masking (loop_vinfo, vectype, vls_type, group_size,
7467 memory_access_type, &gs_info, mask);
7468
7469 STMT_VINFO_TYPE (stmt_info) = store_vec_info_type;
7470 vect_model_store_cost (stmt_info, ncopies, rhs_dt, memory_access_type,
7471 vls_type, slp_node, cost_vec);
7472 return true;
7473 }
7474 gcc_assert (memory_access_type == STMT_VINFO_MEMORY_ACCESS_TYPE (stmt_info));
7475
7476 /* Transform. */
7477
7478 ensure_base_align (dr_info);
7479
7480 if (memory_access_type == VMAT_GATHER_SCATTER && gs_info.decl)
7481 {
7482 tree vec_oprnd0 = NULL_TREE, vec_oprnd1 = NULL_TREE, src;
7483 tree arglist = TYPE_ARG_TYPES (TREE_TYPE (gs_info.decl));
7484 tree rettype, srctype, ptrtype, idxtype, masktype, scaletype;
7485 tree ptr, var, scale, vec_mask;
7486 tree mask_arg = NULL_TREE, mask_op = NULL_TREE, perm_mask = NULL_TREE;
7487 tree mask_halfvectype = mask_vectype;
7488 edge pe = loop_preheader_edge (loop);
7489 gimple_seq seq;
7490 basic_block new_bb;
7491 enum { NARROW, NONE, WIDEN } modifier;
7492 poly_uint64 scatter_off_nunits
7493 = TYPE_VECTOR_SUBPARTS (gs_info.offset_vectype);
7494
7495 if (known_eq (nunits, scatter_off_nunits))
7496 modifier = NONE;
7497 else if (known_eq (nunits * 2, scatter_off_nunits))
7498 {
7499 modifier = WIDEN;
7500
7501 /* Currently gathers and scatters are only supported for
7502 fixed-length vectors. */
7503 unsigned int count = scatter_off_nunits.to_constant ();
7504 vec_perm_builder sel (count, count, 1);
7505 for (i = 0; i < (unsigned int) count; ++i)
7506 sel.quick_push (i | (count / 2));
7507
7508 vec_perm_indices indices (sel, 1, count);
7509 perm_mask = vect_gen_perm_mask_checked (gs_info.offset_vectype,
7510 indices);
7511 gcc_assert (perm_mask != NULL_TREE);
7512 }
7513 else if (known_eq (nunits, scatter_off_nunits * 2))
7514 {
7515 modifier = NARROW;
7516
7517 /* Currently gathers and scatters are only supported for
7518 fixed-length vectors. */
7519 unsigned int count = nunits.to_constant ();
7520 vec_perm_builder sel (count, count, 1);
7521 for (i = 0; i < (unsigned int) count; ++i)
7522 sel.quick_push (i | (count / 2));
7523
7524 vec_perm_indices indices (sel, 2, count);
7525 perm_mask = vect_gen_perm_mask_checked (vectype, indices);
7526 gcc_assert (perm_mask != NULL_TREE);
7527 ncopies *= 2;
7528
7529 if (mask)
7530 mask_halfvectype
7531 = build_same_sized_truth_vector_type (gs_info.offset_vectype);
7532 }
7533 else
7534 gcc_unreachable ();
7535
7536 rettype = TREE_TYPE (TREE_TYPE (gs_info.decl));
7537 ptrtype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
7538 masktype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
7539 idxtype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
7540 srctype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
7541 scaletype = TREE_VALUE (arglist);
7542
7543 gcc_checking_assert (TREE_CODE (masktype) == INTEGER_TYPE
7544 && TREE_CODE (rettype) == VOID_TYPE);
7545
7546 ptr = fold_convert (ptrtype, gs_info.base);
7547 if (!is_gimple_min_invariant (ptr))
7548 {
7549 ptr = force_gimple_operand (ptr, &seq, true, NULL_TREE);
7550 new_bb = gsi_insert_seq_on_edge_immediate (pe, seq);
7551 gcc_assert (!new_bb);
7552 }
7553
7554 if (mask == NULL_TREE)
7555 {
7556 mask_arg = build_int_cst (masktype, -1);
7557 mask_arg = vect_init_vector (stmt_info, mask_arg, masktype, NULL);
7558 }
7559
7560 scale = build_int_cst (scaletype, gs_info.scale);
7561
7562 prev_stmt_info = NULL;
7563 for (j = 0; j < ncopies; ++j)
7564 {
7565 if (j == 0)
7566 {
7567 src = vec_oprnd1 = vect_get_vec_def_for_operand (op, stmt_info);
7568 op = vec_oprnd0 = vect_get_vec_def_for_operand (gs_info.offset,
7569 stmt_info);
7570 if (mask)
7571 mask_op = vec_mask = vect_get_vec_def_for_operand (mask,
7572 stmt_info);
7573 }
7574 else if (modifier != NONE && (j & 1))
7575 {
7576 if (modifier == WIDEN)
7577 {
7578 src
7579 = vec_oprnd1 = vect_get_vec_def_for_stmt_copy (vinfo,
7580 vec_oprnd1);
7581 op = permute_vec_elements (vec_oprnd0, vec_oprnd0, perm_mask,
7582 stmt_info, gsi);
7583 if (mask)
7584 mask_op
7585 = vec_mask = vect_get_vec_def_for_stmt_copy (vinfo,
7586 vec_mask);
7587 }
7588 else if (modifier == NARROW)
7589 {
7590 src = permute_vec_elements (vec_oprnd1, vec_oprnd1, perm_mask,
7591 stmt_info, gsi);
7592 op = vec_oprnd0 = vect_get_vec_def_for_stmt_copy (vinfo,
7593 vec_oprnd0);
7594 }
7595 else
7596 gcc_unreachable ();
7597 }
7598 else
7599 {
7600 src = vec_oprnd1 = vect_get_vec_def_for_stmt_copy (vinfo,
7601 vec_oprnd1);
7602 op = vec_oprnd0 = vect_get_vec_def_for_stmt_copy (vinfo,
7603 vec_oprnd0);
7604 if (mask)
7605 mask_op = vec_mask = vect_get_vec_def_for_stmt_copy (vinfo,
7606 vec_mask);
7607 }
7608
7609 if (!useless_type_conversion_p (srctype, TREE_TYPE (src)))
7610 {
7611 gcc_assert (known_eq (TYPE_VECTOR_SUBPARTS (TREE_TYPE (src)),
7612 TYPE_VECTOR_SUBPARTS (srctype)));
7613 var = vect_get_new_ssa_name (srctype, vect_simple_var);
7614 src = build1 (VIEW_CONVERT_EXPR, srctype, src);
7615 gassign *new_stmt
7616 = gimple_build_assign (var, VIEW_CONVERT_EXPR, src);
7617 vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
7618 src = var;
7619 }
7620
7621 if (!useless_type_conversion_p (idxtype, TREE_TYPE (op)))
7622 {
7623 gcc_assert (known_eq (TYPE_VECTOR_SUBPARTS (TREE_TYPE (op)),
7624 TYPE_VECTOR_SUBPARTS (idxtype)));
7625 var = vect_get_new_ssa_name (idxtype, vect_simple_var);
7626 op = build1 (VIEW_CONVERT_EXPR, idxtype, op);
7627 gassign *new_stmt
7628 = gimple_build_assign (var, VIEW_CONVERT_EXPR, op);
7629 vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
7630 op = var;
7631 }
7632
7633 if (mask)
7634 {
7635 tree utype;
7636 mask_arg = mask_op;
7637 if (modifier == NARROW)
7638 {
7639 var = vect_get_new_ssa_name (mask_halfvectype,
7640 vect_simple_var);
7641 gassign *new_stmt
7642 = gimple_build_assign (var, (j & 1) ? VEC_UNPACK_HI_EXPR
7643 : VEC_UNPACK_LO_EXPR,
7644 mask_op);
7645 vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
7646 mask_arg = var;
7647 }
7648 tree optype = TREE_TYPE (mask_arg);
7649 if (TYPE_MODE (masktype) == TYPE_MODE (optype))
7650 utype = masktype;
7651 else
7652 utype = lang_hooks.types.type_for_mode (TYPE_MODE (optype), 1);
7653 var = vect_get_new_ssa_name (utype, vect_scalar_var);
7654 mask_arg = build1 (VIEW_CONVERT_EXPR, utype, mask_arg);
7655 gassign *new_stmt
7656 = gimple_build_assign (var, VIEW_CONVERT_EXPR, mask_arg);
7657 vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
7658 mask_arg = var;
7659 if (!useless_type_conversion_p (masktype, utype))
7660 {
7661 gcc_assert (TYPE_PRECISION (utype)
7662 <= TYPE_PRECISION (masktype));
7663 var = vect_get_new_ssa_name (masktype, vect_scalar_var);
7664 new_stmt = gimple_build_assign (var, NOP_EXPR, mask_arg);
7665 vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
7666 mask_arg = var;
7667 }
7668 }
7669
7670 gcall *new_stmt
7671 = gimple_build_call (gs_info.decl, 5, ptr, mask_arg, op, src, scale);
7672 stmt_vec_info new_stmt_info
7673 = vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
7674
7675 if (prev_stmt_info == NULL)
7676 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt_info;
7677 else
7678 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt_info;
7679 prev_stmt_info = new_stmt_info;
7680 }
7681 return true;
7682 }
7683 else if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) >= 3)
7684 return vectorizable_scan_store (stmt_info, gsi, vec_stmt, ncopies);
7685
7686 if (STMT_VINFO_GROUPED_ACCESS (stmt_info))
7687 DR_GROUP_STORE_COUNT (DR_GROUP_FIRST_ELEMENT (stmt_info))++;
7688
7689 if (grouped_store)
7690 {
7691 /* FORNOW */
7692 gcc_assert (!loop || !nested_in_vect_loop_p (loop, stmt_info));
7693
7694 /* We vectorize all the stmts of the interleaving group when we
7695 reach the last stmt in the group. */
7696 if (DR_GROUP_STORE_COUNT (first_stmt_info)
7697 < DR_GROUP_SIZE (first_stmt_info)
7698 && !slp)
7699 {
7700 *vec_stmt = NULL;
7701 return true;
7702 }
7703
7704 if (slp)
7705 {
7706 grouped_store = false;
7707 /* VEC_NUM is the number of vect stmts to be created for this
7708 group. */
7709 vec_num = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
7710 first_stmt_info = SLP_TREE_SCALAR_STMTS (slp_node)[0];
7711 gcc_assert (DR_GROUP_FIRST_ELEMENT (first_stmt_info)
7712 == first_stmt_info);
7713 first_dr_info = STMT_VINFO_DR_INFO (first_stmt_info);
7714 op = vect_get_store_rhs (first_stmt_info);
7715 }
7716 else
7717 /* VEC_NUM is the number of vect stmts to be created for this
7718 group. */
7719 vec_num = group_size;
7720
7721 ref_type = get_group_alias_ptr_type (first_stmt_info);
7722 }
7723 else
7724 ref_type = reference_alias_ptr_type (DR_REF (first_dr_info->dr));
7725
7726 if (dump_enabled_p ())
7727 dump_printf_loc (MSG_NOTE, vect_location,
7728 "transform store. ncopies = %d\n", ncopies);
7729
7730 if (memory_access_type == VMAT_ELEMENTWISE
7731 || memory_access_type == VMAT_STRIDED_SLP)
7732 {
7733 gimple_stmt_iterator incr_gsi;
7734 bool insert_after;
7735 gimple *incr;
7736 tree offvar;
7737 tree ivstep;
7738 tree running_off;
7739 tree stride_base, stride_step, alias_off;
7740 tree vec_oprnd;
7741 unsigned int g;
7742 /* Checked by get_load_store_type. */
7743 unsigned int const_nunits = nunits.to_constant ();
7744
7745 gcc_assert (!LOOP_VINFO_FULLY_MASKED_P (loop_vinfo));
7746 gcc_assert (!nested_in_vect_loop_p (loop, stmt_info));
7747
7748 stride_base
7749 = fold_build_pointer_plus
7750 (DR_BASE_ADDRESS (first_dr_info->dr),
7751 size_binop (PLUS_EXPR,
7752 convert_to_ptrofftype (DR_OFFSET (first_dr_info->dr)),
7753 convert_to_ptrofftype (DR_INIT (first_dr_info->dr))));
7754 stride_step = fold_convert (sizetype, DR_STEP (first_dr_info->dr));
7755
7756 /* For a store with a loop-invariant stride other than a power of 2
7757 (i.e. not a grouped access), like so:
7758
7759 for (i = 0; i < n; i += stride)
7760 array[i] = ...;
7761
7762 we generate a new induction variable and new stores from
7763 the components of the (vectorized) rhs:
7764
7765 for (j = 0; ; j += VF*stride)
7766 vectemp = ...;
7767 tmp1 = vectemp[0];
7768 array[j] = tmp1;
7769 tmp2 = vectemp[1];
7770 array[j + stride] = tmp2;
7771 ...
7772 */
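 /* Editor's sketch (illustrative only, VF == 4 assumed; RHS and
    STRIDE_BYTES are placeholder names): the generated code keeps a
    pointer IV and bumps it by the byte stride between the element
    stores, roughly

      void
      strided_store_model (char *base, long stride_bytes, int niters,
                           int (*rhs) (int))
      {
        char *running_off = base;
        for (int j = 0; j < niters; j += 4)
          {
            int vectemp[4] = { rhs (j), rhs (j + 1),
                               rhs (j + 2), rhs (j + 3) };
            for (int k = 0; k < 4; ++k)
              {
                *(int *) running_off = vectemp[k];
                running_off += stride_bytes;
              }
          }
      }

    where the inner stores correspond to the BIT_FIELD_REF extracts and
    MEM_REFs created below and the pointer bump to the POINTER_PLUS_EXPR
    on RUNNING_OFF.  */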
7773
7774 unsigned nstores = const_nunits;
7775 unsigned lnel = 1;
7776 tree ltype = elem_type;
7777 tree lvectype = vectype;
7778 if (slp)
7779 {
7780 if (group_size < const_nunits
7781 && const_nunits % group_size == 0)
7782 {
7783 nstores = const_nunits / group_size;
7784 lnel = group_size;
7785 ltype = build_vector_type (elem_type, group_size);
7786 lvectype = vectype;
7787
7788 /* First check if vec_extract optab doesn't support extraction
7789 of vector elts directly. */
7790 scalar_mode elmode = SCALAR_TYPE_MODE (elem_type);
7791 machine_mode vmode;
7792 if (!mode_for_vector (elmode, group_size).exists (&vmode)
7793 || !VECTOR_MODE_P (vmode)
7794 || !targetm.vector_mode_supported_p (vmode)
7795 || (convert_optab_handler (vec_extract_optab,
7796 TYPE_MODE (vectype), vmode)
7797 == CODE_FOR_nothing))
7798 {
7799 /* Try to avoid emitting an extract of vector elements
7800 by performing the extracts using an integer type of the
7801 same size, extracting from a vector of those and then
7802 re-interpreting it as the original vector type if
7803 supported. */
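 /* Editor's sketch of the punning idea (assumption, illustrative only):
    with float elements, group_size == 2 and const_nunits == 4 the
    vector can be viewed as two 64-bit integers so that each group is
    stored with a single integer move instead of two element extracts:

      typedef float v4sf __attribute__ ((vector_size (16)));
      typedef unsigned long long v2di __attribute__ ((vector_size (16)));

      static void
      store_two_groups (float *dst, long stride, v4sf v)
      {
        v2di tmp = (v2di) v;
        unsigned long long g0 = tmp[0], g1 = tmp[1];
        __builtin_memcpy (dst, &g0, 8);
        __builtin_memcpy (dst + stride, &g1, 8);
      }

    Here STRIDE is the distance between group instances in elements; the
    cast plays the role of the VIEW_CONVERT_EXPR emitted below.  */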
7804 unsigned lsize
7805 = group_size * GET_MODE_BITSIZE (elmode);
7806 unsigned int lnunits = const_nunits / group_size;
7807 /* If we can't construct such a vector fall back to
7808 element extracts from the original vector type and
7809 element size stores. */
7810 if (int_mode_for_size (lsize, 0).exists (&elmode)
7811 && mode_for_vector (elmode, lnunits).exists (&vmode)
7812 && VECTOR_MODE_P (vmode)
7813 && targetm.vector_mode_supported_p (vmode)
7814 && (convert_optab_handler (vec_extract_optab,
7815 vmode, elmode)
7816 != CODE_FOR_nothing))
7817 {
7818 nstores = lnunits;
7819 lnel = group_size;
7820 ltype = build_nonstandard_integer_type (lsize, 1);
7821 lvectype = build_vector_type (ltype, nstores);
7822 }
7823 /* Else fall back to vector extraction anyway.
7824 Fewer stores are more important than avoiding spilling
7825 of the vector we extract from. Compared to the
7826 construction case in vectorizable_load no store-forwarding
7827 issue exists here for reasonable archs. */
7828 }
7829 }
7830 else if (group_size >= const_nunits
7831 && group_size % const_nunits == 0)
7832 {
7833 nstores = 1;
7834 lnel = const_nunits;
7835 ltype = vectype;
7836 lvectype = vectype;
7837 }
7838 ltype = build_aligned_type (ltype, TYPE_ALIGN (elem_type));
7839 ncopies = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
7840 }
7841
7842 ivstep = stride_step;
7843 ivstep = fold_build2 (MULT_EXPR, TREE_TYPE (ivstep), ivstep,
7844 build_int_cst (TREE_TYPE (ivstep), vf));
7845
7846 standard_iv_increment_position (loop, &incr_gsi, &insert_after);
7847
7848 stride_base = cse_and_gimplify_to_preheader (loop_vinfo, stride_base);
7849 ivstep = cse_and_gimplify_to_preheader (loop_vinfo, ivstep);
7850 create_iv (stride_base, ivstep, NULL,
7851 loop, &incr_gsi, insert_after,
7852 &offvar, NULL);
7853 incr = gsi_stmt (incr_gsi);
7854 loop_vinfo->add_stmt (incr);
7855
7856 stride_step = cse_and_gimplify_to_preheader (loop_vinfo, stride_step);
7857
7858 prev_stmt_info = NULL;
7859 alias_off = build_int_cst (ref_type, 0);
7860 stmt_vec_info next_stmt_info = first_stmt_info;
7861 for (g = 0; g < group_size; g++)
7862 {
7863 running_off = offvar;
7864 if (g)
7865 {
7866 tree size = TYPE_SIZE_UNIT (ltype);
7867 tree pos = fold_build2 (MULT_EXPR, sizetype, size_int (g),
7868 size);
7869 tree newoff = copy_ssa_name (running_off, NULL);
7870 incr = gimple_build_assign (newoff, POINTER_PLUS_EXPR,
7871 running_off, pos);
7872 vect_finish_stmt_generation (stmt_info, incr, gsi);
7873 running_off = newoff;
7874 }
7875 unsigned int group_el = 0;
7876 unsigned HOST_WIDE_INT
7877 elsz = tree_to_uhwi (TYPE_SIZE_UNIT (TREE_TYPE (vectype)));
7878 for (j = 0; j < ncopies; j++)
7879 {
7880 /* We've set op and dt above, from vect_get_store_rhs,
7881 and first_stmt_info == stmt_info. */
7882 if (j == 0)
7883 {
7884 if (slp)
7885 {
7886 vect_get_vec_defs (op, NULL_TREE, stmt_info,
7887 &vec_oprnds, NULL, slp_node);
7888 vec_oprnd = vec_oprnds[0];
7889 }
7890 else
7891 {
7892 op = vect_get_store_rhs (next_stmt_info);
7893 vec_oprnd = vect_get_vec_def_for_operand
7894 (op, next_stmt_info);
7895 }
7896 }
7897 else
7898 {
7899 if (slp)
7900 vec_oprnd = vec_oprnds[j];
7901 else
7902 vec_oprnd = vect_get_vec_def_for_stmt_copy (vinfo,
7903 vec_oprnd);
7904 }
7905 /* Pun the vector to extract from if necessary. */
7906 if (lvectype != vectype)
7907 {
7908 tree tem = make_ssa_name (lvectype);
7909 gimple *pun
7910 = gimple_build_assign (tem, build1 (VIEW_CONVERT_EXPR,
7911 lvectype, vec_oprnd));
7912 vect_finish_stmt_generation (stmt_info, pun, gsi);
7913 vec_oprnd = tem;
7914 }
7915 for (i = 0; i < nstores; i++)
7916 {
7917 tree newref, newoff;
7918 gimple *incr, *assign;
7919 tree size = TYPE_SIZE (ltype);
7920 /* Extract the i'th component. */
7921 tree pos = fold_build2 (MULT_EXPR, bitsizetype,
7922 bitsize_int (i), size);
7923 tree elem = fold_build3 (BIT_FIELD_REF, ltype, vec_oprnd,
7924 size, pos);
7925
7926 elem = force_gimple_operand_gsi (gsi, elem, true,
7927 NULL_TREE, true,
7928 GSI_SAME_STMT);
7929
7930 tree this_off = build_int_cst (TREE_TYPE (alias_off),
7931 group_el * elsz);
7932 newref = build2 (MEM_REF, ltype,
7933 running_off, this_off);
7934 vect_copy_ref_info (newref, DR_REF (first_dr_info->dr));
7935
7936 /* And store it to *running_off. */
7937 assign = gimple_build_assign (newref, elem);
7938 stmt_vec_info assign_info
7939 = vect_finish_stmt_generation (stmt_info, assign, gsi);
7940
7941 group_el += lnel;
7942 if (! slp
7943 || group_el == group_size)
7944 {
7945 newoff = copy_ssa_name (running_off, NULL);
7946 incr = gimple_build_assign (newoff, POINTER_PLUS_EXPR,
7947 running_off, stride_step);
7948 vect_finish_stmt_generation (stmt_info, incr, gsi);
7949
7950 running_off = newoff;
7951 group_el = 0;
7952 }
7953 if (g == group_size - 1
7954 && !slp)
7955 {
7956 if (j == 0 && i == 0)
7957 STMT_VINFO_VEC_STMT (stmt_info)
7958 = *vec_stmt = assign_info;
7959 else
7960 STMT_VINFO_RELATED_STMT (prev_stmt_info) = assign_info;
7961 prev_stmt_info = assign_info;
7962 }
7963 }
7964 }
7965 next_stmt_info = DR_GROUP_NEXT_ELEMENT (next_stmt_info);
7966 if (slp)
7967 break;
7968 }
7969
7970 vec_oprnds.release ();
7971 return true;
7972 }
7973
7974 auto_vec<tree> dr_chain (group_size);
7975 oprnds.create (group_size);
7976
7977 alignment_support_scheme
7978 = vect_supportable_dr_alignment (first_dr_info, false);
7979 gcc_assert (alignment_support_scheme);
7980 vec_loop_masks *loop_masks
7981 = (loop_vinfo && LOOP_VINFO_FULLY_MASKED_P (loop_vinfo)
7982 ? &LOOP_VINFO_MASKS (loop_vinfo)
7983 : NULL);
7984 /* Targets with store-lane instructions must not require explicit
7985 realignment. vect_supportable_dr_alignment always returns either
7986 dr_aligned or dr_unaligned_supported for masked operations. */
7987 gcc_assert ((memory_access_type != VMAT_LOAD_STORE_LANES
7988 && !mask
7989 && !loop_masks)
7990 || alignment_support_scheme == dr_aligned
7991 || alignment_support_scheme == dr_unaligned_supported);
7992
7993 if (memory_access_type == VMAT_CONTIGUOUS_DOWN
7994 || memory_access_type == VMAT_CONTIGUOUS_REVERSE)
7995 offset = size_int (-TYPE_VECTOR_SUBPARTS (vectype) + 1);
7996
7997 tree bump;
7998 tree vec_offset = NULL_TREE;
7999 if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
8000 {
8001 aggr_type = NULL_TREE;
8002 bump = NULL_TREE;
8003 }
8004 else if (memory_access_type == VMAT_GATHER_SCATTER)
8005 {
8006 aggr_type = elem_type;
8007 vect_get_strided_load_store_ops (stmt_info, loop_vinfo, &gs_info,
8008 &bump, &vec_offset);
8009 }
8010 else
8011 {
8012 if (memory_access_type == VMAT_LOAD_STORE_LANES)
8013 aggr_type = build_array_type_nelts (elem_type, vec_num * nunits);
8014 else
8015 aggr_type = vectype;
8016 bump = vect_get_data_ptr_increment (dr_info, aggr_type,
8017 memory_access_type);
8018 }
8019
8020 if (mask)
8021 LOOP_VINFO_HAS_MASK_STORE (loop_vinfo) = true;
8022
8023 /* In case the vectorization factor (VF) is bigger than the number
8024 of elements that we can fit in a vectype (nunits), we have to generate
8025 more than one vector stmt, i.e. we need to "unroll" the
8026 vector stmt by a factor VF/nunits. For more details see documentation in
8027 vect_get_vec_def_for_copy_stmt. */
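 /* Editor's worked example (not from the sources): with a 4-element
    vectype and VF == 8, vect_get_num_copies gives ncopies == 2, so two
    vector stores are emitted per scalar store; the first is recorded in
    STMT_VINFO_VEC_STMT and the second is chained to it through
    STMT_VINFO_RELATED_STMT, as done at the bottom of the J loop.  */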
8028
8029 /* In case of interleaving (non-unit grouped access):
8030
8031 S1: &base + 2 = x2
8032 S2: &base = x0
8033 S3: &base + 1 = x1
8034 S4: &base + 3 = x3
8035
8036 We create vectorized stores starting from the base address (the access of
8037 the first stmt in the chain, S2 in the above example) when the last store
8038 stmt of the chain (S4) is reached:
8039
8040 VS1: &base = vx2
8041 VS2: &base + vec_size*1 = vx0
8042 VS3: &base + vec_size*2 = vx1
8043 VS4: &base + vec_size*3 = vx3
8044
8045 Then permutation statements are generated:
8046
8047 VS5: vx5 = VEC_PERM_EXPR < vx0, vx3, {0, 8, 1, 9, 2, 10, 3, 11} >
8048 VS6: vx6 = VEC_PERM_EXPR < vx0, vx3, {4, 12, 5, 13, 6, 14, 7, 15} >
8049 ...
8050
8051 And they are put in STMT_VINFO_VEC_STMT of the corresponding scalar stmts
8052 (the order of the data-refs in the output of vect_permute_store_chain
8053 corresponds to the order of scalar stmts in the interleaving chain - see
8054 the documentation of vect_permute_store_chain()).
8055
8056 In case of both multiple types and interleaving, above vector stores and
8057 permutation stmts are created for every copy. The result vector stmts are
8058 put in STMT_VINFO_VEC_STMT for the first copy and in the corresponding
8059 STMT_VINFO_RELATED_STMT for the next copies.
8060 */
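 /* Editor's illustration (assumption, not from the sources): for a group
    of two stores a[2*i] = x and a[2*i+1] = y vectorized with VF == 4,
    vect_permute_store_chain () interleaves the two rhs vectors
      vx = { x0, x1, x2, x3 }   vy = { y0, y1, y2, y3 }
    into
      VEC_PERM_EXPR <vx, vy, { 0, 4, 1, 5 }>  ->  { x0, y0, x1, y1 }
      VEC_PERM_EXPR <vx, vy, { 2, 6, 3, 7 }>  ->  { x2, y2, x3, y3 }
    which are then stored back to back starting at the group's base
    address.  */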
8061
8062 prev_stmt_info = NULL;
8063 tree vec_mask = NULL_TREE;
8064 for (j = 0; j < ncopies; j++)
8065 {
8066 stmt_vec_info new_stmt_info;
8067 if (j == 0)
8068 {
8069 if (slp)
8070 {
8071 /* Get vectorized arguments for SLP_NODE. */
8072 vect_get_vec_defs (op, NULL_TREE, stmt_info, &vec_oprnds,
8073 NULL, slp_node);
8074
8075 vec_oprnd = vec_oprnds[0];
8076 }
8077 else
8078 {
8079 /* For interleaved stores we collect vectorized defs for all the
8080 stores in the group in DR_CHAIN and OPRNDS. DR_CHAIN is then
8081 used as an input to vect_permute_store_chain(), and OPRNDS as
8082 an input to vect_get_vec_def_for_stmt_copy() for the next copy.
8083
8084 If the store is not grouped, DR_GROUP_SIZE is 1, and DR_CHAIN and
8085 OPRNDS are of size 1. */
8086 stmt_vec_info next_stmt_info = first_stmt_info;
8087 for (i = 0; i < group_size; i++)
8088 {
8089 /* Since gaps are not supported for interleaved stores,
8090 DR_GROUP_SIZE is the exact number of stmts in the chain.
8091 Therefore, NEXT_STMT_INFO can't be NULL_TREE. If there
8092 is no interleaving, DR_GROUP_SIZE is 1,
8093 and only one iteration of the loop will be executed. */
8094 op = vect_get_store_rhs (next_stmt_info);
8095 vec_oprnd = vect_get_vec_def_for_operand
8096 (op, next_stmt_info);
8097 dr_chain.quick_push (vec_oprnd);
8098 oprnds.quick_push (vec_oprnd);
8099 next_stmt_info = DR_GROUP_NEXT_ELEMENT (next_stmt_info);
8100 }
8101 if (mask)
8102 vec_mask = vect_get_vec_def_for_operand (mask, stmt_info,
8103 mask_vectype);
8104 }
8105
8106 /* We should have caught mismatched types earlier. */
8107 gcc_assert (useless_type_conversion_p (vectype,
8108 TREE_TYPE (vec_oprnd)));
8109 bool simd_lane_access_p
8110 = STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) != 0;
8111 if (simd_lane_access_p
8112 && !loop_masks
8113 && TREE_CODE (DR_BASE_ADDRESS (first_dr_info->dr)) == ADDR_EXPR
8114 && VAR_P (TREE_OPERAND (DR_BASE_ADDRESS (first_dr_info->dr), 0))
8115 && integer_zerop (DR_OFFSET (first_dr_info->dr))
8116 && integer_zerop (DR_INIT (first_dr_info->dr))
8117 && alias_sets_conflict_p (get_alias_set (aggr_type),
8118 get_alias_set (TREE_TYPE (ref_type))))
8119 {
8120 dataref_ptr = unshare_expr (DR_BASE_ADDRESS (first_dr_info->dr));
8121 dataref_offset = build_int_cst (ref_type, 0);
8122 }
8123 else if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
8124 vect_get_gather_scatter_ops (loop, stmt_info, &gs_info,
8125 &dataref_ptr, &vec_offset);
8126 else
8127 dataref_ptr
8128 = vect_create_data_ref_ptr (first_stmt_info, aggr_type,
8129 simd_lane_access_p ? loop : NULL,
8130 offset, &dummy, gsi, &ptr_incr,
8131 simd_lane_access_p, NULL_TREE, bump);
8132 }
8133 else
8134 {
8135 /* For interleaved stores we created vectorized defs for all the
8136 defs stored in OPRNDS in the previous iteration (previous copy).
8137 DR_CHAIN is then used as an input to vect_permute_store_chain(),
8138 and OPRNDS as an input to vect_get_vec_def_for_stmt_copy() for the
8139 next copy.
8140 If the store is not grouped, DR_GROUP_SIZE is 1, and DR_CHAIN and
8141 OPRNDS are of size 1. */
8142 for (i = 0; i < group_size; i++)
8143 {
8144 op = oprnds[i];
8145 vec_oprnd = vect_get_vec_def_for_stmt_copy (vinfo, op);
8146 dr_chain[i] = vec_oprnd;
8147 oprnds[i] = vec_oprnd;
8148 }
8149 if (mask)
8150 vec_mask = vect_get_vec_def_for_stmt_copy (vinfo, vec_mask);
8151 if (dataref_offset)
8152 dataref_offset
8153 = int_const_binop (PLUS_EXPR, dataref_offset, bump);
8154 else if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
8155 vec_offset = vect_get_vec_def_for_stmt_copy (vinfo, vec_offset);
8156 else
8157 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi,
8158 stmt_info, bump);
8159 }
8160
8161 if (memory_access_type == VMAT_LOAD_STORE_LANES)
8162 {
8163 tree vec_array;
8164
8165 /* Get an array into which we can store the individual vectors. */
8166 vec_array = create_vector_array (vectype, vec_num);
8167
8168 /* Invalidate the current contents of VEC_ARRAY. This should
8169 become an RTL clobber too, which prevents the vector registers
8170 from being upward-exposed. */
8171 vect_clobber_variable (stmt_info, gsi, vec_array);
8172
8173 /* Store the individual vectors into the array. */
8174 for (i = 0; i < vec_num; i++)
8175 {
8176 vec_oprnd = dr_chain[i];
8177 write_vector_array (stmt_info, gsi, vec_oprnd, vec_array, i);
8178 }
8179
8180 tree final_mask = NULL;
8181 if (loop_masks)
8182 final_mask = vect_get_loop_mask (gsi, loop_masks, ncopies,
8183 vectype, j);
8184 if (vec_mask)
8185 final_mask = prepare_load_store_mask (mask_vectype, final_mask,
8186 vec_mask, gsi);
8187
8188 gcall *call;
8189 if (final_mask)
8190 {
8191 /* Emit:
8192 MASK_STORE_LANES (DATAREF_PTR, ALIAS_PTR, VEC_MASK,
8193 VEC_ARRAY). */
8194 unsigned int align = TYPE_ALIGN_UNIT (TREE_TYPE (vectype));
8195 tree alias_ptr = build_int_cst (ref_type, align);
8196 call = gimple_build_call_internal (IFN_MASK_STORE_LANES, 4,
8197 dataref_ptr, alias_ptr,
8198 final_mask, vec_array);
8199 }
8200 else
8201 {
8202 /* Emit:
8203 MEM_REF[...all elements...] = STORE_LANES (VEC_ARRAY). */
8204 data_ref = create_array_ref (aggr_type, dataref_ptr, ref_type);
8205 call = gimple_build_call_internal (IFN_STORE_LANES, 1,
8206 vec_array);
8207 gimple_call_set_lhs (call, data_ref);
8208 }
8209 gimple_call_set_nothrow (call, true);
8210 new_stmt_info = vect_finish_stmt_generation (stmt_info, call, gsi);
8211
8212 /* Record that VEC_ARRAY is now dead. */
8213 vect_clobber_variable (stmt_info, gsi, vec_array);
8214 }
8215 else
8216 {
8217 new_stmt_info = NULL;
8218 if (grouped_store)
8219 {
8220 if (j == 0)
8221 result_chain.create (group_size);
8222 /* Permute. */
8223 vect_permute_store_chain (dr_chain, group_size, stmt_info, gsi,
8224 &result_chain);
8225 }
8226
8227 stmt_vec_info next_stmt_info = first_stmt_info;
8228 for (i = 0; i < vec_num; i++)
8229 {
8230 unsigned misalign;
8231 unsigned HOST_WIDE_INT align;
8232
8233 tree final_mask = NULL_TREE;
8234 if (loop_masks)
8235 final_mask = vect_get_loop_mask (gsi, loop_masks,
8236 vec_num * ncopies,
8237 vectype, vec_num * j + i);
8238 if (vec_mask)
8239 final_mask = prepare_load_store_mask (mask_vectype, final_mask,
8240 vec_mask, gsi);
8241
8242 if (memory_access_type == VMAT_GATHER_SCATTER)
8243 {
8244 tree scale = size_int (gs_info.scale);
8245 gcall *call;
8246 if (loop_masks)
8247 call = gimple_build_call_internal
8248 (IFN_MASK_SCATTER_STORE, 5, dataref_ptr, vec_offset,
8249 scale, vec_oprnd, final_mask);
8250 else
8251 call = gimple_build_call_internal
8252 (IFN_SCATTER_STORE, 4, dataref_ptr, vec_offset,
8253 scale, vec_oprnd);
8254 gimple_call_set_nothrow (call, true);
8255 new_stmt_info
8256 = vect_finish_stmt_generation (stmt_info, call, gsi);
8257 break;
8258 }
8259
8260 if (i > 0)
8261 /* Bump the vector pointer. */
8262 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi,
8263 stmt_info, bump);
8264
8265 if (slp)
8266 vec_oprnd = vec_oprnds[i];
8267 else if (grouped_store)
8268 /* For grouped stores vectorized defs are interleaved in
8269 vect_permute_store_chain(). */
8270 vec_oprnd = result_chain[i];
8271
8272 align = known_alignment (DR_TARGET_ALIGNMENT (first_dr_info));
8273 if (aligned_access_p (first_dr_info))
8274 misalign = 0;
8275 else if (DR_MISALIGNMENT (first_dr_info) == -1)
8276 {
8277 align = dr_alignment (vect_dr_behavior (first_dr_info));
8278 misalign = 0;
8279 }
8280 else
8281 misalign = DR_MISALIGNMENT (first_dr_info);
8282 if (dataref_offset == NULL_TREE
8283 && TREE_CODE (dataref_ptr) == SSA_NAME)
8284 set_ptr_info_alignment (get_ptr_info (dataref_ptr), align,
8285 misalign);
8286
8287 if (memory_access_type == VMAT_CONTIGUOUS_REVERSE)
8288 {
8289 tree perm_mask = perm_mask_for_reverse (vectype);
8290 tree perm_dest = vect_create_destination_var
8291 (vect_get_store_rhs (stmt_info), vectype);
8292 tree new_temp = make_ssa_name (perm_dest);
8293
8294 /* Generate the permute statement. */
8295 gimple *perm_stmt
8296 = gimple_build_assign (new_temp, VEC_PERM_EXPR, vec_oprnd,
8297 vec_oprnd, perm_mask);
8298 vect_finish_stmt_generation (stmt_info, perm_stmt, gsi);
8299
8300 perm_stmt = SSA_NAME_DEF_STMT (new_temp);
8301 vec_oprnd = new_temp;
8302 }
8303
8304 /* Arguments are ready. Create the new vector stmt. */
8305 if (final_mask)
8306 {
8307 align = least_bit_hwi (misalign | align);
8308 tree ptr = build_int_cst (ref_type, align);
8309 gcall *call
8310 = gimple_build_call_internal (IFN_MASK_STORE, 4,
8311 dataref_ptr, ptr,
8312 final_mask, vec_oprnd);
8313 gimple_call_set_nothrow (call, true);
8314 new_stmt_info
8315 = vect_finish_stmt_generation (stmt_info, call, gsi);
8316 }
8317 else
8318 {
8319 data_ref = fold_build2 (MEM_REF, vectype,
8320 dataref_ptr,
8321 dataref_offset
8322 ? dataref_offset
8323 : build_int_cst (ref_type, 0));
8324 if (aligned_access_p (first_dr_info))
8325 ;
8326 else if (DR_MISALIGNMENT (first_dr_info) == -1)
8327 TREE_TYPE (data_ref)
8328 = build_aligned_type (TREE_TYPE (data_ref),
8329 align * BITS_PER_UNIT);
8330 else
8331 TREE_TYPE (data_ref)
8332 = build_aligned_type (TREE_TYPE (data_ref),
8333 TYPE_ALIGN (elem_type));
8334 vect_copy_ref_info (data_ref, DR_REF (first_dr_info->dr));
8335 gassign *new_stmt
8336 = gimple_build_assign (data_ref, vec_oprnd);
8337 new_stmt_info
8338 = vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
8339 }
8340
8341 if (slp)
8342 continue;
8343
8344 next_stmt_info = DR_GROUP_NEXT_ELEMENT (next_stmt_info);
8345 if (!next_stmt_info)
8346 break;
8347 }
8348 }
8349 if (!slp)
8350 {
8351 if (j == 0)
8352 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt_info;
8353 else
8354 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt_info;
8355 prev_stmt_info = new_stmt_info;
8356 }
8357 }
8358
8359 oprnds.release ();
8360 result_chain.release ();
8361 vec_oprnds.release ();
8362
8363 return true;
8364 }
8365
8366 /* Given a vector type VECTYPE, turns permutation SEL into the equivalent
8367 VECTOR_CST mask. No checks are made that the target platform supports the
8368 mask, so callers may wish to test can_vec_perm_const_p separately, or use
8369 vect_gen_perm_mask_checked. */
8370
8371 tree
8372 vect_gen_perm_mask_any (tree vectype, const vec_perm_indices &sel)
8373 {
8374 tree mask_type;
8375
8376 poly_uint64 nunits = sel.length ();
8377 gcc_assert (known_eq (nunits, TYPE_VECTOR_SUBPARTS (vectype)));
8378
8379 mask_type = build_vector_type (ssizetype, nunits);
8380 return vec_perm_indices_to_tree (mask_type, sel);
8381 }
8382
8383 /* Checked version of vect_gen_perm_mask_any. Asserts can_vec_perm_const_p,
8384 i.e. that the target supports the pattern _for arbitrary input vectors_. */
8385
8386 tree
8387 vect_gen_perm_mask_checked (tree vectype, const vec_perm_indices &sel)
8388 {
8389 gcc_assert (can_vec_perm_const_p (TYPE_MODE (vectype), sel));
8390 return vect_gen_perm_mask_any (vectype, sel);
8391 }
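 /* Editor's usage sketch (hypothetical helper, modelled on the existing
    perm_mask_for_reverse used by vectorizable_store above): build a mask
    that reverses a vector, falling back to NULL_TREE when the target
    cannot do the permutation.

      static tree
      reverse_mask_sketch (tree vectype)
      {
        poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
        vec_perm_builder sel (nunits, 1, 3);
        for (int i = 0; i < 3; ++i)
          sel.quick_push (nunits - 1 - i);
        vec_perm_indices indices (sel, 1, nunits);
        if (!can_vec_perm_const_p (TYPE_MODE (vectype), indices))
          return NULL_TREE;
        return vect_gen_perm_mask_checked (vectype, indices);
      }
 */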
8392
8393 /* Given vector variables X and Y that were generated for the scalar
8394 STMT_INFO, generate instructions to permute the vector elements of X and Y
8395 using permutation mask MASK_VEC, insert them at *GSI and return the
8396 permuted vector variable. */
8397
8398 static tree
8399 permute_vec_elements (tree x, tree y, tree mask_vec, stmt_vec_info stmt_info,
8400 gimple_stmt_iterator *gsi)
8401 {
8402 tree vectype = TREE_TYPE (x);
8403 tree perm_dest, data_ref;
8404 gimple *perm_stmt;
8405
8406 tree scalar_dest = gimple_get_lhs (stmt_info->stmt);
8407 if (scalar_dest && TREE_CODE (scalar_dest) == SSA_NAME)
8408 perm_dest = vect_create_destination_var (scalar_dest, vectype);
8409 else
8410 perm_dest = vect_get_new_vect_var (vectype, vect_simple_var, NULL);
8411 data_ref = make_ssa_name (perm_dest);
8412
8413 /* Generate the permute statement. */
8414 perm_stmt = gimple_build_assign (data_ref, VEC_PERM_EXPR, x, y, mask_vec);
8415 vect_finish_stmt_generation (stmt_info, perm_stmt, gsi);
8416
8417 return data_ref;
8418 }
8419
8420 /* Hoist the definitions of all SSA uses on STMT_INFO out of the loop LOOP,
8421 inserting them on the loop's preheader edge. Returns true if we
8422 were successful in doing so (and thus STMT_INFO can then be moved),
8423 otherwise returns false. */
8424
8425 static bool
8426 hoist_defs_of_uses (stmt_vec_info stmt_info, class loop *loop)
8427 {
8428 ssa_op_iter i;
8429 tree op;
8430 bool any = false;
8431
8432 FOR_EACH_SSA_TREE_OPERAND (op, stmt_info->stmt, i, SSA_OP_USE)
8433 {
8434 gimple *def_stmt = SSA_NAME_DEF_STMT (op);
8435 if (!gimple_nop_p (def_stmt)
8436 && flow_bb_inside_loop_p (loop, gimple_bb (def_stmt)))
8437 {
8438 /* Make sure we don't need to recurse. While we could do
8439 so in simple cases, for more complex use webs we don't
8440 have an easy way to preserve stmt order to fulfil
8441 dependencies within them. */
8442 tree op2;
8443 ssa_op_iter i2;
8444 if (gimple_code (def_stmt) == GIMPLE_PHI)
8445 return false;
8446 FOR_EACH_SSA_TREE_OPERAND (op2, def_stmt, i2, SSA_OP_USE)
8447 {
8448 gimple *def_stmt2 = SSA_NAME_DEF_STMT (op2);
8449 if (!gimple_nop_p (def_stmt2)
8450 && flow_bb_inside_loop_p (loop, gimple_bb (def_stmt2)))
8451 return false;
8452 }
8453 any = true;
8454 }
8455 }
8456
8457 if (!any)
8458 return true;
8459
8460 FOR_EACH_SSA_TREE_OPERAND (op, stmt_info->stmt, i, SSA_OP_USE)
8461 {
8462 gimple *def_stmt = SSA_NAME_DEF_STMT (op);
8463 if (!gimple_nop_p (def_stmt)
8464 && flow_bb_inside_loop_p (loop, gimple_bb (def_stmt)))
8465 {
8466 gimple_stmt_iterator gsi = gsi_for_stmt (def_stmt);
8467 gsi_remove (&gsi, false);
8468 gsi_insert_on_edge_immediate (loop_preheader_edge (loop), def_stmt);
8469 }
8470 }
8471
8472 return true;
8473 }
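 /* Editor's illustration (assumption, not GCC source): given

      for (i = 0; i < n; ++i)
        {
          off = base + 16;      invariant def inside the loop body
          x = *(p + off);       invariant load whose uses we examine
          a[i] = x;
        }

    hoist_defs_of_uses moves the definition of OFF to the loop preheader
    so that the invariant load itself can be hoisted afterwards; it
    returns false when a use is defined by a PHI or when the defining
    statement's own operands are defined inside the loop.  */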
8474
8475 /* vectorizable_load.
8476
8477 Check if STMT_INFO reads a non scalar data-ref (array/pointer/structure)
8478 that can be vectorized.
8479 If VEC_STMT is also passed, vectorize STMT_INFO: create a vectorized
8480 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
8481 Return true if STMT_INFO is vectorizable in this way. */
8482
8483 static bool
8484 vectorizable_load (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
8485 stmt_vec_info *vec_stmt, slp_tree slp_node,
8486 slp_instance slp_node_instance,
8487 stmt_vector_for_cost *cost_vec)
8488 {
8489 tree scalar_dest;
8490 tree vec_dest = NULL;
8491 tree data_ref = NULL;
8492 stmt_vec_info prev_stmt_info;
8493 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
8494 class loop *loop = NULL;
8495 class loop *containing_loop = gimple_bb (stmt_info->stmt)->loop_father;
8496 bool nested_in_vect_loop = false;
8497 tree elem_type;
8498 tree new_temp;
8499 machine_mode mode;
8500 tree dummy;
8501 enum dr_alignment_support alignment_support_scheme;
8502 tree dataref_ptr = NULL_TREE;
8503 tree dataref_offset = NULL_TREE;
8504 gimple *ptr_incr = NULL;
8505 int ncopies;
8506 int i, j;
8507 unsigned int group_size;
8508 poly_uint64 group_gap_adj;
8509 tree msq = NULL_TREE, lsq;
8510 tree offset = NULL_TREE;
8511 tree byte_offset = NULL_TREE;
8512 tree realignment_token = NULL_TREE;
8513 gphi *phi = NULL;
8514 vec<tree> dr_chain = vNULL;
8515 bool grouped_load = false;
8516 stmt_vec_info first_stmt_info;
8517 stmt_vec_info first_stmt_info_for_drptr = NULL;
8518 bool compute_in_loop = false;
8519 class loop *at_loop;
8520 int vec_num;
8521 bool slp = (slp_node != NULL);
8522 bool slp_perm = false;
8523 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
8524 poly_uint64 vf;
8525 tree aggr_type;
8526 gather_scatter_info gs_info;
8527 vec_info *vinfo = stmt_info->vinfo;
8528 tree ref_type;
8529 enum vect_def_type mask_dt = vect_unknown_def_type;
8530
8531 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
8532 return false;
8533
8534 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
8535 && ! vec_stmt)
8536 return false;
8537
8538 tree mask = NULL_TREE, mask_vectype = NULL_TREE;
8539 if (gassign *assign = dyn_cast <gassign *> (stmt_info->stmt))
8540 {
8541 scalar_dest = gimple_assign_lhs (assign);
8542 if (TREE_CODE (scalar_dest) != SSA_NAME)
8543 return false;
8544
8545 tree_code code = gimple_assign_rhs_code (assign);
8546 if (code != ARRAY_REF
8547 && code != BIT_FIELD_REF
8548 && code != INDIRECT_REF
8549 && code != COMPONENT_REF
8550 && code != IMAGPART_EXPR
8551 && code != REALPART_EXPR
8552 && code != MEM_REF
8553 && TREE_CODE_CLASS (code) != tcc_declaration)
8554 return false;
8555 }
8556 else
8557 {
8558 gcall *call = dyn_cast <gcall *> (stmt_info->stmt);
8559 if (!call || !gimple_call_internal_p (call))
8560 return false;
8561
8562 internal_fn ifn = gimple_call_internal_fn (call);
8563 if (!internal_load_fn_p (ifn))
8564 return false;
8565
8566 scalar_dest = gimple_call_lhs (call);
8567 if (!scalar_dest)
8568 return false;
8569
8570 int mask_index = internal_fn_mask_index (ifn);
8571 if (mask_index >= 0)
8572 {
8573 mask = gimple_call_arg (call, mask_index);
8574 if (!vect_check_load_store_mask (stmt_info, mask, &mask_dt,
8575 &mask_vectype))
8576 return false;
8577 }
8578 }
8579
8580 if (!STMT_VINFO_DATA_REF (stmt_info))
8581 return false;
8582
8583 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
8584 poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
8585
8586 if (loop_vinfo)
8587 {
8588 loop = LOOP_VINFO_LOOP (loop_vinfo);
8589 nested_in_vect_loop = nested_in_vect_loop_p (loop, stmt_info);
8590 vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
8591 }
8592 else
8593 vf = 1;
8594
8595 /* Multiple types in SLP are handled by creating the appropriate number of
8596 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
8597 case of SLP. */
8598 if (slp)
8599 ncopies = 1;
8600 else
8601 ncopies = vect_get_num_copies (loop_vinfo, vectype);
8602
8603 gcc_assert (ncopies >= 1);
8604
8605 /* FORNOW. This restriction should be relaxed. */
8606 if (nested_in_vect_loop && ncopies > 1)
8607 {
8608 if (dump_enabled_p ())
8609 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
8610 "multiple types in nested loop.\n");
8611 return false;
8612 }
8613
8614 /* Invalidate assumptions made by dependence analysis when vectorization
8615 on the unrolled body effectively re-orders stmts. */
8616 if (ncopies > 1
8617 && STMT_VINFO_MIN_NEG_DIST (stmt_info) != 0
8618 && maybe_gt (LOOP_VINFO_VECT_FACTOR (loop_vinfo),
8619 STMT_VINFO_MIN_NEG_DIST (stmt_info)))
8620 {
8621 if (dump_enabled_p ())
8622 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
8623 "cannot perform implicit CSE when unrolling "
8624 "with negative dependence distance\n");
8625 return false;
8626 }
8627
8628 elem_type = TREE_TYPE (vectype);
8629 mode = TYPE_MODE (vectype);
8630
8631 /* FORNOW. In some cases we can vectorize even if the data-type is not
8632 supported (e.g. data copies). */
8633 if (optab_handler (mov_optab, mode) == CODE_FOR_nothing)
8634 {
8635 if (dump_enabled_p ())
8636 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
8637 "Aligned load, but unsupported type.\n");
8638 return false;
8639 }
8640
8641 /* Check if the load is a part of an interleaving chain. */
8642 if (STMT_VINFO_GROUPED_ACCESS (stmt_info))
8643 {
8644 grouped_load = true;
8645 /* FORNOW */
8646 gcc_assert (!nested_in_vect_loop);
8647 gcc_assert (!STMT_VINFO_GATHER_SCATTER_P (stmt_info));
8648
8649 first_stmt_info = DR_GROUP_FIRST_ELEMENT (stmt_info);
8650 group_size = DR_GROUP_SIZE (first_stmt_info);
8651
8652 /* Refuse non-SLP vectorization of SLP-only groups. */
8653 if (!slp && STMT_VINFO_SLP_VECT_ONLY (first_stmt_info))
8654 {
8655 if (dump_enabled_p ())
8656 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
8657 "cannot vectorize load in non-SLP mode.\n");
8658 return false;
8659 }
8660
8661 if (slp && SLP_TREE_LOAD_PERMUTATION (slp_node).exists ())
8662 slp_perm = true;
8663
8664 /* Invalidate assumptions made by dependence analysis when vectorization
8665 on the unrolled body effectively re-orders stmts. */
8666 if (!PURE_SLP_STMT (stmt_info)
8667 && STMT_VINFO_MIN_NEG_DIST (stmt_info) != 0
8668 && maybe_gt (LOOP_VINFO_VECT_FACTOR (loop_vinfo),
8669 STMT_VINFO_MIN_NEG_DIST (stmt_info)))
8670 {
8671 if (dump_enabled_p ())
8672 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
8673 "cannot perform implicit CSE when performing "
8674 "group loads with negative dependence distance\n");
8675 return false;
8676 }
8677 }
8678 else
8679 group_size = 1;
8680
8681 vect_memory_access_type memory_access_type;
8682 if (!get_load_store_type (stmt_info, vectype, slp, mask, VLS_LOAD, ncopies,
8683 &memory_access_type, &gs_info))
8684 return false;
8685
8686 if (mask)
8687 {
8688 if (memory_access_type == VMAT_CONTIGUOUS)
8689 {
8690 machine_mode vec_mode = TYPE_MODE (vectype);
8691 if (!VECTOR_MODE_P (vec_mode)
8692 || !can_vec_mask_load_store_p (vec_mode,
8693 TYPE_MODE (mask_vectype), true))
8694 return false;
8695 }
8696 else if (memory_access_type != VMAT_LOAD_STORE_LANES
8697 && memory_access_type != VMAT_GATHER_SCATTER)
8698 {
8699 if (dump_enabled_p ())
8700 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
8701 "unsupported access type for masked load.\n");
8702 return false;
8703 }
8704 }
8705
8706 if (!vec_stmt) /* transformation not required. */
8707 {
8708 if (!slp)
8709 STMT_VINFO_MEMORY_ACCESS_TYPE (stmt_info) = memory_access_type;
8710
8711 if (loop_vinfo
8712 && LOOP_VINFO_CAN_FULLY_MASK_P (loop_vinfo))
8713 check_load_store_masking (loop_vinfo, vectype, VLS_LOAD, group_size,
8714 memory_access_type, &gs_info, mask);
8715
8716 STMT_VINFO_TYPE (stmt_info) = load_vec_info_type;
8717 vect_model_load_cost (stmt_info, ncopies, memory_access_type,
8718 slp_node_instance, slp_node, cost_vec);
8719 return true;
8720 }
8721
8722 if (!slp)
8723 gcc_assert (memory_access_type
8724 == STMT_VINFO_MEMORY_ACCESS_TYPE (stmt_info));
8725
8726 if (dump_enabled_p ())
8727 dump_printf_loc (MSG_NOTE, vect_location,
8728 "transform load. ncopies = %d\n", ncopies);
8729
8730 /* Transform. */
8731
8732 dr_vec_info *dr_info = STMT_VINFO_DR_INFO (stmt_info), *first_dr_info = NULL;
8733 ensure_base_align (dr_info);
8734
8735 if (memory_access_type == VMAT_GATHER_SCATTER && gs_info.decl)
8736 {
8737 vect_build_gather_load_calls (stmt_info, gsi, vec_stmt, &gs_info, mask);
8738 return true;
8739 }
8740
8741 if (memory_access_type == VMAT_INVARIANT)
8742 {
8743 gcc_assert (!grouped_load && !mask && !bb_vinfo);
8744 /* If we have versioned for aliasing or the loop doesn't
8745 have any data dependencies that would preclude this,
8746 then we are sure this is a loop invariant load and
8747 thus we can insert it on the preheader edge. */
8748 bool hoist_p = (LOOP_VINFO_NO_DATA_DEPENDENCIES (loop_vinfo)
8749 && !nested_in_vect_loop
8750 && hoist_defs_of_uses (stmt_info, loop));
8751 if (hoist_p)
8752 {
8753 gassign *stmt = as_a <gassign *> (stmt_info->stmt);
8754 if (dump_enabled_p ())
8755 dump_printf_loc (MSG_NOTE, vect_location,
8756 "hoisting out of the vectorized loop: %G", stmt);
8757 scalar_dest = copy_ssa_name (scalar_dest);
8758 tree rhs = unshare_expr (gimple_assign_rhs1 (stmt));
8759 gsi_insert_on_edge_immediate
8760 (loop_preheader_edge (loop),
8761 gimple_build_assign (scalar_dest, rhs));
8762 }
8763 /* These copies are all equivalent, but currently the representation
8764 requires a separate STMT_VINFO_VEC_STMT for each one. */
8765 prev_stmt_info = NULL;
8766 gimple_stmt_iterator gsi2 = *gsi;
8767 gsi_next (&gsi2);
8768 for (j = 0; j < ncopies; j++)
8769 {
8770 stmt_vec_info new_stmt_info;
8771 if (hoist_p)
8772 {
8773 new_temp = vect_init_vector (stmt_info, scalar_dest,
8774 vectype, NULL);
8775 gimple *new_stmt = SSA_NAME_DEF_STMT (new_temp);
8776 new_stmt_info = vinfo->add_stmt (new_stmt);
8777 }
8778 else
8779 {
8780 new_temp = vect_init_vector (stmt_info, scalar_dest,
8781 vectype, &gsi2);
8782 new_stmt_info = vinfo->lookup_def (new_temp);
8783 }
8784 if (slp)
8785 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt_info);
8786 else if (j == 0)
8787 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt_info;
8788 else
8789 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt_info;
8790 prev_stmt_info = new_stmt_info;
8791 }
8792 return true;
8793 }
8794
8795 if (memory_access_type == VMAT_ELEMENTWISE
8796 || memory_access_type == VMAT_STRIDED_SLP)
8797 {
8798 gimple_stmt_iterator incr_gsi;
8799 bool insert_after;
8800 gimple *incr;
8801 tree offvar;
8802 tree ivstep;
8803 tree running_off;
8804 vec<constructor_elt, va_gc> *v = NULL;
8805 tree stride_base, stride_step, alias_off;
8806 /* Checked by get_load_store_type. */
8807 unsigned int const_nunits = nunits.to_constant ();
8808 unsigned HOST_WIDE_INT cst_offset = 0;
8809
8810 gcc_assert (!LOOP_VINFO_FULLY_MASKED_P (loop_vinfo));
8811 gcc_assert (!nested_in_vect_loop);
8812
8813 if (grouped_load)
8814 {
8815 first_stmt_info = DR_GROUP_FIRST_ELEMENT (stmt_info);
8816 first_dr_info = STMT_VINFO_DR_INFO (first_stmt_info);
8817 }
8818 else
8819 {
8820 first_stmt_info = stmt_info;
8821 first_dr_info = dr_info;
8822 }
8823 if (slp && grouped_load)
8824 {
8825 group_size = DR_GROUP_SIZE (first_stmt_info);
8826 ref_type = get_group_alias_ptr_type (first_stmt_info);
8827 }
8828 else
8829 {
8830 if (grouped_load)
8831 cst_offset
8832 = (tree_to_uhwi (TYPE_SIZE_UNIT (TREE_TYPE (vectype)))
8833 * vect_get_place_in_interleaving_chain (stmt_info,
8834 first_stmt_info));
8835 group_size = 1;
8836 ref_type = reference_alias_ptr_type (DR_REF (dr_info->dr));
8837 }
8838
8839 stride_base
8840 = fold_build_pointer_plus
8841 (DR_BASE_ADDRESS (first_dr_info->dr),
8842 size_binop (PLUS_EXPR,
8843 convert_to_ptrofftype (DR_OFFSET (first_dr_info->dr)),
8844 convert_to_ptrofftype (DR_INIT (first_dr_info->dr))));
8845 stride_step = fold_convert (sizetype, DR_STEP (first_dr_info->dr));
8846
8847 /* For a load with a loop-invariant stride that is not a power of 2
8848 (i.e. not a grouped access) like so:
8849
8850 for (i = 0; i < n; i += stride)
8851 ... = array[i];
8852
8853 we generate a new induction variable and new accesses to
8854 form a new vector (or vectors, depending on ncopies):
8855
8856 for (j = 0; ; j += VF*stride)
8857 tmp1 = array[j];
8858 tmp2 = array[j + stride];
8859 ...
8860 vectemp = {tmp1, tmp2, ...}
8861 */
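/* For instance (illustrative), with a V4SI vectype and stride 3 this
   emits four scalar loads per vector iteration, array[j], array[j+3],
   array[j+6] and array[j+9], and gathers them into a single V4SI via
   a CONSTRUCTOR.  */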
8862
8863 ivstep = fold_build2 (MULT_EXPR, TREE_TYPE (stride_step), stride_step,
8864 build_int_cst (TREE_TYPE (stride_step), vf));
8865
8866 standard_iv_increment_position (loop, &incr_gsi, &insert_after);
8867
8868 stride_base = cse_and_gimplify_to_preheader (loop_vinfo, stride_base);
8869 ivstep = cse_and_gimplify_to_preheader (loop_vinfo, ivstep);
8870 create_iv (stride_base, ivstep, NULL,
8871 loop, &incr_gsi, insert_after,
8872 &offvar, NULL);
8873 incr = gsi_stmt (incr_gsi);
8874 loop_vinfo->add_stmt (incr);
8875
8876 stride_step = cse_and_gimplify_to_preheader (loop_vinfo, stride_step);
8877
8878 prev_stmt_info = NULL;
8879 running_off = offvar;
8880 alias_off = build_int_cst (ref_type, 0);
8881 int nloads = const_nunits;
8882 int lnel = 1;
8883 tree ltype = TREE_TYPE (vectype);
8884 tree lvectype = vectype;
8885 auto_vec<tree> dr_chain;
8886 if (memory_access_type == VMAT_STRIDED_SLP)
8887 {
8888 if (group_size < const_nunits)
8889 {
8890 /* First check if vec_init optab supports construction from
8891 vector elts directly. */
8892 scalar_mode elmode = SCALAR_TYPE_MODE (TREE_TYPE (vectype));
8893 machine_mode vmode;
8894 if (mode_for_vector (elmode, group_size).exists (&vmode)
8895 && VECTOR_MODE_P (vmode)
8896 && targetm.vector_mode_supported_p (vmode)
8897 && (convert_optab_handler (vec_init_optab,
8898 TYPE_MODE (vectype), vmode)
8899 != CODE_FOR_nothing))
8900 {
8901 nloads = const_nunits / group_size;
8902 lnel = group_size;
8903 ltype = build_vector_type (TREE_TYPE (vectype), group_size);
8904 }
8905 else
8906 {
8907 /* Otherwise avoid emitting a constructor of vector elements
8908 by performing the loads using an integer type of the same
8909 size, constructing a vector of those and then
8910 re-interpreting it as the original vector type.
8911 This avoids a huge runtime penalty due to the general
8912 inability to perform store forwarding from smaller stores
8913 to a larger load. */
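/* For instance (illustrative), with a V4SI vectype and group_size 2
   we would load each pair of ints as one 64-bit integer, build a
   two-element vector of those and VIEW_CONVERT it back to V4SI.  */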
8914 unsigned lsize
8915 = group_size * TYPE_PRECISION (TREE_TYPE (vectype));
8916 unsigned int lnunits = const_nunits / group_size;
8917 /* If we can't construct such a vector fall back to
8918 element loads of the original vector type. */
8919 if (int_mode_for_size (lsize, 0).exists (&elmode)
8920 && mode_for_vector (elmode, lnunits).exists (&vmode)
8921 && VECTOR_MODE_P (vmode)
8922 && targetm.vector_mode_supported_p (vmode)
8923 && (convert_optab_handler (vec_init_optab, vmode, elmode)
8924 != CODE_FOR_nothing))
8925 {
8926 nloads = lnunits;
8927 lnel = group_size;
8928 ltype = build_nonstandard_integer_type (lsize, 1);
8929 lvectype = build_vector_type (ltype, nloads);
8930 }
8931 }
8932 }
8933 else
8934 {
8935 nloads = 1;
8936 lnel = const_nunits;
8937 ltype = vectype;
8938 }
8939 ltype = build_aligned_type (ltype, TYPE_ALIGN (TREE_TYPE (vectype)));
8940 }
8941 /* If the vectype has a single element, load it as vector(1) scalar_type. */
8942 else if (nloads == 1)
8943 ltype = vectype;
8944
8945 if (slp)
8946 {
8947 /* For SLP permutation support we need to load the whole group,
8948 not only the number of vector stmts the permutation result
8949 fits in. */
8950 if (slp_perm)
8951 {
8952 /* We don't yet generate SLP_TREE_LOAD_PERMUTATIONs for
8953 variable VF. */
8954 unsigned int const_vf = vf.to_constant ();
8955 ncopies = CEIL (group_size * const_vf, const_nunits);
8956 dr_chain.create (ncopies);
8957 }
8958 else
8959 ncopies = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
8960 }
8961 unsigned int group_el = 0;
8962 unsigned HOST_WIDE_INT
8963 elsz = tree_to_uhwi (TYPE_SIZE_UNIT (TREE_TYPE (vectype)));
8964 for (j = 0; j < ncopies; j++)
8965 {
8966 if (nloads > 1)
8967 vec_alloc (v, nloads);
8968 stmt_vec_info new_stmt_info = NULL;
8969 for (i = 0; i < nloads; i++)
8970 {
8971 tree this_off = build_int_cst (TREE_TYPE (alias_off),
8972 group_el * elsz + cst_offset);
8973 tree data_ref = build2 (MEM_REF, ltype, running_off, this_off);
8974 vect_copy_ref_info (data_ref, DR_REF (first_dr_info->dr));
8975 gassign *new_stmt
8976 = gimple_build_assign (make_ssa_name (ltype), data_ref);
8977 new_stmt_info
8978 = vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
8979 if (nloads > 1)
8980 CONSTRUCTOR_APPEND_ELT (v, NULL_TREE,
8981 gimple_assign_lhs (new_stmt));
8982
8983 group_el += lnel;
8984 if (! slp
8985 || group_el == group_size)
8986 {
8987 tree newoff = copy_ssa_name (running_off);
8988 gimple *incr = gimple_build_assign (newoff, POINTER_PLUS_EXPR,
8989 running_off, stride_step);
8990 vect_finish_stmt_generation (stmt_info, incr, gsi);
8991
8992 running_off = newoff;
8993 group_el = 0;
8994 }
8995 }
8996 if (nloads > 1)
8997 {
8998 tree vec_inv = build_constructor (lvectype, v);
8999 new_temp = vect_init_vector (stmt_info, vec_inv, lvectype, gsi);
9000 new_stmt_info = vinfo->lookup_def (new_temp);
9001 if (lvectype != vectype)
9002 {
9003 gassign *new_stmt
9004 = gimple_build_assign (make_ssa_name (vectype),
9005 VIEW_CONVERT_EXPR,
9006 build1 (VIEW_CONVERT_EXPR,
9007 vectype, new_temp));
9008 new_stmt_info
9009 = vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
9010 }
9011 }
9012
9013 if (slp)
9014 {
9015 if (slp_perm)
9016 dr_chain.quick_push (gimple_assign_lhs (new_stmt_info->stmt));
9017 else
9018 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt_info);
9019 }
9020 else
9021 {
9022 if (j == 0)
9023 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt_info;
9024 else
9025 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt_info;
9026 prev_stmt_info = new_stmt_info;
9027 }
9028 }
9029 if (slp_perm)
9030 {
9031 unsigned n_perms;
9032 vect_transform_slp_perm_load (slp_node, dr_chain, gsi, vf,
9033 slp_node_instance, false, &n_perms);
9034 }
9035 return true;
9036 }
9037
9038 if (memory_access_type == VMAT_GATHER_SCATTER
9039 || (!slp && memory_access_type == VMAT_CONTIGUOUS))
9040 grouped_load = false;
9041
9042 if (grouped_load)
9043 {
9044 first_stmt_info = DR_GROUP_FIRST_ELEMENT (stmt_info);
9045 group_size = DR_GROUP_SIZE (first_stmt_info);
9046 /* For SLP vectorization we directly vectorize a subchain
9047 without permutation. */
9048 if (slp && ! SLP_TREE_LOAD_PERMUTATION (slp_node).exists ())
9049 first_stmt_info = SLP_TREE_SCALAR_STMTS (slp_node)[0];
9050 /* For BB vectorization always use the first stmt to base
9051 the data ref pointer on. */
9052 if (bb_vinfo)
9053 first_stmt_info_for_drptr = SLP_TREE_SCALAR_STMTS (slp_node)[0];
9054
9055 /* Check if the chain of loads is already vectorized. */
9056 if (STMT_VINFO_VEC_STMT (first_stmt_info)
9057 /* For SLP we would need to copy over SLP_TREE_VEC_STMTS.
9058 ??? But we can only do so if there is exactly one
9059 as we have no way to get at the rest. Leave the CSE
9060 opportunity alone.
9061 ??? With the group load eventually participating
9062 in multiple different permutations (having multiple
9063 slp nodes which refer to the same group) the CSE
9064 would even produce wrong code. See PR56270. */
9065 && !slp)
9066 {
9067 *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
9068 return true;
9069 }
9070 first_dr_info = STMT_VINFO_DR_INFO (first_stmt_info);
9071 group_gap_adj = 0;
9072
9073 /* VEC_NUM is the number of vect stmts to be created for this group. */
9074 if (slp)
9075 {
9076 grouped_load = false;
9077 /* If an SLP permutation is from N elements to N elements,
9078 and if one vector holds a whole number of N-element groups,
9079 we can load the inputs to the permutation in the same way
9080 as an unpermuted sequence. In other cases we need to load
9081 the whole group, not only the number of vector stmts the
9082 permutation result fits in. */
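/* E.g. (illustrative) a permutation within 2-element groups loaded
   into V4SI vectors: nunits (4) is a multiple of the group size (2)
   and the instance group size matches, so the loads can be generated
   exactly as for an unpermuted access.  */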
9083 if (slp_perm
9084 && (group_size != SLP_INSTANCE_GROUP_SIZE (slp_node_instance)
9085 || !multiple_p (nunits, group_size)))
9086 {
9087 /* We don't yet generate such SLP_TREE_LOAD_PERMUTATIONs for
9088 variable VF; see vect_transform_slp_perm_load. */
9089 unsigned int const_vf = vf.to_constant ();
9090 unsigned int const_nunits = nunits.to_constant ();
9091 vec_num = CEIL (group_size * const_vf, const_nunits);
9092 group_gap_adj = vf * group_size - nunits * vec_num;
9093 }
9094 else
9095 {
9096 vec_num = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
9097 group_gap_adj
9098 = group_size - SLP_INSTANCE_GROUP_SIZE (slp_node_instance);
9099 }
9100 }
9101 else
9102 vec_num = group_size;
9103
9104 ref_type = get_group_alias_ptr_type (first_stmt_info);
9105 }
9106 else
9107 {
9108 first_stmt_info = stmt_info;
9109 first_dr_info = dr_info;
9110 group_size = vec_num = 1;
9111 group_gap_adj = 0;
9112 ref_type = reference_alias_ptr_type (DR_REF (first_dr_info->dr));
9113 }
9114
9115 alignment_support_scheme
9116 = vect_supportable_dr_alignment (first_dr_info, false);
9117 gcc_assert (alignment_support_scheme);
9118 vec_loop_masks *loop_masks
9119 = (loop_vinfo && LOOP_VINFO_FULLY_MASKED_P (loop_vinfo)
9120 ? &LOOP_VINFO_MASKS (loop_vinfo)
9121 : NULL);
9122 /* Targets with load-lane instructions must not require explicit
9123 realignment. vect_supportable_dr_alignment always returns either
9124 dr_aligned or dr_unaligned_supported for masked operations. */
9125 gcc_assert ((memory_access_type != VMAT_LOAD_STORE_LANES
9126 && !mask
9127 && !loop_masks)
9128 || alignment_support_scheme == dr_aligned
9129 || alignment_support_scheme == dr_unaligned_supported);
9130
9131 /* In case the vectorization factor (VF) is bigger than the number
9132 of elements that we can fit in a vectype (nunits), we have to generate
9133 more than one vector stmt - i.e - we need to "unroll" the
9134 vector stmt by a factor VF/nunits. In doing so, we record a pointer
9135 from one copy of the vector stmt to the next, in the field
9136 STMT_VINFO_RELATED_STMT. This is necessary in order to allow following
9137 stages to find the correct vector defs to be used when vectorizing
9138 stmts that use the defs of the current stmt. The example below
9139 illustrates the vectorization process when VF=16 and nunits=4 (i.e., we
9140 need to create 4 vectorized stmts):
9141
9142 before vectorization:
9143 RELATED_STMT VEC_STMT
9144 S1: x = memref - -
9145 S2: z = x + 1 - -
9146
9147 step 1: vectorize stmt S1:
9148 We first create the vector stmt VS1_0, and, as usual, record a
9149 pointer to it in the STMT_VINFO_VEC_STMT of the scalar stmt S1.
9150 Next, we create the vector stmt VS1_1, and record a pointer to
9151 it in the STMT_VINFO_RELATED_STMT of the vector stmt VS1_0.
9152 Similarly, for VS1_2 and VS1_3. This is the resulting chain of
9153 stmts and pointers:
9154 RELATED_STMT VEC_STMT
9155 VS1_0: vx0 = memref0 VS1_1 -
9156 VS1_1: vx1 = memref1 VS1_2 -
9157 VS1_2: vx2 = memref2 VS1_3 -
9158 VS1_3: vx3 = memref3 - -
9159 S1: x = load - VS1_0
9160 S2: z = x + 1 - -
9161
9162 See in documentation in vect_get_vec_def_for_stmt_copy for how the
9163 information we recorded in RELATED_STMT field is used to vectorize
9164 stmt S2. */
9165
9166 /* In case of interleaving (non-unit grouped access):
9167
9168 S1: x2 = &base + 2
9169 S2: x0 = &base
9170 S3: x1 = &base + 1
9171 S4: x3 = &base + 3
9172
9173 Vectorized loads are created in the order of memory accesses
9174 starting from the access of the first stmt of the chain:
9175
9176 VS1: vx0 = &base
9177 VS2: vx1 = &base + vec_size*1
9178 VS3: vx3 = &base + vec_size*2
9179 VS4: vx4 = &base + vec_size*3
9180
9181 Then permutation statements are generated:
9182
9183 VS5: vx5 = VEC_PERM_EXPR < vx0, vx1, { 0, 2, ..., i*2 } >
9184 VS6: vx6 = VEC_PERM_EXPR < vx0, vx1, { 1, 3, ..., i*2+1 } >
9185 ...
9186
9187 And they are put in STMT_VINFO_VEC_STMT of the corresponding scalar stmts
9188 (the order of the data-refs in the output of vect_permute_load_chain
9189 corresponds to the order of scalar stmts in the interleaving chain - see
9190 the documentation of vect_permute_load_chain()).
9191 The generation of permutation stmts and recording them in
9192 STMT_VINFO_VEC_STMT is done in vect_transform_grouped_load().
9193
9194 In case of both multiple types and interleaving, the vector loads and
9195 permutation stmts above are created for every copy. The result vector
9196 stmts are put in STMT_VINFO_VEC_STMT for the first copy and in the
9197 corresponding STMT_VINFO_RELATED_STMT for the next copies. */
9198
9199 /* If the data reference is aligned (dr_aligned) or potentially unaligned
9200 on a target that supports unaligned accesses (dr_unaligned_supported)
9201 we generate the following code:
9202 p = initial_addr;
9203 indx = 0;
9204 loop {
9205 p = p + indx * vectype_size;
9206 vec_dest = *(p);
9207 indx = indx + 1;
9208 }
9209
9210 Otherwise, the data reference is potentially unaligned on a target that
9211 does not support unaligned accesses (dr_explicit_realign_optimized) -
9212 then generate the following code, in which the data in each iteration is
9213 obtained by two vector loads, one from the previous iteration, and one
9214 from the current iteration:
9215 p1 = initial_addr;
9216 msq_init = *(floor(p1))
9217 p2 = initial_addr + VS - 1;
9218 realignment_token = call target_builtin;
9219 indx = 0;
9220 loop {
9221 p2 = p2 + indx * vectype_size
9222 lsq = *(floor(p2))
9223 vec_dest = realign_load (msq, lsq, realignment_token)
9224 indx = indx + 1;
9225 msq = lsq;
9226 } */
9227
9228 /* If the misalignment remains the same throughout the execution of the
9229 loop, we can create the init_addr and permutation mask at the loop
9230 preheader. Otherwise, they need to be created inside the loop.
9231 The latter can only occur when vectorizing memory accesses in the
9232 inner-loop nested within an outer-loop that is being vectorized. */
9233
9234 if (nested_in_vect_loop
9235 && !multiple_p (DR_STEP_ALIGNMENT (dr_info->dr),
9236 GET_MODE_SIZE (TYPE_MODE (vectype))))
9237 {
9238 gcc_assert (alignment_support_scheme != dr_explicit_realign_optimized);
9239 compute_in_loop = true;
9240 }
9241
9242 if ((alignment_support_scheme == dr_explicit_realign_optimized
9243 || alignment_support_scheme == dr_explicit_realign)
9244 && !compute_in_loop)
9245 {
9246 msq = vect_setup_realignment (first_stmt_info_for_drptr
9247 ? first_stmt_info_for_drptr
9248 : first_stmt_info, gsi, &realignment_token,
9249 alignment_support_scheme, NULL_TREE,
9250 &at_loop);
9251 if (alignment_support_scheme == dr_explicit_realign_optimized)
9252 {
9253 phi = as_a <gphi *> (SSA_NAME_DEF_STMT (msq));
9254 byte_offset = size_binop (MINUS_EXPR, TYPE_SIZE_UNIT (vectype),
9255 size_one_node);
9256 }
9257 }
9258 else
9259 at_loop = loop;
9260
9261 if (memory_access_type == VMAT_CONTIGUOUS_REVERSE)
9262 offset = size_int (-TYPE_VECTOR_SUBPARTS (vectype) + 1);
9263
9264 tree bump;
9265 tree vec_offset = NULL_TREE;
9266 if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
9267 {
9268 aggr_type = NULL_TREE;
9269 bump = NULL_TREE;
9270 }
9271 else if (memory_access_type == VMAT_GATHER_SCATTER)
9272 {
9273 aggr_type = elem_type;
9274 vect_get_strided_load_store_ops (stmt_info, loop_vinfo, &gs_info,
9275 &bump, &vec_offset);
9276 }
9277 else
9278 {
9279 if (memory_access_type == VMAT_LOAD_STORE_LANES)
9280 aggr_type = build_array_type_nelts (elem_type, vec_num * nunits);
9281 else
9282 aggr_type = vectype;
9283 bump = vect_get_data_ptr_increment (dr_info, aggr_type,
9284 memory_access_type);
9285 }
9286
9287 tree vec_mask = NULL_TREE;
9288 prev_stmt_info = NULL;
9289 poly_uint64 group_elt = 0;
9290 for (j = 0; j < ncopies; j++)
9291 {
9292 stmt_vec_info new_stmt_info = NULL;
9293 /* 1. Create the vector or array pointer update chain. */
9294 if (j == 0)
9295 {
9296 bool simd_lane_access_p
9297 = STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) != 0;
9298 if (simd_lane_access_p
9299 && TREE_CODE (DR_BASE_ADDRESS (first_dr_info->dr)) == ADDR_EXPR
9300 && VAR_P (TREE_OPERAND (DR_BASE_ADDRESS (first_dr_info->dr), 0))
9301 && integer_zerop (DR_OFFSET (first_dr_info->dr))
9302 && integer_zerop (DR_INIT (first_dr_info->dr))
9303 && alias_sets_conflict_p (get_alias_set (aggr_type),
9304 get_alias_set (TREE_TYPE (ref_type)))
9305 && (alignment_support_scheme == dr_aligned
9306 || alignment_support_scheme == dr_unaligned_supported))
9307 {
9308 dataref_ptr = unshare_expr (DR_BASE_ADDRESS (first_dr_info->dr));
9309 dataref_offset = build_int_cst (ref_type, 0);
9310 }
9311 else if (first_stmt_info_for_drptr
9312 && first_stmt_info != first_stmt_info_for_drptr)
9313 {
9314 dataref_ptr
9315 = vect_create_data_ref_ptr (first_stmt_info_for_drptr,
9316 aggr_type, at_loop, offset, &dummy,
9317 gsi, &ptr_incr, simd_lane_access_p,
9318 byte_offset, bump);
9319 /* Adjust the pointer by the difference to first_stmt. */
9320 data_reference_p ptrdr
9321 = STMT_VINFO_DATA_REF (first_stmt_info_for_drptr);
9322 tree diff
9323 = fold_convert (sizetype,
9324 size_binop (MINUS_EXPR,
9325 DR_INIT (first_dr_info->dr),
9326 DR_INIT (ptrdr)));
9327 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi,
9328 stmt_info, diff);
9329 }
9330 else if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
9331 vect_get_gather_scatter_ops (loop, stmt_info, &gs_info,
9332 &dataref_ptr, &vec_offset);
9333 else
9334 dataref_ptr
9335 = vect_create_data_ref_ptr (first_stmt_info, aggr_type, at_loop,
9336 offset, &dummy, gsi, &ptr_incr,
9337 simd_lane_access_p,
9338 byte_offset, bump);
9339 if (mask)
9340 {
9341 if (slp_node)
9342 {
9343 auto_vec<vec<tree> > vec_defs (1);
9344 vect_get_slp_defs (slp_node, &vec_defs);
9345 vec_mask = vec_defs[0][0];
9346 }
9347 else
9348 vec_mask = vect_get_vec_def_for_operand (mask, stmt_info,
9349 mask_vectype);
9350 }
9351 }
9352 else
9353 {
9354 if (dataref_offset)
9355 dataref_offset = int_const_binop (PLUS_EXPR, dataref_offset,
9356 bump);
9357 else if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
9358 vec_offset = vect_get_vec_def_for_stmt_copy (vinfo, vec_offset);
9359 else
9360 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi,
9361 stmt_info, bump);
9362 if (mask)
9363 vec_mask = vect_get_vec_def_for_stmt_copy (vinfo, vec_mask);
9364 }
9365
9366 if (grouped_load || slp_perm)
9367 dr_chain.create (vec_num);
9368
9369 if (memory_access_type == VMAT_LOAD_STORE_LANES)
9370 {
9371 tree vec_array;
9372
9373 vec_array = create_vector_array (vectype, vec_num);
9374
9375 tree final_mask = NULL_TREE;
9376 if (loop_masks)
9377 final_mask = vect_get_loop_mask (gsi, loop_masks, ncopies,
9378 vectype, j);
9379 if (vec_mask)
9380 final_mask = prepare_load_store_mask (mask_vectype, final_mask,
9381 vec_mask, gsi);
9382
9383 gcall *call;
9384 if (final_mask)
9385 {
9386 /* Emit:
9387 VEC_ARRAY = MASK_LOAD_LANES (DATAREF_PTR, ALIAS_PTR,
9388 VEC_MASK). */
9389 unsigned int align = TYPE_ALIGN_UNIT (TREE_TYPE (vectype));
9390 tree alias_ptr = build_int_cst (ref_type, align);
9391 call = gimple_build_call_internal (IFN_MASK_LOAD_LANES, 3,
9392 dataref_ptr, alias_ptr,
9393 final_mask);
9394 }
9395 else
9396 {
9397 /* Emit:
9398 VEC_ARRAY = LOAD_LANES (MEM_REF[...all elements...]). */
9399 data_ref = create_array_ref (aggr_type, dataref_ptr, ref_type);
9400 call = gimple_build_call_internal (IFN_LOAD_LANES, 1, data_ref);
9401 }
9402 gimple_call_set_lhs (call, vec_array);
9403 gimple_call_set_nothrow (call, true);
9404 new_stmt_info = vect_finish_stmt_generation (stmt_info, call, gsi);
9405
9406 /* Extract each vector into an SSA_NAME. */
9407 for (i = 0; i < vec_num; i++)
9408 {
9409 new_temp = read_vector_array (stmt_info, gsi, scalar_dest,
9410 vec_array, i);
9411 dr_chain.quick_push (new_temp);
9412 }
9413
9414 /* Record the mapping between SSA_NAMEs and statements. */
9415 vect_record_grouped_load_vectors (stmt_info, dr_chain);
9416
9417 /* Record that VEC_ARRAY is now dead. */
9418 vect_clobber_variable (stmt_info, gsi, vec_array);
9419 }
9420 else
9421 {
9422 for (i = 0; i < vec_num; i++)
9423 {
9424 tree final_mask = NULL_TREE;
9425 if (loop_masks
9426 && memory_access_type != VMAT_INVARIANT)
9427 final_mask = vect_get_loop_mask (gsi, loop_masks,
9428 vec_num * ncopies,
9429 vectype, vec_num * j + i);
9430 if (vec_mask)
9431 final_mask = prepare_load_store_mask (mask_vectype, final_mask,
9432 vec_mask, gsi);
9433
9434 if (i > 0)
9435 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi,
9436 stmt_info, bump);
9437
9438 /* 2. Create the vector-load in the loop. */
9439 gimple *new_stmt = NULL;
9440 switch (alignment_support_scheme)
9441 {
9442 case dr_aligned:
9443 case dr_unaligned_supported:
9444 {
9445 unsigned int misalign;
9446 unsigned HOST_WIDE_INT align;
9447
9448 if (memory_access_type == VMAT_GATHER_SCATTER)
9449 {
9450 tree scale = size_int (gs_info.scale);
9451 gcall *call;
9452 if (loop_masks)
9453 call = gimple_build_call_internal
9454 (IFN_MASK_GATHER_LOAD, 4, dataref_ptr,
9455 vec_offset, scale, final_mask);
9456 else
9457 call = gimple_build_call_internal
9458 (IFN_GATHER_LOAD, 3, dataref_ptr,
9459 vec_offset, scale);
9460 gimple_call_set_nothrow (call, true);
9461 new_stmt = call;
9462 data_ref = NULL_TREE;
9463 break;
9464 }
9465
9466 align =
9467 known_alignment (DR_TARGET_ALIGNMENT (first_dr_info));
9468 if (alignment_support_scheme == dr_aligned)
9469 {
9470 gcc_assert (aligned_access_p (first_dr_info));
9471 misalign = 0;
9472 }
9473 else if (DR_MISALIGNMENT (first_dr_info) == -1)
9474 {
9475 align = dr_alignment
9476 (vect_dr_behavior (first_dr_info));
9477 misalign = 0;
9478 }
9479 else
9480 misalign = DR_MISALIGNMENT (first_dr_info);
9481 if (dataref_offset == NULL_TREE
9482 && TREE_CODE (dataref_ptr) == SSA_NAME)
9483 set_ptr_info_alignment (get_ptr_info (dataref_ptr),
9484 align, misalign);
9485
9486 if (final_mask)
9487 {
9488 align = least_bit_hwi (misalign | align);
9489 tree ptr = build_int_cst (ref_type, align);
9490 gcall *call
9491 = gimple_build_call_internal (IFN_MASK_LOAD, 3,
9492 dataref_ptr, ptr,
9493 final_mask);
9494 gimple_call_set_nothrow (call, true);
9495 new_stmt = call;
9496 data_ref = NULL_TREE;
9497 }
9498 else
9499 {
9500 tree ltype = vectype;
9501 /* If there's no peeling for gaps but we have a gap
9502 with SLP loads, then load only the lower half of the
9503 vector. See get_group_load_store_type for
9504 when we apply this optimization. */
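/* E.g. (illustrative) for a group of four ints with a gap of two
   and a V4SI vectype, we load just the low V2SI half here and pad
   the upper half with zeros via the CONSTRUCTOR built below.  */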
9505 if (slp
9506 && loop_vinfo
9507 && !LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo)
9508 && DR_GROUP_GAP (first_stmt_info) != 0
9509 && known_eq (nunits,
9510 (group_size
9511 - DR_GROUP_GAP (first_stmt_info)) * 2)
9512 && known_eq (nunits, group_size))
9513 ltype = build_vector_type (TREE_TYPE (vectype),
9514 (group_size
9515 - DR_GROUP_GAP
9516 (first_stmt_info)));
9517 data_ref
9518 = fold_build2 (MEM_REF, ltype, dataref_ptr,
9519 dataref_offset
9520 ? dataref_offset
9521 : build_int_cst (ref_type, 0));
9522 if (alignment_support_scheme == dr_aligned)
9523 ;
9524 else if (DR_MISALIGNMENT (first_dr_info) == -1)
9525 TREE_TYPE (data_ref)
9526 = build_aligned_type (TREE_TYPE (data_ref),
9527 align * BITS_PER_UNIT);
9528 else
9529 TREE_TYPE (data_ref)
9530 = build_aligned_type (TREE_TYPE (data_ref),
9531 TYPE_ALIGN (elem_type));
9532 if (ltype != vectype)
9533 {
9534 vect_copy_ref_info (data_ref, DR_REF (first_dr_info->dr));
9535 tree tem = make_ssa_name (ltype);
9536 new_stmt = gimple_build_assign (tem, data_ref);
9537 vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
9538 data_ref = NULL;
9539 vec<constructor_elt, va_gc> *v;
9540 vec_alloc (v, 2);
9541 CONSTRUCTOR_APPEND_ELT (v, NULL_TREE, tem);
9542 CONSTRUCTOR_APPEND_ELT (v, NULL_TREE,
9543 build_zero_cst (ltype));
9544 new_stmt
9545 = gimple_build_assign (vec_dest,
9546 build_constructor
9547 (vectype, v));
9548 }
9549 }
9550 break;
9551 }
9552 case dr_explicit_realign:
9553 {
9554 tree ptr, bump;
9555
9556 tree vs = size_int (TYPE_VECTOR_SUBPARTS (vectype));
9557
9558 if (compute_in_loop)
9559 msq = vect_setup_realignment (first_stmt_info, gsi,
9560 &realignment_token,
9561 dr_explicit_realign,
9562 dataref_ptr, NULL);
9563
9564 if (TREE_CODE (dataref_ptr) == SSA_NAME)
9565 ptr = copy_ssa_name (dataref_ptr);
9566 else
9567 ptr = make_ssa_name (TREE_TYPE (dataref_ptr));
9568 // For explicit realign the target alignment should be
9569 // known at compile time.
9570 unsigned HOST_WIDE_INT align =
9571 DR_TARGET_ALIGNMENT (first_dr_info).to_constant ();
9572 new_stmt = gimple_build_assign
9573 (ptr, BIT_AND_EXPR, dataref_ptr,
9574 build_int_cst
9575 (TREE_TYPE (dataref_ptr),
9576 -(HOST_WIDE_INT) align));
9577 vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
9578 data_ref
9579 = build2 (MEM_REF, vectype, ptr,
9580 build_int_cst (ref_type, 0));
9581 vect_copy_ref_info (data_ref, DR_REF (first_dr_info->dr));
9582 vec_dest = vect_create_destination_var (scalar_dest,
9583 vectype);
9584 new_stmt = gimple_build_assign (vec_dest, data_ref);
9585 new_temp = make_ssa_name (vec_dest, new_stmt);
9586 gimple_assign_set_lhs (new_stmt, new_temp);
9587 gimple_move_vops (new_stmt, stmt_info->stmt);
9588 vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
9589 msq = new_temp;
9590
9591 bump = size_binop (MULT_EXPR, vs,
9592 TYPE_SIZE_UNIT (elem_type));
9593 bump = size_binop (MINUS_EXPR, bump, size_one_node);
9594 ptr = bump_vector_ptr (dataref_ptr, NULL, gsi,
9595 stmt_info, bump);
9596 new_stmt = gimple_build_assign
9597 (NULL_TREE, BIT_AND_EXPR, ptr,
9598 build_int_cst
9599 (TREE_TYPE (ptr), -(HOST_WIDE_INT) align));
9600 ptr = copy_ssa_name (ptr, new_stmt);
9601 gimple_assign_set_lhs (new_stmt, ptr);
9602 vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
9603 data_ref
9604 = build2 (MEM_REF, vectype, ptr,
9605 build_int_cst (ref_type, 0));
9606 break;
9607 }
9608 case dr_explicit_realign_optimized:
9609 {
9610 if (TREE_CODE (dataref_ptr) == SSA_NAME)
9611 new_temp = copy_ssa_name (dataref_ptr);
9612 else
9613 new_temp = make_ssa_name (TREE_TYPE (dataref_ptr));
9614 // We should only be doing this if we know the target
9615 // alignment at compile time.
9616 unsigned HOST_WIDE_INT align =
9617 DR_TARGET_ALIGNMENT (first_dr_info).to_constant ();
9618 new_stmt = gimple_build_assign
9619 (new_temp, BIT_AND_EXPR, dataref_ptr,
9620 build_int_cst (TREE_TYPE (dataref_ptr),
9621 -(HOST_WIDE_INT) align));
9622 vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
9623 data_ref
9624 = build2 (MEM_REF, vectype, new_temp,
9625 build_int_cst (ref_type, 0));
9626 break;
9627 }
9628 default:
9629 gcc_unreachable ();
9630 }
9631 vec_dest = vect_create_destination_var (scalar_dest, vectype);
9632 /* DATA_REF is null if we've already built the statement. */
9633 if (data_ref)
9634 {
9635 vect_copy_ref_info (data_ref, DR_REF (first_dr_info->dr));
9636 new_stmt = gimple_build_assign (vec_dest, data_ref);
9637 }
9638 new_temp = make_ssa_name (vec_dest, new_stmt);
9639 gimple_set_lhs (new_stmt, new_temp);
9640 new_stmt_info
9641 = vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
9642
9643 /* 3. Handle explicit realignment if necessary/supported.
9644 Create in loop:
9645 vec_dest = realign_load (msq, lsq, realignment_token) */
9646 if (alignment_support_scheme == dr_explicit_realign_optimized
9647 || alignment_support_scheme == dr_explicit_realign)
9648 {
9649 lsq = gimple_assign_lhs (new_stmt);
9650 if (!realignment_token)
9651 realignment_token = dataref_ptr;
9652 vec_dest = vect_create_destination_var (scalar_dest, vectype);
9653 new_stmt = gimple_build_assign (vec_dest, REALIGN_LOAD_EXPR,
9654 msq, lsq, realignment_token);
9655 new_temp = make_ssa_name (vec_dest, new_stmt);
9656 gimple_assign_set_lhs (new_stmt, new_temp);
9657 new_stmt_info
9658 = vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
9659
9660 if (alignment_support_scheme == dr_explicit_realign_optimized)
9661 {
9662 gcc_assert (phi);
9663 if (i == vec_num - 1 && j == ncopies - 1)
9664 add_phi_arg (phi, lsq,
9665 loop_latch_edge (containing_loop),
9666 UNKNOWN_LOCATION);
9667 msq = lsq;
9668 }
9669 }
9670
9671 if (memory_access_type == VMAT_CONTIGUOUS_REVERSE)
9672 {
9673 tree perm_mask = perm_mask_for_reverse (vectype);
9674 new_temp = permute_vec_elements (new_temp, new_temp,
9675 perm_mask, stmt_info, gsi);
9676 new_stmt_info = vinfo->lookup_def (new_temp);
9677 }
9678
9679 /* Collect vector loads and later create their permutation in
9680 vect_transform_grouped_load (). */
9681 if (grouped_load || slp_perm)
9682 dr_chain.quick_push (new_temp);
9683
9684 /* Store vector loads in the corresponding SLP_NODE. */
9685 if (slp && !slp_perm)
9686 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt_info);
9687
9688 /* With an SLP permutation we load the gaps as well; without
9689 one we need to skip the gaps after we have fully loaded
9690 all elements. group_gap_adj is DR_GROUP_SIZE here. */
9691 group_elt += nunits;
9692 if (maybe_ne (group_gap_adj, 0U)
9693 && !slp_perm
9694 && known_eq (group_elt, group_size - group_gap_adj))
9695 {
9696 poly_wide_int bump_val
9697 = (wi::to_wide (TYPE_SIZE_UNIT (elem_type))
9698 * group_gap_adj);
9699 tree bump = wide_int_to_tree (sizetype, bump_val);
9700 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi,
9701 stmt_info, bump);
9702 group_elt = 0;
9703 }
9704 }
9705 /* Bump the vector pointer to account for a gap or for excess
9706 elements loaded for a permuted SLP load. */
9707 if (maybe_ne (group_gap_adj, 0U) && slp_perm)
9708 {
9709 poly_wide_int bump_val
9710 = (wi::to_wide (TYPE_SIZE_UNIT (elem_type))
9711 * group_gap_adj);
9712 tree bump = wide_int_to_tree (sizetype, bump_val);
9713 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi,
9714 stmt_info, bump);
9715 }
9716 }
9717
9718 if (slp && !slp_perm)
9719 continue;
9720
9721 if (slp_perm)
9722 {
9723 unsigned n_perms;
9724 if (!vect_transform_slp_perm_load (slp_node, dr_chain, gsi, vf,
9725 slp_node_instance, false,
9726 &n_perms))
9727 {
9728 dr_chain.release ();
9729 return false;
9730 }
9731 }
9732 else
9733 {
9734 if (grouped_load)
9735 {
9736 if (memory_access_type != VMAT_LOAD_STORE_LANES)
9737 vect_transform_grouped_load (stmt_info, dr_chain,
9738 group_size, gsi);
9739 *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
9740 }
9741 else
9742 {
9743 if (j == 0)
9744 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt_info;
9745 else
9746 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt_info;
9747 prev_stmt_info = new_stmt_info;
9748 }
9749 }
9750 dr_chain.release ();
9751 }
9752
9753 return true;
9754 }
9755
9756 /* Function vect_is_simple_cond.
9757
9758 Input:
9759 LOOP - the loop that is being vectorized.
9760 COND - Condition that is checked for simple use.
9761
9762 Output:
9763 *COMP_VECTYPE - the vector type for the comparison.
9764 *DTS - The def types for the arguments of the comparison.
9765
9766 Returns whether a COND can be vectorized. Checks whether the
9767 condition operands are supportable using vect_is_simple_use. */
9768
9769 static bool
9770 vect_is_simple_cond (tree cond, vec_info *vinfo,
9771 tree *comp_vectype, enum vect_def_type *dts,
9772 tree vectype)
9773 {
9774 tree lhs, rhs;
9775 tree vectype1 = NULL_TREE, vectype2 = NULL_TREE;
9776
9777 /* Mask case. */
9778 if (TREE_CODE (cond) == SSA_NAME
9779 && VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (cond)))
9780 {
9781 if (!vect_is_simple_use (cond, vinfo, &dts[0], comp_vectype)
9782 || !*comp_vectype
9783 || !VECTOR_BOOLEAN_TYPE_P (*comp_vectype))
9784 return false;
9785 return true;
9786 }
9787
9788 if (!COMPARISON_CLASS_P (cond))
9789 return false;
9790
9791 lhs = TREE_OPERAND (cond, 0);
9792 rhs = TREE_OPERAND (cond, 1);
9793
9794 if (TREE_CODE (lhs) == SSA_NAME)
9795 {
9796 if (!vect_is_simple_use (lhs, vinfo, &dts[0], &vectype1))
9797 return false;
9798 }
9799 else if (TREE_CODE (lhs) == INTEGER_CST || TREE_CODE (lhs) == REAL_CST
9800 || TREE_CODE (lhs) == FIXED_CST)
9801 dts[0] = vect_constant_def;
9802 else
9803 return false;
9804
9805 if (TREE_CODE (rhs) == SSA_NAME)
9806 {
9807 if (!vect_is_simple_use (rhs, vinfo, &dts[1], &vectype2))
9808 return false;
9809 }
9810 else if (TREE_CODE (rhs) == INTEGER_CST || TREE_CODE (rhs) == REAL_CST
9811 || TREE_CODE (rhs) == FIXED_CST)
9812 dts[1] = vect_constant_def;
9813 else
9814 return false;
9815
9816 if (vectype1 && vectype2
9817 && maybe_ne (TYPE_VECTOR_SUBPARTS (vectype1),
9818 TYPE_VECTOR_SUBPARTS (vectype2)))
9819 return false;
9820
9821 *comp_vectype = vectype1 ? vectype1 : vectype2;
9822 /* Invariant comparison. */
9823 if (! *comp_vectype)
9824 {
9825 tree scalar_type = TREE_TYPE (lhs);
9826 /* If we can widen the comparison to match vectype do so. */
9827 if (INTEGRAL_TYPE_P (scalar_type)
9828 && vectype
9829 && tree_int_cst_lt (TYPE_SIZE (scalar_type),
9830 TYPE_SIZE (TREE_TYPE (vectype))))
9831 scalar_type = build_nonstandard_integer_type
9832 (tree_to_uhwi (TYPE_SIZE (TREE_TYPE (vectype))),
9833 TYPE_UNSIGNED (scalar_type));
9834 *comp_vectype = get_vectype_for_scalar_type (vinfo, scalar_type);
9835 }
9836
9837 return true;
9838 }
9839
9840 /* vectorizable_condition.
9841
9842 Check if STMT_INFO is a conditional modify expression that can be vectorized.
9843 If VEC_STMT is also passed, vectorize STMT_INFO: create a vectorized
9844 stmt using VEC_COND_EXPR to replace it, put it in VEC_STMT, and insert it
9845 at GSI.
9846
9847 When STMT_INFO is vectorized as a nested cycle, for_reduction is true.
9848
9849 Return true if STMT_INFO is vectorizable in this way. */
9850
9851 static bool
9852 vectorizable_condition (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
9853 stmt_vec_info *vec_stmt,
9854 slp_tree slp_node, stmt_vector_for_cost *cost_vec)
9855 {
9856 vec_info *vinfo = stmt_info->vinfo;
9857 tree scalar_dest = NULL_TREE;
9858 tree vec_dest = NULL_TREE;
9859 tree cond_expr, cond_expr0 = NULL_TREE, cond_expr1 = NULL_TREE;
9860 tree then_clause, else_clause;
9861 tree comp_vectype = NULL_TREE;
9862 tree vec_cond_lhs = NULL_TREE, vec_cond_rhs = NULL_TREE;
9863 tree vec_then_clause = NULL_TREE, vec_else_clause = NULL_TREE;
9864 tree vec_compare;
9865 tree new_temp;
9866 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
9867 enum vect_def_type dts[4]
9868 = {vect_unknown_def_type, vect_unknown_def_type,
9869 vect_unknown_def_type, vect_unknown_def_type};
9870 int ndts = 4;
9871 int ncopies;
9872 enum tree_code code, cond_code, bitop1 = NOP_EXPR, bitop2 = NOP_EXPR;
9873 stmt_vec_info prev_stmt_info = NULL;
9874 int i, j;
9875 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
9876 vec<tree> vec_oprnds0 = vNULL;
9877 vec<tree> vec_oprnds1 = vNULL;
9878 vec<tree> vec_oprnds2 = vNULL;
9879 vec<tree> vec_oprnds3 = vNULL;
9880 tree vec_cmp_type;
9881 bool masked = false;
9882
9883 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
9884 return false;
9885
9886 /* Is this a vectorizable conditional operation? */
9887 gassign *stmt = dyn_cast <gassign *> (stmt_info->stmt);
9888 if (!stmt)
9889 return false;
9890
9891 code = gimple_assign_rhs_code (stmt);
9892 if (code != COND_EXPR)
9893 return false;
9894
9895 stmt_vec_info reduc_info = NULL;
9896 int reduc_index = -1;
9897 vect_reduction_type reduction_type = TREE_CODE_REDUCTION;
9898 bool for_reduction
9899 = STMT_VINFO_REDUC_DEF (vect_orig_stmt (stmt_info)) != NULL;
9900 if (for_reduction)
9901 {
9902 if (STMT_SLP_TYPE (stmt_info))
9903 return false;
9904 reduc_info = info_for_reduction (stmt_info);
9905 reduction_type = STMT_VINFO_REDUC_TYPE (reduc_info);
9906 reduc_index = STMT_VINFO_REDUC_IDX (reduc_info);
9907 gcc_assert (reduction_type != EXTRACT_LAST_REDUCTION
9908 || reduc_index != -1);
9909 }
9910 else
9911 {
9912 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
9913 return false;
9914
9915 /* FORNOW: only supported as part of a reduction. */
9916 if (STMT_VINFO_LIVE_P (stmt_info))
9917 {
9918 if (dump_enabled_p ())
9919 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
9920 "value used after loop.\n");
9921 return false;
9922 }
9923 }
9924
9925 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
9926 tree vectype1 = NULL_TREE, vectype2 = NULL_TREE;
9927
9928 if (slp_node)
9929 ncopies = 1;
9930 else
9931 ncopies = vect_get_num_copies (loop_vinfo, vectype);
9932
9933 gcc_assert (ncopies >= 1);
9934 if (for_reduction && ncopies > 1)
9935 return false; /* FORNOW */
9936
9937 cond_expr = gimple_assign_rhs1 (stmt);
9938 then_clause = gimple_assign_rhs2 (stmt);
9939 else_clause = gimple_assign_rhs3 (stmt);
9940
9941 if (!vect_is_simple_cond (cond_expr, stmt_info->vinfo,
9942 &comp_vectype, &dts[0], slp_node ? NULL : vectype)
9943 || !comp_vectype)
9944 return false;
9945
9946 if (!vect_is_simple_use (then_clause, stmt_info->vinfo, &dts[2], &vectype1))
9947 return false;
9948 if (!vect_is_simple_use (else_clause, stmt_info->vinfo, &dts[3], &vectype2))
9949 return false;
9950
9951 if (vectype1 && !useless_type_conversion_p (vectype, vectype1))
9952 return false;
9953
9954 if (vectype2 && !useless_type_conversion_p (vectype, vectype2))
9955 return false;
9956
9957 masked = !COMPARISON_CLASS_P (cond_expr);
9958 vec_cmp_type = build_same_sized_truth_vector_type (comp_vectype);
9959
9960 if (vec_cmp_type == NULL_TREE)
9961 return false;
9962
9963 cond_code = TREE_CODE (cond_expr);
9964 if (!masked)
9965 {
9966 cond_expr0 = TREE_OPERAND (cond_expr, 0);
9967 cond_expr1 = TREE_OPERAND (cond_expr, 1);
9968 }
9969
9970 /* For conditional reductions, the "then" value needs to be the candidate
9971 value calculated by this iteration, while the "else" value needs to be
9972 the result carried over from previous iterations. If the COND_EXPR
9973 is written the other way around, we need to swap it. */
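/* E.g. (illustrative) for a conditional reduction written as

       last = a[i] < 0 ? last : a[i];

   the carried value LAST is the "then" operand (reduc_index == 1),
   so we invert the comparison to a[i] >= 0 (or invert the mask when
   the comparison cannot be inverted) and swap the clauses so that
   the candidate a[i] becomes the "then" value expected below.  */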
9974 bool must_invert_cmp_result = false;
9975 if (reduction_type == EXTRACT_LAST_REDUCTION && reduc_index == 1)
9976 {
9977 if (masked)
9978 must_invert_cmp_result = true;
9979 else
9980 {
9981 bool honor_nans = HONOR_NANS (TREE_TYPE (cond_expr0));
9982 tree_code new_code = invert_tree_comparison (cond_code, honor_nans);
9983 if (new_code == ERROR_MARK)
9984 must_invert_cmp_result = true;
9985 else
9986 cond_code = new_code;
9987 }
9988 /* Make sure we don't accidentally use the old condition. */
9989 cond_expr = NULL_TREE;
9990 std::swap (then_clause, else_clause);
9991 }
9992
9993 if (!masked && VECTOR_BOOLEAN_TYPE_P (comp_vectype))
9994 {
9995 /* Boolean values may have another representation in vectors
9996 and therefore we prefer bit operations over comparison for
9997 them (which also works for scalar masks). We store opcodes
9998 to use in bitop1 and bitop2. Statement is vectorized as
9999 BITOP2 (rhs1 BITOP1 rhs2) or rhs1 BITOP2 (BITOP1 rhs2)
10000 depending on bitop1 and bitop2 arity. */
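/* E.g. (illustrative) for boolean operands a > b becomes a & ~b
   (bitop1 = BIT_NOT_EXPR, bitop2 = BIT_AND_EXPR), while a == b is
   handled as a ^ b with the then/else clauses swapped rather than
   emitting the trailing BIT_NOT_EXPR; see the bitop2 handling in
   the transform code below.  */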
10001 switch (cond_code)
10002 {
10003 case GT_EXPR:
10004 bitop1 = BIT_NOT_EXPR;
10005 bitop2 = BIT_AND_EXPR;
10006 break;
10007 case GE_EXPR:
10008 bitop1 = BIT_NOT_EXPR;
10009 bitop2 = BIT_IOR_EXPR;
10010 break;
10011 case LT_EXPR:
10012 bitop1 = BIT_NOT_EXPR;
10013 bitop2 = BIT_AND_EXPR;
10014 std::swap (cond_expr0, cond_expr1);
10015 break;
10016 case LE_EXPR:
10017 bitop1 = BIT_NOT_EXPR;
10018 bitop2 = BIT_IOR_EXPR;
10019 std::swap (cond_expr0, cond_expr1);
10020 break;
10021 case NE_EXPR:
10022 bitop1 = BIT_XOR_EXPR;
10023 break;
10024 case EQ_EXPR:
10025 bitop1 = BIT_XOR_EXPR;
10026 bitop2 = BIT_NOT_EXPR;
10027 break;
10028 default:
10029 return false;
10030 }
10031 cond_code = SSA_NAME;
10032 }
10033
10034 if (!vec_stmt)
10035 {
10036 if (bitop1 != NOP_EXPR)
10037 {
10038 machine_mode mode = TYPE_MODE (comp_vectype);
10039 optab optab;
10040
10041 optab = optab_for_tree_code (bitop1, comp_vectype, optab_default);
10042 if (!optab || optab_handler (optab, mode) == CODE_FOR_nothing)
10043 return false;
10044
10045 if (bitop2 != NOP_EXPR)
10046 {
10047 optab = optab_for_tree_code (bitop2, comp_vectype,
10048 optab_default);
10049 if (!optab || optab_handler (optab, mode) == CODE_FOR_nothing)
10050 return false;
10051 }
10052 }
10053 if (expand_vec_cond_expr_p (vectype, comp_vectype,
10054 cond_code))
10055 {
10056 STMT_VINFO_TYPE (stmt_info) = condition_vec_info_type;
10057 vect_model_simple_cost (stmt_info, ncopies, dts, ndts, slp_node,
10058 cost_vec);
10059 return true;
10060 }
10061 return false;
10062 }
10063
10064 /* Transform. */
10065
10066 if (!slp_node)
10067 {
10068 vec_oprnds0.create (1);
10069 vec_oprnds1.create (1);
10070 vec_oprnds2.create (1);
10071 vec_oprnds3.create (1);
10072 }
10073
10074 /* Handle def. */
10075 scalar_dest = gimple_assign_lhs (stmt);
10076 if (reduction_type != EXTRACT_LAST_REDUCTION)
10077 vec_dest = vect_create_destination_var (scalar_dest, vectype);
10078
10079 /* Handle cond expr. */
10080 for (j = 0; j < ncopies; j++)
10081 {
10082 bool swap_cond_operands = false;
10083
10084 /* See whether another part of the vectorized code applies a loop
10085 mask to the condition, or to its inverse. */
10086
10087 vec_loop_masks *masks = NULL;
10088 if (loop_vinfo && LOOP_VINFO_FULLY_MASKED_P (loop_vinfo))
10089 {
10090 if (reduction_type == EXTRACT_LAST_REDUCTION)
10091 masks = &LOOP_VINFO_MASKS (loop_vinfo);
10092 else
10093 {
10094 scalar_cond_masked_key cond (cond_expr, ncopies);
10095 if (loop_vinfo->scalar_cond_masked_set.contains (cond))
10096 masks = &LOOP_VINFO_MASKS (loop_vinfo);
10097 else
10098 {
10099 bool honor_nans = HONOR_NANS (TREE_TYPE (cond.op0));
10100 cond.code = invert_tree_comparison (cond.code, honor_nans);
10101 if (loop_vinfo->scalar_cond_masked_set.contains (cond))
10102 {
10103 masks = &LOOP_VINFO_MASKS (loop_vinfo);
10104 cond_code = cond.code;
10105 swap_cond_operands = true;
10106 }
10107 }
10108 }
10109 }
10110
10111 stmt_vec_info new_stmt_info = NULL;
10112 if (j == 0)
10113 {
10114 if (slp_node)
10115 {
10116 auto_vec<vec<tree>, 4> vec_defs;
10117 vect_get_slp_defs (slp_node, &vec_defs);
10118 vec_oprnds3 = vec_defs.pop ();
10119 vec_oprnds2 = vec_defs.pop ();
10120 if (!masked)
10121 vec_oprnds1 = vec_defs.pop ();
10122 vec_oprnds0 = vec_defs.pop ();
10123 }
10124 else
10125 {
10126 if (masked)
10127 {
10128 vec_cond_lhs
10129 = vect_get_vec_def_for_operand (cond_expr, stmt_info,
10130 comp_vectype);
10131 }
10132 else
10133 {
10134 vec_cond_lhs
10135 = vect_get_vec_def_for_operand (cond_expr0,
10136 stmt_info, comp_vectype);
10137 vec_cond_rhs
10138 = vect_get_vec_def_for_operand (cond_expr1,
10139 stmt_info, comp_vectype);
10140 }
10141 vec_then_clause = vect_get_vec_def_for_operand (then_clause,
10142 stmt_info);
10143 if (reduction_type != EXTRACT_LAST_REDUCTION)
10144 vec_else_clause = vect_get_vec_def_for_operand (else_clause,
10145 stmt_info);
10146 }
10147 }
10148 else
10149 {
10150 vec_cond_lhs
10151 = vect_get_vec_def_for_stmt_copy (vinfo, vec_oprnds0.pop ());
10152 if (!masked)
10153 vec_cond_rhs
10154 = vect_get_vec_def_for_stmt_copy (vinfo, vec_oprnds1.pop ());
10155
10156 vec_then_clause = vect_get_vec_def_for_stmt_copy (vinfo,
10157 vec_oprnds2.pop ());
10158 vec_else_clause = vect_get_vec_def_for_stmt_copy (vinfo,
10159 vec_oprnds3.pop ());
10160 }
10161
10162 if (!slp_node)
10163 {
10164 vec_oprnds0.quick_push (vec_cond_lhs);
10165 if (!masked)
10166 vec_oprnds1.quick_push (vec_cond_rhs);
10167 vec_oprnds2.quick_push (vec_then_clause);
10168 vec_oprnds3.quick_push (vec_else_clause);
10169 }
10170
10171 /* Arguments are ready. Create the new vector stmt. */
10172 FOR_EACH_VEC_ELT (vec_oprnds0, i, vec_cond_lhs)
10173 {
10174 vec_then_clause = vec_oprnds2[i];
10175 vec_else_clause = vec_oprnds3[i];
10176
10177 if (swap_cond_operands)
10178 std::swap (vec_then_clause, vec_else_clause);
10179
10180 if (masked)
10181 vec_compare = vec_cond_lhs;
10182 else
10183 {
10184 vec_cond_rhs = vec_oprnds1[i];
10185 if (bitop1 == NOP_EXPR)
10186 vec_compare = build2 (cond_code, vec_cmp_type,
10187 vec_cond_lhs, vec_cond_rhs);
10188 else
10189 {
10190 new_temp = make_ssa_name (vec_cmp_type);
10191 gassign *new_stmt;
10192 if (bitop1 == BIT_NOT_EXPR)
10193 new_stmt = gimple_build_assign (new_temp, bitop1,
10194 vec_cond_rhs);
10195 else
10196 new_stmt
10197 = gimple_build_assign (new_temp, bitop1, vec_cond_lhs,
10198 vec_cond_rhs);
10199 vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
10200 if (bitop2 == NOP_EXPR)
10201 vec_compare = new_temp;
10202 else if (bitop2 == BIT_NOT_EXPR)
10203 {
10204 /* Instead of doing ~x ? y : z do x ? z : y. */
10205 vec_compare = new_temp;
10206 std::swap (vec_then_clause, vec_else_clause);
10207 }
10208 else
10209 {
10210 vec_compare = make_ssa_name (vec_cmp_type);
10211 new_stmt
10212 = gimple_build_assign (vec_compare, bitop2,
10213 vec_cond_lhs, new_temp);
10214 vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
10215 }
10216 }
10217 }
10218
10219 /* If we decided to apply a loop mask to the result of the vector
10220 comparison, AND the comparison with the mask now. Later passes
10221 should then be able to reuse the AND results between multiple
10222 vector statements.
10223
10224 For example:
10225 for (int i = 0; i < 100; ++i)
10226 x[i] = y[i] ? z[i] : 10;
10227
10228 results in following optimized GIMPLE:
10229
10230 mask__35.8_43 = vect__4.7_41 != { 0, ... };
10231 vec_mask_and_46 = loop_mask_40 & mask__35.8_43;
10232 _19 = &MEM[base: z_12(D), index: ivtmp_56, step: 4, offset: 0B];
10233 vect_iftmp.11_47 = .MASK_LOAD (_19, 4B, vec_mask_and_46);
10234 vect_iftmp.12_52 = VEC_COND_EXPR <vec_mask_and_46,
10235 vect_iftmp.11_47, { 10, ... }>;
10236
10237 instead of using masked and unmasked forms of
10238 vec != { 0, ... } (masked in the MASK_LOAD,
10239 unmasked in the VEC_COND_EXPR). */
10240
10241 /* Force vec_compare to be an SSA_NAME rather than a comparison,
10242 in cases where that's necessary. */
10243
10244 if (masks || reduction_type == EXTRACT_LAST_REDUCTION)
10245 {
10246 if (!is_gimple_val (vec_compare))
10247 {
10248 tree vec_compare_name = make_ssa_name (vec_cmp_type);
10249 gassign *new_stmt = gimple_build_assign (vec_compare_name,
10250 vec_compare);
10251 vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
10252 vec_compare = vec_compare_name;
10253 }
10254
10255 if (must_invert_cmp_result)
10256 {
10257 tree vec_compare_name = make_ssa_name (vec_cmp_type);
10258 gassign *new_stmt = gimple_build_assign (vec_compare_name,
10259 BIT_NOT_EXPR,
10260 vec_compare);
10261 vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
10262 vec_compare = vec_compare_name;
10263 }
10264
10265 if (masks)
10266 {
10267 unsigned vec_num = vec_oprnds0.length ();
10268 tree loop_mask
10269 = vect_get_loop_mask (gsi, masks, vec_num * ncopies,
10270 vectype, vec_num * j + i);
10271 tree tmp2 = make_ssa_name (vec_cmp_type);
10272 gassign *g
10273 = gimple_build_assign (tmp2, BIT_AND_EXPR, vec_compare,
10274 loop_mask);
10275 vect_finish_stmt_generation (stmt_info, g, gsi);
10276 vec_compare = tmp2;
10277 }
10278 }
10279
10280 if (reduction_type == EXTRACT_LAST_REDUCTION)
10281 {
10282 gcall *new_stmt = gimple_build_call_internal
10283 (IFN_FOLD_EXTRACT_LAST, 3, else_clause, vec_compare,
10284 vec_then_clause);
10285 gimple_call_set_lhs (new_stmt, scalar_dest);
10286 SSA_NAME_DEF_STMT (scalar_dest) = new_stmt;
10287 if (stmt_info->stmt == gsi_stmt (*gsi))
10288 new_stmt_info = vect_finish_replace_stmt (stmt_info, new_stmt);
10289 else
10290 {
10291 /* In this case we're moving the definition to later in the
10292 block. That doesn't matter because the only uses of the
10293 lhs are in phi statements. */
10294 gimple_stmt_iterator old_gsi
10295 = gsi_for_stmt (stmt_info->stmt);
10296 gsi_remove (&old_gsi, true);
10297 new_stmt_info
10298 = vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
10299 }
10300 }
10301 else
10302 {
10303 new_temp = make_ssa_name (vec_dest);
10304 gassign *new_stmt
10305 = gimple_build_assign (new_temp, VEC_COND_EXPR, vec_compare,
10306 vec_then_clause, vec_else_clause);
10307 new_stmt_info
10308 = vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
10309 }
10310 if (slp_node)
10311 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt_info);
10312 }
10313
10314 if (slp_node)
10315 continue;
10316
10317 if (j == 0)
10318 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt_info;
10319 else
10320 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt_info;
10321
10322 prev_stmt_info = new_stmt_info;
10323 }
10324
10325 vec_oprnds0.release ();
10326 vec_oprnds1.release ();
10327 vec_oprnds2.release ();
10328 vec_oprnds3.release ();
10329
10330 return true;
10331 }
10332
10333 /* vectorizable_comparison.
10334
10335 Check if STMT_INFO is a comparison expression that can be vectorized.
10336 If VEC_STMT is also passed, vectorize STMT_INFO: create a vectorized
10337 comparison, put it in VEC_STMT, and insert it at GSI.
10338
10339 Return true if STMT_INFO is vectorizable in this way. */
10340
10341 static bool
10342 vectorizable_comparison (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
10343 stmt_vec_info *vec_stmt,
10344 slp_tree slp_node, stmt_vector_for_cost *cost_vec)
10345 {
10346 vec_info *vinfo = stmt_info->vinfo;
10347 tree lhs, rhs1, rhs2;
10348 tree vectype1 = NULL_TREE, vectype2 = NULL_TREE;
10349 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
10350 tree vec_rhs1 = NULL_TREE, vec_rhs2 = NULL_TREE;
10351 tree new_temp;
10352 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
10353 enum vect_def_type dts[2] = {vect_unknown_def_type, vect_unknown_def_type};
10354 int ndts = 2;
10355 poly_uint64 nunits;
10356 int ncopies;
10357 enum tree_code code, bitop1 = NOP_EXPR, bitop2 = NOP_EXPR;
10358 stmt_vec_info prev_stmt_info = NULL;
10359 int i, j;
10360 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
10361 vec<tree> vec_oprnds0 = vNULL;
10362 vec<tree> vec_oprnds1 = vNULL;
10363 tree mask_type;
10364 tree mask;
10365
10366 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
10367 return false;
10368
10369 if (!vectype || !VECTOR_BOOLEAN_TYPE_P (vectype))
10370 return false;
10371
10372 mask_type = vectype;
10373 nunits = TYPE_VECTOR_SUBPARTS (vectype);
10374
10375 if (slp_node)
10376 ncopies = 1;
10377 else
10378 ncopies = vect_get_num_copies (loop_vinfo, vectype);
10379
10380 gcc_assert (ncopies >= 1);
10381 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
10382 return false;
10383
10384 if (STMT_VINFO_LIVE_P (stmt_info))
10385 {
10386 if (dump_enabled_p ())
10387 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
10388 "value used after loop.\n");
10389 return false;
10390 }
10391
10392 gassign *stmt = dyn_cast <gassign *> (stmt_info->stmt);
10393 if (!stmt)
10394 return false;
10395
10396 code = gimple_assign_rhs_code (stmt);
10397
10398 if (TREE_CODE_CLASS (code) != tcc_comparison)
10399 return false;
10400
10401 rhs1 = gimple_assign_rhs1 (stmt);
10402 rhs2 = gimple_assign_rhs2 (stmt);
10403
10404 if (!vect_is_simple_use (rhs1, stmt_info->vinfo, &dts[0], &vectype1))
10405 return false;
10406
10407 if (!vect_is_simple_use (rhs2, stmt_info->vinfo, &dts[1], &vectype2))
10408 return false;
10409
10410 if (vectype1 && vectype2
10411 && maybe_ne (TYPE_VECTOR_SUBPARTS (vectype1),
10412 TYPE_VECTOR_SUBPARTS (vectype2)))
10413 return false;
10414
10415 vectype = vectype1 ? vectype1 : vectype2;
10416
10417 /* Invariant comparison. */
10418 if (!vectype)
10419 {
10420 vectype = get_vectype_for_scalar_type (vinfo, TREE_TYPE (rhs1));
10421 if (maybe_ne (TYPE_VECTOR_SUBPARTS (vectype), nunits))
10422 return false;
10423 }
10424 else if (maybe_ne (nunits, TYPE_VECTOR_SUBPARTS (vectype)))
10425 return false;
10426
10427 /* Can't compare mask and non-mask types. */
10428 if (vectype1 && vectype2
10429 && (VECTOR_BOOLEAN_TYPE_P (vectype1) ^ VECTOR_BOOLEAN_TYPE_P (vectype2)))
10430 return false;
10431
10432 /* Boolean values may have another representation in vectors
10433 and therefore we prefer bit operations over comparison for
10434 them (which also works for scalar masks). We store the opcodes
10435 to use in bitop1 and bitop2. The statement is vectorized as
10436 BITOP2 (rhs1 BITOP1 rhs2) or
10437 rhs1 BITOP2 (BITOP1 rhs2)
10438 depending on bitop1 and bitop2 arity. */
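/* As an illustrative sketch (following from single-bit boolean semantics
rather than from any particular target or testcase), the rewrites chosen
below are:
a > b becomes a & ~b (bitop1 = BIT_NOT_EXPR, bitop2 = BIT_AND_EXPR)
a >= b becomes a | ~b (bitop1 = BIT_NOT_EXPR, bitop2 = BIT_IOR_EXPR)
a == b becomes ~(a ^ b) (bitop1 = BIT_XOR_EXPR, bitop2 = BIT_NOT_EXPR)
a != b becomes a ^ b (bitop1 = BIT_XOR_EXPR, bitop2 left as NOP_EXPR)
with LT and LE handled like GT and GE after swapping the operands
(see swap_p below). */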
10439 bool swap_p = false;
10440 if (VECTOR_BOOLEAN_TYPE_P (vectype))
10441 {
10442 if (code == GT_EXPR)
10443 {
10444 bitop1 = BIT_NOT_EXPR;
10445 bitop2 = BIT_AND_EXPR;
10446 }
10447 else if (code == GE_EXPR)
10448 {
10449 bitop1 = BIT_NOT_EXPR;
10450 bitop2 = BIT_IOR_EXPR;
10451 }
10452 else if (code == LT_EXPR)
10453 {
10454 bitop1 = BIT_NOT_EXPR;
10455 bitop2 = BIT_AND_EXPR;
10456 swap_p = true;
10457 }
10458 else if (code == LE_EXPR)
10459 {
10460 bitop1 = BIT_NOT_EXPR;
10461 bitop2 = BIT_IOR_EXPR;
10462 swap_p = true;
10463 }
10464 else
10465 {
10466 bitop1 = BIT_XOR_EXPR;
10467 if (code == EQ_EXPR)
10468 bitop2 = BIT_NOT_EXPR;
10469 }
10470 }
10471
10472 if (!vec_stmt)
10473 {
10474 if (bitop1 == NOP_EXPR)
10475 {
10476 if (!expand_vec_cmp_expr_p (vectype, mask_type, code))
10477 return false;
10478 }
10479 else
10480 {
10481 machine_mode mode = TYPE_MODE (vectype);
10482 optab optab;
10483
10484 optab = optab_for_tree_code (bitop1, vectype, optab_default);
10485 if (!optab || optab_handler (optab, mode) == CODE_FOR_nothing)
10486 return false;
10487
10488 if (bitop2 != NOP_EXPR)
10489 {
10490 optab = optab_for_tree_code (bitop2, vectype, optab_default);
10491 if (!optab || optab_handler (optab, mode) == CODE_FOR_nothing)
10492 return false;
10493 }
10494 }
10495
10496 STMT_VINFO_TYPE (stmt_info) = comparison_vec_info_type;
10497 vect_model_simple_cost (stmt_info, ncopies * (1 + (bitop2 != NOP_EXPR)),
10498 dts, ndts, slp_node, cost_vec);
10499 return true;
10500 }
10501
10502 /* Transform. */
10503 if (!slp_node)
10504 {
10505 vec_oprnds0.create (1);
10506 vec_oprnds1.create (1);
10507 }
10508
10509 /* Handle def. */
10510 lhs = gimple_assign_lhs (stmt);
10511 mask = vect_create_destination_var (lhs, mask_type);
10512
10513 /* Handle cmp expr. */
10514 for (j = 0; j < ncopies; j++)
10515 {
10516 stmt_vec_info new_stmt_info = NULL;
10517 if (j == 0)
10518 {
10519 if (slp_node)
10520 {
10521 auto_vec<vec<tree>, 2> vec_defs;
10522 vect_get_slp_defs (slp_node, &vec_defs);
10523 vec_oprnds1 = vec_defs.pop ();
10524 vec_oprnds0 = vec_defs.pop ();
10525 if (swap_p)
10526 std::swap (vec_oprnds0, vec_oprnds1);
10527 }
10528 else
10529 {
10530 vec_rhs1 = vect_get_vec_def_for_operand (rhs1, stmt_info,
10531 vectype);
10532 vec_rhs2 = vect_get_vec_def_for_operand (rhs2, stmt_info,
10533 vectype);
10534 }
10535 }
10536 else
10537 {
10538 vec_rhs1 = vect_get_vec_def_for_stmt_copy (vinfo,
10539 vec_oprnds0.pop ());
10540 vec_rhs2 = vect_get_vec_def_for_stmt_copy (vinfo,
10541 vec_oprnds1.pop ());
10542 }
10543
10544 if (!slp_node)
10545 {
10546 if (swap_p && j == 0)
10547 std::swap (vec_rhs1, vec_rhs2);
10548 vec_oprnds0.quick_push (vec_rhs1);
10549 vec_oprnds1.quick_push (vec_rhs2);
10550 }
10551
10552 /* Arguments are ready. Create the new vector stmt. */
10553 FOR_EACH_VEC_ELT (vec_oprnds0, i, vec_rhs1)
10554 {
10555 vec_rhs2 = vec_oprnds1[i];
10556
10557 new_temp = make_ssa_name (mask);
10558 if (bitop1 == NOP_EXPR)
10559 {
10560 gassign *new_stmt = gimple_build_assign (new_temp, code,
10561 vec_rhs1, vec_rhs2);
10562 new_stmt_info
10563 = vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
10564 }
10565 else
10566 {
10567 gassign *new_stmt;
10568 if (bitop1 == BIT_NOT_EXPR)
10569 new_stmt = gimple_build_assign (new_temp, bitop1, vec_rhs2);
10570 else
10571 new_stmt = gimple_build_assign (new_temp, bitop1, vec_rhs1,
10572 vec_rhs2);
10573 new_stmt_info
10574 = vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
10575 if (bitop2 != NOP_EXPR)
10576 {
10577 tree res = make_ssa_name (mask);
10578 if (bitop2 == BIT_NOT_EXPR)
10579 new_stmt = gimple_build_assign (res, bitop2, new_temp);
10580 else
10581 new_stmt = gimple_build_assign (res, bitop2, vec_rhs1,
10582 new_temp);
10583 new_stmt_info
10584 = vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
10585 }
10586 }
10587 if (slp_node)
10588 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt_info);
10589 }
10590
10591 if (slp_node)
10592 continue;
10593
10594 if (j == 0)
10595 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt_info;
10596 else
10597 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt_info;
10598
10599 prev_stmt_info = new_stmt_info;
10600 }
10601
10602 vec_oprnds0.release ();
10603 vec_oprnds1.release ();
10604
10605 return true;
10606 }
10607
10608 /* If SLP_NODE is nonnull, return true if vectorizable_live_operation
10609 can handle all live statements in the node. Otherwise return true
10610 if STMT_INFO is not live or if vectorizable_live_operation can handle it.
10611 GSI and VEC_STMT_P are as for vectorizable_live_operation. */
10612
10613 static bool
10614 can_vectorize_live_stmts (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
10615 slp_tree slp_node, slp_instance slp_node_instance,
10616 bool vec_stmt_p,
10617 stmt_vector_for_cost *cost_vec)
10618 {
10619 if (slp_node)
10620 {
10621 stmt_vec_info slp_stmt_info;
10622 unsigned int i;
10623 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (slp_node), i, slp_stmt_info)
10624 {
10625 if (STMT_VINFO_LIVE_P (slp_stmt_info)
10626 && !vectorizable_live_operation (slp_stmt_info, gsi, slp_node,
10627 slp_node_instance, i,
10628 vec_stmt_p, cost_vec))
10629 return false;
10630 }
10631 }
10632 else if (STMT_VINFO_LIVE_P (stmt_info)
10633 && !vectorizable_live_operation (stmt_info, gsi, slp_node,
10634 slp_node_instance, -1,
10635 vec_stmt_p, cost_vec))
10636 return false;
10637
10638 return true;
10639 }
10640
10641 /* Make sure the statement is vectorizable. */
10642
10643 opt_result
10644 vect_analyze_stmt (stmt_vec_info stmt_info, bool *need_to_vectorize,
10645 slp_tree node, slp_instance node_instance,
10646 stmt_vector_for_cost *cost_vec)
10647 {
10648 vec_info *vinfo = stmt_info->vinfo;
10649 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
10650 enum vect_relevant relevance = STMT_VINFO_RELEVANT (stmt_info);
10651 bool ok;
10652 gimple_seq pattern_def_seq;
10653
10654 if (dump_enabled_p ())
10655 dump_printf_loc (MSG_NOTE, vect_location, "==> examining statement: %G",
10656 stmt_info->stmt);
10657
10658 if (gimple_has_volatile_ops (stmt_info->stmt))
10659 return opt_result::failure_at (stmt_info->stmt,
10660 "not vectorized:"
10661 " stmt has volatile operands: %G\n",
10662 stmt_info->stmt);
10663
10664 if (STMT_VINFO_IN_PATTERN_P (stmt_info)
10665 && node == NULL
10666 && (pattern_def_seq = STMT_VINFO_PATTERN_DEF_SEQ (stmt_info)))
10667 {
10668 gimple_stmt_iterator si;
10669
10670 for (si = gsi_start (pattern_def_seq); !gsi_end_p (si); gsi_next (&si))
10671 {
10672 stmt_vec_info pattern_def_stmt_info
10673 = vinfo->lookup_stmt (gsi_stmt (si));
10674 if (STMT_VINFO_RELEVANT_P (pattern_def_stmt_info)
10675 || STMT_VINFO_LIVE_P (pattern_def_stmt_info))
10676 {
10677 /* Analyze def stmt of STMT if it's a pattern stmt. */
10678 if (dump_enabled_p ())
10679 dump_printf_loc (MSG_NOTE, vect_location,
10680 "==> examining pattern def statement: %G",
10681 pattern_def_stmt_info->stmt);
10682
10683 opt_result res
10684 = vect_analyze_stmt (pattern_def_stmt_info,
10685 need_to_vectorize, node, node_instance,
10686 cost_vec);
10687 if (!res)
10688 return res;
10689 }
10690 }
10691 }
10692
10693 /* Skip stmts that do not need to be vectorized. In loops this is expected
10694 to include:
10695 - the COND_EXPR which is the loop exit condition
10696 - any LABEL_EXPRs in the loop
10697 - computations that are used only for array indexing or loop control.
10698 In basic blocks we only analyze statements that are a part of some SLP
10699 instance, therefore, all the statements are relevant.
10700
10701 A pattern statement needs to be analyzed instead of the original statement
10702 if the original statement is not relevant. Otherwise, we analyze both
10703 statements. In basic blocks we are called from some SLP instance
10704 traversal, so don't analyze pattern stmts instead of the originals;
10705 the pattern stmts will already be part of an SLP instance. */
10706
10707 stmt_vec_info pattern_stmt_info = STMT_VINFO_RELATED_STMT (stmt_info);
10708 if (!STMT_VINFO_RELEVANT_P (stmt_info)
10709 && !STMT_VINFO_LIVE_P (stmt_info))
10710 {
10711 if (STMT_VINFO_IN_PATTERN_P (stmt_info)
10712 && pattern_stmt_info
10713 && (STMT_VINFO_RELEVANT_P (pattern_stmt_info)
10714 || STMT_VINFO_LIVE_P (pattern_stmt_info)))
10715 {
10716 /* Analyze PATTERN_STMT instead of the original stmt. */
10717 stmt_info = pattern_stmt_info;
10718 if (dump_enabled_p ())
10719 dump_printf_loc (MSG_NOTE, vect_location,
10720 "==> examining pattern statement: %G",
10721 stmt_info->stmt);
10722 }
10723 else
10724 {
10725 if (dump_enabled_p ())
10726 dump_printf_loc (MSG_NOTE, vect_location, "irrelevant.\n");
10727
10728 return opt_result::success ();
10729 }
10730 }
10731 else if (STMT_VINFO_IN_PATTERN_P (stmt_info)
10732 && node == NULL
10733 && pattern_stmt_info
10734 && (STMT_VINFO_RELEVANT_P (pattern_stmt_info)
10735 || STMT_VINFO_LIVE_P (pattern_stmt_info)))
10736 {
10737 /* Analyze PATTERN_STMT too. */
10738 if (dump_enabled_p ())
10739 dump_printf_loc (MSG_NOTE, vect_location,
10740 "==> examining pattern statement: %G",
10741 pattern_stmt_info->stmt);
10742
10743 opt_result res
10744 = vect_analyze_stmt (pattern_stmt_info, need_to_vectorize, node,
10745 node_instance, cost_vec);
10746 if (!res)
10747 return res;
10748 }
10749
10750 switch (STMT_VINFO_DEF_TYPE (stmt_info))
10751 {
10752 case vect_internal_def:
10753 break;
10754
10755 case vect_reduction_def:
10756 case vect_nested_cycle:
10757 gcc_assert (!bb_vinfo
10758 && (relevance == vect_used_in_outer
10759 || relevance == vect_used_in_outer_by_reduction
10760 || relevance == vect_used_by_reduction
10761 || relevance == vect_unused_in_scope
10762 || relevance == vect_used_only_live));
10763 break;
10764
10765 case vect_induction_def:
10766 gcc_assert (!bb_vinfo);
10767 break;
10768
10769 case vect_constant_def:
10770 case vect_external_def:
10771 case vect_unknown_def_type:
10772 default:
10773 gcc_unreachable ();
10774 }
10775
10776 if (STMT_VINFO_RELEVANT_P (stmt_info))
10777 {
10778 tree type = gimple_expr_type (stmt_info->stmt);
10779 gcc_assert (!VECTOR_MODE_P (TYPE_MODE (type)));
10780 gcall *call = dyn_cast <gcall *> (stmt_info->stmt);
10781 gcc_assert (STMT_VINFO_VECTYPE (stmt_info)
10782 || (call && gimple_call_lhs (call) == NULL_TREE));
10783 *need_to_vectorize = true;
10784 }
10785
10786 if (PURE_SLP_STMT (stmt_info) && !node)
10787 {
10788 if (dump_enabled_p ())
10789 dump_printf_loc (MSG_NOTE, vect_location,
10790 "handled only by SLP analysis\n");
10791 return opt_result::success ();
10792 }
10793
10794 ok = true;
10795 if (!bb_vinfo
10796 && (STMT_VINFO_RELEVANT_P (stmt_info)
10797 || STMT_VINFO_DEF_TYPE (stmt_info) == vect_reduction_def))
10798 /* Prefer vectorizable_call over vectorizable_simd_clone_call so
10799 -mveclibabi= takes preference over library functions with
10800 the simd attribute. */
10801 ok = (vectorizable_call (stmt_info, NULL, NULL, node, cost_vec)
10802 || vectorizable_simd_clone_call (stmt_info, NULL, NULL, node,
10803 cost_vec)
10804 || vectorizable_conversion (stmt_info, NULL, NULL, node, cost_vec)
10805 || vectorizable_operation (stmt_info, NULL, NULL, node, cost_vec)
10806 || vectorizable_assignment (stmt_info, NULL, NULL, node, cost_vec)
10807 || vectorizable_load (stmt_info, NULL, NULL, node, node_instance,
10808 cost_vec)
10809 || vectorizable_store (stmt_info, NULL, NULL, node, cost_vec)
10810 || vectorizable_reduction (stmt_info, node, node_instance, cost_vec)
10811 || vectorizable_induction (stmt_info, NULL, NULL, node, cost_vec)
10812 || vectorizable_shift (stmt_info, NULL, NULL, node, cost_vec)
10813 || vectorizable_condition (stmt_info, NULL, NULL, node, cost_vec)
10814 || vectorizable_comparison (stmt_info, NULL, NULL, node,
10815 cost_vec)
10816 || vectorizable_lc_phi (stmt_info, NULL, node));
10817 else
10818 {
10819 if (bb_vinfo)
10820 ok = (vectorizable_call (stmt_info, NULL, NULL, node, cost_vec)
10821 || vectorizable_simd_clone_call (stmt_info, NULL, NULL, node,
10822 cost_vec)
10823 || vectorizable_conversion (stmt_info, NULL, NULL, node,
10824 cost_vec)
10825 || vectorizable_shift (stmt_info, NULL, NULL, node, cost_vec)
10826 || vectorizable_operation (stmt_info, NULL, NULL, node, cost_vec)
10827 || vectorizable_assignment (stmt_info, NULL, NULL, node,
10828 cost_vec)
10829 || vectorizable_load (stmt_info, NULL, NULL, node, node_instance,
10830 cost_vec)
10831 || vectorizable_store (stmt_info, NULL, NULL, node, cost_vec)
10832 || vectorizable_condition (stmt_info, NULL, NULL, node, cost_vec)
10833 || vectorizable_comparison (stmt_info, NULL, NULL, node,
10834 cost_vec));
10835 }
10836
10837 if (!ok)
10838 return opt_result::failure_at (stmt_info->stmt,
10839 "not vectorized:"
10840 " relevant stmt not supported: %G",
10841 stmt_info->stmt);
10842
10843 /* Stmts that are (also) "live" (i.e. that are used outside the loop)
10844 need extra handling, except for vectorizable reductions. */
10845 if (!bb_vinfo
10846 && STMT_VINFO_TYPE (stmt_info) != reduc_vec_info_type
10847 && STMT_VINFO_TYPE (stmt_info) != lc_phi_info_type
10848 && !can_vectorize_live_stmts (stmt_info, NULL, node, node_instance,
10849 false, cost_vec))
10850 return opt_result::failure_at (stmt_info->stmt,
10851 "not vectorized:"
10852 " live stmt not supported: %G",
10853 stmt_info->stmt);
10854
10855 return opt_result::success ();
10856 }
10857
10858
10859 /* Function vect_transform_stmt.
10860
10861 Create a vectorized stmt to replace STMT_INFO, and insert it at GSI. */
10862
10863 bool
10864 vect_transform_stmt (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
10865 slp_tree slp_node, slp_instance slp_node_instance)
10866 {
10867 vec_info *vinfo = stmt_info->vinfo;
10868 bool is_store = false;
10869 stmt_vec_info vec_stmt = NULL;
10870 bool done;
10871
10872 gcc_assert (slp_node || !PURE_SLP_STMT (stmt_info));
10873 stmt_vec_info old_vec_stmt_info = STMT_VINFO_VEC_STMT (stmt_info);
10874
10875 bool nested_p = (STMT_VINFO_LOOP_VINFO (stmt_info)
10876 && nested_in_vect_loop_p
10877 (LOOP_VINFO_LOOP (STMT_VINFO_LOOP_VINFO (stmt_info)),
10878 stmt_info));
10879
10880 gimple *stmt = stmt_info->stmt;
10881 switch (STMT_VINFO_TYPE (stmt_info))
10882 {
10883 case type_demotion_vec_info_type:
10884 case type_promotion_vec_info_type:
10885 case type_conversion_vec_info_type:
10886 done = vectorizable_conversion (stmt_info, gsi, &vec_stmt, slp_node,
10887 NULL);
10888 gcc_assert (done);
10889 break;
10890
10891 case induc_vec_info_type:
10892 done = vectorizable_induction (stmt_info, gsi, &vec_stmt, slp_node,
10893 NULL);
10894 gcc_assert (done);
10895 break;
10896
10897 case shift_vec_info_type:
10898 done = vectorizable_shift (stmt_info, gsi, &vec_stmt, slp_node, NULL);
10899 gcc_assert (done);
10900 break;
10901
10902 case op_vec_info_type:
10903 done = vectorizable_operation (stmt_info, gsi, &vec_stmt, slp_node,
10904 NULL);
10905 gcc_assert (done);
10906 break;
10907
10908 case assignment_vec_info_type:
10909 done = vectorizable_assignment (stmt_info, gsi, &vec_stmt, slp_node,
10910 NULL);
10911 gcc_assert (done);
10912 break;
10913
10914 case load_vec_info_type:
10915 done = vectorizable_load (stmt_info, gsi, &vec_stmt, slp_node,
10916 slp_node_instance, NULL);
10917 gcc_assert (done);
10918 break;
10919
10920 case store_vec_info_type:
10921 done = vectorizable_store (stmt_info, gsi, &vec_stmt, slp_node, NULL);
10922 gcc_assert (done);
10923 if (STMT_VINFO_GROUPED_ACCESS (stmt_info) && !slp_node)
10924 {
10925 /* In case of interleaving, the whole chain is vectorized when the
10926 last store in the chain is reached. Store stmts before the last
10927 one are skipped, and their vec_stmt_info shouldn't be freed
10928 meanwhile. */
10929 stmt_vec_info group_info = DR_GROUP_FIRST_ELEMENT (stmt_info);
10930 if (DR_GROUP_STORE_COUNT (group_info) == DR_GROUP_SIZE (group_info))
10931 is_store = true;
10932 }
10933 else
10934 is_store = true;
10935 break;
10936
10937 case condition_vec_info_type:
10938 done = vectorizable_condition (stmt_info, gsi, &vec_stmt, slp_node, NULL);
10939 gcc_assert (done);
10940 break;
10941
10942 case comparison_vec_info_type:
10943 done = vectorizable_comparison (stmt_info, gsi, &vec_stmt,
10944 slp_node, NULL);
10945 gcc_assert (done);
10946 break;
10947
10948 case call_vec_info_type:
10949 done = vectorizable_call (stmt_info, gsi, &vec_stmt, slp_node, NULL);
10950 stmt = gsi_stmt (*gsi);
10951 break;
10952
10953 case call_simd_clone_vec_info_type:
10954 done = vectorizable_simd_clone_call (stmt_info, gsi, &vec_stmt,
10955 slp_node, NULL);
10956 stmt = gsi_stmt (*gsi);
10957 break;
10958
10959 case reduc_vec_info_type:
10960 done = vect_transform_reduction (stmt_info, gsi, &vec_stmt, slp_node);
10961 gcc_assert (done);
10962 break;
10963
10964 case cycle_phi_info_type:
10965 done = vect_transform_cycle_phi (stmt_info, &vec_stmt, slp_node,
10966 slp_node_instance);
10967 gcc_assert (done);
10968 break;
10969
10970 case lc_phi_info_type:
10971 done = vectorizable_lc_phi (stmt_info, &vec_stmt, slp_node);
10972 gcc_assert (done);
10973 break;
10974
10975 default:
10976 if (!STMT_VINFO_LIVE_P (stmt_info))
10977 {
10978 if (dump_enabled_p ())
10979 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
10980 "stmt not supported.\n");
10981 gcc_unreachable ();
10982 }
10983 }
10984
10985 /* Verify SLP vectorization doesn't mess with STMT_VINFO_VEC_STMT.
10986 This would break hybrid SLP vectorization. */
10987 if (slp_node)
10988 gcc_assert (!vec_stmt
10989 && STMT_VINFO_VEC_STMT (stmt_info) == old_vec_stmt_info);
10990
10991 /* Handle inner-loop stmts whose DEF is used in the loop-nest that
10992 is being vectorized, but outside the immediately enclosing loop. */
10993 if (vec_stmt
10994 && nested_p
10995 && STMT_VINFO_TYPE (stmt_info) != reduc_vec_info_type
10996 && (STMT_VINFO_RELEVANT (stmt_info) == vect_used_in_outer
10997 || STMT_VINFO_RELEVANT (stmt_info) ==
10998 vect_used_in_outer_by_reduction))
10999 {
11000 class loop *innerloop = LOOP_VINFO_LOOP (
11001 STMT_VINFO_LOOP_VINFO (stmt_info))->inner;
11002 imm_use_iterator imm_iter;
11003 use_operand_p use_p;
11004 tree scalar_dest;
11005
11006 if (dump_enabled_p ())
11007 dump_printf_loc (MSG_NOTE, vect_location,
11008 "Record the vdef for outer-loop vectorization.\n");
11009
11010 /* Find the relevant loop-exit phi-node, and record the vec_stmt there
11011 (to be used when vectorizing outer-loop stmts that use the DEF of
11012 STMT). */
11013 if (gimple_code (stmt) == GIMPLE_PHI)
11014 scalar_dest = PHI_RESULT (stmt);
11015 else
11016 scalar_dest = gimple_get_lhs (stmt);
11017
11018 FOR_EACH_IMM_USE_FAST (use_p, imm_iter, scalar_dest)
11019 if (!flow_bb_inside_loop_p (innerloop, gimple_bb (USE_STMT (use_p))))
11020 {
11021 stmt_vec_info exit_phi_info
11022 = vinfo->lookup_stmt (USE_STMT (use_p));
11023 STMT_VINFO_VEC_STMT (exit_phi_info) = vec_stmt;
11024 }
11025 }
11026
11027 if (vec_stmt)
11028 STMT_VINFO_VEC_STMT (stmt_info) = vec_stmt;
11029
11030 if (STMT_VINFO_TYPE (stmt_info) == store_vec_info_type)
11031 return is_store;
11032
11033 /* If this stmt defines a value used on a backedge, update the
11034 vectorized PHIs. */
11035 stmt_vec_info orig_stmt_info = vect_orig_stmt (stmt_info);
11036 stmt_vec_info reduc_info;
11037 if (STMT_VINFO_REDUC_DEF (orig_stmt_info)
11038 && vect_stmt_to_vectorize (orig_stmt_info) == stmt_info
11039 && (reduc_info = info_for_reduction (orig_stmt_info))
11040 && STMT_VINFO_REDUC_TYPE (reduc_info) != FOLD_LEFT_REDUCTION
11041 && STMT_VINFO_REDUC_TYPE (reduc_info) != EXTRACT_LAST_REDUCTION)
11042 {
11043 gphi *phi;
11044 edge e;
11045 if (!slp_node
11046 && (phi = dyn_cast <gphi *>
11047 (STMT_VINFO_REDUC_DEF (orig_stmt_info)->stmt))
11048 && dominated_by_p (CDI_DOMINATORS,
11049 gimple_bb (orig_stmt_info->stmt), gimple_bb (phi))
11050 && (e = loop_latch_edge (gimple_bb (phi)->loop_father))
11051 && (PHI_ARG_DEF_FROM_EDGE (phi, e)
11052 == gimple_get_lhs (orig_stmt_info->stmt)))
11053 {
11054 stmt_vec_info phi_info
11055 = STMT_VINFO_VEC_STMT (STMT_VINFO_REDUC_DEF (orig_stmt_info));
11056 stmt_vec_info vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
11057 do
11058 {
11059 add_phi_arg (as_a <gphi *> (phi_info->stmt),
11060 gimple_get_lhs (vec_stmt->stmt), e,
11061 gimple_phi_arg_location (phi, e->dest_idx));
11062 phi_info = STMT_VINFO_RELATED_STMT (phi_info);
11063 vec_stmt = STMT_VINFO_RELATED_STMT (vec_stmt);
11064 }
11065 while (phi_info);
11066 gcc_assert (!vec_stmt);
11067 }
11068 else if (slp_node
11069 && slp_node != slp_node_instance->reduc_phis)
11070 {
11071 slp_tree phi_node = slp_node_instance->reduc_phis;
11072 gphi *phi = as_a <gphi *> (SLP_TREE_SCALAR_STMTS (phi_node)[0]->stmt);
11073 e = loop_latch_edge (gimple_bb (phi)->loop_father);
11074 gcc_assert (SLP_TREE_VEC_STMTS (phi_node).length ()
11075 == SLP_TREE_VEC_STMTS (slp_node).length ());
11076 for (unsigned i = 0; i < SLP_TREE_VEC_STMTS (phi_node).length (); ++i)
11077 add_phi_arg (as_a <gphi *> (SLP_TREE_VEC_STMTS (phi_node)[i]->stmt),
11078 gimple_get_lhs (SLP_TREE_VEC_STMTS (slp_node)[i]->stmt),
11079 e, gimple_phi_arg_location (phi, e->dest_idx));
11080 }
11081 }
11082
11083 /* Handle stmts whose DEF is used outside the loop-nest that is
11084 being vectorized. */
11085 done = can_vectorize_live_stmts (stmt_info, gsi, slp_node,
11086 slp_node_instance, true, NULL);
11087 gcc_assert (done);
11088
11089 return false;
11090 }
11091
11092
11093 /* Remove a group of stores (for SLP or interleaving), free their
11094 stmt_vec_info. */
11095
11096 void
11097 vect_remove_stores (stmt_vec_info first_stmt_info)
11098 {
11099 vec_info *vinfo = first_stmt_info->vinfo;
11100 stmt_vec_info next_stmt_info = first_stmt_info;
11101
11102 while (next_stmt_info)
11103 {
11104 stmt_vec_info tmp = DR_GROUP_NEXT_ELEMENT (next_stmt_info);
11105 next_stmt_info = vect_orig_stmt (next_stmt_info);
11106 /* Free the attached stmt_vec_info and remove the stmt. */
11107 vinfo->remove_stmt (next_stmt_info);
11108 next_stmt_info = tmp;
11109 }
11110 }
11111
11112 /* Function get_vectype_for_scalar_type_and_size.
11113
11114 Returns the vector type corresponding to SCALAR_TYPE and SIZE as supported
11115 by the target. */
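/* For instance (an illustrative sketch only; the exact result is
target-dependent), a caller might do

tree v = get_vectype_for_scalar_type_and_size (integer_type_node, 16);

which on a typical target with 128-bit vectors would yield a vector of
four 32-bit integers (V4SImode), whereas passing a SIZE of zero lets the
target choose its preferred SIMD mode for the scalar mode. */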
11116
11117 tree
11118 get_vectype_for_scalar_type_and_size (tree scalar_type, poly_uint64 size)
11119 {
11120 tree orig_scalar_type = scalar_type;
11121 scalar_mode inner_mode;
11122 machine_mode simd_mode;
11123 poly_uint64 nunits;
11124 tree vectype;
11125
11126 if (!is_int_mode (TYPE_MODE (scalar_type), &inner_mode)
11127 && !is_float_mode (TYPE_MODE (scalar_type), &inner_mode))
11128 return NULL_TREE;
11129
11130 unsigned int nbytes = GET_MODE_SIZE (inner_mode);
11131
11132 /* For vector types of elements whose mode precision doesn't
11133 match their type's precision we use an element type of mode
11134 precision. The vectorization routines will have to make sure
11135 they support the proper result truncation/extension.
11136 We also make sure to build vector types with INTEGER_TYPE
11137 component type only. */
11138 if (INTEGRAL_TYPE_P (scalar_type)
11139 && (GET_MODE_BITSIZE (inner_mode) != TYPE_PRECISION (scalar_type)
11140 || TREE_CODE (scalar_type) != INTEGER_TYPE))
11141 scalar_type = build_nonstandard_integer_type (GET_MODE_BITSIZE (inner_mode),
11142 TYPE_UNSIGNED (scalar_type));
11143
11144 /* We shouldn't end up building VECTOR_TYPEs of non-scalar components.
11145 When the component mode passes the above test, simply use a type
11146 corresponding to that mode. The theory is that any use that
11147 would cause problems with this will disable vectorization anyway. */
11148 else if (!SCALAR_FLOAT_TYPE_P (scalar_type)
11149 && !INTEGRAL_TYPE_P (scalar_type))
11150 scalar_type = lang_hooks.types.type_for_mode (inner_mode, 1);
11151
11152 /* We can't build a vector type of elements with alignment bigger than
11153 their size. */
11154 else if (nbytes < TYPE_ALIGN_UNIT (scalar_type))
11155 scalar_type = lang_hooks.types.type_for_mode (inner_mode,
11156 TYPE_UNSIGNED (scalar_type));
11157
11158 /* If we fell back to using the mode, fail if there was
11159 no scalar type for it. */
11160 if (scalar_type == NULL_TREE)
11161 return NULL_TREE;
11162
11163 /* If no size was supplied, use the mode the target prefers. Otherwise
11164 look up a vector mode of the specified size. */
11165 if (known_eq (size, 0U))
11166 simd_mode = targetm.vectorize.preferred_simd_mode (inner_mode);
11167 else if (!multiple_p (size, nbytes, &nunits)
11168 || !mode_for_vector (inner_mode, nunits).exists (&simd_mode))
11169 return NULL_TREE;
11170 /* NOTE: nunits == 1 is allowed to support single element vector types. */
11171 if (!multiple_p (GET_MODE_SIZE (simd_mode), nbytes, &nunits))
11172 return NULL_TREE;
11173
11174 vectype = build_vector_type (scalar_type, nunits);
11175
11176 if (!VECTOR_MODE_P (TYPE_MODE (vectype))
11177 && !INTEGRAL_MODE_P (TYPE_MODE (vectype)))
11178 return NULL_TREE;
11179
11180 /* Re-attach the address-space qualifier if we canonicalized the scalar
11181 type. */
11182 if (TYPE_ADDR_SPACE (orig_scalar_type) != TYPE_ADDR_SPACE (vectype))
11183 return build_qualified_type
11184 (vectype, KEEP_QUAL_ADDR_SPACE (TYPE_QUALS (orig_scalar_type)));
11185
11186 return vectype;
11187 }
11188
11189 /* Function get_vectype_for_scalar_type.
11190
11191 Returns the vector type corresponding to SCALAR_TYPE as supported
11192 by the target. */
11193
11194 tree
11195 get_vectype_for_scalar_type (vec_info *vinfo, tree scalar_type)
11196 {
11197 tree vectype;
11198 vectype = get_vectype_for_scalar_type_and_size (scalar_type,
11199 vinfo->vector_size);
11200 if (vectype
11201 && known_eq (vinfo->vector_size, 0U))
11202 vinfo->vector_size = GET_MODE_SIZE (TYPE_MODE (vectype));
11203 return vectype;
11204 }
11205
11206 /* Function get_mask_type_for_scalar_type.
11207
11208 Returns the mask type corresponding to the result of a comparison
11209 of vectors of the specified SCALAR_TYPE as supported by the target. */
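/* For instance (an illustrative sketch, target-dependent): for SCALAR_TYPE
int on a target using 16-byte vectors this would be a 4-element boolean
vector type, suitable as the result of comparing two V4SI vectors. */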
11210
11211 tree
11212 get_mask_type_for_scalar_type (vec_info *vinfo, tree scalar_type)
11213 {
11214 tree vectype = get_vectype_for_scalar_type (vinfo, scalar_type);
11215
11216 if (!vectype)
11217 return NULL;
11218
11219 return build_truth_vector_type (TYPE_VECTOR_SUBPARTS (vectype),
11220 vinfo->vector_size);
11221 }
11222
11223 /* Function get_same_sized_vectype
11224
11225 Returns a vector type corresponding to SCALAR_TYPE of size
11226 VECTOR_TYPE if supported by the target. */
11227
11228 tree
11229 get_same_sized_vectype (tree scalar_type, tree vector_type)
11230 {
11231 if (VECT_SCALAR_BOOLEAN_TYPE_P (scalar_type))
11232 return build_same_sized_truth_vector_type (vector_type);
11233
11234 return get_vectype_for_scalar_type_and_size
11235 (scalar_type, GET_MODE_SIZE (TYPE_MODE (vector_type)));
11236 }
11237
11238 /* Function vect_is_simple_use.
11239
11240 Input:
11241 VINFO - the vect info of the loop or basic block that is being vectorized.
11242 OPERAND - operand in the loop or bb.
11243 Output:
11244 DEF_STMT_INFO_OUT (optional) - information about the defining stmt in
11245 case OPERAND is an SSA_NAME that is defined in the vectorizable region
11246 DEF_STMT_OUT (optional) - the defining stmt in case OPERAND is an SSA_NAME;
11247 the definition could be anywhere in the function
11248 DT - the type of definition
11249
11250 Returns whether a stmt with OPERAND can be vectorized.
11251 For loops, supportable operands are constants, loop invariants, and operands
11252 that are defined by the current iteration of the loop. Unsupportable
11253 operands are those that are defined by a previous iteration of the loop (as
11254 is the case in reduction/induction computations).
11255 For basic blocks, supportable operands are constants and bb invariants.
11256 For now, operands defined outside the basic block are not supported. */
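/* For example (a sketch, not taken from any particular testcase): when
analyzing a loop-body statement such as x_3 = a_1 + b_2, a call along
the lines of

enum vect_def_type dt;
if (!vect_is_simple_use (a_1, loop_vinfo, &dt))
...

would set DT to vect_internal_def if a_1 is defined by another statement
of the vectorizable region, vect_external_def if it is defined outside
that region, or vect_constant_def for a constant operand. */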
11257
11258 bool
11259 vect_is_simple_use (tree operand, vec_info *vinfo, enum vect_def_type *dt,
11260 stmt_vec_info *def_stmt_info_out, gimple **def_stmt_out)
11261 {
11262 if (def_stmt_info_out)
11263 *def_stmt_info_out = NULL;
11264 if (def_stmt_out)
11265 *def_stmt_out = NULL;
11266 *dt = vect_unknown_def_type;
11267
11268 if (dump_enabled_p ())
11269 {
11270 dump_printf_loc (MSG_NOTE, vect_location,
11271 "vect_is_simple_use: operand ");
11272 if (TREE_CODE (operand) == SSA_NAME
11273 && !SSA_NAME_IS_DEFAULT_DEF (operand))
11274 dump_gimple_expr (MSG_NOTE, TDF_SLIM, SSA_NAME_DEF_STMT (operand), 0);
11275 else
11276 dump_generic_expr (MSG_NOTE, TDF_SLIM, operand);
11277 }
11278
11279 if (CONSTANT_CLASS_P (operand))
11280 *dt = vect_constant_def;
11281 else if (is_gimple_min_invariant (operand))
11282 *dt = vect_external_def;
11283 else if (TREE_CODE (operand) != SSA_NAME)
11284 *dt = vect_unknown_def_type;
11285 else if (SSA_NAME_IS_DEFAULT_DEF (operand))
11286 *dt = vect_external_def;
11287 else
11288 {
11289 gimple *def_stmt = SSA_NAME_DEF_STMT (operand);
11290 stmt_vec_info stmt_vinfo = vinfo->lookup_def (operand);
11291 if (!stmt_vinfo)
11292 *dt = vect_external_def;
11293 else
11294 {
11295 stmt_vinfo = vect_stmt_to_vectorize (stmt_vinfo);
11296 def_stmt = stmt_vinfo->stmt;
11297 switch (gimple_code (def_stmt))
11298 {
11299 case GIMPLE_PHI:
11300 case GIMPLE_ASSIGN:
11301 case GIMPLE_CALL:
11302 *dt = STMT_VINFO_DEF_TYPE (stmt_vinfo);
11303 break;
11304 default:
11305 *dt = vect_unknown_def_type;
11306 break;
11307 }
11308 if (def_stmt_info_out)
11309 *def_stmt_info_out = stmt_vinfo;
11310 }
11311 if (def_stmt_out)
11312 *def_stmt_out = def_stmt;
11313 }
11314
11315 if (dump_enabled_p ())
11316 {
11317 dump_printf (MSG_NOTE, ", type of def: ");
11318 switch (*dt)
11319 {
11320 case vect_uninitialized_def:
11321 dump_printf (MSG_NOTE, "uninitialized\n");
11322 break;
11323 case vect_constant_def:
11324 dump_printf (MSG_NOTE, "constant\n");
11325 break;
11326 case vect_external_def:
11327 dump_printf (MSG_NOTE, "external\n");
11328 break;
11329 case vect_internal_def:
11330 dump_printf (MSG_NOTE, "internal\n");
11331 break;
11332 case vect_induction_def:
11333 dump_printf (MSG_NOTE, "induction\n");
11334 break;
11335 case vect_reduction_def:
11336 dump_printf (MSG_NOTE, "reduction\n");
11337 break;
11338 case vect_double_reduction_def:
11339 dump_printf (MSG_NOTE, "double reduction\n");
11340 break;
11341 case vect_nested_cycle:
11342 dump_printf (MSG_NOTE, "nested cycle\n");
11343 break;
11344 case vect_unknown_def_type:
11345 dump_printf (MSG_NOTE, "unknown\n");
11346 break;
11347 }
11348 }
11349
11350 if (*dt == vect_unknown_def_type)
11351 {
11352 if (dump_enabled_p ())
11353 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
11354 "Unsupported pattern.\n");
11355 return false;
11356 }
11357
11358 return true;
11359 }
11360
11361 /* Function vect_is_simple_use.
11362
11363 Same as vect_is_simple_use but also determines the vector operand
11364 type of OPERAND and stores it to *VECTYPE. If the definition of
11365 OPERAND is vect_uninitialized_def, vect_constant_def or
11366 vect_external_def *VECTYPE will be set to NULL_TREE and the caller
11367 is responsible for computing the best suited vector type for the
11368 scalar operand. */
11369
11370 bool
11371 vect_is_simple_use (tree operand, vec_info *vinfo, enum vect_def_type *dt,
11372 tree *vectype, stmt_vec_info *def_stmt_info_out,
11373 gimple **def_stmt_out)
11374 {
11375 stmt_vec_info def_stmt_info;
11376 gimple *def_stmt;
11377 if (!vect_is_simple_use (operand, vinfo, dt, &def_stmt_info, &def_stmt))
11378 return false;
11379
11380 if (def_stmt_out)
11381 *def_stmt_out = def_stmt;
11382 if (def_stmt_info_out)
11383 *def_stmt_info_out = def_stmt_info;
11384
11385 /* Now get a vector type if the def is internal, otherwise supply
11386 NULL_TREE and leave it up to the caller to figure out a proper
11387 type for the use stmt. */
11388 if (*dt == vect_internal_def
11389 || *dt == vect_induction_def
11390 || *dt == vect_reduction_def
11391 || *dt == vect_double_reduction_def
11392 || *dt == vect_nested_cycle)
11393 {
11394 *vectype = STMT_VINFO_VECTYPE (def_stmt_info);
11395 gcc_assert (*vectype != NULL_TREE);
11396 if (dump_enabled_p ())
11397 dump_printf_loc (MSG_NOTE, vect_location,
11398 "vect_is_simple_use: vectype %T\n", *vectype);
11399 }
11400 else if (*dt == vect_uninitialized_def
11401 || *dt == vect_constant_def
11402 || *dt == vect_external_def)
11403 *vectype = NULL_TREE;
11404 else
11405 gcc_unreachable ();
11406
11407 return true;
11408 }
11409
11410
11411 /* Function supportable_widening_operation
11412
11413 Check whether an operation represented by the code CODE is a
11414 widening operation that is supported by the target platform in
11415 vector form (i.e., when operating on arguments of type VECTYPE_IN
11416 producing a result of type VECTYPE_OUT).
11417
11418 Widening operations we currently support are NOP (CONVERT), FLOAT,
11419 FIX_TRUNC and WIDEN_MULT. This function checks if these operations
11420 are supported by the target platform either directly (via vector
11421 tree-codes), or via target builtins.
11422
11423 Output:
11424 - CODE1 and CODE2 are codes of vector operations to be used when
11425 vectorizing the operation, if available.
11426 - MULTI_STEP_CVT determines the number of required intermediate steps in
11427 case of multi-step conversion (like char->short->int - in that case
11428 MULTI_STEP_CVT will be 1).
11429 - INTERM_TYPES contains the intermediate type required to perform the
11430 widening operation (short in the above example). */
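/* For instance (an illustrative sketch; the details are target-dependent):
widening a V16QI vector of chars to V4SI ints would typically need one
intermediate V8HI (short) step, so on success *MULTI_STEP_CVT would be 1,
INTERM_TYPES would contain the short vector type, and for a plain
conversion CODE1/CODE2 would be VEC_UNPACK_LO_EXPR/VEC_UNPACK_HI_EXPR. */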
11431
11432 bool
11433 supportable_widening_operation (enum tree_code code, stmt_vec_info stmt_info,
11434 tree vectype_out, tree vectype_in,
11435 enum tree_code *code1, enum tree_code *code2,
11436 int *multi_step_cvt,
11437 vec<tree> *interm_types)
11438 {
11439 vec_info *vinfo = stmt_info->vinfo;
11440 loop_vec_info loop_info = STMT_VINFO_LOOP_VINFO (stmt_info);
11441 class loop *vect_loop = NULL;
11442 machine_mode vec_mode;
11443 enum insn_code icode1, icode2;
11444 optab optab1, optab2;
11445 tree vectype = vectype_in;
11446 tree wide_vectype = vectype_out;
11447 enum tree_code c1, c2;
11448 int i;
11449 tree prev_type, intermediate_type;
11450 machine_mode intermediate_mode, prev_mode;
11451 optab optab3, optab4;
11452
11453 *multi_step_cvt = 0;
11454 if (loop_info)
11455 vect_loop = LOOP_VINFO_LOOP (loop_info);
11456
11457 switch (code)
11458 {
11459 case WIDEN_MULT_EXPR:
11460 /* The result of a vectorized widening operation usually requires
11461 two vectors (because the widened results do not fit into one vector).
11462 The generated vector results would normally be expected to be
11463 generated in the same order as in the original scalar computation,
11464 i.e. if 8 results are generated in each vector iteration, they are
11465 to be organized as follows:
11466 vect1: [res1,res2,res3,res4],
11467 vect2: [res5,res6,res7,res8].
11468
11469 However, in the special case that the result of the widening
11470 operation is used in a reduction computation only, the order doesn't
11471 matter (because when vectorizing a reduction we change the order of
11472 the computation). Some targets can take advantage of this and
11473 generate more efficient code. For example, targets like Altivec,
11474 that support widen_mult using a sequence of {mult_even,mult_odd}
11475 generate the following vectors:
11476 vect1: [res1,res3,res5,res7],
11477 vect2: [res2,res4,res6,res8].
11478
11479 When vectorizing outer-loops, we execute the inner-loop sequentially
11480 (each vectorized inner-loop iteration contributes to VF outer-loop
11481 iterations in parallel). We therefore don't allow changing the
11482 order of the computation in the inner-loop during outer-loop
11483 vectorization. */
11484 /* TODO: Another case in which order doesn't *really* matter is when we
11485 widen and then contract again, e.g. (short)((int)x * y >> 8).
11486 Normally, pack_trunc performs an even/odd permute, whereas the
11487 repack from an even/odd expansion would be an interleave, which
11488 would be significantly simpler for e.g. AVX2. */
11489 /* In any case, in order to avoid duplicating the code below, recurse
11490 on VEC_WIDEN_MULT_EVEN_EXPR. If it succeeds, all the return values
11491 are properly set up for the caller. If we fail, we'll continue with
11492 a VEC_WIDEN_MULT_LO/HI_EXPR check. */
11493 if (vect_loop
11494 && STMT_VINFO_RELEVANT (stmt_info) == vect_used_by_reduction
11495 && !nested_in_vect_loop_p (vect_loop, stmt_info)
11496 && supportable_widening_operation (VEC_WIDEN_MULT_EVEN_EXPR,
11497 stmt_info, vectype_out,
11498 vectype_in, code1, code2,
11499 multi_step_cvt, interm_types))
11500 {
11501 /* Elements in a vector with the vect_used_by_reduction property cannot
11502 be reordered if the use chain with this property does not have the
11503 same operation. One such example is s += a * b, where elements
11504 in a and b cannot be reordered. Here we check if the vector defined
11505 by STMT is only directly used in the reduction statement. */
11506 tree lhs = gimple_assign_lhs (stmt_info->stmt);
11507 stmt_vec_info use_stmt_info = loop_info->lookup_single_use (lhs);
11508 if (use_stmt_info
11509 && STMT_VINFO_DEF_TYPE (use_stmt_info) == vect_reduction_def)
11510 return true;
11511 }
11512 c1 = VEC_WIDEN_MULT_LO_EXPR;
11513 c2 = VEC_WIDEN_MULT_HI_EXPR;
11514 break;
11515
11516 case DOT_PROD_EXPR:
11517 c1 = DOT_PROD_EXPR;
11518 c2 = DOT_PROD_EXPR;
11519 break;
11520
11521 case SAD_EXPR:
11522 c1 = SAD_EXPR;
11523 c2 = SAD_EXPR;
11524 break;
11525
11526 case VEC_WIDEN_MULT_EVEN_EXPR:
11527 /* Support the recursion induced just above. */
11528 c1 = VEC_WIDEN_MULT_EVEN_EXPR;
11529 c2 = VEC_WIDEN_MULT_ODD_EXPR;
11530 break;
11531
11532 case WIDEN_LSHIFT_EXPR:
11533 c1 = VEC_WIDEN_LSHIFT_LO_EXPR;
11534 c2 = VEC_WIDEN_LSHIFT_HI_EXPR;
11535 break;
11536
11537 CASE_CONVERT:
11538 c1 = VEC_UNPACK_LO_EXPR;
11539 c2 = VEC_UNPACK_HI_EXPR;
11540 break;
11541
11542 case FLOAT_EXPR:
11543 c1 = VEC_UNPACK_FLOAT_LO_EXPR;
11544 c2 = VEC_UNPACK_FLOAT_HI_EXPR;
11545 break;
11546
11547 case FIX_TRUNC_EXPR:
11548 c1 = VEC_UNPACK_FIX_TRUNC_LO_EXPR;
11549 c2 = VEC_UNPACK_FIX_TRUNC_HI_EXPR;
11550 break;
11551
11552 default:
11553 gcc_unreachable ();
11554 }
11555
11556 if (BYTES_BIG_ENDIAN && c1 != VEC_WIDEN_MULT_EVEN_EXPR)
11557 std::swap (c1, c2);
11558
11559 if (code == FIX_TRUNC_EXPR)
11560 {
11561 /* The signedness is determined from output operand. */
11562 optab1 = optab_for_tree_code (c1, vectype_out, optab_default);
11563 optab2 = optab_for_tree_code (c2, vectype_out, optab_default);
11564 }
11565 else if (CONVERT_EXPR_CODE_P (code)
11566 && VECTOR_BOOLEAN_TYPE_P (wide_vectype)
11567 && VECTOR_BOOLEAN_TYPE_P (vectype)
11568 && TYPE_MODE (wide_vectype) == TYPE_MODE (vectype)
11569 && SCALAR_INT_MODE_P (TYPE_MODE (vectype)))
11570 {
11571 /* If the input and result modes are the same, a different optab
11572 is needed where we pass in the number of units in vectype. */
11573 optab1 = vec_unpacks_sbool_lo_optab;
11574 optab2 = vec_unpacks_sbool_hi_optab;
11575 }
11576 else
11577 {
11578 optab1 = optab_for_tree_code (c1, vectype, optab_default);
11579 optab2 = optab_for_tree_code (c2, vectype, optab_default);
11580 }
11581
11582 if (!optab1 || !optab2)
11583 return false;
11584
11585 vec_mode = TYPE_MODE (vectype);
11586 if ((icode1 = optab_handler (optab1, vec_mode)) == CODE_FOR_nothing
11587 || (icode2 = optab_handler (optab2, vec_mode)) == CODE_FOR_nothing)
11588 return false;
11589
11590 *code1 = c1;
11591 *code2 = c2;
11592
11593 if (insn_data[icode1].operand[0].mode == TYPE_MODE (wide_vectype)
11594 && insn_data[icode2].operand[0].mode == TYPE_MODE (wide_vectype))
11595 {
11596 if (!VECTOR_BOOLEAN_TYPE_P (vectype))
11597 return true;
11598 /* For scalar masks we may have different boolean
11599 vector types having the same QImode. Thus we
11600 add an additional check on the number of elements. */
11601 if (known_eq (TYPE_VECTOR_SUBPARTS (vectype),
11602 TYPE_VECTOR_SUBPARTS (wide_vectype) * 2))
11603 return true;
11604 }
11605
11606 /* Check if it's a multi-step conversion that can be done using intermediate
11607 types. */
11608
11609 prev_type = vectype;
11610 prev_mode = vec_mode;
11611
11612 if (!CONVERT_EXPR_CODE_P (code))
11613 return false;
11614
11615 /* We assume here that there will not be more than MAX_INTERM_CVT_STEPS
11616 intermediate steps in the promotion sequence. We try
11617 MAX_INTERM_CVT_STEPS to get to WIDE_VECTYPE, and fail if we do
11618 not. */
11619 interm_types->create (MAX_INTERM_CVT_STEPS);
11620 for (i = 0; i < MAX_INTERM_CVT_STEPS; i++)
11621 {
11622 intermediate_mode = insn_data[icode1].operand[0].mode;
11623 if (VECTOR_BOOLEAN_TYPE_P (prev_type))
11624 {
11625 intermediate_type = vect_halve_mask_nunits (vinfo, prev_type);
11626 if (intermediate_mode != TYPE_MODE (intermediate_type))
11627 return false;
11628 }
11629 else
11630 intermediate_type
11631 = lang_hooks.types.type_for_mode (intermediate_mode,
11632 TYPE_UNSIGNED (prev_type));
11633
11634 if (VECTOR_BOOLEAN_TYPE_P (intermediate_type)
11635 && VECTOR_BOOLEAN_TYPE_P (prev_type)
11636 && intermediate_mode == prev_mode
11637 && SCALAR_INT_MODE_P (prev_mode))
11638 {
11639 /* If the input and result modes are the same, a different optab
11640 is needed where we pass in the number of units in vectype. */
11641 optab3 = vec_unpacks_sbool_lo_optab;
11642 optab4 = vec_unpacks_sbool_hi_optab;
11643 }
11644 else
11645 {
11646 optab3 = optab_for_tree_code (c1, intermediate_type, optab_default);
11647 optab4 = optab_for_tree_code (c2, intermediate_type, optab_default);
11648 }
11649
11650 if (!optab3 || !optab4
11651 || (icode1 = optab_handler (optab1, prev_mode)) == CODE_FOR_nothing
11652 || insn_data[icode1].operand[0].mode != intermediate_mode
11653 || (icode2 = optab_handler (optab2, prev_mode)) == CODE_FOR_nothing
11654 || insn_data[icode2].operand[0].mode != intermediate_mode
11655 || ((icode1 = optab_handler (optab3, intermediate_mode))
11656 == CODE_FOR_nothing)
11657 || ((icode2 = optab_handler (optab4, intermediate_mode))
11658 == CODE_FOR_nothing))
11659 break;
11660
11661 interm_types->quick_push (intermediate_type);
11662 (*multi_step_cvt)++;
11663
11664 if (insn_data[icode1].operand[0].mode == TYPE_MODE (wide_vectype)
11665 && insn_data[icode2].operand[0].mode == TYPE_MODE (wide_vectype))
11666 {
11667 if (!VECTOR_BOOLEAN_TYPE_P (vectype))
11668 return true;
11669 if (known_eq (TYPE_VECTOR_SUBPARTS (intermediate_type),
11670 TYPE_VECTOR_SUBPARTS (wide_vectype) * 2))
11671 return true;
11672 }
11673
11674 prev_type = intermediate_type;
11675 prev_mode = intermediate_mode;
11676 }
11677
11678 interm_types->release ();
11679 return false;
11680 }
11681
11682
11683 /* Function supportable_narrowing_operation
11684
11685 Check whether an operation represented by the code CODE is a
11686 narrowing operation that is supported by the target platform in
11687 vector form (i.e., when operating on arguments of type VECTYPE_IN
11688 and producing a result of type VECTYPE_OUT).
11689
11690 Narrowing operations we currently support are NOP (CONVERT), FIX_TRUNC
11691 and FLOAT. This function checks if these operations are supported by
11692 the target platform directly via vector tree-codes.
11693
11694 Output:
11695 - CODE1 is the code of a vector operation to be used when
11696 vectorizing the operation, if available.
11697 - MULTI_STEP_CVT determines the number of required intermediate steps in
11698 case of multi-step conversion (like int->short->char - in that case
11699 MULTI_STEP_CVT will be 1).
11700 - INTERM_TYPES contains the intermediate type required to perform the
11701 narrowing operation (short in the above example). */
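/* For instance (an illustrative sketch; the details are target-dependent):
narrowing a V4SI vector of ints down to V16QI chars would typically go
through an intermediate V8HI (short) step, so on success *CODE1 would be
VEC_PACK_TRUNC_EXPR, *MULTI_STEP_CVT would be 1 and INTERM_TYPES would
hold the short vector type. */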
11702
11703 bool
11704 supportable_narrowing_operation (vec_info *vinfo, enum tree_code code,
11705 tree vectype_out, tree vectype_in,
11706 enum tree_code *code1, int *multi_step_cvt,
11707 vec<tree> *interm_types)
11708 {
11709 machine_mode vec_mode;
11710 enum insn_code icode1;
11711 optab optab1, interm_optab;
11712 tree vectype = vectype_in;
11713 tree narrow_vectype = vectype_out;
11714 enum tree_code c1;
11715 tree intermediate_type, prev_type;
11716 machine_mode intermediate_mode, prev_mode;
11717 int i;
11718 bool uns;
11719
11720 *multi_step_cvt = 0;
11721 switch (code)
11722 {
11723 CASE_CONVERT:
11724 c1 = VEC_PACK_TRUNC_EXPR;
11725 if (VECTOR_BOOLEAN_TYPE_P (narrow_vectype)
11726 && VECTOR_BOOLEAN_TYPE_P (vectype)
11727 && TYPE_MODE (narrow_vectype) == TYPE_MODE (vectype)
11728 && SCALAR_INT_MODE_P (TYPE_MODE (vectype)))
11729 optab1 = vec_pack_sbool_trunc_optab;
11730 else
11731 optab1 = optab_for_tree_code (c1, vectype, optab_default);
11732 break;
11733
11734 case FIX_TRUNC_EXPR:
11735 c1 = VEC_PACK_FIX_TRUNC_EXPR;
11736 /* The signedness is determined from output operand. */
11737 optab1 = optab_for_tree_code (c1, vectype_out, optab_default);
11738 break;
11739
11740 case FLOAT_EXPR:
11741 c1 = VEC_PACK_FLOAT_EXPR;
11742 optab1 = optab_for_tree_code (c1, vectype, optab_default);
11743 break;
11744
11745 default:
11746 gcc_unreachable ();
11747 }
11748
11749 if (!optab1)
11750 return false;
11751
11752 vec_mode = TYPE_MODE (vectype);
11753 if ((icode1 = optab_handler (optab1, vec_mode)) == CODE_FOR_nothing)
11754 return false;
11755
11756 *code1 = c1;
11757
11758 if (insn_data[icode1].operand[0].mode == TYPE_MODE (narrow_vectype))
11759 {
11760 if (!VECTOR_BOOLEAN_TYPE_P (vectype))
11761 return true;
11762 /* For scalar masks we may have different boolean
11763 vector types having the same QImode. Thus we
11764 add an additional check on the number of elements. */
11765 if (known_eq (TYPE_VECTOR_SUBPARTS (vectype) * 2,
11766 TYPE_VECTOR_SUBPARTS (narrow_vectype)))
11767 return true;
11768 }
11769
11770 if (code == FLOAT_EXPR)
11771 return false;
11772
11773 /* Check if it's a multi-step conversion that can be done using intermediate
11774 types. */
11775 prev_mode = vec_mode;
11776 prev_type = vectype;
11777 if (code == FIX_TRUNC_EXPR)
11778 uns = TYPE_UNSIGNED (vectype_out);
11779 else
11780 uns = TYPE_UNSIGNED (vectype);
11781
11782 /* For multi-step FIX_TRUNC_EXPR prefer signed floating to integer
11783 conversion over unsigned, as unsigned FIX_TRUNC_EXPR is often more
11784 costly than signed. */
11785 if (code == FIX_TRUNC_EXPR && uns)
11786 {
11787 enum insn_code icode2;
11788
11789 intermediate_type
11790 = lang_hooks.types.type_for_mode (TYPE_MODE (vectype_out), 0);
11791 interm_optab
11792 = optab_for_tree_code (c1, intermediate_type, optab_default);
11793 if (interm_optab != unknown_optab
11794 && (icode2 = optab_handler (optab1, vec_mode)) != CODE_FOR_nothing
11795 && insn_data[icode1].operand[0].mode
11796 == insn_data[icode2].operand[0].mode)
11797 {
11798 uns = false;
11799 optab1 = interm_optab;
11800 icode1 = icode2;
11801 }
11802 }
11803
11804 /* We assume here that there will not be more than MAX_INTERM_CVT_STEPS
11805 intermediate steps in the narrowing sequence. We try
11806 MAX_INTERM_CVT_STEPS to get to NARROW_VECTYPE, and fail if we do not. */
11807 interm_types->create (MAX_INTERM_CVT_STEPS);
11808 for (i = 0; i < MAX_INTERM_CVT_STEPS; i++)
11809 {
11810 intermediate_mode = insn_data[icode1].operand[0].mode;
11811 if (VECTOR_BOOLEAN_TYPE_P (prev_type))
11812 {
11813 intermediate_type = vect_double_mask_nunits (vinfo, prev_type);
11814 if (intermediate_mode != TYPE_MODE (intermediate_type))
11815 return false;
11816 }
11817 else
11818 intermediate_type
11819 = lang_hooks.types.type_for_mode (intermediate_mode, uns);
11820 if (VECTOR_BOOLEAN_TYPE_P (intermediate_type)
11821 && VECTOR_BOOLEAN_TYPE_P (prev_type)
11822 && intermediate_mode == prev_mode
11823 && SCALAR_INT_MODE_P (prev_mode))
11824 interm_optab = vec_pack_sbool_trunc_optab;
11825 else
11826 interm_optab
11827 = optab_for_tree_code (VEC_PACK_TRUNC_EXPR, intermediate_type,
11828 optab_default);
11829 if (!interm_optab
11830 || ((icode1 = optab_handler (optab1, prev_mode)) == CODE_FOR_nothing)
11831 || insn_data[icode1].operand[0].mode != intermediate_mode
11832 || ((icode1 = optab_handler (interm_optab, intermediate_mode))
11833 == CODE_FOR_nothing))
11834 break;
11835
11836 interm_types->quick_push (intermediate_type);
11837 (*multi_step_cvt)++;
11838
11839 if (insn_data[icode1].operand[0].mode == TYPE_MODE (narrow_vectype))
11840 {
11841 if (!VECTOR_BOOLEAN_TYPE_P (vectype))
11842 return true;
11843 if (known_eq (TYPE_VECTOR_SUBPARTS (intermediate_type) * 2,
11844 TYPE_VECTOR_SUBPARTS (narrow_vectype)))
11845 return true;
11846 }
11847
11848 prev_mode = intermediate_mode;
11849 prev_type = intermediate_type;
11850 optab1 = interm_optab;
11851 }
11852
11853 interm_types->release ();
11854 return false;
11855 }
11856
11857 /* Generate and return a statement that sets vector mask MASK such that
11858 MASK[I] is true iff J + START_INDEX < END_INDEX for all J <= I. */
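/* For example (a sketch): with a 4-element mask, START_INDEX 6 and
END_INDEX 9 the generated IFN_WHILE_ULT call would produce the mask
{ true, true, true, false }, since 6, 7 and 8 are below 9 but 9 is not. */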
11859
11860 gcall *
11861 vect_gen_while (tree mask, tree start_index, tree end_index)
11862 {
11863 tree cmp_type = TREE_TYPE (start_index);
11864 tree mask_type = TREE_TYPE (mask);
11865 gcc_checking_assert (direct_internal_fn_supported_p (IFN_WHILE_ULT,
11866 cmp_type, mask_type,
11867 OPTIMIZE_FOR_SPEED));
11868 gcall *call = gimple_build_call_internal (IFN_WHILE_ULT, 3,
11869 start_index, end_index,
11870 build_zero_cst (mask_type));
11871 gimple_call_set_lhs (call, mask);
11872 return call;
11873 }
11874
11875 /* Generate a vector mask of type MASK_TYPE for which index I is false iff
11876 J + START_INDEX < END_INDEX for all J <= I. Add the statements to SEQ. */
11877
11878 tree
11879 vect_gen_while_not (gimple_seq *seq, tree mask_type, tree start_index,
11880 tree end_index)
11881 {
11882 tree tmp = make_ssa_name (mask_type);
11883 gcall *call = vect_gen_while (tmp, start_index, end_index);
11884 gimple_seq_add_stmt (seq, call);
11885 return gimple_build (seq, BIT_NOT_EXPR, mask_type, tmp);
11886 }
11887
11888 /* Try to compute the vector types required to vectorize STMT_INFO,
11889 returning true on success and false if vectorization isn't possible.
11890
11891 On success:
11892
11893 - Set *STMT_VECTYPE_OUT to:
11894 - NULL_TREE if the statement doesn't need to be vectorized;
11895 - boolean_type_node if the statement is a boolean operation whose
11896 vector type can only be determined once all the other vector types
11897 are known; and
11898 - the equivalent of STMT_VINFO_VECTYPE otherwise.
11899
11900 - Set *NUNITS_VECTYPE_OUT to the vector type that contains the maximum
11901 number of units needed to vectorize STMT_INFO, or NULL_TREE if the
11902 statement does not help to determine the overall number of units. */
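/* For example (an illustrative sketch): for a comparison such as
c_3 = a_1 < b_2 with int operands, *STMT_VECTYPE_OUT is set to
boolean_type_node (the actual mask type is chosen later) and
*NUNITS_VECTYPE_OUT to the vector type for int, whereas for a pure
boolean operation like c_3 = c_1 & c_2 only boolean_type_node is
recorded and *NUNITS_VECTYPE_OUT is left as NULL_TREE. */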
11903
11904 opt_result
11905 vect_get_vector_types_for_stmt (stmt_vec_info stmt_info,
11906 tree *stmt_vectype_out,
11907 tree *nunits_vectype_out)
11908 {
11909 vec_info *vinfo = stmt_info->vinfo;
11910 gimple *stmt = stmt_info->stmt;
11911
11912 *stmt_vectype_out = NULL_TREE;
11913 *nunits_vectype_out = NULL_TREE;
11914
11915 if (gimple_get_lhs (stmt) == NULL_TREE
11916 /* MASK_STORE has no lhs, but is ok. */
11917 && !gimple_call_internal_p (stmt, IFN_MASK_STORE))
11918 {
11919 if (is_a <gcall *> (stmt))
11920 {
11921 /* Ignore calls with no lhs. These must be calls to
11922 #pragma omp simd functions, and the vectorization factor
11923 they really need can't be determined until
11924 vectorizable_simd_clone_call. */
11925 if (dump_enabled_p ())
11926 dump_printf_loc (MSG_NOTE, vect_location,
11927 "defer to SIMD clone analysis.\n");
11928 return opt_result::success ();
11929 }
11930
11931 return opt_result::failure_at (stmt,
11932 "not vectorized: irregular stmt.%G", stmt);
11933 }
11934
11935 if (VECTOR_MODE_P (TYPE_MODE (gimple_expr_type (stmt))))
11936 return opt_result::failure_at (stmt,
11937 "not vectorized: vector stmt in loop:%G",
11938 stmt);
11939
11940 tree vectype;
11941 tree scalar_type = NULL_TREE;
11942 if (STMT_VINFO_VECTYPE (stmt_info))
11943 *stmt_vectype_out = vectype = STMT_VINFO_VECTYPE (stmt_info);
11944 else
11945 {
11946 gcc_assert (!STMT_VINFO_DATA_REF (stmt_info));
11947 if (gimple_call_internal_p (stmt, IFN_MASK_STORE))
11948 scalar_type = TREE_TYPE (gimple_call_arg (stmt, 3));
11949 else
11950 scalar_type = TREE_TYPE (gimple_get_lhs (stmt));
11951
11952 /* Pure bool ops don't participate in number-of-units computation.
11953 For comparisons use the types being compared. */
11954 if (VECT_SCALAR_BOOLEAN_TYPE_P (scalar_type)
11955 && is_gimple_assign (stmt)
11956 && gimple_assign_rhs_code (stmt) != COND_EXPR)
11957 {
11958 *stmt_vectype_out = boolean_type_node;
11959
11960 tree rhs1 = gimple_assign_rhs1 (stmt);
11961 if (TREE_CODE_CLASS (gimple_assign_rhs_code (stmt)) == tcc_comparison
11962 && !VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (rhs1)))
11963 scalar_type = TREE_TYPE (rhs1);
11964 else
11965 {
11966 if (dump_enabled_p ())
11967 dump_printf_loc (MSG_NOTE, vect_location,
11968 "pure bool operation.\n");
11969 return opt_result::success ();
11970 }
11971 }
11972
11973 if (dump_enabled_p ())
11974 dump_printf_loc (MSG_NOTE, vect_location,
11975 "get vectype for scalar type: %T\n", scalar_type);
11976 vectype = get_vectype_for_scalar_type (vinfo, scalar_type);
11977 if (!vectype)
11978 return opt_result::failure_at (stmt,
11979 "not vectorized:"
11980 " unsupported data-type %T\n",
11981 scalar_type);
11982
11983 if (!*stmt_vectype_out)
11984 *stmt_vectype_out = vectype;
11985
11986 if (dump_enabled_p ())
11987 dump_printf_loc (MSG_NOTE, vect_location, "vectype: %T\n", vectype);
11988 }
11989
11990 /* Don't try to compute scalar types if the stmt produces a boolean
11991 vector; use the existing vector type instead. */
11992 tree nunits_vectype;
11993 if (VECTOR_BOOLEAN_TYPE_P (vectype))
11994 nunits_vectype = vectype;
11995 else
11996 {
11997 /* The number of units is set according to the smallest scalar
11998 type (or the largest vector size, but we only support one
11999 vector size per vectorization). */
12000 if (*stmt_vectype_out != boolean_type_node)
12001 {
12002 HOST_WIDE_INT dummy;
12003 scalar_type = vect_get_smallest_scalar_type (stmt_info,
12004 &dummy, &dummy);
12005 }
12006 if (dump_enabled_p ())
12007 dump_printf_loc (MSG_NOTE, vect_location,
12008 "get vectype for scalar type: %T\n", scalar_type);
12009 nunits_vectype = get_vectype_for_scalar_type (vinfo, scalar_type);
12010 }
12011 if (!nunits_vectype)
12012 return opt_result::failure_at (stmt,
12013 "not vectorized: unsupported data-type %T\n",
12014 scalar_type);
12015
12016 if (maybe_ne (GET_MODE_SIZE (TYPE_MODE (vectype)),
12017 GET_MODE_SIZE (TYPE_MODE (nunits_vectype))))
12018 return opt_result::failure_at (stmt,
12019 "not vectorized: different sized vector "
12020 "types in statement, %T and %T\n",
12021 vectype, nunits_vectype);
12022
12023 if (dump_enabled_p ())
12024 {
12025 dump_printf_loc (MSG_NOTE, vect_location, "vectype: %T\n",
12026 nunits_vectype);
12027
12028 dump_printf_loc (MSG_NOTE, vect_location, "nunits = ");
12029 dump_dec (MSG_NOTE, TYPE_VECTOR_SUBPARTS (nunits_vectype));
12030 dump_printf (MSG_NOTE, "\n");
12031 }
12032
12033 *nunits_vectype_out = nunits_vectype;
12034 return opt_result::success ();
12035 }
12036
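/* Illustrative sketch, not part of the original sources: one way a caller
   might consume the two output types computed above.  MASK_PRODUCERS is a
   hypothetical list of statements whose mask type must be computed later,
   once all other vector types are known; updating the vectorization factor
   from NUNITS_VECTYPE is omitted.  */

static opt_result
sketch_note_stmt_vectypes (stmt_vec_info stmt_info,
			   vec<stmt_vec_info> *mask_producers)
{
  tree stmt_vectype, nunits_vectype;
  opt_result res
    = vect_get_vector_types_for_stmt (stmt_info, &stmt_vectype,
				      &nunits_vectype);
  if (!res)
    return res;

  if (stmt_vectype == boolean_type_node)
    /* The boolean placeholder: the real mask type has to wait for
       vect_get_mask_type_for_stmt below.  */
    mask_producers->safe_push (stmt_info);
  else if (stmt_vectype && !STMT_VINFO_VECTYPE (stmt_info))
    STMT_VINFO_VECTYPE (stmt_info) = stmt_vectype;

  return opt_result::success ();
}
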
12037 /* Try to determine the correct vector type for STMT_INFO, which is a
12038 statement that produces a scalar boolean result. Return the vector
12039 type on success, otherwise record the reason for failure and return NULL_TREE. */
12040
12041 opt_tree
12042 vect_get_mask_type_for_stmt (stmt_vec_info stmt_info)
12043 {
12044 vec_info *vinfo = stmt_info->vinfo;
12045 gimple *stmt = stmt_info->stmt;
12046 tree mask_type = NULL;
12047 tree vectype, scalar_type;
12048
12049 if (is_gimple_assign (stmt)
12050 && TREE_CODE_CLASS (gimple_assign_rhs_code (stmt)) == tcc_comparison
12051 && !VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (gimple_assign_rhs1 (stmt))))
12052 {
12053 scalar_type = TREE_TYPE (gimple_assign_rhs1 (stmt));
12054 mask_type = get_mask_type_for_scalar_type (vinfo, scalar_type);
12055
12056 if (!mask_type)
12057 return opt_tree::failure_at (stmt,
12058 "not vectorized: unsupported mask\n");
12059 }
12060 else
12061 {
12062 tree rhs;
12063 ssa_op_iter iter;
12064 enum vect_def_type dt;
12065
12066 FOR_EACH_SSA_TREE_OPERAND (rhs, stmt, iter, SSA_OP_USE)
12067 {
12068 if (!vect_is_simple_use (rhs, stmt_info->vinfo, &dt, &vectype))
12069 return opt_tree::failure_at (stmt,
12070 "not vectorized: can't compute mask"
12071 " type for statement, %G", stmt);
12072
12073 /* No vectype probably means an external definition.
12074 Allow it in case another operand lets us
12075 determine the mask type. */
12076 if (!vectype)
12077 continue;
12078
12079 if (!mask_type)
12080 mask_type = vectype;
12081 else if (maybe_ne (TYPE_VECTOR_SUBPARTS (mask_type),
12082 TYPE_VECTOR_SUBPARTS (vectype)))
12083 return opt_tree::failure_at (stmt,
12084 "not vectorized: different sized mask"
12085 " types in statement, %T and %T\n",
12086 mask_type, vectype);
12087 else if (VECTOR_BOOLEAN_TYPE_P (mask_type)
12088 != VECTOR_BOOLEAN_TYPE_P (vectype))
12089 return opt_tree::failure_at (stmt,
12090 "not vectorized: mixed mask and "
12091 "nonmask vector types in statement, "
12092 "%T and %T\n",
12093 mask_type, vectype);
12094 }
12095
12096 /* We may compare a boolean value loaded as a vector of integers.
12097 Fix mask_type in that case. */
12098 if (mask_type
12099 && !VECTOR_BOOLEAN_TYPE_P (mask_type)
12100 && gimple_code (stmt) == GIMPLE_ASSIGN
12101 && TREE_CODE_CLASS (gimple_assign_rhs_code (stmt)) == tcc_comparison)
12102 mask_type = build_same_sized_truth_vector_type (mask_type);
12103 }
12104
12105 /* A missing mask_type should mean a loop-invariant predicate.
12106 This is probably a subject for optimization in if-conversion. */
12107 if (!mask_type)
12108 return opt_tree::failure_at (stmt,
12109 "not vectorized: can't compute mask type "
12110 "for statement: %G", stmt);
12111
12112 return opt_tree::success (mask_type);
12113 }
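
/* Illustrative sketch, not part of the original sources: resolving the
   deferred boolean-placeholder statements (the hypothetical MASK_PRODUCERS
   list from the earlier sketch) once every other vector type is known.  */

static opt_result
sketch_resolve_mask_producers (vec<stmt_vec_info> &mask_producers)
{
  for (unsigned int i = 0; i < mask_producers.length (); i++)
    {
      stmt_vec_info stmt_info = mask_producers[i];
      opt_tree mask_type = vect_get_mask_type_for_stmt (stmt_info);
      if (!mask_type)
	return opt_result::propagate_failure (mask_type);
      STMT_VINFO_VECTYPE (stmt_info) = mask_type;
    }
  return opt_result::success ();
}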