1 /* Statement Analysis and Transformation for Vectorization
2 Copyright (C) 2003-2019 Free Software Foundation, Inc.
3 Contributed by Dorit Naishlos <dorit@il.ibm.com>
4 and Ira Rosen <irar@il.ibm.com>
5
6 This file is part of GCC.
7
8 GCC is free software; you can redistribute it and/or modify it under
9 the terms of the GNU General Public License as published by the Free
10 Software Foundation; either version 3, or (at your option) any later
11 version.
12
13 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
14 WARRANTY; without even the implied warranty of MERCHANTABILITY or
15 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
16 for more details.
17
18 You should have received a copy of the GNU General Public License
19 along with GCC; see the file COPYING3. If not see
20 <http://www.gnu.org/licenses/>. */
21
22 #include "config.h"
23 #include "system.h"
24 #include "coretypes.h"
25 #include "backend.h"
26 #include "target.h"
27 #include "rtl.h"
28 #include "tree.h"
29 #include "gimple.h"
30 #include "ssa.h"
31 #include "optabs-tree.h"
32 #include "insn-config.h"
33 #include "recog.h" /* FIXME: for insn_data */
34 #include "cgraph.h"
35 #include "dumpfile.h"
36 #include "alias.h"
37 #include "fold-const.h"
38 #include "stor-layout.h"
39 #include "tree-eh.h"
40 #include "gimplify.h"
41 #include "gimple-iterator.h"
42 #include "gimplify-me.h"
43 #include "tree-cfg.h"
44 #include "tree-ssa-loop-manip.h"
45 #include "cfgloop.h"
46 #include "explow.h"
47 #include "tree-ssa-loop.h"
48 #include "tree-scalar-evolution.h"
49 #include "tree-vectorizer.h"
50 #include "builtins.h"
51 #include "internal-fn.h"
52 #include "tree-vector-builder.h"
53 #include "vec-perm-indices.h"
54 #include "tree-ssa-loop-niter.h"
55 #include "gimple-fold.h"
56 #include "regs.h"
57 #include "attribs.h"
58
59 /* For lang_hooks.types.type_for_mode. */
60 #include "langhooks.h"
61
62 /* Return the vectorized type for the given statement. */
63
64 tree
65 stmt_vectype (class _stmt_vec_info *stmt_info)
66 {
67 return STMT_VINFO_VECTYPE (stmt_info);
68 }
69
70 /* Return TRUE iff the given statement is in an inner loop relative to
71 the loop being vectorized. */
72 bool
73 stmt_in_inner_loop_p (class _stmt_vec_info *stmt_info)
74 {
75 gimple *stmt = STMT_VINFO_STMT (stmt_info);
76 basic_block bb = gimple_bb (stmt);
77 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
78 class loop* loop;
79
80 if (!loop_vinfo)
81 return false;
82
83 loop = LOOP_VINFO_LOOP (loop_vinfo);
84
85 return (bb->loop_father == loop->inner);
86 }
87
88 /* Record the cost of a statement, either by directly informing the
89 target model or by saving it in a vector for later processing.
90 Return a preliminary estimate of the statement's cost. */
91
92 unsigned
93 record_stmt_cost (stmt_vector_for_cost *body_cost_vec, int count,
94 enum vect_cost_for_stmt kind, stmt_vec_info stmt_info,
95 int misalign, enum vect_cost_model_location where)
96 {
97 if ((kind == vector_load || kind == unaligned_load)
98 && STMT_VINFO_GATHER_SCATTER_P (stmt_info))
99 kind = vector_gather_load;
100 if ((kind == vector_store || kind == unaligned_store)
101 && STMT_VINFO_GATHER_SCATTER_P (stmt_info))
102 kind = vector_scatter_store;
103
104 stmt_info_for_cost si = { count, kind, where, stmt_info, misalign };
105 body_cost_vec->safe_push (si);
106
107 tree vectype = stmt_info ? stmt_vectype (stmt_info) : NULL_TREE;
108 return (unsigned)
109 (builtin_vectorization_cost (kind, vectype, misalign) * count);
110 }
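   /* Illustrative usage sketch (names as used elsewhere in this file): a
      caller costing NCOPIES unaligned vector loads in the loop body would
      accumulate

	inside_cost += record_stmt_cost (cost_vec, ncopies, unaligned_load,
					 stmt_info, DR_MISALIGNMENT (dr_info),
					 vect_body);

      mirroring the calls in vect_get_load_cost and vect_get_store_cost
      further down.  */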
111
112 /* Return a variable of type ELEM_TYPE[NELEMS]. */
113
114 static tree
115 create_vector_array (tree elem_type, unsigned HOST_WIDE_INT nelems)
116 {
117 return create_tmp_var (build_array_type_nelts (elem_type, nelems),
118 "vect_array");
119 }
120
121 /* ARRAY is an array of vectors created by create_vector_array.
122 Return an SSA_NAME for the vector in index N. The reference
123 is part of the vectorization of STMT_INFO and the vector is associated
124 with scalar destination SCALAR_DEST. */
125
126 static tree
127 read_vector_array (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
128 tree scalar_dest, tree array, unsigned HOST_WIDE_INT n)
129 {
130 tree vect_type, vect, vect_name, array_ref;
131 gimple *new_stmt;
132
133 gcc_assert (TREE_CODE (TREE_TYPE (array)) == ARRAY_TYPE);
134 vect_type = TREE_TYPE (TREE_TYPE (array));
135 vect = vect_create_destination_var (scalar_dest, vect_type);
136 array_ref = build4 (ARRAY_REF, vect_type, array,
137 build_int_cst (size_type_node, n),
138 NULL_TREE, NULL_TREE);
139
140 new_stmt = gimple_build_assign (vect, array_ref);
141 vect_name = make_ssa_name (vect, new_stmt);
142 gimple_assign_set_lhs (new_stmt, vect_name);
143 vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
144
145 return vect_name;
146 }
147
148 /* ARRAY is an array of vectors created by create_vector_array.
149 Emit code to store SSA_NAME VECT in index N of the array.
150 The store is part of the vectorization of STMT_INFO. */
151
152 static void
153 write_vector_array (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
154 tree vect, tree array, unsigned HOST_WIDE_INT n)
155 {
156 tree array_ref;
157 gimple *new_stmt;
158
159 array_ref = build4 (ARRAY_REF, TREE_TYPE (vect), array,
160 build_int_cst (size_type_node, n),
161 NULL_TREE, NULL_TREE);
162
163 new_stmt = gimple_build_assign (array_ref, vect);
164 vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
165 }
166
167 /* PTR is a pointer to an array of type TYPE. Return a representation
168 of *PTR. The memory reference replaces those in FIRST_DR
169 (and its group). */
170
171 static tree
172 create_array_ref (tree type, tree ptr, tree alias_ptr_type)
173 {
174 tree mem_ref;
175
176 mem_ref = build2 (MEM_REF, type, ptr, build_int_cst (alias_ptr_type, 0));
177 /* Arrays have the same alignment as their type. */
178 set_ptr_info_alignment (get_ptr_info (ptr), TYPE_ALIGN_UNIT (type), 0);
179 return mem_ref;
180 }
181
182 /* Add a clobber of variable VAR to the vectorization of STMT_INFO.
183 Emit the clobber before *GSI. */
184
185 static void
186 vect_clobber_variable (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
187 tree var)
188 {
189 tree clobber = build_clobber (TREE_TYPE (var));
190 gimple *new_stmt = gimple_build_assign (var, clobber);
191 vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
192 }
193
194 /* Utility functions used by vect_mark_stmts_to_be_vectorized. */
195
196 /* Function vect_mark_relevant.
197
198 Mark STMT_INFO as "relevant for vectorization" and add it to WORKLIST. */
199
200 static void
201 vect_mark_relevant (vec<stmt_vec_info> *worklist, stmt_vec_info stmt_info,
202 enum vect_relevant relevant, bool live_p)
203 {
204 enum vect_relevant save_relevant = STMT_VINFO_RELEVANT (stmt_info);
205 bool save_live_p = STMT_VINFO_LIVE_P (stmt_info);
206
207 if (dump_enabled_p ())
208 dump_printf_loc (MSG_NOTE, vect_location,
209 "mark relevant %d, live %d: %G", relevant, live_p,
210 stmt_info->stmt);
211
212 /* If this stmt is an original stmt in a pattern, we might need to mark its
213 related pattern stmt instead of the original stmt. However, such stmts
 214	     may have their own uses that are not in any pattern; in such cases the
215 stmt itself should be marked. */
216 if (STMT_VINFO_IN_PATTERN_P (stmt_info))
217 {
218 /* This is the last stmt in a sequence that was detected as a
219 pattern that can potentially be vectorized. Don't mark the stmt
220 as relevant/live because it's not going to be vectorized.
221 Instead mark the pattern-stmt that replaces it. */
222
223 if (dump_enabled_p ())
224 dump_printf_loc (MSG_NOTE, vect_location,
225 "last stmt in pattern. don't mark"
226 " relevant/live.\n");
227 stmt_vec_info old_stmt_info = stmt_info;
228 stmt_info = STMT_VINFO_RELATED_STMT (stmt_info);
229 gcc_assert (STMT_VINFO_RELATED_STMT (stmt_info) == old_stmt_info);
230 save_relevant = STMT_VINFO_RELEVANT (stmt_info);
231 save_live_p = STMT_VINFO_LIVE_P (stmt_info);
232 }
233
234 STMT_VINFO_LIVE_P (stmt_info) |= live_p;
235 if (relevant > STMT_VINFO_RELEVANT (stmt_info))
236 STMT_VINFO_RELEVANT (stmt_info) = relevant;
237
238 if (STMT_VINFO_RELEVANT (stmt_info) == save_relevant
239 && STMT_VINFO_LIVE_P (stmt_info) == save_live_p)
240 {
241 if (dump_enabled_p ())
242 dump_printf_loc (MSG_NOTE, vect_location,
243 "already marked relevant/live.\n");
244 return;
245 }
246
247 worklist->safe_push (stmt_info);
248 }
249
250
251 /* Function is_simple_and_all_uses_invariant
252
253 Return true if STMT_INFO is simple and all uses of it are invariant. */
254
255 bool
256 is_simple_and_all_uses_invariant (stmt_vec_info stmt_info,
257 loop_vec_info loop_vinfo)
258 {
259 tree op;
260 ssa_op_iter iter;
261
262 gassign *stmt = dyn_cast <gassign *> (stmt_info->stmt);
263 if (!stmt)
264 return false;
265
266 FOR_EACH_SSA_TREE_OPERAND (op, stmt, iter, SSA_OP_USE)
267 {
268 enum vect_def_type dt = vect_uninitialized_def;
269
270 if (!vect_is_simple_use (op, loop_vinfo, &dt))
271 {
272 if (dump_enabled_p ())
273 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
274 "use not simple.\n");
275 return false;
276 }
277
278 if (dt != vect_external_def && dt != vect_constant_def)
279 return false;
280 }
281 return true;
282 }
283
284 /* Function vect_stmt_relevant_p.
285
286 Return true if STMT_INFO, in the loop that is represented by LOOP_VINFO,
287 is "relevant for vectorization".
288
289 A stmt is considered "relevant for vectorization" if:
290 - it has uses outside the loop.
291 - it has vdefs (it alters memory).
 292	   - it is a control stmt in the loop (except for the exit condition).
293
294 CHECKME: what other side effects would the vectorizer allow? */
295
296 static bool
297 vect_stmt_relevant_p (stmt_vec_info stmt_info, loop_vec_info loop_vinfo,
298 enum vect_relevant *relevant, bool *live_p)
299 {
300 class loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
301 ssa_op_iter op_iter;
302 imm_use_iterator imm_iter;
303 use_operand_p use_p;
304 def_operand_p def_p;
305
306 *relevant = vect_unused_in_scope;
307 *live_p = false;
308
309 /* cond stmt other than loop exit cond. */
310 if (is_ctrl_stmt (stmt_info->stmt)
311 && STMT_VINFO_TYPE (stmt_info) != loop_exit_ctrl_vec_info_type)
312 *relevant = vect_used_in_scope;
313
314 /* changing memory. */
315 if (gimple_code (stmt_info->stmt) != GIMPLE_PHI)
316 if (gimple_vdef (stmt_info->stmt)
317 && !gimple_clobber_p (stmt_info->stmt))
318 {
319 if (dump_enabled_p ())
320 dump_printf_loc (MSG_NOTE, vect_location,
321 "vec_stmt_relevant_p: stmt has vdefs.\n");
322 *relevant = vect_used_in_scope;
323 }
324
325 /* uses outside the loop. */
326 FOR_EACH_PHI_OR_STMT_DEF (def_p, stmt_info->stmt, op_iter, SSA_OP_DEF)
327 {
328 FOR_EACH_IMM_USE_FAST (use_p, imm_iter, DEF_FROM_PTR (def_p))
329 {
330 basic_block bb = gimple_bb (USE_STMT (use_p));
331 if (!flow_bb_inside_loop_p (loop, bb))
332 {
333 if (is_gimple_debug (USE_STMT (use_p)))
334 continue;
335
336 if (dump_enabled_p ())
337 dump_printf_loc (MSG_NOTE, vect_location,
338 "vec_stmt_relevant_p: used out of loop.\n");
339
340 /* We expect all such uses to be in the loop exit phis
341 (because of loop closed form) */
342 gcc_assert (gimple_code (USE_STMT (use_p)) == GIMPLE_PHI);
343 gcc_assert (bb == single_exit (loop)->dest);
344
345 *live_p = true;
346 }
347 }
348 }
349
350 if (*live_p && *relevant == vect_unused_in_scope
351 && !is_simple_and_all_uses_invariant (stmt_info, loop_vinfo))
352 {
353 if (dump_enabled_p ())
354 dump_printf_loc (MSG_NOTE, vect_location,
355 "vec_stmt_relevant_p: stmt live but not relevant.\n");
356 *relevant = vect_used_only_live;
357 }
358
359 return (*live_p || *relevant);
360 }
361
362
363 /* Function exist_non_indexing_operands_for_use_p
364
365 USE is one of the uses attached to STMT_INFO. Check if USE is
366 used in STMT_INFO for anything other than indexing an array. */
367
368 static bool
369 exist_non_indexing_operands_for_use_p (tree use, stmt_vec_info stmt_info)
370 {
371 tree operand;
372
373 /* USE corresponds to some operand in STMT. If there is no data
374 reference in STMT, then any operand that corresponds to USE
375 is not indexing an array. */
376 if (!STMT_VINFO_DATA_REF (stmt_info))
377 return true;
378
 379	  /* STMT has a data_ref.  FORNOW this means that it is of one of
380 the following forms:
381 -1- ARRAY_REF = var
382 -2- var = ARRAY_REF
383 (This should have been verified in analyze_data_refs).
384
385 'var' in the second case corresponds to a def, not a use,
386 so USE cannot correspond to any operands that are not used
387 for array indexing.
388
389 Therefore, all we need to check is if STMT falls into the
390 first case, and whether var corresponds to USE. */
391
392 gassign *assign = dyn_cast <gassign *> (stmt_info->stmt);
393 if (!assign || !gimple_assign_copy_p (assign))
394 {
395 gcall *call = dyn_cast <gcall *> (stmt_info->stmt);
396 if (call && gimple_call_internal_p (call))
397 {
398 internal_fn ifn = gimple_call_internal_fn (call);
399 int mask_index = internal_fn_mask_index (ifn);
400 if (mask_index >= 0
401 && use == gimple_call_arg (call, mask_index))
402 return true;
403 int stored_value_index = internal_fn_stored_value_index (ifn);
404 if (stored_value_index >= 0
405 && use == gimple_call_arg (call, stored_value_index))
406 return true;
407 if (internal_gather_scatter_fn_p (ifn)
408 && use == gimple_call_arg (call, 1))
409 return true;
410 }
411 return false;
412 }
413
414 if (TREE_CODE (gimple_assign_lhs (assign)) == SSA_NAME)
415 return false;
416 operand = gimple_assign_rhs1 (assign);
417 if (TREE_CODE (operand) != SSA_NAME)
418 return false;
419
420 if (operand == use)
421 return true;
422
423 return false;
424 }
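   /* For example (illustrative GIMPLE), in a masked store such as

	.MASK_STORE (ptr_1, align_2, mask_3, val_4);

      the mask and stored-value arguments count as non-indexing uses,
      whereas a use that only feeds the address ptr_1 does not.  */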
425
426
427 /*
428 Function process_use.
429
430 Inputs:
431 - a USE in STMT_VINFO in a loop represented by LOOP_VINFO
432 - RELEVANT - enum value to be set in the STMT_VINFO of the stmt
433 that defined USE. This is done by calling mark_relevant and passing it
434 the WORKLIST (to add DEF_STMT to the WORKLIST in case it is relevant).
435 - FORCE is true if exist_non_indexing_operands_for_use_p check shouldn't
436 be performed.
437
438 Outputs:
439 Generally, LIVE_P and RELEVANT are used to define the liveness and
440 relevance info of the DEF_STMT of this USE:
441 STMT_VINFO_LIVE_P (DEF_stmt_vinfo) <-- live_p
442 STMT_VINFO_RELEVANT (DEF_stmt_vinfo) <-- relevant
443 Exceptions:
444 - case 1: If USE is used only for address computations (e.g. array indexing),
445 which does not need to be directly vectorized, then the liveness/relevance
446 of the respective DEF_STMT is left unchanged.
 447	   - case 2: If STMT_VINFO is a reduction phi and DEF_STMT is a reduction stmt,
 448	   DEF_STMT is also forced live, since the epilogue loop needs it to continue the reduction.
449 - case 3: If DEF_STMT and STMT_VINFO are in different nests, then
450 "relevant" will be modified accordingly.
451
452 Return true if everything is as expected. Return false otherwise. */
453
454 static opt_result
455 process_use (stmt_vec_info stmt_vinfo, tree use, loop_vec_info loop_vinfo,
456 enum vect_relevant relevant, vec<stmt_vec_info> *worklist,
457 bool force)
458 {
459 stmt_vec_info dstmt_vinfo;
460 enum vect_def_type dt;
461
462 /* case 1: we are only interested in uses that need to be vectorized. Uses
463 that are used for address computation are not considered relevant. */
464 if (!force && !exist_non_indexing_operands_for_use_p (use, stmt_vinfo))
465 return opt_result::success ();
466
467 if (!vect_is_simple_use (use, loop_vinfo, &dt, &dstmt_vinfo))
468 return opt_result::failure_at (stmt_vinfo->stmt,
469 "not vectorized:"
470 " unsupported use in stmt.\n");
471
472 if (!dstmt_vinfo)
473 return opt_result::success ();
474
475 basic_block def_bb = gimple_bb (dstmt_vinfo->stmt);
476 basic_block bb = gimple_bb (stmt_vinfo->stmt);
477
478 /* case 2: A reduction phi (STMT) defined by a reduction stmt (DSTMT_VINFO).
479 We have to force the stmt live since the epilogue loop needs it to
480 continue computing the reduction. */
481 if (gimple_code (stmt_vinfo->stmt) == GIMPLE_PHI
482 && STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_reduction_def
483 && gimple_code (dstmt_vinfo->stmt) != GIMPLE_PHI
484 && STMT_VINFO_DEF_TYPE (dstmt_vinfo) == vect_reduction_def
485 && bb->loop_father == def_bb->loop_father)
486 {
487 if (dump_enabled_p ())
488 dump_printf_loc (MSG_NOTE, vect_location,
489 "reduc-stmt defining reduc-phi in the same nest.\n");
490 vect_mark_relevant (worklist, dstmt_vinfo, relevant, true);
491 return opt_result::success ();
492 }
493
494 /* case 3a: outer-loop stmt defining an inner-loop stmt:
495 outer-loop-header-bb:
496 d = dstmt_vinfo
497 inner-loop:
498 stmt # use (d)
499 outer-loop-tail-bb:
500 ... */
501 if (flow_loop_nested_p (def_bb->loop_father, bb->loop_father))
502 {
503 if (dump_enabled_p ())
504 dump_printf_loc (MSG_NOTE, vect_location,
505 "outer-loop def-stmt defining inner-loop stmt.\n");
506
507 switch (relevant)
508 {
509 case vect_unused_in_scope:
510 relevant = (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_nested_cycle) ?
511 vect_used_in_scope : vect_unused_in_scope;
512 break;
513
514 case vect_used_in_outer_by_reduction:
515 gcc_assert (STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_reduction_def);
516 relevant = vect_used_by_reduction;
517 break;
518
519 case vect_used_in_outer:
520 gcc_assert (STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_reduction_def);
521 relevant = vect_used_in_scope;
522 break;
523
524 case vect_used_in_scope:
525 break;
526
527 default:
528 gcc_unreachable ();
529 }
530 }
531
532 /* case 3b: inner-loop stmt defining an outer-loop stmt:
533 outer-loop-header-bb:
534 ...
535 inner-loop:
536 d = dstmt_vinfo
537 outer-loop-tail-bb (or outer-loop-exit-bb in double reduction):
538 stmt # use (d) */
539 else if (flow_loop_nested_p (bb->loop_father, def_bb->loop_father))
540 {
541 if (dump_enabled_p ())
542 dump_printf_loc (MSG_NOTE, vect_location,
543 "inner-loop def-stmt defining outer-loop stmt.\n");
544
545 switch (relevant)
546 {
547 case vect_unused_in_scope:
548 relevant = (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_reduction_def
549 || STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_double_reduction_def) ?
550 vect_used_in_outer_by_reduction : vect_unused_in_scope;
551 break;
552
553 case vect_used_by_reduction:
554 case vect_used_only_live:
555 relevant = vect_used_in_outer_by_reduction;
556 break;
557
558 case vect_used_in_scope:
559 relevant = vect_used_in_outer;
560 break;
561
562 default:
563 gcc_unreachable ();
564 }
565 }
566 /* We are also not interested in uses on loop PHI backedges that are
567 inductions. Otherwise we'll needlessly vectorize the IV increment
568 and cause hybrid SLP for SLP inductions. Unless the PHI is live
569 of course. */
570 else if (gimple_code (stmt_vinfo->stmt) == GIMPLE_PHI
571 && STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_induction_def
572 && ! STMT_VINFO_LIVE_P (stmt_vinfo)
573 && (PHI_ARG_DEF_FROM_EDGE (stmt_vinfo->stmt,
574 loop_latch_edge (bb->loop_father))
575 == use))
576 {
577 if (dump_enabled_p ())
578 dump_printf_loc (MSG_NOTE, vect_location,
579 "induction value on backedge.\n");
580 return opt_result::success ();
581 }
582
583
584 vect_mark_relevant (worklist, dstmt_vinfo, relevant, false);
585 return opt_result::success ();
586 }
587
588
589 /* Function vect_mark_stmts_to_be_vectorized.
590
591 Not all stmts in the loop need to be vectorized. For example:
592
593 for i...
594 for j...
595 1. T0 = i + j
596 2. T1 = a[T0]
597
598 3. j = j + 1
599
 600	   Stmts 1 and 3 do not need to be vectorized, because loop control and
601 addressing of vectorized data-refs are handled differently.
602
603 This pass detects such stmts. */
604
605 opt_result
606 vect_mark_stmts_to_be_vectorized (loop_vec_info loop_vinfo, bool *fatal)
607 {
608 class loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
609 basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
610 unsigned int nbbs = loop->num_nodes;
611 gimple_stmt_iterator si;
612 unsigned int i;
613 basic_block bb;
614 bool live_p;
615 enum vect_relevant relevant;
616
617 DUMP_VECT_SCOPE ("vect_mark_stmts_to_be_vectorized");
618
619 auto_vec<stmt_vec_info, 64> worklist;
620
621 /* 1. Init worklist. */
622 for (i = 0; i < nbbs; i++)
623 {
624 bb = bbs[i];
625 for (si = gsi_start_phis (bb); !gsi_end_p (si); gsi_next (&si))
626 {
627 stmt_vec_info phi_info = loop_vinfo->lookup_stmt (gsi_stmt (si));
628 if (dump_enabled_p ())
629 dump_printf_loc (MSG_NOTE, vect_location, "init: phi relevant? %G",
630 phi_info->stmt);
631
632 if (vect_stmt_relevant_p (phi_info, loop_vinfo, &relevant, &live_p))
633 vect_mark_relevant (&worklist, phi_info, relevant, live_p);
634 }
635 for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
636 {
637 stmt_vec_info stmt_info = loop_vinfo->lookup_stmt (gsi_stmt (si));
638 if (dump_enabled_p ())
639 dump_printf_loc (MSG_NOTE, vect_location,
640 "init: stmt relevant? %G", stmt_info->stmt);
641
642 if (vect_stmt_relevant_p (stmt_info, loop_vinfo, &relevant, &live_p))
643 vect_mark_relevant (&worklist, stmt_info, relevant, live_p);
644 }
645 }
646
647 /* 2. Process_worklist */
648 while (worklist.length () > 0)
649 {
650 use_operand_p use_p;
651 ssa_op_iter iter;
652
653 stmt_vec_info stmt_vinfo = worklist.pop ();
654 if (dump_enabled_p ())
655 dump_printf_loc (MSG_NOTE, vect_location,
656 "worklist: examine stmt: %G", stmt_vinfo->stmt);
657
658 /* Examine the USEs of STMT. For each USE, mark the stmt that defines it
659 (DEF_STMT) as relevant/irrelevant according to the relevance property
660 of STMT. */
661 relevant = STMT_VINFO_RELEVANT (stmt_vinfo);
662
663 /* Generally, the relevance property of STMT (in STMT_VINFO_RELEVANT) is
664 propagated as is to the DEF_STMTs of its USEs.
665
666 One exception is when STMT has been identified as defining a reduction
667 variable; in this case we set the relevance to vect_used_by_reduction.
668 This is because we distinguish between two kinds of relevant stmts -
669 those that are used by a reduction computation, and those that are
670 (also) used by a regular computation. This allows us later on to
671 identify stmts that are used solely by a reduction, and therefore the
672 order of the results that they produce does not have to be kept. */
673
674 switch (STMT_VINFO_DEF_TYPE (stmt_vinfo))
675 {
676 case vect_reduction_def:
677 gcc_assert (relevant != vect_unused_in_scope);
678 if (relevant != vect_unused_in_scope
679 && relevant != vect_used_in_scope
680 && relevant != vect_used_by_reduction
681 && relevant != vect_used_only_live)
682 return opt_result::failure_at
683 (stmt_vinfo->stmt, "unsupported use of reduction.\n");
684 break;
685
686 case vect_nested_cycle:
687 if (relevant != vect_unused_in_scope
688 && relevant != vect_used_in_outer_by_reduction
689 && relevant != vect_used_in_outer)
690 return opt_result::failure_at
691 (stmt_vinfo->stmt, "unsupported use of nested cycle.\n");
692 break;
693
694 case vect_double_reduction_def:
695 if (relevant != vect_unused_in_scope
696 && relevant != vect_used_by_reduction
697 && relevant != vect_used_only_live)
698 return opt_result::failure_at
699 (stmt_vinfo->stmt, "unsupported use of double reduction.\n");
700 break;
701
702 default:
703 break;
704 }
705
706 if (is_pattern_stmt_p (stmt_vinfo))
707 {
708 /* Pattern statements are not inserted into the code, so
709 FOR_EACH_PHI_OR_STMT_USE optimizes their operands out, and we
710 have to scan the RHS or function arguments instead. */
711 if (gassign *assign = dyn_cast <gassign *> (stmt_vinfo->stmt))
712 {
713 enum tree_code rhs_code = gimple_assign_rhs_code (assign);
714 tree op = gimple_assign_rhs1 (assign);
715
716 i = 1;
717 if (rhs_code == COND_EXPR && COMPARISON_CLASS_P (op))
718 {
719 opt_result res
720 = process_use (stmt_vinfo, TREE_OPERAND (op, 0),
721 loop_vinfo, relevant, &worklist, false);
722 if (!res)
723 return res;
724 res = process_use (stmt_vinfo, TREE_OPERAND (op, 1),
725 loop_vinfo, relevant, &worklist, false);
726 if (!res)
727 return res;
728 i = 2;
729 }
730 for (; i < gimple_num_ops (assign); i++)
731 {
732 op = gimple_op (assign, i);
733 if (TREE_CODE (op) == SSA_NAME)
734 {
735 opt_result res
736 = process_use (stmt_vinfo, op, loop_vinfo, relevant,
737 &worklist, false);
738 if (!res)
739 return res;
740 }
741 }
742 }
743 else if (gcall *call = dyn_cast <gcall *> (stmt_vinfo->stmt))
744 {
745 for (i = 0; i < gimple_call_num_args (call); i++)
746 {
747 tree arg = gimple_call_arg (call, i);
748 opt_result res
749 = process_use (stmt_vinfo, arg, loop_vinfo, relevant,
750 &worklist, false);
751 if (!res)
752 return res;
753 }
754 }
755 }
756 else
757 FOR_EACH_PHI_OR_STMT_USE (use_p, stmt_vinfo->stmt, iter, SSA_OP_USE)
758 {
759 tree op = USE_FROM_PTR (use_p);
760 opt_result res
761 = process_use (stmt_vinfo, op, loop_vinfo, relevant,
762 &worklist, false);
763 if (!res)
764 return res;
765 }
766
767 if (STMT_VINFO_GATHER_SCATTER_P (stmt_vinfo))
768 {
769 gather_scatter_info gs_info;
770 if (!vect_check_gather_scatter (stmt_vinfo, loop_vinfo, &gs_info))
771 gcc_unreachable ();
772 opt_result res
773 = process_use (stmt_vinfo, gs_info.offset, loop_vinfo, relevant,
774 &worklist, true);
775 if (!res)
776 {
777 if (fatal)
778 *fatal = false;
779 return res;
780 }
781 }
782 } /* while worklist */
783
784 return opt_result::success ();
785 }
786
787 /* Compute the prologue cost for invariant or constant operands. */
788
789 static unsigned
790 vect_prologue_cost_for_slp_op (slp_tree node, stmt_vec_info stmt_info,
791 unsigned opno, enum vect_def_type dt,
792 stmt_vector_for_cost *cost_vec)
793 {
794 vec_info *vinfo = stmt_info->vinfo;
795 gimple *stmt = SLP_TREE_SCALAR_STMTS (node)[0]->stmt;
796 tree op = gimple_op (stmt, opno);
797 unsigned prologue_cost = 0;
798
799 /* Without looking at the actual initializer a vector of
 800	     constants can be implemented as a load from the constant pool.
801 When all elements are the same we can use a splat. */
802 tree vectype = get_vectype_for_scalar_type (vinfo, TREE_TYPE (op));
803 unsigned group_size = SLP_TREE_SCALAR_STMTS (node).length ();
804 unsigned num_vects_to_check;
805 unsigned HOST_WIDE_INT const_nunits;
806 unsigned nelt_limit;
807 if (TYPE_VECTOR_SUBPARTS (vectype).is_constant (&const_nunits)
808 && ! multiple_p (const_nunits, group_size))
809 {
810 num_vects_to_check = SLP_TREE_NUMBER_OF_VEC_STMTS (node);
811 nelt_limit = const_nunits;
812 }
813 else
814 {
815 /* If either the vector has variable length or the vectors
816 are composed of repeated whole groups we only need to
817 cost construction once. All vectors will be the same. */
818 num_vects_to_check = 1;
819 nelt_limit = group_size;
820 }
821 tree elt = NULL_TREE;
822 unsigned nelt = 0;
823 for (unsigned j = 0; j < num_vects_to_check * nelt_limit; ++j)
824 {
825 unsigned si = j % group_size;
826 if (nelt == 0)
827 elt = gimple_op (SLP_TREE_SCALAR_STMTS (node)[si]->stmt, opno);
828 /* ??? We're just tracking whether all operands of a single
829 vector initializer are the same, ideally we'd check if
830 we emitted the same one already. */
831 else if (elt != gimple_op (SLP_TREE_SCALAR_STMTS (node)[si]->stmt,
832 opno))
833 elt = NULL_TREE;
834 nelt++;
835 if (nelt == nelt_limit)
836 {
837 /* ??? We need to pass down stmt_info for a vector type
838 even if it points to the wrong stmt. */
839 prologue_cost += record_stmt_cost
840 (cost_vec, 1,
841 dt == vect_external_def
842 ? (elt ? scalar_to_vec : vec_construct)
843 : vector_load,
844 stmt_info, 0, vect_prologue);
845 nelt = 0;
846 }
847 }
848
849 return prologue_cost;
850 }
851
852 /* Function vect_model_simple_cost.
853
854 Models cost for simple operations, i.e. those that only emit ncopies of a
855 single op. Right now, this does not account for multiple insns that could
856 be generated for the single vector op. We will handle that shortly. */
857
858 static void
859 vect_model_simple_cost (stmt_vec_info stmt_info, int ncopies,
860 enum vect_def_type *dt,
861 int ndts,
862 slp_tree node,
863 stmt_vector_for_cost *cost_vec)
864 {
865 int inside_cost = 0, prologue_cost = 0;
866
867 gcc_assert (cost_vec != NULL);
868
869 /* ??? Somehow we need to fix this at the callers. */
870 if (node)
871 ncopies = SLP_TREE_NUMBER_OF_VEC_STMTS (node);
872
873 if (node)
874 {
875 /* Scan operands and account for prologue cost of constants/externals.
876 ??? This over-estimates cost for multiple uses and should be
877 re-engineered. */
878 gimple *stmt = SLP_TREE_SCALAR_STMTS (node)[0]->stmt;
879 tree lhs = gimple_get_lhs (stmt);
880 for (unsigned i = 0; i < gimple_num_ops (stmt); ++i)
881 {
882 tree op = gimple_op (stmt, i);
883 enum vect_def_type dt;
884 if (!op || op == lhs)
885 continue;
886 if (vect_is_simple_use (op, stmt_info->vinfo, &dt)
887 && (dt == vect_constant_def || dt == vect_external_def))
888 prologue_cost += vect_prologue_cost_for_slp_op (node, stmt_info,
889 i, dt, cost_vec);
890 }
891 }
892 else
 893	    /* Cost the "broadcast" of a scalar operand into a vector operand.
894 Use scalar_to_vec to cost the broadcast, as elsewhere in the vector
895 cost model. */
896 for (int i = 0; i < ndts; i++)
897 if (dt[i] == vect_constant_def || dt[i] == vect_external_def)
898 prologue_cost += record_stmt_cost (cost_vec, 1, scalar_to_vec,
899 stmt_info, 0, vect_prologue);
900
901 /* Adjust for two-operator SLP nodes. */
902 if (node && SLP_TREE_TWO_OPERATORS (node))
903 {
904 ncopies *= 2;
905 inside_cost += record_stmt_cost (cost_vec, ncopies, vec_perm,
906 stmt_info, 0, vect_body);
907 }
908
909 /* Pass the inside-of-loop statements to the target-specific cost model. */
910 inside_cost += record_stmt_cost (cost_vec, ncopies, vector_stmt,
911 stmt_info, 0, vect_body);
912
913 if (dump_enabled_p ())
914 dump_printf_loc (MSG_NOTE, vect_location,
915 "vect_model_simple_cost: inside_cost = %d, "
916 "prologue_cost = %d .\n", inside_cost, prologue_cost);
917 }
918
919
920 /* Model cost for type demotion and promotion operations. PWR is
921 normally zero for single-step promotions and demotions. It will be
922 one if two-step promotion/demotion is required, and so on. NCOPIES
923 is the number of vector results (and thus number of instructions)
924 for the narrowest end of the operation chain. Each additional
925 step doubles the number of instructions required. */
926
927 static void
928 vect_model_promotion_demotion_cost (stmt_vec_info stmt_info,
929 enum vect_def_type *dt,
930 unsigned int ncopies, int pwr,
931 stmt_vector_for_cost *cost_vec)
932 {
933 int i;
934 int inside_cost = 0, prologue_cost = 0;
935
936 for (i = 0; i < pwr + 1; i++)
937 {
938 inside_cost += record_stmt_cost (cost_vec, ncopies, vec_promote_demote,
939 stmt_info, 0, vect_body);
940 ncopies *= 2;
941 }
942
 943	  /* FORNOW: Assuming a maximum of 2 args per stmt.  */
944 for (i = 0; i < 2; i++)
945 if (dt[i] == vect_constant_def || dt[i] == vect_external_def)
946 prologue_cost += record_stmt_cost (cost_vec, 1, vector_stmt,
947 stmt_info, 0, vect_prologue);
948
949 if (dump_enabled_p ())
950 dump_printf_loc (MSG_NOTE, vect_location,
951 "vect_model_promotion_demotion_cost: inside_cost = %d, "
952 "prologue_cost = %d .\n", inside_cost, prologue_cost);
953 }
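 /* Worked example (illustrative numbers): for a two-step promotion,
    i.e. PWR == 1, and NCOPIES == 2, the loop above records body costs for
    2 + 4 = 6 vec_promote_demote statements, since NCOPIES doubles after
    each step.  */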
954
955 /* Returns true if the current function returns DECL. */
956
957 static bool
958 cfun_returns (tree decl)
959 {
960 edge_iterator ei;
961 edge e;
962 FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR_FOR_FN (cfun)->preds)
963 {
964 greturn *ret = safe_dyn_cast <greturn *> (last_stmt (e->src));
965 if (!ret)
966 continue;
967 if (gimple_return_retval (ret) == decl)
968 return true;
969 /* We often end up with an aggregate copy to the result decl,
970 handle that case as well. First skip intermediate clobbers
971 though. */
972 gimple *def = ret;
973 do
974 {
975 def = SSA_NAME_DEF_STMT (gimple_vuse (def));
976 }
977 while (gimple_clobber_p (def));
978 if (is_a <gassign *> (def)
979 && gimple_assign_lhs (def) == gimple_return_retval (ret)
980 && gimple_assign_rhs1 (def) == decl)
981 return true;
982 }
983 return false;
984 }
985
986 /* Function vect_model_store_cost
987
988 Models cost for stores. In the case of grouped accesses, one access
989 has the overhead of the grouped access attributed to it. */
990
991 static void
992 vect_model_store_cost (stmt_vec_info stmt_info, int ncopies,
993 enum vect_def_type dt,
994 vect_memory_access_type memory_access_type,
995 vec_load_store_type vls_type, slp_tree slp_node,
996 stmt_vector_for_cost *cost_vec)
997 {
998 unsigned int inside_cost = 0, prologue_cost = 0;
999 stmt_vec_info first_stmt_info = stmt_info;
1000 bool grouped_access_p = STMT_VINFO_GROUPED_ACCESS (stmt_info);
1001
1002 /* ??? Somehow we need to fix this at the callers. */
1003 if (slp_node)
1004 ncopies = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
1005
1006 if (vls_type == VLS_STORE_INVARIANT)
1007 {
1008 if (slp_node)
1009 prologue_cost += vect_prologue_cost_for_slp_op (slp_node, stmt_info,
1010 1, dt, cost_vec);
1011 else
1012 prologue_cost += record_stmt_cost (cost_vec, 1, scalar_to_vec,
1013 stmt_info, 0, vect_prologue);
1014 }
1015
1016 /* Grouped stores update all elements in the group at once,
1017 so we want the DR for the first statement. */
1018 if (!slp_node && grouped_access_p)
1019 first_stmt_info = DR_GROUP_FIRST_ELEMENT (stmt_info);
1020
1021 /* True if we should include any once-per-group costs as well as
1022 the cost of the statement itself. For SLP we only get called
1023 once per group anyhow. */
1024 bool first_stmt_p = (first_stmt_info == stmt_info);
1025
1026 /* We assume that the cost of a single store-lanes instruction is
1027 equivalent to the cost of DR_GROUP_SIZE separate stores. If a grouped
1028 access is instead being provided by a permute-and-store operation,
1029 include the cost of the permutes. */
1030 if (first_stmt_p
1031 && memory_access_type == VMAT_CONTIGUOUS_PERMUTE)
1032 {
 1033	      /* Uses high and low interleave or shuffle operations for each
 1034	 needed permute.  */
1035 int group_size = DR_GROUP_SIZE (first_stmt_info);
1036 int nstmts = ncopies * ceil_log2 (group_size) * group_size;
1037 inside_cost = record_stmt_cost (cost_vec, nstmts, vec_perm,
1038 stmt_info, 0, vect_body);
1039
1040 if (dump_enabled_p ())
1041 dump_printf_loc (MSG_NOTE, vect_location,
1042 "vect_model_store_cost: strided group_size = %d .\n",
1043 group_size);
1044 }
1045
1046 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
1047 /* Costs of the stores. */
1048 if (memory_access_type == VMAT_ELEMENTWISE
1049 || memory_access_type == VMAT_GATHER_SCATTER)
1050 {
1051 /* N scalar stores plus extracting the elements. */
1052 unsigned int assumed_nunits = vect_nunits_for_cost (vectype);
1053 inside_cost += record_stmt_cost (cost_vec,
1054 ncopies * assumed_nunits,
1055 scalar_store, stmt_info, 0, vect_body);
1056 }
1057 else
1058 vect_get_store_cost (stmt_info, ncopies, &inside_cost, cost_vec);
1059
1060 if (memory_access_type == VMAT_ELEMENTWISE
1061 || memory_access_type == VMAT_STRIDED_SLP)
1062 {
1063 /* N scalar stores plus extracting the elements. */
1064 unsigned int assumed_nunits = vect_nunits_for_cost (vectype);
1065 inside_cost += record_stmt_cost (cost_vec,
1066 ncopies * assumed_nunits,
1067 vec_to_scalar, stmt_info, 0, vect_body);
1068 }
1069
1070 /* When vectorizing a store into the function result assign
1071 a penalty if the function returns in a multi-register location.
1072 In this case we assume we'll end up with having to spill the
1073 vector result and do piecewise loads as a conservative estimate. */
1074 tree base = get_base_address (STMT_VINFO_DATA_REF (stmt_info)->ref);
1075 if (base
1076 && (TREE_CODE (base) == RESULT_DECL
1077 || (DECL_P (base) && cfun_returns (base)))
1078 && !aggregate_value_p (base, cfun->decl))
1079 {
1080 rtx reg = hard_function_value (TREE_TYPE (base), cfun->decl, 0, 1);
1081 /* ??? Handle PARALLEL in some way. */
1082 if (REG_P (reg))
1083 {
1084 int nregs = hard_regno_nregs (REGNO (reg), GET_MODE (reg));
1085 /* Assume that a single reg-reg move is possible and cheap,
1086 do not account for vector to gp register move cost. */
1087 if (nregs > 1)
1088 {
1089 /* Spill. */
1090 prologue_cost += record_stmt_cost (cost_vec, ncopies,
1091 vector_store,
1092 stmt_info, 0, vect_epilogue);
1093 /* Loads. */
1094 prologue_cost += record_stmt_cost (cost_vec, ncopies * nregs,
1095 scalar_load,
1096 stmt_info, 0, vect_epilogue);
1097 }
1098 }
1099 }
1100
1101 if (dump_enabled_p ())
1102 dump_printf_loc (MSG_NOTE, vect_location,
1103 "vect_model_store_cost: inside_cost = %d, "
1104 "prologue_cost = %d .\n", inside_cost, prologue_cost);
1105 }
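 /* Worked example (illustrative numbers): for a permuted contiguous group
    store with DR_GROUP_SIZE == 4 and NCOPIES == 1 the code above records
    1 * ceil_log2 (4) * 4 == 8 vec_perm statements in the loop body, on top
    of the store costs added by vect_get_store_cost.  */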
1106
1107
1108 /* Calculate cost of DR's memory access. */
1109 void
1110 vect_get_store_cost (stmt_vec_info stmt_info, int ncopies,
1111 unsigned int *inside_cost,
1112 stmt_vector_for_cost *body_cost_vec)
1113 {
1114 dr_vec_info *dr_info = STMT_VINFO_DR_INFO (stmt_info);
1115 int alignment_support_scheme
1116 = vect_supportable_dr_alignment (dr_info, false);
1117
1118 switch (alignment_support_scheme)
1119 {
1120 case dr_aligned:
1121 {
1122 *inside_cost += record_stmt_cost (body_cost_vec, ncopies,
1123 vector_store, stmt_info, 0,
1124 vect_body);
1125
1126 if (dump_enabled_p ())
1127 dump_printf_loc (MSG_NOTE, vect_location,
1128 "vect_model_store_cost: aligned.\n");
1129 break;
1130 }
1131
1132 case dr_unaligned_supported:
1133 {
1134 /* Here, we assign an additional cost for the unaligned store. */
1135 *inside_cost += record_stmt_cost (body_cost_vec, ncopies,
1136 unaligned_store, stmt_info,
1137 DR_MISALIGNMENT (dr_info),
1138 vect_body);
1139 if (dump_enabled_p ())
1140 dump_printf_loc (MSG_NOTE, vect_location,
1141 "vect_model_store_cost: unaligned supported by "
1142 "hardware.\n");
1143 break;
1144 }
1145
1146 case dr_unaligned_unsupported:
1147 {
1148 *inside_cost = VECT_MAX_COST;
1149
1150 if (dump_enabled_p ())
1151 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1152 "vect_model_store_cost: unsupported access.\n");
1153 break;
1154 }
1155
1156 default:
1157 gcc_unreachable ();
1158 }
1159 }
1160
1161
1162 /* Function vect_model_load_cost
1163
1164 Models cost for loads. In the case of grouped accesses, one access has
1165 the overhead of the grouped access attributed to it. Since unaligned
1166 accesses are supported for loads, we also account for the costs of the
1167 access scheme chosen. */
1168
1169 static void
1170 vect_model_load_cost (stmt_vec_info stmt_info, unsigned ncopies,
1171 vect_memory_access_type memory_access_type,
1172 slp_instance instance,
1173 slp_tree slp_node,
1174 stmt_vector_for_cost *cost_vec)
1175 {
1176 unsigned int inside_cost = 0, prologue_cost = 0;
1177 bool grouped_access_p = STMT_VINFO_GROUPED_ACCESS (stmt_info);
1178
1179 gcc_assert (cost_vec);
1180
1181 /* ??? Somehow we need to fix this at the callers. */
1182 if (slp_node)
1183 ncopies = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
1184
1185 if (slp_node && SLP_TREE_LOAD_PERMUTATION (slp_node).exists ())
1186 {
1187 /* If the load is permuted then the alignment is determined by
1188 the first group element not by the first scalar stmt DR. */
1189 stmt_vec_info first_stmt_info = DR_GROUP_FIRST_ELEMENT (stmt_info);
1190 /* Record the cost for the permutation. */
1191 unsigned n_perms;
1192 unsigned assumed_nunits
1193 = vect_nunits_for_cost (STMT_VINFO_VECTYPE (first_stmt_info));
1194 unsigned slp_vf = (ncopies * assumed_nunits) / instance->group_size;
1195 vect_transform_slp_perm_load (slp_node, vNULL, NULL,
1196 slp_vf, instance, true,
1197 &n_perms);
1198 inside_cost += record_stmt_cost (cost_vec, n_perms, vec_perm,
1199 first_stmt_info, 0, vect_body);
1200 /* And adjust the number of loads performed. This handles
1201 redundancies as well as loads that are later dead. */
1202 auto_sbitmap perm (DR_GROUP_SIZE (first_stmt_info));
1203 bitmap_clear (perm);
1204 for (unsigned i = 0;
1205 i < SLP_TREE_LOAD_PERMUTATION (slp_node).length (); ++i)
1206 bitmap_set_bit (perm, SLP_TREE_LOAD_PERMUTATION (slp_node)[i]);
1207 ncopies = 0;
1208 bool load_seen = false;
1209 for (unsigned i = 0; i < DR_GROUP_SIZE (first_stmt_info); ++i)
1210 {
1211 if (i % assumed_nunits == 0)
1212 {
1213 if (load_seen)
1214 ncopies++;
1215 load_seen = false;
1216 }
1217 if (bitmap_bit_p (perm, i))
1218 load_seen = true;
1219 }
1220 if (load_seen)
1221 ncopies++;
1222 gcc_assert (ncopies
1223 <= (DR_GROUP_SIZE (first_stmt_info)
1224 - DR_GROUP_GAP (first_stmt_info)
1225 + assumed_nunits - 1) / assumed_nunits);
1226 }
1227
1228 /* Grouped loads read all elements in the group at once,
1229 so we want the DR for the first statement. */
1230 stmt_vec_info first_stmt_info = stmt_info;
1231 if (!slp_node && grouped_access_p)
1232 first_stmt_info = DR_GROUP_FIRST_ELEMENT (stmt_info);
1233
1234 /* True if we should include any once-per-group costs as well as
1235 the cost of the statement itself. For SLP we only get called
1236 once per group anyhow. */
1237 bool first_stmt_p = (first_stmt_info == stmt_info);
1238
1239 /* We assume that the cost of a single load-lanes instruction is
1240 equivalent to the cost of DR_GROUP_SIZE separate loads. If a grouped
1241 access is instead being provided by a load-and-permute operation,
1242 include the cost of the permutes. */
1243 if (first_stmt_p
1244 && memory_access_type == VMAT_CONTIGUOUS_PERMUTE)
1245 {
 1246	      /* Uses even and odd extract operations or shuffle operations
 1247	 for each needed permute.  */
1248 int group_size = DR_GROUP_SIZE (first_stmt_info);
1249 int nstmts = ncopies * ceil_log2 (group_size) * group_size;
1250 inside_cost += record_stmt_cost (cost_vec, nstmts, vec_perm,
1251 stmt_info, 0, vect_body);
1252
1253 if (dump_enabled_p ())
1254 dump_printf_loc (MSG_NOTE, vect_location,
1255 "vect_model_load_cost: strided group_size = %d .\n",
1256 group_size);
1257 }
1258
1259 /* The loads themselves. */
1260 if (memory_access_type == VMAT_ELEMENTWISE
1261 || memory_access_type == VMAT_GATHER_SCATTER)
1262 {
1263 /* N scalar loads plus gathering them into a vector. */
1264 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
1265 unsigned int assumed_nunits = vect_nunits_for_cost (vectype);
1266 inside_cost += record_stmt_cost (cost_vec,
1267 ncopies * assumed_nunits,
1268 scalar_load, stmt_info, 0, vect_body);
1269 }
1270 else
1271 vect_get_load_cost (stmt_info, ncopies, first_stmt_p,
1272 &inside_cost, &prologue_cost,
1273 cost_vec, cost_vec, true);
1274 if (memory_access_type == VMAT_ELEMENTWISE
1275 || memory_access_type == VMAT_STRIDED_SLP)
1276 inside_cost += record_stmt_cost (cost_vec, ncopies, vec_construct,
1277 stmt_info, 0, vect_body);
1278
1279 if (dump_enabled_p ())
1280 dump_printf_loc (MSG_NOTE, vect_location,
1281 "vect_model_load_cost: inside_cost = %d, "
1282 "prologue_cost = %d .\n", inside_cost, prologue_cost);
1283 }
1284
1285
1286 /* Calculate cost of DR's memory access. */
1287 void
1288 vect_get_load_cost (stmt_vec_info stmt_info, int ncopies,
1289 bool add_realign_cost, unsigned int *inside_cost,
1290 unsigned int *prologue_cost,
1291 stmt_vector_for_cost *prologue_cost_vec,
1292 stmt_vector_for_cost *body_cost_vec,
1293 bool record_prologue_costs)
1294 {
1295 dr_vec_info *dr_info = STMT_VINFO_DR_INFO (stmt_info);
1296 int alignment_support_scheme
1297 = vect_supportable_dr_alignment (dr_info, false);
1298
1299 switch (alignment_support_scheme)
1300 {
1301 case dr_aligned:
1302 {
1303 *inside_cost += record_stmt_cost (body_cost_vec, ncopies, vector_load,
1304 stmt_info, 0, vect_body);
1305
1306 if (dump_enabled_p ())
1307 dump_printf_loc (MSG_NOTE, vect_location,
1308 "vect_model_load_cost: aligned.\n");
1309
1310 break;
1311 }
1312 case dr_unaligned_supported:
1313 {
1314 /* Here, we assign an additional cost for the unaligned load. */
1315 *inside_cost += record_stmt_cost (body_cost_vec, ncopies,
1316 unaligned_load, stmt_info,
1317 DR_MISALIGNMENT (dr_info),
1318 vect_body);
1319
1320 if (dump_enabled_p ())
1321 dump_printf_loc (MSG_NOTE, vect_location,
1322 "vect_model_load_cost: unaligned supported by "
1323 "hardware.\n");
1324
1325 break;
1326 }
1327 case dr_explicit_realign:
1328 {
1329 *inside_cost += record_stmt_cost (body_cost_vec, ncopies * 2,
1330 vector_load, stmt_info, 0, vect_body);
1331 *inside_cost += record_stmt_cost (body_cost_vec, ncopies,
1332 vec_perm, stmt_info, 0, vect_body);
1333
1334 /* FIXME: If the misalignment remains fixed across the iterations of
1335 the containing loop, the following cost should be added to the
1336 prologue costs. */
1337 if (targetm.vectorize.builtin_mask_for_load)
1338 *inside_cost += record_stmt_cost (body_cost_vec, 1, vector_stmt,
1339 stmt_info, 0, vect_body);
1340
1341 if (dump_enabled_p ())
1342 dump_printf_loc (MSG_NOTE, vect_location,
1343 "vect_model_load_cost: explicit realign\n");
1344
1345 break;
1346 }
1347 case dr_explicit_realign_optimized:
1348 {
1349 if (dump_enabled_p ())
1350 dump_printf_loc (MSG_NOTE, vect_location,
1351 "vect_model_load_cost: unaligned software "
1352 "pipelined.\n");
1353
1354 /* Unaligned software pipeline has a load of an address, an initial
1355 load, and possibly a mask operation to "prime" the loop. However,
1356 if this is an access in a group of loads, which provide grouped
1357 access, then the above cost should only be considered for one
1358 access in the group. Inside the loop, there is a load op
1359 and a realignment op. */
1360
1361 if (add_realign_cost && record_prologue_costs)
1362 {
1363 *prologue_cost += record_stmt_cost (prologue_cost_vec, 2,
1364 vector_stmt, stmt_info,
1365 0, vect_prologue);
1366 if (targetm.vectorize.builtin_mask_for_load)
1367 *prologue_cost += record_stmt_cost (prologue_cost_vec, 1,
1368 vector_stmt, stmt_info,
1369 0, vect_prologue);
1370 }
1371
1372 *inside_cost += record_stmt_cost (body_cost_vec, ncopies, vector_load,
1373 stmt_info, 0, vect_body);
1374 *inside_cost += record_stmt_cost (body_cost_vec, ncopies, vec_perm,
1375 stmt_info, 0, vect_body);
1376
1377 if (dump_enabled_p ())
1378 dump_printf_loc (MSG_NOTE, vect_location,
1379 "vect_model_load_cost: explicit realign optimized"
1380 "\n");
1381
1382 break;
1383 }
1384
1385 case dr_unaligned_unsupported:
1386 {
1387 *inside_cost = VECT_MAX_COST;
1388
1389 if (dump_enabled_p ())
1390 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1391 "vect_model_load_cost: unsupported access.\n");
1392 break;
1393 }
1394
1395 default:
1396 gcc_unreachable ();
1397 }
1398 }
1399
1400 /* Insert the new stmt NEW_STMT at *GSI or at the appropriate place in
1401 the loop preheader for the vectorized stmt STMT_VINFO. */
1402
1403 static void
1404 vect_init_vector_1 (stmt_vec_info stmt_vinfo, gimple *new_stmt,
1405 gimple_stmt_iterator *gsi)
1406 {
1407 if (gsi)
1408 vect_finish_stmt_generation (stmt_vinfo, new_stmt, gsi);
1409 else
1410 {
1411 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
1412
1413 if (loop_vinfo)
1414 {
1415 class loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
1416 basic_block new_bb;
1417 edge pe;
1418
1419 if (nested_in_vect_loop_p (loop, stmt_vinfo))
1420 loop = loop->inner;
1421
1422 pe = loop_preheader_edge (loop);
1423 new_bb = gsi_insert_on_edge_immediate (pe, new_stmt);
1424 gcc_assert (!new_bb);
1425 }
1426 else
1427 {
1428 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_vinfo);
1429 basic_block bb;
1430 gimple_stmt_iterator gsi_bb_start;
1431
1432 gcc_assert (bb_vinfo);
1433 bb = BB_VINFO_BB (bb_vinfo);
1434 gsi_bb_start = gsi_after_labels (bb);
1435 gsi_insert_before (&gsi_bb_start, new_stmt, GSI_SAME_STMT);
1436 }
1437 }
1438
1439 if (dump_enabled_p ())
1440 dump_printf_loc (MSG_NOTE, vect_location,
1441 "created new init_stmt: %G", new_stmt);
1442 }
1443
1444 /* Function vect_init_vector.
1445
1446 Insert a new stmt (INIT_STMT) that initializes a new variable of type
1447 TYPE with the value VAL. If TYPE is a vector type and VAL does not have
1448 vector type a vector with all elements equal to VAL is created first.
1449 Place the initialization at GSI if it is not NULL. Otherwise, place the
1450 initialization at the loop preheader.
1451 Return the DEF of INIT_STMT.
1452 It will be used in the vectorization of STMT_INFO. */
1453
1454 tree
1455 vect_init_vector (stmt_vec_info stmt_info, tree val, tree type,
1456 gimple_stmt_iterator *gsi)
1457 {
1458 gimple *init_stmt;
1459 tree new_temp;
1460
 1461	  /* We abuse this function to push something to an SSA name with initial 'val'.  */
1462 if (! useless_type_conversion_p (type, TREE_TYPE (val)))
1463 {
1464 gcc_assert (TREE_CODE (type) == VECTOR_TYPE);
1465 if (! types_compatible_p (TREE_TYPE (type), TREE_TYPE (val)))
1466 {
1467 /* Scalar boolean value should be transformed into
1468 all zeros or all ones value before building a vector. */
1469 if (VECTOR_BOOLEAN_TYPE_P (type))
1470 {
1471 tree true_val = build_all_ones_cst (TREE_TYPE (type));
1472 tree false_val = build_zero_cst (TREE_TYPE (type));
1473
1474 if (CONSTANT_CLASS_P (val))
1475 val = integer_zerop (val) ? false_val : true_val;
1476 else
1477 {
1478 new_temp = make_ssa_name (TREE_TYPE (type));
1479 init_stmt = gimple_build_assign (new_temp, COND_EXPR,
1480 val, true_val, false_val);
1481 vect_init_vector_1 (stmt_info, init_stmt, gsi);
1482 val = new_temp;
1483 }
1484 }
1485 else
1486 {
1487 gimple_seq stmts = NULL;
1488 if (! INTEGRAL_TYPE_P (TREE_TYPE (val)))
1489 val = gimple_build (&stmts, VIEW_CONVERT_EXPR,
1490 TREE_TYPE (type), val);
1491 else
1492 /* ??? Condition vectorization expects us to do
1493 promotion of invariant/external defs. */
1494 val = gimple_convert (&stmts, TREE_TYPE (type), val);
1495 for (gimple_stmt_iterator gsi2 = gsi_start (stmts);
1496 !gsi_end_p (gsi2); )
1497 {
1498 init_stmt = gsi_stmt (gsi2);
1499 gsi_remove (&gsi2, false);
1500 vect_init_vector_1 (stmt_info, init_stmt, gsi);
1501 }
1502 }
1503 }
1504 val = build_vector_from_val (type, val);
1505 }
1506
1507 new_temp = vect_get_new_ssa_name (type, vect_simple_var, "cst_");
1508 init_stmt = gimple_build_assign (new_temp, val);
1509 vect_init_vector_1 (stmt_info, init_stmt, gsi);
1510 return new_temp;
1511 }
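 /* Illustrative example (SSA names are invented): calling this with
    VAL == 3, TYPE == vector(4) int and GSI == NULL emits

      cst_1 = { 3, 3, 3, 3 };

    in the loop preheader and returns cst_1.  */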
1512
1513 /* Function vect_get_vec_def_for_operand_1.
1514
1515 For a defining stmt DEF_STMT_INFO of a scalar stmt, return a vector def
1516 with type DT that will be used in the vectorized stmt. */
1517
1518 tree
1519 vect_get_vec_def_for_operand_1 (stmt_vec_info def_stmt_info,
1520 enum vect_def_type dt)
1521 {
1522 tree vec_oprnd;
1523 stmt_vec_info vec_stmt_info;
1524
1525 switch (dt)
1526 {
1527 /* operand is a constant or a loop invariant. */
1528 case vect_constant_def:
1529 case vect_external_def:
1530 /* Code should use vect_get_vec_def_for_operand. */
1531 gcc_unreachable ();
1532
1533 /* Operand is defined by a loop header phi. In case of nested
1534 cycles we also may have uses of the backedge def. */
1535 case vect_reduction_def:
1536 case vect_double_reduction_def:
1537 case vect_nested_cycle:
1538 case vect_induction_def:
1539 gcc_assert (gimple_code (def_stmt_info->stmt) == GIMPLE_PHI
1540 || dt == vect_nested_cycle);
1541 /* Fallthru. */
1542
1543 /* operand is defined inside the loop. */
1544 case vect_internal_def:
1545 {
1546 /* Get the def from the vectorized stmt. */
1547 vec_stmt_info = STMT_VINFO_VEC_STMT (def_stmt_info);
1548 /* Get vectorized pattern statement. */
1549 if (!vec_stmt_info
1550 && STMT_VINFO_IN_PATTERN_P (def_stmt_info)
1551 && !STMT_VINFO_RELEVANT (def_stmt_info))
1552 vec_stmt_info = (STMT_VINFO_VEC_STMT
1553 (STMT_VINFO_RELATED_STMT (def_stmt_info)));
1554 gcc_assert (vec_stmt_info);
1555 if (gphi *phi = dyn_cast <gphi *> (vec_stmt_info->stmt))
1556 vec_oprnd = PHI_RESULT (phi);
1557 else
1558 vec_oprnd = gimple_get_lhs (vec_stmt_info->stmt);
1559 return vec_oprnd;
1560 }
1561
1562 default:
1563 gcc_unreachable ();
1564 }
1565 }
1566
1567
1568 /* Function vect_get_vec_def_for_operand.
1569
1570 OP is an operand in STMT_VINFO. This function returns a (vector) def
1571 that will be used in the vectorized stmt for STMT_VINFO.
1572
1573 In the case that OP is an SSA_NAME which is defined in the loop, then
1574 STMT_VINFO_VEC_STMT of the defining stmt holds the relevant def.
1575
1576 In case OP is an invariant or constant, a new stmt that creates a vector def
1577 needs to be introduced. VECTYPE may be used to specify a required type for
1578 vector invariant. */
1579
1580 tree
1581 vect_get_vec_def_for_operand (tree op, stmt_vec_info stmt_vinfo, tree vectype)
1582 {
1583 gimple *def_stmt;
1584 enum vect_def_type dt;
1585 bool is_simple_use;
1586 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
1587
1588 if (dump_enabled_p ())
1589 dump_printf_loc (MSG_NOTE, vect_location,
1590 "vect_get_vec_def_for_operand: %T\n", op);
1591
1592 stmt_vec_info def_stmt_info;
1593 is_simple_use = vect_is_simple_use (op, loop_vinfo, &dt,
1594 &def_stmt_info, &def_stmt);
1595 gcc_assert (is_simple_use);
1596 if (def_stmt && dump_enabled_p ())
1597 dump_printf_loc (MSG_NOTE, vect_location, " def_stmt = %G", def_stmt);
1598
1599 if (dt == vect_constant_def || dt == vect_external_def)
1600 {
1601 tree stmt_vectype = STMT_VINFO_VECTYPE (stmt_vinfo);
1602 tree vector_type;
1603
1604 if (vectype)
1605 vector_type = vectype;
1606 else if (VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (op))
1607 && VECTOR_BOOLEAN_TYPE_P (stmt_vectype))
1608 vector_type = truth_type_for (stmt_vectype);
1609 else
1610 vector_type = get_vectype_for_scalar_type (loop_vinfo, TREE_TYPE (op));
1611
1612 gcc_assert (vector_type);
1613 return vect_init_vector (stmt_vinfo, op, vector_type, NULL);
1614 }
1615 else
1616 return vect_get_vec_def_for_operand_1 (def_stmt_info, dt);
1617 }
1618
1619
1620 /* Function vect_get_vec_def_for_stmt_copy
1621
1622 Return a vector-def for an operand. This function is used when the
1623 vectorized stmt to be created (by the caller to this function) is a "copy"
1624 created in case the vectorized result cannot fit in one vector, and several
1625 copies of the vector-stmt are required. In this case the vector-def is
1626 retrieved from the vector stmt recorded in the STMT_VINFO_RELATED_STMT field
1627 of the stmt that defines VEC_OPRND. VINFO describes the vectorization.
1628
1629 Context:
1630 In case the vectorization factor (VF) is bigger than the number
1631 of elements that can fit in a vectype (nunits), we have to generate
1632 more than one vector stmt to vectorize the scalar stmt. This situation
1633 arises when there are multiple data-types operated upon in the loop; the
1634 smallest data-type determines the VF, and as a result, when vectorizing
1635 stmts operating on wider types we need to create 'VF/nunits' "copies" of the
1636 vector stmt (each computing a vector of 'nunits' results, and together
1637 computing 'VF' results in each iteration). This function is called when
1638 vectorizing such a stmt (e.g. vectorizing S2 in the illustration below, in
1639 which VF=16 and nunits=4, so the number of copies required is 4):
1640
1641 scalar stmt: vectorized into: STMT_VINFO_RELATED_STMT
1642
1643 S1: x = load VS1.0: vx.0 = memref0 VS1.1
1644 VS1.1: vx.1 = memref1 VS1.2
1645 VS1.2: vx.2 = memref2 VS1.3
1646 VS1.3: vx.3 = memref3
1647
1648 S2: z = x + ... VSnew.0: vz0 = vx.0 + ... VSnew.1
1649 VSnew.1: vz1 = vx.1 + ... VSnew.2
1650 VSnew.2: vz2 = vx.2 + ... VSnew.3
1651 VSnew.3: vz3 = vx.3 + ...
1652
1653 The vectorization of S1 is explained in vectorizable_load.
1654 The vectorization of S2:
1655 To create the first vector-stmt out of the 4 copies - VSnew.0 -
1656 the function 'vect_get_vec_def_for_operand' is called to
1657 get the relevant vector-def for each operand of S2. For operand x it
1658 returns the vector-def 'vx.0'.
1659
1660 To create the remaining copies of the vector-stmt (VSnew.j), this
1661 function is called to get the relevant vector-def for each operand. It is
1662 obtained from the respective VS1.j stmt, which is recorded in the
1663 STMT_VINFO_RELATED_STMT field of the stmt that defines VEC_OPRND.
1664
1665 For example, to obtain the vector-def 'vx.1' in order to create the
1666 vector stmt 'VSnew.1', this function is called with VEC_OPRND='vx.0'.
1667 Given 'vx0' we obtain the stmt that defines it ('VS1.0'); from the
1668 STMT_VINFO_RELATED_STMT field of 'VS1.0' we obtain the next copy - 'VS1.1',
1669 and return its def ('vx.1').
1670 Overall, to create the above sequence this function will be called 3 times:
1671 vx.1 = vect_get_vec_def_for_stmt_copy (vinfo, vx.0);
1672 vx.2 = vect_get_vec_def_for_stmt_copy (vinfo, vx.1);
1673 vx.3 = vect_get_vec_def_for_stmt_copy (vinfo, vx.2); */
1674
1675 tree
1676 vect_get_vec_def_for_stmt_copy (vec_info *vinfo, tree vec_oprnd)
1677 {
1678 stmt_vec_info def_stmt_info = vinfo->lookup_def (vec_oprnd);
1679 if (!def_stmt_info)
1680 /* Do nothing; can reuse same def. */
1681 return vec_oprnd;
1682
1683 def_stmt_info = STMT_VINFO_RELATED_STMT (def_stmt_info);
1684 gcc_assert (def_stmt_info);
1685 if (gphi *phi = dyn_cast <gphi *> (def_stmt_info->stmt))
1686 vec_oprnd = PHI_RESULT (phi);
1687 else
1688 vec_oprnd = gimple_get_lhs (def_stmt_info->stmt);
1689 return vec_oprnd;
1690 }
1691
1692
1693 /* Get vectorized definitions for the operands to create a copy of an original
1694 stmt. See vect_get_vec_def_for_stmt_copy () for details. */
1695
1696 void
1697 vect_get_vec_defs_for_stmt_copy (vec_info *vinfo,
1698 vec<tree> *vec_oprnds0,
1699 vec<tree> *vec_oprnds1)
1700 {
1701 tree vec_oprnd = vec_oprnds0->pop ();
1702
1703 vec_oprnd = vect_get_vec_def_for_stmt_copy (vinfo, vec_oprnd);
1704 vec_oprnds0->quick_push (vec_oprnd);
1705
1706 if (vec_oprnds1 && vec_oprnds1->length ())
1707 {
1708 vec_oprnd = vec_oprnds1->pop ();
1709 vec_oprnd = vect_get_vec_def_for_stmt_copy (vinfo, vec_oprnd);
1710 vec_oprnds1->quick_push (vec_oprnd);
1711 }
1712 }
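
/* A typical caller pattern, shown purely as an illustrative sketch (compare
   vectorizable_bswap further down in this file): the first copy gets its
   defs via vect_get_vec_defs, later copies via the function above:

       for (j = 0; j < ncopies; j++)
         {
           if (j == 0)
             vect_get_vec_defs (op0, op1, stmt_info, &vec_oprnds0,
                                &vec_oprnds1, slp_node);
           else
             vect_get_vec_defs_for_stmt_copy (vinfo, &vec_oprnds0,
                                              &vec_oprnds1);
           ... build the j'th vector stmt from the defs ...
         }  */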
1713
1714
1715 /* Get vectorized definitions for OP0 and OP1. */
1716
1717 void
1718 vect_get_vec_defs (tree op0, tree op1, stmt_vec_info stmt_info,
1719 vec<tree> *vec_oprnds0,
1720 vec<tree> *vec_oprnds1,
1721 slp_tree slp_node)
1722 {
1723 if (slp_node)
1724 {
1725 auto_vec<vec<tree> > vec_defs (SLP_TREE_CHILDREN (slp_node).length ());
1726 vect_get_slp_defs (slp_node, &vec_defs, op1 ? 2 : 1);
1727 *vec_oprnds0 = vec_defs[0];
1728 if (op1)
1729 *vec_oprnds1 = vec_defs[1];
1730 }
1731 else
1732 {
1733 tree vec_oprnd;
1734
1735 vec_oprnds0->create (1);
1736 vec_oprnd = vect_get_vec_def_for_operand (op0, stmt_info);
1737 vec_oprnds0->quick_push (vec_oprnd);
1738
1739 if (op1)
1740 {
1741 vec_oprnds1->create (1);
1742 vec_oprnd = vect_get_vec_def_for_operand (op1, stmt_info);
1743 vec_oprnds1->quick_push (vec_oprnd);
1744 }
1745 }
1746 }
1747
1748 /* Helper function called by vect_finish_replace_stmt and
1749 vect_finish_stmt_generation. Set the location of the new
1750 statement and create and return a stmt_vec_info for it. */
1751
1752 static stmt_vec_info
1753 vect_finish_stmt_generation_1 (stmt_vec_info stmt_info, gimple *vec_stmt)
1754 {
1755 vec_info *vinfo = stmt_info->vinfo;
1756
1757 stmt_vec_info vec_stmt_info = vinfo->add_stmt (vec_stmt);
1758
1759 if (dump_enabled_p ())
1760 dump_printf_loc (MSG_NOTE, vect_location, "add new stmt: %G", vec_stmt);
1761
1762 gimple_set_location (vec_stmt, gimple_location (stmt_info->stmt));
1763
1764 /* While EH edges will generally prevent vectorization, stmt might
1765 e.g. be in a must-not-throw region. Ensure newly created stmts
1766 that could throw are part of the same region. */
1767 int lp_nr = lookup_stmt_eh_lp (stmt_info->stmt);
1768 if (lp_nr != 0 && stmt_could_throw_p (cfun, vec_stmt))
1769 add_stmt_to_eh_lp (vec_stmt, lp_nr);
1770
1771 return vec_stmt_info;
1772 }
1773
1774 /* Replace the scalar statement STMT_INFO with a new vector statement VEC_STMT,
1775 which sets the same scalar result as STMT_INFO did. Create and return a
1776 stmt_vec_info for VEC_STMT. */
1777
1778 stmt_vec_info
1779 vect_finish_replace_stmt (stmt_vec_info stmt_info, gimple *vec_stmt)
1780 {
1781 gcc_assert (gimple_get_lhs (stmt_info->stmt) == gimple_get_lhs (vec_stmt));
1782
1783 gimple_stmt_iterator gsi = gsi_for_stmt (stmt_info->stmt);
1784 gsi_replace (&gsi, vec_stmt, true);
1785
1786 return vect_finish_stmt_generation_1 (stmt_info, vec_stmt);
1787 }
1788
1789 /* Add VEC_STMT to the vectorized implementation of STMT_INFO and insert it
1790 before *GSI. Create and return a stmt_vec_info for VEC_STMT. */
1791
1792 stmt_vec_info
1793 vect_finish_stmt_generation (stmt_vec_info stmt_info, gimple *vec_stmt,
1794 gimple_stmt_iterator *gsi)
1795 {
1796 gcc_assert (gimple_code (stmt_info->stmt) != GIMPLE_LABEL);
1797
1798 if (!gsi_end_p (*gsi)
1799 && gimple_has_mem_ops (vec_stmt))
1800 {
1801 gimple *at_stmt = gsi_stmt (*gsi);
1802 tree vuse = gimple_vuse (at_stmt);
1803 if (vuse && TREE_CODE (vuse) == SSA_NAME)
1804 {
1805 tree vdef = gimple_vdef (at_stmt);
1806 gimple_set_vuse (vec_stmt, gimple_vuse (at_stmt));
1807 /* If we have an SSA vuse and insert a store, update virtual
1808 SSA form to avoid triggering the renamer. Do so only
1809 if we can easily see all uses - which is what almost always
1810 happens with the way vectorized stmts are inserted. */
1811 if ((vdef && TREE_CODE (vdef) == SSA_NAME)
1812 && ((is_gimple_assign (vec_stmt)
1813 && !is_gimple_reg (gimple_assign_lhs (vec_stmt)))
1814 || (is_gimple_call (vec_stmt)
1815 && !(gimple_call_flags (vec_stmt)
1816 & (ECF_CONST|ECF_PURE|ECF_NOVOPS)))))
1817 {
1818 tree new_vdef = copy_ssa_name (vuse, vec_stmt);
1819 gimple_set_vdef (vec_stmt, new_vdef);
1820 SET_USE (gimple_vuse_op (at_stmt), new_vdef);
1821 }
1822 }
1823 }
1824 gsi_insert_before (gsi, vec_stmt, GSI_SAME_STMT);
1825 return vect_finish_stmt_generation_1 (stmt_info, vec_stmt);
1826 }
1827
1828 /* We want to vectorize a call to combined function CFN with function
1829 decl FNDECL, using VECTYPE_OUT as the type of the output and VECTYPE_IN
1830 as the types of all inputs. Check whether this is possible using
1831 an internal function, returning its code if so or IFN_LAST if not. */
1832
1833 static internal_fn
1834 vectorizable_internal_function (combined_fn cfn, tree fndecl,
1835 tree vectype_out, tree vectype_in)
1836 {
1837 internal_fn ifn;
1838 if (internal_fn_p (cfn))
1839 ifn = as_internal_fn (cfn);
1840 else
1841 ifn = associated_internal_fn (fndecl);
1842 if (ifn != IFN_LAST && direct_internal_fn_p (ifn))
1843 {
1844 const direct_internal_fn_info &info = direct_internal_fn (ifn);
1845 if (info.vectorizable)
1846 {
1847 tree type0 = (info.type0 < 0 ? vectype_out : vectype_in);
1848 tree type1 = (info.type1 < 0 ? vectype_out : vectype_in);
1849 if (direct_internal_fn_supported_p (ifn, tree_pair (type0, type1),
1850 OPTIMIZE_FOR_SPEED))
1851 return ifn;
1852 }
1853 }
1854 return IFN_LAST;
1855 }
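
/* An illustrative example (not an exhaustive description): a call to sqrt
   with a V2DF result and V2DF argument corresponds to IFN_SQRT, so the
   function above returns IFN_SQRT if the target provides the matching
   vector optab for V2DF and IFN_LAST otherwise.  */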
1856
1857
1858 static tree permute_vec_elements (tree, tree, tree, stmt_vec_info,
1859 gimple_stmt_iterator *);
1860
1861 /* Check whether a load or store statement in the loop described by
1862 LOOP_VINFO is possible in a fully-masked loop. This is testing
1863 whether the vectorizer pass has the appropriate support, as well as
1864 whether the target does.
1865
1866 VLS_TYPE says whether the statement is a load or store and VECTYPE
1867 is the type of the vector being loaded or stored. MEMORY_ACCESS_TYPE
1868 says how the load or store is going to be implemented and GROUP_SIZE
1869 is the number of load or store statements in the containing group.
1870 If the access is a gather load or scatter store, GS_INFO describes
1871 its arguments. If the load or store is conditional, SCALAR_MASK is the
1872 condition under which it occurs.
1873
1874 Clear LOOP_VINFO_CAN_FULLY_MASK_P if a fully-masked loop is not
1875 supported, otherwise record the required mask types. */
1876
1877 static void
1878 check_load_store_masking (loop_vec_info loop_vinfo, tree vectype,
1879 vec_load_store_type vls_type, int group_size,
1880 vect_memory_access_type memory_access_type,
1881 gather_scatter_info *gs_info, tree scalar_mask)
1882 {
1883 /* Invariant loads need no special support. */
1884 if (memory_access_type == VMAT_INVARIANT)
1885 return;
1886
1887 vec_loop_masks *masks = &LOOP_VINFO_MASKS (loop_vinfo);
1888 machine_mode vecmode = TYPE_MODE (vectype);
1889 bool is_load = (vls_type == VLS_LOAD);
1890 if (memory_access_type == VMAT_LOAD_STORE_LANES)
1891 {
1892 if (is_load
1893 ? !vect_load_lanes_supported (vectype, group_size, true)
1894 : !vect_store_lanes_supported (vectype, group_size, true))
1895 {
1896 if (dump_enabled_p ())
1897 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1898 "can't use a fully-masked loop because the"
1899 " target doesn't have an appropriate masked"
1900 " load/store-lanes instruction.\n");
1901 LOOP_VINFO_CAN_FULLY_MASK_P (loop_vinfo) = false;
1902 return;
1903 }
1904 unsigned int ncopies = vect_get_num_copies (loop_vinfo, vectype);
1905 vect_record_loop_mask (loop_vinfo, masks, ncopies, vectype, scalar_mask);
1906 return;
1907 }
1908
1909 if (memory_access_type == VMAT_GATHER_SCATTER)
1910 {
1911 internal_fn ifn = (is_load
1912 ? IFN_MASK_GATHER_LOAD
1913 : IFN_MASK_SCATTER_STORE);
1914 if (!internal_gather_scatter_fn_supported_p (ifn, vectype,
1915 gs_info->memory_type,
1916 gs_info->offset_vectype,
1917 gs_info->scale))
1918 {
1919 if (dump_enabled_p ())
1920 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1921 "can't use a fully-masked loop because the"
1922 " target doesn't have an appropriate masked"
1923 " gather load or scatter store instruction.\n");
1924 LOOP_VINFO_CAN_FULLY_MASK_P (loop_vinfo) = false;
1925 return;
1926 }
1927 unsigned int ncopies = vect_get_num_copies (loop_vinfo, vectype);
1928 vect_record_loop_mask (loop_vinfo, masks, ncopies, vectype, scalar_mask);
1929 return;
1930 }
1931
1932 if (memory_access_type != VMAT_CONTIGUOUS
1933 && memory_access_type != VMAT_CONTIGUOUS_PERMUTE)
1934 {
1935 /* Element X of the data must come from iteration i * VF + X of the
1936 scalar loop. We need more work to support other mappings. */
1937 if (dump_enabled_p ())
1938 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1939 "can't use a fully-masked loop because an access"
1940 " isn't contiguous.\n");
1941 LOOP_VINFO_CAN_FULLY_MASK_P (loop_vinfo) = false;
1942 return;
1943 }
1944
1945 machine_mode mask_mode;
1946 if (!VECTOR_MODE_P (vecmode)
1947 || !targetm.vectorize.get_mask_mode (vecmode).exists (&mask_mode)
1948 || !can_vec_mask_load_store_p (vecmode, mask_mode, is_load))
1949 {
1950 if (dump_enabled_p ())
1951 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1952 "can't use a fully-masked loop because the target"
1953 " doesn't have the appropriate masked load or"
1954 " store.\n");
1955 LOOP_VINFO_CAN_FULLY_MASK_P (loop_vinfo) = false;
1956 return;
1957 }
1958 /* We might load more scalars than we need for permuting SLP loads.
1959 We checked in get_group_load_store_type that the extra elements
1960 don't leak into a new vector. */
1961 poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
1962 poly_uint64 vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
1963 unsigned int nvectors;
1964 if (can_div_away_from_zero_p (group_size * vf, nunits, &nvectors))
1965 vect_record_loop_mask (loop_vinfo, masks, nvectors, vectype, scalar_mask);
1966 else
1967 gcc_unreachable ();
1968 }
1969
1970 /* Return the mask input to a masked load or store. VEC_MASK is the vectorized
1971 form of the scalar mask condition and LOOP_MASK, if nonnull, is the mask
1972 that needs to be applied to all loads and stores in a vectorized loop.
1973 Return VEC_MASK if LOOP_MASK is null, otherwise return VEC_MASK & LOOP_MASK.
1974
1975 MASK_TYPE is the type of both masks. If new statements are needed,
1976 insert them before GSI. */
1977
1978 static tree
1979 prepare_load_store_mask (tree mask_type, tree loop_mask, tree vec_mask,
1980 gimple_stmt_iterator *gsi)
1981 {
1982 gcc_assert (useless_type_conversion_p (mask_type, TREE_TYPE (vec_mask)));
1983 if (!loop_mask)
1984 return vec_mask;
1985
1986 gcc_assert (TREE_TYPE (loop_mask) == mask_type);
1987 tree and_res = make_temp_ssa_name (mask_type, NULL, "vec_mask_and");
1988 gimple *and_stmt = gimple_build_assign (and_res, BIT_AND_EXPR,
1989 vec_mask, loop_mask);
1990 gsi_insert_before (gsi, and_stmt, GSI_SAME_STMT);
1991 return and_res;
1992 }
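
/* The statement generated above is roughly (with invented SSA names):

       vec_mask_and_1 = vec_mask_2 & loop_mask_3;

   and the result is what callers pass as the mask operand of masked loads
   and stores such as IFN_MASK_LOAD and IFN_MASK_STORE.  */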
1993
1994 /* Determine whether we can use a gather load or scatter store to vectorize
1995 strided load or store STMT_INFO by truncating the current offset to a
1996 smaller width. We need to be able to construct an offset vector:
1997
1998 { 0, X, X*2, X*3, ... }
1999
2000 without loss of precision, where X is STMT_INFO's DR_STEP.
2001
2002 Return true if this is possible, describing the gather load or scatter
2003 store in GS_INFO. MASKED_P is true if the load or store is conditional. */
2004
2005 static bool
2006 vect_truncate_gather_scatter_offset (stmt_vec_info stmt_info,
2007 loop_vec_info loop_vinfo, bool masked_p,
2008 gather_scatter_info *gs_info)
2009 {
2010 dr_vec_info *dr_info = STMT_VINFO_DR_INFO (stmt_info);
2011 data_reference *dr = dr_info->dr;
2012 tree step = DR_STEP (dr);
2013 if (TREE_CODE (step) != INTEGER_CST)
2014 {
2015 /* ??? Perhaps we could use range information here? */
2016 if (dump_enabled_p ())
2017 dump_printf_loc (MSG_NOTE, vect_location,
2018 "cannot truncate variable step.\n");
2019 return false;
2020 }
2021
2022 /* Get the number of bits in an element. */
2023 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
2024 scalar_mode element_mode = SCALAR_TYPE_MODE (TREE_TYPE (vectype));
2025 unsigned int element_bits = GET_MODE_BITSIZE (element_mode);
2026
2027 /* Set COUNT to an upper bound on the maximum element index, i.e. the
2028 number of elements minus 1. Start with the maximum vectorization factor. */
2029 unsigned HOST_WIDE_INT count = vect_max_vf (loop_vinfo) - 1;
2030
2031 /* Try lowering COUNT to the number of scalar latch iterations. */
2032 class loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
2033 widest_int max_iters;
2034 if (max_loop_iterations (loop, &max_iters)
2035 && max_iters < count)
2036 count = max_iters.to_shwi ();
2037
2038 /* Try scales of 1 and the element size. */
2039 int scales[] = { 1, vect_get_scalar_dr_size (dr_info) };
2040 wi::overflow_type overflow = wi::OVF_NONE;
2041 for (int i = 0; i < 2; ++i)
2042 {
2043 int scale = scales[i];
2044 widest_int factor;
2045 if (!wi::multiple_of_p (wi::to_widest (step), scale, SIGNED, &factor))
2046 continue;
2047
2048 /* Determine the minimum precision of COUNT * STEP / SCALE. */
2049 widest_int range = wi::mul (count, factor, SIGNED, &overflow);
2050 if (overflow)
2051 continue;
2052 signop sign = range >= 0 ? UNSIGNED : SIGNED;
2053 unsigned int min_offset_bits = wi::min_precision (range, sign);
2054
2055 /* Find the narrowest viable offset type. */
2056 unsigned int offset_bits = 1U << ceil_log2 (min_offset_bits);
2057 tree offset_type = build_nonstandard_integer_type (offset_bits,
2058 sign == UNSIGNED);
2059
2060 /* See whether the target supports the operation with an offset
2061 no narrower than OFFSET_TYPE. */
2062 tree memory_type = TREE_TYPE (DR_REF (dr));
2063 if (!vect_gather_scatter_fn_p (loop_vinfo, DR_IS_READ (dr), masked_p,
2064 vectype, memory_type, offset_type, scale,
2065 &gs_info->ifn, &gs_info->offset_vectype))
2066 continue;
2067
2068 gs_info->decl = NULL_TREE;
2069 /* Logically the sum of DR_BASE_ADDRESS, DR_INIT and DR_OFFSET,
2070 but we don't need to store that here. */
2071 gs_info->base = NULL_TREE;
2072 gs_info->element_type = TREE_TYPE (vectype);
2073 gs_info->offset = fold_convert (offset_type, step);
2074 gs_info->offset_dt = vect_constant_def;
2075 gs_info->scale = scale;
2076 gs_info->memory_type = memory_type;
2077 return true;
2078 }
2079
2080 if (overflow && dump_enabled_p ())
2081 dump_printf_loc (MSG_NOTE, vect_location,
2082 "truncating gather/scatter offset to %d bits"
2083 " might change its value.\n", element_bits);
2084
2085 return false;
2086 }
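
/* A worked example with made-up numbers: if DR_STEP is 200 bytes, the
   element size is 4 and COUNT is 255, then for SCALE == 4 the loop above
   computes FACTOR == 50 and RANGE == 12750, which needs 14 bits, and so
   tries a 16-bit unsigned offset type for that scale.  */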
2087
2088 /* Return true if we can use gather/scatter internal functions to
2089 vectorize STMT_INFO, which is a grouped or strided load or store.
2090 MASKED_P is true if load or store is conditional. When returning
2091 true, fill in GS_INFO with the information required to perform the
2092 operation. */
2093
2094 static bool
2095 vect_use_strided_gather_scatters_p (stmt_vec_info stmt_info,
2096 loop_vec_info loop_vinfo, bool masked_p,
2097 gather_scatter_info *gs_info)
2098 {
2099 if (!vect_check_gather_scatter (stmt_info, loop_vinfo, gs_info)
2100 || gs_info->decl)
2101 return vect_truncate_gather_scatter_offset (stmt_info, loop_vinfo,
2102 masked_p, gs_info);
2103
2104 tree old_offset_type = TREE_TYPE (gs_info->offset);
2105 tree new_offset_type = TREE_TYPE (gs_info->offset_vectype);
2106
2107 gcc_assert (TYPE_PRECISION (new_offset_type)
2108 >= TYPE_PRECISION (old_offset_type));
2109 gs_info->offset = fold_convert (new_offset_type, gs_info->offset);
2110
2111 if (dump_enabled_p ())
2112 dump_printf_loc (MSG_NOTE, vect_location,
2113 "using gather/scatter for strided/grouped access,"
2114 " scale = %d\n", gs_info->scale);
2115
2116 return true;
2117 }
2118
2119 /* STMT_INFO is a non-strided load or store, meaning that it accesses
2120 elements with a known constant step. Return -1 if that step
2121 is negative, 0 if it is zero, and 1 if it is greater than zero. */
2122
2123 static int
2124 compare_step_with_zero (stmt_vec_info stmt_info)
2125 {
2126 dr_vec_info *dr_info = STMT_VINFO_DR_INFO (stmt_info);
2127 return tree_int_cst_compare (vect_dr_behavior (dr_info)->step,
2128 size_zero_node);
2129 }
2130
2131 /* If the target supports a permute mask that reverses the elements in
2132 a vector of type VECTYPE, return that mask, otherwise return null. */
2133
2134 static tree
2135 perm_mask_for_reverse (tree vectype)
2136 {
2137 poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
2138
2139 /* The encoding has a single stepped pattern. */
2140 vec_perm_builder sel (nunits, 1, 3);
2141 for (int i = 0; i < 3; ++i)
2142 sel.quick_push (nunits - 1 - i);
2143
2144 vec_perm_indices indices (sel, 1, nunits);
2145 if (!can_vec_perm_const_p (TYPE_MODE (vectype), indices))
2146 return NULL_TREE;
2147 return vect_gen_perm_mask_checked (vectype, indices);
2148 }
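
/* For example, for a 4-element vector the selector built above is
   effectively { 3, 2, 1, 0 }; the single stepped pattern extends the same
   reversal to longer and to variable-length vectors.  */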
2149
2150 /* A subroutine of get_load_store_type, with a subset of the same
2151 arguments. Handle the case where STMT_INFO is a load or store that
2152 accesses consecutive elements with a negative step. */
2153
2154 static vect_memory_access_type
2155 get_negative_load_store_type (stmt_vec_info stmt_info, tree vectype,
2156 vec_load_store_type vls_type,
2157 unsigned int ncopies)
2158 {
2159 dr_vec_info *dr_info = STMT_VINFO_DR_INFO (stmt_info);
2160 dr_alignment_support alignment_support_scheme;
2161
2162 if (ncopies > 1)
2163 {
2164 if (dump_enabled_p ())
2165 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2166 "multiple types with negative step.\n");
2167 return VMAT_ELEMENTWISE;
2168 }
2169
2170 alignment_support_scheme = vect_supportable_dr_alignment (dr_info, false);
2171 if (alignment_support_scheme != dr_aligned
2172 && alignment_support_scheme != dr_unaligned_supported)
2173 {
2174 if (dump_enabled_p ())
2175 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2176 "negative step but alignment required.\n");
2177 return VMAT_ELEMENTWISE;
2178 }
2179
2180 if (vls_type == VLS_STORE_INVARIANT)
2181 {
2182 if (dump_enabled_p ())
2183 dump_printf_loc (MSG_NOTE, vect_location,
2184 "negative step with invariant source;"
2185 " no permute needed.\n");
2186 return VMAT_CONTIGUOUS_DOWN;
2187 }
2188
2189 if (!perm_mask_for_reverse (vectype))
2190 {
2191 if (dump_enabled_p ())
2192 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2193 "negative step and reversing not supported.\n");
2194 return VMAT_ELEMENTWISE;
2195 }
2196
2197 return VMAT_CONTIGUOUS_REVERSE;
2198 }
2199
2200 /* STMT_INFO is either a masked or unconditional store. Return the value
2201 being stored. */
2202
2203 tree
2204 vect_get_store_rhs (stmt_vec_info stmt_info)
2205 {
2206 if (gassign *assign = dyn_cast <gassign *> (stmt_info->stmt))
2207 {
2208 gcc_assert (gimple_assign_single_p (assign));
2209 return gimple_assign_rhs1 (assign);
2210 }
2211 if (gcall *call = dyn_cast <gcall *> (stmt_info->stmt))
2212 {
2213 internal_fn ifn = gimple_call_internal_fn (call);
2214 int index = internal_fn_stored_value_index (ifn);
2215 gcc_assert (index >= 0);
2216 return gimple_call_arg (call, index);
2217 }
2218 gcc_unreachable ();
2219 }
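
/* Illustrative GIMPLE: for a plain store "MEM[ptr] = x_1" this returns x_1,
   and for a masked store such as ".MASK_STORE (ptr, align, mask_2, x_1)"
   it returns the stored value x_1, located via
   internal_fn_stored_value_index.  */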
2220
2221 /* A subroutine of get_load_store_type, with a subset of the same
2222 arguments. Handle the case where STMT_INFO is part of a grouped load
2223 or store.
2224
2225 For stores, the statements in the group are all consecutive
2226 and there is no gap at the end. For loads, the statements in the
2227 group might not be consecutive; there can be gaps between statements
2228 as well as at the end. */
2229
2230 static bool
2231 get_group_load_store_type (stmt_vec_info stmt_info, tree vectype, bool slp,
2232 bool masked_p, vec_load_store_type vls_type,
2233 vect_memory_access_type *memory_access_type,
2234 gather_scatter_info *gs_info)
2235 {
2236 vec_info *vinfo = stmt_info->vinfo;
2237 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
2238 class loop *loop = loop_vinfo ? LOOP_VINFO_LOOP (loop_vinfo) : NULL;
2239 stmt_vec_info first_stmt_info = DR_GROUP_FIRST_ELEMENT (stmt_info);
2240 dr_vec_info *first_dr_info = STMT_VINFO_DR_INFO (first_stmt_info);
2241 unsigned int group_size = DR_GROUP_SIZE (first_stmt_info);
2242 bool single_element_p = (stmt_info == first_stmt_info
2243 && !DR_GROUP_NEXT_ELEMENT (stmt_info));
2244 unsigned HOST_WIDE_INT gap = DR_GROUP_GAP (first_stmt_info);
2245 poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
2246
2247 /* True if the vectorized statements would access beyond the last
2248 statement in the group. */
2249 bool overrun_p = false;
2250
2251 /* True if we can cope with such overrun by peeling for gaps, so that
2252 there is at least one final scalar iteration after the vector loop. */
2253 bool can_overrun_p = (!masked_p
2254 && vls_type == VLS_LOAD
2255 && loop_vinfo
2256 && !loop->inner);
2257
2258 /* There can only be a gap at the end of the group if the stride is
2259 known at compile time. */
2260 gcc_assert (!STMT_VINFO_STRIDED_P (first_stmt_info) || gap == 0);
2261
2262 /* Stores can't yet have gaps. */
2263 gcc_assert (slp || vls_type == VLS_LOAD || gap == 0);
2264
2265 if (slp)
2266 {
2267 if (STMT_VINFO_STRIDED_P (first_stmt_info))
2268 {
2269 /* Try to use consecutive accesses of DR_GROUP_SIZE elements,
2270 separated by the stride, until we have a complete vector.
2271 Fall back to scalar accesses if that isn't possible. */
2272 if (multiple_p (nunits, group_size))
2273 *memory_access_type = VMAT_STRIDED_SLP;
2274 else
2275 *memory_access_type = VMAT_ELEMENTWISE;
2276 }
2277 else
2278 {
2279 overrun_p = loop_vinfo && gap != 0;
2280 if (overrun_p && vls_type != VLS_LOAD)
2281 {
2282 if (dump_enabled_p ())
2283 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2284 "Grouped store with gaps requires non-consecutive accesses\n");
2285 return false;
2286 }
2287 /* An overrun is fine if the trailing elements are smaller
2288 than the alignment boundary B. Every vector access will
2289 be a multiple of B and so we are guaranteed to access a
2290 non-gap element in the same B-sized block. */
2291 if (overrun_p
2292 && gap < (vect_known_alignment_in_bytes (first_dr_info)
2293 / vect_get_scalar_dr_size (first_dr_info)))
2294 overrun_p = false;
2295
2296 /* If the gap splits the vector in half and the target
2297 can do half-vector operations avoid the epilogue peeling
2298 by simply loading half of the vector only. Usually
2299 the construction with an upper zero half will be elided. */
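/* For instance (purely illustrative): with 4-element vectors, a group of
   size 4 and a gap of 2, the test below checks whether the low half can
   be loaded as a 2-element vector instead of peeling for gaps.  */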
2300 dr_alignment_support alignment_support_scheme;
2301 scalar_mode elmode = SCALAR_TYPE_MODE (TREE_TYPE (vectype));
2302 machine_mode vmode;
2303 if (overrun_p
2304 && !masked_p
2305 && (((alignment_support_scheme
2306 = vect_supportable_dr_alignment (first_dr_info, false)))
2307 == dr_aligned
2308 || alignment_support_scheme == dr_unaligned_supported)
2309 && known_eq (nunits, (group_size - gap) * 2)
2310 && known_eq (nunits, group_size)
2311 && related_vector_mode (TYPE_MODE (vectype), elmode,
2312 group_size - gap).exists (&vmode)
2313 && (convert_optab_handler (vec_init_optab,
2314 TYPE_MODE (vectype), vmode)
2315 != CODE_FOR_nothing))
2316 overrun_p = false;
2317
2318 if (overrun_p && !can_overrun_p)
2319 {
2320 if (dump_enabled_p ())
2321 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2322 "Peeling for outer loop is not supported\n");
2323 return false;
2324 }
2325 int cmp = compare_step_with_zero (stmt_info);
2326 if (cmp < 0)
2327 *memory_access_type = get_negative_load_store_type
2328 (stmt_info, vectype, vls_type, 1);
2329 else
2330 {
2331 gcc_assert (!loop_vinfo || cmp > 0);
2332 *memory_access_type = VMAT_CONTIGUOUS;
2333 }
2334 }
2335 }
2336 else
2337 {
2338 /* We can always handle this case using elementwise accesses,
2339 but see if something more efficient is available. */
2340 *memory_access_type = VMAT_ELEMENTWISE;
2341
2342 /* If there is a gap at the end of the group then these optimizations
2343 would access excess elements in the last iteration. */
2344 bool would_overrun_p = (gap != 0);
2345 /* An overrun is fine if the trailing elements are smaller than the
2346 alignment boundary B. Every vector access will be a multiple of B
2347 and so we are guaranteed to access a non-gap element in the
2348 same B-sized block. */
2349 if (would_overrun_p
2350 && !masked_p
2351 && gap < (vect_known_alignment_in_bytes (first_dr_info)
2352 / vect_get_scalar_dr_size (first_dr_info)))
2353 would_overrun_p = false;
2354
2355 if (!STMT_VINFO_STRIDED_P (first_stmt_info)
2356 && (can_overrun_p || !would_overrun_p)
2357 && compare_step_with_zero (stmt_info) > 0)
2358 {
2359 /* First cope with the degenerate case of a single-element
2360 vector. */
2361 if (known_eq (TYPE_VECTOR_SUBPARTS (vectype), 1U))
2362 *memory_access_type = VMAT_CONTIGUOUS;
2363
2364 /* Otherwise try using LOAD/STORE_LANES. */
2365 if (*memory_access_type == VMAT_ELEMENTWISE
2366 && (vls_type == VLS_LOAD
2367 ? vect_load_lanes_supported (vectype, group_size, masked_p)
2368 : vect_store_lanes_supported (vectype, group_size,
2369 masked_p)))
2370 {
2371 *memory_access_type = VMAT_LOAD_STORE_LANES;
2372 overrun_p = would_overrun_p;
2373 }
2374
2375 /* If that fails, try using permuting loads. */
2376 if (*memory_access_type == VMAT_ELEMENTWISE
2377 && (vls_type == VLS_LOAD
2378 ? vect_grouped_load_supported (vectype, single_element_p,
2379 group_size)
2380 : vect_grouped_store_supported (vectype, group_size)))
2381 {
2382 *memory_access_type = VMAT_CONTIGUOUS_PERMUTE;
2383 overrun_p = would_overrun_p;
2384 }
2385 }
2386
2387 /* As a last resort, try using a gather load or scatter store.
2388
2389 ??? Although the code can handle all group sizes correctly,
2390 it probably isn't a win to use separate strided accesses based
2391 on nearby locations. Or, even if it's a win over scalar code,
2392 it might not be a win over vectorizing at a lower VF, if that
2393 allows us to use contiguous accesses. */
2394 if (*memory_access_type == VMAT_ELEMENTWISE
2395 && single_element_p
2396 && loop_vinfo
2397 && vect_use_strided_gather_scatters_p (stmt_info, loop_vinfo,
2398 masked_p, gs_info))
2399 *memory_access_type = VMAT_GATHER_SCATTER;
2400 }
2401
2402 if (vls_type != VLS_LOAD && first_stmt_info == stmt_info)
2403 {
2404 /* STMT_INFO is the leader of the group. Check the operands of all the
2405 stmts of the group. */
2406 stmt_vec_info next_stmt_info = DR_GROUP_NEXT_ELEMENT (stmt_info);
2407 while (next_stmt_info)
2408 {
2409 tree op = vect_get_store_rhs (next_stmt_info);
2410 enum vect_def_type dt;
2411 if (!vect_is_simple_use (op, vinfo, &dt))
2412 {
2413 if (dump_enabled_p ())
2414 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2415 "use not simple.\n");
2416 return false;
2417 }
2418 next_stmt_info = DR_GROUP_NEXT_ELEMENT (next_stmt_info);
2419 }
2420 }
2421
2422 if (overrun_p)
2423 {
2424 gcc_assert (can_overrun_p);
2425 if (dump_enabled_p ())
2426 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2427 "Data access with gaps requires scalar "
2428 "epilogue loop\n");
2429 LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo) = true;
2430 }
2431
2432 return true;
2433 }
2434
2435 /* Analyze load or store statement STMT_INFO of type VLS_TYPE. Return true
2436 if there is a memory access type that the vectorized form can use,
2437 storing it in *MEMORY_ACCESS_TYPE if so. If we decide to use gathers
2438 or scatters, fill in GS_INFO accordingly.
2439
2440 SLP says whether we're performing SLP rather than loop vectorization.
2441 MASKED_P is true if the statement is conditional on a vectorized mask.
2442 VECTYPE is the vector type that the vectorized statements will use.
2443 NCOPIES is the number of vector statements that will be needed. */
2444
2445 static bool
2446 get_load_store_type (stmt_vec_info stmt_info, tree vectype, bool slp,
2447 bool masked_p, vec_load_store_type vls_type,
2448 unsigned int ncopies,
2449 vect_memory_access_type *memory_access_type,
2450 gather_scatter_info *gs_info)
2451 {
2452 vec_info *vinfo = stmt_info->vinfo;
2453 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
2454 poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
2455 if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
2456 {
2457 *memory_access_type = VMAT_GATHER_SCATTER;
2458 if (!vect_check_gather_scatter (stmt_info, loop_vinfo, gs_info))
2459 gcc_unreachable ();
2460 else if (!vect_is_simple_use (gs_info->offset, vinfo,
2461 &gs_info->offset_dt,
2462 &gs_info->offset_vectype))
2463 {
2464 if (dump_enabled_p ())
2465 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2466 "%s index use not simple.\n",
2467 vls_type == VLS_LOAD ? "gather" : "scatter");
2468 return false;
2469 }
2470 }
2471 else if (STMT_VINFO_GROUPED_ACCESS (stmt_info))
2472 {
2473 if (!get_group_load_store_type (stmt_info, vectype, slp, masked_p,
2474 vls_type, memory_access_type, gs_info))
2475 return false;
2476 }
2477 else if (STMT_VINFO_STRIDED_P (stmt_info))
2478 {
2479 gcc_assert (!slp);
2480 if (loop_vinfo
2481 && vect_use_strided_gather_scatters_p (stmt_info, loop_vinfo,
2482 masked_p, gs_info))
2483 *memory_access_type = VMAT_GATHER_SCATTER;
2484 else
2485 *memory_access_type = VMAT_ELEMENTWISE;
2486 }
2487 else
2488 {
2489 int cmp = compare_step_with_zero (stmt_info);
2490 if (cmp < 0)
2491 *memory_access_type = get_negative_load_store_type
2492 (stmt_info, vectype, vls_type, ncopies);
2493 else if (cmp == 0)
2494 {
2495 gcc_assert (vls_type == VLS_LOAD);
2496 *memory_access_type = VMAT_INVARIANT;
2497 }
2498 else
2499 *memory_access_type = VMAT_CONTIGUOUS;
2500 }
2501
2502 if ((*memory_access_type == VMAT_ELEMENTWISE
2503 || *memory_access_type == VMAT_STRIDED_SLP)
2504 && !nunits.is_constant ())
2505 {
2506 if (dump_enabled_p ())
2507 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2508 "Not using elementwise accesses due to variable "
2509 "vectorization factor.\n");
2510 return false;
2511 }
2512
2513 /* FIXME: At the moment the cost model seems to underestimate the
2514 cost of using elementwise accesses. This check preserves the
2515 traditional behavior until that can be fixed. */
2516 stmt_vec_info first_stmt_info = DR_GROUP_FIRST_ELEMENT (stmt_info);
2517 if (!first_stmt_info)
2518 first_stmt_info = stmt_info;
2519 if (*memory_access_type == VMAT_ELEMENTWISE
2520 && !STMT_VINFO_STRIDED_P (first_stmt_info)
2521 && !(stmt_info == DR_GROUP_FIRST_ELEMENT (stmt_info)
2522 && !DR_GROUP_NEXT_ELEMENT (stmt_info)
2523 && !pow2p_hwi (DR_GROUP_SIZE (stmt_info))))
2524 {
2525 if (dump_enabled_p ())
2526 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2527 "not falling back to elementwise accesses\n");
2528 return false;
2529 }
2530 return true;
2531 }
2532
2533 /* Return true if boolean argument MASK is suitable for vectorizing
2534 conditional load or store STMT_INFO. When returning true, store the type
2535 of the definition in *MASK_DT_OUT and the type of the vectorized mask
2536 in *MASK_VECTYPE_OUT. */
2537
2538 static bool
2539 vect_check_load_store_mask (stmt_vec_info stmt_info, tree mask,
2540 vect_def_type *mask_dt_out,
2541 tree *mask_vectype_out)
2542 {
2543 vec_info *vinfo = stmt_info->vinfo;
2544 if (!VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (mask)))
2545 {
2546 if (dump_enabled_p ())
2547 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2548 "mask argument is not a boolean.\n");
2549 return false;
2550 }
2551
2552 if (TREE_CODE (mask) != SSA_NAME)
2553 {
2554 if (dump_enabled_p ())
2555 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2556 "mask argument is not an SSA name.\n");
2557 return false;
2558 }
2559
2560 enum vect_def_type mask_dt;
2561 tree mask_vectype;
2562 if (!vect_is_simple_use (mask, stmt_info->vinfo, &mask_dt, &mask_vectype))
2563 {
2564 if (dump_enabled_p ())
2565 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2566 "mask use not simple.\n");
2567 return false;
2568 }
2569
2570 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
2571 if (!mask_vectype)
2572 mask_vectype = get_mask_type_for_scalar_type (vinfo, TREE_TYPE (vectype));
2573
2574 if (!mask_vectype || !VECTOR_BOOLEAN_TYPE_P (mask_vectype))
2575 {
2576 if (dump_enabled_p ())
2577 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2578 "could not find an appropriate vector mask type.\n");
2579 return false;
2580 }
2581
2582 if (maybe_ne (TYPE_VECTOR_SUBPARTS (mask_vectype),
2583 TYPE_VECTOR_SUBPARTS (vectype)))
2584 {
2585 if (dump_enabled_p ())
2586 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2587 "vector mask type %T"
2588 " does not match vector data type %T.\n",
2589 mask_vectype, vectype);
2590
2591 return false;
2592 }
2593
2594 *mask_dt_out = mask_dt;
2595 *mask_vectype_out = mask_vectype;
2596 return true;
2597 }
2598
2599 /* Return true if stored value RHS is suitable for vectorizing store
2600 statement STMT_INFO. When returning true, store the type of the
2601 definition in *RHS_DT_OUT, the type of the vectorized store value in
2602 *RHS_VECTYPE_OUT and the type of the store in *VLS_TYPE_OUT. */
2603
2604 static bool
2605 vect_check_store_rhs (stmt_vec_info stmt_info, tree rhs,
2606 vect_def_type *rhs_dt_out, tree *rhs_vectype_out,
2607 vec_load_store_type *vls_type_out)
2608 {
2609 /* If this is a store of a constant, make sure
2610 native_encode_expr can handle it. */
2611 if (CONSTANT_CLASS_P (rhs) && native_encode_expr (rhs, NULL, 64) == 0)
2612 {
2613 if (dump_enabled_p ())
2614 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2615 "cannot encode constant as a byte sequence.\n");
2616 return false;
2617 }
2618
2619 enum vect_def_type rhs_dt;
2620 tree rhs_vectype;
2621 if (!vect_is_simple_use (rhs, stmt_info->vinfo, &rhs_dt, &rhs_vectype))
2622 {
2623 if (dump_enabled_p ())
2624 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2625 "use not simple.\n");
2626 return false;
2627 }
2628
2629 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
2630 if (rhs_vectype && !useless_type_conversion_p (vectype, rhs_vectype))
2631 {
2632 if (dump_enabled_p ())
2633 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2634 "incompatible vector types.\n");
2635 return false;
2636 }
2637
2638 *rhs_dt_out = rhs_dt;
2639 *rhs_vectype_out = rhs_vectype;
2640 if (rhs_dt == vect_constant_def || rhs_dt == vect_external_def)
2641 *vls_type_out = VLS_STORE_INVARIANT;
2642 else
2643 *vls_type_out = VLS_STORE;
2644 return true;
2645 }
2646
2647 /* Build an all-ones vector mask of type MASKTYPE while vectorizing STMT_INFO.
2648 Note that we support masks with floating-point type, in which case the
2649 floats are interpreted as a bitmask. */
2650
2651 static tree
2652 vect_build_all_ones_mask (stmt_vec_info stmt_info, tree masktype)
2653 {
2654 if (TREE_CODE (masktype) == INTEGER_TYPE)
2655 return build_int_cst (masktype, -1);
2656 else if (TREE_CODE (TREE_TYPE (masktype)) == INTEGER_TYPE)
2657 {
2658 tree mask = build_int_cst (TREE_TYPE (masktype), -1);
2659 mask = build_vector_from_val (masktype, mask);
2660 return vect_init_vector (stmt_info, mask, masktype, NULL);
2661 }
2662 else if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (masktype)))
2663 {
2664 REAL_VALUE_TYPE r;
2665 long tmp[6];
2666 for (int j = 0; j < 6; ++j)
2667 tmp[j] = -1;
2668 real_from_target (&r, tmp, TYPE_MODE (TREE_TYPE (masktype)));
2669 tree mask = build_real (TREE_TYPE (masktype), r);
2670 mask = build_vector_from_val (masktype, mask);
2671 return vect_init_vector (stmt_info, mask, masktype, NULL);
2672 }
2673 gcc_unreachable ();
2674 }
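
/* Note on the floating-point case above: the value built is the float whose
   representation has all bits set (for IEEE formats this is a NaN encoding);
   only the bit pattern matters, since such masks are interpreted as
   bitmasks.  */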
2675
2676 /* Build an all-zero merge value of type VECTYPE while vectorizing
2677 STMT_INFO as a gather load. */
2678
2679 static tree
2680 vect_build_zero_merge_argument (stmt_vec_info stmt_info, tree vectype)
2681 {
2682 tree merge;
2683 if (TREE_CODE (TREE_TYPE (vectype)) == INTEGER_TYPE)
2684 merge = build_int_cst (TREE_TYPE (vectype), 0);
2685 else if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (vectype)))
2686 {
2687 REAL_VALUE_TYPE r;
2688 long tmp[6];
2689 for (int j = 0; j < 6; ++j)
2690 tmp[j] = 0;
2691 real_from_target (&r, tmp, TYPE_MODE (TREE_TYPE (vectype)));
2692 merge = build_real (TREE_TYPE (vectype), r);
2693 }
2694 else
2695 gcc_unreachable ();
2696 merge = build_vector_from_val (vectype, merge);
2697 return vect_init_vector (stmt_info, merge, vectype, NULL);
2698 }
2699
2700 /* Build a gather load call while vectorizing STMT_INFO. Insert new
2701 instructions before GSI and add them to VEC_STMT. GS_INFO describes
2702 the gather load operation. If the load is conditional, MASK is the
2703 unvectorized condition and MASK_DT is its definition type, otherwise
2704 MASK is null. */
2705
2706 static void
2707 vect_build_gather_load_calls (stmt_vec_info stmt_info,
2708 gimple_stmt_iterator *gsi,
2709 stmt_vec_info *vec_stmt,
2710 gather_scatter_info *gs_info,
2711 tree mask)
2712 {
2713 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
2714 class loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
2715 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
2716 poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
2717 int ncopies = vect_get_num_copies (loop_vinfo, vectype);
2718 edge pe = loop_preheader_edge (loop);
2719 enum { NARROW, NONE, WIDEN } modifier;
2720 poly_uint64 gather_off_nunits
2721 = TYPE_VECTOR_SUBPARTS (gs_info->offset_vectype);
2722
2723 tree arglist = TYPE_ARG_TYPES (TREE_TYPE (gs_info->decl));
2724 tree rettype = TREE_TYPE (TREE_TYPE (gs_info->decl));
2725 tree srctype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
2726 tree ptrtype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
2727 tree idxtype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
2728 tree masktype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
2729 tree scaletype = TREE_VALUE (arglist);
2730 tree real_masktype = masktype;
2731 gcc_checking_assert (types_compatible_p (srctype, rettype)
2732 && (!mask
2733 || TREE_CODE (masktype) == INTEGER_TYPE
2734 || types_compatible_p (srctype, masktype)));
2735 if (mask && TREE_CODE (masktype) == INTEGER_TYPE)
2736 masktype = truth_type_for (srctype);
2737
2738 tree mask_halftype = masktype;
2739 tree perm_mask = NULL_TREE;
2740 tree mask_perm_mask = NULL_TREE;
2741 if (known_eq (nunits, gather_off_nunits))
2742 modifier = NONE;
2743 else if (known_eq (nunits * 2, gather_off_nunits))
2744 {
2745 modifier = WIDEN;
2746
2747 /* Currently widening gathers and scatters are only supported for
2748 fixed-length vectors. */
2749 int count = gather_off_nunits.to_constant ();
2750 vec_perm_builder sel (count, count, 1);
2751 for (int i = 0; i < count; ++i)
2752 sel.quick_push (i | (count / 2));
2753
2754 vec_perm_indices indices (sel, 1, count);
2755 perm_mask = vect_gen_perm_mask_checked (gs_info->offset_vectype,
2756 indices);
2757 }
2758 else if (known_eq (nunits, gather_off_nunits * 2))
2759 {
2760 modifier = NARROW;
2761
2762 /* Currently narrowing gathers and scatters are only supported for
2763 fixed-length vectors. */
2764 int count = nunits.to_constant ();
2765 vec_perm_builder sel (count, count, 1);
2766 sel.quick_grow (count);
2767 for (int i = 0; i < count; ++i)
2768 sel[i] = i < count / 2 ? i : i + count / 2;
2769 vec_perm_indices indices (sel, 2, count);
2770 perm_mask = vect_gen_perm_mask_checked (vectype, indices);
2771
2772 ncopies *= 2;
2773
2774 if (mask && masktype == real_masktype)
2775 {
2776 for (int i = 0; i < count; ++i)
2777 sel[i] = i | (count / 2);
2778 indices.new_vector (sel, 2, count);
2779 mask_perm_mask = vect_gen_perm_mask_checked (masktype, indices);
2780 }
2781 else if (mask)
2782 mask_halftype = truth_type_for (gs_info->offset_vectype);
2783 }
2784 else
2785 gcc_unreachable ();
2786
2787 tree scalar_dest = gimple_get_lhs (stmt_info->stmt);
2788 tree vec_dest = vect_create_destination_var (scalar_dest, vectype);
2789
2790 tree ptr = fold_convert (ptrtype, gs_info->base);
2791 if (!is_gimple_min_invariant (ptr))
2792 {
2793 gimple_seq seq;
2794 ptr = force_gimple_operand (ptr, &seq, true, NULL_TREE);
2795 basic_block new_bb = gsi_insert_seq_on_edge_immediate (pe, seq);
2796 gcc_assert (!new_bb);
2797 }
2798
2799 tree scale = build_int_cst (scaletype, gs_info->scale);
2800
2801 tree vec_oprnd0 = NULL_TREE;
2802 tree vec_mask = NULL_TREE;
2803 tree src_op = NULL_TREE;
2804 tree mask_op = NULL_TREE;
2805 tree prev_res = NULL_TREE;
2806 stmt_vec_info prev_stmt_info = NULL;
2807
2808 if (!mask)
2809 {
2810 src_op = vect_build_zero_merge_argument (stmt_info, rettype);
2811 mask_op = vect_build_all_ones_mask (stmt_info, masktype);
2812 }
2813
2814 for (int j = 0; j < ncopies; ++j)
2815 {
2816 tree op, var;
2817 if (modifier == WIDEN && (j & 1))
2818 op = permute_vec_elements (vec_oprnd0, vec_oprnd0,
2819 perm_mask, stmt_info, gsi);
2820 else if (j == 0)
2821 op = vec_oprnd0
2822 = vect_get_vec_def_for_operand (gs_info->offset, stmt_info);
2823 else
2824 op = vec_oprnd0 = vect_get_vec_def_for_stmt_copy (loop_vinfo,
2825 vec_oprnd0);
2826
2827 if (!useless_type_conversion_p (idxtype, TREE_TYPE (op)))
2828 {
2829 gcc_assert (known_eq (TYPE_VECTOR_SUBPARTS (TREE_TYPE (op)),
2830 TYPE_VECTOR_SUBPARTS (idxtype)));
2831 var = vect_get_new_ssa_name (idxtype, vect_simple_var);
2832 op = build1 (VIEW_CONVERT_EXPR, idxtype, op);
2833 gassign *new_stmt = gimple_build_assign (var, VIEW_CONVERT_EXPR, op);
2834 vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
2835 op = var;
2836 }
2837
2838 if (mask)
2839 {
2840 if (mask_perm_mask && (j & 1))
2841 mask_op = permute_vec_elements (mask_op, mask_op,
2842 mask_perm_mask, stmt_info, gsi);
2843 else
2844 {
2845 if (j == 0)
2846 vec_mask = vect_get_vec_def_for_operand (mask, stmt_info);
2847 else if (modifier != NARROW || (j & 1) == 0)
2848 vec_mask = vect_get_vec_def_for_stmt_copy (loop_vinfo,
2849 vec_mask);
2850
2851 mask_op = vec_mask;
2852 if (!useless_type_conversion_p (masktype, TREE_TYPE (vec_mask)))
2853 {
2854 poly_uint64 sub1 = TYPE_VECTOR_SUBPARTS (TREE_TYPE (mask_op));
2855 poly_uint64 sub2 = TYPE_VECTOR_SUBPARTS (masktype);
2856 gcc_assert (known_eq (sub1, sub2));
2857 var = vect_get_new_ssa_name (masktype, vect_simple_var);
2858 mask_op = build1 (VIEW_CONVERT_EXPR, masktype, mask_op);
2859 gassign *new_stmt
2860 = gimple_build_assign (var, VIEW_CONVERT_EXPR, mask_op);
2861 vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
2862 mask_op = var;
2863 }
2864 }
2865 if (modifier == NARROW && masktype != real_masktype)
2866 {
2867 var = vect_get_new_ssa_name (mask_halftype, vect_simple_var);
2868 gassign *new_stmt
2869 = gimple_build_assign (var, (j & 1) ? VEC_UNPACK_HI_EXPR
2870 : VEC_UNPACK_LO_EXPR,
2871 mask_op);
2872 vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
2873 mask_op = var;
2874 }
2875 src_op = mask_op;
2876 }
2877
2878 tree mask_arg = mask_op;
2879 if (masktype != real_masktype)
2880 {
2881 tree utype, optype = TREE_TYPE (mask_op);
2882 if (TYPE_MODE (real_masktype) == TYPE_MODE (optype))
2883 utype = real_masktype;
2884 else
2885 utype = lang_hooks.types.type_for_mode (TYPE_MODE (optype), 1);
2886 var = vect_get_new_ssa_name (utype, vect_scalar_var);
2887 mask_arg = build1 (VIEW_CONVERT_EXPR, utype, mask_op);
2888 gassign *new_stmt
2889 = gimple_build_assign (var, VIEW_CONVERT_EXPR, mask_arg);
2890 vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
2891 mask_arg = var;
2892 if (!useless_type_conversion_p (real_masktype, utype))
2893 {
2894 gcc_assert (TYPE_PRECISION (utype)
2895 <= TYPE_PRECISION (real_masktype));
2896 var = vect_get_new_ssa_name (real_masktype, vect_scalar_var);
2897 new_stmt = gimple_build_assign (var, NOP_EXPR, mask_arg);
2898 vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
2899 mask_arg = var;
2900 }
2901 src_op = build_zero_cst (srctype);
2902 }
2903 gcall *new_call = gimple_build_call (gs_info->decl, 5, src_op, ptr, op,
2904 mask_arg, scale);
2905
2906 stmt_vec_info new_stmt_info;
2907 if (!useless_type_conversion_p (vectype, rettype))
2908 {
2909 gcc_assert (known_eq (TYPE_VECTOR_SUBPARTS (vectype),
2910 TYPE_VECTOR_SUBPARTS (rettype)));
2911 op = vect_get_new_ssa_name (rettype, vect_simple_var);
2912 gimple_call_set_lhs (new_call, op);
2913 vect_finish_stmt_generation (stmt_info, new_call, gsi);
2914 var = make_ssa_name (vec_dest);
2915 op = build1 (VIEW_CONVERT_EXPR, vectype, op);
2916 gassign *new_stmt = gimple_build_assign (var, VIEW_CONVERT_EXPR, op);
2917 new_stmt_info
2918 = vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
2919 }
2920 else
2921 {
2922 var = make_ssa_name (vec_dest, new_call);
2923 gimple_call_set_lhs (new_call, var);
2924 new_stmt_info
2925 = vect_finish_stmt_generation (stmt_info, new_call, gsi);
2926 }
2927
2928 if (modifier == NARROW)
2929 {
2930 if ((j & 1) == 0)
2931 {
2932 prev_res = var;
2933 continue;
2934 }
2935 var = permute_vec_elements (prev_res, var, perm_mask,
2936 stmt_info, gsi);
2937 new_stmt_info = loop_vinfo->lookup_def (var);
2938 }
2939
2940 if (prev_stmt_info == NULL)
2941 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt_info;
2942 else
2943 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt_info;
2944 prev_stmt_info = new_stmt_info;
2945 }
2946 }
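
/* The code built above for each copy is roughly (an illustrative sketch
   with invented names, for an unmasked gather needing no conversions):

       vx_1 = GATHER_BUILTIN (zero_merge, ptr, vec_offset, all_ones_mask,
                              scale);

   possibly with VIEW_CONVERT_EXPRs of the offset and mask operands before
   the call and of the result afterwards, and with a permute when the
   offset and data vectors have different numbers of elements.  */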
2947
2948 /* Prepare the base and offset in GS_INFO for vectorization.
2949 Set *DATAREF_PTR to the loop-invariant base address and *VEC_OFFSET
2950 to the vectorized offset argument for the first copy of STMT_INFO.
2951 STMT_INFO is the statement described by GS_INFO and LOOP is the
2952 containing loop. */
2953
2954 static void
2955 vect_get_gather_scatter_ops (class loop *loop, stmt_vec_info stmt_info,
2956 gather_scatter_info *gs_info,
2957 tree *dataref_ptr, tree *vec_offset)
2958 {
2959 gimple_seq stmts = NULL;
2960 *dataref_ptr = force_gimple_operand (gs_info->base, &stmts, true, NULL_TREE);
2961 if (stmts != NULL)
2962 {
2963 basic_block new_bb;
2964 edge pe = loop_preheader_edge (loop);
2965 new_bb = gsi_insert_seq_on_edge_immediate (pe, stmts);
2966 gcc_assert (!new_bb);
2967 }
2968 *vec_offset = vect_get_vec_def_for_operand (gs_info->offset, stmt_info,
2969 gs_info->offset_vectype);
2970 }
2971
2972 /* Prepare to implement a grouped or strided load or store using
2973 the gather load or scatter store operation described by GS_INFO.
2974 STMT_INFO is the load or store statement.
2975
2976 Set *DATAREF_BUMP to the amount that should be added to the base
2977 address after each copy of the vectorized statement. Set *VEC_OFFSET
2978 to an invariant offset vector in which element I has the value
2979 I * DR_STEP / SCALE. */
2980
2981 static void
2982 vect_get_strided_load_store_ops (stmt_vec_info stmt_info,
2983 loop_vec_info loop_vinfo,
2984 gather_scatter_info *gs_info,
2985 tree *dataref_bump, tree *vec_offset)
2986 {
2987 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
2988 class loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
2989 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
2990 gimple_seq stmts;
2991
2992 tree bump = size_binop (MULT_EXPR,
2993 fold_convert (sizetype, DR_STEP (dr)),
2994 size_int (TYPE_VECTOR_SUBPARTS (vectype)));
2995 *dataref_bump = force_gimple_operand (bump, &stmts, true, NULL_TREE);
2996 if (stmts)
2997 gsi_insert_seq_on_edge_immediate (loop_preheader_edge (loop), stmts);
2998
2999 /* The offset given in GS_INFO can have pointer type, so use the element
3000 type of the offset vector instead. */
3001 tree offset_type = TREE_TYPE (gs_info->offset_vectype);
3003
3004 /* Calculate X = DR_STEP / SCALE and convert it to the appropriate type. */
3005 tree step = size_binop (EXACT_DIV_EXPR, DR_STEP (dr),
3006 ssize_int (gs_info->scale));
3007 step = fold_convert (offset_type, step);
3008 step = force_gimple_operand (step, &stmts, true, NULL_TREE);
3009
3010 /* Create {0, X, X*2, X*3, ...}. */
3011 *vec_offset = gimple_build (&stmts, VEC_SERIES_EXPR, gs_info->offset_vectype,
3012 build_zero_cst (offset_type), step);
3013 if (stmts)
3014 gsi_insert_seq_on_edge_immediate (loop_preheader_edge (loop), stmts);
3015 }
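
/* Illustrative numbers: if DR_STEP is 32 bytes, SCALE is 8 and VECTYPE has
   4 elements, then X is 4, *VEC_OFFSET is { 0, 4, 8, 12, ... } and
   *DATAREF_BUMP is 128 (the step times the number of elements).  */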
3016
3017 /* Return the amount that should be added to a vector pointer to move
3018 to the next or previous copy of AGGR_TYPE. DR_INFO is the data reference
3019 being vectorized and MEMORY_ACCESS_TYPE describes the type of
3020 vectorization. */
3021
3022 static tree
3023 vect_get_data_ptr_increment (dr_vec_info *dr_info, tree aggr_type,
3024 vect_memory_access_type memory_access_type)
3025 {
3026 if (memory_access_type == VMAT_INVARIANT)
3027 return size_zero_node;
3028
3029 tree iv_step = TYPE_SIZE_UNIT (aggr_type);
3030 tree step = vect_dr_behavior (dr_info)->step;
3031 if (tree_int_cst_sgn (step) == -1)
3032 iv_step = fold_build1 (NEGATE_EXPR, TREE_TYPE (iv_step), iv_step);
3033 return iv_step;
3034 }
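
/* For example, if AGGR_TYPE is a 16-byte vector type, the increment is 16
   for a positive step, -16 for a negative step and 0 for an invariant
   access.  */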
3035
3036 /* Check and perform vectorization of BUILT_IN_BSWAP{16,32,64}. */
3037
3038 static bool
3039 vectorizable_bswap (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
3040 stmt_vec_info *vec_stmt, slp_tree slp_node,
3041 tree vectype_in, stmt_vector_for_cost *cost_vec)
3042 {
3043 tree op, vectype;
3044 gcall *stmt = as_a <gcall *> (stmt_info->stmt);
3045 vec_info *vinfo = stmt_info->vinfo;
3046 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
3047 unsigned ncopies;
3048
3049 op = gimple_call_arg (stmt, 0);
3050 vectype = STMT_VINFO_VECTYPE (stmt_info);
3051 poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
3052
3053 /* Multiple types in SLP are handled by creating the appropriate number of
3054 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
3055 case of SLP. */
3056 if (slp_node)
3057 ncopies = 1;
3058 else
3059 ncopies = vect_get_num_copies (loop_vinfo, vectype);
3060
3061 gcc_assert (ncopies >= 1);
3062
3063 tree char_vectype = get_same_sized_vectype (char_type_node, vectype_in);
3064 if (! char_vectype)
3065 return false;
3066
3067 poly_uint64 num_bytes = TYPE_VECTOR_SUBPARTS (char_vectype);
3068 unsigned word_bytes;
3069 if (!constant_multiple_p (num_bytes, nunits, &word_bytes))
3070 return false;
3071
3072 /* The encoding uses one stepped pattern for each byte in the word. */
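/* For instance (illustrative): for a 32-bit bswap WORD_BYTES is 4 and the
   selector starts { 3, 2, 1, 0, 7, 6, 5, 4, 11, 10, 9, 8, ... }.  */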
3073 vec_perm_builder elts (num_bytes, word_bytes, 3);
3074 for (unsigned i = 0; i < 3; ++i)
3075 for (unsigned j = 0; j < word_bytes; ++j)
3076 elts.quick_push ((i + 1) * word_bytes - j - 1);
3077
3078 vec_perm_indices indices (elts, 1, num_bytes);
3079 if (!can_vec_perm_const_p (TYPE_MODE (char_vectype), indices))
3080 return false;
3081
3082 if (! vec_stmt)
3083 {
3084 STMT_VINFO_TYPE (stmt_info) = call_vec_info_type;
3085 DUMP_VECT_SCOPE ("vectorizable_bswap");
3086 if (! slp_node)
3087 {
3088 record_stmt_cost (cost_vec,
3089 1, vector_stmt, stmt_info, 0, vect_prologue);
3090 record_stmt_cost (cost_vec,
3091 ncopies, vec_perm, stmt_info, 0, vect_body);
3092 }
3093 return true;
3094 }
3095
3096 tree bswap_vconst = vec_perm_indices_to_tree (char_vectype, indices);
3097
3098 /* Transform. */
3099 vec<tree> vec_oprnds = vNULL;
3100 stmt_vec_info new_stmt_info = NULL;
3101 stmt_vec_info prev_stmt_info = NULL;
3102 for (unsigned j = 0; j < ncopies; j++)
3103 {
3104 /* Handle uses. */
3105 if (j == 0)
3106 vect_get_vec_defs (op, NULL, stmt_info, &vec_oprnds, NULL, slp_node);
3107 else
3108 vect_get_vec_defs_for_stmt_copy (vinfo, &vec_oprnds, NULL);
3109
3110 /* Arguments are ready. Create the new vector stmt. */
3111 unsigned i;
3112 tree vop;
3113 FOR_EACH_VEC_ELT (vec_oprnds, i, vop)
3114 {
3115 gimple *new_stmt;
3116 tree tem = make_ssa_name (char_vectype);
3117 new_stmt = gimple_build_assign (tem, build1 (VIEW_CONVERT_EXPR,
3118 char_vectype, vop));
3119 vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
3120 tree tem2 = make_ssa_name (char_vectype);
3121 new_stmt = gimple_build_assign (tem2, VEC_PERM_EXPR,
3122 tem, tem, bswap_vconst);
3123 vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
3124 tem = make_ssa_name (vectype);
3125 new_stmt = gimple_build_assign (tem, build1 (VIEW_CONVERT_EXPR,
3126 vectype, tem2));
3127 new_stmt_info
3128 = vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
3129 if (slp_node)
3130 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt_info);
3131 }
3132
3133 if (slp_node)
3134 continue;
3135
3136 if (j == 0)
3137 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt_info;
3138 else
3139 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt_info;
3140
3141 prev_stmt_info = new_stmt_info;
3142 }
3143
3144 vec_oprnds.release ();
3145 return true;
3146 }
3147
3148 /* Return true if vector types VECTYPE_IN and VECTYPE_OUT have
3149 integer elements and if we can narrow VECTYPE_IN to VECTYPE_OUT
3150 in a single step. On success, store the binary pack code in
3151 *CONVERT_CODE. */
3152
3153 static bool
3154 simple_integer_narrowing (tree vectype_out, tree vectype_in,
3155 tree_code *convert_code)
3156 {
3157 if (!INTEGRAL_TYPE_P (TREE_TYPE (vectype_out))
3158 || !INTEGRAL_TYPE_P (TREE_TYPE (vectype_in)))
3159 return false;
3160
3161 tree_code code;
3162 int multi_step_cvt = 0;
3163 auto_vec <tree, 8> interm_types;
3164 if (!supportable_narrowing_operation (NOP_EXPR, vectype_out, vectype_in,
3165 &code, &multi_step_cvt, &interm_types)
3166 || multi_step_cvt)
3167 return false;
3168
3169 *convert_code = code;
3170 return true;
3171 }
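
/* For example (illustrative): narrowing vectors of 64-bit integers to a
   vector of 32-bit integers in a single step would typically use a
   VEC_PACK_TRUNC_EXPR of two input vectors; *CONVERT_CODE records that pack
   code for the caller.  */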
3172
3173 /* Function vectorizable_call.
3174
3175 Check if STMT_INFO performs a function call that can be vectorized.
3176 If VEC_STMT is also passed, vectorize STMT_INFO: create a vectorized
3177 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
3178 Return true if STMT_INFO is vectorizable in this way. */
3179
3180 static bool
3181 vectorizable_call (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
3182 stmt_vec_info *vec_stmt, slp_tree slp_node,
3183 stmt_vector_for_cost *cost_vec)
3184 {
3185 gcall *stmt;
3186 tree vec_dest;
3187 tree scalar_dest;
3188 tree op;
3189 tree vec_oprnd0 = NULL_TREE, vec_oprnd1 = NULL_TREE;
3190 stmt_vec_info prev_stmt_info;
3191 tree vectype_out, vectype_in;
3192 poly_uint64 nunits_in;
3193 poly_uint64 nunits_out;
3194 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
3195 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
3196 vec_info *vinfo = stmt_info->vinfo;
3197 tree fndecl, new_temp, rhs_type;
3198 enum vect_def_type dt[4]
3199 = { vect_unknown_def_type, vect_unknown_def_type, vect_unknown_def_type,
3200 vect_unknown_def_type };
3201 tree vectypes[ARRAY_SIZE (dt)] = {};
3202 int ndts = ARRAY_SIZE (dt);
3203 int ncopies, j;
3204 auto_vec<tree, 8> vargs;
3205 auto_vec<tree, 8> orig_vargs;
3206 enum { NARROW, NONE, WIDEN } modifier;
3207 size_t i, nargs;
3208 tree lhs;
3209
3210 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
3211 return false;
3212
3213 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
3214 && ! vec_stmt)
3215 return false;
3216
3217 /* Is STMT_INFO a vectorizable call? */
3218 stmt = dyn_cast <gcall *> (stmt_info->stmt);
3219 if (!stmt)
3220 return false;
3221
3222 if (gimple_call_internal_p (stmt)
3223 && (internal_load_fn_p (gimple_call_internal_fn (stmt))
3224 || internal_store_fn_p (gimple_call_internal_fn (stmt))))
3225 /* Handled by vectorizable_load and vectorizable_store. */
3226 return false;
3227
3228 if (gimple_call_lhs (stmt) == NULL_TREE
3229 || TREE_CODE (gimple_call_lhs (stmt)) != SSA_NAME)
3230 return false;
3231
3232 gcc_checking_assert (!stmt_can_throw_internal (cfun, stmt));
3233
3234 vectype_out = STMT_VINFO_VECTYPE (stmt_info);
3235
3236 /* Process function arguments. */
3237 rhs_type = NULL_TREE;
3238 vectype_in = NULL_TREE;
3239 nargs = gimple_call_num_args (stmt);
3240
3241 /* Bail out if the function has more than four arguments; apart from fma
3242 and a few internal functions, we do not have interesting builtin functions
3243 to vectorize with more than two arguments. No arguments is also not good. */
3244 if (nargs == 0 || nargs > 4)
3245 return false;
3246
3247 /* Ignore the arguments of IFN_GOMP_SIMD_LANE; they are magic. */
3248 combined_fn cfn = gimple_call_combined_fn (stmt);
3249 if (cfn == CFN_GOMP_SIMD_LANE)
3250 {
3251 nargs = 0;
3252 rhs_type = unsigned_type_node;
3253 }
3254
3255 int mask_opno = -1;
3256 if (internal_fn_p (cfn))
3257 mask_opno = internal_fn_mask_index (as_internal_fn (cfn));
3258
3259 for (i = 0; i < nargs; i++)
3260 {
3261 op = gimple_call_arg (stmt, i);
3262 if (!vect_is_simple_use (op, vinfo, &dt[i], &vectypes[i]))
3263 {
3264 if (dump_enabled_p ())
3265 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3266 "use not simple.\n");
3267 return false;
3268 }
3269
3270 /* Skip the mask argument to an internal function. This operand
3271 has been converted via a pattern if necessary. */
3272 if ((int) i == mask_opno)
3273 continue;
3274
3275 /* We can only handle calls with arguments of the same type. */
3276 if (rhs_type
3277 && !types_compatible_p (rhs_type, TREE_TYPE (op)))
3278 {
3279 if (dump_enabled_p ())
3280 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3281 "argument types differ.\n");
3282 return false;
3283 }
3284 if (!rhs_type)
3285 rhs_type = TREE_TYPE (op);
3286
3287 if (!vectype_in)
3288 vectype_in = vectypes[i];
3289 else if (vectypes[i]
3290 && !types_compatible_p (vectypes[i], vectype_in))
3291 {
3292 if (dump_enabled_p ())
3293 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3294 "argument vector types differ.\n");
3295 return false;
3296 }
3297 }
3298 /* If all arguments are external or constant defs, infer the vector type
3299 from the scalar type. */
3300 if (!vectype_in)
3301 vectype_in = get_vectype_for_scalar_type (vinfo, rhs_type);
3302 if (vec_stmt)
3303 gcc_assert (vectype_in);
3304 if (!vectype_in)
3305 {
3306 if (dump_enabled_p ())
3307 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3308 "no vectype for scalar type %T\n", rhs_type);
3309
3310 return false;
3311 }
3312 /* FORNOW: we don't yet support mixtures of vector sizes for calls,
3313 just mixtures of nunits. E.g. DI->SI versions of __builtin_ctz*
3314 are traditionally vectorized as two VnDI->VnDI IFN_CTZs followed
3315 by a pack of the two vectors into an SI vector. We would need
3316 separate code to handle direct VnDI->VnSI IFN_CTZs. */
3317 if (TYPE_SIZE (vectype_in) != TYPE_SIZE (vectype_out))
3318 {
3319 if (dump_enabled_p ())
3320 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3321 "mismatched vector sizes %T and %T\n",
3322 vectype_in, vectype_out);
3323 return false;
3324 }
3325
3326 /* FORNOW */
3327 nunits_in = TYPE_VECTOR_SUBPARTS (vectype_in);
3328 nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
3329 if (known_eq (nunits_in * 2, nunits_out))
3330 modifier = NARROW;
3331 else if (known_eq (nunits_out, nunits_in))
3332 modifier = NONE;
3333 else if (known_eq (nunits_out * 2, nunits_in))
3334 modifier = WIDEN;
3335 else
3336 return false;
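  /* An illustrative sketch (assuming 128-bit vectors throughout): a
     V4SF -> V4SF call such as sqrtf is NONE; two V2DF argument vectors
     producing one V4SI result vector (twice as many, narrower elements)
     is NARROW; the reverse arrangement, V4SI arguments producing a V2DF
     result, is WIDEN.  */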
3337
3338 /* We only handle functions that do not read or clobber memory. */
3339 if (gimple_vuse (stmt))
3340 {
3341 if (dump_enabled_p ())
3342 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3343 "function reads from or writes to memory.\n");
3344 return false;
3345 }
3346
3347 /* For now, we only vectorize functions if a target specific builtin
3348 is available. TODO -- in some cases, it might be profitable to
3349 insert the calls for pieces of the vector, in order to be able
3350 to vectorize other operations in the loop. */
3351 fndecl = NULL_TREE;
3352 internal_fn ifn = IFN_LAST;
3353 tree callee = gimple_call_fndecl (stmt);
3354
3355 /* First try using an internal function. */
3356 tree_code convert_code = ERROR_MARK;
3357 if (cfn != CFN_LAST
3358 && (modifier == NONE
3359 || (modifier == NARROW
3360 && simple_integer_narrowing (vectype_out, vectype_in,
3361 &convert_code))))
3362 ifn = vectorizable_internal_function (cfn, callee, vectype_out,
3363 vectype_in);
3364
3365 /* If that fails, try asking for a target-specific built-in function. */
3366 if (ifn == IFN_LAST)
3367 {
3368 if (cfn != CFN_LAST)
3369 fndecl = targetm.vectorize.builtin_vectorized_function
3370 (cfn, vectype_out, vectype_in);
3371 else if (callee && fndecl_built_in_p (callee, BUILT_IN_MD))
3372 fndecl = targetm.vectorize.builtin_md_vectorized_function
3373 (callee, vectype_out, vectype_in);
3374 }
3375
3376 if (ifn == IFN_LAST && !fndecl)
3377 {
3378 if (cfn == CFN_GOMP_SIMD_LANE
3379 && !slp_node
3380 && loop_vinfo
3381 && LOOP_VINFO_LOOP (loop_vinfo)->simduid
3382 && TREE_CODE (gimple_call_arg (stmt, 0)) == SSA_NAME
3383 && LOOP_VINFO_LOOP (loop_vinfo)->simduid
3384 == SSA_NAME_VAR (gimple_call_arg (stmt, 0)))
3385 {
3386 /* We can handle IFN_GOMP_SIMD_LANE by returning a
3387 { 0, 1, 2, ... vf - 1 } vector. */
3388 gcc_assert (nargs == 0);
3389 }
3390 else if (modifier == NONE
3391 && (gimple_call_builtin_p (stmt, BUILT_IN_BSWAP16)
3392 || gimple_call_builtin_p (stmt, BUILT_IN_BSWAP32)
3393 || gimple_call_builtin_p (stmt, BUILT_IN_BSWAP64)))
3394 return vectorizable_bswap (stmt_info, gsi, vec_stmt, slp_node,
3395 vectype_in, cost_vec);
3396 else
3397 {
3398 if (dump_enabled_p ())
3399 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3400 "function is not vectorizable.\n");
3401 return false;
3402 }
3403 }
3404
3405 if (slp_node)
3406 ncopies = 1;
3407 else if (modifier == NARROW && ifn == IFN_LAST)
3408 ncopies = vect_get_num_copies (loop_vinfo, vectype_out);
3409 else
3410 ncopies = vect_get_num_copies (loop_vinfo, vectype_in);
3411
3412 /* Sanity check: make sure that at least one copy of the vectorized stmt
3413 needs to be generated. */
3414 gcc_assert (ncopies >= 1);
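  /* A rough example: with a vectorization factor of 8 and V4SI vectors
     for both input and output, vect_get_num_copies returns 8 / 4 = 2,
     i.e. two vector calls are emitted per scalar call.  For the NARROW
     case handled via a target builtin the count is based on vectype_out,
     since each output vector consumes two input vectors.  */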
3415
3416 vec_loop_masks *masks = (loop_vinfo ? &LOOP_VINFO_MASKS (loop_vinfo) : NULL);
3417 if (!vec_stmt) /* transformation not required. */
3418 {
3419 STMT_VINFO_TYPE (stmt_info) = call_vec_info_type;
3420 DUMP_VECT_SCOPE ("vectorizable_call");
3421 vect_model_simple_cost (stmt_info, ncopies, dt, ndts, slp_node, cost_vec);
3422 if (ifn != IFN_LAST && modifier == NARROW && !slp_node)
3423 record_stmt_cost (cost_vec, ncopies / 2,
3424 vec_promote_demote, stmt_info, 0, vect_body);
3425
3426 if (loop_vinfo && mask_opno >= 0)
3427 {
3428 unsigned int nvectors = (slp_node
3429 ? SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node)
3430 : ncopies);
3431 tree scalar_mask = gimple_call_arg (stmt_info->stmt, mask_opno);
3432 vect_record_loop_mask (loop_vinfo, masks, nvectors,
3433 vectype_out, scalar_mask);
3434 }
3435 return true;
3436 }
3437
3438 /* Transform. */
3439
3440 if (dump_enabled_p ())
3441 dump_printf_loc (MSG_NOTE, vect_location, "transform call.\n");
3442
3443 /* Handle def. */
3444 scalar_dest = gimple_call_lhs (stmt);
3445 vec_dest = vect_create_destination_var (scalar_dest, vectype_out);
3446
3447 bool masked_loop_p = loop_vinfo && LOOP_VINFO_FULLY_MASKED_P (loop_vinfo);
3448
3449 stmt_vec_info new_stmt_info = NULL;
3450 prev_stmt_info = NULL;
3451 if (modifier == NONE || ifn != IFN_LAST)
3452 {
3453 tree prev_res = NULL_TREE;
3454 vargs.safe_grow (nargs);
3455 orig_vargs.safe_grow (nargs);
3456 for (j = 0; j < ncopies; ++j)
3457 {
3458 /* Build argument list for the vectorized call. */
3459 if (slp_node)
3460 {
3461 auto_vec<vec<tree> > vec_defs (nargs);
3462 vec<tree> vec_oprnds0;
3463
3464 vect_get_slp_defs (slp_node, &vec_defs);
3465 vec_oprnds0 = vec_defs[0];
3466
3467 /* Arguments are ready. Create the new vector stmt. */
3468 FOR_EACH_VEC_ELT (vec_oprnds0, i, vec_oprnd0)
3469 {
3470 size_t k;
3471 for (k = 0; k < nargs; k++)
3472 {
3473 vec<tree> vec_oprndsk = vec_defs[k];
3474 vargs[k] = vec_oprndsk[i];
3475 }
3476 if (modifier == NARROW)
3477 {
3478 /* We don't define any narrowing conditional functions
3479 at present. */
3480 gcc_assert (mask_opno < 0);
3481 tree half_res = make_ssa_name (vectype_in);
3482 gcall *call
3483 = gimple_build_call_internal_vec (ifn, vargs);
3484 gimple_call_set_lhs (call, half_res);
3485 gimple_call_set_nothrow (call, true);
3486 vect_finish_stmt_generation (stmt_info, call, gsi);
3487 if ((i & 1) == 0)
3488 {
3489 prev_res = half_res;
3490 continue;
3491 }
3492 new_temp = make_ssa_name (vec_dest);
3493 gimple *new_stmt
3494 = gimple_build_assign (new_temp, convert_code,
3495 prev_res, half_res);
3496 new_stmt_info
3497 = vect_finish_stmt_generation (stmt_info, new_stmt,
3498 gsi);
3499 }
3500 else
3501 {
3502 if (mask_opno >= 0 && masked_loop_p)
3503 {
3504 unsigned int vec_num = vec_oprnds0.length ();
3505 /* Always true for SLP. */
3506 gcc_assert (ncopies == 1);
3507 tree mask = vect_get_loop_mask (gsi, masks, vec_num,
3508 vectype_out, i);
3509 vargs[mask_opno] = prepare_load_store_mask
3510 (TREE_TYPE (mask), mask, vargs[mask_opno], gsi);
3511 }
3512
3513 gcall *call;
3514 if (ifn != IFN_LAST)
3515 call = gimple_build_call_internal_vec (ifn, vargs);
3516 else
3517 call = gimple_build_call_vec (fndecl, vargs);
3518 new_temp = make_ssa_name (vec_dest, call);
3519 gimple_call_set_lhs (call, new_temp);
3520 gimple_call_set_nothrow (call, true);
3521 new_stmt_info
3522 = vect_finish_stmt_generation (stmt_info, call, gsi);
3523 }
3524 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt_info);
3525 }
3526
3527 for (i = 0; i < nargs; i++)
3528 {
3529 vec<tree> vec_oprndsi = vec_defs[i];
3530 vec_oprndsi.release ();
3531 }
3532 continue;
3533 }
3534
3535 if (mask_opno >= 0 && !vectypes[mask_opno])
3536 {
3537 gcc_assert (modifier != WIDEN);
3538 vectypes[mask_opno] = truth_type_for (vectype_in);
3539 }
3540
3541 for (i = 0; i < nargs; i++)
3542 {
3543 op = gimple_call_arg (stmt, i);
3544 if (j == 0)
3545 vec_oprnd0
3546 = vect_get_vec_def_for_operand (op, stmt_info, vectypes[i]);
3547 else
3548 vec_oprnd0
3549 = vect_get_vec_def_for_stmt_copy (vinfo, orig_vargs[i]);
3550
3551 orig_vargs[i] = vargs[i] = vec_oprnd0;
3552 }
3553
3554 if (mask_opno >= 0 && masked_loop_p)
3555 {
3556 tree mask = vect_get_loop_mask (gsi, masks, ncopies,
3557 vectype_out, j);
3558 vargs[mask_opno]
3559 = prepare_load_store_mask (TREE_TYPE (mask), mask,
3560 vargs[mask_opno], gsi);
3561 }
3562
3563 if (cfn == CFN_GOMP_SIMD_LANE)
3564 {
3565 tree cst = build_index_vector (vectype_out, j * nunits_out, 1);
3566 tree new_var
3567 = vect_get_new_ssa_name (vectype_out, vect_simple_var, "cst_");
3568 gimple *init_stmt = gimple_build_assign (new_var, cst);
3569 vect_init_vector_1 (stmt_info, init_stmt, NULL);
3570 new_temp = make_ssa_name (vec_dest);
3571 gimple *new_stmt = gimple_build_assign (new_temp, new_var);
3572 new_stmt_info
3573 = vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
3574 }
3575 else if (modifier == NARROW)
3576 {
3577 /* We don't define any narrowing conditional functions at
3578 present. */
3579 gcc_assert (mask_opno < 0);
3580 tree half_res = make_ssa_name (vectype_in);
3581 gcall *call = gimple_build_call_internal_vec (ifn, vargs);
3582 gimple_call_set_lhs (call, half_res);
3583 gimple_call_set_nothrow (call, true);
3584 vect_finish_stmt_generation (stmt_info, call, gsi);
3585 if ((j & 1) == 0)
3586 {
3587 prev_res = half_res;
3588 continue;
3589 }
3590 new_temp = make_ssa_name (vec_dest);
3591 gassign *new_stmt = gimple_build_assign (new_temp, convert_code,
3592 prev_res, half_res);
3593 new_stmt_info
3594 = vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
3595 }
3596 else
3597 {
3598 gcall *call;
3599 if (ifn != IFN_LAST)
3600 call = gimple_build_call_internal_vec (ifn, vargs);
3601 else
3602 call = gimple_build_call_vec (fndecl, vargs);
3603 new_temp = make_ssa_name (vec_dest, call);
3604 gimple_call_set_lhs (call, new_temp);
3605 gimple_call_set_nothrow (call, true);
3606 new_stmt_info
3607 = vect_finish_stmt_generation (stmt_info, call, gsi);
3608 }
3609
3610 if (j == (modifier == NARROW ? 1 : 0))
3611 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt_info;
3612 else
3613 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt_info;
3614
3615 prev_stmt_info = new_stmt_info;
3616 }
3617 }
3618 else if (modifier == NARROW)
3619 {
3620 /* We don't define any narrowing conditional functions at present. */
3621 gcc_assert (mask_opno < 0);
3622 for (j = 0; j < ncopies; ++j)
3623 {
3624 /* Build argument list for the vectorized call. */
3625 if (j == 0)
3626 vargs.create (nargs * 2);
3627 else
3628 vargs.truncate (0);
3629
3630 if (slp_node)
3631 {
3632 auto_vec<vec<tree> > vec_defs (nargs);
3633 vec<tree> vec_oprnds0;
3634
3635 vect_get_slp_defs (slp_node, &vec_defs);
3636 vec_oprnds0 = vec_defs[0];
3637
3638 /* Arguments are ready. Create the new vector stmt. */
3639 for (i = 0; vec_oprnds0.iterate (i, &vec_oprnd0); i += 2)
3640 {
3641 size_t k;
3642 vargs.truncate (0);
3643 for (k = 0; k < nargs; k++)
3644 {
3645 vec<tree> vec_oprndsk = vec_defs[k];
3646 vargs.quick_push (vec_oprndsk[i]);
3647 vargs.quick_push (vec_oprndsk[i + 1]);
3648 }
3649 gcall *call;
3650 if (ifn != IFN_LAST)
3651 call = gimple_build_call_internal_vec (ifn, vargs);
3652 else
3653 call = gimple_build_call_vec (fndecl, vargs);
3654 new_temp = make_ssa_name (vec_dest, call);
3655 gimple_call_set_lhs (call, new_temp);
3656 gimple_call_set_nothrow (call, true);
3657 new_stmt_info
3658 = vect_finish_stmt_generation (stmt_info, call, gsi);
3659 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt_info);
3660 }
3661
3662 for (i = 0; i < nargs; i++)
3663 {
3664 vec<tree> vec_oprndsi = vec_defs[i];
3665 vec_oprndsi.release ();
3666 }
3667 continue;
3668 }
3669
3670 for (i = 0; i < nargs; i++)
3671 {
3672 op = gimple_call_arg (stmt, i);
3673 if (j == 0)
3674 {
3675 vec_oprnd0
3676 = vect_get_vec_def_for_operand (op, stmt_info,
3677 vectypes[i]);
3678 vec_oprnd1
3679 = vect_get_vec_def_for_stmt_copy (vinfo, vec_oprnd0);
3680 }
3681 else
3682 {
3683 vec_oprnd1 = gimple_call_arg (new_stmt_info->stmt,
3684 2 * i + 1);
3685 vec_oprnd0
3686 = vect_get_vec_def_for_stmt_copy (vinfo, vec_oprnd1);
3687 vec_oprnd1
3688 = vect_get_vec_def_for_stmt_copy (vinfo, vec_oprnd0);
3689 }
3690
3691 vargs.quick_push (vec_oprnd0);
3692 vargs.quick_push (vec_oprnd1);
3693 }
3694
3695 gcall *new_stmt = gimple_build_call_vec (fndecl, vargs);
3696 new_temp = make_ssa_name (vec_dest, new_stmt);
3697 gimple_call_set_lhs (new_stmt, new_temp);
3698 new_stmt_info
3699 = vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
3700
3701 if (j == 0)
3702 STMT_VINFO_VEC_STMT (stmt_info) = new_stmt_info;
3703 else
3704 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt_info;
3705
3706 prev_stmt_info = new_stmt_info;
3707 }
3708
3709 *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
3710 }
3711 else
3712 /* No current target implements this case. */
3713 return false;
3714
3715 vargs.release ();
3716
3717 /* The call in STMT might prevent it from being removed in dce.
3718 However, we cannot remove it here, because of the way the ssa name
3719 it defines is mapped to the new definition. So just replace the
3720 rhs of the statement with something harmless. */
3721
3722 if (slp_node)
3723 return true;
3724
3725 stmt_info = vect_orig_stmt (stmt_info);
3726 lhs = gimple_get_lhs (stmt_info->stmt);
3727
3728 gassign *new_stmt
3729 = gimple_build_assign (lhs, build_zero_cst (TREE_TYPE (lhs)));
3730 vinfo->replace_stmt (gsi, stmt_info, new_stmt);
3731
3732 return true;
3733 }
3734
3735
3736 struct simd_call_arg_info
3737 {
3738 tree vectype;
3739 tree op;
3740 HOST_WIDE_INT linear_step;
3741 enum vect_def_type dt;
3742 unsigned int align;
3743 bool simd_lane_linear;
3744 };
3745
3746 /* Helper function of vectorizable_simd_clone_call. If OP, an SSA_NAME,
3747 is linear within a simd lane (but not within the whole loop), note it in
3748 *ARGINFO. */
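/* For example (an illustrative sketch): if OP is defined as

       _1 = .GOMP_SIMD_LANE (simduid.0_6);
       _2 = (sizetype) _1;
       _3 = _2 * 4;
       OP = &a p+ _3;

   then OP advances by 4 bytes per simd lane, and we record &a as the
   base and 4 as the linear step in *ARGINFO.  */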
3749
3750 static void
3751 vect_simd_lane_linear (tree op, class loop *loop,
3752 struct simd_call_arg_info *arginfo)
3753 {
3754 gimple *def_stmt = SSA_NAME_DEF_STMT (op);
3755
3756 if (!is_gimple_assign (def_stmt)
3757 || gimple_assign_rhs_code (def_stmt) != POINTER_PLUS_EXPR
3758 || !is_gimple_min_invariant (gimple_assign_rhs1 (def_stmt)))
3759 return;
3760
3761 tree base = gimple_assign_rhs1 (def_stmt);
3762 HOST_WIDE_INT linear_step = 0;
3763 tree v = gimple_assign_rhs2 (def_stmt);
3764 while (TREE_CODE (v) == SSA_NAME)
3765 {
3766 tree t;
3767 def_stmt = SSA_NAME_DEF_STMT (v);
3768 if (is_gimple_assign (def_stmt))
3769 switch (gimple_assign_rhs_code (def_stmt))
3770 {
3771 case PLUS_EXPR:
3772 t = gimple_assign_rhs2 (def_stmt);
3773 if (linear_step || TREE_CODE (t) != INTEGER_CST)
3774 return;
3775 base = fold_build2 (POINTER_PLUS_EXPR, TREE_TYPE (base), base, t);
3776 v = gimple_assign_rhs1 (def_stmt);
3777 continue;
3778 case MULT_EXPR:
3779 t = gimple_assign_rhs2 (def_stmt);
3780 if (linear_step || !tree_fits_shwi_p (t) || integer_zerop (t))
3781 return;
3782 linear_step = tree_to_shwi (t);
3783 v = gimple_assign_rhs1 (def_stmt);
3784 continue;
3785 CASE_CONVERT:
3786 t = gimple_assign_rhs1 (def_stmt);
3787 if (TREE_CODE (TREE_TYPE (t)) != INTEGER_TYPE
3788 || (TYPE_PRECISION (TREE_TYPE (v))
3789 < TYPE_PRECISION (TREE_TYPE (t))))
3790 return;
3791 if (!linear_step)
3792 linear_step = 1;
3793 v = t;
3794 continue;
3795 default:
3796 return;
3797 }
3798 else if (gimple_call_internal_p (def_stmt, IFN_GOMP_SIMD_LANE)
3799 && loop->simduid
3800 && TREE_CODE (gimple_call_arg (def_stmt, 0)) == SSA_NAME
3801 && (SSA_NAME_VAR (gimple_call_arg (def_stmt, 0))
3802 == loop->simduid))
3803 {
3804 if (!linear_step)
3805 linear_step = 1;
3806 arginfo->linear_step = linear_step;
3807 arginfo->op = base;
3808 arginfo->simd_lane_linear = true;
3809 return;
3810 }
3811 }
3812 }
3813
3814 /* Return the number of elements in vector type VECTYPE, which is associated
3815 with a SIMD clone. At present these vectors always have a constant
3816 length. */
3817
3818 static unsigned HOST_WIDE_INT
3819 simd_clone_subparts (tree vectype)
3820 {
3821 return TYPE_VECTOR_SUBPARTS (vectype).to_constant ();
3822 }
3823
3824 /* Function vectorizable_simd_clone_call.
3825
3826 Check if STMT_INFO performs a function call that can be vectorized
3827 by calling a simd clone of the function.
3828 If VEC_STMT is also passed, vectorize STMT_INFO: create a vectorized
3829 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
3830 Return true if STMT_INFO is vectorizable in this way. */
3831
3832 static bool
3833 vectorizable_simd_clone_call (stmt_vec_info stmt_info,
3834 gimple_stmt_iterator *gsi,
3835 stmt_vec_info *vec_stmt, slp_tree slp_node,
3836 stmt_vector_for_cost *)
3837 {
3838 tree vec_dest;
3839 tree scalar_dest;
3840 tree op, type;
3841 tree vec_oprnd0 = NULL_TREE;
3842 stmt_vec_info prev_stmt_info;
3843 tree vectype;
3844 unsigned int nunits;
3845 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
3846 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
3847 vec_info *vinfo = stmt_info->vinfo;
3848 class loop *loop = loop_vinfo ? LOOP_VINFO_LOOP (loop_vinfo) : NULL;
3849 tree fndecl, new_temp;
3850 int ncopies, j;
3851 auto_vec<simd_call_arg_info> arginfo;
3852 vec<tree> vargs = vNULL;
3853 size_t i, nargs;
3854 tree lhs, rtype, ratype;
3855 vec<constructor_elt, va_gc> *ret_ctor_elts = NULL;
3856
3857 /* Is STMT a vectorizable call? */
3858 gcall *stmt = dyn_cast <gcall *> (stmt_info->stmt);
3859 if (!stmt)
3860 return false;
3861
3862 fndecl = gimple_call_fndecl (stmt);
3863 if (fndecl == NULL_TREE)
3864 return false;
3865
3866 struct cgraph_node *node = cgraph_node::get (fndecl);
3867 if (node == NULL || node->simd_clones == NULL)
3868 return false;
3869
3870 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
3871 return false;
3872
3873 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
3874 && ! vec_stmt)
3875 return false;
3876
3877 if (gimple_call_lhs (stmt)
3878 && TREE_CODE (gimple_call_lhs (stmt)) != SSA_NAME)
3879 return false;
3880
3881 gcc_checking_assert (!stmt_can_throw_internal (cfun, stmt));
3882
3883 vectype = STMT_VINFO_VECTYPE (stmt_info);
3884
3885 if (loop_vinfo && nested_in_vect_loop_p (loop, stmt_info))
3886 return false;
3887
3888 /* FORNOW */
3889 if (slp_node)
3890 return false;
3891
3892 /* Process function arguments. */
3893 nargs = gimple_call_num_args (stmt);
3894
3895 /* Bail out if the function has zero arguments. */
3896 if (nargs == 0)
3897 return false;
3898
3899 arginfo.reserve (nargs, true);
3900
3901 for (i = 0; i < nargs; i++)
3902 {
3903 simd_call_arg_info thisarginfo;
3904 affine_iv iv;
3905
3906 thisarginfo.linear_step = 0;
3907 thisarginfo.align = 0;
3908 thisarginfo.op = NULL_TREE;
3909 thisarginfo.simd_lane_linear = false;
3910
3911 op = gimple_call_arg (stmt, i);
3912 if (!vect_is_simple_use (op, vinfo, &thisarginfo.dt,
3913 &thisarginfo.vectype)
3914 || thisarginfo.dt == vect_uninitialized_def)
3915 {
3916 if (dump_enabled_p ())
3917 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3918 "use not simple.\n");
3919 return false;
3920 }
3921
3922 if (thisarginfo.dt == vect_constant_def
3923 || thisarginfo.dt == vect_external_def)
3924 gcc_assert (thisarginfo.vectype == NULL_TREE);
3925 else
3926 gcc_assert (thisarginfo.vectype != NULL_TREE);
3927
3928 /* For linear arguments, the analysis phase should have saved
3929 the base and step in STMT_VINFO_SIMD_CLONE_INFO. */
3930 if (i * 3 + 4 <= STMT_VINFO_SIMD_CLONE_INFO (stmt_info).length ()
3931 && STMT_VINFO_SIMD_CLONE_INFO (stmt_info)[i * 3 + 2])
3932 {
3933 gcc_assert (vec_stmt);
3934 thisarginfo.linear_step
3935 = tree_to_shwi (STMT_VINFO_SIMD_CLONE_INFO (stmt_info)[i * 3 + 2]);
3936 thisarginfo.op
3937 = STMT_VINFO_SIMD_CLONE_INFO (stmt_info)[i * 3 + 1];
3938 thisarginfo.simd_lane_linear
3939 = (STMT_VINFO_SIMD_CLONE_INFO (stmt_info)[i * 3 + 3]
3940 == boolean_true_node);
3941 /* If the loop has been peeled for alignment, we need to adjust the base accordingly. */
3942 tree n1 = LOOP_VINFO_NITERS_UNCHANGED (loop_vinfo);
3943 tree n2 = LOOP_VINFO_NITERS (loop_vinfo);
3944 if (n1 != n2 && !thisarginfo.simd_lane_linear)
3945 {
3946 tree bias = fold_build2 (MINUS_EXPR, TREE_TYPE (n1), n1, n2);
3947 tree step = STMT_VINFO_SIMD_CLONE_INFO (stmt_info)[i * 3 + 2];
3948 tree opt = TREE_TYPE (thisarginfo.op);
3949 bias = fold_convert (TREE_TYPE (step), bias);
3950 bias = fold_build2 (MULT_EXPR, TREE_TYPE (step), bias, step);
3951 thisarginfo.op
3952 = fold_build2 (POINTER_TYPE_P (opt)
3953 ? POINTER_PLUS_EXPR : PLUS_EXPR, opt,
3954 thisarginfo.op, bias);
3955 }
3956 }
3957 else if (!vec_stmt
3958 && thisarginfo.dt != vect_constant_def
3959 && thisarginfo.dt != vect_external_def
3960 && loop_vinfo
3961 && TREE_CODE (op) == SSA_NAME
3962 && simple_iv (loop, loop_containing_stmt (stmt), op,
3963 &iv, false)
3964 && tree_fits_shwi_p (iv.step))
3965 {
3966 thisarginfo.linear_step = tree_to_shwi (iv.step);
3967 thisarginfo.op = iv.base;
3968 }
3969 else if ((thisarginfo.dt == vect_constant_def
3970 || thisarginfo.dt == vect_external_def)
3971 && POINTER_TYPE_P (TREE_TYPE (op)))
3972 thisarginfo.align = get_pointer_alignment (op) / BITS_PER_UNIT;
3973 /* Addresses of array elements indexed by GOMP_SIMD_LANE are
3974 linear too. */
3975 if (POINTER_TYPE_P (TREE_TYPE (op))
3976 && !thisarginfo.linear_step
3977 && !vec_stmt
3978 && thisarginfo.dt != vect_constant_def
3979 && thisarginfo.dt != vect_external_def
3980 && loop_vinfo
3981 && !slp_node
3982 && TREE_CODE (op) == SSA_NAME)
3983 vect_simd_lane_linear (op, loop, &thisarginfo);
3984
3985 arginfo.quick_push (thisarginfo);
3986 }
3987
3988 unsigned HOST_WIDE_INT vf;
3989 if (!LOOP_VINFO_VECT_FACTOR (loop_vinfo).is_constant (&vf))
3990 {
3991 if (dump_enabled_p ())
3992 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3993 "not considering SIMD clones; not yet supported"
3994 " for variable-width vectors.\n");
3995 return false;
3996 }
3997
3998 unsigned int badness = 0;
3999 struct cgraph_node *bestn = NULL;
4000 if (STMT_VINFO_SIMD_CLONE_INFO (stmt_info).exists ())
4001 bestn = cgraph_node::get (STMT_VINFO_SIMD_CLONE_INFO (stmt_info)[0]);
4002 else
4003 for (struct cgraph_node *n = node->simd_clones; n != NULL;
4004 n = n->simdclone->next_clone)
4005 {
4006 unsigned int this_badness = 0;
4007 if (n->simdclone->simdlen > vf
4008 || n->simdclone->nargs != nargs)
4009 continue;
4010 if (n->simdclone->simdlen < vf)
4011 this_badness += (exact_log2 (vf)
4012 - exact_log2 (n->simdclone->simdlen)) * 1024;
4013 if (n->simdclone->inbranch)
4014 this_badness += 2048;
4015 int target_badness = targetm.simd_clone.usable (n);
4016 if (target_badness < 0)
4017 continue;
4018 this_badness += target_badness * 512;
4019 /* FORNOW: Have to add code to add the mask argument. */
4020 if (n->simdclone->inbranch)
4021 continue;
4022 for (i = 0; i < nargs; i++)
4023 {
4024 switch (n->simdclone->args[i].arg_type)
4025 {
4026 case SIMD_CLONE_ARG_TYPE_VECTOR:
4027 if (!useless_type_conversion_p
4028 (n->simdclone->args[i].orig_type,
4029 TREE_TYPE (gimple_call_arg (stmt, i))))
4030 i = -1;
4031 else if (arginfo[i].dt == vect_constant_def
4032 || arginfo[i].dt == vect_external_def
4033 || arginfo[i].linear_step)
4034 this_badness += 64;
4035 break;
4036 case SIMD_CLONE_ARG_TYPE_UNIFORM:
4037 if (arginfo[i].dt != vect_constant_def
4038 && arginfo[i].dt != vect_external_def)
4039 i = -1;
4040 break;
4041 case SIMD_CLONE_ARG_TYPE_LINEAR_CONSTANT_STEP:
4042 case SIMD_CLONE_ARG_TYPE_LINEAR_REF_CONSTANT_STEP:
4043 if (arginfo[i].dt == vect_constant_def
4044 || arginfo[i].dt == vect_external_def
4045 || (arginfo[i].linear_step
4046 != n->simdclone->args[i].linear_step))
4047 i = -1;
4048 break;
4049 case SIMD_CLONE_ARG_TYPE_LINEAR_VARIABLE_STEP:
4050 case SIMD_CLONE_ARG_TYPE_LINEAR_VAL_CONSTANT_STEP:
4051 case SIMD_CLONE_ARG_TYPE_LINEAR_UVAL_CONSTANT_STEP:
4052 case SIMD_CLONE_ARG_TYPE_LINEAR_REF_VARIABLE_STEP:
4053 case SIMD_CLONE_ARG_TYPE_LINEAR_VAL_VARIABLE_STEP:
4054 case SIMD_CLONE_ARG_TYPE_LINEAR_UVAL_VARIABLE_STEP:
4055 /* FORNOW */
4056 i = -1;
4057 break;
4058 case SIMD_CLONE_ARG_TYPE_MASK:
4059 gcc_unreachable ();
4060 }
4061 if (i == (size_t) -1)
4062 break;
4063 if (n->simdclone->args[i].alignment > arginfo[i].align)
4064 {
4065 i = -1;
4066 break;
4067 }
4068 if (arginfo[i].align)
4069 this_badness += (exact_log2 (arginfo[i].align)
4070 - exact_log2 (n->simdclone->args[i].alignment));
4071 }
4072 if (i == (size_t) -1)
4073 continue;
4074 if (bestn == NULL || this_badness < badness)
4075 {
4076 bestn = n;
4077 badness = this_badness;
4078 }
4079 }
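  /* A rough example of the scoring above: with vf == 8, a usable clone
     of simdlen 8 gets badness 0 while a clone of simdlen 4 gets
     (exact_log2 (8) - exact_log2 (4)) * 1024 == 1024, so the wider
     clone is preferred when both are otherwise equally suitable.  */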
4080
4081 if (bestn == NULL)
4082 return false;
4083
4084 for (i = 0; i < nargs; i++)
4085 if ((arginfo[i].dt == vect_constant_def
4086 || arginfo[i].dt == vect_external_def)
4087 && bestn->simdclone->args[i].arg_type == SIMD_CLONE_ARG_TYPE_VECTOR)
4088 {
4089 tree arg_type = TREE_TYPE (gimple_call_arg (stmt, i));
4090 arginfo[i].vectype = get_vectype_for_scalar_type (vinfo, arg_type);
4091 if (arginfo[i].vectype == NULL
4092 || (simd_clone_subparts (arginfo[i].vectype)
4093 > bestn->simdclone->simdlen))
4094 return false;
4095 }
4096
4097 fndecl = bestn->decl;
4098 nunits = bestn->simdclone->simdlen;
4099 ncopies = vf / nunits;
4100
4101 /* If the function isn't const, only allow it in simd loops where the user
4102 has asserted that at least nunits consecutive iterations can be
4103 performed using SIMD instructions. */
4104 if ((loop == NULL || (unsigned) loop->safelen < nunits)
4105 && gimple_vuse (stmt))
4106 return false;
4107
4108 /* Sanity check: make sure that at least one copy of the vectorized stmt
4109 needs to be generated. */
4110 gcc_assert (ncopies >= 1);
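  /* For example (a sketch): with vf == 8 and a chosen clone of simdlen 4,
     ncopies == 2, i.e. each scalar call is replaced by two calls to the
     4-lane clone per iteration of the vectorized loop.  */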
4111
4112 if (!vec_stmt) /* transformation not required. */
4113 {
4114 STMT_VINFO_SIMD_CLONE_INFO (stmt_info).safe_push (bestn->decl);
4115 for (i = 0; i < nargs; i++)
4116 if ((bestn->simdclone->args[i].arg_type
4117 == SIMD_CLONE_ARG_TYPE_LINEAR_CONSTANT_STEP)
4118 || (bestn->simdclone->args[i].arg_type
4119 == SIMD_CLONE_ARG_TYPE_LINEAR_REF_CONSTANT_STEP))
4120 {
4121 STMT_VINFO_SIMD_CLONE_INFO (stmt_info).safe_grow_cleared (i * 3
4122 + 1);
4123 STMT_VINFO_SIMD_CLONE_INFO (stmt_info).safe_push (arginfo[i].op);
4124 tree lst = POINTER_TYPE_P (TREE_TYPE (arginfo[i].op))
4125 ? size_type_node : TREE_TYPE (arginfo[i].op);
4126 tree ls = build_int_cst (lst, arginfo[i].linear_step);
4127 STMT_VINFO_SIMD_CLONE_INFO (stmt_info).safe_push (ls);
4128 tree sll = arginfo[i].simd_lane_linear
4129 ? boolean_true_node : boolean_false_node;
4130 STMT_VINFO_SIMD_CLONE_INFO (stmt_info).safe_push (sll);
4131 }
4132 STMT_VINFO_TYPE (stmt_info) = call_simd_clone_vec_info_type;
4133 DUMP_VECT_SCOPE ("vectorizable_simd_clone_call");
4134 /* vect_model_simple_cost (stmt_info, ncopies, dt, slp_node, cost_vec); */
4135 return true;
4136 }
4137
4138 /* Transform. */
4139
4140 if (dump_enabled_p ())
4141 dump_printf_loc (MSG_NOTE, vect_location, "transform call.\n");
4142
4143 /* Handle def. */
4144 scalar_dest = gimple_call_lhs (stmt);
4145 vec_dest = NULL_TREE;
4146 rtype = NULL_TREE;
4147 ratype = NULL_TREE;
4148 if (scalar_dest)
4149 {
4150 vec_dest = vect_create_destination_var (scalar_dest, vectype);
4151 rtype = TREE_TYPE (TREE_TYPE (fndecl));
4152 if (TREE_CODE (rtype) == ARRAY_TYPE)
4153 {
4154 ratype = rtype;
4155 rtype = TREE_TYPE (ratype);
4156 }
4157 }
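  /* Note (illustrative): some simd clones declare an array-of-vectors
     return type; in that case RATYPE holds the array type and RTYPE its
     element (vector) type, and the code below copies the result back out
     of the array via ARRAY_REFs / MEM_REFs.  */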
4158
4159 prev_stmt_info = NULL;
4160 for (j = 0; j < ncopies; ++j)
4161 {
4162 /* Build argument list for the vectorized call. */
4163 if (j == 0)
4164 vargs.create (nargs);
4165 else
4166 vargs.truncate (0);
4167
4168 for (i = 0; i < nargs; i++)
4169 {
4170 unsigned int k, l, m, o;
4171 tree atype;
4172 op = gimple_call_arg (stmt, i);
4173 switch (bestn->simdclone->args[i].arg_type)
4174 {
4175 case SIMD_CLONE_ARG_TYPE_VECTOR:
4176 atype = bestn->simdclone->args[i].vector_type;
4177 o = nunits / simd_clone_subparts (atype);
4178 for (m = j * o; m < (j + 1) * o; m++)
4179 {
4180 if (simd_clone_subparts (atype)
4181 < simd_clone_subparts (arginfo[i].vectype))
4182 {
4183 poly_uint64 prec = GET_MODE_BITSIZE (TYPE_MODE (atype));
4184 k = (simd_clone_subparts (arginfo[i].vectype)
4185 / simd_clone_subparts (atype));
4186 gcc_assert ((k & (k - 1)) == 0);
4187 if (m == 0)
4188 vec_oprnd0
4189 = vect_get_vec_def_for_operand (op, stmt_info);
4190 else
4191 {
4192 vec_oprnd0 = arginfo[i].op;
4193 if ((m & (k - 1)) == 0)
4194 vec_oprnd0
4195 = vect_get_vec_def_for_stmt_copy (vinfo,
4196 vec_oprnd0);
4197 }
4198 arginfo[i].op = vec_oprnd0;
4199 vec_oprnd0
4200 = build3 (BIT_FIELD_REF, atype, vec_oprnd0,
4201 bitsize_int (prec),
4202 bitsize_int ((m & (k - 1)) * prec));
4203 gassign *new_stmt
4204 = gimple_build_assign (make_ssa_name (atype),
4205 vec_oprnd0);
4206 vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
4207 vargs.safe_push (gimple_assign_lhs (new_stmt));
4208 }
4209 else
4210 {
4211 k = (simd_clone_subparts (atype)
4212 / simd_clone_subparts (arginfo[i].vectype));
4213 gcc_assert ((k & (k - 1)) == 0);
4214 vec<constructor_elt, va_gc> *ctor_elts;
4215 if (k != 1)
4216 vec_alloc (ctor_elts, k);
4217 else
4218 ctor_elts = NULL;
4219 for (l = 0; l < k; l++)
4220 {
4221 if (m == 0 && l == 0)
4222 vec_oprnd0
4223 = vect_get_vec_def_for_operand (op, stmt_info);
4224 else
4225 vec_oprnd0
4226 = vect_get_vec_def_for_stmt_copy (vinfo,
4227 arginfo[i].op);
4228 arginfo[i].op = vec_oprnd0;
4229 if (k == 1)
4230 break;
4231 CONSTRUCTOR_APPEND_ELT (ctor_elts, NULL_TREE,
4232 vec_oprnd0);
4233 }
4234 if (k == 1)
4235 vargs.safe_push (vec_oprnd0);
4236 else
4237 {
4238 vec_oprnd0 = build_constructor (atype, ctor_elts);
4239 gassign *new_stmt
4240 = gimple_build_assign (make_ssa_name (atype),
4241 vec_oprnd0);
4242 vect_finish_stmt_generation (stmt_info, new_stmt,
4243 gsi);
4244 vargs.safe_push (gimple_assign_lhs (new_stmt));
4245 }
4246 }
4247 }
4248 break;
4249 case SIMD_CLONE_ARG_TYPE_UNIFORM:
4250 vargs.safe_push (op);
4251 break;
4252 case SIMD_CLONE_ARG_TYPE_LINEAR_CONSTANT_STEP:
4253 case SIMD_CLONE_ARG_TYPE_LINEAR_REF_CONSTANT_STEP:
4254 if (j == 0)
4255 {
4256 gimple_seq stmts;
4257 arginfo[i].op
4258 = force_gimple_operand (unshare_expr (arginfo[i].op),
4259 &stmts, true, NULL_TREE);
4260 if (stmts != NULL)
4261 {
4262 basic_block new_bb;
4263 edge pe = loop_preheader_edge (loop);
4264 new_bb = gsi_insert_seq_on_edge_immediate (pe, stmts);
4265 gcc_assert (!new_bb);
4266 }
4267 if (arginfo[i].simd_lane_linear)
4268 {
4269 vargs.safe_push (arginfo[i].op);
4270 break;
4271 }
4272 tree phi_res = copy_ssa_name (op);
4273 gphi *new_phi = create_phi_node (phi_res, loop->header);
4274 loop_vinfo->add_stmt (new_phi);
4275 add_phi_arg (new_phi, arginfo[i].op,
4276 loop_preheader_edge (loop), UNKNOWN_LOCATION);
4277 enum tree_code code
4278 = POINTER_TYPE_P (TREE_TYPE (op))
4279 ? POINTER_PLUS_EXPR : PLUS_EXPR;
4280 tree type = POINTER_TYPE_P (TREE_TYPE (op))
4281 ? sizetype : TREE_TYPE (op);
4282 widest_int cst
4283 = wi::mul (bestn->simdclone->args[i].linear_step,
4284 ncopies * nunits);
4285 tree tcst = wide_int_to_tree (type, cst);
4286 tree phi_arg = copy_ssa_name (op);
4287 gassign *new_stmt
4288 = gimple_build_assign (phi_arg, code, phi_res, tcst);
4289 gimple_stmt_iterator si = gsi_after_labels (loop->header);
4290 gsi_insert_after (&si, new_stmt, GSI_NEW_STMT);
4291 loop_vinfo->add_stmt (new_stmt);
4292 add_phi_arg (new_phi, phi_arg, loop_latch_edge (loop),
4293 UNKNOWN_LOCATION);
4294 arginfo[i].op = phi_res;
4295 vargs.safe_push (phi_res);
4296 }
4297 else
4298 {
4299 enum tree_code code
4300 = POINTER_TYPE_P (TREE_TYPE (op))
4301 ? POINTER_PLUS_EXPR : PLUS_EXPR;
4302 tree type = POINTER_TYPE_P (TREE_TYPE (op))
4303 ? sizetype : TREE_TYPE (op);
4304 widest_int cst
4305 = wi::mul (bestn->simdclone->args[i].linear_step,
4306 j * nunits);
4307 tree tcst = wide_int_to_tree (type, cst);
4308 new_temp = make_ssa_name (TREE_TYPE (op));
4309 gassign *new_stmt
4310 = gimple_build_assign (new_temp, code,
4311 arginfo[i].op, tcst);
4312 vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
4313 vargs.safe_push (new_temp);
4314 }
4315 break;
4316 case SIMD_CLONE_ARG_TYPE_LINEAR_VAL_CONSTANT_STEP:
4317 case SIMD_CLONE_ARG_TYPE_LINEAR_UVAL_CONSTANT_STEP:
4318 case SIMD_CLONE_ARG_TYPE_LINEAR_VARIABLE_STEP:
4319 case SIMD_CLONE_ARG_TYPE_LINEAR_REF_VARIABLE_STEP:
4320 case SIMD_CLONE_ARG_TYPE_LINEAR_VAL_VARIABLE_STEP:
4321 case SIMD_CLONE_ARG_TYPE_LINEAR_UVAL_VARIABLE_STEP:
4322 default:
4323 gcc_unreachable ();
4324 }
4325 }
4326
4327 gcall *new_call = gimple_build_call_vec (fndecl, vargs);
4328 if (vec_dest)
4329 {
4330 gcc_assert (ratype || simd_clone_subparts (rtype) == nunits);
4331 if (ratype)
4332 new_temp = create_tmp_var (ratype);
4333 else if (simd_clone_subparts (vectype)
4334 == simd_clone_subparts (rtype))
4335 new_temp = make_ssa_name (vec_dest, new_call);
4336 else
4337 new_temp = make_ssa_name (rtype, new_call);
4338 gimple_call_set_lhs (new_call, new_temp);
4339 }
4340 stmt_vec_info new_stmt_info
4341 = vect_finish_stmt_generation (stmt_info, new_call, gsi);
4342
4343 if (vec_dest)
4344 {
4345 if (simd_clone_subparts (vectype) < nunits)
4346 {
4347 unsigned int k, l;
4348 poly_uint64 prec = GET_MODE_BITSIZE (TYPE_MODE (vectype));
4349 poly_uint64 bytes = GET_MODE_SIZE (TYPE_MODE (vectype));
4350 k = nunits / simd_clone_subparts (vectype);
4351 gcc_assert ((k & (k - 1)) == 0);
4352 for (l = 0; l < k; l++)
4353 {
4354 tree t;
4355 if (ratype)
4356 {
4357 t = build_fold_addr_expr (new_temp);
4358 t = build2 (MEM_REF, vectype, t,
4359 build_int_cst (TREE_TYPE (t), l * bytes));
4360 }
4361 else
4362 t = build3 (BIT_FIELD_REF, vectype, new_temp,
4363 bitsize_int (prec), bitsize_int (l * prec));
4364 gimple *new_stmt
4365 = gimple_build_assign (make_ssa_name (vectype), t);
4366 new_stmt_info
4367 = vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
4368
4369 if (j == 0 && l == 0)
4370 STMT_VINFO_VEC_STMT (stmt_info)
4371 = *vec_stmt = new_stmt_info;
4372 else
4373 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt_info;
4374
4375 prev_stmt_info = new_stmt_info;
4376 }
4377
4378 if (ratype)
4379 vect_clobber_variable (stmt_info, gsi, new_temp);
4380 continue;
4381 }
4382 else if (simd_clone_subparts (vectype) > nunits)
4383 {
4384 unsigned int k = (simd_clone_subparts (vectype)
4385 / simd_clone_subparts (rtype));
4386 gcc_assert ((k & (k - 1)) == 0);
4387 if ((j & (k - 1)) == 0)
4388 vec_alloc (ret_ctor_elts, k);
4389 if (ratype)
4390 {
4391 unsigned int m, o = nunits / simd_clone_subparts (rtype);
4392 for (m = 0; m < o; m++)
4393 {
4394 tree tem = build4 (ARRAY_REF, rtype, new_temp,
4395 size_int (m), NULL_TREE, NULL_TREE);
4396 gimple *new_stmt
4397 = gimple_build_assign (make_ssa_name (rtype), tem);
4398 new_stmt_info
4399 = vect_finish_stmt_generation (stmt_info, new_stmt,
4400 gsi);
4401 CONSTRUCTOR_APPEND_ELT (ret_ctor_elts, NULL_TREE,
4402 gimple_assign_lhs (new_stmt));
4403 }
4404 vect_clobber_variable (stmt_info, gsi, new_temp);
4405 }
4406 else
4407 CONSTRUCTOR_APPEND_ELT (ret_ctor_elts, NULL_TREE, new_temp);
4408 if ((j & (k - 1)) != k - 1)
4409 continue;
4410 vec_oprnd0 = build_constructor (vectype, ret_ctor_elts);
4411 gimple *new_stmt
4412 = gimple_build_assign (make_ssa_name (vec_dest), vec_oprnd0);
4413 new_stmt_info
4414 = vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
4415
4416 if ((unsigned) j == k - 1)
4417 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt_info;
4418 else
4419 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt_info;
4420
4421 prev_stmt_info = new_stmt_info;
4422 continue;
4423 }
4424 else if (ratype)
4425 {
4426 tree t = build_fold_addr_expr (new_temp);
4427 t = build2 (MEM_REF, vectype, t,
4428 build_int_cst (TREE_TYPE (t), 0));
4429 gimple *new_stmt
4430 = gimple_build_assign (make_ssa_name (vec_dest), t);
4431 new_stmt_info
4432 = vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
4433 vect_clobber_variable (stmt_info, gsi, new_temp);
4434 }
4435 }
4436
4437 if (j == 0)
4438 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt_info;
4439 else
4440 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt_info;
4441
4442 prev_stmt_info = new_stmt_info;
4443 }
4444
4445 vargs.release ();
4446
4447 /* The call in STMT might prevent it from being removed in dce.
4448 However, we cannot remove it here, because of the way the ssa name
4449 it defines is mapped to the new definition. So just replace the
4450 rhs of the statement with something harmless. */
4451
4452 if (slp_node)
4453 return true;
4454
4455 gimple *new_stmt;
4456 if (scalar_dest)
4457 {
4458 type = TREE_TYPE (scalar_dest);
4459 lhs = gimple_call_lhs (vect_orig_stmt (stmt_info)->stmt);
4460 new_stmt = gimple_build_assign (lhs, build_zero_cst (type));
4461 }
4462 else
4463 new_stmt = gimple_build_nop ();
4464 vinfo->replace_stmt (gsi, vect_orig_stmt (stmt_info), new_stmt);
4465 unlink_stmt_vdef (stmt);
4466
4467 return true;
4468 }
4469
4470
4471 /* Function vect_gen_widened_results_half
4472
4473 Create a vector stmt whose code, number of operands, and result
4474 variable are CODE, OP_TYPE, and VEC_DEST, and whose arguments are
4475 VEC_OPRND0 and VEC_OPRND1. The new vector stmt is to be inserted at GSI.
4476 In the case that CODE is a CALL_EXPR, this means that a call to DECL
4477 needs to be created (DECL is a function-decl of a target-builtin).
4478 STMT_INFO is the original scalar stmt that we are vectorizing. */
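/* For instance (a sketch): when widening V8HI data to V4SI results, the
   caller invokes this function twice, once with CODE == VEC_UNPACK_LO_EXPR
   and once with VEC_UNPACK_HI_EXPR, so that each call produces the vector
   holding one half of the widened elements.  */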
4479
4480 static gimple *
4481 vect_gen_widened_results_half (enum tree_code code,
4482 tree decl,
4483 tree vec_oprnd0, tree vec_oprnd1, int op_type,
4484 tree vec_dest, gimple_stmt_iterator *gsi,
4485 stmt_vec_info stmt_info)
4486 {
4487 gimple *new_stmt;
4488 tree new_temp;
4489
4490 /* Generate half of the widened result: */
4491 if (code == CALL_EXPR)
4492 {
4493 /* Target specific support */
4494 if (op_type == binary_op)
4495 new_stmt = gimple_build_call (decl, 2, vec_oprnd0, vec_oprnd1);
4496 else
4497 new_stmt = gimple_build_call (decl, 1, vec_oprnd0);
4498 new_temp = make_ssa_name (vec_dest, new_stmt);
4499 gimple_call_set_lhs (new_stmt, new_temp);
4500 }
4501 else
4502 {
4503 /* Generic support */
4504 gcc_assert (op_type == TREE_CODE_LENGTH (code));
4505 if (op_type != binary_op)
4506 vec_oprnd1 = NULL;
4507 new_stmt = gimple_build_assign (vec_dest, code, vec_oprnd0, vec_oprnd1);
4508 new_temp = make_ssa_name (vec_dest, new_stmt);
4509 gimple_assign_set_lhs (new_stmt, new_temp);
4510 }
4511 vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
4512
4513 return new_stmt;
4514 }
4515
4516
4517 /* Get vectorized definitions for loop-based vectorization of STMT_INFO.
4518 For the first operand we call vect_get_vec_def_for_operand (with OPRND
4519 containing scalar operand), and for the rest we get a copy with
4520 vect_get_vec_def_for_stmt_copy() using the previous vector definition
4521 (stored in OPRND). See vect_get_vec_def_for_stmt_copy() for details.
4522 The vectors are collected into VEC_OPRNDS. */
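/* A small illustration: with MULTI_STEP_CVT == 1 this function pushes two
   vector defs, recurses once and pushes two more, so VEC_OPRNDS ends up
   with 2 * (MULTI_STEP_CVT + 1) == 4 defs, enough input for a two-step
   narrowing sequence.  */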
4523
4524 static void
4525 vect_get_loop_based_defs (tree *oprnd, stmt_vec_info stmt_info,
4526 vec<tree> *vec_oprnds, int multi_step_cvt)
4527 {
4528 vec_info *vinfo = stmt_info->vinfo;
4529 tree vec_oprnd;
4530
4531 /* Get first vector operand. */
4532 /* All the vector operands except the very first one (which is the scalar
4533 operand) are stmt copies. */
4534 if (TREE_CODE (TREE_TYPE (*oprnd)) != VECTOR_TYPE)
4535 vec_oprnd = vect_get_vec_def_for_operand (*oprnd, stmt_info);
4536 else
4537 vec_oprnd = vect_get_vec_def_for_stmt_copy (vinfo, *oprnd);
4538
4539 vec_oprnds->quick_push (vec_oprnd);
4540
4541 /* Get second vector operand. */
4542 vec_oprnd = vect_get_vec_def_for_stmt_copy (vinfo, vec_oprnd);
4543 vec_oprnds->quick_push (vec_oprnd);
4544
4545 *oprnd = vec_oprnd;
4546
4547 /* For conversion in multiple steps, continue to get operands
4548 recursively. */
4549 if (multi_step_cvt)
4550 vect_get_loop_based_defs (oprnd, stmt_info, vec_oprnds,
4551 multi_step_cvt - 1);
4552 }
4553
4554
4555 /* Create vectorized demotion statements for vector operands from VEC_OPRNDS.
4556 For multi-step conversions store the resulting vectors and call the function
4557 recursively. */
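/* For example (a sketch): a two-step demotion starting from four input
   vectors first packs them pairwise into two intermediate vectors using
   CODE, and the recursive call then packs those two into the final vector
   using VEC_PACK_TRUNC_EXPR.  */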
4558
4559 static void
4560 vect_create_vectorized_demotion_stmts (vec<tree> *vec_oprnds,
4561 int multi_step_cvt,
4562 stmt_vec_info stmt_info,
4563 vec<tree> vec_dsts,
4564 gimple_stmt_iterator *gsi,
4565 slp_tree slp_node, enum tree_code code,
4566 stmt_vec_info *prev_stmt_info)
4567 {
4568 unsigned int i;
4569 tree vop0, vop1, new_tmp, vec_dest;
4570
4571 vec_dest = vec_dsts.pop ();
4572
4573 for (i = 0; i < vec_oprnds->length (); i += 2)
4574 {
4575 /* Create demotion operation. */
4576 vop0 = (*vec_oprnds)[i];
4577 vop1 = (*vec_oprnds)[i + 1];
4578 gassign *new_stmt = gimple_build_assign (vec_dest, code, vop0, vop1);
4579 new_tmp = make_ssa_name (vec_dest, new_stmt);
4580 gimple_assign_set_lhs (new_stmt, new_tmp);
4581 stmt_vec_info new_stmt_info
4582 = vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
4583
4584 if (multi_step_cvt)
4585 /* Store the resulting vector for next recursive call. */
4586 (*vec_oprnds)[i/2] = new_tmp;
4587 else
4588 {
4589 /* This is the last step of the conversion sequence. Store the
4590 vectors in SLP_NODE or in the vector info of the scalar statement
4591 (or in the STMT_VINFO_RELATED_STMT chain). */
4592 if (slp_node)
4593 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt_info);
4594 else
4595 {
4596 if (!*prev_stmt_info)
4597 STMT_VINFO_VEC_STMT (stmt_info) = new_stmt_info;
4598 else
4599 STMT_VINFO_RELATED_STMT (*prev_stmt_info) = new_stmt_info;
4600
4601 *prev_stmt_info = new_stmt_info;
4602 }
4603 }
4604 }
4605
4606 /* For multi-step demotion operations we first generate demotion operations
4607 from the source type to the intermediate types, and then combine the
4608 results (stored in VEC_OPRNDS) in a further demotion operation to the
4609 destination type. */
4610 if (multi_step_cvt)
4611 {
4612 /* At each level of recursion we have half of the operands we had at the
4613 previous level. */
4614 vec_oprnds->truncate ((i+1)/2);
4615 vect_create_vectorized_demotion_stmts (vec_oprnds, multi_step_cvt - 1,
4616 stmt_info, vec_dsts, gsi,
4617 slp_node, VEC_PACK_TRUNC_EXPR,
4618 prev_stmt_info);
4619 }
4620
4621 vec_dsts.quick_push (vec_dest);
4622 }
4623
4624
4625 /* Create vectorized promotion statements for vector operands from VEC_OPRNDS0
4626 and VEC_OPRNDS1, for a binary operation associated with scalar statement
4627 STMT_INFO. For multi-step conversions the results are stored back in
4628 VEC_OPRNDS0 so that the caller can apply the next conversion step. */
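/* For instance (a sketch): when promoting V8HI operands to V4SI results,
   each input vector (VOP0, and VOP1 for a binary operation) yields two
   output vectors, one via CODE1 (the "lo" half) and one via CODE2 (the
   "hi" half), so VEC_OPRNDS0 doubles in length at every step.  */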
4629
4630 static void
4631 vect_create_vectorized_promotion_stmts (vec<tree> *vec_oprnds0,
4632 vec<tree> *vec_oprnds1,
4633 stmt_vec_info stmt_info, tree vec_dest,
4634 gimple_stmt_iterator *gsi,
4635 enum tree_code code1,
4636 enum tree_code code2, tree decl1,
4637 tree decl2, int op_type)
4638 {
4639 int i;
4640 tree vop0, vop1, new_tmp1, new_tmp2;
4641 gimple *new_stmt1, *new_stmt2;
4642 vec<tree> vec_tmp = vNULL;
4643
4644 vec_tmp.create (vec_oprnds0->length () * 2);
4645 FOR_EACH_VEC_ELT (*vec_oprnds0, i, vop0)
4646 {
4647 if (op_type == binary_op)
4648 vop1 = (*vec_oprnds1)[i];
4649 else
4650 vop1 = NULL_TREE;
4651
4652 /* Generate the two halves of promotion operation. */
4653 new_stmt1 = vect_gen_widened_results_half (code1, decl1, vop0, vop1,
4654 op_type, vec_dest, gsi,
4655 stmt_info);
4656 new_stmt2 = vect_gen_widened_results_half (code2, decl2, vop0, vop1,
4657 op_type, vec_dest, gsi,
4658 stmt_info);
4659 if (is_gimple_call (new_stmt1))
4660 {
4661 new_tmp1 = gimple_call_lhs (new_stmt1);
4662 new_tmp2 = gimple_call_lhs (new_stmt2);
4663 }
4664 else
4665 {
4666 new_tmp1 = gimple_assign_lhs (new_stmt1);
4667 new_tmp2 = gimple_assign_lhs (new_stmt2);
4668 }
4669
4670 /* Store the results for the next step. */
4671 vec_tmp.quick_push (new_tmp1);
4672 vec_tmp.quick_push (new_tmp2);
4673 }
4674
4675 vec_oprnds0->release ();
4676 *vec_oprnds0 = vec_tmp;
4677 }
4678
4679
4680 /* Check if STMT_INFO performs a conversion operation that can be vectorized.
4681 If VEC_STMT is also passed, vectorize STMT_INFO: create a vectorized
4682 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
4683 Return true if STMT_INFO is vectorizable in this way. */
4684
4685 static bool
4686 vectorizable_conversion (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
4687 stmt_vec_info *vec_stmt, slp_tree slp_node,
4688 stmt_vector_for_cost *cost_vec)
4689 {
4690 tree vec_dest;
4691 tree scalar_dest;
4692 tree op0, op1 = NULL_TREE;
4693 tree vec_oprnd0 = NULL_TREE, vec_oprnd1 = NULL_TREE;
4694 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
4695 enum tree_code code, code1 = ERROR_MARK, code2 = ERROR_MARK;
4696 enum tree_code codecvt1 = ERROR_MARK, codecvt2 = ERROR_MARK;
4697 tree decl1 = NULL_TREE, decl2 = NULL_TREE;
4698 tree new_temp;
4699 enum vect_def_type dt[2] = {vect_unknown_def_type, vect_unknown_def_type};
4700 int ndts = 2;
4701 stmt_vec_info prev_stmt_info;
4702 poly_uint64 nunits_in;
4703 poly_uint64 nunits_out;
4704 tree vectype_out, vectype_in;
4705 int ncopies, i, j;
4706 tree lhs_type, rhs_type;
4707 enum { NARROW, NONE, WIDEN } modifier;
4708 vec<tree> vec_oprnds0 = vNULL;
4709 vec<tree> vec_oprnds1 = vNULL;
4710 tree vop0;
4711 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
4712 vec_info *vinfo = stmt_info->vinfo;
4713 int multi_step_cvt = 0;
4714 vec<tree> interm_types = vNULL;
4715 tree last_oprnd, intermediate_type, cvt_type = NULL_TREE;
4716 int op_type;
4717 unsigned short fltsz;
4718
4719 /* Is STMT a vectorizable conversion? */
4720
4721 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
4722 return false;
4723
4724 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
4725 && ! vec_stmt)
4726 return false;
4727
4728 gassign *stmt = dyn_cast <gassign *> (stmt_info->stmt);
4729 if (!stmt)
4730 return false;
4731
4732 if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME)
4733 return false;
4734
4735 code = gimple_assign_rhs_code (stmt);
4736 if (!CONVERT_EXPR_CODE_P (code)
4737 && code != FIX_TRUNC_EXPR
4738 && code != FLOAT_EXPR
4739 && code != WIDEN_MULT_EXPR
4740 && code != WIDEN_LSHIFT_EXPR)
4741 return false;
4742
4743 op_type = TREE_CODE_LENGTH (code);
4744
4745 /* Check types of lhs and rhs. */
4746 scalar_dest = gimple_assign_lhs (stmt);
4747 lhs_type = TREE_TYPE (scalar_dest);
4748 vectype_out = STMT_VINFO_VECTYPE (stmt_info);
4749
4750 op0 = gimple_assign_rhs1 (stmt);
4751 rhs_type = TREE_TYPE (op0);
4752
4753 if ((code != FIX_TRUNC_EXPR && code != FLOAT_EXPR)
4754 && !((INTEGRAL_TYPE_P (lhs_type)
4755 && INTEGRAL_TYPE_P (rhs_type))
4756 || (SCALAR_FLOAT_TYPE_P (lhs_type)
4757 && SCALAR_FLOAT_TYPE_P (rhs_type))))
4758 return false;
4759
4760 if (!VECTOR_BOOLEAN_TYPE_P (vectype_out)
4761 && ((INTEGRAL_TYPE_P (lhs_type)
4762 && !type_has_mode_precision_p (lhs_type))
4763 || (INTEGRAL_TYPE_P (rhs_type)
4764 && !type_has_mode_precision_p (rhs_type))))
4765 {
4766 if (dump_enabled_p ())
4767 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4768 "type conversion to/from bit-precision unsupported."
4769 "\n");
4770 return false;
4771 }
4772
4773 /* Check the operands of the operation. */
4774 if (!vect_is_simple_use (op0, vinfo, &dt[0], &vectype_in))
4775 {
4776 if (dump_enabled_p ())
4777 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4778 "use not simple.\n");
4779 return false;
4780 }
4781 if (op_type == binary_op)
4782 {
4783 bool ok;
4784
4785 op1 = gimple_assign_rhs2 (stmt);
4786 gcc_assert (code == WIDEN_MULT_EXPR || code == WIDEN_LSHIFT_EXPR);
4787 /* For WIDEN_MULT_EXPR, if OP0 is a constant, use the type of
4788 OP1. */
4789 if (CONSTANT_CLASS_P (op0))
4790 ok = vect_is_simple_use (op1, vinfo, &dt[1], &vectype_in);
4791 else
4792 ok = vect_is_simple_use (op1, vinfo, &dt[1]);
4793
4794 if (!ok)
4795 {
4796 if (dump_enabled_p ())
4797 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4798 "use not simple.\n");
4799 return false;
4800 }
4801 }
4802
4803 /* If op0 is an external or constant def, infer the vector type
4804 from the scalar type. */
4805 if (!vectype_in)
4806 vectype_in = get_vectype_for_scalar_type (vinfo, rhs_type);
4807 if (vec_stmt)
4808 gcc_assert (vectype_in);
4809 if (!vectype_in)
4810 {
4811 if (dump_enabled_p ())
4812 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4813 "no vectype for scalar type %T\n", rhs_type);
4814
4815 return false;
4816 }
4817
4818 if (VECTOR_BOOLEAN_TYPE_P (vectype_out)
4819 && !VECTOR_BOOLEAN_TYPE_P (vectype_in))
4820 {
4821 if (dump_enabled_p ())
4822 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4823 "can't convert between boolean and non "
4824 "boolean vectors %T\n", rhs_type);
4825
4826 return false;
4827 }
4828
4829 nunits_in = TYPE_VECTOR_SUBPARTS (vectype_in);
4830 nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
4831 if (known_eq (nunits_out, nunits_in))
4832 modifier = NONE;
4833 else if (multiple_p (nunits_out, nunits_in))
4834 modifier = NARROW;
4835 else
4836 {
4837 gcc_checking_assert (multiple_p (nunits_in, nunits_out));
4838 modifier = WIDEN;
4839 }
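  /* An illustrative example with 128-bit vectors: converting double to int
     has vectype_in V2DF and vectype_out V4SI, so nunits_out is a multiple
     of nunits_in and the conversion is classified as NARROW; int to double
     is the mirror image and is WIDEN; same-width conversions are NONE.  */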
4840
4841 /* Multiple types in SLP are handled by creating the appropriate number of
4842 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
4843 case of SLP. */
4844 if (slp_node)
4845 ncopies = 1;
4846 else if (modifier == NARROW)
4847 ncopies = vect_get_num_copies (loop_vinfo, vectype_out);
4848 else
4849 ncopies = vect_get_num_copies (loop_vinfo, vectype_in);
4850
4851 /* Sanity check: make sure that at least one copy of the vectorized stmt
4852 needs to be generated. */
4853 gcc_assert (ncopies >= 1);
4854
4855 bool found_mode = false;
4856 scalar_mode lhs_mode = SCALAR_TYPE_MODE (lhs_type);
4857 scalar_mode rhs_mode = SCALAR_TYPE_MODE (rhs_type);
4858 opt_scalar_mode rhs_mode_iter;
4859
4860 /* Supportable by target? */
4861 switch (modifier)
4862 {
4863 case NONE:
4864 if (code != FIX_TRUNC_EXPR && code != FLOAT_EXPR)
4865 return false;
4866 if (supportable_convert_operation (code, vectype_out, vectype_in,
4867 &decl1, &code1))
4868 break;
4869 /* FALLTHRU */
4870 unsupported:
4871 if (dump_enabled_p ())
4872 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4873 "conversion not supported by target.\n");
4874 return false;
4875
4876 case WIDEN:
4877 if (supportable_widening_operation (code, stmt_info, vectype_out,
4878 vectype_in, &code1, &code2,
4879 &multi_step_cvt, &interm_types))
4880 {
4881 /* A binary widening operation can only be supported directly by the
4882 architecture. */
4883 gcc_assert (!(multi_step_cvt && op_type == binary_op));
4884 break;
4885 }
4886
4887 if (code != FLOAT_EXPR
4888 || GET_MODE_SIZE (lhs_mode) <= GET_MODE_SIZE (rhs_mode))
4889 goto unsupported;
4890
4891 fltsz = GET_MODE_SIZE (lhs_mode);
4892 FOR_EACH_2XWIDER_MODE (rhs_mode_iter, rhs_mode)
4893 {
4894 rhs_mode = rhs_mode_iter.require ();
4895 if (GET_MODE_SIZE (rhs_mode) > fltsz)
4896 break;
4897
4898 cvt_type
4899 = build_nonstandard_integer_type (GET_MODE_BITSIZE (rhs_mode), 0);
4900 cvt_type = get_same_sized_vectype (cvt_type, vectype_in);
4901 if (cvt_type == NULL_TREE)
4902 goto unsupported;
4903
4904 if (GET_MODE_SIZE (rhs_mode) == fltsz)
4905 {
4906 if (!supportable_convert_operation (code, vectype_out,
4907 cvt_type, &decl1, &codecvt1))
4908 goto unsupported;
4909 }
4910 else if (!supportable_widening_operation (code, stmt_info,
4911 vectype_out, cvt_type,
4912 &codecvt1, &codecvt2,
4913 &multi_step_cvt,
4914 &interm_types))
4915 continue;
4916 else
4917 gcc_assert (multi_step_cvt == 0);
4918
4919 if (supportable_widening_operation (NOP_EXPR, stmt_info, cvt_type,
4920 vectype_in, &code1, &code2,
4921 &multi_step_cvt, &interm_types))
4922 {
4923 found_mode = true;
4924 break;
4925 }
4926 }
4927
4928 if (!found_mode)
4929 goto unsupported;
4930
4931 if (GET_MODE_SIZE (rhs_mode) == fltsz)
4932 codecvt2 = ERROR_MARK;
4933 else
4934 {
4935 multi_step_cvt++;
4936 interm_types.safe_push (cvt_type);
4937 cvt_type = NULL_TREE;
4938 }
4939 break;
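  /* A sketch of the indirect FLOAT_EXPR case set up above: when there is
     no direct V4SI -> V2DF conversion, the integer operands are first
     widened from V4SI to the V2DI intermediate type (code1/code2, with
     cvt_type recording the V2DI vector type) and that intermediate is
     then converted to V2DF using codecvt1.  */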
4940
4941 case NARROW:
4942 gcc_assert (op_type == unary_op);
4943 if (supportable_narrowing_operation (code, vectype_out, vectype_in,
4944 &code1, &multi_step_cvt,
4945 &interm_types))
4946 break;
4947
4948 if (code != FIX_TRUNC_EXPR
4949 || GET_MODE_SIZE (lhs_mode) >= GET_MODE_SIZE (rhs_mode))
4950 goto unsupported;
4951
4952 cvt_type
4953 = build_nonstandard_integer_type (GET_MODE_BITSIZE (rhs_mode), 0);
4954 cvt_type = get_same_sized_vectype (cvt_type, vectype_in);
4955 if (cvt_type == NULL_TREE)
4956 goto unsupported;
4957 if (!supportable_convert_operation (code, cvt_type, vectype_in,
4958 &decl1, &codecvt1))
4959 goto unsupported;
4960 if (supportable_narrowing_operation (NOP_EXPR, vectype_out, cvt_type,
4961 &code1, &multi_step_cvt,
4962 &interm_types))
4963 break;
4964 goto unsupported;
4965
4966 default:
4967 gcc_unreachable ();
4968 }
4969
4970 if (!vec_stmt) /* transformation not required. */
4971 {
4972 DUMP_VECT_SCOPE ("vectorizable_conversion");
4973 if (modifier == NONE)
4974 {
4975 STMT_VINFO_TYPE (stmt_info) = type_conversion_vec_info_type;
4976 vect_model_simple_cost (stmt_info, ncopies, dt, ndts, slp_node,
4977 cost_vec);
4978 }
4979 else if (modifier == NARROW)
4980 {
4981 STMT_VINFO_TYPE (stmt_info) = type_demotion_vec_info_type;
4982 /* The final packing step produces one vector result per copy. */
4983 unsigned int nvectors
4984 = (slp_node ? SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node) : ncopies);
4985 vect_model_promotion_demotion_cost (stmt_info, dt, nvectors,
4986 multi_step_cvt, cost_vec);
4987 }
4988 else
4989 {
4990 STMT_VINFO_TYPE (stmt_info) = type_promotion_vec_info_type;
4991 /* The initial unpacking step produces two vector results
4992 per copy. MULTI_STEP_CVT is 0 for a single conversion,
4993 so >> MULTI_STEP_CVT divides by 2^(number of steps - 1). */
4994 unsigned int nvectors
4995 = (slp_node
4996 ? SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node) >> multi_step_cvt
4997 : ncopies * 2);
4998 vect_model_promotion_demotion_cost (stmt_info, dt, nvectors,
4999 multi_step_cvt, cost_vec);
5000 }
5001 interm_types.release ();
5002 return true;
5003 }
5004
5005 /* Transform. */
5006 if (dump_enabled_p ())
5007 dump_printf_loc (MSG_NOTE, vect_location,
5008 "transform conversion. ncopies = %d.\n", ncopies);
5009
5010 if (op_type == binary_op)
5011 {
5012 if (CONSTANT_CLASS_P (op0))
5013 op0 = fold_convert (TREE_TYPE (op1), op0);
5014 else if (CONSTANT_CLASS_P (op1))
5015 op1 = fold_convert (TREE_TYPE (op0), op1);
5016 }
5017
5018 /* In case of multi-step conversion, we first generate conversion operations
5019 to the intermediate types, and then from those types to the final one.
5020 We create vector destinations for the intermediate types (TYPES) received
5021 from supportable_*_operation, and store them in the correct order
5022 for future use in vect_create_vectorized_*_stmts (). */
5023 auto_vec<tree> vec_dsts (multi_step_cvt + 1);
5024 vec_dest = vect_create_destination_var (scalar_dest,
5025 (cvt_type && modifier == WIDEN)
5026 ? cvt_type : vectype_out);
5027 vec_dsts.quick_push (vec_dest);
5028
5029 if (multi_step_cvt)
5030 {
5031 for (i = interm_types.length () - 1;
5032 interm_types.iterate (i, &intermediate_type); i--)
5033 {
5034 vec_dest = vect_create_destination_var (scalar_dest,
5035 intermediate_type);
5036 vec_dsts.quick_push (vec_dest);
5037 }
5038 }
5039
5040 if (cvt_type)
5041 vec_dest = vect_create_destination_var (scalar_dest,
5042 modifier == WIDEN
5043 ? vectype_out : cvt_type);
5044
5045 if (!slp_node)
5046 {
5047 if (modifier == WIDEN)
5048 {
5049 vec_oprnds0.create (multi_step_cvt ? vect_pow2 (multi_step_cvt) : 1);
5050 if (op_type == binary_op)
5051 vec_oprnds1.create (1);
5052 }
5053 else if (modifier == NARROW)
5054 vec_oprnds0.create (
5055 2 * (multi_step_cvt ? vect_pow2 (multi_step_cvt) : 1));
5056 }
5057 else if (code == WIDEN_LSHIFT_EXPR)
5058 vec_oprnds1.create (slp_node->vec_stmts_size);
5059
5060 last_oprnd = op0;
5061 prev_stmt_info = NULL;
5062 switch (modifier)
5063 {
5064 case NONE:
5065 for (j = 0; j < ncopies; j++)
5066 {
5067 if (j == 0)
5068 vect_get_vec_defs (op0, NULL, stmt_info, &vec_oprnds0,
5069 NULL, slp_node);
5070 else
5071 vect_get_vec_defs_for_stmt_copy (vinfo, &vec_oprnds0, NULL);
5072
5073 FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
5074 {
5075 stmt_vec_info new_stmt_info;
5076 /* Arguments are ready, create the new vector stmt. */
5077 if (code1 == CALL_EXPR)
5078 {
5079 gcall *new_stmt = gimple_build_call (decl1, 1, vop0);
5080 new_temp = make_ssa_name (vec_dest, new_stmt);
5081 gimple_call_set_lhs (new_stmt, new_temp);
5082 new_stmt_info
5083 = vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
5084 }
5085 else
5086 {
5087 gcc_assert (TREE_CODE_LENGTH (code1) == unary_op);
5088 gassign *new_stmt
5089 = gimple_build_assign (vec_dest, code1, vop0);
5090 new_temp = make_ssa_name (vec_dest, new_stmt);
5091 gimple_assign_set_lhs (new_stmt, new_temp);
5092 new_stmt_info
5093 = vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
5094 }
5095
5096 if (slp_node)
5097 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt_info);
5098 else
5099 {
5100 if (!prev_stmt_info)
5101 STMT_VINFO_VEC_STMT (stmt_info)
5102 = *vec_stmt = new_stmt_info;
5103 else
5104 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt_info;
5105 prev_stmt_info = new_stmt_info;
5106 }
5107 }
5108 }
5109 break;
5110
5111 case WIDEN:
5112 /* In case the vectorization factor (VF) is bigger than the number
5113 of elements that we can fit in a vectype (nunits), we have to
5114 generate more than one vector stmt - i.e., we need to "unroll"
5115 the vector stmt by a factor VF/nunits. */
5116 for (j = 0; j < ncopies; j++)
5117 {
5118 /* Handle uses. */
5119 if (j == 0)
5120 {
5121 if (slp_node)
5122 {
5123 if (code == WIDEN_LSHIFT_EXPR)
5124 {
5125 unsigned int k;
5126
5127 vec_oprnd1 = op1;
5128 /* Store vec_oprnd1 for every vector stmt to be created
5129 for SLP_NODE. We check during the analysis that all
5130 the shift arguments are the same. */
5131 for (k = 0; k < slp_node->vec_stmts_size - 1; k++)
5132 vec_oprnds1.quick_push (vec_oprnd1);
5133
5134 vect_get_vec_defs (op0, NULL_TREE, stmt_info,
5135 &vec_oprnds0, NULL, slp_node);
5136 }
5137 else
5138 vect_get_vec_defs (op0, op1, stmt_info, &vec_oprnds0,
5139 &vec_oprnds1, slp_node);
5140 }
5141 else
5142 {
5143 vec_oprnd0 = vect_get_vec_def_for_operand (op0, stmt_info);
5144 vec_oprnds0.quick_push (vec_oprnd0);
5145 if (op_type == binary_op)
5146 {
5147 if (code == WIDEN_LSHIFT_EXPR)
5148 vec_oprnd1 = op1;
5149 else
5150 vec_oprnd1
5151 = vect_get_vec_def_for_operand (op1, stmt_info);
5152 vec_oprnds1.quick_push (vec_oprnd1);
5153 }
5154 }
5155 }
5156 else
5157 {
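/* For J > 0, obtain the vector defs for this copy from the defs
created for the previous copy.  */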
5158 vec_oprnd0 = vect_get_vec_def_for_stmt_copy (vinfo, vec_oprnd0);
5159 vec_oprnds0.truncate (0);
5160 vec_oprnds0.quick_push (vec_oprnd0);
5161 if (op_type == binary_op)
5162 {
5163 if (code == WIDEN_LSHIFT_EXPR)
5164 vec_oprnd1 = op1;
5165 else
5166 vec_oprnd1 = vect_get_vec_def_for_stmt_copy (vinfo,
5167 vec_oprnd1);
5168 vec_oprnds1.truncate (0);
5169 vec_oprnds1.quick_push (vec_oprnd1);
5170 }
5171 }
5172
5173 /* Arguments are ready. Create the new vector stmts. */
5174 for (i = multi_step_cvt; i >= 0; i--)
5175 {
5176 tree this_dest = vec_dsts[i];
5177 enum tree_code c1 = code1, c2 = code2;
5178 if (i == 0 && codecvt2 != ERROR_MARK)
5179 {
5180 c1 = codecvt1;
5181 c2 = codecvt2;
5182 }
5183 vect_create_vectorized_promotion_stmts (&vec_oprnds0,
5184 &vec_oprnds1, stmt_info,
5185 this_dest, gsi,
5186 c1, c2, decl1, decl2,
5187 op_type);
5188 }
5189
5190 FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
5191 {
5192 stmt_vec_info new_stmt_info;
5193 if (cvt_type)
5194 {
5195 if (codecvt1 == CALL_EXPR)
5196 {
5197 gcall *new_stmt = gimple_build_call (decl1, 1, vop0);
5198 new_temp = make_ssa_name (vec_dest, new_stmt);
5199 gimple_call_set_lhs (new_stmt, new_temp);
5200 new_stmt_info
5201 = vect_finish_stmt_generation (stmt_info, new_stmt,
5202 gsi);
5203 }
5204 else
5205 {
5206 gcc_assert (TREE_CODE_LENGTH (codecvt1) == unary_op);
5207 new_temp = make_ssa_name (vec_dest);
5208 gassign *new_stmt
5209 = gimple_build_assign (new_temp, codecvt1, vop0);
5210 new_stmt_info
5211 = vect_finish_stmt_generation (stmt_info, new_stmt,
5212 gsi);
5213 }
5214 }
5215 else
5216 new_stmt_info = vinfo->lookup_def (vop0);
5217
5218 if (slp_node)
5219 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt_info);
5220 else
5221 {
5222 if (!prev_stmt_info)
5223 STMT_VINFO_VEC_STMT (stmt_info) = new_stmt_info;
5224 else
5225 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt_info;
5226 prev_stmt_info = new_stmt_info;
5227 }
5228 }
5229 }
5230
5231 *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
5232 break;
5233
5234 case NARROW:
5235 /* In case the vectorization factor (VF) is bigger than the number
5236 of elements that we can fit in a vectype (nunits), we have to
5237 generate more than one vector stmt - i.e., we need to "unroll"
5238 the vector stmt by a factor VF/nunits. */
5239 for (j = 0; j < ncopies; j++)
5240 {
5241 /* Handle uses. */
5242 if (slp_node)
5243 vect_get_vec_defs (op0, NULL_TREE, stmt_info, &vec_oprnds0, NULL,
5244 slp_node);
5245 else
5246 {
5247 vec_oprnds0.truncate (0);
5248 vect_get_loop_based_defs (&last_oprnd, stmt_info, &vec_oprnds0,
5249 vect_pow2 (multi_step_cvt) - 1);
5250 }
5251
5252 /* Arguments are ready. Create the new vector stmts. */
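/* If an intermediate conversion step (CVT_TYPE) is required, apply it to
each vector operand before the demotion itself.  */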
5253 if (cvt_type)
5254 FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
5255 {
5256 if (codecvt1 == CALL_EXPR)
5257 {
5258 gcall *new_stmt = gimple_build_call (decl1, 1, vop0);
5259 new_temp = make_ssa_name (vec_dest, new_stmt);
5260 gimple_call_set_lhs (new_stmt, new_temp);
5261 vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
5262 }
5263 else
5264 {
5265 gcc_assert (TREE_CODE_LENGTH (codecvt1) == unary_op);
5266 new_temp = make_ssa_name (vec_dest);
5267 gassign *new_stmt
5268 = gimple_build_assign (new_temp, codecvt1, vop0);
5269 vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
5270 }
5271
5272 vec_oprnds0[i] = new_temp;
5273 }
5274
5275 vect_create_vectorized_demotion_stmts (&vec_oprnds0, multi_step_cvt,
5276 stmt_info, vec_dsts, gsi,
5277 slp_node, code1,
5278 &prev_stmt_info);
5279 }
5280
5281 *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
5282 break;
5283 }
5284
5285 vec_oprnds0.release ();
5286 vec_oprnds1.release ();
5287 interm_types.release ();
5288
5289 return true;
5290 }
5291
5292 /* Return true if we can assume from the scalar form of STMT_INFO that
5293 neither the scalar nor the vector forms will generate code. STMT_INFO
5294 is known not to involve a data reference. */
5295
5296 bool
5297 vect_nop_conversion_p (stmt_vec_info stmt_info)
5298 {
5299 gassign *stmt = dyn_cast <gassign *> (stmt_info->stmt);
5300 if (!stmt)
5301 return false;
5302
5303 tree lhs = gimple_assign_lhs (stmt);
5304 tree_code code = gimple_assign_rhs_code (stmt);
5305 tree rhs = gimple_assign_rhs1 (stmt);
5306
5307 if (code == SSA_NAME || code == VIEW_CONVERT_EXPR)
5308 return true;
5309
5310 if (CONVERT_EXPR_CODE_P (code))
5311 return tree_nop_conversion_p (TREE_TYPE (lhs), TREE_TYPE (rhs));
5312
5313 return false;
5314 }
5315
5316 /* Function vectorizable_assignment.
5317
5318 Check if STMT_INFO performs an assignment (copy) that can be vectorized.
5319 If VEC_STMT is also passed, vectorize the STMT_INFO: create a vectorized
5320 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
5321 Return true if STMT_INFO is vectorizable in this way. */
5322
5323 static bool
5324 vectorizable_assignment (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
5325 stmt_vec_info *vec_stmt, slp_tree slp_node,
5326 stmt_vector_for_cost *cost_vec)
5327 {
5328 tree vec_dest;
5329 tree scalar_dest;
5330 tree op;
5331 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
5332 tree new_temp;
5333 enum vect_def_type dt[1] = {vect_unknown_def_type};
5334 int ndts = 1;
5335 int ncopies;
5336 int i, j;
5337 vec<tree> vec_oprnds = vNULL;
5338 tree vop;
5339 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
5340 vec_info *vinfo = stmt_info->vinfo;
5341 stmt_vec_info prev_stmt_info = NULL;
5342 enum tree_code code;
5343 tree vectype_in;
5344
5345 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
5346 return false;
5347
5348 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
5349 && ! vec_stmt)
5350 return false;
5351
5352 /* Is this a vectorizable assignment? */
5353 gassign *stmt = dyn_cast <gassign *> (stmt_info->stmt);
5354 if (!stmt)
5355 return false;
5356
5357 scalar_dest = gimple_assign_lhs (stmt);
5358 if (TREE_CODE (scalar_dest) != SSA_NAME)
5359 return false;
5360
5361 code = gimple_assign_rhs_code (stmt);
5362 if (gimple_assign_single_p (stmt)
5363 || code == PAREN_EXPR
5364 || CONVERT_EXPR_CODE_P (code))
5365 op = gimple_assign_rhs1 (stmt);
5366 else
5367 return false;
5368
5369 if (code == VIEW_CONVERT_EXPR)
5370 op = TREE_OPERAND (op, 0);
5371
5372 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
5373 poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
5374
5375 /* Multiple types in SLP are handled by creating the appropriate number of
5376 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
5377 case of SLP. */
5378 if (slp_node)
5379 ncopies = 1;
5380 else
5381 ncopies = vect_get_num_copies (loop_vinfo, vectype);
5382
5383 gcc_assert (ncopies >= 1);
5384
5385 if (!vect_is_simple_use (op, vinfo, &dt[0], &vectype_in))
5386 {
5387 if (dump_enabled_p ())
5388 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5389 "use not simple.\n");
5390 return false;
5391 }
5392
5393 /* We can handle NOP_EXPR conversions that do not change the number
5394 of elements or the vector size. */
5395 if ((CONVERT_EXPR_CODE_P (code)
5396 || code == VIEW_CONVERT_EXPR)
5397 && (!vectype_in
5398 || maybe_ne (TYPE_VECTOR_SUBPARTS (vectype_in), nunits)
5399 || maybe_ne (GET_MODE_SIZE (TYPE_MODE (vectype)),
5400 GET_MODE_SIZE (TYPE_MODE (vectype_in)))))
5401 return false;
5402
5403 /* We do not handle bit-precision changes. */
5404 if ((CONVERT_EXPR_CODE_P (code)
5405 || code == VIEW_CONVERT_EXPR)
5406 && INTEGRAL_TYPE_P (TREE_TYPE (scalar_dest))
5407 && (!type_has_mode_precision_p (TREE_TYPE (scalar_dest))
5408 || !type_has_mode_precision_p (TREE_TYPE (op)))
5409 /* But a conversion that does not change the bit-pattern is ok. */
5410 && !((TYPE_PRECISION (TREE_TYPE (scalar_dest))
5411 > TYPE_PRECISION (TREE_TYPE (op)))
5412 && TYPE_UNSIGNED (TREE_TYPE (op)))
5413 /* Conversion between boolean types of different sizes is
5414 a simple assignment in case their vectypes are the same
5415 boolean vectors. */
5416 && (!VECTOR_BOOLEAN_TYPE_P (vectype)
5417 || !VECTOR_BOOLEAN_TYPE_P (vectype_in)))
5418 {
5419 if (dump_enabled_p ())
5420 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5421 "type conversion to/from bit-precision "
5422 "unsupported.\n");
5423 return false;
5424 }
5425
5426 if (!vec_stmt) /* transformation not required. */
5427 {
5428 STMT_VINFO_TYPE (stmt_info) = assignment_vec_info_type;
5429 DUMP_VECT_SCOPE ("vectorizable_assignment");
5430 if (!vect_nop_conversion_p (stmt_info))
5431 vect_model_simple_cost (stmt_info, ncopies, dt, ndts, slp_node,
5432 cost_vec);
5433 return true;
5434 }
5435
5436 /* Transform. */
5437 if (dump_enabled_p ())
5438 dump_printf_loc (MSG_NOTE, vect_location, "transform assignment.\n");
5439
5440 /* Handle def. */
5441 vec_dest = vect_create_destination_var (scalar_dest, vectype);
5442
5443 /* Handle use. */
5444 for (j = 0; j < ncopies; j++)
5445 {
5446 /* Handle uses. */
5447 if (j == 0)
5448 vect_get_vec_defs (op, NULL, stmt_info, &vec_oprnds, NULL, slp_node);
5449 else
5450 vect_get_vec_defs_for_stmt_copy (vinfo, &vec_oprnds, NULL);
5451
5452 /* Arguments are ready. Create the new vector stmt. */
5453 stmt_vec_info new_stmt_info = NULL;
5454 FOR_EACH_VEC_ELT (vec_oprnds, i, vop)
5455 {
5456 if (CONVERT_EXPR_CODE_P (code)
5457 || code == VIEW_CONVERT_EXPR)
5458 vop = build1 (VIEW_CONVERT_EXPR, vectype, vop);
5459 gassign *new_stmt = gimple_build_assign (vec_dest, vop);
5460 new_temp = make_ssa_name (vec_dest, new_stmt);
5461 gimple_assign_set_lhs (new_stmt, new_temp);
5462 new_stmt_info
5463 = vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
5464 if (slp_node)
5465 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt_info);
5466 }
5467
5468 if (slp_node)
5469 continue;
5470
5471 if (j == 0)
5472 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt_info;
5473 else
5474 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt_info;
5475
5476 prev_stmt_info = new_stmt_info;
5477 }
5478
5479 vec_oprnds.release ();
5480 return true;
5481 }
5482
5483
5484 /* Return TRUE if CODE (a shift operation) is supported for SCALAR_TYPE
5485 either as shift by a scalar or by a vector. */
5486
5487 bool
5488 vect_supportable_shift (vec_info *vinfo, enum tree_code code, tree scalar_type)
5489 {
5490
5491 machine_mode vec_mode;
5492 optab optab;
5493 int icode;
5494 tree vectype;
5495
5496 vectype = get_vectype_for_scalar_type (vinfo, scalar_type);
5497 if (!vectype)
5498 return false;
5499
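/* Prefer a shift by a scalar amount; if the target does not support
that, try a shift by a vector amount.  */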
5500 optab = optab_for_tree_code (code, vectype, optab_scalar);
5501 if (!optab
5502 || optab_handler (optab, TYPE_MODE (vectype)) == CODE_FOR_nothing)
5503 {
5504 optab = optab_for_tree_code (code, vectype, optab_vector);
5505 if (!optab
5506 || (optab_handler (optab, TYPE_MODE (vectype))
5507 == CODE_FOR_nothing))
5508 return false;
5509 }
5510
5511 vec_mode = TYPE_MODE (vectype);
5512 icode = (int) optab_handler (optab, vec_mode);
5513 if (icode == CODE_FOR_nothing)
5514 return false;
5515
5516 return true;
5517 }
5518
5519
5520 /* Function vectorizable_shift.
5521
5522 Check if STMT_INFO performs a shift operation that can be vectorized.
5523 If VEC_STMT is also passed, vectorize the STMT_INFO: create a vectorized
5524 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
5525 Return true if STMT_INFO is vectorizable in this way. */
5526
5527 static bool
5528 vectorizable_shift (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
5529 stmt_vec_info *vec_stmt, slp_tree slp_node,
5530 stmt_vector_for_cost *cost_vec)
5531 {
5532 tree vec_dest;
5533 tree scalar_dest;
5534 tree op0, op1 = NULL;
5535 tree vec_oprnd1 = NULL_TREE;
5536 tree vectype;
5537 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
5538 enum tree_code code;
5539 machine_mode vec_mode;
5540 tree new_temp;
5541 optab optab;
5542 int icode;
5543 machine_mode optab_op2_mode;
5544 enum vect_def_type dt[2] = {vect_unknown_def_type, vect_unknown_def_type};
5545 int ndts = 2;
5546 stmt_vec_info prev_stmt_info;
5547 poly_uint64 nunits_in;
5548 poly_uint64 nunits_out;
5549 tree vectype_out;
5550 tree op1_vectype;
5551 int ncopies;
5552 int j, i;
5553 vec<tree> vec_oprnds0 = vNULL;
5554 vec<tree> vec_oprnds1 = vNULL;
5555 tree vop0, vop1;
5556 unsigned int k;
5557 bool scalar_shift_arg = true;
5558 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
5559 vec_info *vinfo = stmt_info->vinfo;
5560 bool incompatible_op1_vectype_p = false;
5561
5562 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
5563 return false;
5564
5565 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
5566 && STMT_VINFO_DEF_TYPE (stmt_info) != vect_nested_cycle
5567 && ! vec_stmt)
5568 return false;
5569
5570 /* Is STMT a vectorizable shift operation? */
5571 gassign *stmt = dyn_cast <gassign *> (stmt_info->stmt);
5572 if (!stmt)
5573 return false;
5574
5575 if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME)
5576 return false;
5577
5578 code = gimple_assign_rhs_code (stmt);
5579
5580 if (!(code == LSHIFT_EXPR || code == RSHIFT_EXPR || code == LROTATE_EXPR
5581 || code == RROTATE_EXPR))
5582 return false;
5583
5584 scalar_dest = gimple_assign_lhs (stmt);
5585 vectype_out = STMT_VINFO_VECTYPE (stmt_info);
5586 if (!type_has_mode_precision_p (TREE_TYPE (scalar_dest)))
5587 {
5588 if (dump_enabled_p ())
5589 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5590 "bit-precision shifts not supported.\n");
5591 return false;
5592 }
5593
5594 op0 = gimple_assign_rhs1 (stmt);
5595 if (!vect_is_simple_use (op0, vinfo, &dt[0], &vectype))
5596 {
5597 if (dump_enabled_p ())
5598 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5599 "use not simple.\n");
5600 return false;
5601 }
5602 /* If op0 is an external or constant def, infer the vector type
5603 from the scalar type. */
5604 if (!vectype)
5605 vectype = get_vectype_for_scalar_type (vinfo, TREE_TYPE (op0));
5606 if (vec_stmt)
5607 gcc_assert (vectype);
5608 if (!vectype)
5609 {
5610 if (dump_enabled_p ())
5611 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5612 "no vectype for scalar type\n");
5613 return false;
5614 }
5615
5616 nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
5617 nunits_in = TYPE_VECTOR_SUBPARTS (vectype);
5618 if (maybe_ne (nunits_out, nunits_in))
5619 return false;
5620
5621 op1 = gimple_assign_rhs2 (stmt);
5622 stmt_vec_info op1_def_stmt_info;
5623 if (!vect_is_simple_use (op1, vinfo, &dt[1], &op1_vectype,
5624 &op1_def_stmt_info))
5625 {
5626 if (dump_enabled_p ())
5627 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5628 "use not simple.\n");
5629 return false;
5630 }
5631
5632 /* Multiple types in SLP are handled by creating the appropriate number of
5633 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
5634 case of SLP. */
5635 if (slp_node)
5636 ncopies = 1;
5637 else
5638 ncopies = vect_get_num_copies (loop_vinfo, vectype);
5639
5640 gcc_assert (ncopies >= 1);
5641
5642 /* Determine whether the shift amount is a vector, or scalar. If the
5643 shift/rotate amount is a vector, use the vector/vector shift optabs. */
5644
5645 if ((dt[1] == vect_internal_def
5646 || dt[1] == vect_induction_def
5647 || dt[1] == vect_nested_cycle)
5648 && !slp_node)
5649 scalar_shift_arg = false;
5650 else if (dt[1] == vect_constant_def
5651 || dt[1] == vect_external_def
5652 || dt[1] == vect_internal_def)
5653 {
5654 /* In SLP, we need to check whether the shift count is the same for
5655 all statements; in loops, if it is a constant or invariant, it is
5656 always a scalar shift. */
5657 if (slp_node)
5658 {
5659 vec<stmt_vec_info> stmts = SLP_TREE_SCALAR_STMTS (slp_node);
5660 stmt_vec_info slpstmt_info;
5661
5662 FOR_EACH_VEC_ELT (stmts, k, slpstmt_info)
5663 {
5664 gassign *slpstmt = as_a <gassign *> (slpstmt_info->stmt);
5665 if (!operand_equal_p (gimple_assign_rhs2 (slpstmt), op1, 0))
5666 scalar_shift_arg = false;
5667 }
5668
5669 /* For internal SLP defs we have to make sure we see scalar stmts
5670 for all vector elements.
5671 ??? For different vectors we could resort to a different
5672 scalar shift operand but code-generation below simply always
5673 takes the first. */
5674 if (dt[1] == vect_internal_def
5675 && maybe_ne (nunits_out * SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node),
5676 stmts.length ()))
5677 scalar_shift_arg = false;
5678 }
5679
5680 /* If the shift amount is computed by a pattern stmt, we cannot
5681 use the scalar amount directly, so give up and use a vector
5682 shift. */
5683 if (op1_def_stmt_info && is_pattern_stmt_p (op1_def_stmt_info))
5684 scalar_shift_arg = false;
5685 }
5686 else
5687 {
5688 if (dump_enabled_p ())
5689 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5690 "operand mode requires invariant argument.\n");
5691 return false;
5692 }
5693
5694 /* Vector shifted by vector. */
5695 bool was_scalar_shift_arg = scalar_shift_arg;
5696 if (!scalar_shift_arg)
5697 {
5698 optab = optab_for_tree_code (code, vectype, optab_vector);
5699 if (dump_enabled_p ())
5700 dump_printf_loc (MSG_NOTE, vect_location,
5701 "vector/vector shift/rotate found.\n");
5702
5703 if (!op1_vectype)
5704 op1_vectype = get_vectype_for_scalar_type (vinfo, TREE_TYPE (op1));
5705 incompatible_op1_vectype_p
5706 = (op1_vectype == NULL_TREE
5707 || maybe_ne (TYPE_VECTOR_SUBPARTS (op1_vectype),
5708 TYPE_VECTOR_SUBPARTS (vectype))
5709 || TYPE_MODE (op1_vectype) != TYPE_MODE (vectype));
5710 if (incompatible_op1_vectype_p
5711 && (!slp_node
5712 || SLP_TREE_DEF_TYPE
5713 (SLP_TREE_CHILDREN (slp_node)[1]) != vect_constant_def))
5714 {
5715 if (dump_enabled_p ())
5716 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5717 "unusable type for last operand in"
5718 " vector/vector shift/rotate.\n");
5719 return false;
5720 }
5721 }
5722 /* See if the machine has a vector shifted by scalar insn and, if not,
5723 whether it has a vector shifted by vector insn. */
5724 else
5725 {
5726 optab = optab_for_tree_code (code, vectype, optab_scalar);
5727 if (optab
5728 && optab_handler (optab, TYPE_MODE (vectype)) != CODE_FOR_nothing)
5729 {
5730 if (dump_enabled_p ())
5731 dump_printf_loc (MSG_NOTE, vect_location,
5732 "vector/scalar shift/rotate found.\n");
5733 }
5734 else
5735 {
5736 optab = optab_for_tree_code (code, vectype, optab_vector);
5737 if (optab
5738 && (optab_handler (optab, TYPE_MODE (vectype))
5739 != CODE_FOR_nothing))
5740 {
5741 scalar_shift_arg = false;
5742
5743 if (dump_enabled_p ())
5744 dump_printf_loc (MSG_NOTE, vect_location,
5745 "vector/vector shift/rotate found.\n");
5746
5747 /* Unlike the other binary operators, shifts/rotates have
5748 an rhs of type int rather than the same type as the lhs,
5749 so make sure the scalar has the right type if we are
5750 dealing with vectors of long long/long/short/char. */
5751 if (dt[1] == vect_constant_def)
5752 {
5753 if (!slp_node)
5754 op1 = fold_convert (TREE_TYPE (vectype), op1);
5755 }
5756 else if (!useless_type_conversion_p (TREE_TYPE (vectype),
5757 TREE_TYPE (op1)))
5758 {
5759 if (vec_stmt && !slp_node)
5760 {
5761 op1 = fold_convert (TREE_TYPE (vectype), op1);
5762 op1 = vect_init_vector (stmt_info, op1,
5763 TREE_TYPE (vectype), NULL);
5764 }
5765 }
5766 }
5767 }
5768 }
5769
5770 /* Supportable by target? */
5771 if (!optab)
5772 {
5773 if (dump_enabled_p ())
5774 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5775 "no optab.\n");
5776 return false;
5777 }
5778 vec_mode = TYPE_MODE (vectype);
5779 icode = (int) optab_handler (optab, vec_mode);
5780 if (icode == CODE_FOR_nothing)
5781 {
5782 if (dump_enabled_p ())
5783 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5784 "op not supported by target.\n");
5785 /* Check only during analysis. */
5786 if (maybe_ne (GET_MODE_SIZE (vec_mode), UNITS_PER_WORD)
5787 || (!vec_stmt
5788 && !vect_worthwhile_without_simd_p (vinfo, code)))
5789 return false;
5790 if (dump_enabled_p ())
5791 dump_printf_loc (MSG_NOTE, vect_location,
5792 "proceeding using word mode.\n");
5793 }
5794
5795 /* Worthwhile without SIMD support? Check only during analysis. */
5796 if (!vec_stmt
5797 && !VECTOR_MODE_P (TYPE_MODE (vectype))
5798 && !vect_worthwhile_without_simd_p (vinfo, code))
5799 {
5800 if (dump_enabled_p ())
5801 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5802 "not worthwhile without SIMD support.\n");
5803 return false;
5804 }
5805
5806 if (!vec_stmt) /* transformation not required. */
5807 {
5808 STMT_VINFO_TYPE (stmt_info) = shift_vec_info_type;
5809 DUMP_VECT_SCOPE ("vectorizable_shift");
5810 vect_model_simple_cost (stmt_info, ncopies, dt, ndts, slp_node, cost_vec);
5811 return true;
5812 }
5813
5814 /* Transform. */
5815
5816 if (dump_enabled_p ())
5817 dump_printf_loc (MSG_NOTE, vect_location,
5818 "transform binary/unary operation.\n");
5819
5820 /* Handle def. */
5821 vec_dest = vect_create_destination_var (scalar_dest, vectype);
5822
5823 prev_stmt_info = NULL;
5824 for (j = 0; j < ncopies; j++)
5825 {
5826 /* Handle uses. */
5827 if (j == 0)
5828 {
5829 if (scalar_shift_arg)
5830 {
5831 /* Vector shl and shr insn patterns can be defined with scalar
5832 operand 2 (shift operand). In this case, use constant or loop
5833 invariant op1 directly, without extending it to vector mode
5834 first. */
5835 optab_op2_mode = insn_data[icode].operand[2].mode;
5836 if (!VECTOR_MODE_P (optab_op2_mode))
5837 {
5838 if (dump_enabled_p ())
5839 dump_printf_loc (MSG_NOTE, vect_location,
5840 "operand 1 using scalar mode.\n");
5841 vec_oprnd1 = op1;
5842 vec_oprnds1.create (slp_node ? slp_node->vec_stmts_size : 1);
5843 vec_oprnds1.quick_push (vec_oprnd1);
5844 if (slp_node)
5845 {
5846 /* Store vec_oprnd1 for every vector stmt to be created
5847 for SLP_NODE. We check during the analysis that all
5848 the shift arguments are the same.
5849 TODO: Allow different constants for different vector
5850 stmts generated for an SLP instance. */
5851 for (k = 0; k < slp_node->vec_stmts_size - 1; k++)
5852 vec_oprnds1.quick_push (vec_oprnd1);
5853 }
5854 }
5855 }
5856 else if (slp_node && incompatible_op1_vectype_p)
5857 {
5858 if (was_scalar_shift_arg)
5859 {
5860 /* If the argument was the same in all lanes, create
5861 the correctly typed vector shift amount directly. */
5862 op1 = fold_convert (TREE_TYPE (vectype), op1);
5863 op1 = vect_init_vector (stmt_info, op1, TREE_TYPE (vectype),
5864 !loop_vinfo ? gsi : NULL);
5865 vec_oprnd1 = vect_init_vector (stmt_info, op1, vectype,
5866 !loop_vinfo ? gsi : NULL);
5867 vec_oprnds1.create (slp_node->vec_stmts_size);
5868 for (k = 0; k < slp_node->vec_stmts_size; k++)
5869 vec_oprnds1.quick_push (vec_oprnd1);
5870 }
5871 else if (dt[1] == vect_constant_def)
5872 {
5873 /* Convert the scalar constant shift amounts in-place. */
5874 slp_tree shift = SLP_TREE_CHILDREN (slp_node)[1];
5875 gcc_assert (SLP_TREE_DEF_TYPE (shift) == vect_constant_def);
5876 for (unsigned i = 0;
5877 i < SLP_TREE_SCALAR_OPS (shift).length (); ++i)
5878 {
5879 SLP_TREE_SCALAR_OPS (shift)[i]
5880 = fold_convert (TREE_TYPE (vectype),
5881 SLP_TREE_SCALAR_OPS (shift)[i]);
5882 gcc_assert ((TREE_CODE (SLP_TREE_SCALAR_OPS (shift)[i])
5883 == INTEGER_CST));
5884 }
5885 }
5886 else
5887 gcc_assert (TYPE_MODE (op1_vectype) == TYPE_MODE (vectype));
5888 }
5889
5890 /* vec_oprnd1 is available if operand 1 should be of a scalar type
5891 (a special case for certain kinds of vector shifts); otherwise,
5892 operand 1 should be of a vector type (the usual case). */
5893 if (vec_oprnd1)
5894 vect_get_vec_defs (op0, NULL_TREE, stmt_info, &vec_oprnds0, NULL,
5895 slp_node);
5896 else
5897 vect_get_vec_defs (op0, op1, stmt_info, &vec_oprnds0, &vec_oprnds1,
5898 slp_node);
5899 }
5900 else
5901 vect_get_vec_defs_for_stmt_copy (vinfo, &vec_oprnds0, &vec_oprnds1);
5902
5903 /* Arguments are ready. Create the new vector stmt. */
5904 stmt_vec_info new_stmt_info = NULL;
5905 FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
5906 {
5907 vop1 = vec_oprnds1[i];
5908 gassign *new_stmt = gimple_build_assign (vec_dest, code, vop0, vop1);
5909 new_temp = make_ssa_name (vec_dest, new_stmt);
5910 gimple_assign_set_lhs (new_stmt, new_temp);
5911 new_stmt_info
5912 = vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
5913 if (slp_node)
5914 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt_info);
5915 }
5916
5917 if (slp_node)
5918 continue;
5919
5920 if (j == 0)
5921 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt_info;
5922 else
5923 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt_info;
5924 prev_stmt_info = new_stmt_info;
5925 }
5926
5927 vec_oprnds0.release ();
5928 vec_oprnds1.release ();
5929
5930 return true;
5931 }
5932
5933
5934 /* Function vectorizable_operation.
5935
5936 Check if STMT_INFO performs a binary, unary or ternary operation that can
5937 be vectorized.
5938 If VEC_STMT is also passed, vectorize STMT_INFO: create a vectorized
5939 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
5940 Return true if STMT_INFO is vectorizable in this way. */
5941
5942 static bool
5943 vectorizable_operation (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
5944 stmt_vec_info *vec_stmt, slp_tree slp_node,
5945 stmt_vector_for_cost *cost_vec)
5946 {
5947 tree vec_dest;
5948 tree scalar_dest;
5949 tree op0, op1 = NULL_TREE, op2 = NULL_TREE;
5950 tree vectype;
5951 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
5952 enum tree_code code, orig_code;
5953 machine_mode vec_mode;
5954 tree new_temp;
5955 int op_type;
5956 optab optab;
5957 bool target_support_p;
5958 enum vect_def_type dt[3]
5959 = {vect_unknown_def_type, vect_unknown_def_type, vect_unknown_def_type};
5960 int ndts = 3;
5961 stmt_vec_info prev_stmt_info;
5962 poly_uint64 nunits_in;
5963 poly_uint64 nunits_out;
5964 tree vectype_out;
5965 int ncopies, vec_num;
5966 int j, i;
5967 vec<tree> vec_oprnds0 = vNULL;
5968 vec<tree> vec_oprnds1 = vNULL;
5969 vec<tree> vec_oprnds2 = vNULL;
5970 tree vop0, vop1, vop2;
5971 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
5972 vec_info *vinfo = stmt_info->vinfo;
5973
5974 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
5975 return false;
5976
5977 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
5978 && ! vec_stmt)
5979 return false;
5980
5981 /* Is STMT a vectorizable binary/unary/ternary operation? */
5982 gassign *stmt = dyn_cast <gassign *> (stmt_info->stmt);
5983 if (!stmt)
5984 return false;
5985
5986 if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME)
5987 return false;
5988
5989 orig_code = code = gimple_assign_rhs_code (stmt);
5990
5991 /* For pointer addition and subtraction, we should use the normal
5992 plus and minus for the vector operation. */
5993 if (code == POINTER_PLUS_EXPR)
5994 code = PLUS_EXPR;
5995 if (code == POINTER_DIFF_EXPR)
5996 code = MINUS_EXPR;
5997
5998 /* Support only unary, binary and ternary operations. */
5999 op_type = TREE_CODE_LENGTH (code);
6000 if (op_type != unary_op && op_type != binary_op && op_type != ternary_op)
6001 {
6002 if (dump_enabled_p ())
6003 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6004 "num. args = %d (not unary/binary/ternary op).\n",
6005 op_type);
6006 return false;
6007 }
6008
6009 scalar_dest = gimple_assign_lhs (stmt);
6010 vectype_out = STMT_VINFO_VECTYPE (stmt_info);
6011
6012 /* Most operations cannot handle bit-precision types without extra
6013 truncations. */
6014 if (!VECTOR_BOOLEAN_TYPE_P (vectype_out)
6015 && !type_has_mode_precision_p (TREE_TYPE (scalar_dest))
6016 /* Exceptions are bitwise binary operations. */
6017 && code != BIT_IOR_EXPR
6018 && code != BIT_XOR_EXPR
6019 && code != BIT_AND_EXPR)
6020 {
6021 if (dump_enabled_p ())
6022 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6023 "bit-precision arithmetic not supported.\n");
6024 return false;
6025 }
6026
6027 op0 = gimple_assign_rhs1 (stmt);
6028 if (!vect_is_simple_use (op0, vinfo, &dt[0], &vectype))
6029 {
6030 if (dump_enabled_p ())
6031 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6032 "use not simple.\n");
6033 return false;
6034 }
6035 /* If op0 is an external or constant def, infer the vector type
6036 from the scalar type. */
6037 if (!vectype)
6038 {
6039 /* For a boolean type we cannot determine the vectype from an
6040 invariant value (we don't know whether it is a vector of
6041 booleans or a vector of integers). We use the output
6042 vectype because operations on booleans don't change the
6043 type. */
6044 if (VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (op0)))
6045 {
6046 if (!VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (scalar_dest)))
6047 {
6048 if (dump_enabled_p ())
6049 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6050 "not supported operation on bool value.\n");
6051 return false;
6052 }
6053 vectype = vectype_out;
6054 }
6055 else
6056 vectype = get_vectype_for_scalar_type (vinfo, TREE_TYPE (op0));
6057 }
6058 if (vec_stmt)
6059 gcc_assert (vectype);
6060 if (!vectype)
6061 {
6062 if (dump_enabled_p ())
6063 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6064 "no vectype for scalar type %T\n",
6065 TREE_TYPE (op0));
6066
6067 return false;
6068 }
6069
6070 nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
6071 nunits_in = TYPE_VECTOR_SUBPARTS (vectype);
6072 if (maybe_ne (nunits_out, nunits_in))
6073 return false;
6074
6075 if (op_type == binary_op || op_type == ternary_op)
6076 {
6077 op1 = gimple_assign_rhs2 (stmt);
6078 if (!vect_is_simple_use (op1, vinfo, &dt[1]))
6079 {
6080 if (dump_enabled_p ())
6081 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6082 "use not simple.\n");
6083 return false;
6084 }
6085 }
6086 if (op_type == ternary_op)
6087 {
6088 op2 = gimple_assign_rhs3 (stmt);
6089 if (!vect_is_simple_use (op2, vinfo, &dt[2]))
6090 {
6091 if (dump_enabled_p ())
6092 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6093 "use not simple.\n");
6094 return false;
6095 }
6096 }
6097
6098 /* Multiple types in SLP are handled by creating the appropriate number of
6099 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
6100 case of SLP. */
6101 if (slp_node)
6102 {
6103 ncopies = 1;
6104 vec_num = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
6105 }
6106 else
6107 {
6108 ncopies = vect_get_num_copies (loop_vinfo, vectype);
6109 vec_num = 1;
6110 }
6111
6112 gcc_assert (ncopies >= 1);
6113
6114 /* Shifts are handled in vectorizable_shift (). */
6115 if (code == LSHIFT_EXPR || code == RSHIFT_EXPR || code == LROTATE_EXPR
6116 || code == RROTATE_EXPR)
6117 return false;
6118
6119 /* Supportable by target? */
6120
6121 vec_mode = TYPE_MODE (vectype);
6122 if (code == MULT_HIGHPART_EXPR)
6123 target_support_p = can_mult_highpart_p (vec_mode, TYPE_UNSIGNED (vectype));
6124 else
6125 {
6126 optab = optab_for_tree_code (code, vectype, optab_default);
6127 if (!optab)
6128 {
6129 if (dump_enabled_p ())
6130 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6131 "no optab.\n");
6132 return false;
6133 }
6134 target_support_p = (optab_handler (optab, vec_mode)
6135 != CODE_FOR_nothing);
6136 }
6137
6138 if (!target_support_p)
6139 {
6140 if (dump_enabled_p ())
6141 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6142 "op not supported by target.\n");
6143 /* Check only during analysis. */
6144 if (maybe_ne (GET_MODE_SIZE (vec_mode), UNITS_PER_WORD)
6145 || (!vec_stmt && !vect_worthwhile_without_simd_p (vinfo, code)))
6146 return false;
6147 if (dump_enabled_p ())
6148 dump_printf_loc (MSG_NOTE, vect_location,
6149 "proceeding using word mode.\n");
6150 }
6151
6152 /* Worthwhile without SIMD support? Check only during analysis. */
6153 if (!VECTOR_MODE_P (vec_mode)
6154 && !vec_stmt
6155 && !vect_worthwhile_without_simd_p (vinfo, code))
6156 {
6157 if (dump_enabled_p ())
6158 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6159 "not worthwhile without SIMD support.\n");
6160 return false;
6161 }
6162
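/* Gather the information needed for masking: whether this stmt is part
of a reduction, the loop masks (when vectorizing a loop), and the
conditional internal function corresponding to CODE.  */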
6163 int reduc_idx = STMT_VINFO_REDUC_IDX (stmt_info);
6164 vec_loop_masks *masks = (loop_vinfo ? &LOOP_VINFO_MASKS (loop_vinfo) : NULL);
6165 internal_fn cond_fn = get_conditional_internal_fn (code);
6166
6167 if (!vec_stmt) /* transformation not required. */
6168 {
6169 /* If this operation is part of a reduction, a fully-masked loop
6170 should only change the active lanes of the reduction chain,
6171 keeping the inactive lanes as-is. */
6172 if (loop_vinfo
6173 && LOOP_VINFO_CAN_FULLY_MASK_P (loop_vinfo)
6174 && reduc_idx >= 0)
6175 {
6176 if (cond_fn == IFN_LAST
6177 || !direct_internal_fn_supported_p (cond_fn, vectype,
6178 OPTIMIZE_FOR_SPEED))
6179 {
6180 if (dump_enabled_p ())
6181 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6182 "can't use a fully-masked loop because no"
6183 " conditional operation is available.\n");
6184 LOOP_VINFO_CAN_FULLY_MASK_P (loop_vinfo) = false;
6185 }
6186 else
6187 vect_record_loop_mask (loop_vinfo, masks, ncopies * vec_num,
6188 vectype, NULL);
6189 }
6190
6191 STMT_VINFO_TYPE (stmt_info) = op_vec_info_type;
6192 DUMP_VECT_SCOPE ("vectorizable_operation");
6193 vect_model_simple_cost (stmt_info, ncopies, dt, ndts, slp_node, cost_vec);
6194 return true;
6195 }
6196
6197 /* Transform. */
6198
6199 if (dump_enabled_p ())
6200 dump_printf_loc (MSG_NOTE, vect_location,
6201 "transform binary/unary operation.\n");
6202
6203 bool masked_loop_p = loop_vinfo && LOOP_VINFO_FULLY_MASKED_P (loop_vinfo);
6204
6205 /* POINTER_DIFF_EXPR has pointer arguments which are vectorized as
6206 vectors with unsigned elements, but the result is signed. So, we
6207 need to compute the MINUS_EXPR into a vectype temporary and
6208 VIEW_CONVERT_EXPR it into the final vectype_out result. */
6209 tree vec_cvt_dest = NULL_TREE;
6210 if (orig_code == POINTER_DIFF_EXPR)
6211 {
6212 vec_dest = vect_create_destination_var (scalar_dest, vectype);
6213 vec_cvt_dest = vect_create_destination_var (scalar_dest, vectype_out);
6214 }
6215 /* Handle def. */
6216 else
6217 vec_dest = vect_create_destination_var (scalar_dest, vectype_out);
6218
6219 /* In case the vectorization factor (VF) is bigger than the number
6220 of elements that we can fit in a vectype (nunits), we have to generate
6221 more than one vector stmt - i.e., we need to "unroll" the
6222 vector stmt by a factor VF/nunits. In doing so, we record a pointer
6223 from one copy of the vector stmt to the next, in the field
6224 STMT_VINFO_RELATED_STMT. This is necessary in order to allow following
6225 stages to find the correct vector defs to be used when vectorizing
6226 stmts that use the defs of the current stmt. The example below
6227 illustrates the vectorization process when VF=16 and nunits=4 (i.e.,
6228 we need to create 4 vectorized stmts):
6229
6230 before vectorization:
6231 RELATED_STMT VEC_STMT
6232 S1: x = memref - -
6233 S2: z = x + 1 - -
6234
6235 step 1: vectorize stmt S1 (done in vectorizable_load. See more details
6236 there):
6237 RELATED_STMT VEC_STMT
6238 VS1_0: vx0 = memref0 VS1_1 -
6239 VS1_1: vx1 = memref1 VS1_2 -
6240 VS1_2: vx2 = memref2 VS1_3 -
6241 VS1_3: vx3 = memref3 - -
6242 S1: x = load - VS1_0
6243 S2: z = x + 1 - -
6244
6245 step2: vectorize stmt S2 (done here):
6246 To vectorize stmt S2 we first need to find the relevant vector
6247 def for the first operand 'x'. This is, as usual, obtained from
6248 the vector stmt recorded in the STMT_VINFO_VEC_STMT of the stmt
6249 that defines 'x' (S1). This way we find the stmt VS1_0, and the
6250 relevant vector def 'vx0'. Having found 'vx0' we can generate
6251 the vector stmt VS2_0, and as usual, record it in the
6252 STMT_VINFO_VEC_STMT of stmt S2.
6253 When creating the second copy (VS2_1), we obtain the relevant vector
6254 def from the vector stmt recorded in the STMT_VINFO_RELATED_STMT of
6255 stmt VS1_0. This way we find the stmt VS1_1 and the relevant
6256 vector def 'vx1'. Using 'vx1' we create stmt VS2_1 and record a
6257 pointer to it in the STMT_VINFO_RELATED_STMT of the vector stmt VS2_0.
6258 Similarly when creating stmts VS2_2 and VS2_3. This is the resulting
6259 chain of stmts and pointers:
6260 RELATED_STMT VEC_STMT
6261 VS1_0: vx0 = memref0 VS1_1 -
6262 VS1_1: vx1 = memref1 VS1_2 -
6263 VS1_2: vx2 = memref2 VS1_3 -
6264 VS1_3: vx3 = memref3 - -
6265 S1: x = load - VS1_0
6266 VS2_0: vz0 = vx0 + v1 VS2_1 -
6267 VS2_1: vz1 = vx1 + v1 VS2_2 -
6268 VS2_2: vz2 = vx2 + v1 VS2_3 -
6269 VS2_3: vz3 = vx3 + v1 - -
6270 S2: z = x + 1 - VS2_0 */
6271
6272 prev_stmt_info = NULL;
6273 for (j = 0; j < ncopies; j++)
6274 {
6275 /* Handle uses. */
6276 if (j == 0)
6277 {
6278 if (op_type == binary_op)
6279 vect_get_vec_defs (op0, op1, stmt_info, &vec_oprnds0, &vec_oprnds1,
6280 slp_node);
6281 else if (op_type == ternary_op)
6282 {
6283 if (slp_node)
6284 {
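/* For SLP, the defs of all three operands are obtained at once.  */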
6285 auto_vec<vec<tree> > vec_defs(3);
6286 vect_get_slp_defs (slp_node, &vec_defs);
6287 vec_oprnds0 = vec_defs[0];
6288 vec_oprnds1 = vec_defs[1];
6289 vec_oprnds2 = vec_defs[2];
6290 }
6291 else
6292 {
6293 vect_get_vec_defs (op0, op1, stmt_info, &vec_oprnds0,
6294 &vec_oprnds1, NULL);
6295 vect_get_vec_defs (op2, NULL_TREE, stmt_info, &vec_oprnds2,
6296 NULL, NULL);
6297 }
6298 }
6299 else
6300 vect_get_vec_defs (op0, NULL_TREE, stmt_info, &vec_oprnds0, NULL,
6301 slp_node);
6302 }
6303 else
6304 {
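/* For subsequent copies, derive the vector defs from those of the
previous copy; the third operand must be advanced separately since
vect_get_vec_defs_for_stmt_copy only handles two operands.  */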
6305 vect_get_vec_defs_for_stmt_copy (vinfo, &vec_oprnds0, &vec_oprnds1);
6306 if (op_type == ternary_op)
6307 {
6308 tree vec_oprnd = vec_oprnds2.pop ();
6309 vec_oprnds2.quick_push (vect_get_vec_def_for_stmt_copy (vinfo,
6310 vec_oprnd));
6311 }
6312 }
6313
6314 /* Arguments are ready. Create the new vector stmt. */
6315 stmt_vec_info new_stmt_info = NULL;
6316 FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
6317 {
6318 vop1 = ((op_type == binary_op || op_type == ternary_op)
6319 ? vec_oprnds1[i] : NULL_TREE);
6320 vop2 = ((op_type == ternary_op)
6321 ? vec_oprnds2[i] : NULL_TREE);
6322 if (masked_loop_p && reduc_idx >= 0)
6323 {
6324 /* Perform the operation on active elements only and take
6325 inactive elements from the reduction chain input. */
6326 gcc_assert (!vop2);
6327 vop2 = reduc_idx == 1 ? vop1 : vop0;
6328 tree mask = vect_get_loop_mask (gsi, masks, vec_num * ncopies,
6329 vectype, i * ncopies + j);
6330 gcall *call = gimple_build_call_internal (cond_fn, 4, mask,
6331 vop0, vop1, vop2);
6332 new_temp = make_ssa_name (vec_dest, call);
6333 gimple_call_set_lhs (call, new_temp);
6334 gimple_call_set_nothrow (call, true);
6335 new_stmt_info
6336 = vect_finish_stmt_generation (stmt_info, call, gsi);
6337 }
6338 else
6339 {
6340 gassign *new_stmt = gimple_build_assign (vec_dest, code,
6341 vop0, vop1, vop2);
6342 new_temp = make_ssa_name (vec_dest, new_stmt);
6343 gimple_assign_set_lhs (new_stmt, new_temp);
6344 new_stmt_info
6345 = vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
6346 if (vec_cvt_dest)
6347 {
6348 new_temp = build1 (VIEW_CONVERT_EXPR, vectype_out, new_temp);
6349 gassign *new_stmt
6350 = gimple_build_assign (vec_cvt_dest, VIEW_CONVERT_EXPR,
6351 new_temp);
6352 new_temp = make_ssa_name (vec_cvt_dest, new_stmt);
6353 gimple_assign_set_lhs (new_stmt, new_temp);
6354 new_stmt_info
6355 = vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
6356 }
6357 }
6358 if (slp_node)
6359 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt_info);
6360 }
6361
6362 if (slp_node)
6363 continue;
6364
6365 if (j == 0)
6366 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt_info;
6367 else
6368 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt_info;
6369 prev_stmt_info = new_stmt_info;
6370 }
6371
6372 vec_oprnds0.release ();
6373 vec_oprnds1.release ();
6374 vec_oprnds2.release ();
6375
6376 return true;
6377 }
6378
6379 /* A helper function to ensure data reference DR_INFO's base alignment. */
6380
6381 static void
6382 ensure_base_align (dr_vec_info *dr_info)
6383 {
6384 if (dr_info->misalignment == DR_MISALIGNMENT_UNINITIALIZED)
6385 return;
6386
6387 if (dr_info->base_misaligned)
6388 {
6389 tree base_decl = dr_info->base_decl;
6390
6391 /* We should only be able to increase the alignment of a base object
6392 if we know what its new alignment should be at compile time. */
6393 unsigned HOST_WIDE_INT align_base_to =
6394 DR_TARGET_ALIGNMENT (dr_info).to_constant () * BITS_PER_UNIT;
6395
6396 if (decl_in_symtab_p (base_decl))
6397 symtab_node::get (base_decl)->increase_alignment (align_base_to);
6398 else if (DECL_ALIGN (base_decl) < align_base_to)
6399 {
6400 SET_DECL_ALIGN (base_decl, align_base_to);
6401 DECL_USER_ALIGN (base_decl) = 1;
6402 }
6403 dr_info->base_misaligned = false;
6404 }
6405 }
6406
6407
6408 /* Function get_group_alias_ptr_type.
6409
6410 Return the alias type for the group starting at FIRST_STMT_INFO. */
6411
6412 static tree
6413 get_group_alias_ptr_type (stmt_vec_info first_stmt_info)
6414 {
6415 struct data_reference *first_dr, *next_dr;
6416
6417 first_dr = STMT_VINFO_DATA_REF (first_stmt_info);
6418 stmt_vec_info next_stmt_info = DR_GROUP_NEXT_ELEMENT (first_stmt_info);
6419 while (next_stmt_info)
6420 {
6421 next_dr = STMT_VINFO_DATA_REF (next_stmt_info);
6422 if (get_alias_set (DR_REF (first_dr))
6423 != get_alias_set (DR_REF (next_dr)))
6424 {
6425 if (dump_enabled_p ())
6426 dump_printf_loc (MSG_NOTE, vect_location,
6427 "conflicting alias set types.\n");
6428 return ptr_type_node;
6429 }
6430 next_stmt_info = DR_GROUP_NEXT_ELEMENT (next_stmt_info);
6431 }
6432 return reference_alias_ptr_type (DR_REF (first_dr));
6433 }
6434
6435
6436 /* Function scan_operand_equal_p.
6437
6438 Helper function for check_scan_store. Compare two references
6439 with .GOMP_SIMD_LANE bases. */
6440
6441 static bool
6442 scan_operand_equal_p (tree ref1, tree ref2)
6443 {
6444 tree ref[2] = { ref1, ref2 };
6445 poly_int64 bitsize[2], bitpos[2];
6446 tree offset[2], base[2];
6447 for (int i = 0; i < 2; ++i)
6448 {
6449 machine_mode mode;
6450 int unsignedp, reversep, volatilep = 0;
6451 base[i] = get_inner_reference (ref[i], &bitsize[i], &bitpos[i],
6452 &offset[i], &mode, &unsignedp,
6453 &reversep, &volatilep);
6454 if (reversep || volatilep || maybe_ne (bitpos[i], 0))
6455 return false;
6456 if (TREE_CODE (base[i]) == MEM_REF
6457 && offset[i] == NULL_TREE
6458 && TREE_CODE (TREE_OPERAND (base[i], 0)) == SSA_NAME)
6459 {
6460 gimple *def_stmt = SSA_NAME_DEF_STMT (TREE_OPERAND (base[i], 0));
6461 if (is_gimple_assign (def_stmt)
6462 && gimple_assign_rhs_code (def_stmt) == POINTER_PLUS_EXPR
6463 && TREE_CODE (gimple_assign_rhs1 (def_stmt)) == ADDR_EXPR
6464 && TREE_CODE (gimple_assign_rhs2 (def_stmt)) == SSA_NAME)
6465 {
6466 if (maybe_ne (mem_ref_offset (base[i]), 0))
6467 return false;
6468 base[i] = TREE_OPERAND (gimple_assign_rhs1 (def_stmt), 0);
6469 offset[i] = gimple_assign_rhs2 (def_stmt);
6470 }
6471 }
6472 }
6473
6474 if (!operand_equal_p (base[0], base[1], 0))
6475 return false;
6476 if (maybe_ne (bitsize[0], bitsize[1]))
6477 return false;
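/* If the offset trees are not identical, check whether they compute the
same value, allowing for a common multiplication step and for widening
conversions that can be stripped.  */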
6478 if (offset[0] != offset[1])
6479 {
6480 if (!offset[0] || !offset[1])
6481 return false;
6482 if (!operand_equal_p (offset[0], offset[1], 0))
6483 {
6484 tree step[2];
6485 for (int i = 0; i < 2; ++i)
6486 {
6487 step[i] = integer_one_node;
6488 if (TREE_CODE (offset[i]) == SSA_NAME)
6489 {
6490 gimple *def_stmt = SSA_NAME_DEF_STMT (offset[i]);
6491 if (is_gimple_assign (def_stmt)
6492 && gimple_assign_rhs_code (def_stmt) == MULT_EXPR
6493 && (TREE_CODE (gimple_assign_rhs2 (def_stmt))
6494 == INTEGER_CST))
6495 {
6496 step[i] = gimple_assign_rhs2 (def_stmt);
6497 offset[i] = gimple_assign_rhs1 (def_stmt);
6498 }
6499 }
6500 else if (TREE_CODE (offset[i]) == MULT_EXPR)
6501 {
6502 step[i] = TREE_OPERAND (offset[i], 1);
6503 offset[i] = TREE_OPERAND (offset[i], 0);
6504 }
6505 tree rhs1 = NULL_TREE;
6506 if (TREE_CODE (offset[i]) == SSA_NAME)
6507 {
6508 gimple *def_stmt = SSA_NAME_DEF_STMT (offset[i]);
6509 if (gimple_assign_cast_p (def_stmt))
6510 rhs1 = gimple_assign_rhs1 (def_stmt);
6511 }
6512 else if (CONVERT_EXPR_P (offset[i]))
6513 rhs1 = TREE_OPERAND (offset[i], 0);
6514 if (rhs1
6515 && INTEGRAL_TYPE_P (TREE_TYPE (rhs1))
6516 && INTEGRAL_TYPE_P (TREE_TYPE (offset[i]))
6517 && (TYPE_PRECISION (TREE_TYPE (offset[i]))
6518 >= TYPE_PRECISION (TREE_TYPE (rhs1))))
6519 offset[i] = rhs1;
6520 }
6521 if (!operand_equal_p (offset[0], offset[1], 0)
6522 || !operand_equal_p (step[0], step[1], 0))
6523 return false;
6524 }
6525 }
6526 return true;
6527 }
6528
6529
6530 enum scan_store_kind {
6531 /* Normal permutation. */
6532 scan_store_kind_perm,
6533
6534 /* Whole vector left shift permutation with zero init. */
6535 scan_store_kind_lshift_zero,
6536
6537 /* Whole vector left shift permutation and VEC_COND_EXPR. */
6538 scan_store_kind_lshift_cond
6539 };
6540
6541 /* Function scan_store_can_perm_p.
6542
6543 Verify if we can perform the needed permutations or whole vector shifts.
6544 Return -1 on failure, otherwise the exact log2 of VECTYPE's nunits.
6545 USE_WHOLE_VECTOR, if nonnull, records which scan_store_kind operation
6546 to use at each step. */
6547
6548 static int
6549 scan_store_can_perm_p (tree vectype, tree init,
6550 vec<enum scan_store_kind> *use_whole_vector = NULL)
6551 {
6552 enum machine_mode vec_mode = TYPE_MODE (vectype);
6553 unsigned HOST_WIDE_INT nunits;
6554 if (!TYPE_VECTOR_SUBPARTS (vectype).is_constant (&nunits))
6555 return -1;
6556 int units_log2 = exact_log2 (nunits);
6557 if (units_log2 <= 0)
6558 return -1;
6559
6560 int i;
6561 enum scan_store_kind whole_vector_shift_kind = scan_store_kind_perm;
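/* Check each of the UNITS_LOG2 scan steps, plus a final step that
broadcasts the last element. If a step's permutation is not supported
directly, see whether a whole-vector shift (possibly combined with a
VEC_COND_EXPR) can be used instead.  */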
6562 for (i = 0; i <= units_log2; ++i)
6563 {
6564 unsigned HOST_WIDE_INT j, k;
6565 enum scan_store_kind kind = scan_store_kind_perm;
6566 vec_perm_builder sel (nunits, nunits, 1);
6567 sel.quick_grow (nunits);
6568 if (i == units_log2)
6569 {
6570 for (j = 0; j < nunits; ++j)
6571 sel[j] = nunits - 1;
6572 }
6573 else
6574 {
6575 for (j = 0; j < (HOST_WIDE_INT_1U << i); ++j)
6576 sel[j] = j;
6577 for (k = 0; j < nunits; ++j, ++k)
6578 sel[j] = nunits + k;
6579 }
6580 vec_perm_indices indices (sel, i == units_log2 ? 1 : 2, nunits);
6581 if (!can_vec_perm_const_p (vec_mode, indices))
6582 {
6583 if (i == units_log2)
6584 return -1;
6585
6586 if (whole_vector_shift_kind == scan_store_kind_perm)
6587 {
6588 if (optab_handler (vec_shl_optab, vec_mode) == CODE_FOR_nothing)
6589 return -1;
6590 whole_vector_shift_kind = scan_store_kind_lshift_zero;
6591 /* Whole vector shifts shift in zeros, so if init is an all-zero
6592 constant, there is no need to do anything further. */
6593 if ((TREE_CODE (init) != INTEGER_CST
6594 && TREE_CODE (init) != REAL_CST)
6595 || !initializer_zerop (init))
6596 {
6597 tree masktype = truth_type_for (vectype);
6598 if (!expand_vec_cond_expr_p (vectype, masktype, VECTOR_CST))
6599 return -1;
6600 whole_vector_shift_kind = scan_store_kind_lshift_cond;
6601 }
6602 }
6603 kind = whole_vector_shift_kind;
6604 }
6605 if (use_whole_vector)
6606 {
6607 if (kind != scan_store_kind_perm && use_whole_vector->is_empty ())
6608 use_whole_vector->safe_grow_cleared (i);
6609 if (kind != scan_store_kind_perm || !use_whole_vector->is_empty ())
6610 use_whole_vector->safe_push (kind);
6611 }
6612 }
6613
6614 return units_log2;
6615 }
6616
6617
6618 /* Function check_scan_store.
6619
6620 Check magic stores for #pragma omp scan {in,ex}clusive reductions. */
6621
6622 static bool
6623 check_scan_store (stmt_vec_info stmt_info, tree vectype,
6624 enum vect_def_type rhs_dt, bool slp, tree mask,
6625 vect_memory_access_type memory_access_type)
6626 {
6627 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
6628 dr_vec_info *dr_info = STMT_VINFO_DR_INFO (stmt_info);
6629 tree ref_type;
6630
6631 gcc_assert (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) > 1);
6632 if (slp
6633 || mask
6634 || memory_access_type != VMAT_CONTIGUOUS
6635 || TREE_CODE (DR_BASE_ADDRESS (dr_info->dr)) != ADDR_EXPR
6636 || !VAR_P (TREE_OPERAND (DR_BASE_ADDRESS (dr_info->dr), 0))
6637 || loop_vinfo == NULL
6638 || LOOP_VINFO_FULLY_MASKED_P (loop_vinfo)
6639 || STMT_VINFO_GROUPED_ACCESS (stmt_info)
6640 || !integer_zerop (DR_OFFSET (dr_info->dr))
6641 || !integer_zerop (DR_INIT (dr_info->dr))
6642 || !(ref_type = reference_alias_ptr_type (DR_REF (dr_info->dr)))
6643 || !alias_sets_conflict_p (get_alias_set (vectype),
6644 get_alias_set (TREE_TYPE (ref_type))))
6645 {
6646 if (dump_enabled_p ())
6647 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6648 "unsupported OpenMP scan store.\n");
6649 return false;
6650 }
6651
6652 /* We need to pattern match code built by OpenMP lowering and simplified
6653 by subsequent optimizations into something we can handle.
6654 #pragma omp simd reduction(inscan,+:r)
6655 for (...)
6656 {
6657 r += something ();
6658 #pragma omp scan inclusive (r)
6659 use (r);
6660 }
6661 shall have body with:
6662 // Initialization for input phase, store the reduction initializer:
6663 _20 = .GOMP_SIMD_LANE (simduid.3_14(D), 0);
6664 _21 = .GOMP_SIMD_LANE (simduid.3_14(D), 1);
6665 D.2042[_21] = 0;
6666 // Actual input phase:
6667 ...
6668 r.0_5 = D.2042[_20];
6669 _6 = _4 + r.0_5;
6670 D.2042[_20] = _6;
6671 // Initialization for scan phase:
6672 _25 = .GOMP_SIMD_LANE (simduid.3_14(D), 2);
6673 _26 = D.2043[_25];
6674 _27 = D.2042[_25];
6675 _28 = _26 + _27;
6676 D.2043[_25] = _28;
6677 D.2042[_25] = _28;
6678 // Actual scan phase:
6679 ...
6680 r.1_8 = D.2042[_20];
6681 ...
6682 The "omp simd array" variable D.2042 holds the privatized copy used
6683 inside of the loop and D.2043 is another one that holds copies of
6684 the current original list item. The separate GOMP_SIMD_LANE ifn
6685 kinds are there in order to allow optimizing the initializer store
6686 and combiner sequence, e.g. if it is originally some C++ish user
6687 defined reduction, but allow the vectorizer to pattern recognize it
6688 and turn it into the appropriate vectorized scan.
6689
6690 For exclusive scan, this is slightly different:
6691 #pragma omp simd reduction(inscan,+:r)
6692 for (...)
6693 {
6694 use (r);
6695 #pragma omp scan exclusive (r)
6696 r += something ();
6697 }
6698 shall have body with:
6699 // Initialization for input phase, store the reduction initializer:
6700 _20 = .GOMP_SIMD_LANE (simduid.3_14(D), 0);
6701 _21 = .GOMP_SIMD_LANE (simduid.3_14(D), 1);
6702 D.2042[_21] = 0;
6703 // Actual input phase:
6704 ...
6705 r.0_5 = D.2042[_20];
6706 _6 = _4 + r.0_5;
6707 D.2042[_20] = _6;
6708 // Initialization for scan phase:
6709 _25 = .GOMP_SIMD_LANE (simduid.3_14(D), 3);
6710 _26 = D.2043[_25];
6711 D.2044[_25] = _26;
6712 _27 = D.2042[_25];
6713 _28 = _26 + _27;
6714 D.2043[_25] = _28;
6715 // Actual scan phase:
6716 ...
6717 r.1_8 = D.2044[_20];
6718 ... */
6719
6720 if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) == 2)
6721 {
6722 /* Match the D.2042[_21] = 0; store above. Just require that
6723 it is a constant or external definition store. */
6724 if (rhs_dt != vect_constant_def && rhs_dt != vect_external_def)
6725 {
6726 fail_init:
6727 if (dump_enabled_p ())
6728 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6729 "unsupported OpenMP scan initializer store.\n");
6730 return false;
6731 }
6732
6733 if (! loop_vinfo->scan_map)
6734 loop_vinfo->scan_map = new hash_map<tree, tree>;
6735 tree var = TREE_OPERAND (DR_BASE_ADDRESS (dr_info->dr), 0);
6736 tree &cached = loop_vinfo->scan_map->get_or_insert (var);
6737 if (cached)
6738 goto fail_init;
6739 cached = gimple_assign_rhs1 (STMT_VINFO_STMT (stmt_info));
6740
6741 /* These stores can be vectorized normally. */
6742 return true;
6743 }
6744
6745 if (rhs_dt != vect_internal_def)
6746 {
6747 fail:
6748 if (dump_enabled_p ())
6749 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6750 "unsupported OpenMP scan combiner pattern.\n");
6751 return false;
6752 }
6753
6754 gimple *stmt = STMT_VINFO_STMT (stmt_info);
6755 tree rhs = gimple_assign_rhs1 (stmt);
6756 if (TREE_CODE (rhs) != SSA_NAME)
6757 goto fail;
6758
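/* Locate the other store of the combiner sequence via the immediate uses
of RHS and verify that the loads, the combiner statement and the stores
match the pattern shown above.  */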
6759 gimple *other_store_stmt = NULL;
6760 tree var = TREE_OPERAND (DR_BASE_ADDRESS (dr_info->dr), 0);
6761 bool inscan_var_store
6762 = lookup_attribute ("omp simd inscan", DECL_ATTRIBUTES (var)) != NULL;
6763
6764 if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) == 4)
6765 {
6766 if (!inscan_var_store)
6767 {
6768 use_operand_p use_p;
6769 imm_use_iterator iter;
6770 FOR_EACH_IMM_USE_FAST (use_p, iter, rhs)
6771 {
6772 gimple *use_stmt = USE_STMT (use_p);
6773 if (use_stmt == stmt || is_gimple_debug (use_stmt))
6774 continue;
6775 if (gimple_bb (use_stmt) != gimple_bb (stmt)
6776 || !is_gimple_assign (use_stmt)
6777 || gimple_assign_rhs_class (use_stmt) != GIMPLE_BINARY_RHS
6778 || other_store_stmt
6779 || TREE_CODE (gimple_assign_lhs (use_stmt)) != SSA_NAME)
6780 goto fail;
6781 other_store_stmt = use_stmt;
6782 }
6783 if (other_store_stmt == NULL)
6784 goto fail;
6785 rhs = gimple_assign_lhs (other_store_stmt);
6786 if (!single_imm_use (rhs, &use_p, &other_store_stmt))
6787 goto fail;
6788 }
6789 }
6790 else if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) == 3)
6791 {
6792 use_operand_p use_p;
6793 imm_use_iterator iter;
6794 FOR_EACH_IMM_USE_FAST (use_p, iter, rhs)
6795 {
6796 gimple *use_stmt = USE_STMT (use_p);
6797 if (use_stmt == stmt || is_gimple_debug (use_stmt))
6798 continue;
6799 if (other_store_stmt)
6800 goto fail;
6801 other_store_stmt = use_stmt;
6802 }
6803 }
6804 else
6805 goto fail;
6806
6807 gimple *def_stmt = SSA_NAME_DEF_STMT (rhs);
6808 if (gimple_bb (def_stmt) != gimple_bb (stmt)
6809 || !is_gimple_assign (def_stmt)
6810 || gimple_assign_rhs_class (def_stmt) != GIMPLE_BINARY_RHS)
6811 goto fail;
6812
6813 enum tree_code code = gimple_assign_rhs_code (def_stmt);
6814 /* For pointer addition, we should use the normal plus for the vector
6815 operation. */
6816 switch (code)
6817 {
6818 case POINTER_PLUS_EXPR:
6819 code = PLUS_EXPR;
6820 break;
6821 case MULT_HIGHPART_EXPR:
6822 goto fail;
6823 default:
6824 break;
6825 }
6826 if (TREE_CODE_LENGTH (code) != binary_op || !commutative_tree_code (code))
6827 goto fail;
6828
6829 tree rhs1 = gimple_assign_rhs1 (def_stmt);
6830 tree rhs2 = gimple_assign_rhs2 (def_stmt);
6831 if (TREE_CODE (rhs1) != SSA_NAME || TREE_CODE (rhs2) != SSA_NAME)
6832 goto fail;
6833
6834 gimple *load1_stmt = SSA_NAME_DEF_STMT (rhs1);
6835 gimple *load2_stmt = SSA_NAME_DEF_STMT (rhs2);
6836 if (gimple_bb (load1_stmt) != gimple_bb (stmt)
6837 || !gimple_assign_load_p (load1_stmt)
6838 || gimple_bb (load2_stmt) != gimple_bb (stmt)
6839 || !gimple_assign_load_p (load2_stmt))
6840 goto fail;
6841
6842 stmt_vec_info load1_stmt_info = loop_vinfo->lookup_stmt (load1_stmt);
6843 stmt_vec_info load2_stmt_info = loop_vinfo->lookup_stmt (load2_stmt);
6844 if (load1_stmt_info == NULL
6845 || load2_stmt_info == NULL
6846 || (STMT_VINFO_SIMD_LANE_ACCESS_P (load1_stmt_info)
6847 != STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info))
6848 || (STMT_VINFO_SIMD_LANE_ACCESS_P (load2_stmt_info)
6849 != STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info)))
6850 goto fail;
6851
6852 if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) == 4 && inscan_var_store)
6853 {
6854 dr_vec_info *load1_dr_info = STMT_VINFO_DR_INFO (load1_stmt_info);
6855 if (TREE_CODE (DR_BASE_ADDRESS (load1_dr_info->dr)) != ADDR_EXPR
6856 || !VAR_P (TREE_OPERAND (DR_BASE_ADDRESS (load1_dr_info->dr), 0)))
6857 goto fail;
6858 tree var1 = TREE_OPERAND (DR_BASE_ADDRESS (load1_dr_info->dr), 0);
6859 tree lrhs;
6860 if (lookup_attribute ("omp simd inscan", DECL_ATTRIBUTES (var1)))
6861 lrhs = rhs1;
6862 else
6863 lrhs = rhs2;
6864 use_operand_p use_p;
6865 imm_use_iterator iter;
6866 FOR_EACH_IMM_USE_FAST (use_p, iter, lrhs)
6867 {
6868 gimple *use_stmt = USE_STMT (use_p);
6869 if (use_stmt == def_stmt || is_gimple_debug (use_stmt))
6870 continue;
6871 if (other_store_stmt)
6872 goto fail;
6873 other_store_stmt = use_stmt;
6874 }
6875 }
6876
6877 if (other_store_stmt == NULL)
6878 goto fail;
6879 if (gimple_bb (other_store_stmt) != gimple_bb (stmt)
6880 || !gimple_store_p (other_store_stmt))
6881 goto fail;
6882
6883 stmt_vec_info other_store_stmt_info
6884 = loop_vinfo->lookup_stmt (other_store_stmt);
6885 if (other_store_stmt_info == NULL
6886 || (STMT_VINFO_SIMD_LANE_ACCESS_P (other_store_stmt_info)
6887 != STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info)))
6888 goto fail;
6889
6890 gimple *stmt1 = stmt;
6891 gimple *stmt2 = other_store_stmt;
6892 if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) == 4 && !inscan_var_store)
6893 std::swap (stmt1, stmt2);
6894 if (scan_operand_equal_p (gimple_assign_lhs (stmt1),
6895 gimple_assign_rhs1 (load2_stmt)))
6896 {
6897 std::swap (rhs1, rhs2);
6898 std::swap (load1_stmt, load2_stmt);
6899 std::swap (load1_stmt_info, load2_stmt_info);
6900 }
6901 if (!scan_operand_equal_p (gimple_assign_lhs (stmt1),
6902 gimple_assign_rhs1 (load1_stmt)))
6903 goto fail;
6904
6905 tree var3 = NULL_TREE;
6906 if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) == 3
6907 && !scan_operand_equal_p (gimple_assign_lhs (stmt2),
6908 gimple_assign_rhs1 (load2_stmt)))
6909 goto fail;
6910 else if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) == 4)
6911 {
6912 dr_vec_info *load2_dr_info = STMT_VINFO_DR_INFO (load2_stmt_info);
6913 if (TREE_CODE (DR_BASE_ADDRESS (load2_dr_info->dr)) != ADDR_EXPR
6914 || !VAR_P (TREE_OPERAND (DR_BASE_ADDRESS (load2_dr_info->dr), 0)))
6915 goto fail;
6916 var3 = TREE_OPERAND (DR_BASE_ADDRESS (load2_dr_info->dr), 0);
6917 if (!lookup_attribute ("omp simd array", DECL_ATTRIBUTES (var3))
6918 || lookup_attribute ("omp simd inscan", DECL_ATTRIBUTES (var3))
6919 || lookup_attribute ("omp simd inscan exclusive",
6920 DECL_ATTRIBUTES (var3)))
6921 goto fail;
6922 }
6923
6924 dr_vec_info *other_dr_info = STMT_VINFO_DR_INFO (other_store_stmt_info);
6925 if (TREE_CODE (DR_BASE_ADDRESS (other_dr_info->dr)) != ADDR_EXPR
6926 || !VAR_P (TREE_OPERAND (DR_BASE_ADDRESS (other_dr_info->dr), 0)))
6927 goto fail;
6928
6929 tree var1 = TREE_OPERAND (DR_BASE_ADDRESS (dr_info->dr), 0);
6930 tree var2 = TREE_OPERAND (DR_BASE_ADDRESS (other_dr_info->dr), 0);
6931 if (!lookup_attribute ("omp simd array", DECL_ATTRIBUTES (var1))
6932 || !lookup_attribute ("omp simd array", DECL_ATTRIBUTES (var2))
6933 || (!lookup_attribute ("omp simd inscan", DECL_ATTRIBUTES (var1)))
6934 == (!lookup_attribute ("omp simd inscan", DECL_ATTRIBUTES (var2))))
6935 goto fail;
6936
6937 if (lookup_attribute ("omp simd inscan", DECL_ATTRIBUTES (var1)))
6938 std::swap (var1, var2);
6939
6940 if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) == 4)
6941 {
6942 if (!lookup_attribute ("omp simd inscan exclusive",
6943 DECL_ATTRIBUTES (var1)))
6944 goto fail;
6945 var1 = var3;
6946 }
6947
6948 if (loop_vinfo->scan_map == NULL)
6949 goto fail;
6950 tree *init = loop_vinfo->scan_map->get (var1);
6951 if (init == NULL)
6952 goto fail;
6953
6954 /* The IL is as expected, now check if we can actually vectorize it.
6955 Inclusive scan:
6956 _26 = D.2043[_25];
6957 _27 = D.2042[_25];
6958 _28 = _26 + _27;
6959 D.2043[_25] = _28;
6960 D.2042[_25] = _28;
6961 should be vectorized as (where _40 is the vectorized rhs
6962 from the D.2042[_21] = 0; store):
6963 _30 = MEM <vector(8) int> [(int *)&D.2043];
6964 _31 = MEM <vector(8) int> [(int *)&D.2042];
6965 _32 = VEC_PERM_EXPR <_40, _31, { 0, 8, 9, 10, 11, 12, 13, 14 }>;
6966 _33 = _31 + _32;
6967 // _33 = { _31[0], _31[0]+_31[1], _31[1]+_31[2], ..., _31[6]+_31[7] };
6968 _34 = VEC_PERM_EXPR <_40, _33, { 0, 1, 8, 9, 10, 11, 12, 13 }>;
6969 _35 = _33 + _34;
6970 // _35 = { _31[0], _31[0]+_31[1], _31[0]+.._31[2], _31[0]+.._31[3],
6971 // _31[1]+.._31[4], ... _31[4]+.._31[7] };
6972 _36 = VEC_PERM_EXPR <_40, _35, { 0, 1, 2, 3, 8, 9, 10, 11 }>;
6973 _37 = _35 + _36;
6974 // _37 = { _31[0], _31[0]+_31[1], _31[0]+.._31[2], _31[0]+.._31[3],
6975 // _31[0]+.._31[4], ... _31[0]+.._31[7] };
6976 _38 = _30 + _37;
6977 _39 = VEC_PERM_EXPR <_38, _38, { 7, 7, 7, 7, 7, 7, 7, 7 }>;
6978 MEM <vector(8) int> [(int *)&D.2043] = _39;
6979 MEM <vector(8) int> [(int *)&D.2042] = _38;
6980 Exclusive scan:
6981 _26 = D.2043[_25];
6982 D.2044[_25] = _26;
6983 _27 = D.2042[_25];
6984 _28 = _26 + _27;
6985 D.2043[_25] = _28;
6986 should be vectorized as (where _40 is the vectorized rhs
6987 from the D.2042[_21] = 0; store):
6988 _30 = MEM <vector(8) int> [(int *)&D.2043];
6989 _31 = MEM <vector(8) int> [(int *)&D.2042];
6990 _32 = VEC_PERM_EXPR <_40, _31, { 0, 8, 9, 10, 11, 12, 13, 14 }>;
6991 _33 = VEC_PERM_EXPR <_40, _32, { 0, 8, 9, 10, 11, 12, 13, 14 }>;
6992 _34 = _32 + _33;
6993 // _34 = { 0, _31[0], _31[0]+_31[1], _31[1]+_31[2], _31[2]+_31[3],
6994 // _31[3]+_31[4], ... _31[5]+.._31[6] };
6995 _35 = VEC_PERM_EXPR <_40, _34, { 0, 1, 8, 9, 10, 11, 12, 13 }>;
6996 _36 = _34 + _35;
6997 // _36 = { 0, _31[0], _31[0]+_31[1], _31[0]+.._31[2], _31[0]+.._31[3],
6998 // _31[1]+.._31[4], ... _31[3]+.._31[6] };
6999 _37 = VEC_PERM_EXPR <_40, _36, { 0, 1, 2, 3, 8, 9, 10, 11 }>;
7000 _38 = _36 + _37;
7001 // _38 = { 0, _31[0], _31[0]+_31[1], _31[0]+.._31[2], _31[0]+.._31[3],
7002 // _31[0]+.._31[4], ... _31[0]+.._31[6] };
7003 _39 = _30 + _38;
7004 _50 = _31 + _39;
7005 _51 = VEC_PERM_EXPR <_50, _50, { 7, 7, 7, 7, 7, 7, 7, 7 }>;
7006 MEM <vector(8) int> [(int *)&D.2044] = _39;
7007 MEM <vector(8) int> [(int *)&D.2042] = _51; */
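  /* An illustrative sketch only (hypothetical user code, not taken from the
     IL above or from any particular testcase): an OpenMP inclusive scan
     such as

	int r = 0;
	#pragma omp simd reduction (inscan, +:r)
	for (int i = 0; i < n; i++)
	  {
	    r += a[i];
	    #pragma omp scan inclusive (r)
	    b[i] = r;
	  }

     is lowered by the earlier OpenMP passes into loads and stores of
     "omp simd array" temporaries like D.2042/D.2043 above, which is the
     IL shape this function checks for.  */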
7008 enum machine_mode vec_mode = TYPE_MODE (vectype);
7009 optab optab = optab_for_tree_code (code, vectype, optab_default);
7010 if (!optab || optab_handler (optab, vec_mode) == CODE_FOR_nothing)
7011 goto fail;
7012
7013 int units_log2 = scan_store_can_perm_p (vectype, *init);
7014 if (units_log2 == -1)
7015 goto fail;
7016
7017 return true;
7018 }
7019
7020
7021 /* Function vectorizable_scan_store.
7022
7023    Helper of vectorizable_store; arguments are as for vectorizable_store.
7024 Handle only the transformation, checking is done in check_scan_store. */
7025
7026 static bool
7027 vectorizable_scan_store (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
7028 stmt_vec_info *vec_stmt, int ncopies)
7029 {
7030 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
7031 dr_vec_info *dr_info = STMT_VINFO_DR_INFO (stmt_info);
7032 tree ref_type = reference_alias_ptr_type (DR_REF (dr_info->dr));
7033 vec_info *vinfo = stmt_info->vinfo;
7034 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
7035
7036 if (dump_enabled_p ())
7037 dump_printf_loc (MSG_NOTE, vect_location,
7038 "transform scan store. ncopies = %d\n", ncopies);
7039
7040 gimple *stmt = STMT_VINFO_STMT (stmt_info);
7041 tree rhs = gimple_assign_rhs1 (stmt);
7042 gcc_assert (TREE_CODE (rhs) == SSA_NAME);
7043
7044 tree var = TREE_OPERAND (DR_BASE_ADDRESS (dr_info->dr), 0);
7045 bool inscan_var_store
7046 = lookup_attribute ("omp simd inscan", DECL_ATTRIBUTES (var)) != NULL;
7047
7048 if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) == 4 && !inscan_var_store)
7049 {
7050 use_operand_p use_p;
7051 imm_use_iterator iter;
7052 FOR_EACH_IMM_USE_FAST (use_p, iter, rhs)
7053 {
7054 gimple *use_stmt = USE_STMT (use_p);
7055 if (use_stmt == stmt || is_gimple_debug (use_stmt))
7056 continue;
7057 rhs = gimple_assign_lhs (use_stmt);
7058 break;
7059 }
7060 }
7061
7062 gimple *def_stmt = SSA_NAME_DEF_STMT (rhs);
7063 enum tree_code code = gimple_assign_rhs_code (def_stmt);
7064 if (code == POINTER_PLUS_EXPR)
7065 code = PLUS_EXPR;
7066 gcc_assert (TREE_CODE_LENGTH (code) == binary_op
7067 && commutative_tree_code (code));
7068 tree rhs1 = gimple_assign_rhs1 (def_stmt);
7069 tree rhs2 = gimple_assign_rhs2 (def_stmt);
7070 gcc_assert (TREE_CODE (rhs1) == SSA_NAME && TREE_CODE (rhs2) == SSA_NAME);
7071 gimple *load1_stmt = SSA_NAME_DEF_STMT (rhs1);
7072 gimple *load2_stmt = SSA_NAME_DEF_STMT (rhs2);
7073 stmt_vec_info load1_stmt_info = loop_vinfo->lookup_stmt (load1_stmt);
7074 stmt_vec_info load2_stmt_info = loop_vinfo->lookup_stmt (load2_stmt);
7075 dr_vec_info *load1_dr_info = STMT_VINFO_DR_INFO (load1_stmt_info);
7076 dr_vec_info *load2_dr_info = STMT_VINFO_DR_INFO (load2_stmt_info);
7077 tree var1 = TREE_OPERAND (DR_BASE_ADDRESS (load1_dr_info->dr), 0);
7078 tree var2 = TREE_OPERAND (DR_BASE_ADDRESS (load2_dr_info->dr), 0);
7079
7080 if (lookup_attribute ("omp simd inscan", DECL_ATTRIBUTES (var1)))
7081 {
7082 std::swap (rhs1, rhs2);
7083 std::swap (var1, var2);
7084 std::swap (load1_dr_info, load2_dr_info);
7085 }
7086
7087 tree *init = loop_vinfo->scan_map->get (var1);
7088 gcc_assert (init);
7089
7090 unsigned HOST_WIDE_INT nunits;
7091 if (!TYPE_VECTOR_SUBPARTS (vectype).is_constant (&nunits))
7092 gcc_unreachable ();
7093 auto_vec<enum scan_store_kind, 16> use_whole_vector;
7094 int units_log2 = scan_store_can_perm_p (vectype, *init, &use_whole_vector);
7095 gcc_assert (units_log2 > 0);
7096 auto_vec<tree, 16> perms;
7097 perms.quick_grow (units_log2 + 1);
7098 tree zero_vec = NULL_TREE, masktype = NULL_TREE;
7099 for (int i = 0; i <= units_log2; ++i)
7100 {
7101 unsigned HOST_WIDE_INT j, k;
7102 vec_perm_builder sel (nunits, nunits, 1);
7103 sel.quick_grow (nunits);
7104 if (i == units_log2)
7105 for (j = 0; j < nunits; ++j)
7106 sel[j] = nunits - 1;
7107 else
7108 {
7109 for (j = 0; j < (HOST_WIDE_INT_1U << i); ++j)
7110 sel[j] = j;
7111 for (k = 0; j < nunits; ++j, ++k)
7112 sel[j] = nunits + k;
7113 }
7114 vec_perm_indices indices (sel, i == units_log2 ? 1 : 2, nunits);
7115 if (!use_whole_vector.is_empty ()
7116 && use_whole_vector[i] != scan_store_kind_perm)
7117 {
7118 if (zero_vec == NULL_TREE)
7119 zero_vec = build_zero_cst (vectype);
7120 if (masktype == NULL_TREE
7121 && use_whole_vector[i] == scan_store_kind_lshift_cond)
7122 masktype = truth_type_for (vectype);
7123 perms[i] = vect_gen_perm_mask_any (vectype, indices);
7124 }
7125 else
7126 perms[i] = vect_gen_perm_mask_checked (vectype, indices);
7127 }
7128
7129 stmt_vec_info prev_stmt_info = NULL;
7130 tree vec_oprnd1 = NULL_TREE;
7131 tree vec_oprnd2 = NULL_TREE;
7132 tree vec_oprnd3 = NULL_TREE;
7133 tree dataref_ptr = DR_BASE_ADDRESS (dr_info->dr);
7134 tree dataref_offset = build_int_cst (ref_type, 0);
7135 tree bump = vect_get_data_ptr_increment (dr_info, vectype, VMAT_CONTIGUOUS);
7136 tree ldataref_ptr = NULL_TREE;
7137 tree orig = NULL_TREE;
7138 if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) == 4 && !inscan_var_store)
7139 ldataref_ptr = DR_BASE_ADDRESS (load1_dr_info->dr);
7140 for (int j = 0; j < ncopies; j++)
7141 {
7142 stmt_vec_info new_stmt_info;
7143 if (j == 0)
7144 {
7145 vec_oprnd1 = vect_get_vec_def_for_operand (*init, stmt_info);
7146 if (ldataref_ptr == NULL)
7147 vec_oprnd2 = vect_get_vec_def_for_operand (rhs1, stmt_info);
7148 vec_oprnd3 = vect_get_vec_def_for_operand (rhs2, stmt_info);
7149 orig = vec_oprnd3;
7150 }
7151 else
7152 {
7153 vec_oprnd1 = vect_get_vec_def_for_stmt_copy (vinfo, vec_oprnd1);
7154 if (ldataref_ptr == NULL)
7155 vec_oprnd2 = vect_get_vec_def_for_stmt_copy (vinfo, vec_oprnd2);
7156 vec_oprnd3 = vect_get_vec_def_for_stmt_copy (vinfo, vec_oprnd3);
7157 if (!inscan_var_store)
7158 dataref_offset = int_const_binop (PLUS_EXPR, dataref_offset, bump);
7159 }
7160
7161 if (ldataref_ptr)
7162 {
7163 vec_oprnd2 = make_ssa_name (vectype);
7164 tree data_ref = fold_build2 (MEM_REF, vectype,
7165 unshare_expr (ldataref_ptr),
7166 dataref_offset);
7167 vect_copy_ref_info (data_ref, DR_REF (load1_dr_info->dr));
7168 gimple *g = gimple_build_assign (vec_oprnd2, data_ref);
7169 new_stmt_info = vect_finish_stmt_generation (stmt_info, g, gsi);
7170 if (prev_stmt_info == NULL)
7171 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt_info;
7172 else
7173 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt_info;
7174 prev_stmt_info = new_stmt_info;
7175 }
7176
7177 tree v = vec_oprnd2;
7178 for (int i = 0; i < units_log2; ++i)
7179 {
7180 tree new_temp = make_ssa_name (vectype);
7181 gimple *g = gimple_build_assign (new_temp, VEC_PERM_EXPR,
7182 (zero_vec
7183 && (use_whole_vector[i]
7184 != scan_store_kind_perm))
7185 ? zero_vec : vec_oprnd1, v,
7186 perms[i]);
7187 new_stmt_info = vect_finish_stmt_generation (stmt_info, g, gsi);
7188 if (prev_stmt_info == NULL)
7189 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt_info;
7190 else
7191 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt_info;
7192 prev_stmt_info = new_stmt_info;
7193
7194 if (zero_vec && use_whole_vector[i] == scan_store_kind_lshift_cond)
7195 {
7196 		    /* The whole-vector shift shifted in zero bits; if *init
7197 		       is not initializer_zerop, we need to replace those elements
7198 with elements from vec_oprnd1. */
7199 tree_vector_builder vb (masktype, nunits, 1);
7200 for (unsigned HOST_WIDE_INT k = 0; k < nunits; ++k)
7201 vb.quick_push (k < (HOST_WIDE_INT_1U << i)
7202 ? boolean_false_node : boolean_true_node);
7203
7204 tree new_temp2 = make_ssa_name (vectype);
7205 g = gimple_build_assign (new_temp2, VEC_COND_EXPR, vb.build (),
7206 new_temp, vec_oprnd1);
7207 new_stmt_info = vect_finish_stmt_generation (stmt_info, g, gsi);
7208 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt_info;
7209 prev_stmt_info = new_stmt_info;
7210 new_temp = new_temp2;
7211 }
7212
7213 /* For exclusive scan, perform the perms[i] permutation once
7214 more. */
7215 if (i == 0
7216 && STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) == 4
7217 && v == vec_oprnd2)
7218 {
7219 v = new_temp;
7220 --i;
7221 continue;
7222 }
7223
7224 tree new_temp2 = make_ssa_name (vectype);
7225 g = gimple_build_assign (new_temp2, code, v, new_temp);
7226 new_stmt_info = vect_finish_stmt_generation (stmt_info, g, gsi);
7227 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt_info;
7228 prev_stmt_info = new_stmt_info;
7229
7230 v = new_temp2;
7231 }
7232
7233 tree new_temp = make_ssa_name (vectype);
7234 gimple *g = gimple_build_assign (new_temp, code, orig, v);
7235 new_stmt_info = vect_finish_stmt_generation (stmt_info, g, gsi);
7236 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt_info;
7237 prev_stmt_info = new_stmt_info;
7238
7239 tree last_perm_arg = new_temp;
7240 /* For exclusive scan, new_temp computed above is the exclusive scan
7241 	 prefix sum.  Turn it into an inclusive prefix sum for the broadcast
7242 of the last element into orig. */
7243 if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) == 4)
7244 {
7245 last_perm_arg = make_ssa_name (vectype);
7246 g = gimple_build_assign (last_perm_arg, code, new_temp, vec_oprnd2);
7247 new_stmt_info = vect_finish_stmt_generation (stmt_info, g, gsi);
7248 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt_info;
7249 prev_stmt_info = new_stmt_info;
7250 }
7251
7252 orig = make_ssa_name (vectype);
7253 g = gimple_build_assign (orig, VEC_PERM_EXPR, last_perm_arg,
7254 last_perm_arg, perms[units_log2]);
7255 new_stmt_info = vect_finish_stmt_generation (stmt_info, g, gsi);
7256 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt_info;
7257 prev_stmt_info = new_stmt_info;
7258
7259 if (!inscan_var_store)
7260 {
7261 tree data_ref = fold_build2 (MEM_REF, vectype,
7262 unshare_expr (dataref_ptr),
7263 dataref_offset);
7264 vect_copy_ref_info (data_ref, DR_REF (dr_info->dr));
7265 g = gimple_build_assign (data_ref, new_temp);
7266 new_stmt_info = vect_finish_stmt_generation (stmt_info, g, gsi);
7267 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt_info;
7268 prev_stmt_info = new_stmt_info;
7269 }
7270 }
7271
7272 if (inscan_var_store)
7273 for (int j = 0; j < ncopies; j++)
7274 {
7275 if (j != 0)
7276 dataref_offset = int_const_binop (PLUS_EXPR, dataref_offset, bump);
7277
7278 tree data_ref = fold_build2 (MEM_REF, vectype,
7279 unshare_expr (dataref_ptr),
7280 dataref_offset);
7281 vect_copy_ref_info (data_ref, DR_REF (dr_info->dr));
7282 gimple *g = gimple_build_assign (data_ref, orig);
7283 stmt_vec_info new_stmt_info
7284 = vect_finish_stmt_generation (stmt_info, g, gsi);
7285 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt_info;
7286 prev_stmt_info = new_stmt_info;
7287 }
7288 return true;
7289 }
7290
7291
7292 /* Function vectorizable_store.
7293
7294    Check if STMT_INFO defines a non-scalar data-ref (array/pointer/structure)
7295 that can be vectorized.
7296 If VEC_STMT is also passed, vectorize STMT_INFO: create a vectorized
7297 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
7298 Return true if STMT_INFO is vectorizable in this way. */
7299
7300 static bool
7301 vectorizable_store (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
7302 stmt_vec_info *vec_stmt, slp_tree slp_node,
7303 stmt_vector_for_cost *cost_vec)
7304 {
7305 tree data_ref;
7306 tree op;
7307 tree vec_oprnd = NULL_TREE;
7308 tree elem_type;
7309 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
7310 class loop *loop = NULL;
7311 machine_mode vec_mode;
7312 tree dummy;
7313 enum dr_alignment_support alignment_support_scheme;
7314 enum vect_def_type rhs_dt = vect_unknown_def_type;
7315 enum vect_def_type mask_dt = vect_unknown_def_type;
7316 stmt_vec_info prev_stmt_info = NULL;
7317 tree dataref_ptr = NULL_TREE;
7318 tree dataref_offset = NULL_TREE;
7319 gimple *ptr_incr = NULL;
7320 int ncopies;
7321 int j;
7322 stmt_vec_info first_stmt_info;
7323 bool grouped_store;
7324 unsigned int group_size, i;
7325 vec<tree> oprnds = vNULL;
7326 vec<tree> result_chain = vNULL;
7327 tree offset = NULL_TREE;
7328 vec<tree> vec_oprnds = vNULL;
7329 bool slp = (slp_node != NULL);
7330 unsigned int vec_num;
7331 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
7332 vec_info *vinfo = stmt_info->vinfo;
7333 tree aggr_type;
7334 gather_scatter_info gs_info;
7335 poly_uint64 vf;
7336 vec_load_store_type vls_type;
7337 tree ref_type;
7338
7339 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
7340 return false;
7341
7342 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
7343 && ! vec_stmt)
7344 return false;
7345
7346 /* Is vectorizable store? */
7347
7348 tree mask = NULL_TREE, mask_vectype = NULL_TREE;
7349 if (gassign *assign = dyn_cast <gassign *> (stmt_info->stmt))
7350 {
7351 tree scalar_dest = gimple_assign_lhs (assign);
7352 if (TREE_CODE (scalar_dest) == VIEW_CONVERT_EXPR
7353 && is_pattern_stmt_p (stmt_info))
7354 scalar_dest = TREE_OPERAND (scalar_dest, 0);
7355 if (TREE_CODE (scalar_dest) != ARRAY_REF
7356 && TREE_CODE (scalar_dest) != BIT_FIELD_REF
7357 && TREE_CODE (scalar_dest) != INDIRECT_REF
7358 && TREE_CODE (scalar_dest) != COMPONENT_REF
7359 && TREE_CODE (scalar_dest) != IMAGPART_EXPR
7360 && TREE_CODE (scalar_dest) != REALPART_EXPR
7361 && TREE_CODE (scalar_dest) != MEM_REF)
7362 return false;
7363 }
7364 else
7365 {
7366 gcall *call = dyn_cast <gcall *> (stmt_info->stmt);
7367 if (!call || !gimple_call_internal_p (call))
7368 return false;
7369
7370 internal_fn ifn = gimple_call_internal_fn (call);
7371 if (!internal_store_fn_p (ifn))
7372 return false;
7373
7374 if (slp_node != NULL)
7375 {
7376 if (dump_enabled_p ())
7377 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
7378 "SLP of masked stores not supported.\n");
7379 return false;
7380 }
7381
7382 int mask_index = internal_fn_mask_index (ifn);
7383 if (mask_index >= 0)
7384 {
7385 mask = gimple_call_arg (call, mask_index);
7386 if (!vect_check_load_store_mask (stmt_info, mask, &mask_dt,
7387 &mask_vectype))
7388 return false;
7389 }
7390 }
7391
7392 op = vect_get_store_rhs (stmt_info);
7393
7394 /* Cannot have hybrid store SLP -- that would mean storing to the
7395 same location twice. */
7396 gcc_assert (slp == PURE_SLP_STMT (stmt_info));
7397
7398 tree vectype = STMT_VINFO_VECTYPE (stmt_info), rhs_vectype = NULL_TREE;
7399 poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
7400
7401 if (loop_vinfo)
7402 {
7403 loop = LOOP_VINFO_LOOP (loop_vinfo);
7404 vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
7405 }
7406 else
7407 vf = 1;
7408
7409 /* Multiple types in SLP are handled by creating the appropriate number of
7410 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
7411 case of SLP. */
7412 if (slp)
7413 ncopies = 1;
7414 else
7415 ncopies = vect_get_num_copies (loop_vinfo, vectype);
7416
7417 gcc_assert (ncopies >= 1);
7418
7419 /* FORNOW. This restriction should be relaxed. */
7420 if (loop && nested_in_vect_loop_p (loop, stmt_info) && ncopies > 1)
7421 {
7422 if (dump_enabled_p ())
7423 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
7424 "multiple types in nested loop.\n");
7425 return false;
7426 }
7427
7428 if (!vect_check_store_rhs (stmt_info, op, &rhs_dt, &rhs_vectype, &vls_type))
7429 return false;
7430
7431 elem_type = TREE_TYPE (vectype);
7432 vec_mode = TYPE_MODE (vectype);
7433
7434 if (!STMT_VINFO_DATA_REF (stmt_info))
7435 return false;
7436
7437 vect_memory_access_type memory_access_type;
7438 if (!get_load_store_type (stmt_info, vectype, slp, mask, vls_type, ncopies,
7439 &memory_access_type, &gs_info))
7440 return false;
7441
7442 if (mask)
7443 {
7444 if (memory_access_type == VMAT_CONTIGUOUS)
7445 {
7446 if (!VECTOR_MODE_P (vec_mode)
7447 || !can_vec_mask_load_store_p (vec_mode,
7448 TYPE_MODE (mask_vectype), false))
7449 return false;
7450 }
7451 else if (memory_access_type != VMAT_LOAD_STORE_LANES
7452 && (memory_access_type != VMAT_GATHER_SCATTER
7453 || (gs_info.decl && !VECTOR_BOOLEAN_TYPE_P (mask_vectype))))
7454 {
7455 if (dump_enabled_p ())
7456 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
7457 "unsupported access type for masked store.\n");
7458 return false;
7459 }
7460 }
7461 else
7462 {
7463       /* FORNOW.  In some cases we can vectorize even if the data type is not
7464 	 supported (e.g. array initialization with 0).  */
7465 if (optab_handler (mov_optab, vec_mode) == CODE_FOR_nothing)
7466 return false;
7467 }
7468
7469 dr_vec_info *dr_info = STMT_VINFO_DR_INFO (stmt_info), *first_dr_info = NULL;
7470 grouped_store = (STMT_VINFO_GROUPED_ACCESS (stmt_info)
7471 && memory_access_type != VMAT_GATHER_SCATTER
7472 && (slp || memory_access_type != VMAT_CONTIGUOUS));
7473 if (grouped_store)
7474 {
7475 first_stmt_info = DR_GROUP_FIRST_ELEMENT (stmt_info);
7476 first_dr_info = STMT_VINFO_DR_INFO (first_stmt_info);
7477 group_size = DR_GROUP_SIZE (first_stmt_info);
7478 }
7479 else
7480 {
7481 first_stmt_info = stmt_info;
7482 first_dr_info = dr_info;
7483 group_size = vec_num = 1;
7484 }
7485
7486 if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) > 1 && !vec_stmt)
7487 {
7488 if (!check_scan_store (stmt_info, vectype, rhs_dt, slp, mask,
7489 memory_access_type))
7490 return false;
7491 }
7492
7493 if (!vec_stmt) /* transformation not required. */
7494 {
7495 STMT_VINFO_MEMORY_ACCESS_TYPE (stmt_info) = memory_access_type;
7496
7497 if (loop_vinfo
7498 && LOOP_VINFO_CAN_FULLY_MASK_P (loop_vinfo))
7499 check_load_store_masking (loop_vinfo, vectype, vls_type, group_size,
7500 memory_access_type, &gs_info, mask);
7501
7502 STMT_VINFO_TYPE (stmt_info) = store_vec_info_type;
7503 vect_model_store_cost (stmt_info, ncopies, rhs_dt, memory_access_type,
7504 vls_type, slp_node, cost_vec);
7505 return true;
7506 }
7507 gcc_assert (memory_access_type == STMT_VINFO_MEMORY_ACCESS_TYPE (stmt_info));
7508
7509 /* Transform. */
7510
7511 ensure_base_align (dr_info);
7512
7513 if (memory_access_type == VMAT_GATHER_SCATTER && gs_info.decl)
7514 {
7515 tree vec_oprnd0 = NULL_TREE, vec_oprnd1 = NULL_TREE, src;
7516 tree arglist = TYPE_ARG_TYPES (TREE_TYPE (gs_info.decl));
7517 tree rettype, srctype, ptrtype, idxtype, masktype, scaletype;
7518 tree ptr, var, scale, vec_mask;
7519 tree mask_arg = NULL_TREE, mask_op = NULL_TREE, perm_mask = NULL_TREE;
7520 tree mask_halfvectype = mask_vectype;
7521 edge pe = loop_preheader_edge (loop);
7522 gimple_seq seq;
7523 basic_block new_bb;
7524 enum { NARROW, NONE, WIDEN } modifier;
7525 poly_uint64 scatter_off_nunits
7526 = TYPE_VECTOR_SUBPARTS (gs_info.offset_vectype);
7527
7528 if (known_eq (nunits, scatter_off_nunits))
7529 modifier = NONE;
7530 else if (known_eq (nunits * 2, scatter_off_nunits))
7531 {
7532 modifier = WIDEN;
7533
7534 /* Currently gathers and scatters are only supported for
7535 fixed-length vectors. */
7536 unsigned int count = scatter_off_nunits.to_constant ();
7537 vec_perm_builder sel (count, count, 1);
7538 for (i = 0; i < (unsigned int) count; ++i)
7539 sel.quick_push (i | (count / 2));
7540
7541 vec_perm_indices indices (sel, 1, count);
7542 perm_mask = vect_gen_perm_mask_checked (gs_info.offset_vectype,
7543 indices);
7544 gcc_assert (perm_mask != NULL_TREE);
7545 }
7546 else if (known_eq (nunits, scatter_off_nunits * 2))
7547 {
7548 modifier = NARROW;
7549
7550 /* Currently gathers and scatters are only supported for
7551 fixed-length vectors. */
7552 unsigned int count = nunits.to_constant ();
7553 vec_perm_builder sel (count, count, 1);
7554 for (i = 0; i < (unsigned int) count; ++i)
7555 sel.quick_push (i | (count / 2));
7556
7557 vec_perm_indices indices (sel, 2, count);
7558 perm_mask = vect_gen_perm_mask_checked (vectype, indices);
7559 gcc_assert (perm_mask != NULL_TREE);
7560 ncopies *= 2;
7561
7562 if (mask)
7563 mask_halfvectype = truth_type_for (gs_info.offset_vectype);
7564 }
7565 else
7566 gcc_unreachable ();
7567
7568 rettype = TREE_TYPE (TREE_TYPE (gs_info.decl));
7569 ptrtype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
7570 masktype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
7571 idxtype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
7572 srctype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
7573 scaletype = TREE_VALUE (arglist);
7574
7575 gcc_checking_assert (TREE_CODE (masktype) == INTEGER_TYPE
7576 && TREE_CODE (rettype) == VOID_TYPE);
7577
7578 ptr = fold_convert (ptrtype, gs_info.base);
7579 if (!is_gimple_min_invariant (ptr))
7580 {
7581 ptr = force_gimple_operand (ptr, &seq, true, NULL_TREE);
7582 new_bb = gsi_insert_seq_on_edge_immediate (pe, seq);
7583 gcc_assert (!new_bb);
7584 }
7585
7586 if (mask == NULL_TREE)
7587 {
7588 mask_arg = build_int_cst (masktype, -1);
7589 mask_arg = vect_init_vector (stmt_info, mask_arg, masktype, NULL);
7590 }
7591
7592 scale = build_int_cst (scaletype, gs_info.scale);
7593
7594 prev_stmt_info = NULL;
7595 for (j = 0; j < ncopies; ++j)
7596 {
7597 if (j == 0)
7598 {
7599 src = vec_oprnd1 = vect_get_vec_def_for_operand (op, stmt_info);
7600 op = vec_oprnd0 = vect_get_vec_def_for_operand (gs_info.offset,
7601 stmt_info);
7602 if (mask)
7603 mask_op = vec_mask = vect_get_vec_def_for_operand (mask,
7604 stmt_info);
7605 }
7606 else if (modifier != NONE && (j & 1))
7607 {
7608 if (modifier == WIDEN)
7609 {
7610 src
7611 = vec_oprnd1 = vect_get_vec_def_for_stmt_copy (vinfo,
7612 vec_oprnd1);
7613 op = permute_vec_elements (vec_oprnd0, vec_oprnd0, perm_mask,
7614 stmt_info, gsi);
7615 if (mask)
7616 mask_op
7617 = vec_mask = vect_get_vec_def_for_stmt_copy (vinfo,
7618 vec_mask);
7619 }
7620 else if (modifier == NARROW)
7621 {
7622 src = permute_vec_elements (vec_oprnd1, vec_oprnd1, perm_mask,
7623 stmt_info, gsi);
7624 op = vec_oprnd0 = vect_get_vec_def_for_stmt_copy (vinfo,
7625 vec_oprnd0);
7626 }
7627 else
7628 gcc_unreachable ();
7629 }
7630 else
7631 {
7632 src = vec_oprnd1 = vect_get_vec_def_for_stmt_copy (vinfo,
7633 vec_oprnd1);
7634 op = vec_oprnd0 = vect_get_vec_def_for_stmt_copy (vinfo,
7635 vec_oprnd0);
7636 if (mask)
7637 mask_op = vec_mask = vect_get_vec_def_for_stmt_copy (vinfo,
7638 vec_mask);
7639 }
7640
7641 if (!useless_type_conversion_p (srctype, TREE_TYPE (src)))
7642 {
7643 gcc_assert (known_eq (TYPE_VECTOR_SUBPARTS (TREE_TYPE (src)),
7644 TYPE_VECTOR_SUBPARTS (srctype)));
7645 var = vect_get_new_ssa_name (srctype, vect_simple_var);
7646 src = build1 (VIEW_CONVERT_EXPR, srctype, src);
7647 gassign *new_stmt
7648 = gimple_build_assign (var, VIEW_CONVERT_EXPR, src);
7649 vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
7650 src = var;
7651 }
7652
7653 if (!useless_type_conversion_p (idxtype, TREE_TYPE (op)))
7654 {
7655 gcc_assert (known_eq (TYPE_VECTOR_SUBPARTS (TREE_TYPE (op)),
7656 TYPE_VECTOR_SUBPARTS (idxtype)));
7657 var = vect_get_new_ssa_name (idxtype, vect_simple_var);
7658 op = build1 (VIEW_CONVERT_EXPR, idxtype, op);
7659 gassign *new_stmt
7660 = gimple_build_assign (var, VIEW_CONVERT_EXPR, op);
7661 vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
7662 op = var;
7663 }
7664
7665 if (mask)
7666 {
7667 tree utype;
7668 mask_arg = mask_op;
7669 if (modifier == NARROW)
7670 {
7671 var = vect_get_new_ssa_name (mask_halfvectype,
7672 vect_simple_var);
7673 gassign *new_stmt
7674 = gimple_build_assign (var, (j & 1) ? VEC_UNPACK_HI_EXPR
7675 : VEC_UNPACK_LO_EXPR,
7676 mask_op);
7677 vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
7678 mask_arg = var;
7679 }
7680 tree optype = TREE_TYPE (mask_arg);
7681 if (TYPE_MODE (masktype) == TYPE_MODE (optype))
7682 utype = masktype;
7683 else
7684 utype = lang_hooks.types.type_for_mode (TYPE_MODE (optype), 1);
7685 var = vect_get_new_ssa_name (utype, vect_scalar_var);
7686 mask_arg = build1 (VIEW_CONVERT_EXPR, utype, mask_arg);
7687 gassign *new_stmt
7688 = gimple_build_assign (var, VIEW_CONVERT_EXPR, mask_arg);
7689 vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
7690 mask_arg = var;
7691 if (!useless_type_conversion_p (masktype, utype))
7692 {
7693 gcc_assert (TYPE_PRECISION (utype)
7694 <= TYPE_PRECISION (masktype));
7695 var = vect_get_new_ssa_name (masktype, vect_scalar_var);
7696 new_stmt = gimple_build_assign (var, NOP_EXPR, mask_arg);
7697 vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
7698 mask_arg = var;
7699 }
7700 }
7701
7702 gcall *new_stmt
7703 = gimple_build_call (gs_info.decl, 5, ptr, mask_arg, op, src, scale);
7704 stmt_vec_info new_stmt_info
7705 = vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
7706
7707 if (prev_stmt_info == NULL)
7708 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt_info;
7709 else
7710 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt_info;
7711 prev_stmt_info = new_stmt_info;
7712 }
7713 return true;
7714 }
7715 else if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) >= 3)
7716 return vectorizable_scan_store (stmt_info, gsi, vec_stmt, ncopies);
7717
7718 if (STMT_VINFO_GROUPED_ACCESS (stmt_info))
7719 DR_GROUP_STORE_COUNT (DR_GROUP_FIRST_ELEMENT (stmt_info))++;
7720
7721 if (grouped_store)
7722 {
7723 /* FORNOW */
7724 gcc_assert (!loop || !nested_in_vect_loop_p (loop, stmt_info));
7725
7726 /* We vectorize all the stmts of the interleaving group when we
7727 reach the last stmt in the group. */
7728 if (DR_GROUP_STORE_COUNT (first_stmt_info)
7729 < DR_GROUP_SIZE (first_stmt_info)
7730 && !slp)
7731 {
7732 *vec_stmt = NULL;
7733 return true;
7734 }
7735
7736 if (slp)
7737 {
7738 grouped_store = false;
7739 /* VEC_NUM is the number of vect stmts to be created for this
7740 group. */
7741 vec_num = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
7742 first_stmt_info = SLP_TREE_SCALAR_STMTS (slp_node)[0];
7743 gcc_assert (DR_GROUP_FIRST_ELEMENT (first_stmt_info)
7744 == first_stmt_info);
7745 first_dr_info = STMT_VINFO_DR_INFO (first_stmt_info);
7746 op = vect_get_store_rhs (first_stmt_info);
7747 }
7748 else
7749 /* VEC_NUM is the number of vect stmts to be created for this
7750 group. */
7751 vec_num = group_size;
7752
7753 ref_type = get_group_alias_ptr_type (first_stmt_info);
7754 }
7755 else
7756 ref_type = reference_alias_ptr_type (DR_REF (first_dr_info->dr));
7757
7758 if (dump_enabled_p ())
7759 dump_printf_loc (MSG_NOTE, vect_location,
7760 "transform store. ncopies = %d\n", ncopies);
7761
7762 if (memory_access_type == VMAT_ELEMENTWISE
7763 || memory_access_type == VMAT_STRIDED_SLP)
7764 {
7765 gimple_stmt_iterator incr_gsi;
7766 bool insert_after;
7767 gimple *incr;
7768 tree offvar;
7769 tree ivstep;
7770 tree running_off;
7771 tree stride_base, stride_step, alias_off;
7772 tree vec_oprnd;
7773 unsigned int g;
7774 /* Checked by get_load_store_type. */
7775 unsigned int const_nunits = nunits.to_constant ();
7776
7777 gcc_assert (!LOOP_VINFO_FULLY_MASKED_P (loop_vinfo));
7778 gcc_assert (!nested_in_vect_loop_p (loop, stmt_info));
7779
7780 stride_base
7781 = fold_build_pointer_plus
7782 (DR_BASE_ADDRESS (first_dr_info->dr),
7783 size_binop (PLUS_EXPR,
7784 convert_to_ptrofftype (DR_OFFSET (first_dr_info->dr)),
7785 convert_to_ptrofftype (DR_INIT (first_dr_info->dr))));
7786 stride_step = fold_convert (sizetype, DR_STEP (first_dr_info->dr));
7787
7788       /* For a store with a loop-invariant stride that is not a power of 2
7789 	 (i.e. not a grouped access) like so:
7790
7791 for (i = 0; i < n; i += stride)
7792 array[i] = ...;
7793
7794 we generate a new induction variable and new stores from
7795 the components of the (vectorized) rhs:
7796
7797 for (j = 0; ; j += VF*stride)
7798 vectemp = ...;
7799 tmp1 = vectemp[0];
7800 array[j] = tmp1;
7801 tmp2 = vectemp[1];
7802 array[j + stride] = tmp2;
7803 ...
7804 */
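      /* A concrete instance of the above, purely for illustration (assuming
	 VF == 4, i.e. one 4-element vector per copy, and a scalar stride of 3,
	 so VF*stride == 12; the numbers are not taken from the code below):

	   for (j = 0; ; j += 12)
	     vectemp = ...;
	     array[j]     = vectemp[0];
	     array[j + 3] = vectemp[1];
	     array[j + 6] = vectemp[2];
	     array[j + 9] = vectemp[3];
       */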
7805
7806 unsigned nstores = const_nunits;
7807 unsigned lnel = 1;
7808 tree ltype = elem_type;
7809 tree lvectype = vectype;
7810 if (slp)
7811 {
7812 if (group_size < const_nunits
7813 && const_nunits % group_size == 0)
7814 {
7815 nstores = const_nunits / group_size;
7816 lnel = group_size;
7817 ltype = build_vector_type (elem_type, group_size);
7818 lvectype = vectype;
7819
7820 /* First check if vec_extract optab doesn't support extraction
7821 of vector elts directly. */
7822 scalar_mode elmode = SCALAR_TYPE_MODE (elem_type);
7823 machine_mode vmode;
7824 if (!related_vector_mode (TYPE_MODE (vectype), elmode,
7825 group_size).exists (&vmode)
7826 || (convert_optab_handler (vec_extract_optab,
7827 TYPE_MODE (vectype), vmode)
7828 == CODE_FOR_nothing))
7829 {
7830 /* Try to avoid emitting an extract of vector elements
7831 by performing the extracts using an integer type of the
7832 same size, extracting from a vector of those and then
7833 re-interpreting it as the original vector type if
7834 supported. */
7835 unsigned lsize
7836 = group_size * GET_MODE_BITSIZE (elmode);
7837 unsigned int lnunits = const_nunits / group_size;
7838 /* If we can't construct such a vector fall back to
7839 element extracts from the original vector type and
7840 element size stores. */
7841 if (int_mode_for_size (lsize, 0).exists (&elmode)
7842 && related_vector_mode (TYPE_MODE (vectype), elmode,
7843 lnunits).exists (&vmode)
7844 && (convert_optab_handler (vec_extract_optab,
7845 vmode, elmode)
7846 != CODE_FOR_nothing))
7847 {
7848 nstores = lnunits;
7849 lnel = group_size;
7850 ltype = build_nonstandard_integer_type (lsize, 1);
7851 lvectype = build_vector_type (ltype, nstores);
7852 }
7853 /* Else fall back to vector extraction anyway.
7854 Fewer stores are more important than avoiding spilling
7855 of the vector we extract from. Compared to the
7856 construction case in vectorizable_load no store-forwarding
7857 issue exists here for reasonable archs. */
7858 }
7859 }
7860 else if (group_size >= const_nunits
7861 && group_size % const_nunits == 0)
7862 {
7863 nstores = 1;
7864 lnel = const_nunits;
7865 ltype = vectype;
7866 lvectype = vectype;
7867 }
7868 ltype = build_aligned_type (ltype, TYPE_ALIGN (elem_type));
7869 ncopies = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
7870 }
7871
7872 ivstep = stride_step;
7873 ivstep = fold_build2 (MULT_EXPR, TREE_TYPE (ivstep), ivstep,
7874 build_int_cst (TREE_TYPE (ivstep), vf));
7875
7876 standard_iv_increment_position (loop, &incr_gsi, &insert_after);
7877
7878 stride_base = cse_and_gimplify_to_preheader (loop_vinfo, stride_base);
7879 ivstep = cse_and_gimplify_to_preheader (loop_vinfo, ivstep);
7880 create_iv (stride_base, ivstep, NULL,
7881 loop, &incr_gsi, insert_after,
7882 &offvar, NULL);
7883 incr = gsi_stmt (incr_gsi);
7884 loop_vinfo->add_stmt (incr);
7885
7886 stride_step = cse_and_gimplify_to_preheader (loop_vinfo, stride_step);
7887
7888 prev_stmt_info = NULL;
7889 alias_off = build_int_cst (ref_type, 0);
7890 stmt_vec_info next_stmt_info = first_stmt_info;
7891 for (g = 0; g < group_size; g++)
7892 {
7893 running_off = offvar;
7894 if (g)
7895 {
7896 tree size = TYPE_SIZE_UNIT (ltype);
7897 tree pos = fold_build2 (MULT_EXPR, sizetype, size_int (g),
7898 size);
7899 tree newoff = copy_ssa_name (running_off, NULL);
7900 incr = gimple_build_assign (newoff, POINTER_PLUS_EXPR,
7901 running_off, pos);
7902 vect_finish_stmt_generation (stmt_info, incr, gsi);
7903 running_off = newoff;
7904 }
7905 unsigned int group_el = 0;
7906 unsigned HOST_WIDE_INT
7907 elsz = tree_to_uhwi (TYPE_SIZE_UNIT (TREE_TYPE (vectype)));
7908 for (j = 0; j < ncopies; j++)
7909 {
7910 /* We've set op and dt above, from vect_get_store_rhs,
7911 and first_stmt_info == stmt_info. */
7912 if (j == 0)
7913 {
7914 if (slp)
7915 {
7916 vect_get_vec_defs (op, NULL_TREE, stmt_info,
7917 &vec_oprnds, NULL, slp_node);
7918 vec_oprnd = vec_oprnds[0];
7919 }
7920 else
7921 {
7922 op = vect_get_store_rhs (next_stmt_info);
7923 vec_oprnd = vect_get_vec_def_for_operand
7924 (op, next_stmt_info);
7925 }
7926 }
7927 else
7928 {
7929 if (slp)
7930 vec_oprnd = vec_oprnds[j];
7931 else
7932 vec_oprnd = vect_get_vec_def_for_stmt_copy (vinfo,
7933 vec_oprnd);
7934 }
7935 /* Pun the vector to extract from if necessary. */
7936 if (lvectype != vectype)
7937 {
7938 tree tem = make_ssa_name (lvectype);
7939 gimple *pun
7940 = gimple_build_assign (tem, build1 (VIEW_CONVERT_EXPR,
7941 lvectype, vec_oprnd));
7942 vect_finish_stmt_generation (stmt_info, pun, gsi);
7943 vec_oprnd = tem;
7944 }
7945 for (i = 0; i < nstores; i++)
7946 {
7947 tree newref, newoff;
7948 gimple *incr, *assign;
7949 tree size = TYPE_SIZE (ltype);
7950 /* Extract the i'th component. */
7951 tree pos = fold_build2 (MULT_EXPR, bitsizetype,
7952 bitsize_int (i), size);
7953 tree elem = fold_build3 (BIT_FIELD_REF, ltype, vec_oprnd,
7954 size, pos);
7955
7956 elem = force_gimple_operand_gsi (gsi, elem, true,
7957 NULL_TREE, true,
7958 GSI_SAME_STMT);
7959
7960 tree this_off = build_int_cst (TREE_TYPE (alias_off),
7961 group_el * elsz);
7962 newref = build2 (MEM_REF, ltype,
7963 running_off, this_off);
7964 vect_copy_ref_info (newref, DR_REF (first_dr_info->dr));
7965
7966 /* And store it to *running_off. */
7967 assign = gimple_build_assign (newref, elem);
7968 stmt_vec_info assign_info
7969 = vect_finish_stmt_generation (stmt_info, assign, gsi);
7970
7971 group_el += lnel;
7972 if (! slp
7973 || group_el == group_size)
7974 {
7975 newoff = copy_ssa_name (running_off, NULL);
7976 incr = gimple_build_assign (newoff, POINTER_PLUS_EXPR,
7977 running_off, stride_step);
7978 vect_finish_stmt_generation (stmt_info, incr, gsi);
7979
7980 running_off = newoff;
7981 group_el = 0;
7982 }
7983 if (g == group_size - 1
7984 && !slp)
7985 {
7986 if (j == 0 && i == 0)
7987 STMT_VINFO_VEC_STMT (stmt_info)
7988 = *vec_stmt = assign_info;
7989 else
7990 STMT_VINFO_RELATED_STMT (prev_stmt_info) = assign_info;
7991 prev_stmt_info = assign_info;
7992 }
7993 }
7994 }
7995 next_stmt_info = DR_GROUP_NEXT_ELEMENT (next_stmt_info);
7996 if (slp)
7997 break;
7998 }
7999
8000 vec_oprnds.release ();
8001 return true;
8002 }
8003
8004 auto_vec<tree> dr_chain (group_size);
8005 oprnds.create (group_size);
8006
8007 alignment_support_scheme
8008 = vect_supportable_dr_alignment (first_dr_info, false);
8009 gcc_assert (alignment_support_scheme);
8010 vec_loop_masks *loop_masks
8011 = (loop_vinfo && LOOP_VINFO_FULLY_MASKED_P (loop_vinfo)
8012 ? &LOOP_VINFO_MASKS (loop_vinfo)
8013 : NULL);
8014 /* Targets with store-lane instructions must not require explicit
8015 realignment. vect_supportable_dr_alignment always returns either
8016 dr_aligned or dr_unaligned_supported for masked operations. */
8017 gcc_assert ((memory_access_type != VMAT_LOAD_STORE_LANES
8018 && !mask
8019 && !loop_masks)
8020 || alignment_support_scheme == dr_aligned
8021 || alignment_support_scheme == dr_unaligned_supported);
8022
8023 if (memory_access_type == VMAT_CONTIGUOUS_DOWN
8024 || memory_access_type == VMAT_CONTIGUOUS_REVERSE)
8025 offset = size_int (-TYPE_VECTOR_SUBPARTS (vectype) + 1);
8026
8027 tree bump;
8028 tree vec_offset = NULL_TREE;
8029 if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
8030 {
8031 aggr_type = NULL_TREE;
8032 bump = NULL_TREE;
8033 }
8034 else if (memory_access_type == VMAT_GATHER_SCATTER)
8035 {
8036 aggr_type = elem_type;
8037 vect_get_strided_load_store_ops (stmt_info, loop_vinfo, &gs_info,
8038 &bump, &vec_offset);
8039 }
8040 else
8041 {
8042 if (memory_access_type == VMAT_LOAD_STORE_LANES)
8043 aggr_type = build_array_type_nelts (elem_type, vec_num * nunits);
8044 else
8045 aggr_type = vectype;
8046 bump = vect_get_data_ptr_increment (dr_info, aggr_type,
8047 memory_access_type);
8048 }
8049
8050 if (mask)
8051 LOOP_VINFO_HAS_MASK_STORE (loop_vinfo) = true;
8052
8053 /* In case the vectorization factor (VF) is bigger than the number
8054 of elements that we can fit in a vectype (nunits), we have to generate
8055      more than one vector stmt - i.e. we need to "unroll" the
8056      vector stmt by a factor of VF/nunits.  For more details see documentation in
8057 vect_get_vec_def_for_copy_stmt. */
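  /* For example (numbers for illustration only): with VF == 8 and a vector
     type holding 4 elements, ncopies == 8 / 4 == 2, so two vector stores are
     generated per scalar store and chained via STMT_VINFO_RELATED_STMT.  */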
8058
8059 /* In case of interleaving (non-unit grouped access):
8060
8061 S1: &base + 2 = x2
8062 S2: &base = x0
8063 S3: &base + 1 = x1
8064 S4: &base + 3 = x3
8065
8066      We create vectorized stores starting from the base address (the access of the
8067      first stmt in the chain, S2 in the above example), when the last store stmt
8068 of the chain (S4) is reached:
8069
8070 VS1: &base = vx2
8071 VS2: &base + vec_size*1 = vx0
8072 VS3: &base + vec_size*2 = vx1
8073 VS4: &base + vec_size*3 = vx3
8074
8075 Then permutation statements are generated:
8076
8077 VS5: vx5 = VEC_PERM_EXPR < vx0, vx3, {0, 8, 1, 9, 2, 10, 3, 11} >
8078 VS6: vx6 = VEC_PERM_EXPR < vx0, vx3, {4, 12, 5, 13, 6, 14, 7, 15} >
8079 ...
8080
8081 And they are put in STMT_VINFO_VEC_STMT of the corresponding scalar stmts
8082 (the order of the data-refs in the output of vect_permute_store_chain
8083 corresponds to the order of scalar stmts in the interleaving chain - see
8084 the documentation of vect_permute_store_chain()).
8085
8086 In case of both multiple types and interleaving, above vector stores and
8087 permutation stmts are created for every copy. The result vector stmts are
8088 put in STMT_VINFO_VEC_STMT for the first copy and in the corresponding
8089 STMT_VINFO_RELATED_STMT for the next copies.
8090 */
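  /* Purely illustrative (hypothetical source, not derived from the code
     above): an interleaving chain like S1..S4 typically comes from scalar
     code such as

	struct { int a, b, c, d; } *base;
	for (i = 0; i < n; i++)
	  {
	    base[i].c = x2;	<-- S1: &base + 2
	    base[i].a = x0;	<-- S2: &base
	    base[i].b = x1;	<-- S3: &base + 1
	    base[i].d = x3;	<-- S4: &base + 3
	  }

     i.e. a group of four stores with a common base and constant offsets,
     which vect_permute_store_chain interleaves as described above.  */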
8091
8092 prev_stmt_info = NULL;
8093 tree vec_mask = NULL_TREE;
8094 for (j = 0; j < ncopies; j++)
8095 {
8096 stmt_vec_info new_stmt_info;
8097 if (j == 0)
8098 {
8099 if (slp)
8100 {
8101 /* Get vectorized arguments for SLP_NODE. */
8102 vect_get_vec_defs (op, NULL_TREE, stmt_info, &vec_oprnds,
8103 NULL, slp_node);
8104
8105 vec_oprnd = vec_oprnds[0];
8106 }
8107 else
8108 {
8109 /* For interleaved stores we collect vectorized defs for all the
8110 stores in the group in DR_CHAIN and OPRNDS. DR_CHAIN is then
8111 used as an input to vect_permute_store_chain(), and OPRNDS as
8112 an input to vect_get_vec_def_for_stmt_copy() for the next copy.
8113
8114 If the store is not grouped, DR_GROUP_SIZE is 1, and DR_CHAIN and
8115 OPRNDS are of size 1. */
8116 stmt_vec_info next_stmt_info = first_stmt_info;
8117 for (i = 0; i < group_size; i++)
8118 {
8119 /* Since gaps are not supported for interleaved stores,
8120 DR_GROUP_SIZE is the exact number of stmts in the chain.
8121 		 Therefore, NEXT_STMT_INFO can't be NULL.  If there is no
8122 		 interleaving, DR_GROUP_SIZE is 1, and only one iteration
8123 		 of the loop will be executed.  */
8124 op = vect_get_store_rhs (next_stmt_info);
8125 vec_oprnd = vect_get_vec_def_for_operand
8126 (op, next_stmt_info);
8127 dr_chain.quick_push (vec_oprnd);
8128 oprnds.quick_push (vec_oprnd);
8129 next_stmt_info = DR_GROUP_NEXT_ELEMENT (next_stmt_info);
8130 }
8131 if (mask)
8132 vec_mask = vect_get_vec_def_for_operand (mask, stmt_info,
8133 mask_vectype);
8134 }
8135
8136 	  /* We should have caught mismatched types earlier.  */
8137 gcc_assert (useless_type_conversion_p (vectype,
8138 TREE_TYPE (vec_oprnd)));
8139 bool simd_lane_access_p
8140 = STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) != 0;
8141 if (simd_lane_access_p
8142 && !loop_masks
8143 && TREE_CODE (DR_BASE_ADDRESS (first_dr_info->dr)) == ADDR_EXPR
8144 && VAR_P (TREE_OPERAND (DR_BASE_ADDRESS (first_dr_info->dr), 0))
8145 && integer_zerop (DR_OFFSET (first_dr_info->dr))
8146 && integer_zerop (DR_INIT (first_dr_info->dr))
8147 && alias_sets_conflict_p (get_alias_set (aggr_type),
8148 get_alias_set (TREE_TYPE (ref_type))))
8149 {
8150 dataref_ptr = unshare_expr (DR_BASE_ADDRESS (first_dr_info->dr));
8151 dataref_offset = build_int_cst (ref_type, 0);
8152 }
8153 else if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
8154 vect_get_gather_scatter_ops (loop, stmt_info, &gs_info,
8155 &dataref_ptr, &vec_offset);
8156 else
8157 dataref_ptr
8158 = vect_create_data_ref_ptr (first_stmt_info, aggr_type,
8159 simd_lane_access_p ? loop : NULL,
8160 offset, &dummy, gsi, &ptr_incr,
8161 simd_lane_access_p, NULL_TREE, bump);
8162 }
8163 else
8164 {
8165 /* For interleaved stores we created vectorized defs for all the
8166 defs stored in OPRNDS in the previous iteration (previous copy).
8167 DR_CHAIN is then used as an input to vect_permute_store_chain(),
8168 and OPRNDS as an input to vect_get_vec_def_for_stmt_copy() for the
8169 next copy.
8170 If the store is not grouped, DR_GROUP_SIZE is 1, and DR_CHAIN and
8171 OPRNDS are of size 1. */
8172 for (i = 0; i < group_size; i++)
8173 {
8174 op = oprnds[i];
8175 vec_oprnd = vect_get_vec_def_for_stmt_copy (vinfo, op);
8176 dr_chain[i] = vec_oprnd;
8177 oprnds[i] = vec_oprnd;
8178 }
8179 if (mask)
8180 vec_mask = vect_get_vec_def_for_stmt_copy (vinfo, vec_mask);
8181 if (dataref_offset)
8182 dataref_offset
8183 = int_const_binop (PLUS_EXPR, dataref_offset, bump);
8184 else if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
8185 vec_offset = vect_get_vec_def_for_stmt_copy (vinfo, vec_offset);
8186 else
8187 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi,
8188 stmt_info, bump);
8189 }
8190
8191 if (memory_access_type == VMAT_LOAD_STORE_LANES)
8192 {
8193 tree vec_array;
8194
8195 /* Get an array into which we can store the individual vectors. */
8196 vec_array = create_vector_array (vectype, vec_num);
8197
8198 /* Invalidate the current contents of VEC_ARRAY. This should
8199 become an RTL clobber too, which prevents the vector registers
8200 from being upward-exposed. */
8201 vect_clobber_variable (stmt_info, gsi, vec_array);
8202
8203 /* Store the individual vectors into the array. */
8204 for (i = 0; i < vec_num; i++)
8205 {
8206 vec_oprnd = dr_chain[i];
8207 write_vector_array (stmt_info, gsi, vec_oprnd, vec_array, i);
8208 }
8209
8210 tree final_mask = NULL;
8211 if (loop_masks)
8212 final_mask = vect_get_loop_mask (gsi, loop_masks, ncopies,
8213 vectype, j);
8214 if (vec_mask)
8215 final_mask = prepare_load_store_mask (mask_vectype, final_mask,
8216 vec_mask, gsi);
8217
8218 gcall *call;
8219 if (final_mask)
8220 {
8221 /* Emit:
8222 MASK_STORE_LANES (DATAREF_PTR, ALIAS_PTR, VEC_MASK,
8223 VEC_ARRAY). */
8224 unsigned int align = TYPE_ALIGN_UNIT (TREE_TYPE (vectype));
8225 tree alias_ptr = build_int_cst (ref_type, align);
8226 call = gimple_build_call_internal (IFN_MASK_STORE_LANES, 4,
8227 dataref_ptr, alias_ptr,
8228 final_mask, vec_array);
8229 }
8230 else
8231 {
8232 /* Emit:
8233 MEM_REF[...all elements...] = STORE_LANES (VEC_ARRAY). */
8234 data_ref = create_array_ref (aggr_type, dataref_ptr, ref_type);
8235 call = gimple_build_call_internal (IFN_STORE_LANES, 1,
8236 vec_array);
8237 gimple_call_set_lhs (call, data_ref);
8238 }
8239 gimple_call_set_nothrow (call, true);
8240 new_stmt_info = vect_finish_stmt_generation (stmt_info, call, gsi);
8241
8242 /* Record that VEC_ARRAY is now dead. */
8243 vect_clobber_variable (stmt_info, gsi, vec_array);
8244 }
8245 else
8246 {
8247 new_stmt_info = NULL;
8248 if (grouped_store)
8249 {
8250 if (j == 0)
8251 result_chain.create (group_size);
8252 /* Permute. */
8253 vect_permute_store_chain (dr_chain, group_size, stmt_info, gsi,
8254 &result_chain);
8255 }
8256
8257 stmt_vec_info next_stmt_info = first_stmt_info;
8258 for (i = 0; i < vec_num; i++)
8259 {
8260 unsigned misalign;
8261 unsigned HOST_WIDE_INT align;
8262
8263 tree final_mask = NULL_TREE;
8264 if (loop_masks)
8265 final_mask = vect_get_loop_mask (gsi, loop_masks,
8266 vec_num * ncopies,
8267 vectype, vec_num * j + i);
8268 if (vec_mask)
8269 final_mask = prepare_load_store_mask (mask_vectype, final_mask,
8270 vec_mask, gsi);
8271
8272 if (memory_access_type == VMAT_GATHER_SCATTER)
8273 {
8274 tree scale = size_int (gs_info.scale);
8275 gcall *call;
8276 if (loop_masks)
8277 call = gimple_build_call_internal
8278 (IFN_MASK_SCATTER_STORE, 5, dataref_ptr, vec_offset,
8279 scale, vec_oprnd, final_mask);
8280 else
8281 call = gimple_build_call_internal
8282 (IFN_SCATTER_STORE, 4, dataref_ptr, vec_offset,
8283 scale, vec_oprnd);
8284 gimple_call_set_nothrow (call, true);
8285 new_stmt_info
8286 = vect_finish_stmt_generation (stmt_info, call, gsi);
8287 break;
8288 }
8289
8290 if (i > 0)
8291 /* Bump the vector pointer. */
8292 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi,
8293 stmt_info, bump);
8294
8295 if (slp)
8296 vec_oprnd = vec_oprnds[i];
8297 else if (grouped_store)
8298 /* For grouped stores vectorized defs are interleaved in
8299 vect_permute_store_chain(). */
8300 vec_oprnd = result_chain[i];
8301
8302 align = known_alignment (DR_TARGET_ALIGNMENT (first_dr_info));
8303 if (aligned_access_p (first_dr_info))
8304 misalign = 0;
8305 else if (DR_MISALIGNMENT (first_dr_info) == -1)
8306 {
8307 align = dr_alignment (vect_dr_behavior (first_dr_info));
8308 misalign = 0;
8309 }
8310 else
8311 misalign = DR_MISALIGNMENT (first_dr_info);
8312 if (dataref_offset == NULL_TREE
8313 && TREE_CODE (dataref_ptr) == SSA_NAME)
8314 set_ptr_info_alignment (get_ptr_info (dataref_ptr), align,
8315 misalign);
8316
8317 if (memory_access_type == VMAT_CONTIGUOUS_REVERSE)
8318 {
8319 tree perm_mask = perm_mask_for_reverse (vectype);
8320 tree perm_dest = vect_create_destination_var
8321 (vect_get_store_rhs (stmt_info), vectype);
8322 tree new_temp = make_ssa_name (perm_dest);
8323
8324 /* Generate the permute statement. */
8325 gimple *perm_stmt
8326 = gimple_build_assign (new_temp, VEC_PERM_EXPR, vec_oprnd,
8327 vec_oprnd, perm_mask);
8328 vect_finish_stmt_generation (stmt_info, perm_stmt, gsi);
8329
8330 perm_stmt = SSA_NAME_DEF_STMT (new_temp);
8331 vec_oprnd = new_temp;
8332 }
8333
8334 /* Arguments are ready. Create the new vector stmt. */
8335 if (final_mask)
8336 {
8337 align = least_bit_hwi (misalign | align);
8338 tree ptr = build_int_cst (ref_type, align);
8339 gcall *call
8340 = gimple_build_call_internal (IFN_MASK_STORE, 4,
8341 dataref_ptr, ptr,
8342 final_mask, vec_oprnd);
8343 gimple_call_set_nothrow (call, true);
8344 new_stmt_info
8345 = vect_finish_stmt_generation (stmt_info, call, gsi);
8346 }
8347 else
8348 {
8349 data_ref = fold_build2 (MEM_REF, vectype,
8350 dataref_ptr,
8351 dataref_offset
8352 ? dataref_offset
8353 : build_int_cst (ref_type, 0));
8354 if (aligned_access_p (first_dr_info))
8355 ;
8356 else if (DR_MISALIGNMENT (first_dr_info) == -1)
8357 TREE_TYPE (data_ref)
8358 = build_aligned_type (TREE_TYPE (data_ref),
8359 align * BITS_PER_UNIT);
8360 else
8361 TREE_TYPE (data_ref)
8362 = build_aligned_type (TREE_TYPE (data_ref),
8363 TYPE_ALIGN (elem_type));
8364 vect_copy_ref_info (data_ref, DR_REF (first_dr_info->dr));
8365 gassign *new_stmt
8366 = gimple_build_assign (data_ref, vec_oprnd);
8367 new_stmt_info
8368 = vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
8369 }
8370
8371 if (slp)
8372 continue;
8373
8374 next_stmt_info = DR_GROUP_NEXT_ELEMENT (next_stmt_info);
8375 if (!next_stmt_info)
8376 break;
8377 }
8378 }
8379 if (!slp)
8380 {
8381 if (j == 0)
8382 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt_info;
8383 else
8384 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt_info;
8385 prev_stmt_info = new_stmt_info;
8386 }
8387 }
8388
8389 oprnds.release ();
8390 result_chain.release ();
8391 vec_oprnds.release ();
8392
8393 return true;
8394 }
8395
8396 /* Given a vector type VECTYPE, turns permutation SEL into the equivalent
8397 VECTOR_CST mask. No checks are made that the target platform supports the
8398 mask, so callers may wish to test can_vec_perm_const_p separately, or use
8399 vect_gen_perm_mask_checked. */
8400
8401 tree
8402 vect_gen_perm_mask_any (tree vectype, const vec_perm_indices &sel)
8403 {
8404 tree mask_type;
8405
8406 poly_uint64 nunits = sel.length ();
8407 gcc_assert (known_eq (nunits, TYPE_VECTOR_SUBPARTS (vectype)));
8408
8409 mask_type = build_vector_type (ssizetype, nunits);
8410 return vec_perm_indices_to_tree (mask_type, sel);
8411 }
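/* For instance (illustrative only): for a 4-element VECTYPE and
   SEL == { 3, 2, 1, 0 }, the mask returned above is the VECTOR_CST
   { 3, 2, 1, 0 } of a 4-element ssizetype vector type, suitable as the
   third operand of a VEC_PERM_EXPR that reverses its input vector.  */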
8412
8413 /* Checked version of vect_gen_perm_mask_any. Asserts can_vec_perm_const_p,
8414 i.e. that the target supports the pattern _for arbitrary input vectors_. */
8415
8416 tree
8417 vect_gen_perm_mask_checked (tree vectype, const vec_perm_indices &sel)
8418 {
8419 gcc_assert (can_vec_perm_const_p (TYPE_MODE (vectype), sel));
8420 return vect_gen_perm_mask_any (vectype, sel);
8421 }
8422
8423 /* Given vector variables X and Y that were generated for the scalar
8424    STMT_INFO, generate instructions to permute the vector elements of X and Y
8425 using permutation mask MASK_VEC, insert them at *GSI and return the
8426 permuted vector variable. */
8427
8428 static tree
8429 permute_vec_elements (tree x, tree y, tree mask_vec, stmt_vec_info stmt_info,
8430 gimple_stmt_iterator *gsi)
8431 {
8432 tree vectype = TREE_TYPE (x);
8433 tree perm_dest, data_ref;
8434 gimple *perm_stmt;
8435
8436 tree scalar_dest = gimple_get_lhs (stmt_info->stmt);
8437 if (scalar_dest && TREE_CODE (scalar_dest) == SSA_NAME)
8438 perm_dest = vect_create_destination_var (scalar_dest, vectype);
8439 else
8440 perm_dest = vect_get_new_vect_var (vectype, vect_simple_var, NULL);
8441 data_ref = make_ssa_name (perm_dest);
8442
8443 /* Generate the permute statement. */
8444 perm_stmt = gimple_build_assign (data_ref, VEC_PERM_EXPR, x, y, mask_vec);
8445 vect_finish_stmt_generation (stmt_info, perm_stmt, gsi);
8446
8447 return data_ref;
8448 }
8449
8450 /* Hoist the definitions of all SSA uses on STMT_INFO out of the loop LOOP,
8451    inserting them on the loop's preheader edge.  Returns true if we
8452    were successful in doing so (and thus STMT_INFO can then be moved),
8453 otherwise returns false. */
8454
8455 static bool
8456 hoist_defs_of_uses (stmt_vec_info stmt_info, class loop *loop)
8457 {
8458 ssa_op_iter i;
8459 tree op;
8460 bool any = false;
8461
8462 FOR_EACH_SSA_TREE_OPERAND (op, stmt_info->stmt, i, SSA_OP_USE)
8463 {
8464 gimple *def_stmt = SSA_NAME_DEF_STMT (op);
8465 if (!gimple_nop_p (def_stmt)
8466 && flow_bb_inside_loop_p (loop, gimple_bb (def_stmt)))
8467 {
8468 	  /* Make sure we don't need to recurse.  While we could do
8469 	     so in simple cases, for more complex use webs we don't
8470 	     have an easy way to preserve stmt order to fulfil
8471 	     dependencies within them.  */
8472 tree op2;
8473 ssa_op_iter i2;
8474 if (gimple_code (def_stmt) == GIMPLE_PHI)
8475 return false;
8476 FOR_EACH_SSA_TREE_OPERAND (op2, def_stmt, i2, SSA_OP_USE)
8477 {
8478 gimple *def_stmt2 = SSA_NAME_DEF_STMT (op2);
8479 if (!gimple_nop_p (def_stmt2)
8480 && flow_bb_inside_loop_p (loop, gimple_bb (def_stmt2)))
8481 return false;
8482 }
8483 any = true;
8484 }
8485 }
8486
8487 if (!any)
8488 return true;
8489
8490 FOR_EACH_SSA_TREE_OPERAND (op, stmt_info->stmt, i, SSA_OP_USE)
8491 {
8492 gimple *def_stmt = SSA_NAME_DEF_STMT (op);
8493 if (!gimple_nop_p (def_stmt)
8494 && flow_bb_inside_loop_p (loop, gimple_bb (def_stmt)))
8495 {
8496 gimple_stmt_iterator gsi = gsi_for_stmt (def_stmt);
8497 gsi_remove (&gsi, false);
8498 gsi_insert_on_edge_immediate (loop_preheader_edge (loop), def_stmt);
8499 }
8500 }
8501
8502 return true;
8503 }
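/* A sketch of what the above enables (hypothetical, simplified IL):
   for an invariant load such as

	_1 = p_2 + 16;
	_3 = *_1;

   inside LOOP, where _1 is defined in the loop body but depends only on
   loop-invariant operands, the definition of _1 is moved to the preheader
   so that the load itself can subsequently be hoisted as well.  */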
8504
8505 /* vectorizable_load.
8506
8507    Check if STMT_INFO reads a non-scalar data-ref (array/pointer/structure)
8508 that can be vectorized.
8509 If VEC_STMT is also passed, vectorize STMT_INFO: create a vectorized
8510 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
8511 Return true if STMT_INFO is vectorizable in this way. */
8512
8513 static bool
8514 vectorizable_load (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
8515 stmt_vec_info *vec_stmt, slp_tree slp_node,
8516 slp_instance slp_node_instance,
8517 stmt_vector_for_cost *cost_vec)
8518 {
8519 tree scalar_dest;
8520 tree vec_dest = NULL;
8521 tree data_ref = NULL;
8522 stmt_vec_info prev_stmt_info;
8523 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
8524 class loop *loop = NULL;
8525 class loop *containing_loop = gimple_bb (stmt_info->stmt)->loop_father;
8526 bool nested_in_vect_loop = false;
8527 tree elem_type;
8528 tree new_temp;
8529 machine_mode mode;
8530 tree dummy;
8531 enum dr_alignment_support alignment_support_scheme;
8532 tree dataref_ptr = NULL_TREE;
8533 tree dataref_offset = NULL_TREE;
8534 gimple *ptr_incr = NULL;
8535 int ncopies;
8536 int i, j;
8537 unsigned int group_size;
8538 poly_uint64 group_gap_adj;
8539 tree msq = NULL_TREE, lsq;
8540 tree offset = NULL_TREE;
8541 tree byte_offset = NULL_TREE;
8542 tree realignment_token = NULL_TREE;
8543 gphi *phi = NULL;
8544 vec<tree> dr_chain = vNULL;
8545 bool grouped_load = false;
8546 stmt_vec_info first_stmt_info;
8547 stmt_vec_info first_stmt_info_for_drptr = NULL;
8548 bool compute_in_loop = false;
8549 class loop *at_loop;
8550 int vec_num;
8551 bool slp = (slp_node != NULL);
8552 bool slp_perm = false;
8553 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
8554 poly_uint64 vf;
8555 tree aggr_type;
8556 gather_scatter_info gs_info;
8557 vec_info *vinfo = stmt_info->vinfo;
8558 tree ref_type;
8559 enum vect_def_type mask_dt = vect_unknown_def_type;
8560
8561 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
8562 return false;
8563
8564 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
8565 && ! vec_stmt)
8566 return false;
8567
8568 tree mask = NULL_TREE, mask_vectype = NULL_TREE;
8569 if (gassign *assign = dyn_cast <gassign *> (stmt_info->stmt))
8570 {
8571 scalar_dest = gimple_assign_lhs (assign);
8572 if (TREE_CODE (scalar_dest) != SSA_NAME)
8573 return false;
8574
8575 tree_code code = gimple_assign_rhs_code (assign);
8576 if (code != ARRAY_REF
8577 && code != BIT_FIELD_REF
8578 && code != INDIRECT_REF
8579 && code != COMPONENT_REF
8580 && code != IMAGPART_EXPR
8581 && code != REALPART_EXPR
8582 && code != MEM_REF
8583 && TREE_CODE_CLASS (code) != tcc_declaration)
8584 return false;
8585 }
8586 else
8587 {
8588 gcall *call = dyn_cast <gcall *> (stmt_info->stmt);
8589 if (!call || !gimple_call_internal_p (call))
8590 return false;
8591
8592 internal_fn ifn = gimple_call_internal_fn (call);
8593 if (!internal_load_fn_p (ifn))
8594 return false;
8595
8596 scalar_dest = gimple_call_lhs (call);
8597 if (!scalar_dest)
8598 return false;
8599
8600 int mask_index = internal_fn_mask_index (ifn);
8601 if (mask_index >= 0)
8602 {
8603 mask = gimple_call_arg (call, mask_index);
8604 if (!vect_check_load_store_mask (stmt_info, mask, &mask_dt,
8605 &mask_vectype))
8606 return false;
8607 }
8608 }
8609
8610 if (!STMT_VINFO_DATA_REF (stmt_info))
8611 return false;
8612
8613 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
8614 poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
8615
8616 if (loop_vinfo)
8617 {
8618 loop = LOOP_VINFO_LOOP (loop_vinfo);
8619 nested_in_vect_loop = nested_in_vect_loop_p (loop, stmt_info);
8620 vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
8621 }
8622 else
8623 vf = 1;
8624
8625 /* Multiple types in SLP are handled by creating the appropriate number of
8626 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
8627 case of SLP. */
8628 if (slp)
8629 ncopies = 1;
8630 else
8631 ncopies = vect_get_num_copies (loop_vinfo, vectype);
8632
8633 gcc_assert (ncopies >= 1);
8634
8635 /* FORNOW. This restriction should be relaxed. */
8636 if (nested_in_vect_loop && ncopies > 1)
8637 {
8638 if (dump_enabled_p ())
8639 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
8640 "multiple types in nested loop.\n");
8641 return false;
8642 }
8643
8644 /* Invalidate assumptions made by dependence analysis when vectorization
8645 on the unrolled body effectively re-orders stmts. */
8646 if (ncopies > 1
8647 && STMT_VINFO_MIN_NEG_DIST (stmt_info) != 0
8648 && maybe_gt (LOOP_VINFO_VECT_FACTOR (loop_vinfo),
8649 STMT_VINFO_MIN_NEG_DIST (stmt_info)))
8650 {
8651 if (dump_enabled_p ())
8652 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
8653 "cannot perform implicit CSE when unrolling "
8654 "with negative dependence distance\n");
8655 return false;
8656 }
8657
8658 elem_type = TREE_TYPE (vectype);
8659 mode = TYPE_MODE (vectype);
8660
8661 /* FORNOW. In some cases we can vectorize even if the data type is not
8662 supported (e.g. data copies). */
8663 if (optab_handler (mov_optab, mode) == CODE_FOR_nothing)
8664 {
8665 if (dump_enabled_p ())
8666 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
8667 "Aligned load, but unsupported type.\n");
8668 return false;
8669 }
8670
8671 /* Check if the load is a part of an interleaving chain. */
8672 if (STMT_VINFO_GROUPED_ACCESS (stmt_info))
8673 {
8674 grouped_load = true;
8675 /* FORNOW */
8676 gcc_assert (!nested_in_vect_loop);
8677 gcc_assert (!STMT_VINFO_GATHER_SCATTER_P (stmt_info));
8678
8679 first_stmt_info = DR_GROUP_FIRST_ELEMENT (stmt_info);
8680 group_size = DR_GROUP_SIZE (first_stmt_info);
8681
8682 /* Refuse non-SLP vectorization of SLP-only groups. */
8683 if (!slp && STMT_VINFO_SLP_VECT_ONLY (first_stmt_info))
8684 {
8685 if (dump_enabled_p ())
8686 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
8687 "cannot vectorize load in non-SLP mode.\n");
8688 return false;
8689 }
8690
8691 if (slp && SLP_TREE_LOAD_PERMUTATION (slp_node).exists ())
8692 slp_perm = true;
8693
8694 /* Invalidate assumptions made by dependence analysis when vectorization
8695 on the unrolled body effectively re-orders stmts. */
8696 if (!PURE_SLP_STMT (stmt_info)
8697 && STMT_VINFO_MIN_NEG_DIST (stmt_info) != 0
8698 && maybe_gt (LOOP_VINFO_VECT_FACTOR (loop_vinfo),
8699 STMT_VINFO_MIN_NEG_DIST (stmt_info)))
8700 {
8701 if (dump_enabled_p ())
8702 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
8703 "cannot perform implicit CSE when performing "
8704 "group loads with negative dependence distance\n");
8705 return false;
8706 }
8707 }
8708 else
8709 group_size = 1;
8710
8711 vect_memory_access_type memory_access_type;
8712 if (!get_load_store_type (stmt_info, vectype, slp, mask, VLS_LOAD, ncopies,
8713 &memory_access_type, &gs_info))
8714 return false;
8715
8716 if (mask)
8717 {
8718 if (memory_access_type == VMAT_CONTIGUOUS)
8719 {
8720 machine_mode vec_mode = TYPE_MODE (vectype);
8721 if (!VECTOR_MODE_P (vec_mode)
8722 || !can_vec_mask_load_store_p (vec_mode,
8723 TYPE_MODE (mask_vectype), true))
8724 return false;
8725 }
8726 else if (memory_access_type != VMAT_LOAD_STORE_LANES
8727 && memory_access_type != VMAT_GATHER_SCATTER)
8728 {
8729 if (dump_enabled_p ())
8730 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
8731 "unsupported access type for masked load.\n");
8732 return false;
8733 }
8734 }
8735
8736 if (!vec_stmt) /* transformation not required. */
8737 {
8738 if (!slp)
8739 STMT_VINFO_MEMORY_ACCESS_TYPE (stmt_info) = memory_access_type;
8740
8741 if (loop_vinfo
8742 && LOOP_VINFO_CAN_FULLY_MASK_P (loop_vinfo))
8743 check_load_store_masking (loop_vinfo, vectype, VLS_LOAD, group_size,
8744 memory_access_type, &gs_info, mask);
8745
8746 STMT_VINFO_TYPE (stmt_info) = load_vec_info_type;
8747 vect_model_load_cost (stmt_info, ncopies, memory_access_type,
8748 slp_node_instance, slp_node, cost_vec);
8749 return true;
8750 }
8751
8752 if (!slp)
8753 gcc_assert (memory_access_type
8754 == STMT_VINFO_MEMORY_ACCESS_TYPE (stmt_info));
8755
8756 if (dump_enabled_p ())
8757 dump_printf_loc (MSG_NOTE, vect_location,
8758 "transform load. ncopies = %d\n", ncopies);
8759
8760 /* Transform. */
8761
8762 dr_vec_info *dr_info = STMT_VINFO_DR_INFO (stmt_info), *first_dr_info = NULL;
8763 ensure_base_align (dr_info);
8764
8765 if (memory_access_type == VMAT_GATHER_SCATTER && gs_info.decl)
8766 {
8767 vect_build_gather_load_calls (stmt_info, gsi, vec_stmt, &gs_info, mask);
8768 return true;
8769 }
8770
8771 if (memory_access_type == VMAT_INVARIANT)
8772 {
8773 gcc_assert (!grouped_load && !mask && !bb_vinfo);
8774 /* If we have versioned for aliasing or the loop doesn't
8775 have any data dependencies that would preclude this,
8776 then we are sure this is a loop invariant load and
8777 thus we can insert it on the preheader edge. */
8778 bool hoist_p = (LOOP_VINFO_NO_DATA_DEPENDENCIES (loop_vinfo)
8779 && !nested_in_vect_loop
8780 && hoist_defs_of_uses (stmt_info, loop));
8781 if (hoist_p)
8782 {
8783 gassign *stmt = as_a <gassign *> (stmt_info->stmt);
8784 if (dump_enabled_p ())
8785 dump_printf_loc (MSG_NOTE, vect_location,
8786 "hoisting out of the vectorized loop: %G", stmt);
8787 scalar_dest = copy_ssa_name (scalar_dest);
8788 tree rhs = unshare_expr (gimple_assign_rhs1 (stmt));
8789 gsi_insert_on_edge_immediate
8790 (loop_preheader_edge (loop),
8791 gimple_build_assign (scalar_dest, rhs));
8792 }
8793 /* These copies are all equivalent, but currently the representation
8794 requires a separate STMT_VINFO_VEC_STMT for each one. */
8795 prev_stmt_info = NULL;
8796 gimple_stmt_iterator gsi2 = *gsi;
8797 gsi_next (&gsi2);
8798 for (j = 0; j < ncopies; j++)
8799 {
8800 stmt_vec_info new_stmt_info;
8801 if (hoist_p)
8802 {
8803 new_temp = vect_init_vector (stmt_info, scalar_dest,
8804 vectype, NULL);
8805 gimple *new_stmt = SSA_NAME_DEF_STMT (new_temp);
8806 new_stmt_info = vinfo->add_stmt (new_stmt);
8807 }
8808 else
8809 {
8810 new_temp = vect_init_vector (stmt_info, scalar_dest,
8811 vectype, &gsi2);
8812 new_stmt_info = vinfo->lookup_def (new_temp);
8813 }
8814 if (slp)
8815 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt_info);
8816 else if (j == 0)
8817 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt_info;
8818 else
8819 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt_info;
8820 prev_stmt_info = new_stmt_info;
8821 }
8822 return true;
8823 }
8824
8825 if (memory_access_type == VMAT_ELEMENTWISE
8826 || memory_access_type == VMAT_STRIDED_SLP)
8827 {
8828 gimple_stmt_iterator incr_gsi;
8829 bool insert_after;
8830 gimple *incr;
8831 tree offvar;
8832 tree ivstep;
8833 tree running_off;
8834 vec<constructor_elt, va_gc> *v = NULL;
8835 tree stride_base, stride_step, alias_off;
8836 /* Checked by get_load_store_type. */
8837 unsigned int const_nunits = nunits.to_constant ();
8838 unsigned HOST_WIDE_INT cst_offset = 0;
8839
8840 gcc_assert (!LOOP_VINFO_FULLY_MASKED_P (loop_vinfo));
8841 gcc_assert (!nested_in_vect_loop);
8842
8843 if (grouped_load)
8844 {
8845 first_stmt_info = DR_GROUP_FIRST_ELEMENT (stmt_info);
8846 first_dr_info = STMT_VINFO_DR_INFO (first_stmt_info);
8847 }
8848 else
8849 {
8850 first_stmt_info = stmt_info;
8851 first_dr_info = dr_info;
8852 }
8853 if (slp && grouped_load)
8854 {
8855 group_size = DR_GROUP_SIZE (first_stmt_info);
8856 ref_type = get_group_alias_ptr_type (first_stmt_info);
8857 }
8858 else
8859 {
8860 if (grouped_load)
8861 cst_offset
8862 = (tree_to_uhwi (TYPE_SIZE_UNIT (TREE_TYPE (vectype)))
8863 * vect_get_place_in_interleaving_chain (stmt_info,
8864 first_stmt_info));
8865 group_size = 1;
8866 ref_type = reference_alias_ptr_type (DR_REF (dr_info->dr));
8867 }
8868
8869 stride_base
8870 = fold_build_pointer_plus
8871 (DR_BASE_ADDRESS (first_dr_info->dr),
8872 size_binop (PLUS_EXPR,
8873 convert_to_ptrofftype (DR_OFFSET (first_dr_info->dr)),
8874 convert_to_ptrofftype (DR_INIT (first_dr_info->dr))));
8875 stride_step = fold_convert (sizetype, DR_STEP (first_dr_info->dr));
8876
8877 /* For a load with loop-invariant (but other than power-of-2)
8878 stride (i.e. not a grouped access) like so:
8879
8880 for (i = 0; i < n; i += stride)
8881 ... = array[i];
8882
8883 we generate a new induction variable and new accesses to
8884 form a new vector (or vectors, depending on ncopies):
8885
8886 for (j = 0; ; j += VF*stride)
8887 tmp1 = array[j];
8888 tmp2 = array[j + stride];
8889 ...
8890 vectemp = {tmp1, tmp2, ...}
8891 */
8892
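/* The induction variable created below advances by VF times the scalar
   byte step DR_STEP on every iteration of the vectorized loop.  */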
8893 ivstep = fold_build2 (MULT_EXPR, TREE_TYPE (stride_step), stride_step,
8894 build_int_cst (TREE_TYPE (stride_step), vf));
8895
8896 standard_iv_increment_position (loop, &incr_gsi, &insert_after);
8897
8898 stride_base = cse_and_gimplify_to_preheader (loop_vinfo, stride_base);
8899 ivstep = cse_and_gimplify_to_preheader (loop_vinfo, ivstep);
8900 create_iv (stride_base, ivstep, NULL,
8901 loop, &incr_gsi, insert_after,
8902 &offvar, NULL);
8903 incr = gsi_stmt (incr_gsi);
8904 loop_vinfo->add_stmt (incr);
8905
8906 stride_step = cse_and_gimplify_to_preheader (loop_vinfo, stride_step);
8907
8908 prev_stmt_info = NULL;
8909 running_off = offvar;
8910 alias_off = build_int_cst (ref_type, 0);
8911 int nloads = const_nunits;
8912 int lnel = 1;
8913 tree ltype = TREE_TYPE (vectype);
8914 tree lvectype = vectype;
8915 auto_vec<tree> dr_chain;
8916 if (memory_access_type == VMAT_STRIDED_SLP)
8917 {
8918 if (group_size < const_nunits)
8919 {
8920 /* First check if vec_init optab supports construction from
8921 vector elts directly. */
8922 scalar_mode elmode = SCALAR_TYPE_MODE (TREE_TYPE (vectype));
8923 machine_mode vmode;
8924 if (related_vector_mode (TYPE_MODE (vectype), elmode,
8925 group_size).exists (&vmode)
8926 && (convert_optab_handler (vec_init_optab,
8927 TYPE_MODE (vectype), vmode)
8928 != CODE_FOR_nothing))
8929 {
8930 nloads = const_nunits / group_size;
8931 lnel = group_size;
8932 ltype = build_vector_type (TREE_TYPE (vectype), group_size);
8933 }
8934 else
8935 {
8936 /* Otherwise avoid emitting a constructor of vector elements
8937 by performing the loads using an integer type of the same
8938 size, constructing a vector of those and then
8939 re-interpreting it as the original vector type.
8940 This avoids a huge runtime penalty due to the general
8941 inability to perform store forwarding from smaller stores
8942 to a larger load. */
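/* A sketch of the effect, assuming the target supports the needed
   vec_init: with a V4SF vectype and group_size == 2, each pair of
   adjacent floats is loaded as one 64-bit integer, the two integers
   are combined into a two-element integer vector, and that vector is
   reinterpreted as V4SF by the VIEW_CONVERT_EXPR emitted below.  */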
8943 unsigned lsize
8944 = group_size * TYPE_PRECISION (TREE_TYPE (vectype));
8945 unsigned int lnunits = const_nunits / group_size;
8946 /* If we can't construct such a vector fall back to
8947 element loads of the original vector type. */
8948 if (int_mode_for_size (lsize, 0).exists (&elmode)
8949 && related_vector_mode (TYPE_MODE (vectype), elmode,
8950 lnunits).exists (&vmode)
8951 && (convert_optab_handler (vec_init_optab, vmode, elmode)
8952 != CODE_FOR_nothing))
8953 {
8954 nloads = lnunits;
8955 lnel = group_size;
8956 ltype = build_nonstandard_integer_type (lsize, 1);
8957 lvectype = build_vector_type (ltype, nloads);
8958 }
8959 }
8960 }
8961 else
8962 {
8963 nloads = 1;
8964 lnel = const_nunits;
8965 ltype = vectype;
8966 }
8967 ltype = build_aligned_type (ltype, TYPE_ALIGN (TREE_TYPE (vectype)));
8968 }
8969 /* Load vector(1) scalar_type if the vectype has just one element. */
8970 else if (nloads == 1)
8971 ltype = vectype;
8972
8973 if (slp)
8974 {
8975 /* For SLP permutation support we need to load the whole group,
8976 not only the number of vector stmts the permutation result
8977 fits in. */
8978 if (slp_perm)
8979 {
8980 /* We don't yet generate SLP_TREE_LOAD_PERMUTATIONs for
8981 variable VF. */
8982 unsigned int const_vf = vf.to_constant ();
8983 ncopies = CEIL (group_size * const_vf, const_nunits);
8984 dr_chain.create (ncopies);
8985 }
8986 else
8987 ncopies = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
8988 }
8989 unsigned int group_el = 0;
8990 unsigned HOST_WIDE_INT
8991 elsz = tree_to_uhwi (TYPE_SIZE_UNIT (TREE_TYPE (vectype)));
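/* Emit NLOADS loads of LTYPE per vector copy, bumping RUNNING_OFF by
   STRIDE_STEP after each group of GROUP_SIZE elements (after every
   load in the non-SLP case), and assemble the pieces into a vector
   whenever NLOADS > 1.  */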
8992 for (j = 0; j < ncopies; j++)
8993 {
8994 if (nloads > 1)
8995 vec_alloc (v, nloads);
8996 stmt_vec_info new_stmt_info = NULL;
8997 for (i = 0; i < nloads; i++)
8998 {
8999 tree this_off = build_int_cst (TREE_TYPE (alias_off),
9000 group_el * elsz + cst_offset);
9001 tree data_ref = build2 (MEM_REF, ltype, running_off, this_off);
9002 vect_copy_ref_info (data_ref, DR_REF (first_dr_info->dr));
9003 gassign *new_stmt
9004 = gimple_build_assign (make_ssa_name (ltype), data_ref);
9005 new_stmt_info
9006 = vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
9007 if (nloads > 1)
9008 CONSTRUCTOR_APPEND_ELT (v, NULL_TREE,
9009 gimple_assign_lhs (new_stmt));
9010
9011 group_el += lnel;
9012 if (! slp
9013 || group_el == group_size)
9014 {
9015 tree newoff = copy_ssa_name (running_off);
9016 gimple *incr = gimple_build_assign (newoff, POINTER_PLUS_EXPR,
9017 running_off, stride_step);
9018 vect_finish_stmt_generation (stmt_info, incr, gsi);
9019
9020 running_off = newoff;
9021 group_el = 0;
9022 }
9023 }
9024 if (nloads > 1)
9025 {
9026 tree vec_inv = build_constructor (lvectype, v);
9027 new_temp = vect_init_vector (stmt_info, vec_inv, lvectype, gsi);
9028 new_stmt_info = vinfo->lookup_def (new_temp);
9029 if (lvectype != vectype)
9030 {
9031 gassign *new_stmt
9032 = gimple_build_assign (make_ssa_name (vectype),
9033 VIEW_CONVERT_EXPR,
9034 build1 (VIEW_CONVERT_EXPR,
9035 vectype, new_temp));
9036 new_stmt_info
9037 = vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
9038 }
9039 }
9040
9041 if (slp)
9042 {
9043 if (slp_perm)
9044 dr_chain.quick_push (gimple_assign_lhs (new_stmt_info->stmt));
9045 else
9046 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt_info);
9047 }
9048 else
9049 {
9050 if (j == 0)
9051 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt_info;
9052 else
9053 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt_info;
9054 prev_stmt_info = new_stmt_info;
9055 }
9056 }
9057 if (slp_perm)
9058 {
9059 unsigned n_perms;
9060 vect_transform_slp_perm_load (slp_node, dr_chain, gsi, vf,
9061 slp_node_instance, false, &n_perms);
9062 }
9063 return true;
9064 }
9065
9066 if (memory_access_type == VMAT_GATHER_SCATTER
9067 || (!slp && memory_access_type == VMAT_CONTIGUOUS))
9068 grouped_load = false;
9069
9070 if (grouped_load)
9071 {
9072 first_stmt_info = DR_GROUP_FIRST_ELEMENT (stmt_info);
9073 group_size = DR_GROUP_SIZE (first_stmt_info);
9074 /* For SLP vectorization we directly vectorize a subchain
9075 without permutation. */
9076 if (slp && ! SLP_TREE_LOAD_PERMUTATION (slp_node).exists ())
9077 first_stmt_info = SLP_TREE_SCALAR_STMTS (slp_node)[0];
9078 /* For BB vectorization always use the first stmt to base
9079 the data ref pointer on. */
9080 if (bb_vinfo)
9081 first_stmt_info_for_drptr = SLP_TREE_SCALAR_STMTS (slp_node)[0];
9082
9083 /* Check if the chain of loads is already vectorized. */
9084 if (STMT_VINFO_VEC_STMT (first_stmt_info)
9085 /* For SLP we would need to copy over SLP_TREE_VEC_STMTS.
9086 ??? But we can only do so if there is exactly one
9087 as we have no way to get at the rest. Leave the CSE
9088 opportunity alone.
9089 ??? With the group load eventually participating
9090 in multiple different permutations (having multiple
9091 slp nodes which refer to the same group) the CSE
9092 is even wrong code. See PR56270. */
9093 && !slp)
9094 {
9095 *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
9096 return true;
9097 }
9098 first_dr_info = STMT_VINFO_DR_INFO (first_stmt_info);
9099 group_gap_adj = 0;
9100
9101 /* VEC_NUM is the number of vect stmts to be created for this group. */
9102 if (slp)
9103 {
9104 grouped_load = false;
9105 /* If an SLP permutation is from N elements to N elements,
9106 and if one vector holds a whole number of such N-element groups, we can load
9107 the inputs to the permutation in the same way as an
9108 unpermuted sequence. In other cases we need to load the
9109 whole group, not only the number of vector stmts the
9110 permutation result fits in. */
9111 if (slp_perm
9112 && (group_size != SLP_INSTANCE_GROUP_SIZE (slp_node_instance)
9113 || !multiple_p (nunits, group_size)))
9114 {
9115 /* We don't yet generate such SLP_TREE_LOAD_PERMUTATIONs for
9116 variable VF; see vect_transform_slp_perm_load. */
9117 unsigned int const_vf = vf.to_constant ();
9118 unsigned int const_nunits = nunits.to_constant ();
9119 vec_num = CEIL (group_size * const_vf, const_nunits);
9120 group_gap_adj = vf * group_size - nunits * vec_num;
9121 }
9122 else
9123 {
9124 vec_num = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
9125 group_gap_adj
9126 = group_size - SLP_INSTANCE_GROUP_SIZE (slp_node_instance);
9127 }
9128 }
9129 else
9130 vec_num = group_size;
9131
9132 ref_type = get_group_alias_ptr_type (first_stmt_info);
9133 }
9134 else
9135 {
9136 first_stmt_info = stmt_info;
9137 first_dr_info = dr_info;
9138 group_size = vec_num = 1;
9139 group_gap_adj = 0;
9140 ref_type = reference_alias_ptr_type (DR_REF (first_dr_info->dr));
9141 }
9142
9143 alignment_support_scheme
9144 = vect_supportable_dr_alignment (first_dr_info, false);
9145 gcc_assert (alignment_support_scheme);
9146 vec_loop_masks *loop_masks
9147 = (loop_vinfo && LOOP_VINFO_FULLY_MASKED_P (loop_vinfo)
9148 ? &LOOP_VINFO_MASKS (loop_vinfo)
9149 : NULL);
9150 /* Targets with load-lane instructions must not require explicit
9151 realignment. vect_supportable_dr_alignment always returns either
9152 dr_aligned or dr_unaligned_supported for masked operations. */
9153 gcc_assert ((memory_access_type != VMAT_LOAD_STORE_LANES
9154 && !mask
9155 && !loop_masks)
9156 || alignment_support_scheme == dr_aligned
9157 || alignment_support_scheme == dr_unaligned_supported);
9158
9159 /* In case the vectorization factor (VF) is bigger than the number
9160 of elements that we can fit in a vectype (nunits), we have to generate
9161 more than one vector stmt - i.e., we need to "unroll" the
9162 vector stmt by a factor VF/nunits. In doing so, we record a pointer
9163 from one copy of the vector stmt to the next, in the field
9164 STMT_VINFO_RELATED_STMT. This is necessary in order to allow following
9165 stages to find the correct vector defs to be used when vectorizing
9166 stmts that use the defs of the current stmt. The example below
9167 illustrates the vectorization process when VF=16 and nunits=4 (i.e., we
9168 need to create 4 vectorized stmts):
9169
9170 before vectorization:
9171 RELATED_STMT VEC_STMT
9172 S1: x = memref - -
9173 S2: z = x + 1 - -
9174
9175 step 1: vectorize stmt S1:
9176 We first create the vector stmt VS1_0, and, as usual, record a
9177 pointer to it in the STMT_VINFO_VEC_STMT of the scalar stmt S1.
9178 Next, we create the vector stmt VS1_1, and record a pointer to
9179 it in the STMT_VINFO_RELATED_STMT of the vector stmt VS1_0.
9180 Similarly, for VS1_2 and VS1_3. This is the resulting chain of
9181 stmts and pointers:
9182 RELATED_STMT VEC_STMT
9183 VS1_0: vx0 = memref0 VS1_1 -
9184 VS1_1: vx1 = memref1 VS1_2 -
9185 VS1_2: vx2 = memref2 VS1_3 -
9186 VS1_3: vx3 = memref3 - -
9187 S1: x = load - VS1_0
9188 S2: z = x + 1 - -
9189
9190 See the documentation of vect_get_vec_def_for_stmt_copy for how the
9191 information we recorded in the RELATED_STMT field is used to vectorize
9192 stmt S2. */
9193
9194 /* In case of interleaving (non-unit grouped access):
9195
9196 S1: x2 = &base + 2
9197 S2: x0 = &base
9198 S3: x1 = &base + 1
9199 S4: x3 = &base + 3
9200
9201 Vectorized loads are created in the order of memory accesses
9202 starting from the access of the first stmt of the chain:
9203
9204 VS1: vx0 = &base
9205 VS2: vx1 = &base + vec_size*1
9206 VS3: vx3 = &base + vec_size*2
9207 VS4: vx4 = &base + vec_size*3
9208
9209 Then permutation statements are generated:
9210
9211 VS5: vx5 = VEC_PERM_EXPR < vx0, vx1, { 0, 2, ..., i*2 } >
9212 VS6: vx6 = VEC_PERM_EXPR < vx0, vx1, { 1, 3, ..., i*2+1 } >
9213 ...
9214
9215 And they are put in STMT_VINFO_VEC_STMT of the corresponding scalar stmts
9216 (the order of the data-refs in the output of vect_permute_load_chain
9217 corresponds to the order of scalar stmts in the interleaving chain - see
9218 the documentation of vect_permute_load_chain()).
9219 The generation of permutation stmts and recording them in
9220 STMT_VINFO_VEC_STMT is done in vect_transform_grouped_load().
9221
9222 In case of both multiple types and interleaving, the vector loads and
9223 permutation stmts above are created for every copy. The result vector
9224 stmts are put in STMT_VINFO_VEC_STMT for the first copy and in the
9225 corresponding STMT_VINFO_RELATED_STMT for the next copies. */
9226
9227 /* If the data reference is aligned (dr_aligned) or potentially unaligned
9228 on a target that supports unaligned accesses (dr_unaligned_supported)
9229 we generate the following code:
9230 p = initial_addr;
9231 indx = 0;
9232 loop {
9233 p = p + indx * vectype_size;
9234 vec_dest = *(p);
9235 indx = indx + 1;
9236 }
9237
9238 Otherwise, the data reference is potentially unaligned on a target that
9239 does not support unaligned accesses (dr_explicit_realign_optimized) -
9240 then generate the following code, in which the data in each iteration is
9241 obtained by two vector loads, one from the previous iteration, and one
9242 from the current iteration:
9243 p1 = initial_addr;
9244 msq_init = *(floor(p1))
9245 p2 = initial_addr + VS - 1;
9246 realignment_token = call target_builtin;
9247 indx = 0;
9248 loop {
9249 p2 = p2 + indx * vectype_size
9250 lsq = *(floor(p2))
9251 vec_dest = realign_load (msq, lsq, realignment_token)
9252 indx = indx + 1;
9253 msq = lsq;
9254 } */
9255
9256 /* If the misalignment remains the same throughout the execution of the
9257 loop, we can create the init_addr and permutation mask at the loop
9258 preheader. Otherwise, it needs to be created inside the loop.
9259 This can only occur when vectorizing memory accesses in the inner-loop
9260 nested within an outer-loop that is being vectorized. */
9261
9262 if (nested_in_vect_loop
9263 && !multiple_p (DR_STEP_ALIGNMENT (dr_info->dr),
9264 GET_MODE_SIZE (TYPE_MODE (vectype))))
9265 {
9266 gcc_assert (alignment_support_scheme != dr_explicit_realign_optimized);
9267 compute_in_loop = true;
9268 }
9269
9270 if ((alignment_support_scheme == dr_explicit_realign_optimized
9271 || alignment_support_scheme == dr_explicit_realign)
9272 && !compute_in_loop)
9273 {
9274 msq = vect_setup_realignment (first_stmt_info_for_drptr
9275 ? first_stmt_info_for_drptr
9276 : first_stmt_info, gsi, &realignment_token,
9277 alignment_support_scheme, NULL_TREE,
9278 &at_loop);
9279 if (alignment_support_scheme == dr_explicit_realign_optimized)
9280 {
9281 phi = as_a <gphi *> (SSA_NAME_DEF_STMT (msq));
9282 byte_offset = size_binop (MINUS_EXPR, TYPE_SIZE_UNIT (vectype),
9283 size_one_node);
9284 }
9285 }
9286 else
9287 at_loop = loop;
9288
9289 if (memory_access_type == VMAT_CONTIGUOUS_REVERSE)
9290 offset = size_int (-TYPE_VECTOR_SUBPARTS (vectype) + 1);
9291
9292 tree bump;
9293 tree vec_offset = NULL_TREE;
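/* Select the aggregate access type and the pointer increment: one
   access covers VEC_NUM vectors for load-lanes, a single element for
   strided accesses implemented as gather/scatter, and a single vector
   otherwise.  BUMP is the byte increment applied to the data-ref
   pointer between accesses (unused for the native gather path).  */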
9294 if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
9295 {
9296 aggr_type = NULL_TREE;
9297 bump = NULL_TREE;
9298 }
9299 else if (memory_access_type == VMAT_GATHER_SCATTER)
9300 {
9301 aggr_type = elem_type;
9302 vect_get_strided_load_store_ops (stmt_info, loop_vinfo, &gs_info,
9303 &bump, &vec_offset);
9304 }
9305 else
9306 {
9307 if (memory_access_type == VMAT_LOAD_STORE_LANES)
9308 aggr_type = build_array_type_nelts (elem_type, vec_num * nunits);
9309 else
9310 aggr_type = vectype;
9311 bump = vect_get_data_ptr_increment (dr_info, aggr_type,
9312 memory_access_type);
9313 }
9314
9315 tree vec_mask = NULL_TREE;
9316 prev_stmt_info = NULL;
9317 poly_uint64 group_elt = 0;
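/* Main transformation loop: one iteration per vector copy.  Each
   iteration updates the data-ref pointer, emits VEC_NUM vector loads
   (a single load-lanes call covers all of them) and, for grouped or
   permuted SLP accesses, collects the results in DR_CHAIN for the
   permutation code below.  */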
9318 for (j = 0; j < ncopies; j++)
9319 {
9320 stmt_vec_info new_stmt_info = NULL;
9321 /* 1. Create the vector or array pointer update chain. */
9322 if (j == 0)
9323 {
9324 bool simd_lane_access_p
9325 = STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) != 0;
9326 if (simd_lane_access_p
9327 && TREE_CODE (DR_BASE_ADDRESS (first_dr_info->dr)) == ADDR_EXPR
9328 && VAR_P (TREE_OPERAND (DR_BASE_ADDRESS (first_dr_info->dr), 0))
9329 && integer_zerop (DR_OFFSET (first_dr_info->dr))
9330 && integer_zerop (DR_INIT (first_dr_info->dr))
9331 && alias_sets_conflict_p (get_alias_set (aggr_type),
9332 get_alias_set (TREE_TYPE (ref_type)))
9333 && (alignment_support_scheme == dr_aligned
9334 || alignment_support_scheme == dr_unaligned_supported))
9335 {
9336 dataref_ptr = unshare_expr (DR_BASE_ADDRESS (first_dr_info->dr));
9337 dataref_offset = build_int_cst (ref_type, 0);
9338 }
9339 else if (first_stmt_info_for_drptr
9340 && first_stmt_info != first_stmt_info_for_drptr)
9341 {
9342 dataref_ptr
9343 = vect_create_data_ref_ptr (first_stmt_info_for_drptr,
9344 aggr_type, at_loop, offset, &dummy,
9345 gsi, &ptr_incr, simd_lane_access_p,
9346 byte_offset, bump);
9347 /* Adjust the pointer by the difference to first_stmt. */
9348 data_reference_p ptrdr
9349 = STMT_VINFO_DATA_REF (first_stmt_info_for_drptr);
9350 tree diff
9351 = fold_convert (sizetype,
9352 size_binop (MINUS_EXPR,
9353 DR_INIT (first_dr_info->dr),
9354 DR_INIT (ptrdr)));
9355 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi,
9356 stmt_info, diff);
9357 }
9358 else if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
9359 vect_get_gather_scatter_ops (loop, stmt_info, &gs_info,
9360 &dataref_ptr, &vec_offset);
9361 else
9362 dataref_ptr
9363 = vect_create_data_ref_ptr (first_stmt_info, aggr_type, at_loop,
9364 offset, &dummy, gsi, &ptr_incr,
9365 simd_lane_access_p,
9366 byte_offset, bump);
9367 if (mask)
9368 {
9369 if (slp_node)
9370 {
9371 auto_vec<vec<tree> > vec_defs (1);
9372 vect_get_slp_defs (slp_node, &vec_defs);
9373 vec_mask = vec_defs[0][0];
9374 }
9375 else
9376 vec_mask = vect_get_vec_def_for_operand (mask, stmt_info,
9377 mask_vectype);
9378 }
9379 }
9380 else
9381 {
9382 if (dataref_offset)
9383 dataref_offset = int_const_binop (PLUS_EXPR, dataref_offset,
9384 bump);
9385 else if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
9386 vec_offset = vect_get_vec_def_for_stmt_copy (vinfo, vec_offset);
9387 else
9388 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi,
9389 stmt_info, bump);
9390 if (mask)
9391 vec_mask = vect_get_vec_def_for_stmt_copy (vinfo, vec_mask);
9392 }
9393
9394 if (grouped_load || slp_perm)
9395 dr_chain.create (vec_num);
9396
9397 if (memory_access_type == VMAT_LOAD_STORE_LANES)
9398 {
9399 tree vec_array;
9400
9401 vec_array = create_vector_array (vectype, vec_num);
9402
9403 tree final_mask = NULL_TREE;
9404 if (loop_masks)
9405 final_mask = vect_get_loop_mask (gsi, loop_masks, ncopies,
9406 vectype, j);
9407 if (vec_mask)
9408 final_mask = prepare_load_store_mask (mask_vectype, final_mask,
9409 vec_mask, gsi);
9410
9411 gcall *call;
9412 if (final_mask)
9413 {
9414 /* Emit:
9415 VEC_ARRAY = MASK_LOAD_LANES (DATAREF_PTR, ALIAS_PTR,
9416 VEC_MASK). */
9417 unsigned int align = TYPE_ALIGN_UNIT (TREE_TYPE (vectype));
9418 tree alias_ptr = build_int_cst (ref_type, align);
9419 call = gimple_build_call_internal (IFN_MASK_LOAD_LANES, 3,
9420 dataref_ptr, alias_ptr,
9421 final_mask);
9422 }
9423 else
9424 {
9425 /* Emit:
9426 VEC_ARRAY = LOAD_LANES (MEM_REF[...all elements...]). */
9427 data_ref = create_array_ref (aggr_type, dataref_ptr, ref_type);
9428 call = gimple_build_call_internal (IFN_LOAD_LANES, 1, data_ref);
9429 }
9430 gimple_call_set_lhs (call, vec_array);
9431 gimple_call_set_nothrow (call, true);
9432 new_stmt_info = vect_finish_stmt_generation (stmt_info, call, gsi);
9433
9434 /* Extract each vector into an SSA_NAME. */
9435 for (i = 0; i < vec_num; i++)
9436 {
9437 new_temp = read_vector_array (stmt_info, gsi, scalar_dest,
9438 vec_array, i);
9439 dr_chain.quick_push (new_temp);
9440 }
9441
9442 /* Record the mapping between SSA_NAMEs and statements. */
9443 vect_record_grouped_load_vectors (stmt_info, dr_chain);
9444
9445 /* Record that VEC_ARRAY is now dead. */
9446 vect_clobber_variable (stmt_info, gsi, vec_array);
9447 }
9448 else
9449 {
9450 for (i = 0; i < vec_num; i++)
9451 {
9452 tree final_mask = NULL_TREE;
9453 if (loop_masks
9454 && memory_access_type != VMAT_INVARIANT)
9455 final_mask = vect_get_loop_mask (gsi, loop_masks,
9456 vec_num * ncopies,
9457 vectype, vec_num * j + i);
9458 if (vec_mask)
9459 final_mask = prepare_load_store_mask (mask_vectype, final_mask,
9460 vec_mask, gsi);
9461
9462 if (i > 0)
9463 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi,
9464 stmt_info, bump);
9465
9466 /* 2. Create the vector-load in the loop. */
9467 gimple *new_stmt = NULL;
9468 switch (alignment_support_scheme)
9469 {
9470 case dr_aligned:
9471 case dr_unaligned_supported:
9472 {
9473 unsigned int misalign;
9474 unsigned HOST_WIDE_INT align;
9475
9476 if (memory_access_type == VMAT_GATHER_SCATTER)
9477 {
9478 tree zero = build_zero_cst (vectype);
9479 tree scale = size_int (gs_info.scale);
9480 gcall *call;
9481 if (loop_masks)
9482 call = gimple_build_call_internal
9483 (IFN_MASK_GATHER_LOAD, 5, dataref_ptr,
9484 vec_offset, scale, zero, final_mask);
9485 else
9486 call = gimple_build_call_internal
9487 (IFN_GATHER_LOAD, 4, dataref_ptr,
9488 vec_offset, scale, zero);
9489 gimple_call_set_nothrow (call, true);
9490 new_stmt = call;
9491 data_ref = NULL_TREE;
9492 break;
9493 }
9494
9495 align =
9496 known_alignment (DR_TARGET_ALIGNMENT (first_dr_info));
9497 if (alignment_support_scheme == dr_aligned)
9498 {
9499 gcc_assert (aligned_access_p (first_dr_info));
9500 misalign = 0;
9501 }
9502 else if (DR_MISALIGNMENT (first_dr_info) == -1)
9503 {
9504 align = dr_alignment
9505 (vect_dr_behavior (first_dr_info));
9506 misalign = 0;
9507 }
9508 else
9509 misalign = DR_MISALIGNMENT (first_dr_info);
9510 if (dataref_offset == NULL_TREE
9511 && TREE_CODE (dataref_ptr) == SSA_NAME)
9512 set_ptr_info_alignment (get_ptr_info (dataref_ptr),
9513 align, misalign);
9514
9515 if (final_mask)
9516 {
9517 align = least_bit_hwi (misalign | align);
9518 tree ptr = build_int_cst (ref_type, align);
9519 gcall *call
9520 = gimple_build_call_internal (IFN_MASK_LOAD, 3,
9521 dataref_ptr, ptr,
9522 final_mask);
9523 gimple_call_set_nothrow (call, true);
9524 new_stmt = call;
9525 data_ref = NULL_TREE;
9526 }
9527 else
9528 {
9529 tree ltype = vectype;
9530 /* If there's no peeling for gaps but we have a gap
9531 with SLP loads, then load only the lower half of
9532 the vector. See get_group_load_store_type for
9533 when we apply this optimization. */
9534 if (slp
9535 && loop_vinfo
9536 && !LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo)
9537 && DR_GROUP_GAP (first_stmt_info) != 0
9538 && known_eq (nunits,
9539 (group_size
9540 - DR_GROUP_GAP (first_stmt_info)) * 2)
9541 && known_eq (nunits, group_size))
9542 ltype = build_vector_type (TREE_TYPE (vectype),
9543 (group_size
9544 - DR_GROUP_GAP
9545 (first_stmt_info)));
9546 data_ref
9547 = fold_build2 (MEM_REF, ltype, dataref_ptr,
9548 dataref_offset
9549 ? dataref_offset
9550 : build_int_cst (ref_type, 0));
9551 if (alignment_support_scheme == dr_aligned)
9552 ;
9553 else if (DR_MISALIGNMENT (first_dr_info) == -1)
9554 TREE_TYPE (data_ref)
9555 = build_aligned_type (TREE_TYPE (data_ref),
9556 align * BITS_PER_UNIT);
9557 else
9558 TREE_TYPE (data_ref)
9559 = build_aligned_type (TREE_TYPE (data_ref),
9560 TYPE_ALIGN (elem_type));
9561 if (ltype != vectype)
9562 {
9563 vect_copy_ref_info (data_ref, DR_REF (first_dr_info->dr));
9564 tree tem = make_ssa_name (ltype);
9565 new_stmt = gimple_build_assign (tem, data_ref);
9566 vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
9567 data_ref = NULL;
9568 vec<constructor_elt, va_gc> *v;
9569 vec_alloc (v, 2);
9570 CONSTRUCTOR_APPEND_ELT (v, NULL_TREE, tem);
9571 CONSTRUCTOR_APPEND_ELT (v, NULL_TREE,
9572 build_zero_cst (ltype));
9573 new_stmt
9574 = gimple_build_assign (vec_dest,
9575 build_constructor
9576 (vectype, v));
9577 }
9578 }
9579 break;
9580 }
9581 case dr_explicit_realign:
9582 {
9583 tree ptr, bump;
9584
9585 tree vs = size_int (TYPE_VECTOR_SUBPARTS (vectype));
9586
9587 if (compute_in_loop)
9588 msq = vect_setup_realignment (first_stmt_info, gsi,
9589 &realignment_token,
9590 dr_explicit_realign,
9591 dataref_ptr, NULL);
9592
9593 if (TREE_CODE (dataref_ptr) == SSA_NAME)
9594 ptr = copy_ssa_name (dataref_ptr);
9595 else
9596 ptr = make_ssa_name (TREE_TYPE (dataref_ptr));
9597 // For explicit realign the target alignment should be
9598 // known at compile time.
9599 unsigned HOST_WIDE_INT align =
9600 DR_TARGET_ALIGNMENT (first_dr_info).to_constant ();
9601 new_stmt = gimple_build_assign
9602 (ptr, BIT_AND_EXPR, dataref_ptr,
9603 build_int_cst
9604 (TREE_TYPE (dataref_ptr),
9605 -(HOST_WIDE_INT) align));
9606 vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
9607 data_ref
9608 = build2 (MEM_REF, vectype, ptr,
9609 build_int_cst (ref_type, 0));
9610 vect_copy_ref_info (data_ref, DR_REF (first_dr_info->dr));
9611 vec_dest = vect_create_destination_var (scalar_dest,
9612 vectype);
9613 new_stmt = gimple_build_assign (vec_dest, data_ref);
9614 new_temp = make_ssa_name (vec_dest, new_stmt);
9615 gimple_assign_set_lhs (new_stmt, new_temp);
9616 gimple_move_vops (new_stmt, stmt_info->stmt);
9617 vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
9618 msq = new_temp;
9619
9620 bump = size_binop (MULT_EXPR, vs,
9621 TYPE_SIZE_UNIT (elem_type));
9622 bump = size_binop (MINUS_EXPR, bump, size_one_node);
9623 ptr = bump_vector_ptr (dataref_ptr, NULL, gsi,
9624 stmt_info, bump);
9625 new_stmt = gimple_build_assign
9626 (NULL_TREE, BIT_AND_EXPR, ptr,
9627 build_int_cst
9628 (TREE_TYPE (ptr), -(HOST_WIDE_INT) align));
9629 ptr = copy_ssa_name (ptr, new_stmt);
9630 gimple_assign_set_lhs (new_stmt, ptr);
9631 vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
9632 data_ref
9633 = build2 (MEM_REF, vectype, ptr,
9634 build_int_cst (ref_type, 0));
9635 break;
9636 }
9637 case dr_explicit_realign_optimized:
9638 {
9639 if (TREE_CODE (dataref_ptr) == SSA_NAME)
9640 new_temp = copy_ssa_name (dataref_ptr);
9641 else
9642 new_temp = make_ssa_name (TREE_TYPE (dataref_ptr));
9643 // We should only be doing this if we know the target
9644 // alignment at compile time.
9645 unsigned HOST_WIDE_INT align =
9646 DR_TARGET_ALIGNMENT (first_dr_info).to_constant ();
9647 new_stmt = gimple_build_assign
9648 (new_temp, BIT_AND_EXPR, dataref_ptr,
9649 build_int_cst (TREE_TYPE (dataref_ptr),
9650 -(HOST_WIDE_INT) align));
9651 vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
9652 data_ref
9653 = build2 (MEM_REF, vectype, new_temp,
9654 build_int_cst (ref_type, 0));
9655 break;
9656 }
9657 default:
9658 gcc_unreachable ();
9659 }
9660 vec_dest = vect_create_destination_var (scalar_dest, vectype);
9661 /* DATA_REF is null if we've already built the statement. */
9662 if (data_ref)
9663 {
9664 vect_copy_ref_info (data_ref, DR_REF (first_dr_info->dr));
9665 new_stmt = gimple_build_assign (vec_dest, data_ref);
9666 }
9667 new_temp = make_ssa_name (vec_dest, new_stmt);
9668 gimple_set_lhs (new_stmt, new_temp);
9669 new_stmt_info
9670 = vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
9671
9672 /* 3. Handle explicit realignment if necessary/supported.
9673 Create in loop:
9674 vec_dest = realign_load (msq, lsq, realignment_token) */
9675 if (alignment_support_scheme == dr_explicit_realign_optimized
9676 || alignment_support_scheme == dr_explicit_realign)
9677 {
9678 lsq = gimple_assign_lhs (new_stmt);
9679 if (!realignment_token)
9680 realignment_token = dataref_ptr;
9681 vec_dest = vect_create_destination_var (scalar_dest, vectype);
9682 new_stmt = gimple_build_assign (vec_dest, REALIGN_LOAD_EXPR,
9683 msq, lsq, realignment_token);
9684 new_temp = make_ssa_name (vec_dest, new_stmt);
9685 gimple_assign_set_lhs (new_stmt, new_temp);
9686 new_stmt_info
9687 = vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
9688
9689 if (alignment_support_scheme == dr_explicit_realign_optimized)
9690 {
9691 gcc_assert (phi);
9692 if (i == vec_num - 1 && j == ncopies - 1)
9693 add_phi_arg (phi, lsq,
9694 loop_latch_edge (containing_loop),
9695 UNKNOWN_LOCATION);
9696 msq = lsq;
9697 }
9698 }
9699
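/* A reverse (negative step) access loaded the elements in memory
   order; permute them back into the original scalar order.  */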
9700 if (memory_access_type == VMAT_CONTIGUOUS_REVERSE)
9701 {
9702 tree perm_mask = perm_mask_for_reverse (vectype);
9703 new_temp = permute_vec_elements (new_temp, new_temp,
9704 perm_mask, stmt_info, gsi);
9705 new_stmt_info = vinfo->lookup_def (new_temp);
9706 }
9707
9708 /* Collect vector loads and later create their permutation in
9709 vect_transform_grouped_load (). */
9710 if (grouped_load || slp_perm)
9711 dr_chain.quick_push (new_temp);
9712
9713 /* Store vector loads in the corresponding SLP_NODE. */
9714 if (slp && !slp_perm)
9715 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt_info);
9716
9717 /* With an SLP permutation we load the gaps as well; without
9718 one we need to skip the gaps after we manage to fully load
9719 all elements. group_gap_adj is DR_GROUP_SIZE here. */
9720 group_elt += nunits;
9721 if (maybe_ne (group_gap_adj, 0U)
9722 && !slp_perm
9723 && known_eq (group_elt, group_size - group_gap_adj))
9724 {
9725 poly_wide_int bump_val
9726 = (wi::to_wide (TYPE_SIZE_UNIT (elem_type))
9727 * group_gap_adj);
9728 tree bump = wide_int_to_tree (sizetype, bump_val);
9729 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi,
9730 stmt_info, bump);
9731 group_elt = 0;
9732 }
9733 }
9734 /* Bump the vector pointer to account for a gap or for excess
9735 elements loaded for a permuted SLP load. */
9736 if (maybe_ne (group_gap_adj, 0U) && slp_perm)
9737 {
9738 poly_wide_int bump_val
9739 = (wi::to_wide (TYPE_SIZE_UNIT (elem_type))
9740 * group_gap_adj);
9741 tree bump = wide_int_to_tree (sizetype, bump_val);
9742 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi,
9743 stmt_info, bump);
9744 }
9745 }
9746
9747 if (slp && !slp_perm)
9748 continue;
9749
9750 if (slp_perm)
9751 {
9752 unsigned n_perms;
9753 if (!vect_transform_slp_perm_load (slp_node, dr_chain, gsi, vf,
9754 slp_node_instance, false,
9755 &n_perms))
9756 {
9757 dr_chain.release ();
9758 return false;
9759 }
9760 }
9761 else
9762 {
9763 if (grouped_load)
9764 {
9765 if (memory_access_type != VMAT_LOAD_STORE_LANES)
9766 vect_transform_grouped_load (stmt_info, dr_chain,
9767 group_size, gsi);
9768 *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
9769 }
9770 else
9771 {
9772 if (j == 0)
9773 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt_info;
9774 else
9775 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt_info;
9776 prev_stmt_info = new_stmt_info;
9777 }
9778 }
9779 dr_chain.release ();
9780 }
9781
9782 return true;
9783 }
9784
9785 /* Function vect_is_simple_cond.
9786
9787 Input:
9788 LOOP - the loop that is being vectorized.
9789 COND - Condition that is checked for simple use.
9790
9791 Output:
9792 *COMP_VECTYPE - the vector type for the comparison.
9793 *DTS - The def types for the arguments of the comparison.
9794
9795 Returns whether a COND can be vectorized. Checks whether
9796 condition operands are supportable using vect_is_simple_use. */
9797
9798 static bool
9799 vect_is_simple_cond (tree cond, vec_info *vinfo,
9800 tree *comp_vectype, enum vect_def_type *dts,
9801 tree vectype)
9802 {
9803 tree lhs, rhs;
9804 tree vectype1 = NULL_TREE, vectype2 = NULL_TREE;
9805
9806 /* Mask case. */
9807 if (TREE_CODE (cond) == SSA_NAME
9808 && VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (cond)))
9809 {
9810 if (!vect_is_simple_use (cond, vinfo, &dts[0], comp_vectype)
9811 || !*comp_vectype
9812 || !VECTOR_BOOLEAN_TYPE_P (*comp_vectype))
9813 return false;
9814 return true;
9815 }
9816
9817 if (!COMPARISON_CLASS_P (cond))
9818 return false;
9819
9820 lhs = TREE_OPERAND (cond, 0);
9821 rhs = TREE_OPERAND (cond, 1);
9822
9823 if (TREE_CODE (lhs) == SSA_NAME)
9824 {
9825 if (!vect_is_simple_use (lhs, vinfo, &dts[0], &vectype1))
9826 return false;
9827 }
9828 else if (TREE_CODE (lhs) == INTEGER_CST || TREE_CODE (lhs) == REAL_CST
9829 || TREE_CODE (lhs) == FIXED_CST)
9830 dts[0] = vect_constant_def;
9831 else
9832 return false;
9833
9834 if (TREE_CODE (rhs) == SSA_NAME)
9835 {
9836 if (!vect_is_simple_use (rhs, vinfo, &dts[1], &vectype2))
9837 return false;
9838 }
9839 else if (TREE_CODE (rhs) == INTEGER_CST || TREE_CODE (rhs) == REAL_CST
9840 || TREE_CODE (rhs) == FIXED_CST)
9841 dts[1] = vect_constant_def;
9842 else
9843 return false;
9844
9845 if (vectype1 && vectype2
9846 && maybe_ne (TYPE_VECTOR_SUBPARTS (vectype1),
9847 TYPE_VECTOR_SUBPARTS (vectype2)))
9848 return false;
9849
9850 *comp_vectype = vectype1 ? vectype1 : vectype2;
9851 /* Invariant comparison. */
9852 if (! *comp_vectype)
9853 {
9854 tree scalar_type = TREE_TYPE (lhs);
9855 /* If we can widen the comparison to match vectype do so. */
9856 if (INTEGRAL_TYPE_P (scalar_type)
9857 && vectype
9858 && tree_int_cst_lt (TYPE_SIZE (scalar_type),
9859 TYPE_SIZE (TREE_TYPE (vectype))))
9860 scalar_type = build_nonstandard_integer_type
9861 (tree_to_uhwi (TYPE_SIZE (TREE_TYPE (vectype))),
9862 TYPE_UNSIGNED (scalar_type));
9863 *comp_vectype = get_vectype_for_scalar_type (vinfo, scalar_type);
9864 }
9865
9866 return true;
9867 }
9868
9869 /* vectorizable_condition.
9870
9871 Check if STMT_INFO is a conditional modify expression that can be vectorized.
9872 If VEC_STMT is also passed, vectorize STMT_INFO: create a vectorized
9873 stmt using VEC_COND_EXPR to replace it, put it in VEC_STMT, and insert it
9874 at GSI.
9875
9876 When STMT_INFO is vectorized as a nested cycle, for_reduction is true.
9877
9878 Return true if STMT_INFO is vectorizable in this way. */
9879
9880 static bool
9881 vectorizable_condition (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
9882 stmt_vec_info *vec_stmt,
9883 slp_tree slp_node, stmt_vector_for_cost *cost_vec)
9884 {
9885 vec_info *vinfo = stmt_info->vinfo;
9886 tree scalar_dest = NULL_TREE;
9887 tree vec_dest = NULL_TREE;
9888 tree cond_expr, cond_expr0 = NULL_TREE, cond_expr1 = NULL_TREE;
9889 tree then_clause, else_clause;
9890 tree comp_vectype = NULL_TREE;
9891 tree vec_cond_lhs = NULL_TREE, vec_cond_rhs = NULL_TREE;
9892 tree vec_then_clause = NULL_TREE, vec_else_clause = NULL_TREE;
9893 tree vec_compare;
9894 tree new_temp;
9895 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
9896 enum vect_def_type dts[4]
9897 = {vect_unknown_def_type, vect_unknown_def_type,
9898 vect_unknown_def_type, vect_unknown_def_type};
9899 int ndts = 4;
9900 int ncopies;
9901 enum tree_code code, cond_code, bitop1 = NOP_EXPR, bitop2 = NOP_EXPR;
9902 stmt_vec_info prev_stmt_info = NULL;
9903 int i, j;
9904 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
9905 vec<tree> vec_oprnds0 = vNULL;
9906 vec<tree> vec_oprnds1 = vNULL;
9907 vec<tree> vec_oprnds2 = vNULL;
9908 vec<tree> vec_oprnds3 = vNULL;
9909 tree vec_cmp_type;
9910 bool masked = false;
9911
9912 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
9913 return false;
9914
9915 /* Is this a vectorizable conditional operation? */
9916 gassign *stmt = dyn_cast <gassign *> (stmt_info->stmt);
9917 if (!stmt)
9918 return false;
9919
9920 code = gimple_assign_rhs_code (stmt);
9921 if (code != COND_EXPR)
9922 return false;
9923
9924 stmt_vec_info reduc_info = NULL;
9925 int reduc_index = -1;
9926 vect_reduction_type reduction_type = TREE_CODE_REDUCTION;
9927 bool for_reduction
9928 = STMT_VINFO_REDUC_DEF (vect_orig_stmt (stmt_info)) != NULL;
9929 if (for_reduction)
9930 {
9931 if (STMT_SLP_TYPE (stmt_info))
9932 return false;
9933 reduc_info = info_for_reduction (stmt_info);
9934 reduction_type = STMT_VINFO_REDUC_TYPE (reduc_info);
9935 reduc_index = STMT_VINFO_REDUC_IDX (stmt_info);
9936 gcc_assert (reduction_type != EXTRACT_LAST_REDUCTION
9937 || reduc_index != -1);
9938 }
9939 else
9940 {
9941 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
9942 return false;
9943
9944 /* FORNOW: only supported as part of a reduction. */
9945 if (STMT_VINFO_LIVE_P (stmt_info))
9946 {
9947 if (dump_enabled_p ())
9948 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
9949 "value used after loop.\n");
9950 return false;
9951 }
9952 }
9953
9954 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
9955 tree vectype1 = NULL_TREE, vectype2 = NULL_TREE;
9956
9957 if (slp_node)
9958 ncopies = 1;
9959 else
9960 ncopies = vect_get_num_copies (loop_vinfo, vectype);
9961
9962 gcc_assert (ncopies >= 1);
9963 if (for_reduction && ncopies > 1)
9964 return false; /* FORNOW */
9965
9966 cond_expr = gimple_assign_rhs1 (stmt);
9967 then_clause = gimple_assign_rhs2 (stmt);
9968 else_clause = gimple_assign_rhs3 (stmt);
9969
9970 if (!vect_is_simple_cond (cond_expr, stmt_info->vinfo,
9971 &comp_vectype, &dts[0], slp_node ? NULL : vectype)
9972 || !comp_vectype)
9973 return false;
9974
9975 if (!vect_is_simple_use (then_clause, stmt_info->vinfo, &dts[2], &vectype1))
9976 return false;
9977 if (!vect_is_simple_use (else_clause, stmt_info->vinfo, &dts[3], &vectype2))
9978 return false;
9979
9980 if (vectype1 && !useless_type_conversion_p (vectype, vectype1))
9981 return false;
9982
9983 if (vectype2 && !useless_type_conversion_p (vectype, vectype2))
9984 return false;
9985
9986 masked = !COMPARISON_CLASS_P (cond_expr);
9987 vec_cmp_type = truth_type_for (comp_vectype);
9988
9989 if (vec_cmp_type == NULL_TREE)
9990 return false;
9991
9992 cond_code = TREE_CODE (cond_expr);
9993 if (!masked)
9994 {
9995 cond_expr0 = TREE_OPERAND (cond_expr, 0);
9996 cond_expr1 = TREE_OPERAND (cond_expr, 1);
9997 }
9998
9999 /* For conditional reductions, the "then" value needs to be the candidate
10000 value calculated by this iteration while the "else" value needs to be
10001 the result carried over from previous iterations. If the COND_EXPR
10002 is the other way around, we need to swap it. */
10003 bool must_invert_cmp_result = false;
10004 if (reduction_type == EXTRACT_LAST_REDUCTION && reduc_index == 1)
10005 {
10006 if (masked)
10007 must_invert_cmp_result = true;
10008 else
10009 {
10010 bool honor_nans = HONOR_NANS (TREE_TYPE (cond_expr0));
10011 tree_code new_code = invert_tree_comparison (cond_code, honor_nans);
10012 if (new_code == ERROR_MARK)
10013 must_invert_cmp_result = true;
10014 else
10015 cond_code = new_code;
10016 }
10017 /* Make sure we don't accidentally use the old condition. */
10018 cond_expr = NULL_TREE;
10019 std::swap (then_clause, else_clause);
10020 }
10021
10022 if (!masked && VECTOR_BOOLEAN_TYPE_P (comp_vectype))
10023 {
10024 /* Boolean values may have another representation in vectors
10025 and therefore we prefer bit operations over comparison for
10026 them (which also works for scalar masks). We store opcodes
10027 to use in bitop1 and bitop2. Statement is vectorized as
10028 BITOP2 (rhs1 BITOP1 rhs2) or rhs1 BITOP2 (BITOP1 rhs2)
10029 depending on bitop1 and bitop2 arity. */
10030 switch (cond_code)
10031 {
10032 case GT_EXPR:
10033 bitop1 = BIT_NOT_EXPR;
10034 bitop2 = BIT_AND_EXPR;
10035 break;
10036 case GE_EXPR:
10037 bitop1 = BIT_NOT_EXPR;
10038 bitop2 = BIT_IOR_EXPR;
10039 break;
10040 case LT_EXPR:
10041 bitop1 = BIT_NOT_EXPR;
10042 bitop2 = BIT_AND_EXPR;
10043 std::swap (cond_expr0, cond_expr1);
10044 break;
10045 case LE_EXPR:
10046 bitop1 = BIT_NOT_EXPR;
10047 bitop2 = BIT_IOR_EXPR;
10048 std::swap (cond_expr0, cond_expr1);
10049 break;
10050 case NE_EXPR:
10051 bitop1 = BIT_XOR_EXPR;
10052 break;
10053 case EQ_EXPR:
10054 bitop1 = BIT_XOR_EXPR;
10055 bitop2 = BIT_NOT_EXPR;
10056 break;
10057 default:
10058 return false;
10059 }
10060 cond_code = SSA_NAME;
10061 }
10062
10063 if (!vec_stmt)
10064 {
10065 if (bitop1 != NOP_EXPR)
10066 {
10067 machine_mode mode = TYPE_MODE (comp_vectype);
10068 optab optab;
10069
10070 optab = optab_for_tree_code (bitop1, comp_vectype, optab_default);
10071 if (!optab || optab_handler (optab, mode) == CODE_FOR_nothing)
10072 return false;
10073
10074 if (bitop2 != NOP_EXPR)
10075 {
10076 optab = optab_for_tree_code (bitop2, comp_vectype,
10077 optab_default);
10078 if (!optab || optab_handler (optab, mode) == CODE_FOR_nothing)
10079 return false;
10080 }
10081 }
10082 if (expand_vec_cond_expr_p (vectype, comp_vectype,
10083 cond_code))
10084 {
10085 STMT_VINFO_TYPE (stmt_info) = condition_vec_info_type;
10086 vect_model_simple_cost (stmt_info, ncopies, dts, ndts, slp_node,
10087 cost_vec);
10088 return true;
10089 }
10090 return false;
10091 }
10092
10093 /* Transform. */
10094
10095 if (!slp_node)
10096 {
10097 vec_oprnds0.create (1);
10098 vec_oprnds1.create (1);
10099 vec_oprnds2.create (1);
10100 vec_oprnds3.create (1);
10101 }
10102
10103 /* Handle def. */
10104 scalar_dest = gimple_assign_lhs (stmt);
10105 if (reduction_type != EXTRACT_LAST_REDUCTION)
10106 vec_dest = vect_create_destination_var (scalar_dest, vectype);
10107
10108 /* Handle cond expr. */
10109 for (j = 0; j < ncopies; j++)
10110 {
10111 bool swap_cond_operands = false;
10112
10113 /* See whether another part of the vectorized code applies a loop
10114 mask to the condition, or to its inverse. */
10115
10116 vec_loop_masks *masks = NULL;
10117 if (loop_vinfo && LOOP_VINFO_FULLY_MASKED_P (loop_vinfo))
10118 {
10119 if (reduction_type == EXTRACT_LAST_REDUCTION)
10120 masks = &LOOP_VINFO_MASKS (loop_vinfo);
10121 else
10122 {
10123 scalar_cond_masked_key cond (cond_expr, ncopies);
10124 if (loop_vinfo->scalar_cond_masked_set.contains (cond))
10125 masks = &LOOP_VINFO_MASKS (loop_vinfo);
10126 else
10127 {
10128 bool honor_nans = HONOR_NANS (TREE_TYPE (cond.op0));
10129 cond.code = invert_tree_comparison (cond.code, honor_nans);
10130 if (loop_vinfo->scalar_cond_masked_set.contains (cond))
10131 {
10132 masks = &LOOP_VINFO_MASKS (loop_vinfo);
10133 cond_code = cond.code;
10134 swap_cond_operands = true;
10135 }
10136 }
10137 }
10138 }
10139
10140 stmt_vec_info new_stmt_info = NULL;
10141 if (j == 0)
10142 {
10143 if (slp_node)
10144 {
10145 auto_vec<vec<tree>, 4> vec_defs;
10146 vect_get_slp_defs (slp_node, &vec_defs);
10147 vec_oprnds3 = vec_defs.pop ();
10148 vec_oprnds2 = vec_defs.pop ();
10149 if (!masked)
10150 vec_oprnds1 = vec_defs.pop ();
10151 vec_oprnds0 = vec_defs.pop ();
10152 }
10153 else
10154 {
10155 if (masked)
10156 {
10157 vec_cond_lhs
10158 = vect_get_vec_def_for_operand (cond_expr, stmt_info,
10159 comp_vectype);
10160 }
10161 else
10162 {
10163 vec_cond_lhs
10164 = vect_get_vec_def_for_operand (cond_expr0,
10165 stmt_info, comp_vectype);
10166 vec_cond_rhs
10167 = vect_get_vec_def_for_operand (cond_expr1,
10168 stmt_info, comp_vectype);
10169 }
10170 vec_then_clause = vect_get_vec_def_for_operand (then_clause,
10171 stmt_info);
10172 if (reduction_type != EXTRACT_LAST_REDUCTION)
10173 vec_else_clause = vect_get_vec_def_for_operand (else_clause,
10174 stmt_info);
10175 }
10176 }
10177 else
10178 {
10179 vec_cond_lhs
10180 = vect_get_vec_def_for_stmt_copy (vinfo, vec_oprnds0.pop ());
10181 if (!masked)
10182 vec_cond_rhs
10183 = vect_get_vec_def_for_stmt_copy (vinfo, vec_oprnds1.pop ());
10184
10185 vec_then_clause = vect_get_vec_def_for_stmt_copy (vinfo,
10186 vec_oprnds2.pop ());
10187 vec_else_clause = vect_get_vec_def_for_stmt_copy (vinfo,
10188 vec_oprnds3.pop ());
10189 }
10190
10191 if (!slp_node)
10192 {
10193 vec_oprnds0.quick_push (vec_cond_lhs);
10194 if (!masked)
10195 vec_oprnds1.quick_push (vec_cond_rhs);
10196 vec_oprnds2.quick_push (vec_then_clause);
10197 vec_oprnds3.quick_push (vec_else_clause);
10198 }
10199
10200 /* Arguments are ready. Create the new vector stmt. */
10201 FOR_EACH_VEC_ELT (vec_oprnds0, i, vec_cond_lhs)
10202 {
10203 vec_then_clause = vec_oprnds2[i];
10204 vec_else_clause = vec_oprnds3[i];
10205
10206 if (swap_cond_operands)
10207 std::swap (vec_then_clause, vec_else_clause);
10208
10209 if (masked)
10210 vec_compare = vec_cond_lhs;
10211 else
10212 {
10213 vec_cond_rhs = vec_oprnds1[i];
10214 if (bitop1 == NOP_EXPR)
10215 vec_compare = build2 (cond_code, vec_cmp_type,
10216 vec_cond_lhs, vec_cond_rhs);
10217 else
10218 {
10219 new_temp = make_ssa_name (vec_cmp_type);
10220 gassign *new_stmt;
10221 if (bitop1 == BIT_NOT_EXPR)
10222 new_stmt = gimple_build_assign (new_temp, bitop1,
10223 vec_cond_rhs);
10224 else
10225 new_stmt
10226 = gimple_build_assign (new_temp, bitop1, vec_cond_lhs,
10227 vec_cond_rhs);
10228 vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
10229 if (bitop2 == NOP_EXPR)
10230 vec_compare = new_temp;
10231 else if (bitop2 == BIT_NOT_EXPR)
10232 {
10233 /* Instead of doing ~x ? y : z do x ? z : y. */
10234 vec_compare = new_temp;
10235 std::swap (vec_then_clause, vec_else_clause);
10236 }
10237 else
10238 {
10239 vec_compare = make_ssa_name (vec_cmp_type);
10240 new_stmt
10241 = gimple_build_assign (vec_compare, bitop2,
10242 vec_cond_lhs, new_temp);
10243 vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
10244 }
10245 }
10246 }
10247
10248 /* If we decided to apply a loop mask to the result of the vector
10249 comparison, AND the comparison with the mask now. Later passes
10250 should then be able to reuse the AND results between multiple
10251 vector statements.
10252
10253 For example:
10254 for (int i = 0; i < 100; ++i)
10255 x[i] = y[i] ? z[i] : 10;
10256
10257 results in the following optimized GIMPLE:
10258
10259 mask__35.8_43 = vect__4.7_41 != { 0, ... };
10260 vec_mask_and_46 = loop_mask_40 & mask__35.8_43;
10261 _19 = &MEM[base: z_12(D), index: ivtmp_56, step: 4, offset: 0B];
10262 vect_iftmp.11_47 = .MASK_LOAD (_19, 4B, vec_mask_and_46);
10263 vect_iftmp.12_52 = VEC_COND_EXPR <vec_mask_and_46,
10264 vect_iftmp.11_47, { 10, ... }>;
10265
10266 instead of using both masked and unmasked forms of
10267 vec != { 0, ... } (masked in the MASK_LOAD,
10268 unmasked in the VEC_COND_EXPR). */
10269
10270 /* Force vec_compare to be an SSA_NAME rather than a comparison,
10271 in cases where that's necessary. */
10272
10273 if (masks || reduction_type == EXTRACT_LAST_REDUCTION)
10274 {
10275 if (!is_gimple_val (vec_compare))
10276 {
10277 tree vec_compare_name = make_ssa_name (vec_cmp_type);
10278 gassign *new_stmt = gimple_build_assign (vec_compare_name,
10279 vec_compare);
10280 vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
10281 vec_compare = vec_compare_name;
10282 }
10283
10284 if (must_invert_cmp_result)
10285 {
10286 tree vec_compare_name = make_ssa_name (vec_cmp_type);
10287 gassign *new_stmt = gimple_build_assign (vec_compare_name,
10288 BIT_NOT_EXPR,
10289 vec_compare);
10290 vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
10291 vec_compare = vec_compare_name;
10292 }
10293
10294 if (masks)
10295 {
10296 unsigned vec_num = vec_oprnds0.length ();
10297 tree loop_mask
10298 = vect_get_loop_mask (gsi, masks, vec_num * ncopies,
10299 vectype, vec_num * j + i);
10300 tree tmp2 = make_ssa_name (vec_cmp_type);
10301 gassign *g
10302 = gimple_build_assign (tmp2, BIT_AND_EXPR, vec_compare,
10303 loop_mask);
10304 vect_finish_stmt_generation (stmt_info, g, gsi);
10305 vec_compare = tmp2;
10306 }
10307 }
10308
10309 if (reduction_type == EXTRACT_LAST_REDUCTION)
10310 {
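/* A rough sketch of the semantics relied on here (see the call below):
   .FOLD_EXTRACT_LAST (ELSE, MASK, VEC) yields the last element of VEC
   whose MASK bit is set, or ELSE if no bit is set, which is what an
   EXTRACT_LAST reduction of the COND_EXPR needs.  */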
10311 gcall *new_stmt = gimple_build_call_internal
10312 (IFN_FOLD_EXTRACT_LAST, 3, else_clause, vec_compare,
10313 vec_then_clause);
10314 gimple_call_set_lhs (new_stmt, scalar_dest);
10315 SSA_NAME_DEF_STMT (scalar_dest) = new_stmt;
10316 if (stmt_info->stmt == gsi_stmt (*gsi))
10317 new_stmt_info = vect_finish_replace_stmt (stmt_info, new_stmt);
10318 else
10319 {
10320 /* In this case we're moving the definition to later in the
10321 block. That doesn't matter because the only uses of the
10322 lhs are in phi statements. */
10323 gimple_stmt_iterator old_gsi
10324 = gsi_for_stmt (stmt_info->stmt);
10325 gsi_remove (&old_gsi, true);
10326 new_stmt_info
10327 = vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
10328 }
10329 }
10330 else
10331 {
10332 new_temp = make_ssa_name (vec_dest);
10333 gassign *new_stmt
10334 = gimple_build_assign (new_temp, VEC_COND_EXPR, vec_compare,
10335 vec_then_clause, vec_else_clause);
10336 new_stmt_info
10337 = vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
10338 }
10339 if (slp_node)
10340 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt_info);
10341 }
10342
10343 if (slp_node)
10344 continue;
10345
10346 if (j == 0)
10347 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt_info;
10348 else
10349 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt_info;
10350
10351 prev_stmt_info = new_stmt_info;
10352 }
10353
10354 vec_oprnds0.release ();
10355 vec_oprnds1.release ();
10356 vec_oprnds2.release ();
10357 vec_oprnds3.release ();
10358
10359 return true;
10360 }
10361
10362 /* vectorizable_comparison.
10363
10364 Check if STMT_INFO is a comparison expression that can be vectorized.
10365 If VEC_STMT is also passed, vectorize STMT_INFO: create a vectorized
10366 comparison, put it in VEC_STMT, and insert it at GSI.
10367
10368 Return true if STMT_INFO is vectorizable in this way. */
10369
10370 static bool
10371 vectorizable_comparison (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
10372 stmt_vec_info *vec_stmt,
10373 slp_tree slp_node, stmt_vector_for_cost *cost_vec)
10374 {
10375 vec_info *vinfo = stmt_info->vinfo;
10376 tree lhs, rhs1, rhs2;
10377 tree vectype1 = NULL_TREE, vectype2 = NULL_TREE;
10378 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
10379 tree vec_rhs1 = NULL_TREE, vec_rhs2 = NULL_TREE;
10380 tree new_temp;
10381 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
10382 enum vect_def_type dts[2] = {vect_unknown_def_type, vect_unknown_def_type};
10383 int ndts = 2;
10384 poly_uint64 nunits;
10385 int ncopies;
10386 enum tree_code code, bitop1 = NOP_EXPR, bitop2 = NOP_EXPR;
10387 stmt_vec_info prev_stmt_info = NULL;
10388 int i, j;
10389 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
10390 vec<tree> vec_oprnds0 = vNULL;
10391 vec<tree> vec_oprnds1 = vNULL;
10392 tree mask_type;
10393 tree mask;
10394
10395 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
10396 return false;
10397
10398 if (!vectype || !VECTOR_BOOLEAN_TYPE_P (vectype))
10399 return false;
10400
10401 mask_type = vectype;
10402 nunits = TYPE_VECTOR_SUBPARTS (vectype);
10403
10404 if (slp_node)
10405 ncopies = 1;
10406 else
10407 ncopies = vect_get_num_copies (loop_vinfo, vectype);
10408
10409 gcc_assert (ncopies >= 1);
10410 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
10411 return false;
10412
10413 if (STMT_VINFO_LIVE_P (stmt_info))
10414 {
10415 if (dump_enabled_p ())
10416 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
10417 "value used after loop.\n");
10418 return false;
10419 }
10420
10421 gassign *stmt = dyn_cast <gassign *> (stmt_info->stmt);
10422 if (!stmt)
10423 return false;
10424
10425 code = gimple_assign_rhs_code (stmt);
10426
10427 if (TREE_CODE_CLASS (code) != tcc_comparison)
10428 return false;
10429
10430 rhs1 = gimple_assign_rhs1 (stmt);
10431 rhs2 = gimple_assign_rhs2 (stmt);
10432
10433 if (!vect_is_simple_use (rhs1, stmt_info->vinfo, &dts[0], &vectype1))
10434 return false;
10435
10436 if (!vect_is_simple_use (rhs2, stmt_info->vinfo, &dts[1], &vectype2))
10437 return false;
10438
10439 if (vectype1 && vectype2
10440 && maybe_ne (TYPE_VECTOR_SUBPARTS (vectype1),
10441 TYPE_VECTOR_SUBPARTS (vectype2)))
10442 return false;
10443
10444 vectype = vectype1 ? vectype1 : vectype2;
10445
10446 /* Invariant comparison. */
10447 if (!vectype)
10448 {
10449 vectype = get_vectype_for_scalar_type (vinfo, TREE_TYPE (rhs1));
10450 if (maybe_ne (TYPE_VECTOR_SUBPARTS (vectype), nunits))
10451 return false;
10452 }
10453 else if (maybe_ne (nunits, TYPE_VECTOR_SUBPARTS (vectype)))
10454 return false;
10455
10456 /* Can't compare mask and non-mask types. */
10457 if (vectype1 && vectype2
10458 && (VECTOR_BOOLEAN_TYPE_P (vectype1) ^ VECTOR_BOOLEAN_TYPE_P (vectype2)))
10459 return false;
10460
10461 /* Boolean values may have another representation in vectors
10462 and therefore we prefer bit operations over comparison for
10463 them (which also works for scalar masks). We store opcodes
10464 to use in bitop1 and bitop2.  The statement is vectorized as
10465 BITOP2 (rhs1 BITOP1 rhs2) or
10466 rhs1 BITOP2 (BITOP1 rhs2)
10467 depending on the arity of bitop1 and bitop2.  */
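/* For example (illustrative only): with boolean operands, a GT_EXPR
   a > b is emitted as a & ~b (bitop1 = BIT_NOT_EXPR applied to b,
   bitop2 = BIT_AND_EXPR with a), and a LE_EXPR a <= b becomes ~a | b
   once the operands have been swapped (swap_p).  */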
10468 bool swap_p = false;
10469 if (VECTOR_BOOLEAN_TYPE_P (vectype))
10470 {
10471 if (code == GT_EXPR)
10472 {
10473 bitop1 = BIT_NOT_EXPR;
10474 bitop2 = BIT_AND_EXPR;
10475 }
10476 else if (code == GE_EXPR)
10477 {
10478 bitop1 = BIT_NOT_EXPR;
10479 bitop2 = BIT_IOR_EXPR;
10480 }
10481 else if (code == LT_EXPR)
10482 {
10483 bitop1 = BIT_NOT_EXPR;
10484 bitop2 = BIT_AND_EXPR;
10485 swap_p = true;
10486 }
10487 else if (code == LE_EXPR)
10488 {
10489 bitop1 = BIT_NOT_EXPR;
10490 bitop2 = BIT_IOR_EXPR;
10491 swap_p = true;
10492 }
10493 else
10494 {
10495 bitop1 = BIT_XOR_EXPR;
10496 if (code == EQ_EXPR)
10497 bitop2 = BIT_NOT_EXPR;
10498 }
10499 }
10500
10501 if (!vec_stmt)
10502 {
10503 if (bitop1 == NOP_EXPR)
10504 {
10505 if (!expand_vec_cmp_expr_p (vectype, mask_type, code))
10506 return false;
10507 }
10508 else
10509 {
10510 machine_mode mode = TYPE_MODE (vectype);
10511 optab optab;
10512
10513 optab = optab_for_tree_code (bitop1, vectype, optab_default);
10514 if (!optab || optab_handler (optab, mode) == CODE_FOR_nothing)
10515 return false;
10516
10517 if (bitop2 != NOP_EXPR)
10518 {
10519 optab = optab_for_tree_code (bitop2, vectype, optab_default);
10520 if (!optab || optab_handler (optab, mode) == CODE_FOR_nothing)
10521 return false;
10522 }
10523 }
10524
10525 STMT_VINFO_TYPE (stmt_info) = comparison_vec_info_type;
10526 vect_model_simple_cost (stmt_info, ncopies * (1 + (bitop2 != NOP_EXPR)),
10527 dts, ndts, slp_node, cost_vec);
10528 return true;
10529 }
10530
10531 /* Transform. */
10532 if (!slp_node)
10533 {
10534 vec_oprnds0.create (1);
10535 vec_oprnds1.create (1);
10536 }
10537
10538 /* Handle def. */
10539 lhs = gimple_assign_lhs (stmt);
10540 mask = vect_create_destination_var (lhs, mask_type);
10541
10542 /* Handle cmp expr. */
10543 for (j = 0; j < ncopies; j++)
10544 {
10545 stmt_vec_info new_stmt_info = NULL;
10546 if (j == 0)
10547 {
10548 if (slp_node)
10549 {
10550 auto_vec<vec<tree>, 2> vec_defs;
10551 vect_get_slp_defs (slp_node, &vec_defs);
10552 vec_oprnds1 = vec_defs.pop ();
10553 vec_oprnds0 = vec_defs.pop ();
10554 if (swap_p)
10555 std::swap (vec_oprnds0, vec_oprnds1);
10556 }
10557 else
10558 {
10559 vec_rhs1 = vect_get_vec_def_for_operand (rhs1, stmt_info,
10560 vectype);
10561 vec_rhs2 = vect_get_vec_def_for_operand (rhs2, stmt_info,
10562 vectype);
10563 }
10564 }
10565 else
10566 {
10567 vec_rhs1 = vect_get_vec_def_for_stmt_copy (vinfo,
10568 vec_oprnds0.pop ());
10569 vec_rhs2 = vect_get_vec_def_for_stmt_copy (vinfo,
10570 vec_oprnds1.pop ());
10571 }
10572
10573 if (!slp_node)
10574 {
10575 if (swap_p && j == 0)
10576 std::swap (vec_rhs1, vec_rhs2);
10577 vec_oprnds0.quick_push (vec_rhs1);
10578 vec_oprnds1.quick_push (vec_rhs2);
10579 }
10580
10581 /* Arguments are ready. Create the new vector stmt. */
10582 FOR_EACH_VEC_ELT (vec_oprnds0, i, vec_rhs1)
10583 {
10584 vec_rhs2 = vec_oprnds1[i];
10585
10586 new_temp = make_ssa_name (mask);
10587 if (bitop1 == NOP_EXPR)
10588 {
10589 gassign *new_stmt = gimple_build_assign (new_temp, code,
10590 vec_rhs1, vec_rhs2);
10591 new_stmt_info
10592 = vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
10593 }
10594 else
10595 {
10596 gassign *new_stmt;
10597 if (bitop1 == BIT_NOT_EXPR)
10598 new_stmt = gimple_build_assign (new_temp, bitop1, vec_rhs2);
10599 else
10600 new_stmt = gimple_build_assign (new_temp, bitop1, vec_rhs1,
10601 vec_rhs2);
10602 new_stmt_info
10603 = vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
10604 if (bitop2 != NOP_EXPR)
10605 {
10606 tree res = make_ssa_name (mask);
10607 if (bitop2 == BIT_NOT_EXPR)
10608 new_stmt = gimple_build_assign (res, bitop2, new_temp);
10609 else
10610 new_stmt = gimple_build_assign (res, bitop2, vec_rhs1,
10611 new_temp);
10612 new_stmt_info
10613 = vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
10614 }
10615 }
10616 if (slp_node)
10617 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt_info);
10618 }
10619
10620 if (slp_node)
10621 continue;
10622
10623 if (j == 0)
10624 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt_info;
10625 else
10626 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt_info;
10627
10628 prev_stmt_info = new_stmt_info;
10629 }
10630
10631 vec_oprnds0.release ();
10632 vec_oprnds1.release ();
10633
10634 return true;
10635 }
10636
10637 /* If SLP_NODE is nonnull, return true if vectorizable_live_operation
10638 can handle all live statements in the node. Otherwise return true
10639 if STMT_INFO is not live or if vectorizable_live_operation can handle it.
10640 GSI and VEC_STMT_P are as for vectorizable_live_operation. */
10641
10642 static bool
10643 can_vectorize_live_stmts (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
10644 slp_tree slp_node, slp_instance slp_node_instance,
10645 bool vec_stmt_p,
10646 stmt_vector_for_cost *cost_vec)
10647 {
10648 if (slp_node)
10649 {
10650 stmt_vec_info slp_stmt_info;
10651 unsigned int i;
10652 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (slp_node), i, slp_stmt_info)
10653 {
10654 if (STMT_VINFO_LIVE_P (slp_stmt_info)
10655 && !vectorizable_live_operation (slp_stmt_info, gsi, slp_node,
10656 slp_node_instance, i,
10657 vec_stmt_p, cost_vec))
10658 return false;
10659 }
10660 }
10661 else if (STMT_VINFO_LIVE_P (stmt_info)
10662 && !vectorizable_live_operation (stmt_info, gsi, slp_node,
10663 slp_node_instance, -1,
10664 vec_stmt_p, cost_vec))
10665 return false;
10666
10667 return true;
10668 }
10669
10670 /* Make sure the statement is vectorizable. */
10671
10672 opt_result
10673 vect_analyze_stmt (stmt_vec_info stmt_info, bool *need_to_vectorize,
10674 slp_tree node, slp_instance node_instance,
10675 stmt_vector_for_cost *cost_vec)
10676 {
10677 vec_info *vinfo = stmt_info->vinfo;
10678 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
10679 enum vect_relevant relevance = STMT_VINFO_RELEVANT (stmt_info);
10680 bool ok;
10681 gimple_seq pattern_def_seq;
10682
10683 if (dump_enabled_p ())
10684 dump_printf_loc (MSG_NOTE, vect_location, "==> examining statement: %G",
10685 stmt_info->stmt);
10686
10687 if (gimple_has_volatile_ops (stmt_info->stmt))
10688 return opt_result::failure_at (stmt_info->stmt,
10689 "not vectorized:"
10690 " stmt has volatile operands: %G\n",
10691 stmt_info->stmt);
10692
10693 if (STMT_VINFO_IN_PATTERN_P (stmt_info)
10694 && node == NULL
10695 && (pattern_def_seq = STMT_VINFO_PATTERN_DEF_SEQ (stmt_info)))
10696 {
10697 gimple_stmt_iterator si;
10698
10699 for (si = gsi_start (pattern_def_seq); !gsi_end_p (si); gsi_next (&si))
10700 {
10701 stmt_vec_info pattern_def_stmt_info
10702 = vinfo->lookup_stmt (gsi_stmt (si));
10703 if (STMT_VINFO_RELEVANT_P (pattern_def_stmt_info)
10704 || STMT_VINFO_LIVE_P (pattern_def_stmt_info))
10705 {
10706 /* Analyze def stmt of STMT if it's a pattern stmt. */
10707 if (dump_enabled_p ())
10708 dump_printf_loc (MSG_NOTE, vect_location,
10709 "==> examining pattern def statement: %G",
10710 pattern_def_stmt_info->stmt);
10711
10712 opt_result res
10713 = vect_analyze_stmt (pattern_def_stmt_info,
10714 need_to_vectorize, node, node_instance,
10715 cost_vec);
10716 if (!res)
10717 return res;
10718 }
10719 }
10720 }
10721
10722 /* Skip stmts that do not need to be vectorized. In loops this is expected
10723 to include:
10724 - the COND_EXPR which is the loop exit condition
10725 - any LABEL_EXPRs in the loop
10726 - computations that are used only for array indexing or loop control.
10727 In basic blocks we only analyze statements that are a part of some SLP
10728 instance, therefore, all the statements are relevant.
10729
10730 A pattern statement needs to be analyzed instead of the original statement
10731 if the original statement is not relevant.  Otherwise, we analyze both
10732 statements.  In basic blocks we are called from some SLP instance
10733 traversal and don't analyze pattern stmts instead; the pattern stmts
10734 will already be part of the SLP instance.  */
10735
10736 stmt_vec_info pattern_stmt_info = STMT_VINFO_RELATED_STMT (stmt_info);
10737 if (!STMT_VINFO_RELEVANT_P (stmt_info)
10738 && !STMT_VINFO_LIVE_P (stmt_info))
10739 {
10740 if (STMT_VINFO_IN_PATTERN_P (stmt_info)
10741 && pattern_stmt_info
10742 && (STMT_VINFO_RELEVANT_P (pattern_stmt_info)
10743 || STMT_VINFO_LIVE_P (pattern_stmt_info)))
10744 {
10745 /* Analyze PATTERN_STMT instead of the original stmt. */
10746 stmt_info = pattern_stmt_info;
10747 if (dump_enabled_p ())
10748 dump_printf_loc (MSG_NOTE, vect_location,
10749 "==> examining pattern statement: %G",
10750 stmt_info->stmt);
10751 }
10752 else
10753 {
10754 if (dump_enabled_p ())
10755 dump_printf_loc (MSG_NOTE, vect_location, "irrelevant.\n");
10756
10757 return opt_result::success ();
10758 }
10759 }
10760 else if (STMT_VINFO_IN_PATTERN_P (stmt_info)
10761 && node == NULL
10762 && pattern_stmt_info
10763 && (STMT_VINFO_RELEVANT_P (pattern_stmt_info)
10764 || STMT_VINFO_LIVE_P (pattern_stmt_info)))
10765 {
10766 /* Analyze PATTERN_STMT too. */
10767 if (dump_enabled_p ())
10768 dump_printf_loc (MSG_NOTE, vect_location,
10769 "==> examining pattern statement: %G",
10770 pattern_stmt_info->stmt);
10771
10772 opt_result res
10773 = vect_analyze_stmt (pattern_stmt_info, need_to_vectorize, node,
10774 node_instance, cost_vec);
10775 if (!res)
10776 return res;
10777 }
10778
10779 switch (STMT_VINFO_DEF_TYPE (stmt_info))
10780 {
10781 case vect_internal_def:
10782 break;
10783
10784 case vect_reduction_def:
10785 case vect_nested_cycle:
10786 gcc_assert (!bb_vinfo
10787 && (relevance == vect_used_in_outer
10788 || relevance == vect_used_in_outer_by_reduction
10789 || relevance == vect_used_by_reduction
10790 || relevance == vect_unused_in_scope
10791 || relevance == vect_used_only_live));
10792 break;
10793
10794 case vect_induction_def:
10795 gcc_assert (!bb_vinfo);
10796 break;
10797
10798 case vect_constant_def:
10799 case vect_external_def:
10800 case vect_unknown_def_type:
10801 default:
10802 gcc_unreachable ();
10803 }
10804
10805 if (STMT_VINFO_RELEVANT_P (stmt_info))
10806 {
10807 tree type = gimple_expr_type (stmt_info->stmt);
10808 gcc_assert (!VECTOR_MODE_P (TYPE_MODE (type)));
10809 gcall *call = dyn_cast <gcall *> (stmt_info->stmt);
10810 gcc_assert (STMT_VINFO_VECTYPE (stmt_info)
10811 || (call && gimple_call_lhs (call) == NULL_TREE));
10812 *need_to_vectorize = true;
10813 }
10814
10815 if (PURE_SLP_STMT (stmt_info) && !node)
10816 {
10817 if (dump_enabled_p ())
10818 dump_printf_loc (MSG_NOTE, vect_location,
10819 "handled only by SLP analysis\n");
10820 return opt_result::success ();
10821 }
10822
10823 ok = true;
10824 if (!bb_vinfo
10825 && (STMT_VINFO_RELEVANT_P (stmt_info)
10826 || STMT_VINFO_DEF_TYPE (stmt_info) == vect_reduction_def))
10827 /* Prefer vectorizable_call over vectorizable_simd_clone_call so
10828 -mveclibabi= takes preference over library functions with
10829 the simd attribute. */
10830 ok = (vectorizable_call (stmt_info, NULL, NULL, node, cost_vec)
10831 || vectorizable_simd_clone_call (stmt_info, NULL, NULL, node,
10832 cost_vec)
10833 || vectorizable_conversion (stmt_info, NULL, NULL, node, cost_vec)
10834 || vectorizable_operation (stmt_info, NULL, NULL, node, cost_vec)
10835 || vectorizable_assignment (stmt_info, NULL, NULL, node, cost_vec)
10836 || vectorizable_load (stmt_info, NULL, NULL, node, node_instance,
10837 cost_vec)
10838 || vectorizable_store (stmt_info, NULL, NULL, node, cost_vec)
10839 || vectorizable_reduction (stmt_info, node, node_instance, cost_vec)
10840 || vectorizable_induction (stmt_info, NULL, NULL, node, cost_vec)
10841 || vectorizable_shift (stmt_info, NULL, NULL, node, cost_vec)
10842 || vectorizable_condition (stmt_info, NULL, NULL, node, cost_vec)
10843 || vectorizable_comparison (stmt_info, NULL, NULL, node,
10844 cost_vec)
10845 || vectorizable_lc_phi (stmt_info, NULL, node));
10846 else
10847 {
10848 if (bb_vinfo)
10849 ok = (vectorizable_call (stmt_info, NULL, NULL, node, cost_vec)
10850 || vectorizable_simd_clone_call (stmt_info, NULL, NULL, node,
10851 cost_vec)
10852 || vectorizable_conversion (stmt_info, NULL, NULL, node,
10853 cost_vec)
10854 || vectorizable_shift (stmt_info, NULL, NULL, node, cost_vec)
10855 || vectorizable_operation (stmt_info, NULL, NULL, node, cost_vec)
10856 || vectorizable_assignment (stmt_info, NULL, NULL, node,
10857 cost_vec)
10858 || vectorizable_load (stmt_info, NULL, NULL, node, node_instance,
10859 cost_vec)
10860 || vectorizable_store (stmt_info, NULL, NULL, node, cost_vec)
10861 || vectorizable_condition (stmt_info, NULL, NULL, node, cost_vec)
10862 || vectorizable_comparison (stmt_info, NULL, NULL, node,
10863 cost_vec));
10864 }
10865
10866 if (!ok)
10867 return opt_result::failure_at (stmt_info->stmt,
10868 "not vectorized:"
10869 " relevant stmt not supported: %G",
10870 stmt_info->stmt);
10871
10872 /* Stmts that are (also) "live" (i.e. - that are used out of the loop)
10873 need extra handling, except for vectorizable reductions. */
10874 if (!bb_vinfo
10875 && STMT_VINFO_TYPE (stmt_info) != reduc_vec_info_type
10876 && STMT_VINFO_TYPE (stmt_info) != lc_phi_info_type
10877 && !can_vectorize_live_stmts (stmt_info, NULL, node, node_instance,
10878 false, cost_vec))
10879 return opt_result::failure_at (stmt_info->stmt,
10880 "not vectorized:"
10881 " live stmt not supported: %G",
10882 stmt_info->stmt);
10883
10884 return opt_result::success ();
10885 }
10886
10887
10888 /* Function vect_transform_stmt.
10889
10890 Create a vectorized stmt to replace STMT_INFO, and insert it at GSI. */
10891
10892 bool
10893 vect_transform_stmt (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
10894 slp_tree slp_node, slp_instance slp_node_instance)
10895 {
10896 vec_info *vinfo = stmt_info->vinfo;
10897 bool is_store = false;
10898 stmt_vec_info vec_stmt = NULL;
10899 bool done;
10900
10901 gcc_assert (slp_node || !PURE_SLP_STMT (stmt_info));
10902 stmt_vec_info old_vec_stmt_info = STMT_VINFO_VEC_STMT (stmt_info);
10903
10904 bool nested_p = (STMT_VINFO_LOOP_VINFO (stmt_info)
10905 && nested_in_vect_loop_p
10906 (LOOP_VINFO_LOOP (STMT_VINFO_LOOP_VINFO (stmt_info)),
10907 stmt_info));
10908
10909 gimple *stmt = stmt_info->stmt;
10910 switch (STMT_VINFO_TYPE (stmt_info))
10911 {
10912 case type_demotion_vec_info_type:
10913 case type_promotion_vec_info_type:
10914 case type_conversion_vec_info_type:
10915 done = vectorizable_conversion (stmt_info, gsi, &vec_stmt, slp_node,
10916 NULL);
10917 gcc_assert (done);
10918 break;
10919
10920 case induc_vec_info_type:
10921 done = vectorizable_induction (stmt_info, gsi, &vec_stmt, slp_node,
10922 NULL);
10923 gcc_assert (done);
10924 break;
10925
10926 case shift_vec_info_type:
10927 done = vectorizable_shift (stmt_info, gsi, &vec_stmt, slp_node, NULL);
10928 gcc_assert (done);
10929 break;
10930
10931 case op_vec_info_type:
10932 done = vectorizable_operation (stmt_info, gsi, &vec_stmt, slp_node,
10933 NULL);
10934 gcc_assert (done);
10935 break;
10936
10937 case assignment_vec_info_type:
10938 done = vectorizable_assignment (stmt_info, gsi, &vec_stmt, slp_node,
10939 NULL);
10940 gcc_assert (done);
10941 break;
10942
10943 case load_vec_info_type:
10944 done = vectorizable_load (stmt_info, gsi, &vec_stmt, slp_node,
10945 slp_node_instance, NULL);
10946 gcc_assert (done);
10947 break;
10948
10949 case store_vec_info_type:
10950 done = vectorizable_store (stmt_info, gsi, &vec_stmt, slp_node, NULL);
10951 gcc_assert (done);
10952 if (STMT_VINFO_GROUPED_ACCESS (stmt_info) && !slp_node)
10953 {
10954 /* In case of interleaving, the whole chain is vectorized when the
10955 last store in the chain is reached. Store stmts before the last
10956 one are skipped, and their vec_stmt_info shouldn't be freed
10957 meanwhile. */
10958 stmt_vec_info group_info = DR_GROUP_FIRST_ELEMENT (stmt_info);
10959 if (DR_GROUP_STORE_COUNT (group_info) == DR_GROUP_SIZE (group_info))
10960 is_store = true;
10961 }
10962 else
10963 is_store = true;
10964 break;
10965
10966 case condition_vec_info_type:
10967 done = vectorizable_condition (stmt_info, gsi, &vec_stmt, slp_node, NULL);
10968 gcc_assert (done);
10969 break;
10970
10971 case comparison_vec_info_type:
10972 done = vectorizable_comparison (stmt_info, gsi, &vec_stmt,
10973 slp_node, NULL);
10974 gcc_assert (done);
10975 break;
10976
10977 case call_vec_info_type:
10978 done = vectorizable_call (stmt_info, gsi, &vec_stmt, slp_node, NULL);
10979 stmt = gsi_stmt (*gsi);
10980 break;
10981
10982 case call_simd_clone_vec_info_type:
10983 done = vectorizable_simd_clone_call (stmt_info, gsi, &vec_stmt,
10984 slp_node, NULL);
10985 stmt = gsi_stmt (*gsi);
10986 break;
10987
10988 case reduc_vec_info_type:
10989 done = vect_transform_reduction (stmt_info, gsi, &vec_stmt, slp_node);
10990 gcc_assert (done);
10991 break;
10992
10993 case cycle_phi_info_type:
10994 done = vect_transform_cycle_phi (stmt_info, &vec_stmt, slp_node,
10995 slp_node_instance);
10996 gcc_assert (done);
10997 break;
10998
10999 case lc_phi_info_type:
11000 done = vectorizable_lc_phi (stmt_info, &vec_stmt, slp_node);
11001 gcc_assert (done);
11002 break;
11003
11004 default:
11005 if (!STMT_VINFO_LIVE_P (stmt_info))
11006 {
11007 if (dump_enabled_p ())
11008 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
11009 "stmt not supported.\n");
11010 gcc_unreachable ();
11011 }
11012 }
11013
11014 /* Verify SLP vectorization doesn't mess with STMT_VINFO_VEC_STMT.
11015 This would break hybrid SLP vectorization. */
11016 if (slp_node)
11017 gcc_assert (!vec_stmt
11018 && STMT_VINFO_VEC_STMT (stmt_info) == old_vec_stmt_info);
11019
11020 /* Handle inner-loop stmts whose DEF is used in the loop-nest that
11021 is being vectorized, but outside the immediately enclosing loop. */
11022 if (vec_stmt
11023 && nested_p
11024 && STMT_VINFO_TYPE (stmt_info) != reduc_vec_info_type
11025 && (STMT_VINFO_RELEVANT (stmt_info) == vect_used_in_outer
11026 || STMT_VINFO_RELEVANT (stmt_info) ==
11027 vect_used_in_outer_by_reduction))
11028 {
11029 class loop *innerloop = LOOP_VINFO_LOOP (
11030 STMT_VINFO_LOOP_VINFO (stmt_info))->inner;
11031 imm_use_iterator imm_iter;
11032 use_operand_p use_p;
11033 tree scalar_dest;
11034
11035 if (dump_enabled_p ())
11036 dump_printf_loc (MSG_NOTE, vect_location,
11037 "Record the vdef for outer-loop vectorization.\n");
11038
11039 /* Find the relevant loop-exit phi-node, and record the vec_stmt there
11040 (to be used when vectorizing outer-loop stmts that use the DEF of
11041 STMT). */
11042 if (gimple_code (stmt) == GIMPLE_PHI)
11043 scalar_dest = PHI_RESULT (stmt);
11044 else
11045 scalar_dest = gimple_get_lhs (stmt);
11046
11047 FOR_EACH_IMM_USE_FAST (use_p, imm_iter, scalar_dest)
11048 if (!flow_bb_inside_loop_p (innerloop, gimple_bb (USE_STMT (use_p))))
11049 {
11050 stmt_vec_info exit_phi_info
11051 = vinfo->lookup_stmt (USE_STMT (use_p));
11052 STMT_VINFO_VEC_STMT (exit_phi_info) = vec_stmt;
11053 }
11054 }
11055
11056 if (vec_stmt)
11057 STMT_VINFO_VEC_STMT (stmt_info) = vec_stmt;
11058
11059 if (STMT_VINFO_TYPE (stmt_info) == store_vec_info_type)
11060 return is_store;
11061
11062 /* If this stmt defines a value used on a backedge, update the
11063 vectorized PHIs. */
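/* For example (an illustrative case): for a reduction such as sum += a[i],
   the vectorized update statement defines the value flowing over the loop
   latch into the vectorized reduction PHI, so each vector copy generated
   for it is added as a PHI argument on the latch edge below.  */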
11064 stmt_vec_info orig_stmt_info = vect_orig_stmt (stmt_info);
11065 stmt_vec_info reduc_info;
11066 if (STMT_VINFO_REDUC_DEF (orig_stmt_info)
11067 && vect_stmt_to_vectorize (orig_stmt_info) == stmt_info
11068 && (reduc_info = info_for_reduction (orig_stmt_info))
11069 && STMT_VINFO_REDUC_TYPE (reduc_info) != FOLD_LEFT_REDUCTION
11070 && STMT_VINFO_REDUC_TYPE (reduc_info) != EXTRACT_LAST_REDUCTION)
11071 {
11072 gphi *phi;
11073 edge e;
11074 if (!slp_node
11075 && (phi = dyn_cast <gphi *>
11076 (STMT_VINFO_REDUC_DEF (orig_stmt_info)->stmt))
11077 && dominated_by_p (CDI_DOMINATORS,
11078 gimple_bb (orig_stmt_info->stmt), gimple_bb (phi))
11079 && (e = loop_latch_edge (gimple_bb (phi)->loop_father))
11080 && (PHI_ARG_DEF_FROM_EDGE (phi, e)
11081 == gimple_get_lhs (orig_stmt_info->stmt)))
11082 {
11083 stmt_vec_info phi_info
11084 = STMT_VINFO_VEC_STMT (STMT_VINFO_REDUC_DEF (orig_stmt_info));
11085 stmt_vec_info vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
11086 do
11087 {
11088 add_phi_arg (as_a <gphi *> (phi_info->stmt),
11089 gimple_get_lhs (vec_stmt->stmt), e,
11090 gimple_phi_arg_location (phi, e->dest_idx));
11091 phi_info = STMT_VINFO_RELATED_STMT (phi_info);
11092 vec_stmt = STMT_VINFO_RELATED_STMT (vec_stmt);
11093 }
11094 while (phi_info);
11095 gcc_assert (!vec_stmt);
11096 }
11097 else if (slp_node
11098 && slp_node != slp_node_instance->reduc_phis)
11099 {
11100 slp_tree phi_node = slp_node_instance->reduc_phis;
11101 gphi *phi = as_a <gphi *> (SLP_TREE_SCALAR_STMTS (phi_node)[0]->stmt);
11102 e = loop_latch_edge (gimple_bb (phi)->loop_father);
11103 gcc_assert (SLP_TREE_VEC_STMTS (phi_node).length ()
11104 == SLP_TREE_VEC_STMTS (slp_node).length ());
11105 for (unsigned i = 0; i < SLP_TREE_VEC_STMTS (phi_node).length (); ++i)
11106 add_phi_arg (as_a <gphi *> (SLP_TREE_VEC_STMTS (phi_node)[i]->stmt),
11107 gimple_get_lhs (SLP_TREE_VEC_STMTS (slp_node)[i]->stmt),
11108 e, gimple_phi_arg_location (phi, e->dest_idx));
11109 }
11110 }
11111
11112 /* Handle stmts whose DEF is used outside the loop-nest that is
11113 being vectorized. */
11114 done = can_vectorize_live_stmts (stmt_info, gsi, slp_node,
11115 slp_node_instance, true, NULL);
11116 gcc_assert (done);
11117
11118 return false;
11119 }
11120
11121
11122 /* Remove a group of stores (for SLP or interleaving), free their
11123 stmt_vec_info. */
11124
11125 void
11126 vect_remove_stores (stmt_vec_info first_stmt_info)
11127 {
11128 vec_info *vinfo = first_stmt_info->vinfo;
11129 stmt_vec_info next_stmt_info = first_stmt_info;
11130
11131 while (next_stmt_info)
11132 {
11133 stmt_vec_info tmp = DR_GROUP_NEXT_ELEMENT (next_stmt_info);
11134 next_stmt_info = vect_orig_stmt (next_stmt_info);
11135 /* Free the attached stmt_vec_info and remove the stmt. */
11136 vinfo->remove_stmt (next_stmt_info);
11137 next_stmt_info = tmp;
11138 }
11139 }
11140
11141 /* Function get_vectype_for_scalar_type_and_size.
11142
11143 Returns the vector type corresponding to SCALAR_TYPE and SIZE as supported
11144 by the target. */
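/* For example (hypothetical target, for illustration only): if the
   preferred SIMD mode is 16 bytes wide and SCALAR_TYPE is a 4-byte
   integer, the result is a 4-element integer vector type; SIZE == 0
   requests the target's preferred vector size.  */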
11145
11146 tree
11147 get_vectype_for_scalar_type_and_size (tree scalar_type, poly_uint64 size)
11148 {
11149 tree orig_scalar_type = scalar_type;
11150 scalar_mode inner_mode;
11151 machine_mode simd_mode;
11152 poly_uint64 nunits;
11153 tree vectype;
11154
11155 if (!is_int_mode (TYPE_MODE (scalar_type), &inner_mode)
11156 && !is_float_mode (TYPE_MODE (scalar_type), &inner_mode))
11157 return NULL_TREE;
11158
11159 unsigned int nbytes = GET_MODE_SIZE (inner_mode);
11160
11161 /* For vector types of elements whose mode precision doesn't
11162 match their type's precision we use an element type of mode
11163 precision. The vectorization routines will have to make sure
11164 they support the proper result truncation/extension.
11165 We also make sure to build vector types with INTEGER_TYPE
11166 component type only. */
11167 if (INTEGRAL_TYPE_P (scalar_type)
11168 && (GET_MODE_BITSIZE (inner_mode) != TYPE_PRECISION (scalar_type)
11169 || TREE_CODE (scalar_type) != INTEGER_TYPE))
11170 scalar_type = build_nonstandard_integer_type (GET_MODE_BITSIZE (inner_mode),
11171 TYPE_UNSIGNED (scalar_type));
11172
11173 /* We shouldn't end up building VECTOR_TYPEs of non-scalar components.
11174 When the component mode passes the above test simply use a type
11175 corresponding to that mode. The theory is that any use that
11176 would cause problems with this will disable vectorization anyway. */
11177 else if (!SCALAR_FLOAT_TYPE_P (scalar_type)
11178 && !INTEGRAL_TYPE_P (scalar_type))
11179 scalar_type = lang_hooks.types.type_for_mode (inner_mode, 1);
11180
11181 /* We can't build a vector type of elements with alignment bigger than
11182 their size. */
11183 else if (nbytes < TYPE_ALIGN_UNIT (scalar_type))
11184 scalar_type = lang_hooks.types.type_for_mode (inner_mode,
11185 TYPE_UNSIGNED (scalar_type));
11186
11187 /* If we fell back to using the mode, fail if there was
11188 no scalar type for it. */
11189 if (scalar_type == NULL_TREE)
11190 return NULL_TREE;
11191
11192 /* If no size was supplied use the mode the target prefers. Otherwise
11193 lookup a vector mode of the specified size. */
11194 if (known_eq (size, 0U))
11195 {
11196 simd_mode = targetm.vectorize.preferred_simd_mode (inner_mode);
11197 if (SCALAR_INT_MODE_P (simd_mode))
11198 {
11199 /* Traditional behavior is not to take the integer mode
11200 literally, but simply to use it as a way of determining
11201 the vector size. It is up to mode_for_vector to decide
11202 what the TYPE_MODE should be.
11203
11204 Note that nunits == 1 is allowed in order to support single
11205 element vector types. */
11206 if (!multiple_p (GET_MODE_SIZE (simd_mode), nbytes, &nunits)
11207 || !mode_for_vector (inner_mode, nunits).exists (&simd_mode))
11208 return NULL_TREE;
11209 }
11210 }
11211 else if (!multiple_p (size, nbytes, &nunits)
11212 || !mode_for_vector (inner_mode, nunits).exists (&simd_mode))
11213 return NULL_TREE;
11214
11215 vectype = build_vector_type_for_mode (scalar_type, simd_mode);
11216
11217 /* In cases where the mode was chosen by mode_for_vector, check that
11218 the target actually supports the chosen mode, or that it at least
11219 allows the vector mode to be replaced by a like-sized integer. */
11220 if (!VECTOR_MODE_P (TYPE_MODE (vectype))
11221 && !INTEGRAL_MODE_P (TYPE_MODE (vectype)))
11222 return NULL_TREE;
11223
11224 /* Re-attach the address-space qualifier if we canonicalized the scalar
11225 type. */
11226 if (TYPE_ADDR_SPACE (orig_scalar_type) != TYPE_ADDR_SPACE (vectype))
11227 return build_qualified_type
11228 (vectype, KEEP_QUAL_ADDR_SPACE (TYPE_QUALS (orig_scalar_type)));
11229
11230 return vectype;
11231 }
11232
11233 /* Function get_vectype_for_scalar_type.
11234
11235 Returns the vector type corresponding to SCALAR_TYPE as supported
11236 by the target. */
11237
11238 tree
11239 get_vectype_for_scalar_type (vec_info *vinfo, tree scalar_type)
11240 {
11241 tree vectype;
11242 poly_uint64 vector_size = GET_MODE_SIZE (vinfo->vector_mode);
11243 vectype = get_vectype_for_scalar_type_and_size (scalar_type, vector_size);
11244 if (vectype && vinfo->vector_mode == VOIDmode)
11245 vinfo->vector_mode = TYPE_MODE (vectype);
11246 return vectype;
11247 }
11248
11249 /* Function get_mask_type_for_scalar_type.
11250
11251 Returns the mask type corresponding to a result of comparison
11252 of vectors of specified SCALAR_TYPE as supported by target. */
11253
11254 tree
11255 get_mask_type_for_scalar_type (vec_info *vinfo, tree scalar_type)
11256 {
11257 tree vectype = get_vectype_for_scalar_type (vinfo, scalar_type);
11258
11259 if (!vectype)
11260 return NULL;
11261
11262 return truth_type_for (vectype);
11263 }
11264
11265 /* Function get_same_sized_vectype
11266
11267 Returns a vector type corresponding to SCALAR_TYPE of size
11268 VECTOR_TYPE if supported by the target. */
11269
11270 tree
11271 get_same_sized_vectype (tree scalar_type, tree vector_type)
11272 {
11273 if (VECT_SCALAR_BOOLEAN_TYPE_P (scalar_type))
11274 return truth_type_for (vector_type);
11275
11276 return get_vectype_for_scalar_type_and_size
11277 (scalar_type, GET_MODE_SIZE (TYPE_MODE (vector_type)));
11278 }
11279
11280 /* Function vect_is_simple_use.
11281
11282 Input:
11283 VINFO - the vect info of the loop or basic block that is being vectorized.
11284 OPERAND - operand in the loop or bb.
11285 Output:
11286 DEF_STMT_INFO_OUT (optional) - information about the defining stmt in
11287 case OPERAND is an SSA_NAME that is defined in the vectorizable region
11288 DEF_STMT_OUT (optional) - the defining stmt in case OPERAND is an SSA_NAME;
11289 the definition could be anywhere in the function
11290 DT - the type of definition
11291
11292 Returns whether a stmt with OPERAND can be vectorized.
11293 For loops, supportable operands are constants, loop invariants, and operands
11294 that are defined by the current iteration of the loop. Unsupportable
11295 operands are those that are defined by a previous iteration of the loop (as
11296 is the case in reduction/induction computations).
11297 For basic blocks, supportable operands are constants and bb invariants.
11298 For now, operands defined outside the basic block are not supported. */
11299
11300 bool
11301 vect_is_simple_use (tree operand, vec_info *vinfo, enum vect_def_type *dt,
11302 stmt_vec_info *def_stmt_info_out, gimple **def_stmt_out)
11303 {
11304 if (def_stmt_info_out)
11305 *def_stmt_info_out = NULL;
11306 if (def_stmt_out)
11307 *def_stmt_out = NULL;
11308 *dt = vect_unknown_def_type;
11309
11310 if (dump_enabled_p ())
11311 {
11312 dump_printf_loc (MSG_NOTE, vect_location,
11313 "vect_is_simple_use: operand ");
11314 if (TREE_CODE (operand) == SSA_NAME
11315 && !SSA_NAME_IS_DEFAULT_DEF (operand))
11316 dump_gimple_expr (MSG_NOTE, TDF_SLIM, SSA_NAME_DEF_STMT (operand), 0);
11317 else
11318 dump_generic_expr (MSG_NOTE, TDF_SLIM, operand);
11319 }
11320
11321 if (CONSTANT_CLASS_P (operand))
11322 *dt = vect_constant_def;
11323 else if (is_gimple_min_invariant (operand))
11324 *dt = vect_external_def;
11325 else if (TREE_CODE (operand) != SSA_NAME)
11326 *dt = vect_unknown_def_type;
11327 else if (SSA_NAME_IS_DEFAULT_DEF (operand))
11328 *dt = vect_external_def;
11329 else
11330 {
11331 gimple *def_stmt = SSA_NAME_DEF_STMT (operand);
11332 stmt_vec_info stmt_vinfo = vinfo->lookup_def (operand);
11333 if (!stmt_vinfo)
11334 *dt = vect_external_def;
11335 else
11336 {
11337 stmt_vinfo = vect_stmt_to_vectorize (stmt_vinfo);
11338 def_stmt = stmt_vinfo->stmt;
11339 switch (gimple_code (def_stmt))
11340 {
11341 case GIMPLE_PHI:
11342 case GIMPLE_ASSIGN:
11343 case GIMPLE_CALL:
11344 *dt = STMT_VINFO_DEF_TYPE (stmt_vinfo);
11345 break;
11346 default:
11347 *dt = vect_unknown_def_type;
11348 break;
11349 }
11350 if (def_stmt_info_out)
11351 *def_stmt_info_out = stmt_vinfo;
11352 }
11353 if (def_stmt_out)
11354 *def_stmt_out = def_stmt;
11355 }
11356
11357 if (dump_enabled_p ())
11358 {
11359 dump_printf (MSG_NOTE, ", type of def: ");
11360 switch (*dt)
11361 {
11362 case vect_uninitialized_def:
11363 dump_printf (MSG_NOTE, "uninitialized\n");
11364 break;
11365 case vect_constant_def:
11366 dump_printf (MSG_NOTE, "constant\n");
11367 break;
11368 case vect_external_def:
11369 dump_printf (MSG_NOTE, "external\n");
11370 break;
11371 case vect_internal_def:
11372 dump_printf (MSG_NOTE, "internal\n");
11373 break;
11374 case vect_induction_def:
11375 dump_printf (MSG_NOTE, "induction\n");
11376 break;
11377 case vect_reduction_def:
11378 dump_printf (MSG_NOTE, "reduction\n");
11379 break;
11380 case vect_double_reduction_def:
11381 dump_printf (MSG_NOTE, "double reduction\n");
11382 break;
11383 case vect_nested_cycle:
11384 dump_printf (MSG_NOTE, "nested cycle\n");
11385 break;
11386 case vect_unknown_def_type:
11387 dump_printf (MSG_NOTE, "unknown\n");
11388 break;
11389 }
11390 }
11391
11392 if (*dt == vect_unknown_def_type)
11393 {
11394 if (dump_enabled_p ())
11395 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
11396 "Unsupported pattern.\n");
11397 return false;
11398 }
11399
11400 return true;
11401 }
11402
11403 /* Function vect_is_simple_use.
11404
11405 Same as vect_is_simple_use but also determines the vector operand
11406 type of OPERAND and stores it to *VECTYPE. If the definition of
11407 OPERAND is vect_uninitialized_def, vect_constant_def or
11408 vect_external_def, *VECTYPE will be set to NULL_TREE and the caller
11409 is responsible for computing the best suited vector type for the
11410 scalar operand. */
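/* Typical use, mirroring callers such as vectorizable_comparison above
   (a sketch only):

     enum vect_def_type dt;
     tree vectype = NULL_TREE;
     if (!vect_is_simple_use (rhs1, vinfo, &dt, &vectype))
       return false;

   VECTYPE is then either the def's vector type or NULL_TREE for
   constant/external defs.  */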
11411
11412 bool
11413 vect_is_simple_use (tree operand, vec_info *vinfo, enum vect_def_type *dt,
11414 tree *vectype, stmt_vec_info *def_stmt_info_out,
11415 gimple **def_stmt_out)
11416 {
11417 stmt_vec_info def_stmt_info;
11418 gimple *def_stmt;
11419 if (!vect_is_simple_use (operand, vinfo, dt, &def_stmt_info, &def_stmt))
11420 return false;
11421
11422 if (def_stmt_out)
11423 *def_stmt_out = def_stmt;
11424 if (def_stmt_info_out)
11425 *def_stmt_info_out = def_stmt_info;
11426
11427 /* Now get a vector type if the def is internal, otherwise supply
11428 NULL_TREE and leave it up to the caller to figure out a proper
11429 type for the use stmt. */
11430 if (*dt == vect_internal_def
11431 || *dt == vect_induction_def
11432 || *dt == vect_reduction_def
11433 || *dt == vect_double_reduction_def
11434 || *dt == vect_nested_cycle)
11435 {
11436 *vectype = STMT_VINFO_VECTYPE (def_stmt_info);
11437 gcc_assert (*vectype != NULL_TREE);
11438 if (dump_enabled_p ())
11439 dump_printf_loc (MSG_NOTE, vect_location,
11440 "vect_is_simple_use: vectype %T\n", *vectype);
11441 }
11442 else if (*dt == vect_uninitialized_def
11443 || *dt == vect_constant_def
11444 || *dt == vect_external_def)
11445 *vectype = NULL_TREE;
11446 else
11447 gcc_unreachable ();
11448
11449 return true;
11450 }
11451
11452
11453 /* Function supportable_widening_operation
11454
11455 Check whether an operation represented by the code CODE is a
11456 widening operation that is supported by the target platform in
11457 vector form (i.e., when operating on arguments of type VECTYPE_IN
11458 producing a result of type VECTYPE_OUT).
11459
11460 Widening operations we currently support are NOP (CONVERT), FLOAT,
11461 FIX_TRUNC and WIDEN_MULT. This function checks if these operations
11462 are supported by the target platform either directly (via vector
11463 tree-codes), or via target builtins.
11464
11465 Output:
11466 - CODE1 and CODE2 are codes of vector operations to be used when
11467 vectorizing the operation, if available.
11468 - MULTI_STEP_CVT determines the number of required intermediate steps in
11469 case of multi-step conversion (like char->short->int - in that case
11470 MULTI_STEP_CVT will be 1).
11471 - INTERM_TYPES contains the intermediate type required to perform the
11472 widening operation (short in the above example). */
11473
11474 bool
11475 supportable_widening_operation (enum tree_code code, stmt_vec_info stmt_info,
11476 tree vectype_out, tree vectype_in,
11477 enum tree_code *code1, enum tree_code *code2,
11478 int *multi_step_cvt,
11479 vec<tree> *interm_types)
11480 {
11481 loop_vec_info loop_info = STMT_VINFO_LOOP_VINFO (stmt_info);
11482 class loop *vect_loop = NULL;
11483 machine_mode vec_mode;
11484 enum insn_code icode1, icode2;
11485 optab optab1, optab2;
11486 tree vectype = vectype_in;
11487 tree wide_vectype = vectype_out;
11488 enum tree_code c1, c2;
11489 int i;
11490 tree prev_type, intermediate_type;
11491 machine_mode intermediate_mode, prev_mode;
11492 optab optab3, optab4;
11493
11494 *multi_step_cvt = 0;
11495 if (loop_info)
11496 vect_loop = LOOP_VINFO_LOOP (loop_info);
11497
11498 switch (code)
11499 {
11500 case WIDEN_MULT_EXPR:
11501 /* The result of a vectorized widening operation usually requires
11502 two vectors (because the widened results do not fit into one vector).
11503 The generated vector results would normally be expected to be
11504 generated in the same order as in the original scalar computation,
11505 i.e. if 8 results are generated in each vector iteration, they are
11506 to be organized as follows:
11507 vect1: [res1,res2,res3,res4],
11508 vect2: [res5,res6,res7,res8].
11509
11510 However, in the special case that the result of the widening
11511 operation is used in a reduction computation only, the order doesn't
11512 matter (because when vectorizing a reduction we change the order of
11513 the computation). Some targets can take advantage of this and
11514 generate more efficient code. For example, targets like Altivec,
11515 that support widen_mult using a sequence of {mult_even,mult_odd}
11516 generate the following vectors:
11517 vect1: [res1,res3,res5,res7],
11518 vect2: [res2,res4,res6,res8].
11519
11520 When vectorizing outer-loops, we execute the inner-loop sequentially
11521 (each vectorized inner-loop iteration contributes to VF outer-loop
11522 iterations in parallel).  We therefore don't allow changing the
11523 order of the computation in the inner-loop during outer-loop
11524 vectorization. */
11525 /* TODO: Another case in which order doesn't *really* matter is when we
11526 widen and then contract again, e.g. (short)((int)x * y >> 8).
11527 Normally, pack_trunc performs an even/odd permute, whereas the
11528 repack from an even/odd expansion would be an interleave, which
11529 would be significantly simpler for e.g. AVX2. */
11530 /* In any case, in order to avoid duplicating the code below, recurse
11531 on VEC_WIDEN_MULT_EVEN_EXPR. If it succeeds, all the return values
11532 are properly set up for the caller. If we fail, we'll continue with
11533 a VEC_WIDEN_MULT_LO/HI_EXPR check. */
11534 if (vect_loop
11535 && STMT_VINFO_RELEVANT (stmt_info) == vect_used_by_reduction
11536 && !nested_in_vect_loop_p (vect_loop, stmt_info)
11537 && supportable_widening_operation (VEC_WIDEN_MULT_EVEN_EXPR,
11538 stmt_info, vectype_out,
11539 vectype_in, code1, code2,
11540 multi_step_cvt, interm_types))
11541 {
11542 /* Elements in a vector with the vect_used_by_reduction property cannot
11543 be reordered if the use chain with this property does not have the
11544 same operation.  One such example is s += a * b, where elements
11545 in a and b cannot be reordered. Here we check if the vector defined
11546 by STMT is only directly used in the reduction statement. */
11547 tree lhs = gimple_assign_lhs (stmt_info->stmt);
11548 stmt_vec_info use_stmt_info = loop_info->lookup_single_use (lhs);
11549 if (use_stmt_info
11550 && STMT_VINFO_DEF_TYPE (use_stmt_info) == vect_reduction_def)
11551 return true;
11552 }
11553 c1 = VEC_WIDEN_MULT_LO_EXPR;
11554 c2 = VEC_WIDEN_MULT_HI_EXPR;
11555 break;
11556
11557 case DOT_PROD_EXPR:
11558 c1 = DOT_PROD_EXPR;
11559 c2 = DOT_PROD_EXPR;
11560 break;
11561
11562 case SAD_EXPR:
11563 c1 = SAD_EXPR;
11564 c2 = SAD_EXPR;
11565 break;
11566
11567 case VEC_WIDEN_MULT_EVEN_EXPR:
11568 /* Support the recursion induced just above. */
11569 c1 = VEC_WIDEN_MULT_EVEN_EXPR;
11570 c2 = VEC_WIDEN_MULT_ODD_EXPR;
11571 break;
11572
11573 case WIDEN_LSHIFT_EXPR:
11574 c1 = VEC_WIDEN_LSHIFT_LO_EXPR;
11575 c2 = VEC_WIDEN_LSHIFT_HI_EXPR;
11576 break;
11577
11578 CASE_CONVERT:
11579 c1 = VEC_UNPACK_LO_EXPR;
11580 c2 = VEC_UNPACK_HI_EXPR;
11581 break;
11582
11583 case FLOAT_EXPR:
11584 c1 = VEC_UNPACK_FLOAT_LO_EXPR;
11585 c2 = VEC_UNPACK_FLOAT_HI_EXPR;
11586 break;
11587
11588 case FIX_TRUNC_EXPR:
11589 c1 = VEC_UNPACK_FIX_TRUNC_LO_EXPR;
11590 c2 = VEC_UNPACK_FIX_TRUNC_HI_EXPR;
11591 break;
11592
11593 default:
11594 gcc_unreachable ();
11595 }
11596
11597 if (BYTES_BIG_ENDIAN && c1 != VEC_WIDEN_MULT_EVEN_EXPR)
11598 std::swap (c1, c2);
11599
11600 if (code == FIX_TRUNC_EXPR)
11601 {
11602 /* The signedness is determined from output operand. */
11603 optab1 = optab_for_tree_code (c1, vectype_out, optab_default);
11604 optab2 = optab_for_tree_code (c2, vectype_out, optab_default);
11605 }
11606 else if (CONVERT_EXPR_CODE_P (code)
11607 && VECTOR_BOOLEAN_TYPE_P (wide_vectype)
11608 && VECTOR_BOOLEAN_TYPE_P (vectype)
11609 && TYPE_MODE (wide_vectype) == TYPE_MODE (vectype)
11610 && SCALAR_INT_MODE_P (TYPE_MODE (vectype)))
11611 {
11612 /* If the input and result modes are the same, a different optab
11613 is needed where we pass in the number of units in vectype. */
11614 optab1 = vec_unpacks_sbool_lo_optab;
11615 optab2 = vec_unpacks_sbool_hi_optab;
11616 }
11617 else
11618 {
11619 optab1 = optab_for_tree_code (c1, vectype, optab_default);
11620 optab2 = optab_for_tree_code (c2, vectype, optab_default);
11621 }
11622
11623 if (!optab1 || !optab2)
11624 return false;
11625
11626 vec_mode = TYPE_MODE (vectype);
11627 if ((icode1 = optab_handler (optab1, vec_mode)) == CODE_FOR_nothing
11628 || (icode2 = optab_handler (optab2, vec_mode)) == CODE_FOR_nothing)
11629 return false;
11630
11631 *code1 = c1;
11632 *code2 = c2;
11633
11634 if (insn_data[icode1].operand[0].mode == TYPE_MODE (wide_vectype)
11635 && insn_data[icode2].operand[0].mode == TYPE_MODE (wide_vectype))
11636 {
11637 if (!VECTOR_BOOLEAN_TYPE_P (vectype))
11638 return true;
11639 /* For scalar masks we may have different boolean
11640 vector types having the same QImode. Thus we
11641 add an additional check for the number of elements.  */
11642 if (known_eq (TYPE_VECTOR_SUBPARTS (vectype),
11643 TYPE_VECTOR_SUBPARTS (wide_vectype) * 2))
11644 return true;
11645 }
11646
11647 /* Check if it's a multi-step conversion that can be done using intermediate
11648 types. */
11649
11650 prev_type = vectype;
11651 prev_mode = vec_mode;
11652
11653 if (!CONVERT_EXPR_CODE_P (code))
11654 return false;
11655
11656 /* We assume here that there will not be more than MAX_INTERM_CVT_STEPS
11657 intermediate steps in the promotion sequence.  We try
11658 MAX_INTERM_CVT_STEPS to get to WIDE_VECTYPE, and fail if we do
11659 not.  */
11660 interm_types->create (MAX_INTERM_CVT_STEPS);
11661 for (i = 0; i < MAX_INTERM_CVT_STEPS; i++)
11662 {
11663 intermediate_mode = insn_data[icode1].operand[0].mode;
11664 if (VECTOR_BOOLEAN_TYPE_P (prev_type))
11665 intermediate_type
11666 = vect_halve_mask_nunits (prev_type, intermediate_mode);
11667 else
11668 intermediate_type
11669 = lang_hooks.types.type_for_mode (intermediate_mode,
11670 TYPE_UNSIGNED (prev_type));
11671
11672 if (VECTOR_BOOLEAN_TYPE_P (intermediate_type)
11673 && VECTOR_BOOLEAN_TYPE_P (prev_type)
11674 && intermediate_mode == prev_mode
11675 && SCALAR_INT_MODE_P (prev_mode))
11676 {
11677 /* If the input and result modes are the same, a different optab
11678 is needed where we pass in the number of units in vectype. */
11679 optab3 = vec_unpacks_sbool_lo_optab;
11680 optab4 = vec_unpacks_sbool_hi_optab;
11681 }
11682 else
11683 {
11684 optab3 = optab_for_tree_code (c1, intermediate_type, optab_default);
11685 optab4 = optab_for_tree_code (c2, intermediate_type, optab_default);
11686 }
11687
11688 if (!optab3 || !optab4
11689 || (icode1 = optab_handler (optab1, prev_mode)) == CODE_FOR_nothing
11690 || insn_data[icode1].operand[0].mode != intermediate_mode
11691 || (icode2 = optab_handler (optab2, prev_mode)) == CODE_FOR_nothing
11692 || insn_data[icode2].operand[0].mode != intermediate_mode
11693 || ((icode1 = optab_handler (optab3, intermediate_mode))
11694 == CODE_FOR_nothing)
11695 || ((icode2 = optab_handler (optab4, intermediate_mode))
11696 == CODE_FOR_nothing))
11697 break;
11698
11699 interm_types->quick_push (intermediate_type);
11700 (*multi_step_cvt)++;
11701
11702 if (insn_data[icode1].operand[0].mode == TYPE_MODE (wide_vectype)
11703 && insn_data[icode2].operand[0].mode == TYPE_MODE (wide_vectype))
11704 {
11705 if (!VECTOR_BOOLEAN_TYPE_P (vectype))
11706 return true;
11707 if (known_eq (TYPE_VECTOR_SUBPARTS (intermediate_type),
11708 TYPE_VECTOR_SUBPARTS (wide_vectype) * 2))
11709 return true;
11710 }
11711
11712 prev_type = intermediate_type;
11713 prev_mode = intermediate_mode;
11714 }
11715
11716 interm_types->release ();
11717 return false;
11718 }
11719
11720
11721 /* Function supportable_narrowing_operation
11722
11723 Check whether an operation represented by the code CODE is a
11724 narrowing operation that is supported by the target platform in
11725 vector form (i.e., when operating on arguments of type VECTYPE_IN
11726 and producing a result of type VECTYPE_OUT).
11727
11728 Narrowing operations we currently support are NOP (CONVERT), FIX_TRUNC
11729 and FLOAT. This function checks if these operations are supported by
11730 the target platform directly via vector tree-codes.
11731
11732 Output:
11733 - CODE1 is the code of a vector operation to be used when
11734 vectorizing the operation, if available.
11735 - MULTI_STEP_CVT determines the number of required intermediate steps in
11736 case of multi-step conversion (like int->short->char - in that case
11737 MULTI_STEP_CVT will be 1).
11738 - INTERM_TYPES contains the intermediate type required to perform the
11739 narrowing operation (short in the above example). */
11740
11741 bool
11742 supportable_narrowing_operation (enum tree_code code,
11743 tree vectype_out, tree vectype_in,
11744 enum tree_code *code1, int *multi_step_cvt,
11745 vec<tree> *interm_types)
11746 {
11747 machine_mode vec_mode;
11748 enum insn_code icode1;
11749 optab optab1, interm_optab;
11750 tree vectype = vectype_in;
11751 tree narrow_vectype = vectype_out;
11752 enum tree_code c1;
11753 tree intermediate_type, prev_type;
11754 machine_mode intermediate_mode, prev_mode;
11755 int i;
11756 bool uns;
11757
11758 *multi_step_cvt = 0;
11759 switch (code)
11760 {
11761 CASE_CONVERT:
11762 c1 = VEC_PACK_TRUNC_EXPR;
11763 if (VECTOR_BOOLEAN_TYPE_P (narrow_vectype)
11764 && VECTOR_BOOLEAN_TYPE_P (vectype)
11765 && TYPE_MODE (narrow_vectype) == TYPE_MODE (vectype)
11766 && SCALAR_INT_MODE_P (TYPE_MODE (vectype)))
11767 optab1 = vec_pack_sbool_trunc_optab;
11768 else
11769 optab1 = optab_for_tree_code (c1, vectype, optab_default);
11770 break;
11771
11772 case FIX_TRUNC_EXPR:
11773 c1 = VEC_PACK_FIX_TRUNC_EXPR;
11774 /* The signedness is determined from output operand. */
11775 optab1 = optab_for_tree_code (c1, vectype_out, optab_default);
11776 break;
11777
11778 case FLOAT_EXPR:
11779 c1 = VEC_PACK_FLOAT_EXPR;
11780 optab1 = optab_for_tree_code (c1, vectype, optab_default);
11781 break;
11782
11783 default:
11784 gcc_unreachable ();
11785 }
11786
11787 if (!optab1)
11788 return false;
11789
11790 vec_mode = TYPE_MODE (vectype);
11791 if ((icode1 = optab_handler (optab1, vec_mode)) == CODE_FOR_nothing)
11792 return false;
11793
11794 *code1 = c1;
11795
11796 if (insn_data[icode1].operand[0].mode == TYPE_MODE (narrow_vectype))
11797 {
11798 if (!VECTOR_BOOLEAN_TYPE_P (vectype))
11799 return true;
11800 /* For scalar masks we may have different boolean
11801 vector types having the same QImode. Thus we
11802 add an additional check for the number of elements.  */
11803 if (known_eq (TYPE_VECTOR_SUBPARTS (vectype) * 2,
11804 TYPE_VECTOR_SUBPARTS (narrow_vectype)))
11805 return true;
11806 }
11807
11808 if (code == FLOAT_EXPR)
11809 return false;
11810
11811 /* Check if it's a multi-step conversion that can be done using intermediate
11812 types. */
11813 prev_mode = vec_mode;
11814 prev_type = vectype;
11815 if (code == FIX_TRUNC_EXPR)
11816 uns = TYPE_UNSIGNED (vectype_out);
11817 else
11818 uns = TYPE_UNSIGNED (vectype);
11819
11820 /* For multi-step FIX_TRUNC_EXPR prefer signed floating to integer
11821 conversion over unsigned, as unsigned FIX_TRUNC_EXPR is often more
11822 costly than signed. */
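/* Illustrative example (not an exhaustive argument): a multi-step
   double -> unsigned char conversion can perform its float-to-integer
   step as a signed conversion, since the subsequent packing keeps only
   the low bits for values that are representable anyway.  */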
11823 if (code == FIX_TRUNC_EXPR && uns)
11824 {
11825 enum insn_code icode2;
11826
11827 intermediate_type
11828 = lang_hooks.types.type_for_mode (TYPE_MODE (vectype_out), 0);
11829 interm_optab
11830 = optab_for_tree_code (c1, intermediate_type, optab_default);
11831 if (interm_optab != unknown_optab
11832 && (icode2 = optab_handler (optab1, vec_mode)) != CODE_FOR_nothing
11833 && insn_data[icode1].operand[0].mode
11834 == insn_data[icode2].operand[0].mode)
11835 {
11836 uns = false;
11837 optab1 = interm_optab;
11838 icode1 = icode2;
11839 }
11840 }
11841
11842 /* We assume here that there will not be more than MAX_INTERM_CVT_STEPS
11843 intermediate steps in the narrowing sequence.  We try
11844 MAX_INTERM_CVT_STEPS to get to NARROW_VECTYPE, and fail if we do not. */
11845 interm_types->create (MAX_INTERM_CVT_STEPS);
11846 for (i = 0; i < MAX_INTERM_CVT_STEPS; i++)
11847 {
11848 intermediate_mode = insn_data[icode1].operand[0].mode;
11849 if (VECTOR_BOOLEAN_TYPE_P (prev_type))
11850 intermediate_type
11851 = vect_double_mask_nunits (prev_type, intermediate_mode);
11852 else
11853 intermediate_type
11854 = lang_hooks.types.type_for_mode (intermediate_mode, uns);
11855 if (VECTOR_BOOLEAN_TYPE_P (intermediate_type)
11856 && VECTOR_BOOLEAN_TYPE_P (prev_type)
11857 && intermediate_mode == prev_mode
11858 && SCALAR_INT_MODE_P (prev_mode))
11859 interm_optab = vec_pack_sbool_trunc_optab;
11860 else
11861 interm_optab
11862 = optab_for_tree_code (VEC_PACK_TRUNC_EXPR, intermediate_type,
11863 optab_default);
11864 if (!interm_optab
11865 || ((icode1 = optab_handler (optab1, prev_mode)) == CODE_FOR_nothing)
11866 || insn_data[icode1].operand[0].mode != intermediate_mode
11867 || ((icode1 = optab_handler (interm_optab, intermediate_mode))
11868 == CODE_FOR_nothing))
11869 break;
11870
11871 interm_types->quick_push (intermediate_type);
11872 (*multi_step_cvt)++;
11873
11874 if (insn_data[icode1].operand[0].mode == TYPE_MODE (narrow_vectype))
11875 {
11876 if (!VECTOR_BOOLEAN_TYPE_P (vectype))
11877 return true;
11878 if (known_eq (TYPE_VECTOR_SUBPARTS (intermediate_type) * 2,
11879 TYPE_VECTOR_SUBPARTS (narrow_vectype)))
11880 return true;
11881 }
11882
11883 prev_mode = intermediate_mode;
11884 prev_type = intermediate_type;
11885 optab1 = interm_optab;
11886 }
11887
11888 interm_types->release ();
11889 return false;
11890 }
11891
11892 /* Generate and return a statement that sets vector mask MASK such that
11893 MASK[I] is true iff J + START_INDEX < END_INDEX for all J <= I. */
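/* For example (illustrative): with START_INDEX = 6, END_INDEX = 10 and an
   8-element mask, the result is { 1, 1, 1, 1, 0, 0, 0, 0 }: element I is
   active iff I < END_INDEX - START_INDEX.  */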
11894
11895 gcall *
11896 vect_gen_while (tree mask, tree start_index, tree end_index)
11897 {
11898 tree cmp_type = TREE_TYPE (start_index);
11899 tree mask_type = TREE_TYPE (mask);
11900 gcc_checking_assert (direct_internal_fn_supported_p (IFN_WHILE_ULT,
11901 cmp_type, mask_type,
11902 OPTIMIZE_FOR_SPEED));
11903 gcall *call = gimple_build_call_internal (IFN_WHILE_ULT, 3,
11904 start_index, end_index,
11905 build_zero_cst (mask_type));
11906 gimple_call_set_lhs (call, mask);
11907 return call;
11908 }
11909
11910 /* Generate a vector mask of type MASK_TYPE for which index I is false iff
11911 J + START_INDEX < END_INDEX for all J <= I. Add the statements to SEQ. */
11912
11913 tree
11914 vect_gen_while_not (gimple_seq *seq, tree mask_type, tree start_index,
11915 tree end_index)
11916 {
11917 tree tmp = make_ssa_name (mask_type);
11918 gcall *call = vect_gen_while (tmp, start_index, end_index);
11919 gimple_seq_add_stmt (seq, call);
11920 return gimple_build (seq, BIT_NOT_EXPR, mask_type, tmp);
11921 }
11922
11923 /* Try to compute the vector types required to vectorize STMT_INFO,
11924 returning true on success and false if vectorization isn't possible.
11925
11926 On success:
11927
11928 - Set *STMT_VECTYPE_OUT to:
11929 - NULL_TREE if the statement doesn't need to be vectorized;
11930 - boolean_type_node if the statement is a boolean operation whose
11931 vector type can only be determined once all the other vector types
11932 are known; and
11933 - the equivalent of STMT_VINFO_VECTYPE otherwise.
11934
11935 - Set *NUNITS_VECTYPE_OUT to the vector type that contains the maximum
11936 number of units needed to vectorize STMT_INFO, or NULL_TREE if the
11937 statement does not help to determine the overall number of units. */
11938
11939 opt_result
11940 vect_get_vector_types_for_stmt (stmt_vec_info stmt_info,
11941 tree *stmt_vectype_out,
11942 tree *nunits_vectype_out)
11943 {
11944 vec_info *vinfo = stmt_info->vinfo;
11945 gimple *stmt = stmt_info->stmt;
11946
11947 *stmt_vectype_out = NULL_TREE;
11948 *nunits_vectype_out = NULL_TREE;
11949
11950 if (gimple_get_lhs (stmt) == NULL_TREE
11951 /* MASK_STORE has no lhs, but is ok. */
11952 && !gimple_call_internal_p (stmt, IFN_MASK_STORE))
11953 {
11954 if (is_a <gcall *> (stmt))
11955 {
11956 /* Ignore calls with no lhs. These must be calls to
11957 #pragma omp simd functions, and the vectorization factor
11958 they really need can't be determined until
11959 vectorizable_simd_clone_call. */
11960 if (dump_enabled_p ())
11961 dump_printf_loc (MSG_NOTE, vect_location,
11962 "defer to SIMD clone analysis.\n");
11963 return opt_result::success ();
11964 }
11965
11966 return opt_result::failure_at (stmt,
11967 "not vectorized: irregular stmt.%G", stmt);
11968 }
11969
11970 if (VECTOR_MODE_P (TYPE_MODE (gimple_expr_type (stmt))))
11971 return opt_result::failure_at (stmt,
11972 "not vectorized: vector stmt in loop:%G",
11973 stmt);
11974
11975 tree vectype;
11976 tree scalar_type = NULL_TREE;
11977 if (STMT_VINFO_VECTYPE (stmt_info))
11978 *stmt_vectype_out = vectype = STMT_VINFO_VECTYPE (stmt_info);
11979 else
11980 {
11981 gcc_assert (!STMT_VINFO_DATA_REF (stmt_info));
11982 if (gimple_call_internal_p (stmt, IFN_MASK_STORE))
11983 scalar_type = TREE_TYPE (gimple_call_arg (stmt, 3));
11984 else
11985 scalar_type = TREE_TYPE (gimple_get_lhs (stmt));
11986
11987 /* Pure bool ops don't participate in number-of-units computation.
11988 For comparisons use the types being compared. */
11989 if (VECT_SCALAR_BOOLEAN_TYPE_P (scalar_type)
11990 && is_gimple_assign (stmt)
11991 && gimple_assign_rhs_code (stmt) != COND_EXPR)
11992 {
11993 *stmt_vectype_out = boolean_type_node;
11994
11995 tree rhs1 = gimple_assign_rhs1 (stmt);
11996 if (TREE_CODE_CLASS (gimple_assign_rhs_code (stmt)) == tcc_comparison
11997 && !VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (rhs1)))
11998 scalar_type = TREE_TYPE (rhs1);
11999 else
12000 {
12001 if (dump_enabled_p ())
12002 dump_printf_loc (MSG_NOTE, vect_location,
12003 "pure bool operation.\n");
12004 return opt_result::success ();
12005 }
12006 }
12007
12008 if (dump_enabled_p ())
12009 dump_printf_loc (MSG_NOTE, vect_location,
12010 "get vectype for scalar type: %T\n", scalar_type);
12011 vectype = get_vectype_for_scalar_type (vinfo, scalar_type);
12012 if (!vectype)
12013 return opt_result::failure_at (stmt,
12014 "not vectorized:"
12015 " unsupported data-type %T\n",
12016 scalar_type);
12017
12018 if (!*stmt_vectype_out)
12019 *stmt_vectype_out = vectype;
12020
12021 if (dump_enabled_p ())
12022 dump_printf_loc (MSG_NOTE, vect_location, "vectype: %T\n", vectype);
12023 }
12024
12025 /* Don't try to compute scalar types if the stmt produces a boolean
12026 vector; use the existing vector type instead. */
12027 tree nunits_vectype;
12028 if (VECTOR_BOOLEAN_TYPE_P (vectype))
12029 nunits_vectype = vectype;
12030 else
12031 {
12032 /* The number of units is set according to the smallest scalar
12033 type (or the largest vector size, but we only support one
12034 vector size per vectorization). */
12035 if (*stmt_vectype_out != boolean_type_node)
12036 {
12037 HOST_WIDE_INT dummy;
12038 scalar_type = vect_get_smallest_scalar_type (stmt_info,
12039 &dummy, &dummy);
12040 }
12041 if (dump_enabled_p ())
12042 dump_printf_loc (MSG_NOTE, vect_location,
12043 "get vectype for scalar type: %T\n", scalar_type);
12044 nunits_vectype = get_vectype_for_scalar_type (vinfo, scalar_type);
12045 }
12046 if (!nunits_vectype)
12047 return opt_result::failure_at (stmt,
12048 "not vectorized: unsupported data-type %T\n",
12049 scalar_type);
12050
12051 if (maybe_ne (GET_MODE_SIZE (TYPE_MODE (vectype)),
12052 GET_MODE_SIZE (TYPE_MODE (nunits_vectype))))
12053 return opt_result::failure_at (stmt,
12054 "not vectorized: different sized vector "
12055 "types in statement, %T and %T\n",
12056 vectype, nunits_vectype);
12057
12058 if (dump_enabled_p ())
12059 {
12060 dump_printf_loc (MSG_NOTE, vect_location, "vectype: %T\n",
12061 nunits_vectype);
12062
12063 dump_printf_loc (MSG_NOTE, vect_location, "nunits = ");
12064 dump_dec (MSG_NOTE, TYPE_VECTOR_SUBPARTS (nunits_vectype));
12065 dump_printf (MSG_NOTE, "\n");
12066 }
12067
12068 *nunits_vectype_out = nunits_vectype;
12069 return opt_result::success ();
12070 }
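
/* Editorial sketch of a caller, modelled on how the analysis phase consumes
   the opt_result above; the helper name and exact contract are hypothetical.
   It records the statement vector type unless the statement is a boolean
   operation whose type must be resolved later, and hands the nunits vector
   type back for vectorization-factor bookkeeping.  */

static opt_result
record_stmt_vectype_sketch (stmt_vec_info stmt_info, tree *nunits_vectype_out)
{
  tree stmt_vectype;
  opt_result res
    = vect_get_vector_types_for_stmt (stmt_info, &stmt_vectype,
				      nunits_vectype_out);
  if (!res)
    return res;

  /* A NULL_TREE vectype means the statement needs no vector type;
     boolean_type_node means it must be resolved later from its uses.  */
  if (stmt_vectype
      && stmt_vectype != boolean_type_node
      && !STMT_VINFO_VECTYPE (stmt_info))
    STMT_VINFO_VECTYPE (stmt_info) = stmt_vectype;

  return opt_result::success ();
}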
12071
12072 /* Try to determine the correct vector type for STMT_INFO, which is a
12073 statement that produces a scalar boolean result. Return the vector
12074 type on success, otherwise return NULL_TREE. */
12075
12076 opt_tree
12077 vect_get_mask_type_for_stmt (stmt_vec_info stmt_info)
12078 {
12079 vec_info *vinfo = stmt_info->vinfo;
12080 gimple *stmt = stmt_info->stmt;
12081 tree mask_type = NULL;
12082 tree vectype, scalar_type;
12083
12084 if (is_gimple_assign (stmt)
12085 && TREE_CODE_CLASS (gimple_assign_rhs_code (stmt)) == tcc_comparison
12086 && !VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (gimple_assign_rhs1 (stmt))))
12087 {
12088 scalar_type = TREE_TYPE (gimple_assign_rhs1 (stmt));
12089 mask_type = get_mask_type_for_scalar_type (vinfo, scalar_type);
12090
12091 if (!mask_type)
12092 return opt_tree::failure_at (stmt,
12093 "not vectorized: unsupported mask\n");
12094 }
12095 else
12096 {
12097 tree rhs;
12098 ssa_op_iter iter;
12099 enum vect_def_type dt;
12100
12101 FOR_EACH_SSA_TREE_OPERAND (rhs, stmt, iter, SSA_OP_USE)
12102 {
12103 if (!vect_is_simple_use (rhs, stmt_info->vinfo, &dt, &vectype))
12104 return opt_tree::failure_at (stmt,
12105 "not vectorized:can't compute mask"
12106 " type for statement, %G", stmt);
12107
12108 /* No vectype probably means an external definition.
12109 Allow it in case there is another operand which
12110 allows us to determine the mask type. */
12111 if (!vectype)
12112 continue;
12113
12114 if (!mask_type)
12115 mask_type = vectype;
12116 else if (maybe_ne (TYPE_VECTOR_SUBPARTS (mask_type),
12117 TYPE_VECTOR_SUBPARTS (vectype)))
12118 return opt_tree::failure_at (stmt,
12119 "not vectorized: different sized mask"
12120 " types in statement, %T and %T\n",
12121 mask_type, vectype);
12122 else if (VECTOR_BOOLEAN_TYPE_P (mask_type)
12123 != VECTOR_BOOLEAN_TYPE_P (vectype))
12124 return opt_tree::failure_at (stmt,
12125 "not vectorized: mixed mask and "
12126 "nonmask vector types in statement, "
12127 "%T and %T\n",
12128 mask_type, vectype);
12129 }
12130
12131 /* We may compare a boolean value loaded as a vector of integers.
12132 Fix mask_type in that case. */
12133 if (mask_type
12134 && !VECTOR_BOOLEAN_TYPE_P (mask_type)
12135 && gimple_code (stmt) == GIMPLE_ASSIGN
12136 && TREE_CODE_CLASS (gimple_assign_rhs_code (stmt)) == tcc_comparison)
12137 mask_type = truth_type_for (mask_type);
12138 }
12139
12140 /* No mask_type should mean a loop-invariant predicate.
12141 This is probably a subject for optimization in if-conversion. */
12142 if (!mask_type)
12143 return opt_tree::failure_at (stmt,
12144 "not vectorized: can't compute mask type "
12145 "for statement: %G", stmt);
12146
12147 return opt_tree::success (mask_type);
12148 }
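
/* Editorial sketch of a consumer (hypothetical helper): for a statement with
   a scalar boolean result and no vector type yet, ask for its mask type and
   record it.  Returns false if no mask type could be computed.  */

static bool
record_mask_vectype_sketch (stmt_vec_info stmt_info)
{
  if (STMT_VINFO_VECTYPE (stmt_info))
    return true;

  opt_tree mask = vect_get_mask_type_for_stmt (stmt_info);
  if (!mask)
    return false;

  STMT_VINFO_VECTYPE (stmt_info) = mask;
  return true;
}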