Replace vec_info::vector_size with vec_info::vector_mode
[gcc.git] / gcc / tree-vect-stmts.c
1 /* Statement Analysis and Transformation for Vectorization
2 Copyright (C) 2003-2019 Free Software Foundation, Inc.
3 Contributed by Dorit Naishlos <dorit@il.ibm.com>
4 and Ira Rosen <irar@il.ibm.com>
5
6 This file is part of GCC.
7
8 GCC is free software; you can redistribute it and/or modify it under
9 the terms of the GNU General Public License as published by the Free
10 Software Foundation; either version 3, or (at your option) any later
11 version.
12
13 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
14 WARRANTY; without even the implied warranty of MERCHANTABILITY or
15 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
16 for more details.
17
18 You should have received a copy of the GNU General Public License
19 along with GCC; see the file COPYING3. If not see
20 <http://www.gnu.org/licenses/>. */
21
22 #include "config.h"
23 #include "system.h"
24 #include "coretypes.h"
25 #include "backend.h"
26 #include "target.h"
27 #include "rtl.h"
28 #include "tree.h"
29 #include "gimple.h"
30 #include "ssa.h"
31 #include "optabs-tree.h"
32 #include "insn-config.h"
33 #include "recog.h" /* FIXME: for insn_data */
34 #include "cgraph.h"
35 #include "dumpfile.h"
36 #include "alias.h"
37 #include "fold-const.h"
38 #include "stor-layout.h"
39 #include "tree-eh.h"
40 #include "gimplify.h"
41 #include "gimple-iterator.h"
42 #include "gimplify-me.h"
43 #include "tree-cfg.h"
44 #include "tree-ssa-loop-manip.h"
45 #include "cfgloop.h"
46 #include "explow.h"
47 #include "tree-ssa-loop.h"
48 #include "tree-scalar-evolution.h"
49 #include "tree-vectorizer.h"
50 #include "builtins.h"
51 #include "internal-fn.h"
52 #include "tree-vector-builder.h"
53 #include "vec-perm-indices.h"
54 #include "tree-ssa-loop-niter.h"
55 #include "gimple-fold.h"
56 #include "regs.h"
57 #include "attribs.h"
58
59 /* For lang_hooks.types.type_for_mode. */
60 #include "langhooks.h"
61
62 /* Return the vectorized type for the given statement. */
63
64 tree
65 stmt_vectype (class _stmt_vec_info *stmt_info)
66 {
67 return STMT_VINFO_VECTYPE (stmt_info);
68 }
69
70 /* Return TRUE iff the given statement is in an inner loop relative to
71 the loop being vectorized. */
72 bool
73 stmt_in_inner_loop_p (class _stmt_vec_info *stmt_info)
74 {
75 gimple *stmt = STMT_VINFO_STMT (stmt_info);
76 basic_block bb = gimple_bb (stmt);
77 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
78 class loop* loop;
79
80 if (!loop_vinfo)
81 return false;
82
83 loop = LOOP_VINFO_LOOP (loop_vinfo);
84
85 return (bb->loop_father == loop->inner);
86 }
87
88 /* Record the cost of a statement, either by directly informing the
89 target model or by saving it in a vector for later processing.
90 Return a preliminary estimate of the statement's cost. */
91
92 unsigned
93 record_stmt_cost (stmt_vector_for_cost *body_cost_vec, int count,
94 enum vect_cost_for_stmt kind, stmt_vec_info stmt_info,
95 int misalign, enum vect_cost_model_location where)
96 {
97 if ((kind == vector_load || kind == unaligned_load)
98 && STMT_VINFO_GATHER_SCATTER_P (stmt_info))
99 kind = vector_gather_load;
100 if ((kind == vector_store || kind == unaligned_store)
101 && STMT_VINFO_GATHER_SCATTER_P (stmt_info))
102 kind = vector_scatter_store;
103
104 stmt_info_for_cost si = { count, kind, where, stmt_info, misalign };
105 body_cost_vec->safe_push (si);
106
107 tree vectype = stmt_info ? stmt_vectype (stmt_info) : NULL_TREE;
108 return (unsigned)
109 (builtin_vectorization_cost (kind, vectype, misalign) * count);
110 }
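/* Editorial sketch, not part of the original file: a hypothetical helper
   showing how a caller might use record_stmt_cost above.  It records the
   body cost of COUNT vector loads for STMT_INFO and returns the preliminary
   estimate.  The name example_record_vector_loads is made up for
   illustration.  */

static unsigned
example_record_vector_loads (stmt_vector_for_cost *cost_vec,
			     stmt_vec_info stmt_info, int count)
{
  /* Aligned vector loads counted against the loop body.  */
  return record_stmt_cost (cost_vec, count, vector_load,
			   stmt_info, 0, vect_body);
}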
111
112 /* Return a variable of type ELEM_TYPE[NELEMS]. */
113
114 static tree
115 create_vector_array (tree elem_type, unsigned HOST_WIDE_INT nelems)
116 {
117 return create_tmp_var (build_array_type_nelts (elem_type, nelems),
118 "vect_array");
119 }
120
121 /* ARRAY is an array of vectors created by create_vector_array.
122 Return an SSA_NAME for the vector in index N. The reference
123 is part of the vectorization of STMT_INFO and the vector is associated
124 with scalar destination SCALAR_DEST. */
125
126 static tree
127 read_vector_array (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
128 tree scalar_dest, tree array, unsigned HOST_WIDE_INT n)
129 {
130 tree vect_type, vect, vect_name, array_ref;
131 gimple *new_stmt;
132
133 gcc_assert (TREE_CODE (TREE_TYPE (array)) == ARRAY_TYPE);
134 vect_type = TREE_TYPE (TREE_TYPE (array));
135 vect = vect_create_destination_var (scalar_dest, vect_type);
136 array_ref = build4 (ARRAY_REF, vect_type, array,
137 build_int_cst (size_type_node, n),
138 NULL_TREE, NULL_TREE);
139
140 new_stmt = gimple_build_assign (vect, array_ref);
141 vect_name = make_ssa_name (vect, new_stmt);
142 gimple_assign_set_lhs (new_stmt, vect_name);
143 vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
144
145 return vect_name;
146 }
147
148 /* ARRAY is an array of vectors created by create_vector_array.
149 Emit code to store SSA_NAME VECT in index N of the array.
150 The store is part of the vectorization of STMT_INFO. */
151
152 static void
153 write_vector_array (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
154 tree vect, tree array, unsigned HOST_WIDE_INT n)
155 {
156 tree array_ref;
157 gimple *new_stmt;
158
159 array_ref = build4 (ARRAY_REF, TREE_TYPE (vect), array,
160 build_int_cst (size_type_node, n),
161 NULL_TREE, NULL_TREE);
162
163 new_stmt = gimple_build_assign (array_ref, vect);
164 vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
165 }
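/* Editorial sketch, not part of the original file: shows how the three
   array helpers above compose.  A hypothetical caller builds a two-element
   array of vectors, stores VECT0 and VECT1 into it, and reads element 1
   back as a fresh SSA name.  */

static tree
example_round_trip_vector_array (stmt_vec_info stmt_info,
				 gimple_stmt_iterator *gsi,
				 tree scalar_dest, tree vect0, tree vect1)
{
  tree array = create_vector_array (TREE_TYPE (vect0), 2);
  write_vector_array (stmt_info, gsi, vect0, array, 0);
  write_vector_array (stmt_info, gsi, vect1, array, 1);
  /* Returns an SSA name holding the second stored vector.  */
  return read_vector_array (stmt_info, gsi, scalar_dest, array, 1);
}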
166
167 /* PTR is a pointer to an array of type TYPE. Return a representation
168 of *PTR. The memory reference replaces those in FIRST_DR
169 (and its group). */
170
171 static tree
172 create_array_ref (tree type, tree ptr, tree alias_ptr_type)
173 {
174 tree mem_ref;
175
176 mem_ref = build2 (MEM_REF, type, ptr, build_int_cst (alias_ptr_type, 0));
177 /* Arrays have the same alignment as their type. */
178 set_ptr_info_alignment (get_ptr_info (ptr), TYPE_ALIGN_UNIT (type), 0);
179 return mem_ref;
180 }
181
182 /* Add a clobber of variable VAR to the vectorization of STMT_INFO.
183 Emit the clobber before *GSI. */
184
185 static void
186 vect_clobber_variable (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
187 tree var)
188 {
189 tree clobber = build_clobber (TREE_TYPE (var));
190 gimple *new_stmt = gimple_build_assign (var, clobber);
191 vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
192 }
193
194 /* Utility functions used by vect_mark_stmts_to_be_vectorized. */
195
196 /* Function vect_mark_relevant.
197
198 Mark STMT_INFO as "relevant for vectorization" and add it to WORKLIST. */
199
200 static void
201 vect_mark_relevant (vec<stmt_vec_info> *worklist, stmt_vec_info stmt_info,
202 enum vect_relevant relevant, bool live_p)
203 {
204 enum vect_relevant save_relevant = STMT_VINFO_RELEVANT (stmt_info);
205 bool save_live_p = STMT_VINFO_LIVE_P (stmt_info);
206
207 if (dump_enabled_p ())
208 dump_printf_loc (MSG_NOTE, vect_location,
209 "mark relevant %d, live %d: %G", relevant, live_p,
210 stmt_info->stmt);
211
212 /* If this stmt is an original stmt in a pattern, we might need to mark its
213 related pattern stmt instead of the original stmt. However, such stmts
214 may have their own uses that are not in any pattern; in such cases the

215 stmt itself should be marked. */
216 if (STMT_VINFO_IN_PATTERN_P (stmt_info))
217 {
218 /* This is the last stmt in a sequence that was detected as a
219 pattern that can potentially be vectorized. Don't mark the stmt
220 as relevant/live because it's not going to be vectorized.
221 Instead mark the pattern-stmt that replaces it. */
222
223 if (dump_enabled_p ())
224 dump_printf_loc (MSG_NOTE, vect_location,
225 "last stmt in pattern. don't mark"
226 " relevant/live.\n");
227 stmt_vec_info old_stmt_info = stmt_info;
228 stmt_info = STMT_VINFO_RELATED_STMT (stmt_info);
229 gcc_assert (STMT_VINFO_RELATED_STMT (stmt_info) == old_stmt_info);
230 save_relevant = STMT_VINFO_RELEVANT (stmt_info);
231 save_live_p = STMT_VINFO_LIVE_P (stmt_info);
232 }
233
234 STMT_VINFO_LIVE_P (stmt_info) |= live_p;
235 if (relevant > STMT_VINFO_RELEVANT (stmt_info))
236 STMT_VINFO_RELEVANT (stmt_info) = relevant;
237
238 if (STMT_VINFO_RELEVANT (stmt_info) == save_relevant
239 && STMT_VINFO_LIVE_P (stmt_info) == save_live_p)
240 {
241 if (dump_enabled_p ())
242 dump_printf_loc (MSG_NOTE, vect_location,
243 "already marked relevant/live.\n");
244 return;
245 }
246
247 worklist->safe_push (stmt_info);
248 }
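/* Editorial sketch, not part of the original file: vect_mark_relevant is
   driven from a worklist, as in vect_mark_stmts_to_be_vectorized below.
   A minimal, hypothetical use looks like this.  */

static void
example_seed_worklist (stmt_vec_info stmt_info,
		       vec<stmt_vec_info> *worklist)
{
  /* Mark STMT_INFO as used in the scope but not live outside the loop;
     if its relevance or liveness changed, it is pushed onto WORKLIST.  */
  vect_mark_relevant (worklist, stmt_info, vect_used_in_scope, false);
}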
249
250
251 /* Function is_simple_and_all_uses_invariant
252
253 Return true if STMT_INFO is simple and all uses of it are invariant. */
254
255 bool
256 is_simple_and_all_uses_invariant (stmt_vec_info stmt_info,
257 loop_vec_info loop_vinfo)
258 {
259 tree op;
260 ssa_op_iter iter;
261
262 gassign *stmt = dyn_cast <gassign *> (stmt_info->stmt);
263 if (!stmt)
264 return false;
265
266 FOR_EACH_SSA_TREE_OPERAND (op, stmt, iter, SSA_OP_USE)
267 {
268 enum vect_def_type dt = vect_uninitialized_def;
269
270 if (!vect_is_simple_use (op, loop_vinfo, &dt))
271 {
272 if (dump_enabled_p ())
273 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
274 "use not simple.\n");
275 return false;
276 }
277
278 if (dt != vect_external_def && dt != vect_constant_def)
279 return false;
280 }
281 return true;
282 }
283
284 /* Function vect_stmt_relevant_p.
285
286 Return true if STMT_INFO, in the loop that is represented by LOOP_VINFO,
287 is "relevant for vectorization".
288
289 A stmt is considered "relevant for vectorization" if:
290 - it has uses outside the loop.
291 - it has vdefs (it alters memory).
292 - it is a control stmt in the loop (except for the exit condition).
293
294 CHECKME: what other side effects would the vectorizer allow? */
295
296 static bool
297 vect_stmt_relevant_p (stmt_vec_info stmt_info, loop_vec_info loop_vinfo,
298 enum vect_relevant *relevant, bool *live_p)
299 {
300 class loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
301 ssa_op_iter op_iter;
302 imm_use_iterator imm_iter;
303 use_operand_p use_p;
304 def_operand_p def_p;
305
306 *relevant = vect_unused_in_scope;
307 *live_p = false;
308
309 /* cond stmt other than loop exit cond. */
310 if (is_ctrl_stmt (stmt_info->stmt)
311 && STMT_VINFO_TYPE (stmt_info) != loop_exit_ctrl_vec_info_type)
312 *relevant = vect_used_in_scope;
313
314 /* changing memory. */
315 if (gimple_code (stmt_info->stmt) != GIMPLE_PHI)
316 if (gimple_vdef (stmt_info->stmt)
317 && !gimple_clobber_p (stmt_info->stmt))
318 {
319 if (dump_enabled_p ())
320 dump_printf_loc (MSG_NOTE, vect_location,
321 "vec_stmt_relevant_p: stmt has vdefs.\n");
322 *relevant = vect_used_in_scope;
323 }
324
325 /* uses outside the loop. */
326 FOR_EACH_PHI_OR_STMT_DEF (def_p, stmt_info->stmt, op_iter, SSA_OP_DEF)
327 {
328 FOR_EACH_IMM_USE_FAST (use_p, imm_iter, DEF_FROM_PTR (def_p))
329 {
330 basic_block bb = gimple_bb (USE_STMT (use_p));
331 if (!flow_bb_inside_loop_p (loop, bb))
332 {
333 if (is_gimple_debug (USE_STMT (use_p)))
334 continue;
335
336 if (dump_enabled_p ())
337 dump_printf_loc (MSG_NOTE, vect_location,
338 "vec_stmt_relevant_p: used out of loop.\n");
339
340 /* We expect all such uses to be in the loop exit phis
341 (because of loop closed form) */
342 gcc_assert (gimple_code (USE_STMT (use_p)) == GIMPLE_PHI);
343 gcc_assert (bb == single_exit (loop)->dest);
344
345 *live_p = true;
346 }
347 }
348 }
349
350 if (*live_p && *relevant == vect_unused_in_scope
351 && !is_simple_and_all_uses_invariant (stmt_info, loop_vinfo))
352 {
353 if (dump_enabled_p ())
354 dump_printf_loc (MSG_NOTE, vect_location,
355 "vec_stmt_relevant_p: stmt live but not relevant.\n");
356 *relevant = vect_used_only_live;
357 }
358
359 return (*live_p || *relevant);
360 }
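/* Editorial sketch, not part of the original file: the usual pairing of
   vect_stmt_relevant_p with vect_mark_relevant, mirroring the worklist
   initialization in vect_mark_stmts_to_be_vectorized below.  */

static void
example_mark_if_relevant (stmt_vec_info stmt_info, loop_vec_info loop_vinfo,
			  vec<stmt_vec_info> *worklist)
{
  enum vect_relevant relevant;
  bool live_p;
  if (vect_stmt_relevant_p (stmt_info, loop_vinfo, &relevant, &live_p))
    vect_mark_relevant (worklist, stmt_info, relevant, live_p);
}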
361
362
363 /* Function exist_non_indexing_operands_for_use_p
364
365 USE is one of the uses attached to STMT_INFO. Check if USE is
366 used in STMT_INFO for anything other than indexing an array. */
367
368 static bool
369 exist_non_indexing_operands_for_use_p (tree use, stmt_vec_info stmt_info)
370 {
371 tree operand;
372
373 /* USE corresponds to some operand in STMT. If there is no data
374 reference in STMT, then any operand that corresponds to USE
375 is not indexing an array. */
376 if (!STMT_VINFO_DATA_REF (stmt_info))
377 return true;
378
379 /* STMT has a data_ref. FORNOW this means that it's of one of
380 the following forms:
381 -1- ARRAY_REF = var
382 -2- var = ARRAY_REF
383 (This should have been verified in analyze_data_refs).
384
385 'var' in the second case corresponds to a def, not a use,
386 so USE cannot correspond to any operands that are not used
387 for array indexing.
388
389 Therefore, all we need to check is if STMT falls into the
390 first case, and whether var corresponds to USE. */
391
392 gassign *assign = dyn_cast <gassign *> (stmt_info->stmt);
393 if (!assign || !gimple_assign_copy_p (assign))
394 {
395 gcall *call = dyn_cast <gcall *> (stmt_info->stmt);
396 if (call && gimple_call_internal_p (call))
397 {
398 internal_fn ifn = gimple_call_internal_fn (call);
399 int mask_index = internal_fn_mask_index (ifn);
400 if (mask_index >= 0
401 && use == gimple_call_arg (call, mask_index))
402 return true;
403 int stored_value_index = internal_fn_stored_value_index (ifn);
404 if (stored_value_index >= 0
405 && use == gimple_call_arg (call, stored_value_index))
406 return true;
407 if (internal_gather_scatter_fn_p (ifn)
408 && use == gimple_call_arg (call, 1))
409 return true;
410 }
411 return false;
412 }
413
414 if (TREE_CODE (gimple_assign_lhs (assign)) == SSA_NAME)
415 return false;
416 operand = gimple_assign_rhs1 (assign);
417 if (TREE_CODE (operand) != SSA_NAME)
418 return false;
419
420 if (operand == use)
421 return true;
422
423 return false;
424 }
425
426
427 /*
428 Function process_use.
429
430 Inputs:
431 - a USE in STMT_VINFO in a loop represented by LOOP_VINFO
432 - RELEVANT - enum value to be set in the STMT_VINFO of the stmt
433 that defined USE. This is done by calling mark_relevant and passing it
434 the WORKLIST (to add DEF_STMT to the WORKLIST in case it is relevant).
435 - FORCE is true if exist_non_indexing_operands_for_use_p check shouldn't
436 be performed.
437
438 Outputs:
439 Generally, LIVE_P and RELEVANT are used to define the liveness and
440 relevance info of the DEF_STMT of this USE:
441 STMT_VINFO_LIVE_P (DEF_stmt_vinfo) <-- live_p
442 STMT_VINFO_RELEVANT (DEF_stmt_vinfo) <-- relevant
443 Exceptions:
444 - case 1: If USE is used only for address computations (e.g. array indexing),
445 which does not need to be directly vectorized, then the liveness/relevance
446 of the respective DEF_STMT is left unchanged.
447 - case 2: If STMT_VINFO is a reduction phi and DEF_STMT is a reduction stmt,
448 we skip DEF_STMT because it has already been processed.
449 - case 3: If DEF_STMT and STMT_VINFO are in different nests, then
450 "relevant" will be modified accordingly.
451
452 Return true if everything is as expected. Return false otherwise. */
453
454 static opt_result
455 process_use (stmt_vec_info stmt_vinfo, tree use, loop_vec_info loop_vinfo,
456 enum vect_relevant relevant, vec<stmt_vec_info> *worklist,
457 bool force)
458 {
459 stmt_vec_info dstmt_vinfo;
460 enum vect_def_type dt;
461
462 /* case 1: we are only interested in uses that need to be vectorized. Uses
463 that are used for address computation are not considered relevant. */
464 if (!force && !exist_non_indexing_operands_for_use_p (use, stmt_vinfo))
465 return opt_result::success ();
466
467 if (!vect_is_simple_use (use, loop_vinfo, &dt, &dstmt_vinfo))
468 return opt_result::failure_at (stmt_vinfo->stmt,
469 "not vectorized:"
470 " unsupported use in stmt.\n");
471
472 if (!dstmt_vinfo)
473 return opt_result::success ();
474
475 basic_block def_bb = gimple_bb (dstmt_vinfo->stmt);
476 basic_block bb = gimple_bb (stmt_vinfo->stmt);
477
478 /* case 2: A reduction phi (STMT) defined by a reduction stmt (DSTMT_VINFO).
479 We have to force the stmt live since the epilogue loop needs it to
480 continue computing the reduction. */
481 if (gimple_code (stmt_vinfo->stmt) == GIMPLE_PHI
482 && STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_reduction_def
483 && gimple_code (dstmt_vinfo->stmt) != GIMPLE_PHI
484 && STMT_VINFO_DEF_TYPE (dstmt_vinfo) == vect_reduction_def
485 && bb->loop_father == def_bb->loop_father)
486 {
487 if (dump_enabled_p ())
488 dump_printf_loc (MSG_NOTE, vect_location,
489 "reduc-stmt defining reduc-phi in the same nest.\n");
490 vect_mark_relevant (worklist, dstmt_vinfo, relevant, true);
491 return opt_result::success ();
492 }
493
494 /* case 3a: outer-loop stmt defining an inner-loop stmt:
495 outer-loop-header-bb:
496 d = dstmt_vinfo
497 inner-loop:
498 stmt # use (d)
499 outer-loop-tail-bb:
500 ... */
501 if (flow_loop_nested_p (def_bb->loop_father, bb->loop_father))
502 {
503 if (dump_enabled_p ())
504 dump_printf_loc (MSG_NOTE, vect_location,
505 "outer-loop def-stmt defining inner-loop stmt.\n");
506
507 switch (relevant)
508 {
509 case vect_unused_in_scope:
510 relevant = (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_nested_cycle) ?
511 vect_used_in_scope : vect_unused_in_scope;
512 break;
513
514 case vect_used_in_outer_by_reduction:
515 gcc_assert (STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_reduction_def);
516 relevant = vect_used_by_reduction;
517 break;
518
519 case vect_used_in_outer:
520 gcc_assert (STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_reduction_def);
521 relevant = vect_used_in_scope;
522 break;
523
524 case vect_used_in_scope:
525 break;
526
527 default:
528 gcc_unreachable ();
529 }
530 }
531
532 /* case 3b: inner-loop stmt defining an outer-loop stmt:
533 outer-loop-header-bb:
534 ...
535 inner-loop:
536 d = dstmt_vinfo
537 outer-loop-tail-bb (or outer-loop-exit-bb in double reduction):
538 stmt # use (d) */
539 else if (flow_loop_nested_p (bb->loop_father, def_bb->loop_father))
540 {
541 if (dump_enabled_p ())
542 dump_printf_loc (MSG_NOTE, vect_location,
543 "inner-loop def-stmt defining outer-loop stmt.\n");
544
545 switch (relevant)
546 {
547 case vect_unused_in_scope:
548 relevant = (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_reduction_def
549 || STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_double_reduction_def) ?
550 vect_used_in_outer_by_reduction : vect_unused_in_scope;
551 break;
552
553 case vect_used_by_reduction:
554 case vect_used_only_live:
555 relevant = vect_used_in_outer_by_reduction;
556 break;
557
558 case vect_used_in_scope:
559 relevant = vect_used_in_outer;
560 break;
561
562 default:
563 gcc_unreachable ();
564 }
565 }
566 /* We are also not interested in uses on loop PHI backedges that are
567 inductions. Otherwise we'll needlessly vectorize the IV increment
568 and cause hybrid SLP for SLP inductions. Unless the PHI is live
569 of course. */
570 else if (gimple_code (stmt_vinfo->stmt) == GIMPLE_PHI
571 && STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_induction_def
572 && ! STMT_VINFO_LIVE_P (stmt_vinfo)
573 && (PHI_ARG_DEF_FROM_EDGE (stmt_vinfo->stmt,
574 loop_latch_edge (bb->loop_father))
575 == use))
576 {
577 if (dump_enabled_p ())
578 dump_printf_loc (MSG_NOTE, vect_location,
579 "induction value on backedge.\n");
580 return opt_result::success ();
581 }
582
583
584 vect_mark_relevant (worklist, dstmt_vinfo, relevant, false);
585 return opt_result::success ();
586 }
587
588
589 /* Function vect_mark_stmts_to_be_vectorized.
590
591 Not all stmts in the loop need to be vectorized. For example:
592
593 for i...
594 for j...
595 1. T0 = i + j
596 2. T1 = a[T0]
597
598 3. j = j + 1
599
600 Stmts 1 and 3 do not need to be vectorized, because loop control and
601 addressing of vectorized data-refs are handled differently.
602
603 This pass detects such stmts. */
604
605 opt_result
606 vect_mark_stmts_to_be_vectorized (loop_vec_info loop_vinfo, bool *fatal)
607 {
608 class loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
609 basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
610 unsigned int nbbs = loop->num_nodes;
611 gimple_stmt_iterator si;
612 unsigned int i;
613 basic_block bb;
614 bool live_p;
615 enum vect_relevant relevant;
616
617 DUMP_VECT_SCOPE ("vect_mark_stmts_to_be_vectorized");
618
619 auto_vec<stmt_vec_info, 64> worklist;
620
621 /* 1. Init worklist. */
622 for (i = 0; i < nbbs; i++)
623 {
624 bb = bbs[i];
625 for (si = gsi_start_phis (bb); !gsi_end_p (si); gsi_next (&si))
626 {
627 stmt_vec_info phi_info = loop_vinfo->lookup_stmt (gsi_stmt (si));
628 if (dump_enabled_p ())
629 dump_printf_loc (MSG_NOTE, vect_location, "init: phi relevant? %G",
630 phi_info->stmt);
631
632 if (vect_stmt_relevant_p (phi_info, loop_vinfo, &relevant, &live_p))
633 vect_mark_relevant (&worklist, phi_info, relevant, live_p);
634 }
635 for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
636 {
637 stmt_vec_info stmt_info = loop_vinfo->lookup_stmt (gsi_stmt (si));
638 if (dump_enabled_p ())
639 dump_printf_loc (MSG_NOTE, vect_location,
640 "init: stmt relevant? %G", stmt_info->stmt);
641
642 if (vect_stmt_relevant_p (stmt_info, loop_vinfo, &relevant, &live_p))
643 vect_mark_relevant (&worklist, stmt_info, relevant, live_p);
644 }
645 }
646
647 /* 2. Process_worklist */
648 while (worklist.length () > 0)
649 {
650 use_operand_p use_p;
651 ssa_op_iter iter;
652
653 stmt_vec_info stmt_vinfo = worklist.pop ();
654 if (dump_enabled_p ())
655 dump_printf_loc (MSG_NOTE, vect_location,
656 "worklist: examine stmt: %G", stmt_vinfo->stmt);
657
658 /* Examine the USEs of STMT. For each USE, mark the stmt that defines it
659 (DEF_STMT) as relevant/irrelevant according to the relevance property
660 of STMT. */
661 relevant = STMT_VINFO_RELEVANT (stmt_vinfo);
662
663 /* Generally, the relevance property of STMT (in STMT_VINFO_RELEVANT) is
664 propagated as is to the DEF_STMTs of its USEs.
665
666 One exception is when STMT has been identified as defining a reduction
667 variable; in this case we set the relevance to vect_used_by_reduction.
668 This is because we distinguish between two kinds of relevant stmts -
669 those that are used by a reduction computation, and those that are
670 (also) used by a regular computation. This allows us later on to
671 identify stmts that are used solely by a reduction, and therefore the
672 order of the results that they produce does not have to be kept. */
673
674 switch (STMT_VINFO_DEF_TYPE (stmt_vinfo))
675 {
676 case vect_reduction_def:
677 gcc_assert (relevant != vect_unused_in_scope);
678 if (relevant != vect_unused_in_scope
679 && relevant != vect_used_in_scope
680 && relevant != vect_used_by_reduction
681 && relevant != vect_used_only_live)
682 return opt_result::failure_at
683 (stmt_vinfo->stmt, "unsupported use of reduction.\n");
684 break;
685
686 case vect_nested_cycle:
687 if (relevant != vect_unused_in_scope
688 && relevant != vect_used_in_outer_by_reduction
689 && relevant != vect_used_in_outer)
690 return opt_result::failure_at
691 (stmt_vinfo->stmt, "unsupported use of nested cycle.\n");
692 break;
693
694 case vect_double_reduction_def:
695 if (relevant != vect_unused_in_scope
696 && relevant != vect_used_by_reduction
697 && relevant != vect_used_only_live)
698 return opt_result::failure_at
699 (stmt_vinfo->stmt, "unsupported use of double reduction.\n");
700 break;
701
702 default:
703 break;
704 }
705
706 if (is_pattern_stmt_p (stmt_vinfo))
707 {
708 /* Pattern statements are not inserted into the code, so
709 FOR_EACH_PHI_OR_STMT_USE optimizes their operands out, and we
710 have to scan the RHS or function arguments instead. */
711 if (gassign *assign = dyn_cast <gassign *> (stmt_vinfo->stmt))
712 {
713 enum tree_code rhs_code = gimple_assign_rhs_code (assign);
714 tree op = gimple_assign_rhs1 (assign);
715
716 i = 1;
717 if (rhs_code == COND_EXPR && COMPARISON_CLASS_P (op))
718 {
719 opt_result res
720 = process_use (stmt_vinfo, TREE_OPERAND (op, 0),
721 loop_vinfo, relevant, &worklist, false);
722 if (!res)
723 return res;
724 res = process_use (stmt_vinfo, TREE_OPERAND (op, 1),
725 loop_vinfo, relevant, &worklist, false);
726 if (!res)
727 return res;
728 i = 2;
729 }
730 for (; i < gimple_num_ops (assign); i++)
731 {
732 op = gimple_op (assign, i);
733 if (TREE_CODE (op) == SSA_NAME)
734 {
735 opt_result res
736 = process_use (stmt_vinfo, op, loop_vinfo, relevant,
737 &worklist, false);
738 if (!res)
739 return res;
740 }
741 }
742 }
743 else if (gcall *call = dyn_cast <gcall *> (stmt_vinfo->stmt))
744 {
745 for (i = 0; i < gimple_call_num_args (call); i++)
746 {
747 tree arg = gimple_call_arg (call, i);
748 opt_result res
749 = process_use (stmt_vinfo, arg, loop_vinfo, relevant,
750 &worklist, false);
751 if (!res)
752 return res;
753 }
754 }
755 }
756 else
757 FOR_EACH_PHI_OR_STMT_USE (use_p, stmt_vinfo->stmt, iter, SSA_OP_USE)
758 {
759 tree op = USE_FROM_PTR (use_p);
760 opt_result res
761 = process_use (stmt_vinfo, op, loop_vinfo, relevant,
762 &worklist, false);
763 if (!res)
764 return res;
765 }
766
767 if (STMT_VINFO_GATHER_SCATTER_P (stmt_vinfo))
768 {
769 gather_scatter_info gs_info;
770 if (!vect_check_gather_scatter (stmt_vinfo, loop_vinfo, &gs_info))
771 gcc_unreachable ();
772 opt_result res
773 = process_use (stmt_vinfo, gs_info.offset, loop_vinfo, relevant,
774 &worklist, true);
775 if (!res)
776 {
777 if (fatal)
778 *fatal = false;
779 return res;
780 }
781 }
782 } /* while worklist */
783
784 return opt_result::success ();
785 }
786
787 /* Compute the prologue cost for invariant or constant operands. */
788
789 static unsigned
790 vect_prologue_cost_for_slp_op (slp_tree node, stmt_vec_info stmt_info,
791 unsigned opno, enum vect_def_type dt,
792 stmt_vector_for_cost *cost_vec)
793 {
794 vec_info *vinfo = stmt_info->vinfo;
795 gimple *stmt = SLP_TREE_SCALAR_STMTS (node)[0]->stmt;
796 tree op = gimple_op (stmt, opno);
797 unsigned prologue_cost = 0;
798
799 /* Without looking at the actual initializer a vector of
800 constants can be implemented as load from the constant pool.
801 When all elements are the same we can use a splat. */
802 tree vectype = get_vectype_for_scalar_type (vinfo, TREE_TYPE (op));
803 unsigned group_size = SLP_TREE_SCALAR_STMTS (node).length ();
804 unsigned num_vects_to_check;
805 unsigned HOST_WIDE_INT const_nunits;
806 unsigned nelt_limit;
807 if (TYPE_VECTOR_SUBPARTS (vectype).is_constant (&const_nunits)
808 && ! multiple_p (const_nunits, group_size))
809 {
810 num_vects_to_check = SLP_TREE_NUMBER_OF_VEC_STMTS (node);
811 nelt_limit = const_nunits;
812 }
813 else
814 {
815 /* If either the vector has variable length or the vectors
816 are composed of repeated whole groups we only need to
817 cost construction once. All vectors will be the same. */
818 num_vects_to_check = 1;
819 nelt_limit = group_size;
820 }
821 tree elt = NULL_TREE;
822 unsigned nelt = 0;
823 for (unsigned j = 0; j < num_vects_to_check * nelt_limit; ++j)
824 {
825 unsigned si = j % group_size;
826 if (nelt == 0)
827 elt = gimple_op (SLP_TREE_SCALAR_STMTS (node)[si]->stmt, opno);
828 /* ??? We're just tracking whether all operands of a single
829 vector initializer are the same; ideally we'd check if
830 we emitted the same one already. */
831 else if (elt != gimple_op (SLP_TREE_SCALAR_STMTS (node)[si]->stmt,
832 opno))
833 elt = NULL_TREE;
834 nelt++;
835 if (nelt == nelt_limit)
836 {
837 /* ??? We need to pass down stmt_info for a vector type
838 even if it points to the wrong stmt. */
839 prologue_cost += record_stmt_cost
840 (cost_vec, 1,
841 dt == vect_external_def
842 ? (elt ? scalar_to_vec : vec_construct)
843 : vector_load,
844 stmt_info, 0, vect_prologue);
845 nelt = 0;
846 }
847 }
848
849 return prologue_cost;
850 }
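/* Editorial worked example, not part of the original file.  Suppose the
   SLP group has group_size == 2 and the vector type has a constant
   const_nunits == 8.  Since 8 is a multiple of 2, only one vector
   initializer is costed (num_vects_to_check == 1, nelt_limit == 2).
   A constant operand is then charged as a vector_load from the constant
   pool; an external operand is charged as a scalar_to_vec splat when both
   scalar elements are the same, or as a vec_construct otherwise.  */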
851
852 /* Function vect_model_simple_cost.
853
854 Models cost for simple operations, i.e. those that only emit ncopies of a
855 single op. Right now, this does not account for multiple insns that could
856 be generated for the single vector op. We will handle that shortly. */
857
858 static void
859 vect_model_simple_cost (stmt_vec_info stmt_info, int ncopies,
860 enum vect_def_type *dt,
861 int ndts,
862 slp_tree node,
863 stmt_vector_for_cost *cost_vec)
864 {
865 int inside_cost = 0, prologue_cost = 0;
866
867 gcc_assert (cost_vec != NULL);
868
869 /* ??? Somehow we need to fix this at the callers. */
870 if (node)
871 ncopies = SLP_TREE_NUMBER_OF_VEC_STMTS (node);
872
873 if (node)
874 {
875 /* Scan operands and account for prologue cost of constants/externals.
876 ??? This over-estimates cost for multiple uses and should be
877 re-engineered. */
878 gimple *stmt = SLP_TREE_SCALAR_STMTS (node)[0]->stmt;
879 tree lhs = gimple_get_lhs (stmt);
880 for (unsigned i = 0; i < gimple_num_ops (stmt); ++i)
881 {
882 tree op = gimple_op (stmt, i);
883 enum vect_def_type dt;
884 if (!op || op == lhs)
885 continue;
886 if (vect_is_simple_use (op, stmt_info->vinfo, &dt)
887 && (dt == vect_constant_def || dt == vect_external_def))
888 prologue_cost += vect_prologue_cost_for_slp_op (node, stmt_info,
889 i, dt, cost_vec);
890 }
891 }
892 else
893 /* Cost the "broadcast" of a scalar operand into a vector operand.
894 Use scalar_to_vec to cost the broadcast, as elsewhere in the vector
895 cost model. */
896 for (int i = 0; i < ndts; i++)
897 if (dt[i] == vect_constant_def || dt[i] == vect_external_def)
898 prologue_cost += record_stmt_cost (cost_vec, 1, scalar_to_vec,
899 stmt_info, 0, vect_prologue);
900
901 /* Adjust for two-operator SLP nodes. */
902 if (node && SLP_TREE_TWO_OPERATORS (node))
903 {
904 ncopies *= 2;
905 inside_cost += record_stmt_cost (cost_vec, ncopies, vec_perm,
906 stmt_info, 0, vect_body);
907 }
908
909 /* Pass the inside-of-loop statements to the target-specific cost model. */
910 inside_cost += record_stmt_cost (cost_vec, ncopies, vector_stmt,
911 stmt_info, 0, vect_body);
912
913 if (dump_enabled_p ())
914 dump_printf_loc (MSG_NOTE, vect_location,
915 "vect_model_simple_cost: inside_cost = %d, "
916 "prologue_cost = %d .\n", inside_cost, prologue_cost);
917 }
918
919
920 /* Model cost for type demotion and promotion operations. PWR is
921 normally zero for single-step promotions and demotions. It will be
922 one if two-step promotion/demotion is required, and so on. NCOPIES
923 is the number of vector results (and thus number of instructions)
924 for the narrowest end of the operation chain. Each additional
925 step doubles the number of instructions required. */
926
927 static void
928 vect_model_promotion_demotion_cost (stmt_vec_info stmt_info,
929 enum vect_def_type *dt,
930 unsigned int ncopies, int pwr,
931 stmt_vector_for_cost *cost_vec)
932 {
933 int i;
934 int inside_cost = 0, prologue_cost = 0;
935
936 for (i = 0; i < pwr + 1; i++)
937 {
938 inside_cost += record_stmt_cost (cost_vec, ncopies, vec_promote_demote,
939 stmt_info, 0, vect_body);
940 ncopies *= 2;
941 }
942
943 /* FORNOW: Assuming maximum 2 args per stmts. */
944 for (i = 0; i < 2; i++)
945 if (dt[i] == vect_constant_def || dt[i] == vect_external_def)
946 prologue_cost += record_stmt_cost (cost_vec, 1, vector_stmt,
947 stmt_info, 0, vect_prologue);
948
949 if (dump_enabled_p ())
950 dump_printf_loc (MSG_NOTE, vect_location,
951 "vect_model_promotion_demotion_cost: inside_cost = %d, "
952 "prologue_cost = %d .\n", inside_cost, prologue_cost);
953 }
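/* Editorial worked example, not part of the original file.  With
   NCOPIES == 2 and PWR == 1 (a two-step promotion), the loop above records
   2 vec_promote_demote stmts for the first step and 4 for the second,
   i.e. 6 in total; each further step doubles the count again.  Constant or
   external operands additionally get one vector_stmt charged to the
   prologue.  */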
954
955 /* Returns true if the current function returns DECL. */
956
957 static bool
958 cfun_returns (tree decl)
959 {
960 edge_iterator ei;
961 edge e;
962 FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR_FOR_FN (cfun)->preds)
963 {
964 greturn *ret = safe_dyn_cast <greturn *> (last_stmt (e->src));
965 if (!ret)
966 continue;
967 if (gimple_return_retval (ret) == decl)
968 return true;
969 /* We often end up with an aggregate copy to the result decl,
970 handle that case as well. First skip intermediate clobbers
971 though. */
972 gimple *def = ret;
973 do
974 {
975 def = SSA_NAME_DEF_STMT (gimple_vuse (def));
976 }
977 while (gimple_clobber_p (def));
978 if (is_a <gassign *> (def)
979 && gimple_assign_lhs (def) == gimple_return_retval (ret)
980 && gimple_assign_rhs1 (def) == decl)
981 return true;
982 }
983 return false;
984 }
985
986 /* Function vect_model_store_cost
987
988 Models cost for stores. In the case of grouped accesses, one access
989 has the overhead of the grouped access attributed to it. */
990
991 static void
992 vect_model_store_cost (stmt_vec_info stmt_info, int ncopies,
993 enum vect_def_type dt,
994 vect_memory_access_type memory_access_type,
995 vec_load_store_type vls_type, slp_tree slp_node,
996 stmt_vector_for_cost *cost_vec)
997 {
998 unsigned int inside_cost = 0, prologue_cost = 0;
999 stmt_vec_info first_stmt_info = stmt_info;
1000 bool grouped_access_p = STMT_VINFO_GROUPED_ACCESS (stmt_info);
1001
1002 /* ??? Somehow we need to fix this at the callers. */
1003 if (slp_node)
1004 ncopies = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
1005
1006 if (vls_type == VLS_STORE_INVARIANT)
1007 {
1008 if (slp_node)
1009 prologue_cost += vect_prologue_cost_for_slp_op (slp_node, stmt_info,
1010 1, dt, cost_vec);
1011 else
1012 prologue_cost += record_stmt_cost (cost_vec, 1, scalar_to_vec,
1013 stmt_info, 0, vect_prologue);
1014 }
1015
1016 /* Grouped stores update all elements in the group at once,
1017 so we want the DR for the first statement. */
1018 if (!slp_node && grouped_access_p)
1019 first_stmt_info = DR_GROUP_FIRST_ELEMENT (stmt_info);
1020
1021 /* True if we should include any once-per-group costs as well as
1022 the cost of the statement itself. For SLP we only get called
1023 once per group anyhow. */
1024 bool first_stmt_p = (first_stmt_info == stmt_info);
1025
1026 /* We assume that the cost of a single store-lanes instruction is
1027 equivalent to the cost of DR_GROUP_SIZE separate stores. If a grouped
1028 access is instead being provided by a permute-and-store operation,
1029 include the cost of the permutes. */
1030 if (first_stmt_p
1031 && memory_access_type == VMAT_CONTIGUOUS_PERMUTE)
1032 {
1033 /* Uses high and low interleave or shuffle operations for each
1034 needed permute. */
1035 int group_size = DR_GROUP_SIZE (first_stmt_info);
1036 int nstmts = ncopies * ceil_log2 (group_size) * group_size;
1037 inside_cost = record_stmt_cost (cost_vec, nstmts, vec_perm,
1038 stmt_info, 0, vect_body);
1039
1040 if (dump_enabled_p ())
1041 dump_printf_loc (MSG_NOTE, vect_location,
1042 "vect_model_store_cost: strided group_size = %d .\n",
1043 group_size);
1044 }
1045
1046 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
1047 /* Costs of the stores. */
1048 if (memory_access_type == VMAT_ELEMENTWISE
1049 || memory_access_type == VMAT_GATHER_SCATTER)
1050 {
1051 /* N scalar stores plus extracting the elements. */
1052 unsigned int assumed_nunits = vect_nunits_for_cost (vectype);
1053 inside_cost += record_stmt_cost (cost_vec,
1054 ncopies * assumed_nunits,
1055 scalar_store, stmt_info, 0, vect_body);
1056 }
1057 else
1058 vect_get_store_cost (stmt_info, ncopies, &inside_cost, cost_vec);
1059
1060 if (memory_access_type == VMAT_ELEMENTWISE
1061 || memory_access_type == VMAT_STRIDED_SLP)
1062 {
1063 /* N scalar stores plus extracting the elements. */
1064 unsigned int assumed_nunits = vect_nunits_for_cost (vectype);
1065 inside_cost += record_stmt_cost (cost_vec,
1066 ncopies * assumed_nunits,
1067 vec_to_scalar, stmt_info, 0, vect_body);
1068 }
1069
1070 /* When vectorizing a store into the function result assign
1071 a penalty if the function returns in a multi-register location.
1072 In this case we assume we'll end up with having to spill the
1073 vector result and do piecewise loads as a conservative estimate. */
1074 tree base = get_base_address (STMT_VINFO_DATA_REF (stmt_info)->ref);
1075 if (base
1076 && (TREE_CODE (base) == RESULT_DECL
1077 || (DECL_P (base) && cfun_returns (base)))
1078 && !aggregate_value_p (base, cfun->decl))
1079 {
1080 rtx reg = hard_function_value (TREE_TYPE (base), cfun->decl, 0, 1);
1081 /* ??? Handle PARALLEL in some way. */
1082 if (REG_P (reg))
1083 {
1084 int nregs = hard_regno_nregs (REGNO (reg), GET_MODE (reg));
1085 /* Assume that a single reg-reg move is possible and cheap,
1086 do not account for vector to gp register move cost. */
1087 if (nregs > 1)
1088 {
1089 /* Spill. */
1090 prologue_cost += record_stmt_cost (cost_vec, ncopies,
1091 vector_store,
1092 stmt_info, 0, vect_epilogue);
1093 /* Loads. */
1094 prologue_cost += record_stmt_cost (cost_vec, ncopies * nregs,
1095 scalar_load,
1096 stmt_info, 0, vect_epilogue);
1097 }
1098 }
1099 }
1100
1101 if (dump_enabled_p ())
1102 dump_printf_loc (MSG_NOTE, vect_location,
1103 "vect_model_store_cost: inside_cost = %d, "
1104 "prologue_cost = %d .\n", inside_cost, prologue_cost);
1105 }
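/* Editorial worked example, not part of the original file.  For a
   contiguous-permute store with DR_GROUP_SIZE == 4 and NCOPIES == 2, the
   permute count above is ncopies * ceil_log2 (group_size) * group_size
   = 2 * 2 * 4 = 16 vec_perm stmts, on top of the vector_store (or
   unaligned_store) cost added by vect_get_store_cost.  */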
1106
1107
1108 /* Calculate cost of DR's memory access. */
1109 void
1110 vect_get_store_cost (stmt_vec_info stmt_info, int ncopies,
1111 unsigned int *inside_cost,
1112 stmt_vector_for_cost *body_cost_vec)
1113 {
1114 dr_vec_info *dr_info = STMT_VINFO_DR_INFO (stmt_info);
1115 int alignment_support_scheme
1116 = vect_supportable_dr_alignment (dr_info, false);
1117
1118 switch (alignment_support_scheme)
1119 {
1120 case dr_aligned:
1121 {
1122 *inside_cost += record_stmt_cost (body_cost_vec, ncopies,
1123 vector_store, stmt_info, 0,
1124 vect_body);
1125
1126 if (dump_enabled_p ())
1127 dump_printf_loc (MSG_NOTE, vect_location,
1128 "vect_model_store_cost: aligned.\n");
1129 break;
1130 }
1131
1132 case dr_unaligned_supported:
1133 {
1134 /* Here, we assign an additional cost for the unaligned store. */
1135 *inside_cost += record_stmt_cost (body_cost_vec, ncopies,
1136 unaligned_store, stmt_info,
1137 DR_MISALIGNMENT (dr_info),
1138 vect_body);
1139 if (dump_enabled_p ())
1140 dump_printf_loc (MSG_NOTE, vect_location,
1141 "vect_model_store_cost: unaligned supported by "
1142 "hardware.\n");
1143 break;
1144 }
1145
1146 case dr_unaligned_unsupported:
1147 {
1148 *inside_cost = VECT_MAX_COST;
1149
1150 if (dump_enabled_p ())
1151 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1152 "vect_model_store_cost: unsupported access.\n");
1153 break;
1154 }
1155
1156 default:
1157 gcc_unreachable ();
1158 }
1159 }
1160
1161
1162 /* Function vect_model_load_cost
1163
1164 Models cost for loads. In the case of grouped accesses, one access has
1165 the overhead of the grouped access attributed to it. Since unaligned
1166 accesses are supported for loads, we also account for the costs of the
1167 access scheme chosen. */
1168
1169 static void
1170 vect_model_load_cost (stmt_vec_info stmt_info, unsigned ncopies,
1171 vect_memory_access_type memory_access_type,
1172 slp_instance instance,
1173 slp_tree slp_node,
1174 stmt_vector_for_cost *cost_vec)
1175 {
1176 unsigned int inside_cost = 0, prologue_cost = 0;
1177 bool grouped_access_p = STMT_VINFO_GROUPED_ACCESS (stmt_info);
1178
1179 gcc_assert (cost_vec);
1180
1181 /* ??? Somehow we need to fix this at the callers. */
1182 if (slp_node)
1183 ncopies = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
1184
1185 if (slp_node && SLP_TREE_LOAD_PERMUTATION (slp_node).exists ())
1186 {
1187 /* If the load is permuted then the alignment is determined by
1188 the first group element not by the first scalar stmt DR. */
1189 stmt_vec_info first_stmt_info = DR_GROUP_FIRST_ELEMENT (stmt_info);
1190 /* Record the cost for the permutation. */
1191 unsigned n_perms;
1192 unsigned assumed_nunits
1193 = vect_nunits_for_cost (STMT_VINFO_VECTYPE (first_stmt_info));
1194 unsigned slp_vf = (ncopies * assumed_nunits) / instance->group_size;
1195 vect_transform_slp_perm_load (slp_node, vNULL, NULL,
1196 slp_vf, instance, true,
1197 &n_perms);
1198 inside_cost += record_stmt_cost (cost_vec, n_perms, vec_perm,
1199 first_stmt_info, 0, vect_body);
1200 /* And adjust the number of loads performed. This handles
1201 redundancies as well as loads that are later dead. */
1202 auto_sbitmap perm (DR_GROUP_SIZE (first_stmt_info));
1203 bitmap_clear (perm);
1204 for (unsigned i = 0;
1205 i < SLP_TREE_LOAD_PERMUTATION (slp_node).length (); ++i)
1206 bitmap_set_bit (perm, SLP_TREE_LOAD_PERMUTATION (slp_node)[i]);
1207 ncopies = 0;
1208 bool load_seen = false;
1209 for (unsigned i = 0; i < DR_GROUP_SIZE (first_stmt_info); ++i)
1210 {
1211 if (i % assumed_nunits == 0)
1212 {
1213 if (load_seen)
1214 ncopies++;
1215 load_seen = false;
1216 }
1217 if (bitmap_bit_p (perm, i))
1218 load_seen = true;
1219 }
1220 if (load_seen)
1221 ncopies++;
1222 gcc_assert (ncopies
1223 <= (DR_GROUP_SIZE (first_stmt_info)
1224 - DR_GROUP_GAP (first_stmt_info)
1225 + assumed_nunits - 1) / assumed_nunits);
1226 }
1227
1228 /* Grouped loads read all elements in the group at once,
1229 so we want the DR for the first statement. */
1230 stmt_vec_info first_stmt_info = stmt_info;
1231 if (!slp_node && grouped_access_p)
1232 first_stmt_info = DR_GROUP_FIRST_ELEMENT (stmt_info);
1233
1234 /* True if we should include any once-per-group costs as well as
1235 the cost of the statement itself. For SLP we only get called
1236 once per group anyhow. */
1237 bool first_stmt_p = (first_stmt_info == stmt_info);
1238
1239 /* We assume that the cost of a single load-lanes instruction is
1240 equivalent to the cost of DR_GROUP_SIZE separate loads. If a grouped
1241 access is instead being provided by a load-and-permute operation,
1242 include the cost of the permutes. */
1243 if (first_stmt_p
1244 && memory_access_type == VMAT_CONTIGUOUS_PERMUTE)
1245 {
1246 /* Uses even and odd extract operations or shuffle operations
1247 for each needed permute. */
1248 int group_size = DR_GROUP_SIZE (first_stmt_info);
1249 int nstmts = ncopies * ceil_log2 (group_size) * group_size;
1250 inside_cost += record_stmt_cost (cost_vec, nstmts, vec_perm,
1251 stmt_info, 0, vect_body);
1252
1253 if (dump_enabled_p ())
1254 dump_printf_loc (MSG_NOTE, vect_location,
1255 "vect_model_load_cost: strided group_size = %d .\n",
1256 group_size);
1257 }
1258
1259 /* The loads themselves. */
1260 if (memory_access_type == VMAT_ELEMENTWISE
1261 || memory_access_type == VMAT_GATHER_SCATTER)
1262 {
1263 /* N scalar loads plus gathering them into a vector. */
1264 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
1265 unsigned int assumed_nunits = vect_nunits_for_cost (vectype);
1266 inside_cost += record_stmt_cost (cost_vec,
1267 ncopies * assumed_nunits,
1268 scalar_load, stmt_info, 0, vect_body);
1269 }
1270 else
1271 vect_get_load_cost (stmt_info, ncopies, first_stmt_p,
1272 &inside_cost, &prologue_cost,
1273 cost_vec, cost_vec, true);
1274 if (memory_access_type == VMAT_ELEMENTWISE
1275 || memory_access_type == VMAT_STRIDED_SLP)
1276 inside_cost += record_stmt_cost (cost_vec, ncopies, vec_construct,
1277 stmt_info, 0, vect_body);
1278
1279 if (dump_enabled_p ())
1280 dump_printf_loc (MSG_NOTE, vect_location,
1281 "vect_model_load_cost: inside_cost = %d, "
1282 "prologue_cost = %d .\n", inside_cost, prologue_cost);
1283 }
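/* Editorial worked example, not part of the original file.  Consider an
   SLP load permutation over a group of size 4 with assumed_nunits == 2
   that only uses elements 0 and 1.  The bitmap scan above sees a load in
   the first half-group and none in the second, so ncopies is reduced to 1:
   only one vector load is costed even though the group spans two
   vectors.  */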
1284
1285
1286 /* Calculate cost of DR's memory access. */
1287 void
1288 vect_get_load_cost (stmt_vec_info stmt_info, int ncopies,
1289 bool add_realign_cost, unsigned int *inside_cost,
1290 unsigned int *prologue_cost,
1291 stmt_vector_for_cost *prologue_cost_vec,
1292 stmt_vector_for_cost *body_cost_vec,
1293 bool record_prologue_costs)
1294 {
1295 dr_vec_info *dr_info = STMT_VINFO_DR_INFO (stmt_info);
1296 int alignment_support_scheme
1297 = vect_supportable_dr_alignment (dr_info, false);
1298
1299 switch (alignment_support_scheme)
1300 {
1301 case dr_aligned:
1302 {
1303 *inside_cost += record_stmt_cost (body_cost_vec, ncopies, vector_load,
1304 stmt_info, 0, vect_body);
1305
1306 if (dump_enabled_p ())
1307 dump_printf_loc (MSG_NOTE, vect_location,
1308 "vect_model_load_cost: aligned.\n");
1309
1310 break;
1311 }
1312 case dr_unaligned_supported:
1313 {
1314 /* Here, we assign an additional cost for the unaligned load. */
1315 *inside_cost += record_stmt_cost (body_cost_vec, ncopies,
1316 unaligned_load, stmt_info,
1317 DR_MISALIGNMENT (dr_info),
1318 vect_body);
1319
1320 if (dump_enabled_p ())
1321 dump_printf_loc (MSG_NOTE, vect_location,
1322 "vect_model_load_cost: unaligned supported by "
1323 "hardware.\n");
1324
1325 break;
1326 }
1327 case dr_explicit_realign:
1328 {
1329 *inside_cost += record_stmt_cost (body_cost_vec, ncopies * 2,
1330 vector_load, stmt_info, 0, vect_body);
1331 *inside_cost += record_stmt_cost (body_cost_vec, ncopies,
1332 vec_perm, stmt_info, 0, vect_body);
1333
1334 /* FIXME: If the misalignment remains fixed across the iterations of
1335 the containing loop, the following cost should be added to the
1336 prologue costs. */
1337 if (targetm.vectorize.builtin_mask_for_load)
1338 *inside_cost += record_stmt_cost (body_cost_vec, 1, vector_stmt,
1339 stmt_info, 0, vect_body);
1340
1341 if (dump_enabled_p ())
1342 dump_printf_loc (MSG_NOTE, vect_location,
1343 "vect_model_load_cost: explicit realign\n");
1344
1345 break;
1346 }
1347 case dr_explicit_realign_optimized:
1348 {
1349 if (dump_enabled_p ())
1350 dump_printf_loc (MSG_NOTE, vect_location,
1351 "vect_model_load_cost: unaligned software "
1352 "pipelined.\n");
1353
1354 /* Unaligned software pipeline has a load of an address, an initial
1355 load, and possibly a mask operation to "prime" the loop. However,
1356 if this is an access in a group of loads, which provide grouped
1357 access, then the above cost should only be considered for one
1358 access in the group. Inside the loop, there is a load op
1359 and a realignment op. */
1360
1361 if (add_realign_cost && record_prologue_costs)
1362 {
1363 *prologue_cost += record_stmt_cost (prologue_cost_vec, 2,
1364 vector_stmt, stmt_info,
1365 0, vect_prologue);
1366 if (targetm.vectorize.builtin_mask_for_load)
1367 *prologue_cost += record_stmt_cost (prologue_cost_vec, 1,
1368 vector_stmt, stmt_info,
1369 0, vect_prologue);
1370 }
1371
1372 *inside_cost += record_stmt_cost (body_cost_vec, ncopies, vector_load,
1373 stmt_info, 0, vect_body);
1374 *inside_cost += record_stmt_cost (body_cost_vec, ncopies, vec_perm,
1375 stmt_info, 0, vect_body);
1376
1377 if (dump_enabled_p ())
1378 dump_printf_loc (MSG_NOTE, vect_location,
1379 "vect_model_load_cost: explicit realign optimized"
1380 "\n");
1381
1382 break;
1383 }
1384
1385 case dr_unaligned_unsupported:
1386 {
1387 *inside_cost = VECT_MAX_COST;
1388
1389 if (dump_enabled_p ())
1390 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1391 "vect_model_load_cost: unsupported access.\n");
1392 break;
1393 }
1394
1395 default:
1396 gcc_unreachable ();
1397 }
1398 }
1399
1400 /* Insert the new stmt NEW_STMT at *GSI or at the appropriate place in
1401 the loop preheader for the vectorized stmt STMT_VINFO. */
1402
1403 static void
1404 vect_init_vector_1 (stmt_vec_info stmt_vinfo, gimple *new_stmt,
1405 gimple_stmt_iterator *gsi)
1406 {
1407 if (gsi)
1408 vect_finish_stmt_generation (stmt_vinfo, new_stmt, gsi);
1409 else
1410 {
1411 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
1412
1413 if (loop_vinfo)
1414 {
1415 class loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
1416 basic_block new_bb;
1417 edge pe;
1418
1419 if (nested_in_vect_loop_p (loop, stmt_vinfo))
1420 loop = loop->inner;
1421
1422 pe = loop_preheader_edge (loop);
1423 new_bb = gsi_insert_on_edge_immediate (pe, new_stmt);
1424 gcc_assert (!new_bb);
1425 }
1426 else
1427 {
1428 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_vinfo);
1429 basic_block bb;
1430 gimple_stmt_iterator gsi_bb_start;
1431
1432 gcc_assert (bb_vinfo);
1433 bb = BB_VINFO_BB (bb_vinfo);
1434 gsi_bb_start = gsi_after_labels (bb);
1435 gsi_insert_before (&gsi_bb_start, new_stmt, GSI_SAME_STMT);
1436 }
1437 }
1438
1439 if (dump_enabled_p ())
1440 dump_printf_loc (MSG_NOTE, vect_location,
1441 "created new init_stmt: %G", new_stmt);
1442 }
1443
1444 /* Function vect_init_vector.
1445
1446 Insert a new stmt (INIT_STMT) that initializes a new variable of type
1447 TYPE with the value VAL. If TYPE is a vector type and VAL does not have
1448 vector type a vector with all elements equal to VAL is created first.
1449 Place the initialization at GSI if it is not NULL. Otherwise, place the
1450 initialization at the loop preheader.
1451 Return the DEF of INIT_STMT.
1452 It will be used in the vectorization of STMT_INFO. */
1453
1454 tree
1455 vect_init_vector (stmt_vec_info stmt_info, tree val, tree type,
1456 gimple_stmt_iterator *gsi)
1457 {
1458 gimple *init_stmt;
1459 tree new_temp;
1460
1461 /* We abuse this function to push something to an SSA name with initial 'val'. */
1462 if (! useless_type_conversion_p (type, TREE_TYPE (val)))
1463 {
1464 gcc_assert (TREE_CODE (type) == VECTOR_TYPE);
1465 if (! types_compatible_p (TREE_TYPE (type), TREE_TYPE (val)))
1466 {
1467 /* Scalar boolean value should be transformed into
1468 all zeros or all ones value before building a vector. */
1469 if (VECTOR_BOOLEAN_TYPE_P (type))
1470 {
1471 tree true_val = build_all_ones_cst (TREE_TYPE (type));
1472 tree false_val = build_zero_cst (TREE_TYPE (type));
1473
1474 if (CONSTANT_CLASS_P (val))
1475 val = integer_zerop (val) ? false_val : true_val;
1476 else
1477 {
1478 new_temp = make_ssa_name (TREE_TYPE (type));
1479 init_stmt = gimple_build_assign (new_temp, COND_EXPR,
1480 val, true_val, false_val);
1481 vect_init_vector_1 (stmt_info, init_stmt, gsi);
1482 val = new_temp;
1483 }
1484 }
1485 else
1486 {
1487 gimple_seq stmts = NULL;
1488 if (! INTEGRAL_TYPE_P (TREE_TYPE (val)))
1489 val = gimple_build (&stmts, VIEW_CONVERT_EXPR,
1490 TREE_TYPE (type), val);
1491 else
1492 /* ??? Condition vectorization expects us to do
1493 promotion of invariant/external defs. */
1494 val = gimple_convert (&stmts, TREE_TYPE (type), val);
1495 for (gimple_stmt_iterator gsi2 = gsi_start (stmts);
1496 !gsi_end_p (gsi2); )
1497 {
1498 init_stmt = gsi_stmt (gsi2);
1499 gsi_remove (&gsi2, false);
1500 vect_init_vector_1 (stmt_info, init_stmt, gsi);
1501 }
1502 }
1503 }
1504 val = build_vector_from_val (type, val);
1505 }
1506
1507 new_temp = vect_get_new_ssa_name (type, vect_simple_var, "cst_");
1508 init_stmt = gimple_build_assign (new_temp, val);
1509 vect_init_vector_1 (stmt_info, init_stmt, gsi);
1510 return new_temp;
1511 }
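/* Editorial sketch, not part of the original file: a hypothetical use of
   vect_init_vector that splats the scalar constant 1 of VECTYPE's element
   type into a vector.  Passing a NULL GSI places the init stmt in the loop
   preheader.  */

static tree
example_splat_one (stmt_vec_info stmt_info, tree vectype)
{
  tree one = build_one_cst (TREE_TYPE (vectype));
  return vect_init_vector (stmt_info, one, vectype, NULL);
}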
1512
1513 /* Function vect_get_vec_def_for_operand_1.
1514
1515 For a defining stmt DEF_STMT_INFO of a scalar stmt, return a vector def
1516 with type DT that will be used in the vectorized stmt. */
1517
1518 tree
1519 vect_get_vec_def_for_operand_1 (stmt_vec_info def_stmt_info,
1520 enum vect_def_type dt)
1521 {
1522 tree vec_oprnd;
1523 stmt_vec_info vec_stmt_info;
1524
1525 switch (dt)
1526 {
1527 /* operand is a constant or a loop invariant. */
1528 case vect_constant_def:
1529 case vect_external_def:
1530 /* Code should use vect_get_vec_def_for_operand. */
1531 gcc_unreachable ();
1532
1533 /* Operand is defined by a loop header phi. In case of nested
1534 cycles we also may have uses of the backedge def. */
1535 case vect_reduction_def:
1536 case vect_double_reduction_def:
1537 case vect_nested_cycle:
1538 case vect_induction_def:
1539 gcc_assert (gimple_code (def_stmt_info->stmt) == GIMPLE_PHI
1540 || dt == vect_nested_cycle);
1541 /* Fallthru. */
1542
1543 /* operand is defined inside the loop. */
1544 case vect_internal_def:
1545 {
1546 /* Get the def from the vectorized stmt. */
1547 vec_stmt_info = STMT_VINFO_VEC_STMT (def_stmt_info);
1548 /* Get vectorized pattern statement. */
1549 if (!vec_stmt_info
1550 && STMT_VINFO_IN_PATTERN_P (def_stmt_info)
1551 && !STMT_VINFO_RELEVANT (def_stmt_info))
1552 vec_stmt_info = (STMT_VINFO_VEC_STMT
1553 (STMT_VINFO_RELATED_STMT (def_stmt_info)));
1554 gcc_assert (vec_stmt_info);
1555 if (gphi *phi = dyn_cast <gphi *> (vec_stmt_info->stmt))
1556 vec_oprnd = PHI_RESULT (phi);
1557 else
1558 vec_oprnd = gimple_get_lhs (vec_stmt_info->stmt);
1559 return vec_oprnd;
1560 }
1561
1562 default:
1563 gcc_unreachable ();
1564 }
1565 }
1566
1567
1568 /* Function vect_get_vec_def_for_operand.
1569
1570 OP is an operand in STMT_VINFO. This function returns a (vector) def
1571 that will be used in the vectorized stmt for STMT_VINFO.
1572
1573 In the case that OP is an SSA_NAME which is defined in the loop, then
1574 STMT_VINFO_VEC_STMT of the defining stmt holds the relevant def.
1575
1576 In case OP is an invariant or constant, a new stmt that creates a vector def
1577 needs to be introduced. VECTYPE may be used to specify a required type for
1578 vector invariant. */
1579
1580 tree
1581 vect_get_vec_def_for_operand (tree op, stmt_vec_info stmt_vinfo, tree vectype)
1582 {
1583 gimple *def_stmt;
1584 enum vect_def_type dt;
1585 bool is_simple_use;
1586 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
1587
1588 if (dump_enabled_p ())
1589 dump_printf_loc (MSG_NOTE, vect_location,
1590 "vect_get_vec_def_for_operand: %T\n", op);
1591
1592 stmt_vec_info def_stmt_info;
1593 is_simple_use = vect_is_simple_use (op, loop_vinfo, &dt,
1594 &def_stmt_info, &def_stmt);
1595 gcc_assert (is_simple_use);
1596 if (def_stmt && dump_enabled_p ())
1597 dump_printf_loc (MSG_NOTE, vect_location, " def_stmt = %G", def_stmt);
1598
1599 if (dt == vect_constant_def || dt == vect_external_def)
1600 {
1601 tree stmt_vectype = STMT_VINFO_VECTYPE (stmt_vinfo);
1602 tree vector_type;
1603
1604 if (vectype)
1605 vector_type = vectype;
1606 else if (VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (op))
1607 && VECTOR_BOOLEAN_TYPE_P (stmt_vectype))
1608 vector_type = truth_type_for (stmt_vectype);
1609 else
1610 vector_type = get_vectype_for_scalar_type (loop_vinfo, TREE_TYPE (op));
1611
1612 gcc_assert (vector_type);
1613 return vect_init_vector (stmt_vinfo, op, vector_type, NULL);
1614 }
1615 else
1616 return vect_get_vec_def_for_operand_1 (def_stmt_info, dt);
1617 }
1618
1619
1620 /* Function vect_get_vec_def_for_stmt_copy
1621
1622 Return a vector-def for an operand. This function is used when the
1623 vectorized stmt to be created (by the caller to this function) is a "copy"
1624 created in case the vectorized result cannot fit in one vector, and several
1625 copies of the vector-stmt are required. In this case the vector-def is
1626 retrieved from the vector stmt recorded in the STMT_VINFO_RELATED_STMT field
1627 of the stmt that defines VEC_OPRND. VINFO describes the vectorization.
1628
1629 Context:
1630 In case the vectorization factor (VF) is bigger than the number
1631 of elements that can fit in a vectype (nunits), we have to generate
1632 more than one vector stmt to vectorize the scalar stmt. This situation
1633 arises when there are multiple data-types operated upon in the loop; the
1634 smallest data-type determines the VF, and as a result, when vectorizing
1635 stmts operating on wider types we need to create 'VF/nunits' "copies" of the
1636 vector stmt (each computing a vector of 'nunits' results, and together
1637 computing 'VF' results in each iteration). This function is called when
1638 vectorizing such a stmt (e.g. vectorizing S2 in the illustration below, in
1639 which VF=16 and nunits=4, so the number of copies required is 4):
1640
1641 scalar stmt: vectorized into: STMT_VINFO_RELATED_STMT
1642
1643 S1: x = load VS1.0: vx.0 = memref0 VS1.1
1644 VS1.1: vx.1 = memref1 VS1.2
1645 VS1.2: vx.2 = memref2 VS1.3
1646 VS1.3: vx.3 = memref3
1647
1648 S2: z = x + ... VSnew.0: vz0 = vx.0 + ... VSnew.1
1649 VSnew.1: vz1 = vx.1 + ... VSnew.2
1650 VSnew.2: vz2 = vx.2 + ... VSnew.3
1651 VSnew.3: vz3 = vx.3 + ...
1652
1653 The vectorization of S1 is explained in vectorizable_load.
1654 The vectorization of S2:
1655 To create the first vector-stmt out of the 4 copies - VSnew.0 -
1656 the function 'vect_get_vec_def_for_operand' is called to
1657 get the relevant vector-def for each operand of S2. For operand x it
1658 returns the vector-def 'vx.0'.
1659
1660 To create the remaining copies of the vector-stmt (VSnew.j), this
1661 function is called to get the relevant vector-def for each operand. It is
1662 obtained from the respective VS1.j stmt, which is recorded in the
1663 STMT_VINFO_RELATED_STMT field of the stmt that defines VEC_OPRND.
1664
1665 For example, to obtain the vector-def 'vx.1' in order to create the
1666 vector stmt 'VSnew.1', this function is called with VEC_OPRND='vx.0'.
1667 Given 'vx.0' we obtain the stmt that defines it ('VS1.0'); from the
1668 STMT_VINFO_RELATED_STMT field of 'VS1.0' we obtain the next copy - 'VS1.1',
1669 and return its def ('vx.1').
1670 Overall, to create the above sequence this function will be called 3 times:
1671 vx.1 = vect_get_vec_def_for_stmt_copy (vinfo, vx.0);
1672 vx.2 = vect_get_vec_def_for_stmt_copy (vinfo, vx.1);
1673 vx.3 = vect_get_vec_def_for_stmt_copy (vinfo, vx.2); */
1674
1675 tree
1676 vect_get_vec_def_for_stmt_copy (vec_info *vinfo, tree vec_oprnd)
1677 {
1678 stmt_vec_info def_stmt_info = vinfo->lookup_def (vec_oprnd);
1679 if (!def_stmt_info)
1680 /* Do nothing; can reuse same def. */
1681 return vec_oprnd;
1682
1683 def_stmt_info = STMT_VINFO_RELATED_STMT (def_stmt_info);
1684 gcc_assert (def_stmt_info);
1685 if (gphi *phi = dyn_cast <gphi *> (def_stmt_info->stmt))
1686 vec_oprnd = PHI_RESULT (phi);
1687 else
1688 vec_oprnd = gimple_get_lhs (def_stmt_info->stmt);
1689 return vec_oprnd;
1690 }
1691
1692
1693 /* Get vectorized definitions for the operands to create a copy of an original
1694 stmt. See vect_get_vec_def_for_stmt_copy () for details. */
1695
1696 void
1697 vect_get_vec_defs_for_stmt_copy (vec_info *vinfo,
1698 vec<tree> *vec_oprnds0,
1699 vec<tree> *vec_oprnds1)
1700 {
1701 tree vec_oprnd = vec_oprnds0->pop ();
1702
1703 vec_oprnd = vect_get_vec_def_for_stmt_copy (vinfo, vec_oprnd);
1704 vec_oprnds0->quick_push (vec_oprnd);
1705
1706 if (vec_oprnds1 && vec_oprnds1->length ())
1707 {
1708 vec_oprnd = vec_oprnds1->pop ();
1709 vec_oprnd = vect_get_vec_def_for_stmt_copy (vinfo, vec_oprnd);
1710 vec_oprnds1->quick_push (vec_oprnd);
1711 }
1712 }
1713
1714
1715 /* Get vectorized definitions for OP0 and OP1. */
1716
1717 void
1718 vect_get_vec_defs (tree op0, tree op1, stmt_vec_info stmt_info,
1719 vec<tree> *vec_oprnds0,
1720 vec<tree> *vec_oprnds1,
1721 slp_tree slp_node)
1722 {
1723 if (slp_node)
1724 {
1725 auto_vec<vec<tree> > vec_defs (SLP_TREE_CHILDREN (slp_node).length ());
1726 vect_get_slp_defs (slp_node, &vec_defs, op1 ? 2 : 1);
1727 *vec_oprnds0 = vec_defs[0];
1728 if (op1)
1729 *vec_oprnds1 = vec_defs[1];
1730 }
1731 else
1732 {
1733 tree vec_oprnd;
1734
1735 vec_oprnds0->create (1);
1736 vec_oprnd = vect_get_vec_def_for_operand (op0, stmt_info);
1737 vec_oprnds0->quick_push (vec_oprnd);
1738
1739 if (op1)
1740 {
1741 vec_oprnds1->create (1);
1742 vec_oprnd = vect_get_vec_def_for_operand (op1, stmt_info);
1743 vec_oprnds1->quick_push (vec_oprnd);
1744 }
1745 }
1746 }
1747
1748 /* Helper function called by vect_finish_replace_stmt and
1749 vect_finish_stmt_generation. Set the location of the new
1750 statement and create and return a stmt_vec_info for it. */
1751
1752 static stmt_vec_info
1753 vect_finish_stmt_generation_1 (stmt_vec_info stmt_info, gimple *vec_stmt)
1754 {
1755 vec_info *vinfo = stmt_info->vinfo;
1756
1757 stmt_vec_info vec_stmt_info = vinfo->add_stmt (vec_stmt);
1758
1759 if (dump_enabled_p ())
1760 dump_printf_loc (MSG_NOTE, vect_location, "add new stmt: %G", vec_stmt);
1761
1762 gimple_set_location (vec_stmt, gimple_location (stmt_info->stmt));
1763
1764 /* While EH edges will generally prevent vectorization, stmt might
1765 e.g. be in a must-not-throw region. Ensure newly created stmts
1766 that could throw are part of the same region. */
1767 int lp_nr = lookup_stmt_eh_lp (stmt_info->stmt);
1768 if (lp_nr != 0 && stmt_could_throw_p (cfun, vec_stmt))
1769 add_stmt_to_eh_lp (vec_stmt, lp_nr);
1770
1771 return vec_stmt_info;
1772 }
1773
1774 /* Replace the scalar statement STMT_INFO with a new vector statement VEC_STMT,
1775 which sets the same scalar result as STMT_INFO did. Create and return a
1776 stmt_vec_info for VEC_STMT. */
1777
1778 stmt_vec_info
1779 vect_finish_replace_stmt (stmt_vec_info stmt_info, gimple *vec_stmt)
1780 {
1781 gcc_assert (gimple_get_lhs (stmt_info->stmt) == gimple_get_lhs (vec_stmt));
1782
1783 gimple_stmt_iterator gsi = gsi_for_stmt (stmt_info->stmt);
1784 gsi_replace (&gsi, vec_stmt, true);
1785
1786 return vect_finish_stmt_generation_1 (stmt_info, vec_stmt);
1787 }
1788
1789 /* Add VEC_STMT to the vectorized implementation of STMT_INFO and insert it
1790 before *GSI. Create and return a stmt_vec_info for VEC_STMT. */
1791
1792 stmt_vec_info
1793 vect_finish_stmt_generation (stmt_vec_info stmt_info, gimple *vec_stmt,
1794 gimple_stmt_iterator *gsi)
1795 {
1796 gcc_assert (gimple_code (stmt_info->stmt) != GIMPLE_LABEL);
1797
1798 if (!gsi_end_p (*gsi)
1799 && gimple_has_mem_ops (vec_stmt))
1800 {
1801 gimple *at_stmt = gsi_stmt (*gsi);
1802 tree vuse = gimple_vuse (at_stmt);
1803 if (vuse && TREE_CODE (vuse) == SSA_NAME)
1804 {
1805 tree vdef = gimple_vdef (at_stmt);
1806 gimple_set_vuse (vec_stmt, gimple_vuse (at_stmt));
1807 /* If we have an SSA vuse and insert a store, update virtual
1808 SSA form to avoid triggering the renamer. Do so only
1809 if we can easily see all uses - which is what almost always
1810 happens with the way vectorized stmts are inserted. */
1811 if ((vdef && TREE_CODE (vdef) == SSA_NAME)
1812 && ((is_gimple_assign (vec_stmt)
1813 && !is_gimple_reg (gimple_assign_lhs (vec_stmt)))
1814 || (is_gimple_call (vec_stmt)
1815 && !(gimple_call_flags (vec_stmt)
1816 & (ECF_CONST|ECF_PURE|ECF_NOVOPS)))))
1817 {
1818 tree new_vdef = copy_ssa_name (vuse, vec_stmt);
1819 gimple_set_vdef (vec_stmt, new_vdef);
1820 SET_USE (gimple_vuse_op (at_stmt), new_vdef);
1821 }
1822 }
1823 }
1824 gsi_insert_before (gsi, vec_stmt, GSI_SAME_STMT);
1825 return vect_finish_stmt_generation_1 (stmt_info, vec_stmt);
1826 }
1827
1828 /* We want to vectorize a call to combined function CFN with function
1829 decl FNDECL, using VECTYPE_OUT as the type of the output and VECTYPE_IN
1830 as the types of all inputs. Check whether this is possible using
1831 an internal function, returning its code if so or IFN_LAST if not. */
1832
1833 static internal_fn
1834 vectorizable_internal_function (combined_fn cfn, tree fndecl,
1835 tree vectype_out, tree vectype_in)
1836 {
1837 internal_fn ifn;
1838 if (internal_fn_p (cfn))
1839 ifn = as_internal_fn (cfn);
1840 else
1841 ifn = associated_internal_fn (fndecl);
1842 if (ifn != IFN_LAST && direct_internal_fn_p (ifn))
1843 {
1844 const direct_internal_fn_info &info = direct_internal_fn (ifn);
1845 if (info.vectorizable)
1846 {
1847 tree type0 = (info.type0 < 0 ? vectype_out : vectype_in);
1848 tree type1 = (info.type1 < 0 ? vectype_out : vectype_in);
1849 if (direct_internal_fn_supported_p (ifn, tree_pair (type0, type1),
1850 OPTIMIZE_FOR_SPEED))
1851 return ifn;
1852 }
1853 }
1854 return IFN_LAST;
1855 }
1856
1857
1858 static tree permute_vec_elements (tree, tree, tree, stmt_vec_info,
1859 gimple_stmt_iterator *);
1860
1861 /* Check whether a load or store statement in the loop described by
1862 LOOP_VINFO is possible in a fully-masked loop. This is testing
1863 whether the vectorizer pass has the appropriate support, as well as
1864 whether the target does.
1865
1866 VLS_TYPE says whether the statement is a load or store and VECTYPE
1867 is the type of the vector being loaded or stored. MEMORY_ACCESS_TYPE
1868 says how the load or store is going to be implemented and GROUP_SIZE
1869 is the number of load or store statements in the containing group.
1870 If the access is a gather load or scatter store, GS_INFO describes
1871 its arguments. If the load or store is conditional, SCALAR_MASK is the
1872 condition under which it occurs.
1873
1874 Clear LOOP_VINFO_CAN_FULLY_MASK_P if a fully-masked loop is not
1875 supported, otherwise record the required mask types. */
1876
1877 static void
1878 check_load_store_masking (loop_vec_info loop_vinfo, tree vectype,
1879 vec_load_store_type vls_type, int group_size,
1880 vect_memory_access_type memory_access_type,
1881 gather_scatter_info *gs_info, tree scalar_mask)
1882 {
1883 /* Invariant loads need no special support. */
1884 if (memory_access_type == VMAT_INVARIANT)
1885 return;
1886
1887 vec_loop_masks *masks = &LOOP_VINFO_MASKS (loop_vinfo);
1888 machine_mode vecmode = TYPE_MODE (vectype);
1889 bool is_load = (vls_type == VLS_LOAD);
1890 if (memory_access_type == VMAT_LOAD_STORE_LANES)
1891 {
1892 if (is_load
1893 ? !vect_load_lanes_supported (vectype, group_size, true)
1894 : !vect_store_lanes_supported (vectype, group_size, true))
1895 {
1896 if (dump_enabled_p ())
1897 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1898 "can't use a fully-masked loop because the"
1899 " target doesn't have an appropriate masked"
1900 " load/store-lanes instruction.\n");
1901 LOOP_VINFO_CAN_FULLY_MASK_P (loop_vinfo) = false;
1902 return;
1903 }
1904 unsigned int ncopies = vect_get_num_copies (loop_vinfo, vectype);
1905 vect_record_loop_mask (loop_vinfo, masks, ncopies, vectype, scalar_mask);
1906 return;
1907 }
1908
1909 if (memory_access_type == VMAT_GATHER_SCATTER)
1910 {
1911 internal_fn ifn = (is_load
1912 ? IFN_MASK_GATHER_LOAD
1913 : IFN_MASK_SCATTER_STORE);
1914 if (!internal_gather_scatter_fn_supported_p (ifn, vectype,
1915 gs_info->memory_type,
1916 gs_info->offset_vectype,
1917 gs_info->scale))
1918 {
1919 if (dump_enabled_p ())
1920 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1921 "can't use a fully-masked loop because the"
1922 " target doesn't have an appropriate masked"
1923 " gather load or scatter store instruction.\n");
1924 LOOP_VINFO_CAN_FULLY_MASK_P (loop_vinfo) = false;
1925 return;
1926 }
1927 unsigned int ncopies = vect_get_num_copies (loop_vinfo, vectype);
1928 vect_record_loop_mask (loop_vinfo, masks, ncopies, vectype, scalar_mask);
1929 return;
1930 }
1931
1932 if (memory_access_type != VMAT_CONTIGUOUS
1933 && memory_access_type != VMAT_CONTIGUOUS_PERMUTE)
1934 {
1935 /* Element X of the data must come from iteration i * VF + X of the
1936 scalar loop. We need more work to support other mappings. */
1937 if (dump_enabled_p ())
1938 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1939 "can't use a fully-masked loop because an access"
1940 " isn't contiguous.\n");
1941 LOOP_VINFO_CAN_FULLY_MASK_P (loop_vinfo) = false;
1942 return;
1943 }
1944
1945 machine_mode mask_mode;
1946 if (!VECTOR_MODE_P (vecmode)
1947 || !targetm.vectorize.get_mask_mode (vecmode).exists (&mask_mode)
1948 || !can_vec_mask_load_store_p (vecmode, mask_mode, is_load))
1949 {
1950 if (dump_enabled_p ())
1951 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1952 "can't use a fully-masked loop because the target"
1953 " doesn't have the appropriate masked load or"
1954 " store.\n");
1955 LOOP_VINFO_CAN_FULLY_MASK_P (loop_vinfo) = false;
1956 return;
1957 }
1958 /* We might load more scalars than we need for permuting SLP loads.
1959 We checked in get_group_load_store_type that the extra elements
1960 don't leak into a new vector. */
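  /* For example, with GROUP_SIZE == 2, VF == 8 and a 4-element VECTYPE
     (illustrative values), each iteration accesses 2 * 8 == 16 scalars,
     so the division below records NVECTORS == 4 loop masks.  */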
1961 poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
1962 poly_uint64 vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
1963 unsigned int nvectors;
1964 if (can_div_away_from_zero_p (group_size * vf, nunits, &nvectors))
1965 vect_record_loop_mask (loop_vinfo, masks, nvectors, vectype, scalar_mask);
1966 else
1967 gcc_unreachable ();
1968 }
1969
1970 /* Return the mask input to a masked load or store. VEC_MASK is the vectorized
1971 form of the scalar mask condition and LOOP_MASK, if nonnull, is the mask
1972 that needs to be applied to all loads and stores in a vectorized loop.
1973 Return VEC_MASK if LOOP_MASK is null, otherwise return VEC_MASK & LOOP_MASK.
1974
1975 MASK_TYPE is the type of both masks. If new statements are needed,
1976 insert them before GSI. */
1977
1978 static tree
1979 prepare_load_store_mask (tree mask_type, tree loop_mask, tree vec_mask,
1980 gimple_stmt_iterator *gsi)
1981 {
1982 gcc_assert (useless_type_conversion_p (mask_type, TREE_TYPE (vec_mask)));
1983 if (!loop_mask)
1984 return vec_mask;
1985
1986 gcc_assert (TREE_TYPE (loop_mask) == mask_type);
1987 tree and_res = make_temp_ssa_name (mask_type, NULL, "vec_mask_and");
1988 gimple *and_stmt = gimple_build_assign (and_res, BIT_AND_EXPR,
1989 vec_mask, loop_mask);
1990 gsi_insert_before (gsi, and_stmt, GSI_SAME_STMT);
1991 return and_res;
1992 }
1993
1994 /* Determine whether we can use a gather load or scatter store to vectorize
1995 strided load or store STMT_INFO by truncating the current offset to a
1996 smaller width. We need to be able to construct an offset vector:
1997
1998 { 0, X, X*2, X*3, ... }
1999
2000 without loss of precision, where X is STMT_INFO's DR_STEP.
2001
2002 Return true if this is possible, describing the gather load or scatter
2003 store in GS_INFO. MASKED_P is true if the load or store is conditional. */
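/* As a worked example with illustrative values: a constant DR_STEP of
   4 bytes, a scale of 1 and at most 1000 latch iterations give a factor
   of 4 and a range of at most 4000, which needs 12 bits, so the loop
   below first tries a 16-bit unsigned offset type.  */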
2004
2005 static bool
2006 vect_truncate_gather_scatter_offset (stmt_vec_info stmt_info,
2007 loop_vec_info loop_vinfo, bool masked_p,
2008 gather_scatter_info *gs_info)
2009 {
2010 dr_vec_info *dr_info = STMT_VINFO_DR_INFO (stmt_info);
2011 data_reference *dr = dr_info->dr;
2012 tree step = DR_STEP (dr);
2013 if (TREE_CODE (step) != INTEGER_CST)
2014 {
2015 /* ??? Perhaps we could use range information here? */
2016 if (dump_enabled_p ())
2017 dump_printf_loc (MSG_NOTE, vect_location,
2018 "cannot truncate variable step.\n");
2019 return false;
2020 }
2021
2022 /* Get the number of bits in an element. */
2023 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
2024 scalar_mode element_mode = SCALAR_TYPE_MODE (TREE_TYPE (vectype));
2025 unsigned int element_bits = GET_MODE_BITSIZE (element_mode);
2026
2027 /* Set COUNT to the upper limit on the number of elements - 1.
2028 Start with the maximum vectorization factor. */
2029 unsigned HOST_WIDE_INT count = vect_max_vf (loop_vinfo) - 1;
2030
2031 /* Try lowering COUNT to the number of scalar latch iterations. */
2032 class loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
2033 widest_int max_iters;
2034 if (max_loop_iterations (loop, &max_iters)
2035 && max_iters < count)
2036 count = max_iters.to_shwi ();
2037
2038 /* Try scales of 1 and the element size. */
2039 int scales[] = { 1, vect_get_scalar_dr_size (dr_info) };
2040 wi::overflow_type overflow = wi::OVF_NONE;
2041 for (int i = 0; i < 2; ++i)
2042 {
2043 int scale = scales[i];
2044 widest_int factor;
2045 if (!wi::multiple_of_p (wi::to_widest (step), scale, SIGNED, &factor))
2046 continue;
2047
2048 /* Determine the minimum precision of (COUNT - 1) * STEP / SCALE. */
2049 widest_int range = wi::mul (count, factor, SIGNED, &overflow);
2050 if (overflow)
2051 continue;
2052 signop sign = range >= 0 ? UNSIGNED : SIGNED;
2053 unsigned int min_offset_bits = wi::min_precision (range, sign);
2054
2055 /* Find the narrowest viable offset type. */
2056 unsigned int offset_bits = 1U << ceil_log2 (min_offset_bits);
2057 tree offset_type = build_nonstandard_integer_type (offset_bits,
2058 sign == UNSIGNED);
2059
2060 /* See whether the target supports the operation with an offset
2061 no narrower than OFFSET_TYPE. */
2062 tree memory_type = TREE_TYPE (DR_REF (dr));
2063 if (!vect_gather_scatter_fn_p (loop_vinfo, DR_IS_READ (dr), masked_p,
2064 vectype, memory_type, offset_type, scale,
2065 &gs_info->ifn, &gs_info->offset_vectype))
2066 continue;
2067
2068 gs_info->decl = NULL_TREE;
2069 /* Logically the sum of DR_BASE_ADDRESS, DR_INIT and DR_OFFSET,
2070 but we don't need to store that here. */
2071 gs_info->base = NULL_TREE;
2072 gs_info->element_type = TREE_TYPE (vectype);
2073 gs_info->offset = fold_convert (offset_type, step);
2074 gs_info->offset_dt = vect_constant_def;
2075 gs_info->scale = scale;
2076 gs_info->memory_type = memory_type;
2077 return true;
2078 }
2079
2080 if (overflow && dump_enabled_p ())
2081 dump_printf_loc (MSG_NOTE, vect_location,
2082 "truncating gather/scatter offset to %d bits"
2083 " might change its value.\n", element_bits);
2084
2085 return false;
2086 }
2087
2088 /* Return true if we can use gather/scatter internal functions to
2089 vectorize STMT_INFO, which is a grouped or strided load or store.
2090 MASKED_P is true if load or store is conditional. When returning
2091 true, fill in GS_INFO with the information required to perform the
2092 operation. */
2093
2094 static bool
2095 vect_use_strided_gather_scatters_p (stmt_vec_info stmt_info,
2096 loop_vec_info loop_vinfo, bool masked_p,
2097 gather_scatter_info *gs_info)
2098 {
2099 if (!vect_check_gather_scatter (stmt_info, loop_vinfo, gs_info)
2100 || gs_info->decl)
2101 return vect_truncate_gather_scatter_offset (stmt_info, loop_vinfo,
2102 masked_p, gs_info);
2103
2104 tree old_offset_type = TREE_TYPE (gs_info->offset);
2105 tree new_offset_type = TREE_TYPE (gs_info->offset_vectype);
2106
2107 gcc_assert (TYPE_PRECISION (new_offset_type)
2108 >= TYPE_PRECISION (old_offset_type));
2109 gs_info->offset = fold_convert (new_offset_type, gs_info->offset);
2110
2111 if (dump_enabled_p ())
2112 dump_printf_loc (MSG_NOTE, vect_location,
2113 "using gather/scatter for strided/grouped access,"
2114 " scale = %d\n", gs_info->scale);
2115
2116 return true;
2117 }
2118
2119 /* STMT_INFO is a non-strided load or store, meaning that it accesses
2120 elements with a known constant step. Return -1 if that step
2121 is negative, 0 if it is zero, and 1 if it is greater than zero. */
2122
2123 static int
2124 compare_step_with_zero (stmt_vec_info stmt_info)
2125 {
2126 dr_vec_info *dr_info = STMT_VINFO_DR_INFO (stmt_info);
2127 return tree_int_cst_compare (vect_dr_behavior (dr_info)->step,
2128 size_zero_node);
2129 }
2130
2131 /* If the target supports a permute mask that reverses the elements in
2132 a vector of type VECTYPE, return that mask, otherwise return null. */
2133
2134 static tree
2135 perm_mask_for_reverse (tree vectype)
2136 {
2137 poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
2138
2139 /* The encoding has a single stepped pattern. */
2140 vec_perm_builder sel (nunits, 1, 3);
2141 for (int i = 0; i < 3; ++i)
2142 sel.quick_push (nunits - 1 - i);
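  /* For example, with an 8-element vector the three encoded elements are
     { 7, 6, 5 }, and the stepped encoding extends them to the full
     reversal { 7, 6, 5, 4, 3, 2, 1, 0 }.  */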
2143
2144 vec_perm_indices indices (sel, 1, nunits);
2145 if (!can_vec_perm_const_p (TYPE_MODE (vectype), indices))
2146 return NULL_TREE;
2147 return vect_gen_perm_mask_checked (vectype, indices);
2148 }
2149
2150 /* A subroutine of get_load_store_type, with a subset of the same
2151 arguments. Handle the case where STMT_INFO is a load or store that
2152 accesses consecutive elements with a negative step. */
2153
2154 static vect_memory_access_type
2155 get_negative_load_store_type (stmt_vec_info stmt_info, tree vectype,
2156 vec_load_store_type vls_type,
2157 unsigned int ncopies)
2158 {
2159 dr_vec_info *dr_info = STMT_VINFO_DR_INFO (stmt_info);
2160 dr_alignment_support alignment_support_scheme;
2161
2162 if (ncopies > 1)
2163 {
2164 if (dump_enabled_p ())
2165 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2166 "multiple types with negative step.\n");
2167 return VMAT_ELEMENTWISE;
2168 }
2169
2170 alignment_support_scheme = vect_supportable_dr_alignment (dr_info, false);
2171 if (alignment_support_scheme != dr_aligned
2172 && alignment_support_scheme != dr_unaligned_supported)
2173 {
2174 if (dump_enabled_p ())
2175 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2176 "negative step but alignment required.\n");
2177 return VMAT_ELEMENTWISE;
2178 }
2179
2180 if (vls_type == VLS_STORE_INVARIANT)
2181 {
2182 if (dump_enabled_p ())
2183 dump_printf_loc (MSG_NOTE, vect_location,
2184 "negative step with invariant source;"
2185 " no permute needed.\n");
2186 return VMAT_CONTIGUOUS_DOWN;
2187 }
2188
2189 if (!perm_mask_for_reverse (vectype))
2190 {
2191 if (dump_enabled_p ())
2192 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2193 "negative step and reversing not supported.\n");
2194 return VMAT_ELEMENTWISE;
2195 }
2196
2197 return VMAT_CONTIGUOUS_REVERSE;
2198 }
2199
2200 /* STMT_INFO is either a masked or unconditional store. Return the value
2201 being stored. */
2202
2203 tree
2204 vect_get_store_rhs (stmt_vec_info stmt_info)
2205 {
2206 if (gassign *assign = dyn_cast <gassign *> (stmt_info->stmt))
2207 {
2208 gcc_assert (gimple_assign_single_p (assign));
2209 return gimple_assign_rhs1 (assign);
2210 }
2211 if (gcall *call = dyn_cast <gcall *> (stmt_info->stmt))
2212 {
2213 internal_fn ifn = gimple_call_internal_fn (call);
2214 int index = internal_fn_stored_value_index (ifn);
2215 gcc_assert (index >= 0);
2216 return gimple_call_arg (call, index);
2217 }
2218 gcc_unreachable ();
2219 }
2220
2221 /* A subroutine of get_load_store_type, with a subset of the same
2222 arguments. Handle the case where STMT_INFO is part of a grouped load
2223 or store.
2224
2225 For stores, the statements in the group are all consecutive
2226 and there is no gap at the end. For loads, the statements in the
2227 group might not be consecutive; there can be gaps between statements
2228 as well as at the end. */
2229
2230 static bool
2231 get_group_load_store_type (stmt_vec_info stmt_info, tree vectype, bool slp,
2232 bool masked_p, vec_load_store_type vls_type,
2233 vect_memory_access_type *memory_access_type,
2234 gather_scatter_info *gs_info)
2235 {
2236 vec_info *vinfo = stmt_info->vinfo;
2237 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
2238 class loop *loop = loop_vinfo ? LOOP_VINFO_LOOP (loop_vinfo) : NULL;
2239 stmt_vec_info first_stmt_info = DR_GROUP_FIRST_ELEMENT (stmt_info);
2240 dr_vec_info *first_dr_info = STMT_VINFO_DR_INFO (first_stmt_info);
2241 unsigned int group_size = DR_GROUP_SIZE (first_stmt_info);
2242 bool single_element_p = (stmt_info == first_stmt_info
2243 && !DR_GROUP_NEXT_ELEMENT (stmt_info));
2244 unsigned HOST_WIDE_INT gap = DR_GROUP_GAP (first_stmt_info);
2245 poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
2246
2247 /* True if the vectorized statements would access beyond the last
2248 statement in the group. */
2249 bool overrun_p = false;
2250
2251 /* True if we can cope with such overrun by peeling for gaps, so that
2252 there is at least one final scalar iteration after the vector loop. */
2253 bool can_overrun_p = (!masked_p
2254 && vls_type == VLS_LOAD
2255 && loop_vinfo
2256 && !loop->inner);
2257
2258 /* There can only be a gap at the end of the group if the stride is
2259 known at compile time. */
2260 gcc_assert (!STMT_VINFO_STRIDED_P (first_stmt_info) || gap == 0);
2261
2262 /* Stores can't yet have gaps. */
2263 gcc_assert (slp || vls_type == VLS_LOAD || gap == 0);
2264
2265 if (slp)
2266 {
2267 if (STMT_VINFO_STRIDED_P (first_stmt_info))
2268 {
2269 /* Try to use consecutive accesses of DR_GROUP_SIZE elements,
2270 separated by the stride, until we have a complete vector.
2271 Fall back to scalar accesses if that isn't possible. */
2272 if (multiple_p (nunits, group_size))
2273 *memory_access_type = VMAT_STRIDED_SLP;
2274 else
2275 *memory_access_type = VMAT_ELEMENTWISE;
2276 }
2277 else
2278 {
2279 overrun_p = loop_vinfo && gap != 0;
2280 if (overrun_p && vls_type != VLS_LOAD)
2281 {
2282 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2283 "Grouped store with gaps requires"
2284 " non-consecutive accesses\n");
2285 return false;
2286 }
2287 /* An overrun is fine if the trailing elements are smaller
2288 than the alignment boundary B. Every vector access will
2289 be a multiple of B and so we are guaranteed to access a
2290 non-gap element in the same B-sized block. */
2291 if (overrun_p
2292 && gap < (vect_known_alignment_in_bytes (first_dr_info)
2293 / vect_get_scalar_dr_size (first_dr_info)))
2294 overrun_p = false;
2295
2296 /* If the gap splits the vector in half and the target
2297 can do half-vector operations avoid the epilogue peeling
2298 by simply loading half of the vector only. Usually
2299 the construction with an upper zero half will be elided. */
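	      /* For example, with GROUP_SIZE == 4, GAP == 2 and a 4-element
		 VECTYPE, a 2-element half vector covers the accessed
		 elements without reading into the gap.  */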
2300 dr_alignment_support alignment_support_scheme;
2301 scalar_mode elmode = SCALAR_TYPE_MODE (TREE_TYPE (vectype));
2302 machine_mode vmode;
2303 if (overrun_p
2304 && !masked_p
2305 && (((alignment_support_scheme
2306 = vect_supportable_dr_alignment (first_dr_info, false)))
2307 == dr_aligned
2308 || alignment_support_scheme == dr_unaligned_supported)
2309 && known_eq (nunits, (group_size - gap) * 2)
2310 && known_eq (nunits, group_size)
2311 && related_vector_mode (TYPE_MODE (vectype), elmode,
2312 group_size - gap).exists (&vmode)
2313 && (convert_optab_handler (vec_init_optab,
2314 TYPE_MODE (vectype), vmode)
2315 != CODE_FOR_nothing))
2316 overrun_p = false;
2317
2318 if (overrun_p && !can_overrun_p)
2319 {
2320 if (dump_enabled_p ())
2321 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2322 "Peeling for outer loop is not supported\n");
2323 return false;
2324 }
2325 int cmp = compare_step_with_zero (stmt_info);
2326 if (cmp < 0)
2327 *memory_access_type = get_negative_load_store_type
2328 (stmt_info, vectype, vls_type, 1);
2329 else
2330 {
2331 gcc_assert (!loop_vinfo || cmp > 0);
2332 *memory_access_type = VMAT_CONTIGUOUS;
2333 }
2334 }
2335 }
2336 else
2337 {
2338 /* We can always handle this case using elementwise accesses,
2339 but see if something more efficient is available. */
2340 *memory_access_type = VMAT_ELEMENTWISE;
2341
2342 /* If there is a gap at the end of the group then these optimizations
2343 would access excess elements in the last iteration. */
2344 bool would_overrun_p = (gap != 0);
2345 /* An overrun is fine if the trailing elements are smaller than the
2346 alignment boundary B. Every vector access will be a multiple of B
2347 and so we are guaranteed to access a non-gap element in the
2348 same B-sized block. */
2349 if (would_overrun_p
2350 && !masked_p
2351 && gap < (vect_known_alignment_in_bytes (first_dr_info)
2352 / vect_get_scalar_dr_size (first_dr_info)))
2353 would_overrun_p = false;
2354
2355 if (!STMT_VINFO_STRIDED_P (first_stmt_info)
2356 && (can_overrun_p || !would_overrun_p)
2357 && compare_step_with_zero (stmt_info) > 0)
2358 {
2359 /* First cope with the degenerate case of a single-element
2360 vector. */
2361 if (known_eq (TYPE_VECTOR_SUBPARTS (vectype), 1U))
2362 *memory_access_type = VMAT_CONTIGUOUS;
2363
2364 /* Otherwise try using LOAD/STORE_LANES. */
2365 if (*memory_access_type == VMAT_ELEMENTWISE
2366 && (vls_type == VLS_LOAD
2367 ? vect_load_lanes_supported (vectype, group_size, masked_p)
2368 : vect_store_lanes_supported (vectype, group_size,
2369 masked_p)))
2370 {
2371 *memory_access_type = VMAT_LOAD_STORE_LANES;
2372 overrun_p = would_overrun_p;
2373 }
2374
2375 /* If that fails, try using permuting loads. */
2376 if (*memory_access_type == VMAT_ELEMENTWISE
2377 && (vls_type == VLS_LOAD
2378 ? vect_grouped_load_supported (vectype, single_element_p,
2379 group_size)
2380 : vect_grouped_store_supported (vectype, group_size)))
2381 {
2382 *memory_access_type = VMAT_CONTIGUOUS_PERMUTE;
2383 overrun_p = would_overrun_p;
2384 }
2385 }
2386
2387 /* As a last resort, try using a gather load or scatter store.
2388
2389 ??? Although the code can handle all group sizes correctly,
2390 it probably isn't a win to use separate strided accesses based
2391 on nearby locations. Or, even if it's a win over scalar code,
2392 it might not be a win over vectorizing at a lower VF, if that
2393 allows us to use contiguous accesses. */
2394 if (*memory_access_type == VMAT_ELEMENTWISE
2395 && single_element_p
2396 && loop_vinfo
2397 && vect_use_strided_gather_scatters_p (stmt_info, loop_vinfo,
2398 masked_p, gs_info))
2399 *memory_access_type = VMAT_GATHER_SCATTER;
2400 }
2401
2402 if (vls_type != VLS_LOAD && first_stmt_info == stmt_info)
2403 {
2404 /* STMT_INFO is the leader of the group. Check the operands of all the
2405 stmts of the group. */
2406 stmt_vec_info next_stmt_info = DR_GROUP_NEXT_ELEMENT (stmt_info);
2407 while (next_stmt_info)
2408 {
2409 tree op = vect_get_store_rhs (next_stmt_info);
2410 enum vect_def_type dt;
2411 if (!vect_is_simple_use (op, vinfo, &dt))
2412 {
2413 if (dump_enabled_p ())
2414 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2415 "use not simple.\n");
2416 return false;
2417 }
2418 next_stmt_info = DR_GROUP_NEXT_ELEMENT (next_stmt_info);
2419 }
2420 }
2421
2422 if (overrun_p)
2423 {
2424 gcc_assert (can_overrun_p);
2425 if (dump_enabled_p ())
2426 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2427 "Data access with gaps requires scalar "
2428 "epilogue loop\n");
2429 LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo) = true;
2430 }
2431
2432 return true;
2433 }
2434
2435 /* Analyze load or store statement STMT_INFO of type VLS_TYPE. Return true
2436 if there is a memory access type that the vectorized form can use,
2437 storing it in *MEMORY_ACCESS_TYPE if so. If we decide to use gathers
2438 or scatters, fill in GS_INFO accordingly.
2439
2440 SLP says whether we're performing SLP rather than loop vectorization.
2441 MASKED_P is true if the statement is conditional on a vectorized mask.
2442 VECTYPE is the vector type that the vectorized statements will use.
2443 NCOPIES is the number of vector statements that will be needed. */
2444
2445 static bool
2446 get_load_store_type (stmt_vec_info stmt_info, tree vectype, bool slp,
2447 bool masked_p, vec_load_store_type vls_type,
2448 unsigned int ncopies,
2449 vect_memory_access_type *memory_access_type,
2450 gather_scatter_info *gs_info)
2451 {
2452 vec_info *vinfo = stmt_info->vinfo;
2453 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
2454 poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
2455 if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
2456 {
2457 *memory_access_type = VMAT_GATHER_SCATTER;
2458 if (!vect_check_gather_scatter (stmt_info, loop_vinfo, gs_info))
2459 gcc_unreachable ();
2460 else if (!vect_is_simple_use (gs_info->offset, vinfo,
2461 &gs_info->offset_dt,
2462 &gs_info->offset_vectype))
2463 {
2464 if (dump_enabled_p ())
2465 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2466 "%s index use not simple.\n",
2467 vls_type == VLS_LOAD ? "gather" : "scatter");
2468 return false;
2469 }
2470 }
2471 else if (STMT_VINFO_GROUPED_ACCESS (stmt_info))
2472 {
2473 if (!get_group_load_store_type (stmt_info, vectype, slp, masked_p,
2474 vls_type, memory_access_type, gs_info))
2475 return false;
2476 }
2477 else if (STMT_VINFO_STRIDED_P (stmt_info))
2478 {
2479 gcc_assert (!slp);
2480 if (loop_vinfo
2481 && vect_use_strided_gather_scatters_p (stmt_info, loop_vinfo,
2482 masked_p, gs_info))
2483 *memory_access_type = VMAT_GATHER_SCATTER;
2484 else
2485 *memory_access_type = VMAT_ELEMENTWISE;
2486 }
2487 else
2488 {
2489 int cmp = compare_step_with_zero (stmt_info);
2490 if (cmp < 0)
2491 *memory_access_type = get_negative_load_store_type
2492 (stmt_info, vectype, vls_type, ncopies);
2493 else if (cmp == 0)
2494 {
2495 gcc_assert (vls_type == VLS_LOAD);
2496 *memory_access_type = VMAT_INVARIANT;
2497 }
2498 else
2499 *memory_access_type = VMAT_CONTIGUOUS;
2500 }
2501
2502 if ((*memory_access_type == VMAT_ELEMENTWISE
2503 || *memory_access_type == VMAT_STRIDED_SLP)
2504 && !nunits.is_constant ())
2505 {
2506 if (dump_enabled_p ())
2507 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2508 "Not using elementwise accesses due to variable "
2509 "vectorization factor.\n");
2510 return false;
2511 }
2512
2513 /* FIXME: At the moment the cost model seems to underestimate the
2514 cost of using elementwise accesses. This check preserves the
2515 traditional behavior until that can be fixed. */
2516 stmt_vec_info first_stmt_info = DR_GROUP_FIRST_ELEMENT (stmt_info);
2517 if (!first_stmt_info)
2518 first_stmt_info = stmt_info;
2519 if (*memory_access_type == VMAT_ELEMENTWISE
2520 && !STMT_VINFO_STRIDED_P (first_stmt_info)
2521 && !(stmt_info == DR_GROUP_FIRST_ELEMENT (stmt_info)
2522 && !DR_GROUP_NEXT_ELEMENT (stmt_info)
2523 && !pow2p_hwi (DR_GROUP_SIZE (stmt_info))))
2524 {
2525 if (dump_enabled_p ())
2526 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2527 "not falling back to elementwise accesses\n");
2528 return false;
2529 }
2530 return true;
2531 }
2532
2533 /* Return true if boolean argument MASK is suitable for vectorizing
2534 conditional load or store STMT_INFO. When returning true, store the type
2535 of the definition in *MASK_DT_OUT and the type of the vectorized mask
2536 in *MASK_VECTYPE_OUT. */
2537
2538 static bool
2539 vect_check_load_store_mask (stmt_vec_info stmt_info, tree mask,
2540 vect_def_type *mask_dt_out,
2541 tree *mask_vectype_out)
2542 {
2543 vec_info *vinfo = stmt_info->vinfo;
2544 if (!VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (mask)))
2545 {
2546 if (dump_enabled_p ())
2547 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2548 "mask argument is not a boolean.\n");
2549 return false;
2550 }
2551
2552 if (TREE_CODE (mask) != SSA_NAME)
2553 {
2554 if (dump_enabled_p ())
2555 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2556 "mask argument is not an SSA name.\n");
2557 return false;
2558 }
2559
2560 enum vect_def_type mask_dt;
2561 tree mask_vectype;
2562 if (!vect_is_simple_use (mask, stmt_info->vinfo, &mask_dt, &mask_vectype))
2563 {
2564 if (dump_enabled_p ())
2565 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2566 "mask use not simple.\n");
2567 return false;
2568 }
2569
2570 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
2571 if (!mask_vectype)
2572 mask_vectype = get_mask_type_for_scalar_type (vinfo, TREE_TYPE (vectype));
2573
2574 if (!mask_vectype || !VECTOR_BOOLEAN_TYPE_P (mask_vectype))
2575 {
2576 if (dump_enabled_p ())
2577 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2578 "could not find an appropriate vector mask type.\n");
2579 return false;
2580 }
2581
2582 if (maybe_ne (TYPE_VECTOR_SUBPARTS (mask_vectype),
2583 TYPE_VECTOR_SUBPARTS (vectype)))
2584 {
2585 if (dump_enabled_p ())
2586 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2587 "vector mask type %T"
2588 " does not match vector data type %T.\n",
2589 mask_vectype, vectype);
2590
2591 return false;
2592 }
2593
2594 *mask_dt_out = mask_dt;
2595 *mask_vectype_out = mask_vectype;
2596 return true;
2597 }
2598
2599 /* Return true if stored value RHS is suitable for vectorizing store
2600 statement STMT_INFO. When returning true, store the type of the
2601 definition in *RHS_DT_OUT, the type of the vectorized store value in
2602 *RHS_VECTYPE_OUT and the type of the store in *VLS_TYPE_OUT. */
2603
2604 static bool
2605 vect_check_store_rhs (stmt_vec_info stmt_info, tree rhs,
2606 vect_def_type *rhs_dt_out, tree *rhs_vectype_out,
2607 vec_load_store_type *vls_type_out)
2608 {
2609 /* If this is a store from a constant, make sure
2610 native_encode_expr can handle it. */
2611 if (CONSTANT_CLASS_P (rhs) && native_encode_expr (rhs, NULL, 64) == 0)
2612 {
2613 if (dump_enabled_p ())
2614 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2615 "cannot encode constant as a byte sequence.\n");
2616 return false;
2617 }
2618
2619 enum vect_def_type rhs_dt;
2620 tree rhs_vectype;
2621 if (!vect_is_simple_use (rhs, stmt_info->vinfo, &rhs_dt, &rhs_vectype))
2622 {
2623 if (dump_enabled_p ())
2624 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2625 "use not simple.\n");
2626 return false;
2627 }
2628
2629 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
2630 if (rhs_vectype && !useless_type_conversion_p (vectype, rhs_vectype))
2631 {
2632 if (dump_enabled_p ())
2633 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2634 "incompatible vector types.\n");
2635 return false;
2636 }
2637
2638 *rhs_dt_out = rhs_dt;
2639 *rhs_vectype_out = rhs_vectype;
2640 if (rhs_dt == vect_constant_def || rhs_dt == vect_external_def)
2641 *vls_type_out = VLS_STORE_INVARIANT;
2642 else
2643 *vls_type_out = VLS_STORE;
2644 return true;
2645 }
2646
2647 /* Build an all-ones vector mask of type MASKTYPE while vectorizing STMT_INFO.
2648 Note that we support masks with floating-point type, in which case the
2649 floats are interpreted as a bitmask. */
2650
2651 static tree
2652 vect_build_all_ones_mask (stmt_vec_info stmt_info, tree masktype)
2653 {
2654 if (TREE_CODE (masktype) == INTEGER_TYPE)
2655 return build_int_cst (masktype, -1);
2656 else if (TREE_CODE (TREE_TYPE (masktype)) == INTEGER_TYPE)
2657 {
2658 tree mask = build_int_cst (TREE_TYPE (masktype), -1);
2659 mask = build_vector_from_val (masktype, mask);
2660 return vect_init_vector (stmt_info, mask, masktype, NULL);
2661 }
2662 else if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (masktype)))
2663 {
2664 REAL_VALUE_TYPE r;
2665 long tmp[6];
2666 for (int j = 0; j < 6; ++j)
2667 tmp[j] = -1;
2668 real_from_target (&r, tmp, TYPE_MODE (TREE_TYPE (masktype)));
2669 tree mask = build_real (TREE_TYPE (masktype), r);
2670 mask = build_vector_from_val (masktype, mask);
2671 return vect_init_vector (stmt_info, mask, masktype, NULL);
2672 }
2673 gcc_unreachable ();
2674 }
2675
2676 /* Build an all-zero merge value of type VECTYPE while vectorizing
2677 STMT_INFO as a gather load. */
2678
2679 static tree
2680 vect_build_zero_merge_argument (stmt_vec_info stmt_info, tree vectype)
2681 {
2682 tree merge;
2683 if (TREE_CODE (TREE_TYPE (vectype)) == INTEGER_TYPE)
2684 merge = build_int_cst (TREE_TYPE (vectype), 0);
2685 else if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (vectype)))
2686 {
2687 REAL_VALUE_TYPE r;
2688 long tmp[6];
2689 for (int j = 0; j < 6; ++j)
2690 tmp[j] = 0;
2691 real_from_target (&r, tmp, TYPE_MODE (TREE_TYPE (vectype)));
2692 merge = build_real (TREE_TYPE (vectype), r);
2693 }
2694 else
2695 gcc_unreachable ();
2696 merge = build_vector_from_val (vectype, merge);
2697 return vect_init_vector (stmt_info, merge, vectype, NULL);
2698 }
2699
2700 /* Build a gather load call while vectorizing STMT_INFO. Insert new
2701 instructions before GSI and add them to VEC_STMT. GS_INFO describes
2702 the gather load operation. If the load is conditional, MASK is the
2703 unvectorized condition and MASK_DT is its definition type, otherwise
2704 MASK is null. */
2705
2706 static void
2707 vect_build_gather_load_calls (stmt_vec_info stmt_info,
2708 gimple_stmt_iterator *gsi,
2709 stmt_vec_info *vec_stmt,
2710 gather_scatter_info *gs_info,
2711 tree mask)
2712 {
2713 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
2714 class loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
2715 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
2716 poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
2717 int ncopies = vect_get_num_copies (loop_vinfo, vectype);
2718 edge pe = loop_preheader_edge (loop);
2719 enum { NARROW, NONE, WIDEN } modifier;
2720 poly_uint64 gather_off_nunits
2721 = TYPE_VECTOR_SUBPARTS (gs_info->offset_vectype);
2722
2723 tree arglist = TYPE_ARG_TYPES (TREE_TYPE (gs_info->decl));
2724 tree rettype = TREE_TYPE (TREE_TYPE (gs_info->decl));
2725 tree srctype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
2726 tree ptrtype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
2727 tree idxtype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
2728 tree masktype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
2729 tree scaletype = TREE_VALUE (arglist);
2730 tree real_masktype = masktype;
2731 gcc_checking_assert (types_compatible_p (srctype, rettype)
2732 && (!mask
2733 || TREE_CODE (masktype) == INTEGER_TYPE
2734 || types_compatible_p (srctype, masktype)));
2735 if (mask && TREE_CODE (masktype) == INTEGER_TYPE)
2736 masktype = truth_type_for (srctype);
2737
2738 tree mask_halftype = masktype;
2739 tree perm_mask = NULL_TREE;
2740 tree mask_perm_mask = NULL_TREE;
2741 if (known_eq (nunits, gather_off_nunits))
2742 modifier = NONE;
2743 else if (known_eq (nunits * 2, gather_off_nunits))
2744 {
2745 modifier = WIDEN;
2746
2747 /* Currently widening gathers and scatters are only supported for
2748 fixed-length vectors. */
2749 int count = gather_off_nunits.to_constant ();
2750 vec_perm_builder sel (count, count, 1);
2751 for (int i = 0; i < count; ++i)
2752 sel.quick_push (i | (count / 2));
2753
2754 vec_perm_indices indices (sel, 1, count);
2755 perm_mask = vect_gen_perm_mask_checked (gs_info->offset_vectype,
2756 indices);
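      /* For example, with COUNT == 8 the permutation is
	 { 4, 5, 6, 7, 4, 5, 6, 7 }, so the odd copies below reuse the
	 upper half of the single offset vector.  */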
2757 }
2758 else if (known_eq (nunits, gather_off_nunits * 2))
2759 {
2760 modifier = NARROW;
2761
2762 /* Currently narrowing gathers and scatters are only supported for
2763 fixed-length vectors. */
2764 int count = nunits.to_constant ();
2765 vec_perm_builder sel (count, count, 1);
2766 sel.quick_grow (count);
2767 for (int i = 0; i < count; ++i)
2768 sel[i] = i < count / 2 ? i : i + count / 2;
2769 vec_perm_indices indices (sel, 2, count);
2770 perm_mask = vect_gen_perm_mask_checked (vectype, indices);
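      /* For example, with COUNT == 8 the permutation selects elements
	 { 0, 1, 2, 3, 8, 9, 10, 11 }, i.e. the low half of each of the
	 two gather results that are combined below.  */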
2771
2772 ncopies *= 2;
2773
2774 if (mask && masktype == real_masktype)
2775 {
2776 for (int i = 0; i < count; ++i)
2777 sel[i] = i | (count / 2);
2778 indices.new_vector (sel, 2, count);
2779 mask_perm_mask = vect_gen_perm_mask_checked (masktype, indices);
2780 }
2781 else if (mask)
2782 mask_halftype = truth_type_for (gs_info->offset_vectype);
2783 }
2784 else
2785 gcc_unreachable ();
2786
2787 tree scalar_dest = gimple_get_lhs (stmt_info->stmt);
2788 tree vec_dest = vect_create_destination_var (scalar_dest, vectype);
2789
2790 tree ptr = fold_convert (ptrtype, gs_info->base);
2791 if (!is_gimple_min_invariant (ptr))
2792 {
2793 gimple_seq seq;
2794 ptr = force_gimple_operand (ptr, &seq, true, NULL_TREE);
2795 basic_block new_bb = gsi_insert_seq_on_edge_immediate (pe, seq);
2796 gcc_assert (!new_bb);
2797 }
2798
2799 tree scale = build_int_cst (scaletype, gs_info->scale);
2800
2801 tree vec_oprnd0 = NULL_TREE;
2802 tree vec_mask = NULL_TREE;
2803 tree src_op = NULL_TREE;
2804 tree mask_op = NULL_TREE;
2805 tree prev_res = NULL_TREE;
2806 stmt_vec_info prev_stmt_info = NULL;
2807
2808 if (!mask)
2809 {
2810 src_op = vect_build_zero_merge_argument (stmt_info, rettype);
2811 mask_op = vect_build_all_ones_mask (stmt_info, masktype);
2812 }
2813
2814 for (int j = 0; j < ncopies; ++j)
2815 {
2816 tree op, var;
2817 if (modifier == WIDEN && (j & 1))
2818 op = permute_vec_elements (vec_oprnd0, vec_oprnd0,
2819 perm_mask, stmt_info, gsi);
2820 else if (j == 0)
2821 op = vec_oprnd0
2822 = vect_get_vec_def_for_operand (gs_info->offset, stmt_info);
2823 else
2824 op = vec_oprnd0 = vect_get_vec_def_for_stmt_copy (loop_vinfo,
2825 vec_oprnd0);
2826
2827 if (!useless_type_conversion_p (idxtype, TREE_TYPE (op)))
2828 {
2829 gcc_assert (known_eq (TYPE_VECTOR_SUBPARTS (TREE_TYPE (op)),
2830 TYPE_VECTOR_SUBPARTS (idxtype)));
2831 var = vect_get_new_ssa_name (idxtype, vect_simple_var);
2832 op = build1 (VIEW_CONVERT_EXPR, idxtype, op);
2833 gassign *new_stmt = gimple_build_assign (var, VIEW_CONVERT_EXPR, op);
2834 vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
2835 op = var;
2836 }
2837
2838 if (mask)
2839 {
2840 if (mask_perm_mask && (j & 1))
2841 mask_op = permute_vec_elements (mask_op, mask_op,
2842 mask_perm_mask, stmt_info, gsi);
2843 else
2844 {
2845 if (j == 0)
2846 vec_mask = vect_get_vec_def_for_operand (mask, stmt_info);
2847 else if (modifier != NARROW || (j & 1) == 0)
2848 vec_mask = vect_get_vec_def_for_stmt_copy (loop_vinfo,
2849 vec_mask);
2850
2851 mask_op = vec_mask;
2852 if (!useless_type_conversion_p (masktype, TREE_TYPE (vec_mask)))
2853 {
2854 poly_uint64 sub1 = TYPE_VECTOR_SUBPARTS (TREE_TYPE (mask_op));
2855 poly_uint64 sub2 = TYPE_VECTOR_SUBPARTS (masktype);
2856 gcc_assert (known_eq (sub1, sub2));
2857 var = vect_get_new_ssa_name (masktype, vect_simple_var);
2858 mask_op = build1 (VIEW_CONVERT_EXPR, masktype, mask_op);
2859 gassign *new_stmt
2860 = gimple_build_assign (var, VIEW_CONVERT_EXPR, mask_op);
2861 vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
2862 mask_op = var;
2863 }
2864 }
2865 if (modifier == NARROW && masktype != real_masktype)
2866 {
2867 var = vect_get_new_ssa_name (mask_halftype, vect_simple_var);
2868 gassign *new_stmt
2869 = gimple_build_assign (var, (j & 1) ? VEC_UNPACK_HI_EXPR
2870 : VEC_UNPACK_LO_EXPR,
2871 mask_op);
2872 vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
2873 mask_op = var;
2874 }
2875 src_op = mask_op;
2876 }
2877
2878 tree mask_arg = mask_op;
2879 if (masktype != real_masktype)
2880 {
2881 tree utype, optype = TREE_TYPE (mask_op);
2882 if (TYPE_MODE (real_masktype) == TYPE_MODE (optype))
2883 utype = real_masktype;
2884 else
2885 utype = lang_hooks.types.type_for_mode (TYPE_MODE (optype), 1);
2886 var = vect_get_new_ssa_name (utype, vect_scalar_var);
2887 mask_arg = build1 (VIEW_CONVERT_EXPR, utype, mask_op);
2888 gassign *new_stmt
2889 = gimple_build_assign (var, VIEW_CONVERT_EXPR, mask_arg);
2890 vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
2891 mask_arg = var;
2892 if (!useless_type_conversion_p (real_masktype, utype))
2893 {
2894 gcc_assert (TYPE_PRECISION (utype)
2895 <= TYPE_PRECISION (real_masktype));
2896 var = vect_get_new_ssa_name (real_masktype, vect_scalar_var);
2897 new_stmt = gimple_build_assign (var, NOP_EXPR, mask_arg);
2898 vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
2899 mask_arg = var;
2900 }
2901 src_op = build_zero_cst (srctype);
2902 }
2903 gcall *new_call = gimple_build_call (gs_info->decl, 5, src_op, ptr, op,
2904 mask_arg, scale);
2905
2906 stmt_vec_info new_stmt_info;
2907 if (!useless_type_conversion_p (vectype, rettype))
2908 {
2909 gcc_assert (known_eq (TYPE_VECTOR_SUBPARTS (vectype),
2910 TYPE_VECTOR_SUBPARTS (rettype)));
2911 op = vect_get_new_ssa_name (rettype, vect_simple_var);
2912 gimple_call_set_lhs (new_call, op);
2913 vect_finish_stmt_generation (stmt_info, new_call, gsi);
2914 var = make_ssa_name (vec_dest);
2915 op = build1 (VIEW_CONVERT_EXPR, vectype, op);
2916 gassign *new_stmt = gimple_build_assign (var, VIEW_CONVERT_EXPR, op);
2917 new_stmt_info
2918 = vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
2919 }
2920 else
2921 {
2922 var = make_ssa_name (vec_dest, new_call);
2923 gimple_call_set_lhs (new_call, var);
2924 new_stmt_info
2925 = vect_finish_stmt_generation (stmt_info, new_call, gsi);
2926 }
2927
2928 if (modifier == NARROW)
2929 {
2930 if ((j & 1) == 0)
2931 {
2932 prev_res = var;
2933 continue;
2934 }
2935 var = permute_vec_elements (prev_res, var, perm_mask,
2936 stmt_info, gsi);
2937 new_stmt_info = loop_vinfo->lookup_def (var);
2938 }
2939
2940 if (prev_stmt_info == NULL)
2941 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt_info;
2942 else
2943 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt_info;
2944 prev_stmt_info = new_stmt_info;
2945 }
2946 }
2947
2948 /* Prepare the base and offset in GS_INFO for vectorization.
2949 Set *DATAREF_PTR to the loop-invariant base address and *VEC_OFFSET
2950 to the vectorized offset argument for the first copy of STMT_INFO.
2951 STMT_INFO is the statement described by GS_INFO and LOOP is the
2952 containing loop. */
2953
2954 static void
2955 vect_get_gather_scatter_ops (class loop *loop, stmt_vec_info stmt_info,
2956 gather_scatter_info *gs_info,
2957 tree *dataref_ptr, tree *vec_offset)
2958 {
2959 gimple_seq stmts = NULL;
2960 *dataref_ptr = force_gimple_operand (gs_info->base, &stmts, true, NULL_TREE);
2961 if (stmts != NULL)
2962 {
2963 basic_block new_bb;
2964 edge pe = loop_preheader_edge (loop);
2965 new_bb = gsi_insert_seq_on_edge_immediate (pe, stmts);
2966 gcc_assert (!new_bb);
2967 }
2968 *vec_offset = vect_get_vec_def_for_operand (gs_info->offset, stmt_info,
2969 gs_info->offset_vectype);
2970 }
2971
2972 /* Prepare to implement a grouped or strided load or store using
2973 the gather load or scatter store operation described by GS_INFO.
2974 STMT_INFO is the load or store statement.
2975
2976 Set *DATAREF_BUMP to the amount that should be added to the base
2977 address after each copy of the vectorized statement. Set *VEC_OFFSET
2978 to an invariant offset vector in which element I has the value
2979 I * DR_STEP / SCALE. */
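/* For example, with illustrative values DR_STEP == 32, SCALE == 8 and a
   4-element VECTYPE, *DATAREF_BUMP becomes 128 and *VEC_OFFSET becomes
   { 0, 4, 8, 12 }.  */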
2980
2981 static void
2982 vect_get_strided_load_store_ops (stmt_vec_info stmt_info,
2983 loop_vec_info loop_vinfo,
2984 gather_scatter_info *gs_info,
2985 tree *dataref_bump, tree *vec_offset)
2986 {
2987 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
2988 class loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
2989 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
2990 gimple_seq stmts;
2991
2992 tree bump = size_binop (MULT_EXPR,
2993 fold_convert (sizetype, DR_STEP (dr)),
2994 size_int (TYPE_VECTOR_SUBPARTS (vectype)));
2995 *dataref_bump = force_gimple_operand (bump, &stmts, true, NULL_TREE);
2996 if (stmts)
2997 gsi_insert_seq_on_edge_immediate (loop_preheader_edge (loop), stmts);
2998
2999 /* The offset given in GS_INFO can have pointer type, so use the element
3000 type of the vector instead. */
3001 tree offset_type = TREE_TYPE (gs_info->offset_vectype);
3003
3004 /* Calculate X = DR_STEP / SCALE and convert it to the appropriate type. */
3005 tree step = size_binop (EXACT_DIV_EXPR, DR_STEP (dr),
3006 ssize_int (gs_info->scale));
3007 step = fold_convert (offset_type, step);
3008 step = force_gimple_operand (step, &stmts, true, NULL_TREE);
3009
3010 /* Create {0, X, X*2, X*3, ...}. */
3011 *vec_offset = gimple_build (&stmts, VEC_SERIES_EXPR, gs_info->offset_vectype,
3012 build_zero_cst (offset_type), step);
3013 if (stmts)
3014 gsi_insert_seq_on_edge_immediate (loop_preheader_edge (loop), stmts);
3015 }
3016
3017 /* Return the amount that should be added to a vector pointer to move
3018 to the next or previous copy of AGGR_TYPE. DR_INFO is the data reference
3019 being vectorized and MEMORY_ACCESS_TYPE describes the type of
3020 vectorization. */
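/* For example, for a 16-byte AGGR_TYPE the increment is 16 for a forward
   access and -16 when the scalar step is negative.  */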
3021
3022 static tree
3023 vect_get_data_ptr_increment (dr_vec_info *dr_info, tree aggr_type,
3024 vect_memory_access_type memory_access_type)
3025 {
3026 if (memory_access_type == VMAT_INVARIANT)
3027 return size_zero_node;
3028
3029 tree iv_step = TYPE_SIZE_UNIT (aggr_type);
3030 tree step = vect_dr_behavior (dr_info)->step;
3031 if (tree_int_cst_sgn (step) == -1)
3032 iv_step = fold_build1 (NEGATE_EXPR, TREE_TYPE (iv_step), iv_step);
3033 return iv_step;
3034 }
3035
3036 /* Check and perform vectorization of BUILT_IN_BSWAP{16,32,64}. */
3037
3038 static bool
3039 vectorizable_bswap (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
3040 stmt_vec_info *vec_stmt, slp_tree slp_node,
3041 tree vectype_in, stmt_vector_for_cost *cost_vec)
3042 {
3043 tree op, vectype;
3044 gcall *stmt = as_a <gcall *> (stmt_info->stmt);
3045 vec_info *vinfo = stmt_info->vinfo;
3046 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
3047 unsigned ncopies;
3048
3049 op = gimple_call_arg (stmt, 0);
3050 vectype = STMT_VINFO_VECTYPE (stmt_info);
3051 poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
3052
3053 /* Multiple types in SLP are handled by creating the appropriate number of
3054 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
3055 case of SLP. */
3056 if (slp_node)
3057 ncopies = 1;
3058 else
3059 ncopies = vect_get_num_copies (loop_vinfo, vectype);
3060
3061 gcc_assert (ncopies >= 1);
3062
3063 tree char_vectype = get_same_sized_vectype (char_type_node, vectype_in);
3064 if (! char_vectype)
3065 return false;
3066
3067 poly_uint64 num_bytes = TYPE_VECTOR_SUBPARTS (char_vectype);
3068 unsigned word_bytes;
3069 if (!constant_multiple_p (num_bytes, nunits, &word_bytes))
3070 return false;
3071
3072 /* The encoding uses one stepped pattern for each byte in the word. */
3073 vec_perm_builder elts (num_bytes, word_bytes, 3);
3074 for (unsigned i = 0; i < 3; ++i)
3075 for (unsigned j = 0; j < word_bytes; ++j)
3076 elts.quick_push ((i + 1) * word_bytes - j - 1);
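  /* For example, for a 32-bit bswap on a vector of 4-byte words the
     encoded elements are { 3, 2, 1, 0, 7, 6, 5, 4, 11, 10, 9, 8 }, which
     the stepped encoding extends to reverse the bytes of every word.  */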
3077
3078 vec_perm_indices indices (elts, 1, num_bytes);
3079 if (!can_vec_perm_const_p (TYPE_MODE (char_vectype), indices))
3080 return false;
3081
3082 if (! vec_stmt)
3083 {
3084 STMT_VINFO_TYPE (stmt_info) = call_vec_info_type;
3085 DUMP_VECT_SCOPE ("vectorizable_bswap");
3086 if (! slp_node)
3087 {
3088 record_stmt_cost (cost_vec,
3089 1, vector_stmt, stmt_info, 0, vect_prologue);
3090 record_stmt_cost (cost_vec,
3091 ncopies, vec_perm, stmt_info, 0, vect_body);
3092 }
3093 return true;
3094 }
3095
3096 tree bswap_vconst = vec_perm_indices_to_tree (char_vectype, indices);
3097
3098 /* Transform. */
3099 vec<tree> vec_oprnds = vNULL;
3100 stmt_vec_info new_stmt_info = NULL;
3101 stmt_vec_info prev_stmt_info = NULL;
3102 for (unsigned j = 0; j < ncopies; j++)
3103 {
3104 /* Handle uses. */
3105 if (j == 0)
3106 vect_get_vec_defs (op, NULL, stmt_info, &vec_oprnds, NULL, slp_node);
3107 else
3108 vect_get_vec_defs_for_stmt_copy (vinfo, &vec_oprnds, NULL);
3109
3110 /* Arguments are ready. Create the new vector stmt. */
3111 unsigned i;
3112 tree vop;
3113 FOR_EACH_VEC_ELT (vec_oprnds, i, vop)
3114 {
3115 gimple *new_stmt;
3116 tree tem = make_ssa_name (char_vectype);
3117 new_stmt = gimple_build_assign (tem, build1 (VIEW_CONVERT_EXPR,
3118 char_vectype, vop));
3119 vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
3120 tree tem2 = make_ssa_name (char_vectype);
3121 new_stmt = gimple_build_assign (tem2, VEC_PERM_EXPR,
3122 tem, tem, bswap_vconst);
3123 vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
3124 tem = make_ssa_name (vectype);
3125 new_stmt = gimple_build_assign (tem, build1 (VIEW_CONVERT_EXPR,
3126 vectype, tem2));
3127 new_stmt_info
3128 = vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
3129 if (slp_node)
3130 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt_info);
3131 }
3132
3133 if (slp_node)
3134 continue;
3135
3136 if (j == 0)
3137 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt_info;
3138 else
3139 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt_info;
3140
3141 prev_stmt_info = new_stmt_info;
3142 }
3143
3144 vec_oprnds.release ();
3145 return true;
3146 }
3147
3148 /* Return true if vector types VECTYPE_IN and VECTYPE_OUT have
3149 integer elements and if we can narrow VECTYPE_IN to VECTYPE_OUT
3150 in a single step. On success, store the binary pack code in
3151 *CONVERT_CODE. */
3152
3153 static bool
3154 simple_integer_narrowing (tree vectype_out, tree vectype_in,
3155 tree_code *convert_code)
3156 {
3157 if (!INTEGRAL_TYPE_P (TREE_TYPE (vectype_out))
3158 || !INTEGRAL_TYPE_P (TREE_TYPE (vectype_in)))
3159 return false;
3160
3161 tree_code code;
3162 int multi_step_cvt = 0;
3163 auto_vec <tree, 8> interm_types;
3164 if (!supportable_narrowing_operation (NOP_EXPR, vectype_out, vectype_in,
3165 &code, &multi_step_cvt, &interm_types)
3166 || multi_step_cvt)
3167 return false;
3168
3169 *convert_code = code;
3170 return true;
3171 }
3172
3173 /* Function vectorizable_call.
3174
3175 Check if STMT_INFO performs a function call that can be vectorized.
3176 If VEC_STMT is also passed, vectorize STMT_INFO: create a vectorized
3177 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
3178 Return true if STMT_INFO is vectorizable in this way. */
3179
3180 static bool
3181 vectorizable_call (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
3182 stmt_vec_info *vec_stmt, slp_tree slp_node,
3183 stmt_vector_for_cost *cost_vec)
3184 {
3185 gcall *stmt;
3186 tree vec_dest;
3187 tree scalar_dest;
3188 tree op;
3189 tree vec_oprnd0 = NULL_TREE, vec_oprnd1 = NULL_TREE;
3190 stmt_vec_info prev_stmt_info;
3191 tree vectype_out, vectype_in;
3192 poly_uint64 nunits_in;
3193 poly_uint64 nunits_out;
3194 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
3195 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
3196 vec_info *vinfo = stmt_info->vinfo;
3197 tree fndecl, new_temp, rhs_type;
3198 enum vect_def_type dt[4]
3199 = { vect_unknown_def_type, vect_unknown_def_type, vect_unknown_def_type,
3200 vect_unknown_def_type };
3201 tree vectypes[ARRAY_SIZE (dt)] = {};
3202 int ndts = ARRAY_SIZE (dt);
3203 int ncopies, j;
3204 auto_vec<tree, 8> vargs;
3205 auto_vec<tree, 8> orig_vargs;
3206 enum { NARROW, NONE, WIDEN } modifier;
3207 size_t i, nargs;
3208 tree lhs;
3209
3210 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
3211 return false;
3212
3213 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
3214 && ! vec_stmt)
3215 return false;
3216
3217 /* Is STMT_INFO a vectorizable call? */
3218 stmt = dyn_cast <gcall *> (stmt_info->stmt);
3219 if (!stmt)
3220 return false;
3221
3222 if (gimple_call_internal_p (stmt)
3223 && (internal_load_fn_p (gimple_call_internal_fn (stmt))
3224 || internal_store_fn_p (gimple_call_internal_fn (stmt))))
3225 /* Handled by vectorizable_load and vectorizable_store. */
3226 return false;
3227
3228 if (gimple_call_lhs (stmt) == NULL_TREE
3229 || TREE_CODE (gimple_call_lhs (stmt)) != SSA_NAME)
3230 return false;
3231
3232 gcc_checking_assert (!stmt_can_throw_internal (cfun, stmt));
3233
3234 vectype_out = STMT_VINFO_VECTYPE (stmt_info);
3235
3236 /* Process function arguments. */
3237 rhs_type = NULL_TREE;
3238 vectype_in = NULL_TREE;
3239 nargs = gimple_call_num_args (stmt);
3240
3241 /* Bail out if the function has more than four arguments; we do not have
3242 interesting builtin functions to vectorize with more than two arguments
3243 except for fma. No arguments is also not good. */
3244 if (nargs == 0 || nargs > 4)
3245 return false;
3246
3247 /* Ignore the arguments of IFN_GOMP_SIMD_LANE, they are magic. */
3248 combined_fn cfn = gimple_call_combined_fn (stmt);
3249 if (cfn == CFN_GOMP_SIMD_LANE)
3250 {
3251 nargs = 0;
3252 rhs_type = unsigned_type_node;
3253 }
3254
3255 int mask_opno = -1;
3256 if (internal_fn_p (cfn))
3257 mask_opno = internal_fn_mask_index (as_internal_fn (cfn));
3258
3259 for (i = 0; i < nargs; i++)
3260 {
3261 op = gimple_call_arg (stmt, i);
3262 if (!vect_is_simple_use (op, vinfo, &dt[i], &vectypes[i]))
3263 {
3264 if (dump_enabled_p ())
3265 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3266 "use not simple.\n");
3267 return false;
3268 }
3269
3270 /* Skip the mask argument to an internal function. This operand
3271 has been converted via a pattern if necessary. */
3272 if ((int) i == mask_opno)
3273 continue;
3274
3275 /* We can only handle calls with arguments of the same type. */
3276 if (rhs_type
3277 && !types_compatible_p (rhs_type, TREE_TYPE (op)))
3278 {
3279 if (dump_enabled_p ())
3280 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3281 "argument types differ.\n");
3282 return false;
3283 }
3284 if (!rhs_type)
3285 rhs_type = TREE_TYPE (op);
3286
3287 if (!vectype_in)
3288 vectype_in = vectypes[i];
3289 else if (vectypes[i]
3290 && !types_compatible_p (vectypes[i], vectype_in))
3291 {
3292 if (dump_enabled_p ())
3293 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3294 "argument vector types differ.\n");
3295 return false;
3296 }
3297 }
3298 /* If all arguments are external or constant defs, use a vector type with
3299 the same size as the output vector type. */
3300 if (!vectype_in)
3301 vectype_in = get_same_sized_vectype (rhs_type, vectype_out);
3302 if (vec_stmt)
3303 gcc_assert (vectype_in);
3304 if (!vectype_in)
3305 {
3306 if (dump_enabled_p ())
3307 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3308 "no vectype for scalar type %T\n", rhs_type);
3309
3310 return false;
3311 }
3312
3313 /* FORNOW */
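/* As an illustration of the classification below (the vector types are
   just an example): with a V4SI input vectype and a V8HI output vectype,
   nunits_in == 4 and nunits_out == 8, so the call is NARROW and two
   input vectors feed each output vector; the opposite pairing is WIDEN. */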
3314 nunits_in = TYPE_VECTOR_SUBPARTS (vectype_in);
3315 nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
3316 if (known_eq (nunits_in * 2, nunits_out))
3317 modifier = NARROW;
3318 else if (known_eq (nunits_out, nunits_in))
3319 modifier = NONE;
3320 else if (known_eq (nunits_out * 2, nunits_in))
3321 modifier = WIDEN;
3322 else
3323 return false;
3324
3325 /* We only handle functions that do not read or clobber memory. */
3326 if (gimple_vuse (stmt))
3327 {
3328 if (dump_enabled_p ())
3329 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3330 "function reads from or writes to memory.\n");
3331 return false;
3332 }
3333
3334 /* For now, we only vectorize functions if a target-specific builtin
3335 is available. TODO -- in some cases, it might be profitable to
3336 insert the calls for pieces of the vector, in order to be able
3337 to vectorize other operations in the loop. */
3338 fndecl = NULL_TREE;
3339 internal_fn ifn = IFN_LAST;
3340 tree callee = gimple_call_fndecl (stmt);
3341
3342 /* First try using an internal function. */
3343 tree_code convert_code = ERROR_MARK;
3344 if (cfn != CFN_LAST
3345 && (modifier == NONE
3346 || (modifier == NARROW
3347 && simple_integer_narrowing (vectype_out, vectype_in,
3348 &convert_code))))
3349 ifn = vectorizable_internal_function (cfn, callee, vectype_out,
3350 vectype_in);
3351
3352 /* If that fails, try asking for a target-specific built-in function. */
3353 if (ifn == IFN_LAST)
3354 {
3355 if (cfn != CFN_LAST)
3356 fndecl = targetm.vectorize.builtin_vectorized_function
3357 (cfn, vectype_out, vectype_in);
3358 else if (callee && fndecl_built_in_p (callee, BUILT_IN_MD))
3359 fndecl = targetm.vectorize.builtin_md_vectorized_function
3360 (callee, vectype_out, vectype_in);
3361 }
3362
3363 if (ifn == IFN_LAST && !fndecl)
3364 {
3365 if (cfn == CFN_GOMP_SIMD_LANE
3366 && !slp_node
3367 && loop_vinfo
3368 && LOOP_VINFO_LOOP (loop_vinfo)->simduid
3369 && TREE_CODE (gimple_call_arg (stmt, 0)) == SSA_NAME
3370 && LOOP_VINFO_LOOP (loop_vinfo)->simduid
3371 == SSA_NAME_VAR (gimple_call_arg (stmt, 0)))
3372 {
3373 /* We can handle IFN_GOMP_SIMD_LANE by returning a
3374 { 0, 1, 2, ... vf - 1 } vector. */
3375 gcc_assert (nargs == 0);
3376 }
3377 else if (modifier == NONE
3378 && (gimple_call_builtin_p (stmt, BUILT_IN_BSWAP16)
3379 || gimple_call_builtin_p (stmt, BUILT_IN_BSWAP32)
3380 || gimple_call_builtin_p (stmt, BUILT_IN_BSWAP64)))
3381 return vectorizable_bswap (stmt_info, gsi, vec_stmt, slp_node,
3382 vectype_in, cost_vec);
3383 else
3384 {
3385 if (dump_enabled_p ())
3386 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3387 "function is not vectorizable.\n");
3388 return false;
3389 }
3390 }
3391
3392 if (slp_node)
3393 ncopies = 1;
3394 else if (modifier == NARROW && ifn == IFN_LAST)
3395 ncopies = vect_get_num_copies (loop_vinfo, vectype_out);
3396 else
3397 ncopies = vect_get_num_copies (loop_vinfo, vectype_in);
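/* For example (numbers purely illustrative): in the NONE case with a
   vectorization factor of 8 and a four-lane vectype, ncopies == 2 and
   the transform below emits two vector calls per scalar call. */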
3398
3399 /* Sanity check: make sure that at least one copy of the vectorized stmt
3400 needs to be generated. */
3401 gcc_assert (ncopies >= 1);
3402
3403 vec_loop_masks *masks = (loop_vinfo ? &LOOP_VINFO_MASKS (loop_vinfo) : NULL);
3404 if (!vec_stmt) /* transformation not required. */
3405 {
3406 STMT_VINFO_TYPE (stmt_info) = call_vec_info_type;
3407 DUMP_VECT_SCOPE ("vectorizable_call");
3408 vect_model_simple_cost (stmt_info, ncopies, dt, ndts, slp_node, cost_vec);
3409 if (ifn != IFN_LAST && modifier == NARROW && !slp_node)
3410 record_stmt_cost (cost_vec, ncopies / 2,
3411 vec_promote_demote, stmt_info, 0, vect_body);
3412
3413 if (loop_vinfo && mask_opno >= 0)
3414 {
3415 unsigned int nvectors = (slp_node
3416 ? SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node)
3417 : ncopies);
3418 tree scalar_mask = gimple_call_arg (stmt_info->stmt, mask_opno);
3419 vect_record_loop_mask (loop_vinfo, masks, nvectors,
3420 vectype_out, scalar_mask);
3421 }
3422 return true;
3423 }
3424
3425 /* Transform. */
3426
3427 if (dump_enabled_p ())
3428 dump_printf_loc (MSG_NOTE, vect_location, "transform call.\n");
3429
3430 /* Handle def. */
3431 scalar_dest = gimple_call_lhs (stmt);
3432 vec_dest = vect_create_destination_var (scalar_dest, vectype_out);
3433
3434 bool masked_loop_p = loop_vinfo && LOOP_VINFO_FULLY_MASKED_P (loop_vinfo);
3435
3436 stmt_vec_info new_stmt_info = NULL;
3437 prev_stmt_info = NULL;
3438 if (modifier == NONE || ifn != IFN_LAST)
3439 {
3440 tree prev_res = NULL_TREE;
3441 vargs.safe_grow (nargs);
3442 orig_vargs.safe_grow (nargs);
3443 for (j = 0; j < ncopies; ++j)
3444 {
3445 /* Build argument list for the vectorized call. */
3446 if (slp_node)
3447 {
3448 auto_vec<vec<tree> > vec_defs (nargs);
3449 vec<tree> vec_oprnds0;
3450
3451 vect_get_slp_defs (slp_node, &vec_defs);
3452 vec_oprnds0 = vec_defs[0];
3453
3454 /* Arguments are ready. Create the new vector stmt. */
3455 FOR_EACH_VEC_ELT (vec_oprnds0, i, vec_oprnd0)
3456 {
3457 size_t k;
3458 for (k = 0; k < nargs; k++)
3459 {
3460 vec<tree> vec_oprndsk = vec_defs[k];
3461 vargs[k] = vec_oprndsk[i];
3462 }
3463 if (modifier == NARROW)
3464 {
3465 /* We don't define any narrowing conditional functions
3466 at present. */
3467 gcc_assert (mask_opno < 0);
3468 tree half_res = make_ssa_name (vectype_in);
3469 gcall *call
3470 = gimple_build_call_internal_vec (ifn, vargs);
3471 gimple_call_set_lhs (call, half_res);
3472 gimple_call_set_nothrow (call, true);
3473 vect_finish_stmt_generation (stmt_info, call, gsi);
3474 if ((i & 1) == 0)
3475 {
3476 prev_res = half_res;
3477 continue;
3478 }
3479 new_temp = make_ssa_name (vec_dest);
3480 gimple *new_stmt
3481 = gimple_build_assign (new_temp, convert_code,
3482 prev_res, half_res);
3483 new_stmt_info
3484 = vect_finish_stmt_generation (stmt_info, new_stmt,
3485 gsi);
3486 }
3487 else
3488 {
3489 if (mask_opno >= 0 && masked_loop_p)
3490 {
3491 unsigned int vec_num = vec_oprnds0.length ();
3492 /* Always true for SLP. */
3493 gcc_assert (ncopies == 1);
3494 tree mask = vect_get_loop_mask (gsi, masks, vec_num,
3495 vectype_out, i);
3496 vargs[mask_opno] = prepare_load_store_mask
3497 (TREE_TYPE (mask), mask, vargs[mask_opno], gsi);
3498 }
3499
3500 gcall *call;
3501 if (ifn != IFN_LAST)
3502 call = gimple_build_call_internal_vec (ifn, vargs);
3503 else
3504 call = gimple_build_call_vec (fndecl, vargs);
3505 new_temp = make_ssa_name (vec_dest, call);
3506 gimple_call_set_lhs (call, new_temp);
3507 gimple_call_set_nothrow (call, true);
3508 new_stmt_info
3509 = vect_finish_stmt_generation (stmt_info, call, gsi);
3510 }
3511 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt_info);
3512 }
3513
3514 for (i = 0; i < nargs; i++)
3515 {
3516 vec<tree> vec_oprndsi = vec_defs[i];
3517 vec_oprndsi.release ();
3518 }
3519 continue;
3520 }
3521
3522 if (mask_opno >= 0 && !vectypes[mask_opno])
3523 {
3524 gcc_assert (modifier != WIDEN);
3525 vectypes[mask_opno] = truth_type_for (vectype_in);
3526 }
3527
3528 for (i = 0; i < nargs; i++)
3529 {
3530 op = gimple_call_arg (stmt, i);
3531 if (j == 0)
3532 vec_oprnd0
3533 = vect_get_vec_def_for_operand (op, stmt_info, vectypes[i]);
3534 else
3535 vec_oprnd0
3536 = vect_get_vec_def_for_stmt_copy (vinfo, orig_vargs[i]);
3537
3538 orig_vargs[i] = vargs[i] = vec_oprnd0;
3539 }
3540
3541 if (mask_opno >= 0 && masked_loop_p)
3542 {
3543 tree mask = vect_get_loop_mask (gsi, masks, ncopies,
3544 vectype_out, j);
3545 vargs[mask_opno]
3546 = prepare_load_store_mask (TREE_TYPE (mask), mask,
3547 vargs[mask_opno], gsi);
3548 }
3549
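/* CFN_GOMP_SIMD_LANE is materialized as the constant vector of lane
   indices for this copy; e.g. with a four-lane vectype_out and j == 1
   the constant built below is { 4, 5, 6, 7 } (example values only). */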
3550 if (cfn == CFN_GOMP_SIMD_LANE)
3551 {
3552 tree cst = build_index_vector (vectype_out, j * nunits_out, 1);
3553 tree new_var
3554 = vect_get_new_ssa_name (vectype_out, vect_simple_var, "cst_");
3555 gimple *init_stmt = gimple_build_assign (new_var, cst);
3556 vect_init_vector_1 (stmt_info, init_stmt, NULL);
3557 new_temp = make_ssa_name (vec_dest);
3558 gimple *new_stmt = gimple_build_assign (new_temp, new_var);
3559 new_stmt_info
3560 = vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
3561 }
3562 else if (modifier == NARROW)
3563 {
3564 /* We don't define any narrowing conditional functions at
3565 present. */
3566 gcc_assert (mask_opno < 0);
3567 tree half_res = make_ssa_name (vectype_in);
3568 gcall *call = gimple_build_call_internal_vec (ifn, vargs);
3569 gimple_call_set_lhs (call, half_res);
3570 gimple_call_set_nothrow (call, true);
3571 vect_finish_stmt_generation (stmt_info, call, gsi);
3572 if ((j & 1) == 0)
3573 {
3574 prev_res = half_res;
3575 continue;
3576 }
3577 new_temp = make_ssa_name (vec_dest);
3578 gassign *new_stmt = gimple_build_assign (new_temp, convert_code,
3579 prev_res, half_res);
3580 new_stmt_info
3581 = vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
3582 }
3583 else
3584 {
3585 gcall *call;
3586 if (ifn != IFN_LAST)
3587 call = gimple_build_call_internal_vec (ifn, vargs);
3588 else
3589 call = gimple_build_call_vec (fndecl, vargs);
3590 new_temp = make_ssa_name (vec_dest, call);
3591 gimple_call_set_lhs (call, new_temp);
3592 gimple_call_set_nothrow (call, true);
3593 new_stmt_info
3594 = vect_finish_stmt_generation (stmt_info, call, gsi);
3595 }
3596
3597 if (j == (modifier == NARROW ? 1 : 0))
3598 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt_info;
3599 else
3600 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt_info;
3601
3602 prev_stmt_info = new_stmt_info;
3603 }
3604 }
3605 else if (modifier == NARROW)
3606 {
3607 /* We don't define any narrowing conditional functions at present. */
3608 gcc_assert (mask_opno < 0);
3609 for (j = 0; j < ncopies; ++j)
3610 {
3611 /* Build argument list for the vectorized call. */
3612 if (j == 0)
3613 vargs.create (nargs * 2);
3614 else
3615 vargs.truncate (0);
3616
3617 if (slp_node)
3618 {
3619 auto_vec<vec<tree> > vec_defs (nargs);
3620 vec<tree> vec_oprnds0;
3621
3622 vect_get_slp_defs (slp_node, &vec_defs);
3623 vec_oprnds0 = vec_defs[0];
3624
3625 /* Arguments are ready. Create the new vector stmt. */
3626 for (i = 0; vec_oprnds0.iterate (i, &vec_oprnd0); i += 2)
3627 {
3628 size_t k;
3629 vargs.truncate (0);
3630 for (k = 0; k < nargs; k++)
3631 {
3632 vec<tree> vec_oprndsk = vec_defs[k];
3633 vargs.quick_push (vec_oprndsk[i]);
3634 vargs.quick_push (vec_oprndsk[i + 1]);
3635 }
3636 gcall *call;
3637 if (ifn != IFN_LAST)
3638 call = gimple_build_call_internal_vec (ifn, vargs);
3639 else
3640 call = gimple_build_call_vec (fndecl, vargs);
3641 new_temp = make_ssa_name (vec_dest, call);
3642 gimple_call_set_lhs (call, new_temp);
3643 gimple_call_set_nothrow (call, true);
3644 new_stmt_info
3645 = vect_finish_stmt_generation (stmt_info, call, gsi);
3646 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt_info);
3647 }
3648
3649 for (i = 0; i < nargs; i++)
3650 {
3651 vec<tree> vec_oprndsi = vec_defs[i];
3652 vec_oprndsi.release ();
3653 }
3654 continue;
3655 }
3656
3657 for (i = 0; i < nargs; i++)
3658 {
3659 op = gimple_call_arg (stmt, i);
3660 if (j == 0)
3661 {
3662 vec_oprnd0
3663 = vect_get_vec_def_for_operand (op, stmt_info,
3664 vectypes[i]);
3665 vec_oprnd1
3666 = vect_get_vec_def_for_stmt_copy (vinfo, vec_oprnd0);
3667 }
3668 else
3669 {
3670 vec_oprnd1 = gimple_call_arg (new_stmt_info->stmt,
3671 2 * i + 1);
3672 vec_oprnd0
3673 = vect_get_vec_def_for_stmt_copy (vinfo, vec_oprnd1);
3674 vec_oprnd1
3675 = vect_get_vec_def_for_stmt_copy (vinfo, vec_oprnd0);
3676 }
3677
3678 vargs.quick_push (vec_oprnd0);
3679 vargs.quick_push (vec_oprnd1);
3680 }
3681
3682 gcall *new_stmt = gimple_build_call_vec (fndecl, vargs);
3683 new_temp = make_ssa_name (vec_dest, new_stmt);
3684 gimple_call_set_lhs (new_stmt, new_temp);
3685 new_stmt_info
3686 = vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
3687
3688 if (j == 0)
3689 STMT_VINFO_VEC_STMT (stmt_info) = new_stmt_info;
3690 else
3691 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt_info;
3692
3693 prev_stmt_info = new_stmt_info;
3694 }
3695
3696 *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
3697 }
3698 else
3699 /* No current target implements this case. */
3700 return false;
3701
3702 vargs.release ();
3703
3704 /* The call in STMT might prevent it from being removed in DCE.
3705 We cannot remove it here, however, due to the way the SSA name
3706 it defines is mapped to the new definition. So just replace the
3707 rhs of the statement with something harmless. */
3708
3709 if (slp_node)
3710 return true;
3711
3712 stmt_info = vect_orig_stmt (stmt_info);
3713 lhs = gimple_get_lhs (stmt_info->stmt);
3714
3715 gassign *new_stmt
3716 = gimple_build_assign (lhs, build_zero_cst (TREE_TYPE (lhs)));
3717 vinfo->replace_stmt (gsi, stmt_info, new_stmt);
3718
3719 return true;
3720 }
3721
3722
3723 struct simd_call_arg_info
3724 {
3725 tree vectype;
3726 tree op;
3727 HOST_WIDE_INT linear_step;
3728 enum vect_def_type dt;
3729 unsigned int align;
3730 bool simd_lane_linear;
3731 };
3732
3733 /* Helper function of vectorizable_simd_clone_call. If OP, an SSA_NAME,
3734 is linear within a simd lane (but not within the whole loop), note it
3735 in *ARGINFO. */
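/* For instance (a hypothetical example): for
     p_1 = &a + (sizetype) (4 * GOMP_SIMD_LANE (simduid.0))
   the recorded base is &a and the per-lane linear step is 4. */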
3736
3737 static void
3738 vect_simd_lane_linear (tree op, class loop *loop,
3739 struct simd_call_arg_info *arginfo)
3740 {
3741 gimple *def_stmt = SSA_NAME_DEF_STMT (op);
3742
3743 if (!is_gimple_assign (def_stmt)
3744 || gimple_assign_rhs_code (def_stmt) != POINTER_PLUS_EXPR
3745 || !is_gimple_min_invariant (gimple_assign_rhs1 (def_stmt)))
3746 return;
3747
3748 tree base = gimple_assign_rhs1 (def_stmt);
3749 HOST_WIDE_INT linear_step = 0;
3750 tree v = gimple_assign_rhs2 (def_stmt);
3751 while (TREE_CODE (v) == SSA_NAME)
3752 {
3753 tree t;
3754 def_stmt = SSA_NAME_DEF_STMT (v);
3755 if (is_gimple_assign (def_stmt))
3756 switch (gimple_assign_rhs_code (def_stmt))
3757 {
3758 case PLUS_EXPR:
3759 t = gimple_assign_rhs2 (def_stmt);
3760 if (linear_step || TREE_CODE (t) != INTEGER_CST)
3761 return;
3762 base = fold_build2 (POINTER_PLUS_EXPR, TREE_TYPE (base), base, t);
3763 v = gimple_assign_rhs1 (def_stmt);
3764 continue;
3765 case MULT_EXPR:
3766 t = gimple_assign_rhs2 (def_stmt);
3767 if (linear_step || !tree_fits_shwi_p (t) || integer_zerop (t))
3768 return;
3769 linear_step = tree_to_shwi (t);
3770 v = gimple_assign_rhs1 (def_stmt);
3771 continue;
3772 CASE_CONVERT:
3773 t = gimple_assign_rhs1 (def_stmt);
3774 if (TREE_CODE (TREE_TYPE (t)) != INTEGER_TYPE
3775 || (TYPE_PRECISION (TREE_TYPE (v))
3776 < TYPE_PRECISION (TREE_TYPE (t))))
3777 return;
3778 if (!linear_step)
3779 linear_step = 1;
3780 v = t;
3781 continue;
3782 default:
3783 return;
3784 }
3785 else if (gimple_call_internal_p (def_stmt, IFN_GOMP_SIMD_LANE)
3786 && loop->simduid
3787 && TREE_CODE (gimple_call_arg (def_stmt, 0)) == SSA_NAME
3788 && (SSA_NAME_VAR (gimple_call_arg (def_stmt, 0))
3789 == loop->simduid))
3790 {
3791 if (!linear_step)
3792 linear_step = 1;
3793 arginfo->linear_step = linear_step;
3794 arginfo->op = base;
3795 arginfo->simd_lane_linear = true;
3796 return;
3797 }
3798 }
3799 }
3800
3801 /* Return the number of elements in vector type VECTYPE, which is associated
3802 with a SIMD clone. At present these vectors always have a constant
3803 length. */
3804
3805 static unsigned HOST_WIDE_INT
3806 simd_clone_subparts (tree vectype)
3807 {
3808 return TYPE_VECTOR_SUBPARTS (vectype).to_constant ();
3809 }
3810
3811 /* Function vectorizable_simd_clone_call.
3812
3813 Check if STMT_INFO performs a function call that can be vectorized
3814 by calling a simd clone of the function.
3815 If VEC_STMT is also passed, vectorize STMT_INFO: create a vectorized
3816 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
3817 Return true if STMT_INFO is vectorizable in this way. */
3818
3819 static bool
3820 vectorizable_simd_clone_call (stmt_vec_info stmt_info,
3821 gimple_stmt_iterator *gsi,
3822 stmt_vec_info *vec_stmt, slp_tree slp_node,
3823 stmt_vector_for_cost *)
3824 {
3825 tree vec_dest;
3826 tree scalar_dest;
3827 tree op, type;
3828 tree vec_oprnd0 = NULL_TREE;
3829 stmt_vec_info prev_stmt_info;
3830 tree vectype;
3831 unsigned int nunits;
3832 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
3833 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
3834 vec_info *vinfo = stmt_info->vinfo;
3835 class loop *loop = loop_vinfo ? LOOP_VINFO_LOOP (loop_vinfo) : NULL;
3836 tree fndecl, new_temp;
3837 int ncopies, j;
3838 auto_vec<simd_call_arg_info> arginfo;
3839 vec<tree> vargs = vNULL;
3840 size_t i, nargs;
3841 tree lhs, rtype, ratype;
3842 vec<constructor_elt, va_gc> *ret_ctor_elts = NULL;
3843
3844 /* Is STMT a vectorizable call? */
3845 gcall *stmt = dyn_cast <gcall *> (stmt_info->stmt);
3846 if (!stmt)
3847 return false;
3848
3849 fndecl = gimple_call_fndecl (stmt);
3850 if (fndecl == NULL_TREE)
3851 return false;
3852
3853 struct cgraph_node *node = cgraph_node::get (fndecl);
3854 if (node == NULL || node->simd_clones == NULL)
3855 return false;
3856
3857 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
3858 return false;
3859
3860 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
3861 && ! vec_stmt)
3862 return false;
3863
3864 if (gimple_call_lhs (stmt)
3865 && TREE_CODE (gimple_call_lhs (stmt)) != SSA_NAME)
3866 return false;
3867
3868 gcc_checking_assert (!stmt_can_throw_internal (cfun, stmt));
3869
3870 vectype = STMT_VINFO_VECTYPE (stmt_info);
3871
3872 if (loop_vinfo && nested_in_vect_loop_p (loop, stmt_info))
3873 return false;
3874
3875 /* FORNOW */
3876 if (slp_node)
3877 return false;
3878
3879 /* Process function arguments. */
3880 nargs = gimple_call_num_args (stmt);
3881
3882 /* Bail out if the function has zero arguments. */
3883 if (nargs == 0)
3884 return false;
3885
3886 arginfo.reserve (nargs, true);
3887
3888 for (i = 0; i < nargs; i++)
3889 {
3890 simd_call_arg_info thisarginfo;
3891 affine_iv iv;
3892
3893 thisarginfo.linear_step = 0;
3894 thisarginfo.align = 0;
3895 thisarginfo.op = NULL_TREE;
3896 thisarginfo.simd_lane_linear = false;
3897
3898 op = gimple_call_arg (stmt, i);
3899 if (!vect_is_simple_use (op, vinfo, &thisarginfo.dt,
3900 &thisarginfo.vectype)
3901 || thisarginfo.dt == vect_uninitialized_def)
3902 {
3903 if (dump_enabled_p ())
3904 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3905 "use not simple.\n");
3906 return false;
3907 }
3908
3909 if (thisarginfo.dt == vect_constant_def
3910 || thisarginfo.dt == vect_external_def)
3911 gcc_assert (thisarginfo.vectype == NULL_TREE);
3912 else
3913 gcc_assert (thisarginfo.vectype != NULL_TREE);
3914
3915 /* For linear arguments, the analysis phase should have saved
3916 the base and step in STMT_VINFO_SIMD_CLONE_INFO. */
3917 if (i * 3 + 4 <= STMT_VINFO_SIMD_CLONE_INFO (stmt_info).length ()
3918 && STMT_VINFO_SIMD_CLONE_INFO (stmt_info)[i * 3 + 2])
3919 {
3920 gcc_assert (vec_stmt);
3921 thisarginfo.linear_step
3922 = tree_to_shwi (STMT_VINFO_SIMD_CLONE_INFO (stmt_info)[i * 3 + 2]);
3923 thisarginfo.op
3924 = STMT_VINFO_SIMD_CLONE_INFO (stmt_info)[i * 3 + 1];
3925 thisarginfo.simd_lane_linear
3926 = (STMT_VINFO_SIMD_CLONE_INFO (stmt_info)[i * 3 + 3]
3927 == boolean_true_node);
3928 /* If the loop has been peeled for alignment, we need to adjust it. */
3929 tree n1 = LOOP_VINFO_NITERS_UNCHANGED (loop_vinfo);
3930 tree n2 = LOOP_VINFO_NITERS (loop_vinfo);
3931 if (n1 != n2 && !thisarginfo.simd_lane_linear)
3932 {
3933 tree bias = fold_build2 (MINUS_EXPR, TREE_TYPE (n1), n1, n2);
3934 tree step = STMT_VINFO_SIMD_CLONE_INFO (stmt_info)[i * 3 + 2];
3935 tree opt = TREE_TYPE (thisarginfo.op);
3936 bias = fold_convert (TREE_TYPE (step), bias);
3937 bias = fold_build2 (MULT_EXPR, TREE_TYPE (step), bias, step);
3938 thisarginfo.op
3939 = fold_build2 (POINTER_TYPE_P (opt)
3940 ? POINTER_PLUS_EXPR : PLUS_EXPR, opt,
3941 thisarginfo.op, bias);
3942 }
3943 }
3944 else if (!vec_stmt
3945 && thisarginfo.dt != vect_constant_def
3946 && thisarginfo.dt != vect_external_def
3947 && loop_vinfo
3948 && TREE_CODE (op) == SSA_NAME
3949 && simple_iv (loop, loop_containing_stmt (stmt), op,
3950 &iv, false)
3951 && tree_fits_shwi_p (iv.step))
3952 {
3953 thisarginfo.linear_step = tree_to_shwi (iv.step);
3954 thisarginfo.op = iv.base;
3955 }
3956 else if ((thisarginfo.dt == vect_constant_def
3957 || thisarginfo.dt == vect_external_def)
3958 && POINTER_TYPE_P (TREE_TYPE (op)))
3959 thisarginfo.align = get_pointer_alignment (op) / BITS_PER_UNIT;
3960 /* Addresses of array elements indexed by GOMP_SIMD_LANE are
3961 linear too. */
3962 if (POINTER_TYPE_P (TREE_TYPE (op))
3963 && !thisarginfo.linear_step
3964 && !vec_stmt
3965 && thisarginfo.dt != vect_constant_def
3966 && thisarginfo.dt != vect_external_def
3967 && loop_vinfo
3968 && !slp_node
3969 && TREE_CODE (op) == SSA_NAME)
3970 vect_simd_lane_linear (op, loop, &thisarginfo);
3971
3972 arginfo.quick_push (thisarginfo);
3973 }
3974
3975 unsigned HOST_WIDE_INT vf;
3976 if (!LOOP_VINFO_VECT_FACTOR (loop_vinfo).is_constant (&vf))
3977 {
3978 if (dump_enabled_p ())
3979 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3980 "not considering SIMD clones; not yet supported"
3981 " for variable-width vectors.\n");
3982 return false;
3983 }
3984
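/* Select the simd clone whose simdlen best matches the loop's
   vectorization factor; e.g. (rough illustration) with vf == 8 an
   8-lane clone beats a 4-lane one, since the smaller clone would need
   two calls per vector iteration and is penalized accordingly. */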
3985 unsigned int badness = 0;
3986 struct cgraph_node *bestn = NULL;
3987 if (STMT_VINFO_SIMD_CLONE_INFO (stmt_info).exists ())
3988 bestn = cgraph_node::get (STMT_VINFO_SIMD_CLONE_INFO (stmt_info)[0]);
3989 else
3990 for (struct cgraph_node *n = node->simd_clones; n != NULL;
3991 n = n->simdclone->next_clone)
3992 {
3993 unsigned int this_badness = 0;
3994 if (n->simdclone->simdlen > vf
3995 || n->simdclone->nargs != nargs)
3996 continue;
3997 if (n->simdclone->simdlen < vf)
3998 this_badness += (exact_log2 (vf)
3999 - exact_log2 (n->simdclone->simdlen)) * 1024;
4000 if (n->simdclone->inbranch)
4001 this_badness += 2048;
4002 int target_badness = targetm.simd_clone.usable (n);
4003 if (target_badness < 0)
4004 continue;
4005 this_badness += target_badness * 512;
4006 /* FORNOW: Have to add code to add the mask argument. */
4007 if (n->simdclone->inbranch)
4008 continue;
4009 for (i = 0; i < nargs; i++)
4010 {
4011 switch (n->simdclone->args[i].arg_type)
4012 {
4013 case SIMD_CLONE_ARG_TYPE_VECTOR:
4014 if (!useless_type_conversion_p
4015 (n->simdclone->args[i].orig_type,
4016 TREE_TYPE (gimple_call_arg (stmt, i))))
4017 i = -1;
4018 else if (arginfo[i].dt == vect_constant_def
4019 || arginfo[i].dt == vect_external_def
4020 || arginfo[i].linear_step)
4021 this_badness += 64;
4022 break;
4023 case SIMD_CLONE_ARG_TYPE_UNIFORM:
4024 if (arginfo[i].dt != vect_constant_def
4025 && arginfo[i].dt != vect_external_def)
4026 i = -1;
4027 break;
4028 case SIMD_CLONE_ARG_TYPE_LINEAR_CONSTANT_STEP:
4029 case SIMD_CLONE_ARG_TYPE_LINEAR_REF_CONSTANT_STEP:
4030 if (arginfo[i].dt == vect_constant_def
4031 || arginfo[i].dt == vect_external_def
4032 || (arginfo[i].linear_step
4033 != n->simdclone->args[i].linear_step))
4034 i = -1;
4035 break;
4036 case SIMD_CLONE_ARG_TYPE_LINEAR_VARIABLE_STEP:
4037 case SIMD_CLONE_ARG_TYPE_LINEAR_VAL_CONSTANT_STEP:
4038 case SIMD_CLONE_ARG_TYPE_LINEAR_UVAL_CONSTANT_STEP:
4039 case SIMD_CLONE_ARG_TYPE_LINEAR_REF_VARIABLE_STEP:
4040 case SIMD_CLONE_ARG_TYPE_LINEAR_VAL_VARIABLE_STEP:
4041 case SIMD_CLONE_ARG_TYPE_LINEAR_UVAL_VARIABLE_STEP:
4042 /* FORNOW */
4043 i = -1;
4044 break;
4045 case SIMD_CLONE_ARG_TYPE_MASK:
4046 gcc_unreachable ();
4047 }
4048 if (i == (size_t) -1)
4049 break;
4050 if (n->simdclone->args[i].alignment > arginfo[i].align)
4051 {
4052 i = -1;
4053 break;
4054 }
4055 if (arginfo[i].align)
4056 this_badness += (exact_log2 (arginfo[i].align)
4057 - exact_log2 (n->simdclone->args[i].alignment));
4058 }
4059 if (i == (size_t) -1)
4060 continue;
4061 if (bestn == NULL || this_badness < badness)
4062 {
4063 bestn = n;
4064 badness = this_badness;
4065 }
4066 }
4067
4068 if (bestn == NULL)
4069 return false;
4070
4071 for (i = 0; i < nargs; i++)
4072 if ((arginfo[i].dt == vect_constant_def
4073 || arginfo[i].dt == vect_external_def)
4074 && bestn->simdclone->args[i].arg_type == SIMD_CLONE_ARG_TYPE_VECTOR)
4075 {
4076 tree arg_type = TREE_TYPE (gimple_call_arg (stmt, i));
4077 arginfo[i].vectype = get_vectype_for_scalar_type (vinfo, arg_type);
4078 if (arginfo[i].vectype == NULL
4079 || (simd_clone_subparts (arginfo[i].vectype)
4080 > bestn->simdclone->simdlen))
4081 return false;
4082 }
4083
4084 fndecl = bestn->decl;
4085 nunits = bestn->simdclone->simdlen;
4086 ncopies = vf / nunits;
4087
4088 /* If the function isn't const, only allow it in simd loops where the
4089 user has asserted that at least nunits consecutive iterations can be
4090 performed using SIMD instructions. */
4091 if ((loop == NULL || (unsigned) loop->safelen < nunits)
4092 && gimple_vuse (stmt))
4093 return false;
4094
4095 /* Sanity check: make sure that at least one copy of the vectorized stmt
4096 needs to be generated. */
4097 gcc_assert (ncopies >= 1);
4098
4099 if (!vec_stmt) /* transformation not required. */
4100 {
4101 STMT_VINFO_SIMD_CLONE_INFO (stmt_info).safe_push (bestn->decl);
4102 for (i = 0; i < nargs; i++)
4103 if ((bestn->simdclone->args[i].arg_type
4104 == SIMD_CLONE_ARG_TYPE_LINEAR_CONSTANT_STEP)
4105 || (bestn->simdclone->args[i].arg_type
4106 == SIMD_CLONE_ARG_TYPE_LINEAR_REF_CONSTANT_STEP))
4107 {
4108 STMT_VINFO_SIMD_CLONE_INFO (stmt_info).safe_grow_cleared (i * 3
4109 + 1);
4110 STMT_VINFO_SIMD_CLONE_INFO (stmt_info).safe_push (arginfo[i].op);
4111 tree lst = POINTER_TYPE_P (TREE_TYPE (arginfo[i].op))
4112 ? size_type_node : TREE_TYPE (arginfo[i].op);
4113 tree ls = build_int_cst (lst, arginfo[i].linear_step);
4114 STMT_VINFO_SIMD_CLONE_INFO (stmt_info).safe_push (ls);
4115 tree sll = arginfo[i].simd_lane_linear
4116 ? boolean_true_node : boolean_false_node;
4117 STMT_VINFO_SIMD_CLONE_INFO (stmt_info).safe_push (sll);
4118 }
4119 STMT_VINFO_TYPE (stmt_info) = call_simd_clone_vec_info_type;
4120 DUMP_VECT_SCOPE ("vectorizable_simd_clone_call");
4121 /* vect_model_simple_cost (stmt_info, ncopies, dt, slp_node, cost_vec); */
4122 return true;
4123 }
4124
4125 /* Transform. */
4126
4127 if (dump_enabled_p ())
4128 dump_printf_loc (MSG_NOTE, vect_location, "transform call.\n");
4129
4130 /* Handle def. */
4131 scalar_dest = gimple_call_lhs (stmt);
4132 vec_dest = NULL_TREE;
4133 rtype = NULL_TREE;
4134 ratype = NULL_TREE;
4135 if (scalar_dest)
4136 {
4137 vec_dest = vect_create_destination_var (scalar_dest, vectype);
4138 rtype = TREE_TYPE (TREE_TYPE (fndecl));
4139 if (TREE_CODE (rtype) == ARRAY_TYPE)
4140 {
4141 ratype = rtype;
4142 rtype = TREE_TYPE (ratype);
4143 }
4144 }
4145
4146 prev_stmt_info = NULL;
4147 for (j = 0; j < ncopies; ++j)
4148 {
4149 /* Build argument list for the vectorized call. */
4150 if (j == 0)
4151 vargs.create (nargs);
4152 else
4153 vargs.truncate (0);
4154
4155 for (i = 0; i < nargs; i++)
4156 {
4157 unsigned int k, l, m, o;
4158 tree atype;
4159 op = gimple_call_arg (stmt, i);
4160 switch (bestn->simdclone->args[i].arg_type)
4161 {
4162 case SIMD_CLONE_ARG_TYPE_VECTOR:
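/* The clone may expect its vector argument in narrower or wider
   pieces than the loop's vectype: narrower pieces are extracted
   with BIT_FIELD_REF, wider ones are built by glueing several loop
   vectors together with a CONSTRUCTOR (e.g., two four-lane defs
   for an eight-lane clone argument; lane counts only an example). */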
4163 atype = bestn->simdclone->args[i].vector_type;
4164 o = nunits / simd_clone_subparts (atype);
4165 for (m = j * o; m < (j + 1) * o; m++)
4166 {
4167 if (simd_clone_subparts (atype)
4168 < simd_clone_subparts (arginfo[i].vectype))
4169 {
4170 poly_uint64 prec = GET_MODE_BITSIZE (TYPE_MODE (atype));
4171 k = (simd_clone_subparts (arginfo[i].vectype)
4172 / simd_clone_subparts (atype));
4173 gcc_assert ((k & (k - 1)) == 0);
4174 if (m == 0)
4175 vec_oprnd0
4176 = vect_get_vec_def_for_operand (op, stmt_info);
4177 else
4178 {
4179 vec_oprnd0 = arginfo[i].op;
4180 if ((m & (k - 1)) == 0)
4181 vec_oprnd0
4182 = vect_get_vec_def_for_stmt_copy (vinfo,
4183 vec_oprnd0);
4184 }
4185 arginfo[i].op = vec_oprnd0;
4186 vec_oprnd0
4187 = build3 (BIT_FIELD_REF, atype, vec_oprnd0,
4188 bitsize_int (prec),
4189 bitsize_int ((m & (k - 1)) * prec));
4190 gassign *new_stmt
4191 = gimple_build_assign (make_ssa_name (atype),
4192 vec_oprnd0);
4193 vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
4194 vargs.safe_push (gimple_assign_lhs (new_stmt));
4195 }
4196 else
4197 {
4198 k = (simd_clone_subparts (atype)
4199 / simd_clone_subparts (arginfo[i].vectype));
4200 gcc_assert ((k & (k - 1)) == 0);
4201 vec<constructor_elt, va_gc> *ctor_elts;
4202 if (k != 1)
4203 vec_alloc (ctor_elts, k);
4204 else
4205 ctor_elts = NULL;
4206 for (l = 0; l < k; l++)
4207 {
4208 if (m == 0 && l == 0)
4209 vec_oprnd0
4210 = vect_get_vec_def_for_operand (op, stmt_info);
4211 else
4212 vec_oprnd0
4213 = vect_get_vec_def_for_stmt_copy (vinfo,
4214 arginfo[i].op);
4215 arginfo[i].op = vec_oprnd0;
4216 if (k == 1)
4217 break;
4218 CONSTRUCTOR_APPEND_ELT (ctor_elts, NULL_TREE,
4219 vec_oprnd0);
4220 }
4221 if (k == 1)
4222 vargs.safe_push (vec_oprnd0);
4223 else
4224 {
4225 vec_oprnd0 = build_constructor (atype, ctor_elts);
4226 gassign *new_stmt
4227 = gimple_build_assign (make_ssa_name (atype),
4228 vec_oprnd0);
4229 vect_finish_stmt_generation (stmt_info, new_stmt,
4230 gsi);
4231 vargs.safe_push (gimple_assign_lhs (new_stmt));
4232 }
4233 }
4234 }
4235 break;
4236 case SIMD_CLONE_ARG_TYPE_UNIFORM:
4237 vargs.safe_push (op);
4238 break;
4239 case SIMD_CLONE_ARG_TYPE_LINEAR_CONSTANT_STEP:
4240 case SIMD_CLONE_ARG_TYPE_LINEAR_REF_CONSTANT_STEP:
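/* A linear argument is passed as a scalar that advances by
   linear_step per scalar lane.  For the first copy we create a
   loop-header PHI that starts at the original value and is bumped
   by linear_step * nunits * ncopies each vector iteration; later
   copies add j * nunits * linear_step on top of the PHI result.
   E.g. with linear_step == 1, nunits == 4 and ncopies == 2, the two
   calls in an iteration see p and p + 4 (values for illustration
   only). */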
4241 if (j == 0)
4242 {
4243 gimple_seq stmts;
4244 arginfo[i].op
4245 = force_gimple_operand (unshare_expr (arginfo[i].op),
4246 &stmts, true, NULL_TREE);
4247 if (stmts != NULL)
4248 {
4249 basic_block new_bb;
4250 edge pe = loop_preheader_edge (loop);
4251 new_bb = gsi_insert_seq_on_edge_immediate (pe, stmts);
4252 gcc_assert (!new_bb);
4253 }
4254 if (arginfo[i].simd_lane_linear)
4255 {
4256 vargs.safe_push (arginfo[i].op);
4257 break;
4258 }
4259 tree phi_res = copy_ssa_name (op);
4260 gphi *new_phi = create_phi_node (phi_res, loop->header);
4261 loop_vinfo->add_stmt (new_phi);
4262 add_phi_arg (new_phi, arginfo[i].op,
4263 loop_preheader_edge (loop), UNKNOWN_LOCATION);
4264 enum tree_code code
4265 = POINTER_TYPE_P (TREE_TYPE (op))
4266 ? POINTER_PLUS_EXPR : PLUS_EXPR;
4267 tree type = POINTER_TYPE_P (TREE_TYPE (op))
4268 ? sizetype : TREE_TYPE (op);
4269 widest_int cst
4270 = wi::mul (bestn->simdclone->args[i].linear_step,
4271 ncopies * nunits);
4272 tree tcst = wide_int_to_tree (type, cst);
4273 tree phi_arg = copy_ssa_name (op);
4274 gassign *new_stmt
4275 = gimple_build_assign (phi_arg, code, phi_res, tcst);
4276 gimple_stmt_iterator si = gsi_after_labels (loop->header);
4277 gsi_insert_after (&si, new_stmt, GSI_NEW_STMT);
4278 loop_vinfo->add_stmt (new_stmt);
4279 add_phi_arg (new_phi, phi_arg, loop_latch_edge (loop),
4280 UNKNOWN_LOCATION);
4281 arginfo[i].op = phi_res;
4282 vargs.safe_push (phi_res);
4283 }
4284 else
4285 {
4286 enum tree_code code
4287 = POINTER_TYPE_P (TREE_TYPE (op))
4288 ? POINTER_PLUS_EXPR : PLUS_EXPR;
4289 tree type = POINTER_TYPE_P (TREE_TYPE (op))
4290 ? sizetype : TREE_TYPE (op);
4291 widest_int cst
4292 = wi::mul (bestn->simdclone->args[i].linear_step,
4293 j * nunits);
4294 tree tcst = wide_int_to_tree (type, cst);
4295 new_temp = make_ssa_name (TREE_TYPE (op));
4296 gassign *new_stmt
4297 = gimple_build_assign (new_temp, code,
4298 arginfo[i].op, tcst);
4299 vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
4300 vargs.safe_push (new_temp);
4301 }
4302 break;
4303 case SIMD_CLONE_ARG_TYPE_LINEAR_VAL_CONSTANT_STEP:
4304 case SIMD_CLONE_ARG_TYPE_LINEAR_UVAL_CONSTANT_STEP:
4305 case SIMD_CLONE_ARG_TYPE_LINEAR_VARIABLE_STEP:
4306 case SIMD_CLONE_ARG_TYPE_LINEAR_REF_VARIABLE_STEP:
4307 case SIMD_CLONE_ARG_TYPE_LINEAR_VAL_VARIABLE_STEP:
4308 case SIMD_CLONE_ARG_TYPE_LINEAR_UVAL_VARIABLE_STEP:
4309 default:
4310 gcc_unreachable ();
4311 }
4312 }
4313
4314 gcall *new_call = gimple_build_call_vec (fndecl, vargs);
4315 if (vec_dest)
4316 {
4317 gcc_assert (ratype || simd_clone_subparts (rtype) == nunits);
4318 if (ratype)
4319 new_temp = create_tmp_var (ratype);
4320 else if (simd_clone_subparts (vectype)
4321 == simd_clone_subparts (rtype))
4322 new_temp = make_ssa_name (vec_dest, new_call);
4323 else
4324 new_temp = make_ssa_name (rtype, new_call);
4325 gimple_call_set_lhs (new_call, new_temp);
4326 }
4327 stmt_vec_info new_stmt_info
4328 = vect_finish_stmt_generation (stmt_info, new_call, gsi);
4329
4330 if (vec_dest)
4331 {
4332 if (simd_clone_subparts (vectype) < nunits)
4333 {
4334 unsigned int k, l;
4335 poly_uint64 prec = GET_MODE_BITSIZE (TYPE_MODE (vectype));
4336 poly_uint64 bytes = GET_MODE_SIZE (TYPE_MODE (vectype));
4337 k = nunits / simd_clone_subparts (vectype);
4338 gcc_assert ((k & (k - 1)) == 0);
4339 for (l = 0; l < k; l++)
4340 {
4341 tree t;
4342 if (ratype)
4343 {
4344 t = build_fold_addr_expr (new_temp);
4345 t = build2 (MEM_REF, vectype, t,
4346 build_int_cst (TREE_TYPE (t), l * bytes));
4347 }
4348 else
4349 t = build3 (BIT_FIELD_REF, vectype, new_temp,
4350 bitsize_int (prec), bitsize_int (l * prec));
4351 gimple *new_stmt
4352 = gimple_build_assign (make_ssa_name (vectype), t);
4353 new_stmt_info
4354 = vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
4355
4356 if (j == 0 && l == 0)
4357 STMT_VINFO_VEC_STMT (stmt_info)
4358 = *vec_stmt = new_stmt_info;
4359 else
4360 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt_info;
4361
4362 prev_stmt_info = new_stmt_info;
4363 }
4364
4365 if (ratype)
4366 vect_clobber_variable (stmt_info, gsi, new_temp);
4367 continue;
4368 }
4369 else if (simd_clone_subparts (vectype) > nunits)
4370 {
4371 unsigned int k = (simd_clone_subparts (vectype)
4372 / simd_clone_subparts (rtype));
4373 gcc_assert ((k & (k - 1)) == 0);
4374 if ((j & (k - 1)) == 0)
4375 vec_alloc (ret_ctor_elts, k);
4376 if (ratype)
4377 {
4378 unsigned int m, o = nunits / simd_clone_subparts (rtype);
4379 for (m = 0; m < o; m++)
4380 {
4381 tree tem = build4 (ARRAY_REF, rtype, new_temp,
4382 size_int (m), NULL_TREE, NULL_TREE);
4383 gimple *new_stmt
4384 = gimple_build_assign (make_ssa_name (rtype), tem);
4385 new_stmt_info
4386 = vect_finish_stmt_generation (stmt_info, new_stmt,
4387 gsi);
4388 CONSTRUCTOR_APPEND_ELT (ret_ctor_elts, NULL_TREE,
4389 gimple_assign_lhs (new_stmt));
4390 }
4391 vect_clobber_variable (stmt_info, gsi, new_temp);
4392 }
4393 else
4394 CONSTRUCTOR_APPEND_ELT (ret_ctor_elts, NULL_TREE, new_temp);
4395 if ((j & (k - 1)) != k - 1)
4396 continue;
4397 vec_oprnd0 = build_constructor (vectype, ret_ctor_elts);
4398 gimple *new_stmt
4399 = gimple_build_assign (make_ssa_name (vec_dest), vec_oprnd0);
4400 new_stmt_info
4401 = vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
4402
4403 if ((unsigned) j == k - 1)
4404 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt_info;
4405 else
4406 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt_info;
4407
4408 prev_stmt_info = new_stmt_info;
4409 continue;
4410 }
4411 else if (ratype)
4412 {
4413 tree t = build_fold_addr_expr (new_temp);
4414 t = build2 (MEM_REF, vectype, t,
4415 build_int_cst (TREE_TYPE (t), 0));
4416 gimple *new_stmt
4417 = gimple_build_assign (make_ssa_name (vec_dest), t);
4418 new_stmt_info
4419 = vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
4420 vect_clobber_variable (stmt_info, gsi, new_temp);
4421 }
4422 }
4423
4424 if (j == 0)
4425 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt_info;
4426 else
4427 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt_info;
4428
4429 prev_stmt_info = new_stmt_info;
4430 }
4431
4432 vargs.release ();
4433
4434 /* The call in STMT might prevent it from being removed in DCE.
4435 We cannot remove it here, however, due to the way the SSA name
4436 it defines is mapped to the new definition. So just replace the
4437 rhs of the statement with something harmless. */
4438
4439 if (slp_node)
4440 return true;
4441
4442 gimple *new_stmt;
4443 if (scalar_dest)
4444 {
4445 type = TREE_TYPE (scalar_dest);
4446 lhs = gimple_call_lhs (vect_orig_stmt (stmt_info)->stmt);
4447 new_stmt = gimple_build_assign (lhs, build_zero_cst (type));
4448 }
4449 else
4450 new_stmt = gimple_build_nop ();
4451 vinfo->replace_stmt (gsi, vect_orig_stmt (stmt_info), new_stmt);
4452 unlink_stmt_vdef (stmt);
4453
4454 return true;
4455 }
4456
4457
4458 /* Function vect_gen_widened_results_half
4459
4460 Create a vector stmt whose code is CODE, whose number of arguments is
4461 OP_TYPE, and whose result variable is VEC_DEST; its arguments are
4462 VEC_OPRND0 and VEC_OPRND1. The new vector stmt is to be inserted at GSI.
4463 In the case that CODE is a CALL_EXPR, this means that a call to DECL
4464 needs to be created (DECL is the function decl of a target builtin).
4465 STMT_INFO is the original scalar stmt that we are vectorizing. */
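/* As a concrete (purely illustrative) instance: for a widening
   multiplication the two halves are typically generated with
   VEC_WIDEN_MULT_LO_EXPR and VEC_WIDEN_MULT_HI_EXPR applied to the
   same pair of operands, each producing half of the widened lanes. */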
4466
4467 static gimple *
4468 vect_gen_widened_results_half (enum tree_code code,
4469 tree decl,
4470 tree vec_oprnd0, tree vec_oprnd1, int op_type,
4471 tree vec_dest, gimple_stmt_iterator *gsi,
4472 stmt_vec_info stmt_info)
4473 {
4474 gimple *new_stmt;
4475 tree new_temp;
4476
4477 /* Generate half of the widened result: */
4478 if (code == CALL_EXPR)
4479 {
4480 /* Target specific support */
4481 if (op_type == binary_op)
4482 new_stmt = gimple_build_call (decl, 2, vec_oprnd0, vec_oprnd1);
4483 else
4484 new_stmt = gimple_build_call (decl, 1, vec_oprnd0);
4485 new_temp = make_ssa_name (vec_dest, new_stmt);
4486 gimple_call_set_lhs (new_stmt, new_temp);
4487 }
4488 else
4489 {
4490 /* Generic support */
4491 gcc_assert (op_type == TREE_CODE_LENGTH (code));
4492 if (op_type != binary_op)
4493 vec_oprnd1 = NULL;
4494 new_stmt = gimple_build_assign (vec_dest, code, vec_oprnd0, vec_oprnd1);
4495 new_temp = make_ssa_name (vec_dest, new_stmt);
4496 gimple_assign_set_lhs (new_stmt, new_temp);
4497 }
4498 vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
4499
4500 return new_stmt;
4501 }
4502
4503
4504 /* Get vectorized definitions for loop-based vectorization of STMT_INFO.
4505 For the first operand we call vect_get_vec_def_for_operand (with OPRND
4506 containing the scalar operand), and for the rest we get a copy with
4507 vect_get_vec_def_for_stmt_copy() using the previous vector definition
4508 (stored in OPRND). See vect_get_vec_def_for_stmt_copy() for details.
4509 The vectors are collected into VEC_OPRNDS. */
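/* For example, with MULTI_STEP_CVT == 1 this collects four vector
   defs in VEC_OPRNDS: the operand's own def followed by three
   successive stmt copies (counts shown only as an illustration). */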
4510
4511 static void
4512 vect_get_loop_based_defs (tree *oprnd, stmt_vec_info stmt_info,
4513 vec<tree> *vec_oprnds, int multi_step_cvt)
4514 {
4515 vec_info *vinfo = stmt_info->vinfo;
4516 tree vec_oprnd;
4517
4518 /* Get first vector operand. */
4519 /* All the vector operands except the very first one (that is the scalar
4520 oprnd) are stmt copies. */
4521 if (TREE_CODE (TREE_TYPE (*oprnd)) != VECTOR_TYPE)
4522 vec_oprnd = vect_get_vec_def_for_operand (*oprnd, stmt_info);
4523 else
4524 vec_oprnd = vect_get_vec_def_for_stmt_copy (vinfo, *oprnd);
4525
4526 vec_oprnds->quick_push (vec_oprnd);
4527
4528 /* Get second vector operand. */
4529 vec_oprnd = vect_get_vec_def_for_stmt_copy (vinfo, vec_oprnd);
4530 vec_oprnds->quick_push (vec_oprnd);
4531
4532 *oprnd = vec_oprnd;
4533
4534 /* For conversion in multiple steps, continue to get operands
4535 recursively. */
4536 if (multi_step_cvt)
4537 vect_get_loop_based_defs (oprnd, stmt_info, vec_oprnds,
4538 multi_step_cvt - 1);
4539 }
4540
4541
4542 /* Create vectorized demotion statements for vector operands from VEC_OPRNDS.
4543 For multi-step conversions store the resulting vectors and call the function
4544 recursively. */
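/* E.g. (types chosen only as an example) demoting V4SI operands to a
   V16QI result takes two levels: pairs of V4SI are first packed into
   V8HI vectors, which are stored back into VEC_OPRNDS, and a recursive
   VEC_PACK_TRUNC_EXPR step then packs pairs of V8HI into V16QI. */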
4545
4546 static void
4547 vect_create_vectorized_demotion_stmts (vec<tree> *vec_oprnds,
4548 int multi_step_cvt,
4549 stmt_vec_info stmt_info,
4550 vec<tree> vec_dsts,
4551 gimple_stmt_iterator *gsi,
4552 slp_tree slp_node, enum tree_code code,
4553 stmt_vec_info *prev_stmt_info)
4554 {
4555 unsigned int i;
4556 tree vop0, vop1, new_tmp, vec_dest;
4557
4558 vec_dest = vec_dsts.pop ();
4559
4560 for (i = 0; i < vec_oprnds->length (); i += 2)
4561 {
4562 /* Create demotion operation. */
4563 vop0 = (*vec_oprnds)[i];
4564 vop1 = (*vec_oprnds)[i + 1];
4565 gassign *new_stmt = gimple_build_assign (vec_dest, code, vop0, vop1);
4566 new_tmp = make_ssa_name (vec_dest, new_stmt);
4567 gimple_assign_set_lhs (new_stmt, new_tmp);
4568 stmt_vec_info new_stmt_info
4569 = vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
4570
4571 if (multi_step_cvt)
4572 /* Store the resulting vector for next recursive call. */
4573 (*vec_oprnds)[i/2] = new_tmp;
4574 else
4575 {
4576 /* This is the last step of the conversion sequence. Store the
4577 vectors in SLP_NODE or in vector info of the scalar statement
4578 (or in STMT_VINFO_RELATED_STMT chain). */
4579 if (slp_node)
4580 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt_info);
4581 else
4582 {
4583 if (!*prev_stmt_info)
4584 STMT_VINFO_VEC_STMT (stmt_info) = new_stmt_info;
4585 else
4586 STMT_VINFO_RELATED_STMT (*prev_stmt_info) = new_stmt_info;
4587
4588 *prev_stmt_info = new_stmt_info;
4589 }
4590 }
4591 }
4592
4593 /* For multi-step demotion operations we first generate demotion operations
4594 from the source type to the intermediate types, and then combine the
4595 results (stored in VEC_OPRNDS) with a demotion operation to the
4596 destination type. */
4597 if (multi_step_cvt)
4598 {
4599 /* At each level of recursion we have half of the operands we had at the
4600 previous level. */
4601 vec_oprnds->truncate ((i+1)/2);
4602 vect_create_vectorized_demotion_stmts (vec_oprnds, multi_step_cvt - 1,
4603 stmt_info, vec_dsts, gsi,
4604 slp_node, VEC_PACK_TRUNC_EXPR,
4605 prev_stmt_info);
4606 }
4607
4608 vec_dsts.quick_push (vec_dest);
4609 }
4610
4611
4612 /* Create vectorized promotion statements for vector operands from VEC_OPRNDS0
4613 and VEC_OPRNDS1, for a binary operation associated with scalar statement
4614 STMT_INFO. For multi-step conversions store the resulting vectors and
4615 call the function recursively. */
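/* Each input vector (pair) yields two output vectors, one from each of
   the two half operations CODE1/DECL1 and CODE2/DECL2, so on return
   VEC_OPRNDS0 holds twice as many vectors as it did on entry. */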
4616
4617 static void
4618 vect_create_vectorized_promotion_stmts (vec<tree> *vec_oprnds0,
4619 vec<tree> *vec_oprnds1,
4620 stmt_vec_info stmt_info, tree vec_dest,
4621 gimple_stmt_iterator *gsi,
4622 enum tree_code code1,
4623 enum tree_code code2, tree decl1,
4624 tree decl2, int op_type)
4625 {
4626 int i;
4627 tree vop0, vop1, new_tmp1, new_tmp2;
4628 gimple *new_stmt1, *new_stmt2;
4629 vec<tree> vec_tmp = vNULL;
4630
4631 vec_tmp.create (vec_oprnds0->length () * 2);
4632 FOR_EACH_VEC_ELT (*vec_oprnds0, i, vop0)
4633 {
4634 if (op_type == binary_op)
4635 vop1 = (*vec_oprnds1)[i];
4636 else
4637 vop1 = NULL_TREE;
4638
4639 /* Generate the two halves of promotion operation. */
4640 new_stmt1 = vect_gen_widened_results_half (code1, decl1, vop0, vop1,
4641 op_type, vec_dest, gsi,
4642 stmt_info);
4643 new_stmt2 = vect_gen_widened_results_half (code2, decl2, vop0, vop1,
4644 op_type, vec_dest, gsi,
4645 stmt_info);
4646 if (is_gimple_call (new_stmt1))
4647 {
4648 new_tmp1 = gimple_call_lhs (new_stmt1);
4649 new_tmp2 = gimple_call_lhs (new_stmt2);
4650 }
4651 else
4652 {
4653 new_tmp1 = gimple_assign_lhs (new_stmt1);
4654 new_tmp2 = gimple_assign_lhs (new_stmt2);
4655 }
4656
4657 /* Store the results for the next step. */
4658 vec_tmp.quick_push (new_tmp1);
4659 vec_tmp.quick_push (new_tmp2);
4660 }
4661
4662 vec_oprnds0->release ();
4663 *vec_oprnds0 = vec_tmp;
4664 }
4665
4666
4667 /* Check if STMT_INFO performs a conversion operation that can be vectorized.
4668 If VEC_STMT is also passed, vectorize STMT_INFO: create a vectorized
4669 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
4670 Return true if STMT_INFO is vectorizable in this way. */
4671
4672 static bool
4673 vectorizable_conversion (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
4674 stmt_vec_info *vec_stmt, slp_tree slp_node,
4675 stmt_vector_for_cost *cost_vec)
4676 {
4677 tree vec_dest;
4678 tree scalar_dest;
4679 tree op0, op1 = NULL_TREE;
4680 tree vec_oprnd0 = NULL_TREE, vec_oprnd1 = NULL_TREE;
4681 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
4682 enum tree_code code, code1 = ERROR_MARK, code2 = ERROR_MARK;
4683 enum tree_code codecvt1 = ERROR_MARK, codecvt2 = ERROR_MARK;
4684 tree decl1 = NULL_TREE, decl2 = NULL_TREE;
4685 tree new_temp;
4686 enum vect_def_type dt[2] = {vect_unknown_def_type, vect_unknown_def_type};
4687 int ndts = 2;
4688 stmt_vec_info prev_stmt_info;
4689 poly_uint64 nunits_in;
4690 poly_uint64 nunits_out;
4691 tree vectype_out, vectype_in;
4692 int ncopies, i, j;
4693 tree lhs_type, rhs_type;
4694 enum { NARROW, NONE, WIDEN } modifier;
4695 vec<tree> vec_oprnds0 = vNULL;
4696 vec<tree> vec_oprnds1 = vNULL;
4697 tree vop0;
4698 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
4699 vec_info *vinfo = stmt_info->vinfo;
4700 int multi_step_cvt = 0;
4701 vec<tree> interm_types = vNULL;
4702 tree last_oprnd, intermediate_type, cvt_type = NULL_TREE;
4703 int op_type;
4704 unsigned short fltsz;
4705
4706 /* Is STMT a vectorizable conversion? */
4707
4708 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
4709 return false;
4710
4711 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
4712 && ! vec_stmt)
4713 return false;
4714
4715 gassign *stmt = dyn_cast <gassign *> (stmt_info->stmt);
4716 if (!stmt)
4717 return false;
4718
4719 if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME)
4720 return false;
4721
4722 code = gimple_assign_rhs_code (stmt);
4723 if (!CONVERT_EXPR_CODE_P (code)
4724 && code != FIX_TRUNC_EXPR
4725 && code != FLOAT_EXPR
4726 && code != WIDEN_MULT_EXPR
4727 && code != WIDEN_LSHIFT_EXPR)
4728 return false;
4729
4730 op_type = TREE_CODE_LENGTH (code);
4731
4732 /* Check types of lhs and rhs. */
4733 scalar_dest = gimple_assign_lhs (stmt);
4734 lhs_type = TREE_TYPE (scalar_dest);
4735 vectype_out = STMT_VINFO_VECTYPE (stmt_info);
4736
4737 op0 = gimple_assign_rhs1 (stmt);
4738 rhs_type = TREE_TYPE (op0);
4739
4740 if ((code != FIX_TRUNC_EXPR && code != FLOAT_EXPR)
4741 && !((INTEGRAL_TYPE_P (lhs_type)
4742 && INTEGRAL_TYPE_P (rhs_type))
4743 || (SCALAR_FLOAT_TYPE_P (lhs_type)
4744 && SCALAR_FLOAT_TYPE_P (rhs_type))))
4745 return false;
4746
4747 if (!VECTOR_BOOLEAN_TYPE_P (vectype_out)
4748 && ((INTEGRAL_TYPE_P (lhs_type)
4749 && !type_has_mode_precision_p (lhs_type))
4750 || (INTEGRAL_TYPE_P (rhs_type)
4751 && !type_has_mode_precision_p (rhs_type))))
4752 {
4753 if (dump_enabled_p ())
4754 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4755 "type conversion to/from bit-precision unsupported."
4756 "\n");
4757 return false;
4758 }
4759
4760 /* Check the operands of the operation. */
4761 if (!vect_is_simple_use (op0, vinfo, &dt[0], &vectype_in))
4762 {
4763 if (dump_enabled_p ())
4764 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4765 "use not simple.\n");
4766 return false;
4767 }
4768 if (op_type == binary_op)
4769 {
4770 bool ok;
4771
4772 op1 = gimple_assign_rhs2 (stmt);
4773 gcc_assert (code == WIDEN_MULT_EXPR || code == WIDEN_LSHIFT_EXPR);
4774 /* For WIDEN_MULT_EXPR, if OP0 is a constant, use the type of
4775 OP1. */
4776 if (CONSTANT_CLASS_P (op0))
4777 ok = vect_is_simple_use (op1, vinfo, &dt[1], &vectype_in);
4778 else
4779 ok = vect_is_simple_use (op1, vinfo, &dt[1]);
4780
4781 if (!ok)
4782 {
4783 if (dump_enabled_p ())
4784 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4785 "use not simple.\n");
4786 return false;
4787 }
4788 }
4789
4790 /* If op0 is an external or constant def, use a vector type of
4791 the same size as the output vector type. */
4792 if (!vectype_in)
4793 vectype_in = get_same_sized_vectype (rhs_type, vectype_out);
4794 if (vec_stmt)
4795 gcc_assert (vectype_in);
4796 if (!vectype_in)
4797 {
4798 if (dump_enabled_p ())
4799 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4800 "no vectype for scalar type %T\n", rhs_type);
4801
4802 return false;
4803 }
4804
4805 if (VECTOR_BOOLEAN_TYPE_P (vectype_out)
4806 && !VECTOR_BOOLEAN_TYPE_P (vectype_in))
4807 {
4808 if (dump_enabled_p ())
4809 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4810 "can't convert between boolean and non "
4811 "boolean vectors %T\n", rhs_type);
4812
4813 return false;
4814 }
4815
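/* Classification below, with illustrative types only: a V4SI-to-V16QI
   conversion has nunits_out == 16, a multiple of nunits_in == 4, so it
   is NARROW and will need a multi-step packing sequence; equal lane
   counts give NONE and the remaining case is WIDEN. */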
4816 nunits_in = TYPE_VECTOR_SUBPARTS (vectype_in);
4817 nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
4818 if (known_eq (nunits_out, nunits_in))
4819 modifier = NONE;
4820 else if (multiple_p (nunits_out, nunits_in))
4821 modifier = NARROW;
4822 else
4823 {
4824 gcc_checking_assert (multiple_p (nunits_in, nunits_out));
4825 modifier = WIDEN;
4826 }
4827
4828 /* Multiple types in SLP are handled by creating the appropriate number of
4829 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
4830 case of SLP. */
4831 if (slp_node)
4832 ncopies = 1;
4833 else if (modifier == NARROW)
4834 ncopies = vect_get_num_copies (loop_vinfo, vectype_out);
4835 else
4836 ncopies = vect_get_num_copies (loop_vinfo, vectype_in);
4837
4838 /* Sanity check: make sure that at least one copy of the vectorized stmt
4839 needs to be generated. */
4840 gcc_assert (ncopies >= 1);
4841
4842 bool found_mode = false;
4843 scalar_mode lhs_mode = SCALAR_TYPE_MODE (lhs_type);
4844 scalar_mode rhs_mode = SCALAR_TYPE_MODE (rhs_type);
4845 opt_scalar_mode rhs_mode_iter;
4846
4847 /* Supportable by target? */
4848 switch (modifier)
4849 {
4850 case NONE:
4851 if (code != FIX_TRUNC_EXPR && code != FLOAT_EXPR)
4852 return false;
4853 if (supportable_convert_operation (code, vectype_out, vectype_in,
4854 &decl1, &code1))
4855 break;
4856 /* FALLTHRU */
4857 unsupported:
4858 if (dump_enabled_p ())
4859 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4860 "conversion not supported by target.\n");
4861 return false;
4862
4863 case WIDEN:
4864 if (supportable_widening_operation (code, stmt_info, vectype_out,
4865 vectype_in, &code1, &code2,
4866 &multi_step_cvt, &interm_types))
4867 {
4868 /* Binary widening operation can only be supported directly by the
4869 architecture. */
4870 gcc_assert (!(multi_step_cvt && op_type == binary_op));
4871 break;
4872 }
4873
4874 if (code != FLOAT_EXPR
4875 || GET_MODE_SIZE (lhs_mode) <= GET_MODE_SIZE (rhs_mode))
4876 goto unsupported;
4877
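/* No direct widening support: for FLOAT_EXPR try to go through a wider
   integer type first.  E.g. a short -> double conversion (types purely
   illustrative) can be done as an integer widening short -> int
   followed by an int -> double float conversion, searching successively
   wider integer modes below. */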
4878 fltsz = GET_MODE_SIZE (lhs_mode);
4879 FOR_EACH_2XWIDER_MODE (rhs_mode_iter, rhs_mode)
4880 {
4881 rhs_mode = rhs_mode_iter.require ();
4882 if (GET_MODE_SIZE (rhs_mode) > fltsz)
4883 break;
4884
4885 cvt_type
4886 = build_nonstandard_integer_type (GET_MODE_BITSIZE (rhs_mode), 0);
4887 cvt_type = get_same_sized_vectype (cvt_type, vectype_in);
4888 if (cvt_type == NULL_TREE)
4889 goto unsupported;
4890
4891 if (GET_MODE_SIZE (rhs_mode) == fltsz)
4892 {
4893 if (!supportable_convert_operation (code, vectype_out,
4894 cvt_type, &decl1, &codecvt1))
4895 goto unsupported;
4896 }
4897 else if (!supportable_widening_operation (code, stmt_info,
4898 vectype_out, cvt_type,
4899 &codecvt1, &codecvt2,
4900 &multi_step_cvt,
4901 &interm_types))
4902 continue;
4903 else
4904 gcc_assert (multi_step_cvt == 0);
4905
4906 if (supportable_widening_operation (NOP_EXPR, stmt_info, cvt_type,
4907 vectype_in, &code1, &code2,
4908 &multi_step_cvt, &interm_types))
4909 {
4910 found_mode = true;
4911 break;
4912 }
4913 }
4914
4915 if (!found_mode)
4916 goto unsupported;
4917
4918 if (GET_MODE_SIZE (rhs_mode) == fltsz)
4919 codecvt2 = ERROR_MARK;
4920 else
4921 {
4922 multi_step_cvt++;
4923 interm_types.safe_push (cvt_type);
4924 cvt_type = NULL_TREE;
4925 }
4926 break;
4927
4928 case NARROW:
4929 gcc_assert (op_type == unary_op);
4930 if (supportable_narrowing_operation (code, vectype_out, vectype_in,
4931 &code1, &multi_step_cvt,
4932 &interm_types))
4933 break;
4934
4935 if (code != FIX_TRUNC_EXPR
4936 || GET_MODE_SIZE (lhs_mode) >= GET_MODE_SIZE (rhs_mode))
4937 goto unsupported;
4938
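/* No direct narrowing support: for FIX_TRUNC_EXPR try converting to an
   integer type of the same width as the source first.  E.g. a
   double -> short conversion (types purely illustrative) becomes a
   double -> long fix-trunc followed by an integer narrowing
   long -> short. */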
4939 cvt_type
4940 = build_nonstandard_integer_type (GET_MODE_BITSIZE (rhs_mode), 0);
4941 cvt_type = get_same_sized_vectype (cvt_type, vectype_in);
4942 if (cvt_type == NULL_TREE)
4943 goto unsupported;
4944 if (!supportable_convert_operation (code, cvt_type, vectype_in,
4945 &decl1, &codecvt1))
4946 goto unsupported;
4947 if (supportable_narrowing_operation (NOP_EXPR, vectype_out, cvt_type,
4948 &code1, &multi_step_cvt,
4949 &interm_types))
4950 break;
4951 goto unsupported;
4952
4953 default:
4954 gcc_unreachable ();
4955 }
4956
4957 if (!vec_stmt) /* transformation not required. */
4958 {
4959 DUMP_VECT_SCOPE ("vectorizable_conversion");
4960 if (modifier == NONE)
4961 {
4962 STMT_VINFO_TYPE (stmt_info) = type_conversion_vec_info_type;
4963 vect_model_simple_cost (stmt_info, ncopies, dt, ndts, slp_node,
4964 cost_vec);
4965 }
4966 else if (modifier == NARROW)
4967 {
4968 STMT_VINFO_TYPE (stmt_info) = type_demotion_vec_info_type;
4969 /* The final packing step produces one vector result per copy. */
4970 unsigned int nvectors
4971 = (slp_node ? SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node) : ncopies);
4972 vect_model_promotion_demotion_cost (stmt_info, dt, nvectors,
4973 multi_step_cvt, cost_vec);
4974 }
4975 else
4976 {
4977 STMT_VINFO_TYPE (stmt_info) = type_promotion_vec_info_type;
4978 /* The initial unpacking step produces two vector results
4979 per copy. MULTI_STEP_CVT is 0 for a single conversion,
4980 so >> MULTI_STEP_CVT divides by 2^(number of steps - 1). */
4981 unsigned int nvectors
4982 = (slp_node
4983 ? SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node) >> multi_step_cvt
4984 : ncopies * 2);
4985 vect_model_promotion_demotion_cost (stmt_info, dt, nvectors,
4986 multi_step_cvt, cost_vec);
4987 }
4988 interm_types.release ();
4989 return true;
4990 }
4991
4992 /* Transform. */
4993 if (dump_enabled_p ())
4994 dump_printf_loc (MSG_NOTE, vect_location,
4995 "transform conversion. ncopies = %d.\n", ncopies);
4996
4997 if (op_type == binary_op)
4998 {
4999 if (CONSTANT_CLASS_P (op0))
5000 op0 = fold_convert (TREE_TYPE (op1), op0);
5001 else if (CONSTANT_CLASS_P (op1))
5002 op1 = fold_convert (TREE_TYPE (op0), op1);
5003 }
5004
5005 /* In case of multi-step conversion, we first generate conversion operations
5006 to the intermediate types, and then from those types to the final one.
5007 We create vector destinations for the intermediate types (TYPES) received
5008 from supportable_*_operation, and store them in the correct order
5009 for future use in vect_create_vectorized_*_stmts (). */
5010 auto_vec<tree> vec_dsts (multi_step_cvt + 1);
5011 vec_dest = vect_create_destination_var (scalar_dest,
5012 (cvt_type && modifier == WIDEN)
5013 ? cvt_type : vectype_out);
5014 vec_dsts.quick_push (vec_dest);
5015
5016 if (multi_step_cvt)
5017 {
5018 for (i = interm_types.length () - 1;
5019 interm_types.iterate (i, &intermediate_type); i--)
5020 {
5021 vec_dest = vect_create_destination_var (scalar_dest,
5022 intermediate_type);
5023 vec_dsts.quick_push (vec_dest);
5024 }
5025 }
5026
5027 if (cvt_type)
5028 vec_dest = vect_create_destination_var (scalar_dest,
5029 modifier == WIDEN
5030 ? vectype_out : cvt_type);
5031
5032 if (!slp_node)
5033 {
5034 if (modifier == WIDEN)
5035 {
5036 vec_oprnds0.create (multi_step_cvt ? vect_pow2 (multi_step_cvt) : 1);
5037 if (op_type == binary_op)
5038 vec_oprnds1.create (1);
5039 }
5040 else if (modifier == NARROW)
5041 vec_oprnds0.create (
5042 2 * (multi_step_cvt ? vect_pow2 (multi_step_cvt) : 1));
5043 }
5044 else if (code == WIDEN_LSHIFT_EXPR)
5045 vec_oprnds1.create (slp_node->vec_stmts_size);
5046
5047 last_oprnd = op0;
5048 prev_stmt_info = NULL;
5049 switch (modifier)
5050 {
5051 case NONE:
5052 for (j = 0; j < ncopies; j++)
5053 {
5054 if (j == 0)
5055 vect_get_vec_defs (op0, NULL, stmt_info, &vec_oprnds0,
5056 NULL, slp_node);
5057 else
5058 vect_get_vec_defs_for_stmt_copy (vinfo, &vec_oprnds0, NULL);
5059
5060 FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
5061 {
5062 stmt_vec_info new_stmt_info;
5063 /* Arguments are ready, create the new vector stmt. */
5064 if (code1 == CALL_EXPR)
5065 {
5066 gcall *new_stmt = gimple_build_call (decl1, 1, vop0);
5067 new_temp = make_ssa_name (vec_dest, new_stmt);
5068 gimple_call_set_lhs (new_stmt, new_temp);
5069 new_stmt_info
5070 = vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
5071 }
5072 else
5073 {
5074 gcc_assert (TREE_CODE_LENGTH (code1) == unary_op);
5075 gassign *new_stmt
5076 = gimple_build_assign (vec_dest, code1, vop0);
5077 new_temp = make_ssa_name (vec_dest, new_stmt);
5078 gimple_assign_set_lhs (new_stmt, new_temp);
5079 new_stmt_info
5080 = vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
5081 }
5082
5083 if (slp_node)
5084 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt_info);
5085 else
5086 {
5087 if (!prev_stmt_info)
5088 STMT_VINFO_VEC_STMT (stmt_info)
5089 = *vec_stmt = new_stmt_info;
5090 else
5091 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt_info;
5092 prev_stmt_info = new_stmt_info;
5093 }
5094 }
5095 }
5096 break;
5097
5098 case WIDEN:
5099 /* In case the vectorization factor (VF) is bigger than the number
5100 of elements that we can fit in a vectype (nunits), we have to
5101 generate more than one vector stmt, i.e. we need to "unroll"
5102 the vector stmt by a factor VF/nunits. */
5103 for (j = 0; j < ncopies; j++)
5104 {
5105 /* Handle uses. */
5106 if (j == 0)
5107 {
5108 if (slp_node)
5109 {
5110 if (code == WIDEN_LSHIFT_EXPR)
5111 {
5112 unsigned int k;
5113
5114 vec_oprnd1 = op1;
5115 /* Store vec_oprnd1 for every vector stmt to be created
5116 for SLP_NODE. We check during the analysis that all
5117 the shift arguments are the same. */
5118 for (k = 0; k < slp_node->vec_stmts_size - 1; k++)
5119 vec_oprnds1.quick_push (vec_oprnd1);
5120
5121 vect_get_vec_defs (op0, NULL_TREE, stmt_info,
5122 &vec_oprnds0, NULL, slp_node);
5123 }
5124 else
5125 vect_get_vec_defs (op0, op1, stmt_info, &vec_oprnds0,
5126 &vec_oprnds1, slp_node);
5127 }
5128 else
5129 {
5130 vec_oprnd0 = vect_get_vec_def_for_operand (op0, stmt_info);
5131 vec_oprnds0.quick_push (vec_oprnd0);
5132 if (op_type == binary_op)
5133 {
5134 if (code == WIDEN_LSHIFT_EXPR)
5135 vec_oprnd1 = op1;
5136 else
5137 vec_oprnd1
5138 = vect_get_vec_def_for_operand (op1, stmt_info);
5139 vec_oprnds1.quick_push (vec_oprnd1);
5140 }
5141 }
5142 }
5143 else
5144 {
5145 vec_oprnd0 = vect_get_vec_def_for_stmt_copy (vinfo, vec_oprnd0);
5146 vec_oprnds0.truncate (0);
5147 vec_oprnds0.quick_push (vec_oprnd0);
5148 if (op_type == binary_op)
5149 {
5150 if (code == WIDEN_LSHIFT_EXPR)
5151 vec_oprnd1 = op1;
5152 else
5153 vec_oprnd1 = vect_get_vec_def_for_stmt_copy (vinfo,
5154 vec_oprnd1);
5155 vec_oprnds1.truncate (0);
5156 vec_oprnds1.quick_push (vec_oprnd1);
5157 }
5158 }
5159
5160 /* Arguments are ready. Create the new vector stmts. */
5161 for (i = multi_step_cvt; i >= 0; i--)
5162 {
5163 tree this_dest = vec_dsts[i];
5164 enum tree_code c1 = code1, c2 = code2;
5165 if (i == 0 && codecvt2 != ERROR_MARK)
5166 {
5167 c1 = codecvt1;
5168 c2 = codecvt2;
5169 }
5170 vect_create_vectorized_promotion_stmts (&vec_oprnds0,
5171 &vec_oprnds1, stmt_info,
5172 this_dest, gsi,
5173 c1, c2, decl1, decl2,
5174 op_type);
5175 }
5176
5177 FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
5178 {
5179 stmt_vec_info new_stmt_info;
5180 if (cvt_type)
5181 {
5182 if (codecvt1 == CALL_EXPR)
5183 {
5184 gcall *new_stmt = gimple_build_call (decl1, 1, vop0);
5185 new_temp = make_ssa_name (vec_dest, new_stmt);
5186 gimple_call_set_lhs (new_stmt, new_temp);
5187 new_stmt_info
5188 = vect_finish_stmt_generation (stmt_info, new_stmt,
5189 gsi);
5190 }
5191 else
5192 {
5193 gcc_assert (TREE_CODE_LENGTH (codecvt1) == unary_op);
5194 new_temp = make_ssa_name (vec_dest);
5195 gassign *new_stmt
5196 = gimple_build_assign (new_temp, codecvt1, vop0);
5197 new_stmt_info
5198 = vect_finish_stmt_generation (stmt_info, new_stmt,
5199 gsi);
5200 }
5201 }
5202 else
5203 new_stmt_info = vinfo->lookup_def (vop0);
5204
5205 if (slp_node)
5206 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt_info);
5207 else
5208 {
5209 if (!prev_stmt_info)
5210 STMT_VINFO_VEC_STMT (stmt_info) = new_stmt_info;
5211 else
5212 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt_info;
5213 prev_stmt_info = new_stmt_info;
5214 }
5215 }
5216 }
5217
5218 *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
5219 break;
5220
5221 case NARROW:
5222 /* In case the vectorization factor (VF) is bigger than the number
5223 of elements that we can fit in a vectype (nunits), we have to
5224 generate more than one vector stmt, i.e. we need to "unroll"
5225 the vector stmt by a factor VF/nunits. */
5226 for (j = 0; j < ncopies; j++)
5227 {
5228 /* Handle uses. */
5229 if (slp_node)
5230 vect_get_vec_defs (op0, NULL_TREE, stmt_info, &vec_oprnds0, NULL,
5231 slp_node);
5232 else
5233 {
5234 vec_oprnds0.truncate (0);
5235 vect_get_loop_based_defs (&last_oprnd, stmt_info, &vec_oprnds0,
5236 vect_pow2 (multi_step_cvt) - 1);
5237 }
5238
5239 /* Arguments are ready. Create the new vector stmts. */
5240 if (cvt_type)
5241 FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
5242 {
5243 if (codecvt1 == CALL_EXPR)
5244 {
5245 gcall *new_stmt = gimple_build_call (decl1, 1, vop0);
5246 new_temp = make_ssa_name (vec_dest, new_stmt);
5247 gimple_call_set_lhs (new_stmt, new_temp);
5248 vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
5249 }
5250 else
5251 {
5252 gcc_assert (TREE_CODE_LENGTH (codecvt1) == unary_op);
5253 new_temp = make_ssa_name (vec_dest);
5254 gassign *new_stmt
5255 = gimple_build_assign (new_temp, codecvt1, vop0);
5256 vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
5257 }
5258
5259 vec_oprnds0[i] = new_temp;
5260 }
5261
5262 vect_create_vectorized_demotion_stmts (&vec_oprnds0, multi_step_cvt,
5263 stmt_info, vec_dsts, gsi,
5264 slp_node, code1,
5265 &prev_stmt_info);
5266 }
5267
5268 *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
5269 break;
5270 }
5271
5272 vec_oprnds0.release ();
5273 vec_oprnds1.release ();
5274 interm_types.release ();
5275
5276 return true;
5277 }
5278
5279 /* Return true if we can assume from the scalar form of STMT_INFO that
5280 neither the scalar nor the vector forms will generate code. STMT_INFO
5281 is known not to involve a data reference. */
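/* A couple of illustrative cases: a cast between int and unsigned int
(same precision and mode) and a VIEW_CONVERT_EXPR both qualify,
while a widening short -> int conversion does not. */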
5282
5283 bool
5284 vect_nop_conversion_p (stmt_vec_info stmt_info)
5285 {
5286 gassign *stmt = dyn_cast <gassign *> (stmt_info->stmt);
5287 if (!stmt)
5288 return false;
5289
5290 tree lhs = gimple_assign_lhs (stmt);
5291 tree_code code = gimple_assign_rhs_code (stmt);
5292 tree rhs = gimple_assign_rhs1 (stmt);
5293
5294 if (code == SSA_NAME || code == VIEW_CONVERT_EXPR)
5295 return true;
5296
5297 if (CONVERT_EXPR_CODE_P (code))
5298 return tree_nop_conversion_p (TREE_TYPE (lhs), TREE_TYPE (rhs));
5299
5300 return false;
5301 }
5302
5303 /* Function vectorizable_assignment.
5304
5305 Check if STMT_INFO performs an assignment (copy) that can be vectorized.
5306 If VEC_STMT is also passed, vectorize the STMT_INFO: create a vectorized
5307 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
5308 Return true if STMT_INFO is vectorizable in this way. */
5309
5310 static bool
5311 vectorizable_assignment (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
5312 stmt_vec_info *vec_stmt, slp_tree slp_node,
5313 stmt_vector_for_cost *cost_vec)
5314 {
5315 tree vec_dest;
5316 tree scalar_dest;
5317 tree op;
5318 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
5319 tree new_temp;
5320 enum vect_def_type dt[1] = {vect_unknown_def_type};
5321 int ndts = 1;
5322 int ncopies;
5323 int i, j;
5324 vec<tree> vec_oprnds = vNULL;
5325 tree vop;
5326 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
5327 vec_info *vinfo = stmt_info->vinfo;
5328 stmt_vec_info prev_stmt_info = NULL;
5329 enum tree_code code;
5330 tree vectype_in;
5331
5332 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
5333 return false;
5334
5335 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
5336 && ! vec_stmt)
5337 return false;
5338
5339 /* Is this a vectorizable assignment? */
5340 gassign *stmt = dyn_cast <gassign *> (stmt_info->stmt);
5341 if (!stmt)
5342 return false;
5343
5344 scalar_dest = gimple_assign_lhs (stmt);
5345 if (TREE_CODE (scalar_dest) != SSA_NAME)
5346 return false;
5347
5348 code = gimple_assign_rhs_code (stmt);
5349 if (gimple_assign_single_p (stmt)
5350 || code == PAREN_EXPR
5351 || CONVERT_EXPR_CODE_P (code))
5352 op = gimple_assign_rhs1 (stmt);
5353 else
5354 return false;
5355
5356 if (code == VIEW_CONVERT_EXPR)
5357 op = TREE_OPERAND (op, 0);
5358
5359 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
5360 poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
5361
5362 /* Multiple types in SLP are handled by creating the appropriate number of
5363 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
5364 case of SLP. */
5365 if (slp_node)
5366 ncopies = 1;
5367 else
5368 ncopies = vect_get_num_copies (loop_vinfo, vectype);
5369
5370 gcc_assert (ncopies >= 1);
5371
5372 if (!vect_is_simple_use (op, vinfo, &dt[0], &vectype_in))
5373 {
5374 if (dump_enabled_p ())
5375 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5376 "use not simple.\n");
5377 return false;
5378 }
5379
5380 /* We can handle NOP_EXPR conversions that do not change the number
5381 of elements or the vector size. */
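/* E.g. an int -> unsigned int cast vectorized as V4SI -> V4SI is fine
here, whereas short -> int typically pairs V8HI with V4SI (different
nunits) and is left to vectorizable_conversion instead. The modes
are only illustrative. */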
5382 if ((CONVERT_EXPR_CODE_P (code)
5383 || code == VIEW_CONVERT_EXPR)
5384 && (!vectype_in
5385 || maybe_ne (TYPE_VECTOR_SUBPARTS (vectype_in), nunits)
5386 || maybe_ne (GET_MODE_SIZE (TYPE_MODE (vectype)),
5387 GET_MODE_SIZE (TYPE_MODE (vectype_in)))))
5388 return false;
5389
5390 /* We do not handle bit-precision changes. */
5391 if ((CONVERT_EXPR_CODE_P (code)
5392 || code == VIEW_CONVERT_EXPR)
5393 && INTEGRAL_TYPE_P (TREE_TYPE (scalar_dest))
5394 && (!type_has_mode_precision_p (TREE_TYPE (scalar_dest))
5395 || !type_has_mode_precision_p (TREE_TYPE (op)))
5396 /* But a conversion that does not change the bit-pattern is ok. */
5397 && !((TYPE_PRECISION (TREE_TYPE (scalar_dest))
5398 > TYPE_PRECISION (TREE_TYPE (op)))
5399 && TYPE_UNSIGNED (TREE_TYPE (op)))
5400 /* Conversion between boolean types of different sizes is
5401 a simple assignment in case their vectypes are the same
5402 boolean vectors. */
5403 && (!VECTOR_BOOLEAN_TYPE_P (vectype)
5404 || !VECTOR_BOOLEAN_TYPE_P (vectype_in)))
5405 {
5406 if (dump_enabled_p ())
5407 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5408 "type conversion to/from bit-precision "
5409 "unsupported.\n");
5410 return false;
5411 }
5412
5413 if (!vec_stmt) /* transformation not required. */
5414 {
5415 STMT_VINFO_TYPE (stmt_info) = assignment_vec_info_type;
5416 DUMP_VECT_SCOPE ("vectorizable_assignment");
5417 if (!vect_nop_conversion_p (stmt_info))
5418 vect_model_simple_cost (stmt_info, ncopies, dt, ndts, slp_node,
5419 cost_vec);
5420 return true;
5421 }
5422
5423 /* Transform. */
5424 if (dump_enabled_p ())
5425 dump_printf_loc (MSG_NOTE, vect_location, "transform assignment.\n");
5426
5427 /* Handle def. */
5428 vec_dest = vect_create_destination_var (scalar_dest, vectype);
5429
5430 /* Handle use. */
5431 for (j = 0; j < ncopies; j++)
5432 {
5433 /* Handle uses. */
5434 if (j == 0)
5435 vect_get_vec_defs (op, NULL, stmt_info, &vec_oprnds, NULL, slp_node);
5436 else
5437 vect_get_vec_defs_for_stmt_copy (vinfo, &vec_oprnds, NULL);
5438
5439 /* Arguments are ready. Create the new vector stmt. */
5440 stmt_vec_info new_stmt_info = NULL;
5441 FOR_EACH_VEC_ELT (vec_oprnds, i, vop)
5442 {
5443 if (CONVERT_EXPR_CODE_P (code)
5444 || code == VIEW_CONVERT_EXPR)
5445 vop = build1 (VIEW_CONVERT_EXPR, vectype, vop);
5446 gassign *new_stmt = gimple_build_assign (vec_dest, vop);
5447 new_temp = make_ssa_name (vec_dest, new_stmt);
5448 gimple_assign_set_lhs (new_stmt, new_temp);
5449 new_stmt_info
5450 = vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
5451 if (slp_node)
5452 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt_info);
5453 }
5454
5455 if (slp_node)
5456 continue;
5457
5458 if (j == 0)
5459 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt_info;
5460 else
5461 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt_info;
5462
5463 prev_stmt_info = new_stmt_info;
5464 }
5465
5466 vec_oprnds.release ();
5467 return true;
5468 }
5469
5470
5471 /* Return TRUE if CODE (a shift operation) is supported for SCALAR_TYPE
5472 either as shift by a scalar or by a vector. */
5473
5474 bool
5475 vect_supportable_shift (vec_info *vinfo, enum tree_code code, tree scalar_type)
5476 {
5477
5478 machine_mode vec_mode;
5479 optab optab;
5480 int icode;
5481 tree vectype;
5482
5483 vectype = get_vectype_for_scalar_type (vinfo, scalar_type);
5484 if (!vectype)
5485 return false;
5486
5487 optab = optab_for_tree_code (code, vectype, optab_scalar);
5488 if (!optab
5489 || optab_handler (optab, TYPE_MODE (vectype)) == CODE_FOR_nothing)
5490 {
5491 optab = optab_for_tree_code (code, vectype, optab_vector);
5492 if (!optab
5493 || (optab_handler (optab, TYPE_MODE (vectype))
5494 == CODE_FOR_nothing))
5495 return false;
5496 }
5497
5498 vec_mode = TYPE_MODE (vectype);
5499 icode = (int) optab_handler (optab, vec_mode);
5500 if (icode == CODE_FOR_nothing)
5501 return false;
5502
5503 return true;
5504 }
5505
5506
5507 /* Function vectorizable_shift.
5508
5509 Check if STMT_INFO performs a shift operation that can be vectorized.
5510 If VEC_STMT is also passed, vectorize the STMT_INFO: create a vectorized
5511 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
5512 Return true if STMT_INFO is vectorizable in this way. */
5513
5514 static bool
5515 vectorizable_shift (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
5516 stmt_vec_info *vec_stmt, slp_tree slp_node,
5517 stmt_vector_for_cost *cost_vec)
5518 {
5519 tree vec_dest;
5520 tree scalar_dest;
5521 tree op0, op1 = NULL;
5522 tree vec_oprnd1 = NULL_TREE;
5523 tree vectype;
5524 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
5525 enum tree_code code;
5526 machine_mode vec_mode;
5527 tree new_temp;
5528 optab optab;
5529 int icode;
5530 machine_mode optab_op2_mode;
5531 enum vect_def_type dt[2] = {vect_unknown_def_type, vect_unknown_def_type};
5532 int ndts = 2;
5533 stmt_vec_info prev_stmt_info;
5534 poly_uint64 nunits_in;
5535 poly_uint64 nunits_out;
5536 tree vectype_out;
5537 tree op1_vectype;
5538 int ncopies;
5539 int j, i;
5540 vec<tree> vec_oprnds0 = vNULL;
5541 vec<tree> vec_oprnds1 = vNULL;
5542 tree vop0, vop1;
5543 unsigned int k;
5544 bool scalar_shift_arg = true;
5545 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
5546 vec_info *vinfo = stmt_info->vinfo;
5547 bool incompatible_op1_vectype_p = false;
5548
5549 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
5550 return false;
5551
5552 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
5553 && STMT_VINFO_DEF_TYPE (stmt_info) != vect_nested_cycle
5554 && ! vec_stmt)
5555 return false;
5556
5557 /* Is STMT a vectorizable binary/unary operation? */
5558 gassign *stmt = dyn_cast <gassign *> (stmt_info->stmt);
5559 if (!stmt)
5560 return false;
5561
5562 if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME)
5563 return false;
5564
5565 code = gimple_assign_rhs_code (stmt);
5566
5567 if (!(code == LSHIFT_EXPR || code == RSHIFT_EXPR || code == LROTATE_EXPR
5568 || code == RROTATE_EXPR))
5569 return false;
5570
5571 scalar_dest = gimple_assign_lhs (stmt);
5572 vectype_out = STMT_VINFO_VECTYPE (stmt_info);
5573 if (!type_has_mode_precision_p (TREE_TYPE (scalar_dest)))
5574 {
5575 if (dump_enabled_p ())
5576 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5577 "bit-precision shifts not supported.\n");
5578 return false;
5579 }
5580
5581 op0 = gimple_assign_rhs1 (stmt);
5582 if (!vect_is_simple_use (op0, vinfo, &dt[0], &vectype))
5583 {
5584 if (dump_enabled_p ())
5585 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5586 "use not simple.\n");
5587 return false;
5588 }
5589 /* If op0 is an external or constant def use a vector type with
5590 the same size as the output vector type. */
5591 if (!vectype)
5592 vectype = get_same_sized_vectype (TREE_TYPE (op0), vectype_out);
5593 if (vec_stmt)
5594 gcc_assert (vectype);
5595 if (!vectype)
5596 {
5597 if (dump_enabled_p ())
5598 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5599 "no vectype for scalar type\n");
5600 return false;
5601 }
5602
5603 nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
5604 nunits_in = TYPE_VECTOR_SUBPARTS (vectype);
5605 if (maybe_ne (nunits_out, nunits_in))
5606 return false;
5607
5608 op1 = gimple_assign_rhs2 (stmt);
5609 stmt_vec_info op1_def_stmt_info;
5610 if (!vect_is_simple_use (op1, vinfo, &dt[1], &op1_vectype,
5611 &op1_def_stmt_info))
5612 {
5613 if (dump_enabled_p ())
5614 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5615 "use not simple.\n");
5616 return false;
5617 }
5618
5619 /* Multiple types in SLP are handled by creating the appropriate number of
5620 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
5621 case of SLP. */
5622 if (slp_node)
5623 ncopies = 1;
5624 else
5625 ncopies = vect_get_num_copies (loop_vinfo, vectype);
5626
5627 gcc_assert (ncopies >= 1);
5628
5629 /* Determine whether the shift amount is a vector, or scalar. If the
5630 shift/rotate amount is a vector, use the vector/vector shift optabs. */
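/* For illustration: a shift like a[i] << 3, or a[i] << n with a
loop-invariant n, can keep the shift amount as a scalar, whereas
a[i] << b[i] needs a vector of per-lane amounts and therefore the
vector/vector optab. */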
5631
5632 if ((dt[1] == vect_internal_def
5633 || dt[1] == vect_induction_def
5634 || dt[1] == vect_nested_cycle)
5635 && !slp_node)
5636 scalar_shift_arg = false;
5637 else if (dt[1] == vect_constant_def
5638 || dt[1] == vect_external_def
5639 || dt[1] == vect_internal_def)
5640 {
5641 /* In SLP we need to check whether the shift count is the same in
5642 all the statements; in loops, if it is a constant or invariant,
5643 it is always a scalar shift. */
5644 if (slp_node)
5645 {
5646 vec<stmt_vec_info> stmts = SLP_TREE_SCALAR_STMTS (slp_node);
5647 stmt_vec_info slpstmt_info;
5648
5649 FOR_EACH_VEC_ELT (stmts, k, slpstmt_info)
5650 {
5651 gassign *slpstmt = as_a <gassign *> (slpstmt_info->stmt);
5652 if (!operand_equal_p (gimple_assign_rhs2 (slpstmt), op1, 0))
5653 scalar_shift_arg = false;
5654 }
5655
5656 /* For internal SLP defs we have to make sure we see scalar stmts
5657 for all vector elements.
5658 ??? For different vectors we could resort to a different
5659 scalar shift operand but code-generation below simply always
5660 takes the first. */
5661 if (dt[1] == vect_internal_def
5662 && maybe_ne (nunits_out * SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node),
5663 stmts.length ()))
5664 scalar_shift_arg = false;
5665 }
5666
5667 /* If the shift amount is computed by a pattern stmt we cannot
5668 use the scalar amount directly thus give up and use a vector
5669 shift. */
5670 if (op1_def_stmt_info && is_pattern_stmt_p (op1_def_stmt_info))
5671 scalar_shift_arg = false;
5672 }
5673 else
5674 {
5675 if (dump_enabled_p ())
5676 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5677 "operand mode requires invariant argument.\n");
5678 return false;
5679 }
5680
5681 /* Vector shifted by vector. */
5682 bool was_scalar_shift_arg = scalar_shift_arg;
5683 if (!scalar_shift_arg)
5684 {
5685 optab = optab_for_tree_code (code, vectype, optab_vector);
5686 if (dump_enabled_p ())
5687 dump_printf_loc (MSG_NOTE, vect_location,
5688 "vector/vector shift/rotate found.\n");
5689
5690 if (!op1_vectype)
5691 op1_vectype = get_same_sized_vectype (TREE_TYPE (op1), vectype_out);
5692 incompatible_op1_vectype_p
5693 = (op1_vectype == NULL_TREE
5694 || maybe_ne (TYPE_VECTOR_SUBPARTS (op1_vectype),
5695 TYPE_VECTOR_SUBPARTS (vectype))
5696 || TYPE_MODE (op1_vectype) != TYPE_MODE (vectype));
5697 if (incompatible_op1_vectype_p
5698 && (!slp_node
5699 || SLP_TREE_DEF_TYPE
5700 (SLP_TREE_CHILDREN (slp_node)[1]) != vect_constant_def))
5701 {
5702 if (dump_enabled_p ())
5703 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5704 "unusable type for last operand in"
5705 " vector/vector shift/rotate.\n");
5706 return false;
5707 }
5708 }
5709 /* See if the machine has a vector shifted by scalar insn and if not
5710 then see if it has a vector shifted by vector insn. */
5711 else
5712 {
5713 optab = optab_for_tree_code (code, vectype, optab_scalar);
5714 if (optab
5715 && optab_handler (optab, TYPE_MODE (vectype)) != CODE_FOR_nothing)
5716 {
5717 if (dump_enabled_p ())
5718 dump_printf_loc (MSG_NOTE, vect_location,
5719 "vector/scalar shift/rotate found.\n");
5720 }
5721 else
5722 {
5723 optab = optab_for_tree_code (code, vectype, optab_vector);
5724 if (optab
5725 && (optab_handler (optab, TYPE_MODE (vectype))
5726 != CODE_FOR_nothing))
5727 {
5728 scalar_shift_arg = false;
5729
5730 if (dump_enabled_p ())
5731 dump_printf_loc (MSG_NOTE, vect_location,
5732 "vector/vector shift/rotate found.\n");
5733
5734 /* Unlike the other binary operators, shifts/rotates have
5735 an rhs of integer type rather than the same type as the lhs,
5736 so make sure the scalar has the right type if we are
5737 dealing with vectors of long long/long/short/char. */
5738 if (dt[1] == vect_constant_def)
5739 {
5740 if (!slp_node)
5741 op1 = fold_convert (TREE_TYPE (vectype), op1);
5742 }
5743 else if (!useless_type_conversion_p (TREE_TYPE (vectype),
5744 TREE_TYPE (op1)))
5745 {
5746 if (vec_stmt && !slp_node)
5747 {
5748 op1 = fold_convert (TREE_TYPE (vectype), op1);
5749 op1 = vect_init_vector (stmt_info, op1,
5750 TREE_TYPE (vectype), NULL);
5751 }
5752 }
5753 }
5754 }
5755 }
5756
5757 /* Supportable by target? */
5758 if (!optab)
5759 {
5760 if (dump_enabled_p ())
5761 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5762 "no optab.\n");
5763 return false;
5764 }
5765 vec_mode = TYPE_MODE (vectype);
5766 icode = (int) optab_handler (optab, vec_mode);
5767 if (icode == CODE_FOR_nothing)
5768 {
5769 if (dump_enabled_p ())
5770 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5771 "op not supported by target.\n");
5772 /* Check only during analysis. */
5773 if (maybe_ne (GET_MODE_SIZE (vec_mode), UNITS_PER_WORD)
5774 || (!vec_stmt
5775 && !vect_worthwhile_without_simd_p (vinfo, code)))
5776 return false;
5777 if (dump_enabled_p ())
5778 dump_printf_loc (MSG_NOTE, vect_location,
5779 "proceeding using word mode.\n");
5780 }
5781
5782 /* Worthwhile without SIMD support? Check only during analysis. */
5783 if (!vec_stmt
5784 && !VECTOR_MODE_P (TYPE_MODE (vectype))
5785 && !vect_worthwhile_without_simd_p (vinfo, code))
5786 {
5787 if (dump_enabled_p ())
5788 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5789 "not worthwhile without SIMD support.\n");
5790 return false;
5791 }
5792
5793 if (!vec_stmt) /* transformation not required. */
5794 {
5795 STMT_VINFO_TYPE (stmt_info) = shift_vec_info_type;
5796 DUMP_VECT_SCOPE ("vectorizable_shift");
5797 vect_model_simple_cost (stmt_info, ncopies, dt, ndts, slp_node, cost_vec);
5798 return true;
5799 }
5800
5801 /* Transform. */
5802
5803 if (dump_enabled_p ())
5804 dump_printf_loc (MSG_NOTE, vect_location,
5805 "transform binary/unary operation.\n");
5806
5807 /* Handle def. */
5808 vec_dest = vect_create_destination_var (scalar_dest, vectype);
5809
5810 prev_stmt_info = NULL;
5811 for (j = 0; j < ncopies; j++)
5812 {
5813 /* Handle uses. */
5814 if (j == 0)
5815 {
5816 if (scalar_shift_arg)
5817 {
5818 /* Vector shl and shr insn patterns can be defined with scalar
5819 operand 2 (shift operand). In this case, use constant or loop
5820 invariant op1 directly, without extending it to vector mode
5821 first. */
5822 optab_op2_mode = insn_data[icode].operand[2].mode;
5823 if (!VECTOR_MODE_P (optab_op2_mode))
5824 {
5825 if (dump_enabled_p ())
5826 dump_printf_loc (MSG_NOTE, vect_location,
5827 "operand 1 using scalar mode.\n");
5828 vec_oprnd1 = op1;
5829 vec_oprnds1.create (slp_node ? slp_node->vec_stmts_size : 1);
5830 vec_oprnds1.quick_push (vec_oprnd1);
5831 if (slp_node)
5832 {
5833 /* Store vec_oprnd1 for every vector stmt to be created
5834 for SLP_NODE. We check during the analysis that all
5835 the shift arguments are the same.
5836 TODO: Allow different constants for different vector
5837 stmts generated for an SLP instance. */
5838 for (k = 0; k < slp_node->vec_stmts_size - 1; k++)
5839 vec_oprnds1.quick_push (vec_oprnd1);
5840 }
5841 }
5842 }
5843 else if (slp_node && incompatible_op1_vectype_p)
5844 {
5845 if (was_scalar_shift_arg)
5846 {
5847 /* If the argument was the same in all lanes create
5848 the correctly typed vector shift amount directly. */
5849 op1 = fold_convert (TREE_TYPE (vectype), op1);
5850 op1 = vect_init_vector (stmt_info, op1, TREE_TYPE (vectype),
5851 !loop_vinfo ? gsi : NULL);
5852 vec_oprnd1 = vect_init_vector (stmt_info, op1, vectype,
5853 !loop_vinfo ? gsi : NULL);
5854 vec_oprnds1.create (slp_node->vec_stmts_size);
5855 for (k = 0; k < slp_node->vec_stmts_size; k++)
5856 vec_oprnds1.quick_push (vec_oprnd1);
5857 }
5858 else if (dt[1] == vect_constant_def)
5859 {
5860 /* Convert the scalar constant shift amounts in-place. */
5861 slp_tree shift = SLP_TREE_CHILDREN (slp_node)[1];
5862 gcc_assert (SLP_TREE_DEF_TYPE (shift) == vect_constant_def);
5863 for (unsigned i = 0;
5864 i < SLP_TREE_SCALAR_OPS (shift).length (); ++i)
5865 {
5866 SLP_TREE_SCALAR_OPS (shift)[i]
5867 = fold_convert (TREE_TYPE (vectype),
5868 SLP_TREE_SCALAR_OPS (shift)[i]);
5869 gcc_assert ((TREE_CODE (SLP_TREE_SCALAR_OPS (shift)[i])
5870 == INTEGER_CST));
5871 }
5872 }
5873 else
5874 gcc_assert (TYPE_MODE (op1_vectype) == TYPE_MODE (vectype));
5875 }
5876
5877 /* vec_oprnd1 is available if operand 1 should be of a scalar-type
5878 (a special case for certain kinds of vector shifts); otherwise,
5879 operand 1 should be of a vector type (the usual case). */
5880 if (vec_oprnd1)
5881 vect_get_vec_defs (op0, NULL_TREE, stmt_info, &vec_oprnds0, NULL,
5882 slp_node);
5883 else
5884 vect_get_vec_defs (op0, op1, stmt_info, &vec_oprnds0, &vec_oprnds1,
5885 slp_node);
5886 }
5887 else
5888 vect_get_vec_defs_for_stmt_copy (vinfo, &vec_oprnds0, &vec_oprnds1);
5889
5890 /* Arguments are ready. Create the new vector stmt. */
5891 stmt_vec_info new_stmt_info = NULL;
5892 FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
5893 {
5894 vop1 = vec_oprnds1[i];
5895 gassign *new_stmt = gimple_build_assign (vec_dest, code, vop0, vop1);
5896 new_temp = make_ssa_name (vec_dest, new_stmt);
5897 gimple_assign_set_lhs (new_stmt, new_temp);
5898 new_stmt_info
5899 = vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
5900 if (slp_node)
5901 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt_info);
5902 }
5903
5904 if (slp_node)
5905 continue;
5906
5907 if (j == 0)
5908 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt_info;
5909 else
5910 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt_info;
5911 prev_stmt_info = new_stmt_info;
5912 }
5913
5914 vec_oprnds0.release ();
5915 vec_oprnds1.release ();
5916
5917 return true;
5918 }
5919
5920
5921 /* Function vectorizable_operation.
5922
5923 Check if STMT_INFO performs a binary, unary or ternary operation that can
5924 be vectorized.
5925 If VEC_STMT is also passed, vectorize STMT_INFO: create a vectorized
5926 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
5927 Return true if STMT_INFO is vectorizable in this way. */
5928
5929 static bool
5930 vectorizable_operation (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
5931 stmt_vec_info *vec_stmt, slp_tree slp_node,
5932 stmt_vector_for_cost *cost_vec)
5933 {
5934 tree vec_dest;
5935 tree scalar_dest;
5936 tree op0, op1 = NULL_TREE, op2 = NULL_TREE;
5937 tree vectype;
5938 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
5939 enum tree_code code, orig_code;
5940 machine_mode vec_mode;
5941 tree new_temp;
5942 int op_type;
5943 optab optab;
5944 bool target_support_p;
5945 enum vect_def_type dt[3]
5946 = {vect_unknown_def_type, vect_unknown_def_type, vect_unknown_def_type};
5947 int ndts = 3;
5948 stmt_vec_info prev_stmt_info;
5949 poly_uint64 nunits_in;
5950 poly_uint64 nunits_out;
5951 tree vectype_out;
5952 int ncopies, vec_num;
5953 int j, i;
5954 vec<tree> vec_oprnds0 = vNULL;
5955 vec<tree> vec_oprnds1 = vNULL;
5956 vec<tree> vec_oprnds2 = vNULL;
5957 tree vop0, vop1, vop2;
5958 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
5959 vec_info *vinfo = stmt_info->vinfo;
5960
5961 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
5962 return false;
5963
5964 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
5965 && ! vec_stmt)
5966 return false;
5967
5968 /* Is STMT a vectorizable binary/unary operation? */
5969 gassign *stmt = dyn_cast <gassign *> (stmt_info->stmt);
5970 if (!stmt)
5971 return false;
5972
5973 if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME)
5974 return false;
5975
5976 orig_code = code = gimple_assign_rhs_code (stmt);
5977
5978 /* For pointer addition and subtraction, we should use the normal
5979 plus and minus for the vector operation. */
5980 if (code == POINTER_PLUS_EXPR)
5981 code = PLUS_EXPR;
5982 if (code == POINTER_DIFF_EXPR)
5983 code = MINUS_EXPR;
5984
5985 /* Support only unary or binary operations. */
5986 op_type = TREE_CODE_LENGTH (code);
5987 if (op_type != unary_op && op_type != binary_op && op_type != ternary_op)
5988 {
5989 if (dump_enabled_p ())
5990 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5991 "num. args = %d (not unary/binary/ternary op).\n",
5992 op_type);
5993 return false;
5994 }
5995
5996 scalar_dest = gimple_assign_lhs (stmt);
5997 vectype_out = STMT_VINFO_VECTYPE (stmt_info);
5998
5999 /* Most operations cannot handle bit-precision types without extra
6000 truncations. */
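/* For example, adding two values of a 3-bit type held in a wider mode
can carry into the unused bits, so each vector add would need an
extra truncation; bitwise IOR, XOR and AND preserve the extension
into the unused bits and are therefore exempt below. */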
6001 if (!VECTOR_BOOLEAN_TYPE_P (vectype_out)
6002 && !type_has_mode_precision_p (TREE_TYPE (scalar_dest))
6003 /* Exception are bitwise binary operations. */
6004 && code != BIT_IOR_EXPR
6005 && code != BIT_XOR_EXPR
6006 && code != BIT_AND_EXPR)
6007 {
6008 if (dump_enabled_p ())
6009 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6010 "bit-precision arithmetic not supported.\n");
6011 return false;
6012 }
6013
6014 op0 = gimple_assign_rhs1 (stmt);
6015 if (!vect_is_simple_use (op0, vinfo, &dt[0], &vectype))
6016 {
6017 if (dump_enabled_p ())
6018 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6019 "use not simple.\n");
6020 return false;
6021 }
6022 /* If op0 is an external or constant def use a vector type with
6023 the same size as the output vector type. */
6024 if (!vectype)
6025 {
6026 /* For a boolean type we cannot determine the vectype from an
6027 invariant value (we don't know whether it is a vector
6028 of booleans or a vector of integers). We use the output
6029 vectype because operations on booleans don't change
6030 the type. */
6031 if (VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (op0)))
6032 {
6033 if (!VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (scalar_dest)))
6034 {
6035 if (dump_enabled_p ())
6036 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6037 "not supported operation on bool value.\n");
6038 return false;
6039 }
6040 vectype = vectype_out;
6041 }
6042 else
6043 vectype = get_same_sized_vectype (TREE_TYPE (op0), vectype_out);
6044 }
6045 if (vec_stmt)
6046 gcc_assert (vectype);
6047 if (!vectype)
6048 {
6049 if (dump_enabled_p ())
6050 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6051 "no vectype for scalar type %T\n",
6052 TREE_TYPE (op0));
6053
6054 return false;
6055 }
6056
6057 nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
6058 nunits_in = TYPE_VECTOR_SUBPARTS (vectype);
6059 if (maybe_ne (nunits_out, nunits_in))
6060 return false;
6061
6062 if (op_type == binary_op || op_type == ternary_op)
6063 {
6064 op1 = gimple_assign_rhs2 (stmt);
6065 if (!vect_is_simple_use (op1, vinfo, &dt[1]))
6066 {
6067 if (dump_enabled_p ())
6068 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6069 "use not simple.\n");
6070 return false;
6071 }
6072 }
6073 if (op_type == ternary_op)
6074 {
6075 op2 = gimple_assign_rhs3 (stmt);
6076 if (!vect_is_simple_use (op2, vinfo, &dt[2]))
6077 {
6078 if (dump_enabled_p ())
6079 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6080 "use not simple.\n");
6081 return false;
6082 }
6083 }
6084
6085 /* Multiple types in SLP are handled by creating the appropriate number of
6086 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
6087 case of SLP. */
6088 if (slp_node)
6089 {
6090 ncopies = 1;
6091 vec_num = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
6092 }
6093 else
6094 {
6095 ncopies = vect_get_num_copies (loop_vinfo, vectype);
6096 vec_num = 1;
6097 }
6098
6099 gcc_assert (ncopies >= 1);
6100
6101 /* Shifts are handled in vectorizable_shift (). */
6102 if (code == LSHIFT_EXPR || code == RSHIFT_EXPR || code == LROTATE_EXPR
6103 || code == RROTATE_EXPR)
6104 return false;
6105
6106 /* Supportable by target? */
6107
6108 vec_mode = TYPE_MODE (vectype);
6109 if (code == MULT_HIGHPART_EXPR)
6110 target_support_p = can_mult_highpart_p (vec_mode, TYPE_UNSIGNED (vectype));
6111 else
6112 {
6113 optab = optab_for_tree_code (code, vectype, optab_default);
6114 if (!optab)
6115 {
6116 if (dump_enabled_p ())
6117 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6118 "no optab.\n");
6119 return false;
6120 }
6121 target_support_p = (optab_handler (optab, vec_mode)
6122 != CODE_FOR_nothing);
6123 }
6124
6125 if (!target_support_p)
6126 {
6127 if (dump_enabled_p ())
6128 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6129 "op not supported by target.\n");
6130 /* Check only during analysis. */
6131 if (maybe_ne (GET_MODE_SIZE (vec_mode), UNITS_PER_WORD)
6132 || (!vec_stmt && !vect_worthwhile_without_simd_p (vinfo, code)))
6133 return false;
6134 if (dump_enabled_p ())
6135 dump_printf_loc (MSG_NOTE, vect_location,
6136 "proceeding using word mode.\n");
6137 }
6138
6139 /* Worthwhile without SIMD support? Check only during analysis. */
6140 if (!VECTOR_MODE_P (vec_mode)
6141 && !vec_stmt
6142 && !vect_worthwhile_without_simd_p (vinfo, code))
6143 {
6144 if (dump_enabled_p ())
6145 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6146 "not worthwhile without SIMD support.\n");
6147 return false;
6148 }
6149
6150 int reduc_idx = STMT_VINFO_REDUC_IDX (stmt_info);
6151 vec_loop_masks *masks = (loop_vinfo ? &LOOP_VINFO_MASKS (loop_vinfo) : NULL);
6152 internal_fn cond_fn = get_conditional_internal_fn (code);
6153
6154 if (!vec_stmt) /* transformation not required. */
6155 {
6156 /* If this operation is part of a reduction, a fully-masked loop
6157 should only change the active lanes of the reduction chain,
6158 keeping the inactive lanes as-is. */
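/* As a rough sketch (names illustrative), for a masked sum reduction
the add below ends up as something like
vect_sum_new = .COND_ADD (loop_mask, vect_sum, vect_x, vect_sum);
so inactive lanes simply pass the reduction chain value through. */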
6159 if (loop_vinfo
6160 && LOOP_VINFO_CAN_FULLY_MASK_P (loop_vinfo)
6161 && reduc_idx >= 0)
6162 {
6163 if (cond_fn == IFN_LAST
6164 || !direct_internal_fn_supported_p (cond_fn, vectype,
6165 OPTIMIZE_FOR_SPEED))
6166 {
6167 if (dump_enabled_p ())
6168 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6169 "can't use a fully-masked loop because no"
6170 " conditional operation is available.\n");
6171 LOOP_VINFO_CAN_FULLY_MASK_P (loop_vinfo) = false;
6172 }
6173 else
6174 vect_record_loop_mask (loop_vinfo, masks, ncopies * vec_num,
6175 vectype, NULL);
6176 }
6177
6178 STMT_VINFO_TYPE (stmt_info) = op_vec_info_type;
6179 DUMP_VECT_SCOPE ("vectorizable_operation");
6180 vect_model_simple_cost (stmt_info, ncopies, dt, ndts, slp_node, cost_vec);
6181 return true;
6182 }
6183
6184 /* Transform. */
6185
6186 if (dump_enabled_p ())
6187 dump_printf_loc (MSG_NOTE, vect_location,
6188 "transform binary/unary operation.\n");
6189
6190 bool masked_loop_p = loop_vinfo && LOOP_VINFO_FULLY_MASKED_P (loop_vinfo);
6191
6192 /* POINTER_DIFF_EXPR has pointer arguments which are vectorized as
6193 vectors with unsigned elements, but the result is signed. So, we
6194 need to compute the MINUS_EXPR into a vectype temporary and
6195 VIEW_CONVERT_EXPR it into the final vectype_out result. */
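/* Sketch of the intended sequence, with purely illustrative names:
vect_tmp = vect_p1 - vect_p2; (computed in VECTYPE, unsigned elements)
vect_res = VIEW_CONVERT_EXPR <vectype_out> (vect_tmp); */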
6196 tree vec_cvt_dest = NULL_TREE;
6197 if (orig_code == POINTER_DIFF_EXPR)
6198 {
6199 vec_dest = vect_create_destination_var (scalar_dest, vectype);
6200 vec_cvt_dest = vect_create_destination_var (scalar_dest, vectype_out);
6201 }
6202 /* Handle def. */
6203 else
6204 vec_dest = vect_create_destination_var (scalar_dest, vectype_out);
6205
6206 /* In case the vectorization factor (VF) is bigger than the number
6207 of elements that we can fit in a vectype (nunits), we have to generate
6208 more than one vector stmt, i.e. we need to "unroll" the
6209 vector stmt by a factor VF/nunits. In doing so, we record a pointer
6210 from one copy of the vector stmt to the next, in the field
6211 STMT_VINFO_RELATED_STMT. This is necessary in order to allow following
6212 stages to find the correct vector defs to be used when vectorizing
6213 stmts that use the defs of the current stmt. The example below
6214 illustrates the vectorization process when VF=16 and nunits=4 (i.e.,
6215 we need to create 4 vectorized stmts):
6216
6217 before vectorization:
6218 RELATED_STMT VEC_STMT
6219 S1: x = memref - -
6220 S2: z = x + 1 - -
6221
6222 step 1: vectorize stmt S1 (done in vectorizable_load. See more details
6223 there):
6224 RELATED_STMT VEC_STMT
6225 VS1_0: vx0 = memref0 VS1_1 -
6226 VS1_1: vx1 = memref1 VS1_2 -
6227 VS1_2: vx2 = memref2 VS1_3 -
6228 VS1_3: vx3 = memref3 - -
6229 S1: x = load - VS1_0
6230 S2: z = x + 1 - -
6231
6232 step2: vectorize stmt S2 (done here):
6233 To vectorize stmt S2 we first need to find the relevant vector
6234 def for the first operand 'x'. This is, as usual, obtained from
6235 the vector stmt recorded in the STMT_VINFO_VEC_STMT of the stmt
6236 that defines 'x' (S1). This way we find the stmt VS1_0, and the
6237 relevant vector def 'vx0'. Having found 'vx0' we can generate
6238 the vector stmt VS2_0, and as usual, record it in the
6239 STMT_VINFO_VEC_STMT of stmt S2.
6240 When creating the second copy (VS2_1), we obtain the relevant vector
6241 def from the vector stmt recorded in the STMT_VINFO_RELATED_STMT of
6242 stmt VS1_0. This way we find the stmt VS1_1 and the relevant
6243 vector def 'vx1'. Using 'vx1' we create stmt VS2_1 and record a
6244 pointer to it in the STMT_VINFO_RELATED_STMT of the vector stmt VS2_0.
6245 Similarly when creating stmts VS2_2 and VS2_3. This is the resulting
6246 chain of stmts and pointers:
6247 RELATED_STMT VEC_STMT
6248 VS1_0: vx0 = memref0 VS1_1 -
6249 VS1_1: vx1 = memref1 VS1_2 -
6250 VS1_2: vx2 = memref2 VS1_3 -
6251 VS1_3: vx3 = memref3 - -
6252 S1: x = load - VS1_0
6253 VS2_0: vz0 = vx0 + v1 VS2_1 -
6254 VS2_1: vz1 = vx1 + v1 VS2_2 -
6255 VS2_2: vz2 = vx2 + v1 VS2_3 -
6256 VS2_3: vz3 = vx3 + v1 - -
6257 S2: z = x + 1 - VS2_0 */
6258
6259 prev_stmt_info = NULL;
6260 for (j = 0; j < ncopies; j++)
6261 {
6262 /* Handle uses. */
6263 if (j == 0)
6264 {
6265 if (op_type == binary_op)
6266 vect_get_vec_defs (op0, op1, stmt_info, &vec_oprnds0, &vec_oprnds1,
6267 slp_node);
6268 else if (op_type == ternary_op)
6269 {
6270 if (slp_node)
6271 {
6272 auto_vec<vec<tree> > vec_defs(3);
6273 vect_get_slp_defs (slp_node, &vec_defs);
6274 vec_oprnds0 = vec_defs[0];
6275 vec_oprnds1 = vec_defs[1];
6276 vec_oprnds2 = vec_defs[2];
6277 }
6278 else
6279 {
6280 vect_get_vec_defs (op0, op1, stmt_info, &vec_oprnds0,
6281 &vec_oprnds1, NULL);
6282 vect_get_vec_defs (op2, NULL_TREE, stmt_info, &vec_oprnds2,
6283 NULL, NULL);
6284 }
6285 }
6286 else
6287 vect_get_vec_defs (op0, NULL_TREE, stmt_info, &vec_oprnds0, NULL,
6288 slp_node);
6289 }
6290 else
6291 {
6292 vect_get_vec_defs_for_stmt_copy (vinfo, &vec_oprnds0, &vec_oprnds1);
6293 if (op_type == ternary_op)
6294 {
6295 tree vec_oprnd = vec_oprnds2.pop ();
6296 vec_oprnds2.quick_push (vect_get_vec_def_for_stmt_copy (vinfo,
6297 vec_oprnd));
6298 }
6299 }
6300
6301 /* Arguments are ready. Create the new vector stmt. */
6302 stmt_vec_info new_stmt_info = NULL;
6303 FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
6304 {
6305 vop1 = ((op_type == binary_op || op_type == ternary_op)
6306 ? vec_oprnds1[i] : NULL_TREE);
6307 vop2 = ((op_type == ternary_op)
6308 ? vec_oprnds2[i] : NULL_TREE);
6309 if (masked_loop_p && reduc_idx >= 0)
6310 {
6311 /* Perform the operation on active elements only and take
6312 inactive elements from the reduction chain input. */
6313 gcc_assert (!vop2);
6314 vop2 = reduc_idx == 1 ? vop1 : vop0;
6315 tree mask = vect_get_loop_mask (gsi, masks, vec_num * ncopies,
6316 vectype, i * ncopies + j);
6317 gcall *call = gimple_build_call_internal (cond_fn, 4, mask,
6318 vop0, vop1, vop2);
6319 new_temp = make_ssa_name (vec_dest, call);
6320 gimple_call_set_lhs (call, new_temp);
6321 gimple_call_set_nothrow (call, true);
6322 new_stmt_info
6323 = vect_finish_stmt_generation (stmt_info, call, gsi);
6324 }
6325 else
6326 {
6327 gassign *new_stmt = gimple_build_assign (vec_dest, code,
6328 vop0, vop1, vop2);
6329 new_temp = make_ssa_name (vec_dest, new_stmt);
6330 gimple_assign_set_lhs (new_stmt, new_temp);
6331 new_stmt_info
6332 = vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
6333 if (vec_cvt_dest)
6334 {
6335 new_temp = build1 (VIEW_CONVERT_EXPR, vectype_out, new_temp);
6336 gassign *new_stmt
6337 = gimple_build_assign (vec_cvt_dest, VIEW_CONVERT_EXPR,
6338 new_temp);
6339 new_temp = make_ssa_name (vec_cvt_dest, new_stmt);
6340 gimple_assign_set_lhs (new_stmt, new_temp);
6341 new_stmt_info
6342 = vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
6343 }
6344 }
6345 if (slp_node)
6346 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt_info);
6347 }
6348
6349 if (slp_node)
6350 continue;
6351
6352 if (j == 0)
6353 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt_info;
6354 else
6355 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt_info;
6356 prev_stmt_info = new_stmt_info;
6357 }
6358
6359 vec_oprnds0.release ();
6360 vec_oprnds1.release ();
6361 vec_oprnds2.release ();
6362
6363 return true;
6364 }
6365
6366 /* A helper function to ensure data reference DR_INFO's base alignment. */
6367
6368 static void
6369 ensure_base_align (dr_vec_info *dr_info)
6370 {
6371 if (dr_info->misalignment == DR_MISALIGNMENT_UNINITIALIZED)
6372 return;
6373
6374 if (dr_info->base_misaligned)
6375 {
6376 tree base_decl = dr_info->base_decl;
6377
6378 // We should only be able to increase the alignment of a base object if
6379 // we know what its new alignment should be at compile time.
6380 unsigned HOST_WIDE_INT align_base_to =
6381 DR_TARGET_ALIGNMENT (dr_info).to_constant () * BITS_PER_UNIT;
6382
6383 if (decl_in_symtab_p (base_decl))
6384 symtab_node::get (base_decl)->increase_alignment (align_base_to);
6385 else if (DECL_ALIGN (base_decl) < align_base_to)
6386 {
6387 SET_DECL_ALIGN (base_decl, align_base_to);
6388 DECL_USER_ALIGN (base_decl) = 1;
6389 }
6390 dr_info->base_misaligned = false;
6391 }
6392 }
6393
6394
6395 /* Function get_group_alias_ptr_type.
6396
6397 Return the alias type for the group starting at FIRST_STMT_INFO. */
6398
6399 static tree
6400 get_group_alias_ptr_type (stmt_vec_info first_stmt_info)
6401 {
6402 struct data_reference *first_dr, *next_dr;
6403
6404 first_dr = STMT_VINFO_DATA_REF (first_stmt_info);
6405 stmt_vec_info next_stmt_info = DR_GROUP_NEXT_ELEMENT (first_stmt_info);
6406 while (next_stmt_info)
6407 {
6408 next_dr = STMT_VINFO_DATA_REF (next_stmt_info);
6409 if (get_alias_set (DR_REF (first_dr))
6410 != get_alias_set (DR_REF (next_dr)))
6411 {
6412 if (dump_enabled_p ())
6413 dump_printf_loc (MSG_NOTE, vect_location,
6414 "conflicting alias set types.\n");
6415 return ptr_type_node;
6416 }
6417 next_stmt_info = DR_GROUP_NEXT_ELEMENT (next_stmt_info);
6418 }
6419 return reference_alias_ptr_type (DR_REF (first_dr));
6420 }
6421
6422
6423 /* Function scan_operand_equal_p.
6424
6425 Helper function for check_scan_store. Compare two references
6426 with .GOMP_SIMD_LANE bases. */
6427
6428 static bool
6429 scan_operand_equal_p (tree ref1, tree ref2)
6430 {
6431 tree ref[2] = { ref1, ref2 };
6432 poly_int64 bitsize[2], bitpos[2];
6433 tree offset[2], base[2];
6434 for (int i = 0; i < 2; ++i)
6435 {
6436 machine_mode mode;
6437 int unsignedp, reversep, volatilep = 0;
6438 base[i] = get_inner_reference (ref[i], &bitsize[i], &bitpos[i],
6439 &offset[i], &mode, &unsignedp,
6440 &reversep, &volatilep);
6441 if (reversep || volatilep || maybe_ne (bitpos[i], 0))
6442 return false;
6443 if (TREE_CODE (base[i]) == MEM_REF
6444 && offset[i] == NULL_TREE
6445 && TREE_CODE (TREE_OPERAND (base[i], 0)) == SSA_NAME)
6446 {
6447 gimple *def_stmt = SSA_NAME_DEF_STMT (TREE_OPERAND (base[i], 0));
6448 if (is_gimple_assign (def_stmt)
6449 && gimple_assign_rhs_code (def_stmt) == POINTER_PLUS_EXPR
6450 && TREE_CODE (gimple_assign_rhs1 (def_stmt)) == ADDR_EXPR
6451 && TREE_CODE (gimple_assign_rhs2 (def_stmt)) == SSA_NAME)
6452 {
6453 if (maybe_ne (mem_ref_offset (base[i]), 0))
6454 return false;
6455 base[i] = TREE_OPERAND (gimple_assign_rhs1 (def_stmt), 0);
6456 offset[i] = gimple_assign_rhs2 (def_stmt);
6457 }
6458 }
6459 }
6460
6461 if (!operand_equal_p (base[0], base[1], 0))
6462 return false;
6463 if (maybe_ne (bitsize[0], bitsize[1]))
6464 return false;
6465 if (offset[0] != offset[1])
6466 {
6467 if (!offset[0] || !offset[1])
6468 return false;
6469 if (!operand_equal_p (offset[0], offset[1], 0))
6470 {
6471 tree step[2];
6472 for (int i = 0; i < 2; ++i)
6473 {
6474 step[i] = integer_one_node;
6475 if (TREE_CODE (offset[i]) == SSA_NAME)
6476 {
6477 gimple *def_stmt = SSA_NAME_DEF_STMT (offset[i]);
6478 if (is_gimple_assign (def_stmt)
6479 && gimple_assign_rhs_code (def_stmt) == MULT_EXPR
6480 && (TREE_CODE (gimple_assign_rhs2 (def_stmt))
6481 == INTEGER_CST))
6482 {
6483 step[i] = gimple_assign_rhs2 (def_stmt);
6484 offset[i] = gimple_assign_rhs1 (def_stmt);
6485 }
6486 }
6487 else if (TREE_CODE (offset[i]) == MULT_EXPR)
6488 {
6489 step[i] = TREE_OPERAND (offset[i], 1);
6490 offset[i] = TREE_OPERAND (offset[i], 0);
6491 }
6492 tree rhs1 = NULL_TREE;
6493 if (TREE_CODE (offset[i]) == SSA_NAME)
6494 {
6495 gimple *def_stmt = SSA_NAME_DEF_STMT (offset[i]);
6496 if (gimple_assign_cast_p (def_stmt))
6497 rhs1 = gimple_assign_rhs1 (def_stmt);
6498 }
6499 else if (CONVERT_EXPR_P (offset[i]))
6500 rhs1 = TREE_OPERAND (offset[i], 0);
6501 if (rhs1
6502 && INTEGRAL_TYPE_P (TREE_TYPE (rhs1))
6503 && INTEGRAL_TYPE_P (TREE_TYPE (offset[i]))
6504 && (TYPE_PRECISION (TREE_TYPE (offset[i]))
6505 >= TYPE_PRECISION (TREE_TYPE (rhs1))))
6506 offset[i] = rhs1;
6507 }
6508 if (!operand_equal_p (offset[0], offset[1], 0)
6509 || !operand_equal_p (step[0], step[1], 0))
6510 return false;
6511 }
6512 }
6513 return true;
6514 }
6515
6516
6517 enum scan_store_kind {
6518 /* Normal permutation. */
6519 scan_store_kind_perm,
6520
6521 /* Whole vector left shift permutation with zero init. */
6522 scan_store_kind_lshift_zero,
6523
6524 /* Whole vector left shift permutation and VEC_COND_EXPR. */
6525 scan_store_kind_lshift_cond
6526 };
6527
6528 /* Function scan_store_can_perm_p.
6529
6530 Verify if we can perform the needed permutations or whole vector shifts.
6531 Return -1 on failure, otherwise the exact log2 of vectype's nunits.
6532 USE_WHOLE_VECTOR is a vector of enum scan_store_kind saying which
6533 operation to use at each step. */
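/* As an illustration, for nunits == 8 the permutations checked are
{ 0, 8, 9, 10, 11, 12, 13, 14 }
{ 0, 1, 8, 9, 10, 11, 12, 13 }
{ 0, 1, 2, 3, 8, 9, 10, 11 }
followed by a final splat { 7, 7, 7, 7, 7, 7, 7, 7 } of the last
element. */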
6534
6535 static int
6536 scan_store_can_perm_p (tree vectype, tree init,
6537 vec<enum scan_store_kind> *use_whole_vector = NULL)
6538 {
6539 enum machine_mode vec_mode = TYPE_MODE (vectype);
6540 unsigned HOST_WIDE_INT nunits;
6541 if (!TYPE_VECTOR_SUBPARTS (vectype).is_constant (&nunits))
6542 return -1;
6543 int units_log2 = exact_log2 (nunits);
6544 if (units_log2 <= 0)
6545 return -1;
6546
6547 int i;
6548 enum scan_store_kind whole_vector_shift_kind = scan_store_kind_perm;
6549 for (i = 0; i <= units_log2; ++i)
6550 {
6551 unsigned HOST_WIDE_INT j, k;
6552 enum scan_store_kind kind = scan_store_kind_perm;
6553 vec_perm_builder sel (nunits, nunits, 1);
6554 sel.quick_grow (nunits);
6555 if (i == units_log2)
6556 {
6557 for (j = 0; j < nunits; ++j)
6558 sel[j] = nunits - 1;
6559 }
6560 else
6561 {
6562 for (j = 0; j < (HOST_WIDE_INT_1U << i); ++j)
6563 sel[j] = j;
6564 for (k = 0; j < nunits; ++j, ++k)
6565 sel[j] = nunits + k;
6566 }
6567 vec_perm_indices indices (sel, i == units_log2 ? 1 : 2, nunits);
6568 if (!can_vec_perm_const_p (vec_mode, indices))
6569 {
6570 if (i == units_log2)
6571 return -1;
6572
6573 if (whole_vector_shift_kind == scan_store_kind_perm)
6574 {
6575 if (optab_handler (vec_shl_optab, vec_mode) == CODE_FOR_nothing)
6576 return -1;
6577 whole_vector_shift_kind = scan_store_kind_lshift_zero;
6578 /* Whole vector shifts shift in zeros, so if init is an all-zero
6579 constant, there is no need to do anything further. */
6580 if ((TREE_CODE (init) != INTEGER_CST
6581 && TREE_CODE (init) != REAL_CST)
6582 || !initializer_zerop (init))
6583 {
6584 tree masktype = truth_type_for (vectype);
6585 if (!expand_vec_cond_expr_p (vectype, masktype, VECTOR_CST))
6586 return -1;
6587 whole_vector_shift_kind = scan_store_kind_lshift_cond;
6588 }
6589 }
6590 kind = whole_vector_shift_kind;
6591 }
6592 if (use_whole_vector)
6593 {
6594 if (kind != scan_store_kind_perm && use_whole_vector->is_empty ())
6595 use_whole_vector->safe_grow_cleared (i);
6596 if (kind != scan_store_kind_perm || !use_whole_vector->is_empty ())
6597 use_whole_vector->safe_push (kind);
6598 }
6599 }
6600
6601 return units_log2;
6602 }
6603
6604
6605 /* Function check_scan_store.
6606
6607 Check magic stores for #pragma omp scan {in,ex}clusive reductions. */
6608
6609 static bool
6610 check_scan_store (stmt_vec_info stmt_info, tree vectype,
6611 enum vect_def_type rhs_dt, bool slp, tree mask,
6612 vect_memory_access_type memory_access_type)
6613 {
6614 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
6615 dr_vec_info *dr_info = STMT_VINFO_DR_INFO (stmt_info);
6616 tree ref_type;
6617
6618 gcc_assert (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) > 1);
6619 if (slp
6620 || mask
6621 || memory_access_type != VMAT_CONTIGUOUS
6622 || TREE_CODE (DR_BASE_ADDRESS (dr_info->dr)) != ADDR_EXPR
6623 || !VAR_P (TREE_OPERAND (DR_BASE_ADDRESS (dr_info->dr), 0))
6624 || loop_vinfo == NULL
6625 || LOOP_VINFO_FULLY_MASKED_P (loop_vinfo)
6626 || STMT_VINFO_GROUPED_ACCESS (stmt_info)
6627 || !integer_zerop (DR_OFFSET (dr_info->dr))
6628 || !integer_zerop (DR_INIT (dr_info->dr))
6629 || !(ref_type = reference_alias_ptr_type (DR_REF (dr_info->dr)))
6630 || !alias_sets_conflict_p (get_alias_set (vectype),
6631 get_alias_set (TREE_TYPE (ref_type))))
6632 {
6633 if (dump_enabled_p ())
6634 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6635 "unsupported OpenMP scan store.\n");
6636 return false;
6637 }
6638
6639 /* We need to pattern match code built by OpenMP lowering and simplified
6640 by subsequent optimizations into something we can handle.
6641 #pragma omp simd reduction(inscan,+:r)
6642 for (...)
6643 {
6644 r += something ();
6645 #pragma omp scan inclusive (r)
6646 use (r);
6647 }
6648 shall have body with:
6649 // Initialization for input phase, store the reduction initializer:
6650 _20 = .GOMP_SIMD_LANE (simduid.3_14(D), 0);
6651 _21 = .GOMP_SIMD_LANE (simduid.3_14(D), 1);
6652 D.2042[_21] = 0;
6653 // Actual input phase:
6654 ...
6655 r.0_5 = D.2042[_20];
6656 _6 = _4 + r.0_5;
6657 D.2042[_20] = _6;
6658 // Initialization for scan phase:
6659 _25 = .GOMP_SIMD_LANE (simduid.3_14(D), 2);
6660 _26 = D.2043[_25];
6661 _27 = D.2042[_25];
6662 _28 = _26 + _27;
6663 D.2043[_25] = _28;
6664 D.2042[_25] = _28;
6665 // Actual scan phase:
6666 ...
6667 r.1_8 = D.2042[_20];
6668 ...
6669 The "omp simd array" variable D.2042 holds the privatized copy used
6670 inside of the loop and D.2043 is another one that holds copies of
6671 the current original list item. The separate GOMP_SIMD_LANE ifn
6672 kinds are there in order to allow optimizing the initializer store
6673 and combiner sequence, e.g. if it is originally some C++ish user
6674 defined reduction, but still allow the vectorizer to pattern recognize it
6675 and turn it into the appropriate vectorized scan.
6676
6677 For exclusive scan, this is slightly different:
6678 #pragma omp simd reduction(inscan,+:r)
6679 for (...)
6680 {
6681 use (r);
6682 #pragma omp scan exclusive (r)
6683 r += something ();
6684 }
6685 shall have body with:
6686 // Initialization for input phase, store the reduction initializer:
6687 _20 = .GOMP_SIMD_LANE (simduid.3_14(D), 0);
6688 _21 = .GOMP_SIMD_LANE (simduid.3_14(D), 1);
6689 D.2042[_21] = 0;
6690 // Actual input phase:
6691 ...
6692 r.0_5 = D.2042[_20];
6693 _6 = _4 + r.0_5;
6694 D.2042[_20] = _6;
6695 // Initialization for scan phase:
6696 _25 = .GOMP_SIMD_LANE (simduid.3_14(D), 3);
6697 _26 = D.2043[_25];
6698 D.2044[_25] = _26;
6699 _27 = D.2042[_25];
6700 _28 = _26 + _27;
6701 D.2043[_25] = _28;
6702 // Actual scan phase:
6703 ...
6704 r.1_8 = D.2044[_20];
6705 ... */
6706
6707 if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) == 2)
6708 {
6709 /* Match the D.2042[_21] = 0; store above. Just require that
6710 it is a constant or external definition store. */
6711 if (rhs_dt != vect_constant_def && rhs_dt != vect_external_def)
6712 {
6713 fail_init:
6714 if (dump_enabled_p ())
6715 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6716 "unsupported OpenMP scan initializer store.\n");
6717 return false;
6718 }
6719
6720 if (! loop_vinfo->scan_map)
6721 loop_vinfo->scan_map = new hash_map<tree, tree>;
6722 tree var = TREE_OPERAND (DR_BASE_ADDRESS (dr_info->dr), 0);
6723 tree &cached = loop_vinfo->scan_map->get_or_insert (var);
6724 if (cached)
6725 goto fail_init;
6726 cached = gimple_assign_rhs1 (STMT_VINFO_STMT (stmt_info));
6727
6728 /* These stores can be vectorized normally. */
6729 return true;
6730 }
6731
6732 if (rhs_dt != vect_internal_def)
6733 {
6734 fail:
6735 if (dump_enabled_p ())
6736 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6737 "unsupported OpenMP scan combiner pattern.\n");
6738 return false;
6739 }
6740
6741 gimple *stmt = STMT_VINFO_STMT (stmt_info);
6742 tree rhs = gimple_assign_rhs1 (stmt);
6743 if (TREE_CODE (rhs) != SSA_NAME)
6744 goto fail;
6745
6746 gimple *other_store_stmt = NULL;
6747 tree var = TREE_OPERAND (DR_BASE_ADDRESS (dr_info->dr), 0);
6748 bool inscan_var_store
6749 = lookup_attribute ("omp simd inscan", DECL_ATTRIBUTES (var)) != NULL;
6750
6751 if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) == 4)
6752 {
6753 if (!inscan_var_store)
6754 {
6755 use_operand_p use_p;
6756 imm_use_iterator iter;
6757 FOR_EACH_IMM_USE_FAST (use_p, iter, rhs)
6758 {
6759 gimple *use_stmt = USE_STMT (use_p);
6760 if (use_stmt == stmt || is_gimple_debug (use_stmt))
6761 continue;
6762 if (gimple_bb (use_stmt) != gimple_bb (stmt)
6763 || !is_gimple_assign (use_stmt)
6764 || gimple_assign_rhs_class (use_stmt) != GIMPLE_BINARY_RHS
6765 || other_store_stmt
6766 || TREE_CODE (gimple_assign_lhs (use_stmt)) != SSA_NAME)
6767 goto fail;
6768 other_store_stmt = use_stmt;
6769 }
6770 if (other_store_stmt == NULL)
6771 goto fail;
6772 rhs = gimple_assign_lhs (other_store_stmt);
6773 if (!single_imm_use (rhs, &use_p, &other_store_stmt))
6774 goto fail;
6775 }
6776 }
6777 else if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) == 3)
6778 {
6779 use_operand_p use_p;
6780 imm_use_iterator iter;
6781 FOR_EACH_IMM_USE_FAST (use_p, iter, rhs)
6782 {
6783 gimple *use_stmt = USE_STMT (use_p);
6784 if (use_stmt == stmt || is_gimple_debug (use_stmt))
6785 continue;
6786 if (other_store_stmt)
6787 goto fail;
6788 other_store_stmt = use_stmt;
6789 }
6790 }
6791 else
6792 goto fail;
6793
6794 gimple *def_stmt = SSA_NAME_DEF_STMT (rhs);
6795 if (gimple_bb (def_stmt) != gimple_bb (stmt)
6796 || !is_gimple_assign (def_stmt)
6797 || gimple_assign_rhs_class (def_stmt) != GIMPLE_BINARY_RHS)
6798 goto fail;
6799
6800 enum tree_code code = gimple_assign_rhs_code (def_stmt);
6801 /* For pointer addition, we should use the normal plus for the vector
6802 operation. */
6803 switch (code)
6804 {
6805 case POINTER_PLUS_EXPR:
6806 code = PLUS_EXPR;
6807 break;
6808 case MULT_HIGHPART_EXPR:
6809 goto fail;
6810 default:
6811 break;
6812 }
6813 if (TREE_CODE_LENGTH (code) != binary_op || !commutative_tree_code (code))
6814 goto fail;
6815
6816 tree rhs1 = gimple_assign_rhs1 (def_stmt);
6817 tree rhs2 = gimple_assign_rhs2 (def_stmt);
6818 if (TREE_CODE (rhs1) != SSA_NAME || TREE_CODE (rhs2) != SSA_NAME)
6819 goto fail;
6820
6821 gimple *load1_stmt = SSA_NAME_DEF_STMT (rhs1);
6822 gimple *load2_stmt = SSA_NAME_DEF_STMT (rhs2);
6823 if (gimple_bb (load1_stmt) != gimple_bb (stmt)
6824 || !gimple_assign_load_p (load1_stmt)
6825 || gimple_bb (load2_stmt) != gimple_bb (stmt)
6826 || !gimple_assign_load_p (load2_stmt))
6827 goto fail;
6828
6829 stmt_vec_info load1_stmt_info = loop_vinfo->lookup_stmt (load1_stmt);
6830 stmt_vec_info load2_stmt_info = loop_vinfo->lookup_stmt (load2_stmt);
6831 if (load1_stmt_info == NULL
6832 || load2_stmt_info == NULL
6833 || (STMT_VINFO_SIMD_LANE_ACCESS_P (load1_stmt_info)
6834 != STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info))
6835 || (STMT_VINFO_SIMD_LANE_ACCESS_P (load2_stmt_info)
6836 != STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info)))
6837 goto fail;
6838
6839 if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) == 4 && inscan_var_store)
6840 {
6841 dr_vec_info *load1_dr_info = STMT_VINFO_DR_INFO (load1_stmt_info);
6842 if (TREE_CODE (DR_BASE_ADDRESS (load1_dr_info->dr)) != ADDR_EXPR
6843 || !VAR_P (TREE_OPERAND (DR_BASE_ADDRESS (load1_dr_info->dr), 0)))
6844 goto fail;
6845 tree var1 = TREE_OPERAND (DR_BASE_ADDRESS (load1_dr_info->dr), 0);
6846 tree lrhs;
6847 if (lookup_attribute ("omp simd inscan", DECL_ATTRIBUTES (var1)))
6848 lrhs = rhs1;
6849 else
6850 lrhs = rhs2;
6851 use_operand_p use_p;
6852 imm_use_iterator iter;
6853 FOR_EACH_IMM_USE_FAST (use_p, iter, lrhs)
6854 {
6855 gimple *use_stmt = USE_STMT (use_p);
6856 if (use_stmt == def_stmt || is_gimple_debug (use_stmt))
6857 continue;
6858 if (other_store_stmt)
6859 goto fail;
6860 other_store_stmt = use_stmt;
6861 }
6862 }
6863
6864 if (other_store_stmt == NULL)
6865 goto fail;
6866 if (gimple_bb (other_store_stmt) != gimple_bb (stmt)
6867 || !gimple_store_p (other_store_stmt))
6868 goto fail;
6869
6870 stmt_vec_info other_store_stmt_info
6871 = loop_vinfo->lookup_stmt (other_store_stmt);
6872 if (other_store_stmt_info == NULL
6873 || (STMT_VINFO_SIMD_LANE_ACCESS_P (other_store_stmt_info)
6874 != STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info)))
6875 goto fail;
6876
6877 gimple *stmt1 = stmt;
6878 gimple *stmt2 = other_store_stmt;
6879 if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) == 4 && !inscan_var_store)
6880 std::swap (stmt1, stmt2);
6881 if (scan_operand_equal_p (gimple_assign_lhs (stmt1),
6882 gimple_assign_rhs1 (load2_stmt)))
6883 {
6884 std::swap (rhs1, rhs2);
6885 std::swap (load1_stmt, load2_stmt);
6886 std::swap (load1_stmt_info, load2_stmt_info);
6887 }
6888 if (!scan_operand_equal_p (gimple_assign_lhs (stmt1),
6889 gimple_assign_rhs1 (load1_stmt)))
6890 goto fail;
6891
6892 tree var3 = NULL_TREE;
6893 if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) == 3
6894 && !scan_operand_equal_p (gimple_assign_lhs (stmt2),
6895 gimple_assign_rhs1 (load2_stmt)))
6896 goto fail;
6897 else if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) == 4)
6898 {
6899 dr_vec_info *load2_dr_info = STMT_VINFO_DR_INFO (load2_stmt_info);
6900 if (TREE_CODE (DR_BASE_ADDRESS (load2_dr_info->dr)) != ADDR_EXPR
6901 || !VAR_P (TREE_OPERAND (DR_BASE_ADDRESS (load2_dr_info->dr), 0)))
6902 goto fail;
6903 var3 = TREE_OPERAND (DR_BASE_ADDRESS (load2_dr_info->dr), 0);
6904 if (!lookup_attribute ("omp simd array", DECL_ATTRIBUTES (var3))
6905 || lookup_attribute ("omp simd inscan", DECL_ATTRIBUTES (var3))
6906 || lookup_attribute ("omp simd inscan exclusive",
6907 DECL_ATTRIBUTES (var3)))
6908 goto fail;
6909 }
6910
6911 dr_vec_info *other_dr_info = STMT_VINFO_DR_INFO (other_store_stmt_info);
6912 if (TREE_CODE (DR_BASE_ADDRESS (other_dr_info->dr)) != ADDR_EXPR
6913 || !VAR_P (TREE_OPERAND (DR_BASE_ADDRESS (other_dr_info->dr), 0)))
6914 goto fail;
6915
6916 tree var1 = TREE_OPERAND (DR_BASE_ADDRESS (dr_info->dr), 0);
6917 tree var2 = TREE_OPERAND (DR_BASE_ADDRESS (other_dr_info->dr), 0);
6918 if (!lookup_attribute ("omp simd array", DECL_ATTRIBUTES (var1))
6919 || !lookup_attribute ("omp simd array", DECL_ATTRIBUTES (var2))
6920 || (!lookup_attribute ("omp simd inscan", DECL_ATTRIBUTES (var1)))
6921 == (!lookup_attribute ("omp simd inscan", DECL_ATTRIBUTES (var2))))
6922 goto fail;
6923
6924 if (lookup_attribute ("omp simd inscan", DECL_ATTRIBUTES (var1)))
6925 std::swap (var1, var2);
6926
6927 if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) == 4)
6928 {
6929 if (!lookup_attribute ("omp simd inscan exclusive",
6930 DECL_ATTRIBUTES (var1)))
6931 goto fail;
6932 var1 = var3;
6933 }
6934
6935 if (loop_vinfo->scan_map == NULL)
6936 goto fail;
6937 tree *init = loop_vinfo->scan_map->get (var1);
6938 if (init == NULL)
6939 goto fail;
6940
6941 /* The IL is as expected; now check whether we can actually vectorize it.
6942 Inclusive scan:
6943 _26 = D.2043[_25];
6944 _27 = D.2042[_25];
6945 _28 = _26 + _27;
6946 D.2043[_25] = _28;
6947 D.2042[_25] = _28;
6948 should be vectorized as (where _40 is the vectorized rhs
6949 from the D.2042[_21] = 0; store):
6950 _30 = MEM <vector(8) int> [(int *)&D.2043];
6951 _31 = MEM <vector(8) int> [(int *)&D.2042];
6952 _32 = VEC_PERM_EXPR <_40, _31, { 0, 8, 9, 10, 11, 12, 13, 14 }>;
6953 _33 = _31 + _32;
6954 // _33 = { _31[0], _31[0]+_31[1], _31[1]+_31[2], ..., _31[6]+_31[7] };
6955 _34 = VEC_PERM_EXPR <_40, _33, { 0, 1, 8, 9, 10, 11, 12, 13 }>;
6956 _35 = _33 + _34;
6957 // _35 = { _31[0], _31[0]+_31[1], _31[0]+.._31[2], _31[0]+.._31[3],
6958 // _31[1]+.._31[4], ... _31[4]+.._31[7] };
6959 _36 = VEC_PERM_EXPR <_40, _35, { 0, 1, 2, 3, 8, 9, 10, 11 }>;
6960 _37 = _35 + _36;
6961 // _37 = { _31[0], _31[0]+_31[1], _31[0]+.._31[2], _31[0]+.._31[3],
6962 // _31[0]+.._31[4], ... _31[0]+.._31[7] };
6963 _38 = _30 + _37;
6964 _39 = VEC_PERM_EXPR <_38, _38, { 7, 7, 7, 7, 7, 7, 7, 7 }>;
6965 MEM <vector(8) int> [(int *)&D.2043] = _39;
6966 MEM <vector(8) int> [(int *)&D.2042] = _38;
6967 Exclusive scan:
6968 _26 = D.2043[_25];
6969 D.2044[_25] = _26;
6970 _27 = D.2042[_25];
6971 _28 = _26 + _27;
6972 D.2043[_25] = _28;
6973 should be vectorized as (where _40 is the vectorized rhs
6974 from the D.2042[_21] = 0; store):
6975 _30 = MEM <vector(8) int> [(int *)&D.2043];
6976 _31 = MEM <vector(8) int> [(int *)&D.2042];
6977 _32 = VEC_PERM_EXPR <_40, _31, { 0, 8, 9, 10, 11, 12, 13, 14 }>;
6978 _33 = VEC_PERM_EXPR <_40, _32, { 0, 8, 9, 10, 11, 12, 13, 14 }>;
6979 _34 = _32 + _33;
6980 // _34 = { 0, _31[0], _31[0]+_31[1], _31[1]+_31[2], _31[2]+_31[3],
6981 // _31[3]+_31[4], ... _31[5]+.._31[6] };
6982 _35 = VEC_PERM_EXPR <_40, _34, { 0, 1, 8, 9, 10, 11, 12, 13 }>;
6983 _36 = _34 + _35;
6984 // _36 = { 0, _31[0], _31[0]+_31[1], _31[0]+.._31[2], _31[0]+.._31[3],
6985 // _31[1]+.._31[4], ... _31[3]+.._31[6] };
6986 _37 = VEC_PERM_EXPR <_40, _36, { 0, 1, 2, 3, 8, 9, 10, 11 }>;
6987 _38 = _36 + _37;
6988 // _38 = { 0, _31[0], _31[0]+_31[1], _31[0]+.._31[2], _31[0]+.._31[3],
6989 // _31[0]+.._31[4], ... _31[0]+.._31[6] };
6990 _39 = _30 + _38;
6991 _50 = _31 + _39;
6992 _51 = VEC_PERM_EXPR <_50, _50, { 7, 7, 7, 7, 7, 7, 7, 7 }>;
6993 MEM <vector(8) int> [(int *)&D.2044] = _39;
6994 MEM <vector(8) int> [(int *)&D.2042] = _51; */
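/* As an illustrative sketch only (not taken from the sources), IL of the
   inclusive shape above typically results from an OpenMP scan reduction
   along the lines of:
     #pragma omp simd reduction (inscan, +:r)
     for (i = 0; i < n; i++)
       {
         r += a[i];
         #pragma omp scan inclusive (r)
         b[i] = r;
       }
   where the exclusive form instead reads r before the
   "#pragma omp scan exclusive (r)" directive and updates it afterwards. */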
6995 enum machine_mode vec_mode = TYPE_MODE (vectype);
6996 optab optab = optab_for_tree_code (code, vectype, optab_default);
6997 if (!optab || optab_handler (optab, vec_mode) == CODE_FOR_nothing)
6998 goto fail;
6999
7000 int units_log2 = scan_store_can_perm_p (vectype, *init);
7001 if (units_log2 == -1)
7002 goto fail;
7003
7004 return true;
7005 }
7006
7007
7008 /* Function vectorizable_scan_store.
7009
7010 Helper of vectorizable_store; arguments as for vectorizable_store.
7011 Handle only the transformation; checking is done in check_scan_store. */
7012
7013 static bool
7014 vectorizable_scan_store (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
7015 stmt_vec_info *vec_stmt, int ncopies)
7016 {
7017 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
7018 dr_vec_info *dr_info = STMT_VINFO_DR_INFO (stmt_info);
7019 tree ref_type = reference_alias_ptr_type (DR_REF (dr_info->dr));
7020 vec_info *vinfo = stmt_info->vinfo;
7021 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
7022
7023 if (dump_enabled_p ())
7024 dump_printf_loc (MSG_NOTE, vect_location,
7025 "transform scan store. ncopies = %d\n", ncopies);
7026
7027 gimple *stmt = STMT_VINFO_STMT (stmt_info);
7028 tree rhs = gimple_assign_rhs1 (stmt);
7029 gcc_assert (TREE_CODE (rhs) == SSA_NAME);
7030
7031 tree var = TREE_OPERAND (DR_BASE_ADDRESS (dr_info->dr), 0);
7032 bool inscan_var_store
7033 = lookup_attribute ("omp simd inscan", DECL_ATTRIBUTES (var)) != NULL;
7034
7035 if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) == 4 && !inscan_var_store)
7036 {
7037 use_operand_p use_p;
7038 imm_use_iterator iter;
7039 FOR_EACH_IMM_USE_FAST (use_p, iter, rhs)
7040 {
7041 gimple *use_stmt = USE_STMT (use_p);
7042 if (use_stmt == stmt || is_gimple_debug (use_stmt))
7043 continue;
7044 rhs = gimple_assign_lhs (use_stmt);
7045 break;
7046 }
7047 }
7048
7049 gimple *def_stmt = SSA_NAME_DEF_STMT (rhs);
7050 enum tree_code code = gimple_assign_rhs_code (def_stmt);
7051 if (code == POINTER_PLUS_EXPR)
7052 code = PLUS_EXPR;
7053 gcc_assert (TREE_CODE_LENGTH (code) == binary_op
7054 && commutative_tree_code (code));
7055 tree rhs1 = gimple_assign_rhs1 (def_stmt);
7056 tree rhs2 = gimple_assign_rhs2 (def_stmt);
7057 gcc_assert (TREE_CODE (rhs1) == SSA_NAME && TREE_CODE (rhs2) == SSA_NAME);
7058 gimple *load1_stmt = SSA_NAME_DEF_STMT (rhs1);
7059 gimple *load2_stmt = SSA_NAME_DEF_STMT (rhs2);
7060 stmt_vec_info load1_stmt_info = loop_vinfo->lookup_stmt (load1_stmt);
7061 stmt_vec_info load2_stmt_info = loop_vinfo->lookup_stmt (load2_stmt);
7062 dr_vec_info *load1_dr_info = STMT_VINFO_DR_INFO (load1_stmt_info);
7063 dr_vec_info *load2_dr_info = STMT_VINFO_DR_INFO (load2_stmt_info);
7064 tree var1 = TREE_OPERAND (DR_BASE_ADDRESS (load1_dr_info->dr), 0);
7065 tree var2 = TREE_OPERAND (DR_BASE_ADDRESS (load2_dr_info->dr), 0);
7066
7067 if (lookup_attribute ("omp simd inscan", DECL_ATTRIBUTES (var1)))
7068 {
7069 std::swap (rhs1, rhs2);
7070 std::swap (var1, var2);
7071 std::swap (load1_dr_info, load2_dr_info);
7072 }
7073
7074 tree *init = loop_vinfo->scan_map->get (var1);
7075 gcc_assert (init);
7076
7077 unsigned HOST_WIDE_INT nunits;
7078 if (!TYPE_VECTOR_SUBPARTS (vectype).is_constant (&nunits))
7079 gcc_unreachable ();
7080 auto_vec<enum scan_store_kind, 16> use_whole_vector;
7081 int units_log2 = scan_store_can_perm_p (vectype, *init, &use_whole_vector);
7082 gcc_assert (units_log2 > 0);
7083 auto_vec<tree, 16> perms;
7084 perms.quick_grow (units_log2 + 1);
7085 tree zero_vec = NULL_TREE, masktype = NULL_TREE;
7086 for (int i = 0; i <= units_log2; ++i)
7087 {
7088 unsigned HOST_WIDE_INT j, k;
7089 vec_perm_builder sel (nunits, nunits, 1);
7090 sel.quick_grow (nunits);
7091 if (i == units_log2)
7092 for (j = 0; j < nunits; ++j)
7093 sel[j] = nunits - 1;
7094 else
7095 {
7096 for (j = 0; j < (HOST_WIDE_INT_1U << i); ++j)
7097 sel[j] = j;
7098 for (k = 0; j < nunits; ++j, ++k)
7099 sel[j] = nunits + k;
7100 }
7101 vec_perm_indices indices (sel, i == units_log2 ? 1 : 2, nunits);
7102 if (!use_whole_vector.is_empty ()
7103 && use_whole_vector[i] != scan_store_kind_perm)
7104 {
7105 if (zero_vec == NULL_TREE)
7106 zero_vec = build_zero_cst (vectype);
7107 if (masktype == NULL_TREE
7108 && use_whole_vector[i] == scan_store_kind_lshift_cond)
7109 masktype = truth_type_for (vectype);
7110 perms[i] = vect_gen_perm_mask_any (vectype, indices);
7111 }
7112 else
7113 perms[i] = vect_gen_perm_mask_checked (vectype, indices);
7114 }
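/* For example (values derived from the loop above, for illustration):
   with nunits == 8 and units_log2 == 3 this builds
     perms[0] = { 0, 8, 9, 10, 11, 12, 13, 14 }
     perms[1] = { 0, 1, 8, 9, 10, 11, 12, 13 }
     perms[2] = { 0, 1, 2, 3, 8, 9, 10, 11 }
     perms[3] = { 7, 7, 7, 7, 7, 7, 7, 7 }
   matching the VEC_PERM_EXPRs shown in the check_scan_store comment. */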
7115
7116 stmt_vec_info prev_stmt_info = NULL;
7117 tree vec_oprnd1 = NULL_TREE;
7118 tree vec_oprnd2 = NULL_TREE;
7119 tree vec_oprnd3 = NULL_TREE;
7120 tree dataref_ptr = DR_BASE_ADDRESS (dr_info->dr);
7121 tree dataref_offset = build_int_cst (ref_type, 0);
7122 tree bump = vect_get_data_ptr_increment (dr_info, vectype, VMAT_CONTIGUOUS);
7123 tree ldataref_ptr = NULL_TREE;
7124 tree orig = NULL_TREE;
7125 if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) == 4 && !inscan_var_store)
7126 ldataref_ptr = DR_BASE_ADDRESS (load1_dr_info->dr);
7127 for (int j = 0; j < ncopies; j++)
7128 {
7129 stmt_vec_info new_stmt_info;
7130 if (j == 0)
7131 {
7132 vec_oprnd1 = vect_get_vec_def_for_operand (*init, stmt_info);
7133 if (ldataref_ptr == NULL)
7134 vec_oprnd2 = vect_get_vec_def_for_operand (rhs1, stmt_info);
7135 vec_oprnd3 = vect_get_vec_def_for_operand (rhs2, stmt_info);
7136 orig = vec_oprnd3;
7137 }
7138 else
7139 {
7140 vec_oprnd1 = vect_get_vec_def_for_stmt_copy (vinfo, vec_oprnd1);
7141 if (ldataref_ptr == NULL)
7142 vec_oprnd2 = vect_get_vec_def_for_stmt_copy (vinfo, vec_oprnd2);
7143 vec_oprnd3 = vect_get_vec_def_for_stmt_copy (vinfo, vec_oprnd3);
7144 if (!inscan_var_store)
7145 dataref_offset = int_const_binop (PLUS_EXPR, dataref_offset, bump);
7146 }
7147
7148 if (ldataref_ptr)
7149 {
7150 vec_oprnd2 = make_ssa_name (vectype);
7151 tree data_ref = fold_build2 (MEM_REF, vectype,
7152 unshare_expr (ldataref_ptr),
7153 dataref_offset);
7154 vect_copy_ref_info (data_ref, DR_REF (load1_dr_info->dr));
7155 gimple *g = gimple_build_assign (vec_oprnd2, data_ref);
7156 new_stmt_info = vect_finish_stmt_generation (stmt_info, g, gsi);
7157 if (prev_stmt_info == NULL)
7158 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt_info;
7159 else
7160 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt_info;
7161 prev_stmt_info = new_stmt_info;
7162 }
7163
7164 tree v = vec_oprnd2;
7165 for (int i = 0; i < units_log2; ++i)
7166 {
7167 tree new_temp = make_ssa_name (vectype);
7168 gimple *g = gimple_build_assign (new_temp, VEC_PERM_EXPR,
7169 (zero_vec
7170 && (use_whole_vector[i]
7171 != scan_store_kind_perm))
7172 ? zero_vec : vec_oprnd1, v,
7173 perms[i]);
7174 new_stmt_info = vect_finish_stmt_generation (stmt_info, g, gsi);
7175 if (prev_stmt_info == NULL)
7176 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt_info;
7177 else
7178 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt_info;
7179 prev_stmt_info = new_stmt_info;
7180
7181 if (zero_vec && use_whole_vector[i] == scan_store_kind_lshift_cond)
7182 {
7183 /* The whole-vector shift shifted in zero bits; if *init is not
7184 initializer_zerop, we need to replace those shifted-in elements
7185 with the corresponding elements from vec_oprnd1. */
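/* E.g. (illustration only) for i == 1 the mask built below is
   { false, false, true, true, ... }, so the two shifted-in lanes are
   taken from vec_oprnd1 and the remaining lanes from new_temp. */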
7186 tree_vector_builder vb (masktype, nunits, 1);
7187 for (unsigned HOST_WIDE_INT k = 0; k < nunits; ++k)
7188 vb.quick_push (k < (HOST_WIDE_INT_1U << i)
7189 ? boolean_false_node : boolean_true_node);
7190
7191 tree new_temp2 = make_ssa_name (vectype);
7192 g = gimple_build_assign (new_temp2, VEC_COND_EXPR, vb.build (),
7193 new_temp, vec_oprnd1);
7194 new_stmt_info = vect_finish_stmt_generation (stmt_info, g, gsi);
7195 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt_info;
7196 prev_stmt_info = new_stmt_info;
7197 new_temp = new_temp2;
7198 }
7199
7200 /* For exclusive scan, perform the perms[i] permutation once
7201 more. */
7202 if (i == 0
7203 && STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) == 4
7204 && v == vec_oprnd2)
7205 {
7206 v = new_temp;
7207 --i;
7208 continue;
7209 }
7210
7211 tree new_temp2 = make_ssa_name (vectype);
7212 g = gimple_build_assign (new_temp2, code, v, new_temp);
7213 new_stmt_info = vect_finish_stmt_generation (stmt_info, g, gsi);
7214 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt_info;
7215 prev_stmt_info = new_stmt_info;
7216
7217 v = new_temp2;
7218 }
7219
7220 tree new_temp = make_ssa_name (vectype);
7221 gimple *g = gimple_build_assign (new_temp, code, orig, v);
7222 new_stmt_info = vect_finish_stmt_generation (stmt_info, g, gsi);
7223 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt_info;
7224 prev_stmt_info = new_stmt_info;
7225
7226 tree last_perm_arg = new_temp;
7227 /* For exclusive scan, the new_temp computed above is the exclusive
7228 prefix sum. Turn it into an inclusive prefix sum before broadcasting
7229 the last element into orig. */
7230 if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) == 4)
7231 {
7232 last_perm_arg = make_ssa_name (vectype);
7233 g = gimple_build_assign (last_perm_arg, code, new_temp, vec_oprnd2);
7234 new_stmt_info = vect_finish_stmt_generation (stmt_info, g, gsi);
7235 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt_info;
7236 prev_stmt_info = new_stmt_info;
7237 }
7238
7239 orig = make_ssa_name (vectype);
7240 g = gimple_build_assign (orig, VEC_PERM_EXPR, last_perm_arg,
7241 last_perm_arg, perms[units_log2]);
7242 new_stmt_info = vect_finish_stmt_generation (stmt_info, g, gsi);
7243 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt_info;
7244 prev_stmt_info = new_stmt_info;
7245
7246 if (!inscan_var_store)
7247 {
7248 tree data_ref = fold_build2 (MEM_REF, vectype,
7249 unshare_expr (dataref_ptr),
7250 dataref_offset);
7251 vect_copy_ref_info (data_ref, DR_REF (dr_info->dr));
7252 g = gimple_build_assign (data_ref, new_temp);
7253 new_stmt_info = vect_finish_stmt_generation (stmt_info, g, gsi);
7254 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt_info;
7255 prev_stmt_info = new_stmt_info;
7256 }
7257 }
7258
7259 if (inscan_var_store)
7260 for (int j = 0; j < ncopies; j++)
7261 {
7262 if (j != 0)
7263 dataref_offset = int_const_binop (PLUS_EXPR, dataref_offset, bump);
7264
7265 tree data_ref = fold_build2 (MEM_REF, vectype,
7266 unshare_expr (dataref_ptr),
7267 dataref_offset);
7268 vect_copy_ref_info (data_ref, DR_REF (dr_info->dr));
7269 gimple *g = gimple_build_assign (data_ref, orig);
7270 stmt_vec_info new_stmt_info
7271 = vect_finish_stmt_generation (stmt_info, g, gsi);
7272 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt_info;
7273 prev_stmt_info = new_stmt_info;
7274 }
7275 return true;
7276 }
7277
7278
7279 /* Function vectorizable_store.
7280
7281 Check if STMT_INFO defines a non-scalar data-ref (array/pointer/structure)
7282 that can be vectorized.
7283 If VEC_STMT is also passed, vectorize STMT_INFO: create a vectorized
7284 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
7285 Return true if STMT_INFO is vectorizable in this way. */
7286
7287 static bool
7288 vectorizable_store (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
7289 stmt_vec_info *vec_stmt, slp_tree slp_node,
7290 stmt_vector_for_cost *cost_vec)
7291 {
7292 tree data_ref;
7293 tree op;
7294 tree vec_oprnd = NULL_TREE;
7295 tree elem_type;
7296 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
7297 class loop *loop = NULL;
7298 machine_mode vec_mode;
7299 tree dummy;
7300 enum dr_alignment_support alignment_support_scheme;
7301 enum vect_def_type rhs_dt = vect_unknown_def_type;
7302 enum vect_def_type mask_dt = vect_unknown_def_type;
7303 stmt_vec_info prev_stmt_info = NULL;
7304 tree dataref_ptr = NULL_TREE;
7305 tree dataref_offset = NULL_TREE;
7306 gimple *ptr_incr = NULL;
7307 int ncopies;
7308 int j;
7309 stmt_vec_info first_stmt_info;
7310 bool grouped_store;
7311 unsigned int group_size, i;
7312 vec<tree> oprnds = vNULL;
7313 vec<tree> result_chain = vNULL;
7314 tree offset = NULL_TREE;
7315 vec<tree> vec_oprnds = vNULL;
7316 bool slp = (slp_node != NULL);
7317 unsigned int vec_num;
7318 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
7319 vec_info *vinfo = stmt_info->vinfo;
7320 tree aggr_type;
7321 gather_scatter_info gs_info;
7322 poly_uint64 vf;
7323 vec_load_store_type vls_type;
7324 tree ref_type;
7325
7326 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
7327 return false;
7328
7329 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
7330 && ! vec_stmt)
7331 return false;
7332
7333 /* Is vectorizable store? */
7334
7335 tree mask = NULL_TREE, mask_vectype = NULL_TREE;
7336 if (gassign *assign = dyn_cast <gassign *> (stmt_info->stmt))
7337 {
7338 tree scalar_dest = gimple_assign_lhs (assign);
7339 if (TREE_CODE (scalar_dest) == VIEW_CONVERT_EXPR
7340 && is_pattern_stmt_p (stmt_info))
7341 scalar_dest = TREE_OPERAND (scalar_dest, 0);
7342 if (TREE_CODE (scalar_dest) != ARRAY_REF
7343 && TREE_CODE (scalar_dest) != BIT_FIELD_REF
7344 && TREE_CODE (scalar_dest) != INDIRECT_REF
7345 && TREE_CODE (scalar_dest) != COMPONENT_REF
7346 && TREE_CODE (scalar_dest) != IMAGPART_EXPR
7347 && TREE_CODE (scalar_dest) != REALPART_EXPR
7348 && TREE_CODE (scalar_dest) != MEM_REF)
7349 return false;
7350 }
7351 else
7352 {
7353 gcall *call = dyn_cast <gcall *> (stmt_info->stmt);
7354 if (!call || !gimple_call_internal_p (call))
7355 return false;
7356
7357 internal_fn ifn = gimple_call_internal_fn (call);
7358 if (!internal_store_fn_p (ifn))
7359 return false;
7360
7361 if (slp_node != NULL)
7362 {
7363 if (dump_enabled_p ())
7364 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
7365 "SLP of masked stores not supported.\n");
7366 return false;
7367 }
7368
7369 int mask_index = internal_fn_mask_index (ifn);
7370 if (mask_index >= 0)
7371 {
7372 mask = gimple_call_arg (call, mask_index);
7373 if (!vect_check_load_store_mask (stmt_info, mask, &mask_dt,
7374 &mask_vectype))
7375 return false;
7376 }
7377 }
7378
7379 op = vect_get_store_rhs (stmt_info);
7380
7381 /* Cannot have hybrid store SLP -- that would mean storing to the
7382 same location twice. */
7383 gcc_assert (slp == PURE_SLP_STMT (stmt_info));
7384
7385 tree vectype = STMT_VINFO_VECTYPE (stmt_info), rhs_vectype = NULL_TREE;
7386 poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
7387
7388 if (loop_vinfo)
7389 {
7390 loop = LOOP_VINFO_LOOP (loop_vinfo);
7391 vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
7392 }
7393 else
7394 vf = 1;
7395
7396 /* Multiple types in SLP are handled by creating the appropriate number of
7397 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
7398 case of SLP. */
7399 if (slp)
7400 ncopies = 1;
7401 else
7402 ncopies = vect_get_num_copies (loop_vinfo, vectype);
7403
7404 gcc_assert (ncopies >= 1);
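/* For example (an illustrative assumption about the target): storing ints
   with a 128-bit V4SI vectype and a vectorization factor of 8 gives
   ncopies == 2, i.e. two vector stores per scalar store. */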
7405
7406 /* FORNOW. This restriction should be relaxed. */
7407 if (loop && nested_in_vect_loop_p (loop, stmt_info) && ncopies > 1)
7408 {
7409 if (dump_enabled_p ())
7410 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
7411 "multiple types in nested loop.\n");
7412 return false;
7413 }
7414
7415 if (!vect_check_store_rhs (stmt_info, op, &rhs_dt, &rhs_vectype, &vls_type))
7416 return false;
7417
7418 elem_type = TREE_TYPE (vectype);
7419 vec_mode = TYPE_MODE (vectype);
7420
7421 if (!STMT_VINFO_DATA_REF (stmt_info))
7422 return false;
7423
7424 vect_memory_access_type memory_access_type;
7425 if (!get_load_store_type (stmt_info, vectype, slp, mask, vls_type, ncopies,
7426 &memory_access_type, &gs_info))
7427 return false;
7428
7429 if (mask)
7430 {
7431 if (memory_access_type == VMAT_CONTIGUOUS)
7432 {
7433 if (!VECTOR_MODE_P (vec_mode)
7434 || !can_vec_mask_load_store_p (vec_mode,
7435 TYPE_MODE (mask_vectype), false))
7436 return false;
7437 }
7438 else if (memory_access_type != VMAT_LOAD_STORE_LANES
7439 && (memory_access_type != VMAT_GATHER_SCATTER
7440 || (gs_info.decl && !VECTOR_BOOLEAN_TYPE_P (mask_vectype))))
7441 {
7442 if (dump_enabled_p ())
7443 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
7444 "unsupported access type for masked store.\n");
7445 return false;
7446 }
7447 }
7448 else
7449 {
7450 /* FORNOW. In some cases we can vectorize even if the data-type is not
7451 supported (e.g. array initialization with 0). */
7452 if (optab_handler (mov_optab, vec_mode) == CODE_FOR_nothing)
7453 return false;
7454 }
7455
7456 dr_vec_info *dr_info = STMT_VINFO_DR_INFO (stmt_info), *first_dr_info = NULL;
7457 grouped_store = (STMT_VINFO_GROUPED_ACCESS (stmt_info)
7458 && memory_access_type != VMAT_GATHER_SCATTER
7459 && (slp || memory_access_type != VMAT_CONTIGUOUS));
7460 if (grouped_store)
7461 {
7462 first_stmt_info = DR_GROUP_FIRST_ELEMENT (stmt_info);
7463 first_dr_info = STMT_VINFO_DR_INFO (first_stmt_info);
7464 group_size = DR_GROUP_SIZE (first_stmt_info);
7465 }
7466 else
7467 {
7468 first_stmt_info = stmt_info;
7469 first_dr_info = dr_info;
7470 group_size = vec_num = 1;
7471 }
7472
7473 if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) > 1 && !vec_stmt)
7474 {
7475 if (!check_scan_store (stmt_info, vectype, rhs_dt, slp, mask,
7476 memory_access_type))
7477 return false;
7478 }
7479
7480 if (!vec_stmt) /* transformation not required. */
7481 {
7482 STMT_VINFO_MEMORY_ACCESS_TYPE (stmt_info) = memory_access_type;
7483
7484 if (loop_vinfo
7485 && LOOP_VINFO_CAN_FULLY_MASK_P (loop_vinfo))
7486 check_load_store_masking (loop_vinfo, vectype, vls_type, group_size,
7487 memory_access_type, &gs_info, mask);
7488
7489 STMT_VINFO_TYPE (stmt_info) = store_vec_info_type;
7490 vect_model_store_cost (stmt_info, ncopies, rhs_dt, memory_access_type,
7491 vls_type, slp_node, cost_vec);
7492 return true;
7493 }
7494 gcc_assert (memory_access_type == STMT_VINFO_MEMORY_ACCESS_TYPE (stmt_info));
7495
7496 /* Transform. */
7497
7498 ensure_base_align (dr_info);
7499
7500 if (memory_access_type == VMAT_GATHER_SCATTER && gs_info.decl)
7501 {
7502 tree vec_oprnd0 = NULL_TREE, vec_oprnd1 = NULL_TREE, src;
7503 tree arglist = TYPE_ARG_TYPES (TREE_TYPE (gs_info.decl));
7504 tree rettype, srctype, ptrtype, idxtype, masktype, scaletype;
7505 tree ptr, var, scale, vec_mask;
7506 tree mask_arg = NULL_TREE, mask_op = NULL_TREE, perm_mask = NULL_TREE;
7507 tree mask_halfvectype = mask_vectype;
7508 edge pe = loop_preheader_edge (loop);
7509 gimple_seq seq;
7510 basic_block new_bb;
7511 enum { NARROW, NONE, WIDEN } modifier;
7512 poly_uint64 scatter_off_nunits
7513 = TYPE_VECTOR_SUBPARTS (gs_info.offset_vectype);
7514
7515 if (known_eq (nunits, scatter_off_nunits))
7516 modifier = NONE;
7517 else if (known_eq (nunits * 2, scatter_off_nunits))
7518 {
7519 modifier = WIDEN;
7520
7521 /* Currently gathers and scatters are only supported for
7522 fixed-length vectors. */
7523 unsigned int count = scatter_off_nunits.to_constant ();
7524 vec_perm_builder sel (count, count, 1);
7525 for (i = 0; i < (unsigned int) count; ++i)
7526 sel.quick_push (i | (count / 2));
7527
7528 vec_perm_indices indices (sel, 1, count);
7529 perm_mask = vect_gen_perm_mask_checked (gs_info.offset_vectype,
7530 indices);
7531 gcc_assert (perm_mask != NULL_TREE);
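/* E.g. with COUNT == 8 the SEL built above is { 4, 5, 6, 7, 4, 5, 6, 7 },
   i.e. the odd copies below operate on the upper half of the offset
   vector. */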
7532 }
7533 else if (known_eq (nunits, scatter_off_nunits * 2))
7534 {
7535 modifier = NARROW;
7536
7537 /* Currently gathers and scatters are only supported for
7538 fixed-length vectors. */
7539 unsigned int count = nunits.to_constant ();
7540 vec_perm_builder sel (count, count, 1);
7541 for (i = 0; i < (unsigned int) count; ++i)
7542 sel.quick_push (i | (count / 2));
7543
7544 vec_perm_indices indices (sel, 2, count);
7545 perm_mask = vect_gen_perm_mask_checked (vectype, indices);
7546 gcc_assert (perm_mask != NULL_TREE);
7547 ncopies *= 2;
7548
7549 if (mask)
7550 mask_halfvectype = truth_type_for (gs_info.offset_vectype);
7551 }
7552 else
7553 gcc_unreachable ();
7554
7555 rettype = TREE_TYPE (TREE_TYPE (gs_info.decl));
7556 ptrtype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
7557 masktype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
7558 idxtype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
7559 srctype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
7560 scaletype = TREE_VALUE (arglist);
7561
7562 gcc_checking_assert (TREE_CODE (masktype) == INTEGER_TYPE
7563 && TREE_CODE (rettype) == VOID_TYPE);
7564
7565 ptr = fold_convert (ptrtype, gs_info.base);
7566 if (!is_gimple_min_invariant (ptr))
7567 {
7568 ptr = force_gimple_operand (ptr, &seq, true, NULL_TREE);
7569 new_bb = gsi_insert_seq_on_edge_immediate (pe, seq);
7570 gcc_assert (!new_bb);
7571 }
7572
7573 if (mask == NULL_TREE)
7574 {
7575 mask_arg = build_int_cst (masktype, -1);
7576 mask_arg = vect_init_vector (stmt_info, mask_arg, masktype, NULL);
7577 }
7578
7579 scale = build_int_cst (scaletype, gs_info.scale);
7580
7581 prev_stmt_info = NULL;
7582 for (j = 0; j < ncopies; ++j)
7583 {
7584 if (j == 0)
7585 {
7586 src = vec_oprnd1 = vect_get_vec_def_for_operand (op, stmt_info);
7587 op = vec_oprnd0 = vect_get_vec_def_for_operand (gs_info.offset,
7588 stmt_info);
7589 if (mask)
7590 mask_op = vec_mask = vect_get_vec_def_for_operand (mask,
7591 stmt_info);
7592 }
7593 else if (modifier != NONE && (j & 1))
7594 {
7595 if (modifier == WIDEN)
7596 {
7597 src
7598 = vec_oprnd1 = vect_get_vec_def_for_stmt_copy (vinfo,
7599 vec_oprnd1);
7600 op = permute_vec_elements (vec_oprnd0, vec_oprnd0, perm_mask,
7601 stmt_info, gsi);
7602 if (mask)
7603 mask_op
7604 = vec_mask = vect_get_vec_def_for_stmt_copy (vinfo,
7605 vec_mask);
7606 }
7607 else if (modifier == NARROW)
7608 {
7609 src = permute_vec_elements (vec_oprnd1, vec_oprnd1, perm_mask,
7610 stmt_info, gsi);
7611 op = vec_oprnd0 = vect_get_vec_def_for_stmt_copy (vinfo,
7612 vec_oprnd0);
7613 }
7614 else
7615 gcc_unreachable ();
7616 }
7617 else
7618 {
7619 src = vec_oprnd1 = vect_get_vec_def_for_stmt_copy (vinfo,
7620 vec_oprnd1);
7621 op = vec_oprnd0 = vect_get_vec_def_for_stmt_copy (vinfo,
7622 vec_oprnd0);
7623 if (mask)
7624 mask_op = vec_mask = vect_get_vec_def_for_stmt_copy (vinfo,
7625 vec_mask);
7626 }
7627
7628 if (!useless_type_conversion_p (srctype, TREE_TYPE (src)))
7629 {
7630 gcc_assert (known_eq (TYPE_VECTOR_SUBPARTS (TREE_TYPE (src)),
7631 TYPE_VECTOR_SUBPARTS (srctype)));
7632 var = vect_get_new_ssa_name (srctype, vect_simple_var);
7633 src = build1 (VIEW_CONVERT_EXPR, srctype, src);
7634 gassign *new_stmt
7635 = gimple_build_assign (var, VIEW_CONVERT_EXPR, src);
7636 vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
7637 src = var;
7638 }
7639
7640 if (!useless_type_conversion_p (idxtype, TREE_TYPE (op)))
7641 {
7642 gcc_assert (known_eq (TYPE_VECTOR_SUBPARTS (TREE_TYPE (op)),
7643 TYPE_VECTOR_SUBPARTS (idxtype)));
7644 var = vect_get_new_ssa_name (idxtype, vect_simple_var);
7645 op = build1 (VIEW_CONVERT_EXPR, idxtype, op);
7646 gassign *new_stmt
7647 = gimple_build_assign (var, VIEW_CONVERT_EXPR, op);
7648 vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
7649 op = var;
7650 }
7651
7652 if (mask)
7653 {
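/* The scatter builtin expects the mask as an integer of type MASKTYPE.
   For the NARROW case first unpack the half of the vector mask used by
   this copy, then view-convert the vector mask to a same-sized integer
   and widen it to MASKTYPE if needed. */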
7654 tree utype;
7655 mask_arg = mask_op;
7656 if (modifier == NARROW)
7657 {
7658 var = vect_get_new_ssa_name (mask_halfvectype,
7659 vect_simple_var);
7660 gassign *new_stmt
7661 = gimple_build_assign (var, (j & 1) ? VEC_UNPACK_HI_EXPR
7662 : VEC_UNPACK_LO_EXPR,
7663 mask_op);
7664 vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
7665 mask_arg = var;
7666 }
7667 tree optype = TREE_TYPE (mask_arg);
7668 if (TYPE_MODE (masktype) == TYPE_MODE (optype))
7669 utype = masktype;
7670 else
7671 utype = lang_hooks.types.type_for_mode (TYPE_MODE (optype), 1);
7672 var = vect_get_new_ssa_name (utype, vect_scalar_var);
7673 mask_arg = build1 (VIEW_CONVERT_EXPR, utype, mask_arg);
7674 gassign *new_stmt
7675 = gimple_build_assign (var, VIEW_CONVERT_EXPR, mask_arg);
7676 vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
7677 mask_arg = var;
7678 if (!useless_type_conversion_p (masktype, utype))
7679 {
7680 gcc_assert (TYPE_PRECISION (utype)
7681 <= TYPE_PRECISION (masktype));
7682 var = vect_get_new_ssa_name (masktype, vect_scalar_var);
7683 new_stmt = gimple_build_assign (var, NOP_EXPR, mask_arg);
7684 vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
7685 mask_arg = var;
7686 }
7687 }
7688
7689 gcall *new_stmt
7690 = gimple_build_call (gs_info.decl, 5, ptr, mask_arg, op, src, scale);
7691 stmt_vec_info new_stmt_info
7692 = vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
7693
7694 if (prev_stmt_info == NULL)
7695 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt_info;
7696 else
7697 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt_info;
7698 prev_stmt_info = new_stmt_info;
7699 }
7700 return true;
7701 }
7702 else if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) >= 3)
7703 return vectorizable_scan_store (stmt_info, gsi, vec_stmt, ncopies);
7704
7705 if (STMT_VINFO_GROUPED_ACCESS (stmt_info))
7706 DR_GROUP_STORE_COUNT (DR_GROUP_FIRST_ELEMENT (stmt_info))++;
7707
7708 if (grouped_store)
7709 {
7710 /* FORNOW */
7711 gcc_assert (!loop || !nested_in_vect_loop_p (loop, stmt_info));
7712
7713 /* We vectorize all the stmts of the interleaving group when we
7714 reach the last stmt in the group. */
7715 if (DR_GROUP_STORE_COUNT (first_stmt_info)
7716 < DR_GROUP_SIZE (first_stmt_info)
7717 && !slp)
7718 {
7719 *vec_stmt = NULL;
7720 return true;
7721 }
7722
7723 if (slp)
7724 {
7725 grouped_store = false;
7726 /* VEC_NUM is the number of vect stmts to be created for this
7727 group. */
7728 vec_num = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
7729 first_stmt_info = SLP_TREE_SCALAR_STMTS (slp_node)[0];
7730 gcc_assert (DR_GROUP_FIRST_ELEMENT (first_stmt_info)
7731 == first_stmt_info);
7732 first_dr_info = STMT_VINFO_DR_INFO (first_stmt_info);
7733 op = vect_get_store_rhs (first_stmt_info);
7734 }
7735 else
7736 /* VEC_NUM is the number of vect stmts to be created for this
7737 group. */
7738 vec_num = group_size;
7739
7740 ref_type = get_group_alias_ptr_type (first_stmt_info);
7741 }
7742 else
7743 ref_type = reference_alias_ptr_type (DR_REF (first_dr_info->dr));
7744
7745 if (dump_enabled_p ())
7746 dump_printf_loc (MSG_NOTE, vect_location,
7747 "transform store. ncopies = %d\n", ncopies);
7748
7749 if (memory_access_type == VMAT_ELEMENTWISE
7750 || memory_access_type == VMAT_STRIDED_SLP)
7751 {
7752 gimple_stmt_iterator incr_gsi;
7753 bool insert_after;
7754 gimple *incr;
7755 tree offvar;
7756 tree ivstep;
7757 tree running_off;
7758 tree stride_base, stride_step, alias_off;
7759 tree vec_oprnd;
7760 unsigned int g;
7761 /* Checked by get_load_store_type. */
7762 unsigned int const_nunits = nunits.to_constant ();
7763
7764 gcc_assert (!LOOP_VINFO_FULLY_MASKED_P (loop_vinfo));
7765 gcc_assert (!nested_in_vect_loop_p (loop, stmt_info));
7766
7767 stride_base
7768 = fold_build_pointer_plus
7769 (DR_BASE_ADDRESS (first_dr_info->dr),
7770 size_binop (PLUS_EXPR,
7771 convert_to_ptrofftype (DR_OFFSET (first_dr_info->dr)),
7772 convert_to_ptrofftype (DR_INIT (first_dr_info->dr))));
7773 stride_step = fold_convert (sizetype, DR_STEP (first_dr_info->dr));
7774
7775 /* For a store with loop-invariant (but other than power-of-2)
7776 stride (i.e. not a grouped access) like so:
7777
7778 for (i = 0; i < n; i += stride)
7779 array[i] = ...;
7780
7781 we generate a new induction variable and new stores from
7782 the components of the (vectorized) rhs:
7783
7784 for (j = 0; ; j += VF*stride)
7785 vectemp = ...;
7786 tmp1 = vectemp[0];
7787 array[j] = tmp1;
7788 tmp2 = vectemp[1];
7789 array[j + stride] = tmp2;
7790 ...
7791 */
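/* With a 4-element vectype, for instance, each copy above extracts
   vectemp[0] .. vectemp[3] and stores them to array[j], array[j + stride],
   array[j + 2*stride] and array[j + 3*stride]. */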
7792
7793 unsigned nstores = const_nunits;
7794 unsigned lnel = 1;
7795 tree ltype = elem_type;
7796 tree lvectype = vectype;
7797 if (slp)
7798 {
7799 if (group_size < const_nunits
7800 && const_nunits % group_size == 0)
7801 {
7802 nstores = const_nunits / group_size;
7803 lnel = group_size;
7804 ltype = build_vector_type (elem_type, group_size);
7805 lvectype = vectype;
7806
7807 /* First check whether the vec_extract optab supports extraction
7808 of the vector elts directly; if not, try the fallbacks below. */
7809 scalar_mode elmode = SCALAR_TYPE_MODE (elem_type);
7810 machine_mode vmode;
7811 if (!related_vector_mode (TYPE_MODE (vectype), elmode,
7812 group_size).exists (&vmode)
7813 || (convert_optab_handler (vec_extract_optab,
7814 TYPE_MODE (vectype), vmode)
7815 == CODE_FOR_nothing))
7816 {
7817 /* Try to avoid emitting an extract of vector elements
7818 by performing the extracts using an integer type of the
7819 same size, extracting from a vector of those and then
7820 re-interpreting it as the original vector type if
7821 supported. */
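/* For instance (an illustrative target assumption): for a V8HI vector
   of shorts and GROUP_SIZE == 2, LSIZE is 32, so the pairs are extracted
   as SImode values from a V4SI view of the vector and stored as 32-bit
   chunks. */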
7822 unsigned lsize
7823 = group_size * GET_MODE_BITSIZE (elmode);
7824 unsigned int lnunits = const_nunits / group_size;
7825 /* If we can't construct such a vector fall back to
7826 element extracts from the original vector type and
7827 element size stores. */
7828 if (int_mode_for_size (lsize, 0).exists (&elmode)
7829 && related_vector_mode (TYPE_MODE (vectype), elmode,
7830 lnunits).exists (&vmode)
7831 && (convert_optab_handler (vec_extract_optab,
7832 vmode, elmode)
7833 != CODE_FOR_nothing))
7834 {
7835 nstores = lnunits;
7836 lnel = group_size;
7837 ltype = build_nonstandard_integer_type (lsize, 1);
7838 lvectype = build_vector_type (ltype, nstores);
7839 }
7840 /* Else fall back to vector extraction anyway.
7841 Fewer stores are more important than avoiding spilling
7842 of the vector we extract from. Compared to the
7843 construction case in vectorizable_load no store-forwarding
7844 issue exists here for reasonable archs. */
7845 }
7846 }
7847 else if (group_size >= const_nunits
7848 && group_size % const_nunits == 0)
7849 {
7850 nstores = 1;
7851 lnel = const_nunits;
7852 ltype = vectype;
7853 lvectype = vectype;
7854 }
7855 ltype = build_aligned_type (ltype, TYPE_ALIGN (elem_type));
7856 ncopies = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
7857 }
7858
7859 ivstep = stride_step;
7860 ivstep = fold_build2 (MULT_EXPR, TREE_TYPE (ivstep), ivstep,
7861 build_int_cst (TREE_TYPE (ivstep), vf));
7862
7863 standard_iv_increment_position (loop, &incr_gsi, &insert_after);
7864
7865 stride_base = cse_and_gimplify_to_preheader (loop_vinfo, stride_base);
7866 ivstep = cse_and_gimplify_to_preheader (loop_vinfo, ivstep);
7867 create_iv (stride_base, ivstep, NULL,
7868 loop, &incr_gsi, insert_after,
7869 &offvar, NULL);
7870 incr = gsi_stmt (incr_gsi);
7871 loop_vinfo->add_stmt (incr);
7872
7873 stride_step = cse_and_gimplify_to_preheader (loop_vinfo, stride_step);
7874
7875 prev_stmt_info = NULL;
7876 alias_off = build_int_cst (ref_type, 0);
7877 stmt_vec_info next_stmt_info = first_stmt_info;
7878 for (g = 0; g < group_size; g++)
7879 {
7880 running_off = offvar;
7881 if (g)
7882 {
7883 tree size = TYPE_SIZE_UNIT (ltype);
7884 tree pos = fold_build2 (MULT_EXPR, sizetype, size_int (g),
7885 size);
7886 tree newoff = copy_ssa_name (running_off, NULL);
7887 incr = gimple_build_assign (newoff, POINTER_PLUS_EXPR,
7888 running_off, pos);
7889 vect_finish_stmt_generation (stmt_info, incr, gsi);
7890 running_off = newoff;
7891 }
7892 unsigned int group_el = 0;
7893 unsigned HOST_WIDE_INT
7894 elsz = tree_to_uhwi (TYPE_SIZE_UNIT (TREE_TYPE (vectype)));
7895 for (j = 0; j < ncopies; j++)
7896 {
7897 /* We've set op and dt above, from vect_get_store_rhs,
7898 and first_stmt_info == stmt_info. */
7899 if (j == 0)
7900 {
7901 if (slp)
7902 {
7903 vect_get_vec_defs (op, NULL_TREE, stmt_info,
7904 &vec_oprnds, NULL, slp_node);
7905 vec_oprnd = vec_oprnds[0];
7906 }
7907 else
7908 {
7909 op = vect_get_store_rhs (next_stmt_info);
7910 vec_oprnd = vect_get_vec_def_for_operand
7911 (op, next_stmt_info);
7912 }
7913 }
7914 else
7915 {
7916 if (slp)
7917 vec_oprnd = vec_oprnds[j];
7918 else
7919 vec_oprnd = vect_get_vec_def_for_stmt_copy (vinfo,
7920 vec_oprnd);
7921 }
7922 /* Pun the vector to extract from if necessary. */
7923 if (lvectype != vectype)
7924 {
7925 tree tem = make_ssa_name (lvectype);
7926 gimple *pun
7927 = gimple_build_assign (tem, build1 (VIEW_CONVERT_EXPR,
7928 lvectype, vec_oprnd));
7929 vect_finish_stmt_generation (stmt_info, pun, gsi);
7930 vec_oprnd = tem;
7931 }
7932 for (i = 0; i < nstores; i++)
7933 {
7934 tree newref, newoff;
7935 gimple *incr, *assign;
7936 tree size = TYPE_SIZE (ltype);
7937 /* Extract the i'th component. */
7938 tree pos = fold_build2 (MULT_EXPR, bitsizetype,
7939 bitsize_int (i), size);
7940 tree elem = fold_build3 (BIT_FIELD_REF, ltype, vec_oprnd,
7941 size, pos);
7942
7943 elem = force_gimple_operand_gsi (gsi, elem, true,
7944 NULL_TREE, true,
7945 GSI_SAME_STMT);
7946
7947 tree this_off = build_int_cst (TREE_TYPE (alias_off),
7948 group_el * elsz);
7949 newref = build2 (MEM_REF, ltype,
7950 running_off, this_off);
7951 vect_copy_ref_info (newref, DR_REF (first_dr_info->dr));
7952
7953 /* And store it to *running_off. */
7954 assign = gimple_build_assign (newref, elem);
7955 stmt_vec_info assign_info
7956 = vect_finish_stmt_generation (stmt_info, assign, gsi);
7957
7958 group_el += lnel;
7959 if (! slp
7960 || group_el == group_size)
7961 {
7962 newoff = copy_ssa_name (running_off, NULL);
7963 incr = gimple_build_assign (newoff, POINTER_PLUS_EXPR,
7964 running_off, stride_step);
7965 vect_finish_stmt_generation (stmt_info, incr, gsi);
7966
7967 running_off = newoff;
7968 group_el = 0;
7969 }
7970 if (g == group_size - 1
7971 && !slp)
7972 {
7973 if (j == 0 && i == 0)
7974 STMT_VINFO_VEC_STMT (stmt_info)
7975 = *vec_stmt = assign_info;
7976 else
7977 STMT_VINFO_RELATED_STMT (prev_stmt_info) = assign_info;
7978 prev_stmt_info = assign_info;
7979 }
7980 }
7981 }
7982 next_stmt_info = DR_GROUP_NEXT_ELEMENT (next_stmt_info);
7983 if (slp)
7984 break;
7985 }
7986
7987 vec_oprnds.release ();
7988 return true;
7989 }
7990
7991 auto_vec<tree> dr_chain (group_size);
7992 oprnds.create (group_size);
7993
7994 alignment_support_scheme
7995 = vect_supportable_dr_alignment (first_dr_info, false);
7996 gcc_assert (alignment_support_scheme);
7997 vec_loop_masks *loop_masks
7998 = (loop_vinfo && LOOP_VINFO_FULLY_MASKED_P (loop_vinfo)
7999 ? &LOOP_VINFO_MASKS (loop_vinfo)
8000 : NULL);
8001 /* Targets with store-lane instructions must not require explicit
8002 realignment. vect_supportable_dr_alignment always returns either
8003 dr_aligned or dr_unaligned_supported for masked operations. */
8004 gcc_assert ((memory_access_type != VMAT_LOAD_STORE_LANES
8005 && !mask
8006 && !loop_masks)
8007 || alignment_support_scheme == dr_aligned
8008 || alignment_support_scheme == dr_unaligned_supported);
8009
8010 if (memory_access_type == VMAT_CONTIGUOUS_DOWN
8011 || memory_access_type == VMAT_CONTIGUOUS_REVERSE)
8012 offset = size_int (-TYPE_VECTOR_SUBPARTS (vectype) + 1);
8013
8014 tree bump;
8015 tree vec_offset = NULL_TREE;
8016 if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
8017 {
8018 aggr_type = NULL_TREE;
8019 bump = NULL_TREE;
8020 }
8021 else if (memory_access_type == VMAT_GATHER_SCATTER)
8022 {
8023 aggr_type = elem_type;
8024 vect_get_strided_load_store_ops (stmt_info, loop_vinfo, &gs_info,
8025 &bump, &vec_offset);
8026 }
8027 else
8028 {
8029 if (memory_access_type == VMAT_LOAD_STORE_LANES)
8030 aggr_type = build_array_type_nelts (elem_type, vec_num * nunits);
8031 else
8032 aggr_type = vectype;
8033 bump = vect_get_data_ptr_increment (dr_info, aggr_type,
8034 memory_access_type);
8035 }
8036
8037 if (mask)
8038 LOOP_VINFO_HAS_MASK_STORE (loop_vinfo) = true;
8039
8040 /* In case the vectorization factor (VF) is bigger than the number
8041 of elements that we can fit in a vectype (nunits), we have to generate
8042 more than one vector stmt, i.e. we need to "unroll" the
8043 vector stmt by a factor of VF/nunits. For more details see the
8044 documentation of vect_get_vec_def_for_stmt_copy. */
8045
8046 /* In case of interleaving (non-unit grouped access):
8047
8048 S1: &base + 2 = x2
8049 S2: &base = x0
8050 S3: &base + 1 = x1
8051 S4: &base + 3 = x3
8052
8053 We create vectorized stores starting from base address (the access of the
8054 first stmt in the chain (S2 in the above example), when the last store stmt
8055 of the chain (S4) is reached:
8056
8057 VS1: &base = vx2
8058 VS2: &base + vec_size*1 = vx0
8059 VS3: &base + vec_size*2 = vx1
8060 VS4: &base + vec_size*3 = vx3
8061
8062 Then permutation statements are generated:
8063
8064 VS5: vx5 = VEC_PERM_EXPR < vx0, vx3, {0, 8, 1, 9, 2, 10, 3, 11} >
8065 VS6: vx6 = VEC_PERM_EXPR < vx0, vx3, {4, 12, 5, 13, 6, 14, 7, 15} >
8066 ...
8067
8068 And they are put in STMT_VINFO_VEC_STMT of the corresponding scalar stmts
8069 (the order of the data-refs in the output of vect_permute_store_chain
8070 corresponds to the order of scalar stmts in the interleaving chain - see
8071 the documentation of vect_permute_store_chain()).
8072
8073 In case of both multiple types and interleaving, above vector stores and
8074 permutation stmts are created for every copy. The result vector stmts are
8075 put in STMT_VINFO_VEC_STMT for the first copy and in the corresponding
8076 STMT_VINFO_RELATED_STMT for the next copies.
8077 */
8078
8079 prev_stmt_info = NULL;
8080 tree vec_mask = NULL_TREE;
8081 for (j = 0; j < ncopies; j++)
8082 {
8083 stmt_vec_info new_stmt_info;
8084 if (j == 0)
8085 {
8086 if (slp)
8087 {
8088 /* Get vectorized arguments for SLP_NODE. */
8089 vect_get_vec_defs (op, NULL_TREE, stmt_info, &vec_oprnds,
8090 NULL, slp_node);
8091
8092 vec_oprnd = vec_oprnds[0];
8093 }
8094 else
8095 {
8096 /* For interleaved stores we collect vectorized defs for all the
8097 stores in the group in DR_CHAIN and OPRNDS. DR_CHAIN is then
8098 used as an input to vect_permute_store_chain(), and OPRNDS as
8099 an input to vect_get_vec_def_for_stmt_copy() for the next copy.
8100
8101 If the store is not grouped, DR_GROUP_SIZE is 1, and DR_CHAIN and
8102 OPRNDS are of size 1. */
8103 stmt_vec_info next_stmt_info = first_stmt_info;
8104 for (i = 0; i < group_size; i++)
8105 {
8106 /* Since gaps are not supported for interleaved stores,
8107 DR_GROUP_SIZE is the exact number of stmts in the chain.
8108 Therefore, NEXT_STMT_INFO can't be NULL. In case
8109 that there is no interleaving, DR_GROUP_SIZE is 1,
8110 and only one iteration of the loop will be executed. */
8111 op = vect_get_store_rhs (next_stmt_info);
8112 vec_oprnd = vect_get_vec_def_for_operand
8113 (op, next_stmt_info);
8114 dr_chain.quick_push (vec_oprnd);
8115 oprnds.quick_push (vec_oprnd);
8116 next_stmt_info = DR_GROUP_NEXT_ELEMENT (next_stmt_info);
8117 }
8118 if (mask)
8119 vec_mask = vect_get_vec_def_for_operand (mask, stmt_info,
8120 mask_vectype);
8121 }
8122
8123 /* We should have caught mismatched types earlier. */
8124 gcc_assert (useless_type_conversion_p (vectype,
8125 TREE_TYPE (vec_oprnd)));
8126 bool simd_lane_access_p
8127 = STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) != 0;
8128 if (simd_lane_access_p
8129 && !loop_masks
8130 && TREE_CODE (DR_BASE_ADDRESS (first_dr_info->dr)) == ADDR_EXPR
8131 && VAR_P (TREE_OPERAND (DR_BASE_ADDRESS (first_dr_info->dr), 0))
8132 && integer_zerop (DR_OFFSET (first_dr_info->dr))
8133 && integer_zerop (DR_INIT (first_dr_info->dr))
8134 && alias_sets_conflict_p (get_alias_set (aggr_type),
8135 get_alias_set (TREE_TYPE (ref_type))))
8136 {
8137 dataref_ptr = unshare_expr (DR_BASE_ADDRESS (first_dr_info->dr));
8138 dataref_offset = build_int_cst (ref_type, 0);
8139 }
8140 else if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
8141 vect_get_gather_scatter_ops (loop, stmt_info, &gs_info,
8142 &dataref_ptr, &vec_offset);
8143 else
8144 dataref_ptr
8145 = vect_create_data_ref_ptr (first_stmt_info, aggr_type,
8146 simd_lane_access_p ? loop : NULL,
8147 offset, &dummy, gsi, &ptr_incr,
8148 simd_lane_access_p, NULL_TREE, bump);
8149 }
8150 else
8151 {
8152 /* For interleaved stores we created vectorized defs for all the
8153 defs stored in OPRNDS in the previous iteration (previous copy).
8154 DR_CHAIN is then used as an input to vect_permute_store_chain(),
8155 and OPRNDS as an input to vect_get_vec_def_for_stmt_copy() for the
8156 next copy.
8157 If the store is not grouped, DR_GROUP_SIZE is 1, and DR_CHAIN and
8158 OPRNDS are of size 1. */
8159 for (i = 0; i < group_size; i++)
8160 {
8161 op = oprnds[i];
8162 vec_oprnd = vect_get_vec_def_for_stmt_copy (vinfo, op);
8163 dr_chain[i] = vec_oprnd;
8164 oprnds[i] = vec_oprnd;
8165 }
8166 if (mask)
8167 vec_mask = vect_get_vec_def_for_stmt_copy (vinfo, vec_mask);
8168 if (dataref_offset)
8169 dataref_offset
8170 = int_const_binop (PLUS_EXPR, dataref_offset, bump);
8171 else if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
8172 vec_offset = vect_get_vec_def_for_stmt_copy (vinfo, vec_offset);
8173 else
8174 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi,
8175 stmt_info, bump);
8176 }
8177
8178 if (memory_access_type == VMAT_LOAD_STORE_LANES)
8179 {
8180 tree vec_array;
8181
8182 /* Get an array into which we can store the individual vectors. */
8183 vec_array = create_vector_array (vectype, vec_num);
8184
8185 /* Invalidate the current contents of VEC_ARRAY. This should
8186 become an RTL clobber too, which prevents the vector registers
8187 from being upward-exposed. */
8188 vect_clobber_variable (stmt_info, gsi, vec_array);
8189
8190 /* Store the individual vectors into the array. */
8191 for (i = 0; i < vec_num; i++)
8192 {
8193 vec_oprnd = dr_chain[i];
8194 write_vector_array (stmt_info, gsi, vec_oprnd, vec_array, i);
8195 }
8196
8197 tree final_mask = NULL;
8198 if (loop_masks)
8199 final_mask = vect_get_loop_mask (gsi, loop_masks, ncopies,
8200 vectype, j);
8201 if (vec_mask)
8202 final_mask = prepare_load_store_mask (mask_vectype, final_mask,
8203 vec_mask, gsi);
8204
8205 gcall *call;
8206 if (final_mask)
8207 {
8208 /* Emit:
8209 MASK_STORE_LANES (DATAREF_PTR, ALIAS_PTR, VEC_MASK,
8210 VEC_ARRAY). */
8211 unsigned int align = TYPE_ALIGN_UNIT (TREE_TYPE (vectype));
8212 tree alias_ptr = build_int_cst (ref_type, align);
8213 call = gimple_build_call_internal (IFN_MASK_STORE_LANES, 4,
8214 dataref_ptr, alias_ptr,
8215 final_mask, vec_array);
8216 }
8217 else
8218 {
8219 /* Emit:
8220 MEM_REF[...all elements...] = STORE_LANES (VEC_ARRAY). */
8221 data_ref = create_array_ref (aggr_type, dataref_ptr, ref_type);
8222 call = gimple_build_call_internal (IFN_STORE_LANES, 1,
8223 vec_array);
8224 gimple_call_set_lhs (call, data_ref);
8225 }
8226 gimple_call_set_nothrow (call, true);
8227 new_stmt_info = vect_finish_stmt_generation (stmt_info, call, gsi);
8228
8229 /* Record that VEC_ARRAY is now dead. */
8230 vect_clobber_variable (stmt_info, gsi, vec_array);
8231 }
8232 else
8233 {
8234 new_stmt_info = NULL;
8235 if (grouped_store)
8236 {
8237 if (j == 0)
8238 result_chain.create (group_size);
8239 /* Permute. */
8240 vect_permute_store_chain (dr_chain, group_size, stmt_info, gsi,
8241 &result_chain);
8242 }
8243
8244 stmt_vec_info next_stmt_info = first_stmt_info;
8245 for (i = 0; i < vec_num; i++)
8246 {
8247 unsigned misalign;
8248 unsigned HOST_WIDE_INT align;
8249
8250 tree final_mask = NULL_TREE;
8251 if (loop_masks)
8252 final_mask = vect_get_loop_mask (gsi, loop_masks,
8253 vec_num * ncopies,
8254 vectype, vec_num * j + i);
8255 if (vec_mask)
8256 final_mask = prepare_load_store_mask (mask_vectype, final_mask,
8257 vec_mask, gsi);
8258
8259 if (memory_access_type == VMAT_GATHER_SCATTER)
8260 {
8261 tree scale = size_int (gs_info.scale);
8262 gcall *call;
8263 if (loop_masks)
8264 call = gimple_build_call_internal
8265 (IFN_MASK_SCATTER_STORE, 5, dataref_ptr, vec_offset,
8266 scale, vec_oprnd, final_mask);
8267 else
8268 call = gimple_build_call_internal
8269 (IFN_SCATTER_STORE, 4, dataref_ptr, vec_offset,
8270 scale, vec_oprnd);
8271 gimple_call_set_nothrow (call, true);
8272 new_stmt_info
8273 = vect_finish_stmt_generation (stmt_info, call, gsi);
8274 break;
8275 }
8276
8277 if (i > 0)
8278 /* Bump the vector pointer. */
8279 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi,
8280 stmt_info, bump);
8281
8282 if (slp)
8283 vec_oprnd = vec_oprnds[i];
8284 else if (grouped_store)
8285 /* For grouped stores vectorized defs are interleaved in
8286 vect_permute_store_chain(). */
8287 vec_oprnd = result_chain[i];
8288
8289 align = known_alignment (DR_TARGET_ALIGNMENT (first_dr_info));
8290 if (aligned_access_p (first_dr_info))
8291 misalign = 0;
8292 else if (DR_MISALIGNMENT (first_dr_info) == -1)
8293 {
8294 align = dr_alignment (vect_dr_behavior (first_dr_info));
8295 misalign = 0;
8296 }
8297 else
8298 misalign = DR_MISALIGNMENT (first_dr_info);
8299 if (dataref_offset == NULL_TREE
8300 && TREE_CODE (dataref_ptr) == SSA_NAME)
8301 set_ptr_info_alignment (get_ptr_info (dataref_ptr), align,
8302 misalign);
8303
8304 if (memory_access_type == VMAT_CONTIGUOUS_REVERSE)
8305 {
8306 tree perm_mask = perm_mask_for_reverse (vectype);
8307 tree perm_dest = vect_create_destination_var
8308 (vect_get_store_rhs (stmt_info), vectype);
8309 tree new_temp = make_ssa_name (perm_dest);
8310
8311 /* Generate the permute statement. */
8312 gimple *perm_stmt
8313 = gimple_build_assign (new_temp, VEC_PERM_EXPR, vec_oprnd,
8314 vec_oprnd, perm_mask);
8315 vect_finish_stmt_generation (stmt_info, perm_stmt, gsi);
8316
8317 perm_stmt = SSA_NAME_DEF_STMT (new_temp);
8318 vec_oprnd = new_temp;
8319 }
8320
8321 /* Arguments are ready. Create the new vector stmt. */
8322 if (final_mask)
8323 {
8324 align = least_bit_hwi (misalign | align);
8325 tree ptr = build_int_cst (ref_type, align);
8326 gcall *call
8327 = gimple_build_call_internal (IFN_MASK_STORE, 4,
8328 dataref_ptr, ptr,
8329 final_mask, vec_oprnd);
8330 gimple_call_set_nothrow (call, true);
8331 new_stmt_info
8332 = vect_finish_stmt_generation (stmt_info, call, gsi);
8333 }
8334 else
8335 {
8336 data_ref = fold_build2 (MEM_REF, vectype,
8337 dataref_ptr,
8338 dataref_offset
8339 ? dataref_offset
8340 : build_int_cst (ref_type, 0));
8341 if (aligned_access_p (first_dr_info))
8342 ;
8343 else if (DR_MISALIGNMENT (first_dr_info) == -1)
8344 TREE_TYPE (data_ref)
8345 = build_aligned_type (TREE_TYPE (data_ref),
8346 align * BITS_PER_UNIT);
8347 else
8348 TREE_TYPE (data_ref)
8349 = build_aligned_type (TREE_TYPE (data_ref),
8350 TYPE_ALIGN (elem_type));
8351 vect_copy_ref_info (data_ref, DR_REF (first_dr_info->dr));
8352 gassign *new_stmt
8353 = gimple_build_assign (data_ref, vec_oprnd);
8354 new_stmt_info
8355 = vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
8356 }
8357
8358 if (slp)
8359 continue;
8360
8361 next_stmt_info = DR_GROUP_NEXT_ELEMENT (next_stmt_info);
8362 if (!next_stmt_info)
8363 break;
8364 }
8365 }
8366 if (!slp)
8367 {
8368 if (j == 0)
8369 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt_info;
8370 else
8371 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt_info;
8372 prev_stmt_info = new_stmt_info;
8373 }
8374 }
8375
8376 oprnds.release ();
8377 result_chain.release ();
8378 vec_oprnds.release ();
8379
8380 return true;
8381 }
8382
8383 /* Given a vector type VECTYPE, turns permutation SEL into the equivalent
8384 VECTOR_CST mask. No checks are made that the target platform supports the
8385 mask, so callers may wish to test can_vec_perm_const_p separately, or use
8386 vect_gen_perm_mask_checked. */
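/* For example (illustration only): for a 4-element vectype, a SEL of
   { 3, 2, 1, 0 } produces the VECTOR_CST mask used to reverse the vector
   (cf. perm_mask_for_reverse). */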
8387
8388 tree
8389 vect_gen_perm_mask_any (tree vectype, const vec_perm_indices &sel)
8390 {
8391 tree mask_type;
8392
8393 poly_uint64 nunits = sel.length ();
8394 gcc_assert (known_eq (nunits, TYPE_VECTOR_SUBPARTS (vectype)));
8395
8396 mask_type = build_vector_type (ssizetype, nunits);
8397 return vec_perm_indices_to_tree (mask_type, sel);
8398 }
8399
8400 /* Checked version of vect_gen_perm_mask_any. Asserts can_vec_perm_const_p,
8401 i.e. that the target supports the pattern _for arbitrary input vectors_. */
8402
8403 tree
8404 vect_gen_perm_mask_checked (tree vectype, const vec_perm_indices &sel)
8405 {
8406 gcc_assert (can_vec_perm_const_p (TYPE_MODE (vectype), sel));
8407 return vect_gen_perm_mask_any (vectype, sel);
8408 }
8409
8410 /* Given vector variables X and Y that were generated for the scalar
8411 STMT_INFO, generate instructions to permute the vector elements of X and Y
8412 using the permutation mask MASK_VEC, insert them at *GSI and return the
8413 permuted vector variable. */
8414
8415 static tree
8416 permute_vec_elements (tree x, tree y, tree mask_vec, stmt_vec_info stmt_info,
8417 gimple_stmt_iterator *gsi)
8418 {
8419 tree vectype = TREE_TYPE (x);
8420 tree perm_dest, data_ref;
8421 gimple *perm_stmt;
8422
8423 tree scalar_dest = gimple_get_lhs (stmt_info->stmt);
8424 if (scalar_dest && TREE_CODE (scalar_dest) == SSA_NAME)
8425 perm_dest = vect_create_destination_var (scalar_dest, vectype);
8426 else
8427 perm_dest = vect_get_new_vect_var (vectype, vect_simple_var, NULL);
8428 data_ref = make_ssa_name (perm_dest);
8429
8430 /* Generate the permute statement. */
8431 perm_stmt = gimple_build_assign (data_ref, VEC_PERM_EXPR, x, y, mask_vec);
8432 vect_finish_stmt_generation (stmt_info, perm_stmt, gsi);
8433
8434 return data_ref;
8435 }
8436
8437 /* Hoist the definitions of all SSA uses on STMT_INFO out of the loop LOOP,
8438 inserting them on the loop's preheader edge. Returns true if we
8439 were successful in doing so (and thus STMT_INFO can then be moved),
8440 otherwise returns false. */
8441
8442 static bool
8443 hoist_defs_of_uses (stmt_vec_info stmt_info, class loop *loop)
8444 {
8445 ssa_op_iter i;
8446 tree op;
8447 bool any = false;
8448
8449 FOR_EACH_SSA_TREE_OPERAND (op, stmt_info->stmt, i, SSA_OP_USE)
8450 {
8451 gimple *def_stmt = SSA_NAME_DEF_STMT (op);
8452 if (!gimple_nop_p (def_stmt)
8453 && flow_bb_inside_loop_p (loop, gimple_bb (def_stmt)))
8454 {
8455 /* Make sure we don't need to recurse. While we could do
8456 so in simple cases, when there are more complex use webs
8457 we don't have an easy way to preserve stmt order to fulfil
8458 dependencies within them. */
8459 tree op2;
8460 ssa_op_iter i2;
8461 if (gimple_code (def_stmt) == GIMPLE_PHI)
8462 return false;
8463 FOR_EACH_SSA_TREE_OPERAND (op2, def_stmt, i2, SSA_OP_USE)
8464 {
8465 gimple *def_stmt2 = SSA_NAME_DEF_STMT (op2);
8466 if (!gimple_nop_p (def_stmt2)
8467 && flow_bb_inside_loop_p (loop, gimple_bb (def_stmt2)))
8468 return false;
8469 }
8470 any = true;
8471 }
8472 }
8473
8474 if (!any)
8475 return true;
8476
8477 FOR_EACH_SSA_TREE_OPERAND (op, stmt_info->stmt, i, SSA_OP_USE)
8478 {
8479 gimple *def_stmt = SSA_NAME_DEF_STMT (op);
8480 if (!gimple_nop_p (def_stmt)
8481 && flow_bb_inside_loop_p (loop, gimple_bb (def_stmt)))
8482 {
8483 gimple_stmt_iterator gsi = gsi_for_stmt (def_stmt);
8484 gsi_remove (&gsi, false);
8485 gsi_insert_on_edge_immediate (loop_preheader_edge (loop), def_stmt);
8486 }
8487 }
8488
8489 return true;
8490 }
8491
8492 /* vectorizable_load.
8493
8494 Check if STMT_INFO reads a non-scalar data-ref (array/pointer/structure)
8495 that can be vectorized.
8496 If VEC_STMT is also passed, vectorize STMT_INFO: create a vectorized
8497 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
8498 Return true if STMT_INFO is vectorizable in this way. */
8499
8500 static bool
8501 vectorizable_load (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
8502 stmt_vec_info *vec_stmt, slp_tree slp_node,
8503 slp_instance slp_node_instance,
8504 stmt_vector_for_cost *cost_vec)
8505 {
8506 tree scalar_dest;
8507 tree vec_dest = NULL;
8508 tree data_ref = NULL;
8509 stmt_vec_info prev_stmt_info;
8510 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
8511 class loop *loop = NULL;
8512 class loop *containing_loop = gimple_bb (stmt_info->stmt)->loop_father;
8513 bool nested_in_vect_loop = false;
8514 tree elem_type;
8515 tree new_temp;
8516 machine_mode mode;
8517 tree dummy;
8518 enum dr_alignment_support alignment_support_scheme;
8519 tree dataref_ptr = NULL_TREE;
8520 tree dataref_offset = NULL_TREE;
8521 gimple *ptr_incr = NULL;
8522 int ncopies;
8523 int i, j;
8524 unsigned int group_size;
8525 poly_uint64 group_gap_adj;
8526 tree msq = NULL_TREE, lsq;
8527 tree offset = NULL_TREE;
8528 tree byte_offset = NULL_TREE;
8529 tree realignment_token = NULL_TREE;
8530 gphi *phi = NULL;
8531 vec<tree> dr_chain = vNULL;
8532 bool grouped_load = false;
8533 stmt_vec_info first_stmt_info;
8534 stmt_vec_info first_stmt_info_for_drptr = NULL;
8535 bool compute_in_loop = false;
8536 class loop *at_loop;
8537 int vec_num;
8538 bool slp = (slp_node != NULL);
8539 bool slp_perm = false;
8540 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
8541 poly_uint64 vf;
8542 tree aggr_type;
8543 gather_scatter_info gs_info;
8544 vec_info *vinfo = stmt_info->vinfo;
8545 tree ref_type;
8546 enum vect_def_type mask_dt = vect_unknown_def_type;
8547
8548 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
8549 return false;
8550
8551 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
8552 && ! vec_stmt)
8553 return false;
8554
8555 tree mask = NULL_TREE, mask_vectype = NULL_TREE;
8556 if (gassign *assign = dyn_cast <gassign *> (stmt_info->stmt))
8557 {
8558 scalar_dest = gimple_assign_lhs (assign);
8559 if (TREE_CODE (scalar_dest) != SSA_NAME)
8560 return false;
8561
8562 tree_code code = gimple_assign_rhs_code (assign);
8563 if (code != ARRAY_REF
8564 && code != BIT_FIELD_REF
8565 && code != INDIRECT_REF
8566 && code != COMPONENT_REF
8567 && code != IMAGPART_EXPR
8568 && code != REALPART_EXPR
8569 && code != MEM_REF
8570 && TREE_CODE_CLASS (code) != tcc_declaration)
8571 return false;
8572 }
8573 else
8574 {
8575 gcall *call = dyn_cast <gcall *> (stmt_info->stmt);
8576 if (!call || !gimple_call_internal_p (call))
8577 return false;
8578
8579 internal_fn ifn = gimple_call_internal_fn (call);
8580 if (!internal_load_fn_p (ifn))
8581 return false;
8582
8583 scalar_dest = gimple_call_lhs (call);
8584 if (!scalar_dest)
8585 return false;
8586
8587 int mask_index = internal_fn_mask_index (ifn);
8588 if (mask_index >= 0)
8589 {
8590 mask = gimple_call_arg (call, mask_index);
8591 if (!vect_check_load_store_mask (stmt_info, mask, &mask_dt,
8592 &mask_vectype))
8593 return false;
8594 }
8595 }
8596
8597 if (!STMT_VINFO_DATA_REF (stmt_info))
8598 return false;
8599
8600 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
8601 poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
8602
8603 if (loop_vinfo)
8604 {
8605 loop = LOOP_VINFO_LOOP (loop_vinfo);
8606 nested_in_vect_loop = nested_in_vect_loop_p (loop, stmt_info);
8607 vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
8608 }
8609 else
8610 vf = 1;
8611
8612 /* Multiple types in SLP are handled by creating the appropriate number of
8613 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
8614 case of SLP. */
8615 if (slp)
8616 ncopies = 1;
8617 else
8618 ncopies = vect_get_num_copies (loop_vinfo, vectype);
8619
8620 gcc_assert (ncopies >= 1);
8621
8622 /* FORNOW. This restriction should be relaxed. */
8623 if (nested_in_vect_loop && ncopies > 1)
8624 {
8625 if (dump_enabled_p ())
8626 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
8627 "multiple types in nested loop.\n");
8628 return false;
8629 }
8630
8631 /* Invalidate assumptions made by dependence analysis when vectorization
8632 on the unrolled body effectively re-orders stmts. */
8633 if (ncopies > 1
8634 && STMT_VINFO_MIN_NEG_DIST (stmt_info) != 0
8635 && maybe_gt (LOOP_VINFO_VECT_FACTOR (loop_vinfo),
8636 STMT_VINFO_MIN_NEG_DIST (stmt_info)))
8637 {
8638 if (dump_enabled_p ())
8639 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
8640 "cannot perform implicit CSE when unrolling "
8641 "with negative dependence distance\n");
8642 return false;
8643 }
8644
8645 elem_type = TREE_TYPE (vectype);
8646 mode = TYPE_MODE (vectype);
8647
8648 /* FORNOW. In some cases we can vectorize even if the data type is not
8649 supported (e.g. data copies). */
8650 if (optab_handler (mov_optab, mode) == CODE_FOR_nothing)
8651 {
8652 if (dump_enabled_p ())
8653 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
8654 "Aligned load, but unsupported type.\n");
8655 return false;
8656 }
8657
8658 /* Check if the load is a part of an interleaving chain. */
8659 if (STMT_VINFO_GROUPED_ACCESS (stmt_info))
8660 {
8661 grouped_load = true;
8662 /* FORNOW */
8663 gcc_assert (!nested_in_vect_loop);
8664 gcc_assert (!STMT_VINFO_GATHER_SCATTER_P (stmt_info));
8665
8666 first_stmt_info = DR_GROUP_FIRST_ELEMENT (stmt_info);
8667 group_size = DR_GROUP_SIZE (first_stmt_info);
8668
8669 /* Refuse non-SLP vectorization of SLP-only groups. */
8670 if (!slp && STMT_VINFO_SLP_VECT_ONLY (first_stmt_info))
8671 {
8672 if (dump_enabled_p ())
8673 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
8674 "cannot vectorize load in non-SLP mode.\n");
8675 return false;
8676 }
8677
8678 if (slp && SLP_TREE_LOAD_PERMUTATION (slp_node).exists ())
8679 slp_perm = true;
8680
8681 /* Invalidate assumptions made by dependence analysis when vectorization
8682 on the unrolled body effectively re-orders stmts. */
8683 if (!PURE_SLP_STMT (stmt_info)
8684 && STMT_VINFO_MIN_NEG_DIST (stmt_info) != 0
8685 && maybe_gt (LOOP_VINFO_VECT_FACTOR (loop_vinfo),
8686 STMT_VINFO_MIN_NEG_DIST (stmt_info)))
8687 {
8688 if (dump_enabled_p ())
8689 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
8690 "cannot perform implicit CSE when performing "
8691 "group loads with negative dependence distance\n");
8692 return false;
8693 }
8694 }
8695 else
8696 group_size = 1;
8697
8698 vect_memory_access_type memory_access_type;
8699 if (!get_load_store_type (stmt_info, vectype, slp, mask, VLS_LOAD, ncopies,
8700 &memory_access_type, &gs_info))
8701 return false;
8702
8703 if (mask)
8704 {
8705 if (memory_access_type == VMAT_CONTIGUOUS)
8706 {
8707 machine_mode vec_mode = TYPE_MODE (vectype);
8708 if (!VECTOR_MODE_P (vec_mode)
8709 || !can_vec_mask_load_store_p (vec_mode,
8710 TYPE_MODE (mask_vectype), true))
8711 return false;
8712 }
8713 else if (memory_access_type != VMAT_LOAD_STORE_LANES
8714 && memory_access_type != VMAT_GATHER_SCATTER)
8715 {
8716 if (dump_enabled_p ())
8717 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
8718 "unsupported access type for masked load.\n");
8719 return false;
8720 }
8721 }
8722
8723 if (!vec_stmt) /* transformation not required. */
8724 {
8725 if (!slp)
8726 STMT_VINFO_MEMORY_ACCESS_TYPE (stmt_info) = memory_access_type;
8727
8728 if (loop_vinfo
8729 && LOOP_VINFO_CAN_FULLY_MASK_P (loop_vinfo))
8730 check_load_store_masking (loop_vinfo, vectype, VLS_LOAD, group_size,
8731 memory_access_type, &gs_info, mask);
8732
8733 STMT_VINFO_TYPE (stmt_info) = load_vec_info_type;
8734 vect_model_load_cost (stmt_info, ncopies, memory_access_type,
8735 slp_node_instance, slp_node, cost_vec);
8736 return true;
8737 }
8738
8739 if (!slp)
8740 gcc_assert (memory_access_type
8741 == STMT_VINFO_MEMORY_ACCESS_TYPE (stmt_info));
8742
8743 if (dump_enabled_p ())
8744 dump_printf_loc (MSG_NOTE, vect_location,
8745 "transform load. ncopies = %d\n", ncopies);
8746
8747 /* Transform. */
8748
8749 dr_vec_info *dr_info = STMT_VINFO_DR_INFO (stmt_info), *first_dr_info = NULL;
8750 ensure_base_align (dr_info);
8751
8752 if (memory_access_type == VMAT_GATHER_SCATTER && gs_info.decl)
8753 {
8754 vect_build_gather_load_calls (stmt_info, gsi, vec_stmt, &gs_info, mask);
8755 return true;
8756 }
8757
8758 if (memory_access_type == VMAT_INVARIANT)
8759 {
8760 gcc_assert (!grouped_load && !mask && !bb_vinfo);
8761 /* If we have versioned for aliasing or the loop doesn't
8762 have any data dependencies that would preclude this,
8763 then we are sure this is a loop invariant load and
8764 thus we can insert it on the preheader edge. */
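/* An illustrative sketch of the transformation (assuming hoist_p):

     for (i = 0; i < n; ++i)
       ... = *p + a[i];               <-- *p is invariant in the loop

   becomes

     tmp_1 = *p;                      <-- inserted on the preheader edge
     vec_cst_2 = { tmp_1, ... };      <-- splat built by vect_init_vector
     for (i = 0; i < n; i += VF)
       ... = vec_cst_2 + va[i];  */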
8765 bool hoist_p = (LOOP_VINFO_NO_DATA_DEPENDENCIES (loop_vinfo)
8766 && !nested_in_vect_loop
8767 && hoist_defs_of_uses (stmt_info, loop));
8768 if (hoist_p)
8769 {
8770 gassign *stmt = as_a <gassign *> (stmt_info->stmt);
8771 if (dump_enabled_p ())
8772 dump_printf_loc (MSG_NOTE, vect_location,
8773 "hoisting out of the vectorized loop: %G", stmt);
8774 scalar_dest = copy_ssa_name (scalar_dest);
8775 tree rhs = unshare_expr (gimple_assign_rhs1 (stmt));
8776 gsi_insert_on_edge_immediate
8777 (loop_preheader_edge (loop),
8778 gimple_build_assign (scalar_dest, rhs));
8779 }
8780 /* These copies are all equivalent, but currently the representation
8781 requires a separate STMT_VINFO_VEC_STMT for each one. */
8782 prev_stmt_info = NULL;
8783 gimple_stmt_iterator gsi2 = *gsi;
8784 gsi_next (&gsi2);
8785 for (j = 0; j < ncopies; j++)
8786 {
8787 stmt_vec_info new_stmt_info;
8788 if (hoist_p)
8789 {
8790 new_temp = vect_init_vector (stmt_info, scalar_dest,
8791 vectype, NULL);
8792 gimple *new_stmt = SSA_NAME_DEF_STMT (new_temp);
8793 new_stmt_info = vinfo->add_stmt (new_stmt);
8794 }
8795 else
8796 {
8797 new_temp = vect_init_vector (stmt_info, scalar_dest,
8798 vectype, &gsi2);
8799 new_stmt_info = vinfo->lookup_def (new_temp);
8800 }
8801 if (slp)
8802 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt_info);
8803 else if (j == 0)
8804 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt_info;
8805 else
8806 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt_info;
8807 prev_stmt_info = new_stmt_info;
8808 }
8809 return true;
8810 }
8811
8812 if (memory_access_type == VMAT_ELEMENTWISE
8813 || memory_access_type == VMAT_STRIDED_SLP)
8814 {
8815 gimple_stmt_iterator incr_gsi;
8816 bool insert_after;
8817 gimple *incr;
8818 tree offvar;
8819 tree ivstep;
8820 tree running_off;
8821 vec<constructor_elt, va_gc> *v = NULL;
8822 tree stride_base, stride_step, alias_off;
8823 /* Checked by get_load_store_type. */
8824 unsigned int const_nunits = nunits.to_constant ();
8825 unsigned HOST_WIDE_INT cst_offset = 0;
8826
8827 gcc_assert (!LOOP_VINFO_FULLY_MASKED_P (loop_vinfo));
8828 gcc_assert (!nested_in_vect_loop);
8829
8830 if (grouped_load)
8831 {
8832 first_stmt_info = DR_GROUP_FIRST_ELEMENT (stmt_info);
8833 first_dr_info = STMT_VINFO_DR_INFO (first_stmt_info);
8834 }
8835 else
8836 {
8837 first_stmt_info = stmt_info;
8838 first_dr_info = dr_info;
8839 }
8840 if (slp && grouped_load)
8841 {
8842 group_size = DR_GROUP_SIZE (first_stmt_info);
8843 ref_type = get_group_alias_ptr_type (first_stmt_info);
8844 }
8845 else
8846 {
8847 if (grouped_load)
8848 cst_offset
8849 = (tree_to_uhwi (TYPE_SIZE_UNIT (TREE_TYPE (vectype)))
8850 * vect_get_place_in_interleaving_chain (stmt_info,
8851 first_stmt_info));
8852 group_size = 1;
8853 ref_type = reference_alias_ptr_type (DR_REF (dr_info->dr));
8854 }
8855
8856 stride_base
8857 = fold_build_pointer_plus
8858 (DR_BASE_ADDRESS (first_dr_info->dr),
8859 size_binop (PLUS_EXPR,
8860 convert_to_ptrofftype (DR_OFFSET (first_dr_info->dr)),
8861 convert_to_ptrofftype (DR_INIT (first_dr_info->dr))));
8862 stride_step = fold_convert (sizetype, DR_STEP (first_dr_info->dr));
8863
8864 /* For a load with a loop-invariant (but non-power-of-2) stride,
8865 i.e. not a grouped access, like so:
8866
8867 for (i = 0; i < n; i += stride)
8868 ... = array[i];
8869
8870 we generate a new induction variable and new accesses to
8871 form a new vector (or vectors, depending on ncopies):
8872
8873 for (j = 0; ; j += VF*stride)
8874 tmp1 = array[j];
8875 tmp2 = array[j + stride];
8876 ...
8877 vectemp = {tmp1, tmp2, ...}
8878 */
8879
8880 ivstep = fold_build2 (MULT_EXPR, TREE_TYPE (stride_step), stride_step,
8881 build_int_cst (TREE_TYPE (stride_step), vf));
8882
8883 standard_iv_increment_position (loop, &incr_gsi, &insert_after);
8884
8885 stride_base = cse_and_gimplify_to_preheader (loop_vinfo, stride_base);
8886 ivstep = cse_and_gimplify_to_preheader (loop_vinfo, ivstep);
8887 create_iv (stride_base, ivstep, NULL,
8888 loop, &incr_gsi, insert_after,
8889 &offvar, NULL);
8890 incr = gsi_stmt (incr_gsi);
8891 loop_vinfo->add_stmt (incr);
8892
8893 stride_step = cse_and_gimplify_to_preheader (loop_vinfo, stride_step);
8894
8895 prev_stmt_info = NULL;
8896 running_off = offvar;
8897 alias_off = build_int_cst (ref_type, 0);
8898 int nloads = const_nunits;
8899 int lnel = 1;
8900 tree ltype = TREE_TYPE (vectype);
8901 tree lvectype = vectype;
8902 auto_vec<tree> dr_chain;
8903 if (memory_access_type == VMAT_STRIDED_SLP)
8904 {
8905 if (group_size < const_nunits)
8906 {
8907 /* First check if vec_init optab supports construction from
8908 vector elts directly. */
8909 scalar_mode elmode = SCALAR_TYPE_MODE (TREE_TYPE (vectype));
8910 machine_mode vmode;
8911 if (related_vector_mode (TYPE_MODE (vectype), elmode,
8912 group_size).exists (&vmode)
8913 && (convert_optab_handler (vec_init_optab,
8914 TYPE_MODE (vectype), vmode)
8915 != CODE_FOR_nothing))
8916 {
8917 nloads = const_nunits / group_size;
8918 lnel = group_size;
8919 ltype = build_vector_type (TREE_TYPE (vectype), group_size);
8920 }
8921 else
8922 {
8923 /* Otherwise avoid emitting a constructor of vector elements
8924 by performing the loads using an integer type of the same
8925 size, constructing a vector of those and then
8926 re-interpreting it as the original vector type.
8927 This avoids a huge runtime penalty due to the general
8928 inability to perform store forwarding from smaller stores
8929 to a larger load. */
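/* For example (an illustrative sketch): with a V4SF vectype
   and group_size == 2, each pair of floats is loaded as a
   single 64-bit integer, a V2DI vector is built from those
   and the result is VIEW_CONVERT_EXPRed back to V4SF.  */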
8930 unsigned lsize
8931 = group_size * TYPE_PRECISION (TREE_TYPE (vectype));
8932 unsigned int lnunits = const_nunits / group_size;
8933 /* If we can't construct such a vector fall back to
8934 element loads of the original vector type. */
8935 if (int_mode_for_size (lsize, 0).exists (&elmode)
8936 && related_vector_mode (TYPE_MODE (vectype), elmode,
8937 lnunits).exists (&vmode)
8938 && (convert_optab_handler (vec_init_optab, vmode, elmode)
8939 != CODE_FOR_nothing))
8940 {
8941 nloads = lnunits;
8942 lnel = group_size;
8943 ltype = build_nonstandard_integer_type (lsize, 1);
8944 lvectype = build_vector_type (ltype, nloads);
8945 }
8946 }
8947 }
8948 else
8949 {
8950 nloads = 1;
8951 lnel = const_nunits;
8952 ltype = vectype;
8953 }
8954 ltype = build_aligned_type (ltype, TYPE_ALIGN (TREE_TYPE (vectype)));
8955 }
8956 /* Load vector(1) scalar_type if it's a single-element vectype. */
8957 else if (nloads == 1)
8958 ltype = vectype;
8959
8960 if (slp)
8961 {
8962 /* For SLP permutation support we need to load the whole group,
8963 not only the number of vector stmts the permutation result
8964 fits in. */
8965 if (slp_perm)
8966 {
8967 /* We don't yet generate SLP_TREE_LOAD_PERMUTATIONs for
8968 variable VF. */
8969 unsigned int const_vf = vf.to_constant ();
8970 ncopies = CEIL (group_size * const_vf, const_nunits);
8971 dr_chain.create (ncopies);
8972 }
8973 else
8974 ncopies = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
8975 }
8976 unsigned int group_el = 0;
8977 unsigned HOST_WIDE_INT
8978 elsz = tree_to_uhwi (TYPE_SIZE_UNIT (TREE_TYPE (vectype)));
8979 for (j = 0; j < ncopies; j++)
8980 {
8981 if (nloads > 1)
8982 vec_alloc (v, nloads);
8983 stmt_vec_info new_stmt_info = NULL;
8984 for (i = 0; i < nloads; i++)
8985 {
8986 tree this_off = build_int_cst (TREE_TYPE (alias_off),
8987 group_el * elsz + cst_offset);
8988 tree data_ref = build2 (MEM_REF, ltype, running_off, this_off);
8989 vect_copy_ref_info (data_ref, DR_REF (first_dr_info->dr));
8990 gassign *new_stmt
8991 = gimple_build_assign (make_ssa_name (ltype), data_ref);
8992 new_stmt_info
8993 = vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
8994 if (nloads > 1)
8995 CONSTRUCTOR_APPEND_ELT (v, NULL_TREE,
8996 gimple_assign_lhs (new_stmt));
8997
8998 group_el += lnel;
8999 if (! slp
9000 || group_el == group_size)
9001 {
9002 tree newoff = copy_ssa_name (running_off);
9003 gimple *incr = gimple_build_assign (newoff, POINTER_PLUS_EXPR,
9004 running_off, stride_step);
9005 vect_finish_stmt_generation (stmt_info, incr, gsi);
9006
9007 running_off = newoff;
9008 group_el = 0;
9009 }
9010 }
9011 if (nloads > 1)
9012 {
9013 tree vec_inv = build_constructor (lvectype, v);
9014 new_temp = vect_init_vector (stmt_info, vec_inv, lvectype, gsi);
9015 new_stmt_info = vinfo->lookup_def (new_temp);
9016 if (lvectype != vectype)
9017 {
9018 gassign *new_stmt
9019 = gimple_build_assign (make_ssa_name (vectype),
9020 VIEW_CONVERT_EXPR,
9021 build1 (VIEW_CONVERT_EXPR,
9022 vectype, new_temp));
9023 new_stmt_info
9024 = vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
9025 }
9026 }
9027
9028 if (slp)
9029 {
9030 if (slp_perm)
9031 dr_chain.quick_push (gimple_assign_lhs (new_stmt_info->stmt));
9032 else
9033 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt_info);
9034 }
9035 else
9036 {
9037 if (j == 0)
9038 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt_info;
9039 else
9040 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt_info;
9041 prev_stmt_info = new_stmt_info;
9042 }
9043 }
9044 if (slp_perm)
9045 {
9046 unsigned n_perms;
9047 vect_transform_slp_perm_load (slp_node, dr_chain, gsi, vf,
9048 slp_node_instance, false, &n_perms);
9049 }
9050 return true;
9051 }
9052
9053 if (memory_access_type == VMAT_GATHER_SCATTER
9054 || (!slp && memory_access_type == VMAT_CONTIGUOUS))
9055 grouped_load = false;
9056
9057 if (grouped_load)
9058 {
9059 first_stmt_info = DR_GROUP_FIRST_ELEMENT (stmt_info);
9060 group_size = DR_GROUP_SIZE (first_stmt_info);
9061 /* For SLP vectorization we directly vectorize a subchain
9062 without permutation. */
9063 if (slp && ! SLP_TREE_LOAD_PERMUTATION (slp_node).exists ())
9064 first_stmt_info = SLP_TREE_SCALAR_STMTS (slp_node)[0];
9065 /* For BB vectorization always use the first stmt to base
9066 the data ref pointer on. */
9067 if (bb_vinfo)
9068 first_stmt_info_for_drptr = SLP_TREE_SCALAR_STMTS (slp_node)[0];
9069
9070 /* Check if the chain of loads is already vectorized. */
9071 if (STMT_VINFO_VEC_STMT (first_stmt_info)
9072 /* For SLP we would need to copy over SLP_TREE_VEC_STMTS.
9073 ??? But we can only do so if there is exactly one
9074 as we have no way to get at the rest. Leave the CSE
9075 opportunity alone.
9076 ??? With the group load eventually participating
9077 in multiple different permutations (having multiple
9078 slp nodes which refer to the same group) the CSE
9079 is even wrong code. See PR56270. */
9080 && !slp)
9081 {
9082 *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
9083 return true;
9084 }
9085 first_dr_info = STMT_VINFO_DR_INFO (first_stmt_info);
9086 group_gap_adj = 0;
9087
9088 /* VEC_NUM is the number of vect stmts to be created for this group. */
9089 if (slp)
9090 {
9091 grouped_load = false;
9092 /* If an SLP permutation is from N elements to N elements,
9093 and if one vector holds a whole number of N, we can load
9094 the inputs to the permutation in the same way as an
9095 unpermuted sequence. In other cases we need to load the
9096 whole group, not only the number of vector stmts the
9097 permutation result fits in. */
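/* For example (illustrative): a permutation within groups of 4
   elements using V8SI vectors, which hold exactly two such
   groups, can reuse the plain unpermuted loads; a permutation
   pulling from a group of 3 elements cannot, and has to load
   the whole group rounded up to whole vectors.  */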
9098 if (slp_perm
9099 && (group_size != SLP_INSTANCE_GROUP_SIZE (slp_node_instance)
9100 || !multiple_p (nunits, group_size)))
9101 {
9102 /* We don't yet generate such SLP_TREE_LOAD_PERMUTATIONs for
9103 variable VF; see vect_transform_slp_perm_load. */
9104 unsigned int const_vf = vf.to_constant ();
9105 unsigned int const_nunits = nunits.to_constant ();
9106 vec_num = CEIL (group_size * const_vf, const_nunits);
9107 group_gap_adj = vf * group_size - nunits * vec_num;
9108 }
9109 else
9110 {
9111 vec_num = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
9112 group_gap_adj
9113 = group_size - SLP_INSTANCE_GROUP_SIZE (slp_node_instance);
9114 }
9115 }
9116 else
9117 vec_num = group_size;
9118
9119 ref_type = get_group_alias_ptr_type (first_stmt_info);
9120 }
9121 else
9122 {
9123 first_stmt_info = stmt_info;
9124 first_dr_info = dr_info;
9125 group_size = vec_num = 1;
9126 group_gap_adj = 0;
9127 ref_type = reference_alias_ptr_type (DR_REF (first_dr_info->dr));
9128 }
9129
9130 alignment_support_scheme
9131 = vect_supportable_dr_alignment (first_dr_info, false);
9132 gcc_assert (alignment_support_scheme);
9133 vec_loop_masks *loop_masks
9134 = (loop_vinfo && LOOP_VINFO_FULLY_MASKED_P (loop_vinfo)
9135 ? &LOOP_VINFO_MASKS (loop_vinfo)
9136 : NULL);
9137 /* Targets with store-lane instructions must not require explicit
9138 realignment. vect_supportable_dr_alignment always returns either
9139 dr_aligned or dr_unaligned_supported for masked operations. */
9140 gcc_assert ((memory_access_type != VMAT_LOAD_STORE_LANES
9141 && !mask
9142 && !loop_masks)
9143 || alignment_support_scheme == dr_aligned
9144 || alignment_support_scheme == dr_unaligned_supported);
9145
9146 /* In case the vectorization factor (VF) is bigger than the number
9147 of elements that we can fit in a vectype (nunits), we have to generate
9148 more than one vector stmt - i.e. - we need to "unroll" the
9149 vector stmt by a factor VF/nunits. In doing so, we record a pointer
9150 from one copy of the vector stmt to the next, in the field
9151 STMT_VINFO_RELATED_STMT. This is necessary in order to allow following
9152 stages to find the correct vector defs to be used when vectorizing
9153 stmts that use the defs of the current stmt. The example below
9154 illustrates the vectorization process when VF=16 and nunits=4 (i.e., we
9155 need to create 4 vectorized stmts):
9156
9157 before vectorization:
9158 RELATED_STMT VEC_STMT
9159 S1: x = memref - -
9160 S2: z = x + 1 - -
9161
9162 step 1: vectorize stmt S1:
9163 We first create the vector stmt VS1_0, and, as usual, record a
9164 pointer to it in the STMT_VINFO_VEC_STMT of the scalar stmt S1.
9165 Next, we create the vector stmt VS1_1, and record a pointer to
9166 it in the STMT_VINFO_RELATED_STMT of the vector stmt VS1_0.
9167 Similarly, for VS1_2 and VS1_3. This is the resulting chain of
9168 stmts and pointers:
9169 RELATED_STMT VEC_STMT
9170 VS1_0: vx0 = memref0 VS1_1 -
9171 VS1_1: vx1 = memref1 VS1_2 -
9172 VS1_2: vx2 = memref2 VS1_3 -
9173 VS1_3: vx3 = memref3 - -
9174 S1: x = load - VS1_0
9175 S2: z = x + 1 - -
9176
9177 See the documentation of vect_get_vec_def_for_stmt_copy for how the
9178 information we recorded in the RELATED_STMT field is used to vectorize
9179 stmt S2. */
9180
9181 /* In case of interleaving (non-unit grouped access):
9182
9183 S1: x2 = &base + 2
9184 S2: x0 = &base
9185 S3: x1 = &base + 1
9186 S4: x3 = &base + 3
9187
9188 Vectorized loads are created in the order of memory accesses
9189 starting from the access of the first stmt of the chain:
9190
9191 VS1: vx0 = &base
9192 VS2: vx1 = &base + vec_size*1
9193 VS3: vx3 = &base + vec_size*2
9194 VS4: vx4 = &base + vec_size*3
9195
9196 Then permutation statements are generated:
9197
9198 VS5: vx5 = VEC_PERM_EXPR < vx0, vx1, { 0, 2, ..., i*2 } >
9199 VS6: vx6 = VEC_PERM_EXPR < vx0, vx1, { 1, 3, ..., i*2+1 } >
9200 ...
9201
9202 And they are put in STMT_VINFO_VEC_STMT of the corresponding scalar stmts
9203 (the order of the data-refs in the output of vect_permute_load_chain
9204 corresponds to the order of scalar stmts in the interleaving chain - see
9205 the documentation of vect_permute_load_chain()).
9206 The generation of permutation stmts and recording them in
9207 STMT_VINFO_VEC_STMT is done in vect_transform_grouped_load().
9208
9209 In case of both multiple types and interleaving, the vector loads and
9210 permutation stmts above are created for every copy. The result vector
9211 stmts are put in STMT_VINFO_VEC_STMT for the first copy and in the
9212 corresponding STMT_VINFO_RELATED_STMT for the next copies. */
9213
9214 /* If the data reference is aligned (dr_aligned) or potentially unaligned
9215 on a target that supports unaligned accesses (dr_unaligned_supported)
9216 we generate the following code:
9217 p = initial_addr;
9218 indx = 0;
9219 loop {
9220 p = p + indx * vectype_size;
9221 vec_dest = *(p);
9222 indx = indx + 1;
9223 }
9224
9225 Otherwise, the data reference is potentially unaligned on a target that
9226 does not support unaligned accesses (dr_explicit_realign_optimized) -
9227 then generate the following code, in which the data in each iteration is
9228 obtained by two vector loads, one from the previous iteration, and one
9229 from the current iteration:
9230 p1 = initial_addr;
9231 msq_init = *(floor(p1))
9232 p2 = initial_addr + VS - 1;
9233 realignment_token = call target_builtin;
9234 indx = 0;
9235 loop {
9236 p2 = p2 + indx * vectype_size
9237 lsq = *(floor(p2))
9238 vec_dest = realign_load (msq, lsq, realignment_token)
9239 indx = indx + 1;
9240 msq = lsq;
9241 } */
9242
9243 /* If the misalignment remains the same throughout the execution of the
9244 loop, we can create the init_addr and permutation mask at the loop
9245 preheader. Otherwise, it needs to be created inside the loop.
9246 This can only occur when vectorizing memory accesses in the inner-loop
9247 nested within an outer-loop that is being vectorized. */
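/* For instance (an illustrative sketch): when the outer loop of

     for (i = 0; i < n; i++)      <-- loop being vectorized
       for (j = 0; j < m; j++)
         ... = a[i][j];

   is vectorized and the row size of 'a' is not a multiple of the
   vector size, the misalignment of the inner access changes with 'i',
   so the realignment setup cannot be done once in the preheader.  */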
9248
9249 if (nested_in_vect_loop
9250 && !multiple_p (DR_STEP_ALIGNMENT (dr_info->dr),
9251 GET_MODE_SIZE (TYPE_MODE (vectype))))
9252 {
9253 gcc_assert (alignment_support_scheme != dr_explicit_realign_optimized);
9254 compute_in_loop = true;
9255 }
9256
9257 if ((alignment_support_scheme == dr_explicit_realign_optimized
9258 || alignment_support_scheme == dr_explicit_realign)
9259 && !compute_in_loop)
9260 {
9261 msq = vect_setup_realignment (first_stmt_info_for_drptr
9262 ? first_stmt_info_for_drptr
9263 : first_stmt_info, gsi, &realignment_token,
9264 alignment_support_scheme, NULL_TREE,
9265 &at_loop);
9266 if (alignment_support_scheme == dr_explicit_realign_optimized)
9267 {
9268 phi = as_a <gphi *> (SSA_NAME_DEF_STMT (msq));
9269 byte_offset = size_binop (MINUS_EXPR, TYPE_SIZE_UNIT (vectype),
9270 size_one_node);
9271 }
9272 }
9273 else
9274 at_loop = loop;
9275
9276 if (memory_access_type == VMAT_CONTIGUOUS_REVERSE)
9277 offset = size_int (-TYPE_VECTOR_SUBPARTS (vectype) + 1);
9278
9279 tree bump;
9280 tree vec_offset = NULL_TREE;
9281 if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
9282 {
9283 aggr_type = NULL_TREE;
9284 bump = NULL_TREE;
9285 }
9286 else if (memory_access_type == VMAT_GATHER_SCATTER)
9287 {
9288 aggr_type = elem_type;
9289 vect_get_strided_load_store_ops (stmt_info, loop_vinfo, &gs_info,
9290 &bump, &vec_offset);
9291 }
9292 else
9293 {
9294 if (memory_access_type == VMAT_LOAD_STORE_LANES)
9295 aggr_type = build_array_type_nelts (elem_type, vec_num * nunits);
9296 else
9297 aggr_type = vectype;
9298 bump = vect_get_data_ptr_increment (dr_info, aggr_type,
9299 memory_access_type);
9300 }
9301
9302 tree vec_mask = NULL_TREE;
9303 prev_stmt_info = NULL;
9304 poly_uint64 group_elt = 0;
9305 for (j = 0; j < ncopies; j++)
9306 {
9307 stmt_vec_info new_stmt_info = NULL;
9308 /* 1. Create the vector or array pointer update chain. */
9309 if (j == 0)
9310 {
9311 bool simd_lane_access_p
9312 = STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) != 0;
9313 if (simd_lane_access_p
9314 && TREE_CODE (DR_BASE_ADDRESS (first_dr_info->dr)) == ADDR_EXPR
9315 && VAR_P (TREE_OPERAND (DR_BASE_ADDRESS (first_dr_info->dr), 0))
9316 && integer_zerop (DR_OFFSET (first_dr_info->dr))
9317 && integer_zerop (DR_INIT (first_dr_info->dr))
9318 && alias_sets_conflict_p (get_alias_set (aggr_type),
9319 get_alias_set (TREE_TYPE (ref_type)))
9320 && (alignment_support_scheme == dr_aligned
9321 || alignment_support_scheme == dr_unaligned_supported))
9322 {
9323 dataref_ptr = unshare_expr (DR_BASE_ADDRESS (first_dr_info->dr));
9324 dataref_offset = build_int_cst (ref_type, 0);
9325 }
9326 else if (first_stmt_info_for_drptr
9327 && first_stmt_info != first_stmt_info_for_drptr)
9328 {
9329 dataref_ptr
9330 = vect_create_data_ref_ptr (first_stmt_info_for_drptr,
9331 aggr_type, at_loop, offset, &dummy,
9332 gsi, &ptr_incr, simd_lane_access_p,
9333 byte_offset, bump);
9334 /* Adjust the pointer by the difference to first_stmt. */
9335 data_reference_p ptrdr
9336 = STMT_VINFO_DATA_REF (first_stmt_info_for_drptr);
9337 tree diff
9338 = fold_convert (sizetype,
9339 size_binop (MINUS_EXPR,
9340 DR_INIT (first_dr_info->dr),
9341 DR_INIT (ptrdr)));
9342 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi,
9343 stmt_info, diff);
9344 }
9345 else if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
9346 vect_get_gather_scatter_ops (loop, stmt_info, &gs_info,
9347 &dataref_ptr, &vec_offset);
9348 else
9349 dataref_ptr
9350 = vect_create_data_ref_ptr (first_stmt_info, aggr_type, at_loop,
9351 offset, &dummy, gsi, &ptr_incr,
9352 simd_lane_access_p,
9353 byte_offset, bump);
9354 if (mask)
9355 {
9356 if (slp_node)
9357 {
9358 auto_vec<vec<tree> > vec_defs (1);
9359 vect_get_slp_defs (slp_node, &vec_defs);
9360 vec_mask = vec_defs[0][0];
9361 }
9362 else
9363 vec_mask = vect_get_vec_def_for_operand (mask, stmt_info,
9364 mask_vectype);
9365 }
9366 }
9367 else
9368 {
9369 if (dataref_offset)
9370 dataref_offset = int_const_binop (PLUS_EXPR, dataref_offset,
9371 bump);
9372 else if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
9373 vec_offset = vect_get_vec_def_for_stmt_copy (vinfo, vec_offset);
9374 else
9375 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi,
9376 stmt_info, bump);
9377 if (mask)
9378 vec_mask = vect_get_vec_def_for_stmt_copy (vinfo, vec_mask);
9379 }
9380
9381 if (grouped_load || slp_perm)
9382 dr_chain.create (vec_num);
9383
9384 if (memory_access_type == VMAT_LOAD_STORE_LANES)
9385 {
9386 tree vec_array;
9387
9388 vec_array = create_vector_array (vectype, vec_num);
9389
9390 tree final_mask = NULL_TREE;
9391 if (loop_masks)
9392 final_mask = vect_get_loop_mask (gsi, loop_masks, ncopies,
9393 vectype, j);
9394 if (vec_mask)
9395 final_mask = prepare_load_store_mask (mask_vectype, final_mask,
9396 vec_mask, gsi);
9397
9398 gcall *call;
9399 if (final_mask)
9400 {
9401 /* Emit:
9402 VEC_ARRAY = MASK_LOAD_LANES (DATAREF_PTR, ALIAS_PTR,
9403 VEC_MASK). */
9404 unsigned int align = TYPE_ALIGN_UNIT (TREE_TYPE (vectype));
9405 tree alias_ptr = build_int_cst (ref_type, align);
9406 call = gimple_build_call_internal (IFN_MASK_LOAD_LANES, 3,
9407 dataref_ptr, alias_ptr,
9408 final_mask);
9409 }
9410 else
9411 {
9412 /* Emit:
9413 VEC_ARRAY = LOAD_LANES (MEM_REF[...all elements...]). */
9414 data_ref = create_array_ref (aggr_type, dataref_ptr, ref_type);
9415 call = gimple_build_call_internal (IFN_LOAD_LANES, 1, data_ref);
9416 }
9417 gimple_call_set_lhs (call, vec_array);
9418 gimple_call_set_nothrow (call, true);
9419 new_stmt_info = vect_finish_stmt_generation (stmt_info, call, gsi);
9420
9421 /* Extract each vector into an SSA_NAME. */
9422 for (i = 0; i < vec_num; i++)
9423 {
9424 new_temp = read_vector_array (stmt_info, gsi, scalar_dest,
9425 vec_array, i);
9426 dr_chain.quick_push (new_temp);
9427 }
9428
9429 /* Record the mapping between SSA_NAMEs and statements. */
9430 vect_record_grouped_load_vectors (stmt_info, dr_chain);
9431
9432 /* Record that VEC_ARRAY is now dead. */
9433 vect_clobber_variable (stmt_info, gsi, vec_array);
9434 }
9435 else
9436 {
9437 for (i = 0; i < vec_num; i++)
9438 {
9439 tree final_mask = NULL_TREE;
9440 if (loop_masks
9441 && memory_access_type != VMAT_INVARIANT)
9442 final_mask = vect_get_loop_mask (gsi, loop_masks,
9443 vec_num * ncopies,
9444 vectype, vec_num * j + i);
9445 if (vec_mask)
9446 final_mask = prepare_load_store_mask (mask_vectype, final_mask,
9447 vec_mask, gsi);
9448
9449 if (i > 0)
9450 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi,
9451 stmt_info, bump);
9452
9453 /* 2. Create the vector-load in the loop. */
9454 gimple *new_stmt = NULL;
9455 switch (alignment_support_scheme)
9456 {
9457 case dr_aligned:
9458 case dr_unaligned_supported:
9459 {
9460 unsigned int misalign;
9461 unsigned HOST_WIDE_INT align;
9462
9463 if (memory_access_type == VMAT_GATHER_SCATTER)
9464 {
9465 tree zero = build_zero_cst (vectype);
9466 tree scale = size_int (gs_info.scale);
9467 gcall *call;
9468 if (loop_masks)
9469 call = gimple_build_call_internal
9470 (IFN_MASK_GATHER_LOAD, 5, dataref_ptr,
9471 vec_offset, scale, zero, final_mask);
9472 else
9473 call = gimple_build_call_internal
9474 (IFN_GATHER_LOAD, 4, dataref_ptr,
9475 vec_offset, scale, zero);
9476 gimple_call_set_nothrow (call, true);
9477 new_stmt = call;
9478 data_ref = NULL_TREE;
9479 break;
9480 }
9481
9482 align =
9483 known_alignment (DR_TARGET_ALIGNMENT (first_dr_info));
9484 if (alignment_support_scheme == dr_aligned)
9485 {
9486 gcc_assert (aligned_access_p (first_dr_info));
9487 misalign = 0;
9488 }
9489 else if (DR_MISALIGNMENT (first_dr_info) == -1)
9490 {
9491 align = dr_alignment
9492 (vect_dr_behavior (first_dr_info));
9493 misalign = 0;
9494 }
9495 else
9496 misalign = DR_MISALIGNMENT (first_dr_info);
9497 if (dataref_offset == NULL_TREE
9498 && TREE_CODE (dataref_ptr) == SSA_NAME)
9499 set_ptr_info_alignment (get_ptr_info (dataref_ptr),
9500 align, misalign);
9501
9502 if (final_mask)
9503 {
9504 align = least_bit_hwi (misalign | align);
9505 tree ptr = build_int_cst (ref_type, align);
9506 gcall *call
9507 = gimple_build_call_internal (IFN_MASK_LOAD, 3,
9508 dataref_ptr, ptr,
9509 final_mask);
9510 gimple_call_set_nothrow (call, true);
9511 new_stmt = call;
9512 data_ref = NULL_TREE;
9513 }
9514 else
9515 {
9516 tree ltype = vectype;
9517 /* If there's no peeling for gaps but we have a gap
9518 with slp loads then load the lower half of the
9519 vector only. See get_group_load_store_type for
9520 when we apply this optimization. */
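/* E.g. (illustrative): with a V4SI vectype, a group of
   size 4 and a gap of 2, only a 2-element vector is
   loaded; the constructor built below pads the upper
   half with zeros to form the full V4SI value.  */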
9521 if (slp
9522 && loop_vinfo
9523 && !LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo)
9524 && DR_GROUP_GAP (first_stmt_info) != 0
9525 && known_eq (nunits,
9526 (group_size
9527 - DR_GROUP_GAP (first_stmt_info)) * 2)
9528 && known_eq (nunits, group_size))
9529 ltype = build_vector_type (TREE_TYPE (vectype),
9530 (group_size
9531 - DR_GROUP_GAP
9532 (first_stmt_info)));
9533 data_ref
9534 = fold_build2 (MEM_REF, ltype, dataref_ptr,
9535 dataref_offset
9536 ? dataref_offset
9537 : build_int_cst (ref_type, 0));
9538 if (alignment_support_scheme == dr_aligned)
9539 ;
9540 else if (DR_MISALIGNMENT (first_dr_info) == -1)
9541 TREE_TYPE (data_ref)
9542 = build_aligned_type (TREE_TYPE (data_ref),
9543 align * BITS_PER_UNIT);
9544 else
9545 TREE_TYPE (data_ref)
9546 = build_aligned_type (TREE_TYPE (data_ref),
9547 TYPE_ALIGN (elem_type));
9548 if (ltype != vectype)
9549 {
9550 vect_copy_ref_info (data_ref, DR_REF (first_dr_info->dr));
9551 tree tem = make_ssa_name (ltype);
9552 new_stmt = gimple_build_assign (tem, data_ref);
9553 vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
9554 data_ref = NULL;
9555 vec<constructor_elt, va_gc> *v;
9556 vec_alloc (v, 2);
9557 CONSTRUCTOR_APPEND_ELT (v, NULL_TREE, tem);
9558 CONSTRUCTOR_APPEND_ELT (v, NULL_TREE,
9559 build_zero_cst (ltype));
9560 new_stmt
9561 = gimple_build_assign (vec_dest,
9562 build_constructor
9563 (vectype, v));
9564 }
9565 }
9566 break;
9567 }
9568 case dr_explicit_realign:
9569 {
9570 tree ptr, bump;
9571
9572 tree vs = size_int (TYPE_VECTOR_SUBPARTS (vectype));
9573
9574 if (compute_in_loop)
9575 msq = vect_setup_realignment (first_stmt_info, gsi,
9576 &realignment_token,
9577 dr_explicit_realign,
9578 dataref_ptr, NULL);
9579
9580 if (TREE_CODE (dataref_ptr) == SSA_NAME)
9581 ptr = copy_ssa_name (dataref_ptr);
9582 else
9583 ptr = make_ssa_name (TREE_TYPE (dataref_ptr));
9584 // For explicit realign the target alignment should be
9585 // known at compile time.
9586 unsigned HOST_WIDE_INT align =
9587 DR_TARGET_ALIGNMENT (first_dr_info).to_constant ();
9588 new_stmt = gimple_build_assign
9589 (ptr, BIT_AND_EXPR, dataref_ptr,
9590 build_int_cst
9591 (TREE_TYPE (dataref_ptr),
9592 -(HOST_WIDE_INT) align));
9593 vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
9594 data_ref
9595 = build2 (MEM_REF, vectype, ptr,
9596 build_int_cst (ref_type, 0));
9597 vect_copy_ref_info (data_ref, DR_REF (first_dr_info->dr));
9598 vec_dest = vect_create_destination_var (scalar_dest,
9599 vectype);
9600 new_stmt = gimple_build_assign (vec_dest, data_ref);
9601 new_temp = make_ssa_name (vec_dest, new_stmt);
9602 gimple_assign_set_lhs (new_stmt, new_temp);
9603 gimple_move_vops (new_stmt, stmt_info->stmt);
9604 vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
9605 msq = new_temp;
9606
9607 bump = size_binop (MULT_EXPR, vs,
9608 TYPE_SIZE_UNIT (elem_type));
9609 bump = size_binop (MINUS_EXPR, bump, size_one_node);
9610 ptr = bump_vector_ptr (dataref_ptr, NULL, gsi,
9611 stmt_info, bump);
9612 new_stmt = gimple_build_assign
9613 (NULL_TREE, BIT_AND_EXPR, ptr,
9614 build_int_cst
9615 (TREE_TYPE (ptr), -(HOST_WIDE_INT) align));
9616 ptr = copy_ssa_name (ptr, new_stmt);
9617 gimple_assign_set_lhs (new_stmt, ptr);
9618 vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
9619 data_ref
9620 = build2 (MEM_REF, vectype, ptr,
9621 build_int_cst (ref_type, 0));
9622 break;
9623 }
9624 case dr_explicit_realign_optimized:
9625 {
9626 if (TREE_CODE (dataref_ptr) == SSA_NAME)
9627 new_temp = copy_ssa_name (dataref_ptr);
9628 else
9629 new_temp = make_ssa_name (TREE_TYPE (dataref_ptr));
9630 // We should only be doing this if we know the target
9631 // alignment at compile time.
9632 unsigned HOST_WIDE_INT align =
9633 DR_TARGET_ALIGNMENT (first_dr_info).to_constant ();
9634 new_stmt = gimple_build_assign
9635 (new_temp, BIT_AND_EXPR, dataref_ptr,
9636 build_int_cst (TREE_TYPE (dataref_ptr),
9637 -(HOST_WIDE_INT) align));
9638 vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
9639 data_ref
9640 = build2 (MEM_REF, vectype, new_temp,
9641 build_int_cst (ref_type, 0));
9642 break;
9643 }
9644 default:
9645 gcc_unreachable ();
9646 }
9647 vec_dest = vect_create_destination_var (scalar_dest, vectype);
9648 /* DATA_REF is null if we've already built the statement. */
9649 if (data_ref)
9650 {
9651 vect_copy_ref_info (data_ref, DR_REF (first_dr_info->dr));
9652 new_stmt = gimple_build_assign (vec_dest, data_ref);
9653 }
9654 new_temp = make_ssa_name (vec_dest, new_stmt);
9655 gimple_set_lhs (new_stmt, new_temp);
9656 new_stmt_info
9657 = vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
9658
9659 /* 3. Handle explicit realignment if necessary/supported.
9660 Create in loop:
9661 vec_dest = realign_load (msq, lsq, realignment_token) */
9662 if (alignment_support_scheme == dr_explicit_realign_optimized
9663 || alignment_support_scheme == dr_explicit_realign)
9664 {
9665 lsq = gimple_assign_lhs (new_stmt);
9666 if (!realignment_token)
9667 realignment_token = dataref_ptr;
9668 vec_dest = vect_create_destination_var (scalar_dest, vectype);
9669 new_stmt = gimple_build_assign (vec_dest, REALIGN_LOAD_EXPR,
9670 msq, lsq, realignment_token);
9671 new_temp = make_ssa_name (vec_dest, new_stmt);
9672 gimple_assign_set_lhs (new_stmt, new_temp);
9673 new_stmt_info
9674 = vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
9675
9676 if (alignment_support_scheme == dr_explicit_realign_optimized)
9677 {
9678 gcc_assert (phi);
9679 if (i == vec_num - 1 && j == ncopies - 1)
9680 add_phi_arg (phi, lsq,
9681 loop_latch_edge (containing_loop),
9682 UNKNOWN_LOCATION);
9683 msq = lsq;
9684 }
9685 }
9686
9687 if (memory_access_type == VMAT_CONTIGUOUS_REVERSE)
9688 {
9689 tree perm_mask = perm_mask_for_reverse (vectype);
9690 new_temp = permute_vec_elements (new_temp, new_temp,
9691 perm_mask, stmt_info, gsi);
9692 new_stmt_info = vinfo->lookup_def (new_temp);
9693 }
9694
9695 /* Collect vector loads and later create their permutation in
9696 vect_transform_grouped_load (). */
9697 if (grouped_load || slp_perm)
9698 dr_chain.quick_push (new_temp);
9699
9700 /* Store vector loads in the corresponding SLP_NODE. */
9701 if (slp && !slp_perm)
9702 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt_info);
9703
9704 /* With an SLP permutation we load the gaps as well; without one
9705 we need to skip the gaps after we manage to fully load
9706 all elements. group_gap_adj is DR_GROUP_SIZE here. */
9707 group_elt += nunits;
9708 if (maybe_ne (group_gap_adj, 0U)
9709 && !slp_perm
9710 && known_eq (group_elt, group_size - group_gap_adj))
9711 {
9712 poly_wide_int bump_val
9713 = (wi::to_wide (TYPE_SIZE_UNIT (elem_type))
9714 * group_gap_adj);
9715 tree bump = wide_int_to_tree (sizetype, bump_val);
9716 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi,
9717 stmt_info, bump);
9718 group_elt = 0;
9719 }
9720 }
9721 /* Bump the vector pointer to account for a gap or for excess
9722 elements loaded for a permuted SLP load. */
9723 if (maybe_ne (group_gap_adj, 0U) && slp_perm)
9724 {
9725 poly_wide_int bump_val
9726 = (wi::to_wide (TYPE_SIZE_UNIT (elem_type))
9727 * group_gap_adj);
9728 tree bump = wide_int_to_tree (sizetype, bump_val);
9729 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi,
9730 stmt_info, bump);
9731 }
9732 }
9733
9734 if (slp && !slp_perm)
9735 continue;
9736
9737 if (slp_perm)
9738 {
9739 unsigned n_perms;
9740 if (!vect_transform_slp_perm_load (slp_node, dr_chain, gsi, vf,
9741 slp_node_instance, false,
9742 &n_perms))
9743 {
9744 dr_chain.release ();
9745 return false;
9746 }
9747 }
9748 else
9749 {
9750 if (grouped_load)
9751 {
9752 if (memory_access_type != VMAT_LOAD_STORE_LANES)
9753 vect_transform_grouped_load (stmt_info, dr_chain,
9754 group_size, gsi);
9755 *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
9756 }
9757 else
9758 {
9759 if (j == 0)
9760 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt_info;
9761 else
9762 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt_info;
9763 prev_stmt_info = new_stmt_info;
9764 }
9765 }
9766 dr_chain.release ();
9767 }
9768
9769 return true;
9770 }
9771
9772 /* Function vect_is_simple_cond.
9773
9774 Input:
9775 VINFO - the vectorization info of the loop or block being vectorized.
9776 COND - Condition that is checked for simple use.
9777
9778 Output:
9779 *COMP_VECTYPE - the vector type for the comparison.
9780 *DTS - The def types for the arguments of the comparison
9781
9782 Returns whether a COND can be vectorized. Checks whether
9783 condition operands are supportable using vect_is_simple_use. */
9784
9785 static bool
9786 vect_is_simple_cond (tree cond, vec_info *vinfo,
9787 tree *comp_vectype, enum vect_def_type *dts,
9788 tree vectype)
9789 {
9790 tree lhs, rhs;
9791 tree vectype1 = NULL_TREE, vectype2 = NULL_TREE;
9792
9793 /* Mask case. */
9794 if (TREE_CODE (cond) == SSA_NAME
9795 && VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (cond)))
9796 {
9797 if (!vect_is_simple_use (cond, vinfo, &dts[0], comp_vectype)
9798 || !*comp_vectype
9799 || !VECTOR_BOOLEAN_TYPE_P (*comp_vectype))
9800 return false;
9801 return true;
9802 }
9803
9804 if (!COMPARISON_CLASS_P (cond))
9805 return false;
9806
9807 lhs = TREE_OPERAND (cond, 0);
9808 rhs = TREE_OPERAND (cond, 1);
9809
9810 if (TREE_CODE (lhs) == SSA_NAME)
9811 {
9812 if (!vect_is_simple_use (lhs, vinfo, &dts[0], &vectype1))
9813 return false;
9814 }
9815 else if (TREE_CODE (lhs) == INTEGER_CST || TREE_CODE (lhs) == REAL_CST
9816 || TREE_CODE (lhs) == FIXED_CST)
9817 dts[0] = vect_constant_def;
9818 else
9819 return false;
9820
9821 if (TREE_CODE (rhs) == SSA_NAME)
9822 {
9823 if (!vect_is_simple_use (rhs, vinfo, &dts[1], &vectype2))
9824 return false;
9825 }
9826 else if (TREE_CODE (rhs) == INTEGER_CST || TREE_CODE (rhs) == REAL_CST
9827 || TREE_CODE (rhs) == FIXED_CST)
9828 dts[1] = vect_constant_def;
9829 else
9830 return false;
9831
9832 if (vectype1 && vectype2
9833 && maybe_ne (TYPE_VECTOR_SUBPARTS (vectype1),
9834 TYPE_VECTOR_SUBPARTS (vectype2)))
9835 return false;
9836
9837 *comp_vectype = vectype1 ? vectype1 : vectype2;
9838 /* Invariant comparison. */
9839 if (! *comp_vectype)
9840 {
9841 tree scalar_type = TREE_TYPE (lhs);
9842 /* If we can widen the comparison to match vectype, do so. */
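/* E.g. (illustrative): when comparing two invariant chars while the
   COND_EXPR itself produces a V4SI value, widen the comparison to
   int so that the comparison vector type has the same number of
   elements as VECTYPE.  */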
9843 if (INTEGRAL_TYPE_P (scalar_type)
9844 && vectype
9845 && tree_int_cst_lt (TYPE_SIZE (scalar_type),
9846 TYPE_SIZE (TREE_TYPE (vectype))))
9847 scalar_type = build_nonstandard_integer_type
9848 (tree_to_uhwi (TYPE_SIZE (TREE_TYPE (vectype))),
9849 TYPE_UNSIGNED (scalar_type));
9850 *comp_vectype = get_vectype_for_scalar_type (vinfo, scalar_type);
9851 }
9852
9853 return true;
9854 }
9855
9856 /* vectorizable_condition.
9857
9858 Check if STMT_INFO is a conditional modify expression that can be vectorized.
9859 If VEC_STMT is also passed, vectorize STMT_INFO: create a vectorized
9860 stmt using VEC_COND_EXPR to replace it, put it in VEC_STMT, and insert it
9861 at GSI.
9862
9863 When STMT_INFO is vectorized as a nested cycle, for_reduction is true.
9864
9865 Return true if STMT_INFO is vectorizable in this way. */
9866
9867 static bool
9868 vectorizable_condition (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
9869 stmt_vec_info *vec_stmt,
9870 slp_tree slp_node, stmt_vector_for_cost *cost_vec)
9871 {
9872 vec_info *vinfo = stmt_info->vinfo;
9873 tree scalar_dest = NULL_TREE;
9874 tree vec_dest = NULL_TREE;
9875 tree cond_expr, cond_expr0 = NULL_TREE, cond_expr1 = NULL_TREE;
9876 tree then_clause, else_clause;
9877 tree comp_vectype = NULL_TREE;
9878 tree vec_cond_lhs = NULL_TREE, vec_cond_rhs = NULL_TREE;
9879 tree vec_then_clause = NULL_TREE, vec_else_clause = NULL_TREE;
9880 tree vec_compare;
9881 tree new_temp;
9882 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
9883 enum vect_def_type dts[4]
9884 = {vect_unknown_def_type, vect_unknown_def_type,
9885 vect_unknown_def_type, vect_unknown_def_type};
9886 int ndts = 4;
9887 int ncopies;
9888 enum tree_code code, cond_code, bitop1 = NOP_EXPR, bitop2 = NOP_EXPR;
9889 stmt_vec_info prev_stmt_info = NULL;
9890 int i, j;
9891 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
9892 vec<tree> vec_oprnds0 = vNULL;
9893 vec<tree> vec_oprnds1 = vNULL;
9894 vec<tree> vec_oprnds2 = vNULL;
9895 vec<tree> vec_oprnds3 = vNULL;
9896 tree vec_cmp_type;
9897 bool masked = false;
9898
9899 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
9900 return false;
9901
9902 /* Is vectorizable conditional operation? */
9903 gassign *stmt = dyn_cast <gassign *> (stmt_info->stmt);
9904 if (!stmt)
9905 return false;
9906
9907 code = gimple_assign_rhs_code (stmt);
9908 if (code != COND_EXPR)
9909 return false;
9910
9911 stmt_vec_info reduc_info = NULL;
9912 int reduc_index = -1;
9913 vect_reduction_type reduction_type = TREE_CODE_REDUCTION;
9914 bool for_reduction
9915 = STMT_VINFO_REDUC_DEF (vect_orig_stmt (stmt_info)) != NULL;
9916 if (for_reduction)
9917 {
9918 if (STMT_SLP_TYPE (stmt_info))
9919 return false;
9920 reduc_info = info_for_reduction (stmt_info);
9921 reduction_type = STMT_VINFO_REDUC_TYPE (reduc_info);
9922 reduc_index = STMT_VINFO_REDUC_IDX (stmt_info);
9923 gcc_assert (reduction_type != EXTRACT_LAST_REDUCTION
9924 || reduc_index != -1);
9925 }
9926 else
9927 {
9928 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
9929 return false;
9930
9931 /* FORNOW: only supported as part of a reduction. */
9932 if (STMT_VINFO_LIVE_P (stmt_info))
9933 {
9934 if (dump_enabled_p ())
9935 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
9936 "value used after loop.\n");
9937 return false;
9938 }
9939 }
9940
9941 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
9942 tree vectype1 = NULL_TREE, vectype2 = NULL_TREE;
9943
9944 if (slp_node)
9945 ncopies = 1;
9946 else
9947 ncopies = vect_get_num_copies (loop_vinfo, vectype);
9948
9949 gcc_assert (ncopies >= 1);
9950 if (for_reduction && ncopies > 1)
9951 return false; /* FORNOW */
9952
9953 cond_expr = gimple_assign_rhs1 (stmt);
9954 then_clause = gimple_assign_rhs2 (stmt);
9955 else_clause = gimple_assign_rhs3 (stmt);
9956
9957 if (!vect_is_simple_cond (cond_expr, stmt_info->vinfo,
9958 &comp_vectype, &dts[0], slp_node ? NULL : vectype)
9959 || !comp_vectype)
9960 return false;
9961
9962 if (!vect_is_simple_use (then_clause, stmt_info->vinfo, &dts[2], &vectype1))
9963 return false;
9964 if (!vect_is_simple_use (else_clause, stmt_info->vinfo, &dts[3], &vectype2))
9965 return false;
9966
9967 if (vectype1 && !useless_type_conversion_p (vectype, vectype1))
9968 return false;
9969
9970 if (vectype2 && !useless_type_conversion_p (vectype, vectype2))
9971 return false;
9972
9973 masked = !COMPARISON_CLASS_P (cond_expr);
9974 vec_cmp_type = truth_type_for (comp_vectype);
9975
9976 if (vec_cmp_type == NULL_TREE)
9977 return false;
9978
9979 cond_code = TREE_CODE (cond_expr);
9980 if (!masked)
9981 {
9982 cond_expr0 = TREE_OPERAND (cond_expr, 0);
9983 cond_expr1 = TREE_OPERAND (cond_expr, 1);
9984 }
9985
9986 /* For conditional reductions, the "then" value needs to be the candidate
9987 value calculated by this iteration while the "else" value needs to be
9988 the result carried over from previous iterations. If the COND_EXPR
9989 is the other way around, we need to swap it. */
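/* An illustrative sketch: an EXTRACT_LAST reduction written as
     _1 = a_2 < b_3 ? carried_4 : candidate_5
   is handled as
     _1 = a_2 >= b_3 ? candidate_5 : carried_4;
   if the comparison cannot be inverted (e.g. because of NaNs), the
   computed mask is inverted with BIT_NOT_EXPR instead.  */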
9990 bool must_invert_cmp_result = false;
9991 if (reduction_type == EXTRACT_LAST_REDUCTION && reduc_index == 1)
9992 {
9993 if (masked)
9994 must_invert_cmp_result = true;
9995 else
9996 {
9997 bool honor_nans = HONOR_NANS (TREE_TYPE (cond_expr0));
9998 tree_code new_code = invert_tree_comparison (cond_code, honor_nans);
9999 if (new_code == ERROR_MARK)
10000 must_invert_cmp_result = true;
10001 else
10002 cond_code = new_code;
10003 }
10004 /* Make sure we don't accidentally use the old condition. */
10005 cond_expr = NULL_TREE;
10006 std::swap (then_clause, else_clause);
10007 }
10008
10009 if (!masked && VECTOR_BOOLEAN_TYPE_P (comp_vectype))
10010 {
10011 /* Boolean values may have another representation in vectors
10012 and therefore we prefer bit operations over comparison for
10013 them (which also works for scalar masks). We store opcodes
10014 to use in bitop1 and bitop2. Statement is vectorized as
10015 BITOP2 (rhs1 BITOP1 rhs2) or rhs1 BITOP2 (BITOP1 rhs2)
10016 depending on bitop1 and bitop2 arity. */
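/* For instance (illustrative): with boolean rhs1 and rhs2,
   rhs1 > rhs2 becomes rhs1 & ~rhs2 (bitop1 = BIT_NOT_EXPR,
   bitop2 = BIT_AND_EXPR) and rhs1 == rhs2 becomes ~(rhs1 ^ rhs2)
   (bitop1 = BIT_XOR_EXPR, bitop2 = BIT_NOT_EXPR).  */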
10017 switch (cond_code)
10018 {
10019 case GT_EXPR:
10020 bitop1 = BIT_NOT_EXPR;
10021 bitop2 = BIT_AND_EXPR;
10022 break;
10023 case GE_EXPR:
10024 bitop1 = BIT_NOT_EXPR;
10025 bitop2 = BIT_IOR_EXPR;
10026 break;
10027 case LT_EXPR:
10028 bitop1 = BIT_NOT_EXPR;
10029 bitop2 = BIT_AND_EXPR;
10030 std::swap (cond_expr0, cond_expr1);
10031 break;
10032 case LE_EXPR:
10033 bitop1 = BIT_NOT_EXPR;
10034 bitop2 = BIT_IOR_EXPR;
10035 std::swap (cond_expr0, cond_expr1);
10036 break;
10037 case NE_EXPR:
10038 bitop1 = BIT_XOR_EXPR;
10039 break;
10040 case EQ_EXPR:
10041 bitop1 = BIT_XOR_EXPR;
10042 bitop2 = BIT_NOT_EXPR;
10043 break;
10044 default:
10045 return false;
10046 }
10047 cond_code = SSA_NAME;
10048 }
10049
10050 if (!vec_stmt)
10051 {
10052 if (bitop1 != NOP_EXPR)
10053 {
10054 machine_mode mode = TYPE_MODE (comp_vectype);
10055 optab optab;
10056
10057 optab = optab_for_tree_code (bitop1, comp_vectype, optab_default);
10058 if (!optab || optab_handler (optab, mode) == CODE_FOR_nothing)
10059 return false;
10060
10061 if (bitop2 != NOP_EXPR)
10062 {
10063 optab = optab_for_tree_code (bitop2, comp_vectype,
10064 optab_default);
10065 if (!optab || optab_handler (optab, mode) == CODE_FOR_nothing)
10066 return false;
10067 }
10068 }
10069 if (expand_vec_cond_expr_p (vectype, comp_vectype,
10070 cond_code))
10071 {
10072 STMT_VINFO_TYPE (stmt_info) = condition_vec_info_type;
10073 vect_model_simple_cost (stmt_info, ncopies, dts, ndts, slp_node,
10074 cost_vec);
10075 return true;
10076 }
10077 return false;
10078 }
10079
10080 /* Transform. */
10081
10082 if (!slp_node)
10083 {
10084 vec_oprnds0.create (1);
10085 vec_oprnds1.create (1);
10086 vec_oprnds2.create (1);
10087 vec_oprnds3.create (1);
10088 }
10089
10090 /* Handle def. */
10091 scalar_dest = gimple_assign_lhs (stmt);
10092 if (reduction_type != EXTRACT_LAST_REDUCTION)
10093 vec_dest = vect_create_destination_var (scalar_dest, vectype);
10094
10095 /* Handle cond expr. */
10096 for (j = 0; j < ncopies; j++)
10097 {
10098 bool swap_cond_operands = false;
10099
10100 /* See whether another part of the vectorized code applies a loop
10101 mask to the condition, or to its inverse. */
10102
10103 vec_loop_masks *masks = NULL;
10104 if (loop_vinfo && LOOP_VINFO_FULLY_MASKED_P (loop_vinfo))
10105 {
10106 if (reduction_type == EXTRACT_LAST_REDUCTION)
10107 masks = &LOOP_VINFO_MASKS (loop_vinfo);
10108 else
10109 {
10110 scalar_cond_masked_key cond (cond_expr, ncopies);
10111 if (loop_vinfo->scalar_cond_masked_set.contains (cond))
10112 masks = &LOOP_VINFO_MASKS (loop_vinfo);
10113 else
10114 {
10115 bool honor_nans = HONOR_NANS (TREE_TYPE (cond.op0));
10116 cond.code = invert_tree_comparison (cond.code, honor_nans);
10117 if (loop_vinfo->scalar_cond_masked_set.contains (cond))
10118 {
10119 masks = &LOOP_VINFO_MASKS (loop_vinfo);
10120 cond_code = cond.code;
10121 swap_cond_operands = true;
10122 }
10123 }
10124 }
10125 }
10126
10127 stmt_vec_info new_stmt_info = NULL;
10128 if (j == 0)
10129 {
10130 if (slp_node)
10131 {
10132 auto_vec<vec<tree>, 4> vec_defs;
10133 vect_get_slp_defs (slp_node, &vec_defs);
10134 vec_oprnds3 = vec_defs.pop ();
10135 vec_oprnds2 = vec_defs.pop ();
10136 if (!masked)
10137 vec_oprnds1 = vec_defs.pop ();
10138 vec_oprnds0 = vec_defs.pop ();
10139 }
10140 else
10141 {
10142 if (masked)
10143 {
10144 vec_cond_lhs
10145 = vect_get_vec_def_for_operand (cond_expr, stmt_info,
10146 comp_vectype);
10147 }
10148 else
10149 {
10150 vec_cond_lhs
10151 = vect_get_vec_def_for_operand (cond_expr0,
10152 stmt_info, comp_vectype);
10153 vec_cond_rhs
10154 = vect_get_vec_def_for_operand (cond_expr1,
10155 stmt_info, comp_vectype);
10156 }
10157 vec_then_clause = vect_get_vec_def_for_operand (then_clause,
10158 stmt_info);
10159 if (reduction_type != EXTRACT_LAST_REDUCTION)
10160 vec_else_clause = vect_get_vec_def_for_operand (else_clause,
10161 stmt_info);
10162 }
10163 }
10164 else
10165 {
10166 vec_cond_lhs
10167 = vect_get_vec_def_for_stmt_copy (vinfo, vec_oprnds0.pop ());
10168 if (!masked)
10169 vec_cond_rhs
10170 = vect_get_vec_def_for_stmt_copy (vinfo, vec_oprnds1.pop ());
10171
10172 vec_then_clause = vect_get_vec_def_for_stmt_copy (vinfo,
10173 vec_oprnds2.pop ());
10174 vec_else_clause = vect_get_vec_def_for_stmt_copy (vinfo,
10175 vec_oprnds3.pop ());
10176 }
10177
10178 if (!slp_node)
10179 {
10180 vec_oprnds0.quick_push (vec_cond_lhs);
10181 if (!masked)
10182 vec_oprnds1.quick_push (vec_cond_rhs);
10183 vec_oprnds2.quick_push (vec_then_clause);
10184 vec_oprnds3.quick_push (vec_else_clause);
10185 }
10186
10187 /* Arguments are ready. Create the new vector stmt. */
10188 FOR_EACH_VEC_ELT (vec_oprnds0, i, vec_cond_lhs)
10189 {
10190 vec_then_clause = vec_oprnds2[i];
10191 vec_else_clause = vec_oprnds3[i];
10192
10193 if (swap_cond_operands)
10194 std::swap (vec_then_clause, vec_else_clause);
10195
10196 if (masked)
10197 vec_compare = vec_cond_lhs;
10198 else
10199 {
10200 vec_cond_rhs = vec_oprnds1[i];
10201 if (bitop1 == NOP_EXPR)
10202 vec_compare = build2 (cond_code, vec_cmp_type,
10203 vec_cond_lhs, vec_cond_rhs);
10204 else
10205 {
10206 new_temp = make_ssa_name (vec_cmp_type);
10207 gassign *new_stmt;
10208 if (bitop1 == BIT_NOT_EXPR)
10209 new_stmt = gimple_build_assign (new_temp, bitop1,
10210 vec_cond_rhs);
10211 else
10212 new_stmt
10213 = gimple_build_assign (new_temp, bitop1, vec_cond_lhs,
10214 vec_cond_rhs);
10215 vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
10216 if (bitop2 == NOP_EXPR)
10217 vec_compare = new_temp;
10218 else if (bitop2 == BIT_NOT_EXPR)
10219 {
10220 /* Instead of doing ~x ? y : z do x ? z : y. */
10221 vec_compare = new_temp;
10222 std::swap (vec_then_clause, vec_else_clause);
10223 }
10224 else
10225 {
10226 vec_compare = make_ssa_name (vec_cmp_type);
10227 new_stmt
10228 = gimple_build_assign (vec_compare, bitop2,
10229 vec_cond_lhs, new_temp);
10230 vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
10231 }
10232 }
10233 }
10234
10235 /* If we decided to apply a loop mask to the result of the vector
10236 comparison, AND the comparison with the mask now. Later passes
10237 should then be able to reuse the AND results between multiple
10238 vector statements.
10239
10240 For example:
10241 for (int i = 0; i < 100; ++i)
10242 x[i] = y[i] ? z[i] : 10;
10243
10244 results in following optimized GIMPLE:
10245
10246 mask__35.8_43 = vect__4.7_41 != { 0, ... };
10247 vec_mask_and_46 = loop_mask_40 & mask__35.8_43;
10248 _19 = &MEM[base: z_12(D), index: ivtmp_56, step: 4, offset: 0B];
10249 vect_iftmp.11_47 = .MASK_LOAD (_19, 4B, vec_mask_and_46);
10250 vect_iftmp.12_52 = VEC_COND_EXPR <vec_mask_and_46,
10251 vect_iftmp.11_47, { 10, ... }>;
10252
10253 instead of using masked and unmasked forms of
10254 vec != { 0, ... } (masked in the MASK_LOAD,
10255 unmasked in the VEC_COND_EXPR). */
10256
10257 /* Force vec_compare to be an SSA_NAME rather than a comparison,
10258 in cases where that's necessary. */
10259
10260 if (masks || reduction_type == EXTRACT_LAST_REDUCTION)
10261 {
10262 if (!is_gimple_val (vec_compare))
10263 {
10264 tree vec_compare_name = make_ssa_name (vec_cmp_type);
10265 gassign *new_stmt = gimple_build_assign (vec_compare_name,
10266 vec_compare);
10267 vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
10268 vec_compare = vec_compare_name;
10269 }
10270
10271 if (must_invert_cmp_result)
10272 {
10273 tree vec_compare_name = make_ssa_name (vec_cmp_type);
10274 gassign *new_stmt = gimple_build_assign (vec_compare_name,
10275 BIT_NOT_EXPR,
10276 vec_compare);
10277 vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
10278 vec_compare = vec_compare_name;
10279 }
10280
10281 if (masks)
10282 {
10283 unsigned vec_num = vec_oprnds0.length ();
10284 tree loop_mask
10285 = vect_get_loop_mask (gsi, masks, vec_num * ncopies,
10286 vectype, vec_num * j + i);
10287 tree tmp2 = make_ssa_name (vec_cmp_type);
10288 gassign *g
10289 = gimple_build_assign (tmp2, BIT_AND_EXPR, vec_compare,
10290 loop_mask);
10291 vect_finish_stmt_generation (stmt_info, g, gsi);
10292 vec_compare = tmp2;
10293 }
10294 }
10295
10296 if (reduction_type == EXTRACT_LAST_REDUCTION)
10297 {
10298 gcall *new_stmt = gimple_build_call_internal
10299 (IFN_FOLD_EXTRACT_LAST, 3, else_clause, vec_compare,
10300 vec_then_clause);
10301 gimple_call_set_lhs (new_stmt, scalar_dest);
10302 SSA_NAME_DEF_STMT (scalar_dest) = new_stmt;
10303 if (stmt_info->stmt == gsi_stmt (*gsi))
10304 new_stmt_info = vect_finish_replace_stmt (stmt_info, new_stmt);
10305 else
10306 {
10307 /* In this case we're moving the definition to later in the
10308 block. That doesn't matter because the only uses of the
10309 lhs are in phi statements. */
10310 gimple_stmt_iterator old_gsi
10311 = gsi_for_stmt (stmt_info->stmt);
10312 gsi_remove (&old_gsi, true);
10313 new_stmt_info
10314 = vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
10315 }
10316 }
10317 else
10318 {
10319 new_temp = make_ssa_name (vec_dest);
10320 gassign *new_stmt
10321 = gimple_build_assign (new_temp, VEC_COND_EXPR, vec_compare,
10322 vec_then_clause, vec_else_clause);
10323 new_stmt_info
10324 = vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
10325 }
10326 if (slp_node)
10327 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt_info);
10328 }
10329
10330 if (slp_node)
10331 continue;
10332
10333 if (j == 0)
10334 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt_info;
10335 else
10336 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt_info;
10337
10338 prev_stmt_info = new_stmt_info;
10339 }
10340
10341 vec_oprnds0.release ();
10342 vec_oprnds1.release ();
10343 vec_oprnds2.release ();
10344 vec_oprnds3.release ();
10345
10346 return true;
10347 }
10348
10349 /* vectorizable_comparison.
10350
10351 Check if STMT_INFO is a comparison expression that can be vectorized.
10352 If VEC_STMT is also passed, vectorize STMT_INFO: create a vectorized
10353 comparison, put it in VEC_STMT, and insert it at GSI.
10354
10355 Return true if STMT_INFO is vectorizable in this way. */
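/* For illustration: a scalar comparison such as mask_6 = a_4 < b_5 is
   vectorized into one vector comparison per vector copy, producing a
   boolean vector of the mask type; when the operands are themselves
   boolean vectors the comparison may instead be rewritten into bit
   operations, as described in the function body below.  */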
10356
10357 static bool
10358 vectorizable_comparison (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
10359 stmt_vec_info *vec_stmt,
10360 slp_tree slp_node, stmt_vector_for_cost *cost_vec)
10361 {
10362 vec_info *vinfo = stmt_info->vinfo;
10363 tree lhs, rhs1, rhs2;
10364 tree vectype1 = NULL_TREE, vectype2 = NULL_TREE;
10365 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
10366 tree vec_rhs1 = NULL_TREE, vec_rhs2 = NULL_TREE;
10367 tree new_temp;
10368 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
10369 enum vect_def_type dts[2] = {vect_unknown_def_type, vect_unknown_def_type};
10370 int ndts = 2;
10371 poly_uint64 nunits;
10372 int ncopies;
10373 enum tree_code code, bitop1 = NOP_EXPR, bitop2 = NOP_EXPR;
10374 stmt_vec_info prev_stmt_info = NULL;
10375 int i, j;
10376 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
10377 vec<tree> vec_oprnds0 = vNULL;
10378 vec<tree> vec_oprnds1 = vNULL;
10379 tree mask_type;
10380 tree mask;
10381
10382 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
10383 return false;
10384
10385 if (!vectype || !VECTOR_BOOLEAN_TYPE_P (vectype))
10386 return false;
10387
10388 mask_type = vectype;
10389 nunits = TYPE_VECTOR_SUBPARTS (vectype);
10390
10391 if (slp_node)
10392 ncopies = 1;
10393 else
10394 ncopies = vect_get_num_copies (loop_vinfo, vectype);
10395
10396 gcc_assert (ncopies >= 1);
10397 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
10398 return false;
10399
10400 if (STMT_VINFO_LIVE_P (stmt_info))
10401 {
10402 if (dump_enabled_p ())
10403 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
10404 "value used after loop.\n");
10405 return false;
10406 }
10407
10408 gassign *stmt = dyn_cast <gassign *> (stmt_info->stmt);
10409 if (!stmt)
10410 return false;
10411
10412 code = gimple_assign_rhs_code (stmt);
10413
10414 if (TREE_CODE_CLASS (code) != tcc_comparison)
10415 return false;
10416
10417 rhs1 = gimple_assign_rhs1 (stmt);
10418 rhs2 = gimple_assign_rhs2 (stmt);
10419
10420 if (!vect_is_simple_use (rhs1, stmt_info->vinfo, &dts[0], &vectype1))
10421 return false;
10422
10423 if (!vect_is_simple_use (rhs2, stmt_info->vinfo, &dts[1], &vectype2))
10424 return false;
10425
10426 if (vectype1 && vectype2
10427 && maybe_ne (TYPE_VECTOR_SUBPARTS (vectype1),
10428 TYPE_VECTOR_SUBPARTS (vectype2)))
10429 return false;
10430
10431 vectype = vectype1 ? vectype1 : vectype2;
10432
10433 /* Invariant comparison. */
10434 if (!vectype)
10435 {
10436 vectype = get_vectype_for_scalar_type (vinfo, TREE_TYPE (rhs1));
10437 if (maybe_ne (TYPE_VECTOR_SUBPARTS (vectype), nunits))
10438 return false;
10439 }
10440 else if (maybe_ne (nunits, TYPE_VECTOR_SUBPARTS (vectype)))
10441 return false;
10442
10443 /* Can't compare mask and non-mask types. */
10444 if (vectype1 && vectype2
10445 && (VECTOR_BOOLEAN_TYPE_P (vectype1) ^ VECTOR_BOOLEAN_TYPE_P (vectype2)))
10446 return false;
10447
10448 /* Boolean values may have another representation in vectors
10449 and therefore we prefer bit operations over comparison for
10450 them (which also works for scalar masks). We store opcodes
10451 to use in bitop1 and bitop2. Statement is vectorized as
10452 BITOP2 (rhs1 BITOP1 rhs2) or
10453 rhs1 BITOP2 (BITOP1 rhs2)
10454 depending on bitop1 and bitop2 arity. */
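/* For illustration, with single-bit boolean elements a and b:
   a >  b is equivalent to a & ~b (bitop1 = BIT_NOT_EXPR applied to
   rhs2, bitop2 = BIT_AND_EXPR),
   a >= b is equivalent to a | ~b,
   a == b is equivalent to ~(a ^ b), and
   a != b is equivalent to a ^ b;
   LT_EXPR and LE_EXPR reuse the GT_EXPR and GE_EXPR mappings with the
   operands swapped.  */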
10455 bool swap_p = false;
10456 if (VECTOR_BOOLEAN_TYPE_P (vectype))
10457 {
10458 if (code == GT_EXPR)
10459 {
10460 bitop1 = BIT_NOT_EXPR;
10461 bitop2 = BIT_AND_EXPR;
10462 }
10463 else if (code == GE_EXPR)
10464 {
10465 bitop1 = BIT_NOT_EXPR;
10466 bitop2 = BIT_IOR_EXPR;
10467 }
10468 else if (code == LT_EXPR)
10469 {
10470 bitop1 = BIT_NOT_EXPR;
10471 bitop2 = BIT_AND_EXPR;
10472 swap_p = true;
10473 }
10474 else if (code == LE_EXPR)
10475 {
10476 bitop1 = BIT_NOT_EXPR;
10477 bitop2 = BIT_IOR_EXPR;
10478 swap_p = true;
10479 }
10480 else
10481 {
10482 bitop1 = BIT_XOR_EXPR;
10483 if (code == EQ_EXPR)
10484 bitop2 = BIT_NOT_EXPR;
10485 }
10486 }
10487
10488 if (!vec_stmt)
10489 {
10490 if (bitop1 == NOP_EXPR)
10491 {
10492 if (!expand_vec_cmp_expr_p (vectype, mask_type, code))
10493 return false;
10494 }
10495 else
10496 {
10497 machine_mode mode = TYPE_MODE (vectype);
10498 optab optab;
10499
10500 optab = optab_for_tree_code (bitop1, vectype, optab_default);
10501 if (!optab || optab_handler (optab, mode) == CODE_FOR_nothing)
10502 return false;
10503
10504 if (bitop2 != NOP_EXPR)
10505 {
10506 optab = optab_for_tree_code (bitop2, vectype, optab_default);
10507 if (!optab || optab_handler (optab, mode) == CODE_FOR_nothing)
10508 return false;
10509 }
10510 }
10511
10512 STMT_VINFO_TYPE (stmt_info) = comparison_vec_info_type;
10513 vect_model_simple_cost (stmt_info, ncopies * (1 + (bitop2 != NOP_EXPR)),
10514 dts, ndts, slp_node, cost_vec);
10515 return true;
10516 }
10517
10518 /* Transform. */
10519 if (!slp_node)
10520 {
10521 vec_oprnds0.create (1);
10522 vec_oprnds1.create (1);
10523 }
10524
10525 /* Handle def. */
10526 lhs = gimple_assign_lhs (stmt);
10527 mask = vect_create_destination_var (lhs, mask_type);
10528
10529 /* Handle cmp expr. */
10530 for (j = 0; j < ncopies; j++)
10531 {
10532 stmt_vec_info new_stmt_info = NULL;
10533 if (j == 0)
10534 {
10535 if (slp_node)
10536 {
10537 auto_vec<vec<tree>, 2> vec_defs;
10538 vect_get_slp_defs (slp_node, &vec_defs);
10539 vec_oprnds1 = vec_defs.pop ();
10540 vec_oprnds0 = vec_defs.pop ();
10541 if (swap_p)
10542 std::swap (vec_oprnds0, vec_oprnds1);
10543 }
10544 else
10545 {
10546 vec_rhs1 = vect_get_vec_def_for_operand (rhs1, stmt_info,
10547 vectype);
10548 vec_rhs2 = vect_get_vec_def_for_operand (rhs2, stmt_info,
10549 vectype);
10550 }
10551 }
10552 else
10553 {
10554 vec_rhs1 = vect_get_vec_def_for_stmt_copy (vinfo,
10555 vec_oprnds0.pop ());
10556 vec_rhs2 = vect_get_vec_def_for_stmt_copy (vinfo,
10557 vec_oprnds1.pop ());
10558 }
10559
10560 if (!slp_node)
10561 {
10562 if (swap_p && j == 0)
10563 std::swap (vec_rhs1, vec_rhs2);
10564 vec_oprnds0.quick_push (vec_rhs1);
10565 vec_oprnds1.quick_push (vec_rhs2);
10566 }
10567
10568 /* Arguments are ready. Create the new vector stmt. */
10569 FOR_EACH_VEC_ELT (vec_oprnds0, i, vec_rhs1)
10570 {
10571 vec_rhs2 = vec_oprnds1[i];
10572
10573 new_temp = make_ssa_name (mask);
10574 if (bitop1 == NOP_EXPR)
10575 {
10576 gassign *new_stmt = gimple_build_assign (new_temp, code,
10577 vec_rhs1, vec_rhs2);
10578 new_stmt_info
10579 = vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
10580 }
10581 else
10582 {
10583 gassign *new_stmt;
10584 if (bitop1 == BIT_NOT_EXPR)
10585 new_stmt = gimple_build_assign (new_temp, bitop1, vec_rhs2);
10586 else
10587 new_stmt = gimple_build_assign (new_temp, bitop1, vec_rhs1,
10588 vec_rhs2);
10589 new_stmt_info
10590 = vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
10591 if (bitop2 != NOP_EXPR)
10592 {
10593 tree res = make_ssa_name (mask);
10594 if (bitop2 == BIT_NOT_EXPR)
10595 new_stmt = gimple_build_assign (res, bitop2, new_temp);
10596 else
10597 new_stmt = gimple_build_assign (res, bitop2, vec_rhs1,
10598 new_temp);
10599 new_stmt_info
10600 = vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
10601 }
10602 }
10603 if (slp_node)
10604 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt_info);
10605 }
10606
10607 if (slp_node)
10608 continue;
10609
10610 if (j == 0)
10611 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt_info;
10612 else
10613 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt_info;
10614
10615 prev_stmt_info = new_stmt_info;
10616 }
10617
10618 vec_oprnds0.release ();
10619 vec_oprnds1.release ();
10620
10621 return true;
10622 }
10623
10624 /* If SLP_NODE is nonnull, return true if vectorizable_live_operation
10625 can handle all live statements in the node. Otherwise return true
10626 if STMT_INFO is not live or if vectorizable_live_operation can handle it.
10627 GSI and VEC_STMT_P are as for vectorizable_live_operation. */
10628
10629 static bool
10630 can_vectorize_live_stmts (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
10631 slp_tree slp_node, slp_instance slp_node_instance,
10632 bool vec_stmt_p,
10633 stmt_vector_for_cost *cost_vec)
10634 {
10635 if (slp_node)
10636 {
10637 stmt_vec_info slp_stmt_info;
10638 unsigned int i;
10639 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (slp_node), i, slp_stmt_info)
10640 {
10641 if (STMT_VINFO_LIVE_P (slp_stmt_info)
10642 && !vectorizable_live_operation (slp_stmt_info, gsi, slp_node,
10643 slp_node_instance, i,
10644 vec_stmt_p, cost_vec))
10645 return false;
10646 }
10647 }
10648 else if (STMT_VINFO_LIVE_P (stmt_info)
10649 && !vectorizable_live_operation (stmt_info, gsi, slp_node,
10650 slp_node_instance, -1,
10651 vec_stmt_p, cost_vec))
10652 return false;
10653
10654 return true;
10655 }
10656
10657 /* Make sure the statement is vectorizable. */
10658
10659 opt_result
10660 vect_analyze_stmt (stmt_vec_info stmt_info, bool *need_to_vectorize,
10661 slp_tree node, slp_instance node_instance,
10662 stmt_vector_for_cost *cost_vec)
10663 {
10664 vec_info *vinfo = stmt_info->vinfo;
10665 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
10666 enum vect_relevant relevance = STMT_VINFO_RELEVANT (stmt_info);
10667 bool ok;
10668 gimple_seq pattern_def_seq;
10669
10670 if (dump_enabled_p ())
10671 dump_printf_loc (MSG_NOTE, vect_location, "==> examining statement: %G",
10672 stmt_info->stmt);
10673
10674 if (gimple_has_volatile_ops (stmt_info->stmt))
10675 return opt_result::failure_at (stmt_info->stmt,
10676 "not vectorized:"
10677 " stmt has volatile operands: %G\n",
10678 stmt_info->stmt);
10679
10680 if (STMT_VINFO_IN_PATTERN_P (stmt_info)
10681 && node == NULL
10682 && (pattern_def_seq = STMT_VINFO_PATTERN_DEF_SEQ (stmt_info)))
10683 {
10684 gimple_stmt_iterator si;
10685
10686 for (si = gsi_start (pattern_def_seq); !gsi_end_p (si); gsi_next (&si))
10687 {
10688 stmt_vec_info pattern_def_stmt_info
10689 = vinfo->lookup_stmt (gsi_stmt (si));
10690 if (STMT_VINFO_RELEVANT_P (pattern_def_stmt_info)
10691 || STMT_VINFO_LIVE_P (pattern_def_stmt_info))
10692 {
10693 /* Analyze def stmt of STMT if it's a pattern stmt. */
10694 if (dump_enabled_p ())
10695 dump_printf_loc (MSG_NOTE, vect_location,
10696 "==> examining pattern def statement: %G",
10697 pattern_def_stmt_info->stmt);
10698
10699 opt_result res
10700 = vect_analyze_stmt (pattern_def_stmt_info,
10701 need_to_vectorize, node, node_instance,
10702 cost_vec);
10703 if (!res)
10704 return res;
10705 }
10706 }
10707 }
10708
10709 /* Skip stmts that do not need to be vectorized. In loops this is expected
10710 to include:
10711 - the COND_EXPR which is the loop exit condition
10712 - any LABEL_EXPRs in the loop
10713 - computations that are used only for array indexing or loop control.
10714 In basic blocks we only analyze statements that are a part of some SLP
10715 instance, therefore, all the statements are relevant.
10716
10717 A pattern statement needs to be analyzed instead of the original statement
10718 if the original statement is not relevant. Otherwise, we analyze both
10719 statements. In basic blocks we are called from some SLP instance
10720 traversal, so we don't analyze pattern stmts here; the pattern stmts
10721 are already part of the SLP instance. */
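/* For example, a multiplication on narrow operands may have been
   replaced by a WIDEN_MULT_EXPR pattern statement; in that case it is
   the pattern statement that carries the vector types and is analyzed
   here whenever the original multiplication is not itself relevant.  */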
10722
10723 stmt_vec_info pattern_stmt_info = STMT_VINFO_RELATED_STMT (stmt_info);
10724 if (!STMT_VINFO_RELEVANT_P (stmt_info)
10725 && !STMT_VINFO_LIVE_P (stmt_info))
10726 {
10727 if (STMT_VINFO_IN_PATTERN_P (stmt_info)
10728 && pattern_stmt_info
10729 && (STMT_VINFO_RELEVANT_P (pattern_stmt_info)
10730 || STMT_VINFO_LIVE_P (pattern_stmt_info)))
10731 {
10732 /* Analyze PATTERN_STMT instead of the original stmt. */
10733 stmt_info = pattern_stmt_info;
10734 if (dump_enabled_p ())
10735 dump_printf_loc (MSG_NOTE, vect_location,
10736 "==> examining pattern statement: %G",
10737 stmt_info->stmt);
10738 }
10739 else
10740 {
10741 if (dump_enabled_p ())
10742 dump_printf_loc (MSG_NOTE, vect_location, "irrelevant.\n");
10743
10744 return opt_result::success ();
10745 }
10746 }
10747 else if (STMT_VINFO_IN_PATTERN_P (stmt_info)
10748 && node == NULL
10749 && pattern_stmt_info
10750 && (STMT_VINFO_RELEVANT_P (pattern_stmt_info)
10751 || STMT_VINFO_LIVE_P (pattern_stmt_info)))
10752 {
10753 /* Analyze PATTERN_STMT too. */
10754 if (dump_enabled_p ())
10755 dump_printf_loc (MSG_NOTE, vect_location,
10756 "==> examining pattern statement: %G",
10757 pattern_stmt_info->stmt);
10758
10759 opt_result res
10760 = vect_analyze_stmt (pattern_stmt_info, need_to_vectorize, node,
10761 node_instance, cost_vec);
10762 if (!res)
10763 return res;
10764 }
10765
10766 switch (STMT_VINFO_DEF_TYPE (stmt_info))
10767 {
10768 case vect_internal_def:
10769 break;
10770
10771 case vect_reduction_def:
10772 case vect_nested_cycle:
10773 gcc_assert (!bb_vinfo
10774 && (relevance == vect_used_in_outer
10775 || relevance == vect_used_in_outer_by_reduction
10776 || relevance == vect_used_by_reduction
10777 || relevance == vect_unused_in_scope
10778 || relevance == vect_used_only_live));
10779 break;
10780
10781 case vect_induction_def:
10782 gcc_assert (!bb_vinfo);
10783 break;
10784
10785 case vect_constant_def:
10786 case vect_external_def:
10787 case vect_unknown_def_type:
10788 default:
10789 gcc_unreachable ();
10790 }
10791
10792 if (STMT_VINFO_RELEVANT_P (stmt_info))
10793 {
10794 tree type = gimple_expr_type (stmt_info->stmt);
10795 gcc_assert (!VECTOR_MODE_P (TYPE_MODE (type)));
10796 gcall *call = dyn_cast <gcall *> (stmt_info->stmt);
10797 gcc_assert (STMT_VINFO_VECTYPE (stmt_info)
10798 || (call && gimple_call_lhs (call) == NULL_TREE));
10799 *need_to_vectorize = true;
10800 }
10801
10802 if (PURE_SLP_STMT (stmt_info) && !node)
10803 {
10804 if (dump_enabled_p ())
10805 dump_printf_loc (MSG_NOTE, vect_location,
10806 "handled only by SLP analysis\n");
10807 return opt_result::success ();
10808 }
10809
10810 ok = true;
10811 if (!bb_vinfo
10812 && (STMT_VINFO_RELEVANT_P (stmt_info)
10813 || STMT_VINFO_DEF_TYPE (stmt_info) == vect_reduction_def))
10814 /* Prefer vectorizable_call over vectorizable_simd_clone_call so
10815 -mveclibabi= takes preference over library functions with
10816 the simd attribute. */
10817 ok = (vectorizable_call (stmt_info, NULL, NULL, node, cost_vec)
10818 || vectorizable_simd_clone_call (stmt_info, NULL, NULL, node,
10819 cost_vec)
10820 || vectorizable_conversion (stmt_info, NULL, NULL, node, cost_vec)
10821 || vectorizable_operation (stmt_info, NULL, NULL, node, cost_vec)
10822 || vectorizable_assignment (stmt_info, NULL, NULL, node, cost_vec)
10823 || vectorizable_load (stmt_info, NULL, NULL, node, node_instance,
10824 cost_vec)
10825 || vectorizable_store (stmt_info, NULL, NULL, node, cost_vec)
10826 || vectorizable_reduction (stmt_info, node, node_instance, cost_vec)
10827 || vectorizable_induction (stmt_info, NULL, NULL, node, cost_vec)
10828 || vectorizable_shift (stmt_info, NULL, NULL, node, cost_vec)
10829 || vectorizable_condition (stmt_info, NULL, NULL, node, cost_vec)
10830 || vectorizable_comparison (stmt_info, NULL, NULL, node,
10831 cost_vec)
10832 || vectorizable_lc_phi (stmt_info, NULL, node));
10833 else
10834 {
10835 if (bb_vinfo)
10836 ok = (vectorizable_call (stmt_info, NULL, NULL, node, cost_vec)
10837 || vectorizable_simd_clone_call (stmt_info, NULL, NULL, node,
10838 cost_vec)
10839 || vectorizable_conversion (stmt_info, NULL, NULL, node,
10840 cost_vec)
10841 || vectorizable_shift (stmt_info, NULL, NULL, node, cost_vec)
10842 || vectorizable_operation (stmt_info, NULL, NULL, node, cost_vec)
10843 || vectorizable_assignment (stmt_info, NULL, NULL, node,
10844 cost_vec)
10845 || vectorizable_load (stmt_info, NULL, NULL, node, node_instance,
10846 cost_vec)
10847 || vectorizable_store (stmt_info, NULL, NULL, node, cost_vec)
10848 || vectorizable_condition (stmt_info, NULL, NULL, node, cost_vec)
10849 || vectorizable_comparison (stmt_info, NULL, NULL, node,
10850 cost_vec));
10851 }
10852
10853 if (!ok)
10854 return opt_result::failure_at (stmt_info->stmt,
10855 "not vectorized:"
10856 " relevant stmt not supported: %G",
10857 stmt_info->stmt);
10858
10859 /* Stmts that are (also) "live" (i.e. - that are used out of the loop)
10860 need extra handling, except for vectorizable reductions. */
10861 if (!bb_vinfo
10862 && STMT_VINFO_TYPE (stmt_info) != reduc_vec_info_type
10863 && STMT_VINFO_TYPE (stmt_info) != lc_phi_info_type
10864 && !can_vectorize_live_stmts (stmt_info, NULL, node, node_instance,
10865 false, cost_vec))
10866 return opt_result::failure_at (stmt_info->stmt,
10867 "not vectorized:"
10868 " live stmt not supported: %G",
10869 stmt_info->stmt);
10870
10871 return opt_result::success ();
10872 }
10873
10874
10875 /* Function vect_transform_stmt.
10876
10877 Create a vectorized stmt to replace STMT_INFO, and insert it at GSI. */
10878
10879 bool
10880 vect_transform_stmt (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
10881 slp_tree slp_node, slp_instance slp_node_instance)
10882 {
10883 vec_info *vinfo = stmt_info->vinfo;
10884 bool is_store = false;
10885 stmt_vec_info vec_stmt = NULL;
10886 bool done;
10887
10888 gcc_assert (slp_node || !PURE_SLP_STMT (stmt_info));
10889 stmt_vec_info old_vec_stmt_info = STMT_VINFO_VEC_STMT (stmt_info);
10890
10891 bool nested_p = (STMT_VINFO_LOOP_VINFO (stmt_info)
10892 && nested_in_vect_loop_p
10893 (LOOP_VINFO_LOOP (STMT_VINFO_LOOP_VINFO (stmt_info)),
10894 stmt_info));
10895
10896 gimple *stmt = stmt_info->stmt;
10897 switch (STMT_VINFO_TYPE (stmt_info))
10898 {
10899 case type_demotion_vec_info_type:
10900 case type_promotion_vec_info_type:
10901 case type_conversion_vec_info_type:
10902 done = vectorizable_conversion (stmt_info, gsi, &vec_stmt, slp_node,
10903 NULL);
10904 gcc_assert (done);
10905 break;
10906
10907 case induc_vec_info_type:
10908 done = vectorizable_induction (stmt_info, gsi, &vec_stmt, slp_node,
10909 NULL);
10910 gcc_assert (done);
10911 break;
10912
10913 case shift_vec_info_type:
10914 done = vectorizable_shift (stmt_info, gsi, &vec_stmt, slp_node, NULL);
10915 gcc_assert (done);
10916 break;
10917
10918 case op_vec_info_type:
10919 done = vectorizable_operation (stmt_info, gsi, &vec_stmt, slp_node,
10920 NULL);
10921 gcc_assert (done);
10922 break;
10923
10924 case assignment_vec_info_type:
10925 done = vectorizable_assignment (stmt_info, gsi, &vec_stmt, slp_node,
10926 NULL);
10927 gcc_assert (done);
10928 break;
10929
10930 case load_vec_info_type:
10931 done = vectorizable_load (stmt_info, gsi, &vec_stmt, slp_node,
10932 slp_node_instance, NULL);
10933 gcc_assert (done);
10934 break;
10935
10936 case store_vec_info_type:
10937 done = vectorizable_store (stmt_info, gsi, &vec_stmt, slp_node, NULL);
10938 gcc_assert (done);
10939 if (STMT_VINFO_GROUPED_ACCESS (stmt_info) && !slp_node)
10940 {
10941 /* In case of interleaving, the whole chain is vectorized when the
10942 last store in the chain is reached. Store stmts before the last
10943 one are skipped, and their vec_stmt_info shouldn't be freed
10944 meanwhile. */
10945 stmt_vec_info group_info = DR_GROUP_FIRST_ELEMENT (stmt_info);
10946 if (DR_GROUP_STORE_COUNT (group_info) == DR_GROUP_SIZE (group_info))
10947 is_store = true;
10948 }
10949 else
10950 is_store = true;
10951 break;
10952
10953 case condition_vec_info_type:
10954 done = vectorizable_condition (stmt_info, gsi, &vec_stmt, slp_node, NULL);
10955 gcc_assert (done);
10956 break;
10957
10958 case comparison_vec_info_type:
10959 done = vectorizable_comparison (stmt_info, gsi, &vec_stmt,
10960 slp_node, NULL);
10961 gcc_assert (done);
10962 break;
10963
10964 case call_vec_info_type:
10965 done = vectorizable_call (stmt_info, gsi, &vec_stmt, slp_node, NULL);
10966 stmt = gsi_stmt (*gsi);
10967 break;
10968
10969 case call_simd_clone_vec_info_type:
10970 done = vectorizable_simd_clone_call (stmt_info, gsi, &vec_stmt,
10971 slp_node, NULL);
10972 stmt = gsi_stmt (*gsi);
10973 break;
10974
10975 case reduc_vec_info_type:
10976 done = vect_transform_reduction (stmt_info, gsi, &vec_stmt, slp_node);
10977 gcc_assert (done);
10978 break;
10979
10980 case cycle_phi_info_type:
10981 done = vect_transform_cycle_phi (stmt_info, &vec_stmt, slp_node,
10982 slp_node_instance);
10983 gcc_assert (done);
10984 break;
10985
10986 case lc_phi_info_type:
10987 done = vectorizable_lc_phi (stmt_info, &vec_stmt, slp_node);
10988 gcc_assert (done);
10989 break;
10990
10991 default:
10992 if (!STMT_VINFO_LIVE_P (stmt_info))
10993 {
10994 if (dump_enabled_p ())
10995 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
10996 "stmt not supported.\n");
10997 gcc_unreachable ();
10998 }
10999 }
11000
11001 /* Verify SLP vectorization doesn't mess with STMT_VINFO_VEC_STMT.
11002 This would break hybrid SLP vectorization. */
11003 if (slp_node)
11004 gcc_assert (!vec_stmt
11005 && STMT_VINFO_VEC_STMT (stmt_info) == old_vec_stmt_info);
11006
11007 /* Handle inner-loop stmts whose DEF is used in the loop-nest that
11008 is being vectorized, but outside the immediately enclosing loop. */
11009 if (vec_stmt
11010 && nested_p
11011 && STMT_VINFO_TYPE (stmt_info) != reduc_vec_info_type
11012 && (STMT_VINFO_RELEVANT (stmt_info) == vect_used_in_outer
11013 || STMT_VINFO_RELEVANT (stmt_info) ==
11014 vect_used_in_outer_by_reduction))
11015 {
11016 class loop *innerloop = LOOP_VINFO_LOOP (
11017 STMT_VINFO_LOOP_VINFO (stmt_info))->inner;
11018 imm_use_iterator imm_iter;
11019 use_operand_p use_p;
11020 tree scalar_dest;
11021
11022 if (dump_enabled_p ())
11023 dump_printf_loc (MSG_NOTE, vect_location,
11024 "Record the vdef for outer-loop vectorization.\n");
11025
11026 /* Find the relevant loop-exit phi-node, and record the vec_stmt there
11027 (to be used when vectorizing outer-loop stmts that use the DEF of
11028 STMT). */
11029 if (gimple_code (stmt) == GIMPLE_PHI)
11030 scalar_dest = PHI_RESULT (stmt);
11031 else
11032 scalar_dest = gimple_get_lhs (stmt);
11033
11034 FOR_EACH_IMM_USE_FAST (use_p, imm_iter, scalar_dest)
11035 if (!flow_bb_inside_loop_p (innerloop, gimple_bb (USE_STMT (use_p))))
11036 {
11037 stmt_vec_info exit_phi_info
11038 = vinfo->lookup_stmt (USE_STMT (use_p));
11039 STMT_VINFO_VEC_STMT (exit_phi_info) = vec_stmt;
11040 }
11041 }
11042
11043 if (vec_stmt)
11044 STMT_VINFO_VEC_STMT (stmt_info) = vec_stmt;
11045
11046 if (STMT_VINFO_TYPE (stmt_info) == store_vec_info_type)
11047 return is_store;
11048
11049 /* If this stmt defines a value used on a backedge, update the
11050 vectorized PHIs. */
11051 stmt_vec_info orig_stmt_info = vect_orig_stmt (stmt_info);
11052 stmt_vec_info reduc_info;
11053 if (STMT_VINFO_REDUC_DEF (orig_stmt_info)
11054 && vect_stmt_to_vectorize (orig_stmt_info) == stmt_info
11055 && (reduc_info = info_for_reduction (orig_stmt_info))
11056 && STMT_VINFO_REDUC_TYPE (reduc_info) != FOLD_LEFT_REDUCTION
11057 && STMT_VINFO_REDUC_TYPE (reduc_info) != EXTRACT_LAST_REDUCTION)
11058 {
11059 gphi *phi;
11060 edge e;
11061 if (!slp_node
11062 && (phi = dyn_cast <gphi *>
11063 (STMT_VINFO_REDUC_DEF (orig_stmt_info)->stmt))
11064 && dominated_by_p (CDI_DOMINATORS,
11065 gimple_bb (orig_stmt_info->stmt), gimple_bb (phi))
11066 && (e = loop_latch_edge (gimple_bb (phi)->loop_father))
11067 && (PHI_ARG_DEF_FROM_EDGE (phi, e)
11068 == gimple_get_lhs (orig_stmt_info->stmt)))
11069 {
11070 stmt_vec_info phi_info
11071 = STMT_VINFO_VEC_STMT (STMT_VINFO_REDUC_DEF (orig_stmt_info));
11072 stmt_vec_info vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
11073 do
11074 {
11075 add_phi_arg (as_a <gphi *> (phi_info->stmt),
11076 gimple_get_lhs (vec_stmt->stmt), e,
11077 gimple_phi_arg_location (phi, e->dest_idx));
11078 phi_info = STMT_VINFO_RELATED_STMT (phi_info);
11079 vec_stmt = STMT_VINFO_RELATED_STMT (vec_stmt);
11080 }
11081 while (phi_info);
11082 gcc_assert (!vec_stmt);
11083 }
11084 else if (slp_node
11085 && slp_node != slp_node_instance->reduc_phis)
11086 {
11087 slp_tree phi_node = slp_node_instance->reduc_phis;
11088 gphi *phi = as_a <gphi *> (SLP_TREE_SCALAR_STMTS (phi_node)[0]->stmt);
11089 e = loop_latch_edge (gimple_bb (phi)->loop_father);
11090 gcc_assert (SLP_TREE_VEC_STMTS (phi_node).length ()
11091 == SLP_TREE_VEC_STMTS (slp_node).length ());
11092 for (unsigned i = 0; i < SLP_TREE_VEC_STMTS (phi_node).length (); ++i)
11093 add_phi_arg (as_a <gphi *> (SLP_TREE_VEC_STMTS (phi_node)[i]->stmt),
11094 gimple_get_lhs (SLP_TREE_VEC_STMTS (slp_node)[i]->stmt),
11095 e, gimple_phi_arg_location (phi, e->dest_idx));
11096 }
11097 }
11098
11099 /* Handle stmts whose DEF is used outside the loop-nest that is
11100 being vectorized. */
11101 done = can_vectorize_live_stmts (stmt_info, gsi, slp_node,
11102 slp_node_instance, true, NULL);
11103 gcc_assert (done);
11104
11105 return false;
11106 }
11107
11108
11109 /* Remove a group of stores (for SLP or interleaving), free their
11110 stmt_vec_info. */
11111
11112 void
11113 vect_remove_stores (stmt_vec_info first_stmt_info)
11114 {
11115 vec_info *vinfo = first_stmt_info->vinfo;
11116 stmt_vec_info next_stmt_info = first_stmt_info;
11117
11118 while (next_stmt_info)
11119 {
11120 stmt_vec_info tmp = DR_GROUP_NEXT_ELEMENT (next_stmt_info);
11121 next_stmt_info = vect_orig_stmt (next_stmt_info);
11122 /* Free the attached stmt_vec_info and remove the stmt. */
11123 vinfo->remove_stmt (next_stmt_info);
11124 next_stmt_info = tmp;
11125 }
11126 }
11127
11128 /* Function get_vectype_for_scalar_type_and_size.
11129
11130 Returns the vector type corresponding to SCALAR_TYPE and SIZE as supported
11131 by the target. */
11132
11133 tree
11134 get_vectype_for_scalar_type_and_size (tree scalar_type, poly_uint64 size)
11135 {
11136 tree orig_scalar_type = scalar_type;
11137 scalar_mode inner_mode;
11138 machine_mode simd_mode;
11139 poly_uint64 nunits;
11140 tree vectype;
11141
11142 if (!is_int_mode (TYPE_MODE (scalar_type), &inner_mode)
11143 && !is_float_mode (TYPE_MODE (scalar_type), &inner_mode))
11144 return NULL_TREE;
11145
11146 unsigned int nbytes = GET_MODE_SIZE (inner_mode);
11147
11148 /* For vector types of elements whose mode precision doesn't
11149 match their type's precision we use an element type of mode
11150 precision. The vectorization routines will have to make sure
11151 they support the proper result truncation/extension.
11152 We also make sure to build vector types with INTEGER_TYPE
11153 component type only. */
11154 if (INTEGRAL_TYPE_P (scalar_type)
11155 && (GET_MODE_BITSIZE (inner_mode) != TYPE_PRECISION (scalar_type)
11156 || TREE_CODE (scalar_type) != INTEGER_TYPE))
11157 scalar_type = build_nonstandard_integer_type (GET_MODE_BITSIZE (inner_mode),
11158 TYPE_UNSIGNED (scalar_type));
11159
11160 /* We shouldn't end up building VECTOR_TYPEs of non-scalar components.
11161 When the component mode passes the above test simply use a type
11162 corresponding to that mode. The theory is that any use that
11163 would cause problems with this will disable vectorization anyway. */
11164 else if (!SCALAR_FLOAT_TYPE_P (scalar_type)
11165 && !INTEGRAL_TYPE_P (scalar_type))
11166 scalar_type = lang_hooks.types.type_for_mode (inner_mode, 1);
11167
11168 /* We can't build a vector type of elements with alignment bigger than
11169 their size. */
11170 else if (nbytes < TYPE_ALIGN_UNIT (scalar_type))
11171 scalar_type = lang_hooks.types.type_for_mode (inner_mode,
11172 TYPE_UNSIGNED (scalar_type));
11173
11174 /* If we fell back to using the mode, fail if there was
11175 no scalar type for it. */
11176 if (scalar_type == NULL_TREE)
11177 return NULL_TREE;
11178
11179 /* If no size was supplied use the mode the target prefers. Otherwise
11180 look up a vector mode of the specified size. */
11181 if (known_eq (size, 0U))
11182 {
11183 simd_mode = targetm.vectorize.preferred_simd_mode (inner_mode);
11184 if (SCALAR_INT_MODE_P (simd_mode))
11185 {
11186 /* Traditional behavior is not to take the integer mode
11187 literally, but simply to use it as a way of determining
11188 the vector size. It is up to mode_for_vector to decide
11189 what the TYPE_MODE should be.
11190
11191 Note that nunits == 1 is allowed in order to support single
11192 element vector types. */
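/* For example, if the target's preferred SIMD mode for QImode elements
   is the 8-byte integer mode DImode, NUNITS becomes 8 and
   mode_for_vector is asked for an 8-element QImode vector mode
   (V8QImode, if the target provides one).  */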
11193 if (!multiple_p (GET_MODE_SIZE (simd_mode), nbytes, &nunits)
11194 || !mode_for_vector (inner_mode, nunits).exists (&simd_mode))
11195 return NULL_TREE;
11196 }
11197 }
11198 else if (!multiple_p (size, nbytes, &nunits)
11199 || !mode_for_vector (inner_mode, nunits).exists (&simd_mode))
11200 return NULL_TREE;
11201
11202 vectype = build_vector_type_for_mode (scalar_type, simd_mode);
11203
11204 /* In cases where the mode was chosen by mode_for_vector, check that
11205 the target actually supports the chosen mode, or that it at least
11206 allows the vector mode to be replaced by a like-sized integer. */
11207 if (!VECTOR_MODE_P (TYPE_MODE (vectype))
11208 && !INTEGRAL_MODE_P (TYPE_MODE (vectype)))
11209 return NULL_TREE;
11210
11211 /* Re-attach the address-space qualifier if we canonicalized the scalar
11212 type. */
11213 if (TYPE_ADDR_SPACE (orig_scalar_type) != TYPE_ADDR_SPACE (vectype))
11214 return build_qualified_type
11215 (vectype, KEEP_QUAL_ADDR_SPACE (TYPE_QUALS (orig_scalar_type)));
11216
11217 return vectype;
11218 }
11219
11220 /* Function get_vectype_for_scalar_type.
11221
11222 Returns the vector type corresponding to SCALAR_TYPE as supported
11223 by the target. */
11224
11225 tree
11226 get_vectype_for_scalar_type (vec_info *vinfo, tree scalar_type)
11227 {
11228 tree vectype;
11229 poly_uint64 vector_size = GET_MODE_SIZE (vinfo->vector_mode);
11230 vectype = get_vectype_for_scalar_type_and_size (scalar_type, vector_size);
11231 if (vectype && vinfo->vector_mode == VOIDmode)
11232 vinfo->vector_mode = TYPE_MODE (vectype);
11233 return vectype;
11234 }
11235
11236 /* Function get_mask_type_for_scalar_type.
11237
11238 Returns the mask type corresponding to a result of comparison
11239 of vectors of the specified SCALAR_TYPE as supported by the target. */
11240
11241 tree
11242 get_mask_type_for_scalar_type (vec_info *vinfo, tree scalar_type)
11243 {
11244 tree vectype = get_vectype_for_scalar_type (vinfo, scalar_type);
11245
11246 if (!vectype)
11247 return NULL;
11248
11249 return truth_type_for (vectype);
11250 }
11251
11252 /* Function get_same_sized_vectype
11253
11254 Returns a vector type corresponding to SCALAR_TYPE with the same
11255 size as VECTOR_TYPE, if supported by the target. */
11256
11257 tree
11258 get_same_sized_vectype (tree scalar_type, tree vector_type)
11259 {
11260 if (VECT_SCALAR_BOOLEAN_TYPE_P (scalar_type))
11261 return truth_type_for (vector_type);
11262
11263 return get_vectype_for_scalar_type_and_size
11264 (scalar_type, GET_MODE_SIZE (TYPE_MODE (vector_type)));
11265 }
11266
11267 /* Function vect_is_simple_use.
11268
11269 Input:
11270 VINFO - the vect info of the loop or basic block that is being vectorized.
11271 OPERAND - operand in the loop or bb.
11272 Output:
11273 DEF_STMT_INFO_OUT (optional) - information about the defining stmt in
11274 case OPERAND is an SSA_NAME that is defined in the vectorizable region
11275 DEF_STMT_OUT (optional) - the defining stmt in case OPERAND is an SSA_NAME;
11276 the definition could be anywhere in the function
11277 DT - the type of definition
11278
11279 Returns whether a stmt with OPERAND can be vectorized.
11280 For loops, supportable operands are constants, loop invariants, and operands
11281 that are defined by the current iteration of the loop. Unsupportable
11282 operands are those that are defined by a previous iteration of the loop (as
11283 is the case in reduction/induction computations).
11284 For basic blocks, supportable operands are constants and bb invariants.
11285 For now, operands defined outside the basic block are not supported. */
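/* For example, given a loop statement a_5 = b_4 + 1:
   - b_4, if defined by another statement inside the vectorized region,
     yields DT == vect_internal_def;
   - the constant 1 yields DT == vect_constant_def;
   - an SSA name defined outside the region (such as a default
     definition of a function parameter) yields DT == vect_external_def.  */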
11286
11287 bool
11288 vect_is_simple_use (tree operand, vec_info *vinfo, enum vect_def_type *dt,
11289 stmt_vec_info *def_stmt_info_out, gimple **def_stmt_out)
11290 {
11291 if (def_stmt_info_out)
11292 *def_stmt_info_out = NULL;
11293 if (def_stmt_out)
11294 *def_stmt_out = NULL;
11295 *dt = vect_unknown_def_type;
11296
11297 if (dump_enabled_p ())
11298 {
11299 dump_printf_loc (MSG_NOTE, vect_location,
11300 "vect_is_simple_use: operand ");
11301 if (TREE_CODE (operand) == SSA_NAME
11302 && !SSA_NAME_IS_DEFAULT_DEF (operand))
11303 dump_gimple_expr (MSG_NOTE, TDF_SLIM, SSA_NAME_DEF_STMT (operand), 0);
11304 else
11305 dump_generic_expr (MSG_NOTE, TDF_SLIM, operand);
11306 }
11307
11308 if (CONSTANT_CLASS_P (operand))
11309 *dt = vect_constant_def;
11310 else if (is_gimple_min_invariant (operand))
11311 *dt = vect_external_def;
11312 else if (TREE_CODE (operand) != SSA_NAME)
11313 *dt = vect_unknown_def_type;
11314 else if (SSA_NAME_IS_DEFAULT_DEF (operand))
11315 *dt = vect_external_def;
11316 else
11317 {
11318 gimple *def_stmt = SSA_NAME_DEF_STMT (operand);
11319 stmt_vec_info stmt_vinfo = vinfo->lookup_def (operand);
11320 if (!stmt_vinfo)
11321 *dt = vect_external_def;
11322 else
11323 {
11324 stmt_vinfo = vect_stmt_to_vectorize (stmt_vinfo);
11325 def_stmt = stmt_vinfo->stmt;
11326 switch (gimple_code (def_stmt))
11327 {
11328 case GIMPLE_PHI:
11329 case GIMPLE_ASSIGN:
11330 case GIMPLE_CALL:
11331 *dt = STMT_VINFO_DEF_TYPE (stmt_vinfo);
11332 break;
11333 default:
11334 *dt = vect_unknown_def_type;
11335 break;
11336 }
11337 if (def_stmt_info_out)
11338 *def_stmt_info_out = stmt_vinfo;
11339 }
11340 if (def_stmt_out)
11341 *def_stmt_out = def_stmt;
11342 }
11343
11344 if (dump_enabled_p ())
11345 {
11346 dump_printf (MSG_NOTE, ", type of def: ");
11347 switch (*dt)
11348 {
11349 case vect_uninitialized_def:
11350 dump_printf (MSG_NOTE, "uninitialized\n");
11351 break;
11352 case vect_constant_def:
11353 dump_printf (MSG_NOTE, "constant\n");
11354 break;
11355 case vect_external_def:
11356 dump_printf (MSG_NOTE, "external\n");
11357 break;
11358 case vect_internal_def:
11359 dump_printf (MSG_NOTE, "internal\n");
11360 break;
11361 case vect_induction_def:
11362 dump_printf (MSG_NOTE, "induction\n");
11363 break;
11364 case vect_reduction_def:
11365 dump_printf (MSG_NOTE, "reduction\n");
11366 break;
11367 case vect_double_reduction_def:
11368 dump_printf (MSG_NOTE, "double reduction\n");
11369 break;
11370 case vect_nested_cycle:
11371 dump_printf (MSG_NOTE, "nested cycle\n");
11372 break;
11373 case vect_unknown_def_type:
11374 dump_printf (MSG_NOTE, "unknown\n");
11375 break;
11376 }
11377 }
11378
11379 if (*dt == vect_unknown_def_type)
11380 {
11381 if (dump_enabled_p ())
11382 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
11383 "Unsupported pattern.\n");
11384 return false;
11385 }
11386
11387 return true;
11388 }
11389
11390 /* Function vect_is_simple_use.
11391
11392 Same as vect_is_simple_use but also determines the vector operand
11393 type of OPERAND and stores it to *VECTYPE. If the definition of
11394 OPERAND is vect_uninitialized_def, vect_constant_def or
11395 vect_external_def *VECTYPE will be set to NULL_TREE and the caller
11396 is responsible for computing the best suited vector type for the
11397 scalar operand. */
11398
11399 bool
11400 vect_is_simple_use (tree operand, vec_info *vinfo, enum vect_def_type *dt,
11401 tree *vectype, stmt_vec_info *def_stmt_info_out,
11402 gimple **def_stmt_out)
11403 {
11404 stmt_vec_info def_stmt_info;
11405 gimple *def_stmt;
11406 if (!vect_is_simple_use (operand, vinfo, dt, &def_stmt_info, &def_stmt))
11407 return false;
11408
11409 if (def_stmt_out)
11410 *def_stmt_out = def_stmt;
11411 if (def_stmt_info_out)
11412 *def_stmt_info_out = def_stmt_info;
11413
11414 /* Now get a vector type if the def is internal, otherwise supply
11415 NULL_TREE and leave it up to the caller to figure out a proper
11416 type for the use stmt. */
11417 if (*dt == vect_internal_def
11418 || *dt == vect_induction_def
11419 || *dt == vect_reduction_def
11420 || *dt == vect_double_reduction_def
11421 || *dt == vect_nested_cycle)
11422 {
11423 *vectype = STMT_VINFO_VECTYPE (def_stmt_info);
11424 gcc_assert (*vectype != NULL_TREE);
11425 if (dump_enabled_p ())
11426 dump_printf_loc (MSG_NOTE, vect_location,
11427 "vect_is_simple_use: vectype %T\n", *vectype);
11428 }
11429 else if (*dt == vect_uninitialized_def
11430 || *dt == vect_constant_def
11431 || *dt == vect_external_def)
11432 *vectype = NULL_TREE;
11433 else
11434 gcc_unreachable ();
11435
11436 return true;
11437 }
11438
11439
11440 /* Function supportable_widening_operation
11441
11442 Check whether an operation represented by the code CODE is a
11443 widening operation that is supported by the target platform in
11444 vector form (i.e., when operating on arguments of type VECTYPE_IN
11445 producing a result of type VECTYPE_OUT).
11446
11447 Widening operations we currently support are NOP (CONVERT), FLOAT,
11448 FIX_TRUNC and WIDEN_MULT. This function checks if these operations
11449 are supported by the target platform either directly (via vector
11450 tree-codes), or via target builtins.
11451
11452 Output:
11453 - CODE1 and CODE2 are codes of vector operations to be used when
11454 vectorizing the operation, if available.
11455 - MULTI_STEP_CVT determines the number of required intermediate steps in
11456 case of multi-step conversion (like char->short->int - in that case
11457 MULTI_STEP_CVT will be 1).
11458 - INTERM_TYPES contains the intermediate type required to perform the
11459 widening operation (short in the above example). */
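/* For example, for a char -> int conversion on a target with 128-bit
   vectors, a V16QI input is unpacked into two V8HI vectors and each of
   those into two V4SI vectors: *CODE1 and *CODE2 are the
   VEC_UNPACK_LO_EXPR/VEC_UNPACK_HI_EXPR pair, *MULTI_STEP_CVT is 1 and
   *INTERM_TYPES contains the short vector type.  */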
11460
11461 bool
11462 supportable_widening_operation (enum tree_code code, stmt_vec_info stmt_info,
11463 tree vectype_out, tree vectype_in,
11464 enum tree_code *code1, enum tree_code *code2,
11465 int *multi_step_cvt,
11466 vec<tree> *interm_types)
11467 {
11468 loop_vec_info loop_info = STMT_VINFO_LOOP_VINFO (stmt_info);
11469 class loop *vect_loop = NULL;
11470 machine_mode vec_mode;
11471 enum insn_code icode1, icode2;
11472 optab optab1, optab2;
11473 tree vectype = vectype_in;
11474 tree wide_vectype = vectype_out;
11475 enum tree_code c1, c2;
11476 int i;
11477 tree prev_type, intermediate_type;
11478 machine_mode intermediate_mode, prev_mode;
11479 optab optab3, optab4;
11480
11481 *multi_step_cvt = 0;
11482 if (loop_info)
11483 vect_loop = LOOP_VINFO_LOOP (loop_info);
11484
11485 switch (code)
11486 {
11487 case WIDEN_MULT_EXPR:
11488 /* The result of a vectorized widening operation usually requires
11489 two vectors (because the widened results do not fit into one vector).
11490 The generated vector results would normally be expected to be
11491 generated in the same order as in the original scalar computation,
11492 i.e. if 8 results are generated in each vector iteration, they are
11493 to be organized as follows:
11494 vect1: [res1,res2,res3,res4],
11495 vect2: [res5,res6,res7,res8].
11496
11497 However, in the special case that the result of the widening
11498 operation is used in a reduction computation only, the order doesn't
11499 matter (because when vectorizing a reduction we change the order of
11500 the computation). Some targets can take advantage of this and
11501 generate more efficient code. For example, targets like Altivec,
11502 that support widen_mult using a sequence of {mult_even,mult_odd}
11503 generate the following vectors:
11504 vect1: [res1,res3,res5,res7],
11505 vect2: [res2,res4,res6,res8].
11506
11507 When vectorizing outer-loops, we execute the inner-loop sequentially
11508 (each vectorized inner-loop iteration contributes to VF outer-loop
11509 iterations in parallel). We therefore don't allow changing the
11510 order of the computation in the inner-loop during outer-loop
11511 vectorization. */
11512 /* TODO: Another case in which order doesn't *really* matter is when we
11513 widen and then contract again, e.g. (short)((int)x * y >> 8).
11514 Normally, pack_trunc performs an even/odd permute, whereas the
11515 repack from an even/odd expansion would be an interleave, which
11516 would be significantly simpler for e.g. AVX2. */
11517 /* In any case, in order to avoid duplicating the code below, recurse
11518 on VEC_WIDEN_MULT_EVEN_EXPR. If it succeeds, all the return values
11519 are properly set up for the caller. If we fail, we'll continue with
11520 a VEC_WIDEN_MULT_LO/HI_EXPR check. */
11521 if (vect_loop
11522 && STMT_VINFO_RELEVANT (stmt_info) == vect_used_by_reduction
11523 && !nested_in_vect_loop_p (vect_loop, stmt_info)
11524 && supportable_widening_operation (VEC_WIDEN_MULT_EVEN_EXPR,
11525 stmt_info, vectype_out,
11526 vectype_in, code1, code2,
11527 multi_step_cvt, interm_types))
11528 {
11529 /* Elements in a vector with vect_used_by_reduction property cannot
11530 be reordered if the use chain with this property does not have the
11531 same operation. One such example is s += a * b, where elements
11532 in a and b cannot be reordered. Here we check if the vector defined
11533 by STMT is only directly used in the reduction statement. */
11534 tree lhs = gimple_assign_lhs (stmt_info->stmt);
11535 stmt_vec_info use_stmt_info = loop_info->lookup_single_use (lhs);
11536 if (use_stmt_info
11537 && STMT_VINFO_DEF_TYPE (use_stmt_info) == vect_reduction_def)
11538 return true;
11539 }
11540 c1 = VEC_WIDEN_MULT_LO_EXPR;
11541 c2 = VEC_WIDEN_MULT_HI_EXPR;
11542 break;
11543
11544 case DOT_PROD_EXPR:
11545 c1 = DOT_PROD_EXPR;
11546 c2 = DOT_PROD_EXPR;
11547 break;
11548
11549 case SAD_EXPR:
11550 c1 = SAD_EXPR;
11551 c2 = SAD_EXPR;
11552 break;
11553
11554 case VEC_WIDEN_MULT_EVEN_EXPR:
11555 /* Support the recursion induced just above. */
11556 c1 = VEC_WIDEN_MULT_EVEN_EXPR;
11557 c2 = VEC_WIDEN_MULT_ODD_EXPR;
11558 break;
11559
11560 case WIDEN_LSHIFT_EXPR:
11561 c1 = VEC_WIDEN_LSHIFT_LO_EXPR;
11562 c2 = VEC_WIDEN_LSHIFT_HI_EXPR;
11563 break;
11564
11565 CASE_CONVERT:
11566 c1 = VEC_UNPACK_LO_EXPR;
11567 c2 = VEC_UNPACK_HI_EXPR;
11568 break;
11569
11570 case FLOAT_EXPR:
11571 c1 = VEC_UNPACK_FLOAT_LO_EXPR;
11572 c2 = VEC_UNPACK_FLOAT_HI_EXPR;
11573 break;
11574
11575 case FIX_TRUNC_EXPR:
11576 c1 = VEC_UNPACK_FIX_TRUNC_LO_EXPR;
11577 c2 = VEC_UNPACK_FIX_TRUNC_HI_EXPR;
11578 break;
11579
11580 default:
11581 gcc_unreachable ();
11582 }
11583
11584 if (BYTES_BIG_ENDIAN && c1 != VEC_WIDEN_MULT_EVEN_EXPR)
11585 std::swap (c1, c2);
11586
11587 if (code == FIX_TRUNC_EXPR)
11588 {
11589 /* The signedness is determined from the output operand. */
11590 optab1 = optab_for_tree_code (c1, vectype_out, optab_default);
11591 optab2 = optab_for_tree_code (c2, vectype_out, optab_default);
11592 }
11593 else if (CONVERT_EXPR_CODE_P (code)
11594 && VECTOR_BOOLEAN_TYPE_P (wide_vectype)
11595 && VECTOR_BOOLEAN_TYPE_P (vectype)
11596 && TYPE_MODE (wide_vectype) == TYPE_MODE (vectype)
11597 && SCALAR_INT_MODE_P (TYPE_MODE (vectype)))
11598 {
11599 /* If the input and result modes are the same, a different optab
11600 is needed where we pass in the number of units in vectype. */
11601 optab1 = vec_unpacks_sbool_lo_optab;
11602 optab2 = vec_unpacks_sbool_hi_optab;
11603 }
11604 else
11605 {
11606 optab1 = optab_for_tree_code (c1, vectype, optab_default);
11607 optab2 = optab_for_tree_code (c2, vectype, optab_default);
11608 }
11609
11610 if (!optab1 || !optab2)
11611 return false;
11612
11613 vec_mode = TYPE_MODE (vectype);
11614 if ((icode1 = optab_handler (optab1, vec_mode)) == CODE_FOR_nothing
11615 || (icode2 = optab_handler (optab2, vec_mode)) == CODE_FOR_nothing)
11616 return false;
11617
11618 *code1 = c1;
11619 *code2 = c2;
11620
11621 if (insn_data[icode1].operand[0].mode == TYPE_MODE (wide_vectype)
11622 && insn_data[icode2].operand[0].mode == TYPE_MODE (wide_vectype))
11623 {
11624 if (!VECTOR_BOOLEAN_TYPE_P (vectype))
11625 return true;
11626 /* For scalar masks we may have different boolean
11627 vector types having the same QImode. Thus we
11628 add an additional check on the number of elements. */
11629 if (known_eq (TYPE_VECTOR_SUBPARTS (vectype),
11630 TYPE_VECTOR_SUBPARTS (wide_vectype) * 2))
11631 return true;
11632 }
11633
11634 /* Check if it's a multi-step conversion that can be done using intermediate
11635 types. */
11636
11637 prev_type = vectype;
11638 prev_mode = vec_mode;
11639
11640 if (!CONVERT_EXPR_CODE_P (code))
11641 return false;
11642
11643 /* We assume here that there will not be more than MAX_INTERM_CVT_STEPS
11644 intermediate steps in the promotion sequence. We try
11645 MAX_INTERM_CVT_STEPS to get to WIDE_VECTYPE, and fail if we do
11646 not. */
11647 interm_types->create (MAX_INTERM_CVT_STEPS);
11648 for (i = 0; i < MAX_INTERM_CVT_STEPS; i++)
11649 {
11650 intermediate_mode = insn_data[icode1].operand[0].mode;
11651 if (VECTOR_BOOLEAN_TYPE_P (prev_type))
11652 intermediate_type
11653 = vect_halve_mask_nunits (prev_type, intermediate_mode);
11654 else
11655 intermediate_type
11656 = lang_hooks.types.type_for_mode (intermediate_mode,
11657 TYPE_UNSIGNED (prev_type));
11658
11659 if (VECTOR_BOOLEAN_TYPE_P (intermediate_type)
11660 && VECTOR_BOOLEAN_TYPE_P (prev_type)
11661 && intermediate_mode == prev_mode
11662 && SCALAR_INT_MODE_P (prev_mode))
11663 {
11664 /* If the input and result modes are the same, a different optab
11665 is needed where we pass in the number of units in vectype. */
11666 optab3 = vec_unpacks_sbool_lo_optab;
11667 optab4 = vec_unpacks_sbool_hi_optab;
11668 }
11669 else
11670 {
11671 optab3 = optab_for_tree_code (c1, intermediate_type, optab_default);
11672 optab4 = optab_for_tree_code (c2, intermediate_type, optab_default);
11673 }
11674
11675 if (!optab3 || !optab4
11676 || (icode1 = optab_handler (optab1, prev_mode)) == CODE_FOR_nothing
11677 || insn_data[icode1].operand[0].mode != intermediate_mode
11678 || (icode2 = optab_handler (optab2, prev_mode)) == CODE_FOR_nothing
11679 || insn_data[icode2].operand[0].mode != intermediate_mode
11680 || ((icode1 = optab_handler (optab3, intermediate_mode))
11681 == CODE_FOR_nothing)
11682 || ((icode2 = optab_handler (optab4, intermediate_mode))
11683 == CODE_FOR_nothing))
11684 break;
11685
11686 interm_types->quick_push (intermediate_type);
11687 (*multi_step_cvt)++;
11688
11689 if (insn_data[icode1].operand[0].mode == TYPE_MODE (wide_vectype)
11690 && insn_data[icode2].operand[0].mode == TYPE_MODE (wide_vectype))
11691 {
11692 if (!VECTOR_BOOLEAN_TYPE_P (vectype))
11693 return true;
11694 if (known_eq (TYPE_VECTOR_SUBPARTS (intermediate_type),
11695 TYPE_VECTOR_SUBPARTS (wide_vectype) * 2))
11696 return true;
11697 }
11698
11699 prev_type = intermediate_type;
11700 prev_mode = intermediate_mode;
11701 }
11702
11703 interm_types->release ();
11704 return false;
11705 }
11706
11707
11708 /* Function supportable_narrowing_operation
11709
11710 Check whether an operation represented by the code CODE is a
11711 narrowing operation that is supported by the target platform in
11712 vector form (i.e., when operating on arguments of type VECTYPE_IN
11713 and producing a result of type VECTYPE_OUT).
11714
11715 Narrowing operations we currently support are NOP (CONVERT), FIX_TRUNC
11716 and FLOAT. This function checks if these operations are supported by
11717 the target platform directly via vector tree-codes.
11718
11719 Output:
11720 - CODE1 is the code of a vector operation to be used when
11721 vectorizing the operation, if available.
11722 - MULTI_STEP_CVT determines the number of required intermediate steps in
11723 case of multi-step conversion (like int->short->char - in that case
11724 MULTI_STEP_CVT will be 1).
11725 - INTERM_TYPES contains the intermediate type required to perform the
11726 narrowing operation (short in the above example). */
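/* For example, for an int -> char conversion on a target with 128-bit
   vectors, pairs of V4SI vectors are packed into V8HI vectors with
   VEC_PACK_TRUNC_EXPR and those in turn into V16QI vectors: *CODE1 is
   VEC_PACK_TRUNC_EXPR, *MULTI_STEP_CVT is 1 and *INTERM_TYPES contains
   the short vector type.  */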
11727
11728 bool
11729 supportable_narrowing_operation (enum tree_code code,
11730 tree vectype_out, tree vectype_in,
11731 enum tree_code *code1, int *multi_step_cvt,
11732 vec<tree> *interm_types)
11733 {
11734 machine_mode vec_mode;
11735 enum insn_code icode1;
11736 optab optab1, interm_optab;
11737 tree vectype = vectype_in;
11738 tree narrow_vectype = vectype_out;
11739 enum tree_code c1;
11740 tree intermediate_type, prev_type;
11741 machine_mode intermediate_mode, prev_mode;
11742 int i;
11743 bool uns;
11744
11745 *multi_step_cvt = 0;
11746 switch (code)
11747 {
11748 CASE_CONVERT:
11749 c1 = VEC_PACK_TRUNC_EXPR;
11750 if (VECTOR_BOOLEAN_TYPE_P (narrow_vectype)
11751 && VECTOR_BOOLEAN_TYPE_P (vectype)
11752 && TYPE_MODE (narrow_vectype) == TYPE_MODE (vectype)
11753 && SCALAR_INT_MODE_P (TYPE_MODE (vectype)))
11754 optab1 = vec_pack_sbool_trunc_optab;
11755 else
11756 optab1 = optab_for_tree_code (c1, vectype, optab_default);
11757 break;
11758
11759 case FIX_TRUNC_EXPR:
11760 c1 = VEC_PACK_FIX_TRUNC_EXPR;
11761 /* The signedness is determined from the output operand. */
11762 optab1 = optab_for_tree_code (c1, vectype_out, optab_default);
11763 break;
11764
11765 case FLOAT_EXPR:
11766 c1 = VEC_PACK_FLOAT_EXPR;
11767 optab1 = optab_for_tree_code (c1, vectype, optab_default);
11768 break;
11769
11770 default:
11771 gcc_unreachable ();
11772 }
11773
11774 if (!optab1)
11775 return false;
11776
11777 vec_mode = TYPE_MODE (vectype);
11778 if ((icode1 = optab_handler (optab1, vec_mode)) == CODE_FOR_nothing)
11779 return false;
11780
11781 *code1 = c1;
11782
11783 if (insn_data[icode1].operand[0].mode == TYPE_MODE (narrow_vectype))
11784 {
11785 if (!VECTOR_BOOLEAN_TYPE_P (vectype))
11786 return true;
11787 /* For scalar masks we may have different boolean
11788 vector types having the same QImode. Thus we
11789 add an additional check on the number of elements. */
11790 if (known_eq (TYPE_VECTOR_SUBPARTS (vectype) * 2,
11791 TYPE_VECTOR_SUBPARTS (narrow_vectype)))
11792 return true;
11793 }
11794
11795 if (code == FLOAT_EXPR)
11796 return false;
11797
11798 /* Check if it's a multi-step conversion that can be done using intermediate
11799 types. */
11800 prev_mode = vec_mode;
11801 prev_type = vectype;
11802 if (code == FIX_TRUNC_EXPR)
11803 uns = TYPE_UNSIGNED (vectype_out);
11804 else
11805 uns = TYPE_UNSIGNED (vectype);
11806
11807 /* For multi-step FIX_TRUNC_EXPR prefer signed floating to integer
11808 conversion over unsigned, as unsigned FIX_TRUNC_EXPR is often more
11809 costly than signed. */
11810 if (code == FIX_TRUNC_EXPR && uns)
11811 {
11812 enum insn_code icode2;
11813
11814 intermediate_type
11815 = lang_hooks.types.type_for_mode (TYPE_MODE (vectype_out), 0);
11816 interm_optab
11817 = optab_for_tree_code (c1, intermediate_type, optab_default);
11818 if (interm_optab != unknown_optab
11819 && (icode2 = optab_handler (optab1, vec_mode)) != CODE_FOR_nothing
11820 && insn_data[icode1].operand[0].mode
11821 == insn_data[icode2].operand[0].mode)
11822 {
11823 uns = false;
11824 optab1 = interm_optab;
11825 icode1 = icode2;
11826 }
11827 }
11828
11829 /* We assume here that there will not be more than MAX_INTERM_CVT_STEPS
11830 intermediate steps in the narrowing sequence. We try
11831 MAX_INTERM_CVT_STEPS to get to NARROW_VECTYPE, and fail if we do not. */
11832 interm_types->create (MAX_INTERM_CVT_STEPS);
11833 for (i = 0; i < MAX_INTERM_CVT_STEPS; i++)
11834 {
11835 intermediate_mode = insn_data[icode1].operand[0].mode;
11836 if (VECTOR_BOOLEAN_TYPE_P (prev_type))
11837 intermediate_type
11838 = vect_double_mask_nunits (prev_type, intermediate_mode);
11839 else
11840 intermediate_type
11841 = lang_hooks.types.type_for_mode (intermediate_mode, uns);
11842 if (VECTOR_BOOLEAN_TYPE_P (intermediate_type)
11843 && VECTOR_BOOLEAN_TYPE_P (prev_type)
11844 && intermediate_mode == prev_mode
11845 && SCALAR_INT_MODE_P (prev_mode))
11846 interm_optab = vec_pack_sbool_trunc_optab;
11847 else
11848 interm_optab
11849 = optab_for_tree_code (VEC_PACK_TRUNC_EXPR, intermediate_type,
11850 optab_default);
11851 if (!interm_optab
11852 || ((icode1 = optab_handler (optab1, prev_mode)) == CODE_FOR_nothing)
11853 || insn_data[icode1].operand[0].mode != intermediate_mode
11854 || ((icode1 = optab_handler (interm_optab, intermediate_mode))
11855 == CODE_FOR_nothing))
11856 break;
11857
11858 interm_types->quick_push (intermediate_type);
11859 (*multi_step_cvt)++;
11860
11861 if (insn_data[icode1].operand[0].mode == TYPE_MODE (narrow_vectype))
11862 {
11863 if (!VECTOR_BOOLEAN_TYPE_P (vectype))
11864 return true;
11865 if (known_eq (TYPE_VECTOR_SUBPARTS (intermediate_type) * 2,
11866 TYPE_VECTOR_SUBPARTS (narrow_vectype)))
11867 return true;
11868 }
11869
11870 prev_mode = intermediate_mode;
11871 prev_type = intermediate_type;
11872 optab1 = interm_optab;
11873 }
11874
11875 interm_types->release ();
11876 return false;
11877 }
11878
11879 /* Generate and return a statement that sets vector mask MASK such that
11880 MASK[I] is true iff J + START_INDEX < END_INDEX for all J <= I. */
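/* For example, with a four-element MASK, START_INDEX 6 and END_INDEX 8,
   the generated .WHILE_ULT call sets MASK to { true, true, false, false }:
   only the first two lanes satisfy I + START_INDEX < END_INDEX.  */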
11881
11882 gcall *
11883 vect_gen_while (tree mask, tree start_index, tree end_index)
11884 {
11885 tree cmp_type = TREE_TYPE (start_index);
11886 tree mask_type = TREE_TYPE (mask);
11887 gcc_checking_assert (direct_internal_fn_supported_p (IFN_WHILE_ULT,
11888 cmp_type, mask_type,
11889 OPTIMIZE_FOR_SPEED));
11890 gcall *call = gimple_build_call_internal (IFN_WHILE_ULT, 3,
11891 start_index, end_index,
11892 build_zero_cst (mask_type));
11893 gimple_call_set_lhs (call, mask);
11894 return call;
11895 }
11896
11897 /* Generate a vector mask of type MASK_TYPE for which index I is false iff
11898 J + START_INDEX < END_INDEX for all J <= I. Add the statements to SEQ. */
11899
11900 tree
11901 vect_gen_while_not (gimple_seq *seq, tree mask_type, tree start_index,
11902 tree end_index)
11903 {
11904 tree tmp = make_ssa_name (mask_type);
11905 gcall *call = vect_gen_while (tmp, start_index, end_index);
11906 gimple_seq_add_stmt (seq, call);
11907 return gimple_build (seq, BIT_NOT_EXPR, mask_type, tmp);
11908 }
11909
11910 /* Try to compute the vector types required to vectorize STMT_INFO,
11911 returning true on success and false if vectorization isn't possible.
11912
11913 On success:
11914
11915 - Set *STMT_VECTYPE_OUT to:
11916 - NULL_TREE if the statement doesn't need to be vectorized;
11917 - boolean_type_node if the statement is a boolean operation whose
11918 vector type can only be determined once all the other vector types
11919 are known; and
11920 - the equivalent of STMT_VINFO_VECTYPE otherwise.
11921
11922 - Set *NUNITS_VECTYPE_OUT to the vector type that contains the maximum
11923 number of units needed to vectorize STMT_INFO, or NULL_TREE if the
11924 statement does not help to determine the overall number of units. */
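/* For example, for the widening statement int_x = (int) short_y on a
   target with 128-bit vectors, *STMT_VECTYPE_OUT would be the V4SI type
   of the result while *NUNITS_VECTYPE_OUT would be the V8HI type
   derived from the smallest scalar type, since the latter determines
   the vectorization factor.  */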
11925
11926 opt_result
11927 vect_get_vector_types_for_stmt (stmt_vec_info stmt_info,
11928 tree *stmt_vectype_out,
11929 tree *nunits_vectype_out)
11930 {
11931 vec_info *vinfo = stmt_info->vinfo;
11932 gimple *stmt = stmt_info->stmt;
11933
11934 *stmt_vectype_out = NULL_TREE;
11935 *nunits_vectype_out = NULL_TREE;
11936
11937 if (gimple_get_lhs (stmt) == NULL_TREE
11938 /* MASK_STORE has no lhs, but is ok. */
11939 && !gimple_call_internal_p (stmt, IFN_MASK_STORE))
11940 {
11941 if (is_a <gcall *> (stmt))
11942 {
11943 /* Ignore calls with no lhs. These must be calls to
11944 #pragma omp simd functions, and the vectorization factor
11945 they really need can't be determined until
11946 vectorizable_simd_clone_call. */
11947 if (dump_enabled_p ())
11948 dump_printf_loc (MSG_NOTE, vect_location,
11949 "defer to SIMD clone analysis.\n");
11950 return opt_result::success ();
11951 }
11952
11953 return opt_result::failure_at (stmt,
11954 "not vectorized: irregular stmt.%G", stmt);
11955 }
11956
11957 if (VECTOR_MODE_P (TYPE_MODE (gimple_expr_type (stmt))))
11958 return opt_result::failure_at (stmt,
11959 "not vectorized: vector stmt in loop:%G",
11960 stmt);
11961
11962 tree vectype;
11963 tree scalar_type = NULL_TREE;
11964 if (STMT_VINFO_VECTYPE (stmt_info))
11965 *stmt_vectype_out = vectype = STMT_VINFO_VECTYPE (stmt_info);
11966 else
11967 {
11968 gcc_assert (!STMT_VINFO_DATA_REF (stmt_info));
11969 if (gimple_call_internal_p (stmt, IFN_MASK_STORE))
11970 scalar_type = TREE_TYPE (gimple_call_arg (stmt, 3));
11971 else
11972 scalar_type = TREE_TYPE (gimple_get_lhs (stmt));
11973
11974 /* Pure bool ops don't participate in number-of-units computation.
11975 For comparisons use the types being compared. */
11976 if (VECT_SCALAR_BOOLEAN_TYPE_P (scalar_type)
11977 && is_gimple_assign (stmt)
11978 && gimple_assign_rhs_code (stmt) != COND_EXPR)
11979 {
11980 *stmt_vectype_out = boolean_type_node;
11981
11982 tree rhs1 = gimple_assign_rhs1 (stmt);
11983 if (TREE_CODE_CLASS (gimple_assign_rhs_code (stmt)) == tcc_comparison
11984 && !VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (rhs1)))
11985 scalar_type = TREE_TYPE (rhs1);
11986 else
11987 {
11988 if (dump_enabled_p ())
11989 dump_printf_loc (MSG_NOTE, vect_location,
11990 "pure bool operation.\n");
11991 return opt_result::success ();
11992 }
11993 }
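/* Editorial note, not part of the original file: for example, in
   "flag_1 = a_2 > b_3" with integer A and B, SCALAR_TYPE becomes the
   type of A and *STMT_VECTYPE_OUT stays boolean_type_node, whereas
   "flag_4 = flag_1 & flag_5" is a pure bool operation and is deferred
   via the early return above.  */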
11994
11995 if (dump_enabled_p ())
11996 dump_printf_loc (MSG_NOTE, vect_location,
11997 "get vectype for scalar type: %T\n", scalar_type);
11998 vectype = get_vectype_for_scalar_type (vinfo, scalar_type);
11999 if (!vectype)
12000 return opt_result::failure_at (stmt,
12001 "not vectorized:"
12002 " unsupported data-type %T\n",
12003 scalar_type);
12004
12005 if (!*stmt_vectype_out)
12006 *stmt_vectype_out = vectype;
12007
12008 if (dump_enabled_p ())
12009 dump_printf_loc (MSG_NOTE, vect_location, "vectype: %T\n", vectype);
12010 }
12011
12012 /* Don't try to compute scalar types if the stmt produces a boolean
12013 vector; use the existing vector type instead. */
12014 tree nunits_vectype;
12015 if (VECTOR_BOOLEAN_TYPE_P (vectype))
12016 nunits_vectype = vectype;
12017 else
12018 {
12019 /* The number of units is set according to the smallest scalar
12020 type (or the largest vector size, but we only support one
12021 vector size per vectorization). */
12022 if (*stmt_vectype_out != boolean_type_node)
12023 {
12024 HOST_WIDE_INT dummy;
12025 scalar_type = vect_get_smallest_scalar_type (stmt_info,
12026 &dummy, &dummy);
12027 }
12028 if (dump_enabled_p ())
12029 dump_printf_loc (MSG_NOTE, vect_location,
12030 "get vectype for scalar type: %T\n", scalar_type);
12031 nunits_vectype = get_vectype_for_scalar_type (vinfo, scalar_type);
12032 }
12033 if (!nunits_vectype)
12034 return opt_result::failure_at (stmt,
12035 "not vectorized: unsupported data-type %T\n",
12036 scalar_type);
12037
12038 if (maybe_ne (GET_MODE_SIZE (TYPE_MODE (vectype)),
12039 GET_MODE_SIZE (TYPE_MODE (nunits_vectype))))
12040 return opt_result::failure_at (stmt,
12041 "not vectorized: different sized vector "
12042 "types in statement, %T and %T\n",
12043 vectype, nunits_vectype);
12044
12045 if (dump_enabled_p ())
12046 {
12047 dump_printf_loc (MSG_NOTE, vect_location, "vectype: %T\n",
12048 nunits_vectype);
12049
12050 dump_printf_loc (MSG_NOTE, vect_location, "nunits = ");
12051 dump_dec (MSG_NOTE, TYPE_VECTOR_SUBPARTS (nunits_vectype));
12052 dump_printf (MSG_NOTE, "\n");
12053 }
12054
12055 *nunits_vectype_out = nunits_vectype;
12056 return opt_result::success ();
12057 }
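/* Editorial sketch, not part of the original file: a hypothetical caller
   (example_note_stmt_vectype) showing how the two outputs might be
   consumed during analysis. *STMT_VECTYPE_OUT can be NULL_TREE (nothing
   to vectorize), boolean_type_node (mask type decided later) or a real
   vector type; *NUNITS_VECTYPE_OUT, when non-null, is what constrains
   the vectorization factor. */

static opt_result
example_note_stmt_vectype (stmt_vec_info stmt_info, poly_uint64 *max_nunits)
{
  tree stmt_vectype, nunits_vectype;
  opt_result res
    = vect_get_vector_types_for_stmt (stmt_info, &stmt_vectype,
                                      &nunits_vectype);
  if (!res)
    return res;

  /* Record the vector type now unless it is the deferred boolean case.  */
  if (stmt_vectype && stmt_vectype != boolean_type_node)
    STMT_VINFO_VECTYPE (stmt_info) = stmt_vectype;

  /* Keep a running common multiple of the unit counts seen so far.  */
  if (nunits_vectype)
    {
      poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (nunits_vectype);
      *max_nunits = force_common_multiple (*max_nunits, nunits);
    }
  return opt_result::success ();
}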
12058
12059 /* Try to determine the correct vector type for STMT_INFO, which is a
12060 statement that produces a scalar boolean result. Return the vector
12061 type on success, otherwise return an opt_tree failure. */
12062
12063 opt_tree
12064 vect_get_mask_type_for_stmt (stmt_vec_info stmt_info)
12065 {
12066 vec_info *vinfo = stmt_info->vinfo;
12067 gimple *stmt = stmt_info->stmt;
12068 tree mask_type = NULL;
12069 tree vectype, scalar_type;
12070
12071 if (is_gimple_assign (stmt)
12072 && TREE_CODE_CLASS (gimple_assign_rhs_code (stmt)) == tcc_comparison
12073 && !VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (gimple_assign_rhs1 (stmt))))
12074 {
12075 scalar_type = TREE_TYPE (gimple_assign_rhs1 (stmt));
12076 mask_type = get_mask_type_for_scalar_type (vinfo, scalar_type);
12077
12078 if (!mask_type)
12079 return opt_tree::failure_at (stmt,
12080 "not vectorized: unsupported mask\n");
12081 }
12082 else
12083 {
12084 tree rhs;
12085 ssa_op_iter iter;
12086 enum vect_def_type dt;
12087
12088 FOR_EACH_SSA_TREE_OPERAND (rhs, stmt, iter, SSA_OP_USE)
12089 {
12090 if (!vect_is_simple_use (rhs, stmt_info->vinfo, &dt, &vectype))
12091 return opt_tree::failure_at (stmt,
12092 "not vectorized:can't compute mask"
12093 " type for statement, %G", stmt);
12094
12095 /* No vectype probably means an external definition.
12096 Allow it in case another operand lets us determine
12097 the mask type. */
12098 if (!vectype)
12099 continue;
12100
12101 if (!mask_type)
12102 mask_type = vectype;
12103 else if (maybe_ne (TYPE_VECTOR_SUBPARTS (mask_type),
12104 TYPE_VECTOR_SUBPARTS (vectype)))
12105 return opt_tree::failure_at (stmt,
12106 "not vectorized: different sized mask"
12107 " types in statement, %T and %T\n",
12108 mask_type, vectype);
12109 else if (VECTOR_BOOLEAN_TYPE_P (mask_type)
12110 != VECTOR_BOOLEAN_TYPE_P (vectype))
12111 return opt_tree::failure_at (stmt,
12112 "not vectorized: mixed mask and "
12113 "nonmask vector types in statement, "
12114 "%T and %T\n",
12115 mask_type, vectype);
12116 }
12117
12118 /* We may compare boolean values loaded as a vector of integers.
12119 Fix mask_type in that case. */
12120 if (mask_type
12121 && !VECTOR_BOOLEAN_TYPE_P (mask_type)
12122 && gimple_code (stmt) == GIMPLE_ASSIGN
12123 && TREE_CODE_CLASS (gimple_assign_rhs_code (stmt)) == tcc_comparison)
12124 mask_type = truth_type_for (mask_type);
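/* Editorial note, not part of the original file: e.g. when two _Bool
   values are loaded as vectors of integers (say V16QI) and compared,
   the operands' vectype is an integer vector, so truth_type_for is
   needed to switch MASK_TYPE to the corresponding boolean vector
   (mask) type.  */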
12125 }
12126
12127 /* A missing mask_type should mean a loop-invariant predicate.
12128 This is probably a subject for optimization in if-conversion. */
12129 if (!mask_type)
12130 return opt_tree::failure_at (stmt,
12131 "not vectorized: can't compute mask type "
12132 "for statement: %G", stmt);
12133
12134 return opt_tree::success (mask_type);
12135 }
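/* Editorial sketch, not part of the original file: how the deferred
   boolean case left open by vect_get_vector_types_for_stmt might be
   resolved once the operand vector types are known. The helper name
   example_resolve_bool_vectype is hypothetical. */

static opt_result
example_resolve_bool_vectype (stmt_vec_info stmt_info)
{
  /* Statements that already have a vector type need no second pass.  */
  if (STMT_VINFO_VECTYPE (stmt_info))
    return opt_result::success ();

  opt_tree mask_type = vect_get_mask_type_for_stmt (stmt_info);
  if (!mask_type)
    return opt_result::propagate_failure (mask_type);

  STMT_VINFO_VECTYPE (stmt_info) = mask_type;
  return opt_result::success ();
}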