1 /* Statement Analysis and Transformation for Vectorization
2 Copyright (C) 2003-2020 Free Software Foundation, Inc.
3 Contributed by Dorit Naishlos <dorit@il.ibm.com>
4 and Ira Rosen <irar@il.ibm.com>
5
6 This file is part of GCC.
7
8 GCC is free software; you can redistribute it and/or modify it under
9 the terms of the GNU General Public License as published by the Free
10 Software Foundation; either version 3, or (at your option) any later
11 version.
12
13 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
14 WARRANTY; without even the implied warranty of MERCHANTABILITY or
15 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
16 for more details.
17
18 You should have received a copy of the GNU General Public License
19 along with GCC; see the file COPYING3. If not see
20 <http://www.gnu.org/licenses/>. */
21
22 #include "config.h"
23 #include "system.h"
24 #include "coretypes.h"
25 #include "backend.h"
26 #include "target.h"
27 #include "rtl.h"
28 #include "tree.h"
29 #include "gimple.h"
30 #include "ssa.h"
31 #include "optabs-tree.h"
32 #include "insn-config.h"
33 #include "recog.h" /* FIXME: for insn_data */
34 #include "cgraph.h"
35 #include "dumpfile.h"
36 #include "alias.h"
37 #include "fold-const.h"
38 #include "stor-layout.h"
39 #include "tree-eh.h"
40 #include "gimplify.h"
41 #include "gimple-iterator.h"
42 #include "gimplify-me.h"
43 #include "tree-cfg.h"
44 #include "tree-ssa-loop-manip.h"
45 #include "cfgloop.h"
46 #include "explow.h"
47 #include "tree-ssa-loop.h"
48 #include "tree-scalar-evolution.h"
49 #include "tree-vectorizer.h"
50 #include "builtins.h"
51 #include "internal-fn.h"
52 #include "tree-vector-builder.h"
53 #include "vec-perm-indices.h"
54 #include "tree-ssa-loop-niter.h"
55 #include "gimple-fold.h"
56 #include "regs.h"
57 #include "attribs.h"
58
59 /* For lang_hooks.types.type_for_mode. */
60 #include "langhooks.h"
61
62 /* Return the vectorized type for the given statement. */
63
64 tree
65 stmt_vectype (class _stmt_vec_info *stmt_info)
66 {
67 return STMT_VINFO_VECTYPE (stmt_info);
68 }
69
70 /* Return TRUE iff the given statement is in an inner loop relative to
71 the loop being vectorized. */
72 bool
73 stmt_in_inner_loop_p (vec_info *vinfo, class _stmt_vec_info *stmt_info)
74 {
75 gimple *stmt = STMT_VINFO_STMT (stmt_info);
76 basic_block bb = gimple_bb (stmt);
77 loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
78 class loop* loop;
79
80 if (!loop_vinfo)
81 return false;
82
83 loop = LOOP_VINFO_LOOP (loop_vinfo);
84
85 return (bb->loop_father == loop->inner);
86 }
87
88 /* Record the cost of a statement, either by directly informing the
89 target model or by saving it in a vector for later processing.
90 Return a preliminary estimate of the statement's cost. */
91
92 unsigned
93 record_stmt_cost (stmt_vector_for_cost *body_cost_vec, int count,
94 enum vect_cost_for_stmt kind, stmt_vec_info stmt_info,
95 tree vectype, int misalign,
96 enum vect_cost_model_location where)
97 {
98 if ((kind == vector_load || kind == unaligned_load)
99 && (stmt_info && STMT_VINFO_GATHER_SCATTER_P (stmt_info)))
100 kind = vector_gather_load;
101 if ((kind == vector_store || kind == unaligned_store)
102 && (stmt_info && STMT_VINFO_GATHER_SCATTER_P (stmt_info)))
103 kind = vector_scatter_store;
104
105 stmt_info_for_cost si = { count, kind, where, stmt_info, vectype, misalign };
106 body_cost_vec->safe_push (si);
107
108 return (unsigned)
109 (builtin_vectorization_cost (kind, vectype, misalign) * count);
110 }
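
/* Illustrative sketch (editorial addition, not part of the vectorizer):
   a caller that wants to account for two vector loads in the loop body,
   assuming COST_VEC and STMT_INFO are already available, could do

     unsigned cost = record_stmt_cost (cost_vec, 2, vector_load,
                                       stmt_info, 0, vect_body);

   The entry is pushed onto COST_VEC for later processing by the target
   cost model; the returned value is only the preliminary estimate based
   on builtin_vectorization_cost.  */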
111
112 /* Return a variable of type ELEM_TYPE[NELEMS]. */
113
114 static tree
115 create_vector_array (tree elem_type, unsigned HOST_WIDE_INT nelems)
116 {
117 return create_tmp_var (build_array_type_nelts (elem_type, nelems),
118 "vect_array");
119 }
120
121 /* ARRAY is an array of vectors created by create_vector_array.
122 Return an SSA_NAME for the vector in index N. The reference
123 is part of the vectorization of STMT_INFO and the vector is associated
124 with scalar destination SCALAR_DEST. */
125
126 static tree
127 read_vector_array (vec_info *vinfo,
128 stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
129 tree scalar_dest, tree array, unsigned HOST_WIDE_INT n)
130 {
131 tree vect_type, vect, vect_name, array_ref;
132 gimple *new_stmt;
133
134 gcc_assert (TREE_CODE (TREE_TYPE (array)) == ARRAY_TYPE);
135 vect_type = TREE_TYPE (TREE_TYPE (array));
136 vect = vect_create_destination_var (scalar_dest, vect_type);
137 array_ref = build4 (ARRAY_REF, vect_type, array,
138 build_int_cst (size_type_node, n),
139 NULL_TREE, NULL_TREE);
140
141 new_stmt = gimple_build_assign (vect, array_ref);
142 vect_name = make_ssa_name (vect, new_stmt);
143 gimple_assign_set_lhs (new_stmt, vect_name);
144 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
145
146 return vect_name;
147 }
148
149 /* ARRAY is an array of vectors created by create_vector_array.
150 Emit code to store SSA_NAME VECT in index N of the array.
151 The store is part of the vectorization of STMT_INFO. */
152
153 static void
154 write_vector_array (vec_info *vinfo,
155 stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
156 tree vect, tree array, unsigned HOST_WIDE_INT n)
157 {
158 tree array_ref;
159 gimple *new_stmt;
160
161 array_ref = build4 (ARRAY_REF, TREE_TYPE (vect), array,
162 build_int_cst (size_type_node, n),
163 NULL_TREE, NULL_TREE);
164
165 new_stmt = gimple_build_assign (array_ref, vect);
166 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
167 }
168
169 /* PTR is a pointer to an array of type TYPE. Return a representation
170 of *PTR. The memory reference replaces those in FIRST_DR
171 (and its group). */
172
173 static tree
174 create_array_ref (tree type, tree ptr, tree alias_ptr_type)
175 {
176 tree mem_ref;
177
178 mem_ref = build2 (MEM_REF, type, ptr, build_int_cst (alias_ptr_type, 0));
179 /* Arrays have the same alignment as their type. */
180 set_ptr_info_alignment (get_ptr_info (ptr), TYPE_ALIGN_UNIT (type), 0);
181 return mem_ref;
182 }
183
184 /* Add a clobber of variable VAR to the vectorization of STMT_INFO.
185 Emit the clobber before *GSI. */
186
187 static void
188 vect_clobber_variable (vec_info *vinfo, stmt_vec_info stmt_info,
189 gimple_stmt_iterator *gsi, tree var)
190 {
191 tree clobber = build_clobber (TREE_TYPE (var));
192 gimple *new_stmt = gimple_build_assign (var, clobber);
193 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
194 }
195
196 /* Utility functions used by vect_mark_stmts_to_be_vectorized. */
197
198 /* Function vect_mark_relevant.
199
200 Mark STMT_INFO as "relevant for vectorization" and add it to WORKLIST. */
201
202 static void
203 vect_mark_relevant (vec<stmt_vec_info> *worklist, stmt_vec_info stmt_info,
204 enum vect_relevant relevant, bool live_p)
205 {
206 enum vect_relevant save_relevant = STMT_VINFO_RELEVANT (stmt_info);
207 bool save_live_p = STMT_VINFO_LIVE_P (stmt_info);
208
209 if (dump_enabled_p ())
210 dump_printf_loc (MSG_NOTE, vect_location,
211 "mark relevant %d, live %d: %G", relevant, live_p,
212 stmt_info->stmt);
213
214 /* If this stmt is an original stmt in a pattern, we might need to mark its
215 related pattern stmt instead of the original stmt. However, such stmts
216 may have their own uses that are not in any pattern; in such cases the
217 stmt itself should be marked. */
218 if (STMT_VINFO_IN_PATTERN_P (stmt_info))
219 {
220 /* This is the last stmt in a sequence that was detected as a
221 pattern that can potentially be vectorized. Don't mark the stmt
222 as relevant/live because it's not going to be vectorized.
223 Instead mark the pattern-stmt that replaces it. */
224
225 if (dump_enabled_p ())
226 dump_printf_loc (MSG_NOTE, vect_location,
227 "last stmt in pattern. don't mark"
228 " relevant/live.\n");
229 stmt_vec_info old_stmt_info = stmt_info;
230 stmt_info = STMT_VINFO_RELATED_STMT (stmt_info);
231 gcc_assert (STMT_VINFO_RELATED_STMT (stmt_info) == old_stmt_info);
232 save_relevant = STMT_VINFO_RELEVANT (stmt_info);
233 save_live_p = STMT_VINFO_LIVE_P (stmt_info);
234 }
235
236 STMT_VINFO_LIVE_P (stmt_info) |= live_p;
237 if (relevant > STMT_VINFO_RELEVANT (stmt_info))
238 STMT_VINFO_RELEVANT (stmt_info) = relevant;
239
240 if (STMT_VINFO_RELEVANT (stmt_info) == save_relevant
241 && STMT_VINFO_LIVE_P (stmt_info) == save_live_p)
242 {
243 if (dump_enabled_p ())
244 dump_printf_loc (MSG_NOTE, vect_location,
245 "already marked relevant/live.\n");
246 return;
247 }
248
249 worklist->safe_push (stmt_info);
250 }
251
252
253 /* Function is_simple_and_all_uses_invariant
254
255 Return true if STMT_INFO is simple and all uses of it are invariant. */
256
257 bool
258 is_simple_and_all_uses_invariant (stmt_vec_info stmt_info,
259 loop_vec_info loop_vinfo)
260 {
261 tree op;
262 ssa_op_iter iter;
263
264 gassign *stmt = dyn_cast <gassign *> (stmt_info->stmt);
265 if (!stmt)
266 return false;
267
268 FOR_EACH_SSA_TREE_OPERAND (op, stmt, iter, SSA_OP_USE)
269 {
270 enum vect_def_type dt = vect_uninitialized_def;
271
272 if (!vect_is_simple_use (op, loop_vinfo, &dt))
273 {
274 if (dump_enabled_p ())
275 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
276 "use not simple.\n");
277 return false;
278 }
279
280 if (dt != vect_external_def && dt != vect_constant_def)
281 return false;
282 }
283 return true;
284 }
285
286 /* Function vect_stmt_relevant_p.
287
288 Return true if STMT_INFO, in the loop that is represented by LOOP_VINFO,
289 is "relevant for vectorization".
290
291 A stmt is considered "relevant for vectorization" if:
292 - it has uses outside the loop.
293 - it has vdefs (it alters memory).
294 - it is a control stmt in the loop (except for the exit condition).
295
296 CHECKME: what other side effects would the vectorizer allow? */
297
298 static bool
299 vect_stmt_relevant_p (stmt_vec_info stmt_info, loop_vec_info loop_vinfo,
300 enum vect_relevant *relevant, bool *live_p)
301 {
302 class loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
303 ssa_op_iter op_iter;
304 imm_use_iterator imm_iter;
305 use_operand_p use_p;
306 def_operand_p def_p;
307
308 *relevant = vect_unused_in_scope;
309 *live_p = false;
310
311 /* cond stmt other than loop exit cond. */
312 if (is_ctrl_stmt (stmt_info->stmt)
313 && STMT_VINFO_TYPE (stmt_info) != loop_exit_ctrl_vec_info_type)
314 *relevant = vect_used_in_scope;
315
316 /* changing memory. */
317 if (gimple_code (stmt_info->stmt) != GIMPLE_PHI)
318 if (gimple_vdef (stmt_info->stmt)
319 && !gimple_clobber_p (stmt_info->stmt))
320 {
321 if (dump_enabled_p ())
322 dump_printf_loc (MSG_NOTE, vect_location,
323 "vec_stmt_relevant_p: stmt has vdefs.\n");
324 *relevant = vect_used_in_scope;
325 }
326
327 /* uses outside the loop. */
328 FOR_EACH_PHI_OR_STMT_DEF (def_p, stmt_info->stmt, op_iter, SSA_OP_DEF)
329 {
330 FOR_EACH_IMM_USE_FAST (use_p, imm_iter, DEF_FROM_PTR (def_p))
331 {
332 basic_block bb = gimple_bb (USE_STMT (use_p));
333 if (!flow_bb_inside_loop_p (loop, bb))
334 {
335 if (is_gimple_debug (USE_STMT (use_p)))
336 continue;
337
338 if (dump_enabled_p ())
339 dump_printf_loc (MSG_NOTE, vect_location,
340 "vec_stmt_relevant_p: used out of loop.\n");
341
342 /* We expect all such uses to be in the loop exit phis
343 (because of loop closed form) */
344 gcc_assert (gimple_code (USE_STMT (use_p)) == GIMPLE_PHI);
345 gcc_assert (bb == single_exit (loop)->dest);
346
347 *live_p = true;
348 }
349 }
350 }
351
352 if (*live_p && *relevant == vect_unused_in_scope
353 && !is_simple_and_all_uses_invariant (stmt_info, loop_vinfo))
354 {
355 if (dump_enabled_p ())
356 dump_printf_loc (MSG_NOTE, vect_location,
357 "vec_stmt_relevant_p: stmt live but not relevant.\n");
358 *relevant = vect_used_only_live;
359 }
360
361 return (*live_p || *relevant);
362 }
363
364
365 /* Function exist_non_indexing_operands_for_use_p
366
367 USE is one of the uses attached to STMT_INFO. Check if USE is
368 used in STMT_INFO for anything other than indexing an array. */
369
370 static bool
371 exist_non_indexing_operands_for_use_p (tree use, stmt_vec_info stmt_info)
372 {
373 tree operand;
374
375 /* USE corresponds to some operand in STMT. If there is no data
376 reference in STMT, then any operand that corresponds to USE
377 is not indexing an array. */
378 if (!STMT_VINFO_DATA_REF (stmt_info))
379 return true;
380
381 /* STMT has a data_ref. FORNOW this means that it is of one of
382 the following forms:
383 -1- ARRAY_REF = var
384 -2- var = ARRAY_REF
385 (This should have been verified in analyze_data_refs).
386
387 'var' in the second case corresponds to a def, not a use,
388 so USE cannot correspond to any operands that are not used
389 for array indexing.
390
391 Therefore, all we need to check is if STMT falls into the
392 first case, and whether var corresponds to USE. */
393
394 gassign *assign = dyn_cast <gassign *> (stmt_info->stmt);
395 if (!assign || !gimple_assign_copy_p (assign))
396 {
397 gcall *call = dyn_cast <gcall *> (stmt_info->stmt);
398 if (call && gimple_call_internal_p (call))
399 {
400 internal_fn ifn = gimple_call_internal_fn (call);
401 int mask_index = internal_fn_mask_index (ifn);
402 if (mask_index >= 0
403 && use == gimple_call_arg (call, mask_index))
404 return true;
405 int stored_value_index = internal_fn_stored_value_index (ifn);
406 if (stored_value_index >= 0
407 && use == gimple_call_arg (call, stored_value_index))
408 return true;
409 if (internal_gather_scatter_fn_p (ifn)
410 && use == gimple_call_arg (call, 1))
411 return true;
412 }
413 return false;
414 }
415
416 if (TREE_CODE (gimple_assign_lhs (assign)) == SSA_NAME)
417 return false;
418 operand = gimple_assign_rhs1 (assign);
419 if (TREE_CODE (operand) != SSA_NAME)
420 return false;
421
422 if (operand == use)
423 return true;
424
425 return false;
426 }
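
/* Example (editorial note): for a scalar store of the form a[i_1] = x_2,
   the use 'x_2' plays a non-indexing role (it is the stored value), while
   'i_1' is only used to compute the array address and so, on its own, does
   not make the stmt defining 'i_1' relevant for vectorization.  */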
427
428
429 /*
430 Function process_use.
431
432 Inputs:
433 - a USE in STMT_VINFO in a loop represented by LOOP_VINFO
434 - RELEVANT - enum value to be set in the STMT_VINFO of the stmt
435 that defined USE. This is done by calling mark_relevant and passing it
436 the WORKLIST (to add DEF_STMT to the WORKLIST in case it is relevant).
437 - FORCE is true if exist_non_indexing_operands_for_use_p check shouldn't
438 be performed.
439
440 Outputs:
441 Generally, LIVE_P and RELEVANT are used to define the liveness and
442 relevance info of the DEF_STMT of this USE:
443 STMT_VINFO_LIVE_P (DEF_stmt_vinfo) <-- live_p
444 STMT_VINFO_RELEVANT (DEF_stmt_vinfo) <-- relevant
445 Exceptions:
446 - case 1: If USE is used only for address computations (e.g. array indexing),
447 which does not need to be directly vectorized, then the liveness/relevance
448 of the respective DEF_STMT is left unchanged.
449 - case 2: If STMT_VINFO is a reduction phi and DEF_STMT is a reduction stmt,
450 we skip DEF_STMT because it has already been processed.
451 - case 3: If DEF_STMT and STMT_VINFO are in different nests, then
452 "relevant" will be modified accordingly.
453
454 Return true if everything is as expected. Return false otherwise. */
455
456 static opt_result
457 process_use (stmt_vec_info stmt_vinfo, tree use, loop_vec_info loop_vinfo,
458 enum vect_relevant relevant, vec<stmt_vec_info> *worklist,
459 bool force)
460 {
461 stmt_vec_info dstmt_vinfo;
462 enum vect_def_type dt;
463
464 /* case 1: we are only interested in uses that need to be vectorized. Uses
465 that are used for address computation are not considered relevant. */
466 if (!force && !exist_non_indexing_operands_for_use_p (use, stmt_vinfo))
467 return opt_result::success ();
468
469 if (!vect_is_simple_use (use, loop_vinfo, &dt, &dstmt_vinfo))
470 return opt_result::failure_at (stmt_vinfo->stmt,
471 "not vectorized:"
472 " unsupported use in stmt.\n");
473
474 if (!dstmt_vinfo)
475 return opt_result::success ();
476
477 basic_block def_bb = gimple_bb (dstmt_vinfo->stmt);
478 basic_block bb = gimple_bb (stmt_vinfo->stmt);
479
480 /* case 2: A reduction phi (STMT) defined by a reduction stmt (DSTMT_VINFO).
481 We have to force the stmt live since the epilogue loop needs it to
482 continue computing the reduction. */
483 if (gimple_code (stmt_vinfo->stmt) == GIMPLE_PHI
484 && STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_reduction_def
485 && gimple_code (dstmt_vinfo->stmt) != GIMPLE_PHI
486 && STMT_VINFO_DEF_TYPE (dstmt_vinfo) == vect_reduction_def
487 && bb->loop_father == def_bb->loop_father)
488 {
489 if (dump_enabled_p ())
490 dump_printf_loc (MSG_NOTE, vect_location,
491 "reduc-stmt defining reduc-phi in the same nest.\n");
492 vect_mark_relevant (worklist, dstmt_vinfo, relevant, true);
493 return opt_result::success ();
494 }
495
496 /* case 3a: outer-loop stmt defining an inner-loop stmt:
497 outer-loop-header-bb:
498 d = dstmt_vinfo
499 inner-loop:
500 stmt # use (d)
501 outer-loop-tail-bb:
502 ... */
503 if (flow_loop_nested_p (def_bb->loop_father, bb->loop_father))
504 {
505 if (dump_enabled_p ())
506 dump_printf_loc (MSG_NOTE, vect_location,
507 "outer-loop def-stmt defining inner-loop stmt.\n");
508
509 switch (relevant)
510 {
511 case vect_unused_in_scope:
512 relevant = (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_nested_cycle) ?
513 vect_used_in_scope : vect_unused_in_scope;
514 break;
515
516 case vect_used_in_outer_by_reduction:
517 gcc_assert (STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_reduction_def);
518 relevant = vect_used_by_reduction;
519 break;
520
521 case vect_used_in_outer:
522 gcc_assert (STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_reduction_def);
523 relevant = vect_used_in_scope;
524 break;
525
526 case vect_used_in_scope:
527 break;
528
529 default:
530 gcc_unreachable ();
531 }
532 }
533
534 /* case 3b: inner-loop stmt defining an outer-loop stmt:
535 outer-loop-header-bb:
536 ...
537 inner-loop:
538 d = dstmt_vinfo
539 outer-loop-tail-bb (or outer-loop-exit-bb in double reduction):
540 stmt # use (d) */
541 else if (flow_loop_nested_p (bb->loop_father, def_bb->loop_father))
542 {
543 if (dump_enabled_p ())
544 dump_printf_loc (MSG_NOTE, vect_location,
545 "inner-loop def-stmt defining outer-loop stmt.\n");
546
547 switch (relevant)
548 {
549 case vect_unused_in_scope:
550 relevant = (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_reduction_def
551 || STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_double_reduction_def) ?
552 vect_used_in_outer_by_reduction : vect_unused_in_scope;
553 break;
554
555 case vect_used_by_reduction:
556 case vect_used_only_live:
557 relevant = vect_used_in_outer_by_reduction;
558 break;
559
560 case vect_used_in_scope:
561 relevant = vect_used_in_outer;
562 break;
563
564 default:
565 gcc_unreachable ();
566 }
567 }
568 /* We are also not interested in uses on loop PHI backedges that are
569 inductions. Otherwise we'll needlessly vectorize the IV increment
570 and cause hybrid SLP for SLP inductions. Unless the PHI is live
571 of course. */
572 else if (gimple_code (stmt_vinfo->stmt) == GIMPLE_PHI
573 && STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_induction_def
574 && ! STMT_VINFO_LIVE_P (stmt_vinfo)
575 && (PHI_ARG_DEF_FROM_EDGE (stmt_vinfo->stmt,
576 loop_latch_edge (bb->loop_father))
577 == use))
578 {
579 if (dump_enabled_p ())
580 dump_printf_loc (MSG_NOTE, vect_location,
581 "induction value on backedge.\n");
582 return opt_result::success ();
583 }
584
585
586 vect_mark_relevant (worklist, dstmt_vinfo, relevant, false);
587 return opt_result::success ();
588 }
589
590
591 /* Function vect_mark_stmts_to_be_vectorized.
592
593 Not all stmts in the loop need to be vectorized. For example:
594
595 for i...
596 for j...
597 1. T0 = i + j
598 2. T1 = a[T0]
599
600 3. j = j + 1
601
602 Stmts 1 and 3 do not need to be vectorized, because loop control and
603 addressing of vectorized data-refs are handled differently.
604
605 This pass detects such stmts. */
606
607 opt_result
608 vect_mark_stmts_to_be_vectorized (loop_vec_info loop_vinfo, bool *fatal)
609 {
610 class loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
611 basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
612 unsigned int nbbs = loop->num_nodes;
613 gimple_stmt_iterator si;
614 unsigned int i;
615 basic_block bb;
616 bool live_p;
617 enum vect_relevant relevant;
618
619 DUMP_VECT_SCOPE ("vect_mark_stmts_to_be_vectorized");
620
621 auto_vec<stmt_vec_info, 64> worklist;
622
623 /* 1. Init worklist. */
624 for (i = 0; i < nbbs; i++)
625 {
626 bb = bbs[i];
627 for (si = gsi_start_phis (bb); !gsi_end_p (si); gsi_next (&si))
628 {
629 stmt_vec_info phi_info = loop_vinfo->lookup_stmt (gsi_stmt (si));
630 if (dump_enabled_p ())
631 dump_printf_loc (MSG_NOTE, vect_location, "init: phi relevant? %G",
632 phi_info->stmt);
633
634 if (vect_stmt_relevant_p (phi_info, loop_vinfo, &relevant, &live_p))
635 vect_mark_relevant (&worklist, phi_info, relevant, live_p);
636 }
637 for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
638 {
639 stmt_vec_info stmt_info = loop_vinfo->lookup_stmt (gsi_stmt (si));
640 if (dump_enabled_p ())
641 dump_printf_loc (MSG_NOTE, vect_location,
642 "init: stmt relevant? %G", stmt_info->stmt);
643
644 if (vect_stmt_relevant_p (stmt_info, loop_vinfo, &relevant, &live_p))
645 vect_mark_relevant (&worklist, stmt_info, relevant, live_p);
646 }
647 }
648
649 /* 2. Process_worklist */
650 while (worklist.length () > 0)
651 {
652 use_operand_p use_p;
653 ssa_op_iter iter;
654
655 stmt_vec_info stmt_vinfo = worklist.pop ();
656 if (dump_enabled_p ())
657 dump_printf_loc (MSG_NOTE, vect_location,
658 "worklist: examine stmt: %G", stmt_vinfo->stmt);
659
660 /* Examine the USEs of STMT. For each USE, mark the stmt that defines it
661 (DEF_STMT) as relevant/irrelevant according to the relevance property
662 of STMT. */
663 relevant = STMT_VINFO_RELEVANT (stmt_vinfo);
664
665 /* Generally, the relevance property of STMT (in STMT_VINFO_RELEVANT) is
666 propagated as is to the DEF_STMTs of its USEs.
667
668 One exception is when STMT has been identified as defining a reduction
669 variable; in this case we set the relevance to vect_used_by_reduction.
670 This is because we distinguish between two kinds of relevant stmts -
671 those that are used by a reduction computation, and those that are
672 (also) used by a regular computation. This allows us later on to
673 identify stmts that are used solely by a reduction, and therefore the
674 order of the results that they produce does not have to be kept. */
675
676 switch (STMT_VINFO_DEF_TYPE (stmt_vinfo))
677 {
678 case vect_reduction_def:
679 gcc_assert (relevant != vect_unused_in_scope);
680 if (relevant != vect_unused_in_scope
681 && relevant != vect_used_in_scope
682 && relevant != vect_used_by_reduction
683 && relevant != vect_used_only_live)
684 return opt_result::failure_at
685 (stmt_vinfo->stmt, "unsupported use of reduction.\n");
686 break;
687
688 case vect_nested_cycle:
689 if (relevant != vect_unused_in_scope
690 && relevant != vect_used_in_outer_by_reduction
691 && relevant != vect_used_in_outer)
692 return opt_result::failure_at
693 (stmt_vinfo->stmt, "unsupported use of nested cycle.\n");
694 break;
695
696 case vect_double_reduction_def:
697 if (relevant != vect_unused_in_scope
698 && relevant != vect_used_by_reduction
699 && relevant != vect_used_only_live)
700 return opt_result::failure_at
701 (stmt_vinfo->stmt, "unsupported use of double reduction.\n");
702 break;
703
704 default:
705 break;
706 }
707
708 if (is_pattern_stmt_p (stmt_vinfo))
709 {
710 /* Pattern statements are not inserted into the code, so
711 FOR_EACH_PHI_OR_STMT_USE optimizes their operands out, and we
712 have to scan the RHS or function arguments instead. */
713 if (gassign *assign = dyn_cast <gassign *> (stmt_vinfo->stmt))
714 {
715 enum tree_code rhs_code = gimple_assign_rhs_code (assign);
716 tree op = gimple_assign_rhs1 (assign);
717
718 i = 1;
719 if (rhs_code == COND_EXPR && COMPARISON_CLASS_P (op))
720 {
721 opt_result res
722 = process_use (stmt_vinfo, TREE_OPERAND (op, 0),
723 loop_vinfo, relevant, &worklist, false);
724 if (!res)
725 return res;
726 res = process_use (stmt_vinfo, TREE_OPERAND (op, 1),
727 loop_vinfo, relevant, &worklist, false);
728 if (!res)
729 return res;
730 i = 2;
731 }
732 for (; i < gimple_num_ops (assign); i++)
733 {
734 op = gimple_op (assign, i);
735 if (TREE_CODE (op) == SSA_NAME)
736 {
737 opt_result res
738 = process_use (stmt_vinfo, op, loop_vinfo, relevant,
739 &worklist, false);
740 if (!res)
741 return res;
742 }
743 }
744 }
745 else if (gcall *call = dyn_cast <gcall *> (stmt_vinfo->stmt))
746 {
747 for (i = 0; i < gimple_call_num_args (call); i++)
748 {
749 tree arg = gimple_call_arg (call, i);
750 opt_result res
751 = process_use (stmt_vinfo, arg, loop_vinfo, relevant,
752 &worklist, false);
753 if (!res)
754 return res;
755 }
756 }
757 }
758 else
759 FOR_EACH_PHI_OR_STMT_USE (use_p, stmt_vinfo->stmt, iter, SSA_OP_USE)
760 {
761 tree op = USE_FROM_PTR (use_p);
762 opt_result res
763 = process_use (stmt_vinfo, op, loop_vinfo, relevant,
764 &worklist, false);
765 if (!res)
766 return res;
767 }
768
769 if (STMT_VINFO_GATHER_SCATTER_P (stmt_vinfo))
770 {
771 gather_scatter_info gs_info;
772 if (!vect_check_gather_scatter (stmt_vinfo, loop_vinfo, &gs_info))
773 gcc_unreachable ();
774 opt_result res
775 = process_use (stmt_vinfo, gs_info.offset, loop_vinfo, relevant,
776 &worklist, true);
777 if (!res)
778 {
779 if (fatal)
780 *fatal = false;
781 return res;
782 }
783 }
784 } /* while worklist */
785
786 return opt_result::success ();
787 }
788
789 /* Function vect_model_simple_cost.
790
791 Models cost for simple operations, i.e. those that only emit ncopies of a
792 single op. Right now, this does not account for multiple insns that could
793 be generated for the single vector op. We will handle that shortly. */
794
795 static void
796 vect_model_simple_cost (vec_info *,
797 stmt_vec_info stmt_info, int ncopies,
798 enum vect_def_type *dt,
799 int ndts,
800 slp_tree node,
801 stmt_vector_for_cost *cost_vec,
802 vect_cost_for_stmt kind = vector_stmt)
803 {
804 int inside_cost = 0, prologue_cost = 0;
805
806 gcc_assert (cost_vec != NULL);
807
808 /* ??? Somehow we need to fix this at the callers. */
809 if (node)
810 ncopies = SLP_TREE_NUMBER_OF_VEC_STMTS (node);
811
812 if (!node)
813 /* Cost the "broadcast" of a scalar operand into a vector operand.
814 Use scalar_to_vec to cost the broadcast, as elsewhere in the vector
815 cost model. */
816 for (int i = 0; i < ndts; i++)
817 if (dt[i] == vect_constant_def || dt[i] == vect_external_def)
818 prologue_cost += record_stmt_cost (cost_vec, 1, scalar_to_vec,
819 stmt_info, 0, vect_prologue);
820
821 /* Adjust for two-operator SLP nodes. */
822 if (node && SLP_TREE_TWO_OPERATORS (node))
823 {
824 ncopies *= 2;
825 inside_cost += record_stmt_cost (cost_vec, ncopies, vec_perm,
826 stmt_info, 0, vect_body);
827 }
828
829 /* Pass the inside-of-loop statements to the target-specific cost model. */
830 inside_cost += record_stmt_cost (cost_vec, ncopies, kind,
831 stmt_info, 0, vect_body);
832
833 if (dump_enabled_p ())
834 dump_printf_loc (MSG_NOTE, vect_location,
835 "vect_model_simple_cost: inside_cost = %d, "
836 "prologue_cost = %d .\n", inside_cost, prologue_cost);
837 }
838
839
840 /* Model cost for type demotion and promotion operations. PWR is
841 normally zero for single-step promotions and demotions. It will be
842 one if two-step promotion/demotion is required, and so on. NCOPIES
843 is the number of vector results (and thus number of instructions)
844 for the narrowest end of the operation chain. Each additional
845 step doubles the number of instructions required. */
846
847 static void
848 vect_model_promotion_demotion_cost (stmt_vec_info stmt_info,
849 enum vect_def_type *dt,
850 unsigned int ncopies, int pwr,
851 stmt_vector_for_cost *cost_vec)
852 {
853 int i;
854 int inside_cost = 0, prologue_cost = 0;
855
856 for (i = 0; i < pwr + 1; i++)
857 {
858 inside_cost += record_stmt_cost (cost_vec, ncopies, vec_promote_demote,
859 stmt_info, 0, vect_body);
860 ncopies *= 2;
861 }
862
863 /* FORNOW: Assuming a maximum of 2 args per stmt. */
864 for (i = 0; i < 2; i++)
865 if (dt[i] == vect_constant_def || dt[i] == vect_external_def)
866 prologue_cost += record_stmt_cost (cost_vec, 1, vector_stmt,
867 stmt_info, 0, vect_prologue);
868
869 if (dump_enabled_p ())
870 dump_printf_loc (MSG_NOTE, vect_location,
871 "vect_model_promotion_demotion_cost: inside_cost = %d, "
872 "prologue_cost = %d .\n", inside_cost, prologue_cost);
873 }
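
/* Worked example (editorial note): with NCOPIES = 2 and PWR = 1 the loop
   above records 2 vec_promote_demote stmts for the first step and 4 for
   the second, i.e. an inside cost of 6 such stmts, plus one prologue
   vector_stmt for each constant or external operand.  */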
874
875 /* Returns true if the current function returns DECL. */
876
877 static bool
878 cfun_returns (tree decl)
879 {
880 edge_iterator ei;
881 edge e;
882 FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR_FOR_FN (cfun)->preds)
883 {
884 greturn *ret = safe_dyn_cast <greturn *> (last_stmt (e->src));
885 if (!ret)
886 continue;
887 if (gimple_return_retval (ret) == decl)
888 return true;
889 /* We often end up with an aggregate copy to the result decl;
890 handle that case as well, but first skip intermediate
891 clobbers. */
892 gimple *def = ret;
893 do
894 {
895 def = SSA_NAME_DEF_STMT (gimple_vuse (def));
896 }
897 while (gimple_clobber_p (def));
898 if (is_a <gassign *> (def)
899 && gimple_assign_lhs (def) == gimple_return_retval (ret)
900 && gimple_assign_rhs1 (def) == decl)
901 return true;
902 }
903 return false;
904 }
905
906 /* Function vect_model_store_cost
907
908 Models cost for stores. In the case of grouped accesses, one access
909 has the overhead of the grouped access attributed to it. */
910
911 static void
912 vect_model_store_cost (vec_info *vinfo, stmt_vec_info stmt_info, int ncopies,
913 vect_memory_access_type memory_access_type,
914 vec_load_store_type vls_type, slp_tree slp_node,
915 stmt_vector_for_cost *cost_vec)
916 {
917 unsigned int inside_cost = 0, prologue_cost = 0;
918 stmt_vec_info first_stmt_info = stmt_info;
919 bool grouped_access_p = STMT_VINFO_GROUPED_ACCESS (stmt_info);
920
921 /* ??? Somehow we need to fix this at the callers. */
922 if (slp_node)
923 ncopies = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
924
925 if (vls_type == VLS_STORE_INVARIANT)
926 {
927 if (!slp_node)
928 prologue_cost += record_stmt_cost (cost_vec, 1, scalar_to_vec,
929 stmt_info, 0, vect_prologue);
930 }
931
932 /* Grouped stores update all elements in the group at once,
933 so we want the DR for the first statement. */
934 if (!slp_node && grouped_access_p)
935 first_stmt_info = DR_GROUP_FIRST_ELEMENT (stmt_info);
936
937 /* True if we should include any once-per-group costs as well as
938 the cost of the statement itself. For SLP we only get called
939 once per group anyhow. */
940 bool first_stmt_p = (first_stmt_info == stmt_info);
941
942 /* We assume that the cost of a single store-lanes instruction is
943 equivalent to the cost of DR_GROUP_SIZE separate stores. If a grouped
944 access is instead being provided by a permute-and-store operation,
945 include the cost of the permutes. */
946 if (first_stmt_p
947 && memory_access_type == VMAT_CONTIGUOUS_PERMUTE)
948 {
949 /* Uses high and low interleave or shuffle operations for each
950 needed permute. */
951 int group_size = DR_GROUP_SIZE (first_stmt_info);
952 int nstmts = ncopies * ceil_log2 (group_size) * group_size;
953 inside_cost = record_stmt_cost (cost_vec, nstmts, vec_perm,
954 stmt_info, 0, vect_body);
955
956 if (dump_enabled_p ())
957 dump_printf_loc (MSG_NOTE, vect_location,
958 "vect_model_store_cost: strided group_size = %d .\n",
959 group_size);
960 }
961
962 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
963 /* Costs of the stores. */
964 if (memory_access_type == VMAT_ELEMENTWISE
965 || memory_access_type == VMAT_GATHER_SCATTER)
966 {
967 /* N scalar stores plus extracting the elements. */
968 unsigned int assumed_nunits = vect_nunits_for_cost (vectype);
969 inside_cost += record_stmt_cost (cost_vec,
970 ncopies * assumed_nunits,
971 scalar_store, stmt_info, 0, vect_body);
972 }
973 else
974 vect_get_store_cost (vinfo, stmt_info, ncopies, &inside_cost, cost_vec);
975
976 if (memory_access_type == VMAT_ELEMENTWISE
977 || memory_access_type == VMAT_STRIDED_SLP)
978 {
979 /* N scalar stores plus extracting the elements. */
980 unsigned int assumed_nunits = vect_nunits_for_cost (vectype);
981 inside_cost += record_stmt_cost (cost_vec,
982 ncopies * assumed_nunits,
983 vec_to_scalar, stmt_info, 0, vect_body);
984 }
985
986 /* When vectorizing a store into the function result, assign
987 a penalty if the function returns in a multi-register location.
988 In this case we assume we'll end up having to spill the
989 vector result and do piecewise loads as a conservative estimate. */
990 tree base = get_base_address (STMT_VINFO_DATA_REF (stmt_info)->ref);
991 if (base
992 && (TREE_CODE (base) == RESULT_DECL
993 || (DECL_P (base) && cfun_returns (base)))
994 && !aggregate_value_p (base, cfun->decl))
995 {
996 rtx reg = hard_function_value (TREE_TYPE (base), cfun->decl, 0, 1);
997 /* ??? Handle PARALLEL in some way. */
998 if (REG_P (reg))
999 {
1000 int nregs = hard_regno_nregs (REGNO (reg), GET_MODE (reg));
1001 /* Assume that a single reg-reg move is possible and cheap,
1002 do not account for vector to gp register move cost. */
1003 if (nregs > 1)
1004 {
1005 /* Spill. */
1006 prologue_cost += record_stmt_cost (cost_vec, ncopies,
1007 vector_store,
1008 stmt_info, 0, vect_epilogue);
1009 /* Loads. */
1010 prologue_cost += record_stmt_cost (cost_vec, ncopies * nregs,
1011 scalar_load,
1012 stmt_info, 0, vect_epilogue);
1013 }
1014 }
1015 }
1016
1017 if (dump_enabled_p ())
1018 dump_printf_loc (MSG_NOTE, vect_location,
1019 "vect_model_store_cost: inside_cost = %d, "
1020 "prologue_cost = %d .\n", inside_cost, prologue_cost);
1021 }
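
/* Worked example (editorial note): if the store targets the function
   result and that result is returned in NREGS = 2 hard registers, the
   code above charges NCOPIES vector_store costs for the assumed spill
   plus NCOPIES * 2 scalar_load costs for the piecewise reloads, all
   attributed to the epilogue.  */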
1022
1023
1024 /* Calculate cost of DR's memory access. */
1025 void
1026 vect_get_store_cost (vec_info *vinfo, stmt_vec_info stmt_info, int ncopies,
1027 unsigned int *inside_cost,
1028 stmt_vector_for_cost *body_cost_vec)
1029 {
1030 dr_vec_info *dr_info = STMT_VINFO_DR_INFO (stmt_info);
1031 int alignment_support_scheme
1032 = vect_supportable_dr_alignment (vinfo, dr_info, false);
1033
1034 switch (alignment_support_scheme)
1035 {
1036 case dr_aligned:
1037 {
1038 *inside_cost += record_stmt_cost (body_cost_vec, ncopies,
1039 vector_store, stmt_info, 0,
1040 vect_body);
1041
1042 if (dump_enabled_p ())
1043 dump_printf_loc (MSG_NOTE, vect_location,
1044 "vect_model_store_cost: aligned.\n");
1045 break;
1046 }
1047
1048 case dr_unaligned_supported:
1049 {
1050 /* Here, we assign an additional cost for the unaligned store. */
1051 *inside_cost += record_stmt_cost (body_cost_vec, ncopies,
1052 unaligned_store, stmt_info,
1053 DR_MISALIGNMENT (dr_info),
1054 vect_body);
1055 if (dump_enabled_p ())
1056 dump_printf_loc (MSG_NOTE, vect_location,
1057 "vect_model_store_cost: unaligned supported by "
1058 "hardware.\n");
1059 break;
1060 }
1061
1062 case dr_unaligned_unsupported:
1063 {
1064 *inside_cost = VECT_MAX_COST;
1065
1066 if (dump_enabled_p ())
1067 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1068 "vect_model_store_cost: unsupported access.\n");
1069 break;
1070 }
1071
1072 default:
1073 gcc_unreachable ();
1074 }
1075 }
1076
1077
1078 /* Function vect_model_load_cost
1079
1080 Models cost for loads. In the case of grouped accesses, one access has
1081 the overhead of the grouped access attributed to it. Since unaligned
1082 accesses are supported for loads, we also account for the costs of the
1083 access scheme chosen. */
1084
1085 static void
1086 vect_model_load_cost (vec_info *vinfo,
1087 stmt_vec_info stmt_info, unsigned ncopies, poly_uint64 vf,
1088 vect_memory_access_type memory_access_type,
1089 slp_tree slp_node,
1090 stmt_vector_for_cost *cost_vec)
1091 {
1092 unsigned int inside_cost = 0, prologue_cost = 0;
1093 bool grouped_access_p = STMT_VINFO_GROUPED_ACCESS (stmt_info);
1094
1095 gcc_assert (cost_vec);
1096
1097 /* ??? Somehow we need to fix this at the callers. */
1098 if (slp_node)
1099 ncopies = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
1100
1101 if (slp_node && SLP_TREE_LOAD_PERMUTATION (slp_node).exists ())
1102 {
1103 /* If the load is permuted then the alignment is determined by
1104 the first group element not by the first scalar stmt DR. */
1105 stmt_vec_info first_stmt_info = DR_GROUP_FIRST_ELEMENT (stmt_info);
1106 /* Record the cost for the permutation. */
1107 unsigned n_perms;
1108 unsigned assumed_nunits
1109 = vect_nunits_for_cost (STMT_VINFO_VECTYPE (first_stmt_info));
1110 vect_transform_slp_perm_load (vinfo, slp_node, vNULL, NULL,
1111 vf, true, &n_perms);
1112 inside_cost += record_stmt_cost (cost_vec, n_perms, vec_perm,
1113 first_stmt_info, 0, vect_body);
1114 /* And adjust the number of loads performed. This handles
1115 redundancies as well as loads that are later dead. */
1116 auto_sbitmap perm (DR_GROUP_SIZE (first_stmt_info));
1117 bitmap_clear (perm);
1118 for (unsigned i = 0;
1119 i < SLP_TREE_LOAD_PERMUTATION (slp_node).length (); ++i)
1120 bitmap_set_bit (perm, SLP_TREE_LOAD_PERMUTATION (slp_node)[i]);
1121 ncopies = 0;
1122 bool load_seen = false;
1123 for (unsigned i = 0; i < DR_GROUP_SIZE (first_stmt_info); ++i)
1124 {
1125 if (i % assumed_nunits == 0)
1126 {
1127 if (load_seen)
1128 ncopies++;
1129 load_seen = false;
1130 }
1131 if (bitmap_bit_p (perm, i))
1132 load_seen = true;
1133 }
1134 if (load_seen)
1135 ncopies++;
1136 gcc_assert (ncopies
1137 <= (DR_GROUP_SIZE (first_stmt_info)
1138 - DR_GROUP_GAP (first_stmt_info)
1139 + assumed_nunits - 1) / assumed_nunits);
1140 }
1141
1142 /* Grouped loads read all elements in the group at once,
1143 so we want the DR for the first statement. */
1144 stmt_vec_info first_stmt_info = stmt_info;
1145 if (!slp_node && grouped_access_p)
1146 first_stmt_info = DR_GROUP_FIRST_ELEMENT (stmt_info);
1147
1148 /* True if we should include any once-per-group costs as well as
1149 the cost of the statement itself. For SLP we only get called
1150 once per group anyhow. */
1151 bool first_stmt_p = (first_stmt_info == stmt_info);
1152
1153 /* We assume that the cost of a single load-lanes instruction is
1154 equivalent to the cost of DR_GROUP_SIZE separate loads. If a grouped
1155 access is instead being provided by a load-and-permute operation,
1156 include the cost of the permutes. */
1157 if (first_stmt_p
1158 && memory_access_type == VMAT_CONTIGUOUS_PERMUTE)
1159 {
1160 /* Uses even and odd extract operations or shuffle operations
1161 for each needed permute. */
1162 int group_size = DR_GROUP_SIZE (first_stmt_info);
1163 int nstmts = ncopies * ceil_log2 (group_size) * group_size;
1164 inside_cost += record_stmt_cost (cost_vec, nstmts, vec_perm,
1165 stmt_info, 0, vect_body);
1166
1167 if (dump_enabled_p ())
1168 dump_printf_loc (MSG_NOTE, vect_location,
1169 "vect_model_load_cost: strided group_size = %d .\n",
1170 group_size);
1171 }
1172
1173 /* The loads themselves. */
1174 if (memory_access_type == VMAT_ELEMENTWISE
1175 || memory_access_type == VMAT_GATHER_SCATTER)
1176 {
1177 /* N scalar loads plus gathering them into a vector. */
1178 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
1179 unsigned int assumed_nunits = vect_nunits_for_cost (vectype);
1180 inside_cost += record_stmt_cost (cost_vec,
1181 ncopies * assumed_nunits,
1182 scalar_load, stmt_info, 0, vect_body);
1183 }
1184 else
1185 vect_get_load_cost (vinfo, stmt_info, ncopies, first_stmt_p,
1186 &inside_cost, &prologue_cost,
1187 cost_vec, cost_vec, true);
1188 if (memory_access_type == VMAT_ELEMENTWISE
1189 || memory_access_type == VMAT_STRIDED_SLP)
1190 inside_cost += record_stmt_cost (cost_vec, ncopies, vec_construct,
1191 stmt_info, 0, vect_body);
1192
1193 if (dump_enabled_p ())
1194 dump_printf_loc (MSG_NOTE, vect_location,
1195 "vect_model_load_cost: inside_cost = %d, "
1196 "prologue_cost = %d .\n", inside_cost, prologue_cost);
1197 }
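
/* Worked example (editorial note): for a permuted SLP load with
   DR_GROUP_SIZE = 8, an assumed 4 elements per vector and a load
   permutation that only uses group elements {0, 2}, the bitmap scan above
   finds required elements only in the first chunk of 4, so NCOPIES is
   reduced to a single vector load instead of the two that a full group
   load would need.  */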
1198
1199
1200 /* Calculate cost of DR's memory access. */
1201 void
1202 vect_get_load_cost (vec_info *vinfo, stmt_vec_info stmt_info, int ncopies,
1203 bool add_realign_cost, unsigned int *inside_cost,
1204 unsigned int *prologue_cost,
1205 stmt_vector_for_cost *prologue_cost_vec,
1206 stmt_vector_for_cost *body_cost_vec,
1207 bool record_prologue_costs)
1208 {
1209 dr_vec_info *dr_info = STMT_VINFO_DR_INFO (stmt_info);
1210 int alignment_support_scheme
1211 = vect_supportable_dr_alignment (vinfo, dr_info, false);
1212
1213 switch (alignment_support_scheme)
1214 {
1215 case dr_aligned:
1216 {
1217 *inside_cost += record_stmt_cost (body_cost_vec, ncopies, vector_load,
1218 stmt_info, 0, vect_body);
1219
1220 if (dump_enabled_p ())
1221 dump_printf_loc (MSG_NOTE, vect_location,
1222 "vect_model_load_cost: aligned.\n");
1223
1224 break;
1225 }
1226 case dr_unaligned_supported:
1227 {
1228 /* Here, we assign an additional cost for the unaligned load. */
1229 *inside_cost += record_stmt_cost (body_cost_vec, ncopies,
1230 unaligned_load, stmt_info,
1231 DR_MISALIGNMENT (dr_info),
1232 vect_body);
1233
1234 if (dump_enabled_p ())
1235 dump_printf_loc (MSG_NOTE, vect_location,
1236 "vect_model_load_cost: unaligned supported by "
1237 "hardware.\n");
1238
1239 break;
1240 }
1241 case dr_explicit_realign:
1242 {
1243 *inside_cost += record_stmt_cost (body_cost_vec, ncopies * 2,
1244 vector_load, stmt_info, 0, vect_body);
1245 *inside_cost += record_stmt_cost (body_cost_vec, ncopies,
1246 vec_perm, stmt_info, 0, vect_body);
1247
1248 /* FIXME: If the misalignment remains fixed across the iterations of
1249 the containing loop, the following cost should be added to the
1250 prologue costs. */
1251 if (targetm.vectorize.builtin_mask_for_load)
1252 *inside_cost += record_stmt_cost (body_cost_vec, 1, vector_stmt,
1253 stmt_info, 0, vect_body);
1254
1255 if (dump_enabled_p ())
1256 dump_printf_loc (MSG_NOTE, vect_location,
1257 "vect_model_load_cost: explicit realign\n");
1258
1259 break;
1260 }
1261 case dr_explicit_realign_optimized:
1262 {
1263 if (dump_enabled_p ())
1264 dump_printf_loc (MSG_NOTE, vect_location,
1265 "vect_model_load_cost: unaligned software "
1266 "pipelined.\n");
1267
1268 /* Unaligned software pipeline has a load of an address, an initial
1269 load, and possibly a mask operation to "prime" the loop. However,
1270 if this is an access in a group of loads, which provide grouped
1271 access, then the above cost should only be considered for one
1272 access in the group. Inside the loop, there is a load op
1273 and a realignment op. */
1274
1275 if (add_realign_cost && record_prologue_costs)
1276 {
1277 *prologue_cost += record_stmt_cost (prologue_cost_vec, 2,
1278 vector_stmt, stmt_info,
1279 0, vect_prologue);
1280 if (targetm.vectorize.builtin_mask_for_load)
1281 *prologue_cost += record_stmt_cost (prologue_cost_vec, 1,
1282 vector_stmt, stmt_info,
1283 0, vect_prologue);
1284 }
1285
1286 *inside_cost += record_stmt_cost (body_cost_vec, ncopies, vector_load,
1287 stmt_info, 0, vect_body);
1288 *inside_cost += record_stmt_cost (body_cost_vec, ncopies, vec_perm,
1289 stmt_info, 0, vect_body);
1290
1291 if (dump_enabled_p ())
1292 dump_printf_loc (MSG_NOTE, vect_location,
1293 "vect_model_load_cost: explicit realign optimized"
1294 "\n");
1295
1296 break;
1297 }
1298
1299 case dr_unaligned_unsupported:
1300 {
1301 *inside_cost = VECT_MAX_COST;
1302
1303 if (dump_enabled_p ())
1304 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1305 "vect_model_load_cost: unsupported access.\n");
1306 break;
1307 }
1308
1309 default:
1310 gcc_unreachable ();
1311 }
1312 }
1313
1314 /* Insert the new stmt NEW_STMT at *GSI or at the appropriate place in
1315 the loop preheader for the vectorized stmt STMT_VINFO. */
1316
1317 static void
1318 vect_init_vector_1 (vec_info *vinfo, stmt_vec_info stmt_vinfo, gimple *new_stmt,
1319 gimple_stmt_iterator *gsi)
1320 {
1321 if (gsi)
1322 vect_finish_stmt_generation (vinfo, stmt_vinfo, new_stmt, gsi);
1323 else
1324 {
1325 loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
1326
1327 if (loop_vinfo)
1328 {
1329 class loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
1330 basic_block new_bb;
1331 edge pe;
1332
1333 if (stmt_vinfo && nested_in_vect_loop_p (loop, stmt_vinfo))
1334 loop = loop->inner;
1335
1336 pe = loop_preheader_edge (loop);
1337 new_bb = gsi_insert_on_edge_immediate (pe, new_stmt);
1338 gcc_assert (!new_bb);
1339 }
1340 else
1341 {
1342 bb_vec_info bb_vinfo = dyn_cast <bb_vec_info> (vinfo);
1343 gimple_stmt_iterator gsi_region_begin = bb_vinfo->region_begin;
1344 gsi_insert_before (&gsi_region_begin, new_stmt, GSI_SAME_STMT);
1345 }
1346 }
1347
1348 if (dump_enabled_p ())
1349 dump_printf_loc (MSG_NOTE, vect_location,
1350 "created new init_stmt: %G", new_stmt);
1351 }
1352
1353 /* Function vect_init_vector.
1354
1355 Insert a new stmt (INIT_STMT) that initializes a new variable of type
1356 TYPE with the value VAL. If TYPE is a vector type and VAL does not have
1357 vector type, a vector with all elements equal to VAL is created first.
1358 Place the initialization at GSI if it is not NULL. Otherwise, place the
1359 initialization at the loop preheader.
1360 Return the DEF of INIT_STMT.
1361 It will be used in the vectorization of STMT_INFO. */
1362
1363 tree
1364 vect_init_vector (vec_info *vinfo, stmt_vec_info stmt_info, tree val, tree type,
1365 gimple_stmt_iterator *gsi)
1366 {
1367 gimple *init_stmt;
1368 tree new_temp;
1369
1370 /* We abuse this function to push something to an SSA name with initial 'val'. */
1371 if (! useless_type_conversion_p (type, TREE_TYPE (val)))
1372 {
1373 gcc_assert (TREE_CODE (type) == VECTOR_TYPE);
1374 if (! types_compatible_p (TREE_TYPE (type), TREE_TYPE (val)))
1375 {
1376 /* A scalar boolean value should be transformed into an
1377 all-zeros or all-ones value before building a vector. */
1378 if (VECTOR_BOOLEAN_TYPE_P (type))
1379 {
1380 tree true_val = build_all_ones_cst (TREE_TYPE (type));
1381 tree false_val = build_zero_cst (TREE_TYPE (type));
1382
1383 if (CONSTANT_CLASS_P (val))
1384 val = integer_zerop (val) ? false_val : true_val;
1385 else
1386 {
1387 new_temp = make_ssa_name (TREE_TYPE (type));
1388 init_stmt = gimple_build_assign (new_temp, COND_EXPR,
1389 val, true_val, false_val);
1390 vect_init_vector_1 (vinfo, stmt_info, init_stmt, gsi);
1391 val = new_temp;
1392 }
1393 }
1394 else
1395 {
1396 gimple_seq stmts = NULL;
1397 if (! INTEGRAL_TYPE_P (TREE_TYPE (val)))
1398 val = gimple_build (&stmts, VIEW_CONVERT_EXPR,
1399 TREE_TYPE (type), val);
1400 else
1401 /* ??? Condition vectorization expects us to do
1402 promotion of invariant/external defs. */
1403 val = gimple_convert (&stmts, TREE_TYPE (type), val);
1404 for (gimple_stmt_iterator gsi2 = gsi_start (stmts);
1405 !gsi_end_p (gsi2); )
1406 {
1407 init_stmt = gsi_stmt (gsi2);
1408 gsi_remove (&gsi2, false);
1409 vect_init_vector_1 (vinfo, stmt_info, init_stmt, gsi);
1410 }
1411 }
1412 }
1413 val = build_vector_from_val (type, val);
1414 }
1415
1416 new_temp = vect_get_new_ssa_name (type, vect_simple_var, "cst_");
1417 init_stmt = gimple_build_assign (new_temp, val);
1418 vect_init_vector_1 (vinfo, stmt_info, init_stmt, gsi);
1419 return new_temp;
1420 }
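
/* Usage sketch (editorial addition; VINFO, STMT_INFO and VECTYPE are
   assumed to be in scope in the caller): to materialize a loop-invariant
   vector of 5s one could do

     tree cst = build_int_cst (TREE_TYPE (vectype), 5);
     tree vec_def = vect_init_vector (vinfo, stmt_info, cst, vectype, NULL);

   With a NULL GSI the init stmt is emitted on the loop preheader edge
   (or at the start of the region for basic-block vectorization).  */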
1421
1422 /* Function vect_get_vec_def_for_operand_1.
1423
1424 For a defining stmt DEF_STMT_INFO of a scalar stmt, return a vector def
1425 with type DT that will be used in the vectorized stmt. */
1426
1427 tree
1428 vect_get_vec_def_for_operand_1 (stmt_vec_info def_stmt_info,
1429 enum vect_def_type dt)
1430 {
1431 tree vec_oprnd;
1432 stmt_vec_info vec_stmt_info;
1433
1434 switch (dt)
1435 {
1436 /* operand is a constant or a loop invariant. */
1437 case vect_constant_def:
1438 case vect_external_def:
1439 /* Code should use vect_get_vec_def_for_operand. */
1440 gcc_unreachable ();
1441
1442 /* Operand is defined by a loop header phi. In case of nested
1443 cycles we also may have uses of the backedge def. */
1444 case vect_reduction_def:
1445 case vect_double_reduction_def:
1446 case vect_nested_cycle:
1447 case vect_induction_def:
1448 gcc_assert (gimple_code (def_stmt_info->stmt) == GIMPLE_PHI
1449 || dt == vect_nested_cycle);
1450 /* Fallthru. */
1451
1452 /* operand is defined inside the loop. */
1453 case vect_internal_def:
1454 {
1455 /* Get the def from the vectorized stmt. */
1456 vec_stmt_info = STMT_VINFO_VEC_STMT (def_stmt_info);
1457 /* Get vectorized pattern statement. */
1458 if (!vec_stmt_info
1459 && STMT_VINFO_IN_PATTERN_P (def_stmt_info)
1460 && !STMT_VINFO_RELEVANT (def_stmt_info))
1461 vec_stmt_info = (STMT_VINFO_VEC_STMT
1462 (STMT_VINFO_RELATED_STMT (def_stmt_info)));
1463 gcc_assert (vec_stmt_info);
1464 if (gphi *phi = dyn_cast <gphi *> (vec_stmt_info->stmt))
1465 vec_oprnd = PHI_RESULT (phi);
1466 else
1467 vec_oprnd = gimple_get_lhs (vec_stmt_info->stmt);
1468 return vec_oprnd;
1469 }
1470
1471 default:
1472 gcc_unreachable ();
1473 }
1474 }
1475
1476
1477 /* Function vect_get_vec_def_for_operand.
1478
1479 OP is an operand in STMT_VINFO. This function returns a (vector) def
1480 that will be used in the vectorized stmt for STMT_VINFO.
1481
1482 In the case that OP is an SSA_NAME which is defined in the loop, then
1483 STMT_VINFO_VEC_STMT of the defining stmt holds the relevant def.
1484
1485 In case OP is an invariant or constant, a new stmt that creates a vector def
1486 needs to be introduced. VECTYPE may be used to specify a required type for
1487 vector invariant. */
1488
1489 tree
1490 vect_get_vec_def_for_operand (vec_info *vinfo,
1491 tree op, stmt_vec_info stmt_vinfo, tree vectype)
1492 {
1493 gimple *def_stmt;
1494 enum vect_def_type dt;
1495 bool is_simple_use;
1496 loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
1497
1498 if (dump_enabled_p ())
1499 dump_printf_loc (MSG_NOTE, vect_location,
1500 "vect_get_vec_def_for_operand: %T\n", op);
1501
1502 stmt_vec_info def_stmt_info;
1503 is_simple_use = vect_is_simple_use (op, loop_vinfo, &dt,
1504 &def_stmt_info, &def_stmt);
1505 gcc_assert (is_simple_use);
1506 if (def_stmt && dump_enabled_p ())
1507 dump_printf_loc (MSG_NOTE, vect_location, " def_stmt = %G", def_stmt);
1508
1509 if (dt == vect_constant_def || dt == vect_external_def)
1510 {
1511 tree stmt_vectype = STMT_VINFO_VECTYPE (stmt_vinfo);
1512 tree vector_type;
1513
1514 if (vectype)
1515 vector_type = vectype;
1516 else if (VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (op))
1517 && VECTOR_BOOLEAN_TYPE_P (stmt_vectype))
1518 vector_type = truth_type_for (stmt_vectype);
1519 else
1520 vector_type = get_vectype_for_scalar_type (loop_vinfo, TREE_TYPE (op));
1521
1522 gcc_assert (vector_type);
1523 return vect_init_vector (vinfo, stmt_vinfo, op, vector_type, NULL);
1524 }
1525 else
1526 return vect_get_vec_def_for_operand_1 (def_stmt_info, dt);
1527 }
1528
1529
1530 /* Function vect_get_vec_def_for_stmt_copy
1531
1532 Return a vector-def for an operand. This function is used when the
1533 vectorized stmt to be created (by the caller to this function) is a "copy"
1534 created in case the vectorized result cannot fit in one vector, and several
1535 copies of the vector-stmt are required. In this case the vector-def is
1536 retrieved from the vector stmt recorded in the STMT_VINFO_RELATED_STMT field
1537 of the stmt that defines VEC_OPRND. VINFO describes the vectorization.
1538
1539 Context:
1540 In case the vectorization factor (VF) is bigger than the number
1541 of elements that can fit in a vectype (nunits), we have to generate
1542 more than one vector stmt to vectorize the scalar stmt. This situation
1543 arises when there are multiple data-types operated upon in the loop; the
1544 smallest data-type determines the VF, and as a result, when vectorizing
1545 stmts operating on wider types we need to create 'VF/nunits' "copies" of the
1546 vector stmt (each computing a vector of 'nunits' results, and together
1547 computing 'VF' results in each iteration). This function is called when
1548 vectorizing such a stmt (e.g. vectorizing S2 in the illustration below, in
1549 which VF=16 and nunits=4, so the number of copies required is 4):
1550
1551 scalar stmt: vectorized into: STMT_VINFO_RELATED_STMT
1552
1553 S1: x = load VS1.0: vx.0 = memref0 VS1.1
1554 VS1.1: vx.1 = memref1 VS1.2
1555 VS1.2: vx.2 = memref2 VS1.3
1556 VS1.3: vx.3 = memref3
1557
1558 S2: z = x + ... VSnew.0: vz0 = vx.0 + ... VSnew.1
1559 VSnew.1: vz1 = vx.1 + ... VSnew.2
1560 VSnew.2: vz2 = vx.2 + ... VSnew.3
1561 VSnew.3: vz3 = vx.3 + ...
1562
1563 The vectorization of S1 is explained in vectorizable_load.
1564 The vectorization of S2:
1565 To create the first vector-stmt out of the 4 copies - VSnew.0 -
1566 the function 'vect_get_vec_def_for_operand' is called to
1567 get the relevant vector-def for each operand of S2. For operand x it
1568 returns the vector-def 'vx.0'.
1569
1570 To create the remaining copies of the vector-stmt (VSnew.j), this
1571 function is called to get the relevant vector-def for each operand. It is
1572 obtained from the respective VS1.j stmt, which is recorded in the
1573 STMT_VINFO_RELATED_STMT field of the stmt that defines VEC_OPRND.
1574
1575 For example, to obtain the vector-def 'vx.1' in order to create the
1576 vector stmt 'VSnew.1', this function is called with VEC_OPRND='vx.0'.
1577 Given 'vx.0' we obtain the stmt that defines it ('VS1.0'); from the
1578 STMT_VINFO_RELATED_STMT field of 'VS1.0' we obtain the next copy - 'VS1.1',
1579 and return its def ('vx.1').
1580 Overall, to create the above sequence this function will be called 3 times:
1581 vx.1 = vect_get_vec_def_for_stmt_copy (vinfo, vx.0);
1582 vx.2 = vect_get_vec_def_for_stmt_copy (vinfo, vx.1);
1583 vx.3 = vect_get_vec_def_for_stmt_copy (vinfo, vx.2); */
1584
1585 tree
1586 vect_get_vec_def_for_stmt_copy (vec_info *vinfo, tree vec_oprnd)
1587 {
1588 stmt_vec_info def_stmt_info = vinfo->lookup_def (vec_oprnd);
1589 if (!def_stmt_info)
1590 /* Do nothing; can reuse same def. */
1591 return vec_oprnd;
1592
1593 def_stmt_info = STMT_VINFO_RELATED_STMT (def_stmt_info);
1594 gcc_assert (def_stmt_info);
1595 if (gphi *phi = dyn_cast <gphi *> (def_stmt_info->stmt))
1596 vec_oprnd = PHI_RESULT (phi);
1597 else
1598 vec_oprnd = gimple_get_lhs (def_stmt_info->stmt);
1599 return vec_oprnd;
1600 }
1601
1602
1603 /* Get vectorized definitions for the operands to create a copy of an original
1604 stmt. See vect_get_vec_def_for_stmt_copy () for details. */
1605
1606 void
1607 vect_get_vec_defs_for_stmt_copy (vec_info *vinfo,
1608 vec<tree> *vec_oprnds0,
1609 vec<tree> *vec_oprnds1)
1610 {
1611 tree vec_oprnd = vec_oprnds0->pop ();
1612
1613 vec_oprnd = vect_get_vec_def_for_stmt_copy (vinfo, vec_oprnd);
1614 vec_oprnds0->quick_push (vec_oprnd);
1615
1616 if (vec_oprnds1 && vec_oprnds1->length ())
1617 {
1618 vec_oprnd = vec_oprnds1->pop ();
1619 vec_oprnd = vect_get_vec_def_for_stmt_copy (vinfo, vec_oprnd);
1620 vec_oprnds1->quick_push (vec_oprnd);
1621 }
1622 }
1623
1624
1625 /* Get vectorized definitions for OP0 and OP1. */
1626
1627 void
1628 vect_get_vec_defs (vec_info *vinfo, tree op0, tree op1, stmt_vec_info stmt_info,
1629 vec<tree> *vec_oprnds0,
1630 vec<tree> *vec_oprnds1,
1631 slp_tree slp_node)
1632 {
1633 if (slp_node)
1634 {
1635 auto_vec<vec<tree> > vec_defs (SLP_TREE_CHILDREN (slp_node).length ());
1636 vect_get_slp_defs (vinfo, slp_node, &vec_defs, op1 ? 2 : 1);
1637 *vec_oprnds0 = vec_defs[0];
1638 if (op1)
1639 *vec_oprnds1 = vec_defs[1];
1640 }
1641 else
1642 {
1643 tree vec_oprnd;
1644
1645 vec_oprnds0->create (1);
1646 vec_oprnd = vect_get_vec_def_for_operand (vinfo, op0, stmt_info);
1647 vec_oprnds0->quick_push (vec_oprnd);
1648
1649 if (op1)
1650 {
1651 vec_oprnds1->create (1);
1652 vec_oprnd = vect_get_vec_def_for_operand (vinfo, op1, stmt_info);
1653 vec_oprnds1->quick_push (vec_oprnd);
1654 }
1655 }
1656 }
1657
1658 /* Helper function called by vect_finish_replace_stmt and
1659 vect_finish_stmt_generation. Set the location of the new
1660 statement and create and return a stmt_vec_info for it. */
1661
1662 static stmt_vec_info
1663 vect_finish_stmt_generation_1 (vec_info *vinfo,
1664 stmt_vec_info stmt_info, gimple *vec_stmt)
1665 {
1666 stmt_vec_info vec_stmt_info = vinfo->add_stmt (vec_stmt);
1667
1668 if (dump_enabled_p ())
1669 dump_printf_loc (MSG_NOTE, vect_location, "add new stmt: %G", vec_stmt);
1670
1671 if (stmt_info)
1672 {
1673 gimple_set_location (vec_stmt, gimple_location (stmt_info->stmt));
1674
1675 /* While EH edges will generally prevent vectorization, stmt might
1676 e.g. be in a must-not-throw region. Ensure newly created stmts
1677 that could throw are part of the same region. */
1678 int lp_nr = lookup_stmt_eh_lp (stmt_info->stmt);
1679 if (lp_nr != 0 && stmt_could_throw_p (cfun, vec_stmt))
1680 add_stmt_to_eh_lp (vec_stmt, lp_nr);
1681 }
1682 else
1683 gcc_assert (!stmt_could_throw_p (cfun, vec_stmt));
1684
1685 return vec_stmt_info;
1686 }
1687
1688 /* Replace the scalar statement STMT_INFO with a new vector statement VEC_STMT,
1689 which sets the same scalar result as STMT_INFO did. Create and return a
1690 stmt_vec_info for VEC_STMT. */
1691
1692 stmt_vec_info
1693 vect_finish_replace_stmt (vec_info *vinfo,
1694 stmt_vec_info stmt_info, gimple *vec_stmt)
1695 {
1696 gimple *scalar_stmt = vect_orig_stmt (stmt_info)->stmt;
1697 gcc_assert (gimple_get_lhs (scalar_stmt) == gimple_get_lhs (vec_stmt));
1698
1699 gimple_stmt_iterator gsi = gsi_for_stmt (scalar_stmt);
1700 gsi_replace (&gsi, vec_stmt, true);
1701
1702 return vect_finish_stmt_generation_1 (vinfo, stmt_info, vec_stmt);
1703 }
1704
1705 /* Add VEC_STMT to the vectorized implementation of STMT_INFO and insert it
1706 before *GSI. Create and return a stmt_vec_info for VEC_STMT. */
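/* As an illustration (virtual operand names invented): if the statement at
   *GSI is a scalar store with vuse .MEM_5 and vdef .MEM_6, a vector store
   inserted before it receives vuse .MEM_5 and a fresh vdef, and the scalar
   store's vuse is rewritten to that fresh name, keeping virtual SSA form
   up to date without invoking the renamer.  */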
1707
1708 stmt_vec_info
1709 vect_finish_stmt_generation (vec_info *vinfo,
1710 stmt_vec_info stmt_info, gimple *vec_stmt,
1711 gimple_stmt_iterator *gsi)
1712 {
1713 gcc_assert (!stmt_info || gimple_code (stmt_info->stmt) != GIMPLE_LABEL);
1714
1715 if (!gsi_end_p (*gsi)
1716 && gimple_has_mem_ops (vec_stmt))
1717 {
1718 gimple *at_stmt = gsi_stmt (*gsi);
1719 tree vuse = gimple_vuse (at_stmt);
1720 if (vuse && TREE_CODE (vuse) == SSA_NAME)
1721 {
1722 tree vdef = gimple_vdef (at_stmt);
1723 gimple_set_vuse (vec_stmt, gimple_vuse (at_stmt));
1724 /* If we have an SSA vuse and insert a store, update virtual
1725 SSA form to avoid triggering the renamer. Do so only
1726 if we can easily see all uses - which is what almost always
1727 happens with the way vectorized stmts are inserted. */
1728 if ((vdef && TREE_CODE (vdef) == SSA_NAME)
1729 && ((is_gimple_assign (vec_stmt)
1730 && !is_gimple_reg (gimple_assign_lhs (vec_stmt)))
1731 || (is_gimple_call (vec_stmt)
1732 && !(gimple_call_flags (vec_stmt)
1733 & (ECF_CONST|ECF_PURE|ECF_NOVOPS)))))
1734 {
1735 tree new_vdef = copy_ssa_name (vuse, vec_stmt);
1736 gimple_set_vdef (vec_stmt, new_vdef);
1737 SET_USE (gimple_vuse_op (at_stmt), new_vdef);
1738 }
1739 }
1740 }
1741 gsi_insert_before (gsi, vec_stmt, GSI_SAME_STMT);
1742 return vect_finish_stmt_generation_1 (vinfo, stmt_info, vec_stmt);
1743 }
1744
1745 /* We want to vectorize a call to combined function CFN with function
1746 decl FNDECL, using VECTYPE_OUT as the type of the output and VECTYPE_IN
1747 as the type of all inputs. Check whether this is possible using
1748 an internal function, returning its code if so or IFN_LAST if not. */
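/* For instance, a loop call to fmaf (combined function CFN_FMA) would map
   to IFN_FMA provided the target implements the fma optab for the chosen
   vector mode; otherwise IFN_LAST is returned and the caller can fall back
   to a target-specific built-in.  */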
1749
1750 static internal_fn
1751 vectorizable_internal_function (combined_fn cfn, tree fndecl,
1752 tree vectype_out, tree vectype_in)
1753 {
1754 internal_fn ifn;
1755 if (internal_fn_p (cfn))
1756 ifn = as_internal_fn (cfn);
1757 else
1758 ifn = associated_internal_fn (fndecl);
1759 if (ifn != IFN_LAST && direct_internal_fn_p (ifn))
1760 {
1761 const direct_internal_fn_info &info = direct_internal_fn (ifn);
1762 if (info.vectorizable)
1763 {
1764 tree type0 = (info.type0 < 0 ? vectype_out : vectype_in);
1765 tree type1 = (info.type1 < 0 ? vectype_out : vectype_in);
1766 if (direct_internal_fn_supported_p (ifn, tree_pair (type0, type1),
1767 OPTIMIZE_FOR_SPEED))
1768 return ifn;
1769 }
1770 }
1771 return IFN_LAST;
1772 }
1773
1774
1775 static tree permute_vec_elements (vec_info *, tree, tree, tree, stmt_vec_info,
1776 gimple_stmt_iterator *);
1777
1778 /* Check whether a load or store statement in the loop described by
1779 LOOP_VINFO is possible in a fully-masked loop. This is testing
1780 whether the vectorizer pass has the appropriate support, as well as
1781 whether the target does.
1782
1783 VLS_TYPE says whether the statement is a load or store and VECTYPE
1784 is the type of the vector being loaded or stored. MEMORY_ACCESS_TYPE
1785 says how the load or store is going to be implemented and GROUP_SIZE
1786 is the number of load or store statements in the containing group.
1787 If the access is a gather load or scatter store, GS_INFO describes
1788 its arguments. If the load or store is conditional, SCALAR_MASK is the
1789 condition under which it occurs.
1790
1791 Clear LOOP_VINFO_CAN_FULLY_MASK_P if a fully-masked loop is not
1792 supported, otherwise record the required mask types. */
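/* As a rough illustration (numbers invented): a contiguous V8HI access in
   a group of size 2 with a vectorization factor of 8 needs
   ceil (2 * 8 / 8) == 2 loop masks, so two masks of the corresponding
   boolean vector type would be recorded via vect_record_loop_mask.  */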
1793
1794 static void
1795 check_load_store_masking (loop_vec_info loop_vinfo, tree vectype,
1796 vec_load_store_type vls_type, int group_size,
1797 vect_memory_access_type memory_access_type,
1798 gather_scatter_info *gs_info, tree scalar_mask)
1799 {
1800 /* Invariant loads need no special support. */
1801 if (memory_access_type == VMAT_INVARIANT)
1802 return;
1803
1804 vec_loop_masks *masks = &LOOP_VINFO_MASKS (loop_vinfo);
1805 machine_mode vecmode = TYPE_MODE (vectype);
1806 bool is_load = (vls_type == VLS_LOAD);
1807 if (memory_access_type == VMAT_LOAD_STORE_LANES)
1808 {
1809 if (is_load
1810 ? !vect_load_lanes_supported (vectype, group_size, true)
1811 : !vect_store_lanes_supported (vectype, group_size, true))
1812 {
1813 if (dump_enabled_p ())
1814 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1815 "can't use a fully-masked loop because the"
1816 " target doesn't have an appropriate masked"
1817 " load/store-lanes instruction.\n");
1818 LOOP_VINFO_CAN_FULLY_MASK_P (loop_vinfo) = false;
1819 return;
1820 }
1821 unsigned int ncopies = vect_get_num_copies (loop_vinfo, vectype);
1822 vect_record_loop_mask (loop_vinfo, masks, ncopies, vectype, scalar_mask);
1823 return;
1824 }
1825
1826 if (memory_access_type == VMAT_GATHER_SCATTER)
1827 {
1828 internal_fn ifn = (is_load
1829 ? IFN_MASK_GATHER_LOAD
1830 : IFN_MASK_SCATTER_STORE);
1831 if (!internal_gather_scatter_fn_supported_p (ifn, vectype,
1832 gs_info->memory_type,
1833 gs_info->offset_vectype,
1834 gs_info->scale))
1835 {
1836 if (dump_enabled_p ())
1837 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1838 "can't use a fully-masked loop because the"
1839 " target doesn't have an appropriate masked"
1840 " gather load or scatter store instruction.\n");
1841 LOOP_VINFO_CAN_FULLY_MASK_P (loop_vinfo) = false;
1842 return;
1843 }
1844 unsigned int ncopies = vect_get_num_copies (loop_vinfo, vectype);
1845 vect_record_loop_mask (loop_vinfo, masks, ncopies, vectype, scalar_mask);
1846 return;
1847 }
1848
1849 if (memory_access_type != VMAT_CONTIGUOUS
1850 && memory_access_type != VMAT_CONTIGUOUS_PERMUTE)
1851 {
1852 /* Element X of the data must come from iteration i * VF + X of the
1853 scalar loop. We need more work to support other mappings. */
1854 if (dump_enabled_p ())
1855 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1856 "can't use a fully-masked loop because an access"
1857 " isn't contiguous.\n");
1858 LOOP_VINFO_CAN_FULLY_MASK_P (loop_vinfo) = false;
1859 return;
1860 }
1861
1862 machine_mode mask_mode;
1863 if (!VECTOR_MODE_P (vecmode)
1864 || !targetm.vectorize.get_mask_mode (vecmode).exists (&mask_mode)
1865 || !can_vec_mask_load_store_p (vecmode, mask_mode, is_load))
1866 {
1867 if (dump_enabled_p ())
1868 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1869 "can't use a fully-masked loop because the target"
1870 " doesn't have the appropriate masked load or"
1871 " store.\n");
1872 LOOP_VINFO_CAN_FULLY_MASK_P (loop_vinfo) = false;
1873 return;
1874 }
1875 /* We might load more scalars than we need for permuting SLP loads.
1876 We checked in get_group_load_store_type that the extra elements
1877 don't leak into a new vector. */
1878 poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
1879 poly_uint64 vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
1880 unsigned int nvectors;
1881 if (can_div_away_from_zero_p (group_size * vf, nunits, &nvectors))
1882 vect_record_loop_mask (loop_vinfo, masks, nvectors, vectype, scalar_mask);
1883 else
1884 gcc_unreachable ();
1885 }
1886
1887 /* Return the mask input to a masked load or store. VEC_MASK is the vectorized
1888 form of the scalar mask condition and LOOP_MASK, if nonnull, is the mask
1889 that needs to be applied to all loads and stores in a vectorized loop.
1890 Return VEC_MASK if LOOP_MASK is null, otherwise return VEC_MASK & LOOP_MASK.
1891
1892 MASK_TYPE is the type of both masks. If new statements are needed,
1893 insert them before GSI. */
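/* For example (SSA names invented), when both masks are present a single
   statement of the form

     vec_mask_and_1 = vec_mask_2 & loop_mask_3;

   is emitted before GSI and its result is returned.  */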
1894
1895 static tree
1896 prepare_load_store_mask (tree mask_type, tree loop_mask, tree vec_mask,
1897 gimple_stmt_iterator *gsi)
1898 {
1899 gcc_assert (useless_type_conversion_p (mask_type, TREE_TYPE (vec_mask)));
1900 if (!loop_mask)
1901 return vec_mask;
1902
1903 gcc_assert (TREE_TYPE (loop_mask) == mask_type);
1904 tree and_res = make_temp_ssa_name (mask_type, NULL, "vec_mask_and");
1905 gimple *and_stmt = gimple_build_assign (and_res, BIT_AND_EXPR,
1906 vec_mask, loop_mask);
1907 gsi_insert_before (gsi, and_stmt, GSI_SAME_STMT);
1908 return and_res;
1909 }
1910
1911 /* Determine whether we can use a gather load or scatter store to vectorize
1912 strided load or store STMT_INFO by truncating the current offset to a
1913 smaller width. We need to be able to construct an offset vector:
1914
1915 { 0, X, X*2, X*3, ... }
1916
1917 without loss of precision, where X is STMT_INFO's DR_STEP.
1918
1919 Return true if this is possible, describing the gather load or scatter
1920 store in GS_INFO. MASKED_P is true if the load or store is conditional. */
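/* As a made-up example: with a constant DR_STEP of 200 bytes, a scale of 1
   and at most 30 scalar latch iterations, the largest offset needed is
   30 * 200 == 6000, which fits in 13 bits, so a 16-bit unsigned offset
   type is the narrowest candidate tried against the target.  */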
1921
1922 static bool
1923 vect_truncate_gather_scatter_offset (stmt_vec_info stmt_info,
1924 loop_vec_info loop_vinfo, bool masked_p,
1925 gather_scatter_info *gs_info)
1926 {
1927 dr_vec_info *dr_info = STMT_VINFO_DR_INFO (stmt_info);
1928 data_reference *dr = dr_info->dr;
1929 tree step = DR_STEP (dr);
1930 if (TREE_CODE (step) != INTEGER_CST)
1931 {
1932 /* ??? Perhaps we could use range information here? */
1933 if (dump_enabled_p ())
1934 dump_printf_loc (MSG_NOTE, vect_location,
1935 "cannot truncate variable step.\n");
1936 return false;
1937 }
1938
1939 /* Get the number of bits in an element. */
1940 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
1941 scalar_mode element_mode = SCALAR_TYPE_MODE (TREE_TYPE (vectype));
1942 unsigned int element_bits = GET_MODE_BITSIZE (element_mode);
1943
1944 /* Set COUNT to the upper limit on the number of elements - 1.
1945 Start with the maximum vectorization factor. */
1946 unsigned HOST_WIDE_INT count = vect_max_vf (loop_vinfo) - 1;
1947
1948 /* Try lowering COUNT to the number of scalar latch iterations. */
1949 class loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
1950 widest_int max_iters;
1951 if (max_loop_iterations (loop, &max_iters)
1952 && max_iters < count)
1953 count = max_iters.to_shwi ();
1954
1955 /* Try scales of 1 and the element size. */
1956 int scales[] = { 1, vect_get_scalar_dr_size (dr_info) };
1957 wi::overflow_type overflow = wi::OVF_NONE;
1958 for (int i = 0; i < 2; ++i)
1959 {
1960 int scale = scales[i];
1961 widest_int factor;
1962 if (!wi::multiple_of_p (wi::to_widest (step), scale, SIGNED, &factor))
1963 continue;
1964
1965 /* Determine the minimum precision of (COUNT - 1) * STEP / SCALE. */
1966 widest_int range = wi::mul (count, factor, SIGNED, &overflow);
1967 if (overflow)
1968 continue;
1969 signop sign = range >= 0 ? UNSIGNED : SIGNED;
1970 unsigned int min_offset_bits = wi::min_precision (range, sign);
1971
1972 /* Find the narrowest viable offset type. */
1973 unsigned int offset_bits = 1U << ceil_log2 (min_offset_bits);
1974 tree offset_type = build_nonstandard_integer_type (offset_bits,
1975 sign == UNSIGNED);
1976
1977 /* See whether the target supports the operation with an offset
1978 no narrower than OFFSET_TYPE. */
1979 tree memory_type = TREE_TYPE (DR_REF (dr));
1980 if (!vect_gather_scatter_fn_p (loop_vinfo, DR_IS_READ (dr), masked_p,
1981 vectype, memory_type, offset_type, scale,
1982 &gs_info->ifn, &gs_info->offset_vectype))
1983 continue;
1984
1985 gs_info->decl = NULL_TREE;
1986 /* Logically the sum of DR_BASE_ADDRESS, DR_INIT and DR_OFFSET,
1987 but we don't need to store that here. */
1988 gs_info->base = NULL_TREE;
1989 gs_info->element_type = TREE_TYPE (vectype);
1990 gs_info->offset = fold_convert (offset_type, step);
1991 gs_info->offset_dt = vect_constant_def;
1992 gs_info->scale = scale;
1993 gs_info->memory_type = memory_type;
1994 return true;
1995 }
1996
1997 if (overflow && dump_enabled_p ())
1998 dump_printf_loc (MSG_NOTE, vect_location,
1999 "truncating gather/scatter offset to %d bits"
2000 " might change its value.\n", element_bits);
2001
2002 return false;
2003 }
2004
2005 /* Return true if we can use gather/scatter internal functions to
2006 vectorize STMT_INFO, which is a grouped or strided load or store.
2007 MASKED_P is true if load or store is conditional. When returning
2008 true, fill in GS_INFO with the information required to perform the
2009 operation. */
2010
2011 static bool
2012 vect_use_strided_gather_scatters_p (stmt_vec_info stmt_info,
2013 loop_vec_info loop_vinfo, bool masked_p,
2014 gather_scatter_info *gs_info)
2015 {
2016 if (!vect_check_gather_scatter (stmt_info, loop_vinfo, gs_info)
2017 || gs_info->decl)
2018 return vect_truncate_gather_scatter_offset (stmt_info, loop_vinfo,
2019 masked_p, gs_info);
2020
2021 tree old_offset_type = TREE_TYPE (gs_info->offset);
2022 tree new_offset_type = TREE_TYPE (gs_info->offset_vectype);
2023
2024 gcc_assert (TYPE_PRECISION (new_offset_type)
2025 >= TYPE_PRECISION (old_offset_type));
2026 gs_info->offset = fold_convert (new_offset_type, gs_info->offset);
2027
2028 if (dump_enabled_p ())
2029 dump_printf_loc (MSG_NOTE, vect_location,
2030 "using gather/scatter for strided/grouped access,"
2031 " scale = %d\n", gs_info->scale);
2032
2033 return true;
2034 }
2035
2036 /* STMT_INFO is a non-strided load or store, meaning that it accesses
2037 elements with a known constant step. Return -1 if that step
2038 is negative, 0 if it is zero, and 1 if it is greater than zero. */
2039
2040 static int
2041 compare_step_with_zero (vec_info *vinfo, stmt_vec_info stmt_info)
2042 {
2043 dr_vec_info *dr_info = STMT_VINFO_DR_INFO (stmt_info);
2044 return tree_int_cst_compare (vect_dr_behavior (vinfo, dr_info)->step,
2045 size_zero_node);
2046 }
2047
2048 /* If the target supports a permute mask that reverses the elements in
2049 a vector of type VECTYPE, return that mask, otherwise return null. */
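/* For instance, for a four-element vector such as V4SI the selector built
   below is { 3, 2, 1, 0 }; the single stepped pattern also describes the
   equivalent reversal for variable-length vectors.  */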
2050
2051 static tree
2052 perm_mask_for_reverse (tree vectype)
2053 {
2054 poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
2055
2056 /* The encoding has a single stepped pattern. */
2057 vec_perm_builder sel (nunits, 1, 3);
2058 for (int i = 0; i < 3; ++i)
2059 sel.quick_push (nunits - 1 - i);
2060
2061 vec_perm_indices indices (sel, 1, nunits);
2062 if (!can_vec_perm_const_p (TYPE_MODE (vectype), indices))
2063 return NULL_TREE;
2064 return vect_gen_perm_mask_checked (vectype, indices);
2065 }
2066
2067 /* A subroutine of get_load_store_type, with a subset of the same
2068 arguments. Handle the case where STMT_INFO is a load or store that
2069 accesses consecutive elements with a negative step. */
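/* Roughly speaking, a load whose address decreases by one element per
   iteration is handled as VMAT_CONTIGUOUS_REVERSE when a single copy
   suffices and the target can reverse a vector; otherwise we fall back
   to VMAT_ELEMENTWISE (or VMAT_CONTIGUOUS_DOWN for invariant stores).  */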
2070
2071 static vect_memory_access_type
2072 get_negative_load_store_type (vec_info *vinfo,
2073 stmt_vec_info stmt_info, tree vectype,
2074 vec_load_store_type vls_type,
2075 unsigned int ncopies)
2076 {
2077 dr_vec_info *dr_info = STMT_VINFO_DR_INFO (stmt_info);
2078 dr_alignment_support alignment_support_scheme;
2079
2080 if (ncopies > 1)
2081 {
2082 if (dump_enabled_p ())
2083 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2084 "multiple types with negative step.\n");
2085 return VMAT_ELEMENTWISE;
2086 }
2087
2088 alignment_support_scheme = vect_supportable_dr_alignment (vinfo,
2089 dr_info, false);
2090 if (alignment_support_scheme != dr_aligned
2091 && alignment_support_scheme != dr_unaligned_supported)
2092 {
2093 if (dump_enabled_p ())
2094 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2095 "negative step but alignment required.\n");
2096 return VMAT_ELEMENTWISE;
2097 }
2098
2099 if (vls_type == VLS_STORE_INVARIANT)
2100 {
2101 if (dump_enabled_p ())
2102 dump_printf_loc (MSG_NOTE, vect_location,
2103 "negative step with invariant source;"
2104 " no permute needed.\n");
2105 return VMAT_CONTIGUOUS_DOWN;
2106 }
2107
2108 if (!perm_mask_for_reverse (vectype))
2109 {
2110 if (dump_enabled_p ())
2111 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2112 "negative step and reversing not supported.\n");
2113 return VMAT_ELEMENTWISE;
2114 }
2115
2116 return VMAT_CONTIGUOUS_REVERSE;
2117 }
2118
2119 /* STMT_INFO is either a masked or unconditional store. Return the value
2120 being stored. */
2121
2122 tree
2123 vect_get_store_rhs (stmt_vec_info stmt_info)
2124 {
2125 if (gassign *assign = dyn_cast <gassign *> (stmt_info->stmt))
2126 {
2127 gcc_assert (gimple_assign_single_p (assign));
2128 return gimple_assign_rhs1 (assign);
2129 }
2130 if (gcall *call = dyn_cast <gcall *> (stmt_info->stmt))
2131 {
2132 internal_fn ifn = gimple_call_internal_fn (call);
2133 int index = internal_fn_stored_value_index (ifn);
2134 gcc_assert (index >= 0);
2135 return gimple_call_arg (call, index);
2136 }
2137 gcc_unreachable ();
2138 }
2139
2140 /* Function VECTOR_VECTOR_COMPOSITION_TYPE
2141
2142 This function returns a vector type which can be composed from NELTS
2143 pieces, whose type is recorded in PTYPE. VTYPE should be a vector type
2144 with the same vector size as the returned vector. It first checks whether
2145 the target supports constructing the result from piece-sized vector modes;
2146 if that fails, it checks whether a piece-sized integer (scalar) mode can
2147 be used instead. It returns NULL_TREE if no usable composition is found.
2148
2149 For example, for (vtype=V16QI, nelts=4), we can probably get:
2150 - V16QI with PTYPE V4QI.
2151 - V4SI with PTYPE SI.
2152 - NULL_TREE. */
2153
2154 static tree
2155 vector_vector_composition_type (tree vtype, poly_uint64 nelts, tree *ptype)
2156 {
2157 gcc_assert (VECTOR_TYPE_P (vtype));
2158 gcc_assert (known_gt (nelts, 0U));
2159
2160 machine_mode vmode = TYPE_MODE (vtype);
2161 if (!VECTOR_MODE_P (vmode))
2162 return NULL_TREE;
2163
2164 poly_uint64 vbsize = GET_MODE_BITSIZE (vmode);
2165 unsigned int pbsize;
2166 if (constant_multiple_p (vbsize, nelts, &pbsize))
2167 {
2168 /* First check if vec_init optab supports construction from
2169 vector pieces directly. */
2170 scalar_mode elmode = SCALAR_TYPE_MODE (TREE_TYPE (vtype));
2171 poly_uint64 inelts = pbsize / GET_MODE_BITSIZE (elmode);
2172 machine_mode rmode;
2173 if (related_vector_mode (vmode, elmode, inelts).exists (&rmode)
2174 && (convert_optab_handler (vec_init_optab, vmode, rmode)
2175 != CODE_FOR_nothing))
2176 {
2177 *ptype = build_vector_type (TREE_TYPE (vtype), inelts);
2178 return vtype;
2179 }
2180
2181 /* Otherwise check if exists an integer type of the same piece size and
2182 if vec_init optab supports construction from it directly. */
2183 if (int_mode_for_size (pbsize, 0).exists (&elmode)
2184 && related_vector_mode (vmode, elmode, nelts).exists (&rmode)
2185 && (convert_optab_handler (vec_init_optab, rmode, elmode)
2186 != CODE_FOR_nothing))
2187 {
2188 *ptype = build_nonstandard_integer_type (pbsize, 1);
2189 return build_vector_type (*ptype, nelts);
2190 }
2191 }
2192
2193 return NULL_TREE;
2194 }
2195
2196 /* A subroutine of get_load_store_type, with a subset of the same
2197 arguments. Handle the case where STMT_INFO is part of a grouped load
2198 or store.
2199
2200 For stores, the statements in the group are all consecutive
2201 and there is no gap at the end. For loads, the statements in the
2202 group might not be consecutive; there can be gaps between statements
2203 as well as at the end. */
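/* For instance (indices invented), loads of a[3*i] and a[3*i+1] form a
   group of size 3 with a gap of one element at the end; whether the
   vectorized code may touch that trailing element is what the overrun
   logic below decides.  */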
2204
2205 static bool
2206 get_group_load_store_type (vec_info *vinfo, stmt_vec_info stmt_info,
2207 tree vectype, bool slp,
2208 bool masked_p, vec_load_store_type vls_type,
2209 vect_memory_access_type *memory_access_type,
2210 gather_scatter_info *gs_info)
2211 {
2212 loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
2213 class loop *loop = loop_vinfo ? LOOP_VINFO_LOOP (loop_vinfo) : NULL;
2214 stmt_vec_info first_stmt_info = DR_GROUP_FIRST_ELEMENT (stmt_info);
2215 dr_vec_info *first_dr_info = STMT_VINFO_DR_INFO (first_stmt_info);
2216 unsigned int group_size = DR_GROUP_SIZE (first_stmt_info);
2217 bool single_element_p = (stmt_info == first_stmt_info
2218 && !DR_GROUP_NEXT_ELEMENT (stmt_info));
2219 unsigned HOST_WIDE_INT gap = DR_GROUP_GAP (first_stmt_info);
2220 poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
2221
2222 /* True if the vectorized statements would access beyond the last
2223 statement in the group. */
2224 bool overrun_p = false;
2225
2226 /* True if we can cope with such overrun by peeling for gaps, so that
2227 there is at least one final scalar iteration after the vector loop. */
2228 bool can_overrun_p = (!masked_p
2229 && vls_type == VLS_LOAD
2230 && loop_vinfo
2231 && !loop->inner);
2232
2233 /* There can only be a gap at the end of the group if the stride is
2234 known at compile time. */
2235 gcc_assert (!STMT_VINFO_STRIDED_P (first_stmt_info) || gap == 0);
2236
2237 /* Stores can't yet have gaps. */
2238 gcc_assert (slp || vls_type == VLS_LOAD || gap == 0);
2239
2240 if (slp)
2241 {
2242 if (STMT_VINFO_STRIDED_P (first_stmt_info))
2243 {
2244 /* Try to use consecutive accesses of DR_GROUP_SIZE elements,
2245 separated by the stride, until we have a complete vector.
2246 Fall back to scalar accesses if that isn't possible. */
2247 if (multiple_p (nunits, group_size))
2248 *memory_access_type = VMAT_STRIDED_SLP;
2249 else
2250 *memory_access_type = VMAT_ELEMENTWISE;
2251 }
2252 else
2253 {
2254 overrun_p = loop_vinfo && gap != 0;
2255 if (overrun_p && vls_type != VLS_LOAD)
2256 {
2257 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2258 "Grouped store with gaps requires"
2259 " non-consecutive accesses\n");
2260 return false;
2261 }
2262 /* An overrun is fine if the trailing elements are smaller
2263 than the alignment boundary B. Every vector access will
2264 be a multiple of B and so we are guaranteed to access a
2265 non-gap element in the same B-sized block. */
2266 if (overrun_p
2267 && gap < (vect_known_alignment_in_bytes (first_dr_info)
2268 / vect_get_scalar_dr_size (first_dr_info)))
2269 overrun_p = false;
2270
2271 /* If the gap splits the vector in half and the target
2272 can do half-vector operations avoid the epilogue peeling
2273 by simply loading half of the vector only. Usually
2274 the construction with an upper zero half will be elided. */
2275 dr_alignment_support alignment_support_scheme;
2276 tree half_vtype;
2277 if (overrun_p
2278 && !masked_p
2279 && (((alignment_support_scheme
2280 = vect_supportable_dr_alignment (vinfo,
2281 first_dr_info, false)))
2282 == dr_aligned
2283 || alignment_support_scheme == dr_unaligned_supported)
2284 && known_eq (nunits, (group_size - gap) * 2)
2285 && known_eq (nunits, group_size)
2286 && (vector_vector_composition_type (vectype, 2, &half_vtype)
2287 != NULL_TREE))
2288 overrun_p = false;
2289
2290 if (overrun_p && !can_overrun_p)
2291 {
2292 if (dump_enabled_p ())
2293 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2294 "Peeling for outer loop is not supported\n");
2295 return false;
2296 }
2297 int cmp = compare_step_with_zero (vinfo, stmt_info);
2298 if (cmp < 0)
2299 *memory_access_type = get_negative_load_store_type
2300 (vinfo, stmt_info, vectype, vls_type, 1);
2301 else
2302 {
2303 gcc_assert (!loop_vinfo || cmp > 0);
2304 *memory_access_type = VMAT_CONTIGUOUS;
2305 }
2306 }
2307 }
2308 else
2309 {
2310 /* We can always handle this case using elementwise accesses,
2311 but see if something more efficient is available. */
2312 *memory_access_type = VMAT_ELEMENTWISE;
2313
2314 /* If there is a gap at the end of the group then these optimizations
2315 would access excess elements in the last iteration. */
2316 bool would_overrun_p = (gap != 0);
2317 /* An overrun is fine if the trailing elements are smaller than the
2318 alignment boundary B. Every vector access will be a multiple of B
2319 and so we are guaranteed to access a non-gap element in the
2320 same B-sized block. */
2321 if (would_overrun_p
2322 && !masked_p
2323 && gap < (vect_known_alignment_in_bytes (first_dr_info)
2324 / vect_get_scalar_dr_size (first_dr_info)))
2325 would_overrun_p = false;
2326
2327 if (!STMT_VINFO_STRIDED_P (first_stmt_info)
2328 && (can_overrun_p || !would_overrun_p)
2329 && compare_step_with_zero (vinfo, stmt_info) > 0)
2330 {
2331 /* First cope with the degenerate case of a single-element
2332 vector. */
2333 if (known_eq (TYPE_VECTOR_SUBPARTS (vectype), 1U))
2334 *memory_access_type = VMAT_CONTIGUOUS;
2335
2336 /* Otherwise try using LOAD/STORE_LANES. */
2337 if (*memory_access_type == VMAT_ELEMENTWISE
2338 && (vls_type == VLS_LOAD
2339 ? vect_load_lanes_supported (vectype, group_size, masked_p)
2340 : vect_store_lanes_supported (vectype, group_size,
2341 masked_p)))
2342 {
2343 *memory_access_type = VMAT_LOAD_STORE_LANES;
2344 overrun_p = would_overrun_p;
2345 }
2346
2347 /* If that fails, try using permuting loads. */
2348 if (*memory_access_type == VMAT_ELEMENTWISE
2349 && (vls_type == VLS_LOAD
2350 ? vect_grouped_load_supported (vectype, single_element_p,
2351 group_size)
2352 : vect_grouped_store_supported (vectype, group_size)))
2353 {
2354 *memory_access_type = VMAT_CONTIGUOUS_PERMUTE;
2355 overrun_p = would_overrun_p;
2356 }
2357 }
2358
2359 /* As a last resort, try using a gather load or scatter store.
2360
2361 ??? Although the code can handle all group sizes correctly,
2362 it probably isn't a win to use separate strided accesses based
2363 on nearby locations. Or, even if it's a win over scalar code,
2364 it might not be a win over vectorizing at a lower VF, if that
2365 allows us to use contiguous accesses. */
2366 if (*memory_access_type == VMAT_ELEMENTWISE
2367 && single_element_p
2368 && loop_vinfo
2369 && vect_use_strided_gather_scatters_p (stmt_info, loop_vinfo,
2370 masked_p, gs_info))
2371 *memory_access_type = VMAT_GATHER_SCATTER;
2372 }
2373
2374 if (vls_type != VLS_LOAD && first_stmt_info == stmt_info)
2375 {
2376 /* STMT is the leader of the group. Check the operands of all the
2377 stmts of the group. */
2378 stmt_vec_info next_stmt_info = DR_GROUP_NEXT_ELEMENT (stmt_info);
2379 while (next_stmt_info)
2380 {
2381 tree op = vect_get_store_rhs (next_stmt_info);
2382 enum vect_def_type dt;
2383 if (!vect_is_simple_use (op, vinfo, &dt))
2384 {
2385 if (dump_enabled_p ())
2386 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2387 "use not simple.\n");
2388 return false;
2389 }
2390 next_stmt_info = DR_GROUP_NEXT_ELEMENT (next_stmt_info);
2391 }
2392 }
2393
2394 if (overrun_p)
2395 {
2396 gcc_assert (can_overrun_p);
2397 if (dump_enabled_p ())
2398 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2399 "Data access with gaps requires scalar "
2400 "epilogue loop\n");
2401 LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo) = true;
2402 }
2403
2404 return true;
2405 }
2406
2407 /* Analyze load or store statement STMT_INFO of type VLS_TYPE. Return true
2408 if there is a memory access type that the vectorized form can use,
2409 storing it in *MEMORY_ACCESS_TYPE if so. If we decide to use gathers
2410 or scatters, fill in GS_INFO accordingly.
2411
2412 SLP says whether we're performing SLP rather than loop vectorization.
2413 MASKED_P is true if the statement is conditional on a vectorized mask.
2414 VECTYPE is the vector type that the vectorized statements will use.
2415 NCOPIES is the number of vector statements that will be needed. */
2416
2417 static bool
2418 get_load_store_type (vec_info *vinfo, stmt_vec_info stmt_info,
2419 tree vectype, bool slp,
2420 bool masked_p, vec_load_store_type vls_type,
2421 unsigned int ncopies,
2422 vect_memory_access_type *memory_access_type,
2423 gather_scatter_info *gs_info)
2424 {
2425 loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
2426 poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
2427 if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
2428 {
2429 *memory_access_type = VMAT_GATHER_SCATTER;
2430 if (!vect_check_gather_scatter (stmt_info, loop_vinfo, gs_info))
2431 gcc_unreachable ();
2432 else if (!vect_is_simple_use (gs_info->offset, vinfo,
2433 &gs_info->offset_dt,
2434 &gs_info->offset_vectype))
2435 {
2436 if (dump_enabled_p ())
2437 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2438 "%s index use not simple.\n",
2439 vls_type == VLS_LOAD ? "gather" : "scatter");
2440 return false;
2441 }
2442 }
2443 else if (STMT_VINFO_GROUPED_ACCESS (stmt_info))
2444 {
2445 if (!get_group_load_store_type (vinfo, stmt_info, vectype, slp, masked_p,
2446 vls_type, memory_access_type, gs_info))
2447 return false;
2448 }
2449 else if (STMT_VINFO_STRIDED_P (stmt_info))
2450 {
2451 gcc_assert (!slp);
2452 if (loop_vinfo
2453 && vect_use_strided_gather_scatters_p (stmt_info, loop_vinfo,
2454 masked_p, gs_info))
2455 *memory_access_type = VMAT_GATHER_SCATTER;
2456 else
2457 *memory_access_type = VMAT_ELEMENTWISE;
2458 }
2459 else
2460 {
2461 int cmp = compare_step_with_zero (vinfo, stmt_info);
2462 if (cmp < 0)
2463 *memory_access_type = get_negative_load_store_type
2464 (vinfo, stmt_info, vectype, vls_type, ncopies);
2465 else if (cmp == 0)
2466 {
2467 gcc_assert (vls_type == VLS_LOAD);
2468 *memory_access_type = VMAT_INVARIANT;
2469 }
2470 else
2471 *memory_access_type = VMAT_CONTIGUOUS;
2472 }
2473
2474 if ((*memory_access_type == VMAT_ELEMENTWISE
2475 || *memory_access_type == VMAT_STRIDED_SLP)
2476 && !nunits.is_constant ())
2477 {
2478 if (dump_enabled_p ())
2479 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2480 "Not using elementwise accesses due to variable "
2481 "vectorization factor.\n");
2482 return false;
2483 }
2484
2485 /* FIXME: At the moment the cost model seems to underestimate the
2486 cost of using elementwise accesses. This check preserves the
2487 traditional behavior until that can be fixed. */
2488 stmt_vec_info first_stmt_info = DR_GROUP_FIRST_ELEMENT (stmt_info);
2489 if (!first_stmt_info)
2490 first_stmt_info = stmt_info;
2491 if (*memory_access_type == VMAT_ELEMENTWISE
2492 && !STMT_VINFO_STRIDED_P (first_stmt_info)
2493 && !(stmt_info == DR_GROUP_FIRST_ELEMENT (stmt_info)
2494 && !DR_GROUP_NEXT_ELEMENT (stmt_info)
2495 && !pow2p_hwi (DR_GROUP_SIZE (stmt_info))))
2496 {
2497 if (dump_enabled_p ())
2498 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2499 "not falling back to elementwise accesses\n");
2500 return false;
2501 }
2502 return true;
2503 }
2504
2505 /* Return true if boolean argument MASK is suitable for vectorizing
2506 conditional operation STMT_INFO. When returning true, store the type
2507 of the definition in *MASK_DT_OUT and the type of the vectorized mask
2508 in *MASK_VECTYPE_OUT. */
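/* E.g. for a conditional operation on V8SI data the mask is expected to
   vectorize to an eight-element boolean vector (the exact mode is
   target-dependent); a mask with a different number of elements is
   rejected below.  */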
2509
2510 static bool
2511 vect_check_scalar_mask (vec_info *vinfo, stmt_vec_info stmt_info, tree mask,
2512 vect_def_type *mask_dt_out,
2513 tree *mask_vectype_out)
2514 {
2515 if (!VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (mask)))
2516 {
2517 if (dump_enabled_p ())
2518 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2519 "mask argument is not a boolean.\n");
2520 return false;
2521 }
2522
2523 if (TREE_CODE (mask) != SSA_NAME)
2524 {
2525 if (dump_enabled_p ())
2526 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2527 "mask argument is not an SSA name.\n");
2528 return false;
2529 }
2530
2531 enum vect_def_type mask_dt;
2532 tree mask_vectype;
2533 if (!vect_is_simple_use (mask, vinfo, &mask_dt, &mask_vectype))
2534 {
2535 if (dump_enabled_p ())
2536 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2537 "mask use not simple.\n");
2538 return false;
2539 }
2540
2541 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
2542 if (!mask_vectype)
2543 mask_vectype = get_mask_type_for_scalar_type (vinfo, TREE_TYPE (vectype));
2544
2545 if (!mask_vectype || !VECTOR_BOOLEAN_TYPE_P (mask_vectype))
2546 {
2547 if (dump_enabled_p ())
2548 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2549 "could not find an appropriate vector mask type.\n");
2550 return false;
2551 }
2552
2553 if (maybe_ne (TYPE_VECTOR_SUBPARTS (mask_vectype),
2554 TYPE_VECTOR_SUBPARTS (vectype)))
2555 {
2556 if (dump_enabled_p ())
2557 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2558 "vector mask type %T"
2559 " does not match vector data type %T.\n",
2560 mask_vectype, vectype);
2561
2562 return false;
2563 }
2564
2565 *mask_dt_out = mask_dt;
2566 *mask_vectype_out = mask_vectype;
2567 return true;
2568 }
2569
2570 /* Return true if stored value RHS is suitable for vectorizing store
2571 statement STMT_INFO. When returning true, store the type of the
2572 definition in *RHS_DT_OUT, the type of the vectorized store value in
2573 *RHS_VECTYPE_OUT and the type of the store in *VLS_TYPE_OUT. */
2574
2575 static bool
2576 vect_check_store_rhs (vec_info *vinfo, stmt_vec_info stmt_info,
2577 slp_tree slp_node, tree rhs,
2578 vect_def_type *rhs_dt_out, tree *rhs_vectype_out,
2579 vec_load_store_type *vls_type_out)
2580 {
2581 /* In the case this is a store from a constant make sure
2582 native_encode_expr can handle it. */
2583 if (CONSTANT_CLASS_P (rhs) && native_encode_expr (rhs, NULL, 64) == 0)
2584 {
2585 if (dump_enabled_p ())
2586 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2587 "cannot encode constant as a byte sequence.\n");
2588 return false;
2589 }
2590
2591 enum vect_def_type rhs_dt;
2592 tree rhs_vectype;
2593 slp_tree slp_op;
2594 if (!vect_is_simple_use (vinfo, stmt_info, slp_node, 0,
2595 &rhs, &slp_op, &rhs_dt, &rhs_vectype))
2596 {
2597 if (dump_enabled_p ())
2598 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2599 "use not simple.\n");
2600 return false;
2601 }
2602
2603 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
2604 if (rhs_vectype && !useless_type_conversion_p (vectype, rhs_vectype))
2605 {
2606 if (dump_enabled_p ())
2607 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2608 "incompatible vector types.\n");
2609 return false;
2610 }
2611
2612 *rhs_dt_out = rhs_dt;
2613 *rhs_vectype_out = rhs_vectype;
2614 if (rhs_dt == vect_constant_def || rhs_dt == vect_external_def)
2615 *vls_type_out = VLS_STORE_INVARIANT;
2616 else
2617 *vls_type_out = VLS_STORE;
2618 return true;
2619 }
2620
2621 /* Build an all-ones vector mask of type MASKTYPE while vectorizing STMT_INFO.
2622 Note that we support masks with floating-point type, in which case the
2623 floats are interpreted as a bitmask. */
2624
2625 static tree
2626 vect_build_all_ones_mask (vec_info *vinfo,
2627 stmt_vec_info stmt_info, tree masktype)
2628 {
2629 if (TREE_CODE (masktype) == INTEGER_TYPE)
2630 return build_int_cst (masktype, -1);
2631 else if (TREE_CODE (TREE_TYPE (masktype)) == INTEGER_TYPE)
2632 {
2633 tree mask = build_int_cst (TREE_TYPE (masktype), -1);
2634 mask = build_vector_from_val (masktype, mask);
2635 return vect_init_vector (vinfo, stmt_info, mask, masktype, NULL);
2636 }
2637 else if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (masktype)))
2638 {
2639 REAL_VALUE_TYPE r;
2640 long tmp[6];
2641 for (int j = 0; j < 6; ++j)
2642 tmp[j] = -1;
2643 real_from_target (&r, tmp, TYPE_MODE (TREE_TYPE (masktype)));
2644 tree mask = build_real (TREE_TYPE (masktype), r);
2645 mask = build_vector_from_val (masktype, mask);
2646 return vect_init_vector (vinfo, stmt_info, mask, masktype, NULL);
2647 }
2648 gcc_unreachable ();
2649 }
2650
2651 /* Build an all-zero merge value of type VECTYPE while vectorizing
2652 STMT_INFO as a gather load. */
2653
2654 static tree
2655 vect_build_zero_merge_argument (vec_info *vinfo,
2656 stmt_vec_info stmt_info, tree vectype)
2657 {
2658 tree merge;
2659 if (TREE_CODE (TREE_TYPE (vectype)) == INTEGER_TYPE)
2660 merge = build_int_cst (TREE_TYPE (vectype), 0);
2661 else if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (vectype)))
2662 {
2663 REAL_VALUE_TYPE r;
2664 long tmp[6];
2665 for (int j = 0; j < 6; ++j)
2666 tmp[j] = 0;
2667 real_from_target (&r, tmp, TYPE_MODE (TREE_TYPE (vectype)));
2668 merge = build_real (TREE_TYPE (vectype), r);
2669 }
2670 else
2671 gcc_unreachable ();
2672 merge = build_vector_from_val (vectype, merge);
2673 return vect_init_vector (vinfo, stmt_info, merge, vectype, NULL);
2674 }
2675
2676 /* Build a gather load call while vectorizing STMT_INFO. Insert new
2677 instructions before GSI and add them to VEC_STMT. GS_INFO describes
2678 the gather load operation. If the load is conditional, MASK is the
2679 unvectorized condition and MASK_DT is its definition type, otherwise
2680 MASK is null. */
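/* For example (types invented): gathering four DFmode elements with eight
   SImode offsets gives nunits == 4 and gather_off_nunits == 8, so the
   WIDEN path below is used and every odd copy permutes the upper half of
   the previous offset vector into place.  */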
2681
2682 static void
2683 vect_build_gather_load_calls (vec_info *vinfo, stmt_vec_info stmt_info,
2684 gimple_stmt_iterator *gsi,
2685 stmt_vec_info *vec_stmt,
2686 gather_scatter_info *gs_info,
2687 tree mask)
2688 {
2689 loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
2690 class loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
2691 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
2692 poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
2693 int ncopies = vect_get_num_copies (loop_vinfo, vectype);
2694 edge pe = loop_preheader_edge (loop);
2695 enum { NARROW, NONE, WIDEN } modifier;
2696 poly_uint64 gather_off_nunits
2697 = TYPE_VECTOR_SUBPARTS (gs_info->offset_vectype);
2698
2699 tree arglist = TYPE_ARG_TYPES (TREE_TYPE (gs_info->decl));
2700 tree rettype = TREE_TYPE (TREE_TYPE (gs_info->decl));
2701 tree srctype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
2702 tree ptrtype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
2703 tree idxtype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
2704 tree masktype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
2705 tree scaletype = TREE_VALUE (arglist);
2706 tree real_masktype = masktype;
2707 gcc_checking_assert (types_compatible_p (srctype, rettype)
2708 && (!mask
2709 || TREE_CODE (masktype) == INTEGER_TYPE
2710 || types_compatible_p (srctype, masktype)));
2711 if (mask && TREE_CODE (masktype) == INTEGER_TYPE)
2712 masktype = truth_type_for (srctype);
2713
2714 tree mask_halftype = masktype;
2715 tree perm_mask = NULL_TREE;
2716 tree mask_perm_mask = NULL_TREE;
2717 if (known_eq (nunits, gather_off_nunits))
2718 modifier = NONE;
2719 else if (known_eq (nunits * 2, gather_off_nunits))
2720 {
2721 modifier = WIDEN;
2722
2723 /* Currently widening gathers and scatters are only supported for
2724 fixed-length vectors. */
2725 int count = gather_off_nunits.to_constant ();
2726 vec_perm_builder sel (count, count, 1);
2727 for (int i = 0; i < count; ++i)
2728 sel.quick_push (i | (count / 2));
2729
2730 vec_perm_indices indices (sel, 1, count);
2731 perm_mask = vect_gen_perm_mask_checked (gs_info->offset_vectype,
2732 indices);
2733 }
2734 else if (known_eq (nunits, gather_off_nunits * 2))
2735 {
2736 modifier = NARROW;
2737
2738 /* Currently narrowing gathers and scatters are only supported for
2739 fixed-length vectors. */
2740 int count = nunits.to_constant ();
2741 vec_perm_builder sel (count, count, 1);
2742 sel.quick_grow (count);
2743 for (int i = 0; i < count; ++i)
2744 sel[i] = i < count / 2 ? i : i + count / 2;
2745 vec_perm_indices indices (sel, 2, count);
2746 perm_mask = vect_gen_perm_mask_checked (vectype, indices);
2747
2748 ncopies *= 2;
2749
2750 if (mask && masktype == real_masktype)
2751 {
2752 for (int i = 0; i < count; ++i)
2753 sel[i] = i | (count / 2);
2754 indices.new_vector (sel, 2, count);
2755 mask_perm_mask = vect_gen_perm_mask_checked (masktype, indices);
2756 }
2757 else if (mask)
2758 mask_halftype = truth_type_for (gs_info->offset_vectype);
2759 }
2760 else
2761 gcc_unreachable ();
2762
2763 tree scalar_dest = gimple_get_lhs (stmt_info->stmt);
2764 tree vec_dest = vect_create_destination_var (scalar_dest, vectype);
2765
2766 tree ptr = fold_convert (ptrtype, gs_info->base);
2767 if (!is_gimple_min_invariant (ptr))
2768 {
2769 gimple_seq seq;
2770 ptr = force_gimple_operand (ptr, &seq, true, NULL_TREE);
2771 basic_block new_bb = gsi_insert_seq_on_edge_immediate (pe, seq);
2772 gcc_assert (!new_bb);
2773 }
2774
2775 tree scale = build_int_cst (scaletype, gs_info->scale);
2776
2777 tree vec_oprnd0 = NULL_TREE;
2778 tree vec_mask = NULL_TREE;
2779 tree src_op = NULL_TREE;
2780 tree mask_op = NULL_TREE;
2781 tree prev_res = NULL_TREE;
2782 stmt_vec_info prev_stmt_info = NULL;
2783
2784 if (!mask)
2785 {
2786 src_op = vect_build_zero_merge_argument (vinfo, stmt_info, rettype);
2787 mask_op = vect_build_all_ones_mask (vinfo, stmt_info, masktype);
2788 }
2789
2790 for (int j = 0; j < ncopies; ++j)
2791 {
2792 tree op, var;
2793 if (modifier == WIDEN && (j & 1))
2794 op = permute_vec_elements (vinfo, vec_oprnd0, vec_oprnd0,
2795 perm_mask, stmt_info, gsi);
2796 else if (j == 0)
2797 op = vec_oprnd0
2798 = vect_get_vec_def_for_operand (vinfo, gs_info->offset, stmt_info);
2799 else
2800 op = vec_oprnd0 = vect_get_vec_def_for_stmt_copy (loop_vinfo,
2801 vec_oprnd0);
2802
2803 if (!useless_type_conversion_p (idxtype, TREE_TYPE (op)))
2804 {
2805 gcc_assert (known_eq (TYPE_VECTOR_SUBPARTS (TREE_TYPE (op)),
2806 TYPE_VECTOR_SUBPARTS (idxtype)));
2807 var = vect_get_new_ssa_name (idxtype, vect_simple_var);
2808 op = build1 (VIEW_CONVERT_EXPR, idxtype, op);
2809 gassign *new_stmt = gimple_build_assign (var, VIEW_CONVERT_EXPR, op);
2810 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
2811 op = var;
2812 }
2813
2814 if (mask)
2815 {
2816 if (mask_perm_mask && (j & 1))
2817 mask_op = permute_vec_elements (vinfo, mask_op, mask_op,
2818 mask_perm_mask, stmt_info, gsi);
2819 else
2820 {
2821 if (j == 0)
2822 vec_mask = vect_get_vec_def_for_operand (vinfo, mask, stmt_info);
2823 else if (modifier != NARROW || (j & 1) == 0)
2824 vec_mask = vect_get_vec_def_for_stmt_copy (loop_vinfo,
2825 vec_mask);
2826
2827 mask_op = vec_mask;
2828 if (!useless_type_conversion_p (masktype, TREE_TYPE (vec_mask)))
2829 {
2830 poly_uint64 sub1 = TYPE_VECTOR_SUBPARTS (TREE_TYPE (mask_op));
2831 poly_uint64 sub2 = TYPE_VECTOR_SUBPARTS (masktype);
2832 gcc_assert (known_eq (sub1, sub2));
2833 var = vect_get_new_ssa_name (masktype, vect_simple_var);
2834 mask_op = build1 (VIEW_CONVERT_EXPR, masktype, mask_op);
2835 gassign *new_stmt
2836 = gimple_build_assign (var, VIEW_CONVERT_EXPR, mask_op);
2837 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
2838 mask_op = var;
2839 }
2840 }
2841 if (modifier == NARROW && masktype != real_masktype)
2842 {
2843 var = vect_get_new_ssa_name (mask_halftype, vect_simple_var);
2844 gassign *new_stmt
2845 = gimple_build_assign (var, (j & 1) ? VEC_UNPACK_HI_EXPR
2846 : VEC_UNPACK_LO_EXPR,
2847 mask_op);
2848 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
2849 mask_op = var;
2850 }
2851 src_op = mask_op;
2852 }
2853
2854 tree mask_arg = mask_op;
2855 if (masktype != real_masktype)
2856 {
2857 tree utype, optype = TREE_TYPE (mask_op);
2858 if (TYPE_MODE (real_masktype) == TYPE_MODE (optype))
2859 utype = real_masktype;
2860 else
2861 utype = lang_hooks.types.type_for_mode (TYPE_MODE (optype), 1);
2862 var = vect_get_new_ssa_name (utype, vect_scalar_var);
2863 mask_arg = build1 (VIEW_CONVERT_EXPR, utype, mask_op);
2864 gassign *new_stmt
2865 = gimple_build_assign (var, VIEW_CONVERT_EXPR, mask_arg);
2866 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
2867 mask_arg = var;
2868 if (!useless_type_conversion_p (real_masktype, utype))
2869 {
2870 gcc_assert (TYPE_PRECISION (utype)
2871 <= TYPE_PRECISION (real_masktype));
2872 var = vect_get_new_ssa_name (real_masktype, vect_scalar_var);
2873 new_stmt = gimple_build_assign (var, NOP_EXPR, mask_arg);
2874 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
2875 mask_arg = var;
2876 }
2877 src_op = build_zero_cst (srctype);
2878 }
2879 gcall *new_call = gimple_build_call (gs_info->decl, 5, src_op, ptr, op,
2880 mask_arg, scale);
2881
2882 stmt_vec_info new_stmt_info;
2883 if (!useless_type_conversion_p (vectype, rettype))
2884 {
2885 gcc_assert (known_eq (TYPE_VECTOR_SUBPARTS (vectype),
2886 TYPE_VECTOR_SUBPARTS (rettype)));
2887 op = vect_get_new_ssa_name (rettype, vect_simple_var);
2888 gimple_call_set_lhs (new_call, op);
2889 vect_finish_stmt_generation (vinfo, stmt_info, new_call, gsi);
2890 var = make_ssa_name (vec_dest);
2891 op = build1 (VIEW_CONVERT_EXPR, vectype, op);
2892 gassign *new_stmt = gimple_build_assign (var, VIEW_CONVERT_EXPR, op);
2893 new_stmt_info
2894 = vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
2895 }
2896 else
2897 {
2898 var = make_ssa_name (vec_dest, new_call);
2899 gimple_call_set_lhs (new_call, var);
2900 new_stmt_info
2901 = vect_finish_stmt_generation (vinfo, stmt_info, new_call, gsi);
2902 }
2903
2904 if (modifier == NARROW)
2905 {
2906 if ((j & 1) == 0)
2907 {
2908 prev_res = var;
2909 continue;
2910 }
2911 var = permute_vec_elements (vinfo, prev_res, var, perm_mask,
2912 stmt_info, gsi);
2913 new_stmt_info = loop_vinfo->lookup_def (var);
2914 }
2915
2916 if (prev_stmt_info == NULL)
2917 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt_info;
2918 else
2919 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt_info;
2920 prev_stmt_info = new_stmt_info;
2921 }
2922 }
2923
2924 /* Prepare the base and offset in GS_INFO for vectorization.
2925 Set *DATAREF_PTR to the loop-invariant base address and *VEC_OFFSET
2926 to the vectorized offset argument for the first copy of STMT_INFO.
2927 STMT_INFO is the statement described by GS_INFO and LOOP is the
2928 containing loop. */
2929
2930 static void
2931 vect_get_gather_scatter_ops (vec_info *vinfo,
2932 class loop *loop, stmt_vec_info stmt_info,
2933 gather_scatter_info *gs_info,
2934 tree *dataref_ptr, tree *vec_offset)
2935 {
2936 gimple_seq stmts = NULL;
2937 *dataref_ptr = force_gimple_operand (gs_info->base, &stmts, true, NULL_TREE);
2938 if (stmts != NULL)
2939 {
2940 basic_block new_bb;
2941 edge pe = loop_preheader_edge (loop);
2942 new_bb = gsi_insert_seq_on_edge_immediate (pe, stmts);
2943 gcc_assert (!new_bb);
2944 }
2945 *vec_offset = vect_get_vec_def_for_operand (vinfo, gs_info->offset, stmt_info,
2946 gs_info->offset_vectype);
2947 }
2948
2949 /* Prepare to implement a grouped or strided load or store using
2950 the gather load or scatter store operation described by GS_INFO.
2951 STMT_INFO is the load or store statement.
2952
2953 Set *DATAREF_BUMP to the amount that should be added to the base
2954 address after each copy of the vectorized statement. Set *VEC_OFFSET
2955 to an invariant offset vector in which element I has the value
2956 I * DR_STEP / SCALE. */
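/* E.g. (values invented) with DR_STEP == 8, SCALE == 4 and a four-element
   vector, *DATAREF_BUMP is 8 * 4 == 32 bytes and *VEC_OFFSET is
   { 0, 2, 4, 6 }.  */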
2957
2958 static void
2959 vect_get_strided_load_store_ops (stmt_vec_info stmt_info,
2960 loop_vec_info loop_vinfo,
2961 gather_scatter_info *gs_info,
2962 tree *dataref_bump, tree *vec_offset)
2963 {
2964 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
2965 class loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
2966 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
2967 gimple_seq stmts;
2968
2969 tree bump = size_binop (MULT_EXPR,
2970 fold_convert (sizetype, unshare_expr (DR_STEP (dr))),
2971 size_int (TYPE_VECTOR_SUBPARTS (vectype)));
2972 *dataref_bump = force_gimple_operand (bump, &stmts, true, NULL_TREE);
2973 if (stmts)
2974 gsi_insert_seq_on_edge_immediate (loop_preheader_edge (loop), stmts);
2975
2976 /* The offset given in GS_INFO can have pointer type, so use the element
2977 type of the vector instead. */
2978 tree offset_type = TREE_TYPE (gs_info->offset);
2979 offset_type = TREE_TYPE (gs_info->offset_vectype);
2980
2981 /* Calculate X = DR_STEP / SCALE and convert it to the appropriate type. */
2982 tree step = size_binop (EXACT_DIV_EXPR, unshare_expr (DR_STEP (dr)),
2983 ssize_int (gs_info->scale));
2984 step = fold_convert (offset_type, step);
2985 step = force_gimple_operand (step, &stmts, true, NULL_TREE);
2986
2987 /* Create {0, X, X*2, X*3, ...}. */
2988 *vec_offset = gimple_build (&stmts, VEC_SERIES_EXPR, gs_info->offset_vectype,
2989 build_zero_cst (offset_type), step);
2990 if (stmts)
2991 gsi_insert_seq_on_edge_immediate (loop_preheader_edge (loop), stmts);
2992 }
2993
2994 /* Return the amount that should be added to a vector pointer to move
2995 to the next or previous copy of AGGR_TYPE. DR_INFO is the data reference
2996 being vectorized and MEMORY_ACCESS_TYPE describes the type of
2997 vectorization. */
2998
2999 static tree
3000 vect_get_data_ptr_increment (vec_info *vinfo,
3001 dr_vec_info *dr_info, tree aggr_type,
3002 vect_memory_access_type memory_access_type)
3003 {
3004 if (memory_access_type == VMAT_INVARIANT)
3005 return size_zero_node;
3006
3007 tree iv_step = TYPE_SIZE_UNIT (aggr_type);
3008 tree step = vect_dr_behavior (vinfo, dr_info)->step;
3009 if (tree_int_cst_sgn (step) == -1)
3010 iv_step = fold_build1 (NEGATE_EXPR, TREE_TYPE (iv_step), iv_step);
3011 return iv_step;
3012 }
3013
3014 /* Check and perform vectorization of BUILT_IN_BSWAP{16,32,64,128}. */
3015
3016 static bool
3017 vectorizable_bswap (vec_info *vinfo,
3018 stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
3019 stmt_vec_info *vec_stmt, slp_tree slp_node,
3020 slp_tree *slp_op,
3021 tree vectype_in, stmt_vector_for_cost *cost_vec)
3022 {
3023 tree op, vectype;
3024 gcall *stmt = as_a <gcall *> (stmt_info->stmt);
3025 loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
3026 unsigned ncopies;
3027
3028 op = gimple_call_arg (stmt, 0);
3029 vectype = STMT_VINFO_VECTYPE (stmt_info);
3030 poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
3031
3032 /* Multiple types in SLP are handled by creating the appropriate number of
3033 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
3034 case of SLP. */
3035 if (slp_node)
3036 ncopies = 1;
3037 else
3038 ncopies = vect_get_num_copies (loop_vinfo, vectype);
3039
3040 gcc_assert (ncopies >= 1);
3041
3042 tree char_vectype = get_same_sized_vectype (char_type_node, vectype_in);
3043 if (! char_vectype)
3044 return false;
3045
3046 poly_uint64 num_bytes = TYPE_VECTOR_SUBPARTS (char_vectype);
3047 unsigned word_bytes;
3048 if (!constant_multiple_p (num_bytes, nunits, &word_bytes))
3049 return false;
3050
3051 /* The encoding uses one stepped pattern for each byte in the word. */
3052 vec_perm_builder elts (num_bytes, word_bytes, 3);
3053 for (unsigned i = 0; i < 3; ++i)
3054 for (unsigned j = 0; j < word_bytes; ++j)
3055 elts.quick_push ((i + 1) * word_bytes - j - 1);
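  /* E.g. for a 32-bit bswap on sixteen QImode lanes (word_bytes == 4) the
     selector built above is { 3, 2, 1, 0, 7, 6, 5, 4, 11, 10, 9, 8, ... },
     i.e. a byte reversal within each word.  */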
3056
3057 vec_perm_indices indices (elts, 1, num_bytes);
3058 if (!can_vec_perm_const_p (TYPE_MODE (char_vectype), indices))
3059 return false;
3060
3061 if (! vec_stmt)
3062 {
3063 if (slp_node
3064 && !vect_maybe_update_slp_op_vectype (slp_op[0], vectype_in))
3065 {
3066 if (dump_enabled_p ())
3067 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3068 "incompatible vector types for invariants\n");
3069 return false;
3070 }
3071
3072 STMT_VINFO_TYPE (stmt_info) = call_vec_info_type;
3073 DUMP_VECT_SCOPE ("vectorizable_bswap");
3074 if (! slp_node)
3075 {
3076 record_stmt_cost (cost_vec,
3077 1, vector_stmt, stmt_info, 0, vect_prologue);
3078 record_stmt_cost (cost_vec,
3079 ncopies, vec_perm, stmt_info, 0, vect_body);
3080 }
3081 return true;
3082 }
3083
3084 tree bswap_vconst = vec_perm_indices_to_tree (char_vectype, indices);
3085
3086 /* Transform. */
3087 vec<tree> vec_oprnds = vNULL;
3088 stmt_vec_info new_stmt_info = NULL;
3089 stmt_vec_info prev_stmt_info = NULL;
3090 for (unsigned j = 0; j < ncopies; j++)
3091 {
3092 /* Handle uses. */
3093 if (j == 0)
3094 vect_get_vec_defs (vinfo, op, NULL, stmt_info, &vec_oprnds, NULL,
3095 slp_node);
3096 else
3097 vect_get_vec_defs_for_stmt_copy (vinfo, &vec_oprnds, NULL);
3098
3099 /* Arguments are ready. Create the new vector stmt. */
3100 unsigned i;
3101 tree vop;
3102 FOR_EACH_VEC_ELT (vec_oprnds, i, vop)
3103 {
3104 gimple *new_stmt;
3105 tree tem = make_ssa_name (char_vectype);
3106 new_stmt = gimple_build_assign (tem, build1 (VIEW_CONVERT_EXPR,
3107 char_vectype, vop));
3108 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
3109 tree tem2 = make_ssa_name (char_vectype);
3110 new_stmt = gimple_build_assign (tem2, VEC_PERM_EXPR,
3111 tem, tem, bswap_vconst);
3112 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
3113 tem = make_ssa_name (vectype);
3114 new_stmt = gimple_build_assign (tem, build1 (VIEW_CONVERT_EXPR,
3115 vectype, tem2));
3116 new_stmt_info
3117 = vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
3118 if (slp_node)
3119 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt_info);
3120 }
3121
3122 if (slp_node)
3123 continue;
3124
3125 if (j == 0)
3126 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt_info;
3127 else
3128 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt_info;
3129
3130 prev_stmt_info = new_stmt_info;
3131 }
3132
3133 vec_oprnds.release ();
3134 return true;
3135 }
3136
3137 /* Return true if vector types VECTYPE_IN and VECTYPE_OUT have
3138 integer elements and if we can narrow VECTYPE_IN to VECTYPE_OUT
3139 in a single step. On success, store the binary pack code in
3140 *CONVERT_CODE. */
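/* For instance, narrowing a V2DI input to a V4SI output would typically be
   done by a single VEC_PACK_TRUNC_EXPR, assuming the target provides the
   corresponding pack instruction; conversions needing multiple steps are
   rejected.  */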
3141
3142 static bool
3143 simple_integer_narrowing (tree vectype_out, tree vectype_in,
3144 tree_code *convert_code)
3145 {
3146 if (!INTEGRAL_TYPE_P (TREE_TYPE (vectype_out))
3147 || !INTEGRAL_TYPE_P (TREE_TYPE (vectype_in)))
3148 return false;
3149
3150 tree_code code;
3151 int multi_step_cvt = 0;
3152 auto_vec <tree, 8> interm_types;
3153 if (!supportable_narrowing_operation (NOP_EXPR, vectype_out, vectype_in,
3154 &code, &multi_step_cvt, &interm_types)
3155 || multi_step_cvt)
3156 return false;
3157
3158 *convert_code = code;
3159 return true;
3160 }
3161
3162 /* Function vectorizable_call.
3163
3164 Check if STMT_INFO performs a function call that can be vectorized.
3165 If VEC_STMT is also passed, vectorize STMT_INFO: create a vectorized
3166 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
3167 Return true if STMT_INFO is vectorizable in this way. */
3168
3169 static bool
3170 vectorizable_call (vec_info *vinfo,
3171 stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
3172 stmt_vec_info *vec_stmt, slp_tree slp_node,
3173 stmt_vector_for_cost *cost_vec)
3174 {
3175 gcall *stmt;
3176 tree vec_dest;
3177 tree scalar_dest;
3178 tree op;
3179 tree vec_oprnd0 = NULL_TREE, vec_oprnd1 = NULL_TREE;
3180 stmt_vec_info prev_stmt_info;
3181 tree vectype_out, vectype_in;
3182 poly_uint64 nunits_in;
3183 poly_uint64 nunits_out;
3184 loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
3185 bb_vec_info bb_vinfo = dyn_cast <bb_vec_info> (vinfo);
3186 tree fndecl, new_temp, rhs_type;
3187 enum vect_def_type dt[4]
3188 = { vect_unknown_def_type, vect_unknown_def_type, vect_unknown_def_type,
3189 vect_unknown_def_type };
3190 tree vectypes[ARRAY_SIZE (dt)] = {};
3191 slp_tree slp_op[ARRAY_SIZE (dt)] = {};
3192 int ndts = ARRAY_SIZE (dt);
3193 int ncopies, j;
3194 auto_vec<tree, 8> vargs;
3195 auto_vec<tree, 8> orig_vargs;
3196 enum { NARROW, NONE, WIDEN } modifier;
3197 size_t i, nargs;
3198 tree lhs;
3199
3200 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
3201 return false;
3202
3203 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
3204 && ! vec_stmt)
3205 return false;
3206
3207 /* Is STMT_INFO a vectorizable call? */
3208 stmt = dyn_cast <gcall *> (stmt_info->stmt);
3209 if (!stmt)
3210 return false;
3211
3212 if (gimple_call_internal_p (stmt)
3213 && (internal_load_fn_p (gimple_call_internal_fn (stmt))
3214 || internal_store_fn_p (gimple_call_internal_fn (stmt))))
3215 /* Handled by vectorizable_load and vectorizable_store. */
3216 return false;
3217
3218 if (gimple_call_lhs (stmt) == NULL_TREE
3219 || TREE_CODE (gimple_call_lhs (stmt)) != SSA_NAME)
3220 return false;
3221
3222 gcc_checking_assert (!stmt_can_throw_internal (cfun, stmt));
3223
3224 vectype_out = STMT_VINFO_VECTYPE (stmt_info);
3225
3226 /* Process function arguments. */
3227 rhs_type = NULL_TREE;
3228 vectype_in = NULL_TREE;
3229 nargs = gimple_call_num_args (stmt);
3230
3231 /* Bail out if the function has more than four arguments; we do not have
3232 interesting builtin functions to vectorize with more than two arguments
3233 except for fma. Having no arguments is also not good. */
3234 if (nargs == 0 || nargs > 4)
3235 return false;
3236
3237 /* Ignore the arguments of IFN_GOMP_SIMD_LANE, they are magic. */
3238 combined_fn cfn = gimple_call_combined_fn (stmt);
3239 if (cfn == CFN_GOMP_SIMD_LANE)
3240 {
3241 nargs = 0;
3242 rhs_type = unsigned_type_node;
3243 }
3244
3245 int mask_opno = -1;
3246 if (internal_fn_p (cfn))
3247 mask_opno = internal_fn_mask_index (as_internal_fn (cfn));
3248
3249 for (i = 0; i < nargs; i++)
3250 {
3251 if ((int) i == mask_opno)
3252 {
3253 op = gimple_call_arg (stmt, i);
3254 if (!vect_check_scalar_mask (vinfo,
3255 stmt_info, op, &dt[i], &vectypes[i]))
3256 return false;
3257 continue;
3258 }
3259
3260 if (!vect_is_simple_use (vinfo, stmt_info, slp_node,
3261 i, &op, &slp_op[i], &dt[i], &vectypes[i]))
3262 {
3263 if (dump_enabled_p ())
3264 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3265 "use not simple.\n");
3266 return false;
3267 }
3268
3269 /* We can only handle calls with arguments of the same type. */
3270 if (rhs_type
3271 && !types_compatible_p (rhs_type, TREE_TYPE (op)))
3272 {
3273 if (dump_enabled_p ())
3274 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3275 "argument types differ.\n");
3276 return false;
3277 }
3278 if (!rhs_type)
3279 rhs_type = TREE_TYPE (op);
3280
3281 if (!vectype_in)
3282 vectype_in = vectypes[i];
3283 else if (vectypes[i]
3284 && !types_compatible_p (vectypes[i], vectype_in))
3285 {
3286 if (dump_enabled_p ())
3287 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3288 "argument vector types differ.\n");
3289 return false;
3290 }
3291 }
3292 /* If all arguments are external or constant defs, infer the vector type
3293 from the scalar type. */
3294 if (!vectype_in)
3295 vectype_in = get_vectype_for_scalar_type (vinfo, rhs_type, slp_node);
3296 if (vec_stmt)
3297 gcc_assert (vectype_in);
3298 if (!vectype_in)
3299 {
3300 if (dump_enabled_p ())
3301 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3302 "no vectype for scalar type %T\n", rhs_type);
3303
3304 return false;
3305 }
3306 /* FORNOW: we don't yet support mixtures of vector sizes for calls,
3307 just mixtures of nunits. E.g. DI->SI versions of __builtin_ctz*
3308 are traditionally vectorized as two VnDI->VnDI IFN_CTZs followed
3309 by a pack of the two vectors into an SI vector. We would need
3310 separate code to handle direct VnDI->VnSI IFN_CTZs. */
3311 if (TYPE_SIZE (vectype_in) != TYPE_SIZE (vectype_out))
3312 {
3313 if (dump_enabled_p ())
3314 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3315 "mismatched vector sizes %T and %T\n",
3316 vectype_in, vectype_out);
3317 return false;
3318 }
3319
3320 if (VECTOR_BOOLEAN_TYPE_P (vectype_out)
3321 != VECTOR_BOOLEAN_TYPE_P (vectype_in))
3322 {
3323 if (dump_enabled_p ())
3324 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3325 "mixed mask and nonmask vector types\n");
3326 return false;
3327 }
3328
3329 /* FORNOW */
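/* Classify the call by the ratio of input to output vector elements: with
   equal vector sizes (checked above), twice as many output elements means
   the results are narrower than the operands (NARROW, e.g. V4DI -> V8SI),
   and twice as many input elements means they are wider (WIDEN,
   e.g. V8SI -> V4DI).  */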
3330 nunits_in = TYPE_VECTOR_SUBPARTS (vectype_in);
3331 nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
3332 if (known_eq (nunits_in * 2, nunits_out))
3333 modifier = NARROW;
3334 else if (known_eq (nunits_out, nunits_in))
3335 modifier = NONE;
3336 else if (known_eq (nunits_out * 2, nunits_in))
3337 modifier = WIDEN;
3338 else
3339 return false;
3340
3341 /* We only handle functions that do not read or clobber memory. */
3342 if (gimple_vuse (stmt))
3343 {
3344 if (dump_enabled_p ())
3345 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3346 "function reads from or writes to memory.\n");
3347 return false;
3348 }
3349
3350 /* For now, we only vectorize functions if a target-specific builtin
3351 is available. TODO -- in some cases, it might be profitable to
3352 insert the calls for pieces of the vector, in order to be able
3353 to vectorize other operations in the loop. */
3354 fndecl = NULL_TREE;
3355 internal_fn ifn = IFN_LAST;
3356 tree callee = gimple_call_fndecl (stmt);
3357
3358 /* First try using an internal function. */
3359 tree_code convert_code = ERROR_MARK;
3360 if (cfn != CFN_LAST
3361 && (modifier == NONE
3362 || (modifier == NARROW
3363 && simple_integer_narrowing (vectype_out, vectype_in,
3364 &convert_code))))
3365 ifn = vectorizable_internal_function (cfn, callee, vectype_out,
3366 vectype_in);
3367
3368 /* If that fails, try asking for a target-specific built-in function. */
3369 if (ifn == IFN_LAST)
3370 {
3371 if (cfn != CFN_LAST)
3372 fndecl = targetm.vectorize.builtin_vectorized_function
3373 (cfn, vectype_out, vectype_in);
3374 else if (callee && fndecl_built_in_p (callee, BUILT_IN_MD))
3375 fndecl = targetm.vectorize.builtin_md_vectorized_function
3376 (callee, vectype_out, vectype_in);
3377 }
3378
3379 if (ifn == IFN_LAST && !fndecl)
3380 {
3381 if (cfn == CFN_GOMP_SIMD_LANE
3382 && !slp_node
3383 && loop_vinfo
3384 && LOOP_VINFO_LOOP (loop_vinfo)->simduid
3385 && TREE_CODE (gimple_call_arg (stmt, 0)) == SSA_NAME
3386 && LOOP_VINFO_LOOP (loop_vinfo)->simduid
3387 == SSA_NAME_VAR (gimple_call_arg (stmt, 0)))
3388 {
3389 /* We can handle IFN_GOMP_SIMD_LANE by returning a
3390 { 0, 1, 2, ... vf - 1 } vector. */
3391 gcc_assert (nargs == 0);
3392 }
3393 else if (modifier == NONE
3394 && (gimple_call_builtin_p (stmt, BUILT_IN_BSWAP16)
3395 || gimple_call_builtin_p (stmt, BUILT_IN_BSWAP32)
3396 || gimple_call_builtin_p (stmt, BUILT_IN_BSWAP64)
3397 || gimple_call_builtin_p (stmt, BUILT_IN_BSWAP128)))
3398 return vectorizable_bswap (vinfo, stmt_info, gsi, vec_stmt, slp_node,
3399 slp_op, vectype_in, cost_vec);
3400 else
3401 {
3402 if (dump_enabled_p ())
3403 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3404 "function is not vectorizable.\n");
3405 return false;
3406 }
3407 }
3408
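/* Determine how many copies of the vector stmt are needed.  For SLP the
   number of vector stmts is recorded in the SLP node, so one copy is
   enough.  For a narrowing operation done via a target builtin (IFN_LAST),
   every call already produces a full output vector, so the count follows
   VECTYPE_OUT; otherwise it follows VECTYPE_IN (narrowing internal
   functions emit two half-width calls that are packed afterwards).  */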
3409 if (slp_node)
3410 ncopies = 1;
3411 else if (modifier == NARROW && ifn == IFN_LAST)
3412 ncopies = vect_get_num_copies (loop_vinfo, vectype_out);
3413 else
3414 ncopies = vect_get_num_copies (loop_vinfo, vectype_in);
3415
3416 /* Sanity check: make sure that at least one copy of the vectorized stmt
3417 needs to be generated. */
3418 gcc_assert (ncopies >= 1);
3419
3420 vec_loop_masks *masks = (loop_vinfo ? &LOOP_VINFO_MASKS (loop_vinfo) : NULL);
3421 if (!vec_stmt) /* transformation not required. */
3422 {
3423 if (slp_node)
3424 for (i = 0; i < nargs; ++i)
3425 if (!vect_maybe_update_slp_op_vectype (slp_op[i], vectype_in))
3426 {
3427 if (dump_enabled_p ())
3428 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3429 "incompatible vector types for invariants\n");
3430 return false;
3431 }
3432 STMT_VINFO_TYPE (stmt_info) = call_vec_info_type;
3433 DUMP_VECT_SCOPE ("vectorizable_call");
3434 vect_model_simple_cost (vinfo, stmt_info,
3435 ncopies, dt, ndts, slp_node, cost_vec);
3436 if (ifn != IFN_LAST && modifier == NARROW && !slp_node)
3437 record_stmt_cost (cost_vec, ncopies / 2,
3438 vec_promote_demote, stmt_info, 0, vect_body);
3439
3440 if (loop_vinfo && mask_opno >= 0)
3441 {
3442 unsigned int nvectors = (slp_node
3443 ? SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node)
3444 : ncopies);
3445 tree scalar_mask = gimple_call_arg (stmt_info->stmt, mask_opno);
3446 vect_record_loop_mask (loop_vinfo, masks, nvectors,
3447 vectype_out, scalar_mask);
3448 }
3449 return true;
3450 }
3451
3452 /* Transform. */
3453
3454 if (dump_enabled_p ())
3455 dump_printf_loc (MSG_NOTE, vect_location, "transform call.\n");
3456
3457 /* Handle def. */
3458 scalar_dest = gimple_call_lhs (stmt);
3459 vec_dest = vect_create_destination_var (scalar_dest, vectype_out);
3460
3461 bool masked_loop_p = loop_vinfo && LOOP_VINFO_FULLY_MASKED_P (loop_vinfo);
3462
3463 stmt_vec_info new_stmt_info = NULL;
3464 prev_stmt_info = NULL;
3465 if (modifier == NONE || ifn != IFN_LAST)
3466 {
3467 tree prev_res = NULL_TREE;
3468 vargs.safe_grow (nargs);
3469 orig_vargs.safe_grow (nargs);
3470 for (j = 0; j < ncopies; ++j)
3471 {
3472 /* Build argument list for the vectorized call. */
3473 if (slp_node)
3474 {
3475 auto_vec<vec<tree> > vec_defs (nargs);
3476 vec<tree> vec_oprnds0;
3477
3478 vect_get_slp_defs (vinfo, slp_node, &vec_defs);
3479 vec_oprnds0 = vec_defs[0];
3480
3481 /* Arguments are ready. Create the new vector stmt. */
3482 FOR_EACH_VEC_ELT (vec_oprnds0, i, vec_oprnd0)
3483 {
3484 size_t k;
3485 for (k = 0; k < nargs; k++)
3486 {
3487 vec<tree> vec_oprndsk = vec_defs[k];
3488 vargs[k] = vec_oprndsk[i];
3489 }
3490 if (modifier == NARROW)
3491 {
3492 /* We don't define any narrowing conditional functions
3493 at present. */
3494 gcc_assert (mask_opno < 0);
3495 tree half_res = make_ssa_name (vectype_in);
3496 gcall *call
3497 = gimple_build_call_internal_vec (ifn, vargs);
3498 gimple_call_set_lhs (call, half_res);
3499 gimple_call_set_nothrow (call, true);
3500 vect_finish_stmt_generation (vinfo, stmt_info, call, gsi);
3501 if ((i & 1) == 0)
3502 {
3503 prev_res = half_res;
3504 continue;
3505 }
3506 new_temp = make_ssa_name (vec_dest);
3507 gimple *new_stmt
3508 = gimple_build_assign (new_temp, convert_code,
3509 prev_res, half_res);
3510 new_stmt_info
3511 = vect_finish_stmt_generation (vinfo, stmt_info,
3512 new_stmt, gsi);
3513 }
3514 else
3515 {
3516 if (mask_opno >= 0 && masked_loop_p)
3517 {
3518 unsigned int vec_num = vec_oprnds0.length ();
3519 /* Always true for SLP. */
3520 gcc_assert (ncopies == 1);
3521 tree mask = vect_get_loop_mask (gsi, masks, vec_num,
3522 vectype_out, i);
3523 vargs[mask_opno] = prepare_load_store_mask
3524 (TREE_TYPE (mask), mask, vargs[mask_opno], gsi);
3525 }
3526
3527 gcall *call;
3528 if (ifn != IFN_LAST)
3529 call = gimple_build_call_internal_vec (ifn, vargs);
3530 else
3531 call = gimple_build_call_vec (fndecl, vargs);
3532 new_temp = make_ssa_name (vec_dest, call);
3533 gimple_call_set_lhs (call, new_temp);
3534 gimple_call_set_nothrow (call, true);
3535 new_stmt_info
3536 = vect_finish_stmt_generation (vinfo, stmt_info,
3537 call, gsi);
3538 }
3539 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt_info);
3540 }
3541
3542 for (i = 0; i < nargs; i++)
3543 {
3544 vec<tree> vec_oprndsi = vec_defs[i];
3545 vec_oprndsi.release ();
3546 }
3547 continue;
3548 }
3549
3550 for (i = 0; i < nargs; i++)
3551 {
3552 op = gimple_call_arg (stmt, i);
3553 if (j == 0)
3554 vec_oprnd0
3555 = vect_get_vec_def_for_operand (vinfo,
3556 op, stmt_info, vectypes[i]);
3557 else
3558 vec_oprnd0
3559 = vect_get_vec_def_for_stmt_copy (vinfo, orig_vargs[i]);
3560
3561 orig_vargs[i] = vargs[i] = vec_oprnd0;
3562 }
3563
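/* In a fully-masked loop, combine the call's own mask argument with the
   loop mask for this copy (see prepare_load_store_mask) so that lanes
   disabled by the loop mask stay inactive.  */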
3564 if (mask_opno >= 0 && masked_loop_p)
3565 {
3566 tree mask = vect_get_loop_mask (gsi, masks, ncopies,
3567 vectype_out, j);
3568 vargs[mask_opno]
3569 = prepare_load_store_mask (TREE_TYPE (mask), mask,
3570 vargs[mask_opno], gsi);
3571 }
3572
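/* IFN_GOMP_SIMD_LANE needs no call at all: the result of copy J is just
   the constant lane-index vector
   { J * nunits_out, J * nunits_out + 1, ... }.  */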
3573 if (cfn == CFN_GOMP_SIMD_LANE)
3574 {
3575 tree cst = build_index_vector (vectype_out, j * nunits_out, 1);
3576 tree new_var
3577 = vect_get_new_ssa_name (vectype_out, vect_simple_var, "cst_");
3578 gimple *init_stmt = gimple_build_assign (new_var, cst);
3579 vect_init_vector_1 (vinfo, stmt_info, init_stmt, NULL);
3580 new_temp = make_ssa_name (vec_dest);
3581 gimple *new_stmt = gimple_build_assign (new_temp, new_var);
3582 new_stmt_info
3583 = vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
3584 }
3585 else if (modifier == NARROW)
3586 {
3587 /* We don't define any narrowing conditional functions at
3588 present. */
3589 gcc_assert (mask_opno < 0);
3590 tree half_res = make_ssa_name (vectype_in);
3591 gcall *call = gimple_build_call_internal_vec (ifn, vargs);
3592 gimple_call_set_lhs (call, half_res);
3593 gimple_call_set_nothrow (call, true);
3594 vect_finish_stmt_generation (vinfo, stmt_info, call, gsi);
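/* Pair up the half-width results: even-numbered copies only record their
   half, odd-numbered copies combine it with the previous half using
   CONVERT_CODE, the narrowing operation found by
   simple_integer_narrowing.  */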
3595 if ((j & 1) == 0)
3596 {
3597 prev_res = half_res;
3598 continue;
3599 }
3600 new_temp = make_ssa_name (vec_dest);
3601 gassign *new_stmt = gimple_build_assign (new_temp, convert_code,
3602 prev_res, half_res);
3603 new_stmt_info
3604 = vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
3605 }
3606 else
3607 {
3608 gcall *call;
3609 if (ifn != IFN_LAST)
3610 call = gimple_build_call_internal_vec (ifn, vargs);
3611 else
3612 call = gimple_build_call_vec (fndecl, vargs);
3613 new_temp = make_ssa_name (vec_dest, call);
3614 gimple_call_set_lhs (call, new_temp);
3615 gimple_call_set_nothrow (call, true);
3616 new_stmt_info
3617 = vect_finish_stmt_generation (vinfo, stmt_info, call, gsi);
3618 }
3619
3620 if (j == (modifier == NARROW ? 1 : 0))
3621 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt_info;
3622 else
3623 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt_info;
3624
3625 prev_stmt_info = new_stmt_info;
3626 }
3627 }
3628 else if (modifier == NARROW)
3629 {
3630 /* We don't define any narrowing conditional functions at present. */
3631 gcc_assert (mask_opno < 0);
3632 for (j = 0; j < ncopies; ++j)
3633 {
3634 /* Build argument list for the vectorized call. */
3635 if (j == 0)
3636 vargs.create (nargs * 2);
3637 else
3638 vargs.truncate (0);
3639
3640 if (slp_node)
3641 {
3642 auto_vec<vec<tree> > vec_defs (nargs);
3643 vec<tree> vec_oprnds0;
3644
3645 vect_get_slp_defs (vinfo, slp_node, &vec_defs);
3646 vec_oprnds0 = vec_defs[0];
3647
3648 /* Arguments are ready. Create the new vector stmt. */
3649 for (i = 0; vec_oprnds0.iterate (i, &vec_oprnd0); i += 2)
3650 {
3651 size_t k;
3652 vargs.truncate (0);
3653 for (k = 0; k < nargs; k++)
3654 {
3655 vec<tree> vec_oprndsk = vec_defs[k];
3656 vargs.quick_push (vec_oprndsk[i]);
3657 vargs.quick_push (vec_oprndsk[i + 1]);
3658 }
3659 gcall *call;
3660 if (ifn != IFN_LAST)
3661 call = gimple_build_call_internal_vec (ifn, vargs);
3662 else
3663 call = gimple_build_call_vec (fndecl, vargs);
3664 new_temp = make_ssa_name (vec_dest, call);
3665 gimple_call_set_lhs (call, new_temp);
3666 gimple_call_set_nothrow (call, true);
3667 new_stmt_info
3668 = vect_finish_stmt_generation (vinfo, stmt_info, call, gsi);
3669 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt_info);
3670 }
3671
3672 for (i = 0; i < nargs; i++)
3673 {
3674 vec<tree> vec_oprndsi = vec_defs[i];
3675 vec_oprndsi.release ();
3676 }
3677 continue;
3678 }
3679
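/* Here the narrowing is done through a target builtin: each call consumes
   two consecutive input vectors per argument, so fetch the first pair for
   copy 0 and step past the pair used by the previous copy otherwise.  */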
3680 for (i = 0; i < nargs; i++)
3681 {
3682 op = gimple_call_arg (stmt, i);
3683 if (j == 0)
3684 {
3685 vec_oprnd0
3686 = vect_get_vec_def_for_operand (vinfo, op, stmt_info,
3687 vectypes[i]);
3688 vec_oprnd1
3689 = vect_get_vec_def_for_stmt_copy (vinfo, vec_oprnd0);
3690 }
3691 else
3692 {
3693 vec_oprnd1 = gimple_call_arg (new_stmt_info->stmt,
3694 2 * i + 1);
3695 vec_oprnd0
3696 = vect_get_vec_def_for_stmt_copy (vinfo, vec_oprnd1);
3697 vec_oprnd1
3698 = vect_get_vec_def_for_stmt_copy (vinfo, vec_oprnd0);
3699 }
3700
3701 vargs.quick_push (vec_oprnd0);
3702 vargs.quick_push (vec_oprnd1);
3703 }
3704
3705 gcall *new_stmt = gimple_build_call_vec (fndecl, vargs);
3706 new_temp = make_ssa_name (vec_dest, new_stmt);
3707 gimple_call_set_lhs (new_stmt, new_temp);
3708 new_stmt_info
3709 = vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
3710
3711 if (j == 0)
3712 STMT_VINFO_VEC_STMT (stmt_info) = new_stmt_info;
3713 else
3714 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt_info;
3715
3716 prev_stmt_info = new_stmt_info;
3717 }
3718
3719 *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
3720 }
3721 else
3722 /* No current target implements this case. */
3723 return false;
3724
3725 vargs.release ();
3726
3727 /* The call in STMT might prevent it from being removed by DCE.
3728 However, we cannot remove it here, because of the way the SSA name
3729 it defines is mapped to the new definition.  So just replace the
3730 rhs of the statement with something harmless. */
3731
3732 if (slp_node)
3733 return true;
3734
3735 stmt_info = vect_orig_stmt (stmt_info);
3736 lhs = gimple_get_lhs (stmt_info->stmt);
3737
3738 gassign *new_stmt
3739 = gimple_build_assign (lhs, build_zero_cst (TREE_TYPE (lhs)));
3740 vinfo->replace_stmt (gsi, stmt_info, new_stmt);
3741
3742 return true;
3743 }
3744
3745
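/* Information about one argument of a call that is a candidate for
   vectorization via a SIMD clone, collected during analysis.  */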
3746 struct simd_call_arg_info
3747 {
3748 tree vectype;
3749 tree op;
3750 HOST_WIDE_INT linear_step;
3751 enum vect_def_type dt;
3752 unsigned int align;
3753 bool simd_lane_linear;
3754 };
3755
3756 /* Helper function of vectorizable_simd_clone_call. If OP, an SSA_NAME,
3757 is linear within a simd lane (but not within the whole loop), note it in
3758 *ARGINFO. */
3759
3760 static void
3761 vect_simd_lane_linear (tree op, class loop *loop,
3762 struct simd_call_arg_info *arginfo)
3763 {
3764 gimple *def_stmt = SSA_NAME_DEF_STMT (op);
3765
3766 if (!is_gimple_assign (def_stmt)
3767 || gimple_assign_rhs_code (def_stmt) != POINTER_PLUS_EXPR
3768 || !is_gimple_min_invariant (gimple_assign_rhs1 (def_stmt)))
3769 return;
3770
3771 tree base = gimple_assign_rhs1 (def_stmt);
3772 HOST_WIDE_INT linear_step = 0;
3773 tree v = gimple_assign_rhs2 (def_stmt);
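/* Walk the SSA definition chain of the offset: fold constant additions
   into BASE, record at most one constant multiplication as the step, look
   through non-narrowing conversions, and succeed only if the chain ends
   at the IFN_GOMP_SIMD_LANE call for this loop's simduid.  */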
3774 while (TREE_CODE (v) == SSA_NAME)
3775 {
3776 tree t;
3777 def_stmt = SSA_NAME_DEF_STMT (v);
3778 if (is_gimple_assign (def_stmt))
3779 switch (gimple_assign_rhs_code (def_stmt))
3780 {
3781 case PLUS_EXPR:
3782 t = gimple_assign_rhs2 (def_stmt);
3783 if (linear_step || TREE_CODE (t) != INTEGER_CST)
3784 return;
3785 base = fold_build2 (POINTER_PLUS_EXPR, TREE_TYPE (base), base, t);
3786 v = gimple_assign_rhs1 (def_stmt);
3787 continue;
3788 case MULT_EXPR:
3789 t = gimple_assign_rhs2 (def_stmt);
3790 if (linear_step || !tree_fits_shwi_p (t) || integer_zerop (t))
3791 return;
3792 linear_step = tree_to_shwi (t);
3793 v = gimple_assign_rhs1 (def_stmt);
3794 continue;
3795 CASE_CONVERT:
3796 t = gimple_assign_rhs1 (def_stmt);
3797 if (TREE_CODE (TREE_TYPE (t)) != INTEGER_TYPE
3798 || (TYPE_PRECISION (TREE_TYPE (v))
3799 < TYPE_PRECISION (TREE_TYPE (t))))
3800 return;
3801 if (!linear_step)
3802 linear_step = 1;
3803 v = t;
3804 continue;
3805 default:
3806 return;
3807 }
3808 else if (gimple_call_internal_p (def_stmt, IFN_GOMP_SIMD_LANE)
3809 && loop->simduid
3810 && TREE_CODE (gimple_call_arg (def_stmt, 0)) == SSA_NAME
3811 && (SSA_NAME_VAR (gimple_call_arg (def_stmt, 0))
3812 == loop->simduid))
3813 {
3814 if (!linear_step)
3815 linear_step = 1;
3816 arginfo->linear_step = linear_step;
3817 arginfo->op = base;
3818 arginfo->simd_lane_linear = true;
3819 return;
3820 }
3821 }
3822 }
3823
3824 /* Return the number of elements in vector type VECTYPE, which is associated
3825 with a SIMD clone. At present these vectors always have a constant
3826 length. */
3827
3828 static unsigned HOST_WIDE_INT
3829 simd_clone_subparts (tree vectype)
3830 {
3831 return TYPE_VECTOR_SUBPARTS (vectype).to_constant ();
3832 }
3833
3834 /* Function vectorizable_simd_clone_call.
3835
3836 Check if STMT_INFO performs a function call that can be vectorized
3837 by calling a simd clone of the function.
3838 If VEC_STMT is also passed, vectorize STMT_INFO: create a vectorized
3839 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
3840 Return true if STMT_INFO is vectorizable in this way. */
3841
3842 static bool
3843 vectorizable_simd_clone_call (vec_info *vinfo, stmt_vec_info stmt_info,
3844 gimple_stmt_iterator *gsi,
3845 stmt_vec_info *vec_stmt, slp_tree slp_node,
3846 stmt_vector_for_cost *)
3847 {
3848 tree vec_dest;
3849 tree scalar_dest;
3850 tree op, type;
3851 tree vec_oprnd0 = NULL_TREE;
3852 stmt_vec_info prev_stmt_info;
3853 tree vectype;
3854 unsigned int nunits;
3855 loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
3856 bb_vec_info bb_vinfo = dyn_cast <bb_vec_info> (vinfo);
3857 class loop *loop = loop_vinfo ? LOOP_VINFO_LOOP (loop_vinfo) : NULL;
3858 tree fndecl, new_temp;
3859 int ncopies, j;
3860 auto_vec<simd_call_arg_info> arginfo;
3861 vec<tree> vargs = vNULL;
3862 size_t i, nargs;
3863 tree lhs, rtype, ratype;
3864 vec<constructor_elt, va_gc> *ret_ctor_elts = NULL;
3865
3866 /* Is STMT a vectorizable call? */
3867 gcall *stmt = dyn_cast <gcall *> (stmt_info->stmt);
3868 if (!stmt)
3869 return false;
3870
3871 fndecl = gimple_call_fndecl (stmt);
3872 if (fndecl == NULL_TREE)
3873 return false;
3874
3875 struct cgraph_node *node = cgraph_node::get (fndecl);
3876 if (node == NULL || node->simd_clones == NULL)
3877 return false;
3878
3879 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
3880 return false;
3881
3882 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
3883 && ! vec_stmt)
3884 return false;
3885
3886 if (gimple_call_lhs (stmt)
3887 && TREE_CODE (gimple_call_lhs (stmt)) != SSA_NAME)
3888 return false;
3889
3890 gcc_checking_assert (!stmt_can_throw_internal (cfun, stmt));
3891
3892 vectype = STMT_VINFO_VECTYPE (stmt_info);
3893
3894 if (loop_vinfo && nested_in_vect_loop_p (loop, stmt_info))
3895 return false;
3896
3897 /* FORNOW */
3898 if (slp_node)
3899 return false;
3900
3901 /* Process function arguments. */
3902 nargs = gimple_call_num_args (stmt);
3903
3904 /* Bail out if the function has zero arguments. */
3905 if (nargs == 0)
3906 return false;
3907
3908 arginfo.reserve (nargs, true);
3909
3910 for (i = 0; i < nargs; i++)
3911 {
3912 simd_call_arg_info thisarginfo;
3913 affine_iv iv;
3914
3915 thisarginfo.linear_step = 0;
3916 thisarginfo.align = 0;
3917 thisarginfo.op = NULL_TREE;
3918 thisarginfo.simd_lane_linear = false;
3919
3920 op = gimple_call_arg (stmt, i);
3921 if (!vect_is_simple_use (op, vinfo, &thisarginfo.dt,
3922 &thisarginfo.vectype)
3923 || thisarginfo.dt == vect_uninitialized_def)
3924 {
3925 if (dump_enabled_p ())
3926 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3927 "use not simple.\n");
3928 return false;
3929 }
3930
3931 if (thisarginfo.dt == vect_constant_def
3932 || thisarginfo.dt == vect_external_def)
3933 gcc_assert (thisarginfo.vectype == NULL_TREE);
3934 else
3935 {
3936 gcc_assert (thisarginfo.vectype != NULL_TREE);
3937 if (VECTOR_BOOLEAN_TYPE_P (thisarginfo.vectype))
3938 {
3939 if (dump_enabled_p ())
3940 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3941 "vector mask arguments are not supported\n");
3942 return false;
3943 }
3944 }
3945
3946 /* For linear arguments, the analysis phase should have saved
3947 the base and step in STMT_VINFO_SIMD_CLONE_INFO. */
3948 if (i * 3 + 4 <= STMT_VINFO_SIMD_CLONE_INFO (stmt_info).length ()
3949 && STMT_VINFO_SIMD_CLONE_INFO (stmt_info)[i * 3 + 2])
3950 {
3951 gcc_assert (vec_stmt);
3952 thisarginfo.linear_step
3953 = tree_to_shwi (STMT_VINFO_SIMD_CLONE_INFO (stmt_info)[i * 3 + 2]);
3954 thisarginfo.op
3955 = STMT_VINFO_SIMD_CLONE_INFO (stmt_info)[i * 3 + 1];
3956 thisarginfo.simd_lane_linear
3957 = (STMT_VINFO_SIMD_CLONE_INFO (stmt_info)[i * 3 + 3]
3958 == boolean_true_node);
3959 /* If the loop has been peeled for alignment, we need to adjust the linear argument's start value accordingly. */
3960 tree n1 = LOOP_VINFO_NITERS_UNCHANGED (loop_vinfo);
3961 tree n2 = LOOP_VINFO_NITERS (loop_vinfo);
3962 if (n1 != n2 && !thisarginfo.simd_lane_linear)
3963 {
3964 tree bias = fold_build2 (MINUS_EXPR, TREE_TYPE (n1), n1, n2);
3965 tree step = STMT_VINFO_SIMD_CLONE_INFO (stmt_info)[i * 3 + 2];
3966 tree opt = TREE_TYPE (thisarginfo.op);
3967 bias = fold_convert (TREE_TYPE (step), bias);
3968 bias = fold_build2 (MULT_EXPR, TREE_TYPE (step), bias, step);
3969 thisarginfo.op
3970 = fold_build2 (POINTER_TYPE_P (opt)
3971 ? POINTER_PLUS_EXPR : PLUS_EXPR, opt,
3972 thisarginfo.op, bias);
3973 }
3974 }
3975 else if (!vec_stmt
3976 && thisarginfo.dt != vect_constant_def
3977 && thisarginfo.dt != vect_external_def
3978 && loop_vinfo
3979 && TREE_CODE (op) == SSA_NAME
3980 && simple_iv (loop, loop_containing_stmt (stmt), op,
3981 &iv, false)
3982 && tree_fits_shwi_p (iv.step))
3983 {
3984 thisarginfo.linear_step = tree_to_shwi (iv.step);
3985 thisarginfo.op = iv.base;
3986 }
3987 else if ((thisarginfo.dt == vect_constant_def
3988 || thisarginfo.dt == vect_external_def)
3989 && POINTER_TYPE_P (TREE_TYPE (op)))
3990 thisarginfo.align = get_pointer_alignment (op) / BITS_PER_UNIT;
3991 /* Addresses of array elements indexed by GOMP_SIMD_LANE are
3992 linear too. */
3993 if (POINTER_TYPE_P (TREE_TYPE (op))
3994 && !thisarginfo.linear_step
3995 && !vec_stmt
3996 && thisarginfo.dt != vect_constant_def
3997 && thisarginfo.dt != vect_external_def
3998 && loop_vinfo
3999 && !slp_node
4000 && TREE_CODE (op) == SSA_NAME)
4001 vect_simd_lane_linear (op, loop, &thisarginfo);
4002
4003 arginfo.quick_push (thisarginfo);
4004 }
4005
4006 unsigned HOST_WIDE_INT vf;
4007 if (!LOOP_VINFO_VECT_FACTOR (loop_vinfo).is_constant (&vf))
4008 {
4009 if (dump_enabled_p ())
4010 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4011 "not considering SIMD clones; not yet supported"
4012 " for variable-width vectors.\n");
4013 return false;
4014 }
4015
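/* Pick the best usable SIMD clone, unless the analysis phase already
   recorded one.  Clones whose simdlen exceeds VF or whose arguments cannot
   be matched are skipped; among the rest, lower badness wins: clones
   narrower than VF, in-branch clones, clones the target considers costly,
   and vector arguments that would have to be built from uniform or linear
   scalars all add penalties.  */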
4016 unsigned int badness = 0;
4017 struct cgraph_node *bestn = NULL;
4018 if (STMT_VINFO_SIMD_CLONE_INFO (stmt_info).exists ())
4019 bestn = cgraph_node::get (STMT_VINFO_SIMD_CLONE_INFO (stmt_info)[0]);
4020 else
4021 for (struct cgraph_node *n = node->simd_clones; n != NULL;
4022 n = n->simdclone->next_clone)
4023 {
4024 unsigned int this_badness = 0;
4025 if (n->simdclone->simdlen > vf
4026 || n->simdclone->nargs != nargs)
4027 continue;
4028 if (n->simdclone->simdlen < vf)
4029 this_badness += (exact_log2 (vf)
4030 - exact_log2 (n->simdclone->simdlen)) * 1024;
4031 if (n->simdclone->inbranch)
4032 this_badness += 2048;
4033 int target_badness = targetm.simd_clone.usable (n);
4034 if (target_badness < 0)
4035 continue;
4036 this_badness += target_badness * 512;
4037 /* FORNOW: Have to add code to add the mask argument. */
4038 if (n->simdclone->inbranch)
4039 continue;
4040 for (i = 0; i < nargs; i++)
4041 {
4042 switch (n->simdclone->args[i].arg_type)
4043 {
4044 case SIMD_CLONE_ARG_TYPE_VECTOR:
4045 if (!useless_type_conversion_p
4046 (n->simdclone->args[i].orig_type,
4047 TREE_TYPE (gimple_call_arg (stmt, i))))
4048 i = -1;
4049 else if (arginfo[i].dt == vect_constant_def
4050 || arginfo[i].dt == vect_external_def
4051 || arginfo[i].linear_step)
4052 this_badness += 64;
4053 break;
4054 case SIMD_CLONE_ARG_TYPE_UNIFORM:
4055 if (arginfo[i].dt != vect_constant_def
4056 && arginfo[i].dt != vect_external_def)
4057 i = -1;
4058 break;
4059 case SIMD_CLONE_ARG_TYPE_LINEAR_CONSTANT_STEP:
4060 case SIMD_CLONE_ARG_TYPE_LINEAR_REF_CONSTANT_STEP:
4061 if (arginfo[i].dt == vect_constant_def
4062 || arginfo[i].dt == vect_external_def
4063 || (arginfo[i].linear_step
4064 != n->simdclone->args[i].linear_step))
4065 i = -1;
4066 break;
4067 case SIMD_CLONE_ARG_TYPE_LINEAR_VARIABLE_STEP:
4068 case SIMD_CLONE_ARG_TYPE_LINEAR_VAL_CONSTANT_STEP:
4069 case SIMD_CLONE_ARG_TYPE_LINEAR_UVAL_CONSTANT_STEP:
4070 case SIMD_CLONE_ARG_TYPE_LINEAR_REF_VARIABLE_STEP:
4071 case SIMD_CLONE_ARG_TYPE_LINEAR_VAL_VARIABLE_STEP:
4072 case SIMD_CLONE_ARG_TYPE_LINEAR_UVAL_VARIABLE_STEP:
4073 /* FORNOW */
4074 i = -1;
4075 break;
4076 case SIMD_CLONE_ARG_TYPE_MASK:
4077 gcc_unreachable ();
4078 }
4079 if (i == (size_t) -1)
4080 break;
4081 if (n->simdclone->args[i].alignment > arginfo[i].align)
4082 {
4083 i = -1;
4084 break;
4085 }
4086 if (arginfo[i].align)
4087 this_badness += (exact_log2 (arginfo[i].align)
4088 - exact_log2 (n->simdclone->args[i].alignment));
4089 }
4090 if (i == (size_t) -1)
4091 continue;
4092 if (bestn == NULL || this_badness < badness)
4093 {
4094 bestn = n;
4095 badness = this_badness;
4096 }
4097 }
4098
4099 if (bestn == NULL)
4100 return false;
4101
4102 for (i = 0; i < nargs; i++)
4103 if ((arginfo[i].dt == vect_constant_def
4104 || arginfo[i].dt == vect_external_def)
4105 && bestn->simdclone->args[i].arg_type == SIMD_CLONE_ARG_TYPE_VECTOR)
4106 {
4107 tree arg_type = TREE_TYPE (gimple_call_arg (stmt, i));
4108 arginfo[i].vectype = get_vectype_for_scalar_type (vinfo, arg_type,
4109 slp_node);
4110 if (arginfo[i].vectype == NULL
4111 || (simd_clone_subparts (arginfo[i].vectype)
4112 > bestn->simdclone->simdlen))
4113 return false;
4114 }
4115
4116 fndecl = bestn->decl;
4117 nunits = bestn->simdclone->simdlen;
4118 ncopies = vf / nunits;
4119
4120 /* If the function isn't const, only allow it in simd loops where the user
4121 has asserted that at least nunits consecutive iterations can be
4122 performed using SIMD instructions. */
4123 if ((loop == NULL || (unsigned) loop->safelen < nunits)
4124 && gimple_vuse (stmt))
4125 return false;
4126
4127 /* Sanity check: make sure that at least one copy of the vectorized stmt
4128 needs to be generated. */
4129 gcc_assert (ncopies >= 1);
4130
4131 if (!vec_stmt) /* transformation not required. */
4132 {
4133 STMT_VINFO_SIMD_CLONE_INFO (stmt_info).safe_push (bestn->decl);
4134 for (i = 0; i < nargs; i++)
4135 if ((bestn->simdclone->args[i].arg_type
4136 == SIMD_CLONE_ARG_TYPE_LINEAR_CONSTANT_STEP)
4137 || (bestn->simdclone->args[i].arg_type
4138 == SIMD_CLONE_ARG_TYPE_LINEAR_REF_CONSTANT_STEP))
4139 {
4140 STMT_VINFO_SIMD_CLONE_INFO (stmt_info).safe_grow_cleared (i * 3
4141 + 1);
4142 STMT_VINFO_SIMD_CLONE_INFO (stmt_info).safe_push (arginfo[i].op);
4143 tree lst = POINTER_TYPE_P (TREE_TYPE (arginfo[i].op))
4144 ? size_type_node : TREE_TYPE (arginfo[i].op);
4145 tree ls = build_int_cst (lst, arginfo[i].linear_step);
4146 STMT_VINFO_SIMD_CLONE_INFO (stmt_info).safe_push (ls);
4147 tree sll = arginfo[i].simd_lane_linear
4148 ? boolean_true_node : boolean_false_node;
4149 STMT_VINFO_SIMD_CLONE_INFO (stmt_info).safe_push (sll);
4150 }
4151 STMT_VINFO_TYPE (stmt_info) = call_simd_clone_vec_info_type;
4152 DUMP_VECT_SCOPE ("vectorizable_simd_clone_call");
4153 /* vect_model_simple_cost (vinfo, stmt_info, ncopies,
4154 dt, slp_node, cost_vec); */
4155 return true;
4156 }
4157
4158 /* Transform. */
4159
4160 if (dump_enabled_p ())
4161 dump_printf_loc (MSG_NOTE, vect_location, "transform call.\n");
4162
4163 /* Handle def. */
4164 scalar_dest = gimple_call_lhs (stmt);
4165 vec_dest = NULL_TREE;
4166 rtype = NULL_TREE;
4167 ratype = NULL_TREE;
4168 if (scalar_dest)
4169 {
4170 vec_dest = vect_create_destination_var (scalar_dest, vectype);
4171 rtype = TREE_TYPE (TREE_TYPE (fndecl));
4172 if (TREE_CODE (rtype) == ARRAY_TYPE)
4173 {
4174 ratype = rtype;
4175 rtype = TREE_TYPE (ratype);
4176 }
4177 }
4178
4179 prev_stmt_info = NULL;
4180 for (j = 0; j < ncopies; ++j)
4181 {
4182 /* Build argument list for the vectorized call. */
4183 if (j == 0)
4184 vargs.create (nargs);
4185 else
4186 vargs.truncate (0);
4187
4188 for (i = 0; i < nargs; i++)
4189 {
4190 unsigned int k, l, m, o;
4191 tree atype;
4192 op = gimple_call_arg (stmt, i);
4193 switch (bestn->simdclone->args[i].arg_type)
4194 {
4195 case SIMD_CLONE_ARG_TYPE_VECTOR:
4196 atype = bestn->simdclone->args[i].vector_type;
4197 o = nunits / simd_clone_subparts (atype);
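/* O is the number of vectors of this argument type that one clone call
   takes.  If the clone's vectors are narrower than the caller's, extract
   pieces of the caller's vectors with BIT_FIELD_REF; if they are wider,
   glue several caller vectors together with a CONSTRUCTOR.  */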
4198 for (m = j * o; m < (j + 1) * o; m++)
4199 {
4200 if (simd_clone_subparts (atype)
4201 < simd_clone_subparts (arginfo[i].vectype))
4202 {
4203 poly_uint64 prec = GET_MODE_BITSIZE (TYPE_MODE (atype));
4204 k = (simd_clone_subparts (arginfo[i].vectype)
4205 / simd_clone_subparts (atype));
4206 gcc_assert ((k & (k - 1)) == 0);
4207 if (m == 0)
4208 vec_oprnd0
4209 = vect_get_vec_def_for_operand (vinfo, op, stmt_info);
4210 else
4211 {
4212 vec_oprnd0 = arginfo[i].op;
4213 if ((m & (k - 1)) == 0)
4214 vec_oprnd0
4215 = vect_get_vec_def_for_stmt_copy (vinfo,
4216 vec_oprnd0);
4217 }
4218 arginfo[i].op = vec_oprnd0;
4219 vec_oprnd0
4220 = build3 (BIT_FIELD_REF, atype, vec_oprnd0,
4221 bitsize_int (prec),
4222 bitsize_int ((m & (k - 1)) * prec));
4223 gassign *new_stmt
4224 = gimple_build_assign (make_ssa_name (atype),
4225 vec_oprnd0);
4226 vect_finish_stmt_generation (vinfo, stmt_info,
4227 new_stmt, gsi);
4228 vargs.safe_push (gimple_assign_lhs (new_stmt));
4229 }
4230 else
4231 {
4232 k = (simd_clone_subparts (atype)
4233 / simd_clone_subparts (arginfo[i].vectype));
4234 gcc_assert ((k & (k - 1)) == 0);
4235 vec<constructor_elt, va_gc> *ctor_elts;
4236 if (k != 1)
4237 vec_alloc (ctor_elts, k);
4238 else
4239 ctor_elts = NULL;
4240 for (l = 0; l < k; l++)
4241 {
4242 if (m == 0 && l == 0)
4243 vec_oprnd0
4244 = vect_get_vec_def_for_operand (vinfo,
4245 op, stmt_info);
4246 else
4247 vec_oprnd0
4248 = vect_get_vec_def_for_stmt_copy (vinfo,
4249 arginfo[i].op);
4250 arginfo[i].op = vec_oprnd0;
4251 if (k == 1)
4252 break;
4253 CONSTRUCTOR_APPEND_ELT (ctor_elts, NULL_TREE,
4254 vec_oprnd0);
4255 }
4256 if (k == 1)
4257 vargs.safe_push (vec_oprnd0);
4258 else
4259 {
4260 vec_oprnd0 = build_constructor (atype, ctor_elts);
4261 gassign *new_stmt
4262 = gimple_build_assign (make_ssa_name (atype),
4263 vec_oprnd0);
4264 vect_finish_stmt_generation (vinfo, stmt_info,
4265 new_stmt, gsi);
4266 vargs.safe_push (gimple_assign_lhs (new_stmt));
4267 }
4268 }
4269 }
4270 break;
4271 case SIMD_CLONE_ARG_TYPE_UNIFORM:
4272 vargs.safe_push (op);
4273 break;
4274 case SIMD_CLONE_ARG_TYPE_LINEAR_CONSTANT_STEP:
4275 case SIMD_CLONE_ARG_TYPE_LINEAR_REF_CONSTANT_STEP:
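/* A linear argument advances by a constant step per lane.  Arguments that
   are only linear within the simd lane are passed through unchanged.
   Otherwise the first copy builds a PHI that starts at the recorded base
   and is advanced by STEP * NCOPIES * NUNITS every loop iteration, and
   later copies simply add STEP * J * NUNITS to the PHI result.  */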
4276 if (j == 0)
4277 {
4278 gimple_seq stmts;
4279 arginfo[i].op
4280 = force_gimple_operand (unshare_expr (arginfo[i].op),
4281 &stmts, true, NULL_TREE);
4282 if (stmts != NULL)
4283 {
4284 basic_block new_bb;
4285 edge pe = loop_preheader_edge (loop);
4286 new_bb = gsi_insert_seq_on_edge_immediate (pe, stmts);
4287 gcc_assert (!new_bb);
4288 }
4289 if (arginfo[i].simd_lane_linear)
4290 {
4291 vargs.safe_push (arginfo[i].op);
4292 break;
4293 }
4294 tree phi_res = copy_ssa_name (op);
4295 gphi *new_phi = create_phi_node (phi_res, loop->header);
4296 loop_vinfo->add_stmt (new_phi);
4297 add_phi_arg (new_phi, arginfo[i].op,
4298 loop_preheader_edge (loop), UNKNOWN_LOCATION);
4299 enum tree_code code
4300 = POINTER_TYPE_P (TREE_TYPE (op))
4301 ? POINTER_PLUS_EXPR : PLUS_EXPR;
4302 tree type = POINTER_TYPE_P (TREE_TYPE (op))
4303 ? sizetype : TREE_TYPE (op);
4304 widest_int cst
4305 = wi::mul (bestn->simdclone->args[i].linear_step,
4306 ncopies * nunits);
4307 tree tcst = wide_int_to_tree (type, cst);
4308 tree phi_arg = copy_ssa_name (op);
4309 gassign *new_stmt
4310 = gimple_build_assign (phi_arg, code, phi_res, tcst);
4311 gimple_stmt_iterator si = gsi_after_labels (loop->header);
4312 gsi_insert_after (&si, new_stmt, GSI_NEW_STMT);
4313 loop_vinfo->add_stmt (new_stmt);
4314 add_phi_arg (new_phi, phi_arg, loop_latch_edge (loop),
4315 UNKNOWN_LOCATION);
4316 arginfo[i].op = phi_res;
4317 vargs.safe_push (phi_res);
4318 }
4319 else
4320 {
4321 enum tree_code code
4322 = POINTER_TYPE_P (TREE_TYPE (op))
4323 ? POINTER_PLUS_EXPR : PLUS_EXPR;
4324 tree type = POINTER_TYPE_P (TREE_TYPE (op))
4325 ? sizetype : TREE_TYPE (op);
4326 widest_int cst
4327 = wi::mul (bestn->simdclone->args[i].linear_step,
4328 j * nunits);
4329 tree tcst = wide_int_to_tree (type, cst);
4330 new_temp = make_ssa_name (TREE_TYPE (op));
4331 gassign *new_stmt
4332 = gimple_build_assign (new_temp, code,
4333 arginfo[i].op, tcst);
4334 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
4335 vargs.safe_push (new_temp);
4336 }
4337 break;
4338 case SIMD_CLONE_ARG_TYPE_LINEAR_VAL_CONSTANT_STEP:
4339 case SIMD_CLONE_ARG_TYPE_LINEAR_UVAL_CONSTANT_STEP:
4340 case SIMD_CLONE_ARG_TYPE_LINEAR_VARIABLE_STEP:
4341 case SIMD_CLONE_ARG_TYPE_LINEAR_REF_VARIABLE_STEP:
4342 case SIMD_CLONE_ARG_TYPE_LINEAR_VAL_VARIABLE_STEP:
4343 case SIMD_CLONE_ARG_TYPE_LINEAR_UVAL_VARIABLE_STEP:
4344 default:
4345 gcc_unreachable ();
4346 }
4347 }
4348
4349 gcall *new_call = gimple_build_call_vec (fndecl, vargs);
4350 if (vec_dest)
4351 {
4352 gcc_assert (ratype || simd_clone_subparts (rtype) == nunits);
4353 if (ratype)
4354 new_temp = create_tmp_var (ratype);
4355 else if (simd_clone_subparts (vectype)
4356 == simd_clone_subparts (rtype))
4357 new_temp = make_ssa_name (vec_dest, new_call);
4358 else
4359 new_temp = make_ssa_name (rtype, new_call);
4360 gimple_call_set_lhs (new_call, new_temp);
4361 }
4362 stmt_vec_info new_stmt_info
4363 = vect_finish_stmt_generation (vinfo, stmt_info, new_call, gsi);
4364
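/* Bring the clone's return value into the shape of VECTYPE: split a wider
   return vector (or a returned array) into several VECTYPE vectors, or
   collect several narrower return values into one vector through a
   CONSTRUCTOR.  */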
4365 if (vec_dest)
4366 {
4367 if (simd_clone_subparts (vectype) < nunits)
4368 {
4369 unsigned int k, l;
4370 poly_uint64 prec = GET_MODE_BITSIZE (TYPE_MODE (vectype));
4371 poly_uint64 bytes = GET_MODE_SIZE (TYPE_MODE (vectype));
4372 k = nunits / simd_clone_subparts (vectype);
4373 gcc_assert ((k & (k - 1)) == 0);
4374 for (l = 0; l < k; l++)
4375 {
4376 tree t;
4377 if (ratype)
4378 {
4379 t = build_fold_addr_expr (new_temp);
4380 t = build2 (MEM_REF, vectype, t,
4381 build_int_cst (TREE_TYPE (t), l * bytes));
4382 }
4383 else
4384 t = build3 (BIT_FIELD_REF, vectype, new_temp,
4385 bitsize_int (prec), bitsize_int (l * prec));
4386 gimple *new_stmt
4387 = gimple_build_assign (make_ssa_name (vectype), t);
4388 new_stmt_info
4389 = vect_finish_stmt_generation (vinfo, stmt_info,
4390 new_stmt, gsi);
4391
4392 if (j == 0 && l == 0)
4393 STMT_VINFO_VEC_STMT (stmt_info)
4394 = *vec_stmt = new_stmt_info;
4395 else
4396 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt_info;
4397
4398 prev_stmt_info = new_stmt_info;
4399 }
4400
4401 if (ratype)
4402 vect_clobber_variable (vinfo, stmt_info, gsi, new_temp);
4403 continue;
4404 }
4405 else if (simd_clone_subparts (vectype) > nunits)
4406 {
4407 unsigned int k = (simd_clone_subparts (vectype)
4408 / simd_clone_subparts (rtype));
4409 gcc_assert ((k & (k - 1)) == 0);
4410 if ((j & (k - 1)) == 0)
4411 vec_alloc (ret_ctor_elts, k);
4412 if (ratype)
4413 {
4414 unsigned int m, o = nunits / simd_clone_subparts (rtype);
4415 for (m = 0; m < o; m++)
4416 {
4417 tree tem = build4 (ARRAY_REF, rtype, new_temp,
4418 size_int (m), NULL_TREE, NULL_TREE);
4419 gimple *new_stmt
4420 = gimple_build_assign (make_ssa_name (rtype), tem);
4421 new_stmt_info
4422 = vect_finish_stmt_generation (vinfo, stmt_info,
4423 new_stmt, gsi);
4424 CONSTRUCTOR_APPEND_ELT (ret_ctor_elts, NULL_TREE,
4425 gimple_assign_lhs (new_stmt));
4426 }
4427 vect_clobber_variable (vinfo, stmt_info, gsi, new_temp);
4428 }
4429 else
4430 CONSTRUCTOR_APPEND_ELT (ret_ctor_elts, NULL_TREE, new_temp);
4431 if ((j & (k - 1)) != k - 1)
4432 continue;
4433 vec_oprnd0 = build_constructor (vectype, ret_ctor_elts);
4434 gimple *new_stmt
4435 = gimple_build_assign (make_ssa_name (vec_dest), vec_oprnd0);
4436 new_stmt_info
4437 = vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
4438
4439 if ((unsigned) j == k - 1)
4440 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt_info;
4441 else
4442 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt_info;
4443
4444 prev_stmt_info = new_stmt_info;
4445 continue;
4446 }
4447 else if (ratype)
4448 {
4449 tree t = build_fold_addr_expr (new_temp);
4450 t = build2 (MEM_REF, vectype, t,
4451 build_int_cst (TREE_TYPE (t), 0));
4452 gimple *new_stmt
4453 = gimple_build_assign (make_ssa_name (vec_dest), t);
4454 new_stmt_info
4455 = vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
4456 vect_clobber_variable (vinfo, stmt_info, gsi, new_temp);
4457 }
4458 }
4459
4460 if (j == 0)
4461 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt_info;
4462 else
4463 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt_info;
4464
4465 prev_stmt_info = new_stmt_info;
4466 }
4467
4468 vargs.release ();
4469
4470 /* The call in STMT might prevent it from being removed by DCE.
4471 However, we cannot remove it here, because of the way the SSA name
4472 it defines is mapped to the new definition.  So just replace the
4473 rhs of the statement with something harmless. */
4474
4475 if (slp_node)
4476 return true;
4477
4478 gimple *new_stmt;
4479 if (scalar_dest)
4480 {
4481 type = TREE_TYPE (scalar_dest);
4482 lhs = gimple_call_lhs (vect_orig_stmt (stmt_info)->stmt);
4483 new_stmt = gimple_build_assign (lhs, build_zero_cst (type));
4484 }
4485 else
4486 new_stmt = gimple_build_nop ();
4487 vinfo->replace_stmt (gsi, vect_orig_stmt (stmt_info), new_stmt);
4488 unlink_stmt_vdef (stmt);
4489
4490 return true;
4491 }
4492
4493
4494 /* Function vect_gen_widened_results_half
4495
4496 Create a vector stmt whose code, number of operands, and result
4497 variable are CODE, OP_TYPE, and VEC_DEST, and whose operands are
4498 VEC_OPRND0 and VEC_OPRND1 (VEC_OPRND1 is ignored when OP_TYPE is
4499 not binary_op).  The new vector stmt, which generates one half of
4500 the widened result, is to be inserted at GSI.
4501 STMT_INFO is the original scalar stmt that we are vectorizing. */
4502
4503 static gimple *
4504 vect_gen_widened_results_half (vec_info *vinfo, enum tree_code code,
4505 tree vec_oprnd0, tree vec_oprnd1, int op_type,
4506 tree vec_dest, gimple_stmt_iterator *gsi,
4507 stmt_vec_info stmt_info)
4508 {
4509 gimple *new_stmt;
4510 tree new_temp;
4511
4512 /* Generate half of the widened result: */
4513 gcc_assert (op_type == TREE_CODE_LENGTH (code));
4514 if (op_type != binary_op)
4515 vec_oprnd1 = NULL;
4516 new_stmt = gimple_build_assign (vec_dest, code, vec_oprnd0, vec_oprnd1);
4517 new_temp = make_ssa_name (vec_dest, new_stmt);
4518 gimple_assign_set_lhs (new_stmt, new_temp);
4519 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
4520
4521 return new_stmt;
4522 }
4523
4524
4525 /* Get vectorized definitions for loop-based vectorization of STMT_INFO.
4526 For the first operand we call vect_get_vec_def_for_operand (with OPRND
4527 containing the scalar operand), and for the rest we get a copy with
4528 vect_get_vec_def_for_stmt_copy() using the previous vector definition
4529 (stored in OPRND). See vect_get_vec_def_for_stmt_copy() for details.
4530 The vectors are collected into VEC_OPRNDS. */
4531
4532 static void
4533 vect_get_loop_based_defs (vec_info *vinfo, tree *oprnd, stmt_vec_info stmt_info,
4534 vec<tree> *vec_oprnds, int multi_step_cvt)
4535 {
4536 tree vec_oprnd;
4537
4538 /* Get the first vector operand. */
4539 /* All the vector operands except the very first one (which comes from the
4540 scalar operand) are stmt copies. */
4541 if (TREE_CODE (TREE_TYPE (*oprnd)) != VECTOR_TYPE)
4542 vec_oprnd = vect_get_vec_def_for_operand (vinfo, *oprnd, stmt_info);
4543 else
4544 vec_oprnd = vect_get_vec_def_for_stmt_copy (vinfo, *oprnd);
4545
4546 vec_oprnds->quick_push (vec_oprnd);
4547
4548 /* Get second vector operand. */
4549 vec_oprnd = vect_get_vec_def_for_stmt_copy (vinfo, vec_oprnd);
4550 vec_oprnds->quick_push (vec_oprnd);
4551
4552 *oprnd = vec_oprnd;
4553
4554 /* For conversion in multiple steps, continue to get operands
4555 recursively. */
4556 if (multi_step_cvt)
4557 vect_get_loop_based_defs (vinfo, oprnd, stmt_info, vec_oprnds,
4558 multi_step_cvt - 1);
4559 }
4560
4561
4562 /* Create vectorized demotion statements for vector operands from VEC_OPRNDS.
4563 For multi-step conversions store the resulting vectors and call the function
4564 recursively. */
4565
4566 static void
4567 vect_create_vectorized_demotion_stmts (vec_info *vinfo, vec<tree> *vec_oprnds,
4568 int multi_step_cvt,
4569 stmt_vec_info stmt_info,
4570 vec<tree> vec_dsts,
4571 gimple_stmt_iterator *gsi,
4572 slp_tree slp_node, enum tree_code code,
4573 stmt_vec_info *prev_stmt_info)
4574 {
4575 unsigned int i;
4576 tree vop0, vop1, new_tmp, vec_dest;
4577
4578 vec_dest = vec_dsts.pop ();
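/* Pop the destination variable for this demotion level; it is pushed back
   at the end so the set of destinations is preserved for outer recursion
   levels.  */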
4579
4580 for (i = 0; i < vec_oprnds->length (); i += 2)
4581 {
4582 /* Create demotion operation. */
4583 vop0 = (*vec_oprnds)[i];
4584 vop1 = (*vec_oprnds)[i + 1];
4585 gassign *new_stmt = gimple_build_assign (vec_dest, code, vop0, vop1);
4586 new_tmp = make_ssa_name (vec_dest, new_stmt);
4587 gimple_assign_set_lhs (new_stmt, new_tmp);
4588 stmt_vec_info new_stmt_info
4589 = vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
4590
4591 if (multi_step_cvt)
4592 /* Store the resulting vector for next recursive call. */
4593 (*vec_oprnds)[i/2] = new_tmp;
4594 else
4595 {
4596 /* This is the last step of the conversion sequence.  Store the
4597 vectors in SLP_NODE or in the vector info of the scalar statement
4598 (or in the STMT_VINFO_RELATED_STMT chain). */
4599 if (slp_node)
4600 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt_info);
4601 else
4602 {
4603 if (!*prev_stmt_info)
4604 STMT_VINFO_VEC_STMT (stmt_info) = new_stmt_info;
4605 else
4606 STMT_VINFO_RELATED_STMT (*prev_stmt_info) = new_stmt_info;
4607
4608 *prev_stmt_info = new_stmt_info;
4609 }
4610 }
4611 }
4612
4613 /* For multi-step demotion operations we first generate demotion operations
4614 from the source type to the intermediate types, and then combine the
4615 results (stored in VEC_OPRNDS) with a demotion operation to the
4616 destination type. */
4617 if (multi_step_cvt)
4618 {
4619 /* At each level of recursion we have half of the operands we had at the
4620 previous level. */
4621 vec_oprnds->truncate ((i+1)/2);
4622 vect_create_vectorized_demotion_stmts (vinfo, vec_oprnds,
4623 multi_step_cvt - 1,
4624 stmt_info, vec_dsts, gsi,
4625 slp_node, VEC_PACK_TRUNC_EXPR,
4626 prev_stmt_info);
4627 }
4628
4629 vec_dsts.quick_push (vec_dest);
4630 }
4631
4632
4633 /* Create vectorized promotion statements for vector operands from VEC_OPRNDS0
4634 and VEC_OPRNDS1, for a binary operation associated with scalar statement
4635 STMT_INFO. For multi-step conversions store the resulting vectors and
4636 call the function recursively. */
4637
4638 static void
4639 vect_create_vectorized_promotion_stmts (vec_info *vinfo,
4640 vec<tree> *vec_oprnds0,
4641 vec<tree> *vec_oprnds1,
4642 stmt_vec_info stmt_info, tree vec_dest,
4643 gimple_stmt_iterator *gsi,
4644 enum tree_code code1,
4645 enum tree_code code2, int op_type)
4646 {
4647 int i;
4648 tree vop0, vop1, new_tmp1, new_tmp2;
4649 gimple *new_stmt1, *new_stmt2;
4650 vec<tree> vec_tmp = vNULL;
4651
4652 vec_tmp.create (vec_oprnds0->length () * 2);
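/* Each input vector yields two result vectors holding the two halves of
   the widened elements; collect them in order and hand them back through
   VEC_OPRNDS0 for the next conversion step.  */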
4653 FOR_EACH_VEC_ELT (*vec_oprnds0, i, vop0)
4654 {
4655 if (op_type == binary_op)
4656 vop1 = (*vec_oprnds1)[i];
4657 else
4658 vop1 = NULL_TREE;
4659
4660 /* Generate the two halves of promotion operation. */
4661 new_stmt1 = vect_gen_widened_results_half (vinfo, code1, vop0, vop1,
4662 op_type, vec_dest, gsi,
4663 stmt_info);
4664 new_stmt2 = vect_gen_widened_results_half (vinfo, code2, vop0, vop1,
4665 op_type, vec_dest, gsi,
4666 stmt_info);
4667 if (is_gimple_call (new_stmt1))
4668 {
4669 new_tmp1 = gimple_call_lhs (new_stmt1);
4670 new_tmp2 = gimple_call_lhs (new_stmt2);
4671 }
4672 else
4673 {
4674 new_tmp1 = gimple_assign_lhs (new_stmt1);
4675 new_tmp2 = gimple_assign_lhs (new_stmt2);
4676 }
4677
4678 /* Store the results for the next step. */
4679 vec_tmp.quick_push (new_tmp1);
4680 vec_tmp.quick_push (new_tmp2);
4681 }
4682
4683 vec_oprnds0->release ();
4684 *vec_oprnds0 = vec_tmp;
4685 }
4686
4687
4688 /* Check if STMT_INFO performs a conversion operation that can be vectorized.
4689 If VEC_STMT is also passed, vectorize STMT_INFO: create a vectorized
4690 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
4691 Return true if STMT_INFO is vectorizable in this way. */
4692
4693 static bool
4694 vectorizable_conversion (vec_info *vinfo,
4695 stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
4696 stmt_vec_info *vec_stmt, slp_tree slp_node,
4697 stmt_vector_for_cost *cost_vec)
4698 {
4699 tree vec_dest;
4700 tree scalar_dest;
4701 tree op0, op1 = NULL_TREE;
4702 tree vec_oprnd0 = NULL_TREE, vec_oprnd1 = NULL_TREE;
4703 loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
4704 enum tree_code code, code1 = ERROR_MARK, code2 = ERROR_MARK;
4705 enum tree_code codecvt1 = ERROR_MARK, codecvt2 = ERROR_MARK;
4706 tree new_temp;
4707 enum vect_def_type dt[2] = {vect_unknown_def_type, vect_unknown_def_type};
4708 int ndts = 2;
4709 stmt_vec_info prev_stmt_info;
4710 poly_uint64 nunits_in;
4711 poly_uint64 nunits_out;
4712 tree vectype_out, vectype_in;
4713 int ncopies, i, j;
4714 tree lhs_type, rhs_type;
4715 enum { NARROW, NONE, WIDEN } modifier;
4716 vec<tree> vec_oprnds0 = vNULL;
4717 vec<tree> vec_oprnds1 = vNULL;
4718 tree vop0;
4719 bb_vec_info bb_vinfo = dyn_cast <bb_vec_info> (vinfo);
4720 int multi_step_cvt = 0;
4721 vec<tree> interm_types = vNULL;
4722 tree last_oprnd, intermediate_type, cvt_type = NULL_TREE;
4723 int op_type;
4724 unsigned short fltsz;
4725
4726 /* Is STMT a vectorizable conversion? */
4727
4728 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
4729 return false;
4730
4731 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
4732 && ! vec_stmt)
4733 return false;
4734
4735 gassign *stmt = dyn_cast <gassign *> (stmt_info->stmt);
4736 if (!stmt)
4737 return false;
4738
4739 if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME)
4740 return false;
4741
4742 code = gimple_assign_rhs_code (stmt);
4743 if (!CONVERT_EXPR_CODE_P (code)
4744 && code != FIX_TRUNC_EXPR
4745 && code != FLOAT_EXPR
4746 && code != WIDEN_MULT_EXPR
4747 && code != WIDEN_LSHIFT_EXPR)
4748 return false;
4749
4750 op_type = TREE_CODE_LENGTH (code);
4751
4752 /* Check types of lhs and rhs. */
4753 scalar_dest = gimple_assign_lhs (stmt);
4754 lhs_type = TREE_TYPE (scalar_dest);
4755 vectype_out = STMT_VINFO_VECTYPE (stmt_info);
4756
4757 /* Check the operands of the operation. */
4758 slp_tree slp_op0, slp_op1 = NULL;
4759 if (!vect_is_simple_use (vinfo, stmt_info, slp_node,
4760 0, &op0, &slp_op0, &dt[0], &vectype_in))
4761 {
4762 if (dump_enabled_p ())
4763 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4764 "use not simple.\n");
4765 return false;
4766 }
4767
4768 rhs_type = TREE_TYPE (op0);
4769 if ((code != FIX_TRUNC_EXPR && code != FLOAT_EXPR)
4770 && !((INTEGRAL_TYPE_P (lhs_type)
4771 && INTEGRAL_TYPE_P (rhs_type))
4772 || (SCALAR_FLOAT_TYPE_P (lhs_type)
4773 && SCALAR_FLOAT_TYPE_P (rhs_type))))
4774 return false;
4775
4776 if (!VECTOR_BOOLEAN_TYPE_P (vectype_out)
4777 && ((INTEGRAL_TYPE_P (lhs_type)
4778 && !type_has_mode_precision_p (lhs_type))
4779 || (INTEGRAL_TYPE_P (rhs_type)
4780 && !type_has_mode_precision_p (rhs_type))))
4781 {
4782 if (dump_enabled_p ())
4783 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4784 "type conversion to/from bit-precision unsupported."
4785 "\n");
4786 return false;
4787 }
4788
4789 if (op_type == binary_op)
4790 {
4791 gcc_assert (code == WIDEN_MULT_EXPR || code == WIDEN_LSHIFT_EXPR);
4792
4793 op1 = gimple_assign_rhs2 (stmt);
4794 tree vectype1_in;
4795 if (!vect_is_simple_use (vinfo, stmt_info, slp_node, 1,
4796 &op1, &slp_op1, &dt[1], &vectype1_in))
4797 {
4798 if (dump_enabled_p ())
4799 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4800 "use not simple.\n");
4801 return false;
4802 }
4803 /* For WIDEN_MULT_EXPR, if OP0 is a constant, use the type of
4804 OP1. */
4805 if (!vectype_in)
4806 vectype_in = vectype1_in;
4807 }
4808
4809 /* If op0 is an external or constant def, infer the vector type
4810 from the scalar type. */
4811 if (!vectype_in)
4812 vectype_in = get_vectype_for_scalar_type (vinfo, rhs_type, slp_node);
4813 if (vec_stmt)
4814 gcc_assert (vectype_in);
4815 if (!vectype_in)
4816 {
4817 if (dump_enabled_p ())
4818 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4819 "no vectype for scalar type %T\n", rhs_type);
4820
4821 return false;
4822 }
4823
4824 if (VECTOR_BOOLEAN_TYPE_P (vectype_out)
4825 && !VECTOR_BOOLEAN_TYPE_P (vectype_in))
4826 {
4827 if (dump_enabled_p ())
4828 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4829 "can't convert between boolean and non "
4830 "boolean vectors %T\n", rhs_type);
4831
4832 return false;
4833 }
4834
4835 nunits_in = TYPE_VECTOR_SUBPARTS (vectype_in);
4836 nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
4837 if (known_eq (nunits_out, nunits_in))
4838 modifier = NONE;
4839 else if (multiple_p (nunits_out, nunits_in))
4840 modifier = NARROW;
4841 else
4842 {
4843 gcc_checking_assert (multiple_p (nunits_in, nunits_out));
4844 modifier = WIDEN;
4845 }
4846
4847 /* Multiple types in SLP are handled by creating the appropriate number of
4848 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
4849 case of SLP. */
4850 if (slp_node)
4851 ncopies = 1;
4852 else if (modifier == NARROW)
4853 ncopies = vect_get_num_copies (loop_vinfo, vectype_out);
4854 else
4855 ncopies = vect_get_num_copies (loop_vinfo, vectype_in);
4856
4857 /* Sanity check: make sure that at least one copy of the vectorized stmt
4858 needs to be generated. */
4859 gcc_assert (ncopies >= 1);
4860
4861 bool found_mode = false;
4862 scalar_mode lhs_mode = SCALAR_TYPE_MODE (lhs_type);
4863 scalar_mode rhs_mode = SCALAR_TYPE_MODE (rhs_type);
4864 opt_scalar_mode rhs_mode_iter;
4865
4866 /* Supportable by target? */
4867 switch (modifier)
4868 {
4869 case NONE:
4870 if (code != FIX_TRUNC_EXPR
4871 && code != FLOAT_EXPR
4872 && !CONVERT_EXPR_CODE_P (code))
4873 return false;
4874 if (supportable_convert_operation (code, vectype_out, vectype_in, &code1))
4875 break;
4876 /* FALLTHRU */
4877 unsupported:
4878 if (dump_enabled_p ())
4879 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4880 "conversion not supported by target.\n");
4881 return false;
4882
4883 case WIDEN:
4884 if (supportable_widening_operation (vinfo, code, stmt_info, vectype_out,
4885 vectype_in, &code1, &code2,
4886 &multi_step_cvt, &interm_types))
4887 {
4888 /* Binary widening operation can only be supported directly by the
4889 architecture. */
4890 gcc_assert (!(multi_step_cvt && op_type == binary_op));
4891 break;
4892 }
4893
4894 if (code != FLOAT_EXPR
4895 || GET_MODE_SIZE (lhs_mode) <= GET_MODE_SIZE (rhs_mode))
4896 goto unsupported;
4897
4898 fltsz = GET_MODE_SIZE (lhs_mode);
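/* The widening is not supported directly, so for an integer-to-float
   conversion try promoting the input to successively wider integer modes
   (at most the size of the float result) and then converting that
   intermediate type to the float type, recording any intermediate vector
   types that are needed.  */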
4899 FOR_EACH_2XWIDER_MODE (rhs_mode_iter, rhs_mode)
4900 {
4901 rhs_mode = rhs_mode_iter.require ();
4902 if (GET_MODE_SIZE (rhs_mode) > fltsz)
4903 break;
4904
4905 cvt_type
4906 = build_nonstandard_integer_type (GET_MODE_BITSIZE (rhs_mode), 0);
4907 cvt_type = get_same_sized_vectype (cvt_type, vectype_in);
4908 if (cvt_type == NULL_TREE)
4909 goto unsupported;
4910
4911 if (GET_MODE_SIZE (rhs_mode) == fltsz)
4912 {
4913 if (!supportable_convert_operation (code, vectype_out,
4914 cvt_type, &codecvt1))
4915 goto unsupported;
4916 }
4917 else if (!supportable_widening_operation (vinfo, code, stmt_info,
4918 vectype_out, cvt_type,
4919 &codecvt1, &codecvt2,
4920 &multi_step_cvt,
4921 &interm_types))
4922 continue;
4923 else
4924 gcc_assert (multi_step_cvt == 0);
4925
4926 if (supportable_widening_operation (vinfo, NOP_EXPR, stmt_info,
4927 cvt_type,
4928 vectype_in, &code1, &code2,
4929 &multi_step_cvt, &interm_types))
4930 {
4931 found_mode = true;
4932 break;
4933 }
4934 }
4935
4936 if (!found_mode)
4937 goto unsupported;
4938
4939 if (GET_MODE_SIZE (rhs_mode) == fltsz)
4940 codecvt2 = ERROR_MARK;
4941 else
4942 {
4943 multi_step_cvt++;
4944 interm_types.safe_push (cvt_type);
4945 cvt_type = NULL_TREE;
4946 }
4947 break;
4948
4949 case NARROW:
4950 gcc_assert (op_type == unary_op);
4951 if (supportable_narrowing_operation (code, vectype_out, vectype_in,
4952 &code1, &multi_step_cvt,
4953 &interm_types))
4954 break;
4955
4956 if (code != FIX_TRUNC_EXPR
4957 || GET_MODE_SIZE (lhs_mode) >= GET_MODE_SIZE (rhs_mode))
4958 goto unsupported;
4959
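/* The narrowing conversion is not supported directly, so try doing the
   FIX_TRUNC to a same-width integer type first and then narrowing that
   integer result to the destination type.  */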
4960 cvt_type
4961 = build_nonstandard_integer_type (GET_MODE_BITSIZE (rhs_mode), 0);
4962 cvt_type = get_same_sized_vectype (cvt_type, vectype_in);
4963 if (cvt_type == NULL_TREE)
4964 goto unsupported;
4965 if (!supportable_convert_operation (code, cvt_type, vectype_in,
4966 &codecvt1))
4967 goto unsupported;
4968 if (supportable_narrowing_operation (NOP_EXPR, vectype_out, cvt_type,
4969 &code1, &multi_step_cvt,
4970 &interm_types))
4971 break;
4972 goto unsupported;
4973
4974 default:
4975 gcc_unreachable ();
4976 }
4977
4978 if (!vec_stmt) /* transformation not required. */
4979 {
4980 if (slp_node
4981 && (!vect_maybe_update_slp_op_vectype (slp_op0, vectype_in)
4982 || !vect_maybe_update_slp_op_vectype (slp_op1, vectype_in)))
4983 {
4984 if (dump_enabled_p ())
4985 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4986 "incompatible vector types for invariants\n");
4987 return false;
4988 }
4989 DUMP_VECT_SCOPE ("vectorizable_conversion");
4990 if (modifier == NONE)
4991 {
4992 STMT_VINFO_TYPE (stmt_info) = type_conversion_vec_info_type;
4993 vect_model_simple_cost (vinfo, stmt_info, ncopies, dt, ndts, slp_node,
4994 cost_vec);
4995 }
4996 else if (modifier == NARROW)
4997 {
4998 STMT_VINFO_TYPE (stmt_info) = type_demotion_vec_info_type;
4999 /* The final packing step produces one vector result per copy. */
5000 unsigned int nvectors
5001 = (slp_node ? SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node) : ncopies);
5002 vect_model_promotion_demotion_cost (stmt_info, dt, nvectors,
5003 multi_step_cvt, cost_vec);
5004 }
5005 else
5006 {
5007 STMT_VINFO_TYPE (stmt_info) = type_promotion_vec_info_type;
5008 /* The initial unpacking step produces two vector results
5009 per copy. MULTI_STEP_CVT is 0 for a single conversion,
5010 so >> MULTI_STEP_CVT divides by 2^(number of steps - 1). */
5011 unsigned int nvectors
5012 = (slp_node
5013 ? SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node) >> multi_step_cvt
5014 : ncopies * 2);
5015 vect_model_promotion_demotion_cost (stmt_info, dt, nvectors,
5016 multi_step_cvt, cost_vec);
5017 }
5018 interm_types.release ();
5019 return true;
5020 }
5021
5022 /* Transform. */
5023 if (dump_enabled_p ())
5024 dump_printf_loc (MSG_NOTE, vect_location,
5025 "transform conversion. ncopies = %d.\n", ncopies);
5026
5027 if (op_type == binary_op)
5028 {
5029 if (CONSTANT_CLASS_P (op0))
5030 op0 = fold_convert (TREE_TYPE (op1), op0);
5031 else if (CONSTANT_CLASS_P (op1))
5032 op1 = fold_convert (TREE_TYPE (op0), op1);
5033 }
5034
5035 /* In case of multi-step conversion, we first generate conversion operations
5036 to the intermediate types, and then from those types to the final one.
5037 We create vector destinations for the intermediate types (TYPES) received
5038 from supportable_*_operation, and store them in the correct order
5039 for future use in vect_create_vectorized_*_stmts (). */
5040 auto_vec<tree> vec_dsts (multi_step_cvt + 1);
5041 vec_dest = vect_create_destination_var (scalar_dest,
5042 (cvt_type && modifier == WIDEN)
5043 ? cvt_type : vectype_out);
5044 vec_dsts.quick_push (vec_dest);
5045
5046 if (multi_step_cvt)
5047 {
5048 for (i = interm_types.length () - 1;
5049 interm_types.iterate (i, &intermediate_type); i--)
5050 {
5051 vec_dest = vect_create_destination_var (scalar_dest,
5052 intermediate_type);
5053 vec_dsts.quick_push (vec_dest);
5054 }
5055 }
5056
5057 if (cvt_type)
5058 vec_dest = vect_create_destination_var (scalar_dest,
5059 modifier == WIDEN
5060 ? vectype_out : cvt_type);
5061
5062 if (!slp_node)
5063 {
5064 if (modifier == WIDEN)
5065 {
5066 vec_oprnds0.create (multi_step_cvt ? vect_pow2 (multi_step_cvt) : 1);
5067 if (op_type == binary_op)
5068 vec_oprnds1.create (1);
5069 }
5070 else if (modifier == NARROW)
5071 vec_oprnds0.create (
5072 2 * (multi_step_cvt ? vect_pow2 (multi_step_cvt) : 1));
5073 }
5074 else if (code == WIDEN_LSHIFT_EXPR)
5075 vec_oprnds1.create (slp_node->vec_stmts_size);
5076
5077 last_oprnd = op0;
5078 prev_stmt_info = NULL;
5079 switch (modifier)
5080 {
5081 case NONE:
5082 for (j = 0; j < ncopies; j++)
5083 {
5084 if (j == 0)
5085 vect_get_vec_defs (vinfo, op0, NULL, stmt_info, &vec_oprnds0,
5086 NULL, slp_node);
5087 else
5088 vect_get_vec_defs_for_stmt_copy (vinfo, &vec_oprnds0, NULL);
5089
5090 FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
5091 {
5092 stmt_vec_info new_stmt_info;
5093 /* Arguments are ready, create the new vector stmt. */
5094 gcc_assert (TREE_CODE_LENGTH (code1) == unary_op);
5095 gassign *new_stmt = gimple_build_assign (vec_dest, code1, vop0);
5096 new_temp = make_ssa_name (vec_dest, new_stmt);
5097 gimple_assign_set_lhs (new_stmt, new_temp);
5098 new_stmt_info
5099 = vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
5100
5101 if (slp_node)
5102 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt_info);
5103 else
5104 {
5105 if (!prev_stmt_info)
5106 STMT_VINFO_VEC_STMT (stmt_info)
5107 = *vec_stmt = new_stmt_info;
5108 else
5109 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt_info;
5110 prev_stmt_info = new_stmt_info;
5111 }
5112 }
5113 }
5114 break;
5115
5116 case WIDEN:
5117 /* In case the vectorization factor (VF) is bigger than the number
5118 of elements that we can fit in a vectype (nunits), we have to
5119 generate more than one vector stmt - i.e. - we need to "unroll"
5120 the vector stmt by a factor VF/nunits. */
5121 for (j = 0; j < ncopies; j++)
5122 {
5123 /* Handle uses. */
5124 if (j == 0)
5125 {
5126 if (slp_node)
5127 {
5128 if (code == WIDEN_LSHIFT_EXPR)
5129 {
5130 unsigned int k;
5131
5132 vec_oprnd1 = op1;
5133 /* Store vec_oprnd1 for every vector stmt to be created
5134 for SLP_NODE. We check during the analysis that all
5135 the shift arguments are the same. */
5136 for (k = 0; k < slp_node->vec_stmts_size - 1; k++)
5137 vec_oprnds1.quick_push (vec_oprnd1);
5138
5139 vect_get_vec_defs (vinfo, op0, NULL_TREE, stmt_info,
5140 &vec_oprnds0, NULL, slp_node);
5141 }
5142 else
5143 vect_get_vec_defs (vinfo, op0, op1, stmt_info, &vec_oprnds0,
5144 &vec_oprnds1, slp_node);
5145 }
5146 else
5147 {
5148 vec_oprnd0 = vect_get_vec_def_for_operand (vinfo,
5149 op0, stmt_info);
5150 vec_oprnds0.quick_push (vec_oprnd0);
5151 if (op_type == binary_op)
5152 {
5153 if (code == WIDEN_LSHIFT_EXPR)
5154 vec_oprnd1 = op1;
5155 else
5156 vec_oprnd1
5157 = vect_get_vec_def_for_operand (vinfo,
5158 op1, stmt_info);
5159 vec_oprnds1.quick_push (vec_oprnd1);
5160 }
5161 }
5162 }
5163 else
5164 {
5165 vec_oprnd0 = vect_get_vec_def_for_stmt_copy (vinfo, vec_oprnd0);
5166 vec_oprnds0.truncate (0);
5167 vec_oprnds0.quick_push (vec_oprnd0);
5168 if (op_type == binary_op)
5169 {
5170 if (code == WIDEN_LSHIFT_EXPR)
5171 vec_oprnd1 = op1;
5172 else
5173 vec_oprnd1 = vect_get_vec_def_for_stmt_copy (vinfo,
5174 vec_oprnd1);
5175 vec_oprnds1.truncate (0);
5176 vec_oprnds1.quick_push (vec_oprnd1);
5177 }
5178 }
5179
5180 /* Arguments are ready. Create the new vector stmts. */
5181 for (i = multi_step_cvt; i >= 0; i--)
5182 {
5183 tree this_dest = vec_dsts[i];
5184 enum tree_code c1 = code1, c2 = code2;
5185 if (i == 0 && codecvt2 != ERROR_MARK)
5186 {
5187 c1 = codecvt1;
5188 c2 = codecvt2;
5189 }
5190 vect_create_vectorized_promotion_stmts (vinfo, &vec_oprnds0,
5191 &vec_oprnds1, stmt_info,
5192 this_dest, gsi,
5193 c1, c2, op_type);
5194 }
5195
5196 FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
5197 {
5198 stmt_vec_info new_stmt_info;
5199 if (cvt_type)
5200 {
5201 gcc_assert (TREE_CODE_LENGTH (codecvt1) == unary_op);
5202 new_temp = make_ssa_name (vec_dest);
5203 gassign *new_stmt
5204 = gimple_build_assign (new_temp, codecvt1, vop0);
5205 new_stmt_info
5206 = vect_finish_stmt_generation (vinfo, stmt_info,
5207 new_stmt, gsi);
5208 }
5209 else
5210 new_stmt_info = vinfo->lookup_def (vop0);
5211
5212 if (slp_node)
5213 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt_info);
5214 else
5215 {
5216 if (!prev_stmt_info)
5217 STMT_VINFO_VEC_STMT (stmt_info) = new_stmt_info;
5218 else
5219 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt_info;
5220 prev_stmt_info = new_stmt_info;
5221 }
5222 }
5223 }
5224
5225 *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
5226 break;
5227
5228 case NARROW:
5229 /* In case the vectorization factor (VF) is bigger than the number
5230 of elements that we can fit in a vectype (nunits), we have to
5231 generate more than one vector stmt - i.e. - we need to "unroll"
5232 the vector stmt by a factor VF/nunits. */
5233 for (j = 0; j < ncopies; j++)
5234 {
5235 /* Handle uses. */
5236 if (slp_node)
5237 vect_get_vec_defs (vinfo, op0, NULL_TREE, stmt_info, &vec_oprnds0,
5238 NULL, slp_node);
5239 else
5240 {
5241 vec_oprnds0.truncate (0);
5242 vect_get_loop_based_defs (vinfo,
5243 &last_oprnd, stmt_info, &vec_oprnds0,
5244 vect_pow2 (multi_step_cvt) - 1);
5245 }
5246
5247 /* Arguments are ready. Create the new vector stmts. */
5248 if (cvt_type)
5249 FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
5250 {
5251 gcc_assert (TREE_CODE_LENGTH (codecvt1) == unary_op);
5252 new_temp = make_ssa_name (vec_dest);
5253 gassign *new_stmt
5254 = gimple_build_assign (new_temp, codecvt1, vop0);
5255 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
5256 vec_oprnds0[i] = new_temp;
5257 }
5258
5259 vect_create_vectorized_demotion_stmts (vinfo, &vec_oprnds0,
5260 multi_step_cvt,
5261 stmt_info, vec_dsts, gsi,
5262 slp_node, code1,
5263 &prev_stmt_info);
5264 }
5265
5266 *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
5267 break;
5268 }
5269
5270 vec_oprnds0.release ();
5271 vec_oprnds1.release ();
5272 interm_types.release ();
5273
5274 return true;
5275 }
5276
5277 /* Return true if we can assume from the scalar form of STMT_INFO that
5278 neither the scalar nor the vector forms will generate code. STMT_INFO
5279 is known not to involve a data reference. */
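/* For example (illustrative): a cast between 'int' and 'unsigned int', or a
   VIEW_CONVERT_EXPR that merely reinterprets bits of the same size, needs no
   code in either the scalar or the vector form.  */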
5280
5281 bool
5282 vect_nop_conversion_p (stmt_vec_info stmt_info)
5283 {
5284 gassign *stmt = dyn_cast <gassign *> (stmt_info->stmt);
5285 if (!stmt)
5286 return false;
5287
5288 tree lhs = gimple_assign_lhs (stmt);
5289 tree_code code = gimple_assign_rhs_code (stmt);
5290 tree rhs = gimple_assign_rhs1 (stmt);
5291
5292 if (code == SSA_NAME || code == VIEW_CONVERT_EXPR)
5293 return true;
5294
5295 if (CONVERT_EXPR_CODE_P (code))
5296 return tree_nop_conversion_p (TREE_TYPE (lhs), TREE_TYPE (rhs));
5297
5298 return false;
5299 }
5300
5301 /* Function vectorizable_assignment.
5302
5303 Check if STMT_INFO performs an assignment (copy) that can be vectorized.
5304 If VEC_STMT is also passed, vectorize the STMT_INFO: create a vectorized
5305 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
5306 Return true if STMT_INFO is vectorizable in this way. */
5307
5308 static bool
5309 vectorizable_assignment (vec_info *vinfo,
5310 stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
5311 stmt_vec_info *vec_stmt, slp_tree slp_node,
5312 stmt_vector_for_cost *cost_vec)
5313 {
5314 tree vec_dest;
5315 tree scalar_dest;
5316 tree op;
5317 loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
5318 tree new_temp;
5319 enum vect_def_type dt[1] = {vect_unknown_def_type};
5320 int ndts = 1;
5321 int ncopies;
5322 int i, j;
5323 vec<tree> vec_oprnds = vNULL;
5324 tree vop;
5325 bb_vec_info bb_vinfo = dyn_cast <bb_vec_info> (vinfo);
5326 stmt_vec_info prev_stmt_info = NULL;
5327 enum tree_code code;
5328 tree vectype_in;
5329
5330 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
5331 return false;
5332
5333 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
5334 && ! vec_stmt)
5335 return false;
5336
5337 /* Is this a vectorizable assignment? */
5338 gassign *stmt = dyn_cast <gassign *> (stmt_info->stmt);
5339 if (!stmt)
5340 return false;
5341
5342 scalar_dest = gimple_assign_lhs (stmt);
5343 if (TREE_CODE (scalar_dest) != SSA_NAME)
5344 return false;
5345
5346 if (STMT_VINFO_DATA_REF (stmt_info))
5347 return false;
5348
5349 code = gimple_assign_rhs_code (stmt);
5350 if (!(gimple_assign_single_p (stmt)
5351 || code == PAREN_EXPR
5352 || CONVERT_EXPR_CODE_P (code)))
5353 return false;
5354
5355 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
5356 poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
5357
5358 /* Multiple types in SLP are handled by creating the appropriate number of
5359 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
5360 case of SLP. */
5361 if (slp_node)
5362 ncopies = 1;
5363 else
5364 ncopies = vect_get_num_copies (loop_vinfo, vectype);
5365
5366 gcc_assert (ncopies >= 1);
5367
5368 slp_tree slp_op;
5369 if (!vect_is_simple_use (vinfo, stmt_info, slp_node, 0, &op, &slp_op,
5370 &dt[0], &vectype_in))
5371 {
5372 if (dump_enabled_p ())
5373 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5374 "use not simple.\n");
5375 return false;
5376 }
5377 if (!vectype_in)
5378 vectype_in = get_vectype_for_scalar_type (vinfo, TREE_TYPE (op), slp_node);
5379
5380 /* We can handle NOP_EXPR and VIEW_CONVERT_EXPR conversions that do not
5381 change the number of elements or the vector size. */
5382 if ((CONVERT_EXPR_CODE_P (code)
5383 || code == VIEW_CONVERT_EXPR)
5384 && (!vectype_in
5385 || maybe_ne (TYPE_VECTOR_SUBPARTS (vectype_in), nunits)
5386 || maybe_ne (GET_MODE_SIZE (TYPE_MODE (vectype)),
5387 GET_MODE_SIZE (TYPE_MODE (vectype_in)))))
5388 return false;
5389
5390 /* We do not handle bit-precision changes. */
5391 if ((CONVERT_EXPR_CODE_P (code)
5392 || code == VIEW_CONVERT_EXPR)
5393 && INTEGRAL_TYPE_P (TREE_TYPE (scalar_dest))
5394 && (!type_has_mode_precision_p (TREE_TYPE (scalar_dest))
5395 || !type_has_mode_precision_p (TREE_TYPE (op)))
5396 /* But a conversion that does not change the bit-pattern is ok. */
5397 && !((TYPE_PRECISION (TREE_TYPE (scalar_dest))
5398 > TYPE_PRECISION (TREE_TYPE (op)))
5399 && TYPE_UNSIGNED (TREE_TYPE (op)))
5400 /* Conversion between boolean types of different sizes is
5401 a simple assignment in case their vectypes are the same
5402 boolean vectors. */
5403 && (!VECTOR_BOOLEAN_TYPE_P (vectype)
5404 || !VECTOR_BOOLEAN_TYPE_P (vectype_in)))
5405 {
5406 if (dump_enabled_p ())
5407 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5408 "type conversion to/from bit-precision "
5409 "unsupported.\n");
5410 return false;
5411 }
5412
5413 if (!vec_stmt) /* transformation not required. */
5414 {
5415 if (slp_node
5416 && !vect_maybe_update_slp_op_vectype (slp_op, vectype_in))
5417 {
5418 if (dump_enabled_p ())
5419 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5420 "incompatible vector types for invariants\n");
5421 return false;
5422 }
5423 STMT_VINFO_TYPE (stmt_info) = assignment_vec_info_type;
5424 DUMP_VECT_SCOPE ("vectorizable_assignment");
5425 if (!vect_nop_conversion_p (stmt_info))
5426 vect_model_simple_cost (vinfo, stmt_info, ncopies, dt, ndts, slp_node,
5427 cost_vec);
5428 return true;
5429 }
5430
5431 /* Transform. */
5432 if (dump_enabled_p ())
5433 dump_printf_loc (MSG_NOTE, vect_location, "transform assignment.\n");
5434
5435 /* Handle def. */
5436 vec_dest = vect_create_destination_var (scalar_dest, vectype);
5437
5438 /* Handle use. */
5439 for (j = 0; j < ncopies; j++)
5440 {
5441 /* Handle uses. */
5442 if (j == 0)
5443 vect_get_vec_defs (vinfo, op, NULL, stmt_info, &vec_oprnds, NULL,
5444 slp_node);
5445 else
5446 vect_get_vec_defs_for_stmt_copy (vinfo, &vec_oprnds, NULL);
5447
5448 /* Arguments are ready. Create the new vector stmt. */
5449 stmt_vec_info new_stmt_info = NULL;
5450 FOR_EACH_VEC_ELT (vec_oprnds, i, vop)
5451 {
5452 if (CONVERT_EXPR_CODE_P (code)
5453 || code == VIEW_CONVERT_EXPR)
5454 vop = build1 (VIEW_CONVERT_EXPR, vectype, vop);
5455 gassign *new_stmt = gimple_build_assign (vec_dest, vop);
5456 new_temp = make_ssa_name (vec_dest, new_stmt);
5457 gimple_assign_set_lhs (new_stmt, new_temp);
5458 new_stmt_info
5459 = vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
5460 if (slp_node)
5461 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt_info);
5462 }
5463
5464 if (slp_node)
5465 continue;
5466
5467 if (j == 0)
5468 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt_info;
5469 else
5470 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt_info;
5471
5472 prev_stmt_info = new_stmt_info;
5473 }
5474
5475 vec_oprnds.release ();
5476 return true;
5477 }
5478
5479
5480 /* Return TRUE if CODE (a shift operation) is supported for SCALAR_TYPE
5481 either as shift by a scalar or by a vector. */
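/* For example (illustrative use only): a pattern recognizer might test
   vect_supportable_shift (vinfo, RSHIFT_EXPR, itype) before rewriting a
   division by a power of two into a shift; ITYPE here is a hypothetical
   integer type, not a name from this file.  */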
5482
5483 bool
5484 vect_supportable_shift (vec_info *vinfo, enum tree_code code, tree scalar_type)
5485 {
5486
5487 machine_mode vec_mode;
5488 optab optab;
5489 int icode;
5490 tree vectype;
5491
5492 vectype = get_vectype_for_scalar_type (vinfo, scalar_type);
5493 if (!vectype)
5494 return false;
5495
5496 optab = optab_for_tree_code (code, vectype, optab_scalar);
5497 if (!optab
5498 || optab_handler (optab, TYPE_MODE (vectype)) == CODE_FOR_nothing)
5499 {
5500 optab = optab_for_tree_code (code, vectype, optab_vector);
5501 if (!optab
5502 || (optab_handler (optab, TYPE_MODE (vectype))
5503 == CODE_FOR_nothing))
5504 return false;
5505 }
5506
5507 vec_mode = TYPE_MODE (vectype);
5508 icode = (int) optab_handler (optab, vec_mode);
5509 if (icode == CODE_FOR_nothing)
5510 return false;
5511
5512 return true;
5513 }
5514
5515
5516 /* Function vectorizable_shift.
5517
5518 Check if STMT_INFO performs a shift operation that can be vectorized.
5519 If VEC_STMT is also passed, vectorize the STMT_INFO: create a vectorized
5520 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
5521 Return true if STMT_INFO is vectorizable in this way. */
5522
5523 static bool
5524 vectorizable_shift (vec_info *vinfo,
5525 stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
5526 stmt_vec_info *vec_stmt, slp_tree slp_node,
5527 stmt_vector_for_cost *cost_vec)
5528 {
5529 tree vec_dest;
5530 tree scalar_dest;
5531 tree op0, op1 = NULL;
5532 tree vec_oprnd1 = NULL_TREE;
5533 tree vectype;
5534 loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
5535 enum tree_code code;
5536 machine_mode vec_mode;
5537 tree new_temp;
5538 optab optab;
5539 int icode;
5540 machine_mode optab_op2_mode;
5541 enum vect_def_type dt[2] = {vect_unknown_def_type, vect_unknown_def_type};
5542 int ndts = 2;
5543 stmt_vec_info prev_stmt_info;
5544 poly_uint64 nunits_in;
5545 poly_uint64 nunits_out;
5546 tree vectype_out;
5547 tree op1_vectype;
5548 int ncopies;
5549 int j, i;
5550 vec<tree> vec_oprnds0 = vNULL;
5551 vec<tree> vec_oprnds1 = vNULL;
5552 tree vop0, vop1;
5553 unsigned int k;
5554 bool scalar_shift_arg = true;
5555 bb_vec_info bb_vinfo = dyn_cast <bb_vec_info> (vinfo);
5556 bool incompatible_op1_vectype_p = false;
5557
5558 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
5559 return false;
5560
5561 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
5562 && STMT_VINFO_DEF_TYPE (stmt_info) != vect_nested_cycle
5563 && ! vec_stmt)
5564 return false;
5565
5566 /* Is STMT a vectorizable binary/unary operation? */
5567 gassign *stmt = dyn_cast <gassign *> (stmt_info->stmt);
5568 if (!stmt)
5569 return false;
5570
5571 if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME)
5572 return false;
5573
5574 code = gimple_assign_rhs_code (stmt);
5575
5576 if (!(code == LSHIFT_EXPR || code == RSHIFT_EXPR || code == LROTATE_EXPR
5577 || code == RROTATE_EXPR))
5578 return false;
5579
5580 scalar_dest = gimple_assign_lhs (stmt);
5581 vectype_out = STMT_VINFO_VECTYPE (stmt_info);
5582 if (!type_has_mode_precision_p (TREE_TYPE (scalar_dest)))
5583 {
5584 if (dump_enabled_p ())
5585 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5586 "bit-precision shifts not supported.\n");
5587 return false;
5588 }
5589
5590 slp_tree slp_op0;
5591 if (!vect_is_simple_use (vinfo, stmt_info, slp_node,
5592 0, &op0, &slp_op0, &dt[0], &vectype))
5593 {
5594 if (dump_enabled_p ())
5595 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5596 "use not simple.\n");
5597 return false;
5598 }
5599 /* If op0 is an external or constant def, infer the vector type
5600 from the scalar type. */
5601 if (!vectype)
5602 vectype = get_vectype_for_scalar_type (vinfo, TREE_TYPE (op0), slp_node);
5603 if (vec_stmt)
5604 gcc_assert (vectype);
5605 if (!vectype)
5606 {
5607 if (dump_enabled_p ())
5608 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5609 "no vectype for scalar type\n");
5610 return false;
5611 }
5612
5613 nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
5614 nunits_in = TYPE_VECTOR_SUBPARTS (vectype);
5615 if (maybe_ne (nunits_out, nunits_in))
5616 return false;
5617
5618 stmt_vec_info op1_def_stmt_info;
5619 slp_tree slp_op1;
5620 if (!vect_is_simple_use (vinfo, stmt_info, slp_node, 1, &op1, &slp_op1,
5621 &dt[1], &op1_vectype, &op1_def_stmt_info))
5622 {
5623 if (dump_enabled_p ())
5624 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5625 "use not simple.\n");
5626 return false;
5627 }
5628
5629 /* Multiple types in SLP are handled by creating the appropriate number of
5630 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
5631 case of SLP. */
5632 if (slp_node)
5633 ncopies = 1;
5634 else
5635 ncopies = vect_get_num_copies (loop_vinfo, vectype);
5636
5637 gcc_assert (ncopies >= 1);
5638
5639 /* Determine whether the shift amount is a vector or a scalar. If the
5640 shift/rotate amount is a vector, use the vector/vector shift optabs. */
5641
5642 if ((dt[1] == vect_internal_def
5643 || dt[1] == vect_induction_def
5644 || dt[1] == vect_nested_cycle)
5645 && !slp_node)
5646 scalar_shift_arg = false;
5647 else if (dt[1] == vect_constant_def
5648 || dt[1] == vect_external_def
5649 || dt[1] == vect_internal_def)
5650 {
5651 /* In SLP, we need to check whether the shift count is the same
5652 in all the stmts; in loops, if it is a constant or invariant,
5653 it is always a scalar shift. */
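/* For example (illustrative): an SLP group { x0 >> n, x1 >> n } can keep N
   as a scalar shift amount, whereas { x0 >> n0, x1 >> n1 } with differing
   amounts forces a vector/vector shift.  */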
5654 if (slp_node)
5655 {
5656 vec<stmt_vec_info> stmts = SLP_TREE_SCALAR_STMTS (slp_node);
5657 stmt_vec_info slpstmt_info;
5658
5659 FOR_EACH_VEC_ELT (stmts, k, slpstmt_info)
5660 {
5661 gassign *slpstmt = as_a <gassign *> (slpstmt_info->stmt);
5662 if (!operand_equal_p (gimple_assign_rhs2 (slpstmt), op1, 0))
5663 scalar_shift_arg = false;
5664 }
5665
5666 /* For internal SLP defs we have to make sure we see scalar stmts
5667 for all vector elements.
5668 ??? For different vectors we could resort to a different
5669 scalar shift operand but code-generation below simply always
5670 takes the first. */
5671 if (dt[1] == vect_internal_def
5672 && maybe_ne (nunits_out * SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node),
5673 stmts.length ()))
5674 scalar_shift_arg = false;
5675 }
5676
5677 /* If the shift amount is computed by a pattern stmt we cannot
5678 use the scalar amount directly, so give up and use a vector
5679 shift. */
5680 if (op1_def_stmt_info && is_pattern_stmt_p (op1_def_stmt_info))
5681 scalar_shift_arg = false;
5682 }
5683 else
5684 {
5685 if (dump_enabled_p ())
5686 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5687 "operand mode requires invariant argument.\n");
5688 return false;
5689 }
5690
5691 /* Vector shifted by vector. */
5692 bool was_scalar_shift_arg = scalar_shift_arg;
5693 if (!scalar_shift_arg)
5694 {
5695 optab = optab_for_tree_code (code, vectype, optab_vector);
5696 if (dump_enabled_p ())
5697 dump_printf_loc (MSG_NOTE, vect_location,
5698 "vector/vector shift/rotate found.\n");
5699
5700 if (!op1_vectype)
5701 op1_vectype = get_vectype_for_scalar_type (vinfo, TREE_TYPE (op1),
5702 slp_op1);
5703 incompatible_op1_vectype_p
5704 = (op1_vectype == NULL_TREE
5705 || maybe_ne (TYPE_VECTOR_SUBPARTS (op1_vectype),
5706 TYPE_VECTOR_SUBPARTS (vectype))
5707 || TYPE_MODE (op1_vectype) != TYPE_MODE (vectype));
5708 if (incompatible_op1_vectype_p
5709 && (!slp_node
5710 || SLP_TREE_DEF_TYPE (slp_op1) != vect_constant_def
5711 || slp_op1->refcnt != 1))
5712 {
5713 if (dump_enabled_p ())
5714 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5715 "unusable type for last operand in"
5716 " vector/vector shift/rotate.\n");
5717 return false;
5718 }
5719 }
5720 /* See if the machine has a vector shifted by scalar insn and if not
5721 then see if it has a vector shifted by vector insn. */
5722 else
5723 {
5724 optab = optab_for_tree_code (code, vectype, optab_scalar);
5725 if (optab
5726 && optab_handler (optab, TYPE_MODE (vectype)) != CODE_FOR_nothing)
5727 {
5728 if (dump_enabled_p ())
5729 dump_printf_loc (MSG_NOTE, vect_location,
5730 "vector/scalar shift/rotate found.\n");
5731 }
5732 else
5733 {
5734 optab = optab_for_tree_code (code, vectype, optab_vector);
5735 if (optab
5736 && (optab_handler (optab, TYPE_MODE (vectype))
5737 != CODE_FOR_nothing))
5738 {
5739 scalar_shift_arg = false;
5740
5741 if (dump_enabled_p ())
5742 dump_printf_loc (MSG_NOTE, vect_location,
5743 "vector/vector shift/rotate found.\n");
5744
5745 if (!op1_vectype)
5746 op1_vectype = get_vectype_for_scalar_type (vinfo,
5747 TREE_TYPE (op1),
5748 slp_node);
5749
5750 /* Unlike the other binary operators, shifts/rotates have
5751 an int rhs rather than one of the same type as the lhs,
5752 so make sure the scalar is the right type if we are
5753 dealing with vectors of long long/long/short/char. */
5754 incompatible_op1_vectype_p
5755 = (!op1_vectype
5756 || !tree_nop_conversion_p (TREE_TYPE (vectype),
5757 TREE_TYPE (op1)));
5758 }
5759 }
5760 }
5761
5762 /* Supportable by target? */
5763 if (!optab)
5764 {
5765 if (dump_enabled_p ())
5766 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5767 "no optab.\n");
5768 return false;
5769 }
5770 vec_mode = TYPE_MODE (vectype);
5771 icode = (int) optab_handler (optab, vec_mode);
5772 if (icode == CODE_FOR_nothing)
5773 {
5774 if (dump_enabled_p ())
5775 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5776 "op not supported by target.\n");
5777 /* Check only during analysis. */
5778 if (maybe_ne (GET_MODE_SIZE (vec_mode), UNITS_PER_WORD)
5779 || (!vec_stmt
5780 && !vect_worthwhile_without_simd_p (vinfo, code)))
5781 return false;
5782 if (dump_enabled_p ())
5783 dump_printf_loc (MSG_NOTE, vect_location,
5784 "proceeding using word mode.\n");
5785 }
5786
5787 /* Worthwhile without SIMD support? Check only during analysis. */
5788 if (!vec_stmt
5789 && !VECTOR_MODE_P (TYPE_MODE (vectype))
5790 && !vect_worthwhile_without_simd_p (vinfo, code))
5791 {
5792 if (dump_enabled_p ())
5793 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5794 "not worthwhile without SIMD support.\n");
5795 return false;
5796 }
5797
5798 if (!vec_stmt) /* transformation not required. */
5799 {
5800 if (slp_node
5801 && (!vect_maybe_update_slp_op_vectype (slp_op0, vectype)
5802 || (!scalar_shift_arg
5803 && (!incompatible_op1_vectype_p
5804 || dt[1] == vect_constant_def)
5805 && !vect_maybe_update_slp_op_vectype
5806 (slp_op1,
5807 incompatible_op1_vectype_p ? vectype : op1_vectype))))
5808 {
5809 if (dump_enabled_p ())
5810 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5811 "incompatible vector types for invariants\n");
5812 return false;
5813 }
5814 /* Now adjust the constant shift amount in place. */
5815 if (slp_node
5816 && incompatible_op1_vectype_p
5817 && dt[1] == vect_constant_def)
5818 {
5819 for (unsigned i = 0;
5820 i < SLP_TREE_SCALAR_OPS (slp_op1).length (); ++i)
5821 {
5822 SLP_TREE_SCALAR_OPS (slp_op1)[i]
5823 = fold_convert (TREE_TYPE (vectype),
5824 SLP_TREE_SCALAR_OPS (slp_op1)[i]);
5825 gcc_assert ((TREE_CODE (SLP_TREE_SCALAR_OPS (slp_op1)[i])
5826 == INTEGER_CST));
5827 }
5828 }
5829 STMT_VINFO_TYPE (stmt_info) = shift_vec_info_type;
5830 DUMP_VECT_SCOPE ("vectorizable_shift");
5831 vect_model_simple_cost (vinfo, stmt_info, ncopies, dt,
5832 scalar_shift_arg ? 1 : ndts, slp_node, cost_vec);
5833 return true;
5834 }
5835
5836 /* Transform. */
5837
5838 if (dump_enabled_p ())
5839 dump_printf_loc (MSG_NOTE, vect_location,
5840 "transform binary/unary operation.\n");
5841
5842 if (incompatible_op1_vectype_p && !slp_node)
5843 {
5844 op1 = fold_convert (TREE_TYPE (vectype), op1);
5845 if (dt[1] != vect_constant_def)
5846 op1 = vect_init_vector (vinfo, stmt_info, op1,
5847 TREE_TYPE (vectype), NULL);
5848 }
5849
5850 /* Handle def. */
5851 vec_dest = vect_create_destination_var (scalar_dest, vectype);
5852
5853 prev_stmt_info = NULL;
5854 for (j = 0; j < ncopies; j++)
5855 {
5856 /* Handle uses. */
5857 if (j == 0)
5858 {
5859 if (scalar_shift_arg)
5860 {
5861 /* Vector shl and shr insn patterns can be defined with a scalar
5862 operand 2 (shift operand). In this case, use the constant or
5863 loop-invariant op1 directly, without extending it to vector mode
5864 first. */
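/* For example (illustrative, target-dependent): many targets provide
   V4SImode shift patterns whose operand 2 is a scalar count, so a
   loop-invariant amount in 'x[i] >> n' can be passed through unchanged.  */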
5865 optab_op2_mode = insn_data[icode].operand[2].mode;
5866 if (!VECTOR_MODE_P (optab_op2_mode))
5867 {
5868 if (dump_enabled_p ())
5869 dump_printf_loc (MSG_NOTE, vect_location,
5870 "operand 1 using scalar mode.\n");
5871 vec_oprnd1 = op1;
5872 vec_oprnds1.create (slp_node ? slp_node->vec_stmts_size : 1);
5873 vec_oprnds1.quick_push (vec_oprnd1);
5874 if (slp_node)
5875 {
5876 /* Store vec_oprnd1 for every vector stmt to be created
5877 for SLP_NODE. We check during the analysis that all
5878 the shift arguments are the same.
5879 TODO: Allow different constants for different vector
5880 stmts generated for an SLP instance. */
5881 for (k = 0; k < slp_node->vec_stmts_size - 1; k++)
5882 vec_oprnds1.quick_push (vec_oprnd1);
5883 }
5884 }
5885 }
5886 else if (slp_node && incompatible_op1_vectype_p)
5887 {
5888 if (was_scalar_shift_arg)
5889 {
5890 /* If the argument was the same in all lanes, create
5891 the correctly typed vector shift amount directly. */
5892 op1 = fold_convert (TREE_TYPE (vectype), op1);
5893 op1 = vect_init_vector (vinfo, stmt_info,
5894 op1, TREE_TYPE (vectype),
5895 !loop_vinfo ? gsi : NULL);
5896 vec_oprnd1 = vect_init_vector (vinfo, stmt_info, op1, vectype,
5897 !loop_vinfo ? gsi : NULL);
5898 vec_oprnds1.create (slp_node->vec_stmts_size);
5899 for (k = 0; k < slp_node->vec_stmts_size; k++)
5900 vec_oprnds1.quick_push (vec_oprnd1);
5901 }
5902 else if (dt[1] == vect_constant_def)
5903 /* The constant shift amount has been adjusted in place. */
5904 ;
5905 else
5906 gcc_assert (TYPE_MODE (op1_vectype) == TYPE_MODE (vectype));
5907 }
5908
5909 /* vec_oprnd1 is available if operand 1 should be of a scalar type
5910 (a special case for certain kinds of vector shifts); otherwise,
5911 operand 1 should be of a vector type (the usual case). */
5912 if (vec_oprnd1)
5913 vect_get_vec_defs (vinfo, op0, NULL_TREE, stmt_info,
5914 &vec_oprnds0, NULL, slp_node);
5915 else
5916 vect_get_vec_defs (vinfo, op0, op1, stmt_info,
5917 &vec_oprnds0, &vec_oprnds1, slp_node);
5918 }
5919 else
5920 vect_get_vec_defs_for_stmt_copy (vinfo, &vec_oprnds0, &vec_oprnds1);
5921
5922 /* Arguments are ready. Create the new vector stmt. */
5923 stmt_vec_info new_stmt_info = NULL;
5924 FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
5925 {
5926 vop1 = vec_oprnds1[i];
5927 gassign *new_stmt = gimple_build_assign (vec_dest, code, vop0, vop1);
5928 new_temp = make_ssa_name (vec_dest, new_stmt);
5929 gimple_assign_set_lhs (new_stmt, new_temp);
5930 new_stmt_info
5931 = vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
5932 if (slp_node)
5933 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt_info);
5934 }
5935
5936 if (slp_node)
5937 continue;
5938
5939 if (j == 0)
5940 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt_info;
5941 else
5942 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt_info;
5943 prev_stmt_info = new_stmt_info;
5944 }
5945
5946 vec_oprnds0.release ();
5947 vec_oprnds1.release ();
5948
5949 return true;
5950 }
5951
5952
5953 /* Function vectorizable_operation.
5954
5955 Check if STMT_INFO performs a binary, unary or ternary operation that can
5956 be vectorized.
5957 If VEC_STMT is also passed, vectorize STMT_INFO: create a vectorized
5958 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
5959 Return true if STMT_INFO is vectorizable in this way. */
5960
5961 static bool
5962 vectorizable_operation (vec_info *vinfo,
5963 stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
5964 stmt_vec_info *vec_stmt, slp_tree slp_node,
5965 stmt_vector_for_cost *cost_vec)
5966 {
5967 tree vec_dest;
5968 tree scalar_dest;
5969 tree op0, op1 = NULL_TREE, op2 = NULL_TREE;
5970 tree vectype;
5971 loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
5972 enum tree_code code, orig_code;
5973 machine_mode vec_mode;
5974 tree new_temp;
5975 int op_type;
5976 optab optab;
5977 bool target_support_p;
5978 enum vect_def_type dt[3]
5979 = {vect_unknown_def_type, vect_unknown_def_type, vect_unknown_def_type};
5980 int ndts = 3;
5981 stmt_vec_info prev_stmt_info;
5982 poly_uint64 nunits_in;
5983 poly_uint64 nunits_out;
5984 tree vectype_out;
5985 int ncopies, vec_num;
5986 int j, i;
5987 vec<tree> vec_oprnds0 = vNULL;
5988 vec<tree> vec_oprnds1 = vNULL;
5989 vec<tree> vec_oprnds2 = vNULL;
5990 tree vop0, vop1, vop2;
5991 bb_vec_info bb_vinfo = dyn_cast <bb_vec_info> (vinfo);
5992
5993 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
5994 return false;
5995
5996 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
5997 && ! vec_stmt)
5998 return false;
5999
6000 /* Is STMT a vectorizable binary/unary operation? */
6001 gassign *stmt = dyn_cast <gassign *> (stmt_info->stmt);
6002 if (!stmt)
6003 return false;
6004
6005 /* Loads and stores are handled in vectorizable_{load,store}. */
6006 if (STMT_VINFO_DATA_REF (stmt_info))
6007 return false;
6008
6009 orig_code = code = gimple_assign_rhs_code (stmt);
6010
6011 /* Shifts are handled in vectorizable_shift. */
6012 if (code == LSHIFT_EXPR
6013 || code == RSHIFT_EXPR
6014 || code == LROTATE_EXPR
6015 || code == RROTATE_EXPR)
6016 return false;
6017
6018 /* Comparisons are handled in vectorizable_comparison. */
6019 if (TREE_CODE_CLASS (code) == tcc_comparison)
6020 return false;
6021
6022 /* Conditions are handled in vectorizable_condition. */
6023 if (code == COND_EXPR)
6024 return false;
6025
6026 /* For pointer addition and subtraction, we should use the normal
6027 plus and minus for the vector operation. */
6028 if (code == POINTER_PLUS_EXPR)
6029 code = PLUS_EXPR;
6030 if (code == POINTER_DIFF_EXPR)
6031 code = MINUS_EXPR;
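/* For example (illustrative): advancing a pointer by a byte offset is a
   POINTER_PLUS_EXPR in GIMPLE and is vectorized as an element-wise
   PLUS_EXPR on vectors of pointer-sized unsigned elements.  */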
6032
6033 /* Support only unary, binary or ternary operations. */
6034 op_type = TREE_CODE_LENGTH (code);
6035 if (op_type != unary_op && op_type != binary_op && op_type != ternary_op)
6036 {
6037 if (dump_enabled_p ())
6038 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6039 "num. args = %d (not unary/binary/ternary op).\n",
6040 op_type);
6041 return false;
6042 }
6043
6044 scalar_dest = gimple_assign_lhs (stmt);
6045 vectype_out = STMT_VINFO_VECTYPE (stmt_info);
6046
6047 /* Most operations cannot handle bit-precision types without extra
6048 truncations. */
6049 bool mask_op_p = VECTOR_BOOLEAN_TYPE_P (vectype_out);
6050 if (!mask_op_p
6051 && !type_has_mode_precision_p (TREE_TYPE (scalar_dest))
6052 /* The exceptions are bitwise binary operations. */
6053 && code != BIT_IOR_EXPR
6054 && code != BIT_XOR_EXPR
6055 && code != BIT_AND_EXPR)
6056 {
6057 if (dump_enabled_p ())
6058 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6059 "bit-precision arithmetic not supported.\n");
6060 return false;
6061 }
6062
6063 slp_tree slp_op0;
6064 if (!vect_is_simple_use (vinfo, stmt_info, slp_node,
6065 0, &op0, &slp_op0, &dt[0], &vectype))
6066 {
6067 if (dump_enabled_p ())
6068 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6069 "use not simple.\n");
6070 return false;
6071 }
6072 /* If op0 is an external or constant def, infer the vector type
6073 from the scalar type. */
6074 if (!vectype)
6075 {
6076 /* For a boolean type we cannot determine the vectype from an
6077 invariant value (we don't know whether it is a vector
6078 of booleans or a vector of integers). We use the output
6079 vectype because operations on booleans don't change
6080 the type. */
6081 if (VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (op0)))
6082 {
6083 if (!VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (scalar_dest)))
6084 {
6085 if (dump_enabled_p ())
6086 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6087 "not supported operation on bool value.\n");
6088 return false;
6089 }
6090 vectype = vectype_out;
6091 }
6092 else
6093 vectype = get_vectype_for_scalar_type (vinfo, TREE_TYPE (op0),
6094 slp_node);
6095 }
6096 if (vec_stmt)
6097 gcc_assert (vectype);
6098 if (!vectype)
6099 {
6100 if (dump_enabled_p ())
6101 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6102 "no vectype for scalar type %T\n",
6103 TREE_TYPE (op0));
6104
6105 return false;
6106 }
6107
6108 nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
6109 nunits_in = TYPE_VECTOR_SUBPARTS (vectype);
6110 if (maybe_ne (nunits_out, nunits_in))
6111 return false;
6112
6113 tree vectype2 = NULL_TREE, vectype3 = NULL_TREE;
6114 slp_tree slp_op1 = NULL, slp_op2 = NULL;
6115 if (op_type == binary_op || op_type == ternary_op)
6116 {
6117 if (!vect_is_simple_use (vinfo, stmt_info, slp_node,
6118 1, &op1, &slp_op1, &dt[1], &vectype2))
6119 {
6120 if (dump_enabled_p ())
6121 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6122 "use not simple.\n");
6123 return false;
6124 }
6125 }
6126 if (op_type == ternary_op)
6127 {
6128 if (!vect_is_simple_use (vinfo, stmt_info, slp_node,
6129 2, &op2, &slp_op2, &dt[2], &vectype3))
6130 {
6131 if (dump_enabled_p ())
6132 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6133 "use not simple.\n");
6134 return false;
6135 }
6136 }
6137
6138 /* Multiple types in SLP are handled by creating the appropriate number of
6139 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
6140 case of SLP. */
6141 if (slp_node)
6142 {
6143 ncopies = 1;
6144 vec_num = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
6145 }
6146 else
6147 {
6148 ncopies = vect_get_num_copies (loop_vinfo, vectype);
6149 vec_num = 1;
6150 }
6151
6152 gcc_assert (ncopies >= 1);
6153
6154 /* Reject attempts to combine mask types with nonmask types, e.g. if
6155 we have an AND between a (nonmask) boolean loaded from memory and
6156 a (mask) boolean result of a comparison.
6157
6158 TODO: We could easily fix these cases up using pattern statements. */
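/* For example (illustrative): in '_Bool t = flag & (a < b)' where FLAG is a
   _Bool loaded from memory, FLAG gets a nonmask (integer) vectype while the
   comparison result gets a mask vectype, so the AND is rejected here.  */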
6159 if (VECTOR_BOOLEAN_TYPE_P (vectype) != mask_op_p
6160 || (vectype2 && VECTOR_BOOLEAN_TYPE_P (vectype2) != mask_op_p)
6161 || (vectype3 && VECTOR_BOOLEAN_TYPE_P (vectype3) != mask_op_p))
6162 {
6163 if (dump_enabled_p ())
6164 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6165 "mixed mask and nonmask vector types\n");
6166 return false;
6167 }
6168
6169 /* Supportable by target? */
6170
6171 vec_mode = TYPE_MODE (vectype);
6172 if (code == MULT_HIGHPART_EXPR)
6173 target_support_p = can_mult_highpart_p (vec_mode, TYPE_UNSIGNED (vectype));
6174 else
6175 {
6176 optab = optab_for_tree_code (code, vectype, optab_default);
6177 if (!optab)
6178 {
6179 if (dump_enabled_p ())
6180 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6181 "no optab.\n");
6182 return false;
6183 }
6184 target_support_p = (optab_handler (optab, vec_mode)
6185 != CODE_FOR_nothing);
6186 }
6187
6188 if (!target_support_p)
6189 {
6190 if (dump_enabled_p ())
6191 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6192 "op not supported by target.\n");
6193 /* Check only during analysis. */
6194 if (maybe_ne (GET_MODE_SIZE (vec_mode), UNITS_PER_WORD)
6195 || (!vec_stmt && !vect_worthwhile_without_simd_p (vinfo, code)))
6196 return false;
6197 if (dump_enabled_p ())
6198 dump_printf_loc (MSG_NOTE, vect_location,
6199 "proceeding using word mode.\n");
6200 }
6201
6202 /* Worthwhile without SIMD support? Check only during analysis. */
6203 if (!VECTOR_MODE_P (vec_mode)
6204 && !vec_stmt
6205 && !vect_worthwhile_without_simd_p (vinfo, code))
6206 {
6207 if (dump_enabled_p ())
6208 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6209 "not worthwhile without SIMD support.\n");
6210 return false;
6211 }
6212
6213 int reduc_idx = STMT_VINFO_REDUC_IDX (stmt_info);
6214 vec_loop_masks *masks = (loop_vinfo ? &LOOP_VINFO_MASKS (loop_vinfo) : NULL);
6215 internal_fn cond_fn = get_conditional_internal_fn (code);
6216
6217 if (!vec_stmt) /* transformation not required. */
6218 {
6219 /* If this operation is part of a reduction, a fully-masked loop
6220 should only change the active lanes of the reduction chain,
6221 keeping the inactive lanes as-is. */
6222 if (loop_vinfo
6223 && LOOP_VINFO_CAN_FULLY_MASK_P (loop_vinfo)
6224 && reduc_idx >= 0)
6225 {
6226 if (cond_fn == IFN_LAST
6227 || !direct_internal_fn_supported_p (cond_fn, vectype,
6228 OPTIMIZE_FOR_SPEED))
6229 {
6230 if (dump_enabled_p ())
6231 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6232 "can't use a fully-masked loop because no"
6233 " conditional operation is available.\n");
6234 LOOP_VINFO_CAN_FULLY_MASK_P (loop_vinfo) = false;
6235 }
6236 else
6237 vect_record_loop_mask (loop_vinfo, masks, ncopies * vec_num,
6238 vectype, NULL);
6239 }
6240
6241 /* Put types on constant and invariant SLP children. */
6242 if (slp_node
6243 && (!vect_maybe_update_slp_op_vectype (slp_op0, vectype)
6244 || !vect_maybe_update_slp_op_vectype (slp_op1, vectype)
6245 || !vect_maybe_update_slp_op_vectype (slp_op2, vectype)))
6246 {
6247 if (dump_enabled_p ())
6248 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6249 "incompatible vector types for invariants\n");
6250 return false;
6251 }
6252
6253 STMT_VINFO_TYPE (stmt_info) = op_vec_info_type;
6254 DUMP_VECT_SCOPE ("vectorizable_operation");
6255 vect_model_simple_cost (vinfo, stmt_info,
6256 ncopies, dt, ndts, slp_node, cost_vec);
6257 return true;
6258 }
6259
6260 /* Transform. */
6261
6262 if (dump_enabled_p ())
6263 dump_printf_loc (MSG_NOTE, vect_location,
6264 "transform binary/unary operation.\n");
6265
6266 bool masked_loop_p = loop_vinfo && LOOP_VINFO_FULLY_MASKED_P (loop_vinfo);
6267
6268 /* POINTER_DIFF_EXPR has pointer arguments which are vectorized as
6269 vectors with unsigned elements, but the result is signed. So, we
6270 need to compute the MINUS_EXPR into a vectype temporary and
6271 VIEW_CONVERT_EXPR it into the final vectype_out result. */
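/* For example (illustrative): for 'ptrdiff_t d = p - q' with 64-bit
   pointers, the MINUS_EXPR is computed on vectors of unsigned 64-bit
   elements and the result is then VIEW_CONVERTed to the signed vectype
   of D.  */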
6272 tree vec_cvt_dest = NULL_TREE;
6273 if (orig_code == POINTER_DIFF_EXPR)
6274 {
6275 vec_dest = vect_create_destination_var (scalar_dest, vectype);
6276 vec_cvt_dest = vect_create_destination_var (scalar_dest, vectype_out);
6277 }
6278 /* Handle def. */
6279 else
6280 vec_dest = vect_create_destination_var (scalar_dest, vectype_out);
6281
6282 /* In case the vectorization factor (VF) is bigger than the number
6283 of elements that we can fit in a vectype (nunits), we have to generate
6284 more than one vector stmt - i.e. - we need to "unroll" the
6285 vector stmt by a factor VF/nunits. In doing so, we record a pointer
6286 from one copy of the vector stmt to the next, in the field
6287 STMT_VINFO_RELATED_STMT. This is necessary in order to allow following
6288 stages to find the correct vector defs to be used when vectorizing
6289 stmts that use the defs of the current stmt. The example below
6290 illustrates the vectorization process when VF=16 and nunits=4 (i.e.,
6291 we need to create 4 vectorized stmts):
6292
6293 before vectorization:
6294 RELATED_STMT VEC_STMT
6295 S1: x = memref - -
6296 S2: z = x + 1 - -
6297
6298 step 1: vectorize stmt S1 (done in vectorizable_load. See more details
6299 there):
6300 RELATED_STMT VEC_STMT
6301 VS1_0: vx0 = memref0 VS1_1 -
6302 VS1_1: vx1 = memref1 VS1_2 -
6303 VS1_2: vx2 = memref2 VS1_3 -
6304 VS1_3: vx3 = memref3 - -
6305 S1: x = load - VS1_0
6306 S2: z = x + 1 - -
6307
6308 step2: vectorize stmt S2 (done here):
6309 To vectorize stmt S2 we first need to find the relevant vector
6310 def for the first operand 'x'. This is, as usual, obtained from
6311 the vector stmt recorded in the STMT_VINFO_VEC_STMT of the stmt
6312 that defines 'x' (S1). This way we find the stmt VS1_0, and the
6313 relevant vector def 'vx0'. Having found 'vx0' we can generate
6314 the vector stmt VS2_0, and as usual, record it in the
6315 STMT_VINFO_VEC_STMT of stmt S2.
6316 When creating the second copy (VS2_1), we obtain the relevant vector
6317 def from the vector stmt recorded in the STMT_VINFO_RELATED_STMT of
6318 stmt VS1_0. This way we find the stmt VS1_1 and the relevant
6319 vector def 'vx1'. Using 'vx1' we create stmt VS2_1 and record a
6320 pointer to it in the STMT_VINFO_RELATED_STMT of the vector stmt VS2_0.
6321 Similarly when creating stmts VS2_2 and VS2_3. This is the resulting
6322 chain of stmts and pointers:
6323 RELATED_STMT VEC_STMT
6324 VS1_0: vx0 = memref0 VS1_1 -
6325 VS1_1: vx1 = memref1 VS1_2 -
6326 VS1_2: vx2 = memref2 VS1_3 -
6327 VS1_3: vx3 = memref3 - -
6328 S1: x = load - VS1_0
6329 VS2_0: vz0 = vx0 + v1 VS2_1 -
6330 VS2_1: vz1 = vx1 + v1 VS2_2 -
6331 VS2_2: vz2 = vx2 + v1 VS2_3 -
6332 VS2_3: vz3 = vx3 + v1 - -
6333 S2: z = x + 1 - VS2_0 */
6334
6335 prev_stmt_info = NULL;
6336 for (j = 0; j < ncopies; j++)
6337 {
6338 /* Handle uses. */
6339 if (j == 0)
6340 {
6341 if (op_type == binary_op)
6342 vect_get_vec_defs (vinfo, op0, op1, stmt_info,
6343 &vec_oprnds0, &vec_oprnds1, slp_node);
6344 else if (op_type == ternary_op)
6345 {
6346 if (slp_node)
6347 {
6348 auto_vec<vec<tree> > vec_defs(3);
6349 vect_get_slp_defs (vinfo, slp_node, &vec_defs);
6350 vec_oprnds0 = vec_defs[0];
6351 vec_oprnds1 = vec_defs[1];
6352 vec_oprnds2 = vec_defs[2];
6353 }
6354 else
6355 {
6356 vect_get_vec_defs (vinfo, op0, op1, stmt_info, &vec_oprnds0,
6357 &vec_oprnds1, NULL);
6358 vect_get_vec_defs (vinfo, op2, NULL_TREE, stmt_info,
6359 &vec_oprnds2, NULL, NULL);
6360 }
6361 }
6362 else
6363 vect_get_vec_defs (vinfo, op0, NULL_TREE, stmt_info, &vec_oprnds0,
6364 NULL, slp_node);
6365 }
6366 else
6367 {
6368 vect_get_vec_defs_for_stmt_copy (vinfo, &vec_oprnds0, &vec_oprnds1);
6369 if (op_type == ternary_op)
6370 {
6371 tree vec_oprnd = vec_oprnds2.pop ();
6372 vec_oprnds2.quick_push (vect_get_vec_def_for_stmt_copy (vinfo,
6373 vec_oprnd));
6374 }
6375 }
6376
6377 /* Arguments are ready. Create the new vector stmt. */
6378 stmt_vec_info new_stmt_info = NULL;
6379 FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
6380 {
6381 vop1 = ((op_type == binary_op || op_type == ternary_op)
6382 ? vec_oprnds1[i] : NULL_TREE);
6383 vop2 = ((op_type == ternary_op)
6384 ? vec_oprnds2[i] : NULL_TREE);
6385 if (masked_loop_p && reduc_idx >= 0)
6386 {
6387 /* Perform the operation on active elements only and take
6388 inactive elements from the reduction chain input. */
6389 gcc_assert (!vop2);
6390 vop2 = reduc_idx == 1 ? vop1 : vop0;
6391 tree mask = vect_get_loop_mask (gsi, masks, vec_num * ncopies,
6392 vectype, i * ncopies + j);
6393 gcall *call = gimple_build_call_internal (cond_fn, 4, mask,
6394 vop0, vop1, vop2);
6395 new_temp = make_ssa_name (vec_dest, call);
6396 gimple_call_set_lhs (call, new_temp);
6397 gimple_call_set_nothrow (call, true);
6398 new_stmt_info
6399 = vect_finish_stmt_generation (vinfo, stmt_info, call, gsi);
6400 }
6401 else
6402 {
6403 gassign *new_stmt = gimple_build_assign (vec_dest, code,
6404 vop0, vop1, vop2);
6405 new_temp = make_ssa_name (vec_dest, new_stmt);
6406 gimple_assign_set_lhs (new_stmt, new_temp);
6407 new_stmt_info
6408 = vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
6409 if (vec_cvt_dest)
6410 {
6411 new_temp = build1 (VIEW_CONVERT_EXPR, vectype_out, new_temp);
6412 gassign *new_stmt
6413 = gimple_build_assign (vec_cvt_dest, VIEW_CONVERT_EXPR,
6414 new_temp);
6415 new_temp = make_ssa_name (vec_cvt_dest, new_stmt);
6416 gimple_assign_set_lhs (new_stmt, new_temp);
6417 new_stmt_info = vect_finish_stmt_generation (vinfo, stmt_info,
6418 new_stmt, gsi);
6419 }
6420 }
6421 if (slp_node)
6422 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt_info);
6423 }
6424
6425 if (slp_node)
6426 continue;
6427
6428 if (j == 0)
6429 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt_info;
6430 else
6431 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt_info;
6432 prev_stmt_info = new_stmt_info;
6433 }
6434
6435 vec_oprnds0.release ();
6436 vec_oprnds1.release ();
6437 vec_oprnds2.release ();
6438
6439 return true;
6440 }
6441
6442 /* A helper function to ensure data reference DR_INFO's base alignment. */
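/* For example (illustrative): if the target prefers the access aligned to 32
   bytes but the base object is a global array with only 16-byte alignment,
   its alignment is raised to 32 bytes below (via the symtab for decls it
   knows about, or by adjusting DECL_ALIGN directly).  */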
6443
6444 static void
6445 ensure_base_align (dr_vec_info *dr_info)
6446 {
6447 if (dr_info->misalignment == DR_MISALIGNMENT_UNINITIALIZED)
6448 return;
6449
6450 if (dr_info->base_misaligned)
6451 {
6452 tree base_decl = dr_info->base_decl;
6453
6454 // We should only be able to increase the alignment of a base object if
6455 // we know what its new alignment should be at compile time.
6456 unsigned HOST_WIDE_INT align_base_to =
6457 DR_TARGET_ALIGNMENT (dr_info).to_constant () * BITS_PER_UNIT;
6458
6459 if (decl_in_symtab_p (base_decl))
6460 symtab_node::get (base_decl)->increase_alignment (align_base_to);
6461 else if (DECL_ALIGN (base_decl) < align_base_to)
6462 {
6463 SET_DECL_ALIGN (base_decl, align_base_to);
6464 DECL_USER_ALIGN (base_decl) = 1;
6465 }
6466 dr_info->base_misaligned = false;
6467 }
6468 }
6469
6470
6471 /* Function get_group_alias_ptr_type.
6472
6473 Return the alias type for the group starting at FIRST_STMT_INFO. */
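/* For example (illustrative): if the group members use references with
   different alias sets (say a plain 'float' access and a union-member
   access), ptr_type_node is returned as a safe fallback.  */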
6474
6475 static tree
6476 get_group_alias_ptr_type (stmt_vec_info first_stmt_info)
6477 {
6478 struct data_reference *first_dr, *next_dr;
6479
6480 first_dr = STMT_VINFO_DATA_REF (first_stmt_info);
6481 stmt_vec_info next_stmt_info = DR_GROUP_NEXT_ELEMENT (first_stmt_info);
6482 while (next_stmt_info)
6483 {
6484 next_dr = STMT_VINFO_DATA_REF (next_stmt_info);
6485 if (get_alias_set (DR_REF (first_dr))
6486 != get_alias_set (DR_REF (next_dr)))
6487 {
6488 if (dump_enabled_p ())
6489 dump_printf_loc (MSG_NOTE, vect_location,
6490 "conflicting alias set types.\n");
6491 return ptr_type_node;
6492 }
6493 next_stmt_info = DR_GROUP_NEXT_ELEMENT (next_stmt_info);
6494 }
6495 return reference_alias_ptr_type (DR_REF (first_dr));
6496 }
6497
6498
6499 /* Function scan_operand_equal_p.
6500
6501 Helper function for check_scan_store. Compare two references
6502 with .GOMP_SIMD_LANE bases. */
6503
6504 static bool
6505 scan_operand_equal_p (tree ref1, tree ref2)
6506 {
6507 tree ref[2] = { ref1, ref2 };
6508 poly_int64 bitsize[2], bitpos[2];
6509 tree offset[2], base[2];
6510 for (int i = 0; i < 2; ++i)
6511 {
6512 machine_mode mode;
6513 int unsignedp, reversep, volatilep = 0;
6514 base[i] = get_inner_reference (ref[i], &bitsize[i], &bitpos[i],
6515 &offset[i], &mode, &unsignedp,
6516 &reversep, &volatilep);
6517 if (reversep || volatilep || maybe_ne (bitpos[i], 0))
6518 return false;
6519 if (TREE_CODE (base[i]) == MEM_REF
6520 && offset[i] == NULL_TREE
6521 && TREE_CODE (TREE_OPERAND (base[i], 0)) == SSA_NAME)
6522 {
6523 gimple *def_stmt = SSA_NAME_DEF_STMT (TREE_OPERAND (base[i], 0));
6524 if (is_gimple_assign (def_stmt)
6525 && gimple_assign_rhs_code (def_stmt) == POINTER_PLUS_EXPR
6526 && TREE_CODE (gimple_assign_rhs1 (def_stmt)) == ADDR_EXPR
6527 && TREE_CODE (gimple_assign_rhs2 (def_stmt)) == SSA_NAME)
6528 {
6529 if (maybe_ne (mem_ref_offset (base[i]), 0))
6530 return false;
6531 base[i] = TREE_OPERAND (gimple_assign_rhs1 (def_stmt), 0);
6532 offset[i] = gimple_assign_rhs2 (def_stmt);
6533 }
6534 }
6535 }
6536
6537 if (!operand_equal_p (base[0], base[1], 0))
6538 return false;
6539 if (maybe_ne (bitsize[0], bitsize[1]))
6540 return false;
6541 if (offset[0] != offset[1])
6542 {
6543 if (!offset[0] || !offset[1])
6544 return false;
6545 if (!operand_equal_p (offset[0], offset[1], 0))
6546 {
6547 tree step[2];
6548 for (int i = 0; i < 2; ++i)
6549 {
6550 step[i] = integer_one_node;
6551 if (TREE_CODE (offset[i]) == SSA_NAME)
6552 {
6553 gimple *def_stmt = SSA_NAME_DEF_STMT (offset[i]);
6554 if (is_gimple_assign (def_stmt)
6555 && gimple_assign_rhs_code (def_stmt) == MULT_EXPR
6556 && (TREE_CODE (gimple_assign_rhs2 (def_stmt))
6557 == INTEGER_CST))
6558 {
6559 step[i] = gimple_assign_rhs2 (def_stmt);
6560 offset[i] = gimple_assign_rhs1 (def_stmt);
6561 }
6562 }
6563 else if (TREE_CODE (offset[i]) == MULT_EXPR)
6564 {
6565 step[i] = TREE_OPERAND (offset[i], 1);
6566 offset[i] = TREE_OPERAND (offset[i], 0);
6567 }
6568 tree rhs1 = NULL_TREE;
6569 if (TREE_CODE (offset[i]) == SSA_NAME)
6570 {
6571 gimple *def_stmt = SSA_NAME_DEF_STMT (offset[i]);
6572 if (gimple_assign_cast_p (def_stmt))
6573 rhs1 = gimple_assign_rhs1 (def_stmt);
6574 }
6575 else if (CONVERT_EXPR_P (offset[i]))
6576 rhs1 = TREE_OPERAND (offset[i], 0);
6577 if (rhs1
6578 && INTEGRAL_TYPE_P (TREE_TYPE (rhs1))
6579 && INTEGRAL_TYPE_P (TREE_TYPE (offset[i]))
6580 && (TYPE_PRECISION (TREE_TYPE (offset[i]))
6581 >= TYPE_PRECISION (TREE_TYPE (rhs1))))
6582 offset[i] = rhs1;
6583 }
6584 if (!operand_equal_p (offset[0], offset[1], 0)
6585 || !operand_equal_p (step[0], step[1], 0))
6586 return false;
6587 }
6588 }
6589 return true;
6590 }
6591
6592
6593 enum scan_store_kind {
6594 /* Normal permutation. */
6595 scan_store_kind_perm,
6596
6597 /* Whole vector left shift permutation with zero init. */
6598 scan_store_kind_lshift_zero,
6599
6600 /* Whole vector left shift permutation and VEC_COND_EXPR. */
6601 scan_store_kind_lshift_cond
6602 };
6603
6604 /* Function scan_store_can_perm_p.
6605
6606 Verify whether we can perform the needed permutations or whole vector shifts.
6607 Return -1 on failure, otherwise the exact log2 of VECTYPE's nunits.
6608 USE_WHOLE_VECTOR, if nonnull, records which scan_store_kind operation
6609 to use at each step. */
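/* For example (illustrative): for V8SImode (nunits == 8) the loop below
   checks the three two-input merge permutations used by the scan steps plus
   a final broadcast of element 7, and returns units_log2 == 3 on success.  */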
6610
6611 static int
6612 scan_store_can_perm_p (tree vectype, tree init,
6613 vec<enum scan_store_kind> *use_whole_vector = NULL)
6614 {
6615 enum machine_mode vec_mode = TYPE_MODE (vectype);
6616 unsigned HOST_WIDE_INT nunits;
6617 if (!TYPE_VECTOR_SUBPARTS (vectype).is_constant (&nunits))
6618 return -1;
6619 int units_log2 = exact_log2 (nunits);
6620 if (units_log2 <= 0)
6621 return -1;
6622
6623 int i;
6624 enum scan_store_kind whole_vector_shift_kind = scan_store_kind_perm;
6625 for (i = 0; i <= units_log2; ++i)
6626 {
6627 unsigned HOST_WIDE_INT j, k;
6628 enum scan_store_kind kind = scan_store_kind_perm;
6629 vec_perm_builder sel (nunits, nunits, 1);
6630 sel.quick_grow (nunits);
6631 if (i == units_log2)
6632 {
6633 for (j = 0; j < nunits; ++j)
6634 sel[j] = nunits - 1;
6635 }
6636 else
6637 {
6638 for (j = 0; j < (HOST_WIDE_INT_1U << i); ++j)
6639 sel[j] = j;
6640 for (k = 0; j < nunits; ++j, ++k)
6641 sel[j] = nunits + k;
6642 }
6643 vec_perm_indices indices (sel, i == units_log2 ? 1 : 2, nunits);
6644 if (!can_vec_perm_const_p (vec_mode, indices))
6645 {
6646 if (i == units_log2)
6647 return -1;
6648
6649 if (whole_vector_shift_kind == scan_store_kind_perm)
6650 {
6651 if (optab_handler (vec_shl_optab, vec_mode) == CODE_FOR_nothing)
6652 return -1;
6653 whole_vector_shift_kind = scan_store_kind_lshift_zero;
6654 /* Whole vector shifts shift in zeros, so if init is an all-zero
6655 constant, there is no need to do anything further. */
6656 if ((TREE_CODE (init) != INTEGER_CST
6657 && TREE_CODE (init) != REAL_CST)
6658 || !initializer_zerop (init))
6659 {
6660 tree masktype = truth_type_for (vectype);
6661 if (!expand_vec_cond_expr_p (vectype, masktype, VECTOR_CST))
6662 return -1;
6663 whole_vector_shift_kind = scan_store_kind_lshift_cond;
6664 }
6665 }
6666 kind = whole_vector_shift_kind;
6667 }
6668 if (use_whole_vector)
6669 {
6670 if (kind != scan_store_kind_perm && use_whole_vector->is_empty ())
6671 use_whole_vector->safe_grow_cleared (i);
6672 if (kind != scan_store_kind_perm || !use_whole_vector->is_empty ())
6673 use_whole_vector->safe_push (kind);
6674 }
6675 }
6676
6677 return units_log2;
6678 }
6679
6680
6681 /* Function check_scan_store.
6682
6683 Check magic stores for #pragma omp scan {in,ex}clusive reductions. */
6684
6685 static bool
6686 check_scan_store (vec_info *vinfo, stmt_vec_info stmt_info, tree vectype,
6687 enum vect_def_type rhs_dt, bool slp, tree mask,
6688 vect_memory_access_type memory_access_type)
6689 {
6690 loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
6691 dr_vec_info *dr_info = STMT_VINFO_DR_INFO (stmt_info);
6692 tree ref_type;
6693
6694 gcc_assert (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) > 1);
6695 if (slp
6696 || mask
6697 || memory_access_type != VMAT_CONTIGUOUS
6698 || TREE_CODE (DR_BASE_ADDRESS (dr_info->dr)) != ADDR_EXPR
6699 || !VAR_P (TREE_OPERAND (DR_BASE_ADDRESS (dr_info->dr), 0))
6700 || loop_vinfo == NULL
6701 || LOOP_VINFO_FULLY_MASKED_P (loop_vinfo)
6702 || STMT_VINFO_GROUPED_ACCESS (stmt_info)
6703 || !integer_zerop (get_dr_vinfo_offset (vinfo, dr_info))
6704 || !integer_zerop (DR_INIT (dr_info->dr))
6705 || !(ref_type = reference_alias_ptr_type (DR_REF (dr_info->dr)))
6706 || !alias_sets_conflict_p (get_alias_set (vectype),
6707 get_alias_set (TREE_TYPE (ref_type))))
6708 {
6709 if (dump_enabled_p ())
6710 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6711 "unsupported OpenMP scan store.\n");
6712 return false;
6713 }
6714
6715 /* We need to pattern match code built by OpenMP lowering and simplified
6716 by subsequent optimizations into something we can handle.
6717 #pragma omp simd reduction(inscan,+:r)
6718 for (...)
6719 {
6720 r += something ();
6721 #pragma omp scan inclusive (r)
6722 use (r);
6723 }
6724 shall have body with:
6725 // Initialization for input phase, store the reduction initializer:
6726 _20 = .GOMP_SIMD_LANE (simduid.3_14(D), 0);
6727 _21 = .GOMP_SIMD_LANE (simduid.3_14(D), 1);
6728 D.2042[_21] = 0;
6729 // Actual input phase:
6730 ...
6731 r.0_5 = D.2042[_20];
6732 _6 = _4 + r.0_5;
6733 D.2042[_20] = _6;
6734 // Initialization for scan phase:
6735 _25 = .GOMP_SIMD_LANE (simduid.3_14(D), 2);
6736 _26 = D.2043[_25];
6737 _27 = D.2042[_25];
6738 _28 = _26 + _27;
6739 D.2043[_25] = _28;
6740 D.2042[_25] = _28;
6741 // Actual scan phase:
6742 ...
6743 r.1_8 = D.2042[_20];
6744 ...
6745 The "omp simd array" variable D.2042 holds the privatized copy used
6746 inside of the loop and D.2043 is another one that holds copies of
6747 the current original list item. The separate GOMP_SIMD_LANE ifn
6748 kinds are there in order to allow optimizing the initializer store
6749 and combiner sequence, e.g. if it is originally some C++ish user
6750 defined reduction, but allow the vectorizer to pattern recognize it
6751 and turn into the appropriate vectorized scan.
6752
6753 For exclusive scan, this is slightly different:
6754 #pragma omp simd reduction(inscan,+:r)
6755 for (...)
6756 {
6757 use (r);
6758 #pragma omp scan exclusive (r)
6759 r += something ();
6760 }
6761 shall have body with:
6762 // Initialization for input phase, store the reduction initializer:
6763 _20 = .GOMP_SIMD_LANE (simduid.3_14(D), 0);
6764 _21 = .GOMP_SIMD_LANE (simduid.3_14(D), 1);
6765 D.2042[_21] = 0;
6766 // Actual input phase:
6767 ...
6768 r.0_5 = D.2042[_20];
6769 _6 = _4 + r.0_5;
6770 D.2042[_20] = _6;
6771 // Initialization for scan phase:
6772 _25 = .GOMP_SIMD_LANE (simduid.3_14(D), 3);
6773 _26 = D.2043[_25];
6774 D.2044[_25] = _26;
6775 _27 = D.2042[_25];
6776 _28 = _26 + _27;
6777 D.2043[_25] = _28;
6778 // Actual scan phase:
6779 ...
6780 r.1_8 = D.2044[_20];
6781 ... */
6782
6783 if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) == 2)
6784 {
6785 /* Match the D.2042[_21] = 0; store above. Just require that
6786 it is a constant or external definition store. */
6787 if (rhs_dt != vect_constant_def && rhs_dt != vect_external_def)
6788 {
6789 fail_init:
6790 if (dump_enabled_p ())
6791 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6792 "unsupported OpenMP scan initializer store.\n");
6793 return false;
6794 }
6795
6796 if (! loop_vinfo->scan_map)
6797 loop_vinfo->scan_map = new hash_map<tree, tree>;
6798 tree var = TREE_OPERAND (DR_BASE_ADDRESS (dr_info->dr), 0);
6799 tree &cached = loop_vinfo->scan_map->get_or_insert (var);
6800 if (cached)
6801 goto fail_init;
6802 cached = gimple_assign_rhs1 (STMT_VINFO_STMT (stmt_info));
6803
6804 /* These stores can be vectorized normally. */
6805 return true;
6806 }
6807
6808 if (rhs_dt != vect_internal_def)
6809 {
6810 fail:
6811 if (dump_enabled_p ())
6812 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6813 "unsupported OpenMP scan combiner pattern.\n");
6814 return false;
6815 }
6816
6817 gimple *stmt = STMT_VINFO_STMT (stmt_info);
6818 tree rhs = gimple_assign_rhs1 (stmt);
6819 if (TREE_CODE (rhs) != SSA_NAME)
6820 goto fail;
6821
6822 gimple *other_store_stmt = NULL;
6823 tree var = TREE_OPERAND (DR_BASE_ADDRESS (dr_info->dr), 0);
6824 bool inscan_var_store
6825 = lookup_attribute ("omp simd inscan", DECL_ATTRIBUTES (var)) != NULL;
6826
6827 if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) == 4)
6828 {
6829 if (!inscan_var_store)
6830 {
6831 use_operand_p use_p;
6832 imm_use_iterator iter;
6833 FOR_EACH_IMM_USE_FAST (use_p, iter, rhs)
6834 {
6835 gimple *use_stmt = USE_STMT (use_p);
6836 if (use_stmt == stmt || is_gimple_debug (use_stmt))
6837 continue;
6838 if (gimple_bb (use_stmt) != gimple_bb (stmt)
6839 || !is_gimple_assign (use_stmt)
6840 || gimple_assign_rhs_class (use_stmt) != GIMPLE_BINARY_RHS
6841 || other_store_stmt
6842 || TREE_CODE (gimple_assign_lhs (use_stmt)) != SSA_NAME)
6843 goto fail;
6844 other_store_stmt = use_stmt;
6845 }
6846 if (other_store_stmt == NULL)
6847 goto fail;
6848 rhs = gimple_assign_lhs (other_store_stmt);
6849 if (!single_imm_use (rhs, &use_p, &other_store_stmt))
6850 goto fail;
6851 }
6852 }
6853 else if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) == 3)
6854 {
6855 use_operand_p use_p;
6856 imm_use_iterator iter;
6857 FOR_EACH_IMM_USE_FAST (use_p, iter, rhs)
6858 {
6859 gimple *use_stmt = USE_STMT (use_p);
6860 if (use_stmt == stmt || is_gimple_debug (use_stmt))
6861 continue;
6862 if (other_store_stmt)
6863 goto fail;
6864 other_store_stmt = use_stmt;
6865 }
6866 }
6867 else
6868 goto fail;
6869
6870 gimple *def_stmt = SSA_NAME_DEF_STMT (rhs);
6871 if (gimple_bb (def_stmt) != gimple_bb (stmt)
6872 || !is_gimple_assign (def_stmt)
6873 || gimple_assign_rhs_class (def_stmt) != GIMPLE_BINARY_RHS)
6874 goto fail;
6875
6876 enum tree_code code = gimple_assign_rhs_code (def_stmt);
6877 /* For pointer addition, we should use the normal plus for the vector
6878 operation. */
6879 switch (code)
6880 {
6881 case POINTER_PLUS_EXPR:
6882 code = PLUS_EXPR;
6883 break;
6884 case MULT_HIGHPART_EXPR:
6885 goto fail;
6886 default:
6887 break;
6888 }
6889 if (TREE_CODE_LENGTH (code) != binary_op || !commutative_tree_code (code))
6890 goto fail;
6891
6892 tree rhs1 = gimple_assign_rhs1 (def_stmt);
6893 tree rhs2 = gimple_assign_rhs2 (def_stmt);
6894 if (TREE_CODE (rhs1) != SSA_NAME || TREE_CODE (rhs2) != SSA_NAME)
6895 goto fail;
6896
6897 gimple *load1_stmt = SSA_NAME_DEF_STMT (rhs1);
6898 gimple *load2_stmt = SSA_NAME_DEF_STMT (rhs2);
6899 if (gimple_bb (load1_stmt) != gimple_bb (stmt)
6900 || !gimple_assign_load_p (load1_stmt)
6901 || gimple_bb (load2_stmt) != gimple_bb (stmt)
6902 || !gimple_assign_load_p (load2_stmt))
6903 goto fail;
6904
6905 stmt_vec_info load1_stmt_info = loop_vinfo->lookup_stmt (load1_stmt);
6906 stmt_vec_info load2_stmt_info = loop_vinfo->lookup_stmt (load2_stmt);
6907 if (load1_stmt_info == NULL
6908 || load2_stmt_info == NULL
6909 || (STMT_VINFO_SIMD_LANE_ACCESS_P (load1_stmt_info)
6910 != STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info))
6911 || (STMT_VINFO_SIMD_LANE_ACCESS_P (load2_stmt_info)
6912 != STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info)))
6913 goto fail;
6914
6915 if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) == 4 && inscan_var_store)
6916 {
6917 dr_vec_info *load1_dr_info = STMT_VINFO_DR_INFO (load1_stmt_info);
6918 if (TREE_CODE (DR_BASE_ADDRESS (load1_dr_info->dr)) != ADDR_EXPR
6919 || !VAR_P (TREE_OPERAND (DR_BASE_ADDRESS (load1_dr_info->dr), 0)))
6920 goto fail;
6921 tree var1 = TREE_OPERAND (DR_BASE_ADDRESS (load1_dr_info->dr), 0);
6922 tree lrhs;
6923 if (lookup_attribute ("omp simd inscan", DECL_ATTRIBUTES (var1)))
6924 lrhs = rhs1;
6925 else
6926 lrhs = rhs2;
6927 use_operand_p use_p;
6928 imm_use_iterator iter;
6929 FOR_EACH_IMM_USE_FAST (use_p, iter, lrhs)
6930 {
6931 gimple *use_stmt = USE_STMT (use_p);
6932 if (use_stmt == def_stmt || is_gimple_debug (use_stmt))
6933 continue;
6934 if (other_store_stmt)
6935 goto fail;
6936 other_store_stmt = use_stmt;
6937 }
6938 }
6939
6940 if (other_store_stmt == NULL)
6941 goto fail;
6942 if (gimple_bb (other_store_stmt) != gimple_bb (stmt)
6943 || !gimple_store_p (other_store_stmt))
6944 goto fail;
6945
6946 stmt_vec_info other_store_stmt_info
6947 = loop_vinfo->lookup_stmt (other_store_stmt);
6948 if (other_store_stmt_info == NULL
6949 || (STMT_VINFO_SIMD_LANE_ACCESS_P (other_store_stmt_info)
6950 != STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info)))
6951 goto fail;
6952
6953 gimple *stmt1 = stmt;
6954 gimple *stmt2 = other_store_stmt;
6955 if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) == 4 && !inscan_var_store)
6956 std::swap (stmt1, stmt2);
6957 if (scan_operand_equal_p (gimple_assign_lhs (stmt1),
6958 gimple_assign_rhs1 (load2_stmt)))
6959 {
6960 std::swap (rhs1, rhs2);
6961 std::swap (load1_stmt, load2_stmt);
6962 std::swap (load1_stmt_info, load2_stmt_info);
6963 }
6964 if (!scan_operand_equal_p (gimple_assign_lhs (stmt1),
6965 gimple_assign_rhs1 (load1_stmt)))
6966 goto fail;
6967
6968 tree var3 = NULL_TREE;
6969 if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) == 3
6970 && !scan_operand_equal_p (gimple_assign_lhs (stmt2),
6971 gimple_assign_rhs1 (load2_stmt)))
6972 goto fail;
6973 else if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) == 4)
6974 {
6975 dr_vec_info *load2_dr_info = STMT_VINFO_DR_INFO (load2_stmt_info);
6976 if (TREE_CODE (DR_BASE_ADDRESS (load2_dr_info->dr)) != ADDR_EXPR
6977 || !VAR_P (TREE_OPERAND (DR_BASE_ADDRESS (load2_dr_info->dr), 0)))
6978 goto fail;
6979 var3 = TREE_OPERAND (DR_BASE_ADDRESS (load2_dr_info->dr), 0);
6980 if (!lookup_attribute ("omp simd array", DECL_ATTRIBUTES (var3))
6981 || lookup_attribute ("omp simd inscan", DECL_ATTRIBUTES (var3))
6982 || lookup_attribute ("omp simd inscan exclusive",
6983 DECL_ATTRIBUTES (var3)))
6984 goto fail;
6985 }
6986
6987 dr_vec_info *other_dr_info = STMT_VINFO_DR_INFO (other_store_stmt_info);
6988 if (TREE_CODE (DR_BASE_ADDRESS (other_dr_info->dr)) != ADDR_EXPR
6989 || !VAR_P (TREE_OPERAND (DR_BASE_ADDRESS (other_dr_info->dr), 0)))
6990 goto fail;
6991
6992 tree var1 = TREE_OPERAND (DR_BASE_ADDRESS (dr_info->dr), 0);
6993 tree var2 = TREE_OPERAND (DR_BASE_ADDRESS (other_dr_info->dr), 0);
6994 if (!lookup_attribute ("omp simd array", DECL_ATTRIBUTES (var1))
6995 || !lookup_attribute ("omp simd array", DECL_ATTRIBUTES (var2))
6996 || (!lookup_attribute ("omp simd inscan", DECL_ATTRIBUTES (var1)))
6997 == (!lookup_attribute ("omp simd inscan", DECL_ATTRIBUTES (var2))))
6998 goto fail;
6999
7000 if (lookup_attribute ("omp simd inscan", DECL_ATTRIBUTES (var1)))
7001 std::swap (var1, var2);
7002
7003 if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) == 4)
7004 {
7005 if (!lookup_attribute ("omp simd inscan exclusive",
7006 DECL_ATTRIBUTES (var1)))
7007 goto fail;
7008 var1 = var3;
7009 }
7010
7011 if (loop_vinfo->scan_map == NULL)
7012 goto fail;
7013 tree *init = loop_vinfo->scan_map->get (var1);
7014 if (init == NULL)
7015 goto fail;
7016
7017 /* The IL is as expected; now check if we can actually vectorize it.
7018 Inclusive scan:
7019 _26 = D.2043[_25];
7020 _27 = D.2042[_25];
7021 _28 = _26 + _27;
7022 D.2043[_25] = _28;
7023 D.2042[_25] = _28;
7024 should be vectorized as (where _40 is the vectorized rhs
7025 from the D.2042[_21] = 0; store):
7026 _30 = MEM <vector(8) int> [(int *)&D.2043];
7027 _31 = MEM <vector(8) int> [(int *)&D.2042];
7028 _32 = VEC_PERM_EXPR <_40, _31, { 0, 8, 9, 10, 11, 12, 13, 14 }>;
7029 _33 = _31 + _32;
7030 // _33 = { _31[0], _31[0]+_31[1], _31[1]+_31[2], ..., _31[6]+_31[7] };
7031 _34 = VEC_PERM_EXPR <_40, _33, { 0, 1, 8, 9, 10, 11, 12, 13 }>;
7032 _35 = _33 + _34;
7033 // _35 = { _31[0], _31[0]+_31[1], _31[0]+.._31[2], _31[0]+.._31[3],
7034 // _31[1]+.._31[4], ... _31[4]+.._31[7] };
7035 _36 = VEC_PERM_EXPR <_40, _35, { 0, 1, 2, 3, 8, 9, 10, 11 }>;
7036 _37 = _35 + _36;
7037 // _37 = { _31[0], _31[0]+_31[1], _31[0]+.._31[2], _31[0]+.._31[3],
7038 // _31[0]+.._31[4], ... _31[0]+.._31[7] };
7039 _38 = _30 + _37;
7040 _39 = VEC_PERM_EXPR <_38, _38, { 7, 7, 7, 7, 7, 7, 7, 7 }>;
7041 MEM <vector(8) int> [(int *)&D.2043] = _39;
7042 MEM <vector(8) int> [(int *)&D.2042] = _38;
7043 Exclusive scan:
7044 _26 = D.2043[_25];
7045 D.2044[_25] = _26;
7046 _27 = D.2042[_25];
7047 _28 = _26 + _27;
7048 D.2043[_25] = _28;
7049 should be vectorized as (where _40 is the vectorized rhs
7050 from the D.2042[_21] = 0; store):
7051 _30 = MEM <vector(8) int> [(int *)&D.2043];
7052 _31 = MEM <vector(8) int> [(int *)&D.2042];
7053 _32 = VEC_PERM_EXPR <_40, _31, { 0, 8, 9, 10, 11, 12, 13, 14 }>;
7054 _33 = VEC_PERM_EXPR <_40, _32, { 0, 8, 9, 10, 11, 12, 13, 14 }>;
7055 _34 = _32 + _33;
7056 // _34 = { 0, _31[0], _31[0]+_31[1], _31[1]+_31[2], _31[2]+_31[3],
7057 // _31[3]+_31[4], ... _31[5]+.._31[6] };
7058 _35 = VEC_PERM_EXPR <_40, _34, { 0, 1, 8, 9, 10, 11, 12, 13 }>;
7059 _36 = _34 + _35;
7060 // _36 = { 0, _31[0], _31[0]+_31[1], _31[0]+.._31[2], _31[0]+.._31[3],
7061 // _31[1]+.._31[4], ... _31[3]+.._31[6] };
7062 _37 = VEC_PERM_EXPR <_40, _36, { 0, 1, 2, 3, 8, 9, 10, 11 }>;
7063 _38 = _36 + _37;
7064 // _38 = { 0, _31[0], _31[0]+_31[1], _31[0]+.._31[2], _31[0]+.._31[3],
7065 // _31[0]+.._31[4], ... _31[0]+.._31[6] };
7066 _39 = _30 + _38;
7067 _50 = _31 + _39;
7068 _51 = VEC_PERM_EXPR <_50, _50, { 7, 7, 7, 7, 7, 7, 7, 7 }>;
7069 MEM <vector(8) int> [(int *)&D.2044] = _39;
7070 MEM <vector(8) int> [(int *)&D.2042] = _51; */
7071 enum machine_mode vec_mode = TYPE_MODE (vectype);
7072 optab optab = optab_for_tree_code (code, vectype, optab_default);
7073 if (!optab || optab_handler (optab, vec_mode) == CODE_FOR_nothing)
7074 goto fail;
7075
7076 int units_log2 = scan_store_can_perm_p (vectype, *init);
7077 if (units_log2 == -1)
7078 goto fail;
7079
7080 return true;
7081 }
7082
7083
7084 /* Function vectorizable_scan_store.
7085
7086 Helper of vectorizable_store; arguments are as for vectorizable_store.
7087 Handle only the transformation; the checking is done in check_scan_store. */
7088
7089 static bool
7090 vectorizable_scan_store (vec_info *vinfo,
7091 stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
7092 stmt_vec_info *vec_stmt, int ncopies)
7093 {
7094 loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
7095 dr_vec_info *dr_info = STMT_VINFO_DR_INFO (stmt_info);
7096 tree ref_type = reference_alias_ptr_type (DR_REF (dr_info->dr));
7097 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
7098
7099 if (dump_enabled_p ())
7100 dump_printf_loc (MSG_NOTE, vect_location,
7101 "transform scan store. ncopies = %d\n", ncopies);
7102
7103 gimple *stmt = STMT_VINFO_STMT (stmt_info);
7104 tree rhs = gimple_assign_rhs1 (stmt);
7105 gcc_assert (TREE_CODE (rhs) == SSA_NAME);
7106
7107 tree var = TREE_OPERAND (DR_BASE_ADDRESS (dr_info->dr), 0);
7108 bool inscan_var_store
7109 = lookup_attribute ("omp simd inscan", DECL_ATTRIBUTES (var)) != NULL;
7110
7111 if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) == 4 && !inscan_var_store)
7112 {
7113 use_operand_p use_p;
7114 imm_use_iterator iter;
7115 FOR_EACH_IMM_USE_FAST (use_p, iter, rhs)
7116 {
7117 gimple *use_stmt = USE_STMT (use_p);
7118 if (use_stmt == stmt || is_gimple_debug (use_stmt))
7119 continue;
7120 rhs = gimple_assign_lhs (use_stmt);
7121 break;
7122 }
7123 }
7124
7125 gimple *def_stmt = SSA_NAME_DEF_STMT (rhs);
7126 enum tree_code code = gimple_assign_rhs_code (def_stmt);
7127 if (code == POINTER_PLUS_EXPR)
7128 code = PLUS_EXPR;
7129 gcc_assert (TREE_CODE_LENGTH (code) == binary_op
7130 && commutative_tree_code (code));
7131 tree rhs1 = gimple_assign_rhs1 (def_stmt);
7132 tree rhs2 = gimple_assign_rhs2 (def_stmt);
7133 gcc_assert (TREE_CODE (rhs1) == SSA_NAME && TREE_CODE (rhs2) == SSA_NAME);
7134 gimple *load1_stmt = SSA_NAME_DEF_STMT (rhs1);
7135 gimple *load2_stmt = SSA_NAME_DEF_STMT (rhs2);
7136 stmt_vec_info load1_stmt_info = loop_vinfo->lookup_stmt (load1_stmt);
7137 stmt_vec_info load2_stmt_info = loop_vinfo->lookup_stmt (load2_stmt);
7138 dr_vec_info *load1_dr_info = STMT_VINFO_DR_INFO (load1_stmt_info);
7139 dr_vec_info *load2_dr_info = STMT_VINFO_DR_INFO (load2_stmt_info);
7140 tree var1 = TREE_OPERAND (DR_BASE_ADDRESS (load1_dr_info->dr), 0);
7141 tree var2 = TREE_OPERAND (DR_BASE_ADDRESS (load2_dr_info->dr), 0);
7142
7143 if (lookup_attribute ("omp simd inscan", DECL_ATTRIBUTES (var1)))
7144 {
7145 std::swap (rhs1, rhs2);
7146 std::swap (var1, var2);
7147 std::swap (load1_dr_info, load2_dr_info);
7148 }
7149
7150 tree *init = loop_vinfo->scan_map->get (var1);
7151 gcc_assert (init);
7152
7153 unsigned HOST_WIDE_INT nunits;
7154 if (!TYPE_VECTOR_SUBPARTS (vectype).is_constant (&nunits))
7155 gcc_unreachable ();
7156 auto_vec<enum scan_store_kind, 16> use_whole_vector;
7157 int units_log2 = scan_store_can_perm_p (vectype, *init, &use_whole_vector);
7158 gcc_assert (units_log2 > 0);
7159 auto_vec<tree, 16> perms;
7160 perms.quick_grow (units_log2 + 1);
7161 tree zero_vec = NULL_TREE, masktype = NULL_TREE;
7162 for (int i = 0; i <= units_log2; ++i)
7163 {
7164 unsigned HOST_WIDE_INT j, k;
7165 vec_perm_builder sel (nunits, nunits, 1);
7166 sel.quick_grow (nunits);
7167 if (i == units_log2)
7168 for (j = 0; j < nunits; ++j)
7169 sel[j] = nunits - 1;
7170 else
7171 {
7172 for (j = 0; j < (HOST_WIDE_INT_1U << i); ++j)
7173 sel[j] = j;
7174 for (k = 0; j < nunits; ++j, ++k)
7175 sel[j] = nunits + k;
7176 }
7177 vec_perm_indices indices (sel, i == units_log2 ? 1 : 2, nunits);
7178 if (!use_whole_vector.is_empty ()
7179 && use_whole_vector[i] != scan_store_kind_perm)
7180 {
7181 if (zero_vec == NULL_TREE)
7182 zero_vec = build_zero_cst (vectype);
7183 if (masktype == NULL_TREE
7184 && use_whole_vector[i] == scan_store_kind_lshift_cond)
7185 masktype = truth_type_for (vectype);
7186 perms[i] = vect_gen_perm_mask_any (vectype, indices);
7187 }
7188 else
7189 perms[i] = vect_gen_perm_mask_checked (vectype, indices);
7190 }
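 /* For example, for an 8-lane vector UNITS_LOG2 is 3 and the selectors
    built above are { 0, 8, 9, 10, 11, 12, 13, 14 },
    { 0, 1, 8, 9, 10, 11, 12, 13 } and { 0, 1, 2, 3, 8, 9, 10, 11 } for the
    log2 combining steps, plus { 7, 7, 7, 7, 7, 7, 7, 7 } for the final
    broadcast of the last lane, matching the example in check_scan_store.  */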
7191
7192 stmt_vec_info prev_stmt_info = NULL;
7193 tree vec_oprnd1 = NULL_TREE;
7194 tree vec_oprnd2 = NULL_TREE;
7195 tree vec_oprnd3 = NULL_TREE;
7196 tree dataref_ptr = DR_BASE_ADDRESS (dr_info->dr);
7197 tree dataref_offset = build_int_cst (ref_type, 0);
7198 tree bump = vect_get_data_ptr_increment (vinfo, dr_info,
7199 vectype, VMAT_CONTIGUOUS);
7200 tree ldataref_ptr = NULL_TREE;
7201 tree orig = NULL_TREE;
7202 if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) == 4 && !inscan_var_store)
7203 ldataref_ptr = DR_BASE_ADDRESS (load1_dr_info->dr);
7204 for (int j = 0; j < ncopies; j++)
7205 {
7206 stmt_vec_info new_stmt_info;
7207 if (j == 0)
7208 {
7209 vec_oprnd1 = vect_get_vec_def_for_operand (vinfo, *init, stmt_info);
7210 if (ldataref_ptr == NULL)
7211 vec_oprnd2 = vect_get_vec_def_for_operand (vinfo, rhs1, stmt_info);
7212 vec_oprnd3 = vect_get_vec_def_for_operand (vinfo, rhs2, stmt_info);
7213 orig = vec_oprnd3;
7214 }
7215 else
7216 {
7217 vec_oprnd1 = vect_get_vec_def_for_stmt_copy (vinfo, vec_oprnd1);
7218 if (ldataref_ptr == NULL)
7219 vec_oprnd2 = vect_get_vec_def_for_stmt_copy (vinfo, vec_oprnd2);
7220 vec_oprnd3 = vect_get_vec_def_for_stmt_copy (vinfo, vec_oprnd3);
7221 if (!inscan_var_store)
7222 dataref_offset = int_const_binop (PLUS_EXPR, dataref_offset, bump);
7223 }
7224
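 /* For the exclusive-scan store into the non-inscan "omp simd array" the
    second operand is re-loaded from the load1 data reference on each copy
    instead of being taken from a vectorized def.  */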
7225 if (ldataref_ptr)
7226 {
7227 vec_oprnd2 = make_ssa_name (vectype);
7228 tree data_ref = fold_build2 (MEM_REF, vectype,
7229 unshare_expr (ldataref_ptr),
7230 dataref_offset);
7231 vect_copy_ref_info (data_ref, DR_REF (load1_dr_info->dr));
7232 gimple *g = gimple_build_assign (vec_oprnd2, data_ref);
7233 new_stmt_info = vect_finish_stmt_generation (vinfo, stmt_info, g, gsi);
7234 if (prev_stmt_info == NULL)
7235 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt_info;
7236 else
7237 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt_info;
7238 prev_stmt_info = new_stmt_info;
7239 }
7240
7241 tree v = vec_oprnd2;
7242 for (int i = 0; i < units_log2; ++i)
7243 {
7244 tree new_temp = make_ssa_name (vectype);
7245 gimple *g = gimple_build_assign (new_temp, VEC_PERM_EXPR,
7246 (zero_vec
7247 && (use_whole_vector[i]
7248 != scan_store_kind_perm))
7249 ? zero_vec : vec_oprnd1, v,
7250 perms[i]);
7251 new_stmt_info = vect_finish_stmt_generation (vinfo, stmt_info, g, gsi);
7252 if (prev_stmt_info == NULL)
7253 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt_info;
7254 else
7255 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt_info;
7256 prev_stmt_info = new_stmt_info;
7257
7258 if (zero_vec && use_whole_vector[i] == scan_store_kind_lshift_cond)
7259 {
7260 /* The whole-vector shift shifted in zero bits, but if *init
7261 is not initializer_zerop, we need to replace those elements
7262 with elements from vec_oprnd1. */
7263 tree_vector_builder vb (masktype, nunits, 1);
7264 for (unsigned HOST_WIDE_INT k = 0; k < nunits; ++k)
7265 vb.quick_push (k < (HOST_WIDE_INT_1U << i)
7266 ? boolean_false_node : boolean_true_node);
7267
7268 tree new_temp2 = make_ssa_name (vectype);
7269 g = gimple_build_assign (new_temp2, VEC_COND_EXPR, vb.build (),
7270 new_temp, vec_oprnd1);
7271 new_stmt_info = vect_finish_stmt_generation (vinfo, stmt_info,
7272 g, gsi);
7273 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt_info;
7274 prev_stmt_info = new_stmt_info;
7275 new_temp = new_temp2;
7276 }
7277
7278 /* For exclusive scan, perform the perms[i] permutation once
7279 more. */
7280 if (i == 0
7281 && STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) == 4
7282 && v == vec_oprnd2)
7283 {
7284 v = new_temp;
7285 --i;
7286 continue;
7287 }
7288
7289 tree new_temp2 = make_ssa_name (vectype);
7290 g = gimple_build_assign (new_temp2, code, v, new_temp);
7291 new_stmt_info = vect_finish_stmt_generation (vinfo, stmt_info, g, gsi);
7292 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt_info;
7293 prev_stmt_info = new_stmt_info;
7294
7295 v = new_temp2;
7296 }
7297
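 /* At this point V holds the per-lane prefix scan of the lanes of
    VEC_OPRND2, with elements of *INIT shifted into the vacated low lanes
    (for exclusive scan the extra initial permutation above made the scan
    exclusive).  Combine it with the carried-in vector ORIG below.  */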
7298 tree new_temp = make_ssa_name (vectype);
7299 gimple *g = gimple_build_assign (new_temp, code, orig, v);
7300 new_stmt_info = vect_finish_stmt_generation (vinfo, stmt_info, g, gsi);
7301 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt_info;
7302 prev_stmt_info = new_stmt_info;
7303
7304 tree last_perm_arg = new_temp;
7305 /* For exclusive scan, new_temp computed above is the exclusive scan
7306 prefix sum. Turn it into inclusive prefix sum for the broadcast
7307 of the last element into orig. */
7308 if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) == 4)
7309 {
7310 last_perm_arg = make_ssa_name (vectype);
7311 g = gimple_build_assign (last_perm_arg, code, new_temp, vec_oprnd2);
7312 new_stmt_info = vect_finish_stmt_generation (vinfo, stmt_info, g, gsi);
7313 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt_info;
7314 prev_stmt_info = new_stmt_info;
7315 }
7316
7317 orig = make_ssa_name (vectype);
7318 g = gimple_build_assign (orig, VEC_PERM_EXPR, last_perm_arg,
7319 last_perm_arg, perms[units_log2]);
7320 new_stmt_info = vect_finish_stmt_generation (vinfo, stmt_info, g, gsi);
7321 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt_info;
7322 prev_stmt_info = new_stmt_info;
7323
7324 if (!inscan_var_store)
7325 {
7326 tree data_ref = fold_build2 (MEM_REF, vectype,
7327 unshare_expr (dataref_ptr),
7328 dataref_offset);
7329 vect_copy_ref_info (data_ref, DR_REF (dr_info->dr));
7330 g = gimple_build_assign (data_ref, new_temp);
7331 new_stmt_info = vect_finish_stmt_generation (vinfo, stmt_info, g, gsi);
7332 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt_info;
7333 prev_stmt_info = new_stmt_info;
7334 }
7335 }
7336
7337 if (inscan_var_store)
7338 for (int j = 0; j < ncopies; j++)
7339 {
7340 if (j != 0)
7341 dataref_offset = int_const_binop (PLUS_EXPR, dataref_offset, bump);
7342
7343 tree data_ref = fold_build2 (MEM_REF, vectype,
7344 unshare_expr (dataref_ptr),
7345 dataref_offset);
7346 vect_copy_ref_info (data_ref, DR_REF (dr_info->dr));
7347 gimple *g = gimple_build_assign (data_ref, orig);
7348 stmt_vec_info new_stmt_info
7349 = vect_finish_stmt_generation (vinfo, stmt_info, g, gsi);
7350 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt_info;
7351 prev_stmt_info = new_stmt_info;
7352 }
7353 return true;
7354 }
7355
7356
7357 /* Function vectorizable_store.
7358
7359 Check if STMT_INFO defines a non-scalar data-ref (array/pointer/structure)
7360 that can be vectorized.
7361 If VEC_STMT is also passed, vectorize STMT_INFO: create a vectorized
7362 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
7363 Return true if STMT_INFO is vectorizable in this way. */
7364
7365 static bool
7366 vectorizable_store (vec_info *vinfo,
7367 stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
7368 stmt_vec_info *vec_stmt, slp_tree slp_node,
7369 stmt_vector_for_cost *cost_vec)
7370 {
7371 tree data_ref;
7372 tree op;
7373 tree vec_oprnd = NULL_TREE;
7374 tree elem_type;
7375 loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
7376 class loop *loop = NULL;
7377 machine_mode vec_mode;
7378 tree dummy;
7379 enum dr_alignment_support alignment_support_scheme;
7380 enum vect_def_type rhs_dt = vect_unknown_def_type;
7381 enum vect_def_type mask_dt = vect_unknown_def_type;
7382 stmt_vec_info prev_stmt_info = NULL;
7383 tree dataref_ptr = NULL_TREE;
7384 tree dataref_offset = NULL_TREE;
7385 gimple *ptr_incr = NULL;
7386 int ncopies;
7387 int j;
7388 stmt_vec_info first_stmt_info;
7389 bool grouped_store;
7390 unsigned int group_size, i;
7391 vec<tree> oprnds = vNULL;
7392 vec<tree> result_chain = vNULL;
7393 tree offset = NULL_TREE;
7394 vec<tree> vec_oprnds = vNULL;
7395 bool slp = (slp_node != NULL);
7396 unsigned int vec_num;
7397 bb_vec_info bb_vinfo = dyn_cast <bb_vec_info> (vinfo);
7398 tree aggr_type;
7399 gather_scatter_info gs_info;
7400 poly_uint64 vf;
7401 vec_load_store_type vls_type;
7402 tree ref_type;
7403
7404 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
7405 return false;
7406
7407 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
7408 && ! vec_stmt)
7409 return false;
7410
7411 /* Is vectorizable store? */
7412
7413 tree mask = NULL_TREE, mask_vectype = NULL_TREE;
7414 if (gassign *assign = dyn_cast <gassign *> (stmt_info->stmt))
7415 {
7416 tree scalar_dest = gimple_assign_lhs (assign);
7417 if (TREE_CODE (scalar_dest) == VIEW_CONVERT_EXPR
7418 && is_pattern_stmt_p (stmt_info))
7419 scalar_dest = TREE_OPERAND (scalar_dest, 0);
7420 if (TREE_CODE (scalar_dest) != ARRAY_REF
7421 && TREE_CODE (scalar_dest) != BIT_FIELD_REF
7422 && TREE_CODE (scalar_dest) != INDIRECT_REF
7423 && TREE_CODE (scalar_dest) != COMPONENT_REF
7424 && TREE_CODE (scalar_dest) != IMAGPART_EXPR
7425 && TREE_CODE (scalar_dest) != REALPART_EXPR
7426 && TREE_CODE (scalar_dest) != MEM_REF)
7427 return false;
7428 }
7429 else
7430 {
7431 gcall *call = dyn_cast <gcall *> (stmt_info->stmt);
7432 if (!call || !gimple_call_internal_p (call))
7433 return false;
7434
7435 internal_fn ifn = gimple_call_internal_fn (call);
7436 if (!internal_store_fn_p (ifn))
7437 return false;
7438
7439 if (slp_node != NULL)
7440 {
7441 if (dump_enabled_p ())
7442 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
7443 "SLP of masked stores not supported.\n");
7444 return false;
7445 }
7446
7447 int mask_index = internal_fn_mask_index (ifn);
7448 if (mask_index >= 0)
7449 {
7450 mask = gimple_call_arg (call, mask_index);
7451 if (!vect_check_scalar_mask (vinfo, stmt_info, mask, &mask_dt,
7452 &mask_vectype))
7453 return false;
7454 }
7455 }
7456
7457 op = vect_get_store_rhs (stmt_info);
7458
7459 /* Cannot have hybrid store SLP -- that would mean storing to the
7460 same location twice. */
7461 gcc_assert (slp == PURE_SLP_STMT (stmt_info));
7462
7463 tree vectype = STMT_VINFO_VECTYPE (stmt_info), rhs_vectype = NULL_TREE;
7464 poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
7465
7466 if (loop_vinfo)
7467 {
7468 loop = LOOP_VINFO_LOOP (loop_vinfo);
7469 vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
7470 }
7471 else
7472 vf = 1;
7473
7474 /* Multiple types in SLP are handled by creating the appropriate number of
7475 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
7476 case of SLP. */
7477 if (slp)
7478 ncopies = 1;
7479 else
7480 ncopies = vect_get_num_copies (loop_vinfo, vectype);
7481
7482 gcc_assert (ncopies >= 1);
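 /* For example, with a vectorization factor of 16 and a four-element
    vectype, NCOPIES is 4 and four vector stmts are generated per scalar
    store in the non-SLP case.  */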
7483
7484 /* FORNOW. This restriction should be relaxed. */
7485 if (loop && nested_in_vect_loop_p (loop, stmt_info) && ncopies > 1)
7486 {
7487 if (dump_enabled_p ())
7488 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
7489 "multiple types in nested loop.\n");
7490 return false;
7491 }
7492
7493 if (!vect_check_store_rhs (vinfo, stmt_info, slp_node,
7494 op, &rhs_dt, &rhs_vectype, &vls_type))
7495 return false;
7496
7497 elem_type = TREE_TYPE (vectype);
7498 vec_mode = TYPE_MODE (vectype);
7499
7500 if (!STMT_VINFO_DATA_REF (stmt_info))
7501 return false;
7502
7503 vect_memory_access_type memory_access_type;
7504 if (!get_load_store_type (vinfo, stmt_info, vectype, slp, mask, vls_type,
7505 ncopies, &memory_access_type, &gs_info))
7506 return false;
7507
7508 if (mask)
7509 {
7510 if (memory_access_type == VMAT_CONTIGUOUS)
7511 {
7512 if (!VECTOR_MODE_P (vec_mode)
7513 || !can_vec_mask_load_store_p (vec_mode,
7514 TYPE_MODE (mask_vectype), false))
7515 return false;
7516 }
7517 else if (memory_access_type != VMAT_LOAD_STORE_LANES
7518 && (memory_access_type != VMAT_GATHER_SCATTER
7519 || (gs_info.decl && !VECTOR_BOOLEAN_TYPE_P (mask_vectype))))
7520 {
7521 if (dump_enabled_p ())
7522 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
7523 "unsupported access type for masked store.\n");
7524 return false;
7525 }
7526 }
7527 else
7528 {
7529 /* FORNOW. In some cases we can vectorize even if the data type is not
7530 supported (e.g. array initialization with 0). */
7531 if (optab_handler (mov_optab, vec_mode) == CODE_FOR_nothing)
7532 return false;
7533 }
7534
7535 dr_vec_info *dr_info = STMT_VINFO_DR_INFO (stmt_info), *first_dr_info = NULL;
7536 grouped_store = (STMT_VINFO_GROUPED_ACCESS (stmt_info)
7537 && memory_access_type != VMAT_GATHER_SCATTER
7538 && (slp || memory_access_type != VMAT_CONTIGUOUS));
7539 if (grouped_store)
7540 {
7541 first_stmt_info = DR_GROUP_FIRST_ELEMENT (stmt_info);
7542 first_dr_info = STMT_VINFO_DR_INFO (first_stmt_info);
7543 group_size = DR_GROUP_SIZE (first_stmt_info);
7544 }
7545 else
7546 {
7547 first_stmt_info = stmt_info;
7548 first_dr_info = dr_info;
7549 group_size = vec_num = 1;
7550 }
7551
7552 if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) > 1 && !vec_stmt)
7553 {
7554 if (!check_scan_store (vinfo, stmt_info, vectype, rhs_dt, slp, mask,
7555 memory_access_type))
7556 return false;
7557 }
7558
7559 if (!vec_stmt) /* transformation not required. */
7560 {
7561 STMT_VINFO_MEMORY_ACCESS_TYPE (stmt_info) = memory_access_type;
7562
7563 if (loop_vinfo
7564 && LOOP_VINFO_CAN_FULLY_MASK_P (loop_vinfo))
7565 check_load_store_masking (loop_vinfo, vectype, vls_type, group_size,
7566 memory_access_type, &gs_info, mask);
7567
7568 if (slp_node
7569 && !vect_maybe_update_slp_op_vectype (SLP_TREE_CHILDREN (slp_node)[0],
7570 vectype))
7571 {
7572 if (dump_enabled_p ())
7573 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
7574 "incompatible vector types for invariants\n");
7575 return false;
7576 }
7577
7578 STMT_VINFO_TYPE (stmt_info) = store_vec_info_type;
7579 vect_model_store_cost (vinfo, stmt_info, ncopies,
7580 memory_access_type, vls_type, slp_node, cost_vec);
7581 return true;
7582 }
7583 gcc_assert (memory_access_type == STMT_VINFO_MEMORY_ACCESS_TYPE (stmt_info));
7584
7585 /* Transform. */
7586
7587 ensure_base_align (dr_info);
7588
7589 if (memory_access_type == VMAT_GATHER_SCATTER && gs_info.decl)
7590 {
7591 tree vec_oprnd0 = NULL_TREE, vec_oprnd1 = NULL_TREE, src;
7592 tree arglist = TYPE_ARG_TYPES (TREE_TYPE (gs_info.decl));
7593 tree rettype, srctype, ptrtype, idxtype, masktype, scaletype;
7594 tree ptr, var, scale, vec_mask;
7595 tree mask_arg = NULL_TREE, mask_op = NULL_TREE, perm_mask = NULL_TREE;
7596 tree mask_halfvectype = mask_vectype;
7597 edge pe = loop_preheader_edge (loop);
7598 gimple_seq seq;
7599 basic_block new_bb;
7600 enum { NARROW, NONE, WIDEN } modifier;
7601 poly_uint64 scatter_off_nunits
7602 = TYPE_VECTOR_SUBPARTS (gs_info.offset_vectype);
7603
7604 if (known_eq (nunits, scatter_off_nunits))
7605 modifier = NONE;
7606 else if (known_eq (nunits * 2, scatter_off_nunits))
7607 {
7608 modifier = WIDEN;
7609
7610 /* Currently gathers and scatters are only supported for
7611 fixed-length vectors. */
7612 unsigned int count = scatter_off_nunits.to_constant ();
7613 vec_perm_builder sel (count, count, 1);
7614 for (i = 0; i < (unsigned int) count; ++i)
7615 sel.quick_push (i | (count / 2));
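 /* For count == 8 this builds the selector { 4, 5, 6, 7, 4, 5, 6, 7 },
    i.e. the odd copies see the upper half of the wider offset vector.  */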
7616
7617 vec_perm_indices indices (sel, 1, count);
7618 perm_mask = vect_gen_perm_mask_checked (gs_info.offset_vectype,
7619 indices);
7620 gcc_assert (perm_mask != NULL_TREE);
7621 }
7622 else if (known_eq (nunits, scatter_off_nunits * 2))
7623 {
7624 modifier = NARROW;
7625
7626 /* Currently gathers and scatters are only supported for
7627 fixed-length vectors. */
7628 unsigned int count = nunits.to_constant ();
7629 vec_perm_builder sel (count, count, 1);
7630 for (i = 0; i < (unsigned int) count; ++i)
7631 sel.quick_push (i | (count / 2));
7632
7633 vec_perm_indices indices (sel, 2, count);
7634 perm_mask = vect_gen_perm_mask_checked (vectype, indices);
7635 gcc_assert (perm_mask != NULL_TREE);
7636 ncopies *= 2;
7637
7638 if (mask)
7639 mask_halfvectype = truth_type_for (gs_info.offset_vectype);
7640 }
7641 else
7642 gcc_unreachable ();
7643
7644 rettype = TREE_TYPE (TREE_TYPE (gs_info.decl));
7645 ptrtype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
7646 masktype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
7647 idxtype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
7648 srctype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
7649 scaletype = TREE_VALUE (arglist);
7650
7651 gcc_checking_assert (TREE_CODE (masktype) == INTEGER_TYPE
7652 && TREE_CODE (rettype) == VOID_TYPE);
7653
7654 ptr = fold_convert (ptrtype, gs_info.base);
7655 if (!is_gimple_min_invariant (ptr))
7656 {
7657 ptr = force_gimple_operand (ptr, &seq, true, NULL_TREE);
7658 new_bb = gsi_insert_seq_on_edge_immediate (pe, seq);
7659 gcc_assert (!new_bb);
7660 }
7661
7662 if (mask == NULL_TREE)
7663 {
7664 mask_arg = build_int_cst (masktype, -1);
7665 mask_arg = vect_init_vector (vinfo, stmt_info,
7666 mask_arg, masktype, NULL);
7667 }
7668
7669 scale = build_int_cst (scaletype, gs_info.scale);
7670
7671 prev_stmt_info = NULL;
7672 for (j = 0; j < ncopies; ++j)
7673 {
7674 if (j == 0)
7675 {
7676 src = vec_oprnd1 = vect_get_vec_def_for_operand (vinfo,
7677 op, stmt_info);
7678 op = vec_oprnd0 = vect_get_vec_def_for_operand (vinfo,
7679 gs_info.offset,
7680 stmt_info);
7681 if (mask)
7682 {
7683 tree mask_vectype = truth_type_for (vectype);
7684 mask_op = vec_mask
7685 = vect_get_vec_def_for_operand (vinfo, mask,
7686 stmt_info, mask_vectype);
7687 }
7688 }
7689 else if (modifier != NONE && (j & 1))
7690 {
7691 if (modifier == WIDEN)
7692 {
7693 src
7694 = vec_oprnd1 = vect_get_vec_def_for_stmt_copy (vinfo,
7695 vec_oprnd1);
7696 op = permute_vec_elements (vinfo, vec_oprnd0, vec_oprnd0,
7697 perm_mask, stmt_info, gsi);
7698 if (mask)
7699 mask_op
7700 = vec_mask = vect_get_vec_def_for_stmt_copy (vinfo,
7701 vec_mask);
7702 }
7703 else if (modifier == NARROW)
7704 {
7705 src = permute_vec_elements (vinfo, vec_oprnd1, vec_oprnd1,
7706 perm_mask, stmt_info, gsi);
7707 op = vec_oprnd0 = vect_get_vec_def_for_stmt_copy (vinfo,
7708 vec_oprnd0);
7709 }
7710 else
7711 gcc_unreachable ();
7712 }
7713 else
7714 {
7715 src = vec_oprnd1 = vect_get_vec_def_for_stmt_copy (vinfo,
7716 vec_oprnd1);
7717 op = vec_oprnd0 = vect_get_vec_def_for_stmt_copy (vinfo,
7718 vec_oprnd0);
7719 if (mask)
7720 mask_op = vec_mask = vect_get_vec_def_for_stmt_copy (vinfo,
7721 vec_mask);
7722 }
7723
7724 if (!useless_type_conversion_p (srctype, TREE_TYPE (src)))
7725 {
7726 gcc_assert (known_eq (TYPE_VECTOR_SUBPARTS (TREE_TYPE (src)),
7727 TYPE_VECTOR_SUBPARTS (srctype)));
7728 var = vect_get_new_ssa_name (srctype, vect_simple_var);
7729 src = build1 (VIEW_CONVERT_EXPR, srctype, src);
7730 gassign *new_stmt
7731 = gimple_build_assign (var, VIEW_CONVERT_EXPR, src);
7732 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
7733 src = var;
7734 }
7735
7736 if (!useless_type_conversion_p (idxtype, TREE_TYPE (op)))
7737 {
7738 gcc_assert (known_eq (TYPE_VECTOR_SUBPARTS (TREE_TYPE (op)),
7739 TYPE_VECTOR_SUBPARTS (idxtype)));
7740 var = vect_get_new_ssa_name (idxtype, vect_simple_var);
7741 op = build1 (VIEW_CONVERT_EXPR, idxtype, op);
7742 gassign *new_stmt
7743 = gimple_build_assign (var, VIEW_CONVERT_EXPR, op);
7744 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
7745 op = var;
7746 }
7747
7748 if (mask)
7749 {
7750 tree utype;
7751 mask_arg = mask_op;
7752 if (modifier == NARROW)
7753 {
7754 var = vect_get_new_ssa_name (mask_halfvectype,
7755 vect_simple_var);
7756 gassign *new_stmt
7757 = gimple_build_assign (var, (j & 1) ? VEC_UNPACK_HI_EXPR
7758 : VEC_UNPACK_LO_EXPR,
7759 mask_op);
7760 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
7761 mask_arg = var;
7762 }
7763 tree optype = TREE_TYPE (mask_arg);
7764 if (TYPE_MODE (masktype) == TYPE_MODE (optype))
7765 utype = masktype;
7766 else
7767 utype = lang_hooks.types.type_for_mode (TYPE_MODE (optype), 1);
7768 var = vect_get_new_ssa_name (utype, vect_scalar_var);
7769 mask_arg = build1 (VIEW_CONVERT_EXPR, utype, mask_arg);
7770 gassign *new_stmt
7771 = gimple_build_assign (var, VIEW_CONVERT_EXPR, mask_arg);
7772 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
7773 mask_arg = var;
7774 if (!useless_type_conversion_p (masktype, utype))
7775 {
7776 gcc_assert (TYPE_PRECISION (utype)
7777 <= TYPE_PRECISION (masktype));
7778 var = vect_get_new_ssa_name (masktype, vect_scalar_var);
7779 new_stmt = gimple_build_assign (var, NOP_EXPR, mask_arg);
7780 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
7781 mask_arg = var;
7782 }
7783 }
7784
7785 gcall *new_stmt
7786 = gimple_build_call (gs_info.decl, 5, ptr, mask_arg, op, src, scale);
7787 stmt_vec_info new_stmt_info
7788 = vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
7789
7790 if (prev_stmt_info == NULL)
7791 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt_info;
7792 else
7793 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt_info;
7794 prev_stmt_info = new_stmt_info;
7795 }
7796 return true;
7797 }
7798 else if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) >= 3)
7799 return vectorizable_scan_store (vinfo, stmt_info, gsi, vec_stmt, ncopies);
7800
7801 if (STMT_VINFO_GROUPED_ACCESS (stmt_info))
7802 DR_GROUP_STORE_COUNT (DR_GROUP_FIRST_ELEMENT (stmt_info))++;
7803
7804 if (grouped_store)
7805 {
7806 /* FORNOW */
7807 gcc_assert (!loop || !nested_in_vect_loop_p (loop, stmt_info));
7808
7809 /* We vectorize all the stmts of the interleaving group when we
7810 reach the last stmt in the group. */
7811 if (DR_GROUP_STORE_COUNT (first_stmt_info)
7812 < DR_GROUP_SIZE (first_stmt_info)
7813 && !slp)
7814 {
7815 *vec_stmt = NULL;
7816 return true;
7817 }
7818
7819 if (slp)
7820 {
7821 grouped_store = false;
7822 /* VEC_NUM is the number of vect stmts to be created for this
7823 group. */
7824 vec_num = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
7825 first_stmt_info = SLP_TREE_SCALAR_STMTS (slp_node)[0];
7826 gcc_assert (DR_GROUP_FIRST_ELEMENT (first_stmt_info)
7827 == first_stmt_info);
7828 first_dr_info = STMT_VINFO_DR_INFO (first_stmt_info);
7829 op = vect_get_store_rhs (first_stmt_info);
7830 }
7831 else
7832 /* VEC_NUM is the number of vect stmts to be created for this
7833 group. */
7834 vec_num = group_size;
7835
7836 ref_type = get_group_alias_ptr_type (first_stmt_info);
7837 }
7838 else
7839 ref_type = reference_alias_ptr_type (DR_REF (first_dr_info->dr));
7840
7841 if (dump_enabled_p ())
7842 dump_printf_loc (MSG_NOTE, vect_location,
7843 "transform store. ncopies = %d\n", ncopies);
7844
7845 if (memory_access_type == VMAT_ELEMENTWISE
7846 || memory_access_type == VMAT_STRIDED_SLP)
7847 {
7848 gimple_stmt_iterator incr_gsi;
7849 bool insert_after;
7850 gimple *incr;
7851 tree offvar;
7852 tree ivstep;
7853 tree running_off;
7854 tree stride_base, stride_step, alias_off;
7855 tree vec_oprnd;
7856 tree dr_offset;
7857 unsigned int g;
7858 /* Checked by get_load_store_type. */
7859 unsigned int const_nunits = nunits.to_constant ();
7860
7861 gcc_assert (!LOOP_VINFO_FULLY_MASKED_P (loop_vinfo));
7862 gcc_assert (!nested_in_vect_loop_p (loop, stmt_info));
7863
7864 dr_offset = get_dr_vinfo_offset (vinfo, first_dr_info);
7865 stride_base
7866 = fold_build_pointer_plus
7867 (DR_BASE_ADDRESS (first_dr_info->dr),
7868 size_binop (PLUS_EXPR,
7869 convert_to_ptrofftype (dr_offset),
7870 convert_to_ptrofftype (DR_INIT (first_dr_info->dr))));
7871 stride_step = fold_convert (sizetype, DR_STEP (first_dr_info->dr));
7872
7873 /* For a store with a loop-invariant stride other than a power of two
7874 (i.e. not a grouped access), like so:
7875
7876 for (i = 0; i < n; i += stride)
7877 array[i] = ...;
7878
7879 we generate a new induction variable and new stores from
7880 the components of the (vectorized) rhs:
7881
7882 for (j = 0; ; j += VF*stride)
7883 vectemp = ...;
7884 tmp1 = vectemp[0];
7885 array[j] = tmp1;
7886 tmp2 = vectemp[1];
7887 array[j + stride] = tmp2;
7888 ...
7889 */
7890
7891 unsigned nstores = const_nunits;
7892 unsigned lnel = 1;
7893 tree ltype = elem_type;
7894 tree lvectype = vectype;
7895 if (slp)
7896 {
7897 if (group_size < const_nunits
7898 && const_nunits % group_size == 0)
7899 {
7900 nstores = const_nunits / group_size;
7901 lnel = group_size;
7902 ltype = build_vector_type (elem_type, group_size);
7903 lvectype = vectype;
7904
7905 /* First check whether the vec_extract optab cannot extract such
7906 subvectors directly; if so, try the fallbacks below. */
7907 scalar_mode elmode = SCALAR_TYPE_MODE (elem_type);
7908 machine_mode vmode;
7909 if (!VECTOR_MODE_P (TYPE_MODE (vectype))
7910 || !related_vector_mode (TYPE_MODE (vectype), elmode,
7911 group_size).exists (&vmode)
7912 || (convert_optab_handler (vec_extract_optab,
7913 TYPE_MODE (vectype), vmode)
7914 == CODE_FOR_nothing))
7915 {
7916 /* Try to avoid emitting an extract of vector elements
7917 by performing the extracts using an integer type of the
7918 same size, extracting from a vector of those and then
7919 re-interpreting it as the original vector type if
7920 supported. */
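 /* For example, with V8SI vectors and a group size of 2 the extracts
    below are done as DImode elements of a V4DI view of the vector,
    provided the target supports that.  */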
7921 unsigned lsize
7922 = group_size * GET_MODE_BITSIZE (elmode);
7923 unsigned int lnunits = const_nunits / group_size;
7924 /* If we can't construct such a vector fall back to
7925 element extracts from the original vector type and
7926 element size stores. */
7927 if (int_mode_for_size (lsize, 0).exists (&elmode)
7928 && VECTOR_MODE_P (TYPE_MODE (vectype))
7929 && related_vector_mode (TYPE_MODE (vectype), elmode,
7930 lnunits).exists (&vmode)
7931 && (convert_optab_handler (vec_extract_optab,
7932 vmode, elmode)
7933 != CODE_FOR_nothing))
7934 {
7935 nstores = lnunits;
7936 lnel = group_size;
7937 ltype = build_nonstandard_integer_type (lsize, 1);
7938 lvectype = build_vector_type (ltype, nstores);
7939 }
7940 /* Else fall back to vector extraction anyway.
7941 Fewer stores are more important than avoiding spilling
7942 of the vector we extract from. Compared to the
7943 construction case in vectorizable_load no store-forwarding
7944 issue exists here for reasonable archs. */
7945 }
7946 }
7947 else if (group_size >= const_nunits
7948 && group_size % const_nunits == 0)
7949 {
7950 nstores = 1;
7951 lnel = const_nunits;
7952 ltype = vectype;
7953 lvectype = vectype;
7954 }
7955 ltype = build_aligned_type (ltype, TYPE_ALIGN (elem_type));
7956 ncopies = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
7957 }
7958
7959 ivstep = stride_step;
7960 ivstep = fold_build2 (MULT_EXPR, TREE_TYPE (ivstep), ivstep,
7961 build_int_cst (TREE_TYPE (ivstep), vf));
7962
7963 standard_iv_increment_position (loop, &incr_gsi, &insert_after);
7964
7965 stride_base = cse_and_gimplify_to_preheader (loop_vinfo, stride_base);
7966 ivstep = cse_and_gimplify_to_preheader (loop_vinfo, ivstep);
7967 create_iv (stride_base, ivstep, NULL,
7968 loop, &incr_gsi, insert_after,
7969 &offvar, NULL);
7970 incr = gsi_stmt (incr_gsi);
7971 loop_vinfo->add_stmt (incr);
7972
7973 stride_step = cse_and_gimplify_to_preheader (loop_vinfo, stride_step);
7974
7975 prev_stmt_info = NULL;
7976 alias_off = build_int_cst (ref_type, 0);
7977 stmt_vec_info next_stmt_info = first_stmt_info;
7978 for (g = 0; g < group_size; g++)
7979 {
7980 running_off = offvar;
7981 if (g)
7982 {
7983 tree size = TYPE_SIZE_UNIT (ltype);
7984 tree pos = fold_build2 (MULT_EXPR, sizetype, size_int (g),
7985 size);
7986 tree newoff = copy_ssa_name (running_off, NULL);
7987 incr = gimple_build_assign (newoff, POINTER_PLUS_EXPR,
7988 running_off, pos);
7989 vect_finish_stmt_generation (vinfo, stmt_info, incr, gsi);
7990 running_off = newoff;
7991 }
7992 unsigned int group_el = 0;
7993 unsigned HOST_WIDE_INT
7994 elsz = tree_to_uhwi (TYPE_SIZE_UNIT (TREE_TYPE (vectype)));
7995 for (j = 0; j < ncopies; j++)
7996 {
7997 /* We've set op and dt above, from vect_get_store_rhs,
7998 and first_stmt_info == stmt_info. */
7999 if (j == 0)
8000 {
8001 if (slp)
8002 {
8003 vect_get_vec_defs (vinfo, op, NULL_TREE, stmt_info,
8004 &vec_oprnds, NULL, slp_node);
8005 vec_oprnd = vec_oprnds[0];
8006 }
8007 else
8008 {
8009 op = vect_get_store_rhs (next_stmt_info);
8010 vec_oprnd = vect_get_vec_def_for_operand
8011 (vinfo, op, next_stmt_info);
8012 }
8013 }
8014 else
8015 {
8016 if (slp)
8017 vec_oprnd = vec_oprnds[j];
8018 else
8019 vec_oprnd = vect_get_vec_def_for_stmt_copy (vinfo,
8020 vec_oprnd);
8021 }
8022 /* Pun the vector to extract from if necessary. */
8023 if (lvectype != vectype)
8024 {
8025 tree tem = make_ssa_name (lvectype);
8026 gimple *pun
8027 = gimple_build_assign (tem, build1 (VIEW_CONVERT_EXPR,
8028 lvectype, vec_oprnd));
8029 vect_finish_stmt_generation (vinfo, stmt_info, pun, gsi);
8030 vec_oprnd = tem;
8031 }
8032 for (i = 0; i < nstores; i++)
8033 {
8034 tree newref, newoff;
8035 gimple *incr, *assign;
8036 tree size = TYPE_SIZE (ltype);
8037 /* Extract the i'th component. */
8038 tree pos = fold_build2 (MULT_EXPR, bitsizetype,
8039 bitsize_int (i), size);
8040 tree elem = fold_build3 (BIT_FIELD_REF, ltype, vec_oprnd,
8041 size, pos);
8042
8043 elem = force_gimple_operand_gsi (gsi, elem, true,
8044 NULL_TREE, true,
8045 GSI_SAME_STMT);
8046
8047 tree this_off = build_int_cst (TREE_TYPE (alias_off),
8048 group_el * elsz);
8049 newref = build2 (MEM_REF, ltype,
8050 running_off, this_off);
8051 vect_copy_ref_info (newref, DR_REF (first_dr_info->dr));
8052
8053 /* And store it to *running_off. */
8054 assign = gimple_build_assign (newref, elem);
8055 stmt_vec_info assign_info
8056 = vect_finish_stmt_generation (vinfo, stmt_info,
8057 assign, gsi);
8058
8059 group_el += lnel;
8060 if (! slp
8061 || group_el == group_size)
8062 {
8063 newoff = copy_ssa_name (running_off, NULL);
8064 incr = gimple_build_assign (newoff, POINTER_PLUS_EXPR,
8065 running_off, stride_step);
8066 vect_finish_stmt_generation (vinfo, stmt_info, incr, gsi);
8067
8068 running_off = newoff;
8069 group_el = 0;
8070 }
8071 if (g == group_size - 1
8072 && !slp)
8073 {
8074 if (j == 0 && i == 0)
8075 STMT_VINFO_VEC_STMT (stmt_info)
8076 = *vec_stmt = assign_info;
8077 else
8078 STMT_VINFO_RELATED_STMT (prev_stmt_info) = assign_info;
8079 prev_stmt_info = assign_info;
8080 }
8081 }
8082 }
8083 next_stmt_info = DR_GROUP_NEXT_ELEMENT (next_stmt_info);
8084 if (slp)
8085 break;
8086 }
8087
8088 vec_oprnds.release ();
8089 return true;
8090 }
8091
8092 auto_vec<tree> dr_chain (group_size);
8093 oprnds.create (group_size);
8094
8095 /* Gather-scatter accesses perform only component accesses, so
8096 alignment is irrelevant for them. */
8097 if (memory_access_type == VMAT_GATHER_SCATTER)
8098 alignment_support_scheme = dr_unaligned_supported;
8099 else
8100 alignment_support_scheme
8101 = vect_supportable_dr_alignment (vinfo, first_dr_info, false);
8102
8103 gcc_assert (alignment_support_scheme);
8104 vec_loop_masks *loop_masks
8105 = (loop_vinfo && LOOP_VINFO_FULLY_MASKED_P (loop_vinfo)
8106 ? &LOOP_VINFO_MASKS (loop_vinfo)
8107 : NULL);
8108 /* Targets with store-lane instructions must not require explicit
8109 realignment. vect_supportable_dr_alignment always returns either
8110 dr_aligned or dr_unaligned_supported for masked operations. */
8111 gcc_assert ((memory_access_type != VMAT_LOAD_STORE_LANES
8112 && !mask
8113 && !loop_masks)
8114 || alignment_support_scheme == dr_aligned
8115 || alignment_support_scheme == dr_unaligned_supported);
8116
8117 if (memory_access_type == VMAT_CONTIGUOUS_DOWN
8118 || memory_access_type == VMAT_CONTIGUOUS_REVERSE)
8119 offset = size_int (-TYPE_VECTOR_SUBPARTS (vectype) + 1);
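 /* The bias of -(nunits - 1) elements applied above makes the generated
    vector access cover the same range as the scalar accesses, which for
    these access types run in decreasing address order.  */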
8120
8121 tree bump;
8122 tree vec_offset = NULL_TREE;
8123 if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
8124 {
8125 aggr_type = NULL_TREE;
8126 bump = NULL_TREE;
8127 }
8128 else if (memory_access_type == VMAT_GATHER_SCATTER)
8129 {
8130 aggr_type = elem_type;
8131 vect_get_strided_load_store_ops (stmt_info, loop_vinfo, &gs_info,
8132 &bump, &vec_offset);
8133 }
8134 else
8135 {
8136 if (memory_access_type == VMAT_LOAD_STORE_LANES)
8137 aggr_type = build_array_type_nelts (elem_type, vec_num * nunits);
8138 else
8139 aggr_type = vectype;
8140 bump = vect_get_data_ptr_increment (vinfo, dr_info, aggr_type,
8141 memory_access_type);
8142 }
8143
8144 if (mask)
8145 LOOP_VINFO_HAS_MASK_STORE (loop_vinfo) = true;
8146
8147 /* In case the vectorization factor (VF) is bigger than the number
8148 of elements that we can fit in a vectype (nunits), we have to generate
8149 more than one vector stmt, i.e. we need to "unroll" the
8150 vector stmt by a factor of VF/nunits. For more details see the
8151 documentation of vect_get_vec_def_for_stmt_copy. */
8152
8153 /* In case of interleaving (non-unit grouped access):
8154
8155 S1: &base + 2 = x2
8156 S2: &base = x0
8157 S3: &base + 1 = x1
8158 S4: &base + 3 = x3
8159
8160 We create vectorized stores starting from the base address (the access of
8161 the first stmt in the chain, S2 in the above example) when the last store
8162 stmt of the chain (S4) is reached:
8163
8164 VS1: &base = vx2
8165 VS2: &base + vec_size*1 = vx0
8166 VS3: &base + vec_size*2 = vx1
8167 VS4: &base + vec_size*3 = vx3
8168
8169 Then permutation statements are generated:
8170
8171 VS5: vx5 = VEC_PERM_EXPR < vx0, vx3, {0, 8, 1, 9, 2, 10, 3, 11} >
8172 VS6: vx6 = VEC_PERM_EXPR < vx0, vx3, {4, 12, 5, 13, 6, 14, 7, 15} >
8173 ...
8174
8175 And they are put in STMT_VINFO_VEC_STMT of the corresponding scalar stmts
8176 (the order of the data-refs in the output of vect_permute_store_chain
8177 corresponds to the order of scalar stmts in the interleaving chain - see
8178 the documentation of vect_permute_store_chain()).
8179
8180 In case of both multiple types and interleaving, above vector stores and
8181 permutation stmts are created for every copy. The result vector stmts are
8182 put in STMT_VINFO_VEC_STMT for the first copy and in the corresponding
8183 STMT_VINFO_RELATED_STMT for the next copies.
8184 */
8185
8186 prev_stmt_info = NULL;
8187 tree vec_mask = NULL_TREE;
8188 for (j = 0; j < ncopies; j++)
8189 {
8190 stmt_vec_info new_stmt_info;
8191 if (j == 0)
8192 {
8193 if (slp)
8194 {
8195 /* Get vectorized arguments for SLP_NODE. */
8196 vect_get_vec_defs (vinfo, op, NULL_TREE, stmt_info, &vec_oprnds,
8197 NULL, slp_node);
8198
8199 vec_oprnd = vec_oprnds[0];
8200 }
8201 else
8202 {
8203 /* For interleaved stores we collect vectorized defs for all the
8204 stores in the group in DR_CHAIN and OPRNDS. DR_CHAIN is then
8205 used as an input to vect_permute_store_chain(), and OPRNDS as
8206 an input to vect_get_vec_def_for_stmt_copy() for the next copy.
8207
8208 If the store is not grouped, DR_GROUP_SIZE is 1, and DR_CHAIN and
8209 OPRNDS are of size 1. */
8210 stmt_vec_info next_stmt_info = first_stmt_info;
8211 for (i = 0; i < group_size; i++)
8212 {
8213 /* Since gaps are not supported for interleaved stores,
8214 DR_GROUP_SIZE is the exact number of stmts in the chain.
8215 Therefore, NEXT_STMT_INFO can't be NULL. If there is
8216 no interleaving, DR_GROUP_SIZE is 1, and only one
8217 iteration of the loop will be executed. */
8218 op = vect_get_store_rhs (next_stmt_info);
8219 vec_oprnd = vect_get_vec_def_for_operand
8220 (vinfo, op, next_stmt_info);
8221 dr_chain.quick_push (vec_oprnd);
8222 oprnds.quick_push (vec_oprnd);
8223 next_stmt_info = DR_GROUP_NEXT_ELEMENT (next_stmt_info);
8224 }
8225 if (mask)
8226 vec_mask = vect_get_vec_def_for_operand (vinfo, mask, stmt_info,
8227 mask_vectype);
8228 }
8229
8230 /* We should have caught mismatched types earlier. */
8231 gcc_assert (useless_type_conversion_p (vectype,
8232 TREE_TYPE (vec_oprnd)));
8233 bool simd_lane_access_p
8234 = STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) != 0;
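 /* For "omp simd array" accesses the base address is the address of the
    array itself, so if offset and init are zero and the alias sets agree
    we can use it directly with a constant zero DATAREF_OFFSET and bump
    that offset instead of maintaining a pointer IV.  */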
8235 if (simd_lane_access_p
8236 && !loop_masks
8237 && TREE_CODE (DR_BASE_ADDRESS (first_dr_info->dr)) == ADDR_EXPR
8238 && VAR_P (TREE_OPERAND (DR_BASE_ADDRESS (first_dr_info->dr), 0))
8239 && integer_zerop (get_dr_vinfo_offset (vinfo, first_dr_info))
8240 && integer_zerop (DR_INIT (first_dr_info->dr))
8241 && alias_sets_conflict_p (get_alias_set (aggr_type),
8242 get_alias_set (TREE_TYPE (ref_type))))
8243 {
8244 dataref_ptr = unshare_expr (DR_BASE_ADDRESS (first_dr_info->dr));
8245 dataref_offset = build_int_cst (ref_type, 0);
8246 }
8247 else if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
8248 vect_get_gather_scatter_ops (vinfo, loop, stmt_info, &gs_info,
8249 &dataref_ptr, &vec_offset);
8250 else
8251 dataref_ptr
8252 = vect_create_data_ref_ptr (vinfo, first_stmt_info, aggr_type,
8253 simd_lane_access_p ? loop : NULL,
8254 offset, &dummy, gsi, &ptr_incr,
8255 simd_lane_access_p, NULL_TREE, bump);
8256 }
8257 else
8258 {
8259 /* For interleaved stores we created vectorized defs for all the
8260 defs stored in OPRNDS in the previous iteration (previous copy).
8261 DR_CHAIN is then used as an input to vect_permute_store_chain(),
8262 and OPRNDS as an input to vect_get_vec_def_for_stmt_copy() for the
8263 next copy.
8264 If the store is not grouped, DR_GROUP_SIZE is 1, and DR_CHAIN and
8265 OPRNDS are of size 1. */
8266 for (i = 0; i < group_size; i++)
8267 {
8268 op = oprnds[i];
8269 vec_oprnd = vect_get_vec_def_for_stmt_copy (vinfo, op);
8270 dr_chain[i] = vec_oprnd;
8271 oprnds[i] = vec_oprnd;
8272 }
8273 if (mask)
8274 vec_mask = vect_get_vec_def_for_stmt_copy (vinfo, vec_mask);
8275 if (dataref_offset)
8276 dataref_offset
8277 = int_const_binop (PLUS_EXPR, dataref_offset, bump);
8278 else if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
8279 vec_offset = vect_get_vec_def_for_stmt_copy (vinfo, vec_offset);
8280 else
8281 dataref_ptr = bump_vector_ptr (vinfo, dataref_ptr, ptr_incr, gsi,
8282 stmt_info, bump);
8283 }
8284
8285 if (memory_access_type == VMAT_LOAD_STORE_LANES)
8286 {
8287 tree vec_array;
8288
8289 /* Get an array into which we can store the individual vectors. */
8290 vec_array = create_vector_array (vectype, vec_num);
8291
8292 /* Invalidate the current contents of VEC_ARRAY. This should
8293 become an RTL clobber too, which prevents the vector registers
8294 from being upward-exposed. */
8295 vect_clobber_variable (vinfo, stmt_info, gsi, vec_array);
8296
8297 /* Store the individual vectors into the array. */
8298 for (i = 0; i < vec_num; i++)
8299 {
8300 vec_oprnd = dr_chain[i];
8301 write_vector_array (vinfo, stmt_info,
8302 gsi, vec_oprnd, vec_array, i);
8303 }
8304
8305 tree final_mask = NULL;
8306 if (loop_masks)
8307 final_mask = vect_get_loop_mask (gsi, loop_masks, ncopies,
8308 vectype, j);
8309 if (vec_mask)
8310 final_mask = prepare_load_store_mask (mask_vectype, final_mask,
8311 vec_mask, gsi);
8312
8313 gcall *call;
8314 if (final_mask)
8315 {
8316 /* Emit:
8317 MASK_STORE_LANES (DATAREF_PTR, ALIAS_PTR, VEC_MASK,
8318 VEC_ARRAY). */
8319 unsigned int align = TYPE_ALIGN_UNIT (TREE_TYPE (vectype));
8320 tree alias_ptr = build_int_cst (ref_type, align);
8321 call = gimple_build_call_internal (IFN_MASK_STORE_LANES, 4,
8322 dataref_ptr, alias_ptr,
8323 final_mask, vec_array);
8324 }
8325 else
8326 {
8327 /* Emit:
8328 MEM_REF[...all elements...] = STORE_LANES (VEC_ARRAY). */
8329 data_ref = create_array_ref (aggr_type, dataref_ptr, ref_type);
8330 call = gimple_build_call_internal (IFN_STORE_LANES, 1,
8331 vec_array);
8332 gimple_call_set_lhs (call, data_ref);
8333 }
8334 gimple_call_set_nothrow (call, true);
8335 new_stmt_info = vect_finish_stmt_generation (vinfo, stmt_info,
8336 call, gsi);
8337
8338 /* Record that VEC_ARRAY is now dead. */
8339 vect_clobber_variable (vinfo, stmt_info, gsi, vec_array);
8340 }
8341 else
8342 {
8343 new_stmt_info = NULL;
8344 if (grouped_store)
8345 {
8346 if (j == 0)
8347 result_chain.create (group_size);
8348 /* Permute. */
8349 vect_permute_store_chain (vinfo, dr_chain, group_size, stmt_info,
8350 gsi, &result_chain);
8351 }
8352
8353 stmt_vec_info next_stmt_info = first_stmt_info;
8354 for (i = 0; i < vec_num; i++)
8355 {
8356 unsigned misalign;
8357 unsigned HOST_WIDE_INT align;
8358
8359 tree final_mask = NULL_TREE;
8360 if (loop_masks)
8361 final_mask = vect_get_loop_mask (gsi, loop_masks,
8362 vec_num * ncopies,
8363 vectype, vec_num * j + i);
8364 if (vec_mask)
8365 final_mask = prepare_load_store_mask (mask_vectype, final_mask,
8366 vec_mask, gsi);
8367
8368 if (memory_access_type == VMAT_GATHER_SCATTER)
8369 {
8370 tree scale = size_int (gs_info.scale);
8371 gcall *call;
8372 if (loop_masks)
8373 call = gimple_build_call_internal
8374 (IFN_MASK_SCATTER_STORE, 5, dataref_ptr, vec_offset,
8375 scale, vec_oprnd, final_mask);
8376 else
8377 call = gimple_build_call_internal
8378 (IFN_SCATTER_STORE, 4, dataref_ptr, vec_offset,
8379 scale, vec_oprnd);
8380 gimple_call_set_nothrow (call, true);
8381 new_stmt_info
8382 = vect_finish_stmt_generation (vinfo, stmt_info, call, gsi);
8383 break;
8384 }
8385
8386 if (i > 0)
8387 /* Bump the vector pointer. */
8388 dataref_ptr = bump_vector_ptr (vinfo, dataref_ptr, ptr_incr,
8389 gsi, stmt_info, bump);
8390
8391 if (slp)
8392 vec_oprnd = vec_oprnds[i];
8393 else if (grouped_store)
8394 /* For grouped stores vectorized defs are interleaved in
8395 vect_permute_store_chain(). */
8396 vec_oprnd = result_chain[i];
8397
8398 align = known_alignment (DR_TARGET_ALIGNMENT (first_dr_info));
8399 if (aligned_access_p (first_dr_info))
8400 misalign = 0;
8401 else if (DR_MISALIGNMENT (first_dr_info) == -1)
8402 {
8403 align = dr_alignment (vect_dr_behavior (vinfo, first_dr_info));
8404 misalign = 0;
8405 }
8406 else
8407 misalign = DR_MISALIGNMENT (first_dr_info);
8408 if (dataref_offset == NULL_TREE
8409 && TREE_CODE (dataref_ptr) == SSA_NAME)
8410 set_ptr_info_alignment (get_ptr_info (dataref_ptr), align,
8411 misalign);
8412
8413 if (memory_access_type == VMAT_CONTIGUOUS_REVERSE)
8414 {
8415 tree perm_mask = perm_mask_for_reverse (vectype);
8416 tree perm_dest = vect_create_destination_var
8417 (vect_get_store_rhs (stmt_info), vectype);
8418 tree new_temp = make_ssa_name (perm_dest);
8419
8420 /* Generate the permute statement. */
8421 gimple *perm_stmt
8422 = gimple_build_assign (new_temp, VEC_PERM_EXPR, vec_oprnd,
8423 vec_oprnd, perm_mask);
8424 vect_finish_stmt_generation (vinfo, stmt_info, perm_stmt, gsi);
8425
8426 perm_stmt = SSA_NAME_DEF_STMT (new_temp);
8427 vec_oprnd = new_temp;
8428 }
8429
8430 /* Arguments are ready. Create the new vector stmt. */
8431 if (final_mask)
8432 {
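 /* The alignment encoded in the alias pointer of the IFN_MASK_STORE is
    the lowest set bit of the combined alignment and misalignment, e.g. a
    16-byte aligned base with misalignment 4 guarantees only 4-byte
    alignment.  */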
8433 align = least_bit_hwi (misalign | align);
8434 tree ptr = build_int_cst (ref_type, align);
8435 gcall *call
8436 = gimple_build_call_internal (IFN_MASK_STORE, 4,
8437 dataref_ptr, ptr,
8438 final_mask, vec_oprnd);
8439 gimple_call_set_nothrow (call, true);
8440 new_stmt_info
8441 = vect_finish_stmt_generation (vinfo, stmt_info, call, gsi);
8442 }
8443 else
8444 {
8445 data_ref = fold_build2 (MEM_REF, vectype,
8446 dataref_ptr,
8447 dataref_offset
8448 ? dataref_offset
8449 : build_int_cst (ref_type, 0));
8450 if (aligned_access_p (first_dr_info))
8451 ;
8452 else if (DR_MISALIGNMENT (first_dr_info) == -1)
8453 TREE_TYPE (data_ref)
8454 = build_aligned_type (TREE_TYPE (data_ref),
8455 align * BITS_PER_UNIT);
8456 else
8457 TREE_TYPE (data_ref)
8458 = build_aligned_type (TREE_TYPE (data_ref),
8459 TYPE_ALIGN (elem_type));
8460 vect_copy_ref_info (data_ref, DR_REF (first_dr_info->dr));
8461 gassign *new_stmt
8462 = gimple_build_assign (data_ref, vec_oprnd);
8463 new_stmt_info
8464 = vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
8465 }
8466
8467 if (slp)
8468 continue;
8469
8470 next_stmt_info = DR_GROUP_NEXT_ELEMENT (next_stmt_info);
8471 if (!next_stmt_info)
8472 break;
8473 }
8474 }
8475 if (!slp)
8476 {
8477 if (j == 0)
8478 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt_info;
8479 else
8480 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt_info;
8481 prev_stmt_info = new_stmt_info;
8482 }
8483 }
8484
8485 oprnds.release ();
8486 result_chain.release ();
8487 vec_oprnds.release ();
8488
8489 return true;
8490 }
8491
8492 /* Given a vector type VECTYPE, turns permutation SEL into the equivalent
8493 VECTOR_CST mask. No checks are made that the target platform supports the
8494 mask, so callers may wish to test can_vec_perm_const_p separately, or use
8495 vect_gen_perm_mask_checked. */
8496
8497 tree
8498 vect_gen_perm_mask_any (tree vectype, const vec_perm_indices &sel)
8499 {
8500 tree mask_type;
8501
8502 poly_uint64 nunits = sel.length ();
8503 gcc_assert (known_eq (nunits, TYPE_VECTOR_SUBPARTS (vectype)));
8504
8505 mask_type = build_vector_type (ssizetype, nunits);
8506 return vec_perm_indices_to_tree (mask_type, sel);
8507 }
8508
8509 /* Checked version of vect_gen_perm_mask_any. Asserts can_vec_perm_const_p,
8510 i.e. that the target supports the pattern _for arbitrary input vectors_. */
8511
8512 tree
8513 vect_gen_perm_mask_checked (tree vectype, const vec_perm_indices &sel)
8514 {
8515 gcc_assert (can_vec_perm_const_p (TYPE_MODE (vectype), sel));
8516 return vect_gen_perm_mask_any (vectype, sel);
8517 }
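/* For illustration: a caller such as perm_mask_for_reverse (earlier in this
   file) builds a reversing permutation as a single stepped pattern, roughly

     vec_perm_builder sel (nunits, 1, 3);
     for (int i = 0; i < 3; ++i)
       sel.quick_push (nunits - 1 - i);
     vec_perm_indices indices (sel, 1, nunits);
     if (can_vec_perm_const_p (TYPE_MODE (vectype), indices))
       mask = vect_gen_perm_mask_checked (vectype, indices);

   checking can_vec_perm_const_p itself before using the checked variant.  */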
8518
8519 /* Given vector variables X and Y that were generated for the scalar
8520    STMT_INFO, generate instructions to permute the vector elements of X and Y
8521    using the permutation mask MASK_VEC, insert them at *GSI and return the
8522    permuted vector variable.  */
8523
8524 static tree
8525 permute_vec_elements (vec_info *vinfo,
8526 tree x, tree y, tree mask_vec, stmt_vec_info stmt_info,
8527 gimple_stmt_iterator *gsi)
8528 {
8529 tree vectype = TREE_TYPE (x);
8530 tree perm_dest, data_ref;
8531 gimple *perm_stmt;
8532
8533 tree scalar_dest = gimple_get_lhs (stmt_info->stmt);
8534 if (scalar_dest && TREE_CODE (scalar_dest) == SSA_NAME)
8535 perm_dest = vect_create_destination_var (scalar_dest, vectype);
8536 else
8537 perm_dest = vect_get_new_vect_var (vectype, vect_simple_var, NULL);
8538 data_ref = make_ssa_name (perm_dest);
8539
8540 /* Generate the permute statement. */
8541 perm_stmt = gimple_build_assign (data_ref, VEC_PERM_EXPR, x, y, mask_vec);
8542 vect_finish_stmt_generation (vinfo, stmt_info, perm_stmt, gsi);
8543
8544 return data_ref;
8545 }
8546
8547 /* Hoist the definitions of all SSA uses on STMT_INFO out of the loop LOOP,
8548    inserting them on the loop's preheader edge.  Returns true if we
8549    were successful in doing so (and thus STMT_INFO can then be moved),
8550    otherwise returns false.  */
8551
8552 static bool
8553 hoist_defs_of_uses (stmt_vec_info stmt_info, class loop *loop)
8554 {
8555 ssa_op_iter i;
8556 tree op;
8557 bool any = false;
8558
8559 FOR_EACH_SSA_TREE_OPERAND (op, stmt_info->stmt, i, SSA_OP_USE)
8560 {
8561 gimple *def_stmt = SSA_NAME_DEF_STMT (op);
8562 if (!gimple_nop_p (def_stmt)
8563 && flow_bb_inside_loop_p (loop, gimple_bb (def_stmt)))
8564 {
8565	      /* Make sure we don't need to recurse.  While we could do
8566		 so in simple cases, when there are more complex use webs
8567		 we don't have an easy way to preserve stmt order to fulfil
8568		 dependencies within them.  */
8569 tree op2;
8570 ssa_op_iter i2;
8571 if (gimple_code (def_stmt) == GIMPLE_PHI)
8572 return false;
8573 FOR_EACH_SSA_TREE_OPERAND (op2, def_stmt, i2, SSA_OP_USE)
8574 {
8575 gimple *def_stmt2 = SSA_NAME_DEF_STMT (op2);
8576 if (!gimple_nop_p (def_stmt2)
8577 && flow_bb_inside_loop_p (loop, gimple_bb (def_stmt2)))
8578 return false;
8579 }
8580 any = true;
8581 }
8582 }
8583
8584 if (!any)
8585 return true;
8586
8587 FOR_EACH_SSA_TREE_OPERAND (op, stmt_info->stmt, i, SSA_OP_USE)
8588 {
8589 gimple *def_stmt = SSA_NAME_DEF_STMT (op);
8590 if (!gimple_nop_p (def_stmt)
8591 && flow_bb_inside_loop_p (loop, gimple_bb (def_stmt)))
8592 {
8593 gimple_stmt_iterator gsi = gsi_for_stmt (def_stmt);
8594 gsi_remove (&gsi, false);
8595 gsi_insert_on_edge_immediate (loop_preheader_edge (loop), def_stmt);
8596 }
8597 }
8598
8599 return true;
8600 }
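/* For example, for an invariant load STMT_INFO

     loop:
       addr_1 = base_2 + 16;
       x_3 = MEM[addr_1];

   the definition of addr_1 is moved to the preheader edge, so that the
   caller (see the VMAT_INVARIANT handling in vectorizable_load below) can
   subsequently emit the load itself on the preheader as well.  */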
8601
8602 /* vectorizable_load.
8603
8604    Check if STMT_INFO reads a non-scalar data-ref (array/pointer/structure)
8605 that can be vectorized.
8606 If VEC_STMT is also passed, vectorize STMT_INFO: create a vectorized
8607 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
8608 Return true if STMT_INFO is vectorizable in this way. */
8609
8610 static bool
8611 vectorizable_load (vec_info *vinfo,
8612 stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
8613 stmt_vec_info *vec_stmt, slp_tree slp_node,
8614 stmt_vector_for_cost *cost_vec)
8615 {
8616 tree scalar_dest;
8617 tree vec_dest = NULL;
8618 tree data_ref = NULL;
8619 stmt_vec_info prev_stmt_info;
8620 loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
8621 class loop *loop = NULL;
8622 class loop *containing_loop = gimple_bb (stmt_info->stmt)->loop_father;
8623 bool nested_in_vect_loop = false;
8624 tree elem_type;
8625 tree new_temp;
8626 machine_mode mode;
8627 tree dummy;
8628 enum dr_alignment_support alignment_support_scheme;
8629 tree dataref_ptr = NULL_TREE;
8630 tree dataref_offset = NULL_TREE;
8631 gimple *ptr_incr = NULL;
8632 int ncopies;
8633 int i, j;
8634 unsigned int group_size;
8635 poly_uint64 group_gap_adj;
8636 tree msq = NULL_TREE, lsq;
8637 tree offset = NULL_TREE;
8638 tree byte_offset = NULL_TREE;
8639 tree realignment_token = NULL_TREE;
8640 gphi *phi = NULL;
8641 vec<tree> dr_chain = vNULL;
8642 bool grouped_load = false;
8643 stmt_vec_info first_stmt_info;
8644 stmt_vec_info first_stmt_info_for_drptr = NULL;
8645 bool compute_in_loop = false;
8646 class loop *at_loop;
8647 int vec_num;
8648 bool slp = (slp_node != NULL);
8649 bool slp_perm = false;
8650 bb_vec_info bb_vinfo = dyn_cast <bb_vec_info> (vinfo);
8651 poly_uint64 vf;
8652 tree aggr_type;
8653 gather_scatter_info gs_info;
8654 tree ref_type;
8655 enum vect_def_type mask_dt = vect_unknown_def_type;
8656
8657 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
8658 return false;
8659
8660 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
8661 && ! vec_stmt)
8662 return false;
8663
8664 tree mask = NULL_TREE, mask_vectype = NULL_TREE;
8665 if (gassign *assign = dyn_cast <gassign *> (stmt_info->stmt))
8666 {
8667 scalar_dest = gimple_assign_lhs (assign);
8668 if (TREE_CODE (scalar_dest) != SSA_NAME)
8669 return false;
8670
8671 tree_code code = gimple_assign_rhs_code (assign);
8672 if (code != ARRAY_REF
8673 && code != BIT_FIELD_REF
8674 && code != INDIRECT_REF
8675 && code != COMPONENT_REF
8676 && code != IMAGPART_EXPR
8677 && code != REALPART_EXPR
8678 && code != MEM_REF
8679 && TREE_CODE_CLASS (code) != tcc_declaration)
8680 return false;
8681 }
8682 else
8683 {
8684 gcall *call = dyn_cast <gcall *> (stmt_info->stmt);
8685 if (!call || !gimple_call_internal_p (call))
8686 return false;
8687
8688 internal_fn ifn = gimple_call_internal_fn (call);
8689 if (!internal_load_fn_p (ifn))
8690 return false;
8691
8692 scalar_dest = gimple_call_lhs (call);
8693 if (!scalar_dest)
8694 return false;
8695
8696 int mask_index = internal_fn_mask_index (ifn);
8697 if (mask_index >= 0)
8698 {
8699 mask = gimple_call_arg (call, mask_index);
8700 if (!vect_check_scalar_mask (vinfo, stmt_info, mask, &mask_dt,
8701 &mask_vectype))
8702 return false;
8703 }
8704 }
8705
8706 if (!STMT_VINFO_DATA_REF (stmt_info))
8707 return false;
8708
8709 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
8710 poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
8711
8712 if (loop_vinfo)
8713 {
8714 loop = LOOP_VINFO_LOOP (loop_vinfo);
8715 nested_in_vect_loop = nested_in_vect_loop_p (loop, stmt_info);
8716 vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
8717 }
8718 else
8719 vf = 1;
8720
8721 /* Multiple types in SLP are handled by creating the appropriate number of
8722 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
8723 case of SLP. */
8724 if (slp)
8725 ncopies = 1;
8726 else
8727 ncopies = vect_get_num_copies (loop_vinfo, vectype);
8728
8729 gcc_assert (ncopies >= 1);
8730
8731 /* FORNOW. This restriction should be relaxed. */
8732 if (nested_in_vect_loop && ncopies > 1)
8733 {
8734 if (dump_enabled_p ())
8735 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
8736 "multiple types in nested loop.\n");
8737 return false;
8738 }
8739
8740 /* Invalidate assumptions made by dependence analysis when vectorization
8741 on the unrolled body effectively re-orders stmts. */
8742 if (ncopies > 1
8743 && STMT_VINFO_MIN_NEG_DIST (stmt_info) != 0
8744 && maybe_gt (LOOP_VINFO_VECT_FACTOR (loop_vinfo),
8745 STMT_VINFO_MIN_NEG_DIST (stmt_info)))
8746 {
8747 if (dump_enabled_p ())
8748 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
8749 "cannot perform implicit CSE when unrolling "
8750 "with negative dependence distance\n");
8751 return false;
8752 }
8753
8754 elem_type = TREE_TYPE (vectype);
8755 mode = TYPE_MODE (vectype);
8756
8757	  /* FORNOW. In some cases we can vectorize even if the data-type is not
8758	     supported (e.g. data copies).  */
8759 if (optab_handler (mov_optab, mode) == CODE_FOR_nothing)
8760 {
8761 if (dump_enabled_p ())
8762 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
8763 "Aligned load, but unsupported type.\n");
8764 return false;
8765 }
8766
8767 /* Check if the load is a part of an interleaving chain. */
8768 if (STMT_VINFO_GROUPED_ACCESS (stmt_info))
8769 {
8770 grouped_load = true;
8771 /* FORNOW */
8772 gcc_assert (!nested_in_vect_loop);
8773 gcc_assert (!STMT_VINFO_GATHER_SCATTER_P (stmt_info));
8774
8775 first_stmt_info = DR_GROUP_FIRST_ELEMENT (stmt_info);
8776 group_size = DR_GROUP_SIZE (first_stmt_info);
8777
8778 /* Refuse non-SLP vectorization of SLP-only groups. */
8779 if (!slp && STMT_VINFO_SLP_VECT_ONLY (first_stmt_info))
8780 {
8781 if (dump_enabled_p ())
8782 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
8783 "cannot vectorize load in non-SLP mode.\n");
8784 return false;
8785 }
8786
8787 if (slp && SLP_TREE_LOAD_PERMUTATION (slp_node).exists ())
8788 {
8789 slp_perm = true;
8790
8791 if (!loop_vinfo)
8792 {
8793 /* In BB vectorization we may not actually use a loaded vector
8794 accessing elements in excess of DR_GROUP_SIZE. */
8795 stmt_vec_info group_info = SLP_TREE_SCALAR_STMTS (slp_node)[0];
8796 group_info = DR_GROUP_FIRST_ELEMENT (group_info);
8797 unsigned HOST_WIDE_INT nunits;
8798 unsigned j, k, maxk = 0;
8799 FOR_EACH_VEC_ELT (SLP_TREE_LOAD_PERMUTATION (slp_node), j, k)
8800 if (k > maxk)
8801 maxk = k;
8802 tree vectype = STMT_VINFO_VECTYPE (group_info);
8803 if (!TYPE_VECTOR_SUBPARTS (vectype).is_constant (&nunits)
8804 || maxk >= (DR_GROUP_SIZE (group_info) & ~(nunits - 1)))
8805 {
8806 if (dump_enabled_p ())
8807 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
8808 "BB vectorization with gaps at the end of "
8809 "a load is not supported\n");
8810 return false;
8811 }
8812 }
8813
8814 auto_vec<tree> tem;
8815 unsigned n_perms;
8816 if (!vect_transform_slp_perm_load (vinfo, slp_node, tem, NULL, vf,
8817 true, &n_perms))
8818 {
8819 if (dump_enabled_p ())
8820 dump_printf_loc (MSG_MISSED_OPTIMIZATION,
8821 vect_location,
8822 "unsupported load permutation\n");
8823 return false;
8824 }
8825 }
8826
8827 /* Invalidate assumptions made by dependence analysis when vectorization
8828 on the unrolled body effectively re-orders stmts. */
8829 if (!PURE_SLP_STMT (stmt_info)
8830 && STMT_VINFO_MIN_NEG_DIST (stmt_info) != 0
8831 && maybe_gt (LOOP_VINFO_VECT_FACTOR (loop_vinfo),
8832 STMT_VINFO_MIN_NEG_DIST (stmt_info)))
8833 {
8834 if (dump_enabled_p ())
8835 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
8836 "cannot perform implicit CSE when performing "
8837 "group loads with negative dependence distance\n");
8838 return false;
8839 }
8840 }
8841 else
8842 group_size = 1;
8843
8844 vect_memory_access_type memory_access_type;
8845 if (!get_load_store_type (vinfo, stmt_info, vectype, slp, mask, VLS_LOAD,
8846 ncopies, &memory_access_type, &gs_info))
8847 return false;
8848
8849 if (mask)
8850 {
8851 if (memory_access_type == VMAT_CONTIGUOUS)
8852 {
8853 machine_mode vec_mode = TYPE_MODE (vectype);
8854 if (!VECTOR_MODE_P (vec_mode)
8855 || !can_vec_mask_load_store_p (vec_mode,
8856 TYPE_MODE (mask_vectype), true))
8857 return false;
8858 }
8859 else if (memory_access_type != VMAT_LOAD_STORE_LANES
8860 && memory_access_type != VMAT_GATHER_SCATTER)
8861 {
8862 if (dump_enabled_p ())
8863 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
8864 "unsupported access type for masked load.\n");
8865 return false;
8866 }
8867 }
8868
8869 if (!vec_stmt) /* transformation not required. */
8870 {
8871 if (!slp)
8872 STMT_VINFO_MEMORY_ACCESS_TYPE (stmt_info) = memory_access_type;
8873
8874 if (loop_vinfo
8875 && LOOP_VINFO_CAN_FULLY_MASK_P (loop_vinfo))
8876 check_load_store_masking (loop_vinfo, vectype, VLS_LOAD, group_size,
8877 memory_access_type, &gs_info, mask);
8878
8879 STMT_VINFO_TYPE (stmt_info) = load_vec_info_type;
8880 vect_model_load_cost (vinfo, stmt_info, ncopies, vf, memory_access_type,
8881 slp_node, cost_vec);
8882 return true;
8883 }
8884
8885 if (!slp)
8886 gcc_assert (memory_access_type
8887 == STMT_VINFO_MEMORY_ACCESS_TYPE (stmt_info));
8888
8889 if (dump_enabled_p ())
8890 dump_printf_loc (MSG_NOTE, vect_location,
8891 "transform load. ncopies = %d\n", ncopies);
8892
8893 /* Transform. */
8894
8895 dr_vec_info *dr_info = STMT_VINFO_DR_INFO (stmt_info), *first_dr_info = NULL;
8896 ensure_base_align (dr_info);
8897
8898 if (memory_access_type == VMAT_GATHER_SCATTER && gs_info.decl)
8899 {
8900 vect_build_gather_load_calls (vinfo,
8901 stmt_info, gsi, vec_stmt, &gs_info, mask);
8902 return true;
8903 }
8904
8905 if (memory_access_type == VMAT_INVARIANT)
8906 {
8907 gcc_assert (!grouped_load && !mask && !bb_vinfo);
8908 /* If we have versioned for aliasing or the loop doesn't
8909 have any data dependencies that would preclude this,
8910 then we are sure this is a loop invariant load and
8911 thus we can insert it on the preheader edge. */
8912 bool hoist_p = (LOOP_VINFO_NO_DATA_DEPENDENCIES (loop_vinfo)
8913 && !nested_in_vect_loop
8914 && hoist_defs_of_uses (stmt_info, loop));
8915 if (hoist_p)
8916 {
8917 gassign *stmt = as_a <gassign *> (stmt_info->stmt);
8918 if (dump_enabled_p ())
8919 dump_printf_loc (MSG_NOTE, vect_location,
8920 "hoisting out of the vectorized loop: %G", stmt);
8921 scalar_dest = copy_ssa_name (scalar_dest);
8922 tree rhs = unshare_expr (gimple_assign_rhs1 (stmt));
8923 gsi_insert_on_edge_immediate
8924 (loop_preheader_edge (loop),
8925 gimple_build_assign (scalar_dest, rhs));
8926 }
8927 /* These copies are all equivalent, but currently the representation
8928 requires a separate STMT_VINFO_VEC_STMT for each one. */
8929 prev_stmt_info = NULL;
8930 gimple_stmt_iterator gsi2 = *gsi;
8931 gsi_next (&gsi2);
8932 for (j = 0; j < ncopies; j++)
8933 {
8934 stmt_vec_info new_stmt_info;
8935 if (hoist_p)
8936 {
8937 new_temp = vect_init_vector (vinfo, stmt_info, scalar_dest,
8938 vectype, NULL);
8939 gimple *new_stmt = SSA_NAME_DEF_STMT (new_temp);
8940 new_stmt_info = vinfo->add_stmt (new_stmt);
8941 }
8942 else
8943 {
8944 new_temp = vect_init_vector (vinfo, stmt_info, scalar_dest,
8945 vectype, &gsi2);
8946 new_stmt_info = vinfo->lookup_def (new_temp);
8947 }
8948 if (slp)
8949 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt_info);
8950 else if (j == 0)
8951 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt_info;
8952 else
8953 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt_info;
8954 prev_stmt_info = new_stmt_info;
8955 }
8956 return true;
8957 }
8958
8959 if (memory_access_type == VMAT_ELEMENTWISE
8960 || memory_access_type == VMAT_STRIDED_SLP)
8961 {
8962 gimple_stmt_iterator incr_gsi;
8963 bool insert_after;
8964 gimple *incr;
8965 tree offvar;
8966 tree ivstep;
8967 tree running_off;
8968 vec<constructor_elt, va_gc> *v = NULL;
8969 tree stride_base, stride_step, alias_off;
8970 /* Checked by get_load_store_type. */
8971 unsigned int const_nunits = nunits.to_constant ();
8972 unsigned HOST_WIDE_INT cst_offset = 0;
8973 tree dr_offset;
8974
8975 gcc_assert (!LOOP_VINFO_FULLY_MASKED_P (loop_vinfo));
8976 gcc_assert (!nested_in_vect_loop);
8977
8978 if (grouped_load)
8979 {
8980 first_stmt_info = DR_GROUP_FIRST_ELEMENT (stmt_info);
8981 first_dr_info = STMT_VINFO_DR_INFO (first_stmt_info);
8982 }
8983 else
8984 {
8985 first_stmt_info = stmt_info;
8986 first_dr_info = dr_info;
8987 }
8988 if (slp && grouped_load)
8989 {
8990 group_size = DR_GROUP_SIZE (first_stmt_info);
8991 ref_type = get_group_alias_ptr_type (first_stmt_info);
8992 }
8993 else
8994 {
8995 if (grouped_load)
8996 cst_offset
8997 = (tree_to_uhwi (TYPE_SIZE_UNIT (TREE_TYPE (vectype)))
8998 * vect_get_place_in_interleaving_chain (stmt_info,
8999 first_stmt_info));
9000 group_size = 1;
9001 ref_type = reference_alias_ptr_type (DR_REF (dr_info->dr));
9002 }
9003
9004 dr_offset = get_dr_vinfo_offset (vinfo, first_dr_info);
9005 stride_base
9006 = fold_build_pointer_plus
9007 (DR_BASE_ADDRESS (first_dr_info->dr),
9008 size_binop (PLUS_EXPR,
9009 convert_to_ptrofftype (dr_offset),
9010 convert_to_ptrofftype (DR_INIT (first_dr_info->dr))));
9011 stride_step = fold_convert (sizetype, DR_STEP (first_dr_info->dr));
9012
9013 /* For a load with loop-invariant (but other than power-of-2)
9014 stride (i.e. not a grouped access) like so:
9015
9016 for (i = 0; i < n; i += stride)
9017 ... = array[i];
9018
9019 we generate a new induction variable and new accesses to
9020 form a new vector (or vectors, depending on ncopies):
9021
9022 for (j = 0; ; j += VF*stride)
9023 tmp1 = array[j];
9024 tmp2 = array[j + stride];
9025 ...
9026 vectemp = {tmp1, tmp2, ...}
9027 */
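	  /* For example: with a V4SI vectype, VF == 4 and an int array accessed
	     with stride 3, stride_step is 12 bytes and ivstep below becomes
	     4 * 12 == 48 bytes per vector iteration; the four scalar loads are
	     then collected into one vector CONSTRUCTOR per copy.  */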
9028
9029 ivstep = fold_build2 (MULT_EXPR, TREE_TYPE (stride_step), stride_step,
9030 build_int_cst (TREE_TYPE (stride_step), vf));
9031
9032 standard_iv_increment_position (loop, &incr_gsi, &insert_after);
9033
9034 stride_base = cse_and_gimplify_to_preheader (loop_vinfo, stride_base);
9035 ivstep = cse_and_gimplify_to_preheader (loop_vinfo, ivstep);
9036 create_iv (stride_base, ivstep, NULL,
9037 loop, &incr_gsi, insert_after,
9038 &offvar, NULL);
9039 incr = gsi_stmt (incr_gsi);
9040 loop_vinfo->add_stmt (incr);
9041
9042 stride_step = cse_and_gimplify_to_preheader (loop_vinfo, stride_step);
9043
9044 prev_stmt_info = NULL;
9045 running_off = offvar;
9046 alias_off = build_int_cst (ref_type, 0);
9047 int nloads = const_nunits;
9048 int lnel = 1;
9049 tree ltype = TREE_TYPE (vectype);
9050 tree lvectype = vectype;
9051 auto_vec<tree> dr_chain;
9052 if (memory_access_type == VMAT_STRIDED_SLP)
9053 {
9054 if (group_size < const_nunits)
9055 {
9056 /* First check if vec_init optab supports construction from vector
9057 elts directly. Otherwise avoid emitting a constructor of
9058 vector elements by performing the loads using an integer type
9059 of the same size, constructing a vector of those and then
9060 re-interpreting it as the original vector type. This avoids a
9061 huge runtime penalty due to the general inability to perform
9062 store forwarding from smaller stores to a larger load. */
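	      /* For example, with a V4SI vectype and group_size == 2 this
		 performs two loads using a piece type (PTYPE) that spans two
		 ints, builds a two-element vector of those pieces (VTYPE) and,
		 if that piece vector type differs from the original vectype,
		 view-converts the result back to V4SI, instead of building a
		 CONSTRUCTOR from four scalar ints.  */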
9063 tree ptype;
9064 tree vtype
9065 = vector_vector_composition_type (vectype,
9066 const_nunits / group_size,
9067 &ptype);
9068 if (vtype != NULL_TREE)
9069 {
9070 nloads = const_nunits / group_size;
9071 lnel = group_size;
9072 lvectype = vtype;
9073 ltype = ptype;
9074 }
9075 }
9076 else
9077 {
9078 nloads = 1;
9079 lnel = const_nunits;
9080 ltype = vectype;
9081 }
9082 ltype = build_aligned_type (ltype, TYPE_ALIGN (TREE_TYPE (vectype)));
9083 }
9084	  /* For a single-element vectype, do the one load with the
	     vector(1) scalar_type directly.  */
9085 else if (nloads == 1)
9086 ltype = vectype;
9087
9088 if (slp)
9089 {
9090 /* For SLP permutation support we need to load the whole group,
9091 not only the number of vector stmts the permutation result
9092 fits in. */
9093 if (slp_perm)
9094 {
9095 /* We don't yet generate SLP_TREE_LOAD_PERMUTATIONs for
9096 variable VF. */
9097 unsigned int const_vf = vf.to_constant ();
9098 ncopies = CEIL (group_size * const_vf, const_nunits);
9099 dr_chain.create (ncopies);
9100 }
9101 else
9102 ncopies = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
9103 }
9104 unsigned int group_el = 0;
9105 unsigned HOST_WIDE_INT
9106 elsz = tree_to_uhwi (TYPE_SIZE_UNIT (TREE_TYPE (vectype)));
9107 for (j = 0; j < ncopies; j++)
9108 {
9109 if (nloads > 1)
9110 vec_alloc (v, nloads);
9111 stmt_vec_info new_stmt_info = NULL;
9112 for (i = 0; i < nloads; i++)
9113 {
9114 tree this_off = build_int_cst (TREE_TYPE (alias_off),
9115 group_el * elsz + cst_offset);
9116 tree data_ref = build2 (MEM_REF, ltype, running_off, this_off);
9117 vect_copy_ref_info (data_ref, DR_REF (first_dr_info->dr));
9118 gassign *new_stmt
9119 = gimple_build_assign (make_ssa_name (ltype), data_ref);
9120 new_stmt_info
9121 = vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
9122 if (nloads > 1)
9123 CONSTRUCTOR_APPEND_ELT (v, NULL_TREE,
9124 gimple_assign_lhs (new_stmt));
9125
9126 group_el += lnel;
9127 if (! slp
9128 || group_el == group_size)
9129 {
9130 tree newoff = copy_ssa_name (running_off);
9131 gimple *incr = gimple_build_assign (newoff, POINTER_PLUS_EXPR,
9132 running_off, stride_step);
9133 vect_finish_stmt_generation (vinfo, stmt_info, incr, gsi);
9134
9135 running_off = newoff;
9136 group_el = 0;
9137 }
9138 }
9139 if (nloads > 1)
9140 {
9141 tree vec_inv = build_constructor (lvectype, v);
9142 new_temp = vect_init_vector (vinfo, stmt_info,
9143 vec_inv, lvectype, gsi);
9144 new_stmt_info = vinfo->lookup_def (new_temp);
9145 if (lvectype != vectype)
9146 {
9147 gassign *new_stmt
9148 = gimple_build_assign (make_ssa_name (vectype),
9149 VIEW_CONVERT_EXPR,
9150 build1 (VIEW_CONVERT_EXPR,
9151 vectype, new_temp));
9152 new_stmt_info
9153 = vect_finish_stmt_generation (vinfo, stmt_info,
9154 new_stmt, gsi);
9155 }
9156 }
9157
9158 if (slp)
9159 {
9160 if (slp_perm)
9161 dr_chain.quick_push (gimple_assign_lhs (new_stmt_info->stmt));
9162 else
9163 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt_info);
9164 }
9165 else
9166 {
9167 if (j == 0)
9168 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt_info;
9169 else
9170 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt_info;
9171 prev_stmt_info = new_stmt_info;
9172 }
9173 }
9174 if (slp_perm)
9175 {
9176 unsigned n_perms;
9177 vect_transform_slp_perm_load (vinfo, slp_node, dr_chain, gsi, vf,
9178 false, &n_perms);
9179 }
9180 return true;
9181 }
9182
9183 if (memory_access_type == VMAT_GATHER_SCATTER
9184 || (!slp && memory_access_type == VMAT_CONTIGUOUS))
9185 grouped_load = false;
9186
9187 if (grouped_load)
9188 {
9189 first_stmt_info = DR_GROUP_FIRST_ELEMENT (stmt_info);
9190 group_size = DR_GROUP_SIZE (first_stmt_info);
9191 /* For SLP vectorization we directly vectorize a subchain
9192 without permutation. */
9193 if (slp && ! SLP_TREE_LOAD_PERMUTATION (slp_node).exists ())
9194 first_stmt_info = SLP_TREE_SCALAR_STMTS (slp_node)[0];
9195 /* For BB vectorization always use the first stmt to base
9196 the data ref pointer on. */
9197 if (bb_vinfo)
9198 first_stmt_info_for_drptr = SLP_TREE_SCALAR_STMTS (slp_node)[0];
9199
9200 /* Check if the chain of loads is already vectorized. */
9201 if (STMT_VINFO_VEC_STMT (first_stmt_info)
9202 /* For SLP we would need to copy over SLP_TREE_VEC_STMTS.
9203 ??? But we can only do so if there is exactly one
9204 as we have no way to get at the rest. Leave the CSE
9205 opportunity alone.
9206 ??? With the group load eventually participating
9207 in multiple different permutations (having multiple
9208 slp nodes which refer to the same group) the CSE
9209 is even wrong code. See PR56270. */
9210 && !slp)
9211 {
9212 *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
9213 return true;
9214 }
9215 first_dr_info = STMT_VINFO_DR_INFO (first_stmt_info);
9216 group_gap_adj = 0;
9217
9218 /* VEC_NUM is the number of vect stmts to be created for this group. */
9219 if (slp)
9220 {
9221 grouped_load = false;
9222 /* If an SLP permutation is from N elements to N elements,
9223 and if one vector holds a whole number of N, we can load
9224 the inputs to the permutation in the same way as an
9225 unpermuted sequence. In other cases we need to load the
9226 whole group, not only the number of vector stmts the
9227 permutation result fits in. */
9228 unsigned scalar_lanes = SLP_TREE_LANES (slp_node);
9229 if (slp_perm
9230 && (group_size != scalar_lanes
9231 || !multiple_p (nunits, group_size)))
9232 {
9233 /* We don't yet generate such SLP_TREE_LOAD_PERMUTATIONs for
9234 variable VF; see vect_transform_slp_perm_load. */
9235 unsigned int const_vf = vf.to_constant ();
9236 unsigned int const_nunits = nunits.to_constant ();
9237 vec_num = CEIL (group_size * const_vf, const_nunits);
9238 group_gap_adj = vf * group_size - nunits * vec_num;
9239 }
9240 else
9241 {
9242 vec_num = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
9243 group_gap_adj
9244 = group_size - scalar_lanes;
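	      /* E.g. an SLP node covering three lanes of a group of size four
		 gives group_gap_adj == 1; the unused element is skipped by
		 bumping the data-ref pointer once a full group has been
		 loaded (see the group_elt handling further below).  */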
9245 }
9246 }
9247 else
9248 vec_num = group_size;
9249
9250 ref_type = get_group_alias_ptr_type (first_stmt_info);
9251 }
9252 else
9253 {
9254 first_stmt_info = stmt_info;
9255 first_dr_info = dr_info;
9256 group_size = vec_num = 1;
9257 group_gap_adj = 0;
9258 ref_type = reference_alias_ptr_type (DR_REF (first_dr_info->dr));
9259 }
9260
9261	  /* Gather-scatter accesses perform only component accesses, so alignment
9262	     is irrelevant for them.  */
9263 if (memory_access_type == VMAT_GATHER_SCATTER)
9264 alignment_support_scheme = dr_unaligned_supported;
9265 else
9266 alignment_support_scheme
9267 = vect_supportable_dr_alignment (vinfo, first_dr_info, false);
9268
9269 gcc_assert (alignment_support_scheme);
9270 vec_loop_masks *loop_masks
9271 = (loop_vinfo && LOOP_VINFO_FULLY_MASKED_P (loop_vinfo)
9272 ? &LOOP_VINFO_MASKS (loop_vinfo)
9273 : NULL);
9274	  /* Targets with load-lane instructions must not require explicit
9275	     realignment.  vect_supportable_dr_alignment always returns either
9276	     dr_aligned or dr_unaligned_supported for masked operations.  */
9277 gcc_assert ((memory_access_type != VMAT_LOAD_STORE_LANES
9278 && !mask
9279 && !loop_masks)
9280 || alignment_support_scheme == dr_aligned
9281 || alignment_support_scheme == dr_unaligned_supported);
9282
9283 /* In case the vectorization factor (VF) is bigger than the number
9284 of elements that we can fit in a vectype (nunits), we have to generate
9285	     more than one vector stmt - i.e., we need to "unroll" the
9286 vector stmt by a factor VF/nunits. In doing so, we record a pointer
9287 from one copy of the vector stmt to the next, in the field
9288 STMT_VINFO_RELATED_STMT. This is necessary in order to allow following
9289 stages to find the correct vector defs to be used when vectorizing
9290 stmts that use the defs of the current stmt. The example below
9291 illustrates the vectorization process when VF=16 and nunits=4 (i.e., we
9292 need to create 4 vectorized stmts):
9293
9294 before vectorization:
9295 RELATED_STMT VEC_STMT
9296 S1: x = memref - -
9297 S2: z = x + 1 - -
9298
9299 step 1: vectorize stmt S1:
9300 We first create the vector stmt VS1_0, and, as usual, record a
9301 pointer to it in the STMT_VINFO_VEC_STMT of the scalar stmt S1.
9302 Next, we create the vector stmt VS1_1, and record a pointer to
9303 it in the STMT_VINFO_RELATED_STMT of the vector stmt VS1_0.
9304 Similarly, for VS1_2 and VS1_3. This is the resulting chain of
9305 stmts and pointers:
9306 RELATED_STMT VEC_STMT
9307 VS1_0: vx0 = memref0 VS1_1 -
9308 VS1_1: vx1 = memref1 VS1_2 -
9309 VS1_2: vx2 = memref2 VS1_3 -
9310 VS1_3: vx3 = memref3 - -
9311 S1: x = load - VS1_0
9312 S2: z = x + 1 - -
9313
9314	     See the documentation of vect_get_vec_def_for_stmt_copy for how the
9315	     information we recorded in the RELATED_STMT field is used to vectorize
9316	     stmt S2.  */
9317
9318 /* In case of interleaving (non-unit grouped access):
9319
9320 S1: x2 = &base + 2
9321 S2: x0 = &base
9322 S3: x1 = &base + 1
9323 S4: x3 = &base + 3
9324
9325 Vectorized loads are created in the order of memory accesses
9326 starting from the access of the first stmt of the chain:
9327
9328 VS1: vx0 = &base
9329 VS2: vx1 = &base + vec_size*1
9330 VS3: vx3 = &base + vec_size*2
9331 VS4: vx4 = &base + vec_size*3
9332
9333 Then permutation statements are generated:
9334
9335 VS5: vx5 = VEC_PERM_EXPR < vx0, vx1, { 0, 2, ..., i*2 } >
9336 VS6: vx6 = VEC_PERM_EXPR < vx0, vx1, { 1, 3, ..., i*2+1 } >
9337 ...
9338
9339 And they are put in STMT_VINFO_VEC_STMT of the corresponding scalar stmts
9340 (the order of the data-refs in the output of vect_permute_load_chain
9341 corresponds to the order of scalar stmts in the interleaving chain - see
9342 the documentation of vect_permute_load_chain()).
9343 The generation of permutation stmts and recording them in
9344 STMT_VINFO_VEC_STMT is done in vect_transform_grouped_load().
9345
9346 In case of both multiple types and interleaving, the vector loads and
9347 permutation stmts above are created for every copy. The result vector
9348 stmts are put in STMT_VINFO_VEC_STMT for the first copy and in the
9349 corresponding STMT_VINFO_RELATED_STMT for the next copies. */
9350
9351 /* If the data reference is aligned (dr_aligned) or potentially unaligned
9352 on a target that supports unaligned accesses (dr_unaligned_supported)
9353 we generate the following code:
9354 p = initial_addr;
9355 indx = 0;
9356 loop {
9357 p = p + indx * vectype_size;
9358 vec_dest = *(p);
9359 indx = indx + 1;
9360 }
9361
9362 Otherwise, the data reference is potentially unaligned on a target that
9363 does not support unaligned accesses (dr_explicit_realign_optimized) -
9364 then generate the following code, in which the data in each iteration is
9365 obtained by two vector loads, one from the previous iteration, and one
9366 from the current iteration:
9367 p1 = initial_addr;
9368 msq_init = *(floor(p1))
9369 p2 = initial_addr + VS - 1;
9370 realignment_token = call target_builtin;
9371 indx = 0;
9372 loop {
9373 p2 = p2 + indx * vectype_size
9374 lsq = *(floor(p2))
9375 vec_dest = realign_load (msq, lsq, realignment_token)
9376 indx = indx + 1;
9377 msq = lsq;
9378 } */
9379
9380 /* If the misalignment remains the same throughout the execution of the
9381 loop, we can create the init_addr and permutation mask at the loop
9382 preheader. Otherwise, it needs to be created inside the loop.
9383 This can only occur when vectorizing memory accesses in the inner-loop
9384 nested within an outer-loop that is being vectorized. */
9385
9386 if (nested_in_vect_loop
9387 && !multiple_p (DR_STEP_ALIGNMENT (dr_info->dr),
9388 GET_MODE_SIZE (TYPE_MODE (vectype))))
9389 {
9390 gcc_assert (alignment_support_scheme != dr_explicit_realign_optimized);
9391 compute_in_loop = true;
9392 }
9393
9394 bool diff_first_stmt_info
9395 = first_stmt_info_for_drptr && first_stmt_info != first_stmt_info_for_drptr;
9396
9397 if ((alignment_support_scheme == dr_explicit_realign_optimized
9398 || alignment_support_scheme == dr_explicit_realign)
9399 && !compute_in_loop)
9400 {
9401	      /* If we have a different first_stmt_info, we can't set up the
9402		 realignment here, since we can't guarantee that the first_stmt_info
9403		 DR has been initialized yet; instead use the first_stmt_info_for_drptr
9404		 DR by bumping the distance from the first_stmt_info DR as below.  */
9405 if (!diff_first_stmt_info)
9406 msq = vect_setup_realignment (vinfo,
9407 first_stmt_info, gsi, &realignment_token,
9408 alignment_support_scheme, NULL_TREE,
9409 &at_loop);
9410 if (alignment_support_scheme == dr_explicit_realign_optimized)
9411 {
9412 phi = as_a <gphi *> (SSA_NAME_DEF_STMT (msq));
9413 byte_offset = size_binop (MINUS_EXPR, TYPE_SIZE_UNIT (vectype),
9414 size_one_node);
9415 gcc_assert (!first_stmt_info_for_drptr);
9416 }
9417 }
9418 else
9419 at_loop = loop;
9420
9421 if (memory_access_type == VMAT_CONTIGUOUS_REVERSE)
9422 offset = size_int (-TYPE_VECTOR_SUBPARTS (vectype) + 1);
9423
9424 tree bump;
9425 tree vec_offset = NULL_TREE;
9426 if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
9427 {
9428 aggr_type = NULL_TREE;
9429 bump = NULL_TREE;
9430 }
9431 else if (memory_access_type == VMAT_GATHER_SCATTER)
9432 {
9433 aggr_type = elem_type;
9434 vect_get_strided_load_store_ops (stmt_info, loop_vinfo, &gs_info,
9435 &bump, &vec_offset);
9436 }
9437 else
9438 {
9439 if (memory_access_type == VMAT_LOAD_STORE_LANES)
9440 aggr_type = build_array_type_nelts (elem_type, vec_num * nunits);
9441 else
9442 aggr_type = vectype;
9443 bump = vect_get_data_ptr_increment (vinfo, dr_info, aggr_type,
9444 memory_access_type);
9445 }
9446
9447 tree vec_mask = NULL_TREE;
9448 prev_stmt_info = NULL;
9449 poly_uint64 group_elt = 0;
9450 for (j = 0; j < ncopies; j++)
9451 {
9452 stmt_vec_info new_stmt_info = NULL;
9453 /* 1. Create the vector or array pointer update chain. */
9454 if (j == 0)
9455 {
9456 bool simd_lane_access_p
9457 = STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) != 0;
9458 if (simd_lane_access_p
9459 && TREE_CODE (DR_BASE_ADDRESS (first_dr_info->dr)) == ADDR_EXPR
9460 && VAR_P (TREE_OPERAND (DR_BASE_ADDRESS (first_dr_info->dr), 0))
9461 && integer_zerop (get_dr_vinfo_offset (vinfo, first_dr_info))
9462 && integer_zerop (DR_INIT (first_dr_info->dr))
9463 && alias_sets_conflict_p (get_alias_set (aggr_type),
9464 get_alias_set (TREE_TYPE (ref_type)))
9465 && (alignment_support_scheme == dr_aligned
9466 || alignment_support_scheme == dr_unaligned_supported))
9467 {
9468 dataref_ptr = unshare_expr (DR_BASE_ADDRESS (first_dr_info->dr));
9469 dataref_offset = build_int_cst (ref_type, 0);
9470 }
9471 else if (diff_first_stmt_info)
9472 {
9473 dataref_ptr
9474 = vect_create_data_ref_ptr (vinfo, first_stmt_info_for_drptr,
9475 aggr_type, at_loop, offset, &dummy,
9476 gsi, &ptr_incr, simd_lane_access_p,
9477 byte_offset, bump);
9478 /* Adjust the pointer by the difference to first_stmt. */
9479 data_reference_p ptrdr
9480 = STMT_VINFO_DATA_REF (first_stmt_info_for_drptr);
9481 tree diff
9482 = fold_convert (sizetype,
9483 size_binop (MINUS_EXPR,
9484 DR_INIT (first_dr_info->dr),
9485 DR_INIT (ptrdr)));
9486 dataref_ptr = bump_vector_ptr (vinfo, dataref_ptr, ptr_incr, gsi,
9487 stmt_info, diff);
9488 if (alignment_support_scheme == dr_explicit_realign)
9489 {
9490 msq = vect_setup_realignment (vinfo,
9491 first_stmt_info_for_drptr, gsi,
9492 &realignment_token,
9493 alignment_support_scheme,
9494 dataref_ptr, &at_loop);
9495 gcc_assert (!compute_in_loop);
9496 }
9497 }
9498 else if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
9499 vect_get_gather_scatter_ops (vinfo, loop, stmt_info, &gs_info,
9500 &dataref_ptr, &vec_offset);
9501 else
9502 dataref_ptr
9503 = vect_create_data_ref_ptr (vinfo, first_stmt_info, aggr_type,
9504 at_loop,
9505 offset, &dummy, gsi, &ptr_incr,
9506 simd_lane_access_p,
9507 byte_offset, bump);
9508 if (mask)
9509 {
9510 if (slp_node)
9511 {
9512 auto_vec<vec<tree> > vec_defs (1);
9513 vect_get_slp_defs (vinfo, slp_node, &vec_defs);
9514 vec_mask = vec_defs[0][0];
9515 }
9516 else
9517 vec_mask = vect_get_vec_def_for_operand (vinfo, mask, stmt_info,
9518 mask_vectype);
9519 }
9520 }
9521 else
9522 {
9523 if (dataref_offset)
9524 dataref_offset = int_const_binop (PLUS_EXPR, dataref_offset,
9525 bump);
9526 else if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
9527 vec_offset = vect_get_vec_def_for_stmt_copy (vinfo, vec_offset);
9528 else
9529 dataref_ptr = bump_vector_ptr (vinfo, dataref_ptr, ptr_incr, gsi,
9530 stmt_info, bump);
9531 if (mask)
9532 vec_mask = vect_get_vec_def_for_stmt_copy (vinfo, vec_mask);
9533 }
9534
9535 if (grouped_load || slp_perm)
9536 dr_chain.create (vec_num);
9537
9538 if (memory_access_type == VMAT_LOAD_STORE_LANES)
9539 {
9540 tree vec_array;
9541
9542 vec_array = create_vector_array (vectype, vec_num);
9543
9544 tree final_mask = NULL_TREE;
9545 if (loop_masks)
9546 final_mask = vect_get_loop_mask (gsi, loop_masks, ncopies,
9547 vectype, j);
9548 if (vec_mask)
9549 final_mask = prepare_load_store_mask (mask_vectype, final_mask,
9550 vec_mask, gsi);
9551
9552 gcall *call;
9553 if (final_mask)
9554 {
9555 /* Emit:
9556 VEC_ARRAY = MASK_LOAD_LANES (DATAREF_PTR, ALIAS_PTR,
9557 VEC_MASK). */
9558 unsigned int align = TYPE_ALIGN_UNIT (TREE_TYPE (vectype));
9559 tree alias_ptr = build_int_cst (ref_type, align);
9560 call = gimple_build_call_internal (IFN_MASK_LOAD_LANES, 3,
9561 dataref_ptr, alias_ptr,
9562 final_mask);
9563 }
9564 else
9565 {
9566 /* Emit:
9567 VEC_ARRAY = LOAD_LANES (MEM_REF[...all elements...]). */
9568 data_ref = create_array_ref (aggr_type, dataref_ptr, ref_type);
9569 call = gimple_build_call_internal (IFN_LOAD_LANES, 1, data_ref);
9570 }
9571 gimple_call_set_lhs (call, vec_array);
9572 gimple_call_set_nothrow (call, true);
9573 new_stmt_info = vect_finish_stmt_generation (vinfo, stmt_info,
9574 call, gsi);
9575
9576 /* Extract each vector into an SSA_NAME. */
9577 for (i = 0; i < vec_num; i++)
9578 {
9579 new_temp = read_vector_array (vinfo, stmt_info, gsi, scalar_dest,
9580 vec_array, i);
9581 dr_chain.quick_push (new_temp);
9582 }
9583
9584 /* Record the mapping between SSA_NAMEs and statements. */
9585 vect_record_grouped_load_vectors (vinfo, stmt_info, dr_chain);
9586
9587 /* Record that VEC_ARRAY is now dead. */
9588 vect_clobber_variable (vinfo, stmt_info, gsi, vec_array);
9589 }
9590 else
9591 {
9592 for (i = 0; i < vec_num; i++)
9593 {
9594 tree final_mask = NULL_TREE;
9595 if (loop_masks
9596 && memory_access_type != VMAT_INVARIANT)
9597 final_mask = vect_get_loop_mask (gsi, loop_masks,
9598 vec_num * ncopies,
9599 vectype, vec_num * j + i);
9600 if (vec_mask)
9601 final_mask = prepare_load_store_mask (mask_vectype, final_mask,
9602 vec_mask, gsi);
9603
9604 if (i > 0)
9605 dataref_ptr = bump_vector_ptr (vinfo, dataref_ptr, ptr_incr,
9606 gsi, stmt_info, bump);
9607
9608 /* 2. Create the vector-load in the loop. */
9609 gimple *new_stmt = NULL;
9610 switch (alignment_support_scheme)
9611 {
9612 case dr_aligned:
9613 case dr_unaligned_supported:
9614 {
9615 unsigned int misalign;
9616 unsigned HOST_WIDE_INT align;
9617
9618 if (memory_access_type == VMAT_GATHER_SCATTER)
9619 {
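			    /* Emit either
				 LHS = MASK_GATHER_LOAD (DATAREF_PTR, VEC_OFFSET,
							 SCALE, ZERO, FINAL_MASK)
			       for a fully-masked loop, or
				 LHS = GATHER_LOAD (DATAREF_PTR, VEC_OFFSET,
						    SCALE, ZERO)
			       otherwise.  */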
9620 tree zero = build_zero_cst (vectype);
9621 tree scale = size_int (gs_info.scale);
9622 gcall *call;
9623 if (loop_masks)
9624 call = gimple_build_call_internal
9625 (IFN_MASK_GATHER_LOAD, 5, dataref_ptr,
9626 vec_offset, scale, zero, final_mask);
9627 else
9628 call = gimple_build_call_internal
9629 (IFN_GATHER_LOAD, 4, dataref_ptr,
9630 vec_offset, scale, zero);
9631 gimple_call_set_nothrow (call, true);
9632 new_stmt = call;
9633 data_ref = NULL_TREE;
9634 break;
9635 }
9636
9637 align =
9638 known_alignment (DR_TARGET_ALIGNMENT (first_dr_info));
9639 if (alignment_support_scheme == dr_aligned)
9640 {
9641 gcc_assert (aligned_access_p (first_dr_info));
9642 misalign = 0;
9643 }
9644 else if (DR_MISALIGNMENT (first_dr_info) == -1)
9645 {
9646 align = dr_alignment
9647 (vect_dr_behavior (vinfo, first_dr_info));
9648 misalign = 0;
9649 }
9650 else
9651 misalign = DR_MISALIGNMENT (first_dr_info);
9652 if (dataref_offset == NULL_TREE
9653 && TREE_CODE (dataref_ptr) == SSA_NAME)
9654 set_ptr_info_alignment (get_ptr_info (dataref_ptr),
9655 align, misalign);
9656
9657 if (final_mask)
9658 {
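			    /* Emit:
				 LHS = MASK_LOAD (DATAREF_PTR, ALIAS_PTR, FINAL_MASK)
			       with ALIAS_PTR carrying the known access alignment
			       in its value.  */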
9659 align = least_bit_hwi (misalign | align);
9660 tree ptr = build_int_cst (ref_type, align);
9661 gcall *call
9662 = gimple_build_call_internal (IFN_MASK_LOAD, 3,
9663 dataref_ptr, ptr,
9664 final_mask);
9665 gimple_call_set_nothrow (call, true);
9666 new_stmt = call;
9667 data_ref = NULL_TREE;
9668 }
9669 else
9670 {
9671 tree ltype = vectype;
9672 tree new_vtype = NULL_TREE;
9673 unsigned HOST_WIDE_INT gap
9674 = DR_GROUP_GAP (first_stmt_info);
9675 unsigned int vect_align
9676 = vect_known_alignment_in_bytes (first_dr_info);
9677 unsigned int scalar_dr_size
9678 = vect_get_scalar_dr_size (first_dr_info);
9679 /* If there's no peeling for gaps but we have a gap
9680 with slp loads then load the lower half of the
9681 vector only. See get_group_load_store_type for
9682 when we apply this optimization. */
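			    /* E.g. for a group of four ints loaded into a V4SI
			       vector with a gap of two, only the low half is
			       loaded (with LTYPE being the half-vector piece
			       type); the constructor built below fills the
			       other half with zeros when assembling the full
			       vector.  */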
9683 if (slp
9684 && loop_vinfo
9685 && !LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo)
9686 && gap != 0
9687 && known_eq (nunits, (group_size - gap) * 2)
9688 && known_eq (nunits, group_size)
9689 && gap >= (vect_align / scalar_dr_size))
9690 {
9691 tree half_vtype;
9692 new_vtype
9693 = vector_vector_composition_type (vectype, 2,
9694 &half_vtype);
9695 if (new_vtype != NULL_TREE)
9696 ltype = half_vtype;
9697 }
9698 tree offset
9699 = (dataref_offset ? dataref_offset
9700 : build_int_cst (ref_type, 0));
9701 if (ltype != vectype
9702 && memory_access_type == VMAT_CONTIGUOUS_REVERSE)
9703 {
9704 unsigned HOST_WIDE_INT gap_offset
9705 = gap * tree_to_uhwi (TYPE_SIZE_UNIT (elem_type));
9706 tree gapcst = build_int_cst (ref_type, gap_offset);
9707 offset = size_binop (PLUS_EXPR, offset, gapcst);
9708 }
9709 data_ref
9710 = fold_build2 (MEM_REF, ltype, dataref_ptr, offset);
9711 if (alignment_support_scheme == dr_aligned)
9712 ;
9713 else if (DR_MISALIGNMENT (first_dr_info) == -1)
9714 TREE_TYPE (data_ref)
9715 = build_aligned_type (TREE_TYPE (data_ref),
9716 align * BITS_PER_UNIT);
9717 else
9718 TREE_TYPE (data_ref)
9719 = build_aligned_type (TREE_TYPE (data_ref),
9720 TYPE_ALIGN (elem_type));
9721 if (ltype != vectype)
9722 {
9723 vect_copy_ref_info (data_ref,
9724 DR_REF (first_dr_info->dr));
9725 tree tem = make_ssa_name (ltype);
9726 new_stmt = gimple_build_assign (tem, data_ref);
9727 vect_finish_stmt_generation (vinfo, stmt_info,
9728 new_stmt, gsi);
9729 data_ref = NULL;
9730 vec<constructor_elt, va_gc> *v;
9731 vec_alloc (v, 2);
9732 if (memory_access_type == VMAT_CONTIGUOUS_REVERSE)
9733 {
9734 CONSTRUCTOR_APPEND_ELT (v, NULL_TREE,
9735 build_zero_cst (ltype));
9736 CONSTRUCTOR_APPEND_ELT (v, NULL_TREE, tem);
9737 }
9738 else
9739 {
9740 CONSTRUCTOR_APPEND_ELT (v, NULL_TREE, tem);
9741 CONSTRUCTOR_APPEND_ELT (v, NULL_TREE,
9742 build_zero_cst (ltype));
9743 }
9744 gcc_assert (new_vtype != NULL_TREE);
9745 if (new_vtype == vectype)
9746 new_stmt = gimple_build_assign (
9747 vec_dest, build_constructor (vectype, v));
9748 else
9749 {
9750 tree new_vname = make_ssa_name (new_vtype);
9751 new_stmt = gimple_build_assign (
9752 new_vname, build_constructor (new_vtype, v));
9753 vect_finish_stmt_generation (vinfo, stmt_info,
9754 new_stmt, gsi);
9755 new_stmt = gimple_build_assign (
9756 vec_dest, build1 (VIEW_CONVERT_EXPR, vectype,
9757 new_vname));
9758 }
9759 }
9760 }
9761 break;
9762 }
9763 case dr_explicit_realign:
9764 {
9765 tree ptr, bump;
9766
9767 tree vs = size_int (TYPE_VECTOR_SUBPARTS (vectype));
9768
9769 if (compute_in_loop)
9770 msq = vect_setup_realignment (vinfo, first_stmt_info, gsi,
9771 &realignment_token,
9772 dr_explicit_realign,
9773 dataref_ptr, NULL);
9774
9775 if (TREE_CODE (dataref_ptr) == SSA_NAME)
9776 ptr = copy_ssa_name (dataref_ptr);
9777 else
9778 ptr = make_ssa_name (TREE_TYPE (dataref_ptr));
9779 // For explicit realign the target alignment should be
9780 // known at compile time.
9781 unsigned HOST_WIDE_INT align =
9782 DR_TARGET_ALIGNMENT (first_dr_info).to_constant ();
9783 new_stmt = gimple_build_assign
9784 (ptr, BIT_AND_EXPR, dataref_ptr,
9785 build_int_cst
9786 (TREE_TYPE (dataref_ptr),
9787 -(HOST_WIDE_INT) align));
9788 vect_finish_stmt_generation (vinfo, stmt_info,
9789 new_stmt, gsi);
9790 data_ref
9791 = build2 (MEM_REF, vectype, ptr,
9792 build_int_cst (ref_type, 0));
9793 vect_copy_ref_info (data_ref, DR_REF (first_dr_info->dr));
9794 vec_dest = vect_create_destination_var (scalar_dest,
9795 vectype);
9796 new_stmt = gimple_build_assign (vec_dest, data_ref);
9797 new_temp = make_ssa_name (vec_dest, new_stmt);
9798 gimple_assign_set_lhs (new_stmt, new_temp);
9799 gimple_move_vops (new_stmt, stmt_info->stmt);
9800 vect_finish_stmt_generation (vinfo, stmt_info,
9801 new_stmt, gsi);
9802 msq = new_temp;
9803
9804 bump = size_binop (MULT_EXPR, vs,
9805 TYPE_SIZE_UNIT (elem_type));
9806 bump = size_binop (MINUS_EXPR, bump, size_one_node);
9807 ptr = bump_vector_ptr (vinfo, dataref_ptr, NULL, gsi,
9808 stmt_info, bump);
9809 new_stmt = gimple_build_assign
9810 (NULL_TREE, BIT_AND_EXPR, ptr,
9811 build_int_cst
9812 (TREE_TYPE (ptr), -(HOST_WIDE_INT) align));
9813 ptr = copy_ssa_name (ptr, new_stmt);
9814 gimple_assign_set_lhs (new_stmt, ptr);
9815 vect_finish_stmt_generation (vinfo, stmt_info,
9816 new_stmt, gsi);
9817 data_ref
9818 = build2 (MEM_REF, vectype, ptr,
9819 build_int_cst (ref_type, 0));
9820 break;
9821 }
9822 case dr_explicit_realign_optimized:
9823 {
9824 if (TREE_CODE (dataref_ptr) == SSA_NAME)
9825 new_temp = copy_ssa_name (dataref_ptr);
9826 else
9827 new_temp = make_ssa_name (TREE_TYPE (dataref_ptr));
9828 // We should only be doing this if we know the target
9829 // alignment at compile time.
9830 unsigned HOST_WIDE_INT align =
9831 DR_TARGET_ALIGNMENT (first_dr_info).to_constant ();
9832 new_stmt = gimple_build_assign
9833 (new_temp, BIT_AND_EXPR, dataref_ptr,
9834 build_int_cst (TREE_TYPE (dataref_ptr),
9835 -(HOST_WIDE_INT) align));
9836 vect_finish_stmt_generation (vinfo, stmt_info,
9837 new_stmt, gsi);
9838 data_ref
9839 = build2 (MEM_REF, vectype, new_temp,
9840 build_int_cst (ref_type, 0));
9841 break;
9842 }
9843 default:
9844 gcc_unreachable ();
9845 }
9846 vec_dest = vect_create_destination_var (scalar_dest, vectype);
9847 /* DATA_REF is null if we've already built the statement. */
9848 if (data_ref)
9849 {
9850 vect_copy_ref_info (data_ref, DR_REF (first_dr_info->dr));
9851 new_stmt = gimple_build_assign (vec_dest, data_ref);
9852 }
9853 new_temp = make_ssa_name (vec_dest, new_stmt);
9854 gimple_set_lhs (new_stmt, new_temp);
9855 new_stmt_info
9856 = vect_finish_stmt_generation (vinfo, stmt_info,
9857 new_stmt, gsi);
9858
9859 /* 3. Handle explicit realignment if necessary/supported.
9860 Create in loop:
9861 vec_dest = realign_load (msq, lsq, realignment_token) */
9862 if (alignment_support_scheme == dr_explicit_realign_optimized
9863 || alignment_support_scheme == dr_explicit_realign)
9864 {
9865 lsq = gimple_assign_lhs (new_stmt);
9866 if (!realignment_token)
9867 realignment_token = dataref_ptr;
9868 vec_dest = vect_create_destination_var (scalar_dest, vectype);
9869 new_stmt = gimple_build_assign (vec_dest, REALIGN_LOAD_EXPR,
9870 msq, lsq, realignment_token);
9871 new_temp = make_ssa_name (vec_dest, new_stmt);
9872 gimple_assign_set_lhs (new_stmt, new_temp);
9873 new_stmt_info
9874 = vect_finish_stmt_generation (vinfo, stmt_info,
9875 new_stmt, gsi);
9876
9877 if (alignment_support_scheme == dr_explicit_realign_optimized)
9878 {
9879 gcc_assert (phi);
9880 if (i == vec_num - 1 && j == ncopies - 1)
9881 add_phi_arg (phi, lsq,
9882 loop_latch_edge (containing_loop),
9883 UNKNOWN_LOCATION);
9884 msq = lsq;
9885 }
9886 }
9887
9888 if (memory_access_type == VMAT_CONTIGUOUS_REVERSE)
9889 {
9890 tree perm_mask = perm_mask_for_reverse (vectype);
9891 new_temp = permute_vec_elements (vinfo, new_temp, new_temp,
9892 perm_mask, stmt_info, gsi);
9893 new_stmt_info = vinfo->lookup_def (new_temp);
9894 }
9895
9896 /* Collect vector loads and later create their permutation in
9897 vect_transform_grouped_load (). */
9898 if (grouped_load || slp_perm)
9899 dr_chain.quick_push (new_temp);
9900
9901 /* Store vector loads in the corresponding SLP_NODE. */
9902 if (slp && !slp_perm)
9903 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt_info);
9904
9905	      /* With an SLP permutation we load the gaps as well; without one
9906	         we need to skip the gaps once we have fully loaded all
9907		 elements.  group_gap_adj is DR_GROUP_SIZE here.  */
9908 group_elt += nunits;
9909 if (maybe_ne (group_gap_adj, 0U)
9910 && !slp_perm
9911 && known_eq (group_elt, group_size - group_gap_adj))
9912 {
9913 poly_wide_int bump_val
9914 = (wi::to_wide (TYPE_SIZE_UNIT (elem_type))
9915 * group_gap_adj);
9916 tree bump = wide_int_to_tree (sizetype, bump_val);
9917 dataref_ptr = bump_vector_ptr (vinfo, dataref_ptr, ptr_incr,
9918 gsi, stmt_info, bump);
9919 group_elt = 0;
9920 }
9921 }
9922 /* Bump the vector pointer to account for a gap or for excess
9923 elements loaded for a permuted SLP load. */
9924 if (maybe_ne (group_gap_adj, 0U) && slp_perm)
9925 {
9926 poly_wide_int bump_val
9927 = (wi::to_wide (TYPE_SIZE_UNIT (elem_type))
9928 * group_gap_adj);
9929 tree bump = wide_int_to_tree (sizetype, bump_val);
9930 dataref_ptr = bump_vector_ptr (vinfo, dataref_ptr, ptr_incr, gsi,
9931 stmt_info, bump);
9932 }
9933 }
9934
9935 if (slp && !slp_perm)
9936 continue;
9937
9938 if (slp_perm)
9939 {
9940 unsigned n_perms;
9941 bool ok = vect_transform_slp_perm_load (vinfo, slp_node, dr_chain,
9942 gsi, vf, false, &n_perms);
9943 gcc_assert (ok);
9944 }
9945 else
9946 {
9947 if (grouped_load)
9948 {
9949 if (memory_access_type != VMAT_LOAD_STORE_LANES)
9950 vect_transform_grouped_load (vinfo, stmt_info, dr_chain,
9951 group_size, gsi);
9952 *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
9953 }
9954 else
9955 {
9956 if (j == 0)
9957 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt_info;
9958 else
9959 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt_info;
9960 prev_stmt_info = new_stmt_info;
9961 }
9962 }
9963 dr_chain.release ();
9964 }
9965
9966 return true;
9967 }
9968
9969 /* Function vect_is_simple_cond.
9970
9971 Input:
9972 LOOP - the loop that is being vectorized.
9973 COND - Condition that is checked for simple use.
9974
9975 Output:
9976 *COMP_VECTYPE - the vector type for the comparison.
9977 *DTS - The def types for the arguments of the comparison
9978
9979 Returns whether a COND can be vectorized. Checks whether
9980    condition operands are supportable using vect_is_simple_use.  */
9981
9982 static bool
9983 vect_is_simple_cond (tree cond, vec_info *vinfo, stmt_vec_info stmt_info,
9984 slp_tree slp_node, tree *comp_vectype,
9985 enum vect_def_type *dts, tree vectype)
9986 {
9987 tree lhs, rhs;
9988 tree vectype1 = NULL_TREE, vectype2 = NULL_TREE;
9989 slp_tree slp_op;
9990
9991 /* Mask case. */
9992 if (TREE_CODE (cond) == SSA_NAME
9993 && VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (cond)))
9994 {
9995 if (!vect_is_simple_use (vinfo, stmt_info, slp_node, 0, &cond,
9996 &slp_op, &dts[0], comp_vectype)
9997 || !*comp_vectype
9998 || !VECTOR_BOOLEAN_TYPE_P (*comp_vectype))
9999 return false;
10000 return true;
10001 }
10002
10003 if (!COMPARISON_CLASS_P (cond))
10004 return false;
10005
10006 lhs = TREE_OPERAND (cond, 0);
10007 rhs = TREE_OPERAND (cond, 1);
10008
10009 if (TREE_CODE (lhs) == SSA_NAME)
10010 {
10011 if (!vect_is_simple_use (vinfo, stmt_info, slp_node, 0,
10012 &lhs, &slp_op, &dts[0], &vectype1))
10013 return false;
10014 }
10015 else if (TREE_CODE (lhs) == INTEGER_CST || TREE_CODE (lhs) == REAL_CST
10016 || TREE_CODE (lhs) == FIXED_CST)
10017 dts[0] = vect_constant_def;
10018 else
10019 return false;
10020
10021 if (TREE_CODE (rhs) == SSA_NAME)
10022 {
10023 if (!vect_is_simple_use (vinfo, stmt_info, slp_node, 1,
10024 &rhs, &slp_op, &dts[1], &vectype2))
10025 return false;
10026 }
10027 else if (TREE_CODE (rhs) == INTEGER_CST || TREE_CODE (rhs) == REAL_CST
10028 || TREE_CODE (rhs) == FIXED_CST)
10029 dts[1] = vect_constant_def;
10030 else
10031 return false;
10032
10033 if (vectype1 && vectype2
10034 && maybe_ne (TYPE_VECTOR_SUBPARTS (vectype1),
10035 TYPE_VECTOR_SUBPARTS (vectype2)))
10036 return false;
10037
10038 *comp_vectype = vectype1 ? vectype1 : vectype2;
10039 /* Invariant comparison. */
10040 if (! *comp_vectype)
10041 {
10042 tree scalar_type = TREE_TYPE (lhs);
10043 if (VECT_SCALAR_BOOLEAN_TYPE_P (scalar_type))
10044 *comp_vectype = truth_type_for (vectype);
10045 else
10046 {
10047 /* If we can widen the comparison to match vectype do so. */
10048 if (INTEGRAL_TYPE_P (scalar_type)
10049 && !slp_node
10050 && tree_int_cst_lt (TYPE_SIZE (scalar_type),
10051 TYPE_SIZE (TREE_TYPE (vectype))))
10052 scalar_type = build_nonstandard_integer_type
10053 (vector_element_bits (vectype), TYPE_UNSIGNED (scalar_type));
10054 *comp_vectype = get_vectype_for_scalar_type (vinfo, scalar_type,
10055 slp_node);
10056 }
10057 }
10058
10059 return true;
10060 }
10061
10062 /* vectorizable_condition.
10063
10064    Check if STMT_INFO is a conditional modify expression that can be vectorized.
10065 If VEC_STMT is also passed, vectorize STMT_INFO: create a vectorized
10066 stmt using VEC_COND_EXPR to replace it, put it in VEC_STMT, and insert it
10067 at GSI.
10068
10069 When STMT_INFO is vectorized as a nested cycle, for_reduction is true.
10070
10071 Return true if STMT_INFO is vectorizable in this way. */
10072
10073 static bool
10074 vectorizable_condition (vec_info *vinfo,
10075 stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
10076 stmt_vec_info *vec_stmt,
10077 slp_tree slp_node, stmt_vector_for_cost *cost_vec)
10078 {
10079 tree scalar_dest = NULL_TREE;
10080 tree vec_dest = NULL_TREE;
10081 tree cond_expr, cond_expr0 = NULL_TREE, cond_expr1 = NULL_TREE;
10082 tree then_clause, else_clause;
10083 tree comp_vectype = NULL_TREE;
10084 tree vec_cond_lhs = NULL_TREE, vec_cond_rhs = NULL_TREE;
10085 tree vec_then_clause = NULL_TREE, vec_else_clause = NULL_TREE;
10086 tree vec_compare;
10087 tree new_temp;
10088 loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
10089 enum vect_def_type dts[4]
10090 = {vect_unknown_def_type, vect_unknown_def_type,
10091 vect_unknown_def_type, vect_unknown_def_type};
10092 int ndts = 4;
10093 int ncopies;
10094 int vec_num;
10095 enum tree_code code, cond_code, bitop1 = NOP_EXPR, bitop2 = NOP_EXPR;
10096 stmt_vec_info prev_stmt_info = NULL;
10097 int i, j;
10098 bb_vec_info bb_vinfo = dyn_cast <bb_vec_info> (vinfo);
10099 vec<tree> vec_oprnds0 = vNULL;
10100 vec<tree> vec_oprnds1 = vNULL;
10101 vec<tree> vec_oprnds2 = vNULL;
10102 vec<tree> vec_oprnds3 = vNULL;
10103 tree vec_cmp_type;
10104 bool masked = false;
10105
10106 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
10107 return false;
10108
10109 /* Is vectorizable conditional operation? */
10110 gassign *stmt = dyn_cast <gassign *> (stmt_info->stmt);
10111 if (!stmt)
10112 return false;
10113
10114 code = gimple_assign_rhs_code (stmt);
10115 if (code != COND_EXPR)
10116 return false;
10117
10118 stmt_vec_info reduc_info = NULL;
10119 int reduc_index = -1;
10120 vect_reduction_type reduction_type = TREE_CODE_REDUCTION;
10121 bool for_reduction
10122 = STMT_VINFO_REDUC_DEF (vect_orig_stmt (stmt_info)) != NULL;
10123 if (for_reduction)
10124 {
10125 if (STMT_SLP_TYPE (stmt_info))
10126 return false;
10127 reduc_info = info_for_reduction (vinfo, stmt_info);
10128 reduction_type = STMT_VINFO_REDUC_TYPE (reduc_info);
10129 reduc_index = STMT_VINFO_REDUC_IDX (stmt_info);
10130 gcc_assert (reduction_type != EXTRACT_LAST_REDUCTION
10131 || reduc_index != -1);
10132 }
10133 else
10134 {
10135 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
10136 return false;
10137
10138 /* FORNOW: only supported as part of a reduction. */
10139 if (STMT_VINFO_LIVE_P (stmt_info))
10140 {
10141 if (dump_enabled_p ())
10142 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
10143 "value used after loop.\n");
10144 return false;
10145 }
10146 }
10147
10148 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
10149 tree vectype1 = NULL_TREE, vectype2 = NULL_TREE;
10150
10151 if (slp_node)
10152 {
10153 ncopies = 1;
10154 vec_num = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
10155 }
10156 else
10157 {
10158 ncopies = vect_get_num_copies (loop_vinfo, vectype);
10159 vec_num = 1;
10160 }
10161
10162 gcc_assert (ncopies >= 1);
10163 if (for_reduction && ncopies > 1)
10164 return false; /* FORNOW */
10165
10166 cond_expr = gimple_assign_rhs1 (stmt);
10167
10168 if (!vect_is_simple_cond (cond_expr, vinfo, stmt_info, slp_node,
10169 &comp_vectype, &dts[0], vectype)
10170 || !comp_vectype)
10171 return false;
10172
10173 unsigned op_adjust = COMPARISON_CLASS_P (cond_expr) ? 1 : 0;
10174 slp_tree then_slp_node, else_slp_node;
10175 if (!vect_is_simple_use (vinfo, stmt_info, slp_node, 1 + op_adjust,
10176 &then_clause, &then_slp_node, &dts[2], &vectype1))
10177 return false;
10178 if (!vect_is_simple_use (vinfo, stmt_info, slp_node, 2 + op_adjust,
10179 &else_clause, &else_slp_node, &dts[3], &vectype2))
10180 return false;
10181
10182 if (vectype1 && !useless_type_conversion_p (vectype, vectype1))
10183 return false;
10184
10185 if (vectype2 && !useless_type_conversion_p (vectype, vectype2))
10186 return false;
10187
10188 masked = !COMPARISON_CLASS_P (cond_expr);
10189 vec_cmp_type = truth_type_for (comp_vectype);
10190
10191 if (vec_cmp_type == NULL_TREE)
10192 return false;
10193
10194 cond_code = TREE_CODE (cond_expr);
10195 if (!masked)
10196 {
10197 cond_expr0 = TREE_OPERAND (cond_expr, 0);
10198 cond_expr1 = TREE_OPERAND (cond_expr, 1);
10199 }
10200
10201 /* For conditional reductions, the "then" value needs to be the candidate
10202 value calculated by this iteration while the "else" value needs to be
10203 the result carried over from previous iterations. If the COND_EXPR
10204 is the other way around, we need to swap it. */
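	  /* E.g. a scalar COND_EXPR of the form r = cond ? r : val, with the
	     carried-over value in the "then" arm, is handled below as if it
	     were r = !cond ? val : r, by inverting the comparison (or marking
	     the mask for inversion) and swapping the two arms.  */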
10205 bool must_invert_cmp_result = false;
10206 if (reduction_type == EXTRACT_LAST_REDUCTION && reduc_index == 1)
10207 {
10208 if (masked)
10209 must_invert_cmp_result = true;
10210 else
10211 {
10212 bool honor_nans = HONOR_NANS (TREE_TYPE (cond_expr0));
10213 tree_code new_code = invert_tree_comparison (cond_code, honor_nans);
10214 if (new_code == ERROR_MARK)
10215 must_invert_cmp_result = true;
10216 else
10217 {
10218 cond_code = new_code;
10219 /* Make sure we don't accidentally use the old condition. */
10220 cond_expr = NULL_TREE;
10221 }
10222 }
10223 std::swap (then_clause, else_clause);
10224 }
10225
10226 if (!masked && VECTOR_BOOLEAN_TYPE_P (comp_vectype))
10227 {
10228 /* Boolean values may have another representation in vectors
10229 and therefore we prefer bit operations over comparison for
10230 them (which also works for scalar masks). We store opcodes
10231 to use in bitop1 and bitop2. Statement is vectorized as
10232 BITOP2 (rhs1 BITOP1 rhs2) or rhs1 BITOP2 (BITOP1 rhs2)
10233 depending on bitop1 and bitop2 arity. */
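      /* For example, for mask operands a > b is emitted as a & ~b
	 (bitop1 = BIT_NOT_EXPR applied to b, bitop2 = BIT_AND_EXPR)
	 and a == b as ~(a ^ b).  */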
10234 switch (cond_code)
10235 {
10236 case GT_EXPR:
10237 bitop1 = BIT_NOT_EXPR;
10238 bitop2 = BIT_AND_EXPR;
10239 break;
10240 case GE_EXPR:
10241 bitop1 = BIT_NOT_EXPR;
10242 bitop2 = BIT_IOR_EXPR;
10243 break;
10244 case LT_EXPR:
10245 bitop1 = BIT_NOT_EXPR;
10246 bitop2 = BIT_AND_EXPR;
10247 std::swap (cond_expr0, cond_expr1);
10248 break;
10249 case LE_EXPR:
10250 bitop1 = BIT_NOT_EXPR;
10251 bitop2 = BIT_IOR_EXPR;
10252 std::swap (cond_expr0, cond_expr1);
10253 break;
10254 case NE_EXPR:
10255 bitop1 = BIT_XOR_EXPR;
10256 break;
10257 case EQ_EXPR:
10258 bitop1 = BIT_XOR_EXPR;
10259 bitop2 = BIT_NOT_EXPR;
10260 break;
10261 default:
10262 return false;
10263 }
10264 cond_code = SSA_NAME;
10265 }
10266
10267 if (TREE_CODE_CLASS (cond_code) == tcc_comparison
10268 && reduction_type == EXTRACT_LAST_REDUCTION
10269 && !expand_vec_cmp_expr_p (comp_vectype, vec_cmp_type, cond_code))
10270 {
10271 if (dump_enabled_p ())
10272 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
10273 "reduction comparison operation not supported.\n");
10274 return false;
10275 }
10276
10277 if (!vec_stmt)
10278 {
10279 if (bitop1 != NOP_EXPR)
10280 {
10281 machine_mode mode = TYPE_MODE (comp_vectype);
10282 optab optab;
10283
10284 optab = optab_for_tree_code (bitop1, comp_vectype, optab_default);
10285 if (!optab || optab_handler (optab, mode) == CODE_FOR_nothing)
10286 return false;
10287
10288 if (bitop2 != NOP_EXPR)
10289 {
10290 optab = optab_for_tree_code (bitop2, comp_vectype,
10291 optab_default);
10292 if (!optab || optab_handler (optab, mode) == CODE_FOR_nothing)
10293 return false;
10294 }
10295 }
10296
10297 vect_cost_for_stmt kind = vector_stmt;
10298 if (reduction_type == EXTRACT_LAST_REDUCTION)
10299 /* Count one reduction-like operation per vector. */
10300 kind = vec_to_scalar;
10301 else if (!expand_vec_cond_expr_p (vectype, comp_vectype, cond_code))
10302 return false;
10303
10304 if (slp_node
10305 && (!vect_maybe_update_slp_op_vectype
10306 (SLP_TREE_CHILDREN (slp_node)[0], comp_vectype)
10307 || (op_adjust == 1
10308 && !vect_maybe_update_slp_op_vectype
10309 (SLP_TREE_CHILDREN (slp_node)[1], comp_vectype))
10310 || !vect_maybe_update_slp_op_vectype (then_slp_node, vectype)
10311 || !vect_maybe_update_slp_op_vectype (else_slp_node, vectype)))
10312 {
10313 if (dump_enabled_p ())
10314 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
10315 "incompatible vector types for invariants\n");
10316 return false;
10317 }
10318
10319 if (loop_vinfo
10320 && LOOP_VINFO_CAN_FULLY_MASK_P (loop_vinfo)
10321 && reduction_type == EXTRACT_LAST_REDUCTION)
10322 vect_record_loop_mask (loop_vinfo, &LOOP_VINFO_MASKS (loop_vinfo),
10323 ncopies * vec_num, vectype, NULL);
10324
10325 STMT_VINFO_TYPE (stmt_info) = condition_vec_info_type;
10326 vect_model_simple_cost (vinfo, stmt_info, ncopies, dts, ndts, slp_node,
10327 cost_vec, kind);
10328 return true;
10329 }
10330
10331 /* Transform. */
10332
10333 if (!slp_node)
10334 {
10335 vec_oprnds0.create (1);
10336 vec_oprnds1.create (1);
10337 vec_oprnds2.create (1);
10338 vec_oprnds3.create (1);
10339 }
10340
10341 /* Handle def. */
10342 scalar_dest = gimple_assign_lhs (stmt);
10343 if (reduction_type != EXTRACT_LAST_REDUCTION)
10344 vec_dest = vect_create_destination_var (scalar_dest, vectype);
10345
10346 /* Handle cond expr. */
10347 for (j = 0; j < ncopies; j++)
10348 {
10349 bool swap_cond_operands = false;
10350
10351 /* See whether another part of the vectorized code applies a loop
10352 mask to the condition, or to its inverse. */
10353
10354 vec_loop_masks *masks = NULL;
10355 if (loop_vinfo && LOOP_VINFO_FULLY_MASKED_P (loop_vinfo))
10356 {
10357 if (reduction_type == EXTRACT_LAST_REDUCTION)
10358 masks = &LOOP_VINFO_MASKS (loop_vinfo);
10359 else
10360 {
10361 scalar_cond_masked_key cond (cond_expr, ncopies);
10362 if (loop_vinfo->scalar_cond_masked_set.contains (cond))
10363 masks = &LOOP_VINFO_MASKS (loop_vinfo);
10364 else
10365 {
10366 bool honor_nans = HONOR_NANS (TREE_TYPE (cond.op0));
10367 cond.code = invert_tree_comparison (cond.code, honor_nans);
10368 if (loop_vinfo->scalar_cond_masked_set.contains (cond))
10369 {
10370 masks = &LOOP_VINFO_MASKS (loop_vinfo);
10371 cond_code = cond.code;
10372 swap_cond_operands = true;
10373 }
10374 }
10375 }
10376 }
10377
10378 stmt_vec_info new_stmt_info = NULL;
10379 if (j == 0)
10380 {
10381 if (slp_node)
10382 {
10383 auto_vec<vec<tree>, 4> vec_defs;
10384 vect_get_slp_defs (vinfo, slp_node, &vec_defs);
10385 vec_oprnds3 = vec_defs.pop ();
10386 vec_oprnds2 = vec_defs.pop ();
10387 if (!masked)
10388 vec_oprnds1 = vec_defs.pop ();
10389 vec_oprnds0 = vec_defs.pop ();
10390 }
10391 else
10392 {
10393 if (masked)
10394 {
10395 vec_cond_lhs
10396 = vect_get_vec_def_for_operand (vinfo, cond_expr, stmt_info,
10397 comp_vectype);
10398 }
10399 else
10400 {
10401 vec_cond_lhs
10402 = vect_get_vec_def_for_operand (vinfo, cond_expr0,
10403 stmt_info, comp_vectype);
10404 vec_cond_rhs
10405 = vect_get_vec_def_for_operand (vinfo, cond_expr1,
10406 stmt_info, comp_vectype);
10407 }
10408 vec_then_clause = vect_get_vec_def_for_operand (vinfo,
10409 then_clause,
10410 stmt_info);
10411 if (reduction_type != EXTRACT_LAST_REDUCTION)
10412 vec_else_clause = vect_get_vec_def_for_operand (vinfo,
10413 else_clause,
10414 stmt_info);
10415 }
10416 }
10417 else
10418 {
10419 vec_cond_lhs
10420 = vect_get_vec_def_for_stmt_copy (vinfo, vec_oprnds0.pop ());
10421 if (!masked)
10422 vec_cond_rhs
10423 = vect_get_vec_def_for_stmt_copy (vinfo, vec_oprnds1.pop ());
10424
10425 vec_then_clause = vect_get_vec_def_for_stmt_copy (vinfo,
10426 vec_oprnds2.pop ());
10427 vec_else_clause = vect_get_vec_def_for_stmt_copy (vinfo,
10428 vec_oprnds3.pop ());
10429 }
10430
10431 if (!slp_node)
10432 {
10433 vec_oprnds0.quick_push (vec_cond_lhs);
10434 if (!masked)
10435 vec_oprnds1.quick_push (vec_cond_rhs);
10436 vec_oprnds2.quick_push (vec_then_clause);
10437 vec_oprnds3.quick_push (vec_else_clause);
10438 }
10439
10440 /* Arguments are ready. Create the new vector stmt. */
10441 FOR_EACH_VEC_ELT (vec_oprnds0, i, vec_cond_lhs)
10442 {
10443 vec_then_clause = vec_oprnds2[i];
10444 vec_else_clause = vec_oprnds3[i];
10445
10446 if (swap_cond_operands)
10447 std::swap (vec_then_clause, vec_else_clause);
10448
10449 if (masked)
10450 vec_compare = vec_cond_lhs;
10451 else
10452 {
10453 vec_cond_rhs = vec_oprnds1[i];
10454 if (bitop1 == NOP_EXPR)
10455 vec_compare = build2 (cond_code, vec_cmp_type,
10456 vec_cond_lhs, vec_cond_rhs);
10457 else
10458 {
10459 new_temp = make_ssa_name (vec_cmp_type);
10460 gassign *new_stmt;
10461 if (bitop1 == BIT_NOT_EXPR)
10462 new_stmt = gimple_build_assign (new_temp, bitop1,
10463 vec_cond_rhs);
10464 else
10465 new_stmt
10466 = gimple_build_assign (new_temp, bitop1, vec_cond_lhs,
10467 vec_cond_rhs);
10468 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
10469 if (bitop2 == NOP_EXPR)
10470 vec_compare = new_temp;
10471 else if (bitop2 == BIT_NOT_EXPR)
10472 {
10473 /* Instead of doing ~x ? y : z do x ? z : y. */
10474 vec_compare = new_temp;
10475 std::swap (vec_then_clause, vec_else_clause);
10476 }
10477 else
10478 {
10479 vec_compare = make_ssa_name (vec_cmp_type);
10480 new_stmt
10481 = gimple_build_assign (vec_compare, bitop2,
10482 vec_cond_lhs, new_temp);
10483 vect_finish_stmt_generation (vinfo, stmt_info,
10484 new_stmt, gsi);
10485 }
10486 }
10487 }
10488
10489 /* If we decided to apply a loop mask to the result of the vector
10490 comparison, AND the comparison with the mask now. Later passes
10489	 should then be able to reuse the AND results between multiple
10492 vector statements.
10493
10494 For example:
10495 for (int i = 0; i < 100; ++i)
10496 x[i] = y[i] ? z[i] : 10;
10497
10498 results in following optimized GIMPLE:
10499
10500 mask__35.8_43 = vect__4.7_41 != { 0, ... };
10501 vec_mask_and_46 = loop_mask_40 & mask__35.8_43;
10502 _19 = &MEM[base: z_12(D), index: ivtmp_56, step: 4, offset: 0B];
10503 vect_iftmp.11_47 = .MASK_LOAD (_19, 4B, vec_mask_and_46);
10504 vect_iftmp.12_52 = VEC_COND_EXPR <vec_mask_and_46,
10505 vect_iftmp.11_47, { 10, ... }>;
10506
10507	 instead of using masked and unmasked forms of
10508 vec != { 0, ... } (masked in the MASK_LOAD,
10509 unmasked in the VEC_COND_EXPR). */
10510
10511 /* Force vec_compare to be an SSA_NAME rather than a comparison,
10512 in cases where that's necessary. */
10513
10514 if (masks || reduction_type == EXTRACT_LAST_REDUCTION)
10515 {
10516 if (!is_gimple_val (vec_compare))
10517 {
10518 tree vec_compare_name = make_ssa_name (vec_cmp_type);
10519 gassign *new_stmt = gimple_build_assign (vec_compare_name,
10520 vec_compare);
10521 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
10522 vec_compare = vec_compare_name;
10523 }
10524
10525 if (must_invert_cmp_result)
10526 {
10527 tree vec_compare_name = make_ssa_name (vec_cmp_type);
10528 gassign *new_stmt = gimple_build_assign (vec_compare_name,
10529 BIT_NOT_EXPR,
10530 vec_compare);
10531 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
10532 vec_compare = vec_compare_name;
10533 }
10534
10535 if (masks)
10536 {
10537 unsigned vec_num = vec_oprnds0.length ();
10538 tree loop_mask
10539 = vect_get_loop_mask (gsi, masks, vec_num * ncopies,
10540 vectype, vec_num * j + i);
10541 tree tmp2 = make_ssa_name (vec_cmp_type);
10542 gassign *g
10543 = gimple_build_assign (tmp2, BIT_AND_EXPR, vec_compare,
10544 loop_mask);
10545 vect_finish_stmt_generation (vinfo, stmt_info, g, gsi);
10546 vec_compare = tmp2;
10547 }
10548 }
10549
10550 if (reduction_type == EXTRACT_LAST_REDUCTION)
10551 {
10552 gimple *old_stmt = vect_orig_stmt (stmt_info)->stmt;
10553 tree lhs = gimple_get_lhs (old_stmt);
10554 gcall *new_stmt = gimple_build_call_internal
10555 (IFN_FOLD_EXTRACT_LAST, 3, else_clause, vec_compare,
10556 vec_then_clause);
10557 gimple_call_set_lhs (new_stmt, lhs);
10558 SSA_NAME_DEF_STMT (lhs) = new_stmt;
10559 if (old_stmt == gsi_stmt (*gsi))
10560 new_stmt_info = vect_finish_replace_stmt (vinfo,
10561 stmt_info, new_stmt);
10562 else
10563 {
10564 /* In this case we're moving the definition to later in the
10565 block. That doesn't matter because the only uses of the
10566 lhs are in phi statements. */
10567 gimple_stmt_iterator old_gsi = gsi_for_stmt (old_stmt);
10568 gsi_remove (&old_gsi, true);
10569 new_stmt_info
10570 = vect_finish_stmt_generation (vinfo, stmt_info,
10571 new_stmt, gsi);
10572 }
10573 }
10574 else
10575 {
10576 new_temp = make_ssa_name (vec_dest);
10577 gassign *new_stmt
10578 = gimple_build_assign (new_temp, VEC_COND_EXPR, vec_compare,
10579 vec_then_clause, vec_else_clause);
10580 new_stmt_info
10581 = vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
10582 }
10583 if (slp_node)
10584 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt_info);
10585 }
10586
10587 if (slp_node)
10588 continue;
10589
10590 if (j == 0)
10591 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt_info;
10592 else
10593 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt_info;
10594
10595 prev_stmt_info = new_stmt_info;
10596 }
10597
10598 vec_oprnds0.release ();
10599 vec_oprnds1.release ();
10600 vec_oprnds2.release ();
10601 vec_oprnds3.release ();
10602
10603 return true;
10604 }
10605
10606 /* vectorizable_comparison.
10607
10608    Check if STMT_INFO is a comparison expression that can be vectorized.
10609 If VEC_STMT is also passed, vectorize STMT_INFO: create a vectorized
10610 comparison, put it in VEC_STMT, and insert it at GSI.
10611
10612 Return true if STMT_INFO is vectorizable in this way. */
10613
10614 static bool
10615 vectorizable_comparison (vec_info *vinfo,
10616 stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
10617 stmt_vec_info *vec_stmt,
10618 slp_tree slp_node, stmt_vector_for_cost *cost_vec)
10619 {
10620 tree lhs, rhs1, rhs2;
10621 tree vectype1 = NULL_TREE, vectype2 = NULL_TREE;
10622 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
10623 tree vec_rhs1 = NULL_TREE, vec_rhs2 = NULL_TREE;
10624 tree new_temp;
10625 loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
10626 enum vect_def_type dts[2] = {vect_unknown_def_type, vect_unknown_def_type};
10627 int ndts = 2;
10628 poly_uint64 nunits;
10629 int ncopies;
10630 enum tree_code code, bitop1 = NOP_EXPR, bitop2 = NOP_EXPR;
10631 stmt_vec_info prev_stmt_info = NULL;
10632 int i, j;
10633 bb_vec_info bb_vinfo = dyn_cast <bb_vec_info> (vinfo);
10634 vec<tree> vec_oprnds0 = vNULL;
10635 vec<tree> vec_oprnds1 = vNULL;
10636 tree mask_type;
10637 tree mask;
10638
10639 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
10640 return false;
10641
10642 if (!vectype || !VECTOR_BOOLEAN_TYPE_P (vectype))
10643 return false;
10644
10645 mask_type = vectype;
10646 nunits = TYPE_VECTOR_SUBPARTS (vectype);
10647
10648 if (slp_node)
10649 ncopies = 1;
10650 else
10651 ncopies = vect_get_num_copies (loop_vinfo, vectype);
10652
10653 gcc_assert (ncopies >= 1);
10654 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
10655 return false;
10656
10657 if (STMT_VINFO_LIVE_P (stmt_info))
10658 {
10659 if (dump_enabled_p ())
10660 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
10661 "value used after loop.\n");
10662 return false;
10663 }
10664
10665 gassign *stmt = dyn_cast <gassign *> (stmt_info->stmt);
10666 if (!stmt)
10667 return false;
10668
10669 code = gimple_assign_rhs_code (stmt);
10670
10671 if (TREE_CODE_CLASS (code) != tcc_comparison)
10672 return false;
10673
10674 slp_tree slp_rhs1, slp_rhs2;
10675 if (!vect_is_simple_use (vinfo, stmt_info, slp_node,
10676 0, &rhs1, &slp_rhs1, &dts[0], &vectype1))
10677 return false;
10678
10679 if (!vect_is_simple_use (vinfo, stmt_info, slp_node,
10680 1, &rhs2, &slp_rhs2, &dts[1], &vectype2))
10681 return false;
10682
10683 if (vectype1 && vectype2
10684 && maybe_ne (TYPE_VECTOR_SUBPARTS (vectype1),
10685 TYPE_VECTOR_SUBPARTS (vectype2)))
10686 return false;
10687
10688 vectype = vectype1 ? vectype1 : vectype2;
10689
10690 /* Invariant comparison. */
10691 if (!vectype)
10692 {
10693 if (VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (rhs1)))
10694 vectype = mask_type;
10695 else
10696 vectype = get_vectype_for_scalar_type (vinfo, TREE_TYPE (rhs1),
10697 slp_node);
10698 if (!vectype || maybe_ne (TYPE_VECTOR_SUBPARTS (vectype), nunits))
10699 return false;
10700 }
10701 else if (maybe_ne (nunits, TYPE_VECTOR_SUBPARTS (vectype)))
10702 return false;
10703
10704 /* Can't compare mask and non-mask types. */
10705 if (vectype1 && vectype2
10706 && (VECTOR_BOOLEAN_TYPE_P (vectype1) ^ VECTOR_BOOLEAN_TYPE_P (vectype2)))
10707 return false;
10708
10709 /* Boolean values may have another representation in vectors
10710 and therefore we prefer bit operations over comparison for
10711 them (which also works for scalar masks). We store opcodes
10712 to use in bitop1 and bitop2. Statement is vectorized as
10713 BITOP2 (rhs1 BITOP1 rhs2) or
10714 rhs1 BITOP2 (BITOP1 rhs2)
10715 depending on bitop1 and bitop2 arity. */
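  /* For example, a mask comparison a < b is computed as b & ~a: swap_p
     swaps the operands and then the GT_EXPR pattern
     (bitop1 = BIT_NOT_EXPR, bitop2 = BIT_AND_EXPR) is reused.  */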
10716 bool swap_p = false;
10717 if (VECTOR_BOOLEAN_TYPE_P (vectype))
10718 {
10719 if (code == GT_EXPR)
10720 {
10721 bitop1 = BIT_NOT_EXPR;
10722 bitop2 = BIT_AND_EXPR;
10723 }
10724 else if (code == GE_EXPR)
10725 {
10726 bitop1 = BIT_NOT_EXPR;
10727 bitop2 = BIT_IOR_EXPR;
10728 }
10729 else if (code == LT_EXPR)
10730 {
10731 bitop1 = BIT_NOT_EXPR;
10732 bitop2 = BIT_AND_EXPR;
10733 swap_p = true;
10734 }
10735 else if (code == LE_EXPR)
10736 {
10737 bitop1 = BIT_NOT_EXPR;
10738 bitop2 = BIT_IOR_EXPR;
10739 swap_p = true;
10740 }
10741 else
10742 {
10743 bitop1 = BIT_XOR_EXPR;
10744 if (code == EQ_EXPR)
10745 bitop2 = BIT_NOT_EXPR;
10746 }
10747 }
10748
10749 if (!vec_stmt)
10750 {
10751 if (bitop1 == NOP_EXPR)
10752 {
10753 if (!expand_vec_cmp_expr_p (vectype, mask_type, code))
10754 return false;
10755 }
10756 else
10757 {
10758 machine_mode mode = TYPE_MODE (vectype);
10759 optab optab;
10760
10761 optab = optab_for_tree_code (bitop1, vectype, optab_default);
10762 if (!optab || optab_handler (optab, mode) == CODE_FOR_nothing)
10763 return false;
10764
10765 if (bitop2 != NOP_EXPR)
10766 {
10767 optab = optab_for_tree_code (bitop2, vectype, optab_default);
10768 if (!optab || optab_handler (optab, mode) == CODE_FOR_nothing)
10769 return false;
10770 }
10771 }
10772
10773 /* Put types on constant and invariant SLP children. */
10774 if (slp_node
10775 && (!vect_maybe_update_slp_op_vectype (slp_rhs1, vectype)
10776 || !vect_maybe_update_slp_op_vectype (slp_rhs2, vectype)))
10777 {
10778 if (dump_enabled_p ())
10779 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
10780 "incompatible vector types for invariants\n");
10781 return false;
10782 }
10783
10784 STMT_VINFO_TYPE (stmt_info) = comparison_vec_info_type;
10785 vect_model_simple_cost (vinfo, stmt_info,
10786 ncopies * (1 + (bitop2 != NOP_EXPR)),
10787 dts, ndts, slp_node, cost_vec);
10788 return true;
10789 }
10790
10791 /* Transform. */
10792 if (!slp_node)
10793 {
10794 vec_oprnds0.create (1);
10795 vec_oprnds1.create (1);
10796 }
10797
10798 /* Handle def. */
10799 lhs = gimple_assign_lhs (stmt);
10800 mask = vect_create_destination_var (lhs, mask_type);
10801
10802 /* Handle cmp expr. */
10803 for (j = 0; j < ncopies; j++)
10804 {
10805 stmt_vec_info new_stmt_info = NULL;
10806 if (j == 0)
10807 {
10808 if (slp_node)
10809 {
10810 auto_vec<vec<tree>, 2> vec_defs;
10811 vect_get_slp_defs (vinfo, slp_node, &vec_defs);
10812 vec_oprnds1 = vec_defs.pop ();
10813 vec_oprnds0 = vec_defs.pop ();
10814 if (swap_p)
10815 std::swap (vec_oprnds0, vec_oprnds1);
10816 }
10817 else
10818 {
10819 vec_rhs1 = vect_get_vec_def_for_operand (vinfo, rhs1, stmt_info,
10820 vectype);
10821 vec_rhs2 = vect_get_vec_def_for_operand (vinfo, rhs2, stmt_info,
10822 vectype);
10823 }
10824 }
10825 else
10826 {
10827 vec_rhs1 = vect_get_vec_def_for_stmt_copy (vinfo,
10828 vec_oprnds0.pop ());
10829 vec_rhs2 = vect_get_vec_def_for_stmt_copy (vinfo,
10830 vec_oprnds1.pop ());
10831 }
10832
10833 if (!slp_node)
10834 {
10835 if (swap_p && j == 0)
10836 std::swap (vec_rhs1, vec_rhs2);
10837 vec_oprnds0.quick_push (vec_rhs1);
10838 vec_oprnds1.quick_push (vec_rhs2);
10839 }
10840
10841 /* Arguments are ready. Create the new vector stmt. */
10842 FOR_EACH_VEC_ELT (vec_oprnds0, i, vec_rhs1)
10843 {
10844 vec_rhs2 = vec_oprnds1[i];
10845
10846 new_temp = make_ssa_name (mask);
10847 if (bitop1 == NOP_EXPR)
10848 {
10849 gassign *new_stmt = gimple_build_assign (new_temp, code,
10850 vec_rhs1, vec_rhs2);
10851 new_stmt_info
10852 = vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
10853 }
10854 else
10855 {
10856 gassign *new_stmt;
10857 if (bitop1 == BIT_NOT_EXPR)
10858 new_stmt = gimple_build_assign (new_temp, bitop1, vec_rhs2);
10859 else
10860 new_stmt = gimple_build_assign (new_temp, bitop1, vec_rhs1,
10861 vec_rhs2);
10862 new_stmt_info
10863 = vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
10864 if (bitop2 != NOP_EXPR)
10865 {
10866 tree res = make_ssa_name (mask);
10867 if (bitop2 == BIT_NOT_EXPR)
10868 new_stmt = gimple_build_assign (res, bitop2, new_temp);
10869 else
10870 new_stmt = gimple_build_assign (res, bitop2, vec_rhs1,
10871 new_temp);
10872 new_stmt_info
10873 = vect_finish_stmt_generation (vinfo, stmt_info,
10874 new_stmt, gsi);
10875 }
10876 }
10877 if (slp_node)
10878 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt_info);
10879 }
10880
10881 if (slp_node)
10882 continue;
10883
10884 if (j == 0)
10885 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt_info;
10886 else
10887 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt_info;
10888
10889 prev_stmt_info = new_stmt_info;
10890 }
10891
10892 vec_oprnds0.release ();
10893 vec_oprnds1.release ();
10894
10895 return true;
10896 }
10897
10898 /* If SLP_NODE is nonnull, return true if vectorizable_live_operation
10899 can handle all live statements in the node. Otherwise return true
10900 if STMT_INFO is not live or if vectorizable_live_operation can handle it.
10901 GSI and VEC_STMT_P are as for vectorizable_live_operation. */
10902
10903 static bool
10904 can_vectorize_live_stmts (loop_vec_info loop_vinfo,
10905 stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
10906 slp_tree slp_node, slp_instance slp_node_instance,
10907 bool vec_stmt_p,
10908 stmt_vector_for_cost *cost_vec)
10909 {
10910 if (slp_node)
10911 {
10912 stmt_vec_info slp_stmt_info;
10913 unsigned int i;
10914 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (slp_node), i, slp_stmt_info)
10915 {
10916 if (STMT_VINFO_LIVE_P (slp_stmt_info)
10917 && !vectorizable_live_operation (loop_vinfo,
10918 slp_stmt_info, gsi, slp_node,
10919 slp_node_instance, i,
10920 vec_stmt_p, cost_vec))
10921 return false;
10922 }
10923 }
10924 else if (STMT_VINFO_LIVE_P (stmt_info)
10925 && !vectorizable_live_operation (loop_vinfo, stmt_info, gsi,
10926 slp_node, slp_node_instance, -1,
10927 vec_stmt_p, cost_vec))
10928 return false;
10929
10930 return true;
10931 }
10932
10933 /* Make sure the statement is vectorizable. */
10934
10935 opt_result
10936 vect_analyze_stmt (vec_info *vinfo,
10937 stmt_vec_info stmt_info, bool *need_to_vectorize,
10938 slp_tree node, slp_instance node_instance,
10939 stmt_vector_for_cost *cost_vec)
10940 {
10941 bb_vec_info bb_vinfo = dyn_cast <bb_vec_info> (vinfo);
10942 enum vect_relevant relevance = STMT_VINFO_RELEVANT (stmt_info);
10943 bool ok;
10944 gimple_seq pattern_def_seq;
10945
10946 if (dump_enabled_p ())
10947 dump_printf_loc (MSG_NOTE, vect_location, "==> examining statement: %G",
10948 stmt_info->stmt);
10949
10950 if (gimple_has_volatile_ops (stmt_info->stmt))
10951 return opt_result::failure_at (stmt_info->stmt,
10952 "not vectorized:"
10953 " stmt has volatile operands: %G\n",
10954 stmt_info->stmt);
10955
10956 if (STMT_VINFO_IN_PATTERN_P (stmt_info)
10957 && node == NULL
10958 && (pattern_def_seq = STMT_VINFO_PATTERN_DEF_SEQ (stmt_info)))
10959 {
10960 gimple_stmt_iterator si;
10961
10962 for (si = gsi_start (pattern_def_seq); !gsi_end_p (si); gsi_next (&si))
10963 {
10964 stmt_vec_info pattern_def_stmt_info
10965 = vinfo->lookup_stmt (gsi_stmt (si));
10966 if (STMT_VINFO_RELEVANT_P (pattern_def_stmt_info)
10967 || STMT_VINFO_LIVE_P (pattern_def_stmt_info))
10968 {
10969 /* Analyze def stmt of STMT if it's a pattern stmt. */
10970 if (dump_enabled_p ())
10971 dump_printf_loc (MSG_NOTE, vect_location,
10972 "==> examining pattern def statement: %G",
10973 pattern_def_stmt_info->stmt);
10974
10975 opt_result res
10976 = vect_analyze_stmt (vinfo, pattern_def_stmt_info,
10977 need_to_vectorize, node, node_instance,
10978 cost_vec);
10979 if (!res)
10980 return res;
10981 }
10982 }
10983 }
10984
10985 /* Skip stmts that do not need to be vectorized. In loops this is expected
10986 to include:
10987 - the COND_EXPR which is the loop exit condition
10988 - any LABEL_EXPRs in the loop
10989 - computations that are used only for array indexing or loop control.
10990 In basic blocks we only analyze statements that are a part of some SLP
10991 instance, therefore, all the statements are relevant.
10992
10993 Pattern statement needs to be analyzed instead of the original statement
10994 if the original statement is not relevant. Otherwise, we analyze both
10995 statements. In basic blocks we are called from some SLP instance
10996    traversal; don't analyze pattern stmts instead, as the pattern stmts
10997    will already be part of an SLP instance.  */
10998
10999 stmt_vec_info pattern_stmt_info = STMT_VINFO_RELATED_STMT (stmt_info);
11000 if (!STMT_VINFO_RELEVANT_P (stmt_info)
11001 && !STMT_VINFO_LIVE_P (stmt_info))
11002 {
11003 if (STMT_VINFO_IN_PATTERN_P (stmt_info)
11004 && pattern_stmt_info
11005 && (STMT_VINFO_RELEVANT_P (pattern_stmt_info)
11006 || STMT_VINFO_LIVE_P (pattern_stmt_info)))
11007 {
11008 /* Analyze PATTERN_STMT instead of the original stmt. */
11009 stmt_info = pattern_stmt_info;
11010 if (dump_enabled_p ())
11011 dump_printf_loc (MSG_NOTE, vect_location,
11012 "==> examining pattern statement: %G",
11013 stmt_info->stmt);
11014 }
11015 else
11016 {
11017 if (dump_enabled_p ())
11018 dump_printf_loc (MSG_NOTE, vect_location, "irrelevant.\n");
11019
11020 return opt_result::success ();
11021 }
11022 }
11023 else if (STMT_VINFO_IN_PATTERN_P (stmt_info)
11024 && node == NULL
11025 && pattern_stmt_info
11026 && (STMT_VINFO_RELEVANT_P (pattern_stmt_info)
11027 || STMT_VINFO_LIVE_P (pattern_stmt_info)))
11028 {
11029 /* Analyze PATTERN_STMT too. */
11030 if (dump_enabled_p ())
11031 dump_printf_loc (MSG_NOTE, vect_location,
11032 "==> examining pattern statement: %G",
11033 pattern_stmt_info->stmt);
11034
11035 opt_result res
11036 = vect_analyze_stmt (vinfo, pattern_stmt_info, need_to_vectorize, node,
11037 node_instance, cost_vec);
11038 if (!res)
11039 return res;
11040 }
11041
11042 switch (STMT_VINFO_DEF_TYPE (stmt_info))
11043 {
11044 case vect_internal_def:
11045 break;
11046
11047 case vect_reduction_def:
11048 case vect_nested_cycle:
11049 gcc_assert (!bb_vinfo
11050 && (relevance == vect_used_in_outer
11051 || relevance == vect_used_in_outer_by_reduction
11052 || relevance == vect_used_by_reduction
11053 || relevance == vect_unused_in_scope
11054 || relevance == vect_used_only_live));
11055 break;
11056
11057 case vect_induction_def:
11058 gcc_assert (!bb_vinfo);
11059 break;
11060
11061 case vect_constant_def:
11062 case vect_external_def:
11063 case vect_unknown_def_type:
11064 default:
11065 gcc_unreachable ();
11066 }
11067
11068 if (STMT_VINFO_RELEVANT_P (stmt_info))
11069 {
11070 tree type = gimple_expr_type (stmt_info->stmt);
11071 gcc_assert (!VECTOR_MODE_P (TYPE_MODE (type)));
11072 gcall *call = dyn_cast <gcall *> (stmt_info->stmt);
11073 gcc_assert (STMT_VINFO_VECTYPE (stmt_info)
11074 || (call && gimple_call_lhs (call) == NULL_TREE));
11075 *need_to_vectorize = true;
11076 }
11077
11078 if (PURE_SLP_STMT (stmt_info) && !node)
11079 {
11080 if (dump_enabled_p ())
11081 dump_printf_loc (MSG_NOTE, vect_location,
11082 "handled only by SLP analysis\n");
11083 return opt_result::success ();
11084 }
11085
11086 ok = true;
11087 if (!bb_vinfo
11088 && (STMT_VINFO_RELEVANT_P (stmt_info)
11089 || STMT_VINFO_DEF_TYPE (stmt_info) == vect_reduction_def))
11090 /* Prefer vectorizable_call over vectorizable_simd_clone_call so
11091 -mveclibabi= takes preference over library functions with
11092 the simd attribute. */
11093 ok = (vectorizable_call (vinfo, stmt_info, NULL, NULL, node, cost_vec)
11094 || vectorizable_simd_clone_call (vinfo, stmt_info, NULL, NULL, node,
11095 cost_vec)
11096 || vectorizable_conversion (vinfo, stmt_info,
11097 NULL, NULL, node, cost_vec)
11098 || vectorizable_operation (vinfo, stmt_info,
11099 NULL, NULL, node, cost_vec)
11100 || vectorizable_assignment (vinfo, stmt_info,
11101 NULL, NULL, node, cost_vec)
11102 || vectorizable_load (vinfo, stmt_info, NULL, NULL, node, cost_vec)
11103 || vectorizable_store (vinfo, stmt_info, NULL, NULL, node, cost_vec)
11104 || vectorizable_reduction (as_a <loop_vec_info> (vinfo), stmt_info,
11105 node, node_instance, cost_vec)
11106 || vectorizable_induction (as_a <loop_vec_info> (vinfo), stmt_info,
11107 NULL, NULL, node, cost_vec)
11108 || vectorizable_shift (vinfo, stmt_info, NULL, NULL, node, cost_vec)
11109 || vectorizable_condition (vinfo, stmt_info,
11110 NULL, NULL, node, cost_vec)
11111 || vectorizable_comparison (vinfo, stmt_info, NULL, NULL, node,
11112 cost_vec)
11113 || vectorizable_lc_phi (as_a <loop_vec_info> (vinfo),
11114 stmt_info, NULL, node));
11115 else
11116 {
11117 if (bb_vinfo)
11118 ok = (vectorizable_call (vinfo, stmt_info, NULL, NULL, node, cost_vec)
11119 || vectorizable_simd_clone_call (vinfo, stmt_info,
11120 NULL, NULL, node, cost_vec)
11121 || vectorizable_conversion (vinfo, stmt_info, NULL, NULL, node,
11122 cost_vec)
11123 || vectorizable_shift (vinfo, stmt_info,
11124 NULL, NULL, node, cost_vec)
11125 || vectorizable_operation (vinfo, stmt_info,
11126 NULL, NULL, node, cost_vec)
11127 || vectorizable_assignment (vinfo, stmt_info, NULL, NULL, node,
11128 cost_vec)
11129 || vectorizable_load (vinfo, stmt_info,
11130 NULL, NULL, node, cost_vec)
11131 || vectorizable_store (vinfo, stmt_info,
11132 NULL, NULL, node, cost_vec)
11133 || vectorizable_condition (vinfo, stmt_info,
11134 NULL, NULL, node, cost_vec)
11135 || vectorizable_comparison (vinfo, stmt_info, NULL, NULL, node,
11136 cost_vec));
11137 }
11138
11139 if (!ok)
11140 return opt_result::failure_at (stmt_info->stmt,
11141 "not vectorized:"
11142 " relevant stmt not supported: %G",
11143 stmt_info->stmt);
11144
11145   /* Stmts that are (also) "live" (i.e. that are used out of the loop)
11146 need extra handling, except for vectorizable reductions. */
11147 if (!bb_vinfo
11148 && STMT_VINFO_TYPE (stmt_info) != reduc_vec_info_type
11149 && STMT_VINFO_TYPE (stmt_info) != lc_phi_info_type
11150 && !can_vectorize_live_stmts (as_a <loop_vec_info> (vinfo),
11151 stmt_info, NULL, node, node_instance,
11152 false, cost_vec))
11153 return opt_result::failure_at (stmt_info->stmt,
11154 "not vectorized:"
11155 " live stmt not supported: %G",
11156 stmt_info->stmt);
11157
11158 return opt_result::success ();
11159 }
11160
11161
11162 /* Function vect_transform_stmt.
11163
11164 Create a vectorized stmt to replace STMT_INFO, and insert it at GSI. */
11165
11166 bool
11167 vect_transform_stmt (vec_info *vinfo,
11168 stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
11169 slp_tree slp_node, slp_instance slp_node_instance)
11170 {
11171 bool is_store = false;
11172 stmt_vec_info vec_stmt = NULL;
11173 bool done;
11174
11175 gcc_assert (slp_node || !PURE_SLP_STMT (stmt_info));
11176 stmt_vec_info old_vec_stmt_info = STMT_VINFO_VEC_STMT (stmt_info);
11177
11178 loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
11179 bool nested_p = (loop_vinfo
11180 && nested_in_vect_loop_p
11181 (LOOP_VINFO_LOOP (loop_vinfo), stmt_info));
11182
11183 gimple *stmt = stmt_info->stmt;
11184 switch (STMT_VINFO_TYPE (stmt_info))
11185 {
11186 case type_demotion_vec_info_type:
11187 case type_promotion_vec_info_type:
11188 case type_conversion_vec_info_type:
11189 done = vectorizable_conversion (vinfo, stmt_info,
11190 gsi, &vec_stmt, slp_node, NULL);
11191 gcc_assert (done);
11192 break;
11193
11194 case induc_vec_info_type:
11195 done = vectorizable_induction (as_a <loop_vec_info> (vinfo),
11196 stmt_info, gsi, &vec_stmt, slp_node,
11197 NULL);
11198 gcc_assert (done);
11199 break;
11200
11201 case shift_vec_info_type:
11202 done = vectorizable_shift (vinfo, stmt_info,
11203 gsi, &vec_stmt, slp_node, NULL);
11204 gcc_assert (done);
11205 break;
11206
11207 case op_vec_info_type:
11208 done = vectorizable_operation (vinfo, stmt_info, gsi, &vec_stmt, slp_node,
11209 NULL);
11210 gcc_assert (done);
11211 break;
11212
11213 case assignment_vec_info_type:
11214 done = vectorizable_assignment (vinfo, stmt_info,
11215 gsi, &vec_stmt, slp_node, NULL);
11216 gcc_assert (done);
11217 break;
11218
11219 case load_vec_info_type:
11220 done = vectorizable_load (vinfo, stmt_info, gsi, &vec_stmt, slp_node,
11221 NULL);
11222 gcc_assert (done);
11223 break;
11224
11225 case store_vec_info_type:
11226 done = vectorizable_store (vinfo, stmt_info,
11227 gsi, &vec_stmt, slp_node, NULL);
11228 gcc_assert (done);
11229 if (STMT_VINFO_GROUPED_ACCESS (stmt_info) && !slp_node)
11230 {
11231 /* In case of interleaving, the whole chain is vectorized when the
11232 last store in the chain is reached. Store stmts before the last
11233	     one are skipped, and their vec_stmt_info shouldn't be freed
11234 meanwhile. */
11235 stmt_vec_info group_info = DR_GROUP_FIRST_ELEMENT (stmt_info);
11236 if (DR_GROUP_STORE_COUNT (group_info) == DR_GROUP_SIZE (group_info))
11237 is_store = true;
11238 }
11239 else
11240 is_store = true;
11241 break;
11242
11243 case condition_vec_info_type:
11244 done = vectorizable_condition (vinfo, stmt_info,
11245 gsi, &vec_stmt, slp_node, NULL);
11246 gcc_assert (done);
11247 break;
11248
11249 case comparison_vec_info_type:
11250 done = vectorizable_comparison (vinfo, stmt_info, gsi, &vec_stmt,
11251 slp_node, NULL);
11252 gcc_assert (done);
11253 break;
11254
11255 case call_vec_info_type:
11256 done = vectorizable_call (vinfo, stmt_info,
11257 gsi, &vec_stmt, slp_node, NULL);
11258 stmt = gsi_stmt (*gsi);
11259 break;
11260
11261 case call_simd_clone_vec_info_type:
11262 done = vectorizable_simd_clone_call (vinfo, stmt_info, gsi, &vec_stmt,
11263 slp_node, NULL);
11264 stmt = gsi_stmt (*gsi);
11265 break;
11266
11267 case reduc_vec_info_type:
11268 done = vect_transform_reduction (as_a <loop_vec_info> (vinfo), stmt_info,
11269 gsi, &vec_stmt, slp_node);
11270 gcc_assert (done);
11271 break;
11272
11273 case cycle_phi_info_type:
11274 done = vect_transform_cycle_phi (as_a <loop_vec_info> (vinfo), stmt_info,
11275 &vec_stmt, slp_node, slp_node_instance);
11276 gcc_assert (done);
11277 break;
11278
11279 case lc_phi_info_type:
11280 done = vectorizable_lc_phi (as_a <loop_vec_info> (vinfo),
11281 stmt_info, &vec_stmt, slp_node);
11282 gcc_assert (done);
11283 break;
11284
11285 default:
11286 if (!STMT_VINFO_LIVE_P (stmt_info))
11287 {
11288 if (dump_enabled_p ())
11289 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
11290 "stmt not supported.\n");
11291 gcc_unreachable ();
11292 }
11293 done = true;
11294 }
11295
11296 /* Verify SLP vectorization doesn't mess with STMT_VINFO_VEC_STMT.
11297 This would break hybrid SLP vectorization. */
11298 if (slp_node)
11299 gcc_assert (!vec_stmt
11300 && STMT_VINFO_VEC_STMT (stmt_info) == old_vec_stmt_info);
11301
11302 /* Handle inner-loop stmts whose DEF is used in the loop-nest that
11303 is being vectorized, but outside the immediately enclosing loop. */
11304 if (vec_stmt
11305 && nested_p
11306 && STMT_VINFO_TYPE (stmt_info) != reduc_vec_info_type
11307 && (STMT_VINFO_RELEVANT (stmt_info) == vect_used_in_outer
11308 || STMT_VINFO_RELEVANT (stmt_info) ==
11309 vect_used_in_outer_by_reduction))
11310 {
11311 class loop *innerloop = LOOP_VINFO_LOOP (loop_vinfo)->inner;
11312 imm_use_iterator imm_iter;
11313 use_operand_p use_p;
11314 tree scalar_dest;
11315
11316 if (dump_enabled_p ())
11317 dump_printf_loc (MSG_NOTE, vect_location,
11318 "Record the vdef for outer-loop vectorization.\n");
11319
11320      /* Find the relevant loop-exit phi-node, and record the vec_stmt there
11321 (to be used when vectorizing outer-loop stmts that use the DEF of
11322 STMT). */
11323 if (gimple_code (stmt) == GIMPLE_PHI)
11324 scalar_dest = PHI_RESULT (stmt);
11325 else
11326 scalar_dest = gimple_get_lhs (stmt);
11327
11328 FOR_EACH_IMM_USE_FAST (use_p, imm_iter, scalar_dest)
11329 if (!flow_bb_inside_loop_p (innerloop, gimple_bb (USE_STMT (use_p))))
11330 {
11331 stmt_vec_info exit_phi_info
11332 = vinfo->lookup_stmt (USE_STMT (use_p));
11333 STMT_VINFO_VEC_STMT (exit_phi_info) = vec_stmt;
11334 }
11335 }
11336
11337 if (vec_stmt)
11338 STMT_VINFO_VEC_STMT (stmt_info) = vec_stmt;
11339
11340 if (STMT_VINFO_TYPE (stmt_info) == store_vec_info_type)
11341 return is_store;
11342
11343 /* If this stmt defines a value used on a backedge, update the
11344 vectorized PHIs. */
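  /* That is, when the (possibly pattern) statement computes the value that
     flows back over the loop latch into a reduction PHI, each vectorized
     copy of it must become the latch argument of the corresponding
     vectorized PHI.  */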
11345 stmt_vec_info orig_stmt_info = vect_orig_stmt (stmt_info);
11346 stmt_vec_info reduc_info;
11347 if (STMT_VINFO_REDUC_DEF (orig_stmt_info)
11348 && vect_stmt_to_vectorize (orig_stmt_info) == stmt_info
11349 && (reduc_info = info_for_reduction (vinfo, orig_stmt_info))
11350 && STMT_VINFO_REDUC_TYPE (reduc_info) != FOLD_LEFT_REDUCTION
11351 && STMT_VINFO_REDUC_TYPE (reduc_info) != EXTRACT_LAST_REDUCTION)
11352 {
11353 gphi *phi;
11354 edge e;
11355 if (!slp_node
11356 && (phi = dyn_cast <gphi *>
11357 (STMT_VINFO_REDUC_DEF (orig_stmt_info)->stmt))
11358 && dominated_by_p (CDI_DOMINATORS,
11359 gimple_bb (orig_stmt_info->stmt), gimple_bb (phi))
11360 && (e = loop_latch_edge (gimple_bb (phi)->loop_father))
11361 && (PHI_ARG_DEF_FROM_EDGE (phi, e)
11362 == gimple_get_lhs (orig_stmt_info->stmt)))
11363 {
11364 stmt_vec_info phi_info
11365 = STMT_VINFO_VEC_STMT (STMT_VINFO_REDUC_DEF (orig_stmt_info));
11366 stmt_vec_info vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
11367 do
11368 {
11369 add_phi_arg (as_a <gphi *> (phi_info->stmt),
11370 gimple_get_lhs (vec_stmt->stmt), e,
11371 gimple_phi_arg_location (phi, e->dest_idx));
11372 phi_info = STMT_VINFO_RELATED_STMT (phi_info);
11373 vec_stmt = STMT_VINFO_RELATED_STMT (vec_stmt);
11374 }
11375 while (phi_info);
11376 gcc_assert (!vec_stmt);
11377 }
11378 else if (slp_node
11379 && slp_node != slp_node_instance->reduc_phis)
11380 {
11381 slp_tree phi_node = slp_node_instance->reduc_phis;
11382 gphi *phi = as_a <gphi *> (SLP_TREE_SCALAR_STMTS (phi_node)[0]->stmt);
11383 e = loop_latch_edge (gimple_bb (phi)->loop_father);
11384 gcc_assert (SLP_TREE_VEC_STMTS (phi_node).length ()
11385 == SLP_TREE_VEC_STMTS (slp_node).length ());
11386 for (unsigned i = 0; i < SLP_TREE_VEC_STMTS (phi_node).length (); ++i)
11387 add_phi_arg (as_a <gphi *> (SLP_TREE_VEC_STMTS (phi_node)[i]->stmt),
11388 vect_get_slp_vect_def (slp_node, i),
11389 e, gimple_phi_arg_location (phi, e->dest_idx));
11390 }
11391 }
11392
11393 /* Handle stmts whose DEF is used outside the loop-nest that is
11394 being vectorized. */
11395 if (is_a <loop_vec_info> (vinfo))
11396 done = can_vectorize_live_stmts (as_a <loop_vec_info> (vinfo),
11397 stmt_info, gsi, slp_node,
11398 slp_node_instance, true, NULL);
11399 gcc_assert (done);
11400
11401 return false;
11402 }
11403
11404
11405 /* Remove a group of stores (for SLP or interleaving), free their
11406 stmt_vec_info. */
11407
11408 void
11409 vect_remove_stores (vec_info *vinfo, stmt_vec_info first_stmt_info)
11410 {
11411 stmt_vec_info next_stmt_info = first_stmt_info;
11412
11413 while (next_stmt_info)
11414 {
11415 stmt_vec_info tmp = DR_GROUP_NEXT_ELEMENT (next_stmt_info);
11416 next_stmt_info = vect_orig_stmt (next_stmt_info);
11417 /* Free the attached stmt_vec_info and remove the stmt. */
11418 vinfo->remove_stmt (next_stmt_info);
11419 next_stmt_info = tmp;
11420 }
11421 }
11422
11423 /* If NUNITS is nonzero, return a vector type that contains NUNITS
11424 elements of type SCALAR_TYPE, or null if the target doesn't support
11425 such a type.
11426
11427 If NUNITS is zero, return a vector type that contains elements of
11428 type SCALAR_TYPE, choosing whichever vector size the target prefers.
11429
11430 If PREVAILING_MODE is VOIDmode, we have not yet chosen a vector mode
11431 for this vectorization region and want to "autodetect" the best choice.
11432 Otherwise, PREVAILING_MODE is a previously-chosen vector TYPE_MODE
11433 and we want the new type to be interoperable with it. PREVAILING_MODE
11434 in this case can be a scalar integer mode or a vector mode; when it
11435 is a vector mode, the function acts like a tree-level version of
11436 related_vector_mode. */
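/* For example, with a 32-bit SCALAR_TYPE "int" and NUNITS 4 this would
   typically return a "vector(4) int" type (V4SImode), provided the target
   supports 128-bit integer vectors.  */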
11437
11438 tree
11439 get_related_vectype_for_scalar_type (machine_mode prevailing_mode,
11440 tree scalar_type, poly_uint64 nunits)
11441 {
11442 tree orig_scalar_type = scalar_type;
11443 scalar_mode inner_mode;
11444 machine_mode simd_mode;
11445 tree vectype;
11446
11447 if (!is_int_mode (TYPE_MODE (scalar_type), &inner_mode)
11448 && !is_float_mode (TYPE_MODE (scalar_type), &inner_mode))
11449 return NULL_TREE;
11450
11451 unsigned int nbytes = GET_MODE_SIZE (inner_mode);
11452
11453 /* For vector types of elements whose mode precision doesn't
11454      match their type's precision we use an element type of mode
11455 precision. The vectorization routines will have to make sure
11456 they support the proper result truncation/extension.
11457 We also make sure to build vector types with INTEGER_TYPE
11458 component type only. */
11459 if (INTEGRAL_TYPE_P (scalar_type)
11460 && (GET_MODE_BITSIZE (inner_mode) != TYPE_PRECISION (scalar_type)
11461 || TREE_CODE (scalar_type) != INTEGER_TYPE))
11462 scalar_type = build_nonstandard_integer_type (GET_MODE_BITSIZE (inner_mode),
11463 TYPE_UNSIGNED (scalar_type));
11464
11465 /* We shouldn't end up building VECTOR_TYPEs of non-scalar components.
11466      When the component mode passes the above test, simply use a type
11467 corresponding to that mode. The theory is that any use that
11468 would cause problems with this will disable vectorization anyway. */
11469 else if (!SCALAR_FLOAT_TYPE_P (scalar_type)
11470 && !INTEGRAL_TYPE_P (scalar_type))
11471 scalar_type = lang_hooks.types.type_for_mode (inner_mode, 1);
11472
11473 /* We can't build a vector type of elements with alignment bigger than
11474 their size. */
11475 else if (nbytes < TYPE_ALIGN_UNIT (scalar_type))
11476 scalar_type = lang_hooks.types.type_for_mode (inner_mode,
11477 TYPE_UNSIGNED (scalar_type));
11478
11479   /* If we fell back to using the mode, fail if there was
11480 no scalar type for it. */
11481 if (scalar_type == NULL_TREE)
11482 return NULL_TREE;
11483
11484 /* If no prevailing mode was supplied, use the mode the target prefers.
11485 Otherwise lookup a vector mode based on the prevailing mode. */
11486 if (prevailing_mode == VOIDmode)
11487 {
11488 gcc_assert (known_eq (nunits, 0U));
11489 simd_mode = targetm.vectorize.preferred_simd_mode (inner_mode);
11490 if (SCALAR_INT_MODE_P (simd_mode))
11491 {
11492 /* Traditional behavior is not to take the integer mode
11493 literally, but simply to use it as a way of determining
11494 the vector size. It is up to mode_for_vector to decide
11495 what the TYPE_MODE should be.
11496
11497 Note that nunits == 1 is allowed in order to support single
11498 element vector types. */
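	  /* For instance, if the target's preferred_simd_mode for QImode
	     is DImode, that simply requests 8-byte vectors: nunits becomes
	     8 and mode_for_vector looks for an 8 x QImode vector mode.  */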
11499 if (!multiple_p (GET_MODE_SIZE (simd_mode), nbytes, &nunits)
11500 || !mode_for_vector (inner_mode, nunits).exists (&simd_mode))
11501 return NULL_TREE;
11502 }
11503 }
11504 else if (SCALAR_INT_MODE_P (prevailing_mode)
11505 || !related_vector_mode (prevailing_mode,
11506 inner_mode, nunits).exists (&simd_mode))
11507 {
11508 /* Fall back to using mode_for_vector, mostly in the hope of being
11509 able to use an integer mode. */
11510 if (known_eq (nunits, 0U)
11511 && !multiple_p (GET_MODE_SIZE (prevailing_mode), nbytes, &nunits))
11512 return NULL_TREE;
11513
11514 if (!mode_for_vector (inner_mode, nunits).exists (&simd_mode))
11515 return NULL_TREE;
11516 }
11517
11518 vectype = build_vector_type_for_mode (scalar_type, simd_mode);
11519
11520 /* In cases where the mode was chosen by mode_for_vector, check that
11521 the target actually supports the chosen mode, or that it at least
11522 allows the vector mode to be replaced by a like-sized integer. */
11523 if (!VECTOR_MODE_P (TYPE_MODE (vectype))
11524 && !INTEGRAL_MODE_P (TYPE_MODE (vectype)))
11525 return NULL_TREE;
11526
11527 /* Re-attach the address-space qualifier if we canonicalized the scalar
11528 type. */
11529 if (TYPE_ADDR_SPACE (orig_scalar_type) != TYPE_ADDR_SPACE (vectype))
11530 return build_qualified_type
11531 (vectype, KEEP_QUAL_ADDR_SPACE (TYPE_QUALS (orig_scalar_type)));
11532
11533 return vectype;
11534 }
11535
11536 /* Function get_vectype_for_scalar_type.
11537
11538 Returns the vector type corresponding to SCALAR_TYPE as supported
11539 by the target. If GROUP_SIZE is nonzero and we're performing BB
11540 vectorization, make sure that the number of elements in the vector
11541 is no bigger than GROUP_SIZE. */
11542
11543 tree
11544 get_vectype_for_scalar_type (vec_info *vinfo, tree scalar_type,
11545 unsigned int group_size)
11546 {
11547 /* For BB vectorization, we should always have a group size once we've
11548 constructed the SLP tree; the only valid uses of zero GROUP_SIZEs
11549 are tentative requests during things like early data reference
11550 analysis and pattern recognition. */
11551 if (is_a <bb_vec_info> (vinfo))
11552 gcc_assert (vinfo->slp_instances.is_empty () || group_size != 0);
11553 else
11554 group_size = 0;
11555
11556 tree vectype = get_related_vectype_for_scalar_type (vinfo->vector_mode,
11557 scalar_type);
11558 if (vectype && vinfo->vector_mode == VOIDmode)
11559 vinfo->vector_mode = TYPE_MODE (vectype);
11560
11561 /* Register the natural choice of vector type, before the group size
11562 has been applied. */
11563 if (vectype)
11564 vinfo->used_vector_modes.add (TYPE_MODE (vectype));
11565
11566 /* If the natural choice of vector type doesn't satisfy GROUP_SIZE,
11567 try again with an explicit number of elements. */
11568 if (vectype
11569 && group_size
11570 && maybe_ge (TYPE_VECTOR_SUBPARTS (vectype), group_size))
11571 {
11572 /* Start with the biggest number of units that fits within
11573 GROUP_SIZE and halve it until we find a valid vector type.
11574 Usually either the first attempt will succeed or all will
11575 fail (in the latter case because GROUP_SIZE is too small
11576 for the target), but it's possible that a target could have
11577 a hole between supported vector types.
11578
11579 If GROUP_SIZE is not a power of 2, this has the effect of
11580 trying the largest power of 2 that fits within the group,
11581 even though the group is not a multiple of that vector size.
11582 The BB vectorizer will then try to carve up the group into
11583 smaller pieces. */
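      /* For example, with GROUP_SIZE == 7 we try 4 units first and then
	 2 units before giving up.  */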
11584 unsigned int nunits = 1 << floor_log2 (group_size);
11585 do
11586 {
11587 vectype = get_related_vectype_for_scalar_type (vinfo->vector_mode,
11588 scalar_type, nunits);
11589 nunits /= 2;
11590 }
11591 while (nunits > 1 && !vectype);
11592 }
11593
11594 return vectype;
11595 }
11596
11597 /* Return the vector type corresponding to SCALAR_TYPE as supported
11598 by the target. NODE, if nonnull, is the SLP tree node that will
11599 use the returned vector type. */
11600
11601 tree
11602 get_vectype_for_scalar_type (vec_info *vinfo, tree scalar_type, slp_tree node)
11603 {
11604 unsigned int group_size = 0;
11605 if (node)
11606 group_size = SLP_TREE_LANES (node);
11607 return get_vectype_for_scalar_type (vinfo, scalar_type, group_size);
11608 }
11609
11610 /* Function get_mask_type_for_scalar_type.
11611
11612 Returns the mask type corresponding to a result of comparison
11613 of vectors of specified SCALAR_TYPE as supported by target.
11614 If GROUP_SIZE is nonzero and we're performing BB vectorization,
11615 make sure that the number of elements in the vector is no bigger
11616 than GROUP_SIZE. */
11617
11618 tree
11619 get_mask_type_for_scalar_type (vec_info *vinfo, tree scalar_type,
11620 unsigned int group_size)
11621 {
11622 tree vectype = get_vectype_for_scalar_type (vinfo, scalar_type, group_size);
11623
11624 if (!vectype)
11625 return NULL;
11626
11627 return truth_type_for (vectype);
11628 }
11629
11630 /* Function get_same_sized_vectype
11631
11632 Returns a vector type corresponding to SCALAR_TYPE of size
11633 VECTOR_TYPE if supported by the target. */
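/* For example, given SCALAR_TYPE "short" and a "vector(4) int" VECTOR_TYPE
   (assuming 32-bit "int" and 16-bit "short"), this returns an 8-element
   "short" vector of the same total size (V8HImode), provided the target
   supports such a mode.  */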
11634
11635 tree
11636 get_same_sized_vectype (tree scalar_type, tree vector_type)
11637 {
11638 if (VECT_SCALAR_BOOLEAN_TYPE_P (scalar_type))
11639 return truth_type_for (vector_type);
11640
11641 poly_uint64 nunits;
11642 if (!multiple_p (GET_MODE_SIZE (TYPE_MODE (vector_type)),
11643 GET_MODE_SIZE (TYPE_MODE (scalar_type)), &nunits))
11644 return NULL_TREE;
11645
11646 return get_related_vectype_for_scalar_type (TYPE_MODE (vector_type),
11647 scalar_type, nunits);
11648 }
11649
11650 /* Return true if replacing LOOP_VINFO->vector_mode with VECTOR_MODE
11651 would not change the chosen vector modes. */
11652
11653 bool
11654 vect_chooses_same_modes_p (vec_info *vinfo, machine_mode vector_mode)
11655 {
11656 for (vec_info::mode_set::iterator i = vinfo->used_vector_modes.begin ();
11657 i != vinfo->used_vector_modes.end (); ++i)
11658 if (!VECTOR_MODE_P (*i)
11659 || related_vector_mode (vector_mode, GET_MODE_INNER (*i), 0) != *i)
11660 return false;
11661 return true;
11662 }
11663
11664 /* Function vect_is_simple_use.
11665
11666 Input:
11667 VINFO - the vect info of the loop or basic block that is being vectorized.
11668 OPERAND - operand in the loop or bb.
11669 Output:
11670 DEF_STMT_INFO_OUT (optional) - information about the defining stmt in
11671 case OPERAND is an SSA_NAME that is defined in the vectorizable region
11672 DEF_STMT_OUT (optional) - the defining stmt in case OPERAND is an SSA_NAME;
11673 the definition could be anywhere in the function
11674 DT - the type of definition
11675
11676 Returns whether a stmt with OPERAND can be vectorized.
11677 For loops, supportable operands are constants, loop invariants, and operands
11678 that are defined by the current iteration of the loop. Unsupportable
11679 operands are those that are defined by a previous iteration of the loop (as
11680 is the case in reduction/induction computations).
11681 For basic blocks, supportable operands are constants and bb invariants.
11682 For now, operands defined outside the basic block are not supported. */
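/* For example, in "for (i = 0; i < n; i++) a[i] = b[i] + x;" the invariant
   "x" is classified as vect_external_def, the loaded value of "b[i]" as
   vect_internal_def, and a literal constant operand as vect_constant_def.  */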
11683
11684 bool
11685 vect_is_simple_use (tree operand, vec_info *vinfo, enum vect_def_type *dt,
11686 stmt_vec_info *def_stmt_info_out, gimple **def_stmt_out)
11687 {
11688 if (def_stmt_info_out)
11689 *def_stmt_info_out = NULL;
11690 if (def_stmt_out)
11691 *def_stmt_out = NULL;
11692 *dt = vect_unknown_def_type;
11693
11694 if (dump_enabled_p ())
11695 {
11696 dump_printf_loc (MSG_NOTE, vect_location,
11697 "vect_is_simple_use: operand ");
11698 if (TREE_CODE (operand) == SSA_NAME
11699 && !SSA_NAME_IS_DEFAULT_DEF (operand))
11700 dump_gimple_expr (MSG_NOTE, TDF_SLIM, SSA_NAME_DEF_STMT (operand), 0);
11701 else
11702 dump_generic_expr (MSG_NOTE, TDF_SLIM, operand);
11703 }
11704
11705 if (CONSTANT_CLASS_P (operand))
11706 *dt = vect_constant_def;
11707 else if (is_gimple_min_invariant (operand))
11708 *dt = vect_external_def;
11709 else if (TREE_CODE (operand) != SSA_NAME)
11710 *dt = vect_unknown_def_type;
11711 else if (SSA_NAME_IS_DEFAULT_DEF (operand))
11712 *dt = vect_external_def;
11713 else
11714 {
11715 gimple *def_stmt = SSA_NAME_DEF_STMT (operand);
11716 stmt_vec_info stmt_vinfo = vinfo->lookup_def (operand);
11717 if (!stmt_vinfo)
11718 *dt = vect_external_def;
11719 else
11720 {
11721 stmt_vinfo = vect_stmt_to_vectorize (stmt_vinfo);
11722 def_stmt = stmt_vinfo->stmt;
11723 switch (gimple_code (def_stmt))
11724 {
11725 case GIMPLE_PHI:
11726 case GIMPLE_ASSIGN:
11727 case GIMPLE_CALL:
11728 *dt = STMT_VINFO_DEF_TYPE (stmt_vinfo);
11729 break;
11730 default:
11731 *dt = vect_unknown_def_type;
11732 break;
11733 }
11734 if (def_stmt_info_out)
11735 *def_stmt_info_out = stmt_vinfo;
11736 }
11737 if (def_stmt_out)
11738 *def_stmt_out = def_stmt;
11739 }
11740
11741 if (dump_enabled_p ())
11742 {
11743 dump_printf (MSG_NOTE, ", type of def: ");
11744 switch (*dt)
11745 {
11746 case vect_uninitialized_def:
11747 dump_printf (MSG_NOTE, "uninitialized\n");
11748 break;
11749 case vect_constant_def:
11750 dump_printf (MSG_NOTE, "constant\n");
11751 break;
11752 case vect_external_def:
11753 dump_printf (MSG_NOTE, "external\n");
11754 break;
11755 case vect_internal_def:
11756 dump_printf (MSG_NOTE, "internal\n");
11757 break;
11758 case vect_induction_def:
11759 dump_printf (MSG_NOTE, "induction\n");
11760 break;
11761 case vect_reduction_def:
11762 dump_printf (MSG_NOTE, "reduction\n");
11763 break;
11764 case vect_double_reduction_def:
11765 dump_printf (MSG_NOTE, "double reduction\n");
11766 break;
11767 case vect_nested_cycle:
11768 dump_printf (MSG_NOTE, "nested cycle\n");
11769 break;
11770 case vect_unknown_def_type:
11771 dump_printf (MSG_NOTE, "unknown\n");
11772 break;
11773 }
11774 }
11775
11776 if (*dt == vect_unknown_def_type)
11777 {
11778 if (dump_enabled_p ())
11779 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
11780 "Unsupported pattern.\n");
11781 return false;
11782 }
11783
11784 return true;
11785 }
11786
11787 /* Function vect_is_simple_use.
11788
11789 Same as vect_is_simple_use but also determines the vector operand
11790 type of OPERAND and stores it to *VECTYPE. If the definition of
11791 OPERAND is vect_uninitialized_def, vect_constant_def or
11792 vect_external_def *VECTYPE will be set to NULL_TREE and the caller
11793 is responsible to compute the best suited vector type for the
11794 scalar operand. */
11795
11796 bool
11797 vect_is_simple_use (tree operand, vec_info *vinfo, enum vect_def_type *dt,
11798 tree *vectype, stmt_vec_info *def_stmt_info_out,
11799 gimple **def_stmt_out)
11800 {
11801 stmt_vec_info def_stmt_info;
11802 gimple *def_stmt;
11803 if (!vect_is_simple_use (operand, vinfo, dt, &def_stmt_info, &def_stmt))
11804 return false;
11805
11806 if (def_stmt_out)
11807 *def_stmt_out = def_stmt;
11808 if (def_stmt_info_out)
11809 *def_stmt_info_out = def_stmt_info;
11810
11811 /* Now get a vector type if the def is internal, otherwise supply
11812 NULL_TREE and leave it up to the caller to figure out a proper
11813 type for the use stmt. */
11814 if (*dt == vect_internal_def
11815 || *dt == vect_induction_def
11816 || *dt == vect_reduction_def
11817 || *dt == vect_double_reduction_def
11818 || *dt == vect_nested_cycle)
11819 {
11820 *vectype = STMT_VINFO_VECTYPE (def_stmt_info);
11821 gcc_assert (*vectype != NULL_TREE);
11822 if (dump_enabled_p ())
11823 dump_printf_loc (MSG_NOTE, vect_location,
11824 "vect_is_simple_use: vectype %T\n", *vectype);
11825 }
11826 else if (*dt == vect_uninitialized_def
11827 || *dt == vect_constant_def
11828 || *dt == vect_external_def)
11829 *vectype = NULL_TREE;
11830 else
11831 gcc_unreachable ();
11832
11833 return true;
11834 }
11835
11836 /* Function vect_is_simple_use.
11837
11838 Same as vect_is_simple_use but determines the operand by operand
11839 position OPERAND from either STMT or SLP_NODE, filling in *OP
11840 and *SLP_DEF (when SLP_NODE is not NULL). */
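/* For example, when SLP_NODE is NULL and STMT is "x = a < b ? c : d"
   (a COND_EXPR with an embedded comparison), OPERAND 0 and 1 select
   "a" and "b" while OPERAND 2 and 3 select "c" and "d".  */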
11841
11842 bool
11843 vect_is_simple_use (vec_info *vinfo, stmt_vec_info stmt, slp_tree slp_node,
11844 unsigned operand, tree *op, slp_tree *slp_def,
11845 enum vect_def_type *dt,
11846 tree *vectype, stmt_vec_info *def_stmt_info_out)
11847 {
11848 if (slp_node)
11849 {
11850 slp_tree child = SLP_TREE_CHILDREN (slp_node)[operand];
11851 *slp_def = child;
11852 if (SLP_TREE_DEF_TYPE (child) == vect_internal_def)
11853 *op = gimple_get_lhs (SLP_TREE_REPRESENTATIVE (child)->stmt);
11854 else
11855 {
11856 if (def_stmt_info_out)
11857 *def_stmt_info_out = NULL;
11858 *op = SLP_TREE_SCALAR_OPS (child)[0];
11859 *dt = SLP_TREE_DEF_TYPE (child);
11860 *vectype = SLP_TREE_VECTYPE (child);
11861 return true;
11862 }
11863 }
11864 else
11865 {
11866 if (gassign *ass = dyn_cast <gassign *> (stmt->stmt))
11867 {
11868 if (gimple_assign_rhs_code (ass) == COND_EXPR
11869 && COMPARISON_CLASS_P (gimple_assign_rhs1 (ass)))
11870 {
11871 if (operand < 2)
11872 *op = TREE_OPERAND (gimple_assign_rhs1 (ass), operand);
11873 else
11874 *op = gimple_op (ass, operand);
11875 }
11876 else if (gimple_assign_rhs_code (ass) == VIEW_CONVERT_EXPR)
11877 *op = TREE_OPERAND (gimple_assign_rhs1 (ass), 0);
11878 else
11879 *op = gimple_op (ass, operand + 1);
11880 }
11881 else if (gcall *call = dyn_cast <gcall *> (stmt->stmt))
11882 {
11883 if (gimple_call_internal_p (call)
11884 && internal_store_fn_p (gimple_call_internal_fn (call)))
11885 operand = internal_fn_stored_value_index (gimple_call_internal_fn
11886 (call));
11887 *op = gimple_call_arg (call, operand);
11888 }
11889 else
11890 gcc_unreachable ();
11891 }
11892
11893   /* ??? We might want to update *vectype from *slp_def here, though
11894 when sharing nodes this would prevent unsharing in the caller. */
11895 return vect_is_simple_use (*op, vinfo, dt, vectype, def_stmt_info_out);
11896 }
11897
11898 /* If OP is not NULL and is external or constant update its vector
11899 type with VECTYPE. Returns true if successful or false if not,
11900 for example when conflicting vector types are present. */
11901
11902 bool
11903 vect_maybe_update_slp_op_vectype (slp_tree op, tree vectype)
11904 {
11905 if (!op || SLP_TREE_DEF_TYPE (op) == vect_internal_def)
11906 return true;
11907 if (SLP_TREE_VECTYPE (op))
11908 return types_compatible_p (SLP_TREE_VECTYPE (op), vectype);
11909 SLP_TREE_VECTYPE (op) = vectype;
11910 return true;
11911 }
11912
11913 /* Function supportable_widening_operation
11914
11915 Check whether an operation represented by the code CODE is a
11916 widening operation that is supported by the target platform in
11917 vector form (i.e., when operating on arguments of type VECTYPE_IN
11918 producing a result of type VECTYPE_OUT).
11919
11920 Widening operations we currently support are NOP (CONVERT), FLOAT,
11921 FIX_TRUNC and WIDEN_MULT. This function checks if these operations
11922 are supported by the target platform either directly (via vector
11923 tree-codes), or via target builtins.
11924
11925 Output:
11926 - CODE1 and CODE2 are codes of vector operations to be used when
11927 vectorizing the operation, if available.
11928 - MULTI_STEP_CVT determines the number of required intermediate steps in
11929 case of multi-step conversion (like char->short->int - in that case
11930 MULTI_STEP_CVT will be 1).
11931 - INTERM_TYPES contains the intermediate type(s) required to perform the
11932 widening operation (short in the above example). */
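/* For example (illustrative; the exact modes are target-dependent,
   assuming 128-bit vectors): widening chars to ints, i.e.
   vector(16) char -> vector(4) int, uses VEC_UNPACK_LO_EXPR and
   VEC_UNPACK_HI_EXPR in two steps through vector(8) short, so
   *MULTI_STEP_CVT is 1 and *INTERM_TYPES holds the vector(8) short
   type.  */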
11933
11934 bool
11935 supportable_widening_operation (vec_info *vinfo,
11936 enum tree_code code, stmt_vec_info stmt_info,
11937 tree vectype_out, tree vectype_in,
11938 enum tree_code *code1, enum tree_code *code2,
11939 int *multi_step_cvt,
11940 vec<tree> *interm_types)
11941 {
11942 loop_vec_info loop_info = dyn_cast <loop_vec_info> (vinfo);
11943 class loop *vect_loop = NULL;
11944 machine_mode vec_mode;
11945 enum insn_code icode1, icode2;
11946 optab optab1, optab2;
11947 tree vectype = vectype_in;
11948 tree wide_vectype = vectype_out;
11949 enum tree_code c1, c2;
11950 int i;
11951 tree prev_type, intermediate_type;
11952 machine_mode intermediate_mode, prev_mode;
11953 optab optab3, optab4;
11954
11955 *multi_step_cvt = 0;
11956 if (loop_info)
11957 vect_loop = LOOP_VINFO_LOOP (loop_info);
11958
11959 switch (code)
11960 {
11961 case WIDEN_MULT_EXPR:
11962 /* The result of a vectorized widening operation usually requires
11963 two vectors (because the widened results do not fit into one vector).
11964 The generated vector results are normally expected to appear in the
11965 same order as in the original scalar computation,
11966 i.e. if 8 results are generated in each vector iteration, they are
11967 to be organized as follows:
11968 vect1: [res1,res2,res3,res4],
11969 vect2: [res5,res6,res7,res8].
11970
11971 However, in the special case that the result of the widening
11972 operation is used in a reduction computation only, the order doesn't
11973 matter (because when vectorizing a reduction we change the order of
11974 the computation). Some targets can take advantage of this and
11975 generate more efficient code. For example, targets like Altivec,
11976 that support widen_mult using a sequence of {mult_even,mult_odd}
11977 generate the following vectors:
11978 vect1: [res1,res3,res5,res7],
11979 vect2: [res2,res4,res6,res8].
11980
11981 When vectorizing outer-loops, we execute the inner-loop sequentially
11982 (each vectorized inner-loop iteration contributes to VF outer-loop
11983 iterations in parallel). We therefore don't allow changing the
11984 order of the computation in the inner-loop during outer-loop
11985 vectorization. */
11986 /* TODO: Another case in which order doesn't *really* matter is when we
11987 widen and then contract again, e.g. (short)((int)x * y >> 8).
11988 Normally, pack_trunc performs an even/odd permute, whereas the
11989 repack from an even/odd expansion would be an interleave, which
11990 would be significantly simpler for e.g. AVX2. */
11991 /* In any case, in order to avoid duplicating the code below, recurse
11992 on VEC_WIDEN_MULT_EVEN_EXPR. If it succeeds, all the return values
11993 are properly set up for the caller. If we fail, we'll continue with
11994 a VEC_WIDEN_MULT_LO/HI_EXPR check. */
11995 if (vect_loop
11996 && STMT_VINFO_RELEVANT (stmt_info) == vect_used_by_reduction
11997 && !nested_in_vect_loop_p (vect_loop, stmt_info)
11998 && supportable_widening_operation (vinfo, VEC_WIDEN_MULT_EVEN_EXPR,
11999 stmt_info, vectype_out,
12000 vectype_in, code1, code2,
12001 multi_step_cvt, interm_types))
12002 {
12003 /* Elements in a vector with the vect_used_by_reduction property cannot
12004 be reordered if the use chain with this property does not have the
12005 same operation. One such example is s += a * b, where elements
12006 in a and b cannot be reordered. Here we check if the vector defined
12007 by STMT_INFO is only directly used in the reduction statement. */
12008 tree lhs = gimple_assign_lhs (stmt_info->stmt);
12009 stmt_vec_info use_stmt_info = loop_info->lookup_single_use (lhs);
12010 if (use_stmt_info
12011 && STMT_VINFO_DEF_TYPE (use_stmt_info) == vect_reduction_def)
12012 return true;
12013 }
12014 c1 = VEC_WIDEN_MULT_LO_EXPR;
12015 c2 = VEC_WIDEN_MULT_HI_EXPR;
12016 break;
12017
12018 case DOT_PROD_EXPR:
12019 c1 = DOT_PROD_EXPR;
12020 c2 = DOT_PROD_EXPR;
12021 break;
12022
12023 case SAD_EXPR:
12024 c1 = SAD_EXPR;
12025 c2 = SAD_EXPR;
12026 break;
12027
12028 case VEC_WIDEN_MULT_EVEN_EXPR:
12029 /* Support the recursion induced just above. */
12030 c1 = VEC_WIDEN_MULT_EVEN_EXPR;
12031 c2 = VEC_WIDEN_MULT_ODD_EXPR;
12032 break;
12033
12034 case WIDEN_LSHIFT_EXPR:
12035 c1 = VEC_WIDEN_LSHIFT_LO_EXPR;
12036 c2 = VEC_WIDEN_LSHIFT_HI_EXPR;
12037 break;
12038
12039 CASE_CONVERT:
12040 c1 = VEC_UNPACK_LO_EXPR;
12041 c2 = VEC_UNPACK_HI_EXPR;
12042 break;
12043
12044 case FLOAT_EXPR:
12045 c1 = VEC_UNPACK_FLOAT_LO_EXPR;
12046 c2 = VEC_UNPACK_FLOAT_HI_EXPR;
12047 break;
12048
12049 case FIX_TRUNC_EXPR:
12050 c1 = VEC_UNPACK_FIX_TRUNC_LO_EXPR;
12051 c2 = VEC_UNPACK_FIX_TRUNC_HI_EXPR;
12052 break;
12053
12054 default:
12055 gcc_unreachable ();
12056 }
12057
12058 if (BYTES_BIG_ENDIAN && c1 != VEC_WIDEN_MULT_EVEN_EXPR)
12059 std::swap (c1, c2);
12060
12061 if (code == FIX_TRUNC_EXPR)
12062 {
12063 /* The signedness is determined from the output operand. */
12064 optab1 = optab_for_tree_code (c1, vectype_out, optab_default);
12065 optab2 = optab_for_tree_code (c2, vectype_out, optab_default);
12066 }
12067 else if (CONVERT_EXPR_CODE_P (code)
12068 && VECTOR_BOOLEAN_TYPE_P (wide_vectype)
12069 && VECTOR_BOOLEAN_TYPE_P (vectype)
12070 && TYPE_MODE (wide_vectype) == TYPE_MODE (vectype)
12071 && SCALAR_INT_MODE_P (TYPE_MODE (vectype)))
12072 {
12073 /* If the input and result modes are the same, a different optab
12074 is needed where we pass in the number of units in vectype. */
12075 optab1 = vec_unpacks_sbool_lo_optab;
12076 optab2 = vec_unpacks_sbool_hi_optab;
12077 }
12078 else
12079 {
12080 optab1 = optab_for_tree_code (c1, vectype, optab_default);
12081 optab2 = optab_for_tree_code (c2, vectype, optab_default);
12082 }
12083
12084 if (!optab1 || !optab2)
12085 return false;
12086
12087 vec_mode = TYPE_MODE (vectype);
12088 if ((icode1 = optab_handler (optab1, vec_mode)) == CODE_FOR_nothing
12089 || (icode2 = optab_handler (optab2, vec_mode)) == CODE_FOR_nothing)
12090 return false;
12091
12092 *code1 = c1;
12093 *code2 = c2;
12094
12095 if (insn_data[icode1].operand[0].mode == TYPE_MODE (wide_vectype)
12096 && insn_data[icode2].operand[0].mode == TYPE_MODE (wide_vectype))
12097 {
12098 if (!VECTOR_BOOLEAN_TYPE_P (vectype))
12099 return true;
12100 /* For scalar masks we may have different boolean
12101 vector types having the same QImode. Thus we
12102 add an additional check on the number of elements. */
12103 if (known_eq (TYPE_VECTOR_SUBPARTS (vectype),
12104 TYPE_VECTOR_SUBPARTS (wide_vectype) * 2))
12105 return true;
12106 }
12107
12108 /* Check if it's a multi-step conversion that can be done using intermediate
12109 types. */
12110
12111 prev_type = vectype;
12112 prev_mode = vec_mode;
12113
12114 if (!CONVERT_EXPR_CODE_P (code))
12115 return false;
12116
12117 /* We assume here that there will not be more than MAX_INTERM_CVT_STEPS
12118 intermediate steps in the promotion sequence. We try
12119 MAX_INTERM_CVT_STEPS to get to WIDE_VECTYPE, and fail if we do
12120 not. */
12121 interm_types->create (MAX_INTERM_CVT_STEPS);
12122 for (i = 0; i < MAX_INTERM_CVT_STEPS; i++)
12123 {
12124 intermediate_mode = insn_data[icode1].operand[0].mode;
12125 if (VECTOR_BOOLEAN_TYPE_P (prev_type))
12126 intermediate_type
12127 = vect_halve_mask_nunits (prev_type, intermediate_mode);
12128 else
12129 intermediate_type
12130 = lang_hooks.types.type_for_mode (intermediate_mode,
12131 TYPE_UNSIGNED (prev_type));
12132
12133 if (VECTOR_BOOLEAN_TYPE_P (intermediate_type)
12134 && VECTOR_BOOLEAN_TYPE_P (prev_type)
12135 && intermediate_mode == prev_mode
12136 && SCALAR_INT_MODE_P (prev_mode))
12137 {
12138 /* If the input and result modes are the same, a different optab
12139 is needed where we pass in the number of units in vectype. */
12140 optab3 = vec_unpacks_sbool_lo_optab;
12141 optab4 = vec_unpacks_sbool_hi_optab;
12142 }
12143 else
12144 {
12145 optab3 = optab_for_tree_code (c1, intermediate_type, optab_default);
12146 optab4 = optab_for_tree_code (c2, intermediate_type, optab_default);
12147 }
12148
12149 if (!optab3 || !optab4
12150 || (icode1 = optab_handler (optab1, prev_mode)) == CODE_FOR_nothing
12151 || insn_data[icode1].operand[0].mode != intermediate_mode
12152 || (icode2 = optab_handler (optab2, prev_mode)) == CODE_FOR_nothing
12153 || insn_data[icode2].operand[0].mode != intermediate_mode
12154 || ((icode1 = optab_handler (optab3, intermediate_mode))
12155 == CODE_FOR_nothing)
12156 || ((icode2 = optab_handler (optab4, intermediate_mode))
12157 == CODE_FOR_nothing))
12158 break;
12159
12160 interm_types->quick_push (intermediate_type);
12161 (*multi_step_cvt)++;
12162
12163 if (insn_data[icode1].operand[0].mode == TYPE_MODE (wide_vectype)
12164 && insn_data[icode2].operand[0].mode == TYPE_MODE (wide_vectype))
12165 {
12166 if (!VECTOR_BOOLEAN_TYPE_P (vectype))
12167 return true;
12168 if (known_eq (TYPE_VECTOR_SUBPARTS (intermediate_type),
12169 TYPE_VECTOR_SUBPARTS (wide_vectype) * 2))
12170 return true;
12171 }
12172
12173 prev_type = intermediate_type;
12174 prev_mode = intermediate_mode;
12175 }
12176
12177 interm_types->release ();
12178 return false;
12179 }
12180
12181
12182 /* Function supportable_narrowing_operation
12183
12184 Check whether an operation represented by the code CODE is a
12185 narrowing operation that is supported by the target platform in
12186 vector form (i.e., when operating on arguments of type VECTYPE_IN
12187 and producing a result of type VECTYPE_OUT).
12188
12189 Narrowing operations we currently support are NOP (CONVERT), FIX_TRUNC
12190 and FLOAT. This function checks if these operations are supported by
12191 the target platform directly via vector tree-codes.
12192
12193 Output:
12194 - CODE1 is the code of a vector operation to be used when
12195 vectorizing the operation, if available.
12196 - MULTI_STEP_CVT determines the number of required intermediate steps in
12197 case of multi-step conversion (like int->short->char - in that case
12198 MULTI_STEP_CVT will be 1).
12199 - INTERM_TYPES contains the intermediate type(s) required to perform the
12200 narrowing operation (short in the above example). */
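/* For example (illustrative; the exact modes are target-dependent,
   assuming 128-bit vectors): narrowing ints to chars, i.e.
   vector(4) int -> vector(16) char, uses VEC_PACK_TRUNC_EXPR in two
   steps through vector(8) short, so *MULTI_STEP_CVT is 1 and
   *INTERM_TYPES holds the vector(8) short type.  */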
12201
12202 bool
12203 supportable_narrowing_operation (enum tree_code code,
12204 tree vectype_out, tree vectype_in,
12205 enum tree_code *code1, int *multi_step_cvt,
12206 vec<tree> *interm_types)
12207 {
12208 machine_mode vec_mode;
12209 enum insn_code icode1;
12210 optab optab1, interm_optab;
12211 tree vectype = vectype_in;
12212 tree narrow_vectype = vectype_out;
12213 enum tree_code c1;
12214 tree intermediate_type, prev_type;
12215 machine_mode intermediate_mode, prev_mode;
12216 int i;
12217 bool uns;
12218
12219 *multi_step_cvt = 0;
12220 switch (code)
12221 {
12222 CASE_CONVERT:
12223 c1 = VEC_PACK_TRUNC_EXPR;
12224 if (VECTOR_BOOLEAN_TYPE_P (narrow_vectype)
12225 && VECTOR_BOOLEAN_TYPE_P (vectype)
12226 && TYPE_MODE (narrow_vectype) == TYPE_MODE (vectype)
12227 && SCALAR_INT_MODE_P (TYPE_MODE (vectype)))
12228 optab1 = vec_pack_sbool_trunc_optab;
12229 else
12230 optab1 = optab_for_tree_code (c1, vectype, optab_default);
12231 break;
12232
12233 case FIX_TRUNC_EXPR:
12234 c1 = VEC_PACK_FIX_TRUNC_EXPR;
12235 /* The signedness is determined from the output operand. */
12236 optab1 = optab_for_tree_code (c1, vectype_out, optab_default);
12237 break;
12238
12239 case FLOAT_EXPR:
12240 c1 = VEC_PACK_FLOAT_EXPR;
12241 optab1 = optab_for_tree_code (c1, vectype, optab_default);
12242 break;
12243
12244 default:
12245 gcc_unreachable ();
12246 }
12247
12248 if (!optab1)
12249 return false;
12250
12251 vec_mode = TYPE_MODE (vectype);
12252 if ((icode1 = optab_handler (optab1, vec_mode)) == CODE_FOR_nothing)
12253 return false;
12254
12255 *code1 = c1;
12256
12257 if (insn_data[icode1].operand[0].mode == TYPE_MODE (narrow_vectype))
12258 {
12259 if (!VECTOR_BOOLEAN_TYPE_P (vectype))
12260 return true;
12261 /* For scalar masks we may have different boolean
12262 vector types having the same QImode. Thus we
12263 add an additional check on the number of elements. */
12264 if (known_eq (TYPE_VECTOR_SUBPARTS (vectype) * 2,
12265 TYPE_VECTOR_SUBPARTS (narrow_vectype)))
12266 return true;
12267 }
12268
12269 if (code == FLOAT_EXPR)
12270 return false;
12271
12272 /* Check if it's a multi-step conversion that can be done using intermediate
12273 types. */
12274 prev_mode = vec_mode;
12275 prev_type = vectype;
12276 if (code == FIX_TRUNC_EXPR)
12277 uns = TYPE_UNSIGNED (vectype_out);
12278 else
12279 uns = TYPE_UNSIGNED (vectype);
12280
12281 /* For multi-step FIX_TRUNC_EXPR prefer signed floating to integer
12282 conversion over unsigned, as unsigned FIX_TRUNC_EXPR is often more
12283 costly than signed. */
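/* For example (illustrative): a multi-step double -> unsigned short
   conversion would rather go double -> signed int -> short, i.e.
   vec_pack_sfix_trunc followed by vec_pack_trunc, than start with the
   unsigned vec_pack_ufix_trunc variant, assuming the target supports
   the signed form with the same result mode.  */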
12284 if (code == FIX_TRUNC_EXPR && uns)
12285 {
12286 enum insn_code icode2;
12287
12288 intermediate_type
12289 = lang_hooks.types.type_for_mode (TYPE_MODE (vectype_out), 0);
12290 interm_optab
12291 = optab_for_tree_code (c1, intermediate_type, optab_default);
12292 if (interm_optab != unknown_optab
12293 && (icode2 = optab_handler (optab1, vec_mode)) != CODE_FOR_nothing
12294 && insn_data[icode1].operand[0].mode
12295 == insn_data[icode2].operand[0].mode)
12296 {
12297 uns = false;
12298 optab1 = interm_optab;
12299 icode1 = icode2;
12300 }
12301 }
12302
12303 /* We assume here that there will not be more than MAX_INTERM_CVT_STEPS
12304 intermediate steps in the narrowing sequence. We try
12305 MAX_INTERM_CVT_STEPS to get to NARROW_VECTYPE, and fail if we do not. */
12306 interm_types->create (MAX_INTERM_CVT_STEPS);
12307 for (i = 0; i < MAX_INTERM_CVT_STEPS; i++)
12308 {
12309 intermediate_mode = insn_data[icode1].operand[0].mode;
12310 if (VECTOR_BOOLEAN_TYPE_P (prev_type))
12311 intermediate_type
12312 = vect_double_mask_nunits (prev_type, intermediate_mode);
12313 else
12314 intermediate_type
12315 = lang_hooks.types.type_for_mode (intermediate_mode, uns);
12316 if (VECTOR_BOOLEAN_TYPE_P (intermediate_type)
12317 && VECTOR_BOOLEAN_TYPE_P (prev_type)
12318 && intermediate_mode == prev_mode
12319 && SCALAR_INT_MODE_P (prev_mode))
12320 interm_optab = vec_pack_sbool_trunc_optab;
12321 else
12322 interm_optab
12323 = optab_for_tree_code (VEC_PACK_TRUNC_EXPR, intermediate_type,
12324 optab_default);
12325 if (!interm_optab
12326 || ((icode1 = optab_handler (optab1, prev_mode)) == CODE_FOR_nothing)
12327 || insn_data[icode1].operand[0].mode != intermediate_mode
12328 || ((icode1 = optab_handler (interm_optab, intermediate_mode))
12329 == CODE_FOR_nothing))
12330 break;
12331
12332 interm_types->quick_push (intermediate_type);
12333 (*multi_step_cvt)++;
12334
12335 if (insn_data[icode1].operand[0].mode == TYPE_MODE (narrow_vectype))
12336 {
12337 if (!VECTOR_BOOLEAN_TYPE_P (vectype))
12338 return true;
12339 if (known_eq (TYPE_VECTOR_SUBPARTS (intermediate_type) * 2,
12340 TYPE_VECTOR_SUBPARTS (narrow_vectype)))
12341 return true;
12342 }
12343
12344 prev_mode = intermediate_mode;
12345 prev_type = intermediate_type;
12346 optab1 = interm_optab;
12347 }
12348
12349 interm_types->release ();
12350 return false;
12351 }
12352
12353 /* Generate and return a statement that sets vector mask MASK such that
12354 MASK[I] is true iff J + START_INDEX < END_INDEX for all J <= I. */
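/* For example (illustrative): with START_INDEX 0, END_INDEX 3 and an
   8-element MASK, the emitted IFN_WHILE_ULT call sets MASK to
   { 1, 1, 1, 0, 0, 0, 0, 0 }.  */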
12355
12356 gcall *
12357 vect_gen_while (tree mask, tree start_index, tree end_index)
12358 {
12359 tree cmp_type = TREE_TYPE (start_index);
12360 tree mask_type = TREE_TYPE (mask);
12361 gcc_checking_assert (direct_internal_fn_supported_p (IFN_WHILE_ULT,
12362 cmp_type, mask_type,
12363 OPTIMIZE_FOR_SPEED));
12364 gcall *call = gimple_build_call_internal (IFN_WHILE_ULT, 3,
12365 start_index, end_index,
12366 build_zero_cst (mask_type));
12367 gimple_call_set_lhs (call, mask);
12368 return call;
12369 }
12370
12371 /* Generate a vector mask of type MASK_TYPE for which index I is false iff
12372 J + START_INDEX < END_INDEX for all J <= I. Add the statements to SEQ. */
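/* For example (illustrative): with the same inputs as in the
   vect_gen_while example above, the returned mask would be
   { 0, 0, 0, 1, 1, 1, 1, 1 }.  */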
12373
12374 tree
12375 vect_gen_while_not (gimple_seq *seq, tree mask_type, tree start_index,
12376 tree end_index)
12377 {
12378 tree tmp = make_ssa_name (mask_type);
12379 gcall *call = vect_gen_while (tmp, start_index, end_index);
12380 gimple_seq_add_stmt (seq, call);
12381 return gimple_build (seq, BIT_NOT_EXPR, mask_type, tmp);
12382 }
12383
12384 /* Try to compute the vector types required to vectorize STMT_INFO,
12385 returning true on success and false if vectorization isn't possible.
12386 If GROUP_SIZE is nonzero and we're performing BB vectorization,
12387 make sure that the number of elements in the vectors is no bigger
12388 than GROUP_SIZE.
12389
12390 On success:
12391
12392 - Set *STMT_VECTYPE_OUT to:
12393 - NULL_TREE if the statement doesn't need to be vectorized;
12394 - the equivalent of STMT_VINFO_VECTYPE otherwise.
12395
12396 - Set *NUNITS_VECTYPE_OUT to the vector type that contains the maximum
12397 number of units needed to vectorize STMT_INFO, or NULL_TREE if the
12398 statement does not help to determine the overall number of units. */
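/* For example (illustrative, assuming 128-bit vectors): for a widening
   statement such as "i = (int) c" with a char C, *STMT_VECTYPE_OUT
   would be vector(4) int (based on the lhs type) while
   *NUNITS_VECTYPE_OUT would be vector(16) char, derived from the
   smallest scalar type involved.  */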
12399
12400 opt_result
12401 vect_get_vector_types_for_stmt (vec_info *vinfo, stmt_vec_info stmt_info,
12402 tree *stmt_vectype_out,
12403 tree *nunits_vectype_out,
12404 unsigned int group_size)
12405 {
12406 gimple *stmt = stmt_info->stmt;
12407
12408 /* For BB vectorization, we should always have a group size once we've
12409 constructed the SLP tree; the only valid uses of zero GROUP_SIZEs
12410 are tentative requests during things like early data reference
12411 analysis and pattern recognition. */
12412 if (is_a <bb_vec_info> (vinfo))
12413 gcc_assert (vinfo->slp_instances.is_empty () || group_size != 0);
12414 else
12415 group_size = 0;
12416
12417 *stmt_vectype_out = NULL_TREE;
12418 *nunits_vectype_out = NULL_TREE;
12419
12420 if (gimple_get_lhs (stmt) == NULL_TREE
12421 /* MASK_STORE has no lhs, but is ok. */
12422 && !gimple_call_internal_p (stmt, IFN_MASK_STORE))
12423 {
12424 if (is_a <gcall *> (stmt))
12425 {
12426 /* Ignore calls with no lhs. These must be calls to
12427 #pragma omp simd functions, and the vectorization factor
12428 they really need can't be determined until
12429 vectorizable_simd_clone_call. */
12430 if (dump_enabled_p ())
12431 dump_printf_loc (MSG_NOTE, vect_location,
12432 "defer to SIMD clone analysis.\n");
12433 return opt_result::success ();
12434 }
12435
12436 return opt_result::failure_at (stmt,
12437 "not vectorized: irregular stmt.%G", stmt);
12438 }
12439
12440 if (VECTOR_MODE_P (TYPE_MODE (gimple_expr_type (stmt))))
12441 return opt_result::failure_at (stmt,
12442 "not vectorized: vector stmt in loop:%G",
12443 stmt);
12444
12445 tree vectype;
12446 tree scalar_type = NULL_TREE;
12447 if (group_size == 0 && STMT_VINFO_VECTYPE (stmt_info))
12448 {
12449 vectype = STMT_VINFO_VECTYPE (stmt_info);
12450 if (dump_enabled_p ())
12451 dump_printf_loc (MSG_NOTE, vect_location,
12452 "precomputed vectype: %T\n", vectype);
12453 }
12454 else if (vect_use_mask_type_p (stmt_info))
12455 {
12456 unsigned int precision = stmt_info->mask_precision;
12457 scalar_type = build_nonstandard_integer_type (precision, 1);
12458 vectype = get_mask_type_for_scalar_type (vinfo, scalar_type, group_size);
12459 if (!vectype)
12460 return opt_result::failure_at (stmt, "not vectorized: unsupported"
12461 " data-type %T\n", scalar_type);
12462 if (dump_enabled_p ())
12463 dump_printf_loc (MSG_NOTE, vect_location, "vectype: %T\n", vectype);
12464 }
12465 else
12466 {
12467 if (data_reference *dr = STMT_VINFO_DATA_REF (stmt_info))
12468 scalar_type = TREE_TYPE (DR_REF (dr));
12469 else if (gimple_call_internal_p (stmt, IFN_MASK_STORE))
12470 scalar_type = TREE_TYPE (gimple_call_arg (stmt, 3));
12471 else
12472 scalar_type = TREE_TYPE (gimple_get_lhs (stmt));
12473
12474 if (dump_enabled_p ())
12475 {
12476 if (group_size)
12477 dump_printf_loc (MSG_NOTE, vect_location,
12478 "get vectype for scalar type (group size %d):"
12479 " %T\n", group_size, scalar_type);
12480 else
12481 dump_printf_loc (MSG_NOTE, vect_location,
12482 "get vectype for scalar type: %T\n", scalar_type);
12483 }
12484 vectype = get_vectype_for_scalar_type (vinfo, scalar_type, group_size);
12485 if (!vectype)
12486 return opt_result::failure_at (stmt,
12487 "not vectorized:"
12488 " unsupported data-type %T\n",
12489 scalar_type);
12490
12491 if (dump_enabled_p ())
12492 dump_printf_loc (MSG_NOTE, vect_location, "vectype: %T\n", vectype);
12493 }
12494 *stmt_vectype_out = vectype;
12495
12496 /* Don't try to compute scalar types if the stmt produces a boolean
12497 vector; use the existing vector type instead. */
12498 tree nunits_vectype = vectype;
12499 if (!VECTOR_BOOLEAN_TYPE_P (vectype))
12500 {
12501 /* The number of units is set according to the smallest scalar
12502 type (or the largest vector size, but we only support one
12503 vector size per vectorization). */
12504 HOST_WIDE_INT dummy;
12505 scalar_type = vect_get_smallest_scalar_type (stmt_info, &dummy, &dummy);
12506 if (scalar_type != TREE_TYPE (vectype))
12507 {
12508 if (dump_enabled_p ())
12509 dump_printf_loc (MSG_NOTE, vect_location,
12510 "get vectype for smallest scalar type: %T\n",
12511 scalar_type);
12512 nunits_vectype = get_vectype_for_scalar_type (vinfo, scalar_type,
12513 group_size);
12514 if (!nunits_vectype)
12515 return opt_result::failure_at
12516 (stmt, "not vectorized: unsupported data-type %T\n",
12517 scalar_type);
12518 if (dump_enabled_p ())
12519 dump_printf_loc (MSG_NOTE, vect_location, "nunits vectype: %T\n",
12520 nunits_vectype);
12521 }
12522 }
12523
12524 gcc_assert (multiple_p (TYPE_VECTOR_SUBPARTS (nunits_vectype),
12525 TYPE_VECTOR_SUBPARTS (*stmt_vectype_out)));
12526
12527 if (dump_enabled_p ())
12528 {
12529 dump_printf_loc (MSG_NOTE, vect_location, "nunits = ");
12530 dump_dec (MSG_NOTE, TYPE_VECTOR_SUBPARTS (nunits_vectype));
12531 dump_printf (MSG_NOTE, "\n");
12532 }
12533
12534 *nunits_vectype_out = nunits_vectype;
12535 return opt_result::success ();
12536 }