1 /* Statement Analysis and Transformation for Vectorization
2 Copyright (C) 2003-2021 Free Software Foundation, Inc.
3 Contributed by Dorit Naishlos <dorit@il.ibm.com>
4 and Ira Rosen <irar@il.ibm.com>
5
6 This file is part of GCC.
7
8 GCC is free software; you can redistribute it and/or modify it under
9 the terms of the GNU General Public License as published by the Free
10 Software Foundation; either version 3, or (at your option) any later
11 version.
12
13 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
14 WARRANTY; without even the implied warranty of MERCHANTABILITY or
15 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
16 for more details.
17
18 You should have received a copy of the GNU General Public License
19 along with GCC; see the file COPYING3. If not see
20 <http://www.gnu.org/licenses/>. */
21
22 #include "config.h"
23 #include "system.h"
24 #include "coretypes.h"
25 #include "backend.h"
26 #include "target.h"
27 #include "rtl.h"
28 #include "tree.h"
29 #include "gimple.h"
30 #include "ssa.h"
31 #include "optabs-tree.h"
32 #include "insn-config.h"
33 #include "recog.h" /* FIXME: for insn_data */
34 #include "cgraph.h"
35 #include "dumpfile.h"
36 #include "alias.h"
37 #include "fold-const.h"
38 #include "stor-layout.h"
39 #include "tree-eh.h"
40 #include "gimplify.h"
41 #include "gimple-iterator.h"
42 #include "gimplify-me.h"
43 #include "tree-cfg.h"
44 #include "tree-ssa-loop-manip.h"
45 #include "cfgloop.h"
46 #include "explow.h"
47 #include "tree-ssa-loop.h"
48 #include "tree-scalar-evolution.h"
49 #include "tree-vectorizer.h"
50 #include "builtins.h"
51 #include "internal-fn.h"
52 #include "tree-vector-builder.h"
53 #include "vec-perm-indices.h"
54 #include "tree-ssa-loop-niter.h"
55 #include "gimple-fold.h"
56 #include "regs.h"
57 #include "attribs.h"
58
59 /* For lang_hooks.types.type_for_mode. */
60 #include "langhooks.h"
61
62 /* Return the vectorized type for the given statement. */
63
64 tree
65 stmt_vectype (class _stmt_vec_info *stmt_info)
66 {
67 return STMT_VINFO_VECTYPE (stmt_info);
68 }
69
70 /* Return TRUE iff the given statement is in an inner loop relative to
71 the loop being vectorized. */
72 bool
73 stmt_in_inner_loop_p (vec_info *vinfo, class _stmt_vec_info *stmt_info)
74 {
75 gimple *stmt = STMT_VINFO_STMT (stmt_info);
76 basic_block bb = gimple_bb (stmt);
77 loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
78 class loop* loop;
79
80 if (!loop_vinfo)
81 return false;
82
83 loop = LOOP_VINFO_LOOP (loop_vinfo);
84
85 return (bb->loop_father == loop->inner);
86 }
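/* As a rough illustration (hypothetical loop nest), when the outer loop
   below is the loop being vectorized:

     for (i = 0; i < n; i++)        <-- LOOP_VINFO_LOOP (loop_vinfo)
       for (j = 0; j < m; j++)      <-- loop->inner
         a[i][j] = 0;

   only statements whose basic block belongs to the inner j-loop satisfy
   stmt_in_inner_loop_p.  */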
87
88 /* Record the cost of a statement, either by directly informing the
89 target model or by saving it in a vector for later processing.
90 Return a preliminary estimate of the statement's cost. */
91
92 unsigned
93 record_stmt_cost (stmt_vector_for_cost *body_cost_vec, int count,
94 enum vect_cost_for_stmt kind, stmt_vec_info stmt_info,
95 tree vectype, int misalign,
96 enum vect_cost_model_location where)
97 {
98 if ((kind == vector_load || kind == unaligned_load)
99 && (stmt_info && STMT_VINFO_GATHER_SCATTER_P (stmt_info)))
100 kind = vector_gather_load;
101 if ((kind == vector_store || kind == unaligned_store)
102 && (stmt_info && STMT_VINFO_GATHER_SCATTER_P (stmt_info)))
103 kind = vector_scatter_store;
104
105 stmt_info_for_cost si = { count, kind, where, stmt_info, vectype, misalign };
106 body_cost_vec->safe_push (si);
107
108 return (unsigned)
109 (builtin_vectorization_cost (kind, vectype, misalign) * count);
110 }
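/* A minimal usage sketch (the surrounding variables are assumed to exist
   at a typical call site): costing NCOPIES vector statements in the loop
   body looks roughly like

     inside_cost += record_stmt_cost (cost_vec, ncopies, vector_stmt,
                                      stmt_info, vectype, 0, vect_body);

   The returned value is only a preliminary estimate; the final cost is
   computed when the cost vector is handed to the target cost model.  */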
111
112 /* Return a variable of type ELEM_TYPE[NELEMS]. */
113
114 static tree
115 create_vector_array (tree elem_type, unsigned HOST_WIDE_INT nelems)
116 {
117 return create_tmp_var (build_array_type_nelts (elem_type, nelems),
118 "vect_array");
119 }
120
121 /* ARRAY is an array of vectors created by create_vector_array.
122 Return an SSA_NAME for the vector in index N. The reference
123 is part of the vectorization of STMT_INFO and the vector is associated
124 with scalar destination SCALAR_DEST. */
125
126 static tree
127 read_vector_array (vec_info *vinfo,
128 stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
129 tree scalar_dest, tree array, unsigned HOST_WIDE_INT n)
130 {
131 tree vect_type, vect, vect_name, array_ref;
132 gimple *new_stmt;
133
134 gcc_assert (TREE_CODE (TREE_TYPE (array)) == ARRAY_TYPE);
135 vect_type = TREE_TYPE (TREE_TYPE (array));
136 vect = vect_create_destination_var (scalar_dest, vect_type);
137 array_ref = build4 (ARRAY_REF, vect_type, array,
138 build_int_cst (size_type_node, n),
139 NULL_TREE, NULL_TREE);
140
141 new_stmt = gimple_build_assign (vect, array_ref);
142 vect_name = make_ssa_name (vect, new_stmt);
143 gimple_assign_set_lhs (new_stmt, vect_name);
144 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
145
146 return vect_name;
147 }
148
149 /* ARRAY is an array of vectors created by create_vector_array.
150 Emit code to store SSA_NAME VECT in index N of the array.
151 The store is part of the vectorization of STMT_INFO. */
152
153 static void
154 write_vector_array (vec_info *vinfo,
155 stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
156 tree vect, tree array, unsigned HOST_WIDE_INT n)
157 {
158 tree array_ref;
159 gimple *new_stmt;
160
161 array_ref = build4 (ARRAY_REF, TREE_TYPE (vect), array,
162 build_int_cst (size_type_node, n),
163 NULL_TREE, NULL_TREE);
164
165 new_stmt = gimple_build_assign (array_ref, vect);
166 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
167 }
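/* A hedged sketch of how these helpers fit together for a load-lanes
   style access (names are illustrative): the caller creates
   ARRAY = create_vector_array (vectype, group_size), fills it from
   memory with a single load-lanes style operation, and then extracts
   vector number I for each member of the group with

     vec = read_vector_array (vinfo, stmt_info, gsi, scalar_dest,
                              array, i);

   Stores go the other way round via write_vector_array.  */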
168
169 /* PTR is a pointer to an array of type TYPE. Return a representation
170 of *PTR. The memory reference replaces those in FIRST_DR
171 (and its group). */
172
173 static tree
174 create_array_ref (tree type, tree ptr, tree alias_ptr_type)
175 {
176 tree mem_ref;
177
178 mem_ref = build2 (MEM_REF, type, ptr, build_int_cst (alias_ptr_type, 0));
179 /* Arrays have the same alignment as their type. */
180 set_ptr_info_alignment (get_ptr_info (ptr), TYPE_ALIGN_UNIT (type), 0);
181 return mem_ref;
182 }
183
184 /* Add a clobber of variable VAR to the vectorization of STMT_INFO.
185 Emit the clobber before *GSI. */
186
187 static void
188 vect_clobber_variable (vec_info *vinfo, stmt_vec_info stmt_info,
189 gimple_stmt_iterator *gsi, tree var)
190 {
191 tree clobber = build_clobber (TREE_TYPE (var));
192 gimple *new_stmt = gimple_build_assign (var, clobber);
193 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
194 }
195
196 /* Utility functions used by vect_mark_stmts_to_be_vectorized. */
197
198 /* Function vect_mark_relevant.
199
200 Mark STMT_INFO as "relevant for vectorization" and add it to WORKLIST. */
201
202 static void
203 vect_mark_relevant (vec<stmt_vec_info> *worklist, stmt_vec_info stmt_info,
204 enum vect_relevant relevant, bool live_p)
205 {
206 enum vect_relevant save_relevant = STMT_VINFO_RELEVANT (stmt_info);
207 bool save_live_p = STMT_VINFO_LIVE_P (stmt_info);
208
209 if (dump_enabled_p ())
210 dump_printf_loc (MSG_NOTE, vect_location,
211 "mark relevant %d, live %d: %G", relevant, live_p,
212 stmt_info->stmt);
213
214 /* If this stmt is an original stmt in a pattern, we might need to mark its
215 related pattern stmt instead of the original stmt. However, such stmts
216 may have their own uses that are not in any pattern; in such cases the
217 stmt itself should be marked. */
218 if (STMT_VINFO_IN_PATTERN_P (stmt_info))
219 {
220 /* This is the last stmt in a sequence that was detected as a
221 pattern that can potentially be vectorized. Don't mark the stmt
222 as relevant/live because it's not going to be vectorized.
223 Instead mark the pattern-stmt that replaces it. */
224
225 if (dump_enabled_p ())
226 dump_printf_loc (MSG_NOTE, vect_location,
227 "last stmt in pattern. don't mark"
228 " relevant/live.\n");
229 stmt_vec_info old_stmt_info = stmt_info;
230 stmt_info = STMT_VINFO_RELATED_STMT (stmt_info);
231 gcc_assert (STMT_VINFO_RELATED_STMT (stmt_info) == old_stmt_info);
232 save_relevant = STMT_VINFO_RELEVANT (stmt_info);
233 save_live_p = STMT_VINFO_LIVE_P (stmt_info);
234 }
235
236 STMT_VINFO_LIVE_P (stmt_info) |= live_p;
237 if (relevant > STMT_VINFO_RELEVANT (stmt_info))
238 STMT_VINFO_RELEVANT (stmt_info) = relevant;
239
240 if (STMT_VINFO_RELEVANT (stmt_info) == save_relevant
241 && STMT_VINFO_LIVE_P (stmt_info) == save_live_p)
242 {
243 if (dump_enabled_p ())
244 dump_printf_loc (MSG_NOTE, vect_location,
245 "already marked relevant/live.\n");
246 return;
247 }
248
249 worklist->safe_push (stmt_info);
250 }
251
252
253 /* Function is_simple_and_all_uses_invariant
254
255 Return true if STMT_INFO is simple and all uses of it are invariant. */
256
257 bool
258 is_simple_and_all_uses_invariant (stmt_vec_info stmt_info,
259 loop_vec_info loop_vinfo)
260 {
261 tree op;
262 ssa_op_iter iter;
263
264 gassign *stmt = dyn_cast <gassign *> (stmt_info->stmt);
265 if (!stmt)
266 return false;
267
268 FOR_EACH_SSA_TREE_OPERAND (op, stmt, iter, SSA_OP_USE)
269 {
270 enum vect_def_type dt = vect_uninitialized_def;
271
272 if (!vect_is_simple_use (op, loop_vinfo, &dt))
273 {
274 if (dump_enabled_p ())
275 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
276 "use not simple.\n");
277 return false;
278 }
279
280 if (dt != vect_external_def && dt != vect_constant_def)
281 return false;
282 }
283 return true;
284 }
285
286 /* Function vect_stmt_relevant_p.
287
288 Return true if STMT_INFO, in the loop that is represented by LOOP_VINFO,
289 is "relevant for vectorization".
290
291 A stmt is considered "relevant for vectorization" if:
292 - it has uses outside the loop.
293 - it has vdefs (it alters memory).
294 - it is a control stmt in the loop (except for the exit condition).
295
296 CHECKME: what other side effects would the vectorizer allow? */
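/* For illustration (hypothetical scalar loop):

     for (i = 0; i < n; i++)
       {
         a[i] = b[i] + 1;   <-- relevant: has a vdef (stores to memory)
         s = b[i] * 2;      <-- live if s is used after the loop
       }

   The store gets vect_used_in_scope; the computation of s whose only use
   is outside the loop sets *LIVE_P (and typically becomes
   vect_used_only_live).  */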
297
298 static bool
299 vect_stmt_relevant_p (stmt_vec_info stmt_info, loop_vec_info loop_vinfo,
300 enum vect_relevant *relevant, bool *live_p)
301 {
302 class loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
303 ssa_op_iter op_iter;
304 imm_use_iterator imm_iter;
305 use_operand_p use_p;
306 def_operand_p def_p;
307
308 *relevant = vect_unused_in_scope;
309 *live_p = false;
310
311 /* cond stmt other than loop exit cond. */
312 if (is_ctrl_stmt (stmt_info->stmt)
313 && STMT_VINFO_TYPE (stmt_info) != loop_exit_ctrl_vec_info_type)
314 *relevant = vect_used_in_scope;
315
316 /* changing memory. */
317 if (gimple_code (stmt_info->stmt) != GIMPLE_PHI)
318 if (gimple_vdef (stmt_info->stmt)
319 && !gimple_clobber_p (stmt_info->stmt))
320 {
321 if (dump_enabled_p ())
322 dump_printf_loc (MSG_NOTE, vect_location,
323 "vec_stmt_relevant_p: stmt has vdefs.\n");
324 *relevant = vect_used_in_scope;
325 }
326
327 /* uses outside the loop. */
328 FOR_EACH_PHI_OR_STMT_DEF (def_p, stmt_info->stmt, op_iter, SSA_OP_DEF)
329 {
330 FOR_EACH_IMM_USE_FAST (use_p, imm_iter, DEF_FROM_PTR (def_p))
331 {
332 basic_block bb = gimple_bb (USE_STMT (use_p));
333 if (!flow_bb_inside_loop_p (loop, bb))
334 {
335 if (is_gimple_debug (USE_STMT (use_p)))
336 continue;
337
338 if (dump_enabled_p ())
339 dump_printf_loc (MSG_NOTE, vect_location,
340 "vec_stmt_relevant_p: used out of loop.\n");
341
342 /* We expect all such uses to be in the loop exit phis
343 (because of loop-closed SSA form).  */
344 gcc_assert (gimple_code (USE_STMT (use_p)) == GIMPLE_PHI);
345 gcc_assert (bb == single_exit (loop)->dest);
346
347 *live_p = true;
348 }
349 }
350 }
351
352 if (*live_p && *relevant == vect_unused_in_scope
353 && !is_simple_and_all_uses_invariant (stmt_info, loop_vinfo))
354 {
355 if (dump_enabled_p ())
356 dump_printf_loc (MSG_NOTE, vect_location,
357 "vec_stmt_relevant_p: stmt live but not relevant.\n");
358 *relevant = vect_used_only_live;
359 }
360
361 return (*live_p || *relevant);
362 }
363
364
365 /* Function exist_non_indexing_operands_for_use_p
366
367 USE is one of the uses attached to STMT_INFO. Check if USE is
368 used in STMT_INFO for anything other than indexing an array. */
369
370 static bool
371 exist_non_indexing_operands_for_use_p (tree use, stmt_vec_info stmt_info)
372 {
373 tree operand;
374
375 /* USE corresponds to some operand in STMT. If there is no data
376 reference in STMT, then any operand that corresponds to USE
377 is not indexing an array. */
378 if (!STMT_VINFO_DATA_REF (stmt_info))
379 return true;
380
381 /* STMT has a data_ref. FORNOW this means that it is one of
382 the following forms:
383 -1- ARRAY_REF = var
384 -2- var = ARRAY_REF
385 (This should have been verified in analyze_data_refs).
386
387 'var' in the second case corresponds to a def, not a use,
388 so USE cannot correspond to any operands that are not used
389 for array indexing.
390
391 Therefore, all we need to check is if STMT falls into the
392 first case, and whether var corresponds to USE. */
393
394 gassign *assign = dyn_cast <gassign *> (stmt_info->stmt);
395 if (!assign || !gimple_assign_copy_p (assign))
396 {
397 gcall *call = dyn_cast <gcall *> (stmt_info->stmt);
398 if (call && gimple_call_internal_p (call))
399 {
400 internal_fn ifn = gimple_call_internal_fn (call);
401 int mask_index = internal_fn_mask_index (ifn);
402 if (mask_index >= 0
403 && use == gimple_call_arg (call, mask_index))
404 return true;
405 int stored_value_index = internal_fn_stored_value_index (ifn);
406 if (stored_value_index >= 0
407 && use == gimple_call_arg (call, stored_value_index))
408 return true;
409 if (internal_gather_scatter_fn_p (ifn)
410 && use == gimple_call_arg (call, 1))
411 return true;
412 }
413 return false;
414 }
415
416 if (TREE_CODE (gimple_assign_lhs (assign)) == SSA_NAME)
417 return false;
418 operand = gimple_assign_rhs1 (assign);
419 if (TREE_CODE (operand) != SSA_NAME)
420 return false;
421
422 if (operand == use)
423 return true;
424
425 return false;
426 }
427
428
429 /*
430 Function process_use.
431
432 Inputs:
433 - a USE in STMT_VINFO in a loop represented by LOOP_VINFO
434 - RELEVANT - enum value to be set in the STMT_VINFO of the stmt
435 that defined USE. This is done by calling mark_relevant and passing it
436 the WORKLIST (to add DEF_STMT to the WORKLIST in case it is relevant).
437 - FORCE is true if exist_non_indexing_operands_for_use_p check shouldn't
438 be performed.
439
440 Outputs:
441 Generally, LIVE_P and RELEVANT are used to define the liveness and
442 relevance info of the DEF_STMT of this USE:
443 STMT_VINFO_LIVE_P (DEF_stmt_vinfo) <-- live_p
444 STMT_VINFO_RELEVANT (DEF_stmt_vinfo) <-- relevant
445 Exceptions:
446 - case 1: If USE is used only for address computations (e.g. array indexing),
447 which does not need to be directly vectorized, then the liveness/relevance
448 of the respective DEF_STMT is left unchanged.
449 - case 2: If STMT_VINFO is a reduction phi and DEF_STMT is a reduction stmt,
450 DEF_STMT is marked relevant and forced live; no nesting adjustment applies.
451 - case 3: If DEF_STMT and STMT_VINFO are in different nests, then
452 "relevant" will be modified accordingly.
453
454 Return true if everything is as expected. Return false otherwise. */
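/* For illustration (hypothetical statement): in

     x = a[i];

   the only use of 'i' is in the address computation, so case 1 applies
   and the statement defining 'i' is not marked relevant or live on its
   behalf.  */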
455
456 static opt_result
457 process_use (stmt_vec_info stmt_vinfo, tree use, loop_vec_info loop_vinfo,
458 enum vect_relevant relevant, vec<stmt_vec_info> *worklist,
459 bool force)
460 {
461 stmt_vec_info dstmt_vinfo;
462 enum vect_def_type dt;
463
464 /* case 1: we are only interested in uses that need to be vectorized. Uses
465 that are used for address computation are not considered relevant. */
466 if (!force && !exist_non_indexing_operands_for_use_p (use, stmt_vinfo))
467 return opt_result::success ();
468
469 if (!vect_is_simple_use (use, loop_vinfo, &dt, &dstmt_vinfo))
470 return opt_result::failure_at (stmt_vinfo->stmt,
471 "not vectorized:"
472 " unsupported use in stmt.\n");
473
474 if (!dstmt_vinfo)
475 return opt_result::success ();
476
477 basic_block def_bb = gimple_bb (dstmt_vinfo->stmt);
478 basic_block bb = gimple_bb (stmt_vinfo->stmt);
479
480 /* case 2: A reduction phi (STMT) defined by a reduction stmt (DSTMT_VINFO).
481 We have to force the stmt live since the epilogue loop needs it to
482 continue computing the reduction. */
483 if (gimple_code (stmt_vinfo->stmt) == GIMPLE_PHI
484 && STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_reduction_def
485 && gimple_code (dstmt_vinfo->stmt) != GIMPLE_PHI
486 && STMT_VINFO_DEF_TYPE (dstmt_vinfo) == vect_reduction_def
487 && bb->loop_father == def_bb->loop_father)
488 {
489 if (dump_enabled_p ())
490 dump_printf_loc (MSG_NOTE, vect_location,
491 "reduc-stmt defining reduc-phi in the same nest.\n");
492 vect_mark_relevant (worklist, dstmt_vinfo, relevant, true);
493 return opt_result::success ();
494 }
495
496 /* case 3a: outer-loop stmt defining an inner-loop stmt:
497 outer-loop-header-bb:
498 d = dstmt_vinfo
499 inner-loop:
500 stmt # use (d)
501 outer-loop-tail-bb:
502 ... */
503 if (flow_loop_nested_p (def_bb->loop_father, bb->loop_father))
504 {
505 if (dump_enabled_p ())
506 dump_printf_loc (MSG_NOTE, vect_location,
507 "outer-loop def-stmt defining inner-loop stmt.\n");
508
509 switch (relevant)
510 {
511 case vect_unused_in_scope:
512 relevant = (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_nested_cycle) ?
513 vect_used_in_scope : vect_unused_in_scope;
514 break;
515
516 case vect_used_in_outer_by_reduction:
517 gcc_assert (STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_reduction_def);
518 relevant = vect_used_by_reduction;
519 break;
520
521 case vect_used_in_outer:
522 gcc_assert (STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_reduction_def);
523 relevant = vect_used_in_scope;
524 break;
525
526 case vect_used_in_scope:
527 break;
528
529 default:
530 gcc_unreachable ();
531 }
532 }
533
534 /* case 3b: inner-loop stmt defining an outer-loop stmt:
535 outer-loop-header-bb:
536 ...
537 inner-loop:
538 d = dstmt_vinfo
539 outer-loop-tail-bb (or outer-loop-exit-bb in double reduction):
540 stmt # use (d) */
541 else if (flow_loop_nested_p (bb->loop_father, def_bb->loop_father))
542 {
543 if (dump_enabled_p ())
544 dump_printf_loc (MSG_NOTE, vect_location,
545 "inner-loop def-stmt defining outer-loop stmt.\n");
546
547 switch (relevant)
548 {
549 case vect_unused_in_scope:
550 relevant = (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_reduction_def
551 || STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_double_reduction_def) ?
552 vect_used_in_outer_by_reduction : vect_unused_in_scope;
553 break;
554
555 case vect_used_by_reduction:
556 case vect_used_only_live:
557 relevant = vect_used_in_outer_by_reduction;
558 break;
559
560 case vect_used_in_scope:
561 relevant = vect_used_in_outer;
562 break;
563
564 default:
565 gcc_unreachable ();
566 }
567 }
568 /* We are also not interested in uses on loop PHI backedges that are
569 inductions. Otherwise we'll needlessly vectorize the IV increment
570 and cause hybrid SLP for SLP inductions. Unless the PHI is live
571 of course. */
572 else if (gimple_code (stmt_vinfo->stmt) == GIMPLE_PHI
573 && STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_induction_def
574 && ! STMT_VINFO_LIVE_P (stmt_vinfo)
575 && (PHI_ARG_DEF_FROM_EDGE (stmt_vinfo->stmt,
576 loop_latch_edge (bb->loop_father))
577 == use))
578 {
579 if (dump_enabled_p ())
580 dump_printf_loc (MSG_NOTE, vect_location,
581 "induction value on backedge.\n");
582 return opt_result::success ();
583 }
584
585
586 vect_mark_relevant (worklist, dstmt_vinfo, relevant, false);
587 return opt_result::success ();
588 }
589
590
591 /* Function vect_mark_stmts_to_be_vectorized.
592
593 Not all stmts in the loop need to be vectorized. For example:
594
595 for i...
596 for j...
597 1. T0 = i + j
598 2. T1 = a[T0]
599
600 3. j = j + 1
601
602 Stmts 1 and 3 do not need to be vectorized, because loop control and
603 addressing of vectorized data-refs are handled differently.
604
605 This pass detects such stmts. */
606
607 opt_result
608 vect_mark_stmts_to_be_vectorized (loop_vec_info loop_vinfo, bool *fatal)
609 {
610 class loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
611 basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
612 unsigned int nbbs = loop->num_nodes;
613 gimple_stmt_iterator si;
614 unsigned int i;
615 basic_block bb;
616 bool live_p;
617 enum vect_relevant relevant;
618
619 DUMP_VECT_SCOPE ("vect_mark_stmts_to_be_vectorized");
620
621 auto_vec<stmt_vec_info, 64> worklist;
622
623 /* 1. Init worklist. */
624 for (i = 0; i < nbbs; i++)
625 {
626 bb = bbs[i];
627 for (si = gsi_start_phis (bb); !gsi_end_p (si); gsi_next (&si))
628 {
629 stmt_vec_info phi_info = loop_vinfo->lookup_stmt (gsi_stmt (si));
630 if (dump_enabled_p ())
631 dump_printf_loc (MSG_NOTE, vect_location, "init: phi relevant? %G",
632 phi_info->stmt);
633
634 if (vect_stmt_relevant_p (phi_info, loop_vinfo, &relevant, &live_p))
635 vect_mark_relevant (&worklist, phi_info, relevant, live_p);
636 }
637 for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
638 {
639 if (is_gimple_debug (gsi_stmt (si)))
640 continue;
641 stmt_vec_info stmt_info = loop_vinfo->lookup_stmt (gsi_stmt (si));
642 if (dump_enabled_p ())
643 dump_printf_loc (MSG_NOTE, vect_location,
644 "init: stmt relevant? %G", stmt_info->stmt);
645
646 if (vect_stmt_relevant_p (stmt_info, loop_vinfo, &relevant, &live_p))
647 vect_mark_relevant (&worklist, stmt_info, relevant, live_p);
648 }
649 }
650
651 /* 2. Process_worklist */
652 while (worklist.length () > 0)
653 {
654 use_operand_p use_p;
655 ssa_op_iter iter;
656
657 stmt_vec_info stmt_vinfo = worklist.pop ();
658 if (dump_enabled_p ())
659 dump_printf_loc (MSG_NOTE, vect_location,
660 "worklist: examine stmt: %G", stmt_vinfo->stmt);
661
662 /* Examine the USEs of STMT. For each USE, mark the stmt that defines it
663 (DEF_STMT) as relevant/irrelevant according to the relevance property
664 of STMT. */
665 relevant = STMT_VINFO_RELEVANT (stmt_vinfo);
666
667 /* Generally, the relevance property of STMT (in STMT_VINFO_RELEVANT) is
668 propagated as is to the DEF_STMTs of its USEs.
669
670 One exception is when STMT has been identified as defining a reduction
671 variable; in this case we set the relevance to vect_used_by_reduction.
672 This is because we distinguish between two kinds of relevant stmts -
673 those that are used by a reduction computation, and those that are
674 (also) used by a regular computation. This allows us later on to
675 identify stmts that are used solely by a reduction, and therefore the
676 order of the results that they produce does not have to be kept. */
677
678 switch (STMT_VINFO_DEF_TYPE (stmt_vinfo))
679 {
680 case vect_reduction_def:
681 gcc_assert (relevant != vect_unused_in_scope);
682 if (relevant != vect_unused_in_scope
683 && relevant != vect_used_in_scope
684 && relevant != vect_used_by_reduction
685 && relevant != vect_used_only_live)
686 return opt_result::failure_at
687 (stmt_vinfo->stmt, "unsupported use of reduction.\n");
688 break;
689
690 case vect_nested_cycle:
691 if (relevant != vect_unused_in_scope
692 && relevant != vect_used_in_outer_by_reduction
693 && relevant != vect_used_in_outer)
694 return opt_result::failure_at
695 (stmt_vinfo->stmt, "unsupported use of nested cycle.\n");
696 break;
697
698 case vect_double_reduction_def:
699 if (relevant != vect_unused_in_scope
700 && relevant != vect_used_by_reduction
701 && relevant != vect_used_only_live)
702 return opt_result::failure_at
703 (stmt_vinfo->stmt, "unsupported use of double reduction.\n");
704 break;
705
706 default:
707 break;
708 }
709
710 if (is_pattern_stmt_p (stmt_vinfo))
711 {
712 /* Pattern statements are not inserted into the code, so
713 FOR_EACH_PHI_OR_STMT_USE optimizes their operands out, and we
714 have to scan the RHS or function arguments instead. */
715 if (gassign *assign = dyn_cast <gassign *> (stmt_vinfo->stmt))
716 {
717 enum tree_code rhs_code = gimple_assign_rhs_code (assign);
718 tree op = gimple_assign_rhs1 (assign);
719
720 i = 1;
721 if (rhs_code == COND_EXPR && COMPARISON_CLASS_P (op))
722 {
723 opt_result res
724 = process_use (stmt_vinfo, TREE_OPERAND (op, 0),
725 loop_vinfo, relevant, &worklist, false);
726 if (!res)
727 return res;
728 res = process_use (stmt_vinfo, TREE_OPERAND (op, 1),
729 loop_vinfo, relevant, &worklist, false);
730 if (!res)
731 return res;
732 i = 2;
733 }
734 for (; i < gimple_num_ops (assign); i++)
735 {
736 op = gimple_op (assign, i);
737 if (TREE_CODE (op) == SSA_NAME)
738 {
739 opt_result res
740 = process_use (stmt_vinfo, op, loop_vinfo, relevant,
741 &worklist, false);
742 if (!res)
743 return res;
744 }
745 }
746 }
747 else if (gcall *call = dyn_cast <gcall *> (stmt_vinfo->stmt))
748 {
749 for (i = 0; i < gimple_call_num_args (call); i++)
750 {
751 tree arg = gimple_call_arg (call, i);
752 opt_result res
753 = process_use (stmt_vinfo, arg, loop_vinfo, relevant,
754 &worklist, false);
755 if (!res)
756 return res;
757 }
758 }
759 }
760 else
761 FOR_EACH_PHI_OR_STMT_USE (use_p, stmt_vinfo->stmt, iter, SSA_OP_USE)
762 {
763 tree op = USE_FROM_PTR (use_p);
764 opt_result res
765 = process_use (stmt_vinfo, op, loop_vinfo, relevant,
766 &worklist, false);
767 if (!res)
768 return res;
769 }
770
771 if (STMT_VINFO_GATHER_SCATTER_P (stmt_vinfo))
772 {
773 gather_scatter_info gs_info;
774 if (!vect_check_gather_scatter (stmt_vinfo, loop_vinfo, &gs_info))
775 gcc_unreachable ();
776 opt_result res
777 = process_use (stmt_vinfo, gs_info.offset, loop_vinfo, relevant,
778 &worklist, true);
779 if (!res)
780 {
781 if (fatal)
782 *fatal = false;
783 return res;
784 }
785 }
786 } /* while worklist */
787
788 return opt_result::success ();
789 }
790
791 /* Function vect_model_simple_cost.
792
793 Models cost for simple operations, i.e. those that only emit ncopies of a
794 single op. Right now, this does not account for multiple insns that could
795 be generated for the single vector op. We will handle that shortly. */
796
797 static void
798 vect_model_simple_cost (vec_info *,
799 stmt_vec_info stmt_info, int ncopies,
800 enum vect_def_type *dt,
801 int ndts,
802 slp_tree node,
803 stmt_vector_for_cost *cost_vec,
804 vect_cost_for_stmt kind = vector_stmt)
805 {
806 int inside_cost = 0, prologue_cost = 0;
807
808 gcc_assert (cost_vec != NULL);
809
810 /* ??? Somehow we need to fix this at the callers. */
811 if (node)
812 ncopies = SLP_TREE_NUMBER_OF_VEC_STMTS (node);
813
814 if (!node)
815 /* Cost the "broadcast" of a scalar operand into a vector operand.
816 Use scalar_to_vec to cost the broadcast, as elsewhere in the vector
817 cost model. */
818 for (int i = 0; i < ndts; i++)
819 if (dt[i] == vect_constant_def || dt[i] == vect_external_def)
820 prologue_cost += record_stmt_cost (cost_vec, 1, scalar_to_vec,
821 stmt_info, 0, vect_prologue);
822
823 /* Pass the inside-of-loop statements to the target-specific cost model. */
824 inside_cost += record_stmt_cost (cost_vec, ncopies, kind,
825 stmt_info, 0, vect_body);
826
827 if (dump_enabled_p ())
828 dump_printf_loc (MSG_NOTE, vect_location,
829 "vect_model_simple_cost: inside_cost = %d, "
830 "prologue_cost = %d .\n", inside_cost, prologue_cost);
831 }
832
833
834 /* Model cost for type demotion and promotion operations. PWR is
835 normally zero for single-step promotions and demotions. It will be
836 one if two-step promotion/demotion is required, and so on. NCOPIES
837 is the number of vector results (and thus number of instructions)
838 for the narrowest end of the operation chain. Each additional
839 step doubles the number of instructions required. */
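/* For instance (hypothetical numbers): a two-step promotion has PWR == 1,
   so with NCOPIES == 2 the loop below costs 2 + 4 == 6 vec_promote_demote
   statements in the loop body.  */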
840
841 static void
842 vect_model_promotion_demotion_cost (stmt_vec_info stmt_info,
843 enum vect_def_type *dt,
844 unsigned int ncopies, int pwr,
845 stmt_vector_for_cost *cost_vec)
846 {
847 int i;
848 int inside_cost = 0, prologue_cost = 0;
849
850 for (i = 0; i < pwr + 1; i++)
851 {
852 inside_cost += record_stmt_cost (cost_vec, ncopies, vec_promote_demote,
853 stmt_info, 0, vect_body);
854 ncopies *= 2;
855 }
856
857 /* FORNOW: Assuming maximum 2 args per stmt. */
858 for (i = 0; i < 2; i++)
859 if (dt[i] == vect_constant_def || dt[i] == vect_external_def)
860 prologue_cost += record_stmt_cost (cost_vec, 1, vector_stmt,
861 stmt_info, 0, vect_prologue);
862
863 if (dump_enabled_p ())
864 dump_printf_loc (MSG_NOTE, vect_location,
865 "vect_model_promotion_demotion_cost: inside_cost = %d, "
866 "prologue_cost = %d .\n", inside_cost, prologue_cost);
867 }
868
869 /* Returns true if the current function returns DECL. */
870
871 static bool
872 cfun_returns (tree decl)
873 {
874 edge_iterator ei;
875 edge e;
876 FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR_FOR_FN (cfun)->preds)
877 {
878 greturn *ret = safe_dyn_cast <greturn *> (last_stmt (e->src));
879 if (!ret)
880 continue;
881 if (gimple_return_retval (ret) == decl)
882 return true;
883 /* We often end up with an aggregate copy to the result decl;
884 handle that case as well. First skip intermediate clobbers
885 though. */
886 gimple *def = ret;
887 do
888 {
889 def = SSA_NAME_DEF_STMT (gimple_vuse (def));
890 }
891 while (gimple_clobber_p (def));
892 if (is_a <gassign *> (def)
893 && gimple_assign_lhs (def) == gimple_return_retval (ret)
894 && gimple_assign_rhs1 (def) == decl)
895 return true;
896 }
897 return false;
898 }
899
900 /* Function vect_model_store_cost
901
902 Models cost for stores. In the case of grouped accesses, one access
903 has the overhead of the grouped access attributed to it. */
904
905 static void
906 vect_model_store_cost (vec_info *vinfo, stmt_vec_info stmt_info, int ncopies,
907 vect_memory_access_type memory_access_type,
908 vec_load_store_type vls_type, slp_tree slp_node,
909 stmt_vector_for_cost *cost_vec)
910 {
911 unsigned int inside_cost = 0, prologue_cost = 0;
912 stmt_vec_info first_stmt_info = stmt_info;
913 bool grouped_access_p = STMT_VINFO_GROUPED_ACCESS (stmt_info);
914
915 /* ??? Somehow we need to fix this at the callers. */
916 if (slp_node)
917 ncopies = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
918
919 if (vls_type == VLS_STORE_INVARIANT)
920 {
921 if (!slp_node)
922 prologue_cost += record_stmt_cost (cost_vec, 1, scalar_to_vec,
923 stmt_info, 0, vect_prologue);
924 }
925
926 /* Grouped stores update all elements in the group at once,
927 so we want the DR for the first statement. */
928 if (!slp_node && grouped_access_p)
929 first_stmt_info = DR_GROUP_FIRST_ELEMENT (stmt_info);
930
931 /* True if we should include any once-per-group costs as well as
932 the cost of the statement itself. For SLP we only get called
933 once per group anyhow. */
934 bool first_stmt_p = (first_stmt_info == stmt_info);
935
936 /* We assume that the cost of a single store-lanes instruction is
937 equivalent to the cost of DR_GROUP_SIZE separate stores. If a grouped
938 access is instead being provided by a permute-and-store operation,
939 include the cost of the permutes. */
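  /* For example (hypothetical numbers): with NCOPIES == 1 and a group of
     4 stores, the formula below costs 1 * ceil_log2 (4) * 4 == 8 vec_perm
     statements.  */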
940 if (first_stmt_p
941 && memory_access_type == VMAT_CONTIGUOUS_PERMUTE)
942 {
943 /* Uses high and low interleave or shuffle operations for each
944 needed permute. */
945 int group_size = DR_GROUP_SIZE (first_stmt_info);
946 int nstmts = ncopies * ceil_log2 (group_size) * group_size;
947 inside_cost = record_stmt_cost (cost_vec, nstmts, vec_perm,
948 stmt_info, 0, vect_body);
949
950 if (dump_enabled_p ())
951 dump_printf_loc (MSG_NOTE, vect_location,
952 "vect_model_store_cost: strided group_size = %d .\n",
953 group_size);
954 }
955
956 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
957 /* Costs of the stores. */
958 if (memory_access_type == VMAT_ELEMENTWISE
959 || memory_access_type == VMAT_GATHER_SCATTER)
960 {
961 /* N scalar stores plus extracting the elements. */
962 unsigned int assumed_nunits = vect_nunits_for_cost (vectype);
963 inside_cost += record_stmt_cost (cost_vec,
964 ncopies * assumed_nunits,
965 scalar_store, stmt_info, 0, vect_body);
966 }
967 else
968 vect_get_store_cost (vinfo, stmt_info, ncopies, &inside_cost, cost_vec);
969
970 if (memory_access_type == VMAT_ELEMENTWISE
971 || memory_access_type == VMAT_STRIDED_SLP)
972 {
973 /* N scalar stores plus extracting the elements. */
974 unsigned int assumed_nunits = vect_nunits_for_cost (vectype);
975 inside_cost += record_stmt_cost (cost_vec,
976 ncopies * assumed_nunits,
977 vec_to_scalar, stmt_info, 0, vect_body);
978 }
979
980 /* When vectorizing a store into the function result assign
981 a penalty if the function returns in a multi-register location.
982 In this case we assume we'll end up having to spill the
983 vector result and do piecewise loads as a conservative estimate. */
984 tree base = get_base_address (STMT_VINFO_DATA_REF (stmt_info)->ref);
985 if (base
986 && (TREE_CODE (base) == RESULT_DECL
987 || (DECL_P (base) && cfun_returns (base)))
988 && !aggregate_value_p (base, cfun->decl))
989 {
990 rtx reg = hard_function_value (TREE_TYPE (base), cfun->decl, 0, 1);
991 /* ??? Handle PARALLEL in some way. */
992 if (REG_P (reg))
993 {
994 int nregs = hard_regno_nregs (REGNO (reg), GET_MODE (reg));
995 /* Assume that a single reg-reg move is possible and cheap,
996 do not account for vector to gp register move cost. */
997 if (nregs > 1)
998 {
999 /* Spill. */
1000 prologue_cost += record_stmt_cost (cost_vec, ncopies,
1001 vector_store,
1002 stmt_info, 0, vect_epilogue);
1003 /* Loads. */
1004 prologue_cost += record_stmt_cost (cost_vec, ncopies * nregs,
1005 scalar_load,
1006 stmt_info, 0, vect_epilogue);
1007 }
1008 }
1009 }
1010
1011 if (dump_enabled_p ())
1012 dump_printf_loc (MSG_NOTE, vect_location,
1013 "vect_model_store_cost: inside_cost = %d, "
1014 "prologue_cost = %d .\n", inside_cost, prologue_cost);
1015 }
1016
1017
1018 /* Calculate cost of DR's memory access. */
1019 void
1020 vect_get_store_cost (vec_info *vinfo, stmt_vec_info stmt_info, int ncopies,
1021 unsigned int *inside_cost,
1022 stmt_vector_for_cost *body_cost_vec)
1023 {
1024 dr_vec_info *dr_info = STMT_VINFO_DR_INFO (stmt_info);
1025 int alignment_support_scheme
1026 = vect_supportable_dr_alignment (vinfo, dr_info, false);
1027
1028 switch (alignment_support_scheme)
1029 {
1030 case dr_aligned:
1031 {
1032 *inside_cost += record_stmt_cost (body_cost_vec, ncopies,
1033 vector_store, stmt_info, 0,
1034 vect_body);
1035
1036 if (dump_enabled_p ())
1037 dump_printf_loc (MSG_NOTE, vect_location,
1038 "vect_model_store_cost: aligned.\n");
1039 break;
1040 }
1041
1042 case dr_unaligned_supported:
1043 {
1044 /* Here, we assign an additional cost for the unaligned store. */
1045 *inside_cost += record_stmt_cost (body_cost_vec, ncopies,
1046 unaligned_store, stmt_info,
1047 DR_MISALIGNMENT (dr_info),
1048 vect_body);
1049 if (dump_enabled_p ())
1050 dump_printf_loc (MSG_NOTE, vect_location,
1051 "vect_model_store_cost: unaligned supported by "
1052 "hardware.\n");
1053 break;
1054 }
1055
1056 case dr_unaligned_unsupported:
1057 {
1058 *inside_cost = VECT_MAX_COST;
1059
1060 if (dump_enabled_p ())
1061 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1062 "vect_model_store_cost: unsupported access.\n");
1063 break;
1064 }
1065
1066 default:
1067 gcc_unreachable ();
1068 }
1069 }
1070
1071
1072 /* Function vect_model_load_cost
1073
1074 Models cost for loads. In the case of grouped accesses, one access has
1075 the overhead of the grouped access attributed to it. Since unaligned
1076 accesses are supported for loads, we also account for the costs of the
1077 access scheme chosen. */
1078
1079 static void
1080 vect_model_load_cost (vec_info *vinfo,
1081 stmt_vec_info stmt_info, unsigned ncopies, poly_uint64 vf,
1082 vect_memory_access_type memory_access_type,
1083 slp_tree slp_node,
1084 stmt_vector_for_cost *cost_vec)
1085 {
1086 unsigned int inside_cost = 0, prologue_cost = 0;
1087 bool grouped_access_p = STMT_VINFO_GROUPED_ACCESS (stmt_info);
1088
1089 gcc_assert (cost_vec);
1090
1091 /* ??? Somehow we need to fix this at the callers. */
1092 if (slp_node)
1093 ncopies = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
1094
1095 if (slp_node && SLP_TREE_LOAD_PERMUTATION (slp_node).exists ())
1096 {
1097 /* If the load is permuted then the alignment is determined by
1098 the first group element not by the first scalar stmt DR. */
1099 stmt_vec_info first_stmt_info = DR_GROUP_FIRST_ELEMENT (stmt_info);
1100 /* Record the cost for the permutation. */
1101 unsigned n_perms, n_loads;
1102 vect_transform_slp_perm_load (vinfo, slp_node, vNULL, NULL,
1103 vf, true, &n_perms, &n_loads);
1104 inside_cost += record_stmt_cost (cost_vec, n_perms, vec_perm,
1105 first_stmt_info, 0, vect_body);
1106
1107 /* And adjust the number of loads performed. This handles
1108 redundancies as well as loads that are later dead. */
1109 ncopies = n_loads;
1110 }
1111
1112 /* Grouped loads read all elements in the group at once,
1113 so we want the DR for the first statement. */
1114 stmt_vec_info first_stmt_info = stmt_info;
1115 if (!slp_node && grouped_access_p)
1116 first_stmt_info = DR_GROUP_FIRST_ELEMENT (stmt_info);
1117
1118 /* True if we should include any once-per-group costs as well as
1119 the cost of the statement itself. For SLP we only get called
1120 once per group anyhow. */
1121 bool first_stmt_p = (first_stmt_info == stmt_info);
1122
1123 /* We assume that the cost of a single load-lanes instruction is
1124 equivalent to the cost of DR_GROUP_SIZE separate loads. If a grouped
1125 access is instead being provided by a load-and-permute operation,
1126 include the cost of the permutes. */
1127 if (first_stmt_p
1128 && memory_access_type == VMAT_CONTIGUOUS_PERMUTE)
1129 {
1130 /* Uses even and odd extract operations or shuffle operations
1131 for each needed permute. */
1132 int group_size = DR_GROUP_SIZE (first_stmt_info);
1133 int nstmts = ncopies * ceil_log2 (group_size) * group_size;
1134 inside_cost += record_stmt_cost (cost_vec, nstmts, vec_perm,
1135 stmt_info, 0, vect_body);
1136
1137 if (dump_enabled_p ())
1138 dump_printf_loc (MSG_NOTE, vect_location,
1139 "vect_model_load_cost: strided group_size = %d .\n",
1140 group_size);
1141 }
1142
1143 /* The loads themselves. */
1144 if (memory_access_type == VMAT_ELEMENTWISE
1145 || memory_access_type == VMAT_GATHER_SCATTER)
1146 {
1147 /* N scalar loads plus gathering them into a vector. */
1148 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
1149 unsigned int assumed_nunits = vect_nunits_for_cost (vectype);
1150 inside_cost += record_stmt_cost (cost_vec,
1151 ncopies * assumed_nunits,
1152 scalar_load, stmt_info, 0, vect_body);
1153 }
1154 else
1155 vect_get_load_cost (vinfo, stmt_info, ncopies, first_stmt_p,
1156 &inside_cost, &prologue_cost,
1157 cost_vec, cost_vec, true);
1158 if (memory_access_type == VMAT_ELEMENTWISE
1159 || memory_access_type == VMAT_STRIDED_SLP)
1160 inside_cost += record_stmt_cost (cost_vec, ncopies, vec_construct,
1161 stmt_info, 0, vect_body);
1162
1163 if (dump_enabled_p ())
1164 dump_printf_loc (MSG_NOTE, vect_location,
1165 "vect_model_load_cost: inside_cost = %d, "
1166 "prologue_cost = %d .\n", inside_cost, prologue_cost);
1167 }
1168
1169
1170 /* Calculate cost of DR's memory access. */
1171 void
1172 vect_get_load_cost (vec_info *vinfo, stmt_vec_info stmt_info, int ncopies,
1173 bool add_realign_cost, unsigned int *inside_cost,
1174 unsigned int *prologue_cost,
1175 stmt_vector_for_cost *prologue_cost_vec,
1176 stmt_vector_for_cost *body_cost_vec,
1177 bool record_prologue_costs)
1178 {
1179 dr_vec_info *dr_info = STMT_VINFO_DR_INFO (stmt_info);
1180 int alignment_support_scheme
1181 = vect_supportable_dr_alignment (vinfo, dr_info, false);
1182
1183 switch (alignment_support_scheme)
1184 {
1185 case dr_aligned:
1186 {
1187 *inside_cost += record_stmt_cost (body_cost_vec, ncopies, vector_load,
1188 stmt_info, 0, vect_body);
1189
1190 if (dump_enabled_p ())
1191 dump_printf_loc (MSG_NOTE, vect_location,
1192 "vect_model_load_cost: aligned.\n");
1193
1194 break;
1195 }
1196 case dr_unaligned_supported:
1197 {
1198 /* Here, we assign an additional cost for the unaligned load. */
1199 *inside_cost += record_stmt_cost (body_cost_vec, ncopies,
1200 unaligned_load, stmt_info,
1201 DR_MISALIGNMENT (dr_info),
1202 vect_body);
1203
1204 if (dump_enabled_p ())
1205 dump_printf_loc (MSG_NOTE, vect_location,
1206 "vect_model_load_cost: unaligned supported by "
1207 "hardware.\n");
1208
1209 break;
1210 }
1211 case dr_explicit_realign:
1212 {
1213 *inside_cost += record_stmt_cost (body_cost_vec, ncopies * 2,
1214 vector_load, stmt_info, 0, vect_body);
1215 *inside_cost += record_stmt_cost (body_cost_vec, ncopies,
1216 vec_perm, stmt_info, 0, vect_body);
1217
1218 /* FIXME: If the misalignment remains fixed across the iterations of
1219 the containing loop, the following cost should be added to the
1220 prologue costs. */
1221 if (targetm.vectorize.builtin_mask_for_load)
1222 *inside_cost += record_stmt_cost (body_cost_vec, 1, vector_stmt,
1223 stmt_info, 0, vect_body);
1224
1225 if (dump_enabled_p ())
1226 dump_printf_loc (MSG_NOTE, vect_location,
1227 "vect_model_load_cost: explicit realign\n");
1228
1229 break;
1230 }
1231 case dr_explicit_realign_optimized:
1232 {
1233 if (dump_enabled_p ())
1234 dump_printf_loc (MSG_NOTE, vect_location,
1235 "vect_model_load_cost: unaligned software "
1236 "pipelined.\n");
1237
1238 /* Unaligned software pipeline has a load of an address, an initial
1239 load, and possibly a mask operation to "prime" the loop. However,
1240 if this is an access in a group of loads, which provide grouped
1241 access, then the above cost should only be considered for one
1242 access in the group. Inside the loop, there is a load op
1243 and a realignment op. */
1244
1245 if (add_realign_cost && record_prologue_costs)
1246 {
1247 *prologue_cost += record_stmt_cost (prologue_cost_vec, 2,
1248 vector_stmt, stmt_info,
1249 0, vect_prologue);
1250 if (targetm.vectorize.builtin_mask_for_load)
1251 *prologue_cost += record_stmt_cost (prologue_cost_vec, 1,
1252 vector_stmt, stmt_info,
1253 0, vect_prologue);
1254 }
1255
1256 *inside_cost += record_stmt_cost (body_cost_vec, ncopies, vector_load,
1257 stmt_info, 0, vect_body);
1258 *inside_cost += record_stmt_cost (body_cost_vec, ncopies, vec_perm,
1259 stmt_info, 0, vect_body);
1260
1261 if (dump_enabled_p ())
1262 dump_printf_loc (MSG_NOTE, vect_location,
1263 "vect_model_load_cost: explicit realign optimized"
1264 "\n");
1265
1266 break;
1267 }
1268
1269 case dr_unaligned_unsupported:
1270 {
1271 *inside_cost = VECT_MAX_COST;
1272
1273 if (dump_enabled_p ())
1274 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1275 "vect_model_load_cost: unsupported access.\n");
1276 break;
1277 }
1278
1279 default:
1280 gcc_unreachable ();
1281 }
1282 }
1283
1284 /* Insert the new stmt NEW_STMT at *GSI or at the appropriate place in
1285 the loop preheader for the vectorized stmt STMT_VINFO. */
1286
1287 static void
1288 vect_init_vector_1 (vec_info *vinfo, stmt_vec_info stmt_vinfo, gimple *new_stmt,
1289 gimple_stmt_iterator *gsi)
1290 {
1291 if (gsi)
1292 vect_finish_stmt_generation (vinfo, stmt_vinfo, new_stmt, gsi);
1293 else
1294 vinfo->insert_on_entry (stmt_vinfo, new_stmt);
1295
1296 if (dump_enabled_p ())
1297 dump_printf_loc (MSG_NOTE, vect_location,
1298 "created new init_stmt: %G", new_stmt);
1299 }
1300
1301 /* Function vect_init_vector.
1302
1303 Insert a new stmt (INIT_STMT) that initializes a new variable of type
1304 TYPE with the value VAL. If TYPE is a vector type and VAL does not have
1305 a vector type, a vector with all elements equal to VAL is created first.
1306 Place the initialization at GSI if it is not NULL. Otherwise, place the
1307 initialization at the loop preheader.
1308 Return the DEF of INIT_STMT.
1309 It will be used in the vectorization of STMT_INFO. */
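/* For example (hedged sketch), materializing an invariant operand OP as a
   vector can be done with

     tree vop = vect_init_vector (vinfo, stmt_info, op, vector_type, NULL);

   passing a NULL GSI so that the initialization is placed at the loop
   preheader (or region entry).  */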
1310
1311 tree
1312 vect_init_vector (vec_info *vinfo, stmt_vec_info stmt_info, tree val, tree type,
1313 gimple_stmt_iterator *gsi)
1314 {
1315 gimple *init_stmt;
1316 tree new_temp;
1317
1318 /* We abuse this function to create an SSA name with the initial value 'val'. */
1319 if (! useless_type_conversion_p (type, TREE_TYPE (val)))
1320 {
1321 gcc_assert (TREE_CODE (type) == VECTOR_TYPE);
1322 if (! types_compatible_p (TREE_TYPE (type), TREE_TYPE (val)))
1323 {
1324 /* Scalar boolean value should be transformed into
1325 all zeros or all ones value before building a vector. */
1326 if (VECTOR_BOOLEAN_TYPE_P (type))
1327 {
1328 tree true_val = build_all_ones_cst (TREE_TYPE (type));
1329 tree false_val = build_zero_cst (TREE_TYPE (type));
1330
1331 if (CONSTANT_CLASS_P (val))
1332 val = integer_zerop (val) ? false_val : true_val;
1333 else
1334 {
1335 new_temp = make_ssa_name (TREE_TYPE (type));
1336 init_stmt = gimple_build_assign (new_temp, COND_EXPR,
1337 val, true_val, false_val);
1338 vect_init_vector_1 (vinfo, stmt_info, init_stmt, gsi);
1339 val = new_temp;
1340 }
1341 }
1342 else
1343 {
1344 gimple_seq stmts = NULL;
1345 if (! INTEGRAL_TYPE_P (TREE_TYPE (val)))
1346 val = gimple_build (&stmts, VIEW_CONVERT_EXPR,
1347 TREE_TYPE (type), val);
1348 else
1349 /* ??? Condition vectorization expects us to do
1350 promotion of invariant/external defs. */
1351 val = gimple_convert (&stmts, TREE_TYPE (type), val);
1352 for (gimple_stmt_iterator gsi2 = gsi_start (stmts);
1353 !gsi_end_p (gsi2); )
1354 {
1355 init_stmt = gsi_stmt (gsi2);
1356 gsi_remove (&gsi2, false);
1357 vect_init_vector_1 (vinfo, stmt_info, init_stmt, gsi);
1358 }
1359 }
1360 }
1361 val = build_vector_from_val (type, val);
1362 }
1363
1364 new_temp = vect_get_new_ssa_name (type, vect_simple_var, "cst_");
1365 init_stmt = gimple_build_assign (new_temp, val);
1366 vect_init_vector_1 (vinfo, stmt_info, init_stmt, gsi);
1367 return new_temp;
1368 }
1369
1370
1371 /* Function vect_get_vec_defs_for_operand.
1372
1373 OP is an operand in STMT_VINFO. This function returns a vector of
1374 NCOPIES defs that will be used in the vectorized stmts for STMT_VINFO.
1375
1376 In the case that OP is an SSA_NAME which is defined in the loop, then
1377 STMT_VINFO_VEC_STMTS of the defining stmt holds the relevant defs.
1378
1379 In case OP is an invariant or constant, a new stmt that creates a vector def
1380 needs to be introduced. VECTYPE may be used to specify a required type for
1381 the vector invariant. */
1382
1383 void
1384 vect_get_vec_defs_for_operand (vec_info *vinfo, stmt_vec_info stmt_vinfo,
1385 unsigned ncopies,
1386 tree op, vec<tree> *vec_oprnds, tree vectype)
1387 {
1388 gimple *def_stmt;
1389 enum vect_def_type dt;
1390 bool is_simple_use;
1391 loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
1392
1393 if (dump_enabled_p ())
1394 dump_printf_loc (MSG_NOTE, vect_location,
1395 "vect_get_vec_defs_for_operand: %T\n", op);
1396
1397 stmt_vec_info def_stmt_info;
1398 is_simple_use = vect_is_simple_use (op, loop_vinfo, &dt,
1399 &def_stmt_info, &def_stmt);
1400 gcc_assert (is_simple_use);
1401 if (def_stmt && dump_enabled_p ())
1402 dump_printf_loc (MSG_NOTE, vect_location, " def_stmt = %G", def_stmt);
1403
1404 vec_oprnds->create (ncopies);
1405 if (dt == vect_constant_def || dt == vect_external_def)
1406 {
1407 tree stmt_vectype = STMT_VINFO_VECTYPE (stmt_vinfo);
1408 tree vector_type;
1409
1410 if (vectype)
1411 vector_type = vectype;
1412 else if (VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (op))
1413 && VECTOR_BOOLEAN_TYPE_P (stmt_vectype))
1414 vector_type = truth_type_for (stmt_vectype);
1415 else
1416 vector_type = get_vectype_for_scalar_type (loop_vinfo, TREE_TYPE (op));
1417
1418 gcc_assert (vector_type);
1419 tree vop = vect_init_vector (vinfo, stmt_vinfo, op, vector_type, NULL);
1420 while (ncopies--)
1421 vec_oprnds->quick_push (vop);
1422 }
1423 else
1424 {
1425 def_stmt_info = vect_stmt_to_vectorize (def_stmt_info);
1426 gcc_assert (STMT_VINFO_VEC_STMTS (def_stmt_info).length () == ncopies);
1427 for (unsigned i = 0; i < ncopies; ++i)
1428 vec_oprnds->quick_push (gimple_get_lhs
1429 (STMT_VINFO_VEC_STMTS (def_stmt_info)[i]));
1430 }
1431 }
1432
1433
1434 /* Get vectorized definitions for OP0 and OP1. */
1435
1436 void
1437 vect_get_vec_defs (vec_info *vinfo, stmt_vec_info stmt_info, slp_tree slp_node,
1438 unsigned ncopies,
1439 tree op0, vec<tree> *vec_oprnds0, tree vectype0,
1440 tree op1, vec<tree> *vec_oprnds1, tree vectype1,
1441 tree op2, vec<tree> *vec_oprnds2, tree vectype2,
1442 tree op3, vec<tree> *vec_oprnds3, tree vectype3)
1443 {
1444 if (slp_node)
1445 {
1446 if (op0)
1447 vect_get_slp_defs (SLP_TREE_CHILDREN (slp_node)[0], vec_oprnds0);
1448 if (op1)
1449 vect_get_slp_defs (SLP_TREE_CHILDREN (slp_node)[1], vec_oprnds1);
1450 if (op2)
1451 vect_get_slp_defs (SLP_TREE_CHILDREN (slp_node)[2], vec_oprnds2);
1452 if (op3)
1453 vect_get_slp_defs (SLP_TREE_CHILDREN (slp_node)[3], vec_oprnds3);
1454 }
1455 else
1456 {
1457 if (op0)
1458 vect_get_vec_defs_for_operand (vinfo, stmt_info, ncopies,
1459 op0, vec_oprnds0, vectype0);
1460 if (op1)
1461 vect_get_vec_defs_for_operand (vinfo, stmt_info, ncopies,
1462 op1, vec_oprnds1, vectype1);
1463 if (op2)
1464 vect_get_vec_defs_for_operand (vinfo, stmt_info, ncopies,
1465 op2, vec_oprnds2, vectype2);
1466 if (op3)
1467 vect_get_vec_defs_for_operand (vinfo, stmt_info, ncopies,
1468 op3, vec_oprnds3, vectype3);
1469 }
1470 }
1471
1472 void
1473 vect_get_vec_defs (vec_info *vinfo, stmt_vec_info stmt_info, slp_tree slp_node,
1474 unsigned ncopies,
1475 tree op0, vec<tree> *vec_oprnds0,
1476 tree op1, vec<tree> *vec_oprnds1,
1477 tree op2, vec<tree> *vec_oprnds2,
1478 tree op3, vec<tree> *vec_oprnds3)
1479 {
1480 vect_get_vec_defs (vinfo, stmt_info, slp_node, ncopies,
1481 op0, vec_oprnds0, NULL_TREE,
1482 op1, vec_oprnds1, NULL_TREE,
1483 op2, vec_oprnds2, NULL_TREE,
1484 op3, vec_oprnds3, NULL_TREE);
1485 }
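/* A minimal usage sketch for a two-operand statement (variables assumed to
   exist at the call site):

     vec<tree> vec_oprnds0 = vNULL, vec_oprnds1 = vNULL;
     vect_get_vec_defs (vinfo, stmt_info, slp_node, ncopies,
                        op0, &vec_oprnds0, op1, &vec_oprnds1,
                        NULL_TREE, NULL, NULL_TREE, NULL);

   For SLP the defs come from the SLP children; otherwise NCOPIES defs are
   collected per operand.  */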
1486
1487 /* Helper function called by vect_finish_replace_stmt and
1488 vect_finish_stmt_generation. Set the location of the new statement
1489 and, if it can throw, add it to the EH region of STMT_INFO's stmt. */
1490
1491 static void
1492 vect_finish_stmt_generation_1 (vec_info *,
1493 stmt_vec_info stmt_info, gimple *vec_stmt)
1494 {
1495 if (dump_enabled_p ())
1496 dump_printf_loc (MSG_NOTE, vect_location, "add new stmt: %G", vec_stmt);
1497
1498 if (stmt_info)
1499 {
1500 gimple_set_location (vec_stmt, gimple_location (stmt_info->stmt));
1501
1502 /* While EH edges will generally prevent vectorization, stmt might
1503 e.g. be in a must-not-throw region. Ensure newly created stmts
1504 that could throw are part of the same region. */
1505 int lp_nr = lookup_stmt_eh_lp (stmt_info->stmt);
1506 if (lp_nr != 0 && stmt_could_throw_p (cfun, vec_stmt))
1507 add_stmt_to_eh_lp (vec_stmt, lp_nr);
1508 }
1509 else
1510 gcc_assert (!stmt_could_throw_p (cfun, vec_stmt));
1511 }
1512
1513 /* Replace the scalar statement STMT_INFO with a new vector statement VEC_STMT,
1514 which sets the same scalar result as STMT_INFO did. VEC_STMT replaces
1515 the scalar statement in place. */
1516
1517 void
1518 vect_finish_replace_stmt (vec_info *vinfo,
1519 stmt_vec_info stmt_info, gimple *vec_stmt)
1520 {
1521 gimple *scalar_stmt = vect_orig_stmt (stmt_info)->stmt;
1522 gcc_assert (gimple_get_lhs (scalar_stmt) == gimple_get_lhs (vec_stmt));
1523
1524 gimple_stmt_iterator gsi = gsi_for_stmt (scalar_stmt);
1525 gsi_replace (&gsi, vec_stmt, true);
1526
1527 vect_finish_stmt_generation_1 (vinfo, stmt_info, vec_stmt);
1528 }
1529
1530 /* Add VEC_STMT to the vectorized implementation of STMT_INFO and insert it
1531 before *GSI, keeping virtual SSA form up to date where needed. */
1532
1533 void
1534 vect_finish_stmt_generation (vec_info *vinfo,
1535 stmt_vec_info stmt_info, gimple *vec_stmt,
1536 gimple_stmt_iterator *gsi)
1537 {
1538 gcc_assert (!stmt_info || gimple_code (stmt_info->stmt) != GIMPLE_LABEL);
1539
1540 if (!gsi_end_p (*gsi)
1541 && gimple_has_mem_ops (vec_stmt))
1542 {
1543 gimple *at_stmt = gsi_stmt (*gsi);
1544 tree vuse = gimple_vuse (at_stmt);
1545 if (vuse && TREE_CODE (vuse) == SSA_NAME)
1546 {
1547 tree vdef = gimple_vdef (at_stmt);
1548 gimple_set_vuse (vec_stmt, gimple_vuse (at_stmt));
1549 gimple_set_modified (vec_stmt, true);
1550 /* If we have an SSA vuse and insert a store, update virtual
1551 SSA form to avoid triggering the renamer. Do so only
1552 if we can easily see all uses - which is what almost always
1553 happens with the way vectorized stmts are inserted. */
1554 if ((vdef && TREE_CODE (vdef) == SSA_NAME)
1555 && ((is_gimple_assign (vec_stmt)
1556 && !is_gimple_reg (gimple_assign_lhs (vec_stmt)))
1557 || (is_gimple_call (vec_stmt)
1558 && !(gimple_call_flags (vec_stmt)
1559 & (ECF_CONST|ECF_PURE|ECF_NOVOPS)))))
1560 {
1561 tree new_vdef = copy_ssa_name (vuse, vec_stmt);
1562 gimple_set_vdef (vec_stmt, new_vdef);
1563 SET_USE (gimple_vuse_op (at_stmt), new_vdef);
1564 }
1565 }
1566 }
1567 gsi_insert_before (gsi, vec_stmt, GSI_SAME_STMT);
1568 vect_finish_stmt_generation_1 (vinfo, stmt_info, vec_stmt);
1569 }
1570
1571 /* We want to vectorize a call to combined function CFN with function
1572 decl FNDECL, using VECTYPE_OUT as the type of the output and VECTYPE_IN
1573 as the types of all inputs. Check whether this is possible using
1574 an internal function, returning its code if so or IFN_LAST if not. */
1575
1576 static internal_fn
1577 vectorizable_internal_function (combined_fn cfn, tree fndecl,
1578 tree vectype_out, tree vectype_in)
1579 {
1580 internal_fn ifn;
1581 if (internal_fn_p (cfn))
1582 ifn = as_internal_fn (cfn);
1583 else
1584 ifn = associated_internal_fn (fndecl);
1585 if (ifn != IFN_LAST && direct_internal_fn_p (ifn))
1586 {
1587 const direct_internal_fn_info &info = direct_internal_fn (ifn);
1588 if (info.vectorizable)
1589 {
1590 tree type0 = (info.type0 < 0 ? vectype_out : vectype_in);
1591 tree type1 = (info.type1 < 0 ? vectype_out : vectype_in);
1592 if (direct_internal_fn_supported_p (ifn, tree_pair (type0, type1),
1593 OPTIMIZE_FOR_SPEED))
1594 return ifn;
1595 }
1596 }
1597 return IFN_LAST;
1598 }
1599
1600
1601 static tree permute_vec_elements (vec_info *, tree, tree, tree, stmt_vec_info,
1602 gimple_stmt_iterator *);
1603
1604 /* Check whether a load or store statement in the loop described by
1605 LOOP_VINFO is possible in a loop using partial vectors. This is
1606 testing whether the vectorizer pass has the appropriate support,
1607 as well as whether the target does.
1608
1609 VLS_TYPE says whether the statement is a load or store and VECTYPE
1610 is the type of the vector being loaded or stored. MEMORY_ACCESS_TYPE
1611 says how the load or store is going to be implemented and GROUP_SIZE
1612 is the number of load or store statements in the containing group.
1613 If the access is a gather load or scatter store, GS_INFO describes
1614 its arguments. If the load or store is conditional, SCALAR_MASK is the
1615 condition under which it occurs.
1616
1617 Clear LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P if a loop using partial
1618 vectors is not supported, otherwise record the required rgroup control
1619 types. */
1620
1621 static void
1622 check_load_store_for_partial_vectors (loop_vec_info loop_vinfo, tree vectype,
1623 vec_load_store_type vls_type,
1624 int group_size,
1625 vect_memory_access_type
1626 memory_access_type,
1627 gather_scatter_info *gs_info,
1628 tree scalar_mask)
1629 {
1630 /* Invariant loads need no special support. */
1631 if (memory_access_type == VMAT_INVARIANT)
1632 return;
1633
1634 vec_loop_masks *masks = &LOOP_VINFO_MASKS (loop_vinfo);
1635 machine_mode vecmode = TYPE_MODE (vectype);
1636 bool is_load = (vls_type == VLS_LOAD);
1637 if (memory_access_type == VMAT_LOAD_STORE_LANES)
1638 {
1639 if (is_load
1640 ? !vect_load_lanes_supported (vectype, group_size, true)
1641 : !vect_store_lanes_supported (vectype, group_size, true))
1642 {
1643 if (dump_enabled_p ())
1644 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1645 "can't operate on partial vectors because"
1646 " the target doesn't have an appropriate"
1647 " load/store-lanes instruction.\n");
1648 LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo) = false;
1649 return;
1650 }
1651 unsigned int ncopies = vect_get_num_copies (loop_vinfo, vectype);
1652 vect_record_loop_mask (loop_vinfo, masks, ncopies, vectype, scalar_mask);
1653 return;
1654 }
1655
1656 if (memory_access_type == VMAT_GATHER_SCATTER)
1657 {
1658 internal_fn ifn = (is_load
1659 ? IFN_MASK_GATHER_LOAD
1660 : IFN_MASK_SCATTER_STORE);
1661 if (!internal_gather_scatter_fn_supported_p (ifn, vectype,
1662 gs_info->memory_type,
1663 gs_info->offset_vectype,
1664 gs_info->scale))
1665 {
1666 if (dump_enabled_p ())
1667 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1668 "can't operate on partial vectors because"
1669 " the target doesn't have an appropriate"
1670 " gather load or scatter store instruction.\n");
1671 LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo) = false;
1672 return;
1673 }
1674 unsigned int ncopies = vect_get_num_copies (loop_vinfo, vectype);
1675 vect_record_loop_mask (loop_vinfo, masks, ncopies, vectype, scalar_mask);
1676 return;
1677 }
1678
1679 if (memory_access_type != VMAT_CONTIGUOUS
1680 && memory_access_type != VMAT_CONTIGUOUS_PERMUTE)
1681 {
1682 /* Element X of the data must come from iteration i * VF + X of the
1683 scalar loop. We need more work to support other mappings. */
1684 if (dump_enabled_p ())
1685 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1686 "can't operate on partial vectors because an"
1687 " access isn't contiguous.\n");
1688 LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo) = false;
1689 return;
1690 }
1691
1692 if (!VECTOR_MODE_P (vecmode))
1693 {
1694 if (dump_enabled_p ())
1695 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1696 "can't operate on partial vectors when emulating"
1697 " vector operations.\n");
1698 LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo) = false;
1699 return;
1700 }
1701
1702 /* We might load more scalars than we need for permuting SLP loads.
1703 We checked in get_group_load_store_type that the extra elements
1704 don't leak into a new vector. */
1705 auto get_valid_nvectors = [] (poly_uint64 size, poly_uint64 nunits)
1706 {
1707 unsigned int nvectors;
1708 if (can_div_away_from_zero_p (size, nunits, &nvectors))
1709 return nvectors;
1710 gcc_unreachable ();
1711 };
1712
1713 poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
1714 poly_uint64 vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
1715 machine_mode mask_mode;
1716 bool using_partial_vectors_p = false;
1717 if (targetm.vectorize.get_mask_mode (vecmode).exists (&mask_mode)
1718 && can_vec_mask_load_store_p (vecmode, mask_mode, is_load))
1719 {
1720 unsigned int nvectors = get_valid_nvectors (group_size * vf, nunits);
1721 vect_record_loop_mask (loop_vinfo, masks, nvectors, vectype, scalar_mask);
1722 using_partial_vectors_p = true;
1723 }
1724
1725 machine_mode vmode;
1726 if (get_len_load_store_mode (vecmode, is_load).exists (&vmode))
1727 {
1728 unsigned int nvectors = get_valid_nvectors (group_size * vf, nunits);
1729 vec_loop_lens *lens = &LOOP_VINFO_LENS (loop_vinfo);
1730 unsigned factor = (vecmode == vmode) ? 1 : GET_MODE_UNIT_SIZE (vecmode);
1731 vect_record_loop_len (loop_vinfo, lens, nvectors, vectype, factor);
1732 using_partial_vectors_p = true;
1733 }
1734
1735 if (!using_partial_vectors_p)
1736 {
1737 if (dump_enabled_p ())
1738 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1739 "can't operate on partial vectors because the"
1740 " target doesn't have the appropriate partial"
1741 " vectorization load or store.\n");
1742 LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo) = false;
1743 }
1744 }
1745
1746 /* Return the mask input to a masked load or store. VEC_MASK is the vectorized
1747 form of the scalar mask condition and LOOP_MASK, if nonnull, is the mask
1748 that needs to be applied to all loads and stores in a vectorized loop.
1749 Return VEC_MASK if LOOP_MASK is null, otherwise return VEC_MASK & LOOP_MASK.
1750
1751 MASK_TYPE is the type of both masks. If new statements are needed,
1752 insert them before GSI. */
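/* For example (an illustrative sketch, SSA names invented), combining the
   two masks for a conditional access inside a fully-masked loop emits:

     vec_mask_and_3 = vec_mask_2 & loop_mask_1;

   and vec_mask_and_3 is then used as the mask operand of the .MASK_LOAD
   or .MASK_STORE call built by the caller.  */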
1753
1754 static tree
1755 prepare_load_store_mask (tree mask_type, tree loop_mask, tree vec_mask,
1756 gimple_stmt_iterator *gsi)
1757 {
1758 gcc_assert (useless_type_conversion_p (mask_type, TREE_TYPE (vec_mask)));
1759 if (!loop_mask)
1760 return vec_mask;
1761
1762 gcc_assert (TREE_TYPE (loop_mask) == mask_type);
1763 tree and_res = make_temp_ssa_name (mask_type, NULL, "vec_mask_and");
1764 gimple *and_stmt = gimple_build_assign (and_res, BIT_AND_EXPR,
1765 vec_mask, loop_mask);
1766 gsi_insert_before (gsi, and_stmt, GSI_SAME_STMT);
1767 return and_res;
1768 }
1769
1770 /* Determine whether we can use a gather load or scatter store to vectorize
1771 strided load or store STMT_INFO by truncating the current offset to a
1772 smaller width. We need to be able to construct an offset vector:
1773
1774 { 0, X, X*2, X*3, ... }
1775
1776 without loss of precision, where X is STMT_INFO's DR_STEP.
1777
1778 Return true if this is possible, describing the gather load or scatter
1779 store in GS_INFO. MASKED_P is true if the load or store is conditional. */
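/* A worked example with assumed numbers: if DR_STEP is 4, SCALE is 4 and
   COUNT (the element count bound computed below) ends up as 999, then
   X = DR_STEP / SCALE = 1 and the largest scaled offset is 999, which
   needs 10 bits; the narrowest candidate offset type is therefore a
   16-bit unsigned integer, and we succeed if the target supports
   gathers/scatters with offsets at least that wide.  */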
1780
1781 static bool
1782 vect_truncate_gather_scatter_offset (stmt_vec_info stmt_info,
1783 loop_vec_info loop_vinfo, bool masked_p,
1784 gather_scatter_info *gs_info)
1785 {
1786 dr_vec_info *dr_info = STMT_VINFO_DR_INFO (stmt_info);
1787 data_reference *dr = dr_info->dr;
1788 tree step = DR_STEP (dr);
1789 if (TREE_CODE (step) != INTEGER_CST)
1790 {
1791 /* ??? Perhaps we could use range information here? */
1792 if (dump_enabled_p ())
1793 dump_printf_loc (MSG_NOTE, vect_location,
1794 "cannot truncate variable step.\n");
1795 return false;
1796 }
1797
1798 /* Get the number of bits in an element. */
1799 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
1800 scalar_mode element_mode = SCALAR_TYPE_MODE (TREE_TYPE (vectype));
1801 unsigned int element_bits = GET_MODE_BITSIZE (element_mode);
1802
1803 /* Set COUNT to the upper limit on the number of elements - 1.
1804 Start with the maximum vectorization factor. */
1805 unsigned HOST_WIDE_INT count = vect_max_vf (loop_vinfo) - 1;
1806
1807 /* Try lowering COUNT to the number of scalar latch iterations. */
1808 class loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
1809 widest_int max_iters;
1810 if (max_loop_iterations (loop, &max_iters)
1811 && max_iters < count)
1812 count = max_iters.to_shwi ();
1813
1814 /* Try scales of 1 and the element size. */
1815 int scales[] = { 1, vect_get_scalar_dr_size (dr_info) };
1816 wi::overflow_type overflow = wi::OVF_NONE;
1817 for (int i = 0; i < 2; ++i)
1818 {
1819 int scale = scales[i];
1820 widest_int factor;
1821 if (!wi::multiple_of_p (wi::to_widest (step), scale, SIGNED, &factor))
1822 continue;
1823
1824 /* Determine the minimum precision of (COUNT - 1) * STEP / SCALE. */
1825 widest_int range = wi::mul (count, factor, SIGNED, &overflow);
1826 if (overflow)
1827 continue;
1828 signop sign = range >= 0 ? UNSIGNED : SIGNED;
1829 unsigned int min_offset_bits = wi::min_precision (range, sign);
1830
1831 /* Find the narrowest viable offset type. */
1832 unsigned int offset_bits = 1U << ceil_log2 (min_offset_bits);
1833 tree offset_type = build_nonstandard_integer_type (offset_bits,
1834 sign == UNSIGNED);
1835
1836 /* See whether the target supports the operation with an offset
1837 no narrower than OFFSET_TYPE. */
1838 tree memory_type = TREE_TYPE (DR_REF (dr));
1839 if (!vect_gather_scatter_fn_p (loop_vinfo, DR_IS_READ (dr), masked_p,
1840 vectype, memory_type, offset_type, scale,
1841 &gs_info->ifn, &gs_info->offset_vectype))
1842 continue;
1843
1844 gs_info->decl = NULL_TREE;
1845 /* Logically the sum of DR_BASE_ADDRESS, DR_INIT and DR_OFFSET,
1846 but we don't need to store that here. */
1847 gs_info->base = NULL_TREE;
1848 gs_info->element_type = TREE_TYPE (vectype);
1849 gs_info->offset = fold_convert (offset_type, step);
1850 gs_info->offset_dt = vect_constant_def;
1851 gs_info->scale = scale;
1852 gs_info->memory_type = memory_type;
1853 return true;
1854 }
1855
1856 if (overflow && dump_enabled_p ())
1857 dump_printf_loc (MSG_NOTE, vect_location,
1858 "truncating gather/scatter offset to %d bits"
1859 " might change its value.\n", element_bits);
1860
1861 return false;
1862 }
1863
1864 /* Return true if we can use gather/scatter internal functions to
1865 vectorize STMT_INFO, which is a grouped or strided load or store.
1866 MASKED_P is true if load or store is conditional. When returning
1867 true, fill in GS_INFO with the information required to perform the
1868 operation. */
1869
1870 static bool
1871 vect_use_strided_gather_scatters_p (stmt_vec_info stmt_info,
1872 loop_vec_info loop_vinfo, bool masked_p,
1873 gather_scatter_info *gs_info)
1874 {
1875 if (!vect_check_gather_scatter (stmt_info, loop_vinfo, gs_info)
1876 || gs_info->decl)
1877 return vect_truncate_gather_scatter_offset (stmt_info, loop_vinfo,
1878 masked_p, gs_info);
1879
1880 tree old_offset_type = TREE_TYPE (gs_info->offset);
1881 tree new_offset_type = TREE_TYPE (gs_info->offset_vectype);
1882
1883 gcc_assert (TYPE_PRECISION (new_offset_type)
1884 >= TYPE_PRECISION (old_offset_type));
1885 gs_info->offset = fold_convert (new_offset_type, gs_info->offset);
1886
1887 if (dump_enabled_p ())
1888 dump_printf_loc (MSG_NOTE, vect_location,
1889 "using gather/scatter for strided/grouped access,"
1890 " scale = %d\n", gs_info->scale);
1891
1892 return true;
1893 }
1894
1895 /* STMT_INFO is a non-strided load or store, meaning that it accesses
1896 elements with a known constant step. Return -1 if that step
1897 is negative, 0 if it is zero, and 1 if it is greater than zero. */
1898
1899 static int
1900 compare_step_with_zero (vec_info *vinfo, stmt_vec_info stmt_info)
1901 {
1902 dr_vec_info *dr_info = STMT_VINFO_DR_INFO (stmt_info);
1903 return tree_int_cst_compare (vect_dr_behavior (vinfo, dr_info)->step,
1904 size_zero_node);
1905 }
1906
1907 /* If the target supports a permute mask that reverses the elements in
1908 a vector of type VECTYPE, return that mask, otherwise return null. */
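/* For example, for V4SI the returned mask selects elements { 3, 2, 1, 0 }.
   Only the first three indices (nunits-1, nunits-2, nunits-3) are pushed
   below, as a single stepped pattern, which also covers variable-length
   vector types.  */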
1909
1910 static tree
1911 perm_mask_for_reverse (tree vectype)
1912 {
1913 poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
1914
1915 /* The encoding has a single stepped pattern. */
1916 vec_perm_builder sel (nunits, 1, 3);
1917 for (int i = 0; i < 3; ++i)
1918 sel.quick_push (nunits - 1 - i);
1919
1920 vec_perm_indices indices (sel, 1, nunits);
1921 if (!can_vec_perm_const_p (TYPE_MODE (vectype), indices))
1922 return NULL_TREE;
1923 return vect_gen_perm_mask_checked (vectype, indices);
1924 }
1925
1926 /* A subroutine of get_load_store_type, with a subset of the same
1927 arguments. Handle the case where STMT_INFO is a load or store that
1928 accesses consecutive elements with a negative step. */
1929
1930 static vect_memory_access_type
1931 get_negative_load_store_type (vec_info *vinfo,
1932 stmt_vec_info stmt_info, tree vectype,
1933 vec_load_store_type vls_type,
1934 unsigned int ncopies)
1935 {
1936 dr_vec_info *dr_info = STMT_VINFO_DR_INFO (stmt_info);
1937 dr_alignment_support alignment_support_scheme;
1938
1939 if (ncopies > 1)
1940 {
1941 if (dump_enabled_p ())
1942 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1943 "multiple types with negative step.\n");
1944 return VMAT_ELEMENTWISE;
1945 }
1946
1947 alignment_support_scheme = vect_supportable_dr_alignment (vinfo,
1948 dr_info, false);
1949 if (alignment_support_scheme != dr_aligned
1950 && alignment_support_scheme != dr_unaligned_supported)
1951 {
1952 if (dump_enabled_p ())
1953 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1954 "negative step but alignment required.\n");
1955 return VMAT_ELEMENTWISE;
1956 }
1957
1958 if (vls_type == VLS_STORE_INVARIANT)
1959 {
1960 if (dump_enabled_p ())
1961 dump_printf_loc (MSG_NOTE, vect_location,
1962 "negative step with invariant source;"
1963 " no permute needed.\n");
1964 return VMAT_CONTIGUOUS_DOWN;
1965 }
1966
1967 if (!perm_mask_for_reverse (vectype))
1968 {
1969 if (dump_enabled_p ())
1970 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1971 "negative step and reversing not supported.\n");
1972 return VMAT_ELEMENTWISE;
1973 }
1974
1975 return VMAT_CONTIGUOUS_REVERSE;
1976 }
1977
1978 /* STMT_INFO is either a masked or unconditional store. Return the value
1979 being stored. */
1980
1981 tree
1982 vect_get_store_rhs (stmt_vec_info stmt_info)
1983 {
1984 if (gassign *assign = dyn_cast <gassign *> (stmt_info->stmt))
1985 {
1986 gcc_assert (gimple_assign_single_p (assign));
1987 return gimple_assign_rhs1 (assign);
1988 }
1989 if (gcall *call = dyn_cast <gcall *> (stmt_info->stmt))
1990 {
1991 internal_fn ifn = gimple_call_internal_fn (call);
1992 int index = internal_fn_stored_value_index (ifn);
1993 gcc_assert (index >= 0);
1994 return gimple_call_arg (call, index);
1995 }
1996 gcc_unreachable ();
1997 }
1998
1999 /* Function VECTOR_VECTOR_COMPOSITION_TYPE
2000
2001 This function returns a vector type which can be composed from NELTS pieces,
2002 whose type is recorded in PTYPE. VTYPE should be a vector type with the
2003 same vector size as the returned vector. It first checks whether the target
2004 supports construction from piece-sized vectors; if not, it then checks
2005 whether construction from a piece-sized scalar mode is supported. It
2006 returns NULL_TREE if no suitable composition is available.
2007
2008 For example, for (vtype=V16QI, nelts=4), we can probably get:
2009 - V16QI with PTYPE V4QI.
2010 - V4SI with PTYPE SI.
2011 - NULL_TREE. */
2012
2013 static tree
2014 vector_vector_composition_type (tree vtype, poly_uint64 nelts, tree *ptype)
2015 {
2016 gcc_assert (VECTOR_TYPE_P (vtype));
2017 gcc_assert (known_gt (nelts, 0U));
2018
2019 machine_mode vmode = TYPE_MODE (vtype);
2020 if (!VECTOR_MODE_P (vmode))
2021 return NULL_TREE;
2022
2023 poly_uint64 vbsize = GET_MODE_BITSIZE (vmode);
2024 unsigned int pbsize;
2025 if (constant_multiple_p (vbsize, nelts, &pbsize))
2026 {
2027 /* First check if vec_init optab supports construction from
2028 vector pieces directly. */
2029 scalar_mode elmode = SCALAR_TYPE_MODE (TREE_TYPE (vtype));
2030 poly_uint64 inelts = pbsize / GET_MODE_BITSIZE (elmode);
2031 machine_mode rmode;
2032 if (related_vector_mode (vmode, elmode, inelts).exists (&rmode)
2033 && (convert_optab_handler (vec_init_optab, vmode, rmode)
2034 != CODE_FOR_nothing))
2035 {
2036 *ptype = build_vector_type (TREE_TYPE (vtype), inelts);
2037 return vtype;
2038 }
2039
2040 /* Otherwise check whether an integer type of the same piece size exists
2041 and whether the vec_init optab supports construction from it directly. */
2042 if (int_mode_for_size (pbsize, 0).exists (&elmode)
2043 && related_vector_mode (vmode, elmode, nelts).exists (&rmode)
2044 && (convert_optab_handler (vec_init_optab, rmode, elmode)
2045 != CODE_FOR_nothing))
2046 {
2047 *ptype = build_nonstandard_integer_type (pbsize, 1);
2048 return build_vector_type (*ptype, nelts);
2049 }
2050 }
2051
2052 return NULL_TREE;
2053 }
2054
2055 /* A subroutine of get_load_store_type, with a subset of the same
2056 arguments. Handle the case where STMT_INFO is part of a grouped load
2057 or store.
2058
2059 For stores, the statements in the group are all consecutive
2060 and there is no gap at the end. For loads, the statements in the
2061 group might not be consecutive; there can be gaps between statements
2062 as well as at the end. */
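/* For example (illustrative), a load group accessing a[4*i], a[4*i+1] and
   a[4*i+2] has DR_GROUP_SIZE 4 with a gap of 1 at the end; vectorizing it
   with full V4SI loads would also read a[4*i+3], which is the "overrun"
   case handled below, either by peeling for gaps or by showing that the
   extra element stays within an aligned block.  */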
2063
2064 static bool
2065 get_group_load_store_type (vec_info *vinfo, stmt_vec_info stmt_info,
2066 tree vectype, slp_tree slp_node,
2067 bool masked_p, vec_load_store_type vls_type,
2068 vect_memory_access_type *memory_access_type,
2069 dr_alignment_support *alignment_support_scheme,
2070 gather_scatter_info *gs_info)
2071 {
2072 loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
2073 class loop *loop = loop_vinfo ? LOOP_VINFO_LOOP (loop_vinfo) : NULL;
2074 stmt_vec_info first_stmt_info = DR_GROUP_FIRST_ELEMENT (stmt_info);
2075 dr_vec_info *first_dr_info = STMT_VINFO_DR_INFO (first_stmt_info);
2076 unsigned int group_size = DR_GROUP_SIZE (first_stmt_info);
2077 bool single_element_p = (stmt_info == first_stmt_info
2078 && !DR_GROUP_NEXT_ELEMENT (stmt_info));
2079 unsigned HOST_WIDE_INT gap = DR_GROUP_GAP (first_stmt_info);
2080 poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
2081
2082 /* True if the vectorized statements would access beyond the last
2083 statement in the group. */
2084 bool overrun_p = false;
2085
2086 /* True if we can cope with such overrun by peeling for gaps, so that
2087 there is at least one final scalar iteration after the vector loop. */
2088 bool can_overrun_p = (!masked_p
2089 && vls_type == VLS_LOAD
2090 && loop_vinfo
2091 && !loop->inner);
2092
2093 /* There can only be a gap at the end of the group if the stride is
2094 known at compile time. */
2095 gcc_assert (!STMT_VINFO_STRIDED_P (first_stmt_info) || gap == 0);
2096
2097 /* Stores can't yet have gaps. */
2098 gcc_assert (slp_node || vls_type == VLS_LOAD || gap == 0);
2099
2100 if (slp_node)
2101 {
2102 /* For SLP vectorization we directly vectorize a subchain
2103 without permutation. */
2104 if (! SLP_TREE_LOAD_PERMUTATION (slp_node).exists ())
2105 first_dr_info
2106 = STMT_VINFO_DR_INFO (SLP_TREE_SCALAR_STMTS (slp_node)[0]);
2107 if (STMT_VINFO_STRIDED_P (first_stmt_info))
2108 {
2109 /* Try to use consecutive accesses of DR_GROUP_SIZE elements,
2110 separated by the stride, until we have a complete vector.
2111 Fall back to scalar accesses if that isn't possible. */
2112 if (multiple_p (nunits, group_size))
2113 *memory_access_type = VMAT_STRIDED_SLP;
2114 else
2115 *memory_access_type = VMAT_ELEMENTWISE;
2116 }
2117 else
2118 {
2119 overrun_p = loop_vinfo && gap != 0;
2120 if (overrun_p && vls_type != VLS_LOAD)
2121 {
2122 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2123 "Grouped store with gaps requires"
2124 " non-consecutive accesses\n");
2125 return false;
2126 }
2127 /* An overrun is fine if the trailing elements are smaller
2128 than the alignment boundary B. Every vector access will
2129 be a multiple of B and so we are guaranteed to access a
2130 non-gap element in the same B-sized block. */
2131 if (overrun_p
2132 && gap < (vect_known_alignment_in_bytes (first_dr_info)
2133 / vect_get_scalar_dr_size (first_dr_info)))
2134 overrun_p = false;
2135
2136 /* If the gap splits the vector in half and the target
2137 can do half-vector operations avoid the epilogue peeling
2138 by simply loading half of the vector only. Usually
2139 the construction with an upper zero half will be elided. */
2140 dr_alignment_support alignment_support_scheme;
2141 tree half_vtype;
2142 if (overrun_p
2143 && !masked_p
2144 && (((alignment_support_scheme
2145 = vect_supportable_dr_alignment (vinfo,
2146 first_dr_info, false)))
2147 == dr_aligned
2148 || alignment_support_scheme == dr_unaligned_supported)
2149 && known_eq (nunits, (group_size - gap) * 2)
2150 && known_eq (nunits, group_size)
2151 && (vector_vector_composition_type (vectype, 2, &half_vtype)
2152 != NULL_TREE))
2153 overrun_p = false;
2154
2155 if (overrun_p && !can_overrun_p)
2156 {
2157 if (dump_enabled_p ())
2158 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2159 "Peeling for outer loop is not supported\n");
2160 return false;
2161 }
2162 int cmp = compare_step_with_zero (vinfo, stmt_info);
2163 if (cmp < 0)
2164 {
2165 if (single_element_p)
2166 /* ??? The VMAT_CONTIGUOUS_REVERSE code generation is
2167 only correct for single element "interleaving" SLP. */
2168 *memory_access_type = get_negative_load_store_type
2169 (vinfo, stmt_info, vectype, vls_type, 1);
2170 else
2171 {
2172 /* Try to use consecutive accesses of DR_GROUP_SIZE elements,
2173 separated by the stride, until we have a complete vector.
2174 Fall back to scalar accesses if that isn't possible. */
2175 if (multiple_p (nunits, group_size))
2176 *memory_access_type = VMAT_STRIDED_SLP;
2177 else
2178 *memory_access_type = VMAT_ELEMENTWISE;
2179 }
2180 }
2181 else
2182 {
2183 gcc_assert (!loop_vinfo || cmp > 0);
2184 *memory_access_type = VMAT_CONTIGUOUS;
2185 }
2186 }
2187 }
2188 else
2189 {
2190 /* We can always handle this case using elementwise accesses,
2191 but see if something more efficient is available. */
2192 *memory_access_type = VMAT_ELEMENTWISE;
2193
2194 /* If there is a gap at the end of the group then these optimizations
2195 would access excess elements in the last iteration. */
2196 bool would_overrun_p = (gap != 0);
2197 /* An overrun is fine if the trailing elements are smaller than the
2198 alignment boundary B. Every vector access will be a multiple of B
2199 and so we are guaranteed to access a non-gap element in the
2200 same B-sized block. */
2201 if (would_overrun_p
2202 && !masked_p
2203 && gap < (vect_known_alignment_in_bytes (first_dr_info)
2204 / vect_get_scalar_dr_size (first_dr_info)))
2205 would_overrun_p = false;
2206
2207 if (!STMT_VINFO_STRIDED_P (first_stmt_info)
2208 && (can_overrun_p || !would_overrun_p)
2209 && compare_step_with_zero (vinfo, stmt_info) > 0)
2210 {
2211 /* First cope with the degenerate case of a single-element
2212 vector. */
2213 if (known_eq (TYPE_VECTOR_SUBPARTS (vectype), 1U))
2214 ;
2215
2216 /* Otherwise try using LOAD/STORE_LANES. */
2217 else if (vls_type == VLS_LOAD
2218 ? vect_load_lanes_supported (vectype, group_size, masked_p)
2219 : vect_store_lanes_supported (vectype, group_size,
2220 masked_p))
2221 {
2222 *memory_access_type = VMAT_LOAD_STORE_LANES;
2223 overrun_p = would_overrun_p;
2224 }
2225
2226 /* If that fails, try using permuting loads. */
2227 else if (vls_type == VLS_LOAD
2228 ? vect_grouped_load_supported (vectype, single_element_p,
2229 group_size)
2230 : vect_grouped_store_supported (vectype, group_size))
2231 {
2232 *memory_access_type = VMAT_CONTIGUOUS_PERMUTE;
2233 overrun_p = would_overrun_p;
2234 }
2235 }
2236
2237 /* As a last resort, try using a gather load or scatter store.
2238
2239 ??? Although the code can handle all group sizes correctly,
2240 it probably isn't a win to use separate strided accesses based
2241 on nearby locations. Or, even if it's a win over scalar code,
2242 it might not be a win over vectorizing at a lower VF, if that
2243 allows us to use contiguous accesses. */
2244 if (*memory_access_type == VMAT_ELEMENTWISE
2245 && single_element_p
2246 && loop_vinfo
2247 && vect_use_strided_gather_scatters_p (stmt_info, loop_vinfo,
2248 masked_p, gs_info))
2249 *memory_access_type = VMAT_GATHER_SCATTER;
2250 }
2251
2252 if (*memory_access_type == VMAT_GATHER_SCATTER
2253 || *memory_access_type == VMAT_ELEMENTWISE)
2254 *alignment_support_scheme = dr_unaligned_supported;
2255 else
2256 *alignment_support_scheme
2257 = vect_supportable_dr_alignment (vinfo, first_dr_info, false);
2258
2259 if (vls_type != VLS_LOAD && first_stmt_info == stmt_info)
2260 {
2261 /* STMT is the leader of the group. Check the operands of all the
2262 stmts of the group. */
2263 stmt_vec_info next_stmt_info = DR_GROUP_NEXT_ELEMENT (stmt_info);
2264 while (next_stmt_info)
2265 {
2266 tree op = vect_get_store_rhs (next_stmt_info);
2267 enum vect_def_type dt;
2268 if (!vect_is_simple_use (op, vinfo, &dt))
2269 {
2270 if (dump_enabled_p ())
2271 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2272 "use not simple.\n");
2273 return false;
2274 }
2275 next_stmt_info = DR_GROUP_NEXT_ELEMENT (next_stmt_info);
2276 }
2277 }
2278
2279 if (overrun_p)
2280 {
2281 gcc_assert (can_overrun_p);
2282 if (dump_enabled_p ())
2283 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2284 "Data access with gaps requires scalar "
2285 "epilogue loop\n");
2286 LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo) = true;
2287 }
2288
2289 return true;
2290 }
2291
2292 /* Analyze load or store statement STMT_INFO of type VLS_TYPE. Return true
2293 if there is a memory access type that the vectorized form can use,
2294 storing it in *MEMORY_ACCESS_TYPE if so. If we decide to use gathers
2295 or scatters, fill in GS_INFO accordingly. In addition
2296 *ALIGNMENT_SUPPORT_SCHEME is filled out and false is returned if
2297 the target does not support the alignment scheme.
2298
2299 SLP says whether we're performing SLP rather than loop vectorization.
2300 MASKED_P is true if the statement is conditional on a vectorized mask.
2301 VECTYPE is the vector type that the vectorized statements will use.
2302 NCOPIES is the number of vector statements that will be needed. */
2303
2304 static bool
2305 get_load_store_type (vec_info *vinfo, stmt_vec_info stmt_info,
2306 tree vectype, slp_tree slp_node,
2307 bool masked_p, vec_load_store_type vls_type,
2308 unsigned int ncopies,
2309 vect_memory_access_type *memory_access_type,
2310 dr_alignment_support *alignment_support_scheme,
2311 gather_scatter_info *gs_info)
2312 {
2313 loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
2314 poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
2315 if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
2316 {
2317 *memory_access_type = VMAT_GATHER_SCATTER;
2318 if (!vect_check_gather_scatter (stmt_info, loop_vinfo, gs_info))
2319 gcc_unreachable ();
2320 else if (!vect_is_simple_use (gs_info->offset, vinfo,
2321 &gs_info->offset_dt,
2322 &gs_info->offset_vectype))
2323 {
2324 if (dump_enabled_p ())
2325 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2326 "%s index use not simple.\n",
2327 vls_type == VLS_LOAD ? "gather" : "scatter");
2328 return false;
2329 }
2330 /* Gather-scatter accesses perform only component accesses, alignment
2331 is irrelevant for them. */
2332 *alignment_support_scheme = dr_unaligned_supported;
2333 }
2334 else if (STMT_VINFO_GROUPED_ACCESS (stmt_info))
2335 {
2336 if (!get_group_load_store_type (vinfo, stmt_info, vectype, slp_node,
2337 masked_p,
2338 vls_type, memory_access_type,
2339 alignment_support_scheme, gs_info))
2340 return false;
2341 }
2342 else if (STMT_VINFO_STRIDED_P (stmt_info))
2343 {
2344 gcc_assert (!slp_node);
2345 if (loop_vinfo
2346 && vect_use_strided_gather_scatters_p (stmt_info, loop_vinfo,
2347 masked_p, gs_info))
2348 *memory_access_type = VMAT_GATHER_SCATTER;
2349 else
2350 *memory_access_type = VMAT_ELEMENTWISE;
2351 /* Alignment is irrelevant here. */
2352 *alignment_support_scheme = dr_unaligned_supported;
2353 }
2354 else
2355 {
2356 int cmp = compare_step_with_zero (vinfo, stmt_info);
2357 if (cmp < 0)
2358 *memory_access_type = get_negative_load_store_type
2359 (vinfo, stmt_info, vectype, vls_type, ncopies);
2360 else if (cmp == 0)
2361 {
2362 gcc_assert (vls_type == VLS_LOAD);
2363 *memory_access_type = VMAT_INVARIANT;
2364 }
2365 else
2366 *memory_access_type = VMAT_CONTIGUOUS;
2367 *alignment_support_scheme
2368 = vect_supportable_dr_alignment (vinfo,
2369 STMT_VINFO_DR_INFO (stmt_info), false);
2370 }
2371
2372 if ((*memory_access_type == VMAT_ELEMENTWISE
2373 || *memory_access_type == VMAT_STRIDED_SLP)
2374 && !nunits.is_constant ())
2375 {
2376 if (dump_enabled_p ())
2377 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2378 "Not using elementwise accesses due to variable "
2379 "vectorization factor.\n");
2380 return false;
2381 }
2382
2383 if (*alignment_support_scheme == dr_unaligned_unsupported)
2384 {
2385 if (dump_enabled_p ())
2386 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2387 "unsupported unaligned access\n");
2388 return false;
2389 }
2390
2391 /* FIXME: At the moment the cost model seems to underestimate the
2392 cost of using elementwise accesses. This check preserves the
2393 traditional behavior until that can be fixed. */
2394 stmt_vec_info first_stmt_info = DR_GROUP_FIRST_ELEMENT (stmt_info);
2395 if (!first_stmt_info)
2396 first_stmt_info = stmt_info;
2397 if (*memory_access_type == VMAT_ELEMENTWISE
2398 && !STMT_VINFO_STRIDED_P (first_stmt_info)
2399 && !(stmt_info == DR_GROUP_FIRST_ELEMENT (stmt_info)
2400 && !DR_GROUP_NEXT_ELEMENT (stmt_info)
2401 && !pow2p_hwi (DR_GROUP_SIZE (stmt_info))))
2402 {
2403 if (dump_enabled_p ())
2404 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2405 "not falling back to elementwise accesses\n");
2406 return false;
2407 }
2408 return true;
2409 }
2410
2411 /* Return true if boolean argument MASK is suitable for vectorizing
2412 conditional operation STMT_INFO. When returning true, store the type
2413 of the definition in *MASK_DT_OUT and the type of the vectorized mask
2414 in *MASK_VECTYPE_OUT. */
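/* For example, for a masked load of V8SF the mask must be an 8-lane
   boolean vector; if the mask definition has no vector type recorded
   (e.g. an external or constant boolean), get_mask_type_for_scalar_type
   is used below to derive one from the data vector's element type.  */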
2415
2416 static bool
2417 vect_check_scalar_mask (vec_info *vinfo, stmt_vec_info stmt_info, tree mask,
2418 vect_def_type *mask_dt_out,
2419 tree *mask_vectype_out)
2420 {
2421 if (!VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (mask)))
2422 {
2423 if (dump_enabled_p ())
2424 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2425 "mask argument is not a boolean.\n");
2426 return false;
2427 }
2428
2429 if (TREE_CODE (mask) != SSA_NAME)
2430 {
2431 if (dump_enabled_p ())
2432 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2433 "mask argument is not an SSA name.\n");
2434 return false;
2435 }
2436
2437 enum vect_def_type mask_dt;
2438 tree mask_vectype;
2439 if (!vect_is_simple_use (mask, vinfo, &mask_dt, &mask_vectype))
2440 {
2441 if (dump_enabled_p ())
2442 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2443 "mask use not simple.\n");
2444 return false;
2445 }
2446
2447 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
2448 if (!mask_vectype)
2449 mask_vectype = get_mask_type_for_scalar_type (vinfo, TREE_TYPE (vectype));
2450
2451 if (!mask_vectype || !VECTOR_BOOLEAN_TYPE_P (mask_vectype))
2452 {
2453 if (dump_enabled_p ())
2454 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2455 "could not find an appropriate vector mask type.\n");
2456 return false;
2457 }
2458
2459 if (maybe_ne (TYPE_VECTOR_SUBPARTS (mask_vectype),
2460 TYPE_VECTOR_SUBPARTS (vectype)))
2461 {
2462 if (dump_enabled_p ())
2463 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2464 "vector mask type %T"
2465 " does not match vector data type %T.\n",
2466 mask_vectype, vectype);
2467
2468 return false;
2469 }
2470
2471 *mask_dt_out = mask_dt;
2472 *mask_vectype_out = mask_vectype;
2473 return true;
2474 }
2475
2476 /* Return true if stored value RHS is suitable for vectorizing store
2477 statement STMT_INFO. When returning true, store the type of the
2478 definition in *RHS_DT_OUT, the type of the vectorized store value in
2479 *RHS_VECTYPE_OUT and the type of the store in *VLS_TYPE_OUT. */
2480
2481 static bool
2482 vect_check_store_rhs (vec_info *vinfo, stmt_vec_info stmt_info,
2483 slp_tree slp_node, tree rhs,
2484 vect_def_type *rhs_dt_out, tree *rhs_vectype_out,
2485 vec_load_store_type *vls_type_out)
2486 {
2487 /* If this is a store from a constant, make sure
2488 native_encode_expr can handle it. */
2489 if (CONSTANT_CLASS_P (rhs) && native_encode_expr (rhs, NULL, 64) == 0)
2490 {
2491 if (dump_enabled_p ())
2492 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2493 "cannot encode constant as a byte sequence.\n");
2494 return false;
2495 }
2496
2497 enum vect_def_type rhs_dt;
2498 tree rhs_vectype;
2499 slp_tree slp_op;
2500 if (!vect_is_simple_use (vinfo, stmt_info, slp_node, 0,
2501 &rhs, &slp_op, &rhs_dt, &rhs_vectype))
2502 {
2503 if (dump_enabled_p ())
2504 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2505 "use not simple.\n");
2506 return false;
2507 }
2508
2509 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
2510 if (rhs_vectype && !useless_type_conversion_p (vectype, rhs_vectype))
2511 {
2512 if (dump_enabled_p ())
2513 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2514 "incompatible vector types.\n");
2515 return false;
2516 }
2517
2518 *rhs_dt_out = rhs_dt;
2519 *rhs_vectype_out = rhs_vectype;
2520 if (rhs_dt == vect_constant_def || rhs_dt == vect_external_def)
2521 *vls_type_out = VLS_STORE_INVARIANT;
2522 else
2523 *vls_type_out = VLS_STORE;
2524 return true;
2525 }
2526
2527 /* Build an all-ones vector mask of type MASKTYPE while vectorizing STMT_INFO.
2528 Note that we support masks with floating-point type, in which case the
2529 floats are interpreted as a bitmask. */
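/* For instance, for a V2DF mask type the all-ones value is the double
   whose bit pattern is all ones, built via real_from_target below; some
   gather built-ins (e.g. the x86 AVX2 ones) take such a floating-point
   vector as their mask operand.  */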
2530
2531 static tree
2532 vect_build_all_ones_mask (vec_info *vinfo,
2533 stmt_vec_info stmt_info, tree masktype)
2534 {
2535 if (TREE_CODE (masktype) == INTEGER_TYPE)
2536 return build_int_cst (masktype, -1);
2537 else if (TREE_CODE (TREE_TYPE (masktype)) == INTEGER_TYPE)
2538 {
2539 tree mask = build_int_cst (TREE_TYPE (masktype), -1);
2540 mask = build_vector_from_val (masktype, mask);
2541 return vect_init_vector (vinfo, stmt_info, mask, masktype, NULL);
2542 }
2543 else if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (masktype)))
2544 {
2545 REAL_VALUE_TYPE r;
2546 long tmp[6];
2547 for (int j = 0; j < 6; ++j)
2548 tmp[j] = -1;
2549 real_from_target (&r, tmp, TYPE_MODE (TREE_TYPE (masktype)));
2550 tree mask = build_real (TREE_TYPE (masktype), r);
2551 mask = build_vector_from_val (masktype, mask);
2552 return vect_init_vector (vinfo, stmt_info, mask, masktype, NULL);
2553 }
2554 gcc_unreachable ();
2555 }
2556
2557 /* Build an all-zero merge value of type VECTYPE while vectorizing
2558 STMT_INFO as a gather load. */
2559
2560 static tree
2561 vect_build_zero_merge_argument (vec_info *vinfo,
2562 stmt_vec_info stmt_info, tree vectype)
2563 {
2564 tree merge;
2565 if (TREE_CODE (TREE_TYPE (vectype)) == INTEGER_TYPE)
2566 merge = build_int_cst (TREE_TYPE (vectype), 0);
2567 else if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (vectype)))
2568 {
2569 REAL_VALUE_TYPE r;
2570 long tmp[6];
2571 for (int j = 0; j < 6; ++j)
2572 tmp[j] = 0;
2573 real_from_target (&r, tmp, TYPE_MODE (TREE_TYPE (vectype)));
2574 merge = build_real (TREE_TYPE (vectype), r);
2575 }
2576 else
2577 gcc_unreachable ();
2578 merge = build_vector_from_val (vectype, merge);
2579 return vect_init_vector (vinfo, stmt_info, merge, vectype, NULL);
2580 }
2581
2582 /* Build a gather load call while vectorizing STMT_INFO. Insert new
2583 instructions before GSI and add them to VEC_STMT. GS_INFO describes
2584 the gather load operation. If the load is conditional, MASK is the
2585 unvectorized condition and MASK_DT is its definition type, otherwise
2586 MASK is null. */
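/* A rough sketch of the NARROW case below (the offset vector has half as
   many elements as VECTYPE, e.g. DImode offsets with SImode data):
   NCOPIES is doubled, each copy issues one gather producing half of the
   data lanes, results of even copies are kept in PREV_RES, and each odd
   copy is permuted together with PREV_RES into one full VECTYPE vector.  */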
2587
2588 static void
2589 vect_build_gather_load_calls (vec_info *vinfo, stmt_vec_info stmt_info,
2590 gimple_stmt_iterator *gsi,
2591 gimple **vec_stmt,
2592 gather_scatter_info *gs_info,
2593 tree mask)
2594 {
2595 loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
2596 class loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
2597 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
2598 poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
2599 int ncopies = vect_get_num_copies (loop_vinfo, vectype);
2600 edge pe = loop_preheader_edge (loop);
2601 enum { NARROW, NONE, WIDEN } modifier;
2602 poly_uint64 gather_off_nunits
2603 = TYPE_VECTOR_SUBPARTS (gs_info->offset_vectype);
2604
2605 tree arglist = TYPE_ARG_TYPES (TREE_TYPE (gs_info->decl));
2606 tree rettype = TREE_TYPE (TREE_TYPE (gs_info->decl));
2607 tree srctype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
2608 tree ptrtype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
2609 tree idxtype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
2610 tree masktype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
2611 tree scaletype = TREE_VALUE (arglist);
2612 tree real_masktype = masktype;
2613 gcc_checking_assert (types_compatible_p (srctype, rettype)
2614 && (!mask
2615 || TREE_CODE (masktype) == INTEGER_TYPE
2616 || types_compatible_p (srctype, masktype)));
2617 if (mask && TREE_CODE (masktype) == INTEGER_TYPE)
2618 masktype = truth_type_for (srctype);
2619
2620 tree mask_halftype = masktype;
2621 tree perm_mask = NULL_TREE;
2622 tree mask_perm_mask = NULL_TREE;
2623 if (known_eq (nunits, gather_off_nunits))
2624 modifier = NONE;
2625 else if (known_eq (nunits * 2, gather_off_nunits))
2626 {
2627 modifier = WIDEN;
2628
2629 /* Currently widening gathers and scatters are only supported for
2630 fixed-length vectors. */
2631 int count = gather_off_nunits.to_constant ();
2632 vec_perm_builder sel (count, count, 1);
2633 for (int i = 0; i < count; ++i)
2634 sel.quick_push (i | (count / 2));
2635
2636 vec_perm_indices indices (sel, 1, count);
2637 perm_mask = vect_gen_perm_mask_checked (gs_info->offset_vectype,
2638 indices);
2639 }
2640 else if (known_eq (nunits, gather_off_nunits * 2))
2641 {
2642 modifier = NARROW;
2643
2644 /* Currently narrowing gathers and scatters are only supported for
2645 fixed-length vectors. */
2646 int count = nunits.to_constant ();
2647 vec_perm_builder sel (count, count, 1);
2648 sel.quick_grow (count);
2649 for (int i = 0; i < count; ++i)
2650 sel[i] = i < count / 2 ? i : i + count / 2;
2651 vec_perm_indices indices (sel, 2, count);
2652 perm_mask = vect_gen_perm_mask_checked (vectype, indices);
2653
2654 ncopies *= 2;
2655
2656 if (mask && masktype == real_masktype)
2657 {
2658 for (int i = 0; i < count; ++i)
2659 sel[i] = i | (count / 2);
2660 indices.new_vector (sel, 2, count);
2661 mask_perm_mask = vect_gen_perm_mask_checked (masktype, indices);
2662 }
2663 else if (mask)
2664 mask_halftype = truth_type_for (gs_info->offset_vectype);
2665 }
2666 else
2667 gcc_unreachable ();
2668
2669 tree scalar_dest = gimple_get_lhs (stmt_info->stmt);
2670 tree vec_dest = vect_create_destination_var (scalar_dest, vectype);
2671
2672 tree ptr = fold_convert (ptrtype, gs_info->base);
2673 if (!is_gimple_min_invariant (ptr))
2674 {
2675 gimple_seq seq;
2676 ptr = force_gimple_operand (ptr, &seq, true, NULL_TREE);
2677 basic_block new_bb = gsi_insert_seq_on_edge_immediate (pe, seq);
2678 gcc_assert (!new_bb);
2679 }
2680
2681 tree scale = build_int_cst (scaletype, gs_info->scale);
2682
2683 tree vec_oprnd0 = NULL_TREE;
2684 tree vec_mask = NULL_TREE;
2685 tree src_op = NULL_TREE;
2686 tree mask_op = NULL_TREE;
2687 tree prev_res = NULL_TREE;
2688
2689 if (!mask)
2690 {
2691 src_op = vect_build_zero_merge_argument (vinfo, stmt_info, rettype);
2692 mask_op = vect_build_all_ones_mask (vinfo, stmt_info, masktype);
2693 }
2694
2695 auto_vec<tree> vec_oprnds0;
2696 auto_vec<tree> vec_masks;
2697 vect_get_vec_defs_for_operand (vinfo, stmt_info,
2698 modifier == WIDEN ? ncopies / 2 : ncopies,
2699 gs_info->offset, &vec_oprnds0);
2700 if (mask)
2701 vect_get_vec_defs_for_operand (vinfo, stmt_info,
2702 modifier == NARROW ? ncopies / 2 : ncopies,
2703 mask, &vec_masks);
2704 for (int j = 0; j < ncopies; ++j)
2705 {
2706 tree op, var;
2707 if (modifier == WIDEN && (j & 1))
2708 op = permute_vec_elements (vinfo, vec_oprnd0, vec_oprnd0,
2709 perm_mask, stmt_info, gsi);
2710 else
2711 op = vec_oprnd0 = vec_oprnds0[modifier == WIDEN ? j / 2 : j];
2712
2713 if (!useless_type_conversion_p (idxtype, TREE_TYPE (op)))
2714 {
2715 gcc_assert (known_eq (TYPE_VECTOR_SUBPARTS (TREE_TYPE (op)),
2716 TYPE_VECTOR_SUBPARTS (idxtype)));
2717 var = vect_get_new_ssa_name (idxtype, vect_simple_var);
2718 op = build1 (VIEW_CONVERT_EXPR, idxtype, op);
2719 gassign *new_stmt = gimple_build_assign (var, VIEW_CONVERT_EXPR, op);
2720 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
2721 op = var;
2722 }
2723
2724 if (mask)
2725 {
2726 if (mask_perm_mask && (j & 1))
2727 mask_op = permute_vec_elements (vinfo, mask_op, mask_op,
2728 mask_perm_mask, stmt_info, gsi);
2729 else
2730 {
2731 if (modifier == NARROW)
2732 {
2733 if ((j & 1) == 0)
2734 vec_mask = vec_masks[j / 2];
2735 }
2736 else
2737 vec_mask = vec_masks[j];
2738
2739 mask_op = vec_mask;
2740 if (!useless_type_conversion_p (masktype, TREE_TYPE (vec_mask)))
2741 {
2742 poly_uint64 sub1 = TYPE_VECTOR_SUBPARTS (TREE_TYPE (mask_op));
2743 poly_uint64 sub2 = TYPE_VECTOR_SUBPARTS (masktype);
2744 gcc_assert (known_eq (sub1, sub2));
2745 var = vect_get_new_ssa_name (masktype, vect_simple_var);
2746 mask_op = build1 (VIEW_CONVERT_EXPR, masktype, mask_op);
2747 gassign *new_stmt
2748 = gimple_build_assign (var, VIEW_CONVERT_EXPR, mask_op);
2749 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
2750 mask_op = var;
2751 }
2752 }
2753 if (modifier == NARROW && masktype != real_masktype)
2754 {
2755 var = vect_get_new_ssa_name (mask_halftype, vect_simple_var);
2756 gassign *new_stmt
2757 = gimple_build_assign (var, (j & 1) ? VEC_UNPACK_HI_EXPR
2758 : VEC_UNPACK_LO_EXPR,
2759 mask_op);
2760 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
2761 mask_op = var;
2762 }
2763 src_op = mask_op;
2764 }
2765
2766 tree mask_arg = mask_op;
2767 if (masktype != real_masktype)
2768 {
2769 tree utype, optype = TREE_TYPE (mask_op);
2770 if (TYPE_MODE (real_masktype) == TYPE_MODE (optype))
2771 utype = real_masktype;
2772 else
2773 utype = lang_hooks.types.type_for_mode (TYPE_MODE (optype), 1);
2774 var = vect_get_new_ssa_name (utype, vect_scalar_var);
2775 mask_arg = build1 (VIEW_CONVERT_EXPR, utype, mask_op);
2776 gassign *new_stmt
2777 = gimple_build_assign (var, VIEW_CONVERT_EXPR, mask_arg);
2778 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
2779 mask_arg = var;
2780 if (!useless_type_conversion_p (real_masktype, utype))
2781 {
2782 gcc_assert (TYPE_PRECISION (utype)
2783 <= TYPE_PRECISION (real_masktype));
2784 var = vect_get_new_ssa_name (real_masktype, vect_scalar_var);
2785 new_stmt = gimple_build_assign (var, NOP_EXPR, mask_arg);
2786 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
2787 mask_arg = var;
2788 }
2789 src_op = build_zero_cst (srctype);
2790 }
2791 gimple *new_stmt = gimple_build_call (gs_info->decl, 5, src_op, ptr, op,
2792 mask_arg, scale);
2793
2794 if (!useless_type_conversion_p (vectype, rettype))
2795 {
2796 gcc_assert (known_eq (TYPE_VECTOR_SUBPARTS (vectype),
2797 TYPE_VECTOR_SUBPARTS (rettype)));
2798 op = vect_get_new_ssa_name (rettype, vect_simple_var);
2799 gimple_call_set_lhs (new_stmt, op);
2800 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
2801 var = make_ssa_name (vec_dest);
2802 op = build1 (VIEW_CONVERT_EXPR, vectype, op);
2803 new_stmt = gimple_build_assign (var, VIEW_CONVERT_EXPR, op);
2804 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
2805 }
2806 else
2807 {
2808 var = make_ssa_name (vec_dest, new_stmt);
2809 gimple_call_set_lhs (new_stmt, var);
2810 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
2811 }
2812
2813 if (modifier == NARROW)
2814 {
2815 if ((j & 1) == 0)
2816 {
2817 prev_res = var;
2818 continue;
2819 }
2820 var = permute_vec_elements (vinfo, prev_res, var, perm_mask,
2821 stmt_info, gsi);
2822 new_stmt = SSA_NAME_DEF_STMT (var);
2823 }
2824
2825 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
2826 }
2827 *vec_stmt = STMT_VINFO_VEC_STMTS (stmt_info)[0];
2828 }
2829
2830 /* Prepare the base and offset in GS_INFO for vectorization.
2831 Set *DATAREF_PTR to the loop-invariant base address and *VEC_OFFSET
2832 to the vectorized offset argument for the first copy of STMT_INFO.
2833 STMT_INFO is the statement described by GS_INFO and LOOP is the
2834 containing loop. */
2835
2836 static void
2837 vect_get_gather_scatter_ops (vec_info *vinfo,
2838 class loop *loop, stmt_vec_info stmt_info,
2839 gather_scatter_info *gs_info,
2840 tree *dataref_ptr, vec<tree> *vec_offset,
2841 unsigned ncopies)
2842 {
2843 gimple_seq stmts = NULL;
2844 *dataref_ptr = force_gimple_operand (gs_info->base, &stmts, true, NULL_TREE);
2845 if (stmts != NULL)
2846 {
2847 basic_block new_bb;
2848 edge pe = loop_preheader_edge (loop);
2849 new_bb = gsi_insert_seq_on_edge_immediate (pe, stmts);
2850 gcc_assert (!new_bb);
2851 }
2852 vect_get_vec_defs_for_operand (vinfo, stmt_info, ncopies, gs_info->offset,
2853 vec_offset, gs_info->offset_vectype);
2854 }
2855
2856 /* Prepare to implement a grouped or strided load or store using
2857 the gather load or scatter store operation described by GS_INFO.
2858 STMT_INFO is the load or store statement.
2859
2860 Set *DATAREF_BUMP to the amount that should be added to the base
2861 address after each copy of the vectorized statement. Set *VEC_OFFSET
2862 to an invariant offset vector in which element I has the value
2863 I * DR_STEP / SCALE. */
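/* Worked example (illustrative numbers): with DR_STEP 32, SCALE 8 and a
   4-element vector type, X = 32 / 8 = 4, so *VEC_OFFSET becomes
   { 0, 4, 8, 12 } and *DATAREF_BUMP becomes 32 * 4 = 128 bytes per copy.  */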
2864
2865 static void
2866 vect_get_strided_load_store_ops (stmt_vec_info stmt_info,
2867 loop_vec_info loop_vinfo,
2868 gather_scatter_info *gs_info,
2869 tree *dataref_bump, tree *vec_offset)
2870 {
2871 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
2872 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
2873
2874 tree bump = size_binop (MULT_EXPR,
2875 fold_convert (sizetype, unshare_expr (DR_STEP (dr))),
2876 size_int (TYPE_VECTOR_SUBPARTS (vectype)));
2877 *dataref_bump = cse_and_gimplify_to_preheader (loop_vinfo, bump);
2878
2879 /* The offset given in GS_INFO can have pointer type, so use the element
2880 type of the vector instead. */
2881 tree offset_type = TREE_TYPE (gs_info->offset_vectype);
2882
2883 /* Calculate X = DR_STEP / SCALE and convert it to the appropriate type. */
2884 tree step = size_binop (EXACT_DIV_EXPR, unshare_expr (DR_STEP (dr)),
2885 ssize_int (gs_info->scale));
2886 step = fold_convert (offset_type, step);
2887
2888 /* Create {0, X, X*2, X*3, ...}. */
2889 tree offset = fold_build2 (VEC_SERIES_EXPR, gs_info->offset_vectype,
2890 build_zero_cst (offset_type), step);
2891 *vec_offset = cse_and_gimplify_to_preheader (loop_vinfo, offset);
2892 }
2893
2894 /* Return the amount that should be added to a vector pointer to move
2895 to the next or previous copy of AGGR_TYPE. DR_INFO is the data reference
2896 being vectorized and MEMORY_ACCESS_TYPE describes the type of
2897 vectorization. */
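/* For example, for a VMAT_CONTIGUOUS_REVERSE access with AGGR_TYPE V4SI
   the step is negative, so the increment below is -16 bytes (minus the
   size of AGGR_TYPE); invariant accesses get a zero increment.  */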
2898
2899 static tree
2900 vect_get_data_ptr_increment (vec_info *vinfo,
2901 dr_vec_info *dr_info, tree aggr_type,
2902 vect_memory_access_type memory_access_type)
2903 {
2904 if (memory_access_type == VMAT_INVARIANT)
2905 return size_zero_node;
2906
2907 tree iv_step = TYPE_SIZE_UNIT (aggr_type);
2908 tree step = vect_dr_behavior (vinfo, dr_info)->step;
2909 if (tree_int_cst_sgn (step) == -1)
2910 iv_step = fold_build1 (NEGATE_EXPR, TREE_TYPE (iv_step), iv_step);
2911 return iv_step;
2912 }
2913
2914 /* Check and perform vectorization of BUILT_IN_BSWAP{16,32,64,128}. */
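/* For example, __builtin_bswap32 on V4SI data is implemented below as a
   byte permutation of the V16QI view of the vector: for 4-byte words the
   selected byte indices are { 3, 2, 1, 0, 7, 6, 5, 4, 11, 10, 9, 8, ... },
   i.e. each word is reversed in place.  */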
2915
2916 static bool
2917 vectorizable_bswap (vec_info *vinfo,
2918 stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
2919 gimple **vec_stmt, slp_tree slp_node,
2920 slp_tree *slp_op,
2921 tree vectype_in, stmt_vector_for_cost *cost_vec)
2922 {
2923 tree op, vectype;
2924 gcall *stmt = as_a <gcall *> (stmt_info->stmt);
2925 loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
2926 unsigned ncopies;
2927
2928 op = gimple_call_arg (stmt, 0);
2929 vectype = STMT_VINFO_VECTYPE (stmt_info);
2930 poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
2931
2932 /* Multiple types in SLP are handled by creating the appropriate number of
2933 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
2934 case of SLP. */
2935 if (slp_node)
2936 ncopies = 1;
2937 else
2938 ncopies = vect_get_num_copies (loop_vinfo, vectype);
2939
2940 gcc_assert (ncopies >= 1);
2941
2942 tree char_vectype = get_same_sized_vectype (char_type_node, vectype_in);
2943 if (! char_vectype)
2944 return false;
2945
2946 poly_uint64 num_bytes = TYPE_VECTOR_SUBPARTS (char_vectype);
2947 unsigned word_bytes;
2948 if (!constant_multiple_p (num_bytes, nunits, &word_bytes))
2949 return false;
2950
2951 /* The encoding uses one stepped pattern for each byte in the word. */
2952 vec_perm_builder elts (num_bytes, word_bytes, 3);
2953 for (unsigned i = 0; i < 3; ++i)
2954 for (unsigned j = 0; j < word_bytes; ++j)
2955 elts.quick_push ((i + 1) * word_bytes - j - 1);
2956
2957 vec_perm_indices indices (elts, 1, num_bytes);
2958 if (!can_vec_perm_const_p (TYPE_MODE (char_vectype), indices))
2959 return false;
2960
2961 if (! vec_stmt)
2962 {
2963 if (slp_node
2964 && !vect_maybe_update_slp_op_vectype (slp_op[0], vectype_in))
2965 {
2966 if (dump_enabled_p ())
2967 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2968 "incompatible vector types for invariants\n");
2969 return false;
2970 }
2971
2972 STMT_VINFO_TYPE (stmt_info) = call_vec_info_type;
2973 DUMP_VECT_SCOPE ("vectorizable_bswap");
2974 if (! slp_node)
2975 {
2976 record_stmt_cost (cost_vec,
2977 1, vector_stmt, stmt_info, 0, vect_prologue);
2978 record_stmt_cost (cost_vec,
2979 ncopies, vec_perm, stmt_info, 0, vect_body);
2980 }
2981 return true;
2982 }
2983
2984 tree bswap_vconst = vec_perm_indices_to_tree (char_vectype, indices);
2985
2986 /* Transform. */
2987 vec<tree> vec_oprnds = vNULL;
2988 vect_get_vec_defs (vinfo, stmt_info, slp_node, ncopies,
2989 op, &vec_oprnds);
2990 /* Arguments are ready. Create the new vector stmt. */
2991 unsigned i;
2992 tree vop;
2993 FOR_EACH_VEC_ELT (vec_oprnds, i, vop)
2994 {
2995 gimple *new_stmt;
2996 tree tem = make_ssa_name (char_vectype);
2997 new_stmt = gimple_build_assign (tem, build1 (VIEW_CONVERT_EXPR,
2998 char_vectype, vop));
2999 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
3000 tree tem2 = make_ssa_name (char_vectype);
3001 new_stmt = gimple_build_assign (tem2, VEC_PERM_EXPR,
3002 tem, tem, bswap_vconst);
3003 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
3004 tem = make_ssa_name (vectype);
3005 new_stmt = gimple_build_assign (tem, build1 (VIEW_CONVERT_EXPR,
3006 vectype, tem2));
3007 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
3008 if (slp_node)
3009 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
3010 else
3011 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
3012 }
3013
3014 if (!slp_node)
3015 *vec_stmt = STMT_VINFO_VEC_STMTS (stmt_info)[0];
3016
3017 vec_oprnds.release ();
3018 return true;
3019 }
3020
3021 /* Return true if vector types VECTYPE_IN and VECTYPE_OUT have
3022 integer elements and if we can narrow VECTYPE_IN to VECTYPE_OUT
3023 in a single step. On success, store the binary pack code in
3024 *CONVERT_CODE. */
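/* For example, narrowing V4SI call results to a V8HI destination can be
   done in a single VEC_PACK_TRUNC_EXPR step, so this returns true; a
   narrowing that would need intermediate types (nonzero MULTI_STEP_CVT)
   is rejected.  */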
3025
3026 static bool
3027 simple_integer_narrowing (tree vectype_out, tree vectype_in,
3028 tree_code *convert_code)
3029 {
3030 if (!INTEGRAL_TYPE_P (TREE_TYPE (vectype_out))
3031 || !INTEGRAL_TYPE_P (TREE_TYPE (vectype_in)))
3032 return false;
3033
3034 tree_code code;
3035 int multi_step_cvt = 0;
3036 auto_vec <tree, 8> interm_types;
3037 if (!supportable_narrowing_operation (NOP_EXPR, vectype_out, vectype_in,
3038 &code, &multi_step_cvt, &interm_types)
3039 || multi_step_cvt)
3040 return false;
3041
3042 *convert_code = code;
3043 return true;
3044 }
3045
3046 /* Function vectorizable_call.
3047
3048 Check if STMT_INFO performs a function call that can be vectorized.
3049 If VEC_STMT is also passed, vectorize STMT_INFO: create a vectorized
3050 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
3051 Return true if STMT_INFO is vectorizable in this way. */
3052
3053 static bool
3054 vectorizable_call (vec_info *vinfo,
3055 stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
3056 gimple **vec_stmt, slp_tree slp_node,
3057 stmt_vector_for_cost *cost_vec)
3058 {
3059 gcall *stmt;
3060 tree vec_dest;
3061 tree scalar_dest;
3062 tree op;
3063 tree vec_oprnd0 = NULL_TREE, vec_oprnd1 = NULL_TREE;
3064 tree vectype_out, vectype_in;
3065 poly_uint64 nunits_in;
3066 poly_uint64 nunits_out;
3067 loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
3068 bb_vec_info bb_vinfo = dyn_cast <bb_vec_info> (vinfo);
3069 tree fndecl, new_temp, rhs_type;
3070 enum vect_def_type dt[4]
3071 = { vect_unknown_def_type, vect_unknown_def_type, vect_unknown_def_type,
3072 vect_unknown_def_type };
3073 tree vectypes[ARRAY_SIZE (dt)] = {};
3074 slp_tree slp_op[ARRAY_SIZE (dt)] = {};
3075 int ndts = ARRAY_SIZE (dt);
3076 int ncopies, j;
3077 auto_vec<tree, 8> vargs;
3078 auto_vec<tree, 8> orig_vargs;
3079 enum { NARROW, NONE, WIDEN } modifier;
3080 size_t i, nargs;
3081 tree lhs;
3082
3083 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
3084 return false;
3085
3086 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
3087 && ! vec_stmt)
3088 return false;
3089
3090 /* Is STMT_INFO a vectorizable call? */
3091 stmt = dyn_cast <gcall *> (stmt_info->stmt);
3092 if (!stmt)
3093 return false;
3094
3095 if (gimple_call_internal_p (stmt)
3096 && (internal_load_fn_p (gimple_call_internal_fn (stmt))
3097 || internal_store_fn_p (gimple_call_internal_fn (stmt))))
3098 /* Handled by vectorizable_load and vectorizable_store. */
3099 return false;
3100
3101 if (gimple_call_lhs (stmt) == NULL_TREE
3102 || TREE_CODE (gimple_call_lhs (stmt)) != SSA_NAME)
3103 return false;
3104
3105 gcc_checking_assert (!stmt_can_throw_internal (cfun, stmt));
3106
3107 vectype_out = STMT_VINFO_VECTYPE (stmt_info);
3108
3109 /* Process function arguments. */
3110 rhs_type = NULL_TREE;
3111 vectype_in = NULL_TREE;
3112 nargs = gimple_call_num_args (stmt);
3113
3114 /* Bail out if the function has more than four arguments; we do not have
3115 interesting builtin functions to vectorize with more than two arguments
3116 except for fma. Having no arguments is not supported either. */
3117 if (nargs == 0 || nargs > 4)
3118 return false;
3119
3120 /* Ignore the arguments of IFN_GOMP_SIMD_LANE, they are magic. */
3121 combined_fn cfn = gimple_call_combined_fn (stmt);
3122 if (cfn == CFN_GOMP_SIMD_LANE)
3123 {
3124 nargs = 0;
3125 rhs_type = unsigned_type_node;
3126 }
3127
3128 int mask_opno = -1;
3129 if (internal_fn_p (cfn))
3130 mask_opno = internal_fn_mask_index (as_internal_fn (cfn));
3131
3132 for (i = 0; i < nargs; i++)
3133 {
3134 if ((int) i == mask_opno)
3135 {
3136 op = gimple_call_arg (stmt, i);
3137 if (!vect_check_scalar_mask (vinfo,
3138 stmt_info, op, &dt[i], &vectypes[i]))
3139 return false;
3140 continue;
3141 }
3142
3143 if (!vect_is_simple_use (vinfo, stmt_info, slp_node,
3144 i, &op, &slp_op[i], &dt[i], &vectypes[i]))
3145 {
3146 if (dump_enabled_p ())
3147 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3148 "use not simple.\n");
3149 return false;
3150 }
3151
3152 /* We can only handle calls with arguments of the same type. */
3153 if (rhs_type
3154 && !types_compatible_p (rhs_type, TREE_TYPE (op)))
3155 {
3156 if (dump_enabled_p ())
3157 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3158 "argument types differ.\n");
3159 return false;
3160 }
3161 if (!rhs_type)
3162 rhs_type = TREE_TYPE (op);
3163
3164 if (!vectype_in)
3165 vectype_in = vectypes[i];
3166 else if (vectypes[i]
3167 && !types_compatible_p (vectypes[i], vectype_in))
3168 {
3169 if (dump_enabled_p ())
3170 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3171 "argument vector types differ.\n");
3172 return false;
3173 }
3174 }
3175 /* If all arguments are external or constant defs, infer the vector type
3176 from the scalar type. */
3177 if (!vectype_in)
3178 vectype_in = get_vectype_for_scalar_type (vinfo, rhs_type, slp_node);
3179 if (vec_stmt)
3180 gcc_assert (vectype_in);
3181 if (!vectype_in)
3182 {
3183 if (dump_enabled_p ())
3184 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3185 "no vectype for scalar type %T\n", rhs_type);
3186
3187 return false;
3188 }
3189 /* FORNOW: we don't yet support mixtures of vector sizes for calls,
3190 just mixtures of nunits. E.g. DI->SI versions of __builtin_ctz*
3191 are traditionally vectorized as two VnDI->VnDI IFN_CTZs followed
3192 by a pack of the two vectors into an SI vector. We would need
3193 separate code to handle direct VnDI->VnSI IFN_CTZs. */
3194 if (TYPE_SIZE (vectype_in) != TYPE_SIZE (vectype_out))
3195 {
3196 if (dump_enabled_p ())
3197 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3198 "mismatched vector sizes %T and %T\n",
3199 vectype_in, vectype_out);
3200 return false;
3201 }
3202
3203 if (VECTOR_BOOLEAN_TYPE_P (vectype_out)
3204 != VECTOR_BOOLEAN_TYPE_P (vectype_in))
3205 {
3206 if (dump_enabled_p ())
3207 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3208 "mixed mask and nonmask vector types\n");
3209 return false;
3210 }
3211
3212 /* FORNOW */
3213 nunits_in = TYPE_VECTOR_SUBPARTS (vectype_in);
3214 nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
3215 if (known_eq (nunits_in * 2, nunits_out))
3216 modifier = NARROW;
3217 else if (known_eq (nunits_out, nunits_in))
3218 modifier = NONE;
3219 else if (known_eq (nunits_out * 2, nunits_in))
3220 modifier = WIDEN;
3221 else
3222 return false;
3223
3224 /* We only handle functions that do not read or clobber memory. */
3225 if (gimple_vuse (stmt))
3226 {
3227 if (dump_enabled_p ())
3228 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3229 "function reads from or writes to memory.\n");
3230 return false;
3231 }
3232
3233 /* For now, we only vectorize functions if a target specific builtin
3234 is available. TODO -- in some cases, it might be profitable to
3235 insert the calls for pieces of the vector, in order to be able
3236 to vectorize other operations in the loop. */
3237 fndecl = NULL_TREE;
3238 internal_fn ifn = IFN_LAST;
3239 tree callee = gimple_call_fndecl (stmt);
3240
3241 /* First try using an internal function. */
3242 tree_code convert_code = ERROR_MARK;
3243 if (cfn != CFN_LAST
3244 && (modifier == NONE
3245 || (modifier == NARROW
3246 && simple_integer_narrowing (vectype_out, vectype_in,
3247 &convert_code))))
3248 ifn = vectorizable_internal_function (cfn, callee, vectype_out,
3249 vectype_in);
3250
3251 /* If that fails, try asking for a target-specific built-in function. */
3252 if (ifn == IFN_LAST)
3253 {
3254 if (cfn != CFN_LAST)
3255 fndecl = targetm.vectorize.builtin_vectorized_function
3256 (cfn, vectype_out, vectype_in);
3257 else if (callee && fndecl_built_in_p (callee, BUILT_IN_MD))
3258 fndecl = targetm.vectorize.builtin_md_vectorized_function
3259 (callee, vectype_out, vectype_in);
3260 }
3261
3262 if (ifn == IFN_LAST && !fndecl)
3263 {
3264 if (cfn == CFN_GOMP_SIMD_LANE
3265 && !slp_node
3266 && loop_vinfo
3267 && LOOP_VINFO_LOOP (loop_vinfo)->simduid
3268 && TREE_CODE (gimple_call_arg (stmt, 0)) == SSA_NAME
3269 && LOOP_VINFO_LOOP (loop_vinfo)->simduid
3270 == SSA_NAME_VAR (gimple_call_arg (stmt, 0)))
3271 {
3272 /* We can handle IFN_GOMP_SIMD_LANE by returning a
3273 { 0, 1, 2, ... vf - 1 } vector. */
3274 gcc_assert (nargs == 0);
3275 }
3276 else if (modifier == NONE
3277 && (gimple_call_builtin_p (stmt, BUILT_IN_BSWAP16)
3278 || gimple_call_builtin_p (stmt, BUILT_IN_BSWAP32)
3279 || gimple_call_builtin_p (stmt, BUILT_IN_BSWAP64)
3280 || gimple_call_builtin_p (stmt, BUILT_IN_BSWAP128)))
3281 return vectorizable_bswap (vinfo, stmt_info, gsi, vec_stmt, slp_node,
3282 slp_op, vectype_in, cost_vec);
3283 else
3284 {
3285 if (dump_enabled_p ())
3286 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3287 "function is not vectorizable.\n");
3288 return false;
3289 }
3290 }
3291
3292 if (slp_node)
3293 ncopies = 1;
3294 else if (modifier == NARROW && ifn == IFN_LAST)
3295 ncopies = vect_get_num_copies (loop_vinfo, vectype_out);
3296 else
3297 ncopies = vect_get_num_copies (loop_vinfo, vectype_in);
3298
3299 /* Sanity check: make sure that at least one copy of the vectorized stmt
3300 needs to be generated. */
3301 gcc_assert (ncopies >= 1);
3302
3303 vec_loop_masks *masks = (loop_vinfo ? &LOOP_VINFO_MASKS (loop_vinfo) : NULL);
3304 if (!vec_stmt) /* transformation not required. */
3305 {
3306 if (slp_node)
3307 for (i = 0; i < nargs; ++i)
3308 if (!vect_maybe_update_slp_op_vectype (slp_op[i], vectype_in))
3309 {
3310 if (dump_enabled_p ())
3311 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3312 "incompatible vector types for invariants\n");
3313 return false;
3314 }
3315 STMT_VINFO_TYPE (stmt_info) = call_vec_info_type;
3316 DUMP_VECT_SCOPE ("vectorizable_call");
3317 vect_model_simple_cost (vinfo, stmt_info,
3318 ncopies, dt, ndts, slp_node, cost_vec);
3319 if (ifn != IFN_LAST && modifier == NARROW && !slp_node)
3320 record_stmt_cost (cost_vec, ncopies / 2,
3321 vec_promote_demote, stmt_info, 0, vect_body);
3322
3323 if (loop_vinfo && mask_opno >= 0)
3324 {
3325 unsigned int nvectors = (slp_node
3326 ? SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node)
3327 : ncopies);
3328 tree scalar_mask = gimple_call_arg (stmt_info->stmt, mask_opno);
3329 vect_record_loop_mask (loop_vinfo, masks, nvectors,
3330 vectype_out, scalar_mask);
3331 }
3332 return true;
3333 }
3334
3335 /* Transform. */
3336
3337 if (dump_enabled_p ())
3338 dump_printf_loc (MSG_NOTE, vect_location, "transform call.\n");
3339
3340 /* Handle def. */
3341 scalar_dest = gimple_call_lhs (stmt);
3342 vec_dest = vect_create_destination_var (scalar_dest, vectype_out);
3343
3344 bool masked_loop_p = loop_vinfo && LOOP_VINFO_FULLY_MASKED_P (loop_vinfo);
3345
3346 if (modifier == NONE || ifn != IFN_LAST)
3347 {
3348 tree prev_res = NULL_TREE;
3349 vargs.safe_grow (nargs, true);
3350 orig_vargs.safe_grow (nargs, true);
3351 auto_vec<vec<tree> > vec_defs (nargs);
3352 for (j = 0; j < ncopies; ++j)
3353 {
3354 /* Build argument list for the vectorized call. */
3355 if (slp_node)
3356 {
3357 vec<tree> vec_oprnds0;
3358
3359 vect_get_slp_defs (vinfo, slp_node, &vec_defs);
3360 vec_oprnds0 = vec_defs[0];
3361
3362 /* Arguments are ready. Create the new vector stmt. */
3363 FOR_EACH_VEC_ELT (vec_oprnds0, i, vec_oprnd0)
3364 {
3365 size_t k;
3366 for (k = 0; k < nargs; k++)
3367 {
3368 vec<tree> vec_oprndsk = vec_defs[k];
3369 vargs[k] = vec_oprndsk[i];
3370 }
3371 gimple *new_stmt;
3372 if (modifier == NARROW)
3373 {
3374 /* We don't define any narrowing conditional functions
3375 at present. */
3376 gcc_assert (mask_opno < 0);
3377 tree half_res = make_ssa_name (vectype_in);
3378 gcall *call
3379 = gimple_build_call_internal_vec (ifn, vargs);
3380 gimple_call_set_lhs (call, half_res);
3381 gimple_call_set_nothrow (call, true);
3382 vect_finish_stmt_generation (vinfo, stmt_info, call, gsi);
3383 if ((i & 1) == 0)
3384 {
3385 prev_res = half_res;
3386 continue;
3387 }
3388 new_temp = make_ssa_name (vec_dest);
3389 new_stmt = gimple_build_assign (new_temp, convert_code,
3390 prev_res, half_res);
3391 vect_finish_stmt_generation (vinfo, stmt_info,
3392 new_stmt, gsi);
3393 }
3394 else
3395 {
3396 if (mask_opno >= 0 && masked_loop_p)
3397 {
3398 unsigned int vec_num = vec_oprnds0.length ();
3399 /* Always true for SLP. */
3400 gcc_assert (ncopies == 1);
3401 tree mask = vect_get_loop_mask (gsi, masks, vec_num,
3402 vectype_out, i);
3403 vargs[mask_opno] = prepare_load_store_mask
3404 (TREE_TYPE (mask), mask, vargs[mask_opno], gsi);
3405 }
3406
3407 gcall *call;
3408 if (ifn != IFN_LAST)
3409 call = gimple_build_call_internal_vec (ifn, vargs);
3410 else
3411 call = gimple_build_call_vec (fndecl, vargs);
3412 new_temp = make_ssa_name (vec_dest, call);
3413 gimple_call_set_lhs (call, new_temp);
3414 gimple_call_set_nothrow (call, true);
3415 vect_finish_stmt_generation (vinfo, stmt_info, call, gsi);
3416 new_stmt = call;
3417 }
3418 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
3419 }
3420 continue;
3421 }
3422
3423 for (i = 0; i < nargs; i++)
3424 {
3425 op = gimple_call_arg (stmt, i);
3426 if (j == 0)
3427 {
3428 vec_defs.quick_push (vNULL);
3429 vect_get_vec_defs_for_operand (vinfo, stmt_info, ncopies,
3430 op, &vec_defs[i],
3431 vectypes[i]);
3432 }
3433 orig_vargs[i] = vargs[i] = vec_defs[i][j];
3434 }
3435
3436 if (mask_opno >= 0 && masked_loop_p)
3437 {
3438 tree mask = vect_get_loop_mask (gsi, masks, ncopies,
3439 vectype_out, j);
3440 vargs[mask_opno]
3441 = prepare_load_store_mask (TREE_TYPE (mask), mask,
3442 vargs[mask_opno], gsi);
3443 }
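/* Editorial note (illustrative): prepare_load_store_mask above combines
   the loop mask for this copy with the call's own mask operand
   (effectively a bitwise AND), so lanes disabled by full-loop masking
   stay inactive in the conditional internal function call.  */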
3444
3445 gimple *new_stmt;
3446 if (cfn == CFN_GOMP_SIMD_LANE)
3447 {
3448 tree cst = build_index_vector (vectype_out, j * nunits_out, 1);
3449 tree new_var
3450 = vect_get_new_ssa_name (vectype_out, vect_simple_var, "cst_");
3451 gimple *init_stmt = gimple_build_assign (new_var, cst);
3452 vect_init_vector_1 (vinfo, stmt_info, init_stmt, NULL);
3453 new_temp = make_ssa_name (vec_dest);
3454 new_stmt = gimple_build_assign (new_temp, new_var);
3455 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
3456 }
3457 else if (modifier == NARROW)
3458 {
3459 /* We don't define any narrowing conditional functions at
3460 present. */
3461 gcc_assert (mask_opno < 0);
3462 tree half_res = make_ssa_name (vectype_in);
3463 gcall *call = gimple_build_call_internal_vec (ifn, vargs);
3464 gimple_call_set_lhs (call, half_res);
3465 gimple_call_set_nothrow (call, true);
3466 vect_finish_stmt_generation (vinfo, stmt_info, call, gsi);
3467 if ((j & 1) == 0)
3468 {
3469 prev_res = half_res;
3470 continue;
3471 }
3472 new_temp = make_ssa_name (vec_dest);
3473 new_stmt = gimple_build_assign (new_temp, convert_code,
3474 prev_res, half_res);
3475 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
3476 }
3477 else
3478 {
3479 gcall *call;
3480 if (ifn != IFN_LAST)
3481 call = gimple_build_call_internal_vec (ifn, vargs);
3482 else
3483 call = gimple_build_call_vec (fndecl, vargs);
3484 new_temp = make_ssa_name (vec_dest, call);
3485 gimple_call_set_lhs (call, new_temp);
3486 gimple_call_set_nothrow (call, true);
3487 vect_finish_stmt_generation (vinfo, stmt_info, call, gsi);
3488 new_stmt = call;
3489 }
3490
3491 if (j == (modifier == NARROW ? 1 : 0))
3492 *vec_stmt = new_stmt;
3493 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
3494 }
3495 for (i = 0; i < nargs; i++)
3496 {
3497 vec<tree> vec_oprndsi = vec_defs[i];
3498 vec_oprndsi.release ();
3499 }
3500 }
3501 else if (modifier == NARROW)
3502 {
3503 auto_vec<vec<tree> > vec_defs (nargs);
3504 /* We don't define any narrowing conditional functions at present. */
3505 gcc_assert (mask_opno < 0);
3506 for (j = 0; j < ncopies; ++j)
3507 {
3508 /* Build argument list for the vectorized call. */
3509 if (j == 0)
3510 vargs.create (nargs * 2);
3511 else
3512 vargs.truncate (0);
3513
3514 if (slp_node)
3515 {
3516 vec<tree> vec_oprnds0;
3517
3518 vect_get_slp_defs (vinfo, slp_node, &vec_defs);
3519 vec_oprnds0 = vec_defs[0];
3520
3521 /* Arguments are ready. Create the new vector stmt. */
3522 for (i = 0; vec_oprnds0.iterate (i, &vec_oprnd0); i += 2)
3523 {
3524 size_t k;
3525 vargs.truncate (0);
3526 for (k = 0; k < nargs; k++)
3527 {
3528 vec<tree> vec_oprndsk = vec_defs[k];
3529 vargs.quick_push (vec_oprndsk[i]);
3530 vargs.quick_push (vec_oprndsk[i + 1]);
3531 }
3532 gcall *call;
3533 if (ifn != IFN_LAST)
3534 call = gimple_build_call_internal_vec (ifn, vargs);
3535 else
3536 call = gimple_build_call_vec (fndecl, vargs);
3537 new_temp = make_ssa_name (vec_dest, call);
3538 gimple_call_set_lhs (call, new_temp);
3539 gimple_call_set_nothrow (call, true);
3540 vect_finish_stmt_generation (vinfo, stmt_info, call, gsi);
3541 SLP_TREE_VEC_STMTS (slp_node).quick_push (call);
3542 }
3543 continue;
3544 }
3545
3546 for (i = 0; i < nargs; i++)
3547 {
3548 op = gimple_call_arg (stmt, i);
3549 if (j == 0)
3550 {
3551 vec_defs.quick_push (vNULL);
3552 vect_get_vec_defs_for_operand (vinfo, stmt_info, 2 * ncopies,
3553 op, &vec_defs[i], vectypes[i]);
3554 }
3555 vec_oprnd0 = vec_defs[i][2*j];
3556 vec_oprnd1 = vec_defs[i][2*j+1];
3557
3558 vargs.quick_push (vec_oprnd0);
3559 vargs.quick_push (vec_oprnd1);
3560 }
3561
3562 gcall *new_stmt = gimple_build_call_vec (fndecl, vargs);
3563 new_temp = make_ssa_name (vec_dest, new_stmt);
3564 gimple_call_set_lhs (new_stmt, new_temp);
3565 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
3566
3567 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
3568 }
3569
3570 if (!slp_node)
3571 *vec_stmt = STMT_VINFO_VEC_STMTS (stmt_info)[0];
3572
3573 for (i = 0; i < nargs; i++)
3574 {
3575 vec<tree> vec_oprndsi = vec_defs[i];
3576 vec_oprndsi.release ();
3577 }
3578 }
3579 else
3580 /* No current target implements this case. */
3581 return false;
3582
3583 vargs.release ();
3584
3585 /* The call in STMT might prevent it from being removed in DCE.
3586 However, we cannot remove it here, because of the way the SSA name
3587 it defines is mapped to the new definition.  So just replace the
3588 rhs of the statement with something harmless.  */
3589
3590 if (slp_node)
3591 return true;
3592
3593 stmt_info = vect_orig_stmt (stmt_info);
3594 lhs = gimple_get_lhs (stmt_info->stmt);
3595
3596 gassign *new_stmt
3597 = gimple_build_assign (lhs, build_zero_cst (TREE_TYPE (lhs)));
3598 vinfo->replace_stmt (gsi, stmt_info, new_stmt);
3599
3600 return true;
3601 }
3602
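/* Editorial example (illustrative, not from the original sources) of a
   call that takes the NARROW internal-function path in
   vectorizable_call above:

     int out[N]; long long in[N];
     for (int i = 0; i < N; i++)
       out[i] = __builtin_ctzll (in[i]);

   If the target provides IFN_CTZ on the wider vector mode, two VnDI
   IFN_CTZ calls are emitted per output vector and their results are
   combined with the conversion code chosen by
   simple_integer_narrowing.  */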
3603
3604 struct simd_call_arg_info
3605 {
3606 tree vectype;
3607 tree op;
3608 HOST_WIDE_INT linear_step;
3609 enum vect_def_type dt;
3610 unsigned int align;
3611 bool simd_lane_linear;
3612 };
3613
3614 /* Helper function of vectorizable_simd_clone_call. If OP, an SSA_NAME,
3615 is linear within a simd lane (but not within the whole loop), note it in
3616 *ARGINFO. */
3617
3618 static void
3619 vect_simd_lane_linear (tree op, class loop *loop,
3620 struct simd_call_arg_info *arginfo)
3621 {
3622 gimple *def_stmt = SSA_NAME_DEF_STMT (op);
3623
3624 if (!is_gimple_assign (def_stmt)
3625 || gimple_assign_rhs_code (def_stmt) != POINTER_PLUS_EXPR
3626 || !is_gimple_min_invariant (gimple_assign_rhs1 (def_stmt)))
3627 return;
3628
3629 tree base = gimple_assign_rhs1 (def_stmt);
3630 HOST_WIDE_INT linear_step = 0;
3631 tree v = gimple_assign_rhs2 (def_stmt);
3632 while (TREE_CODE (v) == SSA_NAME)
3633 {
3634 tree t;
3635 def_stmt = SSA_NAME_DEF_STMT (v);
3636 if (is_gimple_assign (def_stmt))
3637 switch (gimple_assign_rhs_code (def_stmt))
3638 {
3639 case PLUS_EXPR:
3640 t = gimple_assign_rhs2 (def_stmt);
3641 if (linear_step || TREE_CODE (t) != INTEGER_CST)
3642 return;
3643 base = fold_build2 (POINTER_PLUS_EXPR, TREE_TYPE (base), base, t);
3644 v = gimple_assign_rhs1 (def_stmt);
3645 continue;
3646 case MULT_EXPR:
3647 t = gimple_assign_rhs2 (def_stmt);
3648 if (linear_step || !tree_fits_shwi_p (t) || integer_zerop (t))
3649 return;
3650 linear_step = tree_to_shwi (t);
3651 v = gimple_assign_rhs1 (def_stmt);
3652 continue;
3653 CASE_CONVERT:
3654 t = gimple_assign_rhs1 (def_stmt);
3655 if (TREE_CODE (TREE_TYPE (t)) != INTEGER_TYPE
3656 || (TYPE_PRECISION (TREE_TYPE (v))
3657 < TYPE_PRECISION (TREE_TYPE (t))))
3658 return;
3659 if (!linear_step)
3660 linear_step = 1;
3661 v = t;
3662 continue;
3663 default:
3664 return;
3665 }
3666 else if (gimple_call_internal_p (def_stmt, IFN_GOMP_SIMD_LANE)
3667 && loop->simduid
3668 && TREE_CODE (gimple_call_arg (def_stmt, 0)) == SSA_NAME
3669 && (SSA_NAME_VAR (gimple_call_arg (def_stmt, 0))
3670 == loop->simduid))
3671 {
3672 if (!linear_step)
3673 linear_step = 1;
3674 arginfo->linear_step = linear_step;
3675 arginfo->op = base;
3676 arginfo->simd_lane_linear = true;
3677 return;
3678 }
3679 }
3680 }
3681
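/* Editorial illustration (SSA names made up) of the pattern
   vect_simd_lane_linear walks:

     _1 = GOMP_SIMD_LANE (simduid.0_5);
     _2 = (sizetype) _1;
     _3 = _2 * 4;
     p_4 = &a + _3;

   Starting from the POINTER_PLUS_EXPR defining p_4, the loop above
   follows the MULT_EXPR and the conversion down to the
   IFN_GOMP_SIMD_LANE call and records base &a with linear_step 4,
   i.e. p_4 is linear within a simd lane even though it is not an
   induction variable of the whole loop.  */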
3682 /* Return the number of elements in vector type VECTYPE, which is associated
3683 with a SIMD clone. At present these vectors always have a constant
3684 length. */
3685
3686 static unsigned HOST_WIDE_INT
3687 simd_clone_subparts (tree vectype)
3688 {
3689 return TYPE_VECTOR_SUBPARTS (vectype).to_constant ();
3690 }
3691
3692 /* Function vectorizable_simd_clone_call.
3693
3694 Check if STMT_INFO performs a function call that can be vectorized
3695 by calling a simd clone of the function.
3696 If VEC_STMT is also passed, vectorize STMT_INFO: create a vectorized
3697 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
3698 Return true if STMT_INFO is vectorizable in this way. */
3699
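/* Editorial example (illustrative) of the situation handled below:
   given

     #pragma omp declare simd
     double f (double x);

   a loop computing b[i] = f (a[i]) can be vectorized by calling one of
   the simd clones generated for f (e.g. something like _ZGVbN2v_f on
   x86) with a vector argument, instead of keeping the call scalar.  */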
3700 static bool
3701 vectorizable_simd_clone_call (vec_info *vinfo, stmt_vec_info stmt_info,
3702 gimple_stmt_iterator *gsi,
3703 gimple **vec_stmt, slp_tree slp_node,
3704 stmt_vector_for_cost *)
3705 {
3706 tree vec_dest;
3707 tree scalar_dest;
3708 tree op, type;
3709 tree vec_oprnd0 = NULL_TREE;
3710 tree vectype;
3711 poly_uint64 nunits;
3712 loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
3713 bb_vec_info bb_vinfo = dyn_cast <bb_vec_info> (vinfo);
3714 class loop *loop = loop_vinfo ? LOOP_VINFO_LOOP (loop_vinfo) : NULL;
3715 tree fndecl, new_temp;
3716 int ncopies, j;
3717 auto_vec<simd_call_arg_info> arginfo;
3718 vec<tree> vargs = vNULL;
3719 size_t i, nargs;
3720 tree lhs, rtype, ratype;
3721 vec<constructor_elt, va_gc> *ret_ctor_elts = NULL;
3722
3723 /* Is STMT a vectorizable call? */
3724 gcall *stmt = dyn_cast <gcall *> (stmt_info->stmt);
3725 if (!stmt)
3726 return false;
3727
3728 fndecl = gimple_call_fndecl (stmt);
3729 if (fndecl == NULL_TREE)
3730 return false;
3731
3732 struct cgraph_node *node = cgraph_node::get (fndecl);
3733 if (node == NULL || node->simd_clones == NULL)
3734 return false;
3735
3736 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
3737 return false;
3738
3739 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
3740 && ! vec_stmt)
3741 return false;
3742
3743 if (gimple_call_lhs (stmt)
3744 && TREE_CODE (gimple_call_lhs (stmt)) != SSA_NAME)
3745 return false;
3746
3747 gcc_checking_assert (!stmt_can_throw_internal (cfun, stmt));
3748
3749 vectype = STMT_VINFO_VECTYPE (stmt_info);
3750
3751 if (loop_vinfo && nested_in_vect_loop_p (loop, stmt_info))
3752 return false;
3753
3754 /* FORNOW */
3755 if (slp_node)
3756 return false;
3757
3758 /* Process function arguments. */
3759 nargs = gimple_call_num_args (stmt);
3760
3761 /* Bail out if the function has zero arguments. */
3762 if (nargs == 0)
3763 return false;
3764
3765 arginfo.reserve (nargs, true);
3766
3767 for (i = 0; i < nargs; i++)
3768 {
3769 simd_call_arg_info thisarginfo;
3770 affine_iv iv;
3771
3772 thisarginfo.linear_step = 0;
3773 thisarginfo.align = 0;
3774 thisarginfo.op = NULL_TREE;
3775 thisarginfo.simd_lane_linear = false;
3776
3777 op = gimple_call_arg (stmt, i);
3778 if (!vect_is_simple_use (op, vinfo, &thisarginfo.dt,
3779 &thisarginfo.vectype)
3780 || thisarginfo.dt == vect_uninitialized_def)
3781 {
3782 if (dump_enabled_p ())
3783 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3784 "use not simple.\n");
3785 return false;
3786 }
3787
3788 if (thisarginfo.dt == vect_constant_def
3789 || thisarginfo.dt == vect_external_def)
3790 gcc_assert (thisarginfo.vectype == NULL_TREE);
3791 else
3792 {
3793 gcc_assert (thisarginfo.vectype != NULL_TREE);
3794 if (VECTOR_BOOLEAN_TYPE_P (thisarginfo.vectype))
3795 {
3796 if (dump_enabled_p ())
3797 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3798 "vector mask arguments are not supported\n");
3799 return false;
3800 }
3801 }
3802
3803 /* For linear arguments, the analyze phase should have saved
3804 the base and step in STMT_VINFO_SIMD_CLONE_INFO. */
3805 if (i * 3 + 4 <= STMT_VINFO_SIMD_CLONE_INFO (stmt_info).length ()
3806 && STMT_VINFO_SIMD_CLONE_INFO (stmt_info)[i * 3 + 2])
3807 {
3808 gcc_assert (vec_stmt);
3809 thisarginfo.linear_step
3810 = tree_to_shwi (STMT_VINFO_SIMD_CLONE_INFO (stmt_info)[i * 3 + 2]);
3811 thisarginfo.op
3812 = STMT_VINFO_SIMD_CLONE_INFO (stmt_info)[i * 3 + 1];
3813 thisarginfo.simd_lane_linear
3814 = (STMT_VINFO_SIMD_CLONE_INFO (stmt_info)[i * 3 + 3]
3815 == boolean_true_node);
3816 /* If the loop has been peeled for alignment, we need to adjust it. */
3817 tree n1 = LOOP_VINFO_NITERS_UNCHANGED (loop_vinfo);
3818 tree n2 = LOOP_VINFO_NITERS (loop_vinfo);
3819 if (n1 != n2 && !thisarginfo.simd_lane_linear)
3820 {
3821 tree bias = fold_build2 (MINUS_EXPR, TREE_TYPE (n1), n1, n2);
3822 tree step = STMT_VINFO_SIMD_CLONE_INFO (stmt_info)[i * 3 + 2];
3823 tree opt = TREE_TYPE (thisarginfo.op);
3824 bias = fold_convert (TREE_TYPE (step), bias);
3825 bias = fold_build2 (MULT_EXPR, TREE_TYPE (step), bias, step);
3826 thisarginfo.op
3827 = fold_build2 (POINTER_TYPE_P (opt)
3828 ? POINTER_PLUS_EXPR : PLUS_EXPR, opt,
3829 thisarginfo.op, bias);
3830 }
3831 }
3832 else if (!vec_stmt
3833 && thisarginfo.dt != vect_constant_def
3834 && thisarginfo.dt != vect_external_def
3835 && loop_vinfo
3836 && TREE_CODE (op) == SSA_NAME
3837 && simple_iv (loop, loop_containing_stmt (stmt), op,
3838 &iv, false)
3839 && tree_fits_shwi_p (iv.step))
3840 {
3841 thisarginfo.linear_step = tree_to_shwi (iv.step);
3842 thisarginfo.op = iv.base;
3843 }
3844 else if ((thisarginfo.dt == vect_constant_def
3845 || thisarginfo.dt == vect_external_def)
3846 && POINTER_TYPE_P (TREE_TYPE (op)))
3847 thisarginfo.align = get_pointer_alignment (op) / BITS_PER_UNIT;
3848 /* Addresses of array elements indexed by GOMP_SIMD_LANE are
3849 linear too. */
3850 if (POINTER_TYPE_P (TREE_TYPE (op))
3851 && !thisarginfo.linear_step
3852 && !vec_stmt
3853 && thisarginfo.dt != vect_constant_def
3854 && thisarginfo.dt != vect_external_def
3855 && loop_vinfo
3856 && !slp_node
3857 && TREE_CODE (op) == SSA_NAME)
3858 vect_simd_lane_linear (op, loop, &thisarginfo);
3859
3860 arginfo.quick_push (thisarginfo);
3861 }
3862
3863 poly_uint64 vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
3864 if (!vf.is_constant ())
3865 {
3866 if (dump_enabled_p ())
3867 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3868 "not considering SIMD clones; not yet supported"
3869 " for variable-width vectors.\n");
3870 return false;
3871 }
3872
3873 unsigned int badness = 0;
3874 struct cgraph_node *bestn = NULL;
3875 if (STMT_VINFO_SIMD_CLONE_INFO (stmt_info).exists ())
3876 bestn = cgraph_node::get (STMT_VINFO_SIMD_CLONE_INFO (stmt_info)[0]);
3877 else
3878 for (struct cgraph_node *n = node->simd_clones; n != NULL;
3879 n = n->simdclone->next_clone)
3880 {
3881 unsigned int this_badness = 0;
3882 unsigned int num_calls;
3883 if (!constant_multiple_p (vf, n->simdclone->simdlen, &num_calls)
3884 || n->simdclone->nargs != nargs)
3885 continue;
3886 if (num_calls != 1)
3887 this_badness += exact_log2 (num_calls) * 1024;
3888 if (n->simdclone->inbranch)
3889 this_badness += 2048;
3890 int target_badness = targetm.simd_clone.usable (n);
3891 if (target_badness < 0)
3892 continue;
3893 this_badness += target_badness * 512;
3894 /* FORNOW: Have to add code to add the mask argument. */
3895 if (n->simdclone->inbranch)
3896 continue;
3897 for (i = 0; i < nargs; i++)
3898 {
3899 switch (n->simdclone->args[i].arg_type)
3900 {
3901 case SIMD_CLONE_ARG_TYPE_VECTOR:
3902 if (!useless_type_conversion_p
3903 (n->simdclone->args[i].orig_type,
3904 TREE_TYPE (gimple_call_arg (stmt, i))))
3905 i = -1;
3906 else if (arginfo[i].dt == vect_constant_def
3907 || arginfo[i].dt == vect_external_def
3908 || arginfo[i].linear_step)
3909 this_badness += 64;
3910 break;
3911 case SIMD_CLONE_ARG_TYPE_UNIFORM:
3912 if (arginfo[i].dt != vect_constant_def
3913 && arginfo[i].dt != vect_external_def)
3914 i = -1;
3915 break;
3916 case SIMD_CLONE_ARG_TYPE_LINEAR_CONSTANT_STEP:
3917 case SIMD_CLONE_ARG_TYPE_LINEAR_REF_CONSTANT_STEP:
3918 if (arginfo[i].dt == vect_constant_def
3919 || arginfo[i].dt == vect_external_def
3920 || (arginfo[i].linear_step
3921 != n->simdclone->args[i].linear_step))
3922 i = -1;
3923 break;
3924 case SIMD_CLONE_ARG_TYPE_LINEAR_VARIABLE_STEP:
3925 case SIMD_CLONE_ARG_TYPE_LINEAR_VAL_CONSTANT_STEP:
3926 case SIMD_CLONE_ARG_TYPE_LINEAR_UVAL_CONSTANT_STEP:
3927 case SIMD_CLONE_ARG_TYPE_LINEAR_REF_VARIABLE_STEP:
3928 case SIMD_CLONE_ARG_TYPE_LINEAR_VAL_VARIABLE_STEP:
3929 case SIMD_CLONE_ARG_TYPE_LINEAR_UVAL_VARIABLE_STEP:
3930 /* FORNOW */
3931 i = -1;
3932 break;
3933 case SIMD_CLONE_ARG_TYPE_MASK:
3934 gcc_unreachable ();
3935 }
3936 if (i == (size_t) -1)
3937 break;
3938 if (n->simdclone->args[i].alignment > arginfo[i].align)
3939 {
3940 i = -1;
3941 break;
3942 }
3943 if (arginfo[i].align)
3944 this_badness += (exact_log2 (arginfo[i].align)
3945 - exact_log2 (n->simdclone->args[i].alignment));
3946 }
3947 if (i == (size_t) -1)
3948 continue;
3949 if (bestn == NULL || this_badness < badness)
3950 {
3951 bestn = n;
3952 badness = this_badness;
3953 }
3954 }
3955
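/* Editorial worked example of the badness scoring above: with vf == 8,
   a clone with simdlen 4 needs num_calls == 2 and starts at
   exact_log2 (2) * 1024 == 1024, while an otherwise equal clone with
   simdlen 8 scores 0 and is preferred; an inbranch clone would add
   another 2048 (and is skipped for now anyway).  */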
3956 if (bestn == NULL)
3957 return false;
3958
3959 for (i = 0; i < nargs; i++)
3960 if ((arginfo[i].dt == vect_constant_def
3961 || arginfo[i].dt == vect_external_def)
3962 && bestn->simdclone->args[i].arg_type == SIMD_CLONE_ARG_TYPE_VECTOR)
3963 {
3964 tree arg_type = TREE_TYPE (gimple_call_arg (stmt, i));
3965 arginfo[i].vectype = get_vectype_for_scalar_type (vinfo, arg_type,
3966 slp_node);
3967 if (arginfo[i].vectype == NULL
3968 || !constant_multiple_p (bestn->simdclone->simdlen,
3969 simd_clone_subparts (arginfo[i].vectype)))
3970 return false;
3971 }
3972
3973 fndecl = bestn->decl;
3974 nunits = bestn->simdclone->simdlen;
3975 ncopies = vector_unroll_factor (vf, nunits);
3976
3977 /* If the function isn't const, only allow it in simd loops where the
3978 user has asserted that at least nunits consecutive iterations can be
3979 performed using SIMD instructions. */
3980 if ((loop == NULL || maybe_lt ((unsigned) loop->safelen, nunits))
3981 && gimple_vuse (stmt))
3982 return false;
3983
3984 /* Sanity check: make sure that at least one copy of the vectorized stmt
3985 needs to be generated. */
3986 gcc_assert (ncopies >= 1);
3987
3988 if (!vec_stmt) /* transformation not required. */
3989 {
3990 STMT_VINFO_SIMD_CLONE_INFO (stmt_info).safe_push (bestn->decl);
3991 for (i = 0; i < nargs; i++)
3992 if ((bestn->simdclone->args[i].arg_type
3993 == SIMD_CLONE_ARG_TYPE_LINEAR_CONSTANT_STEP)
3994 || (bestn->simdclone->args[i].arg_type
3995 == SIMD_CLONE_ARG_TYPE_LINEAR_REF_CONSTANT_STEP))
3996 {
3997 STMT_VINFO_SIMD_CLONE_INFO (stmt_info).safe_grow_cleared (i * 3
3998 + 1,
3999 true);
4000 STMT_VINFO_SIMD_CLONE_INFO (stmt_info).safe_push (arginfo[i].op);
4001 tree lst = POINTER_TYPE_P (TREE_TYPE (arginfo[i].op))
4002 ? size_type_node : TREE_TYPE (arginfo[i].op);
4003 tree ls = build_int_cst (lst, arginfo[i].linear_step);
4004 STMT_VINFO_SIMD_CLONE_INFO (stmt_info).safe_push (ls);
4005 tree sll = arginfo[i].simd_lane_linear
4006 ? boolean_true_node : boolean_false_node;
4007 STMT_VINFO_SIMD_CLONE_INFO (stmt_info).safe_push (sll);
4008 }
4009 STMT_VINFO_TYPE (stmt_info) = call_simd_clone_vec_info_type;
4010 DUMP_VECT_SCOPE ("vectorizable_simd_clone_call");
4011 /* vect_model_simple_cost (vinfo, stmt_info, ncopies,
4012 dt, slp_node, cost_vec); */
4013 return true;
4014 }
4015
4016 /* Transform. */
4017
4018 if (dump_enabled_p ())
4019 dump_printf_loc (MSG_NOTE, vect_location, "transform call.\n");
4020
4021 /* Handle def. */
4022 scalar_dest = gimple_call_lhs (stmt);
4023 vec_dest = NULL_TREE;
4024 rtype = NULL_TREE;
4025 ratype = NULL_TREE;
4026 if (scalar_dest)
4027 {
4028 vec_dest = vect_create_destination_var (scalar_dest, vectype);
4029 rtype = TREE_TYPE (TREE_TYPE (fndecl));
4030 if (TREE_CODE (rtype) == ARRAY_TYPE)
4031 {
4032 ratype = rtype;
4033 rtype = TREE_TYPE (ratype);
4034 }
4035 }
4036
4037 auto_vec<vec<tree> > vec_oprnds;
4038 auto_vec<unsigned> vec_oprnds_i;
4039 vec_oprnds.safe_grow_cleared (nargs, true);
4040 vec_oprnds_i.safe_grow_cleared (nargs, true);
4041 for (j = 0; j < ncopies; ++j)
4042 {
4043 /* Build argument list for the vectorized call. */
4044 if (j == 0)
4045 vargs.create (nargs);
4046 else
4047 vargs.truncate (0);
4048
4049 for (i = 0; i < nargs; i++)
4050 {
4051 unsigned int k, l, m, o;
4052 tree atype;
4053 op = gimple_call_arg (stmt, i);
4054 switch (bestn->simdclone->args[i].arg_type)
4055 {
4056 case SIMD_CLONE_ARG_TYPE_VECTOR:
4057 atype = bestn->simdclone->args[i].vector_type;
4058 o = vector_unroll_factor (nunits,
4059 simd_clone_subparts (atype));
4060 for (m = j * o; m < (j + 1) * o; m++)
4061 {
4062 if (simd_clone_subparts (atype)
4063 < simd_clone_subparts (arginfo[i].vectype))
4064 {
4065 poly_uint64 prec = GET_MODE_BITSIZE (TYPE_MODE (atype));
4066 k = (simd_clone_subparts (arginfo[i].vectype)
4067 / simd_clone_subparts (atype));
4068 gcc_assert ((k & (k - 1)) == 0);
4069 if (m == 0)
4070 {
4071 vect_get_vec_defs_for_operand (vinfo, stmt_info,
4072 ncopies * o / k, op,
4073 &vec_oprnds[i]);
4074 vec_oprnds_i[i] = 0;
4075 vec_oprnd0 = vec_oprnds[i][vec_oprnds_i[i]++];
4076 }
4077 else
4078 {
4079 vec_oprnd0 = arginfo[i].op;
4080 if ((m & (k - 1)) == 0)
4081 vec_oprnd0 = vec_oprnds[i][vec_oprnds_i[i]++];
4082 }
4083 arginfo[i].op = vec_oprnd0;
4084 vec_oprnd0
4085 = build3 (BIT_FIELD_REF, atype, vec_oprnd0,
4086 bitsize_int (prec),
4087 bitsize_int ((m & (k - 1)) * prec));
4088 gassign *new_stmt
4089 = gimple_build_assign (make_ssa_name (atype),
4090 vec_oprnd0);
4091 vect_finish_stmt_generation (vinfo, stmt_info,
4092 new_stmt, gsi);
4093 vargs.safe_push (gimple_assign_lhs (new_stmt));
4094 }
4095 else
4096 {
4097 k = (simd_clone_subparts (atype)
4098 / simd_clone_subparts (arginfo[i].vectype));
4099 gcc_assert ((k & (k - 1)) == 0);
4100 vec<constructor_elt, va_gc> *ctor_elts;
4101 if (k != 1)
4102 vec_alloc (ctor_elts, k);
4103 else
4104 ctor_elts = NULL;
4105 for (l = 0; l < k; l++)
4106 {
4107 if (m == 0 && l == 0)
4108 {
4109 vect_get_vec_defs_for_operand (vinfo, stmt_info,
4110 k * o * ncopies,
4111 op,
4112 &vec_oprnds[i]);
4113 vec_oprnds_i[i] = 0;
4114 vec_oprnd0 = vec_oprnds[i][vec_oprnds_i[i]++];
4115 }
4116 else
4117 vec_oprnd0 = vec_oprnds[i][vec_oprnds_i[i]++];
4118 arginfo[i].op = vec_oprnd0;
4119 if (k == 1)
4120 break;
4121 CONSTRUCTOR_APPEND_ELT (ctor_elts, NULL_TREE,
4122 vec_oprnd0);
4123 }
4124 if (k == 1)
4125 if (!useless_type_conversion_p (TREE_TYPE (vec_oprnd0),
4126 atype))
4127 {
4128 vec_oprnd0
4129 = build1 (VIEW_CONVERT_EXPR, atype, vec_oprnd0);
4130 gassign *new_stmt
4131 = gimple_build_assign (make_ssa_name (atype),
4132 vec_oprnd0);
4133 vect_finish_stmt_generation (vinfo, stmt_info,
4134 new_stmt, gsi);
4135 vargs.safe_push (gimple_assign_lhs (new_stmt));
4136 }
4137 else
4138 vargs.safe_push (vec_oprnd0);
4139 else
4140 {
4141 vec_oprnd0 = build_constructor (atype, ctor_elts);
4142 gassign *new_stmt
4143 = gimple_build_assign (make_ssa_name (atype),
4144 vec_oprnd0);
4145 vect_finish_stmt_generation (vinfo, stmt_info,
4146 new_stmt, gsi);
4147 vargs.safe_push (gimple_assign_lhs (new_stmt));
4148 }
4149 }
4150 }
4151 break;
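	  /* Editorial example for the SIMD_CLONE_ARG_TYPE_VECTOR handling
	     above: if the chosen clone expects V4SI arguments while the
	     vectorized defs are V8SI (k == 2), each V8SI def is split
	     into two V4SI halves with BIT_FIELD_REFs; in the opposite
	     situation (clone expects V8SI, defs are V4SI) two defs are
	     combined into one argument with a CONSTRUCTOR, or a single
	     def is reinterpreted via VIEW_CONVERT_EXPR when k == 1 but
	     the types differ.  */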
4152 case SIMD_CLONE_ARG_TYPE_UNIFORM:
4153 vargs.safe_push (op);
4154 break;
4155 case SIMD_CLONE_ARG_TYPE_LINEAR_CONSTANT_STEP:
4156 case SIMD_CLONE_ARG_TYPE_LINEAR_REF_CONSTANT_STEP:
4157 if (j == 0)
4158 {
4159 gimple_seq stmts;
4160 arginfo[i].op
4161 = force_gimple_operand (unshare_expr (arginfo[i].op),
4162 &stmts, true, NULL_TREE);
4163 if (stmts != NULL)
4164 {
4165 basic_block new_bb;
4166 edge pe = loop_preheader_edge (loop);
4167 new_bb = gsi_insert_seq_on_edge_immediate (pe, stmts);
4168 gcc_assert (!new_bb);
4169 }
4170 if (arginfo[i].simd_lane_linear)
4171 {
4172 vargs.safe_push (arginfo[i].op);
4173 break;
4174 }
4175 tree phi_res = copy_ssa_name (op);
4176 gphi *new_phi = create_phi_node (phi_res, loop->header);
4177 add_phi_arg (new_phi, arginfo[i].op,
4178 loop_preheader_edge (loop), UNKNOWN_LOCATION);
4179 enum tree_code code
4180 = POINTER_TYPE_P (TREE_TYPE (op))
4181 ? POINTER_PLUS_EXPR : PLUS_EXPR;
4182 tree type = POINTER_TYPE_P (TREE_TYPE (op))
4183 ? sizetype : TREE_TYPE (op);
4184 poly_widest_int cst
4185 = wi::mul (bestn->simdclone->args[i].linear_step,
4186 ncopies * nunits);
4187 tree tcst = wide_int_to_tree (type, cst);
4188 tree phi_arg = copy_ssa_name (op);
4189 gassign *new_stmt
4190 = gimple_build_assign (phi_arg, code, phi_res, tcst);
4191 gimple_stmt_iterator si = gsi_after_labels (loop->header);
4192 gsi_insert_after (&si, new_stmt, GSI_NEW_STMT);
4193 add_phi_arg (new_phi, phi_arg, loop_latch_edge (loop),
4194 UNKNOWN_LOCATION);
4195 arginfo[i].op = phi_res;
4196 vargs.safe_push (phi_res);
4197 }
4198 else
4199 {
4200 enum tree_code code
4201 = POINTER_TYPE_P (TREE_TYPE (op))
4202 ? POINTER_PLUS_EXPR : PLUS_EXPR;
4203 tree type = POINTER_TYPE_P (TREE_TYPE (op))
4204 ? sizetype : TREE_TYPE (op);
4205 poly_widest_int cst
4206 = wi::mul (bestn->simdclone->args[i].linear_step,
4207 j * nunits);
4208 tree tcst = wide_int_to_tree (type, cst);
4209 new_temp = make_ssa_name (TREE_TYPE (op));
4210 gassign *new_stmt
4211 = gimple_build_assign (new_temp, code,
4212 arginfo[i].op, tcst);
4213 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
4214 vargs.safe_push (new_temp);
4215 }
4216 break;
4217 case SIMD_CLONE_ARG_TYPE_LINEAR_VAL_CONSTANT_STEP:
4218 case SIMD_CLONE_ARG_TYPE_LINEAR_UVAL_CONSTANT_STEP:
4219 case SIMD_CLONE_ARG_TYPE_LINEAR_VARIABLE_STEP:
4220 case SIMD_CLONE_ARG_TYPE_LINEAR_REF_VARIABLE_STEP:
4221 case SIMD_CLONE_ARG_TYPE_LINEAR_VAL_VARIABLE_STEP:
4222 case SIMD_CLONE_ARG_TYPE_LINEAR_UVAL_VARIABLE_STEP:
4223 default:
4224 gcc_unreachable ();
4225 }
4226 }
4227
4228 gcall *new_call = gimple_build_call_vec (fndecl, vargs);
4229 if (vec_dest)
4230 {
4231 gcc_assert (ratype
4232 || known_eq (simd_clone_subparts (rtype), nunits));
4233 if (ratype)
4234 new_temp = create_tmp_var (ratype);
4235 else if (useless_type_conversion_p (vectype, rtype))
4236 new_temp = make_ssa_name (vec_dest, new_call);
4237 else
4238 new_temp = make_ssa_name (rtype, new_call);
4239 gimple_call_set_lhs (new_call, new_temp);
4240 }
4241 vect_finish_stmt_generation (vinfo, stmt_info, new_call, gsi);
4242 gimple *new_stmt = new_call;
4243
4244 if (vec_dest)
4245 {
4246 if (!multiple_p (simd_clone_subparts (vectype), nunits))
4247 {
4248 unsigned int k, l;
4249 poly_uint64 prec = GET_MODE_BITSIZE (TYPE_MODE (vectype));
4250 poly_uint64 bytes = GET_MODE_SIZE (TYPE_MODE (vectype));
4251 k = vector_unroll_factor (nunits,
4252 simd_clone_subparts (vectype));
4253 gcc_assert ((k & (k - 1)) == 0);
4254 for (l = 0; l < k; l++)
4255 {
4256 tree t;
4257 if (ratype)
4258 {
4259 t = build_fold_addr_expr (new_temp);
4260 t = build2 (MEM_REF, vectype, t,
4261 build_int_cst (TREE_TYPE (t), l * bytes));
4262 }
4263 else
4264 t = build3 (BIT_FIELD_REF, vectype, new_temp,
4265 bitsize_int (prec), bitsize_int (l * prec));
4266 new_stmt = gimple_build_assign (make_ssa_name (vectype), t);
4267 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
4268
4269 if (j == 0 && l == 0)
4270 *vec_stmt = new_stmt;
4271 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
4272 }
4273
4274 if (ratype)
4275 vect_clobber_variable (vinfo, stmt_info, gsi, new_temp);
4276 continue;
4277 }
4278 else if (!multiple_p (nunits, simd_clone_subparts (vectype)))
4279 {
4280 unsigned int k = (simd_clone_subparts (vectype)
4281 / simd_clone_subparts (rtype));
4282 gcc_assert ((k & (k - 1)) == 0);
4283 if ((j & (k - 1)) == 0)
4284 vec_alloc (ret_ctor_elts, k);
4285 if (ratype)
4286 {
4287 unsigned int m, o;
4288 o = vector_unroll_factor (nunits,
4289 simd_clone_subparts (rtype));
4290 for (m = 0; m < o; m++)
4291 {
4292 tree tem = build4 (ARRAY_REF, rtype, new_temp,
4293 size_int (m), NULL_TREE, NULL_TREE);
4294 new_stmt = gimple_build_assign (make_ssa_name (rtype),
4295 tem);
4296 vect_finish_stmt_generation (vinfo, stmt_info,
4297 new_stmt, gsi);
4298 CONSTRUCTOR_APPEND_ELT (ret_ctor_elts, NULL_TREE,
4299 gimple_assign_lhs (new_stmt));
4300 }
4301 vect_clobber_variable (vinfo, stmt_info, gsi, new_temp);
4302 }
4303 else
4304 CONSTRUCTOR_APPEND_ELT (ret_ctor_elts, NULL_TREE, new_temp);
4305 if ((j & (k - 1)) != k - 1)
4306 continue;
4307 vec_oprnd0 = build_constructor (vectype, ret_ctor_elts);
4308 new_stmt
4309 = gimple_build_assign (make_ssa_name (vec_dest), vec_oprnd0);
4310 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
4311
4312 if ((unsigned) j == k - 1)
4313 *vec_stmt = new_stmt;
4314 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
4315 continue;
4316 }
4317 else if (ratype)
4318 {
4319 tree t = build_fold_addr_expr (new_temp);
4320 t = build2 (MEM_REF, vectype, t,
4321 build_int_cst (TREE_TYPE (t), 0));
4322 new_stmt = gimple_build_assign (make_ssa_name (vec_dest), t);
4323 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
4324 vect_clobber_variable (vinfo, stmt_info, gsi, new_temp);
4325 }
4326 else if (!useless_type_conversion_p (vectype, rtype))
4327 {
4328 vec_oprnd0 = build1 (VIEW_CONVERT_EXPR, vectype, new_temp);
4329 new_stmt
4330 = gimple_build_assign (make_ssa_name (vec_dest), vec_oprnd0);
4331 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
4332 }
4333 }
4334
4335 if (j == 0)
4336 *vec_stmt = new_stmt;
4337 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
4338 }
4339
4340 for (i = 0; i < nargs; ++i)
4341 {
4342 vec<tree> oprndsi = vec_oprnds[i];
4343 oprndsi.release ();
4344 }
4345 vargs.release ();
4346
4347 /* The call in STMT might prevent it from being removed in DCE.
4348 However, we cannot remove it here, because of the way the SSA name
4349 it defines is mapped to the new definition.  So just replace the
4350 rhs of the statement with something harmless.  */
4351
4352 if (slp_node)
4353 return true;
4354
4355 gimple *new_stmt;
4356 if (scalar_dest)
4357 {
4358 type = TREE_TYPE (scalar_dest);
4359 lhs = gimple_call_lhs (vect_orig_stmt (stmt_info)->stmt);
4360 new_stmt = gimple_build_assign (lhs, build_zero_cst (type));
4361 }
4362 else
4363 new_stmt = gimple_build_nop ();
4364 vinfo->replace_stmt (gsi, vect_orig_stmt (stmt_info), new_stmt);
4365 unlink_stmt_vdef (stmt);
4366
4367 return true;
4368 }
4369
4370
4371 /* Function vect_gen_widened_results_half
4372
4373 Create a vector stmt whose code, type, number of arguments, and result
4374 variable are CODE, OP_TYPE, and VEC_DEST, and its arguments are
4375 VEC_OPRND0 and VEC_OPRND1. The new vector stmt is to be inserted at GSI.
4376 If OP_TYPE is unary_op rather than binary_op, VEC_OPRND1 is ignored
4377 and a unary statement is built instead.
4378 STMT_INFO is the original scalar stmt that we are vectorizing. */
4379
4380 static gimple *
4381 vect_gen_widened_results_half (vec_info *vinfo, enum tree_code code,
4382 tree vec_oprnd0, tree vec_oprnd1, int op_type,
4383 tree vec_dest, gimple_stmt_iterator *gsi,
4384 stmt_vec_info stmt_info)
4385 {
4386 gimple *new_stmt;
4387 tree new_temp;
4388
4389 /* Generate half of the widened result: */
4390 gcc_assert (op_type == TREE_CODE_LENGTH (code));
4391 if (op_type != binary_op)
4392 vec_oprnd1 = NULL;
4393 new_stmt = gimple_build_assign (vec_dest, code, vec_oprnd0, vec_oprnd1);
4394 new_temp = make_ssa_name (vec_dest, new_stmt);
4395 gimple_assign_set_lhs (new_stmt, new_temp);
4396 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
4397
4398 return new_stmt;
4399 }
4400
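/* Editorial example (illustrative): for a WIDEN_MULT_EXPR with V8HI
   operands and V4SI results, this helper is typically invoked twice,
   once with the "lo" and once with the "hi" variant of the widening
   code (e.g. VEC_WIDEN_MULT_LO_EXPR and VEC_WIDEN_MULT_HI_EXPR), so
   that the two results together cover all eight input lanes.  */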
4401
4402 /* Create vectorized demotion statements for vector operands from VEC_OPRNDS.
4403 For multi-step conversions store the resulting vectors and call the function
4404 recursively. */
4405
4406 static void
4407 vect_create_vectorized_demotion_stmts (vec_info *vinfo, vec<tree> *vec_oprnds,
4408 int multi_step_cvt,
4409 stmt_vec_info stmt_info,
4410 vec<tree> vec_dsts,
4411 gimple_stmt_iterator *gsi,
4412 slp_tree slp_node, enum tree_code code)
4413 {
4414 unsigned int i;
4415 tree vop0, vop1, new_tmp, vec_dest;
4416
4417 vec_dest = vec_dsts.pop ();
4418
4419 for (i = 0; i < vec_oprnds->length (); i += 2)
4420 {
4421 /* Create demotion operation. */
4422 vop0 = (*vec_oprnds)[i];
4423 vop1 = (*vec_oprnds)[i + 1];
4424 gassign *new_stmt = gimple_build_assign (vec_dest, code, vop0, vop1);
4425 new_tmp = make_ssa_name (vec_dest, new_stmt);
4426 gimple_assign_set_lhs (new_stmt, new_tmp);
4427 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
4428
4429 if (multi_step_cvt)
4430 /* Store the resulting vector for the next recursive call. */
4431 (*vec_oprnds)[i/2] = new_tmp;
4432 else
4433 {
4434 /* This is the last step of the conversion sequence. Store the
4435 vectors in SLP_NODE or in the vector info of the scalar statement
4436 (or in the STMT_VINFO_RELATED_STMT chain).  */
4437 if (slp_node)
4438 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
4439 else
4440 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
4441 }
4442 }
4443
4444 /* For multi-step demotion operations we first generate demotion operations
4445 from the source type to the intermediate types, and then combine the
4446 results (stored in VEC_OPRNDS) with a further demotion operation to the
4447 destination type.  */
4448 if (multi_step_cvt)
4449 {
4450 /* At each level of recursion we have half of the operands we had at the
4451 previous level. */
4452 vec_oprnds->truncate ((i+1)/2);
4453 vect_create_vectorized_demotion_stmts (vinfo, vec_oprnds,
4454 multi_step_cvt - 1,
4455 stmt_info, vec_dsts, gsi,
4456 slp_node, VEC_PACK_TRUNC_EXPR);
4457 }
4458
4459 vec_dsts.quick_push (vec_dest);
4460 }
4461
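/* Editorial example of the multi-step demotion recursion above:
   narrowing DImode elements all the way to QImode elements goes
   through SImode and HImode intermediate vector types, packing two
   vectors into one with VEC_PACK_TRUNC_EXPR at each step; each level
   of recursion therefore halves the number of live vectors in
   VEC_OPRNDS, which is why it is truncated to (i + 1) / 2.  */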
4462
4463 /* Create vectorized promotion statements for vector operands from VEC_OPRNDS0
4464 and VEC_OPRNDS1, for a binary operation associated with scalar statement
4465 STMT_INFO. For multi-step conversions store the resulting vectors and
4466 call the function recursively. */
4467
4468 static void
4469 vect_create_vectorized_promotion_stmts (vec_info *vinfo,
4470 vec<tree> *vec_oprnds0,
4471 vec<tree> *vec_oprnds1,
4472 stmt_vec_info stmt_info, tree vec_dest,
4473 gimple_stmt_iterator *gsi,
4474 enum tree_code code1,
4475 enum tree_code code2, int op_type)
4476 {
4477 int i;
4478 tree vop0, vop1, new_tmp1, new_tmp2;
4479 gimple *new_stmt1, *new_stmt2;
4480 vec<tree> vec_tmp = vNULL;
4481
4482 vec_tmp.create (vec_oprnds0->length () * 2);
4483 FOR_EACH_VEC_ELT (*vec_oprnds0, i, vop0)
4484 {
4485 if (op_type == binary_op)
4486 vop1 = (*vec_oprnds1)[i];
4487 else
4488 vop1 = NULL_TREE;
4489
4490 /* Generate the two halves of the promotion operation. */
4491 new_stmt1 = vect_gen_widened_results_half (vinfo, code1, vop0, vop1,
4492 op_type, vec_dest, gsi,
4493 stmt_info);
4494 new_stmt2 = vect_gen_widened_results_half (vinfo, code2, vop0, vop1,
4495 op_type, vec_dest, gsi,
4496 stmt_info);
4497 if (is_gimple_call (new_stmt1))
4498 {
4499 new_tmp1 = gimple_call_lhs (new_stmt1);
4500 new_tmp2 = gimple_call_lhs (new_stmt2);
4501 }
4502 else
4503 {
4504 new_tmp1 = gimple_assign_lhs (new_stmt1);
4505 new_tmp2 = gimple_assign_lhs (new_stmt2);
4506 }
4507
4508 /* Store the results for the next step. */
4509 vec_tmp.quick_push (new_tmp1);
4510 vec_tmp.quick_push (new_tmp2);
4511 }
4512
4513 vec_oprnds0->release ();
4514 *vec_oprnds0 = vec_tmp;
4515 }
4516
4517
4518 /* Check if STMT_INFO performs a conversion operation that can be vectorized.
4519 If VEC_STMT is also passed, vectorize STMT_INFO: create a vectorized
4520 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
4521 Return true if STMT_INFO is vectorizable in this way. */
4522
4523 static bool
4524 vectorizable_conversion (vec_info *vinfo,
4525 stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
4526 gimple **vec_stmt, slp_tree slp_node,
4527 stmt_vector_for_cost *cost_vec)
4528 {
4529 tree vec_dest;
4530 tree scalar_dest;
4531 tree op0, op1 = NULL_TREE;
4532 loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
4533 enum tree_code code, code1 = ERROR_MARK, code2 = ERROR_MARK;
4534 enum tree_code codecvt1 = ERROR_MARK, codecvt2 = ERROR_MARK;
4535 tree new_temp;
4536 enum vect_def_type dt[2] = {vect_unknown_def_type, vect_unknown_def_type};
4537 int ndts = 2;
4538 poly_uint64 nunits_in;
4539 poly_uint64 nunits_out;
4540 tree vectype_out, vectype_in;
4541 int ncopies, i;
4542 tree lhs_type, rhs_type;
4543 enum { NARROW, NONE, WIDEN } modifier;
4544 vec<tree> vec_oprnds0 = vNULL;
4545 vec<tree> vec_oprnds1 = vNULL;
4546 tree vop0;
4547 bb_vec_info bb_vinfo = dyn_cast <bb_vec_info> (vinfo);
4548 int multi_step_cvt = 0;
4549 vec<tree> interm_types = vNULL;
4550 tree intermediate_type, cvt_type = NULL_TREE;
4551 int op_type;
4552 unsigned short fltsz;
4553
4554 /* Is STMT a vectorizable conversion? */
4555
4556 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
4557 return false;
4558
4559 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
4560 && ! vec_stmt)
4561 return false;
4562
4563 gassign *stmt = dyn_cast <gassign *> (stmt_info->stmt);
4564 if (!stmt)
4565 return false;
4566
4567 if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME)
4568 return false;
4569
4570 code = gimple_assign_rhs_code (stmt);
4571 if (!CONVERT_EXPR_CODE_P (code)
4572 && code != FIX_TRUNC_EXPR
4573 && code != FLOAT_EXPR
4574 && code != WIDEN_PLUS_EXPR
4575 && code != WIDEN_MINUS_EXPR
4576 && code != WIDEN_MULT_EXPR
4577 && code != WIDEN_LSHIFT_EXPR)
4578 return false;
4579
4580 op_type = TREE_CODE_LENGTH (code);
4581
4582 /* Check types of lhs and rhs. */
4583 scalar_dest = gimple_assign_lhs (stmt);
4584 lhs_type = TREE_TYPE (scalar_dest);
4585 vectype_out = STMT_VINFO_VECTYPE (stmt_info);
4586
4587 /* Check the operands of the operation. */
4588 slp_tree slp_op0, slp_op1 = NULL;
4589 if (!vect_is_simple_use (vinfo, stmt_info, slp_node,
4590 0, &op0, &slp_op0, &dt[0], &vectype_in))
4591 {
4592 if (dump_enabled_p ())
4593 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4594 "use not simple.\n");
4595 return false;
4596 }
4597
4598 rhs_type = TREE_TYPE (op0);
4599 if ((code != FIX_TRUNC_EXPR && code != FLOAT_EXPR)
4600 && !((INTEGRAL_TYPE_P (lhs_type)
4601 && INTEGRAL_TYPE_P (rhs_type))
4602 || (SCALAR_FLOAT_TYPE_P (lhs_type)
4603 && SCALAR_FLOAT_TYPE_P (rhs_type))))
4604 return false;
4605
4606 if (!VECTOR_BOOLEAN_TYPE_P (vectype_out)
4607 && ((INTEGRAL_TYPE_P (lhs_type)
4608 && !type_has_mode_precision_p (lhs_type))
4609 || (INTEGRAL_TYPE_P (rhs_type)
4610 && !type_has_mode_precision_p (rhs_type))))
4611 {
4612 if (dump_enabled_p ())
4613 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4614 "type conversion to/from bit-precision unsupported."
4615 "\n");
4616 return false;
4617 }
4618
4619 if (op_type == binary_op)
4620 {
4621 gcc_assert (code == WIDEN_MULT_EXPR || code == WIDEN_LSHIFT_EXPR
4622 || code == WIDEN_PLUS_EXPR || code == WIDEN_MINUS_EXPR);
4623
4624 op1 = gimple_assign_rhs2 (stmt);
4625 tree vectype1_in;
4626 if (!vect_is_simple_use (vinfo, stmt_info, slp_node, 1,
4627 &op1, &slp_op1, &dt[1], &vectype1_in))
4628 {
4629 if (dump_enabled_p ())
4630 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4631 "use not simple.\n");
4632 return false;
4633 }
4634 /* For WIDEN_MULT_EXPR, if OP0 is a constant, use the type of
4635 OP1. */
4636 if (!vectype_in)
4637 vectype_in = vectype1_in;
4638 }
4639
4640 /* If op0 is an external or constant def, infer the vector type
4641 from the scalar type. */
4642 if (!vectype_in)
4643 vectype_in = get_vectype_for_scalar_type (vinfo, rhs_type, slp_node);
4644 if (vec_stmt)
4645 gcc_assert (vectype_in);
4646 if (!vectype_in)
4647 {
4648 if (dump_enabled_p ())
4649 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4650 "no vectype for scalar type %T\n", rhs_type);
4651
4652 return false;
4653 }
4654
4655 if (VECTOR_BOOLEAN_TYPE_P (vectype_out)
4656 && !VECTOR_BOOLEAN_TYPE_P (vectype_in))
4657 {
4658 if (dump_enabled_p ())
4659 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4660 "can't convert between boolean and non "
4661 "boolean vectors %T\n", rhs_type);
4662
4663 return false;
4664 }
4665
4666 nunits_in = TYPE_VECTOR_SUBPARTS (vectype_in);
4667 nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
4668 if (known_eq (nunits_out, nunits_in))
4669 modifier = NONE;
4670 else if (multiple_p (nunits_out, nunits_in))
4671 modifier = NARROW;
4672 else
4673 {
4674 gcc_checking_assert (multiple_p (nunits_in, nunits_out));
4675 modifier = WIDEN;
4676 }
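/* Editorial examples of the classification above: a V4SI -> V4SF
   FLOAT_EXPR has equal subpart counts and is NONE; (int) of a long
   long with V2DI in and V4SI out is NARROW; (double) of an int with
   V4SI in and V2DF out is WIDEN.  */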
4677
4678 /* Multiple types in SLP are handled by creating the appropriate number of
4679 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
4680 case of SLP. */
4681 if (slp_node)
4682 ncopies = 1;
4683 else if (modifier == NARROW)
4684 ncopies = vect_get_num_copies (loop_vinfo, vectype_out);
4685 else
4686 ncopies = vect_get_num_copies (loop_vinfo, vectype_in);
4687
4688 /* Sanity check: make sure that at least one copy of the vectorized stmt
4689 needs to be generated. */
4690 gcc_assert (ncopies >= 1);
4691
4692 bool found_mode = false;
4693 scalar_mode lhs_mode = SCALAR_TYPE_MODE (lhs_type);
4694 scalar_mode rhs_mode = SCALAR_TYPE_MODE (rhs_type);
4695 opt_scalar_mode rhs_mode_iter;
4696
4697 /* Supportable by target? */
4698 switch (modifier)
4699 {
4700 case NONE:
4701 if (code != FIX_TRUNC_EXPR
4702 && code != FLOAT_EXPR
4703 && !CONVERT_EXPR_CODE_P (code))
4704 return false;
4705 if (supportable_convert_operation (code, vectype_out, vectype_in, &code1))
4706 break;
4707 /* FALLTHRU */
4708 unsupported:
4709 if (dump_enabled_p ())
4710 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4711 "conversion not supported by target.\n");
4712 return false;
4713
4714 case WIDEN:
4715 if (supportable_widening_operation (vinfo, code, stmt_info, vectype_out,
4716 vectype_in, &code1, &code2,
4717 &multi_step_cvt, &interm_types))
4718 {
4719 /* Binary widening operation can only be supported directly by the
4720 architecture. */
4721 gcc_assert (!(multi_step_cvt && op_type == binary_op));
4722 break;
4723 }
4724
4725 if (code != FLOAT_EXPR
4726 || GET_MODE_SIZE (lhs_mode) <= GET_MODE_SIZE (rhs_mode))
4727 goto unsupported;
4728
4729 fltsz = GET_MODE_SIZE (lhs_mode);
4730 FOR_EACH_2XWIDER_MODE (rhs_mode_iter, rhs_mode)
4731 {
4732 rhs_mode = rhs_mode_iter.require ();
4733 if (GET_MODE_SIZE (rhs_mode) > fltsz)
4734 break;
4735
4736 cvt_type
4737 = build_nonstandard_integer_type (GET_MODE_BITSIZE (rhs_mode), 0);
4738 cvt_type = get_same_sized_vectype (cvt_type, vectype_in);
4739 if (cvt_type == NULL_TREE)
4740 goto unsupported;
4741
4742 if (GET_MODE_SIZE (rhs_mode) == fltsz)
4743 {
4744 if (!supportable_convert_operation (code, vectype_out,
4745 cvt_type, &codecvt1))
4746 goto unsupported;
4747 }
4748 else if (!supportable_widening_operation (vinfo, code, stmt_info,
4749 vectype_out, cvt_type,
4750 &codecvt1, &codecvt2,
4751 &multi_step_cvt,
4752 &interm_types))
4753 continue;
4754 else
4755 gcc_assert (multi_step_cvt == 0);
4756
4757 if (supportable_widening_operation (vinfo, NOP_EXPR, stmt_info,
4758 cvt_type,
4759 vectype_in, &code1, &code2,
4760 &multi_step_cvt, &interm_types))
4761 {
4762 found_mode = true;
4763 break;
4764 }
4765 }
4766
4767 if (!found_mode)
4768 goto unsupported;
4769
4770 if (GET_MODE_SIZE (rhs_mode) == fltsz)
4771 codecvt2 = ERROR_MARK;
4772 else
4773 {
4774 multi_step_cvt++;
4775 interm_types.safe_push (cvt_type);
4776 cvt_type = NULL_TREE;
4777 }
4778 break;
4779
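/* Editorial example for the WIDEN FLOAT_EXPR search above: for a
   short -> double conversion there is usually no single widening
   operation, so the FOR_EACH_2XWIDER_MODE loop settles on an
   intermediate integer mode (SImode in this example) and the
   conversion is emitted as integer unpacking HImode -> SImode followed
   by a supportable SImode -> double FLOAT_EXPR, with CVT_TYPE
   recording the intermediate vector type.  */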
4780 case NARROW:
4781 gcc_assert (op_type == unary_op);
4782 if (supportable_narrowing_operation (code, vectype_out, vectype_in,
4783 &code1, &multi_step_cvt,
4784 &interm_types))
4785 break;
4786
4787 if (code != FIX_TRUNC_EXPR
4788 || GET_MODE_SIZE (lhs_mode) >= GET_MODE_SIZE (rhs_mode))
4789 goto unsupported;
4790
4791 cvt_type
4792 = build_nonstandard_integer_type (GET_MODE_BITSIZE (rhs_mode), 0);
4793 cvt_type = get_same_sized_vectype (cvt_type, vectype_in);
4794 if (cvt_type == NULL_TREE)
4795 goto unsupported;
4796 if (!supportable_convert_operation (code, cvt_type, vectype_in,
4797 &codecvt1))
4798 goto unsupported;
4799 if (supportable_narrowing_operation (NOP_EXPR, vectype_out, cvt_type,
4800 &code1, &multi_step_cvt,
4801 &interm_types))
4802 break;
4803 goto unsupported;
4804
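/* Editorial example for the NARROW FIX_TRUNC fallback above: for a
   double -> short conversion, the value is first converted with
   FIX_TRUNC_EXPR to an integer type of the same width as double
   (CVT_TYPE), and the result is then narrowed to short with one or
   more packing steps found by supportable_narrowing_operation.  */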
4805 default:
4806 gcc_unreachable ();
4807 }
4808
4809 if (!vec_stmt) /* transformation not required. */
4810 {
4811 if (slp_node
4812 && (!vect_maybe_update_slp_op_vectype (slp_op0, vectype_in)
4813 || !vect_maybe_update_slp_op_vectype (slp_op1, vectype_in)))
4814 {
4815 if (dump_enabled_p ())
4816 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4817 "incompatible vector types for invariants\n");
4818 return false;
4819 }
4820 DUMP_VECT_SCOPE ("vectorizable_conversion");
4821 if (modifier == NONE)
4822 {
4823 STMT_VINFO_TYPE (stmt_info) = type_conversion_vec_info_type;
4824 vect_model_simple_cost (vinfo, stmt_info, ncopies, dt, ndts, slp_node,
4825 cost_vec);
4826 }
4827 else if (modifier == NARROW)
4828 {
4829 STMT_VINFO_TYPE (stmt_info) = type_demotion_vec_info_type;
4830 /* The final packing step produces one vector result per copy. */
4831 unsigned int nvectors
4832 = (slp_node ? SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node) : ncopies);
4833 vect_model_promotion_demotion_cost (stmt_info, dt, nvectors,
4834 multi_step_cvt, cost_vec);
4835 }
4836 else
4837 {
4838 STMT_VINFO_TYPE (stmt_info) = type_promotion_vec_info_type;
4839 /* The initial unpacking step produces two vector results
4840 per copy. MULTI_STEP_CVT is 0 for a single conversion,
4841 so >> MULTI_STEP_CVT divides by 2^(number of steps - 1). */
4842 unsigned int nvectors
4843 = (slp_node
4844 ? SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node) >> multi_step_cvt
4845 : ncopies * 2);
4846 vect_model_promotion_demotion_cost (stmt_info, dt, nvectors,
4847 multi_step_cvt, cost_vec);
4848 }
4849 interm_types.release ();
4850 return true;
4851 }
4852
4853 /* Transform. */
4854 if (dump_enabled_p ())
4855 dump_printf_loc (MSG_NOTE, vect_location,
4856 "transform conversion. ncopies = %d.\n", ncopies);
4857
4858 if (op_type == binary_op)
4859 {
4860 if (CONSTANT_CLASS_P (op0))
4861 op0 = fold_convert (TREE_TYPE (op1), op0);
4862 else if (CONSTANT_CLASS_P (op1))
4863 op1 = fold_convert (TREE_TYPE (op0), op1);
4864 }
4865
4866 /* In case of multi-step conversion, we first generate conversion operations
4867 to the intermediate types, and then from those types to the final one.
4868 We create vector destinations for the intermediate type (TYPES) received
4869 from supportable_*_operation, and store them in the correct order
4870 for future use in vect_create_vectorized_*_stmts (). */
4871 auto_vec<tree> vec_dsts (multi_step_cvt + 1);
4872 vec_dest = vect_create_destination_var (scalar_dest,
4873 (cvt_type && modifier == WIDEN)
4874 ? cvt_type : vectype_out);
4875 vec_dsts.quick_push (vec_dest);
4876
4877 if (multi_step_cvt)
4878 {
4879 for (i = interm_types.length () - 1;
4880 interm_types.iterate (i, &intermediate_type); i--)
4881 {
4882 vec_dest = vect_create_destination_var (scalar_dest,
4883 intermediate_type);
4884 vec_dsts.quick_push (vec_dest);
4885 }
4886 }
4887
4888 if (cvt_type)
4889 vec_dest = vect_create_destination_var (scalar_dest,
4890 modifier == WIDEN
4891 ? vectype_out : cvt_type);
4892
4893 int ninputs = 1;
4894 if (!slp_node)
4895 {
4896 if (modifier == WIDEN)
4897 ;
4898 else if (modifier == NARROW)
4899 {
4900 if (multi_step_cvt)
4901 ninputs = vect_pow2 (multi_step_cvt);
4902 ninputs *= 2;
4903 }
4904 }
4905
4906 switch (modifier)
4907 {
4908 case NONE:
4909 vect_get_vec_defs (vinfo, stmt_info, slp_node, ncopies,
4910 op0, &vec_oprnds0);
4911 FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
4912 {
4913 /* Arguments are ready, create the new vector stmt. */
4914 gcc_assert (TREE_CODE_LENGTH (code1) == unary_op);
4915 gassign *new_stmt = gimple_build_assign (vec_dest, code1, vop0);
4916 new_temp = make_ssa_name (vec_dest, new_stmt);
4917 gimple_assign_set_lhs (new_stmt, new_temp);
4918 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
4919
4920 if (slp_node)
4921 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
4922 else
4923 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
4924 }
4925 break;
4926
4927 case WIDEN:
4928 /* In case the vectorization factor (VF) is bigger than the number
4929 of elements that we can fit in a vectype (nunits), we have to
4930 generate more than one vector stmt, i.e., we need to "unroll"
4931 the vector stmt by a factor VF/nunits. */
4932 vect_get_vec_defs (vinfo, stmt_info, slp_node, ncopies * ninputs,
4933 op0, &vec_oprnds0,
4934 code == WIDEN_LSHIFT_EXPR ? NULL_TREE : op1,
4935 &vec_oprnds1);
4936 if (code == WIDEN_LSHIFT_EXPR)
4937 {
4938 int oprnds_size = vec_oprnds0.length ();
4939 vec_oprnds1.create (oprnds_size);
4940 for (i = 0; i < oprnds_size; ++i)
4941 vec_oprnds1.quick_push (op1);
4942 }
4943 /* Arguments are ready. Create the new vector stmts. */
4944 for (i = multi_step_cvt; i >= 0; i--)
4945 {
4946 tree this_dest = vec_dsts[i];
4947 enum tree_code c1 = code1, c2 = code2;
4948 if (i == 0 && codecvt2 != ERROR_MARK)
4949 {
4950 c1 = codecvt1;
4951 c2 = codecvt2;
4952 }
4953 vect_create_vectorized_promotion_stmts (vinfo, &vec_oprnds0,
4954 &vec_oprnds1, stmt_info,
4955 this_dest, gsi,
4956 c1, c2, op_type);
4957 }
4958
4959 FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
4960 {
4961 gimple *new_stmt;
4962 if (cvt_type)
4963 {
4964 gcc_assert (TREE_CODE_LENGTH (codecvt1) == unary_op);
4965 new_temp = make_ssa_name (vec_dest);
4966 new_stmt = gimple_build_assign (new_temp, codecvt1, vop0);
4967 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
4968 }
4969 else
4970 new_stmt = SSA_NAME_DEF_STMT (vop0);
4971
4972 if (slp_node)
4973 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
4974 else
4975 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
4976 }
4977 break;
4978
4979 case NARROW:
4980 /* In case the vectorization factor (VF) is bigger than the number
4981 of elements that we can fit in a vectype (nunits), we have to
4982 generate more than one vector stmt, i.e., we need to "unroll"
4983 the vector stmt by a factor VF/nunits. */
4984 vect_get_vec_defs (vinfo, stmt_info, slp_node, ncopies * ninputs,
4985 op0, &vec_oprnds0);
4986 /* Arguments are ready. Create the new vector stmts. */
4987 if (cvt_type)
4988 FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
4989 {
4990 gcc_assert (TREE_CODE_LENGTH (codecvt1) == unary_op);
4991 new_temp = make_ssa_name (vec_dest);
4992 gassign *new_stmt
4993 = gimple_build_assign (new_temp, codecvt1, vop0);
4994 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
4995 vec_oprnds0[i] = new_temp;
4996 }
4997
4998 vect_create_vectorized_demotion_stmts (vinfo, &vec_oprnds0,
4999 multi_step_cvt,
5000 stmt_info, vec_dsts, gsi,
5001 slp_node, code1);
5002 break;
5003 }
5004 if (!slp_node)
5005 *vec_stmt = STMT_VINFO_VEC_STMTS (stmt_info)[0];
5006
5007 vec_oprnds0.release ();
5008 vec_oprnds1.release ();
5009 interm_types.release ();
5010
5011 return true;
5012 }
5013
5014 /* Return true if we can assume from the scalar form of STMT_INFO that
5015 neither the scalar nor the vector forms will generate code. STMT_INFO
5016 is known not to involve a data reference. */
5017
5018 bool
5019 vect_nop_conversion_p (stmt_vec_info stmt_info)
5020 {
5021 gassign *stmt = dyn_cast <gassign *> (stmt_info->stmt);
5022 if (!stmt)
5023 return false;
5024
5025 tree lhs = gimple_assign_lhs (stmt);
5026 tree_code code = gimple_assign_rhs_code (stmt);
5027 tree rhs = gimple_assign_rhs1 (stmt);
5028
5029 if (code == SSA_NAME || code == VIEW_CONVERT_EXPR)
5030 return true;
5031
5032 if (CONVERT_EXPR_CODE_P (code))
5033 return tree_nop_conversion_p (TREE_TYPE (lhs), TREE_TYPE (rhs));
5034
5035 return false;
5036 }
5037
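/* Editorial examples for vect_nop_conversion_p: an int -> unsigned int
   conversion or a VIEW_CONVERT_EXPR keeps the bit pattern, so callers
   can cost it as generating no code; an int -> short truncation
   changes the representation and is not a nop.  */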
5038 /* Function vectorizable_assignment.
5039
5040 Check if STMT_INFO performs an assignment (copy) that can be vectorized.
5041 If VEC_STMT is also passed, vectorize the STMT_INFO: create a vectorized
5042 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
5043 Return true if STMT_INFO is vectorizable in this way. */
5044
5045 static bool
5046 vectorizable_assignment (vec_info *vinfo,
5047 stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
5048 gimple **vec_stmt, slp_tree slp_node,
5049 stmt_vector_for_cost *cost_vec)
5050 {
5051 tree vec_dest;
5052 tree scalar_dest;
5053 tree op;
5054 loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
5055 tree new_temp;
5056 enum vect_def_type dt[1] = {vect_unknown_def_type};
5057 int ndts = 1;
5058 int ncopies;
5059 int i;
5060 vec<tree> vec_oprnds = vNULL;
5061 tree vop;
5062 bb_vec_info bb_vinfo = dyn_cast <bb_vec_info> (vinfo);
5063 enum tree_code code;
5064 tree vectype_in;
5065
5066 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
5067 return false;
5068
5069 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
5070 && ! vec_stmt)
5071 return false;
5072
5073 /* Is vectorizable assignment? */
5074 gassign *stmt = dyn_cast <gassign *> (stmt_info->stmt);
5075 if (!stmt)
5076 return false;
5077
5078 scalar_dest = gimple_assign_lhs (stmt);
5079 if (TREE_CODE (scalar_dest) != SSA_NAME)
5080 return false;
5081
5082 if (STMT_VINFO_DATA_REF (stmt_info))
5083 return false;
5084
5085 code = gimple_assign_rhs_code (stmt);
5086 if (!(gimple_assign_single_p (stmt)
5087 || code == PAREN_EXPR
5088 || CONVERT_EXPR_CODE_P (code)))
5089 return false;
5090
5091 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
5092 poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
5093
5094 /* Multiple types in SLP are handled by creating the appropriate number of
5095 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
5096 case of SLP. */
5097 if (slp_node)
5098 ncopies = 1;
5099 else
5100 ncopies = vect_get_num_copies (loop_vinfo, vectype);
5101
5102 gcc_assert (ncopies >= 1);
5103
5104 slp_tree slp_op;
5105 if (!vect_is_simple_use (vinfo, stmt_info, slp_node, 0, &op, &slp_op,
5106 &dt[0], &vectype_in))
5107 {
5108 if (dump_enabled_p ())
5109 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5110 "use not simple.\n");
5111 return false;
5112 }
5113 if (!vectype_in)
5114 vectype_in = get_vectype_for_scalar_type (vinfo, TREE_TYPE (op), slp_node);
5115
5116 /* We can handle NOP_EXPR conversions that do not change the number
5117 of elements or the vector size. */
5118 if ((CONVERT_EXPR_CODE_P (code)
5119 || code == VIEW_CONVERT_EXPR)
5120 && (!vectype_in
5121 || maybe_ne (TYPE_VECTOR_SUBPARTS (vectype_in), nunits)
5122 || maybe_ne (GET_MODE_SIZE (TYPE_MODE (vectype)),
5123 GET_MODE_SIZE (TYPE_MODE (vectype_in)))))
5124 return false;
5125
5126 if (VECTOR_BOOLEAN_TYPE_P (vectype)
5127 && !VECTOR_BOOLEAN_TYPE_P (vectype_in))
5128 {
5129 if (dump_enabled_p ())
5130 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5131 "can't convert between boolean and non "
5132 "boolean vectors %T\n", TREE_TYPE (op));
5133
5134 return false;
5135 }
5136
5137 /* We do not handle bit-precision changes. */
5138 if ((CONVERT_EXPR_CODE_P (code)
5139 || code == VIEW_CONVERT_EXPR)
5140 && INTEGRAL_TYPE_P (TREE_TYPE (scalar_dest))
5141 && (!type_has_mode_precision_p (TREE_TYPE (scalar_dest))
5142 || !type_has_mode_precision_p (TREE_TYPE (op)))
5143 /* But a conversion that does not change the bit-pattern is ok. */
5144 && !((TYPE_PRECISION (TREE_TYPE (scalar_dest))
5145 > TYPE_PRECISION (TREE_TYPE (op)))
5146 && TYPE_UNSIGNED (TREE_TYPE (op))))
5147 {
5148 if (dump_enabled_p ())
5149 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5150 "type conversion to/from bit-precision "
5151 "unsupported.\n");
5152 return false;
5153 }
5154
5155 if (!vec_stmt) /* transformation not required. */
5156 {
5157 if (slp_node
5158 && !vect_maybe_update_slp_op_vectype (slp_op, vectype_in))
5159 {
5160 if (dump_enabled_p ())
5161 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5162 "incompatible vector types for invariants\n");
5163 return false;
5164 }
5165 STMT_VINFO_TYPE (stmt_info) = assignment_vec_info_type;
5166 DUMP_VECT_SCOPE ("vectorizable_assignment");
5167 if (!vect_nop_conversion_p (stmt_info))
5168 vect_model_simple_cost (vinfo, stmt_info, ncopies, dt, ndts, slp_node,
5169 cost_vec);
5170 return true;
5171 }
5172
5173 /* Transform. */
5174 if (dump_enabled_p ())
5175 dump_printf_loc (MSG_NOTE, vect_location, "transform assignment.\n");
5176
5177 /* Handle def. */
5178 vec_dest = vect_create_destination_var (scalar_dest, vectype);
5179
5180 /* Handle use. */
5181 vect_get_vec_defs (vinfo, stmt_info, slp_node, ncopies, op, &vec_oprnds);
5182
5183 /* Arguments are ready. Create the new vector stmt. */
5184 FOR_EACH_VEC_ELT (vec_oprnds, i, vop)
5185 {
5186 if (CONVERT_EXPR_CODE_P (code)
5187 || code == VIEW_CONVERT_EXPR)
5188 vop = build1 (VIEW_CONVERT_EXPR, vectype, vop);
5189 gassign *new_stmt = gimple_build_assign (vec_dest, vop);
5190 new_temp = make_ssa_name (vec_dest, new_stmt);
5191 gimple_assign_set_lhs (new_stmt, new_temp);
5192 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
5193 if (slp_node)
5194 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
5195 else
5196 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
5197 }
5198 if (!slp_node)
5199 *vec_stmt = STMT_VINFO_VEC_STMTS (stmt_info)[0];
5200
5201 vec_oprnds.release ();
5202 return true;
5203 }
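/* For illustration (a sketch only; the SSA names and the vector(4) type are
   made up): a scalar copy-with-cast such as
       _2 = (unsigned int) _1;
   is vectorized by the transform above into one statement per vector copy,
       vect__2 = VIEW_CONVERT_EXPR<vector(4) unsigned int>(vect__1);
   which is valid because the analysis has already verified that the
   conversion changes neither the number of elements nor the vector size.  */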
5204
5205
5206 /* Return TRUE if CODE (a shift operation) is supported for SCALAR_TYPE
5207 either as shift by a scalar or by a vector. */
5208
5209 bool
5210 vect_supportable_shift (vec_info *vinfo, enum tree_code code, tree scalar_type)
5211 {
5212
5213 machine_mode vec_mode;
5214 optab optab;
5215 int icode;
5216 tree vectype;
5217
5218 vectype = get_vectype_for_scalar_type (vinfo, scalar_type);
5219 if (!vectype)
5220 return false;
5221
5222 optab = optab_for_tree_code (code, vectype, optab_scalar);
5223 if (!optab
5224 || optab_handler (optab, TYPE_MODE (vectype)) == CODE_FOR_nothing)
5225 {
5226 optab = optab_for_tree_code (code, vectype, optab_vector);
5227 if (!optab
5228 || (optab_handler (optab, TYPE_MODE (vectype))
5229 == CODE_FOR_nothing))
5230 return false;
5231 }
5232
5233 vec_mode = TYPE_MODE (vectype);
5234 icode = (int) optab_handler (optab, vec_mode);
5235 if (icode == CODE_FOR_nothing)
5236 return false;
5237
5238 return true;
5239 }
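/* For illustration (hedged; actual optab support is target-specific):
       x[i] = y[i] << 3;      <- scalar shift amount, optab_scalar tried first
       x[i] = y[i] << z[i];   <- per-lane amounts, only optab_vector can help
   vect_supportable_shift above answers whether the shift is supportable at
   all, trying the vector-by-scalar form first and falling back to the
   vector-by-vector form.  */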
5240
5241
5242 /* Function vectorizable_shift.
5243
5244 Check if STMT_INFO performs a shift operation that can be vectorized.
5245 If VEC_STMT is also passed, vectorize the STMT_INFO: create a vectorized
5246 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
5247 Return true if STMT_INFO is vectorizable in this way. */
5248
5249 static bool
5250 vectorizable_shift (vec_info *vinfo,
5251 stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
5252 gimple **vec_stmt, slp_tree slp_node,
5253 stmt_vector_for_cost *cost_vec)
5254 {
5255 tree vec_dest;
5256 tree scalar_dest;
5257 tree op0, op1 = NULL;
5258 tree vec_oprnd1 = NULL_TREE;
5259 tree vectype;
5260 loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
5261 enum tree_code code;
5262 machine_mode vec_mode;
5263 tree new_temp;
5264 optab optab;
5265 int icode;
5266 machine_mode optab_op2_mode;
5267 enum vect_def_type dt[2] = {vect_unknown_def_type, vect_unknown_def_type};
5268 int ndts = 2;
5269 poly_uint64 nunits_in;
5270 poly_uint64 nunits_out;
5271 tree vectype_out;
5272 tree op1_vectype;
5273 int ncopies;
5274 int i;
5275 vec<tree> vec_oprnds0 = vNULL;
5276 vec<tree> vec_oprnds1 = vNULL;
5277 tree vop0, vop1;
5278 unsigned int k;
5279 bool scalar_shift_arg = true;
5280 bb_vec_info bb_vinfo = dyn_cast <bb_vec_info> (vinfo);
5281 bool incompatible_op1_vectype_p = false;
5282
5283 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
5284 return false;
5285
5286 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
5287 && STMT_VINFO_DEF_TYPE (stmt_info) != vect_nested_cycle
5288 && ! vec_stmt)
5289 return false;
5290
5291 /* Is STMT a vectorizable binary/unary operation? */
5292 gassign *stmt = dyn_cast <gassign *> (stmt_info->stmt);
5293 if (!stmt)
5294 return false;
5295
5296 if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME)
5297 return false;
5298
5299 code = gimple_assign_rhs_code (stmt);
5300
5301 if (!(code == LSHIFT_EXPR || code == RSHIFT_EXPR || code == LROTATE_EXPR
5302 || code == RROTATE_EXPR))
5303 return false;
5304
5305 scalar_dest = gimple_assign_lhs (stmt);
5306 vectype_out = STMT_VINFO_VECTYPE (stmt_info);
5307 if (!type_has_mode_precision_p (TREE_TYPE (scalar_dest)))
5308 {
5309 if (dump_enabled_p ())
5310 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5311 "bit-precision shifts not supported.\n");
5312 return false;
5313 }
5314
5315 slp_tree slp_op0;
5316 if (!vect_is_simple_use (vinfo, stmt_info, slp_node,
5317 0, &op0, &slp_op0, &dt[0], &vectype))
5318 {
5319 if (dump_enabled_p ())
5320 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5321 "use not simple.\n");
5322 return false;
5323 }
5324 /* If op0 is an external or constant def, infer the vector type
5325 from the scalar type. */
5326 if (!vectype)
5327 vectype = get_vectype_for_scalar_type (vinfo, TREE_TYPE (op0), slp_node);
5328 if (vec_stmt)
5329 gcc_assert (vectype);
5330 if (!vectype)
5331 {
5332 if (dump_enabled_p ())
5333 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5334 "no vectype for scalar type\n");
5335 return false;
5336 }
5337
5338 nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
5339 nunits_in = TYPE_VECTOR_SUBPARTS (vectype);
5340 if (maybe_ne (nunits_out, nunits_in))
5341 return false;
5342
5343 stmt_vec_info op1_def_stmt_info;
5344 slp_tree slp_op1;
5345 if (!vect_is_simple_use (vinfo, stmt_info, slp_node, 1, &op1, &slp_op1,
5346 &dt[1], &op1_vectype, &op1_def_stmt_info))
5347 {
5348 if (dump_enabled_p ())
5349 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5350 "use not simple.\n");
5351 return false;
5352 }
5353
5354 /* Multiple types in SLP are handled by creating the appropriate number of
5355 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
5356 case of SLP. */
5357 if (slp_node)
5358 ncopies = 1;
5359 else
5360 ncopies = vect_get_num_copies (loop_vinfo, vectype);
5361
5362 gcc_assert (ncopies >= 1);
5363
5364 /* Determine whether the shift amount is a vector or a scalar. If the
5365 shift/rotate amount is a vector, use the vector/vector shift optabs. */
5366
5367 if ((dt[1] == vect_internal_def
5368 || dt[1] == vect_induction_def
5369 || dt[1] == vect_nested_cycle)
5370 && !slp_node)
5371 scalar_shift_arg = false;
5372 else if (dt[1] == vect_constant_def
5373 || dt[1] == vect_external_def
5374 || dt[1] == vect_internal_def)
5375 {
5376 /* In SLP we need to check whether the shift count is the same
5377 in every scalar stmt; in loops, a constant or invariant shift
5378 count is always a scalar shift. */
5379 if (slp_node)
5380 {
5381 vec<stmt_vec_info> stmts = SLP_TREE_SCALAR_STMTS (slp_node);
5382 stmt_vec_info slpstmt_info;
5383
5384 FOR_EACH_VEC_ELT (stmts, k, slpstmt_info)
5385 {
5386 gassign *slpstmt = as_a <gassign *> (slpstmt_info->stmt);
5387 if (!operand_equal_p (gimple_assign_rhs2 (slpstmt), op1, 0))
5388 scalar_shift_arg = false;
5389 }
5390
5391 /* For internal SLP defs we have to make sure we see scalar stmts
5392 for all vector elements.
5393 ??? For different vectors we could resort to a different
5394 scalar shift operand but code-generation below simply always
5395 takes the first. */
5396 if (dt[1] == vect_internal_def
5397 && maybe_ne (nunits_out * SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node),
5398 stmts.length ()))
5399 scalar_shift_arg = false;
5400 }
5401
5402 /* If the shift amount is computed by a pattern stmt we cannot
5403 use the scalar amount directly, so give up and use a vector
5404 shift. */
5405 if (op1_def_stmt_info && is_pattern_stmt_p (op1_def_stmt_info))
5406 scalar_shift_arg = false;
5407 }
5408 else
5409 {
5410 if (dump_enabled_p ())
5411 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5412 "operand mode requires invariant argument.\n");
5413 return false;
5414 }
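/* For illustration (hedged examples, not taken from the sources): in a loop,
   a[i] << 3 or a[i] << n with loop-invariant n keeps SCALAR_SHIFT_ARG true
   (vector/scalar shift), while a[i] << b[i] has an internal def for the
   amount and forces a vector/vector shift.  For SLP the amount must in
   addition be the same in every lane for the scalar form, as checked
   above.  */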
5415
5416 /* Vector shifted by vector. */
5417 bool was_scalar_shift_arg = scalar_shift_arg;
5418 if (!scalar_shift_arg)
5419 {
5420 optab = optab_for_tree_code (code, vectype, optab_vector);
5421 if (dump_enabled_p ())
5422 dump_printf_loc (MSG_NOTE, vect_location,
5423 "vector/vector shift/rotate found.\n");
5424
5425 if (!op1_vectype)
5426 op1_vectype = get_vectype_for_scalar_type (vinfo, TREE_TYPE (op1),
5427 slp_op1);
5428 incompatible_op1_vectype_p
5429 = (op1_vectype == NULL_TREE
5430 || maybe_ne (TYPE_VECTOR_SUBPARTS (op1_vectype),
5431 TYPE_VECTOR_SUBPARTS (vectype))
5432 || TYPE_MODE (op1_vectype) != TYPE_MODE (vectype));
5433 if (incompatible_op1_vectype_p
5434 && (!slp_node
5435 || SLP_TREE_DEF_TYPE (slp_op1) != vect_constant_def
5436 || slp_op1->refcnt != 1))
5437 {
5438 if (dump_enabled_p ())
5439 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5440 "unusable type for last operand in"
5441 " vector/vector shift/rotate.\n");
5442 return false;
5443 }
5444 }
5445 /* See if the machine has a vector shifted by scalar insn and if not
5446 then see if it has a vector shifted by vector insn. */
5447 else
5448 {
5449 optab = optab_for_tree_code (code, vectype, optab_scalar);
5450 if (optab
5451 && optab_handler (optab, TYPE_MODE (vectype)) != CODE_FOR_nothing)
5452 {
5453 if (dump_enabled_p ())
5454 dump_printf_loc (MSG_NOTE, vect_location,
5455 "vector/scalar shift/rotate found.\n");
5456 }
5457 else
5458 {
5459 optab = optab_for_tree_code (code, vectype, optab_vector);
5460 if (optab
5461 && (optab_handler (optab, TYPE_MODE (vectype))
5462 != CODE_FOR_nothing))
5463 {
5464 scalar_shift_arg = false;
5465
5466 if (dump_enabled_p ())
5467 dump_printf_loc (MSG_NOTE, vect_location,
5468 "vector/vector shift/rotate found.\n");
5469
5470 if (!op1_vectype)
5471 op1_vectype = get_vectype_for_scalar_type (vinfo,
5472 TREE_TYPE (op1),
5473 slp_op1);
5474
5475 /* Unlike the other binary operators, shifts/rotates have
5476 the rhs being int, instead of the same type as the lhs,
5477 so make sure the scalar is the right type if we are
5478 dealing with vectors of long long/long/short/char. */
5479 incompatible_op1_vectype_p
5480 = (!op1_vectype
5481 || !tree_nop_conversion_p (TREE_TYPE (vectype),
5482 TREE_TYPE (op1)));
5483 if (incompatible_op1_vectype_p
5484 && dt[1] == vect_internal_def)
5485 {
5486 if (dump_enabled_p ())
5487 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5488 "unusable type for last operand in"
5489 " vector/vector shift/rotate.\n");
5490 return false;
5491 }
5492 }
5493 }
5494 }
5495
5496 /* Supportable by target? */
5497 if (!optab)
5498 {
5499 if (dump_enabled_p ())
5500 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5501 "no optab.\n");
5502 return false;
5503 }
5504 vec_mode = TYPE_MODE (vectype);
5505 icode = (int) optab_handler (optab, vec_mode);
5506 if (icode == CODE_FOR_nothing)
5507 {
5508 if (dump_enabled_p ())
5509 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5510 "op not supported by target.\n");
5511 /* Check only during analysis. */
5512 if (maybe_ne (GET_MODE_SIZE (vec_mode), UNITS_PER_WORD)
5513 || (!vec_stmt
5514 && !vect_worthwhile_without_simd_p (vinfo, code)))
5515 return false;
5516 if (dump_enabled_p ())
5517 dump_printf_loc (MSG_NOTE, vect_location,
5518 "proceeding using word mode.\n");
5519 }
5520
5521 /* Worthwhile without SIMD support? Check only during analysis. */
5522 if (!vec_stmt
5523 && !VECTOR_MODE_P (TYPE_MODE (vectype))
5524 && !vect_worthwhile_without_simd_p (vinfo, code))
5525 {
5526 if (dump_enabled_p ())
5527 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5528 "not worthwhile without SIMD support.\n");
5529 return false;
5530 }
5531
5532 if (!vec_stmt) /* transformation not required. */
5533 {
5534 if (slp_node
5535 && (!vect_maybe_update_slp_op_vectype (slp_op0, vectype)
5536 || ((!scalar_shift_arg || dt[1] == vect_internal_def)
5537 && (!incompatible_op1_vectype_p
5538 || dt[1] == vect_constant_def)
5539 && !vect_maybe_update_slp_op_vectype
5540 (slp_op1,
5541 incompatible_op1_vectype_p ? vectype : op1_vectype))))
5542 {
5543 if (dump_enabled_p ())
5544 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5545 "incompatible vector types for invariants\n");
5546 return false;
5547 }
5548 /* Now adjust the constant shift amount in place. */
5549 if (slp_node
5550 && incompatible_op1_vectype_p
5551 && dt[1] == vect_constant_def)
5552 {
5553 for (unsigned i = 0;
5554 i < SLP_TREE_SCALAR_OPS (slp_op1).length (); ++i)
5555 {
5556 SLP_TREE_SCALAR_OPS (slp_op1)[i]
5557 = fold_convert (TREE_TYPE (vectype),
5558 SLP_TREE_SCALAR_OPS (slp_op1)[i]);
5559 gcc_assert ((TREE_CODE (SLP_TREE_SCALAR_OPS (slp_op1)[i])
5560 == INTEGER_CST));
5561 }
5562 }
5563 STMT_VINFO_TYPE (stmt_info) = shift_vec_info_type;
5564 DUMP_VECT_SCOPE ("vectorizable_shift");
5565 vect_model_simple_cost (vinfo, stmt_info, ncopies, dt,
5566 scalar_shift_arg ? 1 : ndts, slp_node, cost_vec);
5567 return true;
5568 }
5569
5570 /* Transform. */
5571
5572 if (dump_enabled_p ())
5573 dump_printf_loc (MSG_NOTE, vect_location,
5574 "transform binary/unary operation.\n");
5575
5576 if (incompatible_op1_vectype_p && !slp_node)
5577 {
5578 gcc_assert (!scalar_shift_arg && was_scalar_shift_arg);
5579 op1 = fold_convert (TREE_TYPE (vectype), op1);
5580 if (dt[1] != vect_constant_def)
5581 op1 = vect_init_vector (vinfo, stmt_info, op1,
5582 TREE_TYPE (vectype), NULL);
5583 }
5584
5585 /* Handle def. */
5586 vec_dest = vect_create_destination_var (scalar_dest, vectype);
5587
5588 if (scalar_shift_arg && dt[1] != vect_internal_def)
5589 {
5590 /* Vector shl and shr insn patterns can be defined with scalar
5591 operand 2 (shift operand). In this case, use constant or loop
5592 invariant op1 directly, without extending it to vector mode
5593 first. */
5594 optab_op2_mode = insn_data[icode].operand[2].mode;
5595 if (!VECTOR_MODE_P (optab_op2_mode))
5596 {
5597 if (dump_enabled_p ())
5598 dump_printf_loc (MSG_NOTE, vect_location,
5599 "operand 1 using scalar mode.\n");
5600 vec_oprnd1 = op1;
5601 vec_oprnds1.create (slp_node ? slp_node->vec_stmts_size : ncopies);
5602 vec_oprnds1.quick_push (vec_oprnd1);
5603 /* Store vec_oprnd1 for every vector stmt to be created.
5604 We check during the analysis that all the shift arguments
5605 are the same.
5606 TODO: Allow different constants for different vector
5607 stmts generated for an SLP instance. */
5608 for (k = 0;
5609 k < (slp_node ? slp_node->vec_stmts_size - 1 : ncopies - 1); k++)
5610 vec_oprnds1.quick_push (vec_oprnd1);
5611 }
5612 }
5613 else if (!scalar_shift_arg && slp_node && incompatible_op1_vectype_p)
5614 {
5615 if (was_scalar_shift_arg)
5616 {
5617 /* If the argument was the same in all lanes create
5618 the correctly typed vector shift amount directly. */
5619 op1 = fold_convert (TREE_TYPE (vectype), op1);
5620 op1 = vect_init_vector (vinfo, stmt_info, op1, TREE_TYPE (vectype),
5621 !loop_vinfo ? gsi : NULL);
5622 vec_oprnd1 = vect_init_vector (vinfo, stmt_info, op1, vectype,
5623 !loop_vinfo ? gsi : NULL);
5624 vec_oprnds1.create (slp_node->vec_stmts_size);
5625 for (k = 0; k < slp_node->vec_stmts_size; k++)
5626 vec_oprnds1.quick_push (vec_oprnd1);
5627 }
5628 else if (dt[1] == vect_constant_def)
5629 /* The constant shift amount has been adjusted in place. */
5630 ;
5631 else
5632 gcc_assert (TYPE_MODE (op1_vectype) == TYPE_MODE (vectype));
5633 }
5634
5635 /* vec_oprnd1 is available if operand 1 should be of a scalar-type
5636 (a special case for certain kinds of vector shifts); otherwise,
5637 operand 1 should be of a vector type (the usual case). */
5638 vect_get_vec_defs (vinfo, stmt_info, slp_node, ncopies,
5639 op0, &vec_oprnds0,
5640 vec_oprnd1 ? NULL_TREE : op1, &vec_oprnds1);
5641
5642 /* Arguments are ready. Create the new vector stmt. */
5643 FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
5644 {
5645 /* For internal defs where we need to use a scalar shift arg
5646 extract the first lane. */
5647 if (scalar_shift_arg && dt[1] == vect_internal_def)
5648 {
5649 vop1 = vec_oprnds1[0];
5650 new_temp = make_ssa_name (TREE_TYPE (TREE_TYPE (vop1)));
5651 gassign *new_stmt
5652 = gimple_build_assign (new_temp,
5653 build3 (BIT_FIELD_REF, TREE_TYPE (new_temp),
5654 vop1,
5655 TYPE_SIZE (TREE_TYPE (new_temp)),
5656 bitsize_zero_node));
5657 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
5658 vop1 = new_temp;
5659 }
5660 else
5661 vop1 = vec_oprnds1[i];
5662 gassign *new_stmt = gimple_build_assign (vec_dest, code, vop0, vop1);
5663 new_temp = make_ssa_name (vec_dest, new_stmt);
5664 gimple_assign_set_lhs (new_stmt, new_temp);
5665 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
5666 if (slp_node)
5667 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
5668 else
5669 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
5670 }
5671
5672 if (!slp_node)
5673 *vec_stmt = STMT_VINFO_VEC_STMTS (stmt_info)[0];
5674
5675 vec_oprnds0.release ();
5676 vec_oprnds1.release ();
5677
5678 return true;
5679 }
5680
5681
5682 /* Function vectorizable_operation.
5683
5684 Check if STMT_INFO performs a binary, unary or ternary operation that can
5685 be vectorized.
5686 If VEC_STMT is also passed, vectorize STMT_INFO: create a vectorized
5687 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
5688 Return true if STMT_INFO is vectorizable in this way. */
5689
5690 static bool
5691 vectorizable_operation (vec_info *vinfo,
5692 stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
5693 gimple **vec_stmt, slp_tree slp_node,
5694 stmt_vector_for_cost *cost_vec)
5695 {
5696 tree vec_dest;
5697 tree scalar_dest;
5698 tree op0, op1 = NULL_TREE, op2 = NULL_TREE;
5699 tree vectype;
5700 loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
5701 enum tree_code code, orig_code;
5702 machine_mode vec_mode;
5703 tree new_temp;
5704 int op_type;
5705 optab optab;
5706 bool target_support_p;
5707 enum vect_def_type dt[3]
5708 = {vect_unknown_def_type, vect_unknown_def_type, vect_unknown_def_type};
5709 int ndts = 3;
5710 poly_uint64 nunits_in;
5711 poly_uint64 nunits_out;
5712 tree vectype_out;
5713 int ncopies, vec_num;
5714 int i;
5715 vec<tree> vec_oprnds0 = vNULL;
5716 vec<tree> vec_oprnds1 = vNULL;
5717 vec<tree> vec_oprnds2 = vNULL;
5718 tree vop0, vop1, vop2;
5719 bb_vec_info bb_vinfo = dyn_cast <bb_vec_info> (vinfo);
5720
5721 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
5722 return false;
5723
5724 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
5725 && ! vec_stmt)
5726 return false;
5727
5728 /* Is STMT a vectorizable binary/unary operation? */
5729 gassign *stmt = dyn_cast <gassign *> (stmt_info->stmt);
5730 if (!stmt)
5731 return false;
5732
5733 /* Loads and stores are handled in vectorizable_{load,store}. */
5734 if (STMT_VINFO_DATA_REF (stmt_info))
5735 return false;
5736
5737 orig_code = code = gimple_assign_rhs_code (stmt);
5738
5739 /* Shifts are handled in vectorizable_shift. */
5740 if (code == LSHIFT_EXPR
5741 || code == RSHIFT_EXPR
5742 || code == LROTATE_EXPR
5743 || code == RROTATE_EXPR)
5744 return false;
5745
5746 /* Comparisons are handled in vectorizable_comparison. */
5747 if (TREE_CODE_CLASS (code) == tcc_comparison)
5748 return false;
5749
5750 /* Conditions are handled in vectorizable_condition. */
5751 if (code == COND_EXPR)
5752 return false;
5753
5754 /* For pointer addition and subtraction, we should use the normal
5755 plus and minus for the vector operation. */
5756 if (code == POINTER_PLUS_EXPR)
5757 code = PLUS_EXPR;
5758 if (code == POINTER_DIFF_EXPR)
5759 code = MINUS_EXPR;
5760
5761 /* Support only unary or binary operations. */
5762 op_type = TREE_CODE_LENGTH (code);
5763 if (op_type != unary_op && op_type != binary_op && op_type != ternary_op)
5764 {
5765 if (dump_enabled_p ())
5766 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5767 "num. args = %d (not unary/binary/ternary op).\n",
5768 op_type);
5769 return false;
5770 }
5771
5772 scalar_dest = gimple_assign_lhs (stmt);
5773 vectype_out = STMT_VINFO_VECTYPE (stmt_info);
5774
5775 /* Most operations cannot handle bit-precision types without extra
5776 truncations. */
5777 bool mask_op_p = VECTOR_BOOLEAN_TYPE_P (vectype_out);
5778 if (!mask_op_p
5779 && !type_has_mode_precision_p (TREE_TYPE (scalar_dest))
5780 /* Exception are bitwise binary operations. */
5781 && code != BIT_IOR_EXPR
5782 && code != BIT_XOR_EXPR
5783 && code != BIT_AND_EXPR)
5784 {
5785 if (dump_enabled_p ())
5786 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5787 "bit-precision arithmetic not supported.\n");
5788 return false;
5789 }
5790
5791 slp_tree slp_op0;
5792 if (!vect_is_simple_use (vinfo, stmt_info, slp_node,
5793 0, &op0, &slp_op0, &dt[0], &vectype))
5794 {
5795 if (dump_enabled_p ())
5796 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5797 "use not simple.\n");
5798 return false;
5799 }
5800 /* If op0 is an external or constant def, infer the vector type
5801 from the scalar type. */
5802 if (!vectype)
5803 {
5804 /* For a boolean type we cannot determine the vectype from an
5805 invariant value (we don't know whether it is a vector
5806 of booleans or a vector of integers). Use the output
5807 vectype, because operations on booleans don't change the
5808 type. */
5809 if (VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (op0)))
5810 {
5811 if (!VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (scalar_dest)))
5812 {
5813 if (dump_enabled_p ())
5814 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5815 "not supported operation on bool value.\n");
5816 return false;
5817 }
5818 vectype = vectype_out;
5819 }
5820 else
5821 vectype = get_vectype_for_scalar_type (vinfo, TREE_TYPE (op0),
5822 slp_node);
5823 }
5824 if (vec_stmt)
5825 gcc_assert (vectype);
5826 if (!vectype)
5827 {
5828 if (dump_enabled_p ())
5829 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5830 "no vectype for scalar type %T\n",
5831 TREE_TYPE (op0));
5832
5833 return false;
5834 }
5835
5836 nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
5837 nunits_in = TYPE_VECTOR_SUBPARTS (vectype);
5838 if (maybe_ne (nunits_out, nunits_in))
5839 return false;
5840
5841 tree vectype2 = NULL_TREE, vectype3 = NULL_TREE;
5842 slp_tree slp_op1 = NULL, slp_op2 = NULL;
5843 if (op_type == binary_op || op_type == ternary_op)
5844 {
5845 if (!vect_is_simple_use (vinfo, stmt_info, slp_node,
5846 1, &op1, &slp_op1, &dt[1], &vectype2))
5847 {
5848 if (dump_enabled_p ())
5849 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5850 "use not simple.\n");
5851 return false;
5852 }
5853 }
5854 if (op_type == ternary_op)
5855 {
5856 if (!vect_is_simple_use (vinfo, stmt_info, slp_node,
5857 2, &op2, &slp_op2, &dt[2], &vectype3))
5858 {
5859 if (dump_enabled_p ())
5860 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5861 "use not simple.\n");
5862 return false;
5863 }
5864 }
5865
5866 /* Multiple types in SLP are handled by creating the appropriate number of
5867 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
5868 case of SLP. */
5869 if (slp_node)
5870 {
5871 ncopies = 1;
5872 vec_num = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
5873 }
5874 else
5875 {
5876 ncopies = vect_get_num_copies (loop_vinfo, vectype);
5877 vec_num = 1;
5878 }
5879
5880 gcc_assert (ncopies >= 1);
5881
5882 /* Reject attempts to combine mask types with nonmask types, e.g. if
5883 we have an AND between a (nonmask) boolean loaded from memory and
5884 a (mask) boolean result of a comparison.
5885
5886 TODO: We could easily fix these cases up using pattern statements. */
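  /* E.g. (an illustrative sketch; the names are made up): with _Bool *p,
	 _1 = a_4 < b_5;    <- mask vectype (comparison result)
	 _2 = *p_6;         <- nonmask vectype (boolean loaded from memory)
	 _3 = _1 & _2;
     uses both kinds of boolean vector and is rejected by the check below.  */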
5887 if (VECTOR_BOOLEAN_TYPE_P (vectype) != mask_op_p
5888 || (vectype2 && VECTOR_BOOLEAN_TYPE_P (vectype2) != mask_op_p)
5889 || (vectype3 && VECTOR_BOOLEAN_TYPE_P (vectype3) != mask_op_p))
5890 {
5891 if (dump_enabled_p ())
5892 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5893 "mixed mask and nonmask vector types\n");
5894 return false;
5895 }
5896
5897 /* Supportable by target? */
5898
5899 vec_mode = TYPE_MODE (vectype);
5900 if (code == MULT_HIGHPART_EXPR)
5901 target_support_p = can_mult_highpart_p (vec_mode, TYPE_UNSIGNED (vectype));
5902 else
5903 {
5904 optab = optab_for_tree_code (code, vectype, optab_default);
5905 if (!optab)
5906 {
5907 if (dump_enabled_p ())
5908 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5909 "no optab.\n");
5910 return false;
5911 }
5912 target_support_p = (optab_handler (optab, vec_mode)
5913 != CODE_FOR_nothing);
5914 }
5915
5916 if (!target_support_p)
5917 {
5918 if (dump_enabled_p ())
5919 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5920 "op not supported by target.\n");
5921 /* Check only during analysis. */
5922 if (maybe_ne (GET_MODE_SIZE (vec_mode), UNITS_PER_WORD)
5923 || (!vec_stmt && !vect_worthwhile_without_simd_p (vinfo, code)))
5924 return false;
5925 if (dump_enabled_p ())
5926 dump_printf_loc (MSG_NOTE, vect_location,
5927 "proceeding using word mode.\n");
5928 }
5929
5930 /* Worthwhile without SIMD support? Check only during analysis. */
5931 if (!VECTOR_MODE_P (vec_mode)
5932 && !vec_stmt
5933 && !vect_worthwhile_without_simd_p (vinfo, code))
5934 {
5935 if (dump_enabled_p ())
5936 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5937 "not worthwhile without SIMD support.\n");
5938 return false;
5939 }
5940
5941 int reduc_idx = STMT_VINFO_REDUC_IDX (stmt_info);
5942 vec_loop_masks *masks = (loop_vinfo ? &LOOP_VINFO_MASKS (loop_vinfo) : NULL);
5943 internal_fn cond_fn = get_conditional_internal_fn (code);
5944
5945 if (!vec_stmt) /* transformation not required. */
5946 {
5947 /* If this operation is part of a reduction, a fully-masked loop
5948 should only change the active lanes of the reduction chain,
5949 keeping the inactive lanes as-is. */
5950 if (loop_vinfo
5951 && LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo)
5952 && reduc_idx >= 0)
5953 {
5954 if (cond_fn == IFN_LAST
5955 || !direct_internal_fn_supported_p (cond_fn, vectype,
5956 OPTIMIZE_FOR_SPEED))
5957 {
5958 if (dump_enabled_p ())
5959 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5960 "can't use a fully-masked loop because no"
5961 " conditional operation is available.\n");
5962 LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo) = false;
5963 }
5964 else
5965 vect_record_loop_mask (loop_vinfo, masks, ncopies * vec_num,
5966 vectype, NULL);
5967 }
5968
5969 /* Put types on constant and invariant SLP children. */
5970 if (slp_node
5971 && (!vect_maybe_update_slp_op_vectype (slp_op0, vectype)
5972 || !vect_maybe_update_slp_op_vectype (slp_op1, vectype)
5973 || !vect_maybe_update_slp_op_vectype (slp_op2, vectype)))
5974 {
5975 if (dump_enabled_p ())
5976 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5977 "incompatible vector types for invariants\n");
5978 return false;
5979 }
5980
5981 STMT_VINFO_TYPE (stmt_info) = op_vec_info_type;
5982 DUMP_VECT_SCOPE ("vectorizable_operation");
5983 vect_model_simple_cost (vinfo, stmt_info,
5984 ncopies, dt, ndts, slp_node, cost_vec);
5985 return true;
5986 }
5987
5988 /* Transform. */
5989
5990 if (dump_enabled_p ())
5991 dump_printf_loc (MSG_NOTE, vect_location,
5992 "transform binary/unary operation.\n");
5993
5994 bool masked_loop_p = loop_vinfo && LOOP_VINFO_FULLY_MASKED_P (loop_vinfo);
5995
5996 /* POINTER_DIFF_EXPR has pointer arguments which are vectorized as
5997 vectors with unsigned elements, but the result is signed. So, we
5998 need to compute the MINUS_EXPR into a vectype temporary and
5999 VIEW_CONVERT_EXPR it into the final vectype_out result. */
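  /* For illustration (a sketch; the SSA names and element type are made up):
	 _3 = p_1 - q_2;   <- POINTER_DIFF_EXPR
     is emitted below roughly as
	 vect_tmp = vect_p - vect_q;                <- unsigned elements
	 vect__3 = VIEW_CONVERT_EXPR<signed vectype_out>(vect_tmp);
     so the final result carries the signed vectype_out.  */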
6000 tree vec_cvt_dest = NULL_TREE;
6001 if (orig_code == POINTER_DIFF_EXPR)
6002 {
6003 vec_dest = vect_create_destination_var (scalar_dest, vectype);
6004 vec_cvt_dest = vect_create_destination_var (scalar_dest, vectype_out);
6005 }
6006 /* Handle def. */
6007 else
6008 vec_dest = vect_create_destination_var (scalar_dest, vectype_out);
6009
6010 /* In case the vectorization factor (VF) is bigger than the number
6011 of elements that we can fit in a vectype (nunits), we have to generate
6012 more than one vector stmt - i.e., we need to "unroll" the
6013 vector stmt by a factor of VF/nunits. In doing so, we record a pointer
6014 from one copy of the vector stmt to the next, in the field
6015 STMT_VINFO_RELATED_STMT. This is necessary in order to allow following
6016 stages to find the correct vector defs to be used when vectorizing
6017 stmts that use the defs of the current stmt. The example below
6018 illustrates the vectorization process when VF=16 and nunits=4 (i.e.,
6019 we need to create 4 vectorized stmts):
6020
6021 before vectorization:
6022 RELATED_STMT VEC_STMT
6023 S1: x = memref - -
6024 S2: z = x + 1 - -
6025
6026 step 1: vectorize stmt S1 (done in vectorizable_load. See more details
6027 there):
6028 RELATED_STMT VEC_STMT
6029 VS1_0: vx0 = memref0 VS1_1 -
6030 VS1_1: vx1 = memref1 VS1_2 -
6031 VS1_2: vx2 = memref2 VS1_3 -
6032 VS1_3: vx3 = memref3 - -
6033 S1: x = load - VS1_0
6034 S2: z = x + 1 - -
6035
6036 step2: vectorize stmt S2 (done here):
6037 To vectorize stmt S2 we first need to find the relevant vector
6038 def for the first operand 'x'. This is, as usual, obtained from
6039 the vector stmt recorded in the STMT_VINFO_VEC_STMT of the stmt
6040 that defines 'x' (S1). This way we find the stmt VS1_0, and the
6041 relevant vector def 'vx0'. Having found 'vx0' we can generate
6042 the vector stmt VS2_0, and as usual, record it in the
6043 STMT_VINFO_VEC_STMT of stmt S2.
6044 When creating the second copy (VS2_1), we obtain the relevant vector
6045 def from the vector stmt recorded in the STMT_VINFO_RELATED_STMT of
6046 stmt VS1_0. This way we find the stmt VS1_1 and the relevant
6047 vector def 'vx1'. Using 'vx1' we create stmt VS2_1 and record a
6048 pointer to it in the STMT_VINFO_RELATED_STMT of the vector stmt VS2_0.
6049 Similarly when creating stmts VS2_2 and VS2_3. This is the resulting
6050 chain of stmts and pointers:
6051 RELATED_STMT VEC_STMT
6052 VS1_0: vx0 = memref0 VS1_1 -
6053 VS1_1: vx1 = memref1 VS1_2 -
6054 VS1_2: vx2 = memref2 VS1_3 -
6055 VS1_3: vx3 = memref3 - -
6056 S1: x = load - VS1_0
6057 VS2_0: vz0 = vx0 + v1 VS2_1 -
6058 VS2_1: vz1 = vx1 + v1 VS2_2 -
6059 VS2_2: vz2 = vx2 + v1 VS2_3 -
6060 VS2_3: vz3 = vx3 + v1 - -
6061 S2: z = x + 1 - VS2_0 */
6062
6063 vect_get_vec_defs (vinfo, stmt_info, slp_node, ncopies,
6064 op0, &vec_oprnds0, op1, &vec_oprnds1, op2, &vec_oprnds2);
6065 /* Arguments are ready. Create the new vector stmt. */
6066 FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
6067 {
6068 gimple *new_stmt = NULL;
6069 vop1 = ((op_type == binary_op || op_type == ternary_op)
6070 ? vec_oprnds1[i] : NULL_TREE);
6071 vop2 = ((op_type == ternary_op) ? vec_oprnds2[i] : NULL_TREE);
6072 if (masked_loop_p && reduc_idx >= 0)
6073 {
6074 /* Perform the operation on active elements only and take
6075 inactive elements from the reduction chain input. */
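	  /* E.g. (an illustrative sketch): for a summation whose reduction
	     input is operand 0 (reduc_idx == 0) this builds roughly
	       vect_sum_1 = .COND_ADD (loop_mask, vect_sum_0, vect_x, vect_sum_0);
	     so masked-off lanes simply keep the incoming partial sum.  */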
6076 gcc_assert (!vop2);
6077 vop2 = reduc_idx == 1 ? vop1 : vop0;
6078 tree mask = vect_get_loop_mask (gsi, masks, vec_num * ncopies,
6079 vectype, i);
6080 gcall *call = gimple_build_call_internal (cond_fn, 4, mask,
6081 vop0, vop1, vop2);
6082 new_temp = make_ssa_name (vec_dest, call);
6083 gimple_call_set_lhs (call, new_temp);
6084 gimple_call_set_nothrow (call, true);
6085 vect_finish_stmt_generation (vinfo, stmt_info, call, gsi);
6086 new_stmt = call;
6087 }
6088 else
6089 {
6090 new_stmt = gimple_build_assign (vec_dest, code, vop0, vop1, vop2);
6091 new_temp = make_ssa_name (vec_dest, new_stmt);
6092 gimple_assign_set_lhs (new_stmt, new_temp);
6093 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
6094 if (vec_cvt_dest)
6095 {
6096 new_temp = build1 (VIEW_CONVERT_EXPR, vectype_out, new_temp);
6097 new_stmt = gimple_build_assign (vec_cvt_dest, VIEW_CONVERT_EXPR,
6098 new_temp);
6099 new_temp = make_ssa_name (vec_cvt_dest, new_stmt);
6100 gimple_assign_set_lhs (new_stmt, new_temp);
6101 vect_finish_stmt_generation (vinfo, stmt_info,
6102 new_stmt, gsi);
6103 }
6104 }
6105 if (slp_node)
6106 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
6107 else
6108 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
6109 }
6110
6111 if (!slp_node)
6112 *vec_stmt = STMT_VINFO_VEC_STMTS (stmt_info)[0];
6113
6114 vec_oprnds0.release ();
6115 vec_oprnds1.release ();
6116 vec_oprnds2.release ();
6117
6118 return true;
6119 }
6120
6121 /* A helper function to ensure data reference DR_INFO's base alignment. */
6122
6123 static void
6124 ensure_base_align (dr_vec_info *dr_info)
6125 {
6126 if (dr_info->misalignment == DR_MISALIGNMENT_UNINITIALIZED)
6127 return;
6128
6129 if (dr_info->base_misaligned)
6130 {
6131 tree base_decl = dr_info->base_decl;
6132
6133 /* We should only be able to increase the alignment of a base object
6134 if we know what its new alignment should be at compile time. */
6135 unsigned HOST_WIDE_INT align_base_to =
6136 DR_TARGET_ALIGNMENT (dr_info).to_constant () * BITS_PER_UNIT;
6137
6138 if (decl_in_symtab_p (base_decl))
6139 symtab_node::get (base_decl)->increase_alignment (align_base_to);
6140 else if (DECL_ALIGN (base_decl) < align_base_to)
6141 {
6142 SET_DECL_ALIGN (base_decl, align_base_to);
6143 DECL_USER_ALIGN (base_decl) = 1;
6144 }
6145 dr_info->base_misaligned = false;
6146 }
6147 }
6148
6149
6150 /* Function get_group_alias_ptr_type.
6151
6152 Return the alias type for the group starting at FIRST_STMT_INFO. */
6153
6154 static tree
6155 get_group_alias_ptr_type (stmt_vec_info first_stmt_info)
6156 {
6157 struct data_reference *first_dr, *next_dr;
6158
6159 first_dr = STMT_VINFO_DATA_REF (first_stmt_info);
6160 stmt_vec_info next_stmt_info = DR_GROUP_NEXT_ELEMENT (first_stmt_info);
6161 while (next_stmt_info)
6162 {
6163 next_dr = STMT_VINFO_DATA_REF (next_stmt_info);
6164 if (get_alias_set (DR_REF (first_dr))
6165 != get_alias_set (DR_REF (next_dr)))
6166 {
6167 if (dump_enabled_p ())
6168 dump_printf_loc (MSG_NOTE, vect_location,
6169 "conflicting alias set types.\n");
6170 return ptr_type_node;
6171 }
6172 next_stmt_info = DR_GROUP_NEXT_ELEMENT (next_stmt_info);
6173 }
6174 return reference_alias_ptr_type (DR_REF (first_dr));
6175 }
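/* For instance (a hedged example): if one member of an interleaving group is
   accessed through an "int *" reference while a neighboring member of the
   same group is accessed through a "char *" view of the same buffer, the
   alias sets differ and the group conservatively falls back to
   ptr_type_node, i.e. alias set zero.  */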
6176
6177
6178 /* Function scan_operand_equal_p.
6179
6180 Helper function for check_scan_store. Compare two references
6181 with .GOMP_SIMD_LANE bases. */
6182
6183 static bool
6184 scan_operand_equal_p (tree ref1, tree ref2)
6185 {
6186 tree ref[2] = { ref1, ref2 };
6187 poly_int64 bitsize[2], bitpos[2];
6188 tree offset[2], base[2];
6189 for (int i = 0; i < 2; ++i)
6190 {
6191 machine_mode mode;
6192 int unsignedp, reversep, volatilep = 0;
6193 base[i] = get_inner_reference (ref[i], &bitsize[i], &bitpos[i],
6194 &offset[i], &mode, &unsignedp,
6195 &reversep, &volatilep);
6196 if (reversep || volatilep || maybe_ne (bitpos[i], 0))
6197 return false;
6198 if (TREE_CODE (base[i]) == MEM_REF
6199 && offset[i] == NULL_TREE
6200 && TREE_CODE (TREE_OPERAND (base[i], 0)) == SSA_NAME)
6201 {
6202 gimple *def_stmt = SSA_NAME_DEF_STMT (TREE_OPERAND (base[i], 0));
6203 if (is_gimple_assign (def_stmt)
6204 && gimple_assign_rhs_code (def_stmt) == POINTER_PLUS_EXPR
6205 && TREE_CODE (gimple_assign_rhs1 (def_stmt)) == ADDR_EXPR
6206 && TREE_CODE (gimple_assign_rhs2 (def_stmt)) == SSA_NAME)
6207 {
6208 if (maybe_ne (mem_ref_offset (base[i]), 0))
6209 return false;
6210 base[i] = TREE_OPERAND (gimple_assign_rhs1 (def_stmt), 0);
6211 offset[i] = gimple_assign_rhs2 (def_stmt);
6212 }
6213 }
6214 }
6215
6216 if (!operand_equal_p (base[0], base[1], 0))
6217 return false;
6218 if (maybe_ne (bitsize[0], bitsize[1]))
6219 return false;
6220 if (offset[0] != offset[1])
6221 {
6222 if (!offset[0] || !offset[1])
6223 return false;
6224 if (!operand_equal_p (offset[0], offset[1], 0))
6225 {
6226 tree step[2];
6227 for (int i = 0; i < 2; ++i)
6228 {
6229 step[i] = integer_one_node;
6230 if (TREE_CODE (offset[i]) == SSA_NAME)
6231 {
6232 gimple *def_stmt = SSA_NAME_DEF_STMT (offset[i]);
6233 if (is_gimple_assign (def_stmt)
6234 && gimple_assign_rhs_code (def_stmt) == MULT_EXPR
6235 && (TREE_CODE (gimple_assign_rhs2 (def_stmt))
6236 == INTEGER_CST))
6237 {
6238 step[i] = gimple_assign_rhs2 (def_stmt);
6239 offset[i] = gimple_assign_rhs1 (def_stmt);
6240 }
6241 }
6242 else if (TREE_CODE (offset[i]) == MULT_EXPR)
6243 {
6244 step[i] = TREE_OPERAND (offset[i], 1);
6245 offset[i] = TREE_OPERAND (offset[i], 0);
6246 }
6247 tree rhs1 = NULL_TREE;
6248 if (TREE_CODE (offset[i]) == SSA_NAME)
6249 {
6250 gimple *def_stmt = SSA_NAME_DEF_STMT (offset[i]);
6251 if (gimple_assign_cast_p (def_stmt))
6252 rhs1 = gimple_assign_rhs1 (def_stmt);
6253 }
6254 else if (CONVERT_EXPR_P (offset[i]))
6255 rhs1 = TREE_OPERAND (offset[i], 0);
6256 if (rhs1
6257 && INTEGRAL_TYPE_P (TREE_TYPE (rhs1))
6258 && INTEGRAL_TYPE_P (TREE_TYPE (offset[i]))
6259 && (TYPE_PRECISION (TREE_TYPE (offset[i]))
6260 >= TYPE_PRECISION (TREE_TYPE (rhs1))))
6261 offset[i] = rhs1;
6262 }
6263 if (!operand_equal_p (offset[0], offset[1], 0)
6264 || !operand_equal_p (step[0], step[1], 0))
6265 return false;
6266 }
6267 }
6268 return true;
6269 }
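/* For illustration (a hedged sketch with made-up names): two references to
   the same "omp simd array", say D.2042[_25] in one statement and a read
   through a pointer formed as &D.2042 p+ _25 * 4 in another, compare equal
   here: both decompose to base D.2042, and their offsets match once the
   constant step (the element size) and any widening cast of the index have
   been stripped.  */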
6270
6271
6272 enum scan_store_kind {
6273 /* Normal permutation. */
6274 scan_store_kind_perm,
6275
6276 /* Whole vector left shift permutation with zero init. */
6277 scan_store_kind_lshift_zero,
6278
6279 /* Whole vector left shift permutation and VEC_COND_EXPR. */
6280 scan_store_kind_lshift_cond
6281 };
6282
6283 /* Function scan_store_can_perm_p.
6284
6285 Verify if we can perform the needed permutations or whole vector shifts.
6286 Return -1 on failure, otherwise the exact log2 of vectype's nunits.
6287 If USE_WHOLE_VECTOR is non-NULL, it records which kind of operation
6288 (enum scan_store_kind) to use at each step. */
6289
6290 static int
6291 scan_store_can_perm_p (tree vectype, tree init,
6292 vec<enum scan_store_kind> *use_whole_vector = NULL)
6293 {
6294 enum machine_mode vec_mode = TYPE_MODE (vectype);
6295 unsigned HOST_WIDE_INT nunits;
6296 if (!TYPE_VECTOR_SUBPARTS (vectype).is_constant (&nunits))
6297 return -1;
6298 int units_log2 = exact_log2 (nunits);
6299 if (units_log2 <= 0)
6300 return -1;
6301
6302 int i;
6303 enum scan_store_kind whole_vector_shift_kind = scan_store_kind_perm;
6304 for (i = 0; i <= units_log2; ++i)
6305 {
6306 unsigned HOST_WIDE_INT j, k;
6307 enum scan_store_kind kind = scan_store_kind_perm;
6308 vec_perm_builder sel (nunits, nunits, 1);
6309 sel.quick_grow (nunits);
6310 if (i == units_log2)
6311 {
6312 for (j = 0; j < nunits; ++j)
6313 sel[j] = nunits - 1;
6314 }
6315 else
6316 {
6317 for (j = 0; j < (HOST_WIDE_INT_1U << i); ++j)
6318 sel[j] = j;
6319 for (k = 0; j < nunits; ++j, ++k)
6320 sel[j] = nunits + k;
6321 }
6322 vec_perm_indices indices (sel, i == units_log2 ? 1 : 2, nunits);
6323 if (!can_vec_perm_const_p (vec_mode, indices))
6324 {
6325 if (i == units_log2)
6326 return -1;
6327
6328 if (whole_vector_shift_kind == scan_store_kind_perm)
6329 {
6330 if (optab_handler (vec_shl_optab, vec_mode) == CODE_FOR_nothing)
6331 return -1;
6332 whole_vector_shift_kind = scan_store_kind_lshift_zero;
6333 /* Whole vector shifts shift in zeros, so if init is an all-zeros
6334 constant, there is no need to do anything further. */
6335 if ((TREE_CODE (init) != INTEGER_CST
6336 && TREE_CODE (init) != REAL_CST)
6337 || !initializer_zerop (init))
6338 {
6339 tree masktype = truth_type_for (vectype);
6340 if (!expand_vec_cond_expr_p (vectype, masktype, VECTOR_CST))
6341 return -1;
6342 whole_vector_shift_kind = scan_store_kind_lshift_cond;
6343 }
6344 }
6345 kind = whole_vector_shift_kind;
6346 }
6347 if (use_whole_vector)
6348 {
6349 if (kind != scan_store_kind_perm && use_whole_vector->is_empty ())
6350 use_whole_vector->safe_grow_cleared (i, true);
6351 if (kind != scan_store_kind_perm || !use_whole_vector->is_empty ())
6352 use_whole_vector->safe_push (kind);
6353 }
6354 }
6355
6356 return units_log2;
6357 }
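/* For example (illustrative, assuming a vectype with nunits == 8): the loop
   above checks the permutation masks
       i == 0:  { 0, 8, 9, 10, 11, 12, 13, 14 }
       i == 1:  { 0, 1, 8, 9, 10, 11, 12, 13 }
       i == 2:  { 0, 1, 2, 3, 8, 9, 10, 11 }
       i == 3:  { 7, 7, 7, 7, 7, 7, 7, 7 }    (units_log2: broadcast of the
						last element)
   which are exactly the VEC_PERM_EXPRs used in the scan expansion sketched
   in the comment inside check_scan_store below.  */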
6358
6359
6360 /* Function check_scan_store.
6361
6362 Check magic stores for #pragma omp scan {in,ex}clusive reductions. */
6363
6364 static bool
6365 check_scan_store (vec_info *vinfo, stmt_vec_info stmt_info, tree vectype,
6366 enum vect_def_type rhs_dt, bool slp, tree mask,
6367 vect_memory_access_type memory_access_type)
6368 {
6369 loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
6370 dr_vec_info *dr_info = STMT_VINFO_DR_INFO (stmt_info);
6371 tree ref_type;
6372
6373 gcc_assert (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) > 1);
6374 if (slp
6375 || mask
6376 || memory_access_type != VMAT_CONTIGUOUS
6377 || TREE_CODE (DR_BASE_ADDRESS (dr_info->dr)) != ADDR_EXPR
6378 || !VAR_P (TREE_OPERAND (DR_BASE_ADDRESS (dr_info->dr), 0))
6379 || loop_vinfo == NULL
6380 || LOOP_VINFO_FULLY_MASKED_P (loop_vinfo)
6381 || STMT_VINFO_GROUPED_ACCESS (stmt_info)
6382 || !integer_zerop (get_dr_vinfo_offset (vinfo, dr_info))
6383 || !integer_zerop (DR_INIT (dr_info->dr))
6384 || !(ref_type = reference_alias_ptr_type (DR_REF (dr_info->dr)))
6385 || !alias_sets_conflict_p (get_alias_set (vectype),
6386 get_alias_set (TREE_TYPE (ref_type))))
6387 {
6388 if (dump_enabled_p ())
6389 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6390 "unsupported OpenMP scan store.\n");
6391 return false;
6392 }
6393
6394 /* We need to pattern match code built by OpenMP lowering and simplified
6395 by following optimizations into something we can handle.
6396 #pragma omp simd reduction(inscan,+:r)
6397 for (...)
6398 {
6399 r += something ();
6400 #pragma omp scan inclusive (r)
6401 use (r);
6402 }
6403 shall have body with:
6404 // Initialization for input phase, store the reduction initializer:
6405 _20 = .GOMP_SIMD_LANE (simduid.3_14(D), 0);
6406 _21 = .GOMP_SIMD_LANE (simduid.3_14(D), 1);
6407 D.2042[_21] = 0;
6408 // Actual input phase:
6409 ...
6410 r.0_5 = D.2042[_20];
6411 _6 = _4 + r.0_5;
6412 D.2042[_20] = _6;
6413 // Initialization for scan phase:
6414 _25 = .GOMP_SIMD_LANE (simduid.3_14(D), 2);
6415 _26 = D.2043[_25];
6416 _27 = D.2042[_25];
6417 _28 = _26 + _27;
6418 D.2043[_25] = _28;
6419 D.2042[_25] = _28;
6420 // Actual scan phase:
6421 ...
6422 r.1_8 = D.2042[_20];
6423 ...
6424 The "omp simd array" variable D.2042 holds the privatized copy used
6425 inside the loop, and D.2043 is another one that holds copies of
6426 the current original list item. The separate GOMP_SIMD_LANE ifn
6427 kinds are there to allow the initializer store and combiner
6428 sequence to be optimized, e.g. when it is originally some C++-ish
6429 user-defined reduction, while still letting the vectorizer pattern
6430 recognize it and turn it into the appropriate vectorized scan.
6431
6432 For exclusive scan, this is slightly different:
6433 #pragma omp simd reduction(inscan,+:r)
6434 for (...)
6435 {
6436 use (r);
6437 #pragma omp scan exclusive (r)
6438 r += something ();
6439 }
6440 shall have body with:
6441 // Initialization for input phase, store the reduction initializer:
6442 _20 = .GOMP_SIMD_LANE (simduid.3_14(D), 0);
6443 _21 = .GOMP_SIMD_LANE (simduid.3_14(D), 1);
6444 D.2042[_21] = 0;
6445 // Actual input phase:
6446 ...
6447 r.0_5 = D.2042[_20];
6448 _6 = _4 + r.0_5;
6449 D.2042[_20] = _6;
6450 // Initialization for scan phase:
6451 _25 = .GOMP_SIMD_LANE (simduid.3_14(D), 3);
6452 _26 = D.2043[_25];
6453 D.2044[_25] = _26;
6454 _27 = D.2042[_25];
6455 _28 = _26 + _27;
6456 D.2043[_25] = _28;
6457 // Actual scan phase:
6458 ...
6459 r.1_8 = D.2044[_20];
6460 ... */
6461
6462 if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) == 2)
6463 {
6464 /* Match the D.2042[_21] = 0; store above. Just require that
6465 it is a constant or external definition store. */
6466 if (rhs_dt != vect_constant_def && rhs_dt != vect_external_def)
6467 {
6468 fail_init:
6469 if (dump_enabled_p ())
6470 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6471 "unsupported OpenMP scan initializer store.\n");
6472 return false;
6473 }
6474
6475 if (! loop_vinfo->scan_map)
6476 loop_vinfo->scan_map = new hash_map<tree, tree>;
6477 tree var = TREE_OPERAND (DR_BASE_ADDRESS (dr_info->dr), 0);
6478 tree &cached = loop_vinfo->scan_map->get_or_insert (var);
6479 if (cached)
6480 goto fail_init;
6481 cached = gimple_assign_rhs1 (STMT_VINFO_STMT (stmt_info));
6482
6483 /* These stores can be vectorized normally. */
6484 return true;
6485 }
6486
6487 if (rhs_dt != vect_internal_def)
6488 {
6489 fail:
6490 if (dump_enabled_p ())
6491 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6492 "unsupported OpenMP scan combiner pattern.\n");
6493 return false;
6494 }
6495
6496 gimple *stmt = STMT_VINFO_STMT (stmt_info);
6497 tree rhs = gimple_assign_rhs1 (stmt);
6498 if (TREE_CODE (rhs) != SSA_NAME)
6499 goto fail;
6500
6501 gimple *other_store_stmt = NULL;
6502 tree var = TREE_OPERAND (DR_BASE_ADDRESS (dr_info->dr), 0);
6503 bool inscan_var_store
6504 = lookup_attribute ("omp simd inscan", DECL_ATTRIBUTES (var)) != NULL;
6505
6506 if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) == 4)
6507 {
6508 if (!inscan_var_store)
6509 {
6510 use_operand_p use_p;
6511 imm_use_iterator iter;
6512 FOR_EACH_IMM_USE_FAST (use_p, iter, rhs)
6513 {
6514 gimple *use_stmt = USE_STMT (use_p);
6515 if (use_stmt == stmt || is_gimple_debug (use_stmt))
6516 continue;
6517 if (gimple_bb (use_stmt) != gimple_bb (stmt)
6518 || !is_gimple_assign (use_stmt)
6519 || gimple_assign_rhs_class (use_stmt) != GIMPLE_BINARY_RHS
6520 || other_store_stmt
6521 || TREE_CODE (gimple_assign_lhs (use_stmt)) != SSA_NAME)
6522 goto fail;
6523 other_store_stmt = use_stmt;
6524 }
6525 if (other_store_stmt == NULL)
6526 goto fail;
6527 rhs = gimple_assign_lhs (other_store_stmt);
6528 if (!single_imm_use (rhs, &use_p, &other_store_stmt))
6529 goto fail;
6530 }
6531 }
6532 else if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) == 3)
6533 {
6534 use_operand_p use_p;
6535 imm_use_iterator iter;
6536 FOR_EACH_IMM_USE_FAST (use_p, iter, rhs)
6537 {
6538 gimple *use_stmt = USE_STMT (use_p);
6539 if (use_stmt == stmt || is_gimple_debug (use_stmt))
6540 continue;
6541 if (other_store_stmt)
6542 goto fail;
6543 other_store_stmt = use_stmt;
6544 }
6545 }
6546 else
6547 goto fail;
6548
6549 gimple *def_stmt = SSA_NAME_DEF_STMT (rhs);
6550 if (gimple_bb (def_stmt) != gimple_bb (stmt)
6551 || !is_gimple_assign (def_stmt)
6552 || gimple_assign_rhs_class (def_stmt) != GIMPLE_BINARY_RHS)
6553 goto fail;
6554
6555 enum tree_code code = gimple_assign_rhs_code (def_stmt);
6556 /* For pointer addition, we should use the normal plus for the vector
6557 operation. */
6558 switch (code)
6559 {
6560 case POINTER_PLUS_EXPR:
6561 code = PLUS_EXPR;
6562 break;
6563 case MULT_HIGHPART_EXPR:
6564 goto fail;
6565 default:
6566 break;
6567 }
6568 if (TREE_CODE_LENGTH (code) != binary_op || !commutative_tree_code (code))
6569 goto fail;
6570
6571 tree rhs1 = gimple_assign_rhs1 (def_stmt);
6572 tree rhs2 = gimple_assign_rhs2 (def_stmt);
6573 if (TREE_CODE (rhs1) != SSA_NAME || TREE_CODE (rhs2) != SSA_NAME)
6574 goto fail;
6575
6576 gimple *load1_stmt = SSA_NAME_DEF_STMT (rhs1);
6577 gimple *load2_stmt = SSA_NAME_DEF_STMT (rhs2);
6578 if (gimple_bb (load1_stmt) != gimple_bb (stmt)
6579 || !gimple_assign_load_p (load1_stmt)
6580 || gimple_bb (load2_stmt) != gimple_bb (stmt)
6581 || !gimple_assign_load_p (load2_stmt))
6582 goto fail;
6583
6584 stmt_vec_info load1_stmt_info = loop_vinfo->lookup_stmt (load1_stmt);
6585 stmt_vec_info load2_stmt_info = loop_vinfo->lookup_stmt (load2_stmt);
6586 if (load1_stmt_info == NULL
6587 || load2_stmt_info == NULL
6588 || (STMT_VINFO_SIMD_LANE_ACCESS_P (load1_stmt_info)
6589 != STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info))
6590 || (STMT_VINFO_SIMD_LANE_ACCESS_P (load2_stmt_info)
6591 != STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info)))
6592 goto fail;
6593
6594 if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) == 4 && inscan_var_store)
6595 {
6596 dr_vec_info *load1_dr_info = STMT_VINFO_DR_INFO (load1_stmt_info);
6597 if (TREE_CODE (DR_BASE_ADDRESS (load1_dr_info->dr)) != ADDR_EXPR
6598 || !VAR_P (TREE_OPERAND (DR_BASE_ADDRESS (load1_dr_info->dr), 0)))
6599 goto fail;
6600 tree var1 = TREE_OPERAND (DR_BASE_ADDRESS (load1_dr_info->dr), 0);
6601 tree lrhs;
6602 if (lookup_attribute ("omp simd inscan", DECL_ATTRIBUTES (var1)))
6603 lrhs = rhs1;
6604 else
6605 lrhs = rhs2;
6606 use_operand_p use_p;
6607 imm_use_iterator iter;
6608 FOR_EACH_IMM_USE_FAST (use_p, iter, lrhs)
6609 {
6610 gimple *use_stmt = USE_STMT (use_p);
6611 if (use_stmt == def_stmt || is_gimple_debug (use_stmt))
6612 continue;
6613 if (other_store_stmt)
6614 goto fail;
6615 other_store_stmt = use_stmt;
6616 }
6617 }
6618
6619 if (other_store_stmt == NULL)
6620 goto fail;
6621 if (gimple_bb (other_store_stmt) != gimple_bb (stmt)
6622 || !gimple_store_p (other_store_stmt))
6623 goto fail;
6624
6625 stmt_vec_info other_store_stmt_info
6626 = loop_vinfo->lookup_stmt (other_store_stmt);
6627 if (other_store_stmt_info == NULL
6628 || (STMT_VINFO_SIMD_LANE_ACCESS_P (other_store_stmt_info)
6629 != STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info)))
6630 goto fail;
6631
6632 gimple *stmt1 = stmt;
6633 gimple *stmt2 = other_store_stmt;
6634 if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) == 4 && !inscan_var_store)
6635 std::swap (stmt1, stmt2);
6636 if (scan_operand_equal_p (gimple_assign_lhs (stmt1),
6637 gimple_assign_rhs1 (load2_stmt)))
6638 {
6639 std::swap (rhs1, rhs2);
6640 std::swap (load1_stmt, load2_stmt);
6641 std::swap (load1_stmt_info, load2_stmt_info);
6642 }
6643 if (!scan_operand_equal_p (gimple_assign_lhs (stmt1),
6644 gimple_assign_rhs1 (load1_stmt)))
6645 goto fail;
6646
6647 tree var3 = NULL_TREE;
6648 if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) == 3
6649 && !scan_operand_equal_p (gimple_assign_lhs (stmt2),
6650 gimple_assign_rhs1 (load2_stmt)))
6651 goto fail;
6652 else if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) == 4)
6653 {
6654 dr_vec_info *load2_dr_info = STMT_VINFO_DR_INFO (load2_stmt_info);
6655 if (TREE_CODE (DR_BASE_ADDRESS (load2_dr_info->dr)) != ADDR_EXPR
6656 || !VAR_P (TREE_OPERAND (DR_BASE_ADDRESS (load2_dr_info->dr), 0)))
6657 goto fail;
6658 var3 = TREE_OPERAND (DR_BASE_ADDRESS (load2_dr_info->dr), 0);
6659 if (!lookup_attribute ("omp simd array", DECL_ATTRIBUTES (var3))
6660 || lookup_attribute ("omp simd inscan", DECL_ATTRIBUTES (var3))
6661 || lookup_attribute ("omp simd inscan exclusive",
6662 DECL_ATTRIBUTES (var3)))
6663 goto fail;
6664 }
6665
6666 dr_vec_info *other_dr_info = STMT_VINFO_DR_INFO (other_store_stmt_info);
6667 if (TREE_CODE (DR_BASE_ADDRESS (other_dr_info->dr)) != ADDR_EXPR
6668 || !VAR_P (TREE_OPERAND (DR_BASE_ADDRESS (other_dr_info->dr), 0)))
6669 goto fail;
6670
6671 tree var1 = TREE_OPERAND (DR_BASE_ADDRESS (dr_info->dr), 0);
6672 tree var2 = TREE_OPERAND (DR_BASE_ADDRESS (other_dr_info->dr), 0);
6673 if (!lookup_attribute ("omp simd array", DECL_ATTRIBUTES (var1))
6674 || !lookup_attribute ("omp simd array", DECL_ATTRIBUTES (var2))
6675 || (!lookup_attribute ("omp simd inscan", DECL_ATTRIBUTES (var1)))
6676 == (!lookup_attribute ("omp simd inscan", DECL_ATTRIBUTES (var2))))
6677 goto fail;
6678
6679 if (lookup_attribute ("omp simd inscan", DECL_ATTRIBUTES (var1)))
6680 std::swap (var1, var2);
6681
6682 if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) == 4)
6683 {
6684 if (!lookup_attribute ("omp simd inscan exclusive",
6685 DECL_ATTRIBUTES (var1)))
6686 goto fail;
6687 var1 = var3;
6688 }
6689
6690 if (loop_vinfo->scan_map == NULL)
6691 goto fail;
6692 tree *init = loop_vinfo->scan_map->get (var1);
6693 if (init == NULL)
6694 goto fail;
6695
6696 /* The IL is as expected; now check whether we can actually vectorize it.
6697 Inclusive scan:
6698 _26 = D.2043[_25];
6699 _27 = D.2042[_25];
6700 _28 = _26 + _27;
6701 D.2043[_25] = _28;
6702 D.2042[_25] = _28;
6703 should be vectorized as (where _40 is the vectorized rhs
6704 from the D.2042[_21] = 0; store):
6705 _30 = MEM <vector(8) int> [(int *)&D.2043];
6706 _31 = MEM <vector(8) int> [(int *)&D.2042];
6707 _32 = VEC_PERM_EXPR <_40, _31, { 0, 8, 9, 10, 11, 12, 13, 14 }>;
6708 _33 = _31 + _32;
6709 // _33 = { _31[0], _31[0]+_31[1], _31[1]+_31[2], ..., _31[6]+_31[7] };
6710 _34 = VEC_PERM_EXPR <_40, _33, { 0, 1, 8, 9, 10, 11, 12, 13 }>;
6711 _35 = _33 + _34;
6712 // _35 = { _31[0], _31[0]+_31[1], _31[0]+.._31[2], _31[0]+.._31[3],
6713 // _31[1]+.._31[4], ... _31[4]+.._31[7] };
6714 _36 = VEC_PERM_EXPR <_40, _35, { 0, 1, 2, 3, 8, 9, 10, 11 }>;
6715 _37 = _35 + _36;
6716 // _37 = { _31[0], _31[0]+_31[1], _31[0]+.._31[2], _31[0]+.._31[3],
6717 // _31[0]+.._31[4], ... _31[0]+.._31[7] };
6718 _38 = _30 + _37;
6719 _39 = VEC_PERM_EXPR <_38, _38, { 7, 7, 7, 7, 7, 7, 7, 7 }>;
6720 MEM <vector(8) int> [(int *)&D.2043] = _39;
6721 MEM <vector(8) int> [(int *)&D.2042] = _38;
6722 Exclusive scan:
6723 _26 = D.2043[_25];
6724 D.2044[_25] = _26;
6725 _27 = D.2042[_25];
6726 _28 = _26 + _27;
6727 D.2043[_25] = _28;
6728 should be vectorized as (where _40 is the vectorized rhs
6729 from the D.2042[_21] = 0; store):
6730 _30 = MEM <vector(8) int> [(int *)&D.2043];
6731 _31 = MEM <vector(8) int> [(int *)&D.2042];
6732 _32 = VEC_PERM_EXPR <_40, _31, { 0, 8, 9, 10, 11, 12, 13, 14 }>;
6733 _33 = VEC_PERM_EXPR <_40, _32, { 0, 8, 9, 10, 11, 12, 13, 14 }>;
6734 _34 = _32 + _33;
6735 // _34 = { 0, _31[0], _31[0]+_31[1], _31[1]+_31[2], _31[2]+_31[3],
6736 // _31[3]+_31[4], ... _31[5]+.._31[6] };
6737 _35 = VEC_PERM_EXPR <_40, _34, { 0, 1, 8, 9, 10, 11, 12, 13 }>;
6738 _36 = _34 + _35;
6739 // _36 = { 0, _31[0], _31[0]+_31[1], _31[0]+.._31[2], _31[0]+.._31[3],
6740 // _31[1]+.._31[4], ... _31[3]+.._31[6] };
6741 _37 = VEC_PERM_EXPR <_40, _36, { 0, 1, 2, 3, 8, 9, 10, 11 }>;
6742 _38 = _36 + _37;
6743 // _38 = { 0, _31[0], _31[0]+_31[1], _31[0]+.._31[2], _31[0]+.._31[3],
6744 // _31[0]+.._31[4], ... _31[0]+.._31[6] };
6745 _39 = _30 + _38;
6746 _50 = _31 + _39;
6747 _51 = VEC_PERM_EXPR <_50, _50, { 7, 7, 7, 7, 7, 7, 7, 7 }>;
6748 MEM <vector(8) int> [(int *)&D.2044] = _39;
6749 MEM <vector(8) int> [(int *)&D.2042] = _51; */
6750 enum machine_mode vec_mode = TYPE_MODE (vectype);
6751 optab optab = optab_for_tree_code (code, vectype, optab_default);
6752 if (!optab || optab_handler (optab, vec_mode) == CODE_FOR_nothing)
6753 goto fail;
6754
6755 int units_log2 = scan_store_can_perm_p (vectype, *init);
6756 if (units_log2 == -1)
6757 goto fail;
6758
6759 return true;
6760 }
6761
6762
6763 /* Function vectorizable_scan_store.
6764
6765    Helper of vectorizable_store, with arguments as for vectorizable_store.
6766 Handle only the transformation, checking is done in check_scan_store. */
6767
6768 static bool
6769 vectorizable_scan_store (vec_info *vinfo,
6770 stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
6771 gimple **vec_stmt, int ncopies)
6772 {
6773 loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
6774 dr_vec_info *dr_info = STMT_VINFO_DR_INFO (stmt_info);
6775 tree ref_type = reference_alias_ptr_type (DR_REF (dr_info->dr));
6776 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
6777
6778 if (dump_enabled_p ())
6779 dump_printf_loc (MSG_NOTE, vect_location,
6780 "transform scan store. ncopies = %d\n", ncopies);
6781
6782 gimple *stmt = STMT_VINFO_STMT (stmt_info);
6783 tree rhs = gimple_assign_rhs1 (stmt);
6784 gcc_assert (TREE_CODE (rhs) == SSA_NAME);
6785
6786 tree var = TREE_OPERAND (DR_BASE_ADDRESS (dr_info->dr), 0);
6787 bool inscan_var_store
6788 = lookup_attribute ("omp simd inscan", DECL_ATTRIBUTES (var)) != NULL;
6789
6790 if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) == 4 && !inscan_var_store)
6791 {
6792 use_operand_p use_p;
6793 imm_use_iterator iter;
6794 FOR_EACH_IMM_USE_FAST (use_p, iter, rhs)
6795 {
6796 gimple *use_stmt = USE_STMT (use_p);
6797 if (use_stmt == stmt || is_gimple_debug (use_stmt))
6798 continue;
6799 rhs = gimple_assign_lhs (use_stmt);
6800 break;
6801 }
6802 }
6803
6804 gimple *def_stmt = SSA_NAME_DEF_STMT (rhs);
6805 enum tree_code code = gimple_assign_rhs_code (def_stmt);
6806 if (code == POINTER_PLUS_EXPR)
6807 code = PLUS_EXPR;
6808 gcc_assert (TREE_CODE_LENGTH (code) == binary_op
6809 && commutative_tree_code (code));
6810 tree rhs1 = gimple_assign_rhs1 (def_stmt);
6811 tree rhs2 = gimple_assign_rhs2 (def_stmt);
6812 gcc_assert (TREE_CODE (rhs1) == SSA_NAME && TREE_CODE (rhs2) == SSA_NAME);
6813 gimple *load1_stmt = SSA_NAME_DEF_STMT (rhs1);
6814 gimple *load2_stmt = SSA_NAME_DEF_STMT (rhs2);
6815 stmt_vec_info load1_stmt_info = loop_vinfo->lookup_stmt (load1_stmt);
6816 stmt_vec_info load2_stmt_info = loop_vinfo->lookup_stmt (load2_stmt);
6817 dr_vec_info *load1_dr_info = STMT_VINFO_DR_INFO (load1_stmt_info);
6818 dr_vec_info *load2_dr_info = STMT_VINFO_DR_INFO (load2_stmt_info);
6819 tree var1 = TREE_OPERAND (DR_BASE_ADDRESS (load1_dr_info->dr), 0);
6820 tree var2 = TREE_OPERAND (DR_BASE_ADDRESS (load2_dr_info->dr), 0);
6821
6822 if (lookup_attribute ("omp simd inscan", DECL_ATTRIBUTES (var1)))
6823 {
6824 std::swap (rhs1, rhs2);
6825 std::swap (var1, var2);
6826 std::swap (load1_dr_info, load2_dr_info);
6827 }
6828
6829 tree *init = loop_vinfo->scan_map->get (var1);
6830 gcc_assert (init);
6831
6832 unsigned HOST_WIDE_INT nunits;
6833 if (!TYPE_VECTOR_SUBPARTS (vectype).is_constant (&nunits))
6834 gcc_unreachable ();
6835 auto_vec<enum scan_store_kind, 16> use_whole_vector;
6836 int units_log2 = scan_store_can_perm_p (vectype, *init, &use_whole_vector);
6837 gcc_assert (units_log2 > 0);
6838 auto_vec<tree, 16> perms;
6839 perms.quick_grow (units_log2 + 1);
6840 tree zero_vec = NULL_TREE, masktype = NULL_TREE;
6841 for (int i = 0; i <= units_log2; ++i)
6842 {
6843 unsigned HOST_WIDE_INT j, k;
6844 vec_perm_builder sel (nunits, nunits, 1);
6845 sel.quick_grow (nunits);
6846 if (i == units_log2)
6847 for (j = 0; j < nunits; ++j)
6848 sel[j] = nunits - 1;
6849 else
6850 {
6851 for (j = 0; j < (HOST_WIDE_INT_1U << i); ++j)
6852 sel[j] = j;
6853 for (k = 0; j < nunits; ++j, ++k)
6854 sel[j] = nunits + k;
6855 }
6856 vec_perm_indices indices (sel, i == units_log2 ? 1 : 2, nunits);
6857 if (!use_whole_vector.is_empty ()
6858 && use_whole_vector[i] != scan_store_kind_perm)
6859 {
6860 if (zero_vec == NULL_TREE)
6861 zero_vec = build_zero_cst (vectype);
6862 if (masktype == NULL_TREE
6863 && use_whole_vector[i] == scan_store_kind_lshift_cond)
6864 masktype = truth_type_for (vectype);
6865 perms[i] = vect_gen_perm_mask_any (vectype, indices);
6866 }
6867 else
6868 perms[i] = vect_gen_perm_mask_checked (vectype, indices);
6869 }
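  /* As an illustration, with nunits == 8 (so units_log2 == 3) the selectors
     built above are
       perms[0] = { 0, 8, 9, 10, 11, 12, 13, 14 }
       perms[1] = { 0, 1, 8, 9, 10, 11, 12, 13 }
       perms[2] = { 0, 1, 2, 3, 8, 9, 10, 11 }
       perms[3] = { 7, 7, 7, 7, 7, 7, 7, 7 }
     matching the VEC_PERM_EXPR masks in the example in check_scan_store.  */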
6870
6871 tree vec_oprnd1 = NULL_TREE;
6872 tree vec_oprnd2 = NULL_TREE;
6873 tree vec_oprnd3 = NULL_TREE;
6874 tree dataref_ptr = DR_BASE_ADDRESS (dr_info->dr);
6875 tree dataref_offset = build_int_cst (ref_type, 0);
6876 tree bump = vect_get_data_ptr_increment (vinfo, dr_info,
6877 vectype, VMAT_CONTIGUOUS);
6878 tree ldataref_ptr = NULL_TREE;
6879 tree orig = NULL_TREE;
6880 if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) == 4 && !inscan_var_store)
6881 ldataref_ptr = DR_BASE_ADDRESS (load1_dr_info->dr);
6882 auto_vec<tree> vec_oprnds1;
6883 auto_vec<tree> vec_oprnds2;
6884 auto_vec<tree> vec_oprnds3;
6885 vect_get_vec_defs (vinfo, stmt_info, NULL, ncopies,
6886 *init, &vec_oprnds1,
6887 ldataref_ptr == NULL ? rhs1 : NULL, &vec_oprnds2,
6888 rhs2, &vec_oprnds3);
6889 for (int j = 0; j < ncopies; j++)
6890 {
6891 vec_oprnd1 = vec_oprnds1[j];
6892 if (ldataref_ptr == NULL)
6893 vec_oprnd2 = vec_oprnds2[j];
6894 vec_oprnd3 = vec_oprnds3[j];
6895 if (j == 0)
6896 orig = vec_oprnd3;
6897 else if (!inscan_var_store)
6898 dataref_offset = int_const_binop (PLUS_EXPR, dataref_offset, bump);
6899
6900 if (ldataref_ptr)
6901 {
6902 vec_oprnd2 = make_ssa_name (vectype);
6903 tree data_ref = fold_build2 (MEM_REF, vectype,
6904 unshare_expr (ldataref_ptr),
6905 dataref_offset);
6906 vect_copy_ref_info (data_ref, DR_REF (load1_dr_info->dr));
6907 gimple *g = gimple_build_assign (vec_oprnd2, data_ref);
6908 vect_finish_stmt_generation (vinfo, stmt_info, g, gsi);
6909 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (g);
6910 *vec_stmt = STMT_VINFO_VEC_STMTS (stmt_info)[0];
6911 }
6912
6913 tree v = vec_oprnd2;
6914 for (int i = 0; i < units_log2; ++i)
6915 {
6916 tree new_temp = make_ssa_name (vectype);
6917 gimple *g = gimple_build_assign (new_temp, VEC_PERM_EXPR,
6918 (zero_vec
6919 && (use_whole_vector[i]
6920 != scan_store_kind_perm))
6921 ? zero_vec : vec_oprnd1, v,
6922 perms[i]);
6923 vect_finish_stmt_generation (vinfo, stmt_info, g, gsi);
6924 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (g);
6925 *vec_stmt = STMT_VINFO_VEC_STMTS (stmt_info)[0];
6926
6927 if (zero_vec && use_whole_vector[i] == scan_store_kind_lshift_cond)
6928 {
6929		      /* The whole-vector shift shifted in zero bits, but if *init
6930			 is not initializer_zerop, we need to replace those elements
6931			 with the corresponding elements from vec_oprnd1.  */
6932 tree_vector_builder vb (masktype, nunits, 1);
6933 for (unsigned HOST_WIDE_INT k = 0; k < nunits; ++k)
6934 vb.quick_push (k < (HOST_WIDE_INT_1U << i)
6935 ? boolean_false_node : boolean_true_node);
6936
6937 tree new_temp2 = make_ssa_name (vectype);
6938 g = gimple_build_assign (new_temp2, VEC_COND_EXPR, vb.build (),
6939 new_temp, vec_oprnd1);
6940 vect_finish_stmt_generation (vinfo, stmt_info,
6941 g, gsi);
6942 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (g);
6943 new_temp = new_temp2;
6944 }
6945
6946 /* For exclusive scan, perform the perms[i] permutation once
6947 more. */
6948 if (i == 0
6949 && STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) == 4
6950 && v == vec_oprnd2)
6951 {
6952 v = new_temp;
6953 --i;
6954 continue;
6955 }
6956
6957 tree new_temp2 = make_ssa_name (vectype);
6958 g = gimple_build_assign (new_temp2, code, v, new_temp);
6959 vect_finish_stmt_generation (vinfo, stmt_info, g, gsi);
6960 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (g);
6961
6962 v = new_temp2;
6963 }
6964
6965 tree new_temp = make_ssa_name (vectype);
6966 gimple *g = gimple_build_assign (new_temp, code, orig, v);
6967 vect_finish_stmt_generation (vinfo, stmt_info, g, gsi);
6968 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (g);
6969
6970 tree last_perm_arg = new_temp;
6971 /* For exclusive scan, new_temp computed above is the exclusive scan
6972 prefix sum. Turn it into inclusive prefix sum for the broadcast
6973 of the last element into orig. */
6974 if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) == 4)
6975 {
6976 last_perm_arg = make_ssa_name (vectype);
6977 g = gimple_build_assign (last_perm_arg, code, new_temp, vec_oprnd2);
6978 vect_finish_stmt_generation (vinfo, stmt_info, g, gsi);
6979 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (g);
6980 }
6981
6982 orig = make_ssa_name (vectype);
6983 g = gimple_build_assign (orig, VEC_PERM_EXPR, last_perm_arg,
6984 last_perm_arg, perms[units_log2]);
6985 vect_finish_stmt_generation (vinfo, stmt_info, g, gsi);
6986 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (g);
6987
6988 if (!inscan_var_store)
6989 {
6990 tree data_ref = fold_build2 (MEM_REF, vectype,
6991 unshare_expr (dataref_ptr),
6992 dataref_offset);
6993 vect_copy_ref_info (data_ref, DR_REF (dr_info->dr));
6994 g = gimple_build_assign (data_ref, new_temp);
6995 vect_finish_stmt_generation (vinfo, stmt_info, g, gsi);
6996 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (g);
6997 }
6998 }
6999
7000 if (inscan_var_store)
7001 for (int j = 0; j < ncopies; j++)
7002 {
7003 if (j != 0)
7004 dataref_offset = int_const_binop (PLUS_EXPR, dataref_offset, bump);
7005
7006 tree data_ref = fold_build2 (MEM_REF, vectype,
7007 unshare_expr (dataref_ptr),
7008 dataref_offset);
7009 vect_copy_ref_info (data_ref, DR_REF (dr_info->dr));
7010 gimple *g = gimple_build_assign (data_ref, orig);
7011 vect_finish_stmt_generation (vinfo, stmt_info, g, gsi);
7012 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (g);
7013 }
7014 return true;
7015 }
7016
7017
7018 /* Function vectorizable_store.
7019
7020    Check if STMT_INFO defines a non-scalar data-ref (array/pointer/structure)
7021 that can be vectorized.
7022 If VEC_STMT is also passed, vectorize STMT_INFO: create a vectorized
7023 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
7024 Return true if STMT_INFO is vectorizable in this way. */
7025
7026 static bool
7027 vectorizable_store (vec_info *vinfo,
7028 stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
7029 gimple **vec_stmt, slp_tree slp_node,
7030 stmt_vector_for_cost *cost_vec)
7031 {
7032 tree data_ref;
7033 tree op;
7034 tree vec_oprnd = NULL_TREE;
7035 tree elem_type;
7036 loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
7037 class loop *loop = NULL;
7038 machine_mode vec_mode;
7039 tree dummy;
7040 enum vect_def_type rhs_dt = vect_unknown_def_type;
7041 enum vect_def_type mask_dt = vect_unknown_def_type;
7042 tree dataref_ptr = NULL_TREE;
7043 tree dataref_offset = NULL_TREE;
7044 gimple *ptr_incr = NULL;
7045 int ncopies;
7046 int j;
7047 stmt_vec_info first_stmt_info;
7048 bool grouped_store;
7049 unsigned int group_size, i;
7050 vec<tree> oprnds = vNULL;
7051 vec<tree> result_chain = vNULL;
7052 tree offset = NULL_TREE;
7053 vec<tree> vec_oprnds = vNULL;
7054 bool slp = (slp_node != NULL);
7055 unsigned int vec_num;
7056 bb_vec_info bb_vinfo = dyn_cast <bb_vec_info> (vinfo);
7057 tree aggr_type;
7058 gather_scatter_info gs_info;
7059 poly_uint64 vf;
7060 vec_load_store_type vls_type;
7061 tree ref_type;
7062
7063 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
7064 return false;
7065
7066 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
7067 && ! vec_stmt)
7068 return false;
7069
7070 /* Is vectorizable store? */
7071
7072 tree mask = NULL_TREE, mask_vectype = NULL_TREE;
7073 if (gassign *assign = dyn_cast <gassign *> (stmt_info->stmt))
7074 {
7075 tree scalar_dest = gimple_assign_lhs (assign);
7076 if (TREE_CODE (scalar_dest) == VIEW_CONVERT_EXPR
7077 && is_pattern_stmt_p (stmt_info))
7078 scalar_dest = TREE_OPERAND (scalar_dest, 0);
7079 if (TREE_CODE (scalar_dest) != ARRAY_REF
7080 && TREE_CODE (scalar_dest) != BIT_FIELD_REF
7081 && TREE_CODE (scalar_dest) != INDIRECT_REF
7082 && TREE_CODE (scalar_dest) != COMPONENT_REF
7083 && TREE_CODE (scalar_dest) != IMAGPART_EXPR
7084 && TREE_CODE (scalar_dest) != REALPART_EXPR
7085 && TREE_CODE (scalar_dest) != MEM_REF)
7086 return false;
7087 }
7088 else
7089 {
7090 gcall *call = dyn_cast <gcall *> (stmt_info->stmt);
7091 if (!call || !gimple_call_internal_p (call))
7092 return false;
7093
7094 internal_fn ifn = gimple_call_internal_fn (call);
7095 if (!internal_store_fn_p (ifn))
7096 return false;
7097
7098 if (slp_node != NULL)
7099 {
7100 if (dump_enabled_p ())
7101 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
7102 "SLP of masked stores not supported.\n");
7103 return false;
7104 }
7105
7106 int mask_index = internal_fn_mask_index (ifn);
7107 if (mask_index >= 0)
7108 {
7109 mask = gimple_call_arg (call, mask_index);
7110 if (!vect_check_scalar_mask (vinfo, stmt_info, mask, &mask_dt,
7111 &mask_vectype))
7112 return false;
7113 }
7114 }
7115
7116 op = vect_get_store_rhs (stmt_info);
7117
7118 /* Cannot have hybrid store SLP -- that would mean storing to the
7119 same location twice. */
7120 gcc_assert (slp == PURE_SLP_STMT (stmt_info));
7121
7122 tree vectype = STMT_VINFO_VECTYPE (stmt_info), rhs_vectype = NULL_TREE;
7123 poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
7124
7125 if (loop_vinfo)
7126 {
7127 loop = LOOP_VINFO_LOOP (loop_vinfo);
7128 vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
7129 }
7130 else
7131 vf = 1;
7132
7133 /* Multiple types in SLP are handled by creating the appropriate number of
7134 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
7135 case of SLP. */
7136 if (slp)
7137 ncopies = 1;
7138 else
7139 ncopies = vect_get_num_copies (loop_vinfo, vectype);
7140
7141 gcc_assert (ncopies >= 1);
7142
7143 /* FORNOW. This restriction should be relaxed. */
7144 if (loop && nested_in_vect_loop_p (loop, stmt_info) && ncopies > 1)
7145 {
7146 if (dump_enabled_p ())
7147 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
7148 "multiple types in nested loop.\n");
7149 return false;
7150 }
7151
7152 if (!vect_check_store_rhs (vinfo, stmt_info, slp_node,
7153 op, &rhs_dt, &rhs_vectype, &vls_type))
7154 return false;
7155
7156 elem_type = TREE_TYPE (vectype);
7157 vec_mode = TYPE_MODE (vectype);
7158
7159 if (!STMT_VINFO_DATA_REF (stmt_info))
7160 return false;
7161
7162 vect_memory_access_type memory_access_type;
7163 enum dr_alignment_support alignment_support_scheme;
7164 if (!get_load_store_type (vinfo, stmt_info, vectype, slp_node, mask, vls_type,
7165 ncopies, &memory_access_type,
7166 &alignment_support_scheme, &gs_info))
7167 return false;
7168
7169 if (mask)
7170 {
7171 if (memory_access_type == VMAT_CONTIGUOUS)
7172 {
7173 if (!VECTOR_MODE_P (vec_mode)
7174 || !can_vec_mask_load_store_p (vec_mode,
7175 TYPE_MODE (mask_vectype), false))
7176 return false;
7177 }
7178 else if (memory_access_type != VMAT_LOAD_STORE_LANES
7179 && (memory_access_type != VMAT_GATHER_SCATTER
7180 || (gs_info.decl && !VECTOR_BOOLEAN_TYPE_P (mask_vectype))))
7181 {
7182 if (dump_enabled_p ())
7183 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
7184 "unsupported access type for masked store.\n");
7185 return false;
7186 }
7187 }
7188 else
7189 {
7190       /* FORNOW.  In some cases we can vectorize even if the data type is
7191	  not supported (e.g. array initialization with 0).  */
7192 if (optab_handler (mov_optab, vec_mode) == CODE_FOR_nothing)
7193 return false;
7194 }
7195
7196 dr_vec_info *dr_info = STMT_VINFO_DR_INFO (stmt_info), *first_dr_info = NULL;
7197 grouped_store = (STMT_VINFO_GROUPED_ACCESS (stmt_info)
7198 && memory_access_type != VMAT_GATHER_SCATTER
7199 && (slp || memory_access_type != VMAT_CONTIGUOUS));
7200 if (grouped_store)
7201 {
7202 first_stmt_info = DR_GROUP_FIRST_ELEMENT (stmt_info);
7203 first_dr_info = STMT_VINFO_DR_INFO (first_stmt_info);
7204 group_size = DR_GROUP_SIZE (first_stmt_info);
7205 }
7206 else
7207 {
7208 first_stmt_info = stmt_info;
7209 first_dr_info = dr_info;
7210 group_size = vec_num = 1;
7211 }
7212
7213 if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) > 1 && !vec_stmt)
7214 {
7215 if (!check_scan_store (vinfo, stmt_info, vectype, rhs_dt, slp, mask,
7216 memory_access_type))
7217 return false;
7218 }
7219
7220 if (!vec_stmt) /* transformation not required. */
7221 {
7222 STMT_VINFO_MEMORY_ACCESS_TYPE (stmt_info) = memory_access_type;
7223
7224 if (loop_vinfo
7225 && LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo))
7226 check_load_store_for_partial_vectors (loop_vinfo, vectype, vls_type,
7227 group_size, memory_access_type,
7228 &gs_info, mask);
7229
7230 if (slp_node
7231 && !vect_maybe_update_slp_op_vectype (SLP_TREE_CHILDREN (slp_node)[0],
7232 vectype))
7233 {
7234 if (dump_enabled_p ())
7235 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
7236 "incompatible vector types for invariants\n");
7237 return false;
7238 }
7239
7240 if (dump_enabled_p ()
7241 && memory_access_type != VMAT_ELEMENTWISE
7242 && memory_access_type != VMAT_GATHER_SCATTER
7243 && alignment_support_scheme != dr_aligned)
7244 dump_printf_loc (MSG_NOTE, vect_location,
7245 "Vectorizing an unaligned access.\n");
7246
7247 STMT_VINFO_TYPE (stmt_info) = store_vec_info_type;
7248 vect_model_store_cost (vinfo, stmt_info, ncopies,
7249 memory_access_type, vls_type, slp_node, cost_vec);
7250 return true;
7251 }
7252 gcc_assert (memory_access_type == STMT_VINFO_MEMORY_ACCESS_TYPE (stmt_info));
7253
7254 /* Transform. */
7255
7256 ensure_base_align (dr_info);
7257
7258 if (memory_access_type == VMAT_GATHER_SCATTER && gs_info.decl)
7259 {
7260 tree vec_oprnd0 = NULL_TREE, vec_oprnd1 = NULL_TREE, src;
7261 tree arglist = TYPE_ARG_TYPES (TREE_TYPE (gs_info.decl));
7262 tree rettype, srctype, ptrtype, idxtype, masktype, scaletype;
7263 tree ptr, var, scale, vec_mask;
7264 tree mask_arg = NULL_TREE, mask_op = NULL_TREE, perm_mask = NULL_TREE;
7265 tree mask_halfvectype = mask_vectype;
7266 edge pe = loop_preheader_edge (loop);
7267 gimple_seq seq;
7268 basic_block new_bb;
7269 enum { NARROW, NONE, WIDEN } modifier;
7270 poly_uint64 scatter_off_nunits
7271 = TYPE_VECTOR_SUBPARTS (gs_info.offset_vectype);
7272
7273 if (known_eq (nunits, scatter_off_nunits))
7274 modifier = NONE;
7275 else if (known_eq (nunits * 2, scatter_off_nunits))
7276 {
7277 modifier = WIDEN;
7278
7279 /* Currently gathers and scatters are only supported for
7280 fixed-length vectors. */
7281 unsigned int count = scatter_off_nunits.to_constant ();
7282 vec_perm_builder sel (count, count, 1);
7283 for (i = 0; i < (unsigned int) count; ++i)
7284 sel.quick_push (i | (count / 2));
7285
7286 vec_perm_indices indices (sel, 1, count);
7287 perm_mask = vect_gen_perm_mask_checked (gs_info.offset_vectype,
7288 indices);
7289 gcc_assert (perm_mask != NULL_TREE);
7290 }
7291 else if (known_eq (nunits, scatter_off_nunits * 2))
7292 {
7293 modifier = NARROW;
7294
7295 /* Currently gathers and scatters are only supported for
7296 fixed-length vectors. */
7297 unsigned int count = nunits.to_constant ();
7298 vec_perm_builder sel (count, count, 1);
7299 for (i = 0; i < (unsigned int) count; ++i)
7300 sel.quick_push (i | (count / 2));
7301
7302 vec_perm_indices indices (sel, 2, count);
7303 perm_mask = vect_gen_perm_mask_checked (vectype, indices);
7304 gcc_assert (perm_mask != NULL_TREE);
7305 ncopies *= 2;
7306
7307 if (mask)
7308 mask_halfvectype = truth_type_for (gs_info.offset_vectype);
7309 }
7310 else
7311 gcc_unreachable ();
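      /* For illustration: with count == 8 the selector built above is
	 { 4, 5, 6, 7, 4, 5, 6, 7 }, i.e. a permutation that copies the high
	 half of its input into both halves of the result.  The odd copies in
	 the loop below use it to pick up the second half of the offsets
	 (WIDEN) or of the rhs values (NARROW).  */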
7312
7313 rettype = TREE_TYPE (TREE_TYPE (gs_info.decl));
7314 ptrtype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
7315 masktype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
7316 idxtype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
7317 srctype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
7318 scaletype = TREE_VALUE (arglist);
7319
7320 gcc_checking_assert (TREE_CODE (masktype) == INTEGER_TYPE
7321 && TREE_CODE (rettype) == VOID_TYPE);
7322
7323 ptr = fold_convert (ptrtype, gs_info.base);
7324 if (!is_gimple_min_invariant (ptr))
7325 {
7326 ptr = force_gimple_operand (ptr, &seq, true, NULL_TREE);
7327 new_bb = gsi_insert_seq_on_edge_immediate (pe, seq);
7328 gcc_assert (!new_bb);
7329 }
7330
7331 if (mask == NULL_TREE)
7332 {
7333 mask_arg = build_int_cst (masktype, -1);
7334 mask_arg = vect_init_vector (vinfo, stmt_info,
7335 mask_arg, masktype, NULL);
7336 }
7337
7338 scale = build_int_cst (scaletype, gs_info.scale);
7339
7340 auto_vec<tree> vec_oprnds0;
7341 auto_vec<tree> vec_oprnds1;
7342 auto_vec<tree> vec_masks;
7343 if (mask)
7344 {
7345 tree mask_vectype = truth_type_for (vectype);
7346 vect_get_vec_defs_for_operand (vinfo, stmt_info,
7347 modifier == NARROW
7348 ? ncopies / 2 : ncopies,
7349 mask, &vec_masks, mask_vectype);
7350 }
7351 vect_get_vec_defs_for_operand (vinfo, stmt_info,
7352 modifier == WIDEN
7353 ? ncopies / 2 : ncopies,
7354 gs_info.offset, &vec_oprnds0);
7355 vect_get_vec_defs_for_operand (vinfo, stmt_info,
7356 modifier == NARROW
7357 ? ncopies / 2 : ncopies,
7358 op, &vec_oprnds1);
7359 for (j = 0; j < ncopies; ++j)
7360 {
7361 if (modifier == WIDEN)
7362 {
7363 if (j & 1)
7364 op = permute_vec_elements (vinfo, vec_oprnd0, vec_oprnd0,
7365 perm_mask, stmt_info, gsi);
7366 else
7367 op = vec_oprnd0 = vec_oprnds0[j / 2];
7368 src = vec_oprnd1 = vec_oprnds1[j];
7369 if (mask)
7370 mask_op = vec_mask = vec_masks[j];
7371 }
7372 else if (modifier == NARROW)
7373 {
7374 if (j & 1)
7375 src = permute_vec_elements (vinfo, vec_oprnd1, vec_oprnd1,
7376 perm_mask, stmt_info, gsi);
7377 else
7378 src = vec_oprnd1 = vec_oprnds1[j / 2];
7379 op = vec_oprnd0 = vec_oprnds0[j];
7380 if (mask)
7381 mask_op = vec_mask = vec_masks[j / 2];
7382 }
7383 else
7384 {
7385 op = vec_oprnd0 = vec_oprnds0[j];
7386 src = vec_oprnd1 = vec_oprnds1[j];
7387 if (mask)
7388 mask_op = vec_mask = vec_masks[j];
7389 }
7390
7391 if (!useless_type_conversion_p (srctype, TREE_TYPE (src)))
7392 {
7393 gcc_assert (known_eq (TYPE_VECTOR_SUBPARTS (TREE_TYPE (src)),
7394 TYPE_VECTOR_SUBPARTS (srctype)));
7395 var = vect_get_new_ssa_name (srctype, vect_simple_var);
7396 src = build1 (VIEW_CONVERT_EXPR, srctype, src);
7397 gassign *new_stmt
7398 = gimple_build_assign (var, VIEW_CONVERT_EXPR, src);
7399 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
7400 src = var;
7401 }
7402
7403 if (!useless_type_conversion_p (idxtype, TREE_TYPE (op)))
7404 {
7405 gcc_assert (known_eq (TYPE_VECTOR_SUBPARTS (TREE_TYPE (op)),
7406 TYPE_VECTOR_SUBPARTS (idxtype)));
7407 var = vect_get_new_ssa_name (idxtype, vect_simple_var);
7408 op = build1 (VIEW_CONVERT_EXPR, idxtype, op);
7409 gassign *new_stmt
7410 = gimple_build_assign (var, VIEW_CONVERT_EXPR, op);
7411 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
7412 op = var;
7413 }
7414
7415 if (mask)
7416 {
7417 tree utype;
7418 mask_arg = mask_op;
7419 if (modifier == NARROW)
7420 {
7421 var = vect_get_new_ssa_name (mask_halfvectype,
7422 vect_simple_var);
7423 gassign *new_stmt
7424 = gimple_build_assign (var, (j & 1) ? VEC_UNPACK_HI_EXPR
7425 : VEC_UNPACK_LO_EXPR,
7426 mask_op);
7427 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
7428 mask_arg = var;
7429 }
7430 tree optype = TREE_TYPE (mask_arg);
7431 if (TYPE_MODE (masktype) == TYPE_MODE (optype))
7432 utype = masktype;
7433 else
7434 utype = lang_hooks.types.type_for_mode (TYPE_MODE (optype), 1);
7435 var = vect_get_new_ssa_name (utype, vect_scalar_var);
7436 mask_arg = build1 (VIEW_CONVERT_EXPR, utype, mask_arg);
7437 gassign *new_stmt
7438 = gimple_build_assign (var, VIEW_CONVERT_EXPR, mask_arg);
7439 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
7440 mask_arg = var;
7441 if (!useless_type_conversion_p (masktype, utype))
7442 {
7443 gcc_assert (TYPE_PRECISION (utype)
7444 <= TYPE_PRECISION (masktype));
7445 var = vect_get_new_ssa_name (masktype, vect_scalar_var);
7446 new_stmt = gimple_build_assign (var, NOP_EXPR, mask_arg);
7447 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
7448 mask_arg = var;
7449 }
7450 }
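	  /* For example (illustrative only): on a target like x86/AVX-512,
	     an 8-lane vector boolean mask has an integer mode (QImode), so
	     it is VIEW_CONVERT_EXPR'ed to an unsigned char and, if the
	     builtin expects a wider integer mask type, widened with a
	     NOP_EXPR before being passed as MASK_ARG.  */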
7451
7452 gcall *new_stmt
7453 = gimple_build_call (gs_info.decl, 5, ptr, mask_arg, op, src, scale);
7454 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
7455
7456 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
7457 }
7458 *vec_stmt = STMT_VINFO_VEC_STMTS (stmt_info)[0];
7459 return true;
7460 }
7461 else if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) >= 3)
7462 return vectorizable_scan_store (vinfo, stmt_info, gsi, vec_stmt, ncopies);
7463
7464 if (STMT_VINFO_GROUPED_ACCESS (stmt_info))
7465 DR_GROUP_STORE_COUNT (DR_GROUP_FIRST_ELEMENT (stmt_info))++;
7466
7467 if (grouped_store)
7468 {
7469 /* FORNOW */
7470 gcc_assert (!loop || !nested_in_vect_loop_p (loop, stmt_info));
7471
7472 /* We vectorize all the stmts of the interleaving group when we
7473 reach the last stmt in the group. */
7474 if (DR_GROUP_STORE_COUNT (first_stmt_info)
7475 < DR_GROUP_SIZE (first_stmt_info)
7476 && !slp)
7477 {
7478 *vec_stmt = NULL;
7479 return true;
7480 }
7481
7482 if (slp)
7483 {
7484 grouped_store = false;
7485 /* VEC_NUM is the number of vect stmts to be created for this
7486 group. */
7487 vec_num = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
7488 first_stmt_info = SLP_TREE_SCALAR_STMTS (slp_node)[0];
7489 gcc_assert (DR_GROUP_FIRST_ELEMENT (first_stmt_info)
7490 == first_stmt_info);
7491 first_dr_info = STMT_VINFO_DR_INFO (first_stmt_info);
7492 op = vect_get_store_rhs (first_stmt_info);
7493 }
7494 else
7495 /* VEC_NUM is the number of vect stmts to be created for this
7496 group. */
7497 vec_num = group_size;
7498
7499 ref_type = get_group_alias_ptr_type (first_stmt_info);
7500 }
7501 else
7502 ref_type = reference_alias_ptr_type (DR_REF (first_dr_info->dr));
7503
7504 if (dump_enabled_p ())
7505 dump_printf_loc (MSG_NOTE, vect_location,
7506 "transform store. ncopies = %d\n", ncopies);
7507
7508 if (memory_access_type == VMAT_ELEMENTWISE
7509 || memory_access_type == VMAT_STRIDED_SLP)
7510 {
7511 gimple_stmt_iterator incr_gsi;
7512 bool insert_after;
7513 gimple *incr;
7514 tree offvar;
7515 tree ivstep;
7516 tree running_off;
7517 tree stride_base, stride_step, alias_off;
7518 tree vec_oprnd;
7519 tree dr_offset;
7520 unsigned int g;
7521 /* Checked by get_load_store_type. */
7522 unsigned int const_nunits = nunits.to_constant ();
7523
7524 gcc_assert (!LOOP_VINFO_FULLY_MASKED_P (loop_vinfo));
7525 gcc_assert (!nested_in_vect_loop_p (loop, stmt_info));
7526
7527 dr_offset = get_dr_vinfo_offset (vinfo, first_dr_info);
7528 stride_base
7529 = fold_build_pointer_plus
7530 (DR_BASE_ADDRESS (first_dr_info->dr),
7531 size_binop (PLUS_EXPR,
7532 convert_to_ptrofftype (dr_offset),
7533 convert_to_ptrofftype (DR_INIT (first_dr_info->dr))));
7534 stride_step = fold_convert (sizetype, DR_STEP (first_dr_info->dr));
7535
7536 /* For a store with loop-invariant (but other than power-of-2)
7537 stride (i.e. not a grouped access) like so:
7538
7539 for (i = 0; i < n; i += stride)
7540 array[i] = ...;
7541
7542 we generate a new induction variable and new stores from
7543 the components of the (vectorized) rhs:
7544
7545 for (j = 0; ; j += VF*stride)
7546 vectemp = ...;
7547 tmp1 = vectemp[0];
7548 array[j] = tmp1;
7549 tmp2 = vectemp[1];
7550 array[j + stride] = tmp2;
7551 ...
7552 */
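     /* To make the SLP case below concrete (a sketch only, assuming V4SI
	vectors, i.e. const_nunits == 4, and an SLP group_size of 2): the
	code that follows picks nstores = 2, lnel = 2 and
	ltype = vector(2) int, so each V4SI rhs is stored as two two-element
	pieces, bumping the running pointer by stride_step after each piece,
	provided the target can extract those sub-vectors directly (otherwise
	the integer-mode fallback is tried).  */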
7553
7554 unsigned nstores = const_nunits;
7555 unsigned lnel = 1;
7556 tree ltype = elem_type;
7557 tree lvectype = vectype;
7558 if (slp)
7559 {
7560 if (group_size < const_nunits
7561 && const_nunits % group_size == 0)
7562 {
7563 nstores = const_nunits / group_size;
7564 lnel = group_size;
7565 ltype = build_vector_type (elem_type, group_size);
7566 lvectype = vectype;
7567
7568	      /* First check whether the vec_extract optab can extract the
7569		 sub-vector elements directly; if not, try the fallbacks below.  */
7570 scalar_mode elmode = SCALAR_TYPE_MODE (elem_type);
7571 machine_mode vmode;
7572 if (!VECTOR_MODE_P (TYPE_MODE (vectype))
7573 || !related_vector_mode (TYPE_MODE (vectype), elmode,
7574 group_size).exists (&vmode)
7575 || (convert_optab_handler (vec_extract_optab,
7576 TYPE_MODE (vectype), vmode)
7577 == CODE_FOR_nothing))
7578 {
7579 /* Try to avoid emitting an extract of vector elements
7580 by performing the extracts using an integer type of the
7581 same size, extracting from a vector of those and then
7582 re-interpreting it as the original vector type if
7583 supported. */
7584 unsigned lsize
7585 = group_size * GET_MODE_BITSIZE (elmode);
7586 unsigned int lnunits = const_nunits / group_size;
7587 /* If we can't construct such a vector fall back to
7588 element extracts from the original vector type and
7589 element size stores. */
7590 if (int_mode_for_size (lsize, 0).exists (&elmode)
7591 && VECTOR_MODE_P (TYPE_MODE (vectype))
7592 && related_vector_mode (TYPE_MODE (vectype), elmode,
7593 lnunits).exists (&vmode)
7594 && (convert_optab_handler (vec_extract_optab,
7595 vmode, elmode)
7596 != CODE_FOR_nothing))
7597 {
7598 nstores = lnunits;
7599 lnel = group_size;
7600 ltype = build_nonstandard_integer_type (lsize, 1);
7601 lvectype = build_vector_type (ltype, nstores);
7602 }
7603 /* Else fall back to vector extraction anyway.
7604 Fewer stores are more important than avoiding spilling
7605 of the vector we extract from. Compared to the
7606 construction case in vectorizable_load no store-forwarding
7607 issue exists here for reasonable archs. */
7608 }
7609 }
7610 else if (group_size >= const_nunits
7611 && group_size % const_nunits == 0)
7612 {
7613 nstores = 1;
7614 lnel = const_nunits;
7615 ltype = vectype;
7616 lvectype = vectype;
7617 }
7618 ltype = build_aligned_type (ltype, TYPE_ALIGN (elem_type));
7619 ncopies = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
7620 }
7621
7622 ivstep = stride_step;
7623 ivstep = fold_build2 (MULT_EXPR, TREE_TYPE (ivstep), ivstep,
7624 build_int_cst (TREE_TYPE (ivstep), vf));
7625
7626 standard_iv_increment_position (loop, &incr_gsi, &insert_after);
7627
7628 stride_base = cse_and_gimplify_to_preheader (loop_vinfo, stride_base);
7629 ivstep = cse_and_gimplify_to_preheader (loop_vinfo, ivstep);
7630 create_iv (stride_base, ivstep, NULL,
7631 loop, &incr_gsi, insert_after,
7632 &offvar, NULL);
7633 incr = gsi_stmt (incr_gsi);
7634
7635 stride_step = cse_and_gimplify_to_preheader (loop_vinfo, stride_step);
7636
7637 alias_off = build_int_cst (ref_type, 0);
7638 stmt_vec_info next_stmt_info = first_stmt_info;
7639 for (g = 0; g < group_size; g++)
7640 {
7641 running_off = offvar;
7642 if (g)
7643 {
7644 tree size = TYPE_SIZE_UNIT (ltype);
7645 tree pos = fold_build2 (MULT_EXPR, sizetype, size_int (g),
7646 size);
7647 tree newoff = copy_ssa_name (running_off, NULL);
7648 incr = gimple_build_assign (newoff, POINTER_PLUS_EXPR,
7649 running_off, pos);
7650 vect_finish_stmt_generation (vinfo, stmt_info, incr, gsi);
7651 running_off = newoff;
7652 }
7653 if (!slp)
7654 op = vect_get_store_rhs (next_stmt_info);
7655 vect_get_vec_defs (vinfo, next_stmt_info, slp_node, ncopies,
7656 op, &vec_oprnds);
7657 unsigned int group_el = 0;
7658 unsigned HOST_WIDE_INT
7659 elsz = tree_to_uhwi (TYPE_SIZE_UNIT (TREE_TYPE (vectype)));
7660 for (j = 0; j < ncopies; j++)
7661 {
7662 vec_oprnd = vec_oprnds[j];
7663 /* Pun the vector to extract from if necessary. */
7664 if (lvectype != vectype)
7665 {
7666 tree tem = make_ssa_name (lvectype);
7667 gimple *pun
7668 = gimple_build_assign (tem, build1 (VIEW_CONVERT_EXPR,
7669 lvectype, vec_oprnd));
7670 vect_finish_stmt_generation (vinfo, stmt_info, pun, gsi);
7671 vec_oprnd = tem;
7672 }
7673 for (i = 0; i < nstores; i++)
7674 {
7675 tree newref, newoff;
7676 gimple *incr, *assign;
7677 tree size = TYPE_SIZE (ltype);
7678 /* Extract the i'th component. */
7679 tree pos = fold_build2 (MULT_EXPR, bitsizetype,
7680 bitsize_int (i), size);
7681 tree elem = fold_build3 (BIT_FIELD_REF, ltype, vec_oprnd,
7682 size, pos);
7683
7684 elem = force_gimple_operand_gsi (gsi, elem, true,
7685 NULL_TREE, true,
7686 GSI_SAME_STMT);
7687
7688 tree this_off = build_int_cst (TREE_TYPE (alias_off),
7689 group_el * elsz);
7690 newref = build2 (MEM_REF, ltype,
7691 running_off, this_off);
7692 vect_copy_ref_info (newref, DR_REF (first_dr_info->dr));
7693
7694 /* And store it to *running_off. */
7695 assign = gimple_build_assign (newref, elem);
7696 vect_finish_stmt_generation (vinfo, stmt_info, assign, gsi);
7697
7698 group_el += lnel;
7699 if (! slp
7700 || group_el == group_size)
7701 {
7702 newoff = copy_ssa_name (running_off, NULL);
7703 incr = gimple_build_assign (newoff, POINTER_PLUS_EXPR,
7704 running_off, stride_step);
7705 vect_finish_stmt_generation (vinfo, stmt_info, incr, gsi);
7706
7707 running_off = newoff;
7708 group_el = 0;
7709 }
7710 if (g == group_size - 1
7711 && !slp)
7712 {
7713 if (j == 0 && i == 0)
7714 *vec_stmt = assign;
7715 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (assign);
7716 }
7717 }
7718 }
7719 next_stmt_info = DR_GROUP_NEXT_ELEMENT (next_stmt_info);
7720 if (slp)
7721 break;
7722 }
7723
7724 vec_oprnds.release ();
7725 return true;
7726 }
7727
7728 auto_vec<tree> dr_chain (group_size);
7729 oprnds.create (group_size);
7730
7731    /* Gather-scatter accesses perform only component accesses, so alignment
7732       is irrelevant for them.  */
7733 if (memory_access_type == VMAT_GATHER_SCATTER)
7734 alignment_support_scheme = dr_unaligned_supported;
7735 else
7736 alignment_support_scheme
7737 = vect_supportable_dr_alignment (vinfo, first_dr_info, false);
7738
7739 gcc_assert (alignment_support_scheme);
7740 vec_loop_masks *loop_masks
7741 = (loop_vinfo && LOOP_VINFO_FULLY_MASKED_P (loop_vinfo)
7742 ? &LOOP_VINFO_MASKS (loop_vinfo)
7743 : NULL);
7744 vec_loop_lens *loop_lens
7745 = (loop_vinfo && LOOP_VINFO_FULLY_WITH_LENGTH_P (loop_vinfo)
7746 ? &LOOP_VINFO_LENS (loop_vinfo)
7747 : NULL);
7748
7749    /* We should not use the length-based approach if the loop is fully masked.  */
7750 gcc_assert (!loop_lens || !loop_masks);
7751
7752 /* Targets with store-lane instructions must not require explicit
7753 realignment. vect_supportable_dr_alignment always returns either
7754 dr_aligned or dr_unaligned_supported for masked operations. */
7755 gcc_assert ((memory_access_type != VMAT_LOAD_STORE_LANES
7756 && !mask
7757 && !loop_masks)
7758 || alignment_support_scheme == dr_aligned
7759 || alignment_support_scheme == dr_unaligned_supported);
7760
7761 if (memory_access_type == VMAT_CONTIGUOUS_DOWN
7762 || memory_access_type == VMAT_CONTIGUOUS_REVERSE)
7763 offset = size_int (-TYPE_VECTOR_SUBPARTS (vectype) + 1);
7764
7765 tree bump;
7766 tree vec_offset = NULL_TREE;
7767 if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
7768 {
7769 aggr_type = NULL_TREE;
7770 bump = NULL_TREE;
7771 }
7772 else if (memory_access_type == VMAT_GATHER_SCATTER)
7773 {
7774 aggr_type = elem_type;
7775 vect_get_strided_load_store_ops (stmt_info, loop_vinfo, &gs_info,
7776 &bump, &vec_offset);
7777 }
7778 else
7779 {
7780 if (memory_access_type == VMAT_LOAD_STORE_LANES)
7781 aggr_type = build_array_type_nelts (elem_type, vec_num * nunits);
7782 else
7783 aggr_type = vectype;
7784 bump = vect_get_data_ptr_increment (vinfo, dr_info, aggr_type,
7785 memory_access_type);
7786 }
7787
7788 if (mask)
7789 LOOP_VINFO_HAS_MASK_STORE (loop_vinfo) = true;
7790
7791 /* In case the vectorization factor (VF) is bigger than the number
7792 of elements that we can fit in a vectype (nunits), we have to generate
7793     more than one vector stmt, i.e. we need to "unroll" the
7794 vector stmt by a factor VF/nunits. */
7795
7796 /* In case of interleaving (non-unit grouped access):
7797
7798 S1: &base + 2 = x2
7799 S2: &base = x0
7800 S3: &base + 1 = x1
7801 S4: &base + 3 = x3
7802
7803       We create vectorized stores starting from the base address (the access of
7804       the first stmt in the chain, S2 in the above example) when the last store
7805       stmt of the chain (S4) is reached:
7806
7807 VS1: &base = vx2
7808 VS2: &base + vec_size*1 = vx0
7809 VS3: &base + vec_size*2 = vx1
7810 VS4: &base + vec_size*3 = vx3
7811
7812 Then permutation statements are generated:
7813
7814 VS5: vx5 = VEC_PERM_EXPR < vx0, vx3, {0, 8, 1, 9, 2, 10, 3, 11} >
7815 VS6: vx6 = VEC_PERM_EXPR < vx0, vx3, {4, 12, 5, 13, 6, 14, 7, 15} >
7816 ...
7817
7818 And they are put in STMT_VINFO_VEC_STMT of the corresponding scalar stmts
7819 (the order of the data-refs in the output of vect_permute_store_chain
7820 corresponds to the order of scalar stmts in the interleaving chain - see
7821 the documentation of vect_permute_store_chain()).
7822
7823 In case of both multiple types and interleaving, above vector stores and
7824 permutation stmts are created for every copy. The result vector stmts are
7825 put in STMT_VINFO_VEC_STMT for the first copy and in the corresponding
7826 STMT_VINFO_RELATED_STMT for the next copies.
7827 */
7828
7829 auto_vec<tree> vec_masks;
7830 tree vec_mask = NULL;
7831 auto_vec<tree> vec_offsets;
7832 auto_vec<vec<tree> > gvec_oprnds;
7833 gvec_oprnds.safe_grow_cleared (group_size, true);
7834 for (j = 0; j < ncopies; j++)
7835 {
7836 gimple *new_stmt;
7837 if (j == 0)
7838 {
7839 if (slp)
7840 {
7841 /* Get vectorized arguments for SLP_NODE. */
7842 vect_get_vec_defs (vinfo, stmt_info, slp_node, 1,
7843 op, &vec_oprnds);
7844 vec_oprnd = vec_oprnds[0];
7845 }
7846 else
7847 {
7848 /* For interleaved stores we collect vectorized defs for all the
7849 stores in the group in DR_CHAIN and OPRNDS. DR_CHAIN is then
7850 used as an input to vect_permute_store_chain().
7851
7852 If the store is not grouped, DR_GROUP_SIZE is 1, and DR_CHAIN
7853 and OPRNDS are of size 1. */
7854 stmt_vec_info next_stmt_info = first_stmt_info;
7855 for (i = 0; i < group_size; i++)
7856 {
7857 /* Since gaps are not supported for interleaved stores,
7858 DR_GROUP_SIZE is the exact number of stmts in the chain.
7859		 Therefore, NEXT_STMT_INFO can't be NULL_TREE.  If there is
7860		 no interleaving, DR_GROUP_SIZE is 1, and only one iteration
7861		 of the loop will be executed.
7862 op = vect_get_store_rhs (next_stmt_info);
7863 vect_get_vec_defs_for_operand (vinfo, next_stmt_info,
7864 ncopies, op, &gvec_oprnds[i]);
7865 vec_oprnd = gvec_oprnds[i][0];
7866 dr_chain.quick_push (gvec_oprnds[i][0]);
7867 oprnds.quick_push (gvec_oprnds[i][0]);
7868 next_stmt_info = DR_GROUP_NEXT_ELEMENT (next_stmt_info);
7869 }
7870 if (mask)
7871 {
7872 vect_get_vec_defs_for_operand (vinfo, stmt_info, ncopies,
7873 mask, &vec_masks, mask_vectype);
7874 vec_mask = vec_masks[0];
7875 }
7876 }
7877
7878	  /* We should have caught mismatched types earlier.  */
7879 gcc_assert (useless_type_conversion_p (vectype,
7880 TREE_TYPE (vec_oprnd)));
7881 bool simd_lane_access_p
7882 = STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) != 0;
7883 if (simd_lane_access_p
7884 && !loop_masks
7885 && TREE_CODE (DR_BASE_ADDRESS (first_dr_info->dr)) == ADDR_EXPR
7886 && VAR_P (TREE_OPERAND (DR_BASE_ADDRESS (first_dr_info->dr), 0))
7887 && integer_zerop (get_dr_vinfo_offset (vinfo, first_dr_info))
7888 && integer_zerop (DR_INIT (first_dr_info->dr))
7889 && alias_sets_conflict_p (get_alias_set (aggr_type),
7890 get_alias_set (TREE_TYPE (ref_type))))
7891 {
7892 dataref_ptr = unshare_expr (DR_BASE_ADDRESS (first_dr_info->dr));
7893 dataref_offset = build_int_cst (ref_type, 0);
7894 }
7895 else if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
7896 {
7897 vect_get_gather_scatter_ops (vinfo, loop, stmt_info, &gs_info,
7898 &dataref_ptr, &vec_offsets, ncopies);
7899 vec_offset = vec_offsets[0];
7900 }
7901 else
7902 dataref_ptr
7903 = vect_create_data_ref_ptr (vinfo, first_stmt_info, aggr_type,
7904 simd_lane_access_p ? loop : NULL,
7905 offset, &dummy, gsi, &ptr_incr,
7906 simd_lane_access_p, NULL_TREE, bump);
7907 }
7908 else
7909 {
7910 /* For interleaved stores we created vectorized defs for all the
7911 defs stored in OPRNDS in the previous iteration (previous copy).
7912 DR_CHAIN is then used as an input to vect_permute_store_chain().
7913 If the store is not grouped, DR_GROUP_SIZE is 1, and DR_CHAIN and
7914 OPRNDS are of size 1. */
7915 for (i = 0; i < group_size; i++)
7916 {
7917 vec_oprnd = gvec_oprnds[i][j];
7918 dr_chain[i] = gvec_oprnds[i][j];
7919 oprnds[i] = gvec_oprnds[i][j];
7920 }
7921 if (mask)
7922 vec_mask = vec_masks[j];
7923 if (dataref_offset)
7924 dataref_offset
7925 = int_const_binop (PLUS_EXPR, dataref_offset, bump);
7926 else if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
7927 vec_offset = vec_offsets[j];
7928 else
7929 dataref_ptr = bump_vector_ptr (vinfo, dataref_ptr, ptr_incr, gsi,
7930 stmt_info, bump);
7931 }
7932
7933 if (memory_access_type == VMAT_LOAD_STORE_LANES)
7934 {
7935 tree vec_array;
7936
7937 /* Get an array into which we can store the individual vectors. */
7938 vec_array = create_vector_array (vectype, vec_num);
7939
7940 /* Invalidate the current contents of VEC_ARRAY. This should
7941 become an RTL clobber too, which prevents the vector registers
7942 from being upward-exposed. */
7943 vect_clobber_variable (vinfo, stmt_info, gsi, vec_array);
7944
7945 /* Store the individual vectors into the array. */
7946 for (i = 0; i < vec_num; i++)
7947 {
7948 vec_oprnd = dr_chain[i];
7949 write_vector_array (vinfo, stmt_info,
7950 gsi, vec_oprnd, vec_array, i);
7951 }
7952
7953 tree final_mask = NULL;
7954 if (loop_masks)
7955 final_mask = vect_get_loop_mask (gsi, loop_masks, ncopies,
7956 vectype, j);
7957 if (vec_mask)
7958 final_mask = prepare_load_store_mask (mask_vectype, final_mask,
7959 vec_mask, gsi);
7960
7961 gcall *call;
7962 if (final_mask)
7963 {
7964 /* Emit:
7965 MASK_STORE_LANES (DATAREF_PTR, ALIAS_PTR, VEC_MASK,
7966 VEC_ARRAY). */
7967 unsigned int align = TYPE_ALIGN_UNIT (TREE_TYPE (vectype));
7968 tree alias_ptr = build_int_cst (ref_type, align);
7969 call = gimple_build_call_internal (IFN_MASK_STORE_LANES, 4,
7970 dataref_ptr, alias_ptr,
7971 final_mask, vec_array);
7972 }
7973 else
7974 {
7975 /* Emit:
7976 MEM_REF[...all elements...] = STORE_LANES (VEC_ARRAY). */
7977 data_ref = create_array_ref (aggr_type, dataref_ptr, ref_type);
7978 call = gimple_build_call_internal (IFN_STORE_LANES, 1,
7979 vec_array);
7980 gimple_call_set_lhs (call, data_ref);
7981 }
7982 gimple_call_set_nothrow (call, true);
7983 vect_finish_stmt_generation (vinfo, stmt_info, call, gsi);
7984 new_stmt = call;
7985
7986 /* Record that VEC_ARRAY is now dead. */
7987 vect_clobber_variable (vinfo, stmt_info, gsi, vec_array);
7988 }
7989 else
7990 {
7991 new_stmt = NULL;
7992 if (grouped_store)
7993 {
7994 if (j == 0)
7995 result_chain.create (group_size);
7996 /* Permute. */
7997 vect_permute_store_chain (vinfo, dr_chain, group_size, stmt_info,
7998 gsi, &result_chain);
7999 }
8000
8001 stmt_vec_info next_stmt_info = first_stmt_info;
8002 for (i = 0; i < vec_num; i++)
8003 {
8004 unsigned misalign;
8005 unsigned HOST_WIDE_INT align;
8006
8007 tree final_mask = NULL_TREE;
8008 if (loop_masks)
8009 final_mask = vect_get_loop_mask (gsi, loop_masks,
8010 vec_num * ncopies,
8011 vectype, vec_num * j + i);
8012 if (vec_mask)
8013 final_mask = prepare_load_store_mask (mask_vectype, final_mask,
8014 vec_mask, gsi);
8015
8016 if (memory_access_type == VMAT_GATHER_SCATTER)
8017 {
8018 tree scale = size_int (gs_info.scale);
8019 gcall *call;
8020 if (loop_masks)
8021 call = gimple_build_call_internal
8022 (IFN_MASK_SCATTER_STORE, 5, dataref_ptr, vec_offset,
8023 scale, vec_oprnd, final_mask);
8024 else
8025 call = gimple_build_call_internal
8026 (IFN_SCATTER_STORE, 4, dataref_ptr, vec_offset,
8027 scale, vec_oprnd);
8028 gimple_call_set_nothrow (call, true);
8029 vect_finish_stmt_generation (vinfo, stmt_info, call, gsi);
8030 new_stmt = call;
8031 break;
8032 }
8033
8034 if (i > 0)
8035 /* Bump the vector pointer. */
8036 dataref_ptr = bump_vector_ptr (vinfo, dataref_ptr, ptr_incr,
8037 gsi, stmt_info, bump);
8038
8039 if (slp)
8040 vec_oprnd = vec_oprnds[i];
8041 else if (grouped_store)
8042 /* For grouped stores vectorized defs are interleaved in
8043 vect_permute_store_chain(). */
8044 vec_oprnd = result_chain[i];
8045
8046 align = known_alignment (DR_TARGET_ALIGNMENT (first_dr_info));
8047 if (aligned_access_p (first_dr_info))
8048 misalign = 0;
8049 else if (DR_MISALIGNMENT (first_dr_info) == -1)
8050 {
8051 align = dr_alignment (vect_dr_behavior (vinfo, first_dr_info));
8052 misalign = 0;
8053 }
8054 else
8055 misalign = DR_MISALIGNMENT (first_dr_info);
8056 if (dataref_offset == NULL_TREE
8057 && TREE_CODE (dataref_ptr) == SSA_NAME)
8058 set_ptr_info_alignment (get_ptr_info (dataref_ptr), align,
8059 misalign);
8060
8061 if (memory_access_type == VMAT_CONTIGUOUS_REVERSE)
8062 {
8063 tree perm_mask = perm_mask_for_reverse (vectype);
8064 tree perm_dest = vect_create_destination_var
8065 (vect_get_store_rhs (stmt_info), vectype);
8066 tree new_temp = make_ssa_name (perm_dest);
8067
8068 /* Generate the permute statement. */
8069 gimple *perm_stmt
8070 = gimple_build_assign (new_temp, VEC_PERM_EXPR, vec_oprnd,
8071 vec_oprnd, perm_mask);
8072 vect_finish_stmt_generation (vinfo, stmt_info, perm_stmt, gsi);
8073
8074 perm_stmt = SSA_NAME_DEF_STMT (new_temp);
8075 vec_oprnd = new_temp;
8076 }
8077
8078 /* Arguments are ready. Create the new vector stmt. */
8079 if (final_mask)
8080 {
8081 align = least_bit_hwi (misalign | align);
8082 tree ptr = build_int_cst (ref_type, align);
8083 gcall *call
8084 = gimple_build_call_internal (IFN_MASK_STORE, 4,
8085 dataref_ptr, ptr,
8086 final_mask, vec_oprnd);
8087 gimple_call_set_nothrow (call, true);
8088 vect_finish_stmt_generation (vinfo, stmt_info, call, gsi);
8089 new_stmt = call;
8090 }
8091 else if (loop_lens)
8092 {
8093 tree final_len
8094 = vect_get_loop_len (loop_vinfo, loop_lens,
8095 vec_num * ncopies, vec_num * j + i);
8096 align = least_bit_hwi (misalign | align);
8097 tree ptr = build_int_cst (ref_type, align);
8098 machine_mode vmode = TYPE_MODE (vectype);
8099 opt_machine_mode new_ovmode
8100 = get_len_load_store_mode (vmode, false);
8101 machine_mode new_vmode = new_ovmode.require ();
8102		  /* A conversion is needed if the len_store mode wraps the vector as VnQI.  */
8103 if (vmode != new_vmode)
8104 {
8105 tree new_vtype
8106 = build_vector_type_for_mode (unsigned_intQI_type_node,
8107 new_vmode);
8108 tree var
8109 = vect_get_new_ssa_name (new_vtype, vect_simple_var);
8110 vec_oprnd
8111 = build1 (VIEW_CONVERT_EXPR, new_vtype, vec_oprnd);
8112 gassign *new_stmt
8113 = gimple_build_assign (var, VIEW_CONVERT_EXPR,
8114 vec_oprnd);
8115 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt,
8116 gsi);
8117 vec_oprnd = var;
8118 }
8119 gcall *call
8120 = gimple_build_call_internal (IFN_LEN_STORE, 4, dataref_ptr,
8121 ptr, final_len, vec_oprnd);
8122 gimple_call_set_nothrow (call, true);
8123 vect_finish_stmt_generation (vinfo, stmt_info, call, gsi);
8124 new_stmt = call;
8125 }
8126 else
8127 {
8128 data_ref = fold_build2 (MEM_REF, vectype,
8129 dataref_ptr,
8130 dataref_offset
8131 ? dataref_offset
8132 : build_int_cst (ref_type, 0));
8133 if (aligned_access_p (first_dr_info))
8134 ;
8135 else if (DR_MISALIGNMENT (first_dr_info) == -1)
8136 TREE_TYPE (data_ref)
8137 = build_aligned_type (TREE_TYPE (data_ref),
8138 align * BITS_PER_UNIT);
8139 else
8140 TREE_TYPE (data_ref)
8141 = build_aligned_type (TREE_TYPE (data_ref),
8142 TYPE_ALIGN (elem_type));
8143 vect_copy_ref_info (data_ref, DR_REF (first_dr_info->dr));
8144 new_stmt = gimple_build_assign (data_ref, vec_oprnd);
8145 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
8146 }
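	  /* For reference, the three branches above emit, roughly in GIMPLE
	     dump syntax (names illustrative only):
	       .MASK_STORE (dataref_ptr, align, final_mask, vec_oprnd);
	       .LEN_STORE (dataref_ptr, align, final_len, vec_oprnd);
	       MEM <vector(4) int> [(int *)dataref_ptr] = vec_oprnd;
	     for the masked, length-controlled and plain contiguous cases
	     respectively.  */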
8147
8148 if (slp)
8149 continue;
8150
8151 next_stmt_info = DR_GROUP_NEXT_ELEMENT (next_stmt_info);
8152 if (!next_stmt_info)
8153 break;
8154 }
8155 }
8156 if (!slp)
8157 {
8158 if (j == 0)
8159 *vec_stmt = new_stmt;
8160 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
8161 }
8162 }
8163
8164 for (i = 0; i < group_size; ++i)
8165 {
8166 vec<tree> oprndsi = gvec_oprnds[i];
8167 oprndsi.release ();
8168 }
8169 oprnds.release ();
8170 result_chain.release ();
8171 vec_oprnds.release ();
8172
8173 return true;
8174 }
8175
8176 /* Given a vector type VECTYPE, turns permutation SEL into the equivalent
8177 VECTOR_CST mask. No checks are made that the target platform supports the
8178 mask, so callers may wish to test can_vec_perm_const_p separately, or use
8179 vect_gen_perm_mask_checked. */
8180
8181 tree
8182 vect_gen_perm_mask_any (tree vectype, const vec_perm_indices &sel)
8183 {
8184 tree mask_type;
8185
8186 poly_uint64 nunits = sel.length ();
8187 gcc_assert (known_eq (nunits, TYPE_VECTOR_SUBPARTS (vectype)));
8188
8189 mask_type = build_vector_type (ssizetype, nunits);
8190 return vec_perm_indices_to_tree (mask_type, sel);
8191 }
8192
8193 /* Checked version of vect_gen_perm_mask_any. Asserts can_vec_perm_const_p,
8194 i.e. that the target supports the pattern _for arbitrary input vectors_. */
8195
8196 tree
8197 vect_gen_perm_mask_checked (tree vectype, const vec_perm_indices &sel)
8198 {
8199 gcc_assert (can_vec_perm_const_p (TYPE_MODE (vectype), sel));
8200 return vect_gen_perm_mask_any (vectype, sel);
8201 }
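/* A minimal usage sketch (hypothetical caller, not code in this file):
   to build a mask that reverses a four-element vector of type VECTYPE,
   assuming the target supports the permutation:

     vec_perm_builder sel (4, 4, 1);
     for (int i = 0; i < 4; ++i)
       sel.quick_push (3 - i);
     vec_perm_indices indices (sel, 1, 4);
     tree mask = vect_gen_perm_mask_checked (vectype, indices);

   MASK can then be used as the third operand of a VEC_PERM_EXPR.  */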
8202
8203 /* Given vector variables X and Y that were generated for the scalar
8204    STMT_INFO, generate instructions to permute the vector elements of X and Y
8205    using the permutation mask MASK_VEC, insert them at *GSI, and return the
8206    permuted vector variable.  */
8207
8208 static tree
8209 permute_vec_elements (vec_info *vinfo,
8210 tree x, tree y, tree mask_vec, stmt_vec_info stmt_info,
8211 gimple_stmt_iterator *gsi)
8212 {
8213 tree vectype = TREE_TYPE (x);
8214 tree perm_dest, data_ref;
8215 gimple *perm_stmt;
8216
8217 tree scalar_dest = gimple_get_lhs (stmt_info->stmt);
8218 if (scalar_dest && TREE_CODE (scalar_dest) == SSA_NAME)
8219 perm_dest = vect_create_destination_var (scalar_dest, vectype);
8220 else
8221 perm_dest = vect_get_new_vect_var (vectype, vect_simple_var, NULL);
8222 data_ref = make_ssa_name (perm_dest);
8223
8224 /* Generate the permute statement. */
8225 perm_stmt = gimple_build_assign (data_ref, VEC_PERM_EXPR, x, y, mask_vec);
8226 vect_finish_stmt_generation (vinfo, stmt_info, perm_stmt, gsi);
8227
8228 return data_ref;
8229 }
8230
8231 /* Hoist the definitions of all SSA uses on STMT_INFO out of the loop LOOP,
8232    inserting them on the loop's preheader edge.  Returns true if we
8233    were successful in doing so (and thus STMT_INFO can then be moved),
8234    otherwise returns false.  */
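/* For illustration (a hypothetical GIMPLE fragment, not from a testcase):
   if the loop body contains

     a_1 = p_2(D)->base;
     x_3 = *a_1;		<-- STMT_INFO

   and the definition of a_1 only uses SSA names defined outside LOOP, the
   a_1 = ... statement is moved onto the preheader edge so that STMT_INFO
   itself can subsequently be hoisted as a loop-invariant load.  */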
8235
8236 static bool
8237 hoist_defs_of_uses (stmt_vec_info stmt_info, class loop *loop)
8238 {
8239 ssa_op_iter i;
8240 tree op;
8241 bool any = false;
8242
8243 FOR_EACH_SSA_TREE_OPERAND (op, stmt_info->stmt, i, SSA_OP_USE)
8244 {
8245 gimple *def_stmt = SSA_NAME_DEF_STMT (op);
8246 if (!gimple_nop_p (def_stmt)
8247 && flow_bb_inside_loop_p (loop, gimple_bb (def_stmt)))
8248 {
8249 /* Make sure we don't need to recurse. While we could do
8250 so in simple cases when there are more complex use webs
8251 we don't have an easy way to preserve stmt order to fulfil
8252 dependencies within them. */
8253 tree op2;
8254 ssa_op_iter i2;
8255 if (gimple_code (def_stmt) == GIMPLE_PHI)
8256 return false;
8257 FOR_EACH_SSA_TREE_OPERAND (op2, def_stmt, i2, SSA_OP_USE)
8258 {
8259 gimple *def_stmt2 = SSA_NAME_DEF_STMT (op2);
8260 if (!gimple_nop_p (def_stmt2)
8261 && flow_bb_inside_loop_p (loop, gimple_bb (def_stmt2)))
8262 return false;
8263 }
8264 any = true;
8265 }
8266 }
8267
8268 if (!any)
8269 return true;
8270
8271 FOR_EACH_SSA_TREE_OPERAND (op, stmt_info->stmt, i, SSA_OP_USE)
8272 {
8273 gimple *def_stmt = SSA_NAME_DEF_STMT (op);
8274 if (!gimple_nop_p (def_stmt)
8275 && flow_bb_inside_loop_p (loop, gimple_bb (def_stmt)))
8276 {
8277 gimple_stmt_iterator gsi = gsi_for_stmt (def_stmt);
8278 gsi_remove (&gsi, false);
8279 gsi_insert_on_edge_immediate (loop_preheader_edge (loop), def_stmt);
8280 }
8281 }
8282
8283 return true;
8284 }
8285
8286 /* vectorizable_load.
8287
8288    Check if STMT_INFO reads a non-scalar data-ref (array/pointer/structure)
8289 that can be vectorized.
8290 If VEC_STMT is also passed, vectorize STMT_INFO: create a vectorized
8291 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
8292 Return true if STMT_INFO is vectorizable in this way. */
8293
8294 static bool
8295 vectorizable_load (vec_info *vinfo,
8296 stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
8297 gimple **vec_stmt, slp_tree slp_node,
8298 stmt_vector_for_cost *cost_vec)
8299 {
8300 tree scalar_dest;
8301 tree vec_dest = NULL;
8302 tree data_ref = NULL;
8303 loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
8304 class loop *loop = NULL;
8305 class loop *containing_loop = gimple_bb (stmt_info->stmt)->loop_father;
8306 bool nested_in_vect_loop = false;
8307 tree elem_type;
8308 tree new_temp;
8309 machine_mode mode;
8310 tree dummy;
8311 tree dataref_ptr = NULL_TREE;
8312 tree dataref_offset = NULL_TREE;
8313 gimple *ptr_incr = NULL;
8314 int ncopies;
8315 int i, j;
8316 unsigned int group_size;
8317 poly_uint64 group_gap_adj;
8318 tree msq = NULL_TREE, lsq;
8319 tree offset = NULL_TREE;
8320 tree byte_offset = NULL_TREE;
8321 tree realignment_token = NULL_TREE;
8322 gphi *phi = NULL;
8323 vec<tree> dr_chain = vNULL;
8324 bool grouped_load = false;
8325 stmt_vec_info first_stmt_info;
8326 stmt_vec_info first_stmt_info_for_drptr = NULL;
8327 bool compute_in_loop = false;
8328 class loop *at_loop;
8329 int vec_num;
8330 bool slp = (slp_node != NULL);
8331 bool slp_perm = false;
8332 bb_vec_info bb_vinfo = dyn_cast <bb_vec_info> (vinfo);
8333 poly_uint64 vf;
8334 tree aggr_type;
8335 gather_scatter_info gs_info;
8336 tree ref_type;
8337 enum vect_def_type mask_dt = vect_unknown_def_type;
8338
8339 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
8340 return false;
8341
8342 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
8343 && ! vec_stmt)
8344 return false;
8345
8346 if (!STMT_VINFO_DATA_REF (stmt_info))
8347 return false;
8348
8349 /* ??? Alignment analysis for SLP looks at SLP_TREE_SCALAR_STMTS[0]
8350    for unpermuted loads but we get passed SLP_TREE_REPRESENTATIVE,
8351    which can be different when reduction chains were re-ordered.
8352    Now that we know we're a dataref, reset stmt_info back to
8353    SLP_TREE_SCALAR_STMTS[0].  When we're SLP-only, things should be
8354    refactored so as to maintain the dr_vec_info pointer for the
8355    relevant access explicitly.  */
8356 stmt_vec_info orig_stmt_info = stmt_info;
8357 if (slp_node)
8358 stmt_info = SLP_TREE_SCALAR_STMTS (slp_node)[0];
8359
8360 tree mask = NULL_TREE, mask_vectype = NULL_TREE;
8361 if (gassign *assign = dyn_cast <gassign *> (stmt_info->stmt))
8362 {
8363 scalar_dest = gimple_assign_lhs (assign);
8364 if (TREE_CODE (scalar_dest) != SSA_NAME)
8365 return false;
8366
8367 tree_code code = gimple_assign_rhs_code (assign);
8368 if (code != ARRAY_REF
8369 && code != BIT_FIELD_REF
8370 && code != INDIRECT_REF
8371 && code != COMPONENT_REF
8372 && code != IMAGPART_EXPR
8373 && code != REALPART_EXPR
8374 && code != MEM_REF
8375 && TREE_CODE_CLASS (code) != tcc_declaration)
8376 return false;
8377 }
8378 else
8379 {
8380 gcall *call = dyn_cast <gcall *> (stmt_info->stmt);
8381 if (!call || !gimple_call_internal_p (call))
8382 return false;
8383
8384 internal_fn ifn = gimple_call_internal_fn (call);
8385 if (!internal_load_fn_p (ifn))
8386 return false;
8387
8388 scalar_dest = gimple_call_lhs (call);
8389 if (!scalar_dest)
8390 return false;
8391
8392 int mask_index = internal_fn_mask_index (ifn);
8393 if (mask_index >= 0)
8394 {
8395 mask = gimple_call_arg (call, mask_index);
8396 if (!vect_check_scalar_mask (vinfo, stmt_info, mask, &mask_dt,
8397 &mask_vectype))
8398 return false;
8399 }
8400 }
8401
8402 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
8403 poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
8404
8405 if (loop_vinfo)
8406 {
8407 loop = LOOP_VINFO_LOOP (loop_vinfo);
8408 nested_in_vect_loop = nested_in_vect_loop_p (loop, stmt_info);
8409 vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
8410 }
8411 else
8412 vf = 1;
8413
8414 /* Multiple types in SLP are handled by creating the appropriate number of
8415 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
8416 case of SLP. */
8417 if (slp)
8418 ncopies = 1;
8419 else
8420 ncopies = vect_get_num_copies (loop_vinfo, vectype);
8421
8422 gcc_assert (ncopies >= 1);
8423
8424 /* FORNOW. This restriction should be relaxed. */
8425 if (nested_in_vect_loop && ncopies > 1)
8426 {
8427 if (dump_enabled_p ())
8428 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
8429 "multiple types in nested loop.\n");
8430 return false;
8431 }
8432
8433 /* Invalidate assumptions made by dependence analysis when vectorization
8434 on the unrolled body effectively re-orders stmts. */
8435 if (ncopies > 1
8436 && STMT_VINFO_MIN_NEG_DIST (stmt_info) != 0
8437 && maybe_gt (LOOP_VINFO_VECT_FACTOR (loop_vinfo),
8438 STMT_VINFO_MIN_NEG_DIST (stmt_info)))
8439 {
8440 if (dump_enabled_p ())
8441 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
8442 "cannot perform implicit CSE when unrolling "
8443 "with negative dependence distance\n");
8444 return false;
8445 }
8446
8447 elem_type = TREE_TYPE (vectype);
8448 mode = TYPE_MODE (vectype);
8449
8450   /* FORNOW.  In some cases we can vectorize even if the data type is
8451      not supported (e.g. data copies).  */
8452 if (optab_handler (mov_optab, mode) == CODE_FOR_nothing)
8453 {
8454 if (dump_enabled_p ())
8455 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
8456 "Aligned load, but unsupported type.\n");
8457 return false;
8458 }
8459
8460 /* Check if the load is a part of an interleaving chain. */
8461 if (STMT_VINFO_GROUPED_ACCESS (stmt_info))
8462 {
8463 grouped_load = true;
8464 /* FORNOW */
8465 gcc_assert (!nested_in_vect_loop);
8466 gcc_assert (!STMT_VINFO_GATHER_SCATTER_P (stmt_info));
8467
8468 first_stmt_info = DR_GROUP_FIRST_ELEMENT (stmt_info);
8469 group_size = DR_GROUP_SIZE (first_stmt_info);
8470
8471 /* Refuse non-SLP vectorization of SLP-only groups. */
8472 if (!slp && STMT_VINFO_SLP_VECT_ONLY (first_stmt_info))
8473 {
8474 if (dump_enabled_p ())
8475 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
8476 "cannot vectorize load in non-SLP mode.\n");
8477 return false;
8478 }
8479
8480 if (slp && SLP_TREE_LOAD_PERMUTATION (slp_node).exists ())
8481 {
8482 slp_perm = true;
8483
8484 if (!loop_vinfo)
8485 {
8486 /* In BB vectorization we may not actually use a loaded vector
8487 accessing elements in excess of DR_GROUP_SIZE. */
8488 stmt_vec_info group_info = SLP_TREE_SCALAR_STMTS (slp_node)[0];
8489 group_info = DR_GROUP_FIRST_ELEMENT (group_info);
8490 unsigned HOST_WIDE_INT nunits;
8491 unsigned j, k, maxk = 0;
8492 FOR_EACH_VEC_ELT (SLP_TREE_LOAD_PERMUTATION (slp_node), j, k)
8493 if (k > maxk)
8494 maxk = k;
8495 tree vectype = STMT_VINFO_VECTYPE (group_info);
8496 if (!TYPE_VECTOR_SUBPARTS (vectype).is_constant (&nunits)
8497 || maxk >= (DR_GROUP_SIZE (group_info) & ~(nunits - 1)))
8498 {
8499 if (dump_enabled_p ())
8500 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
8501 "BB vectorization with gaps at the end of "
8502 "a load is not supported\n");
8503 return false;
8504 }
8505 }
8506
8507 auto_vec<tree> tem;
8508 unsigned n_perms;
8509 if (!vect_transform_slp_perm_load (vinfo, slp_node, tem, NULL, vf,
8510 true, &n_perms))
8511 {
8512 if (dump_enabled_p ())
8513 dump_printf_loc (MSG_MISSED_OPTIMIZATION,
8514 vect_location,
8515 "unsupported load permutation\n");
8516 return false;
8517 }
8518 }
8519
8520 /* Invalidate assumptions made by dependence analysis when vectorization
8521 on the unrolled body effectively re-orders stmts. */
8522 if (!PURE_SLP_STMT (stmt_info)
8523 && STMT_VINFO_MIN_NEG_DIST (stmt_info) != 0
8524 && maybe_gt (LOOP_VINFO_VECT_FACTOR (loop_vinfo),
8525 STMT_VINFO_MIN_NEG_DIST (stmt_info)))
8526 {
8527 if (dump_enabled_p ())
8528 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
8529 "cannot perform implicit CSE when performing "
8530 "group loads with negative dependence distance\n");
8531 return false;
8532 }
8533 }
8534 else
8535 group_size = 1;
8536
8537 vect_memory_access_type memory_access_type;
8538 enum dr_alignment_support alignment_support_scheme;
8539 if (!get_load_store_type (vinfo, stmt_info, vectype, slp_node, mask, VLS_LOAD,
8540 ncopies, &memory_access_type,
8541 &alignment_support_scheme, &gs_info))
8542 return false;
8543
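  /* Masked loads are only supported for contiguous accesses the target can
     handle (can_vec_mask_load_store_p) and for load-lanes or gather
     accesses, which have masked internal-function variants of their own.  */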
8544 if (mask)
8545 {
8546 if (memory_access_type == VMAT_CONTIGUOUS)
8547 {
8548 machine_mode vec_mode = TYPE_MODE (vectype);
8549 if (!VECTOR_MODE_P (vec_mode)
8550 || !can_vec_mask_load_store_p (vec_mode,
8551 TYPE_MODE (mask_vectype), true))
8552 return false;
8553 }
8554 else if (memory_access_type != VMAT_LOAD_STORE_LANES
8555 && memory_access_type != VMAT_GATHER_SCATTER)
8556 {
8557 if (dump_enabled_p ())
8558 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
8559 "unsupported access type for masked load.\n");
8560 return false;
8561 }
8562 }
8563
8564 if (!vec_stmt) /* transformation not required. */
8565 {
8566 if (slp_node
8567 && mask
8568 && !vect_maybe_update_slp_op_vectype (SLP_TREE_CHILDREN (slp_node)[0],
8569 mask_vectype))
8570 {
8571 if (dump_enabled_p ())
8572 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
8573 "incompatible vector types for invariants\n");
8574 return false;
8575 }
8576
8577 if (!slp)
8578 STMT_VINFO_MEMORY_ACCESS_TYPE (stmt_info) = memory_access_type;
8579
8580 if (loop_vinfo
8581 && LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo))
8582 check_load_store_for_partial_vectors (loop_vinfo, vectype, VLS_LOAD,
8583 group_size, memory_access_type,
8584 &gs_info, mask);
8585
8586 if (dump_enabled_p ()
8587 && memory_access_type != VMAT_ELEMENTWISE
8588 && memory_access_type != VMAT_GATHER_SCATTER
8589 && alignment_support_scheme != dr_aligned)
8590 dump_printf_loc (MSG_NOTE, vect_location,
8591 "Vectorizing an unaligned access.\n");
8592
8593 STMT_VINFO_TYPE (orig_stmt_info) = load_vec_info_type;
8594 vect_model_load_cost (vinfo, stmt_info, ncopies, vf, memory_access_type,
8595 slp_node, cost_vec);
8596 return true;
8597 }
8598
8599 if (!slp)
8600 gcc_assert (memory_access_type
8601 == STMT_VINFO_MEMORY_ACCESS_TYPE (stmt_info));
8602
8603 if (dump_enabled_p ())
8604 dump_printf_loc (MSG_NOTE, vect_location,
8605 "transform load. ncopies = %d\n", ncopies);
8606
8607 /* Transform. */
8608
8609 dr_vec_info *dr_info = STMT_VINFO_DR_INFO (stmt_info), *first_dr_info = NULL;
8610 ensure_base_align (dr_info);
8611
8612 if (memory_access_type == VMAT_GATHER_SCATTER && gs_info.decl)
8613 {
8614 vect_build_gather_load_calls (vinfo,
8615 stmt_info, gsi, vec_stmt, &gs_info, mask);
8616 return true;
8617 }
8618
8619 if (memory_access_type == VMAT_INVARIANT)
8620 {
8621 gcc_assert (!grouped_load && !mask && !bb_vinfo);
8622 /* If we have versioned for aliasing or the loop doesn't
8623 have any data dependencies that would preclude this,
8624 then we are sure this is a loop invariant load and
8625 thus we can insert it on the preheader edge. */
8626 bool hoist_p = (LOOP_VINFO_NO_DATA_DEPENDENCIES (loop_vinfo)
8627 && !nested_in_vect_loop
8628 && hoist_defs_of_uses (stmt_info, loop));
8629 if (hoist_p)
8630 {
8631 gassign *stmt = as_a <gassign *> (stmt_info->stmt);
8632 if (dump_enabled_p ())
8633 dump_printf_loc (MSG_NOTE, vect_location,
8634 "hoisting out of the vectorized loop: %G", stmt);
8635 scalar_dest = copy_ssa_name (scalar_dest);
8636 tree rhs = unshare_expr (gimple_assign_rhs1 (stmt));
8637 gsi_insert_on_edge_immediate
8638 (loop_preheader_edge (loop),
8639 gimple_build_assign (scalar_dest, rhs));
8640 }
8641 /* These copies are all equivalent, but currently the representation
8642 requires a separate STMT_VINFO_VEC_STMT for each one. */
8643 gimple_stmt_iterator gsi2 = *gsi;
8644 gsi_next (&gsi2);
8645 for (j = 0; j < ncopies; j++)
8646 {
8647 if (hoist_p)
8648 new_temp = vect_init_vector (vinfo, stmt_info, scalar_dest,
8649 vectype, NULL);
8650 else
8651 new_temp = vect_init_vector (vinfo, stmt_info, scalar_dest,
8652 vectype, &gsi2);
8653 gimple *new_stmt = SSA_NAME_DEF_STMT (new_temp);
8654 if (slp)
8655 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
8656 else
8657 {
8658 if (j == 0)
8659 *vec_stmt = new_stmt;
8660 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
8661 }
8662 }
8663 return true;
8664 }
8665
8666 if (memory_access_type == VMAT_ELEMENTWISE
8667 || memory_access_type == VMAT_STRIDED_SLP)
8668 {
8669 gimple_stmt_iterator incr_gsi;
8670 bool insert_after;
8671 tree offvar;
8672 tree ivstep;
8673 tree running_off;
8674 vec<constructor_elt, va_gc> *v = NULL;
8675 tree stride_base, stride_step, alias_off;
8676 /* Checked by get_load_store_type. */
8677 unsigned int const_nunits = nunits.to_constant ();
8678 unsigned HOST_WIDE_INT cst_offset = 0;
8679 tree dr_offset;
8680
8681 gcc_assert (!LOOP_VINFO_USING_PARTIAL_VECTORS_P (loop_vinfo));
8682 gcc_assert (!nested_in_vect_loop);
8683
8684 if (grouped_load)
8685 {
8686 first_stmt_info = DR_GROUP_FIRST_ELEMENT (stmt_info);
8687 first_dr_info = STMT_VINFO_DR_INFO (first_stmt_info);
8688 }
8689 else
8690 {
8691 first_stmt_info = stmt_info;
8692 first_dr_info = dr_info;
8693 }
8694 if (slp && grouped_load)
8695 {
8696 group_size = DR_GROUP_SIZE (first_stmt_info);
8697 ref_type = get_group_alias_ptr_type (first_stmt_info);
8698 }
8699 else
8700 {
8701 if (grouped_load)
8702 cst_offset
8703 = (tree_to_uhwi (TYPE_SIZE_UNIT (TREE_TYPE (vectype)))
8704 * vect_get_place_in_interleaving_chain (stmt_info,
8705 first_stmt_info));
8706 group_size = 1;
8707 ref_type = reference_alias_ptr_type (DR_REF (dr_info->dr));
8708 }
8709
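      /* Compute the invariant base address and the scalar step of the access
	 from the first data reference; these are gimplified and CSEd to the
	 loop preheader below.  */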
8710 dr_offset = get_dr_vinfo_offset (vinfo, first_dr_info);
8711 stride_base
8712 = fold_build_pointer_plus
8713 (DR_BASE_ADDRESS (first_dr_info->dr),
8714 size_binop (PLUS_EXPR,
8715 convert_to_ptrofftype (dr_offset),
8716 convert_to_ptrofftype (DR_INIT (first_dr_info->dr))));
8717 stride_step = fold_convert (sizetype, DR_STEP (first_dr_info->dr));
8718
8719 /* For a load with loop-invariant (but other than power-of-2)
8720 stride (i.e. not a grouped access) like so:
8721
8722 for (i = 0; i < n; i += stride)
8723 ... = array[i];
8724
8725 we generate a new induction variable and new accesses to
8726 form a new vector (or vectors, depending on ncopies):
8727
8728 for (j = 0; ; j += VF*stride)
8729 tmp1 = array[j];
8730 tmp2 = array[j + stride];
8731 ...
8732 vectemp = {tmp1, tmp2, ...}
8733 */
8734
8735 ivstep = fold_build2 (MULT_EXPR, TREE_TYPE (stride_step), stride_step,
8736 build_int_cst (TREE_TYPE (stride_step), vf));
8737
8738 standard_iv_increment_position (loop, &incr_gsi, &insert_after);
8739
8740 stride_base = cse_and_gimplify_to_preheader (loop_vinfo, stride_base);
8741 ivstep = cse_and_gimplify_to_preheader (loop_vinfo, ivstep);
8742 create_iv (stride_base, ivstep, NULL,
8743 loop, &incr_gsi, insert_after,
8744 &offvar, NULL);
8745
8746 stride_step = cse_and_gimplify_to_preheader (loop_vinfo, stride_step);
8747
8748 running_off = offvar;
8749 alias_off = build_int_cst (ref_type, 0);
8750 int nloads = const_nunits;
8751 int lnel = 1;
8752 tree ltype = TREE_TYPE (vectype);
8753 tree lvectype = vectype;
8754 auto_vec<tree> dr_chain;
8755 if (memory_access_type == VMAT_STRIDED_SLP)
8756 {
8757 if (group_size < const_nunits)
8758 {
8759 /* First check if vec_init optab supports construction from vector
8760 elts directly. Otherwise avoid emitting a constructor of
8761 vector elements by performing the loads using an integer type
8762 of the same size, constructing a vector of those and then
8763 re-interpreting it as the original vector type. This avoids a
8764 huge runtime penalty due to the general inability to perform
8765 store forwarding from smaller stores to a larger load. */
8766 tree ptype;
8767 tree vtype
8768 = vector_vector_composition_type (vectype,
8769 const_nunits / group_size,
8770 &ptype);
8771 if (vtype != NULL_TREE)
8772 {
8773 nloads = const_nunits / group_size;
8774 lnel = group_size;
8775 lvectype = vtype;
8776 ltype = ptype;
8777 }
8778 }
8779 else
8780 {
8781 nloads = 1;
8782 lnel = const_nunits;
8783 ltype = vectype;
8784 }
8785 ltype = build_aligned_type (ltype, TYPE_ALIGN (TREE_TYPE (vectype)));
8786 }
8787   /* Load vector(1) scalar_type if the vectype has just one element.  */
8788 else if (nloads == 1)
8789 ltype = vectype;
8790
8791 if (slp)
8792 {
8793 /* For SLP permutation support we need to load the whole group,
8794 not only the number of vector stmts the permutation result
8795 fits in. */
8796 if (slp_perm)
8797 {
8798 /* We don't yet generate SLP_TREE_LOAD_PERMUTATIONs for
8799 variable VF. */
8800 unsigned int const_vf = vf.to_constant ();
8801 ncopies = CEIL (group_size * const_vf, const_nunits);
8802 dr_chain.create (ncopies);
8803 }
8804 else
8805 ncopies = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
8806 }
8807 unsigned int group_el = 0;
8808 unsigned HOST_WIDE_INT
8809 elsz = tree_to_uhwi (TYPE_SIZE_UNIT (TREE_TYPE (vectype)));
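      /* Emit NLOADS scalar (or small vector) loads per copy at offsets based
	 on the running induction variable, advance the running offset by
	 STRIDE_STEP whenever a whole group has been read (after every load
	 for non-SLP), and, when more than one load is needed, assemble the
	 pieces with a CONSTRUCTOR, view-converting from LVECTYPE back to
	 VECTYPE if they differ.  */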
8810 for (j = 0; j < ncopies; j++)
8811 {
8812 if (nloads > 1)
8813 vec_alloc (v, nloads);
8814 gimple *new_stmt = NULL;
8815 for (i = 0; i < nloads; i++)
8816 {
8817 tree this_off = build_int_cst (TREE_TYPE (alias_off),
8818 group_el * elsz + cst_offset);
8819 tree data_ref = build2 (MEM_REF, ltype, running_off, this_off);
8820 vect_copy_ref_info (data_ref, DR_REF (first_dr_info->dr));
8821 new_stmt = gimple_build_assign (make_ssa_name (ltype), data_ref);
8822 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
8823 if (nloads > 1)
8824 CONSTRUCTOR_APPEND_ELT (v, NULL_TREE,
8825 gimple_assign_lhs (new_stmt));
8826
8827 group_el += lnel;
8828 if (! slp
8829 || group_el == group_size)
8830 {
8831 tree newoff = copy_ssa_name (running_off);
8832 gimple *incr = gimple_build_assign (newoff, POINTER_PLUS_EXPR,
8833 running_off, stride_step);
8834 vect_finish_stmt_generation (vinfo, stmt_info, incr, gsi);
8835
8836 running_off = newoff;
8837 group_el = 0;
8838 }
8839 }
8840 if (nloads > 1)
8841 {
8842 tree vec_inv = build_constructor (lvectype, v);
8843 new_temp = vect_init_vector (vinfo, stmt_info,
8844 vec_inv, lvectype, gsi);
8845 new_stmt = SSA_NAME_DEF_STMT (new_temp);
8846 if (lvectype != vectype)
8847 {
8848 new_stmt = gimple_build_assign (make_ssa_name (vectype),
8849 VIEW_CONVERT_EXPR,
8850 build1 (VIEW_CONVERT_EXPR,
8851 vectype, new_temp));
8852 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
8853 }
8854 }
8855
8856 if (slp)
8857 {
8858 if (slp_perm)
8859 dr_chain.quick_push (gimple_assign_lhs (new_stmt));
8860 else
8861 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
8862 }
8863 else
8864 {
8865 if (j == 0)
8866 *vec_stmt = new_stmt;
8867 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
8868 }
8869 }
8870 if (slp_perm)
8871 {
8872 unsigned n_perms;
8873 vect_transform_slp_perm_load (vinfo, slp_node, dr_chain, gsi, vf,
8874 false, &n_perms);
8875 }
8876 return true;
8877 }
8878
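  /* Gather loads and non-SLP contiguous loads do not need the interleaving
     machinery below; handle them as single-element groups.  */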
8879 if (memory_access_type == VMAT_GATHER_SCATTER
8880 || (!slp && memory_access_type == VMAT_CONTIGUOUS))
8881 grouped_load = false;
8882
8883 if (grouped_load)
8884 {
8885 first_stmt_info = DR_GROUP_FIRST_ELEMENT (stmt_info);
8886 group_size = DR_GROUP_SIZE (first_stmt_info);
8887 /* For SLP vectorization we directly vectorize a subchain
8888 without permutation. */
8889 if (slp && ! SLP_TREE_LOAD_PERMUTATION (slp_node).exists ())
8890 first_stmt_info = SLP_TREE_SCALAR_STMTS (slp_node)[0];
8891 /* For BB vectorization always use the first stmt to base
8892 the data ref pointer on. */
8893 if (bb_vinfo)
8894 first_stmt_info_for_drptr
8895 = vect_find_first_scalar_stmt_in_slp (slp_node);
8896
8897 /* Check if the chain of loads is already vectorized. */
8898 if (STMT_VINFO_VEC_STMTS (first_stmt_info).exists ()
8899 /* For SLP we would need to copy over SLP_TREE_VEC_STMTS.
8900 ??? But we can only do so if there is exactly one
8901 as we have no way to get at the rest. Leave the CSE
8902 opportunity alone.
8903 ??? With the group load eventually participating
8904 in multiple different permutations (having multiple
8905 slp nodes which refer to the same group) the CSE
8906 is even wrong code. See PR56270. */
8907 && !slp)
8908 {
8909 *vec_stmt = STMT_VINFO_VEC_STMTS (stmt_info)[0];
8910 return true;
8911 }
8912 first_dr_info = STMT_VINFO_DR_INFO (first_stmt_info);
8913 group_gap_adj = 0;
8914
8915 /* VEC_NUM is the number of vect stmts to be created for this group. */
8916 if (slp)
8917 {
8918 grouped_load = false;
8919 /* If an SLP permutation is from N elements to N elements,
8920 and if one vector holds a whole number of N, we can load
8921 the inputs to the permutation in the same way as an
8922 unpermuted sequence. In other cases we need to load the
8923 whole group, not only the number of vector stmts the
8924 permutation result fits in. */
8925 unsigned scalar_lanes = SLP_TREE_LANES (slp_node);
8926 if (slp_perm
8927 && (group_size != scalar_lanes
8928 || !multiple_p (nunits, group_size)))
8929 {
8930 /* We don't yet generate such SLP_TREE_LOAD_PERMUTATIONs for
8931 variable VF; see vect_transform_slp_perm_load. */
8932 unsigned int const_vf = vf.to_constant ();
8933 unsigned int const_nunits = nunits.to_constant ();
8934 vec_num = CEIL (group_size * const_vf, const_nunits);
8935 group_gap_adj = vf * group_size - nunits * vec_num;
8936 }
8937 else
8938 {
8939 vec_num = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
8940 group_gap_adj
8941 = group_size - scalar_lanes;
8942 }
8943 }
8944 else
8945 vec_num = group_size;
8946
8947 ref_type = get_group_alias_ptr_type (first_stmt_info);
8948 }
8949 else
8950 {
8951 first_stmt_info = stmt_info;
8952 first_dr_info = dr_info;
8953 group_size = vec_num = 1;
8954 group_gap_adj = 0;
8955 ref_type = reference_alias_ptr_type (DR_REF (first_dr_info->dr));
8956 }
8957
8958 gcc_assert (alignment_support_scheme);
8959 vec_loop_masks *loop_masks
8960 = (loop_vinfo && LOOP_VINFO_FULLY_MASKED_P (loop_vinfo)
8961 ? &LOOP_VINFO_MASKS (loop_vinfo)
8962 : NULL);
8963 vec_loop_lens *loop_lens
8964 = (loop_vinfo && LOOP_VINFO_FULLY_WITH_LENGTH_P (loop_vinfo)
8965 ? &LOOP_VINFO_LENS (loop_vinfo)
8966 : NULL);
8967
8968 /* Shouldn't go with length-based approach if fully masked. */
8969   /* We should not use the length-based approach if the loop is fully masked.  */
8970
8971 /* Targets with store-lane instructions must not require explicit
8972 realignment. vect_supportable_dr_alignment always returns either
8973 dr_aligned or dr_unaligned_supported for masked operations. */
8974 gcc_assert ((memory_access_type != VMAT_LOAD_STORE_LANES
8975 && !mask
8976 && !loop_masks)
8977 || alignment_support_scheme == dr_aligned
8978 || alignment_support_scheme == dr_unaligned_supported);
8979
8980 /* In case the vectorization factor (VF) is bigger than the number
8981 of elements that we can fit in a vectype (nunits), we have to generate
8982      more than one vector stmt - i.e. - we need to "unroll" the
8983 vector stmt by a factor VF/nunits. In doing so, we record a pointer
8984 from one copy of the vector stmt to the next, in the field
8985 STMT_VINFO_RELATED_STMT. This is necessary in order to allow following
8986 stages to find the correct vector defs to be used when vectorizing
8987 stmts that use the defs of the current stmt. The example below
8988 illustrates the vectorization process when VF=16 and nunits=4 (i.e., we
8989 need to create 4 vectorized stmts):
8990
8991 before vectorization:
8992 RELATED_STMT VEC_STMT
8993 S1: x = memref - -
8994 S2: z = x + 1 - -
8995
8996 step 1: vectorize stmt S1:
8997 We first create the vector stmt VS1_0, and, as usual, record a
8998 pointer to it in the STMT_VINFO_VEC_STMT of the scalar stmt S1.
8999 Next, we create the vector stmt VS1_1, and record a pointer to
9000 it in the STMT_VINFO_RELATED_STMT of the vector stmt VS1_0.
9001 Similarly, for VS1_2 and VS1_3. This is the resulting chain of
9002 stmts and pointers:
9003 RELATED_STMT VEC_STMT
9004 VS1_0: vx0 = memref0 VS1_1 -
9005 VS1_1: vx1 = memref1 VS1_2 -
9006 VS1_2: vx2 = memref2 VS1_3 -
9007 VS1_3: vx3 = memref3 - -
9008 S1: x = load - VS1_0
9009 S2: z = x + 1 - -
9010 */
9011
9012 /* In case of interleaving (non-unit grouped access):
9013
9014 S1: x2 = &base + 2
9015 S2: x0 = &base
9016 S3: x1 = &base + 1
9017 S4: x3 = &base + 3
9018
9019 Vectorized loads are created in the order of memory accesses
9020 starting from the access of the first stmt of the chain:
9021
9022 VS1: vx0 = &base
9023 VS2: vx1 = &base + vec_size*1
9024 VS3: vx3 = &base + vec_size*2
9025 VS4: vx4 = &base + vec_size*3
9026
9027 Then permutation statements are generated:
9028
9029 VS5: vx5 = VEC_PERM_EXPR < vx0, vx1, { 0, 2, ..., i*2 } >
9030 VS6: vx6 = VEC_PERM_EXPR < vx0, vx1, { 1, 3, ..., i*2+1 } >
9031 ...
9032
9033 And they are put in STMT_VINFO_VEC_STMT of the corresponding scalar stmts
9034 (the order of the data-refs in the output of vect_permute_load_chain
9035 corresponds to the order of scalar stmts in the interleaving chain - see
9036 the documentation of vect_permute_load_chain()).
9037 The generation of permutation stmts and recording them in
9038 STMT_VINFO_VEC_STMT is done in vect_transform_grouped_load().
9039
9040 In case of both multiple types and interleaving, the vector loads and
9041 permutation stmts above are created for every copy. The result vector
9042 stmts are put in STMT_VINFO_VEC_STMT for the first copy and in the
9043 corresponding STMT_VINFO_RELATED_STMT for the next copies. */
9044
9045 /* If the data reference is aligned (dr_aligned) or potentially unaligned
9046 on a target that supports unaligned accesses (dr_unaligned_supported)
9047 we generate the following code:
9048 p = initial_addr;
9049 indx = 0;
9050 loop {
9051 p = p + indx * vectype_size;
9052 vec_dest = *(p);
9053 indx = indx + 1;
9054 }
9055
9056 Otherwise, the data reference is potentially unaligned on a target that
9057 does not support unaligned accesses (dr_explicit_realign_optimized) -
9058 then generate the following code, in which the data in each iteration is
9059 obtained by two vector loads, one from the previous iteration, and one
9060 from the current iteration:
9061 p1 = initial_addr;
9062 msq_init = *(floor(p1))
9063 p2 = initial_addr + VS - 1;
9064 realignment_token = call target_builtin;
9065 indx = 0;
9066 loop {
9067 p2 = p2 + indx * vectype_size
9068 lsq = *(floor(p2))
9069 vec_dest = realign_load (msq, lsq, realignment_token)
9070 indx = indx + 1;
9071 msq = lsq;
9072 } */
9073
9074 /* If the misalignment remains the same throughout the execution of the
9075 loop, we can create the init_addr and permutation mask at the loop
9076 preheader. Otherwise, it needs to be created inside the loop.
9077 This can only occur when vectorizing memory accesses in the inner-loop
9078 nested within an outer-loop that is being vectorized. */
9079
9080 if (nested_in_vect_loop
9081 && !multiple_p (DR_STEP_ALIGNMENT (dr_info->dr),
9082 GET_MODE_SIZE (TYPE_MODE (vectype))))
9083 {
9084 gcc_assert (alignment_support_scheme != dr_explicit_realign_optimized);
9085 compute_in_loop = true;
9086 }
9087
9088 bool diff_first_stmt_info
9089 = first_stmt_info_for_drptr && first_stmt_info != first_stmt_info_for_drptr;
9090
9091 if ((alignment_support_scheme == dr_explicit_realign_optimized
9092 || alignment_support_scheme == dr_explicit_realign)
9093 && !compute_in_loop)
9094 {
9095       /* If first_stmt_info and first_stmt_info_for_drptr differ we cannot
9096 	 set up the realignment here since we cannot guarantee that the DR of
9097 	 first_stmt_info has been initialized yet.  Instead, below, we use the
9098 	 DR of first_stmt_info_for_drptr, bumped by the distance to first_stmt_info's DR.  */
9099 if (!diff_first_stmt_info)
9100 msq = vect_setup_realignment (vinfo,
9101 first_stmt_info, gsi, &realignment_token,
9102 alignment_support_scheme, NULL_TREE,
9103 &at_loop);
9104 if (alignment_support_scheme == dr_explicit_realign_optimized)
9105 {
9106 phi = as_a <gphi *> (SSA_NAME_DEF_STMT (msq));
9107 byte_offset = size_binop (MINUS_EXPR, TYPE_SIZE_UNIT (vectype),
9108 size_one_node);
9109 gcc_assert (!first_stmt_info_for_drptr);
9110 }
9111 }
9112 else
9113 at_loop = loop;
9114
9115 if (memory_access_type == VMAT_CONTIGUOUS_REVERSE)
9116 offset = size_int (-TYPE_VECTOR_SUBPARTS (vectype) + 1);
9117
9118 tree bump;
9119 tree vec_offset = NULL_TREE;
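  /* Choose what the data-ref pointer will point at and how it is bumped
     between copies.  Native gathers (STMT_VINFO_GATHER_SCATTER_P) compute
     their addresses from the offset vector and need neither; strided
     accesses emulated as gathers use the element type together with the
     offset and bump computed by vect_get_strided_load_store_ops; otherwise
     the pointer points at an array of VEC_NUM vectors for load-lanes or at
     a single vector.  */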
9120 if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
9121 {
9122 aggr_type = NULL_TREE;
9123 bump = NULL_TREE;
9124 }
9125 else if (memory_access_type == VMAT_GATHER_SCATTER)
9126 {
9127 aggr_type = elem_type;
9128 vect_get_strided_load_store_ops (stmt_info, loop_vinfo, &gs_info,
9129 &bump, &vec_offset);
9130 }
9131 else
9132 {
9133 if (memory_access_type == VMAT_LOAD_STORE_LANES)
9134 aggr_type = build_array_type_nelts (elem_type, vec_num * nunits);
9135 else
9136 aggr_type = vectype;
9137 bump = vect_get_data_ptr_increment (vinfo, dr_info, aggr_type,
9138 memory_access_type);
9139 }
9140
9141 vec<tree> vec_offsets = vNULL;
9142 auto_vec<tree> vec_masks;
9143 if (mask)
9144 vect_get_vec_defs (vinfo, stmt_info, slp_node, ncopies,
9145 mask, &vec_masks, mask_vectype, NULL_TREE);
9146 tree vec_mask = NULL_TREE;
9147 poly_uint64 group_elt = 0;
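  /* Main transformation loop, one iteration per vector copy: set up or bump
     the (possibly masked) data-ref pointer, emit the vector load according
     to the chosen access and alignment scheme, apply explicit realignment
     and reverse permutation where required, and collect the results for
     grouped or permuted accesses.  */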
9148 for (j = 0; j < ncopies; j++)
9149 {
9150 /* 1. Create the vector or array pointer update chain. */
9151 if (j == 0)
9152 {
9153 bool simd_lane_access_p
9154 = STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) != 0;
9155 if (simd_lane_access_p
9156 && TREE_CODE (DR_BASE_ADDRESS (first_dr_info->dr)) == ADDR_EXPR
9157 && VAR_P (TREE_OPERAND (DR_BASE_ADDRESS (first_dr_info->dr), 0))
9158 && integer_zerop (get_dr_vinfo_offset (vinfo, first_dr_info))
9159 && integer_zerop (DR_INIT (first_dr_info->dr))
9160 && alias_sets_conflict_p (get_alias_set (aggr_type),
9161 get_alias_set (TREE_TYPE (ref_type)))
9162 && (alignment_support_scheme == dr_aligned
9163 || alignment_support_scheme == dr_unaligned_supported))
9164 {
9165 dataref_ptr = unshare_expr (DR_BASE_ADDRESS (first_dr_info->dr));
9166 dataref_offset = build_int_cst (ref_type, 0);
9167 }
9168 else if (diff_first_stmt_info)
9169 {
9170 dataref_ptr
9171 = vect_create_data_ref_ptr (vinfo, first_stmt_info_for_drptr,
9172 aggr_type, at_loop, offset, &dummy,
9173 gsi, &ptr_incr, simd_lane_access_p,
9174 byte_offset, bump);
9175 /* Adjust the pointer by the difference to first_stmt. */
9176 data_reference_p ptrdr
9177 = STMT_VINFO_DATA_REF (first_stmt_info_for_drptr);
9178 tree diff
9179 = fold_convert (sizetype,
9180 size_binop (MINUS_EXPR,
9181 DR_INIT (first_dr_info->dr),
9182 DR_INIT (ptrdr)));
9183 dataref_ptr = bump_vector_ptr (vinfo, dataref_ptr, ptr_incr, gsi,
9184 stmt_info, diff);
9185 if (alignment_support_scheme == dr_explicit_realign)
9186 {
9187 msq = vect_setup_realignment (vinfo,
9188 first_stmt_info_for_drptr, gsi,
9189 &realignment_token,
9190 alignment_support_scheme,
9191 dataref_ptr, &at_loop);
9192 gcc_assert (!compute_in_loop);
9193 }
9194 }
9195 else if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
9196 {
9197 vect_get_gather_scatter_ops (vinfo, loop, stmt_info, &gs_info,
9198 &dataref_ptr, &vec_offsets, ncopies);
9199 vec_offset = vec_offsets[0];
9200 }
9201 else
9202 dataref_ptr
9203 = vect_create_data_ref_ptr (vinfo, first_stmt_info, aggr_type,
9204 at_loop,
9205 offset, &dummy, gsi, &ptr_incr,
9206 simd_lane_access_p,
9207 byte_offset, bump);
9208 if (mask)
9209 vec_mask = vec_masks[0];
9210 }
9211 else
9212 {
9213 if (dataref_offset)
9214 dataref_offset = int_const_binop (PLUS_EXPR, dataref_offset,
9215 bump);
9216 else if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
9217 vec_offset = vec_offsets[j];
9218 else
9219 dataref_ptr = bump_vector_ptr (vinfo, dataref_ptr, ptr_incr, gsi,
9220 stmt_info, bump);
9221 if (mask)
9222 vec_mask = vec_masks[j];
9223 }
9224
9225 if (grouped_load || slp_perm)
9226 dr_chain.create (vec_num);
9227
9228 gimple *new_stmt = NULL;
9229 if (memory_access_type == VMAT_LOAD_STORE_LANES)
9230 {
9231 tree vec_array;
9232
9233 vec_array = create_vector_array (vectype, vec_num);
9234
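	  /* The effective mask is the loop mask for this copy, if the loop
	     is fully masked, combined with the load's own mask operand by
	     prepare_load_store_mask (a BIT_AND of the two).  */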
9235 tree final_mask = NULL_TREE;
9236 if (loop_masks)
9237 final_mask = vect_get_loop_mask (gsi, loop_masks, ncopies,
9238 vectype, j);
9239 if (vec_mask)
9240 final_mask = prepare_load_store_mask (mask_vectype, final_mask,
9241 vec_mask, gsi);
9242
9243 gcall *call;
9244 if (final_mask)
9245 {
9246 /* Emit:
9247 VEC_ARRAY = MASK_LOAD_LANES (DATAREF_PTR, ALIAS_PTR,
9248 VEC_MASK). */
9249 unsigned int align = TYPE_ALIGN_UNIT (TREE_TYPE (vectype));
9250 tree alias_ptr = build_int_cst (ref_type, align);
9251 call = gimple_build_call_internal (IFN_MASK_LOAD_LANES, 3,
9252 dataref_ptr, alias_ptr,
9253 final_mask);
9254 }
9255 else
9256 {
9257 /* Emit:
9258 VEC_ARRAY = LOAD_LANES (MEM_REF[...all elements...]). */
9259 data_ref = create_array_ref (aggr_type, dataref_ptr, ref_type);
9260 call = gimple_build_call_internal (IFN_LOAD_LANES, 1, data_ref);
9261 }
9262 gimple_call_set_lhs (call, vec_array);
9263 gimple_call_set_nothrow (call, true);
9264 vect_finish_stmt_generation (vinfo, stmt_info, call, gsi);
9265 new_stmt = call;
9266
9267 /* Extract each vector into an SSA_NAME. */
9268 for (i = 0; i < vec_num; i++)
9269 {
9270 new_temp = read_vector_array (vinfo, stmt_info, gsi, scalar_dest,
9271 vec_array, i);
9272 dr_chain.quick_push (new_temp);
9273 }
9274
9275 /* Record the mapping between SSA_NAMEs and statements. */
9276 vect_record_grouped_load_vectors (vinfo, stmt_info, dr_chain);
9277
9278 /* Record that VEC_ARRAY is now dead. */
9279 vect_clobber_variable (vinfo, stmt_info, gsi, vec_array);
9280 }
9281 else
9282 {
9283 for (i = 0; i < vec_num; i++)
9284 {
9285 tree final_mask = NULL_TREE;
9286 if (loop_masks
9287 && memory_access_type != VMAT_INVARIANT)
9288 final_mask = vect_get_loop_mask (gsi, loop_masks,
9289 vec_num * ncopies,
9290 vectype, vec_num * j + i);
9291 if (vec_mask)
9292 final_mask = prepare_load_store_mask (mask_vectype, final_mask,
9293 vec_mask, gsi);
9294
9295 if (i > 0)
9296 dataref_ptr = bump_vector_ptr (vinfo, dataref_ptr, ptr_incr,
9297 gsi, stmt_info, bump);
9298
9299 /* 2. Create the vector-load in the loop. */
9300 switch (alignment_support_scheme)
9301 {
9302 case dr_aligned:
9303 case dr_unaligned_supported:
9304 {
9305 unsigned int misalign;
9306 unsigned HOST_WIDE_INT align;
9307
9308 if (memory_access_type == VMAT_GATHER_SCATTER)
9309 {
9310 tree zero = build_zero_cst (vectype);
9311 tree scale = size_int (gs_info.scale);
9312 gcall *call;
9313 if (loop_masks)
9314 call = gimple_build_call_internal
9315 (IFN_MASK_GATHER_LOAD, 5, dataref_ptr,
9316 vec_offset, scale, zero, final_mask);
9317 else
9318 call = gimple_build_call_internal
9319 (IFN_GATHER_LOAD, 4, dataref_ptr,
9320 vec_offset, scale, zero);
9321 gimple_call_set_nothrow (call, true);
9322 new_stmt = call;
9323 data_ref = NULL_TREE;
9324 break;
9325 }
9326
9327 align =
9328 known_alignment (DR_TARGET_ALIGNMENT (first_dr_info));
9329 if (alignment_support_scheme == dr_aligned)
9330 {
9331 gcc_assert (aligned_access_p (first_dr_info));
9332 misalign = 0;
9333 }
9334 else if (DR_MISALIGNMENT (first_dr_info) == -1)
9335 {
9336 align = dr_alignment
9337 (vect_dr_behavior (vinfo, first_dr_info));
9338 misalign = 0;
9339 }
9340 else
9341 misalign = DR_MISALIGNMENT (first_dr_info);
9342 if (dataref_offset == NULL_TREE
9343 && TREE_CODE (dataref_ptr) == SSA_NAME)
9344 set_ptr_info_alignment (get_ptr_info (dataref_ptr),
9345 align, misalign);
9346
9347 if (final_mask)
9348 {
9349 align = least_bit_hwi (misalign | align);
9350 tree ptr = build_int_cst (ref_type, align);
9351 gcall *call
9352 = gimple_build_call_internal (IFN_MASK_LOAD, 3,
9353 dataref_ptr, ptr,
9354 final_mask);
9355 gimple_call_set_nothrow (call, true);
9356 new_stmt = call;
9357 data_ref = NULL_TREE;
9358 }
9359 else if (loop_lens && memory_access_type != VMAT_INVARIANT)
9360 {
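		      /* For loops using length-based partial vectors emit
			 IFN_LEN_LOAD with the active length for this copy
			 instead of an unconditional vector load.  */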
9361 tree final_len
9362 = vect_get_loop_len (loop_vinfo, loop_lens,
9363 vec_num * ncopies,
9364 vec_num * j + i);
9365 align = least_bit_hwi (misalign | align);
9366 tree ptr = build_int_cst (ref_type, align);
9367 gcall *call
9368 = gimple_build_call_internal (IFN_LEN_LOAD, 3,
9369 dataref_ptr, ptr,
9370 final_len);
9371 gimple_call_set_nothrow (call, true);
9372 new_stmt = call;
9373 data_ref = NULL_TREE;
9374
9375 		      /* Need a VIEW_CONVERT back to VECTYPE if the len load used a VnQI mode.  */
9376 machine_mode vmode = TYPE_MODE (vectype);
9377 opt_machine_mode new_ovmode
9378 = get_len_load_store_mode (vmode, true);
9379 machine_mode new_vmode = new_ovmode.require ();
9380 if (vmode != new_vmode)
9381 {
9382 tree qi_type = unsigned_intQI_type_node;
9383 tree new_vtype
9384 = build_vector_type_for_mode (qi_type, new_vmode);
9385 tree var = vect_get_new_ssa_name (new_vtype,
9386 vect_simple_var);
9387 gimple_set_lhs (call, var);
9388 vect_finish_stmt_generation (vinfo, stmt_info, call,
9389 gsi);
9390 tree op = build1 (VIEW_CONVERT_EXPR, vectype, var);
9391 new_stmt
9392 = gimple_build_assign (vec_dest,
9393 VIEW_CONVERT_EXPR, op);
9394 }
9395 }
9396 else
9397 {
9398 tree ltype = vectype;
9399 tree new_vtype = NULL_TREE;
9400 unsigned HOST_WIDE_INT gap
9401 = DR_GROUP_GAP (first_stmt_info);
9402 unsigned int vect_align
9403 = vect_known_alignment_in_bytes (first_dr_info);
9404 unsigned int scalar_dr_size
9405 = vect_get_scalar_dr_size (first_dr_info);
9406 /* If there's no peeling for gaps but we have a gap
9407 with slp loads then load the lower half of the
9408 vector only. See get_group_load_store_type for
9409 when we apply this optimization. */
9410 if (slp
9411 && loop_vinfo
9412 && !LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo)
9413 && gap != 0
9414 && known_eq (nunits, (group_size - gap) * 2)
9415 && known_eq (nunits, group_size)
9416 && gap >= (vect_align / scalar_dr_size))
9417 {
9418 tree half_vtype;
9419 new_vtype
9420 = vector_vector_composition_type (vectype, 2,
9421 &half_vtype);
9422 if (new_vtype != NULL_TREE)
9423 ltype = half_vtype;
9424 }
9425 tree offset
9426 = (dataref_offset ? dataref_offset
9427 : build_int_cst (ref_type, 0));
9428 if (ltype != vectype
9429 && memory_access_type == VMAT_CONTIGUOUS_REVERSE)
9430 {
9431 unsigned HOST_WIDE_INT gap_offset
9432 = gap * tree_to_uhwi (TYPE_SIZE_UNIT (elem_type));
9433 tree gapcst = build_int_cst (ref_type, gap_offset);
9434 offset = size_binop (PLUS_EXPR, offset, gapcst);
9435 }
9436 data_ref
9437 = fold_build2 (MEM_REF, ltype, dataref_ptr, offset);
9438 if (alignment_support_scheme == dr_aligned)
9439 ;
9440 else if (DR_MISALIGNMENT (first_dr_info) == -1)
9441 TREE_TYPE (data_ref)
9442 = build_aligned_type (TREE_TYPE (data_ref),
9443 align * BITS_PER_UNIT);
9444 else
9445 TREE_TYPE (data_ref)
9446 = build_aligned_type (TREE_TYPE (data_ref),
9447 TYPE_ALIGN (elem_type));
9448 if (ltype != vectype)
9449 {
9450 vect_copy_ref_info (data_ref,
9451 DR_REF (first_dr_info->dr));
9452 tree tem = make_ssa_name (ltype);
9453 new_stmt = gimple_build_assign (tem, data_ref);
9454 vect_finish_stmt_generation (vinfo, stmt_info,
9455 new_stmt, gsi);
9456 data_ref = NULL;
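			  /* Widen the partial load back to a full vector:
			     pair the loaded half with a zero half in a
			     CONSTRUCTOR (zero first for a reversed access)
			     and VIEW_CONVERT to VECTYPE if the composition
			     type differs.  */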
9457 vec<constructor_elt, va_gc> *v;
9458 vec_alloc (v, 2);
9459 if (memory_access_type == VMAT_CONTIGUOUS_REVERSE)
9460 {
9461 CONSTRUCTOR_APPEND_ELT (v, NULL_TREE,
9462 build_zero_cst (ltype));
9463 CONSTRUCTOR_APPEND_ELT (v, NULL_TREE, tem);
9464 }
9465 else
9466 {
9467 CONSTRUCTOR_APPEND_ELT (v, NULL_TREE, tem);
9468 CONSTRUCTOR_APPEND_ELT (v, NULL_TREE,
9469 build_zero_cst (ltype));
9470 }
9471 gcc_assert (new_vtype != NULL_TREE);
9472 if (new_vtype == vectype)
9473 new_stmt = gimple_build_assign (
9474 vec_dest, build_constructor (vectype, v));
9475 else
9476 {
9477 tree new_vname = make_ssa_name (new_vtype);
9478 new_stmt = gimple_build_assign (
9479 new_vname, build_constructor (new_vtype, v));
9480 vect_finish_stmt_generation (vinfo, stmt_info,
9481 new_stmt, gsi);
9482 new_stmt = gimple_build_assign (
9483 vec_dest, build1 (VIEW_CONVERT_EXPR, vectype,
9484 new_vname));
9485 }
9486 }
9487 }
9488 break;
9489 }
9490 case dr_explicit_realign:
9491 {
9492 tree ptr, bump;
9493
9494 tree vs = size_int (TYPE_VECTOR_SUBPARTS (vectype));
9495
9496 if (compute_in_loop)
9497 msq = vect_setup_realignment (vinfo, first_stmt_info, gsi,
9498 &realignment_token,
9499 dr_explicit_realign,
9500 dataref_ptr, NULL);
9501
9502 if (TREE_CODE (dataref_ptr) == SSA_NAME)
9503 ptr = copy_ssa_name (dataref_ptr);
9504 else
9505 ptr = make_ssa_name (TREE_TYPE (dataref_ptr));
9506 // For explicit realign the target alignment should be
9507 // known at compile time.
9508 unsigned HOST_WIDE_INT align =
9509 DR_TARGET_ALIGNMENT (first_dr_info).to_constant ();
9510 new_stmt = gimple_build_assign
9511 (ptr, BIT_AND_EXPR, dataref_ptr,
9512 build_int_cst
9513 (TREE_TYPE (dataref_ptr),
9514 -(HOST_WIDE_INT) align));
9515 vect_finish_stmt_generation (vinfo, stmt_info,
9516 new_stmt, gsi);
9517 data_ref
9518 = build2 (MEM_REF, vectype, ptr,
9519 build_int_cst (ref_type, 0));
9520 vect_copy_ref_info (data_ref, DR_REF (first_dr_info->dr));
9521 vec_dest = vect_create_destination_var (scalar_dest,
9522 vectype);
9523 new_stmt = gimple_build_assign (vec_dest, data_ref);
9524 new_temp = make_ssa_name (vec_dest, new_stmt);
9525 gimple_assign_set_lhs (new_stmt, new_temp);
9526 gimple_move_vops (new_stmt, stmt_info->stmt);
9527 vect_finish_stmt_generation (vinfo, stmt_info,
9528 new_stmt, gsi);
9529 msq = new_temp;
9530
9531 bump = size_binop (MULT_EXPR, vs,
9532 TYPE_SIZE_UNIT (elem_type));
9533 bump = size_binop (MINUS_EXPR, bump, size_one_node);
9534 ptr = bump_vector_ptr (vinfo, dataref_ptr, NULL, gsi,
9535 stmt_info, bump);
9536 new_stmt = gimple_build_assign
9537 (NULL_TREE, BIT_AND_EXPR, ptr,
9538 build_int_cst
9539 (TREE_TYPE (ptr), -(HOST_WIDE_INT) align));
9540 ptr = copy_ssa_name (ptr, new_stmt);
9541 gimple_assign_set_lhs (new_stmt, ptr);
9542 vect_finish_stmt_generation (vinfo, stmt_info,
9543 new_stmt, gsi);
9544 data_ref
9545 = build2 (MEM_REF, vectype, ptr,
9546 build_int_cst (ref_type, 0));
9547 break;
9548 }
9549 case dr_explicit_realign_optimized:
9550 {
9551 if (TREE_CODE (dataref_ptr) == SSA_NAME)
9552 new_temp = copy_ssa_name (dataref_ptr);
9553 else
9554 new_temp = make_ssa_name (TREE_TYPE (dataref_ptr));
9555 // We should only be doing this if we know the target
9556 // alignment at compile time.
9557 unsigned HOST_WIDE_INT align =
9558 DR_TARGET_ALIGNMENT (first_dr_info).to_constant ();
9559 new_stmt = gimple_build_assign
9560 (new_temp, BIT_AND_EXPR, dataref_ptr,
9561 build_int_cst (TREE_TYPE (dataref_ptr),
9562 -(HOST_WIDE_INT) align));
9563 vect_finish_stmt_generation (vinfo, stmt_info,
9564 new_stmt, gsi);
9565 data_ref
9566 = build2 (MEM_REF, vectype, new_temp,
9567 build_int_cst (ref_type, 0));
9568 break;
9569 }
9570 default:
9571 gcc_unreachable ();
9572 }
9573 vec_dest = vect_create_destination_var (scalar_dest, vectype);
9574 /* DATA_REF is null if we've already built the statement. */
9575 if (data_ref)
9576 {
9577 vect_copy_ref_info (data_ref, DR_REF (first_dr_info->dr));
9578 new_stmt = gimple_build_assign (vec_dest, data_ref);
9579 }
9580 new_temp = make_ssa_name (vec_dest, new_stmt);
9581 gimple_set_lhs (new_stmt, new_temp);
9582 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
9583
9584 /* 3. Handle explicit realignment if necessary/supported.
9585 Create in loop:
9586 vec_dest = realign_load (msq, lsq, realignment_token) */
9587 if (alignment_support_scheme == dr_explicit_realign_optimized
9588 || alignment_support_scheme == dr_explicit_realign)
9589 {
9590 lsq = gimple_assign_lhs (new_stmt);
9591 if (!realignment_token)
9592 realignment_token = dataref_ptr;
9593 vec_dest = vect_create_destination_var (scalar_dest, vectype);
9594 new_stmt = gimple_build_assign (vec_dest, REALIGN_LOAD_EXPR,
9595 msq, lsq, realignment_token);
9596 new_temp = make_ssa_name (vec_dest, new_stmt);
9597 gimple_assign_set_lhs (new_stmt, new_temp);
9598 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
9599
9600 if (alignment_support_scheme == dr_explicit_realign_optimized)
9601 {
9602 gcc_assert (phi);
9603 if (i == vec_num - 1 && j == ncopies - 1)
9604 add_phi_arg (phi, lsq,
9605 loop_latch_edge (containing_loop),
9606 UNKNOWN_LOCATION);
9607 msq = lsq;
9608 }
9609 }
9610
9611 if (memory_access_type == VMAT_CONTIGUOUS_REVERSE)
9612 {
9613 tree perm_mask = perm_mask_for_reverse (vectype);
9614 new_temp = permute_vec_elements (vinfo, new_temp, new_temp,
9615 perm_mask, stmt_info, gsi);
9616 new_stmt = SSA_NAME_DEF_STMT (new_temp);
9617 }
9618
9619 /* Collect vector loads and later create their permutation in
9620 vect_transform_grouped_load (). */
9621 if (grouped_load || slp_perm)
9622 dr_chain.quick_push (new_temp);
9623
9624 /* Store vector loads in the corresponding SLP_NODE. */
9625 if (slp && !slp_perm)
9626 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
9627
9628 	  /* With an SLP permutation we load the gaps as well; without one
9629 	     we need to skip the gaps once we have fully loaded all the
9630 	     elements.  group_gap_adj is DR_GROUP_SIZE here.  */
9631 group_elt += nunits;
9632 if (maybe_ne (group_gap_adj, 0U)
9633 && !slp_perm
9634 && known_eq (group_elt, group_size - group_gap_adj))
9635 {
9636 poly_wide_int bump_val
9637 = (wi::to_wide (TYPE_SIZE_UNIT (elem_type))
9638 * group_gap_adj);
9639 tree bump = wide_int_to_tree (sizetype, bump_val);
9640 dataref_ptr = bump_vector_ptr (vinfo, dataref_ptr, ptr_incr,
9641 gsi, stmt_info, bump);
9642 group_elt = 0;
9643 }
9644 }
9645 /* Bump the vector pointer to account for a gap or for excess
9646 elements loaded for a permuted SLP load. */
9647 if (maybe_ne (group_gap_adj, 0U) && slp_perm)
9648 {
9649 poly_wide_int bump_val
9650 = (wi::to_wide (TYPE_SIZE_UNIT (elem_type))
9651 * group_gap_adj);
9652 tree bump = wide_int_to_tree (sizetype, bump_val);
9653 dataref_ptr = bump_vector_ptr (vinfo, dataref_ptr, ptr_incr, gsi,
9654 stmt_info, bump);
9655 }
9656 }
9657
9658 if (slp && !slp_perm)
9659 continue;
9660
9661 if (slp_perm)
9662 {
9663 unsigned n_perms;
9664 bool ok = vect_transform_slp_perm_load (vinfo, slp_node, dr_chain,
9665 gsi, vf, false, &n_perms);
9666 gcc_assert (ok);
9667 }
9668 else
9669 {
9670 if (grouped_load)
9671 {
9672 if (memory_access_type != VMAT_LOAD_STORE_LANES)
9673 vect_transform_grouped_load (vinfo, stmt_info, dr_chain,
9674 group_size, gsi);
9675 *vec_stmt = STMT_VINFO_VEC_STMTS (stmt_info)[0];
9676 }
9677 else
9678 {
9679 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
9680 }
9681 }
9682 dr_chain.release ();
9683 }
9684 if (!slp)
9685 *vec_stmt = STMT_VINFO_VEC_STMTS (stmt_info)[0];
9686
9687 return true;
9688 }
9689
9690 /* Function vect_is_simple_cond.
9691
9692 Input:
9693    VINFO - vectorization info for the loop or basic block being vectorized.
9694 COND - Condition that is checked for simple use.
9695
9696 Output:
9697 *COMP_VECTYPE - the vector type for the comparison.
9698 *DTS - The def types for the arguments of the comparison
9699
9700    Returns whether a COND can be vectorized.  Checks whether
9701    condition operands are supportable using vect_is_simple_use.  */
9702
9703 static bool
9704 vect_is_simple_cond (tree cond, vec_info *vinfo, stmt_vec_info stmt_info,
9705 slp_tree slp_node, tree *comp_vectype,
9706 enum vect_def_type *dts, tree vectype)
9707 {
9708 tree lhs, rhs;
9709 tree vectype1 = NULL_TREE, vectype2 = NULL_TREE;
9710 slp_tree slp_op;
9711
9712 /* Mask case. */
9713 if (TREE_CODE (cond) == SSA_NAME
9714 && VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (cond)))
9715 {
9716 if (!vect_is_simple_use (vinfo, stmt_info, slp_node, 0, &cond,
9717 &slp_op, &dts[0], comp_vectype)
9718 || !*comp_vectype
9719 || !VECTOR_BOOLEAN_TYPE_P (*comp_vectype))
9720 return false;
9721 return true;
9722 }
9723
9724 if (!COMPARISON_CLASS_P (cond))
9725 return false;
9726
9727 lhs = TREE_OPERAND (cond, 0);
9728 rhs = TREE_OPERAND (cond, 1);
9729
9730 if (TREE_CODE (lhs) == SSA_NAME)
9731 {
9732 if (!vect_is_simple_use (vinfo, stmt_info, slp_node, 0,
9733 &lhs, &slp_op, &dts[0], &vectype1))
9734 return false;
9735 }
9736 else if (TREE_CODE (lhs) == INTEGER_CST || TREE_CODE (lhs) == REAL_CST
9737 || TREE_CODE (lhs) == FIXED_CST)
9738 dts[0] = vect_constant_def;
9739 else
9740 return false;
9741
9742 if (TREE_CODE (rhs) == SSA_NAME)
9743 {
9744 if (!vect_is_simple_use (vinfo, stmt_info, slp_node, 1,
9745 &rhs, &slp_op, &dts[1], &vectype2))
9746 return false;
9747 }
9748 else if (TREE_CODE (rhs) == INTEGER_CST || TREE_CODE (rhs) == REAL_CST
9749 || TREE_CODE (rhs) == FIXED_CST)
9750 dts[1] = vect_constant_def;
9751 else
9752 return false;
9753
9754 if (vectype1 && vectype2
9755 && maybe_ne (TYPE_VECTOR_SUBPARTS (vectype1),
9756 TYPE_VECTOR_SUBPARTS (vectype2)))
9757 return false;
9758
9759 *comp_vectype = vectype1 ? vectype1 : vectype2;
9760 /* Invariant comparison. */
9761 if (! *comp_vectype)
9762 {
9763 tree scalar_type = TREE_TYPE (lhs);
9764 if (VECT_SCALAR_BOOLEAN_TYPE_P (scalar_type))
9765 *comp_vectype = truth_type_for (vectype);
9766 else
9767 {
9768 /* If we can widen the comparison to match vectype do so. */
9769 if (INTEGRAL_TYPE_P (scalar_type)
9770 && !slp_node
9771 && tree_int_cst_lt (TYPE_SIZE (scalar_type),
9772 TYPE_SIZE (TREE_TYPE (vectype))))
9773 scalar_type = build_nonstandard_integer_type
9774 (vector_element_bits (vectype), TYPE_UNSIGNED (scalar_type));
9775 *comp_vectype = get_vectype_for_scalar_type (vinfo, scalar_type,
9776 slp_node);
9777 }
9778 }
9779
9780 return true;
9781 }
9782
9783 /* vectorizable_condition.
9784
9785    Check if STMT_INFO is a conditional modify expression that can be vectorized.
9786 If VEC_STMT is also passed, vectorize STMT_INFO: create a vectorized
9787 stmt using VEC_COND_EXPR to replace it, put it in VEC_STMT, and insert it
9788 at GSI.
9789
9790 When STMT_INFO is vectorized as a nested cycle, for_reduction is true.
9791
9792 Return true if STMT_INFO is vectorizable in this way. */
9793
9794 static bool
9795 vectorizable_condition (vec_info *vinfo,
9796 stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
9797 gimple **vec_stmt,
9798 slp_tree slp_node, stmt_vector_for_cost *cost_vec)
9799 {
9800 tree scalar_dest = NULL_TREE;
9801 tree vec_dest = NULL_TREE;
9802 tree cond_expr, cond_expr0 = NULL_TREE, cond_expr1 = NULL_TREE;
9803 tree then_clause, else_clause;
9804 tree comp_vectype = NULL_TREE;
9805 tree vec_cond_lhs = NULL_TREE, vec_cond_rhs = NULL_TREE;
9806 tree vec_then_clause = NULL_TREE, vec_else_clause = NULL_TREE;
9807 tree vec_compare;
9808 tree new_temp;
9809 loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
9810 enum vect_def_type dts[4]
9811 = {vect_unknown_def_type, vect_unknown_def_type,
9812 vect_unknown_def_type, vect_unknown_def_type};
9813 int ndts = 4;
9814 int ncopies;
9815 int vec_num;
9816 enum tree_code code, cond_code, bitop1 = NOP_EXPR, bitop2 = NOP_EXPR;
9817 int i;
9818 bb_vec_info bb_vinfo = dyn_cast <bb_vec_info> (vinfo);
9819 vec<tree> vec_oprnds0 = vNULL;
9820 vec<tree> vec_oprnds1 = vNULL;
9821 vec<tree> vec_oprnds2 = vNULL;
9822 vec<tree> vec_oprnds3 = vNULL;
9823 tree vec_cmp_type;
9824 bool masked = false;
9825
9826 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
9827 return false;
9828
9829 /* Is vectorizable conditional operation? */
9830 gassign *stmt = dyn_cast <gassign *> (stmt_info->stmt);
9831 if (!stmt)
9832 return false;
9833
9834 code = gimple_assign_rhs_code (stmt);
9835 if (code != COND_EXPR)
9836 return false;
9837
9838 stmt_vec_info reduc_info = NULL;
9839 int reduc_index = -1;
9840 vect_reduction_type reduction_type = TREE_CODE_REDUCTION;
9841 bool for_reduction
9842 = STMT_VINFO_REDUC_DEF (vect_orig_stmt (stmt_info)) != NULL;
9843 if (for_reduction)
9844 {
9845 if (STMT_SLP_TYPE (stmt_info))
9846 return false;
9847 reduc_info = info_for_reduction (vinfo, stmt_info);
9848 reduction_type = STMT_VINFO_REDUC_TYPE (reduc_info);
9849 reduc_index = STMT_VINFO_REDUC_IDX (stmt_info);
9850 gcc_assert (reduction_type != EXTRACT_LAST_REDUCTION
9851 || reduc_index != -1);
9852 }
9853 else
9854 {
9855 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
9856 return false;
9857 }
9858
9859 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
9860 tree vectype1 = NULL_TREE, vectype2 = NULL_TREE;
9861
9862 if (slp_node)
9863 {
9864 ncopies = 1;
9865 vec_num = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
9866 }
9867 else
9868 {
9869 ncopies = vect_get_num_copies (loop_vinfo, vectype);
9870 vec_num = 1;
9871 }
9872
9873 gcc_assert (ncopies >= 1);
9874 if (for_reduction && ncopies > 1)
9875 return false; /* FORNOW */
9876
9877 cond_expr = gimple_assign_rhs1 (stmt);
9878
9879 if (!vect_is_simple_cond (cond_expr, vinfo, stmt_info, slp_node,
9880 &comp_vectype, &dts[0], vectype)
9881 || !comp_vectype)
9882 return false;
9883
9884 unsigned op_adjust = COMPARISON_CLASS_P (cond_expr) ? 1 : 0;
9885 slp_tree then_slp_node, else_slp_node;
9886 if (!vect_is_simple_use (vinfo, stmt_info, slp_node, 1 + op_adjust,
9887 &then_clause, &then_slp_node, &dts[2], &vectype1))
9888 return false;
9889 if (!vect_is_simple_use (vinfo, stmt_info, slp_node, 2 + op_adjust,
9890 &else_clause, &else_slp_node, &dts[3], &vectype2))
9891 return false;
9892
9893 if (vectype1 && !useless_type_conversion_p (vectype, vectype1))
9894 return false;
9895
9896 if (vectype2 && !useless_type_conversion_p (vectype, vectype2))
9897 return false;
9898
9899 masked = !COMPARISON_CLASS_P (cond_expr);
9900 vec_cmp_type = truth_type_for (comp_vectype);
9901
9902 if (vec_cmp_type == NULL_TREE)
9903 return false;
9904
9905 cond_code = TREE_CODE (cond_expr);
9906 if (!masked)
9907 {
9908 cond_expr0 = TREE_OPERAND (cond_expr, 0);
9909 cond_expr1 = TREE_OPERAND (cond_expr, 1);
9910 }
9911
9912 /* For conditional reductions, the "then" value needs to be the candidate
9913 value calculated by this iteration while the "else" value needs to be
9914 the result carried over from previous iterations. If the COND_EXPR
9915 is the other way around, we need to swap it. */
9916 bool must_invert_cmp_result = false;
9917 if (reduction_type == EXTRACT_LAST_REDUCTION && reduc_index == 1)
9918 {
9919 if (masked)
9920 must_invert_cmp_result = true;
9921 else
9922 {
9923 bool honor_nans = HONOR_NANS (TREE_TYPE (cond_expr0));
9924 tree_code new_code = invert_tree_comparison (cond_code, honor_nans);
9925 if (new_code == ERROR_MARK)
9926 must_invert_cmp_result = true;
9927 else
9928 {
9929 cond_code = new_code;
9930 /* Make sure we don't accidentally use the old condition. */
9931 cond_expr = NULL_TREE;
9932 }
9933 }
9934 std::swap (then_clause, else_clause);
9935 }
9936
9937 if (!masked && VECTOR_BOOLEAN_TYPE_P (comp_vectype))
9938 {
9939 /* Boolean values may have another representation in vectors
9940 and therefore we prefer bit operations over comparison for
9941 them (which also works for scalar masks). We store opcodes
9942 to use in bitop1 and bitop2. Statement is vectorized as
9943 BITOP2 (rhs1 BITOP1 rhs2) or rhs1 BITOP2 (BITOP1 rhs2)
9944 depending on bitop1 and bitop2 arity. */
9945 switch (cond_code)
9946 {
9947 case GT_EXPR:
9948 bitop1 = BIT_NOT_EXPR;
9949 bitop2 = BIT_AND_EXPR;
9950 break;
9951 case GE_EXPR:
9952 bitop1 = BIT_NOT_EXPR;
9953 bitop2 = BIT_IOR_EXPR;
9954 break;
9955 case LT_EXPR:
9956 bitop1 = BIT_NOT_EXPR;
9957 bitop2 = BIT_AND_EXPR;
9958 std::swap (cond_expr0, cond_expr1);
9959 break;
9960 case LE_EXPR:
9961 bitop1 = BIT_NOT_EXPR;
9962 bitop2 = BIT_IOR_EXPR;
9963 std::swap (cond_expr0, cond_expr1);
9964 break;
9965 case NE_EXPR:
9966 bitop1 = BIT_XOR_EXPR;
9967 break;
9968 case EQ_EXPR:
9969 bitop1 = BIT_XOR_EXPR;
9970 bitop2 = BIT_NOT_EXPR;
9971 break;
9972 default:
9973 return false;
9974 }
9975 cond_code = SSA_NAME;
9976 }
9977
9978 if (TREE_CODE_CLASS (cond_code) == tcc_comparison
9979 && reduction_type == EXTRACT_LAST_REDUCTION
9980 && !expand_vec_cmp_expr_p (comp_vectype, vec_cmp_type, cond_code))
9981 {
9982 if (dump_enabled_p ())
9983 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
9984 "reduction comparison operation not supported.\n");
9985 return false;
9986 }
9987
9988 if (!vec_stmt)
9989 {
9990 if (bitop1 != NOP_EXPR)
9991 {
9992 machine_mode mode = TYPE_MODE (comp_vectype);
9993 optab optab;
9994
9995 optab = optab_for_tree_code (bitop1, comp_vectype, optab_default);
9996 if (!optab || optab_handler (optab, mode) == CODE_FOR_nothing)
9997 return false;
9998
9999 if (bitop2 != NOP_EXPR)
10000 {
10001 optab = optab_for_tree_code (bitop2, comp_vectype,
10002 optab_default);
10003 if (!optab || optab_handler (optab, mode) == CODE_FOR_nothing)
10004 return false;
10005 }
10006 }
10007
10008 vect_cost_for_stmt kind = vector_stmt;
10009 if (reduction_type == EXTRACT_LAST_REDUCTION)
10010 /* Count one reduction-like operation per vector. */
10011 kind = vec_to_scalar;
10012 else if (!expand_vec_cond_expr_p (vectype, comp_vectype, cond_code))
10013 return false;
10014
10015 if (slp_node
10016 && (!vect_maybe_update_slp_op_vectype
10017 (SLP_TREE_CHILDREN (slp_node)[0], comp_vectype)
10018 || (op_adjust == 1
10019 && !vect_maybe_update_slp_op_vectype
10020 (SLP_TREE_CHILDREN (slp_node)[1], comp_vectype))
10021 || !vect_maybe_update_slp_op_vectype (then_slp_node, vectype)
10022 || !vect_maybe_update_slp_op_vectype (else_slp_node, vectype)))
10023 {
10024 if (dump_enabled_p ())
10025 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
10026 "incompatible vector types for invariants\n");
10027 return false;
10028 }
10029
10030 if (loop_vinfo && for_reduction
10031 && LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo))
10032 {
10033 if (reduction_type == EXTRACT_LAST_REDUCTION)
10034 vect_record_loop_mask (loop_vinfo, &LOOP_VINFO_MASKS (loop_vinfo),
10035 ncopies * vec_num, vectype, NULL);
10036 /* Extra inactive lanes should be safe for vect_nested_cycle. */
10037 else if (STMT_VINFO_DEF_TYPE (reduc_info) != vect_nested_cycle)
10038 {
10039 if (dump_enabled_p ())
10040 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
10041 "conditional reduction prevents the use"
10042 " of partial vectors.\n");
10043 LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo) = false;
10044 }
10045 }
10046
10047 STMT_VINFO_TYPE (stmt_info) = condition_vec_info_type;
10048 vect_model_simple_cost (vinfo, stmt_info, ncopies, dts, ndts, slp_node,
10049 cost_vec, kind);
10050 return true;
10051 }
10052
10053 /* Transform. */
10054
10055 if (!slp_node)
10056 {
10057 vec_oprnds0.create (1);
10058 vec_oprnds1.create (1);
10059 vec_oprnds2.create (1);
10060 vec_oprnds3.create (1);
10061 }
10062
10063 /* Handle def. */
10064 scalar_dest = gimple_assign_lhs (stmt);
10065 if (reduction_type != EXTRACT_LAST_REDUCTION)
10066 vec_dest = vect_create_destination_var (scalar_dest, vectype);
10067
10068 bool swap_cond_operands = false;
10069
10070 /* See whether another part of the vectorized code applies a loop
10071 mask to the condition, or to its inverse. */
10072
10073 vec_loop_masks *masks = NULL;
10074 if (loop_vinfo && LOOP_VINFO_FULLY_MASKED_P (loop_vinfo))
10075 {
10076 if (reduction_type == EXTRACT_LAST_REDUCTION)
10077 masks = &LOOP_VINFO_MASKS (loop_vinfo);
10078 else
10079 {
10080 scalar_cond_masked_key cond (cond_expr, ncopies);
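	  /* Look up the scalar condition (or its inverse) among the
	     conditions already known to be used under a loop mask; if found,
	     mask this VEC_COND's comparison as well so the masked form can
	     be reused, swapping the then/else operands when the inverse is
	     the one that matches.  */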
10081 if (loop_vinfo->scalar_cond_masked_set.contains (cond))
10082 masks = &LOOP_VINFO_MASKS (loop_vinfo);
10083 else
10084 {
10085 bool honor_nans = HONOR_NANS (TREE_TYPE (cond.op0));
10086 cond.code = invert_tree_comparison (cond.code, honor_nans);
10087 if (loop_vinfo->scalar_cond_masked_set.contains (cond))
10088 {
10089 masks = &LOOP_VINFO_MASKS (loop_vinfo);
10090 cond_code = cond.code;
10091 swap_cond_operands = true;
10092 }
10093 }
10094 }
10095 }
10096
10097 /* Handle cond expr. */
10098 if (masked)
10099 vect_get_vec_defs (vinfo, stmt_info, slp_node, ncopies,
10100 cond_expr, &vec_oprnds0, comp_vectype,
10101 then_clause, &vec_oprnds2, vectype,
10102 reduction_type != EXTRACT_LAST_REDUCTION
10103 ? else_clause : NULL, &vec_oprnds3, vectype);
10104 else
10105 vect_get_vec_defs (vinfo, stmt_info, slp_node, ncopies,
10106 cond_expr0, &vec_oprnds0, comp_vectype,
10107 cond_expr1, &vec_oprnds1, comp_vectype,
10108 then_clause, &vec_oprnds2, vectype,
10109 reduction_type != EXTRACT_LAST_REDUCTION
10110 ? else_clause : NULL, &vec_oprnds3, vectype);
10111
10112 /* Arguments are ready. Create the new vector stmt. */
10113 FOR_EACH_VEC_ELT (vec_oprnds0, i, vec_cond_lhs)
10114 {
10115 vec_then_clause = vec_oprnds2[i];
10116 if (reduction_type != EXTRACT_LAST_REDUCTION)
10117 vec_else_clause = vec_oprnds3[i];
10118
10119 if (swap_cond_operands)
10120 std::swap (vec_then_clause, vec_else_clause);
10121
10122 if (masked)
10123 vec_compare = vec_cond_lhs;
10124 else
10125 {
10126 vec_cond_rhs = vec_oprnds1[i];
10127 if (bitop1 == NOP_EXPR)
10128 {
10129 gimple_seq stmts = NULL;
10130 vec_compare = gimple_build (&stmts, cond_code, vec_cmp_type,
10131 vec_cond_lhs, vec_cond_rhs);
10132 gsi_insert_before (gsi, stmts, GSI_SAME_STMT);
10133 }
10134 else
10135 {
10136 new_temp = make_ssa_name (vec_cmp_type);
10137 gassign *new_stmt;
10138 if (bitop1 == BIT_NOT_EXPR)
10139 new_stmt = gimple_build_assign (new_temp, bitop1,
10140 vec_cond_rhs);
10141 else
10142 new_stmt
10143 = gimple_build_assign (new_temp, bitop1, vec_cond_lhs,
10144 vec_cond_rhs);
10145 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
10146 if (bitop2 == NOP_EXPR)
10147 vec_compare = new_temp;
10148 else if (bitop2 == BIT_NOT_EXPR)
10149 {
10150 /* Instead of doing ~x ? y : z do x ? z : y. */
10151 vec_compare = new_temp;
10152 std::swap (vec_then_clause, vec_else_clause);
10153 }
10154 else
10155 {
10156 vec_compare = make_ssa_name (vec_cmp_type);
10157 new_stmt
10158 = gimple_build_assign (vec_compare, bitop2,
10159 vec_cond_lhs, new_temp);
10160 vect_finish_stmt_generation (vinfo, stmt_info,
10161 new_stmt, gsi);
10162 }
10163 }
10164 }
10165
10166 /* If we decided to apply a loop mask to the result of the vector
10167 comparison, AND the comparison with the mask now. Later passes
10168 	 should then be able to reuse the AND results between multiple
10169 vector statements.
10170
10171 For example:
10172 for (int i = 0; i < 100; ++i)
10173 x[i] = y[i] ? z[i] : 10;
10174
10175 results in following optimized GIMPLE:
10176
10177 mask__35.8_43 = vect__4.7_41 != { 0, ... };
10178 vec_mask_and_46 = loop_mask_40 & mask__35.8_43;
10179 _19 = &MEM[base: z_12(D), index: ivtmp_56, step: 4, offset: 0B];
10180 vect_iftmp.11_47 = .MASK_LOAD (_19, 4B, vec_mask_and_46);
10181 vect_iftmp.12_52 = VEC_COND_EXPR <vec_mask_and_46,
10182 vect_iftmp.11_47, { 10, ... }>;
10183
10184 	 instead of using masked and unmasked forms of
10185 vec != { 0, ... } (masked in the MASK_LOAD,
10186 unmasked in the VEC_COND_EXPR). */
10187
10188 /* Force vec_compare to be an SSA_NAME rather than a comparison,
10189 in cases where that's necessary. */
10190
10191 if (masks || reduction_type == EXTRACT_LAST_REDUCTION)
10192 {
10193 if (!is_gimple_val (vec_compare))
10194 {
10195 tree vec_compare_name = make_ssa_name (vec_cmp_type);
10196 gassign *new_stmt = gimple_build_assign (vec_compare_name,
10197 vec_compare);
10198 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
10199 vec_compare = vec_compare_name;
10200 }
10201
10202 if (must_invert_cmp_result)
10203 {
10204 tree vec_compare_name = make_ssa_name (vec_cmp_type);
10205 gassign *new_stmt = gimple_build_assign (vec_compare_name,
10206 BIT_NOT_EXPR,
10207 vec_compare);
10208 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
10209 vec_compare = vec_compare_name;
10210 }
10211
10212 if (masks)
10213 {
10214 unsigned vec_num = vec_oprnds0.length ();
10215 tree loop_mask
10216 = vect_get_loop_mask (gsi, masks, vec_num * ncopies,
10217 vectype, i);
10218 tree tmp2 = make_ssa_name (vec_cmp_type);
10219 gassign *g
10220 = gimple_build_assign (tmp2, BIT_AND_EXPR, vec_compare,
10221 loop_mask);
10222 vect_finish_stmt_generation (vinfo, stmt_info, g, gsi);
10223 vec_compare = tmp2;
10224 }
10225 }
10226
10227 gimple *new_stmt;
10228 if (reduction_type == EXTRACT_LAST_REDUCTION)
10229 {
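/* An illustrative note on the internal function used below (a sketch of
   its documented behaviour, not a new definition): the call
   .FOLD_EXTRACT_LAST (ELSE, MASK, VEC) yields the last element of VEC
   whose MASK bit is set, or ELSE when no MASK bit is set.  */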
10230 gimple *old_stmt = vect_orig_stmt (stmt_info)->stmt;
10231 tree lhs = gimple_get_lhs (old_stmt);
10232 new_stmt = gimple_build_call_internal
10233 (IFN_FOLD_EXTRACT_LAST, 3, else_clause, vec_compare,
10234 vec_then_clause);
10235 gimple_call_set_lhs (new_stmt, lhs);
10236 SSA_NAME_DEF_STMT (lhs) = new_stmt;
10237 if (old_stmt == gsi_stmt (*gsi))
10238 vect_finish_replace_stmt (vinfo, stmt_info, new_stmt);
10239 else
10240 {
10241 /* In this case we're moving the definition to later in the
10242 block. That doesn't matter because the only uses of the
10243 lhs are in phi statements. */
10244 gimple_stmt_iterator old_gsi = gsi_for_stmt (old_stmt);
10245 gsi_remove (&old_gsi, true);
10246 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
10247 }
10248 }
10249 else
10250 {
10251 new_temp = make_ssa_name (vec_dest);
10252 new_stmt = gimple_build_assign (new_temp, VEC_COND_EXPR, vec_compare,
10253 vec_then_clause, vec_else_clause);
10254 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
10255 }
10256 if (slp_node)
10257 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
10258 else
10259 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
10260 }
10261
10262 if (!slp_node)
10263 *vec_stmt = STMT_VINFO_VEC_STMTS (stmt_info)[0];
10264
10265 vec_oprnds0.release ();
10266 vec_oprnds1.release ();
10267 vec_oprnds2.release ();
10268 vec_oprnds3.release ();
10269
10270 return true;
10271 }
10272
10273 /* vectorizable_comparison.
10274
10275 Check if STMT_INFO is comparison expression that can be vectorized.
10276 If VEC_STMT is also passed, vectorize STMT_INFO: create a vectorized
10277 comparison, put it in VEC_STMT, and insert it at GSI.
10278
10279 Return true if STMT_INFO is vectorizable in this way. */
10280
10281 static bool
10282 vectorizable_comparison (vec_info *vinfo,
10283 stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
10284 gimple **vec_stmt,
10285 slp_tree slp_node, stmt_vector_for_cost *cost_vec)
10286 {
10287 tree lhs, rhs1, rhs2;
10288 tree vectype1 = NULL_TREE, vectype2 = NULL_TREE;
10289 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
10290 tree vec_rhs1 = NULL_TREE, vec_rhs2 = NULL_TREE;
10291 tree new_temp;
10292 loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
10293 enum vect_def_type dts[2] = {vect_unknown_def_type, vect_unknown_def_type};
10294 int ndts = 2;
10295 poly_uint64 nunits;
10296 int ncopies;
10297 enum tree_code code, bitop1 = NOP_EXPR, bitop2 = NOP_EXPR;
10298 int i;
10299 bb_vec_info bb_vinfo = dyn_cast <bb_vec_info> (vinfo);
10300 vec<tree> vec_oprnds0 = vNULL;
10301 vec<tree> vec_oprnds1 = vNULL;
10302 tree mask_type;
10303 tree mask;
10304
10305 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
10306 return false;
10307
10308 if (!vectype || !VECTOR_BOOLEAN_TYPE_P (vectype))
10309 return false;
10310
10311 mask_type = vectype;
10312 nunits = TYPE_VECTOR_SUBPARTS (vectype);
10313
10314 if (slp_node)
10315 ncopies = 1;
10316 else
10317 ncopies = vect_get_num_copies (loop_vinfo, vectype);
10318
10319 gcc_assert (ncopies >= 1);
10320 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
10321 return false;
10322
10323 gassign *stmt = dyn_cast <gassign *> (stmt_info->stmt);
10324 if (!stmt)
10325 return false;
10326
10327 code = gimple_assign_rhs_code (stmt);
10328
10329 if (TREE_CODE_CLASS (code) != tcc_comparison)
10330 return false;
10331
10332 slp_tree slp_rhs1, slp_rhs2;
10333 if (!vect_is_simple_use (vinfo, stmt_info, slp_node,
10334 0, &rhs1, &slp_rhs1, &dts[0], &vectype1))
10335 return false;
10336
10337 if (!vect_is_simple_use (vinfo, stmt_info, slp_node,
10338 1, &rhs2, &slp_rhs2, &dts[1], &vectype2))
10339 return false;
10340
10341 if (vectype1 && vectype2
10342 && maybe_ne (TYPE_VECTOR_SUBPARTS (vectype1),
10343 TYPE_VECTOR_SUBPARTS (vectype2)))
10344 return false;
10345
10346 vectype = vectype1 ? vectype1 : vectype2;
10347
10348 /* Invariant comparison. */
10349 if (!vectype)
10350 {
10351 if (VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (rhs1)))
10352 vectype = mask_type;
10353 else
10354 vectype = get_vectype_for_scalar_type (vinfo, TREE_TYPE (rhs1),
10355 slp_node);
10356 if (!vectype || maybe_ne (TYPE_VECTOR_SUBPARTS (vectype), nunits))
10357 return false;
10358 }
10359 else if (maybe_ne (nunits, TYPE_VECTOR_SUBPARTS (vectype)))
10360 return false;
10361
10362 /* Can't compare mask and non-mask types. */
10363 if (vectype1 && vectype2
10364 && (VECTOR_BOOLEAN_TYPE_P (vectype1) ^ VECTOR_BOOLEAN_TYPE_P (vectype2)))
10365 return false;
10366
10367 /* Boolean values may have another representation in vectors
10368 and therefore we prefer bit operations over comparison for
10369 them (which also works for scalar masks). We store opcodes
10370 to use in bitop1 and bitop2. Statement is vectorized as
10371 BITOP2 (rhs1 BITOP1 rhs2) or
10372 rhs1 BITOP2 (BITOP1 rhs2)
10373 depending on bitop1 and bitop2 arity. */
10374 bool swap_p = false;
10375 if (VECTOR_BOOLEAN_TYPE_P (vectype))
10376 {
10377 if (code == GT_EXPR)
10378 {
10379 bitop1 = BIT_NOT_EXPR;
10380 bitop2 = BIT_AND_EXPR;
10381 }
10382 else if (code == GE_EXPR)
10383 {
10384 bitop1 = BIT_NOT_EXPR;
10385 bitop2 = BIT_IOR_EXPR;
10386 }
10387 else if (code == LT_EXPR)
10388 {
10389 bitop1 = BIT_NOT_EXPR;
10390 bitop2 = BIT_AND_EXPR;
10391 swap_p = true;
10392 }
10393 else if (code == LE_EXPR)
10394 {
10395 bitop1 = BIT_NOT_EXPR;
10396 bitop2 = BIT_IOR_EXPR;
10397 swap_p = true;
10398 }
10399 else
10400 {
10401 bitop1 = BIT_XOR_EXPR;
10402 if (code == EQ_EXPR)
10403 bitop2 = BIT_NOT_EXPR;
10404 }
10405 }
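/* A reading aid only, derived from the bitop1/bitop2 selections above:
   for boolean operands the comparisons are lowered as
     a >  b  ->  a & ~b
     a >= b  ->  a | ~b
     a <  b  ->  b & ~a
     a <= b  ->  b | ~a
     a == b  ->  ~(a ^ b)
     a != b  ->  a ^ b  */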
10406
10407 if (!vec_stmt)
10408 {
10409 if (bitop1 == NOP_EXPR)
10410 {
10411 if (!expand_vec_cmp_expr_p (vectype, mask_type, code))
10412 return false;
10413 }
10414 else
10415 {
10416 machine_mode mode = TYPE_MODE (vectype);
10417 optab optab;
10418
10419 optab = optab_for_tree_code (bitop1, vectype, optab_default);
10420 if (!optab || optab_handler (optab, mode) == CODE_FOR_nothing)
10421 return false;
10422
10423 if (bitop2 != NOP_EXPR)
10424 {
10425 optab = optab_for_tree_code (bitop2, vectype, optab_default);
10426 if (!optab || optab_handler (optab, mode) == CODE_FOR_nothing)
10427 return false;
10428 }
10429 }
10430
10431 /* Put types on constant and invariant SLP children. */
10432 if (slp_node
10433 && (!vect_maybe_update_slp_op_vectype (slp_rhs1, vectype)
10434 || !vect_maybe_update_slp_op_vectype (slp_rhs2, vectype)))
10435 {
10436 if (dump_enabled_p ())
10437 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
10438 "incompatible vector types for invariants\n");
10439 return false;
10440 }
10441
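/* An explanatory aside: a boolean comparison lowered with two bit
   operations above (GE_EXPR, for example) is costed as two statements
   per copy, which is what the ncopies * (1 + (bitop2 != NOP_EXPR))
   factor below expresses.  */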
10442 STMT_VINFO_TYPE (stmt_info) = comparison_vec_info_type;
10443 vect_model_simple_cost (vinfo, stmt_info,
10444 ncopies * (1 + (bitop2 != NOP_EXPR)),
10445 dts, ndts, slp_node, cost_vec);
10446 return true;
10447 }
10448
10449 /* Transform. */
10450 if (!slp_node)
10451 {
10452 vec_oprnds0.create (1);
10453 vec_oprnds1.create (1);
10454 }
10455
10456 /* Handle def. */
10457 lhs = gimple_assign_lhs (stmt);
10458 mask = vect_create_destination_var (lhs, mask_type);
10459
10460 vect_get_vec_defs (vinfo, stmt_info, slp_node, ncopies,
10461 rhs1, &vec_oprnds0, vectype,
10462 rhs2, &vec_oprnds1, vectype);
10463 if (swap_p)
10464 std::swap (vec_oprnds0, vec_oprnds1);
10465
10466 /* Arguments are ready. Create the new vector stmt. */
10467 FOR_EACH_VEC_ELT (vec_oprnds0, i, vec_rhs1)
10468 {
10469 gimple *new_stmt;
10470 vec_rhs2 = vec_oprnds1[i];
10471
10472 new_temp = make_ssa_name (mask);
10473 if (bitop1 == NOP_EXPR)
10474 {
10475 new_stmt = gimple_build_assign (new_temp, code,
10476 vec_rhs1, vec_rhs2);
10477 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
10478 }
10479 else
10480 {
10481 if (bitop1 == BIT_NOT_EXPR)
10482 new_stmt = gimple_build_assign (new_temp, bitop1, vec_rhs2);
10483 else
10484 new_stmt = gimple_build_assign (new_temp, bitop1, vec_rhs1,
10485 vec_rhs2);
10486 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
10487 if (bitop2 != NOP_EXPR)
10488 {
10489 tree res = make_ssa_name (mask);
10490 if (bitop2 == BIT_NOT_EXPR)
10491 new_stmt = gimple_build_assign (res, bitop2, new_temp);
10492 else
10493 new_stmt = gimple_build_assign (res, bitop2, vec_rhs1,
10494 new_temp);
10495 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
10496 }
10497 }
10498 if (slp_node)
10499 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
10500 else
10501 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
10502 }
10503
10504 if (!slp_node)
10505 *vec_stmt = STMT_VINFO_VEC_STMTS (stmt_info)[0];
10506
10507 vec_oprnds0.release ();
10508 vec_oprnds1.release ();
10509
10510 return true;
10511 }
10512
10513 /* If SLP_NODE is nonnull, return true if vectorizable_live_operation
10514 can handle all live statements in the node. Otherwise return true
10515 if STMT_INFO is not live or if vectorizable_live_operation can handle it.
10516 GSI and VEC_STMT_P are as for vectorizable_live_operation. */
10517
10518 static bool
10519 can_vectorize_live_stmts (vec_info *vinfo,
10520 stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
10521 slp_tree slp_node, slp_instance slp_node_instance,
10522 bool vec_stmt_p,
10523 stmt_vector_for_cost *cost_vec)
10524 {
10525 if (slp_node)
10526 {
10527 stmt_vec_info slp_stmt_info;
10528 unsigned int i;
10529 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (slp_node), i, slp_stmt_info)
10530 {
10531 if (STMT_VINFO_LIVE_P (slp_stmt_info)
10532 && !vectorizable_live_operation (vinfo,
10533 slp_stmt_info, gsi, slp_node,
10534 slp_node_instance, i,
10535 vec_stmt_p, cost_vec))
10536 return false;
10537 }
10538 }
10539 else if (STMT_VINFO_LIVE_P (stmt_info)
10540 && !vectorizable_live_operation (vinfo, stmt_info, gsi,
10541 slp_node, slp_node_instance, -1,
10542 vec_stmt_p, cost_vec))
10543 return false;
10544
10545 return true;
10546 }
10547
10548 /* Make sure the statement is vectorizable. */
10549
10550 opt_result
10551 vect_analyze_stmt (vec_info *vinfo,
10552 stmt_vec_info stmt_info, bool *need_to_vectorize,
10553 slp_tree node, slp_instance node_instance,
10554 stmt_vector_for_cost *cost_vec)
10555 {
10556 bb_vec_info bb_vinfo = dyn_cast <bb_vec_info> (vinfo);
10557 enum vect_relevant relevance = STMT_VINFO_RELEVANT (stmt_info);
10558 bool ok;
10559 gimple_seq pattern_def_seq;
10560
10561 if (dump_enabled_p ())
10562 dump_printf_loc (MSG_NOTE, vect_location, "==> examining statement: %G",
10563 stmt_info->stmt);
10564
10565 if (gimple_has_volatile_ops (stmt_info->stmt))
10566 return opt_result::failure_at (stmt_info->stmt,
10567 "not vectorized:"
10568 " stmt has volatile operands: %G\n",
10569 stmt_info->stmt);
10570
10571 if (STMT_VINFO_IN_PATTERN_P (stmt_info)
10572 && node == NULL
10573 && (pattern_def_seq = STMT_VINFO_PATTERN_DEF_SEQ (stmt_info)))
10574 {
10575 gimple_stmt_iterator si;
10576
10577 for (si = gsi_start (pattern_def_seq); !gsi_end_p (si); gsi_next (&si))
10578 {
10579 stmt_vec_info pattern_def_stmt_info
10580 = vinfo->lookup_stmt (gsi_stmt (si));
10581 if (STMT_VINFO_RELEVANT_P (pattern_def_stmt_info)
10582 || STMT_VINFO_LIVE_P (pattern_def_stmt_info))
10583 {
10584 /* Analyze def stmt of STMT if it's a pattern stmt. */
10585 if (dump_enabled_p ())
10586 dump_printf_loc (MSG_NOTE, vect_location,
10587 "==> examining pattern def statement: %G",
10588 pattern_def_stmt_info->stmt);
10589
10590 opt_result res
10591 = vect_analyze_stmt (vinfo, pattern_def_stmt_info,
10592 need_to_vectorize, node, node_instance,
10593 cost_vec);
10594 if (!res)
10595 return res;
10596 }
10597 }
10598 }
10599
10600 /* Skip stmts that do not need to be vectorized. In loops this is expected
10601 to include:
10602 - the COND_EXPR which is the loop exit condition
10603 - any LABEL_EXPRs in the loop
10604 - computations that are used only for array indexing or loop control.
10605 In basic blocks we only analyze statements that are a part of some SLP
10606 instance, therefore, all the statements are relevant.
10607
10608 A pattern statement needs to be analyzed instead of the original statement
10609 if the original statement is not relevant. Otherwise, we analyze both
10610 statements. In basic blocks we are called from some SLP instance
10611 traversal; don't analyze pattern stmts instead, as the pattern stmts
10612 will already be part of an SLP instance. */
10613
10614 stmt_vec_info pattern_stmt_info = STMT_VINFO_RELATED_STMT (stmt_info);
10615 if (!STMT_VINFO_RELEVANT_P (stmt_info)
10616 && !STMT_VINFO_LIVE_P (stmt_info))
10617 {
10618 if (STMT_VINFO_IN_PATTERN_P (stmt_info)
10619 && pattern_stmt_info
10620 && (STMT_VINFO_RELEVANT_P (pattern_stmt_info)
10621 || STMT_VINFO_LIVE_P (pattern_stmt_info)))
10622 {
10623 /* Analyze PATTERN_STMT instead of the original stmt. */
10624 stmt_info = pattern_stmt_info;
10625 if (dump_enabled_p ())
10626 dump_printf_loc (MSG_NOTE, vect_location,
10627 "==> examining pattern statement: %G",
10628 stmt_info->stmt);
10629 }
10630 else
10631 {
10632 if (dump_enabled_p ())
10633 dump_printf_loc (MSG_NOTE, vect_location, "irrelevant.\n");
10634
10635 return opt_result::success ();
10636 }
10637 }
10638 else if (STMT_VINFO_IN_PATTERN_P (stmt_info)
10639 && node == NULL
10640 && pattern_stmt_info
10641 && (STMT_VINFO_RELEVANT_P (pattern_stmt_info)
10642 || STMT_VINFO_LIVE_P (pattern_stmt_info)))
10643 {
10644 /* Analyze PATTERN_STMT too. */
10645 if (dump_enabled_p ())
10646 dump_printf_loc (MSG_NOTE, vect_location,
10647 "==> examining pattern statement: %G",
10648 pattern_stmt_info->stmt);
10649
10650 opt_result res
10651 = vect_analyze_stmt (vinfo, pattern_stmt_info, need_to_vectorize, node,
10652 node_instance, cost_vec);
10653 if (!res)
10654 return res;
10655 }
10656
10657 switch (STMT_VINFO_DEF_TYPE (stmt_info))
10658 {
10659 case vect_internal_def:
10660 break;
10661
10662 case vect_reduction_def:
10663 case vect_nested_cycle:
10664 gcc_assert (!bb_vinfo
10665 && (relevance == vect_used_in_outer
10666 || relevance == vect_used_in_outer_by_reduction
10667 || relevance == vect_used_by_reduction
10668 || relevance == vect_unused_in_scope
10669 || relevance == vect_used_only_live));
10670 break;
10671
10672 case vect_induction_def:
10673 gcc_assert (!bb_vinfo);
10674 break;
10675
10676 case vect_constant_def:
10677 case vect_external_def:
10678 case vect_unknown_def_type:
10679 default:
10680 gcc_unreachable ();
10681 }
10682
10683 if (STMT_VINFO_RELEVANT_P (stmt_info))
10684 {
10685 tree type = gimple_expr_type (stmt_info->stmt);
10686 gcc_assert (!VECTOR_MODE_P (TYPE_MODE (type)));
10687 gcall *call = dyn_cast <gcall *> (stmt_info->stmt);
10688 gcc_assert (STMT_VINFO_VECTYPE (stmt_info)
10689 || (call && gimple_call_lhs (call) == NULL_TREE));
10690 *need_to_vectorize = true;
10691 }
10692
10693 if (PURE_SLP_STMT (stmt_info) && !node)
10694 {
10695 if (dump_enabled_p ())
10696 dump_printf_loc (MSG_NOTE, vect_location,
10697 "handled only by SLP analysis\n");
10698 return opt_result::success ();
10699 }
10700
10701 ok = true;
10702 if (!bb_vinfo
10703 && (STMT_VINFO_RELEVANT_P (stmt_info)
10704 || STMT_VINFO_DEF_TYPE (stmt_info) == vect_reduction_def))
10705 /* Prefer vectorizable_call over vectorizable_simd_clone_call so
10706 -mveclibabi= takes precedence over library functions with
10707 the simd attribute. */
10708 ok = (vectorizable_call (vinfo, stmt_info, NULL, NULL, node, cost_vec)
10709 || vectorizable_simd_clone_call (vinfo, stmt_info, NULL, NULL, node,
10710 cost_vec)
10711 || vectorizable_conversion (vinfo, stmt_info,
10712 NULL, NULL, node, cost_vec)
10713 || vectorizable_operation (vinfo, stmt_info,
10714 NULL, NULL, node, cost_vec)
10715 || vectorizable_assignment (vinfo, stmt_info,
10716 NULL, NULL, node, cost_vec)
10717 || vectorizable_load (vinfo, stmt_info, NULL, NULL, node, cost_vec)
10718 || vectorizable_store (vinfo, stmt_info, NULL, NULL, node, cost_vec)
10719 || vectorizable_reduction (as_a <loop_vec_info> (vinfo), stmt_info,
10720 node, node_instance, cost_vec)
10721 || vectorizable_induction (as_a <loop_vec_info> (vinfo), stmt_info,
10722 NULL, node, cost_vec)
10723 || vectorizable_shift (vinfo, stmt_info, NULL, NULL, node, cost_vec)
10724 || vectorizable_condition (vinfo, stmt_info,
10725 NULL, NULL, node, cost_vec)
10726 || vectorizable_comparison (vinfo, stmt_info, NULL, NULL, node,
10727 cost_vec)
10728 || vectorizable_lc_phi (as_a <loop_vec_info> (vinfo),
10729 stmt_info, NULL, node));
10730 else
10731 {
10732 if (bb_vinfo)
10733 ok = (vectorizable_call (vinfo, stmt_info, NULL, NULL, node, cost_vec)
10734 || vectorizable_simd_clone_call (vinfo, stmt_info,
10735 NULL, NULL, node, cost_vec)
10736 || vectorizable_conversion (vinfo, stmt_info, NULL, NULL, node,
10737 cost_vec)
10738 || vectorizable_shift (vinfo, stmt_info,
10739 NULL, NULL, node, cost_vec)
10740 || vectorizable_operation (vinfo, stmt_info,
10741 NULL, NULL, node, cost_vec)
10742 || vectorizable_assignment (vinfo, stmt_info, NULL, NULL, node,
10743 cost_vec)
10744 || vectorizable_load (vinfo, stmt_info,
10745 NULL, NULL, node, cost_vec)
10746 || vectorizable_store (vinfo, stmt_info,
10747 NULL, NULL, node, cost_vec)
10748 || vectorizable_condition (vinfo, stmt_info,
10749 NULL, NULL, node, cost_vec)
10750 || vectorizable_comparison (vinfo, stmt_info, NULL, NULL, node,
10751 cost_vec)
10752 || vectorizable_phi (vinfo, stmt_info, NULL, node, cost_vec));
10753 }
10754
10755 if (!ok)
10756 return opt_result::failure_at (stmt_info->stmt,
10757 "not vectorized:"
10758 " relevant stmt not supported: %G",
10759 stmt_info->stmt);
10760
10761 /* Stmts that are (also) "live" (i.e. that are used outside of the loop)
10762 need extra handling, except for vectorizable reductions. */
10763 if (!bb_vinfo
10764 && STMT_VINFO_TYPE (stmt_info) != reduc_vec_info_type
10765 && STMT_VINFO_TYPE (stmt_info) != lc_phi_info_type
10766 && !can_vectorize_live_stmts (as_a <loop_vec_info> (vinfo),
10767 stmt_info, NULL, node, node_instance,
10768 false, cost_vec))
10769 return opt_result::failure_at (stmt_info->stmt,
10770 "not vectorized:"
10771 " live stmt not supported: %G",
10772 stmt_info->stmt);
10773
10774 return opt_result::success ();
10775 }
10776
10777
10778 /* Function vect_transform_stmt.
10779
10780 Create a vectorized stmt to replace STMT_INFO, and insert it at GSI. */
10781
10782 bool
10783 vect_transform_stmt (vec_info *vinfo,
10784 stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
10785 slp_tree slp_node, slp_instance slp_node_instance)
10786 {
10787 bool is_store = false;
10788 gimple *vec_stmt = NULL;
10789 bool done;
10790
10791 gcc_assert (slp_node || !PURE_SLP_STMT (stmt_info));
10792
10793 switch (STMT_VINFO_TYPE (stmt_info))
10794 {
10795 case type_demotion_vec_info_type:
10796 case type_promotion_vec_info_type:
10797 case type_conversion_vec_info_type:
10798 done = vectorizable_conversion (vinfo, stmt_info,
10799 gsi, &vec_stmt, slp_node, NULL);
10800 gcc_assert (done);
10801 break;
10802
10803 case induc_vec_info_type:
10804 done = vectorizable_induction (as_a <loop_vec_info> (vinfo),
10805 stmt_info, &vec_stmt, slp_node,
10806 NULL);
10807 gcc_assert (done);
10808 break;
10809
10810 case shift_vec_info_type:
10811 done = vectorizable_shift (vinfo, stmt_info,
10812 gsi, &vec_stmt, slp_node, NULL);
10813 gcc_assert (done);
10814 break;
10815
10816 case op_vec_info_type:
10817 done = vectorizable_operation (vinfo, stmt_info, gsi, &vec_stmt, slp_node,
10818 NULL);
10819 gcc_assert (done);
10820 break;
10821
10822 case assignment_vec_info_type:
10823 done = vectorizable_assignment (vinfo, stmt_info,
10824 gsi, &vec_stmt, slp_node, NULL);
10825 gcc_assert (done);
10826 break;
10827
10828 case load_vec_info_type:
10829 done = vectorizable_load (vinfo, stmt_info, gsi, &vec_stmt, slp_node,
10830 NULL);
10831 gcc_assert (done);
10832 break;
10833
10834 case store_vec_info_type:
10835 done = vectorizable_store (vinfo, stmt_info,
10836 gsi, &vec_stmt, slp_node, NULL);
10837 gcc_assert (done);
10838 if (STMT_VINFO_GROUPED_ACCESS (stmt_info) && !slp_node)
10839 {
10840 /* In case of interleaving, the whole chain is vectorized when the
10841 last store in the chain is reached. Store stmts before the last
10842 one are skipped, and their vec_stmt_info shouldn't be freed
10843 meanwhile. */
10844 stmt_vec_info group_info = DR_GROUP_FIRST_ELEMENT (stmt_info);
10845 if (DR_GROUP_STORE_COUNT (group_info) == DR_GROUP_SIZE (group_info))
10846 is_store = true;
10847 }
10848 else
10849 is_store = true;
10850 break;
10851
10852 case condition_vec_info_type:
10853 done = vectorizable_condition (vinfo, stmt_info,
10854 gsi, &vec_stmt, slp_node, NULL);
10855 gcc_assert (done);
10856 break;
10857
10858 case comparison_vec_info_type:
10859 done = vectorizable_comparison (vinfo, stmt_info, gsi, &vec_stmt,
10860 slp_node, NULL);
10861 gcc_assert (done);
10862 break;
10863
10864 case call_vec_info_type:
10865 done = vectorizable_call (vinfo, stmt_info,
10866 gsi, &vec_stmt, slp_node, NULL);
10867 break;
10868
10869 case call_simd_clone_vec_info_type:
10870 done = vectorizable_simd_clone_call (vinfo, stmt_info, gsi, &vec_stmt,
10871 slp_node, NULL);
10872 break;
10873
10874 case reduc_vec_info_type:
10875 done = vect_transform_reduction (as_a <loop_vec_info> (vinfo), stmt_info,
10876 gsi, &vec_stmt, slp_node);
10877 gcc_assert (done);
10878 break;
10879
10880 case cycle_phi_info_type:
10881 done = vect_transform_cycle_phi (as_a <loop_vec_info> (vinfo), stmt_info,
10882 &vec_stmt, slp_node, slp_node_instance);
10883 gcc_assert (done);
10884 break;
10885
10886 case lc_phi_info_type:
10887 done = vectorizable_lc_phi (as_a <loop_vec_info> (vinfo),
10888 stmt_info, &vec_stmt, slp_node);
10889 gcc_assert (done);
10890 break;
10891
10892 case phi_info_type:
10893 done = vectorizable_phi (vinfo, stmt_info, &vec_stmt, slp_node, NULL);
10894 gcc_assert (done);
10895 break;
10896
10897 default:
10898 if (!STMT_VINFO_LIVE_P (stmt_info))
10899 {
10900 if (dump_enabled_p ())
10901 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
10902 "stmt not supported.\n");
10903 gcc_unreachable ();
10904 }
10905 done = true;
10906 }
10907
10908 if (!slp_node && vec_stmt)
10909 gcc_assert (STMT_VINFO_VEC_STMTS (stmt_info).exists ());
10910
10911 if (STMT_VINFO_TYPE (stmt_info) == store_vec_info_type)
10912 return is_store;
10913
10914 /* Handle stmts whose DEF is used outside the loop-nest that is
10915 being vectorized. */
10916 done = can_vectorize_live_stmts (vinfo, stmt_info, gsi, slp_node,
10917 slp_node_instance, true, NULL);
10918 gcc_assert (done);
10919
10920 return false;
10921 }
10922
10923
10924 /* Remove a group of stores (for SLP or interleaving), free their
10925 stmt_vec_info. */
10926
10927 void
10928 vect_remove_stores (vec_info *vinfo, stmt_vec_info first_stmt_info)
10929 {
10930 stmt_vec_info next_stmt_info = first_stmt_info;
10931
10932 while (next_stmt_info)
10933 {
10934 stmt_vec_info tmp = DR_GROUP_NEXT_ELEMENT (next_stmt_info);
10935 next_stmt_info = vect_orig_stmt (next_stmt_info);
10936 /* Free the attached stmt_vec_info and remove the stmt. */
10937 vinfo->remove_stmt (next_stmt_info);
10938 next_stmt_info = tmp;
10939 }
10940 }
10941
10942 /* If NUNITS is nonzero, return a vector type that contains NUNITS
10943 elements of type SCALAR_TYPE, or null if the target doesn't support
10944 such a type.
10945
10946 If NUNITS is zero, return a vector type that contains elements of
10947 type SCALAR_TYPE, choosing whichever vector size the target prefers.
10948
10949 If PREVAILING_MODE is VOIDmode, we have not yet chosen a vector mode
10950 for this vectorization region and want to "autodetect" the best choice.
10951 Otherwise, PREVAILING_MODE is a previously-chosen vector TYPE_MODE
10952 and we want the new type to be interoperable with it. PREVAILING_MODE
10953 in this case can be a scalar integer mode or a vector mode; when it
10954 is a vector mode, the function acts like a tree-level version of
10955 related_vector_mode. */
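/* An illustrative example, assuming a target whose preferred SIMD mode
   for SImode is V4SImode: with PREVAILING_MODE == VOIDmode, an int
   SCALAR_TYPE and NUNITS == 0 this returns a four-element vector of int,
   while PREVAILING_MODE == V4SImode and NUNITS == 2 asks for a related
   two-element variant if the target provides one.  */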
10956
10957 tree
10958 get_related_vectype_for_scalar_type (machine_mode prevailing_mode,
10959 tree scalar_type, poly_uint64 nunits)
10960 {
10961 tree orig_scalar_type = scalar_type;
10962 scalar_mode inner_mode;
10963 machine_mode simd_mode;
10964 tree vectype;
10965
10966 if (!is_int_mode (TYPE_MODE (scalar_type), &inner_mode)
10967 && !is_float_mode (TYPE_MODE (scalar_type), &inner_mode))
10968 return NULL_TREE;
10969
10970 unsigned int nbytes = GET_MODE_SIZE (inner_mode);
10971
10972 /* For vector types of elements whose mode precision doesn't
10973 match their type's precision we use an element type of mode
10974 precision. The vectorization routines will have to make sure
10975 they support the proper result truncation/extension.
10976 We also make sure to build vector types with INTEGER_TYPE
10977 component type only. */
10978 if (INTEGRAL_TYPE_P (scalar_type)
10979 && (GET_MODE_BITSIZE (inner_mode) != TYPE_PRECISION (scalar_type)
10980 || TREE_CODE (scalar_type) != INTEGER_TYPE))
10981 scalar_type = build_nonstandard_integer_type (GET_MODE_BITSIZE (inner_mode),
10982 TYPE_UNSIGNED (scalar_type));
10983
10984 /* We shouldn't end up building VECTOR_TYPEs of non-scalar components.
10985 When the component mode passes the above test simply use a type
10986 corresponding to that mode. The theory is that any use that
10987 would cause problems with this will disable vectorization anyway. */
10988 else if (!SCALAR_FLOAT_TYPE_P (scalar_type)
10989 && !INTEGRAL_TYPE_P (scalar_type))
10990 scalar_type = lang_hooks.types.type_for_mode (inner_mode, 1);
10991
10992 /* We can't build a vector type of elements with alignment bigger than
10993 their size. */
10994 else if (nbytes < TYPE_ALIGN_UNIT (scalar_type))
10995 scalar_type = lang_hooks.types.type_for_mode (inner_mode,
10996 TYPE_UNSIGNED (scalar_type));
10997
10998 /* If we fell back to using the mode, fail if there was
10999 no scalar type for it. */
11000 if (scalar_type == NULL_TREE)
11001 return NULL_TREE;
11002
11003 /* If no prevailing mode was supplied, use the mode the target prefers.
11004 Otherwise lookup a vector mode based on the prevailing mode. */
11005 if (prevailing_mode == VOIDmode)
11006 {
11007 gcc_assert (known_eq (nunits, 0U));
11008 simd_mode = targetm.vectorize.preferred_simd_mode (inner_mode);
11009 if (SCALAR_INT_MODE_P (simd_mode))
11010 {
11011 /* Traditional behavior is not to take the integer mode
11012 literally, but simply to use it as a way of determining
11013 the vector size. It is up to mode_for_vector to decide
11014 what the TYPE_MODE should be.
11015
11016 Note that nunits == 1 is allowed in order to support single
11017 element vector types. */
11018 if (!multiple_p (GET_MODE_SIZE (simd_mode), nbytes, &nunits)
11019 || !mode_for_vector (inner_mode, nunits).exists (&simd_mode))
11020 return NULL_TREE;
11021 }
11022 }
11023 else if (SCALAR_INT_MODE_P (prevailing_mode)
11024 || !related_vector_mode (prevailing_mode,
11025 inner_mode, nunits).exists (&simd_mode))
11026 {
11027 /* Fall back to using mode_for_vector, mostly in the hope of being
11028 able to use an integer mode. */
11029 if (known_eq (nunits, 0U)
11030 && !multiple_p (GET_MODE_SIZE (prevailing_mode), nbytes, &nunits))
11031 return NULL_TREE;
11032
11033 if (!mode_for_vector (inner_mode, nunits).exists (&simd_mode))
11034 return NULL_TREE;
11035 }
11036
11037 vectype = build_vector_type_for_mode (scalar_type, simd_mode);
11038
11039 /* In cases where the mode was chosen by mode_for_vector, check that
11040 the target actually supports the chosen mode, or that it at least
11041 allows the vector mode to be replaced by a like-sized integer. */
11042 if (!VECTOR_MODE_P (TYPE_MODE (vectype))
11043 && !INTEGRAL_MODE_P (TYPE_MODE (vectype)))
11044 return NULL_TREE;
11045
11046 /* Re-attach the address-space qualifier if we canonicalized the scalar
11047 type. */
11048 if (TYPE_ADDR_SPACE (orig_scalar_type) != TYPE_ADDR_SPACE (vectype))
11049 return build_qualified_type
11050 (vectype, KEEP_QUAL_ADDR_SPACE (TYPE_QUALS (orig_scalar_type)));
11051
11052 return vectype;
11053 }
11054
11055 /* Function get_vectype_for_scalar_type.
11056
11057 Returns the vector type corresponding to SCALAR_TYPE as supported
11058 by the target. If GROUP_SIZE is nonzero and we're performing BB
11059 vectorization, make sure that the number of elements in the vector
11060 is no bigger than GROUP_SIZE. */
11061
11062 tree
11063 get_vectype_for_scalar_type (vec_info *vinfo, tree scalar_type,
11064 unsigned int group_size)
11065 {
11066 /* For BB vectorization, we should always have a group size once we've
11067 constructed the SLP tree; the only valid uses of zero GROUP_SIZEs
11068 are tentative requests during things like early data reference
11069 analysis and pattern recognition. */
11070 if (is_a <bb_vec_info> (vinfo))
11071 gcc_assert (vinfo->slp_instances.is_empty () || group_size != 0);
11072 else
11073 group_size = 0;
11074
11075 tree vectype = get_related_vectype_for_scalar_type (vinfo->vector_mode,
11076 scalar_type);
11077 if (vectype && vinfo->vector_mode == VOIDmode)
11078 vinfo->vector_mode = TYPE_MODE (vectype);
11079
11080 /* Register the natural choice of vector type, before the group size
11081 has been applied. */
11082 if (vectype)
11083 vinfo->used_vector_modes.add (TYPE_MODE (vectype));
11084
11085 /* If the natural choice of vector type doesn't satisfy GROUP_SIZE,
11086 try again with an explicit number of elements. */
11087 if (vectype
11088 && group_size
11089 && maybe_ge (TYPE_VECTOR_SUBPARTS (vectype), group_size))
11090 {
11091 /* Start with the biggest number of units that fits within
11092 GROUP_SIZE and halve it until we find a valid vector type.
11093 Usually either the first attempt will succeed or all will
11094 fail (in the latter case because GROUP_SIZE is too small
11095 for the target), but it's possible that a target could have
11096 a hole between supported vector types.
11097
11098 If GROUP_SIZE is not a power of 2, this has the effect of
11099 trying the largest power of 2 that fits within the group,
11100 even though the group is not a multiple of that vector size.
11101 The BB vectorizer will then try to carve up the group into
11102 smaller pieces. */
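/* For instance (purely illustrative): with GROUP_SIZE == 12, floor_log2
   gives 3, so we start with 8 units and retry with 4 and then 2 if the
   larger requests are not supported.  */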
11103 unsigned int nunits = 1 << floor_log2 (group_size);
11104 do
11105 {
11106 vectype = get_related_vectype_for_scalar_type (vinfo->vector_mode,
11107 scalar_type, nunits);
11108 nunits /= 2;
11109 }
11110 while (nunits > 1 && !vectype);
11111 }
11112
11113 return vectype;
11114 }
11115
11116 /* Return the vector type corresponding to SCALAR_TYPE as supported
11117 by the target. NODE, if nonnull, is the SLP tree node that will
11118 use the returned vector type. */
11119
11120 tree
11121 get_vectype_for_scalar_type (vec_info *vinfo, tree scalar_type, slp_tree node)
11122 {
11123 unsigned int group_size = 0;
11124 if (node)
11125 group_size = SLP_TREE_LANES (node);
11126 return get_vectype_for_scalar_type (vinfo, scalar_type, group_size);
11127 }
11128
11129 /* Function get_mask_type_for_scalar_type.
11130
11131 Returns the mask type corresponding to a result of comparison
11132 of vectors of specified SCALAR_TYPE as supported by target.
11133 If GROUP_SIZE is nonzero and we're performing BB vectorization,
11134 make sure that the number of elements in the vector is no bigger
11135 than GROUP_SIZE. */
11136
11137 tree
11138 get_mask_type_for_scalar_type (vec_info *vinfo, tree scalar_type,
11139 unsigned int group_size)
11140 {
11141 tree vectype = get_vectype_for_scalar_type (vinfo, scalar_type, group_size);
11142
11143 if (!vectype)
11144 return NULL;
11145
11146 return truth_type_for (vectype);
11147 }
11148
11149 /* Function get_same_sized_vectype
11150
11151 Returns a vector type corresponding to SCALAR_TYPE of size
11152 VECTOR_TYPE if supported by the target. */
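/* For example (an assumed illustration): with SCALAR_TYPE double and a
   16-byte VECTOR_TYPE such as one with V4SFmode, NUNITS below becomes
   16 / 8 == 2, so we ask for a two-element vector of double.  */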
11153
11154 tree
11155 get_same_sized_vectype (tree scalar_type, tree vector_type)
11156 {
11157 if (VECT_SCALAR_BOOLEAN_TYPE_P (scalar_type))
11158 return truth_type_for (vector_type);
11159
11160 poly_uint64 nunits;
11161 if (!multiple_p (GET_MODE_SIZE (TYPE_MODE (vector_type)),
11162 GET_MODE_SIZE (TYPE_MODE (scalar_type)), &nunits))
11163 return NULL_TREE;
11164
11165 return get_related_vectype_for_scalar_type (TYPE_MODE (vector_type),
11166 scalar_type, nunits);
11167 }
11168
11169 /* Return true if replacing LOOP_VINFO->vector_mode with VECTOR_MODE
11170 would not change the chosen vector modes. */
11171
11172 bool
11173 vect_chooses_same_modes_p (vec_info *vinfo, machine_mode vector_mode)
11174 {
11175 for (vec_info::mode_set::iterator i = vinfo->used_vector_modes.begin ();
11176 i != vinfo->used_vector_modes.end (); ++i)
11177 if (!VECTOR_MODE_P (*i)
11178 || related_vector_mode (vector_mode, GET_MODE_INNER (*i), 0) != *i)
11179 return false;
11180 return true;
11181 }
11182
11183 /* Function vect_is_simple_use.
11184
11185 Input:
11186 VINFO - the vect info of the loop or basic block that is being vectorized.
11187 OPERAND - operand in the loop or bb.
11188 Output:
11189 DEF_STMT_INFO_OUT (optional) - information about the defining stmt in
11190 case OPERAND is an SSA_NAME that is defined in the vectorizable region
11191 DEF_STMT_OUT (optional) - the defining stmt in case OPERAND is an SSA_NAME;
11192 the definition could be anywhere in the function
11193 DT - the type of definition
11194
11195 Returns whether a stmt with OPERAND can be vectorized.
11196 For loops, supportable operands are constants, loop invariants, and operands
11197 that are defined by the current iteration of the loop. Unsupportable
11198 operands are those that are defined by a previous iteration of the loop (as
11199 is the case in reduction/induction computations).
11200 For basic blocks, supportable operands are constants and bb invariants.
11201 For now, operands defined outside the basic block are not supported. */
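/* A small illustration of the classification (not an exhaustive list):
   in  for (i = 0; i < n; i++) a[i] = b[i] + x;  the loop-invariant X is
   vect_external_def, a literal constant would be vect_constant_def, and
   the SSA name holding the loaded b[i] is vect_internal_def.  */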
11202
11203 bool
11204 vect_is_simple_use (tree operand, vec_info *vinfo, enum vect_def_type *dt,
11205 stmt_vec_info *def_stmt_info_out, gimple **def_stmt_out)
11206 {
11207 if (def_stmt_info_out)
11208 *def_stmt_info_out = NULL;
11209 if (def_stmt_out)
11210 *def_stmt_out = NULL;
11211 *dt = vect_unknown_def_type;
11212
11213 if (dump_enabled_p ())
11214 {
11215 dump_printf_loc (MSG_NOTE, vect_location,
11216 "vect_is_simple_use: operand ");
11217 if (TREE_CODE (operand) == SSA_NAME
11218 && !SSA_NAME_IS_DEFAULT_DEF (operand))
11219 dump_gimple_expr (MSG_NOTE, TDF_SLIM, SSA_NAME_DEF_STMT (operand), 0);
11220 else
11221 dump_generic_expr (MSG_NOTE, TDF_SLIM, operand);
11222 }
11223
11224 if (CONSTANT_CLASS_P (operand))
11225 *dt = vect_constant_def;
11226 else if (is_gimple_min_invariant (operand))
11227 *dt = vect_external_def;
11228 else if (TREE_CODE (operand) != SSA_NAME)
11229 *dt = vect_unknown_def_type;
11230 else if (SSA_NAME_IS_DEFAULT_DEF (operand))
11231 *dt = vect_external_def;
11232 else
11233 {
11234 gimple *def_stmt = SSA_NAME_DEF_STMT (operand);
11235 stmt_vec_info stmt_vinfo = vinfo->lookup_def (operand);
11236 if (!stmt_vinfo)
11237 *dt = vect_external_def;
11238 else
11239 {
11240 stmt_vinfo = vect_stmt_to_vectorize (stmt_vinfo);
11241 def_stmt = stmt_vinfo->stmt;
11242 switch (gimple_code (def_stmt))
11243 {
11244 case GIMPLE_PHI:
11245 case GIMPLE_ASSIGN:
11246 case GIMPLE_CALL:
11247 *dt = STMT_VINFO_DEF_TYPE (stmt_vinfo);
11248 break;
11249 default:
11250 *dt = vect_unknown_def_type;
11251 break;
11252 }
11253 if (def_stmt_info_out)
11254 *def_stmt_info_out = stmt_vinfo;
11255 }
11256 if (def_stmt_out)
11257 *def_stmt_out = def_stmt;
11258 }
11259
11260 if (dump_enabled_p ())
11261 {
11262 dump_printf (MSG_NOTE, ", type of def: ");
11263 switch (*dt)
11264 {
11265 case vect_uninitialized_def:
11266 dump_printf (MSG_NOTE, "uninitialized\n");
11267 break;
11268 case vect_constant_def:
11269 dump_printf (MSG_NOTE, "constant\n");
11270 break;
11271 case vect_external_def:
11272 dump_printf (MSG_NOTE, "external\n");
11273 break;
11274 case vect_internal_def:
11275 dump_printf (MSG_NOTE, "internal\n");
11276 break;
11277 case vect_induction_def:
11278 dump_printf (MSG_NOTE, "induction\n");
11279 break;
11280 case vect_reduction_def:
11281 dump_printf (MSG_NOTE, "reduction\n");
11282 break;
11283 case vect_double_reduction_def:
11284 dump_printf (MSG_NOTE, "double reduction\n");
11285 break;
11286 case vect_nested_cycle:
11287 dump_printf (MSG_NOTE, "nested cycle\n");
11288 break;
11289 case vect_unknown_def_type:
11290 dump_printf (MSG_NOTE, "unknown\n");
11291 break;
11292 }
11293 }
11294
11295 if (*dt == vect_unknown_def_type)
11296 {
11297 if (dump_enabled_p ())
11298 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
11299 "Unsupported pattern.\n");
11300 return false;
11301 }
11302
11303 return true;
11304 }
11305
11306 /* Function vect_is_simple_use.
11307
11308 Same as vect_is_simple_use but also determines the vector operand
11309 type of OPERAND and stores it to *VECTYPE. If the definition of
11310 OPERAND is vect_uninitialized_def, vect_constant_def or
11311 vect_external_def *VECTYPE will be set to NULL_TREE and the caller
11312 is responsible for computing the best-suited vector type for the
11313 scalar operand. */
11314
11315 bool
11316 vect_is_simple_use (tree operand, vec_info *vinfo, enum vect_def_type *dt,
11317 tree *vectype, stmt_vec_info *def_stmt_info_out,
11318 gimple **def_stmt_out)
11319 {
11320 stmt_vec_info def_stmt_info;
11321 gimple *def_stmt;
11322 if (!vect_is_simple_use (operand, vinfo, dt, &def_stmt_info, &def_stmt))
11323 return false;
11324
11325 if (def_stmt_out)
11326 *def_stmt_out = def_stmt;
11327 if (def_stmt_info_out)
11328 *def_stmt_info_out = def_stmt_info;
11329
11330 /* Now get a vector type if the def is internal, otherwise supply
11331 NULL_TREE and leave it up to the caller to figure out a proper
11332 type for the use stmt. */
11333 if (*dt == vect_internal_def
11334 || *dt == vect_induction_def
11335 || *dt == vect_reduction_def
11336 || *dt == vect_double_reduction_def
11337 || *dt == vect_nested_cycle)
11338 {
11339 *vectype = STMT_VINFO_VECTYPE (def_stmt_info);
11340 gcc_assert (*vectype != NULL_TREE);
11341 if (dump_enabled_p ())
11342 dump_printf_loc (MSG_NOTE, vect_location,
11343 "vect_is_simple_use: vectype %T\n", *vectype);
11344 }
11345 else if (*dt == vect_uninitialized_def
11346 || *dt == vect_constant_def
11347 || *dt == vect_external_def)
11348 *vectype = NULL_TREE;
11349 else
11350 gcc_unreachable ();
11351
11352 return true;
11353 }
11354
11355 /* Function vect_is_simple_use.
11356
11357 Same as vect_is_simple_use but determines the operand by operand
11358 position OPERAND from either STMT or SLP_NODE, filling in *OP
11359 and *SLP_DEF (when SLP_NODE is not NULL). */
11360
11361 bool
11362 vect_is_simple_use (vec_info *vinfo, stmt_vec_info stmt, slp_tree slp_node,
11363 unsigned operand, tree *op, slp_tree *slp_def,
11364 enum vect_def_type *dt,
11365 tree *vectype, stmt_vec_info *def_stmt_info_out)
11366 {
11367 if (slp_node)
11368 {
11369 slp_tree child = SLP_TREE_CHILDREN (slp_node)[operand];
11370 *slp_def = child;
11371 *vectype = SLP_TREE_VECTYPE (child);
11372 if (SLP_TREE_DEF_TYPE (child) == vect_internal_def)
11373 {
11374 *op = gimple_get_lhs (SLP_TREE_REPRESENTATIVE (child)->stmt);
11375 return vect_is_simple_use (*op, vinfo, dt, def_stmt_info_out);
11376 }
11377 else
11378 {
11379 if (def_stmt_info_out)
11380 *def_stmt_info_out = NULL;
11381 *op = SLP_TREE_SCALAR_OPS (child)[0];
11382 *dt = SLP_TREE_DEF_TYPE (child);
11383 return true;
11384 }
11385 }
11386 else
11387 {
11388 *slp_def = NULL;
11389 if (gassign *ass = dyn_cast <gassign *> (stmt->stmt))
11390 {
11391 if (gimple_assign_rhs_code (ass) == COND_EXPR
11392 && COMPARISON_CLASS_P (gimple_assign_rhs1 (ass)))
11393 {
11394 if (operand < 2)
11395 *op = TREE_OPERAND (gimple_assign_rhs1 (ass), operand);
11396 else
11397 *op = gimple_op (ass, operand);
11398 }
11399 else if (gimple_assign_rhs_code (ass) == VIEW_CONVERT_EXPR)
11400 *op = TREE_OPERAND (gimple_assign_rhs1 (ass), 0);
11401 else
11402 *op = gimple_op (ass, operand + 1);
11403 }
11404 else if (gcall *call = dyn_cast <gcall *> (stmt->stmt))
11405 {
11406 if (gimple_call_internal_p (call)
11407 && internal_store_fn_p (gimple_call_internal_fn (call)))
11408 operand = internal_fn_stored_value_index (gimple_call_internal_fn
11409 (call));
11410 *op = gimple_call_arg (call, operand);
11411 }
11412 else
11413 gcc_unreachable ();
11414 return vect_is_simple_use (*op, vinfo, dt, vectype, def_stmt_info_out);
11415 }
11416 }
11417
11418 /* If OP is not NULL and is external or constant, update its vector
11419 type with VECTYPE. Returns true if successful or false if not,
11420 for example when conflicting vector types are present. */
11421
11422 bool
11423 vect_maybe_update_slp_op_vectype (slp_tree op, tree vectype)
11424 {
11425 if (!op || SLP_TREE_DEF_TYPE (op) == vect_internal_def)
11426 return true;
11427 if (SLP_TREE_VECTYPE (op))
11428 return types_compatible_p (SLP_TREE_VECTYPE (op), vectype);
11429 SLP_TREE_VECTYPE (op) = vectype;
11430 return true;
11431 }
11432
11433 /* Function supportable_widening_operation
11434
11435 Check whether an operation represented by the code CODE is a
11436 widening operation that is supported by the target platform in
11437 vector form (i.e., when operating on arguments of type VECTYPE_IN
11438 producing a result of type VECTYPE_OUT).
11439
11440 Widening operations we currently support are NOP (CONVERT), FLOAT,
11441 FIX_TRUNC and WIDEN_MULT. This function checks if these operations
11442 are supported by the target platform either directly (via vector
11443 tree-codes), or via target builtins.
11444
11445 Output:
11446 - CODE1 and CODE2 are codes of vector operations to be used when
11447 vectorizing the operation, if available.
11448 - MULTI_STEP_CVT determines the number of required intermediate steps in
11449 case of multi-step conversion (like char->short->int - in that case
11450 MULTI_STEP_CVT will be 1).
11451 - INTERM_TYPES contains the intermediate type required to perform the
11452 widening operation (short in the above example). */
11453
11454 bool
11455 supportable_widening_operation (vec_info *vinfo,
11456 enum tree_code code, stmt_vec_info stmt_info,
11457 tree vectype_out, tree vectype_in,
11458 enum tree_code *code1, enum tree_code *code2,
11459 int *multi_step_cvt,
11460 vec<tree> *interm_types)
11461 {
11462 loop_vec_info loop_info = dyn_cast <loop_vec_info> (vinfo);
11463 class loop *vect_loop = NULL;
11464 machine_mode vec_mode;
11465 enum insn_code icode1, icode2;
11466 optab optab1, optab2;
11467 tree vectype = vectype_in;
11468 tree wide_vectype = vectype_out;
11469 enum tree_code c1, c2;
11470 int i;
11471 tree prev_type, intermediate_type;
11472 machine_mode intermediate_mode, prev_mode;
11473 optab optab3, optab4;
11474
11475 *multi_step_cvt = 0;
11476 if (loop_info)
11477 vect_loop = LOOP_VINFO_LOOP (loop_info);
11478
11479 switch (code)
11480 {
11481 case WIDEN_MULT_EXPR:
11482 /* The result of a vectorized widening operation usually requires
11483 two vectors (because the widened results do not fit into one vector).
11484 The generated vector results would normally be expected to be
11485 generated in the same order as in the original scalar computation,
11486 i.e. if 8 results are generated in each vector iteration, they are
11487 to be organized as follows:
11488 vect1: [res1,res2,res3,res4],
11489 vect2: [res5,res6,res7,res8].
11490
11491 However, in the special case that the result of the widening
11492 operation is used in a reduction computation only, the order doesn't
11493 matter (because when vectorizing a reduction we change the order of
11494 the computation). Some targets can take advantage of this and
11495 generate more efficient code. For example, targets like Altivec,
11496 that support widen_mult using a sequence of {mult_even,mult_odd}
11497 generate the following vectors:
11498 vect1: [res1,res3,res5,res7],
11499 vect2: [res2,res4,res6,res8].
11500
11501 When vectorizing outer-loops, we execute the inner-loop sequentially
11502 (each vectorized inner-loop iteration contributes to VF outer-loop
11503 iterations in parallel). We therefore don't allow changing the
11504 order of the computation in the inner-loop during outer-loop
11505 vectorization. */
11506 /* TODO: Another case in which order doesn't *really* matter is when we
11507 widen and then contract again, e.g. (short)((int)x * y >> 8).
11508 Normally, pack_trunc performs an even/odd permute, whereas the
11509 repack from an even/odd expansion would be an interleave, which
11510 would be significantly simpler for e.g. AVX2. */
11511 /* In any case, in order to avoid duplicating the code below, recurse
11512 on VEC_WIDEN_MULT_EVEN_EXPR. If it succeeds, all the return values
11513 are properly set up for the caller. If we fail, we'll continue with
11514 a VEC_WIDEN_MULT_LO/HI_EXPR check. */
11515 if (vect_loop
11516 && STMT_VINFO_RELEVANT (stmt_info) == vect_used_by_reduction
11517 && !nested_in_vect_loop_p (vect_loop, stmt_info)
11518 && supportable_widening_operation (vinfo, VEC_WIDEN_MULT_EVEN_EXPR,
11519 stmt_info, vectype_out,
11520 vectype_in, code1, code2,
11521 multi_step_cvt, interm_types))
11522 {
11523 /* Elements in a vector with vect_used_by_reduction property cannot
11524 be reordered if the use chain with this property does not have the
11525 same operation. One such example is s += a * b, where elements
11526 in a and b cannot be reordered. Here we check if the vector defined
11527 by STMT is only directly used in the reduction statement. */
11528 tree lhs = gimple_assign_lhs (stmt_info->stmt);
11529 stmt_vec_info use_stmt_info = loop_info->lookup_single_use (lhs);
11530 if (use_stmt_info
11531 && STMT_VINFO_DEF_TYPE (use_stmt_info) == vect_reduction_def)
11532 return true;
11533 }
11534 c1 = VEC_WIDEN_MULT_LO_EXPR;
11535 c2 = VEC_WIDEN_MULT_HI_EXPR;
11536 break;
11537
11538 case DOT_PROD_EXPR:
11539 c1 = DOT_PROD_EXPR;
11540 c2 = DOT_PROD_EXPR;
11541 break;
11542
11543 case SAD_EXPR:
11544 c1 = SAD_EXPR;
11545 c2 = SAD_EXPR;
11546 break;
11547
11548 case VEC_WIDEN_MULT_EVEN_EXPR:
11549 /* Support the recursion induced just above. */
11550 c1 = VEC_WIDEN_MULT_EVEN_EXPR;
11551 c2 = VEC_WIDEN_MULT_ODD_EXPR;
11552 break;
11553
11554 case WIDEN_LSHIFT_EXPR:
11555 c1 = VEC_WIDEN_LSHIFT_LO_EXPR;
11556 c2 = VEC_WIDEN_LSHIFT_HI_EXPR;
11557 break;
11558
11559 case WIDEN_PLUS_EXPR:
11560 c1 = VEC_WIDEN_PLUS_LO_EXPR;
11561 c2 = VEC_WIDEN_PLUS_HI_EXPR;
11562 break;
11563
11564 case WIDEN_MINUS_EXPR:
11565 c1 = VEC_WIDEN_MINUS_LO_EXPR;
11566 c2 = VEC_WIDEN_MINUS_HI_EXPR;
11567 break;
11568
11569 CASE_CONVERT:
11570 c1 = VEC_UNPACK_LO_EXPR;
11571 c2 = VEC_UNPACK_HI_EXPR;
11572 break;
11573
11574 case FLOAT_EXPR:
11575 c1 = VEC_UNPACK_FLOAT_LO_EXPR;
11576 c2 = VEC_UNPACK_FLOAT_HI_EXPR;
11577 break;
11578
11579 case FIX_TRUNC_EXPR:
11580 c1 = VEC_UNPACK_FIX_TRUNC_LO_EXPR;
11581 c2 = VEC_UNPACK_FIX_TRUNC_HI_EXPR;
11582 break;
11583
11584 default:
11585 gcc_unreachable ();
11586 }
11587
11588 if (BYTES_BIG_ENDIAN && c1 != VEC_WIDEN_MULT_EVEN_EXPR)
11589 std::swap (c1, c2);
11590
11591 if (code == FIX_TRUNC_EXPR)
11592 {
11593 /* The signedness is determined from output operand. */
11594 optab1 = optab_for_tree_code (c1, vectype_out, optab_default);
11595 optab2 = optab_for_tree_code (c2, vectype_out, optab_default);
11596 }
11597 else if (CONVERT_EXPR_CODE_P (code)
11598 && VECTOR_BOOLEAN_TYPE_P (wide_vectype)
11599 && VECTOR_BOOLEAN_TYPE_P (vectype)
11600 && TYPE_MODE (wide_vectype) == TYPE_MODE (vectype)
11601 && SCALAR_INT_MODE_P (TYPE_MODE (vectype)))
11602 {
11603 /* If the input and result modes are the same, a different optab
11604 is needed where we pass in the number of units in vectype. */
11605 optab1 = vec_unpacks_sbool_lo_optab;
11606 optab2 = vec_unpacks_sbool_hi_optab;
11607 }
11608 else
11609 {
11610 optab1 = optab_for_tree_code (c1, vectype, optab_default);
11611 optab2 = optab_for_tree_code (c2, vectype, optab_default);
11612 }
11613
11614 if (!optab1 || !optab2)
11615 return false;
11616
11617 vec_mode = TYPE_MODE (vectype);
11618 if ((icode1 = optab_handler (optab1, vec_mode)) == CODE_FOR_nothing
11619 || (icode2 = optab_handler (optab2, vec_mode)) == CODE_FOR_nothing)
11620 return false;
11621
11622 *code1 = c1;
11623 *code2 = c2;
11624
11625 if (insn_data[icode1].operand[0].mode == TYPE_MODE (wide_vectype)
11626 && insn_data[icode2].operand[0].mode == TYPE_MODE (wide_vectype))
11627 {
11628 if (!VECTOR_BOOLEAN_TYPE_P (vectype))
11629 return true;
11630 /* For scalar masks we may have different boolean
11631 vector types having the same QImode. Thus we
11632 add an additional check on the number of elements. */
11633 if (known_eq (TYPE_VECTOR_SUBPARTS (vectype),
11634 TYPE_VECTOR_SUBPARTS (wide_vectype) * 2))
11635 return true;
11636 }
11637
11638 /* Check if it's a multi-step conversion that can be done using intermediate
11639 types. */
11640
11641 prev_type = vectype;
11642 prev_mode = vec_mode;
11643
11644 if (!CONVERT_EXPR_CODE_P (code))
11645 return false;
11646
11647 /* We assume here that there will not be more than MAX_INTERM_CVT_STEPS
11648 intermediate steps in the promotion sequence. We try
11649 MAX_INTERM_CVT_STEPS to get to WIDE_VECTYPE, and fail if we do
11650 not. */
11651 interm_types->create (MAX_INTERM_CVT_STEPS);
11652 for (i = 0; i < MAX_INTERM_CVT_STEPS; i++)
11653 {
11654 intermediate_mode = insn_data[icode1].operand[0].mode;
11655 if (VECTOR_BOOLEAN_TYPE_P (prev_type))
11656 intermediate_type
11657 = vect_halve_mask_nunits (prev_type, intermediate_mode);
11658 else
11659 intermediate_type
11660 = lang_hooks.types.type_for_mode (intermediate_mode,
11661 TYPE_UNSIGNED (prev_type));
11662
11663 if (VECTOR_BOOLEAN_TYPE_P (intermediate_type)
11664 && VECTOR_BOOLEAN_TYPE_P (prev_type)
11665 && intermediate_mode == prev_mode
11666 && SCALAR_INT_MODE_P (prev_mode))
11667 {
11668 /* If the input and result modes are the same, a different optab
11669 is needed where we pass in the number of units in vectype. */
11670 optab3 = vec_unpacks_sbool_lo_optab;
11671 optab4 = vec_unpacks_sbool_hi_optab;
11672 }
11673 else
11674 {
11675 optab3 = optab_for_tree_code (c1, intermediate_type, optab_default);
11676 optab4 = optab_for_tree_code (c2, intermediate_type, optab_default);
11677 }
11678
11679 if (!optab3 || !optab4
11680 || (icode1 = optab_handler (optab1, prev_mode)) == CODE_FOR_nothing
11681 || insn_data[icode1].operand[0].mode != intermediate_mode
11682 || (icode2 = optab_handler (optab2, prev_mode)) == CODE_FOR_nothing
11683 || insn_data[icode2].operand[0].mode != intermediate_mode
11684 || ((icode1 = optab_handler (optab3, intermediate_mode))
11685 == CODE_FOR_nothing)
11686 || ((icode2 = optab_handler (optab4, intermediate_mode))
11687 == CODE_FOR_nothing))
11688 break;
11689
11690 interm_types->quick_push (intermediate_type);
11691 (*multi_step_cvt)++;
11692
11693 if (insn_data[icode1].operand[0].mode == TYPE_MODE (wide_vectype)
11694 && insn_data[icode2].operand[0].mode == TYPE_MODE (wide_vectype))
11695 {
11696 if (!VECTOR_BOOLEAN_TYPE_P (vectype))
11697 return true;
11698 if (known_eq (TYPE_VECTOR_SUBPARTS (intermediate_type),
11699 TYPE_VECTOR_SUBPARTS (wide_vectype) * 2))
11700 return true;
11701 }
11702
11703 prev_type = intermediate_type;
11704 prev_mode = intermediate_mode;
11705 }
11706
11707 interm_types->release ();
11708 return false;
11709 }
11710
11711
11712 /* Function supportable_narrowing_operation
11713
11714 Check whether an operation represented by the code CODE is a
11715 narrowing operation that is supported by the target platform in
11716 vector form (i.e., when operating on arguments of type VECTYPE_IN
11717 and producing a result of type VECTYPE_OUT).
11718
11719 Narrowing operations we currently support are NOP (CONVERT), FIX_TRUNC
11720 and FLOAT. This function checks if these operations are supported by
11721 the target platform directly via vector tree-codes.
11722
11723 Output:
11724 - CODE1 is the code of a vector operation to be used when
11725 vectorizing the operation, if available.
11726 - MULTI_STEP_CVT determines the number of required intermediate steps in
11727 case of multi-step conversion (like int->short->char - in that case
11728 MULTI_STEP_CVT will be 1).
11729 - INTERM_TYPES contains the intermediate type required to perform the
11730 narrowing operation (short in the above example). */
11731
11732 bool
11733 supportable_narrowing_operation (enum tree_code code,
11734 tree vectype_out, tree vectype_in,
11735 enum tree_code *code1, int *multi_step_cvt,
11736 vec<tree> *interm_types)
11737 {
11738 machine_mode vec_mode;
11739 enum insn_code icode1;
11740 optab optab1, interm_optab;
11741 tree vectype = vectype_in;
11742 tree narrow_vectype = vectype_out;
11743 enum tree_code c1;
11744 tree intermediate_type, prev_type;
11745 machine_mode intermediate_mode, prev_mode;
11746 int i;
11747 bool uns;
11748
11749 *multi_step_cvt = 0;
11750 switch (code)
11751 {
11752 CASE_CONVERT:
11753 c1 = VEC_PACK_TRUNC_EXPR;
11754 if (VECTOR_BOOLEAN_TYPE_P (narrow_vectype)
11755 && VECTOR_BOOLEAN_TYPE_P (vectype)
11756 && TYPE_MODE (narrow_vectype) == TYPE_MODE (vectype)
11757 && SCALAR_INT_MODE_P (TYPE_MODE (vectype)))
11758 optab1 = vec_pack_sbool_trunc_optab;
11759 else
11760 optab1 = optab_for_tree_code (c1, vectype, optab_default);
11761 break;
11762
11763 case FIX_TRUNC_EXPR:
11764 c1 = VEC_PACK_FIX_TRUNC_EXPR;
11765 /* The signedness is determined from output operand. */
11766 optab1 = optab_for_tree_code (c1, vectype_out, optab_default);
11767 break;
11768
11769 case FLOAT_EXPR:
11770 c1 = VEC_PACK_FLOAT_EXPR;
11771 optab1 = optab_for_tree_code (c1, vectype, optab_default);
11772 break;
11773
11774 default:
11775 gcc_unreachable ();
11776 }
11777
11778 if (!optab1)
11779 return false;
11780
11781 vec_mode = TYPE_MODE (vectype);
11782 if ((icode1 = optab_handler (optab1, vec_mode)) == CODE_FOR_nothing)
11783 return false;
11784
11785 *code1 = c1;
11786
11787 if (insn_data[icode1].operand[0].mode == TYPE_MODE (narrow_vectype))
11788 {
11789 if (!VECTOR_BOOLEAN_TYPE_P (vectype))
11790 return true;
11791 /* For scalar masks we may have different boolean
11792 vector types having the same QImode. Thus we
11793 add an additional check on the number of elements. */
11794 if (known_eq (TYPE_VECTOR_SUBPARTS (vectype) * 2,
11795 TYPE_VECTOR_SUBPARTS (narrow_vectype)))
11796 return true;
11797 }
11798
11799 if (code == FLOAT_EXPR)
11800 return false;
11801
11802 /* Check if it's a multi-step conversion that can be done using intermediate
11803 types. */
11804 prev_mode = vec_mode;
11805 prev_type = vectype;
11806 if (code == FIX_TRUNC_EXPR)
11807 uns = TYPE_UNSIGNED (vectype_out);
11808 else
11809 uns = TYPE_UNSIGNED (vectype);
11810
11811 /* For multi-step FIX_TRUNC_EXPR prefer signed floating to integer
11812 conversion over unsigned, as unsigned FIX_TRUNC_EXPR is often more
11813 costly than signed. */
11814 if (code == FIX_TRUNC_EXPR && uns)
11815 {
11816 enum insn_code icode2;
11817
11818 intermediate_type
11819 = lang_hooks.types.type_for_mode (TYPE_MODE (vectype_out), 0);
11820 interm_optab
11821 = optab_for_tree_code (c1, intermediate_type, optab_default);
11822 if (interm_optab != unknown_optab
11823 && (icode2 = optab_handler (optab1, vec_mode)) != CODE_FOR_nothing
11824 && insn_data[icode1].operand[0].mode
11825 == insn_data[icode2].operand[0].mode)
11826 {
11827 uns = false;
11828 optab1 = interm_optab;
11829 icode1 = icode2;
11830 }
11831 }
11832
11833 /* We assume here that there will not be more than MAX_INTERM_CVT_STEPS
11834 intermediate steps in the narrowing sequence. We try
11835 MAX_INTERM_CVT_STEPS to get to NARROW_VECTYPE, and fail if we do not. */
11836 interm_types->create (MAX_INTERM_CVT_STEPS);
11837 for (i = 0; i < MAX_INTERM_CVT_STEPS; i++)
11838 {
11839 intermediate_mode = insn_data[icode1].operand[0].mode;
11840 if (VECTOR_BOOLEAN_TYPE_P (prev_type))
11841 intermediate_type
11842 = vect_double_mask_nunits (prev_type, intermediate_mode);
11843 else
11844 intermediate_type
11845 = lang_hooks.types.type_for_mode (intermediate_mode, uns);
11846 if (VECTOR_BOOLEAN_TYPE_P (intermediate_type)
11847 && VECTOR_BOOLEAN_TYPE_P (prev_type)
11848 && intermediate_mode == prev_mode
11849 && SCALAR_INT_MODE_P (prev_mode))
11850 interm_optab = vec_pack_sbool_trunc_optab;
11851 else
11852 interm_optab
11853 = optab_for_tree_code (VEC_PACK_TRUNC_EXPR, intermediate_type,
11854 optab_default);
11855 if (!interm_optab
11856 || ((icode1 = optab_handler (optab1, prev_mode)) == CODE_FOR_nothing)
11857 || insn_data[icode1].operand[0].mode != intermediate_mode
11858 || ((icode1 = optab_handler (interm_optab, intermediate_mode))
11859 == CODE_FOR_nothing))
11860 break;
11861
11862 interm_types->quick_push (intermediate_type);
11863 (*multi_step_cvt)++;
11864
11865 if (insn_data[icode1].operand[0].mode == TYPE_MODE (narrow_vectype))
11866 {
11867 if (!VECTOR_BOOLEAN_TYPE_P (vectype))
11868 return true;
11869 if (known_eq (TYPE_VECTOR_SUBPARTS (intermediate_type) * 2,
11870 TYPE_VECTOR_SUBPARTS (narrow_vectype)))
11871 return true;
11872 }
11873
11874 prev_mode = intermediate_mode;
11875 prev_type = intermediate_type;
11876 optab1 = interm_optab;
11877 }
11878
11879 interm_types->release ();
11880 return false;
11881 }
11882
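/* Illustrative note (a minimal usage sketch, not taken from the callers in
   this file; the names v16qi_vectype and v4si_vectype are hypothetical and
   assume a 128-bit vector target):

     enum tree_code code1;
     int multi_step_cvt;
     vec<tree> interm_types = vNULL;
     if (supportable_narrowing_operation (NOP_EXPR,
					  v16qi_vectype /+ vectype_out +/,
					  v4si_vectype /+ vectype_in +/,
					  &code1, &multi_step_cvt,
					  &interm_types))
       ...

   Narrowing vector(4) int to vector(16) char cannot be done with a single
   VEC_PACK_TRUNC_EXPR, so on success one would expect code1 ==
   VEC_PACK_TRUNC_EXPR, multi_step_cvt == 1 and interm_types holding the
   intermediate vector(8) short type, mirroring the int->short->char example
   in the function comment above.  */
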
11883 /* Generate and return a statement that sets vector mask MASK such that
11884 MASK[I] is true iff J + START_INDEX < END_INDEX for all J <= I. */
11885
11886 gcall *
11887 vect_gen_while (tree mask, tree start_index, tree end_index)
11888 {
11889 tree cmp_type = TREE_TYPE (start_index);
11890 tree mask_type = TREE_TYPE (mask);
11891 gcc_checking_assert (direct_internal_fn_supported_p (IFN_WHILE_ULT,
11892 cmp_type, mask_type,
11893 OPTIMIZE_FOR_SPEED));
11894 gcall *call = gimple_build_call_internal (IFN_WHILE_ULT, 3,
11895 start_index, end_index,
11896 build_zero_cst (mask_type));
11897 gimple_call_set_lhs (call, mask);
11898 return call;
11899 }
11900
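/* Illustrative note (not from the original sources): with an 8-lane boolean
   MASK, START_INDEX == 5 and END_INDEX == 8, the IFN_WHILE_ULT call built
   above yields

     MASK = { 1, 1, 1, 0, 0, 0, 0, 0 }

   i.e. exactly the first END_INDEX - START_INDEX == 3 lanes are active,
   which is how a fully-masked loop handles its final, partial iteration.  */
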
11901 /* Generate a vector mask of type MASK_TYPE for which index I is false iff
11902 J + START_INDEX < END_INDEX for all J <= I. Add the statements to SEQ. */
11903
11904 tree
11905 vect_gen_while_not (gimple_seq *seq, tree mask_type, tree start_index,
11906 tree end_index)
11907 {
11908 tree tmp = make_ssa_name (mask_type);
11909 gcall *call = vect_gen_while (tmp, start_index, end_index);
11910 gimple_seq_add_stmt (seq, call);
11911 return gimple_build (seq, BIT_NOT_EXPR, mask_type, tmp);
11912 }
11913
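/* Illustrative note (not from the original sources): continuing the example
   above, vect_gen_while_not with START_INDEX == 5 and END_INDEX == 8 adds
   the IFN_WHILE_ULT call plus a BIT_NOT_EXPR to SEQ and returns the
   complemented mask

     { 0, 0, 0, 1, 1, 1, 1, 1 }

   i.e. it selects the lanes that are not within the bounds.  */
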
11914 /* Try to compute the vector types required to vectorize STMT_INFO,
11915 returning true on success and false if vectorization isn't possible.
11916 If GROUP_SIZE is nonzero and we're performing BB vectorization,
11917 make sure that the number of elements in the vectors is no bigger
11918 than GROUP_SIZE.
11919
11920 On success:
11921
11922 - Set *STMT_VECTYPE_OUT to:
11923 - NULL_TREE if the statement doesn't need to be vectorized;
11924 - the equivalent of STMT_VINFO_VECTYPE otherwise.
11925
11926 - Set *NUNITS_VECTYPE_OUT to the vector type that contains the maximum
11927 number of units needed to vectorize STMT_INFO, or NULL_TREE if the
11928 statement does not help to determine the overall number of units. */
11929
11930 opt_result
11931 vect_get_vector_types_for_stmt (vec_info *vinfo, stmt_vec_info stmt_info,
11932 tree *stmt_vectype_out,
11933 tree *nunits_vectype_out,
11934 unsigned int group_size)
11935 {
11936 gimple *stmt = stmt_info->stmt;
11937
11938 /* For BB vectorization, we should always have a group size once we've
11939 constructed the SLP tree; the only valid uses of zero GROUP_SIZEs
11940 are tentative requests during things like early data reference
11941 analysis and pattern recognition. */
11942 if (is_a <bb_vec_info> (vinfo))
11943 gcc_assert (vinfo->slp_instances.is_empty () || group_size != 0);
11944 else
11945 group_size = 0;
11946
11947 *stmt_vectype_out = NULL_TREE;
11948 *nunits_vectype_out = NULL_TREE;
11949
11950 if (gimple_get_lhs (stmt) == NULL_TREE
11951 /* MASK_STORE has no lhs, but is ok. */
11952 && !gimple_call_internal_p (stmt, IFN_MASK_STORE))
11953 {
11954 if (is_a <gcall *> (stmt))
11955 {
11956 /* Ignore calls with no lhs. These must be calls to
11957 #pragma omp simd functions, and the vectorization factor
11958 they really need can't be determined until
11959 vectorizable_simd_clone_call. */
11960 if (dump_enabled_p ())
11961 dump_printf_loc (MSG_NOTE, vect_location,
11962 "defer to SIMD clone analysis.\n");
11963 return opt_result::success ();
11964 }
11965
11966 return opt_result::failure_at (stmt,
11967 "not vectorized: irregular stmt.%G", stmt);
11968 }
11969
11970 if (VECTOR_MODE_P (TYPE_MODE (gimple_expr_type (stmt))))
11971 return opt_result::failure_at (stmt,
11972 "not vectorized: vector stmt in loop:%G",
11973 stmt);
11974
11975 tree vectype;
11976 tree scalar_type = NULL_TREE;
11977 if (group_size == 0 && STMT_VINFO_VECTYPE (stmt_info))
11978 {
11979 vectype = STMT_VINFO_VECTYPE (stmt_info);
11980 if (dump_enabled_p ())
11981 dump_printf_loc (MSG_NOTE, vect_location,
11982 "precomputed vectype: %T\n", vectype);
11983 }
11984 else if (vect_use_mask_type_p (stmt_info))
11985 {
11986 unsigned int precision = stmt_info->mask_precision;
11987 scalar_type = build_nonstandard_integer_type (precision, 1);
11988 vectype = get_mask_type_for_scalar_type (vinfo, scalar_type, group_size);
11989 if (!vectype)
11990 return opt_result::failure_at (stmt, "not vectorized: unsupported"
11991 " data-type %T\n", scalar_type);
11992 if (dump_enabled_p ())
11993 dump_printf_loc (MSG_NOTE, vect_location, "vectype: %T\n", vectype);
11994 }
11995 else
11996 {
11997 if (data_reference *dr = STMT_VINFO_DATA_REF (stmt_info))
11998 scalar_type = TREE_TYPE (DR_REF (dr));
11999 else if (gimple_call_internal_p (stmt, IFN_MASK_STORE))
12000 scalar_type = TREE_TYPE (gimple_call_arg (stmt, 3));
12001 else
12002 scalar_type = TREE_TYPE (gimple_get_lhs (stmt));
12003
12004 if (dump_enabled_p ())
12005 {
12006 if (group_size)
12007 dump_printf_loc (MSG_NOTE, vect_location,
12008 "get vectype for scalar type (group size %d):"
12009 " %T\n", group_size, scalar_type);
12010 else
12011 dump_printf_loc (MSG_NOTE, vect_location,
12012 "get vectype for scalar type: %T\n", scalar_type);
12013 }
12014 vectype = get_vectype_for_scalar_type (vinfo, scalar_type, group_size);
12015 if (!vectype)
12016 return opt_result::failure_at (stmt,
12017 "not vectorized:"
12018 " unsupported data-type %T\n",
12019 scalar_type);
12020
12021 if (dump_enabled_p ())
12022 dump_printf_loc (MSG_NOTE, vect_location, "vectype: %T\n", vectype);
12023 }
12024 *stmt_vectype_out = vectype;
12025
12026 /* Don't try to compute scalar types if the stmt produces a boolean
12027 vector; use the existing vector type instead. */
12028 tree nunits_vectype = vectype;
12029 if (!VECTOR_BOOLEAN_TYPE_P (vectype))
12030 {
12031 /* The number of units is set according to the smallest scalar
12032 type (or the largest vector size, but we only support one
12033 vector size per vectorization). */
12034 HOST_WIDE_INT dummy;
12035 scalar_type = vect_get_smallest_scalar_type (stmt_info, &dummy, &dummy);
12036 if (scalar_type != TREE_TYPE (vectype))
12037 {
12038 if (dump_enabled_p ())
12039 dump_printf_loc (MSG_NOTE, vect_location,
12040 "get vectype for smallest scalar type: %T\n",
12041 scalar_type);
12042 nunits_vectype = get_vectype_for_scalar_type (vinfo, scalar_type,
12043 group_size);
12044 if (!nunits_vectype)
12045 return opt_result::failure_at
12046 (stmt, "not vectorized: unsupported data-type %T\n",
12047 scalar_type);
12048 if (dump_enabled_p ())
12049 dump_printf_loc (MSG_NOTE, vect_location, "nunits vectype: %T\n",
12050 nunits_vectype);
12051 }
12052 }
12053
12054 gcc_assert (multiple_p (TYPE_VECTOR_SUBPARTS (nunits_vectype),
12055 TYPE_VECTOR_SUBPARTS (*stmt_vectype_out)));
12056
12057 if (dump_enabled_p ())
12058 {
12059 dump_printf_loc (MSG_NOTE, vect_location, "nunits = ");
12060 dump_dec (MSG_NOTE, TYPE_VECTOR_SUBPARTS (nunits_vectype));
12061 dump_printf (MSG_NOTE, "\n");
12062 }
12063
12064 *nunits_vectype_out = nunits_vectype;
12065 return opt_result::success ();
12066 }
12067
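/* Illustrative note (a hypothetical example, assuming 128-bit vectors): for
   a widening statement such as

     int_dest = (int) char_src;

   *STMT_VECTYPE_OUT is derived from the int result (vector(4) int), while
   the smallest scalar type in the statement is char, so *NUNITS_VECTYPE_OUT
   is vector(16) char; the multiple_p assert above then checks that 16 is a
   multiple of 4.  */
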
12068 /* Generate and return a statement sequence that sets the vector length LEN as follows:
12069
12070 min_of_start_and_end = min (START_INDEX, END_INDEX);
12071 left_len = END_INDEX - min_of_start_and_end;
12072 rhs = min (left_len, LEN_LIMIT);
12073 LEN = rhs;
12074
12075 Note: the cost of the code generated by this function is modeled
12076 by vect_estimate_min_profitable_iters, so changes here may need
12077 corresponding changes there. */
12078
12079 gimple_seq
12080 vect_gen_len (tree len, tree start_index, tree end_index, tree len_limit)
12081 {
12082 gimple_seq stmts = NULL;
12083 tree len_type = TREE_TYPE (len);
12084 gcc_assert (TREE_TYPE (start_index) == len_type);
12085
12086 tree min = gimple_build (&stmts, MIN_EXPR, len_type, start_index, end_index);
12087 tree left_len = gimple_build (&stmts, MINUS_EXPR, len_type, end_index, min);
12088 tree rhs = gimple_build (&stmts, MIN_EXPR, len_type, left_len, len_limit);
12089 gimple* stmt = gimple_build_assign (len, rhs);
12090 gimple_seq_add_stmt (&stmts, stmt);
12091
12092 return stmts;
12093 }
12094
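/* Illustrative note (worked example, not from the original sources): with
   END_INDEX == 10 and LEN_LIMIT == 4 the sequence above gives

     START_INDEX == 0: min == 0, left_len == 10, LEN == 4 (a full vector)
     START_INDEX == 8: min == 8, left_len == 2,  LEN == 2 (final, partial step)

   so LEN never exceeds LEN_LIMIT and drops to the remaining iteration count
   for the last vector iteration.  */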