gcc/tree-vect-stmts.c
1 /* Statement Analysis and Transformation for Vectorization
2 Copyright (C) 2003-2020 Free Software Foundation, Inc.
3 Contributed by Dorit Naishlos <dorit@il.ibm.com>
4 and Ira Rosen <irar@il.ibm.com>
5
6 This file is part of GCC.
7
8 GCC is free software; you can redistribute it and/or modify it under
9 the terms of the GNU General Public License as published by the Free
10 Software Foundation; either version 3, or (at your option) any later
11 version.
12
13 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
14 WARRANTY; without even the implied warranty of MERCHANTABILITY or
15 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
16 for more details.
17
18 You should have received a copy of the GNU General Public License
19 along with GCC; see the file COPYING3. If not see
20 <http://www.gnu.org/licenses/>. */
21
22 #include "config.h"
23 #include "system.h"
24 #include "coretypes.h"
25 #include "backend.h"
26 #include "target.h"
27 #include "rtl.h"
28 #include "tree.h"
29 #include "gimple.h"
30 #include "ssa.h"
31 #include "optabs-tree.h"
32 #include "insn-config.h"
33 #include "recog.h" /* FIXME: for insn_data */
34 #include "cgraph.h"
35 #include "dumpfile.h"
36 #include "alias.h"
37 #include "fold-const.h"
38 #include "stor-layout.h"
39 #include "tree-eh.h"
40 #include "gimplify.h"
41 #include "gimple-iterator.h"
42 #include "gimplify-me.h"
43 #include "tree-cfg.h"
44 #include "tree-ssa-loop-manip.h"
45 #include "cfgloop.h"
46 #include "explow.h"
47 #include "tree-ssa-loop.h"
48 #include "tree-scalar-evolution.h"
49 #include "tree-vectorizer.h"
50 #include "builtins.h"
51 #include "internal-fn.h"
52 #include "tree-vector-builder.h"
53 #include "vec-perm-indices.h"
54 #include "tree-ssa-loop-niter.h"
55 #include "gimple-fold.h"
56 #include "regs.h"
57 #include "attribs.h"
58
59 /* For lang_hooks.types.type_for_mode. */
60 #include "langhooks.h"
61
62 /* Return the vectorized type for the given statement. */
63
64 tree
65 stmt_vectype (class _stmt_vec_info *stmt_info)
66 {
67 return STMT_VINFO_VECTYPE (stmt_info);
68 }
69
70 /* Return TRUE iff the given statement is in an inner loop relative to
71 the loop being vectorized. */
72 bool
73 stmt_in_inner_loop_p (vec_info *vinfo, class _stmt_vec_info *stmt_info)
74 {
75 gimple *stmt = STMT_VINFO_STMT (stmt_info);
76 basic_block bb = gimple_bb (stmt);
77 loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
78 class loop* loop;
79
80 if (!loop_vinfo)
81 return false;
82
83 loop = LOOP_VINFO_LOOP (loop_vinfo);
84
85 return (bb->loop_father == loop->inner);
86 }
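/* Illustrative example (an assumption, not taken from the original sources):
   when the loop described by VINFO is the outer loop of

     for (i = 0; i < n; i++)        <-- loop being vectorized
       for (j = 0; j < m; j++)      <-- loop->inner
         a[i] += b[i][j];

   the statements in the body of the j-loop are "in an inner loop" and this
   predicate returns true for them, while the i-loop statements return
   false.  */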
87
88 /* Record the cost of a statement, either by directly informing the
89 target model or by saving it in a vector for later processing.
90 Return a preliminary estimate of the statement's cost. */
91
92 unsigned
93 record_stmt_cost (stmt_vector_for_cost *body_cost_vec, int count,
94 enum vect_cost_for_stmt kind, stmt_vec_info stmt_info,
95 tree vectype, int misalign,
96 enum vect_cost_model_location where)
97 {
98 if ((kind == vector_load || kind == unaligned_load)
99 && (stmt_info && STMT_VINFO_GATHER_SCATTER_P (stmt_info)))
100 kind = vector_gather_load;
101 if ((kind == vector_store || kind == unaligned_store)
102 && (stmt_info && STMT_VINFO_GATHER_SCATTER_P (stmt_info)))
103 kind = vector_scatter_store;
104
105 stmt_info_for_cost si = { count, kind, where, stmt_info, vectype, misalign };
106 body_cost_vec->safe_push (si);
107
108 return (unsigned)
109 (builtin_vectorization_cost (kind, vectype, misalign) * count);
110 }
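/* Illustrative sketch (not part of the original sources): using the
   signature above, costing two unaligned vector loads in the loop body and
   keeping the preliminary estimate might look like

     unsigned est = record_stmt_cost (cost_vec, 2, unaligned_load,
                                      stmt_info, vectype,
                                      DR_MISALIGNMENT (dr_info), vect_body);

   where cost_vec, stmt_info, vectype and dr_info stand for values the
   caller already has.  Gather/scatter statements are re-classified to
   vector_gather_load / vector_scatter_store automatically.  */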
111
112 /* Return a variable of type ELEM_TYPE[NELEMS]. */
113
114 static tree
115 create_vector_array (tree elem_type, unsigned HOST_WIDE_INT nelems)
116 {
117 return create_tmp_var (build_array_type_nelts (elem_type, nelems),
118 "vect_array");
119 }
120
121 /* ARRAY is an array of vectors created by create_vector_array.
122 Return an SSA_NAME for the vector in index N. The reference
123 is part of the vectorization of STMT_INFO and the vector is associated
124 with scalar destination SCALAR_DEST. */
125
126 static tree
127 read_vector_array (vec_info *vinfo,
128 stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
129 tree scalar_dest, tree array, unsigned HOST_WIDE_INT n)
130 {
131 tree vect_type, vect, vect_name, array_ref;
132 gimple *new_stmt;
133
134 gcc_assert (TREE_CODE (TREE_TYPE (array)) == ARRAY_TYPE);
135 vect_type = TREE_TYPE (TREE_TYPE (array));
136 vect = vect_create_destination_var (scalar_dest, vect_type);
137 array_ref = build4 (ARRAY_REF, vect_type, array,
138 build_int_cst (size_type_node, n),
139 NULL_TREE, NULL_TREE);
140
141 new_stmt = gimple_build_assign (vect, array_ref);
142 vect_name = make_ssa_name (vect, new_stmt);
143 gimple_assign_set_lhs (new_stmt, vect_name);
144 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
145
146 return vect_name;
147 }
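/* For example (illustrative only, SSA names made up): with N == 1 the
   statement emitted before *GSI is roughly

     vect_x.7_23 = vect_array[1];

   i.e. a plain ARRAY_REF read whose left-hand side becomes the returned
   SSA name.  */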
148
149 /* ARRAY is an array of vectors created by create_vector_array.
150 Emit code to store SSA_NAME VECT in index N of the array.
151 The store is part of the vectorization of STMT_INFO. */
152
153 static void
154 write_vector_array (vec_info *vinfo,
155 stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
156 tree vect, tree array, unsigned HOST_WIDE_INT n)
157 {
158 tree array_ref;
159 gimple *new_stmt;
160
161 array_ref = build4 (ARRAY_REF, TREE_TYPE (vect), array,
162 build_int_cst (size_type_node, n),
163 NULL_TREE, NULL_TREE);
164
165 new_stmt = gimple_build_assign (array_ref, vect);
166 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
167 }
168
169 /* PTR is a pointer to an array of type TYPE. Return a representation
170 of *PTR. The memory reference replaces those in FIRST_DR
171 (and its group). */
172
173 static tree
174 create_array_ref (tree type, tree ptr, tree alias_ptr_type)
175 {
176 tree mem_ref;
177
178 mem_ref = build2 (MEM_REF, type, ptr, build_int_cst (alias_ptr_type, 0));
179 /* Arrays have the same alignment as their type. */
180 set_ptr_info_alignment (get_ptr_info (ptr), TYPE_ALIGN_UNIT (type), 0);
181 return mem_ref;
182 }
183
184 /* Add a clobber of variable VAR to the vectorization of STMT_INFO.
185 Emit the clobber before *GSI. */
186
187 static void
188 vect_clobber_variable (vec_info *vinfo, stmt_vec_info stmt_info,
189 gimple_stmt_iterator *gsi, tree var)
190 {
191 tree clobber = build_clobber (TREE_TYPE (var));
192 gimple *new_stmt = gimple_build_assign (var, clobber);
193 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
194 }
195
196 /* Utility functions used by vect_mark_stmts_to_be_vectorized. */
197
198 /* Function vect_mark_relevant.
199
200 Mark STMT_INFO as "relevant for vectorization" and add it to WORKLIST. */
201
202 static void
203 vect_mark_relevant (vec<stmt_vec_info> *worklist, stmt_vec_info stmt_info,
204 enum vect_relevant relevant, bool live_p)
205 {
206 enum vect_relevant save_relevant = STMT_VINFO_RELEVANT (stmt_info);
207 bool save_live_p = STMT_VINFO_LIVE_P (stmt_info);
208
209 if (dump_enabled_p ())
210 dump_printf_loc (MSG_NOTE, vect_location,
211 "mark relevant %d, live %d: %G", relevant, live_p,
212 stmt_info->stmt);
213
214 /* If this stmt is an original stmt in a pattern, we might need to mark its
215 related pattern stmt instead of the original stmt. However, such stmts
216 may have their own uses that are not in any pattern; in such cases the
217 stmt itself should be marked. */
218 if (STMT_VINFO_IN_PATTERN_P (stmt_info))
219 {
220 /* This is the last stmt in a sequence that was detected as a
221 pattern that can potentially be vectorized. Don't mark the stmt
222 as relevant/live because it's not going to be vectorized.
223 Instead mark the pattern-stmt that replaces it. */
224
225 if (dump_enabled_p ())
226 dump_printf_loc (MSG_NOTE, vect_location,
227 "last stmt in pattern. don't mark"
228 " relevant/live.\n");
229 stmt_vec_info old_stmt_info = stmt_info;
230 stmt_info = STMT_VINFO_RELATED_STMT (stmt_info);
231 gcc_assert (STMT_VINFO_RELATED_STMT (stmt_info) == old_stmt_info);
232 save_relevant = STMT_VINFO_RELEVANT (stmt_info);
233 save_live_p = STMT_VINFO_LIVE_P (stmt_info);
234 }
235
236 STMT_VINFO_LIVE_P (stmt_info) |= live_p;
237 if (relevant > STMT_VINFO_RELEVANT (stmt_info))
238 STMT_VINFO_RELEVANT (stmt_info) = relevant;
239
240 if (STMT_VINFO_RELEVANT (stmt_info) == save_relevant
241 && STMT_VINFO_LIVE_P (stmt_info) == save_live_p)
242 {
243 if (dump_enabled_p ())
244 dump_printf_loc (MSG_NOTE, vect_location,
245 "already marked relevant/live.\n");
246 return;
247 }
248
249 worklist->safe_push (stmt_info);
250 }
251
252
253 /* Function is_simple_and_all_uses_invariant
254
255 Return true if STMT_INFO is simple and all uses of it are invariant. */
256
257 bool
258 is_simple_and_all_uses_invariant (stmt_vec_info stmt_info,
259 loop_vec_info loop_vinfo)
260 {
261 tree op;
262 ssa_op_iter iter;
263
264 gassign *stmt = dyn_cast <gassign *> (stmt_info->stmt);
265 if (!stmt)
266 return false;
267
268 FOR_EACH_SSA_TREE_OPERAND (op, stmt, iter, SSA_OP_USE)
269 {
270 enum vect_def_type dt = vect_uninitialized_def;
271
272 if (!vect_is_simple_use (op, loop_vinfo, &dt))
273 {
274 if (dump_enabled_p ())
275 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
276 "use not simple.\n");
277 return false;
278 }
279
280 if (dt != vect_external_def && dt != vect_constant_def)
281 return false;
282 }
283 return true;
284 }
285
286 /* Function vect_stmt_relevant_p.
287
288 Return true if STMT_INFO, in the loop that is represented by LOOP_VINFO,
289 is "relevant for vectorization".
290
291 A stmt is considered "relevant for vectorization" if:
292 - it has uses outside the loop.
293 - it has vdefs (it alters memory).
294 - it is a control stmt in the loop (except for the exit condition).
295
296 CHECKME: what other side effects would the vectorizer allow? */
297
298 static bool
299 vect_stmt_relevant_p (stmt_vec_info stmt_info, loop_vec_info loop_vinfo,
300 enum vect_relevant *relevant, bool *live_p)
301 {
302 class loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
303 ssa_op_iter op_iter;
304 imm_use_iterator imm_iter;
305 use_operand_p use_p;
306 def_operand_p def_p;
307
308 *relevant = vect_unused_in_scope;
309 *live_p = false;
310
311 /* cond stmt other than loop exit cond. */
312 if (is_ctrl_stmt (stmt_info->stmt)
313 && STMT_VINFO_TYPE (stmt_info) != loop_exit_ctrl_vec_info_type)
314 *relevant = vect_used_in_scope;
315
316 /* changing memory. */
317 if (gimple_code (stmt_info->stmt) != GIMPLE_PHI)
318 if (gimple_vdef (stmt_info->stmt)
319 && !gimple_clobber_p (stmt_info->stmt))
320 {
321 if (dump_enabled_p ())
322 dump_printf_loc (MSG_NOTE, vect_location,
323 "vec_stmt_relevant_p: stmt has vdefs.\n");
324 *relevant = vect_used_in_scope;
325 }
326
327 /* uses outside the loop. */
328 FOR_EACH_PHI_OR_STMT_DEF (def_p, stmt_info->stmt, op_iter, SSA_OP_DEF)
329 {
330 FOR_EACH_IMM_USE_FAST (use_p, imm_iter, DEF_FROM_PTR (def_p))
331 {
332 basic_block bb = gimple_bb (USE_STMT (use_p));
333 if (!flow_bb_inside_loop_p (loop, bb))
334 {
335 if (is_gimple_debug (USE_STMT (use_p)))
336 continue;
337
338 if (dump_enabled_p ())
339 dump_printf_loc (MSG_NOTE, vect_location,
340 "vec_stmt_relevant_p: used out of loop.\n");
341
342 /* We expect all such uses to be in the loop exit phis
343 (because of loop-closed SSA form). */
344 gcc_assert (gimple_code (USE_STMT (use_p)) == GIMPLE_PHI);
345 gcc_assert (bb == single_exit (loop)->dest);
346
347 *live_p = true;
348 }
349 }
350 }
351
352 if (*live_p && *relevant == vect_unused_in_scope
353 && !is_simple_and_all_uses_invariant (stmt_info, loop_vinfo))
354 {
355 if (dump_enabled_p ())
356 dump_printf_loc (MSG_NOTE, vect_location,
357 "vec_stmt_relevant_p: stmt live but not relevant.\n");
358 *relevant = vect_used_only_live;
359 }
360
361 return (*live_p || *relevant);
362 }
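/* Illustrative examples (assumptions, not taken from the original
   sources): in

     for (i = 0; i < n; i++)
       {
         a[i] = b[i] + 1;    <-- has a vdef, so *relevant is set
         s = s + b[i];       <-- s is used after the loop, so *live_p is set
       }

   the store is marked relevant because it alters memory, and the summation
   is marked live because its value is used outside the loop through a loop
   exit phi.  */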
363
364
365 /* Function exist_non_indexing_operands_for_use_p
366
367 USE is one of the uses attached to STMT_INFO. Check if USE is
368 used in STMT_INFO for anything other than indexing an array. */
369
370 static bool
371 exist_non_indexing_operands_for_use_p (tree use, stmt_vec_info stmt_info)
372 {
373 tree operand;
374
375 /* USE corresponds to some operand in STMT. If there is no data
376 reference in STMT, then any operand that corresponds to USE
377 is not indexing an array. */
378 if (!STMT_VINFO_DATA_REF (stmt_info))
379 return true;
380
381 /* STMT has a data_ref. FORNOW this means that it's of one of
382 the following forms:
383 -1- ARRAY_REF = var
384 -2- var = ARRAY_REF
385 (This should have been verified in analyze_data_refs).
386
387 'var' in the second case corresponds to a def, not a use,
388 so USE cannot correspond to any operands that are not used
389 for array indexing.
390
391 Therefore, all we need to check is if STMT falls into the
392 first case, and whether var corresponds to USE. */
393
394 gassign *assign = dyn_cast <gassign *> (stmt_info->stmt);
395 if (!assign || !gimple_assign_copy_p (assign))
396 {
397 gcall *call = dyn_cast <gcall *> (stmt_info->stmt);
398 if (call && gimple_call_internal_p (call))
399 {
400 internal_fn ifn = gimple_call_internal_fn (call);
401 int mask_index = internal_fn_mask_index (ifn);
402 if (mask_index >= 0
403 && use == gimple_call_arg (call, mask_index))
404 return true;
405 int stored_value_index = internal_fn_stored_value_index (ifn);
406 if (stored_value_index >= 0
407 && use == gimple_call_arg (call, stored_value_index))
408 return true;
409 if (internal_gather_scatter_fn_p (ifn)
410 && use == gimple_call_arg (call, 1))
411 return true;
412 }
413 return false;
414 }
415
416 if (TREE_CODE (gimple_assign_lhs (assign)) == SSA_NAME)
417 return false;
418 operand = gimple_assign_rhs1 (assign);
419 if (TREE_CODE (operand) != SSA_NAME)
420 return false;
421
422 if (operand == use)
423 return true;
424
425 return false;
426 }
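/* For example (illustrative only, SSA names made up): in the store
   "a[i_5] = x_7" the use x_7 is the copied right-hand side, so this
   function returns true for x_7, while i_5 only feeds the address
   computation of the ARRAY_REF and the function returns false for it.  */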
427
428
429 /*
430 Function process_use.
431
432 Inputs:
433 - a USE in STMT_VINFO in a loop represented by LOOP_VINFO
434 - RELEVANT - enum value to be set in the STMT_VINFO of the stmt
435 that defined USE. This is done by calling mark_relevant and passing it
436 the WORKLIST (to add DEF_STMT to the WORKLIST in case it is relevant).
437 - FORCE is true if exist_non_indexing_operands_for_use_p check shouldn't
438 be performed.
439
440 Outputs:
441 Generally, LIVE_P and RELEVANT are used to define the liveness and
442 relevance info of the DEF_STMT of this USE:
443 STMT_VINFO_LIVE_P (DEF_stmt_vinfo) <-- live_p
444 STMT_VINFO_RELEVANT (DEF_stmt_vinfo) <-- relevant
445 Exceptions:
446 - case 1: If USE is used only for address computations (e.g. array indexing),
447 which does not need to be directly vectorized, then the liveness/relevance
448 of the respective DEF_STMT is left unchanged.
449 - case 2: If STMT_VINFO is a reduction phi and DEF_STMT is a reduction stmt,
450 we skip DEF_STMT because it has already been processed.
451 - case 3: If DEF_STMT and STMT_VINFO are in different nests, then
452 "relevant" will be modified accordingly.
453
454 Return true if everything is as expected. Return false otherwise. */
455
456 static opt_result
457 process_use (stmt_vec_info stmt_vinfo, tree use, loop_vec_info loop_vinfo,
458 enum vect_relevant relevant, vec<stmt_vec_info> *worklist,
459 bool force)
460 {
461 stmt_vec_info dstmt_vinfo;
462 enum vect_def_type dt;
463
464 /* case 1: we are only interested in uses that need to be vectorized. Uses
465 that are used for address computation are not considered relevant. */
466 if (!force && !exist_non_indexing_operands_for_use_p (use, stmt_vinfo))
467 return opt_result::success ();
468
469 if (!vect_is_simple_use (use, loop_vinfo, &dt, &dstmt_vinfo))
470 return opt_result::failure_at (stmt_vinfo->stmt,
471 "not vectorized:"
472 " unsupported use in stmt.\n");
473
474 if (!dstmt_vinfo)
475 return opt_result::success ();
476
477 basic_block def_bb = gimple_bb (dstmt_vinfo->stmt);
478 basic_block bb = gimple_bb (stmt_vinfo->stmt);
479
480 /* case 2: A reduction phi (STMT) defined by a reduction stmt (DSTMT_VINFO).
481 We have to force the stmt live since the epilogue loop needs it to
482 continue computing the reduction. */
483 if (gimple_code (stmt_vinfo->stmt) == GIMPLE_PHI
484 && STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_reduction_def
485 && gimple_code (dstmt_vinfo->stmt) != GIMPLE_PHI
486 && STMT_VINFO_DEF_TYPE (dstmt_vinfo) == vect_reduction_def
487 && bb->loop_father == def_bb->loop_father)
488 {
489 if (dump_enabled_p ())
490 dump_printf_loc (MSG_NOTE, vect_location,
491 "reduc-stmt defining reduc-phi in the same nest.\n");
492 vect_mark_relevant (worklist, dstmt_vinfo, relevant, true);
493 return opt_result::success ();
494 }
495
496 /* case 3a: outer-loop stmt defining an inner-loop stmt:
497 outer-loop-header-bb:
498 d = dstmt_vinfo
499 inner-loop:
500 stmt # use (d)
501 outer-loop-tail-bb:
502 ... */
503 if (flow_loop_nested_p (def_bb->loop_father, bb->loop_father))
504 {
505 if (dump_enabled_p ())
506 dump_printf_loc (MSG_NOTE, vect_location,
507 "outer-loop def-stmt defining inner-loop stmt.\n");
508
509 switch (relevant)
510 {
511 case vect_unused_in_scope:
512 relevant = (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_nested_cycle) ?
513 vect_used_in_scope : vect_unused_in_scope;
514 break;
515
516 case vect_used_in_outer_by_reduction:
517 gcc_assert (STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_reduction_def);
518 relevant = vect_used_by_reduction;
519 break;
520
521 case vect_used_in_outer:
522 gcc_assert (STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_reduction_def);
523 relevant = vect_used_in_scope;
524 break;
525
526 case vect_used_in_scope:
527 break;
528
529 default:
530 gcc_unreachable ();
531 }
532 }
533
534 /* case 3b: inner-loop stmt defining an outer-loop stmt:
535 outer-loop-header-bb:
536 ...
537 inner-loop:
538 d = dstmt_vinfo
539 outer-loop-tail-bb (or outer-loop-exit-bb in double reduction):
540 stmt # use (d) */
541 else if (flow_loop_nested_p (bb->loop_father, def_bb->loop_father))
542 {
543 if (dump_enabled_p ())
544 dump_printf_loc (MSG_NOTE, vect_location,
545 "inner-loop def-stmt defining outer-loop stmt.\n");
546
547 switch (relevant)
548 {
549 case vect_unused_in_scope:
550 relevant = (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_reduction_def
551 || STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_double_reduction_def) ?
552 vect_used_in_outer_by_reduction : vect_unused_in_scope;
553 break;
554
555 case vect_used_by_reduction:
556 case vect_used_only_live:
557 relevant = vect_used_in_outer_by_reduction;
558 break;
559
560 case vect_used_in_scope:
561 relevant = vect_used_in_outer;
562 break;
563
564 default:
565 gcc_unreachable ();
566 }
567 }
568 /* We are also not interested in uses on loop PHI backedges that are
569 inductions. Otherwise we'll needlessly vectorize the IV increment
570 and cause hybrid SLP for SLP inductions. Unless the PHI is live
571 of course. */
572 else if (gimple_code (stmt_vinfo->stmt) == GIMPLE_PHI
573 && STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_induction_def
574 && ! STMT_VINFO_LIVE_P (stmt_vinfo)
575 && (PHI_ARG_DEF_FROM_EDGE (stmt_vinfo->stmt,
576 loop_latch_edge (bb->loop_father))
577 == use))
578 {
579 if (dump_enabled_p ())
580 dump_printf_loc (MSG_NOTE, vect_location,
581 "induction value on backedge.\n");
582 return opt_result::success ();
583 }
584
585
586 vect_mark_relevant (worklist, dstmt_vinfo, relevant, false);
587 return opt_result::success ();
588 }
589
590
591 /* Function vect_mark_stmts_to_be_vectorized.
592
593 Not all stmts in the loop need to be vectorized. For example:
594
595 for i...
596 for j...
597 1. T0 = i + j
598 2. T1 = a[T0]
599
600 3. j = j + 1
601
602 Stmts 1 and 3 do not need to be vectorized, because loop control and
603 addressing of vectorized data-refs are handled differently.
604
605 This pass detects such stmts. */
606
607 opt_result
608 vect_mark_stmts_to_be_vectorized (loop_vec_info loop_vinfo, bool *fatal)
609 {
610 class loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
611 basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
612 unsigned int nbbs = loop->num_nodes;
613 gimple_stmt_iterator si;
614 unsigned int i;
615 basic_block bb;
616 bool live_p;
617 enum vect_relevant relevant;
618
619 DUMP_VECT_SCOPE ("vect_mark_stmts_to_be_vectorized");
620
621 auto_vec<stmt_vec_info, 64> worklist;
622
623 /* 1. Init worklist. */
624 for (i = 0; i < nbbs; i++)
625 {
626 bb = bbs[i];
627 for (si = gsi_start_phis (bb); !gsi_end_p (si); gsi_next (&si))
628 {
629 stmt_vec_info phi_info = loop_vinfo->lookup_stmt (gsi_stmt (si));
630 if (dump_enabled_p ())
631 dump_printf_loc (MSG_NOTE, vect_location, "init: phi relevant? %G",
632 phi_info->stmt);
633
634 if (vect_stmt_relevant_p (phi_info, loop_vinfo, &relevant, &live_p))
635 vect_mark_relevant (&worklist, phi_info, relevant, live_p);
636 }
637 for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
638 {
639 if (is_gimple_debug (gsi_stmt (si)))
640 continue;
641 stmt_vec_info stmt_info = loop_vinfo->lookup_stmt (gsi_stmt (si));
642 if (dump_enabled_p ())
643 dump_printf_loc (MSG_NOTE, vect_location,
644 "init: stmt relevant? %G", stmt_info->stmt);
645
646 if (vect_stmt_relevant_p (stmt_info, loop_vinfo, &relevant, &live_p))
647 vect_mark_relevant (&worklist, stmt_info, relevant, live_p);
648 }
649 }
650
651 /* 2. Process_worklist */
652 while (worklist.length () > 0)
653 {
654 use_operand_p use_p;
655 ssa_op_iter iter;
656
657 stmt_vec_info stmt_vinfo = worklist.pop ();
658 if (dump_enabled_p ())
659 dump_printf_loc (MSG_NOTE, vect_location,
660 "worklist: examine stmt: %G", stmt_vinfo->stmt);
661
662 /* Examine the USEs of STMT. For each USE, mark the stmt that defines it
663 (DEF_STMT) as relevant/irrelevant according to the relevance property
664 of STMT. */
665 relevant = STMT_VINFO_RELEVANT (stmt_vinfo);
666
667 /* Generally, the relevance property of STMT (in STMT_VINFO_RELEVANT) is
668 propagated as is to the DEF_STMTs of its USEs.
669
670 One exception is when STMT has been identified as defining a reduction
671 variable; in this case we set the relevance to vect_used_by_reduction.
672 This is because we distinguish between two kinds of relevant stmts -
673 those that are used by a reduction computation, and those that are
674 (also) used by a regular computation. This allows us later on to
675 identify stmts that are used solely by a reduction, and therefore the
676 order of the results that they produce does not have to be kept. */
677
678 switch (STMT_VINFO_DEF_TYPE (stmt_vinfo))
679 {
680 case vect_reduction_def:
681 gcc_assert (relevant != vect_unused_in_scope);
682 if (relevant != vect_unused_in_scope
683 && relevant != vect_used_in_scope
684 && relevant != vect_used_by_reduction
685 && relevant != vect_used_only_live)
686 return opt_result::failure_at
687 (stmt_vinfo->stmt, "unsupported use of reduction.\n");
688 break;
689
690 case vect_nested_cycle:
691 if (relevant != vect_unused_in_scope
692 && relevant != vect_used_in_outer_by_reduction
693 && relevant != vect_used_in_outer)
694 return opt_result::failure_at
695 (stmt_vinfo->stmt, "unsupported use of nested cycle.\n");
696 break;
697
698 case vect_double_reduction_def:
699 if (relevant != vect_unused_in_scope
700 && relevant != vect_used_by_reduction
701 && relevant != vect_used_only_live)
702 return opt_result::failure_at
703 (stmt_vinfo->stmt, "unsupported use of double reduction.\n");
704 break;
705
706 default:
707 break;
708 }
709
710 if (is_pattern_stmt_p (stmt_vinfo))
711 {
712 /* Pattern statements are not inserted into the code, so
713 FOR_EACH_PHI_OR_STMT_USE optimizes their operands out, and we
714 have to scan the RHS or function arguments instead. */
715 if (gassign *assign = dyn_cast <gassign *> (stmt_vinfo->stmt))
716 {
717 enum tree_code rhs_code = gimple_assign_rhs_code (assign);
718 tree op = gimple_assign_rhs1 (assign);
719
720 i = 1;
721 if (rhs_code == COND_EXPR && COMPARISON_CLASS_P (op))
722 {
723 opt_result res
724 = process_use (stmt_vinfo, TREE_OPERAND (op, 0),
725 loop_vinfo, relevant, &worklist, false);
726 if (!res)
727 return res;
728 res = process_use (stmt_vinfo, TREE_OPERAND (op, 1),
729 loop_vinfo, relevant, &worklist, false);
730 if (!res)
731 return res;
732 i = 2;
733 }
734 for (; i < gimple_num_ops (assign); i++)
735 {
736 op = gimple_op (assign, i);
737 if (TREE_CODE (op) == SSA_NAME)
738 {
739 opt_result res
740 = process_use (stmt_vinfo, op, loop_vinfo, relevant,
741 &worklist, false);
742 if (!res)
743 return res;
744 }
745 }
746 }
747 else if (gcall *call = dyn_cast <gcall *> (stmt_vinfo->stmt))
748 {
749 for (i = 0; i < gimple_call_num_args (call); i++)
750 {
751 tree arg = gimple_call_arg (call, i);
752 opt_result res
753 = process_use (stmt_vinfo, arg, loop_vinfo, relevant,
754 &worklist, false);
755 if (!res)
756 return res;
757 }
758 }
759 }
760 else
761 FOR_EACH_PHI_OR_STMT_USE (use_p, stmt_vinfo->stmt, iter, SSA_OP_USE)
762 {
763 tree op = USE_FROM_PTR (use_p);
764 opt_result res
765 = process_use (stmt_vinfo, op, loop_vinfo, relevant,
766 &worklist, false);
767 if (!res)
768 return res;
769 }
770
771 if (STMT_VINFO_GATHER_SCATTER_P (stmt_vinfo))
772 {
773 gather_scatter_info gs_info;
774 if (!vect_check_gather_scatter (stmt_vinfo, loop_vinfo, &gs_info))
775 gcc_unreachable ();
776 opt_result res
777 = process_use (stmt_vinfo, gs_info.offset, loop_vinfo, relevant,
778 &worklist, true);
779 if (!res)
780 {
781 if (fatal)
782 *fatal = false;
783 return res;
784 }
785 }
786 } /* while worklist */
787
788 return opt_result::success ();
789 }
790
791 /* Function vect_model_simple_cost.
792
793 Models cost for simple operations, i.e. those that only emit ncopies of a
794 single op. Right now, this does not account for multiple insns that could
795 be generated for the single vector op. We will handle that shortly. */
796
797 static void
798 vect_model_simple_cost (vec_info *,
799 stmt_vec_info stmt_info, int ncopies,
800 enum vect_def_type *dt,
801 int ndts,
802 slp_tree node,
803 stmt_vector_for_cost *cost_vec,
804 vect_cost_for_stmt kind = vector_stmt)
805 {
806 int inside_cost = 0, prologue_cost = 0;
807
808 gcc_assert (cost_vec != NULL);
809
810 /* ??? Somehow we need to fix this at the callers. */
811 if (node)
812 ncopies = SLP_TREE_NUMBER_OF_VEC_STMTS (node);
813
814 if (!node)
815 /* Cost the "broadcast" of a scalar operand into a vector operand.
816 Use scalar_to_vec to cost the broadcast, as elsewhere in the vector
817 cost model. */
818 for (int i = 0; i < ndts; i++)
819 if (dt[i] == vect_constant_def || dt[i] == vect_external_def)
820 prologue_cost += record_stmt_cost (cost_vec, 1, scalar_to_vec,
821 stmt_info, 0, vect_prologue);
822
823 /* Pass the inside-of-loop statements to the target-specific cost model. */
824 inside_cost += record_stmt_cost (cost_vec, ncopies, kind,
825 stmt_info, 0, vect_body);
826
827 if (dump_enabled_p ())
828 dump_printf_loc (MSG_NOTE, vect_location,
829 "vect_model_simple_cost: inside_cost = %d, "
830 "prologue_cost = %d .\n", inside_cost, prologue_cost);
831 }
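/* Worked example (an assumption, not taken from the original sources): a
   non-SLP statement with NCOPIES == 2 and one vect_constant_def operand
   among NDTS == 2 operands records one scalar_to_vec in the prologue and
   two operations of the given KIND in the loop body.  */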
832
833
834 /* Model cost for type demotion and promotion operations. PWR is
835 normally zero for single-step promotions and demotions. It will be
836 one if two-step promotion/demotion is required, and so on. NCOPIES
837 is the number of vector results (and thus number of instructions)
838 for the narrowest end of the operation chain. Each additional
839 step doubles the number of instructions required. */
840
841 static void
842 vect_model_promotion_demotion_cost (stmt_vec_info stmt_info,
843 enum vect_def_type *dt,
844 unsigned int ncopies, int pwr,
845 stmt_vector_for_cost *cost_vec)
846 {
847 int i;
848 int inside_cost = 0, prologue_cost = 0;
849
850 for (i = 0; i < pwr + 1; i++)
851 {
852 inside_cost += record_stmt_cost (cost_vec, ncopies, vec_promote_demote,
853 stmt_info, 0, vect_body);
854 ncopies *= 2;
855 }
856
857 /* FORNOW: Assuming a maximum of 2 args per stmt. */
858 for (i = 0; i < 2; i++)
859 if (dt[i] == vect_constant_def || dt[i] == vect_external_def)
860 prologue_cost += record_stmt_cost (cost_vec, 1, vector_stmt,
861 stmt_info, 0, vect_prologue);
862
863 if (dump_enabled_p ())
864 dump_printf_loc (MSG_NOTE, vect_location,
865 "vect_model_promotion_demotion_cost: inside_cost = %d, "
866 "prologue_cost = %d .\n", inside_cost, prologue_cost);
867 }
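/* Worked example (an assumption, not taken from the original sources):
   for a two-step promotion (PWR == 1) with NCOPIES == 2 the loop above
   records 2 + 4 = 6 vec_promote_demote operations in the loop body, since
   each additional step doubles the number of instructions.  */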
868
869 /* Returns true if the current function returns DECL. */
870
871 static bool
872 cfun_returns (tree decl)
873 {
874 edge_iterator ei;
875 edge e;
876 FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR_FOR_FN (cfun)->preds)
877 {
878 greturn *ret = safe_dyn_cast <greturn *> (last_stmt (e->src));
879 if (!ret)
880 continue;
881 if (gimple_return_retval (ret) == decl)
882 return true;
883 /* We often end up with an aggregate copy to the result decl;
884 handle that case as well, but first skip any intermediate
885 clobbers. */
886 gimple *def = ret;
887 do
888 {
889 def = SSA_NAME_DEF_STMT (gimple_vuse (def));
890 }
891 while (gimple_clobber_p (def));
892 if (is_a <gassign *> (def)
893 && gimple_assign_lhs (def) == gimple_return_retval (ret)
894 && gimple_assign_rhs1 (def) == decl)
895 return true;
896 }
897 return false;
898 }
899
900 /* Function vect_model_store_cost
901
902 Models cost for stores. In the case of grouped accesses, one access
903 has the overhead of the grouped access attributed to it. */
904
905 static void
906 vect_model_store_cost (vec_info *vinfo, stmt_vec_info stmt_info, int ncopies,
907 vect_memory_access_type memory_access_type,
908 vec_load_store_type vls_type, slp_tree slp_node,
909 stmt_vector_for_cost *cost_vec)
910 {
911 unsigned int inside_cost = 0, prologue_cost = 0;
912 stmt_vec_info first_stmt_info = stmt_info;
913 bool grouped_access_p = STMT_VINFO_GROUPED_ACCESS (stmt_info);
914
915 /* ??? Somehow we need to fix this at the callers. */
916 if (slp_node)
917 ncopies = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
918
919 if (vls_type == VLS_STORE_INVARIANT)
920 {
921 if (!slp_node)
922 prologue_cost += record_stmt_cost (cost_vec, 1, scalar_to_vec,
923 stmt_info, 0, vect_prologue);
924 }
925
926 /* Grouped stores update all elements in the group at once,
927 so we want the DR for the first statement. */
928 if (!slp_node && grouped_access_p)
929 first_stmt_info = DR_GROUP_FIRST_ELEMENT (stmt_info);
930
931 /* True if we should include any once-per-group costs as well as
932 the cost of the statement itself. For SLP we only get called
933 once per group anyhow. */
934 bool first_stmt_p = (first_stmt_info == stmt_info);
935
936 /* We assume that the cost of a single store-lanes instruction is
937 equivalent to the cost of DR_GROUP_SIZE separate stores. If a grouped
938 access is instead being provided by a permute-and-store operation,
939 include the cost of the permutes. */
940 if (first_stmt_p
941 && memory_access_type == VMAT_CONTIGUOUS_PERMUTE)
942 {
943 /* Uses high and low interleave or shuffle operations for each
944 needed permute. */
945 int group_size = DR_GROUP_SIZE (first_stmt_info);
946 int nstmts = ncopies * ceil_log2 (group_size) * group_size;
947 inside_cost = record_stmt_cost (cost_vec, nstmts, vec_perm,
948 stmt_info, 0, vect_body);
949
950 if (dump_enabled_p ())
951 dump_printf_loc (MSG_NOTE, vect_location,
952 "vect_model_store_cost: strided group_size = %d .\n",
953 group_size);
954 }
955
956 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
957 /* Costs of the stores. */
958 if (memory_access_type == VMAT_ELEMENTWISE
959 || memory_access_type == VMAT_GATHER_SCATTER)
960 {
961 /* N scalar stores plus extracting the elements. */
962 unsigned int assumed_nunits = vect_nunits_for_cost (vectype);
963 inside_cost += record_stmt_cost (cost_vec,
964 ncopies * assumed_nunits,
965 scalar_store, stmt_info, 0, vect_body);
966 }
967 else
968 vect_get_store_cost (vinfo, stmt_info, ncopies, &inside_cost, cost_vec);
969
970 if (memory_access_type == VMAT_ELEMENTWISE
971 || memory_access_type == VMAT_STRIDED_SLP)
972 {
973 /* Extracting the N elements to be stored from the vector (vec_to_scalar). */
974 unsigned int assumed_nunits = vect_nunits_for_cost (vectype);
975 inside_cost += record_stmt_cost (cost_vec,
976 ncopies * assumed_nunits,
977 vec_to_scalar, stmt_info, 0, vect_body);
978 }
979
980 /* When vectorizing a store into the function result, assign
981 a penalty if the function returns in a multi-register location.
982 In this case we assume we'll end up having to spill the
983 vector result and do piecewise loads as a conservative estimate. */
984 tree base = get_base_address (STMT_VINFO_DATA_REF (stmt_info)->ref);
985 if (base
986 && (TREE_CODE (base) == RESULT_DECL
987 || (DECL_P (base) && cfun_returns (base)))
988 && !aggregate_value_p (base, cfun->decl))
989 {
990 rtx reg = hard_function_value (TREE_TYPE (base), cfun->decl, 0, 1);
991 /* ??? Handle PARALLEL in some way. */
992 if (REG_P (reg))
993 {
994 int nregs = hard_regno_nregs (REGNO (reg), GET_MODE (reg));
995 /* Assume that a single reg-reg move is possible and cheap,
996 do not account for vector to gp register move cost. */
997 if (nregs > 1)
998 {
999 /* Spill. */
1000 prologue_cost += record_stmt_cost (cost_vec, ncopies,
1001 vector_store,
1002 stmt_info, 0, vect_epilogue);
1003 /* Loads. */
1004 prologue_cost += record_stmt_cost (cost_vec, ncopies * nregs,
1005 scalar_load,
1006 stmt_info, 0, vect_epilogue);
1007 }
1008 }
1009 }
1010
1011 if (dump_enabled_p ())
1012 dump_printf_loc (MSG_NOTE, vect_location,
1013 "vect_model_store_cost: inside_cost = %d, "
1014 "prologue_cost = %d .\n", inside_cost, prologue_cost);
1015 }
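/* Worked example (an assumption, not taken from the original sources):
   for a contiguous-permute store group with DR_GROUP_SIZE == 4 and
   ncopies == 2, ceil_log2 (4) == 2, so nstmts = 2 * 2 * 4 = 16 vec_perm
   operations are costed on top of the stores themselves.  */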
1016
1017
1018 /* Calculate cost of DR's memory access. */
1019 void
1020 vect_get_store_cost (vec_info *vinfo, stmt_vec_info stmt_info, int ncopies,
1021 unsigned int *inside_cost,
1022 stmt_vector_for_cost *body_cost_vec)
1023 {
1024 dr_vec_info *dr_info = STMT_VINFO_DR_INFO (stmt_info);
1025 int alignment_support_scheme
1026 = vect_supportable_dr_alignment (vinfo, dr_info, false);
1027
1028 switch (alignment_support_scheme)
1029 {
1030 case dr_aligned:
1031 {
1032 *inside_cost += record_stmt_cost (body_cost_vec, ncopies,
1033 vector_store, stmt_info, 0,
1034 vect_body);
1035
1036 if (dump_enabled_p ())
1037 dump_printf_loc (MSG_NOTE, vect_location,
1038 "vect_model_store_cost: aligned.\n");
1039 break;
1040 }
1041
1042 case dr_unaligned_supported:
1043 {
1044 /* Here, we assign an additional cost for the unaligned store. */
1045 *inside_cost += record_stmt_cost (body_cost_vec, ncopies,
1046 unaligned_store, stmt_info,
1047 DR_MISALIGNMENT (dr_info),
1048 vect_body);
1049 if (dump_enabled_p ())
1050 dump_printf_loc (MSG_NOTE, vect_location,
1051 "vect_model_store_cost: unaligned supported by "
1052 "hardware.\n");
1053 break;
1054 }
1055
1056 case dr_unaligned_unsupported:
1057 {
1058 *inside_cost = VECT_MAX_COST;
1059
1060 if (dump_enabled_p ())
1061 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1062 "vect_model_store_cost: unsupported access.\n");
1063 break;
1064 }
1065
1066 default:
1067 gcc_unreachable ();
1068 }
1069 }
1070
1071
1072 /* Function vect_model_load_cost
1073
1074 Models cost for loads. In the case of grouped accesses, one access has
1075 the overhead of the grouped access attributed to it. Since unaligned
1076 accesses are supported for loads, we also account for the costs of the
1077 access scheme chosen. */
1078
1079 static void
1080 vect_model_load_cost (vec_info *vinfo,
1081 stmt_vec_info stmt_info, unsigned ncopies, poly_uint64 vf,
1082 vect_memory_access_type memory_access_type,
1083 slp_tree slp_node,
1084 stmt_vector_for_cost *cost_vec)
1085 {
1086 unsigned int inside_cost = 0, prologue_cost = 0;
1087 bool grouped_access_p = STMT_VINFO_GROUPED_ACCESS (stmt_info);
1088
1089 gcc_assert (cost_vec);
1090
1091 /* ??? Somehow we need to fix this at the callers. */
1092 if (slp_node)
1093 ncopies = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
1094
1095 if (slp_node && SLP_TREE_LOAD_PERMUTATION (slp_node).exists ())
1096 {
1097 /* If the load is permuted then the alignment is determined by
1098 the first group element not by the first scalar stmt DR. */
1099 stmt_vec_info first_stmt_info = DR_GROUP_FIRST_ELEMENT (stmt_info);
1100 /* Record the cost for the permutation. */
1101 unsigned n_perms, n_loads;
1102 vect_transform_slp_perm_load (vinfo, slp_node, vNULL, NULL,
1103 vf, true, &n_perms, &n_loads);
1104 inside_cost += record_stmt_cost (cost_vec, n_perms, vec_perm,
1105 first_stmt_info, 0, vect_body);
1106
1107 /* And adjust the number of loads performed. This handles
1108 redundancies as well as loads that are later dead. */
1109 ncopies = n_loads;
1110 }
1111
1112 /* Grouped loads read all elements in the group at once,
1113 so we want the DR for the first statement. */
1114 stmt_vec_info first_stmt_info = stmt_info;
1115 if (!slp_node && grouped_access_p)
1116 first_stmt_info = DR_GROUP_FIRST_ELEMENT (stmt_info);
1117
1118 /* True if we should include any once-per-group costs as well as
1119 the cost of the statement itself. For SLP we only get called
1120 once per group anyhow. */
1121 bool first_stmt_p = (first_stmt_info == stmt_info);
1122
1123 /* We assume that the cost of a single load-lanes instruction is
1124 equivalent to the cost of DR_GROUP_SIZE separate loads. If a grouped
1125 access is instead being provided by a load-and-permute operation,
1126 include the cost of the permutes. */
1127 if (first_stmt_p
1128 && memory_access_type == VMAT_CONTIGUOUS_PERMUTE)
1129 {
1130 /* Uses even and odd extract operations or shuffle operations
1131 for each needed permute. */
1132 int group_size = DR_GROUP_SIZE (first_stmt_info);
1133 int nstmts = ncopies * ceil_log2 (group_size) * group_size;
1134 inside_cost += record_stmt_cost (cost_vec, nstmts, vec_perm,
1135 stmt_info, 0, vect_body);
1136
1137 if (dump_enabled_p ())
1138 dump_printf_loc (MSG_NOTE, vect_location,
1139 "vect_model_load_cost: strided group_size = %d .\n",
1140 group_size);
1141 }
1142
1143 /* The loads themselves. */
1144 if (memory_access_type == VMAT_ELEMENTWISE
1145 || memory_access_type == VMAT_GATHER_SCATTER)
1146 {
1147 /* N scalar loads plus gathering them into a vector. */
1148 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
1149 unsigned int assumed_nunits = vect_nunits_for_cost (vectype);
1150 inside_cost += record_stmt_cost (cost_vec,
1151 ncopies * assumed_nunits,
1152 scalar_load, stmt_info, 0, vect_body);
1153 }
1154 else
1155 vect_get_load_cost (vinfo, stmt_info, ncopies, first_stmt_p,
1156 &inside_cost, &prologue_cost,
1157 cost_vec, cost_vec, true);
1158 if (memory_access_type == VMAT_ELEMENTWISE
1159 || memory_access_type == VMAT_STRIDED_SLP)
1160 inside_cost += record_stmt_cost (cost_vec, ncopies, vec_construct,
1161 stmt_info, 0, vect_body);
1162
1163 if (dump_enabled_p ())
1164 dump_printf_loc (MSG_NOTE, vect_location,
1165 "vect_model_load_cost: inside_cost = %d, "
1166 "prologue_cost = %d .\n", inside_cost, prologue_cost);
1167 }
1168
1169
1170 /* Calculate cost of DR's memory access. */
1171 void
1172 vect_get_load_cost (vec_info *vinfo, stmt_vec_info stmt_info, int ncopies,
1173 bool add_realign_cost, unsigned int *inside_cost,
1174 unsigned int *prologue_cost,
1175 stmt_vector_for_cost *prologue_cost_vec,
1176 stmt_vector_for_cost *body_cost_vec,
1177 bool record_prologue_costs)
1178 {
1179 dr_vec_info *dr_info = STMT_VINFO_DR_INFO (stmt_info);
1180 int alignment_support_scheme
1181 = vect_supportable_dr_alignment (vinfo, dr_info, false);
1182
1183 switch (alignment_support_scheme)
1184 {
1185 case dr_aligned:
1186 {
1187 *inside_cost += record_stmt_cost (body_cost_vec, ncopies, vector_load,
1188 stmt_info, 0, vect_body);
1189
1190 if (dump_enabled_p ())
1191 dump_printf_loc (MSG_NOTE, vect_location,
1192 "vect_model_load_cost: aligned.\n");
1193
1194 break;
1195 }
1196 case dr_unaligned_supported:
1197 {
1198 /* Here, we assign an additional cost for the unaligned load. */
1199 *inside_cost += record_stmt_cost (body_cost_vec, ncopies,
1200 unaligned_load, stmt_info,
1201 DR_MISALIGNMENT (dr_info),
1202 vect_body);
1203
1204 if (dump_enabled_p ())
1205 dump_printf_loc (MSG_NOTE, vect_location,
1206 "vect_model_load_cost: unaligned supported by "
1207 "hardware.\n");
1208
1209 break;
1210 }
1211 case dr_explicit_realign:
1212 {
1213 *inside_cost += record_stmt_cost (body_cost_vec, ncopies * 2,
1214 vector_load, stmt_info, 0, vect_body);
1215 *inside_cost += record_stmt_cost (body_cost_vec, ncopies,
1216 vec_perm, stmt_info, 0, vect_body);
1217
1218 /* FIXME: If the misalignment remains fixed across the iterations of
1219 the containing loop, the following cost should be added to the
1220 prologue costs. */
1221 if (targetm.vectorize.builtin_mask_for_load)
1222 *inside_cost += record_stmt_cost (body_cost_vec, 1, vector_stmt,
1223 stmt_info, 0, vect_body);
1224
1225 if (dump_enabled_p ())
1226 dump_printf_loc (MSG_NOTE, vect_location,
1227 "vect_model_load_cost: explicit realign\n");
1228
1229 break;
1230 }
1231 case dr_explicit_realign_optimized:
1232 {
1233 if (dump_enabled_p ())
1234 dump_printf_loc (MSG_NOTE, vect_location,
1235 "vect_model_load_cost: unaligned software "
1236 "pipelined.\n");
1237
1238 /* Unaligned software pipeline has a load of an address, an initial
1239 load, and possibly a mask operation to "prime" the loop. However,
1240 if this is an access in a group of loads, which provide grouped
1241 access, then the above cost should only be considered for one
1242 access in the group. Inside the loop, there is a load op
1243 and a realignment op. */
1244
1245 if (add_realign_cost && record_prologue_costs)
1246 {
1247 *prologue_cost += record_stmt_cost (prologue_cost_vec, 2,
1248 vector_stmt, stmt_info,
1249 0, vect_prologue);
1250 if (targetm.vectorize.builtin_mask_for_load)
1251 *prologue_cost += record_stmt_cost (prologue_cost_vec, 1,
1252 vector_stmt, stmt_info,
1253 0, vect_prologue);
1254 }
1255
1256 *inside_cost += record_stmt_cost (body_cost_vec, ncopies, vector_load,
1257 stmt_info, 0, vect_body);
1258 *inside_cost += record_stmt_cost (body_cost_vec, ncopies, vec_perm,
1259 stmt_info, 0, vect_body);
1260
1261 if (dump_enabled_p ())
1262 dump_printf_loc (MSG_NOTE, vect_location,
1263 "vect_model_load_cost: explicit realign optimized"
1264 "\n");
1265
1266 break;
1267 }
1268
1269 case dr_unaligned_unsupported:
1270 {
1271 *inside_cost = VECT_MAX_COST;
1272
1273 if (dump_enabled_p ())
1274 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1275 "vect_model_load_cost: unsupported access.\n");
1276 break;
1277 }
1278
1279 default:
1280 gcc_unreachable ();
1281 }
1282 }
1283
1284 /* Insert the new stmt NEW_STMT at *GSI or at the appropriate place in
1285 the loop preheader for the vectorized stmt STMT_VINFO. */
1286
1287 static void
1288 vect_init_vector_1 (vec_info *vinfo, stmt_vec_info stmt_vinfo, gimple *new_stmt,
1289 gimple_stmt_iterator *gsi)
1290 {
1291 if (gsi)
1292 vect_finish_stmt_generation (vinfo, stmt_vinfo, new_stmt, gsi);
1293 else
1294 vinfo->insert_on_entry (stmt_vinfo, new_stmt);
1295
1296 if (dump_enabled_p ())
1297 dump_printf_loc (MSG_NOTE, vect_location,
1298 "created new init_stmt: %G", new_stmt);
1299 }
1300
1301 /* Function vect_init_vector.
1302
1303 Insert a new stmt (INIT_STMT) that initializes a new variable of type
1304 TYPE with the value VAL. If TYPE is a vector type and VAL does not have
1305 vector type, a vector with all elements equal to VAL is created first.
1306 Place the initialization at GSI if it is not NULL. Otherwise, place the
1307 initialization at the loop preheader.
1308 Return the DEF of INIT_STMT.
1309 It will be used in the vectorization of STMT_INFO. */
1310
1311 tree
1312 vect_init_vector (vec_info *vinfo, stmt_vec_info stmt_info, tree val, tree type,
1313 gimple_stmt_iterator *gsi)
1314 {
1315 gimple *init_stmt;
1316 tree new_temp;
1317
1318 /* We abuse this function to push something to an SSA name with initial 'val'. */
1319 if (! useless_type_conversion_p (type, TREE_TYPE (val)))
1320 {
1321 gcc_assert (TREE_CODE (type) == VECTOR_TYPE);
1322 if (! types_compatible_p (TREE_TYPE (type), TREE_TYPE (val)))
1323 {
1324 /* Scalar boolean value should be transformed into
1325 all zeros or all ones value before building a vector. */
1326 if (VECTOR_BOOLEAN_TYPE_P (type))
1327 {
1328 tree true_val = build_all_ones_cst (TREE_TYPE (type));
1329 tree false_val = build_zero_cst (TREE_TYPE (type));
1330
1331 if (CONSTANT_CLASS_P (val))
1332 val = integer_zerop (val) ? false_val : true_val;
1333 else
1334 {
1335 new_temp = make_ssa_name (TREE_TYPE (type));
1336 init_stmt = gimple_build_assign (new_temp, COND_EXPR,
1337 val, true_val, false_val);
1338 vect_init_vector_1 (vinfo, stmt_info, init_stmt, gsi);
1339 val = new_temp;
1340 }
1341 }
1342 else
1343 {
1344 gimple_seq stmts = NULL;
1345 if (! INTEGRAL_TYPE_P (TREE_TYPE (val)))
1346 val = gimple_build (&stmts, VIEW_CONVERT_EXPR,
1347 TREE_TYPE (type), val);
1348 else
1349 /* ??? Condition vectorization expects us to do
1350 promotion of invariant/external defs. */
1351 val = gimple_convert (&stmts, TREE_TYPE (type), val);
1352 for (gimple_stmt_iterator gsi2 = gsi_start (stmts);
1353 !gsi_end_p (gsi2); )
1354 {
1355 init_stmt = gsi_stmt (gsi2);
1356 gsi_remove (&gsi2, false);
1357 vect_init_vector_1 (vinfo, stmt_info, init_stmt, gsi);
1358 }
1359 }
1360 }
1361 val = build_vector_from_val (type, val);
1362 }
1363
1364 new_temp = vect_get_new_ssa_name (type, vect_simple_var, "cst_");
1365 init_stmt = gimple_build_assign (new_temp, val);
1366 vect_init_vector_1 (vinfo, stmt_info, init_stmt, gsi);
1367 return new_temp;
1368 }
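/* For example (illustrative only, SSA names made up): with VAL == 5, a
   four-element integer vector TYPE and a null GSI, this emits in the loop
   preheader something like

     cst__42 = { 5, 5, 5, 5 };

   and returns cst__42.  */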
1369
1370
1371 /* Function vect_get_vec_defs_for_operand.
1372
1373 OP is an operand in STMT_VINFO. This function returns a vector of
1374 NCOPIES defs that will be used in the vectorized stmts for STMT_VINFO.
1375
1376 In the case that OP is an SSA_NAME which is defined in the loop, then
1377 STMT_VINFO_VEC_STMTS of the defining stmt holds the relevant defs.
1378
1379 In case OP is an invariant or constant, a new stmt that creates a vector def
1380 needs to be introduced. VECTYPE may be used to specify a required type for
1381 vector invariant. */
1382
1383 void
1384 vect_get_vec_defs_for_operand (vec_info *vinfo, stmt_vec_info stmt_vinfo,
1385 unsigned ncopies,
1386 tree op, vec<tree> *vec_oprnds, tree vectype)
1387 {
1388 gimple *def_stmt;
1389 enum vect_def_type dt;
1390 bool is_simple_use;
1391 loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
1392
1393 if (dump_enabled_p ())
1394 dump_printf_loc (MSG_NOTE, vect_location,
1395 "vect_get_vec_defs_for_operand: %T\n", op);
1396
1397 stmt_vec_info def_stmt_info;
1398 is_simple_use = vect_is_simple_use (op, loop_vinfo, &dt,
1399 &def_stmt_info, &def_stmt);
1400 gcc_assert (is_simple_use);
1401 if (def_stmt && dump_enabled_p ())
1402 dump_printf_loc (MSG_NOTE, vect_location, " def_stmt = %G", def_stmt);
1403
1404 vec_oprnds->create (ncopies);
1405 if (dt == vect_constant_def || dt == vect_external_def)
1406 {
1407 tree stmt_vectype = STMT_VINFO_VECTYPE (stmt_vinfo);
1408 tree vector_type;
1409
1410 if (vectype)
1411 vector_type = vectype;
1412 else if (VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (op))
1413 && VECTOR_BOOLEAN_TYPE_P (stmt_vectype))
1414 vector_type = truth_type_for (stmt_vectype);
1415 else
1416 vector_type = get_vectype_for_scalar_type (loop_vinfo, TREE_TYPE (op));
1417
1418 gcc_assert (vector_type);
1419 tree vop = vect_init_vector (vinfo, stmt_vinfo, op, vector_type, NULL);
1420 while (ncopies--)
1421 vec_oprnds->quick_push (vop);
1422 }
1423 else
1424 {
1425 def_stmt_info = vect_stmt_to_vectorize (def_stmt_info);
1426 gcc_assert (STMT_VINFO_VEC_STMTS (def_stmt_info).length () == ncopies);
1427 for (unsigned i = 0; i < ncopies; ++i)
1428 vec_oprnds->quick_push (gimple_get_lhs
1429 (STMT_VINFO_VEC_STMTS (def_stmt_info)[i]));
1430 }
1431 }
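/* For example (illustrative only): if OP is the constant 3 and
   NCOPIES == 2, a single vector constant such as { 3, 3, 3, 3 } is built
   once via vect_init_vector and pushed into *VEC_OPRNDS twice; if OP is
   defined by a vectorized statement in the loop, the LHSs of its NCOPIES
   vectorized statements are pushed instead.  */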
1432
1433
1434 /* Get vectorized definitions for OP0, OP1, OP2 and OP3. */
1435
1436 void
1437 vect_get_vec_defs (vec_info *vinfo, stmt_vec_info stmt_info, slp_tree slp_node,
1438 unsigned ncopies,
1439 tree op0, vec<tree> *vec_oprnds0, tree vectype0,
1440 tree op1, vec<tree> *vec_oprnds1, tree vectype1,
1441 tree op2, vec<tree> *vec_oprnds2, tree vectype2,
1442 tree op3, vec<tree> *vec_oprnds3, tree vectype3)
1443 {
1444 if (slp_node)
1445 {
1446 if (op0)
1447 vect_get_slp_defs (SLP_TREE_CHILDREN (slp_node)[0], vec_oprnds0);
1448 if (op1)
1449 vect_get_slp_defs (SLP_TREE_CHILDREN (slp_node)[1], vec_oprnds1);
1450 if (op2)
1451 vect_get_slp_defs (SLP_TREE_CHILDREN (slp_node)[2], vec_oprnds2);
1452 if (op3)
1453 vect_get_slp_defs (SLP_TREE_CHILDREN (slp_node)[3], vec_oprnds3);
1454 }
1455 else
1456 {
1457 if (op0)
1458 vect_get_vec_defs_for_operand (vinfo, stmt_info, ncopies,
1459 op0, vec_oprnds0, vectype0);
1460 if (op1)
1461 vect_get_vec_defs_for_operand (vinfo, stmt_info, ncopies,
1462 op1, vec_oprnds1, vectype1);
1463 if (op2)
1464 vect_get_vec_defs_for_operand (vinfo, stmt_info, ncopies,
1465 op2, vec_oprnds2, vectype2);
1466 if (op3)
1467 vect_get_vec_defs_for_operand (vinfo, stmt_info, ncopies,
1468 op3, vec_oprnds3, vectype3);
1469 }
1470 }
1471
1472 void
1473 vect_get_vec_defs (vec_info *vinfo, stmt_vec_info stmt_info, slp_tree slp_node,
1474 unsigned ncopies,
1475 tree op0, vec<tree> *vec_oprnds0,
1476 tree op1, vec<tree> *vec_oprnds1,
1477 tree op2, vec<tree> *vec_oprnds2,
1478 tree op3, vec<tree> *vec_oprnds3)
1479 {
1480 vect_get_vec_defs (vinfo, stmt_info, slp_node, ncopies,
1481 op0, vec_oprnds0, NULL_TREE,
1482 op1, vec_oprnds1, NULL_TREE,
1483 op2, vec_oprnds2, NULL_TREE,
1484 op3, vec_oprnds3, NULL_TREE);
1485 }
1486
1487 /* Helper function called by vect_finish_replace_stmt and
1488 vect_finish_stmt_generation. Set the location of the new
1489 statement and propagate the scalar statement's EH region to it. */
1490
1491 static void
1492 vect_finish_stmt_generation_1 (vec_info *,
1493 stmt_vec_info stmt_info, gimple *vec_stmt)
1494 {
1495 if (dump_enabled_p ())
1496 dump_printf_loc (MSG_NOTE, vect_location, "add new stmt: %G", vec_stmt);
1497
1498 if (stmt_info)
1499 {
1500 gimple_set_location (vec_stmt, gimple_location (stmt_info->stmt));
1501
1502 /* While EH edges will generally prevent vectorization, stmt might
1503 e.g. be in a must-not-throw region. Ensure newly created stmts
1504 that could throw are part of the same region. */
1505 int lp_nr = lookup_stmt_eh_lp (stmt_info->stmt);
1506 if (lp_nr != 0 && stmt_could_throw_p (cfun, vec_stmt))
1507 add_stmt_to_eh_lp (vec_stmt, lp_nr);
1508 }
1509 else
1510 gcc_assert (!stmt_could_throw_p (cfun, vec_stmt));
1511 }
1512
1513 /* Replace the scalar statement STMT_INFO with a new vector statement VEC_STMT,
1514 which sets the same scalar result as STMT_INFO did. VEC_STMT is
1515 emitted in place of the scalar statement. */
1516
1517 void
1518 vect_finish_replace_stmt (vec_info *vinfo,
1519 stmt_vec_info stmt_info, gimple *vec_stmt)
1520 {
1521 gimple *scalar_stmt = vect_orig_stmt (stmt_info)->stmt;
1522 gcc_assert (gimple_get_lhs (scalar_stmt) == gimple_get_lhs (vec_stmt));
1523
1524 gimple_stmt_iterator gsi = gsi_for_stmt (scalar_stmt);
1525 gsi_replace (&gsi, vec_stmt, true);
1526
1527 vect_finish_stmt_generation_1 (vinfo, stmt_info, vec_stmt);
1528 }
1529
1530 /* Add VEC_STMT to the vectorized implementation of STMT_INFO and insert it
1531 before *GSI, updating virtual SSA form if VEC_STMT is a store. */
1532
1533 void
1534 vect_finish_stmt_generation (vec_info *vinfo,
1535 stmt_vec_info stmt_info, gimple *vec_stmt,
1536 gimple_stmt_iterator *gsi)
1537 {
1538 gcc_assert (!stmt_info || gimple_code (stmt_info->stmt) != GIMPLE_LABEL);
1539
1540 if (!gsi_end_p (*gsi)
1541 && gimple_has_mem_ops (vec_stmt))
1542 {
1543 gimple *at_stmt = gsi_stmt (*gsi);
1544 tree vuse = gimple_vuse (at_stmt);
1545 if (vuse && TREE_CODE (vuse) == SSA_NAME)
1546 {
1547 tree vdef = gimple_vdef (at_stmt);
1548 gimple_set_vuse (vec_stmt, gimple_vuse (at_stmt));
1549 gimple_set_modified (vec_stmt, true);
1550 /* If we have an SSA vuse and insert a store, update virtual
1551 SSA form to avoid triggering the renamer. Do so only
1552 if we can easily see all uses - which is what almost always
1553 happens with the way vectorized stmts are inserted. */
1554 if ((vdef && TREE_CODE (vdef) == SSA_NAME)
1555 && ((is_gimple_assign (vec_stmt)
1556 && !is_gimple_reg (gimple_assign_lhs (vec_stmt)))
1557 || (is_gimple_call (vec_stmt)
1558 && !(gimple_call_flags (vec_stmt)
1559 & (ECF_CONST|ECF_PURE|ECF_NOVOPS)))))
1560 {
1561 tree new_vdef = copy_ssa_name (vuse, vec_stmt);
1562 gimple_set_vdef (vec_stmt, new_vdef);
1563 SET_USE (gimple_vuse_op (at_stmt), new_vdef);
1564 }
1565 }
1566 }
1567 gsi_insert_before (gsi, vec_stmt, GSI_SAME_STMT);
1568 vect_finish_stmt_generation_1 (vinfo, stmt_info, vec_stmt);
1569 }
1570
1571 /* We want to vectorize a call to combined function CFN with function
1572 decl FNDECL, using VECTYPE_OUT as the type of the output and VECTYPE_IN
1573 as the types of all inputs. Check whether this is possible using
1574 an internal function, returning its code if so or IFN_LAST if not. */
1575
1576 static internal_fn
1577 vectorizable_internal_function (combined_fn cfn, tree fndecl,
1578 tree vectype_out, tree vectype_in)
1579 {
1580 internal_fn ifn;
1581 if (internal_fn_p (cfn))
1582 ifn = as_internal_fn (cfn);
1583 else
1584 ifn = associated_internal_fn (fndecl);
1585 if (ifn != IFN_LAST && direct_internal_fn_p (ifn))
1586 {
1587 const direct_internal_fn_info &info = direct_internal_fn (ifn);
1588 if (info.vectorizable)
1589 {
1590 tree type0 = (info.type0 < 0 ? vectype_out : vectype_in);
1591 tree type1 = (info.type1 < 0 ? vectype_out : vectype_in);
1592 if (direct_internal_fn_supported_p (ifn, tree_pair (type0, type1),
1593 OPTIMIZE_FOR_SPEED))
1594 return ifn;
1595 }
1596 }
1597 return IFN_LAST;
1598 }
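/* Illustrative sketch (an assumption, not taken from the original
   sources): for a call to sqrt with the same vector type on input and
   output,

     internal_fn ifn
       = vectorizable_internal_function (CFN_SQRT, fndecl,
                                         vectype, vectype);

   returns IFN_SQRT when the target supports a direct vector sqrt for
   that type, and IFN_LAST otherwise; fndecl and vectype stand for values
   the caller already has.  */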
1599
1600
1601 static tree permute_vec_elements (vec_info *, tree, tree, tree, stmt_vec_info,
1602 gimple_stmt_iterator *);
1603
1604 /* Check whether a load or store statement in the loop described by
1605 LOOP_VINFO is possible in a loop using partial vectors. This is
1606 testing whether the vectorizer pass has the appropriate support,
1607 as well as whether the target does.
1608
1609 VLS_TYPE says whether the statement is a load or store and VECTYPE
1610 is the type of the vector being loaded or stored. MEMORY_ACCESS_TYPE
1611 says how the load or store is going to be implemented and GROUP_SIZE
1612 is the number of load or store statements in the containing group.
1613 If the access is a gather load or scatter store, GS_INFO describes
1614 its arguments. If the load or store is conditional, SCALAR_MASK is the
1615 condition under which it occurs.
1616
1617 Clear LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P if a loop using partial
1618 vectors is not supported, otherwise record the required rgroup control
1619 types. */
1620
1621 static void
1622 check_load_store_for_partial_vectors (loop_vec_info loop_vinfo, tree vectype,
1623 vec_load_store_type vls_type,
1624 int group_size,
1625 vect_memory_access_type
1626 memory_access_type,
1627 gather_scatter_info *gs_info,
1628 tree scalar_mask)
1629 {
1630 /* Invariant loads need no special support. */
1631 if (memory_access_type == VMAT_INVARIANT)
1632 return;
1633
1634 vec_loop_masks *masks = &LOOP_VINFO_MASKS (loop_vinfo);
1635 machine_mode vecmode = TYPE_MODE (vectype);
1636 bool is_load = (vls_type == VLS_LOAD);
1637 if (memory_access_type == VMAT_LOAD_STORE_LANES)
1638 {
1639 if (is_load
1640 ? !vect_load_lanes_supported (vectype, group_size, true)
1641 : !vect_store_lanes_supported (vectype, group_size, true))
1642 {
1643 if (dump_enabled_p ())
1644 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1645 "can't operate on partial vectors because"
1646 " the target doesn't have an appropriate"
1647 " load/store-lanes instruction.\n");
1648 LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo) = false;
1649 return;
1650 }
1651 unsigned int ncopies = vect_get_num_copies (loop_vinfo, vectype);
1652 vect_record_loop_mask (loop_vinfo, masks, ncopies, vectype, scalar_mask);
1653 return;
1654 }
1655
1656 if (memory_access_type == VMAT_GATHER_SCATTER)
1657 {
1658 internal_fn ifn = (is_load
1659 ? IFN_MASK_GATHER_LOAD
1660 : IFN_MASK_SCATTER_STORE);
1661 if (!internal_gather_scatter_fn_supported_p (ifn, vectype,
1662 gs_info->memory_type,
1663 gs_info->offset_vectype,
1664 gs_info->scale))
1665 {
1666 if (dump_enabled_p ())
1667 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1668 "can't operate on partial vectors because"
1669 " the target doesn't have an appropriate"
1670 " gather load or scatter store instruction.\n");
1671 LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo) = false;
1672 return;
1673 }
1674 unsigned int ncopies = vect_get_num_copies (loop_vinfo, vectype);
1675 vect_record_loop_mask (loop_vinfo, masks, ncopies, vectype, scalar_mask);
1676 return;
1677 }
1678
1679 if (memory_access_type != VMAT_CONTIGUOUS
1680 && memory_access_type != VMAT_CONTIGUOUS_PERMUTE)
1681 {
1682 /* Element X of the data must come from iteration i * VF + X of the
1683 scalar loop. We need more work to support other mappings. */
1684 if (dump_enabled_p ())
1685 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1686 "can't operate on partial vectors because an"
1687 " access isn't contiguous.\n");
1688 LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo) = false;
1689 return;
1690 }
1691
1692 if (!VECTOR_MODE_P (vecmode))
1693 {
1694 if (dump_enabled_p ())
1695 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1696 "can't operate on partial vectors when emulating"
1697 " vector operations.\n");
1698 LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo) = false;
1699 return;
1700 }
1701
1702 /* We might load more scalars than we need for permuting SLP loads.
1703 We checked in get_group_load_store_type that the extra elements
1704 don't leak into a new vector. */
1705 auto get_valid_nvectors = [] (poly_uint64 size, poly_uint64 nunits)
1706 {
1707 unsigned int nvectors;
1708 if (can_div_away_from_zero_p (size, nunits, &nvectors))
1709 return nvectors;
1710 gcc_unreachable ();
1711 };
1712
1713 poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
1714 poly_uint64 vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
1715 machine_mode mask_mode;
1716 bool using_partial_vectors_p = false;
1717 if (targetm.vectorize.get_mask_mode (vecmode).exists (&mask_mode)
1718 && can_vec_mask_load_store_p (vecmode, mask_mode, is_load))
1719 {
1720 unsigned int nvectors = get_valid_nvectors (group_size * vf, nunits);
1721 vect_record_loop_mask (loop_vinfo, masks, nvectors, vectype, scalar_mask);
1722 using_partial_vectors_p = true;
1723 }
1724
1725 machine_mode vmode;
1726 if (get_len_load_store_mode (vecmode, is_load).exists (&vmode))
1727 {
1728 unsigned int nvectors = get_valid_nvectors (group_size * vf, nunits);
1729 vec_loop_lens *lens = &LOOP_VINFO_LENS (loop_vinfo);
1730 unsigned factor = (vecmode == vmode) ? 1 : GET_MODE_UNIT_SIZE (vecmode);
1731 vect_record_loop_len (loop_vinfo, lens, nvectors, vectype, factor);
1732 using_partial_vectors_p = true;
1733 }
1734
1735 if (!using_partial_vectors_p)
1736 {
1737 if (dump_enabled_p ())
1738 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1739 "can't operate on partial vectors because the"
1740 " target doesn't have the appropriate partial"
1741 " vectorization load or store.\n");
1742 LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo) = false;
1743 }
1744 }
1745
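/* Editorial note (not part of GCC): as a hypothetical example of the rgroup
   bookkeeping above, take VF = 16, V16QI accesses and GROUP_SIZE = 2 in the
   contiguous case.  get_valid_nvectors then returns CEIL (2 * 16, 16) = 2,
   so two rgroup masks (or, on targets that use length-based partial vectors,
   two rgroup lengths) are recorded for this access.  */
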
1746 /* Return the mask input to a masked load or store. VEC_MASK is the vectorized
1747 form of the scalar mask condition and LOOP_MASK, if nonnull, is the mask
1748 that needs to be applied to all loads and stores in a vectorized loop.
1749 Return VEC_MASK if LOOP_MASK is null, otherwise return VEC_MASK & LOOP_MASK.
1750
1751 MASK_TYPE is the type of both masks. If new statements are needed,
1752 insert them before GSI. */
1753
1754 static tree
1755 prepare_load_store_mask (tree mask_type, tree loop_mask, tree vec_mask,
1756 gimple_stmt_iterator *gsi)
1757 {
1758 gcc_assert (useless_type_conversion_p (mask_type, TREE_TYPE (vec_mask)));
1759 if (!loop_mask)
1760 return vec_mask;
1761
1762 gcc_assert (TREE_TYPE (loop_mask) == mask_type);
1763 tree and_res = make_temp_ssa_name (mask_type, NULL, "vec_mask_and");
1764 gimple *and_stmt = gimple_build_assign (and_res, BIT_AND_EXPR,
1765 vec_mask, loop_mask);
1766 gsi_insert_before (gsi, and_stmt, GSI_SAME_STMT);
1767 return and_res;
1768 }
1769
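/* Editorial sketch (not part of GCC): the scalar-level effect of
   prepare_load_store_mask, assuming hypothetical byte-per-lane masks of
   N lanes.  A lane is active only if both the statement's own condition
   and the loop control mask enable it.  */

static void
combine_masks_sketch (const unsigned char *vec_mask,
		      const unsigned char *loop_mask,
		      unsigned char *out, int n)
{
  for (int i = 0; i < n; ++i)
    out[i] = vec_mask[i] & loop_mask[i];	/* VEC_MASK & LOOP_MASK.  */
}
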
1770 /* Determine whether we can use a gather load or scatter store to vectorize
1771 strided load or store STMT_INFO by truncating the current offset to a
1772 smaller width. We need to be able to construct an offset vector:
1773
1774 { 0, X, X*2, X*3, ... }
1775
1776 without loss of precision, where X is STMT_INFO's DR_STEP.
1777
1778 Return true if this is possible, describing the gather load or scatter
1779 store in GS_INFO. MASKED_P is true if the load or store is conditional. */
1780
1781 static bool
1782 vect_truncate_gather_scatter_offset (stmt_vec_info stmt_info,
1783 loop_vec_info loop_vinfo, bool masked_p,
1784 gather_scatter_info *gs_info)
1785 {
1786 dr_vec_info *dr_info = STMT_VINFO_DR_INFO (stmt_info);
1787 data_reference *dr = dr_info->dr;
1788 tree step = DR_STEP (dr);
1789 if (TREE_CODE (step) != INTEGER_CST)
1790 {
1791 /* ??? Perhaps we could use range information here? */
1792 if (dump_enabled_p ())
1793 dump_printf_loc (MSG_NOTE, vect_location,
1794 "cannot truncate variable step.\n");
1795 return false;
1796 }
1797
1798 /* Get the number of bits in an element. */
1799 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
1800 scalar_mode element_mode = SCALAR_TYPE_MODE (TREE_TYPE (vectype));
1801 unsigned int element_bits = GET_MODE_BITSIZE (element_mode);
1802
1803 /* Set COUNT to the upper limit on the number of elements - 1.
1804 Start with the maximum vectorization factor. */
1805 unsigned HOST_WIDE_INT count = vect_max_vf (loop_vinfo) - 1;
1806
1807 /* Try lowering COUNT to the number of scalar latch iterations. */
1808 class loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
1809 widest_int max_iters;
1810 if (max_loop_iterations (loop, &max_iters)
1811 && max_iters < count)
1812 count = max_iters.to_shwi ();
1813
1814 /* Try scales of 1 and the element size. */
1815 int scales[] = { 1, vect_get_scalar_dr_size (dr_info) };
1816 wi::overflow_type overflow = wi::OVF_NONE;
1817 for (int i = 0; i < 2; ++i)
1818 {
1819 int scale = scales[i];
1820 widest_int factor;
1821 if (!wi::multiple_of_p (wi::to_widest (step), scale, SIGNED, &factor))
1822 continue;
1823
1824 /* Determine the minimum precision of (COUNT - 1) * STEP / SCALE. */
1825 widest_int range = wi::mul (count, factor, SIGNED, &overflow);
1826 if (overflow)
1827 continue;
1828 signop sign = range >= 0 ? UNSIGNED : SIGNED;
1829 unsigned int min_offset_bits = wi::min_precision (range, sign);
1830
1831 /* Find the narrowest viable offset type. */
1832 unsigned int offset_bits = 1U << ceil_log2 (min_offset_bits);
1833 tree offset_type = build_nonstandard_integer_type (offset_bits,
1834 sign == UNSIGNED);
1835
1836 /* See whether the target supports the operation with an offset
1837 no narrower than OFFSET_TYPE. */
1838 tree memory_type = TREE_TYPE (DR_REF (dr));
1839 if (!vect_gather_scatter_fn_p (loop_vinfo, DR_IS_READ (dr), masked_p,
1840 vectype, memory_type, offset_type, scale,
1841 &gs_info->ifn, &gs_info->offset_vectype))
1842 continue;
1843
1844 gs_info->decl = NULL_TREE;
1845 /* Logically the sum of DR_BASE_ADDRESS, DR_INIT and DR_OFFSET,
1846 but we don't need to store that here. */
1847 gs_info->base = NULL_TREE;
1848 gs_info->element_type = TREE_TYPE (vectype);
1849 gs_info->offset = fold_convert (offset_type, step);
1850 gs_info->offset_dt = vect_constant_def;
1851 gs_info->scale = scale;
1852 gs_info->memory_type = memory_type;
1853 return true;
1854 }
1855
1856 if (overflow && dump_enabled_p ())
1857 dump_printf_loc (MSG_NOTE, vect_location,
1858 "truncating gather/scatter offset to %d bits"
1859 " might change its value.\n", element_bits);
1860
1861 return false;
1862 }
1863
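/* Editorial note (not part of GCC): a hypothetical instance of the
   truncation above.  With DR_STEP = 4, 4-byte elements and at most 100
   latch iterations, the scale-4 attempt gives FACTOR = 1 and
   RANGE = 100 * 1 = 100, so MIN_OFFSET_BITS = 7 and OFFSET_BITS = 8;
   the offset series { 0, 1, 2, ... } then fits in an unsigned 8-bit
   offset type, provided the target supports such a gather or scatter.  */
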
1864 /* Return true if we can use gather/scatter internal functions to
1865 vectorize STMT_INFO, which is a grouped or strided load or store.
1866 MASKED_P is true if load or store is conditional. When returning
1867 true, fill in GS_INFO with the information required to perform the
1868 operation. */
1869
1870 static bool
1871 vect_use_strided_gather_scatters_p (stmt_vec_info stmt_info,
1872 loop_vec_info loop_vinfo, bool masked_p,
1873 gather_scatter_info *gs_info)
1874 {
1875 if (!vect_check_gather_scatter (stmt_info, loop_vinfo, gs_info)
1876 || gs_info->decl)
1877 return vect_truncate_gather_scatter_offset (stmt_info, loop_vinfo,
1878 masked_p, gs_info);
1879
1880 tree old_offset_type = TREE_TYPE (gs_info->offset);
1881 tree new_offset_type = TREE_TYPE (gs_info->offset_vectype);
1882
1883 gcc_assert (TYPE_PRECISION (new_offset_type)
1884 >= TYPE_PRECISION (old_offset_type));
1885 gs_info->offset = fold_convert (new_offset_type, gs_info->offset);
1886
1887 if (dump_enabled_p ())
1888 dump_printf_loc (MSG_NOTE, vect_location,
1889 "using gather/scatter for strided/grouped access,"
1890 " scale = %d\n", gs_info->scale);
1891
1892 return true;
1893 }
1894
1895 /* STMT_INFO is a non-strided load or store, meaning that it accesses
1896 elements with a known constant step. Return -1 if that step
1897 is negative, 0 if it is zero, and 1 if it is greater than zero. */
1898
1899 static int
1900 compare_step_with_zero (vec_info *vinfo, stmt_vec_info stmt_info)
1901 {
1902 dr_vec_info *dr_info = STMT_VINFO_DR_INFO (stmt_info);
1903 return tree_int_cst_compare (vect_dr_behavior (vinfo, dr_info)->step,
1904 size_zero_node);
1905 }
1906
1907 /* If the target supports a permute mask that reverses the elements in
1908 a vector of type VECTYPE, return that mask, otherwise return null. */
1909
1910 static tree
1911 perm_mask_for_reverse (tree vectype)
1912 {
1913 poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
1914
1915 /* The encoding has a single stepped pattern. */
1916 vec_perm_builder sel (nunits, 1, 3);
1917 for (int i = 0; i < 3; ++i)
1918 sel.quick_push (nunits - 1 - i);
1919
1920 vec_perm_indices indices (sel, 1, nunits);
1921 if (!can_vec_perm_const_p (TYPE_MODE (vectype), indices))
1922 return NULL_TREE;
1923 return vect_gen_perm_mask_checked (vectype, indices);
1924 }
1925
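/* Editorial sketch (not part of GCC): the permutation encoded above is the
   series { N-1, N-2, ..., 0 }; applied to scalar data it is a plain
   element reversal.  */

static void
reverse_by_perm_sketch (const int *src, int *dst, int nunits)
{
  for (int i = 0; i < nunits; ++i)
    dst[i] = src[nunits - 1 - i];	/* sel[i] = nunits - 1 - i.  */
}
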
1926 /* A subroutine of get_load_store_type, with a subset of the same
1927 arguments. Handle the case where STMT_INFO is a load or store that
1928 accesses consecutive elements with a negative step. */
1929
1930 static vect_memory_access_type
1931 get_negative_load_store_type (vec_info *vinfo,
1932 stmt_vec_info stmt_info, tree vectype,
1933 vec_load_store_type vls_type,
1934 unsigned int ncopies)
1935 {
1936 dr_vec_info *dr_info = STMT_VINFO_DR_INFO (stmt_info);
1937 dr_alignment_support alignment_support_scheme;
1938
1939 if (ncopies > 1)
1940 {
1941 if (dump_enabled_p ())
1942 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1943 "multiple types with negative step.\n");
1944 return VMAT_ELEMENTWISE;
1945 }
1946
1947 alignment_support_scheme = vect_supportable_dr_alignment (vinfo,
1948 dr_info, false);
1949 if (alignment_support_scheme != dr_aligned
1950 && alignment_support_scheme != dr_unaligned_supported)
1951 {
1952 if (dump_enabled_p ())
1953 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1954 "negative step but alignment required.\n");
1955 return VMAT_ELEMENTWISE;
1956 }
1957
1958 if (vls_type == VLS_STORE_INVARIANT)
1959 {
1960 if (dump_enabled_p ())
1961 dump_printf_loc (MSG_NOTE, vect_location,
1962 "negative step with invariant source;"
1963 " no permute needed.\n");
1964 return VMAT_CONTIGUOUS_DOWN;
1965 }
1966
1967 if (!perm_mask_for_reverse (vectype))
1968 {
1969 if (dump_enabled_p ())
1970 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1971 "negative step and reversing not supported.\n");
1972 return VMAT_ELEMENTWISE;
1973 }
1974
1975 return VMAT_CONTIGUOUS_REVERSE;
1976 }
1977
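/* Editorial note (not part of GCC): a hypothetical scalar loop that reaches
   get_negative_load_store_type is

     for (int i = n - 1; i >= 0; --i)
       a[i] = b[i];

   where both accesses are contiguous with a negative step.  If the target
   can reverse a vector (see perm_mask_for_reverse) the accesses become
   VMAT_CONTIGUOUS_REVERSE; otherwise they fall back to VMAT_ELEMENTWISE.  */
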
1978 /* STMT_INFO is either a masked or unconditional store. Return the value
1979 being stored. */
1980
1981 tree
1982 vect_get_store_rhs (stmt_vec_info stmt_info)
1983 {
1984 if (gassign *assign = dyn_cast <gassign *> (stmt_info->stmt))
1985 {
1986 gcc_assert (gimple_assign_single_p (assign));
1987 return gimple_assign_rhs1 (assign);
1988 }
1989 if (gcall *call = dyn_cast <gcall *> (stmt_info->stmt))
1990 {
1991 internal_fn ifn = gimple_call_internal_fn (call);
1992 int index = internal_fn_stored_value_index (ifn);
1993 gcc_assert (index >= 0);
1994 return gimple_call_arg (call, index);
1995 }
1996 gcc_unreachable ();
1997 }
1998
1999 /* Function VECTOR_VECTOR_COMPOSITION_TYPE
2000
2001 This function returns a vector type which can be composed from NELTS pieces,
2002 whose type is recorded in PTYPE. VTYPE should be a vector type and have the
2003 same vector size as the returned vector. The function first checks whether the
2004 target supports constructing such a vector from pieces-sized vector modes;
2005 if not, it then checks whether pieces-sized scalar modes can be used instead.
2006 It returns NULL_TREE if no suitable composition is found.
2007
2008 For example, for (vtype=V16QI, nelts=4), we can probably get:
2009 - V16QI with PTYPE V4QI.
2010 - V4SI with PTYPE SI.
2011 - NULL_TREE. */
2012
2013 static tree
2014 vector_vector_composition_type (tree vtype, poly_uint64 nelts, tree *ptype)
2015 {
2016 gcc_assert (VECTOR_TYPE_P (vtype));
2017 gcc_assert (known_gt (nelts, 0U));
2018
2019 machine_mode vmode = TYPE_MODE (vtype);
2020 if (!VECTOR_MODE_P (vmode))
2021 return NULL_TREE;
2022
2023 poly_uint64 vbsize = GET_MODE_BITSIZE (vmode);
2024 unsigned int pbsize;
2025 if (constant_multiple_p (vbsize, nelts, &pbsize))
2026 {
2027 /* First check if vec_init optab supports construction from
2028 vector pieces directly. */
2029 scalar_mode elmode = SCALAR_TYPE_MODE (TREE_TYPE (vtype));
2030 poly_uint64 inelts = pbsize / GET_MODE_BITSIZE (elmode);
2031 machine_mode rmode;
2032 if (related_vector_mode (vmode, elmode, inelts).exists (&rmode)
2033 && (convert_optab_handler (vec_init_optab, vmode, rmode)
2034 != CODE_FOR_nothing))
2035 {
2036 *ptype = build_vector_type (TREE_TYPE (vtype), inelts);
2037 return vtype;
2038 }
2039
2040 /* Otherwise check whether an integer type of the same piece size exists
2041 and whether the vec_init optab supports construction from it directly. */
2042 if (int_mode_for_size (pbsize, 0).exists (&elmode)
2043 && related_vector_mode (vmode, elmode, nelts).exists (&rmode)
2044 && (convert_optab_handler (vec_init_optab, rmode, elmode)
2045 != CODE_FOR_nothing))
2046 {
2047 *ptype = build_nonstandard_integer_type (pbsize, 1);
2048 return build_vector_type (*ptype, nelts);
2049 }
2050 }
2051
2052 return NULL_TREE;
2053 }
2054
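/* Editorial note (not part of GCC): a further hypothetical example for
   vector_vector_composition_type is (vtype=V8HI, nelts=2), which asks for
   two 64-bit pieces.  If vec_init can build a V8HI from two V4HI halves,
   the result is V8HI with PTYPE V4HI; otherwise, if DImode pieces are
   usable, the result is V2DI with PTYPE DI; failing both, NULL_TREE.  */
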
2055 /* A subroutine of get_load_store_type, with a subset of the same
2056 arguments. Handle the case where STMT_INFO is part of a grouped load
2057 or store.
2058
2059 For stores, the statements in the group are all consecutive
2060 and there is no gap at the end. For loads, the statements in the
2061 group might not be consecutive; there can be gaps between statements
2062 as well as at the end. */
2063
2064 static bool
2065 get_group_load_store_type (vec_info *vinfo, stmt_vec_info stmt_info,
2066 tree vectype, slp_tree slp_node,
2067 bool masked_p, vec_load_store_type vls_type,
2068 vect_memory_access_type *memory_access_type,
2069 dr_alignment_support *alignment_support_scheme,
2070 gather_scatter_info *gs_info)
2071 {
2072 loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
2073 class loop *loop = loop_vinfo ? LOOP_VINFO_LOOP (loop_vinfo) : NULL;
2074 stmt_vec_info first_stmt_info = DR_GROUP_FIRST_ELEMENT (stmt_info);
2075 dr_vec_info *first_dr_info = STMT_VINFO_DR_INFO (first_stmt_info);
2076 unsigned int group_size = DR_GROUP_SIZE (first_stmt_info);
2077 bool single_element_p = (stmt_info == first_stmt_info
2078 && !DR_GROUP_NEXT_ELEMENT (stmt_info));
2079 unsigned HOST_WIDE_INT gap = DR_GROUP_GAP (first_stmt_info);
2080 poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
2081
2082 /* True if the vectorized statements would access beyond the last
2083 statement in the group. */
2084 bool overrun_p = false;
2085
2086 /* True if we can cope with such overrun by peeling for gaps, so that
2087 there is at least one final scalar iteration after the vector loop. */
2088 bool can_overrun_p = (!masked_p
2089 && vls_type == VLS_LOAD
2090 && loop_vinfo
2091 && !loop->inner);
2092
2093 /* There can only be a gap at the end of the group if the stride is
2094 known at compile time. */
2095 gcc_assert (!STMT_VINFO_STRIDED_P (first_stmt_info) || gap == 0);
2096
2097 /* Stores can't yet have gaps. */
2098 gcc_assert (slp_node || vls_type == VLS_LOAD || gap == 0);
2099
2100 if (slp_node)
2101 {
2102 /* For SLP vectorization we directly vectorize a subchain
2103 without permutation. */
2104 if (! SLP_TREE_LOAD_PERMUTATION (slp_node).exists ())
2105 first_dr_info
2106 = STMT_VINFO_DR_INFO (SLP_TREE_SCALAR_STMTS (slp_node)[0]);
2107 if (STMT_VINFO_STRIDED_P (first_stmt_info))
2108 {
2109 /* Try to use consecutive accesses of DR_GROUP_SIZE elements,
2110 separated by the stride, until we have a complete vector.
2111 Fall back to scalar accesses if that isn't possible. */
2112 if (multiple_p (nunits, group_size))
2113 *memory_access_type = VMAT_STRIDED_SLP;
2114 else
2115 *memory_access_type = VMAT_ELEMENTWISE;
2116 }
2117 else
2118 {
2119 overrun_p = loop_vinfo && gap != 0;
2120 if (overrun_p && vls_type != VLS_LOAD)
2121 {
2122 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2123 "Grouped store with gaps requires"
2124 " non-consecutive accesses\n");
2125 return false;
2126 }
2127 /* An overrun is fine if the trailing elements are smaller
2128 than the alignment boundary B. Every vector access will
2129 be a multiple of B and so we are guaranteed to access a
2130 non-gap element in the same B-sized block. */
2131 if (overrun_p
2132 && gap < (vect_known_alignment_in_bytes (first_dr_info)
2133 / vect_get_scalar_dr_size (first_dr_info)))
2134 overrun_p = false;
2135
2136 /* If the gap splits the vector in half and the target
2137 can do half-vector operations, avoid the epilogue peeling
2138 by simply loading only half of the vector. Usually
2139 the construction with an upper zero half will be elided. */
2140 dr_alignment_support alignment_support_scheme;
2141 tree half_vtype;
2142 if (overrun_p
2143 && !masked_p
2144 && (((alignment_support_scheme
2145 = vect_supportable_dr_alignment (vinfo,
2146 first_dr_info, false)))
2147 == dr_aligned
2148 || alignment_support_scheme == dr_unaligned_supported)
2149 && known_eq (nunits, (group_size - gap) * 2)
2150 && known_eq (nunits, group_size)
2151 && (vector_vector_composition_type (vectype, 2, &half_vtype)
2152 != NULL_TREE))
2153 overrun_p = false;
2154
2155 if (overrun_p && !can_overrun_p)
2156 {
2157 if (dump_enabled_p ())
2158 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2159 "Peeling for outer loop is not supported\n");
2160 return false;
2161 }
2162 int cmp = compare_step_with_zero (vinfo, stmt_info);
2163 if (cmp < 0)
2164 {
2165 if (single_element_p)
2166 /* ??? The VMAT_CONTIGUOUS_REVERSE code generation is
2167 only correct for single element "interleaving" SLP. */
2168 *memory_access_type = get_negative_load_store_type
2169 (vinfo, stmt_info, vectype, vls_type, 1);
2170 else
2171 {
2172 /* Try to use consecutive accesses of DR_GROUP_SIZE elements,
2173 separated by the stride, until we have a complete vector.
2174 Fall back to scalar accesses if that isn't possible. */
2175 if (multiple_p (nunits, group_size))
2176 *memory_access_type = VMAT_STRIDED_SLP;
2177 else
2178 *memory_access_type = VMAT_ELEMENTWISE;
2179 }
2180 }
2181 else
2182 {
2183 gcc_assert (!loop_vinfo || cmp > 0);
2184 *memory_access_type = VMAT_CONTIGUOUS;
2185 }
2186 }
2187 }
2188 else
2189 {
2190 /* We can always handle this case using elementwise accesses,
2191 but see if something more efficient is available. */
2192 *memory_access_type = VMAT_ELEMENTWISE;
2193
2194 /* If there is a gap at the end of the group then these optimizations
2195 would access excess elements in the last iteration. */
2196 bool would_overrun_p = (gap != 0);
2197 /* An overrun is fine if the trailing elements are smaller than the
2198 alignment boundary B. Every vector access will be a multiple of B
2199 and so we are guaranteed to access a non-gap element in the
2200 same B-sized block. */
2201 if (would_overrun_p
2202 && !masked_p
2203 && gap < (vect_known_alignment_in_bytes (first_dr_info)
2204 / vect_get_scalar_dr_size (first_dr_info)))
2205 would_overrun_p = false;
2206
2207 if (!STMT_VINFO_STRIDED_P (first_stmt_info)
2208 && (can_overrun_p || !would_overrun_p)
2209 && compare_step_with_zero (vinfo, stmt_info) > 0)
2210 {
2211 /* First cope with the degenerate case of a single-element
2212 vector. */
2213 if (known_eq (TYPE_VECTOR_SUBPARTS (vectype), 1U))
2214 ;
2215
2216 /* Otherwise try using LOAD/STORE_LANES. */
2217 else if (vls_type == VLS_LOAD
2218 ? vect_load_lanes_supported (vectype, group_size, masked_p)
2219 : vect_store_lanes_supported (vectype, group_size,
2220 masked_p))
2221 {
2222 *memory_access_type = VMAT_LOAD_STORE_LANES;
2223 overrun_p = would_overrun_p;
2224 }
2225
2226 /* If that fails, try using permuting loads. */
2227 else if (vls_type == VLS_LOAD
2228 ? vect_grouped_load_supported (vectype, single_element_p,
2229 group_size)
2230 : vect_grouped_store_supported (vectype, group_size))
2231 {
2232 *memory_access_type = VMAT_CONTIGUOUS_PERMUTE;
2233 overrun_p = would_overrun_p;
2234 }
2235 }
2236
2237 /* As a last resort, try using a gather load or scatter store.
2238
2239 ??? Although the code can handle all group sizes correctly,
2240 it probably isn't a win to use separate strided accesses based
2241 on nearby locations. Or, even if it's a win over scalar code,
2242 it might not be a win over vectorizing at a lower VF, if that
2243 allows us to use contiguous accesses. */
2244 if (*memory_access_type == VMAT_ELEMENTWISE
2245 && single_element_p
2246 && loop_vinfo
2247 && vect_use_strided_gather_scatters_p (stmt_info, loop_vinfo,
2248 masked_p, gs_info))
2249 *memory_access_type = VMAT_GATHER_SCATTER;
2250 }
2251
2252 if (*memory_access_type == VMAT_GATHER_SCATTER
2253 || *memory_access_type == VMAT_ELEMENTWISE)
2254 *alignment_support_scheme = dr_unaligned_supported;
2255 else
2256 *alignment_support_scheme
2257 = vect_supportable_dr_alignment (vinfo, first_dr_info, false);
2258
2259 if (vls_type != VLS_LOAD && first_stmt_info == stmt_info)
2260 {
2261 /* STMT is the leader of the group. Check the operands of all the
2262 stmts of the group. */
2263 stmt_vec_info next_stmt_info = DR_GROUP_NEXT_ELEMENT (stmt_info);
2264 while (next_stmt_info)
2265 {
2266 tree op = vect_get_store_rhs (next_stmt_info);
2267 enum vect_def_type dt;
2268 if (!vect_is_simple_use (op, vinfo, &dt))
2269 {
2270 if (dump_enabled_p ())
2271 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2272 "use not simple.\n");
2273 return false;
2274 }
2275 next_stmt_info = DR_GROUP_NEXT_ELEMENT (next_stmt_info);
2276 }
2277 }
2278
2279 if (overrun_p)
2280 {
2281 gcc_assert (can_overrun_p);
2282 if (dump_enabled_p ())
2283 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2284 "Data access with gaps requires scalar "
2285 "epilogue loop\n");
2286 LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo) = true;
2287 }
2288
2289 return true;
2290 }
2291
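/* Editorial note (not part of GCC): a hypothetical instance of the
   alignment-boundary rule used twice above.  With a group of 4 elements,
   a gap of 1, 4-byte elements and a known 16-byte alignment, the trailing
   overrun is 1 element while 16 / 4 = 4 elements fit in each aligned
   block, so every vector access still touches a real group member in the
   same block and no gap peeling is required.  */
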
2292 /* Analyze load or store statement STMT_INFO of type VLS_TYPE. Return true
2293 if there is a memory access type that the vectorized form can use,
2294 storing it in *MEMORY_ACCESS_TYPE if so. If we decide to use gathers
2295 or scatters, fill in GS_INFO accordingly. In addition
2296 *ALIGNMENT_SUPPORT_SCHEME is filled out and false is returned if
2297 the target does not support the alignment scheme.
2298
2299 SLP says whether we're performing SLP rather than loop vectorization.
2300 MASKED_P is true if the statement is conditional on a vectorized mask.
2301 VECTYPE is the vector type that the vectorized statements will use.
2302 NCOPIES is the number of vector statements that will be needed. */
2303
2304 static bool
2305 get_load_store_type (vec_info *vinfo, stmt_vec_info stmt_info,
2306 tree vectype, slp_tree slp_node,
2307 bool masked_p, vec_load_store_type vls_type,
2308 unsigned int ncopies,
2309 vect_memory_access_type *memory_access_type,
2310 dr_alignment_support *alignment_support_scheme,
2311 gather_scatter_info *gs_info)
2312 {
2313 loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
2314 poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
2315 if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
2316 {
2317 *memory_access_type = VMAT_GATHER_SCATTER;
2318 if (!vect_check_gather_scatter (stmt_info, loop_vinfo, gs_info))
2319 gcc_unreachable ();
2320 else if (!vect_is_simple_use (gs_info->offset, vinfo,
2321 &gs_info->offset_dt,
2322 &gs_info->offset_vectype))
2323 {
2324 if (dump_enabled_p ())
2325 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2326 "%s index use not simple.\n",
2327 vls_type == VLS_LOAD ? "gather" : "scatter");
2328 return false;
2329 }
2330 /* Gather-scatter accesses perform only component accesses; alignment
2331 is irrelevant for them. */
2332 *alignment_support_scheme = dr_unaligned_supported;
2333 }
2334 else if (STMT_VINFO_GROUPED_ACCESS (stmt_info))
2335 {
2336 if (!get_group_load_store_type (vinfo, stmt_info, vectype, slp_node,
2337 masked_p,
2338 vls_type, memory_access_type,
2339 alignment_support_scheme, gs_info))
2340 return false;
2341 }
2342 else if (STMT_VINFO_STRIDED_P (stmt_info))
2343 {
2344 gcc_assert (!slp_node);
2345 if (loop_vinfo
2346 && vect_use_strided_gather_scatters_p (stmt_info, loop_vinfo,
2347 masked_p, gs_info))
2348 *memory_access_type = VMAT_GATHER_SCATTER;
2349 else
2350 *memory_access_type = VMAT_ELEMENTWISE;
2351 /* Alignment is irrelevant here. */
2352 *alignment_support_scheme = dr_unaligned_supported;
2353 }
2354 else
2355 {
2356 int cmp = compare_step_with_zero (vinfo, stmt_info);
2357 if (cmp < 0)
2358 *memory_access_type = get_negative_load_store_type
2359 (vinfo, stmt_info, vectype, vls_type, ncopies);
2360 else if (cmp == 0)
2361 {
2362 gcc_assert (vls_type == VLS_LOAD);
2363 *memory_access_type = VMAT_INVARIANT;
2364 }
2365 else
2366 *memory_access_type = VMAT_CONTIGUOUS;
2367 *alignment_support_scheme
2368 = vect_supportable_dr_alignment (vinfo,
2369 STMT_VINFO_DR_INFO (stmt_info), false);
2370 }
2371
2372 if ((*memory_access_type == VMAT_ELEMENTWISE
2373 || *memory_access_type == VMAT_STRIDED_SLP)
2374 && !nunits.is_constant ())
2375 {
2376 if (dump_enabled_p ())
2377 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2378 "Not using elementwise accesses due to variable "
2379 "vectorization factor.\n");
2380 return false;
2381 }
2382
2383 if (*alignment_support_scheme == dr_unaligned_unsupported)
2384 {
2385 if (dump_enabled_p ())
2386 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2387 "unsupported unaligned access\n");
2388 return false;
2389 }
2390
2391 /* FIXME: At the moment the cost model seems to underestimate the
2392 cost of using elementwise accesses. This check preserves the
2393 traditional behavior until that can be fixed. */
2394 stmt_vec_info first_stmt_info = DR_GROUP_FIRST_ELEMENT (stmt_info);
2395 if (!first_stmt_info)
2396 first_stmt_info = stmt_info;
2397 if (*memory_access_type == VMAT_ELEMENTWISE
2398 && !STMT_VINFO_STRIDED_P (first_stmt_info)
2399 && !(stmt_info == DR_GROUP_FIRST_ELEMENT (stmt_info)
2400 && !DR_GROUP_NEXT_ELEMENT (stmt_info)
2401 && !pow2p_hwi (DR_GROUP_SIZE (stmt_info))))
2402 {
2403 if (dump_enabled_p ())
2404 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2405 "not falling back to elementwise accesses\n");
2406 return false;
2407 }
2408 return true;
2409 }
2410
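/* Editorial note (not part of GCC): hypothetical scalar access patterns and
   the memory access types that get_load_store_type would typically choose
   for them, assuming the target supports each form:

     ... = a[0];        invariant            -> VMAT_INVARIANT
     ... = a[i];        unit stride          -> VMAT_CONTIGUOUS
     ... = a[n - i];    negative unit stride -> VMAT_CONTIGUOUS_REVERSE
     ... = a[i * s];    variable stride      -> VMAT_ELEMENTWISE, or
                                                VMAT_GATHER_SCATTER if a
                                                strided gather is usable
     ... = a[idx[i]];   indexed              -> VMAT_GATHER_SCATTER  */
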
2411 /* Return true if boolean argument MASK is suitable for vectorizing
2412 conditional operation STMT_INFO. When returning true, store the type
2413 of the definition in *MASK_DT_OUT and the type of the vectorized mask
2414 in *MASK_VECTYPE_OUT. */
2415
2416 static bool
2417 vect_check_scalar_mask (vec_info *vinfo, stmt_vec_info stmt_info, tree mask,
2418 vect_def_type *mask_dt_out,
2419 tree *mask_vectype_out)
2420 {
2421 if (!VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (mask)))
2422 {
2423 if (dump_enabled_p ())
2424 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2425 "mask argument is not a boolean.\n");
2426 return false;
2427 }
2428
2429 if (TREE_CODE (mask) != SSA_NAME)
2430 {
2431 if (dump_enabled_p ())
2432 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2433 "mask argument is not an SSA name.\n");
2434 return false;
2435 }
2436
2437 enum vect_def_type mask_dt;
2438 tree mask_vectype;
2439 if (!vect_is_simple_use (mask, vinfo, &mask_dt, &mask_vectype))
2440 {
2441 if (dump_enabled_p ())
2442 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2443 "mask use not simple.\n");
2444 return false;
2445 }
2446
2447 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
2448 if (!mask_vectype)
2449 mask_vectype = get_mask_type_for_scalar_type (vinfo, TREE_TYPE (vectype));
2450
2451 if (!mask_vectype || !VECTOR_BOOLEAN_TYPE_P (mask_vectype))
2452 {
2453 if (dump_enabled_p ())
2454 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2455 "could not find an appropriate vector mask type.\n");
2456 return false;
2457 }
2458
2459 if (maybe_ne (TYPE_VECTOR_SUBPARTS (mask_vectype),
2460 TYPE_VECTOR_SUBPARTS (vectype)))
2461 {
2462 if (dump_enabled_p ())
2463 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2464 "vector mask type %T"
2465 " does not match vector data type %T.\n",
2466 mask_vectype, vectype);
2467
2468 return false;
2469 }
2470
2471 *mask_dt_out = mask_dt;
2472 *mask_vectype_out = mask_vectype;
2473 return true;
2474 }
2475
2476 /* Return true if stored value RHS is suitable for vectorizing store
2477 statement STMT_INFO. When returning true, store the type of the
2478 definition in *RHS_DT_OUT, the type of the vectorized store value in
2479 *RHS_VECTYPE_OUT and the type of the store in *VLS_TYPE_OUT. */
2480
2481 static bool
2482 vect_check_store_rhs (vec_info *vinfo, stmt_vec_info stmt_info,
2483 slp_tree slp_node, tree rhs,
2484 vect_def_type *rhs_dt_out, tree *rhs_vectype_out,
2485 vec_load_store_type *vls_type_out)
2486 {
2487 /* If this is a store of a constant, make sure
2488 native_encode_expr can handle it. */
2489 if (CONSTANT_CLASS_P (rhs) && native_encode_expr (rhs, NULL, 64) == 0)
2490 {
2491 if (dump_enabled_p ())
2492 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2493 "cannot encode constant as a byte sequence.\n");
2494 return false;
2495 }
2496
2497 enum vect_def_type rhs_dt;
2498 tree rhs_vectype;
2499 slp_tree slp_op;
2500 if (!vect_is_simple_use (vinfo, stmt_info, slp_node, 0,
2501 &rhs, &slp_op, &rhs_dt, &rhs_vectype))
2502 {
2503 if (dump_enabled_p ())
2504 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2505 "use not simple.\n");
2506 return false;
2507 }
2508
2509 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
2510 if (rhs_vectype && !useless_type_conversion_p (vectype, rhs_vectype))
2511 {
2512 if (dump_enabled_p ())
2513 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2514 "incompatible vector types.\n");
2515 return false;
2516 }
2517
2518 *rhs_dt_out = rhs_dt;
2519 *rhs_vectype_out = rhs_vectype;
2520 if (rhs_dt == vect_constant_def || rhs_dt == vect_external_def)
2521 *vls_type_out = VLS_STORE_INVARIANT;
2522 else
2523 *vls_type_out = VLS_STORE;
2524 return true;
2525 }
2526
2527 /* Build an all-ones vector mask of type MASKTYPE while vectorizing STMT_INFO.
2528 Note that we support masks with floating-point type, in which case the
2529 floats are interpreted as a bitmask. */
2530
2531 static tree
2532 vect_build_all_ones_mask (vec_info *vinfo,
2533 stmt_vec_info stmt_info, tree masktype)
2534 {
2535 if (TREE_CODE (masktype) == INTEGER_TYPE)
2536 return build_int_cst (masktype, -1);
2537 else if (TREE_CODE (TREE_TYPE (masktype)) == INTEGER_TYPE)
2538 {
2539 tree mask = build_int_cst (TREE_TYPE (masktype), -1);
2540 mask = build_vector_from_val (masktype, mask);
2541 return vect_init_vector (vinfo, stmt_info, mask, masktype, NULL);
2542 }
2543 else if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (masktype)))
2544 {
2545 REAL_VALUE_TYPE r;
2546 long tmp[6];
2547 for (int j = 0; j < 6; ++j)
2548 tmp[j] = -1;
2549 real_from_target (&r, tmp, TYPE_MODE (TREE_TYPE (masktype)));
2550 tree mask = build_real (TREE_TYPE (masktype), r);
2551 mask = build_vector_from_val (masktype, mask);
2552 return vect_init_vector (vinfo, stmt_info, mask, masktype, NULL);
2553 }
2554 gcc_unreachable ();
2555 }
2556
2557 /* Build an all-zero merge value of type VECTYPE while vectorizing
2558 STMT_INFO as a gather load. */
2559
2560 static tree
2561 vect_build_zero_merge_argument (vec_info *vinfo,
2562 stmt_vec_info stmt_info, tree vectype)
2563 {
2564 tree merge;
2565 if (TREE_CODE (TREE_TYPE (vectype)) == INTEGER_TYPE)
2566 merge = build_int_cst (TREE_TYPE (vectype), 0);
2567 else if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (vectype)))
2568 {
2569 REAL_VALUE_TYPE r;
2570 long tmp[6];
2571 for (int j = 0; j < 6; ++j)
2572 tmp[j] = 0;
2573 real_from_target (&r, tmp, TYPE_MODE (TREE_TYPE (vectype)));
2574 merge = build_real (TREE_TYPE (vectype), r);
2575 }
2576 else
2577 gcc_unreachable ();
2578 merge = build_vector_from_val (vectype, merge);
2579 return vect_init_vector (vinfo, stmt_info, merge, vectype, NULL);
2580 }
2581
2582 /* Build a gather load call while vectorizing STMT_INFO. Insert new
2583 instructions before GSI and add them to VEC_STMT. GS_INFO describes
2584 the gather load operation. If the load is conditional, MASK is the
2585 unvectorized condition and MASK_DT is its definition type, otherwise
2586 MASK is null. */
2587
2588 static void
2589 vect_build_gather_load_calls (vec_info *vinfo, stmt_vec_info stmt_info,
2590 gimple_stmt_iterator *gsi,
2591 gimple **vec_stmt,
2592 gather_scatter_info *gs_info,
2593 tree mask)
2594 {
2595 loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
2596 class loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
2597 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
2598 poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
2599 int ncopies = vect_get_num_copies (loop_vinfo, vectype);
2600 edge pe = loop_preheader_edge (loop);
2601 enum { NARROW, NONE, WIDEN } modifier;
2602 poly_uint64 gather_off_nunits
2603 = TYPE_VECTOR_SUBPARTS (gs_info->offset_vectype);
2604
2605 tree arglist = TYPE_ARG_TYPES (TREE_TYPE (gs_info->decl));
2606 tree rettype = TREE_TYPE (TREE_TYPE (gs_info->decl));
2607 tree srctype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
2608 tree ptrtype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
2609 tree idxtype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
2610 tree masktype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
2611 tree scaletype = TREE_VALUE (arglist);
2612 tree real_masktype = masktype;
2613 gcc_checking_assert (types_compatible_p (srctype, rettype)
2614 && (!mask
2615 || TREE_CODE (masktype) == INTEGER_TYPE
2616 || types_compatible_p (srctype, masktype)));
2617 if (mask && TREE_CODE (masktype) == INTEGER_TYPE)
2618 masktype = truth_type_for (srctype);
2619
2620 tree mask_halftype = masktype;
2621 tree perm_mask = NULL_TREE;
2622 tree mask_perm_mask = NULL_TREE;
2623 if (known_eq (nunits, gather_off_nunits))
2624 modifier = NONE;
2625 else if (known_eq (nunits * 2, gather_off_nunits))
2626 {
2627 modifier = WIDEN;
2628
2629 /* Currently widening gathers and scatters are only supported for
2630 fixed-length vectors. */
2631 int count = gather_off_nunits.to_constant ();
2632 vec_perm_builder sel (count, count, 1);
2633 for (int i = 0; i < count; ++i)
2634 sel.quick_push (i | (count / 2));
2635
2636 vec_perm_indices indices (sel, 1, count);
2637 perm_mask = vect_gen_perm_mask_checked (gs_info->offset_vectype,
2638 indices);
2639 }
2640 else if (known_eq (nunits, gather_off_nunits * 2))
2641 {
2642 modifier = NARROW;
2643
2644 /* Currently narrowing gathers and scatters are only supported for
2645 fixed-length vectors. */
2646 int count = nunits.to_constant ();
2647 vec_perm_builder sel (count, count, 1);
2648 sel.quick_grow (count);
2649 for (int i = 0; i < count; ++i)
2650 sel[i] = i < count / 2 ? i : i + count / 2;
2651 vec_perm_indices indices (sel, 2, count);
2652 perm_mask = vect_gen_perm_mask_checked (vectype, indices);
2653
2654 ncopies *= 2;
2655
2656 if (mask && masktype == real_masktype)
2657 {
2658 for (int i = 0; i < count; ++i)
2659 sel[i] = i | (count / 2);
2660 indices.new_vector (sel, 2, count);
2661 mask_perm_mask = vect_gen_perm_mask_checked (masktype, indices);
2662 }
2663 else if (mask)
2664 mask_halftype = truth_type_for (gs_info->offset_vectype);
2665 }
2666 else
2667 gcc_unreachable ();
2668
2669 tree scalar_dest = gimple_get_lhs (stmt_info->stmt);
2670 tree vec_dest = vect_create_destination_var (scalar_dest, vectype);
2671
2672 tree ptr = fold_convert (ptrtype, gs_info->base);
2673 if (!is_gimple_min_invariant (ptr))
2674 {
2675 gimple_seq seq;
2676 ptr = force_gimple_operand (ptr, &seq, true, NULL_TREE);
2677 basic_block new_bb = gsi_insert_seq_on_edge_immediate (pe, seq);
2678 gcc_assert (!new_bb);
2679 }
2680
2681 tree scale = build_int_cst (scaletype, gs_info->scale);
2682
2683 tree vec_oprnd0 = NULL_TREE;
2684 tree vec_mask = NULL_TREE;
2685 tree src_op = NULL_TREE;
2686 tree mask_op = NULL_TREE;
2687 tree prev_res = NULL_TREE;
2688
2689 if (!mask)
2690 {
2691 src_op = vect_build_zero_merge_argument (vinfo, stmt_info, rettype);
2692 mask_op = vect_build_all_ones_mask (vinfo, stmt_info, masktype);
2693 }
2694
2695 auto_vec<tree> vec_oprnds0;
2696 auto_vec<tree> vec_masks;
2697 vect_get_vec_defs_for_operand (vinfo, stmt_info,
2698 modifier == WIDEN ? ncopies / 2 : ncopies,
2699 gs_info->offset, &vec_oprnds0);
2700 if (mask)
2701 vect_get_vec_defs_for_operand (vinfo, stmt_info,
2702 modifier == NARROW ? ncopies / 2 : ncopies,
2703 mask, &vec_masks);
2704 for (int j = 0; j < ncopies; ++j)
2705 {
2706 tree op, var;
2707 if (modifier == WIDEN && (j & 1))
2708 op = permute_vec_elements (vinfo, vec_oprnd0, vec_oprnd0,
2709 perm_mask, stmt_info, gsi);
2710 else
2711 op = vec_oprnd0 = vec_oprnds0[modifier == WIDEN ? j / 2 : j];
2712
2713 if (!useless_type_conversion_p (idxtype, TREE_TYPE (op)))
2714 {
2715 gcc_assert (known_eq (TYPE_VECTOR_SUBPARTS (TREE_TYPE (op)),
2716 TYPE_VECTOR_SUBPARTS (idxtype)));
2717 var = vect_get_new_ssa_name (idxtype, vect_simple_var);
2718 op = build1 (VIEW_CONVERT_EXPR, idxtype, op);
2719 gassign *new_stmt = gimple_build_assign (var, VIEW_CONVERT_EXPR, op);
2720 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
2721 op = var;
2722 }
2723
2724 if (mask)
2725 {
2726 if (mask_perm_mask && (j & 1))
2727 mask_op = permute_vec_elements (vinfo, mask_op, mask_op,
2728 mask_perm_mask, stmt_info, gsi);
2729 else
2730 {
2731 if (modifier == NARROW)
2732 {
2733 if ((j & 1) == 0)
2734 vec_mask = vec_masks[j / 2];
2735 }
2736 else
2737 vec_mask = vec_masks[j];
2738
2739 mask_op = vec_mask;
2740 if (!useless_type_conversion_p (masktype, TREE_TYPE (vec_mask)))
2741 {
2742 poly_uint64 sub1 = TYPE_VECTOR_SUBPARTS (TREE_TYPE (mask_op));
2743 poly_uint64 sub2 = TYPE_VECTOR_SUBPARTS (masktype);
2744 gcc_assert (known_eq (sub1, sub2));
2745 var = vect_get_new_ssa_name (masktype, vect_simple_var);
2746 mask_op = build1 (VIEW_CONVERT_EXPR, masktype, mask_op);
2747 gassign *new_stmt
2748 = gimple_build_assign (var, VIEW_CONVERT_EXPR, mask_op);
2749 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
2750 mask_op = var;
2751 }
2752 }
2753 if (modifier == NARROW && masktype != real_masktype)
2754 {
2755 var = vect_get_new_ssa_name (mask_halftype, vect_simple_var);
2756 gassign *new_stmt
2757 = gimple_build_assign (var, (j & 1) ? VEC_UNPACK_HI_EXPR
2758 : VEC_UNPACK_LO_EXPR,
2759 mask_op);
2760 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
2761 mask_op = var;
2762 }
2763 src_op = mask_op;
2764 }
2765
2766 tree mask_arg = mask_op;
2767 if (masktype != real_masktype)
2768 {
2769 tree utype, optype = TREE_TYPE (mask_op);
2770 if (TYPE_MODE (real_masktype) == TYPE_MODE (optype))
2771 utype = real_masktype;
2772 else
2773 utype = lang_hooks.types.type_for_mode (TYPE_MODE (optype), 1);
2774 var = vect_get_new_ssa_name (utype, vect_scalar_var);
2775 mask_arg = build1 (VIEW_CONVERT_EXPR, utype, mask_op);
2776 gassign *new_stmt
2777 = gimple_build_assign (var, VIEW_CONVERT_EXPR, mask_arg);
2778 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
2779 mask_arg = var;
2780 if (!useless_type_conversion_p (real_masktype, utype))
2781 {
2782 gcc_assert (TYPE_PRECISION (utype)
2783 <= TYPE_PRECISION (real_masktype));
2784 var = vect_get_new_ssa_name (real_masktype, vect_scalar_var);
2785 new_stmt = gimple_build_assign (var, NOP_EXPR, mask_arg);
2786 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
2787 mask_arg = var;
2788 }
2789 src_op = build_zero_cst (srctype);
2790 }
2791 gimple *new_stmt = gimple_build_call (gs_info->decl, 5, src_op, ptr, op,
2792 mask_arg, scale);
2793
2794 if (!useless_type_conversion_p (vectype, rettype))
2795 {
2796 gcc_assert (known_eq (TYPE_VECTOR_SUBPARTS (vectype),
2797 TYPE_VECTOR_SUBPARTS (rettype)));
2798 op = vect_get_new_ssa_name (rettype, vect_simple_var);
2799 gimple_call_set_lhs (new_stmt, op);
2800 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
2801 var = make_ssa_name (vec_dest);
2802 op = build1 (VIEW_CONVERT_EXPR, vectype, op);
2803 new_stmt = gimple_build_assign (var, VIEW_CONVERT_EXPR, op);
2804 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
2805 }
2806 else
2807 {
2808 var = make_ssa_name (vec_dest, new_stmt);
2809 gimple_call_set_lhs (new_stmt, var);
2810 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
2811 }
2812
2813 if (modifier == NARROW)
2814 {
2815 if ((j & 1) == 0)
2816 {
2817 prev_res = var;
2818 continue;
2819 }
2820 var = permute_vec_elements (vinfo, prev_res, var, perm_mask,
2821 stmt_info, gsi);
2822 new_stmt = SSA_NAME_DEF_STMT (var);
2823 }
2824
2825 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
2826 }
2827 *vec_stmt = STMT_VINFO_VEC_STMTS (stmt_info)[0];
2828 }
2829
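/* Editorial note (not part of GCC): in the WIDEN case above the offset
   vector has twice as many elements as the data vector, so even-numbered
   copies use the offsets as-is while odd-numbered copies first apply
   PERM_MASK, which selects the upper half of the offsets.  In the NARROW
   case NCOPIES is doubled and each pair of gather results is merged with
   PERM_MASK into a single vector of the data type.  */
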
2830 /* Prepare the base and offset in GS_INFO for vectorization.
2831 Set *DATAREF_PTR to the loop-invariant base address and fill *VEC_OFFSET
2832 with the vectorized offset operand for each of the NCOPIES copies of STMT_INFO.
2833 STMT_INFO is the statement described by GS_INFO and LOOP is the
2834 containing loop. */
2835
2836 static void
2837 vect_get_gather_scatter_ops (vec_info *vinfo,
2838 class loop *loop, stmt_vec_info stmt_info,
2839 gather_scatter_info *gs_info,
2840 tree *dataref_ptr, vec<tree> *vec_offset,
2841 unsigned ncopies)
2842 {
2843 gimple_seq stmts = NULL;
2844 *dataref_ptr = force_gimple_operand (gs_info->base, &stmts, true, NULL_TREE);
2845 if (stmts != NULL)
2846 {
2847 basic_block new_bb;
2848 edge pe = loop_preheader_edge (loop);
2849 new_bb = gsi_insert_seq_on_edge_immediate (pe, stmts);
2850 gcc_assert (!new_bb);
2851 }
2852 vect_get_vec_defs_for_operand (vinfo, stmt_info, ncopies, gs_info->offset,
2853 vec_offset, gs_info->offset_vectype);
2854 }
2855
2856 /* Prepare to implement a grouped or strided load or store using
2857 the gather load or scatter store operation described by GS_INFO.
2858 STMT_INFO is the load or store statement.
2859
2860 Set *DATAREF_BUMP to the amount that should be added to the base
2861 address after each copy of the vectorized statement. Set *VEC_OFFSET
2862 to an invariant offset vector in which element I has the value
2863 I * DR_STEP / SCALE. */
2864
2865 static void
2866 vect_get_strided_load_store_ops (stmt_vec_info stmt_info,
2867 loop_vec_info loop_vinfo,
2868 gather_scatter_info *gs_info,
2869 tree *dataref_bump, tree *vec_offset)
2870 {
2871 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
2872 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
2873
2874 tree bump = size_binop (MULT_EXPR,
2875 fold_convert (sizetype, unshare_expr (DR_STEP (dr))),
2876 size_int (TYPE_VECTOR_SUBPARTS (vectype)));
2877 *dataref_bump = cse_and_gimplify_to_preheader (loop_vinfo, bump);
2878
2879 /* The offset given in GS_INFO can have pointer type, so use the element
2880 type of the vector instead. */
2881 tree offset_type = TREE_TYPE (gs_info->offset_vectype);
2882
2883 /* Calculate X = DR_STEP / SCALE and convert it to the appropriate type. */
2884 tree step = size_binop (EXACT_DIV_EXPR, unshare_expr (DR_STEP (dr)),
2885 ssize_int (gs_info->scale));
2886 step = fold_convert (offset_type, step);
2887
2888 /* Create {0, X, X*2, X*3, ...}. */
2889 tree offset = fold_build2 (VEC_SERIES_EXPR, gs_info->offset_vectype,
2890 build_zero_cst (offset_type), step);
2891 *vec_offset = cse_and_gimplify_to_preheader (loop_vinfo, offset);
2892 }
2893
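/* Editorial sketch (not part of GCC): the arithmetic behind the invariant
   offset vector above, on plain integers.  The hypothetical values
   STEP = 32, SCALE = 8 and NUNITS = 4 give X = 4 and { 0, 4, 8, 12 }.  */

static void
build_strided_offsets_sketch (long step, long scale, int nunits, long *offsets)
{
  long x = step / scale;	/* X = DR_STEP / SCALE, assumed exact.  */
  for (int i = 0; i < nunits; ++i)
    offsets[i] = i * x;		/* { 0, X, X*2, X*3, ... }  */
}
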
2894 /* Return the amount that should be added to a vector pointer to move
2895 to the next or previous copy of AGGR_TYPE. DR_INFO is the data reference
2896 being vectorized and MEMORY_ACCESS_TYPE describes the type of
2897 vectorization. */
2898
2899 static tree
2900 vect_get_data_ptr_increment (vec_info *vinfo,
2901 dr_vec_info *dr_info, tree aggr_type,
2902 vect_memory_access_type memory_access_type)
2903 {
2904 if (memory_access_type == VMAT_INVARIANT)
2905 return size_zero_node;
2906
2907 tree iv_step = TYPE_SIZE_UNIT (aggr_type);
2908 tree step = vect_dr_behavior (vinfo, dr_info)->step;
2909 if (tree_int_cst_sgn (step) == -1)
2910 iv_step = fold_build1 (NEGATE_EXPR, TREE_TYPE (iv_step), iv_step);
2911 return iv_step;
2912 }
2913
2914 /* Check and perform vectorization of BUILT_IN_BSWAP{16,32,64,128}. */
2915
2916 static bool
2917 vectorizable_bswap (vec_info *vinfo,
2918 stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
2919 gimple **vec_stmt, slp_tree slp_node,
2920 slp_tree *slp_op,
2921 tree vectype_in, stmt_vector_for_cost *cost_vec)
2922 {
2923 tree op, vectype;
2924 gcall *stmt = as_a <gcall *> (stmt_info->stmt);
2925 loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
2926 unsigned ncopies;
2927
2928 op = gimple_call_arg (stmt, 0);
2929 vectype = STMT_VINFO_VECTYPE (stmt_info);
2930 poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
2931
2932 /* Multiple types in SLP are handled by creating the appropriate number of
2933 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
2934 case of SLP. */
2935 if (slp_node)
2936 ncopies = 1;
2937 else
2938 ncopies = vect_get_num_copies (loop_vinfo, vectype);
2939
2940 gcc_assert (ncopies >= 1);
2941
2942 tree char_vectype = get_same_sized_vectype (char_type_node, vectype_in);
2943 if (! char_vectype)
2944 return false;
2945
2946 poly_uint64 num_bytes = TYPE_VECTOR_SUBPARTS (char_vectype);
2947 unsigned word_bytes;
2948 if (!constant_multiple_p (num_bytes, nunits, &word_bytes))
2949 return false;
2950
2951 /* The encoding uses one stepped pattern for each byte in the word. */
2952 vec_perm_builder elts (num_bytes, word_bytes, 3);
2953 for (unsigned i = 0; i < 3; ++i)
2954 for (unsigned j = 0; j < word_bytes; ++j)
2955 elts.quick_push ((i + 1) * word_bytes - j - 1);
2956
2957 vec_perm_indices indices (elts, 1, num_bytes);
2958 if (!can_vec_perm_const_p (TYPE_MODE (char_vectype), indices))
2959 return false;
2960
2961 if (! vec_stmt)
2962 {
2963 if (slp_node
2964 && !vect_maybe_update_slp_op_vectype (slp_op[0], vectype_in))
2965 {
2966 if (dump_enabled_p ())
2967 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2968 "incompatible vector types for invariants\n");
2969 return false;
2970 }
2971
2972 STMT_VINFO_TYPE (stmt_info) = call_vec_info_type;
2973 DUMP_VECT_SCOPE ("vectorizable_bswap");
2974 if (! slp_node)
2975 {
2976 record_stmt_cost (cost_vec,
2977 1, vector_stmt, stmt_info, 0, vect_prologue);
2978 record_stmt_cost (cost_vec,
2979 ncopies, vec_perm, stmt_info, 0, vect_body);
2980 }
2981 return true;
2982 }
2983
2984 tree bswap_vconst = vec_perm_indices_to_tree (char_vectype, indices);
2985
2986 /* Transform. */
2987 vec<tree> vec_oprnds = vNULL;
2988 vect_get_vec_defs (vinfo, stmt_info, slp_node, ncopies,
2989 op, &vec_oprnds);
2990 /* Arguments are ready. Create the new vector stmt. */
2991 unsigned i;
2992 tree vop;
2993 FOR_EACH_VEC_ELT (vec_oprnds, i, vop)
2994 {
2995 gimple *new_stmt;
2996 tree tem = make_ssa_name (char_vectype);
2997 new_stmt = gimple_build_assign (tem, build1 (VIEW_CONVERT_EXPR,
2998 char_vectype, vop));
2999 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
3000 tree tem2 = make_ssa_name (char_vectype);
3001 new_stmt = gimple_build_assign (tem2, VEC_PERM_EXPR,
3002 tem, tem, bswap_vconst);
3003 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
3004 tem = make_ssa_name (vectype);
3005 new_stmt = gimple_build_assign (tem, build1 (VIEW_CONVERT_EXPR,
3006 vectype, tem2));
3007 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
3008 if (slp_node)
3009 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
3010 else
3011 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
3012 }
3013
3014 if (!slp_node)
3015 *vec_stmt = STMT_VINFO_VEC_STMTS (stmt_info)[0];
3016
3017 vec_oprnds.release ();
3018 return true;
3019 }
3020
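/* Editorial sketch (not part of GCC): the byte permutation selected in
   vectorizable_bswap, applied to a plain byte buffer.  WORD_BYTES is the
   width of the element being swapped (2, 4, 8 or 16) and NUM_BYTES the
   vector size in bytes.  */

static void
bswap_by_byte_perm_sketch (const unsigned char *in, unsigned char *out,
			   unsigned num_bytes, unsigned word_bytes)
{
  for (unsigned i = 0; i < num_bytes / word_bytes; ++i)
    for (unsigned j = 0; j < word_bytes; ++j)
      /* Output byte i * WORD_BYTES + j takes input byte
	 (i + 1) * WORD_BYTES - j - 1, reversing each word.  */
      out[i * word_bytes + j] = in[(i + 1) * word_bytes - j - 1];
}
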
3021 /* Return true if vector types VECTYPE_IN and VECTYPE_OUT have
3022 integer elements and if we can narrow VECTYPE_IN to VECTYPE_OUT
3023 in a single step. On success, store the binary pack code in
3024 *CONVERT_CODE. */
3025
3026 static bool
3027 simple_integer_narrowing (tree vectype_out, tree vectype_in,
3028 tree_code *convert_code)
3029 {
3030 if (!INTEGRAL_TYPE_P (TREE_TYPE (vectype_out))
3031 || !INTEGRAL_TYPE_P (TREE_TYPE (vectype_in)))
3032 return false;
3033
3034 tree_code code;
3035 int multi_step_cvt = 0;
3036 auto_vec <tree, 8> interm_types;
3037 if (!supportable_narrowing_operation (NOP_EXPR, vectype_out, vectype_in,
3038 &code, &multi_step_cvt, &interm_types)
3039 || multi_step_cvt)
3040 return false;
3041
3042 *convert_code = code;
3043 return true;
3044 }
3045
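/* Editorial note (not part of GCC): a hypothetical narrowing accepted by
   simple_integer_narrowing is VECTYPE_IN = V2DI with VECTYPE_OUT = V4SI,
   where a single VEC_PACK_TRUNC_EXPR packs two V2DI operands into one
   V4SI result; conversions that would need intermediate types
   (MULTI_STEP_CVT != 0) are rejected.  */
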
3046 /* Function vectorizable_call.
3047
3048 Check if STMT_INFO performs a function call that can be vectorized.
3049 If VEC_STMT is also passed, vectorize STMT_INFO: create a vectorized
3050 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
3051 Return true if STMT_INFO is vectorizable in this way. */
3052
3053 static bool
3054 vectorizable_call (vec_info *vinfo,
3055 stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
3056 gimple **vec_stmt, slp_tree slp_node,
3057 stmt_vector_for_cost *cost_vec)
3058 {
3059 gcall *stmt;
3060 tree vec_dest;
3061 tree scalar_dest;
3062 tree op;
3063 tree vec_oprnd0 = NULL_TREE, vec_oprnd1 = NULL_TREE;
3064 tree vectype_out, vectype_in;
3065 poly_uint64 nunits_in;
3066 poly_uint64 nunits_out;
3067 loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
3068 bb_vec_info bb_vinfo = dyn_cast <bb_vec_info> (vinfo);
3069 tree fndecl, new_temp, rhs_type;
3070 enum vect_def_type dt[4]
3071 = { vect_unknown_def_type, vect_unknown_def_type, vect_unknown_def_type,
3072 vect_unknown_def_type };
3073 tree vectypes[ARRAY_SIZE (dt)] = {};
3074 slp_tree slp_op[ARRAY_SIZE (dt)] = {};
3075 int ndts = ARRAY_SIZE (dt);
3076 int ncopies, j;
3077 auto_vec<tree, 8> vargs;
3078 auto_vec<tree, 8> orig_vargs;
3079 enum { NARROW, NONE, WIDEN } modifier;
3080 size_t i, nargs;
3081 tree lhs;
3082
3083 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
3084 return false;
3085
3086 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
3087 && ! vec_stmt)
3088 return false;
3089
3090 /* Is STMT_INFO a vectorizable call? */
3091 stmt = dyn_cast <gcall *> (stmt_info->stmt);
3092 if (!stmt)
3093 return false;
3094
3095 if (gimple_call_internal_p (stmt)
3096 && (internal_load_fn_p (gimple_call_internal_fn (stmt))
3097 || internal_store_fn_p (gimple_call_internal_fn (stmt))))
3098 /* Handled by vectorizable_load and vectorizable_store. */
3099 return false;
3100
3101 if (gimple_call_lhs (stmt) == NULL_TREE
3102 || TREE_CODE (gimple_call_lhs (stmt)) != SSA_NAME)
3103 return false;
3104
3105 gcc_checking_assert (!stmt_can_throw_internal (cfun, stmt));
3106
3107 vectype_out = STMT_VINFO_VECTYPE (stmt_info);
3108
3109 /* Process function arguments. */
3110 rhs_type = NULL_TREE;
3111 vectype_in = NULL_TREE;
3112 nargs = gimple_call_num_args (stmt);
3113
3114 /* Bail out if the function has more than four arguments; we do not have
3115 interesting builtin functions to vectorize with more than two arguments
3116 except for fma. Calls with no arguments are not handled either. */
3117 if (nargs == 0 || nargs > 4)
3118 return false;
3119
3120 /* Ignore the arguments of IFN_GOMP_SIMD_LANE, they are magic. */
3121 combined_fn cfn = gimple_call_combined_fn (stmt);
3122 if (cfn == CFN_GOMP_SIMD_LANE)
3123 {
3124 nargs = 0;
3125 rhs_type = unsigned_type_node;
3126 }
3127
3128 int mask_opno = -1;
3129 if (internal_fn_p (cfn))
3130 mask_opno = internal_fn_mask_index (as_internal_fn (cfn));
3131
3132 for (i = 0; i < nargs; i++)
3133 {
3134 if ((int) i == mask_opno)
3135 {
3136 op = gimple_call_arg (stmt, i);
3137 if (!vect_check_scalar_mask (vinfo,
3138 stmt_info, op, &dt[i], &vectypes[i]))
3139 return false;
3140 continue;
3141 }
3142
3143 if (!vect_is_simple_use (vinfo, stmt_info, slp_node,
3144 i, &op, &slp_op[i], &dt[i], &vectypes[i]))
3145 {
3146 if (dump_enabled_p ())
3147 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3148 "use not simple.\n");
3149 return false;
3150 }
3151
3152 /* We can only handle calls with arguments of the same type. */
3153 if (rhs_type
3154 && !types_compatible_p (rhs_type, TREE_TYPE (op)))
3155 {
3156 if (dump_enabled_p ())
3157 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3158 "argument types differ.\n");
3159 return false;
3160 }
3161 if (!rhs_type)
3162 rhs_type = TREE_TYPE (op);
3163
3164 if (!vectype_in)
3165 vectype_in = vectypes[i];
3166 else if (vectypes[i]
3167 && !types_compatible_p (vectypes[i], vectype_in))
3168 {
3169 if (dump_enabled_p ())
3170 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3171 "argument vector types differ.\n");
3172 return false;
3173 }
3174 }
3175 /* If all arguments are external or constant defs, infer the vector type
3176 from the scalar type. */
3177 if (!vectype_in)
3178 vectype_in = get_vectype_for_scalar_type (vinfo, rhs_type, slp_node);
3179 if (vec_stmt)
3180 gcc_assert (vectype_in);
3181 if (!vectype_in)
3182 {
3183 if (dump_enabled_p ())
3184 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3185 "no vectype for scalar type %T\n", rhs_type);
3186
3187 return false;
3188 }
3189 /* FORNOW: we don't yet support mixtures of vector sizes for calls,
3190 just mixtures of nunits. E.g. DI->SI versions of __builtin_ctz*
3191 are traditionally vectorized as two VnDI->VnDI IFN_CTZs followed
3192 by a pack of the two vectors into an SI vector. We would need
3193 separate code to handle direct VnDI->VnSI IFN_CTZs. */
3194 if (TYPE_SIZE (vectype_in) != TYPE_SIZE (vectype_out))
3195 {
3196 if (dump_enabled_p ())
3197 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3198 "mismatched vector sizes %T and %T\n",
3199 vectype_in, vectype_out);
3200 return false;
3201 }
3202
3203 if (VECTOR_BOOLEAN_TYPE_P (vectype_out)
3204 != VECTOR_BOOLEAN_TYPE_P (vectype_in))
3205 {
3206 if (dump_enabled_p ())
3207 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3208 "mixed mask and nonmask vector types\n");
3209 return false;
3210 }
3211
3212 /* FORNOW */
3213 nunits_in = TYPE_VECTOR_SUBPARTS (vectype_in);
3214 nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
3215 if (known_eq (nunits_in * 2, nunits_out))
3216 modifier = NARROW;
3217 else if (known_eq (nunits_out, nunits_in))
3218 modifier = NONE;
3219 else if (known_eq (nunits_out * 2, nunits_in))
3220 modifier = WIDEN;
3221 else
3222 return false;
3223
3224 /* We only handle functions that do not read or clobber memory. */
3225 if (gimple_vuse (stmt))
3226 {
3227 if (dump_enabled_p ())
3228 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3229 "function reads from or writes to memory.\n");
3230 return false;
3231 }
3232
3233 /* For now, we only vectorize functions if a target specific builtin
3234 is available. TODO -- in some cases, it might be profitable to
3235 insert the calls for pieces of the vector, in order to be able
3236 to vectorize other operations in the loop. */
3237 fndecl = NULL_TREE;
3238 internal_fn ifn = IFN_LAST;
3239 tree callee = gimple_call_fndecl (stmt);
3240
3241 /* First try using an internal function. */
3242 tree_code convert_code = ERROR_MARK;
3243 if (cfn != CFN_LAST
3244 && (modifier == NONE
3245 || (modifier == NARROW
3246 && simple_integer_narrowing (vectype_out, vectype_in,
3247 &convert_code))))
3248 ifn = vectorizable_internal_function (cfn, callee, vectype_out,
3249 vectype_in);
3250
3251 /* If that fails, try asking for a target-specific built-in function. */
3252 if (ifn == IFN_LAST)
3253 {
3254 if (cfn != CFN_LAST)
3255 fndecl = targetm.vectorize.builtin_vectorized_function
3256 (cfn, vectype_out, vectype_in);
3257 else if (callee && fndecl_built_in_p (callee, BUILT_IN_MD))
3258 fndecl = targetm.vectorize.builtin_md_vectorized_function
3259 (callee, vectype_out, vectype_in);
3260 }
3261
3262 if (ifn == IFN_LAST && !fndecl)
3263 {
3264 if (cfn == CFN_GOMP_SIMD_LANE
3265 && !slp_node
3266 && loop_vinfo
3267 && LOOP_VINFO_LOOP (loop_vinfo)->simduid
3268 && TREE_CODE (gimple_call_arg (stmt, 0)) == SSA_NAME
3269 && LOOP_VINFO_LOOP (loop_vinfo)->simduid
3270 == SSA_NAME_VAR (gimple_call_arg (stmt, 0)))
3271 {
3272 /* We can handle IFN_GOMP_SIMD_LANE by returning a
3273 { 0, 1, 2, ... vf - 1 } vector. */
3274 gcc_assert (nargs == 0);
3275 }
3276 else if (modifier == NONE
3277 && (gimple_call_builtin_p (stmt, BUILT_IN_BSWAP16)
3278 || gimple_call_builtin_p (stmt, BUILT_IN_BSWAP32)
3279 || gimple_call_builtin_p (stmt, BUILT_IN_BSWAP64)
3280 || gimple_call_builtin_p (stmt, BUILT_IN_BSWAP128)))
3281 return vectorizable_bswap (vinfo, stmt_info, gsi, vec_stmt, slp_node,
3282 slp_op, vectype_in, cost_vec);
3283 else
3284 {
3285 if (dump_enabled_p ())
3286 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3287 "function is not vectorizable.\n");
3288 return false;
3289 }
3290 }
3291
3292 if (slp_node)
3293 ncopies = 1;
3294 else if (modifier == NARROW && ifn == IFN_LAST)
3295 ncopies = vect_get_num_copies (loop_vinfo, vectype_out);
3296 else
3297 ncopies = vect_get_num_copies (loop_vinfo, vectype_in);
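 /* Note: in the NARROW case the internal-function path emits one
    vectype_in-sized half result per copy and packs pairs of them below,
    so its copies are counted against vectype_in; only the target-builtin
    path, which consumes two input vectors per call, is counted against
    vectype_out.  */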
3298
3299 /* Sanity check: make sure that at least one copy of the vectorized stmt
3300 needs to be generated. */
3301 gcc_assert (ncopies >= 1);
3302
3303 vec_loop_masks *masks = (loop_vinfo ? &LOOP_VINFO_MASKS (loop_vinfo) : NULL);
3304 if (!vec_stmt) /* transformation not required. */
3305 {
3306 if (slp_node)
3307 for (i = 0; i < nargs; ++i)
3308 if (!vect_maybe_update_slp_op_vectype (slp_op[i], vectype_in))
3309 {
3310 if (dump_enabled_p ())
3311 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3312 "incompatible vector types for invariants\n");
3313 return false;
3314 }
3315 STMT_VINFO_TYPE (stmt_info) = call_vec_info_type;
3316 DUMP_VECT_SCOPE ("vectorizable_call");
3317 vect_model_simple_cost (vinfo, stmt_info,
3318 ncopies, dt, ndts, slp_node, cost_vec);
3319 if (ifn != IFN_LAST && modifier == NARROW && !slp_node)
3320 record_stmt_cost (cost_vec, ncopies / 2,
3321 vec_promote_demote, stmt_info, 0, vect_body);
3322
3323 if (loop_vinfo && mask_opno >= 0)
3324 {
3325 unsigned int nvectors = (slp_node
3326 ? SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node)
3327 : ncopies);
3328 tree scalar_mask = gimple_call_arg (stmt_info->stmt, mask_opno);
3329 vect_record_loop_mask (loop_vinfo, masks, nvectors,
3330 vectype_out, scalar_mask);
3331 }
3332 return true;
3333 }
3334
3335 /* Transform. */
3336
3337 if (dump_enabled_p ())
3338 dump_printf_loc (MSG_NOTE, vect_location, "transform call.\n");
3339
3340 /* Handle def. */
3341 scalar_dest = gimple_call_lhs (stmt);
3342 vec_dest = vect_create_destination_var (scalar_dest, vectype_out);
3343
3344 bool masked_loop_p = loop_vinfo && LOOP_VINFO_FULLY_MASKED_P (loop_vinfo);
3345
3346 if (modifier == NONE || ifn != IFN_LAST)
3347 {
3348 tree prev_res = NULL_TREE;
3349 vargs.safe_grow (nargs, true);
3350 orig_vargs.safe_grow (nargs, true);
3351 auto_vec<vec<tree> > vec_defs (nargs);
3352 for (j = 0; j < ncopies; ++j)
3353 {
3354 /* Build argument list for the vectorized call. */
3355 if (slp_node)
3356 {
3357 vec<tree> vec_oprnds0;
3358
3359 vect_get_slp_defs (vinfo, slp_node, &vec_defs);
3360 vec_oprnds0 = vec_defs[0];
3361
3362 /* Arguments are ready. Create the new vector stmt. */
3363 FOR_EACH_VEC_ELT (vec_oprnds0, i, vec_oprnd0)
3364 {
3365 size_t k;
3366 for (k = 0; k < nargs; k++)
3367 {
3368 vec<tree> vec_oprndsk = vec_defs[k];
3369 vargs[k] = vec_oprndsk[i];
3370 }
3371 gimple *new_stmt;
3372 if (modifier == NARROW)
3373 {
3374 /* We don't define any narrowing conditional functions
3375 at present. */
3376 gcc_assert (mask_opno < 0);
3377 tree half_res = make_ssa_name (vectype_in);
3378 gcall *call
3379 = gimple_build_call_internal_vec (ifn, vargs);
3380 gimple_call_set_lhs (call, half_res);
3381 gimple_call_set_nothrow (call, true);
3382 vect_finish_stmt_generation (vinfo, stmt_info, call, gsi);
3383 if ((i & 1) == 0)
3384 {
3385 prev_res = half_res;
3386 continue;
3387 }
3388 new_temp = make_ssa_name (vec_dest);
3389 new_stmt = gimple_build_assign (new_temp, convert_code,
3390 prev_res, half_res);
3391 vect_finish_stmt_generation (vinfo, stmt_info,
3392 new_stmt, gsi);
3393 }
3394 else
3395 {
3396 if (mask_opno >= 0 && masked_loop_p)
3397 {
3398 unsigned int vec_num = vec_oprnds0.length ();
3399 /* Always true for SLP. */
3400 gcc_assert (ncopies == 1);
3401 tree mask = vect_get_loop_mask (gsi, masks, vec_num,
3402 vectype_out, i);
3403 vargs[mask_opno] = prepare_load_store_mask
3404 (TREE_TYPE (mask), mask, vargs[mask_opno], gsi);
3405 }
3406
3407 gcall *call;
3408 if (ifn != IFN_LAST)
3409 call = gimple_build_call_internal_vec (ifn, vargs);
3410 else
3411 call = gimple_build_call_vec (fndecl, vargs);
3412 new_temp = make_ssa_name (vec_dest, call);
3413 gimple_call_set_lhs (call, new_temp);
3414 gimple_call_set_nothrow (call, true);
3415 vect_finish_stmt_generation (vinfo, stmt_info, call, gsi);
3416 new_stmt = call;
3417 }
3418 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
3419 }
3420 continue;
3421 }
3422
3423 for (i = 0; i < nargs; i++)
3424 {
3425 op = gimple_call_arg (stmt, i);
3426 if (j == 0)
3427 {
3428 vec_defs.quick_push (vNULL);
3429 vect_get_vec_defs_for_operand (vinfo, stmt_info, ncopies,
3430 op, &vec_defs[i]);
3431 }
3432 orig_vargs[i] = vargs[i] = vec_defs[i][j];
3433 }
3434
3435 if (mask_opno >= 0 && masked_loop_p)
3436 {
3437 tree mask = vect_get_loop_mask (gsi, masks, ncopies,
3438 vectype_out, j);
3439 vargs[mask_opno]
3440 = prepare_load_store_mask (TREE_TYPE (mask), mask,
3441 vargs[mask_opno], gsi);
3442 }
3443
3444 gimple *new_stmt;
3445 if (cfn == CFN_GOMP_SIMD_LANE)
3446 {
3447 tree cst = build_index_vector (vectype_out, j * nunits_out, 1);
3448 tree new_var
3449 = vect_get_new_ssa_name (vectype_out, vect_simple_var, "cst_");
3450 gimple *init_stmt = gimple_build_assign (new_var, cst);
3451 vect_init_vector_1 (vinfo, stmt_info, init_stmt, NULL);
3452 new_temp = make_ssa_name (vec_dest);
3453 new_stmt = gimple_build_assign (new_temp, new_var);
3454 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
3455 }
3456 else if (modifier == NARROW)
3457 {
3458 /* We don't define any narrowing conditional functions at
3459 present. */
3460 gcc_assert (mask_opno < 0);
3461 tree half_res = make_ssa_name (vectype_in);
3462 gcall *call = gimple_build_call_internal_vec (ifn, vargs);
3463 gimple_call_set_lhs (call, half_res);
3464 gimple_call_set_nothrow (call, true);
3465 vect_finish_stmt_generation (vinfo, stmt_info, call, gsi);
3466 if ((j & 1) == 0)
3467 {
3468 prev_res = half_res;
3469 continue;
3470 }
3471 new_temp = make_ssa_name (vec_dest);
3472 new_stmt = gimple_build_assign (new_temp, convert_code,
3473 prev_res, half_res);
3474 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
3475 }
3476 else
3477 {
3478 gcall *call;
3479 if (ifn != IFN_LAST)
3480 call = gimple_build_call_internal_vec (ifn, vargs);
3481 else
3482 call = gimple_build_call_vec (fndecl, vargs);
3483 new_temp = make_ssa_name (vec_dest, call);
3484 gimple_call_set_lhs (call, new_temp);
3485 gimple_call_set_nothrow (call, true);
3486 vect_finish_stmt_generation (vinfo, stmt_info, call, gsi);
3487 new_stmt = call;
3488 }
3489
3490 if (j == (modifier == NARROW ? 1 : 0))
3491 *vec_stmt = new_stmt;
3492 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
3493 }
3494 for (i = 0; i < nargs; i++)
3495 {
3496 vec<tree> vec_oprndsi = vec_defs[i];
3497 vec_oprndsi.release ();
3498 }
3499 }
3500 else if (modifier == NARROW)
3501 {
3502 auto_vec<vec<tree> > vec_defs (nargs);
3503 /* We don't define any narrowing conditional functions at present. */
3504 gcc_assert (mask_opno < 0);
3505 for (j = 0; j < ncopies; ++j)
3506 {
3507 /* Build argument list for the vectorized call. */
3508 if (j == 0)
3509 vargs.create (nargs * 2);
3510 else
3511 vargs.truncate (0);
3512
3513 if (slp_node)
3514 {
3515 vec<tree> vec_oprnds0;
3516
3517 vect_get_slp_defs (vinfo, slp_node, &vec_defs);
3518 vec_oprnds0 = vec_defs[0];
3519
3520 /* Arguments are ready. Create the new vector stmt. */
3521 for (i = 0; vec_oprnds0.iterate (i, &vec_oprnd0); i += 2)
3522 {
3523 size_t k;
3524 vargs.truncate (0);
3525 for (k = 0; k < nargs; k++)
3526 {
3527 vec<tree> vec_oprndsk = vec_defs[k];
3528 vargs.quick_push (vec_oprndsk[i]);
3529 vargs.quick_push (vec_oprndsk[i + 1]);
3530 }
3531 gcall *call;
3532 if (ifn != IFN_LAST)
3533 call = gimple_build_call_internal_vec (ifn, vargs);
3534 else
3535 call = gimple_build_call_vec (fndecl, vargs);
3536 new_temp = make_ssa_name (vec_dest, call);
3537 gimple_call_set_lhs (call, new_temp);
3538 gimple_call_set_nothrow (call, true);
3539 vect_finish_stmt_generation (vinfo, stmt_info, call, gsi);
3540 SLP_TREE_VEC_STMTS (slp_node).quick_push (call);
3541 }
3542 continue;
3543 }
3544
3545 for (i = 0; i < nargs; i++)
3546 {
3547 op = gimple_call_arg (stmt, i);
3548 if (j == 0)
3549 {
3550 vec_defs.quick_push (vNULL);
3551 vect_get_vec_defs_for_operand (vinfo, stmt_info, 2 * ncopies,
3552 op, &vec_defs[i], vectypes[i]);
3553 }
3554 vec_oprnd0 = vec_defs[i][2*j];
3555 vec_oprnd1 = vec_defs[i][2*j+1];
3556
3557 vargs.quick_push (vec_oprnd0);
3558 vargs.quick_push (vec_oprnd1);
3559 }
3560
3561 gcall *new_stmt = gimple_build_call_vec (fndecl, vargs);
3562 new_temp = make_ssa_name (vec_dest, new_stmt);
3563 gimple_call_set_lhs (new_stmt, new_temp);
3564 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
3565
3566 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
3567 }
3568
3569 if (!slp_node)
3570 *vec_stmt = STMT_VINFO_VEC_STMTS (stmt_info)[0];
3571
3572 for (i = 0; i < nargs; i++)
3573 {
3574 vec<tree> vec_oprndsi = vec_defs[i];
3575 vec_oprndsi.release ();
3576 }
3577 }
3578 else
3579 /* No current target implements this case. */
3580 return false;
3581
3582 vargs.release ();
3583
3584 /* The call in STMT might prevent it from being removed in DCE.
3585 We cannot remove it here, however, due to the way the ssa name
3586 it defines is mapped to the new definition. So just replace the
3587 rhs of the statement with something harmless. */
3588
3589 if (slp_node)
3590 return true;
3591
3592 stmt_info = vect_orig_stmt (stmt_info);
3593 lhs = gimple_get_lhs (stmt_info->stmt);
3594
3595 gassign *new_stmt
3596 = gimple_build_assign (lhs, build_zero_cst (TREE_TYPE (lhs)));
3597 vinfo->replace_stmt (gsi, stmt_info, new_stmt);
3598
3599 return true;
3600 }
3601
3602
3603 struct simd_call_arg_info
3604 {
3605 tree vectype;
3606 tree op;
3607 HOST_WIDE_INT linear_step;
3608 enum vect_def_type dt;
3609 unsigned int align;
3610 bool simd_lane_linear;
3611 };
3612
3613 /* Helper function of vectorizable_simd_clone_call. If OP, an SSA_NAME,
3614 is linear within a simd lane (but not within the whole loop), note it
3615 in *ARGINFO. */
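 /* For example, a sequence like the following (illustrative GIMPLE, SSA
    names invented for exposition)
      _5 = GOMP_SIMD_LANE (simduid.0_4(D));
      _6 = (sizetype) _5;
      _7 = _6 * 4;
      op_8 = &array + _7;
    is noted as base &array with linear_step 4 within the simd lane.  */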
3616
3617 static void
3618 vect_simd_lane_linear (tree op, class loop *loop,
3619 struct simd_call_arg_info *arginfo)
3620 {
3621 gimple *def_stmt = SSA_NAME_DEF_STMT (op);
3622
3623 if (!is_gimple_assign (def_stmt)
3624 || gimple_assign_rhs_code (def_stmt) != POINTER_PLUS_EXPR
3625 || !is_gimple_min_invariant (gimple_assign_rhs1 (def_stmt)))
3626 return;
3627
3628 tree base = gimple_assign_rhs1 (def_stmt);
3629 HOST_WIDE_INT linear_step = 0;
3630 tree v = gimple_assign_rhs2 (def_stmt);
3631 while (TREE_CODE (v) == SSA_NAME)
3632 {
3633 tree t;
3634 def_stmt = SSA_NAME_DEF_STMT (v);
3635 if (is_gimple_assign (def_stmt))
3636 switch (gimple_assign_rhs_code (def_stmt))
3637 {
3638 case PLUS_EXPR:
3639 t = gimple_assign_rhs2 (def_stmt);
3640 if (linear_step || TREE_CODE (t) != INTEGER_CST)
3641 return;
3642 base = fold_build2 (POINTER_PLUS_EXPR, TREE_TYPE (base), base, t);
3643 v = gimple_assign_rhs1 (def_stmt);
3644 continue;
3645 case MULT_EXPR:
3646 t = gimple_assign_rhs2 (def_stmt);
3647 if (linear_step || !tree_fits_shwi_p (t) || integer_zerop (t))
3648 return;
3649 linear_step = tree_to_shwi (t);
3650 v = gimple_assign_rhs1 (def_stmt);
3651 continue;
3652 CASE_CONVERT:
3653 t = gimple_assign_rhs1 (def_stmt);
3654 if (TREE_CODE (TREE_TYPE (t)) != INTEGER_TYPE
3655 || (TYPE_PRECISION (TREE_TYPE (v))
3656 < TYPE_PRECISION (TREE_TYPE (t))))
3657 return;
3658 if (!linear_step)
3659 linear_step = 1;
3660 v = t;
3661 continue;
3662 default:
3663 return;
3664 }
3665 else if (gimple_call_internal_p (def_stmt, IFN_GOMP_SIMD_LANE)
3666 && loop->simduid
3667 && TREE_CODE (gimple_call_arg (def_stmt, 0)) == SSA_NAME
3668 && (SSA_NAME_VAR (gimple_call_arg (def_stmt, 0))
3669 == loop->simduid))
3670 {
3671 if (!linear_step)
3672 linear_step = 1;
3673 arginfo->linear_step = linear_step;
3674 arginfo->op = base;
3675 arginfo->simd_lane_linear = true;
3676 return;
3677 }
3678 }
3679 }
3680
3681 /* Return the number of elements in vector type VECTYPE, which is associated
3682 with a SIMD clone. At present these vectors always have a constant
3683 length. */
3684
3685 static unsigned HOST_WIDE_INT
3686 simd_clone_subparts (tree vectype)
3687 {
3688 return TYPE_VECTOR_SUBPARTS (vectype).to_constant ();
3689 }
3690
3691 /* Function vectorizable_simd_clone_call.
3692
3693 Check if STMT_INFO performs a function call that can be vectorized
3694 by calling a simd clone of the function.
3695 If VEC_STMT is also passed, vectorize STMT_INFO: create a vectorized
3696 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
3697 Return true if STMT_INFO is vectorizable in this way. */
3698
3699 static bool
3700 vectorizable_simd_clone_call (vec_info *vinfo, stmt_vec_info stmt_info,
3701 gimple_stmt_iterator *gsi,
3702 gimple **vec_stmt, slp_tree slp_node,
3703 stmt_vector_for_cost *)
3704 {
3705 tree vec_dest;
3706 tree scalar_dest;
3707 tree op, type;
3708 tree vec_oprnd0 = NULL_TREE;
3709 tree vectype;
3710 poly_uint64 nunits;
3711 loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
3712 bb_vec_info bb_vinfo = dyn_cast <bb_vec_info> (vinfo);
3713 class loop *loop = loop_vinfo ? LOOP_VINFO_LOOP (loop_vinfo) : NULL;
3714 tree fndecl, new_temp;
3715 int ncopies, j;
3716 auto_vec<simd_call_arg_info> arginfo;
3717 vec<tree> vargs = vNULL;
3718 size_t i, nargs;
3719 tree lhs, rtype, ratype;
3720 vec<constructor_elt, va_gc> *ret_ctor_elts = NULL;
3721
3722 /* Is STMT a vectorizable call? */
3723 gcall *stmt = dyn_cast <gcall *> (stmt_info->stmt);
3724 if (!stmt)
3725 return false;
3726
3727 fndecl = gimple_call_fndecl (stmt);
3728 if (fndecl == NULL_TREE)
3729 return false;
3730
3731 struct cgraph_node *node = cgraph_node::get (fndecl);
3732 if (node == NULL || node->simd_clones == NULL)
3733 return false;
3734
3735 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
3736 return false;
3737
3738 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
3739 && ! vec_stmt)
3740 return false;
3741
3742 if (gimple_call_lhs (stmt)
3743 && TREE_CODE (gimple_call_lhs (stmt)) != SSA_NAME)
3744 return false;
3745
3746 gcc_checking_assert (!stmt_can_throw_internal (cfun, stmt));
3747
3748 vectype = STMT_VINFO_VECTYPE (stmt_info);
3749
3750 if (loop_vinfo && nested_in_vect_loop_p (loop, stmt_info))
3751 return false;
3752
3753 /* FORNOW */
3754 if (slp_node)
3755 return false;
3756
3757 /* Process function arguments. */
3758 nargs = gimple_call_num_args (stmt);
3759
3760 /* Bail out if the function has zero arguments. */
3761 if (nargs == 0)
3762 return false;
3763
3764 arginfo.reserve (nargs, true);
3765
3766 for (i = 0; i < nargs; i++)
3767 {
3768 simd_call_arg_info thisarginfo;
3769 affine_iv iv;
3770
3771 thisarginfo.linear_step = 0;
3772 thisarginfo.align = 0;
3773 thisarginfo.op = NULL_TREE;
3774 thisarginfo.simd_lane_linear = false;
3775
3776 op = gimple_call_arg (stmt, i);
3777 if (!vect_is_simple_use (op, vinfo, &thisarginfo.dt,
3778 &thisarginfo.vectype)
3779 || thisarginfo.dt == vect_uninitialized_def)
3780 {
3781 if (dump_enabled_p ())
3782 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3783 "use not simple.\n");
3784 return false;
3785 }
3786
3787 if (thisarginfo.dt == vect_constant_def
3788 || thisarginfo.dt == vect_external_def)
3789 gcc_assert (thisarginfo.vectype == NULL_TREE);
3790 else
3791 {
3792 gcc_assert (thisarginfo.vectype != NULL_TREE);
3793 if (VECTOR_BOOLEAN_TYPE_P (thisarginfo.vectype))
3794 {
3795 if (dump_enabled_p ())
3796 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3797 "vector mask arguments are not supported\n");
3798 return false;
3799 }
3800 }
3801
3802 /* For linear arguments, the analyze phase should have saved
3803 the base and step in STMT_VINFO_SIMD_CLONE_INFO. */
3804 if (i * 3 + 4 <= STMT_VINFO_SIMD_CLONE_INFO (stmt_info).length ()
3805 && STMT_VINFO_SIMD_CLONE_INFO (stmt_info)[i * 3 + 2])
3806 {
3807 gcc_assert (vec_stmt);
3808 thisarginfo.linear_step
3809 = tree_to_shwi (STMT_VINFO_SIMD_CLONE_INFO (stmt_info)[i * 3 + 2]);
3810 thisarginfo.op
3811 = STMT_VINFO_SIMD_CLONE_INFO (stmt_info)[i * 3 + 1];
3812 thisarginfo.simd_lane_linear
3813 = (STMT_VINFO_SIMD_CLONE_INFO (stmt_info)[i * 3 + 3]
3814 == boolean_true_node);
3815 /* If the loop has been peeled for alignment, adjust the base of the linear argument accordingly. */
3816 tree n1 = LOOP_VINFO_NITERS_UNCHANGED (loop_vinfo);
3817 tree n2 = LOOP_VINFO_NITERS (loop_vinfo);
3818 if (n1 != n2 && !thisarginfo.simd_lane_linear)
3819 {
3820 tree bias = fold_build2 (MINUS_EXPR, TREE_TYPE (n1), n1, n2);
3821 tree step = STMT_VINFO_SIMD_CLONE_INFO (stmt_info)[i * 3 + 2];
3822 tree opt = TREE_TYPE (thisarginfo.op);
3823 bias = fold_convert (TREE_TYPE (step), bias);
3824 bias = fold_build2 (MULT_EXPR, TREE_TYPE (step), bias, step);
3825 thisarginfo.op
3826 = fold_build2 (POINTER_TYPE_P (opt)
3827 ? POINTER_PLUS_EXPR : PLUS_EXPR, opt,
3828 thisarginfo.op, bias);
3829 }
3830 }
3831 else if (!vec_stmt
3832 && thisarginfo.dt != vect_constant_def
3833 && thisarginfo.dt != vect_external_def
3834 && loop_vinfo
3835 && TREE_CODE (op) == SSA_NAME
3836 && simple_iv (loop, loop_containing_stmt (stmt), op,
3837 &iv, false)
3838 && tree_fits_shwi_p (iv.step))
3839 {
3840 thisarginfo.linear_step = tree_to_shwi (iv.step);
3841 thisarginfo.op = iv.base;
3842 }
3843 else if ((thisarginfo.dt == vect_constant_def
3844 || thisarginfo.dt == vect_external_def)
3845 && POINTER_TYPE_P (TREE_TYPE (op)))
3846 thisarginfo.align = get_pointer_alignment (op) / BITS_PER_UNIT;
3847 /* Addresses of array elements indexed by GOMP_SIMD_LANE are
3848 linear too. */
3849 if (POINTER_TYPE_P (TREE_TYPE (op))
3850 && !thisarginfo.linear_step
3851 && !vec_stmt
3852 && thisarginfo.dt != vect_constant_def
3853 && thisarginfo.dt != vect_external_def
3854 && loop_vinfo
3855 && !slp_node
3856 && TREE_CODE (op) == SSA_NAME)
3857 vect_simd_lane_linear (op, loop, &thisarginfo);
3858
3859 arginfo.quick_push (thisarginfo);
3860 }
3861
3862 poly_uint64 vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
3863 if (!vf.is_constant ())
3864 {
3865 if (dump_enabled_p ())
3866 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3867 "not considering SIMD clones; not yet supported"
3868 " for variable-width vectors.\n");
3869 return false;
3870 }
3871
3872 unsigned int badness = 0;
3873 struct cgraph_node *bestn = NULL;
3874 if (STMT_VINFO_SIMD_CLONE_INFO (stmt_info).exists ())
3875 bestn = cgraph_node::get (STMT_VINFO_SIMD_CLONE_INFO (stmt_info)[0]);
3876 else
3877 for (struct cgraph_node *n = node->simd_clones; n != NULL;
3878 n = n->simdclone->next_clone)
3879 {
3880 unsigned int this_badness = 0;
3881 unsigned int num_calls;
3882 if (!constant_multiple_p (vf, n->simdclone->simdlen, &num_calls)
3883 || n->simdclone->nargs != nargs)
3884 continue;
3885 if (num_calls != 1)
3886 this_badness += exact_log2 (num_calls) * 1024;
3887 if (n->simdclone->inbranch)
3888 this_badness += 2048;
3889 int target_badness = targetm.simd_clone.usable (n);
3890 if (target_badness < 0)
3891 continue;
3892 this_badness += target_badness * 512;
3893 /* FORNOW: Have to add code to add the mask argument. */
3894 if (n->simdclone->inbranch)
3895 continue;
3896 for (i = 0; i < nargs; i++)
3897 {
3898 switch (n->simdclone->args[i].arg_type)
3899 {
3900 case SIMD_CLONE_ARG_TYPE_VECTOR:
3901 if (!useless_type_conversion_p
3902 (n->simdclone->args[i].orig_type,
3903 TREE_TYPE (gimple_call_arg (stmt, i))))
3904 i = -1;
3905 else if (arginfo[i].dt == vect_constant_def
3906 || arginfo[i].dt == vect_external_def
3907 || arginfo[i].linear_step)
3908 this_badness += 64;
3909 break;
3910 case SIMD_CLONE_ARG_TYPE_UNIFORM:
3911 if (arginfo[i].dt != vect_constant_def
3912 && arginfo[i].dt != vect_external_def)
3913 i = -1;
3914 break;
3915 case SIMD_CLONE_ARG_TYPE_LINEAR_CONSTANT_STEP:
3916 case SIMD_CLONE_ARG_TYPE_LINEAR_REF_CONSTANT_STEP:
3917 if (arginfo[i].dt == vect_constant_def
3918 || arginfo[i].dt == vect_external_def
3919 || (arginfo[i].linear_step
3920 != n->simdclone->args[i].linear_step))
3921 i = -1;
3922 break;
3923 case SIMD_CLONE_ARG_TYPE_LINEAR_VARIABLE_STEP:
3924 case SIMD_CLONE_ARG_TYPE_LINEAR_VAL_CONSTANT_STEP:
3925 case SIMD_CLONE_ARG_TYPE_LINEAR_UVAL_CONSTANT_STEP:
3926 case SIMD_CLONE_ARG_TYPE_LINEAR_REF_VARIABLE_STEP:
3927 case SIMD_CLONE_ARG_TYPE_LINEAR_VAL_VARIABLE_STEP:
3928 case SIMD_CLONE_ARG_TYPE_LINEAR_UVAL_VARIABLE_STEP:
3929 /* FORNOW */
3930 i = -1;
3931 break;
3932 case SIMD_CLONE_ARG_TYPE_MASK:
3933 gcc_unreachable ();
3934 }
3935 if (i == (size_t) -1)
3936 break;
3937 if (n->simdclone->args[i].alignment > arginfo[i].align)
3938 {
3939 i = -1;
3940 break;
3941 }
3942 if (arginfo[i].align)
3943 this_badness += (exact_log2 (arginfo[i].align)
3944 - exact_log2 (n->simdclone->args[i].alignment));
3945 }
3946 if (i == (size_t) -1)
3947 continue;
3948 if (bestn == NULL || this_badness < badness)
3949 {
3950 bestn = n;
3951 badness = this_badness;
3952 }
3953 }
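 /* For example, with vf == 8 a clone with simdlen 8 needs a single call
    per vector iteration (no num_calls penalty), whereas a clone with
    simdlen 4 needs two calls and gets exact_log2 (2) * 1024 added to its
    badness; target-reported penalties and argument mismatches add
    further badness as computed above.  */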
3954
3955 if (bestn == NULL)
3956 return false;
3957
3958 for (i = 0; i < nargs; i++)
3959 if ((arginfo[i].dt == vect_constant_def
3960 || arginfo[i].dt == vect_external_def)
3961 && bestn->simdclone->args[i].arg_type == SIMD_CLONE_ARG_TYPE_VECTOR)
3962 {
3963 tree arg_type = TREE_TYPE (gimple_call_arg (stmt, i));
3964 arginfo[i].vectype = get_vectype_for_scalar_type (vinfo, arg_type,
3965 slp_node);
3966 if (arginfo[i].vectype == NULL
3967 || !constant_multiple_p (bestn->simdclone->simdlen,
3968 simd_clone_subparts (arginfo[i].vectype)))
3969 return false;
3970 }
3971
3972 fndecl = bestn->decl;
3973 nunits = bestn->simdclone->simdlen;
3974 ncopies = vector_unroll_factor (vf, nunits);
3975
3976 /* If the function isn't const, only allow it in simd loops where the
3977 user has asserted that at least nunits consecutive iterations can be
3978 performed using SIMD instructions. */
3979 if ((loop == NULL || maybe_lt ((unsigned) loop->safelen, nunits))
3980 && gimple_vuse (stmt))
3981 return false;
3982
3983 /* Sanity check: make sure that at least one copy of the vectorized stmt
3984 needs to be generated. */
3985 gcc_assert (ncopies >= 1);
3986
3987 if (!vec_stmt) /* transformation not required. */
3988 {
3989 STMT_VINFO_SIMD_CLONE_INFO (stmt_info).safe_push (bestn->decl);
3990 for (i = 0; i < nargs; i++)
3991 if ((bestn->simdclone->args[i].arg_type
3992 == SIMD_CLONE_ARG_TYPE_LINEAR_CONSTANT_STEP)
3993 || (bestn->simdclone->args[i].arg_type
3994 == SIMD_CLONE_ARG_TYPE_LINEAR_REF_CONSTANT_STEP))
3995 {
3996 STMT_VINFO_SIMD_CLONE_INFO (stmt_info).safe_grow_cleared (i * 3
3997 + 1,
3998 true);
3999 STMT_VINFO_SIMD_CLONE_INFO (stmt_info).safe_push (arginfo[i].op);
4000 tree lst = POINTER_TYPE_P (TREE_TYPE (arginfo[i].op))
4001 ? size_type_node : TREE_TYPE (arginfo[i].op);
4002 tree ls = build_int_cst (lst, arginfo[i].linear_step);
4003 STMT_VINFO_SIMD_CLONE_INFO (stmt_info).safe_push (ls);
4004 tree sll = arginfo[i].simd_lane_linear
4005 ? boolean_true_node : boolean_false_node;
4006 STMT_VINFO_SIMD_CLONE_INFO (stmt_info).safe_push (sll);
4007 }
4008 STMT_VINFO_TYPE (stmt_info) = call_simd_clone_vec_info_type;
4009 DUMP_VECT_SCOPE ("vectorizable_simd_clone_call");
4010 /* vect_model_simple_cost (vinfo, stmt_info, ncopies,
4011 dt, slp_node, cost_vec); */
4012 return true;
4013 }
4014
4015 /* Transform. */
4016
4017 if (dump_enabled_p ())
4018 dump_printf_loc (MSG_NOTE, vect_location, "transform call.\n");
4019
4020 /* Handle def. */
4021 scalar_dest = gimple_call_lhs (stmt);
4022 vec_dest = NULL_TREE;
4023 rtype = NULL_TREE;
4024 ratype = NULL_TREE;
4025 if (scalar_dest)
4026 {
4027 vec_dest = vect_create_destination_var (scalar_dest, vectype);
4028 rtype = TREE_TYPE (TREE_TYPE (fndecl));
4029 if (TREE_CODE (rtype) == ARRAY_TYPE)
4030 {
4031 ratype = rtype;
4032 rtype = TREE_TYPE (ratype);
4033 }
4034 }
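 /* Note: if the clone returns its result through an array of vectors
    (typically when simdlen does not fit in a single hardware vector),
    RATYPE is that array type and RTYPE its vector element type; the
    code below then extracts the pieces with ARRAY_REF/MEM_REF
    accesses.  */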
4035
4036 auto_vec<vec<tree> > vec_oprnds;
4037 auto_vec<unsigned> vec_oprnds_i;
4038 vec_oprnds.safe_grow_cleared (nargs, true);
4039 vec_oprnds_i.safe_grow_cleared (nargs, true);
4040 for (j = 0; j < ncopies; ++j)
4041 {
4042 /* Build argument list for the vectorized call. */
4043 if (j == 0)
4044 vargs.create (nargs);
4045 else
4046 vargs.truncate (0);
4047
4048 for (i = 0; i < nargs; i++)
4049 {
4050 unsigned int k, l, m, o;
4051 tree atype;
4052 op = gimple_call_arg (stmt, i);
4053 switch (bestn->simdclone->args[i].arg_type)
4054 {
4055 case SIMD_CLONE_ARG_TYPE_VECTOR:
4056 atype = bestn->simdclone->args[i].vector_type;
4057 o = vector_unroll_factor (nunits,
4058 simd_clone_subparts (atype));
4059 for (m = j * o; m < (j + 1) * o; m++)
4060 {
4061 if (simd_clone_subparts (atype)
4062 < simd_clone_subparts (arginfo[i].vectype))
4063 {
4064 poly_uint64 prec = GET_MODE_BITSIZE (TYPE_MODE (atype));
4065 k = (simd_clone_subparts (arginfo[i].vectype)
4066 / simd_clone_subparts (atype));
4067 gcc_assert ((k & (k - 1)) == 0);
4068 if (m == 0)
4069 {
4070 vect_get_vec_defs_for_operand (vinfo, stmt_info,
4071 ncopies * o / k, op,
4072 &vec_oprnds[i]);
4073 vec_oprnds_i[i] = 0;
4074 vec_oprnd0 = vec_oprnds[i][vec_oprnds_i[i]++];
4075 }
4076 else
4077 {
4078 vec_oprnd0 = arginfo[i].op;
4079 if ((m & (k - 1)) == 0)
4080 vec_oprnd0 = vec_oprnds[i][vec_oprnds_i[i]++];
4081 }
4082 arginfo[i].op = vec_oprnd0;
4083 vec_oprnd0
4084 = build3 (BIT_FIELD_REF, atype, vec_oprnd0,
4085 bitsize_int (prec),
4086 bitsize_int ((m & (k - 1)) * prec));
4087 gassign *new_stmt
4088 = gimple_build_assign (make_ssa_name (atype),
4089 vec_oprnd0);
4090 vect_finish_stmt_generation (vinfo, stmt_info,
4091 new_stmt, gsi);
4092 vargs.safe_push (gimple_assign_lhs (new_stmt));
4093 }
4094 else
4095 {
4096 k = (simd_clone_subparts (atype)
4097 / simd_clone_subparts (arginfo[i].vectype));
4098 gcc_assert ((k & (k - 1)) == 0);
4099 vec<constructor_elt, va_gc> *ctor_elts;
4100 if (k != 1)
4101 vec_alloc (ctor_elts, k);
4102 else
4103 ctor_elts = NULL;
4104 for (l = 0; l < k; l++)
4105 {
4106 if (m == 0 && l == 0)
4107 {
4108 vect_get_vec_defs_for_operand (vinfo, stmt_info,
4109 k * o * ncopies,
4110 op,
4111 &vec_oprnds[i]);
4112 vec_oprnds_i[i] = 0;
4113 vec_oprnd0 = vec_oprnds[i][vec_oprnds_i[i]++];
4114 }
4115 else
4116 vec_oprnd0 = vec_oprnds[i][vec_oprnds_i[i]++];
4117 arginfo[i].op = vec_oprnd0;
4118 if (k == 1)
4119 break;
4120 CONSTRUCTOR_APPEND_ELT (ctor_elts, NULL_TREE,
4121 vec_oprnd0);
4122 }
4123 if (k == 1)
4124 if (!useless_type_conversion_p (TREE_TYPE (vec_oprnd0),
4125 atype))
4126 {
4127 vec_oprnd0
4128 = build1 (VIEW_CONVERT_EXPR, atype, vec_oprnd0);
4129 gassign *new_stmt
4130 = gimple_build_assign (make_ssa_name (atype),
4131 vec_oprnd0);
4132 vect_finish_stmt_generation (vinfo, stmt_info,
4133 new_stmt, gsi);
4134 vargs.safe_push (gimple_assign_lhs (new_stmt));
4135 }
4136 else
4137 vargs.safe_push (vec_oprnd0);
4138 else
4139 {
4140 vec_oprnd0 = build_constructor (atype, ctor_elts);
4141 gassign *new_stmt
4142 = gimple_build_assign (make_ssa_name (atype),
4143 vec_oprnd0);
4144 vect_finish_stmt_generation (vinfo, stmt_info,
4145 new_stmt, gsi);
4146 vargs.safe_push (gimple_assign_lhs (new_stmt));
4147 }
4148 }
4149 }
4150 break;
4151 case SIMD_CLONE_ARG_TYPE_UNIFORM:
4152 vargs.safe_push (op);
4153 break;
4154 case SIMD_CLONE_ARG_TYPE_LINEAR_CONSTANT_STEP:
4155 case SIMD_CLONE_ARG_TYPE_LINEAR_REF_CONSTANT_STEP:
4156 if (j == 0)
4157 {
4158 gimple_seq stmts;
4159 arginfo[i].op
4160 = force_gimple_operand (unshare_expr (arginfo[i].op),
4161 &stmts, true, NULL_TREE);
4162 if (stmts != NULL)
4163 {
4164 basic_block new_bb;
4165 edge pe = loop_preheader_edge (loop);
4166 new_bb = gsi_insert_seq_on_edge_immediate (pe, stmts);
4167 gcc_assert (!new_bb);
4168 }
4169 if (arginfo[i].simd_lane_linear)
4170 {
4171 vargs.safe_push (arginfo[i].op);
4172 break;
4173 }
4174 tree phi_res = copy_ssa_name (op);
4175 gphi *new_phi = create_phi_node (phi_res, loop->header);
4176 add_phi_arg (new_phi, arginfo[i].op,
4177 loop_preheader_edge (loop), UNKNOWN_LOCATION);
4178 enum tree_code code
4179 = POINTER_TYPE_P (TREE_TYPE (op))
4180 ? POINTER_PLUS_EXPR : PLUS_EXPR;
4181 tree type = POINTER_TYPE_P (TREE_TYPE (op))
4182 ? sizetype : TREE_TYPE (op);
4183 poly_widest_int cst
4184 = wi::mul (bestn->simdclone->args[i].linear_step,
4185 ncopies * nunits);
4186 tree tcst = wide_int_to_tree (type, cst);
4187 tree phi_arg = copy_ssa_name (op);
4188 gassign *new_stmt
4189 = gimple_build_assign (phi_arg, code, phi_res, tcst);
4190 gimple_stmt_iterator si = gsi_after_labels (loop->header);
4191 gsi_insert_after (&si, new_stmt, GSI_NEW_STMT);
4192 add_phi_arg (new_phi, phi_arg, loop_latch_edge (loop),
4193 UNKNOWN_LOCATION);
4194 arginfo[i].op = phi_res;
4195 vargs.safe_push (phi_res);
4196 }
4197 else
4198 {
4199 enum tree_code code
4200 = POINTER_TYPE_P (TREE_TYPE (op))
4201 ? POINTER_PLUS_EXPR : PLUS_EXPR;
4202 tree type = POINTER_TYPE_P (TREE_TYPE (op))
4203 ? sizetype : TREE_TYPE (op);
4204 poly_widest_int cst
4205 = wi::mul (bestn->simdclone->args[i].linear_step,
4206 j * nunits);
4207 tree tcst = wide_int_to_tree (type, cst);
4208 new_temp = make_ssa_name (TREE_TYPE (op));
4209 gassign *new_stmt
4210 = gimple_build_assign (new_temp, code,
4211 arginfo[i].op, tcst);
4212 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
4213 vargs.safe_push (new_temp);
4214 }
4215 break;
4216 case SIMD_CLONE_ARG_TYPE_LINEAR_VAL_CONSTANT_STEP:
4217 case SIMD_CLONE_ARG_TYPE_LINEAR_UVAL_CONSTANT_STEP:
4218 case SIMD_CLONE_ARG_TYPE_LINEAR_VARIABLE_STEP:
4219 case SIMD_CLONE_ARG_TYPE_LINEAR_REF_VARIABLE_STEP:
4220 case SIMD_CLONE_ARG_TYPE_LINEAR_VAL_VARIABLE_STEP:
4221 case SIMD_CLONE_ARG_TYPE_LINEAR_UVAL_VARIABLE_STEP:
4222 default:
4223 gcc_unreachable ();
4224 }
4225 }
4226
4227 gcall *new_call = gimple_build_call_vec (fndecl, vargs);
4228 if (vec_dest)
4229 {
4230 gcc_assert (ratype
4231 || known_eq (simd_clone_subparts (rtype), nunits));
4232 if (ratype)
4233 new_temp = create_tmp_var (ratype);
4234 else if (useless_type_conversion_p (vectype, rtype))
4235 new_temp = make_ssa_name (vec_dest, new_call);
4236 else
4237 new_temp = make_ssa_name (rtype, new_call);
4238 gimple_call_set_lhs (new_call, new_temp);
4239 }
4240 vect_finish_stmt_generation (vinfo, stmt_info, new_call, gsi);
4241 gimple *new_stmt = new_call;
4242
4243 if (vec_dest)
4244 {
4245 if (!multiple_p (simd_clone_subparts (vectype), nunits))
4246 {
4247 unsigned int k, l;
4248 poly_uint64 prec = GET_MODE_BITSIZE (TYPE_MODE (vectype));
4249 poly_uint64 bytes = GET_MODE_SIZE (TYPE_MODE (vectype));
4250 k = vector_unroll_factor (nunits,
4251 simd_clone_subparts (vectype));
4252 gcc_assert ((k & (k - 1)) == 0);
4253 for (l = 0; l < k; l++)
4254 {
4255 tree t;
4256 if (ratype)
4257 {
4258 t = build_fold_addr_expr (new_temp);
4259 t = build2 (MEM_REF, vectype, t,
4260 build_int_cst (TREE_TYPE (t), l * bytes));
4261 }
4262 else
4263 t = build3 (BIT_FIELD_REF, vectype, new_temp,
4264 bitsize_int (prec), bitsize_int (l * prec));
4265 new_stmt = gimple_build_assign (make_ssa_name (vectype), t);
4266 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
4267
4268 if (j == 0 && l == 0)
4269 *vec_stmt = new_stmt;
4270 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
4271 }
4272
4273 if (ratype)
4274 vect_clobber_variable (vinfo, stmt_info, gsi, new_temp);
4275 continue;
4276 }
4277 else if (!multiple_p (nunits, simd_clone_subparts (vectype)))
4278 {
4279 unsigned int k = (simd_clone_subparts (vectype)
4280 / simd_clone_subparts (rtype));
4281 gcc_assert ((k & (k - 1)) == 0);
4282 if ((j & (k - 1)) == 0)
4283 vec_alloc (ret_ctor_elts, k);
4284 if (ratype)
4285 {
4286 unsigned int m, o;
4287 o = vector_unroll_factor (nunits,
4288 simd_clone_subparts (rtype));
4289 for (m = 0; m < o; m++)
4290 {
4291 tree tem = build4 (ARRAY_REF, rtype, new_temp,
4292 size_int (m), NULL_TREE, NULL_TREE);
4293 new_stmt = gimple_build_assign (make_ssa_name (rtype),
4294 tem);
4295 vect_finish_stmt_generation (vinfo, stmt_info,
4296 new_stmt, gsi);
4297 CONSTRUCTOR_APPEND_ELT (ret_ctor_elts, NULL_TREE,
4298 gimple_assign_lhs (new_stmt));
4299 }
4300 vect_clobber_variable (vinfo, stmt_info, gsi, new_temp);
4301 }
4302 else
4303 CONSTRUCTOR_APPEND_ELT (ret_ctor_elts, NULL_TREE, new_temp);
4304 if ((j & (k - 1)) != k - 1)
4305 continue;
4306 vec_oprnd0 = build_constructor (vectype, ret_ctor_elts);
4307 new_stmt
4308 = gimple_build_assign (make_ssa_name (vec_dest), vec_oprnd0);
4309 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
4310
4311 if ((unsigned) j == k - 1)
4312 *vec_stmt = new_stmt;
4313 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
4314 continue;
4315 }
4316 else if (ratype)
4317 {
4318 tree t = build_fold_addr_expr (new_temp);
4319 t = build2 (MEM_REF, vectype, t,
4320 build_int_cst (TREE_TYPE (t), 0));
4321 new_stmt = gimple_build_assign (make_ssa_name (vec_dest), t);
4322 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
4323 vect_clobber_variable (vinfo, stmt_info, gsi, new_temp);
4324 }
4325 else if (!useless_type_conversion_p (vectype, rtype))
4326 {
4327 vec_oprnd0 = build1 (VIEW_CONVERT_EXPR, vectype, new_temp);
4328 new_stmt
4329 = gimple_build_assign (make_ssa_name (vec_dest), vec_oprnd0);
4330 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
4331 }
4332 }
4333
4334 if (j == 0)
4335 *vec_stmt = new_stmt;
4336 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
4337 }
4338
4339 for (i = 0; i < nargs; ++i)
4340 {
4341 vec<tree> oprndsi = vec_oprnds[i];
4342 oprndsi.release ();
4343 }
4344 vargs.release ();
4345
4346 /* The call in STMT might prevent it from being removed in DCE.
4347 We cannot remove it here, however, due to the way the ssa name
4348 it defines is mapped to the new definition. So just replace the
4349 rhs of the statement with something harmless. */
4350
4351 if (slp_node)
4352 return true;
4353
4354 gimple *new_stmt;
4355 if (scalar_dest)
4356 {
4357 type = TREE_TYPE (scalar_dest);
4358 lhs = gimple_call_lhs (vect_orig_stmt (stmt_info)->stmt);
4359 new_stmt = gimple_build_assign (lhs, build_zero_cst (type));
4360 }
4361 else
4362 new_stmt = gimple_build_nop ();
4363 vinfo->replace_stmt (gsi, vect_orig_stmt (stmt_info), new_stmt);
4364 unlink_stmt_vdef (stmt);
4365
4366 return true;
4367 }
4368
4369
4370 /* Function vect_gen_widened_results_half
4371
4372 Create a vector stmt whose code, number of operands and result
4373 variable are CODE, OP_TYPE and VEC_DEST, and whose operands are
4374 VEC_OPRND0 and VEC_OPRND1 (the latter is used only when OP_TYPE
4375 is binary_op). The new vector stmt is to be inserted at GSI.
4376 It computes one half of the widened result for STMT_INFO, the
4377 original scalar stmt that we are vectorizing. */
4378
4379 static gimple *
4380 vect_gen_widened_results_half (vec_info *vinfo, enum tree_code code,
4381 tree vec_oprnd0, tree vec_oprnd1, int op_type,
4382 tree vec_dest, gimple_stmt_iterator *gsi,
4383 stmt_vec_info stmt_info)
4384 {
4385 gimple *new_stmt;
4386 tree new_temp;
4387
4388 /* Generate half of the widened result: */
4389 gcc_assert (op_type == TREE_CODE_LENGTH (code));
4390 if (op_type != binary_op)
4391 vec_oprnd1 = NULL;
4392 new_stmt = gimple_build_assign (vec_dest, code, vec_oprnd0, vec_oprnd1);
4393 new_temp = make_ssa_name (vec_dest, new_stmt);
4394 gimple_assign_set_lhs (new_stmt, new_temp);
4395 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
4396
4397 return new_stmt;
4398 }
4399
4400
4401 /* Create vectorized demotion statements for vector operands from VEC_OPRNDS.
4402 For multi-step conversions store the resulting vectors and call the function
4403 recursively. */
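 /* For example, a two-step demotion from four V4SI operands to one
    V16QI result first packs pairs of V4SI vectors into two V8HI
    vectors and then, in the recursive call, packs those into a single
    V16QI vector using VEC_PACK_TRUNC_EXPR.  */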
4404
4405 static void
4406 vect_create_vectorized_demotion_stmts (vec_info *vinfo, vec<tree> *vec_oprnds,
4407 int multi_step_cvt,
4408 stmt_vec_info stmt_info,
4409 vec<tree> vec_dsts,
4410 gimple_stmt_iterator *gsi,
4411 slp_tree slp_node, enum tree_code code)
4412 {
4413 unsigned int i;
4414 tree vop0, vop1, new_tmp, vec_dest;
4415
4416 vec_dest = vec_dsts.pop ();
4417
4418 for (i = 0; i < vec_oprnds->length (); i += 2)
4419 {
4420 /* Create demotion operation. */
4421 vop0 = (*vec_oprnds)[i];
4422 vop1 = (*vec_oprnds)[i + 1];
4423 gassign *new_stmt = gimple_build_assign (vec_dest, code, vop0, vop1);
4424 new_tmp = make_ssa_name (vec_dest, new_stmt);
4425 gimple_assign_set_lhs (new_stmt, new_tmp);
4426 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
4427
4428 if (multi_step_cvt)
4429 /* Store the resulting vector for next recursive call. */
4430 (*vec_oprnds)[i/2] = new_tmp;
4431 else
4432 {
4433 /* This is the last step of the conversion sequence. Store the
4434 vectors in SLP_NODE or in the vector info of the scalar statement
4435 (or in the STMT_VINFO_RELATED_STMT chain). */
4436 if (slp_node)
4437 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
4438 else
4439 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
4440 }
4441 }
4442
4443 /* For multi-step demotion operations we first generate demotion operations
4444 from the source type to the intermediate types, and then combine the
4445 results (stored in VEC_OPRNDS) with a further demotion operation to the
4446 destination type. */
4447 if (multi_step_cvt)
4448 {
4449 /* At each level of recursion we have half of the operands we had at the
4450 previous level. */
4451 vec_oprnds->truncate ((i+1)/2);
4452 vect_create_vectorized_demotion_stmts (vinfo, vec_oprnds,
4453 multi_step_cvt - 1,
4454 stmt_info, vec_dsts, gsi,
4455 slp_node, VEC_PACK_TRUNC_EXPR);
4456 }
4457
4458 vec_dsts.quick_push (vec_dest);
4459 }
4460
4461
4462 /* Create vectorized promotion statements for vector operands from VEC_OPRNDS0
4463 and VEC_OPRNDS1, for a binary operation associated with scalar statement
4464 STMT_INFO. For multi-step conversions the widened results are stored
4465 back in VEC_OPRNDS0, for the caller to feed into the next step. */
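 /* For example, for WIDEN_MULT_EXPR on V8HI operands the two codes
    chosen by supportable_widening_operation are typically
    VEC_WIDEN_MULT_LO_EXPR and VEC_WIDEN_MULT_HI_EXPR, so each input
    pair yields two V4SI results in VEC_OPRNDS0.  */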
4466
4467 static void
4468 vect_create_vectorized_promotion_stmts (vec_info *vinfo,
4469 vec<tree> *vec_oprnds0,
4470 vec<tree> *vec_oprnds1,
4471 stmt_vec_info stmt_info, tree vec_dest,
4472 gimple_stmt_iterator *gsi,
4473 enum tree_code code1,
4474 enum tree_code code2, int op_type)
4475 {
4476 int i;
4477 tree vop0, vop1, new_tmp1, new_tmp2;
4478 gimple *new_stmt1, *new_stmt2;
4479 vec<tree> vec_tmp = vNULL;
4480
4481 vec_tmp.create (vec_oprnds0->length () * 2);
4482 FOR_EACH_VEC_ELT (*vec_oprnds0, i, vop0)
4483 {
4484 if (op_type == binary_op)
4485 vop1 = (*vec_oprnds1)[i];
4486 else
4487 vop1 = NULL_TREE;
4488
4489 /* Generate the two halves of the promotion operation. */
4490 new_stmt1 = vect_gen_widened_results_half (vinfo, code1, vop0, vop1,
4491 op_type, vec_dest, gsi,
4492 stmt_info);
4493 new_stmt2 = vect_gen_widened_results_half (vinfo, code2, vop0, vop1,
4494 op_type, vec_dest, gsi,
4495 stmt_info);
4496 if (is_gimple_call (new_stmt1))
4497 {
4498 new_tmp1 = gimple_call_lhs (new_stmt1);
4499 new_tmp2 = gimple_call_lhs (new_stmt2);
4500 }
4501 else
4502 {
4503 new_tmp1 = gimple_assign_lhs (new_stmt1);
4504 new_tmp2 = gimple_assign_lhs (new_stmt2);
4505 }
4506
4507 /* Store the results for the next step. */
4508 vec_tmp.quick_push (new_tmp1);
4509 vec_tmp.quick_push (new_tmp2);
4510 }
4511
4512 vec_oprnds0->release ();
4513 *vec_oprnds0 = vec_tmp;
4514 }
4515
4516
4517 /* Check if STMT_INFO performs a conversion operation that can be vectorized.
4518 If VEC_STMT is also passed, vectorize STMT_INFO: create a vectorized
4519 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
4520 Return true if STMT_INFO is vectorizable in this way. */
4521
4522 static bool
4523 vectorizable_conversion (vec_info *vinfo,
4524 stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
4525 gimple **vec_stmt, slp_tree slp_node,
4526 stmt_vector_for_cost *cost_vec)
4527 {
4528 tree vec_dest;
4529 tree scalar_dest;
4530 tree op0, op1 = NULL_TREE;
4531 loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
4532 enum tree_code code, code1 = ERROR_MARK, code2 = ERROR_MARK;
4533 enum tree_code codecvt1 = ERROR_MARK, codecvt2 = ERROR_MARK;
4534 tree new_temp;
4535 enum vect_def_type dt[2] = {vect_unknown_def_type, vect_unknown_def_type};
4536 int ndts = 2;
4537 poly_uint64 nunits_in;
4538 poly_uint64 nunits_out;
4539 tree vectype_out, vectype_in;
4540 int ncopies, i;
4541 tree lhs_type, rhs_type;
4542 enum { NARROW, NONE, WIDEN } modifier;
4543 vec<tree> vec_oprnds0 = vNULL;
4544 vec<tree> vec_oprnds1 = vNULL;
4545 tree vop0;
4546 bb_vec_info bb_vinfo = dyn_cast <bb_vec_info> (vinfo);
4547 int multi_step_cvt = 0;
4548 vec<tree> interm_types = vNULL;
4549 tree intermediate_type, cvt_type = NULL_TREE;
4550 int op_type;
4551 unsigned short fltsz;
4552
4553 /* Is STMT a vectorizable conversion? */
4554
4555 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
4556 return false;
4557
4558 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
4559 && ! vec_stmt)
4560 return false;
4561
4562 gassign *stmt = dyn_cast <gassign *> (stmt_info->stmt);
4563 if (!stmt)
4564 return false;
4565
4566 if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME)
4567 return false;
4568
4569 code = gimple_assign_rhs_code (stmt);
4570 if (!CONVERT_EXPR_CODE_P (code)
4571 && code != FIX_TRUNC_EXPR
4572 && code != FLOAT_EXPR
4573 && code != WIDEN_MULT_EXPR
4574 && code != WIDEN_LSHIFT_EXPR)
4575 return false;
4576
4577 op_type = TREE_CODE_LENGTH (code);
4578
4579 /* Check types of lhs and rhs. */
4580 scalar_dest = gimple_assign_lhs (stmt);
4581 lhs_type = TREE_TYPE (scalar_dest);
4582 vectype_out = STMT_VINFO_VECTYPE (stmt_info);
4583
4584 /* Check the operands of the operation. */
4585 slp_tree slp_op0, slp_op1 = NULL;
4586 if (!vect_is_simple_use (vinfo, stmt_info, slp_node,
4587 0, &op0, &slp_op0, &dt[0], &vectype_in))
4588 {
4589 if (dump_enabled_p ())
4590 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4591 "use not simple.\n");
4592 return false;
4593 }
4594
4595 rhs_type = TREE_TYPE (op0);
4596 if ((code != FIX_TRUNC_EXPR && code != FLOAT_EXPR)
4597 && !((INTEGRAL_TYPE_P (lhs_type)
4598 && INTEGRAL_TYPE_P (rhs_type))
4599 || (SCALAR_FLOAT_TYPE_P (lhs_type)
4600 && SCALAR_FLOAT_TYPE_P (rhs_type))))
4601 return false;
4602
4603 if (!VECTOR_BOOLEAN_TYPE_P (vectype_out)
4604 && ((INTEGRAL_TYPE_P (lhs_type)
4605 && !type_has_mode_precision_p (lhs_type))
4606 || (INTEGRAL_TYPE_P (rhs_type)
4607 && !type_has_mode_precision_p (rhs_type))))
4608 {
4609 if (dump_enabled_p ())
4610 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4611 "type conversion to/from bit-precision unsupported."
4612 "\n");
4613 return false;
4614 }
4615
4616 if (op_type == binary_op)
4617 {
4618 gcc_assert (code == WIDEN_MULT_EXPR || code == WIDEN_LSHIFT_EXPR);
4619
4620 op1 = gimple_assign_rhs2 (stmt);
4621 tree vectype1_in;
4622 if (!vect_is_simple_use (vinfo, stmt_info, slp_node, 1,
4623 &op1, &slp_op1, &dt[1], &vectype1_in))
4624 {
4625 if (dump_enabled_p ())
4626 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4627 "use not simple.\n");
4628 return false;
4629 }
4630 /* For WIDEN_MULT_EXPR, if OP0 is a constant, use the type of
4631 OP1. */
4632 if (!vectype_in)
4633 vectype_in = vectype1_in;
4634 }
4635
4636 /* If op0 is an external or constant def, infer the vector type
4637 from the scalar type. */
4638 if (!vectype_in)
4639 vectype_in = get_vectype_for_scalar_type (vinfo, rhs_type, slp_node);
4640 if (vec_stmt)
4641 gcc_assert (vectype_in);
4642 if (!vectype_in)
4643 {
4644 if (dump_enabled_p ())
4645 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4646 "no vectype for scalar type %T\n", rhs_type);
4647
4648 return false;
4649 }
4650
4651 if (VECTOR_BOOLEAN_TYPE_P (vectype_out)
4652 && !VECTOR_BOOLEAN_TYPE_P (vectype_in))
4653 {
4654 if (dump_enabled_p ())
4655 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4656 "can't convert between boolean and non "
4657 "boolean vectors %T\n", rhs_type);
4658
4659 return false;
4660 }
4661
4662 nunits_in = TYPE_VECTOR_SUBPARTS (vectype_in);
4663 nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
4664 if (known_eq (nunits_out, nunits_in))
4665 modifier = NONE;
4666 else if (multiple_p (nunits_out, nunits_in))
4667 modifier = NARROW;
4668 else
4669 {
4670 gcc_checking_assert (multiple_p (nunits_in, nunits_out));
4671 modifier = WIDEN;
4672 }
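 /* E.g. with 128-bit vectors, an int -> short conversion has vectype_in
    V4SI and vectype_out V8HI, so nunits_out is a multiple of nunits_in
    and the modifier is NARROW; short -> int is the WIDEN case.  */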
4673
4674 /* Multiple types in SLP are handled by creating the appropriate number of
4675 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
4676 case of SLP. */
4677 if (slp_node)
4678 ncopies = 1;
4679 else if (modifier == NARROW)
4680 ncopies = vect_get_num_copies (loop_vinfo, vectype_out);
4681 else
4682 ncopies = vect_get_num_copies (loop_vinfo, vectype_in);
4683
4684 /* Sanity check: make sure that at least one copy of the vectorized stmt
4685 needs to be generated. */
4686 gcc_assert (ncopies >= 1);
4687
4688 bool found_mode = false;
4689 scalar_mode lhs_mode = SCALAR_TYPE_MODE (lhs_type);
4690 scalar_mode rhs_mode = SCALAR_TYPE_MODE (rhs_type);
4691 opt_scalar_mode rhs_mode_iter;
4692
4693 /* Supportable by target? */
4694 switch (modifier)
4695 {
4696 case NONE:
4697 if (code != FIX_TRUNC_EXPR
4698 && code != FLOAT_EXPR
4699 && !CONVERT_EXPR_CODE_P (code))
4700 return false;
4701 if (supportable_convert_operation (code, vectype_out, vectype_in, &code1))
4702 break;
4703 /* FALLTHRU */
4704 unsupported:
4705 if (dump_enabled_p ())
4706 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4707 "conversion not supported by target.\n");
4708 return false;
4709
4710 case WIDEN:
4711 if (supportable_widening_operation (vinfo, code, stmt_info, vectype_out,
4712 vectype_in, &code1, &code2,
4713 &multi_step_cvt, &interm_types))
4714 {
4715 /* Binary widening operations can only be supported directly by the
4716 architecture. */
4717 gcc_assert (!(multi_step_cvt && op_type == binary_op));
4718 break;
4719 }
4720
4721 if (code != FLOAT_EXPR
4722 || GET_MODE_SIZE (lhs_mode) <= GET_MODE_SIZE (rhs_mode))
4723 goto unsupported;
4724
4725 fltsz = GET_MODE_SIZE (lhs_mode);
4726 FOR_EACH_2XWIDER_MODE (rhs_mode_iter, rhs_mode)
4727 {
4728 rhs_mode = rhs_mode_iter.require ();
4729 if (GET_MODE_SIZE (rhs_mode) > fltsz)
4730 break;
4731
4732 cvt_type
4733 = build_nonstandard_integer_type (GET_MODE_BITSIZE (rhs_mode), 0);
4734 cvt_type = get_same_sized_vectype (cvt_type, vectype_in);
4735 if (cvt_type == NULL_TREE)
4736 goto unsupported;
4737
4738 if (GET_MODE_SIZE (rhs_mode) == fltsz)
4739 {
4740 if (!supportable_convert_operation (code, vectype_out,
4741 cvt_type, &codecvt1))
4742 goto unsupported;
4743 }
4744 else if (!supportable_widening_operation (vinfo, code, stmt_info,
4745 vectype_out, cvt_type,
4746 &codecvt1, &codecvt2,
4747 &multi_step_cvt,
4748 &interm_types))
4749 continue;
4750 else
4751 gcc_assert (multi_step_cvt == 0);
4752
4753 if (supportable_widening_operation (vinfo, NOP_EXPR, stmt_info,
4754 cvt_type,
4755 vectype_in, &code1, &code2,
4756 &multi_step_cvt, &interm_types))
4757 {
4758 found_mode = true;
4759 break;
4760 }
4761 }
4762
4763 if (!found_mode)
4764 goto unsupported;
4765
4766 if (GET_MODE_SIZE (rhs_mode) == fltsz)
4767 codecvt2 = ERROR_MARK;
4768 else
4769 {
4770 multi_step_cvt++;
4771 interm_types.safe_push (cvt_type);
4772 cvt_type = NULL_TREE;
4773 }
4774 break;
4775
4776 case NARROW:
4777 gcc_assert (op_type == unary_op);
4778 if (supportable_narrowing_operation (code, vectype_out, vectype_in,
4779 &code1, &multi_step_cvt,
4780 &interm_types))
4781 break;
4782
4783 if (code != FIX_TRUNC_EXPR
4784 || GET_MODE_SIZE (lhs_mode) >= GET_MODE_SIZE (rhs_mode))
4785 goto unsupported;
4786
4787 cvt_type
4788 = build_nonstandard_integer_type (GET_MODE_BITSIZE (rhs_mode), 0);
4789 cvt_type = get_same_sized_vectype (cvt_type, vectype_in);
4790 if (cvt_type == NULL_TREE)
4791 goto unsupported;
4792 if (!supportable_convert_operation (code, cvt_type, vectype_in,
4793 &codecvt1))
4794 goto unsupported;
4795 if (supportable_narrowing_operation (NOP_EXPR, vectype_out, cvt_type,
4796 &code1, &multi_step_cvt,
4797 &interm_types))
4798 break;
4799 goto unsupported;
4800
4801 default:
4802 gcc_unreachable ();
4803 }
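 /* Two illustrative multi-step cases of the above, assuming 128-bit
    vectors: an int -> double FLOAT_EXPR (WIDEN) uses CVT_TYPE V2DI,
    unpacking V4SI into two V2DI vectors with CODE1/CODE2 and then
    converting each to V2DF with CODECVT1; a double -> short
    FIX_TRUNC_EXPR (NARROW) first truncates V2DF to V2DI with CODECVT1
    and then narrows V2DI to V8HI through the intermediate types
    recorded in INTERM_TYPES.  */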
4804
4805 if (!vec_stmt) /* transformation not required. */
4806 {
4807 if (slp_node
4808 && (!vect_maybe_update_slp_op_vectype (slp_op0, vectype_in)
4809 || !vect_maybe_update_slp_op_vectype (slp_op1, vectype_in)))
4810 {
4811 if (dump_enabled_p ())
4812 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4813 "incompatible vector types for invariants\n");
4814 return false;
4815 }
4816 DUMP_VECT_SCOPE ("vectorizable_conversion");
4817 if (modifier == NONE)
4818 {
4819 STMT_VINFO_TYPE (stmt_info) = type_conversion_vec_info_type;
4820 vect_model_simple_cost (vinfo, stmt_info, ncopies, dt, ndts, slp_node,
4821 cost_vec);
4822 }
4823 else if (modifier == NARROW)
4824 {
4825 STMT_VINFO_TYPE (stmt_info) = type_demotion_vec_info_type;
4826 /* The final packing step produces one vector result per copy. */
4827 unsigned int nvectors
4828 = (slp_node ? SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node) : ncopies);
4829 vect_model_promotion_demotion_cost (stmt_info, dt, nvectors,
4830 multi_step_cvt, cost_vec);
4831 }
4832 else
4833 {
4834 STMT_VINFO_TYPE (stmt_info) = type_promotion_vec_info_type;
4835 /* The initial unpacking step produces two vector results
4836 per copy. MULTI_STEP_CVT is 0 for a single conversion,
4837 so >> MULTI_STEP_CVT divides by 2^(number of steps - 1). */
4838 unsigned int nvectors
4839 = (slp_node
4840 ? SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node) >> multi_step_cvt
4841 : ncopies * 2);
4842 vect_model_promotion_demotion_cost (stmt_info, dt, nvectors,
4843 multi_step_cvt, cost_vec);
4844 }
4845 interm_types.release ();
4846 return true;
4847 }
4848
4849 /* Transform. */
4850 if (dump_enabled_p ())
4851 dump_printf_loc (MSG_NOTE, vect_location,
4852 "transform conversion. ncopies = %d.\n", ncopies);
4853
4854 if (op_type == binary_op)
4855 {
4856 if (CONSTANT_CLASS_P (op0))
4857 op0 = fold_convert (TREE_TYPE (op1), op0);
4858 else if (CONSTANT_CLASS_P (op1))
4859 op1 = fold_convert (TREE_TYPE (op0), op1);
4860 }
4861
4862 /* In case of multi-step conversion, we first generate conversion operations
4863 to the intermediate types, and then from those types to the final one.
4864 We create vector destinations for the intermediate types (received from
4865 supportable_*_operation) and store them in the correct order
4866 for future use in vect_create_vectorized_*_stmts (). */
4867 auto_vec<tree> vec_dsts (multi_step_cvt + 1);
4868 vec_dest = vect_create_destination_var (scalar_dest,
4869 (cvt_type && modifier == WIDEN)
4870 ? cvt_type : vectype_out);
4871 vec_dsts.quick_push (vec_dest);
4872
4873 if (multi_step_cvt)
4874 {
4875 for (i = interm_types.length () - 1;
4876 interm_types.iterate (i, &intermediate_type); i--)
4877 {
4878 vec_dest = vect_create_destination_var (scalar_dest,
4879 intermediate_type);
4880 vec_dsts.quick_push (vec_dest);
4881 }
4882 }
4883
4884 if (cvt_type)
4885 vec_dest = vect_create_destination_var (scalar_dest,
4886 modifier == WIDEN
4887 ? vectype_out : cvt_type);
4888
4889 int ninputs = 1;
4890 if (!slp_node)
4891 {
4892 if (modifier == WIDEN)
4893 ;
4894 else if (modifier == NARROW)
4895 {
4896 if (multi_step_cvt)
4897 ninputs = vect_pow2 (multi_step_cvt);
4898 ninputs *= 2;
4899 }
4900 }
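 /* E.g. for the NARROW case with one intermediate step (multi_step_cvt
    == 1), NINPUTS becomes 4: four input vectors are consumed to produce
    each final narrowed vector copy.  */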
4901
4902 switch (modifier)
4903 {
4904 case NONE:
4905 vect_get_vec_defs (vinfo, stmt_info, slp_node, ncopies,
4906 op0, &vec_oprnds0);
4907 FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
4908 {
4909 /* Arguments are ready, create the new vector stmt. */
4910 gcc_assert (TREE_CODE_LENGTH (code1) == unary_op);
4911 gassign *new_stmt = gimple_build_assign (vec_dest, code1, vop0);
4912 new_temp = make_ssa_name (vec_dest, new_stmt);
4913 gimple_assign_set_lhs (new_stmt, new_temp);
4914 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
4915
4916 if (slp_node)
4917 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
4918 else
4919 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
4920 }
4921 break;
4922
4923 case WIDEN:
4924 /* In case the vectorization factor (VF) is bigger than the number
4925 of elements that we can fit in a vectype (nunits), we have to
4926 generate more than one vector stmt, i.e. we need to "unroll"
4927 the vector stmt by a factor of VF/nunits. */
4928 vect_get_vec_defs (vinfo, stmt_info, slp_node, ncopies * ninputs,
4929 op0, &vec_oprnds0,
4930 code == WIDEN_LSHIFT_EXPR ? NULL_TREE : op1,
4931 &vec_oprnds1);
4932 if (code == WIDEN_LSHIFT_EXPR)
4933 {
4934 vec_oprnds1.create (ncopies * ninputs);
4935 for (i = 0; i < ncopies * ninputs; ++i)
4936 vec_oprnds1.quick_push (op1);
4937 }
4938 /* Arguments are ready. Create the new vector stmts. */
4939 for (i = multi_step_cvt; i >= 0; i--)
4940 {
4941 tree this_dest = vec_dsts[i];
4942 enum tree_code c1 = code1, c2 = code2;
4943 if (i == 0 && codecvt2 != ERROR_MARK)
4944 {
4945 c1 = codecvt1;
4946 c2 = codecvt2;
4947 }
4948 vect_create_vectorized_promotion_stmts (vinfo, &vec_oprnds0,
4949 &vec_oprnds1, stmt_info,
4950 this_dest, gsi,
4951 c1, c2, op_type);
4952 }
4953
4954 FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
4955 {
4956 gimple *new_stmt;
4957 if (cvt_type)
4958 {
4959 gcc_assert (TREE_CODE_LENGTH (codecvt1) == unary_op);
4960 new_temp = make_ssa_name (vec_dest);
4961 new_stmt = gimple_build_assign (new_temp, codecvt1, vop0);
4962 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
4963 }
4964 else
4965 new_stmt = SSA_NAME_DEF_STMT (vop0);
4966
4967 if (slp_node)
4968 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
4969 else
4970 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
4971 }
4972 break;
4973
4974 case NARROW:
4975 /* In case the vectorization factor (VF) is bigger than the number
4976 of elements that we can fit in a vectype (nunits), we have to
4977 generate more than one vector stmt, i.e. we need to "unroll"
4978 the vector stmt by a factor of VF/nunits. */
4979 vect_get_vec_defs (vinfo, stmt_info, slp_node, ncopies * ninputs,
4980 op0, &vec_oprnds0);
4981 /* Arguments are ready. Create the new vector stmts. */
4982 if (cvt_type)
4983 FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
4984 {
4985 gcc_assert (TREE_CODE_LENGTH (codecvt1) == unary_op);
4986 new_temp = make_ssa_name (vec_dest);
4987 gassign *new_stmt
4988 = gimple_build_assign (new_temp, codecvt1, vop0);
4989 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
4990 vec_oprnds0[i] = new_temp;
4991 }
4992
4993 vect_create_vectorized_demotion_stmts (vinfo, &vec_oprnds0,
4994 multi_step_cvt,
4995 stmt_info, vec_dsts, gsi,
4996 slp_node, code1);
4997 break;
4998 }
4999 if (!slp_node)
5000 *vec_stmt = STMT_VINFO_VEC_STMTS (stmt_info)[0];
5001
5002 vec_oprnds0.release ();
5003 vec_oprnds1.release ();
5004 interm_types.release ();
5005
5006 return true;
5007 }
5008
5009 /* Return true if we can assume from the scalar form of STMT_INFO that
5010 neither the scalar nor the vector forms will generate code. STMT_INFO
5011 is known not to involve a data reference. */
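 /* E.g. a cast between int and unsigned int is a nop in both the scalar
    and the vector form, whereas a widening char -> int cast is not.  */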
5012
5013 bool
5014 vect_nop_conversion_p (stmt_vec_info stmt_info)
5015 {
5016 gassign *stmt = dyn_cast <gassign *> (stmt_info->stmt);
5017 if (!stmt)
5018 return false;
5019
5020 tree lhs = gimple_assign_lhs (stmt);
5021 tree_code code = gimple_assign_rhs_code (stmt);
5022 tree rhs = gimple_assign_rhs1 (stmt);
5023
5024 if (code == SSA_NAME || code == VIEW_CONVERT_EXPR)
5025 return true;
5026
5027 if (CONVERT_EXPR_CODE_P (code))
5028 return tree_nop_conversion_p (TREE_TYPE (lhs), TREE_TYPE (rhs));
5029
5030 return false;
5031 }
5032
5033 /* Function vectorizable_assignment.
5034
5035 Check if STMT_INFO performs an assignment (copy) that can be vectorized.
5036 If VEC_STMT is also passed, vectorize the STMT_INFO: create a vectorized
5037 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
5038 Return true if STMT_INFO is vectorizable in this way. */
5039
5040 static bool
5041 vectorizable_assignment (vec_info *vinfo,
5042 stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
5043 gimple **vec_stmt, slp_tree slp_node,
5044 stmt_vector_for_cost *cost_vec)
5045 {
5046 tree vec_dest;
5047 tree scalar_dest;
5048 tree op;
5049 loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
5050 tree new_temp;
5051 enum vect_def_type dt[1] = {vect_unknown_def_type};
5052 int ndts = 1;
5053 int ncopies;
5054 int i;
5055 vec<tree> vec_oprnds = vNULL;
5056 tree vop;
5057 bb_vec_info bb_vinfo = dyn_cast <bb_vec_info> (vinfo);
5058 enum tree_code code;
5059 tree vectype_in;
5060
5061 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
5062 return false;
5063
5064 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
5065 && ! vec_stmt)
5066 return false;
5067
5068 /* Is this a vectorizable assignment? */
5069 gassign *stmt = dyn_cast <gassign *> (stmt_info->stmt);
5070 if (!stmt)
5071 return false;
5072
5073 scalar_dest = gimple_assign_lhs (stmt);
5074 if (TREE_CODE (scalar_dest) != SSA_NAME)
5075 return false;
5076
5077 if (STMT_VINFO_DATA_REF (stmt_info))
5078 return false;
5079
5080 code = gimple_assign_rhs_code (stmt);
5081 if (!(gimple_assign_single_p (stmt)
5082 || code == PAREN_EXPR
5083 || CONVERT_EXPR_CODE_P (code)))
5084 return false;
5085
5086 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
5087 poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
5088
5089 /* Multiple types in SLP are handled by creating the appropriate number of
5090 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
5091 case of SLP. */
5092 if (slp_node)
5093 ncopies = 1;
5094 else
5095 ncopies = vect_get_num_copies (loop_vinfo, vectype);
5096
5097 gcc_assert (ncopies >= 1);
5098
5099 slp_tree slp_op;
5100 if (!vect_is_simple_use (vinfo, stmt_info, slp_node, 0, &op, &slp_op,
5101 &dt[0], &vectype_in))
5102 {
5103 if (dump_enabled_p ())
5104 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5105 "use not simple.\n");
5106 return false;
5107 }
5108 if (!vectype_in)
5109 vectype_in = get_vectype_for_scalar_type (vinfo, TREE_TYPE (op), slp_node);
5110
5111 /* We can handle NOP_EXPR conversions that do not change the number
5112 of elements or the vector size. */
5113 if ((CONVERT_EXPR_CODE_P (code)
5114 || code == VIEW_CONVERT_EXPR)
5115 && (!vectype_in
5116 || maybe_ne (TYPE_VECTOR_SUBPARTS (vectype_in), nunits)
5117 || maybe_ne (GET_MODE_SIZE (TYPE_MODE (vectype)),
5118 GET_MODE_SIZE (TYPE_MODE (vectype_in)))))
5119 return false;
5120
5121 /* We do not handle bit-precision changes. */
5122 if ((CONVERT_EXPR_CODE_P (code)
5123 || code == VIEW_CONVERT_EXPR)
5124 && INTEGRAL_TYPE_P (TREE_TYPE (scalar_dest))
5125 && (!type_has_mode_precision_p (TREE_TYPE (scalar_dest))
5126 || !type_has_mode_precision_p (TREE_TYPE (op)))
5127 /* But a conversion that does not change the bit-pattern is ok. */
5128 && !((TYPE_PRECISION (TREE_TYPE (scalar_dest))
5129 > TYPE_PRECISION (TREE_TYPE (op)))
5130 && TYPE_UNSIGNED (TREE_TYPE (op)))
5131 /* Conversion between boolean types of different sizes is
5132 a simple assignment when their vectypes are the same
5133 boolean vector type. */
5134 && (!VECTOR_BOOLEAN_TYPE_P (vectype)
5135 || !VECTOR_BOOLEAN_TYPE_P (vectype_in)))
5136 {
5137 if (dump_enabled_p ())
5138 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5139 "type conversion to/from bit-precision "
5140 "unsupported.\n");
5141 return false;
5142 }
5143
5144 if (!vec_stmt) /* transformation not required. */
5145 {
5146 if (slp_node
5147 && !vect_maybe_update_slp_op_vectype (slp_op, vectype_in))
5148 {
5149 if (dump_enabled_p ())
5150 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5151 "incompatible vector types for invariants\n");
5152 return false;
5153 }
5154 STMT_VINFO_TYPE (stmt_info) = assignment_vec_info_type;
5155 DUMP_VECT_SCOPE ("vectorizable_assignment");
5156 if (!vect_nop_conversion_p (stmt_info))
5157 vect_model_simple_cost (vinfo, stmt_info, ncopies, dt, ndts, slp_node,
5158 cost_vec);
5159 return true;
5160 }
5161
5162 /* Transform. */
5163 if (dump_enabled_p ())
5164 dump_printf_loc (MSG_NOTE, vect_location, "transform assignment.\n");
5165
5166 /* Handle def. */
5167 vec_dest = vect_create_destination_var (scalar_dest, vectype);
5168
5169 /* Handle use. */
5170 vect_get_vec_defs (vinfo, stmt_info, slp_node, ncopies, op, &vec_oprnds);
5171
5172 /* Arguments are ready. Create the new vector stmt. */
5173 FOR_EACH_VEC_ELT (vec_oprnds, i, vop)
5174 {
5175 if (CONVERT_EXPR_CODE_P (code)
5176 || code == VIEW_CONVERT_EXPR)
5177 vop = build1 (VIEW_CONVERT_EXPR, vectype, vop);
5178 gassign *new_stmt = gimple_build_assign (vec_dest, vop);
5179 new_temp = make_ssa_name (vec_dest, new_stmt);
5180 gimple_assign_set_lhs (new_stmt, new_temp);
5181 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
5182 if (slp_node)
5183 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
5184 else
5185 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
5186 }
5187 if (!slp_node)
5188 *vec_stmt = STMT_VINFO_VEC_STMTS (stmt_info)[0];
5189
5190 vec_oprnds.release ();
5191 return true;
5192 }
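/* For instance, with a V4SI vector type the scalar statement

       _2 = (unsigned int) _1;

   is vectorized by the loop above as a single vector copy through a
   view-convert:

       vect__2 = VIEW_CONVERT_EXPR<vector(4) unsigned int>(vect__1);

   A sketch only; the actual SSA names come from
   vect_create_destination_var and the operand's vectorized def.  */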
5193
5194
5195 /* Return TRUE if CODE (a shift operation) is supported for SCALAR_TYPE
5196 either as shift by a scalar or by a vector. */
5197
5198 bool
5199 vect_supportable_shift (vec_info *vinfo, enum tree_code code, tree scalar_type)
5200 {
5201
5202 machine_mode vec_mode;
5203 optab optab;
5204 int icode;
5205 tree vectype;
5206
5207 vectype = get_vectype_for_scalar_type (vinfo, scalar_type);
5208 if (!vectype)
5209 return false;
5210
5211 optab = optab_for_tree_code (code, vectype, optab_scalar);
5212 if (!optab
5213 || optab_handler (optab, TYPE_MODE (vectype)) == CODE_FOR_nothing)
5214 {
5215 optab = optab_for_tree_code (code, vectype, optab_vector);
5216 if (!optab
5217 || (optab_handler (optab, TYPE_MODE (vectype))
5218 == CODE_FOR_nothing))
5219 return false;
5220 }
5221
5222 vec_mode = TYPE_MODE (vectype);
5223 icode = (int) optab_handler (optab, vec_mode);
5224 if (icode == CODE_FOR_nothing)
5225 return false;
5226
5227 return true;
5228 }
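/* As a sketch of the two optab queries above, the optab_scalar form
   corresponds to a loop whose shift amount is invariant,

       for (int i = 0; i < n; i++)
	 a[i] = b[i] << k;

   while the optab_vector form corresponds to a per-element amount,

       for (int i = 0; i < n; i++)
	 a[i] = b[i] << c[i];

   vect_supportable_shift only asks whether either form is available;
   which one is actually used is decided later in vectorizable_shift.  */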
5229
5230
5231 /* Function vectorizable_shift.
5232
5233 Check if STMT_INFO performs a shift operation that can be vectorized.
5234 If VEC_STMT is also passed, vectorize the STMT_INFO: create a vectorized
5235 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
5236 Return true if STMT_INFO is vectorizable in this way. */
5237
5238 static bool
5239 vectorizable_shift (vec_info *vinfo,
5240 stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
5241 gimple **vec_stmt, slp_tree slp_node,
5242 stmt_vector_for_cost *cost_vec)
5243 {
5244 tree vec_dest;
5245 tree scalar_dest;
5246 tree op0, op1 = NULL;
5247 tree vec_oprnd1 = NULL_TREE;
5248 tree vectype;
5249 loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
5250 enum tree_code code;
5251 machine_mode vec_mode;
5252 tree new_temp;
5253 optab optab;
5254 int icode;
5255 machine_mode optab_op2_mode;
5256 enum vect_def_type dt[2] = {vect_unknown_def_type, vect_unknown_def_type};
5257 int ndts = 2;
5258 poly_uint64 nunits_in;
5259 poly_uint64 nunits_out;
5260 tree vectype_out;
5261 tree op1_vectype;
5262 int ncopies;
5263 int i;
5264 vec<tree> vec_oprnds0 = vNULL;
5265 vec<tree> vec_oprnds1 = vNULL;
5266 tree vop0, vop1;
5267 unsigned int k;
5268 bool scalar_shift_arg = true;
5269 bb_vec_info bb_vinfo = dyn_cast <bb_vec_info> (vinfo);
5270 bool incompatible_op1_vectype_p = false;
5271
5272 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
5273 return false;
5274
5275 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
5276 && STMT_VINFO_DEF_TYPE (stmt_info) != vect_nested_cycle
5277 && ! vec_stmt)
5278 return false;
5279
5280 /* Is STMT a vectorizable shift operation? */
5281 gassign *stmt = dyn_cast <gassign *> (stmt_info->stmt);
5282 if (!stmt)
5283 return false;
5284
5285 if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME)
5286 return false;
5287
5288 code = gimple_assign_rhs_code (stmt);
5289
5290 if (!(code == LSHIFT_EXPR || code == RSHIFT_EXPR || code == LROTATE_EXPR
5291 || code == RROTATE_EXPR))
5292 return false;
5293
5294 scalar_dest = gimple_assign_lhs (stmt);
5295 vectype_out = STMT_VINFO_VECTYPE (stmt_info);
5296 if (!type_has_mode_precision_p (TREE_TYPE (scalar_dest)))
5297 {
5298 if (dump_enabled_p ())
5299 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5300 "bit-precision shifts not supported.\n");
5301 return false;
5302 }
5303
5304 slp_tree slp_op0;
5305 if (!vect_is_simple_use (vinfo, stmt_info, slp_node,
5306 0, &op0, &slp_op0, &dt[0], &vectype))
5307 {
5308 if (dump_enabled_p ())
5309 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5310 "use not simple.\n");
5311 return false;
5312 }
5313 /* If op0 is an external or constant def, infer the vector type
5314 from the scalar type. */
5315 if (!vectype)
5316 vectype = get_vectype_for_scalar_type (vinfo, TREE_TYPE (op0), slp_node);
5317 if (vec_stmt)
5318 gcc_assert (vectype);
5319 if (!vectype)
5320 {
5321 if (dump_enabled_p ())
5322 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5323 "no vectype for scalar type\n");
5324 return false;
5325 }
5326
5327 nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
5328 nunits_in = TYPE_VECTOR_SUBPARTS (vectype);
5329 if (maybe_ne (nunits_out, nunits_in))
5330 return false;
5331
5332 stmt_vec_info op1_def_stmt_info;
5333 slp_tree slp_op1;
5334 if (!vect_is_simple_use (vinfo, stmt_info, slp_node, 1, &op1, &slp_op1,
5335 &dt[1], &op1_vectype, &op1_def_stmt_info))
5336 {
5337 if (dump_enabled_p ())
5338 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5339 "use not simple.\n");
5340 return false;
5341 }
5342
5343 /* Multiple types in SLP are handled by creating the appropriate number of
5344 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
5345 case of SLP. */
5346 if (slp_node)
5347 ncopies = 1;
5348 else
5349 ncopies = vect_get_num_copies (loop_vinfo, vectype);
5350
5351 gcc_assert (ncopies >= 1);
5352
5353 /* Determine whether the shift amount is a vector or a scalar. If the
5354 shift/rotate amount is a vector, use the vector/vector shift optabs. */
5355
5356 if ((dt[1] == vect_internal_def
5357 || dt[1] == vect_induction_def
5358 || dt[1] == vect_nested_cycle)
5359 && !slp_node)
5360 scalar_shift_arg = false;
5361 else if (dt[1] == vect_constant_def
5362 || dt[1] == vect_external_def
5363 || dt[1] == vect_internal_def)
5364 {
5365 /* In SLP, we need to check whether the shift count is the same in
5366 all stmts; in loops, a constant or invariant count is always
5367 a scalar shift. */
5368 if (slp_node)
5369 {
5370 vec<stmt_vec_info> stmts = SLP_TREE_SCALAR_STMTS (slp_node);
5371 stmt_vec_info slpstmt_info;
5372
5373 FOR_EACH_VEC_ELT (stmts, k, slpstmt_info)
5374 {
5375 gassign *slpstmt = as_a <gassign *> (slpstmt_info->stmt);
5376 if (!operand_equal_p (gimple_assign_rhs2 (slpstmt), op1, 0))
5377 scalar_shift_arg = false;
5378 }
5379
5380 /* For internal SLP defs we have to make sure we see scalar stmts
5381 for all vector elements.
5382 ??? For different vectors we could resort to a different
5383 scalar shift operand but code-generation below simply always
5384 takes the first. */
5385 if (dt[1] == vect_internal_def
5386 && maybe_ne (nunits_out * SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node),
5387 stmts.length ()))
5388 scalar_shift_arg = false;
5389 }
5390
5391 /* If the shift amount is computed by a pattern stmt we cannot
5392 use the scalar amount directly; thus give up and use a vector
5393 shift. */
5394 if (op1_def_stmt_info && is_pattern_stmt_p (op1_def_stmt_info))
5395 scalar_shift_arg = false;
5396 }
5397 else
5398 {
5399 if (dump_enabled_p ())
5400 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5401 "operand mode requires invariant argument.\n");
5402 return false;
5403 }
5404
5405 /* Vector shifted by vector. */
5406 bool was_scalar_shift_arg = scalar_shift_arg;
5407 if (!scalar_shift_arg)
5408 {
5409 optab = optab_for_tree_code (code, vectype, optab_vector);
5410 if (dump_enabled_p ())
5411 dump_printf_loc (MSG_NOTE, vect_location,
5412 "vector/vector shift/rotate found.\n");
5413
5414 if (!op1_vectype)
5415 op1_vectype = get_vectype_for_scalar_type (vinfo, TREE_TYPE (op1),
5416 slp_op1);
5417 incompatible_op1_vectype_p
5418 = (op1_vectype == NULL_TREE
5419 || maybe_ne (TYPE_VECTOR_SUBPARTS (op1_vectype),
5420 TYPE_VECTOR_SUBPARTS (vectype))
5421 || TYPE_MODE (op1_vectype) != TYPE_MODE (vectype));
5422 if (incompatible_op1_vectype_p
5423 && (!slp_node
5424 || SLP_TREE_DEF_TYPE (slp_op1) != vect_constant_def
5425 || slp_op1->refcnt != 1))
5426 {
5427 if (dump_enabled_p ())
5428 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5429 "unusable type for last operand in"
5430 " vector/vector shift/rotate.\n");
5431 return false;
5432 }
5433 }
5434 /* See if the machine has a vector-shift-by-scalar insn, and if not,
5435 then see if it has a vector-shift-by-vector insn. */
5436 else
5437 {
5438 optab = optab_for_tree_code (code, vectype, optab_scalar);
5439 if (optab
5440 && optab_handler (optab, TYPE_MODE (vectype)) != CODE_FOR_nothing)
5441 {
5442 if (dump_enabled_p ())
5443 dump_printf_loc (MSG_NOTE, vect_location,
5444 "vector/scalar shift/rotate found.\n");
5445 }
5446 else
5447 {
5448 optab = optab_for_tree_code (code, vectype, optab_vector);
5449 if (optab
5450 && (optab_handler (optab, TYPE_MODE (vectype))
5451 != CODE_FOR_nothing))
5452 {
5453 scalar_shift_arg = false;
5454
5455 if (dump_enabled_p ())
5456 dump_printf_loc (MSG_NOTE, vect_location,
5457 "vector/vector shift/rotate found.\n");
5458
5459 if (!op1_vectype)
5460 op1_vectype = get_vectype_for_scalar_type (vinfo,
5461 TREE_TYPE (op1),
5462 slp_op1);
5463
5464 /* Unlike the other binary operators, shifts/rotates have
5465 an rhs of type int rather than the same type as the lhs,
5466 so make sure the scalar is of the right type if we are
5467 dealing with vectors of long long/long/short/char. */
5468 incompatible_op1_vectype_p
5469 = (!op1_vectype
5470 || !tree_nop_conversion_p (TREE_TYPE (vectype),
5471 TREE_TYPE (op1)));
5472 if (incompatible_op1_vectype_p
5473 && dt[1] == vect_internal_def)
5474 {
5475 if (dump_enabled_p ())
5476 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5477 "unusable type for last operand in"
5478 " vector/vector shift/rotate.\n");
5479 return false;
5480 }
5481 }
5482 }
5483 }
5484
5485 /* Supportable by target? */
5486 if (!optab)
5487 {
5488 if (dump_enabled_p ())
5489 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5490 "no optab.\n");
5491 return false;
5492 }
5493 vec_mode = TYPE_MODE (vectype);
5494 icode = (int) optab_handler (optab, vec_mode);
5495 if (icode == CODE_FOR_nothing)
5496 {
5497 if (dump_enabled_p ())
5498 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5499 "op not supported by target.\n");
5500 /* Check only during analysis. */
5501 if (maybe_ne (GET_MODE_SIZE (vec_mode), UNITS_PER_WORD)
5502 || (!vec_stmt
5503 && !vect_worthwhile_without_simd_p (vinfo, code)))
5504 return false;
5505 if (dump_enabled_p ())
5506 dump_printf_loc (MSG_NOTE, vect_location,
5507 "proceeding using word mode.\n");
5508 }
5509
5510 /* Worthwhile without SIMD support? Check only during analysis. */
5511 if (!vec_stmt
5512 && !VECTOR_MODE_P (TYPE_MODE (vectype))
5513 && !vect_worthwhile_without_simd_p (vinfo, code))
5514 {
5515 if (dump_enabled_p ())
5516 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5517 "not worthwhile without SIMD support.\n");
5518 return false;
5519 }
5520
5521 if (!vec_stmt) /* transformation not required. */
5522 {
5523 if (slp_node
5524 && (!vect_maybe_update_slp_op_vectype (slp_op0, vectype)
5525 || ((!scalar_shift_arg || dt[1] == vect_internal_def)
5526 && (!incompatible_op1_vectype_p
5527 || dt[1] == vect_constant_def)
5528 && !vect_maybe_update_slp_op_vectype
5529 (slp_op1,
5530 incompatible_op1_vectype_p ? vectype : op1_vectype))))
5531 {
5532 if (dump_enabled_p ())
5533 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5534 "incompatible vector types for invariants\n");
5535 return false;
5536 }
5537 /* Now adjust the constant shift amount in place. */
5538 if (slp_node
5539 && incompatible_op1_vectype_p
5540 && dt[1] == vect_constant_def)
5541 {
5542 for (unsigned i = 0;
5543 i < SLP_TREE_SCALAR_OPS (slp_op1).length (); ++i)
5544 {
5545 SLP_TREE_SCALAR_OPS (slp_op1)[i]
5546 = fold_convert (TREE_TYPE (vectype),
5547 SLP_TREE_SCALAR_OPS (slp_op1)[i]);
5548 gcc_assert ((TREE_CODE (SLP_TREE_SCALAR_OPS (slp_op1)[i])
5549 == INTEGER_CST));
5550 }
5551 }
5552 STMT_VINFO_TYPE (stmt_info) = shift_vec_info_type;
5553 DUMP_VECT_SCOPE ("vectorizable_shift");
5554 vect_model_simple_cost (vinfo, stmt_info, ncopies, dt,
5555 scalar_shift_arg ? 1 : ndts, slp_node, cost_vec);
5556 return true;
5557 }
5558
5559 /* Transform. */
5560
5561 if (dump_enabled_p ())
5562 dump_printf_loc (MSG_NOTE, vect_location,
5563 "transform binary/unary operation.\n");
5564
5565 if (incompatible_op1_vectype_p && !slp_node)
5566 {
5567 gcc_assert (!scalar_shift_arg && was_scalar_shift_arg);
5568 op1 = fold_convert (TREE_TYPE (vectype), op1);
5569 if (dt[1] != vect_constant_def)
5570 op1 = vect_init_vector (vinfo, stmt_info, op1,
5571 TREE_TYPE (vectype), NULL);
5572 }
5573
5574 /* Handle def. */
5575 vec_dest = vect_create_destination_var (scalar_dest, vectype);
5576
5577 if (scalar_shift_arg && dt[1] != vect_internal_def)
5578 {
5579 /* Vector shl and shr insn patterns can be defined with scalar
5580 operand 2 (shift operand). In this case, use constant or loop
5581 invariant op1 directly, without extending it to vector mode
5582 first. */
5583 optab_op2_mode = insn_data[icode].operand[2].mode;
5584 if (!VECTOR_MODE_P (optab_op2_mode))
5585 {
5586 if (dump_enabled_p ())
5587 dump_printf_loc (MSG_NOTE, vect_location,
5588 "operand 1 using scalar mode.\n");
5589 vec_oprnd1 = op1;
5590 vec_oprnds1.create (slp_node ? slp_node->vec_stmts_size : ncopies);
5591 vec_oprnds1.quick_push (vec_oprnd1);
5592 /* Store vec_oprnd1 for every vector stmt to be created.
5593 We check during the analysis that all the shift arguments
5594 are the same.
5595 TODO: Allow different constants for different vector
5596 stmts generated for an SLP instance. */
5597 for (k = 0;
5598 k < (slp_node ? slp_node->vec_stmts_size - 1 : ncopies - 1); k++)
5599 vec_oprnds1.quick_push (vec_oprnd1);
5600 }
5601 }
5602 else if (!scalar_shift_arg && slp_node && incompatible_op1_vectype_p)
5603 {
5604 if (was_scalar_shift_arg)
5605 {
5606 /* If the argument was the same in all lanes, create
5607 the correctly typed vector shift amount directly. */
5608 op1 = fold_convert (TREE_TYPE (vectype), op1);
5609 op1 = vect_init_vector (vinfo, stmt_info, op1, TREE_TYPE (vectype),
5610 !loop_vinfo ? gsi : NULL);
5611 vec_oprnd1 = vect_init_vector (vinfo, stmt_info, op1, vectype,
5612 !loop_vinfo ? gsi : NULL);
5613 vec_oprnds1.create (slp_node->vec_stmts_size);
5614 for (k = 0; k < slp_node->vec_stmts_size; k++)
5615 vec_oprnds1.quick_push (vec_oprnd1);
5616 }
5617 else if (dt[1] == vect_constant_def)
5618 /* The constant shift amount has been adjusted in place. */
5619 ;
5620 else
5621 gcc_assert (TYPE_MODE (op1_vectype) == TYPE_MODE (vectype));
5622 }
5623
5624 /* vec_oprnd1 is available if operand 1 should be of a scalar type
5625 (a special case for certain kinds of vector shifts); otherwise,
5626 operand 1 should be of a vector type (the usual case). */
5627 vect_get_vec_defs (vinfo, stmt_info, slp_node, ncopies,
5628 op0, &vec_oprnds0,
5629 vec_oprnd1 ? NULL_TREE : op1, &vec_oprnds1);
5630
5631 /* Arguments are ready. Create the new vector stmt. */
5632 FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
5633 {
5634 /* For internal defs where we need to use a scalar shift arg
5635 extract the first lane. */
5636 if (scalar_shift_arg && dt[1] == vect_internal_def)
5637 {
5638 vop1 = vec_oprnds1[0];
5639 new_temp = make_ssa_name (TREE_TYPE (TREE_TYPE (vop1)));
5640 gassign *new_stmt
5641 = gimple_build_assign (new_temp,
5642 build3 (BIT_FIELD_REF, TREE_TYPE (new_temp),
5643 vop1,
5644 TYPE_SIZE (TREE_TYPE (new_temp)),
5645 bitsize_zero_node));
5646 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
5647 vop1 = new_temp;
5648 }
5649 else
5650 vop1 = vec_oprnds1[i];
5651 gassign *new_stmt = gimple_build_assign (vec_dest, code, vop0, vop1);
5652 new_temp = make_ssa_name (vec_dest, new_stmt);
5653 gimple_assign_set_lhs (new_stmt, new_temp);
5654 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
5655 if (slp_node)
5656 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
5657 else
5658 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
5659 }
5660
5661 if (!slp_node)
5662 *vec_stmt = STMT_VINFO_VEC_STMTS (stmt_info)[0];
5663
5664 vec_oprnds0.release ();
5665 vec_oprnds1.release ();
5666
5667 return true;
5668 }
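/* Sketch of the lane extraction emitted above when a scalar shift
   amount is needed but the amount is an internal def: with V4SImode
   operands the generated GIMPLE looks like

       _7 = BIT_FIELD_REF <vect_amount_5, 32, 0>;
       vect__8 = vect_b_6 << _7;

   i.e. lane 0 of the vectorized shift operand is extracted once and
   used for the whole vector shift (SSA names are illustrative).  */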
5669
5670
5671 /* Function vectorizable_operation.
5672
5673 Check if STMT_INFO performs a binary, unary or ternary operation that can
5674 be vectorized.
5675 If VEC_STMT is also passed, vectorize STMT_INFO: create a vectorized
5676 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
5677 Return true if STMT_INFO is vectorizable in this way. */
5678
5679 static bool
5680 vectorizable_operation (vec_info *vinfo,
5681 stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
5682 gimple **vec_stmt, slp_tree slp_node,
5683 stmt_vector_for_cost *cost_vec)
5684 {
5685 tree vec_dest;
5686 tree scalar_dest;
5687 tree op0, op1 = NULL_TREE, op2 = NULL_TREE;
5688 tree vectype;
5689 loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
5690 enum tree_code code, orig_code;
5691 machine_mode vec_mode;
5692 tree new_temp;
5693 int op_type;
5694 optab optab;
5695 bool target_support_p;
5696 enum vect_def_type dt[3]
5697 = {vect_unknown_def_type, vect_unknown_def_type, vect_unknown_def_type};
5698 int ndts = 3;
5699 poly_uint64 nunits_in;
5700 poly_uint64 nunits_out;
5701 tree vectype_out;
5702 int ncopies, vec_num;
5703 int i;
5704 vec<tree> vec_oprnds0 = vNULL;
5705 vec<tree> vec_oprnds1 = vNULL;
5706 vec<tree> vec_oprnds2 = vNULL;
5707 tree vop0, vop1, vop2;
5708 bb_vec_info bb_vinfo = dyn_cast <bb_vec_info> (vinfo);
5709
5710 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
5711 return false;
5712
5713 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
5714 && ! vec_stmt)
5715 return false;
5716
5717 /* Is STMT a vectorizable binary/unary/ternary operation? */
5718 gassign *stmt = dyn_cast <gassign *> (stmt_info->stmt);
5719 if (!stmt)
5720 return false;
5721
5722 /* Loads and stores are handled in vectorizable_{load,store}. */
5723 if (STMT_VINFO_DATA_REF (stmt_info))
5724 return false;
5725
5726 orig_code = code = gimple_assign_rhs_code (stmt);
5727
5728 /* Shifts are handled in vectorizable_shift. */
5729 if (code == LSHIFT_EXPR
5730 || code == RSHIFT_EXPR
5731 || code == LROTATE_EXPR
5732 || code == RROTATE_EXPR)
5733 return false;
5734
5735 /* Comparisons are handled in vectorizable_comparison. */
5736 if (TREE_CODE_CLASS (code) == tcc_comparison)
5737 return false;
5738
5739 /* Conditions are handled in vectorizable_condition. */
5740 if (code == COND_EXPR)
5741 return false;
5742
5743 /* For pointer addition and subtraction, we should use the normal
5744 plus and minus for the vector operation. */
5745 if (code == POINTER_PLUS_EXPR)
5746 code = PLUS_EXPR;
5747 if (code == POINTER_DIFF_EXPR)
5748 code = MINUS_EXPR;
5749
5750 /* Support only unary, binary and ternary operations. */
5751 op_type = TREE_CODE_LENGTH (code);
5752 if (op_type != unary_op && op_type != binary_op && op_type != ternary_op)
5753 {
5754 if (dump_enabled_p ())
5755 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5756 "num. args = %d (not unary/binary/ternary op).\n",
5757 op_type);
5758 return false;
5759 }
5760
5761 scalar_dest = gimple_assign_lhs (stmt);
5762 vectype_out = STMT_VINFO_VECTYPE (stmt_info);
5763
5764 /* Most operations cannot handle bit-precision types without extra
5765 truncations. */
5766 bool mask_op_p = VECTOR_BOOLEAN_TYPE_P (vectype_out);
5767 if (!mask_op_p
5768 && !type_has_mode_precision_p (TREE_TYPE (scalar_dest))
5769 /* Exceptions are bitwise binary operations. */
5770 && code != BIT_IOR_EXPR
5771 && code != BIT_XOR_EXPR
5772 && code != BIT_AND_EXPR)
5773 {
5774 if (dump_enabled_p ())
5775 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5776 "bit-precision arithmetic not supported.\n");
5777 return false;
5778 }
5779
5780 slp_tree slp_op0;
5781 if (!vect_is_simple_use (vinfo, stmt_info, slp_node,
5782 0, &op0, &slp_op0, &dt[0], &vectype))
5783 {
5784 if (dump_enabled_p ())
5785 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5786 "use not simple.\n");
5787 return false;
5788 }
5789 /* If op0 is an external or constant def, infer the vector type
5790 from the scalar type. */
5791 if (!vectype)
5792 {
5793 /* For a boolean type we cannot determine the vectype from an
5794 invariant value (we don't know whether it is a vector
5795 of booleans or a vector of integers). We use the output
5796 vectype because operations on booleans don't change the
5797 type. */
5798 if (VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (op0)))
5799 {
5800 if (!VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (scalar_dest)))
5801 {
5802 if (dump_enabled_p ())
5803 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5804 "not supported operation on bool value.\n");
5805 return false;
5806 }
5807 vectype = vectype_out;
5808 }
5809 else
5810 vectype = get_vectype_for_scalar_type (vinfo, TREE_TYPE (op0),
5811 slp_node);
5812 }
5813 if (vec_stmt)
5814 gcc_assert (vectype);
5815 if (!vectype)
5816 {
5817 if (dump_enabled_p ())
5818 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5819 "no vectype for scalar type %T\n",
5820 TREE_TYPE (op0));
5821
5822 return false;
5823 }
5824
5825 nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
5826 nunits_in = TYPE_VECTOR_SUBPARTS (vectype);
5827 if (maybe_ne (nunits_out, nunits_in))
5828 return false;
5829
5830 tree vectype2 = NULL_TREE, vectype3 = NULL_TREE;
5831 slp_tree slp_op1 = NULL, slp_op2 = NULL;
5832 if (op_type == binary_op || op_type == ternary_op)
5833 {
5834 if (!vect_is_simple_use (vinfo, stmt_info, slp_node,
5835 1, &op1, &slp_op1, &dt[1], &vectype2))
5836 {
5837 if (dump_enabled_p ())
5838 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5839 "use not simple.\n");
5840 return false;
5841 }
5842 }
5843 if (op_type == ternary_op)
5844 {
5845 if (!vect_is_simple_use (vinfo, stmt_info, slp_node,
5846 2, &op2, &slp_op2, &dt[2], &vectype3))
5847 {
5848 if (dump_enabled_p ())
5849 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5850 "use not simple.\n");
5851 return false;
5852 }
5853 }
5854
5855 /* Multiple types in SLP are handled by creating the appropriate number of
5856 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
5857 case of SLP. */
5858 if (slp_node)
5859 {
5860 ncopies = 1;
5861 vec_num = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
5862 }
5863 else
5864 {
5865 ncopies = vect_get_num_copies (loop_vinfo, vectype);
5866 vec_num = 1;
5867 }
5868
5869 gcc_assert (ncopies >= 1);
5870
5871 /* Reject attempts to combine mask types with nonmask types, e.g. if
5872 we have an AND between a (nonmask) boolean loaded from memory and
5873 a (mask) boolean result of a comparison.
5874
5875 TODO: We could easily fix these cases up using pattern statements. */
5876 if (VECTOR_BOOLEAN_TYPE_P (vectype) != mask_op_p
5877 || (vectype2 && VECTOR_BOOLEAN_TYPE_P (vectype2) != mask_op_p)
5878 || (vectype3 && VECTOR_BOOLEAN_TYPE_P (vectype3) != mask_op_p))
5879 {
5880 if (dump_enabled_p ())
5881 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5882 "mixed mask and nonmask vector types\n");
5883 return false;
5884 }
5885
5886 /* Supportable by target? */
5887
5888 vec_mode = TYPE_MODE (vectype);
5889 if (code == MULT_HIGHPART_EXPR)
5890 target_support_p = can_mult_highpart_p (vec_mode, TYPE_UNSIGNED (vectype));
5891 else
5892 {
5893 optab = optab_for_tree_code (code, vectype, optab_default);
5894 if (!optab)
5895 {
5896 if (dump_enabled_p ())
5897 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5898 "no optab.\n");
5899 return false;
5900 }
5901 target_support_p = (optab_handler (optab, vec_mode)
5902 != CODE_FOR_nothing);
5903 }
5904
5905 if (!target_support_p)
5906 {
5907 if (dump_enabled_p ())
5908 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5909 "op not supported by target.\n");
5910 /* Check only during analysis. */
5911 if (maybe_ne (GET_MODE_SIZE (vec_mode), UNITS_PER_WORD)
5912 || (!vec_stmt && !vect_worthwhile_without_simd_p (vinfo, code)))
5913 return false;
5914 if (dump_enabled_p ())
5915 dump_printf_loc (MSG_NOTE, vect_location,
5916 "proceeding using word mode.\n");
5917 }
5918
5919 /* Worthwhile without SIMD support? Check only during analysis. */
5920 if (!VECTOR_MODE_P (vec_mode)
5921 && !vec_stmt
5922 && !vect_worthwhile_without_simd_p (vinfo, code))
5923 {
5924 if (dump_enabled_p ())
5925 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5926 "not worthwhile without SIMD support.\n");
5927 return false;
5928 }
5929
5930 int reduc_idx = STMT_VINFO_REDUC_IDX (stmt_info);
5931 vec_loop_masks *masks = (loop_vinfo ? &LOOP_VINFO_MASKS (loop_vinfo) : NULL);
5932 internal_fn cond_fn = get_conditional_internal_fn (code);
5933
5934 if (!vec_stmt) /* transformation not required. */
5935 {
5936 /* If this operation is part of a reduction, a fully-masked loop
5937 should only change the active lanes of the reduction chain,
5938 keeping the inactive lanes as-is. */
5939 if (loop_vinfo
5940 && LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo)
5941 && reduc_idx >= 0)
5942 {
5943 if (cond_fn == IFN_LAST
5944 || !direct_internal_fn_supported_p (cond_fn, vectype,
5945 OPTIMIZE_FOR_SPEED))
5946 {
5947 if (dump_enabled_p ())
5948 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5949 "can't use a fully-masked loop because no"
5950 " conditional operation is available.\n");
5951 LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo) = false;
5952 }
5953 else
5954 vect_record_loop_mask (loop_vinfo, masks, ncopies * vec_num,
5955 vectype, NULL);
5956 }
5957
5958 /* Put types on constant and invariant SLP children. */
5959 if (slp_node
5960 && (!vect_maybe_update_slp_op_vectype (slp_op0, vectype)
5961 || !vect_maybe_update_slp_op_vectype (slp_op1, vectype)
5962 || !vect_maybe_update_slp_op_vectype (slp_op2, vectype)))
5963 {
5964 if (dump_enabled_p ())
5965 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5966 "incompatible vector types for invariants\n");
5967 return false;
5968 }
5969
5970 STMT_VINFO_TYPE (stmt_info) = op_vec_info_type;
5971 DUMP_VECT_SCOPE ("vectorizable_operation");
5972 vect_model_simple_cost (vinfo, stmt_info,
5973 ncopies, dt, ndts, slp_node, cost_vec);
5974 return true;
5975 }
5976
5977 /* Transform. */
5978
5979 if (dump_enabled_p ())
5980 dump_printf_loc (MSG_NOTE, vect_location,
5981 "transform binary/unary operation.\n");
5982
5983 bool masked_loop_p = loop_vinfo && LOOP_VINFO_FULLY_MASKED_P (loop_vinfo);
5984
5985 /* POINTER_DIFF_EXPR has pointer arguments which are vectorized as
5986 vectors with unsigned elements, but the result is signed. So, we
5987 need to compute the MINUS_EXPR into a vectype temporary and
5988 VIEW_CONVERT_EXPR it into the final vectype_out result. */
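  /* For example, for a pointer difference p - q with 64-bit pointers and
     V2DI vectors, a sketch of the emitted statements is

	 vect_tmp = vect_p - vect_q;    // MINUS_EXPR on unsigned elements
	 vect__3 = VIEW_CONVERT_EXPR<vector(2) long int>(vect_tmp);

     with illustrative names; the element types depend on the target.  */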
5989 tree vec_cvt_dest = NULL_TREE;
5990 if (orig_code == POINTER_DIFF_EXPR)
5991 {
5992 vec_dest = vect_create_destination_var (scalar_dest, vectype);
5993 vec_cvt_dest = vect_create_destination_var (scalar_dest, vectype_out);
5994 }
5995 /* Handle def. */
5996 else
5997 vec_dest = vect_create_destination_var (scalar_dest, vectype_out);
5998
5999 /* In case the vectorization factor (VF) is bigger than the number
6000 of elements that we can fit in a vectype (nunits), we have to generate
6001 more than one vector stmt, i.e., we need to "unroll" the
6002 vector stmt by a factor of VF/nunits. In doing so, we record a pointer
6003 from one copy of the vector stmt to the next, in the field
6004 STMT_VINFO_RELATED_STMT. This is necessary in order to allow following
6005 stages to find the correct vector defs to be used when vectorizing
6006 stmts that use the defs of the current stmt. The example below
6007 illustrates the vectorization process when VF=16 and nunits=4 (i.e.,
6008 we need to create 4 vectorized stmts):
6009
6010 before vectorization:
6011 RELATED_STMT VEC_STMT
6012 S1: x = memref - -
6013 S2: z = x + 1 - -
6014
6015 step 1: vectorize stmt S1 (done in vectorizable_load. See more details
6016 there):
6017 RELATED_STMT VEC_STMT
6018 VS1_0: vx0 = memref0 VS1_1 -
6019 VS1_1: vx1 = memref1 VS1_2 -
6020 VS1_2: vx2 = memref2 VS1_3 -
6021 VS1_3: vx3 = memref3 - -
6022 S1: x = load - VS1_0
6023 S2: z = x + 1 - -
6024
6025 step2: vectorize stmt S2 (done here):
6026 To vectorize stmt S2 we first need to find the relevant vector
6027 def for the first operand 'x'. This is, as usual, obtained from
6028 the vector stmt recorded in the STMT_VINFO_VEC_STMT of the stmt
6029 that defines 'x' (S1). This way we find the stmt VS1_0, and the
6030 relevant vector def 'vx0'. Having found 'vx0' we can generate
6031 the vector stmt VS2_0, and as usual, record it in the
6032 STMT_VINFO_VEC_STMT of stmt S2.
6033 When creating the second copy (VS2_1), we obtain the relevant vector
6034 def from the vector stmt recorded in the STMT_VINFO_RELATED_STMT of
6035 stmt VS1_0. This way we find the stmt VS1_1 and the relevant
6036 vector def 'vx1'. Using 'vx1' we create stmt VS2_1 and record a
6037 pointer to it in the STMT_VINFO_RELATED_STMT of the vector stmt VS2_0.
6038 Similarly when creating stmts VS2_2 and VS2_3. This is the resulting
6039 chain of stmts and pointers:
6040 RELATED_STMT VEC_STMT
6041 VS1_0: vx0 = memref0 VS1_1 -
6042 VS1_1: vx1 = memref1 VS1_2 -
6043 VS1_2: vx2 = memref2 VS1_3 -
6044 VS1_3: vx3 = memref3 - -
6045 S1: x = load - VS1_0
6046 VS2_0: vz0 = vx0 + v1 VS2_1 -
6047 VS2_1: vz1 = vx1 + v1 VS2_2 -
6048 VS2_2: vz2 = vx2 + v1 VS2_3 -
6049 VS2_3: vz3 = vx3 + v1 - -
6050 S2: z = x + 1 - VS2_0 */
6051
6052 vect_get_vec_defs (vinfo, stmt_info, slp_node, ncopies,
6053 op0, &vec_oprnds0, op1, &vec_oprnds1, op2, &vec_oprnds2);
6054 /* Arguments are ready. Create the new vector stmt. */
6055 FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
6056 {
6057 gimple *new_stmt = NULL;
6058 vop1 = ((op_type == binary_op || op_type == ternary_op)
6059 ? vec_oprnds1[i] : NULL_TREE);
6060 vop2 = ((op_type == ternary_op) ? vec_oprnds2[i] : NULL_TREE);
6061 if (masked_loop_p && reduc_idx >= 0)
6062 {
6063 /* Perform the operation on active elements only and take
6064 inactive elements from the reduction chain input. */
6065 gcc_assert (!vop2);
6066 vop2 = reduc_idx == 1 ? vop1 : vop0;
6067 tree mask = vect_get_loop_mask (gsi, masks, vec_num * ncopies,
6068 vectype, i);
6069 gcall *call = gimple_build_call_internal (cond_fn, 4, mask,
6070 vop0, vop1, vop2);
6071 new_temp = make_ssa_name (vec_dest, call);
6072 gimple_call_set_lhs (call, new_temp);
6073 gimple_call_set_nothrow (call, true);
6074 vect_finish_stmt_generation (vinfo, stmt_info, call, gsi);
6075 new_stmt = call;
6076 }
6077 else
6078 {
6079 new_stmt = gimple_build_assign (vec_dest, code, vop0, vop1, vop2);
6080 new_temp = make_ssa_name (vec_dest, new_stmt);
6081 gimple_assign_set_lhs (new_stmt, new_temp);
6082 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
6083 if (vec_cvt_dest)
6084 {
6085 new_temp = build1 (VIEW_CONVERT_EXPR, vectype_out, new_temp);
6086 new_stmt = gimple_build_assign (vec_cvt_dest, VIEW_CONVERT_EXPR,
6087 new_temp);
6088 new_temp = make_ssa_name (vec_cvt_dest, new_stmt);
6089 gimple_assign_set_lhs (new_stmt, new_temp);
6090 vect_finish_stmt_generation (vinfo, stmt_info,
6091 new_stmt, gsi);
6092 }
6093 }
6094 if (slp_node)
6095 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
6096 else
6097 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
6098 }
6099
6100 if (!slp_node)
6101 *vec_stmt = STMT_VINFO_VEC_STMTS (stmt_info)[0];
6102
6103 vec_oprnds0.release ();
6104 vec_oprnds1.release ();
6105 vec_oprnds2.release ();
6106
6107 return true;
6108 }
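/* Sketch of the fully-masked reduction path above: with COND_FN being
   .COND_ADD and the reduction chain input as the first operand
   (reduc_idx == 0), each copy is emitted as

       vect_sum_9 = .COND_ADD (loop_mask_8, vect_sum_7, vect_x_6, vect_sum_7);

   so active lanes perform the addition and inactive lanes keep the
   previous value of the reduction chain (names are illustrative).  */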
6109
6110 /* A helper function to ensure data reference DR_INFO's base alignment. */
6111
6112 static void
6113 ensure_base_align (dr_vec_info *dr_info)
6114 {
6115 if (dr_info->misalignment == DR_MISALIGNMENT_UNINITIALIZED)
6116 return;
6117
6118 if (dr_info->base_misaligned)
6119 {
6120 tree base_decl = dr_info->base_decl;
6121
6122 /* We should only be able to increase the alignment of a base object if
6123 we know what its new alignment should be at compile time. */
6124 unsigned HOST_WIDE_INT align_base_to =
6125 DR_TARGET_ALIGNMENT (dr_info).to_constant () * BITS_PER_UNIT;
6126
6127 if (decl_in_symtab_p (base_decl))
6128 symtab_node::get (base_decl)->increase_alignment (align_base_to);
6129 else if (DECL_ALIGN (base_decl) < align_base_to)
6130 {
6131 SET_DECL_ALIGN (base_decl, align_base_to);
6132 DECL_USER_ALIGN (base_decl) = 1;
6133 }
6134 dr_info->base_misaligned = false;
6135 }
6136 }
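/* For example, if DR_TARGET_ALIGNMENT is 16 bytes and the vectorized
   loop accesses a file-scope array declared as

       static short a[1024];

   whose DECL_ALIGN is smaller, the declaration is effectively promoted
   to the equivalent of

       static short a[1024] __attribute__ ((aligned (16)));

   either through the symbol table or via SET_DECL_ALIGN above (a sketch;
   the exact alignment depends on the target).  */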
6137
6138
6139 /* Function get_group_alias_ptr_type.
6140
6141 Return the alias type for the group starting at FIRST_STMT_INFO. */
6142
6143 static tree
6144 get_group_alias_ptr_type (stmt_vec_info first_stmt_info)
6145 {
6146 struct data_reference *first_dr, *next_dr;
6147
6148 first_dr = STMT_VINFO_DATA_REF (first_stmt_info);
6149 stmt_vec_info next_stmt_info = DR_GROUP_NEXT_ELEMENT (first_stmt_info);
6150 while (next_stmt_info)
6151 {
6152 next_dr = STMT_VINFO_DATA_REF (next_stmt_info);
6153 if (get_alias_set (DR_REF (first_dr))
6154 != get_alias_set (DR_REF (next_dr)))
6155 {
6156 if (dump_enabled_p ())
6157 dump_printf_loc (MSG_NOTE, vect_location,
6158 "conflicting alias set types.\n");
6159 return ptr_type_node;
6160 }
6161 next_stmt_info = DR_GROUP_NEXT_ELEMENT (next_stmt_info);
6162 }
6163 return reference_alias_ptr_type (DR_REF (first_dr));
6164 }
6165
6166
6167 /* Function scan_operand_equal_p.
6168
6169 Helper function for check_scan_store. Compare two references
6170 with .GOMP_SIMD_LANE bases. */
6171
6172 static bool
6173 scan_operand_equal_p (tree ref1, tree ref2)
6174 {
6175 tree ref[2] = { ref1, ref2 };
6176 poly_int64 bitsize[2], bitpos[2];
6177 tree offset[2], base[2];
6178 for (int i = 0; i < 2; ++i)
6179 {
6180 machine_mode mode;
6181 int unsignedp, reversep, volatilep = 0;
6182 base[i] = get_inner_reference (ref[i], &bitsize[i], &bitpos[i],
6183 &offset[i], &mode, &unsignedp,
6184 &reversep, &volatilep);
6185 if (reversep || volatilep || maybe_ne (bitpos[i], 0))
6186 return false;
6187 if (TREE_CODE (base[i]) == MEM_REF
6188 && offset[i] == NULL_TREE
6189 && TREE_CODE (TREE_OPERAND (base[i], 0)) == SSA_NAME)
6190 {
6191 gimple *def_stmt = SSA_NAME_DEF_STMT (TREE_OPERAND (base[i], 0));
6192 if (is_gimple_assign (def_stmt)
6193 && gimple_assign_rhs_code (def_stmt) == POINTER_PLUS_EXPR
6194 && TREE_CODE (gimple_assign_rhs1 (def_stmt)) == ADDR_EXPR
6195 && TREE_CODE (gimple_assign_rhs2 (def_stmt)) == SSA_NAME)
6196 {
6197 if (maybe_ne (mem_ref_offset (base[i]), 0))
6198 return false;
6199 base[i] = TREE_OPERAND (gimple_assign_rhs1 (def_stmt), 0);
6200 offset[i] = gimple_assign_rhs2 (def_stmt);
6201 }
6202 }
6203 }
6204
6205 if (!operand_equal_p (base[0], base[1], 0))
6206 return false;
6207 if (maybe_ne (bitsize[0], bitsize[1]))
6208 return false;
6209 if (offset[0] != offset[1])
6210 {
6211 if (!offset[0] || !offset[1])
6212 return false;
6213 if (!operand_equal_p (offset[0], offset[1], 0))
6214 {
6215 tree step[2];
6216 for (int i = 0; i < 2; ++i)
6217 {
6218 step[i] = integer_one_node;
6219 if (TREE_CODE (offset[i]) == SSA_NAME)
6220 {
6221 gimple *def_stmt = SSA_NAME_DEF_STMT (offset[i]);
6222 if (is_gimple_assign (def_stmt)
6223 && gimple_assign_rhs_code (def_stmt) == MULT_EXPR
6224 && (TREE_CODE (gimple_assign_rhs2 (def_stmt))
6225 == INTEGER_CST))
6226 {
6227 step[i] = gimple_assign_rhs2 (def_stmt);
6228 offset[i] = gimple_assign_rhs1 (def_stmt);
6229 }
6230 }
6231 else if (TREE_CODE (offset[i]) == MULT_EXPR)
6232 {
6233 step[i] = TREE_OPERAND (offset[i], 1);
6234 offset[i] = TREE_OPERAND (offset[i], 0);
6235 }
6236 tree rhs1 = NULL_TREE;
6237 if (TREE_CODE (offset[i]) == SSA_NAME)
6238 {
6239 gimple *def_stmt = SSA_NAME_DEF_STMT (offset[i]);
6240 if (gimple_assign_cast_p (def_stmt))
6241 rhs1 = gimple_assign_rhs1 (def_stmt);
6242 }
6243 else if (CONVERT_EXPR_P (offset[i]))
6244 rhs1 = TREE_OPERAND (offset[i], 0);
6245 if (rhs1
6246 && INTEGRAL_TYPE_P (TREE_TYPE (rhs1))
6247 && INTEGRAL_TYPE_P (TREE_TYPE (offset[i]))
6248 && (TYPE_PRECISION (TREE_TYPE (offset[i]))
6249 >= TYPE_PRECISION (TREE_TYPE (rhs1))))
6250 offset[i] = rhs1;
6251 }
6252 if (!operand_equal_p (offset[0], offset[1], 0)
6253 || !operand_equal_p (step[0], step[1], 0))
6254 return false;
6255 }
6256 }
6257 return true;
6258 }
6259
6260
6261 enum scan_store_kind {
6262 /* Normal permutation. */
6263 scan_store_kind_perm,
6264
6265 /* Whole vector left shift permutation with zero init. */
6266 scan_store_kind_lshift_zero,
6267
6268 /* Whole vector left shift permutation and VEC_COND_EXPR. */
6269 scan_store_kind_lshift_cond
6270 };
6271
6272 /* Function scan_store_can_perm_p.
6273 
6274 Verify if we can perform the needed permutations or whole vector shifts.
6275 Return -1 on failure, otherwise the exact log2 of the vectype's nunits.
6276 If USE_WHOLE_VECTOR is non-NULL, it is filled with the enum
6277 scan_store_kind of the operation to perform at each step. */
6278
6279 static int
6280 scan_store_can_perm_p (tree vectype, tree init,
6281 vec<enum scan_store_kind> *use_whole_vector = NULL)
6282 {
6283 enum machine_mode vec_mode = TYPE_MODE (vectype);
6284 unsigned HOST_WIDE_INT nunits;
6285 if (!TYPE_VECTOR_SUBPARTS (vectype).is_constant (&nunits))
6286 return -1;
6287 int units_log2 = exact_log2 (nunits);
6288 if (units_log2 <= 0)
6289 return -1;
6290
6291 int i;
6292 enum scan_store_kind whole_vector_shift_kind = scan_store_kind_perm;
6293 for (i = 0; i <= units_log2; ++i)
6294 {
6295 unsigned HOST_WIDE_INT j, k;
6296 enum scan_store_kind kind = scan_store_kind_perm;
6297 vec_perm_builder sel (nunits, nunits, 1);
6298 sel.quick_grow (nunits);
6299 if (i == units_log2)
6300 {
6301 for (j = 0; j < nunits; ++j)
6302 sel[j] = nunits - 1;
6303 }
6304 else
6305 {
6306 for (j = 0; j < (HOST_WIDE_INT_1U << i); ++j)
6307 sel[j] = j;
6308 for (k = 0; j < nunits; ++j, ++k)
6309 sel[j] = nunits + k;
6310 }
6311 vec_perm_indices indices (sel, i == units_log2 ? 1 : 2, nunits);
6312 if (!can_vec_perm_const_p (vec_mode, indices))
6313 {
6314 if (i == units_log2)
6315 return -1;
6316
6317 if (whole_vector_shift_kind == scan_store_kind_perm)
6318 {
6319 if (optab_handler (vec_shl_optab, vec_mode) == CODE_FOR_nothing)
6320 return -1;
6321 whole_vector_shift_kind = scan_store_kind_lshift_zero;
6322 /* Whole vector shifts shift in zeros, so if init is an all-zeros
6323 constant, there is no need to do anything further. */
6324 if ((TREE_CODE (init) != INTEGER_CST
6325 && TREE_CODE (init) != REAL_CST)
6326 || !initializer_zerop (init))
6327 {
6328 tree masktype = truth_type_for (vectype);
6329 if (!expand_vec_cond_expr_p (vectype, masktype, VECTOR_CST))
6330 return -1;
6331 whole_vector_shift_kind = scan_store_kind_lshift_cond;
6332 }
6333 }
6334 kind = whole_vector_shift_kind;
6335 }
6336 if (use_whole_vector)
6337 {
6338 if (kind != scan_store_kind_perm && use_whole_vector->is_empty ())
6339 use_whole_vector->safe_grow_cleared (i, true);
6340 if (kind != scan_store_kind_perm || !use_whole_vector->is_empty ())
6341 use_whole_vector->safe_push (kind);
6342 }
6343 }
6344
6345 return units_log2;
6346 }
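/* Worked example for nunits == 8, matching the inclusive-scan sketch in
   the check_scan_store comment below; the selectors built per step are

       i == 0:  { 0, 8, 9, 10, 11, 12, 13, 14 }
       i == 1:  { 0, 1, 8, 9, 10, 11, 12, 13 }
       i == 2:  { 0, 1, 2, 3, 8, 9, 10, 11 }
       i == 3:  { 7, 7, 7, 7, 7, 7, 7, 7 }	<-- broadcast the last lane

   and 3 (== units_log2) is returned if the target can do each step,
   either as a permutation or as one of the whole-vector-shift fallbacks
   recorded in USE_WHOLE_VECTOR.  */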
6347
6348
6349 /* Function check_scan_store.
6350
6351 Check magic stores for #pragma omp scan {in,ex}clusive reductions. */
6352
6353 static bool
6354 check_scan_store (vec_info *vinfo, stmt_vec_info stmt_info, tree vectype,
6355 enum vect_def_type rhs_dt, bool slp, tree mask,
6356 vect_memory_access_type memory_access_type)
6357 {
6358 loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
6359 dr_vec_info *dr_info = STMT_VINFO_DR_INFO (stmt_info);
6360 tree ref_type;
6361
6362 gcc_assert (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) > 1);
6363 if (slp
6364 || mask
6365 || memory_access_type != VMAT_CONTIGUOUS
6366 || TREE_CODE (DR_BASE_ADDRESS (dr_info->dr)) != ADDR_EXPR
6367 || !VAR_P (TREE_OPERAND (DR_BASE_ADDRESS (dr_info->dr), 0))
6368 || loop_vinfo == NULL
6369 || LOOP_VINFO_FULLY_MASKED_P (loop_vinfo)
6370 || STMT_VINFO_GROUPED_ACCESS (stmt_info)
6371 || !integer_zerop (get_dr_vinfo_offset (vinfo, dr_info))
6372 || !integer_zerop (DR_INIT (dr_info->dr))
6373 || !(ref_type = reference_alias_ptr_type (DR_REF (dr_info->dr)))
6374 || !alias_sets_conflict_p (get_alias_set (vectype),
6375 get_alias_set (TREE_TYPE (ref_type))))
6376 {
6377 if (dump_enabled_p ())
6378 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6379 "unsupported OpenMP scan store.\n");
6380 return false;
6381 }
6382
6383 /* We need to pattern match code built by OpenMP lowering and simplified
6384 by subsequent optimizations into something we can handle.
6385 #pragma omp simd reduction(inscan,+:r)
6386 for (...)
6387 {
6388 r += something ();
6389 #pragma omp scan inclusive (r)
6390 use (r);
6391 }
6392 shall have body with:
6393 // Initialization for input phase, store the reduction initializer:
6394 _20 = .GOMP_SIMD_LANE (simduid.3_14(D), 0);
6395 _21 = .GOMP_SIMD_LANE (simduid.3_14(D), 1);
6396 D.2042[_21] = 0;
6397 // Actual input phase:
6398 ...
6399 r.0_5 = D.2042[_20];
6400 _6 = _4 + r.0_5;
6401 D.2042[_20] = _6;
6402 // Initialization for scan phase:
6403 _25 = .GOMP_SIMD_LANE (simduid.3_14(D), 2);
6404 _26 = D.2043[_25];
6405 _27 = D.2042[_25];
6406 _28 = _26 + _27;
6407 D.2043[_25] = _28;
6408 D.2042[_25] = _28;
6409 // Actual scan phase:
6410 ...
6411 r.1_8 = D.2042[_20];
6412 ...
6413 The "omp simd array" variable D.2042 holds the privatized copy used
6414 inside of the loop and D.2043 is another one that holds copies of
6415 the current original list item. The separate GOMP_SIMD_LANE ifn
6416 kinds are there in order to allow optimizing the initializer store
6417 and combiner sequence, e.g. if it is originally some C++ish user
6418 defined reduction, but allow the vectorizer to pattern recognize it
6419 and turn into the appropriate vectorized scan.
6420
6421 For exclusive scan, this is slightly different:
6422 #pragma omp simd reduction(inscan,+:r)
6423 for (...)
6424 {
6425 use (r);
6426 #pragma omp scan exclusive (r)
6427 r += something ();
6428 }
6429 shall have body with:
6430 // Initialization for input phase, store the reduction initializer:
6431 _20 = .GOMP_SIMD_LANE (simduid.3_14(D), 0);
6432 _21 = .GOMP_SIMD_LANE (simduid.3_14(D), 1);
6433 D.2042[_21] = 0;
6434 // Actual input phase:
6435 ...
6436 r.0_5 = D.2042[_20];
6437 _6 = _4 + r.0_5;
6438 D.2042[_20] = _6;
6439 // Initialization for scan phase:
6440 _25 = .GOMP_SIMD_LANE (simduid.3_14(D), 3);
6441 _26 = D.2043[_25];
6442 D.2044[_25] = _26;
6443 _27 = D.2042[_25];
6444 _28 = _26 + _27;
6445 D.2043[_25] = _28;
6446 // Actual scan phase:
6447 ...
6448 r.1_8 = D.2044[_20];
6449 ... */
6450
6451 if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) == 2)
6452 {
6453 /* Match the D.2042[_21] = 0; store above. Just require that
6454 it is a constant or external definition store. */
6455 if (rhs_dt != vect_constant_def && rhs_dt != vect_external_def)
6456 {
6457 fail_init:
6458 if (dump_enabled_p ())
6459 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6460 "unsupported OpenMP scan initializer store.\n");
6461 return false;
6462 }
6463
6464 if (! loop_vinfo->scan_map)
6465 loop_vinfo->scan_map = new hash_map<tree, tree>;
6466 tree var = TREE_OPERAND (DR_BASE_ADDRESS (dr_info->dr), 0);
6467 tree &cached = loop_vinfo->scan_map->get_or_insert (var);
6468 if (cached)
6469 goto fail_init;
6470 cached = gimple_assign_rhs1 (STMT_VINFO_STMT (stmt_info));
6471
6472 /* These stores can be vectorized normally. */
6473 return true;
6474 }
6475
6476 if (rhs_dt != vect_internal_def)
6477 {
6478 fail:
6479 if (dump_enabled_p ())
6480 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6481 "unsupported OpenMP scan combiner pattern.\n");
6482 return false;
6483 }
6484
6485 gimple *stmt = STMT_VINFO_STMT (stmt_info);
6486 tree rhs = gimple_assign_rhs1 (stmt);
6487 if (TREE_CODE (rhs) != SSA_NAME)
6488 goto fail;
6489
6490 gimple *other_store_stmt = NULL;
6491 tree var = TREE_OPERAND (DR_BASE_ADDRESS (dr_info->dr), 0);
6492 bool inscan_var_store
6493 = lookup_attribute ("omp simd inscan", DECL_ATTRIBUTES (var)) != NULL;
6494
6495 if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) == 4)
6496 {
6497 if (!inscan_var_store)
6498 {
6499 use_operand_p use_p;
6500 imm_use_iterator iter;
6501 FOR_EACH_IMM_USE_FAST (use_p, iter, rhs)
6502 {
6503 gimple *use_stmt = USE_STMT (use_p);
6504 if (use_stmt == stmt || is_gimple_debug (use_stmt))
6505 continue;
6506 if (gimple_bb (use_stmt) != gimple_bb (stmt)
6507 || !is_gimple_assign (use_stmt)
6508 || gimple_assign_rhs_class (use_stmt) != GIMPLE_BINARY_RHS
6509 || other_store_stmt
6510 || TREE_CODE (gimple_assign_lhs (use_stmt)) != SSA_NAME)
6511 goto fail;
6512 other_store_stmt = use_stmt;
6513 }
6514 if (other_store_stmt == NULL)
6515 goto fail;
6516 rhs = gimple_assign_lhs (other_store_stmt);
6517 if (!single_imm_use (rhs, &use_p, &other_store_stmt))
6518 goto fail;
6519 }
6520 }
6521 else if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) == 3)
6522 {
6523 use_operand_p use_p;
6524 imm_use_iterator iter;
6525 FOR_EACH_IMM_USE_FAST (use_p, iter, rhs)
6526 {
6527 gimple *use_stmt = USE_STMT (use_p);
6528 if (use_stmt == stmt || is_gimple_debug (use_stmt))
6529 continue;
6530 if (other_store_stmt)
6531 goto fail;
6532 other_store_stmt = use_stmt;
6533 }
6534 }
6535 else
6536 goto fail;
6537
6538 gimple *def_stmt = SSA_NAME_DEF_STMT (rhs);
6539 if (gimple_bb (def_stmt) != gimple_bb (stmt)
6540 || !is_gimple_assign (def_stmt)
6541 || gimple_assign_rhs_class (def_stmt) != GIMPLE_BINARY_RHS)
6542 goto fail;
6543
6544 enum tree_code code = gimple_assign_rhs_code (def_stmt);
6545 /* For pointer addition, we should use the normal plus for the vector
6546 operation. */
6547 switch (code)
6548 {
6549 case POINTER_PLUS_EXPR:
6550 code = PLUS_EXPR;
6551 break;
6552 case MULT_HIGHPART_EXPR:
6553 goto fail;
6554 default:
6555 break;
6556 }
6557 if (TREE_CODE_LENGTH (code) != binary_op || !commutative_tree_code (code))
6558 goto fail;
6559
6560 tree rhs1 = gimple_assign_rhs1 (def_stmt);
6561 tree rhs2 = gimple_assign_rhs2 (def_stmt);
6562 if (TREE_CODE (rhs1) != SSA_NAME || TREE_CODE (rhs2) != SSA_NAME)
6563 goto fail;
6564
6565 gimple *load1_stmt = SSA_NAME_DEF_STMT (rhs1);
6566 gimple *load2_stmt = SSA_NAME_DEF_STMT (rhs2);
6567 if (gimple_bb (load1_stmt) != gimple_bb (stmt)
6568 || !gimple_assign_load_p (load1_stmt)
6569 || gimple_bb (load2_stmt) != gimple_bb (stmt)
6570 || !gimple_assign_load_p (load2_stmt))
6571 goto fail;
6572
6573 stmt_vec_info load1_stmt_info = loop_vinfo->lookup_stmt (load1_stmt);
6574 stmt_vec_info load2_stmt_info = loop_vinfo->lookup_stmt (load2_stmt);
6575 if (load1_stmt_info == NULL
6576 || load2_stmt_info == NULL
6577 || (STMT_VINFO_SIMD_LANE_ACCESS_P (load1_stmt_info)
6578 != STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info))
6579 || (STMT_VINFO_SIMD_LANE_ACCESS_P (load2_stmt_info)
6580 != STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info)))
6581 goto fail;
6582
6583 if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) == 4 && inscan_var_store)
6584 {
6585 dr_vec_info *load1_dr_info = STMT_VINFO_DR_INFO (load1_stmt_info);
6586 if (TREE_CODE (DR_BASE_ADDRESS (load1_dr_info->dr)) != ADDR_EXPR
6587 || !VAR_P (TREE_OPERAND (DR_BASE_ADDRESS (load1_dr_info->dr), 0)))
6588 goto fail;
6589 tree var1 = TREE_OPERAND (DR_BASE_ADDRESS (load1_dr_info->dr), 0);
6590 tree lrhs;
6591 if (lookup_attribute ("omp simd inscan", DECL_ATTRIBUTES (var1)))
6592 lrhs = rhs1;
6593 else
6594 lrhs = rhs2;
6595 use_operand_p use_p;
6596 imm_use_iterator iter;
6597 FOR_EACH_IMM_USE_FAST (use_p, iter, lrhs)
6598 {
6599 gimple *use_stmt = USE_STMT (use_p);
6600 if (use_stmt == def_stmt || is_gimple_debug (use_stmt))
6601 continue;
6602 if (other_store_stmt)
6603 goto fail;
6604 other_store_stmt = use_stmt;
6605 }
6606 }
6607
6608 if (other_store_stmt == NULL)
6609 goto fail;
6610 if (gimple_bb (other_store_stmt) != gimple_bb (stmt)
6611 || !gimple_store_p (other_store_stmt))
6612 goto fail;
6613
6614 stmt_vec_info other_store_stmt_info
6615 = loop_vinfo->lookup_stmt (other_store_stmt);
6616 if (other_store_stmt_info == NULL
6617 || (STMT_VINFO_SIMD_LANE_ACCESS_P (other_store_stmt_info)
6618 != STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info)))
6619 goto fail;
6620
6621 gimple *stmt1 = stmt;
6622 gimple *stmt2 = other_store_stmt;
6623 if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) == 4 && !inscan_var_store)
6624 std::swap (stmt1, stmt2);
6625 if (scan_operand_equal_p (gimple_assign_lhs (stmt1),
6626 gimple_assign_rhs1 (load2_stmt)))
6627 {
6628 std::swap (rhs1, rhs2);
6629 std::swap (load1_stmt, load2_stmt);
6630 std::swap (load1_stmt_info, load2_stmt_info);
6631 }
6632 if (!scan_operand_equal_p (gimple_assign_lhs (stmt1),
6633 gimple_assign_rhs1 (load1_stmt)))
6634 goto fail;
6635
6636 tree var3 = NULL_TREE;
6637 if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) == 3
6638 && !scan_operand_equal_p (gimple_assign_lhs (stmt2),
6639 gimple_assign_rhs1 (load2_stmt)))
6640 goto fail;
6641 else if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) == 4)
6642 {
6643 dr_vec_info *load2_dr_info = STMT_VINFO_DR_INFO (load2_stmt_info);
6644 if (TREE_CODE (DR_BASE_ADDRESS (load2_dr_info->dr)) != ADDR_EXPR
6645 || !VAR_P (TREE_OPERAND (DR_BASE_ADDRESS (load2_dr_info->dr), 0)))
6646 goto fail;
6647 var3 = TREE_OPERAND (DR_BASE_ADDRESS (load2_dr_info->dr), 0);
6648 if (!lookup_attribute ("omp simd array", DECL_ATTRIBUTES (var3))
6649 || lookup_attribute ("omp simd inscan", DECL_ATTRIBUTES (var3))
6650 || lookup_attribute ("omp simd inscan exclusive",
6651 DECL_ATTRIBUTES (var3)))
6652 goto fail;
6653 }
6654
6655 dr_vec_info *other_dr_info = STMT_VINFO_DR_INFO (other_store_stmt_info);
6656 if (TREE_CODE (DR_BASE_ADDRESS (other_dr_info->dr)) != ADDR_EXPR
6657 || !VAR_P (TREE_OPERAND (DR_BASE_ADDRESS (other_dr_info->dr), 0)))
6658 goto fail;
6659
6660 tree var1 = TREE_OPERAND (DR_BASE_ADDRESS (dr_info->dr), 0);
6661 tree var2 = TREE_OPERAND (DR_BASE_ADDRESS (other_dr_info->dr), 0);
6662 if (!lookup_attribute ("omp simd array", DECL_ATTRIBUTES (var1))
6663 || !lookup_attribute ("omp simd array", DECL_ATTRIBUTES (var2))
6664 || (!lookup_attribute ("omp simd inscan", DECL_ATTRIBUTES (var1)))
6665 == (!lookup_attribute ("omp simd inscan", DECL_ATTRIBUTES (var2))))
6666 goto fail;
6667
6668 if (lookup_attribute ("omp simd inscan", DECL_ATTRIBUTES (var1)))
6669 std::swap (var1, var2);
6670
6671 if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) == 4)
6672 {
6673 if (!lookup_attribute ("omp simd inscan exclusive",
6674 DECL_ATTRIBUTES (var1)))
6675 goto fail;
6676 var1 = var3;
6677 }
6678
6679 if (loop_vinfo->scan_map == NULL)
6680 goto fail;
6681 tree *init = loop_vinfo->scan_map->get (var1);
6682 if (init == NULL)
6683 goto fail;
6684
6685 /* The IL is as expected; now check whether we can actually vectorize it.
6686 Inclusive scan:
6687 _26 = D.2043[_25];
6688 _27 = D.2042[_25];
6689 _28 = _26 + _27;
6690 D.2043[_25] = _28;
6691 D.2042[_25] = _28;
6692 should be vectorized as (where _40 is the vectorized rhs
6693 from the D.2042[_21] = 0; store):
6694 _30 = MEM <vector(8) int> [(int *)&D.2043];
6695 _31 = MEM <vector(8) int> [(int *)&D.2042];
6696 _32 = VEC_PERM_EXPR <_40, _31, { 0, 8, 9, 10, 11, 12, 13, 14 }>;
6697 _33 = _31 + _32;
6698 // _33 = { _31[0], _31[0]+_31[1], _31[1]+_31[2], ..., _31[6]+_31[7] };
6699 _34 = VEC_PERM_EXPR <_40, _33, { 0, 1, 8, 9, 10, 11, 12, 13 }>;
6700 _35 = _33 + _34;
6701 // _35 = { _31[0], _31[0]+_31[1], _31[0]+.._31[2], _31[0]+.._31[3],
6702 // _31[1]+.._31[4], ... _31[4]+.._31[7] };
6703 _36 = VEC_PERM_EXPR <_40, _35, { 0, 1, 2, 3, 8, 9, 10, 11 }>;
6704 _37 = _35 + _36;
6705 // _37 = { _31[0], _31[0]+_31[1], _31[0]+.._31[2], _31[0]+.._31[3],
6706 // _31[0]+.._31[4], ... _31[0]+.._31[7] };
6707 _38 = _30 + _37;
6708 _39 = VEC_PERM_EXPR <_38, _38, { 7, 7, 7, 7, 7, 7, 7, 7 }>;
6709 MEM <vector(8) int> [(int *)&D.2043] = _39;
6710 MEM <vector(8) int> [(int *)&D.2042] = _38;
6711 Exclusive scan:
6712 _26 = D.2043[_25];
6713 D.2044[_25] = _26;
6714 _27 = D.2042[_25];
6715 _28 = _26 + _27;
6716 D.2043[_25] = _28;
6717 should be vectorized as (where _40 is the vectorized rhs
6718 from the D.2042[_21] = 0; store):
6719 _30 = MEM <vector(8) int> [(int *)&D.2043];
6720 _31 = MEM <vector(8) int> [(int *)&D.2042];
6721 _32 = VEC_PERM_EXPR <_40, _31, { 0, 8, 9, 10, 11, 12, 13, 14 }>;
6722 _33 = VEC_PERM_EXPR <_40, _32, { 0, 8, 9, 10, 11, 12, 13, 14 }>;
6723 _34 = _32 + _33;
6724 // _34 = { 0, _31[0], _31[0]+_31[1], _31[1]+_31[2], _31[2]+_31[3],
6725 // _31[3]+_31[4], ... _31[5]+.._31[6] };
6726 _35 = VEC_PERM_EXPR <_40, _34, { 0, 1, 8, 9, 10, 11, 12, 13 }>;
6727 _36 = _34 + _35;
6728 // _36 = { 0, _31[0], _31[0]+_31[1], _31[0]+.._31[2], _31[0]+.._31[3],
6729 // _31[1]+.._31[4], ... _31[3]+.._31[6] };
6730 _37 = VEC_PERM_EXPR <_40, _36, { 0, 1, 2, 3, 8, 9, 10, 11 }>;
6731 _38 = _36 + _37;
6732 // _38 = { 0, _31[0], _31[0]+_31[1], _31[0]+.._31[2], _31[0]+.._31[3],
6733 // _31[0]+.._31[4], ... _31[0]+.._31[6] };
6734 _39 = _30 + _38;
6735 _50 = _31 + _39;
6736 _51 = VEC_PERM_EXPR <_50, _50, { 7, 7, 7, 7, 7, 7, 7, 7 }>;
6737 MEM <vector(8) int> [(int *)&D.2044] = _39;
6738 MEM <vector(8) int> [(int *)&D.2042] = _51; */
6739 enum machine_mode vec_mode = TYPE_MODE (vectype);
6740 optab optab = optab_for_tree_code (code, vectype, optab_default);
6741 if (!optab || optab_handler (optab, vec_mode) == CODE_FOR_nothing)
6742 goto fail;
6743
6744 int units_log2 = scan_store_can_perm_p (vectype, *init);
6745 if (units_log2 == -1)
6746 goto fail;
6747
6748 return true;
6749 }
6750
6751
6752 /* Function vectorizable_scan_store.
6753
6754 Helper of vectorizable_store; takes the same arguments as vectorizable_store.
6755 Handles only the transformation; the checking is done in check_scan_store. */
6756
6757 static bool
6758 vectorizable_scan_store (vec_info *vinfo,
6759 stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
6760 gimple **vec_stmt, int ncopies)
6761 {
6762 loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
6763 dr_vec_info *dr_info = STMT_VINFO_DR_INFO (stmt_info);
6764 tree ref_type = reference_alias_ptr_type (DR_REF (dr_info->dr));
6765 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
6766
6767 if (dump_enabled_p ())
6768 dump_printf_loc (MSG_NOTE, vect_location,
6769 "transform scan store. ncopies = %d\n", ncopies);
6770
6771 gimple *stmt = STMT_VINFO_STMT (stmt_info);
6772 tree rhs = gimple_assign_rhs1 (stmt);
6773 gcc_assert (TREE_CODE (rhs) == SSA_NAME);
6774
6775 tree var = TREE_OPERAND (DR_BASE_ADDRESS (dr_info->dr), 0);
6776 bool inscan_var_store
6777 = lookup_attribute ("omp simd inscan", DECL_ATTRIBUTES (var)) != NULL;
6778
6779 if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) == 4 && !inscan_var_store)
6780 {
6781 use_operand_p use_p;
6782 imm_use_iterator iter;
6783 FOR_EACH_IMM_USE_FAST (use_p, iter, rhs)
6784 {
6785 gimple *use_stmt = USE_STMT (use_p);
6786 if (use_stmt == stmt || is_gimple_debug (use_stmt))
6787 continue;
6788 rhs = gimple_assign_lhs (use_stmt);
6789 break;
6790 }
6791 }
6792
6793 gimple *def_stmt = SSA_NAME_DEF_STMT (rhs);
6794 enum tree_code code = gimple_assign_rhs_code (def_stmt);
6795 if (code == POINTER_PLUS_EXPR)
6796 code = PLUS_EXPR;
6797 gcc_assert (TREE_CODE_LENGTH (code) == binary_op
6798 && commutative_tree_code (code));
6799 tree rhs1 = gimple_assign_rhs1 (def_stmt);
6800 tree rhs2 = gimple_assign_rhs2 (def_stmt);
6801 gcc_assert (TREE_CODE (rhs1) == SSA_NAME && TREE_CODE (rhs2) == SSA_NAME);
6802 gimple *load1_stmt = SSA_NAME_DEF_STMT (rhs1);
6803 gimple *load2_stmt = SSA_NAME_DEF_STMT (rhs2);
6804 stmt_vec_info load1_stmt_info = loop_vinfo->lookup_stmt (load1_stmt);
6805 stmt_vec_info load2_stmt_info = loop_vinfo->lookup_stmt (load2_stmt);
6806 dr_vec_info *load1_dr_info = STMT_VINFO_DR_INFO (load1_stmt_info);
6807 dr_vec_info *load2_dr_info = STMT_VINFO_DR_INFO (load2_stmt_info);
6808 tree var1 = TREE_OPERAND (DR_BASE_ADDRESS (load1_dr_info->dr), 0);
6809 tree var2 = TREE_OPERAND (DR_BASE_ADDRESS (load2_dr_info->dr), 0);
6810
6811 if (lookup_attribute ("omp simd inscan", DECL_ATTRIBUTES (var1)))
6812 {
6813 std::swap (rhs1, rhs2);
6814 std::swap (var1, var2);
6815 std::swap (load1_dr_info, load2_dr_info);
6816 }
6817
6818 tree *init = loop_vinfo->scan_map->get (var1);
6819 gcc_assert (init);
6820
6821 unsigned HOST_WIDE_INT nunits;
6822 if (!TYPE_VECTOR_SUBPARTS (vectype).is_constant (&nunits))
6823 gcc_unreachable ();
6824 auto_vec<enum scan_store_kind, 16> use_whole_vector;
6825 int units_log2 = scan_store_can_perm_p (vectype, *init, &use_whole_vector);
6826 gcc_assert (units_log2 > 0);
6827 auto_vec<tree, 16> perms;
6828 perms.quick_grow (units_log2 + 1);
6829 tree zero_vec = NULL_TREE, masktype = NULL_TREE;
6830 for (int i = 0; i <= units_log2; ++i)
6831 {
6832 unsigned HOST_WIDE_INT j, k;
6833 vec_perm_builder sel (nunits, nunits, 1);
6834 sel.quick_grow (nunits);
6835 if (i == units_log2)
6836 for (j = 0; j < nunits; ++j)
6837 sel[j] = nunits - 1;
6838 else
6839 {
6840 for (j = 0; j < (HOST_WIDE_INT_1U << i); ++j)
6841 sel[j] = j;
6842 for (k = 0; j < nunits; ++j, ++k)
6843 sel[j] = nunits + k;
6844 }
6845 vec_perm_indices indices (sel, i == units_log2 ? 1 : 2, nunits);
6846 if (!use_whole_vector.is_empty ()
6847 && use_whole_vector[i] != scan_store_kind_perm)
6848 {
6849 if (zero_vec == NULL_TREE)
6850 zero_vec = build_zero_cst (vectype);
6851 if (masktype == NULL_TREE
6852 && use_whole_vector[i] == scan_store_kind_lshift_cond)
6853 masktype = truth_type_for (vectype);
6854 perms[i] = vect_gen_perm_mask_any (vectype, indices);
6855 }
6856 else
6857 perms[i] = vect_gen_perm_mask_checked (vectype, indices);
6858 }
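  /* Illustrative note (editor's addition): for nunits == 8 the masks built
     above are { 0, 8, 9, 10, 11, 12, 13, 14 }, { 0, 1, 8, 9, 10, 11, 12, 13 },
     { 0, 1, 2, 3, 8, 9, 10, 11 } and, for i == units_log2, the broadcast
     mask { 7, 7, 7, 7, 7, 7, 7, 7 }, matching the VEC_PERM_EXPRs in the
     example IL shown in check_scan_store above.  */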
6859
6860 tree vec_oprnd1 = NULL_TREE;
6861 tree vec_oprnd2 = NULL_TREE;
6862 tree vec_oprnd3 = NULL_TREE;
6863 tree dataref_ptr = DR_BASE_ADDRESS (dr_info->dr);
6864 tree dataref_offset = build_int_cst (ref_type, 0);
6865 tree bump = vect_get_data_ptr_increment (vinfo, dr_info,
6866 vectype, VMAT_CONTIGUOUS);
6867 tree ldataref_ptr = NULL_TREE;
6868 tree orig = NULL_TREE;
6869 if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) == 4 && !inscan_var_store)
6870 ldataref_ptr = DR_BASE_ADDRESS (load1_dr_info->dr);
6871 auto_vec<tree> vec_oprnds1;
6872 auto_vec<tree> vec_oprnds2;
6873 auto_vec<tree> vec_oprnds3;
6874 vect_get_vec_defs (vinfo, stmt_info, NULL, ncopies,
6875 *init, &vec_oprnds1,
6876 ldataref_ptr == NULL ? rhs1 : NULL, &vec_oprnds2,
6877 rhs2, &vec_oprnds3);
6878 for (int j = 0; j < ncopies; j++)
6879 {
6880 vec_oprnd1 = vec_oprnds1[j];
6881 if (ldataref_ptr == NULL)
6882 vec_oprnd2 = vec_oprnds2[j];
6883 vec_oprnd3 = vec_oprnds3[j];
6884 if (j == 0)
6885 orig = vec_oprnd3;
6886 else if (!inscan_var_store)
6887 dataref_offset = int_const_binop (PLUS_EXPR, dataref_offset, bump);
6888
6889 if (ldataref_ptr)
6890 {
6891 vec_oprnd2 = make_ssa_name (vectype);
6892 tree data_ref = fold_build2 (MEM_REF, vectype,
6893 unshare_expr (ldataref_ptr),
6894 dataref_offset);
6895 vect_copy_ref_info (data_ref, DR_REF (load1_dr_info->dr));
6896 gimple *g = gimple_build_assign (vec_oprnd2, data_ref);
6897 vect_finish_stmt_generation (vinfo, stmt_info, g, gsi);
6898 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (g);
6899 *vec_stmt = STMT_VINFO_VEC_STMTS (stmt_info)[0];
6900 }
6901
6902 tree v = vec_oprnd2;
6903 for (int i = 0; i < units_log2; ++i)
6904 {
6905 tree new_temp = make_ssa_name (vectype);
6906 gimple *g = gimple_build_assign (new_temp, VEC_PERM_EXPR,
6907 (zero_vec
6908 && (use_whole_vector[i]
6909 != scan_store_kind_perm))
6910 ? zero_vec : vec_oprnd1, v,
6911 perms[i]);
6912 vect_finish_stmt_generation (vinfo, stmt_info, g, gsi);
6913 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (g);
6914 *vec_stmt = STMT_VINFO_VEC_STMTS (stmt_info)[0];
6915
6916 if (zero_vec && use_whole_vector[i] == scan_store_kind_lshift_cond)
6917 {
6918 /* The whole-vector shift shifted in zero bits, but if *init
6919 is not initializer_zerop, we need to replace those elements
6920 with elements from vec_oprnd1. */
6921 tree_vector_builder vb (masktype, nunits, 1);
6922 for (unsigned HOST_WIDE_INT k = 0; k < nunits; ++k)
6923 vb.quick_push (k < (HOST_WIDE_INT_1U << i)
6924 ? boolean_false_node : boolean_true_node);
6925
6926 tree new_temp2 = make_ssa_name (vectype);
6927 g = gimple_build_assign (new_temp2, VEC_COND_EXPR, vb.build (),
6928 new_temp, vec_oprnd1);
6929 vect_finish_stmt_generation (vinfo, stmt_info,
6930 g, gsi);
6931 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (g);
6932 new_temp = new_temp2;
6933 }
6934
6935 /* For exclusive scan, perform the perms[i] permutation once
6936 more. */
6937 if (i == 0
6938 && STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) == 4
6939 && v == vec_oprnd2)
6940 {
6941 v = new_temp;
6942 --i;
6943 continue;
6944 }
6945
6946 tree new_temp2 = make_ssa_name (vectype);
6947 g = gimple_build_assign (new_temp2, code, v, new_temp);
6948 vect_finish_stmt_generation (vinfo, stmt_info, g, gsi);
6949 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (g);
6950
6951 v = new_temp2;
6952 }
6953
6954 tree new_temp = make_ssa_name (vectype);
6955 gimple *g = gimple_build_assign (new_temp, code, orig, v);
6956 vect_finish_stmt_generation (vinfo, stmt_info, g, gsi);
6957 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (g);
6958
6959 tree last_perm_arg = new_temp;
6960 /* For exclusive scan, new_temp computed above is the exclusive scan
6961 prefix sum. Turn it into inclusive prefix sum for the broadcast
6962 of the last element into orig. */
6963 if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) == 4)
6964 {
6965 last_perm_arg = make_ssa_name (vectype);
6966 g = gimple_build_assign (last_perm_arg, code, new_temp, vec_oprnd2);
6967 vect_finish_stmt_generation (vinfo, stmt_info, g, gsi);
6968 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (g);
6969 }
6970
6971 orig = make_ssa_name (vectype);
6972 g = gimple_build_assign (orig, VEC_PERM_EXPR, last_perm_arg,
6973 last_perm_arg, perms[units_log2]);
6974 vect_finish_stmt_generation (vinfo, stmt_info, g, gsi);
6975 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (g);
6976
6977 if (!inscan_var_store)
6978 {
6979 tree data_ref = fold_build2 (MEM_REF, vectype,
6980 unshare_expr (dataref_ptr),
6981 dataref_offset);
6982 vect_copy_ref_info (data_ref, DR_REF (dr_info->dr));
6983 g = gimple_build_assign (data_ref, new_temp);
6984 vect_finish_stmt_generation (vinfo, stmt_info, g, gsi);
6985 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (g);
6986 }
6987 }
6988
6989 if (inscan_var_store)
6990 for (int j = 0; j < ncopies; j++)
6991 {
6992 if (j != 0)
6993 dataref_offset = int_const_binop (PLUS_EXPR, dataref_offset, bump);
6994
6995 tree data_ref = fold_build2 (MEM_REF, vectype,
6996 unshare_expr (dataref_ptr),
6997 dataref_offset);
6998 vect_copy_ref_info (data_ref, DR_REF (dr_info->dr));
6999 gimple *g = gimple_build_assign (data_ref, orig);
7000 vect_finish_stmt_generation (vinfo, stmt_info, g, gsi);
7001 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (g);
7002 }
7003 return true;
7004 }
7005
7006
7007 /* Function vectorizable_store.
7008
7009 Check if STMT_INFO defines a non-scalar data-ref (array/pointer/structure)
7010 that can be vectorized.
7011 If VEC_STMT is also passed, vectorize STMT_INFO: create a vectorized
7012 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
7013 Return true if STMT_INFO is vectorizable in this way. */
7014
7015 static bool
7016 vectorizable_store (vec_info *vinfo,
7017 stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
7018 gimple **vec_stmt, slp_tree slp_node,
7019 stmt_vector_for_cost *cost_vec)
7020 {
7021 tree data_ref;
7022 tree op;
7023 tree vec_oprnd = NULL_TREE;
7024 tree elem_type;
7025 loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
7026 class loop *loop = NULL;
7027 machine_mode vec_mode;
7028 tree dummy;
7029 enum vect_def_type rhs_dt = vect_unknown_def_type;
7030 enum vect_def_type mask_dt = vect_unknown_def_type;
7031 tree dataref_ptr = NULL_TREE;
7032 tree dataref_offset = NULL_TREE;
7033 gimple *ptr_incr = NULL;
7034 int ncopies;
7035 int j;
7036 stmt_vec_info first_stmt_info;
7037 bool grouped_store;
7038 unsigned int group_size, i;
7039 vec<tree> oprnds = vNULL;
7040 vec<tree> result_chain = vNULL;
7041 tree offset = NULL_TREE;
7042 vec<tree> vec_oprnds = vNULL;
7043 bool slp = (slp_node != NULL);
7044 unsigned int vec_num;
7045 bb_vec_info bb_vinfo = dyn_cast <bb_vec_info> (vinfo);
7046 tree aggr_type;
7047 gather_scatter_info gs_info;
7048 poly_uint64 vf;
7049 vec_load_store_type vls_type;
7050 tree ref_type;
7051
7052 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
7053 return false;
7054
7055 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
7056 && ! vec_stmt)
7057 return false;
7058
7059 /* Is vectorizable store? */
7060
7061 tree mask = NULL_TREE, mask_vectype = NULL_TREE;
7062 if (gassign *assign = dyn_cast <gassign *> (stmt_info->stmt))
7063 {
7064 tree scalar_dest = gimple_assign_lhs (assign);
7065 if (TREE_CODE (scalar_dest) == VIEW_CONVERT_EXPR
7066 && is_pattern_stmt_p (stmt_info))
7067 scalar_dest = TREE_OPERAND (scalar_dest, 0);
7068 if (TREE_CODE (scalar_dest) != ARRAY_REF
7069 && TREE_CODE (scalar_dest) != BIT_FIELD_REF
7070 && TREE_CODE (scalar_dest) != INDIRECT_REF
7071 && TREE_CODE (scalar_dest) != COMPONENT_REF
7072 && TREE_CODE (scalar_dest) != IMAGPART_EXPR
7073 && TREE_CODE (scalar_dest) != REALPART_EXPR
7074 && TREE_CODE (scalar_dest) != MEM_REF)
7075 return false;
7076 }
7077 else
7078 {
7079 gcall *call = dyn_cast <gcall *> (stmt_info->stmt);
7080 if (!call || !gimple_call_internal_p (call))
7081 return false;
7082
7083 internal_fn ifn = gimple_call_internal_fn (call);
7084 if (!internal_store_fn_p (ifn))
7085 return false;
7086
7087 if (slp_node != NULL)
7088 {
7089 if (dump_enabled_p ())
7090 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
7091 "SLP of masked stores not supported.\n");
7092 return false;
7093 }
7094
7095 int mask_index = internal_fn_mask_index (ifn);
7096 if (mask_index >= 0)
7097 {
7098 mask = gimple_call_arg (call, mask_index);
7099 if (!vect_check_scalar_mask (vinfo, stmt_info, mask, &mask_dt,
7100 &mask_vectype))
7101 return false;
7102 }
7103 }
7104
7105 op = vect_get_store_rhs (stmt_info);
7106
7107 /* Cannot have hybrid store SLP -- that would mean storing to the
7108 same location twice. */
7109 gcc_assert (slp == PURE_SLP_STMT (stmt_info));
7110
7111 tree vectype = STMT_VINFO_VECTYPE (stmt_info), rhs_vectype = NULL_TREE;
7112 poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
7113
7114 if (loop_vinfo)
7115 {
7116 loop = LOOP_VINFO_LOOP (loop_vinfo);
7117 vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
7118 }
7119 else
7120 vf = 1;
7121
7122 /* Multiple types in SLP are handled by creating the appropriate number of
7123 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
7124 case of SLP. */
7125 if (slp)
7126 ncopies = 1;
7127 else
7128 ncopies = vect_get_num_copies (loop_vinfo, vectype);
7129
7130 gcc_assert (ncopies >= 1);
7131
7132 /* FORNOW. This restriction should be relaxed. */
7133 if (loop && nested_in_vect_loop_p (loop, stmt_info) && ncopies > 1)
7134 {
7135 if (dump_enabled_p ())
7136 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
7137 "multiple types in nested loop.\n");
7138 return false;
7139 }
7140
7141 if (!vect_check_store_rhs (vinfo, stmt_info, slp_node,
7142 op, &rhs_dt, &rhs_vectype, &vls_type))
7143 return false;
7144
7145 elem_type = TREE_TYPE (vectype);
7146 vec_mode = TYPE_MODE (vectype);
7147
7148 if (!STMT_VINFO_DATA_REF (stmt_info))
7149 return false;
7150
7151 vect_memory_access_type memory_access_type;
7152 enum dr_alignment_support alignment_support_scheme;
7153 if (!get_load_store_type (vinfo, stmt_info, vectype, slp_node, mask, vls_type,
7154 ncopies, &memory_access_type,
7155 &alignment_support_scheme, &gs_info))
7156 return false;
7157
7158 if (mask)
7159 {
7160 if (memory_access_type == VMAT_CONTIGUOUS)
7161 {
7162 if (!VECTOR_MODE_P (vec_mode)
7163 || !can_vec_mask_load_store_p (vec_mode,
7164 TYPE_MODE (mask_vectype), false))
7165 return false;
7166 }
7167 else if (memory_access_type != VMAT_LOAD_STORE_LANES
7168 && (memory_access_type != VMAT_GATHER_SCATTER
7169 || (gs_info.decl && !VECTOR_BOOLEAN_TYPE_P (mask_vectype))))
7170 {
7171 if (dump_enabled_p ())
7172 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
7173 "unsupported access type for masked store.\n");
7174 return false;
7175 }
7176 }
7177 else
7178 {
7179 /* FORNOW. In some cases we can vectorize even if the data type is not
7180 supported (e.g. array initialization with 0). */
7181 if (optab_handler (mov_optab, vec_mode) == CODE_FOR_nothing)
7182 return false;
7183 }
7184
7185 dr_vec_info *dr_info = STMT_VINFO_DR_INFO (stmt_info), *first_dr_info = NULL;
7186 grouped_store = (STMT_VINFO_GROUPED_ACCESS (stmt_info)
7187 && memory_access_type != VMAT_GATHER_SCATTER
7188 && (slp || memory_access_type != VMAT_CONTIGUOUS));
7189 if (grouped_store)
7190 {
7191 first_stmt_info = DR_GROUP_FIRST_ELEMENT (stmt_info);
7192 first_dr_info = STMT_VINFO_DR_INFO (first_stmt_info);
7193 group_size = DR_GROUP_SIZE (first_stmt_info);
7194 }
7195 else
7196 {
7197 first_stmt_info = stmt_info;
7198 first_dr_info = dr_info;
7199 group_size = vec_num = 1;
7200 }
7201
7202 if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) > 1 && !vec_stmt)
7203 {
7204 if (!check_scan_store (vinfo, stmt_info, vectype, rhs_dt, slp, mask,
7205 memory_access_type))
7206 return false;
7207 }
7208
7209 if (!vec_stmt) /* transformation not required. */
7210 {
7211 STMT_VINFO_MEMORY_ACCESS_TYPE (stmt_info) = memory_access_type;
7212
7213 if (loop_vinfo
7214 && LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo))
7215 check_load_store_for_partial_vectors (loop_vinfo, vectype, vls_type,
7216 group_size, memory_access_type,
7217 &gs_info, mask);
7218
7219 if (slp_node
7220 && !vect_maybe_update_slp_op_vectype (SLP_TREE_CHILDREN (slp_node)[0],
7221 vectype))
7222 {
7223 if (dump_enabled_p ())
7224 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
7225 "incompatible vector types for invariants\n");
7226 return false;
7227 }
7228
7229 if (dump_enabled_p ()
7230 && memory_access_type != VMAT_ELEMENTWISE
7231 && memory_access_type != VMAT_GATHER_SCATTER
7232 && alignment_support_scheme != dr_aligned)
7233 dump_printf_loc (MSG_NOTE, vect_location,
7234 "Vectorizing an unaligned access.\n");
7235
7236 STMT_VINFO_TYPE (stmt_info) = store_vec_info_type;
7237 vect_model_store_cost (vinfo, stmt_info, ncopies,
7238 memory_access_type, vls_type, slp_node, cost_vec);
7239 return true;
7240 }
7241 gcc_assert (memory_access_type == STMT_VINFO_MEMORY_ACCESS_TYPE (stmt_info));
7242
7243 /* Transform. */
7244
7245 ensure_base_align (dr_info);
7246
7247 if (memory_access_type == VMAT_GATHER_SCATTER && gs_info.decl)
7248 {
7249 tree vec_oprnd0 = NULL_TREE, vec_oprnd1 = NULL_TREE, src;
7250 tree arglist = TYPE_ARG_TYPES (TREE_TYPE (gs_info.decl));
7251 tree rettype, srctype, ptrtype, idxtype, masktype, scaletype;
7252 tree ptr, var, scale, vec_mask;
7253 tree mask_arg = NULL_TREE, mask_op = NULL_TREE, perm_mask = NULL_TREE;
7254 tree mask_halfvectype = mask_vectype;
7255 edge pe = loop_preheader_edge (loop);
7256 gimple_seq seq;
7257 basic_block new_bb;
7258 enum { NARROW, NONE, WIDEN } modifier;
7259 poly_uint64 scatter_off_nunits
7260 = TYPE_VECTOR_SUBPARTS (gs_info.offset_vectype);
7261
7262 if (known_eq (nunits, scatter_off_nunits))
7263 modifier = NONE;
7264 else if (known_eq (nunits * 2, scatter_off_nunits))
7265 {
7266 modifier = WIDEN;
7267
7268 /* Currently gathers and scatters are only supported for
7269 fixed-length vectors. */
7270 unsigned int count = scatter_off_nunits.to_constant ();
7271 vec_perm_builder sel (count, count, 1);
7272 for (i = 0; i < (unsigned int) count; ++i)
7273 sel.quick_push (i | (count / 2));
7274
7275 vec_perm_indices indices (sel, 1, count);
7276 perm_mask = vect_gen_perm_mask_checked (gs_info.offset_vectype,
7277 indices);
7278 gcc_assert (perm_mask != NULL_TREE);
7279 }
7280 else if (known_eq (nunits, scatter_off_nunits * 2))
7281 {
7282 modifier = NARROW;
7283
7284 /* Currently gathers and scatters are only supported for
7285 fixed-length vectors. */
7286 unsigned int count = nunits.to_constant ();
7287 vec_perm_builder sel (count, count, 1);
7288 for (i = 0; i < (unsigned int) count; ++i)
7289 sel.quick_push (i | (count / 2));
7290
7291 vec_perm_indices indices (sel, 2, count);
7292 perm_mask = vect_gen_perm_mask_checked (vectype, indices);
7293 gcc_assert (perm_mask != NULL_TREE);
7294 ncopies *= 2;
7295
7296 if (mask)
7297 mask_halfvectype = truth_type_for (gs_info.offset_vectype);
7298 }
7299 else
7300 gcc_unreachable ();
7301
7302 rettype = TREE_TYPE (TREE_TYPE (gs_info.decl));
7303 ptrtype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
7304 masktype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
7305 idxtype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
7306 srctype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
7307 scaletype = TREE_VALUE (arglist);
7308
7309 gcc_checking_assert (TREE_CODE (masktype) == INTEGER_TYPE
7310 && TREE_CODE (rettype) == VOID_TYPE);
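 /* Illustrative (hypothetical) prototype of such a builtin scatter, showing
 the argument order unpacked above and used in the call built below:
 void scatter (void *ptr, <integer mask>, <index vector> idx,
 <source vector> src, int scale); */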
7311
7312 ptr = fold_convert (ptrtype, gs_info.base);
7313 if (!is_gimple_min_invariant (ptr))
7314 {
7315 ptr = force_gimple_operand (ptr, &seq, true, NULL_TREE);
7316 new_bb = gsi_insert_seq_on_edge_immediate (pe, seq);
7317 gcc_assert (!new_bb);
7318 }
7319
7320 if (mask == NULL_TREE)
7321 {
7322 mask_arg = build_int_cst (masktype, -1);
7323 mask_arg = vect_init_vector (vinfo, stmt_info,
7324 mask_arg, masktype, NULL);
7325 }
7326
7327 scale = build_int_cst (scaletype, gs_info.scale);
7328
7329 auto_vec<tree> vec_oprnds0;
7330 auto_vec<tree> vec_oprnds1;
7331 auto_vec<tree> vec_masks;
7332 if (mask)
7333 {
7334 tree mask_vectype = truth_type_for (vectype);
7335 vect_get_vec_defs_for_operand (vinfo, stmt_info,
7336 modifier == NARROW
7337 ? ncopies / 2 : ncopies,
7338 mask, &vec_masks, mask_vectype);
7339 }
7340 vect_get_vec_defs_for_operand (vinfo, stmt_info,
7341 modifier == WIDEN
7342 ? ncopies / 2 : ncopies,
7343 gs_info.offset, &vec_oprnds0);
7344 vect_get_vec_defs_for_operand (vinfo, stmt_info,
7345 modifier == NARROW
7346 ? ncopies / 2 : ncopies,
7347 op, &vec_oprnds1);
7348 for (j = 0; j < ncopies; ++j)
7349 {
7350 if (modifier == WIDEN)
7351 {
7352 if (j & 1)
7353 op = permute_vec_elements (vinfo, vec_oprnd0, vec_oprnd0,
7354 perm_mask, stmt_info, gsi);
7355 else
7356 op = vec_oprnd0 = vec_oprnds0[j / 2];
7357 src = vec_oprnd1 = vec_oprnds1[j];
7358 if (mask)
7359 mask_op = vec_mask = vec_masks[j];
7360 }
7361 else if (modifier == NARROW)
7362 {
7363 if (j & 1)
7364 src = permute_vec_elements (vinfo, vec_oprnd1, vec_oprnd1,
7365 perm_mask, stmt_info, gsi);
7366 else
7367 src = vec_oprnd1 = vec_oprnds1[j / 2];
7368 op = vec_oprnd0 = vec_oprnds0[j];
7369 if (mask)
7370 mask_op = vec_mask = vec_masks[j / 2];
7371 }
7372 else
7373 {
7374 op = vec_oprnd0 = vec_oprnds0[j];
7375 src = vec_oprnd1 = vec_oprnds1[j];
7376 if (mask)
7377 mask_op = vec_mask = vec_masks[j];
7378 }
7379
7380 if (!useless_type_conversion_p (srctype, TREE_TYPE (src)))
7381 {
7382 gcc_assert (known_eq (TYPE_VECTOR_SUBPARTS (TREE_TYPE (src)),
7383 TYPE_VECTOR_SUBPARTS (srctype)));
7384 var = vect_get_new_ssa_name (srctype, vect_simple_var);
7385 src = build1 (VIEW_CONVERT_EXPR, srctype, src);
7386 gassign *new_stmt
7387 = gimple_build_assign (var, VIEW_CONVERT_EXPR, src);
7388 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
7389 src = var;
7390 }
7391
7392 if (!useless_type_conversion_p (idxtype, TREE_TYPE (op)))
7393 {
7394 gcc_assert (known_eq (TYPE_VECTOR_SUBPARTS (TREE_TYPE (op)),
7395 TYPE_VECTOR_SUBPARTS (idxtype)));
7396 var = vect_get_new_ssa_name (idxtype, vect_simple_var);
7397 op = build1 (VIEW_CONVERT_EXPR, idxtype, op);
7398 gassign *new_stmt
7399 = gimple_build_assign (var, VIEW_CONVERT_EXPR, op);
7400 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
7401 op = var;
7402 }
7403
7404 if (mask)
7405 {
7406 tree utype;
7407 mask_arg = mask_op;
7408 if (modifier == NARROW)
7409 {
7410 var = vect_get_new_ssa_name (mask_halfvectype,
7411 vect_simple_var);
7412 gassign *new_stmt
7413 = gimple_build_assign (var, (j & 1) ? VEC_UNPACK_HI_EXPR
7414 : VEC_UNPACK_LO_EXPR,
7415 mask_op);
7416 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
7417 mask_arg = var;
7418 }
7419 tree optype = TREE_TYPE (mask_arg);
7420 if (TYPE_MODE (masktype) == TYPE_MODE (optype))
7421 utype = masktype;
7422 else
7423 utype = lang_hooks.types.type_for_mode (TYPE_MODE (optype), 1);
7424 var = vect_get_new_ssa_name (utype, vect_scalar_var);
7425 mask_arg = build1 (VIEW_CONVERT_EXPR, utype, mask_arg);
7426 gassign *new_stmt
7427 = gimple_build_assign (var, VIEW_CONVERT_EXPR, mask_arg);
7428 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
7429 mask_arg = var;
7430 if (!useless_type_conversion_p (masktype, utype))
7431 {
7432 gcc_assert (TYPE_PRECISION (utype)
7433 <= TYPE_PRECISION (masktype));
7434 var = vect_get_new_ssa_name (masktype, vect_scalar_var);
7435 new_stmt = gimple_build_assign (var, NOP_EXPR, mask_arg);
7436 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
7437 mask_arg = var;
7438 }
7439 }
7440
7441 gcall *new_stmt
7442 = gimple_build_call (gs_info.decl, 5, ptr, mask_arg, op, src, scale);
7443 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
7444
7445 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
7446 }
7447 *vec_stmt = STMT_VINFO_VEC_STMTS (stmt_info)[0];
7448 return true;
7449 }
7450 else if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) >= 3)
7451 return vectorizable_scan_store (vinfo, stmt_info, gsi, vec_stmt, ncopies);
7452
7453 if (STMT_VINFO_GROUPED_ACCESS (stmt_info))
7454 DR_GROUP_STORE_COUNT (DR_GROUP_FIRST_ELEMENT (stmt_info))++;
7455
7456 if (grouped_store)
7457 {
7458 /* FORNOW */
7459 gcc_assert (!loop || !nested_in_vect_loop_p (loop, stmt_info));
7460
7461 /* We vectorize all the stmts of the interleaving group when we
7462 reach the last stmt in the group. */
7463 if (DR_GROUP_STORE_COUNT (first_stmt_info)
7464 < DR_GROUP_SIZE (first_stmt_info)
7465 && !slp)
7466 {
7467 *vec_stmt = NULL;
7468 return true;
7469 }
7470
7471 if (slp)
7472 {
7473 grouped_store = false;
7474 /* VEC_NUM is the number of vect stmts to be created for this
7475 group. */
7476 vec_num = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
7477 first_stmt_info = SLP_TREE_SCALAR_STMTS (slp_node)[0];
7478 gcc_assert (DR_GROUP_FIRST_ELEMENT (first_stmt_info)
7479 == first_stmt_info);
7480 first_dr_info = STMT_VINFO_DR_INFO (first_stmt_info);
7481 op = vect_get_store_rhs (first_stmt_info);
7482 }
7483 else
7484 /* VEC_NUM is the number of vect stmts to be created for this
7485 group. */
7486 vec_num = group_size;
7487
7488 ref_type = get_group_alias_ptr_type (first_stmt_info);
7489 }
7490 else
7491 ref_type = reference_alias_ptr_type (DR_REF (first_dr_info->dr));
7492
7493 if (dump_enabled_p ())
7494 dump_printf_loc (MSG_NOTE, vect_location,
7495 "transform store. ncopies = %d\n", ncopies);
7496
7497 if (memory_access_type == VMAT_ELEMENTWISE
7498 || memory_access_type == VMAT_STRIDED_SLP)
7499 {
7500 gimple_stmt_iterator incr_gsi;
7501 bool insert_after;
7502 gimple *incr;
7503 tree offvar;
7504 tree ivstep;
7505 tree running_off;
7506 tree stride_base, stride_step, alias_off;
7507 tree vec_oprnd;
7508 tree dr_offset;
7509 unsigned int g;
7510 /* Checked by get_load_store_type. */
7511 unsigned int const_nunits = nunits.to_constant ();
7512
7513 gcc_assert (!LOOP_VINFO_FULLY_MASKED_P (loop_vinfo));
7514 gcc_assert (!nested_in_vect_loop_p (loop, stmt_info));
7515
7516 dr_offset = get_dr_vinfo_offset (vinfo, first_dr_info);
7517 stride_base
7518 = fold_build_pointer_plus
7519 (DR_BASE_ADDRESS (first_dr_info->dr),
7520 size_binop (PLUS_EXPR,
7521 convert_to_ptrofftype (dr_offset),
7522 convert_to_ptrofftype (DR_INIT (first_dr_info->dr))));
7523 stride_step = fold_convert (sizetype, DR_STEP (first_dr_info->dr));
7524
7525 /* For a store with loop-invariant (but other than power-of-2)
7526 stride (i.e. not a grouped access) like so:
7527
7528 for (i = 0; i < n; i += stride)
7529 array[i] = ...;
7530
7531 we generate a new induction variable and new stores from
7532 the components of the (vectorized) rhs:
7533
7534 for (j = 0; ; j += VF*stride)
7535 vectemp = ...;
7536 tmp1 = vectemp[0];
7537 array[j] = tmp1;
7538 tmp2 = vectemp[1];
7539 array[j + stride] = tmp2;
7540 ...
7541 */
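 /* Illustrative worked example (editor's note, not from the sources): with
 a constant scalar stride of 3, V4SI vectors and VF == 4, each vector
 iteration stores the four lanes of vectemp to array[j], array[j + 3],
 array[j + 6] and array[j + 9] and then advances j by 12. */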
7542
7543 unsigned nstores = const_nunits;
7544 unsigned lnel = 1;
7545 tree ltype = elem_type;
7546 tree lvectype = vectype;
7547 if (slp)
7548 {
7549 if (group_size < const_nunits
7550 && const_nunits % group_size == 0)
7551 {
7552 nstores = const_nunits / group_size;
7553 lnel = group_size;
7554 ltype = build_vector_type (elem_type, group_size);
7555 lvectype = vectype;
7556
7557 /* First check if the vec_extract optab doesn't support extraction
7558 of vector elts directly. */
7559 scalar_mode elmode = SCALAR_TYPE_MODE (elem_type);
7560 machine_mode vmode;
7561 if (!VECTOR_MODE_P (TYPE_MODE (vectype))
7562 || !related_vector_mode (TYPE_MODE (vectype), elmode,
7563 group_size).exists (&vmode)
7564 || (convert_optab_handler (vec_extract_optab,
7565 TYPE_MODE (vectype), vmode)
7566 == CODE_FOR_nothing))
7567 {
7568 /* Try to avoid emitting an extract of vector elements
7569 by performing the extracts using an integer type of the
7570 same size, extracting from a vector of those and then
7571 re-interpreting it as the original vector type if
7572 supported. */
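 /* Illustrative example (editor's note): for a V8SI vector with
 group_size == 2, elmode is SImode, so lsize == 64 and lnunits == 4;
 if the target provides vec_extract for V4DI, the stores below extract
 DImode chunks from a V4DI view of the vector instead of single
 SImode elements. */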
7573 unsigned lsize
7574 = group_size * GET_MODE_BITSIZE (elmode);
7575 unsigned int lnunits = const_nunits / group_size;
7576 /* If we can't construct such a vector fall back to
7577 element extracts from the original vector type and
7578 element size stores. */
7579 if (int_mode_for_size (lsize, 0).exists (&elmode)
7580 && VECTOR_MODE_P (TYPE_MODE (vectype))
7581 && related_vector_mode (TYPE_MODE (vectype), elmode,
7582 lnunits).exists (&vmode)
7583 && (convert_optab_handler (vec_extract_optab,
7584 vmode, elmode)
7585 != CODE_FOR_nothing))
7586 {
7587 nstores = lnunits;
7588 lnel = group_size;
7589 ltype = build_nonstandard_integer_type (lsize, 1);
7590 lvectype = build_vector_type (ltype, nstores);
7591 }
7592 /* Else fall back to vector extraction anyway.
7593 Fewer stores are more important than avoiding spilling
7594 of the vector we extract from. Compared to the
7595 construction case in vectorizable_load no store-forwarding
7596 issue exists here for reasonable archs. */
7597 }
7598 }
7599 else if (group_size >= const_nunits
7600 && group_size % const_nunits == 0)
7601 {
7602 nstores = 1;
7603 lnel = const_nunits;
7604 ltype = vectype;
7605 lvectype = vectype;
7606 }
7607 ltype = build_aligned_type (ltype, TYPE_ALIGN (elem_type));
7608 ncopies = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
7609 }
7610
7611 ivstep = stride_step;
7612 ivstep = fold_build2 (MULT_EXPR, TREE_TYPE (ivstep), ivstep,
7613 build_int_cst (TREE_TYPE (ivstep), vf));
7614
7615 standard_iv_increment_position (loop, &incr_gsi, &insert_after);
7616
7617 stride_base = cse_and_gimplify_to_preheader (loop_vinfo, stride_base);
7618 ivstep = cse_and_gimplify_to_preheader (loop_vinfo, ivstep);
7619 create_iv (stride_base, ivstep, NULL,
7620 loop, &incr_gsi, insert_after,
7621 &offvar, NULL);
7622 incr = gsi_stmt (incr_gsi);
7623
7624 stride_step = cse_and_gimplify_to_preheader (loop_vinfo, stride_step);
7625
7626 alias_off = build_int_cst (ref_type, 0);
7627 stmt_vec_info next_stmt_info = first_stmt_info;
7628 for (g = 0; g < group_size; g++)
7629 {
7630 running_off = offvar;
7631 if (g)
7632 {
7633 tree size = TYPE_SIZE_UNIT (ltype);
7634 tree pos = fold_build2 (MULT_EXPR, sizetype, size_int (g),
7635 size);
7636 tree newoff = copy_ssa_name (running_off, NULL);
7637 incr = gimple_build_assign (newoff, POINTER_PLUS_EXPR,
7638 running_off, pos);
7639 vect_finish_stmt_generation (vinfo, stmt_info, incr, gsi);
7640 running_off = newoff;
7641 }
7642 if (!slp)
7643 op = vect_get_store_rhs (next_stmt_info);
7644 vect_get_vec_defs (vinfo, next_stmt_info, slp_node, ncopies,
7645 op, &vec_oprnds);
7646 unsigned int group_el = 0;
7647 unsigned HOST_WIDE_INT
7648 elsz = tree_to_uhwi (TYPE_SIZE_UNIT (TREE_TYPE (vectype)));
7649 for (j = 0; j < ncopies; j++)
7650 {
7651 vec_oprnd = vec_oprnds[j];
7652 /* Pun the vector to extract from if necessary. */
7653 if (lvectype != vectype)
7654 {
7655 tree tem = make_ssa_name (lvectype);
7656 gimple *pun
7657 = gimple_build_assign (tem, build1 (VIEW_CONVERT_EXPR,
7658 lvectype, vec_oprnd));
7659 vect_finish_stmt_generation (vinfo, stmt_info, pun, gsi);
7660 vec_oprnd = tem;
7661 }
7662 for (i = 0; i < nstores; i++)
7663 {
7664 tree newref, newoff;
7665 gimple *incr, *assign;
7666 tree size = TYPE_SIZE (ltype);
7667 /* Extract the i'th component. */
7668 tree pos = fold_build2 (MULT_EXPR, bitsizetype,
7669 bitsize_int (i), size);
7670 tree elem = fold_build3 (BIT_FIELD_REF, ltype, vec_oprnd,
7671 size, pos);
7672
7673 elem = force_gimple_operand_gsi (gsi, elem, true,
7674 NULL_TREE, true,
7675 GSI_SAME_STMT);
7676
7677 tree this_off = build_int_cst (TREE_TYPE (alias_off),
7678 group_el * elsz);
7679 newref = build2 (MEM_REF, ltype,
7680 running_off, this_off);
7681 vect_copy_ref_info (newref, DR_REF (first_dr_info->dr));
7682
7683 /* And store it to *running_off. */
7684 assign = gimple_build_assign (newref, elem);
7685 vect_finish_stmt_generation (vinfo, stmt_info, assign, gsi);
7686
7687 group_el += lnel;
7688 if (! slp
7689 || group_el == group_size)
7690 {
7691 newoff = copy_ssa_name (running_off, NULL);
7692 incr = gimple_build_assign (newoff, POINTER_PLUS_EXPR,
7693 running_off, stride_step);
7694 vect_finish_stmt_generation (vinfo, stmt_info, incr, gsi);
7695
7696 running_off = newoff;
7697 group_el = 0;
7698 }
7699 if (g == group_size - 1
7700 && !slp)
7701 {
7702 if (j == 0 && i == 0)
7703 *vec_stmt = assign;
7704 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (assign);
7705 }
7706 }
7707 }
7708 next_stmt_info = DR_GROUP_NEXT_ELEMENT (next_stmt_info);
7709 if (slp)
7710 break;
7711 }
7712
7713 vec_oprnds.release ();
7714 return true;
7715 }
7716
7717 auto_vec<tree> dr_chain (group_size);
7718 oprnds.create (group_size);
7719
7720 /* Gather-scatter accesses perform only component accesses, alignment
7721 is irrelevant for them. */
7722 if (memory_access_type == VMAT_GATHER_SCATTER)
7723 alignment_support_scheme = dr_unaligned_supported;
7724 else
7725 alignment_support_scheme
7726 = vect_supportable_dr_alignment (vinfo, first_dr_info, false);
7727
7728 gcc_assert (alignment_support_scheme);
7729 vec_loop_masks *loop_masks
7730 = (loop_vinfo && LOOP_VINFO_FULLY_MASKED_P (loop_vinfo)
7731 ? &LOOP_VINFO_MASKS (loop_vinfo)
7732 : NULL);
7733 vec_loop_lens *loop_lens
7734 = (loop_vinfo && LOOP_VINFO_FULLY_WITH_LENGTH_P (loop_vinfo)
7735 ? &LOOP_VINFO_LENS (loop_vinfo)
7736 : NULL);
7737
7738 /* Shouldn't go with length-based approach if fully masked. */
7739 gcc_assert (!loop_lens || !loop_masks);
7740
7741 /* Targets with store-lane instructions must not require explicit
7742 realignment. vect_supportable_dr_alignment always returns either
7743 dr_aligned or dr_unaligned_supported for masked operations. */
7744 gcc_assert ((memory_access_type != VMAT_LOAD_STORE_LANES
7745 && !mask
7746 && !loop_masks)
7747 || alignment_support_scheme == dr_aligned
7748 || alignment_support_scheme == dr_unaligned_supported);
7749
7750 if (memory_access_type == VMAT_CONTIGUOUS_DOWN
7751 || memory_access_type == VMAT_CONTIGUOUS_REVERSE)
7752 offset = size_int (-TYPE_VECTOR_SUBPARTS (vectype) + 1);
7753
7754 tree bump;
7755 tree vec_offset = NULL_TREE;
7756 if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
7757 {
7758 aggr_type = NULL_TREE;
7759 bump = NULL_TREE;
7760 }
7761 else if (memory_access_type == VMAT_GATHER_SCATTER)
7762 {
7763 aggr_type = elem_type;
7764 vect_get_strided_load_store_ops (stmt_info, loop_vinfo, &gs_info,
7765 &bump, &vec_offset);
7766 }
7767 else
7768 {
7769 if (memory_access_type == VMAT_LOAD_STORE_LANES)
7770 aggr_type = build_array_type_nelts (elem_type, vec_num * nunits);
7771 else
7772 aggr_type = vectype;
7773 bump = vect_get_data_ptr_increment (vinfo, dr_info, aggr_type,
7774 memory_access_type);
7775 }
7776
7777 if (mask)
7778 LOOP_VINFO_HAS_MASK_STORE (loop_vinfo) = true;
7779
7780 /* In case the vectorization factor (VF) is bigger than the number
7781 of elements that we can fit in a vectype (nunits), we have to generate
7782 more than one vector stmt, i.e., we need to "unroll" the
7783 vector stmt by a factor VF/nunits. */
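 /* Illustrative arithmetic (editor's note): e.g. with VF == 16 and
 4-element vectors, ncopies == VF / nunits == 4, so four vector stmts
 are emitted per scalar stmt. */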
7784
7785 /* In case of interleaving (non-unit grouped access):
7786
7787 S1: &base + 2 = x2
7788 S2: &base = x0
7789 S3: &base + 1 = x1
7790 S4: &base + 3 = x3
7791
7792 We create vectorized stores starting from base address (the access of the
7793 first stmt in the chain (S2 in the above example), when the last store stmt
7794 of the chain (S4) is reached:
7795
7796 VS1: &base = vx2
7797 VS2: &base + vec_size*1 = vx0
7798 VS3: &base + vec_size*2 = vx1
7799 VS4: &base + vec_size*3 = vx3
7800
7801 Then permutation statements are generated:
7802
7803 VS5: vx5 = VEC_PERM_EXPR < vx0, vx3, {0, 8, 1, 9, 2, 10, 3, 11} >
7804 VS6: vx6 = VEC_PERM_EXPR < vx0, vx3, {4, 12, 5, 13, 6, 14, 7, 15} >
7805 ...
7806
7807 And they are put in STMT_VINFO_VEC_STMT of the corresponding scalar stmts
7808 (the order of the data-refs in the output of vect_permute_store_chain
7809 corresponds to the order of scalar stmts in the interleaving chain - see
7810 the documentation of vect_permute_store_chain()).
7811
7812 In case of both multiple types and interleaving, above vector stores and
7813 permutation stmts are created for every copy. The result vector stmts are
7814 put in STMT_VINFO_VEC_STMT for the first copy and in the corresponding
7815 STMT_VINFO_RELATED_STMT for the next copies.
7816 */
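 /* Illustrative note (editor's addition): for the example above with V8SI
 vectors, VS5/VS6 interleave vx0 = { a0, ..., a7 } and vx3 = { b0, ..., b7 }
 into { a0, b0, a1, b1, a2, b2, a3, b3 } and
 { a4, b4, a5, b5, a6, b6, a7, b7 }; see vect_permute_store_chain for the
 general recursive scheme. */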
7817
7818 auto_vec<tree> vec_masks;
7819 tree vec_mask = NULL;
7820 auto_vec<tree> vec_offsets;
7821 auto_vec<vec<tree> > gvec_oprnds;
7822 gvec_oprnds.safe_grow_cleared (group_size, true);
7823 for (j = 0; j < ncopies; j++)
7824 {
7825 gimple *new_stmt;
7826 if (j == 0)
7827 {
7828 if (slp)
7829 {
7830 /* Get vectorized arguments for SLP_NODE. */
7831 vect_get_vec_defs (vinfo, stmt_info, slp_node, 1,
7832 op, &vec_oprnds);
7833 vec_oprnd = vec_oprnds[0];
7834 }
7835 else
7836 {
7837 /* For interleaved stores we collect vectorized defs for all the
7838 stores in the group in DR_CHAIN and OPRNDS. DR_CHAIN is then
7839 used as an input to vect_permute_store_chain().
7840
7841 If the store is not grouped, DR_GROUP_SIZE is 1, and DR_CHAIN
7842 and OPRNDS are of size 1. */
7843 stmt_vec_info next_stmt_info = first_stmt_info;
7844 for (i = 0; i < group_size; i++)
7845 {
7846 /* Since gaps are not supported for interleaved stores,
7847 DR_GROUP_SIZE is the exact number of stmts in the chain.
7848 Therefore, NEXT_STMT_INFO can't be NULL. In case
7849 there is no interleaving, DR_GROUP_SIZE is 1,
7850 and only one iteration of the loop will be executed. */
7851 op = vect_get_store_rhs (next_stmt_info);
7852 vect_get_vec_defs_for_operand (vinfo, next_stmt_info,
7853 ncopies, op, &gvec_oprnds[i]);
7854 vec_oprnd = gvec_oprnds[i][0];
7855 dr_chain.quick_push (gvec_oprnds[i][0]);
7856 oprnds.quick_push (gvec_oprnds[i][0]);
7857 next_stmt_info = DR_GROUP_NEXT_ELEMENT (next_stmt_info);
7858 }
7859 if (mask)
7860 {
7861 vect_get_vec_defs_for_operand (vinfo, stmt_info, ncopies,
7862 mask, &vec_masks, mask_vectype);
7863 vec_mask = vec_masks[0];
7864 }
7865 }
7866
7867 /* We should have caught mismatched types earlier. */
7868 gcc_assert (useless_type_conversion_p (vectype,
7869 TREE_TYPE (vec_oprnd)));
7870 bool simd_lane_access_p
7871 = STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) != 0;
7872 if (simd_lane_access_p
7873 && !loop_masks
7874 && TREE_CODE (DR_BASE_ADDRESS (first_dr_info->dr)) == ADDR_EXPR
7875 && VAR_P (TREE_OPERAND (DR_BASE_ADDRESS (first_dr_info->dr), 0))
7876 && integer_zerop (get_dr_vinfo_offset (vinfo, first_dr_info))
7877 && integer_zerop (DR_INIT (first_dr_info->dr))
7878 && alias_sets_conflict_p (get_alias_set (aggr_type),
7879 get_alias_set (TREE_TYPE (ref_type))))
7880 {
7881 dataref_ptr = unshare_expr (DR_BASE_ADDRESS (first_dr_info->dr));
7882 dataref_offset = build_int_cst (ref_type, 0);
7883 }
7884 else if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
7885 {
7886 vect_get_gather_scatter_ops (vinfo, loop, stmt_info, &gs_info,
7887 &dataref_ptr, &vec_offsets, ncopies);
7888 vec_offset = vec_offsets[0];
7889 }
7890 else
7891 dataref_ptr
7892 = vect_create_data_ref_ptr (vinfo, first_stmt_info, aggr_type,
7893 simd_lane_access_p ? loop : NULL,
7894 offset, &dummy, gsi, &ptr_incr,
7895 simd_lane_access_p, NULL_TREE, bump);
7896 }
7897 else
7898 {
7899 /* For interleaved stores we created vectorized defs for all the
7900 defs stored in OPRNDS in the previous iteration (previous copy).
7901 DR_CHAIN is then used as an input to vect_permute_store_chain().
7902 If the store is not grouped, DR_GROUP_SIZE is 1, and DR_CHAIN and
7903 OPRNDS are of size 1. */
7904 for (i = 0; i < group_size; i++)
7905 {
7906 vec_oprnd = gvec_oprnds[i][j];
7907 dr_chain[i] = gvec_oprnds[i][j];
7908 oprnds[i] = gvec_oprnds[i][j];
7909 }
7910 if (mask)
7911 vec_mask = vec_masks[j];
7912 if (dataref_offset)
7913 dataref_offset
7914 = int_const_binop (PLUS_EXPR, dataref_offset, bump);
7915 else if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
7916 vec_offset = vec_offsets[j];
7917 else
7918 dataref_ptr = bump_vector_ptr (vinfo, dataref_ptr, ptr_incr, gsi,
7919 stmt_info, bump);
7920 }
7921
7922 if (memory_access_type == VMAT_LOAD_STORE_LANES)
7923 {
7924 tree vec_array;
7925
7926 /* Get an array into which we can store the individual vectors. */
7927 vec_array = create_vector_array (vectype, vec_num);
7928
7929 /* Invalidate the current contents of VEC_ARRAY. This should
7930 become an RTL clobber too, which prevents the vector registers
7931 from being upward-exposed. */
7932 vect_clobber_variable (vinfo, stmt_info, gsi, vec_array);
7933
7934 /* Store the individual vectors into the array. */
7935 for (i = 0; i < vec_num; i++)
7936 {
7937 vec_oprnd = dr_chain[i];
7938 write_vector_array (vinfo, stmt_info,
7939 gsi, vec_oprnd, vec_array, i);
7940 }
7941
7942 tree final_mask = NULL;
7943 if (loop_masks)
7944 final_mask = vect_get_loop_mask (gsi, loop_masks, ncopies,
7945 vectype, j);
7946 if (vec_mask)
7947 final_mask = prepare_load_store_mask (mask_vectype, final_mask,
7948 vec_mask, gsi);
7949
7950 gcall *call;
7951 if (final_mask)
7952 {
7953 /* Emit:
7954 MASK_STORE_LANES (DATAREF_PTR, ALIAS_PTR, VEC_MASK,
7955 VEC_ARRAY). */
7956 unsigned int align = TYPE_ALIGN_UNIT (TREE_TYPE (vectype));
7957 tree alias_ptr = build_int_cst (ref_type, align);
7958 call = gimple_build_call_internal (IFN_MASK_STORE_LANES, 4,
7959 dataref_ptr, alias_ptr,
7960 final_mask, vec_array);
7961 }
7962 else
7963 {
7964 /* Emit:
7965 MEM_REF[...all elements...] = STORE_LANES (VEC_ARRAY). */
7966 data_ref = create_array_ref (aggr_type, dataref_ptr, ref_type);
7967 call = gimple_build_call_internal (IFN_STORE_LANES, 1,
7968 vec_array);
7969 gimple_call_set_lhs (call, data_ref);
7970 }
7971 gimple_call_set_nothrow (call, true);
7972 vect_finish_stmt_generation (vinfo, stmt_info, call, gsi);
7973 new_stmt = call;
7974
7975 /* Record that VEC_ARRAY is now dead. */
7976 vect_clobber_variable (vinfo, stmt_info, gsi, vec_array);
7977 }
7978 else
7979 {
7980 new_stmt = NULL;
7981 if (grouped_store)
7982 {
7983 if (j == 0)
7984 result_chain.create (group_size);
7985 /* Permute. */
7986 vect_permute_store_chain (vinfo, dr_chain, group_size, stmt_info,
7987 gsi, &result_chain);
7988 }
7989
7990 stmt_vec_info next_stmt_info = first_stmt_info;
7991 for (i = 0; i < vec_num; i++)
7992 {
7993 unsigned misalign;
7994 unsigned HOST_WIDE_INT align;
7995
7996 tree final_mask = NULL_TREE;
7997 if (loop_masks)
7998 final_mask = vect_get_loop_mask (gsi, loop_masks,
7999 vec_num * ncopies,
8000 vectype, vec_num * j + i);
8001 if (vec_mask)
8002 final_mask = prepare_load_store_mask (mask_vectype, final_mask,
8003 vec_mask, gsi);
8004
8005 if (memory_access_type == VMAT_GATHER_SCATTER)
8006 {
8007 tree scale = size_int (gs_info.scale);
8008 gcall *call;
8009 if (loop_masks)
8010 call = gimple_build_call_internal
8011 (IFN_MASK_SCATTER_STORE, 5, dataref_ptr, vec_offset,
8012 scale, vec_oprnd, final_mask);
8013 else
8014 call = gimple_build_call_internal
8015 (IFN_SCATTER_STORE, 4, dataref_ptr, vec_offset,
8016 scale, vec_oprnd);
8017 gimple_call_set_nothrow (call, true);
8018 vect_finish_stmt_generation (vinfo, stmt_info, call, gsi);
8019 new_stmt = call;
8020 break;
8021 }
8022
8023 if (i > 0)
8024 /* Bump the vector pointer. */
8025 dataref_ptr = bump_vector_ptr (vinfo, dataref_ptr, ptr_incr,
8026 gsi, stmt_info, bump);
8027
8028 if (slp)
8029 vec_oprnd = vec_oprnds[i];
8030 else if (grouped_store)
8031 /* For grouped stores vectorized defs are interleaved in
8032 vect_permute_store_chain(). */
8033 vec_oprnd = result_chain[i];
8034
8035 align = known_alignment (DR_TARGET_ALIGNMENT (first_dr_info));
8036 if (aligned_access_p (first_dr_info))
8037 misalign = 0;
8038 else if (DR_MISALIGNMENT (first_dr_info) == -1)
8039 {
8040 align = dr_alignment (vect_dr_behavior (vinfo, first_dr_info));
8041 misalign = 0;
8042 }
8043 else
8044 misalign = DR_MISALIGNMENT (first_dr_info);
8045 if (dataref_offset == NULL_TREE
8046 && TREE_CODE (dataref_ptr) == SSA_NAME)
8047 set_ptr_info_alignment (get_ptr_info (dataref_ptr), align,
8048 misalign);
8049
8050 if (memory_access_type == VMAT_CONTIGUOUS_REVERSE)
8051 {
8052 tree perm_mask = perm_mask_for_reverse (vectype);
8053 tree perm_dest = vect_create_destination_var
8054 (vect_get_store_rhs (stmt_info), vectype);
8055 tree new_temp = make_ssa_name (perm_dest);
8056
8057 /* Generate the permute statement. */
8058 gimple *perm_stmt
8059 = gimple_build_assign (new_temp, VEC_PERM_EXPR, vec_oprnd,
8060 vec_oprnd, perm_mask);
8061 vect_finish_stmt_generation (vinfo, stmt_info, perm_stmt, gsi);
8062
8063 perm_stmt = SSA_NAME_DEF_STMT (new_temp);
8064 vec_oprnd = new_temp;
8065 }
8066
8067 /* Arguments are ready. Create the new vector stmt. */
8068 if (final_mask)
8069 {
8070 align = least_bit_hwi (misalign | align);
8071 tree ptr = build_int_cst (ref_type, align);
8072 gcall *call
8073 = gimple_build_call_internal (IFN_MASK_STORE, 4,
8074 dataref_ptr, ptr,
8075 final_mask, vec_oprnd);
8076 gimple_call_set_nothrow (call, true);
8077 vect_finish_stmt_generation (vinfo, stmt_info, call, gsi);
8078 new_stmt = call;
8079 }
8080 else if (loop_lens)
8081 {
8082 tree final_len
8083 = vect_get_loop_len (loop_vinfo, loop_lens,
8084 vec_num * ncopies, vec_num * j + i);
8085 align = least_bit_hwi (misalign | align);
8086 tree ptr = build_int_cst (ref_type, align);
8087 machine_mode vmode = TYPE_MODE (vectype);
8088 opt_machine_mode new_ovmode
8089 = get_len_load_store_mode (vmode, false);
8090 machine_mode new_vmode = new_ovmode.require ();
8091 /* Need conversion if it's wrapped with VnQI. */
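 /* Editor's illustrative note: this covers targets whose len_store
 pattern is only defined for byte vectors, so e.g. a V4SI operand is
 first VIEW_CONVERTed to a same-sized unsigned-QI vector before the
 IFN_LEN_STORE call below. */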
8092 if (vmode != new_vmode)
8093 {
8094 tree new_vtype
8095 = build_vector_type_for_mode (unsigned_intQI_type_node,
8096 new_vmode);
8097 tree var
8098 = vect_get_new_ssa_name (new_vtype, vect_simple_var);
8099 vec_oprnd
8100 = build1 (VIEW_CONVERT_EXPR, new_vtype, vec_oprnd);
8101 gassign *new_stmt
8102 = gimple_build_assign (var, VIEW_CONVERT_EXPR,
8103 vec_oprnd);
8104 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt,
8105 gsi);
8106 vec_oprnd = var;
8107 }
8108 gcall *call
8109 = gimple_build_call_internal (IFN_LEN_STORE, 4, dataref_ptr,
8110 ptr, final_len, vec_oprnd);
8111 gimple_call_set_nothrow (call, true);
8112 vect_finish_stmt_generation (vinfo, stmt_info, call, gsi);
8113 new_stmt = call;
8114 }
8115 else
8116 {
8117 data_ref = fold_build2 (MEM_REF, vectype,
8118 dataref_ptr,
8119 dataref_offset
8120 ? dataref_offset
8121 : build_int_cst (ref_type, 0));
8122 if (aligned_access_p (first_dr_info))
8123 ;
8124 else if (DR_MISALIGNMENT (first_dr_info) == -1)
8125 TREE_TYPE (data_ref)
8126 = build_aligned_type (TREE_TYPE (data_ref),
8127 align * BITS_PER_UNIT);
8128 else
8129 TREE_TYPE (data_ref)
8130 = build_aligned_type (TREE_TYPE (data_ref),
8131 TYPE_ALIGN (elem_type));
8132 vect_copy_ref_info (data_ref, DR_REF (first_dr_info->dr));
8133 new_stmt = gimple_build_assign (data_ref, vec_oprnd);
8134 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
8135 }
8136
8137 if (slp)
8138 continue;
8139
8140 next_stmt_info = DR_GROUP_NEXT_ELEMENT (next_stmt_info);
8141 if (!next_stmt_info)
8142 break;
8143 }
8144 }
8145 if (!slp)
8146 {
8147 if (j == 0)
8148 *vec_stmt = new_stmt;
8149 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
8150 }
8151 }
8152
8153 for (i = 0; i < group_size; ++i)
8154 {
8155 vec<tree> oprndsi = gvec_oprnds[i];
8156 oprndsi.release ();
8157 }
8158 oprnds.release ();
8159 result_chain.release ();
8160 vec_oprnds.release ();
8161
8162 return true;
8163 }
8164
8165 /* Given a vector type VECTYPE, turns permutation SEL into the equivalent
8166 VECTOR_CST mask. No checks are made that the target platform supports the
8167 mask, so callers may wish to test can_vec_perm_const_p separately, or use
8168 vect_gen_perm_mask_checked. */
8169
8170 tree
8171 vect_gen_perm_mask_any (tree vectype, const vec_perm_indices &sel)
8172 {
8173 tree mask_type;
8174
8175 poly_uint64 nunits = sel.length ();
8176 gcc_assert (known_eq (nunits, TYPE_VECTOR_SUBPARTS (vectype)));
8177
8178 mask_type = build_vector_type (ssizetype, nunits);
8179 return vec_perm_indices_to_tree (mask_type, sel);
8180 }
8181
8182 /* Checked version of vect_gen_perm_mask_any. Asserts can_vec_perm_const_p,
8183 i.e. that the target supports the pattern _for arbitrary input vectors_. */
8184
8185 tree
8186 vect_gen_perm_mask_checked (tree vectype, const vec_perm_indices &sel)
8187 {
8188 gcc_assert (can_vec_perm_const_p (TYPE_MODE (vectype), sel));
8189 return vect_gen_perm_mask_any (vectype, sel);
8190 }
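/* Illustrative usage sketch (editor's note, not part of the sources): a
 simplified, full-encoding take on what perm_mask_for_reverse elsewhere in
 this file does, assuming VECTYPE has a constant number of lanes and the
 target supports the permutation (otherwise check can_vec_perm_const_p and
 use vect_gen_perm_mask_any instead):

 unsigned int nunits = TYPE_VECTOR_SUBPARTS (vectype).to_constant ();
 vec_perm_builder sel (nunits, nunits, 1);
 for (unsigned int i = 0; i < nunits; ++i)
 sel.quick_push (nunits - 1 - i);
 vec_perm_indices indices (sel, 1, nunits);
 tree mask = vect_gen_perm_mask_checked (vectype, indices); */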
8191
8192 /* Given vector variables X and Y that were generated for the scalar
8193 STMT_INFO, generate instructions to permute the vector elements of X and Y
8194 using permutation mask MASK_VEC, insert them at *GSI and return the
8195 permuted vector variable. */
8196
8197 static tree
8198 permute_vec_elements (vec_info *vinfo,
8199 tree x, tree y, tree mask_vec, stmt_vec_info stmt_info,
8200 gimple_stmt_iterator *gsi)
8201 {
8202 tree vectype = TREE_TYPE (x);
8203 tree perm_dest, data_ref;
8204 gimple *perm_stmt;
8205
8206 tree scalar_dest = gimple_get_lhs (stmt_info->stmt);
8207 if (scalar_dest && TREE_CODE (scalar_dest) == SSA_NAME)
8208 perm_dest = vect_create_destination_var (scalar_dest, vectype);
8209 else
8210 perm_dest = vect_get_new_vect_var (vectype, vect_simple_var, NULL);
8211 data_ref = make_ssa_name (perm_dest);
8212
8213 /* Generate the permute statement. */
8214 perm_stmt = gimple_build_assign (data_ref, VEC_PERM_EXPR, x, y, mask_vec);
8215 vect_finish_stmt_generation (vinfo, stmt_info, perm_stmt, gsi);
8216
8217 return data_ref;
8218 }
8219
8220 /* Hoist the definitions of all SSA uses on STMT_INFO out of the loop LOOP,
8221 inserting them on the loop's preheader edge. Returns true if we
8222 were successful in doing so (and thus STMT_INFO can be moved then),
8223 otherwise returns false. */
8224
8225 static bool
8226 hoist_defs_of_uses (stmt_vec_info stmt_info, class loop *loop)
8227 {
8228 ssa_op_iter i;
8229 tree op;
8230 bool any = false;
8231
8232 FOR_EACH_SSA_TREE_OPERAND (op, stmt_info->stmt, i, SSA_OP_USE)
8233 {
8234 gimple *def_stmt = SSA_NAME_DEF_STMT (op);
8235 if (!gimple_nop_p (def_stmt)
8236 && flow_bb_inside_loop_p (loop, gimple_bb (def_stmt)))
8237 {
8238 /* Make sure we don't need to recurse. While we could do
8239 so in simple cases, when there are more complex use webs
8240 we don't have an easy way to preserve stmt order to fulfil
8241 dependencies within them. */
8242 tree op2;
8243 ssa_op_iter i2;
8244 if (gimple_code (def_stmt) == GIMPLE_PHI)
8245 return false;
8246 FOR_EACH_SSA_TREE_OPERAND (op2, def_stmt, i2, SSA_OP_USE)
8247 {
8248 gimple *def_stmt2 = SSA_NAME_DEF_STMT (op2);
8249 if (!gimple_nop_p (def_stmt2)
8250 && flow_bb_inside_loop_p (loop, gimple_bb (def_stmt2)))
8251 return false;
8252 }
8253 any = true;
8254 }
8255 }
8256
8257 if (!any)
8258 return true;
8259
8260 FOR_EACH_SSA_TREE_OPERAND (op, stmt_info->stmt, i, SSA_OP_USE)
8261 {
8262 gimple *def_stmt = SSA_NAME_DEF_STMT (op);
8263 if (!gimple_nop_p (def_stmt)
8264 && flow_bb_inside_loop_p (loop, gimple_bb (def_stmt)))
8265 {
8266 gimple_stmt_iterator gsi = gsi_for_stmt (def_stmt);
8267 gsi_remove (&gsi, false);
8268 gsi_insert_on_edge_immediate (loop_preheader_edge (loop), def_stmt);
8269 }
8270 }
8271
8272 return true;
8273 }
8274
8275 /* vectorizable_load.
8276
8277 Check if STMT_INFO reads a non-scalar data-ref (array/pointer/structure)
8278 that can be vectorized.
8279 If VEC_STMT is also passed, vectorize STMT_INFO: create a vectorized
8280 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
8281 Return true if STMT_INFO is vectorizable in this way. */
8282
8283 static bool
8284 vectorizable_load (vec_info *vinfo,
8285 stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
8286 gimple **vec_stmt, slp_tree slp_node,
8287 stmt_vector_for_cost *cost_vec)
8288 {
8289 tree scalar_dest;
8290 tree vec_dest = NULL;
8291 tree data_ref = NULL;
8292 loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
8293 class loop *loop = NULL;
8294 class loop *containing_loop = gimple_bb (stmt_info->stmt)->loop_father;
8295 bool nested_in_vect_loop = false;
8296 tree elem_type;
8297 tree new_temp;
8298 machine_mode mode;
8299 tree dummy;
8300 tree dataref_ptr = NULL_TREE;
8301 tree dataref_offset = NULL_TREE;
8302 gimple *ptr_incr = NULL;
8303 int ncopies;
8304 int i, j;
8305 unsigned int group_size;
8306 poly_uint64 group_gap_adj;
8307 tree msq = NULL_TREE, lsq;
8308 tree offset = NULL_TREE;
8309 tree byte_offset = NULL_TREE;
8310 tree realignment_token = NULL_TREE;
8311 gphi *phi = NULL;
8312 vec<tree> dr_chain = vNULL;
8313 bool grouped_load = false;
8314 stmt_vec_info first_stmt_info;
8315 stmt_vec_info first_stmt_info_for_drptr = NULL;
8316 bool compute_in_loop = false;
8317 class loop *at_loop;
8318 int vec_num;
8319 bool slp = (slp_node != NULL);
8320 bool slp_perm = false;
8321 bb_vec_info bb_vinfo = dyn_cast <bb_vec_info> (vinfo);
8322 poly_uint64 vf;
8323 tree aggr_type;
8324 gather_scatter_info gs_info;
8325 tree ref_type;
8326 enum vect_def_type mask_dt = vect_unknown_def_type;
8327
8328 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
8329 return false;
8330
8331 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
8332 && ! vec_stmt)
8333 return false;
8334
8335 if (!STMT_VINFO_DATA_REF (stmt_info))
8336 return false;
8337
8338 /* ??? Alignment analysis for SLP looks at SLP_TREE_SCALAR_STMTS[0]
8339 for unpermuted loads, but we get passed SLP_TREE_REPRESENTATIVE,
8340 which can be different when reduction chains were re-ordered.
8341 Now that we know we're a dataref, reset stmt_info back to
8342 SLP_TREE_SCALAR_STMTS[0]. Once everything is SLP-only, this should
8343 be refactored so that the dr_vec_info pointer for the relevant
8344 access is maintained explicitly. */
8345 stmt_vec_info orig_stmt_info = stmt_info;
8346 if (slp_node)
8347 stmt_info = SLP_TREE_SCALAR_STMTS (slp_node)[0];
8348
8349 tree mask = NULL_TREE, mask_vectype = NULL_TREE;
8350 if (gassign *assign = dyn_cast <gassign *> (stmt_info->stmt))
8351 {
8352 scalar_dest = gimple_assign_lhs (assign);
8353 if (TREE_CODE (scalar_dest) != SSA_NAME)
8354 return false;
8355
8356 tree_code code = gimple_assign_rhs_code (assign);
8357 if (code != ARRAY_REF
8358 && code != BIT_FIELD_REF
8359 && code != INDIRECT_REF
8360 && code != COMPONENT_REF
8361 && code != IMAGPART_EXPR
8362 && code != REALPART_EXPR
8363 && code != MEM_REF
8364 && TREE_CODE_CLASS (code) != tcc_declaration)
8365 return false;
8366 }
8367 else
8368 {
8369 gcall *call = dyn_cast <gcall *> (stmt_info->stmt);
8370 if (!call || !gimple_call_internal_p (call))
8371 return false;
8372
8373 internal_fn ifn = gimple_call_internal_fn (call);
8374 if (!internal_load_fn_p (ifn))
8375 return false;
8376
8377 scalar_dest = gimple_call_lhs (call);
8378 if (!scalar_dest)
8379 return false;
8380
8381 int mask_index = internal_fn_mask_index (ifn);
8382 if (mask_index >= 0)
8383 {
8384 mask = gimple_call_arg (call, mask_index);
8385 if (!vect_check_scalar_mask (vinfo, stmt_info, mask, &mask_dt,
8386 &mask_vectype))
8387 return false;
8388 }
8389 }
8390
8391 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
8392 poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
8393
8394 if (loop_vinfo)
8395 {
8396 loop = LOOP_VINFO_LOOP (loop_vinfo);
8397 nested_in_vect_loop = nested_in_vect_loop_p (loop, stmt_info);
8398 vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
8399 }
8400 else
8401 vf = 1;
8402
8403 /* Multiple types in SLP are handled by creating the appropriate number of
8404 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
8405 case of SLP. */
8406 if (slp)
8407 ncopies = 1;
8408 else
8409 ncopies = vect_get_num_copies (loop_vinfo, vectype);
8410
8411 gcc_assert (ncopies >= 1);
8412
8413 /* FORNOW. This restriction should be relaxed. */
8414 if (nested_in_vect_loop && ncopies > 1)
8415 {
8416 if (dump_enabled_p ())
8417 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
8418 "multiple types in nested loop.\n");
8419 return false;
8420 }
8421
8422 /* Invalidate assumptions made by dependence analysis when vectorization
8423 on the unrolled body effectively re-orders stmts. */
8424 if (ncopies > 1
8425 && STMT_VINFO_MIN_NEG_DIST (stmt_info) != 0
8426 && maybe_gt (LOOP_VINFO_VECT_FACTOR (loop_vinfo),
8427 STMT_VINFO_MIN_NEG_DIST (stmt_info)))
8428 {
8429 if (dump_enabled_p ())
8430 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
8431 "cannot perform implicit CSE when unrolling "
8432 "with negative dependence distance\n");
8433 return false;
8434 }
8435
8436 elem_type = TREE_TYPE (vectype);
8437 mode = TYPE_MODE (vectype);
8438
8439 /* FORNOW. In some cases we can vectorize even if the data type is not
8440 supported (e.g. data copies). */
8441 if (optab_handler (mov_optab, mode) == CODE_FOR_nothing)
8442 {
8443 if (dump_enabled_p ())
8444 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
8445 "Aligned load, but unsupported type.\n");
8446 return false;
8447 }
8448
8449 /* Check if the load is a part of an interleaving chain. */
8450 if (STMT_VINFO_GROUPED_ACCESS (stmt_info))
8451 {
8452 grouped_load = true;
8453 /* FORNOW */
8454 gcc_assert (!nested_in_vect_loop);
8455 gcc_assert (!STMT_VINFO_GATHER_SCATTER_P (stmt_info));
8456
8457 first_stmt_info = DR_GROUP_FIRST_ELEMENT (stmt_info);
8458 group_size = DR_GROUP_SIZE (first_stmt_info);
8459
8460 /* Refuse non-SLP vectorization of SLP-only groups. */
8461 if (!slp && STMT_VINFO_SLP_VECT_ONLY (first_stmt_info))
8462 {
8463 if (dump_enabled_p ())
8464 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
8465 "cannot vectorize load in non-SLP mode.\n");
8466 return false;
8467 }
8468
8469 if (slp && SLP_TREE_LOAD_PERMUTATION (slp_node).exists ())
8470 {
8471 slp_perm = true;
8472
8473 if (!loop_vinfo)
8474 {
8475 /* In BB vectorization we may not actually use a loaded vector
8476 accessing elements in excess of DR_GROUP_SIZE. */
8477 stmt_vec_info group_info = SLP_TREE_SCALAR_STMTS (slp_node)[0];
8478 group_info = DR_GROUP_FIRST_ELEMENT (group_info);
8479 unsigned HOST_WIDE_INT nunits;
8480 unsigned j, k, maxk = 0;
8481 FOR_EACH_VEC_ELT (SLP_TREE_LOAD_PERMUTATION (slp_node), j, k)
8482 if (k > maxk)
8483 maxk = k;
8484 tree vectype = STMT_VINFO_VECTYPE (group_info);
8485 if (!TYPE_VECTOR_SUBPARTS (vectype).is_constant (&nunits)
8486 || maxk >= (DR_GROUP_SIZE (group_info) & ~(nunits - 1)))
8487 {
8488 if (dump_enabled_p ())
8489 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
8490 "BB vectorization with gaps at the end of "
8491 "a load is not supported\n");
8492 return false;
8493 }
8494 }
8495
8496 auto_vec<tree> tem;
8497 unsigned n_perms;
8498 if (!vect_transform_slp_perm_load (vinfo, slp_node, tem, NULL, vf,
8499 true, &n_perms))
8500 {
8501 if (dump_enabled_p ())
8502 dump_printf_loc (MSG_MISSED_OPTIMIZATION,
8503 vect_location,
8504 "unsupported load permutation\n");
8505 return false;
8506 }
8507 }
8508
8509 /* Invalidate assumptions made by dependence analysis when vectorization
8510 on the unrolled body effectively re-orders stmts. */
8511 if (!PURE_SLP_STMT (stmt_info)
8512 && STMT_VINFO_MIN_NEG_DIST (stmt_info) != 0
8513 && maybe_gt (LOOP_VINFO_VECT_FACTOR (loop_vinfo),
8514 STMT_VINFO_MIN_NEG_DIST (stmt_info)))
8515 {
8516 if (dump_enabled_p ())
8517 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
8518 "cannot perform implicit CSE when performing "
8519 "group loads with negative dependence distance\n");
8520 return false;
8521 }
8522 }
8523 else
8524 group_size = 1;
8525
8526 vect_memory_access_type memory_access_type;
8527 enum dr_alignment_support alignment_support_scheme;
8528 if (!get_load_store_type (vinfo, stmt_info, vectype, slp_node, mask, VLS_LOAD,
8529 ncopies, &memory_access_type,
8530 &alignment_support_scheme, &gs_info))
8531 return false;
8532
8533 if (mask)
8534 {
8535 if (memory_access_type == VMAT_CONTIGUOUS)
8536 {
8537 machine_mode vec_mode = TYPE_MODE (vectype);
8538 if (!VECTOR_MODE_P (vec_mode)
8539 || !can_vec_mask_load_store_p (vec_mode,
8540 TYPE_MODE (mask_vectype), true))
8541 return false;
8542 }
8543 else if (memory_access_type != VMAT_LOAD_STORE_LANES
8544 && memory_access_type != VMAT_GATHER_SCATTER)
8545 {
8546 if (dump_enabled_p ())
8547 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
8548 "unsupported access type for masked load.\n");
8549 return false;
8550 }
8551 }
8552
8553 if (!vec_stmt) /* transformation not required. */
8554 {
8555 if (!slp)
8556 STMT_VINFO_MEMORY_ACCESS_TYPE (stmt_info) = memory_access_type;
8557
8558 if (loop_vinfo
8559 && LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo))
8560 check_load_store_for_partial_vectors (loop_vinfo, vectype, VLS_LOAD,
8561 group_size, memory_access_type,
8562 &gs_info, mask);
8563
8564 if (dump_enabled_p ()
8565 && memory_access_type != VMAT_ELEMENTWISE
8566 && memory_access_type != VMAT_GATHER_SCATTER
8567 && alignment_support_scheme != dr_aligned)
8568 dump_printf_loc (MSG_NOTE, vect_location,
8569 "Vectorizing an unaligned access.\n");
8570
8571 STMT_VINFO_TYPE (orig_stmt_info) = load_vec_info_type;
8572 vect_model_load_cost (vinfo, stmt_info, ncopies, vf, memory_access_type,
8573 slp_node, cost_vec);
8574 return true;
8575 }
8576
8577 if (!slp)
8578 gcc_assert (memory_access_type
8579 == STMT_VINFO_MEMORY_ACCESS_TYPE (stmt_info));
8580
8581 if (dump_enabled_p ())
8582 dump_printf_loc (MSG_NOTE, vect_location,
8583 "transform load. ncopies = %d\n", ncopies);
8584
8585 /* Transform. */
8586
8587 dr_vec_info *dr_info = STMT_VINFO_DR_INFO (stmt_info), *first_dr_info = NULL;
8588 ensure_base_align (dr_info);
8589
8590 if (memory_access_type == VMAT_GATHER_SCATTER && gs_info.decl)
8591 {
8592 vect_build_gather_load_calls (vinfo,
8593 stmt_info, gsi, vec_stmt, &gs_info, mask);
8594 return true;
8595 }
8596
8597 if (memory_access_type == VMAT_INVARIANT)
8598 {
8599 gcc_assert (!grouped_load && !mask && !bb_vinfo);
8600 /* If we have versioned for aliasing or the loop doesn't
8601 have any data dependencies that would preclude this,
8602 then we are sure this is a loop invariant load and
8603 thus we can insert it on the preheader edge. */
8604 bool hoist_p = (LOOP_VINFO_NO_DATA_DEPENDENCIES (loop_vinfo)
8605 && !nested_in_vect_loop
8606 && hoist_defs_of_uses (stmt_info, loop));
8607 if (hoist_p)
8608 {
8609 gassign *stmt = as_a <gassign *> (stmt_info->stmt);
8610 if (dump_enabled_p ())
8611 dump_printf_loc (MSG_NOTE, vect_location,
8612 "hoisting out of the vectorized loop: %G", stmt);
8613 scalar_dest = copy_ssa_name (scalar_dest);
8614 tree rhs = unshare_expr (gimple_assign_rhs1 (stmt));
8615 gsi_insert_on_edge_immediate
8616 (loop_preheader_edge (loop),
8617 gimple_build_assign (scalar_dest, rhs));
8618 }
8619 /* These copies are all equivalent, but currently the representation
8620 requires a separate STMT_VINFO_VEC_STMT for each one. */
8621 gimple_stmt_iterator gsi2 = *gsi;
8622 gsi_next (&gsi2);
8623 for (j = 0; j < ncopies; j++)
8624 {
8625 if (hoist_p)
8626 new_temp = vect_init_vector (vinfo, stmt_info, scalar_dest,
8627 vectype, NULL);
8628 else
8629 new_temp = vect_init_vector (vinfo, stmt_info, scalar_dest,
8630 vectype, &gsi2);
8631 gimple *new_stmt = SSA_NAME_DEF_STMT (new_temp);
8632 if (slp)
8633 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
8634 else
8635 {
8636 if (j == 0)
8637 *vec_stmt = new_stmt;
8638 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
8639 }
8640 }
8641 return true;
8642 }
8643
8644 if (memory_access_type == VMAT_ELEMENTWISE
8645 || memory_access_type == VMAT_STRIDED_SLP)
8646 {
8647 gimple_stmt_iterator incr_gsi;
8648 bool insert_after;
8649 tree offvar;
8650 tree ivstep;
8651 tree running_off;
8652 vec<constructor_elt, va_gc> *v = NULL;
8653 tree stride_base, stride_step, alias_off;
8654 /* Checked by get_load_store_type. */
8655 unsigned int const_nunits = nunits.to_constant ();
8656 unsigned HOST_WIDE_INT cst_offset = 0;
8657 tree dr_offset;
8658
8659 gcc_assert (!LOOP_VINFO_USING_PARTIAL_VECTORS_P (loop_vinfo));
8660 gcc_assert (!nested_in_vect_loop);
8661
8662 if (grouped_load)
8663 {
8664 first_stmt_info = DR_GROUP_FIRST_ELEMENT (stmt_info);
8665 first_dr_info = STMT_VINFO_DR_INFO (first_stmt_info);
8666 }
8667 else
8668 {
8669 first_stmt_info = stmt_info;
8670 first_dr_info = dr_info;
8671 }
8672 if (slp && grouped_load)
8673 {
8674 group_size = DR_GROUP_SIZE (first_stmt_info);
8675 ref_type = get_group_alias_ptr_type (first_stmt_info);
8676 }
8677 else
8678 {
8679 if (grouped_load)
8680 cst_offset
8681 = (tree_to_uhwi (TYPE_SIZE_UNIT (TREE_TYPE (vectype)))
8682 * vect_get_place_in_interleaving_chain (stmt_info,
8683 first_stmt_info));
8684 group_size = 1;
8685 ref_type = reference_alias_ptr_type (DR_REF (dr_info->dr));
8686 }
8687
8688 dr_offset = get_dr_vinfo_offset (vinfo, first_dr_info);
8689 stride_base
8690 = fold_build_pointer_plus
8691 (DR_BASE_ADDRESS (first_dr_info->dr),
8692 size_binop (PLUS_EXPR,
8693 convert_to_ptrofftype (dr_offset),
8694 convert_to_ptrofftype (DR_INIT (first_dr_info->dr))));
8695 stride_step = fold_convert (sizetype, DR_STEP (first_dr_info->dr));
8696
8697 /* For a load with a loop-invariant (but not necessarily power-of-two)
8698 stride (i.e. not a grouped access) like so:
8699
8700 for (i = 0; i < n; i += stride)
8701 ... = array[i];
8702
8703 we generate a new induction variable and new accesses to
8704 form a new vector (or vectors, depending on ncopies):
8705
8706 for (j = 0; ; j += VF*stride)
8707 tmp1 = array[j];
8708 tmp2 = array[j + stride];
8709 ...
8710 vectemp = {tmp1, tmp2, ...}
8711 */
8712
8713 ivstep = fold_build2 (MULT_EXPR, TREE_TYPE (stride_step), stride_step,
8714 build_int_cst (TREE_TYPE (stride_step), vf));
8715
8716 standard_iv_increment_position (loop, &incr_gsi, &insert_after);
8717
8718 stride_base = cse_and_gimplify_to_preheader (loop_vinfo, stride_base);
8719 ivstep = cse_and_gimplify_to_preheader (loop_vinfo, ivstep);
8720 create_iv (stride_base, ivstep, NULL,
8721 loop, &incr_gsi, insert_after,
8722 &offvar, NULL);
8723
8724 stride_step = cse_and_gimplify_to_preheader (loop_vinfo, stride_step);
8725
8726 running_off = offvar;
8727 alias_off = build_int_cst (ref_type, 0);
8728 int nloads = const_nunits;
8729 int lnel = 1;
8730 tree ltype = TREE_TYPE (vectype);
8731 tree lvectype = vectype;
8732 auto_vec<tree> dr_chain;
8733 if (memory_access_type == VMAT_STRIDED_SLP)
8734 {
8735 if (group_size < const_nunits)
8736 {
8737 /* First check if vec_init optab supports construction from vector
8738 elts directly. Otherwise avoid emitting a constructor of
8739 vector elements by performing the loads using an integer type
8740 of the same size, constructing a vector of those and then
8741 re-interpreting it as the original vector type. This avoids a
8742 huge runtime penalty due to the general inability to perform
8743 store forwarding from smaller stores to a larger load. */
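   /* Illustrative example (types assumed for exposition): for a V8HI
      vectype and group_size == 2 this requests a composition type with
      const_nunits / group_size == 4 pieces; if the target cannot build a
      vector from V2HI sub-vectors, each pair of HImode elements is loaded
      as one 32-bit integer chunk, a V4SI is constructed from the four
      chunks, and the result is VIEW_CONVERTed back to V8HI below.  */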
8744 tree ptype;
8745 tree vtype
8746 = vector_vector_composition_type (vectype,
8747 const_nunits / group_size,
8748 &ptype);
8749 if (vtype != NULL_TREE)
8750 {
8751 nloads = const_nunits / group_size;
8752 lnel = group_size;
8753 lvectype = vtype;
8754 ltype = ptype;
8755 }
8756 }
8757 else
8758 {
8759 nloads = 1;
8760 lnel = const_nunits;
8761 ltype = vectype;
8762 }
8763 ltype = build_aligned_type (ltype, TYPE_ALIGN (TREE_TYPE (vectype)));
8764 }
8765 /* Load the whole vector(1) scalar_type when the vectype has just one element. */
8766 else if (nloads == 1)
8767 ltype = vectype;
8768
8769 if (slp)
8770 {
8771 /* For SLP permutation support we need to load the whole group,
8772 not only the number of vector stmts the permutation result
8773 fits in. */
8774 if (slp_perm)
8775 {
8776 /* We don't yet generate SLP_TREE_LOAD_PERMUTATIONs for
8777 variable VF. */
8778 unsigned int const_vf = vf.to_constant ();
8779 ncopies = CEIL (group_size * const_vf, const_nunits);
8780 dr_chain.create (ncopies);
8781 }
8782 else
8783 ncopies = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
8784 }
8785 unsigned int group_el = 0;
8786 unsigned HOST_WIDE_INT
8787 elsz = tree_to_uhwi (TYPE_SIZE_UNIT (TREE_TYPE (vectype)));
8788 for (j = 0; j < ncopies; j++)
8789 {
8790 if (nloads > 1)
8791 vec_alloc (v, nloads);
8792 gimple *new_stmt = NULL;
8793 for (i = 0; i < nloads; i++)
8794 {
8795 tree this_off = build_int_cst (TREE_TYPE (alias_off),
8796 group_el * elsz + cst_offset);
8797 tree data_ref = build2 (MEM_REF, ltype, running_off, this_off);
8798 vect_copy_ref_info (data_ref, DR_REF (first_dr_info->dr));
8799 new_stmt = gimple_build_assign (make_ssa_name (ltype), data_ref);
8800 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
8801 if (nloads > 1)
8802 CONSTRUCTOR_APPEND_ELT (v, NULL_TREE,
8803 gimple_assign_lhs (new_stmt));
8804
8805 group_el += lnel;
8806 if (! slp
8807 || group_el == group_size)
8808 {
8809 tree newoff = copy_ssa_name (running_off);
8810 gimple *incr = gimple_build_assign (newoff, POINTER_PLUS_EXPR,
8811 running_off, stride_step);
8812 vect_finish_stmt_generation (vinfo, stmt_info, incr, gsi);
8813
8814 running_off = newoff;
8815 group_el = 0;
8816 }
8817 }
8818 if (nloads > 1)
8819 {
8820 tree vec_inv = build_constructor (lvectype, v);
8821 new_temp = vect_init_vector (vinfo, stmt_info,
8822 vec_inv, lvectype, gsi);
8823 new_stmt = SSA_NAME_DEF_STMT (new_temp);
8824 if (lvectype != vectype)
8825 {
8826 new_stmt = gimple_build_assign (make_ssa_name (vectype),
8827 VIEW_CONVERT_EXPR,
8828 build1 (VIEW_CONVERT_EXPR,
8829 vectype, new_temp));
8830 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
8831 }
8832 }
8833
8834 if (slp)
8835 {
8836 if (slp_perm)
8837 dr_chain.quick_push (gimple_assign_lhs (new_stmt));
8838 else
8839 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
8840 }
8841 else
8842 {
8843 if (j == 0)
8844 *vec_stmt = new_stmt;
8845 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
8846 }
8847 }
8848 if (slp_perm)
8849 {
8850 unsigned n_perms;
8851 vect_transform_slp_perm_load (vinfo, slp_node, dr_chain, gsi, vf,
8852 false, &n_perms);
8853 }
8854 return true;
8855 }
8856
8857 if (memory_access_type == VMAT_GATHER_SCATTER
8858 || (!slp && memory_access_type == VMAT_CONTIGUOUS))
8859 grouped_load = false;
8860
8861 if (grouped_load)
8862 {
8863 first_stmt_info = DR_GROUP_FIRST_ELEMENT (stmt_info);
8864 group_size = DR_GROUP_SIZE (first_stmt_info);
8865 /* For SLP vectorization we directly vectorize a subchain
8866 without permutation. */
8867 if (slp && ! SLP_TREE_LOAD_PERMUTATION (slp_node).exists ())
8868 first_stmt_info = SLP_TREE_SCALAR_STMTS (slp_node)[0];
8869 /* For BB vectorization always use the first stmt to base
8870 the data ref pointer on. */
8871 if (bb_vinfo)
8872 first_stmt_info_for_drptr
8873 = vect_find_first_scalar_stmt_in_slp (slp_node);
8874
8875 /* Check if the chain of loads is already vectorized. */
8876 if (STMT_VINFO_VEC_STMTS (first_stmt_info).exists ()
8877 /* For SLP we would need to copy over SLP_TREE_VEC_STMTS.
8878 ??? But we can only do so if there is exactly one
8879 as we have no way to get at the rest. Leave the CSE
8880 opportunity alone.
8881 ??? With the group load eventually participating
8882 in multiple different permutations (having multiple
8883 slp nodes which refer to the same group) the CSE
8884 is even wrong code. See PR56270. */
8885 && !slp)
8886 {
8887 *vec_stmt = STMT_VINFO_VEC_STMTS (stmt_info)[0];
8888 return true;
8889 }
8890 first_dr_info = STMT_VINFO_DR_INFO (first_stmt_info);
8891 group_gap_adj = 0;
8892
8893 /* VEC_NUM is the number of vect stmts to be created for this group. */
8894 if (slp)
8895 {
8896 grouped_load = false;
8897 /* If an SLP permutation is from N elements to N elements,
8898 and if one vector holds a whole number of N, we can load
8899 the inputs to the permutation in the same way as an
8900 unpermuted sequence. In other cases we need to load the
8901 whole group, not only the number of vector stmts the
8902 permutation result fits in. */
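   /* Illustrative example (numbers assumed for exposition): a permutation
      of a 4-element group into 4 lanes with V8SI vectors (nunits a
      multiple of the group size) can reuse the unpermuted load scheme,
      whereas a permutation producing only 2 lanes out of a 4-element
      group must still load the whole group, so vec_num is computed from
      group_size * vf below.  */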
8903 unsigned scalar_lanes = SLP_TREE_LANES (slp_node);
8904 if (slp_perm
8905 && (group_size != scalar_lanes
8906 || !multiple_p (nunits, group_size)))
8907 {
8908 /* We don't yet generate such SLP_TREE_LOAD_PERMUTATIONs for
8909 variable VF; see vect_transform_slp_perm_load. */
8910 unsigned int const_vf = vf.to_constant ();
8911 unsigned int const_nunits = nunits.to_constant ();
8912 vec_num = CEIL (group_size * const_vf, const_nunits);
8913 group_gap_adj = vf * group_size - nunits * vec_num;
8914 }
8915 else
8916 {
8917 vec_num = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
8918 group_gap_adj
8919 = group_size - scalar_lanes;
8920 }
8921 }
8922 else
8923 vec_num = group_size;
8924
8925 ref_type = get_group_alias_ptr_type (first_stmt_info);
8926 }
8927 else
8928 {
8929 first_stmt_info = stmt_info;
8930 first_dr_info = dr_info;
8931 group_size = vec_num = 1;
8932 group_gap_adj = 0;
8933 ref_type = reference_alias_ptr_type (DR_REF (first_dr_info->dr));
8934 }
8935
8936 gcc_assert (alignment_support_scheme);
8937 vec_loop_masks *loop_masks
8938 = (loop_vinfo && LOOP_VINFO_FULLY_MASKED_P (loop_vinfo)
8939 ? &LOOP_VINFO_MASKS (loop_vinfo)
8940 : NULL);
8941 vec_loop_lens *loop_lens
8942 = (loop_vinfo && LOOP_VINFO_FULLY_WITH_LENGTH_P (loop_vinfo)
8943 ? &LOOP_VINFO_LENS (loop_vinfo)
8944 : NULL);
8945
8946 /* Shouldn't go with length-based approach if fully masked. */
8947 gcc_assert (!loop_lens || !loop_masks);
8948
8949 /* Targets with load-lanes instructions must not require explicit
8950 realignment. vect_supportable_dr_alignment always returns either
8951 dr_aligned or dr_unaligned_supported for masked operations. */
8952 gcc_assert ((memory_access_type != VMAT_LOAD_STORE_LANES
8953 && !mask
8954 && !loop_masks)
8955 || alignment_support_scheme == dr_aligned
8956 || alignment_support_scheme == dr_unaligned_supported);
8957
8958 /* In case the vectorization factor (VF) is bigger than the number
8959 of elements that we can fit in a vectype (nunits), we have to generate
8960 more than one vector stmt - i.e. - we need to "unroll" the
8961 vector stmt by a factor VF/nunits. In doing so, we record a pointer
8962 from one copy of the vector stmt to the next, in the field
8963 STMT_VINFO_RELATED_STMT. This is necessary in order to allow following
8964 stages to find the correct vector defs to be used when vectorizing
8965 stmts that use the defs of the current stmt. The example below
8966 illustrates the vectorization process when VF=16 and nunits=4 (i.e., we
8967 need to create 4 vectorized stmts):
8968
8969 before vectorization:
8970 RELATED_STMT VEC_STMT
8971 S1: x = memref - -
8972 S2: z = x + 1 - -
8973
8974 step 1: vectorize stmt S1:
8975 We first create the vector stmt VS1_0, and, as usual, record a
8976 pointer to it in the STMT_VINFO_VEC_STMT of the scalar stmt S1.
8977 Next, we create the vector stmt VS1_1, and record a pointer to
8978 it in the STMT_VINFO_RELATED_STMT of the vector stmt VS1_0.
8979 Similarly, for VS1_2 and VS1_3. This is the resulting chain of
8980 stmts and pointers:
8981 RELATED_STMT VEC_STMT
8982 VS1_0: vx0 = memref0 VS1_1 -
8983 VS1_1: vx1 = memref1 VS1_2 -
8984 VS1_2: vx2 = memref2 VS1_3 -
8985 VS1_3: vx3 = memref3 - -
8986 S1: x = load - VS1_0
8987 S2: z = x + 1 - -
8988 */
8989
8990 /* In case of interleaving (non-unit grouped access):
8991
8992 S1: x2 = &base + 2
8993 S2: x0 = &base
8994 S3: x1 = &base + 1
8995 S4: x3 = &base + 3
8996
8997 Vectorized loads are created in the order of memory accesses
8998 starting from the access of the first stmt of the chain:
8999
9000 VS1: vx0 = &base
9001 VS2: vx1 = &base + vec_size*1
9002 VS3: vx3 = &base + vec_size*2
9003 VS4: vx4 = &base + vec_size*3
9004
9005 Then permutation statements are generated:
9006
9007 VS5: vx5 = VEC_PERM_EXPR < vx0, vx1, { 0, 2, ..., i*2 } >
9008 VS6: vx6 = VEC_PERM_EXPR < vx0, vx1, { 1, 3, ..., i*2+1 } >
9009 ...
9010
9011 And they are put in STMT_VINFO_VEC_STMT of the corresponding scalar stmts
9012 (the order of the data-refs in the output of vect_permute_load_chain
9013 corresponds to the order of scalar stmts in the interleaving chain - see
9014 the documentation of vect_permute_load_chain()).
9015 The generation of permutation stmts and recording them in
9016 STMT_VINFO_VEC_STMT is done in vect_transform_grouped_load().
9017
9018 In case of both multiple types and interleaving, the vector loads and
9019 permutation stmts above are created for every copy. The result vector
9020 stmts are put in STMT_VINFO_VEC_STMT for the first copy and in the
9021 corresponding STMT_VINFO_RELATED_STMT for the next copies. */
9022
9023 /* If the data reference is aligned (dr_aligned) or potentially unaligned
9024 on a target that supports unaligned accesses (dr_unaligned_supported)
9025 we generate the following code:
9026 p = initial_addr;
9027 indx = 0;
9028 loop {
9029 p = p + indx * vectype_size;
9030 vec_dest = *(p);
9031 indx = indx + 1;
9032 }
9033
9034 Otherwise, the data reference is potentially unaligned on a target that
9035 does not support unaligned accesses (dr_explicit_realign_optimized) -
9036 then generate the following code, in which the data in each iteration is
9037 obtained by two vector loads, one from the previous iteration, and one
9038 from the current iteration:
9039 p1 = initial_addr;
9040 msq_init = *(floor(p1))
9041 p2 = initial_addr + VS - 1;
9042 realignment_token = call target_builtin;
9043 indx = 0;
9044 loop {
9045 p2 = p2 + indx * vectype_size
9046 lsq = *(floor(p2))
9047 vec_dest = realign_load (msq, lsq, realignment_token)
9048 indx = indx + 1;
9049 msq = lsq;
9050 } */
9051
9052 /* If the misalignment remains the same throughout the execution of the
9053 loop, we can create the init_addr and permutation mask at the loop
9054 preheader. Otherwise, it needs to be created inside the loop.
9055 This can only occur when vectorizing memory accesses in the inner-loop
9056 nested within an outer-loop that is being vectorized. */
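   /* Illustrative example (layout assumed for exposition):

        for (i = 0; i < n; i++)        <- outer loop being vectorized
          for (j = 0; j < m; j++)      <- inner loop
            ... = a[i][j];

      unless the row length of A is a multiple of the vector size, the
      misalignment of &a[i][0] changes with every outer iteration, so the
      realignment data cannot be computed once in the preheader and has
      to be computed inside the loop.  */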
9057
9058 if (nested_in_vect_loop
9059 && !multiple_p (DR_STEP_ALIGNMENT (dr_info->dr),
9060 GET_MODE_SIZE (TYPE_MODE (vectype))))
9061 {
9062 gcc_assert (alignment_support_scheme != dr_explicit_realign_optimized);
9063 compute_in_loop = true;
9064 }
9065
9066 bool diff_first_stmt_info
9067 = first_stmt_info_for_drptr && first_stmt_info != first_stmt_info_for_drptr;
9068
9069 if ((alignment_support_scheme == dr_explicit_realign_optimized
9070 || alignment_support_scheme == dr_explicit_realign)
9071 && !compute_in_loop)
9072 {
9073 /* If we have a different first_stmt_info, we can't set up realignment
9074 here, since we can't guarantee that the first_stmt_info DR has been
9075 initialized yet; instead use the first_stmt_info_for_drptr DR and bump
9076 it by the distance from the first_stmt_info DR as below. */
9077 if (!diff_first_stmt_info)
9078 msq = vect_setup_realignment (vinfo,
9079 first_stmt_info, gsi, &realignment_token,
9080 alignment_support_scheme, NULL_TREE,
9081 &at_loop);
9082 if (alignment_support_scheme == dr_explicit_realign_optimized)
9083 {
9084 phi = as_a <gphi *> (SSA_NAME_DEF_STMT (msq));
9085 byte_offset = size_binop (MINUS_EXPR, TYPE_SIZE_UNIT (vectype),
9086 size_one_node);
9087 gcc_assert (!first_stmt_info_for_drptr);
9088 }
9089 }
9090 else
9091 at_loop = loop;
9092
9093 if (memory_access_type == VMAT_CONTIGUOUS_REVERSE)
9094 offset = size_int (-TYPE_VECTOR_SUBPARTS (vectype) + 1);
9095
9096 tree bump;
9097 tree vec_offset = NULL_TREE;
9098 if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
9099 {
9100 aggr_type = NULL_TREE;
9101 bump = NULL_TREE;
9102 }
9103 else if (memory_access_type == VMAT_GATHER_SCATTER)
9104 {
9105 aggr_type = elem_type;
9106 vect_get_strided_load_store_ops (stmt_info, loop_vinfo, &gs_info,
9107 &bump, &vec_offset);
9108 }
9109 else
9110 {
9111 if (memory_access_type == VMAT_LOAD_STORE_LANES)
9112 aggr_type = build_array_type_nelts (elem_type, vec_num * nunits);
9113 else
9114 aggr_type = vectype;
9115 bump = vect_get_data_ptr_increment (vinfo, dr_info, aggr_type,
9116 memory_access_type);
9117 }
9118
9119 vec<tree> vec_offsets = vNULL;
9120 auto_vec<tree> vec_masks;
9121 if (mask)
9122 vect_get_vec_defs (vinfo, stmt_info, slp_node, ncopies,
9123 mask, &vec_masks, mask_vectype, NULL_TREE);
9124 tree vec_mask = NULL_TREE;
9125 poly_uint64 group_elt = 0;
9126 for (j = 0; j < ncopies; j++)
9127 {
9128 /* 1. Create the vector or array pointer update chain. */
9129 if (j == 0)
9130 {
9131 bool simd_lane_access_p
9132 = STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) != 0;
9133 if (simd_lane_access_p
9134 && TREE_CODE (DR_BASE_ADDRESS (first_dr_info->dr)) == ADDR_EXPR
9135 && VAR_P (TREE_OPERAND (DR_BASE_ADDRESS (first_dr_info->dr), 0))
9136 && integer_zerop (get_dr_vinfo_offset (vinfo, first_dr_info))
9137 && integer_zerop (DR_INIT (first_dr_info->dr))
9138 && alias_sets_conflict_p (get_alias_set (aggr_type),
9139 get_alias_set (TREE_TYPE (ref_type)))
9140 && (alignment_support_scheme == dr_aligned
9141 || alignment_support_scheme == dr_unaligned_supported))
9142 {
9143 dataref_ptr = unshare_expr (DR_BASE_ADDRESS (first_dr_info->dr));
9144 dataref_offset = build_int_cst (ref_type, 0);
9145 }
9146 else if (diff_first_stmt_info)
9147 {
9148 dataref_ptr
9149 = vect_create_data_ref_ptr (vinfo, first_stmt_info_for_drptr,
9150 aggr_type, at_loop, offset, &dummy,
9151 gsi, &ptr_incr, simd_lane_access_p,
9152 byte_offset, bump);
9153 /* Adjust the pointer by the difference to first_stmt. */
9154 data_reference_p ptrdr
9155 = STMT_VINFO_DATA_REF (first_stmt_info_for_drptr);
9156 tree diff
9157 = fold_convert (sizetype,
9158 size_binop (MINUS_EXPR,
9159 DR_INIT (first_dr_info->dr),
9160 DR_INIT (ptrdr)));
9161 dataref_ptr = bump_vector_ptr (vinfo, dataref_ptr, ptr_incr, gsi,
9162 stmt_info, diff);
9163 if (alignment_support_scheme == dr_explicit_realign)
9164 {
9165 msq = vect_setup_realignment (vinfo,
9166 first_stmt_info_for_drptr, gsi,
9167 &realignment_token,
9168 alignment_support_scheme,
9169 dataref_ptr, &at_loop);
9170 gcc_assert (!compute_in_loop);
9171 }
9172 }
9173 else if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
9174 {
9175 vect_get_gather_scatter_ops (vinfo, loop, stmt_info, &gs_info,
9176 &dataref_ptr, &vec_offsets, ncopies);
9177 vec_offset = vec_offsets[0];
9178 }
9179 else
9180 dataref_ptr
9181 = vect_create_data_ref_ptr (vinfo, first_stmt_info, aggr_type,
9182 at_loop,
9183 offset, &dummy, gsi, &ptr_incr,
9184 simd_lane_access_p,
9185 byte_offset, bump);
9186 if (mask)
9187 vec_mask = vec_masks[0];
9188 }
9189 else
9190 {
9191 if (dataref_offset)
9192 dataref_offset = int_const_binop (PLUS_EXPR, dataref_offset,
9193 bump);
9194 else if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
9195 vec_offset = vec_offsets[j];
9196 else
9197 dataref_ptr = bump_vector_ptr (vinfo, dataref_ptr, ptr_incr, gsi,
9198 stmt_info, bump);
9199 if (mask)
9200 vec_mask = vec_masks[j];
9201 }
9202
9203 if (grouped_load || slp_perm)
9204 dr_chain.create (vec_num);
9205
9206 gimple *new_stmt = NULL;
9207 if (memory_access_type == VMAT_LOAD_STORE_LANES)
9208 {
9209 tree vec_array;
9210
9211 vec_array = create_vector_array (vectype, vec_num);
9212
9213 tree final_mask = NULL_TREE;
9214 if (loop_masks)
9215 final_mask = vect_get_loop_mask (gsi, loop_masks, ncopies,
9216 vectype, j);
9217 if (vec_mask)
9218 final_mask = prepare_load_store_mask (mask_vectype, final_mask,
9219 vec_mask, gsi);
9220
9221 gcall *call;
9222 if (final_mask)
9223 {
9224 /* Emit:
9225 VEC_ARRAY = MASK_LOAD_LANES (DATAREF_PTR, ALIAS_PTR,
9226 VEC_MASK). */
9227 unsigned int align = TYPE_ALIGN_UNIT (TREE_TYPE (vectype));
9228 tree alias_ptr = build_int_cst (ref_type, align);
9229 call = gimple_build_call_internal (IFN_MASK_LOAD_LANES, 3,
9230 dataref_ptr, alias_ptr,
9231 final_mask);
9232 }
9233 else
9234 {
9235 /* Emit:
9236 VEC_ARRAY = LOAD_LANES (MEM_REF[...all elements...]). */
9237 data_ref = create_array_ref (aggr_type, dataref_ptr, ref_type);
9238 call = gimple_build_call_internal (IFN_LOAD_LANES, 1, data_ref);
9239 }
9240 gimple_call_set_lhs (call, vec_array);
9241 gimple_call_set_nothrow (call, true);
9242 vect_finish_stmt_generation (vinfo, stmt_info, call, gsi);
9243 new_stmt = call;
9244
9245 /* Extract each vector into an SSA_NAME. */
9246 for (i = 0; i < vec_num; i++)
9247 {
9248 new_temp = read_vector_array (vinfo, stmt_info, gsi, scalar_dest,
9249 vec_array, i);
9250 dr_chain.quick_push (new_temp);
9251 }
9252
9253 /* Record the mapping between SSA_NAMEs and statements. */
9254 vect_record_grouped_load_vectors (vinfo, stmt_info, dr_chain);
9255
9256 /* Record that VEC_ARRAY is now dead. */
9257 vect_clobber_variable (vinfo, stmt_info, gsi, vec_array);
9258 }
9259 else
9260 {
9261 for (i = 0; i < vec_num; i++)
9262 {
9263 tree final_mask = NULL_TREE;
9264 if (loop_masks
9265 && memory_access_type != VMAT_INVARIANT)
9266 final_mask = vect_get_loop_mask (gsi, loop_masks,
9267 vec_num * ncopies,
9268 vectype, vec_num * j + i);
9269 if (vec_mask)
9270 final_mask = prepare_load_store_mask (mask_vectype, final_mask,
9271 vec_mask, gsi);
9272
9273 if (i > 0)
9274 dataref_ptr = bump_vector_ptr (vinfo, dataref_ptr, ptr_incr,
9275 gsi, stmt_info, bump);
9276
9277 /* 2. Create the vector-load in the loop. */
9278 switch (alignment_support_scheme)
9279 {
9280 case dr_aligned:
9281 case dr_unaligned_supported:
9282 {
9283 unsigned int misalign;
9284 unsigned HOST_WIDE_INT align;
9285
9286 if (memory_access_type == VMAT_GATHER_SCATTER)
9287 {
9288 tree zero = build_zero_cst (vectype);
9289 tree scale = size_int (gs_info.scale);
9290 gcall *call;
9291 if (loop_masks)
9292 call = gimple_build_call_internal
9293 (IFN_MASK_GATHER_LOAD, 5, dataref_ptr,
9294 vec_offset, scale, zero, final_mask);
9295 else
9296 call = gimple_build_call_internal
9297 (IFN_GATHER_LOAD, 4, dataref_ptr,
9298 vec_offset, scale, zero);
9299 gimple_call_set_nothrow (call, true);
9300 new_stmt = call;
9301 data_ref = NULL_TREE;
9302 break;
9303 }
9304
9305 align =
9306 known_alignment (DR_TARGET_ALIGNMENT (first_dr_info));
9307 if (alignment_support_scheme == dr_aligned)
9308 {
9309 gcc_assert (aligned_access_p (first_dr_info));
9310 misalign = 0;
9311 }
9312 else if (DR_MISALIGNMENT (first_dr_info) == -1)
9313 {
9314 align = dr_alignment
9315 (vect_dr_behavior (vinfo, first_dr_info));
9316 misalign = 0;
9317 }
9318 else
9319 misalign = DR_MISALIGNMENT (first_dr_info);
9320 if (dataref_offset == NULL_TREE
9321 && TREE_CODE (dataref_ptr) == SSA_NAME)
9322 set_ptr_info_alignment (get_ptr_info (dataref_ptr),
9323 align, misalign);
9324
9325 if (final_mask)
9326 {
9327 align = least_bit_hwi (misalign | align);
9328 tree ptr = build_int_cst (ref_type, align);
9329 gcall *call
9330 = gimple_build_call_internal (IFN_MASK_LOAD, 3,
9331 dataref_ptr, ptr,
9332 final_mask);
9333 gimple_call_set_nothrow (call, true);
9334 new_stmt = call;
9335 data_ref = NULL_TREE;
9336 }
9337 else if (loop_lens && memory_access_type != VMAT_INVARIANT)
9338 {
9339 tree final_len
9340 = vect_get_loop_len (loop_vinfo, loop_lens,
9341 vec_num * ncopies,
9342 vec_num * j + i);
9343 align = least_bit_hwi (misalign | align);
9344 tree ptr = build_int_cst (ref_type, align);
9345 gcall *call
9346 = gimple_build_call_internal (IFN_LEN_LOAD, 3,
9347 dataref_ptr, ptr,
9348 final_len);
9349 gimple_call_set_nothrow (call, true);
9350 new_stmt = call;
9351 data_ref = NULL_TREE;
9352
9353 /* Need conversion if it's wrapped with VnQI. */
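 /* Illustrative note (target behaviour assumed): some targets provide
    the length-based load only for byte vectors, so a V4SI load may be
    emitted in a VnQI mode here and VIEW_CONVERTed back to the original
    vectype below.  */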
9354 machine_mode vmode = TYPE_MODE (vectype);
9355 opt_machine_mode new_ovmode
9356 = get_len_load_store_mode (vmode, true);
9357 machine_mode new_vmode = new_ovmode.require ();
9358 if (vmode != new_vmode)
9359 {
9360 tree qi_type = unsigned_intQI_type_node;
9361 tree new_vtype
9362 = build_vector_type_for_mode (qi_type, new_vmode);
9363 tree var = vect_get_new_ssa_name (new_vtype,
9364 vect_simple_var);
9365 gimple_set_lhs (call, var);
9366 vect_finish_stmt_generation (vinfo, stmt_info, call,
9367 gsi);
9368 tree op = build1 (VIEW_CONVERT_EXPR, vectype, var);
9369 new_stmt
9370 = gimple_build_assign (vec_dest,
9371 VIEW_CONVERT_EXPR, op);
9372 }
9373 }
9374 else
9375 {
9376 tree ltype = vectype;
9377 tree new_vtype = NULL_TREE;
9378 unsigned HOST_WIDE_INT gap
9379 = DR_GROUP_GAP (first_stmt_info);
9380 unsigned int vect_align
9381 = vect_known_alignment_in_bytes (first_dr_info);
9382 unsigned int scalar_dr_size
9383 = vect_get_scalar_dr_size (first_dr_info);
9384 /* If there's no peeling for gaps but we have a gap
9385 with slp loads then load the lower half of the
9386 vector only. See get_group_load_store_type for
9387 when we apply this optimization. */
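  /* Illustrative example (numbers assumed for exposition): for a
     group_size of 4 with gap 2 and a V4SI vectype, only the two live
     SImode elements are loaded as one half-vector and the constructor
     built below pads the other half with zeros, so no elements beyond
     the group are read.  */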
9388 if (slp
9389 && loop_vinfo
9390 && !LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo)
9391 && gap != 0
9392 && known_eq (nunits, (group_size - gap) * 2)
9393 && known_eq (nunits, group_size)
9394 && gap >= (vect_align / scalar_dr_size))
9395 {
9396 tree half_vtype;
9397 new_vtype
9398 = vector_vector_composition_type (vectype, 2,
9399 &half_vtype);
9400 if (new_vtype != NULL_TREE)
9401 ltype = half_vtype;
9402 }
9403 tree offset
9404 = (dataref_offset ? dataref_offset
9405 : build_int_cst (ref_type, 0));
9406 if (ltype != vectype
9407 && memory_access_type == VMAT_CONTIGUOUS_REVERSE)
9408 {
9409 unsigned HOST_WIDE_INT gap_offset
9410 = gap * tree_to_uhwi (TYPE_SIZE_UNIT (elem_type));
9411 tree gapcst = build_int_cst (ref_type, gap_offset);
9412 offset = size_binop (PLUS_EXPR, offset, gapcst);
9413 }
9414 data_ref
9415 = fold_build2 (MEM_REF, ltype, dataref_ptr, offset);
9416 if (alignment_support_scheme == dr_aligned)
9417 ;
9418 else if (DR_MISALIGNMENT (first_dr_info) == -1)
9419 TREE_TYPE (data_ref)
9420 = build_aligned_type (TREE_TYPE (data_ref),
9421 align * BITS_PER_UNIT);
9422 else
9423 TREE_TYPE (data_ref)
9424 = build_aligned_type (TREE_TYPE (data_ref),
9425 TYPE_ALIGN (elem_type));
9426 if (ltype != vectype)
9427 {
9428 vect_copy_ref_info (data_ref,
9429 DR_REF (first_dr_info->dr));
9430 tree tem = make_ssa_name (ltype);
9431 new_stmt = gimple_build_assign (tem, data_ref);
9432 vect_finish_stmt_generation (vinfo, stmt_info,
9433 new_stmt, gsi);
9434 data_ref = NULL;
9435 vec<constructor_elt, va_gc> *v;
9436 vec_alloc (v, 2);
9437 if (memory_access_type == VMAT_CONTIGUOUS_REVERSE)
9438 {
9439 CONSTRUCTOR_APPEND_ELT (v, NULL_TREE,
9440 build_zero_cst (ltype));
9441 CONSTRUCTOR_APPEND_ELT (v, NULL_TREE, tem);
9442 }
9443 else
9444 {
9445 CONSTRUCTOR_APPEND_ELT (v, NULL_TREE, tem);
9446 CONSTRUCTOR_APPEND_ELT (v, NULL_TREE,
9447 build_zero_cst (ltype));
9448 }
9449 gcc_assert (new_vtype != NULL_TREE);
9450 if (new_vtype == vectype)
9451 new_stmt = gimple_build_assign (
9452 vec_dest, build_constructor (vectype, v));
9453 else
9454 {
9455 tree new_vname = make_ssa_name (new_vtype);
9456 new_stmt = gimple_build_assign (
9457 new_vname, build_constructor (new_vtype, v));
9458 vect_finish_stmt_generation (vinfo, stmt_info,
9459 new_stmt, gsi);
9460 new_stmt = gimple_build_assign (
9461 vec_dest, build1 (VIEW_CONVERT_EXPR, vectype,
9462 new_vname));
9463 }
9464 }
9465 }
9466 break;
9467 }
9468 case dr_explicit_realign:
9469 {
9470 tree ptr, bump;
9471
9472 tree vs = size_int (TYPE_VECTOR_SUBPARTS (vectype));
9473
9474 if (compute_in_loop)
9475 msq = vect_setup_realignment (vinfo, first_stmt_info, gsi,
9476 &realignment_token,
9477 dr_explicit_realign,
9478 dataref_ptr, NULL);
9479
9480 if (TREE_CODE (dataref_ptr) == SSA_NAME)
9481 ptr = copy_ssa_name (dataref_ptr);
9482 else
9483 ptr = make_ssa_name (TREE_TYPE (dataref_ptr));
9484 // For explicit realign the target alignment should be
9485 // known at compile time.
9486 unsigned HOST_WIDE_INT align =
9487 DR_TARGET_ALIGNMENT (first_dr_info).to_constant ();
9488 new_stmt = gimple_build_assign
9489 (ptr, BIT_AND_EXPR, dataref_ptr,
9490 build_int_cst
9491 (TREE_TYPE (dataref_ptr),
9492 -(HOST_WIDE_INT) align));
9493 vect_finish_stmt_generation (vinfo, stmt_info,
9494 new_stmt, gsi);
9495 data_ref
9496 = build2 (MEM_REF, vectype, ptr,
9497 build_int_cst (ref_type, 0));
9498 vect_copy_ref_info (data_ref, DR_REF (first_dr_info->dr));
9499 vec_dest = vect_create_destination_var (scalar_dest,
9500 vectype);
9501 new_stmt = gimple_build_assign (vec_dest, data_ref);
9502 new_temp = make_ssa_name (vec_dest, new_stmt);
9503 gimple_assign_set_lhs (new_stmt, new_temp);
9504 gimple_move_vops (new_stmt, stmt_info->stmt);
9505 vect_finish_stmt_generation (vinfo, stmt_info,
9506 new_stmt, gsi);
9507 msq = new_temp;
9508
9509 bump = size_binop (MULT_EXPR, vs,
9510 TYPE_SIZE_UNIT (elem_type));
9511 bump = size_binop (MINUS_EXPR, bump, size_one_node);
9512 ptr = bump_vector_ptr (vinfo, dataref_ptr, NULL, gsi,
9513 stmt_info, bump);
9514 new_stmt = gimple_build_assign
9515 (NULL_TREE, BIT_AND_EXPR, ptr,
9516 build_int_cst
9517 (TREE_TYPE (ptr), -(HOST_WIDE_INT) align));
9518 ptr = copy_ssa_name (ptr, new_stmt);
9519 gimple_assign_set_lhs (new_stmt, ptr);
9520 vect_finish_stmt_generation (vinfo, stmt_info,
9521 new_stmt, gsi);
9522 data_ref
9523 = build2 (MEM_REF, vectype, ptr,
9524 build_int_cst (ref_type, 0));
9525 break;
9526 }
9527 case dr_explicit_realign_optimized:
9528 {
9529 if (TREE_CODE (dataref_ptr) == SSA_NAME)
9530 new_temp = copy_ssa_name (dataref_ptr);
9531 else
9532 new_temp = make_ssa_name (TREE_TYPE (dataref_ptr));
9533 // We should only be doing this if we know the target
9534 // alignment at compile time.
9535 unsigned HOST_WIDE_INT align =
9536 DR_TARGET_ALIGNMENT (first_dr_info).to_constant ();
9537 new_stmt = gimple_build_assign
9538 (new_temp, BIT_AND_EXPR, dataref_ptr,
9539 build_int_cst (TREE_TYPE (dataref_ptr),
9540 -(HOST_WIDE_INT) align));
9541 vect_finish_stmt_generation (vinfo, stmt_info,
9542 new_stmt, gsi);
9543 data_ref
9544 = build2 (MEM_REF, vectype, new_temp,
9545 build_int_cst (ref_type, 0));
9546 break;
9547 }
9548 default:
9549 gcc_unreachable ();
9550 }
9551 vec_dest = vect_create_destination_var (scalar_dest, vectype);
9552 /* DATA_REF is null if we've already built the statement. */
9553 if (data_ref)
9554 {
9555 vect_copy_ref_info (data_ref, DR_REF (first_dr_info->dr));
9556 new_stmt = gimple_build_assign (vec_dest, data_ref);
9557 }
9558 new_temp = make_ssa_name (vec_dest, new_stmt);
9559 gimple_set_lhs (new_stmt, new_temp);
9560 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
9561
9562 /* 3. Handle explicit realignment if necessary/supported.
9563 Create in loop:
9564 vec_dest = realign_load (msq, lsq, realignment_token) */
9565 if (alignment_support_scheme == dr_explicit_realign_optimized
9566 || alignment_support_scheme == dr_explicit_realign)
9567 {
9568 lsq = gimple_assign_lhs (new_stmt);
9569 if (!realignment_token)
9570 realignment_token = dataref_ptr;
9571 vec_dest = vect_create_destination_var (scalar_dest, vectype);
9572 new_stmt = gimple_build_assign (vec_dest, REALIGN_LOAD_EXPR,
9573 msq, lsq, realignment_token);
9574 new_temp = make_ssa_name (vec_dest, new_stmt);
9575 gimple_assign_set_lhs (new_stmt, new_temp);
9576 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
9577
9578 if (alignment_support_scheme == dr_explicit_realign_optimized)
9579 {
9580 gcc_assert (phi);
9581 if (i == vec_num - 1 && j == ncopies - 1)
9582 add_phi_arg (phi, lsq,
9583 loop_latch_edge (containing_loop),
9584 UNKNOWN_LOCATION);
9585 msq = lsq;
9586 }
9587 }
9588
9589 if (memory_access_type == VMAT_CONTIGUOUS_REVERSE)
9590 {
9591 tree perm_mask = perm_mask_for_reverse (vectype);
9592 new_temp = permute_vec_elements (vinfo, new_temp, new_temp,
9593 perm_mask, stmt_info, gsi);
9594 new_stmt = SSA_NAME_DEF_STMT (new_temp);
9595 }
9596
9597 /* Collect vector loads and later create their permutation in
9598 vect_transform_grouped_load (). */
9599 if (grouped_load || slp_perm)
9600 dr_chain.quick_push (new_temp);
9601
9602 /* Store vector loads in the corresponding SLP_NODE. */
9603 if (slp && !slp_perm)
9604 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
9605
9606 /* With SLP permutation we load the gaps as well; without
9607 it we need to skip the gaps after we manage to fully load
9608 all elements. group_gap_adj is DR_GROUP_SIZE here. */
9609 group_elt += nunits;
9610 if (maybe_ne (group_gap_adj, 0U)
9611 && !slp_perm
9612 && known_eq (group_elt, group_size - group_gap_adj))
9613 {
9614 poly_wide_int bump_val
9615 = (wi::to_wide (TYPE_SIZE_UNIT (elem_type))
9616 * group_gap_adj);
9617 tree bump = wide_int_to_tree (sizetype, bump_val);
9618 dataref_ptr = bump_vector_ptr (vinfo, dataref_ptr, ptr_incr,
9619 gsi, stmt_info, bump);
9620 group_elt = 0;
9621 }
9622 }
9623 /* Bump the vector pointer to account for a gap or for excess
9624 elements loaded for a permuted SLP load. */
9625 if (maybe_ne (group_gap_adj, 0U) && slp_perm)
9626 {
9627 poly_wide_int bump_val
9628 = (wi::to_wide (TYPE_SIZE_UNIT (elem_type))
9629 * group_gap_adj);
9630 tree bump = wide_int_to_tree (sizetype, bump_val);
9631 dataref_ptr = bump_vector_ptr (vinfo, dataref_ptr, ptr_incr, gsi,
9632 stmt_info, bump);
9633 }
9634 }
9635
9636 if (slp && !slp_perm)
9637 continue;
9638
9639 if (slp_perm)
9640 {
9641 unsigned n_perms;
9642 bool ok = vect_transform_slp_perm_load (vinfo, slp_node, dr_chain,
9643 gsi, vf, false, &n_perms);
9644 gcc_assert (ok);
9645 }
9646 else
9647 {
9648 if (grouped_load)
9649 {
9650 if (memory_access_type != VMAT_LOAD_STORE_LANES)
9651 vect_transform_grouped_load (vinfo, stmt_info, dr_chain,
9652 group_size, gsi);
9653 *vec_stmt = STMT_VINFO_VEC_STMTS (stmt_info)[0];
9654 }
9655 else
9656 {
9657 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
9658 }
9659 }
9660 dr_chain.release ();
9661 }
9662 if (!slp)
9663 *vec_stmt = STMT_VINFO_VEC_STMTS (stmt_info)[0];
9664
9665 return true;
9666 }
9667
9668 /* Function vect_is_simple_cond.
9669
9670 Input:
9671 LOOP - the loop that is being vectorized.
9672 COND - Condition that is checked for simple use.
9673
9674 Output:
9675 *COMP_VECTYPE - the vector type for the comparison.
9676 *DTS - The def types for the arguments of the comparison.
9677
9678 Returns whether a COND can be vectorized. Checks whether the
9679 condition operands are supportable using vect_is_simple_use. */
9680
9681 static bool
9682 vect_is_simple_cond (tree cond, vec_info *vinfo, stmt_vec_info stmt_info,
9683 slp_tree slp_node, tree *comp_vectype,
9684 enum vect_def_type *dts, tree vectype)
9685 {
9686 tree lhs, rhs;
9687 tree vectype1 = NULL_TREE, vectype2 = NULL_TREE;
9688 slp_tree slp_op;
9689
9690 /* Mask case. */
9691 if (TREE_CODE (cond) == SSA_NAME
9692 && VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (cond)))
9693 {
9694 if (!vect_is_simple_use (vinfo, stmt_info, slp_node, 0, &cond,
9695 &slp_op, &dts[0], comp_vectype)
9696 || !*comp_vectype
9697 || !VECTOR_BOOLEAN_TYPE_P (*comp_vectype))
9698 return false;
9699 return true;
9700 }
9701
9702 if (!COMPARISON_CLASS_P (cond))
9703 return false;
9704
9705 lhs = TREE_OPERAND (cond, 0);
9706 rhs = TREE_OPERAND (cond, 1);
9707
9708 if (TREE_CODE (lhs) == SSA_NAME)
9709 {
9710 if (!vect_is_simple_use (vinfo, stmt_info, slp_node, 0,
9711 &lhs, &slp_op, &dts[0], &vectype1))
9712 return false;
9713 }
9714 else if (TREE_CODE (lhs) == INTEGER_CST || TREE_CODE (lhs) == REAL_CST
9715 || TREE_CODE (lhs) == FIXED_CST)
9716 dts[0] = vect_constant_def;
9717 else
9718 return false;
9719
9720 if (TREE_CODE (rhs) == SSA_NAME)
9721 {
9722 if (!vect_is_simple_use (vinfo, stmt_info, slp_node, 1,
9723 &rhs, &slp_op, &dts[1], &vectype2))
9724 return false;
9725 }
9726 else if (TREE_CODE (rhs) == INTEGER_CST || TREE_CODE (rhs) == REAL_CST
9727 || TREE_CODE (rhs) == FIXED_CST)
9728 dts[1] = vect_constant_def;
9729 else
9730 return false;
9731
9732 if (vectype1 && vectype2
9733 && maybe_ne (TYPE_VECTOR_SUBPARTS (vectype1),
9734 TYPE_VECTOR_SUBPARTS (vectype2)))
9735 return false;
9736
9737 *comp_vectype = vectype1 ? vectype1 : vectype2;
9738 /* Invariant comparison. */
9739 if (! *comp_vectype)
9740 {
9741 tree scalar_type = TREE_TYPE (lhs);
9742 if (VECT_SCALAR_BOOLEAN_TYPE_P (scalar_type))
9743 *comp_vectype = truth_type_for (vectype);
9744 else
9745 {
9746 /* If we can widen the comparison to match vectype do so. */
9747 if (INTEGRAL_TYPE_P (scalar_type)
9748 && !slp_node
9749 && tree_int_cst_lt (TYPE_SIZE (scalar_type),
9750 TYPE_SIZE (TREE_TYPE (vectype))))
9751 scalar_type = build_nonstandard_integer_type
9752 (vector_element_bits (vectype), TYPE_UNSIGNED (scalar_type));
9753 *comp_vectype = get_vectype_for_scalar_type (vinfo, scalar_type,
9754 slp_node);
9755 }
9756 }
9757
9758 return true;
9759 }
9760
9761 /* vectorizable_condition.
9762
9763 Check if STMT_INFO is a conditional modify expression that can be vectorized.
9764 If VEC_STMT is also passed, vectorize STMT_INFO: create a vectorized
9765 stmt using VEC_COND_EXPR to replace it, put it in VEC_STMT, and insert it
9766 at GSI.
9767
9768 When STMT_INFO is vectorized as a nested cycle, for_reduction is true.
9769
9770 Return true if STMT_INFO is vectorizable in this way. */
9771
9772 static bool
9773 vectorizable_condition (vec_info *vinfo,
9774 stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
9775 gimple **vec_stmt,
9776 slp_tree slp_node, stmt_vector_for_cost *cost_vec)
9777 {
9778 tree scalar_dest = NULL_TREE;
9779 tree vec_dest = NULL_TREE;
9780 tree cond_expr, cond_expr0 = NULL_TREE, cond_expr1 = NULL_TREE;
9781 tree then_clause, else_clause;
9782 tree comp_vectype = NULL_TREE;
9783 tree vec_cond_lhs = NULL_TREE, vec_cond_rhs = NULL_TREE;
9784 tree vec_then_clause = NULL_TREE, vec_else_clause = NULL_TREE;
9785 tree vec_compare;
9786 tree new_temp;
9787 loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
9788 enum vect_def_type dts[4]
9789 = {vect_unknown_def_type, vect_unknown_def_type,
9790 vect_unknown_def_type, vect_unknown_def_type};
9791 int ndts = 4;
9792 int ncopies;
9793 int vec_num;
9794 enum tree_code code, cond_code, bitop1 = NOP_EXPR, bitop2 = NOP_EXPR;
9795 int i;
9796 bb_vec_info bb_vinfo = dyn_cast <bb_vec_info> (vinfo);
9797 vec<tree> vec_oprnds0 = vNULL;
9798 vec<tree> vec_oprnds1 = vNULL;
9799 vec<tree> vec_oprnds2 = vNULL;
9800 vec<tree> vec_oprnds3 = vNULL;
9801 tree vec_cmp_type;
9802 bool masked = false;
9803
9804 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
9805 return false;
9806
9807 /* Is vectorizable conditional operation? */
9808 gassign *stmt = dyn_cast <gassign *> (stmt_info->stmt);
9809 if (!stmt)
9810 return false;
9811
9812 code = gimple_assign_rhs_code (stmt);
9813 if (code != COND_EXPR)
9814 return false;
9815
9816 stmt_vec_info reduc_info = NULL;
9817 int reduc_index = -1;
9818 vect_reduction_type reduction_type = TREE_CODE_REDUCTION;
9819 bool for_reduction
9820 = STMT_VINFO_REDUC_DEF (vect_orig_stmt (stmt_info)) != NULL;
9821 if (for_reduction)
9822 {
9823 if (STMT_SLP_TYPE (stmt_info))
9824 return false;
9825 reduc_info = info_for_reduction (vinfo, stmt_info);
9826 reduction_type = STMT_VINFO_REDUC_TYPE (reduc_info);
9827 reduc_index = STMT_VINFO_REDUC_IDX (stmt_info);
9828 gcc_assert (reduction_type != EXTRACT_LAST_REDUCTION
9829 || reduc_index != -1);
9830 }
9831 else
9832 {
9833 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
9834 return false;
9835 }
9836
9837 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
9838 tree vectype1 = NULL_TREE, vectype2 = NULL_TREE;
9839
9840 if (slp_node)
9841 {
9842 ncopies = 1;
9843 vec_num = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
9844 }
9845 else
9846 {
9847 ncopies = vect_get_num_copies (loop_vinfo, vectype);
9848 vec_num = 1;
9849 }
9850
9851 gcc_assert (ncopies >= 1);
9852 if (for_reduction && ncopies > 1)
9853 return false; /* FORNOW */
9854
9855 cond_expr = gimple_assign_rhs1 (stmt);
9856
9857 if (!vect_is_simple_cond (cond_expr, vinfo, stmt_info, slp_node,
9858 &comp_vectype, &dts[0], vectype)
9859 || !comp_vectype)
9860 return false;
9861
9862 unsigned op_adjust = COMPARISON_CLASS_P (cond_expr) ? 1 : 0;
9863 slp_tree then_slp_node, else_slp_node;
9864 if (!vect_is_simple_use (vinfo, stmt_info, slp_node, 1 + op_adjust,
9865 &then_clause, &then_slp_node, &dts[2], &vectype1))
9866 return false;
9867 if (!vect_is_simple_use (vinfo, stmt_info, slp_node, 2 + op_adjust,
9868 &else_clause, &else_slp_node, &dts[3], &vectype2))
9869 return false;
9870
9871 if (vectype1 && !useless_type_conversion_p (vectype, vectype1))
9872 return false;
9873
9874 if (vectype2 && !useless_type_conversion_p (vectype, vectype2))
9875 return false;
9876
9877 masked = !COMPARISON_CLASS_P (cond_expr);
9878 vec_cmp_type = truth_type_for (comp_vectype);
9879
9880 if (vec_cmp_type == NULL_TREE)
9881 return false;
9882
9883 cond_code = TREE_CODE (cond_expr);
9884 if (!masked)
9885 {
9886 cond_expr0 = TREE_OPERAND (cond_expr, 0);
9887 cond_expr1 = TREE_OPERAND (cond_expr, 1);
9888 }
9889
9890 /* For conditional reductions, the "then" value needs to be the candidate
9891 value calculated by this iteration while the "else" value needs to be
9892 the result carried over from previous iterations. If the COND_EXPR
9893 is the other way around, we need to swap it. */
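 /* Illustrative example (scalar form assumed for exposition):
    res = COND ? res : val carries the previous result in the "then"
    position, so the comparison is inverted (or the mask result
    inverted) and the clauses swapped, yielding res = !COND ? val : res
    with VAL as the per-iteration candidate.  */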
9894 bool must_invert_cmp_result = false;
9895 if (reduction_type == EXTRACT_LAST_REDUCTION && reduc_index == 1)
9896 {
9897 if (masked)
9898 must_invert_cmp_result = true;
9899 else
9900 {
9901 bool honor_nans = HONOR_NANS (TREE_TYPE (cond_expr0));
9902 tree_code new_code = invert_tree_comparison (cond_code, honor_nans);
9903 if (new_code == ERROR_MARK)
9904 must_invert_cmp_result = true;
9905 else
9906 {
9907 cond_code = new_code;
9908 /* Make sure we don't accidentally use the old condition. */
9909 cond_expr = NULL_TREE;
9910 }
9911 }
9912 std::swap (then_clause, else_clause);
9913 }
9914
9915 if (!masked && VECTOR_BOOLEAN_TYPE_P (comp_vectype))
9916 {
9917 /* Boolean values may have another representation in vectors
9918 and therefore we prefer bit operations over comparison for
9919 them (which also works for scalar masks). We store opcodes
9920 to use in bitop1 and bitop2. Statement is vectorized as
9921 BITOP2 (rhs1 BITOP1 rhs2) or rhs1 BITOP2 (BITOP1 rhs2)
9922 depending on bitop1 and bitop2 arity. */
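 /* Illustrative examples (boolean operands assumed): A > B becomes
    A & ~B, A >= B becomes A | ~B, A == B becomes ~(A ^ B) and
    A != B becomes A ^ B.  */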
9923 switch (cond_code)
9924 {
9925 case GT_EXPR:
9926 bitop1 = BIT_NOT_EXPR;
9927 bitop2 = BIT_AND_EXPR;
9928 break;
9929 case GE_EXPR:
9930 bitop1 = BIT_NOT_EXPR;
9931 bitop2 = BIT_IOR_EXPR;
9932 break;
9933 case LT_EXPR:
9934 bitop1 = BIT_NOT_EXPR;
9935 bitop2 = BIT_AND_EXPR;
9936 std::swap (cond_expr0, cond_expr1);
9937 break;
9938 case LE_EXPR:
9939 bitop1 = BIT_NOT_EXPR;
9940 bitop2 = BIT_IOR_EXPR;
9941 std::swap (cond_expr0, cond_expr1);
9942 break;
9943 case NE_EXPR:
9944 bitop1 = BIT_XOR_EXPR;
9945 break;
9946 case EQ_EXPR:
9947 bitop1 = BIT_XOR_EXPR;
9948 bitop2 = BIT_NOT_EXPR;
9949 break;
9950 default:
9951 return false;
9952 }
9953 cond_code = SSA_NAME;
9954 }
9955
9956 if (TREE_CODE_CLASS (cond_code) == tcc_comparison
9957 && reduction_type == EXTRACT_LAST_REDUCTION
9958 && !expand_vec_cmp_expr_p (comp_vectype, vec_cmp_type, cond_code))
9959 {
9960 if (dump_enabled_p ())
9961 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
9962 "reduction comparison operation not supported.\n");
9963 return false;
9964 }
9965
9966 if (!vec_stmt)
9967 {
9968 if (bitop1 != NOP_EXPR)
9969 {
9970 machine_mode mode = TYPE_MODE (comp_vectype);
9971 optab optab;
9972
9973 optab = optab_for_tree_code (bitop1, comp_vectype, optab_default);
9974 if (!optab || optab_handler (optab, mode) == CODE_FOR_nothing)
9975 return false;
9976
9977 if (bitop2 != NOP_EXPR)
9978 {
9979 optab = optab_for_tree_code (bitop2, comp_vectype,
9980 optab_default);
9981 if (!optab || optab_handler (optab, mode) == CODE_FOR_nothing)
9982 return false;
9983 }
9984 }
9985
9986 vect_cost_for_stmt kind = vector_stmt;
9987 if (reduction_type == EXTRACT_LAST_REDUCTION)
9988 /* Count one reduction-like operation per vector. */
9989 kind = vec_to_scalar;
9990 else if (!expand_vec_cond_expr_p (vectype, comp_vectype, cond_code))
9991 return false;
9992
9993 if (slp_node
9994 && (!vect_maybe_update_slp_op_vectype
9995 (SLP_TREE_CHILDREN (slp_node)[0], comp_vectype)
9996 || (op_adjust == 1
9997 && !vect_maybe_update_slp_op_vectype
9998 (SLP_TREE_CHILDREN (slp_node)[1], comp_vectype))
9999 || !vect_maybe_update_slp_op_vectype (then_slp_node, vectype)
10000 || !vect_maybe_update_slp_op_vectype (else_slp_node, vectype)))
10001 {
10002 if (dump_enabled_p ())
10003 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
10004 "incompatible vector types for invariants\n");
10005 return false;
10006 }
10007
10008 if (loop_vinfo && for_reduction
10009 && LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo))
10010 {
10011 if (reduction_type == EXTRACT_LAST_REDUCTION)
10012 vect_record_loop_mask (loop_vinfo, &LOOP_VINFO_MASKS (loop_vinfo),
10013 ncopies * vec_num, vectype, NULL);
10014 /* Extra inactive lanes should be safe for vect_nested_cycle. */
10015 else if (STMT_VINFO_DEF_TYPE (reduc_info) != vect_nested_cycle)
10016 {
10017 if (dump_enabled_p ())
10018 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
10019 "conditional reduction prevents the use"
10020 " of partial vectors.\n");
10021 LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo) = false;
10022 }
10023 }
10024
10025 STMT_VINFO_TYPE (stmt_info) = condition_vec_info_type;
10026 vect_model_simple_cost (vinfo, stmt_info, ncopies, dts, ndts, slp_node,
10027 cost_vec, kind);
10028 return true;
10029 }
10030
10031 /* Transform. */
10032
10033 if (!slp_node)
10034 {
10035 vec_oprnds0.create (1);
10036 vec_oprnds1.create (1);
10037 vec_oprnds2.create (1);
10038 vec_oprnds3.create (1);
10039 }
10040
10041 /* Handle def. */
10042 scalar_dest = gimple_assign_lhs (stmt);
10043 if (reduction_type != EXTRACT_LAST_REDUCTION)
10044 vec_dest = vect_create_destination_var (scalar_dest, vectype);
10045
10046 bool swap_cond_operands = false;
10047
10048 /* See whether another part of the vectorized code applies a loop
10049 mask to the condition, or to its inverse. */
10050
10051 vec_loop_masks *masks = NULL;
10052 if (loop_vinfo && LOOP_VINFO_FULLY_MASKED_P (loop_vinfo))
10053 {
10054 if (reduction_type == EXTRACT_LAST_REDUCTION)
10055 masks = &LOOP_VINFO_MASKS (loop_vinfo);
10056 else
10057 {
10058 scalar_cond_masked_key cond (cond_expr, ncopies);
10059 if (loop_vinfo->scalar_cond_masked_set.contains (cond))
10060 masks = &LOOP_VINFO_MASKS (loop_vinfo);
10061 else
10062 {
10063 bool honor_nans = HONOR_NANS (TREE_TYPE (cond.op0));
10064 cond.code = invert_tree_comparison (cond.code, honor_nans);
10065 if (loop_vinfo->scalar_cond_masked_set.contains (cond))
10066 {
10067 masks = &LOOP_VINFO_MASKS (loop_vinfo);
10068 cond_code = cond.code;
10069 swap_cond_operands = true;
10070 }
10071 }
10072 }
10073 }
10074
10075 /* Handle cond expr. */
10076 if (masked)
10077 vect_get_vec_defs (vinfo, stmt_info, slp_node, ncopies,
10078 cond_expr, &vec_oprnds0, comp_vectype,
10079 then_clause, &vec_oprnds2, vectype,
10080 reduction_type != EXTRACT_LAST_REDUCTION
10081 ? else_clause : NULL, &vec_oprnds3, vectype);
10082 else
10083 vect_get_vec_defs (vinfo, stmt_info, slp_node, ncopies,
10084 cond_expr0, &vec_oprnds0, comp_vectype,
10085 cond_expr1, &vec_oprnds1, comp_vectype,
10086 then_clause, &vec_oprnds2, vectype,
10087 reduction_type != EXTRACT_LAST_REDUCTION
10088 ? else_clause : NULL, &vec_oprnds3, vectype);
10089
10090 /* Arguments are ready. Create the new vector stmt. */
10091 FOR_EACH_VEC_ELT (vec_oprnds0, i, vec_cond_lhs)
10092 {
10093 vec_then_clause = vec_oprnds2[i];
10094 if (reduction_type != EXTRACT_LAST_REDUCTION)
10095 vec_else_clause = vec_oprnds3[i];
10096
10097 if (swap_cond_operands)
10098 std::swap (vec_then_clause, vec_else_clause);
10099
10100 if (masked)
10101 vec_compare = vec_cond_lhs;
10102 else
10103 {
10104 vec_cond_rhs = vec_oprnds1[i];
10105 if (bitop1 == NOP_EXPR)
10106 {
10107 gimple_seq stmts = NULL;
10108 vec_compare = gimple_build (&stmts, cond_code, vec_cmp_type,
10109 vec_cond_lhs, vec_cond_rhs);
10110 gsi_insert_before (gsi, stmts, GSI_SAME_STMT);
10111 }
10112 else
10113 {
10114 new_temp = make_ssa_name (vec_cmp_type);
10115 gassign *new_stmt;
10116 if (bitop1 == BIT_NOT_EXPR)
10117 new_stmt = gimple_build_assign (new_temp, bitop1,
10118 vec_cond_rhs);
10119 else
10120 new_stmt
10121 = gimple_build_assign (new_temp, bitop1, vec_cond_lhs,
10122 vec_cond_rhs);
10123 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
10124 if (bitop2 == NOP_EXPR)
10125 vec_compare = new_temp;
10126 else if (bitop2 == BIT_NOT_EXPR)
10127 {
10128 /* Instead of doing ~x ? y : z do x ? z : y. */
10129 vec_compare = new_temp;
10130 std::swap (vec_then_clause, vec_else_clause);
10131 }
10132 else
10133 {
10134 vec_compare = make_ssa_name (vec_cmp_type);
10135 new_stmt
10136 = gimple_build_assign (vec_compare, bitop2,
10137 vec_cond_lhs, new_temp);
10138 vect_finish_stmt_generation (vinfo, stmt_info,
10139 new_stmt, gsi);
10140 }
10141 }
10142 }
10143
10144 /* If we decided to apply a loop mask to the result of the vector
10145 comparison, AND the comparison with the mask now. Later passes
10146 should then be able to reuse the AND results between multiple
10147 vector statements.
10148
10149 For example:
10150 for (int i = 0; i < 100; ++i)
10151 x[i] = y[i] ? z[i] : 10;
10152
10153 results in following optimized GIMPLE:
10154
10155 mask__35.8_43 = vect__4.7_41 != { 0, ... };
10156 vec_mask_and_46 = loop_mask_40 & mask__35.8_43;
10157 _19 = &MEM[base: z_12(D), index: ivtmp_56, step: 4, offset: 0B];
10158 vect_iftmp.11_47 = .MASK_LOAD (_19, 4B, vec_mask_and_46);
10159 vect_iftmp.12_52 = VEC_COND_EXPR <vec_mask_and_46,
10160 vect_iftmp.11_47, { 10, ... }>;
10161
10162 instead of using masked and unmasked forms of
10163 vec != { 0, ... } (masked in the MASK_LOAD,
10164 unmasked in the VEC_COND_EXPR). */
10165
10166 /* Force vec_compare to be an SSA_NAME rather than a comparison,
10167 in cases where that's necessary. */
10168
10169 if (masks || reduction_type == EXTRACT_LAST_REDUCTION)
10170 {
10171 if (!is_gimple_val (vec_compare))
10172 {
10173 tree vec_compare_name = make_ssa_name (vec_cmp_type);
10174 gassign *new_stmt = gimple_build_assign (vec_compare_name,
10175 vec_compare);
10176 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
10177 vec_compare = vec_compare_name;
10178 }
10179
10180 if (must_invert_cmp_result)
10181 {
10182 tree vec_compare_name = make_ssa_name (vec_cmp_type);
10183 gassign *new_stmt = gimple_build_assign (vec_compare_name,
10184 BIT_NOT_EXPR,
10185 vec_compare);
10186 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
10187 vec_compare = vec_compare_name;
10188 }
10189
10190 if (masks)
10191 {
10192 unsigned vec_num = vec_oprnds0.length ();
10193 tree loop_mask
10194 = vect_get_loop_mask (gsi, masks, vec_num * ncopies,
10195 vectype, i);
10196 tree tmp2 = make_ssa_name (vec_cmp_type);
10197 gassign *g
10198 = gimple_build_assign (tmp2, BIT_AND_EXPR, vec_compare,
10199 loop_mask);
10200 vect_finish_stmt_generation (vinfo, stmt_info, g, gsi);
10201 vec_compare = tmp2;
10202 }
10203 }
10204
10205 gimple *new_stmt;
10206 if (reduction_type == EXTRACT_LAST_REDUCTION)
10207 {
10208 gimple *old_stmt = vect_orig_stmt (stmt_info)->stmt;
10209 tree lhs = gimple_get_lhs (old_stmt);
10210 new_stmt = gimple_build_call_internal
10211 (IFN_FOLD_EXTRACT_LAST, 3, else_clause, vec_compare,
10212 vec_then_clause);
10213 gimple_call_set_lhs (new_stmt, lhs);
10214 SSA_NAME_DEF_STMT (lhs) = new_stmt;
10215 if (old_stmt == gsi_stmt (*gsi))
10216 vect_finish_replace_stmt (vinfo, stmt_info, new_stmt);
10217 else
10218 {
10219 /* In this case we're moving the definition to later in the
10220 block. That doesn't matter because the only uses of the
10221 lhs are in phi statements. */
10222 gimple_stmt_iterator old_gsi = gsi_for_stmt (old_stmt);
10223 gsi_remove (&old_gsi, true);
10224 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
10225 }
10226 }
10227 else
10228 {
10229 new_temp = make_ssa_name (vec_dest);
10230 new_stmt = gimple_build_assign (new_temp, VEC_COND_EXPR, vec_compare,
10231 vec_then_clause, vec_else_clause);
10232 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
10233 }
10234 if (slp_node)
10235 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
10236 else
10237 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
10238 }
10239
10240 if (!slp_node)
10241 *vec_stmt = STMT_VINFO_VEC_STMTS (stmt_info)[0];
10242
10243 vec_oprnds0.release ();
10244 vec_oprnds1.release ();
10245 vec_oprnds2.release ();
10246 vec_oprnds3.release ();
10247
10248 return true;
10249 }
10250
10251 /* vectorizable_comparison.
10252
10253 Check if STMT_INFO is a comparison expression that can be vectorized.
10254 If VEC_STMT is also passed, vectorize STMT_INFO: create a vectorized
10255 comparison, put it in VEC_STMT, and insert it at GSI.
10256
10257 Return true if STMT_INFO is vectorizable in this way. */
10258
10259 static bool
10260 vectorizable_comparison (vec_info *vinfo,
10261 stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
10262 gimple **vec_stmt,
10263 slp_tree slp_node, stmt_vector_for_cost *cost_vec)
10264 {
10265 tree lhs, rhs1, rhs2;
10266 tree vectype1 = NULL_TREE, vectype2 = NULL_TREE;
10267 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
10268 tree vec_rhs1 = NULL_TREE, vec_rhs2 = NULL_TREE;
10269 tree new_temp;
10270 loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
10271 enum vect_def_type dts[2] = {vect_unknown_def_type, vect_unknown_def_type};
10272 int ndts = 2;
10273 poly_uint64 nunits;
10274 int ncopies;
10275 enum tree_code code, bitop1 = NOP_EXPR, bitop2 = NOP_EXPR;
10276 int i;
10277 bb_vec_info bb_vinfo = dyn_cast <bb_vec_info> (vinfo);
10278 vec<tree> vec_oprnds0 = vNULL;
10279 vec<tree> vec_oprnds1 = vNULL;
10280 tree mask_type;
10281 tree mask;
10282
10283 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
10284 return false;
10285
10286 if (!vectype || !VECTOR_BOOLEAN_TYPE_P (vectype))
10287 return false;
10288
10289 mask_type = vectype;
10290 nunits = TYPE_VECTOR_SUBPARTS (vectype);
10291
10292 if (slp_node)
10293 ncopies = 1;
10294 else
10295 ncopies = vect_get_num_copies (loop_vinfo, vectype);
10296
10297 gcc_assert (ncopies >= 1);
10298 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
10299 return false;
10300
10301 gassign *stmt = dyn_cast <gassign *> (stmt_info->stmt);
10302 if (!stmt)
10303 return false;
10304
10305 code = gimple_assign_rhs_code (stmt);
10306
10307 if (TREE_CODE_CLASS (code) != tcc_comparison)
10308 return false;
10309
10310 slp_tree slp_rhs1, slp_rhs2;
10311 if (!vect_is_simple_use (vinfo, stmt_info, slp_node,
10312 0, &rhs1, &slp_rhs1, &dts[0], &vectype1))
10313 return false;
10314
10315 if (!vect_is_simple_use (vinfo, stmt_info, slp_node,
10316 1, &rhs2, &slp_rhs2, &dts[1], &vectype2))
10317 return false;
10318
10319 if (vectype1 && vectype2
10320 && maybe_ne (TYPE_VECTOR_SUBPARTS (vectype1),
10321 TYPE_VECTOR_SUBPARTS (vectype2)))
10322 return false;
10323
10324 vectype = vectype1 ? vectype1 : vectype2;
10325
10326 /* Invariant comparison. */
10327 if (!vectype)
10328 {
10329 if (VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (rhs1)))
10330 vectype = mask_type;
10331 else
10332 vectype = get_vectype_for_scalar_type (vinfo, TREE_TYPE (rhs1),
10333 slp_node);
10334 if (!vectype || maybe_ne (TYPE_VECTOR_SUBPARTS (vectype), nunits))
10335 return false;
10336 }
10337 else if (maybe_ne (nunits, TYPE_VECTOR_SUBPARTS (vectype)))
10338 return false;
10339
10340 /* Can't compare mask and non-mask types. */
10341 if (vectype1 && vectype2
10342 && (VECTOR_BOOLEAN_TYPE_P (vectype1) ^ VECTOR_BOOLEAN_TYPE_P (vectype2)))
10343 return false;
10344
10345 /* Boolean values may have another representation in vectors
10346 and therefore we prefer bit operations over comparison for
10347 them (which also works for scalar masks). We store opcodes
10348 to use in bitop1 and bitop2. Statement is vectorized as
10349 BITOP2 (rhs1 BITOP1 rhs2) or
10350 rhs1 BITOP2 (BITOP1 rhs2)
10351 depending on bitop1 and bitop2 arity. */
10352 bool swap_p = false;
10353 if (VECTOR_BOOLEAN_TYPE_P (vectype))
10354 {
10355 if (code == GT_EXPR)
10356 {
10357 bitop1 = BIT_NOT_EXPR;
10358 bitop2 = BIT_AND_EXPR;
10359 }
10360 else if (code == GE_EXPR)
10361 {
10362 bitop1 = BIT_NOT_EXPR;
10363 bitop2 = BIT_IOR_EXPR;
10364 }
10365 else if (code == LT_EXPR)
10366 {
10367 bitop1 = BIT_NOT_EXPR;
10368 bitop2 = BIT_AND_EXPR;
10369 swap_p = true;
10370 }
10371 else if (code == LE_EXPR)
10372 {
10373 bitop1 = BIT_NOT_EXPR;
10374 bitop2 = BIT_IOR_EXPR;
10375 swap_p = true;
10376 }
10377 else
10378 {
10379 bitop1 = BIT_XOR_EXPR;
10380 if (code == EQ_EXPR)
10381 bitop2 = BIT_NOT_EXPR;
10382 }
10383 }
10384
10385 if (!vec_stmt)
10386 {
10387 if (bitop1 == NOP_EXPR)
10388 {
10389 if (!expand_vec_cmp_expr_p (vectype, mask_type, code))
10390 return false;
10391 }
10392 else
10393 {
10394 machine_mode mode = TYPE_MODE (vectype);
10395 optab optab;
10396
10397 optab = optab_for_tree_code (bitop1, vectype, optab_default);
10398 if (!optab || optab_handler (optab, mode) == CODE_FOR_nothing)
10399 return false;
10400
10401 if (bitop2 != NOP_EXPR)
10402 {
10403 optab = optab_for_tree_code (bitop2, vectype, optab_default);
10404 if (!optab || optab_handler (optab, mode) == CODE_FOR_nothing)
10405 return false;
10406 }
10407 }
10408
10409 /* Put types on constant and invariant SLP children. */
10410 if (slp_node
10411 && (!vect_maybe_update_slp_op_vectype (slp_rhs1, vectype)
10412 || !vect_maybe_update_slp_op_vectype (slp_rhs2, vectype)))
10413 {
10414 if (dump_enabled_p ())
10415 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
10416 "incompatible vector types for invariants\n");
10417 return false;
10418 }
10419
10420 STMT_VINFO_TYPE (stmt_info) = comparison_vec_info_type;
10421 vect_model_simple_cost (vinfo, stmt_info,
10422 ncopies * (1 + (bitop2 != NOP_EXPR)),
10423 dts, ndts, slp_node, cost_vec);
10424 return true;
10425 }
10426
10427 /* Transform. */
10428 if (!slp_node)
10429 {
10430 vec_oprnds0.create (1);
10431 vec_oprnds1.create (1);
10432 }
10433
10434 /* Handle def. */
10435 lhs = gimple_assign_lhs (stmt);
10436 mask = vect_create_destination_var (lhs, mask_type);
10437
10438 vect_get_vec_defs (vinfo, stmt_info, slp_node, ncopies,
10439 rhs1, &vec_oprnds0, vectype,
10440 rhs2, &vec_oprnds1, vectype);
10441 if (swap_p)
10442 std::swap (vec_oprnds0, vec_oprnds1);
10443
10444 /* Arguments are ready. Create the new vector stmt. */
10445 FOR_EACH_VEC_ELT (vec_oprnds0, i, vec_rhs1)
10446 {
10447 gimple *new_stmt;
10448 vec_rhs2 = vec_oprnds1[i];
10449
10450 new_temp = make_ssa_name (mask);
10451 if (bitop1 == NOP_EXPR)
10452 {
10453 new_stmt = gimple_build_assign (new_temp, code,
10454 vec_rhs1, vec_rhs2);
10455 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
10456 }
10457 else
10458 {
10459 if (bitop1 == BIT_NOT_EXPR)
10460 new_stmt = gimple_build_assign (new_temp, bitop1, vec_rhs2);
10461 else
10462 new_stmt = gimple_build_assign (new_temp, bitop1, vec_rhs1,
10463 vec_rhs2);
10464 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
10465 if (bitop2 != NOP_EXPR)
10466 {
10467 tree res = make_ssa_name (mask);
10468 if (bitop2 == BIT_NOT_EXPR)
10469 new_stmt = gimple_build_assign (res, bitop2, new_temp);
10470 else
10471 new_stmt = gimple_build_assign (res, bitop2, vec_rhs1,
10472 new_temp);
10473 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
10474 }
10475 }
10476 if (slp_node)
10477 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
10478 else
10479 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
10480 }
10481
10482 if (!slp_node)
10483 *vec_stmt = STMT_VINFO_VEC_STMTS (stmt_info)[0];
10484
10485 vec_oprnds0.release ();
10486 vec_oprnds1.release ();
10487
10488 return true;
10489 }
10490
10491 /* If SLP_NODE is nonnull, return true if vectorizable_live_operation
10492 can handle all live statements in the node. Otherwise return true
10493 if STMT_INFO is not live or if vectorizable_live_operation can handle it.
10494 GSI and VEC_STMT_P are as for vectorizable_live_operation. */
10495
10496 static bool
10497 can_vectorize_live_stmts (vec_info *vinfo,
10498 stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
10499 slp_tree slp_node, slp_instance slp_node_instance,
10500 bool vec_stmt_p,
10501 stmt_vector_for_cost *cost_vec)
10502 {
10503 if (slp_node)
10504 {
10505 stmt_vec_info slp_stmt_info;
10506 unsigned int i;
10507 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (slp_node), i, slp_stmt_info)
10508 {
10509 if (STMT_VINFO_LIVE_P (slp_stmt_info)
10510 && !vectorizable_live_operation (vinfo,
10511 slp_stmt_info, gsi, slp_node,
10512 slp_node_instance, i,
10513 vec_stmt_p, cost_vec))
10514 return false;
10515 }
10516 }
10517 else if (STMT_VINFO_LIVE_P (stmt_info)
10518 && !vectorizable_live_operation (vinfo, stmt_info, gsi,
10519 slp_node, slp_node_instance, -1,
10520 vec_stmt_p, cost_vec))
10521 return false;
10522
10523 return true;
10524 }
10525
10526 /* Make sure the statement is vectorizable. */
10527
10528 opt_result
10529 vect_analyze_stmt (vec_info *vinfo,
10530 stmt_vec_info stmt_info, bool *need_to_vectorize,
10531 slp_tree node, slp_instance node_instance,
10532 stmt_vector_for_cost *cost_vec)
10533 {
10534 bb_vec_info bb_vinfo = dyn_cast <bb_vec_info> (vinfo);
10535 enum vect_relevant relevance = STMT_VINFO_RELEVANT (stmt_info);
10536 bool ok;
10537 gimple_seq pattern_def_seq;
10538
10539 if (dump_enabled_p ())
10540 dump_printf_loc (MSG_NOTE, vect_location, "==> examining statement: %G",
10541 stmt_info->stmt);
10542
10543 if (gimple_has_volatile_ops (stmt_info->stmt))
10544 return opt_result::failure_at (stmt_info->stmt,
10545 "not vectorized:"
10546 " stmt has volatile operands: %G\n",
10547 stmt_info->stmt);
10548
10549 if (STMT_VINFO_IN_PATTERN_P (stmt_info)
10550 && node == NULL
10551 && (pattern_def_seq = STMT_VINFO_PATTERN_DEF_SEQ (stmt_info)))
10552 {
10553 gimple_stmt_iterator si;
10554
10555 for (si = gsi_start (pattern_def_seq); !gsi_end_p (si); gsi_next (&si))
10556 {
10557 stmt_vec_info pattern_def_stmt_info
10558 = vinfo->lookup_stmt (gsi_stmt (si));
10559 if (STMT_VINFO_RELEVANT_P (pattern_def_stmt_info)
10560 || STMT_VINFO_LIVE_P (pattern_def_stmt_info))
10561 {
10562 /* Analyze def stmt of STMT if it's a pattern stmt. */
10563 if (dump_enabled_p ())
10564 dump_printf_loc (MSG_NOTE, vect_location,
10565 "==> examining pattern def statement: %G",
10566 pattern_def_stmt_info->stmt);
10567
10568 opt_result res
10569 = vect_analyze_stmt (vinfo, pattern_def_stmt_info,
10570 need_to_vectorize, node, node_instance,
10571 cost_vec);
10572 if (!res)
10573 return res;
10574 }
10575 }
10576 }
10577
10578 /* Skip stmts that do not need to be vectorized. In loops this is expected
10579 to include:
10580 - the COND_EXPR which is the loop exit condition
10581 - any LABEL_EXPRs in the loop
10582 - computations that are used only for array indexing or loop control.
10583 In basic blocks we only analyze statements that are a part of some SLP
10584 instance; therefore, all the statements are relevant.
10585
10586 A pattern statement needs to be analyzed instead of the original statement
10587 if the original statement is not relevant. Otherwise, we analyze both
10588 statements. In basic blocks we are called from some SLP instance
10589 traversal; there we don't analyze pattern stmts instead, as the pattern
10590 stmts will already be part of an SLP instance. */
10591
10592 stmt_vec_info pattern_stmt_info = STMT_VINFO_RELATED_STMT (stmt_info);
10593 if (!STMT_VINFO_RELEVANT_P (stmt_info)
10594 && !STMT_VINFO_LIVE_P (stmt_info))
10595 {
10596 if (STMT_VINFO_IN_PATTERN_P (stmt_info)
10597 && pattern_stmt_info
10598 && (STMT_VINFO_RELEVANT_P (pattern_stmt_info)
10599 || STMT_VINFO_LIVE_P (pattern_stmt_info)))
10600 {
10601 /* Analyze PATTERN_STMT instead of the original stmt. */
10602 stmt_info = pattern_stmt_info;
10603 if (dump_enabled_p ())
10604 dump_printf_loc (MSG_NOTE, vect_location,
10605 "==> examining pattern statement: %G",
10606 stmt_info->stmt);
10607 }
10608 else
10609 {
10610 if (dump_enabled_p ())
10611 dump_printf_loc (MSG_NOTE, vect_location, "irrelevant.\n");
10612
10613 return opt_result::success ();
10614 }
10615 }
10616 else if (STMT_VINFO_IN_PATTERN_P (stmt_info)
10617 && node == NULL
10618 && pattern_stmt_info
10619 && (STMT_VINFO_RELEVANT_P (pattern_stmt_info)
10620 || STMT_VINFO_LIVE_P (pattern_stmt_info)))
10621 {
10622 /* Analyze PATTERN_STMT too. */
10623 if (dump_enabled_p ())
10624 dump_printf_loc (MSG_NOTE, vect_location,
10625 "==> examining pattern statement: %G",
10626 pattern_stmt_info->stmt);
10627
10628 opt_result res
10629 = vect_analyze_stmt (vinfo, pattern_stmt_info, need_to_vectorize, node,
10630 node_instance, cost_vec);
10631 if (!res)
10632 return res;
10633 }
10634
10635 switch (STMT_VINFO_DEF_TYPE (stmt_info))
10636 {
10637 case vect_internal_def:
10638 break;
10639
10640 case vect_reduction_def:
10641 case vect_nested_cycle:
10642 gcc_assert (!bb_vinfo
10643 && (relevance == vect_used_in_outer
10644 || relevance == vect_used_in_outer_by_reduction
10645 || relevance == vect_used_by_reduction
10646 || relevance == vect_unused_in_scope
10647 || relevance == vect_used_only_live));
10648 break;
10649
10650 case vect_induction_def:
10651 gcc_assert (!bb_vinfo);
10652 break;
10653
10654 case vect_constant_def:
10655 case vect_external_def:
10656 case vect_unknown_def_type:
10657 default:
10658 gcc_unreachable ();
10659 }
10660
10661 if (STMT_VINFO_RELEVANT_P (stmt_info))
10662 {
10663 tree type = gimple_expr_type (stmt_info->stmt);
10664 gcc_assert (!VECTOR_MODE_P (TYPE_MODE (type)));
10665 gcall *call = dyn_cast <gcall *> (stmt_info->stmt);
10666 gcc_assert (STMT_VINFO_VECTYPE (stmt_info)
10667 || (call && gimple_call_lhs (call) == NULL_TREE));
10668 *need_to_vectorize = true;
10669 }
10670
10671 if (PURE_SLP_STMT (stmt_info) && !node)
10672 {
10673 if (dump_enabled_p ())
10674 dump_printf_loc (MSG_NOTE, vect_location,
10675 "handled only by SLP analysis\n");
10676 return opt_result::success ();
10677 }
10678
10679 ok = true;
10680 if (!bb_vinfo
10681 && (STMT_VINFO_RELEVANT_P (stmt_info)
10682 || STMT_VINFO_DEF_TYPE (stmt_info) == vect_reduction_def))
10683 /* Prefer vectorizable_call over vectorizable_simd_clone_call so
10684 -mveclibabi= takes preference over library functions with
10685 the simd attribute. */
10686 ok = (vectorizable_call (vinfo, stmt_info, NULL, NULL, node, cost_vec)
10687 || vectorizable_simd_clone_call (vinfo, stmt_info, NULL, NULL, node,
10688 cost_vec)
10689 || vectorizable_conversion (vinfo, stmt_info,
10690 NULL, NULL, node, cost_vec)
10691 || vectorizable_operation (vinfo, stmt_info,
10692 NULL, NULL, node, cost_vec)
10693 || vectorizable_assignment (vinfo, stmt_info,
10694 NULL, NULL, node, cost_vec)
10695 || vectorizable_load (vinfo, stmt_info, NULL, NULL, node, cost_vec)
10696 || vectorizable_store (vinfo, stmt_info, NULL, NULL, node, cost_vec)
10697 || vectorizable_reduction (as_a <loop_vec_info> (vinfo), stmt_info,
10698 node, node_instance, cost_vec)
10699 || vectorizable_induction (as_a <loop_vec_info> (vinfo), stmt_info,
10700 NULL, node, cost_vec)
10701 || vectorizable_shift (vinfo, stmt_info, NULL, NULL, node, cost_vec)
10702 || vectorizable_condition (vinfo, stmt_info,
10703 NULL, NULL, node, cost_vec)
10704 || vectorizable_comparison (vinfo, stmt_info, NULL, NULL, node,
10705 cost_vec)
10706 || vectorizable_lc_phi (as_a <loop_vec_info> (vinfo),
10707 stmt_info, NULL, node));
10708 else
10709 {
10710 if (bb_vinfo)
10711 ok = (vectorizable_call (vinfo, stmt_info, NULL, NULL, node, cost_vec)
10712 || vectorizable_simd_clone_call (vinfo, stmt_info,
10713 NULL, NULL, node, cost_vec)
10714 || vectorizable_conversion (vinfo, stmt_info, NULL, NULL, node,
10715 cost_vec)
10716 || vectorizable_shift (vinfo, stmt_info,
10717 NULL, NULL, node, cost_vec)
10718 || vectorizable_operation (vinfo, stmt_info,
10719 NULL, NULL, node, cost_vec)
10720 || vectorizable_assignment (vinfo, stmt_info, NULL, NULL, node,
10721 cost_vec)
10722 || vectorizable_load (vinfo, stmt_info,
10723 NULL, NULL, node, cost_vec)
10724 || vectorizable_store (vinfo, stmt_info,
10725 NULL, NULL, node, cost_vec)
10726 || vectorizable_condition (vinfo, stmt_info,
10727 NULL, NULL, node, cost_vec)
10728 || vectorizable_comparison (vinfo, stmt_info, NULL, NULL, node,
10729 cost_vec)
10730 || vectorizable_phi (vinfo, stmt_info, NULL, node, cost_vec));
10731 }
10732
10733 if (!ok)
10734 return opt_result::failure_at (stmt_info->stmt,
10735 "not vectorized:"
10736 " relevant stmt not supported: %G",
10737 stmt_info->stmt);
10738
10739 /* Stmts that are (also) "live" (i.e. that are used outside of the loop)
10740 need extra handling, except for vectorizable reductions. */
10741 if (!bb_vinfo
10742 && STMT_VINFO_TYPE (stmt_info) != reduc_vec_info_type
10743 && STMT_VINFO_TYPE (stmt_info) != lc_phi_info_type
10744 && !can_vectorize_live_stmts (as_a <loop_vec_info> (vinfo),
10745 stmt_info, NULL, node, node_instance,
10746 false, cost_vec))
10747 return opt_result::failure_at (stmt_info->stmt,
10748 "not vectorized:"
10749 " live stmt not supported: %G",
10750 stmt_info->stmt);
10751
10752 return opt_result::success ();
10753 }
10754
10755
10756 /* Function vect_transform_stmt.
10757
10758 Create a vectorized stmt to replace STMT_INFO, and insert it at GSI. */
10759
10760 bool
10761 vect_transform_stmt (vec_info *vinfo,
10762 stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
10763 slp_tree slp_node, slp_instance slp_node_instance)
10764 {
10765 bool is_store = false;
10766 gimple *vec_stmt = NULL;
10767 bool done;
10768
10769 gcc_assert (slp_node || !PURE_SLP_STMT (stmt_info));
10770
10771 switch (STMT_VINFO_TYPE (stmt_info))
10772 {
10773 case type_demotion_vec_info_type:
10774 case type_promotion_vec_info_type:
10775 case type_conversion_vec_info_type:
10776 done = vectorizable_conversion (vinfo, stmt_info,
10777 gsi, &vec_stmt, slp_node, NULL);
10778 gcc_assert (done);
10779 break;
10780
10781 case induc_vec_info_type:
10782 done = vectorizable_induction (as_a <loop_vec_info> (vinfo),
10783 stmt_info, &vec_stmt, slp_node,
10784 NULL);
10785 gcc_assert (done);
10786 break;
10787
10788 case shift_vec_info_type:
10789 done = vectorizable_shift (vinfo, stmt_info,
10790 gsi, &vec_stmt, slp_node, NULL);
10791 gcc_assert (done);
10792 break;
10793
10794 case op_vec_info_type:
10795 done = vectorizable_operation (vinfo, stmt_info, gsi, &vec_stmt, slp_node,
10796 NULL);
10797 gcc_assert (done);
10798 break;
10799
10800 case assignment_vec_info_type:
10801 done = vectorizable_assignment (vinfo, stmt_info,
10802 gsi, &vec_stmt, slp_node, NULL);
10803 gcc_assert (done);
10804 break;
10805
10806 case load_vec_info_type:
10807 done = vectorizable_load (vinfo, stmt_info, gsi, &vec_stmt, slp_node,
10808 NULL);
10809 gcc_assert (done);
10810 break;
10811
10812 case store_vec_info_type:
10813 done = vectorizable_store (vinfo, stmt_info,
10814 gsi, &vec_stmt, slp_node, NULL);
10815 gcc_assert (done);
10816 if (STMT_VINFO_GROUPED_ACCESS (stmt_info) && !slp_node)
10817 {
10818 /* In case of interleaving, the whole chain is vectorized when the
10819 last store in the chain is reached. Store stmts before the last
10820 one are skipped, and their vec_stmt_info shouldn't be freed
10821 meanwhile. */
10822 stmt_vec_info group_info = DR_GROUP_FIRST_ELEMENT (stmt_info);
10823 if (DR_GROUP_STORE_COUNT (group_info) == DR_GROUP_SIZE (group_info))
10824 is_store = true;
10825 }
10826 else
10827 is_store = true;
10828 break;
10829
10830 case condition_vec_info_type:
10831 done = vectorizable_condition (vinfo, stmt_info,
10832 gsi, &vec_stmt, slp_node, NULL);
10833 gcc_assert (done);
10834 break;
10835
10836 case comparison_vec_info_type:
10837 done = vectorizable_comparison (vinfo, stmt_info, gsi, &vec_stmt,
10838 slp_node, NULL);
10839 gcc_assert (done);
10840 break;
10841
10842 case call_vec_info_type:
10843 done = vectorizable_call (vinfo, stmt_info,
10844 gsi, &vec_stmt, slp_node, NULL);
10845 break;
10846
10847 case call_simd_clone_vec_info_type:
10848 done = vectorizable_simd_clone_call (vinfo, stmt_info, gsi, &vec_stmt,
10849 slp_node, NULL);
10850 break;
10851
10852 case reduc_vec_info_type:
10853 done = vect_transform_reduction (as_a <loop_vec_info> (vinfo), stmt_info,
10854 gsi, &vec_stmt, slp_node);
10855 gcc_assert (done);
10856 break;
10857
10858 case cycle_phi_info_type:
10859 done = vect_transform_cycle_phi (as_a <loop_vec_info> (vinfo), stmt_info,
10860 &vec_stmt, slp_node, slp_node_instance);
10861 gcc_assert (done);
10862 break;
10863
10864 case lc_phi_info_type:
10865 done = vectorizable_lc_phi (as_a <loop_vec_info> (vinfo),
10866 stmt_info, &vec_stmt, slp_node);
10867 gcc_assert (done);
10868 break;
10869
10870 case phi_info_type:
10871 done = vectorizable_phi (vinfo, stmt_info, &vec_stmt, slp_node, NULL);
10872 gcc_assert (done);
10873 break;
10874
10875 default:
10876 if (!STMT_VINFO_LIVE_P (stmt_info))
10877 {
10878 if (dump_enabled_p ())
10879 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
10880 "stmt not supported.\n");
10881 gcc_unreachable ();
10882 }
10883 done = true;
10884 }
10885
10886 if (!slp_node && vec_stmt)
10887 gcc_assert (STMT_VINFO_VEC_STMTS (stmt_info).exists ());
10888
10889 if (STMT_VINFO_TYPE (stmt_info) == store_vec_info_type)
10890 return is_store;
10891
10892 /* Handle stmts whose DEF is used outside the loop-nest that is
10893 being vectorized. */
10894 done = can_vectorize_live_stmts (vinfo, stmt_info, gsi, slp_node,
10895 slp_node_instance, true, NULL);
10896 gcc_assert (done);
10897
10898 return false;
10899 }
10900
10901
10902 /* Remove a group of stores (for SLP or interleaving), free their
10903 stmt_vec_info. */
10904
10905 void
10906 vect_remove_stores (vec_info *vinfo, stmt_vec_info first_stmt_info)
10907 {
10908 stmt_vec_info next_stmt_info = first_stmt_info;
10909
10910 while (next_stmt_info)
10911 {
10912 stmt_vec_info tmp = DR_GROUP_NEXT_ELEMENT (next_stmt_info);
10913 next_stmt_info = vect_orig_stmt (next_stmt_info);
10914 /* Free the attached stmt_vec_info and remove the stmt. */
10915 vinfo->remove_stmt (next_stmt_info);
10916 next_stmt_info = tmp;
10917 }
10918 }
10919
10920 /* If NUNITS is nonzero, return a vector type that contains NUNITS
10921 elements of type SCALAR_TYPE, or null if the target doesn't support
10922 such a type.
10923
10924 If NUNITS is zero, return a vector type that contains elements of
10925 type SCALAR_TYPE, choosing whichever vector size the target prefers.
10926
10927 If PREVAILING_MODE is VOIDmode, we have not yet chosen a vector mode
10928 for this vectorization region and want to "autodetect" the best choice.
10929 Otherwise, PREVAILING_MODE is a previously-chosen vector TYPE_MODE
10930 and we want the new type to be interoperable with it. PREVAILING_MODE
10931 in this case can be a scalar integer mode or a vector mode; when it
10932 is a vector mode, the function acts like a tree-level version of
10933 related_vector_mode. */
10934
10935 tree
10936 get_related_vectype_for_scalar_type (machine_mode prevailing_mode,
10937 tree scalar_type, poly_uint64 nunits)
10938 {
10939 tree orig_scalar_type = scalar_type;
10940 scalar_mode inner_mode;
10941 machine_mode simd_mode;
10942 tree vectype;
10943
10944 if (!is_int_mode (TYPE_MODE (scalar_type), &inner_mode)
10945 && !is_float_mode (TYPE_MODE (scalar_type), &inner_mode))
10946 return NULL_TREE;
10947
10948 unsigned int nbytes = GET_MODE_SIZE (inner_mode);
10949
10950 /* For vector types of elements whose mode precision doesn't
10951 match their type's precision we use an element type of mode
10952 precision. The vectorization routines will have to make sure
10953 they support the proper result truncation/extension.
10954 We also make sure to build vector types with INTEGER_TYPE
10955 component type only. */
10956 if (INTEGRAL_TYPE_P (scalar_type)
10957 && (GET_MODE_BITSIZE (inner_mode) != TYPE_PRECISION (scalar_type)
10958 || TREE_CODE (scalar_type) != INTEGER_TYPE))
10959 scalar_type = build_nonstandard_integer_type (GET_MODE_BITSIZE (inner_mode),
10960 TYPE_UNSIGNED (scalar_type));
10961
10962 /* We shouldn't end up building VECTOR_TYPEs of non-scalar components.
10963 When the component mode passes the above test simply use a type
10964 corresponding to that mode. The theory is that any use that
10965 would cause problems with this will disable vectorization anyway. */
10966 else if (!SCALAR_FLOAT_TYPE_P (scalar_type)
10967 && !INTEGRAL_TYPE_P (scalar_type))
10968 scalar_type = lang_hooks.types.type_for_mode (inner_mode, 1);
10969
10970 /* We can't build a vector type of elements with alignment bigger than
10971 their size. */
10972 else if (nbytes < TYPE_ALIGN_UNIT (scalar_type))
10973 scalar_type = lang_hooks.types.type_for_mode (inner_mode,
10974 TYPE_UNSIGNED (scalar_type));
10975
10976 /* If we fell back to using the mode, fail if there was
10977 no scalar type for it. */
10978 if (scalar_type == NULL_TREE)
10979 return NULL_TREE;
10980
10981 /* If no prevailing mode was supplied, use the mode the target prefers.
10982 Otherwise lookup a vector mode based on the prevailing mode. */
10983 if (prevailing_mode == VOIDmode)
10984 {
10985 gcc_assert (known_eq (nunits, 0U));
10986 simd_mode = targetm.vectorize.preferred_simd_mode (inner_mode);
10987 if (SCALAR_INT_MODE_P (simd_mode))
10988 {
10989 /* Traditional behavior is not to take the integer mode
10990 literally, but simply to use it as a way of determining
10991 the vector size. It is up to mode_for_vector to decide
10992 what the TYPE_MODE should be.
10993
10994 Note that nunits == 1 is allowed in order to support single
10995 element vector types. */
10996 if (!multiple_p (GET_MODE_SIZE (simd_mode), nbytes, &nunits)
10997 || !mode_for_vector (inner_mode, nunits).exists (&simd_mode))
10998 return NULL_TREE;
10999 }
11000 }
11001 else if (SCALAR_INT_MODE_P (prevailing_mode)
11002 || !related_vector_mode (prevailing_mode,
11003 inner_mode, nunits).exists (&simd_mode))
11004 {
11005 /* Fall back to using mode_for_vector, mostly in the hope of being
11006 able to use an integer mode. */
11007 if (known_eq (nunits, 0U)
11008 && !multiple_p (GET_MODE_SIZE (prevailing_mode), nbytes, &nunits))
11009 return NULL_TREE;
11010
11011 if (!mode_for_vector (inner_mode, nunits).exists (&simd_mode))
11012 return NULL_TREE;
11013 }
11014
11015 vectype = build_vector_type_for_mode (scalar_type, simd_mode);
11016
11017 /* In cases where the mode was chosen by mode_for_vector, check that
11018 the target actually supports the chosen mode, or that it at least
11019 allows the vector mode to be replaced by a like-sized integer. */
11020 if (!VECTOR_MODE_P (TYPE_MODE (vectype))
11021 && !INTEGRAL_MODE_P (TYPE_MODE (vectype)))
11022 return NULL_TREE;
11023
11024 /* Re-attach the address-space qualifier if we canonicalized the scalar
11025 type. */
11026 if (TYPE_ADDR_SPACE (orig_scalar_type) != TYPE_ADDR_SPACE (vectype))
11027 return build_qualified_type
11028 (vectype, KEEP_QUAL_ADDR_SPACE (TYPE_QUALS (orig_scalar_type)));
11029
11030 return vectype;
11031 }
11032
11033 /* Function get_vectype_for_scalar_type.
11034
11035 Returns the vector type corresponding to SCALAR_TYPE as supported
11036 by the target. If GROUP_SIZE is nonzero and we're performing BB
11037 vectorization, make sure that the number of elements in the vector
11038 is no bigger than GROUP_SIZE. */
11039
11040 tree
11041 get_vectype_for_scalar_type (vec_info *vinfo, tree scalar_type,
11042 unsigned int group_size)
11043 {
11044 /* For BB vectorization, we should always have a group size once we've
11045 constructed the SLP tree; the only valid uses of zero GROUP_SIZEs
11046 are tentative requests during things like early data reference
11047 analysis and pattern recognition. */
11048 if (is_a <bb_vec_info> (vinfo))
11049 gcc_assert (vinfo->slp_instances.is_empty () || group_size != 0);
11050 else
11051 group_size = 0;
11052
11053 tree vectype = get_related_vectype_for_scalar_type (vinfo->vector_mode,
11054 scalar_type);
11055 if (vectype && vinfo->vector_mode == VOIDmode)
11056 vinfo->vector_mode = TYPE_MODE (vectype);
11057
11058 /* Register the natural choice of vector type, before the group size
11059 has been applied. */
11060 if (vectype)
11061 vinfo->used_vector_modes.add (TYPE_MODE (vectype));
11062
11063 /* If the natural choice of vector type doesn't satisfy GROUP_SIZE,
11064 try again with an explicit number of elements. */
11065 if (vectype
11066 && group_size
11067 && maybe_ge (TYPE_VECTOR_SUBPARTS (vectype), group_size))
11068 {
11069 /* Start with the biggest number of units that fits within
11070 GROUP_SIZE and halve it until we find a valid vector type.
11071 Usually either the first attempt will succeed or all will
11072 fail (in the latter case because GROUP_SIZE is too small
11073 for the target), but it's possible that a target could have
11074 a hole between supported vector types.
11075
11076 If GROUP_SIZE is not a power of 2, this has the effect of
11077 trying the largest power of 2 that fits within the group,
11078 even though the group is not a multiple of that vector size.
11079 The BB vectorizer will then try to carve up the group into
11080 smaller pieces. */
11081 unsigned int nunits = 1 << floor_log2 (group_size);
11082 do
11083 {
11084 vectype = get_related_vectype_for_scalar_type (vinfo->vector_mode,
11085 scalar_type, nunits);
11086 nunits /= 2;
11087 }
11088 while (nunits > 1 && !vectype);
11089 }
11090
11091 return vectype;
11092 }
11093
11094 /* Return the vector type corresponding to SCALAR_TYPE as supported
11095 by the target. NODE, if nonnull, is the SLP tree node that will
11096 use the returned vector type. */
11097
11098 tree
11099 get_vectype_for_scalar_type (vec_info *vinfo, tree scalar_type, slp_tree node)
11100 {
11101 unsigned int group_size = 0;
11102 if (node)
11103 group_size = SLP_TREE_LANES (node);
11104 return get_vectype_for_scalar_type (vinfo, scalar_type, group_size);
11105 }
11106
11107 /* Function get_mask_type_for_scalar_type.
11108
11109 Returns the mask type corresponding to a result of comparison
11110 of vectors of specified SCALAR_TYPE as supported by target.
11111 If GROUP_SIZE is nonzero and we're performing BB vectorization,
11112 make sure that the number of elements in the vector is no bigger
11113 than GROUP_SIZE. */
11114
11115 tree
11116 get_mask_type_for_scalar_type (vec_info *vinfo, tree scalar_type,
11117 unsigned int group_size)
11118 {
11119 tree vectype = get_vectype_for_scalar_type (vinfo, scalar_type, group_size);
11120
11121 if (!vectype)
11122 return NULL;
11123
11124 return truth_type_for (vectype);
11125 }
11126
11127 /* Function get_same_sized_vectype
11128
11129 Returns a vector type corresponding to SCALAR_TYPE of size
11130 VECTOR_TYPE if supported by the target. */
11131
11132 tree
11133 get_same_sized_vectype (tree scalar_type, tree vector_type)
11134 {
11135 if (VECT_SCALAR_BOOLEAN_TYPE_P (scalar_type))
11136 return truth_type_for (vector_type);
11137
11138 poly_uint64 nunits;
11139 if (!multiple_p (GET_MODE_SIZE (TYPE_MODE (vector_type)),
11140 GET_MODE_SIZE (TYPE_MODE (scalar_type)), &nunits))
11141 return NULL_TREE;
11142
11143 return get_related_vectype_for_scalar_type (TYPE_MODE (vector_type),
11144 scalar_type, nunits);
11145 }
11146
11147 /* Return true if replacing LOOP_VINFO->vector_mode with VECTOR_MODE
11148 would not change the chosen vector modes. */
11149
11150 bool
11151 vect_chooses_same_modes_p (vec_info *vinfo, machine_mode vector_mode)
11152 {
11153 for (vec_info::mode_set::iterator i = vinfo->used_vector_modes.begin ();
11154 i != vinfo->used_vector_modes.end (); ++i)
11155 if (!VECTOR_MODE_P (*i)
11156 || related_vector_mode (vector_mode, GET_MODE_INNER (*i), 0) != *i)
11157 return false;
11158 return true;
11159 }
11160
11161 /* Function vect_is_simple_use.
11162
11163 Input:
11164 VINFO - the vect info of the loop or basic block that is being vectorized.
11165 OPERAND - operand in the loop or bb.
11166 Output:
11167 DEF_STMT_INFO_OUT (optional) - information about the defining stmt in
11168 case OPERAND is an SSA_NAME that is defined in the vectorizable region
11169 DEF_STMT_OUT (optional) - the defining stmt in case OPERAND is an SSA_NAME;
11170 the definition could be anywhere in the function
11171 DT - the type of definition
11172
11173 Returns whether a stmt with OPERAND can be vectorized.
11174 For loops, supportable operands are constants, loop invariants, and operands
11175 that are defined by the current iteration of the loop. Unsupportable
11176 operands are those that are defined by a previous iteration of the loop (as
11177 is the case in reduction/induction computations).
11178 For basic blocks, supportable operands are constants and bb invariants.
11179 For now, operands defined outside the basic block are not supported. */
11180
11181 bool
11182 vect_is_simple_use (tree operand, vec_info *vinfo, enum vect_def_type *dt,
11183 stmt_vec_info *def_stmt_info_out, gimple **def_stmt_out)
11184 {
11185 if (def_stmt_info_out)
11186 *def_stmt_info_out = NULL;
11187 if (def_stmt_out)
11188 *def_stmt_out = NULL;
11189 *dt = vect_unknown_def_type;
11190
11191 if (dump_enabled_p ())
11192 {
11193 dump_printf_loc (MSG_NOTE, vect_location,
11194 "vect_is_simple_use: operand ");
11195 if (TREE_CODE (operand) == SSA_NAME
11196 && !SSA_NAME_IS_DEFAULT_DEF (operand))
11197 dump_gimple_expr (MSG_NOTE, TDF_SLIM, SSA_NAME_DEF_STMT (operand), 0);
11198 else
11199 dump_generic_expr (MSG_NOTE, TDF_SLIM, operand);
11200 }
11201
11202 if (CONSTANT_CLASS_P (operand))
11203 *dt = vect_constant_def;
11204 else if (is_gimple_min_invariant (operand))
11205 *dt = vect_external_def;
11206 else if (TREE_CODE (operand) != SSA_NAME)
11207 *dt = vect_unknown_def_type;
11208 else if (SSA_NAME_IS_DEFAULT_DEF (operand))
11209 *dt = vect_external_def;
11210 else
11211 {
11212 gimple *def_stmt = SSA_NAME_DEF_STMT (operand);
11213 stmt_vec_info stmt_vinfo = vinfo->lookup_def (operand);
11214 if (!stmt_vinfo)
11215 *dt = vect_external_def;
11216 else
11217 {
11218 stmt_vinfo = vect_stmt_to_vectorize (stmt_vinfo);
11219 def_stmt = stmt_vinfo->stmt;
11220 switch (gimple_code (def_stmt))
11221 {
11222 case GIMPLE_PHI:
11223 case GIMPLE_ASSIGN:
11224 case GIMPLE_CALL:
11225 *dt = STMT_VINFO_DEF_TYPE (stmt_vinfo);
11226 break;
11227 default:
11228 *dt = vect_unknown_def_type;
11229 break;
11230 }
11231 if (def_stmt_info_out)
11232 *def_stmt_info_out = stmt_vinfo;
11233 }
11234 if (def_stmt_out)
11235 *def_stmt_out = def_stmt;
11236 }
11237
11238 if (dump_enabled_p ())
11239 {
11240 dump_printf (MSG_NOTE, ", type of def: ");
11241 switch (*dt)
11242 {
11243 case vect_uninitialized_def:
11244 dump_printf (MSG_NOTE, "uninitialized\n");
11245 break;
11246 case vect_constant_def:
11247 dump_printf (MSG_NOTE, "constant\n");
11248 break;
11249 case vect_external_def:
11250 dump_printf (MSG_NOTE, "external\n");
11251 break;
11252 case vect_internal_def:
11253 dump_printf (MSG_NOTE, "internal\n");
11254 break;
11255 case vect_induction_def:
11256 dump_printf (MSG_NOTE, "induction\n");
11257 break;
11258 case vect_reduction_def:
11259 dump_printf (MSG_NOTE, "reduction\n");
11260 break;
11261 case vect_double_reduction_def:
11262 dump_printf (MSG_NOTE, "double reduction\n");
11263 break;
11264 case vect_nested_cycle:
11265 dump_printf (MSG_NOTE, "nested cycle\n");
11266 break;
11267 case vect_unknown_def_type:
11268 dump_printf (MSG_NOTE, "unknown\n");
11269 break;
11270 }
11271 }
11272
11273 if (*dt == vect_unknown_def_type)
11274 {
11275 if (dump_enabled_p ())
11276 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
11277 "Unsupported pattern.\n");
11278 return false;
11279 }
11280
11281 return true;
11282 }
11283
11284 /* Function vect_is_simple_use.
11285
11286 Same as vect_is_simple_use but also determines the vector operand
11287 type of OPERAND and stores it to *VECTYPE. If the definition of
11288 OPERAND is vect_uninitialized_def, vect_constant_def or
11289 vect_external_def *VECTYPE will be set to NULL_TREE and the caller
11290 is responsible for computing the best suited vector type for the
11291 scalar operand. */
11292
11293 bool
11294 vect_is_simple_use (tree operand, vec_info *vinfo, enum vect_def_type *dt,
11295 tree *vectype, stmt_vec_info *def_stmt_info_out,
11296 gimple **def_stmt_out)
11297 {
11298 stmt_vec_info def_stmt_info;
11299 gimple *def_stmt;
11300 if (!vect_is_simple_use (operand, vinfo, dt, &def_stmt_info, &def_stmt))
11301 return false;
11302
11303 if (def_stmt_out)
11304 *def_stmt_out = def_stmt;
11305 if (def_stmt_info_out)
11306 *def_stmt_info_out = def_stmt_info;
11307
11308 /* Now get a vector type if the def is internal, otherwise supply
11309 NULL_TREE and leave it up to the caller to figure out a proper
11310 type for the use stmt. */
11311 if (*dt == vect_internal_def
11312 || *dt == vect_induction_def
11313 || *dt == vect_reduction_def
11314 || *dt == vect_double_reduction_def
11315 || *dt == vect_nested_cycle)
11316 {
11317 *vectype = STMT_VINFO_VECTYPE (def_stmt_info);
11318 gcc_assert (*vectype != NULL_TREE);
11319 if (dump_enabled_p ())
11320 dump_printf_loc (MSG_NOTE, vect_location,
11321 "vect_is_simple_use: vectype %T\n", *vectype);
11322 }
11323 else if (*dt == vect_uninitialized_def
11324 || *dt == vect_constant_def
11325 || *dt == vect_external_def)
11326 *vectype = NULL_TREE;
11327 else
11328 gcc_unreachable ();
11329
11330 return true;
11331 }
11332
11333 /* Function vect_is_simple_use.
11334
11335 Same as vect_is_simple_use but determines the operand by operand
11336 position OPERAND from either STMT or SLP_NODE, filling in *OP
11337 and *SLP_DEF (when SLP_NODE is not NULL). */
11338
11339 bool
11340 vect_is_simple_use (vec_info *vinfo, stmt_vec_info stmt, slp_tree slp_node,
11341 unsigned operand, tree *op, slp_tree *slp_def,
11342 enum vect_def_type *dt,
11343 tree *vectype, stmt_vec_info *def_stmt_info_out)
11344 {
11345 if (slp_node)
11346 {
11347 slp_tree child = SLP_TREE_CHILDREN (slp_node)[operand];
11348 *slp_def = child;
11349 *vectype = SLP_TREE_VECTYPE (child);
11350 if (SLP_TREE_DEF_TYPE (child) == vect_internal_def)
11351 {
11352 *op = gimple_get_lhs (SLP_TREE_REPRESENTATIVE (child)->stmt);
11353 return vect_is_simple_use (*op, vinfo, dt, def_stmt_info_out);
11354 }
11355 else
11356 {
11357 if (def_stmt_info_out)
11358 *def_stmt_info_out = NULL;
11359 *op = SLP_TREE_SCALAR_OPS (child)[0];
11360 *dt = SLP_TREE_DEF_TYPE (child);
11361 return true;
11362 }
11363 }
11364 else
11365 {
11366 *slp_def = NULL;
11367 if (gassign *ass = dyn_cast <gassign *> (stmt->stmt))
11368 {
11369 if (gimple_assign_rhs_code (ass) == COND_EXPR
11370 && COMPARISON_CLASS_P (gimple_assign_rhs1 (ass)))
11371 {
11372 if (operand < 2)
11373 *op = TREE_OPERAND (gimple_assign_rhs1 (ass), operand);
11374 else
11375 *op = gimple_op (ass, operand);
11376 }
11377 else if (gimple_assign_rhs_code (ass) == VIEW_CONVERT_EXPR)
11378 *op = TREE_OPERAND (gimple_assign_rhs1 (ass), 0);
11379 else
11380 *op = gimple_op (ass, operand + 1);
11381 }
11382 else if (gcall *call = dyn_cast <gcall *> (stmt->stmt))
11383 {
11384 if (gimple_call_internal_p (call)
11385 && internal_store_fn_p (gimple_call_internal_fn (call)))
11386 operand = internal_fn_stored_value_index (gimple_call_internal_fn
11387 (call));
11388 *op = gimple_call_arg (call, operand);
11389 }
11390 else
11391 gcc_unreachable ();
11392 return vect_is_simple_use (*op, vinfo, dt, vectype, def_stmt_info_out);
11393 }
11394 }
11395
11396 /* If OP is not NULL and is external or constant, update its vector
11397 type with VECTYPE. Returns true if successful or false if not,
11398 for example when conflicting vector types are present. */
11399
11400 bool
11401 vect_maybe_update_slp_op_vectype (slp_tree op, tree vectype)
11402 {
11403 if (!op || SLP_TREE_DEF_TYPE (op) == vect_internal_def)
11404 return true;
11405 if (SLP_TREE_VECTYPE (op))
11406 return types_compatible_p (SLP_TREE_VECTYPE (op), vectype);
11407 SLP_TREE_VECTYPE (op) = vectype;
11408 return true;
11409 }
11410
11411 /* Function supportable_widening_operation
11412
11413 Check whether an operation represented by the code CODE is a
11414 widening operation that is supported by the target platform in
11415 vector form (i.e., when operating on arguments of type VECTYPE_IN
11416 producing a result of type VECTYPE_OUT).
11417
11418 Widening operations we currently support are NOP (CONVERT), FLOAT,
11419 FIX_TRUNC and WIDEN_MULT. This function checks if these operations
11420 are supported by the target platform either directly (via vector
11421 tree-codes), or via target builtins.
11422
11423 Output:
11424 - CODE1 and CODE2 are codes of vector operations to be used when
11425 vectorizing the operation, if available.
11426 - MULTI_STEP_CVT determines the number of required intermediate steps in
11427 case of multi-step conversion (like char->short->int - in that case
11428 MULTI_STEP_CVT will be 1).
11429 - INTERM_TYPES contains the intermediate type required to perform the
11430 widening operation (short in the above example). */
11431
11432 bool
11433 supportable_widening_operation (vec_info *vinfo,
11434 enum tree_code code, stmt_vec_info stmt_info,
11435 tree vectype_out, tree vectype_in,
11436 enum tree_code *code1, enum tree_code *code2,
11437 int *multi_step_cvt,
11438 vec<tree> *interm_types)
11439 {
11440 loop_vec_info loop_info = dyn_cast <loop_vec_info> (vinfo);
11441 class loop *vect_loop = NULL;
11442 machine_mode vec_mode;
11443 enum insn_code icode1, icode2;
11444 optab optab1, optab2;
11445 tree vectype = vectype_in;
11446 tree wide_vectype = vectype_out;
11447 enum tree_code c1, c2;
11448 int i;
11449 tree prev_type, intermediate_type;
11450 machine_mode intermediate_mode, prev_mode;
11451 optab optab3, optab4;
11452
11453 *multi_step_cvt = 0;
11454 if (loop_info)
11455 vect_loop = LOOP_VINFO_LOOP (loop_info);
11456
11457 switch (code)
11458 {
11459 case WIDEN_MULT_EXPR:
11460 /* The result of a vectorized widening operation usually requires
11461 two vectors (because the widened results do not fit into one vector).
11462 The generated vector results would normally be expected to be
11463 generated in the same order as in the original scalar computation,
11464 i.e. if 8 results are generated in each vector iteration, they are
11465 to be organized as follows:
11466 vect1: [res1,res2,res3,res4],
11467 vect2: [res5,res6,res7,res8].
11468
11469 However, in the special case that the result of the widening
11470 operation is used in a reduction computation only, the order doesn't
11471 matter (because when vectorizing a reduction we change the order of
11472 the computation). Some targets can take advantage of this and
11473 generate more efficient code. For example, targets like Altivec,
11474 that support widen_mult using a sequence of {mult_even,mult_odd}
11475 generate the following vectors:
11476 vect1: [res1,res3,res5,res7],
11477 vect2: [res2,res4,res6,res8].
11478
11479 When vectorizing outer-loops, we execute the inner-loop sequentially
11480 (each vectorized inner-loop iteration contributes to VF outer-loop
11481 iterations in parallel). We therefore don't allow changing the
11482 order of the computation in the inner-loop during outer-loop
11483 vectorization. */
11484 /* TODO: Another case in which order doesn't *really* matter is when we
11485 widen and then contract again, e.g. (short)((int)x * y >> 8).
11486 Normally, pack_trunc performs an even/odd permute, whereas the
11487 repack from an even/odd expansion would be an interleave, which
11488 would be significantly simpler for e.g. AVX2. */
11489 /* In any case, in order to avoid duplicating the code below, recurse
11490 on VEC_WIDEN_MULT_EVEN_EXPR. If it succeeds, all the return values
11491 are properly set up for the caller. If we fail, we'll continue with
11492 a VEC_WIDEN_MULT_LO/HI_EXPR check. */
11493 if (vect_loop
11494 && STMT_VINFO_RELEVANT (stmt_info) == vect_used_by_reduction
11495 && !nested_in_vect_loop_p (vect_loop, stmt_info)
11496 && supportable_widening_operation (vinfo, VEC_WIDEN_MULT_EVEN_EXPR,
11497 stmt_info, vectype_out,
11498 vectype_in, code1, code2,
11499 multi_step_cvt, interm_types))
11500 {
11501 /* Elements in a vector with vect_used_by_reduction property cannot
11502 be reordered if the use chain with this property does not have the
11503 same operation. One such example is s += a * b, where elements
11504 in a and b cannot be reordered. Here we check if the vector defined
11505 by STMT is only directly used in the reduction statement. */
11506 tree lhs = gimple_assign_lhs (stmt_info->stmt);
11507 stmt_vec_info use_stmt_info = loop_info->lookup_single_use (lhs);
11508 if (use_stmt_info
11509 && STMT_VINFO_DEF_TYPE (use_stmt_info) == vect_reduction_def)
11510 return true;
11511 }
11512 c1 = VEC_WIDEN_MULT_LO_EXPR;
11513 c2 = VEC_WIDEN_MULT_HI_EXPR;
11514 break;
11515
11516 case DOT_PROD_EXPR:
11517 c1 = DOT_PROD_EXPR;
11518 c2 = DOT_PROD_EXPR;
11519 break;
11520
11521 case SAD_EXPR:
11522 c1 = SAD_EXPR;
11523 c2 = SAD_EXPR;
11524 break;
11525
11526 case VEC_WIDEN_MULT_EVEN_EXPR:
11527 /* Support the recursion induced just above. */
11528 c1 = VEC_WIDEN_MULT_EVEN_EXPR;
11529 c2 = VEC_WIDEN_MULT_ODD_EXPR;
11530 break;
11531
11532 case WIDEN_LSHIFT_EXPR:
11533 c1 = VEC_WIDEN_LSHIFT_LO_EXPR;
11534 c2 = VEC_WIDEN_LSHIFT_HI_EXPR;
11535 break;
11536
11537 CASE_CONVERT:
11538 c1 = VEC_UNPACK_LO_EXPR;
11539 c2 = VEC_UNPACK_HI_EXPR;
11540 break;
11541
11542 case FLOAT_EXPR:
11543 c1 = VEC_UNPACK_FLOAT_LO_EXPR;
11544 c2 = VEC_UNPACK_FLOAT_HI_EXPR;
11545 break;
11546
11547 case FIX_TRUNC_EXPR:
11548 c1 = VEC_UNPACK_FIX_TRUNC_LO_EXPR;
11549 c2 = VEC_UNPACK_FIX_TRUNC_HI_EXPR;
11550 break;
11551
11552 default:
11553 gcc_unreachable ();
11554 }
11555
11556 if (BYTES_BIG_ENDIAN && c1 != VEC_WIDEN_MULT_EVEN_EXPR)
11557 std::swap (c1, c2);
11558
11559 if (code == FIX_TRUNC_EXPR)
11560 {
11561 /* The signedness is determined from output operand. */
11562 optab1 = optab_for_tree_code (c1, vectype_out, optab_default);
11563 optab2 = optab_for_tree_code (c2, vectype_out, optab_default);
11564 }
11565 else if (CONVERT_EXPR_CODE_P (code)
11566 && VECTOR_BOOLEAN_TYPE_P (wide_vectype)
11567 && VECTOR_BOOLEAN_TYPE_P (vectype)
11568 && TYPE_MODE (wide_vectype) == TYPE_MODE (vectype)
11569 && SCALAR_INT_MODE_P (TYPE_MODE (vectype)))
11570 {
11571 /* If the input and result modes are the same, a different optab
11572 is needed where we pass in the number of units in vectype. */
11573 optab1 = vec_unpacks_sbool_lo_optab;
11574 optab2 = vec_unpacks_sbool_hi_optab;
11575 }
11576 else
11577 {
11578 optab1 = optab_for_tree_code (c1, vectype, optab_default);
11579 optab2 = optab_for_tree_code (c2, vectype, optab_default);
11580 }
11581
11582 if (!optab1 || !optab2)
11583 return false;
11584
11585 vec_mode = TYPE_MODE (vectype);
11586 if ((icode1 = optab_handler (optab1, vec_mode)) == CODE_FOR_nothing
11587 || (icode2 = optab_handler (optab2, vec_mode)) == CODE_FOR_nothing)
11588 return false;
11589
11590 *code1 = c1;
11591 *code2 = c2;
11592
11593 if (insn_data[icode1].operand[0].mode == TYPE_MODE (wide_vectype)
11594 && insn_data[icode2].operand[0].mode == TYPE_MODE (wide_vectype))
11595 {
11596 if (!VECTOR_BOOLEAN_TYPE_P (vectype))
11597 return true;
11598 /* For scalar masks we may have different boolean
11599 vector types having the same QImode. Thus we
11600 add an additional check on the number of elements. */
11601 if (known_eq (TYPE_VECTOR_SUBPARTS (vectype),
11602 TYPE_VECTOR_SUBPARTS (wide_vectype) * 2))
11603 return true;
11604 }
11605
11606 /* Check if it's a multi-step conversion that can be done using intermediate
11607 types. */
11608
11609 prev_type = vectype;
11610 prev_mode = vec_mode;
11611
11612 if (!CONVERT_EXPR_CODE_P (code))
11613 return false;
11614
11615 /* We assume here that there will not be more than MAX_INTERM_CVT_STEPS
11616 intermediate steps in the promotion sequence. We try
11617 MAX_INTERM_CVT_STEPS to get to WIDE_VECTYPE, and fail if we do
11618 not. */
11619 interm_types->create (MAX_INTERM_CVT_STEPS);
11620 for (i = 0; i < MAX_INTERM_CVT_STEPS; i++)
11621 {
11622 intermediate_mode = insn_data[icode1].operand[0].mode;
11623 if (VECTOR_BOOLEAN_TYPE_P (prev_type))
11624 intermediate_type
11625 = vect_halve_mask_nunits (prev_type, intermediate_mode);
11626 else
11627 intermediate_type
11628 = lang_hooks.types.type_for_mode (intermediate_mode,
11629 TYPE_UNSIGNED (prev_type));
11630
11631 if (VECTOR_BOOLEAN_TYPE_P (intermediate_type)
11632 && VECTOR_BOOLEAN_TYPE_P (prev_type)
11633 && intermediate_mode == prev_mode
11634 && SCALAR_INT_MODE_P (prev_mode))
11635 {
11636 /* If the input and result modes are the same, a different optab
11637 is needed where we pass in the number of units in vectype. */
11638 optab3 = vec_unpacks_sbool_lo_optab;
11639 optab4 = vec_unpacks_sbool_hi_optab;
11640 }
11641 else
11642 {
11643 optab3 = optab_for_tree_code (c1, intermediate_type, optab_default);
11644 optab4 = optab_for_tree_code (c2, intermediate_type, optab_default);
11645 }
11646
11647 if (!optab3 || !optab4
11648 || (icode1 = optab_handler (optab1, prev_mode)) == CODE_FOR_nothing
11649 || insn_data[icode1].operand[0].mode != intermediate_mode
11650 || (icode2 = optab_handler (optab2, prev_mode)) == CODE_FOR_nothing
11651 || insn_data[icode2].operand[0].mode != intermediate_mode
11652 || ((icode1 = optab_handler (optab3, intermediate_mode))
11653 == CODE_FOR_nothing)
11654 || ((icode2 = optab_handler (optab4, intermediate_mode))
11655 == CODE_FOR_nothing))
11656 break;
11657
11658 interm_types->quick_push (intermediate_type);
11659 (*multi_step_cvt)++;
11660
11661 if (insn_data[icode1].operand[0].mode == TYPE_MODE (wide_vectype)
11662 && insn_data[icode2].operand[0].mode == TYPE_MODE (wide_vectype))
11663 {
11664 if (!VECTOR_BOOLEAN_TYPE_P (vectype))
11665 return true;
11666 if (known_eq (TYPE_VECTOR_SUBPARTS (intermediate_type),
11667 TYPE_VECTOR_SUBPARTS (wide_vectype) * 2))
11668 return true;
11669 }
11670
11671 prev_type = intermediate_type;
11672 prev_mode = intermediate_mode;
11673 }
11674
11675 interm_types->release ();
11676 return false;
11677 }
11678
11679
11680 /* Function supportable_narrowing_operation
11681
11682 Check whether an operation represented by the code CODE is a
11683 narrowing operation that is supported by the target platform in
11684 vector form (i.e., when operating on arguments of type VECTYPE_IN
11685 and producing a result of type VECTYPE_OUT).
11686
11687 Narrowing operations we currently support are NOP (CONVERT), FIX_TRUNC
11688 and FLOAT. This function checks if these operations are supported by
11689 the target platform directly via vector tree-codes.
11690
11691 Output:
11692 - CODE1 is the code of a vector operation to be used when
11693 vectorizing the operation, if available.
11694 - MULTI_STEP_CVT determines the number of required intermediate steps in
11695 case of multi-step conversion (like int->short->char - in that case
11696 MULTI_STEP_CVT will be 1).
11697 - INTERM_TYPES contains the intermediate type required to perform the
11698 narrowing operation (short in the above example). */
11699
11700 bool
11701 supportable_narrowing_operation (enum tree_code code,
11702 tree vectype_out, tree vectype_in,
11703 enum tree_code *code1, int *multi_step_cvt,
11704 vec<tree> *interm_types)
11705 {
11706 machine_mode vec_mode;
11707 enum insn_code icode1;
11708 optab optab1, interm_optab;
11709 tree vectype = vectype_in;
11710 tree narrow_vectype = vectype_out;
11711 enum tree_code c1;
11712 tree intermediate_type, prev_type;
11713 machine_mode intermediate_mode, prev_mode;
11714 int i;
11715 bool uns;
11716
11717 *multi_step_cvt = 0;
11718 switch (code)
11719 {
11720 CASE_CONVERT:
11721 c1 = VEC_PACK_TRUNC_EXPR;
11722 if (VECTOR_BOOLEAN_TYPE_P (narrow_vectype)
11723 && VECTOR_BOOLEAN_TYPE_P (vectype)
11724 && TYPE_MODE (narrow_vectype) == TYPE_MODE (vectype)
11725 && SCALAR_INT_MODE_P (TYPE_MODE (vectype)))
11726 optab1 = vec_pack_sbool_trunc_optab;
11727 else
11728 optab1 = optab_for_tree_code (c1, vectype, optab_default);
11729 break;
11730
11731 case FIX_TRUNC_EXPR:
11732 c1 = VEC_PACK_FIX_TRUNC_EXPR;
11733 /* The signedness is determined from output operand. */
11734 optab1 = optab_for_tree_code (c1, vectype_out, optab_default);
11735 break;
11736
11737 case FLOAT_EXPR:
11738 c1 = VEC_PACK_FLOAT_EXPR;
11739 optab1 = optab_for_tree_code (c1, vectype, optab_default);
11740 break;
11741
11742 default:
11743 gcc_unreachable ();
11744 }
11745
11746 if (!optab1)
11747 return false;
11748
11749 vec_mode = TYPE_MODE (vectype);
11750 if ((icode1 = optab_handler (optab1, vec_mode)) == CODE_FOR_nothing)
11751 return false;
11752
11753 *code1 = c1;
11754
11755 if (insn_data[icode1].operand[0].mode == TYPE_MODE (narrow_vectype))
11756 {
11757 if (!VECTOR_BOOLEAN_TYPE_P (vectype))
11758 return true;
11759 /* For scalar masks we may have different boolean
11760 vector types having the same QImode. Thus we
11761 add an additional check on the number of elements. */
11762 if (known_eq (TYPE_VECTOR_SUBPARTS (vectype) * 2,
11763 TYPE_VECTOR_SUBPARTS (narrow_vectype)))
11764 return true;
11765 }
11766
11767 if (code == FLOAT_EXPR)
11768 return false;
11769
11770 /* Check if it's a multi-step conversion that can be done using intermediate
11771 types. */
11772 prev_mode = vec_mode;
11773 prev_type = vectype;
11774 if (code == FIX_TRUNC_EXPR)
11775 uns = TYPE_UNSIGNED (vectype_out);
11776 else
11777 uns = TYPE_UNSIGNED (vectype);
11778
11779 /* For multi-step FIX_TRUNC_EXPR prefer signed floating to integer
11780 conversion over unsigned, as unsigned FIX_TRUNC_EXPR is often more
11781 costly than signed. */
11782 if (code == FIX_TRUNC_EXPR && uns)
11783 {
11784 enum insn_code icode2;
11785
11786 intermediate_type
11787 = lang_hooks.types.type_for_mode (TYPE_MODE (vectype_out), 0);
11788 interm_optab
11789 = optab_for_tree_code (c1, intermediate_type, optab_default);
11790 if (interm_optab != unknown_optab
11791 && (icode2 = optab_handler (optab1, vec_mode)) != CODE_FOR_nothing
11792 && insn_data[icode1].operand[0].mode
11793 == insn_data[icode2].operand[0].mode)
11794 {
11795 uns = false;
11796 optab1 = interm_optab;
11797 icode1 = icode2;
11798 }
11799 }
11800
11801 /* We assume here that there will not be more than MAX_INTERM_CVT_STEPS
11802 intermediate steps in the narrowing sequence. We try
11803 MAX_INTERM_CVT_STEPS to get to NARROW_VECTYPE, and fail if we do not. */
11804 interm_types->create (MAX_INTERM_CVT_STEPS);
11805 for (i = 0; i < MAX_INTERM_CVT_STEPS; i++)
11806 {
11807 intermediate_mode = insn_data[icode1].operand[0].mode;
11808 if (VECTOR_BOOLEAN_TYPE_P (prev_type))
11809 intermediate_type
11810 = vect_double_mask_nunits (prev_type, intermediate_mode);
11811 else
11812 intermediate_type
11813 = lang_hooks.types.type_for_mode (intermediate_mode, uns);
11814 if (VECTOR_BOOLEAN_TYPE_P (intermediate_type)
11815 && VECTOR_BOOLEAN_TYPE_P (prev_type)
11816 && intermediate_mode == prev_mode
11817 && SCALAR_INT_MODE_P (prev_mode))
11818 interm_optab = vec_pack_sbool_trunc_optab;
11819 else
11820 interm_optab
11821 = optab_for_tree_code (VEC_PACK_TRUNC_EXPR, intermediate_type,
11822 optab_default);
11823 if (!interm_optab
11824 || ((icode1 = optab_handler (optab1, prev_mode)) == CODE_FOR_nothing)
11825 || insn_data[icode1].operand[0].mode != intermediate_mode
11826 || ((icode1 = optab_handler (interm_optab, intermediate_mode))
11827 == CODE_FOR_nothing))
11828 break;
11829
11830 interm_types->quick_push (intermediate_type);
11831 (*multi_step_cvt)++;
11832
11833 if (insn_data[icode1].operand[0].mode == TYPE_MODE (narrow_vectype))
11834 {
11835 if (!VECTOR_BOOLEAN_TYPE_P (vectype))
11836 return true;
11837 if (known_eq (TYPE_VECTOR_SUBPARTS (intermediate_type) * 2,
11838 TYPE_VECTOR_SUBPARTS (narrow_vectype)))
11839 return true;
11840 }
11841
11842 prev_mode = intermediate_mode;
11843 prev_type = intermediate_type;
11844 optab1 = interm_optab;
11845 }
11846
11847 interm_types->release ();
11848 return false;
11849 }
11850
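/* Illustrative sketch only (not used by the vectorizer; the helper name
   and fixed element count are hypothetical): the element-wise effect of a
   two-step narrowing sequence as set up above.  Narrowing 'int' elements
   down to 'signed char' with VEC_PACK_TRUNC_EXPR needs an intermediate
   'short' step; each step packs two input vectors into one vector with
   twice as many half-width elements, truncating each value (modulo
   semantics, as GCC defines for narrowing conversions).  */

static void
example_two_step_pack_trunc (const int src[8], signed char dst[8])
{
  short tmp[8];
  unsigned int i;

  /* Step 1: the effect of VEC_PACK_TRUNC_EXPR from int to short.  */
  for (i = 0; i < 8; i++)
    tmp[i] = (short) src[i];

  /* Step 2: the effect of VEC_PACK_TRUNC_EXPR from short to signed char.  */
  for (i = 0; i < 8; i++)
    dst[i] = (signed char) tmp[i];
}
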
11851 /* Generate and return a statement that sets vector mask MASK such that
11852 MASK[I] is true iff J + START_INDEX < END_INDEX for all J <= I. */
11853
11854 gcall *
11855 vect_gen_while (tree mask, tree start_index, tree end_index)
11856 {
11857 tree cmp_type = TREE_TYPE (start_index);
11858 tree mask_type = TREE_TYPE (mask);
11859 gcc_checking_assert (direct_internal_fn_supported_p (IFN_WHILE_ULT,
11860 cmp_type, mask_type,
11861 OPTIMIZE_FOR_SPEED));
11862 gcall *call = gimple_build_call_internal (IFN_WHILE_ULT, 3,
11863 start_index, end_index,
11864 build_zero_cst (mask_type));
11865 gimple_call_set_lhs (call, mask);
11866 return call;
11867 }
11868
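/* Illustrative sketch only (hypothetical helper, not part of the
   vectorizer): the scalar meaning of the IFN_WHILE_ULT mask built by
   vect_gen_while.  Element I is true iff START + J < END for every
   J <= I, which for increasing unsigned indices reduces to
   START + I < END.  E.g. with nelems == 4, start == 6 and end == 9
   the mask is { 1, 1, 1, 0 }.  */

static void
example_while_ult (unsigned int start, unsigned int end,
		   unsigned char *mask, unsigned int nelems)
{
  unsigned int i;

  for (i = 0; i < nelems; i++)
    mask[i] = (start + i < end);
}
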
11869 /* Generate a vector mask of type MASK_TYPE for which index I is false iff
11870 J + START_INDEX < END_INDEX for all J <= I. Add the statements to SEQ. */
11871
11872 tree
11873 vect_gen_while_not (gimple_seq *seq, tree mask_type, tree start_index,
11874 tree end_index)
11875 {
11876 tree tmp = make_ssa_name (mask_type);
11877 gcall *call = vect_gen_while (tmp, start_index, end_index);
11878 gimple_seq_add_stmt (seq, call);
11879 return gimple_build (seq, BIT_NOT_EXPR, mask_type, tmp);
11880 }
11881
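/* Illustrative sketch only (hypothetical helper): the scalar meaning of
   the inverted mask built by vect_gen_while_not, i.e. element I is true
   iff START + I >= END, marking the lanes that are out of range.  */

static void
example_while_ult_not (unsigned int start, unsigned int end,
		       unsigned char *mask, unsigned int nelems)
{
  unsigned int i;

  for (i = 0; i < nelems; i++)
    mask[i] = !(start + i < end);
}
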
11882 /* Try to compute the vector types required to vectorize STMT_INFO,
11883 returning true on success and false if vectorization isn't possible.
11884 If GROUP_SIZE is nonzero and we're performing BB vectorization,
11885 make sure that the number of elements in the vectors is no bigger
11886 than GROUP_SIZE.
11887
11888 On success:
11889
11890 - Set *STMT_VECTYPE_OUT to:
11891 - NULL_TREE if the statement doesn't need to be vectorized;
11892 - the equivalent of STMT_VINFO_VECTYPE otherwise.
11893
11894 - Set *NUNITS_VECTYPE_OUT to the vector type that contains the maximum
11895 number of units needed to vectorize STMT_INFO, or NULL_TREE if the
11896 statement does not help to determine the overall number of units. */
11897
11898 opt_result
11899 vect_get_vector_types_for_stmt (vec_info *vinfo, stmt_vec_info stmt_info,
11900 tree *stmt_vectype_out,
11901 tree *nunits_vectype_out,
11902 unsigned int group_size)
11903 {
11904 gimple *stmt = stmt_info->stmt;
11905
11906 /* For BB vectorization, we should always have a group size once we've
11907 constructed the SLP tree; the only valid uses of zero GROUP_SIZEs
11908 are tentative requests during things like early data reference
11909 analysis and pattern recognition. */
11910 if (is_a <bb_vec_info> (vinfo))
11911 gcc_assert (vinfo->slp_instances.is_empty () || group_size != 0);
11912 else
11913 group_size = 0;
11914
11915 *stmt_vectype_out = NULL_TREE;
11916 *nunits_vectype_out = NULL_TREE;
11917
11918 if (gimple_get_lhs (stmt) == NULL_TREE
11919 /* MASK_STORE has no lhs, but is ok. */
11920 && !gimple_call_internal_p (stmt, IFN_MASK_STORE))
11921 {
11922 if (is_a <gcall *> (stmt))
11923 {
11924 /* Ignore calls with no lhs.  These must be calls to
11925 #pragma omp simd functions, and the vectorization factor
11926 they really need can't be determined until
11927 vectorizable_simd_clone_call. */
11928 if (dump_enabled_p ())
11929 dump_printf_loc (MSG_NOTE, vect_location,
11930 "defer to SIMD clone analysis.\n");
11931 return opt_result::success ();
11932 }
11933
11934 return opt_result::failure_at (stmt,
11935 "not vectorized: irregular stmt.%G", stmt);
11936 }
11937
11938 if (VECTOR_MODE_P (TYPE_MODE (gimple_expr_type (stmt))))
11939 return opt_result::failure_at (stmt,
11940 "not vectorized: vector stmt in loop:%G",
11941 stmt);
11942
11943 tree vectype;
11944 tree scalar_type = NULL_TREE;
11945 if (group_size == 0 && STMT_VINFO_VECTYPE (stmt_info))
11946 {
11947 vectype = STMT_VINFO_VECTYPE (stmt_info);
11948 if (dump_enabled_p ())
11949 dump_printf_loc (MSG_NOTE, vect_location,
11950 "precomputed vectype: %T\n", vectype);
11951 }
11952 else if (vect_use_mask_type_p (stmt_info))
11953 {
11954 unsigned int precision = stmt_info->mask_precision;
11955 scalar_type = build_nonstandard_integer_type (precision, 1);
11956 vectype = get_mask_type_for_scalar_type (vinfo, scalar_type, group_size);
11957 if (!vectype)
11958 return opt_result::failure_at (stmt, "not vectorized: unsupported"
11959 " data-type %T\n", scalar_type);
11960 if (dump_enabled_p ())
11961 dump_printf_loc (MSG_NOTE, vect_location, "vectype: %T\n", vectype);
11962 }
11963 else
11964 {
11965 if (data_reference *dr = STMT_VINFO_DATA_REF (stmt_info))
11966 scalar_type = TREE_TYPE (DR_REF (dr));
11967 else if (gimple_call_internal_p (stmt, IFN_MASK_STORE))
11968 scalar_type = TREE_TYPE (gimple_call_arg (stmt, 3));
11969 else
11970 scalar_type = TREE_TYPE (gimple_get_lhs (stmt));
11971
11972 if (dump_enabled_p ())
11973 {
11974 if (group_size)
11975 dump_printf_loc (MSG_NOTE, vect_location,
11976 "get vectype for scalar type (group size %d):"
11977 " %T\n", group_size, scalar_type);
11978 else
11979 dump_printf_loc (MSG_NOTE, vect_location,
11980 "get vectype for scalar type: %T\n", scalar_type);
11981 }
11982 vectype = get_vectype_for_scalar_type (vinfo, scalar_type, group_size);
11983 if (!vectype)
11984 return opt_result::failure_at (stmt,
11985 "not vectorized:"
11986 " unsupported data-type %T\n",
11987 scalar_type);
11988
11989 if (dump_enabled_p ())
11990 dump_printf_loc (MSG_NOTE, vect_location, "vectype: %T\n", vectype);
11991 }
11992 *stmt_vectype_out = vectype;
11993
11994 /* Don't try to compute scalar types if the stmt produces a boolean
11995 vector; use the existing vector type instead. */
11996 tree nunits_vectype = vectype;
11997 if (!VECTOR_BOOLEAN_TYPE_P (vectype))
11998 {
11999 /* The number of units is set according to the smallest scalar
12000 type (or the largest vector size, but we only support one
12001 vector size per vectorization). */
12002 HOST_WIDE_INT dummy;
12003 scalar_type = vect_get_smallest_scalar_type (stmt_info, &dummy, &dummy);
12004 if (scalar_type != TREE_TYPE (vectype))
12005 {
12006 if (dump_enabled_p ())
12007 dump_printf_loc (MSG_NOTE, vect_location,
12008 "get vectype for smallest scalar type: %T\n",
12009 scalar_type);
12010 nunits_vectype = get_vectype_for_scalar_type (vinfo, scalar_type,
12011 group_size);
12012 if (!nunits_vectype)
12013 return opt_result::failure_at
12014 (stmt, "not vectorized: unsupported data-type %T\n",
12015 scalar_type);
12016 if (dump_enabled_p ())
12017 dump_printf_loc (MSG_NOTE, vect_location, "nunits vectype: %T\n",
12018 nunits_vectype);
12019 }
12020 }
12021
12022 gcc_assert (multiple_p (TYPE_VECTOR_SUBPARTS (nunits_vectype),
12023 TYPE_VECTOR_SUBPARTS (*stmt_vectype_out)));
12024
12025 if (dump_enabled_p ())
12026 {
12027 dump_printf_loc (MSG_NOTE, vect_location, "nunits = ");
12028 dump_dec (MSG_NOTE, TYPE_VECTOR_SUBPARTS (nunits_vectype));
12029 dump_printf (MSG_NOTE, "\n");
12030 }
12031
12032 *nunits_vectype_out = nunits_vectype;
12033 return opt_result::success ();
12034 }
12035
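/* Illustrative scalar example (hypothetical function; lane counts depend
   on the target vector size): for the widening conversion statement inside
   the loop below, *STMT_VECTYPE_OUT is based on the 'int' result (e.g.
   4 x int with 128-bit vectors), while *NUNITS_VECTYPE_OUT is derived from
   the smallest scalar type in the statement, 'short' (e.g. 8 x short), so
   this statement is the one that forces the larger number of units.  */

static void
example_widening_loop (int *dst, const short *src, int n)
{
  int i;

  for (i = 0; i < n; i++)
    dst[i] = (int) src[i];	/* Smallest scalar type here is 'short'.  */
}
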
12036 /* Generate and return a statement sequence that sets the vector length LEN to:
12037
12038 min_of_start_and_end = min (START_INDEX, END_INDEX);
12039 left_len = END_INDEX - min_of_start_and_end;
12040 rhs = min (left_len, LEN_LIMIT);
12041 LEN = rhs;
12042
12043 Note: the cost of the code generated by this function is modeled
12044 by vect_estimate_min_profitable_iters, so changes here may need
12045 corresponding changes there. */
12046
12047 gimple_seq
12048 vect_gen_len (tree len, tree start_index, tree end_index, tree len_limit)
12049 {
12050 gimple_seq stmts = NULL;
12051 tree len_type = TREE_TYPE (len);
12052 gcc_assert (TREE_TYPE (start_index) == len_type);
12053
12054 tree min = gimple_build (&stmts, MIN_EXPR, len_type, start_index, end_index);
12055 tree left_len = gimple_build (&stmts, MINUS_EXPR, len_type, end_index, min);
12056 tree rhs = gimple_build (&stmts, MIN_EXPR, len_type, left_len, len_limit);
12057 gimple *stmt = gimple_build_assign (len, rhs);
12058 gimple_seq_add_stmt (&stmts, stmt);
12059
12060 return stmts;
12061 }
12062
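/* Illustrative sketch only (hypothetical helper, not part of the
   vectorizer): the scalar value computed by the statement sequence that
   vect_gen_len returns.  E.g. with start == 13, end == 16 and limit == 4
   the result is 3, i.e. only three lanes remain active in the final
   iteration.  */

static unsigned int
example_gen_len (unsigned int start, unsigned int end, unsigned int limit)
{
  unsigned int min_of_start_and_end = start < end ? start : end;
  unsigned int left_len = end - min_of_start_and_end;

  return left_len < limit ? left_len : limit;
}
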