doc: "used" attribute saves decls from linker garbage collection
gcc/tree-vect-stmts.c
1 /* Statement Analysis and Transformation for Vectorization
2 Copyright (C) 2003-2020 Free Software Foundation, Inc.
3 Contributed by Dorit Naishlos <dorit@il.ibm.com>
4 and Ira Rosen <irar@il.ibm.com>
5
6 This file is part of GCC.
7
8 GCC is free software; you can redistribute it and/or modify it under
9 the terms of the GNU General Public License as published by the Free
10 Software Foundation; either version 3, or (at your option) any later
11 version.
12
13 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
14 WARRANTY; without even the implied warranty of MERCHANTABILITY or
15 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
16 for more details.
17
18 You should have received a copy of the GNU General Public License
19 along with GCC; see the file COPYING3. If not see
20 <http://www.gnu.org/licenses/>. */
21
22 #include "config.h"
23 #include "system.h"
24 #include "coretypes.h"
25 #include "backend.h"
26 #include "target.h"
27 #include "rtl.h"
28 #include "tree.h"
29 #include "gimple.h"
30 #include "ssa.h"
31 #include "optabs-tree.h"
32 #include "insn-config.h"
33 #include "recog.h" /* FIXME: for insn_data */
34 #include "cgraph.h"
35 #include "dumpfile.h"
36 #include "alias.h"
37 #include "fold-const.h"
38 #include "stor-layout.h"
39 #include "tree-eh.h"
40 #include "gimplify.h"
41 #include "gimple-iterator.h"
42 #include "gimplify-me.h"
43 #include "tree-cfg.h"
44 #include "tree-ssa-loop-manip.h"
45 #include "cfgloop.h"
46 #include "explow.h"
47 #include "tree-ssa-loop.h"
48 #include "tree-scalar-evolution.h"
49 #include "tree-vectorizer.h"
50 #include "builtins.h"
51 #include "internal-fn.h"
52 #include "tree-vector-builder.h"
53 #include "vec-perm-indices.h"
54 #include "tree-ssa-loop-niter.h"
55 #include "gimple-fold.h"
56 #include "regs.h"
57 #include "attribs.h"
58
59 /* For lang_hooks.types.type_for_mode. */
60 #include "langhooks.h"
61
62 /* Return the vectorized type for the given statement. */
63
64 tree
65 stmt_vectype (class _stmt_vec_info *stmt_info)
66 {
67 return STMT_VINFO_VECTYPE (stmt_info);
68 }
69
70 /* Return TRUE iff the given statement is in an inner loop relative to
71 the loop being vectorized. */
72 bool
73 stmt_in_inner_loop_p (vec_info *vinfo, class _stmt_vec_info *stmt_info)
74 {
75 gimple *stmt = STMT_VINFO_STMT (stmt_info);
76 basic_block bb = gimple_bb (stmt);
77 loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
78 class loop* loop;
79
80 if (!loop_vinfo)
81 return false;
82
83 loop = LOOP_VINFO_LOOP (loop_vinfo);
84
85 return (bb->loop_father == loop->inner);
86 }
87
88 /* Record the cost of a statement, either by directly informing the
89 target model or by saving it in a vector for later processing.
90 Return a preliminary estimate of the statement's cost. */
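
/* Illustrative usage sketch (not taken from any particular caller; the
   variable names are invented):

     stmt_vector_for_cost cost_vec = vNULL;
     unsigned inside
       = record_stmt_cost (&cost_vec, 2, vector_stmt, stmt_info,
                           vectype, 0, vect_body);
     unsigned prologue
       = record_stmt_cost (&cost_vec, 1, scalar_to_vec, stmt_info,
                           vectype, 0, vect_prologue);

   Each call pushes a stmt_info_for_cost entry onto COST_VEC for later
   processing and returns a preliminary estimate based on
   builtin_vectorization_cost.  */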
91
92 unsigned
93 record_stmt_cost (stmt_vector_for_cost *body_cost_vec, int count,
94 enum vect_cost_for_stmt kind, stmt_vec_info stmt_info,
95 tree vectype, int misalign,
96 enum vect_cost_model_location where)
97 {
98 if ((kind == vector_load || kind == unaligned_load)
99 && (stmt_info && STMT_VINFO_GATHER_SCATTER_P (stmt_info)))
100 kind = vector_gather_load;
101 if ((kind == vector_store || kind == unaligned_store)
102 && (stmt_info && STMT_VINFO_GATHER_SCATTER_P (stmt_info)))
103 kind = vector_scatter_store;
104
105 stmt_info_for_cost si = { count, kind, where, stmt_info, vectype, misalign };
106 body_cost_vec->safe_push (si);
107
108 return (unsigned)
109 (builtin_vectorization_cost (kind, vectype, misalign) * count);
110 }
111
112 /* Return a variable of type ELEM_TYPE[NELEMS]. */
113
114 static tree
115 create_vector_array (tree elem_type, unsigned HOST_WIDE_INT nelems)
116 {
117 return create_tmp_var (build_array_type_nelts (elem_type, nelems),
118 "vect_array");
119 }
120
121 /* ARRAY is an array of vectors created by create_vector_array.
122 Return an SSA_NAME for the vector in index N. The reference
123 is part of the vectorization of STMT_INFO and the vector is associated
124 with scalar destination SCALAR_DEST. */
125
126 static tree
127 read_vector_array (vec_info *vinfo,
128 stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
129 tree scalar_dest, tree array, unsigned HOST_WIDE_INT n)
130 {
131 tree vect_type, vect, vect_name, array_ref;
132 gimple *new_stmt;
133
134 gcc_assert (TREE_CODE (TREE_TYPE (array)) == ARRAY_TYPE);
135 vect_type = TREE_TYPE (TREE_TYPE (array));
136 vect = vect_create_destination_var (scalar_dest, vect_type);
137 array_ref = build4 (ARRAY_REF, vect_type, array,
138 build_int_cst (size_type_node, n),
139 NULL_TREE, NULL_TREE);
140
141 new_stmt = gimple_build_assign (vect, array_ref);
142 vect_name = make_ssa_name (vect, new_stmt);
143 gimple_assign_set_lhs (new_stmt, vect_name);
144 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
145
146 return vect_name;
147 }
148
149 /* ARRAY is an array of vectors created by create_vector_array.
150 Emit code to store SSA_NAME VECT in index N of the array.
151 The store is part of the vectorization of STMT_INFO. */
152
153 static void
154 write_vector_array (vec_info *vinfo,
155 stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
156 tree vect, tree array, unsigned HOST_WIDE_INT n)
157 {
158 tree array_ref;
159 gimple *new_stmt;
160
161 array_ref = build4 (ARRAY_REF, TREE_TYPE (vect), array,
162 build_int_cst (size_type_node, n),
163 NULL_TREE, NULL_TREE);
164
165 new_stmt = gimple_build_assign (array_ref, vect);
166 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
167 }
168
169 /* PTR is a pointer to an array of type TYPE. Return a representation
170 of *PTR. The memory reference replaces those in FIRST_DR
171 (and its group). */
172
173 static tree
174 create_array_ref (tree type, tree ptr, tree alias_ptr_type)
175 {
176 tree mem_ref;
177
178 mem_ref = build2 (MEM_REF, type, ptr, build_int_cst (alias_ptr_type, 0));
179 /* Arrays have the same alignment as their type. */
180 set_ptr_info_alignment (get_ptr_info (ptr), TYPE_ALIGN_UNIT (type), 0);
181 return mem_ref;
182 }
183
184 /* Add a clobber of variable VAR to the vectorization of STMT_INFO.
185 Emit the clobber before *GSI. */
186
187 static void
188 vect_clobber_variable (vec_info *vinfo, stmt_vec_info stmt_info,
189 gimple_stmt_iterator *gsi, tree var)
190 {
191 tree clobber = build_clobber (TREE_TYPE (var));
192 gimple *new_stmt = gimple_build_assign (var, clobber);
193 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
194 }
195
196 /* Utility functions used by vect_mark_stmts_to_be_vectorized. */
197
198 /* Function vect_mark_relevant.
199
200 Mark STMT_INFO as "relevant for vectorization" and add it to WORKLIST. */
201
202 static void
203 vect_mark_relevant (vec<stmt_vec_info> *worklist, stmt_vec_info stmt_info,
204 enum vect_relevant relevant, bool live_p)
205 {
206 enum vect_relevant save_relevant = STMT_VINFO_RELEVANT (stmt_info);
207 bool save_live_p = STMT_VINFO_LIVE_P (stmt_info);
208
209 if (dump_enabled_p ())
210 dump_printf_loc (MSG_NOTE, vect_location,
211 "mark relevant %d, live %d: %G", relevant, live_p,
212 stmt_info->stmt);
213
214 /* If this stmt is an original stmt in a pattern, we might need to mark its
215 related pattern stmt instead of the original stmt. However, such stmts
216 may have their own uses that are not in any pattern, in such cases the
217 stmt itself should be marked. */
218 if (STMT_VINFO_IN_PATTERN_P (stmt_info))
219 {
220 /* This is the last stmt in a sequence that was detected as a
221 pattern that can potentially be vectorized. Don't mark the stmt
222 as relevant/live because it's not going to be vectorized.
223 Instead mark the pattern-stmt that replaces it. */
224
225 if (dump_enabled_p ())
226 dump_printf_loc (MSG_NOTE, vect_location,
227 "last stmt in pattern. don't mark"
228 " relevant/live.\n");
229 stmt_vec_info old_stmt_info = stmt_info;
230 stmt_info = STMT_VINFO_RELATED_STMT (stmt_info);
231 gcc_assert (STMT_VINFO_RELATED_STMT (stmt_info) == old_stmt_info);
232 save_relevant = STMT_VINFO_RELEVANT (stmt_info);
233 save_live_p = STMT_VINFO_LIVE_P (stmt_info);
234 }
235
236 STMT_VINFO_LIVE_P (stmt_info) |= live_p;
237 if (relevant > STMT_VINFO_RELEVANT (stmt_info))
238 STMT_VINFO_RELEVANT (stmt_info) = relevant;
239
240 if (STMT_VINFO_RELEVANT (stmt_info) == save_relevant
241 && STMT_VINFO_LIVE_P (stmt_info) == save_live_p)
242 {
243 if (dump_enabled_p ())
244 dump_printf_loc (MSG_NOTE, vect_location,
245 "already marked relevant/live.\n");
246 return;
247 }
248
249 worklist->safe_push (stmt_info);
250 }
251
252
253 /* Function is_simple_and_all_uses_invariant
254
255 Return true if STMT_INFO is simple and all uses of it are invariant. */
256
257 bool
258 is_simple_and_all_uses_invariant (stmt_vec_info stmt_info,
259 loop_vec_info loop_vinfo)
260 {
261 tree op;
262 ssa_op_iter iter;
263
264 gassign *stmt = dyn_cast <gassign *> (stmt_info->stmt);
265 if (!stmt)
266 return false;
267
268 FOR_EACH_SSA_TREE_OPERAND (op, stmt, iter, SSA_OP_USE)
269 {
270 enum vect_def_type dt = vect_uninitialized_def;
271
272 if (!vect_is_simple_use (op, loop_vinfo, &dt))
273 {
274 if (dump_enabled_p ())
275 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
276 "use not simple.\n");
277 return false;
278 }
279
280 if (dt != vect_external_def && dt != vect_constant_def)
281 return false;
282 }
283 return true;
284 }
285
286 /* Function vect_stmt_relevant_p.
287
288 Return true if STMT_INFO, in the loop that is represented by LOOP_VINFO,
289 is "relevant for vectorization".
290
291 A stmt is considered "relevant for vectorization" if:
292 - it has uses outside the loop.
293 - it has vdefs (it alters memory).
 294    - it is a control stmt in the loop (except for the loop exit condition).
295
296 CHECKME: what other side effects would the vectorizer allow? */
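
/* For example (an illustrative sketch, not from any testcase):

     int
     foo (int *a, const int *b, int n)
     {
       int last = 0;
       for (int i = 0; i < n; i++)
         {
           a[i] = b[i] + 1;     <-- alters memory (vdef): relevant
           last = b[i] * 2;     <-- final value used after the loop: live
         }
       return last;
     }

   The store to a[i] is marked vect_used_in_scope because it has a vdef,
   while the statement computing LAST is marked live (and, since it is
   not invariant, vect_used_only_live) because its final value is used
   via the loop-closed PHI after the loop.  */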
297
298 static bool
299 vect_stmt_relevant_p (stmt_vec_info stmt_info, loop_vec_info loop_vinfo,
300 enum vect_relevant *relevant, bool *live_p)
301 {
302 class loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
303 ssa_op_iter op_iter;
304 imm_use_iterator imm_iter;
305 use_operand_p use_p;
306 def_operand_p def_p;
307
308 *relevant = vect_unused_in_scope;
309 *live_p = false;
310
311 /* cond stmt other than loop exit cond. */
312 if (is_ctrl_stmt (stmt_info->stmt)
313 && STMT_VINFO_TYPE (stmt_info) != loop_exit_ctrl_vec_info_type)
314 *relevant = vect_used_in_scope;
315
316 /* changing memory. */
317 if (gimple_code (stmt_info->stmt) != GIMPLE_PHI)
318 if (gimple_vdef (stmt_info->stmt)
319 && !gimple_clobber_p (stmt_info->stmt))
320 {
321 if (dump_enabled_p ())
322 dump_printf_loc (MSG_NOTE, vect_location,
323 "vec_stmt_relevant_p: stmt has vdefs.\n");
324 *relevant = vect_used_in_scope;
325 }
326
327 /* uses outside the loop. */
328 FOR_EACH_PHI_OR_STMT_DEF (def_p, stmt_info->stmt, op_iter, SSA_OP_DEF)
329 {
330 FOR_EACH_IMM_USE_FAST (use_p, imm_iter, DEF_FROM_PTR (def_p))
331 {
332 basic_block bb = gimple_bb (USE_STMT (use_p));
333 if (!flow_bb_inside_loop_p (loop, bb))
334 {
335 if (is_gimple_debug (USE_STMT (use_p)))
336 continue;
337
338 if (dump_enabled_p ())
339 dump_printf_loc (MSG_NOTE, vect_location,
340 "vec_stmt_relevant_p: used out of loop.\n");
341
342 /* We expect all such uses to be in the loop exit phis
 343                   (because of loop-closed SSA form).  */
344 gcc_assert (gimple_code (USE_STMT (use_p)) == GIMPLE_PHI);
345 gcc_assert (bb == single_exit (loop)->dest);
346
347 *live_p = true;
348 }
349 }
350 }
351
352 if (*live_p && *relevant == vect_unused_in_scope
353 && !is_simple_and_all_uses_invariant (stmt_info, loop_vinfo))
354 {
355 if (dump_enabled_p ())
356 dump_printf_loc (MSG_NOTE, vect_location,
357 "vec_stmt_relevant_p: stmt live but not relevant.\n");
358 *relevant = vect_used_only_live;
359 }
360
361 return (*live_p || *relevant);
362 }
363
364
365 /* Function exist_non_indexing_operands_for_use_p
366
367 USE is one of the uses attached to STMT_INFO. Check if USE is
368 used in STMT_INFO for anything other than indexing an array. */
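
/* For example (sketch): in the store "a[i_1] = x_2", the use X_2 provides
   the stored value, so this function returns true for it, whereas I_1
   only appears inside the ARRAY_REF index and the function returns false
   for that use.  */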
369
370 static bool
371 exist_non_indexing_operands_for_use_p (tree use, stmt_vec_info stmt_info)
372 {
373 tree operand;
374
375 /* USE corresponds to some operand in STMT. If there is no data
376 reference in STMT, then any operand that corresponds to USE
377 is not indexing an array. */
378 if (!STMT_VINFO_DATA_REF (stmt_info))
379 return true;
380
 381   /* STMT has a data_ref. FORNOW this means that it's one of
382 the following forms:
383 -1- ARRAY_REF = var
384 -2- var = ARRAY_REF
385 (This should have been verified in analyze_data_refs).
386
387 'var' in the second case corresponds to a def, not a use,
388 so USE cannot correspond to any operands that are not used
389 for array indexing.
390
391 Therefore, all we need to check is if STMT falls into the
392 first case, and whether var corresponds to USE. */
393
394 gassign *assign = dyn_cast <gassign *> (stmt_info->stmt);
395 if (!assign || !gimple_assign_copy_p (assign))
396 {
397 gcall *call = dyn_cast <gcall *> (stmt_info->stmt);
398 if (call && gimple_call_internal_p (call))
399 {
400 internal_fn ifn = gimple_call_internal_fn (call);
401 int mask_index = internal_fn_mask_index (ifn);
402 if (mask_index >= 0
403 && use == gimple_call_arg (call, mask_index))
404 return true;
405 int stored_value_index = internal_fn_stored_value_index (ifn);
406 if (stored_value_index >= 0
407 && use == gimple_call_arg (call, stored_value_index))
408 return true;
409 if (internal_gather_scatter_fn_p (ifn)
410 && use == gimple_call_arg (call, 1))
411 return true;
412 }
413 return false;
414 }
415
416 if (TREE_CODE (gimple_assign_lhs (assign)) == SSA_NAME)
417 return false;
418 operand = gimple_assign_rhs1 (assign);
419 if (TREE_CODE (operand) != SSA_NAME)
420 return false;
421
422 if (operand == use)
423 return true;
424
425 return false;
426 }
427
428
429 /*
430 Function process_use.
431
432 Inputs:
433 - a USE in STMT_VINFO in a loop represented by LOOP_VINFO
434 - RELEVANT - enum value to be set in the STMT_VINFO of the stmt
435 that defined USE. This is done by calling mark_relevant and passing it
436 the WORKLIST (to add DEF_STMT to the WORKLIST in case it is relevant).
437 - FORCE is true if exist_non_indexing_operands_for_use_p check shouldn't
438 be performed.
439
440 Outputs:
441 Generally, LIVE_P and RELEVANT are used to define the liveness and
442 relevance info of the DEF_STMT of this USE:
443 STMT_VINFO_LIVE_P (DEF_stmt_vinfo) <-- live_p
444 STMT_VINFO_RELEVANT (DEF_stmt_vinfo) <-- relevant
445 Exceptions:
446 - case 1: If USE is used only for address computations (e.g. array indexing),
447 which does not need to be directly vectorized, then the liveness/relevance
448 of the respective DEF_STMT is left unchanged.
449 - case 2: If STMT_VINFO is a reduction phi and DEF_STMT is a reduction stmt,
 450      we skip DEF_STMT because it has already been processed.
451 - case 3: If DEF_STMT and STMT_VINFO are in different nests, then
452 "relevant" will be modified accordingly.
453
454 Return true if everything is as expected. Return false otherwise. */
455
456 static opt_result
457 process_use (stmt_vec_info stmt_vinfo, tree use, loop_vec_info loop_vinfo,
458 enum vect_relevant relevant, vec<stmt_vec_info> *worklist,
459 bool force)
460 {
461 stmt_vec_info dstmt_vinfo;
462 enum vect_def_type dt;
463
464 /* case 1: we are only interested in uses that need to be vectorized. Uses
465 that are used for address computation are not considered relevant. */
466 if (!force && !exist_non_indexing_operands_for_use_p (use, stmt_vinfo))
467 return opt_result::success ();
468
469 if (!vect_is_simple_use (use, loop_vinfo, &dt, &dstmt_vinfo))
470 return opt_result::failure_at (stmt_vinfo->stmt,
471 "not vectorized:"
472 " unsupported use in stmt.\n");
473
474 if (!dstmt_vinfo)
475 return opt_result::success ();
476
477 basic_block def_bb = gimple_bb (dstmt_vinfo->stmt);
478 basic_block bb = gimple_bb (stmt_vinfo->stmt);
479
480 /* case 2: A reduction phi (STMT) defined by a reduction stmt (DSTMT_VINFO).
481 We have to force the stmt live since the epilogue loop needs it to
482 continue computing the reduction. */
483 if (gimple_code (stmt_vinfo->stmt) == GIMPLE_PHI
484 && STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_reduction_def
485 && gimple_code (dstmt_vinfo->stmt) != GIMPLE_PHI
486 && STMT_VINFO_DEF_TYPE (dstmt_vinfo) == vect_reduction_def
487 && bb->loop_father == def_bb->loop_father)
488 {
489 if (dump_enabled_p ())
490 dump_printf_loc (MSG_NOTE, vect_location,
491 "reduc-stmt defining reduc-phi in the same nest.\n");
492 vect_mark_relevant (worklist, dstmt_vinfo, relevant, true);
493 return opt_result::success ();
494 }
495
496 /* case 3a: outer-loop stmt defining an inner-loop stmt:
497 outer-loop-header-bb:
498 d = dstmt_vinfo
499 inner-loop:
500 stmt # use (d)
501 outer-loop-tail-bb:
502 ... */
503 if (flow_loop_nested_p (def_bb->loop_father, bb->loop_father))
504 {
505 if (dump_enabled_p ())
506 dump_printf_loc (MSG_NOTE, vect_location,
507 "outer-loop def-stmt defining inner-loop stmt.\n");
508
509 switch (relevant)
510 {
511 case vect_unused_in_scope:
512 relevant = (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_nested_cycle) ?
513 vect_used_in_scope : vect_unused_in_scope;
514 break;
515
516 case vect_used_in_outer_by_reduction:
517 gcc_assert (STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_reduction_def);
518 relevant = vect_used_by_reduction;
519 break;
520
521 case vect_used_in_outer:
522 gcc_assert (STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_reduction_def);
523 relevant = vect_used_in_scope;
524 break;
525
526 case vect_used_in_scope:
527 break;
528
529 default:
530 gcc_unreachable ();
531 }
532 }
533
534 /* case 3b: inner-loop stmt defining an outer-loop stmt:
535 outer-loop-header-bb:
536 ...
537 inner-loop:
538 d = dstmt_vinfo
539 outer-loop-tail-bb (or outer-loop-exit-bb in double reduction):
540 stmt # use (d) */
541 else if (flow_loop_nested_p (bb->loop_father, def_bb->loop_father))
542 {
543 if (dump_enabled_p ())
544 dump_printf_loc (MSG_NOTE, vect_location,
545 "inner-loop def-stmt defining outer-loop stmt.\n");
546
547 switch (relevant)
548 {
549 case vect_unused_in_scope:
550 relevant = (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_reduction_def
551 || STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_double_reduction_def) ?
552 vect_used_in_outer_by_reduction : vect_unused_in_scope;
553 break;
554
555 case vect_used_by_reduction:
556 case vect_used_only_live:
557 relevant = vect_used_in_outer_by_reduction;
558 break;
559
560 case vect_used_in_scope:
561 relevant = vect_used_in_outer;
562 break;
563
564 default:
565 gcc_unreachable ();
566 }
567 }
568 /* We are also not interested in uses on loop PHI backedges that are
569 inductions. Otherwise we'll needlessly vectorize the IV increment
570 and cause hybrid SLP for SLP inductions. Unless the PHI is live
571 of course. */
572 else if (gimple_code (stmt_vinfo->stmt) == GIMPLE_PHI
573 && STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_induction_def
574 && ! STMT_VINFO_LIVE_P (stmt_vinfo)
575 && (PHI_ARG_DEF_FROM_EDGE (stmt_vinfo->stmt,
576 loop_latch_edge (bb->loop_father))
577 == use))
578 {
579 if (dump_enabled_p ())
580 dump_printf_loc (MSG_NOTE, vect_location,
581 "induction value on backedge.\n");
582 return opt_result::success ();
583 }
584
585
586 vect_mark_relevant (worklist, dstmt_vinfo, relevant, false);
587 return opt_result::success ();
588 }
589
590
591 /* Function vect_mark_stmts_to_be_vectorized.
592
593 Not all stmts in the loop need to be vectorized. For example:
594
595 for i...
596 for j...
597 1. T0 = i + j
598 2. T1 = a[T0]
599
600 3. j = j + 1
601
 602    Stmts 1 and 3 do not need to be vectorized, because loop control and
603 addressing of vectorized data-refs are handled differently.
604
605 This pass detects such stmts. */
606
607 opt_result
608 vect_mark_stmts_to_be_vectorized (loop_vec_info loop_vinfo, bool *fatal)
609 {
610 class loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
611 basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
612 unsigned int nbbs = loop->num_nodes;
613 gimple_stmt_iterator si;
614 unsigned int i;
615 basic_block bb;
616 bool live_p;
617 enum vect_relevant relevant;
618
619 DUMP_VECT_SCOPE ("vect_mark_stmts_to_be_vectorized");
620
621 auto_vec<stmt_vec_info, 64> worklist;
622
623 /* 1. Init worklist. */
624 for (i = 0; i < nbbs; i++)
625 {
626 bb = bbs[i];
627 for (si = gsi_start_phis (bb); !gsi_end_p (si); gsi_next (&si))
628 {
629 stmt_vec_info phi_info = loop_vinfo->lookup_stmt (gsi_stmt (si));
630 if (dump_enabled_p ())
631 dump_printf_loc (MSG_NOTE, vect_location, "init: phi relevant? %G",
632 phi_info->stmt);
633
634 if (vect_stmt_relevant_p (phi_info, loop_vinfo, &relevant, &live_p))
635 vect_mark_relevant (&worklist, phi_info, relevant, live_p);
636 }
637 for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
638 {
639 if (is_gimple_debug (gsi_stmt (si)))
640 continue;
641 stmt_vec_info stmt_info = loop_vinfo->lookup_stmt (gsi_stmt (si));
642 if (dump_enabled_p ())
643 dump_printf_loc (MSG_NOTE, vect_location,
644 "init: stmt relevant? %G", stmt_info->stmt);
645
646 if (vect_stmt_relevant_p (stmt_info, loop_vinfo, &relevant, &live_p))
647 vect_mark_relevant (&worklist, stmt_info, relevant, live_p);
648 }
649 }
650
651 /* 2. Process_worklist */
652 while (worklist.length () > 0)
653 {
654 use_operand_p use_p;
655 ssa_op_iter iter;
656
657 stmt_vec_info stmt_vinfo = worklist.pop ();
658 if (dump_enabled_p ())
659 dump_printf_loc (MSG_NOTE, vect_location,
660 "worklist: examine stmt: %G", stmt_vinfo->stmt);
661
662 /* Examine the USEs of STMT. For each USE, mark the stmt that defines it
663 (DEF_STMT) as relevant/irrelevant according to the relevance property
664 of STMT. */
665 relevant = STMT_VINFO_RELEVANT (stmt_vinfo);
666
667 /* Generally, the relevance property of STMT (in STMT_VINFO_RELEVANT) is
668 propagated as is to the DEF_STMTs of its USEs.
669
670 One exception is when STMT has been identified as defining a reduction
671 variable; in this case we set the relevance to vect_used_by_reduction.
672 This is because we distinguish between two kinds of relevant stmts -
673 those that are used by a reduction computation, and those that are
674 (also) used by a regular computation. This allows us later on to
675 identify stmts that are used solely by a reduction, and therefore the
676 order of the results that they produce does not have to be kept. */
677
678 switch (STMT_VINFO_DEF_TYPE (stmt_vinfo))
679 {
680 case vect_reduction_def:
681 gcc_assert (relevant != vect_unused_in_scope);
682 if (relevant != vect_unused_in_scope
683 && relevant != vect_used_in_scope
684 && relevant != vect_used_by_reduction
685 && relevant != vect_used_only_live)
686 return opt_result::failure_at
687 (stmt_vinfo->stmt, "unsupported use of reduction.\n");
688 break;
689
690 case vect_nested_cycle:
691 if (relevant != vect_unused_in_scope
692 && relevant != vect_used_in_outer_by_reduction
693 && relevant != vect_used_in_outer)
694 return opt_result::failure_at
695 (stmt_vinfo->stmt, "unsupported use of nested cycle.\n");
696 break;
697
698 case vect_double_reduction_def:
699 if (relevant != vect_unused_in_scope
700 && relevant != vect_used_by_reduction
701 && relevant != vect_used_only_live)
702 return opt_result::failure_at
703 (stmt_vinfo->stmt, "unsupported use of double reduction.\n");
704 break;
705
706 default:
707 break;
708 }
709
710 if (is_pattern_stmt_p (stmt_vinfo))
711 {
712 /* Pattern statements are not inserted into the code, so
713 FOR_EACH_PHI_OR_STMT_USE optimizes their operands out, and we
714 have to scan the RHS or function arguments instead. */
715 if (gassign *assign = dyn_cast <gassign *> (stmt_vinfo->stmt))
716 {
717 enum tree_code rhs_code = gimple_assign_rhs_code (assign);
718 tree op = gimple_assign_rhs1 (assign);
719
720 i = 1;
721 if (rhs_code == COND_EXPR && COMPARISON_CLASS_P (op))
722 {
723 opt_result res
724 = process_use (stmt_vinfo, TREE_OPERAND (op, 0),
725 loop_vinfo, relevant, &worklist, false);
726 if (!res)
727 return res;
728 res = process_use (stmt_vinfo, TREE_OPERAND (op, 1),
729 loop_vinfo, relevant, &worklist, false);
730 if (!res)
731 return res;
732 i = 2;
733 }
734 for (; i < gimple_num_ops (assign); i++)
735 {
736 op = gimple_op (assign, i);
737 if (TREE_CODE (op) == SSA_NAME)
738 {
739 opt_result res
740 = process_use (stmt_vinfo, op, loop_vinfo, relevant,
741 &worklist, false);
742 if (!res)
743 return res;
744 }
745 }
746 }
747 else if (gcall *call = dyn_cast <gcall *> (stmt_vinfo->stmt))
748 {
749 for (i = 0; i < gimple_call_num_args (call); i++)
750 {
751 tree arg = gimple_call_arg (call, i);
752 opt_result res
753 = process_use (stmt_vinfo, arg, loop_vinfo, relevant,
754 &worklist, false);
755 if (!res)
756 return res;
757 }
758 }
759 }
760 else
761 FOR_EACH_PHI_OR_STMT_USE (use_p, stmt_vinfo->stmt, iter, SSA_OP_USE)
762 {
763 tree op = USE_FROM_PTR (use_p);
764 opt_result res
765 = process_use (stmt_vinfo, op, loop_vinfo, relevant,
766 &worklist, false);
767 if (!res)
768 return res;
769 }
770
771 if (STMT_VINFO_GATHER_SCATTER_P (stmt_vinfo))
772 {
773 gather_scatter_info gs_info;
774 if (!vect_check_gather_scatter (stmt_vinfo, loop_vinfo, &gs_info))
775 gcc_unreachable ();
776 opt_result res
777 = process_use (stmt_vinfo, gs_info.offset, loop_vinfo, relevant,
778 &worklist, true);
779 if (!res)
780 {
781 if (fatal)
782 *fatal = false;
783 return res;
784 }
785 }
786 } /* while worklist */
787
788 return opt_result::success ();
789 }
790
791 /* Function vect_model_simple_cost.
792
793 Models cost for simple operations, i.e. those that only emit ncopies of a
794 single op. Right now, this does not account for multiple insns that could
795 be generated for the single vector op. We will handle that shortly. */
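
/* For example (sketch): a non-SLP addition with NCOPIES == 2 whose second
   operand is loop-invariant is costed as one scalar_to_vec in the
   prologue (for the broadcast) plus two vector_stmt costs in the loop
   body.  */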
796
797 static void
798 vect_model_simple_cost (vec_info *,
799 stmt_vec_info stmt_info, int ncopies,
800 enum vect_def_type *dt,
801 int ndts,
802 slp_tree node,
803 stmt_vector_for_cost *cost_vec,
804 vect_cost_for_stmt kind = vector_stmt)
805 {
806 int inside_cost = 0, prologue_cost = 0;
807
808 gcc_assert (cost_vec != NULL);
809
810 /* ??? Somehow we need to fix this at the callers. */
811 if (node)
812 ncopies = SLP_TREE_NUMBER_OF_VEC_STMTS (node);
813
814 if (!node)
 815     /* Cost the "broadcast" of a scalar operand into a vector operand.
816 Use scalar_to_vec to cost the broadcast, as elsewhere in the vector
817 cost model. */
818 for (int i = 0; i < ndts; i++)
819 if (dt[i] == vect_constant_def || dt[i] == vect_external_def)
820 prologue_cost += record_stmt_cost (cost_vec, 1, scalar_to_vec,
821 stmt_info, 0, vect_prologue);
822
823 /* Pass the inside-of-loop statements to the target-specific cost model. */
824 inside_cost += record_stmt_cost (cost_vec, ncopies, kind,
825 stmt_info, 0, vect_body);
826
827 if (dump_enabled_p ())
828 dump_printf_loc (MSG_NOTE, vect_location,
829 "vect_model_simple_cost: inside_cost = %d, "
830 "prologue_cost = %d .\n", inside_cost, prologue_cost);
831 }
832
833
834 /* Model cost for type demotion and promotion operations. PWR is
835 normally zero for single-step promotions and demotions. It will be
836 one if two-step promotion/demotion is required, and so on. NCOPIES
837 is the number of vector results (and thus number of instructions)
838 for the narrowest end of the operation chain. Each additional
839 step doubles the number of instructions required. */
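
/* For example (sketch): a two-step conversion (PWR == 1) with NCOPIES == 2
   at the narrowest end is costed as 2 + 4 = 6 vec_promote_demote
   operations in the loop body, plus one vector_stmt prologue cost for
   each constant or external operand.  */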
840
841 static void
842 vect_model_promotion_demotion_cost (stmt_vec_info stmt_info,
843 enum vect_def_type *dt,
844 unsigned int ncopies, int pwr,
845 stmt_vector_for_cost *cost_vec)
846 {
847 int i;
848 int inside_cost = 0, prologue_cost = 0;
849
850 for (i = 0; i < pwr + 1; i++)
851 {
852 inside_cost += record_stmt_cost (cost_vec, ncopies, vec_promote_demote,
853 stmt_info, 0, vect_body);
854 ncopies *= 2;
855 }
856
857 /* FORNOW: Assuming maximum 2 args per stmts. */
858 for (i = 0; i < 2; i++)
859 if (dt[i] == vect_constant_def || dt[i] == vect_external_def)
860 prologue_cost += record_stmt_cost (cost_vec, 1, vector_stmt,
861 stmt_info, 0, vect_prologue);
862
863 if (dump_enabled_p ())
864 dump_printf_loc (MSG_NOTE, vect_location,
865 "vect_model_promotion_demotion_cost: inside_cost = %d, "
866 "prologue_cost = %d .\n", inside_cost, prologue_cost);
867 }
868
869 /* Returns true if the current function returns DECL. */
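
/* For example (illustrative sketch):

     struct big { int v[16]; };

     struct big
     g (void)
     {
       struct big r;
       ...
       return r;
     }

   The return is often gimplified as "<retval> = r; return <retval>;", so
   the function answers true for R's decl either via the direct return
   value check or via the aggregate copy into the result decl.  */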
870
871 static bool
872 cfun_returns (tree decl)
873 {
874 edge_iterator ei;
875 edge e;
876 FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR_FOR_FN (cfun)->preds)
877 {
878 greturn *ret = safe_dyn_cast <greturn *> (last_stmt (e->src));
879 if (!ret)
880 continue;
881 if (gimple_return_retval (ret) == decl)
882 return true;
883 /* We often end up with an aggregate copy to the result decl,
884 handle that case as well. First skip intermediate clobbers
885 though. */
886 gimple *def = ret;
887 do
888 {
889 def = SSA_NAME_DEF_STMT (gimple_vuse (def));
890 }
891 while (gimple_clobber_p (def));
892 if (is_a <gassign *> (def)
893 && gimple_assign_lhs (def) == gimple_return_retval (ret)
894 && gimple_assign_rhs1 (def) == decl)
895 return true;
896 }
897 return false;
898 }
899
900 /* Function vect_model_store_cost
901
902 Models cost for stores. In the case of grouped accesses, one access
903 has the overhead of the grouped access attributed to it. */
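
/* For example (sketch): the interleaved stores

     for (i = 0; i < n; i++)
       {
         a[2*i] = x[i];
         a[2*i+1] = y[i];
       }

   form a single group of size 2; the once-per-group interleaving costs
   are attributed to the first statement of the group only.  */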
904
905 static void
906 vect_model_store_cost (vec_info *vinfo, stmt_vec_info stmt_info, int ncopies,
907 vect_memory_access_type memory_access_type,
908 vec_load_store_type vls_type, slp_tree slp_node,
909 stmt_vector_for_cost *cost_vec)
910 {
911 unsigned int inside_cost = 0, prologue_cost = 0;
912 stmt_vec_info first_stmt_info = stmt_info;
913 bool grouped_access_p = STMT_VINFO_GROUPED_ACCESS (stmt_info);
914
915 /* ??? Somehow we need to fix this at the callers. */
916 if (slp_node)
917 ncopies = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
918
919 if (vls_type == VLS_STORE_INVARIANT)
920 {
921 if (!slp_node)
922 prologue_cost += record_stmt_cost (cost_vec, 1, scalar_to_vec,
923 stmt_info, 0, vect_prologue);
924 }
925
926 /* Grouped stores update all elements in the group at once,
927 so we want the DR for the first statement. */
928 if (!slp_node && grouped_access_p)
929 first_stmt_info = DR_GROUP_FIRST_ELEMENT (stmt_info);
930
931 /* True if we should include any once-per-group costs as well as
932 the cost of the statement itself. For SLP we only get called
933 once per group anyhow. */
934 bool first_stmt_p = (first_stmt_info == stmt_info);
935
936 /* We assume that the cost of a single store-lanes instruction is
937 equivalent to the cost of DR_GROUP_SIZE separate stores. If a grouped
938 access is instead being provided by a permute-and-store operation,
939 include the cost of the permutes. */
940 if (first_stmt_p
941 && memory_access_type == VMAT_CONTIGUOUS_PERMUTE)
942 {
 943       /* Uses high and low interleave or shuffle operations for each
944 needed permute. */
945 int group_size = DR_GROUP_SIZE (first_stmt_info);
946 int nstmts = ncopies * ceil_log2 (group_size) * group_size;
947 inside_cost = record_stmt_cost (cost_vec, nstmts, vec_perm,
948 stmt_info, 0, vect_body);
949
950 if (dump_enabled_p ())
951 dump_printf_loc (MSG_NOTE, vect_location,
952 "vect_model_store_cost: strided group_size = %d .\n",
953 group_size);
954 }
955
956 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
957 /* Costs of the stores. */
958 if (memory_access_type == VMAT_ELEMENTWISE
959 || memory_access_type == VMAT_GATHER_SCATTER)
960 {
961 /* N scalar stores plus extracting the elements. */
962 unsigned int assumed_nunits = vect_nunits_for_cost (vectype);
963 inside_cost += record_stmt_cost (cost_vec,
964 ncopies * assumed_nunits,
965 scalar_store, stmt_info, 0, vect_body);
966 }
967 else
968 vect_get_store_cost (vinfo, stmt_info, ncopies, &inside_cost, cost_vec);
969
970 if (memory_access_type == VMAT_ELEMENTWISE
971 || memory_access_type == VMAT_STRIDED_SLP)
972 {
 973       /* The cost of extracting the elements to be stored individually.  */
974 unsigned int assumed_nunits = vect_nunits_for_cost (vectype);
975 inside_cost += record_stmt_cost (cost_vec,
976 ncopies * assumed_nunits,
977 vec_to_scalar, stmt_info, 0, vect_body);
978 }
979
980 /* When vectorizing a store into the function result assign
981 a penalty if the function returns in a multi-register location.
982 In this case we assume we'll end up with having to spill the
983 vector result and do piecewise loads as a conservative estimate. */
984 tree base = get_base_address (STMT_VINFO_DATA_REF (stmt_info)->ref);
985 if (base
986 && (TREE_CODE (base) == RESULT_DECL
987 || (DECL_P (base) && cfun_returns (base)))
988 && !aggregate_value_p (base, cfun->decl))
989 {
990 rtx reg = hard_function_value (TREE_TYPE (base), cfun->decl, 0, 1);
991 /* ??? Handle PARALLEL in some way. */
992 if (REG_P (reg))
993 {
994 int nregs = hard_regno_nregs (REGNO (reg), GET_MODE (reg));
995 /* Assume that a single reg-reg move is possible and cheap,
996 do not account for vector to gp register move cost. */
997 if (nregs > 1)
998 {
999 /* Spill. */
1000 prologue_cost += record_stmt_cost (cost_vec, ncopies,
1001 vector_store,
1002 stmt_info, 0, vect_epilogue);
1003 /* Loads. */
1004 prologue_cost += record_stmt_cost (cost_vec, ncopies * nregs,
1005 scalar_load,
1006 stmt_info, 0, vect_epilogue);
1007 }
1008 }
1009 }
1010
1011 if (dump_enabled_p ())
1012 dump_printf_loc (MSG_NOTE, vect_location,
1013 "vect_model_store_cost: inside_cost = %d, "
1014 "prologue_cost = %d .\n", inside_cost, prologue_cost);
1015 }
1016
1017
1018 /* Calculate cost of DR's memory access. */
1019 void
1020 vect_get_store_cost (vec_info *vinfo, stmt_vec_info stmt_info, int ncopies,
1021 unsigned int *inside_cost,
1022 stmt_vector_for_cost *body_cost_vec)
1023 {
1024 dr_vec_info *dr_info = STMT_VINFO_DR_INFO (stmt_info);
1025 int alignment_support_scheme
1026 = vect_supportable_dr_alignment (vinfo, dr_info, false);
1027
1028 switch (alignment_support_scheme)
1029 {
1030 case dr_aligned:
1031 {
1032 *inside_cost += record_stmt_cost (body_cost_vec, ncopies,
1033 vector_store, stmt_info, 0,
1034 vect_body);
1035
1036 if (dump_enabled_p ())
1037 dump_printf_loc (MSG_NOTE, vect_location,
1038 "vect_model_store_cost: aligned.\n");
1039 break;
1040 }
1041
1042 case dr_unaligned_supported:
1043 {
1044 /* Here, we assign an additional cost for the unaligned store. */
1045 *inside_cost += record_stmt_cost (body_cost_vec, ncopies,
1046 unaligned_store, stmt_info,
1047 DR_MISALIGNMENT (dr_info),
1048 vect_body);
1049 if (dump_enabled_p ())
1050 dump_printf_loc (MSG_NOTE, vect_location,
1051 "vect_model_store_cost: unaligned supported by "
1052 "hardware.\n");
1053 break;
1054 }
1055
1056 case dr_unaligned_unsupported:
1057 {
1058 *inside_cost = VECT_MAX_COST;
1059
1060 if (dump_enabled_p ())
1061 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1062 "vect_model_store_cost: unsupported access.\n");
1063 break;
1064 }
1065
1066 default:
1067 gcc_unreachable ();
1068 }
1069 }
1070
1071
1072 /* Function vect_model_load_cost
1073
1074 Models cost for loads. In the case of grouped accesses, one access has
1075 the overhead of the grouped access attributed to it. Since unaligned
1076 accesses are supported for loads, we also account for the costs of the
1077 access scheme chosen. */
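
/* For example (sketch): with VMAT_ELEMENTWISE, a V4SI load with
   NCOPIES == 2 is costed as 8 scalar_load operations plus 2 vec_construct
   operations in the loop body.  */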
1078
1079 static void
1080 vect_model_load_cost (vec_info *vinfo,
1081 stmt_vec_info stmt_info, unsigned ncopies, poly_uint64 vf,
1082 vect_memory_access_type memory_access_type,
1083 slp_tree slp_node,
1084 stmt_vector_for_cost *cost_vec)
1085 {
1086 unsigned int inside_cost = 0, prologue_cost = 0;
1087 bool grouped_access_p = STMT_VINFO_GROUPED_ACCESS (stmt_info);
1088
1089 gcc_assert (cost_vec);
1090
1091 /* ??? Somehow we need to fix this at the callers. */
1092 if (slp_node)
1093 ncopies = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
1094
1095 if (slp_node && SLP_TREE_LOAD_PERMUTATION (slp_node).exists ())
1096 {
1097 /* If the load is permuted then the alignment is determined by
1098 the first group element not by the first scalar stmt DR. */
1099 stmt_vec_info first_stmt_info = DR_GROUP_FIRST_ELEMENT (stmt_info);
1100 /* Record the cost for the permutation. */
1101 unsigned n_perms, n_loads;
1102 vect_transform_slp_perm_load (vinfo, slp_node, vNULL, NULL,
1103 vf, true, &n_perms, &n_loads);
1104 inside_cost += record_stmt_cost (cost_vec, n_perms, vec_perm,
1105 first_stmt_info, 0, vect_body);
1106
1107 /* And adjust the number of loads performed. This handles
1108 redundancies as well as loads that are later dead. */
1109 ncopies = n_loads;
1110 }
1111
1112 /* Grouped loads read all elements in the group at once,
1113 so we want the DR for the first statement. */
1114 stmt_vec_info first_stmt_info = stmt_info;
1115 if (!slp_node && grouped_access_p)
1116 first_stmt_info = DR_GROUP_FIRST_ELEMENT (stmt_info);
1117
1118 /* True if we should include any once-per-group costs as well as
1119 the cost of the statement itself. For SLP we only get called
1120 once per group anyhow. */
1121 bool first_stmt_p = (first_stmt_info == stmt_info);
1122
1123 /* We assume that the cost of a single load-lanes instruction is
1124 equivalent to the cost of DR_GROUP_SIZE separate loads. If a grouped
1125 access is instead being provided by a load-and-permute operation,
1126 include the cost of the permutes. */
1127 if (first_stmt_p
1128 && memory_access_type == VMAT_CONTIGUOUS_PERMUTE)
1129 {
 1130       /* Uses even and odd extract operations or shuffle operations
1131 for each needed permute. */
1132 int group_size = DR_GROUP_SIZE (first_stmt_info);
1133 int nstmts = ncopies * ceil_log2 (group_size) * group_size;
1134 inside_cost += record_stmt_cost (cost_vec, nstmts, vec_perm,
1135 stmt_info, 0, vect_body);
1136
1137 if (dump_enabled_p ())
1138 dump_printf_loc (MSG_NOTE, vect_location,
1139 "vect_model_load_cost: strided group_size = %d .\n",
1140 group_size);
1141 }
1142
1143 /* The loads themselves. */
1144 if (memory_access_type == VMAT_ELEMENTWISE
1145 || memory_access_type == VMAT_GATHER_SCATTER)
1146 {
1147 /* N scalar loads plus gathering them into a vector. */
1148 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
1149 unsigned int assumed_nunits = vect_nunits_for_cost (vectype);
1150 inside_cost += record_stmt_cost (cost_vec,
1151 ncopies * assumed_nunits,
1152 scalar_load, stmt_info, 0, vect_body);
1153 }
1154 else
1155 vect_get_load_cost (vinfo, stmt_info, ncopies, first_stmt_p,
1156 &inside_cost, &prologue_cost,
1157 cost_vec, cost_vec, true);
1158 if (memory_access_type == VMAT_ELEMENTWISE
1159 || memory_access_type == VMAT_STRIDED_SLP)
1160 inside_cost += record_stmt_cost (cost_vec, ncopies, vec_construct,
1161 stmt_info, 0, vect_body);
1162
1163 if (dump_enabled_p ())
1164 dump_printf_loc (MSG_NOTE, vect_location,
1165 "vect_model_load_cost: inside_cost = %d, "
1166 "prologue_cost = %d .\n", inside_cost, prologue_cost);
1167 }
1168
1169
1170 /* Calculate cost of DR's memory access. */
1171 void
1172 vect_get_load_cost (vec_info *vinfo, stmt_vec_info stmt_info, int ncopies,
1173 bool add_realign_cost, unsigned int *inside_cost,
1174 unsigned int *prologue_cost,
1175 stmt_vector_for_cost *prologue_cost_vec,
1176 stmt_vector_for_cost *body_cost_vec,
1177 bool record_prologue_costs)
1178 {
1179 dr_vec_info *dr_info = STMT_VINFO_DR_INFO (stmt_info);
1180 int alignment_support_scheme
1181 = vect_supportable_dr_alignment (vinfo, dr_info, false);
1182
1183 switch (alignment_support_scheme)
1184 {
1185 case dr_aligned:
1186 {
1187 *inside_cost += record_stmt_cost (body_cost_vec, ncopies, vector_load,
1188 stmt_info, 0, vect_body);
1189
1190 if (dump_enabled_p ())
1191 dump_printf_loc (MSG_NOTE, vect_location,
1192 "vect_model_load_cost: aligned.\n");
1193
1194 break;
1195 }
1196 case dr_unaligned_supported:
1197 {
1198 /* Here, we assign an additional cost for the unaligned load. */
1199 *inside_cost += record_stmt_cost (body_cost_vec, ncopies,
1200 unaligned_load, stmt_info,
1201 DR_MISALIGNMENT (dr_info),
1202 vect_body);
1203
1204 if (dump_enabled_p ())
1205 dump_printf_loc (MSG_NOTE, vect_location,
1206 "vect_model_load_cost: unaligned supported by "
1207 "hardware.\n");
1208
1209 break;
1210 }
1211 case dr_explicit_realign:
1212 {
1213 *inside_cost += record_stmt_cost (body_cost_vec, ncopies * 2,
1214 vector_load, stmt_info, 0, vect_body);
1215 *inside_cost += record_stmt_cost (body_cost_vec, ncopies,
1216 vec_perm, stmt_info, 0, vect_body);
1217
1218 /* FIXME: If the misalignment remains fixed across the iterations of
1219 the containing loop, the following cost should be added to the
1220 prologue costs. */
1221 if (targetm.vectorize.builtin_mask_for_load)
1222 *inside_cost += record_stmt_cost (body_cost_vec, 1, vector_stmt,
1223 stmt_info, 0, vect_body);
1224
1225 if (dump_enabled_p ())
1226 dump_printf_loc (MSG_NOTE, vect_location,
1227 "vect_model_load_cost: explicit realign\n");
1228
1229 break;
1230 }
1231 case dr_explicit_realign_optimized:
1232 {
1233 if (dump_enabled_p ())
1234 dump_printf_loc (MSG_NOTE, vect_location,
1235 "vect_model_load_cost: unaligned software "
1236 "pipelined.\n");
1237
1238 /* Unaligned software pipeline has a load of an address, an initial
1239 load, and possibly a mask operation to "prime" the loop. However,
1240 if this is an access in a group of loads, which provide grouped
1241 access, then the above cost should only be considered for one
1242 access in the group. Inside the loop, there is a load op
1243 and a realignment op. */
1244
1245 if (add_realign_cost && record_prologue_costs)
1246 {
1247 *prologue_cost += record_stmt_cost (prologue_cost_vec, 2,
1248 vector_stmt, stmt_info,
1249 0, vect_prologue);
1250 if (targetm.vectorize.builtin_mask_for_load)
1251 *prologue_cost += record_stmt_cost (prologue_cost_vec, 1,
1252 vector_stmt, stmt_info,
1253 0, vect_prologue);
1254 }
1255
1256 *inside_cost += record_stmt_cost (body_cost_vec, ncopies, vector_load,
1257 stmt_info, 0, vect_body);
1258 *inside_cost += record_stmt_cost (body_cost_vec, ncopies, vec_perm,
1259 stmt_info, 0, vect_body);
1260
1261 if (dump_enabled_p ())
1262 dump_printf_loc (MSG_NOTE, vect_location,
1263 "vect_model_load_cost: explicit realign optimized"
1264 "\n");
1265
1266 break;
1267 }
1268
1269 case dr_unaligned_unsupported:
1270 {
1271 *inside_cost = VECT_MAX_COST;
1272
1273 if (dump_enabled_p ())
1274 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1275 "vect_model_load_cost: unsupported access.\n");
1276 break;
1277 }
1278
1279 default:
1280 gcc_unreachable ();
1281 }
1282 }
1283
1284 /* Insert the new stmt NEW_STMT at *GSI or at the appropriate place in
1285 the loop preheader for the vectorized stmt STMT_VINFO. */
1286
1287 static void
1288 vect_init_vector_1 (vec_info *vinfo, stmt_vec_info stmt_vinfo, gimple *new_stmt,
1289 gimple_stmt_iterator *gsi)
1290 {
1291 if (gsi)
1292 vect_finish_stmt_generation (vinfo, stmt_vinfo, new_stmt, gsi);
1293 else
1294 vinfo->insert_on_entry (stmt_vinfo, new_stmt);
1295
1296 if (dump_enabled_p ())
1297 dump_printf_loc (MSG_NOTE, vect_location,
1298 "created new init_stmt: %G", new_stmt);
1299 }
1300
1301 /* Function vect_init_vector.
1302
1303 Insert a new stmt (INIT_STMT) that initializes a new variable of type
1304 TYPE with the value VAL. If TYPE is a vector type and VAL does not have
 1305    vector type, a vector with all elements equal to VAL is created first.
1306 Place the initialization at GSI if it is not NULL. Otherwise, place the
1307 initialization at the loop preheader.
1308 Return the DEF of INIT_STMT.
1309 It will be used in the vectorization of STMT_INFO. */
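
/* For example (illustrative; the SSA name is invented): initializing a
   V4SI vector from the scalar constant 3 emits something like

     cst__1 = { 3, 3, 3, 3 };

   in the loop preheader (or before GSI if GSI is non-NULL) and returns
   the new SSA name.  */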
1310
1311 tree
1312 vect_init_vector (vec_info *vinfo, stmt_vec_info stmt_info, tree val, tree type,
1313 gimple_stmt_iterator *gsi)
1314 {
1315 gimple *init_stmt;
1316 tree new_temp;
1317
 1318   /* We abuse this function to push something to an SSA name with initial value 'val'.  */
1319 if (! useless_type_conversion_p (type, TREE_TYPE (val)))
1320 {
1321 gcc_assert (TREE_CODE (type) == VECTOR_TYPE);
1322 if (! types_compatible_p (TREE_TYPE (type), TREE_TYPE (val)))
1323 {
1324 /* Scalar boolean value should be transformed into
1325 all zeros or all ones value before building a vector. */
1326 if (VECTOR_BOOLEAN_TYPE_P (type))
1327 {
1328 tree true_val = build_all_ones_cst (TREE_TYPE (type));
1329 tree false_val = build_zero_cst (TREE_TYPE (type));
1330
1331 if (CONSTANT_CLASS_P (val))
1332 val = integer_zerop (val) ? false_val : true_val;
1333 else
1334 {
1335 new_temp = make_ssa_name (TREE_TYPE (type));
1336 init_stmt = gimple_build_assign (new_temp, COND_EXPR,
1337 val, true_val, false_val);
1338 vect_init_vector_1 (vinfo, stmt_info, init_stmt, gsi);
1339 val = new_temp;
1340 }
1341 }
1342 else
1343 {
1344 gimple_seq stmts = NULL;
1345 if (! INTEGRAL_TYPE_P (TREE_TYPE (val)))
1346 val = gimple_build (&stmts, VIEW_CONVERT_EXPR,
1347 TREE_TYPE (type), val);
1348 else
1349 /* ??? Condition vectorization expects us to do
1350 promotion of invariant/external defs. */
1351 val = gimple_convert (&stmts, TREE_TYPE (type), val);
1352 for (gimple_stmt_iterator gsi2 = gsi_start (stmts);
1353 !gsi_end_p (gsi2); )
1354 {
1355 init_stmt = gsi_stmt (gsi2);
1356 gsi_remove (&gsi2, false);
1357 vect_init_vector_1 (vinfo, stmt_info, init_stmt, gsi);
1358 }
1359 }
1360 }
1361 val = build_vector_from_val (type, val);
1362 }
1363
1364 new_temp = vect_get_new_ssa_name (type, vect_simple_var, "cst_");
1365 init_stmt = gimple_build_assign (new_temp, val);
1366 vect_init_vector_1 (vinfo, stmt_info, init_stmt, gsi);
1367 return new_temp;
1368 }
1369
1370
1371 /* Function vect_get_vec_defs_for_operand.
1372
1373 OP is an operand in STMT_VINFO. This function returns a vector of
1374 NCOPIES defs that will be used in the vectorized stmts for STMT_VINFO.
1375
1376 In the case that OP is an SSA_NAME which is defined in the loop, then
1377 STMT_VINFO_VEC_STMTS of the defining stmt holds the relevant defs.
1378
1379 In case OP is an invariant or constant, a new stmt that creates a vector def
1380 needs to be introduced. VECTYPE may be used to specify a required type for
1381 vector invariant. */
1382
1383 void
1384 vect_get_vec_defs_for_operand (vec_info *vinfo, stmt_vec_info stmt_vinfo,
1385 unsigned ncopies,
1386 tree op, vec<tree> *vec_oprnds, tree vectype)
1387 {
1388 gimple *def_stmt;
1389 enum vect_def_type dt;
1390 bool is_simple_use;
1391 loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
1392
1393 if (dump_enabled_p ())
1394 dump_printf_loc (MSG_NOTE, vect_location,
1395 "vect_get_vec_defs_for_operand: %T\n", op);
1396
1397 stmt_vec_info def_stmt_info;
1398 is_simple_use = vect_is_simple_use (op, loop_vinfo, &dt,
1399 &def_stmt_info, &def_stmt);
1400 gcc_assert (is_simple_use);
1401 if (def_stmt && dump_enabled_p ())
1402 dump_printf_loc (MSG_NOTE, vect_location, " def_stmt = %G", def_stmt);
1403
1404 vec_oprnds->create (ncopies);
1405 if (dt == vect_constant_def || dt == vect_external_def)
1406 {
1407 tree stmt_vectype = STMT_VINFO_VECTYPE (stmt_vinfo);
1408 tree vector_type;
1409
1410 if (vectype)
1411 vector_type = vectype;
1412 else if (VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (op))
1413 && VECTOR_BOOLEAN_TYPE_P (stmt_vectype))
1414 vector_type = truth_type_for (stmt_vectype);
1415 else
1416 vector_type = get_vectype_for_scalar_type (loop_vinfo, TREE_TYPE (op));
1417
1418 gcc_assert (vector_type);
1419 tree vop = vect_init_vector (vinfo, stmt_vinfo, op, vector_type, NULL);
1420 while (ncopies--)
1421 vec_oprnds->quick_push (vop);
1422 }
1423 else
1424 {
1425 def_stmt_info = vect_stmt_to_vectorize (def_stmt_info);
1426 gcc_assert (STMT_VINFO_VEC_STMTS (def_stmt_info).length () == ncopies);
1427 for (unsigned i = 0; i < ncopies; ++i)
1428 vec_oprnds->quick_push (gimple_get_lhs
1429 (STMT_VINFO_VEC_STMTS (def_stmt_info)[i]));
1430 }
1431 }
1432
1433
1434 /* Get vectorized definitions for OP0 and OP1. */
1435
1436 void
1437 vect_get_vec_defs (vec_info *vinfo, stmt_vec_info stmt_info, slp_tree slp_node,
1438 unsigned ncopies,
1439 tree op0, vec<tree> *vec_oprnds0, tree vectype0,
1440 tree op1, vec<tree> *vec_oprnds1, tree vectype1,
1441 tree op2, vec<tree> *vec_oprnds2, tree vectype2,
1442 tree op3, vec<tree> *vec_oprnds3, tree vectype3)
1443 {
1444 if (slp_node)
1445 {
1446 if (op0)
1447 vect_get_slp_defs (SLP_TREE_CHILDREN (slp_node)[0], vec_oprnds0);
1448 if (op1)
1449 vect_get_slp_defs (SLP_TREE_CHILDREN (slp_node)[1], vec_oprnds1);
1450 if (op2)
1451 vect_get_slp_defs (SLP_TREE_CHILDREN (slp_node)[2], vec_oprnds2);
1452 if (op3)
1453 vect_get_slp_defs (SLP_TREE_CHILDREN (slp_node)[3], vec_oprnds3);
1454 }
1455 else
1456 {
1457 if (op0)
1458 vect_get_vec_defs_for_operand (vinfo, stmt_info, ncopies,
1459 op0, vec_oprnds0, vectype0);
1460 if (op1)
1461 vect_get_vec_defs_for_operand (vinfo, stmt_info, ncopies,
1462 op1, vec_oprnds1, vectype1);
1463 if (op2)
1464 vect_get_vec_defs_for_operand (vinfo, stmt_info, ncopies,
1465 op2, vec_oprnds2, vectype2);
1466 if (op3)
1467 vect_get_vec_defs_for_operand (vinfo, stmt_info, ncopies,
1468 op3, vec_oprnds3, vectype3);
1469 }
1470 }
1471
1472 void
1473 vect_get_vec_defs (vec_info *vinfo, stmt_vec_info stmt_info, slp_tree slp_node,
1474 unsigned ncopies,
1475 tree op0, vec<tree> *vec_oprnds0,
1476 tree op1, vec<tree> *vec_oprnds1,
1477 tree op2, vec<tree> *vec_oprnds2,
1478 tree op3, vec<tree> *vec_oprnds3)
1479 {
1480 vect_get_vec_defs (vinfo, stmt_info, slp_node, ncopies,
1481 op0, vec_oprnds0, NULL_TREE,
1482 op1, vec_oprnds1, NULL_TREE,
1483 op2, vec_oprnds2, NULL_TREE,
1484 op3, vec_oprnds3, NULL_TREE);
1485 }
1486
1487 /* Helper function called by vect_finish_replace_stmt and
1488 vect_finish_stmt_generation. Set the location of the new
 1489    statement and make sure it stays in the EH region of the scalar stmt.  */
1490
1491 static void
1492 vect_finish_stmt_generation_1 (vec_info *,
1493 stmt_vec_info stmt_info, gimple *vec_stmt)
1494 {
1495 if (dump_enabled_p ())
1496 dump_printf_loc (MSG_NOTE, vect_location, "add new stmt: %G", vec_stmt);
1497
1498 if (stmt_info)
1499 {
1500 gimple_set_location (vec_stmt, gimple_location (stmt_info->stmt));
1501
1502 /* While EH edges will generally prevent vectorization, stmt might
1503 e.g. be in a must-not-throw region. Ensure newly created stmts
1504 that could throw are part of the same region. */
1505 int lp_nr = lookup_stmt_eh_lp (stmt_info->stmt);
1506 if (lp_nr != 0 && stmt_could_throw_p (cfun, vec_stmt))
1507 add_stmt_to_eh_lp (vec_stmt, lp_nr);
1508 }
1509 else
1510 gcc_assert (!stmt_could_throw_p (cfun, vec_stmt));
1511 }
1512
1513 /* Replace the scalar statement STMT_INFO with a new vector statement VEC_STMT,
 1514    which sets the same scalar result as STMT_INFO did.  */
1516
1517 void
1518 vect_finish_replace_stmt (vec_info *vinfo,
1519 stmt_vec_info stmt_info, gimple *vec_stmt)
1520 {
1521 gimple *scalar_stmt = vect_orig_stmt (stmt_info)->stmt;
1522 gcc_assert (gimple_get_lhs (scalar_stmt) == gimple_get_lhs (vec_stmt));
1523
1524 gimple_stmt_iterator gsi = gsi_for_stmt (scalar_stmt);
1525 gsi_replace (&gsi, vec_stmt, true);
1526
1527 vect_finish_stmt_generation_1 (vinfo, stmt_info, vec_stmt);
1528 }
1529
1530 /* Add VEC_STMT to the vectorized implementation of STMT_INFO and insert it
 1531    before *GSI.  */
1532
1533 void
1534 vect_finish_stmt_generation (vec_info *vinfo,
1535 stmt_vec_info stmt_info, gimple *vec_stmt,
1536 gimple_stmt_iterator *gsi)
1537 {
1538 gcc_assert (!stmt_info || gimple_code (stmt_info->stmt) != GIMPLE_LABEL);
1539
1540 if (!gsi_end_p (*gsi)
1541 && gimple_has_mem_ops (vec_stmt))
1542 {
1543 gimple *at_stmt = gsi_stmt (*gsi);
1544 tree vuse = gimple_vuse (at_stmt);
1545 if (vuse && TREE_CODE (vuse) == SSA_NAME)
1546 {
1547 tree vdef = gimple_vdef (at_stmt);
1548 gimple_set_vuse (vec_stmt, gimple_vuse (at_stmt));
1549 gimple_set_modified (vec_stmt, true);
1550 /* If we have an SSA vuse and insert a store, update virtual
1551 SSA form to avoid triggering the renamer. Do so only
1552 if we can easily see all uses - which is what almost always
1553 happens with the way vectorized stmts are inserted. */
1554 if ((vdef && TREE_CODE (vdef) == SSA_NAME)
1555 && ((is_gimple_assign (vec_stmt)
1556 && !is_gimple_reg (gimple_assign_lhs (vec_stmt)))
1557 || (is_gimple_call (vec_stmt)
1558 && !(gimple_call_flags (vec_stmt)
1559 & (ECF_CONST|ECF_PURE|ECF_NOVOPS)))))
1560 {
1561 tree new_vdef = copy_ssa_name (vuse, vec_stmt);
1562 gimple_set_vdef (vec_stmt, new_vdef);
1563 SET_USE (gimple_vuse_op (at_stmt), new_vdef);
1564 }
1565 }
1566 }
1567 gsi_insert_before (gsi, vec_stmt, GSI_SAME_STMT);
1568 vect_finish_stmt_generation_1 (vinfo, stmt_info, vec_stmt);
1569 }
1570
1571 /* We want to vectorize a call to combined function CFN with function
1572 decl FNDECL, using VECTYPE_OUT as the type of the output and VECTYPE_IN
1573 as the types of all inputs. Check whether this is possible using
1574 an internal function, returning its code if so or IFN_LAST if not. */
1575
1576 static internal_fn
1577 vectorizable_internal_function (combined_fn cfn, tree fndecl,
1578 tree vectype_out, tree vectype_in)
1579 {
1580 internal_fn ifn;
1581 if (internal_fn_p (cfn))
1582 ifn = as_internal_fn (cfn);
1583 else
1584 ifn = associated_internal_fn (fndecl);
1585 if (ifn != IFN_LAST && direct_internal_fn_p (ifn))
1586 {
1587 const direct_internal_fn_info &info = direct_internal_fn (ifn);
1588 if (info.vectorizable)
1589 {
1590 tree type0 = (info.type0 < 0 ? vectype_out : vectype_in);
1591 tree type1 = (info.type1 < 0 ? vectype_out : vectype_in);
1592 if (direct_internal_fn_supported_p (ifn, tree_pair (type0, type1),
1593 OPTIMIZE_FOR_SPEED))
1594 return ifn;
1595 }
1596 }
1597 return IFN_LAST;
1598 }
1599
1600
1601 static tree permute_vec_elements (vec_info *, tree, tree, tree, stmt_vec_info,
1602 gimple_stmt_iterator *);
1603
1604 /* Check whether a load or store statement in the loop described by
1605 LOOP_VINFO is possible in a loop using partial vectors. This is
1606 testing whether the vectorizer pass has the appropriate support,
1607 as well as whether the target does.
1608
1609 VLS_TYPE says whether the statement is a load or store and VECTYPE
1610 is the type of the vector being loaded or stored. MEMORY_ACCESS_TYPE
1611 says how the load or store is going to be implemented and GROUP_SIZE
1612 is the number of load or store statements in the containing group.
1613 If the access is a gather load or scatter store, GS_INFO describes
1614 its arguments. If the load or store is conditional, SCALAR_MASK is the
1615 condition under which it occurs.
1616
1617 Clear LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P if a loop using partial
1618 vectors is not supported, otherwise record the required rgroup control
1619 types. */
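
/* For example (sketch): for a contiguous masked access with GROUP_SIZE 2,
   a vectorization factor of 8 and 8-element vectors, GROUP_SIZE * VF
   covers 16 scalar elements per iteration, so two rgroup masks are
   recorded via vect_record_loop_mask.  */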
1620
1621 static void
1622 check_load_store_for_partial_vectors (loop_vec_info loop_vinfo, tree vectype,
1623 vec_load_store_type vls_type,
1624 int group_size,
1625 vect_memory_access_type
1626 memory_access_type,
1627 gather_scatter_info *gs_info,
1628 tree scalar_mask)
1629 {
1630 /* Invariant loads need no special support. */
1631 if (memory_access_type == VMAT_INVARIANT)
1632 return;
1633
1634 vec_loop_masks *masks = &LOOP_VINFO_MASKS (loop_vinfo);
1635 machine_mode vecmode = TYPE_MODE (vectype);
1636 bool is_load = (vls_type == VLS_LOAD);
1637 if (memory_access_type == VMAT_LOAD_STORE_LANES)
1638 {
1639 if (is_load
1640 ? !vect_load_lanes_supported (vectype, group_size, true)
1641 : !vect_store_lanes_supported (vectype, group_size, true))
1642 {
1643 if (dump_enabled_p ())
1644 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1645 "can't operate on partial vectors because"
1646 " the target doesn't have an appropriate"
1647 " load/store-lanes instruction.\n");
1648 LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo) = false;
1649 return;
1650 }
1651 unsigned int ncopies = vect_get_num_copies (loop_vinfo, vectype);
1652 vect_record_loop_mask (loop_vinfo, masks, ncopies, vectype, scalar_mask);
1653 return;
1654 }
1655
1656 if (memory_access_type == VMAT_GATHER_SCATTER)
1657 {
1658 internal_fn ifn = (is_load
1659 ? IFN_MASK_GATHER_LOAD
1660 : IFN_MASK_SCATTER_STORE);
1661 if (!internal_gather_scatter_fn_supported_p (ifn, vectype,
1662 gs_info->memory_type,
1663 gs_info->offset_vectype,
1664 gs_info->scale))
1665 {
1666 if (dump_enabled_p ())
1667 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1668 "can't operate on partial vectors because"
1669 " the target doesn't have an appropriate"
1670 " gather load or scatter store instruction.\n");
1671 LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo) = false;
1672 return;
1673 }
1674 unsigned int ncopies = vect_get_num_copies (loop_vinfo, vectype);
1675 vect_record_loop_mask (loop_vinfo, masks, ncopies, vectype, scalar_mask);
1676 return;
1677 }
1678
1679 if (memory_access_type != VMAT_CONTIGUOUS
1680 && memory_access_type != VMAT_CONTIGUOUS_PERMUTE)
1681 {
1682 /* Element X of the data must come from iteration i * VF + X of the
1683 scalar loop. We need more work to support other mappings. */
1684 if (dump_enabled_p ())
1685 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1686 "can't operate on partial vectors because an"
1687 " access isn't contiguous.\n");
1688 LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo) = false;
1689 return;
1690 }
1691
1692 if (!VECTOR_MODE_P (vecmode))
1693 {
1694 if (dump_enabled_p ())
1695 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1696 "can't operate on partial vectors when emulating"
1697 " vector operations.\n");
1698 LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo) = false;
1699 return;
1700 }
1701
1702 /* We might load more scalars than we need for permuting SLP loads.
1703 We checked in get_group_load_store_type that the extra elements
1704 don't leak into a new vector. */
1705 auto get_valid_nvectors = [] (poly_uint64 size, poly_uint64 nunits)
1706 {
1707 unsigned int nvectors;
1708 if (can_div_away_from_zero_p (size, nunits, &nvectors))
1709 return nvectors;
1710 gcc_unreachable ();
1711 };
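/* Worked example (illustrative): with group_size * vf = 8 scalars and
   nunits = 4 lanes per vector, get_valid_nvectors returns 2.  The
   "away from zero" division rounds any inexact quotient up, and the
   gcc_unreachable above only triggers if no constant vector count can
   be computed.  */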
1712
1713 poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
1714 poly_uint64 vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
1715 machine_mode mask_mode;
1716 bool using_partial_vectors_p = false;
1717 if (targetm.vectorize.get_mask_mode (vecmode).exists (&mask_mode)
1718 && can_vec_mask_load_store_p (vecmode, mask_mode, is_load))
1719 {
1720 unsigned int nvectors = get_valid_nvectors (group_size * vf, nunits);
1721 vect_record_loop_mask (loop_vinfo, masks, nvectors, vectype, scalar_mask);
1722 using_partial_vectors_p = true;
1723 }
1724
1725 machine_mode vmode;
1726 if (get_len_load_store_mode (vecmode, is_load).exists (&vmode))
1727 {
1728 unsigned int nvectors = get_valid_nvectors (group_size * vf, nunits);
1729 vec_loop_lens *lens = &LOOP_VINFO_LENS (loop_vinfo);
1730 unsigned factor = (vecmode == vmode) ? 1 : GET_MODE_UNIT_SIZE (vecmode);
1731 vect_record_loop_len (loop_vinfo, lens, nvectors, vectype, factor);
1732 using_partial_vectors_p = true;
1733 }
1734
1735 if (!using_partial_vectors_p)
1736 {
1737 if (dump_enabled_p ())
1738 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1739 "can't operate on partial vectors because the"
1740 " target doesn't have the appropriate partial"
1741 " vectorization load or store.\n");
1742 LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo) = false;
1743 }
1744 }
1745
1746 /* Return the mask input to a masked load or store. VEC_MASK is the vectorized
1747 form of the scalar mask condition and LOOP_MASK, if nonnull, is the mask
1748 that needs to be applied to all loads and stores in a vectorized loop.
1749 Return VEC_MASK if LOOP_MASK is null, otherwise return VEC_MASK & LOOP_MASK.
1750
1751 MASK_TYPE is the type of both masks. If new statements are needed,
1752 insert them before GSI. */
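/* Illustrative example (assumed SSA names): if the scalar condition was
   vectorized as vec_mask_23 and the loop body is fully masked by
   loop_mask_10, the statement emitted before GSI is roughly

     vec_mask_and_24 = vec_mask_23 & loop_mask_10;

   and vec_mask_and_24 is what the masked load or store then consumes.  */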
1753
1754 static tree
1755 prepare_load_store_mask (tree mask_type, tree loop_mask, tree vec_mask,
1756 gimple_stmt_iterator *gsi)
1757 {
1758 gcc_assert (useless_type_conversion_p (mask_type, TREE_TYPE (vec_mask)));
1759 if (!loop_mask)
1760 return vec_mask;
1761
1762 gcc_assert (TREE_TYPE (loop_mask) == mask_type);
1763 tree and_res = make_temp_ssa_name (mask_type, NULL, "vec_mask_and");
1764 gimple *and_stmt = gimple_build_assign (and_res, BIT_AND_EXPR,
1765 vec_mask, loop_mask);
1766 gsi_insert_before (gsi, and_stmt, GSI_SAME_STMT);
1767 return and_res;
1768 }
1769
1770 /* Determine whether we can use a gather load or scatter store to vectorize
1771 strided load or store STMT_INFO by truncating the current offset to a
1772 smaller width. We need to be able to construct an offset vector:
1773
1774 { 0, X, X*2, X*3, ... }
1775
1776 without loss of precision, where X is STMT_INFO's DR_STEP.
1777
1778 Return true if this is possible, describing the gather load or scatter
1779 store in GS_INFO. MASKED_P is true if the load or store is conditional. */
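/* Rough worked example (illustrative numbers): for a step of 4 bytes,
   at most 1000 scalar iterations and a scale equal to the element size
   (4), the factor is 1 and the range 0..999 fits in 10 bits, so a
   16-bit offset type is tried; with scale 1 the range 0..3996 still
   fits in 16 bits.  The target is then asked via
   vect_gather_scatter_fn_p whether it has a gather/scatter that accepts
   an offset of at least that width.  */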
1780
1781 static bool
1782 vect_truncate_gather_scatter_offset (stmt_vec_info stmt_info,
1783 loop_vec_info loop_vinfo, bool masked_p,
1784 gather_scatter_info *gs_info)
1785 {
1786 dr_vec_info *dr_info = STMT_VINFO_DR_INFO (stmt_info);
1787 data_reference *dr = dr_info->dr;
1788 tree step = DR_STEP (dr);
1789 if (TREE_CODE (step) != INTEGER_CST)
1790 {
1791 /* ??? Perhaps we could use range information here? */
1792 if (dump_enabled_p ())
1793 dump_printf_loc (MSG_NOTE, vect_location,
1794 "cannot truncate variable step.\n");
1795 return false;
1796 }
1797
1798 /* Get the number of bits in an element. */
1799 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
1800 scalar_mode element_mode = SCALAR_TYPE_MODE (TREE_TYPE (vectype));
1801 unsigned int element_bits = GET_MODE_BITSIZE (element_mode);
1802
1803 /* Set COUNT to the upper limit on the number of elements - 1.
1804 Start with the maximum vectorization factor. */
1805 unsigned HOST_WIDE_INT count = vect_max_vf (loop_vinfo) - 1;
1806
1807 /* Try lowering COUNT to the number of scalar latch iterations. */
1808 class loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
1809 widest_int max_iters;
1810 if (max_loop_iterations (loop, &max_iters)
1811 && max_iters < count)
1812 count = max_iters.to_shwi ();
1813
1814 /* Try scales of 1 and the element size. */
1815 int scales[] = { 1, vect_get_scalar_dr_size (dr_info) };
1816 wi::overflow_type overflow = wi::OVF_NONE;
1817 for (int i = 0; i < 2; ++i)
1818 {
1819 int scale = scales[i];
1820 widest_int factor;
1821 if (!wi::multiple_of_p (wi::to_widest (step), scale, SIGNED, &factor))
1822 continue;
1823
1824 /* Determine the minimum precision of (COUNT - 1) * STEP / SCALE. */
1825 widest_int range = wi::mul (count, factor, SIGNED, &overflow);
1826 if (overflow)
1827 continue;
1828 signop sign = range >= 0 ? UNSIGNED : SIGNED;
1829 unsigned int min_offset_bits = wi::min_precision (range, sign);
1830
1831 /* Find the narrowest viable offset type. */
1832 unsigned int offset_bits = 1U << ceil_log2 (min_offset_bits);
1833 tree offset_type = build_nonstandard_integer_type (offset_bits,
1834 sign == UNSIGNED);
1835
1836 /* See whether the target supports the operation with an offset
1837 no narrower than OFFSET_TYPE. */
1838 tree memory_type = TREE_TYPE (DR_REF (dr));
1839 if (!vect_gather_scatter_fn_p (loop_vinfo, DR_IS_READ (dr), masked_p,
1840 vectype, memory_type, offset_type, scale,
1841 &gs_info->ifn, &gs_info->offset_vectype))
1842 continue;
1843
1844 gs_info->decl = NULL_TREE;
1845 /* Logically the sum of DR_BASE_ADDRESS, DR_INIT and DR_OFFSET,
1846 but we don't need to store that here. */
1847 gs_info->base = NULL_TREE;
1848 gs_info->element_type = TREE_TYPE (vectype);
1849 gs_info->offset = fold_convert (offset_type, step);
1850 gs_info->offset_dt = vect_constant_def;
1851 gs_info->scale = scale;
1852 gs_info->memory_type = memory_type;
1853 return true;
1854 }
1855
1856 if (overflow && dump_enabled_p ())
1857 dump_printf_loc (MSG_NOTE, vect_location,
1858 "truncating gather/scatter offset to %d bits"
1859 " might change its value.\n", element_bits);
1860
1861 return false;
1862 }
1863
1864 /* Return true if we can use gather/scatter internal functions to
1865 vectorize STMT_INFO, which is a grouped or strided load or store.
1866 MASKED_P is true if the load or store is conditional. When returning
1867 true, fill in GS_INFO with the information required to perform the
1868 operation. */
1869
1870 static bool
1871 vect_use_strided_gather_scatters_p (stmt_vec_info stmt_info,
1872 loop_vec_info loop_vinfo, bool masked_p,
1873 gather_scatter_info *gs_info)
1874 {
1875 if (!vect_check_gather_scatter (stmt_info, loop_vinfo, gs_info)
1876 || gs_info->decl)
1877 return vect_truncate_gather_scatter_offset (stmt_info, loop_vinfo,
1878 masked_p, gs_info);
1879
1880 tree old_offset_type = TREE_TYPE (gs_info->offset);
1881 tree new_offset_type = TREE_TYPE (gs_info->offset_vectype);
1882
1883 gcc_assert (TYPE_PRECISION (new_offset_type)
1884 >= TYPE_PRECISION (old_offset_type));
1885 gs_info->offset = fold_convert (new_offset_type, gs_info->offset);
1886
1887 if (dump_enabled_p ())
1888 dump_printf_loc (MSG_NOTE, vect_location,
1889 "using gather/scatter for strided/grouped access,"
1890 " scale = %d\n", gs_info->scale);
1891
1892 return true;
1893 }
1894
1895 /* STMT_INFO is a non-strided load or store, meaning that it accesses
1896 elements with a known constant step. Return -1 if that step
1897 is negative, 0 if it is zero, and 1 if it is greater than zero. */
1898
1899 static int
1900 compare_step_with_zero (vec_info *vinfo, stmt_vec_info stmt_info)
1901 {
1902 dr_vec_info *dr_info = STMT_VINFO_DR_INFO (stmt_info);
1903 return tree_int_cst_compare (vect_dr_behavior (vinfo, dr_info)->step,
1904 size_zero_node);
1905 }
1906
1907 /* If the target supports a permute mask that reverses the elements in
1908 a vector of type VECTYPE, return that mask, otherwise return null. */
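/* Illustrative example: for an 8-lane vector the required selector is
   { 7, 6, 5, 4, 3, 2, 1, 0 }.  Only the first three elements
   { 7, 6, 5 } need to be encoded explicitly below, since a single
   stepped pattern extends them to the full (possibly variable-length)
   vector.  */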
1909
1910 static tree
1911 perm_mask_for_reverse (tree vectype)
1912 {
1913 poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
1914
1915 /* The encoding has a single stepped pattern. */
1916 vec_perm_builder sel (nunits, 1, 3);
1917 for (int i = 0; i < 3; ++i)
1918 sel.quick_push (nunits - 1 - i);
1919
1920 vec_perm_indices indices (sel, 1, nunits);
1921 if (!can_vec_perm_const_p (TYPE_MODE (vectype), indices))
1922 return NULL_TREE;
1923 return vect_gen_perm_mask_checked (vectype, indices);
1924 }
1925
1926 /* A subroutine of get_load_store_type, with a subset of the same
1927 arguments. Handle the case where STMT_INFO is a load or store that
1928 accesses consecutive elements with a negative step. */
1929
1930 static vect_memory_access_type
1931 get_negative_load_store_type (vec_info *vinfo,
1932 stmt_vec_info stmt_info, tree vectype,
1933 vec_load_store_type vls_type,
1934 unsigned int ncopies)
1935 {
1936 dr_vec_info *dr_info = STMT_VINFO_DR_INFO (stmt_info);
1937 dr_alignment_support alignment_support_scheme;
1938
1939 if (ncopies > 1)
1940 {
1941 if (dump_enabled_p ())
1942 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1943 "multiple types with negative step.\n");
1944 return VMAT_ELEMENTWISE;
1945 }
1946
1947 alignment_support_scheme = vect_supportable_dr_alignment (vinfo,
1948 dr_info, false);
1949 if (alignment_support_scheme != dr_aligned
1950 && alignment_support_scheme != dr_unaligned_supported)
1951 {
1952 if (dump_enabled_p ())
1953 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1954 "negative step but alignment required.\n");
1955 return VMAT_ELEMENTWISE;
1956 }
1957
1958 if (vls_type == VLS_STORE_INVARIANT)
1959 {
1960 if (dump_enabled_p ())
1961 dump_printf_loc (MSG_NOTE, vect_location,
1962 "negative step with invariant source;"
1963 " no permute needed.\n");
1964 return VMAT_CONTIGUOUS_DOWN;
1965 }
1966
1967 if (!perm_mask_for_reverse (vectype))
1968 {
1969 if (dump_enabled_p ())
1970 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1971 "negative step and reversing not supported.\n");
1972 return VMAT_ELEMENTWISE;
1973 }
1974
1975 return VMAT_CONTIGUOUS_REVERSE;
1976 }
1977
1978 /* STMT_INFO is either a masked or unconditional store. Return the value
1979 being stored. */
1980
1981 tree
1982 vect_get_store_rhs (stmt_vec_info stmt_info)
1983 {
1984 if (gassign *assign = dyn_cast <gassign *> (stmt_info->stmt))
1985 {
1986 gcc_assert (gimple_assign_single_p (assign));
1987 return gimple_assign_rhs1 (assign);
1988 }
1989 if (gcall *call = dyn_cast <gcall *> (stmt_info->stmt))
1990 {
1991 internal_fn ifn = gimple_call_internal_fn (call);
1992 int index = internal_fn_stored_value_index (ifn);
1993 gcc_assert (index >= 0);
1994 return gimple_call_arg (call, index);
1995 }
1996 gcc_unreachable ();
1997 }
1998
1999 /* Function VECTOR_VECTOR_COMPOSITION_TYPE
2000
2001 This function returns a vector type which can be composed from NELTS pieces,
2002 whose type is recorded in PTYPE. VTYPE should be a vector type, and the
2003 result has the same vector size as VTYPE. It first checks whether the
2004 target supports a pieces-sized vector mode for the construction; if not,
2005 it checks whether a pieces-sized scalar mode can be used instead. It
2006 returns NULL_TREE if no usable composition can be found.
2007
2008 For example, for (vtype=V16QI, nelts=4), we can probably get:
2009 - V16QI with PTYPE V4QI.
2010 - V4SI with PTYPE SI.
2011 - NULL_TREE. */
2012
2013 static tree
2014 vector_vector_composition_type (tree vtype, poly_uint64 nelts, tree *ptype)
2015 {
2016 gcc_assert (VECTOR_TYPE_P (vtype));
2017 gcc_assert (known_gt (nelts, 0U));
2018
2019 machine_mode vmode = TYPE_MODE (vtype);
2020 if (!VECTOR_MODE_P (vmode))
2021 return NULL_TREE;
2022
2023 poly_uint64 vbsize = GET_MODE_BITSIZE (vmode);
2024 unsigned int pbsize;
2025 if (constant_multiple_p (vbsize, nelts, &pbsize))
2026 {
2027 /* First check if vec_init optab supports construction from
2028 vector pieces directly. */
2029 scalar_mode elmode = SCALAR_TYPE_MODE (TREE_TYPE (vtype));
2030 poly_uint64 inelts = pbsize / GET_MODE_BITSIZE (elmode);
2031 machine_mode rmode;
2032 if (related_vector_mode (vmode, elmode, inelts).exists (&rmode)
2033 && (convert_optab_handler (vec_init_optab, vmode, rmode)
2034 != CODE_FOR_nothing))
2035 {
2036 *ptype = build_vector_type (TREE_TYPE (vtype), inelts);
2037 return vtype;
2038 }
2039
2040 /* Otherwise check whether an integer type of the same piece size exists
2041 and whether the vec_init optab supports construction from it directly. */
2042 if (int_mode_for_size (pbsize, 0).exists (&elmode)
2043 && related_vector_mode (vmode, elmode, nelts).exists (&rmode)
2044 && (convert_optab_handler (vec_init_optab, rmode, elmode)
2045 != CODE_FOR_nothing))
2046 {
2047 *ptype = build_nonstandard_integer_type (pbsize, 1);
2048 return build_vector_type (*ptype, nelts);
2049 }
2050 }
2051
2052 return NULL_TREE;
2053 }
2054
2055 /* A subroutine of get_load_store_type, with a subset of the same
2056 arguments. Handle the case where STMT_INFO is part of a grouped load
2057 or store.
2058
2059 For stores, the statements in the group are all consecutive
2060 and there is no gap at the end. For loads, the statements in the
2061 group might not be consecutive; there can be gaps between statements
2062 as well as at the end. */
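/* Illustrative example (roughly): a loop that only reads a[2*i]
   corresponds to a single-element interleaving group covering two
   elements with a trailing gap of one; vectorizing it contiguously
   would also read the unused elements, which is what the
   overrun/peeling logic below has to account for.  */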
2063
2064 static bool
2065 get_group_load_store_type (vec_info *vinfo, stmt_vec_info stmt_info,
2066 tree vectype, slp_tree slp_node,
2067 bool masked_p, vec_load_store_type vls_type,
2068 vect_memory_access_type *memory_access_type,
2069 dr_alignment_support *alignment_support_scheme,
2070 gather_scatter_info *gs_info)
2071 {
2072 loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
2073 class loop *loop = loop_vinfo ? LOOP_VINFO_LOOP (loop_vinfo) : NULL;
2074 stmt_vec_info first_stmt_info = DR_GROUP_FIRST_ELEMENT (stmt_info);
2075 dr_vec_info *first_dr_info = STMT_VINFO_DR_INFO (first_stmt_info);
2076 unsigned int group_size = DR_GROUP_SIZE (first_stmt_info);
2077 bool single_element_p = (stmt_info == first_stmt_info
2078 && !DR_GROUP_NEXT_ELEMENT (stmt_info));
2079 unsigned HOST_WIDE_INT gap = DR_GROUP_GAP (first_stmt_info);
2080 poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
2081
2082 /* True if the vectorized statements would access beyond the last
2083 statement in the group. */
2084 bool overrun_p = false;
2085
2086 /* True if we can cope with such overrun by peeling for gaps, so that
2087 there is at least one final scalar iteration after the vector loop. */
2088 bool can_overrun_p = (!masked_p
2089 && vls_type == VLS_LOAD
2090 && loop_vinfo
2091 && !loop->inner);
2092
2093 /* There can only be a gap at the end of the group if the stride is
2094 known at compile time. */
2095 gcc_assert (!STMT_VINFO_STRIDED_P (first_stmt_info) || gap == 0);
2096
2097 /* Stores can't yet have gaps. */
2098 gcc_assert (slp_node || vls_type == VLS_LOAD || gap == 0);
2099
2100 if (slp_node)
2101 {
2102 /* For SLP vectorization we directly vectorize a subchain
2103 without permutation. */
2104 if (! SLP_TREE_LOAD_PERMUTATION (slp_node).exists ())
2105 first_dr_info
2106 = STMT_VINFO_DR_INFO (SLP_TREE_SCALAR_STMTS (slp_node)[0]);
2107 if (STMT_VINFO_STRIDED_P (first_stmt_info))
2108 {
2109 /* Try to use consecutive accesses of DR_GROUP_SIZE elements,
2110 separated by the stride, until we have a complete vector.
2111 Fall back to scalar accesses if that isn't possible. */
2112 if (multiple_p (nunits, group_size))
2113 *memory_access_type = VMAT_STRIDED_SLP;
2114 else
2115 *memory_access_type = VMAT_ELEMENTWISE;
2116 }
2117 else
2118 {
2119 overrun_p = loop_vinfo && gap != 0;
2120 if (overrun_p && vls_type != VLS_LOAD)
2121 {
2122 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2123 "Grouped store with gaps requires"
2124 " non-consecutive accesses\n");
2125 return false;
2126 }
2127 /* An overrun is fine if the trailing elements are smaller
2128 than the alignment boundary B. Every vector access will
2129 be a multiple of B and so we are guaranteed to access a
2130 non-gap element in the same B-sized block. */
2131 if (overrun_p
2132 && gap < (vect_known_alignment_in_bytes (first_dr_info)
2133 / vect_get_scalar_dr_size (first_dr_info)))
2134 overrun_p = false;
2135
2136 /* If the gap splits the vector in half and the target
2137 can do half-vector operations, avoid the epilogue peeling
2138 by simply loading half of the vector only. Usually
2139 the construction with an upper zero half will be elided. */
2140 dr_alignment_support alignment_support_scheme;
2141 tree half_vtype;
2142 if (overrun_p
2143 && !masked_p
2144 && (((alignment_support_scheme
2145 = vect_supportable_dr_alignment (vinfo,
2146 first_dr_info, false)))
2147 == dr_aligned
2148 || alignment_support_scheme == dr_unaligned_supported)
2149 && known_eq (nunits, (group_size - gap) * 2)
2150 && known_eq (nunits, group_size)
2151 && (vector_vector_composition_type (vectype, 2, &half_vtype)
2152 != NULL_TREE))
2153 overrun_p = false;
2154
2155 if (overrun_p && !can_overrun_p)
2156 {
2157 if (dump_enabled_p ())
2158 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2159 "Peeling for outer loop is not supported\n");
2160 return false;
2161 }
2162 int cmp = compare_step_with_zero (vinfo, stmt_info);
2163 if (cmp < 0)
2164 {
2165 if (single_element_p)
2166 /* ??? The VMAT_CONTIGUOUS_REVERSE code generation is
2167 only correct for single element "interleaving" SLP. */
2168 *memory_access_type = get_negative_load_store_type
2169 (vinfo, stmt_info, vectype, vls_type, 1);
2170 else
2171 {
2172 /* Try to use consecutive accesses of DR_GROUP_SIZE elements,
2173 separated by the stride, until we have a complete vector.
2174 Fall back to scalar accesses if that isn't possible. */
2175 if (multiple_p (nunits, group_size))
2176 *memory_access_type = VMAT_STRIDED_SLP;
2177 else
2178 *memory_access_type = VMAT_ELEMENTWISE;
2179 }
2180 }
2181 else
2182 {
2183 gcc_assert (!loop_vinfo || cmp > 0);
2184 *memory_access_type = VMAT_CONTIGUOUS;
2185 }
2186 }
2187 }
2188 else
2189 {
2190 /* We can always handle this case using elementwise accesses,
2191 but see if something more efficient is available. */
2192 *memory_access_type = VMAT_ELEMENTWISE;
2193
2194 /* If there is a gap at the end of the group then these optimizations
2195 would access excess elements in the last iteration. */
2196 bool would_overrun_p = (gap != 0);
2197 /* An overrun is fine if the trailing elements are smaller than the
2198 alignment boundary B. Every vector access will be a multiple of B
2199 and so we are guaranteed to access a non-gap element in the
2200 same B-sized block. */
2201 if (would_overrun_p
2202 && !masked_p
2203 && gap < (vect_known_alignment_in_bytes (first_dr_info)
2204 / vect_get_scalar_dr_size (first_dr_info)))
2205 would_overrun_p = false;
2206
2207 if (!STMT_VINFO_STRIDED_P (first_stmt_info)
2208 && (can_overrun_p || !would_overrun_p)
2209 && compare_step_with_zero (vinfo, stmt_info) > 0)
2210 {
2211 /* First cope with the degenerate case of a single-element
2212 vector. */
2213 if (known_eq (TYPE_VECTOR_SUBPARTS (vectype), 1U))
2214 ;
2215
2216 /* Otherwise try using LOAD/STORE_LANES. */
2217 else if (vls_type == VLS_LOAD
2218 ? vect_load_lanes_supported (vectype, group_size, masked_p)
2219 : vect_store_lanes_supported (vectype, group_size,
2220 masked_p))
2221 {
2222 *memory_access_type = VMAT_LOAD_STORE_LANES;
2223 overrun_p = would_overrun_p;
2224 }
2225
2226 /* If that fails, try using permuting loads. */
2227 else if (vls_type == VLS_LOAD
2228 ? vect_grouped_load_supported (vectype, single_element_p,
2229 group_size)
2230 : vect_grouped_store_supported (vectype, group_size))
2231 {
2232 *memory_access_type = VMAT_CONTIGUOUS_PERMUTE;
2233 overrun_p = would_overrun_p;
2234 }
2235 }
2236
2237 /* As a last resort, try using a gather load or scatter store.
2238
2239 ??? Although the code can handle all group sizes correctly,
2240 it probably isn't a win to use separate strided accesses based
2241 on nearby locations. Or, even if it's a win over scalar code,
2242 it might not be a win over vectorizing at a lower VF, if that
2243 allows us to use contiguous accesses. */
2244 if (*memory_access_type == VMAT_ELEMENTWISE
2245 && single_element_p
2246 && loop_vinfo
2247 && vect_use_strided_gather_scatters_p (stmt_info, loop_vinfo,
2248 masked_p, gs_info))
2249 *memory_access_type = VMAT_GATHER_SCATTER;
2250 }
2251
2252 if (*memory_access_type == VMAT_GATHER_SCATTER
2253 || *memory_access_type == VMAT_ELEMENTWISE)
2254 *alignment_support_scheme = dr_unaligned_supported;
2255 else
2256 *alignment_support_scheme
2257 = vect_supportable_dr_alignment (vinfo, first_dr_info, false);
2258
2259 if (vls_type != VLS_LOAD && first_stmt_info == stmt_info)
2260 {
2261 /* STMT is the leader of the group. Check the operands of all the
2262 stmts of the group. */
2263 stmt_vec_info next_stmt_info = DR_GROUP_NEXT_ELEMENT (stmt_info);
2264 while (next_stmt_info)
2265 {
2266 tree op = vect_get_store_rhs (next_stmt_info);
2267 enum vect_def_type dt;
2268 if (!vect_is_simple_use (op, vinfo, &dt))
2269 {
2270 if (dump_enabled_p ())
2271 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2272 "use not simple.\n");
2273 return false;
2274 }
2275 next_stmt_info = DR_GROUP_NEXT_ELEMENT (next_stmt_info);
2276 }
2277 }
2278
2279 if (overrun_p)
2280 {
2281 gcc_assert (can_overrun_p);
2282 if (dump_enabled_p ())
2283 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2284 "Data access with gaps requires scalar "
2285 "epilogue loop\n");
2286 LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo) = true;
2287 }
2288
2289 return true;
2290 }
2291
2292 /* Analyze load or store statement STMT_INFO of type VLS_TYPE. Return true
2293 if there is a memory access type that the vectorized form can use,
2294 storing it in *MEMORY_ACCESS_TYPE if so. If we decide to use gathers
2295 or scatters, fill in GS_INFO accordingly. In addition
2296 *ALIGNMENT_SUPPORT_SCHEME is filled out and false is returned if
2297 the target does not support the alignment scheme.
2298
2299 SLP says whether we're performing SLP rather than loop vectorization.
2300 MASKED_P is true if the statement is conditional on a vectorized mask.
2301 VECTYPE is the vector type that the vectorized statements will use.
2302 NCOPIES is the number of vector statements that will be needed. */
2303
2304 static bool
2305 get_load_store_type (vec_info *vinfo, stmt_vec_info stmt_info,
2306 tree vectype, slp_tree slp_node,
2307 bool masked_p, vec_load_store_type vls_type,
2308 unsigned int ncopies,
2309 vect_memory_access_type *memory_access_type,
2310 dr_alignment_support *alignment_support_scheme,
2311 gather_scatter_info *gs_info)
2312 {
2313 loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
2314 poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
2315 if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
2316 {
2317 *memory_access_type = VMAT_GATHER_SCATTER;
2318 if (!vect_check_gather_scatter (stmt_info, loop_vinfo, gs_info))
2319 gcc_unreachable ();
2320 else if (!vect_is_simple_use (gs_info->offset, vinfo,
2321 &gs_info->offset_dt,
2322 &gs_info->offset_vectype))
2323 {
2324 if (dump_enabled_p ())
2325 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2326 "%s index use not simple.\n",
2327 vls_type == VLS_LOAD ? "gather" : "scatter");
2328 return false;
2329 }
2330 /* Gather-scatter accesses perform only component accesses; alignment
2331 is irrelevant for them. */
2332 *alignment_support_scheme = dr_unaligned_supported;
2333 }
2334 else if (STMT_VINFO_GROUPED_ACCESS (stmt_info))
2335 {
2336 if (!get_group_load_store_type (vinfo, stmt_info, vectype, slp_node,
2337 masked_p,
2338 vls_type, memory_access_type,
2339 alignment_support_scheme, gs_info))
2340 return false;
2341 }
2342 else if (STMT_VINFO_STRIDED_P (stmt_info))
2343 {
2344 gcc_assert (!slp_node);
2345 if (loop_vinfo
2346 && vect_use_strided_gather_scatters_p (stmt_info, loop_vinfo,
2347 masked_p, gs_info))
2348 *memory_access_type = VMAT_GATHER_SCATTER;
2349 else
2350 *memory_access_type = VMAT_ELEMENTWISE;
2351 /* Alignment is irrelevant here. */
2352 *alignment_support_scheme = dr_unaligned_supported;
2353 }
2354 else
2355 {
2356 int cmp = compare_step_with_zero (vinfo, stmt_info);
2357 if (cmp < 0)
2358 *memory_access_type = get_negative_load_store_type
2359 (vinfo, stmt_info, vectype, vls_type, ncopies);
2360 else if (cmp == 0)
2361 {
2362 gcc_assert (vls_type == VLS_LOAD);
2363 *memory_access_type = VMAT_INVARIANT;
2364 }
2365 else
2366 *memory_access_type = VMAT_CONTIGUOUS;
2367 *alignment_support_scheme
2368 = vect_supportable_dr_alignment (vinfo,
2369 STMT_VINFO_DR_INFO (stmt_info), false);
2370 }
2371
2372 if ((*memory_access_type == VMAT_ELEMENTWISE
2373 || *memory_access_type == VMAT_STRIDED_SLP)
2374 && !nunits.is_constant ())
2375 {
2376 if (dump_enabled_p ())
2377 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2378 "Not using elementwise accesses due to variable "
2379 "vectorization factor.\n");
2380 return false;
2381 }
2382
2383 if (*alignment_support_scheme == dr_unaligned_unsupported)
2384 {
2385 if (dump_enabled_p ())
2386 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2387 "unsupported unaligned access\n");
2388 return false;
2389 }
2390
2391 /* FIXME: At the moment the cost model seems to underestimate the
2392 cost of using elementwise accesses. This check preserves the
2393 traditional behavior until that can be fixed. */
2394 stmt_vec_info first_stmt_info = DR_GROUP_FIRST_ELEMENT (stmt_info);
2395 if (!first_stmt_info)
2396 first_stmt_info = stmt_info;
2397 if (*memory_access_type == VMAT_ELEMENTWISE
2398 && !STMT_VINFO_STRIDED_P (first_stmt_info)
2399 && !(stmt_info == DR_GROUP_FIRST_ELEMENT (stmt_info)
2400 && !DR_GROUP_NEXT_ELEMENT (stmt_info)
2401 && !pow2p_hwi (DR_GROUP_SIZE (stmt_info))))
2402 {
2403 if (dump_enabled_p ())
2404 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2405 "not falling back to elementwise accesses\n");
2406 return false;
2407 }
2408 return true;
2409 }
2410
2411 /* Return true if boolean argument MASK is suitable for vectorizing
2412 conditional operation STMT_INFO. When returning true, store the type
2413 of the definition in *MASK_DT_OUT and the type of the vectorized mask
2414 in *MASK_VECTYPE_OUT. */
2415
2416 static bool
2417 vect_check_scalar_mask (vec_info *vinfo, stmt_vec_info stmt_info, tree mask,
2418 vect_def_type *mask_dt_out,
2419 tree *mask_vectype_out)
2420 {
2421 if (!VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (mask)))
2422 {
2423 if (dump_enabled_p ())
2424 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2425 "mask argument is not a boolean.\n");
2426 return false;
2427 }
2428
2429 if (TREE_CODE (mask) != SSA_NAME)
2430 {
2431 if (dump_enabled_p ())
2432 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2433 "mask argument is not an SSA name.\n");
2434 return false;
2435 }
2436
2437 enum vect_def_type mask_dt;
2438 tree mask_vectype;
2439 if (!vect_is_simple_use (mask, vinfo, &mask_dt, &mask_vectype))
2440 {
2441 if (dump_enabled_p ())
2442 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2443 "mask use not simple.\n");
2444 return false;
2445 }
2446
2447 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
2448 if (!mask_vectype)
2449 mask_vectype = get_mask_type_for_scalar_type (vinfo, TREE_TYPE (vectype));
2450
2451 if (!mask_vectype || !VECTOR_BOOLEAN_TYPE_P (mask_vectype))
2452 {
2453 if (dump_enabled_p ())
2454 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2455 "could not find an appropriate vector mask type.\n");
2456 return false;
2457 }
2458
2459 if (maybe_ne (TYPE_VECTOR_SUBPARTS (mask_vectype),
2460 TYPE_VECTOR_SUBPARTS (vectype)))
2461 {
2462 if (dump_enabled_p ())
2463 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2464 "vector mask type %T"
2465 " does not match vector data type %T.\n",
2466 mask_vectype, vectype);
2467
2468 return false;
2469 }
2470
2471 *mask_dt_out = mask_dt;
2472 *mask_vectype_out = mask_vectype;
2473 return true;
2474 }
2475
2476 /* Return true if stored value RHS is suitable for vectorizing store
2477 statement STMT_INFO. When returning true, store the type of the
2478 definition in *RHS_DT_OUT, the type of the vectorized store value in
2479 *RHS_VECTYPE_OUT and the type of the store in *VLS_TYPE_OUT. */
2480
2481 static bool
2482 vect_check_store_rhs (vec_info *vinfo, stmt_vec_info stmt_info,
2483 slp_tree slp_node, tree rhs,
2484 vect_def_type *rhs_dt_out, tree *rhs_vectype_out,
2485 vec_load_store_type *vls_type_out)
2486 {
2487 /* In the case this is a store from a constant, make sure
2488 native_encode_expr can handle it. */
2489 if (CONSTANT_CLASS_P (rhs) && native_encode_expr (rhs, NULL, 64) == 0)
2490 {
2491 if (dump_enabled_p ())
2492 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2493 "cannot encode constant as a byte sequence.\n");
2494 return false;
2495 }
2496
2497 enum vect_def_type rhs_dt;
2498 tree rhs_vectype;
2499 slp_tree slp_op;
2500 if (!vect_is_simple_use (vinfo, stmt_info, slp_node, 0,
2501 &rhs, &slp_op, &rhs_dt, &rhs_vectype))
2502 {
2503 if (dump_enabled_p ())
2504 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2505 "use not simple.\n");
2506 return false;
2507 }
2508
2509 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
2510 if (rhs_vectype && !useless_type_conversion_p (vectype, rhs_vectype))
2511 {
2512 if (dump_enabled_p ())
2513 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2514 "incompatible vector types.\n");
2515 return false;
2516 }
2517
2518 *rhs_dt_out = rhs_dt;
2519 *rhs_vectype_out = rhs_vectype;
2520 if (rhs_dt == vect_constant_def || rhs_dt == vect_external_def)
2521 *vls_type_out = VLS_STORE_INVARIANT;
2522 else
2523 *vls_type_out = VLS_STORE;
2524 return true;
2525 }
2526
2527 /* Build an all-ones vector mask of type MASKTYPE while vectorizing STMT_INFO.
2528 Note that we support masks with floating-point type, in which case the
2529 floats are interpreted as a bitmask. */
2530
2531 static tree
2532 vect_build_all_ones_mask (vec_info *vinfo,
2533 stmt_vec_info stmt_info, tree masktype)
2534 {
2535 if (TREE_CODE (masktype) == INTEGER_TYPE)
2536 return build_int_cst (masktype, -1);
2537 else if (TREE_CODE (TREE_TYPE (masktype)) == INTEGER_TYPE)
2538 {
2539 tree mask = build_int_cst (TREE_TYPE (masktype), -1);
2540 mask = build_vector_from_val (masktype, mask);
2541 return vect_init_vector (vinfo, stmt_info, mask, masktype, NULL);
2542 }
2543 else if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (masktype)))
2544 {
2545 REAL_VALUE_TYPE r;
2546 long tmp[6];
2547 for (int j = 0; j < 6; ++j)
2548 tmp[j] = -1;
2549 real_from_target (&r, tmp, TYPE_MODE (TREE_TYPE (masktype)));
2550 tree mask = build_real (TREE_TYPE (masktype), r);
2551 mask = build_vector_from_val (masktype, mask);
2552 return vect_init_vector (vinfo, stmt_info, mask, masktype, NULL);
2553 }
2554 gcc_unreachable ();
2555 }
2556
2557 /* Build an all-zero merge value of type VECTYPE while vectorizing
2558 STMT_INFO as a gather load. */
2559
2560 static tree
2561 vect_build_zero_merge_argument (vec_info *vinfo,
2562 stmt_vec_info stmt_info, tree vectype)
2563 {
2564 tree merge;
2565 if (TREE_CODE (TREE_TYPE (vectype)) == INTEGER_TYPE)
2566 merge = build_int_cst (TREE_TYPE (vectype), 0);
2567 else if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (vectype)))
2568 {
2569 REAL_VALUE_TYPE r;
2570 long tmp[6];
2571 for (int j = 0; j < 6; ++j)
2572 tmp[j] = 0;
2573 real_from_target (&r, tmp, TYPE_MODE (TREE_TYPE (vectype)));
2574 merge = build_real (TREE_TYPE (vectype), r);
2575 }
2576 else
2577 gcc_unreachable ();
2578 merge = build_vector_from_val (vectype, merge);
2579 return vect_init_vector (vinfo, stmt_info, merge, vectype, NULL);
2580 }
2581
2582 /* Build a gather load call while vectorizing STMT_INFO. Insert new
2583 instructions before GSI and add them to VEC_STMT. GS_INFO describes
2584 the gather load operation. If the load is conditional, MASK is the
2585 unvectorized condition and MASK_DT is its definition type, otherwise
2586 MASK is null. */
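/* Note (illustrative): the WIDEN/NARROW handling below deals with the
   data vector and the offset vector having different lane counts.  For
   instance, gathering 8 SI elements with 4-lane DI offsets needs two
   gather calls whose results are merged (the NARROW case), while
   gathering 4 DI elements with 8-lane SI offsets uses only half of
   each offset vector per call (the WIDEN case).  */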
2587
2588 static void
2589 vect_build_gather_load_calls (vec_info *vinfo, stmt_vec_info stmt_info,
2590 gimple_stmt_iterator *gsi,
2591 gimple **vec_stmt,
2592 gather_scatter_info *gs_info,
2593 tree mask)
2594 {
2595 loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
2596 class loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
2597 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
2598 poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
2599 int ncopies = vect_get_num_copies (loop_vinfo, vectype);
2600 edge pe = loop_preheader_edge (loop);
2601 enum { NARROW, NONE, WIDEN } modifier;
2602 poly_uint64 gather_off_nunits
2603 = TYPE_VECTOR_SUBPARTS (gs_info->offset_vectype);
2604
2605 tree arglist = TYPE_ARG_TYPES (TREE_TYPE (gs_info->decl));
2606 tree rettype = TREE_TYPE (TREE_TYPE (gs_info->decl));
2607 tree srctype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
2608 tree ptrtype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
2609 tree idxtype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
2610 tree masktype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
2611 tree scaletype = TREE_VALUE (arglist);
2612 tree real_masktype = masktype;
2613 gcc_checking_assert (types_compatible_p (srctype, rettype)
2614 && (!mask
2615 || TREE_CODE (masktype) == INTEGER_TYPE
2616 || types_compatible_p (srctype, masktype)));
2617 if (mask && TREE_CODE (masktype) == INTEGER_TYPE)
2618 masktype = truth_type_for (srctype);
2619
2620 tree mask_halftype = masktype;
2621 tree perm_mask = NULL_TREE;
2622 tree mask_perm_mask = NULL_TREE;
2623 if (known_eq (nunits, gather_off_nunits))
2624 modifier = NONE;
2625 else if (known_eq (nunits * 2, gather_off_nunits))
2626 {
2627 modifier = WIDEN;
2628
2629 /* Currently widening gathers and scatters are only supported for
2630 fixed-length vectors. */
2631 int count = gather_off_nunits.to_constant ();
2632 vec_perm_builder sel (count, count, 1);
2633 for (int i = 0; i < count; ++i)
2634 sel.quick_push (i | (count / 2));
2635
2636 vec_perm_indices indices (sel, 1, count);
2637 perm_mask = vect_gen_perm_mask_checked (gs_info->offset_vectype,
2638 indices);
2639 }
2640 else if (known_eq (nunits, gather_off_nunits * 2))
2641 {
2642 modifier = NARROW;
2643
2644 /* Currently narrowing gathers and scatters are only supported for
2645 fixed-length vectors. */
2646 int count = nunits.to_constant ();
2647 vec_perm_builder sel (count, count, 1);
2648 sel.quick_grow (count);
2649 for (int i = 0; i < count; ++i)
2650 sel[i] = i < count / 2 ? i : i + count / 2;
2651 vec_perm_indices indices (sel, 2, count);
2652 perm_mask = vect_gen_perm_mask_checked (vectype, indices);
2653
2654 ncopies *= 2;
2655
2656 if (mask && masktype == real_masktype)
2657 {
2658 for (int i = 0; i < count; ++i)
2659 sel[i] = i | (count / 2);
2660 indices.new_vector (sel, 2, count);
2661 mask_perm_mask = vect_gen_perm_mask_checked (masktype, indices);
2662 }
2663 else if (mask)
2664 mask_halftype = truth_type_for (gs_info->offset_vectype);
2665 }
2666 else
2667 gcc_unreachable ();
2668
2669 tree scalar_dest = gimple_get_lhs (stmt_info->stmt);
2670 tree vec_dest = vect_create_destination_var (scalar_dest, vectype);
2671
2672 tree ptr = fold_convert (ptrtype, gs_info->base);
2673 if (!is_gimple_min_invariant (ptr))
2674 {
2675 gimple_seq seq;
2676 ptr = force_gimple_operand (ptr, &seq, true, NULL_TREE);
2677 basic_block new_bb = gsi_insert_seq_on_edge_immediate (pe, seq);
2678 gcc_assert (!new_bb);
2679 }
2680
2681 tree scale = build_int_cst (scaletype, gs_info->scale);
2682
2683 tree vec_oprnd0 = NULL_TREE;
2684 tree vec_mask = NULL_TREE;
2685 tree src_op = NULL_TREE;
2686 tree mask_op = NULL_TREE;
2687 tree prev_res = NULL_TREE;
2688
2689 if (!mask)
2690 {
2691 src_op = vect_build_zero_merge_argument (vinfo, stmt_info, rettype);
2692 mask_op = vect_build_all_ones_mask (vinfo, stmt_info, masktype);
2693 }
2694
2695 auto_vec<tree> vec_oprnds0;
2696 auto_vec<tree> vec_masks;
2697 vect_get_vec_defs_for_operand (vinfo, stmt_info,
2698 modifier == WIDEN ? ncopies / 2 : ncopies,
2699 gs_info->offset, &vec_oprnds0);
2700 if (mask)
2701 vect_get_vec_defs_for_operand (vinfo, stmt_info,
2702 modifier == NARROW ? ncopies / 2 : ncopies,
2703 mask, &vec_masks);
2704 for (int j = 0; j < ncopies; ++j)
2705 {
2706 tree op, var;
2707 if (modifier == WIDEN && (j & 1))
2708 op = permute_vec_elements (vinfo, vec_oprnd0, vec_oprnd0,
2709 perm_mask, stmt_info, gsi);
2710 else
2711 op = vec_oprnd0 = vec_oprnds0[modifier == WIDEN ? j / 2 : j];
2712
2713 if (!useless_type_conversion_p (idxtype, TREE_TYPE (op)))
2714 {
2715 gcc_assert (known_eq (TYPE_VECTOR_SUBPARTS (TREE_TYPE (op)),
2716 TYPE_VECTOR_SUBPARTS (idxtype)));
2717 var = vect_get_new_ssa_name (idxtype, vect_simple_var);
2718 op = build1 (VIEW_CONVERT_EXPR, idxtype, op);
2719 gassign *new_stmt = gimple_build_assign (var, VIEW_CONVERT_EXPR, op);
2720 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
2721 op = var;
2722 }
2723
2724 if (mask)
2725 {
2726 if (mask_perm_mask && (j & 1))
2727 mask_op = permute_vec_elements (vinfo, mask_op, mask_op,
2728 mask_perm_mask, stmt_info, gsi);
2729 else
2730 {
2731 if (modifier == NARROW)
2732 {
2733 if ((j & 1) == 0)
2734 vec_mask = vec_masks[j / 2];
2735 }
2736 else
2737 vec_mask = vec_masks[j];
2738
2739 mask_op = vec_mask;
2740 if (!useless_type_conversion_p (masktype, TREE_TYPE (vec_mask)))
2741 {
2742 poly_uint64 sub1 = TYPE_VECTOR_SUBPARTS (TREE_TYPE (mask_op));
2743 poly_uint64 sub2 = TYPE_VECTOR_SUBPARTS (masktype);
2744 gcc_assert (known_eq (sub1, sub2));
2745 var = vect_get_new_ssa_name (masktype, vect_simple_var);
2746 mask_op = build1 (VIEW_CONVERT_EXPR, masktype, mask_op);
2747 gassign *new_stmt
2748 = gimple_build_assign (var, VIEW_CONVERT_EXPR, mask_op);
2749 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
2750 mask_op = var;
2751 }
2752 }
2753 if (modifier == NARROW && masktype != real_masktype)
2754 {
2755 var = vect_get_new_ssa_name (mask_halftype, vect_simple_var);
2756 gassign *new_stmt
2757 = gimple_build_assign (var, (j & 1) ? VEC_UNPACK_HI_EXPR
2758 : VEC_UNPACK_LO_EXPR,
2759 mask_op);
2760 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
2761 mask_op = var;
2762 }
2763 src_op = mask_op;
2764 }
2765
2766 tree mask_arg = mask_op;
2767 if (masktype != real_masktype)
2768 {
2769 tree utype, optype = TREE_TYPE (mask_op);
2770 if (TYPE_MODE (real_masktype) == TYPE_MODE (optype))
2771 utype = real_masktype;
2772 else
2773 utype = lang_hooks.types.type_for_mode (TYPE_MODE (optype), 1);
2774 var = vect_get_new_ssa_name (utype, vect_scalar_var);
2775 mask_arg = build1 (VIEW_CONVERT_EXPR, utype, mask_op);
2776 gassign *new_stmt
2777 = gimple_build_assign (var, VIEW_CONVERT_EXPR, mask_arg);
2778 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
2779 mask_arg = var;
2780 if (!useless_type_conversion_p (real_masktype, utype))
2781 {
2782 gcc_assert (TYPE_PRECISION (utype)
2783 <= TYPE_PRECISION (real_masktype));
2784 var = vect_get_new_ssa_name (real_masktype, vect_scalar_var);
2785 new_stmt = gimple_build_assign (var, NOP_EXPR, mask_arg);
2786 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
2787 mask_arg = var;
2788 }
2789 src_op = build_zero_cst (srctype);
2790 }
2791 gimple *new_stmt = gimple_build_call (gs_info->decl, 5, src_op, ptr, op,
2792 mask_arg, scale);
2793
2794 if (!useless_type_conversion_p (vectype, rettype))
2795 {
2796 gcc_assert (known_eq (TYPE_VECTOR_SUBPARTS (vectype),
2797 TYPE_VECTOR_SUBPARTS (rettype)));
2798 op = vect_get_new_ssa_name (rettype, vect_simple_var);
2799 gimple_call_set_lhs (new_stmt, op);
2800 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
2801 var = make_ssa_name (vec_dest);
2802 op = build1 (VIEW_CONVERT_EXPR, vectype, op);
2803 new_stmt = gimple_build_assign (var, VIEW_CONVERT_EXPR, op);
2804 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
2805 }
2806 else
2807 {
2808 var = make_ssa_name (vec_dest, new_stmt);
2809 gimple_call_set_lhs (new_stmt, var);
2810 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
2811 }
2812
2813 if (modifier == NARROW)
2814 {
2815 if ((j & 1) == 0)
2816 {
2817 prev_res = var;
2818 continue;
2819 }
2820 var = permute_vec_elements (vinfo, prev_res, var, perm_mask,
2821 stmt_info, gsi);
2822 new_stmt = SSA_NAME_DEF_STMT (var);
2823 }
2824
2825 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
2826 }
2827 *vec_stmt = STMT_VINFO_VEC_STMTS (stmt_info)[0];
2828 }
2829
2830 /* Prepare the base and offset in GS_INFO for vectorization.
2831 Set *DATAREF_PTR to the loop-invariant base address and *VEC_OFFSET
2832 to the vectorized offset argument for the first copy of STMT_INFO.
2833 STMT_INFO is the statement described by GS_INFO and LOOP is the
2834 containing loop. */
2835
2836 static void
2837 vect_get_gather_scatter_ops (vec_info *vinfo,
2838 class loop *loop, stmt_vec_info stmt_info,
2839 gather_scatter_info *gs_info,
2840 tree *dataref_ptr, vec<tree> *vec_offset,
2841 unsigned ncopies)
2842 {
2843 gimple_seq stmts = NULL;
2844 *dataref_ptr = force_gimple_operand (gs_info->base, &stmts, true, NULL_TREE);
2845 if (stmts != NULL)
2846 {
2847 basic_block new_bb;
2848 edge pe = loop_preheader_edge (loop);
2849 new_bb = gsi_insert_seq_on_edge_immediate (pe, stmts);
2850 gcc_assert (!new_bb);
2851 }
2852 vect_get_vec_defs_for_operand (vinfo, stmt_info, ncopies, gs_info->offset,
2853 vec_offset, gs_info->offset_vectype);
2854 }
2855
2856 /* Prepare to implement a grouped or strided load or store using
2857 the gather load or scatter store operation described by GS_INFO.
2858 STMT_INFO is the load or store statement.
2859
2860 Set *DATAREF_BUMP to the amount that should be added to the base
2861 address after each copy of the vectorized statement. Set *VEC_OFFSET
2862 to an invariant offset vector in which element I has the value
2863 I * DR_STEP / SCALE. */
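/* Worked example (illustrative): for DR_STEP = 16 bytes, a 4-lane
   vector type and SCALE = 4, *DATAREF_BUMP becomes 16 * 4 = 64 bytes
   per copy and *VEC_OFFSET becomes the series { 0, 4, 8, 12 }, built
   with VEC_SERIES_EXPR from a zero base and step DR_STEP / SCALE.  */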
2864
2865 static void
2866 vect_get_strided_load_store_ops (stmt_vec_info stmt_info,
2867 loop_vec_info loop_vinfo,
2868 gather_scatter_info *gs_info,
2869 tree *dataref_bump, tree *vec_offset)
2870 {
2871 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
2872 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
2873
2874 tree bump = size_binop (MULT_EXPR,
2875 fold_convert (sizetype, unshare_expr (DR_STEP (dr))),
2876 size_int (TYPE_VECTOR_SUBPARTS (vectype)));
2877 *dataref_bump = cse_and_gimplify_to_preheader (loop_vinfo, bump);
2878
2879 /* The offset given in GS_INFO can have pointer type, so use the element
2880 type of the vector instead. */
2881 tree offset_type = TREE_TYPE (gs_info->offset_vectype);
2882
2883 /* Calculate X = DR_STEP / SCALE and convert it to the appropriate type. */
2884 tree step = size_binop (EXACT_DIV_EXPR, unshare_expr (DR_STEP (dr)),
2885 ssize_int (gs_info->scale));
2886 step = fold_convert (offset_type, step);
2887
2888 /* Create {0, X, X*2, X*3, ...}. */
2889 tree offset = fold_build2 (VEC_SERIES_EXPR, gs_info->offset_vectype,
2890 build_zero_cst (offset_type), step);
2891 *vec_offset = cse_and_gimplify_to_preheader (loop_vinfo, offset);
2892 }
2893
2894 /* Return the amount that should be added to a vector pointer to move
2895 to the next or previous copy of AGGR_TYPE. DR_INFO is the data reference
2896 being vectorized and MEMORY_ACCESS_TYPE describes the type of
2897 vectorization. */
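/* Illustrative example: for a contiguous access with AGGR_TYPE V4SI
   (16 bytes) and a forward step the increment is +16; with a negative
   DR step (e.g. VMAT_CONTIGUOUS_REVERSE) it is negated to -16, and for
   VMAT_INVARIANT no bump is needed at all.  */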
2898
2899 static tree
2900 vect_get_data_ptr_increment (vec_info *vinfo,
2901 dr_vec_info *dr_info, tree aggr_type,
2902 vect_memory_access_type memory_access_type)
2903 {
2904 if (memory_access_type == VMAT_INVARIANT)
2905 return size_zero_node;
2906
2907 tree iv_step = TYPE_SIZE_UNIT (aggr_type);
2908 tree step = vect_dr_behavior (vinfo, dr_info)->step;
2909 if (tree_int_cst_sgn (step) == -1)
2910 iv_step = fold_build1 (NEGATE_EXPR, TREE_TYPE (iv_step), iv_step);
2911 return iv_step;
2912 }
2913
2914 /* Check and perform vectorization of BUILT_IN_BSWAP{16,32,64,128}. */
2915
2916 static bool
2917 vectorizable_bswap (vec_info *vinfo,
2918 stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
2919 gimple **vec_stmt, slp_tree slp_node,
2920 slp_tree *slp_op,
2921 tree vectype_in, stmt_vector_for_cost *cost_vec)
2922 {
2923 tree op, vectype;
2924 gcall *stmt = as_a <gcall *> (stmt_info->stmt);
2925 loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
2926 unsigned ncopies;
2927
2928 op = gimple_call_arg (stmt, 0);
2929 vectype = STMT_VINFO_VECTYPE (stmt_info);
2930 poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
2931
2932 /* Multiple types in SLP are handled by creating the appropriate number of
2933 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
2934 case of SLP. */
2935 if (slp_node)
2936 ncopies = 1;
2937 else
2938 ncopies = vect_get_num_copies (loop_vinfo, vectype);
2939
2940 gcc_assert (ncopies >= 1);
2941
2942 tree char_vectype = get_same_sized_vectype (char_type_node, vectype_in);
2943 if (! char_vectype)
2944 return false;
2945
2946 poly_uint64 num_bytes = TYPE_VECTOR_SUBPARTS (char_vectype);
2947 unsigned word_bytes;
2948 if (!constant_multiple_p (num_bytes, nunits, &word_bytes))
2949 return false;
2950
2951 /* The encoding uses one stepped pattern for each byte in the word. */
2952 vec_perm_builder elts (num_bytes, word_bytes, 3);
2953 for (unsigned i = 0; i < 3; ++i)
2954 for (unsigned j = 0; j < word_bytes; ++j)
2955 elts.quick_push ((i + 1) * word_bytes - j - 1);
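  /* Illustrative example: for a 32-bit bswap on a 16-byte vector
     (char_vectype V16QI, word_bytes == 4) the selector expands to
     { 3, 2, 1, 0, 7, 6, 5, 4, 11, 10, 9, 8, 15, 14, 13, 12 },
     i.e. the bytes of each word are reversed in place.  */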
2956
2957 vec_perm_indices indices (elts, 1, num_bytes);
2958 if (!can_vec_perm_const_p (TYPE_MODE (char_vectype), indices))
2959 return false;
2960
2961 if (! vec_stmt)
2962 {
2963 if (slp_node
2964 && !vect_maybe_update_slp_op_vectype (slp_op[0], vectype_in))
2965 {
2966 if (dump_enabled_p ())
2967 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2968 "incompatible vector types for invariants\n");
2969 return false;
2970 }
2971
2972 STMT_VINFO_TYPE (stmt_info) = call_vec_info_type;
2973 DUMP_VECT_SCOPE ("vectorizable_bswap");
2974 if (! slp_node)
2975 {
2976 record_stmt_cost (cost_vec,
2977 1, vector_stmt, stmt_info, 0, vect_prologue);
2978 record_stmt_cost (cost_vec,
2979 ncopies, vec_perm, stmt_info, 0, vect_body);
2980 }
2981 return true;
2982 }
2983
2984 tree bswap_vconst = vec_perm_indices_to_tree (char_vectype, indices);
2985
2986 /* Transform. */
2987 vec<tree> vec_oprnds = vNULL;
2988 vect_get_vec_defs (vinfo, stmt_info, slp_node, ncopies,
2989 op, &vec_oprnds);
2990 /* Arguments are ready. Create the new vector stmt. */
2991 unsigned i;
2992 tree vop;
2993 FOR_EACH_VEC_ELT (vec_oprnds, i, vop)
2994 {
2995 gimple *new_stmt;
2996 tree tem = make_ssa_name (char_vectype);
2997 new_stmt = gimple_build_assign (tem, build1 (VIEW_CONVERT_EXPR,
2998 char_vectype, vop));
2999 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
3000 tree tem2 = make_ssa_name (char_vectype);
3001 new_stmt = gimple_build_assign (tem2, VEC_PERM_EXPR,
3002 tem, tem, bswap_vconst);
3003 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
3004 tem = make_ssa_name (vectype);
3005 new_stmt = gimple_build_assign (tem, build1 (VIEW_CONVERT_EXPR,
3006 vectype, tem2));
3007 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
3008 if (slp_node)
3009 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
3010 else
3011 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
3012 }
3013
3014 if (!slp_node)
3015 *vec_stmt = STMT_VINFO_VEC_STMTS (stmt_info)[0];
3016
3017 vec_oprnds.release ();
3018 return true;
3019 }
3020
3021 /* Return true if vector types VECTYPE_IN and VECTYPE_OUT have
3022 integer elements and if we can narrow VECTYPE_IN to VECTYPE_OUT
3023 in a single step. On success, store the binary pack code in
3024 *CONVERT_CODE. */
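/* Illustrative example (assumed target support): narrowing V2DI results
   to a V4SI vector can be done in one step by packing two V2DI vectors
   with a truncating pack; in that case *CONVERT_CODE is the single-step
   code returned by supportable_narrowing_operation (e.g.
   VEC_PACK_TRUNC_EXPR).  A conversion that would need intermediate
   types is rejected.  */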
3025
3026 static bool
3027 simple_integer_narrowing (tree vectype_out, tree vectype_in,
3028 tree_code *convert_code)
3029 {
3030 if (!INTEGRAL_TYPE_P (TREE_TYPE (vectype_out))
3031 || !INTEGRAL_TYPE_P (TREE_TYPE (vectype_in)))
3032 return false;
3033
3034 tree_code code;
3035 int multi_step_cvt = 0;
3036 auto_vec <tree, 8> interm_types;
3037 if (!supportable_narrowing_operation (NOP_EXPR, vectype_out, vectype_in,
3038 &code, &multi_step_cvt, &interm_types)
3039 || multi_step_cvt)
3040 return false;
3041
3042 *convert_code = code;
3043 return true;
3044 }
3045
3046 /* Function vectorizable_call.
3047
3048 Check if STMT_INFO performs a function call that can be vectorized.
3049 If VEC_STMT is also passed, vectorize STMT_INFO: create a vectorized
3050 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
3051 Return true if STMT_INFO is vectorizable in this way. */
3052
3053 static bool
3054 vectorizable_call (vec_info *vinfo,
3055 stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
3056 gimple **vec_stmt, slp_tree slp_node,
3057 stmt_vector_for_cost *cost_vec)
3058 {
3059 gcall *stmt;
3060 tree vec_dest;
3061 tree scalar_dest;
3062 tree op;
3063 tree vec_oprnd0 = NULL_TREE, vec_oprnd1 = NULL_TREE;
3064 tree vectype_out, vectype_in;
3065 poly_uint64 nunits_in;
3066 poly_uint64 nunits_out;
3067 loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
3068 bb_vec_info bb_vinfo = dyn_cast <bb_vec_info> (vinfo);
3069 tree fndecl, new_temp, rhs_type;
3070 enum vect_def_type dt[4]
3071 = { vect_unknown_def_type, vect_unknown_def_type, vect_unknown_def_type,
3072 vect_unknown_def_type };
3073 tree vectypes[ARRAY_SIZE (dt)] = {};
3074 slp_tree slp_op[ARRAY_SIZE (dt)] = {};
3075 int ndts = ARRAY_SIZE (dt);
3076 int ncopies, j;
3077 auto_vec<tree, 8> vargs;
3078 auto_vec<tree, 8> orig_vargs;
3079 enum { NARROW, NONE, WIDEN } modifier;
3080 size_t i, nargs;
3081 tree lhs;
3082
3083 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
3084 return false;
3085
3086 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
3087 && ! vec_stmt)
3088 return false;
3089
3090 /* Is STMT_INFO a vectorizable call? */
3091 stmt = dyn_cast <gcall *> (stmt_info->stmt);
3092 if (!stmt)
3093 return false;
3094
3095 if (gimple_call_internal_p (stmt)
3096 && (internal_load_fn_p (gimple_call_internal_fn (stmt))
3097 || internal_store_fn_p (gimple_call_internal_fn (stmt))))
3098 /* Handled by vectorizable_load and vectorizable_store. */
3099 return false;
3100
3101 if (gimple_call_lhs (stmt) == NULL_TREE
3102 || TREE_CODE (gimple_call_lhs (stmt)) != SSA_NAME)
3103 return false;
3104
3105 gcc_checking_assert (!stmt_can_throw_internal (cfun, stmt));
3106
3107 vectype_out = STMT_VINFO_VECTYPE (stmt_info);
3108
3109 /* Process function arguments. */
3110 rhs_type = NULL_TREE;
3111 vectype_in = NULL_TREE;
3112 nargs = gimple_call_num_args (stmt);
3113
3114 /* Bail out if the function has more than four arguments; we do not have
3115 interesting builtin functions to vectorize with more than two arguments
3116 except for fma. Having no arguments is also not good. */
3117 if (nargs == 0 || nargs > 4)
3118 return false;
3119
3120 /* Ignore the arguments of IFN_GOMP_SIMD_LANE, they are magic. */
3121 combined_fn cfn = gimple_call_combined_fn (stmt);
3122 if (cfn == CFN_GOMP_SIMD_LANE)
3123 {
3124 nargs = 0;
3125 rhs_type = unsigned_type_node;
3126 }
3127
3128 int mask_opno = -1;
3129 if (internal_fn_p (cfn))
3130 mask_opno = internal_fn_mask_index (as_internal_fn (cfn));
3131
3132 for (i = 0; i < nargs; i++)
3133 {
3134 if ((int) i == mask_opno)
3135 {
3136 op = gimple_call_arg (stmt, i);
3137 if (!vect_check_scalar_mask (vinfo,
3138 stmt_info, op, &dt[i], &vectypes[i]))
3139 return false;
3140 continue;
3141 }
3142
3143 if (!vect_is_simple_use (vinfo, stmt_info, slp_node,
3144 i, &op, &slp_op[i], &dt[i], &vectypes[i]))
3145 {
3146 if (dump_enabled_p ())
3147 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3148 "use not simple.\n");
3149 return false;
3150 }
3151
3152 /* We can only handle calls with arguments of the same type. */
3153 if (rhs_type
3154 && !types_compatible_p (rhs_type, TREE_TYPE (op)))
3155 {
3156 if (dump_enabled_p ())
3157 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3158 "argument types differ.\n");
3159 return false;
3160 }
3161 if (!rhs_type)
3162 rhs_type = TREE_TYPE (op);
3163
3164 if (!vectype_in)
3165 vectype_in = vectypes[i];
3166 else if (vectypes[i]
3167 && !types_compatible_p (vectypes[i], vectype_in))
3168 {
3169 if (dump_enabled_p ())
3170 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3171 "argument vector types differ.\n");
3172 return false;
3173 }
3174 }
3175 /* If all arguments are external or constant defs, infer the vector type
3176 from the scalar type. */
3177 if (!vectype_in)
3178 vectype_in = get_vectype_for_scalar_type (vinfo, rhs_type, slp_node);
3179 if (vec_stmt)
3180 gcc_assert (vectype_in);
3181 if (!vectype_in)
3182 {
3183 if (dump_enabled_p ())
3184 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3185 "no vectype for scalar type %T\n", rhs_type);
3186
3187 return false;
3188 }
3189 /* FORNOW: we don't yet support mixtures of vector sizes for calls,
3190 just mixtures of nunits. E.g. DI->SI versions of __builtin_ctz*
3191 are traditionally vectorized as two VnDI->VnDI IFN_CTZs followed
3192 by a pack of the two vectors into an SI vector. We would need
3193 separate code to handle direct VnDI->VnSI IFN_CTZs. */
3194 if (TYPE_SIZE (vectype_in) != TYPE_SIZE (vectype_out))
3195 {
3196 if (dump_enabled_p ())
3197 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3198 "mismatched vector sizes %T and %T\n",
3199 vectype_in, vectype_out);
3200 return false;
3201 }
3202
3203 if (VECTOR_BOOLEAN_TYPE_P (vectype_out)
3204 != VECTOR_BOOLEAN_TYPE_P (vectype_in))
3205 {
3206 if (dump_enabled_p ())
3207 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3208 "mixed mask and nonmask vector types\n");
3209 return false;
3210 }
3211
3212 /* FORNOW */
3213 nunits_in = TYPE_VECTOR_SUBPARTS (vectype_in);
3214 nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
3215 if (known_eq (nunits_in * 2, nunits_out))
3216 modifier = NARROW;
3217 else if (known_eq (nunits_out, nunits_in))
3218 modifier = NONE;
3219 else if (known_eq (nunits_out * 2, nunits_in))
3220 modifier = WIDEN;
3221 else
3222 return false;
3223
3224 /* We only handle functions that do not read or clobber memory. */
3225 if (gimple_vuse (stmt))
3226 {
3227 if (dump_enabled_p ())
3228 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3229 "function reads from or writes to memory.\n");
3230 return false;
3231 }
3232
3233 /* For now, we only vectorize functions if a target specific builtin
3234 is available. TODO -- in some cases, it might be profitable to
3235 insert the calls for pieces of the vector, in order to be able
3236 to vectorize other operations in the loop. */
3237 fndecl = NULL_TREE;
3238 internal_fn ifn = IFN_LAST;
3239 tree callee = gimple_call_fndecl (stmt);
3240
3241 /* First try using an internal function. */
3242 tree_code convert_code = ERROR_MARK;
3243 if (cfn != CFN_LAST
3244 && (modifier == NONE
3245 || (modifier == NARROW
3246 && simple_integer_narrowing (vectype_out, vectype_in,
3247 &convert_code))))
3248 ifn = vectorizable_internal_function (cfn, callee, vectype_out,
3249 vectype_in);
3250
3251 /* If that fails, try asking for a target-specific built-in function. */
3252 if (ifn == IFN_LAST)
3253 {
3254 if (cfn != CFN_LAST)
3255 fndecl = targetm.vectorize.builtin_vectorized_function
3256 (cfn, vectype_out, vectype_in);
3257 else if (callee && fndecl_built_in_p (callee, BUILT_IN_MD))
3258 fndecl = targetm.vectorize.builtin_md_vectorized_function
3259 (callee, vectype_out, vectype_in);
3260 }
3261
3262 if (ifn == IFN_LAST && !fndecl)
3263 {
3264 if (cfn == CFN_GOMP_SIMD_LANE
3265 && !slp_node
3266 && loop_vinfo
3267 && LOOP_VINFO_LOOP (loop_vinfo)->simduid
3268 && TREE_CODE (gimple_call_arg (stmt, 0)) == SSA_NAME
3269 && LOOP_VINFO_LOOP (loop_vinfo)->simduid
3270 == SSA_NAME_VAR (gimple_call_arg (stmt, 0)))
3271 {
3272 /* We can handle IFN_GOMP_SIMD_LANE by returning a
3273 { 0, 1, 2, ... vf - 1 } vector. */
3274 gcc_assert (nargs == 0);
3275 }
3276 else if (modifier == NONE
3277 && (gimple_call_builtin_p (stmt, BUILT_IN_BSWAP16)
3278 || gimple_call_builtin_p (stmt, BUILT_IN_BSWAP32)
3279 || gimple_call_builtin_p (stmt, BUILT_IN_BSWAP64)
3280 || gimple_call_builtin_p (stmt, BUILT_IN_BSWAP128)))
3281 return vectorizable_bswap (vinfo, stmt_info, gsi, vec_stmt, slp_node,
3282 slp_op, vectype_in, cost_vec);
3283 else
3284 {
3285 if (dump_enabled_p ())
3286 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3287 "function is not vectorizable.\n");
3288 return false;
3289 }
3290 }
3291
3292 if (slp_node)
3293 ncopies = 1;
3294 else if (modifier == NARROW && ifn == IFN_LAST)
3295 ncopies = vect_get_num_copies (loop_vinfo, vectype_out);
3296 else
3297 ncopies = vect_get_num_copies (loop_vinfo, vectype_in);
3298
3299 /* Sanity check: make sure that at least one copy of the vectorized stmt
3300 needs to be generated. */
3301 gcc_assert (ncopies >= 1);
3302
3303 vec_loop_masks *masks = (loop_vinfo ? &LOOP_VINFO_MASKS (loop_vinfo) : NULL);
3304 if (!vec_stmt) /* transformation not required. */
3305 {
3306 if (slp_node)
3307 for (i = 0; i < nargs; ++i)
3308 if (!vect_maybe_update_slp_op_vectype (slp_op[i], vectype_in))
3309 {
3310 if (dump_enabled_p ())
3311 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3312 "incompatible vector types for invariants\n");
3313 return false;
3314 }
3315 STMT_VINFO_TYPE (stmt_info) = call_vec_info_type;
3316 DUMP_VECT_SCOPE ("vectorizable_call");
3317 vect_model_simple_cost (vinfo, stmt_info,
3318 ncopies, dt, ndts, slp_node, cost_vec);
3319 if (ifn != IFN_LAST && modifier == NARROW && !slp_node)
3320 record_stmt_cost (cost_vec, ncopies / 2,
3321 vec_promote_demote, stmt_info, 0, vect_body);
3322
3323 if (loop_vinfo && mask_opno >= 0)
3324 {
3325 unsigned int nvectors = (slp_node
3326 ? SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node)
3327 : ncopies);
3328 tree scalar_mask = gimple_call_arg (stmt_info->stmt, mask_opno);
3329 vect_record_loop_mask (loop_vinfo, masks, nvectors,
3330 vectype_out, scalar_mask);
3331 }
3332 return true;
3333 }
3334
3335 /* Transform. */
3336
3337 if (dump_enabled_p ())
3338 dump_printf_loc (MSG_NOTE, vect_location, "transform call.\n");
3339
3340 /* Handle def. */
3341 scalar_dest = gimple_call_lhs (stmt);
3342 vec_dest = vect_create_destination_var (scalar_dest, vectype_out);
3343
3344 bool masked_loop_p = loop_vinfo && LOOP_VINFO_FULLY_MASKED_P (loop_vinfo);
3345
3346 if (modifier == NONE || ifn != IFN_LAST)
3347 {
3348 tree prev_res = NULL_TREE;
3349 vargs.safe_grow (nargs, true);
3350 orig_vargs.safe_grow (nargs, true);
3351 auto_vec<vec<tree> > vec_defs (nargs);
3352 for (j = 0; j < ncopies; ++j)
3353 {
3354 /* Build argument list for the vectorized call. */
3355 if (slp_node)
3356 {
3357 vec<tree> vec_oprnds0;
3358
3359 vect_get_slp_defs (vinfo, slp_node, &vec_defs);
3360 vec_oprnds0 = vec_defs[0];
3361
3362 /* Arguments are ready. Create the new vector stmt. */
3363 FOR_EACH_VEC_ELT (vec_oprnds0, i, vec_oprnd0)
3364 {
3365 size_t k;
3366 for (k = 0; k < nargs; k++)
3367 {
3368 vec<tree> vec_oprndsk = vec_defs[k];
3369 vargs[k] = vec_oprndsk[i];
3370 }
3371 gimple *new_stmt;
3372 if (modifier == NARROW)
3373 {
3374 /* We don't define any narrowing conditional functions
3375 at present. */
3376 gcc_assert (mask_opno < 0);
3377 tree half_res = make_ssa_name (vectype_in);
3378 gcall *call
3379 = gimple_build_call_internal_vec (ifn, vargs);
3380 gimple_call_set_lhs (call, half_res);
3381 gimple_call_set_nothrow (call, true);
3382 vect_finish_stmt_generation (vinfo, stmt_info, call, gsi);
3383 if ((i & 1) == 0)
3384 {
3385 prev_res = half_res;
3386 continue;
3387 }
3388 new_temp = make_ssa_name (vec_dest);
3389 new_stmt = gimple_build_assign (new_temp, convert_code,
3390 prev_res, half_res);
3391 vect_finish_stmt_generation (vinfo, stmt_info,
3392 new_stmt, gsi);
3393 }
3394 else
3395 {
3396 if (mask_opno >= 0 && masked_loop_p)
3397 {
3398 unsigned int vec_num = vec_oprnds0.length ();
3399 /* Always true for SLP. */
3400 gcc_assert (ncopies == 1);
3401 tree mask = vect_get_loop_mask (gsi, masks, vec_num,
3402 vectype_out, i);
3403 vargs[mask_opno] = prepare_load_store_mask
3404 (TREE_TYPE (mask), mask, vargs[mask_opno], gsi);
3405 }
3406
3407 gcall *call;
3408 if (ifn != IFN_LAST)
3409 call = gimple_build_call_internal_vec (ifn, vargs);
3410 else
3411 call = gimple_build_call_vec (fndecl, vargs);
3412 new_temp = make_ssa_name (vec_dest, call);
3413 gimple_call_set_lhs (call, new_temp);
3414 gimple_call_set_nothrow (call, true);
3415 vect_finish_stmt_generation (vinfo, stmt_info, call, gsi);
3416 new_stmt = call;
3417 }
3418 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
3419 }
3420 continue;
3421 }
3422
3423 for (i = 0; i < nargs; i++)
3424 {
3425 op = gimple_call_arg (stmt, i);
3426 if (j == 0)
3427 {
3428 vec_defs.quick_push (vNULL);
3429 vect_get_vec_defs_for_operand (vinfo, stmt_info, ncopies,
3430 op, &vec_defs[i],
3431 vectypes[i]);
3432 }
3433 orig_vargs[i] = vargs[i] = vec_defs[i][j];
3434 }
3435
3436 if (mask_opno >= 0 && masked_loop_p)
3437 {
3438 tree mask = vect_get_loop_mask (gsi, masks, ncopies,
3439 vectype_out, j);
3440 vargs[mask_opno]
3441 = prepare_load_store_mask (TREE_TYPE (mask), mask,
3442 vargs[mask_opno], gsi);
3443 }
3444
3445 gimple *new_stmt;
3446 if (cfn == CFN_GOMP_SIMD_LANE)
3447 {
3448 tree cst = build_index_vector (vectype_out, j * nunits_out, 1);
3449 tree new_var
3450 = vect_get_new_ssa_name (vectype_out, vect_simple_var, "cst_");
3451 gimple *init_stmt = gimple_build_assign (new_var, cst);
3452 vect_init_vector_1 (vinfo, stmt_info, init_stmt, NULL);
3453 new_temp = make_ssa_name (vec_dest);
3454 new_stmt = gimple_build_assign (new_temp, new_var);
3455 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
3456 }
3457 else if (modifier == NARROW)
3458 {
3459 /* We don't define any narrowing conditional functions at
3460 present. */
3461 gcc_assert (mask_opno < 0);
3462 tree half_res = make_ssa_name (vectype_in);
3463 gcall *call = gimple_build_call_internal_vec (ifn, vargs);
3464 gimple_call_set_lhs (call, half_res);
3465 gimple_call_set_nothrow (call, true);
3466 vect_finish_stmt_generation (vinfo, stmt_info, call, gsi);
3467 if ((j & 1) == 0)
3468 {
3469 prev_res = half_res;
3470 continue;
3471 }
3472 new_temp = make_ssa_name (vec_dest);
3473 new_stmt = gimple_build_assign (new_temp, convert_code,
3474 prev_res, half_res);
3475 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
3476 }
3477 else
3478 {
3479 gcall *call;
3480 if (ifn != IFN_LAST)
3481 call = gimple_build_call_internal_vec (ifn, vargs);
3482 else
3483 call = gimple_build_call_vec (fndecl, vargs);
3484 new_temp = make_ssa_name (vec_dest, call);
3485 gimple_call_set_lhs (call, new_temp);
3486 gimple_call_set_nothrow (call, true);
3487 vect_finish_stmt_generation (vinfo, stmt_info, call, gsi);
3488 new_stmt = call;
3489 }
3490
3491 if (j == (modifier == NARROW ? 1 : 0))
3492 *vec_stmt = new_stmt;
3493 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
3494 }
3495 for (i = 0; i < nargs; i++)
3496 {
3497 vec<tree> vec_oprndsi = vec_defs[i];
3498 vec_oprndsi.release ();
3499 }
3500 }
3501 else if (modifier == NARROW)
3502 {
3503 auto_vec<vec<tree> > vec_defs (nargs);
3504 /* We don't define any narrowing conditional functions at present. */
3505 gcc_assert (mask_opno < 0);
3506 for (j = 0; j < ncopies; ++j)
3507 {
3508 /* Build argument list for the vectorized call. */
3509 if (j == 0)
3510 vargs.create (nargs * 2);
3511 else
3512 vargs.truncate (0);
3513
3514 if (slp_node)
3515 {
3516 vec<tree> vec_oprnds0;
3517
3518 vect_get_slp_defs (vinfo, slp_node, &vec_defs);
3519 vec_oprnds0 = vec_defs[0];
3520
3521 /* Arguments are ready. Create the new vector stmt. */
3522 for (i = 0; vec_oprnds0.iterate (i, &vec_oprnd0); i += 2)
3523 {
3524 size_t k;
3525 vargs.truncate (0);
3526 for (k = 0; k < nargs; k++)
3527 {
3528 vec<tree> vec_oprndsk = vec_defs[k];
3529 vargs.quick_push (vec_oprndsk[i]);
3530 vargs.quick_push (vec_oprndsk[i + 1]);
3531 }
3532 gcall *call;
3533 if (ifn != IFN_LAST)
3534 call = gimple_build_call_internal_vec (ifn, vargs);
3535 else
3536 call = gimple_build_call_vec (fndecl, vargs);
3537 new_temp = make_ssa_name (vec_dest, call);
3538 gimple_call_set_lhs (call, new_temp);
3539 gimple_call_set_nothrow (call, true);
3540 vect_finish_stmt_generation (vinfo, stmt_info, call, gsi);
3541 SLP_TREE_VEC_STMTS (slp_node).quick_push (call);
3542 }
3543 continue;
3544 }
3545
3546 for (i = 0; i < nargs; i++)
3547 {
3548 op = gimple_call_arg (stmt, i);
3549 if (j == 0)
3550 {
3551 vec_defs.quick_push (vNULL);
3552 vect_get_vec_defs_for_operand (vinfo, stmt_info, 2 * ncopies,
3553 op, &vec_defs[i], vectypes[i]);
3554 }
3555 vec_oprnd0 = vec_defs[i][2*j];
3556 vec_oprnd1 = vec_defs[i][2*j+1];
3557
3558 vargs.quick_push (vec_oprnd0);
3559 vargs.quick_push (vec_oprnd1);
3560 }
3561
3562 gcall *new_stmt = gimple_build_call_vec (fndecl, vargs);
3563 new_temp = make_ssa_name (vec_dest, new_stmt);
3564 gimple_call_set_lhs (new_stmt, new_temp);
3565 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
3566
3567 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
3568 }
3569
3570 if (!slp_node)
3571 *vec_stmt = STMT_VINFO_VEC_STMTS (stmt_info)[0];
3572
3573 for (i = 0; i < nargs; i++)
3574 {
3575 vec<tree> vec_oprndsi = vec_defs[i];
3576 vec_oprndsi.release ();
3577 }
3578 }
3579 else
3580 /* No current target implements this case. */
3581 return false;
3582
3583 vargs.release ();
3584
3585 /* The call in STMT might prevent it from being removed in DCE.
3586 We however cannot remove it here, because of the way the SSA name
3587 it defines is mapped to the new definition. So just replace the
3588 rhs of the statement with something harmless. */
3589
3590 if (slp_node)
3591 return true;
3592
3593 stmt_info = vect_orig_stmt (stmt_info);
3594 lhs = gimple_get_lhs (stmt_info->stmt);
3595
3596 gassign *new_stmt
3597 = gimple_build_assign (lhs, build_zero_cst (TREE_TYPE (lhs)));
3598 vinfo->replace_stmt (gsi, stmt_info, new_stmt);
3599
3600 return true;
3601 }
3602
3603
3604 struct simd_call_arg_info
3605 {
3606 tree vectype;
3607 tree op;
3608 HOST_WIDE_INT linear_step;
3609 enum vect_def_type dt;
3610 unsigned int align;
3611 bool simd_lane_linear;
3612 };
3613
3614 /* Helper function of vectorizable_simd_clone_call. If OP, an SSA_NAME,
3615 is linear within a simd lane (but not within the whole loop), note it
3616 in *ARGINFO. */
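/* For illustration (the SSA names and the array below are made up): given

     _1 = IFN_GOMP_SIMD_LANE (simduid.0_7);
     _2 = _1 * 4;
     _3 = (sizetype) _2;
     p_4 = &array + _3;

   p_4 is linear within each simd lane with base &array and step 4,
   which is the pattern the walk below recognizes.  */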
3617
3618 static void
3619 vect_simd_lane_linear (tree op, class loop *loop,
3620 struct simd_call_arg_info *arginfo)
3621 {
3622 gimple *def_stmt = SSA_NAME_DEF_STMT (op);
3623
3624 if (!is_gimple_assign (def_stmt)
3625 || gimple_assign_rhs_code (def_stmt) != POINTER_PLUS_EXPR
3626 || !is_gimple_min_invariant (gimple_assign_rhs1 (def_stmt)))
3627 return;
3628
3629 tree base = gimple_assign_rhs1 (def_stmt);
3630 HOST_WIDE_INT linear_step = 0;
3631 tree v = gimple_assign_rhs2 (def_stmt);
3632 while (TREE_CODE (v) == SSA_NAME)
3633 {
3634 tree t;
3635 def_stmt = SSA_NAME_DEF_STMT (v);
3636 if (is_gimple_assign (def_stmt))
3637 switch (gimple_assign_rhs_code (def_stmt))
3638 {
3639 case PLUS_EXPR:
3640 t = gimple_assign_rhs2 (def_stmt);
3641 if (linear_step || TREE_CODE (t) != INTEGER_CST)
3642 return;
3643 base = fold_build2 (POINTER_PLUS_EXPR, TREE_TYPE (base), base, t);
3644 v = gimple_assign_rhs1 (def_stmt);
3645 continue;
3646 case MULT_EXPR:
3647 t = gimple_assign_rhs2 (def_stmt);
3648 if (linear_step || !tree_fits_shwi_p (t) || integer_zerop (t))
3649 return;
3650 linear_step = tree_to_shwi (t);
3651 v = gimple_assign_rhs1 (def_stmt);
3652 continue;
3653 CASE_CONVERT:
3654 t = gimple_assign_rhs1 (def_stmt);
3655 if (TREE_CODE (TREE_TYPE (t)) != INTEGER_TYPE
3656 || (TYPE_PRECISION (TREE_TYPE (v))
3657 < TYPE_PRECISION (TREE_TYPE (t))))
3658 return;
3659 if (!linear_step)
3660 linear_step = 1;
3661 v = t;
3662 continue;
3663 default:
3664 return;
3665 }
3666 else if (gimple_call_internal_p (def_stmt, IFN_GOMP_SIMD_LANE)
3667 && loop->simduid
3668 && TREE_CODE (gimple_call_arg (def_stmt, 0)) == SSA_NAME
3669 && (SSA_NAME_VAR (gimple_call_arg (def_stmt, 0))
3670 == loop->simduid))
3671 {
3672 if (!linear_step)
3673 linear_step = 1;
3674 arginfo->linear_step = linear_step;
3675 arginfo->op = base;
3676 arginfo->simd_lane_linear = true;
3677 return;
3678 }
3679 }
3680 }
3681
3682 /* Return the number of elements in vector type VECTYPE, which is associated
3683 with a SIMD clone. At present these vectors always have a constant
3684 length. */
3685
3686 static unsigned HOST_WIDE_INT
3687 simd_clone_subparts (tree vectype)
3688 {
3689 return TYPE_VECTOR_SUBPARTS (vectype).to_constant ();
3690 }
3691
3692 /* Function vectorizable_simd_clone_call.
3693
3694 Check if STMT_INFO performs a function call that can be vectorized
3695 by calling a simd clone of the function.
3696 If VEC_STMT is also passed, vectorize STMT_INFO: create a vectorized
3697 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
3698 Return true if STMT_INFO is vectorizable in this way. */
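/* For example (purely illustrative):

     #pragma omp declare simd notinbranch
     int foo (int x);

   a call c = foo (b) in a vectorizable loop can be replaced by a call to
   one of foo's simd clones that takes and returns vectors of ints, e.g.
   a clone with simdlen 4 operating on V4SI.  */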
3699
3700 static bool
3701 vectorizable_simd_clone_call (vec_info *vinfo, stmt_vec_info stmt_info,
3702 gimple_stmt_iterator *gsi,
3703 gimple **vec_stmt, slp_tree slp_node,
3704 stmt_vector_for_cost *)
3705 {
3706 tree vec_dest;
3707 tree scalar_dest;
3708 tree op, type;
3709 tree vec_oprnd0 = NULL_TREE;
3710 tree vectype;
3711 poly_uint64 nunits;
3712 loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
3713 bb_vec_info bb_vinfo = dyn_cast <bb_vec_info> (vinfo);
3714 class loop *loop = loop_vinfo ? LOOP_VINFO_LOOP (loop_vinfo) : NULL;
3715 tree fndecl, new_temp;
3716 int ncopies, j;
3717 auto_vec<simd_call_arg_info> arginfo;
3718 vec<tree> vargs = vNULL;
3719 size_t i, nargs;
3720 tree lhs, rtype, ratype;
3721 vec<constructor_elt, va_gc> *ret_ctor_elts = NULL;
3722
3723 /* Is STMT a vectorizable call? */
3724 gcall *stmt = dyn_cast <gcall *> (stmt_info->stmt);
3725 if (!stmt)
3726 return false;
3727
3728 fndecl = gimple_call_fndecl (stmt);
3729 if (fndecl == NULL_TREE)
3730 return false;
3731
3732 struct cgraph_node *node = cgraph_node::get (fndecl);
3733 if (node == NULL || node->simd_clones == NULL)
3734 return false;
3735
3736 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
3737 return false;
3738
3739 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
3740 && ! vec_stmt)
3741 return false;
3742
3743 if (gimple_call_lhs (stmt)
3744 && TREE_CODE (gimple_call_lhs (stmt)) != SSA_NAME)
3745 return false;
3746
3747 gcc_checking_assert (!stmt_can_throw_internal (cfun, stmt));
3748
3749 vectype = STMT_VINFO_VECTYPE (stmt_info);
3750
3751 if (loop_vinfo && nested_in_vect_loop_p (loop, stmt_info))
3752 return false;
3753
3754 /* FORNOW */
3755 if (slp_node)
3756 return false;
3757
3758 /* Process function arguments. */
3759 nargs = gimple_call_num_args (stmt);
3760
3761 /* Bail out if the function has zero arguments. */
3762 if (nargs == 0)
3763 return false;
3764
3765 arginfo.reserve (nargs, true);
3766
3767 for (i = 0; i < nargs; i++)
3768 {
3769 simd_call_arg_info thisarginfo;
3770 affine_iv iv;
3771
3772 thisarginfo.linear_step = 0;
3773 thisarginfo.align = 0;
3774 thisarginfo.op = NULL_TREE;
3775 thisarginfo.simd_lane_linear = false;
3776
3777 op = gimple_call_arg (stmt, i);
3778 if (!vect_is_simple_use (op, vinfo, &thisarginfo.dt,
3779 &thisarginfo.vectype)
3780 || thisarginfo.dt == vect_uninitialized_def)
3781 {
3782 if (dump_enabled_p ())
3783 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3784 "use not simple.\n");
3785 return false;
3786 }
3787
3788 if (thisarginfo.dt == vect_constant_def
3789 || thisarginfo.dt == vect_external_def)
3790 gcc_assert (thisarginfo.vectype == NULL_TREE);
3791 else
3792 {
3793 gcc_assert (thisarginfo.vectype != NULL_TREE);
3794 if (VECTOR_BOOLEAN_TYPE_P (thisarginfo.vectype))
3795 {
3796 if (dump_enabled_p ())
3797 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3798 "vector mask arguments are not supported\n");
3799 return false;
3800 }
3801 }
3802
3803 /* For linear arguments, the analyze phase should have saved
3804 the base and step in STMT_VINFO_SIMD_CLONE_INFO. */
3805 if (i * 3 + 4 <= STMT_VINFO_SIMD_CLONE_INFO (stmt_info).length ()
3806 && STMT_VINFO_SIMD_CLONE_INFO (stmt_info)[i * 3 + 2])
3807 {
3808 gcc_assert (vec_stmt);
3809 thisarginfo.linear_step
3810 = tree_to_shwi (STMT_VINFO_SIMD_CLONE_INFO (stmt_info)[i * 3 + 2]);
3811 thisarginfo.op
3812 = STMT_VINFO_SIMD_CLONE_INFO (stmt_info)[i * 3 + 1];
3813 thisarginfo.simd_lane_linear
3814 = (STMT_VINFO_SIMD_CLONE_INFO (stmt_info)[i * 3 + 3]
3815 == boolean_true_node);
3816 /* If loop has been peeled for alignment, we need to adjust it. */
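/* For illustration (hypothetical numbers): if peeling removed 3 scalar
   iterations and the recorded step is 4, NITERS_UNCHANGED - NITERS is 3,
   the bias computed below becomes 3 * 4 and the saved base is advanced
   by 12.  */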
3817 tree n1 = LOOP_VINFO_NITERS_UNCHANGED (loop_vinfo);
3818 tree n2 = LOOP_VINFO_NITERS (loop_vinfo);
3819 if (n1 != n2 && !thisarginfo.simd_lane_linear)
3820 {
3821 tree bias = fold_build2 (MINUS_EXPR, TREE_TYPE (n1), n1, n2);
3822 tree step = STMT_VINFO_SIMD_CLONE_INFO (stmt_info)[i * 3 + 2];
3823 tree opt = TREE_TYPE (thisarginfo.op);
3824 bias = fold_convert (TREE_TYPE (step), bias);
3825 bias = fold_build2 (MULT_EXPR, TREE_TYPE (step), bias, step);
3826 thisarginfo.op
3827 = fold_build2 (POINTER_TYPE_P (opt)
3828 ? POINTER_PLUS_EXPR : PLUS_EXPR, opt,
3829 thisarginfo.op, bias);
3830 }
3831 }
3832 else if (!vec_stmt
3833 && thisarginfo.dt != vect_constant_def
3834 && thisarginfo.dt != vect_external_def
3835 && loop_vinfo
3836 && TREE_CODE (op) == SSA_NAME
3837 && simple_iv (loop, loop_containing_stmt (stmt), op,
3838 &iv, false)
3839 && tree_fits_shwi_p (iv.step))
3840 {
3841 thisarginfo.linear_step = tree_to_shwi (iv.step);
3842 thisarginfo.op = iv.base;
3843 }
3844 else if ((thisarginfo.dt == vect_constant_def
3845 || thisarginfo.dt == vect_external_def)
3846 && POINTER_TYPE_P (TREE_TYPE (op)))
3847 thisarginfo.align = get_pointer_alignment (op) / BITS_PER_UNIT;
3848 /* Addresses of array elements indexed by GOMP_SIMD_LANE are
3849 linear too. */
3850 if (POINTER_TYPE_P (TREE_TYPE (op))
3851 && !thisarginfo.linear_step
3852 && !vec_stmt
3853 && thisarginfo.dt != vect_constant_def
3854 && thisarginfo.dt != vect_external_def
3855 && loop_vinfo
3856 && !slp_node
3857 && TREE_CODE (op) == SSA_NAME)
3858 vect_simd_lane_linear (op, loop, &thisarginfo);
3859
3860 arginfo.quick_push (thisarginfo);
3861 }
3862
3863 poly_uint64 vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
3864 if (!vf.is_constant ())
3865 {
3866 if (dump_enabled_p ())
3867 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3868 "not considering SIMD clones; not yet supported"
3869 " for variable-width vectors.\n");
3870 return false;
3871 }
3872
3873 unsigned int badness = 0;
3874 struct cgraph_node *bestn = NULL;
3875 if (STMT_VINFO_SIMD_CLONE_INFO (stmt_info).exists ())
3876 bestn = cgraph_node::get (STMT_VINFO_SIMD_CLONE_INFO (stmt_info)[0]);
3877 else
3878 for (struct cgraph_node *n = node->simd_clones; n != NULL;
3879 n = n->simdclone->next_clone)
3880 {
3881 unsigned int this_badness = 0;
3882 unsigned int num_calls;
3883 if (!constant_multiple_p (vf, n->simdclone->simdlen, &num_calls)
3884 || n->simdclone->nargs != nargs)
3885 continue;
3886 if (num_calls != 1)
3887 this_badness += exact_log2 (num_calls) * 1024;
3888 if (n->simdclone->inbranch)
3889 this_badness += 2048;
3890 int target_badness = targetm.simd_clone.usable (n);
3891 if (target_badness < 0)
3892 continue;
3893 this_badness += target_badness * 512;
3894 /* FORNOW: Have to add code to add the mask argument. */
3895 if (n->simdclone->inbranch)
3896 continue;
3897 for (i = 0; i < nargs; i++)
3898 {
3899 switch (n->simdclone->args[i].arg_type)
3900 {
3901 case SIMD_CLONE_ARG_TYPE_VECTOR:
3902 if (!useless_type_conversion_p
3903 (n->simdclone->args[i].orig_type,
3904 TREE_TYPE (gimple_call_arg (stmt, i))))
3905 i = -1;
3906 else if (arginfo[i].dt == vect_constant_def
3907 || arginfo[i].dt == vect_external_def
3908 || arginfo[i].linear_step)
3909 this_badness += 64;
3910 break;
3911 case SIMD_CLONE_ARG_TYPE_UNIFORM:
3912 if (arginfo[i].dt != vect_constant_def
3913 && arginfo[i].dt != vect_external_def)
3914 i = -1;
3915 break;
3916 case SIMD_CLONE_ARG_TYPE_LINEAR_CONSTANT_STEP:
3917 case SIMD_CLONE_ARG_TYPE_LINEAR_REF_CONSTANT_STEP:
3918 if (arginfo[i].dt == vect_constant_def
3919 || arginfo[i].dt == vect_external_def
3920 || (arginfo[i].linear_step
3921 != n->simdclone->args[i].linear_step))
3922 i = -1;
3923 break;
3924 case SIMD_CLONE_ARG_TYPE_LINEAR_VARIABLE_STEP:
3925 case SIMD_CLONE_ARG_TYPE_LINEAR_VAL_CONSTANT_STEP:
3926 case SIMD_CLONE_ARG_TYPE_LINEAR_UVAL_CONSTANT_STEP:
3927 case SIMD_CLONE_ARG_TYPE_LINEAR_REF_VARIABLE_STEP:
3928 case SIMD_CLONE_ARG_TYPE_LINEAR_VAL_VARIABLE_STEP:
3929 case SIMD_CLONE_ARG_TYPE_LINEAR_UVAL_VARIABLE_STEP:
3930 /* FORNOW */
3931 i = -1;
3932 break;
3933 case SIMD_CLONE_ARG_TYPE_MASK:
3934 gcc_unreachable ();
3935 }
3936 if (i == (size_t) -1)
3937 break;
3938 if (n->simdclone->args[i].alignment > arginfo[i].align)
3939 {
3940 i = -1;
3941 break;
3942 }
3943 if (arginfo[i].align)
3944 this_badness += (exact_log2 (arginfo[i].align)
3945 - exact_log2 (n->simdclone->args[i].alignment));
3946 }
3947 if (i == (size_t) -1)
3948 continue;
3949 if (bestn == NULL || this_badness < badness)
3950 {
3951 bestn = n;
3952 badness = this_badness;
3953 }
3954 }
3955
3956 if (bestn == NULL)
3957 return false;
3958
3959 for (i = 0; i < nargs; i++)
3960 if ((arginfo[i].dt == vect_constant_def
3961 || arginfo[i].dt == vect_external_def)
3962 && bestn->simdclone->args[i].arg_type == SIMD_CLONE_ARG_TYPE_VECTOR)
3963 {
3964 tree arg_type = TREE_TYPE (gimple_call_arg (stmt, i));
3965 arginfo[i].vectype = get_vectype_for_scalar_type (vinfo, arg_type,
3966 slp_node);
3967 if (arginfo[i].vectype == NULL
3968 || !constant_multiple_p (bestn->simdclone->simdlen,
3969 simd_clone_subparts (arginfo[i].vectype)))
3970 return false;
3971 }
3972
3973 fndecl = bestn->decl;
3974 nunits = bestn->simdclone->simdlen;
3975 ncopies = vector_unroll_factor (vf, nunits);
3976
3977 /* If the function isn't const, only allow it in simd loops where user
3978 has asserted that at least nunits consecutive iterations can be
3979 performed using SIMD instructions. */
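/* E.g. (illustrative): "#pragma omp simd safelen(8)" asserts that 8
   consecutive iterations may run concurrently, so a non-const clone with
   a simdlen of at most 8 is still usable here.  */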
3980 if ((loop == NULL || maybe_lt ((unsigned) loop->safelen, nunits))
3981 && gimple_vuse (stmt))
3982 return false;
3983
3984 /* Sanity check: make sure that at least one copy of the vectorized stmt
3985 needs to be generated. */
3986 gcc_assert (ncopies >= 1);
3987
3988 if (!vec_stmt) /* transformation not required. */
3989 {
3990 STMT_VINFO_SIMD_CLONE_INFO (stmt_info).safe_push (bestn->decl);
3991 for (i = 0; i < nargs; i++)
3992 if ((bestn->simdclone->args[i].arg_type
3993 == SIMD_CLONE_ARG_TYPE_LINEAR_CONSTANT_STEP)
3994 || (bestn->simdclone->args[i].arg_type
3995 == SIMD_CLONE_ARG_TYPE_LINEAR_REF_CONSTANT_STEP))
3996 {
3997 STMT_VINFO_SIMD_CLONE_INFO (stmt_info).safe_grow_cleared (i * 3
3998 + 1,
3999 true);
4000 STMT_VINFO_SIMD_CLONE_INFO (stmt_info).safe_push (arginfo[i].op);
4001 tree lst = POINTER_TYPE_P (TREE_TYPE (arginfo[i].op))
4002 ? size_type_node : TREE_TYPE (arginfo[i].op);
4003 tree ls = build_int_cst (lst, arginfo[i].linear_step);
4004 STMT_VINFO_SIMD_CLONE_INFO (stmt_info).safe_push (ls);
4005 tree sll = arginfo[i].simd_lane_linear
4006 ? boolean_true_node : boolean_false_node;
4007 STMT_VINFO_SIMD_CLONE_INFO (stmt_info).safe_push (sll);
4008 }
4009 STMT_VINFO_TYPE (stmt_info) = call_simd_clone_vec_info_type;
4010 DUMP_VECT_SCOPE ("vectorizable_simd_clone_call");
4011 /* vect_model_simple_cost (vinfo, stmt_info, ncopies,
4012 dt, slp_node, cost_vec); */
4013 return true;
4014 }
4015
4016 /* Transform. */
4017
4018 if (dump_enabled_p ())
4019 dump_printf_loc (MSG_NOTE, vect_location, "transform call.\n");
4020
4021 /* Handle def. */
4022 scalar_dest = gimple_call_lhs (stmt);
4023 vec_dest = NULL_TREE;
4024 rtype = NULL_TREE;
4025 ratype = NULL_TREE;
4026 if (scalar_dest)
4027 {
4028 vec_dest = vect_create_destination_var (scalar_dest, vectype);
4029 rtype = TREE_TYPE (TREE_TYPE (fndecl));
4030 if (TREE_CODE (rtype) == ARRAY_TYPE)
4031 {
4032 ratype = rtype;
4033 rtype = TREE_TYPE (ratype);
4034 }
4035 }
4036
4037 auto_vec<vec<tree> > vec_oprnds;
4038 auto_vec<unsigned> vec_oprnds_i;
4039 vec_oprnds.safe_grow_cleared (nargs, true);
4040 vec_oprnds_i.safe_grow_cleared (nargs, true);
4041 for (j = 0; j < ncopies; ++j)
4042 {
4043 /* Build argument list for the vectorized call. */
4044 if (j == 0)
4045 vargs.create (nargs);
4046 else
4047 vargs.truncate (0);
4048
4049 for (i = 0; i < nargs; i++)
4050 {
4051 unsigned int k, l, m, o;
4052 tree atype;
4053 op = gimple_call_arg (stmt, i);
4054 switch (bestn->simdclone->args[i].arg_type)
4055 {
4056 case SIMD_CLONE_ARG_TYPE_VECTOR:
4057 atype = bestn->simdclone->args[i].vector_type;
4058 o = vector_unroll_factor (nunits,
4059 simd_clone_subparts (atype));
4060 for (m = j * o; m < (j + 1) * o; m++)
4061 {
4062 if (simd_clone_subparts (atype)
4063 < simd_clone_subparts (arginfo[i].vectype))
4064 {
4065 poly_uint64 prec = GET_MODE_BITSIZE (TYPE_MODE (atype));
4066 k = (simd_clone_subparts (arginfo[i].vectype)
4067 / simd_clone_subparts (atype));
4068 gcc_assert ((k & (k - 1)) == 0);
4069 if (m == 0)
4070 {
4071 vect_get_vec_defs_for_operand (vinfo, stmt_info,
4072 ncopies * o / k, op,
4073 &vec_oprnds[i]);
4074 vec_oprnds_i[i] = 0;
4075 vec_oprnd0 = vec_oprnds[i][vec_oprnds_i[i]++];
4076 }
4077 else
4078 {
4079 vec_oprnd0 = arginfo[i].op;
4080 if ((m & (k - 1)) == 0)
4081 vec_oprnd0 = vec_oprnds[i][vec_oprnds_i[i]++];
4082 }
4083 arginfo[i].op = vec_oprnd0;
4084 vec_oprnd0
4085 = build3 (BIT_FIELD_REF, atype, vec_oprnd0,
4086 bitsize_int (prec),
4087 bitsize_int ((m & (k - 1)) * prec));
4088 gassign *new_stmt
4089 = gimple_build_assign (make_ssa_name (atype),
4090 vec_oprnd0);
4091 vect_finish_stmt_generation (vinfo, stmt_info,
4092 new_stmt, gsi);
4093 vargs.safe_push (gimple_assign_lhs (new_stmt));
4094 }
4095 else
4096 {
4097 k = (simd_clone_subparts (atype)
4098 / simd_clone_subparts (arginfo[i].vectype));
4099 gcc_assert ((k & (k - 1)) == 0);
4100 vec<constructor_elt, va_gc> *ctor_elts;
4101 if (k != 1)
4102 vec_alloc (ctor_elts, k);
4103 else
4104 ctor_elts = NULL;
4105 for (l = 0; l < k; l++)
4106 {
4107 if (m == 0 && l == 0)
4108 {
4109 vect_get_vec_defs_for_operand (vinfo, stmt_info,
4110 k * o * ncopies,
4111 op,
4112 &vec_oprnds[i]);
4113 vec_oprnds_i[i] = 0;
4114 vec_oprnd0 = vec_oprnds[i][vec_oprnds_i[i]++];
4115 }
4116 else
4117 vec_oprnd0 = vec_oprnds[i][vec_oprnds_i[i]++];
4118 arginfo[i].op = vec_oprnd0;
4119 if (k == 1)
4120 break;
4121 CONSTRUCTOR_APPEND_ELT (ctor_elts, NULL_TREE,
4122 vec_oprnd0);
4123 }
4124 if (k == 1)
4125 if (!useless_type_conversion_p (TREE_TYPE (vec_oprnd0),
4126 atype))
4127 {
4128 vec_oprnd0
4129 = build1 (VIEW_CONVERT_EXPR, atype, vec_oprnd0);
4130 gassign *new_stmt
4131 = gimple_build_assign (make_ssa_name (atype),
4132 vec_oprnd0);
4133 vect_finish_stmt_generation (vinfo, stmt_info,
4134 new_stmt, gsi);
4135 vargs.safe_push (gimple_assign_lhs (new_stmt));
4136 }
4137 else
4138 vargs.safe_push (vec_oprnd0);
4139 else
4140 {
4141 vec_oprnd0 = build_constructor (atype, ctor_elts);
4142 gassign *new_stmt
4143 = gimple_build_assign (make_ssa_name (atype),
4144 vec_oprnd0);
4145 vect_finish_stmt_generation (vinfo, stmt_info,
4146 new_stmt, gsi);
4147 vargs.safe_push (gimple_assign_lhs (new_stmt));
4148 }
4149 }
4150 }
4151 break;
4152 case SIMD_CLONE_ARG_TYPE_UNIFORM:
4153 vargs.safe_push (op);
4154 break;
4155 case SIMD_CLONE_ARG_TYPE_LINEAR_CONSTANT_STEP:
4156 case SIMD_CLONE_ARG_TYPE_LINEAR_REF_CONSTANT_STEP:
4157 if (j == 0)
4158 {
4159 gimple_seq stmts;
4160 arginfo[i].op
4161 = force_gimple_operand (unshare_expr (arginfo[i].op),
4162 &stmts, true, NULL_TREE);
4163 if (stmts != NULL)
4164 {
4165 basic_block new_bb;
4166 edge pe = loop_preheader_edge (loop);
4167 new_bb = gsi_insert_seq_on_edge_immediate (pe, stmts);
4168 gcc_assert (!new_bb);
4169 }
4170 if (arginfo[i].simd_lane_linear)
4171 {
4172 vargs.safe_push (arginfo[i].op);
4173 break;
4174 }
4175 tree phi_res = copy_ssa_name (op);
4176 gphi *new_phi = create_phi_node (phi_res, loop->header);
4177 add_phi_arg (new_phi, arginfo[i].op,
4178 loop_preheader_edge (loop), UNKNOWN_LOCATION);
4179 enum tree_code code
4180 = POINTER_TYPE_P (TREE_TYPE (op))
4181 ? POINTER_PLUS_EXPR : PLUS_EXPR;
4182 tree type = POINTER_TYPE_P (TREE_TYPE (op))
4183 ? sizetype : TREE_TYPE (op);
4184 poly_widest_int cst
4185 = wi::mul (bestn->simdclone->args[i].linear_step,
4186 ncopies * nunits);
4187 tree tcst = wide_int_to_tree (type, cst);
4188 tree phi_arg = copy_ssa_name (op);
4189 gassign *new_stmt
4190 = gimple_build_assign (phi_arg, code, phi_res, tcst);
4191 gimple_stmt_iterator si = gsi_after_labels (loop->header);
4192 gsi_insert_after (&si, new_stmt, GSI_NEW_STMT);
4193 add_phi_arg (new_phi, phi_arg, loop_latch_edge (loop),
4194 UNKNOWN_LOCATION);
4195 arginfo[i].op = phi_res;
4196 vargs.safe_push (phi_res);
4197 }
4198 else
4199 {
4200 enum tree_code code
4201 = POINTER_TYPE_P (TREE_TYPE (op))
4202 ? POINTER_PLUS_EXPR : PLUS_EXPR;
4203 tree type = POINTER_TYPE_P (TREE_TYPE (op))
4204 ? sizetype : TREE_TYPE (op);
4205 poly_widest_int cst
4206 = wi::mul (bestn->simdclone->args[i].linear_step,
4207 j * nunits);
4208 tree tcst = wide_int_to_tree (type, cst);
4209 new_temp = make_ssa_name (TREE_TYPE (op));
4210 gassign *new_stmt
4211 = gimple_build_assign (new_temp, code,
4212 arginfo[i].op, tcst);
4213 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
4214 vargs.safe_push (new_temp);
4215 }
4216 break;
4217 case SIMD_CLONE_ARG_TYPE_LINEAR_VAL_CONSTANT_STEP:
4218 case SIMD_CLONE_ARG_TYPE_LINEAR_UVAL_CONSTANT_STEP:
4219 case SIMD_CLONE_ARG_TYPE_LINEAR_VARIABLE_STEP:
4220 case SIMD_CLONE_ARG_TYPE_LINEAR_REF_VARIABLE_STEP:
4221 case SIMD_CLONE_ARG_TYPE_LINEAR_VAL_VARIABLE_STEP:
4222 case SIMD_CLONE_ARG_TYPE_LINEAR_UVAL_VARIABLE_STEP:
4223 default:
4224 gcc_unreachable ();
4225 }
4226 }
4227
4228 gcall *new_call = gimple_build_call_vec (fndecl, vargs);
4229 if (vec_dest)
4230 {
4231 gcc_assert (ratype
4232 || known_eq (simd_clone_subparts (rtype), nunits));
4233 if (ratype)
4234 new_temp = create_tmp_var (ratype);
4235 else if (useless_type_conversion_p (vectype, rtype))
4236 new_temp = make_ssa_name (vec_dest, new_call);
4237 else
4238 new_temp = make_ssa_name (rtype, new_call);
4239 gimple_call_set_lhs (new_call, new_temp);
4240 }
4241 vect_finish_stmt_generation (vinfo, stmt_info, new_call, gsi);
4242 gimple *new_stmt = new_call;
4243
4244 if (vec_dest)
4245 {
4246 if (!multiple_p (simd_clone_subparts (vectype), nunits))
4247 {
4248 unsigned int k, l;
4249 poly_uint64 prec = GET_MODE_BITSIZE (TYPE_MODE (vectype));
4250 poly_uint64 bytes = GET_MODE_SIZE (TYPE_MODE (vectype));
4251 k = vector_unroll_factor (nunits,
4252 simd_clone_subparts (vectype));
4253 gcc_assert ((k & (k - 1)) == 0);
4254 for (l = 0; l < k; l++)
4255 {
4256 tree t;
4257 if (ratype)
4258 {
4259 t = build_fold_addr_expr (new_temp);
4260 t = build2 (MEM_REF, vectype, t,
4261 build_int_cst (TREE_TYPE (t), l * bytes));
4262 }
4263 else
4264 t = build3 (BIT_FIELD_REF, vectype, new_temp,
4265 bitsize_int (prec), bitsize_int (l * prec));
4266 new_stmt = gimple_build_assign (make_ssa_name (vectype), t);
4267 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
4268
4269 if (j == 0 && l == 0)
4270 *vec_stmt = new_stmt;
4271 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
4272 }
4273
4274 if (ratype)
4275 vect_clobber_variable (vinfo, stmt_info, gsi, new_temp);
4276 continue;
4277 }
4278 else if (!multiple_p (nunits, simd_clone_subparts (vectype)))
4279 {
4280 unsigned int k = (simd_clone_subparts (vectype)
4281 / simd_clone_subparts (rtype));
4282 gcc_assert ((k & (k - 1)) == 0);
4283 if ((j & (k - 1)) == 0)
4284 vec_alloc (ret_ctor_elts, k);
4285 if (ratype)
4286 {
4287 unsigned int m, o;
4288 o = vector_unroll_factor (nunits,
4289 simd_clone_subparts (rtype));
4290 for (m = 0; m < o; m++)
4291 {
4292 tree tem = build4 (ARRAY_REF, rtype, new_temp,
4293 size_int (m), NULL_TREE, NULL_TREE);
4294 new_stmt = gimple_build_assign (make_ssa_name (rtype),
4295 tem);
4296 vect_finish_stmt_generation (vinfo, stmt_info,
4297 new_stmt, gsi);
4298 CONSTRUCTOR_APPEND_ELT (ret_ctor_elts, NULL_TREE,
4299 gimple_assign_lhs (new_stmt));
4300 }
4301 vect_clobber_variable (vinfo, stmt_info, gsi, new_temp);
4302 }
4303 else
4304 CONSTRUCTOR_APPEND_ELT (ret_ctor_elts, NULL_TREE, new_temp);
4305 if ((j & (k - 1)) != k - 1)
4306 continue;
4307 vec_oprnd0 = build_constructor (vectype, ret_ctor_elts);
4308 new_stmt
4309 = gimple_build_assign (make_ssa_name (vec_dest), vec_oprnd0);
4310 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
4311
4312 if ((unsigned) j == k - 1)
4313 *vec_stmt = new_stmt;
4314 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
4315 continue;
4316 }
4317 else if (ratype)
4318 {
4319 tree t = build_fold_addr_expr (new_temp);
4320 t = build2 (MEM_REF, vectype, t,
4321 build_int_cst (TREE_TYPE (t), 0));
4322 new_stmt = gimple_build_assign (make_ssa_name (vec_dest), t);
4323 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
4324 vect_clobber_variable (vinfo, stmt_info, gsi, new_temp);
4325 }
4326 else if (!useless_type_conversion_p (vectype, rtype))
4327 {
4328 vec_oprnd0 = build1 (VIEW_CONVERT_EXPR, vectype, new_temp);
4329 new_stmt
4330 = gimple_build_assign (make_ssa_name (vec_dest), vec_oprnd0);
4331 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
4332 }
4333 }
4334
4335 if (j == 0)
4336 *vec_stmt = new_stmt;
4337 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
4338 }
4339
4340 for (i = 0; i < nargs; ++i)
4341 {
4342 vec<tree> oprndsi = vec_oprnds[i];
4343 oprndsi.release ();
4344 }
4345 vargs.release ();
4346
4347 /* The call in STMT might prevent it from being removed in DCE.
4348 We however cannot remove it here, because of the way the SSA name
4349 it defines is mapped to the new definition. So just replace the
4350 rhs of the statement with something harmless. */
4351
4352 if (slp_node)
4353 return true;
4354
4355 gimple *new_stmt;
4356 if (scalar_dest)
4357 {
4358 type = TREE_TYPE (scalar_dest);
4359 lhs = gimple_call_lhs (vect_orig_stmt (stmt_info)->stmt);
4360 new_stmt = gimple_build_assign (lhs, build_zero_cst (type));
4361 }
4362 else
4363 new_stmt = gimple_build_nop ();
4364 vinfo->replace_stmt (gsi, vect_orig_stmt (stmt_info), new_stmt);
4365 unlink_stmt_vdef (stmt);
4366
4367 return true;
4368 }
4369
4370
4371 /* Function vect_gen_widened_results_half
4372
4373 Create a vector stmt whose code, number of operands, and result
4374 variable are CODE, OP_TYPE, and VEC_DEST, and whose operands are
4375 VEC_OPRND0 and VEC_OPRND1. OP_TYPE must match the arity of CODE
4376 (unary_op or binary_op); for a unary CODE the second operand is
4377 ignored. The new vector stmt is to be inserted at GSI.
4378 STMT_INFO is the original scalar stmt that we are vectorizing. */
4379
4380 static gimple *
4381 vect_gen_widened_results_half (vec_info *vinfo, enum tree_code code,
4382 tree vec_oprnd0, tree vec_oprnd1, int op_type,
4383 tree vec_dest, gimple_stmt_iterator *gsi,
4384 stmt_vec_info stmt_info)
4385 {
4386 gimple *new_stmt;
4387 tree new_temp;
4388
4389 /* Generate half of the widened result: */
4390 gcc_assert (op_type == TREE_CODE_LENGTH (code));
4391 if (op_type != binary_op)
4392 vec_oprnd1 = NULL;
4393 new_stmt = gimple_build_assign (vec_dest, code, vec_oprnd0, vec_oprnd1);
4394 new_temp = make_ssa_name (vec_dest, new_stmt);
4395 gimple_assign_set_lhs (new_stmt, new_temp);
4396 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
4397
4398 return new_stmt;
4399 }
4400
4401
4402 /* Create vectorized demotion statements for vector operands from VEC_OPRNDS.
4403 For multi-step conversions store the resulting vectors and call the function
4404 recursively. */
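/* For illustration (assuming 128-bit vectors): a single-step int->short
   demotion packs each pair of V4SI operands into one V8HI result with
   VEC_PACK_TRUNC_EXPR; a two-step int->char demotion first packs pairs
   of V4SI into V8HI and then recurses to pack those into V16QI.  */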
4405
4406 static void
4407 vect_create_vectorized_demotion_stmts (vec_info *vinfo, vec<tree> *vec_oprnds,
4408 int multi_step_cvt,
4409 stmt_vec_info stmt_info,
4410 vec<tree> vec_dsts,
4411 gimple_stmt_iterator *gsi,
4412 slp_tree slp_node, enum tree_code code)
4413 {
4414 unsigned int i;
4415 tree vop0, vop1, new_tmp, vec_dest;
4416
4417 vec_dest = vec_dsts.pop ();
4418
4419 for (i = 0; i < vec_oprnds->length (); i += 2)
4420 {
4421 /* Create demotion operation. */
4422 vop0 = (*vec_oprnds)[i];
4423 vop1 = (*vec_oprnds)[i + 1];
4424 gassign *new_stmt = gimple_build_assign (vec_dest, code, vop0, vop1);
4425 new_tmp = make_ssa_name (vec_dest, new_stmt);
4426 gimple_assign_set_lhs (new_stmt, new_tmp);
4427 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
4428
4429 if (multi_step_cvt)
4430 /* Store the resulting vector for next recursive call. */
4431 (*vec_oprnds)[i/2] = new_tmp;
4432 else
4433 {
4434 /* This is the last step of the conversion sequence. Store the
4435 vectors in SLP_NODE or in vector info of the scalar statement
4436 (or in STMT_VINFO_RELATED_STMT chain). */
4437 if (slp_node)
4438 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
4439 else
4440 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
4441 }
4442 }
4443
4444 /* For multi-step demotion operations we first generate demotion operations
4445 from the source type to the intermediate types, and then combine the
4446 results (stored in VEC_OPRNDS) with a demotion operation to the
4447 destination type. */
4448 if (multi_step_cvt)
4449 {
4450 /* At each level of recursion we have half of the operands we had at the
4451 previous level. */
4452 vec_oprnds->truncate ((i+1)/2);
4453 vect_create_vectorized_demotion_stmts (vinfo, vec_oprnds,
4454 multi_step_cvt - 1,
4455 stmt_info, vec_dsts, gsi,
4456 slp_node, VEC_PACK_TRUNC_EXPR);
4457 }
4458
4459 vec_dsts.quick_push (vec_dest);
4460 }
4461
4462
4463 /* Create vectorized promotion statements for vector operands from VEC_OPRNDS0
4464 and VEC_OPRNDS1, for a binary operation associated with scalar statement
4465 STMT_INFO. For multi-step conversions store the resulting vectors and
4466 call the function recursively. */
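/* For illustration (assuming 128-bit vectors): widening each V8HI operand
   produces two V4SI results, one via CODE1 and one via CODE2
   (e.g. VEC_UNPACK_LO_EXPR and VEC_UNPACK_HI_EXPR), so VEC_OPRNDS0 ends
   up twice as long as the input.  */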
4467
4468 static void
4469 vect_create_vectorized_promotion_stmts (vec_info *vinfo,
4470 vec<tree> *vec_oprnds0,
4471 vec<tree> *vec_oprnds1,
4472 stmt_vec_info stmt_info, tree vec_dest,
4473 gimple_stmt_iterator *gsi,
4474 enum tree_code code1,
4475 enum tree_code code2, int op_type)
4476 {
4477 int i;
4478 tree vop0, vop1, new_tmp1, new_tmp2;
4479 gimple *new_stmt1, *new_stmt2;
4480 vec<tree> vec_tmp = vNULL;
4481
4482 vec_tmp.create (vec_oprnds0->length () * 2);
4483 FOR_EACH_VEC_ELT (*vec_oprnds0, i, vop0)
4484 {
4485 if (op_type == binary_op)
4486 vop1 = (*vec_oprnds1)[i];
4487 else
4488 vop1 = NULL_TREE;
4489
4490 /* Generate the two halves of the promotion operation. */
4491 new_stmt1 = vect_gen_widened_results_half (vinfo, code1, vop0, vop1,
4492 op_type, vec_dest, gsi,
4493 stmt_info);
4494 new_stmt2 = vect_gen_widened_results_half (vinfo, code2, vop0, vop1,
4495 op_type, vec_dest, gsi,
4496 stmt_info);
4497 if (is_gimple_call (new_stmt1))
4498 {
4499 new_tmp1 = gimple_call_lhs (new_stmt1);
4500 new_tmp2 = gimple_call_lhs (new_stmt2);
4501 }
4502 else
4503 {
4504 new_tmp1 = gimple_assign_lhs (new_stmt1);
4505 new_tmp2 = gimple_assign_lhs (new_stmt2);
4506 }
4507
4508 /* Store the results for the next step. */
4509 vec_tmp.quick_push (new_tmp1);
4510 vec_tmp.quick_push (new_tmp2);
4511 }
4512
4513 vec_oprnds0->release ();
4514 *vec_oprnds0 = vec_tmp;
4515 }
4516
4517
4518 /* Check if STMT_INFO performs a conversion operation that can be vectorized.
4519 If VEC_STMT is also passed, vectorize STMT_INFO: create a vectorized
4520 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
4521 Return true if STMT_INFO is vectorizable in this way. */
4522
4523 static bool
4524 vectorizable_conversion (vec_info *vinfo,
4525 stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
4526 gimple **vec_stmt, slp_tree slp_node,
4527 stmt_vector_for_cost *cost_vec)
4528 {
4529 tree vec_dest;
4530 tree scalar_dest;
4531 tree op0, op1 = NULL_TREE;
4532 loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
4533 enum tree_code code, code1 = ERROR_MARK, code2 = ERROR_MARK;
4534 enum tree_code codecvt1 = ERROR_MARK, codecvt2 = ERROR_MARK;
4535 tree new_temp;
4536 enum vect_def_type dt[2] = {vect_unknown_def_type, vect_unknown_def_type};
4537 int ndts = 2;
4538 poly_uint64 nunits_in;
4539 poly_uint64 nunits_out;
4540 tree vectype_out, vectype_in;
4541 int ncopies, i;
4542 tree lhs_type, rhs_type;
4543 enum { NARROW, NONE, WIDEN } modifier;
4544 vec<tree> vec_oprnds0 = vNULL;
4545 vec<tree> vec_oprnds1 = vNULL;
4546 tree vop0;
4547 bb_vec_info bb_vinfo = dyn_cast <bb_vec_info> (vinfo);
4548 int multi_step_cvt = 0;
4549 vec<tree> interm_types = vNULL;
4550 tree intermediate_type, cvt_type = NULL_TREE;
4551 int op_type;
4552 unsigned short fltsz;
4553
4554 /* Is STMT a vectorizable conversion? */
4555
4556 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
4557 return false;
4558
4559 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
4560 && ! vec_stmt)
4561 return false;
4562
4563 gassign *stmt = dyn_cast <gassign *> (stmt_info->stmt);
4564 if (!stmt)
4565 return false;
4566
4567 if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME)
4568 return false;
4569
4570 code = gimple_assign_rhs_code (stmt);
4571 if (!CONVERT_EXPR_CODE_P (code)
4572 && code != FIX_TRUNC_EXPR
4573 && code != FLOAT_EXPR
4574 && code != WIDEN_PLUS_EXPR
4575 && code != WIDEN_MINUS_EXPR
4576 && code != WIDEN_MULT_EXPR
4577 && code != WIDEN_LSHIFT_EXPR)
4578 return false;
4579
4580 op_type = TREE_CODE_LENGTH (code);
4581
4582 /* Check types of lhs and rhs. */
4583 scalar_dest = gimple_assign_lhs (stmt);
4584 lhs_type = TREE_TYPE (scalar_dest);
4585 vectype_out = STMT_VINFO_VECTYPE (stmt_info);
4586
4587 /* Check the operands of the operation. */
4588 slp_tree slp_op0, slp_op1 = NULL;
4589 if (!vect_is_simple_use (vinfo, stmt_info, slp_node,
4590 0, &op0, &slp_op0, &dt[0], &vectype_in))
4591 {
4592 if (dump_enabled_p ())
4593 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4594 "use not simple.\n");
4595 return false;
4596 }
4597
4598 rhs_type = TREE_TYPE (op0);
4599 if ((code != FIX_TRUNC_EXPR && code != FLOAT_EXPR)
4600 && !((INTEGRAL_TYPE_P (lhs_type)
4601 && INTEGRAL_TYPE_P (rhs_type))
4602 || (SCALAR_FLOAT_TYPE_P (lhs_type)
4603 && SCALAR_FLOAT_TYPE_P (rhs_type))))
4604 return false;
4605
4606 if (!VECTOR_BOOLEAN_TYPE_P (vectype_out)
4607 && ((INTEGRAL_TYPE_P (lhs_type)
4608 && !type_has_mode_precision_p (lhs_type))
4609 || (INTEGRAL_TYPE_P (rhs_type)
4610 && !type_has_mode_precision_p (rhs_type))))
4611 {
4612 if (dump_enabled_p ())
4613 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4614 "type conversion to/from bit-precision unsupported."
4615 "\n");
4616 return false;
4617 }
4618
4619 if (op_type == binary_op)
4620 {
4621 gcc_assert (code == WIDEN_MULT_EXPR || code == WIDEN_LSHIFT_EXPR
4622 || code == WIDEN_PLUS_EXPR || code == WIDEN_MINUS_EXPR);
4623
4624 op1 = gimple_assign_rhs2 (stmt);
4625 tree vectype1_in;
4626 if (!vect_is_simple_use (vinfo, stmt_info, slp_node, 1,
4627 &op1, &slp_op1, &dt[1], &vectype1_in))
4628 {
4629 if (dump_enabled_p ())
4630 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4631 "use not simple.\n");
4632 return false;
4633 }
4634 /* For WIDEN_MULT_EXPR, if OP0 is a constant, use the type of
4635 OP1. */
4636 if (!vectype_in)
4637 vectype_in = vectype1_in;
4638 }
4639
4640 /* If op0 is an external or constant def, infer the vector type
4641 from the scalar type. */
4642 if (!vectype_in)
4643 vectype_in = get_vectype_for_scalar_type (vinfo, rhs_type, slp_node);
4644 if (vec_stmt)
4645 gcc_assert (vectype_in);
4646 if (!vectype_in)
4647 {
4648 if (dump_enabled_p ())
4649 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4650 "no vectype for scalar type %T\n", rhs_type);
4651
4652 return false;
4653 }
4654
4655 if (VECTOR_BOOLEAN_TYPE_P (vectype_out)
4656 && !VECTOR_BOOLEAN_TYPE_P (vectype_in))
4657 {
4658 if (dump_enabled_p ())
4659 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4660 "can't convert between boolean and non "
4661 "boolean vectors %T\n", rhs_type);
4662
4663 return false;
4664 }
4665
4666 nunits_in = TYPE_VECTOR_SUBPARTS (vectype_in);
4667 nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
4668 if (known_eq (nunits_out, nunits_in))
4669 modifier = NONE;
4670 else if (multiple_p (nunits_out, nunits_in))
4671 modifier = NARROW;
4672 else
4673 {
4674 gcc_checking_assert (multiple_p (nunits_in, nunits_out));
4675 modifier = WIDEN;
4676 }
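/* For illustration of the classification above (128-bit vectors):
   V4SF -> V4SI is NONE, V2DF -> V4SF is NARROW (4 is a multiple of 2),
   and V4SI -> V2DF is WIDEN.  */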
4677
4678 /* Multiple types in SLP are handled by creating the appropriate number of
4679 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
4680 case of SLP. */
4681 if (slp_node)
4682 ncopies = 1;
4683 else if (modifier == NARROW)
4684 ncopies = vect_get_num_copies (loop_vinfo, vectype_out);
4685 else
4686 ncopies = vect_get_num_copies (loop_vinfo, vectype_in);
4687
4688 /* Sanity check: make sure that at least one copy of the vectorized stmt
4689 needs to be generated. */
4690 gcc_assert (ncopies >= 1);
4691
4692 bool found_mode = false;
4693 scalar_mode lhs_mode = SCALAR_TYPE_MODE (lhs_type);
4694 scalar_mode rhs_mode = SCALAR_TYPE_MODE (rhs_type);
4695 opt_scalar_mode rhs_mode_iter;
4696
4697 /* Supportable by target? */
4698 switch (modifier)
4699 {
4700 case NONE:
4701 if (code != FIX_TRUNC_EXPR
4702 && code != FLOAT_EXPR
4703 && !CONVERT_EXPR_CODE_P (code))
4704 return false;
4705 if (supportable_convert_operation (code, vectype_out, vectype_in, &code1))
4706 break;
4707 /* FALLTHRU */
4708 unsupported:
4709 if (dump_enabled_p ())
4710 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4711 "conversion not supported by target.\n");
4712 return false;
4713
4714 case WIDEN:
4715 if (supportable_widening_operation (vinfo, code, stmt_info, vectype_out,
4716 vectype_in, &code1, &code2,
4717 &multi_step_cvt, &interm_types))
4718 {
4719 /* Binary widening operations can only be supported directly by the
4720 architecture. */
4721 gcc_assert (!(multi_step_cvt && op_type == binary_op));
4722 break;
4723 }
4724
4725 if (code != FLOAT_EXPR
4726 || GET_MODE_SIZE (lhs_mode) <= GET_MODE_SIZE (rhs_mode))
4727 goto unsupported;
4728
4729 fltsz = GET_MODE_SIZE (lhs_mode);
4730 FOR_EACH_2XWIDER_MODE (rhs_mode_iter, rhs_mode)
4731 {
4732 rhs_mode = rhs_mode_iter.require ();
4733 if (GET_MODE_SIZE (rhs_mode) > fltsz)
4734 break;
4735
4736 cvt_type
4737 = build_nonstandard_integer_type (GET_MODE_BITSIZE (rhs_mode), 0);
4738 cvt_type = get_same_sized_vectype (cvt_type, vectype_in);
4739 if (cvt_type == NULL_TREE)
4740 goto unsupported;
4741
4742 if (GET_MODE_SIZE (rhs_mode) == fltsz)
4743 {
4744 if (!supportable_convert_operation (code, vectype_out,
4745 cvt_type, &codecvt1))
4746 goto unsupported;
4747 }
4748 else if (!supportable_widening_operation (vinfo, code, stmt_info,
4749 vectype_out, cvt_type,
4750 &codecvt1, &codecvt2,
4751 &multi_step_cvt,
4752 &interm_types))
4753 continue;
4754 else
4755 gcc_assert (multi_step_cvt == 0);
4756
4757 if (supportable_widening_operation (vinfo, NOP_EXPR, stmt_info,
4758 cvt_type,
4759 vectype_in, &code1, &code2,
4760 &multi_step_cvt, &interm_types))
4761 {
4762 found_mode = true;
4763 break;
4764 }
4765 }
4766
4767 if (!found_mode)
4768 goto unsupported;
4769
4770 if (GET_MODE_SIZE (rhs_mode) == fltsz)
4771 codecvt2 = ERROR_MARK;
4772 else
4773 {
4774 multi_step_cvt++;
4775 interm_types.safe_push (cvt_type);
4776 cvt_type = NULL_TREE;
4777 }
4778 break;
4779
4780 case NARROW:
4781 gcc_assert (op_type == unary_op);
4782 if (supportable_narrowing_operation (code, vectype_out, vectype_in,
4783 &code1, &multi_step_cvt,
4784 &interm_types))
4785 break;
4786
4787 if (code != FIX_TRUNC_EXPR
4788 || GET_MODE_SIZE (lhs_mode) >= GET_MODE_SIZE (rhs_mode))
4789 goto unsupported;
4790
4791 cvt_type
4792 = build_nonstandard_integer_type (GET_MODE_BITSIZE (rhs_mode), 0);
4793 cvt_type = get_same_sized_vectype (cvt_type, vectype_in);
4794 if (cvt_type == NULL_TREE)
4795 goto unsupported;
4796 if (!supportable_convert_operation (code, cvt_type, vectype_in,
4797 &codecvt1))
4798 goto unsupported;
4799 if (supportable_narrowing_operation (NOP_EXPR, vectype_out, cvt_type,
4800 &code1, &multi_step_cvt,
4801 &interm_types))
4802 break;
4803 goto unsupported;
4804
4805 default:
4806 gcc_unreachable ();
4807 }
4808
4809 if (!vec_stmt) /* transformation not required. */
4810 {
4811 if (slp_node
4812 && (!vect_maybe_update_slp_op_vectype (slp_op0, vectype_in)
4813 || !vect_maybe_update_slp_op_vectype (slp_op1, vectype_in)))
4814 {
4815 if (dump_enabled_p ())
4816 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4817 "incompatible vector types for invariants\n");
4818 return false;
4819 }
4820 DUMP_VECT_SCOPE ("vectorizable_conversion");
4821 if (modifier == NONE)
4822 {
4823 STMT_VINFO_TYPE (stmt_info) = type_conversion_vec_info_type;
4824 vect_model_simple_cost (vinfo, stmt_info, ncopies, dt, ndts, slp_node,
4825 cost_vec);
4826 }
4827 else if (modifier == NARROW)
4828 {
4829 STMT_VINFO_TYPE (stmt_info) = type_demotion_vec_info_type;
4830 /* The final packing step produces one vector result per copy. */
4831 unsigned int nvectors
4832 = (slp_node ? SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node) : ncopies);
4833 vect_model_promotion_demotion_cost (stmt_info, dt, nvectors,
4834 multi_step_cvt, cost_vec);
4835 }
4836 else
4837 {
4838 STMT_VINFO_TYPE (stmt_info) = type_promotion_vec_info_type;
4839 /* The initial unpacking step produces two vector results
4840 per copy. MULTI_STEP_CVT is 0 for a single conversion,
4841 so >> MULTI_STEP_CVT divides by 2^(number of steps - 1). */
4842 unsigned int nvectors
4843 = (slp_node
4844 ? SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node) >> multi_step_cvt
4845 : ncopies * 2);
4846 vect_model_promotion_demotion_cost (stmt_info, dt, nvectors,
4847 multi_step_cvt, cost_vec);
4848 }
4849 interm_types.release ();
4850 return true;
4851 }
4852
4853 /* Transform. */
4854 if (dump_enabled_p ())
4855 dump_printf_loc (MSG_NOTE, vect_location,
4856 "transform conversion. ncopies = %d.\n", ncopies);
4857
4858 if (op_type == binary_op)
4859 {
4860 if (CONSTANT_CLASS_P (op0))
4861 op0 = fold_convert (TREE_TYPE (op1), op0);
4862 else if (CONSTANT_CLASS_P (op1))
4863 op1 = fold_convert (TREE_TYPE (op0), op1);
4864 }
4865
4866 /* In case of multi-step conversion, we first generate conversion operations
4867 to the intermediate types, and then from those types to the final one.
4868 We create vector destinations for the intermediate type (TYPES) received
4869 from supportable_*_operation, and store them in the correct order
4870 for future use in vect_create_vectorized_*_stmts (). */
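/* E.g. (illustrative, 128-bit vectors): widening char elements to int
   goes through HImode, so the destinations created below are for the
   final V4SI type and the intermediate V8HI type.  */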
4871 auto_vec<tree> vec_dsts (multi_step_cvt + 1);
4872 vec_dest = vect_create_destination_var (scalar_dest,
4873 (cvt_type && modifier == WIDEN)
4874 ? cvt_type : vectype_out);
4875 vec_dsts.quick_push (vec_dest);
4876
4877 if (multi_step_cvt)
4878 {
4879 for (i = interm_types.length () - 1;
4880 interm_types.iterate (i, &intermediate_type); i--)
4881 {
4882 vec_dest = vect_create_destination_var (scalar_dest,
4883 intermediate_type);
4884 vec_dsts.quick_push (vec_dest);
4885 }
4886 }
4887
4888 if (cvt_type)
4889 vec_dest = vect_create_destination_var (scalar_dest,
4890 modifier == WIDEN
4891 ? vectype_out : cvt_type);
4892
4893 int ninputs = 1;
4894 if (!slp_node)
4895 {
4896 if (modifier == WIDEN)
4897 ;
4898 else if (modifier == NARROW)
4899 {
4900 if (multi_step_cvt)
4901 ninputs = vect_pow2 (multi_step_cvt);
4902 ninputs *= 2;
4903 }
4904 }
4905
4906 switch (modifier)
4907 {
4908 case NONE:
4909 vect_get_vec_defs (vinfo, stmt_info, slp_node, ncopies,
4910 op0, &vec_oprnds0);
4911 FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
4912 {
4913 /* Arguments are ready, create the new vector stmt. */
4914 gcc_assert (TREE_CODE_LENGTH (code1) == unary_op);
4915 gassign *new_stmt = gimple_build_assign (vec_dest, code1, vop0);
4916 new_temp = make_ssa_name (vec_dest, new_stmt);
4917 gimple_assign_set_lhs (new_stmt, new_temp);
4918 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
4919
4920 if (slp_node)
4921 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
4922 else
4923 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
4924 }
4925 break;
4926
4927 case WIDEN:
4928 /* In case the vectorization factor (VF) is bigger than the number
4929 of elements that we can fit in a vectype (nunits), we have to
4930 generate more than one vector stmt, i.e. we need to "unroll"
4931 the vector stmt by a factor VF/nunits. */
4932 vect_get_vec_defs (vinfo, stmt_info, slp_node, ncopies * ninputs,
4933 op0, &vec_oprnds0,
4934 code == WIDEN_LSHIFT_EXPR ? NULL_TREE : op1,
4935 &vec_oprnds1);
4936 if (code == WIDEN_LSHIFT_EXPR)
4937 {
4938 int oprnds_size = vec_oprnds0.length ();
4939 vec_oprnds1.create (oprnds_size);
4940 for (i = 0; i < oprnds_size; ++i)
4941 vec_oprnds1.quick_push (op1);
4942 }
4943 /* Arguments are ready. Create the new vector stmts. */
4944 for (i = multi_step_cvt; i >= 0; i--)
4945 {
4946 tree this_dest = vec_dsts[i];
4947 enum tree_code c1 = code1, c2 = code2;
4948 if (i == 0 && codecvt2 != ERROR_MARK)
4949 {
4950 c1 = codecvt1;
4951 c2 = codecvt2;
4952 }
4953 vect_create_vectorized_promotion_stmts (vinfo, &vec_oprnds0,
4954 &vec_oprnds1, stmt_info,
4955 this_dest, gsi,
4956 c1, c2, op_type);
4957 }
4958
4959 FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
4960 {
4961 gimple *new_stmt;
4962 if (cvt_type)
4963 {
4964 gcc_assert (TREE_CODE_LENGTH (codecvt1) == unary_op);
4965 new_temp = make_ssa_name (vec_dest);
4966 new_stmt = gimple_build_assign (new_temp, codecvt1, vop0);
4967 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
4968 }
4969 else
4970 new_stmt = SSA_NAME_DEF_STMT (vop0);
4971
4972 if (slp_node)
4973 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
4974 else
4975 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
4976 }
4977 break;
4978
4979 case NARROW:
4980 /* In case the vectorization factor (VF) is bigger than the number
4981 of elements that we can fit in a vectype (nunits), we have to
4982 generate more than one vector stmt, i.e. we need to "unroll"
4983 the vector stmt by a factor VF/nunits. */
4984 vect_get_vec_defs (vinfo, stmt_info, slp_node, ncopies * ninputs,
4985 op0, &vec_oprnds0);
4986 /* Arguments are ready. Create the new vector stmts. */
4987 if (cvt_type)
4988 FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
4989 {
4990 gcc_assert (TREE_CODE_LENGTH (codecvt1) == unary_op);
4991 new_temp = make_ssa_name (vec_dest);
4992 gassign *new_stmt
4993 = gimple_build_assign (new_temp, codecvt1, vop0);
4994 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
4995 vec_oprnds0[i] = new_temp;
4996 }
4997
4998 vect_create_vectorized_demotion_stmts (vinfo, &vec_oprnds0,
4999 multi_step_cvt,
5000 stmt_info, vec_dsts, gsi,
5001 slp_node, code1);
5002 break;
5003 }
5004 if (!slp_node)
5005 *vec_stmt = STMT_VINFO_VEC_STMTS (stmt_info)[0];
5006
5007 vec_oprnds0.release ();
5008 vec_oprnds1.release ();
5009 interm_types.release ();
5010
5011 return true;
5012 }
5013
5014 /* Return true if we can assume from the scalar form of STMT_INFO that
5015 neither the scalar nor the vector forms will generate code. STMT_INFO
5016 is known not to involve a data reference. */
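/* For example (illustrative): a VIEW_CONVERT_EXPR or a cast between int
   and unsigned int of the same precision is a nop here, whereas a
   widening cast from short to int is not.  */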
5017
5018 bool
5019 vect_nop_conversion_p (stmt_vec_info stmt_info)
5020 {
5021 gassign *stmt = dyn_cast <gassign *> (stmt_info->stmt);
5022 if (!stmt)
5023 return false;
5024
5025 tree lhs = gimple_assign_lhs (stmt);
5026 tree_code code = gimple_assign_rhs_code (stmt);
5027 tree rhs = gimple_assign_rhs1 (stmt);
5028
5029 if (code == SSA_NAME || code == VIEW_CONVERT_EXPR)
5030 return true;
5031
5032 if (CONVERT_EXPR_CODE_P (code))
5033 return tree_nop_conversion_p (TREE_TYPE (lhs), TREE_TYPE (rhs));
5034
5035 return false;
5036 }
5037
5038 /* Function vectorizable_assignment.
5039
5040 Check if STMT_INFO performs an assignment (copy) that can be vectorized.
5041 If VEC_STMT is also passed, vectorize the STMT_INFO: create a vectorized
5042 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
5043 Return true if STMT_INFO is vectorizable in this way. */
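/* For example (illustrative): a plain copy b_2 = a_1, a PAREN_EXPR, or a
   same-width conversion such as casting int to unsigned int all turn
   into a single vector-to-vector copy per vector statement.  */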
5044
5045 static bool
5046 vectorizable_assignment (vec_info *vinfo,
5047 stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
5048 gimple **vec_stmt, slp_tree slp_node,
5049 stmt_vector_for_cost *cost_vec)
5050 {
5051 tree vec_dest;
5052 tree scalar_dest;
5053 tree op;
5054 loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
5055 tree new_temp;
5056 enum vect_def_type dt[1] = {vect_unknown_def_type};
5057 int ndts = 1;
5058 int ncopies;
5059 int i;
5060 vec<tree> vec_oprnds = vNULL;
5061 tree vop;
5062 bb_vec_info bb_vinfo = dyn_cast <bb_vec_info> (vinfo);
5063 enum tree_code code;
5064 tree vectype_in;
5065
5066 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
5067 return false;
5068
5069 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
5070 && ! vec_stmt)
5071 return false;
5072
5073 /* Is vectorizable assignment? */
5074 gassign *stmt = dyn_cast <gassign *> (stmt_info->stmt);
5075 if (!stmt)
5076 return false;
5077
5078 scalar_dest = gimple_assign_lhs (stmt);
5079 if (TREE_CODE (scalar_dest) != SSA_NAME)
5080 return false;
5081
5082 if (STMT_VINFO_DATA_REF (stmt_info))
5083 return false;
5084
5085 code = gimple_assign_rhs_code (stmt);
5086 if (!(gimple_assign_single_p (stmt)
5087 || code == PAREN_EXPR
5088 || CONVERT_EXPR_CODE_P (code)))
5089 return false;
5090
5091 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
5092 poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
5093
5094 /* Multiple types in SLP are handled by creating the appropriate number of
5095 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
5096 case of SLP. */
5097 if (slp_node)
5098 ncopies = 1;
5099 else
5100 ncopies = vect_get_num_copies (loop_vinfo, vectype);
5101
5102 gcc_assert (ncopies >= 1);
5103
5104 slp_tree slp_op;
5105 if (!vect_is_simple_use (vinfo, stmt_info, slp_node, 0, &op, &slp_op,
5106 &dt[0], &vectype_in))
5107 {
5108 if (dump_enabled_p ())
5109 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5110 "use not simple.\n");
5111 return false;
5112 }
5113 if (!vectype_in)
5114 vectype_in = get_vectype_for_scalar_type (vinfo, TREE_TYPE (op), slp_node);
5115
5116 /* We can handle NOP_EXPR conversions that do not change the number
5117 of elements or the vector size. */
5118 if ((CONVERT_EXPR_CODE_P (code)
5119 || code == VIEW_CONVERT_EXPR)
5120 && (!vectype_in
5121 || maybe_ne (TYPE_VECTOR_SUBPARTS (vectype_in), nunits)
5122 || maybe_ne (GET_MODE_SIZE (TYPE_MODE (vectype)),
5123 GET_MODE_SIZE (TYPE_MODE (vectype_in)))))
5124 return false;
5125
5126 /* We do not handle bit-precision changes. */
5127 if ((CONVERT_EXPR_CODE_P (code)
5128 || code == VIEW_CONVERT_EXPR)
5129 && INTEGRAL_TYPE_P (TREE_TYPE (scalar_dest))
5130 && (!type_has_mode_precision_p (TREE_TYPE (scalar_dest))
5131 || !type_has_mode_precision_p (TREE_TYPE (op)))
5132 /* But a conversion that does not change the bit-pattern is ok. */
5133 && !((TYPE_PRECISION (TREE_TYPE (scalar_dest))
5134 > TYPE_PRECISION (TREE_TYPE (op)))
5135 && TYPE_UNSIGNED (TREE_TYPE (op)))
5136 	 /* Conversion between boolean types of different sizes is
5137 	    a simple assignment in case their vectypes are the same
5138 	    boolean vector type.  */
5139 && (!VECTOR_BOOLEAN_TYPE_P (vectype)
5140 || !VECTOR_BOOLEAN_TYPE_P (vectype_in)))
5141 {
5142 if (dump_enabled_p ())
5143 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5144 "type conversion to/from bit-precision "
5145 "unsupported.\n");
5146 return false;
5147 }
5148
5149 if (!vec_stmt) /* transformation not required. */
5150 {
5151 if (slp_node
5152 && !vect_maybe_update_slp_op_vectype (slp_op, vectype_in))
5153 {
5154 if (dump_enabled_p ())
5155 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5156 "incompatible vector types for invariants\n");
5157 return false;
5158 }
5159 STMT_VINFO_TYPE (stmt_info) = assignment_vec_info_type;
5160 DUMP_VECT_SCOPE ("vectorizable_assignment");
5161 if (!vect_nop_conversion_p (stmt_info))
5162 vect_model_simple_cost (vinfo, stmt_info, ncopies, dt, ndts, slp_node,
5163 cost_vec);
5164 return true;
5165 }
5166
5167 /* Transform. */
5168 if (dump_enabled_p ())
5169 dump_printf_loc (MSG_NOTE, vect_location, "transform assignment.\n");
5170
5171 /* Handle def. */
5172 vec_dest = vect_create_destination_var (scalar_dest, vectype);
5173
5174 /* Handle use. */
5175 vect_get_vec_defs (vinfo, stmt_info, slp_node, ncopies, op, &vec_oprnds);
5176
5177   /* Arguments are ready.  Create the new vector stmt.  */
5178 FOR_EACH_VEC_ELT (vec_oprnds, i, vop)
5179 {
5180 if (CONVERT_EXPR_CODE_P (code)
5181 || code == VIEW_CONVERT_EXPR)
5182 vop = build1 (VIEW_CONVERT_EXPR, vectype, vop);
5183 gassign *new_stmt = gimple_build_assign (vec_dest, vop);
5184 new_temp = make_ssa_name (vec_dest, new_stmt);
5185 gimple_assign_set_lhs (new_stmt, new_temp);
5186 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
5187 if (slp_node)
5188 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
5189 else
5190 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
5191 }
5192 if (!slp_node)
5193 *vec_stmt = STMT_VINFO_VEC_STMTS (stmt_info)[0];
5194
5195 vec_oprnds.release ();
5196 return true;
5197 }
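
/* A minimal sketch of a statement handled by vectorizable_assignment
   (illustrative only, not part of the original sources):

     void
     cast_copy (long *restrict dst, const unsigned long *restrict src, int n)
     {
       for (int i = 0; i < n; i++)
	 dst[i] = (long) src[i];
     }

   The cast changes neither the number of vector elements nor the vector
   size, so each copy is emitted as a single VIEW_CONVERT_EXPR assignment
   from the source vector to the destination vector, one per ncopies.  */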
5198
5199
5200 /* Return TRUE if CODE (a shift operation) is supported for SCALAR_TYPE
5201 either as shift by a scalar or by a vector. */
5202
5203 bool
5204 vect_supportable_shift (vec_info *vinfo, enum tree_code code, tree scalar_type)
5205 {
5206
5207 machine_mode vec_mode;
5208 optab optab;
5209 int icode;
5210 tree vectype;
5211
5212 vectype = get_vectype_for_scalar_type (vinfo, scalar_type);
5213 if (!vectype)
5214 return false;
5215
5216 optab = optab_for_tree_code (code, vectype, optab_scalar);
5217 if (!optab
5218 || optab_handler (optab, TYPE_MODE (vectype)) == CODE_FOR_nothing)
5219 {
5220 optab = optab_for_tree_code (code, vectype, optab_vector);
5221 if (!optab
5222 || (optab_handler (optab, TYPE_MODE (vectype))
5223 == CODE_FOR_nothing))
5224 return false;
5225 }
5226
5227 vec_mode = TYPE_MODE (vectype);
5228 icode = (int) optab_handler (optab, vec_mode);
5229 if (icode == CODE_FOR_nothing)
5230 return false;
5231
5232 return true;
5233 }
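
/* An illustrative sketch (not part of the original sources) of the two
   shift forms the optab query above distinguishes:

     void
     shift_by_scalar (int *restrict a, const int *restrict b, int s, int n)
     {
       for (int i = 0; i < n; i++)
	 a[i] = b[i] << s;		/* optab_scalar: one scalar count.  */
     }

     void
     shift_by_vector (int *restrict a, const int *restrict b,
		      const int *restrict c, int n)
     {
       for (int i = 0; i < n; i++)
	 a[i] = b[i] << c[i];		/* optab_vector: per-element counts.  */
     }

   Either form is enough for vect_supportable_shift to succeed; the
   scalar form is queried first and the vector form is the fallback.  */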
5234
5235
5236 /* Function vectorizable_shift.
5237
5238 Check if STMT_INFO performs a shift operation that can be vectorized.
5239 If VEC_STMT is also passed, vectorize the STMT_INFO: create a vectorized
5240 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
5241 Return true if STMT_INFO is vectorizable in this way. */
5242
5243 static bool
5244 vectorizable_shift (vec_info *vinfo,
5245 stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
5246 gimple **vec_stmt, slp_tree slp_node,
5247 stmt_vector_for_cost *cost_vec)
5248 {
5249 tree vec_dest;
5250 tree scalar_dest;
5251 tree op0, op1 = NULL;
5252 tree vec_oprnd1 = NULL_TREE;
5253 tree vectype;
5254 loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
5255 enum tree_code code;
5256 machine_mode vec_mode;
5257 tree new_temp;
5258 optab optab;
5259 int icode;
5260 machine_mode optab_op2_mode;
5261 enum vect_def_type dt[2] = {vect_unknown_def_type, vect_unknown_def_type};
5262 int ndts = 2;
5263 poly_uint64 nunits_in;
5264 poly_uint64 nunits_out;
5265 tree vectype_out;
5266 tree op1_vectype;
5267 int ncopies;
5268 int i;
5269 vec<tree> vec_oprnds0 = vNULL;
5270 vec<tree> vec_oprnds1 = vNULL;
5271 tree vop0, vop1;
5272 unsigned int k;
5273 bool scalar_shift_arg = true;
5274 bb_vec_info bb_vinfo = dyn_cast <bb_vec_info> (vinfo);
5275 bool incompatible_op1_vectype_p = false;
5276
5277 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
5278 return false;
5279
5280 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
5281 && STMT_VINFO_DEF_TYPE (stmt_info) != vect_nested_cycle
5282 && ! vec_stmt)
5283 return false;
5284
5285   /* Is STMT a vectorizable shift/rotate operation?  */
5286 gassign *stmt = dyn_cast <gassign *> (stmt_info->stmt);
5287 if (!stmt)
5288 return false;
5289
5290 if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME)
5291 return false;
5292
5293 code = gimple_assign_rhs_code (stmt);
5294
5295 if (!(code == LSHIFT_EXPR || code == RSHIFT_EXPR || code == LROTATE_EXPR
5296 || code == RROTATE_EXPR))
5297 return false;
5298
5299 scalar_dest = gimple_assign_lhs (stmt);
5300 vectype_out = STMT_VINFO_VECTYPE (stmt_info);
5301 if (!type_has_mode_precision_p (TREE_TYPE (scalar_dest)))
5302 {
5303 if (dump_enabled_p ())
5304 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5305 "bit-precision shifts not supported.\n");
5306 return false;
5307 }
5308
5309 slp_tree slp_op0;
5310 if (!vect_is_simple_use (vinfo, stmt_info, slp_node,
5311 0, &op0, &slp_op0, &dt[0], &vectype))
5312 {
5313 if (dump_enabled_p ())
5314 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5315 "use not simple.\n");
5316 return false;
5317 }
5318 /* If op0 is an external or constant def, infer the vector type
5319 from the scalar type. */
5320 if (!vectype)
5321 vectype = get_vectype_for_scalar_type (vinfo, TREE_TYPE (op0), slp_node);
5322 if (vec_stmt)
5323 gcc_assert (vectype);
5324 if (!vectype)
5325 {
5326 if (dump_enabled_p ())
5327 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5328 "no vectype for scalar type\n");
5329 return false;
5330 }
5331
5332 nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
5333 nunits_in = TYPE_VECTOR_SUBPARTS (vectype);
5334 if (maybe_ne (nunits_out, nunits_in))
5335 return false;
5336
5337 stmt_vec_info op1_def_stmt_info;
5338 slp_tree slp_op1;
5339 if (!vect_is_simple_use (vinfo, stmt_info, slp_node, 1, &op1, &slp_op1,
5340 &dt[1], &op1_vectype, &op1_def_stmt_info))
5341 {
5342 if (dump_enabled_p ())
5343 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5344 "use not simple.\n");
5345 return false;
5346 }
5347
5348 /* Multiple types in SLP are handled by creating the appropriate number of
5349 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
5350 case of SLP. */
5351 if (slp_node)
5352 ncopies = 1;
5353 else
5354 ncopies = vect_get_num_copies (loop_vinfo, vectype);
5355
5356 gcc_assert (ncopies >= 1);
5357
5358   /* Determine whether the shift amount is a vector or a scalar.  If the
5359      shift/rotate amount is a vector, use the vector/vector shift optabs.  */
5360
5361 if ((dt[1] == vect_internal_def
5362 || dt[1] == vect_induction_def
5363 || dt[1] == vect_nested_cycle)
5364 && !slp_node)
5365 scalar_shift_arg = false;
5366 else if (dt[1] == vect_constant_def
5367 || dt[1] == vect_external_def
5368 || dt[1] == vect_internal_def)
5369 {
5370 	  /* In SLP, we need to check whether the shift count is the same
5371 	     in all the SLP statements; in loops, if it is a constant or
5372 	     invariant, it is always a scalar shift.  */
5373 if (slp_node)
5374 {
5375 vec<stmt_vec_info> stmts = SLP_TREE_SCALAR_STMTS (slp_node);
5376 stmt_vec_info slpstmt_info;
5377
5378 FOR_EACH_VEC_ELT (stmts, k, slpstmt_info)
5379 {
5380 gassign *slpstmt = as_a <gassign *> (slpstmt_info->stmt);
5381 if (!operand_equal_p (gimple_assign_rhs2 (slpstmt), op1, 0))
5382 scalar_shift_arg = false;
5383 }
5384
5385 /* For internal SLP defs we have to make sure we see scalar stmts
5386 for all vector elements.
5387 ??? For different vectors we could resort to a different
5388 scalar shift operand but code-generation below simply always
5389 takes the first. */
5390 if (dt[1] == vect_internal_def
5391 && maybe_ne (nunits_out * SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node),
5392 stmts.length ()))
5393 scalar_shift_arg = false;
5394 }
5395
5396 /* If the shift amount is computed by a pattern stmt we cannot
5397 use the scalar amount directly thus give up and use a vector
5398 shift. */
5399 if (op1_def_stmt_info && is_pattern_stmt_p (op1_def_stmt_info))
5400 scalar_shift_arg = false;
5401 }
5402 else
5403 {
5404 if (dump_enabled_p ())
5405 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5406 "operand mode requires invariant argument.\n");
5407 return false;
5408 }
5409
5410 /* Vector shifted by vector. */
5411 bool was_scalar_shift_arg = scalar_shift_arg;
5412 if (!scalar_shift_arg)
5413 {
5414 optab = optab_for_tree_code (code, vectype, optab_vector);
5415 if (dump_enabled_p ())
5416 dump_printf_loc (MSG_NOTE, vect_location,
5417 "vector/vector shift/rotate found.\n");
5418
5419 if (!op1_vectype)
5420 op1_vectype = get_vectype_for_scalar_type (vinfo, TREE_TYPE (op1),
5421 slp_op1);
5422 incompatible_op1_vectype_p
5423 = (op1_vectype == NULL_TREE
5424 || maybe_ne (TYPE_VECTOR_SUBPARTS (op1_vectype),
5425 TYPE_VECTOR_SUBPARTS (vectype))
5426 || TYPE_MODE (op1_vectype) != TYPE_MODE (vectype));
5427 if (incompatible_op1_vectype_p
5428 && (!slp_node
5429 || SLP_TREE_DEF_TYPE (slp_op1) != vect_constant_def
5430 || slp_op1->refcnt != 1))
5431 {
5432 if (dump_enabled_p ())
5433 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5434 "unusable type for last operand in"
5435 " vector/vector shift/rotate.\n");
5436 return false;
5437 }
5438 }
5439   /* See if the machine has a vector-shift-by-scalar insn, and if not,
5440      whether it has a vector-shift-by-vector insn.  */
5441 else
5442 {
5443 optab = optab_for_tree_code (code, vectype, optab_scalar);
5444 if (optab
5445 && optab_handler (optab, TYPE_MODE (vectype)) != CODE_FOR_nothing)
5446 {
5447 if (dump_enabled_p ())
5448 dump_printf_loc (MSG_NOTE, vect_location,
5449 "vector/scalar shift/rotate found.\n");
5450 }
5451 else
5452 {
5453 optab = optab_for_tree_code (code, vectype, optab_vector);
5454 if (optab
5455 && (optab_handler (optab, TYPE_MODE (vectype))
5456 != CODE_FOR_nothing))
5457 {
5458 scalar_shift_arg = false;
5459
5460 if (dump_enabled_p ())
5461 dump_printf_loc (MSG_NOTE, vect_location,
5462 "vector/vector shift/rotate found.\n");
5463
5464 if (!op1_vectype)
5465 op1_vectype = get_vectype_for_scalar_type (vinfo,
5466 TREE_TYPE (op1),
5467 slp_op1);
5468
5469 /* Unlike the other binary operators, shifts/rotates have
5470 the rhs being int, instead of the same type as the lhs,
5471 so make sure the scalar is the right type if we are
5472 dealing with vectors of long long/long/short/char. */
5473 incompatible_op1_vectype_p
5474 = (!op1_vectype
5475 || !tree_nop_conversion_p (TREE_TYPE (vectype),
5476 TREE_TYPE (op1)));
5477 if (incompatible_op1_vectype_p
5478 && dt[1] == vect_internal_def)
5479 {
5480 if (dump_enabled_p ())
5481 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5482 "unusable type for last operand in"
5483 " vector/vector shift/rotate.\n");
5484 return false;
5485 }
5486 }
5487 }
5488 }
5489
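/* For instance (an illustrative sketch, not part of the original
   sources) of the shift-amount type adjustment handled above:

     void
     shift_ll (long long *restrict a, const long long *restrict b,
	       int s, int n)
     {
       for (int i = 0; i < n; i++)
	 a[i] = b[i] << s;
     }

   The invariant shift amount S is int while the shifted elements are
   long long.  If the target lacks a vector-shift-by-scalar pattern we
   fall back to the vector form, note the mismatch in
   incompatible_op1_vectype_p, and later convert and broadcast S to a
   long long vector during the transform.  */
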
5490 /* Supportable by target? */
5491 if (!optab)
5492 {
5493 if (dump_enabled_p ())
5494 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5495 "no optab.\n");
5496 return false;
5497 }
5498 vec_mode = TYPE_MODE (vectype);
5499 icode = (int) optab_handler (optab, vec_mode);
5500 if (icode == CODE_FOR_nothing)
5501 {
5502 if (dump_enabled_p ())
5503 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5504 "op not supported by target.\n");
5505 /* Check only during analysis. */
5506 if (maybe_ne (GET_MODE_SIZE (vec_mode), UNITS_PER_WORD)
5507 || (!vec_stmt
5508 && !vect_worthwhile_without_simd_p (vinfo, code)))
5509 return false;
5510 if (dump_enabled_p ())
5511 dump_printf_loc (MSG_NOTE, vect_location,
5512 "proceeding using word mode.\n");
5513 }
5514
5515 /* Worthwhile without SIMD support? Check only during analysis. */
5516 if (!vec_stmt
5517 && !VECTOR_MODE_P (TYPE_MODE (vectype))
5518 && !vect_worthwhile_without_simd_p (vinfo, code))
5519 {
5520 if (dump_enabled_p ())
5521 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5522 "not worthwhile without SIMD support.\n");
5523 return false;
5524 }
5525
5526 if (!vec_stmt) /* transformation not required. */
5527 {
5528 if (slp_node
5529 && (!vect_maybe_update_slp_op_vectype (slp_op0, vectype)
5530 || ((!scalar_shift_arg || dt[1] == vect_internal_def)
5531 && (!incompatible_op1_vectype_p
5532 || dt[1] == vect_constant_def)
5533 && !vect_maybe_update_slp_op_vectype
5534 (slp_op1,
5535 incompatible_op1_vectype_p ? vectype : op1_vectype))))
5536 {
5537 if (dump_enabled_p ())
5538 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5539 "incompatible vector types for invariants\n");
5540 return false;
5541 }
5542 /* Now adjust the constant shift amount in place. */
5543 if (slp_node
5544 && incompatible_op1_vectype_p
5545 && dt[1] == vect_constant_def)
5546 {
5547 for (unsigned i = 0;
5548 i < SLP_TREE_SCALAR_OPS (slp_op1).length (); ++i)
5549 {
5550 SLP_TREE_SCALAR_OPS (slp_op1)[i]
5551 = fold_convert (TREE_TYPE (vectype),
5552 SLP_TREE_SCALAR_OPS (slp_op1)[i]);
5553 gcc_assert ((TREE_CODE (SLP_TREE_SCALAR_OPS (slp_op1)[i])
5554 == INTEGER_CST));
5555 }
5556 }
5557 STMT_VINFO_TYPE (stmt_info) = shift_vec_info_type;
5558 DUMP_VECT_SCOPE ("vectorizable_shift");
5559 vect_model_simple_cost (vinfo, stmt_info, ncopies, dt,
5560 scalar_shift_arg ? 1 : ndts, slp_node, cost_vec);
5561 return true;
5562 }
5563
5564 /* Transform. */
5565
5566 if (dump_enabled_p ())
5567 dump_printf_loc (MSG_NOTE, vect_location,
5568 "transform binary/unary operation.\n");
5569
5570 if (incompatible_op1_vectype_p && !slp_node)
5571 {
5572 gcc_assert (!scalar_shift_arg && was_scalar_shift_arg);
5573 op1 = fold_convert (TREE_TYPE (vectype), op1);
5574 if (dt[1] != vect_constant_def)
5575 op1 = vect_init_vector (vinfo, stmt_info, op1,
5576 TREE_TYPE (vectype), NULL);
5577 }
5578
5579 /* Handle def. */
5580 vec_dest = vect_create_destination_var (scalar_dest, vectype);
5581
5582 if (scalar_shift_arg && dt[1] != vect_internal_def)
5583 {
5584 /* Vector shl and shr insn patterns can be defined with scalar
5585 operand 2 (shift operand). In this case, use constant or loop
5586 invariant op1 directly, without extending it to vector mode
5587 first. */
5588 optab_op2_mode = insn_data[icode].operand[2].mode;
5589 if (!VECTOR_MODE_P (optab_op2_mode))
5590 {
5591 if (dump_enabled_p ())
5592 dump_printf_loc (MSG_NOTE, vect_location,
5593 "operand 1 using scalar mode.\n");
5594 vec_oprnd1 = op1;
5595 vec_oprnds1.create (slp_node ? slp_node->vec_stmts_size : ncopies);
5596 vec_oprnds1.quick_push (vec_oprnd1);
5597 /* Store vec_oprnd1 for every vector stmt to be created.
5598 We check during the analysis that all the shift arguments
5599 are the same.
5600 TODO: Allow different constants for different vector
5601 stmts generated for an SLP instance. */
5602 for (k = 0;
5603 k < (slp_node ? slp_node->vec_stmts_size - 1 : ncopies - 1); k++)
5604 vec_oprnds1.quick_push (vec_oprnd1);
5605 }
5606 }
5607 else if (!scalar_shift_arg && slp_node && incompatible_op1_vectype_p)
5608 {
5609 if (was_scalar_shift_arg)
5610 {
5611 /* If the argument was the same in all lanes create
5612 the correctly typed vector shift amount directly. */
5613 op1 = fold_convert (TREE_TYPE (vectype), op1);
5614 op1 = vect_init_vector (vinfo, stmt_info, op1, TREE_TYPE (vectype),
5615 !loop_vinfo ? gsi : NULL);
5616 vec_oprnd1 = vect_init_vector (vinfo, stmt_info, op1, vectype,
5617 !loop_vinfo ? gsi : NULL);
5618 vec_oprnds1.create (slp_node->vec_stmts_size);
5619 for (k = 0; k < slp_node->vec_stmts_size; k++)
5620 vec_oprnds1.quick_push (vec_oprnd1);
5621 }
5622 else if (dt[1] == vect_constant_def)
5623 /* The constant shift amount has been adjusted in place. */
5624 ;
5625 else
5626 gcc_assert (TYPE_MODE (op1_vectype) == TYPE_MODE (vectype));
5627 }
5628
5629   /* vec_oprnd1 is available if operand 1 should be of a scalar type
5630      (a special case for certain kinds of vector shifts); otherwise,
5631      operand 1 should be of a vector type (the usual case).  */
5632 vect_get_vec_defs (vinfo, stmt_info, slp_node, ncopies,
5633 op0, &vec_oprnds0,
5634 vec_oprnd1 ? NULL_TREE : op1, &vec_oprnds1);
5635
5636 /* Arguments are ready. Create the new vector stmt. */
5637 FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
5638 {
5639 /* For internal defs where we need to use a scalar shift arg
5640 extract the first lane. */
5641 if (scalar_shift_arg && dt[1] == vect_internal_def)
5642 {
5643 vop1 = vec_oprnds1[0];
5644 new_temp = make_ssa_name (TREE_TYPE (TREE_TYPE (vop1)));
5645 gassign *new_stmt
5646 = gimple_build_assign (new_temp,
5647 build3 (BIT_FIELD_REF, TREE_TYPE (new_temp),
5648 vop1,
5649 TYPE_SIZE (TREE_TYPE (new_temp)),
5650 bitsize_zero_node));
5651 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
5652 vop1 = new_temp;
5653 }
5654 else
5655 vop1 = vec_oprnds1[i];
5656 gassign *new_stmt = gimple_build_assign (vec_dest, code, vop0, vop1);
5657 new_temp = make_ssa_name (vec_dest, new_stmt);
5658 gimple_assign_set_lhs (new_stmt, new_temp);
5659 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
5660 if (slp_node)
5661 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
5662 else
5663 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
5664 }
5665
5666 if (!slp_node)
5667 *vec_stmt = STMT_VINFO_VEC_STMTS (stmt_info)[0];
5668
5669 vec_oprnds0.release ();
5670 vec_oprnds1.release ();
5671
5672 return true;
5673 }
5674
5675
5676 /* Function vectorizable_operation.
5677
5678 Check if STMT_INFO performs a binary, unary or ternary operation that can
5679 be vectorized.
5680 If VEC_STMT is also passed, vectorize STMT_INFO: create a vectorized
5681 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
5682 Return true if STMT_INFO is vectorizable in this way. */
5683
5684 static bool
5685 vectorizable_operation (vec_info *vinfo,
5686 stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
5687 gimple **vec_stmt, slp_tree slp_node,
5688 stmt_vector_for_cost *cost_vec)
5689 {
5690 tree vec_dest;
5691 tree scalar_dest;
5692 tree op0, op1 = NULL_TREE, op2 = NULL_TREE;
5693 tree vectype;
5694 loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
5695 enum tree_code code, orig_code;
5696 machine_mode vec_mode;
5697 tree new_temp;
5698 int op_type;
5699 optab optab;
5700 bool target_support_p;
5701 enum vect_def_type dt[3]
5702 = {vect_unknown_def_type, vect_unknown_def_type, vect_unknown_def_type};
5703 int ndts = 3;
5704 poly_uint64 nunits_in;
5705 poly_uint64 nunits_out;
5706 tree vectype_out;
5707 int ncopies, vec_num;
5708 int i;
5709 vec<tree> vec_oprnds0 = vNULL;
5710 vec<tree> vec_oprnds1 = vNULL;
5711 vec<tree> vec_oprnds2 = vNULL;
5712 tree vop0, vop1, vop2;
5713 bb_vec_info bb_vinfo = dyn_cast <bb_vec_info> (vinfo);
5714
5715 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
5716 return false;
5717
5718 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
5719 && ! vec_stmt)
5720 return false;
5721
5722 /* Is STMT a vectorizable binary/unary operation? */
5723 gassign *stmt = dyn_cast <gassign *> (stmt_info->stmt);
5724 if (!stmt)
5725 return false;
5726
5727 /* Loads and stores are handled in vectorizable_{load,store}. */
5728 if (STMT_VINFO_DATA_REF (stmt_info))
5729 return false;
5730
5731 orig_code = code = gimple_assign_rhs_code (stmt);
5732
5733 /* Shifts are handled in vectorizable_shift. */
5734 if (code == LSHIFT_EXPR
5735 || code == RSHIFT_EXPR
5736 || code == LROTATE_EXPR
5737 || code == RROTATE_EXPR)
5738 return false;
5739
5740 /* Comparisons are handled in vectorizable_comparison. */
5741 if (TREE_CODE_CLASS (code) == tcc_comparison)
5742 return false;
5743
5744 /* Conditions are handled in vectorizable_condition. */
5745 if (code == COND_EXPR)
5746 return false;
5747
5748 /* For pointer addition and subtraction, we should use the normal
5749 plus and minus for the vector operation. */
5750 if (code == POINTER_PLUS_EXPR)
5751 code = PLUS_EXPR;
5752 if (code == POINTER_DIFF_EXPR)
5753 code = MINUS_EXPR;
5754
5755 /* Support only unary or binary operations. */
5756 op_type = TREE_CODE_LENGTH (code);
5757 if (op_type != unary_op && op_type != binary_op && op_type != ternary_op)
5758 {
5759 if (dump_enabled_p ())
5760 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5761 "num. args = %d (not unary/binary/ternary op).\n",
5762 op_type);
5763 return false;
5764 }
5765
5766 scalar_dest = gimple_assign_lhs (stmt);
5767 vectype_out = STMT_VINFO_VECTYPE (stmt_info);
5768
5769 /* Most operations cannot handle bit-precision types without extra
5770 truncations. */
5771 bool mask_op_p = VECTOR_BOOLEAN_TYPE_P (vectype_out);
5772 if (!mask_op_p
5773 && !type_has_mode_precision_p (TREE_TYPE (scalar_dest))
5774       /* Exceptions are bitwise binary operations.  */
5775 && code != BIT_IOR_EXPR
5776 && code != BIT_XOR_EXPR
5777 && code != BIT_AND_EXPR)
5778 {
5779 if (dump_enabled_p ())
5780 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5781 "bit-precision arithmetic not supported.\n");
5782 return false;
5783 }
5784
5785 slp_tree slp_op0;
5786 if (!vect_is_simple_use (vinfo, stmt_info, slp_node,
5787 0, &op0, &slp_op0, &dt[0], &vectype))
5788 {
5789 if (dump_enabled_p ())
5790 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5791 "use not simple.\n");
5792 return false;
5793 }
5794 /* If op0 is an external or constant def, infer the vector type
5795 from the scalar type. */
5796 if (!vectype)
5797 {
5798       /* For a boolean type we cannot determine the vectype from an
5799 	 invariant value (we don't know whether it is a vector of
5800 	 booleans or a vector of integers).  We use the output
5801 	 vectype because operations on booleans don't change the
5802 	 type.  */
5803 if (VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (op0)))
5804 {
5805 if (!VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (scalar_dest)))
5806 {
5807 if (dump_enabled_p ())
5808 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5809 "not supported operation on bool value.\n");
5810 return false;
5811 }
5812 vectype = vectype_out;
5813 }
5814 else
5815 vectype = get_vectype_for_scalar_type (vinfo, TREE_TYPE (op0),
5816 slp_node);
5817 }
5818 if (vec_stmt)
5819 gcc_assert (vectype);
5820 if (!vectype)
5821 {
5822 if (dump_enabled_p ())
5823 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5824 "no vectype for scalar type %T\n",
5825 TREE_TYPE (op0));
5826
5827 return false;
5828 }
5829
5830 nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
5831 nunits_in = TYPE_VECTOR_SUBPARTS (vectype);
5832 if (maybe_ne (nunits_out, nunits_in))
5833 return false;
5834
5835 tree vectype2 = NULL_TREE, vectype3 = NULL_TREE;
5836 slp_tree slp_op1 = NULL, slp_op2 = NULL;
5837 if (op_type == binary_op || op_type == ternary_op)
5838 {
5839 if (!vect_is_simple_use (vinfo, stmt_info, slp_node,
5840 1, &op1, &slp_op1, &dt[1], &vectype2))
5841 {
5842 if (dump_enabled_p ())
5843 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5844 "use not simple.\n");
5845 return false;
5846 }
5847 }
5848 if (op_type == ternary_op)
5849 {
5850 if (!vect_is_simple_use (vinfo, stmt_info, slp_node,
5851 2, &op2, &slp_op2, &dt[2], &vectype3))
5852 {
5853 if (dump_enabled_p ())
5854 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5855 "use not simple.\n");
5856 return false;
5857 }
5858 }
5859
5860 /* Multiple types in SLP are handled by creating the appropriate number of
5861 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
5862 case of SLP. */
5863 if (slp_node)
5864 {
5865 ncopies = 1;
5866 vec_num = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
5867 }
5868 else
5869 {
5870 ncopies = vect_get_num_copies (loop_vinfo, vectype);
5871 vec_num = 1;
5872 }
5873
5874 gcc_assert (ncopies >= 1);
5875
5876 /* Reject attempts to combine mask types with nonmask types, e.g. if
5877 we have an AND between a (nonmask) boolean loaded from memory and
5878 a (mask) boolean result of a comparison.
5879
5880 TODO: We could easily fix these cases up using pattern statements. */
5881 if (VECTOR_BOOLEAN_TYPE_P (vectype) != mask_op_p
5882 || (vectype2 && VECTOR_BOOLEAN_TYPE_P (vectype2) != mask_op_p)
5883 || (vectype3 && VECTOR_BOOLEAN_TYPE_P (vectype3) != mask_op_p))
5884 {
5885 if (dump_enabled_p ())
5886 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5887 "mixed mask and nonmask vector types\n");
5888 return false;
5889 }
5890
5891 /* Supportable by target? */
5892
5893 vec_mode = TYPE_MODE (vectype);
5894 if (code == MULT_HIGHPART_EXPR)
5895 target_support_p = can_mult_highpart_p (vec_mode, TYPE_UNSIGNED (vectype));
5896 else
5897 {
5898 optab = optab_for_tree_code (code, vectype, optab_default);
5899 if (!optab)
5900 {
5901 if (dump_enabled_p ())
5902 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5903 "no optab.\n");
5904 return false;
5905 }
5906 target_support_p = (optab_handler (optab, vec_mode)
5907 != CODE_FOR_nothing);
5908 }
5909
5910 if (!target_support_p)
5911 {
5912 if (dump_enabled_p ())
5913 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5914 "op not supported by target.\n");
5915 /* Check only during analysis. */
5916 if (maybe_ne (GET_MODE_SIZE (vec_mode), UNITS_PER_WORD)
5917 || (!vec_stmt && !vect_worthwhile_without_simd_p (vinfo, code)))
5918 return false;
5919 if (dump_enabled_p ())
5920 dump_printf_loc (MSG_NOTE, vect_location,
5921 "proceeding using word mode.\n");
5922 }
5923
5924 /* Worthwhile without SIMD support? Check only during analysis. */
5925 if (!VECTOR_MODE_P (vec_mode)
5926 && !vec_stmt
5927 && !vect_worthwhile_without_simd_p (vinfo, code))
5928 {
5929 if (dump_enabled_p ())
5930 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5931 "not worthwhile without SIMD support.\n");
5932 return false;
5933 }
5934
5935 int reduc_idx = STMT_VINFO_REDUC_IDX (stmt_info);
5936 vec_loop_masks *masks = (loop_vinfo ? &LOOP_VINFO_MASKS (loop_vinfo) : NULL);
5937 internal_fn cond_fn = get_conditional_internal_fn (code);
5938
5939 if (!vec_stmt) /* transformation not required. */
5940 {
5941 /* If this operation is part of a reduction, a fully-masked loop
5942 should only change the active lanes of the reduction chain,
5943 keeping the inactive lanes as-is. */
5944 if (loop_vinfo
5945 && LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo)
5946 && reduc_idx >= 0)
5947 {
5948 if (cond_fn == IFN_LAST
5949 || !direct_internal_fn_supported_p (cond_fn, vectype,
5950 OPTIMIZE_FOR_SPEED))
5951 {
5952 if (dump_enabled_p ())
5953 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5954 "can't use a fully-masked loop because no"
5955 " conditional operation is available.\n");
5956 LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo) = false;
5957 }
5958 else
5959 vect_record_loop_mask (loop_vinfo, masks, ncopies * vec_num,
5960 vectype, NULL);
5961 }
5962
5963 /* Put types on constant and invariant SLP children. */
5964 if (slp_node
5965 && (!vect_maybe_update_slp_op_vectype (slp_op0, vectype)
5966 || !vect_maybe_update_slp_op_vectype (slp_op1, vectype)
5967 || !vect_maybe_update_slp_op_vectype (slp_op2, vectype)))
5968 {
5969 if (dump_enabled_p ())
5970 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5971 "incompatible vector types for invariants\n");
5972 return false;
5973 }
5974
5975 STMT_VINFO_TYPE (stmt_info) = op_vec_info_type;
5976 DUMP_VECT_SCOPE ("vectorizable_operation");
5977 vect_model_simple_cost (vinfo, stmt_info,
5978 ncopies, dt, ndts, slp_node, cost_vec);
5979 return true;
5980 }
5981
5982 /* Transform. */
5983
5984 if (dump_enabled_p ())
5985 dump_printf_loc (MSG_NOTE, vect_location,
5986 "transform binary/unary operation.\n");
5987
5988 bool masked_loop_p = loop_vinfo && LOOP_VINFO_FULLY_MASKED_P (loop_vinfo);
5989
5990 /* POINTER_DIFF_EXPR has pointer arguments which are vectorized as
5991 vectors with unsigned elements, but the result is signed. So, we
5992 need to compute the MINUS_EXPR into vectype temporary and
5993 VIEW_CONVERT_EXPR it into the final vectype_out result. */
5994 tree vec_cvt_dest = NULL_TREE;
5995 if (orig_code == POINTER_DIFF_EXPR)
5996 {
5997 vec_dest = vect_create_destination_var (scalar_dest, vectype);
5998 vec_cvt_dest = vect_create_destination_var (scalar_dest, vectype_out);
5999 }
6000 /* Handle def. */
6001 else
6002 vec_dest = vect_create_destination_var (scalar_dest, vectype_out);
6003
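/* A sketch of the POINTER_DIFF_EXPR case (illustrative only, not part
   of the original sources; <stddef.h> provides ptrdiff_t):

     void
     diffs (ptrdiff_t *restrict d, char *const *restrict p,
	    char *const *restrict q, int n)
     {
       for (int i = 0; i < n; i++)
	 d[i] = p[i] - q[i];
     }

   The pointer operands are vectorized as vectors of unsigned elements,
   so the subtraction is computed as a MINUS_EXPR in that unsigned
   vectype and the signed result is produced by the VIEW_CONVERT_EXPR
   into vectype_out that vec_cvt_dest is created for.  */
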
6004 /* In case the vectorization factor (VF) is bigger than the number
6005 of elements that we can fit in a vectype (nunits), we have to generate
6006      more than one vector stmt - i.e., we need to "unroll" the
6007 vector stmt by a factor VF/nunits. In doing so, we record a pointer
6008 from one copy of the vector stmt to the next, in the field
6009 STMT_VINFO_RELATED_STMT. This is necessary in order to allow following
6010 stages to find the correct vector defs to be used when vectorizing
6011 stmts that use the defs of the current stmt. The example below
6012 illustrates the vectorization process when VF=16 and nunits=4 (i.e.,
6013 we need to create 4 vectorized stmts):
6014
6015 before vectorization:
6016 RELATED_STMT VEC_STMT
6017 S1: x = memref - -
6018 S2: z = x + 1 - -
6019
6020 step 1: vectorize stmt S1 (done in vectorizable_load. See more details
6021 there):
6022 RELATED_STMT VEC_STMT
6023 VS1_0: vx0 = memref0 VS1_1 -
6024 VS1_1: vx1 = memref1 VS1_2 -
6025 VS1_2: vx2 = memref2 VS1_3 -
6026 VS1_3: vx3 = memref3 - -
6027 S1: x = load - VS1_0
6028 S2: z = x + 1 - -
6029
6030 step2: vectorize stmt S2 (done here):
6031 To vectorize stmt S2 we first need to find the relevant vector
6032 def for the first operand 'x'. This is, as usual, obtained from
6033 the vector stmt recorded in the STMT_VINFO_VEC_STMT of the stmt
6034 that defines 'x' (S1). This way we find the stmt VS1_0, and the
6035 relevant vector def 'vx0'. Having found 'vx0' we can generate
6036 the vector stmt VS2_0, and as usual, record it in the
6037 STMT_VINFO_VEC_STMT of stmt S2.
6038 When creating the second copy (VS2_1), we obtain the relevant vector
6039 def from the vector stmt recorded in the STMT_VINFO_RELATED_STMT of
6040 stmt VS1_0. This way we find the stmt VS1_1 and the relevant
6041 vector def 'vx1'. Using 'vx1' we create stmt VS2_1 and record a
6042 pointer to it in the STMT_VINFO_RELATED_STMT of the vector stmt VS2_0.
6043 Similarly when creating stmts VS2_2 and VS2_3. This is the resulting
6044 chain of stmts and pointers:
6045 RELATED_STMT VEC_STMT
6046 VS1_0: vx0 = memref0 VS1_1 -
6047 VS1_1: vx1 = memref1 VS1_2 -
6048 VS1_2: vx2 = memref2 VS1_3 -
6049 VS1_3: vx3 = memref3 - -
6050 S1: x = load - VS1_0
6051 VS2_0: vz0 = vx0 + v1 VS2_1 -
6052 VS2_1: vz1 = vx1 + v1 VS2_2 -
6053 VS2_2: vz2 = vx2 + v1 VS2_3 -
6054 VS2_3: vz3 = vx3 + v1 - -
6055 S2: z = x + 1 - VS2_0 */
6056
6057 vect_get_vec_defs (vinfo, stmt_info, slp_node, ncopies,
6058 op0, &vec_oprnds0, op1, &vec_oprnds1, op2, &vec_oprnds2);
6059 /* Arguments are ready. Create the new vector stmt. */
6060 FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
6061 {
6062 gimple *new_stmt = NULL;
6063 vop1 = ((op_type == binary_op || op_type == ternary_op)
6064 ? vec_oprnds1[i] : NULL_TREE);
6065 vop2 = ((op_type == ternary_op) ? vec_oprnds2[i] : NULL_TREE);
6066 if (masked_loop_p && reduc_idx >= 0)
6067 {
6068 /* Perform the operation on active elements only and take
6069 inactive elements from the reduction chain input. */
6070 gcc_assert (!vop2);
6071 vop2 = reduc_idx == 1 ? vop1 : vop0;
6072 tree mask = vect_get_loop_mask (gsi, masks, vec_num * ncopies,
6073 vectype, i);
6074 gcall *call = gimple_build_call_internal (cond_fn, 4, mask,
6075 vop0, vop1, vop2);
6076 new_temp = make_ssa_name (vec_dest, call);
6077 gimple_call_set_lhs (call, new_temp);
6078 gimple_call_set_nothrow (call, true);
6079 vect_finish_stmt_generation (vinfo, stmt_info, call, gsi);
6080 new_stmt = call;
6081 }
6082 else
6083 {
6084 new_stmt = gimple_build_assign (vec_dest, code, vop0, vop1, vop2);
6085 new_temp = make_ssa_name (vec_dest, new_stmt);
6086 gimple_assign_set_lhs (new_stmt, new_temp);
6087 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
6088 if (vec_cvt_dest)
6089 {
6090 new_temp = build1 (VIEW_CONVERT_EXPR, vectype_out, new_temp);
6091 new_stmt = gimple_build_assign (vec_cvt_dest, VIEW_CONVERT_EXPR,
6092 new_temp);
6093 new_temp = make_ssa_name (vec_cvt_dest, new_stmt);
6094 gimple_assign_set_lhs (new_stmt, new_temp);
6095 vect_finish_stmt_generation (vinfo, stmt_info,
6096 new_stmt, gsi);
6097 }
6098 }
6099 if (slp_node)
6100 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
6101 else
6102 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
6103 }
6104
6105 if (!slp_node)
6106 *vec_stmt = STMT_VINFO_VEC_STMTS (stmt_info)[0];
6107
6108 vec_oprnds0.release ();
6109 vec_oprnds1.release ();
6110 vec_oprnds2.release ();
6111
6112 return true;
6113 }
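
/* The masked-reduction path above relies on a conditional internal
   function such as .COND_ADD (MASK, A, B, ELSE).  As a rough scalar
   sketch of the per-lane semantics (illustrative only, not part of the
   original sources):

     static inline double
     cond_add_lane (_Bool mask, double a, double b, double else_val)
     {
       return mask ? a + b : else_val;
     }

   With ELSE wired to the reduction-chain operand (reduc_idx), inactive
   lanes simply pass the previous partial result through unchanged.  */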
6114
6115 /* A helper function to ensure data reference DR_INFO's base alignment. */
6116
6117 static void
6118 ensure_base_align (dr_vec_info *dr_info)
6119 {
6120 if (dr_info->misalignment == DR_MISALIGNMENT_UNINITIALIZED)
6121 return;
6122
6123 if (dr_info->base_misaligned)
6124 {
6125 tree base_decl = dr_info->base_decl;
6126
6127       /* We should only be able to increase the alignment of a base object
6128 	 if we know what its new alignment should be at compile time.  */
6129 unsigned HOST_WIDE_INT align_base_to =
6130 DR_TARGET_ALIGNMENT (dr_info).to_constant () * BITS_PER_UNIT;
6131
6132 if (decl_in_symtab_p (base_decl))
6133 symtab_node::get (base_decl)->increase_alignment (align_base_to);
6134 else if (DECL_ALIGN (base_decl) < align_base_to)
6135 {
6136 SET_DECL_ALIGN (base_decl, align_base_to);
6137 DECL_USER_ALIGN (base_decl) = 1;
6138 }
6139 dr_info->base_misaligned = false;
6140 }
6141 }
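
/* For example (an illustrative sketch, not part of the original
   sources):

     static double a[1024];

     double
     sum (void)
     {
       double s = 0.0;
       for (int i = 0; i < 1024; i++)
	 s += a[i];
       return s;
     }

   If the target prefers, say, 32-byte alignment for the vector loads
   of A, the base decl A is realigned here by raising its DECL_ALIGN
   (or its symtab alignment) to that target alignment, making the
   accesses aligned without any runtime peeling.  */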
6142
6143
6144 /* Function get_group_alias_ptr_type.
6145
6146 Return the alias type for the group starting at FIRST_STMT_INFO. */
6147
6148 static tree
6149 get_group_alias_ptr_type (stmt_vec_info first_stmt_info)
6150 {
6151 struct data_reference *first_dr, *next_dr;
6152
6153 first_dr = STMT_VINFO_DATA_REF (first_stmt_info);
6154 stmt_vec_info next_stmt_info = DR_GROUP_NEXT_ELEMENT (first_stmt_info);
6155 while (next_stmt_info)
6156 {
6157 next_dr = STMT_VINFO_DATA_REF (next_stmt_info);
6158 if (get_alias_set (DR_REF (first_dr))
6159 != get_alias_set (DR_REF (next_dr)))
6160 {
6161 if (dump_enabled_p ())
6162 dump_printf_loc (MSG_NOTE, vect_location,
6163 "conflicting alias set types.\n");
6164 return ptr_type_node;
6165 }
6166 next_stmt_info = DR_GROUP_NEXT_ELEMENT (next_stmt_info);
6167 }
6168 return reference_alias_ptr_type (DR_REF (first_dr));
6169 }
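
/* An illustrative sketch (not part of the original sources) of a group
   whose members have conflicting alias sets:

     struct s { int i; float f; };

     void
     clear (struct s *restrict p, int n)
     {
       for (int i = 0; i < n; i++)
	 {
	   p[i].i = 0;
	   p[i].f = 0.0f;
	 }
     }

   The int and float stores form one interleaved group but their
   DR_REFs have different alias sets, so the group's alias pointer type
   falls back to ptr_type_node.  */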
6170
6171
6172 /* Function scan_operand_equal_p.
6173
6174 Helper function for check_scan_store. Compare two references
6175 with .GOMP_SIMD_LANE bases. */
6176
6177 static bool
6178 scan_operand_equal_p (tree ref1, tree ref2)
6179 {
6180 tree ref[2] = { ref1, ref2 };
6181 poly_int64 bitsize[2], bitpos[2];
6182 tree offset[2], base[2];
6183 for (int i = 0; i < 2; ++i)
6184 {
6185 machine_mode mode;
6186 int unsignedp, reversep, volatilep = 0;
6187 base[i] = get_inner_reference (ref[i], &bitsize[i], &bitpos[i],
6188 &offset[i], &mode, &unsignedp,
6189 &reversep, &volatilep);
6190 if (reversep || volatilep || maybe_ne (bitpos[i], 0))
6191 return false;
6192 if (TREE_CODE (base[i]) == MEM_REF
6193 && offset[i] == NULL_TREE
6194 && TREE_CODE (TREE_OPERAND (base[i], 0)) == SSA_NAME)
6195 {
6196 gimple *def_stmt = SSA_NAME_DEF_STMT (TREE_OPERAND (base[i], 0));
6197 if (is_gimple_assign (def_stmt)
6198 && gimple_assign_rhs_code (def_stmt) == POINTER_PLUS_EXPR
6199 && TREE_CODE (gimple_assign_rhs1 (def_stmt)) == ADDR_EXPR
6200 && TREE_CODE (gimple_assign_rhs2 (def_stmt)) == SSA_NAME)
6201 {
6202 if (maybe_ne (mem_ref_offset (base[i]), 0))
6203 return false;
6204 base[i] = TREE_OPERAND (gimple_assign_rhs1 (def_stmt), 0);
6205 offset[i] = gimple_assign_rhs2 (def_stmt);
6206 }
6207 }
6208 }
6209
6210 if (!operand_equal_p (base[0], base[1], 0))
6211 return false;
6212 if (maybe_ne (bitsize[0], bitsize[1]))
6213 return false;
6214 if (offset[0] != offset[1])
6215 {
6216 if (!offset[0] || !offset[1])
6217 return false;
6218 if (!operand_equal_p (offset[0], offset[1], 0))
6219 {
6220 tree step[2];
6221 for (int i = 0; i < 2; ++i)
6222 {
6223 step[i] = integer_one_node;
6224 if (TREE_CODE (offset[i]) == SSA_NAME)
6225 {
6226 gimple *def_stmt = SSA_NAME_DEF_STMT (offset[i]);
6227 if (is_gimple_assign (def_stmt)
6228 && gimple_assign_rhs_code (def_stmt) == MULT_EXPR
6229 && (TREE_CODE (gimple_assign_rhs2 (def_stmt))
6230 == INTEGER_CST))
6231 {
6232 step[i] = gimple_assign_rhs2 (def_stmt);
6233 offset[i] = gimple_assign_rhs1 (def_stmt);
6234 }
6235 }
6236 else if (TREE_CODE (offset[i]) == MULT_EXPR)
6237 {
6238 step[i] = TREE_OPERAND (offset[i], 1);
6239 offset[i] = TREE_OPERAND (offset[i], 0);
6240 }
6241 tree rhs1 = NULL_TREE;
6242 if (TREE_CODE (offset[i]) == SSA_NAME)
6243 {
6244 gimple *def_stmt = SSA_NAME_DEF_STMT (offset[i]);
6245 if (gimple_assign_cast_p (def_stmt))
6246 rhs1 = gimple_assign_rhs1 (def_stmt);
6247 }
6248 else if (CONVERT_EXPR_P (offset[i]))
6249 rhs1 = TREE_OPERAND (offset[i], 0);
6250 if (rhs1
6251 && INTEGRAL_TYPE_P (TREE_TYPE (rhs1))
6252 && INTEGRAL_TYPE_P (TREE_TYPE (offset[i]))
6253 && (TYPE_PRECISION (TREE_TYPE (offset[i]))
6254 >= TYPE_PRECISION (TREE_TYPE (rhs1))))
6255 offset[i] = rhs1;
6256 }
6257 if (!operand_equal_p (offset[0], offset[1], 0)
6258 || !operand_equal_p (step[0], step[1], 0))
6259 return false;
6260 }
6261 }
6262 return true;
6263 }
6264
6265
6266 enum scan_store_kind {
6267 /* Normal permutation. */
6268 scan_store_kind_perm,
6269
6270 /* Whole vector left shift permutation with zero init. */
6271 scan_store_kind_lshift_zero,
6272
6273 /* Whole vector left shift permutation and VEC_COND_EXPR. */
6274 scan_store_kind_lshift_cond
6275 };
6276
6277 /* Function scan_store_can_perm_p.
6278
6279 Verify if we can perform the needed permutations or whole vector shifts.
6280 Return -1 on failure, otherwise exact log2 of vectype's nunits.
6281    USE_WHOLE_VECTOR, if nonnull, is filled with the scan_store_kind
6282    operation to use at each step.  */
6283
6284 static int
6285 scan_store_can_perm_p (tree vectype, tree init,
6286 vec<enum scan_store_kind> *use_whole_vector = NULL)
6287 {
6288 enum machine_mode vec_mode = TYPE_MODE (vectype);
6289 unsigned HOST_WIDE_INT nunits;
6290 if (!TYPE_VECTOR_SUBPARTS (vectype).is_constant (&nunits))
6291 return -1;
6292 int units_log2 = exact_log2 (nunits);
6293 if (units_log2 <= 0)
6294 return -1;
6295
6296 int i;
6297 enum scan_store_kind whole_vector_shift_kind = scan_store_kind_perm;
6298 for (i = 0; i <= units_log2; ++i)
6299 {
6300 unsigned HOST_WIDE_INT j, k;
6301 enum scan_store_kind kind = scan_store_kind_perm;
6302 vec_perm_builder sel (nunits, nunits, 1);
6303 sel.quick_grow (nunits);
6304 if (i == units_log2)
6305 {
6306 for (j = 0; j < nunits; ++j)
6307 sel[j] = nunits - 1;
6308 }
6309 else
6310 {
6311 for (j = 0; j < (HOST_WIDE_INT_1U << i); ++j)
6312 sel[j] = j;
6313 for (k = 0; j < nunits; ++j, ++k)
6314 sel[j] = nunits + k;
6315 }
6316 vec_perm_indices indices (sel, i == units_log2 ? 1 : 2, nunits);
6317 if (!can_vec_perm_const_p (vec_mode, indices))
6318 {
6319 if (i == units_log2)
6320 return -1;
6321
6322 if (whole_vector_shift_kind == scan_store_kind_perm)
6323 {
6324 if (optab_handler (vec_shl_optab, vec_mode) == CODE_FOR_nothing)
6325 return -1;
6326 whole_vector_shift_kind = scan_store_kind_lshift_zero;
6327 	      /* Whole vector shifts shift in zeros, so if INIT is an
6328 		 all-zeros constant, there is no need to do anything further.  */
6329 if ((TREE_CODE (init) != INTEGER_CST
6330 && TREE_CODE (init) != REAL_CST)
6331 || !initializer_zerop (init))
6332 {
6333 tree masktype = truth_type_for (vectype);
6334 if (!expand_vec_cond_expr_p (vectype, masktype, VECTOR_CST))
6335 return -1;
6336 whole_vector_shift_kind = scan_store_kind_lshift_cond;
6337 }
6338 }
6339 kind = whole_vector_shift_kind;
6340 }
6341 if (use_whole_vector)
6342 {
6343 if (kind != scan_store_kind_perm && use_whole_vector->is_empty ())
6344 use_whole_vector->safe_grow_cleared (i, true);
6345 if (kind != scan_store_kind_perm || !use_whole_vector->is_empty ())
6346 use_whole_vector->safe_push (kind);
6347 }
6348 }
6349
6350 return units_log2;
6351 }
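
/* As a worked example (not part of the original sources): for
   nunits = 4, units_log2 = 2 and the loop above builds

     i = 0:  sel = { 0, 4, 5, 6 }   two-input permutation
     i = 1:  sel = { 0, 1, 4, 5 }   two-input permutation
     i = 2:  sel = { 3, 3, 3, 3 }   single-input permutation

   where indices >= nunits pick lanes from the second input.  The first
   two selectors shift the running scan vector in by one and two lanes,
   filling the low lanes from the initializer vector, and the last one
   broadcasts the final lane for the store of the overall result.  */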
6352
6353
6354 /* Function check_scan_store.
6355
6356 Check magic stores for #pragma omp scan {in,ex}clusive reductions. */
6357
6358 static bool
6359 check_scan_store (vec_info *vinfo, stmt_vec_info stmt_info, tree vectype,
6360 enum vect_def_type rhs_dt, bool slp, tree mask,
6361 vect_memory_access_type memory_access_type)
6362 {
6363 loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
6364 dr_vec_info *dr_info = STMT_VINFO_DR_INFO (stmt_info);
6365 tree ref_type;
6366
6367 gcc_assert (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) > 1);
6368 if (slp
6369 || mask
6370 || memory_access_type != VMAT_CONTIGUOUS
6371 || TREE_CODE (DR_BASE_ADDRESS (dr_info->dr)) != ADDR_EXPR
6372 || !VAR_P (TREE_OPERAND (DR_BASE_ADDRESS (dr_info->dr), 0))
6373 || loop_vinfo == NULL
6374 || LOOP_VINFO_FULLY_MASKED_P (loop_vinfo)
6375 || STMT_VINFO_GROUPED_ACCESS (stmt_info)
6376 || !integer_zerop (get_dr_vinfo_offset (vinfo, dr_info))
6377 || !integer_zerop (DR_INIT (dr_info->dr))
6378 || !(ref_type = reference_alias_ptr_type (DR_REF (dr_info->dr)))
6379 || !alias_sets_conflict_p (get_alias_set (vectype),
6380 get_alias_set (TREE_TYPE (ref_type))))
6381 {
6382 if (dump_enabled_p ())
6383 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6384 "unsupported OpenMP scan store.\n");
6385 return false;
6386 }
6387
6388 /* We need to pattern match code built by OpenMP lowering and simplified
6389 by following optimizations into something we can handle.
6390 #pragma omp simd reduction(inscan,+:r)
6391 for (...)
6392 {
6393 r += something ();
6394 #pragma omp scan inclusive (r)
6395 use (r);
6396 }
6397 shall have body with:
6398 // Initialization for input phase, store the reduction initializer:
6399 _20 = .GOMP_SIMD_LANE (simduid.3_14(D), 0);
6400 _21 = .GOMP_SIMD_LANE (simduid.3_14(D), 1);
6401 D.2042[_21] = 0;
6402 // Actual input phase:
6403 ...
6404 r.0_5 = D.2042[_20];
6405 _6 = _4 + r.0_5;
6406 D.2042[_20] = _6;
6407 // Initialization for scan phase:
6408 _25 = .GOMP_SIMD_LANE (simduid.3_14(D), 2);
6409 _26 = D.2043[_25];
6410 _27 = D.2042[_25];
6411 _28 = _26 + _27;
6412 D.2043[_25] = _28;
6413 D.2042[_25] = _28;
6414 // Actual scan phase:
6415 ...
6416 r.1_8 = D.2042[_20];
6417 ...
6418    The "omp simd array" variable D.2042 holds the privatized copy used
6419    inside the loop and D.2043 is another one that holds copies of
6420    the current original list item.  The separate GOMP_SIMD_LANE ifn
6421    kinds are there in order to allow optimizing the initializer store
6422    and combiner sequence, e.g. if it is originally some C++-ish user
6423    defined reduction, while still allowing the vectorizer to pattern
6424    recognize it and turn it into the appropriate vectorized scan.
6425
6426 For exclusive scan, this is slightly different:
6427 #pragma omp simd reduction(inscan,+:r)
6428 for (...)
6429 {
6430 use (r);
6431 #pragma omp scan exclusive (r)
6432 r += something ();
6433 }
6434 shall have body with:
6435 // Initialization for input phase, store the reduction initializer:
6436 _20 = .GOMP_SIMD_LANE (simduid.3_14(D), 0);
6437 _21 = .GOMP_SIMD_LANE (simduid.3_14(D), 1);
6438 D.2042[_21] = 0;
6439 // Actual input phase:
6440 ...
6441 r.0_5 = D.2042[_20];
6442 _6 = _4 + r.0_5;
6443 D.2042[_20] = _6;
6444 // Initialization for scan phase:
6445 _25 = .GOMP_SIMD_LANE (simduid.3_14(D), 3);
6446 _26 = D.2043[_25];
6447 D.2044[_25] = _26;
6448 _27 = D.2042[_25];
6449 _28 = _26 + _27;
6450 D.2043[_25] = _28;
6451 // Actual scan phase:
6452 ...
6453 r.1_8 = D.2044[_20];
6454 ... */
6455
6456 if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) == 2)
6457 {
6458 /* Match the D.2042[_21] = 0; store above. Just require that
6459 it is a constant or external definition store. */
6460 if (rhs_dt != vect_constant_def && rhs_dt != vect_external_def)
6461 {
6462 fail_init:
6463 if (dump_enabled_p ())
6464 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6465 "unsupported OpenMP scan initializer store.\n");
6466 return false;
6467 }
6468
6469 if (! loop_vinfo->scan_map)
6470 loop_vinfo->scan_map = new hash_map<tree, tree>;
6471 tree var = TREE_OPERAND (DR_BASE_ADDRESS (dr_info->dr), 0);
6472 tree &cached = loop_vinfo->scan_map->get_or_insert (var);
6473 if (cached)
6474 goto fail_init;
6475 cached = gimple_assign_rhs1 (STMT_VINFO_STMT (stmt_info));
6476
6477 /* These stores can be vectorized normally. */
6478 return true;
6479 }
6480
6481 if (rhs_dt != vect_internal_def)
6482 {
6483 fail:
6484 if (dump_enabled_p ())
6485 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6486 "unsupported OpenMP scan combiner pattern.\n");
6487 return false;
6488 }
6489
6490 gimple *stmt = STMT_VINFO_STMT (stmt_info);
6491 tree rhs = gimple_assign_rhs1 (stmt);
6492 if (TREE_CODE (rhs) != SSA_NAME)
6493 goto fail;
6494
6495 gimple *other_store_stmt = NULL;
6496 tree var = TREE_OPERAND (DR_BASE_ADDRESS (dr_info->dr), 0);
6497 bool inscan_var_store
6498 = lookup_attribute ("omp simd inscan", DECL_ATTRIBUTES (var)) != NULL;
6499
6500 if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) == 4)
6501 {
6502 if (!inscan_var_store)
6503 {
6504 use_operand_p use_p;
6505 imm_use_iterator iter;
6506 FOR_EACH_IMM_USE_FAST (use_p, iter, rhs)
6507 {
6508 gimple *use_stmt = USE_STMT (use_p);
6509 if (use_stmt == stmt || is_gimple_debug (use_stmt))
6510 continue;
6511 if (gimple_bb (use_stmt) != gimple_bb (stmt)
6512 || !is_gimple_assign (use_stmt)
6513 || gimple_assign_rhs_class (use_stmt) != GIMPLE_BINARY_RHS
6514 || other_store_stmt
6515 || TREE_CODE (gimple_assign_lhs (use_stmt)) != SSA_NAME)
6516 goto fail;
6517 other_store_stmt = use_stmt;
6518 }
6519 if (other_store_stmt == NULL)
6520 goto fail;
6521 rhs = gimple_assign_lhs (other_store_stmt);
6522 if (!single_imm_use (rhs, &use_p, &other_store_stmt))
6523 goto fail;
6524 }
6525 }
6526 else if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) == 3)
6527 {
6528 use_operand_p use_p;
6529 imm_use_iterator iter;
6530 FOR_EACH_IMM_USE_FAST (use_p, iter, rhs)
6531 {
6532 gimple *use_stmt = USE_STMT (use_p);
6533 if (use_stmt == stmt || is_gimple_debug (use_stmt))
6534 continue;
6535 if (other_store_stmt)
6536 goto fail;
6537 other_store_stmt = use_stmt;
6538 }
6539 }
6540 else
6541 goto fail;
6542
6543 gimple *def_stmt = SSA_NAME_DEF_STMT (rhs);
6544 if (gimple_bb (def_stmt) != gimple_bb (stmt)
6545 || !is_gimple_assign (def_stmt)
6546 || gimple_assign_rhs_class (def_stmt) != GIMPLE_BINARY_RHS)
6547 goto fail;
6548
6549 enum tree_code code = gimple_assign_rhs_code (def_stmt);
6550 /* For pointer addition, we should use the normal plus for the vector
6551 operation. */
6552 switch (code)
6553 {
6554 case POINTER_PLUS_EXPR:
6555 code = PLUS_EXPR;
6556 break;
6557 case MULT_HIGHPART_EXPR:
6558 goto fail;
6559 default:
6560 break;
6561 }
6562 if (TREE_CODE_LENGTH (code) != binary_op || !commutative_tree_code (code))
6563 goto fail;
6564
6565 tree rhs1 = gimple_assign_rhs1 (def_stmt);
6566 tree rhs2 = gimple_assign_rhs2 (def_stmt);
6567 if (TREE_CODE (rhs1) != SSA_NAME || TREE_CODE (rhs2) != SSA_NAME)
6568 goto fail;
6569
6570 gimple *load1_stmt = SSA_NAME_DEF_STMT (rhs1);
6571 gimple *load2_stmt = SSA_NAME_DEF_STMT (rhs2);
6572 if (gimple_bb (load1_stmt) != gimple_bb (stmt)
6573 || !gimple_assign_load_p (load1_stmt)
6574 || gimple_bb (load2_stmt) != gimple_bb (stmt)
6575 || !gimple_assign_load_p (load2_stmt))
6576 goto fail;
6577
6578 stmt_vec_info load1_stmt_info = loop_vinfo->lookup_stmt (load1_stmt);
6579 stmt_vec_info load2_stmt_info = loop_vinfo->lookup_stmt (load2_stmt);
6580 if (load1_stmt_info == NULL
6581 || load2_stmt_info == NULL
6582 || (STMT_VINFO_SIMD_LANE_ACCESS_P (load1_stmt_info)
6583 != STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info))
6584 || (STMT_VINFO_SIMD_LANE_ACCESS_P (load2_stmt_info)
6585 != STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info)))
6586 goto fail;
6587
6588 if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) == 4 && inscan_var_store)
6589 {
6590 dr_vec_info *load1_dr_info = STMT_VINFO_DR_INFO (load1_stmt_info);
6591 if (TREE_CODE (DR_BASE_ADDRESS (load1_dr_info->dr)) != ADDR_EXPR
6592 || !VAR_P (TREE_OPERAND (DR_BASE_ADDRESS (load1_dr_info->dr), 0)))
6593 goto fail;
6594 tree var1 = TREE_OPERAND (DR_BASE_ADDRESS (load1_dr_info->dr), 0);
6595 tree lrhs;
6596 if (lookup_attribute ("omp simd inscan", DECL_ATTRIBUTES (var1)))
6597 lrhs = rhs1;
6598 else
6599 lrhs = rhs2;
6600 use_operand_p use_p;
6601 imm_use_iterator iter;
6602 FOR_EACH_IMM_USE_FAST (use_p, iter, lrhs)
6603 {
6604 gimple *use_stmt = USE_STMT (use_p);
6605 if (use_stmt == def_stmt || is_gimple_debug (use_stmt))
6606 continue;
6607 if (other_store_stmt)
6608 goto fail;
6609 other_store_stmt = use_stmt;
6610 }
6611 }
6612
6613 if (other_store_stmt == NULL)
6614 goto fail;
6615 if (gimple_bb (other_store_stmt) != gimple_bb (stmt)
6616 || !gimple_store_p (other_store_stmt))
6617 goto fail;
6618
6619 stmt_vec_info other_store_stmt_info
6620 = loop_vinfo->lookup_stmt (other_store_stmt);
6621 if (other_store_stmt_info == NULL
6622 || (STMT_VINFO_SIMD_LANE_ACCESS_P (other_store_stmt_info)
6623 != STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info)))
6624 goto fail;
6625
6626 gimple *stmt1 = stmt;
6627 gimple *stmt2 = other_store_stmt;
6628 if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) == 4 && !inscan_var_store)
6629 std::swap (stmt1, stmt2);
6630 if (scan_operand_equal_p (gimple_assign_lhs (stmt1),
6631 gimple_assign_rhs1 (load2_stmt)))
6632 {
6633 std::swap (rhs1, rhs2);
6634 std::swap (load1_stmt, load2_stmt);
6635 std::swap (load1_stmt_info, load2_stmt_info);
6636 }
6637 if (!scan_operand_equal_p (gimple_assign_lhs (stmt1),
6638 gimple_assign_rhs1 (load1_stmt)))
6639 goto fail;
6640
6641 tree var3 = NULL_TREE;
6642 if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) == 3
6643 && !scan_operand_equal_p (gimple_assign_lhs (stmt2),
6644 gimple_assign_rhs1 (load2_stmt)))
6645 goto fail;
6646 else if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) == 4)
6647 {
6648 dr_vec_info *load2_dr_info = STMT_VINFO_DR_INFO (load2_stmt_info);
6649 if (TREE_CODE (DR_BASE_ADDRESS (load2_dr_info->dr)) != ADDR_EXPR
6650 || !VAR_P (TREE_OPERAND (DR_BASE_ADDRESS (load2_dr_info->dr), 0)))
6651 goto fail;
6652 var3 = TREE_OPERAND (DR_BASE_ADDRESS (load2_dr_info->dr), 0);
6653 if (!lookup_attribute ("omp simd array", DECL_ATTRIBUTES (var3))
6654 || lookup_attribute ("omp simd inscan", DECL_ATTRIBUTES (var3))
6655 || lookup_attribute ("omp simd inscan exclusive",
6656 DECL_ATTRIBUTES (var3)))
6657 goto fail;
6658 }
6659
6660 dr_vec_info *other_dr_info = STMT_VINFO_DR_INFO (other_store_stmt_info);
6661 if (TREE_CODE (DR_BASE_ADDRESS (other_dr_info->dr)) != ADDR_EXPR
6662 || !VAR_P (TREE_OPERAND (DR_BASE_ADDRESS (other_dr_info->dr), 0)))
6663 goto fail;
6664
6665 tree var1 = TREE_OPERAND (DR_BASE_ADDRESS (dr_info->dr), 0);
6666 tree var2 = TREE_OPERAND (DR_BASE_ADDRESS (other_dr_info->dr), 0);
6667 if (!lookup_attribute ("omp simd array", DECL_ATTRIBUTES (var1))
6668 || !lookup_attribute ("omp simd array", DECL_ATTRIBUTES (var2))
6669 || (!lookup_attribute ("omp simd inscan", DECL_ATTRIBUTES (var1)))
6670 == (!lookup_attribute ("omp simd inscan", DECL_ATTRIBUTES (var2))))
6671 goto fail;
6672
6673 if (lookup_attribute ("omp simd inscan", DECL_ATTRIBUTES (var1)))
6674 std::swap (var1, var2);
6675
6676 if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) == 4)
6677 {
6678 if (!lookup_attribute ("omp simd inscan exclusive",
6679 DECL_ATTRIBUTES (var1)))
6680 goto fail;
6681 var1 = var3;
6682 }
6683
6684 if (loop_vinfo->scan_map == NULL)
6685 goto fail;
6686 tree *init = loop_vinfo->scan_map->get (var1);
6687 if (init == NULL)
6688 goto fail;
6689
6690 /* The IL is as expected, now check if we can actually vectorize it.
6691 Inclusive scan:
6692 _26 = D.2043[_25];
6693 _27 = D.2042[_25];
6694 _28 = _26 + _27;
6695 D.2043[_25] = _28;
6696 D.2042[_25] = _28;
6697 should be vectorized as (where _40 is the vectorized rhs
6698 from the D.2042[_21] = 0; store):
6699 _30 = MEM <vector(8) int> [(int *)&D.2043];
6700 _31 = MEM <vector(8) int> [(int *)&D.2042];
6701 _32 = VEC_PERM_EXPR <_40, _31, { 0, 8, 9, 10, 11, 12, 13, 14 }>;
6702 _33 = _31 + _32;
6703 // _33 = { _31[0], _31[0]+_31[1], _31[1]+_31[2], ..., _31[6]+_31[7] };
6704 _34 = VEC_PERM_EXPR <_40, _33, { 0, 1, 8, 9, 10, 11, 12, 13 }>;
6705 _35 = _33 + _34;
6706 // _35 = { _31[0], _31[0]+_31[1], _31[0]+.._31[2], _31[0]+.._31[3],
6707 // _31[1]+.._31[4], ... _31[4]+.._31[7] };
6708 _36 = VEC_PERM_EXPR <_40, _35, { 0, 1, 2, 3, 8, 9, 10, 11 }>;
6709 _37 = _35 + _36;
6710 // _37 = { _31[0], _31[0]+_31[1], _31[0]+.._31[2], _31[0]+.._31[3],
6711 // _31[0]+.._31[4], ... _31[0]+.._31[7] };
6712 _38 = _30 + _37;
6713 _39 = VEC_PERM_EXPR <_38, _38, { 7, 7, 7, 7, 7, 7, 7, 7 }>;
6714 MEM <vector(8) int> [(int *)&D.2043] = _39;
6715 MEM <vector(8) int> [(int *)&D.2042] = _38;
6716 Exclusive scan:
6717 _26 = D.2043[_25];
6718 D.2044[_25] = _26;
6719 _27 = D.2042[_25];
6720 _28 = _26 + _27;
6721 D.2043[_25] = _28;
6722 should be vectorized as (where _40 is the vectorized rhs
6723 from the D.2042[_21] = 0; store):
6724 _30 = MEM <vector(8) int> [(int *)&D.2043];
6725 _31 = MEM <vector(8) int> [(int *)&D.2042];
6726 _32 = VEC_PERM_EXPR <_40, _31, { 0, 8, 9, 10, 11, 12, 13, 14 }>;
6727 _33 = VEC_PERM_EXPR <_40, _32, { 0, 8, 9, 10, 11, 12, 13, 14 }>;
6728 _34 = _32 + _33;
6729 // _34 = { 0, _31[0], _31[0]+_31[1], _31[1]+_31[2], _31[2]+_31[3],
6730 // _31[3]+_31[4], ... _31[5]+.._31[6] };
6731 _35 = VEC_PERM_EXPR <_40, _34, { 0, 1, 8, 9, 10, 11, 12, 13 }>;
6732 _36 = _34 + _35;
6733 // _36 = { 0, _31[0], _31[0]+_31[1], _31[0]+.._31[2], _31[0]+.._31[3],
6734 // _31[1]+.._31[4], ... _31[3]+.._31[6] };
6735 _37 = VEC_PERM_EXPR <_40, _36, { 0, 1, 2, 3, 8, 9, 10, 11 }>;
6736 _38 = _36 + _37;
6737 // _38 = { 0, _31[0], _31[0]+_31[1], _31[0]+.._31[2], _31[0]+.._31[3],
6738 // _31[0]+.._31[4], ... _31[0]+.._31[6] };
6739 _39 = _30 + _38;
6740 _50 = _31 + _39;
6741 _51 = VEC_PERM_EXPR <_50, _50, { 7, 7, 7, 7, 7, 7, 7, 7 }>;
6742 MEM <vector(8) int> [(int *)&D.2044] = _39;
6743 MEM <vector(8) int> [(int *)&D.2042] = _51; */
6744 enum machine_mode vec_mode = TYPE_MODE (vectype);
6745 optab optab = optab_for_tree_code (code, vectype, optab_default);
6746 if (!optab || optab_handler (optab, vec_mode) == CODE_FOR_nothing)
6747 goto fail;
6748
6749 int units_log2 = scan_store_can_perm_p (vectype, *init);
6750 if (units_log2 == -1)
6751 goto fail;
6752
6753 return true;
6754 }
6755
6756
6757 /* Function vectorizable_scan_store.
6758
6759 Helper of vectorizable_store; arguments are as for vectorizable_store.
6760 Handle only the transformation, checking is done in check_scan_store. */
6761
6762 static bool
6763 vectorizable_scan_store (vec_info *vinfo,
6764 stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
6765 gimple **vec_stmt, int ncopies)
6766 {
6767 loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
6768 dr_vec_info *dr_info = STMT_VINFO_DR_INFO (stmt_info);
6769 tree ref_type = reference_alias_ptr_type (DR_REF (dr_info->dr));
6770 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
6771
6772 if (dump_enabled_p ())
6773 dump_printf_loc (MSG_NOTE, vect_location,
6774 "transform scan store. ncopies = %d\n", ncopies);
6775
6776 gimple *stmt = STMT_VINFO_STMT (stmt_info);
6777 tree rhs = gimple_assign_rhs1 (stmt);
6778 gcc_assert (TREE_CODE (rhs) == SSA_NAME);
6779
6780 tree var = TREE_OPERAND (DR_BASE_ADDRESS (dr_info->dr), 0);
6781 bool inscan_var_store
6782 = lookup_attribute ("omp simd inscan", DECL_ATTRIBUTES (var)) != NULL;
6783
6784 if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) == 4 && !inscan_var_store)
6785 {
6786 use_operand_p use_p;
6787 imm_use_iterator iter;
6788 FOR_EACH_IMM_USE_FAST (use_p, iter, rhs)
6789 {
6790 gimple *use_stmt = USE_STMT (use_p);
6791 if (use_stmt == stmt || is_gimple_debug (use_stmt))
6792 continue;
6793 rhs = gimple_assign_lhs (use_stmt);
6794 break;
6795 }
6796 }
6797
6798 gimple *def_stmt = SSA_NAME_DEF_STMT (rhs);
6799 enum tree_code code = gimple_assign_rhs_code (def_stmt);
6800 if (code == POINTER_PLUS_EXPR)
6801 code = PLUS_EXPR;
6802 gcc_assert (TREE_CODE_LENGTH (code) == binary_op
6803 && commutative_tree_code (code));
6804 tree rhs1 = gimple_assign_rhs1 (def_stmt);
6805 tree rhs2 = gimple_assign_rhs2 (def_stmt);
6806 gcc_assert (TREE_CODE (rhs1) == SSA_NAME && TREE_CODE (rhs2) == SSA_NAME);
6807 gimple *load1_stmt = SSA_NAME_DEF_STMT (rhs1);
6808 gimple *load2_stmt = SSA_NAME_DEF_STMT (rhs2);
6809 stmt_vec_info load1_stmt_info = loop_vinfo->lookup_stmt (load1_stmt);
6810 stmt_vec_info load2_stmt_info = loop_vinfo->lookup_stmt (load2_stmt);
6811 dr_vec_info *load1_dr_info = STMT_VINFO_DR_INFO (load1_stmt_info);
6812 dr_vec_info *load2_dr_info = STMT_VINFO_DR_INFO (load2_stmt_info);
6813 tree var1 = TREE_OPERAND (DR_BASE_ADDRESS (load1_dr_info->dr), 0);
6814 tree var2 = TREE_OPERAND (DR_BASE_ADDRESS (load2_dr_info->dr), 0);
6815
6816 if (lookup_attribute ("omp simd inscan", DECL_ATTRIBUTES (var1)))
6817 {
6818 std::swap (rhs1, rhs2);
6819 std::swap (var1, var2);
6820 std::swap (load1_dr_info, load2_dr_info);
6821 }
6822
6823 tree *init = loop_vinfo->scan_map->get (var1);
6824 gcc_assert (init);
6825
6826 unsigned HOST_WIDE_INT nunits;
6827 if (!TYPE_VECTOR_SUBPARTS (vectype).is_constant (&nunits))
6828 gcc_unreachable ();
6829 auto_vec<enum scan_store_kind, 16> use_whole_vector;
6830 int units_log2 = scan_store_can_perm_p (vectype, *init, &use_whole_vector);
6831 gcc_assert (units_log2 > 0);
6832 auto_vec<tree, 16> perms;
6833 perms.quick_grow (units_log2 + 1);
6834 tree zero_vec = NULL_TREE, masktype = NULL_TREE;
6835 for (int i = 0; i <= units_log2; ++i)
6836 {
6837 unsigned HOST_WIDE_INT j, k;
6838 vec_perm_builder sel (nunits, nunits, 1);
6839 sel.quick_grow (nunits);
6840 if (i == units_log2)
6841 for (j = 0; j < nunits; ++j)
6842 sel[j] = nunits - 1;
6843 else
6844 {
6845 for (j = 0; j < (HOST_WIDE_INT_1U << i); ++j)
6846 sel[j] = j;
6847 for (k = 0; j < nunits; ++j, ++k)
6848 sel[j] = nunits + k;
6849 }
6850 vec_perm_indices indices (sel, i == units_log2 ? 1 : 2, nunits);
6851 if (!use_whole_vector.is_empty ()
6852 && use_whole_vector[i] != scan_store_kind_perm)
6853 {
6854 if (zero_vec == NULL_TREE)
6855 zero_vec = build_zero_cst (vectype);
6856 if (masktype == NULL_TREE
6857 && use_whole_vector[i] == scan_store_kind_lshift_cond)
6858 masktype = truth_type_for (vectype);
6859 perms[i] = vect_gen_perm_mask_any (vectype, indices);
6860 }
6861 else
6862 perms[i] = vect_gen_perm_mask_checked (vectype, indices);
6863 }
6864
6865 tree vec_oprnd1 = NULL_TREE;
6866 tree vec_oprnd2 = NULL_TREE;
6867 tree vec_oprnd3 = NULL_TREE;
6868 tree dataref_ptr = DR_BASE_ADDRESS (dr_info->dr);
6869 tree dataref_offset = build_int_cst (ref_type, 0);
6870 tree bump = vect_get_data_ptr_increment (vinfo, dr_info,
6871 vectype, VMAT_CONTIGUOUS);
6872 tree ldataref_ptr = NULL_TREE;
6873 tree orig = NULL_TREE;
6874 if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) == 4 && !inscan_var_store)
6875 ldataref_ptr = DR_BASE_ADDRESS (load1_dr_info->dr);
6876 auto_vec<tree> vec_oprnds1;
6877 auto_vec<tree> vec_oprnds2;
6878 auto_vec<tree> vec_oprnds3;
6879 vect_get_vec_defs (vinfo, stmt_info, NULL, ncopies,
6880 *init, &vec_oprnds1,
6881 ldataref_ptr == NULL ? rhs1 : NULL, &vec_oprnds2,
6882 rhs2, &vec_oprnds3);
6883 for (int j = 0; j < ncopies; j++)
6884 {
6885 vec_oprnd1 = vec_oprnds1[j];
6886 if (ldataref_ptr == NULL)
6887 vec_oprnd2 = vec_oprnds2[j];
6888 vec_oprnd3 = vec_oprnds3[j];
6889 if (j == 0)
6890 orig = vec_oprnd3;
6891 else if (!inscan_var_store)
6892 dataref_offset = int_const_binop (PLUS_EXPR, dataref_offset, bump);
6893
6894 if (ldataref_ptr)
6895 {
6896 vec_oprnd2 = make_ssa_name (vectype);
6897 tree data_ref = fold_build2 (MEM_REF, vectype,
6898 unshare_expr (ldataref_ptr),
6899 dataref_offset);
6900 vect_copy_ref_info (data_ref, DR_REF (load1_dr_info->dr));
6901 gimple *g = gimple_build_assign (vec_oprnd2, data_ref);
6902 vect_finish_stmt_generation (vinfo, stmt_info, g, gsi);
6903 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (g);
6904 *vec_stmt = STMT_VINFO_VEC_STMTS (stmt_info)[0];
6905 }
6906
6907 tree v = vec_oprnd2;
6908 for (int i = 0; i < units_log2; ++i)
6909 {
6910 tree new_temp = make_ssa_name (vectype);
6911 gimple *g = gimple_build_assign (new_temp, VEC_PERM_EXPR,
6912 (zero_vec
6913 && (use_whole_vector[i]
6914 != scan_store_kind_perm))
6915 ? zero_vec : vec_oprnd1, v,
6916 perms[i]);
6917 vect_finish_stmt_generation (vinfo, stmt_info, g, gsi);
6918 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (g);
6919 *vec_stmt = STMT_VINFO_VEC_STMTS (stmt_info)[0];
6920
6921 if (zero_vec && use_whole_vector[i] == scan_store_kind_lshift_cond)
6922 {
6923 /* The whole-vector shift shifted in zero bits, but if *init
6924 is not initializer_zerop, we need to replace those elements
6925 with elements from vec_oprnd1. */
6926 tree_vector_builder vb (masktype, nunits, 1);
6927 for (unsigned HOST_WIDE_INT k = 0; k < nunits; ++k)
6928 vb.quick_push (k < (HOST_WIDE_INT_1U << i)
6929 ? boolean_false_node : boolean_true_node);
6930
6931 tree new_temp2 = make_ssa_name (vectype);
6932 g = gimple_build_assign (new_temp2, VEC_COND_EXPR, vb.build (),
6933 new_temp, vec_oprnd1);
6934 vect_finish_stmt_generation (vinfo, stmt_info,
6935 g, gsi);
6936 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (g);
6937 new_temp = new_temp2;
6938 }
6939
6940 /* For exclusive scan, perform the perms[i] permutation once
6941 more. */
6942 if (i == 0
6943 && STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) == 4
6944 && v == vec_oprnd2)
6945 {
6946 v = new_temp;
6947 --i;
6948 continue;
6949 }
6950
6951 tree new_temp2 = make_ssa_name (vectype);
6952 g = gimple_build_assign (new_temp2, code, v, new_temp);
6953 vect_finish_stmt_generation (vinfo, stmt_info, g, gsi);
6954 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (g);
6955
6956 v = new_temp2;
6957 }
6958
6959 tree new_temp = make_ssa_name (vectype);
6960 gimple *g = gimple_build_assign (new_temp, code, orig, v);
6961 vect_finish_stmt_generation (vinfo, stmt_info, g, gsi);
6962 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (g);
6963
6964 tree last_perm_arg = new_temp;
6965 /* For exclusive scan, new_temp computed above is the exclusive scan
6966 prefix sum. Turn it into inclusive prefix sum for the broadcast
6967 of the last element into orig. */
6968 if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) == 4)
6969 {
6970 last_perm_arg = make_ssa_name (vectype);
6971 g = gimple_build_assign (last_perm_arg, code, new_temp, vec_oprnd2);
6972 vect_finish_stmt_generation (vinfo, stmt_info, g, gsi);
6973 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (g);
6974 }
6975
6976 orig = make_ssa_name (vectype);
6977 g = gimple_build_assign (orig, VEC_PERM_EXPR, last_perm_arg,
6978 last_perm_arg, perms[units_log2]);
6979 vect_finish_stmt_generation (vinfo, stmt_info, g, gsi);
6980 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (g);
6981
6982 if (!inscan_var_store)
6983 {
6984 tree data_ref = fold_build2 (MEM_REF, vectype,
6985 unshare_expr (dataref_ptr),
6986 dataref_offset);
6987 vect_copy_ref_info (data_ref, DR_REF (dr_info->dr));
6988 g = gimple_build_assign (data_ref, new_temp);
6989 vect_finish_stmt_generation (vinfo, stmt_info, g, gsi);
6990 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (g);
6991 }
6992 }
6993
6994 if (inscan_var_store)
6995 for (int j = 0; j < ncopies; j++)
6996 {
6997 if (j != 0)
6998 dataref_offset = int_const_binop (PLUS_EXPR, dataref_offset, bump);
6999
7000 tree data_ref = fold_build2 (MEM_REF, vectype,
7001 unshare_expr (dataref_ptr),
7002 dataref_offset);
7003 vect_copy_ref_info (data_ref, DR_REF (dr_info->dr));
7004 gimple *g = gimple_build_assign (data_ref, orig);
7005 vect_finish_stmt_generation (vinfo, stmt_info, g, gsi);
7006 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (g);
7007 }
7008 return true;
7009 }
7010
7011
7012 /* Function vectorizable_store.
7013
7014 Check if STMT_INFO defines a non scalar data-ref (array/pointer/structure)
7015 that can be vectorized.
7016 If VEC_STMT is also passed, vectorize STMT_INFO: create a vectorized
7017 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
7018 Return true if STMT_INFO is vectorizable in this way. */
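
/* Calling-convention sketch (illustration only, not a quote of the actual
   callers): the analysis phase invokes this with GSI and VEC_STMT both
   NULL, e.g.

     if (!vectorizable_store (vinfo, stmt_info, NULL, NULL, slp_node,
			      cost_vec))
       return false;

   which only checks, costs, and records STMT_VINFO_TYPE; the transform
   phase calls it again with a non-NULL VEC_STMT and a valid GSI so the
   vector stores are emitted in place.  */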
7019
7020 static bool
7021 vectorizable_store (vec_info *vinfo,
7022 stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
7023 gimple **vec_stmt, slp_tree slp_node,
7024 stmt_vector_for_cost *cost_vec)
7025 {
7026 tree data_ref;
7027 tree op;
7028 tree vec_oprnd = NULL_TREE;
7029 tree elem_type;
7030 loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
7031 class loop *loop = NULL;
7032 machine_mode vec_mode;
7033 tree dummy;
7034 enum vect_def_type rhs_dt = vect_unknown_def_type;
7035 enum vect_def_type mask_dt = vect_unknown_def_type;
7036 tree dataref_ptr = NULL_TREE;
7037 tree dataref_offset = NULL_TREE;
7038 gimple *ptr_incr = NULL;
7039 int ncopies;
7040 int j;
7041 stmt_vec_info first_stmt_info;
7042 bool grouped_store;
7043 unsigned int group_size, i;
7044 vec<tree> oprnds = vNULL;
7045 vec<tree> result_chain = vNULL;
7046 tree offset = NULL_TREE;
7047 vec<tree> vec_oprnds = vNULL;
7048 bool slp = (slp_node != NULL);
7049 unsigned int vec_num;
7050 bb_vec_info bb_vinfo = dyn_cast <bb_vec_info> (vinfo);
7051 tree aggr_type;
7052 gather_scatter_info gs_info;
7053 poly_uint64 vf;
7054 vec_load_store_type vls_type;
7055 tree ref_type;
7056
7057 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
7058 return false;
7059
7060 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
7061 && ! vec_stmt)
7062 return false;
7063
7064 /* Is vectorizable store? */
7065
7066 tree mask = NULL_TREE, mask_vectype = NULL_TREE;
7067 if (gassign *assign = dyn_cast <gassign *> (stmt_info->stmt))
7068 {
7069 tree scalar_dest = gimple_assign_lhs (assign);
7070 if (TREE_CODE (scalar_dest) == VIEW_CONVERT_EXPR
7071 && is_pattern_stmt_p (stmt_info))
7072 scalar_dest = TREE_OPERAND (scalar_dest, 0);
7073 if (TREE_CODE (scalar_dest) != ARRAY_REF
7074 && TREE_CODE (scalar_dest) != BIT_FIELD_REF
7075 && TREE_CODE (scalar_dest) != INDIRECT_REF
7076 && TREE_CODE (scalar_dest) != COMPONENT_REF
7077 && TREE_CODE (scalar_dest) != IMAGPART_EXPR
7078 && TREE_CODE (scalar_dest) != REALPART_EXPR
7079 && TREE_CODE (scalar_dest) != MEM_REF)
7080 return false;
7081 }
7082 else
7083 {
7084 gcall *call = dyn_cast <gcall *> (stmt_info->stmt);
7085 if (!call || !gimple_call_internal_p (call))
7086 return false;
7087
7088 internal_fn ifn = gimple_call_internal_fn (call);
7089 if (!internal_store_fn_p (ifn))
7090 return false;
7091
7092 if (slp_node != NULL)
7093 {
7094 if (dump_enabled_p ())
7095 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
7096 "SLP of masked stores not supported.\n");
7097 return false;
7098 }
7099
7100 int mask_index = internal_fn_mask_index (ifn);
7101 if (mask_index >= 0)
7102 {
7103 mask = gimple_call_arg (call, mask_index);
7104 if (!vect_check_scalar_mask (vinfo, stmt_info, mask, &mask_dt,
7105 &mask_vectype))
7106 return false;
7107 }
7108 }
7109
7110 op = vect_get_store_rhs (stmt_info);
7111
7112 /* Cannot have hybrid store SLP -- that would mean storing to the
7113 same location twice. */
7114 gcc_assert (slp == PURE_SLP_STMT (stmt_info));
7115
7116 tree vectype = STMT_VINFO_VECTYPE (stmt_info), rhs_vectype = NULL_TREE;
7117 poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
7118
7119 if (loop_vinfo)
7120 {
7121 loop = LOOP_VINFO_LOOP (loop_vinfo);
7122 vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
7123 }
7124 else
7125 vf = 1;
7126
7127 /* Multiple types in SLP are handled by creating the appropriate number of
7128 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
7129 case of SLP. */
7130 if (slp)
7131 ncopies = 1;
7132 else
7133 ncopies = vect_get_num_copies (loop_vinfo, vectype);
7134
7135 gcc_assert (ncopies >= 1);
7136
7137 /* FORNOW. This restriction should be relaxed. */
7138 if (loop && nested_in_vect_loop_p (loop, stmt_info) && ncopies > 1)
7139 {
7140 if (dump_enabled_p ())
7141 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
7142 "multiple types in nested loop.\n");
7143 return false;
7144 }
7145
7146 if (!vect_check_store_rhs (vinfo, stmt_info, slp_node,
7147 op, &rhs_dt, &rhs_vectype, &vls_type))
7148 return false;
7149
7150 elem_type = TREE_TYPE (vectype);
7151 vec_mode = TYPE_MODE (vectype);
7152
7153 if (!STMT_VINFO_DATA_REF (stmt_info))
7154 return false;
7155
7156 vect_memory_access_type memory_access_type;
7157 enum dr_alignment_support alignment_support_scheme;
7158 if (!get_load_store_type (vinfo, stmt_info, vectype, slp_node, mask, vls_type,
7159 ncopies, &memory_access_type,
7160 &alignment_support_scheme, &gs_info))
7161 return false;
7162
7163 if (mask)
7164 {
7165 if (memory_access_type == VMAT_CONTIGUOUS)
7166 {
7167 if (!VECTOR_MODE_P (vec_mode)
7168 || !can_vec_mask_load_store_p (vec_mode,
7169 TYPE_MODE (mask_vectype), false))
7170 return false;
7171 }
7172 else if (memory_access_type != VMAT_LOAD_STORE_LANES
7173 && (memory_access_type != VMAT_GATHER_SCATTER
7174 || (gs_info.decl && !VECTOR_BOOLEAN_TYPE_P (mask_vectype))))
7175 {
7176 if (dump_enabled_p ())
7177 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
7178 "unsupported access type for masked store.\n");
7179 return false;
7180 }
7181 }
7182 else
7183 {
7184 /* FORNOW. In some cases can vectorize even if data-type not supported
7185 (e.g. - array initialization with 0). */
7186 if (optab_handler (mov_optab, vec_mode) == CODE_FOR_nothing)
7187 return false;
7188 }
7189
7190 dr_vec_info *dr_info = STMT_VINFO_DR_INFO (stmt_info), *first_dr_info = NULL;
7191 grouped_store = (STMT_VINFO_GROUPED_ACCESS (stmt_info)
7192 && memory_access_type != VMAT_GATHER_SCATTER
7193 && (slp || memory_access_type != VMAT_CONTIGUOUS));
7194 if (grouped_store)
7195 {
7196 first_stmt_info = DR_GROUP_FIRST_ELEMENT (stmt_info);
7197 first_dr_info = STMT_VINFO_DR_INFO (first_stmt_info);
7198 group_size = DR_GROUP_SIZE (first_stmt_info);
7199 }
7200 else
7201 {
7202 first_stmt_info = stmt_info;
7203 first_dr_info = dr_info;
7204 group_size = vec_num = 1;
7205 }
7206
7207 if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) > 1 && !vec_stmt)
7208 {
7209 if (!check_scan_store (vinfo, stmt_info, vectype, rhs_dt, slp, mask,
7210 memory_access_type))
7211 return false;
7212 }
7213
7214 if (!vec_stmt) /* transformation not required. */
7215 {
7216 STMT_VINFO_MEMORY_ACCESS_TYPE (stmt_info) = memory_access_type;
7217
7218 if (loop_vinfo
7219 && LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo))
7220 check_load_store_for_partial_vectors (loop_vinfo, vectype, vls_type,
7221 group_size, memory_access_type,
7222 &gs_info, mask);
7223
7224 if (slp_node
7225 && !vect_maybe_update_slp_op_vectype (SLP_TREE_CHILDREN (slp_node)[0],
7226 vectype))
7227 {
7228 if (dump_enabled_p ())
7229 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
7230 "incompatible vector types for invariants\n");
7231 return false;
7232 }
7233
7234 if (dump_enabled_p ()
7235 && memory_access_type != VMAT_ELEMENTWISE
7236 && memory_access_type != VMAT_GATHER_SCATTER
7237 && alignment_support_scheme != dr_aligned)
7238 dump_printf_loc (MSG_NOTE, vect_location,
7239 "Vectorizing an unaligned access.\n");
7240
7241 STMT_VINFO_TYPE (stmt_info) = store_vec_info_type;
7242 vect_model_store_cost (vinfo, stmt_info, ncopies,
7243 memory_access_type, vls_type, slp_node, cost_vec);
7244 return true;
7245 }
7246 gcc_assert (memory_access_type == STMT_VINFO_MEMORY_ACCESS_TYPE (stmt_info));
7247
7248 /* Transform. */
7249
7250 ensure_base_align (dr_info);
7251
7252 if (memory_access_type == VMAT_GATHER_SCATTER && gs_info.decl)
7253 {
7254 tree vec_oprnd0 = NULL_TREE, vec_oprnd1 = NULL_TREE, src;
7255 tree arglist = TYPE_ARG_TYPES (TREE_TYPE (gs_info.decl));
7256 tree rettype, srctype, ptrtype, idxtype, masktype, scaletype;
7257 tree ptr, var, scale, vec_mask;
7258 tree mask_arg = NULL_TREE, mask_op = NULL_TREE, perm_mask = NULL_TREE;
7259 tree mask_halfvectype = mask_vectype;
7260 edge pe = loop_preheader_edge (loop);
7261 gimple_seq seq;
7262 basic_block new_bb;
7263 enum { NARROW, NONE, WIDEN } modifier;
7264 poly_uint64 scatter_off_nunits
7265 = TYPE_VECTOR_SUBPARTS (gs_info.offset_vectype);
7266
7267 if (known_eq (nunits, scatter_off_nunits))
7268 modifier = NONE;
7269 else if (known_eq (nunits * 2, scatter_off_nunits))
7270 {
7271 modifier = WIDEN;
7272
7273 /* Currently gathers and scatters are only supported for
7274 fixed-length vectors. */
7275 unsigned int count = scatter_off_nunits.to_constant ();
7276 vec_perm_builder sel (count, count, 1);
7277 for (i = 0; i < (unsigned int) count; ++i)
7278 sel.quick_push (i | (count / 2));
7279
7280 vec_perm_indices indices (sel, 1, count);
7281 perm_mask = vect_gen_perm_mask_checked (gs_info.offset_vectype,
7282 indices);
7283 gcc_assert (perm_mask != NULL_TREE);
7284 }
7285 else if (known_eq (nunits, scatter_off_nunits * 2))
7286 {
7287 modifier = NARROW;
7288
7289 /* Currently gathers and scatters are only supported for
7290 fixed-length vectors. */
7291 unsigned int count = nunits.to_constant ();
7292 vec_perm_builder sel (count, count, 1);
7293 for (i = 0; i < (unsigned int) count; ++i)
7294 sel.quick_push (i | (count / 2));
7295
7296 vec_perm_indices indices (sel, 2, count);
7297 perm_mask = vect_gen_perm_mask_checked (vectype, indices);
7298 gcc_assert (perm_mask != NULL_TREE);
7299 ncopies *= 2;
7300
7301 if (mask)
7302 mask_halfvectype = truth_type_for (gs_info.offset_vectype);
7303 }
7304 else
7305 gcc_unreachable ();
7306
7307 rettype = TREE_TYPE (TREE_TYPE (gs_info.decl));
7308 ptrtype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
7309 masktype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
7310 idxtype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
7311 srctype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
7312 scaletype = TREE_VALUE (arglist);
7313
7314 gcc_checking_assert (TREE_CODE (masktype) == INTEGER_TYPE
7315 && TREE_CODE (rettype) == VOID_TYPE);
7316
7317 ptr = fold_convert (ptrtype, gs_info.base);
7318 if (!is_gimple_min_invariant (ptr))
7319 {
7320 ptr = force_gimple_operand (ptr, &seq, true, NULL_TREE);
7321 new_bb = gsi_insert_seq_on_edge_immediate (pe, seq);
7322 gcc_assert (!new_bb);
7323 }
7324
7325 if (mask == NULL_TREE)
7326 {
7327 mask_arg = build_int_cst (masktype, -1);
7328 mask_arg = vect_init_vector (vinfo, stmt_info,
7329 mask_arg, masktype, NULL);
7330 }
7331
7332 scale = build_int_cst (scaletype, gs_info.scale);
7333
7334 auto_vec<tree> vec_oprnds0;
7335 auto_vec<tree> vec_oprnds1;
7336 auto_vec<tree> vec_masks;
7337 if (mask)
7338 {
7339 tree mask_vectype = truth_type_for (vectype);
7340 vect_get_vec_defs_for_operand (vinfo, stmt_info,
7341 modifier == NARROW
7342 ? ncopies / 2 : ncopies,
7343 mask, &vec_masks, mask_vectype);
7344 }
7345 vect_get_vec_defs_for_operand (vinfo, stmt_info,
7346 modifier == WIDEN
7347 ? ncopies / 2 : ncopies,
7348 gs_info.offset, &vec_oprnds0);
7349 vect_get_vec_defs_for_operand (vinfo, stmt_info,
7350 modifier == NARROW
7351 ? ncopies / 2 : ncopies,
7352 op, &vec_oprnds1);
7353 for (j = 0; j < ncopies; ++j)
7354 {
7355 if (modifier == WIDEN)
7356 {
7357 if (j & 1)
7358 op = permute_vec_elements (vinfo, vec_oprnd0, vec_oprnd0,
7359 perm_mask, stmt_info, gsi);
7360 else
7361 op = vec_oprnd0 = vec_oprnds0[j / 2];
7362 src = vec_oprnd1 = vec_oprnds1[j];
7363 if (mask)
7364 mask_op = vec_mask = vec_masks[j];
7365 }
7366 else if (modifier == NARROW)
7367 {
7368 if (j & 1)
7369 src = permute_vec_elements (vinfo, vec_oprnd1, vec_oprnd1,
7370 perm_mask, stmt_info, gsi);
7371 else
7372 src = vec_oprnd1 = vec_oprnds1[j / 2];
7373 op = vec_oprnd0 = vec_oprnds0[j];
7374 if (mask)
7375 mask_op = vec_mask = vec_masks[j / 2];
7376 }
7377 else
7378 {
7379 op = vec_oprnd0 = vec_oprnds0[j];
7380 src = vec_oprnd1 = vec_oprnds1[j];
7381 if (mask)
7382 mask_op = vec_mask = vec_masks[j];
7383 }
7384
7385 if (!useless_type_conversion_p (srctype, TREE_TYPE (src)))
7386 {
7387 gcc_assert (known_eq (TYPE_VECTOR_SUBPARTS (TREE_TYPE (src)),
7388 TYPE_VECTOR_SUBPARTS (srctype)));
7389 var = vect_get_new_ssa_name (srctype, vect_simple_var);
7390 src = build1 (VIEW_CONVERT_EXPR, srctype, src);
7391 gassign *new_stmt
7392 = gimple_build_assign (var, VIEW_CONVERT_EXPR, src);
7393 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
7394 src = var;
7395 }
7396
7397 if (!useless_type_conversion_p (idxtype, TREE_TYPE (op)))
7398 {
7399 gcc_assert (known_eq (TYPE_VECTOR_SUBPARTS (TREE_TYPE (op)),
7400 TYPE_VECTOR_SUBPARTS (idxtype)));
7401 var = vect_get_new_ssa_name (idxtype, vect_simple_var);
7402 op = build1 (VIEW_CONVERT_EXPR, idxtype, op);
7403 gassign *new_stmt
7404 = gimple_build_assign (var, VIEW_CONVERT_EXPR, op);
7405 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
7406 op = var;
7407 }
7408
7409 if (mask)
7410 {
7411 tree utype;
7412 mask_arg = mask_op;
7413 if (modifier == NARROW)
7414 {
7415 var = vect_get_new_ssa_name (mask_halfvectype,
7416 vect_simple_var);
7417 gassign *new_stmt
7418 = gimple_build_assign (var, (j & 1) ? VEC_UNPACK_HI_EXPR
7419 : VEC_UNPACK_LO_EXPR,
7420 mask_op);
7421 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
7422 mask_arg = var;
7423 }
7424 tree optype = TREE_TYPE (mask_arg);
7425 if (TYPE_MODE (masktype) == TYPE_MODE (optype))
7426 utype = masktype;
7427 else
7428 utype = lang_hooks.types.type_for_mode (TYPE_MODE (optype), 1);
7429 var = vect_get_new_ssa_name (utype, vect_scalar_var);
7430 mask_arg = build1 (VIEW_CONVERT_EXPR, utype, mask_arg);
7431 gassign *new_stmt
7432 = gimple_build_assign (var, VIEW_CONVERT_EXPR, mask_arg);
7433 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
7434 mask_arg = var;
7435 if (!useless_type_conversion_p (masktype, utype))
7436 {
7437 gcc_assert (TYPE_PRECISION (utype)
7438 <= TYPE_PRECISION (masktype));
7439 var = vect_get_new_ssa_name (masktype, vect_scalar_var);
7440 new_stmt = gimple_build_assign (var, NOP_EXPR, mask_arg);
7441 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
7442 mask_arg = var;
7443 }
7444 }
7445
7446 gcall *new_stmt
7447 = gimple_build_call (gs_info.decl, 5, ptr, mask_arg, op, src, scale);
7448 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
7449
7450 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
7451 }
7452 *vec_stmt = STMT_VINFO_VEC_STMTS (stmt_info)[0];
7453 return true;
7454 }
7455 else if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) >= 3)
7456 return vectorizable_scan_store (vinfo, stmt_info, gsi, vec_stmt, ncopies);
7457
7458 if (STMT_VINFO_GROUPED_ACCESS (stmt_info))
7459 DR_GROUP_STORE_COUNT (DR_GROUP_FIRST_ELEMENT (stmt_info))++;
7460
7461 if (grouped_store)
7462 {
7463 /* FORNOW */
7464 gcc_assert (!loop || !nested_in_vect_loop_p (loop, stmt_info));
7465
7466 /* We vectorize all the stmts of the interleaving group when we
7467 reach the last stmt in the group. */
7468 if (DR_GROUP_STORE_COUNT (first_stmt_info)
7469 < DR_GROUP_SIZE (first_stmt_info)
7470 && !slp)
7471 {
7472 *vec_stmt = NULL;
7473 return true;
7474 }
7475
7476 if (slp)
7477 {
7478 grouped_store = false;
7479 /* VEC_NUM is the number of vect stmts to be created for this
7480 group. */
7481 vec_num = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
7482 first_stmt_info = SLP_TREE_SCALAR_STMTS (slp_node)[0];
7483 gcc_assert (DR_GROUP_FIRST_ELEMENT (first_stmt_info)
7484 == first_stmt_info);
7485 first_dr_info = STMT_VINFO_DR_INFO (first_stmt_info);
7486 op = vect_get_store_rhs (first_stmt_info);
7487 }
7488 else
7489 /* VEC_NUM is the number of vect stmts to be created for this
7490 group. */
7491 vec_num = group_size;
7492
7493 ref_type = get_group_alias_ptr_type (first_stmt_info);
7494 }
7495 else
7496 ref_type = reference_alias_ptr_type (DR_REF (first_dr_info->dr));
7497
7498 if (dump_enabled_p ())
7499 dump_printf_loc (MSG_NOTE, vect_location,
7500 "transform store. ncopies = %d\n", ncopies);
7501
7502 if (memory_access_type == VMAT_ELEMENTWISE
7503 || memory_access_type == VMAT_STRIDED_SLP)
7504 {
7505 gimple_stmt_iterator incr_gsi;
7506 bool insert_after;
7507 gimple *incr;
7508 tree offvar;
7509 tree ivstep;
7510 tree running_off;
7511 tree stride_base, stride_step, alias_off;
7512 tree vec_oprnd;
7513 tree dr_offset;
7514 unsigned int g;
7515 /* Checked by get_load_store_type. */
7516 unsigned int const_nunits = nunits.to_constant ();
7517
7518 gcc_assert (!LOOP_VINFO_FULLY_MASKED_P (loop_vinfo));
7519 gcc_assert (!nested_in_vect_loop_p (loop, stmt_info));
7520
7521 dr_offset = get_dr_vinfo_offset (vinfo, first_dr_info);
7522 stride_base
7523 = fold_build_pointer_plus
7524 (DR_BASE_ADDRESS (first_dr_info->dr),
7525 size_binop (PLUS_EXPR,
7526 convert_to_ptrofftype (dr_offset),
7527 convert_to_ptrofftype (DR_INIT (first_dr_info->dr))));
7528 stride_step = fold_convert (sizetype, DR_STEP (first_dr_info->dr));
7529
7530 /* For a store with loop-invariant (but other than power-of-2)
7531 stride (i.e. not a grouped access) like so:
7532
7533 for (i = 0; i < n; i += stride)
7534 array[i] = ...;
7535
7536 we generate a new induction variable and new stores from
7537 the components of the (vectorized) rhs:
7538
7539 for (j = 0; ; j += VF*stride)
7540 vectemp = ...;
7541 tmp1 = vectemp[0];
7542 array[j] = tmp1;
7543 tmp2 = vectemp[1];
7544 array[j + stride] = tmp2;
7545 ...
7546 */
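      /* Concrete instance (illustration only): with a 4-element vector
	 type and stride == 3 the template above becomes roughly

	   for (j = 0; ; j += 4*3)
	     {
	       vectemp = ...;
	       array[j]   = vectemp[0];
	       array[j+3] = vectemp[1];
	       array[j+6] = vectemp[2];
	       array[j+9] = vectemp[3];
	     }

	 i.e. one BIT_FIELD_REF extraction plus one POINTER_PLUS_EXPR bump
	 of the running pointer per element, as emitted below.  */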
7547
7548 unsigned nstores = const_nunits;
7549 unsigned lnel = 1;
7550 tree ltype = elem_type;
7551 tree lvectype = vectype;
7552 if (slp)
7553 {
7554 if (group_size < const_nunits
7555 && const_nunits % group_size == 0)
7556 {
7557 nstores = const_nunits / group_size;
7558 lnel = group_size;
7559 ltype = build_vector_type (elem_type, group_size);
7560 lvectype = vectype;
7561
7562 /* First check if vec_extract optab doesn't support extraction
7563 of vector elts directly. */
7564 scalar_mode elmode = SCALAR_TYPE_MODE (elem_type);
7565 machine_mode vmode;
7566 if (!VECTOR_MODE_P (TYPE_MODE (vectype))
7567 || !related_vector_mode (TYPE_MODE (vectype), elmode,
7568 group_size).exists (&vmode)
7569 || (convert_optab_handler (vec_extract_optab,
7570 TYPE_MODE (vectype), vmode)
7571 == CODE_FOR_nothing))
7572 {
7573 /* Try to avoid emitting an extract of vector elements
7574 by performing the extracts using an integer type of the
7575 same size, extracting from a vector of those and then
7576 re-interpreting it as the original vector type if
7577 supported. */
7578 unsigned lsize
7579 = group_size * GET_MODE_BITSIZE (elmode);
7580 unsigned int lnunits = const_nunits / group_size;
7581 /* If we can't construct such a vector fall back to
7582 element extracts from the original vector type and
7583 element size stores. */
7584 if (int_mode_for_size (lsize, 0).exists (&elmode)
7585 && VECTOR_MODE_P (TYPE_MODE (vectype))
7586 && related_vector_mode (TYPE_MODE (vectype), elmode,
7587 lnunits).exists (&vmode)
7588 && (convert_optab_handler (vec_extract_optab,
7589 vmode, elmode)
7590 != CODE_FOR_nothing))
7591 {
7592 nstores = lnunits;
7593 lnel = group_size;
7594 ltype = build_nonstandard_integer_type (lsize, 1);
7595 lvectype = build_vector_type (ltype, nstores);
7596 }
7597 /* Else fall back to vector extraction anyway.
7598 Fewer stores are more important than avoiding spilling
7599 of the vector we extract from. Compared to the
7600 construction case in vectorizable_load no store-forwarding
7601 issue exists here for reasonable archs. */
7602 }
7603 }
7604 else if (group_size >= const_nunits
7605 && group_size % const_nunits == 0)
7606 {
7607 nstores = 1;
7608 lnel = const_nunits;
7609 ltype = vectype;
7610 lvectype = vectype;
7611 }
7612 ltype = build_aligned_type (ltype, TYPE_ALIGN (elem_type));
7613 ncopies = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
7614 }
7615
7616 ivstep = stride_step;
7617 ivstep = fold_build2 (MULT_EXPR, TREE_TYPE (ivstep), ivstep,
7618 build_int_cst (TREE_TYPE (ivstep), vf));
7619
7620 standard_iv_increment_position (loop, &incr_gsi, &insert_after);
7621
7622 stride_base = cse_and_gimplify_to_preheader (loop_vinfo, stride_base);
7623 ivstep = cse_and_gimplify_to_preheader (loop_vinfo, ivstep);
7624 create_iv (stride_base, ivstep, NULL,
7625 loop, &incr_gsi, insert_after,
7626 &offvar, NULL);
7627 incr = gsi_stmt (incr_gsi);
7628
7629 stride_step = cse_and_gimplify_to_preheader (loop_vinfo, stride_step);
7630
7631 alias_off = build_int_cst (ref_type, 0);
7632 stmt_vec_info next_stmt_info = first_stmt_info;
7633 for (g = 0; g < group_size; g++)
7634 {
7635 running_off = offvar;
7636 if (g)
7637 {
7638 tree size = TYPE_SIZE_UNIT (ltype);
7639 tree pos = fold_build2 (MULT_EXPR, sizetype, size_int (g),
7640 size);
7641 tree newoff = copy_ssa_name (running_off, NULL);
7642 incr = gimple_build_assign (newoff, POINTER_PLUS_EXPR,
7643 running_off, pos);
7644 vect_finish_stmt_generation (vinfo, stmt_info, incr, gsi);
7645 running_off = newoff;
7646 }
7647 if (!slp)
7648 op = vect_get_store_rhs (next_stmt_info);
7649 vect_get_vec_defs (vinfo, next_stmt_info, slp_node, ncopies,
7650 op, &vec_oprnds);
7651 unsigned int group_el = 0;
7652 unsigned HOST_WIDE_INT
7653 elsz = tree_to_uhwi (TYPE_SIZE_UNIT (TREE_TYPE (vectype)));
7654 for (j = 0; j < ncopies; j++)
7655 {
7656 vec_oprnd = vec_oprnds[j];
7657 /* Pun the vector to extract from if necessary. */
7658 if (lvectype != vectype)
7659 {
7660 tree tem = make_ssa_name (lvectype);
7661 gimple *pun
7662 = gimple_build_assign (tem, build1 (VIEW_CONVERT_EXPR,
7663 lvectype, vec_oprnd));
7664 vect_finish_stmt_generation (vinfo, stmt_info, pun, gsi);
7665 vec_oprnd = tem;
7666 }
7667 for (i = 0; i < nstores; i++)
7668 {
7669 tree newref, newoff;
7670 gimple *incr, *assign;
7671 tree size = TYPE_SIZE (ltype);
7672 /* Extract the i'th component. */
7673 tree pos = fold_build2 (MULT_EXPR, bitsizetype,
7674 bitsize_int (i), size);
7675 tree elem = fold_build3 (BIT_FIELD_REF, ltype, vec_oprnd,
7676 size, pos);
7677
7678 elem = force_gimple_operand_gsi (gsi, elem, true,
7679 NULL_TREE, true,
7680 GSI_SAME_STMT);
7681
7682 tree this_off = build_int_cst (TREE_TYPE (alias_off),
7683 group_el * elsz);
7684 newref = build2 (MEM_REF, ltype,
7685 running_off, this_off);
7686 vect_copy_ref_info (newref, DR_REF (first_dr_info->dr));
7687
7688 /* And store it to *running_off. */
7689 assign = gimple_build_assign (newref, elem);
7690 vect_finish_stmt_generation (vinfo, stmt_info, assign, gsi);
7691
7692 group_el += lnel;
7693 if (! slp
7694 || group_el == group_size)
7695 {
7696 newoff = copy_ssa_name (running_off, NULL);
7697 incr = gimple_build_assign (newoff, POINTER_PLUS_EXPR,
7698 running_off, stride_step);
7699 vect_finish_stmt_generation (vinfo, stmt_info, incr, gsi);
7700
7701 running_off = newoff;
7702 group_el = 0;
7703 }
7704 if (g == group_size - 1
7705 && !slp)
7706 {
7707 if (j == 0 && i == 0)
7708 *vec_stmt = assign;
7709 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (assign);
7710 }
7711 }
7712 }
7713 next_stmt_info = DR_GROUP_NEXT_ELEMENT (next_stmt_info);
7714 if (slp)
7715 break;
7716 }
7717
7718 vec_oprnds.release ();
7719 return true;
7720 }
7721
7722 auto_vec<tree> dr_chain (group_size);
7723 oprnds.create (group_size);
7724
7725 /* Gather-scatter accesses perform only component accesses, alignment
7726 is irrelevant for them. */
7727 if (memory_access_type == VMAT_GATHER_SCATTER)
7728 alignment_support_scheme = dr_unaligned_supported;
7729 else
7730 alignment_support_scheme
7731 = vect_supportable_dr_alignment (vinfo, first_dr_info, false);
7732
7733 gcc_assert (alignment_support_scheme);
7734 vec_loop_masks *loop_masks
7735 = (loop_vinfo && LOOP_VINFO_FULLY_MASKED_P (loop_vinfo)
7736 ? &LOOP_VINFO_MASKS (loop_vinfo)
7737 : NULL);
7738 vec_loop_lens *loop_lens
7739 = (loop_vinfo && LOOP_VINFO_FULLY_WITH_LENGTH_P (loop_vinfo)
7740 ? &LOOP_VINFO_LENS (loop_vinfo)
7741 : NULL);
7742
7743 /* Shouldn't go with length-based approach if fully masked. */
7744 gcc_assert (!loop_lens || !loop_masks);
7745
7746 /* Targets with store-lane instructions must not require explicit
7747 realignment. vect_supportable_dr_alignment always returns either
7748 dr_aligned or dr_unaligned_supported for masked operations. */
7749 gcc_assert ((memory_access_type != VMAT_LOAD_STORE_LANES
7750 && !mask
7751 && !loop_masks)
7752 || alignment_support_scheme == dr_aligned
7753 || alignment_support_scheme == dr_unaligned_supported);
7754
7755 if (memory_access_type == VMAT_CONTIGUOUS_DOWN
7756 || memory_access_type == VMAT_CONTIGUOUS_REVERSE)
7757 offset = size_int (-TYPE_VECTOR_SUBPARTS (vectype) + 1);
7758
7759 tree bump;
7760 tree vec_offset = NULL_TREE;
7761 if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
7762 {
7763 aggr_type = NULL_TREE;
7764 bump = NULL_TREE;
7765 }
7766 else if (memory_access_type == VMAT_GATHER_SCATTER)
7767 {
7768 aggr_type = elem_type;
7769 vect_get_strided_load_store_ops (stmt_info, loop_vinfo, &gs_info,
7770 &bump, &vec_offset);
7771 }
7772 else
7773 {
7774 if (memory_access_type == VMAT_LOAD_STORE_LANES)
7775 aggr_type = build_array_type_nelts (elem_type, vec_num * nunits);
7776 else
7777 aggr_type = vectype;
7778 bump = vect_get_data_ptr_increment (vinfo, dr_info, aggr_type,
7779 memory_access_type);
7780 }
7781
7782 if (mask)
7783 LOOP_VINFO_HAS_MASK_STORE (loop_vinfo) = true;
7784
7785 /* In case the vectorization factor (VF) is bigger than the number
7786 of elements that we can fit in a vectype (nunits), we have to generate
7787 more than one vector stmt, i.e. we need to "unroll" the
7788 vector stmt by a factor VF/nunits. */
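  /* For example (illustration): with VF == 8 and a 4-element vectype,
     NCOPIES == VF/nunits == 2, so the j-loop below runs twice and emits
     two vector stores for each scalar store.  */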
7789
7790 /* In case of interleaving (non-unit grouped access):
7791
7792 S1: &base + 2 = x2
7793 S2: &base = x0
7794 S3: &base + 1 = x1
7795 S4: &base + 3 = x3
7796
7797 We create vectorized stores starting from base address (the access of the
7798 first stmt in the chain (S2 in the above example), when the last store stmt
7799 of the chain (S4) is reached:
7800
7801 VS1: &base = vx2
7802 VS2: &base + vec_size*1 = vx0
7803 VS3: &base + vec_size*2 = vx1
7804 VS4: &base + vec_size*3 = vx3
7805
7806 Then permutation statements are generated:
7807
7808 VS5: vx5 = VEC_PERM_EXPR < vx0, vx3, {0, 8, 1, 9, 2, 10, 3, 11} >
7809 VS6: vx6 = VEC_PERM_EXPR < vx0, vx3, {4, 12, 5, 13, 6, 14, 7, 15} >
7810 ...
7811
7812 And they are put in STMT_VINFO_VEC_STMT of the corresponding scalar stmts
7813 (the order of the data-refs in the output of vect_permute_store_chain
7814 corresponds to the order of scalar stmts in the interleaving chain - see
7815 the documentation of vect_permute_store_chain()).
7816
7817 In case of both multiple types and interleaving, above vector stores and
7818 permutation stmts are created for every copy. The result vector stmts are
7819 put in STMT_VINFO_VEC_STMT for the first copy and in the corresponding
7820 STMT_VINFO_RELATED_STMT for the next copies.
7821 */
7822
7823 auto_vec<tree> vec_masks;
7824 tree vec_mask = NULL;
7825 auto_vec<tree> vec_offsets;
7826 auto_vec<vec<tree> > gvec_oprnds;
7827 gvec_oprnds.safe_grow_cleared (group_size, true);
7828 for (j = 0; j < ncopies; j++)
7829 {
7830 gimple *new_stmt;
7831 if (j == 0)
7832 {
7833 if (slp)
7834 {
7835 /* Get vectorized arguments for SLP_NODE. */
7836 vect_get_vec_defs (vinfo, stmt_info, slp_node, 1,
7837 op, &vec_oprnds);
7838 vec_oprnd = vec_oprnds[0];
7839 }
7840 else
7841 {
7842 /* For interleaved stores we collect vectorized defs for all the
7843 stores in the group in DR_CHAIN and OPRNDS. DR_CHAIN is then
7844 used as an input to vect_permute_store_chain().
7845
7846 If the store is not grouped, DR_GROUP_SIZE is 1, and DR_CHAIN
7847 and OPRNDS are of size 1. */
7848 stmt_vec_info next_stmt_info = first_stmt_info;
7849 for (i = 0; i < group_size; i++)
7850 {
7851 /* Since gaps are not supported for interleaved stores,
7852 DR_GROUP_SIZE is the exact number of stmts in the chain.
7853 Therefore, NEXT_STMT_INFO can't be NULL. In case
7854 there is no interleaving, DR_GROUP_SIZE is 1,
7855 and only one iteration of the loop will be executed. */
7856 op = vect_get_store_rhs (next_stmt_info);
7857 vect_get_vec_defs_for_operand (vinfo, next_stmt_info,
7858 ncopies, op, &gvec_oprnds[i]);
7859 vec_oprnd = gvec_oprnds[i][0];
7860 dr_chain.quick_push (gvec_oprnds[i][0]);
7861 oprnds.quick_push (gvec_oprnds[i][0]);
7862 next_stmt_info = DR_GROUP_NEXT_ELEMENT (next_stmt_info);
7863 }
7864 if (mask)
7865 {
7866 vect_get_vec_defs_for_operand (vinfo, stmt_info, ncopies,
7867 mask, &vec_masks, mask_vectype);
7868 vec_mask = vec_masks[0];
7869 }
7870 }
7871
7872 /* We should have caught mismatched types earlier. */
7873 gcc_assert (useless_type_conversion_p (vectype,
7874 TREE_TYPE (vec_oprnd)));
7875 bool simd_lane_access_p
7876 = STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) != 0;
7877 if (simd_lane_access_p
7878 && !loop_masks
7879 && TREE_CODE (DR_BASE_ADDRESS (first_dr_info->dr)) == ADDR_EXPR
7880 && VAR_P (TREE_OPERAND (DR_BASE_ADDRESS (first_dr_info->dr), 0))
7881 && integer_zerop (get_dr_vinfo_offset (vinfo, first_dr_info))
7882 && integer_zerop (DR_INIT (first_dr_info->dr))
7883 && alias_sets_conflict_p (get_alias_set (aggr_type),
7884 get_alias_set (TREE_TYPE (ref_type))))
7885 {
7886 dataref_ptr = unshare_expr (DR_BASE_ADDRESS (first_dr_info->dr));
7887 dataref_offset = build_int_cst (ref_type, 0);
7888 }
7889 else if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
7890 {
7891 vect_get_gather_scatter_ops (vinfo, loop, stmt_info, &gs_info,
7892 &dataref_ptr, &vec_offsets, ncopies);
7893 vec_offset = vec_offsets[0];
7894 }
7895 else
7896 dataref_ptr
7897 = vect_create_data_ref_ptr (vinfo, first_stmt_info, aggr_type,
7898 simd_lane_access_p ? loop : NULL,
7899 offset, &dummy, gsi, &ptr_incr,
7900 simd_lane_access_p, NULL_TREE, bump);
7901 }
7902 else
7903 {
7904 /* For interleaved stores we created vectorized defs for all the
7905 defs stored in OPRNDS in the previous iteration (previous copy).
7906 DR_CHAIN is then used as an input to vect_permute_store_chain().
7907 If the store is not grouped, DR_GROUP_SIZE is 1, and DR_CHAIN and
7908 OPRNDS are of size 1. */
7909 for (i = 0; i < group_size; i++)
7910 {
7911 vec_oprnd = gvec_oprnds[i][j];
7912 dr_chain[i] = gvec_oprnds[i][j];
7913 oprnds[i] = gvec_oprnds[i][j];
7914 }
7915 if (mask)
7916 vec_mask = vec_masks[j];
7917 if (dataref_offset)
7918 dataref_offset
7919 = int_const_binop (PLUS_EXPR, dataref_offset, bump);
7920 else if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
7921 vec_offset = vec_offsets[j];
7922 else
7923 dataref_ptr = bump_vector_ptr (vinfo, dataref_ptr, ptr_incr, gsi,
7924 stmt_info, bump);
7925 }
7926
7927 if (memory_access_type == VMAT_LOAD_STORE_LANES)
7928 {
7929 tree vec_array;
7930
7931 /* Get an array into which we can store the individual vectors. */
7932 vec_array = create_vector_array (vectype, vec_num);
7933
7934 /* Invalidate the current contents of VEC_ARRAY. This should
7935 become an RTL clobber too, which prevents the vector registers
7936 from being upward-exposed. */
7937 vect_clobber_variable (vinfo, stmt_info, gsi, vec_array);
7938
7939 /* Store the individual vectors into the array. */
7940 for (i = 0; i < vec_num; i++)
7941 {
7942 vec_oprnd = dr_chain[i];
7943 write_vector_array (vinfo, stmt_info,
7944 gsi, vec_oprnd, vec_array, i);
7945 }
7946
7947 tree final_mask = NULL;
7948 if (loop_masks)
7949 final_mask = vect_get_loop_mask (gsi, loop_masks, ncopies,
7950 vectype, j);
7951 if (vec_mask)
7952 final_mask = prepare_load_store_mask (mask_vectype, final_mask,
7953 vec_mask, gsi);
7954
7955 gcall *call;
7956 if (final_mask)
7957 {
7958 /* Emit:
7959 MASK_STORE_LANES (DATAREF_PTR, ALIAS_PTR, VEC_MASK,
7960 VEC_ARRAY). */
7961 unsigned int align = TYPE_ALIGN_UNIT (TREE_TYPE (vectype));
7962 tree alias_ptr = build_int_cst (ref_type, align);
7963 call = gimple_build_call_internal (IFN_MASK_STORE_LANES, 4,
7964 dataref_ptr, alias_ptr,
7965 final_mask, vec_array);
7966 }
7967 else
7968 {
7969 /* Emit:
7970 MEM_REF[...all elements...] = STORE_LANES (VEC_ARRAY). */
7971 data_ref = create_array_ref (aggr_type, dataref_ptr, ref_type);
7972 call = gimple_build_call_internal (IFN_STORE_LANES, 1,
7973 vec_array);
7974 gimple_call_set_lhs (call, data_ref);
7975 }
7976 gimple_call_set_nothrow (call, true);
7977 vect_finish_stmt_generation (vinfo, stmt_info, call, gsi);
7978 new_stmt = call;
7979
7980 /* Record that VEC_ARRAY is now dead. */
7981 vect_clobber_variable (vinfo, stmt_info, gsi, vec_array);
7982 }
7983 else
7984 {
7985 new_stmt = NULL;
7986 if (grouped_store)
7987 {
7988 if (j == 0)
7989 result_chain.create (group_size);
7990 /* Permute. */
7991 vect_permute_store_chain (vinfo, dr_chain, group_size, stmt_info,
7992 gsi, &result_chain);
7993 }
7994
7995 stmt_vec_info next_stmt_info = first_stmt_info;
7996 for (i = 0; i < vec_num; i++)
7997 {
7998 unsigned misalign;
7999 unsigned HOST_WIDE_INT align;
8000
8001 tree final_mask = NULL_TREE;
8002 if (loop_masks)
8003 final_mask = vect_get_loop_mask (gsi, loop_masks,
8004 vec_num * ncopies,
8005 vectype, vec_num * j + i);
8006 if (vec_mask)
8007 final_mask = prepare_load_store_mask (mask_vectype, final_mask,
8008 vec_mask, gsi);
8009
8010 if (memory_access_type == VMAT_GATHER_SCATTER)
8011 {
8012 tree scale = size_int (gs_info.scale);
8013 gcall *call;
8014 if (loop_masks)
8015 call = gimple_build_call_internal
8016 (IFN_MASK_SCATTER_STORE, 5, dataref_ptr, vec_offset,
8017 scale, vec_oprnd, final_mask);
8018 else
8019 call = gimple_build_call_internal
8020 (IFN_SCATTER_STORE, 4, dataref_ptr, vec_offset,
8021 scale, vec_oprnd);
8022 gimple_call_set_nothrow (call, true);
8023 vect_finish_stmt_generation (vinfo, stmt_info, call, gsi);
8024 new_stmt = call;
8025 break;
8026 }
8027
8028 if (i > 0)
8029 /* Bump the vector pointer. */
8030 dataref_ptr = bump_vector_ptr (vinfo, dataref_ptr, ptr_incr,
8031 gsi, stmt_info, bump);
8032
8033 if (slp)
8034 vec_oprnd = vec_oprnds[i];
8035 else if (grouped_store)
8036 /* For grouped stores vectorized defs are interleaved in
8037 vect_permute_store_chain(). */
8038 vec_oprnd = result_chain[i];
8039
8040 align = known_alignment (DR_TARGET_ALIGNMENT (first_dr_info));
8041 if (aligned_access_p (first_dr_info))
8042 misalign = 0;
8043 else if (DR_MISALIGNMENT (first_dr_info) == -1)
8044 {
8045 align = dr_alignment (vect_dr_behavior (vinfo, first_dr_info));
8046 misalign = 0;
8047 }
8048 else
8049 misalign = DR_MISALIGNMENT (first_dr_info);
8050 if (dataref_offset == NULL_TREE
8051 && TREE_CODE (dataref_ptr) == SSA_NAME)
8052 set_ptr_info_alignment (get_ptr_info (dataref_ptr), align,
8053 misalign);
8054
8055 if (memory_access_type == VMAT_CONTIGUOUS_REVERSE)
8056 {
8057 tree perm_mask = perm_mask_for_reverse (vectype);
8058 tree perm_dest = vect_create_destination_var
8059 (vect_get_store_rhs (stmt_info), vectype);
8060 tree new_temp = make_ssa_name (perm_dest);
8061
8062 /* Generate the permute statement. */
8063 gimple *perm_stmt
8064 = gimple_build_assign (new_temp, VEC_PERM_EXPR, vec_oprnd,
8065 vec_oprnd, perm_mask);
8066 vect_finish_stmt_generation (vinfo, stmt_info, perm_stmt, gsi);
8067
8068 perm_stmt = SSA_NAME_DEF_STMT (new_temp);
8069 vec_oprnd = new_temp;
8070 }
8071
8072 /* Arguments are ready. Create the new vector stmt. */
8073 if (final_mask)
8074 {
8075 align = least_bit_hwi (misalign | align);
8076 tree ptr = build_int_cst (ref_type, align);
8077 gcall *call
8078 = gimple_build_call_internal (IFN_MASK_STORE, 4,
8079 dataref_ptr, ptr,
8080 final_mask, vec_oprnd);
8081 gimple_call_set_nothrow (call, true);
8082 vect_finish_stmt_generation (vinfo, stmt_info, call, gsi);
8083 new_stmt = call;
8084 }
8085 else if (loop_lens)
8086 {
8087 tree final_len
8088 = vect_get_loop_len (loop_vinfo, loop_lens,
8089 vec_num * ncopies, vec_num * j + i);
8090 align = least_bit_hwi (misalign | align);
8091 tree ptr = build_int_cst (ref_type, align);
8092 machine_mode vmode = TYPE_MODE (vectype);
8093 opt_machine_mode new_ovmode
8094 = get_len_load_store_mode (vmode, false);
8095 machine_mode new_vmode = new_ovmode.require ();
8096 /* Need conversion if it's wrapped with VnQI. */
8097 if (vmode != new_vmode)
8098 {
8099 tree new_vtype
8100 = build_vector_type_for_mode (unsigned_intQI_type_node,
8101 new_vmode);
8102 tree var
8103 = vect_get_new_ssa_name (new_vtype, vect_simple_var);
8104 vec_oprnd
8105 = build1 (VIEW_CONVERT_EXPR, new_vtype, vec_oprnd);
8106 gassign *new_stmt
8107 = gimple_build_assign (var, VIEW_CONVERT_EXPR,
8108 vec_oprnd);
8109 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt,
8110 gsi);
8111 vec_oprnd = var;
8112 }
8113 gcall *call
8114 = gimple_build_call_internal (IFN_LEN_STORE, 4, dataref_ptr,
8115 ptr, final_len, vec_oprnd);
8116 gimple_call_set_nothrow (call, true);
8117 vect_finish_stmt_generation (vinfo, stmt_info, call, gsi);
8118 new_stmt = call;
8119 }
8120 else
8121 {
8122 data_ref = fold_build2 (MEM_REF, vectype,
8123 dataref_ptr,
8124 dataref_offset
8125 ? dataref_offset
8126 : build_int_cst (ref_type, 0));
8127 if (aligned_access_p (first_dr_info))
8128 ;
8129 else if (DR_MISALIGNMENT (first_dr_info) == -1)
8130 TREE_TYPE (data_ref)
8131 = build_aligned_type (TREE_TYPE (data_ref),
8132 align * BITS_PER_UNIT);
8133 else
8134 TREE_TYPE (data_ref)
8135 = build_aligned_type (TREE_TYPE (data_ref),
8136 TYPE_ALIGN (elem_type));
8137 vect_copy_ref_info (data_ref, DR_REF (first_dr_info->dr));
8138 new_stmt = gimple_build_assign (data_ref, vec_oprnd);
8139 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
8140 }
8141
8142 if (slp)
8143 continue;
8144
8145 next_stmt_info = DR_GROUP_NEXT_ELEMENT (next_stmt_info);
8146 if (!next_stmt_info)
8147 break;
8148 }
8149 }
8150 if (!slp)
8151 {
8152 if (j == 0)
8153 *vec_stmt = new_stmt;
8154 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
8155 }
8156 }
8157
8158 for (i = 0; i < group_size; ++i)
8159 {
8160 vec<tree> oprndsi = gvec_oprnds[i];
8161 oprndsi.release ();
8162 }
8163 oprnds.release ();
8164 result_chain.release ();
8165 vec_oprnds.release ();
8166
8167 return true;
8168 }
8169
8170 /* Given a vector type VECTYPE, turns permutation SEL into the equivalent
8171 VECTOR_CST mask. No checks are made that the target platform supports the
8172 mask, so callers may wish to test can_vec_perm_const_p separately, or use
8173 vect_gen_perm_mask_checked. */
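
/* Usage sketch (illustration only; cf. perm_mask_for_reverse used further
   below, with VECTYPE assumed to be an 8-element vector type): to build a
   mask that reverses an 8-element vector one would write

     vec_perm_builder sel (8, 8, 1);
     for (unsigned i = 0; i < 8; ++i)
       sel.quick_push (7 - i);
     vec_perm_indices indices (sel, 1, 8);
     tree mask = vect_gen_perm_mask_checked (vectype, indices);

   using the _checked variant only when target support is already known
   (it asserts can_vec_perm_const_p), and vect_gen_perm_mask_any plus an
   explicit can_vec_perm_const_p test otherwise.  */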
8174
8175 tree
8176 vect_gen_perm_mask_any (tree vectype, const vec_perm_indices &sel)
8177 {
8178 tree mask_type;
8179
8180 poly_uint64 nunits = sel.length ();
8181 gcc_assert (known_eq (nunits, TYPE_VECTOR_SUBPARTS (vectype)));
8182
8183 mask_type = build_vector_type (ssizetype, nunits);
8184 return vec_perm_indices_to_tree (mask_type, sel);
8185 }
8186
8187 /* Checked version of vect_gen_perm_mask_any. Asserts can_vec_perm_const_p,
8188 i.e. that the target supports the pattern _for arbitrary input vectors_. */
8189
8190 tree
8191 vect_gen_perm_mask_checked (tree vectype, const vec_perm_indices &sel)
8192 {
8193 gcc_assert (can_vec_perm_const_p (TYPE_MODE (vectype), sel));
8194 return vect_gen_perm_mask_any (vectype, sel);
8195 }
8196
8197 /* Given vector variables X and Y that were generated for the scalar
8198 STMT_INFO, generate instructions to permute the vector elements of X and Y
8199 using permutation mask MASK_VEC, insert them at *GSI and return the
8200 permuted vector variable. */
8201
8202 static tree
8203 permute_vec_elements (vec_info *vinfo,
8204 tree x, tree y, tree mask_vec, stmt_vec_info stmt_info,
8205 gimple_stmt_iterator *gsi)
8206 {
8207 tree vectype = TREE_TYPE (x);
8208 tree perm_dest, data_ref;
8209 gimple *perm_stmt;
8210
8211 tree scalar_dest = gimple_get_lhs (stmt_info->stmt);
8212 if (scalar_dest && TREE_CODE (scalar_dest) == SSA_NAME)
8213 perm_dest = vect_create_destination_var (scalar_dest, vectype);
8214 else
8215 perm_dest = vect_get_new_vect_var (vectype, vect_simple_var, NULL);
8216 data_ref = make_ssa_name (perm_dest);
8217
8218 /* Generate the permute statement. */
8219 perm_stmt = gimple_build_assign (data_ref, VEC_PERM_EXPR, x, y, mask_vec);
8220 vect_finish_stmt_generation (vinfo, stmt_info, perm_stmt, gsi);
8221
8222 return data_ref;
8223 }
8224
8225 /* Hoist the definitions of all SSA uses on STMT_INFO out of the loop LOOP,
8226 inserting them on the loop's preheader edge. Returns true if we
8227 were successful in doing so (and thus STMT_INFO can then be moved),
8228 otherwise returns false. */
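
/* Illustration (hypothetical GIMPLE, not taken from a dump): if STMT_INFO
   is the invariant load  _2 = *p_3;  and its address computation
   p_3 = &s + 16;  is defined inside LOOP, that address statement is moved
   onto the preheader edge first:

     preheader:  p_3 = &s + 16;
     loop body:  _2 = *p_3;      <-- STMT_INFO, now movable as well

   Only one level of defs is handled; PHIs and deeper use webs make the
   function give up, as checked below.  */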
8229
8230 static bool
8231 hoist_defs_of_uses (stmt_vec_info stmt_info, class loop *loop)
8232 {
8233 ssa_op_iter i;
8234 tree op;
8235 bool any = false;
8236
8237 FOR_EACH_SSA_TREE_OPERAND (op, stmt_info->stmt, i, SSA_OP_USE)
8238 {
8239 gimple *def_stmt = SSA_NAME_DEF_STMT (op);
8240 if (!gimple_nop_p (def_stmt)
8241 && flow_bb_inside_loop_p (loop, gimple_bb (def_stmt)))
8242 {
8243 /* Make sure we don't need to recurse. While we could do
8244 so in simple cases, for more complex use webs we don't
8245 have an easy way to preserve stmt order to fulfil
8246 dependencies within them. */
8247 tree op2;
8248 ssa_op_iter i2;
8249 if (gimple_code (def_stmt) == GIMPLE_PHI)
8250 return false;
8251 FOR_EACH_SSA_TREE_OPERAND (op2, def_stmt, i2, SSA_OP_USE)
8252 {
8253 gimple *def_stmt2 = SSA_NAME_DEF_STMT (op2);
8254 if (!gimple_nop_p (def_stmt2)
8255 && flow_bb_inside_loop_p (loop, gimple_bb (def_stmt2)))
8256 return false;
8257 }
8258 any = true;
8259 }
8260 }
8261
8262 if (!any)
8263 return true;
8264
8265 FOR_EACH_SSA_TREE_OPERAND (op, stmt_info->stmt, i, SSA_OP_USE)
8266 {
8267 gimple *def_stmt = SSA_NAME_DEF_STMT (op);
8268 if (!gimple_nop_p (def_stmt)
8269 && flow_bb_inside_loop_p (loop, gimple_bb (def_stmt)))
8270 {
8271 gimple_stmt_iterator gsi = gsi_for_stmt (def_stmt);
8272 gsi_remove (&gsi, false);
8273 gsi_insert_on_edge_immediate (loop_preheader_edge (loop), def_stmt);
8274 }
8275 }
8276
8277 return true;
8278 }
8279
8280 /* vectorizable_load.
8281
8282 Check if STMT_INFO reads a non scalar data-ref (array/pointer/structure)
8283 that can be vectorized.
8284 If VEC_STMT is also passed, vectorize STMT_INFO: create a vectorized
8285 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
8286 Return true if STMT_INFO is vectorizable in this way. */
8287
8288 static bool
8289 vectorizable_load (vec_info *vinfo,
8290 stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
8291 gimple **vec_stmt, slp_tree slp_node,
8292 stmt_vector_for_cost *cost_vec)
8293 {
8294 tree scalar_dest;
8295 tree vec_dest = NULL;
8296 tree data_ref = NULL;
8297 loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
8298 class loop *loop = NULL;
8299 class loop *containing_loop = gimple_bb (stmt_info->stmt)->loop_father;
8300 bool nested_in_vect_loop = false;
8301 tree elem_type;
8302 tree new_temp;
8303 machine_mode mode;
8304 tree dummy;
8305 tree dataref_ptr = NULL_TREE;
8306 tree dataref_offset = NULL_TREE;
8307 gimple *ptr_incr = NULL;
8308 int ncopies;
8309 int i, j;
8310 unsigned int group_size;
8311 poly_uint64 group_gap_adj;
8312 tree msq = NULL_TREE, lsq;
8313 tree offset = NULL_TREE;
8314 tree byte_offset = NULL_TREE;
8315 tree realignment_token = NULL_TREE;
8316 gphi *phi = NULL;
8317 vec<tree> dr_chain = vNULL;
8318 bool grouped_load = false;
8319 stmt_vec_info first_stmt_info;
8320 stmt_vec_info first_stmt_info_for_drptr = NULL;
8321 bool compute_in_loop = false;
8322 class loop *at_loop;
8323 int vec_num;
8324 bool slp = (slp_node != NULL);
8325 bool slp_perm = false;
8326 bb_vec_info bb_vinfo = dyn_cast <bb_vec_info> (vinfo);
8327 poly_uint64 vf;
8328 tree aggr_type;
8329 gather_scatter_info gs_info;
8330 tree ref_type;
8331 enum vect_def_type mask_dt = vect_unknown_def_type;
8332
8333 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
8334 return false;
8335
8336 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
8337 && ! vec_stmt)
8338 return false;
8339
8340 if (!STMT_VINFO_DATA_REF (stmt_info))
8341 return false;
8342
8343 /* ??? Alignment analysis for SLP looks at SLP_TREE_SCALAR_STMTS[0]
8344 for unpermuted loads but we get passed SLP_TREE_REPRESENTATIVE
8345 which can be different when reduction chains were re-ordered.
8346 Now that we figured we're a dataref, reset stmt_info back to
8347 SLP_TREE_SCALAR_STMTS[0]. When we're SLP only, things should be
8348 refactored in a way to maintain the dr_vec_info pointer for the
8349 relevant access explicitly. */
8350 stmt_vec_info orig_stmt_info = stmt_info;
8351 if (slp_node)
8352 stmt_info = SLP_TREE_SCALAR_STMTS (slp_node)[0];
8353
8354 tree mask = NULL_TREE, mask_vectype = NULL_TREE;
8355 if (gassign *assign = dyn_cast <gassign *> (stmt_info->stmt))
8356 {
8357 scalar_dest = gimple_assign_lhs (assign);
8358 if (TREE_CODE (scalar_dest) != SSA_NAME)
8359 return false;
8360
8361 tree_code code = gimple_assign_rhs_code (assign);
8362 if (code != ARRAY_REF
8363 && code != BIT_FIELD_REF
8364 && code != INDIRECT_REF
8365 && code != COMPONENT_REF
8366 && code != IMAGPART_EXPR
8367 && code != REALPART_EXPR
8368 && code != MEM_REF
8369 && TREE_CODE_CLASS (code) != tcc_declaration)
8370 return false;
8371 }
8372 else
8373 {
8374 gcall *call = dyn_cast <gcall *> (stmt_info->stmt);
8375 if (!call || !gimple_call_internal_p (call))
8376 return false;
8377
8378 internal_fn ifn = gimple_call_internal_fn (call);
8379 if (!internal_load_fn_p (ifn))
8380 return false;
8381
8382 scalar_dest = gimple_call_lhs (call);
8383 if (!scalar_dest)
8384 return false;
8385
8386 int mask_index = internal_fn_mask_index (ifn);
8387 if (mask_index >= 0)
8388 {
8389 mask = gimple_call_arg (call, mask_index);
8390 if (!vect_check_scalar_mask (vinfo, stmt_info, mask, &mask_dt,
8391 &mask_vectype))
8392 return false;
8393 }
8394 }
8395
8396 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
8397 poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
8398
8399 if (loop_vinfo)
8400 {
8401 loop = LOOP_VINFO_LOOP (loop_vinfo);
8402 nested_in_vect_loop = nested_in_vect_loop_p (loop, stmt_info);
8403 vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
8404 }
8405 else
8406 vf = 1;
8407
8408 /* Multiple types in SLP are handled by creating the appropriate number of
8409 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
8410 case of SLP. */
8411 if (slp)
8412 ncopies = 1;
8413 else
8414 ncopies = vect_get_num_copies (loop_vinfo, vectype);
8415
8416 gcc_assert (ncopies >= 1);
8417
8418 /* FORNOW. This restriction should be relaxed. */
8419 if (nested_in_vect_loop && ncopies > 1)
8420 {
8421 if (dump_enabled_p ())
8422 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
8423 "multiple types in nested loop.\n");
8424 return false;
8425 }
8426
8427 /* Invalidate assumptions made by dependence analysis when vectorization
8428 on the unrolled body effectively re-orders stmts. */
8429 if (ncopies > 1
8430 && STMT_VINFO_MIN_NEG_DIST (stmt_info) != 0
8431 && maybe_gt (LOOP_VINFO_VECT_FACTOR (loop_vinfo),
8432 STMT_VINFO_MIN_NEG_DIST (stmt_info)))
8433 {
8434 if (dump_enabled_p ())
8435 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
8436 "cannot perform implicit CSE when unrolling "
8437 "with negative dependence distance\n");
8438 return false;
8439 }
8440
8441 elem_type = TREE_TYPE (vectype);
8442 mode = TYPE_MODE (vectype);
8443
8444 /* FORNOW. In some cases can vectorize even if data-type not supported
8445 (e.g. - data copies). */
8446 if (optab_handler (mov_optab, mode) == CODE_FOR_nothing)
8447 {
8448 if (dump_enabled_p ())
8449 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
8450 "Aligned load, but unsupported type.\n");
8451 return false;
8452 }
8453
8454 /* Check if the load is a part of an interleaving chain. */
8455 if (STMT_VINFO_GROUPED_ACCESS (stmt_info))
8456 {
8457 grouped_load = true;
8458 /* FORNOW */
8459 gcc_assert (!nested_in_vect_loop);
8460 gcc_assert (!STMT_VINFO_GATHER_SCATTER_P (stmt_info));
8461
8462 first_stmt_info = DR_GROUP_FIRST_ELEMENT (stmt_info);
8463 group_size = DR_GROUP_SIZE (first_stmt_info);
8464
8465 /* Refuse non-SLP vectorization of SLP-only groups. */
8466 if (!slp && STMT_VINFO_SLP_VECT_ONLY (first_stmt_info))
8467 {
8468 if (dump_enabled_p ())
8469 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
8470 "cannot vectorize load in non-SLP mode.\n");
8471 return false;
8472 }
8473
8474 if (slp && SLP_TREE_LOAD_PERMUTATION (slp_node).exists ())
8475 {
8476 slp_perm = true;
8477
8478 if (!loop_vinfo)
8479 {
8480 /* In BB vectorization we may not actually use a loaded vector
8481 that accesses elements beyond DR_GROUP_SIZE. */
8482 stmt_vec_info group_info = SLP_TREE_SCALAR_STMTS (slp_node)[0];
8483 group_info = DR_GROUP_FIRST_ELEMENT (group_info);
8484 unsigned HOST_WIDE_INT nunits;
8485 unsigned j, k, maxk = 0;
8486 FOR_EACH_VEC_ELT (SLP_TREE_LOAD_PERMUTATION (slp_node), j, k)
8487 if (k > maxk)
8488 maxk = k;
8489 tree vectype = STMT_VINFO_VECTYPE (group_info);
8490 if (!TYPE_VECTOR_SUBPARTS (vectype).is_constant (&nunits)
8491 || maxk >= (DR_GROUP_SIZE (group_info) & ~(nunits - 1)))
8492 {
8493 if (dump_enabled_p ())
8494 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
8495 "BB vectorization with gaps at the end of "
8496 "a load is not supported\n");
8497 return false;
8498 }
8499 }
8500
8501 auto_vec<tree> tem;
8502 unsigned n_perms;
8503 if (!vect_transform_slp_perm_load (vinfo, slp_node, tem, NULL, vf,
8504 true, &n_perms))
8505 {
8506 if (dump_enabled_p ())
8507 dump_printf_loc (MSG_MISSED_OPTIMIZATION,
8508 vect_location,
8509 "unsupported load permutation\n");
8510 return false;
8511 }
8512 }
8513
8514 /* Invalidate assumptions made by dependence analysis when vectorization
8515 on the unrolled body effectively re-orders stmts. */
8516 if (!PURE_SLP_STMT (stmt_info)
8517 && STMT_VINFO_MIN_NEG_DIST (stmt_info) != 0
8518 && maybe_gt (LOOP_VINFO_VECT_FACTOR (loop_vinfo),
8519 STMT_VINFO_MIN_NEG_DIST (stmt_info)))
8520 {
8521 if (dump_enabled_p ())
8522 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
8523 "cannot perform implicit CSE when performing "
8524 "group loads with negative dependence distance\n");
8525 return false;
8526 }
8527 }
8528 else
8529 group_size = 1;
8530
8531 vect_memory_access_type memory_access_type;
8532 enum dr_alignment_support alignment_support_scheme;
8533 if (!get_load_store_type (vinfo, stmt_info, vectype, slp_node, mask, VLS_LOAD,
8534 ncopies, &memory_access_type,
8535 &alignment_support_scheme, &gs_info))
8536 return false;
8537
8538 if (mask)
8539 {
8540 if (memory_access_type == VMAT_CONTIGUOUS)
8541 {
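/* A contiguous masked load requires the target to support masked
   vector loads of this mode (can_vec_mask_load_store_p).  */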
8542 machine_mode vec_mode = TYPE_MODE (vectype);
8543 if (!VECTOR_MODE_P (vec_mode)
8544 || !can_vec_mask_load_store_p (vec_mode,
8545 TYPE_MODE (mask_vectype), true))
8546 return false;
8547 }
8548 else if (memory_access_type != VMAT_LOAD_STORE_LANES
8549 && memory_access_type != VMAT_GATHER_SCATTER)
8550 {
8551 if (dump_enabled_p ())
8552 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
8553 "unsupported access type for masked load.\n");
8554 return false;
8555 }
8556 }
8557
8558 if (!vec_stmt) /* transformation not required. */
8559 {
8560 if (!slp)
8561 STMT_VINFO_MEMORY_ACCESS_TYPE (stmt_info) = memory_access_type;
8562
8563 if (loop_vinfo
8564 && LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo))
8565 check_load_store_for_partial_vectors (loop_vinfo, vectype, VLS_LOAD,
8566 group_size, memory_access_type,
8567 &gs_info, mask);
8568
8569 if (dump_enabled_p ()
8570 && memory_access_type != VMAT_ELEMENTWISE
8571 && memory_access_type != VMAT_GATHER_SCATTER
8572 && alignment_support_scheme != dr_aligned)
8573 dump_printf_loc (MSG_NOTE, vect_location,
8574 "Vectorizing an unaligned access.\n");
8575
8576 STMT_VINFO_TYPE (orig_stmt_info) = load_vec_info_type;
8577 vect_model_load_cost (vinfo, stmt_info, ncopies, vf, memory_access_type,
8578 slp_node, cost_vec);
8579 return true;
8580 }
8581
8582 if (!slp)
8583 gcc_assert (memory_access_type
8584 == STMT_VINFO_MEMORY_ACCESS_TYPE (stmt_info));
8585
8586 if (dump_enabled_p ())
8587 dump_printf_loc (MSG_NOTE, vect_location,
8588 "transform load. ncopies = %d\n", ncopies);
8589
8590 /* Transform. */
8591
8592 dr_vec_info *dr_info = STMT_VINFO_DR_INFO (stmt_info), *first_dr_info = NULL;
8593 ensure_base_align (dr_info);
8594
8595 if (memory_access_type == VMAT_GATHER_SCATTER && gs_info.decl)
8596 {
8597 vect_build_gather_load_calls (vinfo,
8598 stmt_info, gsi, vec_stmt, &gs_info, mask);
8599 return true;
8600 }
8601
8602 if (memory_access_type == VMAT_INVARIANT)
8603 {
8604 gcc_assert (!grouped_load && !mask && !bb_vinfo);
8605 /* If we have versioned for aliasing or the loop doesn't
8606 have any data dependencies that would preclude this,
8607 then we are sure this is a loop invariant load and
8608 thus we can insert it on the preheader edge. */
8609 bool hoist_p = (LOOP_VINFO_NO_DATA_DEPENDENCIES (loop_vinfo)
8610 && !nested_in_vect_loop
8611 && hoist_defs_of_uses (stmt_info, loop));
8612 if (hoist_p)
8613 {
8614 gassign *stmt = as_a <gassign *> (stmt_info->stmt);
8615 if (dump_enabled_p ())
8616 dump_printf_loc (MSG_NOTE, vect_location,
8617 "hoisting out of the vectorized loop: %G", stmt);
8618 scalar_dest = copy_ssa_name (scalar_dest);
8619 tree rhs = unshare_expr (gimple_assign_rhs1 (stmt));
8620 gsi_insert_on_edge_immediate
8621 (loop_preheader_edge (loop),
8622 gimple_build_assign (scalar_dest, rhs));
8623 }
8624 /* These copies are all equivalent, but currently the representation
8625 requires a separate STMT_VINFO_VEC_STMT for each one. */
8626 gimple_stmt_iterator gsi2 = *gsi;
8627 gsi_next (&gsi2);
8628 for (j = 0; j < ncopies; j++)
8629 {
8630 if (hoist_p)
8631 new_temp = vect_init_vector (vinfo, stmt_info, scalar_dest,
8632 vectype, NULL);
8633 else
8634 new_temp = vect_init_vector (vinfo, stmt_info, scalar_dest,
8635 vectype, &gsi2);
8636 gimple *new_stmt = SSA_NAME_DEF_STMT (new_temp);
8637 if (slp)
8638 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
8639 else
8640 {
8641 if (j == 0)
8642 *vec_stmt = new_stmt;
8643 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
8644 }
8645 }
8646 return true;
8647 }
8648
8649 if (memory_access_type == VMAT_ELEMENTWISE
8650 || memory_access_type == VMAT_STRIDED_SLP)
8651 {
8652 gimple_stmt_iterator incr_gsi;
8653 bool insert_after;
8654 tree offvar;
8655 tree ivstep;
8656 tree running_off;
8657 vec<constructor_elt, va_gc> *v = NULL;
8658 tree stride_base, stride_step, alias_off;
8659 /* Checked by get_load_store_type. */
8660 unsigned int const_nunits = nunits.to_constant ();
8661 unsigned HOST_WIDE_INT cst_offset = 0;
8662 tree dr_offset;
8663
8664 gcc_assert (!LOOP_VINFO_USING_PARTIAL_VECTORS_P (loop_vinfo));
8665 gcc_assert (!nested_in_vect_loop);
8666
8667 if (grouped_load)
8668 {
8669 first_stmt_info = DR_GROUP_FIRST_ELEMENT (stmt_info);
8670 first_dr_info = STMT_VINFO_DR_INFO (first_stmt_info);
8671 }
8672 else
8673 {
8674 first_stmt_info = stmt_info;
8675 first_dr_info = dr_info;
8676 }
8677 if (slp && grouped_load)
8678 {
8679 group_size = DR_GROUP_SIZE (first_stmt_info);
8680 ref_type = get_group_alias_ptr_type (first_stmt_info);
8681 }
8682 else
8683 {
8684 if (grouped_load)
8685 cst_offset
8686 = (tree_to_uhwi (TYPE_SIZE_UNIT (TREE_TYPE (vectype)))
8687 * vect_get_place_in_interleaving_chain (stmt_info,
8688 first_stmt_info));
8689 group_size = 1;
8690 ref_type = reference_alias_ptr_type (DR_REF (dr_info->dr));
8691 }
8692
8693 dr_offset = get_dr_vinfo_offset (vinfo, first_dr_info);
8694 stride_base
8695 = fold_build_pointer_plus
8696 (DR_BASE_ADDRESS (first_dr_info->dr),
8697 size_binop (PLUS_EXPR,
8698 convert_to_ptrofftype (dr_offset),
8699 convert_to_ptrofftype (DR_INIT (first_dr_info->dr))));
8700 stride_step = fold_convert (sizetype, DR_STEP (first_dr_info->dr));
8701
8702 /* For a load with a loop-invariant stride other than a power of 2
8703 (i.e. not a grouped access) like so:
8704
8705 for (i = 0; i < n; i += stride)
8706 ... = array[i];
8707
8708 we generate a new induction variable and new accesses to
8709 form a new vector (or vectors, depending on ncopies):
8710
8711 for (j = 0; ; j += VF*stride)
8712 tmp1 = array[j];
8713 tmp2 = array[j + stride];
8714 ...
8715 vectemp = {tmp1, tmp2, ...}
8716 */
8717
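/* Step the induction variable by the scalar stride times the
   vectorization factor, i.e. one vectorized iteration's worth.  */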
8718 ivstep = fold_build2 (MULT_EXPR, TREE_TYPE (stride_step), stride_step,
8719 build_int_cst (TREE_TYPE (stride_step), vf));
8720
8721 standard_iv_increment_position (loop, &incr_gsi, &insert_after);
8722
8723 stride_base = cse_and_gimplify_to_preheader (loop_vinfo, stride_base);
8724 ivstep = cse_and_gimplify_to_preheader (loop_vinfo, ivstep);
8725 create_iv (stride_base, ivstep, NULL,
8726 loop, &incr_gsi, insert_after,
8727 &offvar, NULL);
8728
8729 stride_step = cse_and_gimplify_to_preheader (loop_vinfo, stride_step);
8730
8731 running_off = offvar;
8732 alias_off = build_int_cst (ref_type, 0);
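/* By default emit one scalar element load per vector lane and
   assemble the vector from those; VMAT_STRIDED_SLP may override
   NLOADS/LNEL/LTYPE below.  */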
8733 int nloads = const_nunits;
8734 int lnel = 1;
8735 tree ltype = TREE_TYPE (vectype);
8736 tree lvectype = vectype;
8737 auto_vec<tree> dr_chain;
8738 if (memory_access_type == VMAT_STRIDED_SLP)
8739 {
8740 if (group_size < const_nunits)
8741 {
8742 /* First check if vec_init optab supports construction from vector
8743 elts directly. Otherwise avoid emitting a constructor of
8744 vector elements by performing the loads using an integer type
8745 of the same size, constructing a vector of those and then
8746 re-interpreting it as the original vector type. This avoids a
8747 huge runtime penalty due to the general inability to perform
8748 store forwarding from smaller stores to a larger load. */
8749 tree ptype;
8750 tree vtype
8751 = vector_vector_composition_type (vectype,
8752 const_nunits / group_size,
8753 &ptype);
8754 if (vtype != NULL_TREE)
8755 {
8756 nloads = const_nunits / group_size;
8757 lnel = group_size;
8758 lvectype = vtype;
8759 ltype = ptype;
8760 }
8761 }
8762 else
8763 {
8764 nloads = 1;
8765 lnel = const_nunits;
8766 ltype = vectype;
8767 }
8768 ltype = build_aligned_type (ltype, TYPE_ALIGN (TREE_TYPE (vectype)));
8769 }
8770 /* Load vector(1) scalar_type directly if the vectype has a single element. */
8771 else if (nloads == 1)
8772 ltype = vectype;
8773
8774 if (slp)
8775 {
8776 /* For SLP permutation support we need to load the whole group,
8777 not only the number of vector stmts the permutation result
8778 fits in. */
8779 if (slp_perm)
8780 {
8781 /* We don't yet generate SLP_TREE_LOAD_PERMUTATIONs for
8782 variable VF. */
8783 unsigned int const_vf = vf.to_constant ();
8784 ncopies = CEIL (group_size * const_vf, const_nunits);
8785 dr_chain.create (ncopies);
8786 }
8787 else
8788 ncopies = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
8789 }
8790 unsigned int group_el = 0;
8791 unsigned HOST_WIDE_INT
8792 elsz = tree_to_uhwi (TYPE_SIZE_UNIT (TREE_TYPE (vectype)));
8793 for (j = 0; j < ncopies; j++)
8794 {
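/* For each copy emit NLOADS strided loads, advancing RUNNING_OFF by
   STRIDE_STEP whenever a whole group has been consumed.  */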
8795 if (nloads > 1)
8796 vec_alloc (v, nloads);
8797 gimple *new_stmt = NULL;
8798 for (i = 0; i < nloads; i++)
8799 {
8800 tree this_off = build_int_cst (TREE_TYPE (alias_off),
8801 group_el * elsz + cst_offset);
8802 tree data_ref = build2 (MEM_REF, ltype, running_off, this_off);
8803 vect_copy_ref_info (data_ref, DR_REF (first_dr_info->dr));
8804 new_stmt = gimple_build_assign (make_ssa_name (ltype), data_ref);
8805 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
8806 if (nloads > 1)
8807 CONSTRUCTOR_APPEND_ELT (v, NULL_TREE,
8808 gimple_assign_lhs (new_stmt));
8809
8810 group_el += lnel;
8811 if (! slp
8812 || group_el == group_size)
8813 {
8814 tree newoff = copy_ssa_name (running_off);
8815 gimple *incr = gimple_build_assign (newoff, POINTER_PLUS_EXPR,
8816 running_off, stride_step);
8817 vect_finish_stmt_generation (vinfo, stmt_info, incr, gsi);
8818
8819 running_off = newoff;
8820 group_el = 0;
8821 }
8822 }
8823 if (nloads > 1)
8824 {
8825 tree vec_inv = build_constructor (lvectype, v);
8826 new_temp = vect_init_vector (vinfo, stmt_info,
8827 vec_inv, lvectype, gsi);
8828 new_stmt = SSA_NAME_DEF_STMT (new_temp);
8829 if (lvectype != vectype)
8830 {
8831 new_stmt = gimple_build_assign (make_ssa_name (vectype),
8832 VIEW_CONVERT_EXPR,
8833 build1 (VIEW_CONVERT_EXPR,
8834 vectype, new_temp));
8835 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
8836 }
8837 }
8838
8839 if (slp)
8840 {
8841 if (slp_perm)
8842 dr_chain.quick_push (gimple_assign_lhs (new_stmt));
8843 else
8844 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
8845 }
8846 else
8847 {
8848 if (j == 0)
8849 *vec_stmt = new_stmt;
8850 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
8851 }
8852 }
8853 if (slp_perm)
8854 {
8855 unsigned n_perms;
8856 vect_transform_slp_perm_load (vinfo, slp_node, dr_chain, gsi, vf,
8857 false, &n_perms);
8858 }
8859 return true;
8860 }
8861
8862 if (memory_access_type == VMAT_GATHER_SCATTER
8863 || (!slp && memory_access_type == VMAT_CONTIGUOUS))
8864 grouped_load = false;
8865
8866 if (grouped_load)
8867 {
8868 first_stmt_info = DR_GROUP_FIRST_ELEMENT (stmt_info);
8869 group_size = DR_GROUP_SIZE (first_stmt_info);
8870 /* For SLP vectorization we directly vectorize a subchain
8871 without permutation. */
8872 if (slp && ! SLP_TREE_LOAD_PERMUTATION (slp_node).exists ())
8873 first_stmt_info = SLP_TREE_SCALAR_STMTS (slp_node)[0];
8874 /* For BB vectorization always use the first stmt to base
8875 the data ref pointer on. */
8876 if (bb_vinfo)
8877 first_stmt_info_for_drptr
8878 = vect_find_first_scalar_stmt_in_slp (slp_node);
8879
8880 /* Check if the chain of loads is already vectorized. */
8881 if (STMT_VINFO_VEC_STMTS (first_stmt_info).exists ()
8882 /* For SLP we would need to copy over SLP_TREE_VEC_STMTS.
8883 ??? But we can only do so if there is exactly one
8884 as we have no way to get at the rest. Leave the CSE
8885 opportunity alone.
8886 ??? With the group load eventually participating
8887 in multiple different permutations (having multiple
8888 slp nodes which refer to the same group) the CSE
8889 is even wrong code. See PR56270. */
8890 && !slp)
8891 {
8892 *vec_stmt = STMT_VINFO_VEC_STMTS (stmt_info)[0];
8893 return true;
8894 }
8895 first_dr_info = STMT_VINFO_DR_INFO (first_stmt_info);
8896 group_gap_adj = 0;
8897
8898 /* VEC_NUM is the number of vect stmts to be created for this group. */
8899 if (slp)
8900 {
8901 grouped_load = false;
8902 /* If an SLP permutation is from N elements to N elements,
8903 and if one vector holds a whole number of N-element groups, we can load
8904 the inputs to the permutation in the same way as an
8905 unpermuted sequence. In other cases we need to load the
8906 whole group, not only the number of vector stmts the
8907 permutation result fits in. */
8908 unsigned scalar_lanes = SLP_TREE_LANES (slp_node);
8909 if (slp_perm
8910 && (group_size != scalar_lanes
8911 || !multiple_p (nunits, group_size)))
8912 {
8913 /* We don't yet generate such SLP_TREE_LOAD_PERMUTATIONs for
8914 variable VF; see vect_transform_slp_perm_load. */
8915 unsigned int const_vf = vf.to_constant ();
8916 unsigned int const_nunits = nunits.to_constant ();
8917 vec_num = CEIL (group_size * const_vf, const_nunits);
8918 group_gap_adj = vf * group_size - nunits * vec_num;
8919 }
8920 else
8921 {
8922 vec_num = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
8923 group_gap_adj
8924 = group_size - scalar_lanes;
8925 }
8926 }
8927 else
8928 vec_num = group_size;
8929
8930 ref_type = get_group_alias_ptr_type (first_stmt_info);
8931 }
8932 else
8933 {
8934 first_stmt_info = stmt_info;
8935 first_dr_info = dr_info;
8936 group_size = vec_num = 1;
8937 group_gap_adj = 0;
8938 ref_type = reference_alias_ptr_type (DR_REF (first_dr_info->dr));
8939 }
8940
8941 gcc_assert (alignment_support_scheme);
8942 vec_loop_masks *loop_masks
8943 = (loop_vinfo && LOOP_VINFO_FULLY_MASKED_P (loop_vinfo)
8944 ? &LOOP_VINFO_MASKS (loop_vinfo)
8945 : NULL);
8946 vec_loop_lens *loop_lens
8947 = (loop_vinfo && LOOP_VINFO_FULLY_WITH_LENGTH_P (loop_vinfo)
8948 ? &LOOP_VINFO_LENS (loop_vinfo)
8949 : NULL);
8950
8951 /* Shouldn't go with length-based approach if fully masked. */
8952 gcc_assert (!loop_lens || !loop_masks);
8953
8954 /* Targets with store-lane instructions must not require explicit
8955 realignment. vect_supportable_dr_alignment always returns either
8956 dr_aligned or dr_unaligned_supported for masked operations. */
8957 gcc_assert ((memory_access_type != VMAT_LOAD_STORE_LANES
8958 && !mask
8959 && !loop_masks)
8960 || alignment_support_scheme == dr_aligned
8961 || alignment_support_scheme == dr_unaligned_supported);
8962
8963 /* In case the vectorization factor (VF) is bigger than the number
8964 of elements that we can fit in a vectype (nunits), we have to generate
8965 more than one vector stmt - i.e - we need to "unroll" the
8966 vector stmt by a factor VF/nunits. In doing so, we record a pointer
8967 from one copy of the vector stmt to the next, in the field
8968 STMT_VINFO_RELATED_STMT. This is necessary in order to allow following
8969 stages to find the correct vector defs to be used when vectorizing
8970 stmts that use the defs of the current stmt. The example below
8971 illustrates the vectorization process when VF=16 and nunits=4 (i.e., we
8972 need to create 4 vectorized stmts):
8973
8974 before vectorization:
8975 RELATED_STMT VEC_STMT
8976 S1: x = memref - -
8977 S2: z = x + 1 - -
8978
8979 step 1: vectorize stmt S1:
8980 We first create the vector stmt VS1_0, and, as usual, record a
8981 pointer to it in the STMT_VINFO_VEC_STMT of the scalar stmt S1.
8982 Next, we create the vector stmt VS1_1, and record a pointer to
8983 it in the STMT_VINFO_RELATED_STMT of the vector stmt VS1_0.
8984 Similarly, for VS1_2 and VS1_3. This is the resulting chain of
8985 stmts and pointers:
8986 RELATED_STMT VEC_STMT
8987 VS1_0: vx0 = memref0 VS1_1 -
8988 VS1_1: vx1 = memref1 VS1_2 -
8989 VS1_2: vx2 = memref2 VS1_3 -
8990 VS1_3: vx3 = memref3 - -
8991 S1: x = load - VS1_0
8992 S2: z = x + 1 - -
8993 */
8994
8995 /* In case of interleaving (non-unit grouped access):
8996
8997 S1: x2 = &base + 2
8998 S2: x0 = &base
8999 S3: x1 = &base + 1
9000 S4: x3 = &base + 3
9001
9002 Vectorized loads are created in the order of memory accesses
9003 starting from the access of the first stmt of the chain:
9004
9005 VS1: vx0 = &base
9006 VS2: vx1 = &base + vec_size*1
9007 VS3: vx3 = &base + vec_size*2
9008 VS4: vx4 = &base + vec_size*3
9009
9010 Then permutation statements are generated:
9011
9012 VS5: vx5 = VEC_PERM_EXPR < vx0, vx1, { 0, 2, ..., i*2 } >
9013 VS6: vx6 = VEC_PERM_EXPR < vx0, vx1, { 1, 3, ..., i*2+1 } >
9014 ...
9015
9016 And they are put in STMT_VINFO_VEC_STMT of the corresponding scalar stmts
9017 (the order of the data-refs in the output of vect_permute_load_chain
9018 corresponds to the order of scalar stmts in the interleaving chain - see
9019 the documentation of vect_permute_load_chain()).
9020 The generation of permutation stmts and recording them in
9021 STMT_VINFO_VEC_STMT is done in vect_transform_grouped_load().
9022
9023 In case of both multiple types and interleaving, the vector loads and
9024 permutation stmts above are created for every copy. The result vector
9025 stmts are put in STMT_VINFO_VEC_STMT for the first copy and in the
9026 corresponding STMT_VINFO_RELATED_STMT for the next copies. */
9027
9028 /* If the data reference is aligned (dr_aligned) or potentially unaligned
9029 on a target that supports unaligned accesses (dr_unaligned_supported)
9030 we generate the following code:
9031 p = initial_addr;
9032 indx = 0;
9033 loop {
9034 p = p + indx * vectype_size;
9035 vec_dest = *(p);
9036 indx = indx + 1;
9037 }
9038
9039 Otherwise, the data reference is potentially unaligned on a target that
9040 does not support unaligned accesses (dr_explicit_realign_optimized) -
9041 then generate the following code, in which the data in each iteration is
9042 obtained by two vector loads, one from the previous iteration, and one
9043 from the current iteration:
9044 p1 = initial_addr;
9045 msq_init = *(floor(p1))
9046 p2 = initial_addr + VS - 1;
9047 realignment_token = call target_builtin;
9048 indx = 0;
9049 loop {
9050 p2 = p2 + indx * vectype_size
9051 lsq = *(floor(p2))
9052 vec_dest = realign_load (msq, lsq, realignment_token)
9053 indx = indx + 1;
9054 msq = lsq;
9055 } */
9056
9057 /* If the misalignment remains the same throughout the execution of the
9058 loop, we can create the init_addr and permutation mask at the loop
9059 preheader. Otherwise, it needs to be created inside the loop.
9060 This can only occur when vectorizing memory accesses in the inner-loop
9061 nested within an outer-loop that is being vectorized. */
9062
9063 if (nested_in_vect_loop
9064 && !multiple_p (DR_STEP_ALIGNMENT (dr_info->dr),
9065 GET_MODE_SIZE (TYPE_MODE (vectype))))
9066 {
9067 gcc_assert (alignment_support_scheme != dr_explicit_realign_optimized);
9068 compute_in_loop = true;
9069 }
9070
9071 bool diff_first_stmt_info
9072 = first_stmt_info_for_drptr && first_stmt_info != first_stmt_info_for_drptr;
9073
9074 if ((alignment_support_scheme == dr_explicit_realign_optimized
9075 || alignment_support_scheme == dr_explicit_realign)
9076 && !compute_in_loop)
9077 {
9078 /* If we have a different first_stmt_info, we can't set up realignment
9079 here, since we can't guarantee the first_stmt_info DR has been
9080 initialized yet; instead use the first_stmt_info_for_drptr DR by
9081 bumping the distance from the first_stmt_info DR as below. */
9082 if (!diff_first_stmt_info)
9083 msq = vect_setup_realignment (vinfo,
9084 first_stmt_info, gsi, &realignment_token,
9085 alignment_support_scheme, NULL_TREE,
9086 &at_loop);
9087 if (alignment_support_scheme == dr_explicit_realign_optimized)
9088 {
9089 phi = as_a <gphi *> (SSA_NAME_DEF_STMT (msq));
9090 byte_offset = size_binop (MINUS_EXPR, TYPE_SIZE_UNIT (vectype),
9091 size_one_node);
9092 gcc_assert (!first_stmt_info_for_drptr);
9093 }
9094 }
9095 else
9096 at_loop = loop;
9097
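/* For a contiguous reverse access the first vector covers the last
   NUNITS scalar elements, so bias the start back by NUNITS - 1
   elements.  */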
9098 if (memory_access_type == VMAT_CONTIGUOUS_REVERSE)
9099 offset = size_int (-TYPE_VECTOR_SUBPARTS (vectype) + 1);
9100
9101 tree bump;
9102 tree vec_offset = NULL_TREE;
9103 if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
9104 {
9105 aggr_type = NULL_TREE;
9106 bump = NULL_TREE;
9107 }
9108 else if (memory_access_type == VMAT_GATHER_SCATTER)
9109 {
9110 aggr_type = elem_type;
9111 vect_get_strided_load_store_ops (stmt_info, loop_vinfo, &gs_info,
9112 &bump, &vec_offset);
9113 }
9114 else
9115 {
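/* Load-lanes reads VEC_NUM vectors at once, so the aggregate type is
   an array of VEC_NUM * NUNITS elements; otherwise we load one
   vector at a time.  */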
9116 if (memory_access_type == VMAT_LOAD_STORE_LANES)
9117 aggr_type = build_array_type_nelts (elem_type, vec_num * nunits);
9118 else
9119 aggr_type = vectype;
9120 bump = vect_get_data_ptr_increment (vinfo, dr_info, aggr_type,
9121 memory_access_type);
9122 }
9123
9124 vec<tree> vec_offsets = vNULL;
9125 auto_vec<tree> vec_masks;
9126 if (mask)
9127 vect_get_vec_defs (vinfo, stmt_info, slp_node, ncopies,
9128 mask, &vec_masks, mask_vectype, NULL_TREE);
9129 tree vec_mask = NULL_TREE;
9130 poly_uint64 group_elt = 0;
9131 for (j = 0; j < ncopies; j++)
9132 {
9133 /* 1. Create the vector or array pointer update chain. */
9134 if (j == 0)
9135 {
9136 bool simd_lane_access_p
9137 = STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) != 0;
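/* For a SIMD lane access of a plain variable with zero offset we can
   use the DR base address directly with a constant offset instead of
   creating a data-ref pointer IV.  */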
9138 if (simd_lane_access_p
9139 && TREE_CODE (DR_BASE_ADDRESS (first_dr_info->dr)) == ADDR_EXPR
9140 && VAR_P (TREE_OPERAND (DR_BASE_ADDRESS (first_dr_info->dr), 0))
9141 && integer_zerop (get_dr_vinfo_offset (vinfo, first_dr_info))
9142 && integer_zerop (DR_INIT (first_dr_info->dr))
9143 && alias_sets_conflict_p (get_alias_set (aggr_type),
9144 get_alias_set (TREE_TYPE (ref_type)))
9145 && (alignment_support_scheme == dr_aligned
9146 || alignment_support_scheme == dr_unaligned_supported))
9147 {
9148 dataref_ptr = unshare_expr (DR_BASE_ADDRESS (first_dr_info->dr));
9149 dataref_offset = build_int_cst (ref_type, 0);
9150 }
9151 else if (diff_first_stmt_info)
9152 {
9153 dataref_ptr
9154 = vect_create_data_ref_ptr (vinfo, first_stmt_info_for_drptr,
9155 aggr_type, at_loop, offset, &dummy,
9156 gsi, &ptr_incr, simd_lane_access_p,
9157 byte_offset, bump);
9158 /* Adjust the pointer by the difference to first_stmt. */
9159 data_reference_p ptrdr
9160 = STMT_VINFO_DATA_REF (first_stmt_info_for_drptr);
9161 tree diff
9162 = fold_convert (sizetype,
9163 size_binop (MINUS_EXPR,
9164 DR_INIT (first_dr_info->dr),
9165 DR_INIT (ptrdr)));
9166 dataref_ptr = bump_vector_ptr (vinfo, dataref_ptr, ptr_incr, gsi,
9167 stmt_info, diff);
9168 if (alignment_support_scheme == dr_explicit_realign)
9169 {
9170 msq = vect_setup_realignment (vinfo,
9171 first_stmt_info_for_drptr, gsi,
9172 &realignment_token,
9173 alignment_support_scheme,
9174 dataref_ptr, &at_loop);
9175 gcc_assert (!compute_in_loop);
9176 }
9177 }
9178 else if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
9179 {
9180 vect_get_gather_scatter_ops (vinfo, loop, stmt_info, &gs_info,
9181 &dataref_ptr, &vec_offsets, ncopies);
9182 vec_offset = vec_offsets[0];
9183 }
9184 else
9185 dataref_ptr
9186 = vect_create_data_ref_ptr (vinfo, first_stmt_info, aggr_type,
9187 at_loop,
9188 offset, &dummy, gsi, &ptr_incr,
9189 simd_lane_access_p,
9190 byte_offset, bump);
9191 if (mask)
9192 vec_mask = vec_masks[0];
9193 }
9194 else
9195 {
9196 if (dataref_offset)
9197 dataref_offset = int_const_binop (PLUS_EXPR, dataref_offset,
9198 bump);
9199 else if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
9200 vec_offset = vec_offsets[j];
9201 else
9202 dataref_ptr = bump_vector_ptr (vinfo, dataref_ptr, ptr_incr, gsi,
9203 stmt_info, bump);
9204 if (mask)
9205 vec_mask = vec_masks[j];
9206 }
9207
9208 if (grouped_load || slp_perm)
9209 dr_chain.create (vec_num);
9210
9211 gimple *new_stmt = NULL;
9212 if (memory_access_type == VMAT_LOAD_STORE_LANES)
9213 {
9214 tree vec_array;
9215
9216 vec_array = create_vector_array (vectype, vec_num);
9217
9218 tree final_mask = NULL_TREE;
9219 if (loop_masks)
9220 final_mask = vect_get_loop_mask (gsi, loop_masks, ncopies,
9221 vectype, j);
9222 if (vec_mask)
9223 final_mask = prepare_load_store_mask (mask_vectype, final_mask,
9224 vec_mask, gsi);
9225
9226 gcall *call;
9227 if (final_mask)
9228 {
9229 /* Emit:
9230 VEC_ARRAY = MASK_LOAD_LANES (DATAREF_PTR, ALIAS_PTR,
9231 VEC_MASK). */
9232 unsigned int align = TYPE_ALIGN_UNIT (TREE_TYPE (vectype));
9233 tree alias_ptr = build_int_cst (ref_type, align);
9234 call = gimple_build_call_internal (IFN_MASK_LOAD_LANES, 3,
9235 dataref_ptr, alias_ptr,
9236 final_mask);
9237 }
9238 else
9239 {
9240 /* Emit:
9241 VEC_ARRAY = LOAD_LANES (MEM_REF[...all elements...]). */
9242 data_ref = create_array_ref (aggr_type, dataref_ptr, ref_type);
9243 call = gimple_build_call_internal (IFN_LOAD_LANES, 1, data_ref);
9244 }
9245 gimple_call_set_lhs (call, vec_array);
9246 gimple_call_set_nothrow (call, true);
9247 vect_finish_stmt_generation (vinfo, stmt_info, call, gsi);
9248 new_stmt = call;
9249
9250 /* Extract each vector into an SSA_NAME. */
9251 for (i = 0; i < vec_num; i++)
9252 {
9253 new_temp = read_vector_array (vinfo, stmt_info, gsi, scalar_dest,
9254 vec_array, i);
9255 dr_chain.quick_push (new_temp);
9256 }
9257
9258 /* Record the mapping between SSA_NAMEs and statements. */
9259 vect_record_grouped_load_vectors (vinfo, stmt_info, dr_chain);
9260
9261 /* Record that VEC_ARRAY is now dead. */
9262 vect_clobber_variable (vinfo, stmt_info, gsi, vec_array);
9263 }
9264 else
9265 {
9266 for (i = 0; i < vec_num; i++)
9267 {
9268 tree final_mask = NULL_TREE;
9269 if (loop_masks
9270 && memory_access_type != VMAT_INVARIANT)
9271 final_mask = vect_get_loop_mask (gsi, loop_masks,
9272 vec_num * ncopies,
9273 vectype, vec_num * j + i);
9274 if (vec_mask)
9275 final_mask = prepare_load_store_mask (mask_vectype, final_mask,
9276 vec_mask, gsi);
9277
9278 if (i > 0)
9279 dataref_ptr = bump_vector_ptr (vinfo, dataref_ptr, ptr_incr,
9280 gsi, stmt_info, bump);
9281
9282 /* 2. Create the vector-load in the loop. */
9283 switch (alignment_support_scheme)
9284 {
9285 case dr_aligned:
9286 case dr_unaligned_supported:
9287 {
9288 unsigned int misalign;
9289 unsigned HOST_WIDE_INT align;
9290
9291 if (memory_access_type == VMAT_GATHER_SCATTER)
9292 {
9293 tree zero = build_zero_cst (vectype);
9294 tree scale = size_int (gs_info.scale);
9295 gcall *call;
9296 if (loop_masks)
9297 call = gimple_build_call_internal
9298 (IFN_MASK_GATHER_LOAD, 5, dataref_ptr,
9299 vec_offset, scale, zero, final_mask);
9300 else
9301 call = gimple_build_call_internal
9302 (IFN_GATHER_LOAD, 4, dataref_ptr,
9303 vec_offset, scale, zero);
9304 gimple_call_set_nothrow (call, true);
9305 new_stmt = call;
9306 data_ref = NULL_TREE;
9307 break;
9308 }
9309
9310 align =
9311 known_alignment (DR_TARGET_ALIGNMENT (first_dr_info));
9312 if (alignment_support_scheme == dr_aligned)
9313 {
9314 gcc_assert (aligned_access_p (first_dr_info));
9315 misalign = 0;
9316 }
9317 else if (DR_MISALIGNMENT (first_dr_info) == -1)
9318 {
9319 align = dr_alignment
9320 (vect_dr_behavior (vinfo, first_dr_info));
9321 misalign = 0;
9322 }
9323 else
9324 misalign = DR_MISALIGNMENT (first_dr_info);
9325 if (dataref_offset == NULL_TREE
9326 && TREE_CODE (dataref_ptr) == SSA_NAME)
9327 set_ptr_info_alignment (get_ptr_info (dataref_ptr),
9328 align, misalign);
9329
9330 if (final_mask)
9331 {
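/* Emit:
     VEC_DEST = MASK_LOAD (DATAREF_PTR, ALIAS_PTR, FINAL_MASK)
   with the known alignment encoded in ALIAS_PTR.  */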
9332 align = least_bit_hwi (misalign | align);
9333 tree ptr = build_int_cst (ref_type, align);
9334 gcall *call
9335 = gimple_build_call_internal (IFN_MASK_LOAD, 3,
9336 dataref_ptr, ptr,
9337 final_mask);
9338 gimple_call_set_nothrow (call, true);
9339 new_stmt = call;
9340 data_ref = NULL_TREE;
9341 }
9342 else if (loop_lens && memory_access_type != VMAT_INVARIANT)
9343 {
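/* Emit:
     VEC_DEST = LEN_LOAD (DATAREF_PTR, ALIAS_PTR, FINAL_LEN).  */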
9344 tree final_len
9345 = vect_get_loop_len (loop_vinfo, loop_lens,
9346 vec_num * ncopies,
9347 vec_num * j + i);
9348 align = least_bit_hwi (misalign | align);
9349 tree ptr = build_int_cst (ref_type, align);
9350 gcall *call
9351 = gimple_build_call_internal (IFN_LEN_LOAD, 3,
9352 dataref_ptr, ptr,
9353 final_len);
9354 gimple_call_set_nothrow (call, true);
9355 new_stmt = call;
9356 data_ref = NULL_TREE;
9357
9358 /* The target may perform the length-based load in a VnQI mode; if so, view-convert the result back to VECTYPE. */
9359 machine_mode vmode = TYPE_MODE (vectype);
9360 opt_machine_mode new_ovmode
9361 = get_len_load_store_mode (vmode, true);
9362 machine_mode new_vmode = new_ovmode.require ();
9363 if (vmode != new_vmode)
9364 {
9365 tree qi_type = unsigned_intQI_type_node;
9366 tree new_vtype
9367 = build_vector_type_for_mode (qi_type, new_vmode);
9368 tree var = vect_get_new_ssa_name (new_vtype,
9369 vect_simple_var);
9370 gimple_set_lhs (call, var);
9371 vect_finish_stmt_generation (vinfo, stmt_info, call,
9372 gsi);
9373 tree op = build1 (VIEW_CONVERT_EXPR, vectype, var);
9374 new_stmt
9375 = gimple_build_assign (vec_dest,
9376 VIEW_CONVERT_EXPR, op);
9377 }
9378 }
9379 else
9380 {
9381 tree ltype = vectype;
9382 tree new_vtype = NULL_TREE;
9383 unsigned HOST_WIDE_INT gap
9384 = DR_GROUP_GAP (first_stmt_info);
9385 unsigned int vect_align
9386 = vect_known_alignment_in_bytes (first_dr_info);
9387 unsigned int scalar_dr_size
9388 = vect_get_scalar_dr_size (first_dr_info);
9389 /* If there's no peeling for gaps but we have a gap
9390 with slp loads then load the lower half of the
9391 vector only. See get_group_load_store_type for
9392 when we apply this optimization. */
9393 if (slp
9394 && loop_vinfo
9395 && !LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo)
9396 && gap != 0
9397 && known_eq (nunits, (group_size - gap) * 2)
9398 && known_eq (nunits, group_size)
9399 && gap >= (vect_align / scalar_dr_size))
9400 {
9401 tree half_vtype;
9402 new_vtype
9403 = vector_vector_composition_type (vectype, 2,
9404 &half_vtype);
9405 if (new_vtype != NULL_TREE)
9406 ltype = half_vtype;
9407 }
9408 tree offset
9409 = (dataref_offset ? dataref_offset
9410 : build_int_cst (ref_type, 0));
9411 if (ltype != vectype
9412 && memory_access_type == VMAT_CONTIGUOUS_REVERSE)
9413 {
9414 unsigned HOST_WIDE_INT gap_offset
9415 = gap * tree_to_uhwi (TYPE_SIZE_UNIT (elem_type));
9416 tree gapcst = build_int_cst (ref_type, gap_offset);
9417 offset = size_binop (PLUS_EXPR, offset, gapcst);
9418 }
9419 data_ref
9420 = fold_build2 (MEM_REF, ltype, dataref_ptr, offset);
9421 if (alignment_support_scheme == dr_aligned)
9422 ;
9423 else if (DR_MISALIGNMENT (first_dr_info) == -1)
9424 TREE_TYPE (data_ref)
9425 = build_aligned_type (TREE_TYPE (data_ref),
9426 align * BITS_PER_UNIT);
9427 else
9428 TREE_TYPE (data_ref)
9429 = build_aligned_type (TREE_TYPE (data_ref),
9430 TYPE_ALIGN (elem_type));
9431 if (ltype != vectype)
9432 {
9433 vect_copy_ref_info (data_ref,
9434 DR_REF (first_dr_info->dr));
9435 tree tem = make_ssa_name (ltype);
9436 new_stmt = gimple_build_assign (tem, data_ref);
9437 vect_finish_stmt_generation (vinfo, stmt_info,
9438 new_stmt, gsi);
9439 data_ref = NULL;
9440 vec<constructor_elt, va_gc> *v;
9441 vec_alloc (v, 2);
9442 if (memory_access_type == VMAT_CONTIGUOUS_REVERSE)
9443 {
9444 CONSTRUCTOR_APPEND_ELT (v, NULL_TREE,
9445 build_zero_cst (ltype));
9446 CONSTRUCTOR_APPEND_ELT (v, NULL_TREE, tem);
9447 }
9448 else
9449 {
9450 CONSTRUCTOR_APPEND_ELT (v, NULL_TREE, tem);
9451 CONSTRUCTOR_APPEND_ELT (v, NULL_TREE,
9452 build_zero_cst (ltype));
9453 }
9454 gcc_assert (new_vtype != NULL_TREE);
9455 if (new_vtype == vectype)
9456 new_stmt = gimple_build_assign (
9457 vec_dest, build_constructor (vectype, v));
9458 else
9459 {
9460 tree new_vname = make_ssa_name (new_vtype);
9461 new_stmt = gimple_build_assign (
9462 new_vname, build_constructor (new_vtype, v));
9463 vect_finish_stmt_generation (vinfo, stmt_info,
9464 new_stmt, gsi);
9465 new_stmt = gimple_build_assign (
9466 vec_dest, build1 (VIEW_CONVERT_EXPR, vectype,
9467 new_vname));
9468 }
9469 }
9470 }
9471 break;
9472 }
9473 case dr_explicit_realign:
9474 {
9475 tree ptr, bump;
9476
9477 tree vs = size_int (TYPE_VECTOR_SUBPARTS (vectype));
9478
9479 if (compute_in_loop)
9480 msq = vect_setup_realignment (vinfo, first_stmt_info, gsi,
9481 &realignment_token,
9482 dr_explicit_realign,
9483 dataref_ptr, NULL);
9484
9485 if (TREE_CODE (dataref_ptr) == SSA_NAME)
9486 ptr = copy_ssa_name (dataref_ptr);
9487 else
9488 ptr = make_ssa_name (TREE_TYPE (dataref_ptr));
9489 // For explicit realign the target alignment should be
9490 // known at compile time.
9491 unsigned HOST_WIDE_INT align =
9492 DR_TARGET_ALIGNMENT (first_dr_info).to_constant ();
9493 new_stmt = gimple_build_assign
9494 (ptr, BIT_AND_EXPR, dataref_ptr,
9495 build_int_cst
9496 (TREE_TYPE (dataref_ptr),
9497 -(HOST_WIDE_INT) align));
9498 vect_finish_stmt_generation (vinfo, stmt_info,
9499 new_stmt, gsi);
9500 data_ref
9501 = build2 (MEM_REF, vectype, ptr,
9502 build_int_cst (ref_type, 0));
9503 vect_copy_ref_info (data_ref, DR_REF (first_dr_info->dr));
9504 vec_dest = vect_create_destination_var (scalar_dest,
9505 vectype);
9506 new_stmt = gimple_build_assign (vec_dest, data_ref);
9507 new_temp = make_ssa_name (vec_dest, new_stmt);
9508 gimple_assign_set_lhs (new_stmt, new_temp);
9509 gimple_move_vops (new_stmt, stmt_info->stmt);
9510 vect_finish_stmt_generation (vinfo, stmt_info,
9511 new_stmt, gsi);
9512 msq = new_temp;
9513
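/* Compute the address for the second (LSQ) load: advance the pointer
   by the vector size minus one byte and mask it down to the target
   alignment.  */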
9514 bump = size_binop (MULT_EXPR, vs,
9515 TYPE_SIZE_UNIT (elem_type));
9516 bump = size_binop (MINUS_EXPR, bump, size_one_node);
9517 ptr = bump_vector_ptr (vinfo, dataref_ptr, NULL, gsi,
9518 stmt_info, bump);
9519 new_stmt = gimple_build_assign
9520 (NULL_TREE, BIT_AND_EXPR, ptr,
9521 build_int_cst
9522 (TREE_TYPE (ptr), -(HOST_WIDE_INT) align));
9523 ptr = copy_ssa_name (ptr, new_stmt);
9524 gimple_assign_set_lhs (new_stmt, ptr);
9525 vect_finish_stmt_generation (vinfo, stmt_info,
9526 new_stmt, gsi);
9527 data_ref
9528 = build2 (MEM_REF, vectype, ptr,
9529 build_int_cst (ref_type, 0));
9530 break;
9531 }
9532 case dr_explicit_realign_optimized:
9533 {
9534 if (TREE_CODE (dataref_ptr) == SSA_NAME)
9535 new_temp = copy_ssa_name (dataref_ptr);
9536 else
9537 new_temp = make_ssa_name (TREE_TYPE (dataref_ptr));
9538 // We should only be doing this if we know the target
9539 // alignment at compile time.
9540 unsigned HOST_WIDE_INT align =
9541 DR_TARGET_ALIGNMENT (first_dr_info).to_constant ();
9542 new_stmt = gimple_build_assign
9543 (new_temp, BIT_AND_EXPR, dataref_ptr,
9544 build_int_cst (TREE_TYPE (dataref_ptr),
9545 -(HOST_WIDE_INT) align));
9546 vect_finish_stmt_generation (vinfo, stmt_info,
9547 new_stmt, gsi);
9548 data_ref
9549 = build2 (MEM_REF, vectype, new_temp,
9550 build_int_cst (ref_type, 0));
9551 break;
9552 }
9553 default:
9554 gcc_unreachable ();
9555 }
9556 vec_dest = vect_create_destination_var (scalar_dest, vectype);
9557 /* DATA_REF is null if we've already built the statement. */
9558 if (data_ref)
9559 {
9560 vect_copy_ref_info (data_ref, DR_REF (first_dr_info->dr));
9561 new_stmt = gimple_build_assign (vec_dest, data_ref);
9562 }
9563 new_temp = make_ssa_name (vec_dest, new_stmt);
9564 gimple_set_lhs (new_stmt, new_temp);
9565 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
9566
9567 /* 3. Handle explicit realignment if necessary/supported.
9568 Create in loop:
9569 vec_dest = realign_load (msq, lsq, realignment_token) */
9570 if (alignment_support_scheme == dr_explicit_realign_optimized
9571 || alignment_support_scheme == dr_explicit_realign)
9572 {
9573 lsq = gimple_assign_lhs (new_stmt);
9574 if (!realignment_token)
9575 realignment_token = dataref_ptr;
9576 vec_dest = vect_create_destination_var (scalar_dest, vectype);
9577 new_stmt = gimple_build_assign (vec_dest, REALIGN_LOAD_EXPR,
9578 msq, lsq, realignment_token);
9579 new_temp = make_ssa_name (vec_dest, new_stmt);
9580 gimple_assign_set_lhs (new_stmt, new_temp);
9581 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
9582
9583 if (alignment_support_scheme == dr_explicit_realign_optimized)
9584 {
9585 gcc_assert (phi);
9586 if (i == vec_num - 1 && j == ncopies - 1)
9587 add_phi_arg (phi, lsq,
9588 loop_latch_edge (containing_loop),
9589 UNKNOWN_LOCATION);
9590 msq = lsq;
9591 }
9592 }
9593
9594 if (memory_access_type == VMAT_CONTIGUOUS_REVERSE)
9595 {
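/* A reverse access loads the elements in reverse order; permute the
   vector back into the original scalar order.  */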
9596 tree perm_mask = perm_mask_for_reverse (vectype);
9597 new_temp = permute_vec_elements (vinfo, new_temp, new_temp,
9598 perm_mask, stmt_info, gsi);
9599 new_stmt = SSA_NAME_DEF_STMT (new_temp);
9600 }
9601
9602 /* Collect vector loads and later create their permutation in
9603 vect_transform_grouped_load (). */
9604 if (grouped_load || slp_perm)
9605 dr_chain.quick_push (new_temp);
9606
9607 /* Store vector loads in the corresponding SLP_NODE. */
9608 if (slp && !slp_perm)
9609 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
9610
9611 /* With an SLP permutation we load the gaps as well; without
9612 one we need to skip the gaps after we manage to fully load
9613 all elements. group_gap_adj is DR_GROUP_SIZE here. */
9614 group_elt += nunits;
9615 if (maybe_ne (group_gap_adj, 0U)
9616 && !slp_perm
9617 && known_eq (group_elt, group_size - group_gap_adj))
9618 {
9619 poly_wide_int bump_val
9620 = (wi::to_wide (TYPE_SIZE_UNIT (elem_type))
9621 * group_gap_adj);
9622 tree bump = wide_int_to_tree (sizetype, bump_val);
9623 dataref_ptr = bump_vector_ptr (vinfo, dataref_ptr, ptr_incr,
9624 gsi, stmt_info, bump);
9625 group_elt = 0;
9626 }
9627 }
9628 /* Bump the vector pointer to account for a gap or for excess
9629 elements loaded for a permuted SLP load. */
9630 if (maybe_ne (group_gap_adj, 0U) && slp_perm)
9631 {
9632 poly_wide_int bump_val
9633 = (wi::to_wide (TYPE_SIZE_UNIT (elem_type))
9634 * group_gap_adj);
9635 tree bump = wide_int_to_tree (sizetype, bump_val);
9636 dataref_ptr = bump_vector_ptr (vinfo, dataref_ptr, ptr_incr, gsi,
9637 stmt_info, bump);
9638 }
9639 }
9640
9641 if (slp && !slp_perm)
9642 continue;
9643
9644 if (slp_perm)
9645 {
9646 unsigned n_perms;
9647 bool ok = vect_transform_slp_perm_load (vinfo, slp_node, dr_chain,
9648 gsi, vf, false, &n_perms);
9649 gcc_assert (ok);
9650 }
9651 else
9652 {
9653 if (grouped_load)
9654 {
9655 if (memory_access_type != VMAT_LOAD_STORE_LANES)
9656 vect_transform_grouped_load (vinfo, stmt_info, dr_chain,
9657 group_size, gsi);
9658 *vec_stmt = STMT_VINFO_VEC_STMTS (stmt_info)[0];
9659 }
9660 else
9661 {
9662 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
9663 }
9664 }
9665 dr_chain.release ();
9666 }
9667 if (!slp)
9668 *vec_stmt = STMT_VINFO_VEC_STMTS (stmt_info)[0];
9669
9670 return true;
9671 }
9672
9673 /* Function vect_is_simple_cond.
9674
9675 Input:
9676 VINFO - the vec_info for the loop or basic block being vectorized.
9677 COND - the condition that is checked for simple use.
9678 
9679 Output:
9680 *COMP_VECTYPE - the vector type for the comparison.
9681 *DTS - the def types for the arguments of the comparison.
9682 
9683 Returns whether COND can be vectorized. Checks whether the
9684 condition operands are supportable using vect_is_simple_use. */
9685
9686 static bool
9687 vect_is_simple_cond (tree cond, vec_info *vinfo, stmt_vec_info stmt_info,
9688 slp_tree slp_node, tree *comp_vectype,
9689 enum vect_def_type *dts, tree vectype)
9690 {
9691 tree lhs, rhs;
9692 tree vectype1 = NULL_TREE, vectype2 = NULL_TREE;
9693 slp_tree slp_op;
9694
9695 /* Mask case. */
9696 if (TREE_CODE (cond) == SSA_NAME
9697 && VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (cond)))
9698 {
9699 if (!vect_is_simple_use (vinfo, stmt_info, slp_node, 0, &cond,
9700 &slp_op, &dts[0], comp_vectype)
9701 || !*comp_vectype
9702 || !VECTOR_BOOLEAN_TYPE_P (*comp_vectype))
9703 return false;
9704 return true;
9705 }
9706
9707 if (!COMPARISON_CLASS_P (cond))
9708 return false;
9709
9710 lhs = TREE_OPERAND (cond, 0);
9711 rhs = TREE_OPERAND (cond, 1);
9712
9713 if (TREE_CODE (lhs) == SSA_NAME)
9714 {
9715 if (!vect_is_simple_use (vinfo, stmt_info, slp_node, 0,
9716 &lhs, &slp_op, &dts[0], &vectype1))
9717 return false;
9718 }
9719 else if (TREE_CODE (lhs) == INTEGER_CST || TREE_CODE (lhs) == REAL_CST
9720 || TREE_CODE (lhs) == FIXED_CST)
9721 dts[0] = vect_constant_def;
9722 else
9723 return false;
9724
9725 if (TREE_CODE (rhs) == SSA_NAME)
9726 {
9727 if (!vect_is_simple_use (vinfo, stmt_info, slp_node, 1,
9728 &rhs, &slp_op, &dts[1], &vectype2))
9729 return false;
9730 }
9731 else if (TREE_CODE (rhs) == INTEGER_CST || TREE_CODE (rhs) == REAL_CST
9732 || TREE_CODE (rhs) == FIXED_CST)
9733 dts[1] = vect_constant_def;
9734 else
9735 return false;
9736
9737 if (vectype1 && vectype2
9738 && maybe_ne (TYPE_VECTOR_SUBPARTS (vectype1),
9739 TYPE_VECTOR_SUBPARTS (vectype2)))
9740 return false;
9741
9742 *comp_vectype = vectype1 ? vectype1 : vectype2;
9743 /* Invariant comparison. */
9744 if (! *comp_vectype)
9745 {
9746 tree scalar_type = TREE_TYPE (lhs);
9747 if (VECT_SCALAR_BOOLEAN_TYPE_P (scalar_type))
9748 *comp_vectype = truth_type_for (vectype);
9749 else
9750 {
9751 /* If we can widen the comparison to match vectype do so. */
9752 if (INTEGRAL_TYPE_P (scalar_type)
9753 && !slp_node
9754 && tree_int_cst_lt (TYPE_SIZE (scalar_type),
9755 TYPE_SIZE (TREE_TYPE (vectype))))
9756 scalar_type = build_nonstandard_integer_type
9757 (vector_element_bits (vectype), TYPE_UNSIGNED (scalar_type));
9758 *comp_vectype = get_vectype_for_scalar_type (vinfo, scalar_type,
9759 slp_node);
9760 }
9761 }
9762
9763 return true;
9764 }
9765
9766 /* vectorizable_condition.
9767
9768 Check if STMT_INFO is a conditional modify expression that can be vectorized.
9769 If VEC_STMT is also passed, vectorize STMT_INFO: create a vectorized
9770 stmt using VEC_COND_EXPR to replace it, put it in VEC_STMT, and insert it
9771 at GSI.
9772
9773 When STMT_INFO is vectorized as a nested cycle, for_reduction is true.
9774
9775 Return true if STMT_INFO is vectorizable in this way. */
9776
9777 static bool
9778 vectorizable_condition (vec_info *vinfo,
9779 stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
9780 gimple **vec_stmt,
9781 slp_tree slp_node, stmt_vector_for_cost *cost_vec)
9782 {
9783 tree scalar_dest = NULL_TREE;
9784 tree vec_dest = NULL_TREE;
9785 tree cond_expr, cond_expr0 = NULL_TREE, cond_expr1 = NULL_TREE;
9786 tree then_clause, else_clause;
9787 tree comp_vectype = NULL_TREE;
9788 tree vec_cond_lhs = NULL_TREE, vec_cond_rhs = NULL_TREE;
9789 tree vec_then_clause = NULL_TREE, vec_else_clause = NULL_TREE;
9790 tree vec_compare;
9791 tree new_temp;
9792 loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
9793 enum vect_def_type dts[4]
9794 = {vect_unknown_def_type, vect_unknown_def_type,
9795 vect_unknown_def_type, vect_unknown_def_type};
9796 int ndts = 4;
9797 int ncopies;
9798 int vec_num;
9799 enum tree_code code, cond_code, bitop1 = NOP_EXPR, bitop2 = NOP_EXPR;
9800 int i;
9801 bb_vec_info bb_vinfo = dyn_cast <bb_vec_info> (vinfo);
9802 vec<tree> vec_oprnds0 = vNULL;
9803 vec<tree> vec_oprnds1 = vNULL;
9804 vec<tree> vec_oprnds2 = vNULL;
9805 vec<tree> vec_oprnds3 = vNULL;
9806 tree vec_cmp_type;
9807 bool masked = false;
9808
9809 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
9810 return false;
9811
9812 /* Is vectorizable conditional operation? */
9813 gassign *stmt = dyn_cast <gassign *> (stmt_info->stmt);
9814 if (!stmt)
9815 return false;
9816
9817 code = gimple_assign_rhs_code (stmt);
9818 if (code != COND_EXPR)
9819 return false;
9820
9821 stmt_vec_info reduc_info = NULL;
9822 int reduc_index = -1;
9823 vect_reduction_type reduction_type = TREE_CODE_REDUCTION;
9824 bool for_reduction
9825 = STMT_VINFO_REDUC_DEF (vect_orig_stmt (stmt_info)) != NULL;
9826 if (for_reduction)
9827 {
9828 if (STMT_SLP_TYPE (stmt_info))
9829 return false;
9830 reduc_info = info_for_reduction (vinfo, stmt_info);
9831 reduction_type = STMT_VINFO_REDUC_TYPE (reduc_info);
9832 reduc_index = STMT_VINFO_REDUC_IDX (stmt_info);
9833 gcc_assert (reduction_type != EXTRACT_LAST_REDUCTION
9834 || reduc_index != -1);
9835 }
9836 else
9837 {
9838 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
9839 return false;
9840 }
9841
9842 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
9843 tree vectype1 = NULL_TREE, vectype2 = NULL_TREE;
9844
9845 if (slp_node)
9846 {
9847 ncopies = 1;
9848 vec_num = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
9849 }
9850 else
9851 {
9852 ncopies = vect_get_num_copies (loop_vinfo, vectype);
9853 vec_num = 1;
9854 }
9855
9856 gcc_assert (ncopies >= 1);
9857 if (for_reduction && ncopies > 1)
9858 return false; /* FORNOW */
9859
9860 cond_expr = gimple_assign_rhs1 (stmt);
9861
9862 if (!vect_is_simple_cond (cond_expr, vinfo, stmt_info, slp_node,
9863 &comp_vectype, &dts[0], vectype)
9864 || !comp_vectype)
9865 return false;
9866
9867 unsigned op_adjust = COMPARISON_CLASS_P (cond_expr) ? 1 : 0;
9868 slp_tree then_slp_node, else_slp_node;
9869 if (!vect_is_simple_use (vinfo, stmt_info, slp_node, 1 + op_adjust,
9870 &then_clause, &then_slp_node, &dts[2], &vectype1))
9871 return false;
9872 if (!vect_is_simple_use (vinfo, stmt_info, slp_node, 2 + op_adjust,
9873 &else_clause, &else_slp_node, &dts[3], &vectype2))
9874 return false;
9875
9876 if (vectype1 && !useless_type_conversion_p (vectype, vectype1))
9877 return false;
9878
9879 if (vectype2 && !useless_type_conversion_p (vectype, vectype2))
9880 return false;
9881
9882 masked = !COMPARISON_CLASS_P (cond_expr);
9883 vec_cmp_type = truth_type_for (comp_vectype);
9884
9885 if (vec_cmp_type == NULL_TREE)
9886 return false;
9887
9888 cond_code = TREE_CODE (cond_expr);
9889 if (!masked)
9890 {
9891 cond_expr0 = TREE_OPERAND (cond_expr, 0);
9892 cond_expr1 = TREE_OPERAND (cond_expr, 1);
9893 }
9894
9895 /* For conditional reductions, the "then" value needs to be the candidate
9896 value calculated by this iteration while the "else" value needs to be
9897 the result carried over from previous iterations. If the COND_EXPR
9898 is the other way around, we need to swap it. */
9899 bool must_invert_cmp_result = false;
9900 if (reduction_type == EXTRACT_LAST_REDUCTION && reduc_index == 1)
9901 {
9902 if (masked)
9903 must_invert_cmp_result = true;
9904 else
9905 {
9906 bool honor_nans = HONOR_NANS (TREE_TYPE (cond_expr0));
9907 tree_code new_code = invert_tree_comparison (cond_code, honor_nans);
9908 if (new_code == ERROR_MARK)
9909 must_invert_cmp_result = true;
9910 else
9911 {
9912 cond_code = new_code;
9913 /* Make sure we don't accidentally use the old condition. */
9914 cond_expr = NULL_TREE;
9915 }
9916 }
9917 std::swap (then_clause, else_clause);
9918 }
9919
9920 if (!masked && VECTOR_BOOLEAN_TYPE_P (comp_vectype))
9921 {
9922 /* Boolean values may have another representation in vectors
9923 and therefore we prefer bit operations over comparison for
9924 them (which also works for scalar masks). We store opcodes
9925 to use in bitop1 and bitop2. Statement is vectorized as
9926 BITOP2 (rhs1 BITOP1 rhs2) or rhs1 BITOP2 (BITOP1 rhs2)
9927 depending on bitop1 and bitop2 arity. */
9928 switch (cond_code)
9929 {
9930 case GT_EXPR:
9931 bitop1 = BIT_NOT_EXPR;
9932 bitop2 = BIT_AND_EXPR;
9933 break;
9934 case GE_EXPR:
9935 bitop1 = BIT_NOT_EXPR;
9936 bitop2 = BIT_IOR_EXPR;
9937 break;
9938 case LT_EXPR:
9939 bitop1 = BIT_NOT_EXPR;
9940 bitop2 = BIT_AND_EXPR;
9941 std::swap (cond_expr0, cond_expr1);
9942 break;
9943 case LE_EXPR:
9944 bitop1 = BIT_NOT_EXPR;
9945 bitop2 = BIT_IOR_EXPR;
9946 std::swap (cond_expr0, cond_expr1);
9947 break;
9948 case NE_EXPR:
9949 bitop1 = BIT_XOR_EXPR;
9950 break;
9951 case EQ_EXPR:
9952 bitop1 = BIT_XOR_EXPR;
9953 bitop2 = BIT_NOT_EXPR;
9954 break;
9955 default:
9956 return false;
9957 }
9958 cond_code = SSA_NAME;
9959 }
9960
9961 if (TREE_CODE_CLASS (cond_code) == tcc_comparison
9962 && reduction_type == EXTRACT_LAST_REDUCTION
9963 && !expand_vec_cmp_expr_p (comp_vectype, vec_cmp_type, cond_code))
9964 {
9965 if (dump_enabled_p ())
9966 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
9967 "reduction comparison operation not supported.\n");
9968 return false;
9969 }
9970
9971 if (!vec_stmt)
9972 {
9973 if (bitop1 != NOP_EXPR)
9974 {
9975 machine_mode mode = TYPE_MODE (comp_vectype);
9976 optab optab;
9977
9978 optab = optab_for_tree_code (bitop1, comp_vectype, optab_default);
9979 if (!optab || optab_handler (optab, mode) == CODE_FOR_nothing)
9980 return false;
9981
9982 if (bitop2 != NOP_EXPR)
9983 {
9984 optab = optab_for_tree_code (bitop2, comp_vectype,
9985 optab_default);
9986 if (!optab || optab_handler (optab, mode) == CODE_FOR_nothing)
9987 return false;
9988 }
9989 }
9990
9991 vect_cost_for_stmt kind = vector_stmt;
9992 if (reduction_type == EXTRACT_LAST_REDUCTION)
9993 /* Count one reduction-like operation per vector. */
9994 kind = vec_to_scalar;
9995 else if (!expand_vec_cond_expr_p (vectype, comp_vectype, cond_code))
9996 return false;
9997
9998 if (slp_node
9999 && (!vect_maybe_update_slp_op_vectype
10000 (SLP_TREE_CHILDREN (slp_node)[0], comp_vectype)
10001 || (op_adjust == 1
10002 && !vect_maybe_update_slp_op_vectype
10003 (SLP_TREE_CHILDREN (slp_node)[1], comp_vectype))
10004 || !vect_maybe_update_slp_op_vectype (then_slp_node, vectype)
10005 || !vect_maybe_update_slp_op_vectype (else_slp_node, vectype)))
10006 {
10007 if (dump_enabled_p ())
10008 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
10009 "incompatible vector types for invariants\n");
10010 return false;
10011 }
10012
10013 if (loop_vinfo && for_reduction
10014 && LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo))
10015 {
10016 if (reduction_type == EXTRACT_LAST_REDUCTION)
10017 vect_record_loop_mask (loop_vinfo, &LOOP_VINFO_MASKS (loop_vinfo),
10018 ncopies * vec_num, vectype, NULL);
10019 /* Extra inactive lanes should be safe for vect_nested_cycle. */
10020 else if (STMT_VINFO_DEF_TYPE (reduc_info) != vect_nested_cycle)
10021 {
10022 if (dump_enabled_p ())
10023 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
10024 "conditional reduction prevents the use"
10025 " of partial vectors.\n");
10026 LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo) = false;
10027 }
10028 }
10029
10030 STMT_VINFO_TYPE (stmt_info) = condition_vec_info_type;
10031 vect_model_simple_cost (vinfo, stmt_info, ncopies, dts, ndts, slp_node,
10032 cost_vec, kind);
10033 return true;
10034 }
10035
10036 /* Transform. */
10037
10038 if (!slp_node)
10039 {
10040 vec_oprnds0.create (1);
10041 vec_oprnds1.create (1);
10042 vec_oprnds2.create (1);
10043 vec_oprnds3.create (1);
10044 }
10045
10046 /* Handle def. */
10047 scalar_dest = gimple_assign_lhs (stmt);
10048 if (reduction_type != EXTRACT_LAST_REDUCTION)
10049 vec_dest = vect_create_destination_var (scalar_dest, vectype);
10050
10051 bool swap_cond_operands = false;
10052
10053 /* See whether another part of the vectorized code applies a loop
10054 mask to the condition, or to its inverse. */
10055
10056 vec_loop_masks *masks = NULL;
10057 if (loop_vinfo && LOOP_VINFO_FULLY_MASKED_P (loop_vinfo))
10058 {
10059 if (reduction_type == EXTRACT_LAST_REDUCTION)
10060 masks = &LOOP_VINFO_MASKS (loop_vinfo);
10061 else
10062 {
10063 scalar_cond_masked_key cond (cond_expr, ncopies);
10064 if (loop_vinfo->scalar_cond_masked_set.contains (cond))
10065 masks = &LOOP_VINFO_MASKS (loop_vinfo);
10066 else
10067 {
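/* The condition itself is not masked elsewhere; check whether its
   inverse is, in which case we can use the inverted code and swap
   the then/else operands below.  */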
10068 bool honor_nans = HONOR_NANS (TREE_TYPE (cond.op0));
10069 cond.code = invert_tree_comparison (cond.code, honor_nans);
10070 if (loop_vinfo->scalar_cond_masked_set.contains (cond))
10071 {
10072 masks = &LOOP_VINFO_MASKS (loop_vinfo);
10073 cond_code = cond.code;
10074 swap_cond_operands = true;
10075 }
10076 }
10077 }
10078 }
10079
10080 /* Handle cond expr. */
10081 if (masked)
10082 vect_get_vec_defs (vinfo, stmt_info, slp_node, ncopies,
10083 cond_expr, &vec_oprnds0, comp_vectype,
10084 then_clause, &vec_oprnds2, vectype,
10085 reduction_type != EXTRACT_LAST_REDUCTION
10086 ? else_clause : NULL, &vec_oprnds3, vectype);
10087 else
10088 vect_get_vec_defs (vinfo, stmt_info, slp_node, ncopies,
10089 cond_expr0, &vec_oprnds0, comp_vectype,
10090 cond_expr1, &vec_oprnds1, comp_vectype,
10091 then_clause, &vec_oprnds2, vectype,
10092 reduction_type != EXTRACT_LAST_REDUCTION
10093 ? else_clause : NULL, &vec_oprnds3, vectype);
10094
10095 /* Arguments are ready. Create the new vector stmt. */
10096 FOR_EACH_VEC_ELT (vec_oprnds0, i, vec_cond_lhs)
10097 {
10098 vec_then_clause = vec_oprnds2[i];
10099 if (reduction_type != EXTRACT_LAST_REDUCTION)
10100 vec_else_clause = vec_oprnds3[i];
10101
10102 if (swap_cond_operands)
10103 std::swap (vec_then_clause, vec_else_clause);
10104
10105 if (masked)
10106 vec_compare = vec_cond_lhs;
10107 else
10108 {
10109 vec_cond_rhs = vec_oprnds1[i];
10110 if (bitop1 == NOP_EXPR)
10111 {
10112 gimple_seq stmts = NULL;
10113 vec_compare = gimple_build (&stmts, cond_code, vec_cmp_type,
10114 vec_cond_lhs, vec_cond_rhs);
10115 gsi_insert_before (gsi, stmts, GSI_SAME_STMT);
10116 }
10117 else
10118 {
10119 new_temp = make_ssa_name (vec_cmp_type);
10120 gassign *new_stmt;
10121 if (bitop1 == BIT_NOT_EXPR)
10122 new_stmt = gimple_build_assign (new_temp, bitop1,
10123 vec_cond_rhs);
10124 else
10125 new_stmt
10126 = gimple_build_assign (new_temp, bitop1, vec_cond_lhs,
10127 vec_cond_rhs);
10128 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
10129 if (bitop2 == NOP_EXPR)
10130 vec_compare = new_temp;
10131 else if (bitop2 == BIT_NOT_EXPR)
10132 {
10133 /* Instead of doing ~x ? y : z do x ? z : y. */
10134 vec_compare = new_temp;
10135 std::swap (vec_then_clause, vec_else_clause);
10136 }
10137 else
10138 {
10139 vec_compare = make_ssa_name (vec_cmp_type);
10140 new_stmt
10141 = gimple_build_assign (vec_compare, bitop2,
10142 vec_cond_lhs, new_temp);
10143 vect_finish_stmt_generation (vinfo, stmt_info,
10144 new_stmt, gsi);
10145 }
10146 }
10147 }
10148
10149 /* If we decided to apply a loop mask to the result of the vector
10150 comparison, AND the comparison with the mask now. Later passes
10151 should then be able to reuse the AND results between multiple
10152 vector statements.
10153
10154 For example:
10155 for (int i = 0; i < 100; ++i)
10156 x[i] = y[i] ? z[i] : 10;
10157
10158 results in following optimized GIMPLE:
10159
10160 mask__35.8_43 = vect__4.7_41 != { 0, ... };
10161 vec_mask_and_46 = loop_mask_40 & mask__35.8_43;
10162 _19 = &MEM[base: z_12(D), index: ivtmp_56, step: 4, offset: 0B];
10163 vect_iftmp.11_47 = .MASK_LOAD (_19, 4B, vec_mask_and_46);
10164 vect_iftmp.12_52 = VEC_COND_EXPR <vec_mask_and_46,
10165 vect_iftmp.11_47, { 10, ... }>;
10166
10167 instead of using masked and unmasked forms of
10168 vec != { 0, ... } (masked in the MASK_LOAD,
10169 unmasked in the VEC_COND_EXPR). */
10170
10171 /* Force vec_compare to be an SSA_NAME rather than a comparison,
10172 in cases where that's necessary. */
10173
10174 if (masks || reduction_type == EXTRACT_LAST_REDUCTION)
10175 {
10176 if (!is_gimple_val (vec_compare))
10177 {
10178 tree vec_compare_name = make_ssa_name (vec_cmp_type);
10179 gassign *new_stmt = gimple_build_assign (vec_compare_name,
10180 vec_compare);
10181 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
10182 vec_compare = vec_compare_name;
10183 }
10184
10185 if (must_invert_cmp_result)
10186 {
10187 tree vec_compare_name = make_ssa_name (vec_cmp_type);
10188 gassign *new_stmt = gimple_build_assign (vec_compare_name,
10189 BIT_NOT_EXPR,
10190 vec_compare);
10191 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
10192 vec_compare = vec_compare_name;
10193 }
10194
10195 if (masks)
10196 {
10197 unsigned vec_num = vec_oprnds0.length ();
10198 tree loop_mask
10199 = vect_get_loop_mask (gsi, masks, vec_num * ncopies,
10200 vectype, i);
10201 tree tmp2 = make_ssa_name (vec_cmp_type);
10202 gassign *g
10203 = gimple_build_assign (tmp2, BIT_AND_EXPR, vec_compare,
10204 loop_mask);
10205 vect_finish_stmt_generation (vinfo, stmt_info, g, gsi);
10206 vec_compare = tmp2;
10207 }
10208 }
10209
10210 gimple *new_stmt;
10211 if (reduction_type == EXTRACT_LAST_REDUCTION)
10212 {
10213 gimple *old_stmt = vect_orig_stmt (stmt_info)->stmt;
10214 tree lhs = gimple_get_lhs (old_stmt);
10215 new_stmt = gimple_build_call_internal
10216 (IFN_FOLD_EXTRACT_LAST, 3, else_clause, vec_compare,
10217 vec_then_clause);
10218 gimple_call_set_lhs (new_stmt, lhs);
10219 SSA_NAME_DEF_STMT (lhs) = new_stmt;
10220 if (old_stmt == gsi_stmt (*gsi))
10221 vect_finish_replace_stmt (vinfo, stmt_info, new_stmt);
10222 else
10223 {
10224 /* In this case we're moving the definition to later in the
10225 block. That doesn't matter because the only uses of the
10226 lhs are in phi statements. */
10227 gimple_stmt_iterator old_gsi = gsi_for_stmt (old_stmt);
10228 gsi_remove (&old_gsi, true);
10229 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
10230 }
10231 }
10232 else
10233 {
10234 new_temp = make_ssa_name (vec_dest);
10235 new_stmt = gimple_build_assign (new_temp, VEC_COND_EXPR, vec_compare,
10236 vec_then_clause, vec_else_clause);
10237 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
10238 }
10239 if (slp_node)
10240 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
10241 else
10242 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
10243 }
10244
10245 if (!slp_node)
10246 *vec_stmt = STMT_VINFO_VEC_STMTS (stmt_info)[0];
10247
10248 vec_oprnds0.release ();
10249 vec_oprnds1.release ();
10250 vec_oprnds2.release ();
10251 vec_oprnds3.release ();
10252
10253 return true;
10254 }
10255
10256 /* vectorizable_comparison.
10257
10258 Check if STMT_INFO is a comparison expression that can be vectorized.
10259 If VEC_STMT is also passed, vectorize STMT_INFO: create a vectorized
10260 comparison, put it in VEC_STMT, and insert it at GSI.
10261
10262 Return true if STMT_INFO is vectorizable in this way. */
10263
10264 static bool
10265 vectorizable_comparison (vec_info *vinfo,
10266 stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
10267 gimple **vec_stmt,
10268 slp_tree slp_node, stmt_vector_for_cost *cost_vec)
10269 {
10270 tree lhs, rhs1, rhs2;
10271 tree vectype1 = NULL_TREE, vectype2 = NULL_TREE;
10272 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
10273 tree vec_rhs1 = NULL_TREE, vec_rhs2 = NULL_TREE;
10274 tree new_temp;
10275 loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
10276 enum vect_def_type dts[2] = {vect_unknown_def_type, vect_unknown_def_type};
10277 int ndts = 2;
10278 poly_uint64 nunits;
10279 int ncopies;
10280 enum tree_code code, bitop1 = NOP_EXPR, bitop2 = NOP_EXPR;
10281 int i;
10282 bb_vec_info bb_vinfo = dyn_cast <bb_vec_info> (vinfo);
10283 vec<tree> vec_oprnds0 = vNULL;
10284 vec<tree> vec_oprnds1 = vNULL;
10285 tree mask_type;
10286 tree mask;
10287
10288 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
10289 return false;
10290
10291 if (!vectype || !VECTOR_BOOLEAN_TYPE_P (vectype))
10292 return false;
10293
10294 mask_type = vectype;
10295 nunits = TYPE_VECTOR_SUBPARTS (vectype);
10296
10297 if (slp_node)
10298 ncopies = 1;
10299 else
10300 ncopies = vect_get_num_copies (loop_vinfo, vectype);
10301
10302 gcc_assert (ncopies >= 1);
10303 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
10304 return false;
10305
10306 gassign *stmt = dyn_cast <gassign *> (stmt_info->stmt);
10307 if (!stmt)
10308 return false;
10309
10310 code = gimple_assign_rhs_code (stmt);
10311
10312 if (TREE_CODE_CLASS (code) != tcc_comparison)
10313 return false;
10314
10315 slp_tree slp_rhs1, slp_rhs2;
10316 if (!vect_is_simple_use (vinfo, stmt_info, slp_node,
10317 0, &rhs1, &slp_rhs1, &dts[0], &vectype1))
10318 return false;
10319
10320 if (!vect_is_simple_use (vinfo, stmt_info, slp_node,
10321 1, &rhs2, &slp_rhs2, &dts[1], &vectype2))
10322 return false;
10323
10324 if (vectype1 && vectype2
10325 && maybe_ne (TYPE_VECTOR_SUBPARTS (vectype1),
10326 TYPE_VECTOR_SUBPARTS (vectype2)))
10327 return false;
10328
10329 vectype = vectype1 ? vectype1 : vectype2;
10330
10331 /* Invariant comparison. */
10332 if (!vectype)
10333 {
10334 if (VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (rhs1)))
10335 vectype = mask_type;
10336 else
10337 vectype = get_vectype_for_scalar_type (vinfo, TREE_TYPE (rhs1),
10338 slp_node);
10339 if (!vectype || maybe_ne (TYPE_VECTOR_SUBPARTS (vectype), nunits))
10340 return false;
10341 }
10342 else if (maybe_ne (nunits, TYPE_VECTOR_SUBPARTS (vectype)))
10343 return false;
10344
10345 /* Can't compare mask and non-mask types. */
10346 if (vectype1 && vectype2
10347 && (VECTOR_BOOLEAN_TYPE_P (vectype1) ^ VECTOR_BOOLEAN_TYPE_P (vectype2)))
10348 return false;
10349
10350 /* Boolean values may have another representation in vectors
10351 and therefore we prefer bit operations over comparison for
10352 them (which also works for scalar masks). We store opcodes
10353 to use in bitop1 and bitop2. Statement is vectorized as
10354 BITOP2 (rhs1 BITOP1 rhs2) or
10355 rhs1 BITOP2 (BITOP1 rhs2)
10356 depending on bitop1 and bitop2 arity. */
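/* For example, with boolean operands the code below computes
   a > b as a & ~b, a >= b as a | ~b and a == b as ~(a ^ b);
   LT_EXPR and LE_EXPR reuse the GT/GE bit operations with the
   operand vectors swapped.  */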
10357 bool swap_p = false;
10358 if (VECTOR_BOOLEAN_TYPE_P (vectype))
10359 {
10360 if (code == GT_EXPR)
10361 {
10362 bitop1 = BIT_NOT_EXPR;
10363 bitop2 = BIT_AND_EXPR;
10364 }
10365 else if (code == GE_EXPR)
10366 {
10367 bitop1 = BIT_NOT_EXPR;
10368 bitop2 = BIT_IOR_EXPR;
10369 }
10370 else if (code == LT_EXPR)
10371 {
10372 bitop1 = BIT_NOT_EXPR;
10373 bitop2 = BIT_AND_EXPR;
10374 swap_p = true;
10375 }
10376 else if (code == LE_EXPR)
10377 {
10378 bitop1 = BIT_NOT_EXPR;
10379 bitop2 = BIT_IOR_EXPR;
10380 swap_p = true;
10381 }
10382 else
10383 {
10384 bitop1 = BIT_XOR_EXPR;
10385 if (code == EQ_EXPR)
10386 bitop2 = BIT_NOT_EXPR;
10387 }
10388 }
10389
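/* A null VEC_STMT means we are only analyzing the statement: check that
   the target supports the comparison (or its bit-op lowering) and record
   the cost, without generating any code.  */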
10390 if (!vec_stmt)
10391 {
10392 if (bitop1 == NOP_EXPR)
10393 {
10394 if (!expand_vec_cmp_expr_p (vectype, mask_type, code))
10395 return false;
10396 }
10397 else
10398 {
10399 machine_mode mode = TYPE_MODE (vectype);
10400 optab optab;
10401
10402 optab = optab_for_tree_code (bitop1, vectype, optab_default);
10403 if (!optab || optab_handler (optab, mode) == CODE_FOR_nothing)
10404 return false;
10405
10406 if (bitop2 != NOP_EXPR)
10407 {
10408 optab = optab_for_tree_code (bitop2, vectype, optab_default);
10409 if (!optab || optab_handler (optab, mode) == CODE_FOR_nothing)
10410 return false;
10411 }
10412 }
10413
10414 /* Put types on constant and invariant SLP children. */
10415 if (slp_node
10416 && (!vect_maybe_update_slp_op_vectype (slp_rhs1, vectype)
10417 || !vect_maybe_update_slp_op_vectype (slp_rhs2, vectype)))
10418 {
10419 if (dump_enabled_p ())
10420 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
10421 "incompatible vector types for invariants\n");
10422 return false;
10423 }
10424
10425 STMT_VINFO_TYPE (stmt_info) = comparison_vec_info_type;
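/* When BITOP2 is needed each copy requires two statements, hence the
   doubled statement count passed to the cost model.  */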
10426 vect_model_simple_cost (vinfo, stmt_info,
10427 ncopies * (1 + (bitop2 != NOP_EXPR)),
10428 dts, ndts, slp_node, cost_vec);
10429 return true;
10430 }
10431
10432 /* Transform. */
10433 if (!slp_node)
10434 {
10435 vec_oprnds0.create (1);
10436 vec_oprnds1.create (1);
10437 }
10438
10439 /* Handle def. */
10440 lhs = gimple_assign_lhs (stmt);
10441 mask = vect_create_destination_var (lhs, mask_type);
10442
10443 vect_get_vec_defs (vinfo, stmt_info, slp_node, ncopies,
10444 rhs1, &vec_oprnds0, vectype,
10445 rhs2, &vec_oprnds1, vectype);
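/* For LT_EXPR and LE_EXPR the bit operations above were chosen for the
   swapped comparison, so swap the operand vectors to match.  */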
10446 if (swap_p)
10447 std::swap (vec_oprnds0, vec_oprnds1);
10448
10449 /* Arguments are ready. Create the new vector stmt. */
10450 FOR_EACH_VEC_ELT (vec_oprnds0, i, vec_rhs1)
10451 {
10452 gimple *new_stmt;
10453 vec_rhs2 = vec_oprnds1[i];
10454
10455 new_temp = make_ssa_name (mask);
10456 if (bitop1 == NOP_EXPR)
10457 {
10458 new_stmt = gimple_build_assign (new_temp, code,
10459 vec_rhs1, vec_rhs2);
10460 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
10461 }
10462 else
10463 {
10464 if (bitop1 == BIT_NOT_EXPR)
10465 new_stmt = gimple_build_assign (new_temp, bitop1, vec_rhs2);
10466 else
10467 new_stmt = gimple_build_assign (new_temp, bitop1, vec_rhs1,
10468 vec_rhs2);
10469 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
10470 if (bitop2 != NOP_EXPR)
10471 {
10472 tree res = make_ssa_name (mask);
10473 if (bitop2 == BIT_NOT_EXPR)
10474 new_stmt = gimple_build_assign (res, bitop2, new_temp);
10475 else
10476 new_stmt = gimple_build_assign (res, bitop2, vec_rhs1,
10477 new_temp);
10478 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
10479 }
10480 }
10481 if (slp_node)
10482 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
10483 else
10484 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
10485 }
10486
10487 if (!slp_node)
10488 *vec_stmt = STMT_VINFO_VEC_STMTS (stmt_info)[0];
10489
10490 vec_oprnds0.release ();
10491 vec_oprnds1.release ();
10492
10493 return true;
10494 }
10495
10496 /* If SLP_NODE is nonnull, return true if vectorizable_live_operation
10497 can handle all live statements in the node. Otherwise return true
10498 if STMT_INFO is not live or if vectorizable_live_operation can handle it.
10499 GSI and VEC_STMT_P are as for vectorizable_live_operation. */
10500
10501 static bool
10502 can_vectorize_live_stmts (vec_info *vinfo,
10503 stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
10504 slp_tree slp_node, slp_instance slp_node_instance,
10505 bool vec_stmt_p,
10506 stmt_vector_for_cost *cost_vec)
10507 {
10508 if (slp_node)
10509 {
10510 stmt_vec_info slp_stmt_info;
10511 unsigned int i;
10512 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (slp_node), i, slp_stmt_info)
10513 {
10514 if (STMT_VINFO_LIVE_P (slp_stmt_info)
10515 && !vectorizable_live_operation (vinfo,
10516 slp_stmt_info, gsi, slp_node,
10517 slp_node_instance, i,
10518 vec_stmt_p, cost_vec))
10519 return false;
10520 }
10521 }
10522 else if (STMT_VINFO_LIVE_P (stmt_info)
10523 && !vectorizable_live_operation (vinfo, stmt_info, gsi,
10524 slp_node, slp_node_instance, -1,
10525 vec_stmt_p, cost_vec))
10526 return false;
10527
10528 return true;
10529 }
10530
10531 /* Make sure the statement is vectorizable. */
10532
10533 opt_result
10534 vect_analyze_stmt (vec_info *vinfo,
10535 stmt_vec_info stmt_info, bool *need_to_vectorize,
10536 slp_tree node, slp_instance node_instance,
10537 stmt_vector_for_cost *cost_vec)
10538 {
10539 bb_vec_info bb_vinfo = dyn_cast <bb_vec_info> (vinfo);
10540 enum vect_relevant relevance = STMT_VINFO_RELEVANT (stmt_info);
10541 bool ok;
10542 gimple_seq pattern_def_seq;
10543
10544 if (dump_enabled_p ())
10545 dump_printf_loc (MSG_NOTE, vect_location, "==> examining statement: %G",
10546 stmt_info->stmt);
10547
10548 if (gimple_has_volatile_ops (stmt_info->stmt))
10549 return opt_result::failure_at (stmt_info->stmt,
10550 "not vectorized:"
10551 " stmt has volatile operands: %G\n",
10552 stmt_info->stmt);
10553
10554 if (STMT_VINFO_IN_PATTERN_P (stmt_info)
10555 && node == NULL
10556 && (pattern_def_seq = STMT_VINFO_PATTERN_DEF_SEQ (stmt_info)))
10557 {
10558 gimple_stmt_iterator si;
10559
10560 for (si = gsi_start (pattern_def_seq); !gsi_end_p (si); gsi_next (&si))
10561 {
10562 stmt_vec_info pattern_def_stmt_info
10563 = vinfo->lookup_stmt (gsi_stmt (si));
10564 if (STMT_VINFO_RELEVANT_P (pattern_def_stmt_info)
10565 || STMT_VINFO_LIVE_P (pattern_def_stmt_info))
10566 {
10567 /* Analyze def stmt of STMT if it's a pattern stmt. */
10568 if (dump_enabled_p ())
10569 dump_printf_loc (MSG_NOTE, vect_location,
10570 "==> examining pattern def statement: %G",
10571 pattern_def_stmt_info->stmt);
10572
10573 opt_result res
10574 = vect_analyze_stmt (vinfo, pattern_def_stmt_info,
10575 need_to_vectorize, node, node_instance,
10576 cost_vec);
10577 if (!res)
10578 return res;
10579 }
10580 }
10581 }
10582
10583 /* Skip stmts that do not need to be vectorized. In loops this is expected
10584 to include:
10585 - the COND_EXPR which is the loop exit condition
10586 - any LABEL_EXPRs in the loop
10587 - computations that are used only for array indexing or loop control.
10588 In basic blocks we only analyze statements that are a part of some SLP
10589 instance, therefore all the statements are relevant.
10590
10591 A pattern statement needs to be analyzed instead of the original statement
10592 if the original statement is not relevant. Otherwise, we analyze both
10593 statements. In basic blocks we are called from some SLP instance
10594 traversal, so don't analyze pattern stmts instead; the pattern stmts
10595 will already be part of an SLP instance. */
10596
10597 stmt_vec_info pattern_stmt_info = STMT_VINFO_RELATED_STMT (stmt_info);
10598 if (!STMT_VINFO_RELEVANT_P (stmt_info)
10599 && !STMT_VINFO_LIVE_P (stmt_info))
10600 {
10601 if (STMT_VINFO_IN_PATTERN_P (stmt_info)
10602 && pattern_stmt_info
10603 && (STMT_VINFO_RELEVANT_P (pattern_stmt_info)
10604 || STMT_VINFO_LIVE_P (pattern_stmt_info)))
10605 {
10606 /* Analyze PATTERN_STMT instead of the original stmt. */
10607 stmt_info = pattern_stmt_info;
10608 if (dump_enabled_p ())
10609 dump_printf_loc (MSG_NOTE, vect_location,
10610 "==> examining pattern statement: %G",
10611 stmt_info->stmt);
10612 }
10613 else
10614 {
10615 if (dump_enabled_p ())
10616 dump_printf_loc (MSG_NOTE, vect_location, "irrelevant.\n");
10617
10618 return opt_result::success ();
10619 }
10620 }
10621 else if (STMT_VINFO_IN_PATTERN_P (stmt_info)
10622 && node == NULL
10623 && pattern_stmt_info
10624 && (STMT_VINFO_RELEVANT_P (pattern_stmt_info)
10625 || STMT_VINFO_LIVE_P (pattern_stmt_info)))
10626 {
10627 /* Analyze PATTERN_STMT too. */
10628 if (dump_enabled_p ())
10629 dump_printf_loc (MSG_NOTE, vect_location,
10630 "==> examining pattern statement: %G",
10631 pattern_stmt_info->stmt);
10632
10633 opt_result res
10634 = vect_analyze_stmt (vinfo, pattern_stmt_info, need_to_vectorize, node,
10635 node_instance, cost_vec);
10636 if (!res)
10637 return res;
10638 }
10639
10640 switch (STMT_VINFO_DEF_TYPE (stmt_info))
10641 {
10642 case vect_internal_def:
10643 break;
10644
10645 case vect_reduction_def:
10646 case vect_nested_cycle:
10647 gcc_assert (!bb_vinfo
10648 && (relevance == vect_used_in_outer
10649 || relevance == vect_used_in_outer_by_reduction
10650 || relevance == vect_used_by_reduction
10651 || relevance == vect_unused_in_scope
10652 || relevance == vect_used_only_live));
10653 break;
10654
10655 case vect_induction_def:
10656 gcc_assert (!bb_vinfo);
10657 break;
10658
10659 case vect_constant_def:
10660 case vect_external_def:
10661 case vect_unknown_def_type:
10662 default:
10663 gcc_unreachable ();
10664 }
10665
10666 if (STMT_VINFO_RELEVANT_P (stmt_info))
10667 {
10668 tree type = gimple_expr_type (stmt_info->stmt);
10669 gcc_assert (!VECTOR_MODE_P (TYPE_MODE (type)));
10670 gcall *call = dyn_cast <gcall *> (stmt_info->stmt);
10671 gcc_assert (STMT_VINFO_VECTYPE (stmt_info)
10672 || (call && gimple_call_lhs (call) == NULL_TREE));
10673 *need_to_vectorize = true;
10674 }
10675
10676 if (PURE_SLP_STMT (stmt_info) && !node)
10677 {
10678 if (dump_enabled_p ())
10679 dump_printf_loc (MSG_NOTE, vect_location,
10680 "handled only by SLP analysis\n");
10681 return opt_result::success ();
10682 }
10683
10684 ok = true;
10685 if (!bb_vinfo
10686 && (STMT_VINFO_RELEVANT_P (stmt_info)
10687 || STMT_VINFO_DEF_TYPE (stmt_info) == vect_reduction_def))
10688 /* Prefer vectorizable_call over vectorizable_simd_clone_call so
10689 -mveclibabi= takes precedence over library functions with
10690 the simd attribute. */
10691 ok = (vectorizable_call (vinfo, stmt_info, NULL, NULL, node, cost_vec)
10692 || vectorizable_simd_clone_call (vinfo, stmt_info, NULL, NULL, node,
10693 cost_vec)
10694 || vectorizable_conversion (vinfo, stmt_info,
10695 NULL, NULL, node, cost_vec)
10696 || vectorizable_operation (vinfo, stmt_info,
10697 NULL, NULL, node, cost_vec)
10698 || vectorizable_assignment (vinfo, stmt_info,
10699 NULL, NULL, node, cost_vec)
10700 || vectorizable_load (vinfo, stmt_info, NULL, NULL, node, cost_vec)
10701 || vectorizable_store (vinfo, stmt_info, NULL, NULL, node, cost_vec)
10702 || vectorizable_reduction (as_a <loop_vec_info> (vinfo), stmt_info,
10703 node, node_instance, cost_vec)
10704 || vectorizable_induction (as_a <loop_vec_info> (vinfo), stmt_info,
10705 NULL, node, cost_vec)
10706 || vectorizable_shift (vinfo, stmt_info, NULL, NULL, node, cost_vec)
10707 || vectorizable_condition (vinfo, stmt_info,
10708 NULL, NULL, node, cost_vec)
10709 || vectorizable_comparison (vinfo, stmt_info, NULL, NULL, node,
10710 cost_vec)
10711 || vectorizable_lc_phi (as_a <loop_vec_info> (vinfo),
10712 stmt_info, NULL, node));
10713 else
10714 {
10715 if (bb_vinfo)
10716 ok = (vectorizable_call (vinfo, stmt_info, NULL, NULL, node, cost_vec)
10717 || vectorizable_simd_clone_call (vinfo, stmt_info,
10718 NULL, NULL, node, cost_vec)
10719 || vectorizable_conversion (vinfo, stmt_info, NULL, NULL, node,
10720 cost_vec)
10721 || vectorizable_shift (vinfo, stmt_info,
10722 NULL, NULL, node, cost_vec)
10723 || vectorizable_operation (vinfo, stmt_info,
10724 NULL, NULL, node, cost_vec)
10725 || vectorizable_assignment (vinfo, stmt_info, NULL, NULL, node,
10726 cost_vec)
10727 || vectorizable_load (vinfo, stmt_info,
10728 NULL, NULL, node, cost_vec)
10729 || vectorizable_store (vinfo, stmt_info,
10730 NULL, NULL, node, cost_vec)
10731 || vectorizable_condition (vinfo, stmt_info,
10732 NULL, NULL, node, cost_vec)
10733 || vectorizable_comparison (vinfo, stmt_info, NULL, NULL, node,
10734 cost_vec)
10735 || vectorizable_phi (vinfo, stmt_info, NULL, node, cost_vec));
10736 }
10737
10738 if (!ok)
10739 return opt_result::failure_at (stmt_info->stmt,
10740 "not vectorized:"
10741 " relevant stmt not supported: %G",
10742 stmt_info->stmt);
10743
10744 /* Stmts that are (also) "live" (i.e. that are used outside of the loop)
10745 need extra handling, except for vectorizable reductions. */
10746 if (!bb_vinfo
10747 && STMT_VINFO_TYPE (stmt_info) != reduc_vec_info_type
10748 && STMT_VINFO_TYPE (stmt_info) != lc_phi_info_type
10749 && !can_vectorize_live_stmts (as_a <loop_vec_info> (vinfo),
10750 stmt_info, NULL, node, node_instance,
10751 false, cost_vec))
10752 return opt_result::failure_at (stmt_info->stmt,
10753 "not vectorized:"
10754 " live stmt not supported: %G",
10755 stmt_info->stmt);
10756
10757 return opt_result::success ();
10758 }
10759
10760
10761 /* Function vect_transform_stmt.
10762
10763 Create a vectorized stmt to replace STMT_INFO, and insert it at GSI. */
10764
10765 bool
10766 vect_transform_stmt (vec_info *vinfo,
10767 stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
10768 slp_tree slp_node, slp_instance slp_node_instance)
10769 {
10770 bool is_store = false;
10771 gimple *vec_stmt = NULL;
10772 bool done;
10773
10774 gcc_assert (slp_node || !PURE_SLP_STMT (stmt_info));
10775
10776 switch (STMT_VINFO_TYPE (stmt_info))
10777 {
10778 case type_demotion_vec_info_type:
10779 case type_promotion_vec_info_type:
10780 case type_conversion_vec_info_type:
10781 done = vectorizable_conversion (vinfo, stmt_info,
10782 gsi, &vec_stmt, slp_node, NULL);
10783 gcc_assert (done);
10784 break;
10785
10786 case induc_vec_info_type:
10787 done = vectorizable_induction (as_a <loop_vec_info> (vinfo),
10788 stmt_info, &vec_stmt, slp_node,
10789 NULL);
10790 gcc_assert (done);
10791 break;
10792
10793 case shift_vec_info_type:
10794 done = vectorizable_shift (vinfo, stmt_info,
10795 gsi, &vec_stmt, slp_node, NULL);
10796 gcc_assert (done);
10797 break;
10798
10799 case op_vec_info_type:
10800 done = vectorizable_operation (vinfo, stmt_info, gsi, &vec_stmt, slp_node,
10801 NULL);
10802 gcc_assert (done);
10803 break;
10804
10805 case assignment_vec_info_type:
10806 done = vectorizable_assignment (vinfo, stmt_info,
10807 gsi, &vec_stmt, slp_node, NULL);
10808 gcc_assert (done);
10809 break;
10810
10811 case load_vec_info_type:
10812 done = vectorizable_load (vinfo, stmt_info, gsi, &vec_stmt, slp_node,
10813 NULL);
10814 gcc_assert (done);
10815 break;
10816
10817 case store_vec_info_type:
10818 done = vectorizable_store (vinfo, stmt_info,
10819 gsi, &vec_stmt, slp_node, NULL);
10820 gcc_assert (done);
10821 if (STMT_VINFO_GROUPED_ACCESS (stmt_info) && !slp_node)
10822 {
10823 /* In case of interleaving, the whole chain is vectorized when the
10824 last store in the chain is reached. Store stmts before the last
10825 one are skipped, and their vec_stmt_info shouldn't be freed
10826 meanwhile. */
10827 stmt_vec_info group_info = DR_GROUP_FIRST_ELEMENT (stmt_info);
10828 if (DR_GROUP_STORE_COUNT (group_info) == DR_GROUP_SIZE (group_info))
10829 is_store = true;
10830 }
10831 else
10832 is_store = true;
10833 break;
10834
10835 case condition_vec_info_type:
10836 done = vectorizable_condition (vinfo, stmt_info,
10837 gsi, &vec_stmt, slp_node, NULL);
10838 gcc_assert (done);
10839 break;
10840
10841 case comparison_vec_info_type:
10842 done = vectorizable_comparison (vinfo, stmt_info, gsi, &vec_stmt,
10843 slp_node, NULL);
10844 gcc_assert (done);
10845 break;
10846
10847 case call_vec_info_type:
10848 done = vectorizable_call (vinfo, stmt_info,
10849 gsi, &vec_stmt, slp_node, NULL);
10850 break;
10851
10852 case call_simd_clone_vec_info_type:
10853 done = vectorizable_simd_clone_call (vinfo, stmt_info, gsi, &vec_stmt,
10854 slp_node, NULL);
10855 break;
10856
10857 case reduc_vec_info_type:
10858 done = vect_transform_reduction (as_a <loop_vec_info> (vinfo), stmt_info,
10859 gsi, &vec_stmt, slp_node);
10860 gcc_assert (done);
10861 break;
10862
10863 case cycle_phi_info_type:
10864 done = vect_transform_cycle_phi (as_a <loop_vec_info> (vinfo), stmt_info,
10865 &vec_stmt, slp_node, slp_node_instance);
10866 gcc_assert (done);
10867 break;
10868
10869 case lc_phi_info_type:
10870 done = vectorizable_lc_phi (as_a <loop_vec_info> (vinfo),
10871 stmt_info, &vec_stmt, slp_node);
10872 gcc_assert (done);
10873 break;
10874
10875 case phi_info_type:
10876 done = vectorizable_phi (vinfo, stmt_info, &vec_stmt, slp_node, NULL);
10877 gcc_assert (done);
10878 break;
10879
10880 default:
10881 if (!STMT_VINFO_LIVE_P (stmt_info))
10882 {
10883 if (dump_enabled_p ())
10884 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
10885 "stmt not supported.\n");
10886 gcc_unreachable ();
10887 }
10888 done = true;
10889 }
10890
10891 if (!slp_node && vec_stmt)
10892 gcc_assert (STMT_VINFO_VEC_STMTS (stmt_info).exists ());
10893
10894 if (STMT_VINFO_TYPE (stmt_info) == store_vec_info_type)
10895 return is_store;
10896
10897 /* Handle stmts whose DEF is used outside the loop-nest that is
10898 being vectorized. */
10899 done = can_vectorize_live_stmts (vinfo, stmt_info, gsi, slp_node,
10900 slp_node_instance, true, NULL);
10901 gcc_assert (done);
10902
10903 return false;
10904 }
10905
10906
10907 /* Remove a group of stores (for SLP or interleaving), free their
10908 stmt_vec_info. */
10909
10910 void
10911 vect_remove_stores (vec_info *vinfo, stmt_vec_info first_stmt_info)
10912 {
10913 stmt_vec_info next_stmt_info = first_stmt_info;
10914
10915 while (next_stmt_info)
10916 {
10917 stmt_vec_info tmp = DR_GROUP_NEXT_ELEMENT (next_stmt_info);
10918 next_stmt_info = vect_orig_stmt (next_stmt_info);
10919 /* Free the attached stmt_vec_info and remove the stmt. */
10920 vinfo->remove_stmt (next_stmt_info);
10921 next_stmt_info = tmp;
10922 }
10923 }
10924
10925 /* If NUNITS is nonzero, return a vector type that contains NUNITS
10926 elements of type SCALAR_TYPE, or null if the target doesn't support
10927 such a type.
10928
10929 If NUNITS is zero, return a vector type that contains elements of
10930 type SCALAR_TYPE, choosing whichever vector size the target prefers.
10931
10932 If PREVAILING_MODE is VOIDmode, we have not yet chosen a vector mode
10933 for this vectorization region and want to "autodetect" the best choice.
10934 Otherwise, PREVAILING_MODE is a previously-chosen vector TYPE_MODE
10935 and we want the new type to be interoperable with it. PREVAILING_MODE
10936 in this case can be a scalar integer mode or a vector mode; when it
10937 is a vector mode, the function acts like a tree-level version of
10938 related_vector_mode. */
10939
10940 tree
10941 get_related_vectype_for_scalar_type (machine_mode prevailing_mode,
10942 tree scalar_type, poly_uint64 nunits)
10943 {
10944 tree orig_scalar_type = scalar_type;
10945 scalar_mode inner_mode;
10946 machine_mode simd_mode;
10947 tree vectype;
10948
10949 if (!is_int_mode (TYPE_MODE (scalar_type), &inner_mode)
10950 && !is_float_mode (TYPE_MODE (scalar_type), &inner_mode))
10951 return NULL_TREE;
10952
10953 unsigned int nbytes = GET_MODE_SIZE (inner_mode);
10954
10955 /* For vector types of elements whose mode precision doesn't
10956 match their type's precision we use an element type of mode
10957 precision. The vectorization routines will have to make sure
10958 they support the proper result truncation/extension.
10959 We also make sure to build vector types with INTEGER_TYPE
10960 component type only. */
10961 if (INTEGRAL_TYPE_P (scalar_type)
10962 && (GET_MODE_BITSIZE (inner_mode) != TYPE_PRECISION (scalar_type)
10963 || TREE_CODE (scalar_type) != INTEGER_TYPE))
10964 scalar_type = build_nonstandard_integer_type (GET_MODE_BITSIZE (inner_mode),
10965 TYPE_UNSIGNED (scalar_type));
10966
10967 /* We shouldn't end up building VECTOR_TYPEs of non-scalar components.
10968 When the component mode passes the above test simply use a type
10969 corresponding to that mode. The theory is that any use that
10970 would cause problems with this will disable vectorization anyway. */
10971 else if (!SCALAR_FLOAT_TYPE_P (scalar_type)
10972 && !INTEGRAL_TYPE_P (scalar_type))
10973 scalar_type = lang_hooks.types.type_for_mode (inner_mode, 1);
10974
10975 /* We can't build a vector type of elements with alignment bigger than
10976 their size. */
10977 else if (nbytes < TYPE_ALIGN_UNIT (scalar_type))
10978 scalar_type = lang_hooks.types.type_for_mode (inner_mode,
10979 TYPE_UNSIGNED (scalar_type));
10980
10981 /* If we fell back to using the mode, fail if there was
10982 no scalar type for it. */
10983 if (scalar_type == NULL_TREE)
10984 return NULL_TREE;
10985
10986 /* If no prevailing mode was supplied, use the mode the target prefers.
10987 Otherwise lookup a vector mode based on the prevailing mode. */
10988 if (prevailing_mode == VOIDmode)
10989 {
10990 gcc_assert (known_eq (nunits, 0U));
10991 simd_mode = targetm.vectorize.preferred_simd_mode (inner_mode);
10992 if (SCALAR_INT_MODE_P (simd_mode))
10993 {
10994 /* Traditional behavior is not to take the integer mode
10995 literally, but simply to use it as a way of determining
10996 the vector size. It is up to mode_for_vector to decide
10997 what the TYPE_MODE should be.
10998
10999 Note that nunits == 1 is allowed in order to support single
11000 element vector types. */
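/* E.g. a 16-byte integer SIMD_MODE with a 4-byte INNER_MODE yields
   NUNITS == 4 below.  */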
11001 if (!multiple_p (GET_MODE_SIZE (simd_mode), nbytes, &nunits)
11002 || !mode_for_vector (inner_mode, nunits).exists (&simd_mode))
11003 return NULL_TREE;
11004 }
11005 }
11006 else if (SCALAR_INT_MODE_P (prevailing_mode)
11007 || !related_vector_mode (prevailing_mode,
11008 inner_mode, nunits).exists (&simd_mode))
11009 {
11010 /* Fall back to using mode_for_vector, mostly in the hope of being
11011 able to use an integer mode. */
11012 if (known_eq (nunits, 0U)
11013 && !multiple_p (GET_MODE_SIZE (prevailing_mode), nbytes, &nunits))
11014 return NULL_TREE;
11015
11016 if (!mode_for_vector (inner_mode, nunits).exists (&simd_mode))
11017 return NULL_TREE;
11018 }
11019
11020 vectype = build_vector_type_for_mode (scalar_type, simd_mode);
11021
11022 /* In cases where the mode was chosen by mode_for_vector, check that
11023 the target actually supports the chosen mode, or that it at least
11024 allows the vector mode to be replaced by a like-sized integer. */
11025 if (!VECTOR_MODE_P (TYPE_MODE (vectype))
11026 && !INTEGRAL_MODE_P (TYPE_MODE (vectype)))
11027 return NULL_TREE;
11028
11029 /* Re-attach the address-space qualifier if we canonicalized the scalar
11030 type. */
11031 if (TYPE_ADDR_SPACE (orig_scalar_type) != TYPE_ADDR_SPACE (vectype))
11032 return build_qualified_type
11033 (vectype, KEEP_QUAL_ADDR_SPACE (TYPE_QUALS (orig_scalar_type)));
11034
11035 return vectype;
11036 }
11037
11038 /* Function get_vectype_for_scalar_type.
11039
11040 Returns the vector type corresponding to SCALAR_TYPE as supported
11041 by the target. If GROUP_SIZE is nonzero and we're performing BB
11042 vectorization, make sure that the number of elements in the vector
11043 is no bigger than GROUP_SIZE. */
11044
11045 tree
11046 get_vectype_for_scalar_type (vec_info *vinfo, tree scalar_type,
11047 unsigned int group_size)
11048 {
11049 /* For BB vectorization, we should always have a group size once we've
11050 constructed the SLP tree; the only valid uses of zero GROUP_SIZEs
11051 are tentative requests during things like early data reference
11052 analysis and pattern recognition. */
11053 if (is_a <bb_vec_info> (vinfo))
11054 gcc_assert (vinfo->slp_instances.is_empty () || group_size != 0);
11055 else
11056 group_size = 0;
11057
11058 tree vectype = get_related_vectype_for_scalar_type (vinfo->vector_mode,
11059 scalar_type);
11060 if (vectype && vinfo->vector_mode == VOIDmode)
11061 vinfo->vector_mode = TYPE_MODE (vectype);
11062
11063 /* Register the natural choice of vector type, before the group size
11064 has been applied. */
11065 if (vectype)
11066 vinfo->used_vector_modes.add (TYPE_MODE (vectype));
11067
11068 /* If the natural choice of vector type doesn't satisfy GROUP_SIZE,
11069 try again with an explicit number of elements. */
11070 if (vectype
11071 && group_size
11072 && maybe_ge (TYPE_VECTOR_SUBPARTS (vectype), group_size))
11073 {
11074 /* Start with the biggest number of units that fits within
11075 GROUP_SIZE and halve it until we find a valid vector type.
11076 Usually either the first attempt will succeed or all will
11077 fail (in the latter case because GROUP_SIZE is too small
11078 for the target), but it's possible that a target could have
11079 a hole between supported vector types.
11080
11081 If GROUP_SIZE is not a power of 2, this has the effect of
11082 trying the largest power of 2 that fits within the group,
11083 even though the group is not a multiple of that vector size.
11084 The BB vectorizer will then try to carve up the group into
11085 smaller pieces. */
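/* E.g. for GROUP_SIZE == 6 this tries 4-element and then 2-element
   vectors.  */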
11086 unsigned int nunits = 1 << floor_log2 (group_size);
11087 do
11088 {
11089 vectype = get_related_vectype_for_scalar_type (vinfo->vector_mode,
11090 scalar_type, nunits);
11091 nunits /= 2;
11092 }
11093 while (nunits > 1 && !vectype);
11094 }
11095
11096 return vectype;
11097 }
11098
11099 /* Return the vector type corresponding to SCALAR_TYPE as supported
11100 by the target. NODE, if nonnull, is the SLP tree node that will
11101 use the returned vector type. */
11102
11103 tree
11104 get_vectype_for_scalar_type (vec_info *vinfo, tree scalar_type, slp_tree node)
11105 {
11106 unsigned int group_size = 0;
11107 if (node)
11108 group_size = SLP_TREE_LANES (node);
11109 return get_vectype_for_scalar_type (vinfo, scalar_type, group_size);
11110 }
11111
11112 /* Function get_mask_type_for_scalar_type.
11113
11114 Returns the mask type corresponding to a result of comparison
11115 of vectors of specified SCALAR_TYPE as supported by target.
11116 If GROUP_SIZE is nonzero and we're performing BB vectorization,
11117 make sure that the number of elements in the vector is no bigger
11118 than GROUP_SIZE. */
11119
11120 tree
11121 get_mask_type_for_scalar_type (vec_info *vinfo, tree scalar_type,
11122 unsigned int group_size)
11123 {
11124 tree vectype = get_vectype_for_scalar_type (vinfo, scalar_type, group_size);
11125
11126 if (!vectype)
11127 return NULL;
11128
11129 return truth_type_for (vectype);
11130 }
11131
11132 /* Function get_same_sized_vectype
11133
11134 Returns a vector type corresponding to SCALAR_TYPE of size
11135 VECTOR_TYPE if supported by the target. */
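/* For example, given a 4-element float vector type and an "int"
   SCALAR_TYPE, this typically returns a 4-element integer vector type,
   provided the target supports one of that size.  */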
11136
11137 tree
11138 get_same_sized_vectype (tree scalar_type, tree vector_type)
11139 {
11140 if (VECT_SCALAR_BOOLEAN_TYPE_P (scalar_type))
11141 return truth_type_for (vector_type);
11142
11143 poly_uint64 nunits;
11144 if (!multiple_p (GET_MODE_SIZE (TYPE_MODE (vector_type)),
11145 GET_MODE_SIZE (TYPE_MODE (scalar_type)), &nunits))
11146 return NULL_TREE;
11147
11148 return get_related_vectype_for_scalar_type (TYPE_MODE (vector_type),
11149 scalar_type, nunits);
11150 }
11151
11152 /* Return true if replacing VINFO->vector_mode with VECTOR_MODE
11153 would not change the chosen vector modes. */
11154
11155 bool
11156 vect_chooses_same_modes_p (vec_info *vinfo, machine_mode vector_mode)
11157 {
11158 for (vec_info::mode_set::iterator i = vinfo->used_vector_modes.begin ();
11159 i != vinfo->used_vector_modes.end (); ++i)
11160 if (!VECTOR_MODE_P (*i)
11161 || related_vector_mode (vector_mode, GET_MODE_INNER (*i), 0) != *i)
11162 return false;
11163 return true;
11164 }
11165
11166 /* Function vect_is_simple_use.
11167
11168 Input:
11169 VINFO - the vect info of the loop or basic block that is being vectorized.
11170 OPERAND - operand in the loop or bb.
11171 Output:
11172 DEF_STMT_INFO_OUT (optional) - information about the defining stmt in
11173 case OPERAND is an SSA_NAME that is defined in the vectorizable region
11174 DEF_STMT_OUT (optional) - the defining stmt in case OPERAND is an SSA_NAME;
11175 the definition could be anywhere in the function
11176 DT - the type of definition
11177
11178 Returns whether a stmt with OPERAND can be vectorized.
11179 For loops, supportable operands are constants, loop invariants, and operands
11180 that are defined by the current iteration of the loop. Unsupportable
11181 operands are those that are defined by a previous iteration of the loop (as
11182 is the case in reduction/induction computations).
11183 For basic blocks, supportable operands are constants and bb invariants.
11184 For now, operands defined outside the basic block are not supported. */
11185
11186 bool
11187 vect_is_simple_use (tree operand, vec_info *vinfo, enum vect_def_type *dt,
11188 stmt_vec_info *def_stmt_info_out, gimple **def_stmt_out)
11189 {
11190 if (def_stmt_info_out)
11191 *def_stmt_info_out = NULL;
11192 if (def_stmt_out)
11193 *def_stmt_out = NULL;
11194 *dt = vect_unknown_def_type;
11195
11196 if (dump_enabled_p ())
11197 {
11198 dump_printf_loc (MSG_NOTE, vect_location,
11199 "vect_is_simple_use: operand ");
11200 if (TREE_CODE (operand) == SSA_NAME
11201 && !SSA_NAME_IS_DEFAULT_DEF (operand))
11202 dump_gimple_expr (MSG_NOTE, TDF_SLIM, SSA_NAME_DEF_STMT (operand), 0);
11203 else
11204 dump_generic_expr (MSG_NOTE, TDF_SLIM, operand);
11205 }
11206
11207 if (CONSTANT_CLASS_P (operand))
11208 *dt = vect_constant_def;
11209 else if (is_gimple_min_invariant (operand))
11210 *dt = vect_external_def;
11211 else if (TREE_CODE (operand) != SSA_NAME)
11212 *dt = vect_unknown_def_type;
11213 else if (SSA_NAME_IS_DEFAULT_DEF (operand))
11214 *dt = vect_external_def;
11215 else
11216 {
11217 gimple *def_stmt = SSA_NAME_DEF_STMT (operand);
11218 stmt_vec_info stmt_vinfo = vinfo->lookup_def (operand);
11219 if (!stmt_vinfo)
11220 *dt = vect_external_def;
11221 else
11222 {
11223 stmt_vinfo = vect_stmt_to_vectorize (stmt_vinfo);
11224 def_stmt = stmt_vinfo->stmt;
11225 switch (gimple_code (def_stmt))
11226 {
11227 case GIMPLE_PHI:
11228 case GIMPLE_ASSIGN:
11229 case GIMPLE_CALL:
11230 *dt = STMT_VINFO_DEF_TYPE (stmt_vinfo);
11231 break;
11232 default:
11233 *dt = vect_unknown_def_type;
11234 break;
11235 }
11236 if (def_stmt_info_out)
11237 *def_stmt_info_out = stmt_vinfo;
11238 }
11239 if (def_stmt_out)
11240 *def_stmt_out = def_stmt;
11241 }
11242
11243 if (dump_enabled_p ())
11244 {
11245 dump_printf (MSG_NOTE, ", type of def: ");
11246 switch (*dt)
11247 {
11248 case vect_uninitialized_def:
11249 dump_printf (MSG_NOTE, "uninitialized\n");
11250 break;
11251 case vect_constant_def:
11252 dump_printf (MSG_NOTE, "constant\n");
11253 break;
11254 case vect_external_def:
11255 dump_printf (MSG_NOTE, "external\n");
11256 break;
11257 case vect_internal_def:
11258 dump_printf (MSG_NOTE, "internal\n");
11259 break;
11260 case vect_induction_def:
11261 dump_printf (MSG_NOTE, "induction\n");
11262 break;
11263 case vect_reduction_def:
11264 dump_printf (MSG_NOTE, "reduction\n");
11265 break;
11266 case vect_double_reduction_def:
11267 dump_printf (MSG_NOTE, "double reduction\n");
11268 break;
11269 case vect_nested_cycle:
11270 dump_printf (MSG_NOTE, "nested cycle\n");
11271 break;
11272 case vect_unknown_def_type:
11273 dump_printf (MSG_NOTE, "unknown\n");
11274 break;
11275 }
11276 }
11277
11278 if (*dt == vect_unknown_def_type)
11279 {
11280 if (dump_enabled_p ())
11281 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
11282 "Unsupported pattern.\n");
11283 return false;
11284 }
11285
11286 return true;
11287 }
11288
11289 /* Function vect_is_simple_use.
11290
11291 Same as vect_is_simple_use but also determines the vector operand
11292 type of OPERAND and stores it to *VECTYPE. If the definition of
11293 OPERAND is vect_uninitialized_def, vect_constant_def or
11294 vect_external_def *VECTYPE will be set to NULL_TREE and the caller
11295 is responsible for computing the best suited vector type for the
11296 scalar operand. */
11297
11298 bool
11299 vect_is_simple_use (tree operand, vec_info *vinfo, enum vect_def_type *dt,
11300 tree *vectype, stmt_vec_info *def_stmt_info_out,
11301 gimple **def_stmt_out)
11302 {
11303 stmt_vec_info def_stmt_info;
11304 gimple *def_stmt;
11305 if (!vect_is_simple_use (operand, vinfo, dt, &def_stmt_info, &def_stmt))
11306 return false;
11307
11308 if (def_stmt_out)
11309 *def_stmt_out = def_stmt;
11310 if (def_stmt_info_out)
11311 *def_stmt_info_out = def_stmt_info;
11312
11313 /* Now get a vector type if the def is internal, otherwise supply
11314 NULL_TREE and leave it up to the caller to figure out a proper
11315 type for the use stmt. */
11316 if (*dt == vect_internal_def
11317 || *dt == vect_induction_def
11318 || *dt == vect_reduction_def
11319 || *dt == vect_double_reduction_def
11320 || *dt == vect_nested_cycle)
11321 {
11322 *vectype = STMT_VINFO_VECTYPE (def_stmt_info);
11323 gcc_assert (*vectype != NULL_TREE);
11324 if (dump_enabled_p ())
11325 dump_printf_loc (MSG_NOTE, vect_location,
11326 "vect_is_simple_use: vectype %T\n", *vectype);
11327 }
11328 else if (*dt == vect_uninitialized_def
11329 || *dt == vect_constant_def
11330 || *dt == vect_external_def)
11331 *vectype = NULL_TREE;
11332 else
11333 gcc_unreachable ();
11334
11335 return true;
11336 }
11337
11338 /* Function vect_is_simple_use.
11339
11340 Same as vect_is_simple_use but determines the operand by operand
11341 position OPERAND from either STMT or SLP_NODE, filling in *OP
11342 and *SLP_DEF (when SLP_NODE is not NULL). */
11343
11344 bool
11345 vect_is_simple_use (vec_info *vinfo, stmt_vec_info stmt, slp_tree slp_node,
11346 unsigned operand, tree *op, slp_tree *slp_def,
11347 enum vect_def_type *dt,
11348 tree *vectype, stmt_vec_info *def_stmt_info_out)
11349 {
11350 if (slp_node)
11351 {
11352 slp_tree child = SLP_TREE_CHILDREN (slp_node)[operand];
11353 *slp_def = child;
11354 *vectype = SLP_TREE_VECTYPE (child);
11355 if (SLP_TREE_DEF_TYPE (child) == vect_internal_def)
11356 {
11357 *op = gimple_get_lhs (SLP_TREE_REPRESENTATIVE (child)->stmt);
11358 return vect_is_simple_use (*op, vinfo, dt, def_stmt_info_out);
11359 }
11360 else
11361 {
11362 if (def_stmt_info_out)
11363 *def_stmt_info_out = NULL;
11364 *op = SLP_TREE_SCALAR_OPS (child)[0];
11365 *dt = SLP_TREE_DEF_TYPE (child);
11366 return true;
11367 }
11368 }
11369 else
11370 {
11371 *slp_def = NULL;
11372 if (gassign *ass = dyn_cast <gassign *> (stmt->stmt))
11373 {
11374 if (gimple_assign_rhs_code (ass) == COND_EXPR
11375 && COMPARISON_CLASS_P (gimple_assign_rhs1 (ass)))
11376 {
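/* Operands 0 and 1 index into the embedded comparison, while operands
   2 and 3 are the then/else values of the COND_EXPR itself.  */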
11377 if (operand < 2)
11378 *op = TREE_OPERAND (gimple_assign_rhs1 (ass), operand);
11379 else
11380 *op = gimple_op (ass, operand);
11381 }
11382 else if (gimple_assign_rhs_code (ass) == VIEW_CONVERT_EXPR)
11383 *op = TREE_OPERAND (gimple_assign_rhs1 (ass), 0);
11384 else
11385 *op = gimple_op (ass, operand + 1);
11386 }
11387 else if (gcall *call = dyn_cast <gcall *> (stmt->stmt))
11388 {
11389 if (gimple_call_internal_p (call)
11390 && internal_store_fn_p (gimple_call_internal_fn (call)))
11391 operand = internal_fn_stored_value_index (gimple_call_internal_fn
11392 (call));
11393 *op = gimple_call_arg (call, operand);
11394 }
11395 else
11396 gcc_unreachable ();
11397 return vect_is_simple_use (*op, vinfo, dt, vectype, def_stmt_info_out);
11398 }
11399 }
11400
11401 /* If OP is not NULL and is external or constant update its vector
11402 type with VECTYPE. Returns true if successful or false if not,
11403 for example when conflicting vector types are present. */
11404
11405 bool
11406 vect_maybe_update_slp_op_vectype (slp_tree op, tree vectype)
11407 {
11408 if (!op || SLP_TREE_DEF_TYPE (op) == vect_internal_def)
11409 return true;
11410 if (SLP_TREE_VECTYPE (op))
11411 return types_compatible_p (SLP_TREE_VECTYPE (op), vectype);
11412 SLP_TREE_VECTYPE (op) = vectype;
11413 return true;
11414 }
11415
11416 /* Function supportable_widening_operation
11417
11418 Check whether an operation represented by the code CODE is a
11419 widening operation that is supported by the target platform in
11420 vector form (i.e., when operating on arguments of type VECTYPE_IN
11421 producing a result of type VECTYPE_OUT).
11422
11423 Widening operations we currently support are NOP (CONVERT), FLOAT,
11424 FIX_TRUNC and WIDEN_MULT. This function checks if these operations
11425 are supported by the target platform either directly (via vector
11426 tree-codes), or via target builtins.
11427
11428 Output:
11429 - CODE1 and CODE2 are codes of vector operations to be used when
11430 vectorizing the operation, if available.
11431 - MULTI_STEP_CVT determines the number of required intermediate steps in
11432 case of multi-step conversion (like char->short->int - in that case
11433 MULTI_STEP_CVT will be 1).
11434 - INTERM_TYPES contains the intermediate type required to perform the
11435 widening operation (short in the above example). */
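/* A sketch of how a caller might query this function (the actual callers,
   such as vectorizable_conversion, have more surrounding context):

     enum tree_code c1, c2;
     int steps;
     vec<tree> itypes = vNULL;
     if (supportable_widening_operation (vinfo, NOP_EXPR, stmt_info,
					 vectype_out, vectype_in,
					 &c1, &c2, &steps, &itypes))
       ... emit statements using C1/C2, going through ITYPES when
	   STEPS > 0 ...  */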
11436
11437 bool
11438 supportable_widening_operation (vec_info *vinfo,
11439 enum tree_code code, stmt_vec_info stmt_info,
11440 tree vectype_out, tree vectype_in,
11441 enum tree_code *code1, enum tree_code *code2,
11442 int *multi_step_cvt,
11443 vec<tree> *interm_types)
11444 {
11445 loop_vec_info loop_info = dyn_cast <loop_vec_info> (vinfo);
11446 class loop *vect_loop = NULL;
11447 machine_mode vec_mode;
11448 enum insn_code icode1, icode2;
11449 optab optab1, optab2;
11450 tree vectype = vectype_in;
11451 tree wide_vectype = vectype_out;
11452 enum tree_code c1, c2;
11453 int i;
11454 tree prev_type, intermediate_type;
11455 machine_mode intermediate_mode, prev_mode;
11456 optab optab3, optab4;
11457
11458 *multi_step_cvt = 0;
11459 if (loop_info)
11460 vect_loop = LOOP_VINFO_LOOP (loop_info);
11461
11462 switch (code)
11463 {
11464 case WIDEN_MULT_EXPR:
11465 /* The result of a vectorized widening operation usually requires
11466 two vectors (because the widened results do not fit into one vector).
11467 The generated vector results would normally be expected to be
11468 generated in the same order as in the original scalar computation,
11469 i.e. if 8 results are generated in each vector iteration, they are
11470 to be organized as follows:
11471 vect1: [res1,res2,res3,res4],
11472 vect2: [res5,res6,res7,res8].
11473
11474 However, in the special case that the result of the widening
11475 operation is used in a reduction computation only, the order doesn't
11476 matter (because when vectorizing a reduction we change the order of
11477 the computation). Some targets can take advantage of this and
11478 generate more efficient code. For example, targets like Altivec,
11479 that support widen_mult using a sequence of {mult_even,mult_odd}
11480 generate the following vectors:
11481 vect1: [res1,res3,res5,res7],
11482 vect2: [res2,res4,res6,res8].
11483
11484 When vectorizing outer-loops, we execute the inner-loop sequentially
11485 (each vectorized inner-loop iteration contributes to VF outer-loop
11486 iterations in parallel). We therefore don't allow changing the
11487 order of the computation in the inner-loop during outer-loop
11488 vectorization. */
11489 /* TODO: Another case in which order doesn't *really* matter is when we
11490 widen and then contract again, e.g. (short)((int)x * y >> 8).
11491 Normally, pack_trunc performs an even/odd permute, whereas the
11492 repack from an even/odd expansion would be an interleave, which
11493 would be significantly simpler for e.g. AVX2. */
11494 /* In any case, in order to avoid duplicating the code below, recurse
11495 on VEC_WIDEN_MULT_EVEN_EXPR. If it succeeds, all the return values
11496 are properly set up for the caller. If we fail, we'll continue with
11497 a VEC_WIDEN_MULT_LO/HI_EXPR check. */
11498 if (vect_loop
11499 && STMT_VINFO_RELEVANT (stmt_info) == vect_used_by_reduction
11500 && !nested_in_vect_loop_p (vect_loop, stmt_info)
11501 && supportable_widening_operation (vinfo, VEC_WIDEN_MULT_EVEN_EXPR,
11502 stmt_info, vectype_out,
11503 vectype_in, code1, code2,
11504 multi_step_cvt, interm_types))
11505 {
11506 /* Elements in a vector with vect_used_by_reduction property cannot
11507 be reordered if the use chain with this property does not have the
11508 same operation. One such example is s += a * b, where elements
11509 in a and b cannot be reordered. Here we check if the vector defined
11510 by STMT is only directly used in the reduction statement. */
11511 tree lhs = gimple_assign_lhs (stmt_info->stmt);
11512 stmt_vec_info use_stmt_info = loop_info->lookup_single_use (lhs);
11513 if (use_stmt_info
11514 && STMT_VINFO_DEF_TYPE (use_stmt_info) == vect_reduction_def)
11515 return true;
11516 }
11517 c1 = VEC_WIDEN_MULT_LO_EXPR;
11518 c2 = VEC_WIDEN_MULT_HI_EXPR;
11519 break;
11520
11521 case DOT_PROD_EXPR:
11522 c1 = DOT_PROD_EXPR;
11523 c2 = DOT_PROD_EXPR;
11524 break;
11525
11526 case SAD_EXPR:
11527 c1 = SAD_EXPR;
11528 c2 = SAD_EXPR;
11529 break;
11530
11531 case VEC_WIDEN_MULT_EVEN_EXPR:
11532 /* Support the recursion induced just above. */
11533 c1 = VEC_WIDEN_MULT_EVEN_EXPR;
11534 c2 = VEC_WIDEN_MULT_ODD_EXPR;
11535 break;
11536
11537 case WIDEN_LSHIFT_EXPR:
11538 c1 = VEC_WIDEN_LSHIFT_LO_EXPR;
11539 c2 = VEC_WIDEN_LSHIFT_HI_EXPR;
11540 break;
11541
11542 case WIDEN_PLUS_EXPR:
11543 c1 = VEC_WIDEN_PLUS_LO_EXPR;
11544 c2 = VEC_WIDEN_PLUS_HI_EXPR;
11545 break;
11546
11547 case WIDEN_MINUS_EXPR:
11548 c1 = VEC_WIDEN_MINUS_LO_EXPR;
11549 c2 = VEC_WIDEN_MINUS_HI_EXPR;
11550 break;
11551
11552 CASE_CONVERT:
11553 c1 = VEC_UNPACK_LO_EXPR;
11554 c2 = VEC_UNPACK_HI_EXPR;
11555 break;
11556
11557 case FLOAT_EXPR:
11558 c1 = VEC_UNPACK_FLOAT_LO_EXPR;
11559 c2 = VEC_UNPACK_FLOAT_HI_EXPR;
11560 break;
11561
11562 case FIX_TRUNC_EXPR:
11563 c1 = VEC_UNPACK_FIX_TRUNC_LO_EXPR;
11564 c2 = VEC_UNPACK_FIX_TRUNC_HI_EXPR;
11565 break;
11566
11567 default:
11568 gcc_unreachable ();
11569 }
11570
11571 if (BYTES_BIG_ENDIAN && c1 != VEC_WIDEN_MULT_EVEN_EXPR)
11572 std::swap (c1, c2);
11573
11574 if (code == FIX_TRUNC_EXPR)
11575 {
11576 /* The signedness is determined from the output operand. */
11577 optab1 = optab_for_tree_code (c1, vectype_out, optab_default);
11578 optab2 = optab_for_tree_code (c2, vectype_out, optab_default);
11579 }
11580 else if (CONVERT_EXPR_CODE_P (code)
11581 && VECTOR_BOOLEAN_TYPE_P (wide_vectype)
11582 && VECTOR_BOOLEAN_TYPE_P (vectype)
11583 && TYPE_MODE (wide_vectype) == TYPE_MODE (vectype)
11584 && SCALAR_INT_MODE_P (TYPE_MODE (vectype)))
11585 {
11586 /* If the input and result modes are the same, a different optab
11587 is needed where we pass in the number of units in vectype. */
11588 optab1 = vec_unpacks_sbool_lo_optab;
11589 optab2 = vec_unpacks_sbool_hi_optab;
11590 }
11591 else
11592 {
11593 optab1 = optab_for_tree_code (c1, vectype, optab_default);
11594 optab2 = optab_for_tree_code (c2, vectype, optab_default);
11595 }
11596
11597 if (!optab1 || !optab2)
11598 return false;
11599
11600 vec_mode = TYPE_MODE (vectype);
11601 if ((icode1 = optab_handler (optab1, vec_mode)) == CODE_FOR_nothing
11602 || (icode2 = optab_handler (optab2, vec_mode)) == CODE_FOR_nothing)
11603 return false;
11604
11605 *code1 = c1;
11606 *code2 = c2;
11607
11608 if (insn_data[icode1].operand[0].mode == TYPE_MODE (wide_vectype)
11609 && insn_data[icode2].operand[0].mode == TYPE_MODE (wide_vectype))
11610 {
11611 if (!VECTOR_BOOLEAN_TYPE_P (vectype))
11612 return true;
11613 /* For scalar masks we may have different boolean
11614 vector types having the same QImode. Thus we
11615 add an additional check for the number of elements. */
11616 if (known_eq (TYPE_VECTOR_SUBPARTS (vectype),
11617 TYPE_VECTOR_SUBPARTS (wide_vectype) * 2))
11618 return true;
11619 }
11620
11621 /* Check if it's a multi-step conversion that can be done using intermediate
11622 types. */
11623
11624 prev_type = vectype;
11625 prev_mode = vec_mode;
11626
11627 if (!CONVERT_EXPR_CODE_P (code))
11628 return false;
11629
11630 /* We assume here that there will not be more than MAX_INTERM_CVT_STEPS
11631 intermediate steps in the promotion sequence. We try
11632 MAX_INTERM_CVT_STEPS to get to WIDE_VECTYPE, and fail if we do
11633 not. */
11634 interm_types->create (MAX_INTERM_CVT_STEPS);
11635 for (i = 0; i < MAX_INTERM_CVT_STEPS; i++)
11636 {
11637 intermediate_mode = insn_data[icode1].operand[0].mode;
11638 if (VECTOR_BOOLEAN_TYPE_P (prev_type))
11639 intermediate_type
11640 = vect_halve_mask_nunits (prev_type, intermediate_mode);
11641 else
11642 intermediate_type
11643 = lang_hooks.types.type_for_mode (intermediate_mode,
11644 TYPE_UNSIGNED (prev_type));
11645
11646 if (VECTOR_BOOLEAN_TYPE_P (intermediate_type)
11647 && VECTOR_BOOLEAN_TYPE_P (prev_type)
11648 && intermediate_mode == prev_mode
11649 && SCALAR_INT_MODE_P (prev_mode))
11650 {
11651 /* If the input and result modes are the same, a different optab
11652 is needed where we pass in the number of units in vectype. */
11653 optab3 = vec_unpacks_sbool_lo_optab;
11654 optab4 = vec_unpacks_sbool_hi_optab;
11655 }
11656 else
11657 {
11658 optab3 = optab_for_tree_code (c1, intermediate_type, optab_default);
11659 optab4 = optab_for_tree_code (c2, intermediate_type, optab_default);
11660 }
11661
11662 if (!optab3 || !optab4
11663 || (icode1 = optab_handler (optab1, prev_mode)) == CODE_FOR_nothing
11664 || insn_data[icode1].operand[0].mode != intermediate_mode
11665 || (icode2 = optab_handler (optab2, prev_mode)) == CODE_FOR_nothing
11666 || insn_data[icode2].operand[0].mode != intermediate_mode
11667 || ((icode1 = optab_handler (optab3, intermediate_mode))
11668 == CODE_FOR_nothing)
11669 || ((icode2 = optab_handler (optab4, intermediate_mode))
11670 == CODE_FOR_nothing))
11671 break;
11672
11673 interm_types->quick_push (intermediate_type);
11674 (*multi_step_cvt)++;
11675
11676 if (insn_data[icode1].operand[0].mode == TYPE_MODE (wide_vectype)
11677 && insn_data[icode2].operand[0].mode == TYPE_MODE (wide_vectype))
11678 {
11679 if (!VECTOR_BOOLEAN_TYPE_P (vectype))
11680 return true;
11681 if (known_eq (TYPE_VECTOR_SUBPARTS (intermediate_type),
11682 TYPE_VECTOR_SUBPARTS (wide_vectype) * 2))
11683 return true;
11684 }
11685
11686 prev_type = intermediate_type;
11687 prev_mode = intermediate_mode;
11688 }
11689
11690 interm_types->release ();
11691 return false;
11692 }
11693
11694
11695 /* Function supportable_narrowing_operation
11696
11697 Check whether an operation represented by the code CODE is a
11698 narrowing operation that is supported by the target platform in
11699 vector form (i.e., when operating on arguments of type VECTYPE_IN
11700 and producing a result of type VECTYPE_OUT).
11701
11702 Narrowing operations we currently support are NOP (CONVERT), FIX_TRUNC
11703 and FLOAT. This function checks if these operations are supported by
11704 the target platform directly via vector tree-codes.
11705
11706 Output:
11707 - CODE1 is the code of a vector operation to be used when
11708 vectorizing the operation, if available.
11709 - MULTI_STEP_CVT determines the number of required intermediate steps in
11710 case of multi-step conversion (like int->short->char - in that case
11711 MULTI_STEP_CVT will be 1).
11712 - INTERM_TYPES contains the intermediate type required to perform the
11713 narrowing operation (short in the above example). */
11714
11715 bool
11716 supportable_narrowing_operation (enum tree_code code,
11717 tree vectype_out, tree vectype_in,
11718 enum tree_code *code1, int *multi_step_cvt,
11719 vec<tree> *interm_types)
11720 {
11721 machine_mode vec_mode;
11722 enum insn_code icode1;
11723 optab optab1, interm_optab;
11724 tree vectype = vectype_in;
11725 tree narrow_vectype = vectype_out;
11726 enum tree_code c1;
11727 tree intermediate_type, prev_type;
11728 machine_mode intermediate_mode, prev_mode;
11729 int i;
11730 bool uns;
11731
11732 *multi_step_cvt = 0;
11733 switch (code)
11734 {
11735 CASE_CONVERT:
11736 c1 = VEC_PACK_TRUNC_EXPR;
11737 if (VECTOR_BOOLEAN_TYPE_P (narrow_vectype)
11738 && VECTOR_BOOLEAN_TYPE_P (vectype)
11739 && TYPE_MODE (narrow_vectype) == TYPE_MODE (vectype)
11740 && SCALAR_INT_MODE_P (TYPE_MODE (vectype)))
11741 optab1 = vec_pack_sbool_trunc_optab;
11742 else
11743 optab1 = optab_for_tree_code (c1, vectype, optab_default);
11744 break;
11745
11746 case FIX_TRUNC_EXPR:
11747 c1 = VEC_PACK_FIX_TRUNC_EXPR;
11748 /* The signedness is determined from the output operand. */
11749 optab1 = optab_for_tree_code (c1, vectype_out, optab_default);
11750 break;
11751
11752 case FLOAT_EXPR:
11753 c1 = VEC_PACK_FLOAT_EXPR;
11754 optab1 = optab_for_tree_code (c1, vectype, optab_default);
11755 break;
11756
11757 default:
11758 gcc_unreachable ();
11759 }
11760
11761 if (!optab1)
11762 return false;
11763
11764 vec_mode = TYPE_MODE (vectype);
11765 if ((icode1 = optab_handler (optab1, vec_mode)) == CODE_FOR_nothing)
11766 return false;
11767
11768 *code1 = c1;
11769
11770 if (insn_data[icode1].operand[0].mode == TYPE_MODE (narrow_vectype))
11771 {
11772 if (!VECTOR_BOOLEAN_TYPE_P (vectype))
11773 return true;
11774 /* For scalar masks we may have different boolean
11775 vector types having the same QImode. Thus we
11776 add an additional check for the number of elements. */
11777 if (known_eq (TYPE_VECTOR_SUBPARTS (vectype) * 2,
11778 TYPE_VECTOR_SUBPARTS (narrow_vectype)))
11779 return true;
11780 }
11781
11782 if (code == FLOAT_EXPR)
11783 return false;
11784
11785 /* Check if it's a multi-step conversion that can be done using intermediate
11786 types. */
11787 prev_mode = vec_mode;
11788 prev_type = vectype;
11789 if (code == FIX_TRUNC_EXPR)
11790 uns = TYPE_UNSIGNED (vectype_out);
11791 else
11792 uns = TYPE_UNSIGNED (vectype);
11793
11794 /* For multi-step FIX_TRUNC_EXPR prefer signed floating to integer
11795 conversion over unsigned, as unsigned FIX_TRUNC_EXPR is often more
11796 costly than signed. */
11797 if (code == FIX_TRUNC_EXPR && uns)
11798 {
11799 enum insn_code icode2;
11800
11801 intermediate_type
11802 = lang_hooks.types.type_for_mode (TYPE_MODE (vectype_out), 0);
11803 interm_optab
11804 = optab_for_tree_code (c1, intermediate_type, optab_default);
11805 if (interm_optab != unknown_optab
11806 && (icode2 = optab_handler (optab1, vec_mode)) != CODE_FOR_nothing
11807 && insn_data[icode1].operand[0].mode
11808 == insn_data[icode2].operand[0].mode)
11809 {
11810 uns = false;
11811 optab1 = interm_optab;
11812 icode1 = icode2;
11813 }
11814 }
11815
11816 /* We assume here that there will not be more than MAX_INTERM_CVT_STEPS
11817 intermediate steps in the narrowing sequence. We try
11818 MAX_INTERM_CVT_STEPS to get to NARROW_VECTYPE, and fail if we do not. */
11819 interm_types->create (MAX_INTERM_CVT_STEPS);
11820 for (i = 0; i < MAX_INTERM_CVT_STEPS; i++)
11821 {
11822 intermediate_mode = insn_data[icode1].operand[0].mode;
11823 if (VECTOR_BOOLEAN_TYPE_P (prev_type))
11824 intermediate_type
11825 = vect_double_mask_nunits (prev_type, intermediate_mode);
11826 else
11827 intermediate_type
11828 = lang_hooks.types.type_for_mode (intermediate_mode, uns);
11829 if (VECTOR_BOOLEAN_TYPE_P (intermediate_type)
11830 && VECTOR_BOOLEAN_TYPE_P (prev_type)
11831 && intermediate_mode == prev_mode
11832 && SCALAR_INT_MODE_P (prev_mode))
11833 interm_optab = vec_pack_sbool_trunc_optab;
11834 else
11835 interm_optab
11836 = optab_for_tree_code (VEC_PACK_TRUNC_EXPR, intermediate_type,
11837 optab_default);
11838 if (!interm_optab
11839 || ((icode1 = optab_handler (optab1, prev_mode)) == CODE_FOR_nothing)
11840 || insn_data[icode1].operand[0].mode != intermediate_mode
11841 || ((icode1 = optab_handler (interm_optab, intermediate_mode))
11842 == CODE_FOR_nothing))
11843 break;
11844
11845 interm_types->quick_push (intermediate_type);
11846 (*multi_step_cvt)++;
11847
11848 if (insn_data[icode1].operand[0].mode == TYPE_MODE (narrow_vectype))
11849 {
11850 if (!VECTOR_BOOLEAN_TYPE_P (vectype))
11851 return true;
11852 if (known_eq (TYPE_VECTOR_SUBPARTS (intermediate_type) * 2,
11853 TYPE_VECTOR_SUBPARTS (narrow_vectype)))
11854 return true;
11855 }
11856
11857 prev_mode = intermediate_mode;
11858 prev_type = intermediate_type;
11859 optab1 = interm_optab;
11860 }
11861
11862 interm_types->release ();
11863 return false;
11864 }
11865
11866 /* Generate and return a statement that sets vector mask MASK such that
11867 MASK[I] is true iff J + START_INDEX < END_INDEX for all J <= I. */
11868
11869 gcall *
11870 vect_gen_while (tree mask, tree start_index, tree end_index)
11871 {
11872 tree cmp_type = TREE_TYPE (start_index);
11873 tree mask_type = TREE_TYPE (mask);
11874 gcc_checking_assert (direct_internal_fn_supported_p (IFN_WHILE_ULT,
11875 cmp_type, mask_type,
11876 OPTIMIZE_FOR_SPEED));
11877 gcall *call = gimple_build_call_internal (IFN_WHILE_ULT, 3,
11878 start_index, end_index,
11879 build_zero_cst (mask_type));
11880 gimple_call_set_lhs (call, mask);
11881 return call;
11882 }
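
/* Illustrative example (editor's sketch): for a 4-lane boolean MASK with
   START_INDEX == 6 and END_INDEX == 9, the IFN_WHILE_ULT call built above
   yields
       MASK = { 6 < 9, 7 < 9, 8 < 9, 9 < 9 } = { 1, 1, 1, 0 },
   i.e. the first END_INDEX - START_INDEX lanes are active (all lanes when
   more than a full vector's worth of iterations remain). */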
11883
11884 /* Generate a vector mask of type MASK_TYPE for which index I is false iff
11885 J + START_INDEX < END_INDEX for all J <= I. Add the statements to SEQ. */
11886
11887 tree
11888 vect_gen_while_not (gimple_seq *seq, tree mask_type, tree start_index,
11889 tree end_index)
11890 {
11891 tree tmp = make_ssa_name (mask_type);
11892 gcall *call = vect_gen_while (tmp, start_index, end_index);
11893 gimple_seq_add_stmt (seq, call);
11894 return gimple_build (seq, BIT_NOT_EXPR, mask_type, tmp);
11895 }
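
/* Illustrative example (editor's sketch): continuing the case above, for
   START_INDEX == 6 and END_INDEX == 9 the returned 4-lane mask is the
   bitwise NOT of the WHILE_ULT result, i.e. { 0, 0, 0, 1 }. */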
11896
11897 /* Try to compute the vector types required to vectorize STMT_INFO,
11898 returning true on success and false if vectorization isn't possible.
11899 If GROUP_SIZE is nonzero and we're performing BB vectorization,
11900 make sure that the number of elements in the vectors is no bigger
11901 than GROUP_SIZE.
11902
11903 On success:
11904
11905 - Set *STMT_VECTYPE_OUT to:
11906 - NULL_TREE if the statement doesn't need to be vectorized;
11907 - the equivalent of STMT_VINFO_VECTYPE otherwise.
11908
11909 - Set *NUNITS_VECTYPE_OUT to the vector type that contains the maximum
11910 number of units needed to vectorize STMT_INFO, or NULL_TREE if the
11911 statement does not help to determine the overall number of units. */
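
/* Worked example (editor's sketch, assuming 128-bit vectors): for a
   conversion statement such as
       _2 = (int) _1;   where _1 has type char,
   *STMT_VECTYPE_OUT is taken from the int lhs, giving V4SI, while the
   smallest scalar type involved is char, so *NUNITS_VECTYPE_OUT is V16QI;
   the vectorization factor is then based on the 16-unit type. */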
11912
11913 opt_result
11914 vect_get_vector_types_for_stmt (vec_info *vinfo, stmt_vec_info stmt_info,
11915 tree *stmt_vectype_out,
11916 tree *nunits_vectype_out,
11917 unsigned int group_size)
11918 {
11919 gimple *stmt = stmt_info->stmt;
11920
11921 /* For BB vectorization, we should always have a group size once we've
11922 constructed the SLP tree; the only valid uses of zero GROUP_SIZEs
11923 are tentative requests during things like early data reference
11924 analysis and pattern recognition. */
11925 if (is_a <bb_vec_info> (vinfo))
11926 gcc_assert (vinfo->slp_instances.is_empty () || group_size != 0);
11927 else
11928 group_size = 0;
11929
11930 *stmt_vectype_out = NULL_TREE;
11931 *nunits_vectype_out = NULL_TREE;
11932
11933 if (gimple_get_lhs (stmt) == NULL_TREE
11934 /* MASK_STORE has no lhs, but is ok. */
11935 && !gimple_call_internal_p (stmt, IFN_MASK_STORE))
11936 {
11937 if (is_a <gcall *> (stmt))
11938 {
11939 /* Ignore calls with no lhs. These must be calls to
11940 #pragma omp simd functions, and the vectorization factor
11941 they really need can't be determined until
11942 vectorizable_simd_clone_call. */
11943 if (dump_enabled_p ())
11944 dump_printf_loc (MSG_NOTE, vect_location,
11945 "defer to SIMD clone analysis.\n");
11946 return opt_result::success ();
11947 }
11948
11949 return opt_result::failure_at (stmt,
11950 "not vectorized: irregular stmt.%G", stmt);
11951 }
11952
11953 if (VECTOR_MODE_P (TYPE_MODE (gimple_expr_type (stmt))))
11954 return opt_result::failure_at (stmt,
11955 "not vectorized: vector stmt in loop:%G",
11956 stmt);
11957
11958 tree vectype;
11959 tree scalar_type = NULL_TREE;
11960 if (group_size == 0 && STMT_VINFO_VECTYPE (stmt_info))
11961 {
11962 vectype = STMT_VINFO_VECTYPE (stmt_info);
11963 if (dump_enabled_p ())
11964 dump_printf_loc (MSG_NOTE, vect_location,
11965 "precomputed vectype: %T\n", vectype);
11966 }
11967 else if (vect_use_mask_type_p (stmt_info))
11968 {
11969 unsigned int precision = stmt_info->mask_precision;
11970 scalar_type = build_nonstandard_integer_type (precision, 1);
11971 vectype = get_mask_type_for_scalar_type (vinfo, scalar_type, group_size);
11972 if (!vectype)
11973 return opt_result::failure_at (stmt, "not vectorized: unsupported"
11974 " data-type %T\n", scalar_type);
11975 if (dump_enabled_p ())
11976 dump_printf_loc (MSG_NOTE, vect_location, "vectype: %T\n", vectype);
11977 }
11978 else
11979 {
11980 if (data_reference *dr = STMT_VINFO_DATA_REF (stmt_info))
11981 scalar_type = TREE_TYPE (DR_REF (dr));
11982 else if (gimple_call_internal_p (stmt, IFN_MASK_STORE))
11983 scalar_type = TREE_TYPE (gimple_call_arg (stmt, 3));
11984 else
11985 scalar_type = TREE_TYPE (gimple_get_lhs (stmt));
11986
11987 if (dump_enabled_p ())
11988 {
11989 if (group_size)
11990 dump_printf_loc (MSG_NOTE, vect_location,
11991 "get vectype for scalar type (group size %d):"
11992 " %T\n", group_size, scalar_type);
11993 else
11994 dump_printf_loc (MSG_NOTE, vect_location,
11995 "get vectype for scalar type: %T\n", scalar_type);
11996 }
11997 vectype = get_vectype_for_scalar_type (vinfo, scalar_type, group_size);
11998 if (!vectype)
11999 return opt_result::failure_at (stmt,
12000 "not vectorized:"
12001 " unsupported data-type %T\n",
12002 scalar_type);
12003
12004 if (dump_enabled_p ())
12005 dump_printf_loc (MSG_NOTE, vect_location, "vectype: %T\n", vectype);
12006 }
12007 *stmt_vectype_out = vectype;
12008
12009 /* Don't try to compute scalar types if the stmt produces a boolean
12010 vector; use the existing vector type instead. */
12011 tree nunits_vectype = vectype;
12012 if (!VECTOR_BOOLEAN_TYPE_P (vectype))
12013 {
12014 /* The number of units is set according to the smallest scalar
12015 type (or the largest vector size, but we only support one
12016 vector size per vectorization). */
12017 HOST_WIDE_INT dummy;
12018 scalar_type = vect_get_smallest_scalar_type (stmt_info, &dummy, &dummy);
12019 if (scalar_type != TREE_TYPE (vectype))
12020 {
12021 if (dump_enabled_p ())
12022 dump_printf_loc (MSG_NOTE, vect_location,
12023 "get vectype for smallest scalar type: %T\n",
12024 scalar_type);
12025 nunits_vectype = get_vectype_for_scalar_type (vinfo, scalar_type,
12026 group_size);
12027 if (!nunits_vectype)
12028 return opt_result::failure_at
12029 (stmt, "not vectorized: unsupported data-type %T\n",
12030 scalar_type);
12031 if (dump_enabled_p ())
12032 dump_printf_loc (MSG_NOTE, vect_location, "nunits vectype: %T\n",
12033 nunits_vectype);
12034 }
12035 }
12036
12037 gcc_assert (multiple_p (TYPE_VECTOR_SUBPARTS (nunits_vectype),
12038 TYPE_VECTOR_SUBPARTS (*stmt_vectype_out)));
12039
12040 if (dump_enabled_p ())
12041 {
12042 dump_printf_loc (MSG_NOTE, vect_location, "nunits = ");
12043 dump_dec (MSG_NOTE, TYPE_VECTOR_SUBPARTS (nunits_vectype));
12044 dump_printf (MSG_NOTE, "\n");
12045 }
12046
12047 *nunits_vectype_out = nunits_vectype;
12048 return opt_result::success ();
12049 }
12050
12051 /* Generate and return a statement sequence that sets the vector length LEN as follows:
12052
12053 min_of_start_and_end = min (START_INDEX, END_INDEX);
12054 left_len = END_INDEX - min_of_start_and_end;
12055 rhs = min (left_len, LEN_LIMIT);
12056 LEN = rhs;
12057
12058 Note: the cost of the code generated by this function is modeled
12059 by vect_estimate_min_profitable_iters, so changes here may need
12060 corresponding changes there. */
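
/* Worked example (editor's sketch): with START_INDEX = 24, END_INDEX = 30
   and LEN_LIMIT = 16, the generated sequence computes
       min_of_start_and_end = min (24, 30) = 24
       left_len = 30 - 24 = 6
       LEN = min (6, 16) = 6
   whereas with START_INDEX = 0 the result would be min (30, 16) = 16. */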
12061
12062 gimple_seq
12063 vect_gen_len (tree len, tree start_index, tree end_index, tree len_limit)
12064 {
12065 gimple_seq stmts = NULL;
12066 tree len_type = TREE_TYPE (len);
12067 gcc_assert (TREE_TYPE (start_index) == len_type);
12068
12069 tree min = gimple_build (&stmts, MIN_EXPR, len_type, start_index, end_index);
12070 tree left_len = gimple_build (&stmts, MINUS_EXPR, len_type, end_index, min);
12071 tree rhs = gimple_build (&stmts, MIN_EXPR, len_type, left_len, len_limit);
12072 gimple *stmt = gimple_build_assign (len, rhs);
12073 gimple_seq_add_stmt (&stmts, stmt);
12074
12075 return stmts;
12076 }
12077