[gcc.git] / gcc / tree-vect-stmts.c
1 /* Statement Analysis and Transformation for Vectorization
2 Copyright (C) 2003-2021 Free Software Foundation, Inc.
3 Contributed by Dorit Naishlos <dorit@il.ibm.com>
4 and Ira Rosen <irar@il.ibm.com>
5
6 This file is part of GCC.
7
8 GCC is free software; you can redistribute it and/or modify it under
9 the terms of the GNU General Public License as published by the Free
10 Software Foundation; either version 3, or (at your option) any later
11 version.
12
13 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
14 WARRANTY; without even the implied warranty of MERCHANTABILITY or
15 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
16 for more details.
17
18 You should have received a copy of the GNU General Public License
19 along with GCC; see the file COPYING3. If not see
20 <http://www.gnu.org/licenses/>. */
21
22 #include "config.h"
23 #include "system.h"
24 #include "coretypes.h"
25 #include "backend.h"
26 #include "target.h"
27 #include "rtl.h"
28 #include "tree.h"
29 #include "gimple.h"
30 #include "ssa.h"
31 #include "optabs-tree.h"
32 #include "insn-config.h"
33 #include "recog.h" /* FIXME: for insn_data */
34 #include "cgraph.h"
35 #include "dumpfile.h"
36 #include "alias.h"
37 #include "fold-const.h"
38 #include "stor-layout.h"
39 #include "tree-eh.h"
40 #include "gimplify.h"
41 #include "gimple-iterator.h"
42 #include "gimplify-me.h"
43 #include "tree-cfg.h"
44 #include "tree-ssa-loop-manip.h"
45 #include "cfgloop.h"
46 #include "explow.h"
47 #include "tree-ssa-loop.h"
48 #include "tree-scalar-evolution.h"
49 #include "tree-vectorizer.h"
50 #include "builtins.h"
51 #include "internal-fn.h"
52 #include "tree-vector-builder.h"
53 #include "vec-perm-indices.h"
54 #include "tree-ssa-loop-niter.h"
55 #include "gimple-fold.h"
56 #include "regs.h"
57 #include "attribs.h"
58
59 /* For lang_hooks.types.type_for_mode. */
60 #include "langhooks.h"
61
62 /* Return the vectorized type for the given statement. */
63
64 tree
65 stmt_vectype (class _stmt_vec_info *stmt_info)
66 {
67 return STMT_VINFO_VECTYPE (stmt_info);
68 }
69
70 /* Return TRUE iff the given statement is in an inner loop relative to
71 the loop being vectorized. */
72 bool
73 stmt_in_inner_loop_p (vec_info *vinfo, class _stmt_vec_info *stmt_info)
74 {
75 gimple *stmt = STMT_VINFO_STMT (stmt_info);
76 basic_block bb = gimple_bb (stmt);
77 loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
78 class loop* loop;
79
80 if (!loop_vinfo)
81 return false;
82
83 loop = LOOP_VINFO_LOOP (loop_vinfo);
84
85 return (bb->loop_father == loop->inner);
86 }
87
88 /* Record the cost of a statement, either by directly informing the
89 target model or by saving it in a vector for later processing.
90 Return a preliminary estimate of the statement's cost. */
91
92 unsigned
93 record_stmt_cost (stmt_vector_for_cost *body_cost_vec, int count,
94 enum vect_cost_for_stmt kind, stmt_vec_info stmt_info,
95 tree vectype, int misalign,
96 enum vect_cost_model_location where)
97 {
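  /* If the statement is really a gather or scatter, re-classify plain or
     unaligned vector accesses so the target cost hooks see the right kind.  */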
98 if ((kind == vector_load || kind == unaligned_load)
99 && (stmt_info && STMT_VINFO_GATHER_SCATTER_P (stmt_info)))
100 kind = vector_gather_load;
101 if ((kind == vector_store || kind == unaligned_store)
102 && (stmt_info && STMT_VINFO_GATHER_SCATTER_P (stmt_info)))
103 kind = vector_scatter_store;
104
105 stmt_info_for_cost si = { count, kind, where, stmt_info, vectype, misalign };
106 body_cost_vec->safe_push (si);
107
108 return (unsigned)
109 (builtin_vectorization_cost (kind, vectype, misalign) * count);
110 }
111
112 /* Return a variable of type ELEM_TYPE[NELEMS]. */
113
114 static tree
115 create_vector_array (tree elem_type, unsigned HOST_WIDE_INT nelems)
116 {
117 return create_tmp_var (build_array_type_nelts (elem_type, nelems),
118 "vect_array");
119 }
120
121 /* ARRAY is an array of vectors created by create_vector_array.
122 Return an SSA_NAME for the vector in index N. The reference
123 is part of the vectorization of STMT_INFO and the vector is associated
124 with scalar destination SCALAR_DEST. */
125
126 static tree
127 read_vector_array (vec_info *vinfo,
128 stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
129 tree scalar_dest, tree array, unsigned HOST_WIDE_INT n)
130 {
131 tree vect_type, vect, vect_name, array_ref;
132 gimple *new_stmt;
133
134 gcc_assert (TREE_CODE (TREE_TYPE (array)) == ARRAY_TYPE);
135 vect_type = TREE_TYPE (TREE_TYPE (array));
136 vect = vect_create_destination_var (scalar_dest, vect_type);
137 array_ref = build4 (ARRAY_REF, vect_type, array,
138 build_int_cst (size_type_node, n),
139 NULL_TREE, NULL_TREE);
140
141 new_stmt = gimple_build_assign (vect, array_ref);
142 vect_name = make_ssa_name (vect, new_stmt);
143 gimple_assign_set_lhs (new_stmt, vect_name);
144 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
145
146 return vect_name;
147 }
148
149 /* ARRAY is an array of vectors created by create_vector_array.
150 Emit code to store SSA_NAME VECT in index N of the array.
151 The store is part of the vectorization of STMT_INFO. */
152
153 static void
154 write_vector_array (vec_info *vinfo,
155 stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
156 tree vect, tree array, unsigned HOST_WIDE_INT n)
157 {
158 tree array_ref;
159 gimple *new_stmt;
160
161 array_ref = build4 (ARRAY_REF, TREE_TYPE (vect), array,
162 build_int_cst (size_type_node, n),
163 NULL_TREE, NULL_TREE);
164
165 new_stmt = gimple_build_assign (array_ref, vect);
166 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
167 }
168
169 /* PTR is a pointer to an array of type TYPE. Return a representation
170 of *PTR. The memory reference replaces those in FIRST_DR
171 (and its group). */
172
173 static tree
174 create_array_ref (tree type, tree ptr, tree alias_ptr_type)
175 {
176 tree mem_ref;
177
178 mem_ref = build2 (MEM_REF, type, ptr, build_int_cst (alias_ptr_type, 0));
179 /* Arrays have the same alignment as their type. */
180 set_ptr_info_alignment (get_ptr_info (ptr), TYPE_ALIGN_UNIT (type), 0);
181 return mem_ref;
182 }
183
184 /* Add a clobber of variable VAR to the vectorization of STMT_INFO.
185 Emit the clobber before *GSI. */
186
187 static void
188 vect_clobber_variable (vec_info *vinfo, stmt_vec_info stmt_info,
189 gimple_stmt_iterator *gsi, tree var)
190 {
191 tree clobber = build_clobber (TREE_TYPE (var));
192 gimple *new_stmt = gimple_build_assign (var, clobber);
193 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
194 }
195
196 /* Utility functions used by vect_mark_stmts_to_be_vectorized. */
197
198 /* Function vect_mark_relevant.
199
200 Mark STMT_INFO as "relevant for vectorization" and add it to WORKLIST. */
201
202 static void
203 vect_mark_relevant (vec<stmt_vec_info> *worklist, stmt_vec_info stmt_info,
204 enum vect_relevant relevant, bool live_p)
205 {
206 enum vect_relevant save_relevant = STMT_VINFO_RELEVANT (stmt_info);
207 bool save_live_p = STMT_VINFO_LIVE_P (stmt_info);
208
209 if (dump_enabled_p ())
210 dump_printf_loc (MSG_NOTE, vect_location,
211 "mark relevant %d, live %d: %G", relevant, live_p,
212 stmt_info->stmt);
213
214 /* If this stmt is an original stmt in a pattern, we might need to mark its
215 related pattern stmt instead of the original stmt. However, such stmts
216 may have their own uses that are not in any pattern, in such cases the
217 stmt itself should be marked. */
218 if (STMT_VINFO_IN_PATTERN_P (stmt_info))
219 {
220 /* This is the last stmt in a sequence that was detected as a
221 pattern that can potentially be vectorized. Don't mark the stmt
222 as relevant/live because it's not going to be vectorized.
223 Instead mark the pattern-stmt that replaces it. */
224
225 if (dump_enabled_p ())
226 dump_printf_loc (MSG_NOTE, vect_location,
227 "last stmt in pattern. don't mark"
228 " relevant/live.\n");
229 stmt_vec_info old_stmt_info = stmt_info;
230 stmt_info = STMT_VINFO_RELATED_STMT (stmt_info);
231 gcc_assert (STMT_VINFO_RELATED_STMT (stmt_info) == old_stmt_info);
232 save_relevant = STMT_VINFO_RELEVANT (stmt_info);
233 save_live_p = STMT_VINFO_LIVE_P (stmt_info);
234 }
235
236 STMT_VINFO_LIVE_P (stmt_info) |= live_p;
237 if (relevant > STMT_VINFO_RELEVANT (stmt_info))
238 STMT_VINFO_RELEVANT (stmt_info) = relevant;
239
240 if (STMT_VINFO_RELEVANT (stmt_info) == save_relevant
241 && STMT_VINFO_LIVE_P (stmt_info) == save_live_p)
242 {
243 if (dump_enabled_p ())
244 dump_printf_loc (MSG_NOTE, vect_location,
245 "already marked relevant/live.\n");
246 return;
247 }
248
249 worklist->safe_push (stmt_info);
250 }
251
252
253 /* Function is_simple_and_all_uses_invariant
254
255 Return true if STMT_INFO is simple and all uses of it are invariant. */
256
257 bool
258 is_simple_and_all_uses_invariant (stmt_vec_info stmt_info,
259 loop_vec_info loop_vinfo)
260 {
261 tree op;
262 ssa_op_iter iter;
263
264 gassign *stmt = dyn_cast <gassign *> (stmt_info->stmt);
265 if (!stmt)
266 return false;
267
268 FOR_EACH_SSA_TREE_OPERAND (op, stmt, iter, SSA_OP_USE)
269 {
270 enum vect_def_type dt = vect_uninitialized_def;
271
272 if (!vect_is_simple_use (op, loop_vinfo, &dt))
273 {
274 if (dump_enabled_p ())
275 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
276 "use not simple.\n");
277 return false;
278 }
279
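  /* Only operands defined outside the loop (external) or by constants
     count as invariant here.  */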
280 if (dt != vect_external_def && dt != vect_constant_def)
281 return false;
282 }
283 return true;
284 }
285
286 /* Function vect_stmt_relevant_p.
287
288 Return true if STMT_INFO, in the loop that is represented by LOOP_VINFO,
289 is "relevant for vectorization".
290
291 A stmt is considered "relevant for vectorization" if:
292 - it has uses outside the loop.
293 - it has vdefs (it alters memory).
294 - it is a control stmt in the loop (except for the exit condition).
295
296 CHECKME: what other side effects would the vectorizer allow? */
297
298 static bool
299 vect_stmt_relevant_p (stmt_vec_info stmt_info, loop_vec_info loop_vinfo,
300 enum vect_relevant *relevant, bool *live_p)
301 {
302 class loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
303 ssa_op_iter op_iter;
304 imm_use_iterator imm_iter;
305 use_operand_p use_p;
306 def_operand_p def_p;
307
308 *relevant = vect_unused_in_scope;
309 *live_p = false;
310
311 /* cond stmt other than loop exit cond. */
312 if (is_ctrl_stmt (stmt_info->stmt)
313 && STMT_VINFO_TYPE (stmt_info) != loop_exit_ctrl_vec_info_type)
314 *relevant = vect_used_in_scope;
315
316 /* changing memory. */
317 if (gimple_code (stmt_info->stmt) != GIMPLE_PHI)
318 if (gimple_vdef (stmt_info->stmt)
319 && !gimple_clobber_p (stmt_info->stmt))
320 {
321 if (dump_enabled_p ())
322 dump_printf_loc (MSG_NOTE, vect_location,
323 "vec_stmt_relevant_p: stmt has vdefs.\n");
324 *relevant = vect_used_in_scope;
325 }
326
327 /* uses outside the loop. */
328 FOR_EACH_PHI_OR_STMT_DEF (def_p, stmt_info->stmt, op_iter, SSA_OP_DEF)
329 {
330 FOR_EACH_IMM_USE_FAST (use_p, imm_iter, DEF_FROM_PTR (def_p))
331 {
332 basic_block bb = gimple_bb (USE_STMT (use_p));
333 if (!flow_bb_inside_loop_p (loop, bb))
334 {
335 if (is_gimple_debug (USE_STMT (use_p)))
336 continue;
337
338 if (dump_enabled_p ())
339 dump_printf_loc (MSG_NOTE, vect_location,
340 "vec_stmt_relevant_p: used out of loop.\n");
341
342 /* We expect all such uses to be in the loop exit phis
343 (because of loop-closed SSA form).  */
344 gcc_assert (gimple_code (USE_STMT (use_p)) == GIMPLE_PHI);
345 gcc_assert (bb == single_exit (loop)->dest);
346
347 *live_p = true;
348 }
349 }
350 }
351
352 if (*live_p && *relevant == vect_unused_in_scope
353 && !is_simple_and_all_uses_invariant (stmt_info, loop_vinfo))
354 {
355 if (dump_enabled_p ())
356 dump_printf_loc (MSG_NOTE, vect_location,
357 "vec_stmt_relevant_p: stmt live but not relevant.\n");
358 *relevant = vect_used_only_live;
359 }
360
361 return (*live_p || *relevant);
362 }
363
364
365 /* Function exist_non_indexing_operands_for_use_p
366
367 USE is one of the uses attached to STMT_INFO. Check if USE is
368 used in STMT_INFO for anything other than indexing an array. */
369
370 static bool
371 exist_non_indexing_operands_for_use_p (tree use, stmt_vec_info stmt_info)
372 {
373 tree operand;
374
375 /* USE corresponds to some operand in STMT. If there is no data
376 reference in STMT, then any operand that corresponds to USE
377 is not indexing an array. */
378 if (!STMT_VINFO_DATA_REF (stmt_info))
379 return true;
380
381 /* STMT has a data_ref.  FORNOW this means that it is of one of
382 the following forms:
383 -1- ARRAY_REF = var
384 -2- var = ARRAY_REF
385 (This should have been verified in analyze_data_refs).
386
387 'var' in the second case corresponds to a def, not a use,
388 so USE cannot correspond to any operands that are not used
389 for array indexing.
390
391 Therefore, all we need to check is if STMT falls into the
392 first case, and whether var corresponds to USE. */
393
394 gassign *assign = dyn_cast <gassign *> (stmt_info->stmt);
395 if (!assign || !gimple_assign_copy_p (assign))
396 {
397 gcall *call = dyn_cast <gcall *> (stmt_info->stmt);
398 if (call && gimple_call_internal_p (call))
399 {
400 internal_fn ifn = gimple_call_internal_fn (call);
401 int mask_index = internal_fn_mask_index (ifn);
402 if (mask_index >= 0
403 && use == gimple_call_arg (call, mask_index))
404 return true;
405 int stored_value_index = internal_fn_stored_value_index (ifn);
406 if (stored_value_index >= 0
407 && use == gimple_call_arg (call, stored_value_index))
408 return true;
409 if (internal_gather_scatter_fn_p (ifn)
410 && use == gimple_call_arg (call, 1))
411 return true;
412 }
413 return false;
414 }
415
416 if (TREE_CODE (gimple_assign_lhs (assign)) == SSA_NAME)
417 return false;
418 operand = gimple_assign_rhs1 (assign);
419 if (TREE_CODE (operand) != SSA_NAME)
420 return false;
421
422 if (operand == use)
423 return true;
424
425 return false;
426 }
427
428
429 /*
430 Function process_use.
431
432 Inputs:
433 - a USE in STMT_VINFO in a loop represented by LOOP_VINFO
434 - RELEVANT - enum value to be set in the STMT_VINFO of the stmt
435 that defined USE. This is done by calling mark_relevant and passing it
436 the WORKLIST (to add DEF_STMT to the WORKLIST in case it is relevant).
437 - FORCE is true if exist_non_indexing_operands_for_use_p check shouldn't
438 be performed.
439
440 Outputs:
441 Generally, LIVE_P and RELEVANT are used to define the liveness and
442 relevance info of the DEF_STMT of this USE:
443 STMT_VINFO_LIVE_P (DEF_stmt_vinfo) <-- live_p
444 STMT_VINFO_RELEVANT (DEF_stmt_vinfo) <-- relevant
445 Exceptions:
446 - case 1: If USE is used only for address computations (e.g. array indexing),
447 which does not need to be directly vectorized, then the liveness/relevance
448 of the respective DEF_STMT is left unchanged.
449 - case 2: If STMT_VINFO is a reduction phi and DEF_STMT is a reduction stmt,
450 we skip DEF_STMT because it has already been processed.
451 - case 3: If DEF_STMT and STMT_VINFO are in different nests, then
452 "relevant" will be modified accordingly.
453
454 Return true if everything is as expected. Return false otherwise. */
455
456 static opt_result
457 process_use (stmt_vec_info stmt_vinfo, tree use, loop_vec_info loop_vinfo,
458 enum vect_relevant relevant, vec<stmt_vec_info> *worklist,
459 bool force)
460 {
461 stmt_vec_info dstmt_vinfo;
462 enum vect_def_type dt;
463
464 /* case 1: we are only interested in uses that need to be vectorized. Uses
465 that are used for address computation are not considered relevant. */
466 if (!force && !exist_non_indexing_operands_for_use_p (use, stmt_vinfo))
467 return opt_result::success ();
468
469 if (!vect_is_simple_use (use, loop_vinfo, &dt, &dstmt_vinfo))
470 return opt_result::failure_at (stmt_vinfo->stmt,
471 "not vectorized:"
472 " unsupported use in stmt.\n");
473
474 if (!dstmt_vinfo)
475 return opt_result::success ();
476
477 basic_block def_bb = gimple_bb (dstmt_vinfo->stmt);
478 basic_block bb = gimple_bb (stmt_vinfo->stmt);
479
480 /* case 2: A reduction phi (STMT) defined by a reduction stmt (DSTMT_VINFO).
481 We have to force the stmt live since the epilogue loop needs it to
482 continue computing the reduction. */
483 if (gimple_code (stmt_vinfo->stmt) == GIMPLE_PHI
484 && STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_reduction_def
485 && gimple_code (dstmt_vinfo->stmt) != GIMPLE_PHI
486 && STMT_VINFO_DEF_TYPE (dstmt_vinfo) == vect_reduction_def
487 && bb->loop_father == def_bb->loop_father)
488 {
489 if (dump_enabled_p ())
490 dump_printf_loc (MSG_NOTE, vect_location,
491 "reduc-stmt defining reduc-phi in the same nest.\n");
492 vect_mark_relevant (worklist, dstmt_vinfo, relevant, true);
493 return opt_result::success ();
494 }
495
496 /* case 3a: outer-loop stmt defining an inner-loop stmt:
497 outer-loop-header-bb:
498 d = dstmt_vinfo
499 inner-loop:
500 stmt # use (d)
501 outer-loop-tail-bb:
502 ... */
503 if (flow_loop_nested_p (def_bb->loop_father, bb->loop_father))
504 {
505 if (dump_enabled_p ())
506 dump_printf_loc (MSG_NOTE, vect_location,
507 "outer-loop def-stmt defining inner-loop stmt.\n");
508
509 switch (relevant)
510 {
511 case vect_unused_in_scope:
512 relevant = (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_nested_cycle) ?
513 vect_used_in_scope : vect_unused_in_scope;
514 break;
515
516 case vect_used_in_outer_by_reduction:
517 gcc_assert (STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_reduction_def);
518 relevant = vect_used_by_reduction;
519 break;
520
521 case vect_used_in_outer:
522 gcc_assert (STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_reduction_def);
523 relevant = vect_used_in_scope;
524 break;
525
526 case vect_used_in_scope:
527 break;
528
529 default:
530 gcc_unreachable ();
531 }
532 }
533
534 /* case 3b: inner-loop stmt defining an outer-loop stmt:
535 outer-loop-header-bb:
536 ...
537 inner-loop:
538 d = dstmt_vinfo
539 outer-loop-tail-bb (or outer-loop-exit-bb in double reduction):
540 stmt # use (d) */
541 else if (flow_loop_nested_p (bb->loop_father, def_bb->loop_father))
542 {
543 if (dump_enabled_p ())
544 dump_printf_loc (MSG_NOTE, vect_location,
545 "inner-loop def-stmt defining outer-loop stmt.\n");
546
547 switch (relevant)
548 {
549 case vect_unused_in_scope:
550 relevant = (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_reduction_def
551 || STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_double_reduction_def) ?
552 vect_used_in_outer_by_reduction : vect_unused_in_scope;
553 break;
554
555 case vect_used_by_reduction:
556 case vect_used_only_live:
557 relevant = vect_used_in_outer_by_reduction;
558 break;
559
560 case vect_used_in_scope:
561 relevant = vect_used_in_outer;
562 break;
563
564 default:
565 gcc_unreachable ();
566 }
567 }
568 /* We are also not interested in uses on loop PHI backedges that are
569 inductions. Otherwise we'll needlessly vectorize the IV increment
570 and cause hybrid SLP for SLP inductions. Unless the PHI is live
571 of course. */
572 else if (gimple_code (stmt_vinfo->stmt) == GIMPLE_PHI
573 && STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_induction_def
574 && ! STMT_VINFO_LIVE_P (stmt_vinfo)
575 && (PHI_ARG_DEF_FROM_EDGE (stmt_vinfo->stmt,
576 loop_latch_edge (bb->loop_father))
577 == use))
578 {
579 if (dump_enabled_p ())
580 dump_printf_loc (MSG_NOTE, vect_location,
581 "induction value on backedge.\n");
582 return opt_result::success ();
583 }
584
585
586 vect_mark_relevant (worklist, dstmt_vinfo, relevant, false);
587 return opt_result::success ();
588 }
589
590
591 /* Function vect_mark_stmts_to_be_vectorized.
592
593 Not all stmts in the loop need to be vectorized. For example:
594
595 for i...
596 for j...
597 1. T0 = i + j
598 2. T1 = a[T0]
599
600 3. j = j + 1
601
602 Stmts 1 and 3 do not need to be vectorized, because loop control and
603 addressing of vectorized data-refs are handled differently.
604
605 This pass detects such stmts. */
606
607 opt_result
608 vect_mark_stmts_to_be_vectorized (loop_vec_info loop_vinfo, bool *fatal)
609 {
610 class loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
611 basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
612 unsigned int nbbs = loop->num_nodes;
613 gimple_stmt_iterator si;
614 unsigned int i;
615 basic_block bb;
616 bool live_p;
617 enum vect_relevant relevant;
618
619 DUMP_VECT_SCOPE ("vect_mark_stmts_to_be_vectorized");
620
621 auto_vec<stmt_vec_info, 64> worklist;
622
623 /* 1. Init worklist. */
624 for (i = 0; i < nbbs; i++)
625 {
626 bb = bbs[i];
627 for (si = gsi_start_phis (bb); !gsi_end_p (si); gsi_next (&si))
628 {
629 stmt_vec_info phi_info = loop_vinfo->lookup_stmt (gsi_stmt (si));
630 if (dump_enabled_p ())
631 dump_printf_loc (MSG_NOTE, vect_location, "init: phi relevant? %G",
632 phi_info->stmt);
633
634 if (vect_stmt_relevant_p (phi_info, loop_vinfo, &relevant, &live_p))
635 vect_mark_relevant (&worklist, phi_info, relevant, live_p);
636 }
637 for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
638 {
639 if (is_gimple_debug (gsi_stmt (si)))
640 continue;
641 stmt_vec_info stmt_info = loop_vinfo->lookup_stmt (gsi_stmt (si));
642 if (dump_enabled_p ())
643 dump_printf_loc (MSG_NOTE, vect_location,
644 "init: stmt relevant? %G", stmt_info->stmt);
645
646 if (vect_stmt_relevant_p (stmt_info, loop_vinfo, &relevant, &live_p))
647 vect_mark_relevant (&worklist, stmt_info, relevant, live_p);
648 }
649 }
650
651 /* 2. Process_worklist */
652 while (worklist.length () > 0)
653 {
654 use_operand_p use_p;
655 ssa_op_iter iter;
656
657 stmt_vec_info stmt_vinfo = worklist.pop ();
658 if (dump_enabled_p ())
659 dump_printf_loc (MSG_NOTE, vect_location,
660 "worklist: examine stmt: %G", stmt_vinfo->stmt);
661
662 /* Examine the USEs of STMT. For each USE, mark the stmt that defines it
663 (DEF_STMT) as relevant/irrelevant according to the relevance property
664 of STMT. */
665 relevant = STMT_VINFO_RELEVANT (stmt_vinfo);
666
667 /* Generally, the relevance property of STMT (in STMT_VINFO_RELEVANT) is
668 propagated as is to the DEF_STMTs of its USEs.
669
670 One exception is when STMT has been identified as defining a reduction
671 variable; in this case we set the relevance to vect_used_by_reduction.
672 This is because we distinguish between two kinds of relevant stmts -
673 those that are used by a reduction computation, and those that are
674 (also) used by a regular computation. This allows us later on to
675 identify stmts that are used solely by a reduction, and therefore the
676 order of the results that they produce does not have to be kept. */
677
678 switch (STMT_VINFO_DEF_TYPE (stmt_vinfo))
679 {
680 case vect_reduction_def:
681 gcc_assert (relevant != vect_unused_in_scope);
682 if (relevant != vect_unused_in_scope
683 && relevant != vect_used_in_scope
684 && relevant != vect_used_by_reduction
685 && relevant != vect_used_only_live)
686 return opt_result::failure_at
687 (stmt_vinfo->stmt, "unsupported use of reduction.\n");
688 break;
689
690 case vect_nested_cycle:
691 if (relevant != vect_unused_in_scope
692 && relevant != vect_used_in_outer_by_reduction
693 && relevant != vect_used_in_outer)
694 return opt_result::failure_at
695 (stmt_vinfo->stmt, "unsupported use of nested cycle.\n");
696 break;
697
698 case vect_double_reduction_def:
699 if (relevant != vect_unused_in_scope
700 && relevant != vect_used_by_reduction
701 && relevant != vect_used_only_live)
702 return opt_result::failure_at
703 (stmt_vinfo->stmt, "unsupported use of double reduction.\n");
704 break;
705
706 default:
707 break;
708 }
709
710 if (is_pattern_stmt_p (stmt_vinfo))
711 {
712 /* Pattern statements are not inserted into the code, so
713 FOR_EACH_PHI_OR_STMT_USE optimizes their operands out, and we
714 have to scan the RHS or function arguments instead. */
715 if (gassign *assign = dyn_cast <gassign *> (stmt_vinfo->stmt))
716 {
717 enum tree_code rhs_code = gimple_assign_rhs_code (assign);
718 tree op = gimple_assign_rhs1 (assign);
719
720 i = 1;
721 if (rhs_code == COND_EXPR && COMPARISON_CLASS_P (op))
722 {
723 opt_result res
724 = process_use (stmt_vinfo, TREE_OPERAND (op, 0),
725 loop_vinfo, relevant, &worklist, false);
726 if (!res)
727 return res;
728 res = process_use (stmt_vinfo, TREE_OPERAND (op, 1),
729 loop_vinfo, relevant, &worklist, false);
730 if (!res)
731 return res;
732 i = 2;
733 }
734 for (; i < gimple_num_ops (assign); i++)
735 {
736 op = gimple_op (assign, i);
737 if (TREE_CODE (op) == SSA_NAME)
738 {
739 opt_result res
740 = process_use (stmt_vinfo, op, loop_vinfo, relevant,
741 &worklist, false);
742 if (!res)
743 return res;
744 }
745 }
746 }
747 else if (gcall *call = dyn_cast <gcall *> (stmt_vinfo->stmt))
748 {
749 for (i = 0; i < gimple_call_num_args (call); i++)
750 {
751 tree arg = gimple_call_arg (call, i);
752 opt_result res
753 = process_use (stmt_vinfo, arg, loop_vinfo, relevant,
754 &worklist, false);
755 if (!res)
756 return res;
757 }
758 }
759 }
760 else
761 FOR_EACH_PHI_OR_STMT_USE (use_p, stmt_vinfo->stmt, iter, SSA_OP_USE)
762 {
763 tree op = USE_FROM_PTR (use_p);
764 opt_result res
765 = process_use (stmt_vinfo, op, loop_vinfo, relevant,
766 &worklist, false);
767 if (!res)
768 return res;
769 }
770
771 if (STMT_VINFO_GATHER_SCATTER_P (stmt_vinfo))
772 {
773 gather_scatter_info gs_info;
774 if (!vect_check_gather_scatter (stmt_vinfo, loop_vinfo, &gs_info))
775 gcc_unreachable ();
776 opt_result res
777 = process_use (stmt_vinfo, gs_info.offset, loop_vinfo, relevant,
778 &worklist, true);
779 if (!res)
780 {
781 if (fatal)
782 *fatal = false;
783 return res;
784 }
785 }
786 } /* while worklist */
787
788 return opt_result::success ();
789 }
790
791 /* Function vect_model_simple_cost.
792
793 Models cost for simple operations, i.e. those that only emit ncopies of a
794 single op. Right now, this does not account for multiple insns that could
795 be generated for the single vector op. We will handle that shortly. */
796
797 static void
798 vect_model_simple_cost (vec_info *,
799 stmt_vec_info stmt_info, int ncopies,
800 enum vect_def_type *dt,
801 int ndts,
802 slp_tree node,
803 stmt_vector_for_cost *cost_vec,
804 vect_cost_for_stmt kind = vector_stmt)
805 {
806 int inside_cost = 0, prologue_cost = 0;
807
808 gcc_assert (cost_vec != NULL);
809
810 /* ??? Somehow we need to fix this at the callers. */
811 if (node)
812 ncopies = SLP_TREE_NUMBER_OF_VEC_STMTS (node);
813
814 if (!node)
815 /* Cost the "broadcast" of a scalar operand into a vector operand.
816 Use scalar_to_vec to cost the broadcast, as elsewhere in the vector
817 cost model. */
818 for (int i = 0; i < ndts; i++)
819 if (dt[i] == vect_constant_def || dt[i] == vect_external_def)
820 prologue_cost += record_stmt_cost (cost_vec, 1, scalar_to_vec,
821 stmt_info, 0, vect_prologue);
822
823 /* Pass the inside-of-loop statements to the target-specific cost model. */
824 inside_cost += record_stmt_cost (cost_vec, ncopies, kind,
825 stmt_info, 0, vect_body);
826
827 if (dump_enabled_p ())
828 dump_printf_loc (MSG_NOTE, vect_location,
829 "vect_model_simple_cost: inside_cost = %d, "
830 "prologue_cost = %d .\n", inside_cost, prologue_cost);
831 }
832
833
834 /* Model cost for type demotion and promotion operations. PWR is
835 normally zero for single-step promotions and demotions. It will be
836 one if two-step promotion/demotion is required, and so on. NCOPIES
837 is the number of vector results (and thus number of instructions)
838 for the narrowest end of the operation chain. Each additional
839 step doubles the number of instructions required. */
840
841 static void
842 vect_model_promotion_demotion_cost (stmt_vec_info stmt_info,
843 enum vect_def_type *dt,
844 unsigned int ncopies, int pwr,
845 stmt_vector_for_cost *cost_vec)
846 {
847 int i;
848 int inside_cost = 0, prologue_cost = 0;
849
850 for (i = 0; i < pwr + 1; i++)
851 {
852 inside_cost += record_stmt_cost (cost_vec, ncopies, vec_promote_demote,
853 stmt_info, 0, vect_body);
854 ncopies *= 2;
855 }
856
857 /* FORNOW: Assuming maximum 2 args per stmt. */
858 for (i = 0; i < 2; i++)
859 if (dt[i] == vect_constant_def || dt[i] == vect_external_def)
860 prologue_cost += record_stmt_cost (cost_vec, 1, vector_stmt,
861 stmt_info, 0, vect_prologue);
862
863 if (dump_enabled_p ())
864 dump_printf_loc (MSG_NOTE, vect_location,
865 "vect_model_promotion_demotion_cost: inside_cost = %d, "
866 "prologue_cost = %d .\n", inside_cost, prologue_cost);
867 }
868
869 /* Returns true if the current function returns DECL. */
870
871 static bool
872 cfun_returns (tree decl)
873 {
874 edge_iterator ei;
875 edge e;
876 FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR_FOR_FN (cfun)->preds)
877 {
878 greturn *ret = safe_dyn_cast <greturn *> (last_stmt (e->src));
879 if (!ret)
880 continue;
881 if (gimple_return_retval (ret) == decl)
882 return true;
883 /* We often end up with an aggregate copy to the result decl;
884 handle that case as well. First skip intermediate clobbers
885 though. */
886 gimple *def = ret;
887 do
888 {
889 def = SSA_NAME_DEF_STMT (gimple_vuse (def));
890 }
891 while (gimple_clobber_p (def));
892 if (is_a <gassign *> (def)
893 && gimple_assign_lhs (def) == gimple_return_retval (ret)
894 && gimple_assign_rhs1 (def) == decl)
895 return true;
896 }
897 return false;
898 }
899
900 /* Function vect_model_store_cost
901
902 Models cost for stores. In the case of grouped accesses, one access
903 has the overhead of the grouped access attributed to it. */
904
905 static void
906 vect_model_store_cost (vec_info *vinfo, stmt_vec_info stmt_info, int ncopies,
907 vect_memory_access_type memory_access_type,
908 vec_load_store_type vls_type, slp_tree slp_node,
909 stmt_vector_for_cost *cost_vec)
910 {
911 unsigned int inside_cost = 0, prologue_cost = 0;
912 stmt_vec_info first_stmt_info = stmt_info;
913 bool grouped_access_p = STMT_VINFO_GROUPED_ACCESS (stmt_info);
914
915 /* ??? Somehow we need to fix this at the callers. */
916 if (slp_node)
917 ncopies = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
918
919 if (vls_type == VLS_STORE_INVARIANT)
920 {
921 if (!slp_node)
922 prologue_cost += record_stmt_cost (cost_vec, 1, scalar_to_vec,
923 stmt_info, 0, vect_prologue);
924 }
925
926 /* Grouped stores update all elements in the group at once,
927 so we want the DR for the first statement. */
928 if (!slp_node && grouped_access_p)
929 first_stmt_info = DR_GROUP_FIRST_ELEMENT (stmt_info);
930
931 /* True if we should include any once-per-group costs as well as
932 the cost of the statement itself. For SLP we only get called
933 once per group anyhow. */
934 bool first_stmt_p = (first_stmt_info == stmt_info);
935
936 /* We assume that the cost of a single store-lanes instruction is
937 equivalent to the cost of DR_GROUP_SIZE separate stores. If a grouped
938 access is instead being provided by a permute-and-store operation,
939 include the cost of the permutes. */
940 if (first_stmt_p
941 && memory_access_type == VMAT_CONTIGUOUS_PERMUTE)
942 {
943 /* Uses high and low interleave or shuffle operations for each
944 needed permute. */
945 int group_size = DR_GROUP_SIZE (first_stmt_info);
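  /* ceil_log2 (group_size) interleave steps, each issuing GROUP_SIZE
     permute statements, repeated for every copy.  */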
946 int nstmts = ncopies * ceil_log2 (group_size) * group_size;
947 inside_cost = record_stmt_cost (cost_vec, nstmts, vec_perm,
948 stmt_info, 0, vect_body);
949
950 if (dump_enabled_p ())
951 dump_printf_loc (MSG_NOTE, vect_location,
952 "vect_model_store_cost: strided group_size = %d .\n",
953 group_size);
954 }
955
956 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
957 /* Costs of the stores. */
958 if (memory_access_type == VMAT_ELEMENTWISE
959 || memory_access_type == VMAT_GATHER_SCATTER)
960 {
961 /* N scalar stores plus extracting the elements. */
962 unsigned int assumed_nunits = vect_nunits_for_cost (vectype);
963 inside_cost += record_stmt_cost (cost_vec,
964 ncopies * assumed_nunits,
965 scalar_store, stmt_info, 0, vect_body);
966 }
967 else
968 vect_get_store_cost (vinfo, stmt_info, ncopies, &inside_cost, cost_vec);
969
970 if (memory_access_type == VMAT_ELEMENTWISE
971 || memory_access_type == VMAT_STRIDED_SLP)
972 {
973 /* N scalar stores plus extracting the elements. */
974 unsigned int assumed_nunits = vect_nunits_for_cost (vectype);
975 inside_cost += record_stmt_cost (cost_vec,
976 ncopies * assumed_nunits,
977 vec_to_scalar, stmt_info, 0, vect_body);
978 }
979
980 /* When vectorizing a store into the function result assign
981 a penalty if the function returns in a multi-register location.
982 In this case we assume we'll end up with having to spill the
983 vector result and do piecewise loads as a conservative estimate. */
984 tree base = get_base_address (STMT_VINFO_DATA_REF (stmt_info)->ref);
985 if (base
986 && (TREE_CODE (base) == RESULT_DECL
987 || (DECL_P (base) && cfun_returns (base)))
988 && !aggregate_value_p (base, cfun->decl))
989 {
990 rtx reg = hard_function_value (TREE_TYPE (base), cfun->decl, 0, 1);
991 /* ??? Handle PARALLEL in some way. */
992 if (REG_P (reg))
993 {
994 int nregs = hard_regno_nregs (REGNO (reg), GET_MODE (reg));
995 /* Assume that a single reg-reg move is possible and cheap,
996 do not account for vector to gp register move cost. */
997 if (nregs > 1)
998 {
999 /* Spill. */
1000 prologue_cost += record_stmt_cost (cost_vec, ncopies,
1001 vector_store,
1002 stmt_info, 0, vect_epilogue);
1003 /* Loads. */
1004 prologue_cost += record_stmt_cost (cost_vec, ncopies * nregs,
1005 scalar_load,
1006 stmt_info, 0, vect_epilogue);
1007 }
1008 }
1009 }
1010
1011 if (dump_enabled_p ())
1012 dump_printf_loc (MSG_NOTE, vect_location,
1013 "vect_model_store_cost: inside_cost = %d, "
1014 "prologue_cost = %d .\n", inside_cost, prologue_cost);
1015 }
1016
1017
1018 /* Calculate cost of DR's memory access. */
1019 void
1020 vect_get_store_cost (vec_info *vinfo, stmt_vec_info stmt_info, int ncopies,
1021 unsigned int *inside_cost,
1022 stmt_vector_for_cost *body_cost_vec)
1023 {
1024 dr_vec_info *dr_info = STMT_VINFO_DR_INFO (stmt_info);
1025 int alignment_support_scheme
1026 = vect_supportable_dr_alignment (vinfo, dr_info, false);
1027
1028 switch (alignment_support_scheme)
1029 {
1030 case dr_aligned:
1031 {
1032 *inside_cost += record_stmt_cost (body_cost_vec, ncopies,
1033 vector_store, stmt_info, 0,
1034 vect_body);
1035
1036 if (dump_enabled_p ())
1037 dump_printf_loc (MSG_NOTE, vect_location,
1038 "vect_model_store_cost: aligned.\n");
1039 break;
1040 }
1041
1042 case dr_unaligned_supported:
1043 {
1044 /* Here, we assign an additional cost for the unaligned store. */
1045 *inside_cost += record_stmt_cost (body_cost_vec, ncopies,
1046 unaligned_store, stmt_info,
1047 DR_MISALIGNMENT (dr_info),
1048 vect_body);
1049 if (dump_enabled_p ())
1050 dump_printf_loc (MSG_NOTE, vect_location,
1051 "vect_model_store_cost: unaligned supported by "
1052 "hardware.\n");
1053 break;
1054 }
1055
1056 case dr_unaligned_unsupported:
1057 {
1058 *inside_cost = VECT_MAX_COST;
1059
1060 if (dump_enabled_p ())
1061 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1062 "vect_model_store_cost: unsupported access.\n");
1063 break;
1064 }
1065
1066 default:
1067 gcc_unreachable ();
1068 }
1069 }
1070
1071
1072 /* Function vect_model_load_cost
1073
1074 Models cost for loads. In the case of grouped accesses, one access has
1075 the overhead of the grouped access attributed to it. Since unaligned
1076 accesses are supported for loads, we also account for the costs of the
1077 access scheme chosen. */
1078
1079 static void
1080 vect_model_load_cost (vec_info *vinfo,
1081 stmt_vec_info stmt_info, unsigned ncopies, poly_uint64 vf,
1082 vect_memory_access_type memory_access_type,
1083 slp_tree slp_node,
1084 stmt_vector_for_cost *cost_vec)
1085 {
1086 unsigned int inside_cost = 0, prologue_cost = 0;
1087 bool grouped_access_p = STMT_VINFO_GROUPED_ACCESS (stmt_info);
1088
1089 gcc_assert (cost_vec);
1090
1091 /* ??? Somehow we need to fix this at the callers. */
1092 if (slp_node)
1093 ncopies = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
1094
1095 if (slp_node && SLP_TREE_LOAD_PERMUTATION (slp_node).exists ())
1096 {
1097 /* If the load is permuted then the alignment is determined by
1098 the first group element not by the first scalar stmt DR. */
1099 stmt_vec_info first_stmt_info = DR_GROUP_FIRST_ELEMENT (stmt_info);
1100 /* Record the cost for the permutation. */
1101 unsigned n_perms, n_loads;
1102 vect_transform_slp_perm_load (vinfo, slp_node, vNULL, NULL,
1103 vf, true, &n_perms, &n_loads);
1104 inside_cost += record_stmt_cost (cost_vec, n_perms, vec_perm,
1105 first_stmt_info, 0, vect_body);
1106
1107 /* And adjust the number of loads performed. This handles
1108 redundancies as well as loads that are later dead. */
1109 ncopies = n_loads;
1110 }
1111
1112 /* Grouped loads read all elements in the group at once,
1113 so we want the DR for the first statement. */
1114 stmt_vec_info first_stmt_info = stmt_info;
1115 if (!slp_node && grouped_access_p)
1116 first_stmt_info = DR_GROUP_FIRST_ELEMENT (stmt_info);
1117
1118 /* True if we should include any once-per-group costs as well as
1119 the cost of the statement itself. For SLP we only get called
1120 once per group anyhow. */
1121 bool first_stmt_p = (first_stmt_info == stmt_info);
1122
1123 /* An IFN_LOAD_LANES will load all its vector results, regardless of which
1124 ones we actually need. Account for the cost of unused results. */
1125 if (first_stmt_p && !slp_node && memory_access_type == VMAT_LOAD_STORE_LANES)
1126 {
1127 unsigned int gaps = DR_GROUP_SIZE (first_stmt_info);
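  /* Walk the statements in the group; whatever is left of GAPS afterwards
     counts the loaded lane vectors that no scalar statement consumes.  */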
1128 stmt_vec_info next_stmt_info = first_stmt_info;
1129 do
1130 {
1131 gaps -= 1;
1132 next_stmt_info = DR_GROUP_NEXT_ELEMENT (next_stmt_info);
1133 }
1134 while (next_stmt_info);
1135 if (gaps)
1136 {
1137 if (dump_enabled_p ())
1138 dump_printf_loc (MSG_NOTE, vect_location,
1139 "vect_model_load_cost: %d unused vectors.\n",
1140 gaps);
1141 vect_get_load_cost (vinfo, stmt_info, ncopies * gaps, false,
1142 &inside_cost, &prologue_cost,
1143 cost_vec, cost_vec, true);
1144 }
1145 }
1146
1147 /* We assume that the cost of a single load-lanes instruction is
1148 equivalent to the cost of DR_GROUP_SIZE separate loads. If a grouped
1149 access is instead being provided by a load-and-permute operation,
1150 include the cost of the permutes. */
1151 if (first_stmt_p
1152 && memory_access_type == VMAT_CONTIGUOUS_PERMUTE)
1153 {
1154 /* Uses even and odd extract operations or shuffle operations
1155 for each needed permute. */
1156 int group_size = DR_GROUP_SIZE (first_stmt_info);
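  /* As with the store case: ceil_log2 (group_size) extract/shuffle steps
     of GROUP_SIZE permutes each, per copy.  */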
1157 int nstmts = ncopies * ceil_log2 (group_size) * group_size;
1158 inside_cost += record_stmt_cost (cost_vec, nstmts, vec_perm,
1159 stmt_info, 0, vect_body);
1160
1161 if (dump_enabled_p ())
1162 dump_printf_loc (MSG_NOTE, vect_location,
1163 "vect_model_load_cost: strided group_size = %d .\n",
1164 group_size);
1165 }
1166
1167 /* The loads themselves. */
1168 if (memory_access_type == VMAT_ELEMENTWISE
1169 || memory_access_type == VMAT_GATHER_SCATTER)
1170 {
1171 /* N scalar loads plus gathering them into a vector. */
1172 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
1173 unsigned int assumed_nunits = vect_nunits_for_cost (vectype);
1174 inside_cost += record_stmt_cost (cost_vec,
1175 ncopies * assumed_nunits,
1176 scalar_load, stmt_info, 0, vect_body);
1177 }
1178 else
1179 vect_get_load_cost (vinfo, stmt_info, ncopies, first_stmt_p,
1180 &inside_cost, &prologue_cost,
1181 cost_vec, cost_vec, true);
1182 if (memory_access_type == VMAT_ELEMENTWISE
1183 || memory_access_type == VMAT_STRIDED_SLP)
1184 inside_cost += record_stmt_cost (cost_vec, ncopies, vec_construct,
1185 stmt_info, 0, vect_body);
1186
1187 if (dump_enabled_p ())
1188 dump_printf_loc (MSG_NOTE, vect_location,
1189 "vect_model_load_cost: inside_cost = %d, "
1190 "prologue_cost = %d .\n", inside_cost, prologue_cost);
1191 }
1192
1193
1194 /* Calculate cost of DR's memory access. */
1195 void
1196 vect_get_load_cost (vec_info *vinfo, stmt_vec_info stmt_info, int ncopies,
1197 bool add_realign_cost, unsigned int *inside_cost,
1198 unsigned int *prologue_cost,
1199 stmt_vector_for_cost *prologue_cost_vec,
1200 stmt_vector_for_cost *body_cost_vec,
1201 bool record_prologue_costs)
1202 {
1203 dr_vec_info *dr_info = STMT_VINFO_DR_INFO (stmt_info);
1204 int alignment_support_scheme
1205 = vect_supportable_dr_alignment (vinfo, dr_info, false);
1206
1207 switch (alignment_support_scheme)
1208 {
1209 case dr_aligned:
1210 {
1211 *inside_cost += record_stmt_cost (body_cost_vec, ncopies, vector_load,
1212 stmt_info, 0, vect_body);
1213
1214 if (dump_enabled_p ())
1215 dump_printf_loc (MSG_NOTE, vect_location,
1216 "vect_model_load_cost: aligned.\n");
1217
1218 break;
1219 }
1220 case dr_unaligned_supported:
1221 {
1222 /* Here, we assign an additional cost for the unaligned load. */
1223 *inside_cost += record_stmt_cost (body_cost_vec, ncopies,
1224 unaligned_load, stmt_info,
1225 DR_MISALIGNMENT (dr_info),
1226 vect_body);
1227
1228 if (dump_enabled_p ())
1229 dump_printf_loc (MSG_NOTE, vect_location,
1230 "vect_model_load_cost: unaligned supported by "
1231 "hardware.\n");
1232
1233 break;
1234 }
1235 case dr_explicit_realign:
1236 {
1237 *inside_cost += record_stmt_cost (body_cost_vec, ncopies * 2,
1238 vector_load, stmt_info, 0, vect_body);
1239 *inside_cost += record_stmt_cost (body_cost_vec, ncopies,
1240 vec_perm, stmt_info, 0, vect_body);
1241
1242 /* FIXME: If the misalignment remains fixed across the iterations of
1243 the containing loop, the following cost should be added to the
1244 prologue costs. */
1245 if (targetm.vectorize.builtin_mask_for_load)
1246 *inside_cost += record_stmt_cost (body_cost_vec, 1, vector_stmt,
1247 stmt_info, 0, vect_body);
1248
1249 if (dump_enabled_p ())
1250 dump_printf_loc (MSG_NOTE, vect_location,
1251 "vect_model_load_cost: explicit realign\n");
1252
1253 break;
1254 }
1255 case dr_explicit_realign_optimized:
1256 {
1257 if (dump_enabled_p ())
1258 dump_printf_loc (MSG_NOTE, vect_location,
1259 "vect_model_load_cost: unaligned software "
1260 "pipelined.\n");
1261
1262 /* Unaligned software pipeline has a load of an address, an initial
1263 load, and possibly a mask operation to "prime" the loop. However,
1264 if this is an access in a group of loads, which provide grouped
1265 access, then the above cost should only be considered for one
1266 access in the group. Inside the loop, there is a load op
1267 and a realignment op. */
1268
1269 if (add_realign_cost && record_prologue_costs)
1270 {
1271 *prologue_cost += record_stmt_cost (prologue_cost_vec, 2,
1272 vector_stmt, stmt_info,
1273 0, vect_prologue);
1274 if (targetm.vectorize.builtin_mask_for_load)
1275 *prologue_cost += record_stmt_cost (prologue_cost_vec, 1,
1276 vector_stmt, stmt_info,
1277 0, vect_prologue);
1278 }
1279
1280 *inside_cost += record_stmt_cost (body_cost_vec, ncopies, vector_load,
1281 stmt_info, 0, vect_body);
1282 *inside_cost += record_stmt_cost (body_cost_vec, ncopies, vec_perm,
1283 stmt_info, 0, vect_body);
1284
1285 if (dump_enabled_p ())
1286 dump_printf_loc (MSG_NOTE, vect_location,
1287 "vect_model_load_cost: explicit realign optimized"
1288 "\n");
1289
1290 break;
1291 }
1292
1293 case dr_unaligned_unsupported:
1294 {
1295 *inside_cost = VECT_MAX_COST;
1296
1297 if (dump_enabled_p ())
1298 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1299 "vect_model_load_cost: unsupported access.\n");
1300 break;
1301 }
1302
1303 default:
1304 gcc_unreachable ();
1305 }
1306 }
1307
1308 /* Insert the new stmt NEW_STMT at *GSI or at the appropriate place in
1309 the loop preheader for the vectorized stmt STMT_VINFO. */
1310
1311 static void
1312 vect_init_vector_1 (vec_info *vinfo, stmt_vec_info stmt_vinfo, gimple *new_stmt,
1313 gimple_stmt_iterator *gsi)
1314 {
1315 if (gsi)
1316 vect_finish_stmt_generation (vinfo, stmt_vinfo, new_stmt, gsi);
1317 else
1318 vinfo->insert_on_entry (stmt_vinfo, new_stmt);
1319
1320 if (dump_enabled_p ())
1321 dump_printf_loc (MSG_NOTE, vect_location,
1322 "created new init_stmt: %G", new_stmt);
1323 }
1324
1325 /* Function vect_init_vector.
1326
1327 Insert a new stmt (INIT_STMT) that initializes a new variable of type
1328 TYPE with the value VAL. If TYPE is a vector type and VAL does not have
1329 a vector type, a vector with all elements equal to VAL is created first.
1330 Place the initialization at GSI if it is not NULL. Otherwise, place the
1331 initialization at the loop preheader.
1332 Return the DEF of INIT_STMT.
1333 It will be used in the vectorization of STMT_INFO. */
1334
1335 tree
1336 vect_init_vector (vec_info *vinfo, stmt_vec_info stmt_info, tree val, tree type,
1337 gimple_stmt_iterator *gsi)
1338 {
1339 gimple *init_stmt;
1340 tree new_temp;
1341
1342 /* We abuse this function to push something to an SSA name with initial 'val'. */
1343 if (! useless_type_conversion_p (type, TREE_TYPE (val)))
1344 {
1345 gcc_assert (TREE_CODE (type) == VECTOR_TYPE);
1346 if (! types_compatible_p (TREE_TYPE (type), TREE_TYPE (val)))
1347 {
1348 /* Scalar boolean value should be transformed into
1349 all zeros or all ones value before building a vector. */
1350 if (VECTOR_BOOLEAN_TYPE_P (type))
1351 {
1352 tree true_val = build_all_ones_cst (TREE_TYPE (type));
1353 tree false_val = build_zero_cst (TREE_TYPE (type));
1354
1355 if (CONSTANT_CLASS_P (val))
1356 val = integer_zerop (val) ? false_val : true_val;
1357 else
1358 {
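  /* For a non-constant scalar boolean, emit VAL ? true_val : false_val
     so the value has the all-ones/all-zeros form built above.  */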
1359 new_temp = make_ssa_name (TREE_TYPE (type));
1360 init_stmt = gimple_build_assign (new_temp, COND_EXPR,
1361 val, true_val, false_val);
1362 vect_init_vector_1 (vinfo, stmt_info, init_stmt, gsi);
1363 val = new_temp;
1364 }
1365 }
1366 else
1367 {
1368 gimple_seq stmts = NULL;
1369 if (! INTEGRAL_TYPE_P (TREE_TYPE (val)))
1370 val = gimple_build (&stmts, VIEW_CONVERT_EXPR,
1371 TREE_TYPE (type), val);
1372 else
1373 /* ??? Condition vectorization expects us to do
1374 promotion of invariant/external defs. */
1375 val = gimple_convert (&stmts, TREE_TYPE (type), val);
1376 for (gimple_stmt_iterator gsi2 = gsi_start (stmts);
1377 !gsi_end_p (gsi2); )
1378 {
1379 init_stmt = gsi_stmt (gsi2);
1380 gsi_remove (&gsi2, false);
1381 vect_init_vector_1 (vinfo, stmt_info, init_stmt, gsi);
1382 }
1383 }
1384 }
1385 val = build_vector_from_val (type, val);
1386 }
1387
1388 new_temp = vect_get_new_ssa_name (type, vect_simple_var, "cst_");
1389 init_stmt = gimple_build_assign (new_temp, val);
1390 vect_init_vector_1 (vinfo, stmt_info, init_stmt, gsi);
1391 return new_temp;
1392 }
1393
1394
1395 /* Function vect_get_vec_defs_for_operand.
1396
1397 OP is an operand in STMT_VINFO. This function returns a vector of
1398 NCOPIES defs that will be used in the vectorized stmts for STMT_VINFO.
1399
1400 In the case that OP is an SSA_NAME which is defined in the loop, then
1401 STMT_VINFO_VEC_STMTS of the defining stmt holds the relevant defs.
1402
1403 In case OP is an invariant or constant, a new stmt that creates a vector def
1404 needs to be introduced. VECTYPE may be used to specify a required type for
1405 vector invariant. */
1406
1407 void
1408 vect_get_vec_defs_for_operand (vec_info *vinfo, stmt_vec_info stmt_vinfo,
1409 unsigned ncopies,
1410 tree op, vec<tree> *vec_oprnds, tree vectype)
1411 {
1412 gimple *def_stmt;
1413 enum vect_def_type dt;
1414 bool is_simple_use;
1415 loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
1416
1417 if (dump_enabled_p ())
1418 dump_printf_loc (MSG_NOTE, vect_location,
1419 "vect_get_vec_defs_for_operand: %T\n", op);
1420
1421 stmt_vec_info def_stmt_info;
1422 is_simple_use = vect_is_simple_use (op, loop_vinfo, &dt,
1423 &def_stmt_info, &def_stmt);
1424 gcc_assert (is_simple_use);
1425 if (def_stmt && dump_enabled_p ())
1426 dump_printf_loc (MSG_NOTE, vect_location, " def_stmt = %G", def_stmt);
1427
1428 vec_oprnds->create (ncopies);
1429 if (dt == vect_constant_def || dt == vect_external_def)
1430 {
1431 tree stmt_vectype = STMT_VINFO_VECTYPE (stmt_vinfo);
1432 tree vector_type;
1433
1434 if (vectype)
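  /* Pick the vector type for the invariant: the caller-supplied VECTYPE,
     the mask type when a scalar boolean feeds a vector-boolean statement,
     or the natural vector type of OP's scalar type.  */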
1435 vector_type = vectype;
1436 else if (VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (op))
1437 && VECTOR_BOOLEAN_TYPE_P (stmt_vectype))
1438 vector_type = truth_type_for (stmt_vectype);
1439 else
1440 vector_type = get_vectype_for_scalar_type (loop_vinfo, TREE_TYPE (op));
1441
1442 gcc_assert (vector_type);
1443 tree vop = vect_init_vector (vinfo, stmt_vinfo, op, vector_type, NULL);
1444 while (ncopies--)
1445 vec_oprnds->quick_push (vop);
1446 }
1447 else
1448 {
1449 def_stmt_info = vect_stmt_to_vectorize (def_stmt_info);
1450 gcc_assert (STMT_VINFO_VEC_STMTS (def_stmt_info).length () == ncopies);
1451 for (unsigned i = 0; i < ncopies; ++i)
1452 vec_oprnds->quick_push (gimple_get_lhs
1453 (STMT_VINFO_VEC_STMTS (def_stmt_info)[i]));
1454 }
1455 }
1456
1457
1458 /* Get vectorized definitions for OP0, OP1, OP2 and OP3, any of which may be null. */
1459
1460 void
1461 vect_get_vec_defs (vec_info *vinfo, stmt_vec_info stmt_info, slp_tree slp_node,
1462 unsigned ncopies,
1463 tree op0, vec<tree> *vec_oprnds0, tree vectype0,
1464 tree op1, vec<tree> *vec_oprnds1, tree vectype1,
1465 tree op2, vec<tree> *vec_oprnds2, tree vectype2,
1466 tree op3, vec<tree> *vec_oprnds3, tree vectype3)
1467 {
1468 if (slp_node)
1469 {
1470 if (op0)
1471 vect_get_slp_defs (SLP_TREE_CHILDREN (slp_node)[0], vec_oprnds0);
1472 if (op1)
1473 vect_get_slp_defs (SLP_TREE_CHILDREN (slp_node)[1], vec_oprnds1);
1474 if (op2)
1475 vect_get_slp_defs (SLP_TREE_CHILDREN (slp_node)[2], vec_oprnds2);
1476 if (op3)
1477 vect_get_slp_defs (SLP_TREE_CHILDREN (slp_node)[3], vec_oprnds3);
1478 }
1479 else
1480 {
1481 if (op0)
1482 vect_get_vec_defs_for_operand (vinfo, stmt_info, ncopies,
1483 op0, vec_oprnds0, vectype0);
1484 if (op1)
1485 vect_get_vec_defs_for_operand (vinfo, stmt_info, ncopies,
1486 op1, vec_oprnds1, vectype1);
1487 if (op2)
1488 vect_get_vec_defs_for_operand (vinfo, stmt_info, ncopies,
1489 op2, vec_oprnds2, vectype2);
1490 if (op3)
1491 vect_get_vec_defs_for_operand (vinfo, stmt_info, ncopies,
1492 op3, vec_oprnds3, vectype3);
1493 }
1494 }
1495
1496 void
1497 vect_get_vec_defs (vec_info *vinfo, stmt_vec_info stmt_info, slp_tree slp_node,
1498 unsigned ncopies,
1499 tree op0, vec<tree> *vec_oprnds0,
1500 tree op1, vec<tree> *vec_oprnds1,
1501 tree op2, vec<tree> *vec_oprnds2,
1502 tree op3, vec<tree> *vec_oprnds3)
1503 {
1504 vect_get_vec_defs (vinfo, stmt_info, slp_node, ncopies,
1505 op0, vec_oprnds0, NULL_TREE,
1506 op1, vec_oprnds1, NULL_TREE,
1507 op2, vec_oprnds2, NULL_TREE,
1508 op3, vec_oprnds3, NULL_TREE);
1509 }
1510
1511 /* Helper function called by vect_finish_replace_stmt and
1512 vect_finish_stmt_generation. Set the location of the new
1513 statement and copy any EH region information from the original statement. */
1514
1515 static void
1516 vect_finish_stmt_generation_1 (vec_info *,
1517 stmt_vec_info stmt_info, gimple *vec_stmt)
1518 {
1519 if (dump_enabled_p ())
1520 dump_printf_loc (MSG_NOTE, vect_location, "add new stmt: %G", vec_stmt);
1521
1522 if (stmt_info)
1523 {
1524 gimple_set_location (vec_stmt, gimple_location (stmt_info->stmt));
1525
1526 /* While EH edges will generally prevent vectorization, stmt might
1527 e.g. be in a must-not-throw region. Ensure newly created stmts
1528 that could throw are part of the same region. */
1529 int lp_nr = lookup_stmt_eh_lp (stmt_info->stmt);
1530 if (lp_nr != 0 && stmt_could_throw_p (cfun, vec_stmt))
1531 add_stmt_to_eh_lp (vec_stmt, lp_nr);
1532 }
1533 else
1534 gcc_assert (!stmt_could_throw_p (cfun, vec_stmt));
1535 }
1536
1537 /* Replace the scalar statement STMT_INFO with a new vector statement VEC_STMT,
1538 which sets the same scalar result as STMT_INFO did. Create and return a
1539 stmt_vec_info for VEC_STMT. */
1540
1541 void
1542 vect_finish_replace_stmt (vec_info *vinfo,
1543 stmt_vec_info stmt_info, gimple *vec_stmt)
1544 {
1545 gimple *scalar_stmt = vect_orig_stmt (stmt_info)->stmt;
1546 gcc_assert (gimple_get_lhs (scalar_stmt) == gimple_get_lhs (vec_stmt));
1547
1548 gimple_stmt_iterator gsi = gsi_for_stmt (scalar_stmt);
1549 gsi_replace (&gsi, vec_stmt, true);
1550
1551 vect_finish_stmt_generation_1 (vinfo, stmt_info, vec_stmt);
1552 }
1553
1554 /* Add VEC_STMT to the vectorized implementation of STMT_INFO and insert it
1555 before *GSI. Create and return a stmt_vec_info for VEC_STMT. */
1556
1557 void
1558 vect_finish_stmt_generation (vec_info *vinfo,
1559 stmt_vec_info stmt_info, gimple *vec_stmt,
1560 gimple_stmt_iterator *gsi)
1561 {
1562 gcc_assert (!stmt_info || gimple_code (stmt_info->stmt) != GIMPLE_LABEL);
1563
1564 if (!gsi_end_p (*gsi)
1565 && gimple_has_mem_ops (vec_stmt))
1566 {
1567 gimple *at_stmt = gsi_stmt (*gsi);
1568 tree vuse = gimple_vuse (at_stmt);
1569 if (vuse && TREE_CODE (vuse) == SSA_NAME)
1570 {
1571 tree vdef = gimple_vdef (at_stmt);
1572 gimple_set_vuse (vec_stmt, gimple_vuse (at_stmt));
1573 gimple_set_modified (vec_stmt, true);
1574 /* If we have an SSA vuse and insert a store, update virtual
1575 SSA form to avoid triggering the renamer. Do so only
1576 if we can easily see all uses - which is what almost always
1577 happens with the way vectorized stmts are inserted. */
1578 if ((vdef && TREE_CODE (vdef) == SSA_NAME)
1579 && ((is_gimple_assign (vec_stmt)
1580 && !is_gimple_reg (gimple_assign_lhs (vec_stmt)))
1581 || (is_gimple_call (vec_stmt)
1582 && !(gimple_call_flags (vec_stmt)
1583 & (ECF_CONST|ECF_PURE|ECF_NOVOPS)))))
1584 {
1585 tree new_vdef = copy_ssa_name (vuse, vec_stmt);
1586 gimple_set_vdef (vec_stmt, new_vdef);
1587 SET_USE (gimple_vuse_op (at_stmt), new_vdef);
1588 }
1589 }
1590 }
1591 gsi_insert_before (gsi, vec_stmt, GSI_SAME_STMT);
1592 vect_finish_stmt_generation_1 (vinfo, stmt_info, vec_stmt);
1593 }
1594
1595 /* We want to vectorize a call to combined function CFN with function
1596 decl FNDECL, using VECTYPE_OUT as the type of the output and VECTYPE_IN
1597 as the types of all inputs. Check whether this is possible using
1598 an internal function, returning its code if so or IFN_LAST if not. */
1599
1600 static internal_fn
1601 vectorizable_internal_function (combined_fn cfn, tree fndecl,
1602 tree vectype_out, tree vectype_in)
1603 {
1604 internal_fn ifn;
1605 if (internal_fn_p (cfn))
1606 ifn = as_internal_fn (cfn);
1607 else
1608 ifn = associated_internal_fn (fndecl);
1609 if (ifn != IFN_LAST && direct_internal_fn_p (ifn))
1610 {
1611 const direct_internal_fn_info &info = direct_internal_fn (ifn);
1612 if (info.vectorizable)
1613 {
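  /* info.type0 and info.type1 say whether the function's modes are keyed
     off the output type or the input type (negative means output).  */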
1614 tree type0 = (info.type0 < 0 ? vectype_out : vectype_in);
1615 tree type1 = (info.type1 < 0 ? vectype_out : vectype_in);
1616 if (direct_internal_fn_supported_p (ifn, tree_pair (type0, type1),
1617 OPTIMIZE_FOR_SPEED))
1618 return ifn;
1619 }
1620 }
1621 return IFN_LAST;
1622 }
1623
1624
1625 static tree permute_vec_elements (vec_info *, tree, tree, tree, stmt_vec_info,
1626 gimple_stmt_iterator *);
1627
1628 /* Check whether a load or store statement in the loop described by
1629 LOOP_VINFO is possible in a loop using partial vectors. This is
1630 testing whether the vectorizer pass has the appropriate support,
1631 as well as whether the target does.
1632
1633 VLS_TYPE says whether the statement is a load or store and VECTYPE
1634 is the type of the vector being loaded or stored. MEMORY_ACCESS_TYPE
1635 says how the load or store is going to be implemented and GROUP_SIZE
1636 is the number of load or store statements in the containing group.
1637 If the access is a gather load or scatter store, GS_INFO describes
1638 its arguments. If the load or store is conditional, SCALAR_MASK is the
1639 condition under which it occurs.
1640
1641 Clear LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P if a loop using partial
1642 vectors is not supported, otherwise record the required rgroup control
1643 types. */
1644
1645 static void
1646 check_load_store_for_partial_vectors (loop_vec_info loop_vinfo, tree vectype,
1647 vec_load_store_type vls_type,
1648 int group_size,
1649 vect_memory_access_type
1650 memory_access_type,
1651 gather_scatter_info *gs_info,
1652 tree scalar_mask)
1653 {
1654 /* Invariant loads need no special support. */
1655 if (memory_access_type == VMAT_INVARIANT)
1656 return;
1657
1658 vec_loop_masks *masks = &LOOP_VINFO_MASKS (loop_vinfo);
1659 machine_mode vecmode = TYPE_MODE (vectype);
1660 bool is_load = (vls_type == VLS_LOAD);
1661 if (memory_access_type == VMAT_LOAD_STORE_LANES)
1662 {
1663 if (is_load
1664 ? !vect_load_lanes_supported (vectype, group_size, true)
1665 : !vect_store_lanes_supported (vectype, group_size, true))
1666 {
1667 if (dump_enabled_p ())
1668 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1669 "can't operate on partial vectors because"
1670 " the target doesn't have an appropriate"
1671 " load/store-lanes instruction.\n");
1672 LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo) = false;
1673 return;
1674 }
1675 unsigned int ncopies = vect_get_num_copies (loop_vinfo, vectype);
1676 vect_record_loop_mask (loop_vinfo, masks, ncopies, vectype, scalar_mask);
1677 return;
1678 }
1679
1680 if (memory_access_type == VMAT_GATHER_SCATTER)
1681 {
1682 internal_fn ifn = (is_load
1683 ? IFN_MASK_GATHER_LOAD
1684 : IFN_MASK_SCATTER_STORE);
1685 if (!internal_gather_scatter_fn_supported_p (ifn, vectype,
1686 gs_info->memory_type,
1687 gs_info->offset_vectype,
1688 gs_info->scale))
1689 {
1690 if (dump_enabled_p ())
1691 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1692 "can't operate on partial vectors because"
1693 " the target doesn't have an appropriate"
1694 " gather load or scatter store instruction.\n");
1695 LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo) = false;
1696 return;
1697 }
1698 unsigned int ncopies = vect_get_num_copies (loop_vinfo, vectype);
1699 vect_record_loop_mask (loop_vinfo, masks, ncopies, vectype, scalar_mask);
1700 return;
1701 }
1702
1703 if (memory_access_type != VMAT_CONTIGUOUS
1704 && memory_access_type != VMAT_CONTIGUOUS_PERMUTE)
1705 {
1706 /* Element X of the data must come from iteration i * VF + X of the
1707 scalar loop. We need more work to support other mappings. */
1708 if (dump_enabled_p ())
1709 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1710 "can't operate on partial vectors because an"
1711 " access isn't contiguous.\n");
1712 LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo) = false;
1713 return;
1714 }
1715
1716 if (!VECTOR_MODE_P (vecmode))
1717 {
1718 if (dump_enabled_p ())
1719 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1720 "can't operate on partial vectors when emulating"
1721 " vector operations.\n");
1722 LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo) = false;
1723 return;
1724 }
1725
1726 /* We might load more scalars than we need for permuting SLP loads.
1727 We checked in get_group_load_store_type that the extra elements
1728 don't leak into a new vector. */
1729 auto get_valid_nvectors = [] (poly_uint64 size, poly_uint64 nunits)
1730 {
1731 unsigned int nvectors;
1732 if (can_div_away_from_zero_p (size, nunits, &nvectors))
1733 return nvectors;
1734 gcc_unreachable ();
1735 };
1736
1737 poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
1738 poly_uint64 vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
1739 machine_mode mask_mode;
1740 bool using_partial_vectors_p = false;
1741 if (targetm.vectorize.get_mask_mode (vecmode).exists (&mask_mode)
1742 && can_vec_mask_load_store_p (vecmode, mask_mode, is_load))
1743 {
1744 unsigned int nvectors = get_valid_nvectors (group_size * vf, nunits);
1745 vect_record_loop_mask (loop_vinfo, masks, nvectors, vectype, scalar_mask);
1746 using_partial_vectors_p = true;
1747 }
1748
1749 machine_mode vmode;
1750 if (get_len_load_store_mode (vecmode, is_load).exists (&vmode))
1751 {
1752 unsigned int nvectors = get_valid_nvectors (group_size * vf, nunits);
1753 vec_loop_lens *lens = &LOOP_VINFO_LENS (loop_vinfo);
1754 unsigned factor = (vecmode == vmode) ? 1 : GET_MODE_UNIT_SIZE (vecmode);
1755 vect_record_loop_len (loop_vinfo, lens, nvectors, vectype, factor);
1756 using_partial_vectors_p = true;
1757 }
1758
1759 if (!using_partial_vectors_p)
1760 {
1761 if (dump_enabled_p ())
1762 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1763 "can't operate on partial vectors because the"
1764 " target doesn't have the appropriate partial"
1765 " vectorization load or store.\n");
1766 LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo) = false;
1767 }
1768 }
1769
1770 /* Return the mask input to a masked load or store. VEC_MASK is the vectorized
1771 form of the scalar mask condition and LOOP_MASK, if nonnull, is the mask
1772 that needs to be applied to all loads and stores in a vectorized loop.
1773 Return VEC_MASK if LOOP_MASK is null, otherwise return VEC_MASK & LOOP_MASK.
1774
1775 MASK_TYPE is the type of both masks. If new statements are needed,
1776 insert them before GSI. */
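/* For example, when both masks are present this emits a statement of
   the form

     vec_mask_and_1 = vec_mask_2 & loop_mask_3;

   and returns the new SSA name (the version numbers here are only
   illustrative).  */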
1777
1778 static tree
1779 prepare_load_store_mask (tree mask_type, tree loop_mask, tree vec_mask,
1780 gimple_stmt_iterator *gsi)
1781 {
1782 gcc_assert (useless_type_conversion_p (mask_type, TREE_TYPE (vec_mask)));
1783 if (!loop_mask)
1784 return vec_mask;
1785
1786 gcc_assert (TREE_TYPE (loop_mask) == mask_type);
1787 tree and_res = make_temp_ssa_name (mask_type, NULL, "vec_mask_and");
1788 gimple *and_stmt = gimple_build_assign (and_res, BIT_AND_EXPR,
1789 vec_mask, loop_mask);
1790 gsi_insert_before (gsi, and_stmt, GSI_SAME_STMT);
1791 return and_res;
1792 }
1793
1794 /* Determine whether we can use a gather load or scatter store to vectorize
1795 strided load or store STMT_INFO by truncating the current offset to a
1796 smaller width. We need to be able to construct an offset vector:
1797
1798 { 0, X, X*2, X*3, ... }
1799
1800 without loss of precision, where X is STMT_INFO's DR_STEP.
1801
1802 Return true if this is possible, describing the gather load or scatter
1803 store in GS_INFO. MASKED_P is true if the load or store is conditional. */
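/* A worked example of the precision check below, with illustrative
   numbers: if DR_STEP is 4 bytes, the element size is 4 and the loop
   runs at most 255 iterations, then with SCALE == 4 the offsets are
   { 0, 1, 2, ... } and never exceed 255, so an 8-bit unsigned offset
   type is narrow enough, provided the target supports a gather or
   scatter with offsets of that width.  */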
1804
1805 static bool
1806 vect_truncate_gather_scatter_offset (stmt_vec_info stmt_info,
1807 loop_vec_info loop_vinfo, bool masked_p,
1808 gather_scatter_info *gs_info)
1809 {
1810 dr_vec_info *dr_info = STMT_VINFO_DR_INFO (stmt_info);
1811 data_reference *dr = dr_info->dr;
1812 tree step = DR_STEP (dr);
1813 if (TREE_CODE (step) != INTEGER_CST)
1814 {
1815 /* ??? Perhaps we could use range information here? */
1816 if (dump_enabled_p ())
1817 dump_printf_loc (MSG_NOTE, vect_location,
1818 "cannot truncate variable step.\n");
1819 return false;
1820 }
1821
1822 /* Get the number of bits in an element. */
1823 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
1824 scalar_mode element_mode = SCALAR_TYPE_MODE (TREE_TYPE (vectype));
1825 unsigned int element_bits = GET_MODE_BITSIZE (element_mode);
1826
1827 /* Set COUNT to the upper limit on the number of elements - 1.
1828 Start with the maximum vectorization factor. */
1829 unsigned HOST_WIDE_INT count = vect_max_vf (loop_vinfo) - 1;
1830
1831 /* Try lowering COUNT to the number of scalar latch iterations. */
1832 class loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
1833 widest_int max_iters;
1834 if (max_loop_iterations (loop, &max_iters)
1835 && max_iters < count)
1836 count = max_iters.to_shwi ();
1837
1838 /* Try scales of 1 and the element size. */
1839 int scales[] = { 1, vect_get_scalar_dr_size (dr_info) };
1840 wi::overflow_type overflow = wi::OVF_NONE;
1841 for (int i = 0; i < 2; ++i)
1842 {
1843 int scale = scales[i];
1844 widest_int factor;
1845 if (!wi::multiple_of_p (wi::to_widest (step), scale, SIGNED, &factor))
1846 continue;
1847
1848 /* Determine the minimum precision of (COUNT - 1) * STEP / SCALE. */
1849 widest_int range = wi::mul (count, factor, SIGNED, &overflow);
1850 if (overflow)
1851 continue;
1852 signop sign = range >= 0 ? UNSIGNED : SIGNED;
1853 unsigned int min_offset_bits = wi::min_precision (range, sign);
1854
1855 /* Find the narrowest viable offset type. */
1856 unsigned int offset_bits = 1U << ceil_log2 (min_offset_bits);
1857 tree offset_type = build_nonstandard_integer_type (offset_bits,
1858 sign == UNSIGNED);
1859
1860 /* See whether the target supports the operation with an offset
1861 no narrower than OFFSET_TYPE. */
1862 tree memory_type = TREE_TYPE (DR_REF (dr));
1863 if (!vect_gather_scatter_fn_p (loop_vinfo, DR_IS_READ (dr), masked_p,
1864 vectype, memory_type, offset_type, scale,
1865 &gs_info->ifn, &gs_info->offset_vectype))
1866 continue;
1867
1868 gs_info->decl = NULL_TREE;
1869 /* Logically the sum of DR_BASE_ADDRESS, DR_INIT and DR_OFFSET,
1870 but we don't need to store that here. */
1871 gs_info->base = NULL_TREE;
1872 gs_info->element_type = TREE_TYPE (vectype);
1873 gs_info->offset = fold_convert (offset_type, step);
1874 gs_info->offset_dt = vect_constant_def;
1875 gs_info->scale = scale;
1876 gs_info->memory_type = memory_type;
1877 return true;
1878 }
1879
1880 if (overflow && dump_enabled_p ())
1881 dump_printf_loc (MSG_NOTE, vect_location,
1882 "truncating gather/scatter offset to %d bits"
1883 " might change its value.\n", element_bits);
1884
1885 return false;
1886 }
1887
1888 /* Return true if we can use gather/scatter internal functions to
1889 vectorize STMT_INFO, which is a grouped or strided load or store.
1890 MASKED_P is true if load or store is conditional. When returning
1891 true, fill in GS_INFO with the information required to perform the
1892 operation. */
1893
1894 static bool
1895 vect_use_strided_gather_scatters_p (stmt_vec_info stmt_info,
1896 loop_vec_info loop_vinfo, bool masked_p,
1897 gather_scatter_info *gs_info)
1898 {
1899 if (!vect_check_gather_scatter (stmt_info, loop_vinfo, gs_info)
1900 || gs_info->decl)
1901 return vect_truncate_gather_scatter_offset (stmt_info, loop_vinfo,
1902 masked_p, gs_info);
1903
1904 tree old_offset_type = TREE_TYPE (gs_info->offset);
1905 tree new_offset_type = TREE_TYPE (gs_info->offset_vectype);
1906
1907 gcc_assert (TYPE_PRECISION (new_offset_type)
1908 >= TYPE_PRECISION (old_offset_type));
1909 gs_info->offset = fold_convert (new_offset_type, gs_info->offset);
1910
1911 if (dump_enabled_p ())
1912 dump_printf_loc (MSG_NOTE, vect_location,
1913 "using gather/scatter for strided/grouped access,"
1914 " scale = %d\n", gs_info->scale);
1915
1916 return true;
1917 }
1918
1919 /* STMT_INFO is a non-strided load or store, meaning that it accesses
1920 elements with a known constant step. Return -1 if that step
1921 is negative, 0 if it is zero, and 1 if it is greater than zero. */
1922
1923 static int
1924 compare_step_with_zero (vec_info *vinfo, stmt_vec_info stmt_info)
1925 {
1926 dr_vec_info *dr_info = STMT_VINFO_DR_INFO (stmt_info);
1927 return tree_int_cst_compare (vect_dr_behavior (vinfo, dr_info)->step,
1928 size_zero_node);
1929 }
1930
1931 /* If the target supports a permute mask that reverses the elements in
1932 a vector of type VECTYPE, return that mask, otherwise return null. */
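/* For a 4-element vector the selector built below is { 3, 2, 1, 0 };
   only the first three elements are pushed explicitly because a single
   stepped pattern (start NUNITS - 1, step -1) describes the whole
   series, which also works for variable-length vectors.  */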
1933
1934 static tree
1935 perm_mask_for_reverse (tree vectype)
1936 {
1937 poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
1938
1939 /* The encoding has a single stepped pattern. */
1940 vec_perm_builder sel (nunits, 1, 3);
1941 for (int i = 0; i < 3; ++i)
1942 sel.quick_push (nunits - 1 - i);
1943
1944 vec_perm_indices indices (sel, 1, nunits);
1945 if (!can_vec_perm_const_p (TYPE_MODE (vectype), indices))
1946 return NULL_TREE;
1947 return vect_gen_perm_mask_checked (vectype, indices);
1948 }
1949
1950 /* A subroutine of get_load_store_type, with a subset of the same
1951 arguments. Handle the case where STMT_INFO is a load or store that
1952 accesses consecutive elements with a negative step. */
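/* A negative step typically comes from accesses such as a[n - i] in a
   forward-counting loop.  When the conditions below are met, the access
   is vectorized as a contiguous load or store combined with a reversing
   permute (see perm_mask_for_reverse above); otherwise it falls back to
   element-wise accesses.  */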
1953
1954 static vect_memory_access_type
1955 get_negative_load_store_type (vec_info *vinfo,
1956 stmt_vec_info stmt_info, tree vectype,
1957 vec_load_store_type vls_type,
1958 unsigned int ncopies)
1959 {
1960 dr_vec_info *dr_info = STMT_VINFO_DR_INFO (stmt_info);
1961 dr_alignment_support alignment_support_scheme;
1962
1963 if (ncopies > 1)
1964 {
1965 if (dump_enabled_p ())
1966 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1967 "multiple types with negative step.\n");
1968 return VMAT_ELEMENTWISE;
1969 }
1970
1971 alignment_support_scheme = vect_supportable_dr_alignment (vinfo,
1972 dr_info, false);
1973 if (alignment_support_scheme != dr_aligned
1974 && alignment_support_scheme != dr_unaligned_supported)
1975 {
1976 if (dump_enabled_p ())
1977 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1978 "negative step but alignment required.\n");
1979 return VMAT_ELEMENTWISE;
1980 }
1981
1982 if (vls_type == VLS_STORE_INVARIANT)
1983 {
1984 if (dump_enabled_p ())
1985 dump_printf_loc (MSG_NOTE, vect_location,
1986 "negative step with invariant source;"
1987 " no permute needed.\n");
1988 return VMAT_CONTIGUOUS_DOWN;
1989 }
1990
1991 if (!perm_mask_for_reverse (vectype))
1992 {
1993 if (dump_enabled_p ())
1994 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1995 "negative step and reversing not supported.\n");
1996 return VMAT_ELEMENTWISE;
1997 }
1998
1999 return VMAT_CONTIGUOUS_REVERSE;
2000 }
2001
2002 /* STMT_INFO is either a masked or unconditional store. Return the value
2003 being stored. */
2004
2005 tree
2006 vect_get_store_rhs (stmt_vec_info stmt_info)
2007 {
2008 if (gassign *assign = dyn_cast <gassign *> (stmt_info->stmt))
2009 {
2010 gcc_assert (gimple_assign_single_p (assign));
2011 return gimple_assign_rhs1 (assign);
2012 }
2013 if (gcall *call = dyn_cast <gcall *> (stmt_info->stmt))
2014 {
2015 internal_fn ifn = gimple_call_internal_fn (call);
2016 int index = internal_fn_stored_value_index (ifn);
2017 gcc_assert (index >= 0);
2018 return gimple_call_arg (call, index);
2019 }
2020 gcc_unreachable ();
2021 }
2022
2023 /* Function VECTOR_VECTOR_COMPOSITION_TYPE
2024
2025    This function returns a vector type which can be composed of NELTS pieces,
2026    whose type is recorded in PTYPE.  VTYPE should be a vector type and has the
2027    same vector size as the returned vector.  The function first checks whether
2028    the target supports a piece-sized vector mode for the construction; if not,
2029    it checks whether a piece-sized scalar (integer) mode can be used instead.
2030    It returns NULL_TREE if no suitable composition is found.
2031
2032    For example, for (vtype=V16QI, nelts=4), we can probably get:
2033      - V16QI with PTYPE V4QI.
2034      - V4SI with PTYPE SI.
2035      - NULL_TREE. */
2036
2037 static tree
2038 vector_vector_composition_type (tree vtype, poly_uint64 nelts, tree *ptype)
2039 {
2040 gcc_assert (VECTOR_TYPE_P (vtype));
2041 gcc_assert (known_gt (nelts, 0U));
2042
2043 machine_mode vmode = TYPE_MODE (vtype);
2044 if (!VECTOR_MODE_P (vmode))
2045 return NULL_TREE;
2046
2047 poly_uint64 vbsize = GET_MODE_BITSIZE (vmode);
2048 unsigned int pbsize;
2049 if (constant_multiple_p (vbsize, nelts, &pbsize))
2050 {
2051 /* First check if vec_init optab supports construction from
2052 vector pieces directly. */
2053 scalar_mode elmode = SCALAR_TYPE_MODE (TREE_TYPE (vtype));
2054 poly_uint64 inelts = pbsize / GET_MODE_BITSIZE (elmode);
2055 machine_mode rmode;
2056 if (related_vector_mode (vmode, elmode, inelts).exists (&rmode)
2057 && (convert_optab_handler (vec_init_optab, vmode, rmode)
2058 != CODE_FOR_nothing))
2059 {
2060 *ptype = build_vector_type (TREE_TYPE (vtype), inelts);
2061 return vtype;
2062 }
2063
2064       /* Otherwise check whether an integer type of the same piece size exists
2065	  and whether the vec_init optab supports construction from it directly. */
2066 if (int_mode_for_size (pbsize, 0).exists (&elmode)
2067 && related_vector_mode (vmode, elmode, nelts).exists (&rmode)
2068 && (convert_optab_handler (vec_init_optab, rmode, elmode)
2069 != CODE_FOR_nothing))
2070 {
2071 *ptype = build_nonstandard_integer_type (pbsize, 1);
2072 return build_vector_type (*ptype, nelts);
2073 }
2074 }
2075
2076 return NULL_TREE;
2077 }
2078
2079 /* A subroutine of get_load_store_type, with a subset of the same
2080 arguments. Handle the case where STMT_INFO is part of a grouped load
2081 or store.
2082
2083 For stores, the statements in the group are all consecutive
2084 and there is no gap at the end. For loads, the statements in the
2085 group might not be consecutive; there can be gaps between statements
2086 as well as at the end. */
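/* Roughly speaking, loads of a[4*i], a[4*i+1] and a[4*i+2] form a group
   of size four with a trailing gap of one element; the code below
   decides whether such a gap can be tolerated (e.g. by peeling for
   gaps) or forces a fallback to element-wise accesses.  */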
2087
2088 static bool
2089 get_group_load_store_type (vec_info *vinfo, stmt_vec_info stmt_info,
2090 tree vectype, slp_tree slp_node,
2091 bool masked_p, vec_load_store_type vls_type,
2092 vect_memory_access_type *memory_access_type,
2093 dr_alignment_support *alignment_support_scheme,
2094 gather_scatter_info *gs_info)
2095 {
2096 loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
2097 class loop *loop = loop_vinfo ? LOOP_VINFO_LOOP (loop_vinfo) : NULL;
2098 stmt_vec_info first_stmt_info = DR_GROUP_FIRST_ELEMENT (stmt_info);
2099 dr_vec_info *first_dr_info = STMT_VINFO_DR_INFO (first_stmt_info);
2100 unsigned int group_size = DR_GROUP_SIZE (first_stmt_info);
2101 bool single_element_p = (stmt_info == first_stmt_info
2102 && !DR_GROUP_NEXT_ELEMENT (stmt_info));
2103 unsigned HOST_WIDE_INT gap = DR_GROUP_GAP (first_stmt_info);
2104 poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
2105
2106 /* True if the vectorized statements would access beyond the last
2107 statement in the group. */
2108 bool overrun_p = false;
2109
2110 /* True if we can cope with such overrun by peeling for gaps, so that
2111 there is at least one final scalar iteration after the vector loop. */
2112 bool can_overrun_p = (!masked_p
2113 && vls_type == VLS_LOAD
2114 && loop_vinfo
2115 && !loop->inner);
2116
2117 /* There can only be a gap at the end of the group if the stride is
2118 known at compile time. */
2119 gcc_assert (!STMT_VINFO_STRIDED_P (first_stmt_info) || gap == 0);
2120
2121 /* Stores can't yet have gaps. */
2122 gcc_assert (slp_node || vls_type == VLS_LOAD || gap == 0);
2123
2124 if (slp_node)
2125 {
2126 /* For SLP vectorization we directly vectorize a subchain
2127 without permutation. */
2128 if (! SLP_TREE_LOAD_PERMUTATION (slp_node).exists ())
2129 first_dr_info
2130 = STMT_VINFO_DR_INFO (SLP_TREE_SCALAR_STMTS (slp_node)[0]);
2131 if (STMT_VINFO_STRIDED_P (first_stmt_info))
2132 {
2133 /* Try to use consecutive accesses of DR_GROUP_SIZE elements,
2134 separated by the stride, until we have a complete vector.
2135 Fall back to scalar accesses if that isn't possible. */
2136 if (multiple_p (nunits, group_size))
2137 *memory_access_type = VMAT_STRIDED_SLP;
2138 else
2139 *memory_access_type = VMAT_ELEMENTWISE;
2140 }
2141 else
2142 {
2143 overrun_p = loop_vinfo && gap != 0;
2144 if (overrun_p && vls_type != VLS_LOAD)
2145 {
2146 	      if (dump_enabled_p ())
2147 		dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2148 				 "Grouped store with gaps requires non-consecutive accesses\n");
2149 return false;
2150 }
2151 /* An overrun is fine if the trailing elements are smaller
2152 than the alignment boundary B. Every vector access will
2153 be a multiple of B and so we are guaranteed to access a
2154 non-gap element in the same B-sized block. */
2155 if (overrun_p
2156 && gap < (vect_known_alignment_in_bytes (first_dr_info)
2157 / vect_get_scalar_dr_size (first_dr_info)))
2158 overrun_p = false;
2159
2160 /* If the gap splits the vector in half and the target
2161 can do half-vector operations avoid the epilogue peeling
2162 by simply loading half of the vector only. Usually
2163 the construction with an upper zero half will be elided. */
2164 dr_alignment_support alignment_support_scheme;
2165 tree half_vtype;
2166 if (overrun_p
2167 && !masked_p
2168 && (((alignment_support_scheme
2169 = vect_supportable_dr_alignment (vinfo,
2170 first_dr_info, false)))
2171 == dr_aligned
2172 || alignment_support_scheme == dr_unaligned_supported)
2173 && known_eq (nunits, (group_size - gap) * 2)
2174 && known_eq (nunits, group_size)
2175 && (vector_vector_composition_type (vectype, 2, &half_vtype)
2176 != NULL_TREE))
2177 overrun_p = false;
2178
2179 if (overrun_p && !can_overrun_p)
2180 {
2181 if (dump_enabled_p ())
2182 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2183 "Peeling for outer loop is not supported\n");
2184 return false;
2185 }
2186 int cmp = compare_step_with_zero (vinfo, stmt_info);
2187 if (cmp < 0)
2188 {
2189 if (single_element_p)
2190 /* ??? The VMAT_CONTIGUOUS_REVERSE code generation is
2191 only correct for single element "interleaving" SLP. */
2192 *memory_access_type = get_negative_load_store_type
2193 (vinfo, stmt_info, vectype, vls_type, 1);
2194 else
2195 {
2196 /* Try to use consecutive accesses of DR_GROUP_SIZE elements,
2197 separated by the stride, until we have a complete vector.
2198 Fall back to scalar accesses if that isn't possible. */
2199 if (multiple_p (nunits, group_size))
2200 *memory_access_type = VMAT_STRIDED_SLP;
2201 else
2202 *memory_access_type = VMAT_ELEMENTWISE;
2203 }
2204 }
2205 else
2206 {
2207 gcc_assert (!loop_vinfo || cmp > 0);
2208 *memory_access_type = VMAT_CONTIGUOUS;
2209 }
2210 }
2211 }
2212 else
2213 {
2214 /* We can always handle this case using elementwise accesses,
2215 but see if something more efficient is available. */
2216 *memory_access_type = VMAT_ELEMENTWISE;
2217
2218 /* If there is a gap at the end of the group then these optimizations
2219 would access excess elements in the last iteration. */
2220 bool would_overrun_p = (gap != 0);
2221 /* An overrun is fine if the trailing elements are smaller than the
2222 alignment boundary B. Every vector access will be a multiple of B
2223 and so we are guaranteed to access a non-gap element in the
2224 same B-sized block. */
2225 if (would_overrun_p
2226 && !masked_p
2227 && gap < (vect_known_alignment_in_bytes (first_dr_info)
2228 / vect_get_scalar_dr_size (first_dr_info)))
2229 would_overrun_p = false;
2230
2231 if (!STMT_VINFO_STRIDED_P (first_stmt_info)
2232 && (can_overrun_p || !would_overrun_p)
2233 && compare_step_with_zero (vinfo, stmt_info) > 0)
2234 {
2235 /* First cope with the degenerate case of a single-element
2236 vector. */
2237 if (known_eq (TYPE_VECTOR_SUBPARTS (vectype), 1U))
2238 ;
2239
2240 /* Otherwise try using LOAD/STORE_LANES. */
2241 else if (vls_type == VLS_LOAD
2242 ? vect_load_lanes_supported (vectype, group_size, masked_p)
2243 : vect_store_lanes_supported (vectype, group_size,
2244 masked_p))
2245 {
2246 *memory_access_type = VMAT_LOAD_STORE_LANES;
2247 overrun_p = would_overrun_p;
2248 }
2249
2250 /* If that fails, try using permuting loads. */
2251 else if (vls_type == VLS_LOAD
2252 ? vect_grouped_load_supported (vectype, single_element_p,
2253 group_size)
2254 : vect_grouped_store_supported (vectype, group_size))
2255 {
2256 *memory_access_type = VMAT_CONTIGUOUS_PERMUTE;
2257 overrun_p = would_overrun_p;
2258 }
2259 }
2260
2261       /* As a last resort, try using a gather load or scatter store.
2262
2263 ??? Although the code can handle all group sizes correctly,
2264 it probably isn't a win to use separate strided accesses based
2265 on nearby locations. Or, even if it's a win over scalar code,
2266 it might not be a win over vectorizing at a lower VF, if that
2267 allows us to use contiguous accesses. */
2268 if (*memory_access_type == VMAT_ELEMENTWISE
2269 && single_element_p
2270 && loop_vinfo
2271 && vect_use_strided_gather_scatters_p (stmt_info, loop_vinfo,
2272 masked_p, gs_info))
2273 *memory_access_type = VMAT_GATHER_SCATTER;
2274 }
2275
2276 if (*memory_access_type == VMAT_GATHER_SCATTER
2277 || *memory_access_type == VMAT_ELEMENTWISE)
2278 *alignment_support_scheme = dr_unaligned_supported;
2279 else
2280 *alignment_support_scheme
2281 = vect_supportable_dr_alignment (vinfo, first_dr_info, false);
2282
2283 if (vls_type != VLS_LOAD && first_stmt_info == stmt_info)
2284 {
2285 /* STMT is the leader of the group. Check the operands of all the
2286 stmts of the group. */
2287 stmt_vec_info next_stmt_info = DR_GROUP_NEXT_ELEMENT (stmt_info);
2288 while (next_stmt_info)
2289 {
2290 tree op = vect_get_store_rhs (next_stmt_info);
2291 enum vect_def_type dt;
2292 if (!vect_is_simple_use (op, vinfo, &dt))
2293 {
2294 if (dump_enabled_p ())
2295 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2296 "use not simple.\n");
2297 return false;
2298 }
2299 next_stmt_info = DR_GROUP_NEXT_ELEMENT (next_stmt_info);
2300 }
2301 }
2302
2303 if (overrun_p)
2304 {
2305 gcc_assert (can_overrun_p);
2306 if (dump_enabled_p ())
2307 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2308 "Data access with gaps requires scalar "
2309 "epilogue loop\n");
2310 LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo) = true;
2311 }
2312
2313 return true;
2314 }
2315
2316 /* Analyze load or store statement STMT_INFO of type VLS_TYPE. Return true
2317 if there is a memory access type that the vectorized form can use,
2318 storing it in *MEMORY_ACCESS_TYPE if so. If we decide to use gathers
2319 or scatters, fill in GS_INFO accordingly. In addition
2320 *ALIGNMENT_SUPPORT_SCHEME is filled out and false is returned if
2321 the target does not support the alignment scheme.
2322
2323    SLP_NODE, if nonnull, says we're performing SLP rather than loop vectorization.
2324 MASKED_P is true if the statement is conditional on a vectorized mask.
2325 VECTYPE is the vector type that the vectorized statements will use.
2326 NCOPIES is the number of vector statements that will be needed. */
2327
2328 static bool
2329 get_load_store_type (vec_info *vinfo, stmt_vec_info stmt_info,
2330 tree vectype, slp_tree slp_node,
2331 bool masked_p, vec_load_store_type vls_type,
2332 unsigned int ncopies,
2333 vect_memory_access_type *memory_access_type,
2334 dr_alignment_support *alignment_support_scheme,
2335 gather_scatter_info *gs_info)
2336 {
2337 loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
2338 poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
2339 if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
2340 {
2341 *memory_access_type = VMAT_GATHER_SCATTER;
2342 if (!vect_check_gather_scatter (stmt_info, loop_vinfo, gs_info))
2343 gcc_unreachable ();
2344 else if (!vect_is_simple_use (gs_info->offset, vinfo,
2345 &gs_info->offset_dt,
2346 &gs_info->offset_vectype))
2347 {
2348 if (dump_enabled_p ())
2349 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2350 "%s index use not simple.\n",
2351 vls_type == VLS_LOAD ? "gather" : "scatter");
2352 return false;
2353 }
2354       /* Gather-scatter accesses perform only component accesses; alignment
2355	 is irrelevant for them.  */
2356 *alignment_support_scheme = dr_unaligned_supported;
2357 }
2358 else if (STMT_VINFO_GROUPED_ACCESS (stmt_info))
2359 {
2360 if (!get_group_load_store_type (vinfo, stmt_info, vectype, slp_node,
2361 masked_p,
2362 vls_type, memory_access_type,
2363 alignment_support_scheme, gs_info))
2364 return false;
2365 }
2366 else if (STMT_VINFO_STRIDED_P (stmt_info))
2367 {
2368 gcc_assert (!slp_node);
2369 if (loop_vinfo
2370 && vect_use_strided_gather_scatters_p (stmt_info, loop_vinfo,
2371 masked_p, gs_info))
2372 *memory_access_type = VMAT_GATHER_SCATTER;
2373 else
2374 *memory_access_type = VMAT_ELEMENTWISE;
2375 /* Alignment is irrelevant here. */
2376 *alignment_support_scheme = dr_unaligned_supported;
2377 }
2378 else
2379 {
2380 int cmp = compare_step_with_zero (vinfo, stmt_info);
2381 if (cmp < 0)
2382 *memory_access_type = get_negative_load_store_type
2383 (vinfo, stmt_info, vectype, vls_type, ncopies);
2384 else if (cmp == 0)
2385 {
2386 gcc_assert (vls_type == VLS_LOAD);
2387 *memory_access_type = VMAT_INVARIANT;
2388 }
2389 else
2390 *memory_access_type = VMAT_CONTIGUOUS;
2391 *alignment_support_scheme
2392 = vect_supportable_dr_alignment (vinfo,
2393 STMT_VINFO_DR_INFO (stmt_info), false);
2394 }
2395
2396 if ((*memory_access_type == VMAT_ELEMENTWISE
2397 || *memory_access_type == VMAT_STRIDED_SLP)
2398 && !nunits.is_constant ())
2399 {
2400 if (dump_enabled_p ())
2401 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2402 "Not using elementwise accesses due to variable "
2403 "vectorization factor.\n");
2404 return false;
2405 }
2406
2407 if (*alignment_support_scheme == dr_unaligned_unsupported)
2408 {
2409 if (dump_enabled_p ())
2410 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2411 "unsupported unaligned access\n");
2412 return false;
2413 }
2414
2415 /* FIXME: At the moment the cost model seems to underestimate the
2416 cost of using elementwise accesses. This check preserves the
2417 traditional behavior until that can be fixed. */
2418 stmt_vec_info first_stmt_info = DR_GROUP_FIRST_ELEMENT (stmt_info);
2419 if (!first_stmt_info)
2420 first_stmt_info = stmt_info;
2421 if (*memory_access_type == VMAT_ELEMENTWISE
2422 && !STMT_VINFO_STRIDED_P (first_stmt_info)
2423 && !(stmt_info == DR_GROUP_FIRST_ELEMENT (stmt_info)
2424 && !DR_GROUP_NEXT_ELEMENT (stmt_info)
2425 && !pow2p_hwi (DR_GROUP_SIZE (stmt_info))))
2426 {
2427 if (dump_enabled_p ())
2428 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2429 "not falling back to elementwise accesses\n");
2430 return false;
2431 }
2432 return true;
2433 }
2434
2435 /* Return true if boolean argument MASK is suitable for vectorizing
2436 conditional operation STMT_INFO. When returning true, store the type
2437 of the definition in *MASK_DT_OUT and the type of the vectorized mask
2438 in *MASK_VECTYPE_OUT. */
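/* Typically MASK is the SSA name produced by a scalar comparison such
   as "mask_1 = a_2 < b_3"; the checks below require a boolean SSA name
   whose vector form has the same number of elements as STMT_INFO's data
   vector type.  */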
2439
2440 static bool
2441 vect_check_scalar_mask (vec_info *vinfo, stmt_vec_info stmt_info, tree mask,
2442 vect_def_type *mask_dt_out,
2443 tree *mask_vectype_out)
2444 {
2445 if (!VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (mask)))
2446 {
2447 if (dump_enabled_p ())
2448 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2449 "mask argument is not a boolean.\n");
2450 return false;
2451 }
2452
2453 if (TREE_CODE (mask) != SSA_NAME)
2454 {
2455 if (dump_enabled_p ())
2456 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2457 "mask argument is not an SSA name.\n");
2458 return false;
2459 }
2460
2461 enum vect_def_type mask_dt;
2462 tree mask_vectype;
2463 if (!vect_is_simple_use (mask, vinfo, &mask_dt, &mask_vectype))
2464 {
2465 if (dump_enabled_p ())
2466 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2467 "mask use not simple.\n");
2468 return false;
2469 }
2470
2471 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
2472 if (!mask_vectype)
2473 mask_vectype = get_mask_type_for_scalar_type (vinfo, TREE_TYPE (vectype));
2474
2475 if (!mask_vectype || !VECTOR_BOOLEAN_TYPE_P (mask_vectype))
2476 {
2477 if (dump_enabled_p ())
2478 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2479 "could not find an appropriate vector mask type.\n");
2480 return false;
2481 }
2482
2483 if (maybe_ne (TYPE_VECTOR_SUBPARTS (mask_vectype),
2484 TYPE_VECTOR_SUBPARTS (vectype)))
2485 {
2486 if (dump_enabled_p ())
2487 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2488 "vector mask type %T"
2489 " does not match vector data type %T.\n",
2490 mask_vectype, vectype);
2491
2492 return false;
2493 }
2494
2495 *mask_dt_out = mask_dt;
2496 *mask_vectype_out = mask_vectype;
2497 return true;
2498 }
2499
2500 /* Return true if stored value RHS is suitable for vectorizing store
2501 statement STMT_INFO. When returning true, store the type of the
2502 definition in *RHS_DT_OUT, the type of the vectorized store value in
2503 *RHS_VECTYPE_OUT and the type of the store in *VLS_TYPE_OUT. */
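/* For example, a store of a literal constant or of a value defined
   outside the loop is classified as VLS_STORE_INVARIANT, anything
   defined inside the loop as plain VLS_STORE; the distinction is used,
   among other places, by get_negative_load_store_type above to avoid a
   reverse permute for invariant sources.  */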
2504
2505 static bool
2506 vect_check_store_rhs (vec_info *vinfo, stmt_vec_info stmt_info,
2507 slp_tree slp_node, tree rhs,
2508 vect_def_type *rhs_dt_out, tree *rhs_vectype_out,
2509 vec_load_store_type *vls_type_out)
2510 {
2511 /* In the case this is a store from a constant make sure
2512 native_encode_expr can handle it. */
2513 if (CONSTANT_CLASS_P (rhs) && native_encode_expr (rhs, NULL, 64) == 0)
2514 {
2515 if (dump_enabled_p ())
2516 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2517 "cannot encode constant as a byte sequence.\n");
2518 return false;
2519 }
2520
2521 enum vect_def_type rhs_dt;
2522 tree rhs_vectype;
2523 slp_tree slp_op;
2524 if (!vect_is_simple_use (vinfo, stmt_info, slp_node, 0,
2525 &rhs, &slp_op, &rhs_dt, &rhs_vectype))
2526 {
2527 if (dump_enabled_p ())
2528 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2529 "use not simple.\n");
2530 return false;
2531 }
2532
2533 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
2534 if (rhs_vectype && !useless_type_conversion_p (vectype, rhs_vectype))
2535 {
2536 if (dump_enabled_p ())
2537 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2538 "incompatible vector types.\n");
2539 return false;
2540 }
2541
2542 *rhs_dt_out = rhs_dt;
2543 *rhs_vectype_out = rhs_vectype;
2544 if (rhs_dt == vect_constant_def || rhs_dt == vect_external_def)
2545 *vls_type_out = VLS_STORE_INVARIANT;
2546 else
2547 *vls_type_out = VLS_STORE;
2548 return true;
2549 }
2550
2551 /* Build an all-ones vector mask of type MASKTYPE while vectorizing STMT_INFO.
2552 Note that we support masks with floating-point type, in which case the
2553 floats are interpreted as a bitmask. */
2554
2555 static tree
2556 vect_build_all_ones_mask (vec_info *vinfo,
2557 stmt_vec_info stmt_info, tree masktype)
2558 {
2559 if (TREE_CODE (masktype) == INTEGER_TYPE)
2560 return build_int_cst (masktype, -1);
2561 else if (TREE_CODE (TREE_TYPE (masktype)) == INTEGER_TYPE)
2562 {
2563 tree mask = build_int_cst (TREE_TYPE (masktype), -1);
2564 mask = build_vector_from_val (masktype, mask);
2565 return vect_init_vector (vinfo, stmt_info, mask, masktype, NULL);
2566 }
2567 else if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (masktype)))
2568 {
2569 REAL_VALUE_TYPE r;
2570 long tmp[6];
2571 for (int j = 0; j < 6; ++j)
2572 tmp[j] = -1;
2573 real_from_target (&r, tmp, TYPE_MODE (TREE_TYPE (masktype)));
2574 tree mask = build_real (TREE_TYPE (masktype), r);
2575 mask = build_vector_from_val (masktype, mask);
2576 return vect_init_vector (vinfo, stmt_info, mask, masktype, NULL);
2577 }
2578 gcc_unreachable ();
2579 }
2580
2581 /* Build an all-zero merge value of type VECTYPE while vectorizing
2582 STMT_INFO as a gather load. */
2583
2584 static tree
2585 vect_build_zero_merge_argument (vec_info *vinfo,
2586 stmt_vec_info stmt_info, tree vectype)
2587 {
2588 tree merge;
2589 if (TREE_CODE (TREE_TYPE (vectype)) == INTEGER_TYPE)
2590 merge = build_int_cst (TREE_TYPE (vectype), 0);
2591 else if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (vectype)))
2592 {
2593 REAL_VALUE_TYPE r;
2594 long tmp[6];
2595 for (int j = 0; j < 6; ++j)
2596 tmp[j] = 0;
2597 real_from_target (&r, tmp, TYPE_MODE (TREE_TYPE (vectype)));
2598 merge = build_real (TREE_TYPE (vectype), r);
2599 }
2600 else
2601 gcc_unreachable ();
2602 merge = build_vector_from_val (vectype, merge);
2603 return vect_init_vector (vinfo, stmt_info, merge, vectype, NULL);
2604 }
2605
2606 /* Build a gather load call while vectorizing STMT_INFO. Insert new
2607 instructions before GSI and add them to VEC_STMT. GS_INFO describes
2608 the gather load operation. If the load is conditional, MASK is the
2609 unvectorized condition and MASK_DT is its definition type, otherwise
2610 MASK is null. */
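/* This path handles gathers that are exposed as target builtins
   (GS_INFO->DECL), e.g. the x86 AVX2/AVX-512 gather builtins, whose
   argument list has the shape (SRC, PTR, INDEX, MASK, SCALE); the
   builtin's type is picked apart below to recover those argument types.
   Target-independent gathers instead use IFN_GATHER_LOAD or
   IFN_MASK_GATHER_LOAD and are handled elsewhere in vectorizable_load.  */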
2611
2612 static void
2613 vect_build_gather_load_calls (vec_info *vinfo, stmt_vec_info stmt_info,
2614 gimple_stmt_iterator *gsi,
2615 gimple **vec_stmt,
2616 gather_scatter_info *gs_info,
2617 tree mask)
2618 {
2619 loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
2620 class loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
2621 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
2622 poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
2623 int ncopies = vect_get_num_copies (loop_vinfo, vectype);
2624 edge pe = loop_preheader_edge (loop);
2625 enum { NARROW, NONE, WIDEN } modifier;
2626 poly_uint64 gather_off_nunits
2627 = TYPE_VECTOR_SUBPARTS (gs_info->offset_vectype);
2628
2629 tree arglist = TYPE_ARG_TYPES (TREE_TYPE (gs_info->decl));
2630 tree rettype = TREE_TYPE (TREE_TYPE (gs_info->decl));
2631 tree srctype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
2632 tree ptrtype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
2633 tree idxtype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
2634 tree masktype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
2635 tree scaletype = TREE_VALUE (arglist);
2636 tree real_masktype = masktype;
2637 gcc_checking_assert (types_compatible_p (srctype, rettype)
2638 && (!mask
2639 || TREE_CODE (masktype) == INTEGER_TYPE
2640 || types_compatible_p (srctype, masktype)));
2641 if (mask && TREE_CODE (masktype) == INTEGER_TYPE)
2642 masktype = truth_type_for (srctype);
2643
2644 tree mask_halftype = masktype;
2645 tree perm_mask = NULL_TREE;
2646 tree mask_perm_mask = NULL_TREE;
2647 if (known_eq (nunits, gather_off_nunits))
2648 modifier = NONE;
2649 else if (known_eq (nunits * 2, gather_off_nunits))
2650 {
2651 modifier = WIDEN;
2652
2653 /* Currently widening gathers and scatters are only supported for
2654 fixed-length vectors. */
2655 int count = gather_off_nunits.to_constant ();
2656 vec_perm_builder sel (count, count, 1);
2657 for (int i = 0; i < count; ++i)
2658 sel.quick_push (i | (count / 2));
2659
2660 vec_perm_indices indices (sel, 1, count);
2661 perm_mask = vect_gen_perm_mask_checked (gs_info->offset_vectype,
2662 indices);
2663 }
2664 else if (known_eq (nunits, gather_off_nunits * 2))
2665 {
2666 modifier = NARROW;
2667
2668 /* Currently narrowing gathers and scatters are only supported for
2669 fixed-length vectors. */
2670 int count = nunits.to_constant ();
2671 vec_perm_builder sel (count, count, 1);
2672 sel.quick_grow (count);
2673 for (int i = 0; i < count; ++i)
2674 sel[i] = i < count / 2 ? i : i + count / 2;
2675 vec_perm_indices indices (sel, 2, count);
2676 perm_mask = vect_gen_perm_mask_checked (vectype, indices);
2677
2678 ncopies *= 2;
2679
2680 if (mask && masktype == real_masktype)
2681 {
2682 for (int i = 0; i < count; ++i)
2683 sel[i] = i | (count / 2);
2684 indices.new_vector (sel, 2, count);
2685 mask_perm_mask = vect_gen_perm_mask_checked (masktype, indices);
2686 }
2687 else if (mask)
2688 mask_halftype = truth_type_for (gs_info->offset_vectype);
2689 }
2690 else
2691 gcc_unreachable ();
2692
2693 tree scalar_dest = gimple_get_lhs (stmt_info->stmt);
2694 tree vec_dest = vect_create_destination_var (scalar_dest, vectype);
2695
2696 tree ptr = fold_convert (ptrtype, gs_info->base);
2697 if (!is_gimple_min_invariant (ptr))
2698 {
2699 gimple_seq seq;
2700 ptr = force_gimple_operand (ptr, &seq, true, NULL_TREE);
2701 basic_block new_bb = gsi_insert_seq_on_edge_immediate (pe, seq);
2702 gcc_assert (!new_bb);
2703 }
2704
2705 tree scale = build_int_cst (scaletype, gs_info->scale);
2706
2707 tree vec_oprnd0 = NULL_TREE;
2708 tree vec_mask = NULL_TREE;
2709 tree src_op = NULL_TREE;
2710 tree mask_op = NULL_TREE;
2711 tree prev_res = NULL_TREE;
2712
2713 if (!mask)
2714 {
2715 src_op = vect_build_zero_merge_argument (vinfo, stmt_info, rettype);
2716 mask_op = vect_build_all_ones_mask (vinfo, stmt_info, masktype);
2717 }
2718
2719 auto_vec<tree> vec_oprnds0;
2720 auto_vec<tree> vec_masks;
2721 vect_get_vec_defs_for_operand (vinfo, stmt_info,
2722 modifier == WIDEN ? ncopies / 2 : ncopies,
2723 gs_info->offset, &vec_oprnds0);
2724 if (mask)
2725 vect_get_vec_defs_for_operand (vinfo, stmt_info,
2726 modifier == NARROW ? ncopies / 2 : ncopies,
2727 mask, &vec_masks);
2728 for (int j = 0; j < ncopies; ++j)
2729 {
2730 tree op, var;
2731 if (modifier == WIDEN && (j & 1))
2732 op = permute_vec_elements (vinfo, vec_oprnd0, vec_oprnd0,
2733 perm_mask, stmt_info, gsi);
2734 else
2735 op = vec_oprnd0 = vec_oprnds0[modifier == WIDEN ? j / 2 : j];
2736
2737 if (!useless_type_conversion_p (idxtype, TREE_TYPE (op)))
2738 {
2739 gcc_assert (known_eq (TYPE_VECTOR_SUBPARTS (TREE_TYPE (op)),
2740 TYPE_VECTOR_SUBPARTS (idxtype)));
2741 var = vect_get_new_ssa_name (idxtype, vect_simple_var);
2742 op = build1 (VIEW_CONVERT_EXPR, idxtype, op);
2743 gassign *new_stmt = gimple_build_assign (var, VIEW_CONVERT_EXPR, op);
2744 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
2745 op = var;
2746 }
2747
2748 if (mask)
2749 {
2750 if (mask_perm_mask && (j & 1))
2751 mask_op = permute_vec_elements (vinfo, mask_op, mask_op,
2752 mask_perm_mask, stmt_info, gsi);
2753 else
2754 {
2755 if (modifier == NARROW)
2756 {
2757 if ((j & 1) == 0)
2758 vec_mask = vec_masks[j / 2];
2759 }
2760 else
2761 vec_mask = vec_masks[j];
2762
2763 mask_op = vec_mask;
2764 if (!useless_type_conversion_p (masktype, TREE_TYPE (vec_mask)))
2765 {
2766 poly_uint64 sub1 = TYPE_VECTOR_SUBPARTS (TREE_TYPE (mask_op));
2767 poly_uint64 sub2 = TYPE_VECTOR_SUBPARTS (masktype);
2768 gcc_assert (known_eq (sub1, sub2));
2769 var = vect_get_new_ssa_name (masktype, vect_simple_var);
2770 mask_op = build1 (VIEW_CONVERT_EXPR, masktype, mask_op);
2771 gassign *new_stmt
2772 = gimple_build_assign (var, VIEW_CONVERT_EXPR, mask_op);
2773 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
2774 mask_op = var;
2775 }
2776 }
2777 if (modifier == NARROW && masktype != real_masktype)
2778 {
2779 var = vect_get_new_ssa_name (mask_halftype, vect_simple_var);
2780 gassign *new_stmt
2781 = gimple_build_assign (var, (j & 1) ? VEC_UNPACK_HI_EXPR
2782 : VEC_UNPACK_LO_EXPR,
2783 mask_op);
2784 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
2785 mask_op = var;
2786 }
2787 src_op = mask_op;
2788 }
2789
2790 tree mask_arg = mask_op;
2791 if (masktype != real_masktype)
2792 {
2793 tree utype, optype = TREE_TYPE (mask_op);
2794 if (TYPE_MODE (real_masktype) == TYPE_MODE (optype))
2795 utype = real_masktype;
2796 else
2797 utype = lang_hooks.types.type_for_mode (TYPE_MODE (optype), 1);
2798 var = vect_get_new_ssa_name (utype, vect_scalar_var);
2799 mask_arg = build1 (VIEW_CONVERT_EXPR, utype, mask_op);
2800 gassign *new_stmt
2801 = gimple_build_assign (var, VIEW_CONVERT_EXPR, mask_arg);
2802 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
2803 mask_arg = var;
2804 if (!useless_type_conversion_p (real_masktype, utype))
2805 {
2806 gcc_assert (TYPE_PRECISION (utype)
2807 <= TYPE_PRECISION (real_masktype));
2808 var = vect_get_new_ssa_name (real_masktype, vect_scalar_var);
2809 new_stmt = gimple_build_assign (var, NOP_EXPR, mask_arg);
2810 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
2811 mask_arg = var;
2812 }
2813 src_op = build_zero_cst (srctype);
2814 }
2815 gimple *new_stmt = gimple_build_call (gs_info->decl, 5, src_op, ptr, op,
2816 mask_arg, scale);
2817
2818 if (!useless_type_conversion_p (vectype, rettype))
2819 {
2820 gcc_assert (known_eq (TYPE_VECTOR_SUBPARTS (vectype),
2821 TYPE_VECTOR_SUBPARTS (rettype)));
2822 op = vect_get_new_ssa_name (rettype, vect_simple_var);
2823 gimple_call_set_lhs (new_stmt, op);
2824 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
2825 var = make_ssa_name (vec_dest);
2826 op = build1 (VIEW_CONVERT_EXPR, vectype, op);
2827 new_stmt = gimple_build_assign (var, VIEW_CONVERT_EXPR, op);
2828 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
2829 }
2830 else
2831 {
2832 var = make_ssa_name (vec_dest, new_stmt);
2833 gimple_call_set_lhs (new_stmt, var);
2834 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
2835 }
2836
2837 if (modifier == NARROW)
2838 {
2839 if ((j & 1) == 0)
2840 {
2841 prev_res = var;
2842 continue;
2843 }
2844 var = permute_vec_elements (vinfo, prev_res, var, perm_mask,
2845 stmt_info, gsi);
2846 new_stmt = SSA_NAME_DEF_STMT (var);
2847 }
2848
2849 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
2850 }
2851 *vec_stmt = STMT_VINFO_VEC_STMTS (stmt_info)[0];
2852 }
2853
2854 /* Prepare the base and offset in GS_INFO for vectorization.
2855    Set *DATAREF_PTR to the loop-invariant base address and fill *VEC_OFFSET
2856    with the vectorized offset operands for the NCOPIES copies of STMT_INFO.
2857 STMT_INFO is the statement described by GS_INFO and LOOP is the
2858 containing loop. */
2859
2860 static void
2861 vect_get_gather_scatter_ops (vec_info *vinfo,
2862 class loop *loop, stmt_vec_info stmt_info,
2863 gather_scatter_info *gs_info,
2864 tree *dataref_ptr, vec<tree> *vec_offset,
2865 unsigned ncopies)
2866 {
2867 gimple_seq stmts = NULL;
2868 *dataref_ptr = force_gimple_operand (gs_info->base, &stmts, true, NULL_TREE);
2869 if (stmts != NULL)
2870 {
2871 basic_block new_bb;
2872 edge pe = loop_preheader_edge (loop);
2873 new_bb = gsi_insert_seq_on_edge_immediate (pe, stmts);
2874 gcc_assert (!new_bb);
2875 }
2876 vect_get_vec_defs_for_operand (vinfo, stmt_info, ncopies, gs_info->offset,
2877 vec_offset, gs_info->offset_vectype);
2878 }
2879
2880 /* Prepare to implement a grouped or strided load or store using
2881 the gather load or scatter store operation described by GS_INFO.
2882 STMT_INFO is the load or store statement.
2883
2884 Set *DATAREF_BUMP to the amount that should be added to the base
2885 address after each copy of the vectorized statement. Set *VEC_OFFSET
2886 to an invariant offset vector in which element I has the value
2887 I * DR_STEP / SCALE. */
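/* For instance, with DR_STEP == 16 bytes, SCALE == 4 and a 4-element
   offset vector (illustrative numbers), *VEC_OFFSET becomes
   { 0, 4, 8, 12 }, built as a VEC_SERIES below, and *DATAREF_BUMP is
   16 * TYPE_VECTOR_SUBPARTS (VECTYPE), i.e. the number of bytes covered
   by one vectorized copy.  */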
2888
2889 static void
2890 vect_get_strided_load_store_ops (stmt_vec_info stmt_info,
2891 loop_vec_info loop_vinfo,
2892 gather_scatter_info *gs_info,
2893 tree *dataref_bump, tree *vec_offset)
2894 {
2895 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
2896 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
2897
2898 tree bump = size_binop (MULT_EXPR,
2899 fold_convert (sizetype, unshare_expr (DR_STEP (dr))),
2900 size_int (TYPE_VECTOR_SUBPARTS (vectype)));
2901 *dataref_bump = cse_and_gimplify_to_preheader (loop_vinfo, bump);
2902
2903 /* The offset given in GS_INFO can have pointer type, so use the element
2904 type of the vector instead. */
2905 tree offset_type = TREE_TYPE (gs_info->offset_vectype);
2906
2907 /* Calculate X = DR_STEP / SCALE and convert it to the appropriate type. */
2908 tree step = size_binop (EXACT_DIV_EXPR, unshare_expr (DR_STEP (dr)),
2909 ssize_int (gs_info->scale));
2910 step = fold_convert (offset_type, step);
2911
2912 /* Create {0, X, X*2, X*3, ...}. */
2913 tree offset = fold_build2 (VEC_SERIES_EXPR, gs_info->offset_vectype,
2914 build_zero_cst (offset_type), step);
2915 *vec_offset = cse_and_gimplify_to_preheader (loop_vinfo, offset);
2916 }
2917
2918 /* Return the amount that should be added to a vector pointer to move
2919 to the next or previous copy of AGGR_TYPE. DR_INFO is the data reference
2920 being vectorized and MEMORY_ACCESS_TYPE describes the type of
2921 vectorization. */
2922
2923 static tree
2924 vect_get_data_ptr_increment (vec_info *vinfo,
2925 dr_vec_info *dr_info, tree aggr_type,
2926 vect_memory_access_type memory_access_type)
2927 {
2928 if (memory_access_type == VMAT_INVARIANT)
2929 return size_zero_node;
2930
2931 tree iv_step = TYPE_SIZE_UNIT (aggr_type);
2932 tree step = vect_dr_behavior (vinfo, dr_info)->step;
2933 if (tree_int_cst_sgn (step) == -1)
2934 iv_step = fold_build1 (NEGATE_EXPR, TREE_TYPE (iv_step), iv_step);
2935 return iv_step;
2936 }
2937
2938 /* Check and perform vectorization of BUILT_IN_BSWAP{16,32,64,128}. */
2939
2940 static bool
2941 vectorizable_bswap (vec_info *vinfo,
2942 stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
2943 gimple **vec_stmt, slp_tree slp_node,
2944 slp_tree *slp_op,
2945 tree vectype_in, stmt_vector_for_cost *cost_vec)
2946 {
2947 tree op, vectype;
2948 gcall *stmt = as_a <gcall *> (stmt_info->stmt);
2949 loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
2950 unsigned ncopies;
2951
2952 op = gimple_call_arg (stmt, 0);
2953 vectype = STMT_VINFO_VECTYPE (stmt_info);
2954 poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
2955
2956 /* Multiple types in SLP are handled by creating the appropriate number of
2957 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
2958 case of SLP. */
2959 if (slp_node)
2960 ncopies = 1;
2961 else
2962 ncopies = vect_get_num_copies (loop_vinfo, vectype);
2963
2964 gcc_assert (ncopies >= 1);
2965
2966 tree char_vectype = get_same_sized_vectype (char_type_node, vectype_in);
2967 if (! char_vectype)
2968 return false;
2969
2970 poly_uint64 num_bytes = TYPE_VECTOR_SUBPARTS (char_vectype);
2971 unsigned word_bytes;
2972 if (!constant_multiple_p (num_bytes, nunits, &word_bytes))
2973 return false;
2974
2975 /* The encoding uses one stepped pattern for each byte in the word. */
2976 vec_perm_builder elts (num_bytes, word_bytes, 3);
2977 for (unsigned i = 0; i < 3; ++i)
2978 for (unsigned j = 0; j < word_bytes; ++j)
2979 elts.quick_push ((i + 1) * word_bytes - j - 1);
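  /* E.g. for a bswap32 on a 16-byte vector (WORD_BYTES == 4) the selector
     becomes { 3, 2, 1, 0, 7, 6, 5, 4, 11, 10, 9, 8, 15, 14, 13, 12 },
     i.e. the bytes of each 32-bit word are reversed in place.  */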
2980
2981 vec_perm_indices indices (elts, 1, num_bytes);
2982 if (!can_vec_perm_const_p (TYPE_MODE (char_vectype), indices))
2983 return false;
2984
2985 if (! vec_stmt)
2986 {
2987 if (slp_node
2988 && !vect_maybe_update_slp_op_vectype (slp_op[0], vectype_in))
2989 {
2990 if (dump_enabled_p ())
2991 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2992 "incompatible vector types for invariants\n");
2993 return false;
2994 }
2995
2996 STMT_VINFO_TYPE (stmt_info) = call_vec_info_type;
2997 DUMP_VECT_SCOPE ("vectorizable_bswap");
2998 if (! slp_node)
2999 {
3000 record_stmt_cost (cost_vec,
3001 1, vector_stmt, stmt_info, 0, vect_prologue);
3002 record_stmt_cost (cost_vec,
3003 ncopies, vec_perm, stmt_info, 0, vect_body);
3004 }
3005 return true;
3006 }
3007
3008 tree bswap_vconst = vec_perm_indices_to_tree (char_vectype, indices);
3009
3010 /* Transform. */
3011 vec<tree> vec_oprnds = vNULL;
3012 vect_get_vec_defs (vinfo, stmt_info, slp_node, ncopies,
3013 op, &vec_oprnds);
3014   /* Arguments are ready.  Create the new vector stmt.  */
3015 unsigned i;
3016 tree vop;
3017 FOR_EACH_VEC_ELT (vec_oprnds, i, vop)
3018 {
3019 gimple *new_stmt;
3020 tree tem = make_ssa_name (char_vectype);
3021 new_stmt = gimple_build_assign (tem, build1 (VIEW_CONVERT_EXPR,
3022 char_vectype, vop));
3023 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
3024 tree tem2 = make_ssa_name (char_vectype);
3025 new_stmt = gimple_build_assign (tem2, VEC_PERM_EXPR,
3026 tem, tem, bswap_vconst);
3027 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
3028 tem = make_ssa_name (vectype);
3029 new_stmt = gimple_build_assign (tem, build1 (VIEW_CONVERT_EXPR,
3030 vectype, tem2));
3031 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
3032 if (slp_node)
3033 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
3034 else
3035 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
3036 }
3037
3038 if (!slp_node)
3039 *vec_stmt = STMT_VINFO_VEC_STMTS (stmt_info)[0];
3040
3041 vec_oprnds.release ();
3042 return true;
3043 }
3044
3045 /* Return true if vector types VECTYPE_IN and VECTYPE_OUT have
3046 integer elements and if we can narrow VECTYPE_IN to VECTYPE_OUT
3047 in a single step. On success, store the binary pack code in
3048 *CONVERT_CODE. */
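/* For instance, narrowing V2DI inputs to a V4SI output can be done in a
   single step with VEC_PACK_TRUNC_EXPR (two input vectors packed into
   one output); this is the situation described for the DI->SI builtins
   further below.  Multi-step conversions are rejected here.  */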
3049
3050 static bool
3051 simple_integer_narrowing (tree vectype_out, tree vectype_in,
3052 tree_code *convert_code)
3053 {
3054 if (!INTEGRAL_TYPE_P (TREE_TYPE (vectype_out))
3055 || !INTEGRAL_TYPE_P (TREE_TYPE (vectype_in)))
3056 return false;
3057
3058 tree_code code;
3059 int multi_step_cvt = 0;
3060 auto_vec <tree, 8> interm_types;
3061 if (!supportable_narrowing_operation (NOP_EXPR, vectype_out, vectype_in,
3062 &code, &multi_step_cvt, &interm_types)
3063 || multi_step_cvt)
3064 return false;
3065
3066 *convert_code = code;
3067 return true;
3068 }
3069
3070 /* Function vectorizable_call.
3071
3072 Check if STMT_INFO performs a function call that can be vectorized.
3073 If VEC_STMT is also passed, vectorize STMT_INFO: create a vectorized
3074 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
3075 Return true if STMT_INFO is vectorizable in this way. */
3076
3077 static bool
3078 vectorizable_call (vec_info *vinfo,
3079 stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
3080 gimple **vec_stmt, slp_tree slp_node,
3081 stmt_vector_for_cost *cost_vec)
3082 {
3083 gcall *stmt;
3084 tree vec_dest;
3085 tree scalar_dest;
3086 tree op;
3087 tree vec_oprnd0 = NULL_TREE, vec_oprnd1 = NULL_TREE;
3088 tree vectype_out, vectype_in;
3089 poly_uint64 nunits_in;
3090 poly_uint64 nunits_out;
3091 loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
3092 bb_vec_info bb_vinfo = dyn_cast <bb_vec_info> (vinfo);
3093 tree fndecl, new_temp, rhs_type;
3094 enum vect_def_type dt[4]
3095 = { vect_unknown_def_type, vect_unknown_def_type, vect_unknown_def_type,
3096 vect_unknown_def_type };
3097 tree vectypes[ARRAY_SIZE (dt)] = {};
3098 slp_tree slp_op[ARRAY_SIZE (dt)] = {};
3099 int ndts = ARRAY_SIZE (dt);
3100 int ncopies, j;
3101 auto_vec<tree, 8> vargs;
3102 auto_vec<tree, 8> orig_vargs;
3103 enum { NARROW, NONE, WIDEN } modifier;
3104 size_t i, nargs;
3105 tree lhs;
3106
3107 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
3108 return false;
3109
3110 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
3111 && ! vec_stmt)
3112 return false;
3113
3114 /* Is STMT_INFO a vectorizable call? */
3115 stmt = dyn_cast <gcall *> (stmt_info->stmt);
3116 if (!stmt)
3117 return false;
3118
3119 if (gimple_call_internal_p (stmt)
3120 && (internal_load_fn_p (gimple_call_internal_fn (stmt))
3121 || internal_store_fn_p (gimple_call_internal_fn (stmt))))
3122 /* Handled by vectorizable_load and vectorizable_store. */
3123 return false;
3124
3125 if (gimple_call_lhs (stmt) == NULL_TREE
3126 || TREE_CODE (gimple_call_lhs (stmt)) != SSA_NAME)
3127 return false;
3128
3129 gcc_checking_assert (!stmt_can_throw_internal (cfun, stmt));
3130
3131 vectype_out = STMT_VINFO_VECTYPE (stmt_info);
3132
3133 /* Process function arguments. */
3134 rhs_type = NULL_TREE;
3135 vectype_in = NULL_TREE;
3136 nargs = gimple_call_num_args (stmt);
3137
3138   /* Bail out if the function has more than four arguments; we do not have
3139      interesting builtin functions to vectorize with more than two arguments
3140      except for fma.  Calls with no arguments are not handled either.  */
3141 if (nargs == 0 || nargs > 4)
3142 return false;
3143
3144 /* Ignore the arguments of IFN_GOMP_SIMD_LANE, they are magic. */
3145 combined_fn cfn = gimple_call_combined_fn (stmt);
3146 if (cfn == CFN_GOMP_SIMD_LANE)
3147 {
3148 nargs = 0;
3149 rhs_type = unsigned_type_node;
3150 }
3151
3152 int mask_opno = -1;
3153 if (internal_fn_p (cfn))
3154 mask_opno = internal_fn_mask_index (as_internal_fn (cfn));
3155
3156 for (i = 0; i < nargs; i++)
3157 {
3158 if ((int) i == mask_opno)
3159 {
3160 op = gimple_call_arg (stmt, i);
3161 if (!vect_check_scalar_mask (vinfo,
3162 stmt_info, op, &dt[i], &vectypes[i]))
3163 return false;
3164 continue;
3165 }
3166
3167 if (!vect_is_simple_use (vinfo, stmt_info, slp_node,
3168 i, &op, &slp_op[i], &dt[i], &vectypes[i]))
3169 {
3170 if (dump_enabled_p ())
3171 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3172 "use not simple.\n");
3173 return false;
3174 }
3175
3176 /* We can only handle calls with arguments of the same type. */
3177 if (rhs_type
3178 && !types_compatible_p (rhs_type, TREE_TYPE (op)))
3179 {
3180 if (dump_enabled_p ())
3181 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3182 "argument types differ.\n");
3183 return false;
3184 }
3185 if (!rhs_type)
3186 rhs_type = TREE_TYPE (op);
3187
3188 if (!vectype_in)
3189 vectype_in = vectypes[i];
3190 else if (vectypes[i]
3191 && !types_compatible_p (vectypes[i], vectype_in))
3192 {
3193 if (dump_enabled_p ())
3194 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3195 "argument vector types differ.\n");
3196 return false;
3197 }
3198 }
3199 /* If all arguments are external or constant defs, infer the vector type
3200 from the scalar type. */
3201 if (!vectype_in)
3202 vectype_in = get_vectype_for_scalar_type (vinfo, rhs_type, slp_node);
3203 if (vec_stmt)
3204 gcc_assert (vectype_in);
3205 if (!vectype_in)
3206 {
3207 if (dump_enabled_p ())
3208 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3209 "no vectype for scalar type %T\n", rhs_type);
3210
3211 return false;
3212 }
3213 /* FORNOW: we don't yet support mixtures of vector sizes for calls,
3214 just mixtures of nunits. E.g. DI->SI versions of __builtin_ctz*
3215 are traditionally vectorized as two VnDI->VnDI IFN_CTZs followed
3216 by a pack of the two vectors into an SI vector. We would need
3217 separate code to handle direct VnDI->VnSI IFN_CTZs. */
3218 if (TYPE_SIZE (vectype_in) != TYPE_SIZE (vectype_out))
3219 {
3220 if (dump_enabled_p ())
3221 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3222 "mismatched vector sizes %T and %T\n",
3223 vectype_in, vectype_out);
3224 return false;
3225 }
3226
3227 if (VECTOR_BOOLEAN_TYPE_P (vectype_out)
3228 != VECTOR_BOOLEAN_TYPE_P (vectype_in))
3229 {
3230 if (dump_enabled_p ())
3231 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3232 "mixed mask and nonmask vector types\n");
3233 return false;
3234 }
3235
3236 /* FORNOW */
3237 nunits_in = TYPE_VECTOR_SUBPARTS (vectype_in);
3238 nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
3239 if (known_eq (nunits_in * 2, nunits_out))
3240 modifier = NARROW;
3241 else if (known_eq (nunits_out, nunits_in))
3242 modifier = NONE;
3243 else if (known_eq (nunits_out * 2, nunits_in))
3244 modifier = WIDEN;
3245 else
3246 return false;
3247
3248 /* We only handle functions that do not read or clobber memory. */
3249 if (gimple_vuse (stmt))
3250 {
3251 if (dump_enabled_p ())
3252 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3253 "function reads from or writes to memory.\n");
3254 return false;
3255 }
3256
3257 /* For now, we only vectorize functions if a target specific builtin
3258 is available. TODO -- in some cases, it might be profitable to
3259 insert the calls for pieces of the vector, in order to be able
3260 to vectorize other operations in the loop. */
3261 fndecl = NULL_TREE;
3262 internal_fn ifn = IFN_LAST;
3263 tree callee = gimple_call_fndecl (stmt);
3264
3265 /* First try using an internal function. */
3266 tree_code convert_code = ERROR_MARK;
3267 if (cfn != CFN_LAST
3268 && (modifier == NONE
3269 || (modifier == NARROW
3270 && simple_integer_narrowing (vectype_out, vectype_in,
3271 &convert_code))))
3272 ifn = vectorizable_internal_function (cfn, callee, vectype_out,
3273 vectype_in);
3274
3275 /* If that fails, try asking for a target-specific built-in function. */
3276 if (ifn == IFN_LAST)
3277 {
3278 if (cfn != CFN_LAST)
3279 fndecl = targetm.vectorize.builtin_vectorized_function
3280 (cfn, vectype_out, vectype_in);
3281 else if (callee && fndecl_built_in_p (callee, BUILT_IN_MD))
3282 fndecl = targetm.vectorize.builtin_md_vectorized_function
3283 (callee, vectype_out, vectype_in);
3284 }
3285
3286 if (ifn == IFN_LAST && !fndecl)
3287 {
3288 if (cfn == CFN_GOMP_SIMD_LANE
3289 && !slp_node
3290 && loop_vinfo
3291 && LOOP_VINFO_LOOP (loop_vinfo)->simduid
3292 && TREE_CODE (gimple_call_arg (stmt, 0)) == SSA_NAME
3293 && LOOP_VINFO_LOOP (loop_vinfo)->simduid
3294 == SSA_NAME_VAR (gimple_call_arg (stmt, 0)))
3295 {
3296 /* We can handle IFN_GOMP_SIMD_LANE by returning a
3297 { 0, 1, 2, ... vf - 1 } vector. */
3298 gcc_assert (nargs == 0);
3299 }
3300 else if (modifier == NONE
3301 && (gimple_call_builtin_p (stmt, BUILT_IN_BSWAP16)
3302 || gimple_call_builtin_p (stmt, BUILT_IN_BSWAP32)
3303 || gimple_call_builtin_p (stmt, BUILT_IN_BSWAP64)
3304 || gimple_call_builtin_p (stmt, BUILT_IN_BSWAP128)))
3305 return vectorizable_bswap (vinfo, stmt_info, gsi, vec_stmt, slp_node,
3306 slp_op, vectype_in, cost_vec);
3307 else
3308 {
3309 if (dump_enabled_p ())
3310 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3311 "function is not vectorizable.\n");
3312 return false;
3313 }
3314 }
3315
3316 if (slp_node)
3317 ncopies = 1;
3318 else if (modifier == NARROW && ifn == IFN_LAST)
3319 ncopies = vect_get_num_copies (loop_vinfo, vectype_out);
3320 else
3321 ncopies = vect_get_num_copies (loop_vinfo, vectype_in);
3322
3323 /* Sanity check: make sure that at least one copy of the vectorized stmt
3324 needs to be generated. */
3325 gcc_assert (ncopies >= 1);
3326
3327 vec_loop_masks *masks = (loop_vinfo ? &LOOP_VINFO_MASKS (loop_vinfo) : NULL);
3328 if (!vec_stmt) /* transformation not required. */
3329 {
3330 if (slp_node)
3331 for (i = 0; i < nargs; ++i)
3332 if (!vect_maybe_update_slp_op_vectype (slp_op[i], vectype_in))
3333 {
3334 if (dump_enabled_p ())
3335 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3336 "incompatible vector types for invariants\n");
3337 return false;
3338 }
3339 STMT_VINFO_TYPE (stmt_info) = call_vec_info_type;
3340 DUMP_VECT_SCOPE ("vectorizable_call");
3341 vect_model_simple_cost (vinfo, stmt_info,
3342 ncopies, dt, ndts, slp_node, cost_vec);
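      /* When narrowing via an internal function, every pair of half-width
         results is later combined by one extra conversion statement, hence
         the additional NCOPIES / 2 vec_promote_demote costs.  */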
3343 if (ifn != IFN_LAST && modifier == NARROW && !slp_node)
3344 record_stmt_cost (cost_vec, ncopies / 2,
3345 vec_promote_demote, stmt_info, 0, vect_body);
3346
3347 if (loop_vinfo && mask_opno >= 0)
3348 {
3349 unsigned int nvectors = (slp_node
3350 ? SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node)
3351 : ncopies);
3352 tree scalar_mask = gimple_call_arg (stmt_info->stmt, mask_opno);
3353 vect_record_loop_mask (loop_vinfo, masks, nvectors,
3354 vectype_out, scalar_mask);
3355 }
3356 return true;
3357 }
3358
3359 /* Transform. */
3360
3361 if (dump_enabled_p ())
3362 dump_printf_loc (MSG_NOTE, vect_location, "transform call.\n");
3363
3364 /* Handle def. */
3365 scalar_dest = gimple_call_lhs (stmt);
3366 vec_dest = vect_create_destination_var (scalar_dest, vectype_out);
3367
3368 bool masked_loop_p = loop_vinfo && LOOP_VINFO_FULLY_MASKED_P (loop_vinfo);
3369
3370 if (modifier == NONE || ifn != IFN_LAST)
3371 {
3372 tree prev_res = NULL_TREE;
3373 vargs.safe_grow (nargs, true);
3374 orig_vargs.safe_grow (nargs, true);
3375 auto_vec<vec<tree> > vec_defs (nargs);
3376 for (j = 0; j < ncopies; ++j)
3377 {
3378 /* Build argument list for the vectorized call. */
3379 if (slp_node)
3380 {
3381 vec<tree> vec_oprnds0;
3382
3383 vect_get_slp_defs (vinfo, slp_node, &vec_defs);
3384 vec_oprnds0 = vec_defs[0];
3385
3386 /* Arguments are ready. Create the new vector stmt. */
3387 FOR_EACH_VEC_ELT (vec_oprnds0, i, vec_oprnd0)
3388 {
3389 size_t k;
3390 for (k = 0; k < nargs; k++)
3391 {
3392 vec<tree> vec_oprndsk = vec_defs[k];
3393 vargs[k] = vec_oprndsk[i];
3394 }
3395 gimple *new_stmt;
3396 if (modifier == NARROW)
3397 {
3398 /* We don't define any narrowing conditional functions
3399 at present. */
3400 gcc_assert (mask_opno < 0);
3401 tree half_res = make_ssa_name (vectype_in);
3402 gcall *call
3403 = gimple_build_call_internal_vec (ifn, vargs);
3404 gimple_call_set_lhs (call, half_res);
3405 gimple_call_set_nothrow (call, true);
3406 vect_finish_stmt_generation (vinfo, stmt_info, call, gsi);
3407 if ((i & 1) == 0)
3408 {
3409 prev_res = half_res;
3410 continue;
3411 }
3412 new_temp = make_ssa_name (vec_dest);
3413 new_stmt = gimple_build_assign (new_temp, convert_code,
3414 prev_res, half_res);
3415 vect_finish_stmt_generation (vinfo, stmt_info,
3416 new_stmt, gsi);
3417 }
3418 else
3419 {
3420 if (mask_opno >= 0 && masked_loop_p)
3421 {
3422 unsigned int vec_num = vec_oprnds0.length ();
3423 /* Always true for SLP. */
3424 gcc_assert (ncopies == 1);
3425 tree mask = vect_get_loop_mask (gsi, masks, vec_num,
3426 vectype_out, i);
3427 vargs[mask_opno] = prepare_load_store_mask
3428 (TREE_TYPE (mask), mask, vargs[mask_opno], gsi);
3429 }
3430
3431 gcall *call;
3432 if (ifn != IFN_LAST)
3433 call = gimple_build_call_internal_vec (ifn, vargs);
3434 else
3435 call = gimple_build_call_vec (fndecl, vargs);
3436 new_temp = make_ssa_name (vec_dest, call);
3437 gimple_call_set_lhs (call, new_temp);
3438 gimple_call_set_nothrow (call, true);
3439 vect_finish_stmt_generation (vinfo, stmt_info, call, gsi);
3440 new_stmt = call;
3441 }
3442 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
3443 }
3444 continue;
3445 }
3446
3447 for (i = 0; i < nargs; i++)
3448 {
3449 op = gimple_call_arg (stmt, i);
3450 if (j == 0)
3451 {
3452 vec_defs.quick_push (vNULL);
3453 vect_get_vec_defs_for_operand (vinfo, stmt_info, ncopies,
3454 op, &vec_defs[i],
3455 vectypes[i]);
3456 }
3457 orig_vargs[i] = vargs[i] = vec_defs[i][j];
3458 }
3459
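          /* For a conditional call in a fully-masked loop, the statement's
             own mask argument is ANDed with the loop mask for this copy, so
             that lanes disabled by the loop mask stay inactive.  */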
3460 if (mask_opno >= 0 && masked_loop_p)
3461 {
3462 tree mask = vect_get_loop_mask (gsi, masks, ncopies,
3463 vectype_out, j);
3464 vargs[mask_opno]
3465 = prepare_load_store_mask (TREE_TYPE (mask), mask,
3466 vargs[mask_opno], gsi);
3467 }
3468
3469 gimple *new_stmt;
3470 if (cfn == CFN_GOMP_SIMD_LANE)
3471 {
3472 tree cst = build_index_vector (vectype_out, j * nunits_out, 1);
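              /* E.g. with nunits_out == 4, copy j == 1 yields the vector
                 { 4, 5, 6, 7 }.  */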
3473 tree new_var
3474 = vect_get_new_ssa_name (vectype_out, vect_simple_var, "cst_");
3475 gimple *init_stmt = gimple_build_assign (new_var, cst);
3476 vect_init_vector_1 (vinfo, stmt_info, init_stmt, NULL);
3477 new_temp = make_ssa_name (vec_dest);
3478 new_stmt = gimple_build_assign (new_temp, new_var);
3479 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
3480 }
3481 else if (modifier == NARROW)
3482 {
3483 /* We don't define any narrowing conditional functions at
3484 present. */
3485 gcc_assert (mask_opno < 0);
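              /* Copies are handled in pairs: an even-numbered copy only
                 computes its half-width result, and the following
                 odd-numbered copy packs the two halves into one VECTYPE_OUT
                 vector using CONVERT_CODE.  */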
3486 tree half_res = make_ssa_name (vectype_in);
3487 gcall *call = gimple_build_call_internal_vec (ifn, vargs);
3488 gimple_call_set_lhs (call, half_res);
3489 gimple_call_set_nothrow (call, true);
3490 vect_finish_stmt_generation (vinfo, stmt_info, call, gsi);
3491 if ((j & 1) == 0)
3492 {
3493 prev_res = half_res;
3494 continue;
3495 }
3496 new_temp = make_ssa_name (vec_dest);
3497 new_stmt = gimple_build_assign (new_temp, convert_code,
3498 prev_res, half_res);
3499 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
3500 }
3501 else
3502 {
3503 gcall *call;
3504 if (ifn != IFN_LAST)
3505 call = gimple_build_call_internal_vec (ifn, vargs);
3506 else
3507 call = gimple_build_call_vec (fndecl, vargs);
3508 new_temp = make_ssa_name (vec_dest, call);
3509 gimple_call_set_lhs (call, new_temp);
3510 gimple_call_set_nothrow (call, true);
3511 vect_finish_stmt_generation (vinfo, stmt_info, call, gsi);
3512 new_stmt = call;
3513 }
3514
3515 if (j == (modifier == NARROW ? 1 : 0))
3516 *vec_stmt = new_stmt;
3517 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
3518 }
3519 for (i = 0; i < nargs; i++)
3520 {
3521 vec<tree> vec_oprndsi = vec_defs[i];
3522 vec_oprndsi.release ();
3523 }
3524 }
3525 else if (modifier == NARROW)
3526 {
3527 auto_vec<vec<tree> > vec_defs (nargs);
3528 /* We don't define any narrowing conditional functions at present. */
3529 gcc_assert (mask_opno < 0);
3530 for (j = 0; j < ncopies; ++j)
3531 {
3532 /* Build argument list for the vectorized call. */
3533 if (j == 0)
3534 vargs.create (nargs * 2);
3535 else
3536 vargs.truncate (0);
3537
3538 if (slp_node)
3539 {
3540 vec<tree> vec_oprnds0;
3541
3542 vect_get_slp_defs (vinfo, slp_node, &vec_defs);
3543 vec_oprnds0 = vec_defs[0];
3544
3545 /* Arguments are ready. Create the new vector stmt. */
3546 for (i = 0; vec_oprnds0.iterate (i, &vec_oprnd0); i += 2)
3547 {
3548 size_t k;
3549 vargs.truncate (0);
3550 for (k = 0; k < nargs; k++)
3551 {
3552 vec<tree> vec_oprndsk = vec_defs[k];
3553 vargs.quick_push (vec_oprndsk[i]);
3554 vargs.quick_push (vec_oprndsk[i + 1]);
3555 }
3556 gcall *call;
3557 if (ifn != IFN_LAST)
3558 call = gimple_build_call_internal_vec (ifn, vargs);
3559 else
3560 call = gimple_build_call_vec (fndecl, vargs);
3561 new_temp = make_ssa_name (vec_dest, call);
3562 gimple_call_set_lhs (call, new_temp);
3563 gimple_call_set_nothrow (call, true);
3564 vect_finish_stmt_generation (vinfo, stmt_info, call, gsi);
3565 SLP_TREE_VEC_STMTS (slp_node).quick_push (call);
3566 }
3567 continue;
3568 }
3569
3570 for (i = 0; i < nargs; i++)
3571 {
3572 op = gimple_call_arg (stmt, i);
3573 if (j == 0)
3574 {
3575 vec_defs.quick_push (vNULL);
3576 vect_get_vec_defs_for_operand (vinfo, stmt_info, 2 * ncopies,
3577 op, &vec_defs[i], vectypes[i]);
3578 }
3579 vec_oprnd0 = vec_defs[i][2*j];
3580 vec_oprnd1 = vec_defs[i][2*j+1];
3581
3582 vargs.quick_push (vec_oprnd0);
3583 vargs.quick_push (vec_oprnd1);
3584 }
3585
3586 gcall *new_stmt = gimple_build_call_vec (fndecl, vargs);
3587 new_temp = make_ssa_name (vec_dest, new_stmt);
3588 gimple_call_set_lhs (new_stmt, new_temp);
3589 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
3590
3591 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
3592 }
3593
3594 if (!slp_node)
3595 *vec_stmt = STMT_VINFO_VEC_STMTS (stmt_info)[0];
3596
3597 for (i = 0; i < nargs; i++)
3598 {
3599 vec<tree> vec_oprndsi = vec_defs[i];
3600 vec_oprndsi.release ();
3601 }
3602 }
3603 else
3604 /* No current target implements this case. */
3605 return false;
3606
3607 vargs.release ();
3608
3609 /* The call in STMT might prevent it from being removed in DCE.
3610 We cannot, however, remove it here, due to the way the SSA name
3611 it defines is mapped to the new definition. So just replace the
3612 rhs of the statement with something harmless. */
3613
3614 if (slp_node)
3615 return true;
3616
3617 stmt_info = vect_orig_stmt (stmt_info);
3618 lhs = gimple_get_lhs (stmt_info->stmt);
3619
3620 gassign *new_stmt
3621 = gimple_build_assign (lhs, build_zero_cst (TREE_TYPE (lhs)));
3622 vinfo->replace_stmt (gsi, stmt_info, new_stmt);
3623
3624 return true;
3625 }
3626
3627
3628 struct simd_call_arg_info
3629 {
3630 tree vectype;
3631 tree op;
3632 HOST_WIDE_INT linear_step;
3633 enum vect_def_type dt;
3634 unsigned int align;
3635 bool simd_lane_linear;
3636 };
3637
3638 /* Helper function of vectorizable_simd_clone_call. If OP, an SSA_NAME,
3639 is linear within a simd lane (but not within the whole loop), note it
3640 in *ARGINFO. */
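/* For example, given
     offset = GOMP_SIMD_LANE (simduid) * 4;
     op = &a + (sizetype) offset;
   the walk below records base &a and a linear_step of 4 in *ARGINFO.  */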
3641
3642 static void
3643 vect_simd_lane_linear (tree op, class loop *loop,
3644 struct simd_call_arg_info *arginfo)
3645 {
3646 gimple *def_stmt = SSA_NAME_DEF_STMT (op);
3647
3648 if (!is_gimple_assign (def_stmt)
3649 || gimple_assign_rhs_code (def_stmt) != POINTER_PLUS_EXPR
3650 || !is_gimple_min_invariant (gimple_assign_rhs1 (def_stmt)))
3651 return;
3652
3653 tree base = gimple_assign_rhs1 (def_stmt);
3654 HOST_WIDE_INT linear_step = 0;
3655 tree v = gimple_assign_rhs2 (def_stmt);
3656 while (TREE_CODE (v) == SSA_NAME)
3657 {
3658 tree t;
3659 def_stmt = SSA_NAME_DEF_STMT (v);
3660 if (is_gimple_assign (def_stmt))
3661 switch (gimple_assign_rhs_code (def_stmt))
3662 {
3663 case PLUS_EXPR:
3664 t = gimple_assign_rhs2 (def_stmt);
3665 if (linear_step || TREE_CODE (t) != INTEGER_CST)
3666 return;
3667 base = fold_build2 (POINTER_PLUS_EXPR, TREE_TYPE (base), base, t);
3668 v = gimple_assign_rhs1 (def_stmt);
3669 continue;
3670 case MULT_EXPR:
3671 t = gimple_assign_rhs2 (def_stmt);
3672 if (linear_step || !tree_fits_shwi_p (t) || integer_zerop (t))
3673 return;
3674 linear_step = tree_to_shwi (t);
3675 v = gimple_assign_rhs1 (def_stmt);
3676 continue;
3677 CASE_CONVERT:
3678 t = gimple_assign_rhs1 (def_stmt);
3679 if (TREE_CODE (TREE_TYPE (t)) != INTEGER_TYPE
3680 || (TYPE_PRECISION (TREE_TYPE (v))
3681 < TYPE_PRECISION (TREE_TYPE (t))))
3682 return;
3683 if (!linear_step)
3684 linear_step = 1;
3685 v = t;
3686 continue;
3687 default:
3688 return;
3689 }
3690 else if (gimple_call_internal_p (def_stmt, IFN_GOMP_SIMD_LANE)
3691 && loop->simduid
3692 && TREE_CODE (gimple_call_arg (def_stmt, 0)) == SSA_NAME
3693 && (SSA_NAME_VAR (gimple_call_arg (def_stmt, 0))
3694 == loop->simduid))
3695 {
3696 if (!linear_step)
3697 linear_step = 1;
3698 arginfo->linear_step = linear_step;
3699 arginfo->op = base;
3700 arginfo->simd_lane_linear = true;
3701 return;
3702 }
3703 }
3704 }
3705
3706 /* Return the number of elements in vector type VECTYPE, which is associated
3707 with a SIMD clone. At present these vectors always have a constant
3708 length. */
3709
3710 static unsigned HOST_WIDE_INT
3711 simd_clone_subparts (tree vectype)
3712 {
3713 return TYPE_VECTOR_SUBPARTS (vectype).to_constant ();
3714 }
3715
3716 /* Function vectorizable_simd_clone_call.
3717
3718 Check if STMT_INFO performs a function call that can be vectorized
3719 by calling a simd clone of the function.
3720 If VEC_STMT is also passed, vectorize STMT_INFO: create a vectorized
3721 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
3722 Return true if STMT_INFO is vectorizable in this way. */
3723
3724 static bool
3725 vectorizable_simd_clone_call (vec_info *vinfo, stmt_vec_info stmt_info,
3726 gimple_stmt_iterator *gsi,
3727 gimple **vec_stmt, slp_tree slp_node,
3728 stmt_vector_for_cost *)
3729 {
3730 tree vec_dest;
3731 tree scalar_dest;
3732 tree op, type;
3733 tree vec_oprnd0 = NULL_TREE;
3734 tree vectype;
3735 poly_uint64 nunits;
3736 loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
3737 bb_vec_info bb_vinfo = dyn_cast <bb_vec_info> (vinfo);
3738 class loop *loop = loop_vinfo ? LOOP_VINFO_LOOP (loop_vinfo) : NULL;
3739 tree fndecl, new_temp;
3740 int ncopies, j;
3741 auto_vec<simd_call_arg_info> arginfo;
3742 vec<tree> vargs = vNULL;
3743 size_t i, nargs;
3744 tree lhs, rtype, ratype;
3745 vec<constructor_elt, va_gc> *ret_ctor_elts = NULL;
3746
3747 /* Is STMT a vectorizable call? */
3748 gcall *stmt = dyn_cast <gcall *> (stmt_info->stmt);
3749 if (!stmt)
3750 return false;
3751
3752 fndecl = gimple_call_fndecl (stmt);
3753 if (fndecl == NULL_TREE)
3754 return false;
3755
3756 struct cgraph_node *node = cgraph_node::get (fndecl);
3757 if (node == NULL || node->simd_clones == NULL)
3758 return false;
3759
3760 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
3761 return false;
3762
3763 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
3764 && ! vec_stmt)
3765 return false;
3766
3767 if (gimple_call_lhs (stmt)
3768 && TREE_CODE (gimple_call_lhs (stmt)) != SSA_NAME)
3769 return false;
3770
3771 gcc_checking_assert (!stmt_can_throw_internal (cfun, stmt));
3772
3773 vectype = STMT_VINFO_VECTYPE (stmt_info);
3774
3775 if (loop_vinfo && nested_in_vect_loop_p (loop, stmt_info))
3776 return false;
3777
3778 /* FORNOW */
3779 if (slp_node)
3780 return false;
3781
3782 /* Process function arguments. */
3783 nargs = gimple_call_num_args (stmt);
3784
3785 /* Bail out if the function has zero arguments. */
3786 if (nargs == 0)
3787 return false;
3788
3789 arginfo.reserve (nargs, true);
3790
3791 for (i = 0; i < nargs; i++)
3792 {
3793 simd_call_arg_info thisarginfo;
3794 affine_iv iv;
3795
3796 thisarginfo.linear_step = 0;
3797 thisarginfo.align = 0;
3798 thisarginfo.op = NULL_TREE;
3799 thisarginfo.simd_lane_linear = false;
3800
3801 op = gimple_call_arg (stmt, i);
3802 if (!vect_is_simple_use (op, vinfo, &thisarginfo.dt,
3803 &thisarginfo.vectype)
3804 || thisarginfo.dt == vect_uninitialized_def)
3805 {
3806 if (dump_enabled_p ())
3807 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3808 "use not simple.\n");
3809 return false;
3810 }
3811
3812 if (thisarginfo.dt == vect_constant_def
3813 || thisarginfo.dt == vect_external_def)
3814 gcc_assert (thisarginfo.vectype == NULL_TREE);
3815 else
3816 {
3817 gcc_assert (thisarginfo.vectype != NULL_TREE);
3818 if (VECTOR_BOOLEAN_TYPE_P (thisarginfo.vectype))
3819 {
3820 if (dump_enabled_p ())
3821 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3822 "vector mask arguments are not supported\n");
3823 return false;
3824 }
3825 }
3826
3827 /* For linear arguments, the analyze phase should have saved
3828 the base and step in STMT_VINFO_SIMD_CLONE_INFO. */
3829 if (i * 3 + 4 <= STMT_VINFO_SIMD_CLONE_INFO (stmt_info).length ()
3830 && STMT_VINFO_SIMD_CLONE_INFO (stmt_info)[i * 3 + 2])
3831 {
3832 gcc_assert (vec_stmt);
3833 thisarginfo.linear_step
3834 = tree_to_shwi (STMT_VINFO_SIMD_CLONE_INFO (stmt_info)[i * 3 + 2]);
3835 thisarginfo.op
3836 = STMT_VINFO_SIMD_CLONE_INFO (stmt_info)[i * 3 + 1];
3837 thisarginfo.simd_lane_linear
3838 = (STMT_VINFO_SIMD_CLONE_INFO (stmt_info)[i * 3 + 3]
3839 == boolean_true_node);
3840 /* If the loop has been peeled for alignment, we need to adjust it. */
3841 tree n1 = LOOP_VINFO_NITERS_UNCHANGED (loop_vinfo);
3842 tree n2 = LOOP_VINFO_NITERS (loop_vinfo);
3843 if (n1 != n2 && !thisarginfo.simd_lane_linear)
3844 {
3845 tree bias = fold_build2 (MINUS_EXPR, TREE_TYPE (n1), n1, n2);
3846 tree step = STMT_VINFO_SIMD_CLONE_INFO (stmt_info)[i * 3 + 2];
3847 tree opt = TREE_TYPE (thisarginfo.op);
3848 bias = fold_convert (TREE_TYPE (step), bias);
3849 bias = fold_build2 (MULT_EXPR, TREE_TYPE (step), bias, step);
3850 thisarginfo.op
3851 = fold_build2 (POINTER_TYPE_P (opt)
3852 ? POINTER_PLUS_EXPR : PLUS_EXPR, opt,
3853 thisarginfo.op, bias);
3854 }
3855 }
3856 else if (!vec_stmt
3857 && thisarginfo.dt != vect_constant_def
3858 && thisarginfo.dt != vect_external_def
3859 && loop_vinfo
3860 && TREE_CODE (op) == SSA_NAME
3861 && simple_iv (loop, loop_containing_stmt (stmt), op,
3862 &iv, false)
3863 && tree_fits_shwi_p (iv.step))
3864 {
3865 thisarginfo.linear_step = tree_to_shwi (iv.step);
3866 thisarginfo.op = iv.base;
3867 }
3868 else if ((thisarginfo.dt == vect_constant_def
3869 || thisarginfo.dt == vect_external_def)
3870 && POINTER_TYPE_P (TREE_TYPE (op)))
3871 thisarginfo.align = get_pointer_alignment (op) / BITS_PER_UNIT;
3872 /* Addresses of array elements indexed by GOMP_SIMD_LANE are
3873 linear too. */
3874 if (POINTER_TYPE_P (TREE_TYPE (op))
3875 && !thisarginfo.linear_step
3876 && !vec_stmt
3877 && thisarginfo.dt != vect_constant_def
3878 && thisarginfo.dt != vect_external_def
3879 && loop_vinfo
3880 && !slp_node
3881 && TREE_CODE (op) == SSA_NAME)
3882 vect_simd_lane_linear (op, loop, &thisarginfo);
3883
3884 arginfo.quick_push (thisarginfo);
3885 }
3886
3887 poly_uint64 vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
3888 if (!vf.is_constant ())
3889 {
3890 if (dump_enabled_p ())
3891 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3892 "not considering SIMD clones; not yet supported"
3893 " for variable-width vectors.\n");
3894 return false;
3895 }
3896
3897 unsigned int badness = 0;
3898 struct cgraph_node *bestn = NULL;
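  /* Otherwise pick the usable clone with the lowest badness: needing
     several calls per vector iteration, being an inbranch clone and a poor
     target score all increase the badness, while argument kinds, linear
     steps or alignments the clone cannot handle disqualify it outright.  */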
3899 if (STMT_VINFO_SIMD_CLONE_INFO (stmt_info).exists ())
3900 bestn = cgraph_node::get (STMT_VINFO_SIMD_CLONE_INFO (stmt_info)[0]);
3901 else
3902 for (struct cgraph_node *n = node->simd_clones; n != NULL;
3903 n = n->simdclone->next_clone)
3904 {
3905 unsigned int this_badness = 0;
3906 unsigned int num_calls;
3907 if (!constant_multiple_p (vf, n->simdclone->simdlen, &num_calls)
3908 || n->simdclone->nargs != nargs)
3909 continue;
3910 if (num_calls != 1)
3911 this_badness += exact_log2 (num_calls) * 1024;
3912 if (n->simdclone->inbranch)
3913 this_badness += 2048;
3914 int target_badness = targetm.simd_clone.usable (n);
3915 if (target_badness < 0)
3916 continue;
3917 this_badness += target_badness * 512;
3918 /* FORNOW: Have to add code to add the mask argument. */
3919 if (n->simdclone->inbranch)
3920 continue;
3921 for (i = 0; i < nargs; i++)
3922 {
3923 switch (n->simdclone->args[i].arg_type)
3924 {
3925 case SIMD_CLONE_ARG_TYPE_VECTOR:
3926 if (!useless_type_conversion_p
3927 (n->simdclone->args[i].orig_type,
3928 TREE_TYPE (gimple_call_arg (stmt, i))))
3929 i = -1;
3930 else if (arginfo[i].dt == vect_constant_def
3931 || arginfo[i].dt == vect_external_def
3932 || arginfo[i].linear_step)
3933 this_badness += 64;
3934 break;
3935 case SIMD_CLONE_ARG_TYPE_UNIFORM:
3936 if (arginfo[i].dt != vect_constant_def
3937 && arginfo[i].dt != vect_external_def)
3938 i = -1;
3939 break;
3940 case SIMD_CLONE_ARG_TYPE_LINEAR_CONSTANT_STEP:
3941 case SIMD_CLONE_ARG_TYPE_LINEAR_REF_CONSTANT_STEP:
3942 if (arginfo[i].dt == vect_constant_def
3943 || arginfo[i].dt == vect_external_def
3944 || (arginfo[i].linear_step
3945 != n->simdclone->args[i].linear_step))
3946 i = -1;
3947 break;
3948 case SIMD_CLONE_ARG_TYPE_LINEAR_VARIABLE_STEP:
3949 case SIMD_CLONE_ARG_TYPE_LINEAR_VAL_CONSTANT_STEP:
3950 case SIMD_CLONE_ARG_TYPE_LINEAR_UVAL_CONSTANT_STEP:
3951 case SIMD_CLONE_ARG_TYPE_LINEAR_REF_VARIABLE_STEP:
3952 case SIMD_CLONE_ARG_TYPE_LINEAR_VAL_VARIABLE_STEP:
3953 case SIMD_CLONE_ARG_TYPE_LINEAR_UVAL_VARIABLE_STEP:
3954 /* FORNOW */
3955 i = -1;
3956 break;
3957 case SIMD_CLONE_ARG_TYPE_MASK:
3958 gcc_unreachable ();
3959 }
3960 if (i == (size_t) -1)
3961 break;
3962 if (n->simdclone->args[i].alignment > arginfo[i].align)
3963 {
3964 i = -1;
3965 break;
3966 }
3967 if (arginfo[i].align)
3968 this_badness += (exact_log2 (arginfo[i].align)
3969 - exact_log2 (n->simdclone->args[i].alignment));
3970 }
3971 if (i == (size_t) -1)
3972 continue;
3973 if (bestn == NULL || this_badness < badness)
3974 {
3975 bestn = n;
3976 badness = this_badness;
3977 }
3978 }
3979
3980 if (bestn == NULL)
3981 return false;
3982
3983 for (i = 0; i < nargs; i++)
3984 if ((arginfo[i].dt == vect_constant_def
3985 || arginfo[i].dt == vect_external_def)
3986 && bestn->simdclone->args[i].arg_type == SIMD_CLONE_ARG_TYPE_VECTOR)
3987 {
3988 tree arg_type = TREE_TYPE (gimple_call_arg (stmt, i));
3989 arginfo[i].vectype = get_vectype_for_scalar_type (vinfo, arg_type,
3990 slp_node);
3991 if (arginfo[i].vectype == NULL
3992 || !constant_multiple_p (bestn->simdclone->simdlen,
3993 simd_clone_subparts (arginfo[i].vectype)))
3994 return false;
3995 }
3996
3997 fndecl = bestn->decl;
3998 nunits = bestn->simdclone->simdlen;
3999 ncopies = vector_unroll_factor (vf, nunits);
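  /* E.g. a vectorization factor of 8 and a clone simdlen of 2 require
     NCOPIES == 4 calls to the clone per iteration of the vector loop.  */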
4000
4001 /* If the function isn't const, only allow it in simd loops where the
4002 user has asserted that at least nunits consecutive iterations can be
4003 performed using SIMD instructions. */
4004 if ((loop == NULL || maybe_lt ((unsigned) loop->safelen, nunits))
4005 && gimple_vuse (stmt))
4006 return false;
4007
4008 /* Sanity check: make sure that at least one copy of the vectorized stmt
4009 needs to be generated. */
4010 gcc_assert (ncopies >= 1);
4011
4012 if (!vec_stmt) /* transformation not required. */
4013 {
4014 STMT_VINFO_SIMD_CLONE_INFO (stmt_info).safe_push (bestn->decl);
4015 for (i = 0; i < nargs; i++)
4016 if ((bestn->simdclone->args[i].arg_type
4017 == SIMD_CLONE_ARG_TYPE_LINEAR_CONSTANT_STEP)
4018 || (bestn->simdclone->args[i].arg_type
4019 == SIMD_CLONE_ARG_TYPE_LINEAR_REF_CONSTANT_STEP))
4020 {
4021 STMT_VINFO_SIMD_CLONE_INFO (stmt_info).safe_grow_cleared (i * 3
4022 + 1,
4023 true);
4024 STMT_VINFO_SIMD_CLONE_INFO (stmt_info).safe_push (arginfo[i].op);
4025 tree lst = POINTER_TYPE_P (TREE_TYPE (arginfo[i].op))
4026 ? size_type_node : TREE_TYPE (arginfo[i].op);
4027 tree ls = build_int_cst (lst, arginfo[i].linear_step);
4028 STMT_VINFO_SIMD_CLONE_INFO (stmt_info).safe_push (ls);
4029 tree sll = arginfo[i].simd_lane_linear
4030 ? boolean_true_node : boolean_false_node;
4031 STMT_VINFO_SIMD_CLONE_INFO (stmt_info).safe_push (sll);
4032 }
4033 STMT_VINFO_TYPE (stmt_info) = call_simd_clone_vec_info_type;
4034 DUMP_VECT_SCOPE ("vectorizable_simd_clone_call");
4035 /* vect_model_simple_cost (vinfo, stmt_info, ncopies,
4036 dt, slp_node, cost_vec); */
4037 return true;
4038 }
4039
4040 /* Transform. */
4041
4042 if (dump_enabled_p ())
4043 dump_printf_loc (MSG_NOTE, vect_location, "transform call.\n");
4044
4045 /* Handle def. */
4046 scalar_dest = gimple_call_lhs (stmt);
4047 vec_dest = NULL_TREE;
4048 rtype = NULL_TREE;
4049 ratype = NULL_TREE;
4050 if (scalar_dest)
4051 {
4052 vec_dest = vect_create_destination_var (scalar_dest, vectype);
4053 rtype = TREE_TYPE (TREE_TYPE (fndecl));
4054 if (TREE_CODE (rtype) == ARRAY_TYPE)
4055 {
4056 ratype = rtype;
4057 rtype = TREE_TYPE (ratype);
4058 }
4059 }
4060
4061 auto_vec<vec<tree> > vec_oprnds;
4062 auto_vec<unsigned> vec_oprnds_i;
4063 vec_oprnds.safe_grow_cleared (nargs, true);
4064 vec_oprnds_i.safe_grow_cleared (nargs, true);
4065 for (j = 0; j < ncopies; ++j)
4066 {
4067 /* Build argument list for the vectorized call. */
4068 if (j == 0)
4069 vargs.create (nargs);
4070 else
4071 vargs.truncate (0);
4072
4073 for (i = 0; i < nargs; i++)
4074 {
4075 unsigned int k, l, m, o;
4076 tree atype;
4077 op = gimple_call_arg (stmt, i);
4078 switch (bestn->simdclone->args[i].arg_type)
4079 {
4080 case SIMD_CLONE_ARG_TYPE_VECTOR:
4081 atype = bestn->simdclone->args[i].vector_type;
4082 o = vector_unroll_factor (nunits,
4083 simd_clone_subparts (atype));
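              /* O is the number of vector arguments the clone takes for this
                 parameter in one call.  Below, when the clone expects
                 narrower vectors than the vectorized operand we extract
                 pieces with BIT_FIELD_REF, and when it expects wider ones we
                 glue operand vectors together with a CONSTRUCTOR (or
                 VIEW_CONVERT a single one if the element counts match).  */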
4084 for (m = j * o; m < (j + 1) * o; m++)
4085 {
4086 if (simd_clone_subparts (atype)
4087 < simd_clone_subparts (arginfo[i].vectype))
4088 {
4089 poly_uint64 prec = GET_MODE_BITSIZE (TYPE_MODE (atype));
4090 k = (simd_clone_subparts (arginfo[i].vectype)
4091 / simd_clone_subparts (atype));
4092 gcc_assert ((k & (k - 1)) == 0);
4093 if (m == 0)
4094 {
4095 vect_get_vec_defs_for_operand (vinfo, stmt_info,
4096 ncopies * o / k, op,
4097 &vec_oprnds[i]);
4098 vec_oprnds_i[i] = 0;
4099 vec_oprnd0 = vec_oprnds[i][vec_oprnds_i[i]++];
4100 }
4101 else
4102 {
4103 vec_oprnd0 = arginfo[i].op;
4104 if ((m & (k - 1)) == 0)
4105 vec_oprnd0 = vec_oprnds[i][vec_oprnds_i[i]++];
4106 }
4107 arginfo[i].op = vec_oprnd0;
4108 vec_oprnd0
4109 = build3 (BIT_FIELD_REF, atype, vec_oprnd0,
4110 bitsize_int (prec),
4111 bitsize_int ((m & (k - 1)) * prec));
4112 gassign *new_stmt
4113 = gimple_build_assign (make_ssa_name (atype),
4114 vec_oprnd0);
4115 vect_finish_stmt_generation (vinfo, stmt_info,
4116 new_stmt, gsi);
4117 vargs.safe_push (gimple_assign_lhs (new_stmt));
4118 }
4119 else
4120 {
4121 k = (simd_clone_subparts (atype)
4122 / simd_clone_subparts (arginfo[i].vectype));
4123 gcc_assert ((k & (k - 1)) == 0);
4124 vec<constructor_elt, va_gc> *ctor_elts;
4125 if (k != 1)
4126 vec_alloc (ctor_elts, k);
4127 else
4128 ctor_elts = NULL;
4129 for (l = 0; l < k; l++)
4130 {
4131 if (m == 0 && l == 0)
4132 {
4133 vect_get_vec_defs_for_operand (vinfo, stmt_info,
4134 k * o * ncopies,
4135 op,
4136 &vec_oprnds[i]);
4137 vec_oprnds_i[i] = 0;
4138 vec_oprnd0 = vec_oprnds[i][vec_oprnds_i[i]++];
4139 }
4140 else
4141 vec_oprnd0 = vec_oprnds[i][vec_oprnds_i[i]++];
4142 arginfo[i].op = vec_oprnd0;
4143 if (k == 1)
4144 break;
4145 CONSTRUCTOR_APPEND_ELT (ctor_elts, NULL_TREE,
4146 vec_oprnd0);
4147 }
4148 if (k == 1)
4149 if (!useless_type_conversion_p (TREE_TYPE (vec_oprnd0),
4150 atype))
4151 {
4152 vec_oprnd0
4153 = build1 (VIEW_CONVERT_EXPR, atype, vec_oprnd0);
4154 gassign *new_stmt
4155 = gimple_build_assign (make_ssa_name (atype),
4156 vec_oprnd0);
4157 vect_finish_stmt_generation (vinfo, stmt_info,
4158 new_stmt, gsi);
4159 vargs.safe_push (gimple_assign_lhs (new_stmt));
4160 }
4161 else
4162 vargs.safe_push (vec_oprnd0);
4163 else
4164 {
4165 vec_oprnd0 = build_constructor (atype, ctor_elts);
4166 gassign *new_stmt
4167 = gimple_build_assign (make_ssa_name (atype),
4168 vec_oprnd0);
4169 vect_finish_stmt_generation (vinfo, stmt_info,
4170 new_stmt, gsi);
4171 vargs.safe_push (gimple_assign_lhs (new_stmt));
4172 }
4173 }
4174 }
4175 break;
4176 case SIMD_CLONE_ARG_TYPE_UNIFORM:
4177 vargs.safe_push (op);
4178 break;
4179 case SIMD_CLONE_ARG_TYPE_LINEAR_CONSTANT_STEP:
4180 case SIMD_CLONE_ARG_TYPE_LINEAR_REF_CONSTANT_STEP:
4181 if (j == 0)
4182 {
4183 gimple_seq stmts;
4184 arginfo[i].op
4185 = force_gimple_operand (unshare_expr (arginfo[i].op),
4186 &stmts, true, NULL_TREE);
4187 if (stmts != NULL)
4188 {
4189 basic_block new_bb;
4190 edge pe = loop_preheader_edge (loop);
4191 new_bb = gsi_insert_seq_on_edge_immediate (pe, stmts);
4192 gcc_assert (!new_bb);
4193 }
4194 if (arginfo[i].simd_lane_linear)
4195 {
4196 vargs.safe_push (arginfo[i].op);
4197 break;
4198 }
4199 tree phi_res = copy_ssa_name (op);
4200 gphi *new_phi = create_phi_node (phi_res, loop->header);
4201 add_phi_arg (new_phi, arginfo[i].op,
4202 loop_preheader_edge (loop), UNKNOWN_LOCATION);
4203 enum tree_code code
4204 = POINTER_TYPE_P (TREE_TYPE (op))
4205 ? POINTER_PLUS_EXPR : PLUS_EXPR;
4206 tree type = POINTER_TYPE_P (TREE_TYPE (op))
4207 ? sizetype : TREE_TYPE (op);
4208 poly_widest_int cst
4209 = wi::mul (bestn->simdclone->args[i].linear_step,
4210 ncopies * nunits);
4211 tree tcst = wide_int_to_tree (type, cst);
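                  /* The PHI advances the linear argument by
                     linear_step * ncopies * nunits, i.e. linear_step * vf,
                     per iteration of the vector loop.  */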
4212 tree phi_arg = copy_ssa_name (op);
4213 gassign *new_stmt
4214 = gimple_build_assign (phi_arg, code, phi_res, tcst);
4215 gimple_stmt_iterator si = gsi_after_labels (loop->header);
4216 gsi_insert_after (&si, new_stmt, GSI_NEW_STMT);
4217 add_phi_arg (new_phi, phi_arg, loop_latch_edge (loop),
4218 UNKNOWN_LOCATION);
4219 arginfo[i].op = phi_res;
4220 vargs.safe_push (phi_res);
4221 }
4222 else
4223 {
4224 enum tree_code code
4225 = POINTER_TYPE_P (TREE_TYPE (op))
4226 ? POINTER_PLUS_EXPR : PLUS_EXPR;
4227 tree type = POINTER_TYPE_P (TREE_TYPE (op))
4228 ? sizetype : TREE_TYPE (op);
4229 poly_widest_int cst
4230 = wi::mul (bestn->simdclone->args[i].linear_step,
4231 j * nunits);
4232 tree tcst = wide_int_to_tree (type, cst);
4233 new_temp = make_ssa_name (TREE_TYPE (op));
4234 gassign *new_stmt
4235 = gimple_build_assign (new_temp, code,
4236 arginfo[i].op, tcst);
4237 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
4238 vargs.safe_push (new_temp);
4239 }
4240 break;
4241 case SIMD_CLONE_ARG_TYPE_LINEAR_VAL_CONSTANT_STEP:
4242 case SIMD_CLONE_ARG_TYPE_LINEAR_UVAL_CONSTANT_STEP:
4243 case SIMD_CLONE_ARG_TYPE_LINEAR_VARIABLE_STEP:
4244 case SIMD_CLONE_ARG_TYPE_LINEAR_REF_VARIABLE_STEP:
4245 case SIMD_CLONE_ARG_TYPE_LINEAR_VAL_VARIABLE_STEP:
4246 case SIMD_CLONE_ARG_TYPE_LINEAR_UVAL_VARIABLE_STEP:
4247 default:
4248 gcc_unreachable ();
4249 }
4250 }
4251
4252 gcall *new_call = gimple_build_call_vec (fndecl, vargs);
4253 if (vec_dest)
4254 {
4255 gcc_assert (ratype
4256 || known_eq (simd_clone_subparts (rtype), nunits));
4257 if (ratype)
4258 new_temp = create_tmp_var (ratype);
4259 else if (useless_type_conversion_p (vectype, rtype))
4260 new_temp = make_ssa_name (vec_dest, new_call);
4261 else
4262 new_temp = make_ssa_name (rtype, new_call);
4263 gimple_call_set_lhs (new_call, new_temp);
4264 }
4265 vect_finish_stmt_generation (vinfo, stmt_info, new_call, gsi);
4266 gimple *new_stmt = new_call;
4267
4268 if (vec_dest)
4269 {
4270 if (!multiple_p (simd_clone_subparts (vectype), nunits))
4271 {
4272 unsigned int k, l;
4273 poly_uint64 prec = GET_MODE_BITSIZE (TYPE_MODE (vectype));
4274 poly_uint64 bytes = GET_MODE_SIZE (TYPE_MODE (vectype));
4275 k = vector_unroll_factor (nunits,
4276 simd_clone_subparts (vectype));
4277 gcc_assert ((k & (k - 1)) == 0);
4278 for (l = 0; l < k; l++)
4279 {
4280 tree t;
4281 if (ratype)
4282 {
4283 t = build_fold_addr_expr (new_temp);
4284 t = build2 (MEM_REF, vectype, t,
4285 build_int_cst (TREE_TYPE (t), l * bytes));
4286 }
4287 else
4288 t = build3 (BIT_FIELD_REF, vectype, new_temp,
4289 bitsize_int (prec), bitsize_int (l * prec));
4290 new_stmt = gimple_build_assign (make_ssa_name (vectype), t);
4291 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
4292
4293 if (j == 0 && l == 0)
4294 *vec_stmt = new_stmt;
4295 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
4296 }
4297
4298 if (ratype)
4299 vect_clobber_variable (vinfo, stmt_info, gsi, new_temp);
4300 continue;
4301 }
4302 else if (!multiple_p (nunits, simd_clone_subparts (vectype)))
4303 {
4304 unsigned int k = (simd_clone_subparts (vectype)
4305 / simd_clone_subparts (rtype));
4306 gcc_assert ((k & (k - 1)) == 0);
4307 if ((j & (k - 1)) == 0)
4308 vec_alloc (ret_ctor_elts, k);
4309 if (ratype)
4310 {
4311 unsigned int m, o;
4312 o = vector_unroll_factor (nunits,
4313 simd_clone_subparts (rtype));
4314 for (m = 0; m < o; m++)
4315 {
4316 tree tem = build4 (ARRAY_REF, rtype, new_temp,
4317 size_int (m), NULL_TREE, NULL_TREE);
4318 new_stmt = gimple_build_assign (make_ssa_name (rtype),
4319 tem);
4320 vect_finish_stmt_generation (vinfo, stmt_info,
4321 new_stmt, gsi);
4322 CONSTRUCTOR_APPEND_ELT (ret_ctor_elts, NULL_TREE,
4323 gimple_assign_lhs (new_stmt));
4324 }
4325 vect_clobber_variable (vinfo, stmt_info, gsi, new_temp);
4326 }
4327 else
4328 CONSTRUCTOR_APPEND_ELT (ret_ctor_elts, NULL_TREE, new_temp);
4329 if ((j & (k - 1)) != k - 1)
4330 continue;
4331 vec_oprnd0 = build_constructor (vectype, ret_ctor_elts);
4332 new_stmt
4333 = gimple_build_assign (make_ssa_name (vec_dest), vec_oprnd0);
4334 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
4335
4336 if ((unsigned) j == k - 1)
4337 *vec_stmt = new_stmt;
4338 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
4339 continue;
4340 }
4341 else if (ratype)
4342 {
4343 tree t = build_fold_addr_expr (new_temp);
4344 t = build2 (MEM_REF, vectype, t,
4345 build_int_cst (TREE_TYPE (t), 0));
4346 new_stmt = gimple_build_assign (make_ssa_name (vec_dest), t);
4347 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
4348 vect_clobber_variable (vinfo, stmt_info, gsi, new_temp);
4349 }
4350 else if (!useless_type_conversion_p (vectype, rtype))
4351 {
4352 vec_oprnd0 = build1 (VIEW_CONVERT_EXPR, vectype, new_temp);
4353 new_stmt
4354 = gimple_build_assign (make_ssa_name (vec_dest), vec_oprnd0);
4355 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
4356 }
4357 }
4358
4359 if (j == 0)
4360 *vec_stmt = new_stmt;
4361 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
4362 }
4363
4364 for (i = 0; i < nargs; ++i)
4365 {
4366 vec<tree> oprndsi = vec_oprnds[i];
4367 oprndsi.release ();
4368 }
4369 vargs.release ();
4370
4371 /* The call in STMT might prevent it from being removed in DCE.
4372 We cannot, however, remove it here, due to the way the SSA name
4373 it defines is mapped to the new definition. So just replace the
4374 rhs of the statement with something harmless. */
4375
4376 if (slp_node)
4377 return true;
4378
4379 gimple *new_stmt;
4380 if (scalar_dest)
4381 {
4382 type = TREE_TYPE (scalar_dest);
4383 lhs = gimple_call_lhs (vect_orig_stmt (stmt_info)->stmt);
4384 new_stmt = gimple_build_assign (lhs, build_zero_cst (type));
4385 }
4386 else
4387 new_stmt = gimple_build_nop ();
4388 vinfo->replace_stmt (gsi, vect_orig_stmt (stmt_info), new_stmt);
4389 unlink_stmt_vdef (stmt);
4390
4391 return true;
4392 }
4393
4394
4395 /* Function vect_gen_widened_results_half
4396
4397 Create a vector stmt whose code, operand count, and result variable are
4398 CODE, OP_TYPE, and VEC_DEST, and whose arguments are VEC_OPRND0 and
4399 VEC_OPRND1 (VEC_OPRND1 is dropped for unary operations). CODE must be
4400 an expression code that gimple_build_assign can take directly, not a
4401 CALL_EXPR. The new vector stmt is inserted at GSI. STMT_INFO is the
4402 original scalar stmt that we are vectorizing. */
4403
4404 static gimple *
4405 vect_gen_widened_results_half (vec_info *vinfo, enum tree_code code,
4406 tree vec_oprnd0, tree vec_oprnd1, int op_type,
4407 tree vec_dest, gimple_stmt_iterator *gsi,
4408 stmt_vec_info stmt_info)
4409 {
4410 gimple *new_stmt;
4411 tree new_temp;
4412
4413 /* Generate half of the widened result: */
4414 gcc_assert (op_type == TREE_CODE_LENGTH (code));
4415 if (op_type != binary_op)
4416 vec_oprnd1 = NULL;
4417 new_stmt = gimple_build_assign (vec_dest, code, vec_oprnd0, vec_oprnd1);
4418 new_temp = make_ssa_name (vec_dest, new_stmt);
4419 gimple_assign_set_lhs (new_stmt, new_temp);
4420 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
4421
4422 return new_stmt;
4423 }
4424
4425
4426 /* Create vectorized demotion statements for vector operands from VEC_OPRNDS.
4427 For multi-step conversions store the resulting vectors and call the function
4428 recursively. */
4429
4430 static void
4431 vect_create_vectorized_demotion_stmts (vec_info *vinfo, vec<tree> *vec_oprnds,
4432 int multi_step_cvt,
4433 stmt_vec_info stmt_info,
4434 vec<tree> vec_dsts,
4435 gimple_stmt_iterator *gsi,
4436 slp_tree slp_node, enum tree_code code)
4437 {
4438 unsigned int i;
4439 tree vop0, vop1, new_tmp, vec_dest;
4440
4441 vec_dest = vec_dsts.pop ();
4442
4443 for (i = 0; i < vec_oprnds->length (); i += 2)
4444 {
4445 /* Create demotion operation. */
4446 vop0 = (*vec_oprnds)[i];
4447 vop1 = (*vec_oprnds)[i + 1];
4448 gassign *new_stmt = gimple_build_assign (vec_dest, code, vop0, vop1);
4449 new_tmp = make_ssa_name (vec_dest, new_stmt);
4450 gimple_assign_set_lhs (new_stmt, new_tmp);
4451 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
4452
4453 if (multi_step_cvt)
4454 /* Store the resulting vector for next recursive call. */
4455 (*vec_oprnds)[i/2] = new_tmp;
4456 else
4457 {
4458 /* This is the last step of the conversion sequence. Store the
4459 vectors in SLP_NODE or in the vector info of the scalar statement
4460 (or in the STMT_VINFO_RELATED_STMT chain). */
4461 if (slp_node)
4462 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
4463 else
4464 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
4465 }
4466 }
4467
4468 /* For multi-step demotion operations we first generate demotion operations
4469 from the source type to the intermediate types, and then combine the
4470 results (stored in VEC_OPRNDS) with a demotion operation to the destination
4471 type. */
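  /* For example, a two-step int -> char demotion first packs pairs of int
     vectors into short vectors, and the recursive call then packs pairs of
     those into char vectors using VEC_PACK_TRUNC_EXPR.  */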
4472 if (multi_step_cvt)
4473 {
4474 /* At each level of recursion we have half of the operands we had at the
4475 previous level. */
4476 vec_oprnds->truncate ((i+1)/2);
4477 vect_create_vectorized_demotion_stmts (vinfo, vec_oprnds,
4478 multi_step_cvt - 1,
4479 stmt_info, vec_dsts, gsi,
4480 slp_node, VEC_PACK_TRUNC_EXPR);
4481 }
4482
4483 vec_dsts.quick_push (vec_dest);
4484 }
4485
4486
4487 /* Create vectorized promotion statements for vector operands from VEC_OPRNDS0
4488 and VEC_OPRNDS1, for a binary operation associated with scalar statement
4489 STMT_INFO. For multi-step conversions store the resulting vectors and
4490 call the function recursively. */
4491
4492 static void
4493 vect_create_vectorized_promotion_stmts (vec_info *vinfo,
4494 vec<tree> *vec_oprnds0,
4495 vec<tree> *vec_oprnds1,
4496 stmt_vec_info stmt_info, tree vec_dest,
4497 gimple_stmt_iterator *gsi,
4498 enum tree_code code1,
4499 enum tree_code code2, int op_type)
4500 {
4501 int i;
4502 tree vop0, vop1, new_tmp1, new_tmp2;
4503 gimple *new_stmt1, *new_stmt2;
4504 vec<tree> vec_tmp = vNULL;
4505
4506 vec_tmp.create (vec_oprnds0->length () * 2);
4507 FOR_EACH_VEC_ELT (*vec_oprnds0, i, vop0)
4508 {
4509 if (op_type == binary_op)
4510 vop1 = (*vec_oprnds1)[i];
4511 else
4512 vop1 = NULL_TREE;
4513
4514 /* Generate the two halves of the promotion operation. */
4515 new_stmt1 = vect_gen_widened_results_half (vinfo, code1, vop0, vop1,
4516 op_type, vec_dest, gsi,
4517 stmt_info);
4518 new_stmt2 = vect_gen_widened_results_half (vinfo, code2, vop0, vop1,
4519 op_type, vec_dest, gsi,
4520 stmt_info);
4521 if (is_gimple_call (new_stmt1))
4522 {
4523 new_tmp1 = gimple_call_lhs (new_stmt1);
4524 new_tmp2 = gimple_call_lhs (new_stmt2);
4525 }
4526 else
4527 {
4528 new_tmp1 = gimple_assign_lhs (new_stmt1);
4529 new_tmp2 = gimple_assign_lhs (new_stmt2);
4530 }
4531
4532 /* Store the results for the next step. */
4533 vec_tmp.quick_push (new_tmp1);
4534 vec_tmp.quick_push (new_tmp2);
4535 }
4536
4537 vec_oprnds0->release ();
4538 *vec_oprnds0 = vec_tmp;
4539 }
4540
4541
4542 /* Check if STMT_INFO performs a conversion operation that can be vectorized.
4543 If VEC_STMT is also passed, vectorize STMT_INFO: create a vectorized
4544 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
4545 Return true if STMT_INFO is vectorizable in this way. */
4546
4547 static bool
4548 vectorizable_conversion (vec_info *vinfo,
4549 stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
4550 gimple **vec_stmt, slp_tree slp_node,
4551 stmt_vector_for_cost *cost_vec)
4552 {
4553 tree vec_dest;
4554 tree scalar_dest;
4555 tree op0, op1 = NULL_TREE;
4556 loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
4557 enum tree_code code, code1 = ERROR_MARK, code2 = ERROR_MARK;
4558 enum tree_code codecvt1 = ERROR_MARK, codecvt2 = ERROR_MARK;
4559 tree new_temp;
4560 enum vect_def_type dt[2] = {vect_unknown_def_type, vect_unknown_def_type};
4561 int ndts = 2;
4562 poly_uint64 nunits_in;
4563 poly_uint64 nunits_out;
4564 tree vectype_out, vectype_in;
4565 int ncopies, i;
4566 tree lhs_type, rhs_type;
4567 enum { NARROW, NONE, WIDEN } modifier;
4568 vec<tree> vec_oprnds0 = vNULL;
4569 vec<tree> vec_oprnds1 = vNULL;
4570 tree vop0;
4571 bb_vec_info bb_vinfo = dyn_cast <bb_vec_info> (vinfo);
4572 int multi_step_cvt = 0;
4573 vec<tree> interm_types = vNULL;
4574 tree intermediate_type, cvt_type = NULL_TREE;
4575 int op_type;
4576 unsigned short fltsz;
4577
4578 /* Is STMT a vectorizable conversion? */
4579
4580 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
4581 return false;
4582
4583 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
4584 && ! vec_stmt)
4585 return false;
4586
4587 gassign *stmt = dyn_cast <gassign *> (stmt_info->stmt);
4588 if (!stmt)
4589 return false;
4590
4591 if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME)
4592 return false;
4593
4594 code = gimple_assign_rhs_code (stmt);
4595 if (!CONVERT_EXPR_CODE_P (code)
4596 && code != FIX_TRUNC_EXPR
4597 && code != FLOAT_EXPR
4598 && code != WIDEN_PLUS_EXPR
4599 && code != WIDEN_MINUS_EXPR
4600 && code != WIDEN_MULT_EXPR
4601 && code != WIDEN_LSHIFT_EXPR)
4602 return false;
4603
4604 op_type = TREE_CODE_LENGTH (code);
4605
4606 /* Check types of lhs and rhs. */
4607 scalar_dest = gimple_assign_lhs (stmt);
4608 lhs_type = TREE_TYPE (scalar_dest);
4609 vectype_out = STMT_VINFO_VECTYPE (stmt_info);
4610
4611 /* Check the operands of the operation. */
4612 slp_tree slp_op0, slp_op1 = NULL;
4613 if (!vect_is_simple_use (vinfo, stmt_info, slp_node,
4614 0, &op0, &slp_op0, &dt[0], &vectype_in))
4615 {
4616 if (dump_enabled_p ())
4617 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4618 "use not simple.\n");
4619 return false;
4620 }
4621
4622 rhs_type = TREE_TYPE (op0);
4623 if ((code != FIX_TRUNC_EXPR && code != FLOAT_EXPR)
4624 && !((INTEGRAL_TYPE_P (lhs_type)
4625 && INTEGRAL_TYPE_P (rhs_type))
4626 || (SCALAR_FLOAT_TYPE_P (lhs_type)
4627 && SCALAR_FLOAT_TYPE_P (rhs_type))))
4628 return false;
4629
4630 if (!VECTOR_BOOLEAN_TYPE_P (vectype_out)
4631 && ((INTEGRAL_TYPE_P (lhs_type)
4632 && !type_has_mode_precision_p (lhs_type))
4633 || (INTEGRAL_TYPE_P (rhs_type)
4634 && !type_has_mode_precision_p (rhs_type))))
4635 {
4636 if (dump_enabled_p ())
4637 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4638 "type conversion to/from bit-precision unsupported."
4639 "\n");
4640 return false;
4641 }
4642
4643 if (op_type == binary_op)
4644 {
4645 gcc_assert (code == WIDEN_MULT_EXPR || code == WIDEN_LSHIFT_EXPR
4646 || code == WIDEN_PLUS_EXPR || code == WIDEN_MINUS_EXPR);
4647
4648 op1 = gimple_assign_rhs2 (stmt);
4649 tree vectype1_in;
4650 if (!vect_is_simple_use (vinfo, stmt_info, slp_node, 1,
4651 &op1, &slp_op1, &dt[1], &vectype1_in))
4652 {
4653 if (dump_enabled_p ())
4654 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4655 "use not simple.\n");
4656 return false;
4657 }
4658 /* For WIDEN_MULT_EXPR, if OP0 is a constant, use the type of
4659 OP1. */
4660 if (!vectype_in)
4661 vectype_in = vectype1_in;
4662 }
4663
4664 /* If op0 is an external or constant def, infer the vector type
4665 from the scalar type. */
4666 if (!vectype_in)
4667 vectype_in = get_vectype_for_scalar_type (vinfo, rhs_type, slp_node);
4668 if (vec_stmt)
4669 gcc_assert (vectype_in);
4670 if (!vectype_in)
4671 {
4672 if (dump_enabled_p ())
4673 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4674 "no vectype for scalar type %T\n", rhs_type);
4675
4676 return false;
4677 }
4678
4679 if (VECTOR_BOOLEAN_TYPE_P (vectype_out)
4680 && !VECTOR_BOOLEAN_TYPE_P (vectype_in))
4681 {
4682 if (dump_enabled_p ())
4683 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4684 "can't convert between boolean and non "
4685 "boolean vectors %T\n", rhs_type);
4686
4687 return false;
4688 }
4689
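  /* E.g. a long -> int conversion with vectype_in V2DI and vectype_out
     V4SI has nunits_out == 4, a multiple of nunits_in == 2, and is thus a
     NARROW conversion; the opposite direction is WIDEN.  */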
4690 nunits_in = TYPE_VECTOR_SUBPARTS (vectype_in);
4691 nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
4692 if (known_eq (nunits_out, nunits_in))
4693 modifier = NONE;
4694 else if (multiple_p (nunits_out, nunits_in))
4695 modifier = NARROW;
4696 else
4697 {
4698 gcc_checking_assert (multiple_p (nunits_in, nunits_out));
4699 modifier = WIDEN;
4700 }
4701
4702 /* Multiple types in SLP are handled by creating the appropriate number of
4703 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
4704 case of SLP. */
4705 if (slp_node)
4706 ncopies = 1;
4707 else if (modifier == NARROW)
4708 ncopies = vect_get_num_copies (loop_vinfo, vectype_out);
4709 else
4710 ncopies = vect_get_num_copies (loop_vinfo, vectype_in);
4711
4712 /* Sanity check: make sure that at least one copy of the vectorized stmt
4713 needs to be generated. */
4714 gcc_assert (ncopies >= 1);
4715
4716 bool found_mode = false;
4717 scalar_mode lhs_mode = SCALAR_TYPE_MODE (lhs_type);
4718 scalar_mode rhs_mode = SCALAR_TYPE_MODE (rhs_type);
4719 opt_scalar_mode rhs_mode_iter;
4720
4721 /* Supportable by target? */
4722 switch (modifier)
4723 {
4724 case NONE:
4725 if (code != FIX_TRUNC_EXPR
4726 && code != FLOAT_EXPR
4727 && !CONVERT_EXPR_CODE_P (code))
4728 return false;
4729 if (supportable_convert_operation (code, vectype_out, vectype_in, &code1))
4730 break;
4731 /* FALLTHRU */
4732 unsupported:
4733 if (dump_enabled_p ())
4734 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4735 "conversion not supported by target.\n");
4736 return false;
4737
4738 case WIDEN:
4739 if (supportable_widening_operation (vinfo, code, stmt_info, vectype_out,
4740 vectype_in, &code1, &code2,
4741 &multi_step_cvt, &interm_types))
4742 {
4743 /* Binary widening operation can only be supported directly by the
4744 architecture. */
4745 gcc_assert (!(multi_step_cvt && op_type == binary_op));
4746 break;
4747 }
4748
4749 if (code != FLOAT_EXPR
4750 || GET_MODE_SIZE (lhs_mode) <= GET_MODE_SIZE (rhs_mode))
4751 goto unsupported;
4752
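      /* Otherwise try a two-step conversion through an intermediate integer
         type, e.g. short -> double as a widening short -> int conversion
         followed by an int -> double FLOAT_EXPR.  */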
4753 fltsz = GET_MODE_SIZE (lhs_mode);
4754 FOR_EACH_2XWIDER_MODE (rhs_mode_iter, rhs_mode)
4755 {
4756 rhs_mode = rhs_mode_iter.require ();
4757 if (GET_MODE_SIZE (rhs_mode) > fltsz)
4758 break;
4759
4760 cvt_type
4761 = build_nonstandard_integer_type (GET_MODE_BITSIZE (rhs_mode), 0);
4762 cvt_type = get_same_sized_vectype (cvt_type, vectype_in);
4763 if (cvt_type == NULL_TREE)
4764 goto unsupported;
4765
4766 if (GET_MODE_SIZE (rhs_mode) == fltsz)
4767 {
4768 if (!supportable_convert_operation (code, vectype_out,
4769 cvt_type, &codecvt1))
4770 goto unsupported;
4771 }
4772 else if (!supportable_widening_operation (vinfo, code, stmt_info,
4773 vectype_out, cvt_type,
4774 &codecvt1, &codecvt2,
4775 &multi_step_cvt,
4776 &interm_types))
4777 continue;
4778 else
4779 gcc_assert (multi_step_cvt == 0);
4780
4781 if (supportable_widening_operation (vinfo, NOP_EXPR, stmt_info,
4782 cvt_type,
4783 vectype_in, &code1, &code2,
4784 &multi_step_cvt, &interm_types))
4785 {
4786 found_mode = true;
4787 break;
4788 }
4789 }
4790
4791 if (!found_mode)
4792 goto unsupported;
4793
4794 if (GET_MODE_SIZE (rhs_mode) == fltsz)
4795 codecvt2 = ERROR_MARK;
4796 else
4797 {
4798 multi_step_cvt++;
4799 interm_types.safe_push (cvt_type);
4800 cvt_type = NULL_TREE;
4801 }
4802 break;
4803
4804 case NARROW:
4805 gcc_assert (op_type == unary_op);
4806 if (supportable_narrowing_operation (code, vectype_out, vectype_in,
4807 &code1, &multi_step_cvt,
4808 &interm_types))
4809 break;
4810
4811 if (code != FIX_TRUNC_EXPR
4812 || GET_MODE_SIZE (lhs_mode) >= GET_MODE_SIZE (rhs_mode))
4813 goto unsupported;
4814
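      /* Otherwise try a two-step conversion through an integer type of the
         source's width, e.g. double -> short as a double -> 64-bit-integer
         FIX_TRUNC_EXPR followed by a narrowing conversion to short.  */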
4815 cvt_type
4816 = build_nonstandard_integer_type (GET_MODE_BITSIZE (rhs_mode), 0);
4817 cvt_type = get_same_sized_vectype (cvt_type, vectype_in);
4818 if (cvt_type == NULL_TREE)
4819 goto unsupported;
4820 if (!supportable_convert_operation (code, cvt_type, vectype_in,
4821 &codecvt1))
4822 goto unsupported;
4823 if (supportable_narrowing_operation (NOP_EXPR, vectype_out, cvt_type,
4824 &code1, &multi_step_cvt,
4825 &interm_types))
4826 break;
4827 goto unsupported;
4828
4829 default:
4830 gcc_unreachable ();
4831 }
4832
4833 if (!vec_stmt) /* transformation not required. */
4834 {
4835 if (slp_node
4836 && (!vect_maybe_update_slp_op_vectype (slp_op0, vectype_in)
4837 || !vect_maybe_update_slp_op_vectype (slp_op1, vectype_in)))
4838 {
4839 if (dump_enabled_p ())
4840 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4841 "incompatible vector types for invariants\n");
4842 return false;
4843 }
4844 DUMP_VECT_SCOPE ("vectorizable_conversion");
4845 if (modifier == NONE)
4846 {
4847 STMT_VINFO_TYPE (stmt_info) = type_conversion_vec_info_type;
4848 vect_model_simple_cost (vinfo, stmt_info, ncopies, dt, ndts, slp_node,
4849 cost_vec);
4850 }
4851 else if (modifier == NARROW)
4852 {
4853 STMT_VINFO_TYPE (stmt_info) = type_demotion_vec_info_type;
4854 /* The final packing step produces one vector result per copy. */
4855 unsigned int nvectors
4856 = (slp_node ? SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node) : ncopies);
4857 vect_model_promotion_demotion_cost (stmt_info, dt, nvectors,
4858 multi_step_cvt, cost_vec);
4859 }
4860 else
4861 {
4862 STMT_VINFO_TYPE (stmt_info) = type_promotion_vec_info_type;
4863 /* The initial unpacking step produces two vector results
4864 per copy. MULTI_STEP_CVT is 0 for a single conversion,
4865 so >> MULTI_STEP_CVT divides by 2^(number of steps - 1). */
4866 unsigned int nvectors
4867 = (slp_node
4868 ? SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node) >> multi_step_cvt
4869 : ncopies * 2);
4870 vect_model_promotion_demotion_cost (stmt_info, dt, nvectors,
4871 multi_step_cvt, cost_vec);
4872 }
4873 interm_types.release ();
4874 return true;
4875 }
4876
4877 /* Transform. */
4878 if (dump_enabled_p ())
4879 dump_printf_loc (MSG_NOTE, vect_location,
4880 "transform conversion. ncopies = %d.\n", ncopies);
4881
4882 if (op_type == binary_op)
4883 {
4884 if (CONSTANT_CLASS_P (op0))
4885 op0 = fold_convert (TREE_TYPE (op1), op0);
4886 else if (CONSTANT_CLASS_P (op1))
4887 op1 = fold_convert (TREE_TYPE (op0), op1);
4888 }
4889
4890 /* In case of multi-step conversion, we first generate conversion operations
4891 to the intermediate types, and then from those types to the final one.
4892 We create vector destinations for the intermediate type (TYPES) received
4893 from supportable_*_operation, and store them in the correct order
4894 for future use in vect_create_vectorized_*_stmts (). */
4895 auto_vec<tree> vec_dsts (multi_step_cvt + 1);
4896 vec_dest = vect_create_destination_var (scalar_dest,
4897 (cvt_type && modifier == WIDEN)
4898 ? cvt_type : vectype_out);
4899 vec_dsts.quick_push (vec_dest);
4900
4901 if (multi_step_cvt)
4902 {
4903 for (i = interm_types.length () - 1;
4904 interm_types.iterate (i, &intermediate_type); i--)
4905 {
4906 vec_dest = vect_create_destination_var (scalar_dest,
4907 intermediate_type);
4908 vec_dsts.quick_push (vec_dest);
4909 }
4910 }
4911
4912 if (cvt_type)
4913 vec_dest = vect_create_destination_var (scalar_dest,
4914 modifier == WIDEN
4915 ? vectype_out : cvt_type);
4916
4917 int ninputs = 1;
4918 if (!slp_node)
4919 {
4920 if (modifier == WIDEN)
4921 ;
4922 else if (modifier == NARROW)
4923 {
4924 if (multi_step_cvt)
4925 ninputs = vect_pow2 (multi_step_cvt);
4926 ninputs *= 2;
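          /* I.e. each narrowed output vector consumes
             2 * 2^multi_step_cvt input defs.  */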
4927 }
4928 }
4929
4930 switch (modifier)
4931 {
4932 case NONE:
4933 vect_get_vec_defs (vinfo, stmt_info, slp_node, ncopies,
4934 op0, &vec_oprnds0);
4935 FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
4936 {
4937 /* Arguments are ready, create the new vector stmt. */
4938 gcc_assert (TREE_CODE_LENGTH (code1) == unary_op);
4939 gassign *new_stmt = gimple_build_assign (vec_dest, code1, vop0);
4940 new_temp = make_ssa_name (vec_dest, new_stmt);
4941 gimple_assign_set_lhs (new_stmt, new_temp);
4942 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
4943
4944 if (slp_node)
4945 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
4946 else
4947 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
4948 }
4949 break;
4950
4951 case WIDEN:
4952 /* In case the vectorization factor (VF) is bigger than the number
4953 of elements that we can fit in a vectype (nunits), we have to
4954 generate more than one vector stmt, i.e., we need to "unroll"
4955 the vector stmt by a factor VF/nunits. */
4956 vect_get_vec_defs (vinfo, stmt_info, slp_node, ncopies * ninputs,
4957 op0, &vec_oprnds0,
4958 code == WIDEN_LSHIFT_EXPR ? NULL_TREE : op1,
4959 &vec_oprnds1);
4960 if (code == WIDEN_LSHIFT_EXPR)
4961 {
4962 int oprnds_size = vec_oprnds0.length ();
4963 vec_oprnds1.create (oprnds_size);
4964 for (i = 0; i < oprnds_size; ++i)
4965 vec_oprnds1.quick_push (op1);
4966 }
4967 /* Arguments are ready. Create the new vector stmts. */
4968 for (i = multi_step_cvt; i >= 0; i--)
4969 {
4970 tree this_dest = vec_dsts[i];
4971 enum tree_code c1 = code1, c2 = code2;
4972 if (i == 0 && codecvt2 != ERROR_MARK)
4973 {
4974 c1 = codecvt1;
4975 c2 = codecvt2;
4976 }
4977 vect_create_vectorized_promotion_stmts (vinfo, &vec_oprnds0,
4978 &vec_oprnds1, stmt_info,
4979 this_dest, gsi,
4980 c1, c2, op_type);
4981 }
4982
4983 FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
4984 {
4985 gimple *new_stmt;
4986 if (cvt_type)
4987 {
4988 gcc_assert (TREE_CODE_LENGTH (codecvt1) == unary_op);
4989 new_temp = make_ssa_name (vec_dest);
4990 new_stmt = gimple_build_assign (new_temp, codecvt1, vop0);
4991 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
4992 }
4993 else
4994 new_stmt = SSA_NAME_DEF_STMT (vop0);
4995
4996 if (slp_node)
4997 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
4998 else
4999 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
5000 }
5001 break;
5002
5003 case NARROW:
5004 /* In case the vectorization factor (VF) is bigger than the number
5005 of elements that we can fit in a vectype (nunits), we have to
5006 generate more than one vector stmt, i.e., we need to "unroll"
5007 the vector stmt by a factor VF/nunits. */
5008 vect_get_vec_defs (vinfo, stmt_info, slp_node, ncopies * ninputs,
5009 op0, &vec_oprnds0);
5010 /* Arguments are ready. Create the new vector stmts. */
5011 if (cvt_type)
5012 FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
5013 {
5014 gcc_assert (TREE_CODE_LENGTH (codecvt1) == unary_op);
5015 new_temp = make_ssa_name (vec_dest);
5016 gassign *new_stmt
5017 = gimple_build_assign (new_temp, codecvt1, vop0);
5018 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
5019 vec_oprnds0[i] = new_temp;
5020 }
5021
5022 vect_create_vectorized_demotion_stmts (vinfo, &vec_oprnds0,
5023 multi_step_cvt,
5024 stmt_info, vec_dsts, gsi,
5025 slp_node, code1);
5026 break;
5027 }
5028 if (!slp_node)
5029 *vec_stmt = STMT_VINFO_VEC_STMTS (stmt_info)[0];
5030
5031 vec_oprnds0.release ();
5032 vec_oprnds1.release ();
5033 interm_types.release ();
5034
5035 return true;
5036 }
5037
5038 /* Return true if we can assume from the scalar form of STMT_INFO that
5039 neither the scalar nor the vector forms will generate code. STMT_INFO
5040 is known not to involve a data reference. */
5041
5042 bool
5043 vect_nop_conversion_p (stmt_vec_info stmt_info)
5044 {
5045 gassign *stmt = dyn_cast <gassign *> (stmt_info->stmt);
5046 if (!stmt)
5047 return false;
5048
5049 tree lhs = gimple_assign_lhs (stmt);
5050 tree_code code = gimple_assign_rhs_code (stmt);
5051 tree rhs = gimple_assign_rhs1 (stmt);
5052
5053 if (code == SSA_NAME || code == VIEW_CONVERT_EXPR)
5054 return true;
5055
5056 if (CONVERT_EXPR_CODE_P (code))
5057 return tree_nop_conversion_p (TREE_TYPE (lhs), TREE_TYPE (rhs));
5058
5059 return false;
5060 }
5061
5062 /* Function vectorizable_assignment.
5063
5064 Check if STMT_INFO performs an assignment (copy) that can be vectorized.
5065 If VEC_STMT is also passed, vectorize the STMT_INFO: create a vectorized
5066 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
5067 Return true if STMT_INFO is vectorizable in this way. */
5068
5069 static bool
5070 vectorizable_assignment (vec_info *vinfo,
5071 stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
5072 gimple **vec_stmt, slp_tree slp_node,
5073 stmt_vector_for_cost *cost_vec)
5074 {
5075 tree vec_dest;
5076 tree scalar_dest;
5077 tree op;
5078 loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
5079 tree new_temp;
5080 enum vect_def_type dt[1] = {vect_unknown_def_type};
5081 int ndts = 1;
5082 int ncopies;
5083 int i;
5084 vec<tree> vec_oprnds = vNULL;
5085 tree vop;
5086 bb_vec_info bb_vinfo = dyn_cast <bb_vec_info> (vinfo);
5087 enum tree_code code;
5088 tree vectype_in;
5089
5090 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
5091 return false;
5092
5093 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
5094 && ! vec_stmt)
5095 return false;
5096
5097 /* Is vectorizable assignment? */
5098 gassign *stmt = dyn_cast <gassign *> (stmt_info->stmt);
5099 if (!stmt)
5100 return false;
5101
5102 scalar_dest = gimple_assign_lhs (stmt);
5103 if (TREE_CODE (scalar_dest) != SSA_NAME)
5104 return false;
5105
5106 if (STMT_VINFO_DATA_REF (stmt_info))
5107 return false;
5108
5109 code = gimple_assign_rhs_code (stmt);
5110 if (!(gimple_assign_single_p (stmt)
5111 || code == PAREN_EXPR
5112 || CONVERT_EXPR_CODE_P (code)))
5113 return false;
5114
5115 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
5116 poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
5117
5118 /* Multiple types in SLP are handled by creating the appropriate number of
5119 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
5120 case of SLP. */
5121 if (slp_node)
5122 ncopies = 1;
5123 else
5124 ncopies = vect_get_num_copies (loop_vinfo, vectype);
5125
5126 gcc_assert (ncopies >= 1);
5127
5128 slp_tree slp_op;
5129 if (!vect_is_simple_use (vinfo, stmt_info, slp_node, 0, &op, &slp_op,
5130 &dt[0], &vectype_in))
5131 {
5132 if (dump_enabled_p ())
5133 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5134 "use not simple.\n");
5135 return false;
5136 }
5137 if (!vectype_in)
5138 vectype_in = get_vectype_for_scalar_type (vinfo, TREE_TYPE (op), slp_node);
5139
5140 /* We can handle NOP_EXPR conversions that do not change the number
5141 of elements or the vector size. */
5142 if ((CONVERT_EXPR_CODE_P (code)
5143 || code == VIEW_CONVERT_EXPR)
5144 && (!vectype_in
5145 || maybe_ne (TYPE_VECTOR_SUBPARTS (vectype_in), nunits)
5146 || maybe_ne (GET_MODE_SIZE (TYPE_MODE (vectype)),
5147 GET_MODE_SIZE (TYPE_MODE (vectype_in)))))
5148 return false;
5149
5150 if (VECTOR_BOOLEAN_TYPE_P (vectype)
5151 && !VECTOR_BOOLEAN_TYPE_P (vectype_in))
5152 {
5153 if (dump_enabled_p ())
5154 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5155 "can't convert between boolean and non "
5156 "boolean vectors %T\n", TREE_TYPE (op));
5157
5158 return false;
5159 }
5160
5161 /* We do not handle bit-precision changes. */
5162 if ((CONVERT_EXPR_CODE_P (code)
5163 || code == VIEW_CONVERT_EXPR)
5164 && INTEGRAL_TYPE_P (TREE_TYPE (scalar_dest))
5165 && (!type_has_mode_precision_p (TREE_TYPE (scalar_dest))
5166 || !type_has_mode_precision_p (TREE_TYPE (op)))
5167 /* But a conversion that does not change the bit-pattern is ok. */
5168 && !((TYPE_PRECISION (TREE_TYPE (scalar_dest))
5169 > TYPE_PRECISION (TREE_TYPE (op)))
5170 && TYPE_UNSIGNED (TREE_TYPE (op))))
5171 {
5172 if (dump_enabled_p ())
5173 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5174 "type conversion to/from bit-precision "
5175 "unsupported.\n");
5176 return false;
5177 }
5178
5179 if (!vec_stmt) /* transformation not required. */
5180 {
5181 if (slp_node
5182 && !vect_maybe_update_slp_op_vectype (slp_op, vectype_in))
5183 {
5184 if (dump_enabled_p ())
5185 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5186 "incompatible vector types for invariants\n");
5187 return false;
5188 }
5189 STMT_VINFO_TYPE (stmt_info) = assignment_vec_info_type;
5190 DUMP_VECT_SCOPE ("vectorizable_assignment");
5191 if (!vect_nop_conversion_p (stmt_info))
5192 vect_model_simple_cost (vinfo, stmt_info, ncopies, dt, ndts, slp_node,
5193 cost_vec);
5194 return true;
5195 }
5196
5197 /* Transform. */
5198 if (dump_enabled_p ())
5199 dump_printf_loc (MSG_NOTE, vect_location, "transform assignment.\n");
5200
5201 /* Handle def. */
5202 vec_dest = vect_create_destination_var (scalar_dest, vectype);
5203
5204 /* Handle use. */
5205 vect_get_vec_defs (vinfo, stmt_info, slp_node, ncopies, op, &vec_oprnds);
5206
5207 /* Arguments are ready. Create the new vector stmt. */
5208 FOR_EACH_VEC_ELT (vec_oprnds, i, vop)
5209 {
5210 if (CONVERT_EXPR_CODE_P (code)
5211 || code == VIEW_CONVERT_EXPR)
5212 vop = build1 (VIEW_CONVERT_EXPR, vectype, vop);
5213 gassign *new_stmt = gimple_build_assign (vec_dest, vop);
5214 new_temp = make_ssa_name (vec_dest, new_stmt);
5215 gimple_assign_set_lhs (new_stmt, new_temp);
5216 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
5217 if (slp_node)
5218 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
5219 else
5220 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
5221 }
5222 if (!slp_node)
5223 *vec_stmt = STMT_VINFO_VEC_STMTS (stmt_info)[0];
5224
5225 vec_oprnds.release ();
5226 return true;
5227 }
5228
5229
5230 /* Return TRUE if CODE (a shift operation) is supported for SCALAR_TYPE
5231 either as shift by a scalar or by a vector. */
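/* A hedged usage sketch (not a quote of any caller): a pattern
recognizer that wants to introduce a shift can guard itself with

if (!vect_supportable_shift (vinfo, LSHIFT_EXPR, TREE_TYPE (oprnd0)))
return NULL;

so the pattern is only created when either the vector/scalar or the
vector/vector shift optab exists for the corresponding vector mode. */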
5232
5233 bool
5234 vect_supportable_shift (vec_info *vinfo, enum tree_code code, tree scalar_type)
5235 {
5236
5237 machine_mode vec_mode;
5238 optab optab;
5239 int icode;
5240 tree vectype;
5241
5242 vectype = get_vectype_for_scalar_type (vinfo, scalar_type);
5243 if (!vectype)
5244 return false;
5245
5246 optab = optab_for_tree_code (code, vectype, optab_scalar);
5247 if (!optab
5248 || optab_handler (optab, TYPE_MODE (vectype)) == CODE_FOR_nothing)
5249 {
5250 optab = optab_for_tree_code (code, vectype, optab_vector);
5251 if (!optab
5252 || (optab_handler (optab, TYPE_MODE (vectype))
5253 == CODE_FOR_nothing))
5254 return false;
5255 }
5256
5257 vec_mode = TYPE_MODE (vectype);
5258 icode = (int) optab_handler (optab, vec_mode);
5259 if (icode == CODE_FOR_nothing)
5260 return false;
5261
5262 return true;
5263 }
5264
5265
5266 /* Function vectorizable_shift.
5267
5268 Check if STMT_INFO performs a shift operation that can be vectorized.
5269 If VEC_STMT is also passed, vectorize the STMT_INFO: create a vectorized
5270 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
5271 Return true if STMT_INFO is vectorizable in this way. */
5272
5273 static bool
5274 vectorizable_shift (vec_info *vinfo,
5275 stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
5276 gimple **vec_stmt, slp_tree slp_node,
5277 stmt_vector_for_cost *cost_vec)
5278 {
5279 tree vec_dest;
5280 tree scalar_dest;
5281 tree op0, op1 = NULL;
5282 tree vec_oprnd1 = NULL_TREE;
5283 tree vectype;
5284 loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
5285 enum tree_code code;
5286 machine_mode vec_mode;
5287 tree new_temp;
5288 optab optab;
5289 int icode;
5290 machine_mode optab_op2_mode;
5291 enum vect_def_type dt[2] = {vect_unknown_def_type, vect_unknown_def_type};
5292 int ndts = 2;
5293 poly_uint64 nunits_in;
5294 poly_uint64 nunits_out;
5295 tree vectype_out;
5296 tree op1_vectype;
5297 int ncopies;
5298 int i;
5299 vec<tree> vec_oprnds0 = vNULL;
5300 vec<tree> vec_oprnds1 = vNULL;
5301 tree vop0, vop1;
5302 unsigned int k;
5303 bool scalar_shift_arg = true;
5304 bb_vec_info bb_vinfo = dyn_cast <bb_vec_info> (vinfo);
5305 bool incompatible_op1_vectype_p = false;
5306
5307 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
5308 return false;
5309
5310 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
5311 && STMT_VINFO_DEF_TYPE (stmt_info) != vect_nested_cycle
5312 && ! vec_stmt)
5313 return false;
5314
5315 /* Is STMT a vectorizable shift/rotate operation? */
5316 gassign *stmt = dyn_cast <gassign *> (stmt_info->stmt);
5317 if (!stmt)
5318 return false;
5319
5320 if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME)
5321 return false;
5322
5323 code = gimple_assign_rhs_code (stmt);
5324
5325 if (!(code == LSHIFT_EXPR || code == RSHIFT_EXPR || code == LROTATE_EXPR
5326 || code == RROTATE_EXPR))
5327 return false;
5328
5329 scalar_dest = gimple_assign_lhs (stmt);
5330 vectype_out = STMT_VINFO_VECTYPE (stmt_info);
5331 if (!type_has_mode_precision_p (TREE_TYPE (scalar_dest)))
5332 {
5333 if (dump_enabled_p ())
5334 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5335 "bit-precision shifts not supported.\n");
5336 return false;
5337 }
5338
5339 slp_tree slp_op0;
5340 if (!vect_is_simple_use (vinfo, stmt_info, slp_node,
5341 0, &op0, &slp_op0, &dt[0], &vectype))
5342 {
5343 if (dump_enabled_p ())
5344 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5345 "use not simple.\n");
5346 return false;
5347 }
5348 /* If op0 is an external or constant def, infer the vector type
5349 from the scalar type. */
5350 if (!vectype)
5351 vectype = get_vectype_for_scalar_type (vinfo, TREE_TYPE (op0), slp_node);
5352 if (vec_stmt)
5353 gcc_assert (vectype);
5354 if (!vectype)
5355 {
5356 if (dump_enabled_p ())
5357 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5358 "no vectype for scalar type\n");
5359 return false;
5360 }
5361
5362 nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
5363 nunits_in = TYPE_VECTOR_SUBPARTS (vectype);
5364 if (maybe_ne (nunits_out, nunits_in))
5365 return false;
5366
5367 stmt_vec_info op1_def_stmt_info;
5368 slp_tree slp_op1;
5369 if (!vect_is_simple_use (vinfo, stmt_info, slp_node, 1, &op1, &slp_op1,
5370 &dt[1], &op1_vectype, &op1_def_stmt_info))
5371 {
5372 if (dump_enabled_p ())
5373 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5374 "use not simple.\n");
5375 return false;
5376 }
5377
5378 /* Multiple types in SLP are handled by creating the appropriate number of
5379 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
5380 case of SLP. */
5381 if (slp_node)
5382 ncopies = 1;
5383 else
5384 ncopies = vect_get_num_copies (loop_vinfo, vectype);
5385
5386 gcc_assert (ncopies >= 1);
5387
5388 /* Determine whether the shift amount is a vector or a scalar. If the
5389 shift/rotate amount is a vector, use the vector/vector shift optabs. */
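/* For illustration (hedged, types assumed): a constant or
loop-invariant amount as in

x[i] = y[i] << 3;

can use the vector/scalar optab (one scalar count for the whole
vector), whereas

x[i] = y[i] << z[i];

needs the vector/vector optab, with the shift counts themselves
vectorized. */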
5390
5391 if ((dt[1] == vect_internal_def
5392 || dt[1] == vect_induction_def
5393 || dt[1] == vect_nested_cycle)
5394 && !slp_node)
5395 scalar_shift_arg = false;
5396 else if (dt[1] == vect_constant_def
5397 || dt[1] == vect_external_def
5398 || dt[1] == vect_internal_def)
5399 {
5400 /* In SLP we need to check whether the shift count is the same
5401 in all the SLP scalar stmts; in loops, if it is a constant or
5402 invariant, it is always a scalar shift. */
5403 if (slp_node)
5404 {
5405 vec<stmt_vec_info> stmts = SLP_TREE_SCALAR_STMTS (slp_node);
5406 stmt_vec_info slpstmt_info;
5407
5408 FOR_EACH_VEC_ELT (stmts, k, slpstmt_info)
5409 {
5410 gassign *slpstmt = as_a <gassign *> (slpstmt_info->stmt);
5411 if (!operand_equal_p (gimple_assign_rhs2 (slpstmt), op1, 0))
5412 scalar_shift_arg = false;
5413 }
5414
5415 /* For internal SLP defs we have to make sure we see scalar stmts
5416 for all vector elements.
5417 ??? For different vectors we could resort to a different
5418 scalar shift operand but code-generation below simply always
5419 takes the first. */
5420 if (dt[1] == vect_internal_def
5421 && maybe_ne (nunits_out * SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node),
5422 stmts.length ()))
5423 scalar_shift_arg = false;
5424 }
5425
5426 /* If the shift amount is computed by a pattern stmt we cannot
5427 use the scalar amount directly, so give up and use a vector
5428 shift. */
5429 if (op1_def_stmt_info && is_pattern_stmt_p (op1_def_stmt_info))
5430 scalar_shift_arg = false;
5431 }
5432 else
5433 {
5434 if (dump_enabled_p ())
5435 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5436 "operand mode requires invariant argument.\n");
5437 return false;
5438 }
5439
5440 /* Vector shifted by vector. */
5441 bool was_scalar_shift_arg = scalar_shift_arg;
5442 if (!scalar_shift_arg)
5443 {
5444 optab = optab_for_tree_code (code, vectype, optab_vector);
5445 if (dump_enabled_p ())
5446 dump_printf_loc (MSG_NOTE, vect_location,
5447 "vector/vector shift/rotate found.\n");
5448
5449 if (!op1_vectype)
5450 op1_vectype = get_vectype_for_scalar_type (vinfo, TREE_TYPE (op1),
5451 slp_op1);
5452 incompatible_op1_vectype_p
5453 = (op1_vectype == NULL_TREE
5454 || maybe_ne (TYPE_VECTOR_SUBPARTS (op1_vectype),
5455 TYPE_VECTOR_SUBPARTS (vectype))
5456 || TYPE_MODE (op1_vectype) != TYPE_MODE (vectype));
5457 if (incompatible_op1_vectype_p
5458 && (!slp_node
5459 || SLP_TREE_DEF_TYPE (slp_op1) != vect_constant_def
5460 || slp_op1->refcnt != 1))
5461 {
5462 if (dump_enabled_p ())
5463 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5464 "unusable type for last operand in"
5465 " vector/vector shift/rotate.\n");
5466 return false;
5467 }
5468 }
5469 /* See if the machine has a vector shifted by scalar insn and if not
5470 then see if it has a vector shifted by vector insn. */
5471 else
5472 {
5473 optab = optab_for_tree_code (code, vectype, optab_scalar);
5474 if (optab
5475 && optab_handler (optab, TYPE_MODE (vectype)) != CODE_FOR_nothing)
5476 {
5477 if (dump_enabled_p ())
5478 dump_printf_loc (MSG_NOTE, vect_location,
5479 "vector/scalar shift/rotate found.\n");
5480 }
5481 else
5482 {
5483 optab = optab_for_tree_code (code, vectype, optab_vector);
5484 if (optab
5485 && (optab_handler (optab, TYPE_MODE (vectype))
5486 != CODE_FOR_nothing))
5487 {
5488 scalar_shift_arg = false;
5489
5490 if (dump_enabled_p ())
5491 dump_printf_loc (MSG_NOTE, vect_location,
5492 "vector/vector shift/rotate found.\n");
5493
5494 if (!op1_vectype)
5495 op1_vectype = get_vectype_for_scalar_type (vinfo,
5496 TREE_TYPE (op1),
5497 slp_op1);
5498
5499 /* Unlike the other binary operators, shifts/rotates have
5500 the rhs being int, instead of the same type as the lhs,
5501 so make sure the scalar is the right type if we are
5502 dealing with vectors of long long/long/short/char. */
5503 incompatible_op1_vectype_p
5504 = (!op1_vectype
5505 || !tree_nop_conversion_p (TREE_TYPE (vectype),
5506 TREE_TYPE (op1)));
5507 if (incompatible_op1_vectype_p
5508 && dt[1] == vect_internal_def)
5509 {
5510 if (dump_enabled_p ())
5511 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5512 "unusable type for last operand in"
5513 " vector/vector shift/rotate.\n");
5514 return false;
5515 }
5516 }
5517 }
5518 }
5519
5520 /* Supportable by target? */
5521 if (!optab)
5522 {
5523 if (dump_enabled_p ())
5524 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5525 "no optab.\n");
5526 return false;
5527 }
5528 vec_mode = TYPE_MODE (vectype);
5529 icode = (int) optab_handler (optab, vec_mode);
5530 if (icode == CODE_FOR_nothing)
5531 {
5532 if (dump_enabled_p ())
5533 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5534 "op not supported by target.\n");
5535 /* Check only during analysis. */
5536 if (maybe_ne (GET_MODE_SIZE (vec_mode), UNITS_PER_WORD)
5537 || (!vec_stmt
5538 && !vect_worthwhile_without_simd_p (vinfo, code)))
5539 return false;
5540 if (dump_enabled_p ())
5541 dump_printf_loc (MSG_NOTE, vect_location,
5542 "proceeding using word mode.\n");
5543 }
5544
5545 /* Worthwhile without SIMD support? Check only during analysis. */
5546 if (!vec_stmt
5547 && !VECTOR_MODE_P (TYPE_MODE (vectype))
5548 && !vect_worthwhile_without_simd_p (vinfo, code))
5549 {
5550 if (dump_enabled_p ())
5551 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5552 "not worthwhile without SIMD support.\n");
5553 return false;
5554 }
5555
5556 if (!vec_stmt) /* transformation not required. */
5557 {
5558 if (slp_node
5559 && (!vect_maybe_update_slp_op_vectype (slp_op0, vectype)
5560 || ((!scalar_shift_arg || dt[1] == vect_internal_def)
5561 && (!incompatible_op1_vectype_p
5562 || dt[1] == vect_constant_def)
5563 && !vect_maybe_update_slp_op_vectype
5564 (slp_op1,
5565 incompatible_op1_vectype_p ? vectype : op1_vectype))))
5566 {
5567 if (dump_enabled_p ())
5568 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5569 "incompatible vector types for invariants\n");
5570 return false;
5571 }
5572 /* Now adjust the constant shift amount in place. */
5573 if (slp_node
5574 && incompatible_op1_vectype_p
5575 && dt[1] == vect_constant_def)
5576 {
5577 for (unsigned i = 0;
5578 i < SLP_TREE_SCALAR_OPS (slp_op1).length (); ++i)
5579 {
5580 SLP_TREE_SCALAR_OPS (slp_op1)[i]
5581 = fold_convert (TREE_TYPE (vectype),
5582 SLP_TREE_SCALAR_OPS (slp_op1)[i]);
5583 gcc_assert ((TREE_CODE (SLP_TREE_SCALAR_OPS (slp_op1)[i])
5584 == INTEGER_CST));
5585 }
5586 }
5587 STMT_VINFO_TYPE (stmt_info) = shift_vec_info_type;
5588 DUMP_VECT_SCOPE ("vectorizable_shift");
5589 vect_model_simple_cost (vinfo, stmt_info, ncopies, dt,
5590 scalar_shift_arg ? 1 : ndts, slp_node, cost_vec);
5591 return true;
5592 }
5593
5594 /* Transform. */
5595
5596 if (dump_enabled_p ())
5597 dump_printf_loc (MSG_NOTE, vect_location,
5598 "transform binary/unary operation.\n");
5599
5600 if (incompatible_op1_vectype_p && !slp_node)
5601 {
5602 gcc_assert (!scalar_shift_arg && was_scalar_shift_arg);
5603 op1 = fold_convert (TREE_TYPE (vectype), op1);
5604 if (dt[1] != vect_constant_def)
5605 op1 = vect_init_vector (vinfo, stmt_info, op1,
5606 TREE_TYPE (vectype), NULL);
5607 }
5608
5609 /* Handle def. */
5610 vec_dest = vect_create_destination_var (scalar_dest, vectype);
5611
5612 if (scalar_shift_arg && dt[1] != vect_internal_def)
5613 {
5614 /* Vector shl and shr insn patterns can be defined with scalar
5615 operand 2 (shift operand). In this case, use constant or loop
5616 invariant op1 directly, without extending it to vector mode
5617 first. */
5618 optab_op2_mode = insn_data[icode].operand[2].mode;
5619 if (!VECTOR_MODE_P (optab_op2_mode))
5620 {
5621 if (dump_enabled_p ())
5622 dump_printf_loc (MSG_NOTE, vect_location,
5623 "operand 1 using scalar mode.\n");
5624 vec_oprnd1 = op1;
5625 vec_oprnds1.create (slp_node ? slp_node->vec_stmts_size : ncopies);
5626 vec_oprnds1.quick_push (vec_oprnd1);
5627 /* Store vec_oprnd1 for every vector stmt to be created.
5628 We check during the analysis that all the shift arguments
5629 are the same.
5630 TODO: Allow different constants for different vector
5631 stmts generated for an SLP instance. */
5632 for (k = 0;
5633 k < (slp_node ? slp_node->vec_stmts_size - 1 : ncopies - 1); k++)
5634 vec_oprnds1.quick_push (vec_oprnd1);
5635 }
5636 }
5637 else if (!scalar_shift_arg && slp_node && incompatible_op1_vectype_p)
5638 {
5639 if (was_scalar_shift_arg)
5640 {
5641 /* If the argument was the same in all lanes, create
5642 the correctly typed vector shift amount directly. */
5643 op1 = fold_convert (TREE_TYPE (vectype), op1);
5644 op1 = vect_init_vector (vinfo, stmt_info, op1, TREE_TYPE (vectype),
5645 !loop_vinfo ? gsi : NULL);
5646 vec_oprnd1 = vect_init_vector (vinfo, stmt_info, op1, vectype,
5647 !loop_vinfo ? gsi : NULL);
5648 vec_oprnds1.create (slp_node->vec_stmts_size);
5649 for (k = 0; k < slp_node->vec_stmts_size; k++)
5650 vec_oprnds1.quick_push (vec_oprnd1);
5651 }
5652 else if (dt[1] == vect_constant_def)
5653 /* The constant shift amount has been adjusted in place. */
5654 ;
5655 else
5656 gcc_assert (TYPE_MODE (op1_vectype) == TYPE_MODE (vectype));
5657 }
5658
5659 /* vec_oprnd1 is available if operand 1 should be of a scalar type
5660 (a special case for certain kinds of vector shifts); otherwise,
5661 operand 1 should be of a vector type (the usual case). */
5662 vect_get_vec_defs (vinfo, stmt_info, slp_node, ncopies,
5663 op0, &vec_oprnds0,
5664 vec_oprnd1 ? NULL_TREE : op1, &vec_oprnds1);
5665
5666 /* Arguments are ready. Create the new vector stmt. */
5667 FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
5668 {
5669 /* For internal defs where we need to use a scalar shift arg,
5670 extract the first lane. */
5671 if (scalar_shift_arg && dt[1] == vect_internal_def)
5672 {
5673 vop1 = vec_oprnds1[0];
5674 new_temp = make_ssa_name (TREE_TYPE (TREE_TYPE (vop1)));
5675 gassign *new_stmt
5676 = gimple_build_assign (new_temp,
5677 build3 (BIT_FIELD_REF, TREE_TYPE (new_temp),
5678 vop1,
5679 TYPE_SIZE (TREE_TYPE (new_temp)),
5680 bitsize_zero_node));
5681 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
5682 vop1 = new_temp;
5683 }
5684 else
5685 vop1 = vec_oprnds1[i];
5686 gassign *new_stmt = gimple_build_assign (vec_dest, code, vop0, vop1);
5687 new_temp = make_ssa_name (vec_dest, new_stmt);
5688 gimple_assign_set_lhs (new_stmt, new_temp);
5689 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
5690 if (slp_node)
5691 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
5692 else
5693 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
5694 }
5695
5696 if (!slp_node)
5697 *vec_stmt = STMT_VINFO_VEC_STMTS (stmt_info)[0];
5698
5699 vec_oprnds0.release ();
5700 vec_oprnds1.release ();
5701
5702 return true;
5703 }
5704
5705
5706 /* Function vectorizable_operation.
5707
5708 Check if STMT_INFO performs a binary, unary or ternary operation that can
5709 be vectorized.
5710 If VEC_STMT is also passed, vectorize STMT_INFO: create a vectorized
5711 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
5712 Return true if STMT_INFO is vectorizable in this way. */
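/* A hedged example of the two code-generation shapes used below (the
vector SSA names are assumptions): a plain addition becomes

vect_c_8 = vect_a_6 + vect_b_7;

while the same addition inside a fully-masked reduction chain is
emitted as a conditional internal function whose inactive lanes fall
back to the reduction input, e.g.

vect_c_8 = .COND_ADD (loop_mask_9, vect_a_6, vect_b_7, vect_a_6); */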
5713
5714 static bool
5715 vectorizable_operation (vec_info *vinfo,
5716 stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
5717 gimple **vec_stmt, slp_tree slp_node,
5718 stmt_vector_for_cost *cost_vec)
5719 {
5720 tree vec_dest;
5721 tree scalar_dest;
5722 tree op0, op1 = NULL_TREE, op2 = NULL_TREE;
5723 tree vectype;
5724 loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
5725 enum tree_code code, orig_code;
5726 machine_mode vec_mode;
5727 tree new_temp;
5728 int op_type;
5729 optab optab;
5730 bool target_support_p;
5731 enum vect_def_type dt[3]
5732 = {vect_unknown_def_type, vect_unknown_def_type, vect_unknown_def_type};
5733 int ndts = 3;
5734 poly_uint64 nunits_in;
5735 poly_uint64 nunits_out;
5736 tree vectype_out;
5737 int ncopies, vec_num;
5738 int i;
5739 vec<tree> vec_oprnds0 = vNULL;
5740 vec<tree> vec_oprnds1 = vNULL;
5741 vec<tree> vec_oprnds2 = vNULL;
5742 tree vop0, vop1, vop2;
5743 bb_vec_info bb_vinfo = dyn_cast <bb_vec_info> (vinfo);
5744
5745 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
5746 return false;
5747
5748 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
5749 && ! vec_stmt)
5750 return false;
5751
5752 /* Is STMT a vectorizable binary/unary/ternary operation? */
5753 gassign *stmt = dyn_cast <gassign *> (stmt_info->stmt);
5754 if (!stmt)
5755 return false;
5756
5757 /* Loads and stores are handled in vectorizable_{load,store}. */
5758 if (STMT_VINFO_DATA_REF (stmt_info))
5759 return false;
5760
5761 orig_code = code = gimple_assign_rhs_code (stmt);
5762
5763 /* Shifts are handled in vectorizable_shift. */
5764 if (code == LSHIFT_EXPR
5765 || code == RSHIFT_EXPR
5766 || code == LROTATE_EXPR
5767 || code == RROTATE_EXPR)
5768 return false;
5769
5770 /* Comparisons are handled in vectorizable_comparison. */
5771 if (TREE_CODE_CLASS (code) == tcc_comparison)
5772 return false;
5773
5774 /* Conditions are handled in vectorizable_condition. */
5775 if (code == COND_EXPR)
5776 return false;
5777
5778 /* For pointer addition and subtraction, we should use the normal
5779 plus and minus for the vector operation. */
5780 if (code == POINTER_PLUS_EXPR)
5781 code = PLUS_EXPR;
5782 if (code == POINTER_DIFF_EXPR)
5783 code = MINUS_EXPR;
5784
5785 /* Support only unary, binary and ternary operations. */
5786 op_type = TREE_CODE_LENGTH (code);
5787 if (op_type != unary_op && op_type != binary_op && op_type != ternary_op)
5788 {
5789 if (dump_enabled_p ())
5790 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5791 "num. args = %d (not unary/binary/ternary op).\n",
5792 op_type);
5793 return false;
5794 }
5795
5796 scalar_dest = gimple_assign_lhs (stmt);
5797 vectype_out = STMT_VINFO_VECTYPE (stmt_info);
5798
5799 /* Most operations cannot handle bit-precision types without extra
5800 truncations. */
5801 bool mask_op_p = VECTOR_BOOLEAN_TYPE_P (vectype_out);
5802 if (!mask_op_p
5803 && !type_has_mode_precision_p (TREE_TYPE (scalar_dest))
5804 /* Exceptions are bitwise binary operations. */
5805 && code != BIT_IOR_EXPR
5806 && code != BIT_XOR_EXPR
5807 && code != BIT_AND_EXPR)
5808 {
5809 if (dump_enabled_p ())
5810 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5811 "bit-precision arithmetic not supported.\n");
5812 return false;
5813 }
5814
5815 slp_tree slp_op0;
5816 if (!vect_is_simple_use (vinfo, stmt_info, slp_node,
5817 0, &op0, &slp_op0, &dt[0], &vectype))
5818 {
5819 if (dump_enabled_p ())
5820 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5821 "use not simple.\n");
5822 return false;
5823 }
5824 /* If op0 is an external or constant def, infer the vector type
5825 from the scalar type. */
5826 if (!vectype)
5827 {
5828 /* For a boolean type we cannot determine the vectype from an
5829 invariant value (we don't know whether it is a vector
5830 of booleans or a vector of integers). We use the output
5831 vectype because operations on booleans don't change the
5832 type. */
5833 if (VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (op0)))
5834 {
5835 if (!VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (scalar_dest)))
5836 {
5837 if (dump_enabled_p ())
5838 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5839 "not supported operation on bool value.\n");
5840 return false;
5841 }
5842 vectype = vectype_out;
5843 }
5844 else
5845 vectype = get_vectype_for_scalar_type (vinfo, TREE_TYPE (op0),
5846 slp_node);
5847 }
5848 if (vec_stmt)
5849 gcc_assert (vectype);
5850 if (!vectype)
5851 {
5852 if (dump_enabled_p ())
5853 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5854 "no vectype for scalar type %T\n",
5855 TREE_TYPE (op0));
5856
5857 return false;
5858 }
5859
5860 nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
5861 nunits_in = TYPE_VECTOR_SUBPARTS (vectype);
5862 if (maybe_ne (nunits_out, nunits_in))
5863 return false;
5864
5865 tree vectype2 = NULL_TREE, vectype3 = NULL_TREE;
5866 slp_tree slp_op1 = NULL, slp_op2 = NULL;
5867 if (op_type == binary_op || op_type == ternary_op)
5868 {
5869 if (!vect_is_simple_use (vinfo, stmt_info, slp_node,
5870 1, &op1, &slp_op1, &dt[1], &vectype2))
5871 {
5872 if (dump_enabled_p ())
5873 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5874 "use not simple.\n");
5875 return false;
5876 }
5877 }
5878 if (op_type == ternary_op)
5879 {
5880 if (!vect_is_simple_use (vinfo, stmt_info, slp_node,
5881 2, &op2, &slp_op2, &dt[2], &vectype3))
5882 {
5883 if (dump_enabled_p ())
5884 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5885 "use not simple.\n");
5886 return false;
5887 }
5888 }
5889
5890 /* Multiple types in SLP are handled by creating the appropriate number of
5891 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
5892 case of SLP. */
5893 if (slp_node)
5894 {
5895 ncopies = 1;
5896 vec_num = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
5897 }
5898 else
5899 {
5900 ncopies = vect_get_num_copies (loop_vinfo, vectype);
5901 vec_num = 1;
5902 }
5903
5904 gcc_assert (ncopies >= 1);
5905
5906 /* Reject attempts to combine mask types with nonmask types, e.g. if
5907 we have an AND between a (nonmask) boolean loaded from memory and
5908 a (mask) boolean result of a comparison.
5909
5910 TODO: We could easily fix these cases up using pattern statements. */
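/* Hedged example of the rejected mix (the vector types shown are
illustrative): with _Bool b[N],

_1 = b[i_2]; // nonmask vector, e.g. vector(16) unsigned char
_3 = x_4 > y_5; // mask vector, e.g. vector(16) <signed-boolean:1>
_6 = _1 & _3; // mask and nonmask operands -> rejected here. */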
5911 if (VECTOR_BOOLEAN_TYPE_P (vectype) != mask_op_p
5912 || (vectype2 && VECTOR_BOOLEAN_TYPE_P (vectype2) != mask_op_p)
5913 || (vectype3 && VECTOR_BOOLEAN_TYPE_P (vectype3) != mask_op_p))
5914 {
5915 if (dump_enabled_p ())
5916 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5917 "mixed mask and nonmask vector types\n");
5918 return false;
5919 }
5920
5921 /* Supportable by target? */
5922
5923 vec_mode = TYPE_MODE (vectype);
5924 if (code == MULT_HIGHPART_EXPR)
5925 target_support_p = can_mult_highpart_p (vec_mode, TYPE_UNSIGNED (vectype));
5926 else
5927 {
5928 optab = optab_for_tree_code (code, vectype, optab_default);
5929 if (!optab)
5930 {
5931 if (dump_enabled_p ())
5932 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5933 "no optab.\n");
5934 return false;
5935 }
5936 target_support_p = (optab_handler (optab, vec_mode)
5937 != CODE_FOR_nothing);
5938 }
5939
5940 if (!target_support_p)
5941 {
5942 if (dump_enabled_p ())
5943 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5944 "op not supported by target.\n");
5945 /* Check only during analysis. */
5946 if (maybe_ne (GET_MODE_SIZE (vec_mode), UNITS_PER_WORD)
5947 || (!vec_stmt && !vect_worthwhile_without_simd_p (vinfo, code)))
5948 return false;
5949 if (dump_enabled_p ())
5950 dump_printf_loc (MSG_NOTE, vect_location,
5951 "proceeding using word mode.\n");
5952 }
5953
5954 /* Worthwhile without SIMD support? Check only during analysis. */
5955 if (!VECTOR_MODE_P (vec_mode)
5956 && !vec_stmt
5957 && !vect_worthwhile_without_simd_p (vinfo, code))
5958 {
5959 if (dump_enabled_p ())
5960 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5961 "not worthwhile without SIMD support.\n");
5962 return false;
5963 }
5964
5965 int reduc_idx = STMT_VINFO_REDUC_IDX (stmt_info);
5966 vec_loop_masks *masks = (loop_vinfo ? &LOOP_VINFO_MASKS (loop_vinfo) : NULL);
5967 internal_fn cond_fn = get_conditional_internal_fn (code);
5968
5969 if (!vec_stmt) /* transformation not required. */
5970 {
5971 /* If this operation is part of a reduction, a fully-masked loop
5972 should only change the active lanes of the reduction chain,
5973 keeping the inactive lanes as-is. */
5974 if (loop_vinfo
5975 && LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo)
5976 && reduc_idx >= 0)
5977 {
5978 if (cond_fn == IFN_LAST
5979 || !direct_internal_fn_supported_p (cond_fn, vectype,
5980 OPTIMIZE_FOR_SPEED))
5981 {
5982 if (dump_enabled_p ())
5983 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5984 "can't use a fully-masked loop because no"
5985 " conditional operation is available.\n");
5986 LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo) = false;
5987 }
5988 else
5989 vect_record_loop_mask (loop_vinfo, masks, ncopies * vec_num,
5990 vectype, NULL);
5991 }
5992
5993 /* Put types on constant and invariant SLP children. */
5994 if (slp_node
5995 && (!vect_maybe_update_slp_op_vectype (slp_op0, vectype)
5996 || !vect_maybe_update_slp_op_vectype (slp_op1, vectype)
5997 || !vect_maybe_update_slp_op_vectype (slp_op2, vectype)))
5998 {
5999 if (dump_enabled_p ())
6000 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6001 "incompatible vector types for invariants\n");
6002 return false;
6003 }
6004
6005 STMT_VINFO_TYPE (stmt_info) = op_vec_info_type;
6006 DUMP_VECT_SCOPE ("vectorizable_operation");
6007 vect_model_simple_cost (vinfo, stmt_info,
6008 ncopies, dt, ndts, slp_node, cost_vec);
6009 return true;
6010 }
6011
6012 /* Transform. */
6013
6014 if (dump_enabled_p ())
6015 dump_printf_loc (MSG_NOTE, vect_location,
6016 "transform binary/unary operation.\n");
6017
6018 bool masked_loop_p = loop_vinfo && LOOP_VINFO_FULLY_MASKED_P (loop_vinfo);
6019
6020 /* POINTER_DIFF_EXPR has pointer arguments which are vectorized as
6021 vectors with unsigned elements, but the result is signed. So, we
6022 need to compute the MINUS_EXPR into a vectype temporary and
6023 VIEW_CONVERT_EXPR it into the final vectype_out result. */
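/* Hedged illustration (64-bit elements assumed): for

d_3 = p_1 - q_2; // POINTER_DIFF_EXPR on two pointers

the subtraction below is generated on a vector of unsigned elements,
and a separate VIEW_CONVERT_EXPR statement then reinterprets that
temporary as the signed vectype_out result. */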
6024 tree vec_cvt_dest = NULL_TREE;
6025 if (orig_code == POINTER_DIFF_EXPR)
6026 {
6027 vec_dest = vect_create_destination_var (scalar_dest, vectype);
6028 vec_cvt_dest = vect_create_destination_var (scalar_dest, vectype_out);
6029 }
6030 /* Handle def. */
6031 else
6032 vec_dest = vect_create_destination_var (scalar_dest, vectype_out);
6033
6034 /* In case the vectorization factor (VF) is bigger than the number
6035 of elements that we can fit in a vectype (nunits), we have to generate
6036 more than one vector stmt, i.e., we need to "unroll" the
6037 vector stmt by a factor of VF/nunits. In doing so, we record a pointer
6038 from one copy of the vector stmt to the next, in the field
6039 STMT_VINFO_RELATED_STMT. This is necessary in order to allow following
6040 stages to find the correct vector defs to be used when vectorizing
6041 stmts that use the defs of the current stmt. The example below
6042 illustrates the vectorization process when VF=16 and nunits=4 (i.e.,
6043 we need to create 4 vectorized stmts):
6044
6045 before vectorization:
6046 RELATED_STMT VEC_STMT
6047 S1: x = memref - -
6048 S2: z = x + 1 - -
6049
6050 step 1: vectorize stmt S1 (done in vectorizable_load. See more details
6051 there):
6052 RELATED_STMT VEC_STMT
6053 VS1_0: vx0 = memref0 VS1_1 -
6054 VS1_1: vx1 = memref1 VS1_2 -
6055 VS1_2: vx2 = memref2 VS1_3 -
6056 VS1_3: vx3 = memref3 - -
6057 S1: x = load - VS1_0
6058 S2: z = x + 1 - -
6059
6060 step2: vectorize stmt S2 (done here):
6061 To vectorize stmt S2 we first need to find the relevant vector
6062 def for the first operand 'x'. This is, as usual, obtained from
6063 the vector stmt recorded in the STMT_VINFO_VEC_STMT of the stmt
6064 that defines 'x' (S1). This way we find the stmt VS1_0, and the
6065 relevant vector def 'vx0'. Having found 'vx0' we can generate
6066 the vector stmt VS2_0, and as usual, record it in the
6067 STMT_VINFO_VEC_STMT of stmt S2.
6068 When creating the second copy (VS2_1), we obtain the relevant vector
6069 def from the vector stmt recorded in the STMT_VINFO_RELATED_STMT of
6070 stmt VS1_0. This way we find the stmt VS1_1 and the relevant
6071 vector def 'vx1'. Using 'vx1' we create stmt VS2_1 and record a
6072 pointer to it in the STMT_VINFO_RELATED_STMT of the vector stmt VS2_0.
6073 Similarly when creating stmts VS2_2 and VS2_3. This is the resulting
6074 chain of stmts and pointers:
6075 RELATED_STMT VEC_STMT
6076 VS1_0: vx0 = memref0 VS1_1 -
6077 VS1_1: vx1 = memref1 VS1_2 -
6078 VS1_2: vx2 = memref2 VS1_3 -
6079 VS1_3: vx3 = memref3 - -
6080 S1: x = load - VS1_0
6081 VS2_0: vz0 = vx0 + v1 VS2_1 -
6082 VS2_1: vz1 = vx1 + v1 VS2_2 -
6083 VS2_2: vz2 = vx2 + v1 VS2_3 -
6084 VS2_3: vz3 = vx3 + v1 - -
6085 S2: z = x + 1 - VS2_0 */
6086
6087 vect_get_vec_defs (vinfo, stmt_info, slp_node, ncopies,
6088 op0, &vec_oprnds0, op1, &vec_oprnds1, op2, &vec_oprnds2);
6089 /* Arguments are ready. Create the new vector stmt. */
6090 FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
6091 {
6092 gimple *new_stmt = NULL;
6093 vop1 = ((op_type == binary_op || op_type == ternary_op)
6094 ? vec_oprnds1[i] : NULL_TREE);
6095 vop2 = ((op_type == ternary_op) ? vec_oprnds2[i] : NULL_TREE);
6096 if (masked_loop_p && reduc_idx >= 0)
6097 {
6098 /* Perform the operation on active elements only and take
6099 inactive elements from the reduction chain input. */
6100 gcc_assert (!vop2);
6101 vop2 = reduc_idx == 1 ? vop1 : vop0;
6102 tree mask = vect_get_loop_mask (gsi, masks, vec_num * ncopies,
6103 vectype, i);
6104 gcall *call = gimple_build_call_internal (cond_fn, 4, mask,
6105 vop0, vop1, vop2);
6106 new_temp = make_ssa_name (vec_dest, call);
6107 gimple_call_set_lhs (call, new_temp);
6108 gimple_call_set_nothrow (call, true);
6109 vect_finish_stmt_generation (vinfo, stmt_info, call, gsi);
6110 new_stmt = call;
6111 }
6112 else
6113 {
6114 new_stmt = gimple_build_assign (vec_dest, code, vop0, vop1, vop2);
6115 new_temp = make_ssa_name (vec_dest, new_stmt);
6116 gimple_assign_set_lhs (new_stmt, new_temp);
6117 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
6118 if (vec_cvt_dest)
6119 {
6120 new_temp = build1 (VIEW_CONVERT_EXPR, vectype_out, new_temp);
6121 new_stmt = gimple_build_assign (vec_cvt_dest, VIEW_CONVERT_EXPR,
6122 new_temp);
6123 new_temp = make_ssa_name (vec_cvt_dest, new_stmt);
6124 gimple_assign_set_lhs (new_stmt, new_temp);
6125 vect_finish_stmt_generation (vinfo, stmt_info,
6126 new_stmt, gsi);
6127 }
6128 }
6129 if (slp_node)
6130 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
6131 else
6132 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
6133 }
6134
6135 if (!slp_node)
6136 *vec_stmt = STMT_VINFO_VEC_STMTS (stmt_info)[0];
6137
6138 vec_oprnds0.release ();
6139 vec_oprnds1.release ();
6140 vec_oprnds2.release ();
6141
6142 return true;
6143 }
6144
6145 /* A helper function to ensure data reference DR_INFO's base alignment. */
6146
6147 static void
6148 ensure_base_align (dr_vec_info *dr_info)
6149 {
6150 if (dr_info->misalignment == DR_MISALIGNMENT_UNINITIALIZED)
6151 return;
6152
6153 if (dr_info->base_misaligned)
6154 {
6155 tree base_decl = dr_info->base_decl;
6156
6157 // We should only be able to increase the alignment of a base object if
6158 // we know what its new alignment should be at compile time.
6159 unsigned HOST_WIDE_INT align_base_to =
6160 DR_TARGET_ALIGNMENT (dr_info).to_constant () * BITS_PER_UNIT;
6161
6162 if (decl_in_symtab_p (base_decl))
6163 symtab_node::get (base_decl)->increase_alignment (align_base_to);
6164 else if (DECL_ALIGN (base_decl) < align_base_to)
6165 {
6166 SET_DECL_ALIGN (base_decl, align_base_to);
6167 DECL_USER_ALIGN (base_decl) = 1;
6168 }
6169 dr_info->base_misaligned = false;
6170 }
6171 }
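/* A hedged sketch of the effect (the 16-byte target alignment is an
assumption): for a base object such as

static int a[256];

accessed by vector loads/stores, the code above either requests the
alignment via increase_alignment on the decl's symtab node or does the
equivalent of

SET_DECL_ALIGN (a, 16 * BITS_PER_UNIT);
DECL_USER_ALIGN (a) = 1;

so that later misalignment computations can rely on that alignment. */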
6172
6173
6174 /* Function get_group_alias_ptr_type.
6175
6176 Return the alias type for the group starting at FIRST_STMT_INFO. */
6177
6178 static tree
6179 get_group_alias_ptr_type (stmt_vec_info first_stmt_info)
6180 {
6181 struct data_reference *first_dr, *next_dr;
6182
6183 first_dr = STMT_VINFO_DATA_REF (first_stmt_info);
6184 stmt_vec_info next_stmt_info = DR_GROUP_NEXT_ELEMENT (first_stmt_info);
6185 while (next_stmt_info)
6186 {
6187 next_dr = STMT_VINFO_DATA_REF (next_stmt_info);
6188 if (get_alias_set (DR_REF (first_dr))
6189 != get_alias_set (DR_REF (next_dr)))
6190 {
6191 if (dump_enabled_p ())
6192 dump_printf_loc (MSG_NOTE, vect_location,
6193 "conflicting alias set types.\n");
6194 return ptr_type_node;
6195 }
6196 next_stmt_info = DR_GROUP_NEXT_ELEMENT (next_stmt_info);
6197 }
6198 return reference_alias_ptr_type (DR_REF (first_dr));
6199 }
6200
6201
6202 /* Function scan_operand_equal_p.
6203
6204 Helper function for check_scan_store. Compare two references
6205 with .GOMP_SIMD_LANE bases. */
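/* A hedged example of the references this compares (the names follow
the larger example further below): the store D.2042[_25] = _28 and
the load _27 = D.2042[_25], whose operands after lowering are MEM_REFs
whose address is a POINTER_PLUS_EXPR of &D.2042 and an offset derived
from a .GOMP_SIMD_LANE result; the helper peels off the base, offset
and step so the two references can be compared structurally. */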
6206
6207 static bool
6208 scan_operand_equal_p (tree ref1, tree ref2)
6209 {
6210 tree ref[2] = { ref1, ref2 };
6211 poly_int64 bitsize[2], bitpos[2];
6212 tree offset[2], base[2];
6213 for (int i = 0; i < 2; ++i)
6214 {
6215 machine_mode mode;
6216 int unsignedp, reversep, volatilep = 0;
6217 base[i] = get_inner_reference (ref[i], &bitsize[i], &bitpos[i],
6218 &offset[i], &mode, &unsignedp,
6219 &reversep, &volatilep);
6220 if (reversep || volatilep || maybe_ne (bitpos[i], 0))
6221 return false;
6222 if (TREE_CODE (base[i]) == MEM_REF
6223 && offset[i] == NULL_TREE
6224 && TREE_CODE (TREE_OPERAND (base[i], 0)) == SSA_NAME)
6225 {
6226 gimple *def_stmt = SSA_NAME_DEF_STMT (TREE_OPERAND (base[i], 0));
6227 if (is_gimple_assign (def_stmt)
6228 && gimple_assign_rhs_code (def_stmt) == POINTER_PLUS_EXPR
6229 && TREE_CODE (gimple_assign_rhs1 (def_stmt)) == ADDR_EXPR
6230 && TREE_CODE (gimple_assign_rhs2 (def_stmt)) == SSA_NAME)
6231 {
6232 if (maybe_ne (mem_ref_offset (base[i]), 0))
6233 return false;
6234 base[i] = TREE_OPERAND (gimple_assign_rhs1 (def_stmt), 0);
6235 offset[i] = gimple_assign_rhs2 (def_stmt);
6236 }
6237 }
6238 }
6239
6240 if (!operand_equal_p (base[0], base[1], 0))
6241 return false;
6242 if (maybe_ne (bitsize[0], bitsize[1]))
6243 return false;
6244 if (offset[0] != offset[1])
6245 {
6246 if (!offset[0] || !offset[1])
6247 return false;
6248 if (!operand_equal_p (offset[0], offset[1], 0))
6249 {
6250 tree step[2];
6251 for (int i = 0; i < 2; ++i)
6252 {
6253 step[i] = integer_one_node;
6254 if (TREE_CODE (offset[i]) == SSA_NAME)
6255 {
6256 gimple *def_stmt = SSA_NAME_DEF_STMT (offset[i]);
6257 if (is_gimple_assign (def_stmt)
6258 && gimple_assign_rhs_code (def_stmt) == MULT_EXPR
6259 && (TREE_CODE (gimple_assign_rhs2 (def_stmt))
6260 == INTEGER_CST))
6261 {
6262 step[i] = gimple_assign_rhs2 (def_stmt);
6263 offset[i] = gimple_assign_rhs1 (def_stmt);
6264 }
6265 }
6266 else if (TREE_CODE (offset[i]) == MULT_EXPR)
6267 {
6268 step[i] = TREE_OPERAND (offset[i], 1);
6269 offset[i] = TREE_OPERAND (offset[i], 0);
6270 }
6271 tree rhs1 = NULL_TREE;
6272 if (TREE_CODE (offset[i]) == SSA_NAME)
6273 {
6274 gimple *def_stmt = SSA_NAME_DEF_STMT (offset[i]);
6275 if (gimple_assign_cast_p (def_stmt))
6276 rhs1 = gimple_assign_rhs1 (def_stmt);
6277 }
6278 else if (CONVERT_EXPR_P (offset[i]))
6279 rhs1 = TREE_OPERAND (offset[i], 0);
6280 if (rhs1
6281 && INTEGRAL_TYPE_P (TREE_TYPE (rhs1))
6282 && INTEGRAL_TYPE_P (TREE_TYPE (offset[i]))
6283 && (TYPE_PRECISION (TREE_TYPE (offset[i]))
6284 >= TYPE_PRECISION (TREE_TYPE (rhs1))))
6285 offset[i] = rhs1;
6286 }
6287 if (!operand_equal_p (offset[0], offset[1], 0)
6288 || !operand_equal_p (step[0], step[1], 0))
6289 return false;
6290 }
6291 }
6292 return true;
6293 }
6294
6295
6296 enum scan_store_kind {
6297 /* Normal permutation. */
6298 scan_store_kind_perm,
6299
6300 /* Whole vector left shift permutation with zero init. */
6301 scan_store_kind_lshift_zero,
6302
6303 /* Whole vector left shift permutation and VEC_COND_EXPR. */
6304 scan_store_kind_lshift_cond
6305 };
6306
6307 /* Function scan_store_can_perm_p.
6308
6309 Verify if we can perform the needed permutations or whole vector shifts.
6310 Return -1 on failure, otherwise the exact log2 of vectype's nunits.
6311 USE_WHOLE_VECTOR, if non-NULL, is filled with the enum scan_store_kind
6312 of the operation to perform at each step. */
6313
6314 static int
6315 scan_store_can_perm_p (tree vectype, tree init,
6316 vec<enum scan_store_kind> *use_whole_vector = NULL)
6317 {
6318 enum machine_mode vec_mode = TYPE_MODE (vectype);
6319 unsigned HOST_WIDE_INT nunits;
6320 if (!TYPE_VECTOR_SUBPARTS (vectype).is_constant (&nunits))
6321 return -1;
6322 int units_log2 = exact_log2 (nunits);
6323 if (units_log2 <= 0)
6324 return -1;
6325
6326 int i;
6327 enum scan_store_kind whole_vector_shift_kind = scan_store_kind_perm;
6328 for (i = 0; i <= units_log2; ++i)
6329 {
6330 unsigned HOST_WIDE_INT j, k;
6331 enum scan_store_kind kind = scan_store_kind_perm;
6332 vec_perm_builder sel (nunits, nunits, 1);
6333 sel.quick_grow (nunits);
6334 if (i == units_log2)
6335 {
6336 for (j = 0; j < nunits; ++j)
6337 sel[j] = nunits - 1;
6338 }
6339 else
6340 {
6341 for (j = 0; j < (HOST_WIDE_INT_1U << i); ++j)
6342 sel[j] = j;
6343 for (k = 0; j < nunits; ++j, ++k)
6344 sel[j] = nunits + k;
6345 }
6346 vec_perm_indices indices (sel, i == units_log2 ? 1 : 2, nunits);
6347 if (!can_vec_perm_const_p (vec_mode, indices))
6348 {
6349 if (i == units_log2)
6350 return -1;
6351
6352 if (whole_vector_shift_kind == scan_store_kind_perm)
6353 {
6354 if (optab_handler (vec_shl_optab, vec_mode) == CODE_FOR_nothing)
6355 return -1;
6356 whole_vector_shift_kind = scan_store_kind_lshift_zero;
6357 /* Whole vector shifts shift in zeros, so if init is an all-zero
6358 constant, there is no need to do anything further. */
6359 if ((TREE_CODE (init) != INTEGER_CST
6360 && TREE_CODE (init) != REAL_CST)
6361 || !initializer_zerop (init))
6362 {
6363 tree masktype = truth_type_for (vectype);
6364 if (!expand_vec_cond_expr_p (vectype, masktype, VECTOR_CST))
6365 return -1;
6366 whole_vector_shift_kind = scan_store_kind_lshift_cond;
6367 }
6368 }
6369 kind = whole_vector_shift_kind;
6370 }
6371 if (use_whole_vector)
6372 {
6373 if (kind != scan_store_kind_perm && use_whole_vector->is_empty ())
6374 use_whole_vector->safe_grow_cleared (i, true);
6375 if (kind != scan_store_kind_perm || !use_whole_vector->is_empty ())
6376 use_whole_vector->safe_push (kind);
6377 }
6378 }
6379
6380 return units_log2;
6381 }
6382
6383
6384 /* Function check_scan_store.
6385
6386 Check magic stores for #pragma omp scan {in,ex}clusive reductions. */
6387
6388 static bool
6389 check_scan_store (vec_info *vinfo, stmt_vec_info stmt_info, tree vectype,
6390 enum vect_def_type rhs_dt, bool slp, tree mask,
6391 vect_memory_access_type memory_access_type)
6392 {
6393 loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
6394 dr_vec_info *dr_info = STMT_VINFO_DR_INFO (stmt_info);
6395 tree ref_type;
6396
6397 gcc_assert (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) > 1);
6398 if (slp
6399 || mask
6400 || memory_access_type != VMAT_CONTIGUOUS
6401 || TREE_CODE (DR_BASE_ADDRESS (dr_info->dr)) != ADDR_EXPR
6402 || !VAR_P (TREE_OPERAND (DR_BASE_ADDRESS (dr_info->dr), 0))
6403 || loop_vinfo == NULL
6404 || LOOP_VINFO_FULLY_MASKED_P (loop_vinfo)
6405 || STMT_VINFO_GROUPED_ACCESS (stmt_info)
6406 || !integer_zerop (get_dr_vinfo_offset (vinfo, dr_info))
6407 || !integer_zerop (DR_INIT (dr_info->dr))
6408 || !(ref_type = reference_alias_ptr_type (DR_REF (dr_info->dr)))
6409 || !alias_sets_conflict_p (get_alias_set (vectype),
6410 get_alias_set (TREE_TYPE (ref_type))))
6411 {
6412 if (dump_enabled_p ())
6413 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6414 "unsupported OpenMP scan store.\n");
6415 return false;
6416 }
6417
6418 /* We need to pattern match code built by OpenMP lowering and simplified
6419 by subsequent optimizations into something we can handle.
6420 #pragma omp simd reduction(inscan,+:r)
6421 for (...)
6422 {
6423 r += something ();
6424 #pragma omp scan inclusive (r)
6425 use (r);
6426 }
6427 shall have body with:
6428 // Initialization for input phase, store the reduction initializer:
6429 _20 = .GOMP_SIMD_LANE (simduid.3_14(D), 0);
6430 _21 = .GOMP_SIMD_LANE (simduid.3_14(D), 1);
6431 D.2042[_21] = 0;
6432 // Actual input phase:
6433 ...
6434 r.0_5 = D.2042[_20];
6435 _6 = _4 + r.0_5;
6436 D.2042[_20] = _6;
6437 // Initialization for scan phase:
6438 _25 = .GOMP_SIMD_LANE (simduid.3_14(D), 2);
6439 _26 = D.2043[_25];
6440 _27 = D.2042[_25];
6441 _28 = _26 + _27;
6442 D.2043[_25] = _28;
6443 D.2042[_25] = _28;
6444 // Actual scan phase:
6445 ...
6446 r.1_8 = D.2042[_20];
6447 ...
6448 The "omp simd array" variable D.2042 holds the privatized copy used
6449 inside of the loop and D.2043 is another one that holds copies of
6450 the current original list item. The separate GOMP_SIMD_LANE ifn
6451 kinds are there in order to allow optimizing the initializer store
6452 and combiner sequence, e.g. if it is originally some C++ish user
6453 defined reduction, but allow the vectorizer to pattern recognize it
6454 and turn into the appropriate vectorized scan.
6455
6456 For exclusive scan, this is slightly different:
6457 #pragma omp simd reduction(inscan,+:r)
6458 for (...)
6459 {
6460 use (r);
6461 #pragma omp scan exclusive (r)
6462 r += something ();
6463 }
6464 shall have body with:
6465 // Initialization for input phase, store the reduction initializer:
6466 _20 = .GOMP_SIMD_LANE (simduid.3_14(D), 0);
6467 _21 = .GOMP_SIMD_LANE (simduid.3_14(D), 1);
6468 D.2042[_21] = 0;
6469 // Actual input phase:
6470 ...
6471 r.0_5 = D.2042[_20];
6472 _6 = _4 + r.0_5;
6473 D.2042[_20] = _6;
6474 // Initialization for scan phase:
6475 _25 = .GOMP_SIMD_LANE (simduid.3_14(D), 3);
6476 _26 = D.2043[_25];
6477 D.2044[_25] = _26;
6478 _27 = D.2042[_25];
6479 _28 = _26 + _27;
6480 D.2043[_25] = _28;
6481 // Actual scan phase:
6482 ...
6483 r.1_8 = D.2044[_20];
6484 ... */
6485
6486 if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) == 2)
6487 {
6488 /* Match the D.2042[_21] = 0; store above. Just require that
6489 it is a constant or external definition store. */
6490 if (rhs_dt != vect_constant_def && rhs_dt != vect_external_def)
6491 {
6492 fail_init:
6493 if (dump_enabled_p ())
6494 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6495 "unsupported OpenMP scan initializer store.\n");
6496 return false;
6497 }
6498
6499 if (! loop_vinfo->scan_map)
6500 loop_vinfo->scan_map = new hash_map<tree, tree>;
6501 tree var = TREE_OPERAND (DR_BASE_ADDRESS (dr_info->dr), 0);
6502 tree &cached = loop_vinfo->scan_map->get_or_insert (var);
6503 if (cached)
6504 goto fail_init;
6505 cached = gimple_assign_rhs1 (STMT_VINFO_STMT (stmt_info));
6506
6507 /* These stores can be vectorized normally. */
6508 return true;
6509 }
6510
6511 if (rhs_dt != vect_internal_def)
6512 {
6513 fail:
6514 if (dump_enabled_p ())
6515 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6516 "unsupported OpenMP scan combiner pattern.\n");
6517 return false;
6518 }
6519
6520 gimple *stmt = STMT_VINFO_STMT (stmt_info);
6521 tree rhs = gimple_assign_rhs1 (stmt);
6522 if (TREE_CODE (rhs) != SSA_NAME)
6523 goto fail;
6524
6525 gimple *other_store_stmt = NULL;
6526 tree var = TREE_OPERAND (DR_BASE_ADDRESS (dr_info->dr), 0);
6527 bool inscan_var_store
6528 = lookup_attribute ("omp simd inscan", DECL_ATTRIBUTES (var)) != NULL;
6529
6530 if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) == 4)
6531 {
6532 if (!inscan_var_store)
6533 {
6534 use_operand_p use_p;
6535 imm_use_iterator iter;
6536 FOR_EACH_IMM_USE_FAST (use_p, iter, rhs)
6537 {
6538 gimple *use_stmt = USE_STMT (use_p);
6539 if (use_stmt == stmt || is_gimple_debug (use_stmt))
6540 continue;
6541 if (gimple_bb (use_stmt) != gimple_bb (stmt)
6542 || !is_gimple_assign (use_stmt)
6543 || gimple_assign_rhs_class (use_stmt) != GIMPLE_BINARY_RHS
6544 || other_store_stmt
6545 || TREE_CODE (gimple_assign_lhs (use_stmt)) != SSA_NAME)
6546 goto fail;
6547 other_store_stmt = use_stmt;
6548 }
6549 if (other_store_stmt == NULL)
6550 goto fail;
6551 rhs = gimple_assign_lhs (other_store_stmt);
6552 if (!single_imm_use (rhs, &use_p, &other_store_stmt))
6553 goto fail;
6554 }
6555 }
6556 else if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) == 3)
6557 {
6558 use_operand_p use_p;
6559 imm_use_iterator iter;
6560 FOR_EACH_IMM_USE_FAST (use_p, iter, rhs)
6561 {
6562 gimple *use_stmt = USE_STMT (use_p);
6563 if (use_stmt == stmt || is_gimple_debug (use_stmt))
6564 continue;
6565 if (other_store_stmt)
6566 goto fail;
6567 other_store_stmt = use_stmt;
6568 }
6569 }
6570 else
6571 goto fail;
6572
6573 gimple *def_stmt = SSA_NAME_DEF_STMT (rhs);
6574 if (gimple_bb (def_stmt) != gimple_bb (stmt)
6575 || !is_gimple_assign (def_stmt)
6576 || gimple_assign_rhs_class (def_stmt) != GIMPLE_BINARY_RHS)
6577 goto fail;
6578
6579 enum tree_code code = gimple_assign_rhs_code (def_stmt);
6580 /* For pointer addition, we should use the normal plus for the vector
6581 operation. */
6582 switch (code)
6583 {
6584 case POINTER_PLUS_EXPR:
6585 code = PLUS_EXPR;
6586 break;
6587 case MULT_HIGHPART_EXPR:
6588 goto fail;
6589 default:
6590 break;
6591 }
6592 if (TREE_CODE_LENGTH (code) != binary_op || !commutative_tree_code (code))
6593 goto fail;
6594
6595 tree rhs1 = gimple_assign_rhs1 (def_stmt);
6596 tree rhs2 = gimple_assign_rhs2 (def_stmt);
6597 if (TREE_CODE (rhs1) != SSA_NAME || TREE_CODE (rhs2) != SSA_NAME)
6598 goto fail;
6599
6600 gimple *load1_stmt = SSA_NAME_DEF_STMT (rhs1);
6601 gimple *load2_stmt = SSA_NAME_DEF_STMT (rhs2);
6602 if (gimple_bb (load1_stmt) != gimple_bb (stmt)
6603 || !gimple_assign_load_p (load1_stmt)
6604 || gimple_bb (load2_stmt) != gimple_bb (stmt)
6605 || !gimple_assign_load_p (load2_stmt))
6606 goto fail;
6607
6608 stmt_vec_info load1_stmt_info = loop_vinfo->lookup_stmt (load1_stmt);
6609 stmt_vec_info load2_stmt_info = loop_vinfo->lookup_stmt (load2_stmt);
6610 if (load1_stmt_info == NULL
6611 || load2_stmt_info == NULL
6612 || (STMT_VINFO_SIMD_LANE_ACCESS_P (load1_stmt_info)
6613 != STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info))
6614 || (STMT_VINFO_SIMD_LANE_ACCESS_P (load2_stmt_info)
6615 != STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info)))
6616 goto fail;
6617
6618 if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) == 4 && inscan_var_store)
6619 {
6620 dr_vec_info *load1_dr_info = STMT_VINFO_DR_INFO (load1_stmt_info);
6621 if (TREE_CODE (DR_BASE_ADDRESS (load1_dr_info->dr)) != ADDR_EXPR
6622 || !VAR_P (TREE_OPERAND (DR_BASE_ADDRESS (load1_dr_info->dr), 0)))
6623 goto fail;
6624 tree var1 = TREE_OPERAND (DR_BASE_ADDRESS (load1_dr_info->dr), 0);
6625 tree lrhs;
6626 if (lookup_attribute ("omp simd inscan", DECL_ATTRIBUTES (var1)))
6627 lrhs = rhs1;
6628 else
6629 lrhs = rhs2;
6630 use_operand_p use_p;
6631 imm_use_iterator iter;
6632 FOR_EACH_IMM_USE_FAST (use_p, iter, lrhs)
6633 {
6634 gimple *use_stmt = USE_STMT (use_p);
6635 if (use_stmt == def_stmt || is_gimple_debug (use_stmt))
6636 continue;
6637 if (other_store_stmt)
6638 goto fail;
6639 other_store_stmt = use_stmt;
6640 }
6641 }
6642
6643 if (other_store_stmt == NULL)
6644 goto fail;
6645 if (gimple_bb (other_store_stmt) != gimple_bb (stmt)
6646 || !gimple_store_p (other_store_stmt))
6647 goto fail;
6648
6649 stmt_vec_info other_store_stmt_info
6650 = loop_vinfo->lookup_stmt (other_store_stmt);
6651 if (other_store_stmt_info == NULL
6652 || (STMT_VINFO_SIMD_LANE_ACCESS_P (other_store_stmt_info)
6653 != STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info)))
6654 goto fail;
6655
6656 gimple *stmt1 = stmt;
6657 gimple *stmt2 = other_store_stmt;
6658 if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) == 4 && !inscan_var_store)
6659 std::swap (stmt1, stmt2);
6660 if (scan_operand_equal_p (gimple_assign_lhs (stmt1),
6661 gimple_assign_rhs1 (load2_stmt)))
6662 {
6663 std::swap (rhs1, rhs2);
6664 std::swap (load1_stmt, load2_stmt);
6665 std::swap (load1_stmt_info, load2_stmt_info);
6666 }
6667 if (!scan_operand_equal_p (gimple_assign_lhs (stmt1),
6668 gimple_assign_rhs1 (load1_stmt)))
6669 goto fail;
6670
6671 tree var3 = NULL_TREE;
6672 if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) == 3
6673 && !scan_operand_equal_p (gimple_assign_lhs (stmt2),
6674 gimple_assign_rhs1 (load2_stmt)))
6675 goto fail;
6676 else if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) == 4)
6677 {
6678 dr_vec_info *load2_dr_info = STMT_VINFO_DR_INFO (load2_stmt_info);
6679 if (TREE_CODE (DR_BASE_ADDRESS (load2_dr_info->dr)) != ADDR_EXPR
6680 || !VAR_P (TREE_OPERAND (DR_BASE_ADDRESS (load2_dr_info->dr), 0)))
6681 goto fail;
6682 var3 = TREE_OPERAND (DR_BASE_ADDRESS (load2_dr_info->dr), 0);
6683 if (!lookup_attribute ("omp simd array", DECL_ATTRIBUTES (var3))
6684 || lookup_attribute ("omp simd inscan", DECL_ATTRIBUTES (var3))
6685 || lookup_attribute ("omp simd inscan exclusive",
6686 DECL_ATTRIBUTES (var3)))
6687 goto fail;
6688 }
6689
6690 dr_vec_info *other_dr_info = STMT_VINFO_DR_INFO (other_store_stmt_info);
6691 if (TREE_CODE (DR_BASE_ADDRESS (other_dr_info->dr)) != ADDR_EXPR
6692 || !VAR_P (TREE_OPERAND (DR_BASE_ADDRESS (other_dr_info->dr), 0)))
6693 goto fail;
6694
6695 tree var1 = TREE_OPERAND (DR_BASE_ADDRESS (dr_info->dr), 0);
6696 tree var2 = TREE_OPERAND (DR_BASE_ADDRESS (other_dr_info->dr), 0);
6697 if (!lookup_attribute ("omp simd array", DECL_ATTRIBUTES (var1))
6698 || !lookup_attribute ("omp simd array", DECL_ATTRIBUTES (var2))
6699 || (!lookup_attribute ("omp simd inscan", DECL_ATTRIBUTES (var1)))
6700 == (!lookup_attribute ("omp simd inscan", DECL_ATTRIBUTES (var2))))
6701 goto fail;
6702
6703 if (lookup_attribute ("omp simd inscan", DECL_ATTRIBUTES (var1)))
6704 std::swap (var1, var2);
6705
6706 if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) == 4)
6707 {
6708 if (!lookup_attribute ("omp simd inscan exclusive",
6709 DECL_ATTRIBUTES (var1)))
6710 goto fail;
6711 var1 = var3;
6712 }
6713
6714 if (loop_vinfo->scan_map == NULL)
6715 goto fail;
6716 tree *init = loop_vinfo->scan_map->get (var1);
6717 if (init == NULL)
6718 goto fail;
6719
6720 /* The IL is as expected, now check if we can actually vectorize it.
6721 Inclusive scan:
6722 _26 = D.2043[_25];
6723 _27 = D.2042[_25];
6724 _28 = _26 + _27;
6725 D.2043[_25] = _28;
6726 D.2042[_25] = _28;
6727 should be vectorized as (where _40 is the vectorized rhs
6728 from the D.2042[_21] = 0; store):
6729 _30 = MEM <vector(8) int> [(int *)&D.2043];
6730 _31 = MEM <vector(8) int> [(int *)&D.2042];
6731 _32 = VEC_PERM_EXPR <_40, _31, { 0, 8, 9, 10, 11, 12, 13, 14 }>;
6732 _33 = _31 + _32;
6733 // _33 = { _31[0], _31[0]+_31[1], _31[1]+_31[2], ..., _31[6]+_31[7] };
6734 _34 = VEC_PERM_EXPR <_40, _33, { 0, 1, 8, 9, 10, 11, 12, 13 }>;
6735 _35 = _33 + _34;
6736 // _35 = { _31[0], _31[0]+_31[1], _31[0]+.._31[2], _31[0]+.._31[3],
6737 // _31[1]+.._31[4], ... _31[4]+.._31[7] };
6738 _36 = VEC_PERM_EXPR <_40, _35, { 0, 1, 2, 3, 8, 9, 10, 11 }>;
6739 _37 = _35 + _36;
6740 // _37 = { _31[0], _31[0]+_31[1], _31[0]+.._31[2], _31[0]+.._31[3],
6741 // _31[0]+.._31[4], ... _31[0]+.._31[7] };
6742 _38 = _30 + _37;
6743 _39 = VEC_PERM_EXPR <_38, _38, { 7, 7, 7, 7, 7, 7, 7, 7 }>;
6744 MEM <vector(8) int> [(int *)&D.2043] = _39;
6745 MEM <vector(8) int> [(int *)&D.2042] = _38;
6746 Exclusive scan:
6747 _26 = D.2043[_25];
6748 D.2044[_25] = _26;
6749 _27 = D.2042[_25];
6750 _28 = _26 + _27;
6751 D.2043[_25] = _28;
6752 should be vectorized as (where _40 is the vectorized rhs
6753 from the D.2042[_21] = 0; store):
6754 _30 = MEM <vector(8) int> [(int *)&D.2043];
6755 _31 = MEM <vector(8) int> [(int *)&D.2042];
6756 _32 = VEC_PERM_EXPR <_40, _31, { 0, 8, 9, 10, 11, 12, 13, 14 }>;
6757 _33 = VEC_PERM_EXPR <_40, _32, { 0, 8, 9, 10, 11, 12, 13, 14 }>;
6758 _34 = _32 + _33;
6759 // _34 = { 0, _31[0], _31[0]+_31[1], _31[1]+_31[2], _31[2]+_31[3],
6760 // _31[3]+_31[4], ... _31[5]+.._31[6] };
6761 _35 = VEC_PERM_EXPR <_40, _34, { 0, 1, 8, 9, 10, 11, 12, 13 }>;
6762 _36 = _34 + _35;
6763 // _36 = { 0, _31[0], _31[0]+_31[1], _31[0]+.._31[2], _31[0]+.._31[3],
6764 // _31[1]+.._31[4], ... _31[3]+.._31[6] };
6765 _37 = VEC_PERM_EXPR <_40, _36, { 0, 1, 2, 3, 8, 9, 10, 11 }>;
6766 _38 = _36 + _37;
6767 // _38 = { 0, _31[0], _31[0]+_31[1], _31[0]+.._31[2], _31[0]+.._31[3],
6768 // _31[0]+.._31[4], ... _31[0]+.._31[6] };
6769 _39 = _30 + _38;
6770 _50 = _31 + _39;
6771 _51 = VEC_PERM_EXPR <_50, _50, { 7, 7, 7, 7, 7, 7, 7, 7 }>;
6772 MEM <vector(8) int> [(int *)&D.2044] = _39;
6773 MEM <vector(8) int> [(int *)&D.2042] = _51; */
6774 enum machine_mode vec_mode = TYPE_MODE (vectype);
6775 optab optab = optab_for_tree_code (code, vectype, optab_default);
6776 if (!optab || optab_handler (optab, vec_mode) == CODE_FOR_nothing)
6777 goto fail;
6778
6779 int units_log2 = scan_store_can_perm_p (vectype, *init);
6780 if (units_log2 == -1)
6781 goto fail;
6782
6783 return true;
6784 }
6785
6786
6787 /* Function vectorizable_scan_store.
6788
6789 Helper of vectorizable_store; arguments as for vectorizable_store.
6790 Handle only the transformation; the checking is done in check_scan_store. */
6791
6792 static bool
6793 vectorizable_scan_store (vec_info *vinfo,
6794 stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
6795 gimple **vec_stmt, int ncopies)
6796 {
6797 loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
6798 dr_vec_info *dr_info = STMT_VINFO_DR_INFO (stmt_info);
6799 tree ref_type = reference_alias_ptr_type (DR_REF (dr_info->dr));
6800 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
6801
6802 if (dump_enabled_p ())
6803 dump_printf_loc (MSG_NOTE, vect_location,
6804 "transform scan store. ncopies = %d\n", ncopies);
6805
6806 gimple *stmt = STMT_VINFO_STMT (stmt_info);
6807 tree rhs = gimple_assign_rhs1 (stmt);
6808 gcc_assert (TREE_CODE (rhs) == SSA_NAME);
6809
6810 tree var = TREE_OPERAND (DR_BASE_ADDRESS (dr_info->dr), 0);
6811 bool inscan_var_store
6812 = lookup_attribute ("omp simd inscan", DECL_ATTRIBUTES (var)) != NULL;
6813
6814 if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) == 4 && !inscan_var_store)
6815 {
6816 use_operand_p use_p;
6817 imm_use_iterator iter;
6818 FOR_EACH_IMM_USE_FAST (use_p, iter, rhs)
6819 {
6820 gimple *use_stmt = USE_STMT (use_p);
6821 if (use_stmt == stmt || is_gimple_debug (use_stmt))
6822 continue;
6823 rhs = gimple_assign_lhs (use_stmt);
6824 break;
6825 }
6826 }
6827
6828 gimple *def_stmt = SSA_NAME_DEF_STMT (rhs);
6829 enum tree_code code = gimple_assign_rhs_code (def_stmt);
6830 if (code == POINTER_PLUS_EXPR)
6831 code = PLUS_EXPR;
6832 gcc_assert (TREE_CODE_LENGTH (code) == binary_op
6833 && commutative_tree_code (code));
6834 tree rhs1 = gimple_assign_rhs1 (def_stmt);
6835 tree rhs2 = gimple_assign_rhs2 (def_stmt);
6836 gcc_assert (TREE_CODE (rhs1) == SSA_NAME && TREE_CODE (rhs2) == SSA_NAME);
6837 gimple *load1_stmt = SSA_NAME_DEF_STMT (rhs1);
6838 gimple *load2_stmt = SSA_NAME_DEF_STMT (rhs2);
6839 stmt_vec_info load1_stmt_info = loop_vinfo->lookup_stmt (load1_stmt);
6840 stmt_vec_info load2_stmt_info = loop_vinfo->lookup_stmt (load2_stmt);
6841 dr_vec_info *load1_dr_info = STMT_VINFO_DR_INFO (load1_stmt_info);
6842 dr_vec_info *load2_dr_info = STMT_VINFO_DR_INFO (load2_stmt_info);
6843 tree var1 = TREE_OPERAND (DR_BASE_ADDRESS (load1_dr_info->dr), 0);
6844 tree var2 = TREE_OPERAND (DR_BASE_ADDRESS (load2_dr_info->dr), 0);
6845
6846 if (lookup_attribute ("omp simd inscan", DECL_ATTRIBUTES (var1)))
6847 {
6848 std::swap (rhs1, rhs2);
6849 std::swap (var1, var2);
6850 std::swap (load1_dr_info, load2_dr_info);
6851 }
6852
6853 tree *init = loop_vinfo->scan_map->get (var1);
6854 gcc_assert (init);
6855
6856 unsigned HOST_WIDE_INT nunits;
6857 if (!TYPE_VECTOR_SUBPARTS (vectype).is_constant (&nunits))
6858 gcc_unreachable ();
6859 auto_vec<enum scan_store_kind, 16> use_whole_vector;
6860 int units_log2 = scan_store_can_perm_p (vectype, *init, &use_whole_vector);
6861 gcc_assert (units_log2 > 0);
6862 auto_vec<tree, 16> perms;
6863 perms.quick_grow (units_log2 + 1);
6864 tree zero_vec = NULL_TREE, masktype = NULL_TREE;
6865 for (int i = 0; i <= units_log2; ++i)
6866 {
6867 unsigned HOST_WIDE_INT j, k;
6868 vec_perm_builder sel (nunits, nunits, 1);
6869 sel.quick_grow (nunits);
6870 if (i == units_log2)
6871 for (j = 0; j < nunits; ++j)
6872 sel[j] = nunits - 1;
6873 else
6874 {
6875 for (j = 0; j < (HOST_WIDE_INT_1U << i); ++j)
6876 sel[j] = j;
6877 for (k = 0; j < nunits; ++j, ++k)
6878 sel[j] = nunits + k;
6879 }
6880 vec_perm_indices indices (sel, i == units_log2 ? 1 : 2, nunits);
6881 if (!use_whole_vector.is_empty ()
6882 && use_whole_vector[i] != scan_store_kind_perm)
6883 {
6884 if (zero_vec == NULL_TREE)
6885 zero_vec = build_zero_cst (vectype);
6886 if (masktype == NULL_TREE
6887 && use_whole_vector[i] == scan_store_kind_lshift_cond)
6888 masktype = truth_type_for (vectype);
6889 perms[i] = vect_gen_perm_mask_any (vectype, indices);
6890 }
6891 else
6892 perms[i] = vect_gen_perm_mask_checked (vectype, indices);
6893 }
6894
6895 tree vec_oprnd1 = NULL_TREE;
6896 tree vec_oprnd2 = NULL_TREE;
6897 tree vec_oprnd3 = NULL_TREE;
6898 tree dataref_ptr = DR_BASE_ADDRESS (dr_info->dr);
6899 tree dataref_offset = build_int_cst (ref_type, 0);
6900 tree bump = vect_get_data_ptr_increment (vinfo, dr_info,
6901 vectype, VMAT_CONTIGUOUS);
6902 tree ldataref_ptr = NULL_TREE;
6903 tree orig = NULL_TREE;
6904 if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) == 4 && !inscan_var_store)
6905 ldataref_ptr = DR_BASE_ADDRESS (load1_dr_info->dr);
6906 auto_vec<tree> vec_oprnds1;
6907 auto_vec<tree> vec_oprnds2;
6908 auto_vec<tree> vec_oprnds3;
6909 vect_get_vec_defs (vinfo, stmt_info, NULL, ncopies,
6910 *init, &vec_oprnds1,
6911 ldataref_ptr == NULL ? rhs1 : NULL, &vec_oprnds2,
6912 rhs2, &vec_oprnds3);
6913 for (int j = 0; j < ncopies; j++)
6914 {
6915 vec_oprnd1 = vec_oprnds1[j];
6916 if (ldataref_ptr == NULL)
6917 vec_oprnd2 = vec_oprnds2[j];
6918 vec_oprnd3 = vec_oprnds3[j];
6919 if (j == 0)
6920 orig = vec_oprnd3;
6921 else if (!inscan_var_store)
6922 dataref_offset = int_const_binop (PLUS_EXPR, dataref_offset, bump);
6923
6924 if (ldataref_ptr)
6925 {
6926 vec_oprnd2 = make_ssa_name (vectype);
6927 tree data_ref = fold_build2 (MEM_REF, vectype,
6928 unshare_expr (ldataref_ptr),
6929 dataref_offset);
6930 vect_copy_ref_info (data_ref, DR_REF (load1_dr_info->dr));
6931 gimple *g = gimple_build_assign (vec_oprnd2, data_ref);
6932 vect_finish_stmt_generation (vinfo, stmt_info, g, gsi);
6933 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (g);
6934 *vec_stmt = STMT_VINFO_VEC_STMTS (stmt_info)[0];
6935 }
6936
6937 tree v = vec_oprnd2;
6938 for (int i = 0; i < units_log2; ++i)
6939 {
6940 tree new_temp = make_ssa_name (vectype);
6941 gimple *g = gimple_build_assign (new_temp, VEC_PERM_EXPR,
6942 (zero_vec
6943 && (use_whole_vector[i]
6944 != scan_store_kind_perm))
6945 ? zero_vec : vec_oprnd1, v,
6946 perms[i]);
6947 vect_finish_stmt_generation (vinfo, stmt_info, g, gsi);
6948 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (g);
6949 *vec_stmt = STMT_VINFO_VEC_STMTS (stmt_info)[0];
6950
6951 if (zero_vec && use_whole_vector[i] == scan_store_kind_lshift_cond)
6952 {
6953 /* The whole-vector shift shifted in zero bits; if *init
6954 is not initializer_zerop, we need to replace those elements
6955 with elements from vec_oprnd1. */
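/* Illustrative, derived from the mask construction below: for i == 1 and
   an 8-element vector the mask is { 0, 0, 1, 1, 1, 1, 1, 1 }, i.e. lanes
   0 and 1 take their value from vec_oprnd1 (the vectorized *init) while
   the remaining lanes keep the shifted value NEW_TEMP.  */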
6956 tree_vector_builder vb (masktype, nunits, 1);
6957 for (unsigned HOST_WIDE_INT k = 0; k < nunits; ++k)
6958 vb.quick_push (k < (HOST_WIDE_INT_1U << i)
6959 ? boolean_false_node : boolean_true_node);
6960
6961 tree new_temp2 = make_ssa_name (vectype);
6962 g = gimple_build_assign (new_temp2, VEC_COND_EXPR, vb.build (),
6963 new_temp, vec_oprnd1);
6964 vect_finish_stmt_generation (vinfo, stmt_info,
6965 g, gsi);
6966 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (g);
6967 new_temp = new_temp2;
6968 }
6969
6970 /* For exclusive scan, perform the perms[i] permutation once
6971 more. */
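/* In the exclusive scan IL example in check_scan_store this is what
   produces _33 from _32: the already shifted value is shifted by one
   more element before the first addition (_34 = _32 + _33).  */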
6972 if (i == 0
6973 && STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) == 4
6974 && v == vec_oprnd2)
6975 {
6976 v = new_temp;
6977 --i;
6978 continue;
6979 }
6980
6981 tree new_temp2 = make_ssa_name (vectype);
6982 g = gimple_build_assign (new_temp2, code, v, new_temp);
6983 vect_finish_stmt_generation (vinfo, stmt_info, g, gsi);
6984 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (g);
6985
6986 v = new_temp2;
6987 }
6988
6989 tree new_temp = make_ssa_name (vectype);
6990 gimple *g = gimple_build_assign (new_temp, code, orig, v);
6991 vect_finish_stmt_generation (vinfo, stmt_info, g, gsi);
6992 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (g);
6993
6994 tree last_perm_arg = new_temp;
6995 /* For exclusive scan, new_temp computed above is the exclusive scan
6996 prefix sum. Turn it into inclusive prefix sum for the broadcast
6997 of the last element into orig. */
6998 if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) == 4)
6999 {
7000 last_perm_arg = make_ssa_name (vectype);
7001 g = gimple_build_assign (last_perm_arg, code, new_temp, vec_oprnd2);
7002 vect_finish_stmt_generation (vinfo, stmt_info, g, gsi);
7003 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (g);
7004 }
7005
7006 orig = make_ssa_name (vectype);
7007 g = gimple_build_assign (orig, VEC_PERM_EXPR, last_perm_arg,
7008 last_perm_arg, perms[units_log2]);
7009 vect_finish_stmt_generation (vinfo, stmt_info, g, gsi);
7010 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (g);
7011
7012 if (!inscan_var_store)
7013 {
7014 tree data_ref = fold_build2 (MEM_REF, vectype,
7015 unshare_expr (dataref_ptr),
7016 dataref_offset);
7017 vect_copy_ref_info (data_ref, DR_REF (dr_info->dr));
7018 g = gimple_build_assign (data_ref, new_temp);
7019 vect_finish_stmt_generation (vinfo, stmt_info, g, gsi);
7020 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (g);
7021 }
7022 }
7023
7024 if (inscan_var_store)
7025 for (int j = 0; j < ncopies; j++)
7026 {
7027 if (j != 0)
7028 dataref_offset = int_const_binop (PLUS_EXPR, dataref_offset, bump);
7029
7030 tree data_ref = fold_build2 (MEM_REF, vectype,
7031 unshare_expr (dataref_ptr),
7032 dataref_offset);
7033 vect_copy_ref_info (data_ref, DR_REF (dr_info->dr));
7034 gimple *g = gimple_build_assign (data_ref, orig);
7035 vect_finish_stmt_generation (vinfo, stmt_info, g, gsi);
7036 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (g);
7037 }
7038 return true;
7039 }
7040
7041
7042 /* Function vectorizable_store.
7043
7044 Check if STMT_INFO defines a non scalar data-ref (array/pointer/structure)
7045 that can be vectorized.
7046 If VEC_STMT is also passed, vectorize STMT_INFO: create a vectorized
7047 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
7048 Return true if STMT_INFO is vectorizable in this way. */
7049
7050 static bool
7051 vectorizable_store (vec_info *vinfo,
7052 stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
7053 gimple **vec_stmt, slp_tree slp_node,
7054 stmt_vector_for_cost *cost_vec)
7055 {
7056 tree data_ref;
7057 tree op;
7058 tree vec_oprnd = NULL_TREE;
7059 tree elem_type;
7060 loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
7061 class loop *loop = NULL;
7062 machine_mode vec_mode;
7063 tree dummy;
7064 enum vect_def_type rhs_dt = vect_unknown_def_type;
7065 enum vect_def_type mask_dt = vect_unknown_def_type;
7066 tree dataref_ptr = NULL_TREE;
7067 tree dataref_offset = NULL_TREE;
7068 gimple *ptr_incr = NULL;
7069 int ncopies;
7070 int j;
7071 stmt_vec_info first_stmt_info;
7072 bool grouped_store;
7073 unsigned int group_size, i;
7074 vec<tree> oprnds = vNULL;
7075 vec<tree> result_chain = vNULL;
7076 tree offset = NULL_TREE;
7077 vec<tree> vec_oprnds = vNULL;
7078 bool slp = (slp_node != NULL);
7079 unsigned int vec_num;
7080 bb_vec_info bb_vinfo = dyn_cast <bb_vec_info> (vinfo);
7081 tree aggr_type;
7082 gather_scatter_info gs_info;
7083 poly_uint64 vf;
7084 vec_load_store_type vls_type;
7085 tree ref_type;
7086
7087 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
7088 return false;
7089
7090 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
7091 && ! vec_stmt)
7092 return false;
7093
7094 /* Is vectorizable store? */
7095
7096 tree mask = NULL_TREE, mask_vectype = NULL_TREE;
7097 if (gassign *assign = dyn_cast <gassign *> (stmt_info->stmt))
7098 {
7099 tree scalar_dest = gimple_assign_lhs (assign);
7100 if (TREE_CODE (scalar_dest) == VIEW_CONVERT_EXPR
7101 && is_pattern_stmt_p (stmt_info))
7102 scalar_dest = TREE_OPERAND (scalar_dest, 0);
7103 if (TREE_CODE (scalar_dest) != ARRAY_REF
7104 && TREE_CODE (scalar_dest) != BIT_FIELD_REF
7105 && TREE_CODE (scalar_dest) != INDIRECT_REF
7106 && TREE_CODE (scalar_dest) != COMPONENT_REF
7107 && TREE_CODE (scalar_dest) != IMAGPART_EXPR
7108 && TREE_CODE (scalar_dest) != REALPART_EXPR
7109 && TREE_CODE (scalar_dest) != MEM_REF)
7110 return false;
7111 }
7112 else
7113 {
7114 gcall *call = dyn_cast <gcall *> (stmt_info->stmt);
7115 if (!call || !gimple_call_internal_p (call))
7116 return false;
7117
7118 internal_fn ifn = gimple_call_internal_fn (call);
7119 if (!internal_store_fn_p (ifn))
7120 return false;
7121
7122 if (slp_node != NULL)
7123 {
7124 if (dump_enabled_p ())
7125 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
7126 "SLP of masked stores not supported.\n");
7127 return false;
7128 }
7129
7130 int mask_index = internal_fn_mask_index (ifn);
7131 if (mask_index >= 0)
7132 {
7133 mask = gimple_call_arg (call, mask_index);
7134 if (!vect_check_scalar_mask (vinfo, stmt_info, mask, &mask_dt,
7135 &mask_vectype))
7136 return false;
7137 }
7138 }
7139
7140 op = vect_get_store_rhs (stmt_info);
7141
7142 /* Cannot have hybrid store SLP -- that would mean storing to the
7143 same location twice. */
7144 gcc_assert (slp == PURE_SLP_STMT (stmt_info));
7145
7146 tree vectype = STMT_VINFO_VECTYPE (stmt_info), rhs_vectype = NULL_TREE;
7147 poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
7148
7149 if (loop_vinfo)
7150 {
7151 loop = LOOP_VINFO_LOOP (loop_vinfo);
7152 vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
7153 }
7154 else
7155 vf = 1;
7156
7157 /* Multiple types in SLP are handled by creating the appropriate number of
7158 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
7159 case of SLP. */
7160 if (slp)
7161 ncopies = 1;
7162 else
7163 ncopies = vect_get_num_copies (loop_vinfo, vectype);
7164
7165 gcc_assert (ncopies >= 1);
7166
7167 /* FORNOW. This restriction should be relaxed. */
7168 if (loop && nested_in_vect_loop_p (loop, stmt_info) && ncopies > 1)
7169 {
7170 if (dump_enabled_p ())
7171 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
7172 "multiple types in nested loop.\n");
7173 return false;
7174 }
7175
7176 if (!vect_check_store_rhs (vinfo, stmt_info, slp_node,
7177 op, &rhs_dt, &rhs_vectype, &vls_type))
7178 return false;
7179
7180 elem_type = TREE_TYPE (vectype);
7181 vec_mode = TYPE_MODE (vectype);
7182
7183 if (!STMT_VINFO_DATA_REF (stmt_info))
7184 return false;
7185
7186 vect_memory_access_type memory_access_type;
7187 enum dr_alignment_support alignment_support_scheme;
7188 if (!get_load_store_type (vinfo, stmt_info, vectype, slp_node, mask, vls_type,
7189 ncopies, &memory_access_type,
7190 &alignment_support_scheme, &gs_info))
7191 return false;
7192
7193 if (mask)
7194 {
7195 if (memory_access_type == VMAT_CONTIGUOUS)
7196 {
7197 if (!VECTOR_MODE_P (vec_mode)
7198 || !can_vec_mask_load_store_p (vec_mode,
7199 TYPE_MODE (mask_vectype), false))
7200 return false;
7201 }
7202 else if (memory_access_type != VMAT_LOAD_STORE_LANES
7203 && (memory_access_type != VMAT_GATHER_SCATTER
7204 || (gs_info.decl && !VECTOR_BOOLEAN_TYPE_P (mask_vectype))))
7205 {
7206 if (dump_enabled_p ())
7207 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
7208 "unsupported access type for masked store.\n");
7209 return false;
7210 }
7211 }
7212 else
7213 {
7214 /* FORNOW. In some cases we can vectorize even if the data type is not
7215 supported (e.g. array initialization with 0). */
7216 if (optab_handler (mov_optab, vec_mode) == CODE_FOR_nothing)
7217 return false;
7218 }
7219
7220 dr_vec_info *dr_info = STMT_VINFO_DR_INFO (stmt_info), *first_dr_info = NULL;
7221 grouped_store = (STMT_VINFO_GROUPED_ACCESS (stmt_info)
7222 && memory_access_type != VMAT_GATHER_SCATTER
7223 && (slp || memory_access_type != VMAT_CONTIGUOUS));
7224 if (grouped_store)
7225 {
7226 first_stmt_info = DR_GROUP_FIRST_ELEMENT (stmt_info);
7227 first_dr_info = STMT_VINFO_DR_INFO (first_stmt_info);
7228 group_size = DR_GROUP_SIZE (first_stmt_info);
7229 }
7230 else
7231 {
7232 first_stmt_info = stmt_info;
7233 first_dr_info = dr_info;
7234 group_size = vec_num = 1;
7235 }
7236
7237 if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) > 1 && !vec_stmt)
7238 {
7239 if (!check_scan_store (vinfo, stmt_info, vectype, rhs_dt, slp, mask,
7240 memory_access_type))
7241 return false;
7242 }
7243
7244 if (!vec_stmt) /* transformation not required. */
7245 {
7246 STMT_VINFO_MEMORY_ACCESS_TYPE (stmt_info) = memory_access_type;
7247
7248 if (loop_vinfo
7249 && LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo))
7250 check_load_store_for_partial_vectors (loop_vinfo, vectype, vls_type,
7251 group_size, memory_access_type,
7252 &gs_info, mask);
7253
7254 if (slp_node
7255 && !vect_maybe_update_slp_op_vectype (SLP_TREE_CHILDREN (slp_node)[0],
7256 vectype))
7257 {
7258 if (dump_enabled_p ())
7259 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
7260 "incompatible vector types for invariants\n");
7261 return false;
7262 }
7263
7264 if (dump_enabled_p ()
7265 && memory_access_type != VMAT_ELEMENTWISE
7266 && memory_access_type != VMAT_GATHER_SCATTER
7267 && alignment_support_scheme != dr_aligned)
7268 dump_printf_loc (MSG_NOTE, vect_location,
7269 "Vectorizing an unaligned access.\n");
7270
7271 STMT_VINFO_TYPE (stmt_info) = store_vec_info_type;
7272 vect_model_store_cost (vinfo, stmt_info, ncopies,
7273 memory_access_type, vls_type, slp_node, cost_vec);
7274 return true;
7275 }
7276 gcc_assert (memory_access_type == STMT_VINFO_MEMORY_ACCESS_TYPE (stmt_info));
7277
7278 /* Transform. */
7279
7280 ensure_base_align (dr_info);
7281
7282 if (memory_access_type == VMAT_GATHER_SCATTER && gs_info.decl)
7283 {
7284 tree vec_oprnd0 = NULL_TREE, vec_oprnd1 = NULL_TREE, src;
7285 tree arglist = TYPE_ARG_TYPES (TREE_TYPE (gs_info.decl));
7286 tree rettype, srctype, ptrtype, idxtype, masktype, scaletype;
7287 tree ptr, var, scale, vec_mask;
7288 tree mask_arg = NULL_TREE, mask_op = NULL_TREE, perm_mask = NULL_TREE;
7289 tree mask_halfvectype = mask_vectype;
7290 edge pe = loop_preheader_edge (loop);
7291 gimple_seq seq;
7292 basic_block new_bb;
7293 enum { NARROW, NONE, WIDEN } modifier;
7294 poly_uint64 scatter_off_nunits
7295 = TYPE_VECTOR_SUBPARTS (gs_info.offset_vectype);
7296
7297 if (known_eq (nunits, scatter_off_nunits))
7298 modifier = NONE;
7299 else if (known_eq (nunits * 2, scatter_off_nunits))
7300 {
7301 modifier = WIDEN;
7302
7303 /* Currently gathers and scatters are only supported for
7304 fixed-length vectors. */
7305 unsigned int count = scatter_off_nunits.to_constant ();
7306 vec_perm_builder sel (count, count, 1);
7307 for (i = 0; i < (unsigned int) count; ++i)
7308 sel.quick_push (i | (count / 2));
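	    /* Illustrative: for COUNT == 4 the selector built above is
	       { 2, 3, 2, 3 }, i.e. it replicates the upper half of the
	       offset vector so that the odd-numbered copies below can
	       scatter with the second half of the offsets.  */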
7309
7310 vec_perm_indices indices (sel, 1, count);
7311 perm_mask = vect_gen_perm_mask_checked (gs_info.offset_vectype,
7312 indices);
7313 gcc_assert (perm_mask != NULL_TREE);
7314 }
7315 else if (known_eq (nunits, scatter_off_nunits * 2))
7316 {
7317 modifier = NARROW;
7318
7319 /* Currently gathers and scatters are only supported for
7320 fixed-length vectors. */
7321 unsigned int count = nunits.to_constant ();
7322 vec_perm_builder sel (count, count, 1);
7323 for (i = 0; i < (unsigned int) count; ++i)
7324 sel.quick_push (i | (count / 2));
7325
7326 vec_perm_indices indices (sel, 2, count);
7327 perm_mask = vect_gen_perm_mask_checked (vectype, indices);
7328 gcc_assert (perm_mask != NULL_TREE);
7329 ncopies *= 2;
7330
7331 if (mask)
7332 mask_halfvectype = truth_type_for (gs_info.offset_vectype);
7333 }
7334 else
7335 gcc_unreachable ();
7336
7337 rettype = TREE_TYPE (TREE_TYPE (gs_info.decl));
7338 ptrtype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
7339 masktype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
7340 idxtype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
7341 srctype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
7342 scaletype = TREE_VALUE (arglist);
7343
7344 gcc_checking_assert (TREE_CODE (masktype) == INTEGER_TYPE
7345 && TREE_CODE (rettype) == VOID_TYPE);
7346
7347 ptr = fold_convert (ptrtype, gs_info.base);
7348 if (!is_gimple_min_invariant (ptr))
7349 {
7350 ptr = force_gimple_operand (ptr, &seq, true, NULL_TREE);
7351 new_bb = gsi_insert_seq_on_edge_immediate (pe, seq);
7352 gcc_assert (!new_bb);
7353 }
7354
7355 if (mask == NULL_TREE)
7356 {
7357 mask_arg = build_int_cst (masktype, -1);
7358 mask_arg = vect_init_vector (vinfo, stmt_info,
7359 mask_arg, masktype, NULL);
7360 }
7361
7362 scale = build_int_cst (scaletype, gs_info.scale);
7363
7364 auto_vec<tree> vec_oprnds0;
7365 auto_vec<tree> vec_oprnds1;
7366 auto_vec<tree> vec_masks;
7367 if (mask)
7368 {
7369 tree mask_vectype = truth_type_for (vectype);
7370 vect_get_vec_defs_for_operand (vinfo, stmt_info,
7371 modifier == NARROW
7372 ? ncopies / 2 : ncopies,
7373 mask, &vec_masks, mask_vectype);
7374 }
7375 vect_get_vec_defs_for_operand (vinfo, stmt_info,
7376 modifier == WIDEN
7377 ? ncopies / 2 : ncopies,
7378 gs_info.offset, &vec_oprnds0);
7379 vect_get_vec_defs_for_operand (vinfo, stmt_info,
7380 modifier == NARROW
7381 ? ncopies / 2 : ncopies,
7382 op, &vec_oprnds1);
7383 for (j = 0; j < ncopies; ++j)
7384 {
7385 if (modifier == WIDEN)
7386 {
7387 if (j & 1)
7388 op = permute_vec_elements (vinfo, vec_oprnd0, vec_oprnd0,
7389 perm_mask, stmt_info, gsi);
7390 else
7391 op = vec_oprnd0 = vec_oprnds0[j / 2];
7392 src = vec_oprnd1 = vec_oprnds1[j];
7393 if (mask)
7394 mask_op = vec_mask = vec_masks[j];
7395 }
7396 else if (modifier == NARROW)
7397 {
7398 if (j & 1)
7399 src = permute_vec_elements (vinfo, vec_oprnd1, vec_oprnd1,
7400 perm_mask, stmt_info, gsi);
7401 else
7402 src = vec_oprnd1 = vec_oprnds1[j / 2];
7403 op = vec_oprnd0 = vec_oprnds0[j];
7404 if (mask)
7405 mask_op = vec_mask = vec_masks[j / 2];
7406 }
7407 else
7408 {
7409 op = vec_oprnd0 = vec_oprnds0[j];
7410 src = vec_oprnd1 = vec_oprnds1[j];
7411 if (mask)
7412 mask_op = vec_mask = vec_masks[j];
7413 }
7414
7415 if (!useless_type_conversion_p (srctype, TREE_TYPE (src)))
7416 {
7417 gcc_assert (known_eq (TYPE_VECTOR_SUBPARTS (TREE_TYPE (src)),
7418 TYPE_VECTOR_SUBPARTS (srctype)));
7419 var = vect_get_new_ssa_name (srctype, vect_simple_var);
7420 src = build1 (VIEW_CONVERT_EXPR, srctype, src);
7421 gassign *new_stmt
7422 = gimple_build_assign (var, VIEW_CONVERT_EXPR, src);
7423 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
7424 src = var;
7425 }
7426
7427 if (!useless_type_conversion_p (idxtype, TREE_TYPE (op)))
7428 {
7429 gcc_assert (known_eq (TYPE_VECTOR_SUBPARTS (TREE_TYPE (op)),
7430 TYPE_VECTOR_SUBPARTS (idxtype)));
7431 var = vect_get_new_ssa_name (idxtype, vect_simple_var);
7432 op = build1 (VIEW_CONVERT_EXPR, idxtype, op);
7433 gassign *new_stmt
7434 = gimple_build_assign (var, VIEW_CONVERT_EXPR, op);
7435 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
7436 op = var;
7437 }
7438
7439 if (mask)
7440 {
7441 tree utype;
7442 mask_arg = mask_op;
7443 if (modifier == NARROW)
7444 {
7445 var = vect_get_new_ssa_name (mask_halfvectype,
7446 vect_simple_var);
7447 gassign *new_stmt
7448 = gimple_build_assign (var, (j & 1) ? VEC_UNPACK_HI_EXPR
7449 : VEC_UNPACK_LO_EXPR,
7450 mask_op);
7451 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
7452 mask_arg = var;
7453 }
7454 tree optype = TREE_TYPE (mask_arg);
7455 if (TYPE_MODE (masktype) == TYPE_MODE (optype))
7456 utype = masktype;
7457 else
7458 utype = lang_hooks.types.type_for_mode (TYPE_MODE (optype), 1);
7459 var = vect_get_new_ssa_name (utype, vect_scalar_var);
7460 mask_arg = build1 (VIEW_CONVERT_EXPR, utype, mask_arg);
7461 gassign *new_stmt
7462 = gimple_build_assign (var, VIEW_CONVERT_EXPR, mask_arg);
7463 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
7464 mask_arg = var;
7465 if (!useless_type_conversion_p (masktype, utype))
7466 {
7467 gcc_assert (TYPE_PRECISION (utype)
7468 <= TYPE_PRECISION (masktype));
7469 var = vect_get_new_ssa_name (masktype, vect_scalar_var);
7470 new_stmt = gimple_build_assign (var, NOP_EXPR, mask_arg);
7471 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
7472 mask_arg = var;
7473 }
7474 }
7475
7476 gcall *new_stmt
7477 = gimple_build_call (gs_info.decl, 5, ptr, mask_arg, op, src, scale);
7478 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
7479
7480 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
7481 }
7482 *vec_stmt = STMT_VINFO_VEC_STMTS (stmt_info)[0];
7483 return true;
7484 }
7485 else if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) >= 3)
7486 return vectorizable_scan_store (vinfo, stmt_info, gsi, vec_stmt, ncopies);
7487
7488 if (STMT_VINFO_GROUPED_ACCESS (stmt_info))
7489 DR_GROUP_STORE_COUNT (DR_GROUP_FIRST_ELEMENT (stmt_info))++;
7490
7491 if (grouped_store)
7492 {
7493 /* FORNOW */
7494 gcc_assert (!loop || !nested_in_vect_loop_p (loop, stmt_info));
7495
7496 /* We vectorize all the stmts of the interleaving group when we
7497 reach the last stmt in the group. */
7498 if (DR_GROUP_STORE_COUNT (first_stmt_info)
7499 < DR_GROUP_SIZE (first_stmt_info)
7500 && !slp)
7501 {
7502 *vec_stmt = NULL;
7503 return true;
7504 }
7505
7506 if (slp)
7507 {
7508 grouped_store = false;
7509 /* VEC_NUM is the number of vect stmts to be created for this
7510 group. */
7511 vec_num = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
7512 first_stmt_info = SLP_TREE_SCALAR_STMTS (slp_node)[0];
7513 gcc_assert (DR_GROUP_FIRST_ELEMENT (first_stmt_info)
7514 == first_stmt_info);
7515 first_dr_info = STMT_VINFO_DR_INFO (first_stmt_info);
7516 op = vect_get_store_rhs (first_stmt_info);
7517 }
7518 else
7519 /* VEC_NUM is the number of vect stmts to be created for this
7520 group. */
7521 vec_num = group_size;
7522
7523 ref_type = get_group_alias_ptr_type (first_stmt_info);
7524 }
7525 else
7526 ref_type = reference_alias_ptr_type (DR_REF (first_dr_info->dr));
7527
7528 if (dump_enabled_p ())
7529 dump_printf_loc (MSG_NOTE, vect_location,
7530 "transform store. ncopies = %d\n", ncopies);
7531
7532 if (memory_access_type == VMAT_ELEMENTWISE
7533 || memory_access_type == VMAT_STRIDED_SLP)
7534 {
7535 gimple_stmt_iterator incr_gsi;
7536 bool insert_after;
7537 gimple *incr;
7538 tree offvar;
7539 tree ivstep;
7540 tree running_off;
7541 tree stride_base, stride_step, alias_off;
7542 tree vec_oprnd;
7543 tree dr_offset;
7544 unsigned int g;
7545 /* Checked by get_load_store_type. */
7546 unsigned int const_nunits = nunits.to_constant ();
7547
7548 gcc_assert (!LOOP_VINFO_FULLY_MASKED_P (loop_vinfo));
7549 gcc_assert (!nested_in_vect_loop_p (loop, stmt_info));
7550
7551 dr_offset = get_dr_vinfo_offset (vinfo, first_dr_info);
7552 stride_base
7553 = fold_build_pointer_plus
7554 (DR_BASE_ADDRESS (first_dr_info->dr),
7555 size_binop (PLUS_EXPR,
7556 convert_to_ptrofftype (dr_offset),
7557 convert_to_ptrofftype (DR_INIT (first_dr_info->dr))));
7558 stride_step = fold_convert (sizetype, DR_STEP (first_dr_info->dr));
7559
7560 /* For a store with loop-invariant (but other than power-of-2)
7561 stride (i.e. not a grouped access) like so:
7562
7563 for (i = 0; i < n; i += stride)
7564 array[i] = ...;
7565
7566 we generate a new induction variable and new stores from
7567 the components of the (vectorized) rhs:
7568
7569 for (j = 0; ; j += VF*stride)
7570 vectemp = ...;
7571 tmp1 = vectemp[0];
7572 array[j] = tmp1;
7573 tmp2 = vectemp[1];
7574 array[j + stride] = tmp2;
7575 ...
7576 */
7577
7578 unsigned nstores = const_nunits;
7579 unsigned lnel = 1;
7580 tree ltype = elem_type;
7581 tree lvectype = vectype;
7582 if (slp)
7583 {
7584 if (group_size < const_nunits
7585 && const_nunits % group_size == 0)
7586 {
7587 nstores = const_nunits / group_size;
7588 lnel = group_size;
7589 ltype = build_vector_type (elem_type, group_size);
7590 lvectype = vectype;
7591
7592 /* First check whether the vec_extract optab supports extracting
7593 the sub-vectors directly; if not, try the fallbacks below. */
7594 scalar_mode elmode = SCALAR_TYPE_MODE (elem_type);
7595 machine_mode vmode;
7596 if (!VECTOR_MODE_P (TYPE_MODE (vectype))
7597 || !related_vector_mode (TYPE_MODE (vectype), elmode,
7598 group_size).exists (&vmode)
7599 || (convert_optab_handler (vec_extract_optab,
7600 TYPE_MODE (vectype), vmode)
7601 == CODE_FOR_nothing))
7602 {
7603 /* Try to avoid emitting an extract of vector elements
7604 by performing the extracts using an integer type of the
7605 same size, extracting from a vector of those and then
7606 re-interpreting it as the original vector type if
7607 supported. */
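	      /* Illustrative example (not from a testcase): for a V4SImode
	         vector and GROUP_SIZE == 2, LSIZE is 64, so we would view
	         the vector as V2DImode and extract DImode chunks, each
	         chunk covering one group of two ints, instead of
	         extracting the ints individually.  */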
7608 unsigned lsize
7609 = group_size * GET_MODE_BITSIZE (elmode);
7610 unsigned int lnunits = const_nunits / group_size;
7611 /* If we can't construct such a vector, fall back to
7612 element extracts from the original vector type and
7613 element size stores. */
7614 if (int_mode_for_size (lsize, 0).exists (&elmode)
7615 && VECTOR_MODE_P (TYPE_MODE (vectype))
7616 && related_vector_mode (TYPE_MODE (vectype), elmode,
7617 lnunits).exists (&vmode)
7618 && (convert_optab_handler (vec_extract_optab,
7619 vmode, elmode)
7620 != CODE_FOR_nothing))
7621 {
7622 nstores = lnunits;
7623 lnel = group_size;
7624 ltype = build_nonstandard_integer_type (lsize, 1);
7625 lvectype = build_vector_type (ltype, nstores);
7626 }
7627 /* Else fall back to vector extraction anyway.
7628 Fewer stores are more important than avoiding spilling
7629 of the vector we extract from. Compared to the
7630 construction case in vectorizable_load no store-forwarding
7631 issue exists here for reasonable archs. */
7632 }
7633 }
7634 else if (group_size >= const_nunits
7635 && group_size % const_nunits == 0)
7636 {
7637 nstores = 1;
7638 lnel = const_nunits;
7639 ltype = vectype;
7640 lvectype = vectype;
7641 }
7642 ltype = build_aligned_type (ltype, TYPE_ALIGN (elem_type));
7643 ncopies = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
7644 }
7645
7646 ivstep = stride_step;
7647 ivstep = fold_build2 (MULT_EXPR, TREE_TYPE (ivstep), ivstep,
7648 build_int_cst (TREE_TYPE (ivstep), vf));
7649
7650 standard_iv_increment_position (loop, &incr_gsi, &insert_after);
7651
7652 stride_base = cse_and_gimplify_to_preheader (loop_vinfo, stride_base);
7653 ivstep = cse_and_gimplify_to_preheader (loop_vinfo, ivstep);
7654 create_iv (stride_base, ivstep, NULL,
7655 loop, &incr_gsi, insert_after,
7656 &offvar, NULL);
7657 incr = gsi_stmt (incr_gsi);
7658
7659 stride_step = cse_and_gimplify_to_preheader (loop_vinfo, stride_step);
7660
7661 alias_off = build_int_cst (ref_type, 0);
7662 stmt_vec_info next_stmt_info = first_stmt_info;
7663 for (g = 0; g < group_size; g++)
7664 {
7665 running_off = offvar;
7666 if (g)
7667 {
7668 tree size = TYPE_SIZE_UNIT (ltype);
7669 tree pos = fold_build2 (MULT_EXPR, sizetype, size_int (g),
7670 size);
7671 tree newoff = copy_ssa_name (running_off, NULL);
7672 incr = gimple_build_assign (newoff, POINTER_PLUS_EXPR,
7673 running_off, pos);
7674 vect_finish_stmt_generation (vinfo, stmt_info, incr, gsi);
7675 running_off = newoff;
7676 }
7677 if (!slp)
7678 op = vect_get_store_rhs (next_stmt_info);
7679 vect_get_vec_defs (vinfo, next_stmt_info, slp_node, ncopies,
7680 op, &vec_oprnds);
7681 unsigned int group_el = 0;
7682 unsigned HOST_WIDE_INT
7683 elsz = tree_to_uhwi (TYPE_SIZE_UNIT (TREE_TYPE (vectype)));
7684 for (j = 0; j < ncopies; j++)
7685 {
7686 vec_oprnd = vec_oprnds[j];
7687 /* Pun the vector to extract from if necessary. */
7688 if (lvectype != vectype)
7689 {
7690 tree tem = make_ssa_name (lvectype);
7691 gimple *pun
7692 = gimple_build_assign (tem, build1 (VIEW_CONVERT_EXPR,
7693 lvectype, vec_oprnd));
7694 vect_finish_stmt_generation (vinfo, stmt_info, pun, gsi);
7695 vec_oprnd = tem;
7696 }
7697 for (i = 0; i < nstores; i++)
7698 {
7699 tree newref, newoff;
7700 gimple *incr, *assign;
7701 tree size = TYPE_SIZE (ltype);
7702 /* Extract the i'th component. */
7703 tree pos = fold_build2 (MULT_EXPR, bitsizetype,
7704 bitsize_int (i), size);
7705 tree elem = fold_build3 (BIT_FIELD_REF, ltype, vec_oprnd,
7706 size, pos);
7707
7708 elem = force_gimple_operand_gsi (gsi, elem, true,
7709 NULL_TREE, true,
7710 GSI_SAME_STMT);
7711
7712 tree this_off = build_int_cst (TREE_TYPE (alias_off),
7713 group_el * elsz);
7714 newref = build2 (MEM_REF, ltype,
7715 running_off, this_off);
7716 vect_copy_ref_info (newref, DR_REF (first_dr_info->dr));
7717
7718 /* And store it to *running_off. */
7719 assign = gimple_build_assign (newref, elem);
7720 vect_finish_stmt_generation (vinfo, stmt_info, assign, gsi);
7721
7722 group_el += lnel;
7723 if (! slp
7724 || group_el == group_size)
7725 {
7726 newoff = copy_ssa_name (running_off, NULL);
7727 incr = gimple_build_assign (newoff, POINTER_PLUS_EXPR,
7728 running_off, stride_step);
7729 vect_finish_stmt_generation (vinfo, stmt_info, incr, gsi);
7730
7731 running_off = newoff;
7732 group_el = 0;
7733 }
7734 if (g == group_size - 1
7735 && !slp)
7736 {
7737 if (j == 0 && i == 0)
7738 *vec_stmt = assign;
7739 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (assign);
7740 }
7741 }
7742 }
7743 next_stmt_info = DR_GROUP_NEXT_ELEMENT (next_stmt_info);
7744 vec_oprnds.release ();
7745 if (slp)
7746 break;
7747 }
7748
7749 return true;
7750 }
7751
7752 auto_vec<tree> dr_chain (group_size);
7753 oprnds.create (group_size);
7754
7755 /* Gather-scatter accesses perform only component accesses; alignment
7756 is irrelevant for them. */
7757 if (memory_access_type == VMAT_GATHER_SCATTER)
7758 alignment_support_scheme = dr_unaligned_supported;
7759 else
7760 alignment_support_scheme
7761 = vect_supportable_dr_alignment (vinfo, first_dr_info, false);
7762
7763 gcc_assert (alignment_support_scheme);
7764 vec_loop_masks *loop_masks
7765 = (loop_vinfo && LOOP_VINFO_FULLY_MASKED_P (loop_vinfo)
7766 ? &LOOP_VINFO_MASKS (loop_vinfo)
7767 : NULL);
7768 vec_loop_lens *loop_lens
7769 = (loop_vinfo && LOOP_VINFO_FULLY_WITH_LENGTH_P (loop_vinfo)
7770 ? &LOOP_VINFO_LENS (loop_vinfo)
7771 : NULL);
7772
7773 /* Shouldn't go with length-based approach if fully masked. */
7774 gcc_assert (!loop_lens || !loop_masks);
7775
7776 /* Targets with store-lane instructions must not require explicit
7777 realignment. vect_supportable_dr_alignment always returns either
7778 dr_aligned or dr_unaligned_supported for masked operations. */
7779 gcc_assert ((memory_access_type != VMAT_LOAD_STORE_LANES
7780 && !mask
7781 && !loop_masks)
7782 || alignment_support_scheme == dr_aligned
7783 || alignment_support_scheme == dr_unaligned_supported);
7784
7785 if (memory_access_type == VMAT_CONTIGUOUS_DOWN
7786 || memory_access_type == VMAT_CONTIGUOUS_REVERSE)
7787 offset = size_int (-TYPE_VECTOR_SUBPARTS (vectype) + 1);
7788
7789 tree bump;
7790 tree vec_offset = NULL_TREE;
7791 if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
7792 {
7793 aggr_type = NULL_TREE;
7794 bump = NULL_TREE;
7795 }
7796 else if (memory_access_type == VMAT_GATHER_SCATTER)
7797 {
7798 aggr_type = elem_type;
7799 vect_get_strided_load_store_ops (stmt_info, loop_vinfo, &gs_info,
7800 &bump, &vec_offset);
7801 }
7802 else
7803 {
7804 if (memory_access_type == VMAT_LOAD_STORE_LANES)
7805 aggr_type = build_array_type_nelts (elem_type, vec_num * nunits);
7806 else
7807 aggr_type = vectype;
7808 bump = vect_get_data_ptr_increment (vinfo, dr_info, aggr_type,
7809 memory_access_type);
7810 }
7811
7812 if (mask)
7813 LOOP_VINFO_HAS_MASK_STORE (loop_vinfo) = true;
7814
7815 /* In case the vectorization factor (VF) is bigger than the number
7816 of elements that we can fit in a vectype (nunits), we have to generate
7817 more than one vector stmt, i.e. we need to "unroll" the
7818 vector stmt by a factor VF/nunits. */
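  /* Purely illustrative: with VF == 8 and a 4-element vectype we need
     NCOPIES == 8 / 4 == 2, i.e. two vector stmts per scalar stmt.  */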
7819
7820 /* In case of interleaving (non-unit grouped access):
7821
7822 S1: &base + 2 = x2
7823 S2: &base = x0
7824 S3: &base + 1 = x1
7825 S4: &base + 3 = x3
7826
7827 We create vectorized stores starting from the base address (the access of
7828 the first stmt in the chain, S2 in the above example) when the last store
7829 stmt of the chain (S4) is reached:
7830
7831 VS1: &base = vx2
7832 VS2: &base + vec_size*1 = vx0
7833 VS3: &base + vec_size*2 = vx1
7834 VS4: &base + vec_size*3 = vx3
7835
7836 Then permutation statements are generated:
7837
7838 VS5: vx5 = VEC_PERM_EXPR < vx0, vx3, {0, 8, 1, 9, 2, 10, 3, 11} >
7839 VS6: vx6 = VEC_PERM_EXPR < vx0, vx3, {4, 12, 5, 13, 6, 14, 7, 15} >
7840 ...
7841
7842 And they are put in STMT_VINFO_VEC_STMT of the corresponding scalar stmts
7843 (the order of the data-refs in the output of vect_permute_store_chain
7844 corresponds to the order of scalar stmts in the interleaving chain - see
7845 the documentation of vect_permute_store_chain()).
7846
7847 In case of both multiple types and interleaving, above vector stores and
7848 permutation stmts are created for every copy. The result vector stmts are
7849 put in STMT_VINFO_VEC_STMT for the first copy and in the corresponding
7850 STMT_VINFO_RELATED_STMT for the next copies.
7851 */
7852
7853 auto_vec<tree> vec_masks;
7854 tree vec_mask = NULL;
7855 auto_vec<tree> vec_offsets;
7856 auto_vec<vec<tree> > gvec_oprnds;
7857 gvec_oprnds.safe_grow_cleared (group_size, true);
7858 for (j = 0; j < ncopies; j++)
7859 {
7860 gimple *new_stmt;
7861 if (j == 0)
7862 {
7863 if (slp)
7864 {
7865 /* Get vectorized arguments for SLP_NODE. */
7866 vect_get_vec_defs (vinfo, stmt_info, slp_node, 1,
7867 op, &vec_oprnds);
7868 vec_oprnd = vec_oprnds[0];
7869 }
7870 else
7871 {
7872 /* For interleaved stores we collect vectorized defs for all the
7873 stores in the group in DR_CHAIN and OPRNDS. DR_CHAIN is then
7874 used as an input to vect_permute_store_chain().
7875
7876 If the store is not grouped, DR_GROUP_SIZE is 1, and DR_CHAIN
7877 and OPRNDS are of size 1. */
7878 stmt_vec_info next_stmt_info = first_stmt_info;
7879 for (i = 0; i < group_size; i++)
7880 {
7881 /* Since gaps are not supported for interleaved stores,
7882 DR_GROUP_SIZE is the exact number of stmts in the chain.
7883 Therefore, NEXT_STMT_INFO can't be NULL. In case
7884 there is no interleaving, DR_GROUP_SIZE is 1,
7885 and only one iteration of the loop will be executed. */
7886 op = vect_get_store_rhs (next_stmt_info);
7887 vect_get_vec_defs_for_operand (vinfo, next_stmt_info,
7888 ncopies, op, &gvec_oprnds[i]);
7889 vec_oprnd = gvec_oprnds[i][0];
7890 dr_chain.quick_push (gvec_oprnds[i][0]);
7891 oprnds.quick_push (gvec_oprnds[i][0]);
7892 next_stmt_info = DR_GROUP_NEXT_ELEMENT (next_stmt_info);
7893 }
7894 if (mask)
7895 {
7896 vect_get_vec_defs_for_operand (vinfo, stmt_info, ncopies,
7897 mask, &vec_masks, mask_vectype);
7898 vec_mask = vec_masks[0];
7899 }
7900 }
7901
7902 /* We should have caught mismatched types earlier. */
7903 gcc_assert (useless_type_conversion_p (vectype,
7904 TREE_TYPE (vec_oprnd)));
7905 bool simd_lane_access_p
7906 = STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) != 0;
7907 if (simd_lane_access_p
7908 && !loop_masks
7909 && TREE_CODE (DR_BASE_ADDRESS (first_dr_info->dr)) == ADDR_EXPR
7910 && VAR_P (TREE_OPERAND (DR_BASE_ADDRESS (first_dr_info->dr), 0))
7911 && integer_zerop (get_dr_vinfo_offset (vinfo, first_dr_info))
7912 && integer_zerop (DR_INIT (first_dr_info->dr))
7913 && alias_sets_conflict_p (get_alias_set (aggr_type),
7914 get_alias_set (TREE_TYPE (ref_type))))
7915 {
7916 dataref_ptr = unshare_expr (DR_BASE_ADDRESS (first_dr_info->dr));
7917 dataref_offset = build_int_cst (ref_type, 0);
7918 }
7919 else if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
7920 {
7921 vect_get_gather_scatter_ops (vinfo, loop, stmt_info, &gs_info,
7922 &dataref_ptr, &vec_offsets, ncopies);
7923 vec_offset = vec_offsets[0];
7924 }
7925 else
7926 dataref_ptr
7927 = vect_create_data_ref_ptr (vinfo, first_stmt_info, aggr_type,
7928 simd_lane_access_p ? loop : NULL,
7929 offset, &dummy, gsi, &ptr_incr,
7930 simd_lane_access_p, NULL_TREE, bump);
7931 }
7932 else
7933 {
7934 /* For interleaved stores we created vectorized defs for all the
7935 defs stored in OPRNDS in the previous iteration (previous copy).
7936 DR_CHAIN is then used as an input to vect_permute_store_chain().
7937 If the store is not grouped, DR_GROUP_SIZE is 1, and DR_CHAIN and
7938 OPRNDS are of size 1. */
7939 for (i = 0; i < group_size; i++)
7940 {
7941 vec_oprnd = gvec_oprnds[i][j];
7942 dr_chain[i] = gvec_oprnds[i][j];
7943 oprnds[i] = gvec_oprnds[i][j];
7944 }
7945 if (mask)
7946 vec_mask = vec_masks[j];
7947 if (dataref_offset)
7948 dataref_offset
7949 = int_const_binop (PLUS_EXPR, dataref_offset, bump);
7950 else if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
7951 vec_offset = vec_offsets[j];
7952 else
7953 dataref_ptr = bump_vector_ptr (vinfo, dataref_ptr, ptr_incr, gsi,
7954 stmt_info, bump);
7955 }
7956
7957 if (memory_access_type == VMAT_LOAD_STORE_LANES)
7958 {
7959 tree vec_array;
7960
7961 /* Get an array into which we can store the individual vectors. */
7962 vec_array = create_vector_array (vectype, vec_num);
7963
7964 /* Invalidate the current contents of VEC_ARRAY. This should
7965 become an RTL clobber too, which prevents the vector registers
7966 from being upward-exposed. */
7967 vect_clobber_variable (vinfo, stmt_info, gsi, vec_array);
7968
7969 /* Store the individual vectors into the array. */
7970 for (i = 0; i < vec_num; i++)
7971 {
7972 vec_oprnd = dr_chain[i];
7973 write_vector_array (vinfo, stmt_info,
7974 gsi, vec_oprnd, vec_array, i);
7975 }
7976
7977 tree final_mask = NULL;
7978 if (loop_masks)
7979 final_mask = vect_get_loop_mask (gsi, loop_masks, ncopies,
7980 vectype, j);
7981 if (vec_mask)
7982 final_mask = prepare_load_store_mask (mask_vectype, final_mask,
7983 vec_mask, gsi);
7984
7985 gcall *call;
7986 if (final_mask)
7987 {
7988 /* Emit:
7989 MASK_STORE_LANES (DATAREF_PTR, ALIAS_PTR, VEC_MASK,
7990 VEC_ARRAY). */
7991 unsigned int align = TYPE_ALIGN (TREE_TYPE (vectype));
7992 tree alias_ptr = build_int_cst (ref_type, align);
7993 call = gimple_build_call_internal (IFN_MASK_STORE_LANES, 4,
7994 dataref_ptr, alias_ptr,
7995 final_mask, vec_array);
7996 }
7997 else
7998 {
7999 /* Emit:
8000 MEM_REF[...all elements...] = STORE_LANES (VEC_ARRAY). */
8001 data_ref = create_array_ref (aggr_type, dataref_ptr, ref_type);
8002 call = gimple_build_call_internal (IFN_STORE_LANES, 1,
8003 vec_array);
8004 gimple_call_set_lhs (call, data_ref);
8005 }
8006 gimple_call_set_nothrow (call, true);
8007 vect_finish_stmt_generation (vinfo, stmt_info, call, gsi);
8008 new_stmt = call;
8009
8010 /* Record that VEC_ARRAY is now dead. */
8011 vect_clobber_variable (vinfo, stmt_info, gsi, vec_array);
8012 }
8013 else
8014 {
8015 new_stmt = NULL;
8016 if (grouped_store)
8017 {
8018 if (j == 0)
8019 result_chain.create (group_size);
8020 /* Permute. */
8021 vect_permute_store_chain (vinfo, dr_chain, group_size, stmt_info,
8022 gsi, &result_chain);
8023 }
8024
8025 stmt_vec_info next_stmt_info = first_stmt_info;
8026 for (i = 0; i < vec_num; i++)
8027 {
8028 unsigned misalign;
8029 unsigned HOST_WIDE_INT align;
8030
8031 tree final_mask = NULL_TREE;
8032 if (loop_masks)
8033 final_mask = vect_get_loop_mask (gsi, loop_masks,
8034 vec_num * ncopies,
8035 vectype, vec_num * j + i);
8036 if (vec_mask)
8037 final_mask = prepare_load_store_mask (mask_vectype, final_mask,
8038 vec_mask, gsi);
8039
8040 if (memory_access_type == VMAT_GATHER_SCATTER)
8041 {
8042 tree scale = size_int (gs_info.scale);
8043 gcall *call;
8044 if (loop_masks)
8045 call = gimple_build_call_internal
8046 (IFN_MASK_SCATTER_STORE, 5, dataref_ptr, vec_offset,
8047 scale, vec_oprnd, final_mask);
8048 else
8049 call = gimple_build_call_internal
8050 (IFN_SCATTER_STORE, 4, dataref_ptr, vec_offset,
8051 scale, vec_oprnd);
8052 gimple_call_set_nothrow (call, true);
8053 vect_finish_stmt_generation (vinfo, stmt_info, call, gsi);
8054 new_stmt = call;
8055 break;
8056 }
8057
8058 if (i > 0)
8059 /* Bump the vector pointer. */
8060 dataref_ptr = bump_vector_ptr (vinfo, dataref_ptr, ptr_incr,
8061 gsi, stmt_info, bump);
8062
8063 if (slp)
8064 vec_oprnd = vec_oprnds[i];
8065 else if (grouped_store)
8066 /* For grouped stores vectorized defs are interleaved in
8067 vect_permute_store_chain(). */
8068 vec_oprnd = result_chain[i];
8069
8070 align = known_alignment (DR_TARGET_ALIGNMENT (first_dr_info));
8071 if (aligned_access_p (first_dr_info))
8072 misalign = 0;
8073 else if (DR_MISALIGNMENT (first_dr_info) == -1)
8074 {
8075 align = dr_alignment (vect_dr_behavior (vinfo, first_dr_info));
8076 misalign = 0;
8077 }
8078 else
8079 misalign = DR_MISALIGNMENT (first_dr_info);
8080 if (dataref_offset == NULL_TREE
8081 && TREE_CODE (dataref_ptr) == SSA_NAME)
8082 set_ptr_info_alignment (get_ptr_info (dataref_ptr), align,
8083 misalign);
8084
8085 if (memory_access_type == VMAT_CONTIGUOUS_REVERSE)
8086 {
8087 tree perm_mask = perm_mask_for_reverse (vectype);
8088 tree perm_dest = vect_create_destination_var
8089 (vect_get_store_rhs (stmt_info), vectype);
8090 tree new_temp = make_ssa_name (perm_dest);
8091
8092 /* Generate the permute statement. */
8093 gimple *perm_stmt
8094 = gimple_build_assign (new_temp, VEC_PERM_EXPR, vec_oprnd,
8095 vec_oprnd, perm_mask);
8096 vect_finish_stmt_generation (vinfo, stmt_info, perm_stmt, gsi);
8097
8098 perm_stmt = SSA_NAME_DEF_STMT (new_temp);
8099 vec_oprnd = new_temp;
8100 }
8101
8102 /* Arguments are ready. Create the new vector stmt. */
8103 if (final_mask)
8104 {
8105 align = least_bit_hwi (misalign | align);
8106 tree ptr = build_int_cst (ref_type, align * BITS_PER_UNIT);
8107 gcall *call
8108 = gimple_build_call_internal (IFN_MASK_STORE, 4,
8109 dataref_ptr, ptr,
8110 final_mask, vec_oprnd);
8111 gimple_call_set_nothrow (call, true);
8112 vect_finish_stmt_generation (vinfo, stmt_info, call, gsi);
8113 new_stmt = call;
8114 }
8115 else if (loop_lens)
8116 {
8117 tree final_len
8118 = vect_get_loop_len (loop_vinfo, loop_lens,
8119 vec_num * ncopies, vec_num * j + i);
8120 align = least_bit_hwi (misalign | align);
8121 tree ptr = build_int_cst (ref_type, align * BITS_PER_UNIT);
8122 machine_mode vmode = TYPE_MODE (vectype);
8123 opt_machine_mode new_ovmode
8124 = get_len_load_store_mode (vmode, false);
8125 machine_mode new_vmode = new_ovmode.require ();
8126 /* Need conversion if it's wrapped with VnQI. */
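		      /* E.g. a V4SImode value may first have to be viewed
		         as V16QImode when the target only provides a
		         byte-element len_store pattern (illustrative).  */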
8127 if (vmode != new_vmode)
8128 {
8129 tree new_vtype
8130 = build_vector_type_for_mode (unsigned_intQI_type_node,
8131 new_vmode);
8132 tree var
8133 = vect_get_new_ssa_name (new_vtype, vect_simple_var);
8134 vec_oprnd
8135 = build1 (VIEW_CONVERT_EXPR, new_vtype, vec_oprnd);
8136 gassign *new_stmt
8137 = gimple_build_assign (var, VIEW_CONVERT_EXPR,
8138 vec_oprnd);
8139 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt,
8140 gsi);
8141 vec_oprnd = var;
8142 }
8143 gcall *call
8144 = gimple_build_call_internal (IFN_LEN_STORE, 4, dataref_ptr,
8145 ptr, final_len, vec_oprnd);
8146 gimple_call_set_nothrow (call, true);
8147 vect_finish_stmt_generation (vinfo, stmt_info, call, gsi);
8148 new_stmt = call;
8149 }
8150 else
8151 {
8152 data_ref = fold_build2 (MEM_REF, vectype,
8153 dataref_ptr,
8154 dataref_offset
8155 ? dataref_offset
8156 : build_int_cst (ref_type, 0));
8157 if (aligned_access_p (first_dr_info))
8158 ;
8159 else if (DR_MISALIGNMENT (first_dr_info) == -1)
8160 TREE_TYPE (data_ref)
8161 = build_aligned_type (TREE_TYPE (data_ref),
8162 align * BITS_PER_UNIT);
8163 else
8164 TREE_TYPE (data_ref)
8165 = build_aligned_type (TREE_TYPE (data_ref),
8166 TYPE_ALIGN (elem_type));
8167 vect_copy_ref_info (data_ref, DR_REF (first_dr_info->dr));
8168 new_stmt = gimple_build_assign (data_ref, vec_oprnd);
8169 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
8170 }
8171
8172 if (slp)
8173 continue;
8174
8175 next_stmt_info = DR_GROUP_NEXT_ELEMENT (next_stmt_info);
8176 if (!next_stmt_info)
8177 break;
8178 }
8179 }
8180 if (!slp)
8181 {
8182 if (j == 0)
8183 *vec_stmt = new_stmt;
8184 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
8185 }
8186 }
8187
8188 for (i = 0; i < group_size; ++i)
8189 {
8190 vec<tree> oprndsi = gvec_oprnds[i];
8191 oprndsi.release ();
8192 }
8193 oprnds.release ();
8194 result_chain.release ();
8195 vec_oprnds.release ();
8196
8197 return true;
8198 }
8199
8200 /* Given a vector type VECTYPE, turns permutation SEL into the equivalent
8201 VECTOR_CST mask. No checks are made that the target platform supports the
8202 mask, so callers may wish to test can_vec_perm_const_p separately, or use
8203 vect_gen_perm_mask_checked. */
8204
8205 tree
8206 vect_gen_perm_mask_any (tree vectype, const vec_perm_indices &sel)
8207 {
8208 tree mask_type;
8209
8210 poly_uint64 nunits = sel.length ();
8211 gcc_assert (known_eq (nunits, TYPE_VECTOR_SUBPARTS (vectype)));
8212
8213 mask_type = build_vector_type (ssizetype, nunits);
8214 return vec_perm_indices_to_tree (mask_type, sel);
8215 }
8216
8217 /* Checked version of vect_gen_perm_mask_any. Asserts can_vec_perm_const_p,
8218 i.e. that the target supports the pattern _for arbitrary input vectors_. */
8219
8220 tree
8221 vect_gen_perm_mask_checked (tree vectype, const vec_perm_indices &sel)
8222 {
8223 gcc_assert (can_vec_perm_const_p (TYPE_MODE (vectype), sel));
8224 return vect_gen_perm_mask_any (vectype, sel);
8225 }
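/* Illustrative use of the above helpers (a sketch, not code from this
   file): building a mask that reverses a 4-element vector could look
   roughly like

     vec_perm_builder sel (4, 4, 1);
     for (unsigned int i = 0; i < 4; ++i)
       sel.quick_push (3 - i);
     vec_perm_indices indices (sel, 1, 4);
     tree mask = vect_gen_perm_mask_checked (vectype, indices);

   which yields the VECTOR_CST { 3, 2, 1, 0 }.  */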
8226
8227 /* Given vector variables X and Y that were generated for the scalar
8228 STMT_INFO, generate instructions to permute the vector elements of X and Y
8229 using permutation mask MASK_VEC, insert them at *GSI and return the
8230 permuted vector variable. */
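/* For example (illustrative), with X = vx0, Y = vx1 and
   MASK_VEC = { 0, 2, 4, 6 } this emits roughly

     vect_perm.N = VEC_PERM_EXPR <vx0, vx1, { 0, 2, 4, 6 }>;

   at *GSI and returns the new SSA name; the names used here are made
   up for the example.  */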
8231
8232 static tree
8233 permute_vec_elements (vec_info *vinfo,
8234 tree x, tree y, tree mask_vec, stmt_vec_info stmt_info,
8235 gimple_stmt_iterator *gsi)
8236 {
8237 tree vectype = TREE_TYPE (x);
8238 tree perm_dest, data_ref;
8239 gimple *perm_stmt;
8240
8241 tree scalar_dest = gimple_get_lhs (stmt_info->stmt);
8242 if (scalar_dest && TREE_CODE (scalar_dest) == SSA_NAME)
8243 perm_dest = vect_create_destination_var (scalar_dest, vectype);
8244 else
8245 perm_dest = vect_get_new_vect_var (vectype, vect_simple_var, NULL);
8246 data_ref = make_ssa_name (perm_dest);
8247
8248 /* Generate the permute statement. */
8249 perm_stmt = gimple_build_assign (data_ref, VEC_PERM_EXPR, x, y, mask_vec);
8250 vect_finish_stmt_generation (vinfo, stmt_info, perm_stmt, gsi);
8251
8252 return data_ref;
8253 }
8254
8255 /* Hoist the definitions of all SSA uses on STMT_INFO out of the loop LOOP,
8256 inserting them on the loop's preheader edge. Returns true if we
8257 were successful in doing so (and thus STMT_INFO can then be moved),
8258 otherwise returns false. */
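/* Illustrative (hypothetical) example: if STMT_INFO is the load
     _3 = *p_1;
   and p_1 is defined inside the loop by
     p_1 = &a + 16;
   whose own operands are all defined outside the loop, then the
   definition of p_1 is moved to the preheader, after which the load
   itself can be hoisted by the caller.  */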
8259
8260 static bool
8261 hoist_defs_of_uses (stmt_vec_info stmt_info, class loop *loop)
8262 {
8263 ssa_op_iter i;
8264 tree op;
8265 bool any = false;
8266
8267 FOR_EACH_SSA_TREE_OPERAND (op, stmt_info->stmt, i, SSA_OP_USE)
8268 {
8269 gimple *def_stmt = SSA_NAME_DEF_STMT (op);
8270 if (!gimple_nop_p (def_stmt)
8271 && flow_bb_inside_loop_p (loop, gimple_bb (def_stmt)))
8272 {
8273 /* Make sure we don't need to recurse. While we could do
8274 so in simple cases, for more complex use webs
8275 we don't have an easy way to preserve stmt order to fulfil
8276 dependencies within them. */
8277 tree op2;
8278 ssa_op_iter i2;
8279 if (gimple_code (def_stmt) == GIMPLE_PHI)
8280 return false;
8281 FOR_EACH_SSA_TREE_OPERAND (op2, def_stmt, i2, SSA_OP_USE)
8282 {
8283 gimple *def_stmt2 = SSA_NAME_DEF_STMT (op2);
8284 if (!gimple_nop_p (def_stmt2)
8285 && flow_bb_inside_loop_p (loop, gimple_bb (def_stmt2)))
8286 return false;
8287 }
8288 any = true;
8289 }
8290 }
8291
8292 if (!any)
8293 return true;
8294
8295 FOR_EACH_SSA_TREE_OPERAND (op, stmt_info->stmt, i, SSA_OP_USE)
8296 {
8297 gimple *def_stmt = SSA_NAME_DEF_STMT (op);
8298 if (!gimple_nop_p (def_stmt)
8299 && flow_bb_inside_loop_p (loop, gimple_bb (def_stmt)))
8300 {
8301 gimple_stmt_iterator gsi = gsi_for_stmt (def_stmt);
8302 gsi_remove (&gsi, false);
8303 gsi_insert_on_edge_immediate (loop_preheader_edge (loop), def_stmt);
8304 }
8305 }
8306
8307 return true;
8308 }
8309
8310 /* vectorizable_load.
8311
8312 Check if STMT_INFO reads a non scalar data-ref (array/pointer/structure)
8313 that can be vectorized.
8314 If VEC_STMT is also passed, vectorize STMT_INFO: create a vectorized
8315 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
8316 Return true if STMT_INFO is vectorizable in this way. */
8317
8318 static bool
8319 vectorizable_load (vec_info *vinfo,
8320 stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
8321 gimple **vec_stmt, slp_tree slp_node,
8322 stmt_vector_for_cost *cost_vec)
8323 {
8324 tree scalar_dest;
8325 tree vec_dest = NULL;
8326 tree data_ref = NULL;
8327 loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
8328 class loop *loop = NULL;
8329 class loop *containing_loop = gimple_bb (stmt_info->stmt)->loop_father;
8330 bool nested_in_vect_loop = false;
8331 tree elem_type;
8332 tree new_temp;
8333 machine_mode mode;
8334 tree dummy;
8335 tree dataref_ptr = NULL_TREE;
8336 tree dataref_offset = NULL_TREE;
8337 gimple *ptr_incr = NULL;
8338 int ncopies;
8339 int i, j;
8340 unsigned int group_size;
8341 poly_uint64 group_gap_adj;
8342 tree msq = NULL_TREE, lsq;
8343 tree offset = NULL_TREE;
8344 tree byte_offset = NULL_TREE;
8345 tree realignment_token = NULL_TREE;
8346 gphi *phi = NULL;
8347 vec<tree> dr_chain = vNULL;
8348 bool grouped_load = false;
8349 stmt_vec_info first_stmt_info;
8350 stmt_vec_info first_stmt_info_for_drptr = NULL;
8351 bool compute_in_loop = false;
8352 class loop *at_loop;
8353 int vec_num;
8354 bool slp = (slp_node != NULL);
8355 bool slp_perm = false;
8356 bb_vec_info bb_vinfo = dyn_cast <bb_vec_info> (vinfo);
8357 poly_uint64 vf;
8358 tree aggr_type;
8359 gather_scatter_info gs_info;
8360 tree ref_type;
8361 enum vect_def_type mask_dt = vect_unknown_def_type;
8362
8363 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
8364 return false;
8365
8366 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
8367 && ! vec_stmt)
8368 return false;
8369
8370 if (!STMT_VINFO_DATA_REF (stmt_info))
8371 return false;
8372
8373 /* ??? Alignment analysis for SLP looks at SLP_TREE_SCALAR_STMTS[0]
8374 for unpermuted loads but we get passed SLP_TREE_REPRESENTATIVE
8375 which can be different when reduction chains were re-ordered.
8376 Now that we figured we're a dataref, reset stmt_info back to
8377 SLP_TREE_SCALAR_STMTS[0]. When we're SLP only things should be
8378 refactored in a way to maintain the dr_vec_info pointer for the
8379 relevant access explicitly. */
8380 stmt_vec_info orig_stmt_info = stmt_info;
8381 if (slp_node)
8382 stmt_info = SLP_TREE_SCALAR_STMTS (slp_node)[0];
8383
8384 tree mask = NULL_TREE, mask_vectype = NULL_TREE;
8385 if (gassign *assign = dyn_cast <gassign *> (stmt_info->stmt))
8386 {
8387 scalar_dest = gimple_assign_lhs (assign);
8388 if (TREE_CODE (scalar_dest) != SSA_NAME)
8389 return false;
8390
8391 tree_code code = gimple_assign_rhs_code (assign);
8392 if (code != ARRAY_REF
8393 && code != BIT_FIELD_REF
8394 && code != INDIRECT_REF
8395 && code != COMPONENT_REF
8396 && code != IMAGPART_EXPR
8397 && code != REALPART_EXPR
8398 && code != MEM_REF
8399 && TREE_CODE_CLASS (code) != tcc_declaration)
8400 return false;
8401 }
8402 else
8403 {
8404 gcall *call = dyn_cast <gcall *> (stmt_info->stmt);
8405 if (!call || !gimple_call_internal_p (call))
8406 return false;
8407
8408 internal_fn ifn = gimple_call_internal_fn (call);
8409 if (!internal_load_fn_p (ifn))
8410 return false;
8411
8412 scalar_dest = gimple_call_lhs (call);
8413 if (!scalar_dest)
8414 return false;
8415
8416 int mask_index = internal_fn_mask_index (ifn);
8417 if (mask_index >= 0)
8418 {
8419 mask = gimple_call_arg (call, mask_index);
8420 if (!vect_check_scalar_mask (vinfo, stmt_info, mask, &mask_dt,
8421 &mask_vectype))
8422 return false;
8423 }
8424 }
8425
8426 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
8427 poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
8428
8429 if (loop_vinfo)
8430 {
8431 loop = LOOP_VINFO_LOOP (loop_vinfo);
8432 nested_in_vect_loop = nested_in_vect_loop_p (loop, stmt_info);
8433 vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
8434 }
8435 else
8436 vf = 1;
8437
8438 /* Multiple types in SLP are handled by creating the appropriate number of
8439 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
8440 case of SLP. */
8441 if (slp)
8442 ncopies = 1;
8443 else
8444 ncopies = vect_get_num_copies (loop_vinfo, vectype);
8445
8446 gcc_assert (ncopies >= 1);
8447
8448 /* FORNOW. This restriction should be relaxed. */
8449 if (nested_in_vect_loop && ncopies > 1)
8450 {
8451 if (dump_enabled_p ())
8452 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
8453 "multiple types in nested loop.\n");
8454 return false;
8455 }
8456
8457 /* Invalidate assumptions made by dependence analysis when vectorization
8458 on the unrolled body effectively re-orders stmts. */
8459 if (ncopies > 1
8460 && STMT_VINFO_MIN_NEG_DIST (stmt_info) != 0
8461 && maybe_gt (LOOP_VINFO_VECT_FACTOR (loop_vinfo),
8462 STMT_VINFO_MIN_NEG_DIST (stmt_info)))
8463 {
8464 if (dump_enabled_p ())
8465 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
8466 "cannot perform implicit CSE when unrolling "
8467 "with negative dependence distance\n");
8468 return false;
8469 }
8470
8471 elem_type = TREE_TYPE (vectype);
8472 mode = TYPE_MODE (vectype);
8473
8474 /* FORNOW. In some cases we can vectorize even if the data type is
8475 not supported (e.g. data copies). */
8476 if (optab_handler (mov_optab, mode) == CODE_FOR_nothing)
8477 {
8478 if (dump_enabled_p ())
8479 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
8480 "Aligned load, but unsupported type.\n");
8481 return false;
8482 }
8483
8484 /* Check if the load is a part of an interleaving chain. */
8485 if (STMT_VINFO_GROUPED_ACCESS (stmt_info))
8486 {
8487 grouped_load = true;
8488 /* FORNOW */
8489 gcc_assert (!nested_in_vect_loop);
8490 gcc_assert (!STMT_VINFO_GATHER_SCATTER_P (stmt_info));
8491
8492 first_stmt_info = DR_GROUP_FIRST_ELEMENT (stmt_info);
8493 group_size = DR_GROUP_SIZE (first_stmt_info);
8494
8495 /* Refuse non-SLP vectorization of SLP-only groups. */
8496 if (!slp && STMT_VINFO_SLP_VECT_ONLY (first_stmt_info))
8497 {
8498 if (dump_enabled_p ())
8499 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
8500 "cannot vectorize load in non-SLP mode.\n");
8501 return false;
8502 }
8503
8504 if (slp && SLP_TREE_LOAD_PERMUTATION (slp_node).exists ())
8505 {
8506 slp_perm = true;
8507
8508 if (!loop_vinfo)
8509 {
8510 /* In BB vectorization we may not actually use a loaded vector
8511 accessing elements in excess of DR_GROUP_SIZE. */
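/* Illustrative example (values chosen for exposition only): with
   DR_GROUP_SIZE == 6 and nunits == 4 the rounded-down boundary below
   is 4, so a permutation index of 4 or 5 is rejected: those elements
   would come from a second vector load that also touches elements 6
   and 7, i.e. the gap past the end of the group.  */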
8512 stmt_vec_info group_info = SLP_TREE_SCALAR_STMTS (slp_node)[0];
8513 group_info = DR_GROUP_FIRST_ELEMENT (group_info);
8514 unsigned HOST_WIDE_INT nunits;
8515 unsigned j, k, maxk = 0;
8516 FOR_EACH_VEC_ELT (SLP_TREE_LOAD_PERMUTATION (slp_node), j, k)
8517 if (k > maxk)
8518 maxk = k;
8519 tree vectype = STMT_VINFO_VECTYPE (group_info);
8520 if (!TYPE_VECTOR_SUBPARTS (vectype).is_constant (&nunits)
8521 || maxk >= (DR_GROUP_SIZE (group_info) & ~(nunits - 1)))
8522 {
8523 if (dump_enabled_p ())
8524 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
8525 "BB vectorization with gaps at the end of "
8526 "a load is not supported\n");
8527 return false;
8528 }
8529 }
8530
8531 auto_vec<tree> tem;
8532 unsigned n_perms;
8533 if (!vect_transform_slp_perm_load (vinfo, slp_node, tem, NULL, vf,
8534 true, &n_perms))
8535 {
8536 if (dump_enabled_p ())
8537 dump_printf_loc (MSG_MISSED_OPTIMIZATION,
8538 vect_location,
8539 "unsupported load permutation\n");
8540 return false;
8541 }
8542 }
8543
8544 /* Invalidate assumptions made by dependence analysis when vectorization
8545 on the unrolled body effectively re-orders stmts. */
8546 if (!PURE_SLP_STMT (stmt_info)
8547 && STMT_VINFO_MIN_NEG_DIST (stmt_info) != 0
8548 && maybe_gt (LOOP_VINFO_VECT_FACTOR (loop_vinfo),
8549 STMT_VINFO_MIN_NEG_DIST (stmt_info)))
8550 {
8551 if (dump_enabled_p ())
8552 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
8553 "cannot perform implicit CSE when performing "
8554 "group loads with negative dependence distance\n");
8555 return false;
8556 }
8557 }
8558 else
8559 group_size = 1;
8560
8561 vect_memory_access_type memory_access_type;
8562 enum dr_alignment_support alignment_support_scheme;
8563 if (!get_load_store_type (vinfo, stmt_info, vectype, slp_node, mask, VLS_LOAD,
8564 ncopies, &memory_access_type,
8565 &alignment_support_scheme, &gs_info))
8566 return false;
8567
8568 if (mask)
8569 {
8570 if (memory_access_type == VMAT_CONTIGUOUS)
8571 {
8572 machine_mode vec_mode = TYPE_MODE (vectype);
8573 if (!VECTOR_MODE_P (vec_mode)
8574 || !can_vec_mask_load_store_p (vec_mode,
8575 TYPE_MODE (mask_vectype), true))
8576 return false;
8577 }
8578 else if (memory_access_type != VMAT_LOAD_STORE_LANES
8579 && memory_access_type != VMAT_GATHER_SCATTER)
8580 {
8581 if (dump_enabled_p ())
8582 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
8583 "unsupported access type for masked load.\n");
8584 return false;
8585 }
8586 }
8587
8588 if (!vec_stmt) /* transformation not required. */
8589 {
8590 if (slp_node
8591 && mask
8592 && !vect_maybe_update_slp_op_vectype (SLP_TREE_CHILDREN (slp_node)[0],
8593 mask_vectype))
8594 {
8595 if (dump_enabled_p ())
8596 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
8597 "incompatible vector types for invariants\n");
8598 return false;
8599 }
8600
8601 if (!slp)
8602 STMT_VINFO_MEMORY_ACCESS_TYPE (stmt_info) = memory_access_type;
8603
8604 if (loop_vinfo
8605 && LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo))
8606 check_load_store_for_partial_vectors (loop_vinfo, vectype, VLS_LOAD,
8607 group_size, memory_access_type,
8608 &gs_info, mask);
8609
8610 if (dump_enabled_p ()
8611 && memory_access_type != VMAT_ELEMENTWISE
8612 && memory_access_type != VMAT_GATHER_SCATTER
8613 && alignment_support_scheme != dr_aligned)
8614 dump_printf_loc (MSG_NOTE, vect_location,
8615 "Vectorizing an unaligned access.\n");
8616
8617 STMT_VINFO_TYPE (orig_stmt_info) = load_vec_info_type;
8618 vect_model_load_cost (vinfo, stmt_info, ncopies, vf, memory_access_type,
8619 slp_node, cost_vec);
8620 return true;
8621 }
8622
8623 if (!slp)
8624 gcc_assert (memory_access_type
8625 == STMT_VINFO_MEMORY_ACCESS_TYPE (stmt_info));
8626
8627 if (dump_enabled_p ())
8628 dump_printf_loc (MSG_NOTE, vect_location,
8629 "transform load. ncopies = %d\n", ncopies);
8630
8631 /* Transform. */
8632
8633 dr_vec_info *dr_info = STMT_VINFO_DR_INFO (stmt_info), *first_dr_info = NULL;
8634 ensure_base_align (dr_info);
8635
8636 if (memory_access_type == VMAT_GATHER_SCATTER && gs_info.decl)
8637 {
8638 vect_build_gather_load_calls (vinfo,
8639 stmt_info, gsi, vec_stmt, &gs_info, mask);
8640 return true;
8641 }
8642
8643 if (memory_access_type == VMAT_INVARIANT)
8644 {
8645 gcc_assert (!grouped_load && !mask && !bb_vinfo);
8646 /* If we have versioned for aliasing or the loop doesn't
8647 have any data dependencies that would preclude this,
8648 then we are sure this is a loop invariant load and
8649 thus we can insert it on the preheader edge. */
8650 bool hoist_p = (LOOP_VINFO_NO_DATA_DEPENDENCIES (loop_vinfo)
8651 && !nested_in_vect_loop
8652 && hoist_defs_of_uses (stmt_info, loop));
8653 if (hoist_p)
8654 {
8655 gassign *stmt = as_a <gassign *> (stmt_info->stmt);
8656 if (dump_enabled_p ())
8657 dump_printf_loc (MSG_NOTE, vect_location,
8658 "hoisting out of the vectorized loop: %G", stmt);
8659 scalar_dest = copy_ssa_name (scalar_dest);
8660 tree rhs = unshare_expr (gimple_assign_rhs1 (stmt));
8661 gsi_insert_on_edge_immediate
8662 (loop_preheader_edge (loop),
8663 gimple_build_assign (scalar_dest, rhs));
8664 }
8665 /* These copies are all equivalent, but currently the representation
8666 requires a separate STMT_VINFO_VEC_STMT for each one. */
8667 gimple_stmt_iterator gsi2 = *gsi;
8668 gsi_next (&gsi2);
8669 for (j = 0; j < ncopies; j++)
8670 {
8671 if (hoist_p)
8672 new_temp = vect_init_vector (vinfo, stmt_info, scalar_dest,
8673 vectype, NULL);
8674 else
8675 new_temp = vect_init_vector (vinfo, stmt_info, scalar_dest,
8676 vectype, &gsi2);
8677 gimple *new_stmt = SSA_NAME_DEF_STMT (new_temp);
8678 if (slp)
8679 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
8680 else
8681 {
8682 if (j == 0)
8683 *vec_stmt = new_stmt;
8684 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
8685 }
8686 }
8687 return true;
8688 }
8689
8690 if (memory_access_type == VMAT_ELEMENTWISE
8691 || memory_access_type == VMAT_STRIDED_SLP)
8692 {
8693 gimple_stmt_iterator incr_gsi;
8694 bool insert_after;
8695 tree offvar;
8696 tree ivstep;
8697 tree running_off;
8698 vec<constructor_elt, va_gc> *v = NULL;
8699 tree stride_base, stride_step, alias_off;
8700 /* Checked by get_load_store_type. */
8701 unsigned int const_nunits = nunits.to_constant ();
8702 unsigned HOST_WIDE_INT cst_offset = 0;
8703 tree dr_offset;
8704
8705 gcc_assert (!LOOP_VINFO_USING_PARTIAL_VECTORS_P (loop_vinfo));
8706 gcc_assert (!nested_in_vect_loop);
8707
8708 if (grouped_load)
8709 {
8710 first_stmt_info = DR_GROUP_FIRST_ELEMENT (stmt_info);
8711 first_dr_info = STMT_VINFO_DR_INFO (first_stmt_info);
8712 }
8713 else
8714 {
8715 first_stmt_info = stmt_info;
8716 first_dr_info = dr_info;
8717 }
8718 if (slp && grouped_load)
8719 {
8720 group_size = DR_GROUP_SIZE (first_stmt_info);
8721 ref_type = get_group_alias_ptr_type (first_stmt_info);
8722 }
8723 else
8724 {
8725 if (grouped_load)
8726 cst_offset
8727 = (tree_to_uhwi (TYPE_SIZE_UNIT (TREE_TYPE (vectype)))
8728 * vect_get_place_in_interleaving_chain (stmt_info,
8729 first_stmt_info));
8730 group_size = 1;
8731 ref_type = reference_alias_ptr_type (DR_REF (dr_info->dr));
8732 }
8733
8734 dr_offset = get_dr_vinfo_offset (vinfo, first_dr_info);
8735 stride_base
8736 = fold_build_pointer_plus
8737 (DR_BASE_ADDRESS (first_dr_info->dr),
8738 size_binop (PLUS_EXPR,
8739 convert_to_ptrofftype (dr_offset),
8740 convert_to_ptrofftype (DR_INIT (first_dr_info->dr))));
8741 stride_step = fold_convert (sizetype, DR_STEP (first_dr_info->dr));
8742
8743 /* For a load with loop-invariant (but other than power-of-2)
8744 stride (i.e. not a grouped access) like so:
8745
8746 for (i = 0; i < n; i += stride)
8747 ... = array[i];
8748
8749 we generate a new induction variable and new accesses to
8750 form a new vector (or vectors, depending on ncopies):
8751
8752 for (j = 0; ; j += VF*stride)
8753 tmp1 = array[j];
8754 tmp2 = array[j + stride];
8755 ...
8756 vectemp = {tmp1, tmp2, ...}
8757 */
8758
8759 ivstep = fold_build2 (MULT_EXPR, TREE_TYPE (stride_step), stride_step,
8760 build_int_cst (TREE_TYPE (stride_step), vf));
8761
8762 standard_iv_increment_position (loop, &incr_gsi, &insert_after);
8763
8764 stride_base = cse_and_gimplify_to_preheader (loop_vinfo, stride_base);
8765 ivstep = cse_and_gimplify_to_preheader (loop_vinfo, ivstep);
8766 create_iv (stride_base, ivstep, NULL,
8767 loop, &incr_gsi, insert_after,
8768 &offvar, NULL);
8769
8770 stride_step = cse_and_gimplify_to_preheader (loop_vinfo, stride_step);
8771
8772 running_off = offvar;
8773 alias_off = build_int_cst (ref_type, 0);
8774 int nloads = const_nunits;
8775 int lnel = 1;
8776 tree ltype = TREE_TYPE (vectype);
8777 tree lvectype = vectype;
8778 auto_vec<tree> dr_chain;
8779 if (memory_access_type == VMAT_STRIDED_SLP)
8780 {
8781 if (group_size < const_nunits)
8782 {
8783 /* First check if vec_init optab supports construction from vector
8784 elts directly. Otherwise avoid emitting a constructor of
8785 vector elements by performing the loads using an integer type
8786 of the same size, constructing a vector of those and then
8787 re-interpreting it as the original vector type. This avoids a
8788 huge runtime penalty due to the general inability to perform
8789 store forwarding from smaller stores to a larger load. */
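/* A hedged illustration (types are examples only): for a V8HI vectype
   with group_size == 2 we would do four loads (const_nunits / group_size),
   each of a 32-bit chunk (ptype), build a four-element composition
   vector (vtype) from them and VIEW_CONVERT the result back to V8HI.  */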
8790 tree ptype;
8791 tree vtype
8792 = vector_vector_composition_type (vectype,
8793 const_nunits / group_size,
8794 &ptype);
8795 if (vtype != NULL_TREE)
8796 {
8797 nloads = const_nunits / group_size;
8798 lnel = group_size;
8799 lvectype = vtype;
8800 ltype = ptype;
8801 }
8802 }
8803 else
8804 {
8805 nloads = 1;
8806 lnel = const_nunits;
8807 ltype = vectype;
8808 }
8809 ltype = build_aligned_type (ltype, TYPE_ALIGN (TREE_TYPE (vectype)));
8810 }
8811 /* Load vector(1) scalar_type if the vectype has just one element. */
8812 else if (nloads == 1)
8813 ltype = vectype;
8814
8815 if (slp)
8816 {
8817 /* For SLP permutation support we need to load the whole group,
8818 not only the number of vector stmts the permutation result
8819 fits in. */
8820 if (slp_perm)
8821 {
8822 /* We don't yet generate SLP_TREE_LOAD_PERMUTATIONs for
8823 variable VF. */
8824 unsigned int const_vf = vf.to_constant ();
8825 ncopies = CEIL (group_size * const_vf, const_nunits);
8826 dr_chain.create (ncopies);
8827 }
8828 else
8829 ncopies = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
8830 }
8831 unsigned int group_el = 0;
8832 unsigned HOST_WIDE_INT
8833 elsz = tree_to_uhwi (TYPE_SIZE_UNIT (TREE_TYPE (vectype)));
8834 for (j = 0; j < ncopies; j++)
8835 {
8836 if (nloads > 1)
8837 vec_alloc (v, nloads);
8838 gimple *new_stmt = NULL;
8839 for (i = 0; i < nloads; i++)
8840 {
8841 tree this_off = build_int_cst (TREE_TYPE (alias_off),
8842 group_el * elsz + cst_offset);
8843 tree data_ref = build2 (MEM_REF, ltype, running_off, this_off);
8844 vect_copy_ref_info (data_ref, DR_REF (first_dr_info->dr));
8845 new_stmt = gimple_build_assign (make_ssa_name (ltype), data_ref);
8846 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
8847 if (nloads > 1)
8848 CONSTRUCTOR_APPEND_ELT (v, NULL_TREE,
8849 gimple_assign_lhs (new_stmt));
8850
8851 group_el += lnel;
8852 if (! slp
8853 || group_el == group_size)
8854 {
8855 tree newoff = copy_ssa_name (running_off);
8856 gimple *incr = gimple_build_assign (newoff, POINTER_PLUS_EXPR,
8857 running_off, stride_step);
8858 vect_finish_stmt_generation (vinfo, stmt_info, incr, gsi);
8859
8860 running_off = newoff;
8861 group_el = 0;
8862 }
8863 }
8864 if (nloads > 1)
8865 {
8866 tree vec_inv = build_constructor (lvectype, v);
8867 new_temp = vect_init_vector (vinfo, stmt_info,
8868 vec_inv, lvectype, gsi);
8869 new_stmt = SSA_NAME_DEF_STMT (new_temp);
8870 if (lvectype != vectype)
8871 {
8872 new_stmt = gimple_build_assign (make_ssa_name (vectype),
8873 VIEW_CONVERT_EXPR,
8874 build1 (VIEW_CONVERT_EXPR,
8875 vectype, new_temp));
8876 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
8877 }
8878 }
8879
8880 if (slp)
8881 {
8882 if (slp_perm)
8883 dr_chain.quick_push (gimple_assign_lhs (new_stmt));
8884 else
8885 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
8886 }
8887 else
8888 {
8889 if (j == 0)
8890 *vec_stmt = new_stmt;
8891 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
8892 }
8893 }
8894 if (slp_perm)
8895 {
8896 unsigned n_perms;
8897 vect_transform_slp_perm_load (vinfo, slp_node, dr_chain, gsi, vf,
8898 false, &n_perms);
8899 }
8900 return true;
8901 }
8902
8903 if (memory_access_type == VMAT_GATHER_SCATTER
8904 || (!slp && memory_access_type == VMAT_CONTIGUOUS))
8905 grouped_load = false;
8906
8907 if (grouped_load)
8908 {
8909 first_stmt_info = DR_GROUP_FIRST_ELEMENT (stmt_info);
8910 group_size = DR_GROUP_SIZE (first_stmt_info);
8911 /* For SLP vectorization we directly vectorize a subchain
8912 without permutation. */
8913 if (slp && ! SLP_TREE_LOAD_PERMUTATION (slp_node).exists ())
8914 first_stmt_info = SLP_TREE_SCALAR_STMTS (slp_node)[0];
8915 /* For BB vectorization always use the first stmt to base
8916 the data ref pointer on. */
8917 if (bb_vinfo)
8918 first_stmt_info_for_drptr
8919 = vect_find_first_scalar_stmt_in_slp (slp_node);
8920
8921 /* Check if the chain of loads is already vectorized. */
8922 if (STMT_VINFO_VEC_STMTS (first_stmt_info).exists ()
8923 /* For SLP we would need to copy over SLP_TREE_VEC_STMTS.
8924 ??? But we can only do so if there is exactly one
8925 as we have no way to get at the rest. Leave the CSE
8926 opportunity alone.
8927 ??? With the group load eventually participating
8928 in multiple different permutations (having multiple
8929 slp nodes which refer to the same group) the CSE
8930 is even wrong code. See PR56270. */
8931 && !slp)
8932 {
8933 *vec_stmt = STMT_VINFO_VEC_STMTS (stmt_info)[0];
8934 return true;
8935 }
8936 first_dr_info = STMT_VINFO_DR_INFO (first_stmt_info);
8937 group_gap_adj = 0;
8938
8939 /* VEC_NUM is the number of vect stmts to be created for this group. */
8940 if (slp)
8941 {
8942 grouped_load = false;
8943 /* If an SLP permutation is from N elements to N elements,
8944 and if one vector holds a whole number of N, we can load
8945 the inputs to the permutation in the same way as an
8946 unpermuted sequence. In other cases we need to load the
8947 whole group, not only the number of vector stmts the
8948 permutation result fits in. */
8949 unsigned scalar_lanes = SLP_TREE_LANES (slp_node);
8950 if (slp_perm
8951 && (group_size != scalar_lanes
8952 || !multiple_p (nunits, group_size)))
8953 {
8954 /* We don't yet generate such SLP_TREE_LOAD_PERMUTATIONs for
8955 variable VF; see vect_transform_slp_perm_load. */
8956 unsigned int const_vf = vf.to_constant ();
8957 unsigned int const_nunits = nunits.to_constant ();
8958 vec_num = CEIL (group_size * const_vf, const_nunits);
8959 group_gap_adj = vf * group_size - nunits * vec_num;
8960 }
8961 else
8962 {
8963 vec_num = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
8964 group_gap_adj
8965 = group_size - scalar_lanes;
8966 }
8967 }
8968 else
8969 vec_num = group_size;
8970
8971 ref_type = get_group_alias_ptr_type (first_stmt_info);
8972 }
8973 else
8974 {
8975 first_stmt_info = stmt_info;
8976 first_dr_info = dr_info;
8977 group_size = vec_num = 1;
8978 group_gap_adj = 0;
8979 ref_type = reference_alias_ptr_type (DR_REF (first_dr_info->dr));
8980 }
8981
8982 gcc_assert (alignment_support_scheme);
8983 vec_loop_masks *loop_masks
8984 = (loop_vinfo && LOOP_VINFO_FULLY_MASKED_P (loop_vinfo)
8985 ? &LOOP_VINFO_MASKS (loop_vinfo)
8986 : NULL);
8987 vec_loop_lens *loop_lens
8988 = (loop_vinfo && LOOP_VINFO_FULLY_WITH_LENGTH_P (loop_vinfo)
8989 ? &LOOP_VINFO_LENS (loop_vinfo)
8990 : NULL);
8991
8992 /* We shouldn't use the length-based approach if the loop is fully masked. */
8993 gcc_assert (!loop_lens || !loop_masks);
8994
8995 /* Targets with load-lane instructions must not require explicit
8996 realignment. vect_supportable_dr_alignment always returns either
8997 dr_aligned or dr_unaligned_supported for masked operations. */
8998 gcc_assert ((memory_access_type != VMAT_LOAD_STORE_LANES
8999 && !mask
9000 && !loop_masks)
9001 || alignment_support_scheme == dr_aligned
9002 || alignment_support_scheme == dr_unaligned_supported);
9003
9004 /* In case the vectorization factor (VF) is bigger than the number
9005 of elements that we can fit in a vectype (nunits), we have to generate
9006 more than one vector stmt - i.e - we need to "unroll" the
9007 vector stmt by a factor VF/nunits. In doing so, we record a pointer
9008 from one copy of the vector stmt to the next, in the field
9009 STMT_VINFO_RELATED_STMT. This is necessary in order to allow following
9010 stages to find the correct vector defs to be used when vectorizing
9011 stmts that use the defs of the current stmt. The example below
9012 illustrates the vectorization process when VF=16 and nunits=4 (i.e., we
9013 need to create 4 vectorized stmts):
9014
9015 before vectorization:
9016 RELATED_STMT VEC_STMT
9017 S1: x = memref - -
9018 S2: z = x + 1 - -
9019
9020 step 1: vectorize stmt S1:
9021 We first create the vector stmt VS1_0, and, as usual, record a
9022 pointer to it in the STMT_VINFO_VEC_STMT of the scalar stmt S1.
9023 Next, we create the vector stmt VS1_1, and record a pointer to
9024 it in the STMT_VINFO_RELATED_STMT of the vector stmt VS1_0.
9025 Similarly, for VS1_2 and VS1_3. This is the resulting chain of
9026 stmts and pointers:
9027 RELATED_STMT VEC_STMT
9028 VS1_0: vx0 = memref0 VS1_1 -
9029 VS1_1: vx1 = memref1 VS1_2 -
9030 VS1_2: vx2 = memref2 VS1_3 -
9031 VS1_3: vx3 = memref3 - -
9032 S1: x = load - VS1_0
9033 S2: z = x + 1 - -
9034 */
9035
9036 /* In case of interleaving (non-unit grouped access):
9037
9038 S1: x2 = &base + 2
9039 S2: x0 = &base
9040 S3: x1 = &base + 1
9041 S4: x3 = &base + 3
9042
9043 Vectorized loads are created in the order of memory accesses
9044 starting from the access of the first stmt of the chain:
9045
9046 VS1: vx0 = &base
9047 VS2: vx1 = &base + vec_size*1
9048 VS3: vx3 = &base + vec_size*2
9049 VS4: vx4 = &base + vec_size*3
9050
9051 Then permutation statements are generated:
9052
9053 VS5: vx5 = VEC_PERM_EXPR < vx0, vx1, { 0, 2, ..., i*2 } >
9054 VS6: vx6 = VEC_PERM_EXPR < vx0, vx1, { 1, 3, ..., i*2+1 } >
9055 ...
9056
9057 And they are put in STMT_VINFO_VEC_STMT of the corresponding scalar stmts
9058 (the order of the data-refs in the output of vect_permute_load_chain
9059 corresponds to the order of scalar stmts in the interleaving chain - see
9060 the documentation of vect_permute_load_chain()).
9061 The generation of permutation stmts and recording them in
9062 STMT_VINFO_VEC_STMT is done in vect_transform_grouped_load().
9063
9064 In case of both multiple types and interleaving, the vector loads and
9065 permutation stmts above are created for every copy. The result vector
9066 stmts are put in STMT_VINFO_VEC_STMT for the first copy and in the
9067 corresponding STMT_VINFO_RELATED_STMT for the next copies. */
9068
9069 /* If the data reference is aligned (dr_aligned) or potentially unaligned
9070 on a target that supports unaligned accesses (dr_unaligned_supported)
9071 we generate the following code:
9072 p = initial_addr;
9073 indx = 0;
9074 loop {
9075 p = p + indx * vectype_size;
9076 vec_dest = *(p);
9077 indx = indx + 1;
9078 }
9079
9080 Otherwise, the data reference is potentially unaligned on a target that
9081 does not support unaligned accesses (dr_explicit_realign_optimized) -
9082 then generate the following code, in which the data in each iteration is
9083 obtained by two vector loads, one from the previous iteration, and one
9084 from the current iteration:
9085 p1 = initial_addr;
9086 msq_init = *(floor(p1))
9087 p2 = initial_addr + VS - 1;
9088 realignment_token = call target_builtin;
9089 indx = 0;
9090 loop {
9091 p2 = p2 + indx * vectype_size
9092 lsq = *(floor(p2))
9093 vec_dest = realign_load (msq, lsq, realignment_token)
9094 indx = indx + 1;
9095 msq = lsq;
9096 } */
9097
9098 /* If the misalignment remains the same throughout the execution of the
9099 loop, we can create the init_addr and permutation mask at the loop
9100 preheader. Otherwise, it needs to be created inside the loop.
9101 This can only occur when vectorizing memory accesses in the inner-loop
9102 nested within an outer-loop that is being vectorized. */
9103
9104 if (nested_in_vect_loop
9105 && !multiple_p (DR_STEP_ALIGNMENT (dr_info->dr),
9106 GET_MODE_SIZE (TYPE_MODE (vectype))))
9107 {
9108 gcc_assert (alignment_support_scheme != dr_explicit_realign_optimized);
9109 compute_in_loop = true;
9110 }
9111
9112 bool diff_first_stmt_info
9113 = first_stmt_info_for_drptr && first_stmt_info != first_stmt_info_for_drptr;
9114
9115 if ((alignment_support_scheme == dr_explicit_realign_optimized
9116 || alignment_support_scheme == dr_explicit_realign)
9117 && !compute_in_loop)
9118 {
9119 /* If we have a different first_stmt_info, we can't set up realignment
9120 here, since we can't guarantee that first_stmt_info's DR has been
9121 initialized yet; instead use first_stmt_info_for_drptr's DR by
9122 bumping the distance from first_stmt_info's DR as below. */
9123 if (!diff_first_stmt_info)
9124 msq = vect_setup_realignment (vinfo,
9125 first_stmt_info, gsi, &realignment_token,
9126 alignment_support_scheme, NULL_TREE,
9127 &at_loop);
9128 if (alignment_support_scheme == dr_explicit_realign_optimized)
9129 {
9130 phi = as_a <gphi *> (SSA_NAME_DEF_STMT (msq));
9131 byte_offset = size_binop (MINUS_EXPR, TYPE_SIZE_UNIT (vectype),
9132 size_one_node);
9133 gcc_assert (!first_stmt_info_for_drptr);
9134 }
9135 }
9136 else
9137 at_loop = loop;
9138
9139 if (memory_access_type == VMAT_CONTIGUOUS_REVERSE)
9140 offset = size_int (-TYPE_VECTOR_SUBPARTS (vectype) + 1);
9141
9142 tree bump;
9143 tree vec_offset = NULL_TREE;
9144 if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
9145 {
9146 aggr_type = NULL_TREE;
9147 bump = NULL_TREE;
9148 }
9149 else if (memory_access_type == VMAT_GATHER_SCATTER)
9150 {
9151 aggr_type = elem_type;
9152 vect_get_strided_load_store_ops (stmt_info, loop_vinfo, &gs_info,
9153 &bump, &vec_offset);
9154 }
9155 else
9156 {
9157 if (memory_access_type == VMAT_LOAD_STORE_LANES)
9158 aggr_type = build_array_type_nelts (elem_type, vec_num * nunits);
9159 else
9160 aggr_type = vectype;
9161 bump = vect_get_data_ptr_increment (vinfo, dr_info, aggr_type,
9162 memory_access_type);
9163 }
9164
9165 vec<tree> vec_offsets = vNULL;
9166 auto_vec<tree> vec_masks;
9167 if (mask)
9168 vect_get_vec_defs (vinfo, stmt_info, slp_node, ncopies,
9169 mask, &vec_masks, mask_vectype, NULL_TREE);
9170 tree vec_mask = NULL_TREE;
9171 poly_uint64 group_elt = 0;
9172 for (j = 0; j < ncopies; j++)
9173 {
9174 /* 1. Create the vector or array pointer update chain. */
9175 if (j == 0)
9176 {
9177 bool simd_lane_access_p
9178 = STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) != 0;
9179 if (simd_lane_access_p
9180 && TREE_CODE (DR_BASE_ADDRESS (first_dr_info->dr)) == ADDR_EXPR
9181 && VAR_P (TREE_OPERAND (DR_BASE_ADDRESS (first_dr_info->dr), 0))
9182 && integer_zerop (get_dr_vinfo_offset (vinfo, first_dr_info))
9183 && integer_zerop (DR_INIT (first_dr_info->dr))
9184 && alias_sets_conflict_p (get_alias_set (aggr_type),
9185 get_alias_set (TREE_TYPE (ref_type)))
9186 && (alignment_support_scheme == dr_aligned
9187 || alignment_support_scheme == dr_unaligned_supported))
9188 {
9189 dataref_ptr = unshare_expr (DR_BASE_ADDRESS (first_dr_info->dr));
9190 dataref_offset = build_int_cst (ref_type, 0);
9191 }
9192 else if (diff_first_stmt_info)
9193 {
9194 dataref_ptr
9195 = vect_create_data_ref_ptr (vinfo, first_stmt_info_for_drptr,
9196 aggr_type, at_loop, offset, &dummy,
9197 gsi, &ptr_incr, simd_lane_access_p,
9198 byte_offset, bump);
9199 /* Adjust the pointer by the difference to first_stmt. */
9200 data_reference_p ptrdr
9201 = STMT_VINFO_DATA_REF (first_stmt_info_for_drptr);
9202 tree diff
9203 = fold_convert (sizetype,
9204 size_binop (MINUS_EXPR,
9205 DR_INIT (first_dr_info->dr),
9206 DR_INIT (ptrdr)));
9207 dataref_ptr = bump_vector_ptr (vinfo, dataref_ptr, ptr_incr, gsi,
9208 stmt_info, diff);
9209 if (alignment_support_scheme == dr_explicit_realign)
9210 {
9211 msq = vect_setup_realignment (vinfo,
9212 first_stmt_info_for_drptr, gsi,
9213 &realignment_token,
9214 alignment_support_scheme,
9215 dataref_ptr, &at_loop);
9216 gcc_assert (!compute_in_loop);
9217 }
9218 }
9219 else if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
9220 {
9221 vect_get_gather_scatter_ops (vinfo, loop, stmt_info, &gs_info,
9222 &dataref_ptr, &vec_offsets, ncopies);
9223 vec_offset = vec_offsets[0];
9224 }
9225 else
9226 dataref_ptr
9227 = vect_create_data_ref_ptr (vinfo, first_stmt_info, aggr_type,
9228 at_loop,
9229 offset, &dummy, gsi, &ptr_incr,
9230 simd_lane_access_p,
9231 byte_offset, bump);
9232 if (mask)
9233 vec_mask = vec_masks[0];
9234 }
9235 else
9236 {
9237 if (dataref_offset)
9238 dataref_offset = int_const_binop (PLUS_EXPR, dataref_offset,
9239 bump);
9240 else if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
9241 vec_offset = vec_offsets[j];
9242 else
9243 dataref_ptr = bump_vector_ptr (vinfo, dataref_ptr, ptr_incr, gsi,
9244 stmt_info, bump);
9245 if (mask)
9246 vec_mask = vec_masks[j];
9247 }
9248
9249 if (grouped_load || slp_perm)
9250 dr_chain.create (vec_num);
9251
9252 gimple *new_stmt = NULL;
9253 if (memory_access_type == VMAT_LOAD_STORE_LANES)
9254 {
9255 tree vec_array;
9256
9257 vec_array = create_vector_array (vectype, vec_num);
9258
9259 tree final_mask = NULL_TREE;
9260 if (loop_masks)
9261 final_mask = vect_get_loop_mask (gsi, loop_masks, ncopies,
9262 vectype, j);
9263 if (vec_mask)
9264 final_mask = prepare_load_store_mask (mask_vectype, final_mask,
9265 vec_mask, gsi);
9266
9267 gcall *call;
9268 if (final_mask)
9269 {
9270 /* Emit:
9271 VEC_ARRAY = MASK_LOAD_LANES (DATAREF_PTR, ALIAS_PTR,
9272 VEC_MASK). */
9273 unsigned int align = TYPE_ALIGN (TREE_TYPE (vectype));
9274 tree alias_ptr = build_int_cst (ref_type, align);
9275 call = gimple_build_call_internal (IFN_MASK_LOAD_LANES, 3,
9276 dataref_ptr, alias_ptr,
9277 final_mask);
9278 }
9279 else
9280 {
9281 /* Emit:
9282 VEC_ARRAY = LOAD_LANES (MEM_REF[...all elements...]). */
9283 data_ref = create_array_ref (aggr_type, dataref_ptr, ref_type);
9284 call = gimple_build_call_internal (IFN_LOAD_LANES, 1, data_ref);
9285 }
9286 gimple_call_set_lhs (call, vec_array);
9287 gimple_call_set_nothrow (call, true);
9288 vect_finish_stmt_generation (vinfo, stmt_info, call, gsi);
9289 new_stmt = call;
9290
9291 /* Extract each vector into an SSA_NAME. */
9292 for (i = 0; i < vec_num; i++)
9293 {
9294 new_temp = read_vector_array (vinfo, stmt_info, gsi, scalar_dest,
9295 vec_array, i);
9296 dr_chain.quick_push (new_temp);
9297 }
9298
9299 /* Record the mapping between SSA_NAMEs and statements. */
9300 vect_record_grouped_load_vectors (vinfo, stmt_info, dr_chain);
9301
9302 /* Record that VEC_ARRAY is now dead. */
9303 vect_clobber_variable (vinfo, stmt_info, gsi, vec_array);
9304 }
9305 else
9306 {
9307 for (i = 0; i < vec_num; i++)
9308 {
9309 tree final_mask = NULL_TREE;
9310 if (loop_masks
9311 && memory_access_type != VMAT_INVARIANT)
9312 final_mask = vect_get_loop_mask (gsi, loop_masks,
9313 vec_num * ncopies,
9314 vectype, vec_num * j + i);
9315 if (vec_mask)
9316 final_mask = prepare_load_store_mask (mask_vectype, final_mask,
9317 vec_mask, gsi);
9318
9319 if (i > 0)
9320 dataref_ptr = bump_vector_ptr (vinfo, dataref_ptr, ptr_incr,
9321 gsi, stmt_info, bump);
9322
9323 /* 2. Create the vector-load in the loop. */
9324 switch (alignment_support_scheme)
9325 {
9326 case dr_aligned:
9327 case dr_unaligned_supported:
9328 {
9329 unsigned int misalign;
9330 unsigned HOST_WIDE_INT align;
9331
9332 if (memory_access_type == VMAT_GATHER_SCATTER)
9333 {
9334 tree zero = build_zero_cst (vectype);
9335 tree scale = size_int (gs_info.scale);
9336 gcall *call;
9337 if (loop_masks)
9338 call = gimple_build_call_internal
9339 (IFN_MASK_GATHER_LOAD, 5, dataref_ptr,
9340 vec_offset, scale, zero, final_mask);
9341 else
9342 call = gimple_build_call_internal
9343 (IFN_GATHER_LOAD, 4, dataref_ptr,
9344 vec_offset, scale, zero);
9345 gimple_call_set_nothrow (call, true);
9346 new_stmt = call;
9347 data_ref = NULL_TREE;
9348 break;
9349 }
9350
9351 align =
9352 known_alignment (DR_TARGET_ALIGNMENT (first_dr_info));
9353 if (alignment_support_scheme == dr_aligned)
9354 {
9355 gcc_assert (aligned_access_p (first_dr_info));
9356 misalign = 0;
9357 }
9358 else if (DR_MISALIGNMENT (first_dr_info) == -1)
9359 {
9360 align = dr_alignment
9361 (vect_dr_behavior (vinfo, first_dr_info));
9362 misalign = 0;
9363 }
9364 else
9365 misalign = DR_MISALIGNMENT (first_dr_info);
9366 if (dataref_offset == NULL_TREE
9367 && TREE_CODE (dataref_ptr) == SSA_NAME)
9368 set_ptr_info_alignment (get_ptr_info (dataref_ptr),
9369 align, misalign);
9370
9371 if (final_mask)
9372 {
9373 align = least_bit_hwi (misalign | align);
9374 tree ptr = build_int_cst (ref_type,
9375 align * BITS_PER_UNIT);
9376 gcall *call
9377 = gimple_build_call_internal (IFN_MASK_LOAD, 3,
9378 dataref_ptr, ptr,
9379 final_mask);
9380 gimple_call_set_nothrow (call, true);
9381 new_stmt = call;
9382 data_ref = NULL_TREE;
9383 }
9384 else if (loop_lens && memory_access_type != VMAT_INVARIANT)
9385 {
9386 tree final_len
9387 = vect_get_loop_len (loop_vinfo, loop_lens,
9388 vec_num * ncopies,
9389 vec_num * j + i);
9390 align = least_bit_hwi (misalign | align);
9391 tree ptr = build_int_cst (ref_type,
9392 align * BITS_PER_UNIT);
9393 gcall *call
9394 = gimple_build_call_internal (IFN_LEN_LOAD, 3,
9395 dataref_ptr, ptr,
9396 final_len);
9397 gimple_call_set_nothrow (call, true);
9398 new_stmt = call;
9399 data_ref = NULL_TREE;
9400
9401 /* Need conversion if it's wrapped with VnQI. */
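/* For example (illustrative only): on a target whose IFN_LEN_LOAD
   operates on byte vectors a V4SI value is loaded as V16QI here and
   the VIEW_CONVERT_EXPR below reinterprets it as the original
   vectype.  */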
9402 machine_mode vmode = TYPE_MODE (vectype);
9403 opt_machine_mode new_ovmode
9404 = get_len_load_store_mode (vmode, true);
9405 machine_mode new_vmode = new_ovmode.require ();
9406 if (vmode != new_vmode)
9407 {
9408 tree qi_type = unsigned_intQI_type_node;
9409 tree new_vtype
9410 = build_vector_type_for_mode (qi_type, new_vmode);
9411 tree var = vect_get_new_ssa_name (new_vtype,
9412 vect_simple_var);
9413 gimple_set_lhs (call, var);
9414 vect_finish_stmt_generation (vinfo, stmt_info, call,
9415 gsi);
9416 tree op = build1 (VIEW_CONVERT_EXPR, vectype, var);
9417 new_stmt
9418 = gimple_build_assign (vec_dest,
9419 VIEW_CONVERT_EXPR, op);
9420 }
9421 }
9422 else
9423 {
9424 tree ltype = vectype;
9425 tree new_vtype = NULL_TREE;
9426 unsigned HOST_WIDE_INT gap
9427 = DR_GROUP_GAP (first_stmt_info);
9428 unsigned int vect_align
9429 = vect_known_alignment_in_bytes (first_dr_info);
9430 unsigned int scalar_dr_size
9431 = vect_get_scalar_dr_size (first_dr_info);
9432 /* If there's no peeling for gaps but we have a gap
9433 with slp loads then load the lower half of the
9434 vector only. See get_group_load_store_type for
9435 when we apply this optimization. */
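/* Illustrative example (values for exposition only): for a V4SI
   group with group_size == 4 and gap == 2 only the first two
   elements are needed, so we load a half-vector (ltype) and the
   constructor further down pads the remaining lanes with zeros.  */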
9436 if (slp
9437 && loop_vinfo
9438 && !LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo)
9439 && gap != 0
9440 && known_eq (nunits, (group_size - gap) * 2)
9441 && known_eq (nunits, group_size)
9442 && gap >= (vect_align / scalar_dr_size))
9443 {
9444 tree half_vtype;
9445 new_vtype
9446 = vector_vector_composition_type (vectype, 2,
9447 &half_vtype);
9448 if (new_vtype != NULL_TREE)
9449 ltype = half_vtype;
9450 }
9451 tree offset
9452 = (dataref_offset ? dataref_offset
9453 : build_int_cst (ref_type, 0));
9454 if (ltype != vectype
9455 && memory_access_type == VMAT_CONTIGUOUS_REVERSE)
9456 {
9457 unsigned HOST_WIDE_INT gap_offset
9458 = gap * tree_to_uhwi (TYPE_SIZE_UNIT (elem_type));
9459 tree gapcst = build_int_cst (ref_type, gap_offset);
9460 offset = size_binop (PLUS_EXPR, offset, gapcst);
9461 }
9462 data_ref
9463 = fold_build2 (MEM_REF, ltype, dataref_ptr, offset);
9464 if (alignment_support_scheme == dr_aligned)
9465 ;
9466 else if (DR_MISALIGNMENT (first_dr_info) == -1)
9467 TREE_TYPE (data_ref)
9468 = build_aligned_type (TREE_TYPE (data_ref),
9469 align * BITS_PER_UNIT);
9470 else
9471 TREE_TYPE (data_ref)
9472 = build_aligned_type (TREE_TYPE (data_ref),
9473 TYPE_ALIGN (elem_type));
9474 if (ltype != vectype)
9475 {
9476 vect_copy_ref_info (data_ref,
9477 DR_REF (first_dr_info->dr));
9478 tree tem = make_ssa_name (ltype);
9479 new_stmt = gimple_build_assign (tem, data_ref);
9480 vect_finish_stmt_generation (vinfo, stmt_info,
9481 new_stmt, gsi);
9482 data_ref = NULL;
9483 vec<constructor_elt, va_gc> *v;
9484 vec_alloc (v, 2);
9485 if (memory_access_type == VMAT_CONTIGUOUS_REVERSE)
9486 {
9487 CONSTRUCTOR_APPEND_ELT (v, NULL_TREE,
9488 build_zero_cst (ltype));
9489 CONSTRUCTOR_APPEND_ELT (v, NULL_TREE, tem);
9490 }
9491 else
9492 {
9493 CONSTRUCTOR_APPEND_ELT (v, NULL_TREE, tem);
9494 CONSTRUCTOR_APPEND_ELT (v, NULL_TREE,
9495 build_zero_cst (ltype));
9496 }
9497 gcc_assert (new_vtype != NULL_TREE);
9498 if (new_vtype == vectype)
9499 new_stmt = gimple_build_assign (
9500 vec_dest, build_constructor (vectype, v));
9501 else
9502 {
9503 tree new_vname = make_ssa_name (new_vtype);
9504 new_stmt = gimple_build_assign (
9505 new_vname, build_constructor (new_vtype, v));
9506 vect_finish_stmt_generation (vinfo, stmt_info,
9507 new_stmt, gsi);
9508 new_stmt = gimple_build_assign (
9509 vec_dest, build1 (VIEW_CONVERT_EXPR, vectype,
9510 new_vname));
9511 }
9512 }
9513 }
9514 break;
9515 }
9516 case dr_explicit_realign:
9517 {
9518 tree ptr, bump;
9519
9520 tree vs = size_int (TYPE_VECTOR_SUBPARTS (vectype));
9521
9522 if (compute_in_loop)
9523 msq = vect_setup_realignment (vinfo, first_stmt_info, gsi,
9524 &realignment_token,
9525 dr_explicit_realign,
9526 dataref_ptr, NULL);
9527
9528 if (TREE_CODE (dataref_ptr) == SSA_NAME)
9529 ptr = copy_ssa_name (dataref_ptr);
9530 else
9531 ptr = make_ssa_name (TREE_TYPE (dataref_ptr));
9532 // For explicit realign the target alignment should be
9533 // known at compile time.
9534 unsigned HOST_WIDE_INT align =
9535 DR_TARGET_ALIGNMENT (first_dr_info).to_constant ();
9536 new_stmt = gimple_build_assign
9537 (ptr, BIT_AND_EXPR, dataref_ptr,
9538 build_int_cst
9539 (TREE_TYPE (dataref_ptr),
9540 -(HOST_WIDE_INT) align));
9541 vect_finish_stmt_generation (vinfo, stmt_info,
9542 new_stmt, gsi);
9543 data_ref
9544 = build2 (MEM_REF, vectype, ptr,
9545 build_int_cst (ref_type, 0));
9546 vect_copy_ref_info (data_ref, DR_REF (first_dr_info->dr));
9547 vec_dest = vect_create_destination_var (scalar_dest,
9548 vectype);
9549 new_stmt = gimple_build_assign (vec_dest, data_ref);
9550 new_temp = make_ssa_name (vec_dest, new_stmt);
9551 gimple_assign_set_lhs (new_stmt, new_temp);
9552 gimple_move_vops (new_stmt, stmt_info->stmt);
9553 vect_finish_stmt_generation (vinfo, stmt_info,
9554 new_stmt, gsi);
9555 msq = new_temp;
9556
9557 bump = size_binop (MULT_EXPR, vs,
9558 TYPE_SIZE_UNIT (elem_type));
9559 bump = size_binop (MINUS_EXPR, bump, size_one_node);
9560 ptr = bump_vector_ptr (vinfo, dataref_ptr, NULL, gsi,
9561 stmt_info, bump);
9562 new_stmt = gimple_build_assign
9563 (NULL_TREE, BIT_AND_EXPR, ptr,
9564 build_int_cst
9565 (TREE_TYPE (ptr), -(HOST_WIDE_INT) align));
9566 ptr = copy_ssa_name (ptr, new_stmt);
9567 gimple_assign_set_lhs (new_stmt, ptr);
9568 vect_finish_stmt_generation (vinfo, stmt_info,
9569 new_stmt, gsi);
9570 data_ref
9571 = build2 (MEM_REF, vectype, ptr,
9572 build_int_cst (ref_type, 0));
9573 break;
9574 }
9575 case dr_explicit_realign_optimized:
9576 {
9577 if (TREE_CODE (dataref_ptr) == SSA_NAME)
9578 new_temp = copy_ssa_name (dataref_ptr);
9579 else
9580 new_temp = make_ssa_name (TREE_TYPE (dataref_ptr));
9581 // We should only be doing this if we know the target
9582 // alignment at compile time.
9583 unsigned HOST_WIDE_INT align =
9584 DR_TARGET_ALIGNMENT (first_dr_info).to_constant ();
9585 new_stmt = gimple_build_assign
9586 (new_temp, BIT_AND_EXPR, dataref_ptr,
9587 build_int_cst (TREE_TYPE (dataref_ptr),
9588 -(HOST_WIDE_INT) align));
9589 vect_finish_stmt_generation (vinfo, stmt_info,
9590 new_stmt, gsi);
9591 data_ref
9592 = build2 (MEM_REF, vectype, new_temp,
9593 build_int_cst (ref_type, 0));
9594 break;
9595 }
9596 default:
9597 gcc_unreachable ();
9598 }
9599 vec_dest = vect_create_destination_var (scalar_dest, vectype);
9600 /* DATA_REF is null if we've already built the statement. */
9601 if (data_ref)
9602 {
9603 vect_copy_ref_info (data_ref, DR_REF (first_dr_info->dr));
9604 new_stmt = gimple_build_assign (vec_dest, data_ref);
9605 }
9606 new_temp = make_ssa_name (vec_dest, new_stmt);
9607 gimple_set_lhs (new_stmt, new_temp);
9608 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
9609
9610 /* 3. Handle explicit realignment if necessary/supported.
9611 Create in loop:
9612 vec_dest = realign_load (msq, lsq, realignment_token) */
9613 if (alignment_support_scheme == dr_explicit_realign_optimized
9614 || alignment_support_scheme == dr_explicit_realign)
9615 {
9616 lsq = gimple_assign_lhs (new_stmt);
9617 if (!realignment_token)
9618 realignment_token = dataref_ptr;
9619 vec_dest = vect_create_destination_var (scalar_dest, vectype);
9620 new_stmt = gimple_build_assign (vec_dest, REALIGN_LOAD_EXPR,
9621 msq, lsq, realignment_token);
9622 new_temp = make_ssa_name (vec_dest, new_stmt);
9623 gimple_assign_set_lhs (new_stmt, new_temp);
9624 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
9625
9626 if (alignment_support_scheme == dr_explicit_realign_optimized)
9627 {
9628 gcc_assert (phi);
9629 if (i == vec_num - 1 && j == ncopies - 1)
9630 add_phi_arg (phi, lsq,
9631 loop_latch_edge (containing_loop),
9632 UNKNOWN_LOCATION);
9633 msq = lsq;
9634 }
9635 }
9636
9637 if (memory_access_type == VMAT_CONTIGUOUS_REVERSE)
9638 {
9639 tree perm_mask = perm_mask_for_reverse (vectype);
9640 new_temp = permute_vec_elements (vinfo, new_temp, new_temp,
9641 perm_mask, stmt_info, gsi);
9642 new_stmt = SSA_NAME_DEF_STMT (new_temp);
9643 }
9644
9645 /* Collect vector loads and later create their permutation in
9646 vect_transform_grouped_load (). */
9647 if (grouped_load || slp_perm)
9648 dr_chain.quick_push (new_temp);
9649
9650 /* Store vector loads in the corresponding SLP_NODE. */
9651 if (slp && !slp_perm)
9652 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
9653
9654 /* With SLP permutation we load the gaps as well; without
9655 it we need to skip the gaps after we manage to fully load
9656 all elements. group_gap_adj is DR_GROUP_SIZE here. */
9657 group_elt += nunits;
9658 if (maybe_ne (group_gap_adj, 0U)
9659 && !slp_perm
9660 && known_eq (group_elt, group_size - group_gap_adj))
9661 {
9662 poly_wide_int bump_val
9663 = (wi::to_wide (TYPE_SIZE_UNIT (elem_type))
9664 * group_gap_adj);
9665 tree bump = wide_int_to_tree (sizetype, bump_val);
9666 dataref_ptr = bump_vector_ptr (vinfo, dataref_ptr, ptr_incr,
9667 gsi, stmt_info, bump);
9668 group_elt = 0;
9669 }
9670 }
9671 /* Bump the vector pointer to account for a gap or for excess
9672 elements loaded for a permuted SLP load. */
9673 if (maybe_ne (group_gap_adj, 0U) && slp_perm)
9674 {
9675 poly_wide_int bump_val
9676 = (wi::to_wide (TYPE_SIZE_UNIT (elem_type))
9677 * group_gap_adj);
9678 tree bump = wide_int_to_tree (sizetype, bump_val);
9679 dataref_ptr = bump_vector_ptr (vinfo, dataref_ptr, ptr_incr, gsi,
9680 stmt_info, bump);
9681 }
9682 }
9683
9684 if (slp && !slp_perm)
9685 continue;
9686
9687 if (slp_perm)
9688 {
9689 unsigned n_perms;
9690 bool ok = vect_transform_slp_perm_load (vinfo, slp_node, dr_chain,
9691 gsi, vf, false, &n_perms);
9692 gcc_assert (ok);
9693 }
9694 else
9695 {
9696 if (grouped_load)
9697 {
9698 if (memory_access_type != VMAT_LOAD_STORE_LANES)
9699 vect_transform_grouped_load (vinfo, stmt_info, dr_chain,
9700 group_size, gsi);
9701 *vec_stmt = STMT_VINFO_VEC_STMTS (stmt_info)[0];
9702 }
9703 else
9704 {
9705 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
9706 }
9707 }
9708 dr_chain.release ();
9709 }
9710 if (!slp)
9711 *vec_stmt = STMT_VINFO_VEC_STMTS (stmt_info)[0];
9712
9713 return true;
9714 }
9715
9716 /* Function vect_is_simple_cond.
9717
9718 Input:
9719 LOOP - the loop that is being vectorized.
9720 COND - Condition that is checked for simple use.
9721
9722 Output:
9723 *COMP_VECTYPE - the vector type for the comparison.
9724 *DTS - The def types for the arguments of the comparison
9725
9726 Returns whether a COND can be vectorized. Checks whether
9727 condition operands are supportable using vect_is_simple_use. */
9728
9729 static bool
9730 vect_is_simple_cond (tree cond, vec_info *vinfo, stmt_vec_info stmt_info,
9731 slp_tree slp_node, tree *comp_vectype,
9732 enum vect_def_type *dts, tree vectype)
9733 {
9734 tree lhs, rhs;
9735 tree vectype1 = NULL_TREE, vectype2 = NULL_TREE;
9736 slp_tree slp_op;
9737
9738 /* Mask case. */
9739 if (TREE_CODE (cond) == SSA_NAME
9740 && VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (cond)))
9741 {
9742 if (!vect_is_simple_use (vinfo, stmt_info, slp_node, 0, &cond,
9743 &slp_op, &dts[0], comp_vectype)
9744 || !*comp_vectype
9745 || !VECTOR_BOOLEAN_TYPE_P (*comp_vectype))
9746 return false;
9747 return true;
9748 }
9749
9750 if (!COMPARISON_CLASS_P (cond))
9751 return false;
9752
9753 lhs = TREE_OPERAND (cond, 0);
9754 rhs = TREE_OPERAND (cond, 1);
9755
9756 if (TREE_CODE (lhs) == SSA_NAME)
9757 {
9758 if (!vect_is_simple_use (vinfo, stmt_info, slp_node, 0,
9759 &lhs, &slp_op, &dts[0], &vectype1))
9760 return false;
9761 }
9762 else if (TREE_CODE (lhs) == INTEGER_CST || TREE_CODE (lhs) == REAL_CST
9763 || TREE_CODE (lhs) == FIXED_CST)
9764 dts[0] = vect_constant_def;
9765 else
9766 return false;
9767
9768 if (TREE_CODE (rhs) == SSA_NAME)
9769 {
9770 if (!vect_is_simple_use (vinfo, stmt_info, slp_node, 1,
9771 &rhs, &slp_op, &dts[1], &vectype2))
9772 return false;
9773 }
9774 else if (TREE_CODE (rhs) == INTEGER_CST || TREE_CODE (rhs) == REAL_CST
9775 || TREE_CODE (rhs) == FIXED_CST)
9776 dts[1] = vect_constant_def;
9777 else
9778 return false;
9779
9780 if (vectype1 && vectype2
9781 && maybe_ne (TYPE_VECTOR_SUBPARTS (vectype1),
9782 TYPE_VECTOR_SUBPARTS (vectype2)))
9783 return false;
9784
9785 *comp_vectype = vectype1 ? vectype1 : vectype2;
9786 /* Invariant comparison. */
9787 if (! *comp_vectype)
9788 {
9789 tree scalar_type = TREE_TYPE (lhs);
9790 if (VECT_SCALAR_BOOLEAN_TYPE_P (scalar_type))
9791 *comp_vectype = truth_type_for (vectype);
9792 else
9793 {
9794 /* If we can widen the comparison to match vectype do so. */
9795 if (INTEGRAL_TYPE_P (scalar_type)
9796 && !slp_node
9797 && tree_int_cst_lt (TYPE_SIZE (scalar_type),
9798 TYPE_SIZE (TREE_TYPE (vectype))))
9799 scalar_type = build_nonstandard_integer_type
9800 (vector_element_bits (vectype), TYPE_UNSIGNED (scalar_type));
9801 *comp_vectype = get_vectype_for_scalar_type (vinfo, scalar_type,
9802 slp_node);
9803 }
9804 }
9805
9806 return true;
9807 }
9808
9809 /* vectorizable_condition.
9810
9811 Check if STMT_INFO is a conditional modify expression that can be vectorized.
9812 If VEC_STMT is also passed, vectorize STMT_INFO: create a vectorized
9813 stmt using VEC_COND_EXPR to replace it, put it in VEC_STMT, and insert it
9814 at GSI.
9815
9816 When STMT_INFO is vectorized as a nested cycle, for_reduction is true.
9817
9818 Return true if STMT_INFO is vectorizable in this way. */
9819
9820 static bool
9821 vectorizable_condition (vec_info *vinfo,
9822 stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
9823 gimple **vec_stmt,
9824 slp_tree slp_node, stmt_vector_for_cost *cost_vec)
9825 {
9826 tree scalar_dest = NULL_TREE;
9827 tree vec_dest = NULL_TREE;
9828 tree cond_expr, cond_expr0 = NULL_TREE, cond_expr1 = NULL_TREE;
9829 tree then_clause, else_clause;
9830 tree comp_vectype = NULL_TREE;
9831 tree vec_cond_lhs = NULL_TREE, vec_cond_rhs = NULL_TREE;
9832 tree vec_then_clause = NULL_TREE, vec_else_clause = NULL_TREE;
9833 tree vec_compare;
9834 tree new_temp;
9835 loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
9836 enum vect_def_type dts[4]
9837 = {vect_unknown_def_type, vect_unknown_def_type,
9838 vect_unknown_def_type, vect_unknown_def_type};
9839 int ndts = 4;
9840 int ncopies;
9841 int vec_num;
9842 enum tree_code code, cond_code, bitop1 = NOP_EXPR, bitop2 = NOP_EXPR;
9843 int i;
9844 bb_vec_info bb_vinfo = dyn_cast <bb_vec_info> (vinfo);
9845 vec<tree> vec_oprnds0 = vNULL;
9846 vec<tree> vec_oprnds1 = vNULL;
9847 vec<tree> vec_oprnds2 = vNULL;
9848 vec<tree> vec_oprnds3 = vNULL;
9849 tree vec_cmp_type;
9850 bool masked = false;
9851
9852 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
9853 return false;
9854
9855 /* Is vectorizable conditional operation? */
9856 gassign *stmt = dyn_cast <gassign *> (stmt_info->stmt);
9857 if (!stmt)
9858 return false;
9859
9860 code = gimple_assign_rhs_code (stmt);
9861 if (code != COND_EXPR)
9862 return false;
9863
9864 stmt_vec_info reduc_info = NULL;
9865 int reduc_index = -1;
9866 vect_reduction_type reduction_type = TREE_CODE_REDUCTION;
9867 bool for_reduction
9868 = STMT_VINFO_REDUC_DEF (vect_orig_stmt (stmt_info)) != NULL;
9869 if (for_reduction)
9870 {
9871 if (STMT_SLP_TYPE (stmt_info))
9872 return false;
9873 reduc_info = info_for_reduction (vinfo, stmt_info);
9874 reduction_type = STMT_VINFO_REDUC_TYPE (reduc_info);
9875 reduc_index = STMT_VINFO_REDUC_IDX (stmt_info);
9876 gcc_assert (reduction_type != EXTRACT_LAST_REDUCTION
9877 || reduc_index != -1);
9878 }
9879 else
9880 {
9881 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
9882 return false;
9883 }
9884
9885 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
9886 tree vectype1 = NULL_TREE, vectype2 = NULL_TREE;
9887
9888 if (slp_node)
9889 {
9890 ncopies = 1;
9891 vec_num = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
9892 }
9893 else
9894 {
9895 ncopies = vect_get_num_copies (loop_vinfo, vectype);
9896 vec_num = 1;
9897 }
9898
9899 gcc_assert (ncopies >= 1);
9900 if (for_reduction && ncopies > 1)
9901 return false; /* FORNOW */
9902
9903 cond_expr = gimple_assign_rhs1 (stmt);
9904
9905 if (!vect_is_simple_cond (cond_expr, vinfo, stmt_info, slp_node,
9906 &comp_vectype, &dts[0], vectype)
9907 || !comp_vectype)
9908 return false;
9909
9910 unsigned op_adjust = COMPARISON_CLASS_P (cond_expr) ? 1 : 0;
9911 slp_tree then_slp_node, else_slp_node;
9912 if (!vect_is_simple_use (vinfo, stmt_info, slp_node, 1 + op_adjust,
9913 &then_clause, &then_slp_node, &dts[2], &vectype1))
9914 return false;
9915 if (!vect_is_simple_use (vinfo, stmt_info, slp_node, 2 + op_adjust,
9916 &else_clause, &else_slp_node, &dts[3], &vectype2))
9917 return false;
9918
9919 if (vectype1 && !useless_type_conversion_p (vectype, vectype1))
9920 return false;
9921
9922 if (vectype2 && !useless_type_conversion_p (vectype, vectype2))
9923 return false;
9924
9925 masked = !COMPARISON_CLASS_P (cond_expr);
9926 vec_cmp_type = truth_type_for (comp_vectype);
9927
9928 if (vec_cmp_type == NULL_TREE)
9929 return false;
9930
9931 cond_code = TREE_CODE (cond_expr);
9932 if (!masked)
9933 {
9934 cond_expr0 = TREE_OPERAND (cond_expr, 0);
9935 cond_expr1 = TREE_OPERAND (cond_expr, 1);
9936 }
9937
9938 /* For conditional reductions, the "then" value needs to be the candidate
9939 value calculated by this iteration while the "else" value needs to be
9940 the result carried over from previous iterations. If the COND_EXPR
9941 is the other way around, we need to swap it. */
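/* A hedged example (for exposition only): for a conditional reduction
   written as "last = cond ? last : a[i]" the carried value sits in the
   "then" position (reduc_index == 1), so we invert the comparison (or
   mark the mask for inversion) and swap the clauses so that the carried
   value ends up in the "else" position used by IFN_FOLD_EXTRACT_LAST
   below.  */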
9942 bool must_invert_cmp_result = false;
9943 if (reduction_type == EXTRACT_LAST_REDUCTION && reduc_index == 1)
9944 {
9945 if (masked)
9946 must_invert_cmp_result = true;
9947 else
9948 {
9949 bool honor_nans = HONOR_NANS (TREE_TYPE (cond_expr0));
9950 tree_code new_code = invert_tree_comparison (cond_code, honor_nans);
9951 if (new_code == ERROR_MARK)
9952 must_invert_cmp_result = true;
9953 else
9954 {
9955 cond_code = new_code;
9956 /* Make sure we don't accidentally use the old condition. */
9957 cond_expr = NULL_TREE;
9958 }
9959 }
9960 std::swap (then_clause, else_clause);
9961 }
9962
9963 if (!masked && VECTOR_BOOLEAN_TYPE_P (comp_vectype))
9964 {
9965 /* Boolean values may have another representation in vectors
9966 and therefore we prefer bit operations over comparison for
9967 them (which also works for scalar masks). We store opcodes
9968 to use in bitop1 and bitop2. Statement is vectorized as
9969 BITOP2 (rhs1 BITOP1 rhs2) or rhs1 BITOP2 (BITOP1 rhs2)
9970 depending on bitop1 and bitop2 arity. */
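/* For instance (illustrative): for GT_EXPR on boolean operands the
   switch below picks bitop1 = BIT_NOT_EXPR and bitop2 = BIT_AND_EXPR,
   i.e. a > b is computed as a & ~b; for EQ_EXPR we get ~(a ^ b).  */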
9971 switch (cond_code)
9972 {
9973 case GT_EXPR:
9974 bitop1 = BIT_NOT_EXPR;
9975 bitop2 = BIT_AND_EXPR;
9976 break;
9977 case GE_EXPR:
9978 bitop1 = BIT_NOT_EXPR;
9979 bitop2 = BIT_IOR_EXPR;
9980 break;
9981 case LT_EXPR:
9982 bitop1 = BIT_NOT_EXPR;
9983 bitop2 = BIT_AND_EXPR;
9984 std::swap (cond_expr0, cond_expr1);
9985 break;
9986 case LE_EXPR:
9987 bitop1 = BIT_NOT_EXPR;
9988 bitop2 = BIT_IOR_EXPR;
9989 std::swap (cond_expr0, cond_expr1);
9990 break;
9991 case NE_EXPR:
9992 bitop1 = BIT_XOR_EXPR;
9993 break;
9994 case EQ_EXPR:
9995 bitop1 = BIT_XOR_EXPR;
9996 bitop2 = BIT_NOT_EXPR;
9997 break;
9998 default:
9999 return false;
10000 }
10001 cond_code = SSA_NAME;
10002 }
10003
10004 if (TREE_CODE_CLASS (cond_code) == tcc_comparison
10005 && reduction_type == EXTRACT_LAST_REDUCTION
10006 && !expand_vec_cmp_expr_p (comp_vectype, vec_cmp_type, cond_code))
10007 {
10008 if (dump_enabled_p ())
10009 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
10010 "reduction comparison operation not supported.\n");
10011 return false;
10012 }
10013
10014 if (!vec_stmt)
10015 {
10016 if (bitop1 != NOP_EXPR)
10017 {
10018 machine_mode mode = TYPE_MODE (comp_vectype);
10019 optab optab;
10020
10021 optab = optab_for_tree_code (bitop1, comp_vectype, optab_default);
10022 if (!optab || optab_handler (optab, mode) == CODE_FOR_nothing)
10023 return false;
10024
10025 if (bitop2 != NOP_EXPR)
10026 {
10027 optab = optab_for_tree_code (bitop2, comp_vectype,
10028 optab_default);
10029 if (!optab || optab_handler (optab, mode) == CODE_FOR_nothing)
10030 return false;
10031 }
10032 }
10033
10034 vect_cost_for_stmt kind = vector_stmt;
10035 if (reduction_type == EXTRACT_LAST_REDUCTION)
10036 /* Count one reduction-like operation per vector. */
10037 kind = vec_to_scalar;
10038 else if (!expand_vec_cond_expr_p (vectype, comp_vectype, cond_code))
10039 return false;
10040
10041 if (slp_node
10042 && (!vect_maybe_update_slp_op_vectype
10043 (SLP_TREE_CHILDREN (slp_node)[0], comp_vectype)
10044 || (op_adjust == 1
10045 && !vect_maybe_update_slp_op_vectype
10046 (SLP_TREE_CHILDREN (slp_node)[1], comp_vectype))
10047 || !vect_maybe_update_slp_op_vectype (then_slp_node, vectype)
10048 || !vect_maybe_update_slp_op_vectype (else_slp_node, vectype)))
10049 {
10050 if (dump_enabled_p ())
10051 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
10052 "incompatible vector types for invariants\n");
10053 return false;
10054 }
10055
10056 if (loop_vinfo && for_reduction
10057 && LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo))
10058 {
10059 if (reduction_type == EXTRACT_LAST_REDUCTION)
10060 vect_record_loop_mask (loop_vinfo, &LOOP_VINFO_MASKS (loop_vinfo),
10061 ncopies * vec_num, vectype, NULL);
10062 /* Extra inactive lanes should be safe for vect_nested_cycle. */
10063 else if (STMT_VINFO_DEF_TYPE (reduc_info) != vect_nested_cycle)
10064 {
10065 if (dump_enabled_p ())
10066 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
10067 "conditional reduction prevents the use"
10068 " of partial vectors.\n");
10069 LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo) = false;
10070 }
10071 }
10072
10073 STMT_VINFO_TYPE (stmt_info) = condition_vec_info_type;
10074 vect_model_simple_cost (vinfo, stmt_info, ncopies, dts, ndts, slp_node,
10075 cost_vec, kind);
10076 return true;
10077 }
10078
10079 /* Transform. */
10080
10081 if (!slp_node)
10082 {
10083 vec_oprnds0.create (1);
10084 vec_oprnds1.create (1);
10085 vec_oprnds2.create (1);
10086 vec_oprnds3.create (1);
10087 }
10088
10089 /* Handle def. */
10090 scalar_dest = gimple_assign_lhs (stmt);
10091 if (reduction_type != EXTRACT_LAST_REDUCTION)
10092 vec_dest = vect_create_destination_var (scalar_dest, vectype);
10093
10094 bool swap_cond_operands = false;
10095
10096 /* See whether another part of the vectorized code applies a loop
10097 mask to the condition, or to its inverse. */
10098
10099 vec_loop_masks *masks = NULL;
10100 if (loop_vinfo && LOOP_VINFO_FULLY_MASKED_P (loop_vinfo))
10101 {
10102 if (reduction_type == EXTRACT_LAST_REDUCTION)
10103 masks = &LOOP_VINFO_MASKS (loop_vinfo);
10104 else
10105 {
10106 scalar_cond_masked_key cond (cond_expr, ncopies);
10107 if (loop_vinfo->scalar_cond_masked_set.contains (cond))
10108 masks = &LOOP_VINFO_MASKS (loop_vinfo);
10109 else
10110 {
10111 bool honor_nans = HONOR_NANS (TREE_TYPE (cond.op0));
10112 cond.code = invert_tree_comparison (cond.code, honor_nans);
10113 if (loop_vinfo->scalar_cond_masked_set.contains (cond))
10114 {
10115 masks = &LOOP_VINFO_MASKS (loop_vinfo);
10116 cond_code = cond.code;
10117 swap_cond_operands = true;
10118 }
10119 }
10120 }
10121 }
10122
10123 /* Handle cond expr. */
10124 if (masked)
10125 vect_get_vec_defs (vinfo, stmt_info, slp_node, ncopies,
10126 cond_expr, &vec_oprnds0, comp_vectype,
10127 then_clause, &vec_oprnds2, vectype,
10128 reduction_type != EXTRACT_LAST_REDUCTION
10129 ? else_clause : NULL, &vec_oprnds3, vectype);
10130 else
10131 vect_get_vec_defs (vinfo, stmt_info, slp_node, ncopies,
10132 cond_expr0, &vec_oprnds0, comp_vectype,
10133 cond_expr1, &vec_oprnds1, comp_vectype,
10134 then_clause, &vec_oprnds2, vectype,
10135 reduction_type != EXTRACT_LAST_REDUCTION
10136 ? else_clause : NULL, &vec_oprnds3, vectype);
10137
10138 /* Arguments are ready. Create the new vector stmt. */
10139 FOR_EACH_VEC_ELT (vec_oprnds0, i, vec_cond_lhs)
10140 {
10141 vec_then_clause = vec_oprnds2[i];
10142 if (reduction_type != EXTRACT_LAST_REDUCTION)
10143 vec_else_clause = vec_oprnds3[i];
10144
10145 if (swap_cond_operands)
10146 std::swap (vec_then_clause, vec_else_clause);
10147
10148 if (masked)
10149 vec_compare = vec_cond_lhs;
10150 else
10151 {
10152 vec_cond_rhs = vec_oprnds1[i];
10153 if (bitop1 == NOP_EXPR)
10154 {
10155 gimple_seq stmts = NULL;
10156 vec_compare = gimple_build (&stmts, cond_code, vec_cmp_type,
10157 vec_cond_lhs, vec_cond_rhs);
10158 gsi_insert_before (gsi, stmts, GSI_SAME_STMT);
10159 }
10160 else
10161 {
10162 new_temp = make_ssa_name (vec_cmp_type);
10163 gassign *new_stmt;
10164 if (bitop1 == BIT_NOT_EXPR)
10165 new_stmt = gimple_build_assign (new_temp, bitop1,
10166 vec_cond_rhs);
10167 else
10168 new_stmt
10169 = gimple_build_assign (new_temp, bitop1, vec_cond_lhs,
10170 vec_cond_rhs);
10171 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
10172 if (bitop2 == NOP_EXPR)
10173 vec_compare = new_temp;
10174 else if (bitop2 == BIT_NOT_EXPR)
10175 {
10176 /* Instead of doing ~x ? y : z do x ? z : y. */
10177 vec_compare = new_temp;
10178 std::swap (vec_then_clause, vec_else_clause);
10179 }
10180 else
10181 {
10182 vec_compare = make_ssa_name (vec_cmp_type);
10183 new_stmt
10184 = gimple_build_assign (vec_compare, bitop2,
10185 vec_cond_lhs, new_temp);
10186 vect_finish_stmt_generation (vinfo, stmt_info,
10187 new_stmt, gsi);
10188 }
10189 }
10190 }
10191
10192 /* If we decided to apply a loop mask to the result of the vector
10193 comparison, AND the comparison with the mask now. Later passes
10194 should then be able to reuse the AND results between multiple
10195 vector statements.
10196
10197 For example:
10198 for (int i = 0; i < 100; ++i)
10199 x[i] = y[i] ? z[i] : 10;
10200
10201 results in following optimized GIMPLE:
10202
10203 mask__35.8_43 = vect__4.7_41 != { 0, ... };
10204 vec_mask_and_46 = loop_mask_40 & mask__35.8_43;
10205 _19 = &MEM[base: z_12(D), index: ivtmp_56, step: 4, offset: 0B];
10206 vect_iftmp.11_47 = .MASK_LOAD (_19, 4B, vec_mask_and_46);
10207 vect_iftmp.12_52 = VEC_COND_EXPR <vec_mask_and_46,
10208 vect_iftmp.11_47, { 10, ... }>;
10209
10210 instead of using masked and unmasked forms of
10211 vec != { 0, ... } (masked in the MASK_LOAD,
10212 unmasked in the VEC_COND_EXPR). */
10213
10214 /* Force vec_compare to be an SSA_NAME rather than a comparison,
10215 in cases where that's necessary. */
10216
10217 if (masks || reduction_type == EXTRACT_LAST_REDUCTION)
10218 {
10219 if (!is_gimple_val (vec_compare))
10220 {
10221 tree vec_compare_name = make_ssa_name (vec_cmp_type);
10222 gassign *new_stmt = gimple_build_assign (vec_compare_name,
10223 vec_compare);
10224 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
10225 vec_compare = vec_compare_name;
10226 }
10227
10228 if (must_invert_cmp_result)
10229 {
10230 tree vec_compare_name = make_ssa_name (vec_cmp_type);
10231 gassign *new_stmt = gimple_build_assign (vec_compare_name,
10232 BIT_NOT_EXPR,
10233 vec_compare);
10234 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
10235 vec_compare = vec_compare_name;
10236 }
10237
10238 if (masks)
10239 {
10240 unsigned vec_num = vec_oprnds0.length ();
10241 tree loop_mask
10242 = vect_get_loop_mask (gsi, masks, vec_num * ncopies,
10243 vectype, i);
10244 tree tmp2 = make_ssa_name (vec_cmp_type);
10245 gassign *g
10246 = gimple_build_assign (tmp2, BIT_AND_EXPR, vec_compare,
10247 loop_mask);
10248 vect_finish_stmt_generation (vinfo, stmt_info, g, gsi);
10249 vec_compare = tmp2;
10250 }
10251 }
10252
10253 gimple *new_stmt;
10254 if (reduction_type == EXTRACT_LAST_REDUCTION)
10255 {
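          /* The .FOLD_EXTRACT_LAST call below yields the VEC_THEN_CLAUSE
             element of the last lane for which VEC_COMPARE is set, or
             ELSE_CLAUSE if no lane is set.  */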
10256 gimple *old_stmt = vect_orig_stmt (stmt_info)->stmt;
10257 tree lhs = gimple_get_lhs (old_stmt);
10258 new_stmt = gimple_build_call_internal
10259 (IFN_FOLD_EXTRACT_LAST, 3, else_clause, vec_compare,
10260 vec_then_clause);
10261 gimple_call_set_lhs (new_stmt, lhs);
10262 SSA_NAME_DEF_STMT (lhs) = new_stmt;
10263 if (old_stmt == gsi_stmt (*gsi))
10264 vect_finish_replace_stmt (vinfo, stmt_info, new_stmt);
10265 else
10266 {
10267 /* In this case we're moving the definition to later in the
10268 block. That doesn't matter because the only uses of the
10269 lhs are in phi statements. */
10270 gimple_stmt_iterator old_gsi = gsi_for_stmt (old_stmt);
10271 gsi_remove (&old_gsi, true);
10272 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
10273 }
10274 }
10275 else
10276 {
10277 new_temp = make_ssa_name (vec_dest);
10278 new_stmt = gimple_build_assign (new_temp, VEC_COND_EXPR, vec_compare,
10279 vec_then_clause, vec_else_clause);
10280 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
10281 }
10282 if (slp_node)
10283 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
10284 else
10285 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
10286 }
10287
10288 if (!slp_node)
10289 *vec_stmt = STMT_VINFO_VEC_STMTS (stmt_info)[0];
10290
10291 vec_oprnds0.release ();
10292 vec_oprnds1.release ();
10293 vec_oprnds2.release ();
10294 vec_oprnds3.release ();
10295
10296 return true;
10297 }
10298
10299 /* vectorizable_comparison.
10300
10301 Check if STMT_INFO is a comparison expression that can be vectorized.
10302 If VEC_STMT is also passed, vectorize STMT_INFO: create a vectorized
10303 comparison, put it in VEC_STMT, and insert it at GSI.
10304
10305 Return true if STMT_INFO is vectorizable in this way. */
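/* A typical input is a mask-producing assignment such as
     mask_1 = a_2 < b_3;
   whose vector type is a vector boolean (mask) type.  */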
10306
10307 static bool
10308 vectorizable_comparison (vec_info *vinfo,
10309 stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
10310 gimple **vec_stmt,
10311 slp_tree slp_node, stmt_vector_for_cost *cost_vec)
10312 {
10313 tree lhs, rhs1, rhs2;
10314 tree vectype1 = NULL_TREE, vectype2 = NULL_TREE;
10315 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
10316 tree vec_rhs1 = NULL_TREE, vec_rhs2 = NULL_TREE;
10317 tree new_temp;
10318 loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
10319 enum vect_def_type dts[2] = {vect_unknown_def_type, vect_unknown_def_type};
10320 int ndts = 2;
10321 poly_uint64 nunits;
10322 int ncopies;
10323 enum tree_code code, bitop1 = NOP_EXPR, bitop2 = NOP_EXPR;
10324 int i;
10325 bb_vec_info bb_vinfo = dyn_cast <bb_vec_info> (vinfo);
10326 vec<tree> vec_oprnds0 = vNULL;
10327 vec<tree> vec_oprnds1 = vNULL;
10328 tree mask_type;
10329 tree mask;
10330
10331 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
10332 return false;
10333
10334 if (!vectype || !VECTOR_BOOLEAN_TYPE_P (vectype))
10335 return false;
10336
10337 mask_type = vectype;
10338 nunits = TYPE_VECTOR_SUBPARTS (vectype);
10339
10340 if (slp_node)
10341 ncopies = 1;
10342 else
10343 ncopies = vect_get_num_copies (loop_vinfo, vectype);
10344
10345 gcc_assert (ncopies >= 1);
10346 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
10347 return false;
10348
10349 gassign *stmt = dyn_cast <gassign *> (stmt_info->stmt);
10350 if (!stmt)
10351 return false;
10352
10353 code = gimple_assign_rhs_code (stmt);
10354
10355 if (TREE_CODE_CLASS (code) != tcc_comparison)
10356 return false;
10357
10358 slp_tree slp_rhs1, slp_rhs2;
10359 if (!vect_is_simple_use (vinfo, stmt_info, slp_node,
10360 0, &rhs1, &slp_rhs1, &dts[0], &vectype1))
10361 return false;
10362
10363 if (!vect_is_simple_use (vinfo, stmt_info, slp_node,
10364 1, &rhs2, &slp_rhs2, &dts[1], &vectype2))
10365 return false;
10366
10367 if (vectype1 && vectype2
10368 && maybe_ne (TYPE_VECTOR_SUBPARTS (vectype1),
10369 TYPE_VECTOR_SUBPARTS (vectype2)))
10370 return false;
10371
10372 vectype = vectype1 ? vectype1 : vectype2;
10373
10374 /* Invariant comparison. */
10375 if (!vectype)
10376 {
10377 if (VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (rhs1)))
10378 vectype = mask_type;
10379 else
10380 vectype = get_vectype_for_scalar_type (vinfo, TREE_TYPE (rhs1),
10381 slp_node);
10382 if (!vectype || maybe_ne (TYPE_VECTOR_SUBPARTS (vectype), nunits))
10383 return false;
10384 }
10385 else if (maybe_ne (nunits, TYPE_VECTOR_SUBPARTS (vectype)))
10386 return false;
10387
10388 /* Can't compare mask and non-mask types. */
10389 if (vectype1 && vectype2
10390 && (VECTOR_BOOLEAN_TYPE_P (vectype1) ^ VECTOR_BOOLEAN_TYPE_P (vectype2)))
10391 return false;
10392
10393 /* Boolean values may have another representation in vectors
10394 and therefore we prefer bit operations over comparison for
10395 them (which also works for scalar masks). We store opcodes
10396 to use in bitop1 and bitop2. Statement is vectorized as
10397 BITOP2 (rhs1 BITOP1 rhs2) or
10398 rhs1 BITOP2 (BITOP1 rhs2)
10399 depending on bitop1 and bitop2 arity. */
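  /* As an illustration of the mapping below: for boolean operands
     a > b becomes a & ~b, a >= b becomes a | ~b, a == b becomes ~(a ^ b)
     and a != b stays a ^ b; for LT and LE the operands are swapped first.  */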
10400 bool swap_p = false;
10401 if (VECTOR_BOOLEAN_TYPE_P (vectype))
10402 {
10403 if (code == GT_EXPR)
10404 {
10405 bitop1 = BIT_NOT_EXPR;
10406 bitop2 = BIT_AND_EXPR;
10407 }
10408 else if (code == GE_EXPR)
10409 {
10410 bitop1 = BIT_NOT_EXPR;
10411 bitop2 = BIT_IOR_EXPR;
10412 }
10413 else if (code == LT_EXPR)
10414 {
10415 bitop1 = BIT_NOT_EXPR;
10416 bitop2 = BIT_AND_EXPR;
10417 swap_p = true;
10418 }
10419 else if (code == LE_EXPR)
10420 {
10421 bitop1 = BIT_NOT_EXPR;
10422 bitop2 = BIT_IOR_EXPR;
10423 swap_p = true;
10424 }
10425 else
10426 {
10427 bitop1 = BIT_XOR_EXPR;
10428 if (code == EQ_EXPR)
10429 bitop2 = BIT_NOT_EXPR;
10430 }
10431 }
10432
10433 if (!vec_stmt)
10434 {
10435 if (bitop1 == NOP_EXPR)
10436 {
10437 if (!expand_vec_cmp_expr_p (vectype, mask_type, code))
10438 return false;
10439 }
10440 else
10441 {
10442 machine_mode mode = TYPE_MODE (vectype);
10443 optab optab;
10444
10445 optab = optab_for_tree_code (bitop1, vectype, optab_default);
10446 if (!optab || optab_handler (optab, mode) == CODE_FOR_nothing)
10447 return false;
10448
10449 if (bitop2 != NOP_EXPR)
10450 {
10451 optab = optab_for_tree_code (bitop2, vectype, optab_default);
10452 if (!optab || optab_handler (optab, mode) == CODE_FOR_nothing)
10453 return false;
10454 }
10455 }
10456
10457 /* Put types on constant and invariant SLP children. */
10458 if (slp_node
10459 && (!vect_maybe_update_slp_op_vectype (slp_rhs1, vectype)
10460 || !vect_maybe_update_slp_op_vectype (slp_rhs2, vectype)))
10461 {
10462 if (dump_enabled_p ())
10463 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
10464 "incompatible vector types for invariants\n");
10465 return false;
10466 }
10467
10468 STMT_VINFO_TYPE (stmt_info) = comparison_vec_info_type;
10469 vect_model_simple_cost (vinfo, stmt_info,
10470 ncopies * (1 + (bitop2 != NOP_EXPR)),
10471 dts, ndts, slp_node, cost_vec);
10472 return true;
10473 }
10474
10475 /* Transform. */
10476 if (!slp_node)
10477 {
10478 vec_oprnds0.create (1);
10479 vec_oprnds1.create (1);
10480 }
10481
10482 /* Handle def. */
10483 lhs = gimple_assign_lhs (stmt);
10484 mask = vect_create_destination_var (lhs, mask_type);
10485
10486 vect_get_vec_defs (vinfo, stmt_info, slp_node, ncopies,
10487 rhs1, &vec_oprnds0, vectype,
10488 rhs2, &vec_oprnds1, vectype);
10489 if (swap_p)
10490 std::swap (vec_oprnds0, vec_oprnds1);
10491
10492 /* Arguments are ready. Create the new vector stmt. */
10493 FOR_EACH_VEC_ELT (vec_oprnds0, i, vec_rhs1)
10494 {
10495 gimple *new_stmt;
10496 vec_rhs2 = vec_oprnds1[i];
10497
10498 new_temp = make_ssa_name (mask);
10499 if (bitop1 == NOP_EXPR)
10500 {
10501 new_stmt = gimple_build_assign (new_temp, code,
10502 vec_rhs1, vec_rhs2);
10503 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
10504 }
10505 else
10506 {
10507 if (bitop1 == BIT_NOT_EXPR)
10508 new_stmt = gimple_build_assign (new_temp, bitop1, vec_rhs2);
10509 else
10510 new_stmt = gimple_build_assign (new_temp, bitop1, vec_rhs1,
10511 vec_rhs2);
10512 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
10513 if (bitop2 != NOP_EXPR)
10514 {
10515 tree res = make_ssa_name (mask);
10516 if (bitop2 == BIT_NOT_EXPR)
10517 new_stmt = gimple_build_assign (res, bitop2, new_temp);
10518 else
10519 new_stmt = gimple_build_assign (res, bitop2, vec_rhs1,
10520 new_temp);
10521 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
10522 }
10523 }
10524 if (slp_node)
10525 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
10526 else
10527 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
10528 }
10529
10530 if (!slp_node)
10531 *vec_stmt = STMT_VINFO_VEC_STMTS (stmt_info)[0];
10532
10533 vec_oprnds0.release ();
10534 vec_oprnds1.release ();
10535
10536 return true;
10537 }
10538
10539 /* If SLP_NODE is nonnull, return true if vectorizable_live_operation
10540 can handle all live statements in the node. Otherwise return true
10541 if STMT_INFO is not live or if vectorizable_live_operation can handle it.
10542 GSI and VEC_STMT_P are as for vectorizable_live_operation. */
10543
10544 static bool
10545 can_vectorize_live_stmts (vec_info *vinfo,
10546 stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
10547 slp_tree slp_node, slp_instance slp_node_instance,
10548 bool vec_stmt_p,
10549 stmt_vector_for_cost *cost_vec)
10550 {
10551 if (slp_node)
10552 {
10553 stmt_vec_info slp_stmt_info;
10554 unsigned int i;
10555 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (slp_node), i, slp_stmt_info)
10556 {
10557 if (STMT_VINFO_LIVE_P (slp_stmt_info)
10558 && !vectorizable_live_operation (vinfo,
10559 slp_stmt_info, gsi, slp_node,
10560 slp_node_instance, i,
10561 vec_stmt_p, cost_vec))
10562 return false;
10563 }
10564 }
10565 else if (STMT_VINFO_LIVE_P (stmt_info)
10566 && !vectorizable_live_operation (vinfo, stmt_info, gsi,
10567 slp_node, slp_node_instance, -1,
10568 vec_stmt_p, cost_vec))
10569 return false;
10570
10571 return true;
10572 }
10573
10574 /* Make sure the statement is vectorizable. */
10575
10576 opt_result
10577 vect_analyze_stmt (vec_info *vinfo,
10578 stmt_vec_info stmt_info, bool *need_to_vectorize,
10579 slp_tree node, slp_instance node_instance,
10580 stmt_vector_for_cost *cost_vec)
10581 {
10582 bb_vec_info bb_vinfo = dyn_cast <bb_vec_info> (vinfo);
10583 enum vect_relevant relevance = STMT_VINFO_RELEVANT (stmt_info);
10584 bool ok;
10585 gimple_seq pattern_def_seq;
10586
10587 if (dump_enabled_p ())
10588 dump_printf_loc (MSG_NOTE, vect_location, "==> examining statement: %G",
10589 stmt_info->stmt);
10590
10591 if (gimple_has_volatile_ops (stmt_info->stmt))
10592 return opt_result::failure_at (stmt_info->stmt,
10593 "not vectorized:"
10594 " stmt has volatile operands: %G\n",
10595 stmt_info->stmt);
10596
10597 if (STMT_VINFO_IN_PATTERN_P (stmt_info)
10598 && node == NULL
10599 && (pattern_def_seq = STMT_VINFO_PATTERN_DEF_SEQ (stmt_info)))
10600 {
10601 gimple_stmt_iterator si;
10602
10603 for (si = gsi_start (pattern_def_seq); !gsi_end_p (si); gsi_next (&si))
10604 {
10605 stmt_vec_info pattern_def_stmt_info
10606 = vinfo->lookup_stmt (gsi_stmt (si));
10607 if (STMT_VINFO_RELEVANT_P (pattern_def_stmt_info)
10608 || STMT_VINFO_LIVE_P (pattern_def_stmt_info))
10609 {
10610 /* Analyze def stmt of STMT if it's a pattern stmt. */
10611 if (dump_enabled_p ())
10612 dump_printf_loc (MSG_NOTE, vect_location,
10613 "==> examining pattern def statement: %G",
10614 pattern_def_stmt_info->stmt);
10615
10616 opt_result res
10617 = vect_analyze_stmt (vinfo, pattern_def_stmt_info,
10618 need_to_vectorize, node, node_instance,
10619 cost_vec);
10620 if (!res)
10621 return res;
10622 }
10623 }
10624 }
10625
10626 /* Skip stmts that do not need to be vectorized. In loops this is expected
10627 to include:
10628 - the COND_EXPR which is the loop exit condition
10629 - any LABEL_EXPRs in the loop
10630 - computations that are used only for array indexing or loop control.
10631 In basic blocks we only analyze statements that are a part of some SLP
10632 instance; therefore, all the statements are relevant.
10633
10634 A pattern statement needs to be analyzed instead of the original statement
10635 if the original statement is not relevant. Otherwise, we analyze both
10636 statements. In basic blocks we are called from some SLP instance
10637 traversal, so we don't analyze pattern stmts instead; the pattern stmts
10638 will already be part of an SLP instance. */
10639
10640 stmt_vec_info pattern_stmt_info = STMT_VINFO_RELATED_STMT (stmt_info);
10641 if (!STMT_VINFO_RELEVANT_P (stmt_info)
10642 && !STMT_VINFO_LIVE_P (stmt_info))
10643 {
10644 if (STMT_VINFO_IN_PATTERN_P (stmt_info)
10645 && pattern_stmt_info
10646 && (STMT_VINFO_RELEVANT_P (pattern_stmt_info)
10647 || STMT_VINFO_LIVE_P (pattern_stmt_info)))
10648 {
10649 /* Analyze PATTERN_STMT instead of the original stmt. */
10650 stmt_info = pattern_stmt_info;
10651 if (dump_enabled_p ())
10652 dump_printf_loc (MSG_NOTE, vect_location,
10653 "==> examining pattern statement: %G",
10654 stmt_info->stmt);
10655 }
10656 else
10657 {
10658 if (dump_enabled_p ())
10659 dump_printf_loc (MSG_NOTE, vect_location, "irrelevant.\n");
10660
10661 return opt_result::success ();
10662 }
10663 }
10664 else if (STMT_VINFO_IN_PATTERN_P (stmt_info)
10665 && node == NULL
10666 && pattern_stmt_info
10667 && (STMT_VINFO_RELEVANT_P (pattern_stmt_info)
10668 || STMT_VINFO_LIVE_P (pattern_stmt_info)))
10669 {
10670 /* Analyze PATTERN_STMT too. */
10671 if (dump_enabled_p ())
10672 dump_printf_loc (MSG_NOTE, vect_location,
10673 "==> examining pattern statement: %G",
10674 pattern_stmt_info->stmt);
10675
10676 opt_result res
10677 = vect_analyze_stmt (vinfo, pattern_stmt_info, need_to_vectorize, node,
10678 node_instance, cost_vec);
10679 if (!res)
10680 return res;
10681 }
10682
10683 switch (STMT_VINFO_DEF_TYPE (stmt_info))
10684 {
10685 case vect_internal_def:
10686 break;
10687
10688 case vect_reduction_def:
10689 case vect_nested_cycle:
10690 gcc_assert (!bb_vinfo
10691 && (relevance == vect_used_in_outer
10692 || relevance == vect_used_in_outer_by_reduction
10693 || relevance == vect_used_by_reduction
10694 || relevance == vect_unused_in_scope
10695 || relevance == vect_used_only_live));
10696 break;
10697
10698 case vect_induction_def:
10699 gcc_assert (!bb_vinfo);
10700 break;
10701
10702 case vect_constant_def:
10703 case vect_external_def:
10704 case vect_unknown_def_type:
10705 default:
10706 gcc_unreachable ();
10707 }
10708
10709 if (STMT_VINFO_RELEVANT_P (stmt_info))
10710 {
10711 tree type = gimple_expr_type (stmt_info->stmt);
10712 gcc_assert (!VECTOR_MODE_P (TYPE_MODE (type)));
10713 gcall *call = dyn_cast <gcall *> (stmt_info->stmt);
10714 gcc_assert (STMT_VINFO_VECTYPE (stmt_info)
10715 || (call && gimple_call_lhs (call) == NULL_TREE));
10716 *need_to_vectorize = true;
10717 }
10718
10719 if (PURE_SLP_STMT (stmt_info) && !node)
10720 {
10721 if (dump_enabled_p ())
10722 dump_printf_loc (MSG_NOTE, vect_location,
10723 "handled only by SLP analysis\n");
10724 return opt_result::success ();
10725 }
10726
10727 ok = true;
10728 if (!bb_vinfo
10729 && (STMT_VINFO_RELEVANT_P (stmt_info)
10730 || STMT_VINFO_DEF_TYPE (stmt_info) == vect_reduction_def))
10731 /* Prefer vectorizable_call over vectorizable_simd_clone_call so
10732 -mveclibabi= takes preference over library functions with
10733 the simd attribute. */
10734 ok = (vectorizable_call (vinfo, stmt_info, NULL, NULL, node, cost_vec)
10735 || vectorizable_simd_clone_call (vinfo, stmt_info, NULL, NULL, node,
10736 cost_vec)
10737 || vectorizable_conversion (vinfo, stmt_info,
10738 NULL, NULL, node, cost_vec)
10739 || vectorizable_operation (vinfo, stmt_info,
10740 NULL, NULL, node, cost_vec)
10741 || vectorizable_assignment (vinfo, stmt_info,
10742 NULL, NULL, node, cost_vec)
10743 || vectorizable_load (vinfo, stmt_info, NULL, NULL, node, cost_vec)
10744 || vectorizable_store (vinfo, stmt_info, NULL, NULL, node, cost_vec)
10745 || vectorizable_reduction (as_a <loop_vec_info> (vinfo), stmt_info,
10746 node, node_instance, cost_vec)
10747 || vectorizable_induction (as_a <loop_vec_info> (vinfo), stmt_info,
10748 NULL, node, cost_vec)
10749 || vectorizable_shift (vinfo, stmt_info, NULL, NULL, node, cost_vec)
10750 || vectorizable_condition (vinfo, stmt_info,
10751 NULL, NULL, node, cost_vec)
10752 || vectorizable_comparison (vinfo, stmt_info, NULL, NULL, node,
10753 cost_vec)
10754 || vectorizable_lc_phi (as_a <loop_vec_info> (vinfo),
10755 stmt_info, NULL, node));
10756 else
10757 {
10758 if (bb_vinfo)
10759 ok = (vectorizable_call (vinfo, stmt_info, NULL, NULL, node, cost_vec)
10760 || vectorizable_simd_clone_call (vinfo, stmt_info,
10761 NULL, NULL, node, cost_vec)
10762 || vectorizable_conversion (vinfo, stmt_info, NULL, NULL, node,
10763 cost_vec)
10764 || vectorizable_shift (vinfo, stmt_info,
10765 NULL, NULL, node, cost_vec)
10766 || vectorizable_operation (vinfo, stmt_info,
10767 NULL, NULL, node, cost_vec)
10768 || vectorizable_assignment (vinfo, stmt_info, NULL, NULL, node,
10769 cost_vec)
10770 || vectorizable_load (vinfo, stmt_info,
10771 NULL, NULL, node, cost_vec)
10772 || vectorizable_store (vinfo, stmt_info,
10773 NULL, NULL, node, cost_vec)
10774 || vectorizable_condition (vinfo, stmt_info,
10775 NULL, NULL, node, cost_vec)
10776 || vectorizable_comparison (vinfo, stmt_info, NULL, NULL, node,
10777 cost_vec)
10778 || vectorizable_phi (vinfo, stmt_info, NULL, node, cost_vec));
10779 }
10780
10781 if (!ok)
10782 return opt_result::failure_at (stmt_info->stmt,
10783 "not vectorized:"
10784 " relevant stmt not supported: %G",
10785 stmt_info->stmt);
10786
10787 /* Stmts that are (also) "live" (i.e. - that are used out of the loop)
10788 need extra handling, except for vectorizable reductions. */
10789 if (!bb_vinfo
10790 && STMT_VINFO_TYPE (stmt_info) != reduc_vec_info_type
10791 && STMT_VINFO_TYPE (stmt_info) != lc_phi_info_type
10792 && !can_vectorize_live_stmts (as_a <loop_vec_info> (vinfo),
10793 stmt_info, NULL, node, node_instance,
10794 false, cost_vec))
10795 return opt_result::failure_at (stmt_info->stmt,
10796 "not vectorized:"
10797 " live stmt not supported: %G",
10798 stmt_info->stmt);
10799
10800 return opt_result::success ();
10801 }
10802
10803
10804 /* Function vect_transform_stmt.
10805
10806 Create a vectorized stmt to replace STMT_INFO, and insert it at GSI. */
10807
10808 bool
10809 vect_transform_stmt (vec_info *vinfo,
10810 stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
10811 slp_tree slp_node, slp_instance slp_node_instance)
10812 {
10813 bool is_store = false;
10814 gimple *vec_stmt = NULL;
10815 bool done;
10816
10817 gcc_assert (slp_node || !PURE_SLP_STMT (stmt_info));
10818
10819 switch (STMT_VINFO_TYPE (stmt_info))
10820 {
10821 case type_demotion_vec_info_type:
10822 case type_promotion_vec_info_type:
10823 case type_conversion_vec_info_type:
10824 done = vectorizable_conversion (vinfo, stmt_info,
10825 gsi, &vec_stmt, slp_node, NULL);
10826 gcc_assert (done);
10827 break;
10828
10829 case induc_vec_info_type:
10830 done = vectorizable_induction (as_a <loop_vec_info> (vinfo),
10831 stmt_info, &vec_stmt, slp_node,
10832 NULL);
10833 gcc_assert (done);
10834 break;
10835
10836 case shift_vec_info_type:
10837 done = vectorizable_shift (vinfo, stmt_info,
10838 gsi, &vec_stmt, slp_node, NULL);
10839 gcc_assert (done);
10840 break;
10841
10842 case op_vec_info_type:
10843 done = vectorizable_operation (vinfo, stmt_info, gsi, &vec_stmt, slp_node,
10844 NULL);
10845 gcc_assert (done);
10846 break;
10847
10848 case assignment_vec_info_type:
10849 done = vectorizable_assignment (vinfo, stmt_info,
10850 gsi, &vec_stmt, slp_node, NULL);
10851 gcc_assert (done);
10852 break;
10853
10854 case load_vec_info_type:
10855 done = vectorizable_load (vinfo, stmt_info, gsi, &vec_stmt, slp_node,
10856 NULL);
10857 gcc_assert (done);
10858 break;
10859
10860 case store_vec_info_type:
10861 done = vectorizable_store (vinfo, stmt_info,
10862 gsi, &vec_stmt, slp_node, NULL);
10863 gcc_assert (done);
10864 if (STMT_VINFO_GROUPED_ACCESS (stmt_info) && !slp_node)
10865 {
10866 /* In case of interleaving, the whole chain is vectorized when the
10867 last store in the chain is reached. Store stmts before the last
10868 one are skipped, and their stmt_vec_info shouldn't be freed
10869 meanwhile. */
10870 stmt_vec_info group_info = DR_GROUP_FIRST_ELEMENT (stmt_info);
10871 if (DR_GROUP_STORE_COUNT (group_info) == DR_GROUP_SIZE (group_info))
10872 is_store = true;
10873 }
10874 else
10875 is_store = true;
10876 break;
10877
10878 case condition_vec_info_type:
10879 done = vectorizable_condition (vinfo, stmt_info,
10880 gsi, &vec_stmt, slp_node, NULL);
10881 gcc_assert (done);
10882 break;
10883
10884 case comparison_vec_info_type:
10885 done = vectorizable_comparison (vinfo, stmt_info, gsi, &vec_stmt,
10886 slp_node, NULL);
10887 gcc_assert (done);
10888 break;
10889
10890 case call_vec_info_type:
10891 done = vectorizable_call (vinfo, stmt_info,
10892 gsi, &vec_stmt, slp_node, NULL);
10893 break;
10894
10895 case call_simd_clone_vec_info_type:
10896 done = vectorizable_simd_clone_call (vinfo, stmt_info, gsi, &vec_stmt,
10897 slp_node, NULL);
10898 break;
10899
10900 case reduc_vec_info_type:
10901 done = vect_transform_reduction (as_a <loop_vec_info> (vinfo), stmt_info,
10902 gsi, &vec_stmt, slp_node);
10903 gcc_assert (done);
10904 break;
10905
10906 case cycle_phi_info_type:
10907 done = vect_transform_cycle_phi (as_a <loop_vec_info> (vinfo), stmt_info,
10908 &vec_stmt, slp_node, slp_node_instance);
10909 gcc_assert (done);
10910 break;
10911
10912 case lc_phi_info_type:
10913 done = vectorizable_lc_phi (as_a <loop_vec_info> (vinfo),
10914 stmt_info, &vec_stmt, slp_node);
10915 gcc_assert (done);
10916 break;
10917
10918 case phi_info_type:
10919 done = vectorizable_phi (vinfo, stmt_info, &vec_stmt, slp_node, NULL);
10920 gcc_assert (done);
10921 break;
10922
10923 default:
10924 if (!STMT_VINFO_LIVE_P (stmt_info))
10925 {
10926 if (dump_enabled_p ())
10927 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
10928 "stmt not supported.\n");
10929 gcc_unreachable ();
10930 }
10931 done = true;
10932 }
10933
10934 if (!slp_node && vec_stmt)
10935 gcc_assert (STMT_VINFO_VEC_STMTS (stmt_info).exists ());
10936
10937 if (STMT_VINFO_TYPE (stmt_info) == store_vec_info_type)
10938 return is_store;
10939
10940 /* Handle stmts whose DEF is used outside the loop-nest that is
10941 being vectorized. */
10942 done = can_vectorize_live_stmts (vinfo, stmt_info, gsi, slp_node,
10943 slp_node_instance, true, NULL);
10944 gcc_assert (done);
10945
10946 return false;
10947 }
10948
10949
10950 /* Remove a group of stores (for SLP or interleaving), free their
10951 stmt_vec_info. */
10952
10953 void
10954 vect_remove_stores (vec_info *vinfo, stmt_vec_info first_stmt_info)
10955 {
10956 stmt_vec_info next_stmt_info = first_stmt_info;
10957
10958 while (next_stmt_info)
10959 {
10960 stmt_vec_info tmp = DR_GROUP_NEXT_ELEMENT (next_stmt_info);
10961 next_stmt_info = vect_orig_stmt (next_stmt_info);
10962 /* Free the attached stmt_vec_info and remove the stmt. */
10963 vinfo->remove_stmt (next_stmt_info);
10964 next_stmt_info = tmp;
10965 }
10966 }
10967
10968 /* If NUNITS is nonzero, return a vector type that contains NUNITS
10969 elements of type SCALAR_TYPE, or null if the target doesn't support
10970 such a type.
10971
10972 If NUNITS is zero, return a vector type that contains elements of
10973 type SCALAR_TYPE, choosing whichever vector size the target prefers.
10974
10975 If PREVAILING_MODE is VOIDmode, we have not yet chosen a vector mode
10976 for this vectorization region and want to "autodetect" the best choice.
10977 Otherwise, PREVAILING_MODE is a previously-chosen vector TYPE_MODE
10978 and we want the new type to be interoperable with it. PREVAILING_MODE
10979 in this case can be a scalar integer mode or a vector mode; when it
10980 is a vector mode, the function acts like a tree-level version of
10981 related_vector_mode. */
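/* For example (assuming the target supports it), passing V8HImode as
   PREVAILING_MODE together with a 32-bit integer SCALAR_TYPE and a zero
   NUNITS would normally yield a V4SImode vector type of the same size.  */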
10982
10983 tree
10984 get_related_vectype_for_scalar_type (machine_mode prevailing_mode,
10985 tree scalar_type, poly_uint64 nunits)
10986 {
10987 tree orig_scalar_type = scalar_type;
10988 scalar_mode inner_mode;
10989 machine_mode simd_mode;
10990 tree vectype;
10991
10992 if (!is_int_mode (TYPE_MODE (scalar_type), &inner_mode)
10993 && !is_float_mode (TYPE_MODE (scalar_type), &inner_mode))
10994 return NULL_TREE;
10995
10996 unsigned int nbytes = GET_MODE_SIZE (inner_mode);
10997
10998 /* For vector types of elements whose mode precision doesn't
10999 match their type's precision we use an element type of mode
11000 precision. The vectorization routines will have to make sure
11001 they support the proper result truncation/extension.
11002 We also make sure to build vector types with INTEGER_TYPE
11003 component type only. */
11004 if (INTEGRAL_TYPE_P (scalar_type)
11005 && (GET_MODE_BITSIZE (inner_mode) != TYPE_PRECISION (scalar_type)
11006 || TREE_CODE (scalar_type) != INTEGER_TYPE))
11007 scalar_type = build_nonstandard_integer_type (GET_MODE_BITSIZE (inner_mode),
11008 TYPE_UNSIGNED (scalar_type));
11009
11010 /* We shouldn't end up building VECTOR_TYPEs of non-scalar components.
11011 When the component mode passes the above test, simply use a type
11012 corresponding to that mode. The theory is that any use that
11013 would cause problems with this will disable vectorization anyway. */
11014 else if (!SCALAR_FLOAT_TYPE_P (scalar_type)
11015 && !INTEGRAL_TYPE_P (scalar_type))
11016 scalar_type = lang_hooks.types.type_for_mode (inner_mode, 1);
11017
11018 /* We can't build a vector type of elements with alignment bigger than
11019 their size. */
11020 else if (nbytes < TYPE_ALIGN_UNIT (scalar_type))
11021 scalar_type = lang_hooks.types.type_for_mode (inner_mode,
11022 TYPE_UNSIGNED (scalar_type));
11023
11024 /* If we fell back to using the mode, fail if there was
11025 no scalar type for it. */
11026 if (scalar_type == NULL_TREE)
11027 return NULL_TREE;
11028
11029 /* If no prevailing mode was supplied, use the mode the target prefers.
11030 Otherwise lookup a vector mode based on the prevailing mode. */
11031 if (prevailing_mode == VOIDmode)
11032 {
11033 gcc_assert (known_eq (nunits, 0U));
11034 simd_mode = targetm.vectorize.preferred_simd_mode (inner_mode);
11035 if (SCALAR_INT_MODE_P (simd_mode))
11036 {
11037 /* Traditional behavior is not to take the integer mode
11038 literally, but simply to use it as a way of determining
11039 the vector size. It is up to mode_for_vector to decide
11040 what the TYPE_MODE should be.
11041
11042 Note that nunits == 1 is allowed in order to support single
11043 element vector types. */
11044 if (!multiple_p (GET_MODE_SIZE (simd_mode), nbytes, &nunits)
11045 || !mode_for_vector (inner_mode, nunits).exists (&simd_mode))
11046 return NULL_TREE;
11047 }
11048 }
11049 else if (SCALAR_INT_MODE_P (prevailing_mode)
11050 || !related_vector_mode (prevailing_mode,
11051 inner_mode, nunits).exists (&simd_mode))
11052 {
11053 /* Fall back to using mode_for_vector, mostly in the hope of being
11054 able to use an integer mode. */
11055 if (known_eq (nunits, 0U)
11056 && !multiple_p (GET_MODE_SIZE (prevailing_mode), nbytes, &nunits))
11057 return NULL_TREE;
11058
11059 if (!mode_for_vector (inner_mode, nunits).exists (&simd_mode))
11060 return NULL_TREE;
11061 }
11062
11063 vectype = build_vector_type_for_mode (scalar_type, simd_mode);
11064
11065 /* In cases where the mode was chosen by mode_for_vector, check that
11066 the target actually supports the chosen mode, or that it at least
11067 allows the vector mode to be replaced by a like-sized integer. */
11068 if (!VECTOR_MODE_P (TYPE_MODE (vectype))
11069 && !INTEGRAL_MODE_P (TYPE_MODE (vectype)))
11070 return NULL_TREE;
11071
11072 /* Re-attach the address-space qualifier if we canonicalized the scalar
11073 type. */
11074 if (TYPE_ADDR_SPACE (orig_scalar_type) != TYPE_ADDR_SPACE (vectype))
11075 return build_qualified_type
11076 (vectype, KEEP_QUAL_ADDR_SPACE (TYPE_QUALS (orig_scalar_type)));
11077
11078 return vectype;
11079 }
11080
11081 /* Function get_vectype_for_scalar_type.
11082
11083 Returns the vector type corresponding to SCALAR_TYPE as supported
11084 by the target. If GROUP_SIZE is nonzero and we're performing BB
11085 vectorization, make sure that the number of elements in the vector
11086 is no bigger than GROUP_SIZE. */
11087
11088 tree
11089 get_vectype_for_scalar_type (vec_info *vinfo, tree scalar_type,
11090 unsigned int group_size)
11091 {
11092 /* For BB vectorization, we should always have a group size once we've
11093 constructed the SLP tree; the only valid uses of zero GROUP_SIZEs
11094 are tentative requests during things like early data reference
11095 analysis and pattern recognition. */
11096 if (is_a <bb_vec_info> (vinfo))
11097 gcc_assert (vinfo->slp_instances.is_empty () || group_size != 0);
11098 else
11099 group_size = 0;
11100
11101 tree vectype = get_related_vectype_for_scalar_type (vinfo->vector_mode,
11102 scalar_type);
11103 if (vectype && vinfo->vector_mode == VOIDmode)
11104 vinfo->vector_mode = TYPE_MODE (vectype);
11105
11106 /* Register the natural choice of vector type, before the group size
11107 has been applied. */
11108 if (vectype)
11109 vinfo->used_vector_modes.add (TYPE_MODE (vectype));
11110
11111 /* If the natural choice of vector type doesn't satisfy GROUP_SIZE,
11112 try again with an explicit number of elements. */
11113 if (vectype
11114 && group_size
11115 && maybe_ge (TYPE_VECTOR_SUBPARTS (vectype), group_size))
11116 {
11117 /* Start with the biggest number of units that fits within
11118 GROUP_SIZE and halve it until we find a valid vector type.
11119 Usually either the first attempt will succeed or all will
11120 fail (in the latter case because GROUP_SIZE is too small
11121 for the target), but it's possible that a target could have
11122 a hole between supported vector types.
11123
11124 If GROUP_SIZE is not a power of 2, this has the effect of
11125 trying the largest power of 2 that fits within the group,
11126 even though the group is not a multiple of that vector size.
11127 The BB vectorizer will then try to carve up the group into
11128 smaller pieces. */
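      /* For instance, with GROUP_SIZE == 6 this tries 4 elements first
         and then 2.  */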
11129 unsigned int nunits = 1 << floor_log2 (group_size);
11130 do
11131 {
11132 vectype = get_related_vectype_for_scalar_type (vinfo->vector_mode,
11133 scalar_type, nunits);
11134 nunits /= 2;
11135 }
11136 while (nunits > 1 && !vectype);
11137 }
11138
11139 return vectype;
11140 }
11141
11142 /* Return the vector type corresponding to SCALAR_TYPE as supported
11143 by the target. NODE, if nonnull, is the SLP tree node that will
11144 use the returned vector type. */
11145
11146 tree
11147 get_vectype_for_scalar_type (vec_info *vinfo, tree scalar_type, slp_tree node)
11148 {
11149 unsigned int group_size = 0;
11150 if (node)
11151 group_size = SLP_TREE_LANES (node);
11152 return get_vectype_for_scalar_type (vinfo, scalar_type, group_size);
11153 }
11154
11155 /* Function get_mask_type_for_scalar_type.
11156
11157 Returns the mask type corresponding to a result of comparison
11158 of vectors of specified SCALAR_TYPE as supported by target.
11159 If GROUP_SIZE is nonzero and we're performing BB vectorization,
11160 make sure that the number of elements in the vector is no bigger
11161 than GROUP_SIZE. */
11162
11163 tree
11164 get_mask_type_for_scalar_type (vec_info *vinfo, tree scalar_type,
11165 unsigned int group_size)
11166 {
11167 tree vectype = get_vectype_for_scalar_type (vinfo, scalar_type, group_size);
11168
11169 if (!vectype)
11170 return NULL;
11171
11172 return truth_type_for (vectype);
11173 }
11174
11175 /* Function get_same_sized_vectype
11176
11177 Returns a vector type corresponding to SCALAR_TYPE of size
11178 VECTOR_TYPE if supported by the target. */
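/* For example (where the target supports it), given a 4 x float
   VECTOR_TYPE and a 32-bit integer SCALAR_TYPE this returns a
   4 x int vector type.  */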
11179
11180 tree
11181 get_same_sized_vectype (tree scalar_type, tree vector_type)
11182 {
11183 if (VECT_SCALAR_BOOLEAN_TYPE_P (scalar_type))
11184 return truth_type_for (vector_type);
11185
11186 poly_uint64 nunits;
11187 if (!multiple_p (GET_MODE_SIZE (TYPE_MODE (vector_type)),
11188 GET_MODE_SIZE (TYPE_MODE (scalar_type)), &nunits))
11189 return NULL_TREE;
11190
11191 return get_related_vectype_for_scalar_type (TYPE_MODE (vector_type),
11192 scalar_type, nunits);
11193 }
11194
11195 /* Return true if replacing LOOP_VINFO->vector_mode with VECTOR_MODE
11196 would not change the chosen vector modes. */
11197
11198 bool
11199 vect_chooses_same_modes_p (vec_info *vinfo, machine_mode vector_mode)
11200 {
11201 for (vec_info::mode_set::iterator i = vinfo->used_vector_modes.begin ();
11202 i != vinfo->used_vector_modes.end (); ++i)
11203 if (!VECTOR_MODE_P (*i)
11204 || related_vector_mode (vector_mode, GET_MODE_INNER (*i), 0) != *i)
11205 return false;
11206 return true;
11207 }
11208
11209 /* Function vect_is_simple_use.
11210
11211 Input:
11212 VINFO - the vect info of the loop or basic block that is being vectorized.
11213 OPERAND - operand in the loop or bb.
11214 Output:
11215 DEF_STMT_INFO_OUT (optional) - information about the defining stmt in
11216 case OPERAND is an SSA_NAME that is defined in the vectorizable region
11217 DEF_STMT_OUT (optional) - the defining stmt in case OPERAND is an SSA_NAME;
11218 the definition could be anywhere in the function
11219 DT - the type of definition
11220
11221 Returns whether a stmt with OPERAND can be vectorized.
11222 For loops, supportable operands are constants, loop invariants, and operands
11223 that are defined by the current iteration of the loop. Unsupportable
11224 operands are those that are defined by a previous iteration of the loop (as
11225 is the case in reduction/induction computations).
11226 For basic blocks, supportable operands are constants and bb invariants.
11227 For now, operands defined outside the basic block are not supported. */
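/* As an illustration, in a loop computing a[i] = b[i] + x with x defined
   before the loop, the value loaded from b[i] is vect_internal_def, x is
   vect_external_def and a literal constant is vect_constant_def.  */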
11228
11229 bool
11230 vect_is_simple_use (tree operand, vec_info *vinfo, enum vect_def_type *dt,
11231 stmt_vec_info *def_stmt_info_out, gimple **def_stmt_out)
11232 {
11233 if (def_stmt_info_out)
11234 *def_stmt_info_out = NULL;
11235 if (def_stmt_out)
11236 *def_stmt_out = NULL;
11237 *dt = vect_unknown_def_type;
11238
11239 if (dump_enabled_p ())
11240 {
11241 dump_printf_loc (MSG_NOTE, vect_location,
11242 "vect_is_simple_use: operand ");
11243 if (TREE_CODE (operand) == SSA_NAME
11244 && !SSA_NAME_IS_DEFAULT_DEF (operand))
11245 dump_gimple_expr (MSG_NOTE, TDF_SLIM, SSA_NAME_DEF_STMT (operand), 0);
11246 else
11247 dump_generic_expr (MSG_NOTE, TDF_SLIM, operand);
11248 }
11249
11250 if (CONSTANT_CLASS_P (operand))
11251 *dt = vect_constant_def;
11252 else if (is_gimple_min_invariant (operand))
11253 *dt = vect_external_def;
11254 else if (TREE_CODE (operand) != SSA_NAME)
11255 *dt = vect_unknown_def_type;
11256 else if (SSA_NAME_IS_DEFAULT_DEF (operand))
11257 *dt = vect_external_def;
11258 else
11259 {
11260 gimple *def_stmt = SSA_NAME_DEF_STMT (operand);
11261 stmt_vec_info stmt_vinfo = vinfo->lookup_def (operand);
11262 if (!stmt_vinfo)
11263 *dt = vect_external_def;
11264 else
11265 {
11266 stmt_vinfo = vect_stmt_to_vectorize (stmt_vinfo);
11267 def_stmt = stmt_vinfo->stmt;
11268 switch (gimple_code (def_stmt))
11269 {
11270 case GIMPLE_PHI:
11271 case GIMPLE_ASSIGN:
11272 case GIMPLE_CALL:
11273 *dt = STMT_VINFO_DEF_TYPE (stmt_vinfo);
11274 break;
11275 default:
11276 *dt = vect_unknown_def_type;
11277 break;
11278 }
11279 if (def_stmt_info_out)
11280 *def_stmt_info_out = stmt_vinfo;
11281 }
11282 if (def_stmt_out)
11283 *def_stmt_out = def_stmt;
11284 }
11285
11286 if (dump_enabled_p ())
11287 {
11288 dump_printf (MSG_NOTE, ", type of def: ");
11289 switch (*dt)
11290 {
11291 case vect_uninitialized_def:
11292 dump_printf (MSG_NOTE, "uninitialized\n");
11293 break;
11294 case vect_constant_def:
11295 dump_printf (MSG_NOTE, "constant\n");
11296 break;
11297 case vect_external_def:
11298 dump_printf (MSG_NOTE, "external\n");
11299 break;
11300 case vect_internal_def:
11301 dump_printf (MSG_NOTE, "internal\n");
11302 break;
11303 case vect_induction_def:
11304 dump_printf (MSG_NOTE, "induction\n");
11305 break;
11306 case vect_reduction_def:
11307 dump_printf (MSG_NOTE, "reduction\n");
11308 break;
11309 case vect_double_reduction_def:
11310 dump_printf (MSG_NOTE, "double reduction\n");
11311 break;
11312 case vect_nested_cycle:
11313 dump_printf (MSG_NOTE, "nested cycle\n");
11314 break;
11315 case vect_unknown_def_type:
11316 dump_printf (MSG_NOTE, "unknown\n");
11317 break;
11318 }
11319 }
11320
11321 if (*dt == vect_unknown_def_type)
11322 {
11323 if (dump_enabled_p ())
11324 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
11325 "Unsupported pattern.\n");
11326 return false;
11327 }
11328
11329 return true;
11330 }
11331
11332 /* Function vect_is_simple_use.
11333
11334 Same as vect_is_simple_use but also determines the vector operand
11335 type of OPERAND and stores it to *VECTYPE. If the definition of
11336 OPERAND is vect_uninitialized_def, vect_constant_def or
11337 vect_external_def *VECTYPE will be set to NULL_TREE and the caller
11338 is responsible to compute the best suited vector type for the
11339 scalar operand. */
11340
11341 bool
11342 vect_is_simple_use (tree operand, vec_info *vinfo, enum vect_def_type *dt,
11343 tree *vectype, stmt_vec_info *def_stmt_info_out,
11344 gimple **def_stmt_out)
11345 {
11346 stmt_vec_info def_stmt_info;
11347 gimple *def_stmt;
11348 if (!vect_is_simple_use (operand, vinfo, dt, &def_stmt_info, &def_stmt))
11349 return false;
11350
11351 if (def_stmt_out)
11352 *def_stmt_out = def_stmt;
11353 if (def_stmt_info_out)
11354 *def_stmt_info_out = def_stmt_info;
11355
11356 /* Now get a vector type if the def is internal, otherwise supply
11357 NULL_TREE and leave it up to the caller to figure out a proper
11358 type for the use stmt. */
11359 if (*dt == vect_internal_def
11360 || *dt == vect_induction_def
11361 || *dt == vect_reduction_def
11362 || *dt == vect_double_reduction_def
11363 || *dt == vect_nested_cycle)
11364 {
11365 *vectype = STMT_VINFO_VECTYPE (def_stmt_info);
11366 gcc_assert (*vectype != NULL_TREE);
11367 if (dump_enabled_p ())
11368 dump_printf_loc (MSG_NOTE, vect_location,
11369 "vect_is_simple_use: vectype %T\n", *vectype);
11370 }
11371 else if (*dt == vect_uninitialized_def
11372 || *dt == vect_constant_def
11373 || *dt == vect_external_def)
11374 *vectype = NULL_TREE;
11375 else
11376 gcc_unreachable ();
11377
11378 return true;
11379 }
11380
11381 /* Function vect_is_simple_use.
11382
11383 Same as vect_is_simple_use but determines the operand by operand
11384 position OPERAND from either STMT or SLP_NODE, filling in *OP
11385 and *SLP_DEF (when SLP_NODE is not NULL). */
11386
11387 bool
11388 vect_is_simple_use (vec_info *vinfo, stmt_vec_info stmt, slp_tree slp_node,
11389 unsigned operand, tree *op, slp_tree *slp_def,
11390 enum vect_def_type *dt,
11391 tree *vectype, stmt_vec_info *def_stmt_info_out)
11392 {
11393 if (slp_node)
11394 {
11395 slp_tree child = SLP_TREE_CHILDREN (slp_node)[operand];
11396 *slp_def = child;
11397 *vectype = SLP_TREE_VECTYPE (child);
11398 if (SLP_TREE_DEF_TYPE (child) == vect_internal_def)
11399 {
11400 *op = gimple_get_lhs (SLP_TREE_REPRESENTATIVE (child)->stmt);
11401 return vect_is_simple_use (*op, vinfo, dt, def_stmt_info_out);
11402 }
11403 else
11404 {
11405 if (def_stmt_info_out)
11406 *def_stmt_info_out = NULL;
11407 *op = SLP_TREE_SCALAR_OPS (child)[0];
11408 *dt = SLP_TREE_DEF_TYPE (child);
11409 return true;
11410 }
11411 }
11412 else
11413 {
11414 *slp_def = NULL;
11415 if (gassign *ass = dyn_cast <gassign *> (stmt->stmt))
11416 {
11417 if (gimple_assign_rhs_code (ass) == COND_EXPR
11418 && COMPARISON_CLASS_P (gimple_assign_rhs1 (ass)))
11419 {
11420 if (operand < 2)
11421 *op = TREE_OPERAND (gimple_assign_rhs1 (ass), operand);
11422 else
11423 *op = gimple_op (ass, operand);
11424 }
11425 else if (gimple_assign_rhs_code (ass) == VIEW_CONVERT_EXPR)
11426 *op = TREE_OPERAND (gimple_assign_rhs1 (ass), 0);
11427 else
11428 *op = gimple_op (ass, operand + 1);
11429 }
11430 else if (gcall *call = dyn_cast <gcall *> (stmt->stmt))
11431 {
11432 if (gimple_call_internal_p (call)
11433 && internal_store_fn_p (gimple_call_internal_fn (call)))
11434 operand = internal_fn_stored_value_index (gimple_call_internal_fn
11435 (call));
11436 *op = gimple_call_arg (call, operand);
11437 }
11438 else
11439 gcc_unreachable ();
11440 return vect_is_simple_use (*op, vinfo, dt, vectype, def_stmt_info_out);
11441 }
11442 }
11443
11444 /* If OP is not NULL and is external or constant update its vector
11445 type with VECTYPE. Returns true if successful or false if not,
11446 for example when conflicting vector types are present. */
11447
11448 bool
11449 vect_maybe_update_slp_op_vectype (slp_tree op, tree vectype)
11450 {
11451 if (!op || SLP_TREE_DEF_TYPE (op) == vect_internal_def)
11452 return true;
11453 if (SLP_TREE_VECTYPE (op))
11454 return types_compatible_p (SLP_TREE_VECTYPE (op), vectype);
11455 SLP_TREE_VECTYPE (op) = vectype;
11456 return true;
11457 }
11458
11459 /* Function supportable_widening_operation
11460
11461 Check whether an operation represented by the code CODE is a
11462 widening operation that is supported by the target platform in
11463 vector form (i.e., when operating on arguments of type VECTYPE_IN
11464 producing a result of type VECTYPE_OUT).
11465
11466 Widening operations we currently support are NOP (CONVERT), FLOAT,
11467 FIX_TRUNC and WIDEN_MULT. This function checks if these operations
11468 are supported by the target platform either directly (via vector
11469 tree-codes), or via target builtins.
11470
11471 Output:
11472 - CODE1 and CODE2 are codes of vector operations to be used when
11473 vectorizing the operation, if available.
11474 - MULTI_STEP_CVT determines the number of required intermediate steps in
11475 case of multi-step conversion (like char->short->int - in that case
11476 MULTI_STEP_CVT will be 1).
11477 - INTERM_TYPES contains the intermediate type required to perform the
11478 widening operation (short in the above example). */
11479
11480 bool
11481 supportable_widening_operation (vec_info *vinfo,
11482 enum tree_code code, stmt_vec_info stmt_info,
11483 tree vectype_out, tree vectype_in,
11484 enum tree_code *code1, enum tree_code *code2,
11485 int *multi_step_cvt,
11486 vec<tree> *interm_types)
11487 {
11488 loop_vec_info loop_info = dyn_cast <loop_vec_info> (vinfo);
11489 class loop *vect_loop = NULL;
11490 machine_mode vec_mode;
11491 enum insn_code icode1, icode2;
11492 optab optab1, optab2;
11493 tree vectype = vectype_in;
11494 tree wide_vectype = vectype_out;
11495 enum tree_code c1, c2;
11496 int i;
11497 tree prev_type, intermediate_type;
11498 machine_mode intermediate_mode, prev_mode;
11499 optab optab3, optab4;
11500
11501 *multi_step_cvt = 0;
11502 if (loop_info)
11503 vect_loop = LOOP_VINFO_LOOP (loop_info);
11504
11505 switch (code)
11506 {
11507 case WIDEN_MULT_EXPR:
11508 /* The result of a vectorized widening operation usually requires
11509 two vectors (because the widened results do not fit into one vector).
11510 The generated vector results would normally be expected to be
11511 generated in the same order as in the original scalar computation,
11512 i.e. if 8 results are generated in each vector iteration, they are
11513 to be organized as follows:
11514 vect1: [res1,res2,res3,res4],
11515 vect2: [res5,res6,res7,res8].
11516
11517 However, in the special case that the result of the widening
11518 operation is used in a reduction computation only, the order doesn't
11519 matter (because when vectorizing a reduction we change the order of
11520 the computation). Some targets can take advantage of this and
11521 generate more efficient code. For example, targets like Altivec,
11522 that support widen_mult using a sequence of {mult_even,mult_odd}
11523 generate the following vectors:
11524 vect1: [res1,res3,res5,res7],
11525 vect2: [res2,res4,res6,res8].
11526
11527 When vectorizing outer-loops, we execute the inner-loop sequentially
11528 (each vectorized inner-loop iteration contributes to VF outer-loop
11529 iterations in parallel). We therefore don't allow changing the
11530 order of the computation in the inner-loop during outer-loop
11531 vectorization. */
11532 /* TODO: Another case in which order doesn't *really* matter is when we
11533 widen and then contract again, e.g. (short)((int)x * y >> 8).
11534 Normally, pack_trunc performs an even/odd permute, whereas the
11535 repack from an even/odd expansion would be an interleave, which
11536 would be significantly simpler for e.g. AVX2. */
11537 /* In any case, in order to avoid duplicating the code below, recurse
11538 on VEC_WIDEN_MULT_EVEN_EXPR. If it succeeds, all the return values
11539 are properly set up for the caller. If we fail, we'll continue with
11540 a VEC_WIDEN_MULT_LO/HI_EXPR check. */
11541 if (vect_loop
11542 && STMT_VINFO_RELEVANT (stmt_info) == vect_used_by_reduction
11543 && !nested_in_vect_loop_p (vect_loop, stmt_info)
11544 && supportable_widening_operation (vinfo, VEC_WIDEN_MULT_EVEN_EXPR,
11545 stmt_info, vectype_out,
11546 vectype_in, code1, code2,
11547 multi_step_cvt, interm_types))
11548 {
11549 /* Elements in a vector with vect_used_by_reduction property cannot
11550 be reordered if the use chain with this property does not have the
11551 same operation. One such example is s += a * b, where elements
11552 in a and b cannot be reordered. Here we check if the vector defined
11553 by STMT is only directly used in the reduction statement. */
11554 tree lhs = gimple_assign_lhs (stmt_info->stmt);
11555 stmt_vec_info use_stmt_info = loop_info->lookup_single_use (lhs);
11556 if (use_stmt_info
11557 && STMT_VINFO_DEF_TYPE (use_stmt_info) == vect_reduction_def)
11558 return true;
11559 }
11560 c1 = VEC_WIDEN_MULT_LO_EXPR;
11561 c2 = VEC_WIDEN_MULT_HI_EXPR;
11562 break;
11563
11564 case DOT_PROD_EXPR:
11565 c1 = DOT_PROD_EXPR;
11566 c2 = DOT_PROD_EXPR;
11567 break;
11568
11569 case SAD_EXPR:
11570 c1 = SAD_EXPR;
11571 c2 = SAD_EXPR;
11572 break;
11573
11574 case VEC_WIDEN_MULT_EVEN_EXPR:
11575 /* Support the recursion induced just above. */
11576 c1 = VEC_WIDEN_MULT_EVEN_EXPR;
11577 c2 = VEC_WIDEN_MULT_ODD_EXPR;
11578 break;
11579
11580 case WIDEN_LSHIFT_EXPR:
11581 c1 = VEC_WIDEN_LSHIFT_LO_EXPR;
11582 c2 = VEC_WIDEN_LSHIFT_HI_EXPR;
11583 break;
11584
11585 case WIDEN_PLUS_EXPR:
11586 c1 = VEC_WIDEN_PLUS_LO_EXPR;
11587 c2 = VEC_WIDEN_PLUS_HI_EXPR;
11588 break;
11589
11590 case WIDEN_MINUS_EXPR:
11591 c1 = VEC_WIDEN_MINUS_LO_EXPR;
11592 c2 = VEC_WIDEN_MINUS_HI_EXPR;
11593 break;
11594
11595 CASE_CONVERT:
11596 c1 = VEC_UNPACK_LO_EXPR;
11597 c2 = VEC_UNPACK_HI_EXPR;
11598 break;
11599
11600 case FLOAT_EXPR:
11601 c1 = VEC_UNPACK_FLOAT_LO_EXPR;
11602 c2 = VEC_UNPACK_FLOAT_HI_EXPR;
11603 break;
11604
11605 case FIX_TRUNC_EXPR:
11606 c1 = VEC_UNPACK_FIX_TRUNC_LO_EXPR;
11607 c2 = VEC_UNPACK_FIX_TRUNC_HI_EXPR;
11608 break;
11609
11610 default:
11611 gcc_unreachable ();
11612 }
11613
11614 if (BYTES_BIG_ENDIAN && c1 != VEC_WIDEN_MULT_EVEN_EXPR)
11615 std::swap (c1, c2);
11616
11617 if (code == FIX_TRUNC_EXPR)
11618 {
11619 /* The signedness is determined from output operand. */
11620 optab1 = optab_for_tree_code (c1, vectype_out, optab_default);
11621 optab2 = optab_for_tree_code (c2, vectype_out, optab_default);
11622 }
11623 else if (CONVERT_EXPR_CODE_P (code)
11624 && VECTOR_BOOLEAN_TYPE_P (wide_vectype)
11625 && VECTOR_BOOLEAN_TYPE_P (vectype)
11626 && TYPE_MODE (wide_vectype) == TYPE_MODE (vectype)
11627 && SCALAR_INT_MODE_P (TYPE_MODE (vectype)))
11628 {
11629 /* If the input and result modes are the same, a different optab
11630 is needed where we pass in the number of units in vectype. */
11631 optab1 = vec_unpacks_sbool_lo_optab;
11632 optab2 = vec_unpacks_sbool_hi_optab;
11633 }
11634 else
11635 {
11636 optab1 = optab_for_tree_code (c1, vectype, optab_default);
11637 optab2 = optab_for_tree_code (c2, vectype, optab_default);
11638 }
11639
11640 if (!optab1 || !optab2)
11641 return false;
11642
11643 vec_mode = TYPE_MODE (vectype);
11644 if ((icode1 = optab_handler (optab1, vec_mode)) == CODE_FOR_nothing
11645 || (icode2 = optab_handler (optab2, vec_mode)) == CODE_FOR_nothing)
11646 return false;
11647
11648 *code1 = c1;
11649 *code2 = c2;
11650
11651 if (insn_data[icode1].operand[0].mode == TYPE_MODE (wide_vectype)
11652 && insn_data[icode2].operand[0].mode == TYPE_MODE (wide_vectype))
11653 {
11654 if (!VECTOR_BOOLEAN_TYPE_P (vectype))
11655 return true;
11656 /* For scalar masks we may have different boolean
11657 vector types having the same QImode. Thus we
11658 add an additional check on the number of elements. */
11659 if (known_eq (TYPE_VECTOR_SUBPARTS (vectype),
11660 TYPE_VECTOR_SUBPARTS (wide_vectype) * 2))
11661 return true;
11662 }
11663
11664 /* Check if it's a multi-step conversion that can be done using intermediate
11665 types. */
11666
11667 prev_type = vectype;
11668 prev_mode = vec_mode;
11669
11670 if (!CONVERT_EXPR_CODE_P (code))
11671 return false;
11672
11673 /* We assume here that there will not be more than MAX_INTERM_CVT_STEPS
11674 intermediate steps in the promotion sequence. We try
11675 MAX_INTERM_CVT_STEPS to get to WIDE_VECTYPE, and fail if we do
11676 not. */
11677 interm_types->create (MAX_INTERM_CVT_STEPS);
11678 for (i = 0; i < MAX_INTERM_CVT_STEPS; i++)
11679 {
11680 intermediate_mode = insn_data[icode1].operand[0].mode;
11681 if (VECTOR_BOOLEAN_TYPE_P (prev_type))
11682 intermediate_type
11683 = vect_halve_mask_nunits (prev_type, intermediate_mode);
11684 else
11685 intermediate_type
11686 = lang_hooks.types.type_for_mode (intermediate_mode,
11687 TYPE_UNSIGNED (prev_type));
11688
11689 if (VECTOR_BOOLEAN_TYPE_P (intermediate_type)
11690 && VECTOR_BOOLEAN_TYPE_P (prev_type)
11691 && intermediate_mode == prev_mode
11692 && SCALAR_INT_MODE_P (prev_mode))
11693 {
11694 /* If the input and result modes are the same, a different optab
11695 is needed where we pass in the number of units in vectype. */
11696 optab3 = vec_unpacks_sbool_lo_optab;
11697 optab4 = vec_unpacks_sbool_hi_optab;
11698 }
11699 else
11700 {
11701 optab3 = optab_for_tree_code (c1, intermediate_type, optab_default);
11702 optab4 = optab_for_tree_code (c2, intermediate_type, optab_default);
11703 }
11704
11705 if (!optab3 || !optab4
11706 || (icode1 = optab_handler (optab1, prev_mode)) == CODE_FOR_nothing
11707 || insn_data[icode1].operand[0].mode != intermediate_mode
11708 || (icode2 = optab_handler (optab2, prev_mode)) == CODE_FOR_nothing
11709 || insn_data[icode2].operand[0].mode != intermediate_mode
11710 || ((icode1 = optab_handler (optab3, intermediate_mode))
11711 == CODE_FOR_nothing)
11712 || ((icode2 = optab_handler (optab4, intermediate_mode))
11713 == CODE_FOR_nothing))
11714 break;
11715
11716 interm_types->quick_push (intermediate_type);
11717 (*multi_step_cvt)++;
11718
11719 if (insn_data[icode1].operand[0].mode == TYPE_MODE (wide_vectype)
11720 && insn_data[icode2].operand[0].mode == TYPE_MODE (wide_vectype))
11721 {
11722 if (!VECTOR_BOOLEAN_TYPE_P (vectype))
11723 return true;
11724 if (known_eq (TYPE_VECTOR_SUBPARTS (intermediate_type),
11725 TYPE_VECTOR_SUBPARTS (wide_vectype) * 2))
11726 return true;
11727 }
11728
11729 prev_type = intermediate_type;
11730 prev_mode = intermediate_mode;
11731 }
11732
11733 interm_types->release ();
11734 return false;
11735 }
11736
11737
11738 /* Function supportable_narrowing_operation
11739
11740 Check whether an operation represented by the code CODE is a
11741 narrowing operation that is supported by the target platform in
11742 vector form (i.e., when operating on arguments of type VECTYPE_IN
11743 and producing a result of type VECTYPE_OUT).
11744
11745 Narrowing operations we currently support are NOP (CONVERT), FIX_TRUNC
11746 and FLOAT. This function checks if these operations are supported by
11747 the target platform directly via vector tree-codes.
11748
11749 Output:
11750 - CODE1 is the code of a vector operation to be used when
11751 vectorizing the operation, if available.
11752 - MULTI_STEP_CVT determines the number of required intermediate steps in
11753 case of multi-step conversion (like int->short->char - in that case
11754 MULTI_STEP_CVT will be 1).
11755 - INTERM_TYPES contains the intermediate type required to perform the
11756 narrowing operation (short in the above example). */
11757
11758 bool
11759 supportable_narrowing_operation (enum tree_code code,
11760 tree vectype_out, tree vectype_in,
11761 enum tree_code *code1, int *multi_step_cvt,
11762 vec<tree> *interm_types)
11763 {
11764 machine_mode vec_mode;
11765 enum insn_code icode1;
11766 optab optab1, interm_optab;
11767 tree vectype = vectype_in;
11768 tree narrow_vectype = vectype_out;
11769 enum tree_code c1;
11770 tree intermediate_type, prev_type;
11771 machine_mode intermediate_mode, prev_mode;
11772 int i;
11773 bool uns;
11774
11775 *multi_step_cvt = 0;
11776 switch (code)
11777 {
11778 CASE_CONVERT:
11779 c1 = VEC_PACK_TRUNC_EXPR;
11780 if (VECTOR_BOOLEAN_TYPE_P (narrow_vectype)
11781 && VECTOR_BOOLEAN_TYPE_P (vectype)
11782 && TYPE_MODE (narrow_vectype) == TYPE_MODE (vectype)
11783 && SCALAR_INT_MODE_P (TYPE_MODE (vectype)))
11784 optab1 = vec_pack_sbool_trunc_optab;
11785 else
11786 optab1 = optab_for_tree_code (c1, vectype, optab_default);
11787 break;
11788
11789 case FIX_TRUNC_EXPR:
11790 c1 = VEC_PACK_FIX_TRUNC_EXPR;
11791 /* The signedness is determined from output operand. */
11792 optab1 = optab_for_tree_code (c1, vectype_out, optab_default);
11793 break;
11794
11795 case FLOAT_EXPR:
11796 c1 = VEC_PACK_FLOAT_EXPR;
11797 optab1 = optab_for_tree_code (c1, vectype, optab_default);
11798 break;
11799
11800 default:
11801 gcc_unreachable ();
11802 }
11803
11804 if (!optab1)
11805 return false;
11806
11807 vec_mode = TYPE_MODE (vectype);
11808 if ((icode1 = optab_handler (optab1, vec_mode)) == CODE_FOR_nothing)
11809 return false;
11810
11811 *code1 = c1;
11812
11813 if (insn_data[icode1].operand[0].mode == TYPE_MODE (narrow_vectype))
11814 {
11815 if (!VECTOR_BOOLEAN_TYPE_P (vectype))
11816 return true;
11817 /* For scalar masks we may have different boolean
11818 vector types having the same QImode. Thus we
11819 add an additional check on the number of elements. */
11820 if (known_eq (TYPE_VECTOR_SUBPARTS (vectype) * 2,
11821 TYPE_VECTOR_SUBPARTS (narrow_vectype)))
11822 return true;
11823 }
11824
11825 if (code == FLOAT_EXPR)
11826 return false;
11827
11828 /* Check if it's a multi-step conversion that can be done using intermediate
11829 types. */
11830 prev_mode = vec_mode;
11831 prev_type = vectype;
11832 if (code == FIX_TRUNC_EXPR)
11833 uns = TYPE_UNSIGNED (vectype_out);
11834 else
11835 uns = TYPE_UNSIGNED (vectype);
11836
11837 /* For a multi-step FIX_TRUNC_EXPR prefer a signed floating-point to
11838 integer conversion over an unsigned one, as unsigned FIX_TRUNC_EXPR
11839 is often more costly than signed. */
11840 if (code == FIX_TRUNC_EXPR && uns)
11841 {
11842 enum insn_code icode2;
11843
11844 intermediate_type
11845 = lang_hooks.types.type_for_mode (TYPE_MODE (vectype_out), 0);
11846 interm_optab
11847 = optab_for_tree_code (c1, intermediate_type, optab_default);
11848 if (interm_optab != unknown_optab
11849 && (icode2 = optab_handler (interm_optab, vec_mode)) != CODE_FOR_nothing
11850 && insn_data[icode1].operand[0].mode
11851 == insn_data[icode2].operand[0].mode)
11852 {
11853 uns = false;
11854 optab1 = interm_optab;
11855 icode1 = icode2;
11856 }
11857 }
11858
11859 /* We assume here that there will not be more than MAX_INTERM_CVT_STEPS
11860 intermediate steps in the narrowing sequence.  We try up to
11861 MAX_INTERM_CVT_STEPS steps to get to NARROW_VECTYPE, and fail if we do not. */
11862 interm_types->create (MAX_INTERM_CVT_STEPS);
11863 for (i = 0; i < MAX_INTERM_CVT_STEPS; i++)
11864 {
11865 intermediate_mode = insn_data[icode1].operand[0].mode;
11866 if (VECTOR_BOOLEAN_TYPE_P (prev_type))
11867 intermediate_type
11868 = vect_double_mask_nunits (prev_type, intermediate_mode);
11869 else
11870 intermediate_type
11871 = lang_hooks.types.type_for_mode (intermediate_mode, uns);
11872 if (VECTOR_BOOLEAN_TYPE_P (intermediate_type)
11873 && VECTOR_BOOLEAN_TYPE_P (prev_type)
11874 && intermediate_mode == prev_mode
11875 && SCALAR_INT_MODE_P (prev_mode))
11876 interm_optab = vec_pack_sbool_trunc_optab;
11877 else
11878 interm_optab
11879 = optab_for_tree_code (VEC_PACK_TRUNC_EXPR, intermediate_type,
11880 optab_default);
11881 if (!interm_optab
11882 || ((icode1 = optab_handler (optab1, prev_mode)) == CODE_FOR_nothing)
11883 || insn_data[icode1].operand[0].mode != intermediate_mode
11884 || ((icode1 = optab_handler (interm_optab, intermediate_mode))
11885 == CODE_FOR_nothing))
11886 break;
11887
11888 interm_types->quick_push (intermediate_type);
11889 (*multi_step_cvt)++;
11890
11891 if (insn_data[icode1].operand[0].mode == TYPE_MODE (narrow_vectype))
11892 {
11893 if (!VECTOR_BOOLEAN_TYPE_P (vectype))
11894 return true;
11895 if (known_eq (TYPE_VECTOR_SUBPARTS (intermediate_type) * 2,
11896 TYPE_VECTOR_SUBPARTS (narrow_vectype)))
11897 return true;
11898 }
11899
11900 prev_mode = intermediate_mode;
11901 prev_type = intermediate_type;
11902 optab1 = interm_optab;
11903 }
11904
11905 interm_types->release ();
11906 return false;
11907 }
11908
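/* Usage sketch, for illustration only (not part of the vectorizer): how a
   caller might ask the function above whether an int -> char narrowing can
   be vectorized.  The helper name and the use of get_vectype_for_scalar_type
   to obtain the two vector types are assumptions of the sketch.  For the
   int -> short -> char case described in the function comment, CODE1 comes
   back as VEC_PACK_TRUNC_EXPR, MULTI_STEP_CVT as 1, and INTERM_TYPES holds
   the intermediate short vector type.  */

static bool
sketch_int_to_char_narrowing_p (vec_info *vinfo)
{
  /* Vector types for the scalar source (int) and destination (char).  */
  tree vectype_in = get_vectype_for_scalar_type (vinfo, integer_type_node);
  tree vectype_out = get_vectype_for_scalar_type (vinfo, char_type_node);
  if (!vectype_in || !vectype_out)
    return false;

  enum tree_code code1;
  int multi_step_cvt = 0;
  auto_vec<tree> interm_types;
  return supportable_narrowing_operation (NOP_EXPR, vectype_out, vectype_in,
                                          &code1, &multi_step_cvt,
                                          &interm_types);
}
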
11909 /* Generate and return a statement that sets vector mask MASK such that
11910 MASK[I] is true iff J + START_INDEX < END_INDEX for all J <= I. */
11911
11912 gcall *
11913 vect_gen_while (tree mask, tree start_index, tree end_index)
11914 {
11915 tree cmp_type = TREE_TYPE (start_index);
11916 tree mask_type = TREE_TYPE (mask);
11917 gcc_checking_assert (direct_internal_fn_supported_p (IFN_WHILE_ULT,
11918 cmp_type, mask_type,
11919 OPTIMIZE_FOR_SPEED));
11920 gcall *call = gimple_build_call_internal (IFN_WHILE_ULT, 3,
11921 start_index, end_index,
11922 build_zero_cst (mask_type));
11923 gimple_call_set_lhs (call, mask);
11924 return call;
11925 }
11926
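/* Worked example, for illustration only: with an 8-lane MASK,
   START_INDEX = 5 and END_INDEX = 8, the function above yields
   MASK = { 1, 1, 1, 0, 0, 0, 0, 0 }, i.e. lane I is active iff 5 + I < 8.
   The helper name below is an assumption of the sketch; it only shows the
   typical pattern of creating a fresh SSA mask and building the call.  */

static gcall *
sketch_build_partial_vector_mask (tree mask_type, tree start_index,
                                  tree end_index)
{
  /* Fresh SSA name to receive the IFN_WHILE_ULT result.  */
  tree mask = make_ssa_name (mask_type);
  return vect_gen_while (mask, start_index, end_index);
}
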
11927 /* Generate a vector mask of type MASK_TYPE for which index I is false iff
11928 J + START_INDEX < END_INDEX for all J <= I. Add the statements to SEQ. */
11929
11930 tree
11931 vect_gen_while_not (gimple_seq *seq, tree mask_type, tree start_index,
11932 tree end_index)
11933 {
11934 tree tmp = make_ssa_name (mask_type);
11935 gcall *call = vect_gen_while (tmp, start_index, end_index);
11936 gimple_seq_add_stmt (seq, call);
11937 return gimple_build (seq, BIT_NOT_EXPR, mask_type, tmp);
11938 }
11939
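/* Companion sketch, for illustration only: the inverted mask from
   vect_gen_while_not selects exactly the inactive lanes, e.g.
   { 0, 0, 0, 1, 1, 1, 1, 1 } for the example above.  The helper name and
   the choice of emitting the statements before GSI are assumptions of the
   sketch.  */

static tree
sketch_build_inactive_lane_mask (gimple_stmt_iterator *gsi, tree mask_type,
                                 tree start_index, tree end_index)
{
  gimple_seq seq = NULL;
  tree not_mask = vect_gen_while_not (&seq, mask_type, start_index,
                                      end_index);
  /* Emit the mask computation right before the statement at GSI.  */
  gsi_insert_seq_before (gsi, seq, GSI_SAME_STMT);
  return not_mask;
}
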
11940 /* Try to compute the vector types required to vectorize STMT_INFO,
11941 returning true on success and false if vectorization isn't possible.
11942 If GROUP_SIZE is nonzero and we're performing BB vectorization,
11943 make sure that the number of elements in the vectors is no bigger
11944 than GROUP_SIZE.
11945
11946 On success:
11947
11948 - Set *STMT_VECTYPE_OUT to:
11949 - NULL_TREE if the statement doesn't need to be vectorized;
11950 - the equivalent of STMT_VINFO_VECTYPE otherwise.
11951
11952 - Set *NUNITS_VECTYPE_OUT to the vector type that contains the maximum
11953 number of units needed to vectorize STMT_INFO, or NULL_TREE if the
11954 statement does not help to determine the overall number of units. */
11955
11956 opt_result
11957 vect_get_vector_types_for_stmt (vec_info *vinfo, stmt_vec_info stmt_info,
11958 tree *stmt_vectype_out,
11959 tree *nunits_vectype_out,
11960 unsigned int group_size)
11961 {
11962 gimple *stmt = stmt_info->stmt;
11963
11964 /* For BB vectorization, we should always have a group size once we've
11965 constructed the SLP tree; the only valid uses of zero GROUP_SIZEs
11966 are tentative requests during things like early data reference
11967 analysis and pattern recognition. */
11968 if (is_a <bb_vec_info> (vinfo))
11969 gcc_assert (vinfo->slp_instances.is_empty () || group_size != 0);
11970 else
11971 group_size = 0;
11972
11973 *stmt_vectype_out = NULL_TREE;
11974 *nunits_vectype_out = NULL_TREE;
11975
11976 if (gimple_get_lhs (stmt) == NULL_TREE
11977 /* MASK_STORE has no lhs, but is ok. */
11978 && !gimple_call_internal_p (stmt, IFN_MASK_STORE))
11979 {
11980 if (is_a <gcall *> (stmt))
11981 {
11982 /* Ignore calls with no lhs.  These must be calls to
11983 #pragma omp simd functions, and the vectorization factor
11984 they really need can't be determined until
11985 vectorizable_simd_clone_call. */
11986 if (dump_enabled_p ())
11987 dump_printf_loc (MSG_NOTE, vect_location,
11988 "defer to SIMD clone analysis.\n");
11989 return opt_result::success ();
11990 }
11991
11992 return opt_result::failure_at (stmt,
11993 "not vectorized: irregular stmt.%G", stmt);
11994 }
11995
11996 if (VECTOR_MODE_P (TYPE_MODE (gimple_expr_type (stmt))))
11997 return opt_result::failure_at (stmt,
11998 "not vectorized: vector stmt in loop:%G",
11999 stmt);
12000
12001 tree vectype;
12002 tree scalar_type = NULL_TREE;
12003 if (group_size == 0 && STMT_VINFO_VECTYPE (stmt_info))
12004 {
12005 vectype = STMT_VINFO_VECTYPE (stmt_info);
12006 if (dump_enabled_p ())
12007 dump_printf_loc (MSG_NOTE, vect_location,
12008 "precomputed vectype: %T\n", vectype);
12009 }
12010 else if (vect_use_mask_type_p (stmt_info))
12011 {
12012 unsigned int precision = stmt_info->mask_precision;
12013 scalar_type = build_nonstandard_integer_type (precision, 1);
12014 vectype = get_mask_type_for_scalar_type (vinfo, scalar_type, group_size);
12015 if (!vectype)
12016 return opt_result::failure_at (stmt, "not vectorized: unsupported"
12017 " data-type %T\n", scalar_type);
12018 if (dump_enabled_p ())
12019 dump_printf_loc (MSG_NOTE, vect_location, "vectype: %T\n", vectype);
12020 }
12021 else
12022 {
12023 if (data_reference *dr = STMT_VINFO_DATA_REF (stmt_info))
12024 scalar_type = TREE_TYPE (DR_REF (dr));
12025 else if (gimple_call_internal_p (stmt, IFN_MASK_STORE))
12026 scalar_type = TREE_TYPE (gimple_call_arg (stmt, 3));
12027 else
12028 scalar_type = TREE_TYPE (gimple_get_lhs (stmt));
12029
12030 if (dump_enabled_p ())
12031 {
12032 if (group_size)
12033 dump_printf_loc (MSG_NOTE, vect_location,
12034 "get vectype for scalar type (group size %d):"
12035 " %T\n", group_size, scalar_type);
12036 else
12037 dump_printf_loc (MSG_NOTE, vect_location,
12038 "get vectype for scalar type: %T\n", scalar_type);
12039 }
12040 vectype = get_vectype_for_scalar_type (vinfo, scalar_type, group_size);
12041 if (!vectype)
12042 return opt_result::failure_at (stmt,
12043 "not vectorized:"
12044 " unsupported data-type %T\n",
12045 scalar_type);
12046
12047 if (dump_enabled_p ())
12048 dump_printf_loc (MSG_NOTE, vect_location, "vectype: %T\n", vectype);
12049 }
12050 *stmt_vectype_out = vectype;
12051
12052 /* Don't try to compute scalar types if the stmt produces a boolean
12053 vector; use the existing vector type instead. */
12054 tree nunits_vectype = vectype;
12055 if (!VECTOR_BOOLEAN_TYPE_P (vectype))
12056 {
12057 /* The number of units is set according to the smallest scalar
12058 type (or the largest vector size, but we only support one
12059 vector size per vectorization). */
12060 HOST_WIDE_INT dummy;
12061 scalar_type = vect_get_smallest_scalar_type (stmt_info, &dummy, &dummy);
12062 if (scalar_type != TREE_TYPE (vectype))
12063 {
12064 if (dump_enabled_p ())
12065 dump_printf_loc (MSG_NOTE, vect_location,
12066 "get vectype for smallest scalar type: %T\n",
12067 scalar_type);
12068 nunits_vectype = get_vectype_for_scalar_type (vinfo, scalar_type,
12069 group_size);
12070 if (!nunits_vectype)
12071 return opt_result::failure_at
12072 (stmt, "not vectorized: unsupported data-type %T\n",
12073 scalar_type);
12074 if (dump_enabled_p ())
12075 dump_printf_loc (MSG_NOTE, vect_location, "nunits vectype: %T\n",
12076 nunits_vectype);
12077 }
12078 }
12079
12080 gcc_assert (multiple_p (TYPE_VECTOR_SUBPARTS (nunits_vectype),
12081 TYPE_VECTOR_SUBPARTS (*stmt_vectype_out)));
12082
12083 if (dump_enabled_p ())
12084 {
12085 dump_printf_loc (MSG_NOTE, vect_location, "nunits = ");
12086 dump_dec (MSG_NOTE, TYPE_VECTOR_SUBPARTS (nunits_vectype));
12087 dump_printf (MSG_NOTE, "\n");
12088 }
12089
12090 *nunits_vectype_out = nunits_vectype;
12091 return opt_result::success ();
12092 }
12093
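/* Usage sketch, for illustration only: how a caller might query the vector
   types for one statement with the function above and act on the result.
   The helper name is an assumption.  A NULL *STMT_VECTYPE_OUT with a
   successful return means the statement needs no vector type of its own
   (e.g. a call with no lhs that is deferred to the SIMD clone analysis).  */

static opt_result
sketch_note_stmt_vectypes (vec_info *vinfo, stmt_vec_info stmt_info)
{
  tree stmt_vectype, nunits_vectype;
  opt_result res
    = vect_get_vector_types_for_stmt (vinfo, stmt_info, &stmt_vectype,
                                      &nunits_vectype, 0);
  if (!res)
    /* Propagate the failure together with its diagnostic text.  */
    return res;

  if (stmt_vectype && dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location, "using vectype %T\n",
                     stmt_vectype);
  return opt_result::success ();
}
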
12094 /* Generate and return a statement sequence that sets the vector length LEN to:
12095
12096 min_of_start_and_end = min (START_INDEX, END_INDEX);
12097 left_len = END_INDEX - min_of_start_and_end;
12098 rhs = min (left_len, LEN_LIMIT);
12099 LEN = rhs;
12100
12101 Note: the cost of the code generated by this function is modeled
12102 by vect_estimate_min_profitable_iters, so changes here may need
12103 corresponding changes there. */
12104
12105 gimple_seq
12106 vect_gen_len (tree len, tree start_index, tree end_index, tree len_limit)
12107 {
12108 gimple_seq stmts = NULL;
12109 tree len_type = TREE_TYPE (len);
12110 gcc_assert (TREE_TYPE (start_index) == len_type);
12111
12112 tree min = gimple_build (&stmts, MIN_EXPR, len_type, start_index, end_index);
12113 tree left_len = gimple_build (&stmts, MINUS_EXPR, len_type, end_index, min);
12114 tree rhs = gimple_build (&stmts, MIN_EXPR, len_type, left_len, len_limit);
12115 gimple* stmt = gimple_build_assign (len, rhs);
12116 gimple_seq_add_stmt (&stmts, stmt);
12117
12118 return stmts;
12119 }
12120
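/* Worked example, for illustration only: with START_INDEX = 8,
   END_INDEX = 29 and LEN_LIMIT = 16 the generated statements compute
   min (8, 29) = 8, 29 - 8 = 21 and min (21, 16) = 16, i.e. a full vector;
   with START_INDEX = 24 they compute 29 - 24 = 5, so only the 5 remaining
   lanes are processed.  The helper name and the choice of emitting the
   sequence before GSI are assumptions of the sketch.  */

static void
sketch_emit_len_setup (gimple_stmt_iterator *gsi, tree len, tree start_index,
                       tree end_index, tree len_limit)
{
  gimple_seq stmts = vect_gen_len (len, start_index, end_index, len_limit);
  /* Place the length computation right before the statement at GSI.  */
  gsi_insert_seq_before (gsi, stmts, GSI_SAME_STMT);
}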