1 /* Statement Analysis and Transformation for Vectorization
2 Copyright (C) 2003-2020 Free Software Foundation, Inc.
3 Contributed by Dorit Naishlos <dorit@il.ibm.com>
4 and Ira Rosen <irar@il.ibm.com>
5
6 This file is part of GCC.
7
8 GCC is free software; you can redistribute it and/or modify it under
9 the terms of the GNU General Public License as published by the Free
10 Software Foundation; either version 3, or (at your option) any later
11 version.
12
13 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
14 WARRANTY; without even the implied warranty of MERCHANTABILITY or
15 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
16 for more details.
17
18 You should have received a copy of the GNU General Public License
19 along with GCC; see the file COPYING3. If not see
20 <http://www.gnu.org/licenses/>. */
21
22 #include "config.h"
23 #include "system.h"
24 #include "coretypes.h"
25 #include "backend.h"
26 #include "target.h"
27 #include "rtl.h"
28 #include "tree.h"
29 #include "gimple.h"
30 #include "ssa.h"
31 #include "optabs-tree.h"
32 #include "insn-config.h"
33 #include "recog.h" /* FIXME: for insn_data */
34 #include "cgraph.h"
35 #include "dumpfile.h"
36 #include "alias.h"
37 #include "fold-const.h"
38 #include "stor-layout.h"
39 #include "tree-eh.h"
40 #include "gimplify.h"
41 #include "gimple-iterator.h"
42 #include "gimplify-me.h"
43 #include "tree-cfg.h"
44 #include "tree-ssa-loop-manip.h"
45 #include "cfgloop.h"
46 #include "explow.h"
47 #include "tree-ssa-loop.h"
48 #include "tree-scalar-evolution.h"
49 #include "tree-vectorizer.h"
50 #include "builtins.h"
51 #include "internal-fn.h"
52 #include "tree-vector-builder.h"
53 #include "vec-perm-indices.h"
54 #include "tree-ssa-loop-niter.h"
55 #include "gimple-fold.h"
56 #include "regs.h"
57 #include "attribs.h"
58
59 /* For lang_hooks.types.type_for_mode. */
60 #include "langhooks.h"
61
62 /* Return the vectorized type for the given statement. */
63
64 tree
65 stmt_vectype (class _stmt_vec_info *stmt_info)
66 {
67 return STMT_VINFO_VECTYPE (stmt_info);
68 }
69
70 /* Return TRUE iff the given statement is in an inner loop relative to
71 the loop being vectorized. */
72 bool
73 stmt_in_inner_loop_p (vec_info *vinfo, class _stmt_vec_info *stmt_info)
74 {
75 gimple *stmt = STMT_VINFO_STMT (stmt_info);
76 basic_block bb = gimple_bb (stmt);
77 loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
78 class loop* loop;
79
80 if (!loop_vinfo)
81 return false;
82
83 loop = LOOP_VINFO_LOOP (loop_vinfo);
84
85 return (bb->loop_father == loop->inner);
86 }
87
88 /* Record the cost of a statement, either by directly informing the
89 target model or by saving it in a vector for later processing.
90 Return a preliminary estimate of the statement's cost. */
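/* For illustration (a hypothetical call, not taken from this file): a
   caller that wants to account for two unaligned vector loads with a
   known misalignment of 4 bytes in the loop body could do

     unsigned estimate
       = record_stmt_cost (cost_vec, 2, unaligned_load, stmt_info,
                           vectype, 4, vect_body);

   which pushes one stmt_info_for_cost entry onto COST_VEC and returns
   2 * builtin_vectorization_cost (unaligned_load, vectype, 4) as a
   preliminary estimate.  */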
91
92 unsigned
93 record_stmt_cost (stmt_vector_for_cost *body_cost_vec, int count,
94 enum vect_cost_for_stmt kind, stmt_vec_info stmt_info,
95 tree vectype, int misalign,
96 enum vect_cost_model_location where)
97 {
98 if ((kind == vector_load || kind == unaligned_load)
99 && (stmt_info && STMT_VINFO_GATHER_SCATTER_P (stmt_info)))
100 kind = vector_gather_load;
101 if ((kind == vector_store || kind == unaligned_store)
102 && (stmt_info && STMT_VINFO_GATHER_SCATTER_P (stmt_info)))
103 kind = vector_scatter_store;
104
105 stmt_info_for_cost si = { count, kind, where, stmt_info, vectype, misalign };
106 body_cost_vec->safe_push (si);
107
108 return (unsigned)
109 (builtin_vectorization_cost (kind, vectype, misalign) * count);
110 }
111
112 /* Return a variable of type ELEM_TYPE[NELEMS]. */
113
114 static tree
115 create_vector_array (tree elem_type, unsigned HOST_WIDE_INT nelems)
116 {
117 return create_tmp_var (build_array_type_nelts (elem_type, nelems),
118 "vect_array");
119 }
120
121 /* ARRAY is an array of vectors created by create_vector_array.
122 Return an SSA_NAME for the vector in index N. The reference
123 is part of the vectorization of STMT_INFO and the vector is associated
124 with scalar destination SCALAR_DEST. */
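/* For illustration (names invented): with N == 2 the code below emits
   something like

     vect_x.7_15 = vect_array[2];

   where vect_x.7 is the destination variable created from SCALAR_DEST
   and vect_x.7_15 is the SSA name that is returned.  */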
125
126 static tree
127 read_vector_array (vec_info *vinfo,
128 stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
129 tree scalar_dest, tree array, unsigned HOST_WIDE_INT n)
130 {
131 tree vect_type, vect, vect_name, array_ref;
132 gimple *new_stmt;
133
134 gcc_assert (TREE_CODE (TREE_TYPE (array)) == ARRAY_TYPE);
135 vect_type = TREE_TYPE (TREE_TYPE (array));
136 vect = vect_create_destination_var (scalar_dest, vect_type);
137 array_ref = build4 (ARRAY_REF, vect_type, array,
138 build_int_cst (size_type_node, n),
139 NULL_TREE, NULL_TREE);
140
141 new_stmt = gimple_build_assign (vect, array_ref);
142 vect_name = make_ssa_name (vect, new_stmt);
143 gimple_assign_set_lhs (new_stmt, vect_name);
144 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
145
146 return vect_name;
147 }
148
149 /* ARRAY is an array of vectors created by create_vector_array.
150 Emit code to store SSA_NAME VECT in index N of the array.
151 The store is part of the vectorization of STMT_INFO. */
152
153 static void
154 write_vector_array (vec_info *vinfo,
155 stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
156 tree vect, tree array, unsigned HOST_WIDE_INT n)
157 {
158 tree array_ref;
159 gimple *new_stmt;
160
161 array_ref = build4 (ARRAY_REF, TREE_TYPE (vect), array,
162 build_int_cst (size_type_node, n),
163 NULL_TREE, NULL_TREE);
164
165 new_stmt = gimple_build_assign (array_ref, vect);
166 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
167 }
168
169 /* PTR is a pointer to an array of type TYPE. Return a representation
170 of *PTR. The memory reference replaces those in FIRST_DR
171 (and its group). */
172
173 static tree
174 create_array_ref (tree type, tree ptr, tree alias_ptr_type)
175 {
176 tree mem_ref;
177
178 mem_ref = build2 (MEM_REF, type, ptr, build_int_cst (alias_ptr_type, 0));
179 /* Arrays have the same alignment as their type. */
180 set_ptr_info_alignment (get_ptr_info (ptr), TYPE_ALIGN_UNIT (type), 0);
181 return mem_ref;
182 }
183
184 /* Add a clobber of variable VAR to the vectorization of STMT_INFO.
185 Emit the clobber before *GSI. */
186
187 static void
188 vect_clobber_variable (vec_info *vinfo, stmt_vec_info stmt_info,
189 gimple_stmt_iterator *gsi, tree var)
190 {
191 tree clobber = build_clobber (TREE_TYPE (var));
192 gimple *new_stmt = gimple_build_assign (var, clobber);
193 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
194 }
195
196 /* Utility functions used by vect_mark_stmts_to_be_vectorized. */
197
198 /* Function vect_mark_relevant.
199
200 Mark STMT_INFO as "relevant for vectorization" and add it to WORKLIST. */
201
202 static void
203 vect_mark_relevant (vec<stmt_vec_info> *worklist, stmt_vec_info stmt_info,
204 enum vect_relevant relevant, bool live_p)
205 {
206 enum vect_relevant save_relevant = STMT_VINFO_RELEVANT (stmt_info);
207 bool save_live_p = STMT_VINFO_LIVE_P (stmt_info);
208
209 if (dump_enabled_p ())
210 dump_printf_loc (MSG_NOTE, vect_location,
211 "mark relevant %d, live %d: %G", relevant, live_p,
212 stmt_info->stmt);
213
214 /* If this stmt is an original stmt in a pattern, we might need to mark its
215 related pattern stmt instead of the original stmt. However, such stmts
216 may have their own uses that are not in any pattern; in such cases the
217 stmt itself should be marked. */
218 if (STMT_VINFO_IN_PATTERN_P (stmt_info))
219 {
220 /* This is the last stmt in a sequence that was detected as a
221 pattern that can potentially be vectorized. Don't mark the stmt
222 as relevant/live because it's not going to be vectorized.
223 Instead mark the pattern-stmt that replaces it. */
224
225 if (dump_enabled_p ())
226 dump_printf_loc (MSG_NOTE, vect_location,
227 "last stmt in pattern. don't mark"
228 " relevant/live.\n");
229 stmt_vec_info old_stmt_info = stmt_info;
230 stmt_info = STMT_VINFO_RELATED_STMT (stmt_info);
231 gcc_assert (STMT_VINFO_RELATED_STMT (stmt_info) == old_stmt_info);
232 save_relevant = STMT_VINFO_RELEVANT (stmt_info);
233 save_live_p = STMT_VINFO_LIVE_P (stmt_info);
234 }
235
236 STMT_VINFO_LIVE_P (stmt_info) |= live_p;
237 if (relevant > STMT_VINFO_RELEVANT (stmt_info))
238 STMT_VINFO_RELEVANT (stmt_info) = relevant;
239
240 if (STMT_VINFO_RELEVANT (stmt_info) == save_relevant
241 && STMT_VINFO_LIVE_P (stmt_info) == save_live_p)
242 {
243 if (dump_enabled_p ())
244 dump_printf_loc (MSG_NOTE, vect_location,
245 "already marked relevant/live.\n");
246 return;
247 }
248
249 worklist->safe_push (stmt_info);
250 }
251
252
253 /* Function is_simple_and_all_uses_invariant
254
255 Return true if STMT_INFO is simple and all uses of it are invariant. */
256
257 bool
258 is_simple_and_all_uses_invariant (stmt_vec_info stmt_info,
259 loop_vec_info loop_vinfo)
260 {
261 tree op;
262 ssa_op_iter iter;
263
264 gassign *stmt = dyn_cast <gassign *> (stmt_info->stmt);
265 if (!stmt)
266 return false;
267
268 FOR_EACH_SSA_TREE_OPERAND (op, stmt, iter, SSA_OP_USE)
269 {
270 enum vect_def_type dt = vect_uninitialized_def;
271
272 if (!vect_is_simple_use (op, loop_vinfo, &dt))
273 {
274 if (dump_enabled_p ())
275 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
276 "use not simple.\n");
277 return false;
278 }
279
280 if (dt != vect_external_def && dt != vect_constant_def)
281 return false;
282 }
283 return true;
284 }
285
286 /* Function vect_stmt_relevant_p.
287
288 Return true if STMT_INFO, in the loop that is represented by LOOP_VINFO,
289 is "relevant for vectorization".
290
291 A stmt is considered "relevant for vectorization" if:
292 - it has uses outside the loop.
293 - it has vdefs (it alters memory).
294 - it is a control stmt in the loop (other than the loop exit condition).
295
296 CHECKME: what other side effects would the vectorizer allow? */
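/* For example (illustrative): in

     for (i = 0; i < n; i++)
       a[i] = b[i] + x;

   the store to a[i] is relevant because it has a vdef, while a stmt
   whose result is only read after the loop would be marked live via
   its use in a loop-closed exit PHI.  */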
297
298 static bool
299 vect_stmt_relevant_p (stmt_vec_info stmt_info, loop_vec_info loop_vinfo,
300 enum vect_relevant *relevant, bool *live_p)
301 {
302 class loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
303 ssa_op_iter op_iter;
304 imm_use_iterator imm_iter;
305 use_operand_p use_p;
306 def_operand_p def_p;
307
308 *relevant = vect_unused_in_scope;
309 *live_p = false;
310
311 /* cond stmt other than loop exit cond. */
312 if (is_ctrl_stmt (stmt_info->stmt)
313 && STMT_VINFO_TYPE (stmt_info) != loop_exit_ctrl_vec_info_type)
314 *relevant = vect_used_in_scope;
315
316 /* changing memory. */
317 if (gimple_code (stmt_info->stmt) != GIMPLE_PHI)
318 if (gimple_vdef (stmt_info->stmt)
319 && !gimple_clobber_p (stmt_info->stmt))
320 {
321 if (dump_enabled_p ())
322 dump_printf_loc (MSG_NOTE, vect_location,
323 "vec_stmt_relevant_p: stmt has vdefs.\n");
324 *relevant = vect_used_in_scope;
325 }
326
327 /* uses outside the loop. */
328 FOR_EACH_PHI_OR_STMT_DEF (def_p, stmt_info->stmt, op_iter, SSA_OP_DEF)
329 {
330 FOR_EACH_IMM_USE_FAST (use_p, imm_iter, DEF_FROM_PTR (def_p))
331 {
332 basic_block bb = gimple_bb (USE_STMT (use_p));
333 if (!flow_bb_inside_loop_p (loop, bb))
334 {
335 if (is_gimple_debug (USE_STMT (use_p)))
336 continue;
337
338 if (dump_enabled_p ())
339 dump_printf_loc (MSG_NOTE, vect_location,
340 "vec_stmt_relevant_p: used out of loop.\n");
341
342 /* We expect all such uses to be in the loop exit phis
343 (because of loop-closed SSA form). */
344 gcc_assert (gimple_code (USE_STMT (use_p)) == GIMPLE_PHI);
345 gcc_assert (bb == single_exit (loop)->dest);
346
347 *live_p = true;
348 }
349 }
350 }
351
352 if (*live_p && *relevant == vect_unused_in_scope
353 && !is_simple_and_all_uses_invariant (stmt_info, loop_vinfo))
354 {
355 if (dump_enabled_p ())
356 dump_printf_loc (MSG_NOTE, vect_location,
357 "vec_stmt_relevant_p: stmt live but not relevant.\n");
358 *relevant = vect_used_only_live;
359 }
360
361 return (*live_p || *relevant);
362 }
363
364
365 /* Function exist_non_indexing_operands_for_use_p
366
367 USE is one of the uses attached to STMT_INFO. Check if USE is
368 used in STMT_INFO for anything other than indexing an array. */
369
370 static bool
371 exist_non_indexing_operands_for_use_p (tree use, stmt_vec_info stmt_info)
372 {
373 tree operand;
374
375 /* USE corresponds to some operand in STMT. If there is no data
376 reference in STMT, then any operand that corresponds to USE
377 is not indexing an array. */
378 if (!STMT_VINFO_DATA_REF (stmt_info))
379 return true;
380
381 /* STMT has a data_ref. FORNOW this means that it's one of
382 the following forms:
383 -1- ARRAY_REF = var
384 -2- var = ARRAY_REF
385 (This should have been verified in analyze_data_refs).
386
387 'var' in the second case corresponds to a def, not a use,
388 so USE cannot correspond to any operands that are not used
389 for array indexing.
390
391 Therefore, all we need to check is if STMT falls into the
392 first case, and whether var corresponds to USE. */
393
394 gassign *assign = dyn_cast <gassign *> (stmt_info->stmt);
395 if (!assign || !gimple_assign_copy_p (assign))
396 {
397 gcall *call = dyn_cast <gcall *> (stmt_info->stmt);
398 if (call && gimple_call_internal_p (call))
399 {
400 internal_fn ifn = gimple_call_internal_fn (call);
401 int mask_index = internal_fn_mask_index (ifn);
402 if (mask_index >= 0
403 && use == gimple_call_arg (call, mask_index))
404 return true;
405 int stored_value_index = internal_fn_stored_value_index (ifn);
406 if (stored_value_index >= 0
407 && use == gimple_call_arg (call, stored_value_index))
408 return true;
409 if (internal_gather_scatter_fn_p (ifn)
410 && use == gimple_call_arg (call, 1))
411 return true;
412 }
413 return false;
414 }
415
416 if (TREE_CODE (gimple_assign_lhs (assign)) == SSA_NAME)
417 return false;
418 operand = gimple_assign_rhs1 (assign);
419 if (TREE_CODE (operand) != SSA_NAME)
420 return false;
421
422 if (operand == use)
423 return true;
424
425 return false;
426 }
427
428
429 /*
430 Function process_use.
431
432 Inputs:
433 - a USE in STMT_VINFO in a loop represented by LOOP_VINFO
434 - RELEVANT - enum value to be set in the STMT_VINFO of the stmt
435 that defined USE. This is done by calling mark_relevant and passing it
436 the WORKLIST (to add DEF_STMT to the WORKLIST in case it is relevant).
437 - FORCE is true if exist_non_indexing_operands_for_use_p check shouldn't
438 be performed.
439
440 Outputs:
441 Generally, LIVE_P and RELEVANT are used to define the liveness and
442 relevance info of the DEF_STMT of this USE:
443 STMT_VINFO_LIVE_P (DEF_stmt_vinfo) <-- live_p
444 STMT_VINFO_RELEVANT (DEF_stmt_vinfo) <-- relevant
445 Exceptions:
446 - case 1: If USE is used only for address computations (e.g. array indexing),
447 which does not need to be directly vectorized, then the liveness/relevance
448 of the respective DEF_STMT is left unchanged.
449 - case 2: If STMT_VINFO is a reduction phi and DEF_STMT is a reduction stmt,
450 we skip DEF_STMT because it has already been processed.
451 - case 3: If DEF_STMT and STMT_VINFO are in different nests, then
452 "relevant" will be modified accordingly.
453
454 Return true if everything is as expected. Return false otherwise. */
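/* Example for case 1 (illustrative): in a scalar stmt a[i_1] = x_2,
   the use of i_1 only feeds the address computation, so the def stmt
   of i_1 is left untouched, whereas the use of x_2 causes the def stmt
   of x_2 to be marked with RELEVANT and pushed onto WORKLIST.  */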
455
456 static opt_result
457 process_use (stmt_vec_info stmt_vinfo, tree use, loop_vec_info loop_vinfo,
458 enum vect_relevant relevant, vec<stmt_vec_info> *worklist,
459 bool force)
460 {
461 stmt_vec_info dstmt_vinfo;
462 enum vect_def_type dt;
463
464 /* case 1: we are only interested in uses that need to be vectorized. Uses
465 that are used for address computation are not considered relevant. */
466 if (!force && !exist_non_indexing_operands_for_use_p (use, stmt_vinfo))
467 return opt_result::success ();
468
469 if (!vect_is_simple_use (use, loop_vinfo, &dt, &dstmt_vinfo))
470 return opt_result::failure_at (stmt_vinfo->stmt,
471 "not vectorized:"
472 " unsupported use in stmt.\n");
473
474 if (!dstmt_vinfo)
475 return opt_result::success ();
476
477 basic_block def_bb = gimple_bb (dstmt_vinfo->stmt);
478 basic_block bb = gimple_bb (stmt_vinfo->stmt);
479
480 /* case 2: A reduction phi (STMT) defined by a reduction stmt (DSTMT_VINFO).
481 We have to force the stmt live since the epilogue loop needs it to
482 continue computing the reduction. */
483 if (gimple_code (stmt_vinfo->stmt) == GIMPLE_PHI
484 && STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_reduction_def
485 && gimple_code (dstmt_vinfo->stmt) != GIMPLE_PHI
486 && STMT_VINFO_DEF_TYPE (dstmt_vinfo) == vect_reduction_def
487 && bb->loop_father == def_bb->loop_father)
488 {
489 if (dump_enabled_p ())
490 dump_printf_loc (MSG_NOTE, vect_location,
491 "reduc-stmt defining reduc-phi in the same nest.\n");
492 vect_mark_relevant (worklist, dstmt_vinfo, relevant, true);
493 return opt_result::success ();
494 }
495
496 /* case 3a: outer-loop stmt defining an inner-loop stmt:
497 outer-loop-header-bb:
498 d = dstmt_vinfo
499 inner-loop:
500 stmt # use (d)
501 outer-loop-tail-bb:
502 ... */
503 if (flow_loop_nested_p (def_bb->loop_father, bb->loop_father))
504 {
505 if (dump_enabled_p ())
506 dump_printf_loc (MSG_NOTE, vect_location,
507 "outer-loop def-stmt defining inner-loop stmt.\n");
508
509 switch (relevant)
510 {
511 case vect_unused_in_scope:
512 relevant = (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_nested_cycle) ?
513 vect_used_in_scope : vect_unused_in_scope;
514 break;
515
516 case vect_used_in_outer_by_reduction:
517 gcc_assert (STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_reduction_def);
518 relevant = vect_used_by_reduction;
519 break;
520
521 case vect_used_in_outer:
522 gcc_assert (STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_reduction_def);
523 relevant = vect_used_in_scope;
524 break;
525
526 case vect_used_in_scope:
527 break;
528
529 default:
530 gcc_unreachable ();
531 }
532 }
533
534 /* case 3b: inner-loop stmt defining an outer-loop stmt:
535 outer-loop-header-bb:
536 ...
537 inner-loop:
538 d = dstmt_vinfo
539 outer-loop-tail-bb (or outer-loop-exit-bb in double reduction):
540 stmt # use (d) */
541 else if (flow_loop_nested_p (bb->loop_father, def_bb->loop_father))
542 {
543 if (dump_enabled_p ())
544 dump_printf_loc (MSG_NOTE, vect_location,
545 "inner-loop def-stmt defining outer-loop stmt.\n");
546
547 switch (relevant)
548 {
549 case vect_unused_in_scope:
550 relevant = (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_reduction_def
551 || STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_double_reduction_def) ?
552 vect_used_in_outer_by_reduction : vect_unused_in_scope;
553 break;
554
555 case vect_used_by_reduction:
556 case vect_used_only_live:
557 relevant = vect_used_in_outer_by_reduction;
558 break;
559
560 case vect_used_in_scope:
561 relevant = vect_used_in_outer;
562 break;
563
564 default:
565 gcc_unreachable ();
566 }
567 }
568 /* We are also not interested in uses on loop PHI backedges that are
569 inductions. Otherwise we'll needlessly vectorize the IV increment
570 and cause hybrid SLP for SLP inductions. Unless the PHI is live
571 of course. */
572 else if (gimple_code (stmt_vinfo->stmt) == GIMPLE_PHI
573 && STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_induction_def
574 && ! STMT_VINFO_LIVE_P (stmt_vinfo)
575 && (PHI_ARG_DEF_FROM_EDGE (stmt_vinfo->stmt,
576 loop_latch_edge (bb->loop_father))
577 == use))
578 {
579 if (dump_enabled_p ())
580 dump_printf_loc (MSG_NOTE, vect_location,
581 "induction value on backedge.\n");
582 return opt_result::success ();
583 }
584
585
586 vect_mark_relevant (worklist, dstmt_vinfo, relevant, false);
587 return opt_result::success ();
588 }
589
590
591 /* Function vect_mark_stmts_to_be_vectorized.
592
593 Not all stmts in the loop need to be vectorized. For example:
594
595 for i...
596 for j...
597 1. T0 = i + j
598 2. T1 = a[T0]
599
600 3. j = j + 1
601
602 Stmts 1 and 3 do not need to be vectorized, because loop control and
603 addressing of vectorized data-refs are handled differently.
604
605 This pass detects such stmts. */
606
607 opt_result
608 vect_mark_stmts_to_be_vectorized (loop_vec_info loop_vinfo, bool *fatal)
609 {
610 class loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
611 basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
612 unsigned int nbbs = loop->num_nodes;
613 gimple_stmt_iterator si;
614 unsigned int i;
615 basic_block bb;
616 bool live_p;
617 enum vect_relevant relevant;
618
619 DUMP_VECT_SCOPE ("vect_mark_stmts_to_be_vectorized");
620
621 auto_vec<stmt_vec_info, 64> worklist;
622
623 /* 1. Init worklist. */
624 for (i = 0; i < nbbs; i++)
625 {
626 bb = bbs[i];
627 for (si = gsi_start_phis (bb); !gsi_end_p (si); gsi_next (&si))
628 {
629 stmt_vec_info phi_info = loop_vinfo->lookup_stmt (gsi_stmt (si));
630 if (dump_enabled_p ())
631 dump_printf_loc (MSG_NOTE, vect_location, "init: phi relevant? %G",
632 phi_info->stmt);
633
634 if (vect_stmt_relevant_p (phi_info, loop_vinfo, &relevant, &live_p))
635 vect_mark_relevant (&worklist, phi_info, relevant, live_p);
636 }
637 for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
638 {
639 stmt_vec_info stmt_info = loop_vinfo->lookup_stmt (gsi_stmt (si));
640 if (dump_enabled_p ())
641 dump_printf_loc (MSG_NOTE, vect_location,
642 "init: stmt relevant? %G", stmt_info->stmt);
643
644 if (vect_stmt_relevant_p (stmt_info, loop_vinfo, &relevant, &live_p))
645 vect_mark_relevant (&worklist, stmt_info, relevant, live_p);
646 }
647 }
648
649 /* 2. Process_worklist */
650 while (worklist.length () > 0)
651 {
652 use_operand_p use_p;
653 ssa_op_iter iter;
654
655 stmt_vec_info stmt_vinfo = worklist.pop ();
656 if (dump_enabled_p ())
657 dump_printf_loc (MSG_NOTE, vect_location,
658 "worklist: examine stmt: %G", stmt_vinfo->stmt);
659
660 /* Examine the USEs of STMT. For each USE, mark the stmt that defines it
661 (DEF_STMT) as relevant/irrelevant according to the relevance property
662 of STMT. */
663 relevant = STMT_VINFO_RELEVANT (stmt_vinfo);
664
665 /* Generally, the relevance property of STMT (in STMT_VINFO_RELEVANT) is
666 propagated as is to the DEF_STMTs of its USEs.
667
668 One exception is when STMT has been identified as defining a reduction
669 variable; in this case we set the relevance to vect_used_by_reduction.
670 This is because we distinguish between two kinds of relevant stmts -
671 those that are used by a reduction computation, and those that are
672 (also) used by a regular computation. This allows us later on to
673 identify stmts that are used solely by a reduction, and therefore the
674 order of the results that they produce does not have to be kept. */
675
676 switch (STMT_VINFO_DEF_TYPE (stmt_vinfo))
677 {
678 case vect_reduction_def:
679 gcc_assert (relevant != vect_unused_in_scope);
680 if (relevant != vect_unused_in_scope
681 && relevant != vect_used_in_scope
682 && relevant != vect_used_by_reduction
683 && relevant != vect_used_only_live)
684 return opt_result::failure_at
685 (stmt_vinfo->stmt, "unsupported use of reduction.\n");
686 break;
687
688 case vect_nested_cycle:
689 if (relevant != vect_unused_in_scope
690 && relevant != vect_used_in_outer_by_reduction
691 && relevant != vect_used_in_outer)
692 return opt_result::failure_at
693 (stmt_vinfo->stmt, "unsupported use of nested cycle.\n");
694 break;
695
696 case vect_double_reduction_def:
697 if (relevant != vect_unused_in_scope
698 && relevant != vect_used_by_reduction
699 && relevant != vect_used_only_live)
700 return opt_result::failure_at
701 (stmt_vinfo->stmt, "unsupported use of double reduction.\n");
702 break;
703
704 default:
705 break;
706 }
707
708 if (is_pattern_stmt_p (stmt_vinfo))
709 {
710 /* Pattern statements are not inserted into the code, so
711 FOR_EACH_PHI_OR_STMT_USE optimizes their operands out, and we
712 have to scan the RHS or function arguments instead. */
713 if (gassign *assign = dyn_cast <gassign *> (stmt_vinfo->stmt))
714 {
715 enum tree_code rhs_code = gimple_assign_rhs_code (assign);
716 tree op = gimple_assign_rhs1 (assign);
717
718 i = 1;
719 if (rhs_code == COND_EXPR && COMPARISON_CLASS_P (op))
720 {
721 opt_result res
722 = process_use (stmt_vinfo, TREE_OPERAND (op, 0),
723 loop_vinfo, relevant, &worklist, false);
724 if (!res)
725 return res;
726 res = process_use (stmt_vinfo, TREE_OPERAND (op, 1),
727 loop_vinfo, relevant, &worklist, false);
728 if (!res)
729 return res;
730 i = 2;
731 }
732 for (; i < gimple_num_ops (assign); i++)
733 {
734 op = gimple_op (assign, i);
735 if (TREE_CODE (op) == SSA_NAME)
736 {
737 opt_result res
738 = process_use (stmt_vinfo, op, loop_vinfo, relevant,
739 &worklist, false);
740 if (!res)
741 return res;
742 }
743 }
744 }
745 else if (gcall *call = dyn_cast <gcall *> (stmt_vinfo->stmt))
746 {
747 for (i = 0; i < gimple_call_num_args (call); i++)
748 {
749 tree arg = gimple_call_arg (call, i);
750 opt_result res
751 = process_use (stmt_vinfo, arg, loop_vinfo, relevant,
752 &worklist, false);
753 if (!res)
754 return res;
755 }
756 }
757 }
758 else
759 FOR_EACH_PHI_OR_STMT_USE (use_p, stmt_vinfo->stmt, iter, SSA_OP_USE)
760 {
761 tree op = USE_FROM_PTR (use_p);
762 opt_result res
763 = process_use (stmt_vinfo, op, loop_vinfo, relevant,
764 &worklist, false);
765 if (!res)
766 return res;
767 }
768
769 if (STMT_VINFO_GATHER_SCATTER_P (stmt_vinfo))
770 {
771 gather_scatter_info gs_info;
772 if (!vect_check_gather_scatter (stmt_vinfo, loop_vinfo, &gs_info))
773 gcc_unreachable ();
774 opt_result res
775 = process_use (stmt_vinfo, gs_info.offset, loop_vinfo, relevant,
776 &worklist, true);
777 if (!res)
778 {
779 if (fatal)
780 *fatal = false;
781 return res;
782 }
783 }
784 } /* while worklist */
785
786 return opt_result::success ();
787 }
788
789 /* Function vect_model_simple_cost.
790
791 Models cost for simple operations, i.e. those that only emit ncopies of a
792 single op. Right now, this does not account for multiple insns that could
793 be generated for the single vector op. We will handle that shortly. */
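/* Worked example (illustrative): for a vector add with NCOPIES == 2
   whose second operand is loop-invariant (vect_external_def), the
   non-SLP path below records one scalar_to_vec prologue cost for the
   invariant operand and a body cost with a count of two for KIND
   (which defaults to vector_stmt).  */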
794
795 static void
796 vect_model_simple_cost (vec_info *,
797 stmt_vec_info stmt_info, int ncopies,
798 enum vect_def_type *dt,
799 int ndts,
800 slp_tree node,
801 stmt_vector_for_cost *cost_vec,
802 vect_cost_for_stmt kind = vector_stmt)
803 {
804 int inside_cost = 0, prologue_cost = 0;
805
806 gcc_assert (cost_vec != NULL);
807
808 /* ??? Somehow we need to fix this at the callers. */
809 if (node)
810 ncopies = SLP_TREE_NUMBER_OF_VEC_STMTS (node);
811
812 if (!node)
813 /* Cost the "broadcast" of a scalar operand into a vector operand.
814 Use scalar_to_vec to cost the broadcast, as elsewhere in the vector
815 cost model. */
816 for (int i = 0; i < ndts; i++)
817 if (dt[i] == vect_constant_def || dt[i] == vect_external_def)
818 prologue_cost += record_stmt_cost (cost_vec, 1, scalar_to_vec,
819 stmt_info, 0, vect_prologue);
820
821 /* Adjust for two-operator SLP nodes. */
822 if (node && SLP_TREE_TWO_OPERATORS (node))
823 {
824 ncopies *= 2;
825 inside_cost += record_stmt_cost (cost_vec, ncopies, vec_perm,
826 stmt_info, 0, vect_body);
827 }
828
829 /* Pass the inside-of-loop statements to the target-specific cost model. */
830 inside_cost += record_stmt_cost (cost_vec, ncopies, kind,
831 stmt_info, 0, vect_body);
832
833 if (dump_enabled_p ())
834 dump_printf_loc (MSG_NOTE, vect_location,
835 "vect_model_simple_cost: inside_cost = %d, "
836 "prologue_cost = %d .\n", inside_cost, prologue_cost);
837 }
838
839
840 /* Model cost for type demotion and promotion operations. PWR is
841 normally zero for single-step promotions and demotions. It will be
842 one if two-step promotion/demotion is required, and so on. NCOPIES
843 is the number of vector results (and thus number of instructions)
844 for the narrowest end of the operation chain. Each additional
845 step doubles the number of instructions required. */
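/* Worked example (illustrative): for a two-step promotion (PWR == 1)
   with NCOPIES == 2 the loop below costs 2 + 4 = 6 vec_promote_demote
   stmts in the loop body, plus one prologue vector_stmt for each
   constant or external operand.  */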
846
847 static void
848 vect_model_promotion_demotion_cost (stmt_vec_info stmt_info,
849 enum vect_def_type *dt,
850 unsigned int ncopies, int pwr,
851 stmt_vector_for_cost *cost_vec)
852 {
853 int i;
854 int inside_cost = 0, prologue_cost = 0;
855
856 for (i = 0; i < pwr + 1; i++)
857 {
858 inside_cost += record_stmt_cost (cost_vec, ncopies, vec_promote_demote,
859 stmt_info, 0, vect_body);
860 ncopies *= 2;
861 }
862
863 /* FORNOW: Assuming maximum 2 args per stmt. */
864 for (i = 0; i < 2; i++)
865 if (dt[i] == vect_constant_def || dt[i] == vect_external_def)
866 prologue_cost += record_stmt_cost (cost_vec, 1, vector_stmt,
867 stmt_info, 0, vect_prologue);
868
869 if (dump_enabled_p ())
870 dump_printf_loc (MSG_NOTE, vect_location,
871 "vect_model_promotion_demotion_cost: inside_cost = %d, "
872 "prologue_cost = %d .\n", inside_cost, prologue_cost);
873 }
874
875 /* Returns true if the current function returns DECL. */
876
877 static bool
878 cfun_returns (tree decl)
879 {
880 edge_iterator ei;
881 edge e;
882 FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR_FOR_FN (cfun)->preds)
883 {
884 greturn *ret = safe_dyn_cast <greturn *> (last_stmt (e->src));
885 if (!ret)
886 continue;
887 if (gimple_return_retval (ret) == decl)
888 return true;
889 /* We often end up with an aggregate copy to the result decl;
890 handle that case as well. First skip intermediate clobbers
891 though. */
892 gimple *def = ret;
893 do
894 {
895 def = SSA_NAME_DEF_STMT (gimple_vuse (def));
896 }
897 while (gimple_clobber_p (def));
898 if (is_a <gassign *> (def)
899 && gimple_assign_lhs (def) == gimple_return_retval (ret)
900 && gimple_assign_rhs1 (def) == decl)
901 return true;
902 }
903 return false;
904 }
905
906 /* Function vect_model_store_cost
907
908 Models cost for stores. In the case of grouped accesses, one access
909 has the overhead of the grouped access attributed to it. */
910
911 static void
912 vect_model_store_cost (vec_info *vinfo, stmt_vec_info stmt_info, int ncopies,
913 vect_memory_access_type memory_access_type,
914 vec_load_store_type vls_type, slp_tree slp_node,
915 stmt_vector_for_cost *cost_vec)
916 {
917 unsigned int inside_cost = 0, prologue_cost = 0;
918 stmt_vec_info first_stmt_info = stmt_info;
919 bool grouped_access_p = STMT_VINFO_GROUPED_ACCESS (stmt_info);
920
921 /* ??? Somehow we need to fix this at the callers. */
922 if (slp_node)
923 ncopies = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
924
925 if (vls_type == VLS_STORE_INVARIANT)
926 {
927 if (!slp_node)
928 prologue_cost += record_stmt_cost (cost_vec, 1, scalar_to_vec,
929 stmt_info, 0, vect_prologue);
930 }
931
932 /* Grouped stores update all elements in the group at once,
933 so we want the DR for the first statement. */
934 if (!slp_node && grouped_access_p)
935 first_stmt_info = DR_GROUP_FIRST_ELEMENT (stmt_info);
936
937 /* True if we should include any once-per-group costs as well as
938 the cost of the statement itself. For SLP we only get called
939 once per group anyhow. */
940 bool first_stmt_p = (first_stmt_info == stmt_info);
941
942 /* We assume that the cost of a single store-lanes instruction is
943 equivalent to the cost of DR_GROUP_SIZE separate stores. If a grouped
944 access is instead being provided by a permute-and-store operation,
945 include the cost of the permutes. */
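/* For illustration: with ncopies == 1 and a group of 4 stores handled
   by permute-and-store, the block below costs
   1 * ceil_log2 (4) * 4 == 8 vec_perm stmts.  */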
946 if (first_stmt_p
947 && memory_access_type == VMAT_CONTIGUOUS_PERMUTE)
948 {
949 /* Use high and low interleave or shuffle operations for each
950 needed permute. */
951 int group_size = DR_GROUP_SIZE (first_stmt_info);
952 int nstmts = ncopies * ceil_log2 (group_size) * group_size;
953 inside_cost = record_stmt_cost (cost_vec, nstmts, vec_perm,
954 stmt_info, 0, vect_body);
955
956 if (dump_enabled_p ())
957 dump_printf_loc (MSG_NOTE, vect_location,
958 "vect_model_store_cost: strided group_size = %d .\n",
959 group_size);
960 }
961
962 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
963 /* Costs of the stores. */
964 if (memory_access_type == VMAT_ELEMENTWISE
965 || memory_access_type == VMAT_GATHER_SCATTER)
966 {
967 /* N scalar stores plus extracting the elements. */
968 unsigned int assumed_nunits = vect_nunits_for_cost (vectype);
969 inside_cost += record_stmt_cost (cost_vec,
970 ncopies * assumed_nunits,
971 scalar_store, stmt_info, 0, vect_body);
972 }
973 else
974 vect_get_store_cost (vinfo, stmt_info, ncopies, &inside_cost, cost_vec);
975
976 if (memory_access_type == VMAT_ELEMENTWISE
977 || memory_access_type == VMAT_STRIDED_SLP)
978 {
979 /* N scalar stores plus extracting the elements. */
980 unsigned int assumed_nunits = vect_nunits_for_cost (vectype);
981 inside_cost += record_stmt_cost (cost_vec,
982 ncopies * assumed_nunits,
983 vec_to_scalar, stmt_info, 0, vect_body);
984 }
985
986 /* When vectorizing a store into the function result, assign
987 a penalty if the function returns in a multi-register location.
988 In this case we assume we'll end up having to spill the
989 vector result and do piecewise loads as a conservative estimate. */
990 tree base = get_base_address (STMT_VINFO_DATA_REF (stmt_info)->ref);
991 if (base
992 && (TREE_CODE (base) == RESULT_DECL
993 || (DECL_P (base) && cfun_returns (base)))
994 && !aggregate_value_p (base, cfun->decl))
995 {
996 rtx reg = hard_function_value (TREE_TYPE (base), cfun->decl, 0, 1);
997 /* ??? Handle PARALLEL in some way. */
998 if (REG_P (reg))
999 {
1000 int nregs = hard_regno_nregs (REGNO (reg), GET_MODE (reg));
1001 /* Assume that a single reg-reg move is possible and cheap,
1002 do not account for vector to gp register move cost. */
1003 if (nregs > 1)
1004 {
1005 /* Spill. */
1006 prologue_cost += record_stmt_cost (cost_vec, ncopies,
1007 vector_store,
1008 stmt_info, 0, vect_epilogue);
1009 /* Loads. */
1010 prologue_cost += record_stmt_cost (cost_vec, ncopies * nregs,
1011 scalar_load,
1012 stmt_info, 0, vect_epilogue);
1013 }
1014 }
1015 }
1016
1017 if (dump_enabled_p ())
1018 dump_printf_loc (MSG_NOTE, vect_location,
1019 "vect_model_store_cost: inside_cost = %d, "
1020 "prologue_cost = %d .\n", inside_cost, prologue_cost);
1021 }
1022
1023
1024 /* Calculate cost of DR's memory access. */
1025 void
1026 vect_get_store_cost (vec_info *vinfo, stmt_vec_info stmt_info, int ncopies,
1027 unsigned int *inside_cost,
1028 stmt_vector_for_cost *body_cost_vec)
1029 {
1030 dr_vec_info *dr_info = STMT_VINFO_DR_INFO (stmt_info);
1031 int alignment_support_scheme
1032 = vect_supportable_dr_alignment (vinfo, dr_info, false);
1033
1034 switch (alignment_support_scheme)
1035 {
1036 case dr_aligned:
1037 {
1038 *inside_cost += record_stmt_cost (body_cost_vec, ncopies,
1039 vector_store, stmt_info, 0,
1040 vect_body);
1041
1042 if (dump_enabled_p ())
1043 dump_printf_loc (MSG_NOTE, vect_location,
1044 "vect_model_store_cost: aligned.\n");
1045 break;
1046 }
1047
1048 case dr_unaligned_supported:
1049 {
1050 /* Here, we assign an additional cost for the unaligned store. */
1051 *inside_cost += record_stmt_cost (body_cost_vec, ncopies,
1052 unaligned_store, stmt_info,
1053 DR_MISALIGNMENT (dr_info),
1054 vect_body);
1055 if (dump_enabled_p ())
1056 dump_printf_loc (MSG_NOTE, vect_location,
1057 "vect_model_store_cost: unaligned supported by "
1058 "hardware.\n");
1059 break;
1060 }
1061
1062 case dr_unaligned_unsupported:
1063 {
1064 *inside_cost = VECT_MAX_COST;
1065
1066 if (dump_enabled_p ())
1067 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1068 "vect_model_store_cost: unsupported access.\n");
1069 break;
1070 }
1071
1072 default:
1073 gcc_unreachable ();
1074 }
1075 }
1076
1077
1078 /* Function vect_model_load_cost
1079
1080 Models cost for loads. In the case of grouped accesses, one access has
1081 the overhead of the grouped access attributed to it. Since unaligned
1082 accesses are supported for loads, we also account for the costs of the
1083 access scheme chosen. */
1084
1085 static void
1086 vect_model_load_cost (vec_info *vinfo,
1087 stmt_vec_info stmt_info, unsigned ncopies, poly_uint64 vf,
1088 vect_memory_access_type memory_access_type,
1089 slp_tree slp_node,
1090 stmt_vector_for_cost *cost_vec)
1091 {
1092 unsigned int inside_cost = 0, prologue_cost = 0;
1093 bool grouped_access_p = STMT_VINFO_GROUPED_ACCESS (stmt_info);
1094
1095 gcc_assert (cost_vec);
1096
1097 /* ??? Somehow we need to fix this at the callers. */
1098 if (slp_node)
1099 ncopies = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
1100
1101 if (slp_node && SLP_TREE_LOAD_PERMUTATION (slp_node).exists ())
1102 {
1103 /* If the load is permuted then the alignment is determined by
1104 the first group element, not by the first scalar stmt DR. */
1105 stmt_vec_info first_stmt_info = DR_GROUP_FIRST_ELEMENT (stmt_info);
1106 /* Record the cost for the permutation. */
1107 unsigned n_perms;
1108 unsigned assumed_nunits
1109 = vect_nunits_for_cost (STMT_VINFO_VECTYPE (first_stmt_info));
1110 vect_transform_slp_perm_load (vinfo, slp_node, vNULL, NULL,
1111 vf, true, &n_perms);
1112 inside_cost += record_stmt_cost (cost_vec, n_perms, vec_perm,
1113 first_stmt_info, 0, vect_body);
1114 /* And adjust the number of loads performed. This handles
1115 redundancies as well as loads that are later dead. */
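/* For illustration: with DR_GROUP_SIZE == 4, assumed_nunits == 2 and a
   load permutation of { 0, 1, 0, 1 }, only group elements 0 and 1 are
   used, so the recount below arrives at ncopies == 1; the second
   vector of the group is never loaded.  */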
1116 auto_sbitmap perm (DR_GROUP_SIZE (first_stmt_info));
1117 bitmap_clear (perm);
1118 for (unsigned i = 0;
1119 i < SLP_TREE_LOAD_PERMUTATION (slp_node).length (); ++i)
1120 bitmap_set_bit (perm, SLP_TREE_LOAD_PERMUTATION (slp_node)[i]);
1121 ncopies = 0;
1122 bool load_seen = false;
1123 for (unsigned i = 0; i < DR_GROUP_SIZE (first_stmt_info); ++i)
1124 {
1125 if (i % assumed_nunits == 0)
1126 {
1127 if (load_seen)
1128 ncopies++;
1129 load_seen = false;
1130 }
1131 if (bitmap_bit_p (perm, i))
1132 load_seen = true;
1133 }
1134 if (load_seen)
1135 ncopies++;
1136 gcc_assert (ncopies
1137 <= (DR_GROUP_SIZE (first_stmt_info)
1138 - DR_GROUP_GAP (first_stmt_info)
1139 + assumed_nunits - 1) / assumed_nunits);
1140 }
1141
1142 /* Grouped loads read all elements in the group at once,
1143 so we want the DR for the first statement. */
1144 stmt_vec_info first_stmt_info = stmt_info;
1145 if (!slp_node && grouped_access_p)
1146 first_stmt_info = DR_GROUP_FIRST_ELEMENT (stmt_info);
1147
1148 /* True if we should include any once-per-group costs as well as
1149 the cost of the statement itself. For SLP we only get called
1150 once per group anyhow. */
1151 bool first_stmt_p = (first_stmt_info == stmt_info);
1152
1153 /* We assume that the cost of a single load-lanes instruction is
1154 equivalent to the cost of DR_GROUP_SIZE separate loads. If a grouped
1155 access is instead being provided by a load-and-permute operation,
1156 include the cost of the permutes. */
1157 if (first_stmt_p
1158 && memory_access_type == VMAT_CONTIGUOUS_PERMUTE)
1159 {
1160 /* Use even and odd extract operations or shuffle operations
1161 for each needed permute. */
1162 int group_size = DR_GROUP_SIZE (first_stmt_info);
1163 int nstmts = ncopies * ceil_log2 (group_size) * group_size;
1164 inside_cost += record_stmt_cost (cost_vec, nstmts, vec_perm,
1165 stmt_info, 0, vect_body);
1166
1167 if (dump_enabled_p ())
1168 dump_printf_loc (MSG_NOTE, vect_location,
1169 "vect_model_load_cost: strided group_size = %d .\n",
1170 group_size);
1171 }
1172
1173 /* The loads themselves. */
1174 if (memory_access_type == VMAT_ELEMENTWISE
1175 || memory_access_type == VMAT_GATHER_SCATTER)
1176 {
1177 /* N scalar loads plus gathering them into a vector. */
1178 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
1179 unsigned int assumed_nunits = vect_nunits_for_cost (vectype);
1180 inside_cost += record_stmt_cost (cost_vec,
1181 ncopies * assumed_nunits,
1182 scalar_load, stmt_info, 0, vect_body);
1183 }
1184 else
1185 vect_get_load_cost (vinfo, stmt_info, ncopies, first_stmt_p,
1186 &inside_cost, &prologue_cost,
1187 cost_vec, cost_vec, true);
1188 if (memory_access_type == VMAT_ELEMENTWISE
1189 || memory_access_type == VMAT_STRIDED_SLP)
1190 inside_cost += record_stmt_cost (cost_vec, ncopies, vec_construct,
1191 stmt_info, 0, vect_body);
1192
1193 if (dump_enabled_p ())
1194 dump_printf_loc (MSG_NOTE, vect_location,
1195 "vect_model_load_cost: inside_cost = %d, "
1196 "prologue_cost = %d .\n", inside_cost, prologue_cost);
1197 }
1198
1199
1200 /* Calculate cost of DR's memory access. */
1201 void
1202 vect_get_load_cost (vec_info *vinfo, stmt_vec_info stmt_info, int ncopies,
1203 bool add_realign_cost, unsigned int *inside_cost,
1204 unsigned int *prologue_cost,
1205 stmt_vector_for_cost *prologue_cost_vec,
1206 stmt_vector_for_cost *body_cost_vec,
1207 bool record_prologue_costs)
1208 {
1209 dr_vec_info *dr_info = STMT_VINFO_DR_INFO (stmt_info);
1210 int alignment_support_scheme
1211 = vect_supportable_dr_alignment (vinfo, dr_info, false);
1212
1213 switch (alignment_support_scheme)
1214 {
1215 case dr_aligned:
1216 {
1217 *inside_cost += record_stmt_cost (body_cost_vec, ncopies, vector_load,
1218 stmt_info, 0, vect_body);
1219
1220 if (dump_enabled_p ())
1221 dump_printf_loc (MSG_NOTE, vect_location,
1222 "vect_model_load_cost: aligned.\n");
1223
1224 break;
1225 }
1226 case dr_unaligned_supported:
1227 {
1228 /* Here, we assign an additional cost for the unaligned load. */
1229 *inside_cost += record_stmt_cost (body_cost_vec, ncopies,
1230 unaligned_load, stmt_info,
1231 DR_MISALIGNMENT (dr_info),
1232 vect_body);
1233
1234 if (dump_enabled_p ())
1235 dump_printf_loc (MSG_NOTE, vect_location,
1236 "vect_model_load_cost: unaligned supported by "
1237 "hardware.\n");
1238
1239 break;
1240 }
1241 case dr_explicit_realign:
1242 {
1243 *inside_cost += record_stmt_cost (body_cost_vec, ncopies * 2,
1244 vector_load, stmt_info, 0, vect_body);
1245 *inside_cost += record_stmt_cost (body_cost_vec, ncopies,
1246 vec_perm, stmt_info, 0, vect_body);
1247
1248 /* FIXME: If the misalignment remains fixed across the iterations of
1249 the containing loop, the following cost should be added to the
1250 prologue costs. */
1251 if (targetm.vectorize.builtin_mask_for_load)
1252 *inside_cost += record_stmt_cost (body_cost_vec, 1, vector_stmt,
1253 stmt_info, 0, vect_body);
1254
1255 if (dump_enabled_p ())
1256 dump_printf_loc (MSG_NOTE, vect_location,
1257 "vect_model_load_cost: explicit realign\n");
1258
1259 break;
1260 }
1261 case dr_explicit_realign_optimized:
1262 {
1263 if (dump_enabled_p ())
1264 dump_printf_loc (MSG_NOTE, vect_location,
1265 "vect_model_load_cost: unaligned software "
1266 "pipelined.\n");
1267
1268 /* Unaligned software pipeline has a load of an address, an initial
1269 load, and possibly a mask operation to "prime" the loop. However,
1270 if this is an access in a group of loads, which provide grouped
1271 access, then the above cost should only be considered for one
1272 access in the group. Inside the loop, there is a load op
1273 and a realignment op. */
1274
1275 if (add_realign_cost && record_prologue_costs)
1276 {
1277 *prologue_cost += record_stmt_cost (prologue_cost_vec, 2,
1278 vector_stmt, stmt_info,
1279 0, vect_prologue);
1280 if (targetm.vectorize.builtin_mask_for_load)
1281 *prologue_cost += record_stmt_cost (prologue_cost_vec, 1,
1282 vector_stmt, stmt_info,
1283 0, vect_prologue);
1284 }
1285
1286 *inside_cost += record_stmt_cost (body_cost_vec, ncopies, vector_load,
1287 stmt_info, 0, vect_body);
1288 *inside_cost += record_stmt_cost (body_cost_vec, ncopies, vec_perm,
1289 stmt_info, 0, vect_body);
1290
1291 if (dump_enabled_p ())
1292 dump_printf_loc (MSG_NOTE, vect_location,
1293 "vect_model_load_cost: explicit realign optimized"
1294 "\n");
1295
1296 break;
1297 }
1298
1299 case dr_unaligned_unsupported:
1300 {
1301 *inside_cost = VECT_MAX_COST;
1302
1303 if (dump_enabled_p ())
1304 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1305 "vect_model_load_cost: unsupported access.\n");
1306 break;
1307 }
1308
1309 default:
1310 gcc_unreachable ();
1311 }
1312 }
1313
1314 /* Insert the new stmt NEW_STMT at *GSI or at the appropriate place in
1315 the loop preheader for the vectorized stmt STMT_VINFO. */
1316
1317 static void
1318 vect_init_vector_1 (vec_info *vinfo, stmt_vec_info stmt_vinfo, gimple *new_stmt,
1319 gimple_stmt_iterator *gsi)
1320 {
1321 if (gsi)
1322 vect_finish_stmt_generation (vinfo, stmt_vinfo, new_stmt, gsi);
1323 else
1324 {
1325 loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
1326
1327 if (loop_vinfo)
1328 {
1329 class loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
1330 basic_block new_bb;
1331 edge pe;
1332
1333 if (stmt_vinfo && nested_in_vect_loop_p (loop, stmt_vinfo))
1334 loop = loop->inner;
1335
1336 pe = loop_preheader_edge (loop);
1337 new_bb = gsi_insert_on_edge_immediate (pe, new_stmt);
1338 gcc_assert (!new_bb);
1339 }
1340 else
1341 {
1342 bb_vec_info bb_vinfo = dyn_cast <bb_vec_info> (vinfo);
1343 gimple_stmt_iterator gsi_region_begin = bb_vinfo->region_begin;
1344 gsi_insert_before (&gsi_region_begin, new_stmt, GSI_SAME_STMT);
1345 }
1346 }
1347
1348 if (dump_enabled_p ())
1349 dump_printf_loc (MSG_NOTE, vect_location,
1350 "created new init_stmt: %G", new_stmt);
1351 }
1352
1353 /* Function vect_init_vector.
1354
1355 Insert a new stmt (INIT_STMT) that initializes a new variable of type
1356 TYPE with the value VAL. If TYPE is a vector type and VAL does not have
1357 vector type, a vector with all elements equal to VAL is created first.
1358 Place the initialization at GSI if it is not NULL. Otherwise, place the
1359 initialization at the loop preheader.
1360 Return the DEF of INIT_STMT.
1361 It will be used in the vectorization of STMT_INFO. */
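/* Usage sketch (illustrative, names invented): for a four-element int
   vector TYPE and the scalar constant 3, this creates

     cst_1 = { 3, 3, 3, 3 };

   in the loop preheader (when GSI is NULL) and returns cst_1.  */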
1362
1363 tree
1364 vect_init_vector (vec_info *vinfo, stmt_vec_info stmt_info, tree val, tree type,
1365 gimple_stmt_iterator *gsi)
1366 {
1367 gimple *init_stmt;
1368 tree new_temp;
1369
1370 /* We abuse this function to push something to an SSA name with initial 'val'. */
1371 if (! useless_type_conversion_p (type, TREE_TYPE (val)))
1372 {
1373 gcc_assert (TREE_CODE (type) == VECTOR_TYPE);
1374 if (! types_compatible_p (TREE_TYPE (type), TREE_TYPE (val)))
1375 {
1376 /* A scalar boolean value should be transformed into an
1377 all-zeros or all-ones value before building a vector. */
1378 if (VECTOR_BOOLEAN_TYPE_P (type))
1379 {
1380 tree true_val = build_all_ones_cst (TREE_TYPE (type));
1381 tree false_val = build_zero_cst (TREE_TYPE (type));
1382
1383 if (CONSTANT_CLASS_P (val))
1384 val = integer_zerop (val) ? false_val : true_val;
1385 else
1386 {
1387 new_temp = make_ssa_name (TREE_TYPE (type));
1388 init_stmt = gimple_build_assign (new_temp, COND_EXPR,
1389 val, true_val, false_val);
1390 vect_init_vector_1 (vinfo, stmt_info, init_stmt, gsi);
1391 val = new_temp;
1392 }
1393 }
1394 else
1395 {
1396 gimple_seq stmts = NULL;
1397 if (! INTEGRAL_TYPE_P (TREE_TYPE (val)))
1398 val = gimple_build (&stmts, VIEW_CONVERT_EXPR,
1399 TREE_TYPE (type), val);
1400 else
1401 /* ??? Condition vectorization expects us to do
1402 promotion of invariant/external defs. */
1403 val = gimple_convert (&stmts, TREE_TYPE (type), val);
1404 for (gimple_stmt_iterator gsi2 = gsi_start (stmts);
1405 !gsi_end_p (gsi2); )
1406 {
1407 init_stmt = gsi_stmt (gsi2);
1408 gsi_remove (&gsi2, false);
1409 vect_init_vector_1 (vinfo, stmt_info, init_stmt, gsi);
1410 }
1411 }
1412 }
1413 val = build_vector_from_val (type, val);
1414 }
1415
1416 new_temp = vect_get_new_ssa_name (type, vect_simple_var, "cst_");
1417 init_stmt = gimple_build_assign (new_temp, val);
1418 vect_init_vector_1 (vinfo, stmt_info, init_stmt, gsi);
1419 return new_temp;
1420 }
1421
1422 /* Function vect_get_vec_def_for_operand_1.
1423
1424 For a defining stmt DEF_STMT_INFO of a scalar stmt, return a vector def
1425 with type DT that will be used in the vectorized stmt. */
1426
1427 tree
1428 vect_get_vec_def_for_operand_1 (stmt_vec_info def_stmt_info,
1429 enum vect_def_type dt)
1430 {
1431 tree vec_oprnd;
1432 stmt_vec_info vec_stmt_info;
1433
1434 switch (dt)
1435 {
1436 /* operand is a constant or a loop invariant. */
1437 case vect_constant_def:
1438 case vect_external_def:
1439 /* Code should use vect_get_vec_def_for_operand. */
1440 gcc_unreachable ();
1441
1442 /* Operand is defined by a loop header phi. In case of nested
1443 cycles we also may have uses of the backedge def. */
1444 case vect_reduction_def:
1445 case vect_double_reduction_def:
1446 case vect_nested_cycle:
1447 case vect_induction_def:
1448 gcc_assert (gimple_code (def_stmt_info->stmt) == GIMPLE_PHI
1449 || dt == vect_nested_cycle);
1450 /* Fallthru. */
1451
1452 /* operand is defined inside the loop. */
1453 case vect_internal_def:
1454 {
1455 /* Get the def from the vectorized stmt. */
1456 vec_stmt_info = STMT_VINFO_VEC_STMT (def_stmt_info);
1457 /* Get vectorized pattern statement. */
1458 if (!vec_stmt_info
1459 && STMT_VINFO_IN_PATTERN_P (def_stmt_info)
1460 && !STMT_VINFO_RELEVANT (def_stmt_info))
1461 vec_stmt_info = (STMT_VINFO_VEC_STMT
1462 (STMT_VINFO_RELATED_STMT (def_stmt_info)));
1463 gcc_assert (vec_stmt_info);
1464 if (gphi *phi = dyn_cast <gphi *> (vec_stmt_info->stmt))
1465 vec_oprnd = PHI_RESULT (phi);
1466 else
1467 vec_oprnd = gimple_get_lhs (vec_stmt_info->stmt);
1468 return vec_oprnd;
1469 }
1470
1471 default:
1472 gcc_unreachable ();
1473 }
1474 }
1475
1476
1477 /* Function vect_get_vec_def_for_operand.
1478
1479 OP is an operand in STMT_VINFO. This function returns a (vector) def
1480 that will be used in the vectorized stmt for STMT_VINFO.
1481
1482 In the case that OP is an SSA_NAME which is defined in the loop, then
1483 STMT_VINFO_VEC_STMT of the defining stmt holds the relevant def.
1484
1485 In case OP is an invariant or constant, a new stmt that creates a vector def
1486 needs to be introduced. VECTYPE may be used to specify a required type for
1487 vector invariant. */
1488
1489 tree
1490 vect_get_vec_def_for_operand (vec_info *vinfo,
1491 tree op, stmt_vec_info stmt_vinfo, tree vectype)
1492 {
1493 gimple *def_stmt;
1494 enum vect_def_type dt;
1495 bool is_simple_use;
1496 loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
1497
1498 if (dump_enabled_p ())
1499 dump_printf_loc (MSG_NOTE, vect_location,
1500 "vect_get_vec_def_for_operand: %T\n", op);
1501
1502 stmt_vec_info def_stmt_info;
1503 is_simple_use = vect_is_simple_use (op, loop_vinfo, &dt,
1504 &def_stmt_info, &def_stmt);
1505 gcc_assert (is_simple_use);
1506 if (def_stmt && dump_enabled_p ())
1507 dump_printf_loc (MSG_NOTE, vect_location, " def_stmt = %G", def_stmt);
1508
1509 if (dt == vect_constant_def || dt == vect_external_def)
1510 {
1511 tree stmt_vectype = STMT_VINFO_VECTYPE (stmt_vinfo);
1512 tree vector_type;
1513
1514 if (vectype)
1515 vector_type = vectype;
1516 else if (VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (op))
1517 && VECTOR_BOOLEAN_TYPE_P (stmt_vectype))
1518 vector_type = truth_type_for (stmt_vectype);
1519 else
1520 vector_type = get_vectype_for_scalar_type (loop_vinfo, TREE_TYPE (op));
1521
1522 gcc_assert (vector_type);
1523 return vect_init_vector (vinfo, stmt_vinfo, op, vector_type, NULL);
1524 }
1525 else
1526 return vect_get_vec_def_for_operand_1 (def_stmt_info, dt);
1527 }
1528
1529
1530 /* Function vect_get_vec_def_for_stmt_copy
1531
1532 Return a vector-def for an operand. This function is used when the
1533 vectorized stmt to be created (by the caller to this function) is a "copy"
1534 created in case the vectorized result cannot fit in one vector, and several
1535 copies of the vector-stmt are required. In this case the vector-def is
1536 retrieved from the vector stmt recorded in the STMT_VINFO_RELATED_STMT field
1537 of the stmt that defines VEC_OPRND. VINFO describes the vectorization.
1538
1539 Context:
1540 In case the vectorization factor (VF) is bigger than the number
1541 of elements that can fit in a vectype (nunits), we have to generate
1542 more than one vector stmt to vectorize the scalar stmt. This situation
1543 arises when there are multiple data-types operated upon in the loop; the
1544 smallest data-type determines the VF, and as a result, when vectorizing
1545 stmts operating on wider types we need to create 'VF/nunits' "copies" of the
1546 vector stmt (each computing a vector of 'nunits' results, and together
1547 computing 'VF' results in each iteration). This function is called when
1548 vectorizing such a stmt (e.g. vectorizing S2 in the illustration below, in
1549 which VF=16 and nunits=4, so the number of copies required is 4):
1550
1551 scalar stmt: vectorized into: STMT_VINFO_RELATED_STMT
1552
1553 S1: x = load VS1.0: vx.0 = memref0 VS1.1
1554 VS1.1: vx.1 = memref1 VS1.2
1555 VS1.2: vx.2 = memref2 VS1.3
1556 VS1.3: vx.3 = memref3
1557
1558 S2: z = x + ... VSnew.0: vz0 = vx.0 + ... VSnew.1
1559 VSnew.1: vz1 = vx.1 + ... VSnew.2
1560 VSnew.2: vz2 = vx.2 + ... VSnew.3
1561 VSnew.3: vz3 = vx.3 + ...
1562
1563 The vectorization of S1 is explained in vectorizable_load.
1564 The vectorization of S2:
1565 To create the first vector-stmt out of the 4 copies - VSnew.0 -
1566 the function 'vect_get_vec_def_for_operand' is called to
1567 get the relevant vector-def for each operand of S2. For operand x it
1568 returns the vector-def 'vx.0'.
1569
1570 To create the remaining copies of the vector-stmt (VSnew.j), this
1571 function is called to get the relevant vector-def for each operand. It is
1572 obtained from the respective VS1.j stmt, which is recorded in the
1573 STMT_VINFO_RELATED_STMT field of the stmt that defines VEC_OPRND.
1574
1575 For example, to obtain the vector-def 'vx.1' in order to create the
1576 vector stmt 'VSnew.1', this function is called with VEC_OPRND='vx.0'.
1577 Given 'vx.0' we obtain the stmt that defines it ('VS1.0'); from the
1578 STMT_VINFO_RELATED_STMT field of 'VS1.0' we obtain the next copy - 'VS1.1',
1579 and return its def ('vx.1').
1580 Overall, to create the above sequence this function will be called 3 times:
1581 vx.1 = vect_get_vec_def_for_stmt_copy (vinfo, vx.0);
1582 vx.2 = vect_get_vec_def_for_stmt_copy (vinfo, vx.1);
1583 vx.3 = vect_get_vec_def_for_stmt_copy (vinfo, vx.2); */
1584
1585 tree
1586 vect_get_vec_def_for_stmt_copy (vec_info *vinfo, tree vec_oprnd)
1587 {
1588 stmt_vec_info def_stmt_info = vinfo->lookup_def (vec_oprnd);
1589 if (!def_stmt_info)
1590 /* Do nothing; can reuse same def. */
1591 return vec_oprnd;
1592
1593 def_stmt_info = STMT_VINFO_RELATED_STMT (def_stmt_info);
1594 gcc_assert (def_stmt_info);
1595 if (gphi *phi = dyn_cast <gphi *> (def_stmt_info->stmt))
1596 vec_oprnd = PHI_RESULT (phi);
1597 else
1598 vec_oprnd = gimple_get_lhs (def_stmt_info->stmt);
1599 return vec_oprnd;
1600 }
1601
1602
1603 /* Get vectorized definitions for the operands to create a copy of an original
1604 stmt. See vect_get_vec_def_for_stmt_copy () for details. */
1605
1606 void
1607 vect_get_vec_defs_for_stmt_copy (vec_info *vinfo,
1608 vec<tree> *vec_oprnds0,
1609 vec<tree> *vec_oprnds1)
1610 {
1611 tree vec_oprnd = vec_oprnds0->pop ();
1612
1613 vec_oprnd = vect_get_vec_def_for_stmt_copy (vinfo, vec_oprnd);
1614 vec_oprnds0->quick_push (vec_oprnd);
1615
1616 if (vec_oprnds1 && vec_oprnds1->length ())
1617 {
1618 vec_oprnd = vec_oprnds1->pop ();
1619 vec_oprnd = vect_get_vec_def_for_stmt_copy (vinfo, vec_oprnd);
1620 vec_oprnds1->quick_push (vec_oprnd);
1621 }
1622 }
1623
1624
1625 /* Get vectorized definitions for OP0 and OP1. */
1626
1627 void
1628 vect_get_vec_defs (vec_info *vinfo, tree op0, tree op1, stmt_vec_info stmt_info,
1629 vec<tree> *vec_oprnds0,
1630 vec<tree> *vec_oprnds1,
1631 slp_tree slp_node)
1632 {
1633 if (slp_node)
1634 {
1635 auto_vec<vec<tree> > vec_defs (SLP_TREE_CHILDREN (slp_node).length ());
1636 vect_get_slp_defs (vinfo, slp_node, &vec_defs, op1 ? 2 : 1);
1637 *vec_oprnds0 = vec_defs[0];
1638 if (op1)
1639 *vec_oprnds1 = vec_defs[1];
1640 }
1641 else
1642 {
1643 tree vec_oprnd;
1644
1645 vec_oprnds0->create (1);
1646 vec_oprnd = vect_get_vec_def_for_operand (vinfo, op0, stmt_info);
1647 vec_oprnds0->quick_push (vec_oprnd);
1648
1649 if (op1)
1650 {
1651 vec_oprnds1->create (1);
1652 vec_oprnd = vect_get_vec_def_for_operand (vinfo, op1, stmt_info);
1653 vec_oprnds1->quick_push (vec_oprnd);
1654 }
1655 }
1656 }
1657
1658 /* Helper function called by vect_finish_replace_stmt and
1659 vect_finish_stmt_generation. Set the location of the new
1660 statement and create and return a stmt_vec_info for it. */
1661
1662 static stmt_vec_info
1663 vect_finish_stmt_generation_1 (vec_info *vinfo,
1664 stmt_vec_info stmt_info, gimple *vec_stmt)
1665 {
1666 stmt_vec_info vec_stmt_info = vinfo->add_stmt (vec_stmt);
1667
1668 if (dump_enabled_p ())
1669 dump_printf_loc (MSG_NOTE, vect_location, "add new stmt: %G", vec_stmt);
1670
1671 if (stmt_info)
1672 {
1673 gimple_set_location (vec_stmt, gimple_location (stmt_info->stmt));
1674
1675 /* While EH edges will generally prevent vectorization, stmt might
1676 e.g. be in a must-not-throw region. Ensure newly created stmts
1677 that could throw are part of the same region. */
1678 int lp_nr = lookup_stmt_eh_lp (stmt_info->stmt);
1679 if (lp_nr != 0 && stmt_could_throw_p (cfun, vec_stmt))
1680 add_stmt_to_eh_lp (vec_stmt, lp_nr);
1681 }
1682 else
1683 gcc_assert (!stmt_could_throw_p (cfun, vec_stmt));
1684
1685 return vec_stmt_info;
1686 }
1687
1688 /* Replace the scalar statement STMT_INFO with a new vector statement VEC_STMT,
1689 which sets the same scalar result as STMT_INFO did. Create and return a
1690 stmt_vec_info for VEC_STMT. */
1691
1692 stmt_vec_info
1693 vect_finish_replace_stmt (vec_info *vinfo,
1694 stmt_vec_info stmt_info, gimple *vec_stmt)
1695 {
1696 gimple *scalar_stmt = vect_orig_stmt (stmt_info)->stmt;
1697 gcc_assert (gimple_get_lhs (scalar_stmt) == gimple_get_lhs (vec_stmt));
1698
1699 gimple_stmt_iterator gsi = gsi_for_stmt (scalar_stmt);
1700 gsi_replace (&gsi, vec_stmt, true);
1701
1702 return vect_finish_stmt_generation_1 (vinfo, stmt_info, vec_stmt);
1703 }
1704
1705 /* Add VEC_STMT to the vectorized implementation of STMT_INFO and insert it
1706 before *GSI. Create and return a stmt_vec_info for VEC_STMT. */
1707
1708 stmt_vec_info
1709 vect_finish_stmt_generation (vec_info *vinfo,
1710 stmt_vec_info stmt_info, gimple *vec_stmt,
1711 gimple_stmt_iterator *gsi)
1712 {
1713 gcc_assert (!stmt_info || gimple_code (stmt_info->stmt) != GIMPLE_LABEL);
1714
1715 if (!gsi_end_p (*gsi)
1716 && gimple_has_mem_ops (vec_stmt))
1717 {
1718 gimple *at_stmt = gsi_stmt (*gsi);
1719 tree vuse = gimple_vuse (at_stmt);
1720 if (vuse && TREE_CODE (vuse) == SSA_NAME)
1721 {
1722 tree vdef = gimple_vdef (at_stmt);
1723 gimple_set_vuse (vec_stmt, gimple_vuse (at_stmt));
1724 /* If we have an SSA vuse and insert a store, update virtual
1725 SSA form to avoid triggering the renamer. Do so only
1726 if we can easily see all uses - which is what almost always
1727 happens with the way vectorized stmts are inserted. */
1728 if ((vdef && TREE_CODE (vdef) == SSA_NAME)
1729 && ((is_gimple_assign (vec_stmt)
1730 && !is_gimple_reg (gimple_assign_lhs (vec_stmt)))
1731 || (is_gimple_call (vec_stmt)
1732 && !(gimple_call_flags (vec_stmt)
1733 & (ECF_CONST|ECF_PURE|ECF_NOVOPS)))))
1734 {
1735 tree new_vdef = copy_ssa_name (vuse, vec_stmt);
1736 gimple_set_vdef (vec_stmt, new_vdef);
1737 SET_USE (gimple_vuse_op (at_stmt), new_vdef);
1738 }
1739 }
1740 }
1741 gsi_insert_before (gsi, vec_stmt, GSI_SAME_STMT);
1742 return vect_finish_stmt_generation_1 (vinfo, stmt_info, vec_stmt);
1743 }
1744
1745 /* We want to vectorize a call to combined function CFN with function
1746 decl FNDECL, using VECTYPE_OUT as the type of the output and VECTYPE_IN
1747 as the types of all inputs. Check whether this is possible using
1748 an internal function, returning its code if so or IFN_LAST if not. */
1749
1750 static internal_fn
1751 vectorizable_internal_function (combined_fn cfn, tree fndecl,
1752 tree vectype_out, tree vectype_in)
1753 {
1754 internal_fn ifn;
1755 if (internal_fn_p (cfn))
1756 ifn = as_internal_fn (cfn);
1757 else
1758 ifn = associated_internal_fn (fndecl);
1759 if (ifn != IFN_LAST && direct_internal_fn_p (ifn))
1760 {
1761 const direct_internal_fn_info &info = direct_internal_fn (ifn);
1762 if (info.vectorizable)
1763 {
1764 tree type0 = (info.type0 < 0 ? vectype_out : vectype_in);
1765 tree type1 = (info.type1 < 0 ? vectype_out : vectype_in);
1766 if (direct_internal_fn_supported_p (ifn, tree_pair (type0, type1),
1767 OPTIMIZE_FOR_SPEED))
1768 return ifn;
1769 }
1770 }
1771 return IFN_LAST;
1772 }
1773
1774
1775 static tree permute_vec_elements (vec_info *, tree, tree, tree, stmt_vec_info,
1776 gimple_stmt_iterator *);
1777
1778 /* Check whether a load or store statement in the loop described by
1779 LOOP_VINFO is possible in a fully-masked loop. This is testing
1780 whether the vectorizer pass has the appropriate support, as well as
1781 whether the target does.
1782
1783 VLS_TYPE says whether the statement is a load or store and VECTYPE
1784 is the type of the vector being loaded or stored. MEMORY_ACCESS_TYPE
1785 says how the load or store is going to be implemented and GROUP_SIZE
1786 is the number of load or store statements in the containing group.
1787 If the access is a gather load or scatter store, GS_INFO describes
1788 its arguments. If the load or store is conditional, SCALAR_MASK is the
1789 condition under which it occurs.
1790
1791 Clear LOOP_VINFO_CAN_FULLY_MASK_P if a fully-masked loop is not
1792 supported, otherwise record the required mask types. */
1793
1794 static void
1795 check_load_store_masking (loop_vec_info loop_vinfo, tree vectype,
1796 vec_load_store_type vls_type, int group_size,
1797 vect_memory_access_type memory_access_type,
1798 gather_scatter_info *gs_info, tree scalar_mask)
1799 {
1800 /* Invariant loads need no special support. */
1801 if (memory_access_type == VMAT_INVARIANT)
1802 return;
1803
1804 vec_loop_masks *masks = &LOOP_VINFO_MASKS (loop_vinfo);
1805 machine_mode vecmode = TYPE_MODE (vectype);
1806 bool is_load = (vls_type == VLS_LOAD);
1807 if (memory_access_type == VMAT_LOAD_STORE_LANES)
1808 {
1809 if (is_load
1810 ? !vect_load_lanes_supported (vectype, group_size, true)
1811 : !vect_store_lanes_supported (vectype, group_size, true))
1812 {
1813 if (dump_enabled_p ())
1814 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1815 "can't use a fully-masked loop because the"
1816 " target doesn't have an appropriate masked"
1817 " load/store-lanes instruction.\n");
1818 LOOP_VINFO_CAN_FULLY_MASK_P (loop_vinfo) = false;
1819 return;
1820 }
1821 unsigned int ncopies = vect_get_num_copies (loop_vinfo, vectype);
1822 vect_record_loop_mask (loop_vinfo, masks, ncopies, vectype, scalar_mask);
1823 return;
1824 }
1825
1826 if (memory_access_type == VMAT_GATHER_SCATTER)
1827 {
1828 internal_fn ifn = (is_load
1829 ? IFN_MASK_GATHER_LOAD
1830 : IFN_MASK_SCATTER_STORE);
1831 if (!internal_gather_scatter_fn_supported_p (ifn, vectype,
1832 gs_info->memory_type,
1833 gs_info->offset_vectype,
1834 gs_info->scale))
1835 {
1836 if (dump_enabled_p ())
1837 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1838 "can't use a fully-masked loop because the"
1839 " target doesn't have an appropriate masked"
1840 " gather load or scatter store instruction.\n");
1841 LOOP_VINFO_CAN_FULLY_MASK_P (loop_vinfo) = false;
1842 return;
1843 }
1844 unsigned int ncopies = vect_get_num_copies (loop_vinfo, vectype);
1845 vect_record_loop_mask (loop_vinfo, masks, ncopies, vectype, scalar_mask);
1846 return;
1847 }
1848
1849 if (memory_access_type != VMAT_CONTIGUOUS
1850 && memory_access_type != VMAT_CONTIGUOUS_PERMUTE)
1851 {
1852 /* Element X of the data must come from iteration i * VF + X of the
1853 scalar loop. We need more work to support other mappings. */
1854 if (dump_enabled_p ())
1855 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1856 "can't use a fully-masked loop because an access"
1857 " isn't contiguous.\n");
1858 LOOP_VINFO_CAN_FULLY_MASK_P (loop_vinfo) = false;
1859 return;
1860 }
1861
1862 machine_mode mask_mode;
1863 if (!VECTOR_MODE_P (vecmode)
1864 || !targetm.vectorize.get_mask_mode (vecmode).exists (&mask_mode)
1865 || !can_vec_mask_load_store_p (vecmode, mask_mode, is_load))
1866 {
1867 if (dump_enabled_p ())
1868 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1869 "can't use a fully-masked loop because the target"
1870 " doesn't have the appropriate masked load or"
1871 " store.\n");
1872 LOOP_VINFO_CAN_FULLY_MASK_P (loop_vinfo) = false;
1873 return;
1874 }
1875 /* We might load more scalars than we need for permuting SLP loads.
1876 We checked in get_group_load_store_type that the extra elements
1877 don't leak into a new vector. */
1878 poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
1879 poly_uint64 vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
1880 unsigned int nvectors;
1881 if (can_div_away_from_zero_p (group_size * vf, nunits, &nvectors))
1882 vect_record_loop_mask (loop_vinfo, masks, nvectors, vectype, scalar_mask);
1883 else
1884 gcc_unreachable ();
1885 }
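/* For example (figures purely illustrative): for a contiguous access with
   GROUP_SIZE == 2, a vectorization factor of 8 and a V4SI VECTYPE
   (nunits == 4), the final case above records ceil (2 * 8 / 4) == 4
   loop masks for this access. */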
1886
1887 /* Return the mask input to a masked load or store. VEC_MASK is the vectorized
1888 form of the scalar mask condition and LOOP_MASK, if nonnull, is the mask
1889 that needs to be applied to all loads and stores in a vectorized loop.
1890 Return VEC_MASK if LOOP_MASK is null, otherwise return VEC_MASK & LOOP_MASK.
1891
1892 MASK_TYPE is the type of both masks. If new statements are needed,
1893 insert them before GSI. */
1894
1895 static tree
1896 prepare_load_store_mask (tree mask_type, tree loop_mask, tree vec_mask,
1897 gimple_stmt_iterator *gsi)
1898 {
1899 gcc_assert (useless_type_conversion_p (mask_type, TREE_TYPE (vec_mask)));
1900 if (!loop_mask)
1901 return vec_mask;
1902
1903 gcc_assert (TREE_TYPE (loop_mask) == mask_type);
1904 tree and_res = make_temp_ssa_name (mask_type, NULL, "vec_mask_and");
1905 gimple *and_stmt = gimple_build_assign (and_res, BIT_AND_EXPR,
1906 vec_mask, loop_mask);
1907 gsi_insert_before (gsi, and_stmt, GSI_SAME_STMT);
1908 return and_res;
1909 }
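/* For example, if VEC_MASK is mask__1 and LOOP_MASK is loop_mask_2 (the SSA
   names are purely illustrative), the function emits

     vec_mask_and_3 = mask__1 & loop_mask_2;

   before GSI and returns vec_mask_and_3. */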
1910
1911 /* Determine whether we can use a gather load or scatter store to vectorize
1912 strided load or store STMT_INFO by truncating the current offset to a
1913 smaller width. We need to be able to construct an offset vector:
1914
1915 { 0, X, X*2, X*3, ... }
1916
1917 without loss of precision, where X is STMT_INFO's DR_STEP.
1918
1919 Return true if this is possible, describing the gather load or scatter
1920 store in GS_INFO. MASKED_P is true if the load or store is conditional. */
1921
1922 static bool
1923 vect_truncate_gather_scatter_offset (stmt_vec_info stmt_info,
1924 loop_vec_info loop_vinfo, bool masked_p,
1925 gather_scatter_info *gs_info)
1926 {
1927 dr_vec_info *dr_info = STMT_VINFO_DR_INFO (stmt_info);
1928 data_reference *dr = dr_info->dr;
1929 tree step = DR_STEP (dr);
1930 if (TREE_CODE (step) != INTEGER_CST)
1931 {
1932 /* ??? Perhaps we could use range information here? */
1933 if (dump_enabled_p ())
1934 dump_printf_loc (MSG_NOTE, vect_location,
1935 "cannot truncate variable step.\n");
1936 return false;
1937 }
1938
1939 /* Get the number of bits in an element. */
1940 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
1941 scalar_mode element_mode = SCALAR_TYPE_MODE (TREE_TYPE (vectype));
1942 unsigned int element_bits = GET_MODE_BITSIZE (element_mode);
1943
1944 /* Set COUNT to the upper limit on the number of elements - 1.
1945 Start with the maximum vectorization factor. */
1946 unsigned HOST_WIDE_INT count = vect_max_vf (loop_vinfo) - 1;
1947
1948 /* Try lowering COUNT to the number of scalar latch iterations. */
1949 class loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
1950 widest_int max_iters;
1951 if (max_loop_iterations (loop, &max_iters)
1952 && max_iters < count)
1953 count = max_iters.to_shwi ();
1954
1955 /* Try scales of 1 and the element size. */
1956 int scales[] = { 1, vect_get_scalar_dr_size (dr_info) };
1957 wi::overflow_type overflow = wi::OVF_NONE;
1958 for (int i = 0; i < 2; ++i)
1959 {
1960 int scale = scales[i];
1961 widest_int factor;
1962 if (!wi::multiple_of_p (wi::to_widest (step), scale, SIGNED, &factor))
1963 continue;
1964
1965 /* Determine the minimum precision of (COUNT - 1) * STEP / SCALE. */
1966 widest_int range = wi::mul (count, factor, SIGNED, &overflow);
1967 if (overflow)
1968 continue;
1969 signop sign = range >= 0 ? UNSIGNED : SIGNED;
1970 unsigned int min_offset_bits = wi::min_precision (range, sign);
1971
1972 /* Find the narrowest viable offset type. */
1973 unsigned int offset_bits = 1U << ceil_log2 (min_offset_bits);
1974 tree offset_type = build_nonstandard_integer_type (offset_bits,
1975 sign == UNSIGNED);
1976
1977 /* See whether the target supports the operation with an offset
1978 no narrower than OFFSET_TYPE. */
1979 tree memory_type = TREE_TYPE (DR_REF (dr));
1980 if (!vect_gather_scatter_fn_p (loop_vinfo, DR_IS_READ (dr), masked_p,
1981 vectype, memory_type, offset_type, scale,
1982 &gs_info->ifn, &gs_info->offset_vectype))
1983 continue;
1984
1985 gs_info->decl = NULL_TREE;
1986 /* Logically the sum of DR_BASE_ADDRESS, DR_INIT and DR_OFFSET,
1987 but we don't need to store that here. */
1988 gs_info->base = NULL_TREE;
1989 gs_info->element_type = TREE_TYPE (vectype);
1990 gs_info->offset = fold_convert (offset_type, step);
1991 gs_info->offset_dt = vect_constant_def;
1992 gs_info->scale = scale;
1993 gs_info->memory_type = memory_type;
1994 return true;
1995 }
1996
1997 if (overflow && dump_enabled_p ())
1998 dump_printf_loc (MSG_NOTE, vect_location,
1999 "truncating gather/scatter offset to %d bits"
2000 " might change its value.\n", element_bits);
2001
2002 return false;
2003 }
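/* For example (figures purely illustrative): for a 32-bit element with
   DR_STEP == 4 and a loop whose latch runs at most 199 times, COUNT == 199.
   The first attempt uses SCALE == 1, giving FACTOR == 4 and a range of 796,
   which needs 10 bits, so a 16-bit unsigned offset type is tried; the second
   attempt uses SCALE == 4 (the element size), giving FACTOR == 1 and a range
   of 199, which fits in an 8-bit unsigned offset type. */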
2004
2005 /* Return true if we can use gather/scatter internal functions to
2006 vectorize STMT_INFO, which is a grouped or strided load or store.
2007 MASKED_P is true if the load or store is conditional. When returning
2008 true, fill in GS_INFO with the information required to perform the
2009 operation. */
2010
2011 static bool
2012 vect_use_strided_gather_scatters_p (stmt_vec_info stmt_info,
2013 loop_vec_info loop_vinfo, bool masked_p,
2014 gather_scatter_info *gs_info)
2015 {
2016 if (!vect_check_gather_scatter (stmt_info, loop_vinfo, gs_info)
2017 || gs_info->decl)
2018 return vect_truncate_gather_scatter_offset (stmt_info, loop_vinfo,
2019 masked_p, gs_info);
2020
2021 tree old_offset_type = TREE_TYPE (gs_info->offset);
2022 tree new_offset_type = TREE_TYPE (gs_info->offset_vectype);
2023
2024 gcc_assert (TYPE_PRECISION (new_offset_type)
2025 >= TYPE_PRECISION (old_offset_type));
2026 gs_info->offset = fold_convert (new_offset_type, gs_info->offset);
2027
2028 if (dump_enabled_p ())
2029 dump_printf_loc (MSG_NOTE, vect_location,
2030 "using gather/scatter for strided/grouped access,"
2031 " scale = %d\n", gs_info->scale);
2032
2033 return true;
2034 }
2035
2036 /* STMT_INFO is a non-strided load or store, meaning that it accesses
2037 elements with a known constant step. Return -1 if that step
2038 is negative, 0 if it is zero, and 1 if it is greater than zero. */
2039
2040 static int
2041 compare_step_with_zero (vec_info *vinfo, stmt_vec_info stmt_info)
2042 {
2043 dr_vec_info *dr_info = STMT_VINFO_DR_INFO (stmt_info);
2044 return tree_int_cst_compare (vect_dr_behavior (vinfo, dr_info)->step,
2045 size_zero_node);
2046 }
2047
2048 /* If the target supports a permute mask that reverses the elements in
2049 a vector of type VECTYPE, return that mask, otherwise return null. */
2050
2051 static tree
2052 perm_mask_for_reverse (tree vectype)
2053 {
2054 poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
2055
2056 /* The encoding has a single stepped pattern. */
2057 vec_perm_builder sel (nunits, 1, 3);
2058 for (int i = 0; i < 3; ++i)
2059 sel.quick_push (nunits - 1 - i);
2060
2061 vec_perm_indices indices (sel, 1, nunits);
2062 if (!can_vec_perm_const_p (TYPE_MODE (vectype), indices))
2063 return NULL_TREE;
2064 return vect_gen_perm_mask_checked (vectype, indices);
2065 }
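/* For example, for V8HI the three elements pushed above are { 7, 6, 5 },
   which the stepped encoding extends to the full reversal permutation
   { 7, 6, 5, 4, 3, 2, 1, 0 }. */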
2066
2067 /* A subroutine of get_load_store_type, with a subset of the same
2068 arguments. Handle the case where STMT_INFO is a load or store that
2069 accesses consecutive elements with a negative step. */
2070
2071 static vect_memory_access_type
2072 get_negative_load_store_type (vec_info *vinfo,
2073 stmt_vec_info stmt_info, tree vectype,
2074 vec_load_store_type vls_type,
2075 unsigned int ncopies)
2076 {
2077 dr_vec_info *dr_info = STMT_VINFO_DR_INFO (stmt_info);
2078 dr_alignment_support alignment_support_scheme;
2079
2080 if (ncopies > 1)
2081 {
2082 if (dump_enabled_p ())
2083 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2084 "multiple types with negative step.\n");
2085 return VMAT_ELEMENTWISE;
2086 }
2087
2088 alignment_support_scheme = vect_supportable_dr_alignment (vinfo,
2089 dr_info, false);
2090 if (alignment_support_scheme != dr_aligned
2091 && alignment_support_scheme != dr_unaligned_supported)
2092 {
2093 if (dump_enabled_p ())
2094 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2095 "negative step but alignment required.\n");
2096 return VMAT_ELEMENTWISE;
2097 }
2098
2099 if (vls_type == VLS_STORE_INVARIANT)
2100 {
2101 if (dump_enabled_p ())
2102 dump_printf_loc (MSG_NOTE, vect_location,
2103 "negative step with invariant source;"
2104 " no permute needed.\n");
2105 return VMAT_CONTIGUOUS_DOWN;
2106 }
2107
2108 if (!perm_mask_for_reverse (vectype))
2109 {
2110 if (dump_enabled_p ())
2111 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2112 "negative step and reversing not supported.\n");
2113 return VMAT_ELEMENTWISE;
2114 }
2115
2116 return VMAT_CONTIGUOUS_REVERSE;
2117 }
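/* For example, a single-copy load with DR_STEP == -4 on a target that
   supports unaligned accesses and the reversal permutation above is
   classified as VMAT_CONTIGUOUS_REVERSE: the vector is accessed
   contiguously and its elements are then reversed. */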
2118
2119 /* STMT_INFO is either a masked or unconditional store. Return the value
2120 being stored. */
2121
2122 tree
2123 vect_get_store_rhs (stmt_vec_info stmt_info)
2124 {
2125 if (gassign *assign = dyn_cast <gassign *> (stmt_info->stmt))
2126 {
2127 gcc_assert (gimple_assign_single_p (assign));
2128 return gimple_assign_rhs1 (assign);
2129 }
2130 if (gcall *call = dyn_cast <gcall *> (stmt_info->stmt))
2131 {
2132 internal_fn ifn = gimple_call_internal_fn (call);
2133 int index = internal_fn_stored_value_index (ifn);
2134 gcc_assert (index >= 0);
2135 return gimple_call_arg (call, index);
2136 }
2137 gcc_unreachable ();
2138 }
2139
2140 /* Function VECTOR_VECTOR_COMPOSITION_TYPE
2141
2142 This function returns a vector type which can be composed from NELTS pieces,
2143 whose type is recorded in PTYPE. VTYPE should be a vector type and have the
2144 same vector size as the return vector. It first checks whether the target
2145 supports a vector mode of the piece size for the construction; if not, it
2146 falls back to checking an integer mode of the piece size. It returns
2147 NULL_TREE if no usable composition is found.
2148
2149 For example, for (vtype=V16QI, nelts=4), we can probably get:
2150 - V16QI with PTYPE V4QI.
2151 - V4SI with PTYPE SI.
2152 - NULL_TREE. */
2153
2154 static tree
2155 vector_vector_composition_type (tree vtype, poly_uint64 nelts, tree *ptype)
2156 {
2157 gcc_assert (VECTOR_TYPE_P (vtype));
2158 gcc_assert (known_gt (nelts, 0U));
2159
2160 machine_mode vmode = TYPE_MODE (vtype);
2161 if (!VECTOR_MODE_P (vmode))
2162 return NULL_TREE;
2163
2164 poly_uint64 vbsize = GET_MODE_BITSIZE (vmode);
2165 unsigned int pbsize;
2166 if (constant_multiple_p (vbsize, nelts, &pbsize))
2167 {
2168 /* First check if vec_init optab supports construction from
2169 vector pieces directly. */
2170 scalar_mode elmode = SCALAR_TYPE_MODE (TREE_TYPE (vtype));
2171 poly_uint64 inelts = pbsize / GET_MODE_BITSIZE (elmode);
2172 machine_mode rmode;
2173 if (related_vector_mode (vmode, elmode, inelts).exists (&rmode)
2174 && (convert_optab_handler (vec_init_optab, vmode, rmode)
2175 != CODE_FOR_nothing))
2176 {
2177 *ptype = build_vector_type (TREE_TYPE (vtype), inelts);
2178 return vtype;
2179 }
2180
2181 /* Otherwise check whether an integer type of the same piece size exists
2182 and whether the vec_init optab supports construction from it directly. */
2183 if (int_mode_for_size (pbsize, 0).exists (&elmode)
2184 && related_vector_mode (vmode, elmode, nelts).exists (&rmode)
2185 && (convert_optab_handler (vec_init_optab, rmode, elmode)
2186 != CODE_FOR_nothing))
2187 {
2188 *ptype = build_nonstandard_integer_type (pbsize, 1);
2189 return build_vector_type (*ptype, nelts);
2190 }
2191 }
2192
2193 return NULL_TREE;
2194 }
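/* For example, get_group_load_store_type below calls this function with
   NELTS == 2 to ask whether a vector can be composed from two halves, so
   that a trailing gap covering half of a group can be handled by loading
   only the first half. */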
2195
2196 /* A subroutine of get_load_store_type, with a subset of the same
2197 arguments. Handle the case where STMT_INFO is part of a grouped load
2198 or store.
2199
2200 For stores, the statements in the group are all consecutive
2201 and there is no gap at the end. For loads, the statements in the
2202 group might not be consecutive; there can be gaps between statements
2203 as well as at the end. */
2204
2205 static bool
2206 get_group_load_store_type (vec_info *vinfo, stmt_vec_info stmt_info,
2207 tree vectype, bool slp,
2208 bool masked_p, vec_load_store_type vls_type,
2209 vect_memory_access_type *memory_access_type,
2210 gather_scatter_info *gs_info)
2211 {
2212 loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
2213 class loop *loop = loop_vinfo ? LOOP_VINFO_LOOP (loop_vinfo) : NULL;
2214 stmt_vec_info first_stmt_info = DR_GROUP_FIRST_ELEMENT (stmt_info);
2215 dr_vec_info *first_dr_info = STMT_VINFO_DR_INFO (first_stmt_info);
2216 unsigned int group_size = DR_GROUP_SIZE (first_stmt_info);
2217 bool single_element_p = (stmt_info == first_stmt_info
2218 && !DR_GROUP_NEXT_ELEMENT (stmt_info));
2219 unsigned HOST_WIDE_INT gap = DR_GROUP_GAP (first_stmt_info);
2220 poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
2221
2222 /* True if the vectorized statements would access beyond the last
2223 statement in the group. */
2224 bool overrun_p = false;
2225
2226 /* True if we can cope with such overrun by peeling for gaps, so that
2227 there is at least one final scalar iteration after the vector loop. */
2228 bool can_overrun_p = (!masked_p
2229 && vls_type == VLS_LOAD
2230 && loop_vinfo
2231 && !loop->inner);
2232
2233 /* There can only be a gap at the end of the group if the stride is
2234 known at compile time. */
2235 gcc_assert (!STMT_VINFO_STRIDED_P (first_stmt_info) || gap == 0);
2236
2237 /* Stores can't yet have gaps. */
2238 gcc_assert (slp || vls_type == VLS_LOAD || gap == 0);
2239
2240 if (slp)
2241 {
2242 if (STMT_VINFO_STRIDED_P (first_stmt_info))
2243 {
2244 /* Try to use consecutive accesses of DR_GROUP_SIZE elements,
2245 separated by the stride, until we have a complete vector.
2246 Fall back to scalar accesses if that isn't possible. */
2247 if (multiple_p (nunits, group_size))
2248 *memory_access_type = VMAT_STRIDED_SLP;
2249 else
2250 *memory_access_type = VMAT_ELEMENTWISE;
2251 }
2252 else
2253 {
2254 overrun_p = loop_vinfo && gap != 0;
2255 if (overrun_p && vls_type != VLS_LOAD)
2256 {
2257 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2258 "Grouped store with gaps requires"
2259 " non-consecutive accesses\n");
2260 return false;
2261 }
2262 /* An overrun is fine if the trailing elements are smaller
2263 than the alignment boundary B. Every vector access will
2264 be a multiple of B and so we are guaranteed to access a
2265 non-gap element in the same B-sized block. */
2266 if (overrun_p
2267 && gap < (vect_known_alignment_in_bytes (first_dr_info)
2268 / vect_get_scalar_dr_size (first_dr_info)))
2269 overrun_p = false;
2270
2271 /* If the gap splits the vector in half and the target
2272 can do half-vector operations, avoid the epilogue peeling
2273 by simply loading only half of the vector. Usually
2274 the construction with an upper zero half will be elided. */
2275 dr_alignment_support alignment_support_scheme;
2276 tree half_vtype;
2277 if (overrun_p
2278 && !masked_p
2279 && (((alignment_support_scheme
2280 = vect_supportable_dr_alignment (vinfo,
2281 first_dr_info, false)))
2282 == dr_aligned
2283 || alignment_support_scheme == dr_unaligned_supported)
2284 && known_eq (nunits, (group_size - gap) * 2)
2285 && known_eq (nunits, group_size)
2286 && (vector_vector_composition_type (vectype, 2, &half_vtype)
2287 != NULL_TREE))
2288 overrun_p = false;
2289
2290 if (overrun_p && !can_overrun_p)
2291 {
2292 if (dump_enabled_p ())
2293 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2294 "Peeling for outer loop is not supported\n");
2295 return false;
2296 }
2297 int cmp = compare_step_with_zero (vinfo, stmt_info);
2298 if (cmp < 0)
2299 *memory_access_type = get_negative_load_store_type
2300 (vinfo, stmt_info, vectype, vls_type, 1);
2301 else
2302 {
2303 gcc_assert (!loop_vinfo || cmp > 0);
2304 *memory_access_type = VMAT_CONTIGUOUS;
2305 }
2306 }
2307 }
2308 else
2309 {
2310 /* We can always handle this case using elementwise accesses,
2311 but see if something more efficient is available. */
2312 *memory_access_type = VMAT_ELEMENTWISE;
2313
2314 /* If there is a gap at the end of the group then these optimizations
2315 would access excess elements in the last iteration. */
2316 bool would_overrun_p = (gap != 0);
2317 /* An overrun is fine if the trailing elements are smaller than the
2318 alignment boundary B. Every vector access will be a multiple of B
2319 and so we are guaranteed to access a non-gap element in the
2320 same B-sized block. */
2321 if (would_overrun_p
2322 && !masked_p
2323 && gap < (vect_known_alignment_in_bytes (first_dr_info)
2324 / vect_get_scalar_dr_size (first_dr_info)))
2325 would_overrun_p = false;
2326
2327 if (!STMT_VINFO_STRIDED_P (first_stmt_info)
2328 && (can_overrun_p || !would_overrun_p)
2329 && compare_step_with_zero (vinfo, stmt_info) > 0)
2330 {
2331 /* First cope with the degenerate case of a single-element
2332 vector. */
2333 if (known_eq (TYPE_VECTOR_SUBPARTS (vectype), 1U))
2334 *memory_access_type = VMAT_CONTIGUOUS;
2335
2336 /* Otherwise try using LOAD/STORE_LANES. */
2337 if (*memory_access_type == VMAT_ELEMENTWISE
2338 && (vls_type == VLS_LOAD
2339 ? vect_load_lanes_supported (vectype, group_size, masked_p)
2340 : vect_store_lanes_supported (vectype, group_size,
2341 masked_p)))
2342 {
2343 *memory_access_type = VMAT_LOAD_STORE_LANES;
2344 overrun_p = would_overrun_p;
2345 }
2346
2347 /* If that fails, try using permuting loads or stores. */
2348 if (*memory_access_type == VMAT_ELEMENTWISE
2349 && (vls_type == VLS_LOAD
2350 ? vect_grouped_load_supported (vectype, single_element_p,
2351 group_size)
2352 : vect_grouped_store_supported (vectype, group_size)))
2353 {
2354 *memory_access_type = VMAT_CONTIGUOUS_PERMUTE;
2355 overrun_p = would_overrun_p;
2356 }
2357 }
2358
2359 /* As a last resort, try using a gather load or scatter store.
2360
2361 ??? Although the code can handle all group sizes correctly,
2362 it probably isn't a win to use separate strided accesses based
2363 on nearby locations. Or, even if it's a win over scalar code,
2364 it might not be a win over vectorizing at a lower VF, if that
2365 allows us to use contiguous accesses. */
2366 if (*memory_access_type == VMAT_ELEMENTWISE
2367 && single_element_p
2368 && loop_vinfo
2369 && vect_use_strided_gather_scatters_p (stmt_info, loop_vinfo,
2370 masked_p, gs_info))
2371 *memory_access_type = VMAT_GATHER_SCATTER;
2372 }
2373
2374 if (vls_type != VLS_LOAD && first_stmt_info == stmt_info)
2375 {
2376 /* STMT_INFO is the leader of the group. Check the operands of all the
2377 stmts of the group. */
2378 stmt_vec_info next_stmt_info = DR_GROUP_NEXT_ELEMENT (stmt_info);
2379 while (next_stmt_info)
2380 {
2381 tree op = vect_get_store_rhs (next_stmt_info);
2382 enum vect_def_type dt;
2383 if (!vect_is_simple_use (op, vinfo, &dt))
2384 {
2385 if (dump_enabled_p ())
2386 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2387 "use not simple.\n");
2388 return false;
2389 }
2390 next_stmt_info = DR_GROUP_NEXT_ELEMENT (next_stmt_info);
2391 }
2392 }
2393
2394 if (overrun_p)
2395 {
2396 gcc_assert (can_overrun_p);
2397 if (dump_enabled_p ())
2398 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2399 "Data access with gaps requires scalar "
2400 "epilogue loop\n");
2401 LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo) = true;
2402 }
2403
2404 return true;
2405 }
2406
2407 /* Analyze load or store statement STMT_INFO of type VLS_TYPE. Return true
2408 if there is a memory access type that the vectorized form can use,
2409 storing it in *MEMORY_ACCESS_TYPE if so. If we decide to use gathers
2410 or scatters, fill in GS_INFO accordingly.
2411
2412 SLP says whether we're performing SLP rather than loop vectorization.
2413 MASKED_P is true if the statement is conditional on a vectorized mask.
2414 VECTYPE is the vector type that the vectorized statements will use.
2415 NCOPIES is the number of vector statements that will be needed. */
2416
2417 static bool
2418 get_load_store_type (vec_info *vinfo, stmt_vec_info stmt_info,
2419 tree vectype, bool slp,
2420 bool masked_p, vec_load_store_type vls_type,
2421 unsigned int ncopies,
2422 vect_memory_access_type *memory_access_type,
2423 gather_scatter_info *gs_info)
2424 {
2425 loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
2426 poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
2427 if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
2428 {
2429 *memory_access_type = VMAT_GATHER_SCATTER;
2430 if (!vect_check_gather_scatter (stmt_info, loop_vinfo, gs_info))
2431 gcc_unreachable ();
2432 else if (!vect_is_simple_use (gs_info->offset, vinfo,
2433 &gs_info->offset_dt,
2434 &gs_info->offset_vectype))
2435 {
2436 if (dump_enabled_p ())
2437 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2438 "%s index use not simple.\n",
2439 vls_type == VLS_LOAD ? "gather" : "scatter");
2440 return false;
2441 }
2442 }
2443 else if (STMT_VINFO_GROUPED_ACCESS (stmt_info))
2444 {
2445 if (!get_group_load_store_type (vinfo, stmt_info, vectype, slp, masked_p,
2446 vls_type, memory_access_type, gs_info))
2447 return false;
2448 }
2449 else if (STMT_VINFO_STRIDED_P (stmt_info))
2450 {
2451 gcc_assert (!slp);
2452 if (loop_vinfo
2453 && vect_use_strided_gather_scatters_p (stmt_info, loop_vinfo,
2454 masked_p, gs_info))
2455 *memory_access_type = VMAT_GATHER_SCATTER;
2456 else
2457 *memory_access_type = VMAT_ELEMENTWISE;
2458 }
2459 else
2460 {
2461 int cmp = compare_step_with_zero (vinfo, stmt_info);
2462 if (cmp < 0)
2463 *memory_access_type = get_negative_load_store_type
2464 (vinfo, stmt_info, vectype, vls_type, ncopies);
2465 else if (cmp == 0)
2466 {
2467 gcc_assert (vls_type == VLS_LOAD);
2468 *memory_access_type = VMAT_INVARIANT;
2469 }
2470 else
2471 *memory_access_type = VMAT_CONTIGUOUS;
2472 }
2473
2474 if ((*memory_access_type == VMAT_ELEMENTWISE
2475 || *memory_access_type == VMAT_STRIDED_SLP)
2476 && !nunits.is_constant ())
2477 {
2478 if (dump_enabled_p ())
2479 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2480 "Not using elementwise accesses due to variable "
2481 "vectorization factor.\n");
2482 return false;
2483 }
2484
2485 /* FIXME: At the moment the cost model seems to underestimate the
2486 cost of using elementwise accesses. This check preserves the
2487 traditional behavior until that can be fixed. */
2488 stmt_vec_info first_stmt_info = DR_GROUP_FIRST_ELEMENT (stmt_info);
2489 if (!first_stmt_info)
2490 first_stmt_info = stmt_info;
2491 if (*memory_access_type == VMAT_ELEMENTWISE
2492 && !STMT_VINFO_STRIDED_P (first_stmt_info)
2493 && !(stmt_info == DR_GROUP_FIRST_ELEMENT (stmt_info)
2494 && !DR_GROUP_NEXT_ELEMENT (stmt_info)
2495 && !pow2p_hwi (DR_GROUP_SIZE (stmt_info))))
2496 {
2497 if (dump_enabled_p ())
2498 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2499 "not falling back to elementwise accesses\n");
2500 return false;
2501 }
2502 return true;
2503 }
2504
2505 /* Return true if boolean argument MASK is suitable for vectorizing
2506 conditional operation STMT_INFO. When returning true, store the type
2507 of the definition in *MASK_DT_OUT and the type of the vectorized mask
2508 in *MASK_VECTYPE_OUT. */
2509
2510 static bool
2511 vect_check_scalar_mask (vec_info *vinfo, stmt_vec_info stmt_info, tree mask,
2512 vect_def_type *mask_dt_out,
2513 tree *mask_vectype_out)
2514 {
2515 if (!VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (mask)))
2516 {
2517 if (dump_enabled_p ())
2518 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2519 "mask argument is not a boolean.\n");
2520 return false;
2521 }
2522
2523 if (TREE_CODE (mask) != SSA_NAME)
2524 {
2525 if (dump_enabled_p ())
2526 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2527 "mask argument is not an SSA name.\n");
2528 return false;
2529 }
2530
2531 enum vect_def_type mask_dt;
2532 tree mask_vectype;
2533 if (!vect_is_simple_use (mask, vinfo, &mask_dt, &mask_vectype))
2534 {
2535 if (dump_enabled_p ())
2536 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2537 "mask use not simple.\n");
2538 return false;
2539 }
2540
2541 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
2542 if (!mask_vectype)
2543 mask_vectype = get_mask_type_for_scalar_type (vinfo, TREE_TYPE (vectype));
2544
2545 if (!mask_vectype || !VECTOR_BOOLEAN_TYPE_P (mask_vectype))
2546 {
2547 if (dump_enabled_p ())
2548 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2549 "could not find an appropriate vector mask type.\n");
2550 return false;
2551 }
2552
2553 if (maybe_ne (TYPE_VECTOR_SUBPARTS (mask_vectype),
2554 TYPE_VECTOR_SUBPARTS (vectype)))
2555 {
2556 if (dump_enabled_p ())
2557 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2558 "vector mask type %T"
2559 " does not match vector data type %T.\n",
2560 mask_vectype, vectype);
2561
2562 return false;
2563 }
2564
2565 *mask_dt_out = mask_dt;
2566 *mask_vectype_out = mask_vectype;
2567 return true;
2568 }
2569
2570 /* Return true if stored value RHS is suitable for vectorizing store
2571 statement STMT_INFO. When returning true, store the type of the
2572 definition in *RHS_DT_OUT, the type of the vectorized store value in
2573 *RHS_VECTYPE_OUT and the type of the store in *VLS_TYPE_OUT. */
2574
2575 static bool
2576 vect_check_store_rhs (vec_info *vinfo, stmt_vec_info stmt_info, tree rhs,
2577 vect_def_type *rhs_dt_out, tree *rhs_vectype_out,
2578 vec_load_store_type *vls_type_out)
2579 {
2580 /* In the case this is a store from a constant, make sure
2581 native_encode_expr can handle it. */
2582 if (CONSTANT_CLASS_P (rhs) && native_encode_expr (rhs, NULL, 64) == 0)
2583 {
2584 if (dump_enabled_p ())
2585 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2586 "cannot encode constant as a byte sequence.\n");
2587 return false;
2588 }
2589
2590 enum vect_def_type rhs_dt;
2591 tree rhs_vectype;
2592 if (!vect_is_simple_use (rhs, vinfo, &rhs_dt, &rhs_vectype))
2593 {
2594 if (dump_enabled_p ())
2595 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2596 "use not simple.\n");
2597 return false;
2598 }
2599
2600 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
2601 if (rhs_vectype && !useless_type_conversion_p (vectype, rhs_vectype))
2602 {
2603 if (dump_enabled_p ())
2604 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2605 "incompatible vector types.\n");
2606 return false;
2607 }
2608
2609 *rhs_dt_out = rhs_dt;
2610 *rhs_vectype_out = rhs_vectype;
2611 if (rhs_dt == vect_constant_def || rhs_dt == vect_external_def)
2612 *vls_type_out = VLS_STORE_INVARIANT;
2613 else
2614 *vls_type_out = VLS_STORE;
2615 return true;
2616 }
2617
2618 /* Build an all-ones vector mask of type MASKTYPE while vectorizing STMT_INFO.
2619 Note that we support masks with floating-point type, in which case the
2620 floats are interpreted as a bitmask. */
2621
2622 static tree
2623 vect_build_all_ones_mask (vec_info *vinfo,
2624 stmt_vec_info stmt_info, tree masktype)
2625 {
2626 if (TREE_CODE (masktype) == INTEGER_TYPE)
2627 return build_int_cst (masktype, -1);
2628 else if (TREE_CODE (TREE_TYPE (masktype)) == INTEGER_TYPE)
2629 {
2630 tree mask = build_int_cst (TREE_TYPE (masktype), -1);
2631 mask = build_vector_from_val (masktype, mask);
2632 return vect_init_vector (vinfo, stmt_info, mask, masktype, NULL);
2633 }
2634 else if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (masktype)))
2635 {
2636 REAL_VALUE_TYPE r;
2637 long tmp[6];
2638 for (int j = 0; j < 6; ++j)
2639 tmp[j] = -1;
2640 real_from_target (&r, tmp, TYPE_MODE (TREE_TYPE (masktype)));
2641 tree mask = build_real (TREE_TYPE (masktype), r);
2642 mask = build_vector_from_val (masktype, mask);
2643 return vect_init_vector (vinfo, stmt_info, mask, masktype, NULL);
2644 }
2645 gcc_unreachable ();
2646 }
2647
2648 /* Build an all-zero merge value of type VECTYPE while vectorizing
2649 STMT_INFO as a gather load. */
2650
2651 static tree
2652 vect_build_zero_merge_argument (vec_info *vinfo,
2653 stmt_vec_info stmt_info, tree vectype)
2654 {
2655 tree merge;
2656 if (TREE_CODE (TREE_TYPE (vectype)) == INTEGER_TYPE)
2657 merge = build_int_cst (TREE_TYPE (vectype), 0);
2658 else if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (vectype)))
2659 {
2660 REAL_VALUE_TYPE r;
2661 long tmp[6];
2662 for (int j = 0; j < 6; ++j)
2663 tmp[j] = 0;
2664 real_from_target (&r, tmp, TYPE_MODE (TREE_TYPE (vectype)));
2665 merge = build_real (TREE_TYPE (vectype), r);
2666 }
2667 else
2668 gcc_unreachable ();
2669 merge = build_vector_from_val (vectype, merge);
2670 return vect_init_vector (vinfo, stmt_info, merge, vectype, NULL);
2671 }
2672
2673 /* Build a gather load call while vectorizing STMT_INFO. Insert new
2674 instructions before GSI and add them to VEC_STMT. GS_INFO describes
2675 the gather load operation. If the load is conditional, MASK is the
2676 unvectorized condition and MASK_DT is its definition type, otherwise
2677 MASK is null. */
2678
2679 static void
2680 vect_build_gather_load_calls (vec_info *vinfo, stmt_vec_info stmt_info,
2681 gimple_stmt_iterator *gsi,
2682 stmt_vec_info *vec_stmt,
2683 gather_scatter_info *gs_info,
2684 tree mask)
2685 {
2686 loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
2687 class loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
2688 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
2689 poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
2690 int ncopies = vect_get_num_copies (loop_vinfo, vectype);
2691 edge pe = loop_preheader_edge (loop);
2692 enum { NARROW, NONE, WIDEN } modifier;
2693 poly_uint64 gather_off_nunits
2694 = TYPE_VECTOR_SUBPARTS (gs_info->offset_vectype);
2695
2696 tree arglist = TYPE_ARG_TYPES (TREE_TYPE (gs_info->decl));
2697 tree rettype = TREE_TYPE (TREE_TYPE (gs_info->decl));
2698 tree srctype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
2699 tree ptrtype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
2700 tree idxtype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
2701 tree masktype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
2702 tree scaletype = TREE_VALUE (arglist);
2703 tree real_masktype = masktype;
2704 gcc_checking_assert (types_compatible_p (srctype, rettype)
2705 && (!mask
2706 || TREE_CODE (masktype) == INTEGER_TYPE
2707 || types_compatible_p (srctype, masktype)));
2708 if (mask && TREE_CODE (masktype) == INTEGER_TYPE)
2709 masktype = truth_type_for (srctype);
2710
2711 tree mask_halftype = masktype;
2712 tree perm_mask = NULL_TREE;
2713 tree mask_perm_mask = NULL_TREE;
2714 if (known_eq (nunits, gather_off_nunits))
2715 modifier = NONE;
2716 else if (known_eq (nunits * 2, gather_off_nunits))
2717 {
2718 modifier = WIDEN;
2719
2720 /* Currently widening gathers and scatters are only supported for
2721 fixed-length vectors. */
2722 int count = gather_off_nunits.to_constant ();
2723 vec_perm_builder sel (count, count, 1);
2724 for (int i = 0; i < count; ++i)
2725 sel.quick_push (i | (count / 2));
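/* For example, for COUNT == 8 this builds the selector
   { 4, 5, 6, 7, 4, 5, 6, 7 }, which moves the second half of the offset
   vector into the low half for the odd-numbered copies below. */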
2726
2727 vec_perm_indices indices (sel, 1, count);
2728 perm_mask = vect_gen_perm_mask_checked (gs_info->offset_vectype,
2729 indices);
2730 }
2731 else if (known_eq (nunits, gather_off_nunits * 2))
2732 {
2733 modifier = NARROW;
2734
2735 /* Currently narrowing gathers and scatters are only supported for
2736 fixed-length vectors. */
2737 int count = nunits.to_constant ();
2738 vec_perm_builder sel (count, count, 1);
2739 sel.quick_grow (count);
2740 for (int i = 0; i < count; ++i)
2741 sel[i] = i < count / 2 ? i : i + count / 2;
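/* For example, for COUNT == 8 this builds the selector
   { 0, 1, 2, 3, 8, 9, 10, 11 }, which concatenates the low halves of
   two input vectors. */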
2742 vec_perm_indices indices (sel, 2, count);
2743 perm_mask = vect_gen_perm_mask_checked (vectype, indices);
2744
2745 ncopies *= 2;
2746
2747 if (mask && masktype == real_masktype)
2748 {
2749 for (int i = 0; i < count; ++i)
2750 sel[i] = i | (count / 2);
2751 indices.new_vector (sel, 2, count);
2752 mask_perm_mask = vect_gen_perm_mask_checked (masktype, indices);
2753 }
2754 else if (mask)
2755 mask_halftype = truth_type_for (gs_info->offset_vectype);
2756 }
2757 else
2758 gcc_unreachable ();
2759
2760 tree scalar_dest = gimple_get_lhs (stmt_info->stmt);
2761 tree vec_dest = vect_create_destination_var (scalar_dest, vectype);
2762
2763 tree ptr = fold_convert (ptrtype, gs_info->base);
2764 if (!is_gimple_min_invariant (ptr))
2765 {
2766 gimple_seq seq;
2767 ptr = force_gimple_operand (ptr, &seq, true, NULL_TREE);
2768 basic_block new_bb = gsi_insert_seq_on_edge_immediate (pe, seq);
2769 gcc_assert (!new_bb);
2770 }
2771
2772 tree scale = build_int_cst (scaletype, gs_info->scale);
2773
2774 tree vec_oprnd0 = NULL_TREE;
2775 tree vec_mask = NULL_TREE;
2776 tree src_op = NULL_TREE;
2777 tree mask_op = NULL_TREE;
2778 tree prev_res = NULL_TREE;
2779 stmt_vec_info prev_stmt_info = NULL;
2780
2781 if (!mask)
2782 {
2783 src_op = vect_build_zero_merge_argument (vinfo, stmt_info, rettype);
2784 mask_op = vect_build_all_ones_mask (vinfo, stmt_info, masktype);
2785 }
2786
2787 for (int j = 0; j < ncopies; ++j)
2788 {
2789 tree op, var;
2790 if (modifier == WIDEN && (j & 1))
2791 op = permute_vec_elements (vinfo, vec_oprnd0, vec_oprnd0,
2792 perm_mask, stmt_info, gsi);
2793 else if (j == 0)
2794 op = vec_oprnd0
2795 = vect_get_vec_def_for_operand (vinfo, gs_info->offset, stmt_info);
2796 else
2797 op = vec_oprnd0 = vect_get_vec_def_for_stmt_copy (loop_vinfo,
2798 vec_oprnd0);
2799
2800 if (!useless_type_conversion_p (idxtype, TREE_TYPE (op)))
2801 {
2802 gcc_assert (known_eq (TYPE_VECTOR_SUBPARTS (TREE_TYPE (op)),
2803 TYPE_VECTOR_SUBPARTS (idxtype)));
2804 var = vect_get_new_ssa_name (idxtype, vect_simple_var);
2805 op = build1 (VIEW_CONVERT_EXPR, idxtype, op);
2806 gassign *new_stmt = gimple_build_assign (var, VIEW_CONVERT_EXPR, op);
2807 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
2808 op = var;
2809 }
2810
2811 if (mask)
2812 {
2813 if (mask_perm_mask && (j & 1))
2814 mask_op = permute_vec_elements (vinfo, mask_op, mask_op,
2815 mask_perm_mask, stmt_info, gsi);
2816 else
2817 {
2818 if (j == 0)
2819 vec_mask = vect_get_vec_def_for_operand (vinfo, mask, stmt_info);
2820 else if (modifier != NARROW || (j & 1) == 0)
2821 vec_mask = vect_get_vec_def_for_stmt_copy (loop_vinfo,
2822 vec_mask);
2823
2824 mask_op = vec_mask;
2825 if (!useless_type_conversion_p (masktype, TREE_TYPE (vec_mask)))
2826 {
2827 poly_uint64 sub1 = TYPE_VECTOR_SUBPARTS (TREE_TYPE (mask_op));
2828 poly_uint64 sub2 = TYPE_VECTOR_SUBPARTS (masktype);
2829 gcc_assert (known_eq (sub1, sub2));
2830 var = vect_get_new_ssa_name (masktype, vect_simple_var);
2831 mask_op = build1 (VIEW_CONVERT_EXPR, masktype, mask_op);
2832 gassign *new_stmt
2833 = gimple_build_assign (var, VIEW_CONVERT_EXPR, mask_op);
2834 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
2835 mask_op = var;
2836 }
2837 }
2838 if (modifier == NARROW && masktype != real_masktype)
2839 {
2840 var = vect_get_new_ssa_name (mask_halftype, vect_simple_var);
2841 gassign *new_stmt
2842 = gimple_build_assign (var, (j & 1) ? VEC_UNPACK_HI_EXPR
2843 : VEC_UNPACK_LO_EXPR,
2844 mask_op);
2845 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
2846 mask_op = var;
2847 }
2848 src_op = mask_op;
2849 }
2850
2851 tree mask_arg = mask_op;
2852 if (masktype != real_masktype)
2853 {
2854 tree utype, optype = TREE_TYPE (mask_op);
2855 if (TYPE_MODE (real_masktype) == TYPE_MODE (optype))
2856 utype = real_masktype;
2857 else
2858 utype = lang_hooks.types.type_for_mode (TYPE_MODE (optype), 1);
2859 var = vect_get_new_ssa_name (utype, vect_scalar_var);
2860 mask_arg = build1 (VIEW_CONVERT_EXPR, utype, mask_op);
2861 gassign *new_stmt
2862 = gimple_build_assign (var, VIEW_CONVERT_EXPR, mask_arg);
2863 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
2864 mask_arg = var;
2865 if (!useless_type_conversion_p (real_masktype, utype))
2866 {
2867 gcc_assert (TYPE_PRECISION (utype)
2868 <= TYPE_PRECISION (real_masktype));
2869 var = vect_get_new_ssa_name (real_masktype, vect_scalar_var);
2870 new_stmt = gimple_build_assign (var, NOP_EXPR, mask_arg);
2871 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
2872 mask_arg = var;
2873 }
2874 src_op = build_zero_cst (srctype);
2875 }
2876 gcall *new_call = gimple_build_call (gs_info->decl, 5, src_op, ptr, op,
2877 mask_arg, scale);
2878
2879 stmt_vec_info new_stmt_info;
2880 if (!useless_type_conversion_p (vectype, rettype))
2881 {
2882 gcc_assert (known_eq (TYPE_VECTOR_SUBPARTS (vectype),
2883 TYPE_VECTOR_SUBPARTS (rettype)));
2884 op = vect_get_new_ssa_name (rettype, vect_simple_var);
2885 gimple_call_set_lhs (new_call, op);
2886 vect_finish_stmt_generation (vinfo, stmt_info, new_call, gsi);
2887 var = make_ssa_name (vec_dest);
2888 op = build1 (VIEW_CONVERT_EXPR, vectype, op);
2889 gassign *new_stmt = gimple_build_assign (var, VIEW_CONVERT_EXPR, op);
2890 new_stmt_info
2891 = vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
2892 }
2893 else
2894 {
2895 var = make_ssa_name (vec_dest, new_call);
2896 gimple_call_set_lhs (new_call, var);
2897 new_stmt_info
2898 = vect_finish_stmt_generation (vinfo, stmt_info, new_call, gsi);
2899 }
2900
2901 if (modifier == NARROW)
2902 {
2903 if ((j & 1) == 0)
2904 {
2905 prev_res = var;
2906 continue;
2907 }
2908 var = permute_vec_elements (vinfo, prev_res, var, perm_mask,
2909 stmt_info, gsi);
2910 new_stmt_info = loop_vinfo->lookup_def (var);
2911 }
2912
2913 if (prev_stmt_info == NULL)
2914 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt_info;
2915 else
2916 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt_info;
2917 prev_stmt_info = new_stmt_info;
2918 }
2919 }
2920
2921 /* Prepare the base and offset in GS_INFO for vectorization.
2922 Set *DATAREF_PTR to the loop-invariant base address and *VEC_OFFSET
2923 to the vectorized offset argument for the first copy of STMT_INFO.
2924 STMT_INFO is the statement described by GS_INFO and LOOP is the
2925 containing loop. */
2926
2927 static void
2928 vect_get_gather_scatter_ops (vec_info *vinfo,
2929 class loop *loop, stmt_vec_info stmt_info,
2930 gather_scatter_info *gs_info,
2931 tree *dataref_ptr, tree *vec_offset)
2932 {
2933 gimple_seq stmts = NULL;
2934 *dataref_ptr = force_gimple_operand (gs_info->base, &stmts, true, NULL_TREE);
2935 if (stmts != NULL)
2936 {
2937 basic_block new_bb;
2938 edge pe = loop_preheader_edge (loop);
2939 new_bb = gsi_insert_seq_on_edge_immediate (pe, stmts);
2940 gcc_assert (!new_bb);
2941 }
2942 *vec_offset = vect_get_vec_def_for_operand (vinfo, gs_info->offset, stmt_info,
2943 gs_info->offset_vectype);
2944 }
2945
2946 /* Prepare to implement a grouped or strided load or store using
2947 the gather load or scatter store operation described by GS_INFO.
2948 STMT_INFO is the load or store statement.
2949
2950 Set *DATAREF_BUMP to the amount that should be added to the base
2951 address after each copy of the vectorized statement. Set *VEC_OFFSET
2952 to an invariant offset vector in which element I has the value
2953 I * DR_STEP / SCALE. */
2954
2955 static void
2956 vect_get_strided_load_store_ops (stmt_vec_info stmt_info,
2957 loop_vec_info loop_vinfo,
2958 gather_scatter_info *gs_info,
2959 tree *dataref_bump, tree *vec_offset)
2960 {
2961 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
2962 class loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
2963 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
2964 gimple_seq stmts;
2965
2966 tree bump = size_binop (MULT_EXPR,
2967 fold_convert (sizetype, unshare_expr (DR_STEP (dr))),
2968 size_int (TYPE_VECTOR_SUBPARTS (vectype)));
2969 *dataref_bump = force_gimple_operand (bump, &stmts, true, NULL_TREE);
2970 if (stmts)
2971 gsi_insert_seq_on_edge_immediate (loop_preheader_edge (loop), stmts);
2972
2973 /* The offset given in GS_INFO can have pointer type, so use the element
2974 type of the vector instead. */
2975 tree offset_type = TREE_TYPE (gs_info->offset);
2976 offset_type = TREE_TYPE (gs_info->offset_vectype);
2977
2978 /* Calculate X = DR_STEP / SCALE and convert it to the appropriate type. */
2979 tree step = size_binop (EXACT_DIV_EXPR, unshare_expr (DR_STEP (dr)),
2980 ssize_int (gs_info->scale));
2981 step = fold_convert (offset_type, step);
2982 step = force_gimple_operand (step, &stmts, true, NULL_TREE);
2983
2984 /* Create {0, X, X*2, X*3, ...}. */
2985 *vec_offset = gimple_build (&stmts, VEC_SERIES_EXPR, gs_info->offset_vectype,
2986 build_zero_cst (offset_type), step);
2987 if (stmts)
2988 gsi_insert_seq_on_edge_immediate (loop_preheader_edge (loop), stmts);
2989 }
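/* For example (figures purely illustrative): for an int access with
   DR_STEP == 32, SCALE == 4 and a V4SI VECTYPE, DATAREF_BUMP is
   32 * 4 == 128 bytes per copy, X is 32 / 4 == 8 and VEC_OFFSET is
   { 0, 8, 16, 24 }. */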
2990
2991 /* Return the amount that should be added to a vector pointer to move
2992 to the next or previous copy of AGGR_TYPE. DR_INFO is the data reference
2993 being vectorized and MEMORY_ACCESS_TYPE describes the type of
2994 vectorization. */
2995
2996 static tree
2997 vect_get_data_ptr_increment (vec_info *vinfo,
2998 dr_vec_info *dr_info, tree aggr_type,
2999 vect_memory_access_type memory_access_type)
3000 {
3001 if (memory_access_type == VMAT_INVARIANT)
3002 return size_zero_node;
3003
3004 tree iv_step = TYPE_SIZE_UNIT (aggr_type);
3005 tree step = vect_dr_behavior (vinfo, dr_info)->step;
3006 if (tree_int_cst_sgn (step) == -1)
3007 iv_step = fold_build1 (NEGATE_EXPR, TREE_TYPE (iv_step), iv_step);
3008 return iv_step;
3009 }
3010
3011 /* Check and perform vectorization of BUILT_IN_BSWAP{16,32,64,128}. */
3012
3013 static bool
3014 vectorizable_bswap (vec_info *vinfo,
3015 stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
3016 stmt_vec_info *vec_stmt, slp_tree slp_node,
3017 slp_tree *slp_op,
3018 tree vectype_in, stmt_vector_for_cost *cost_vec)
3019 {
3020 tree op, vectype;
3021 gcall *stmt = as_a <gcall *> (stmt_info->stmt);
3022 loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
3023 unsigned ncopies;
3024
3025 op = gimple_call_arg (stmt, 0);
3026 vectype = STMT_VINFO_VECTYPE (stmt_info);
3027 poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
3028
3029 /* Multiple types in SLP are handled by creating the appropriate number of
3030 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
3031 case of SLP. */
3032 if (slp_node)
3033 ncopies = 1;
3034 else
3035 ncopies = vect_get_num_copies (loop_vinfo, vectype);
3036
3037 gcc_assert (ncopies >= 1);
3038
3039 tree char_vectype = get_same_sized_vectype (char_type_node, vectype_in);
3040 if (! char_vectype)
3041 return false;
3042
3043 poly_uint64 num_bytes = TYPE_VECTOR_SUBPARTS (char_vectype);
3044 unsigned word_bytes;
3045 if (!constant_multiple_p (num_bytes, nunits, &word_bytes))
3046 return false;
3047
3048 /* The encoding uses one stepped pattern for each byte in the word. */
3049 vec_perm_builder elts (num_bytes, word_bytes, 3);
3050 for (unsigned i = 0; i < 3; ++i)
3051 for (unsigned j = 0; j < word_bytes; ++j)
3052 elts.quick_push ((i + 1) * word_bytes - j - 1);
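/* For example, for a 32-bit bswap with a V16QI char_vectype this pushes
   { 3, 2, 1, 0, 7, 6, 5, 4, 11, 10, 9, 8 }, which the stepped encoding
   extends to a permutation that byte-swaps each 4-byte word. */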
3053
3054 vec_perm_indices indices (elts, 1, num_bytes);
3055 if (!can_vec_perm_const_p (TYPE_MODE (char_vectype), indices))
3056 return false;
3057
3058 if (! vec_stmt)
3059 {
3060 if (slp_node
3061 && !vect_maybe_update_slp_op_vectype (slp_op[0], vectype_in))
3062 {
3063 if (dump_enabled_p ())
3064 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3065 "incompatible vector types for invariants\n");
3066 return false;
3067 }
3068
3069 STMT_VINFO_TYPE (stmt_info) = call_vec_info_type;
3070 DUMP_VECT_SCOPE ("vectorizable_bswap");
3071 if (! slp_node)
3072 {
3073 record_stmt_cost (cost_vec,
3074 1, vector_stmt, stmt_info, 0, vect_prologue);
3075 record_stmt_cost (cost_vec,
3076 ncopies, vec_perm, stmt_info, 0, vect_body);
3077 }
3078 return true;
3079 }
3080
3081 tree bswap_vconst = vec_perm_indices_to_tree (char_vectype, indices);
3082
3083 /* Transform. */
3084 vec<tree> vec_oprnds = vNULL;
3085 stmt_vec_info new_stmt_info = NULL;
3086 stmt_vec_info prev_stmt_info = NULL;
3087 for (unsigned j = 0; j < ncopies; j++)
3088 {
3089 /* Handle uses. */
3090 if (j == 0)
3091 vect_get_vec_defs (vinfo, op, NULL, stmt_info, &vec_oprnds, NULL,
3092 slp_node);
3093 else
3094 vect_get_vec_defs_for_stmt_copy (vinfo, &vec_oprnds, NULL);
3095
3096 /* Arguments are ready. Create the new vector stmt. */
3097 unsigned i;
3098 tree vop;
3099 FOR_EACH_VEC_ELT (vec_oprnds, i, vop)
3100 {
3101 gimple *new_stmt;
3102 tree tem = make_ssa_name (char_vectype);
3103 new_stmt = gimple_build_assign (tem, build1 (VIEW_CONVERT_EXPR,
3104 char_vectype, vop));
3105 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
3106 tree tem2 = make_ssa_name (char_vectype);
3107 new_stmt = gimple_build_assign (tem2, VEC_PERM_EXPR,
3108 tem, tem, bswap_vconst);
3109 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
3110 tem = make_ssa_name (vectype);
3111 new_stmt = gimple_build_assign (tem, build1 (VIEW_CONVERT_EXPR,
3112 vectype, tem2));
3113 new_stmt_info
3114 = vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
3115 if (slp_node)
3116 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt_info);
3117 }
3118
3119 if (slp_node)
3120 continue;
3121
3122 if (j == 0)
3123 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt_info;
3124 else
3125 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt_info;
3126
3127 prev_stmt_info = new_stmt_info;
3128 }
3129
3130 vec_oprnds.release ();
3131 return true;
3132 }
3133
3134 /* Return true if vector types VECTYPE_IN and VECTYPE_OUT have
3135 integer elements and if we can narrow VECTYPE_IN to VECTYPE_OUT
3136 in a single step. On success, store the binary pack code in
3137 *CONVERT_CODE. */
3138
3139 static bool
3140 simple_integer_narrowing (tree vectype_out, tree vectype_in,
3141 tree_code *convert_code)
3142 {
3143 if (!INTEGRAL_TYPE_P (TREE_TYPE (vectype_out))
3144 || !INTEGRAL_TYPE_P (TREE_TYPE (vectype_in)))
3145 return false;
3146
3147 tree_code code;
3148 int multi_step_cvt = 0;
3149 auto_vec <tree, 8> interm_types;
3150 if (!supportable_narrowing_operation (NOP_EXPR, vectype_out, vectype_in,
3151 &code, &multi_step_cvt, &interm_types)
3152 || multi_step_cvt)
3153 return false;
3154
3155 *convert_code = code;
3156 return true;
3157 }
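/* For example, two V4SI inputs can be narrowed to a V8HI output in a single
   step (typically via VEC_PACK_TRUNC_EXPR), so *CONVERT_CODE is set and the
   function returns true; conversions that would need more than one step are
   rejected. */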
3158
3159 /* Function vectorizable_call.
3160
3161 Check if STMT_INFO performs a function call that can be vectorized.
3162 If VEC_STMT is also passed, vectorize STMT_INFO: create a vectorized
3163 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
3164 Return true if STMT_INFO is vectorizable in this way. */
3165
3166 static bool
3167 vectorizable_call (vec_info *vinfo,
3168 stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
3169 stmt_vec_info *vec_stmt, slp_tree slp_node,
3170 stmt_vector_for_cost *cost_vec)
3171 {
3172 gcall *stmt;
3173 tree vec_dest;
3174 tree scalar_dest;
3175 tree op;
3176 tree vec_oprnd0 = NULL_TREE, vec_oprnd1 = NULL_TREE;
3177 stmt_vec_info prev_stmt_info;
3178 tree vectype_out, vectype_in;
3179 poly_uint64 nunits_in;
3180 poly_uint64 nunits_out;
3181 loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
3182 bb_vec_info bb_vinfo = dyn_cast <bb_vec_info> (vinfo);
3183 tree fndecl, new_temp, rhs_type;
3184 enum vect_def_type dt[4]
3185 = { vect_unknown_def_type, vect_unknown_def_type, vect_unknown_def_type,
3186 vect_unknown_def_type };
3187 tree vectypes[ARRAY_SIZE (dt)] = {};
3188 slp_tree slp_op[ARRAY_SIZE (dt)] = {};
3189 int ndts = ARRAY_SIZE (dt);
3190 int ncopies, j;
3191 auto_vec<tree, 8> vargs;
3192 auto_vec<tree, 8> orig_vargs;
3193 enum { NARROW, NONE, WIDEN } modifier;
3194 size_t i, nargs;
3195 tree lhs;
3196
3197 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
3198 return false;
3199
3200 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
3201 && ! vec_stmt)
3202 return false;
3203
3204 /* Is STMT_INFO a vectorizable call? */
3205 stmt = dyn_cast <gcall *> (stmt_info->stmt);
3206 if (!stmt)
3207 return false;
3208
3209 if (gimple_call_internal_p (stmt)
3210 && (internal_load_fn_p (gimple_call_internal_fn (stmt))
3211 || internal_store_fn_p (gimple_call_internal_fn (stmt))))
3212 /* Handled by vectorizable_load and vectorizable_store. */
3213 return false;
3214
3215 if (gimple_call_lhs (stmt) == NULL_TREE
3216 || TREE_CODE (gimple_call_lhs (stmt)) != SSA_NAME)
3217 return false;
3218
3219 gcc_checking_assert (!stmt_can_throw_internal (cfun, stmt));
3220
3221 vectype_out = STMT_VINFO_VECTYPE (stmt_info);
3222
3223 /* Process function arguments. */
3224 rhs_type = NULL_TREE;
3225 vectype_in = NULL_TREE;
3226 nargs = gimple_call_num_args (stmt);
3227
3228 /* Bail out if the function has more than four arguments; we do not have
3229 interesting builtin functions to vectorize with more than two arguments
3230 except for fma. No arguments is also not good. */
3231 if (nargs == 0 || nargs > 4)
3232 return false;
3233
3234 /* Ignore the arguments of IFN_GOMP_SIMD_LANE, they are magic. */
3235 combined_fn cfn = gimple_call_combined_fn (stmt);
3236 if (cfn == CFN_GOMP_SIMD_LANE)
3237 {
3238 nargs = 0;
3239 rhs_type = unsigned_type_node;
3240 }
3241
3242 int mask_opno = -1;
3243 if (internal_fn_p (cfn))
3244 mask_opno = internal_fn_mask_index (as_internal_fn (cfn));
3245
3246 for (i = 0; i < nargs; i++)
3247 {
3248 if ((int) i == mask_opno)
3249 {
3250 op = gimple_call_arg (stmt, i);
3251 if (!vect_check_scalar_mask (vinfo,
3252 stmt_info, op, &dt[i], &vectypes[i]))
3253 return false;
3254 continue;
3255 }
3256
3257 if (!vect_is_simple_use (vinfo, stmt_info, slp_node,
3258 i, &op, &slp_op[i], &dt[i], &vectypes[i]))
3259 {
3260 if (dump_enabled_p ())
3261 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3262 "use not simple.\n");
3263 return false;
3264 }
3265
3266 /* We can only handle calls with arguments of the same type. */
3267 if (rhs_type
3268 && !types_compatible_p (rhs_type, TREE_TYPE (op)))
3269 {
3270 if (dump_enabled_p ())
3271 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3272 "argument types differ.\n");
3273 return false;
3274 }
3275 if (!rhs_type)
3276 rhs_type = TREE_TYPE (op);
3277
3278 if (!vectype_in)
3279 vectype_in = vectypes[i];
3280 else if (vectypes[i]
3281 && !types_compatible_p (vectypes[i], vectype_in))
3282 {
3283 if (dump_enabled_p ())
3284 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3285 "argument vector types differ.\n");
3286 return false;
3287 }
3288 }
3289 /* If all arguments are external or constant defs, infer the vector type
3290 from the scalar type. */
3291 if (!vectype_in)
3292 vectype_in = get_vectype_for_scalar_type (vinfo, rhs_type, slp_node);
3293 if (vec_stmt)
3294 gcc_assert (vectype_in);
3295 if (!vectype_in)
3296 {
3297 if (dump_enabled_p ())
3298 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3299 "no vectype for scalar type %T\n", rhs_type);
3300
3301 return false;
3302 }
3303 /* FORNOW: we don't yet support mixtures of vector sizes for calls,
3304 just mixtures of nunits. E.g. DI->SI versions of __builtin_ctz*
3305 are traditionally vectorized as two VnDI->VnDI IFN_CTZs followed
3306 by a pack of the two vectors into an SI vector. We would need
3307 separate code to handle direct VnDI->VnSI IFN_CTZs. */
3308 if (TYPE_SIZE (vectype_in) != TYPE_SIZE (vectype_out))
3309 {
3310 if (dump_enabled_p ())
3311 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3312 "mismatched vector sizes %T and %T\n",
3313 vectype_in, vectype_out);
3314 return false;
3315 }
3316
3317 if (VECTOR_BOOLEAN_TYPE_P (vectype_out)
3318 != VECTOR_BOOLEAN_TYPE_P (vectype_in))
3319 {
3320 if (dump_enabled_p ())
3321 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3322 "mixed mask and nonmask vector types\n");
3323 return false;
3324 }
3325
3326 /* FORNOW */
3327 nunits_in = TYPE_VECTOR_SUBPARTS (vectype_in);
3328 nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
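/* Classify the call by comparing lane counts. The vector sizes of
VECTYPE_IN and VECTYPE_OUT were checked to match above, so e.g. a
V4DI -> V8SI call (twice as many output lanes) is a NARROW operation,
while V8SI -> V4DI is the corresponding WIDEN case. */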
3329 if (known_eq (nunits_in * 2, nunits_out))
3330 modifier = NARROW;
3331 else if (known_eq (nunits_out, nunits_in))
3332 modifier = NONE;
3333 else if (known_eq (nunits_out * 2, nunits_in))
3334 modifier = WIDEN;
3335 else
3336 return false;
3337
3338 /* We only handle functions that do not read or clobber memory. */
3339 if (gimple_vuse (stmt))
3340 {
3341 if (dump_enabled_p ())
3342 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3343 "function reads from or writes to memory.\n");
3344 return false;
3345 }
3346
3347 /* For now, we only vectorize the call if a matching internal function
3348 or target-specific builtin is available. TODO -- in some cases, it
3349 might be profitable to insert the calls for pieces of the vector, in
3350 order to be able to vectorize other operations in the loop. */
3351 fndecl = NULL_TREE;
3352 internal_fn ifn = IFN_LAST;
3353 tree callee = gimple_call_fndecl (stmt);
3354
3355 /* First try using an internal function. */
3356 tree_code convert_code = ERROR_MARK;
3357 if (cfn != CFN_LAST
3358 && (modifier == NONE
3359 || (modifier == NARROW
3360 && simple_integer_narrowing (vectype_out, vectype_in,
3361 &convert_code))))
3362 ifn = vectorizable_internal_function (cfn, callee, vectype_out,
3363 vectype_in);
3364
3365 /* If that fails, try asking for a target-specific built-in function. */
3366 if (ifn == IFN_LAST)
3367 {
3368 if (cfn != CFN_LAST)
3369 fndecl = targetm.vectorize.builtin_vectorized_function
3370 (cfn, vectype_out, vectype_in);
3371 else if (callee && fndecl_built_in_p (callee, BUILT_IN_MD))
3372 fndecl = targetm.vectorize.builtin_md_vectorized_function
3373 (callee, vectype_out, vectype_in);
3374 }
3375
3376 if (ifn == IFN_LAST && !fndecl)
3377 {
3378 if (cfn == CFN_GOMP_SIMD_LANE
3379 && !slp_node
3380 && loop_vinfo
3381 && LOOP_VINFO_LOOP (loop_vinfo)->simduid
3382 && TREE_CODE (gimple_call_arg (stmt, 0)) == SSA_NAME
3383 && LOOP_VINFO_LOOP (loop_vinfo)->simduid
3384 == SSA_NAME_VAR (gimple_call_arg (stmt, 0)))
3385 {
3386 /* We can handle IFN_GOMP_SIMD_LANE by returning a
3387 { 0, 1, 2, ... vf - 1 } vector. */
3388 gcc_assert (nargs == 0);
3389 }
3390 else if (modifier == NONE
3391 && (gimple_call_builtin_p (stmt, BUILT_IN_BSWAP16)
3392 || gimple_call_builtin_p (stmt, BUILT_IN_BSWAP32)
3393 || gimple_call_builtin_p (stmt, BUILT_IN_BSWAP64)
3394 || gimple_call_builtin_p (stmt, BUILT_IN_BSWAP128)))
3395 return vectorizable_bswap (vinfo, stmt_info, gsi, vec_stmt, slp_node,
3396 slp_op, vectype_in, cost_vec);
3397 else
3398 {
3399 if (dump_enabled_p ())
3400 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3401 "function is not vectorizable.\n");
3402 return false;
3403 }
3404 }
3405
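/* Determine how many vector statements need to be emitted. With SLP
the unrolling is encoded in the number of SLP statements instead.
A NARROW call lowered to a target builtin consumes two input vectors
per output vector, so the count is based on VECTYPE_OUT; everything
else is counted in units of VECTYPE_IN. */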
3406 if (slp_node)
3407 ncopies = 1;
3408 else if (modifier == NARROW && ifn == IFN_LAST)
3409 ncopies = vect_get_num_copies (loop_vinfo, vectype_out);
3410 else
3411 ncopies = vect_get_num_copies (loop_vinfo, vectype_in);
3412
3413 /* Sanity check: make sure that at least one copy of the vectorized stmt
3414 needs to be generated. */
3415 gcc_assert (ncopies >= 1);
3416
3417 vec_loop_masks *masks = (loop_vinfo ? &LOOP_VINFO_MASKS (loop_vinfo) : NULL);
3418 if (!vec_stmt) /* transformation not required. */
3419 {
3420 if (slp_node)
3421 for (i = 0; i < nargs; ++i)
3422 if (!vect_maybe_update_slp_op_vectype (slp_op[i], vectype_in))
3423 {
3424 if (dump_enabled_p ())
3425 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3426 "incompatible vector types for invariants\n");
3427 return false;
3428 }
3429 STMT_VINFO_TYPE (stmt_info) = call_vec_info_type;
3430 DUMP_VECT_SCOPE ("vectorizable_call");
3431 vect_model_simple_cost (vinfo, stmt_info,
3432 ncopies, dt, ndts, slp_node, cost_vec);
3433 if (ifn != IFN_LAST && modifier == NARROW && !slp_node)
3434 record_stmt_cost (cost_vec, ncopies / 2,
3435 vec_promote_demote, stmt_info, 0, vect_body);
3436
3437 if (loop_vinfo && mask_opno >= 0)
3438 {
3439 unsigned int nvectors = (slp_node
3440 ? SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node)
3441 : ncopies);
3442 tree scalar_mask = gimple_call_arg (stmt_info->stmt, mask_opno);
3443 vect_record_loop_mask (loop_vinfo, masks, nvectors,
3444 vectype_out, scalar_mask);
3445 }
3446 return true;
3447 }
3448
3449 /* Transform. */
3450
3451 if (dump_enabled_p ())
3452 dump_printf_loc (MSG_NOTE, vect_location, "transform call.\n");
3453
3454 /* Handle def. */
3455 scalar_dest = gimple_call_lhs (stmt);
3456 vec_dest = vect_create_destination_var (scalar_dest, vectype_out);
3457
3458 bool masked_loop_p = loop_vinfo && LOOP_VINFO_FULLY_MASKED_P (loop_vinfo);
3459
3460 stmt_vec_info new_stmt_info = NULL;
3461 prev_stmt_info = NULL;
3462 if (modifier == NONE || ifn != IFN_LAST)
3463 {
3464 tree prev_res = NULL_TREE;
3465 vargs.safe_grow (nargs);
3466 orig_vargs.safe_grow (nargs);
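/* Emit NCOPIES vector calls. For copy 0 the vector arguments come
directly from the scalar operands; each later copy chains from the
previous one via vect_get_vec_def_for_stmt_copy. In the SLP case a
single iteration emits all vector stmts of the node. */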
3467 for (j = 0; j < ncopies; ++j)
3468 {
3469 /* Build argument list for the vectorized call. */
3470 if (slp_node)
3471 {
3472 auto_vec<vec<tree> > vec_defs (nargs);
3473 vec<tree> vec_oprnds0;
3474
3475 vect_get_slp_defs (vinfo, slp_node, &vec_defs);
3476 vec_oprnds0 = vec_defs[0];
3477
3478 /* Arguments are ready. Create the new vector stmt. */
3479 FOR_EACH_VEC_ELT (vec_oprnds0, i, vec_oprnd0)
3480 {
3481 size_t k;
3482 for (k = 0; k < nargs; k++)
3483 {
3484 vec<tree> vec_oprndsk = vec_defs[k];
3485 vargs[k] = vec_oprndsk[i];
3486 }
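/* For a NARROW internal call each vector call produces a result of
type VECTYPE_IN covering half of the output lanes; the result of
every even I is saved and combined with the following odd one into
a full VECTYPE_OUT vector using CONVERT_CODE (typically a
VEC_PACK_TRUNC_EXPR chosen by simple_integer_narrowing). */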
3487 if (modifier == NARROW)
3488 {
3489 /* We don't define any narrowing conditional functions
3490 at present. */
3491 gcc_assert (mask_opno < 0);
3492 tree half_res = make_ssa_name (vectype_in);
3493 gcall *call
3494 = gimple_build_call_internal_vec (ifn, vargs);
3495 gimple_call_set_lhs (call, half_res);
3496 gimple_call_set_nothrow (call, true);
3497 vect_finish_stmt_generation (vinfo, stmt_info, call, gsi);
3498 if ((i & 1) == 0)
3499 {
3500 prev_res = half_res;
3501 continue;
3502 }
3503 new_temp = make_ssa_name (vec_dest);
3504 gimple *new_stmt
3505 = gimple_build_assign (new_temp, convert_code,
3506 prev_res, half_res);
3507 new_stmt_info
3508 = vect_finish_stmt_generation (vinfo, stmt_info,
3509 new_stmt, gsi);
3510 }
3511 else
3512 {
3513 if (mask_opno >= 0 && masked_loop_p)
3514 {
3515 unsigned int vec_num = vec_oprnds0.length ();
3516 /* Always true for SLP. */
3517 gcc_assert (ncopies == 1);
3518 tree mask = vect_get_loop_mask (gsi, masks, vec_num,
3519 vectype_out, i);
3520 vargs[mask_opno] = prepare_load_store_mask
3521 (TREE_TYPE (mask), mask, vargs[mask_opno], gsi);
3522 }
3523
3524 gcall *call;
3525 if (ifn != IFN_LAST)
3526 call = gimple_build_call_internal_vec (ifn, vargs);
3527 else
3528 call = gimple_build_call_vec (fndecl, vargs);
3529 new_temp = make_ssa_name (vec_dest, call);
3530 gimple_call_set_lhs (call, new_temp);
3531 gimple_call_set_nothrow (call, true);
3532 new_stmt_info
3533 = vect_finish_stmt_generation (vinfo, stmt_info,
3534 call, gsi);
3535 }
3536 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt_info);
3537 }
3538
3539 for (i = 0; i < nargs; i++)
3540 {
3541 vec<tree> vec_oprndsi = vec_defs[i];
3542 vec_oprndsi.release ();
3543 }
3544 continue;
3545 }
3546
3547 for (i = 0; i < nargs; i++)
3548 {
3549 op = gimple_call_arg (stmt, i);
3550 if (j == 0)
3551 vec_oprnd0
3552 = vect_get_vec_def_for_operand (vinfo,
3553 op, stmt_info, vectypes[i]);
3554 else
3555 vec_oprnd0
3556 = vect_get_vec_def_for_stmt_copy (vinfo, orig_vargs[i]);
3557
3558 orig_vargs[i] = vargs[i] = vec_oprnd0;
3559 }
3560
3561 if (mask_opno >= 0 && masked_loop_p)
3562 {
3563 tree mask = vect_get_loop_mask (gsi, masks, ncopies,
3564 vectype_out, j);
3565 vargs[mask_opno]
3566 = prepare_load_store_mask (TREE_TYPE (mask), mask,
3567 vargs[mask_opno], gsi);
3568 }
3569
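/* An IFN_GOMP_SIMD_LANE call simply yields the lane index, so copy J
becomes the constant vector { j*nunits, j*nunits+1, ... }; e.g. for
a V4SI destination and j == 1 this is { 4, 5, 6, 7 }. */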
3570 if (cfn == CFN_GOMP_SIMD_LANE)
3571 {
3572 tree cst = build_index_vector (vectype_out, j * nunits_out, 1);
3573 tree new_var
3574 = vect_get_new_ssa_name (vectype_out, vect_simple_var, "cst_");
3575 gimple *init_stmt = gimple_build_assign (new_var, cst);
3576 vect_init_vector_1 (vinfo, stmt_info, init_stmt, NULL);
3577 new_temp = make_ssa_name (vec_dest);
3578 gimple *new_stmt = gimple_build_assign (new_temp, new_var);
3579 new_stmt_info
3580 = vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
3581 }
3582 else if (modifier == NARROW)
3583 {
3584 /* We don't define any narrowing conditional functions at
3585 present. */
3586 gcc_assert (mask_opno < 0);
3587 tree half_res = make_ssa_name (vectype_in);
3588 gcall *call = gimple_build_call_internal_vec (ifn, vargs);
3589 gimple_call_set_lhs (call, half_res);
3590 gimple_call_set_nothrow (call, true);
3591 vect_finish_stmt_generation (vinfo, stmt_info, call, gsi);
3592 if ((j & 1) == 0)
3593 {
3594 prev_res = half_res;
3595 continue;
3596 }
3597 new_temp = make_ssa_name (vec_dest);
3598 gassign *new_stmt = gimple_build_assign (new_temp, convert_code,
3599 prev_res, half_res);
3600 new_stmt_info
3601 = vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
3602 }
3603 else
3604 {
3605 gcall *call;
3606 if (ifn != IFN_LAST)
3607 call = gimple_build_call_internal_vec (ifn, vargs);
3608 else
3609 call = gimple_build_call_vec (fndecl, vargs);
3610 new_temp = make_ssa_name (vec_dest, call);
3611 gimple_call_set_lhs (call, new_temp);
3612 gimple_call_set_nothrow (call, true);
3613 new_stmt_info
3614 = vect_finish_stmt_generation (vinfo, stmt_info, call, gsi);
3615 }
3616
3617 if (j == (modifier == NARROW ? 1 : 0))
3618 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt_info;
3619 else
3620 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt_info;
3621
3622 prev_stmt_info = new_stmt_info;
3623 }
3624 }
3625 else if (modifier == NARROW)
3626 {
3627 /* We don't define any narrowing conditional functions at present. */
3628 gcc_assert (mask_opno < 0);
3629 for (j = 0; j < ncopies; ++j)
3630 {
3631 /* Build argument list for the vectorized call. */
3632 if (j == 0)
3633 vargs.create (nargs * 2);
3634 else
3635 vargs.truncate (0);
3636
3637 if (slp_node)
3638 {
3639 auto_vec<vec<tree> > vec_defs (nargs);
3640 vec<tree> vec_oprnds0;
3641
3642 vect_get_slp_defs (vinfo, slp_node, &vec_defs);
3643 vec_oprnds0 = vec_defs[0];
3644
3645 /* Arguments are ready. Create the new vector stmt. */
3646 for (i = 0; vec_oprnds0.iterate (i, &vec_oprnd0); i += 2)
3647 {
3648 size_t k;
3649 vargs.truncate (0);
3650 for (k = 0; k < nargs; k++)
3651 {
3652 vec<tree> vec_oprndsk = vec_defs[k];
3653 vargs.quick_push (vec_oprndsk[i]);
3654 vargs.quick_push (vec_oprndsk[i + 1]);
3655 }
3656 gcall *call;
3657 if (ifn != IFN_LAST)
3658 call = gimple_build_call_internal_vec (ifn, vargs);
3659 else
3660 call = gimple_build_call_vec (fndecl, vargs);
3661 new_temp = make_ssa_name (vec_dest, call);
3662 gimple_call_set_lhs (call, new_temp);
3663 gimple_call_set_nothrow (call, true);
3664 new_stmt_info
3665 = vect_finish_stmt_generation (vinfo, stmt_info, call, gsi);
3666 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt_info);
3667 }
3668
3669 for (i = 0; i < nargs; i++)
3670 {
3671 vec<tree> vec_oprndsi = vec_defs[i];
3672 vec_oprndsi.release ();
3673 }
3674 continue;
3675 }
3676
3677 for (i = 0; i < nargs; i++)
3678 {
3679 op = gimple_call_arg (stmt, i);
3680 if (j == 0)
3681 {
3682 vec_oprnd0
3683 = vect_get_vec_def_for_operand (vinfo, op, stmt_info,
3684 vectypes[i]);
3685 vec_oprnd1
3686 = vect_get_vec_def_for_stmt_copy (vinfo, vec_oprnd0);
3687 }
3688 else
3689 {
3690 vec_oprnd1 = gimple_call_arg (new_stmt_info->stmt,
3691 2 * i + 1);
3692 vec_oprnd0
3693 = vect_get_vec_def_for_stmt_copy (vinfo, vec_oprnd1);
3694 vec_oprnd1
3695 = vect_get_vec_def_for_stmt_copy (vinfo, vec_oprnd0);
3696 }
3697
3698 vargs.quick_push (vec_oprnd0);
3699 vargs.quick_push (vec_oprnd1);
3700 }
3701
3702 gcall *new_stmt = gimple_build_call_vec (fndecl, vargs);
3703 new_temp = make_ssa_name (vec_dest, new_stmt);
3704 gimple_call_set_lhs (new_stmt, new_temp);
3705 new_stmt_info
3706 = vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
3707
3708 if (j == 0)
3709 STMT_VINFO_VEC_STMT (stmt_info) = new_stmt_info;
3710 else
3711 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt_info;
3712
3713 prev_stmt_info = new_stmt_info;
3714 }
3715
3716 *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
3717 }
3718 else
3719 /* No current target implements this case. */
3720 return false;
3721
3722 vargs.release ();
3723
3724 /* The call in STMT might prevent it from being removed in DCE.
3725 We however cannot remove it here, due to the way the SSA name
3726 it defines is mapped to the new definition. So just replace the
3727 rhs of the statement with something harmless. */
3728
3729 if (slp_node)
3730 return true;
3731
3732 stmt_info = vect_orig_stmt (stmt_info);
3733 lhs = gimple_get_lhs (stmt_info->stmt);
3734
3735 gassign *new_stmt
3736 = gimple_build_assign (lhs, build_zero_cst (TREE_TYPE (lhs)));
3737 vinfo->replace_stmt (gsi, stmt_info, new_stmt);
3738
3739 return true;
3740 }
3741
3742
3743 struct simd_call_arg_info
3744 {
3745 tree vectype;
3746 tree op;
3747 HOST_WIDE_INT linear_step;
3748 enum vect_def_type dt;
3749 unsigned int align;
3750 bool simd_lane_linear;
3751 };
3752
3753 /* Helper function of vectorizable_simd_clone_call. If OP, an SSA_NAME,
3754 is linear within a simd lane (but not within the whole loop), note it
3755 in *ARGINFO. */
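/* For example (schematically, in GIMPLE form):
_1 = GOMP_SIMD_LANE (simduid_0);
_2 = _1 * 4;
op_3 = &base + _2;
makes op_3 linear within a simd lane with base &base and step 4. */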
3756
3757 static void
3758 vect_simd_lane_linear (tree op, class loop *loop,
3759 struct simd_call_arg_info *arginfo)
3760 {
3761 gimple *def_stmt = SSA_NAME_DEF_STMT (op);
3762
3763 if (!is_gimple_assign (def_stmt)
3764 || gimple_assign_rhs_code (def_stmt) != POINTER_PLUS_EXPR
3765 || !is_gimple_min_invariant (gimple_assign_rhs1 (def_stmt)))
3766 return;
3767
3768 tree base = gimple_assign_rhs1 (def_stmt);
3769 HOST_WIDE_INT linear_step = 0;
3770 tree v = gimple_assign_rhs2 (def_stmt);
3771 while (TREE_CODE (v) == SSA_NAME)
3772 {
3773 tree t;
3774 def_stmt = SSA_NAME_DEF_STMT (v);
3775 if (is_gimple_assign (def_stmt))
3776 switch (gimple_assign_rhs_code (def_stmt))
3777 {
3778 case PLUS_EXPR:
3779 t = gimple_assign_rhs2 (def_stmt);
3780 if (linear_step || TREE_CODE (t) != INTEGER_CST)
3781 return;
3782 base = fold_build2 (POINTER_PLUS_EXPR, TREE_TYPE (base), base, t);
3783 v = gimple_assign_rhs1 (def_stmt);
3784 continue;
3785 case MULT_EXPR:
3786 t = gimple_assign_rhs2 (def_stmt);
3787 if (linear_step || !tree_fits_shwi_p (t) || integer_zerop (t))
3788 return;
3789 linear_step = tree_to_shwi (t);
3790 v = gimple_assign_rhs1 (def_stmt);
3791 continue;
3792 CASE_CONVERT:
3793 t = gimple_assign_rhs1 (def_stmt);
3794 if (TREE_CODE (TREE_TYPE (t)) != INTEGER_TYPE
3795 || (TYPE_PRECISION (TREE_TYPE (v))
3796 < TYPE_PRECISION (TREE_TYPE (t))))
3797 return;
3798 if (!linear_step)
3799 linear_step = 1;
3800 v = t;
3801 continue;
3802 default:
3803 return;
3804 }
3805 else if (gimple_call_internal_p (def_stmt, IFN_GOMP_SIMD_LANE)
3806 && loop->simduid
3807 && TREE_CODE (gimple_call_arg (def_stmt, 0)) == SSA_NAME
3808 && (SSA_NAME_VAR (gimple_call_arg (def_stmt, 0))
3809 == loop->simduid))
3810 {
3811 if (!linear_step)
3812 linear_step = 1;
3813 arginfo->linear_step = linear_step;
3814 arginfo->op = base;
3815 arginfo->simd_lane_linear = true;
3816 return;
3817 }
3818 }
3819 }
3820
3821 /* Return the number of elements in vector type VECTYPE, which is associated
3822 with a SIMD clone. At present these vectors always have a constant
3823 length. */
3824
3825 static unsigned HOST_WIDE_INT
3826 simd_clone_subparts (tree vectype)
3827 {
3828 return TYPE_VECTOR_SUBPARTS (vectype).to_constant ();
3829 }
3830
3831 /* Function vectorizable_simd_clone_call.
3832
3833 Check if STMT_INFO performs a function call that can be vectorized
3834 by calling a simd clone of the function.
3835 If VEC_STMT is also passed, vectorize STMT_INFO: create a vectorized
3836 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
3837 Return true if STMT_INFO is vectorizable in this way. */
3838
3839 static bool
3840 vectorizable_simd_clone_call (vec_info *vinfo, stmt_vec_info stmt_info,
3841 gimple_stmt_iterator *gsi,
3842 stmt_vec_info *vec_stmt, slp_tree slp_node,
3843 stmt_vector_for_cost *)
3844 {
3845 tree vec_dest;
3846 tree scalar_dest;
3847 tree op, type;
3848 tree vec_oprnd0 = NULL_TREE;
3849 stmt_vec_info prev_stmt_info;
3850 tree vectype;
3851 unsigned int nunits;
3852 loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
3853 bb_vec_info bb_vinfo = dyn_cast <bb_vec_info> (vinfo);
3854 class loop *loop = loop_vinfo ? LOOP_VINFO_LOOP (loop_vinfo) : NULL;
3855 tree fndecl, new_temp;
3856 int ncopies, j;
3857 auto_vec<simd_call_arg_info> arginfo;
3858 vec<tree> vargs = vNULL;
3859 size_t i, nargs;
3860 tree lhs, rtype, ratype;
3861 vec<constructor_elt, va_gc> *ret_ctor_elts = NULL;
3862
3863 /* Is STMT a vectorizable call? */
3864 gcall *stmt = dyn_cast <gcall *> (stmt_info->stmt);
3865 if (!stmt)
3866 return false;
3867
3868 fndecl = gimple_call_fndecl (stmt);
3869 if (fndecl == NULL_TREE)
3870 return false;
3871
3872 struct cgraph_node *node = cgraph_node::get (fndecl);
3873 if (node == NULL || node->simd_clones == NULL)
3874 return false;
3875
3876 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
3877 return false;
3878
3879 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
3880 && ! vec_stmt)
3881 return false;
3882
3883 if (gimple_call_lhs (stmt)
3884 && TREE_CODE (gimple_call_lhs (stmt)) != SSA_NAME)
3885 return false;
3886
3887 gcc_checking_assert (!stmt_can_throw_internal (cfun, stmt));
3888
3889 vectype = STMT_VINFO_VECTYPE (stmt_info);
3890
3891 if (loop_vinfo && nested_in_vect_loop_p (loop, stmt_info))
3892 return false;
3893
3894 /* FORNOW */
3895 if (slp_node)
3896 return false;
3897
3898 /* Process function arguments. */
3899 nargs = gimple_call_num_args (stmt);
3900
3901 /* Bail out if the function has zero arguments. */
3902 if (nargs == 0)
3903 return false;
3904
3905 arginfo.reserve (nargs, true);
3906
3907 for (i = 0; i < nargs; i++)
3908 {
3909 simd_call_arg_info thisarginfo;
3910 affine_iv iv;
3911
3912 thisarginfo.linear_step = 0;
3913 thisarginfo.align = 0;
3914 thisarginfo.op = NULL_TREE;
3915 thisarginfo.simd_lane_linear = false;
3916
3917 op = gimple_call_arg (stmt, i);
3918 if (!vect_is_simple_use (op, vinfo, &thisarginfo.dt,
3919 &thisarginfo.vectype)
3920 || thisarginfo.dt == vect_uninitialized_def)
3921 {
3922 if (dump_enabled_p ())
3923 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3924 "use not simple.\n");
3925 return false;
3926 }
3927
3928 if (thisarginfo.dt == vect_constant_def
3929 || thisarginfo.dt == vect_external_def)
3930 gcc_assert (thisarginfo.vectype == NULL_TREE);
3931 else
3932 {
3933 gcc_assert (thisarginfo.vectype != NULL_TREE);
3934 if (VECTOR_BOOLEAN_TYPE_P (thisarginfo.vectype))
3935 {
3936 if (dump_enabled_p ())
3937 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3938 "vector mask arguments are not supported\n");
3939 return false;
3940 }
3941 }
3942
3943 /* For linear arguments, the analysis phase should have saved
3944 the base and step in STMT_VINFO_SIMD_CLONE_INFO. */
3945 if (i * 3 + 4 <= STMT_VINFO_SIMD_CLONE_INFO (stmt_info).length ()
3946 && STMT_VINFO_SIMD_CLONE_INFO (stmt_info)[i * 3 + 2])
3947 {
3948 gcc_assert (vec_stmt);
3949 thisarginfo.linear_step
3950 = tree_to_shwi (STMT_VINFO_SIMD_CLONE_INFO (stmt_info)[i * 3 + 2]);
3951 thisarginfo.op
3952 = STMT_VINFO_SIMD_CLONE_INFO (stmt_info)[i * 3 + 1];
3953 thisarginfo.simd_lane_linear
3954 = (STMT_VINFO_SIMD_CLONE_INFO (stmt_info)[i * 3 + 3]
3955 == boolean_true_node);
3956 /* If the loop has been peeled for alignment, we need to adjust the saved base accordingly. */
3957 tree n1 = LOOP_VINFO_NITERS_UNCHANGED (loop_vinfo);
3958 tree n2 = LOOP_VINFO_NITERS (loop_vinfo);
3959 if (n1 != n2 && !thisarginfo.simd_lane_linear)
3960 {
3961 tree bias = fold_build2 (MINUS_EXPR, TREE_TYPE (n1), n1, n2);
3962 tree step = STMT_VINFO_SIMD_CLONE_INFO (stmt_info)[i * 3 + 2];
3963 tree opt = TREE_TYPE (thisarginfo.op);
3964 bias = fold_convert (TREE_TYPE (step), bias);
3965 bias = fold_build2 (MULT_EXPR, TREE_TYPE (step), bias, step);
3966 thisarginfo.op
3967 = fold_build2 (POINTER_TYPE_P (opt)
3968 ? POINTER_PLUS_EXPR : PLUS_EXPR, opt,
3969 thisarginfo.op, bias);
3970 }
3971 }
3972 else if (!vec_stmt
3973 && thisarginfo.dt != vect_constant_def
3974 && thisarginfo.dt != vect_external_def
3975 && loop_vinfo
3976 && TREE_CODE (op) == SSA_NAME
3977 && simple_iv (loop, loop_containing_stmt (stmt), op,
3978 &iv, false)
3979 && tree_fits_shwi_p (iv.step))
3980 {
3981 thisarginfo.linear_step = tree_to_shwi (iv.step);
3982 thisarginfo.op = iv.base;
3983 }
3984 else if ((thisarginfo.dt == vect_constant_def
3985 || thisarginfo.dt == vect_external_def)
3986 && POINTER_TYPE_P (TREE_TYPE (op)))
3987 thisarginfo.align = get_pointer_alignment (op) / BITS_PER_UNIT;
3988 /* Addresses of array elements indexed by GOMP_SIMD_LANE are
3989 linear too. */
3990 if (POINTER_TYPE_P (TREE_TYPE (op))
3991 && !thisarginfo.linear_step
3992 && !vec_stmt
3993 && thisarginfo.dt != vect_constant_def
3994 && thisarginfo.dt != vect_external_def
3995 && loop_vinfo
3996 && !slp_node
3997 && TREE_CODE (op) == SSA_NAME)
3998 vect_simd_lane_linear (op, loop, &thisarginfo);
3999
4000 arginfo.quick_push (thisarginfo);
4001 }
4002
4003 unsigned HOST_WIDE_INT vf;
4004 if (!LOOP_VINFO_VECT_FACTOR (loop_vinfo).is_constant (&vf))
4005 {
4006 if (dump_enabled_p ())
4007 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4008 "not considering SIMD clones; not yet supported"
4009 " for variable-width vectors.\n");
4010 return false;
4011 }
4012
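/* Select the best simd clone for this call. Clones whose simdlen
exceeds VF, whose argument kinds do not match the actual arguments,
or which the target rejects are skipped; of the remainder the clone
with the lowest badness score is chosen, where a simdlen smaller
than VF and inbranch clones add to the score. */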
4013 unsigned int badness = 0;
4014 struct cgraph_node *bestn = NULL;
4015 if (STMT_VINFO_SIMD_CLONE_INFO (stmt_info).exists ())
4016 bestn = cgraph_node::get (STMT_VINFO_SIMD_CLONE_INFO (stmt_info)[0]);
4017 else
4018 for (struct cgraph_node *n = node->simd_clones; n != NULL;
4019 n = n->simdclone->next_clone)
4020 {
4021 unsigned int this_badness = 0;
4022 if (n->simdclone->simdlen > vf
4023 || n->simdclone->nargs != nargs)
4024 continue;
4025 if (n->simdclone->simdlen < vf)
4026 this_badness += (exact_log2 (vf)
4027 - exact_log2 (n->simdclone->simdlen)) * 1024;
4028 if (n->simdclone->inbranch)
4029 this_badness += 2048;
4030 int target_badness = targetm.simd_clone.usable (n);
4031 if (target_badness < 0)
4032 continue;
4033 this_badness += target_badness * 512;
4034 /* FORNOW: Have to add code to add the mask argument. */
4035 if (n->simdclone->inbranch)
4036 continue;
4037 for (i = 0; i < nargs; i++)
4038 {
4039 switch (n->simdclone->args[i].arg_type)
4040 {
4041 case SIMD_CLONE_ARG_TYPE_VECTOR:
4042 if (!useless_type_conversion_p
4043 (n->simdclone->args[i].orig_type,
4044 TREE_TYPE (gimple_call_arg (stmt, i))))
4045 i = -1;
4046 else if (arginfo[i].dt == vect_constant_def
4047 || arginfo[i].dt == vect_external_def
4048 || arginfo[i].linear_step)
4049 this_badness += 64;
4050 break;
4051 case SIMD_CLONE_ARG_TYPE_UNIFORM:
4052 if (arginfo[i].dt != vect_constant_def
4053 && arginfo[i].dt != vect_external_def)
4054 i = -1;
4055 break;
4056 case SIMD_CLONE_ARG_TYPE_LINEAR_CONSTANT_STEP:
4057 case SIMD_CLONE_ARG_TYPE_LINEAR_REF_CONSTANT_STEP:
4058 if (arginfo[i].dt == vect_constant_def
4059 || arginfo[i].dt == vect_external_def
4060 || (arginfo[i].linear_step
4061 != n->simdclone->args[i].linear_step))
4062 i = -1;
4063 break;
4064 case SIMD_CLONE_ARG_TYPE_LINEAR_VARIABLE_STEP:
4065 case SIMD_CLONE_ARG_TYPE_LINEAR_VAL_CONSTANT_STEP:
4066 case SIMD_CLONE_ARG_TYPE_LINEAR_UVAL_CONSTANT_STEP:
4067 case SIMD_CLONE_ARG_TYPE_LINEAR_REF_VARIABLE_STEP:
4068 case SIMD_CLONE_ARG_TYPE_LINEAR_VAL_VARIABLE_STEP:
4069 case SIMD_CLONE_ARG_TYPE_LINEAR_UVAL_VARIABLE_STEP:
4070 /* FORNOW */
4071 i = -1;
4072 break;
4073 case SIMD_CLONE_ARG_TYPE_MASK:
4074 gcc_unreachable ();
4075 }
4076 if (i == (size_t) -1)
4077 break;
4078 if (n->simdclone->args[i].alignment > arginfo[i].align)
4079 {
4080 i = -1;
4081 break;
4082 }
4083 if (arginfo[i].align)
4084 this_badness += (exact_log2 (arginfo[i].align)
4085 - exact_log2 (n->simdclone->args[i].alignment));
4086 }
4087 if (i == (size_t) -1)
4088 continue;
4089 if (bestn == NULL || this_badness < badness)
4090 {
4091 bestn = n;
4092 badness = this_badness;
4093 }
4094 }
4095
4096 if (bestn == NULL)
4097 return false;
4098
4099 for (i = 0; i < nargs; i++)
4100 if ((arginfo[i].dt == vect_constant_def
4101 || arginfo[i].dt == vect_external_def)
4102 && bestn->simdclone->args[i].arg_type == SIMD_CLONE_ARG_TYPE_VECTOR)
4103 {
4104 tree arg_type = TREE_TYPE (gimple_call_arg (stmt, i));
4105 arginfo[i].vectype = get_vectype_for_scalar_type (vinfo, arg_type,
4106 slp_node);
4107 if (arginfo[i].vectype == NULL
4108 || (simd_clone_subparts (arginfo[i].vectype)
4109 > bestn->simdclone->simdlen))
4110 return false;
4111 }
4112
4113 fndecl = bestn->decl;
4114 nunits = bestn->simdclone->simdlen;
4115 ncopies = vf / nunits;
4116
4117 /* If the function isn't const, only allow it in simd loops where the
4118 user has asserted that at least nunits consecutive iterations can be
4119 performed using SIMD instructions. */
4120 if ((loop == NULL || (unsigned) loop->safelen < nunits)
4121 && gimple_vuse (stmt))
4122 return false;
4123
4124 /* Sanity check: make sure that at least one copy of the vectorized stmt
4125 needs to be generated. */
4126 gcc_assert (ncopies >= 1);
4127
4128 if (!vec_stmt) /* transformation not required. */
4129 {
4130 STMT_VINFO_SIMD_CLONE_INFO (stmt_info).safe_push (bestn->decl);
4131 for (i = 0; i < nargs; i++)
4132 if ((bestn->simdclone->args[i].arg_type
4133 == SIMD_CLONE_ARG_TYPE_LINEAR_CONSTANT_STEP)
4134 || (bestn->simdclone->args[i].arg_type
4135 == SIMD_CLONE_ARG_TYPE_LINEAR_REF_CONSTANT_STEP))
4136 {
4137 STMT_VINFO_SIMD_CLONE_INFO (stmt_info).safe_grow_cleared (i * 3
4138 + 1);
4139 STMT_VINFO_SIMD_CLONE_INFO (stmt_info).safe_push (arginfo[i].op);
4140 tree lst = POINTER_TYPE_P (TREE_TYPE (arginfo[i].op))
4141 ? size_type_node : TREE_TYPE (arginfo[i].op);
4142 tree ls = build_int_cst (lst, arginfo[i].linear_step);
4143 STMT_VINFO_SIMD_CLONE_INFO (stmt_info).safe_push (ls);
4144 tree sll = arginfo[i].simd_lane_linear
4145 ? boolean_true_node : boolean_false_node;
4146 STMT_VINFO_SIMD_CLONE_INFO (stmt_info).safe_push (sll);
4147 }
4148 STMT_VINFO_TYPE (stmt_info) = call_simd_clone_vec_info_type;
4149 DUMP_VECT_SCOPE ("vectorizable_simd_clone_call");
4150 /* vect_model_simple_cost (vinfo, stmt_info, ncopies,
4151 dt, slp_node, cost_vec); */
4152 return true;
4153 }
4154
4155 /* Transform. */
4156
4157 if (dump_enabled_p ())
4158 dump_printf_loc (MSG_NOTE, vect_location, "transform call.\n");
4159
4160 /* Handle def. */
4161 scalar_dest = gimple_call_lhs (stmt);
4162 vec_dest = NULL_TREE;
4163 rtype = NULL_TREE;
4164 ratype = NULL_TREE;
4165 if (scalar_dest)
4166 {
4167 vec_dest = vect_create_destination_var (scalar_dest, vectype);
4168 rtype = TREE_TYPE (TREE_TYPE (fndecl));
4169 if (TREE_CODE (rtype) == ARRAY_TYPE)
4170 {
4171 ratype = rtype;
4172 rtype = TREE_TYPE (ratype);
4173 }
4174 }
4175
4176 prev_stmt_info = NULL;
4177 for (j = 0; j < ncopies; ++j)
4178 {
4179 /* Build argument list for the vectorized call. */
4180 if (j == 0)
4181 vargs.create (nargs);
4182 else
4183 vargs.truncate (0);
4184
4185 for (i = 0; i < nargs; i++)
4186 {
4187 unsigned int k, l, m, o;
4188 tree atype;
4189 op = gimple_call_arg (stmt, i);
4190 switch (bestn->simdclone->args[i].arg_type)
4191 {
4192 case SIMD_CLONE_ARG_TYPE_VECTOR:
4193 atype = bestn->simdclone->args[i].vector_type;
4194 o = nunits / simd_clone_subparts (atype);
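/* O is the number of ATYPE vector arguments the clone takes for this
parameter per call. If ATYPE has fewer lanes than the loop's vector
type, extract the pieces with BIT_FIELD_REFs; if it has more, glue
loop vectors together with a CONSTRUCTOR (e.g. a clone taking V8SI
needs a two-element CONSTRUCTOR when the loop works on V4SI). */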
4195 for (m = j * o; m < (j + 1) * o; m++)
4196 {
4197 if (simd_clone_subparts (atype)
4198 < simd_clone_subparts (arginfo[i].vectype))
4199 {
4200 poly_uint64 prec = GET_MODE_BITSIZE (TYPE_MODE (atype));
4201 k = (simd_clone_subparts (arginfo[i].vectype)
4202 / simd_clone_subparts (atype));
4203 gcc_assert ((k & (k - 1)) == 0);
4204 if (m == 0)
4205 vec_oprnd0
4206 = vect_get_vec_def_for_operand (vinfo, op, stmt_info);
4207 else
4208 {
4209 vec_oprnd0 = arginfo[i].op;
4210 if ((m & (k - 1)) == 0)
4211 vec_oprnd0
4212 = vect_get_vec_def_for_stmt_copy (vinfo,
4213 vec_oprnd0);
4214 }
4215 arginfo[i].op = vec_oprnd0;
4216 vec_oprnd0
4217 = build3 (BIT_FIELD_REF, atype, vec_oprnd0,
4218 bitsize_int (prec),
4219 bitsize_int ((m & (k - 1)) * prec));
4220 gassign *new_stmt
4221 = gimple_build_assign (make_ssa_name (atype),
4222 vec_oprnd0);
4223 vect_finish_stmt_generation (vinfo, stmt_info,
4224 new_stmt, gsi);
4225 vargs.safe_push (gimple_assign_lhs (new_stmt));
4226 }
4227 else
4228 {
4229 k = (simd_clone_subparts (atype)
4230 / simd_clone_subparts (arginfo[i].vectype));
4231 gcc_assert ((k & (k - 1)) == 0);
4232 vec<constructor_elt, va_gc> *ctor_elts;
4233 if (k != 1)
4234 vec_alloc (ctor_elts, k);
4235 else
4236 ctor_elts = NULL;
4237 for (l = 0; l < k; l++)
4238 {
4239 if (m == 0 && l == 0)
4240 vec_oprnd0
4241 = vect_get_vec_def_for_operand (vinfo,
4242 op, stmt_info);
4243 else
4244 vec_oprnd0
4245 = vect_get_vec_def_for_stmt_copy (vinfo,
4246 arginfo[i].op);
4247 arginfo[i].op = vec_oprnd0;
4248 if (k == 1)
4249 break;
4250 CONSTRUCTOR_APPEND_ELT (ctor_elts, NULL_TREE,
4251 vec_oprnd0);
4252 }
4253 if (k == 1)
4254 vargs.safe_push (vec_oprnd0);
4255 else
4256 {
4257 vec_oprnd0 = build_constructor (atype, ctor_elts);
4258 gassign *new_stmt
4259 = gimple_build_assign (make_ssa_name (atype),
4260 vec_oprnd0);
4261 vect_finish_stmt_generation (vinfo, stmt_info,
4262 new_stmt, gsi);
4263 vargs.safe_push (gimple_assign_lhs (new_stmt));
4264 }
4265 }
4266 }
4267 break;
4268 case SIMD_CLONE_ARG_TYPE_UNIFORM:
4269 vargs.safe_push (op);
4270 break;
4271 case SIMD_CLONE_ARG_TYPE_LINEAR_CONSTANT_STEP:
4272 case SIMD_CLONE_ARG_TYPE_LINEAR_REF_CONSTANT_STEP:
4273 if (j == 0)
4274 {
4275 gimple_seq stmts;
4276 arginfo[i].op
4277 = force_gimple_operand (unshare_expr (arginfo[i].op),
4278 &stmts, true, NULL_TREE);
4279 if (stmts != NULL)
4280 {
4281 basic_block new_bb;
4282 edge pe = loop_preheader_edge (loop);
4283 new_bb = gsi_insert_seq_on_edge_immediate (pe, stmts);
4284 gcc_assert (!new_bb);
4285 }
4286 if (arginfo[i].simd_lane_linear)
4287 {
4288 vargs.safe_push (arginfo[i].op);
4289 break;
4290 }
4291 tree phi_res = copy_ssa_name (op);
4292 gphi *new_phi = create_phi_node (phi_res, loop->header);
4293 loop_vinfo->add_stmt (new_phi);
4294 add_phi_arg (new_phi, arginfo[i].op,
4295 loop_preheader_edge (loop), UNKNOWN_LOCATION);
4296 enum tree_code code
4297 = POINTER_TYPE_P (TREE_TYPE (op))
4298 ? POINTER_PLUS_EXPR : PLUS_EXPR;
4299 tree type = POINTER_TYPE_P (TREE_TYPE (op))
4300 ? sizetype : TREE_TYPE (op);
4301 widest_int cst
4302 = wi::mul (bestn->simdclone->args[i].linear_step,
4303 ncopies * nunits);
4304 tree tcst = wide_int_to_tree (type, cst);
4305 tree phi_arg = copy_ssa_name (op);
4306 gassign *new_stmt
4307 = gimple_build_assign (phi_arg, code, phi_res, tcst);
4308 gimple_stmt_iterator si = gsi_after_labels (loop->header);
4309 gsi_insert_after (&si, new_stmt, GSI_NEW_STMT);
4310 loop_vinfo->add_stmt (new_stmt);
4311 add_phi_arg (new_phi, phi_arg, loop_latch_edge (loop),
4312 UNKNOWN_LOCATION);
4313 arginfo[i].op = phi_res;
4314 vargs.safe_push (phi_res);
4315 }
4316 else
4317 {
4318 enum tree_code code
4319 = POINTER_TYPE_P (TREE_TYPE (op))
4320 ? POINTER_PLUS_EXPR : PLUS_EXPR;
4321 tree type = POINTER_TYPE_P (TREE_TYPE (op))
4322 ? sizetype : TREE_TYPE (op);
4323 widest_int cst
4324 = wi::mul (bestn->simdclone->args[i].linear_step,
4325 j * nunits);
4326 tree tcst = wide_int_to_tree (type, cst);
4327 new_temp = make_ssa_name (TREE_TYPE (op));
4328 gassign *new_stmt
4329 = gimple_build_assign (new_temp, code,
4330 arginfo[i].op, tcst);
4331 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
4332 vargs.safe_push (new_temp);
4333 }
4334 break;
4335 case SIMD_CLONE_ARG_TYPE_LINEAR_VAL_CONSTANT_STEP:
4336 case SIMD_CLONE_ARG_TYPE_LINEAR_UVAL_CONSTANT_STEP:
4337 case SIMD_CLONE_ARG_TYPE_LINEAR_VARIABLE_STEP:
4338 case SIMD_CLONE_ARG_TYPE_LINEAR_REF_VARIABLE_STEP:
4339 case SIMD_CLONE_ARG_TYPE_LINEAR_VAL_VARIABLE_STEP:
4340 case SIMD_CLONE_ARG_TYPE_LINEAR_UVAL_VARIABLE_STEP:
4341 default:
4342 gcc_unreachable ();
4343 }
4344 }
4345
4346 gcall *new_call = gimple_build_call_vec (fndecl, vargs);
4347 if (vec_dest)
4348 {
4349 gcc_assert (ratype || simd_clone_subparts (rtype) == nunits);
4350 if (ratype)
4351 new_temp = create_tmp_var (ratype);
4352 else if (simd_clone_subparts (vectype)
4353 == simd_clone_subparts (rtype))
4354 new_temp = make_ssa_name (vec_dest, new_call);
4355 else
4356 new_temp = make_ssa_name (rtype, new_call);
4357 gimple_call_set_lhs (new_call, new_temp);
4358 }
4359 stmt_vec_info new_stmt_info
4360 = vect_finish_stmt_generation (vinfo, stmt_info, new_call, gsi);
4361
4362 if (vec_dest)
4363 {
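/* Bring the clone's return value back into the loop's vector type
VECTYPE. If the clone returns more lanes than VECTYPE holds, split
the result with BIT_FIELD_REFs (or loads from the returned array);
if it returns fewer lanes, collect the results of consecutive J
iterations into a CONSTRUCTOR; otherwise use it directly (again via
a load if the clone returns an array). */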
4364 if (simd_clone_subparts (vectype) < nunits)
4365 {
4366 unsigned int k, l;
4367 poly_uint64 prec = GET_MODE_BITSIZE (TYPE_MODE (vectype));
4368 poly_uint64 bytes = GET_MODE_SIZE (TYPE_MODE (vectype));
4369 k = nunits / simd_clone_subparts (vectype);
4370 gcc_assert ((k & (k - 1)) == 0);
4371 for (l = 0; l < k; l++)
4372 {
4373 tree t;
4374 if (ratype)
4375 {
4376 t = build_fold_addr_expr (new_temp);
4377 t = build2 (MEM_REF, vectype, t,
4378 build_int_cst (TREE_TYPE (t), l * bytes));
4379 }
4380 else
4381 t = build3 (BIT_FIELD_REF, vectype, new_temp,
4382 bitsize_int (prec), bitsize_int (l * prec));
4383 gimple *new_stmt
4384 = gimple_build_assign (make_ssa_name (vectype), t);
4385 new_stmt_info
4386 = vect_finish_stmt_generation (vinfo, stmt_info,
4387 new_stmt, gsi);
4388
4389 if (j == 0 && l == 0)
4390 STMT_VINFO_VEC_STMT (stmt_info)
4391 = *vec_stmt = new_stmt_info;
4392 else
4393 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt_info;
4394
4395 prev_stmt_info = new_stmt_info;
4396 }
4397
4398 if (ratype)
4399 vect_clobber_variable (vinfo, stmt_info, gsi, new_temp);
4400 continue;
4401 }
4402 else if (simd_clone_subparts (vectype) > nunits)
4403 {
4404 unsigned int k = (simd_clone_subparts (vectype)
4405 / simd_clone_subparts (rtype));
4406 gcc_assert ((k & (k - 1)) == 0);
4407 if ((j & (k - 1)) == 0)
4408 vec_alloc (ret_ctor_elts, k);
4409 if (ratype)
4410 {
4411 unsigned int m, o = nunits / simd_clone_subparts (rtype);
4412 for (m = 0; m < o; m++)
4413 {
4414 tree tem = build4 (ARRAY_REF, rtype, new_temp,
4415 size_int (m), NULL_TREE, NULL_TREE);
4416 gimple *new_stmt
4417 = gimple_build_assign (make_ssa_name (rtype), tem);
4418 new_stmt_info
4419 = vect_finish_stmt_generation (vinfo, stmt_info,
4420 new_stmt, gsi);
4421 CONSTRUCTOR_APPEND_ELT (ret_ctor_elts, NULL_TREE,
4422 gimple_assign_lhs (new_stmt));
4423 }
4424 vect_clobber_variable (vinfo, stmt_info, gsi, new_temp);
4425 }
4426 else
4427 CONSTRUCTOR_APPEND_ELT (ret_ctor_elts, NULL_TREE, new_temp);
4428 if ((j & (k - 1)) != k - 1)
4429 continue;
4430 vec_oprnd0 = build_constructor (vectype, ret_ctor_elts);
4431 gimple *new_stmt
4432 = gimple_build_assign (make_ssa_name (vec_dest), vec_oprnd0);
4433 new_stmt_info
4434 = vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
4435
4436 if ((unsigned) j == k - 1)
4437 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt_info;
4438 else
4439 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt_info;
4440
4441 prev_stmt_info = new_stmt_info;
4442 continue;
4443 }
4444 else if (ratype)
4445 {
4446 tree t = build_fold_addr_expr (new_temp);
4447 t = build2 (MEM_REF, vectype, t,
4448 build_int_cst (TREE_TYPE (t), 0));
4449 gimple *new_stmt
4450 = gimple_build_assign (make_ssa_name (vec_dest), t);
4451 new_stmt_info
4452 = vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
4453 vect_clobber_variable (vinfo, stmt_info, gsi, new_temp);
4454 }
4455 }
4456
4457 if (j == 0)
4458 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt_info;
4459 else
4460 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt_info;
4461
4462 prev_stmt_info = new_stmt_info;
4463 }
4464
4465 vargs.release ();
4466
4467 /* The call in STMT might prevent it from being removed in DCE.
4468 We however cannot remove it here, due to the way the SSA name
4469 it defines is mapped to the new definition. So just replace the
4470 rhs of the statement with something harmless. */
4471
4472 if (slp_node)
4473 return true;
4474
4475 gimple *new_stmt;
4476 if (scalar_dest)
4477 {
4478 type = TREE_TYPE (scalar_dest);
4479 lhs = gimple_call_lhs (vect_orig_stmt (stmt_info)->stmt);
4480 new_stmt = gimple_build_assign (lhs, build_zero_cst (type));
4481 }
4482 else
4483 new_stmt = gimple_build_nop ();
4484 vinfo->replace_stmt (gsi, vect_orig_stmt (stmt_info), new_stmt);
4485 unlink_stmt_vdef (stmt);
4486
4487 return true;
4488 }
4489
4490
4491 /* Function vect_gen_widened_results_half
4492
4493 Create a vector stmt whose code and number of operands are CODE and
4494 OP_TYPE, whose result variable is VEC_DEST, and whose operands are
4495 VEC_OPRND0 and VEC_OPRND1 (VEC_OPRND1 is ignored when OP_TYPE is
4496 unary_op). The new vector stmt is inserted at GSI. This generates one
4497 half of a widened result; the caller invokes it once for each of CODE1
4498 and CODE2. STMT_INFO is the original scalar stmt that we are vectorizing. */
4499
4500 static gimple *
4501 vect_gen_widened_results_half (vec_info *vinfo, enum tree_code code,
4502 tree vec_oprnd0, tree vec_oprnd1, int op_type,
4503 tree vec_dest, gimple_stmt_iterator *gsi,
4504 stmt_vec_info stmt_info)
4505 {
4506 gimple *new_stmt;
4507 tree new_temp;
4508
4509 /* Generate half of the widened result: */
4510 gcc_assert (op_type == TREE_CODE_LENGTH (code));
4511 if (op_type != binary_op)
4512 vec_oprnd1 = NULL;
4513 new_stmt = gimple_build_assign (vec_dest, code, vec_oprnd0, vec_oprnd1);
4514 new_temp = make_ssa_name (vec_dest, new_stmt);
4515 gimple_assign_set_lhs (new_stmt, new_temp);
4516 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
4517
4518 return new_stmt;
4519 }
4520
4521
4522 /* Get vectorized definitions for loop-based vectorization of STMT_INFO.
4523 For the first operand we call vect_get_vec_def_for_operand (with OPRND
4524 containing the scalar operand), and for the rest we get a copy with
4525 vect_get_vec_def_for_stmt_copy() using the previous vector definition
4526 (stored in OPRND). See vect_get_vec_def_for_stmt_copy() for details.
4527 The vectors are collected into VEC_OPRNDS. */
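/* For example, with MULTI_STEP_CVT == 1 four defs are collected: the
def for OPRND followed by three successive stmt copies. */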
4528
4529 static void
4530 vect_get_loop_based_defs (vec_info *vinfo, tree *oprnd, stmt_vec_info stmt_info,
4531 vec<tree> *vec_oprnds, int multi_step_cvt)
4532 {
4533 tree vec_oprnd;
4534
4535 /* Get the first vector operand. */
4536 /* All the vector operands except the very first one (that is, the
4537 scalar operand) are stmt copies. */
4538 if (TREE_CODE (TREE_TYPE (*oprnd)) != VECTOR_TYPE)
4539 vec_oprnd = vect_get_vec_def_for_operand (vinfo, *oprnd, stmt_info);
4540 else
4541 vec_oprnd = vect_get_vec_def_for_stmt_copy (vinfo, *oprnd);
4542
4543 vec_oprnds->quick_push (vec_oprnd);
4544
4545 /* Get second vector operand. */
4546 vec_oprnd = vect_get_vec_def_for_stmt_copy (vinfo, vec_oprnd);
4547 vec_oprnds->quick_push (vec_oprnd);
4548
4549 *oprnd = vec_oprnd;
4550
4551 /* For conversion in multiple steps, continue to get operands
4552 recursively. */
4553 if (multi_step_cvt)
4554 vect_get_loop_based_defs (vinfo, oprnd, stmt_info, vec_oprnds,
4555 multi_step_cvt - 1);
4556 }
4557
4558
4559 /* Create vectorized demotion statements for vector operands from VEC_OPRNDS.
4560 For multi-step conversions store the resulting vectors and call the function
4561 recursively. */
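/* For example, a two-step demotion from V4SI operands to a V16QI
result first packs pairs of V4SI vectors into V8HI vectors and then
recurses once with VEC_PACK_TRUNC_EXPR to pack those pairwise into
V16QI. */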
4562
4563 static void
4564 vect_create_vectorized_demotion_stmts (vec_info *vinfo, vec<tree> *vec_oprnds,
4565 int multi_step_cvt,
4566 stmt_vec_info stmt_info,
4567 vec<tree> vec_dsts,
4568 gimple_stmt_iterator *gsi,
4569 slp_tree slp_node, enum tree_code code,
4570 stmt_vec_info *prev_stmt_info)
4571 {
4572 unsigned int i;
4573 tree vop0, vop1, new_tmp, vec_dest;
4574
4575 vec_dest = vec_dsts.pop ();
4576
4577 for (i = 0; i < vec_oprnds->length (); i += 2)
4578 {
4579 /* Create demotion operation. */
4580 vop0 = (*vec_oprnds)[i];
4581 vop1 = (*vec_oprnds)[i + 1];
4582 gassign *new_stmt = gimple_build_assign (vec_dest, code, vop0, vop1);
4583 new_tmp = make_ssa_name (vec_dest, new_stmt);
4584 gimple_assign_set_lhs (new_stmt, new_tmp);
4585 stmt_vec_info new_stmt_info
4586 = vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
4587
4588 if (multi_step_cvt)
4589 /* Store the resulting vector for next recursive call. */
4590 (*vec_oprnds)[i/2] = new_tmp;
4591 else
4592 {
4593 /* This is the last step of the conversion sequence. Store the
4594 vectors in SLP_NODE or in vector info of the scalar statement
4595 (or in STMT_VINFO_RELATED_STMT chain). */
4596 if (slp_node)
4597 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt_info);
4598 else
4599 {
4600 if (!*prev_stmt_info)
4601 STMT_VINFO_VEC_STMT (stmt_info) = new_stmt_info;
4602 else
4603 STMT_VINFO_RELATED_STMT (*prev_stmt_info) = new_stmt_info;
4604
4605 *prev_stmt_info = new_stmt_info;
4606 }
4607 }
4608 }
4609
4610 /* For multi-step demotion operations we first generate demotion operations
4611 from the source type to the intermediate types, and then combine the
4612 results (stored in VEC_OPRNDS) with a demotion operation to the
4613 destination type. */
4614 if (multi_step_cvt)
4615 {
4616 /* At each level of recursion we have half of the operands we had at the
4617 previous level. */
4618 vec_oprnds->truncate ((i+1)/2);
4619 vect_create_vectorized_demotion_stmts (vinfo, vec_oprnds,
4620 multi_step_cvt - 1,
4621 stmt_info, vec_dsts, gsi,
4622 slp_node, VEC_PACK_TRUNC_EXPR,
4623 prev_stmt_info);
4624 }
4625
4626 vec_dsts.quick_push (vec_dest);
4627 }
4628
4629
4630 /* Create vectorized promotion statements for vector operands from VEC_OPRNDS0
4631 and VEC_OPRNDS1, for a binary operation associated with scalar statement
4632 STMT_INFO. For multi-step conversions store the resulting vectors and
4633 call the function recursively. */
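/* For example, promoting V8HI operands to V4SI results emits two stmts
per input vector: one for the low halves using CODE1 (e.g.
VEC_UNPACK_LO_EXPR) and one for the high halves using CODE2. */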
4634
4635 static void
4636 vect_create_vectorized_promotion_stmts (vec_info *vinfo,
4637 vec<tree> *vec_oprnds0,
4638 vec<tree> *vec_oprnds1,
4639 stmt_vec_info stmt_info, tree vec_dest,
4640 gimple_stmt_iterator *gsi,
4641 enum tree_code code1,
4642 enum tree_code code2, int op_type)
4643 {
4644 int i;
4645 tree vop0, vop1, new_tmp1, new_tmp2;
4646 gimple *new_stmt1, *new_stmt2;
4647 vec<tree> vec_tmp = vNULL;
4648
4649 vec_tmp.create (vec_oprnds0->length () * 2);
4650 FOR_EACH_VEC_ELT (*vec_oprnds0, i, vop0)
4651 {
4652 if (op_type == binary_op)
4653 vop1 = (*vec_oprnds1)[i];
4654 else
4655 vop1 = NULL_TREE;
4656
4657 /* Generate the two halves of promotion operation. */
4658 new_stmt1 = vect_gen_widened_results_half (vinfo, code1, vop0, vop1,
4659 op_type, vec_dest, gsi,
4660 stmt_info);
4661 new_stmt2 = vect_gen_widened_results_half (vinfo, code2, vop0, vop1,
4662 op_type, vec_dest, gsi,
4663 stmt_info);
4664 if (is_gimple_call (new_stmt1))
4665 {
4666 new_tmp1 = gimple_call_lhs (new_stmt1);
4667 new_tmp2 = gimple_call_lhs (new_stmt2);
4668 }
4669 else
4670 {
4671 new_tmp1 = gimple_assign_lhs (new_stmt1);
4672 new_tmp2 = gimple_assign_lhs (new_stmt2);
4673 }
4674
4675 /* Store the results for the next step. */
4676 vec_tmp.quick_push (new_tmp1);
4677 vec_tmp.quick_push (new_tmp2);
4678 }
4679
4680 vec_oprnds0->release ();
4681 *vec_oprnds0 = vec_tmp;
4682 }
4683
4684
4685 /* Check if STMT_INFO performs a conversion operation that can be vectorized.
4686 If VEC_STMT is also passed, vectorize STMT_INFO: create a vectorized
4687 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
4688 Return true if STMT_INFO is vectorizable in this way. */
4689
4690 static bool
4691 vectorizable_conversion (vec_info *vinfo,
4692 stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
4693 stmt_vec_info *vec_stmt, slp_tree slp_node,
4694 stmt_vector_for_cost *cost_vec)
4695 {
4696 tree vec_dest;
4697 tree scalar_dest;
4698 tree op0, op1 = NULL_TREE;
4699 tree vec_oprnd0 = NULL_TREE, vec_oprnd1 = NULL_TREE;
4700 loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
4701 enum tree_code code, code1 = ERROR_MARK, code2 = ERROR_MARK;
4702 enum tree_code codecvt1 = ERROR_MARK, codecvt2 = ERROR_MARK;
4703 tree new_temp;
4704 enum vect_def_type dt[2] = {vect_unknown_def_type, vect_unknown_def_type};
4705 int ndts = 2;
4706 stmt_vec_info prev_stmt_info;
4707 poly_uint64 nunits_in;
4708 poly_uint64 nunits_out;
4709 tree vectype_out, vectype_in;
4710 int ncopies, i, j;
4711 tree lhs_type, rhs_type;
4712 enum { NARROW, NONE, WIDEN } modifier;
4713 vec<tree> vec_oprnds0 = vNULL;
4714 vec<tree> vec_oprnds1 = vNULL;
4715 tree vop0;
4716 bb_vec_info bb_vinfo = dyn_cast <bb_vec_info> (vinfo);
4717 int multi_step_cvt = 0;
4718 vec<tree> interm_types = vNULL;
4719 tree last_oprnd, intermediate_type, cvt_type = NULL_TREE;
4720 int op_type;
4721 unsigned short fltsz;
4722
4723 /* Is STMT a vectorizable conversion? */
4724
4725 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
4726 return false;
4727
4728 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
4729 && ! vec_stmt)
4730 return false;
4731
4732 gassign *stmt = dyn_cast <gassign *> (stmt_info->stmt);
4733 if (!stmt)
4734 return false;
4735
4736 if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME)
4737 return false;
4738
4739 code = gimple_assign_rhs_code (stmt);
4740 if (!CONVERT_EXPR_CODE_P (code)
4741 && code != FIX_TRUNC_EXPR
4742 && code != FLOAT_EXPR
4743 && code != WIDEN_MULT_EXPR
4744 && code != WIDEN_LSHIFT_EXPR)
4745 return false;
4746
4747 op_type = TREE_CODE_LENGTH (code);
4748
4749 /* Check types of lhs and rhs. */
4750 scalar_dest = gimple_assign_lhs (stmt);
4751 lhs_type = TREE_TYPE (scalar_dest);
4752 vectype_out = STMT_VINFO_VECTYPE (stmt_info);
4753
4754 /* Check the operands of the operation. */
4755 slp_tree slp_op0, slp_op1 = NULL;
4756 if (!vect_is_simple_use (vinfo, stmt_info, slp_node,
4757 0, &op0, &slp_op0, &dt[0], &vectype_in))
4758 {
4759 if (dump_enabled_p ())
4760 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4761 "use not simple.\n");
4762 return false;
4763 }
4764
4765 rhs_type = TREE_TYPE (op0);
4766 if ((code != FIX_TRUNC_EXPR && code != FLOAT_EXPR)
4767 && !((INTEGRAL_TYPE_P (lhs_type)
4768 && INTEGRAL_TYPE_P (rhs_type))
4769 || (SCALAR_FLOAT_TYPE_P (lhs_type)
4770 && SCALAR_FLOAT_TYPE_P (rhs_type))))
4771 return false;
4772
4773 if (!VECTOR_BOOLEAN_TYPE_P (vectype_out)
4774 && ((INTEGRAL_TYPE_P (lhs_type)
4775 && !type_has_mode_precision_p (lhs_type))
4776 || (INTEGRAL_TYPE_P (rhs_type)
4777 && !type_has_mode_precision_p (rhs_type))))
4778 {
4779 if (dump_enabled_p ())
4780 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4781 "type conversion to/from bit-precision unsupported."
4782 "\n");
4783 return false;
4784 }
4785
4786 if (op_type == binary_op)
4787 {
4788 gcc_assert (code == WIDEN_MULT_EXPR || code == WIDEN_LSHIFT_EXPR);
4789
4790 op1 = gimple_assign_rhs2 (stmt);
4791 tree vectype1_in;
4792 if (!vect_is_simple_use (vinfo, stmt_info, slp_node, 1,
4793 &op1, &slp_op1, &dt[1], &vectype1_in))
4794 {
4795 if (dump_enabled_p ())
4796 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4797 "use not simple.\n");
4798 return false;
4799 }
4800 /* For WIDEN_MULT_EXPR, if OP0 is a constant, use the type of
4801 OP1. */
4802 if (!vectype_in)
4803 vectype_in = vectype1_in;
4804 }
4805
4806 /* If op0 is an external or constant def, infer the vector type
4807 from the scalar type. */
4808 if (!vectype_in)
4809 vectype_in = get_vectype_for_scalar_type (vinfo, rhs_type, slp_node);
4810 if (vec_stmt)
4811 gcc_assert (vectype_in);
4812 if (!vectype_in)
4813 {
4814 if (dump_enabled_p ())
4815 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4816 "no vectype for scalar type %T\n", rhs_type);
4817
4818 return false;
4819 }
4820
4821 if (VECTOR_BOOLEAN_TYPE_P (vectype_out)
4822 && !VECTOR_BOOLEAN_TYPE_P (vectype_in))
4823 {
4824 if (dump_enabled_p ())
4825 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4826 "can't convert between boolean and non "
4827 "boolean vectors %T\n", rhs_type);
4828
4829 return false;
4830 }
4831
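/* Classify the conversion by comparing the lane counts of the chosen
vector types. E.g. with 128-bit vectors an int -> long conversion
has VECTYPE_IN V4SI and VECTYPE_OUT V2DI and is therefore a WIDEN
operation (each V4SI unpacks into two V2DI); the reverse long -> int
conversion is the corresponding NARROW case. */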
4832 nunits_in = TYPE_VECTOR_SUBPARTS (vectype_in);
4833 nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
4834 if (known_eq (nunits_out, nunits_in))
4835 modifier = NONE;
4836 else if (multiple_p (nunits_out, nunits_in))
4837 modifier = NARROW;
4838 else
4839 {
4840 gcc_checking_assert (multiple_p (nunits_in, nunits_out));
4841 modifier = WIDEN;
4842 }
4843
4844 /* Multiple types in SLP are handled by creating the appropriate number of
4845 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
4846 case of SLP. */
4847 if (slp_node)
4848 ncopies = 1;
4849 else if (modifier == NARROW)
4850 ncopies = vect_get_num_copies (loop_vinfo, vectype_out);
4851 else
4852 ncopies = vect_get_num_copies (loop_vinfo, vectype_in);
4853
4854 /* Sanity check: make sure that at least one copy of the vectorized stmt
4855 needs to be generated. */
4856 gcc_assert (ncopies >= 1);
4857
4858 bool found_mode = false;
4859 scalar_mode lhs_mode = SCALAR_TYPE_MODE (lhs_type);
4860 scalar_mode rhs_mode = SCALAR_TYPE_MODE (rhs_type);
4861 opt_scalar_mode rhs_mode_iter;
4862
4863 /* Supportable by target? */
4864 switch (modifier)
4865 {
4866 case NONE:
4867 if (code != FIX_TRUNC_EXPR
4868 && code != FLOAT_EXPR
4869 && !CONVERT_EXPR_CODE_P (code))
4870 return false;
4871 if (supportable_convert_operation (code, vectype_out, vectype_in, &code1))
4872 break;
4873 /* FALLTHRU */
4874 unsupported:
4875 if (dump_enabled_p ())
4876 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4877 "conversion not supported by target.\n");
4878 return false;
4879
4880 case WIDEN:
4881 if (supportable_widening_operation (vinfo, code, stmt_info, vectype_out,
4882 vectype_in, &code1, &code2,
4883 &multi_step_cvt, &interm_types))
4884 {
4885 /* A binary widening operation can only be supported directly by the
4886 architecture. */
4887 gcc_assert (!(multi_step_cvt && op_type == binary_op));
4888 break;
4889 }
4890
4891 if (code != FLOAT_EXPR
4892 || GET_MODE_SIZE (lhs_mode) <= GET_MODE_SIZE (rhs_mode))
4893 goto unsupported;
4894
4895 fltsz = GET_MODE_SIZE (lhs_mode);
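/* A widening FLOAT_EXPR such as short -> double may not be supported
directly. Look for an intermediate integer type CVT_TYPE so that the
conversion can be done as an integer widening (e.g. short -> long
via NOP_EXPR) followed by a FLOAT_EXPR from CVT_TYPE to VECTYPE_OUT,
which may itself be a widening step. */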
4896 FOR_EACH_2XWIDER_MODE (rhs_mode_iter, rhs_mode)
4897 {
4898 rhs_mode = rhs_mode_iter.require ();
4899 if (GET_MODE_SIZE (rhs_mode) > fltsz)
4900 break;
4901
4902 cvt_type
4903 = build_nonstandard_integer_type (GET_MODE_BITSIZE (rhs_mode), 0);
4904 cvt_type = get_same_sized_vectype (cvt_type, vectype_in);
4905 if (cvt_type == NULL_TREE)
4906 goto unsupported;
4907
4908 if (GET_MODE_SIZE (rhs_mode) == fltsz)
4909 {
4910 if (!supportable_convert_operation (code, vectype_out,
4911 cvt_type, &codecvt1))
4912 goto unsupported;
4913 }
4914 else if (!supportable_widening_operation (vinfo, code, stmt_info,
4915 vectype_out, cvt_type,
4916 &codecvt1, &codecvt2,
4917 &multi_step_cvt,
4918 &interm_types))
4919 continue;
4920 else
4921 gcc_assert (multi_step_cvt == 0);
4922
4923 if (supportable_widening_operation (vinfo, NOP_EXPR, stmt_info,
4924 cvt_type,
4925 vectype_in, &code1, &code2,
4926 &multi_step_cvt, &interm_types))
4927 {
4928 found_mode = true;
4929 break;
4930 }
4931 }
4932
4933 if (!found_mode)
4934 goto unsupported;
4935
4936 if (GET_MODE_SIZE (rhs_mode) == fltsz)
4937 codecvt2 = ERROR_MARK;
4938 else
4939 {
4940 multi_step_cvt++;
4941 interm_types.safe_push (cvt_type);
4942 cvt_type = NULL_TREE;
4943 }
4944 break;
4945
4946 case NARROW:
4947 gcc_assert (op_type == unary_op);
4948 if (supportable_narrowing_operation (code, vectype_out, vectype_in,
4949 &code1, &multi_step_cvt,
4950 &interm_types))
4951 break;
4952
4953 if (code != FIX_TRUNC_EXPR
4954 || GET_MODE_SIZE (lhs_mode) >= GET_MODE_SIZE (rhs_mode))
4955 goto unsupported;
4956
4957 cvt_type
4958 = build_nonstandard_integer_type (GET_MODE_BITSIZE (rhs_mode), 0);
4959 cvt_type = get_same_sized_vectype (cvt_type, vectype_in);
4960 if (cvt_type == NULL_TREE)
4961 goto unsupported;
4962 if (!supportable_convert_operation (code, cvt_type, vectype_in,
4963 &codecvt1))
4964 goto unsupported;
4965 if (supportable_narrowing_operation (NOP_EXPR, vectype_out, cvt_type,
4966 &code1, &multi_step_cvt,
4967 &interm_types))
4968 break;
4969 goto unsupported;
4970
4971 default:
4972 gcc_unreachable ();
4973 }
4974
4975 if (!vec_stmt) /* transformation not required. */
4976 {
4977 if (slp_node
4978 && (!vect_maybe_update_slp_op_vectype (slp_op0, vectype_in)
4979 || !vect_maybe_update_slp_op_vectype (slp_op1, vectype_in)))
4980 {
4981 if (dump_enabled_p ())
4982 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4983 "incompatible vector types for invariants\n");
4984 return false;
4985 }
4986 DUMP_VECT_SCOPE ("vectorizable_conversion");
4987 if (modifier == NONE)
4988 {
4989 STMT_VINFO_TYPE (stmt_info) = type_conversion_vec_info_type;
4990 vect_model_simple_cost (vinfo, stmt_info, ncopies, dt, ndts, slp_node,
4991 cost_vec);
4992 }
4993 else if (modifier == NARROW)
4994 {
4995 STMT_VINFO_TYPE (stmt_info) = type_demotion_vec_info_type;
4996 /* The final packing step produces one vector result per copy. */
4997 unsigned int nvectors
4998 = (slp_node ? SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node) : ncopies);
4999 vect_model_promotion_demotion_cost (stmt_info, dt, nvectors,
5000 multi_step_cvt, cost_vec);
5001 }
5002 else
5003 {
5004 STMT_VINFO_TYPE (stmt_info) = type_promotion_vec_info_type;
5005 /* The initial unpacking step produces two vector results
5006 per copy. MULTI_STEP_CVT is 0 for a single conversion,
5007 so >> MULTI_STEP_CVT divides by 2^(number of steps - 1). */
5008 unsigned int nvectors
5009 = (slp_node
5010 ? SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node) >> multi_step_cvt
5011 : ncopies * 2);
5012 vect_model_promotion_demotion_cost (stmt_info, dt, nvectors,
5013 multi_step_cvt, cost_vec);
5014 }
5015 interm_types.release ();
5016 return true;
5017 }
5018
5019 /* Transform. */
5020 if (dump_enabled_p ())
5021 dump_printf_loc (MSG_NOTE, vect_location,
5022 "transform conversion. ncopies = %d.\n", ncopies);
5023
5024 if (op_type == binary_op)
5025 {
5026 if (CONSTANT_CLASS_P (op0))
5027 op0 = fold_convert (TREE_TYPE (op1), op0);
5028 else if (CONSTANT_CLASS_P (op1))
5029 op1 = fold_convert (TREE_TYPE (op0), op1);
5030 }
5031
5032 /* In case of multi-step conversion, we first generate conversion operations
5033 to the intermediate types, and then from those types to the final one.
5034 We create vector destinations for the intermediate types (TYPES) received
5035 from supportable_*_operation, and store them in the correct order
5036 for future use in vect_create_vectorized_*_stmts (). */
5037 auto_vec<tree> vec_dsts (multi_step_cvt + 1);
5038 vec_dest = vect_create_destination_var (scalar_dest,
5039 (cvt_type && modifier == WIDEN)
5040 ? cvt_type : vectype_out);
5041 vec_dsts.quick_push (vec_dest);
5042
5043 if (multi_step_cvt)
5044 {
5045 for (i = interm_types.length () - 1;
5046 interm_types.iterate (i, &intermediate_type); i--)
5047 {
5048 vec_dest = vect_create_destination_var (scalar_dest,
5049 intermediate_type);
5050 vec_dsts.quick_push (vec_dest);
5051 }
5052 }
5053
5054 if (cvt_type)
5055 vec_dest = vect_create_destination_var (scalar_dest,
5056 modifier == WIDEN
5057 ? vectype_out : cvt_type);
5058
5059 if (!slp_node)
5060 {
5061 if (modifier == WIDEN)
5062 {
5063 vec_oprnds0.create (multi_step_cvt ? vect_pow2 (multi_step_cvt) : 1);
5064 if (op_type == binary_op)
5065 vec_oprnds1.create (1);
5066 }
5067 else if (modifier == NARROW)
5068 vec_oprnds0.create (
5069 2 * (multi_step_cvt ? vect_pow2 (multi_step_cvt) : 1));
5070 }
5071 else if (code == WIDEN_LSHIFT_EXPR)
5072 vec_oprnds1.create (slp_node->vec_stmts_size);
5073
5074 last_oprnd = op0;
5075 prev_stmt_info = NULL;
5076 switch (modifier)
5077 {
5078 case NONE:
5079 for (j = 0; j < ncopies; j++)
5080 {
5081 if (j == 0)
5082 vect_get_vec_defs (vinfo, op0, NULL, stmt_info, &vec_oprnds0,
5083 NULL, slp_node);
5084 else
5085 vect_get_vec_defs_for_stmt_copy (vinfo, &vec_oprnds0, NULL);
5086
5087 FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
5088 {
5089 stmt_vec_info new_stmt_info;
5090 /* Arguments are ready, create the new vector stmt. */
5091 gcc_assert (TREE_CODE_LENGTH (code1) == unary_op);
5092 gassign *new_stmt = gimple_build_assign (vec_dest, code1, vop0);
5093 new_temp = make_ssa_name (vec_dest, new_stmt);
5094 gimple_assign_set_lhs (new_stmt, new_temp);
5095 new_stmt_info
5096 = vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
5097
5098 if (slp_node)
5099 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt_info);
5100 else
5101 {
5102 if (!prev_stmt_info)
5103 STMT_VINFO_VEC_STMT (stmt_info)
5104 = *vec_stmt = new_stmt_info;
5105 else
5106 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt_info;
5107 prev_stmt_info = new_stmt_info;
5108 }
5109 }
5110 }
5111 break;
5112
5113 case WIDEN:
5114       /* In case the vectorization factor (VF) is bigger than the number
5115 	 of elements that we can fit in a vectype (nunits), we have to
5116 	 generate more than one vector stmt, i.e., we need to "unroll"
5117 	 the vector stmt by a factor VF/nunits.  */
5118 for (j = 0; j < ncopies; j++)
5119 {
5120 /* Handle uses. */
5121 if (j == 0)
5122 {
5123 if (slp_node)
5124 {
5125 if (code == WIDEN_LSHIFT_EXPR)
5126 {
5127 unsigned int k;
5128
5129 vec_oprnd1 = op1;
5130 /* Store vec_oprnd1 for every vector stmt to be created
5131 for SLP_NODE. We check during the analysis that all
5132 the shift arguments are the same. */
5133 for (k = 0; k < slp_node->vec_stmts_size - 1; k++)
5134 vec_oprnds1.quick_push (vec_oprnd1);
5135
5136 vect_get_vec_defs (vinfo, op0, NULL_TREE, stmt_info,
5137 &vec_oprnds0, NULL, slp_node);
5138 }
5139 else
5140 vect_get_vec_defs (vinfo, op0, op1, stmt_info, &vec_oprnds0,
5141 &vec_oprnds1, slp_node);
5142 }
5143 else
5144 {
5145 vec_oprnd0 = vect_get_vec_def_for_operand (vinfo,
5146 op0, stmt_info);
5147 vec_oprnds0.quick_push (vec_oprnd0);
5148 if (op_type == binary_op)
5149 {
5150 if (code == WIDEN_LSHIFT_EXPR)
5151 vec_oprnd1 = op1;
5152 else
5153 vec_oprnd1
5154 = vect_get_vec_def_for_operand (vinfo,
5155 op1, stmt_info);
5156 vec_oprnds1.quick_push (vec_oprnd1);
5157 }
5158 }
5159 }
5160 else
5161 {
5162 vec_oprnd0 = vect_get_vec_def_for_stmt_copy (vinfo, vec_oprnd0);
5163 vec_oprnds0.truncate (0);
5164 vec_oprnds0.quick_push (vec_oprnd0);
5165 if (op_type == binary_op)
5166 {
5167 if (code == WIDEN_LSHIFT_EXPR)
5168 vec_oprnd1 = op1;
5169 else
5170 vec_oprnd1 = vect_get_vec_def_for_stmt_copy (vinfo,
5171 vec_oprnd1);
5172 vec_oprnds1.truncate (0);
5173 vec_oprnds1.quick_push (vec_oprnd1);
5174 }
5175 }
5176
5177 /* Arguments are ready. Create the new vector stmts. */
5178 for (i = multi_step_cvt; i >= 0; i--)
5179 {
5180 tree this_dest = vec_dsts[i];
5181 enum tree_code c1 = code1, c2 = code2;
5182 if (i == 0 && codecvt2 != ERROR_MARK)
5183 {
5184 c1 = codecvt1;
5185 c2 = codecvt2;
5186 }
5187 vect_create_vectorized_promotion_stmts (vinfo, &vec_oprnds0,
5188 &vec_oprnds1, stmt_info,
5189 this_dest, gsi,
5190 c1, c2, op_type);
5191 }
5192
5193 FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
5194 {
5195 stmt_vec_info new_stmt_info;
5196 if (cvt_type)
5197 {
5198 gcc_assert (TREE_CODE_LENGTH (codecvt1) == unary_op);
5199 new_temp = make_ssa_name (vec_dest);
5200 gassign *new_stmt
5201 = gimple_build_assign (new_temp, codecvt1, vop0);
5202 new_stmt_info
5203 = vect_finish_stmt_generation (vinfo, stmt_info,
5204 new_stmt, gsi);
5205 }
5206 else
5207 new_stmt_info = vinfo->lookup_def (vop0);
5208
5209 if (slp_node)
5210 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt_info);
5211 else
5212 {
5213 if (!prev_stmt_info)
5214 STMT_VINFO_VEC_STMT (stmt_info) = new_stmt_info;
5215 else
5216 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt_info;
5217 prev_stmt_info = new_stmt_info;
5218 }
5219 }
5220 }
5221
5222 *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
5223 break;
5224
5225 case NARROW:
5226       /* In case the vectorization factor (VF) is bigger than the number
5227 	 of elements that we can fit in a vectype (nunits), we have to
5228 	 generate more than one vector stmt, i.e., we need to "unroll"
5229 	 the vector stmt by a factor VF/nunits.  */
5230 for (j = 0; j < ncopies; j++)
5231 {
5232 /* Handle uses. */
5233 if (slp_node)
5234 vect_get_vec_defs (vinfo, op0, NULL_TREE, stmt_info, &vec_oprnds0,
5235 NULL, slp_node);
5236 else
5237 {
5238 vec_oprnds0.truncate (0);
5239 vect_get_loop_based_defs (vinfo,
5240 &last_oprnd, stmt_info, &vec_oprnds0,
5241 vect_pow2 (multi_step_cvt) - 1);
5242 }
5243
5244 /* Arguments are ready. Create the new vector stmts. */
5245 if (cvt_type)
5246 FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
5247 {
5248 gcc_assert (TREE_CODE_LENGTH (codecvt1) == unary_op);
5249 new_temp = make_ssa_name (vec_dest);
5250 gassign *new_stmt
5251 = gimple_build_assign (new_temp, codecvt1, vop0);
5252 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
5253 vec_oprnds0[i] = new_temp;
5254 }
5255
5256 vect_create_vectorized_demotion_stmts (vinfo, &vec_oprnds0,
5257 multi_step_cvt,
5258 stmt_info, vec_dsts, gsi,
5259 slp_node, code1,
5260 &prev_stmt_info);
5261 }
5262
5263 *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
5264 break;
5265 }
5266
5267 vec_oprnds0.release ();
5268 vec_oprnds1.release ();
5269 interm_types.release ();
5270
5271 return true;
5272 }
5273
5274 /* Return true if we can assume from the scalar form of STMT_INFO that
5275 neither the scalar nor the vector forms will generate code. STMT_INFO
5276 is known not to involve a data reference. */
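/* For instance (an illustrative case): a plain SSA copy, a
   VIEW_CONVERT_EXPR, or a cast between int and unsigned int satisfies
   this, whereas a cast from int to short does not, since it changes the
   precision.  */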
5277
5278 bool
5279 vect_nop_conversion_p (stmt_vec_info stmt_info)
5280 {
5281 gassign *stmt = dyn_cast <gassign *> (stmt_info->stmt);
5282 if (!stmt)
5283 return false;
5284
5285 tree lhs = gimple_assign_lhs (stmt);
5286 tree_code code = gimple_assign_rhs_code (stmt);
5287 tree rhs = gimple_assign_rhs1 (stmt);
5288
5289 if (code == SSA_NAME || code == VIEW_CONVERT_EXPR)
5290 return true;
5291
5292 if (CONVERT_EXPR_CODE_P (code))
5293 return tree_nop_conversion_p (TREE_TYPE (lhs), TREE_TYPE (rhs));
5294
5295 return false;
5296 }
5297
5298 /* Function vectorizable_assignment.
5299
5300 Check if STMT_INFO performs an assignment (copy) that can be vectorized.
5301 If VEC_STMT is also passed, vectorize the STMT_INFO: create a vectorized
5302 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
5303 Return true if STMT_INFO is vectorizable in this way. */
5304
5305 static bool
5306 vectorizable_assignment (vec_info *vinfo,
5307 stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
5308 stmt_vec_info *vec_stmt, slp_tree slp_node,
5309 stmt_vector_for_cost *cost_vec)
5310 {
5311 tree vec_dest;
5312 tree scalar_dest;
5313 tree op;
5314 loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
5315 tree new_temp;
5316 enum vect_def_type dt[1] = {vect_unknown_def_type};
5317 int ndts = 1;
5318 int ncopies;
5319 int i, j;
5320 vec<tree> vec_oprnds = vNULL;
5321 tree vop;
5322 bb_vec_info bb_vinfo = dyn_cast <bb_vec_info> (vinfo);
5323 stmt_vec_info prev_stmt_info = NULL;
5324 enum tree_code code;
5325 tree vectype_in;
5326
5327 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
5328 return false;
5329
5330 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
5331 && ! vec_stmt)
5332 return false;
5333
5334 /* Is vectorizable assignment? */
5335 gassign *stmt = dyn_cast <gassign *> (stmt_info->stmt);
5336 if (!stmt)
5337 return false;
5338
5339 scalar_dest = gimple_assign_lhs (stmt);
5340 if (TREE_CODE (scalar_dest) != SSA_NAME)
5341 return false;
5342
5343 if (STMT_VINFO_DATA_REF (stmt_info))
5344 return false;
5345
5346 code = gimple_assign_rhs_code (stmt);
5347 if (!(gimple_assign_single_p (stmt)
5348 || code == PAREN_EXPR
5349 || CONVERT_EXPR_CODE_P (code)))
5350 return false;
5351
5352 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
5353 poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
5354
5355 /* Multiple types in SLP are handled by creating the appropriate number of
5356 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
5357 case of SLP. */
5358 if (slp_node)
5359 ncopies = 1;
5360 else
5361 ncopies = vect_get_num_copies (loop_vinfo, vectype);
5362
5363 gcc_assert (ncopies >= 1);
5364
5365 slp_tree slp_op;
5366 if (!vect_is_simple_use (vinfo, stmt_info, slp_node, 0, &op, &slp_op,
5367 &dt[0], &vectype_in))
5368 {
5369 if (dump_enabled_p ())
5370 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5371 "use not simple.\n");
5372 return false;
5373 }
5374 if (!vectype_in)
5375 vectype_in = get_vectype_for_scalar_type (vinfo, TREE_TYPE (op), slp_node);
5376
5377 /* We can handle NOP_EXPR conversions that do not change the number
5378 of elements or the vector size. */
5379 if ((CONVERT_EXPR_CODE_P (code)
5380 || code == VIEW_CONVERT_EXPR)
5381 && (!vectype_in
5382 || maybe_ne (TYPE_VECTOR_SUBPARTS (vectype_in), nunits)
5383 || maybe_ne (GET_MODE_SIZE (TYPE_MODE (vectype)),
5384 GET_MODE_SIZE (TYPE_MODE (vectype_in)))))
5385 return false;
5386
5387 /* We do not handle bit-precision changes. */
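  /* For instance (an illustrative case): a conversion from int to a
     3-bit bit-field type would need an extra masking step and is
     rejected here, whereas widening an unsigned bit-precision value
     only zero-extends, keeps the bit pattern, and is allowed by the
     exception below.  */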
5388 if ((CONVERT_EXPR_CODE_P (code)
5389 || code == VIEW_CONVERT_EXPR)
5390 && INTEGRAL_TYPE_P (TREE_TYPE (scalar_dest))
5391 && (!type_has_mode_precision_p (TREE_TYPE (scalar_dest))
5392 || !type_has_mode_precision_p (TREE_TYPE (op)))
5393 /* But a conversion that does not change the bit-pattern is ok. */
5394 && !((TYPE_PRECISION (TREE_TYPE (scalar_dest))
5395 > TYPE_PRECISION (TREE_TYPE (op)))
5396 && TYPE_UNSIGNED (TREE_TYPE (op)))
5397        /* Conversion between boolean types of different sizes is
5398 	  a simple assignment in case their vectypes are the same
5399 	  boolean vectors.  */
5400 && (!VECTOR_BOOLEAN_TYPE_P (vectype)
5401 || !VECTOR_BOOLEAN_TYPE_P (vectype_in)))
5402 {
5403 if (dump_enabled_p ())
5404 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5405 "type conversion to/from bit-precision "
5406 "unsupported.\n");
5407 return false;
5408 }
5409
5410 if (!vec_stmt) /* transformation not required. */
5411 {
5412 if (slp_node
5413 && !vect_maybe_update_slp_op_vectype (slp_op, vectype_in))
5414 {
5415 if (dump_enabled_p ())
5416 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5417 "incompatible vector types for invariants\n");
5418 return false;
5419 }
5420 STMT_VINFO_TYPE (stmt_info) = assignment_vec_info_type;
5421 DUMP_VECT_SCOPE ("vectorizable_assignment");
5422 if (!vect_nop_conversion_p (stmt_info))
5423 vect_model_simple_cost (vinfo, stmt_info, ncopies, dt, ndts, slp_node,
5424 cost_vec);
5425 return true;
5426 }
5427
5428 /* Transform. */
5429 if (dump_enabled_p ())
5430 dump_printf_loc (MSG_NOTE, vect_location, "transform assignment.\n");
5431
5432 /* Handle def. */
5433 vec_dest = vect_create_destination_var (scalar_dest, vectype);
5434
5435 /* Handle use. */
5436 for (j = 0; j < ncopies; j++)
5437 {
5438 /* Handle uses. */
5439 if (j == 0)
5440 vect_get_vec_defs (vinfo, op, NULL, stmt_info, &vec_oprnds, NULL,
5441 slp_node);
5442 else
5443 vect_get_vec_defs_for_stmt_copy (vinfo, &vec_oprnds, NULL);
5444
5445 	  /* Arguments are ready.  Create the new vector stmt.  */
5446 stmt_vec_info new_stmt_info = NULL;
5447 FOR_EACH_VEC_ELT (vec_oprnds, i, vop)
5448 {
5449 if (CONVERT_EXPR_CODE_P (code)
5450 || code == VIEW_CONVERT_EXPR)
5451 vop = build1 (VIEW_CONVERT_EXPR, vectype, vop);
5452 gassign *new_stmt = gimple_build_assign (vec_dest, vop);
5453 new_temp = make_ssa_name (vec_dest, new_stmt);
5454 gimple_assign_set_lhs (new_stmt, new_temp);
5455 new_stmt_info
5456 = vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
5457 if (slp_node)
5458 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt_info);
5459 }
5460
5461 if (slp_node)
5462 continue;
5463
5464 if (j == 0)
5465 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt_info;
5466 else
5467 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt_info;
5468
5469 prev_stmt_info = new_stmt_info;
5470 }
5471
5472 vec_oprnds.release ();
5473 return true;
5474 }
5475
5476
5477 /* Return TRUE if CODE (a shift operation) is supported for SCALAR_TYPE
5478 either as shift by a scalar or by a vector. */
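/* A minimal usage sketch (hypothetical caller, for illustration only):
   a pattern recognizer could test

     if (vect_supportable_shift (vinfo, RSHIFT_EXPR, TREE_TYPE (oprnd)))
       ...

   before deciding to rewrite an operation into a shift-based form.  */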
5479
5480 bool
5481 vect_supportable_shift (vec_info *vinfo, enum tree_code code, tree scalar_type)
5482 {
5484 machine_mode vec_mode;
5485 optab optab;
5486 int icode;
5487 tree vectype;
5488
5489 vectype = get_vectype_for_scalar_type (vinfo, scalar_type);
5490 if (!vectype)
5491 return false;
5492
5493 optab = optab_for_tree_code (code, vectype, optab_scalar);
5494 if (!optab
5495 || optab_handler (optab, TYPE_MODE (vectype)) == CODE_FOR_nothing)
5496 {
5497 optab = optab_for_tree_code (code, vectype, optab_vector);
5498 if (!optab
5499 || (optab_handler (optab, TYPE_MODE (vectype))
5500 == CODE_FOR_nothing))
5501 return false;
5502 }
5503
5504 vec_mode = TYPE_MODE (vectype);
5505 icode = (int) optab_handler (optab, vec_mode);
5506 if (icode == CODE_FOR_nothing)
5507 return false;
5508
5509 return true;
5510 }
5511
5512
5513 /* Function vectorizable_shift.
5514
5515 Check if STMT_INFO performs a shift operation that can be vectorized.
5516 If VEC_STMT is also passed, vectorize the STMT_INFO: create a vectorized
5517 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
5518 Return true if STMT_INFO is vectorizable in this way. */
5519
5520 static bool
5521 vectorizable_shift (vec_info *vinfo,
5522 stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
5523 stmt_vec_info *vec_stmt, slp_tree slp_node,
5524 stmt_vector_for_cost *cost_vec)
5525 {
5526 tree vec_dest;
5527 tree scalar_dest;
5528 tree op0, op1 = NULL;
5529 tree vec_oprnd1 = NULL_TREE;
5530 tree vectype;
5531 loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
5532 enum tree_code code;
5533 machine_mode vec_mode;
5534 tree new_temp;
5535 optab optab;
5536 int icode;
5537 machine_mode optab_op2_mode;
5538 enum vect_def_type dt[2] = {vect_unknown_def_type, vect_unknown_def_type};
5539 int ndts = 2;
5540 stmt_vec_info prev_stmt_info;
5541 poly_uint64 nunits_in;
5542 poly_uint64 nunits_out;
5543 tree vectype_out;
5544 tree op1_vectype;
5545 int ncopies;
5546 int j, i;
5547 vec<tree> vec_oprnds0 = vNULL;
5548 vec<tree> vec_oprnds1 = vNULL;
5549 tree vop0, vop1;
5550 unsigned int k;
5551 bool scalar_shift_arg = true;
5552 bb_vec_info bb_vinfo = dyn_cast <bb_vec_info> (vinfo);
5553 bool incompatible_op1_vectype_p = false;
5554
5555 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
5556 return false;
5557
5558 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
5559 && STMT_VINFO_DEF_TYPE (stmt_info) != vect_nested_cycle
5560 && ! vec_stmt)
5561 return false;
5562
5563 /* Is STMT a vectorizable binary/unary operation? */
5564 gassign *stmt = dyn_cast <gassign *> (stmt_info->stmt);
5565 if (!stmt)
5566 return false;
5567
5568 if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME)
5569 return false;
5570
5571 code = gimple_assign_rhs_code (stmt);
5572
5573 if (!(code == LSHIFT_EXPR || code == RSHIFT_EXPR || code == LROTATE_EXPR
5574 || code == RROTATE_EXPR))
5575 return false;
5576
5577 scalar_dest = gimple_assign_lhs (stmt);
5578 vectype_out = STMT_VINFO_VECTYPE (stmt_info);
5579 if (!type_has_mode_precision_p (TREE_TYPE (scalar_dest)))
5580 {
5581 if (dump_enabled_p ())
5582 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5583 "bit-precision shifts not supported.\n");
5584 return false;
5585 }
5586
5587 slp_tree slp_op0;
5588 if (!vect_is_simple_use (vinfo, stmt_info, slp_node,
5589 0, &op0, &slp_op0, &dt[0], &vectype))
5590 {
5591 if (dump_enabled_p ())
5592 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5593 "use not simple.\n");
5594 return false;
5595 }
5596 /* If op0 is an external or constant def, infer the vector type
5597 from the scalar type. */
5598 if (!vectype)
5599 vectype = get_vectype_for_scalar_type (vinfo, TREE_TYPE (op0), slp_node);
5600 if (vec_stmt)
5601 gcc_assert (vectype);
5602 if (!vectype)
5603 {
5604 if (dump_enabled_p ())
5605 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5606 "no vectype for scalar type\n");
5607 return false;
5608 }
5609
5610 nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
5611 nunits_in = TYPE_VECTOR_SUBPARTS (vectype);
5612 if (maybe_ne (nunits_out, nunits_in))
5613 return false;
5614
5615 stmt_vec_info op1_def_stmt_info;
5616 slp_tree slp_op1;
5617 if (!vect_is_simple_use (vinfo, stmt_info, slp_node, 1, &op1, &slp_op1,
5618 &dt[1], &op1_vectype, &op1_def_stmt_info))
5619 {
5620 if (dump_enabled_p ())
5621 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5622 "use not simple.\n");
5623 return false;
5624 }
5625
5626 /* Multiple types in SLP are handled by creating the appropriate number of
5627 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
5628 case of SLP. */
5629 if (slp_node)
5630 ncopies = 1;
5631 else
5632 ncopies = vect_get_num_copies (loop_vinfo, vectype);
5633
5634 gcc_assert (ncopies >= 1);
5635
5636   /* Determine whether the shift amount is a vector or a scalar.  If the
5637 shift/rotate amount is a vector, use the vector/vector shift optabs. */
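  /* For example (illustrative): in  a[i] = b[i] << 3  the shift amount is
     a loop-invariant scalar, whereas in  a[i] = b[i] << c[i]  it varies
     per element and the vector/vector optab must be used.  */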
5638
5639 if ((dt[1] == vect_internal_def
5640 || dt[1] == vect_induction_def
5641 || dt[1] == vect_nested_cycle)
5642 && !slp_node)
5643 scalar_shift_arg = false;
5644 else if (dt[1] == vect_constant_def
5645 || dt[1] == vect_external_def
5646 || dt[1] == vect_internal_def)
5647 {
5648       /* In SLP, we need to check whether the shift count is the same
5649 	 for all statements; in loops, if it is a constant or invariant,
5650 	 it is always a scalar shift.  */
5651 if (slp_node)
5652 {
5653 vec<stmt_vec_info> stmts = SLP_TREE_SCALAR_STMTS (slp_node);
5654 stmt_vec_info slpstmt_info;
5655
5656 FOR_EACH_VEC_ELT (stmts, k, slpstmt_info)
5657 {
5658 gassign *slpstmt = as_a <gassign *> (slpstmt_info->stmt);
5659 if (!operand_equal_p (gimple_assign_rhs2 (slpstmt), op1, 0))
5660 scalar_shift_arg = false;
5661 }
5662
5663 /* For internal SLP defs we have to make sure we see scalar stmts
5664 for all vector elements.
5665 ??? For different vectors we could resort to a different
5666 scalar shift operand but code-generation below simply always
5667 takes the first. */
5668 if (dt[1] == vect_internal_def
5669 && maybe_ne (nunits_out * SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node),
5670 stmts.length ()))
5671 scalar_shift_arg = false;
5672 }
5673
5674       /* If the shift amount is computed by a pattern stmt we cannot
5675 	 use the scalar amount directly, so give up and use a vector
5676 	 shift.  */
5677 if (op1_def_stmt_info && is_pattern_stmt_p (op1_def_stmt_info))
5678 scalar_shift_arg = false;
5679 }
5680 else
5681 {
5682 if (dump_enabled_p ())
5683 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5684 "operand mode requires invariant argument.\n");
5685 return false;
5686 }
5687
5688 /* Vector shifted by vector. */
5689 bool was_scalar_shift_arg = scalar_shift_arg;
5690 if (!scalar_shift_arg)
5691 {
5692 optab = optab_for_tree_code (code, vectype, optab_vector);
5693 if (dump_enabled_p ())
5694 dump_printf_loc (MSG_NOTE, vect_location,
5695 "vector/vector shift/rotate found.\n");
5696
5697 if (!op1_vectype)
5698 op1_vectype = get_vectype_for_scalar_type (vinfo, TREE_TYPE (op1),
5699 slp_op1);
5700 incompatible_op1_vectype_p
5701 = (op1_vectype == NULL_TREE
5702 || maybe_ne (TYPE_VECTOR_SUBPARTS (op1_vectype),
5703 TYPE_VECTOR_SUBPARTS (vectype))
5704 || TYPE_MODE (op1_vectype) != TYPE_MODE (vectype));
5705 if (incompatible_op1_vectype_p
5706 && (!slp_node
5707 || SLP_TREE_DEF_TYPE (slp_op1) != vect_constant_def
5708 || slp_op1->refcnt != 1))
5709 {
5710 if (dump_enabled_p ())
5711 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5712 "unusable type for last operand in"
5713 " vector/vector shift/rotate.\n");
5714 return false;
5715 }
5716 }
5717 /* See if the machine has a vector shifted by scalar insn and if not
5718 then see if it has a vector shifted by vector insn. */
5719 else
5720 {
5721 optab = optab_for_tree_code (code, vectype, optab_scalar);
5722 if (optab
5723 && optab_handler (optab, TYPE_MODE (vectype)) != CODE_FOR_nothing)
5724 {
5725 if (dump_enabled_p ())
5726 dump_printf_loc (MSG_NOTE, vect_location,
5727 "vector/scalar shift/rotate found.\n");
5728 }
5729 else
5730 {
5731 optab = optab_for_tree_code (code, vectype, optab_vector);
5732 if (optab
5733 && (optab_handler (optab, TYPE_MODE (vectype))
5734 != CODE_FOR_nothing))
5735 {
5736 scalar_shift_arg = false;
5737
5738 if (dump_enabled_p ())
5739 dump_printf_loc (MSG_NOTE, vect_location,
5740 "vector/vector shift/rotate found.\n");
5741
5742 if (!op1_vectype)
5743 op1_vectype = get_vectype_for_scalar_type (vinfo,
5744 TREE_TYPE (op1),
5745 slp_node);
5746
5747 		  /* Unlike the other binary operators, the rhs of a
5748 		     shift/rotate is an int rather than the same type as
5749 		     the lhs, so make sure the scalar has the right type
5750 		     when dealing with vectors of long long/long/short/char.  */
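		  /* For instance (illustrative only): in
		       long long a;  int n;  ...  a = a << n;
		     the shift amount n has scalar type int while the
		     vector elements are long long, so n may need to be
		     converted before it can be used as the vector shift
		     operand.  */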
5751 incompatible_op1_vectype_p
5752 = (!op1_vectype
5753 || !tree_nop_conversion_p (TREE_TYPE (vectype),
5754 TREE_TYPE (op1)));
5755 }
5756 }
5757 }
5758
5759 /* Supportable by target? */
5760 if (!optab)
5761 {
5762 if (dump_enabled_p ())
5763 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5764 "no optab.\n");
5765 return false;
5766 }
5767 vec_mode = TYPE_MODE (vectype);
5768 icode = (int) optab_handler (optab, vec_mode);
5769 if (icode == CODE_FOR_nothing)
5770 {
5771 if (dump_enabled_p ())
5772 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5773 "op not supported by target.\n");
5774 /* Check only during analysis. */
5775 if (maybe_ne (GET_MODE_SIZE (vec_mode), UNITS_PER_WORD)
5776 || (!vec_stmt
5777 && !vect_worthwhile_without_simd_p (vinfo, code)))
5778 return false;
5779 if (dump_enabled_p ())
5780 dump_printf_loc (MSG_NOTE, vect_location,
5781 "proceeding using word mode.\n");
5782 }
5783
5784 /* Worthwhile without SIMD support? Check only during analysis. */
5785 if (!vec_stmt
5786 && !VECTOR_MODE_P (TYPE_MODE (vectype))
5787 && !vect_worthwhile_without_simd_p (vinfo, code))
5788 {
5789 if (dump_enabled_p ())
5790 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5791 "not worthwhile without SIMD support.\n");
5792 return false;
5793 }
5794
5795 if (!vec_stmt) /* transformation not required. */
5796 {
5797 if (slp_node
5798 && (!vect_maybe_update_slp_op_vectype (slp_op0, vectype)
5799 || (!scalar_shift_arg
5800 && (!incompatible_op1_vectype_p
5801 || dt[1] == vect_constant_def)
5802 && !vect_maybe_update_slp_op_vectype
5803 (slp_op1,
5804 incompatible_op1_vectype_p ? vectype : op1_vectype))))
5805 {
5806 if (dump_enabled_p ())
5807 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5808 "incompatible vector types for invariants\n");
5809 return false;
5810 }
5811 /* Now adjust the constant shift amount in place. */
5812 if (slp_node
5813 && incompatible_op1_vectype_p
5814 && dt[1] == vect_constant_def)
5815 {
5816 for (unsigned i = 0;
5817 i < SLP_TREE_SCALAR_OPS (slp_op1).length (); ++i)
5818 {
5819 SLP_TREE_SCALAR_OPS (slp_op1)[i]
5820 = fold_convert (TREE_TYPE (vectype),
5821 SLP_TREE_SCALAR_OPS (slp_op1)[i]);
5822 gcc_assert ((TREE_CODE (SLP_TREE_SCALAR_OPS (slp_op1)[i])
5823 == INTEGER_CST));
5824 }
5825 }
5826 STMT_VINFO_TYPE (stmt_info) = shift_vec_info_type;
5827 DUMP_VECT_SCOPE ("vectorizable_shift");
5828 vect_model_simple_cost (vinfo, stmt_info, ncopies, dt,
5829 scalar_shift_arg ? 1 : ndts, slp_node, cost_vec);
5830 return true;
5831 }
5832
5833 /* Transform. */
5834
5835 if (dump_enabled_p ())
5836 dump_printf_loc (MSG_NOTE, vect_location,
5837 "transform binary/unary operation.\n");
5838
5839 if (incompatible_op1_vectype_p && !slp_node)
5840 {
5841 op1 = fold_convert (TREE_TYPE (vectype), op1);
5842 if (dt[1] != vect_constant_def)
5843 op1 = vect_init_vector (vinfo, stmt_info, op1,
5844 TREE_TYPE (vectype), NULL);
5845 }
5846
5847 /* Handle def. */
5848 vec_dest = vect_create_destination_var (scalar_dest, vectype);
5849
5850 prev_stmt_info = NULL;
5851 for (j = 0; j < ncopies; j++)
5852 {
5853 /* Handle uses. */
5854 if (j == 0)
5855 {
5856 if (scalar_shift_arg)
5857 {
5858 /* Vector shl and shr insn patterns can be defined with scalar
5859 operand 2 (shift operand). In this case, use constant or loop
5860 invariant op1 directly, without extending it to vector mode
5861 first. */
5862 optab_op2_mode = insn_data[icode].operand[2].mode;
5863 if (!VECTOR_MODE_P (optab_op2_mode))
5864 {
5865 if (dump_enabled_p ())
5866 dump_printf_loc (MSG_NOTE, vect_location,
5867 "operand 1 using scalar mode.\n");
5868 vec_oprnd1 = op1;
5869 vec_oprnds1.create (slp_node ? slp_node->vec_stmts_size : 1);
5870 vec_oprnds1.quick_push (vec_oprnd1);
5871 if (slp_node)
5872 {
5873 /* Store vec_oprnd1 for every vector stmt to be created
5874 for SLP_NODE. We check during the analysis that all
5875 the shift arguments are the same.
5876 TODO: Allow different constants for different vector
5877 stmts generated for an SLP instance. */
5878 for (k = 0; k < slp_node->vec_stmts_size - 1; k++)
5879 vec_oprnds1.quick_push (vec_oprnd1);
5880 }
5881 }
5882 }
5883 else if (slp_node && incompatible_op1_vectype_p)
5884 {
5885 if (was_scalar_shift_arg)
5886 {
5887 /* If the argument was the same in all lanes create
5888 the correctly typed vector shift amount directly. */
5889 op1 = fold_convert (TREE_TYPE (vectype), op1);
5890 op1 = vect_init_vector (vinfo, stmt_info,
5891 op1, TREE_TYPE (vectype),
5892 !loop_vinfo ? gsi : NULL);
5893 vec_oprnd1 = vect_init_vector (vinfo, stmt_info, op1, vectype,
5894 !loop_vinfo ? gsi : NULL);
5895 vec_oprnds1.create (slp_node->vec_stmts_size);
5896 for (k = 0; k < slp_node->vec_stmts_size; k++)
5897 vec_oprnds1.quick_push (vec_oprnd1);
5898 }
5899 else if (dt[1] == vect_constant_def)
5900 /* The constant shift amount has been adjusted in place. */
5901 ;
5902 else
5903 gcc_assert (TYPE_MODE (op1_vectype) == TYPE_MODE (vectype));
5904 }
5905
5906 	  /* vec_oprnd1 is available if operand 1 should be of a scalar type
5907 	     (a special case for certain kinds of vector shifts); otherwise,
5908 	     operand 1 should be of a vector type (the usual case).  */
5909 if (vec_oprnd1)
5910 vect_get_vec_defs (vinfo, op0, NULL_TREE, stmt_info,
5911 &vec_oprnds0, NULL, slp_node);
5912 else
5913 vect_get_vec_defs (vinfo, op0, op1, stmt_info,
5914 &vec_oprnds0, &vec_oprnds1, slp_node);
5915 }
5916 else
5917 vect_get_vec_defs_for_stmt_copy (vinfo, &vec_oprnds0, &vec_oprnds1);
5918
5919 /* Arguments are ready. Create the new vector stmt. */
5920 stmt_vec_info new_stmt_info = NULL;
5921 FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
5922 {
5923 vop1 = vec_oprnds1[i];
5924 gassign *new_stmt = gimple_build_assign (vec_dest, code, vop0, vop1);
5925 new_temp = make_ssa_name (vec_dest, new_stmt);
5926 gimple_assign_set_lhs (new_stmt, new_temp);
5927 new_stmt_info
5928 = vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
5929 if (slp_node)
5930 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt_info);
5931 }
5932
5933 if (slp_node)
5934 continue;
5935
5936 if (j == 0)
5937 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt_info;
5938 else
5939 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt_info;
5940 prev_stmt_info = new_stmt_info;
5941 }
5942
5943 vec_oprnds0.release ();
5944 vec_oprnds1.release ();
5945
5946 return true;
5947 }
5948
5949
5950 /* Function vectorizable_operation.
5951
5952 Check if STMT_INFO performs a binary, unary or ternary operation that can
5953 be vectorized.
5954 If VEC_STMT is also passed, vectorize STMT_INFO: create a vectorized
5955 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
5956 Return true if STMT_INFO is vectorizable in this way. */
5957
5958 static bool
5959 vectorizable_operation (vec_info *vinfo,
5960 stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
5961 stmt_vec_info *vec_stmt, slp_tree slp_node,
5962 stmt_vector_for_cost *cost_vec)
5963 {
5964 tree vec_dest;
5965 tree scalar_dest;
5966 tree op0, op1 = NULL_TREE, op2 = NULL_TREE;
5967 tree vectype;
5968 loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
5969 enum tree_code code, orig_code;
5970 machine_mode vec_mode;
5971 tree new_temp;
5972 int op_type;
5973 optab optab;
5974 bool target_support_p;
5975 enum vect_def_type dt[3]
5976 = {vect_unknown_def_type, vect_unknown_def_type, vect_unknown_def_type};
5977 int ndts = 3;
5978 stmt_vec_info prev_stmt_info;
5979 poly_uint64 nunits_in;
5980 poly_uint64 nunits_out;
5981 tree vectype_out;
5982 int ncopies, vec_num;
5983 int j, i;
5984 vec<tree> vec_oprnds0 = vNULL;
5985 vec<tree> vec_oprnds1 = vNULL;
5986 vec<tree> vec_oprnds2 = vNULL;
5987 tree vop0, vop1, vop2;
5988 bb_vec_info bb_vinfo = dyn_cast <bb_vec_info> (vinfo);
5989
5990 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
5991 return false;
5992
5993 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
5994 && ! vec_stmt)
5995 return false;
5996
5997 /* Is STMT a vectorizable binary/unary operation? */
5998 gassign *stmt = dyn_cast <gassign *> (stmt_info->stmt);
5999 if (!stmt)
6000 return false;
6001
6002 /* Loads and stores are handled in vectorizable_{load,store}. */
6003 if (STMT_VINFO_DATA_REF (stmt_info))
6004 return false;
6005
6006 orig_code = code = gimple_assign_rhs_code (stmt);
6007
6008 /* Shifts are handled in vectorizable_shift. */
6009 if (code == LSHIFT_EXPR
6010 || code == RSHIFT_EXPR
6011 || code == LROTATE_EXPR
6012 || code == RROTATE_EXPR)
6013 return false;
6014
6015 /* Comparisons are handled in vectorizable_comparison. */
6016 if (TREE_CODE_CLASS (code) == tcc_comparison)
6017 return false;
6018
6019 /* Conditions are handled in vectorizable_condition. */
6020 if (code == COND_EXPR)
6021 return false;
6022
6023 /* For pointer addition and subtraction, we should use the normal
6024 plus and minus for the vector operation. */
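  /* E.g. (illustrative):  q_2 = p_1 + 16  is a POINTER_PLUS_EXPR in GIMPLE
     but is vectorized as an ordinary PLUS_EXPR on vectors of
     pointer-sized integer elements.  */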
6025 if (code == POINTER_PLUS_EXPR)
6026 code = PLUS_EXPR;
6027 if (code == POINTER_DIFF_EXPR)
6028 code = MINUS_EXPR;
6029
6030 /* Support only unary or binary operations. */
6031 op_type = TREE_CODE_LENGTH (code);
6032 if (op_type != unary_op && op_type != binary_op && op_type != ternary_op)
6033 {
6034 if (dump_enabled_p ())
6035 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6036 "num. args = %d (not unary/binary/ternary op).\n",
6037 op_type);
6038 return false;
6039 }
6040
6041 scalar_dest = gimple_assign_lhs (stmt);
6042 vectype_out = STMT_VINFO_VECTYPE (stmt_info);
6043
6044 /* Most operations cannot handle bit-precision types without extra
6045 truncations. */
6046 bool mask_op_p = VECTOR_BOOLEAN_TYPE_P (vectype_out);
6047 if (!mask_op_p
6048 && !type_has_mode_precision_p (TREE_TYPE (scalar_dest))
6049 /* Exception are bitwise binary operations. */
6050 && code != BIT_IOR_EXPR
6051 && code != BIT_XOR_EXPR
6052 && code != BIT_AND_EXPR)
6053 {
6054 if (dump_enabled_p ())
6055 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6056 "bit-precision arithmetic not supported.\n");
6057 return false;
6058 }
6059
6060 slp_tree slp_op0;
6061 if (!vect_is_simple_use (vinfo, stmt_info, slp_node,
6062 0, &op0, &slp_op0, &dt[0], &vectype))
6063 {
6064 if (dump_enabled_p ())
6065 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6066 "use not simple.\n");
6067 return false;
6068 }
6069 /* If op0 is an external or constant def, infer the vector type
6070 from the scalar type. */
6071 if (!vectype)
6072 {
6073       /* For a boolean type we cannot determine the vectype from an
6074 	 invariant value (we don't know whether it is a vector of
6075 	 booleans or a vector of integers).  We use the output
6076 	 vectype because operations on booleans don't change the
6077 	 type.  */
6078 if (VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (op0)))
6079 {
6080 if (!VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (scalar_dest)))
6081 {
6082 if (dump_enabled_p ())
6083 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6084 "not supported operation on bool value.\n");
6085 return false;
6086 }
6087 vectype = vectype_out;
6088 }
6089 else
6090 vectype = get_vectype_for_scalar_type (vinfo, TREE_TYPE (op0),
6091 slp_node);
6092 }
6093 if (vec_stmt)
6094 gcc_assert (vectype);
6095 if (!vectype)
6096 {
6097 if (dump_enabled_p ())
6098 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6099 "no vectype for scalar type %T\n",
6100 TREE_TYPE (op0));
6101
6102 return false;
6103 }
6104
6105 nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
6106 nunits_in = TYPE_VECTOR_SUBPARTS (vectype);
6107 if (maybe_ne (nunits_out, nunits_in))
6108 return false;
6109
6110 tree vectype2 = NULL_TREE, vectype3 = NULL_TREE;
6111 slp_tree slp_op1 = NULL, slp_op2 = NULL;
6112 if (op_type == binary_op || op_type == ternary_op)
6113 {
6114 if (!vect_is_simple_use (vinfo, stmt_info, slp_node,
6115 1, &op1, &slp_op1, &dt[1], &vectype2))
6116 {
6117 if (dump_enabled_p ())
6118 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6119 "use not simple.\n");
6120 return false;
6121 }
6122 }
6123 if (op_type == ternary_op)
6124 {
6125 if (!vect_is_simple_use (vinfo, stmt_info, slp_node,
6126 2, &op2, &slp_op2, &dt[2], &vectype3))
6127 {
6128 if (dump_enabled_p ())
6129 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6130 "use not simple.\n");
6131 return false;
6132 }
6133 }
6134
6135 /* Multiple types in SLP are handled by creating the appropriate number of
6136 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
6137 case of SLP. */
6138 if (slp_node)
6139 {
6140 ncopies = 1;
6141 vec_num = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
6142 }
6143 else
6144 {
6145 ncopies = vect_get_num_copies (loop_vinfo, vectype);
6146 vec_num = 1;
6147 }
6148
6149 gcc_assert (ncopies >= 1);
6150
6151 /* Reject attempts to combine mask types with nonmask types, e.g. if
6152 we have an AND between a (nonmask) boolean loaded from memory and
6153 a (mask) boolean result of a comparison.
6154
6155 TODO: We could easily fix these cases up using pattern statements. */
6156 if (VECTOR_BOOLEAN_TYPE_P (vectype) != mask_op_p
6157 || (vectype2 && VECTOR_BOOLEAN_TYPE_P (vectype2) != mask_op_p)
6158 || (vectype3 && VECTOR_BOOLEAN_TYPE_P (vectype3) != mask_op_p))
6159 {
6160 if (dump_enabled_p ())
6161 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6162 "mixed mask and nonmask vector types\n");
6163 return false;
6164 }
6165
6166 /* Supportable by target? */
6167
6168 vec_mode = TYPE_MODE (vectype);
6169 if (code == MULT_HIGHPART_EXPR)
6170 target_support_p = can_mult_highpart_p (vec_mode, TYPE_UNSIGNED (vectype));
6171 else
6172 {
6173 optab = optab_for_tree_code (code, vectype, optab_default);
6174 if (!optab)
6175 {
6176 if (dump_enabled_p ())
6177 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6178 "no optab.\n");
6179 return false;
6180 }
6181 target_support_p = (optab_handler (optab, vec_mode)
6182 != CODE_FOR_nothing);
6183 }
6184
6185 if (!target_support_p)
6186 {
6187 if (dump_enabled_p ())
6188 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6189 "op not supported by target.\n");
6190 /* Check only during analysis. */
6191 if (maybe_ne (GET_MODE_SIZE (vec_mode), UNITS_PER_WORD)
6192 || (!vec_stmt && !vect_worthwhile_without_simd_p (vinfo, code)))
6193 return false;
6194 if (dump_enabled_p ())
6195 dump_printf_loc (MSG_NOTE, vect_location,
6196 "proceeding using word mode.\n");
6197 }
6198
6199 /* Worthwhile without SIMD support? Check only during analysis. */
6200 if (!VECTOR_MODE_P (vec_mode)
6201 && !vec_stmt
6202 && !vect_worthwhile_without_simd_p (vinfo, code))
6203 {
6204 if (dump_enabled_p ())
6205 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6206 "not worthwhile without SIMD support.\n");
6207 return false;
6208 }
6209
6210 int reduc_idx = STMT_VINFO_REDUC_IDX (stmt_info);
6211 vec_loop_masks *masks = (loop_vinfo ? &LOOP_VINFO_MASKS (loop_vinfo) : NULL);
6212 internal_fn cond_fn = get_conditional_internal_fn (code);
6213
6214 if (!vec_stmt) /* transformation not required. */
6215 {
6216 /* If this operation is part of a reduction, a fully-masked loop
6217 should only change the active lanes of the reduction chain,
6218 keeping the inactive lanes as-is. */
6219 if (loop_vinfo
6220 && LOOP_VINFO_CAN_FULLY_MASK_P (loop_vinfo)
6221 && reduc_idx >= 0)
6222 {
6223 if (cond_fn == IFN_LAST
6224 || !direct_internal_fn_supported_p (cond_fn, vectype,
6225 OPTIMIZE_FOR_SPEED))
6226 {
6227 if (dump_enabled_p ())
6228 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6229 "can't use a fully-masked loop because no"
6230 " conditional operation is available.\n");
6231 LOOP_VINFO_CAN_FULLY_MASK_P (loop_vinfo) = false;
6232 }
6233 else
6234 vect_record_loop_mask (loop_vinfo, masks, ncopies * vec_num,
6235 vectype, NULL);
6236 }
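      /* A sketch of the effect (illustrative names): a reduction statement
	 such as  sum_1 = x_2 + sum_0  is later emitted as
	   .COND_ADD (loop_mask, vx, vsum, vsum)
	 so that inactive lanes simply pass the reduction input through.  */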
6237
6238 /* Put types on constant and invariant SLP children. */
6239 if (slp_node
6240 && (!vect_maybe_update_slp_op_vectype (slp_op0, vectype)
6241 || !vect_maybe_update_slp_op_vectype (slp_op1, vectype)
6242 || !vect_maybe_update_slp_op_vectype (slp_op2, vectype)))
6243 {
6244 if (dump_enabled_p ())
6245 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6246 "incompatible vector types for invariants\n");
6247 return false;
6248 }
6249
6250 STMT_VINFO_TYPE (stmt_info) = op_vec_info_type;
6251 DUMP_VECT_SCOPE ("vectorizable_operation");
6252 vect_model_simple_cost (vinfo, stmt_info,
6253 ncopies, dt, ndts, slp_node, cost_vec);
6254 return true;
6255 }
6256
6257 /* Transform. */
6258
6259 if (dump_enabled_p ())
6260 dump_printf_loc (MSG_NOTE, vect_location,
6261 "transform binary/unary operation.\n");
6262
6263 bool masked_loop_p = loop_vinfo && LOOP_VINFO_FULLY_MASKED_P (loop_vinfo);
6264
6265   /* POINTER_DIFF_EXPR has pointer arguments which are vectorized as
6266      vectors with unsigned elements, but the result is signed.  So, we
6267      need to compute the MINUS_EXPR into a temporary of type vectype and
6268      VIEW_CONVERT_EXPR it into the final vectype_out result.  */
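  /* Illustrative example (made-up names): for  d_3 = p_1 - q_2  the
     subtraction is carried out in the unsigned-element vectype and the
     vector result is then VIEW_CONVERT_EXPRed to the signed
     vectype_out.  */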
6269 tree vec_cvt_dest = NULL_TREE;
6270 if (orig_code == POINTER_DIFF_EXPR)
6271 {
6272 vec_dest = vect_create_destination_var (scalar_dest, vectype);
6273 vec_cvt_dest = vect_create_destination_var (scalar_dest, vectype_out);
6274 }
6275 /* Handle def. */
6276 else
6277 vec_dest = vect_create_destination_var (scalar_dest, vectype_out);
6278
6279 /* In case the vectorization factor (VF) is bigger than the number
6280 of elements that we can fit in a vectype (nunits), we have to generate
6281      more than one vector stmt, i.e., we need to "unroll" the
6282 vector stmt by a factor VF/nunits. In doing so, we record a pointer
6283 from one copy of the vector stmt to the next, in the field
6284 STMT_VINFO_RELATED_STMT. This is necessary in order to allow following
6285 stages to find the correct vector defs to be used when vectorizing
6286 stmts that use the defs of the current stmt. The example below
6287 illustrates the vectorization process when VF=16 and nunits=4 (i.e.,
6288 we need to create 4 vectorized stmts):
6289
6290 before vectorization:
6291 RELATED_STMT VEC_STMT
6292 S1: x = memref - -
6293 S2: z = x + 1 - -
6294
6295 step 1: vectorize stmt S1 (done in vectorizable_load. See more details
6296 there):
6297 RELATED_STMT VEC_STMT
6298 VS1_0: vx0 = memref0 VS1_1 -
6299 VS1_1: vx1 = memref1 VS1_2 -
6300 VS1_2: vx2 = memref2 VS1_3 -
6301 VS1_3: vx3 = memref3 - -
6302 S1: x = load - VS1_0
6303 S2: z = x + 1 - -
6304
6305 step2: vectorize stmt S2 (done here):
6306 To vectorize stmt S2 we first need to find the relevant vector
6307 def for the first operand 'x'. This is, as usual, obtained from
6308 the vector stmt recorded in the STMT_VINFO_VEC_STMT of the stmt
6309 that defines 'x' (S1). This way we find the stmt VS1_0, and the
6310 relevant vector def 'vx0'. Having found 'vx0' we can generate
6311 the vector stmt VS2_0, and as usual, record it in the
6312 STMT_VINFO_VEC_STMT of stmt S2.
6313 When creating the second copy (VS2_1), we obtain the relevant vector
6314 def from the vector stmt recorded in the STMT_VINFO_RELATED_STMT of
6315 stmt VS1_0. This way we find the stmt VS1_1 and the relevant
6316 vector def 'vx1'. Using 'vx1' we create stmt VS2_1 and record a
6317 pointer to it in the STMT_VINFO_RELATED_STMT of the vector stmt VS2_0.
6318 Similarly when creating stmts VS2_2 and VS2_3. This is the resulting
6319 chain of stmts and pointers:
6320 RELATED_STMT VEC_STMT
6321 VS1_0: vx0 = memref0 VS1_1 -
6322 VS1_1: vx1 = memref1 VS1_2 -
6323 VS1_2: vx2 = memref2 VS1_3 -
6324 VS1_3: vx3 = memref3 - -
6325 S1: x = load - VS1_0
6326 VS2_0: vz0 = vx0 + v1 VS2_1 -
6327 VS2_1: vz1 = vx1 + v1 VS2_2 -
6328 VS2_2: vz2 = vx2 + v1 VS2_3 -
6329 VS2_3: vz3 = vx3 + v1 - -
6330 S2: z = x + 1 - VS2_0 */
6331
6332 prev_stmt_info = NULL;
6333 for (j = 0; j < ncopies; j++)
6334 {
6335 /* Handle uses. */
6336 if (j == 0)
6337 {
6338 if (op_type == binary_op)
6339 vect_get_vec_defs (vinfo, op0, op1, stmt_info,
6340 &vec_oprnds0, &vec_oprnds1, slp_node);
6341 else if (op_type == ternary_op)
6342 {
6343 if (slp_node)
6344 {
6345 auto_vec<vec<tree> > vec_defs(3);
6346 vect_get_slp_defs (vinfo, slp_node, &vec_defs);
6347 vec_oprnds0 = vec_defs[0];
6348 vec_oprnds1 = vec_defs[1];
6349 vec_oprnds2 = vec_defs[2];
6350 }
6351 else
6352 {
6353 vect_get_vec_defs (vinfo, op0, op1, stmt_info, &vec_oprnds0,
6354 &vec_oprnds1, NULL);
6355 vect_get_vec_defs (vinfo, op2, NULL_TREE, stmt_info,
6356 &vec_oprnds2, NULL, NULL);
6357 }
6358 }
6359 else
6360 vect_get_vec_defs (vinfo, op0, NULL_TREE, stmt_info, &vec_oprnds0,
6361 NULL, slp_node);
6362 }
6363 else
6364 {
6365 vect_get_vec_defs_for_stmt_copy (vinfo, &vec_oprnds0, &vec_oprnds1);
6366 if (op_type == ternary_op)
6367 {
6368 tree vec_oprnd = vec_oprnds2.pop ();
6369 vec_oprnds2.quick_push (vect_get_vec_def_for_stmt_copy (vinfo,
6370 vec_oprnd));
6371 }
6372 }
6373
6374 /* Arguments are ready. Create the new vector stmt. */
6375 stmt_vec_info new_stmt_info = NULL;
6376 FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
6377 {
6378 vop1 = ((op_type == binary_op || op_type == ternary_op)
6379 ? vec_oprnds1[i] : NULL_TREE);
6380 vop2 = ((op_type == ternary_op)
6381 ? vec_oprnds2[i] : NULL_TREE);
6382 if (masked_loop_p && reduc_idx >= 0)
6383 {
6384 /* Perform the operation on active elements only and take
6385 inactive elements from the reduction chain input. */
6386 gcc_assert (!vop2);
6387 vop2 = reduc_idx == 1 ? vop1 : vop0;
6388 tree mask = vect_get_loop_mask (gsi, masks, vec_num * ncopies,
6389 vectype, i * ncopies + j);
6390 gcall *call = gimple_build_call_internal (cond_fn, 4, mask,
6391 vop0, vop1, vop2);
6392 new_temp = make_ssa_name (vec_dest, call);
6393 gimple_call_set_lhs (call, new_temp);
6394 gimple_call_set_nothrow (call, true);
6395 new_stmt_info
6396 = vect_finish_stmt_generation (vinfo, stmt_info, call, gsi);
6397 }
6398 else
6399 {
6400 gassign *new_stmt = gimple_build_assign (vec_dest, code,
6401 vop0, vop1, vop2);
6402 new_temp = make_ssa_name (vec_dest, new_stmt);
6403 gimple_assign_set_lhs (new_stmt, new_temp);
6404 new_stmt_info
6405 = vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
6406 if (vec_cvt_dest)
6407 {
6408 new_temp = build1 (VIEW_CONVERT_EXPR, vectype_out, new_temp);
6409 gassign *new_stmt
6410 = gimple_build_assign (vec_cvt_dest, VIEW_CONVERT_EXPR,
6411 new_temp);
6412 new_temp = make_ssa_name (vec_cvt_dest, new_stmt);
6413 gimple_assign_set_lhs (new_stmt, new_temp);
6414 new_stmt_info = vect_finish_stmt_generation (vinfo, stmt_info,
6415 new_stmt, gsi);
6416 }
6417 }
6418 if (slp_node)
6419 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt_info);
6420 }
6421
6422 if (slp_node)
6423 continue;
6424
6425 if (j == 0)
6426 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt_info;
6427 else
6428 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt_info;
6429 prev_stmt_info = new_stmt_info;
6430 }
6431
6432 vec_oprnds0.release ();
6433 vec_oprnds1.release ();
6434 vec_oprnds2.release ();
6435
6436 return true;
6437 }
6438
6439 /* A helper function to ensure data reference DR_INFO's base alignment. */
6440
6441 static void
6442 ensure_base_align (dr_vec_info *dr_info)
6443 {
6444 if (dr_info->misalignment == DR_MISALIGNMENT_UNINITIALIZED)
6445 return;
6446
6447 if (dr_info->base_misaligned)
6448 {
6449 tree base_decl = dr_info->base_decl;
6450
6451 // We should only be able to increase the alignment of a base object if
6452 // we know what its new alignment should be at compile time.
6453 unsigned HOST_WIDE_INT align_base_to =
6454 DR_TARGET_ALIGNMENT (dr_info).to_constant () * BITS_PER_UNIT;
6455
6456 if (decl_in_symtab_p (base_decl))
6457 symtab_node::get (base_decl)->increase_alignment (align_base_to);
6458 else if (DECL_ALIGN (base_decl) < align_base_to)
6459 {
6460 SET_DECL_ALIGN (base_decl, align_base_to);
6461 DECL_USER_ALIGN (base_decl) = 1;
6462 }
6463 dr_info->base_misaligned = false;
6464 }
6465 }
6466
6467
6468 /* Function get_group_alias_ptr_type.
6469
6470 Return the alias type for the group starting at FIRST_STMT_INFO. */
6471
6472 static tree
6473 get_group_alias_ptr_type (stmt_vec_info first_stmt_info)
6474 {
6475 struct data_reference *first_dr, *next_dr;
6476
6477 first_dr = STMT_VINFO_DATA_REF (first_stmt_info);
6478 stmt_vec_info next_stmt_info = DR_GROUP_NEXT_ELEMENT (first_stmt_info);
6479 while (next_stmt_info)
6480 {
6481 next_dr = STMT_VINFO_DATA_REF (next_stmt_info);
6482 if (get_alias_set (DR_REF (first_dr))
6483 != get_alias_set (DR_REF (next_dr)))
6484 {
6485 if (dump_enabled_p ())
6486 dump_printf_loc (MSG_NOTE, vect_location,
6487 "conflicting alias set types.\n");
6488 return ptr_type_node;
6489 }
6490 next_stmt_info = DR_GROUP_NEXT_ELEMENT (next_stmt_info);
6491 }
6492 return reference_alias_ptr_type (DR_REF (first_dr));
6493 }
6494
6495
6496 /* Function scan_operand_equal_p.
6497
6498 Helper function for check_scan_store. Compare two references
6499 with .GOMP_SIMD_LANE bases. */
6500
6501 static bool
6502 scan_operand_equal_p (tree ref1, tree ref2)
6503 {
6504 tree ref[2] = { ref1, ref2 };
6505 poly_int64 bitsize[2], bitpos[2];
6506 tree offset[2], base[2];
6507 for (int i = 0; i < 2; ++i)
6508 {
6509 machine_mode mode;
6510 int unsignedp, reversep, volatilep = 0;
6511 base[i] = get_inner_reference (ref[i], &bitsize[i], &bitpos[i],
6512 &offset[i], &mode, &unsignedp,
6513 &reversep, &volatilep);
6514 if (reversep || volatilep || maybe_ne (bitpos[i], 0))
6515 return false;
6516 if (TREE_CODE (base[i]) == MEM_REF
6517 && offset[i] == NULL_TREE
6518 && TREE_CODE (TREE_OPERAND (base[i], 0)) == SSA_NAME)
6519 {
6520 gimple *def_stmt = SSA_NAME_DEF_STMT (TREE_OPERAND (base[i], 0));
6521 if (is_gimple_assign (def_stmt)
6522 && gimple_assign_rhs_code (def_stmt) == POINTER_PLUS_EXPR
6523 && TREE_CODE (gimple_assign_rhs1 (def_stmt)) == ADDR_EXPR
6524 && TREE_CODE (gimple_assign_rhs2 (def_stmt)) == SSA_NAME)
6525 {
6526 if (maybe_ne (mem_ref_offset (base[i]), 0))
6527 return false;
6528 base[i] = TREE_OPERAND (gimple_assign_rhs1 (def_stmt), 0);
6529 offset[i] = gimple_assign_rhs2 (def_stmt);
6530 }
6531 }
6532 }
6533
6534 if (!operand_equal_p (base[0], base[1], 0))
6535 return false;
6536 if (maybe_ne (bitsize[0], bitsize[1]))
6537 return false;
6538 if (offset[0] != offset[1])
6539 {
6540 if (!offset[0] || !offset[1])
6541 return false;
6542 if (!operand_equal_p (offset[0], offset[1], 0))
6543 {
6544 tree step[2];
6545 for (int i = 0; i < 2; ++i)
6546 {
6547 step[i] = integer_one_node;
6548 if (TREE_CODE (offset[i]) == SSA_NAME)
6549 {
6550 gimple *def_stmt = SSA_NAME_DEF_STMT (offset[i]);
6551 if (is_gimple_assign (def_stmt)
6552 && gimple_assign_rhs_code (def_stmt) == MULT_EXPR
6553 && (TREE_CODE (gimple_assign_rhs2 (def_stmt))
6554 == INTEGER_CST))
6555 {
6556 step[i] = gimple_assign_rhs2 (def_stmt);
6557 offset[i] = gimple_assign_rhs1 (def_stmt);
6558 }
6559 }
6560 else if (TREE_CODE (offset[i]) == MULT_EXPR)
6561 {
6562 step[i] = TREE_OPERAND (offset[i], 1);
6563 offset[i] = TREE_OPERAND (offset[i], 0);
6564 }
6565 tree rhs1 = NULL_TREE;
6566 if (TREE_CODE (offset[i]) == SSA_NAME)
6567 {
6568 gimple *def_stmt = SSA_NAME_DEF_STMT (offset[i]);
6569 if (gimple_assign_cast_p (def_stmt))
6570 rhs1 = gimple_assign_rhs1 (def_stmt);
6571 }
6572 else if (CONVERT_EXPR_P (offset[i]))
6573 rhs1 = TREE_OPERAND (offset[i], 0);
6574 if (rhs1
6575 && INTEGRAL_TYPE_P (TREE_TYPE (rhs1))
6576 && INTEGRAL_TYPE_P (TREE_TYPE (offset[i]))
6577 && (TYPE_PRECISION (TREE_TYPE (offset[i]))
6578 >= TYPE_PRECISION (TREE_TYPE (rhs1))))
6579 offset[i] = rhs1;
6580 }
6581 if (!operand_equal_p (offset[0], offset[1], 0)
6582 || !operand_equal_p (step[0], step[1], 0))
6583 return false;
6584 }
6585 }
6586 return true;
6587 }
6588
6589
6590 enum scan_store_kind {
6591 /* Normal permutation. */
6592 scan_store_kind_perm,
6593
6594 /* Whole vector left shift permutation with zero init. */
6595 scan_store_kind_lshift_zero,
6596
6597 /* Whole vector left shift permutation and VEC_COND_EXPR. */
6598 scan_store_kind_lshift_cond
6599 };
6600
6601 /* Function scan_store_can_perm_p.
6602 
6603    Verify if we can perform the needed permutations or whole vector shifts.
6604    Return -1 on failure, otherwise the exact log2 of vectype's nunits.
6605    USE_WHOLE_VECTOR is a vector of enum scan_store_kind recording which
6606    operation to do at each step.  */
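/* For example: with nunits == 4 the loop below tries the two-input
   permutations { 0, 4, 5, 6 } and { 0, 1, 4, 5 } for the intermediate
   scan steps and finally the single-input broadcast { 3, 3, 3, 3 } that
   splats the last element.  */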
6607
6608 static int
6609 scan_store_can_perm_p (tree vectype, tree init,
6610 vec<enum scan_store_kind> *use_whole_vector = NULL)
6611 {
6612 enum machine_mode vec_mode = TYPE_MODE (vectype);
6613 unsigned HOST_WIDE_INT nunits;
6614 if (!TYPE_VECTOR_SUBPARTS (vectype).is_constant (&nunits))
6615 return -1;
6616 int units_log2 = exact_log2 (nunits);
6617 if (units_log2 <= 0)
6618 return -1;
6619
6620 int i;
6621 enum scan_store_kind whole_vector_shift_kind = scan_store_kind_perm;
6622 for (i = 0; i <= units_log2; ++i)
6623 {
6624 unsigned HOST_WIDE_INT j, k;
6625 enum scan_store_kind kind = scan_store_kind_perm;
6626 vec_perm_builder sel (nunits, nunits, 1);
6627 sel.quick_grow (nunits);
6628 if (i == units_log2)
6629 {
6630 for (j = 0; j < nunits; ++j)
6631 sel[j] = nunits - 1;
6632 }
6633 else
6634 {
6635 for (j = 0; j < (HOST_WIDE_INT_1U << i); ++j)
6636 sel[j] = j;
6637 for (k = 0; j < nunits; ++j, ++k)
6638 sel[j] = nunits + k;
6639 }
6640 vec_perm_indices indices (sel, i == units_log2 ? 1 : 2, nunits);
6641 if (!can_vec_perm_const_p (vec_mode, indices))
6642 {
6643 if (i == units_log2)
6644 return -1;
6645
6646 if (whole_vector_shift_kind == scan_store_kind_perm)
6647 {
6648 if (optab_handler (vec_shl_optab, vec_mode) == CODE_FOR_nothing)
6649 return -1;
6650 whole_vector_shift_kind = scan_store_kind_lshift_zero;
6651 	      /* Whole vector shifts shift in zeros, so if init is an all-zero
6652 		 constant, there is no need to do anything further.  */
6653 if ((TREE_CODE (init) != INTEGER_CST
6654 && TREE_CODE (init) != REAL_CST)
6655 || !initializer_zerop (init))
6656 {
6657 tree masktype = truth_type_for (vectype);
6658 if (!expand_vec_cond_expr_p (vectype, masktype, VECTOR_CST))
6659 return -1;
6660 whole_vector_shift_kind = scan_store_kind_lshift_cond;
6661 }
6662 }
6663 kind = whole_vector_shift_kind;
6664 }
6665 if (use_whole_vector)
6666 {
6667 if (kind != scan_store_kind_perm && use_whole_vector->is_empty ())
6668 use_whole_vector->safe_grow_cleared (i);
6669 if (kind != scan_store_kind_perm || !use_whole_vector->is_empty ())
6670 use_whole_vector->safe_push (kind);
6671 }
6672 }
6673
6674 return units_log2;
6675 }
6676
6677
6678 /* Function check_scan_store.
6679
6680 Check magic stores for #pragma omp scan {in,ex}clusive reductions. */
6681
6682 static bool
6683 check_scan_store (vec_info *vinfo, stmt_vec_info stmt_info, tree vectype,
6684 enum vect_def_type rhs_dt, bool slp, tree mask,
6685 vect_memory_access_type memory_access_type)
6686 {
6687 loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
6688 dr_vec_info *dr_info = STMT_VINFO_DR_INFO (stmt_info);
6689 tree ref_type;
6690
6691 gcc_assert (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) > 1);
6692 if (slp
6693 || mask
6694 || memory_access_type != VMAT_CONTIGUOUS
6695 || TREE_CODE (DR_BASE_ADDRESS (dr_info->dr)) != ADDR_EXPR
6696 || !VAR_P (TREE_OPERAND (DR_BASE_ADDRESS (dr_info->dr), 0))
6697 || loop_vinfo == NULL
6698 || LOOP_VINFO_FULLY_MASKED_P (loop_vinfo)
6699 || STMT_VINFO_GROUPED_ACCESS (stmt_info)
6700 || !integer_zerop (get_dr_vinfo_offset (vinfo, dr_info))
6701 || !integer_zerop (DR_INIT (dr_info->dr))
6702 || !(ref_type = reference_alias_ptr_type (DR_REF (dr_info->dr)))
6703 || !alias_sets_conflict_p (get_alias_set (vectype),
6704 get_alias_set (TREE_TYPE (ref_type))))
6705 {
6706 if (dump_enabled_p ())
6707 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6708 "unsupported OpenMP scan store.\n");
6709 return false;
6710 }
6711
6712 /* We need to pattern match code built by OpenMP lowering and simplified
6713 by following optimizations into something we can handle.
6714 #pragma omp simd reduction(inscan,+:r)
6715 for (...)
6716 {
6717 r += something ();
6718 #pragma omp scan inclusive (r)
6719 use (r);
6720 }
6721 shall have body with:
6722 // Initialization for input phase, store the reduction initializer:
6723 _20 = .GOMP_SIMD_LANE (simduid.3_14(D), 0);
6724 _21 = .GOMP_SIMD_LANE (simduid.3_14(D), 1);
6725 D.2042[_21] = 0;
6726 // Actual input phase:
6727 ...
6728 r.0_5 = D.2042[_20];
6729 _6 = _4 + r.0_5;
6730 D.2042[_20] = _6;
6731 // Initialization for scan phase:
6732 _25 = .GOMP_SIMD_LANE (simduid.3_14(D), 2);
6733 _26 = D.2043[_25];
6734 _27 = D.2042[_25];
6735 _28 = _26 + _27;
6736 D.2043[_25] = _28;
6737 D.2042[_25] = _28;
6738 // Actual scan phase:
6739 ...
6740 r.1_8 = D.2042[_20];
6741 ...
6742      The "omp simd array" variable D.2042 holds the privatized copy used
6743      inside the loop and D.2043 is another one that holds copies of
6744      the current original list item.  The separate GOMP_SIMD_LANE ifn
6745      kinds are there in order to allow optimizing the initializer store
6746      and combiner sequence, e.g. if it is originally some C++-ish
6747      user-defined reduction, while still letting the vectorizer pattern
6748      recognize it and turn it into the appropriate vectorized scan.
6749
6750 For exclusive scan, this is slightly different:
6751 #pragma omp simd reduction(inscan,+:r)
6752 for (...)
6753 {
6754 use (r);
6755 #pragma omp scan exclusive (r)
6756 r += something ();
6757 }
6758 shall have body with:
6759 // Initialization for input phase, store the reduction initializer:
6760 _20 = .GOMP_SIMD_LANE (simduid.3_14(D), 0);
6761 _21 = .GOMP_SIMD_LANE (simduid.3_14(D), 1);
6762 D.2042[_21] = 0;
6763 // Actual input phase:
6764 ...
6765 r.0_5 = D.2042[_20];
6766 _6 = _4 + r.0_5;
6767 D.2042[_20] = _6;
6768 // Initialization for scan phase:
6769 _25 = .GOMP_SIMD_LANE (simduid.3_14(D), 3);
6770 _26 = D.2043[_25];
6771 D.2044[_25] = _26;
6772 _27 = D.2042[_25];
6773 _28 = _26 + _27;
6774 D.2043[_25] = _28;
6775 // Actual scan phase:
6776 ...
6777 r.1_8 = D.2044[_20];
6778 ... */
6779
6780 if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) == 2)
6781 {
6782 /* Match the D.2042[_21] = 0; store above. Just require that
6783 it is a constant or external definition store. */
6784 if (rhs_dt != vect_constant_def && rhs_dt != vect_external_def)
6785 {
6786 fail_init:
6787 if (dump_enabled_p ())
6788 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6789 "unsupported OpenMP scan initializer store.\n");
6790 return false;
6791 }
6792
6793 if (! loop_vinfo->scan_map)
6794 loop_vinfo->scan_map = new hash_map<tree, tree>;
6795 tree var = TREE_OPERAND (DR_BASE_ADDRESS (dr_info->dr), 0);
6796 tree &cached = loop_vinfo->scan_map->get_or_insert (var);
6797 if (cached)
6798 goto fail_init;
6799 cached = gimple_assign_rhs1 (STMT_VINFO_STMT (stmt_info));
6800
6801 /* These stores can be vectorized normally. */
6802 return true;
6803 }
6804
6805 if (rhs_dt != vect_internal_def)
6806 {
6807 fail:
6808 if (dump_enabled_p ())
6809 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6810 "unsupported OpenMP scan combiner pattern.\n");
6811 return false;
6812 }
6813
6814 gimple *stmt = STMT_VINFO_STMT (stmt_info);
6815 tree rhs = gimple_assign_rhs1 (stmt);
6816 if (TREE_CODE (rhs) != SSA_NAME)
6817 goto fail;
6818
6819 gimple *other_store_stmt = NULL;
6820 tree var = TREE_OPERAND (DR_BASE_ADDRESS (dr_info->dr), 0);
6821 bool inscan_var_store
6822 = lookup_attribute ("omp simd inscan", DECL_ATTRIBUTES (var)) != NULL;
6823
6824 if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) == 4)
6825 {
6826 if (!inscan_var_store)
6827 {
6828 use_operand_p use_p;
6829 imm_use_iterator iter;
6830 FOR_EACH_IMM_USE_FAST (use_p, iter, rhs)
6831 {
6832 gimple *use_stmt = USE_STMT (use_p);
6833 if (use_stmt == stmt || is_gimple_debug (use_stmt))
6834 continue;
6835 if (gimple_bb (use_stmt) != gimple_bb (stmt)
6836 || !is_gimple_assign (use_stmt)
6837 || gimple_assign_rhs_class (use_stmt) != GIMPLE_BINARY_RHS
6838 || other_store_stmt
6839 || TREE_CODE (gimple_assign_lhs (use_stmt)) != SSA_NAME)
6840 goto fail;
6841 other_store_stmt = use_stmt;
6842 }
6843 if (other_store_stmt == NULL)
6844 goto fail;
6845 rhs = gimple_assign_lhs (other_store_stmt);
6846 if (!single_imm_use (rhs, &use_p, &other_store_stmt))
6847 goto fail;
6848 }
6849 }
6850 else if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) == 3)
6851 {
6852 use_operand_p use_p;
6853 imm_use_iterator iter;
6854 FOR_EACH_IMM_USE_FAST (use_p, iter, rhs)
6855 {
6856 gimple *use_stmt = USE_STMT (use_p);
6857 if (use_stmt == stmt || is_gimple_debug (use_stmt))
6858 continue;
6859 if (other_store_stmt)
6860 goto fail;
6861 other_store_stmt = use_stmt;
6862 }
6863 }
6864 else
6865 goto fail;
6866
6867 gimple *def_stmt = SSA_NAME_DEF_STMT (rhs);
6868 if (gimple_bb (def_stmt) != gimple_bb (stmt)
6869 || !is_gimple_assign (def_stmt)
6870 || gimple_assign_rhs_class (def_stmt) != GIMPLE_BINARY_RHS)
6871 goto fail;
6872
6873 enum tree_code code = gimple_assign_rhs_code (def_stmt);
6874 /* For pointer addition, we should use the normal plus for the vector
6875 operation. */
6876 switch (code)
6877 {
6878 case POINTER_PLUS_EXPR:
6879 code = PLUS_EXPR;
6880 break;
6881 case MULT_HIGHPART_EXPR:
6882 goto fail;
6883 default:
6884 break;
6885 }
6886 if (TREE_CODE_LENGTH (code) != binary_op || !commutative_tree_code (code))
6887 goto fail;
6888
6889 tree rhs1 = gimple_assign_rhs1 (def_stmt);
6890 tree rhs2 = gimple_assign_rhs2 (def_stmt);
6891 if (TREE_CODE (rhs1) != SSA_NAME || TREE_CODE (rhs2) != SSA_NAME)
6892 goto fail;
6893
6894 gimple *load1_stmt = SSA_NAME_DEF_STMT (rhs1);
6895 gimple *load2_stmt = SSA_NAME_DEF_STMT (rhs2);
6896 if (gimple_bb (load1_stmt) != gimple_bb (stmt)
6897 || !gimple_assign_load_p (load1_stmt)
6898 || gimple_bb (load2_stmt) != gimple_bb (stmt)
6899 || !gimple_assign_load_p (load2_stmt))
6900 goto fail;
6901
6902 stmt_vec_info load1_stmt_info = loop_vinfo->lookup_stmt (load1_stmt);
6903 stmt_vec_info load2_stmt_info = loop_vinfo->lookup_stmt (load2_stmt);
6904 if (load1_stmt_info == NULL
6905 || load2_stmt_info == NULL
6906 || (STMT_VINFO_SIMD_LANE_ACCESS_P (load1_stmt_info)
6907 != STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info))
6908 || (STMT_VINFO_SIMD_LANE_ACCESS_P (load2_stmt_info)
6909 != STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info)))
6910 goto fail;
6911
6912 if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) == 4 && inscan_var_store)
6913 {
6914 dr_vec_info *load1_dr_info = STMT_VINFO_DR_INFO (load1_stmt_info);
6915 if (TREE_CODE (DR_BASE_ADDRESS (load1_dr_info->dr)) != ADDR_EXPR
6916 || !VAR_P (TREE_OPERAND (DR_BASE_ADDRESS (load1_dr_info->dr), 0)))
6917 goto fail;
6918 tree var1 = TREE_OPERAND (DR_BASE_ADDRESS (load1_dr_info->dr), 0);
6919 tree lrhs;
6920 if (lookup_attribute ("omp simd inscan", DECL_ATTRIBUTES (var1)))
6921 lrhs = rhs1;
6922 else
6923 lrhs = rhs2;
6924 use_operand_p use_p;
6925 imm_use_iterator iter;
6926 FOR_EACH_IMM_USE_FAST (use_p, iter, lrhs)
6927 {
6928 gimple *use_stmt = USE_STMT (use_p);
6929 if (use_stmt == def_stmt || is_gimple_debug (use_stmt))
6930 continue;
6931 if (other_store_stmt)
6932 goto fail;
6933 other_store_stmt = use_stmt;
6934 }
6935 }
6936
6937 if (other_store_stmt == NULL)
6938 goto fail;
6939 if (gimple_bb (other_store_stmt) != gimple_bb (stmt)
6940 || !gimple_store_p (other_store_stmt))
6941 goto fail;
6942
6943 stmt_vec_info other_store_stmt_info
6944 = loop_vinfo->lookup_stmt (other_store_stmt);
6945 if (other_store_stmt_info == NULL
6946 || (STMT_VINFO_SIMD_LANE_ACCESS_P (other_store_stmt_info)
6947 != STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info)))
6948 goto fail;
6949
6950 gimple *stmt1 = stmt;
6951 gimple *stmt2 = other_store_stmt;
6952 if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) == 4 && !inscan_var_store)
6953 std::swap (stmt1, stmt2);
6954 if (scan_operand_equal_p (gimple_assign_lhs (stmt1),
6955 gimple_assign_rhs1 (load2_stmt)))
6956 {
6957 std::swap (rhs1, rhs2);
6958 std::swap (load1_stmt, load2_stmt);
6959 std::swap (load1_stmt_info, load2_stmt_info);
6960 }
6961 if (!scan_operand_equal_p (gimple_assign_lhs (stmt1),
6962 gimple_assign_rhs1 (load1_stmt)))
6963 goto fail;
6964
6965 tree var3 = NULL_TREE;
6966 if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) == 3
6967 && !scan_operand_equal_p (gimple_assign_lhs (stmt2),
6968 gimple_assign_rhs1 (load2_stmt)))
6969 goto fail;
6970 else if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) == 4)
6971 {
6972 dr_vec_info *load2_dr_info = STMT_VINFO_DR_INFO (load2_stmt_info);
6973 if (TREE_CODE (DR_BASE_ADDRESS (load2_dr_info->dr)) != ADDR_EXPR
6974 || !VAR_P (TREE_OPERAND (DR_BASE_ADDRESS (load2_dr_info->dr), 0)))
6975 goto fail;
6976 var3 = TREE_OPERAND (DR_BASE_ADDRESS (load2_dr_info->dr), 0);
6977 if (!lookup_attribute ("omp simd array", DECL_ATTRIBUTES (var3))
6978 || lookup_attribute ("omp simd inscan", DECL_ATTRIBUTES (var3))
6979 || lookup_attribute ("omp simd inscan exclusive",
6980 DECL_ATTRIBUTES (var3)))
6981 goto fail;
6982 }
6983
6984 dr_vec_info *other_dr_info = STMT_VINFO_DR_INFO (other_store_stmt_info);
6985 if (TREE_CODE (DR_BASE_ADDRESS (other_dr_info->dr)) != ADDR_EXPR
6986 || !VAR_P (TREE_OPERAND (DR_BASE_ADDRESS (other_dr_info->dr), 0)))
6987 goto fail;
6988
6989 tree var1 = TREE_OPERAND (DR_BASE_ADDRESS (dr_info->dr), 0);
6990 tree var2 = TREE_OPERAND (DR_BASE_ADDRESS (other_dr_info->dr), 0);
6991 if (!lookup_attribute ("omp simd array", DECL_ATTRIBUTES (var1))
6992 || !lookup_attribute ("omp simd array", DECL_ATTRIBUTES (var2))
6993 || (!lookup_attribute ("omp simd inscan", DECL_ATTRIBUTES (var1)))
6994 == (!lookup_attribute ("omp simd inscan", DECL_ATTRIBUTES (var2))))
6995 goto fail;
6996
6997 if (lookup_attribute ("omp simd inscan", DECL_ATTRIBUTES (var1)))
6998 std::swap (var1, var2);
6999
7000 if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) == 4)
7001 {
7002 if (!lookup_attribute ("omp simd inscan exclusive",
7003 DECL_ATTRIBUTES (var1)))
7004 goto fail;
7005 var1 = var3;
7006 }
7007
7008 if (loop_vinfo->scan_map == NULL)
7009 goto fail;
7010 tree *init = loop_vinfo->scan_map->get (var1);
7011 if (init == NULL)
7012 goto fail;
7013
7014 /* The IL is as expected, now check if we can actually vectorize it.
7015 Inclusive scan:
7016 _26 = D.2043[_25];
7017 _27 = D.2042[_25];
7018 _28 = _26 + _27;
7019 D.2043[_25] = _28;
7020 D.2042[_25] = _28;
7021 should be vectorized as (where _40 is the vectorized rhs
7022 from the D.2042[_21] = 0; store):
7023 _30 = MEM <vector(8) int> [(int *)&D.2043];
7024 _31 = MEM <vector(8) int> [(int *)&D.2042];
7025 _32 = VEC_PERM_EXPR <_40, _31, { 0, 8, 9, 10, 11, 12, 13, 14 }>;
7026 _33 = _31 + _32;
7027 // _33 = { _31[0], _31[0]+_31[1], _31[1]+_31[2], ..., _31[6]+_31[7] };
7028 _34 = VEC_PERM_EXPR <_40, _33, { 0, 1, 8, 9, 10, 11, 12, 13 }>;
7029 _35 = _33 + _34;
7030 // _35 = { _31[0], _31[0]+_31[1], _31[0]+.._31[2], _31[0]+.._31[3],
7031 // _31[1]+.._31[4], ... _31[4]+.._31[7] };
7032 _36 = VEC_PERM_EXPR <_40, _35, { 0, 1, 2, 3, 8, 9, 10, 11 }>;
7033 _37 = _35 + _36;
7034 // _37 = { _31[0], _31[0]+_31[1], _31[0]+.._31[2], _31[0]+.._31[3],
7035 // _31[0]+.._31[4], ... _31[0]+.._31[7] };
7036 _38 = _30 + _37;
7037 _39 = VEC_PERM_EXPR <_38, _38, { 7, 7, 7, 7, 7, 7, 7, 7 }>;
7038 MEM <vector(8) int> [(int *)&D.2043] = _39;
7039 MEM <vector(8) int> [(int *)&D.2042] = _38;
7040 Exclusive scan:
7041 _26 = D.2043[_25];
7042 D.2044[_25] = _26;
7043 _27 = D.2042[_25];
7044 _28 = _26 + _27;
7045 D.2043[_25] = _28;
7046 should be vectorized as (where _40 is the vectorized rhs
7047 from the D.2042[_21] = 0; store):
7048 _30 = MEM <vector(8) int> [(int *)&D.2043];
7049 _31 = MEM <vector(8) int> [(int *)&D.2042];
7050 _32 = VEC_PERM_EXPR <_40, _31, { 0, 8, 9, 10, 11, 12, 13, 14 }>;
7051 _33 = VEC_PERM_EXPR <_40, _32, { 0, 8, 9, 10, 11, 12, 13, 14 }>;
7052 _34 = _32 + _33;
7053 // _34 = { 0, _31[0], _31[0]+_31[1], _31[1]+_31[2], _31[2]+_31[3],
7054 // _31[3]+_31[4], ... _31[5]+.._31[6] };
7055 _35 = VEC_PERM_EXPR <_40, _34, { 0, 1, 8, 9, 10, 11, 12, 13 }>;
7056 _36 = _34 + _35;
7057 // _36 = { 0, _31[0], _31[0]+_31[1], _31[0]+.._31[2], _31[0]+.._31[3],
7058 // _31[1]+.._31[4], ... _31[3]+.._31[6] };
7059 _37 = VEC_PERM_EXPR <_40, _36, { 0, 1, 2, 3, 8, 9, 10, 11 }>;
7060 _38 = _36 + _37;
7061 // _38 = { 0, _31[0], _31[0]+_31[1], _31[0]+.._31[2], _31[0]+.._31[3],
7062 // _31[0]+.._31[4], ... _31[0]+.._31[6] };
7063 _39 = _30 + _38;
7064 _50 = _31 + _39;
7065 _51 = VEC_PERM_EXPR <_50, _50, { 7, 7, 7, 7, 7, 7, 7, 7 }>;
7066 MEM <vector(8) int> [(int *)&D.2044] = _39;
7067 MEM <vector(8) int> [(int *)&D.2042] = _51; */
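  /* Editorial illustration (not part of the original comment): assuming an
     8-lane chunk _31 = { 1, 2, 3, 4, 5, 6, 7, 8 }, a zero initializer _40
     and a zero carry-in _30, the three permute-and-add steps of the
     inclusive scan above compute
       _33 = { 1, 3, 5, 7, 9, 11, 13, 15 }
       _35 = { 1, 3, 6, 10, 14, 18, 22, 26 }
       _37 = { 1, 3, 6, 10, 15, 21, 28, 36 }
     so after log2(8) == 3 steps _37 holds the full inclusive prefix sums;
     _38 adds the carry-in and _39 broadcasts the last lane as the carry
     for the next chunk.  The exclusive variant's extra leading permutation
     instead yields the exclusive prefix sums
     { 0, 1, 3, 6, 10, 15, 21, 28 }.  */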
7068 enum machine_mode vec_mode = TYPE_MODE (vectype);
7069 optab optab = optab_for_tree_code (code, vectype, optab_default);
7070 if (!optab || optab_handler (optab, vec_mode) == CODE_FOR_nothing)
7071 goto fail;
7072
7073 int units_log2 = scan_store_can_perm_p (vectype, *init);
7074 if (units_log2 == -1)
7075 goto fail;
7076
7077 return true;
7078 }
7079
7080
7081 /* Function vectorizable_scan_store.
7082
7083 Helper of vectorizable_store; arguments are as for vectorizable_store.
7084 Handle only the transformation; the checking is done in check_scan_store. */
7085
7086 static bool
7087 vectorizable_scan_store (vec_info *vinfo,
7088 stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
7089 stmt_vec_info *vec_stmt, int ncopies)
7090 {
7091 loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
7092 dr_vec_info *dr_info = STMT_VINFO_DR_INFO (stmt_info);
7093 tree ref_type = reference_alias_ptr_type (DR_REF (dr_info->dr));
7094 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
7095
7096 if (dump_enabled_p ())
7097 dump_printf_loc (MSG_NOTE, vect_location,
7098 "transform scan store. ncopies = %d\n", ncopies);
7099
7100 gimple *stmt = STMT_VINFO_STMT (stmt_info);
7101 tree rhs = gimple_assign_rhs1 (stmt);
7102 gcc_assert (TREE_CODE (rhs) == SSA_NAME);
7103
7104 tree var = TREE_OPERAND (DR_BASE_ADDRESS (dr_info->dr), 0);
7105 bool inscan_var_store
7106 = lookup_attribute ("omp simd inscan", DECL_ATTRIBUTES (var)) != NULL;
7107
7108 if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) == 4 && !inscan_var_store)
7109 {
7110 use_operand_p use_p;
7111 imm_use_iterator iter;
7112 FOR_EACH_IMM_USE_FAST (use_p, iter, rhs)
7113 {
7114 gimple *use_stmt = USE_STMT (use_p);
7115 if (use_stmt == stmt || is_gimple_debug (use_stmt))
7116 continue;
7117 rhs = gimple_assign_lhs (use_stmt);
7118 break;
7119 }
7120 }
7121
7122 gimple *def_stmt = SSA_NAME_DEF_STMT (rhs);
7123 enum tree_code code = gimple_assign_rhs_code (def_stmt);
7124 if (code == POINTER_PLUS_EXPR)
7125 code = PLUS_EXPR;
7126 gcc_assert (TREE_CODE_LENGTH (code) == binary_op
7127 && commutative_tree_code (code));
7128 tree rhs1 = gimple_assign_rhs1 (def_stmt);
7129 tree rhs2 = gimple_assign_rhs2 (def_stmt);
7130 gcc_assert (TREE_CODE (rhs1) == SSA_NAME && TREE_CODE (rhs2) == SSA_NAME);
7131 gimple *load1_stmt = SSA_NAME_DEF_STMT (rhs1);
7132 gimple *load2_stmt = SSA_NAME_DEF_STMT (rhs2);
7133 stmt_vec_info load1_stmt_info = loop_vinfo->lookup_stmt (load1_stmt);
7134 stmt_vec_info load2_stmt_info = loop_vinfo->lookup_stmt (load2_stmt);
7135 dr_vec_info *load1_dr_info = STMT_VINFO_DR_INFO (load1_stmt_info);
7136 dr_vec_info *load2_dr_info = STMT_VINFO_DR_INFO (load2_stmt_info);
7137 tree var1 = TREE_OPERAND (DR_BASE_ADDRESS (load1_dr_info->dr), 0);
7138 tree var2 = TREE_OPERAND (DR_BASE_ADDRESS (load2_dr_info->dr), 0);
7139
7140 if (lookup_attribute ("omp simd inscan", DECL_ATTRIBUTES (var1)))
7141 {
7142 std::swap (rhs1, rhs2);
7143 std::swap (var1, var2);
7144 std::swap (load1_dr_info, load2_dr_info);
7145 }
7146
7147 tree *init = loop_vinfo->scan_map->get (var1);
7148 gcc_assert (init);
7149
7150 unsigned HOST_WIDE_INT nunits;
7151 if (!TYPE_VECTOR_SUBPARTS (vectype).is_constant (&nunits))
7152 gcc_unreachable ();
7153 auto_vec<enum scan_store_kind, 16> use_whole_vector;
7154 int units_log2 = scan_store_can_perm_p (vectype, *init, &use_whole_vector);
7155 gcc_assert (units_log2 > 0);
7156 auto_vec<tree, 16> perms;
7157 perms.quick_grow (units_log2 + 1);
7158 tree zero_vec = NULL_TREE, masktype = NULL_TREE;
7159 for (int i = 0; i <= units_log2; ++i)
7160 {
7161 unsigned HOST_WIDE_INT j, k;
7162 vec_perm_builder sel (nunits, nunits, 1);
7163 sel.quick_grow (nunits);
7164 if (i == units_log2)
7165 for (j = 0; j < nunits; ++j)
7166 sel[j] = nunits - 1;
7167 else
7168 {
7169 for (j = 0; j < (HOST_WIDE_INT_1U << i); ++j)
7170 sel[j] = j;
7171 for (k = 0; j < nunits; ++j, ++k)
7172 sel[j] = nunits + k;
7173 }
7174 vec_perm_indices indices (sel, i == units_log2 ? 1 : 2, nunits);
7175 if (!use_whole_vector.is_empty ()
7176 && use_whole_vector[i] != scan_store_kind_perm)
7177 {
7178 if (zero_vec == NULL_TREE)
7179 zero_vec = build_zero_cst (vectype);
7180 if (masktype == NULL_TREE
7181 && use_whole_vector[i] == scan_store_kind_lshift_cond)
7182 masktype = truth_type_for (vectype);
7183 perms[i] = vect_gen_perm_mask_any (vectype, indices);
7184 }
7185 else
7186 perms[i] = vect_gen_perm_mask_checked (vectype, indices);
7187 }
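  /* Editorial note: assuming an 8-element vector (units_log2 == 3), the
     loop above builds the selectors
       perms[0] = { 0, 8, 9, 10, 11, 12, 13, 14 }
       perms[1] = { 0, 1, 8, 9, 10, 11, 12, 13 }
       perms[2] = { 0, 1, 2, 3, 8, 9, 10, 11 }
       perms[3] = { 7, 7, 7, 7, 7, 7, 7, 7 }
     matching the VEC_PERM_EXPRs shown in the check_scan_store comment.  */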
7188
7189 stmt_vec_info prev_stmt_info = NULL;
7190 tree vec_oprnd1 = NULL_TREE;
7191 tree vec_oprnd2 = NULL_TREE;
7192 tree vec_oprnd3 = NULL_TREE;
7193 tree dataref_ptr = DR_BASE_ADDRESS (dr_info->dr);
7194 tree dataref_offset = build_int_cst (ref_type, 0);
7195 tree bump = vect_get_data_ptr_increment (vinfo, dr_info,
7196 vectype, VMAT_CONTIGUOUS);
7197 tree ldataref_ptr = NULL_TREE;
7198 tree orig = NULL_TREE;
7199 if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) == 4 && !inscan_var_store)
7200 ldataref_ptr = DR_BASE_ADDRESS (load1_dr_info->dr);
7201 for (int j = 0; j < ncopies; j++)
7202 {
7203 stmt_vec_info new_stmt_info;
7204 if (j == 0)
7205 {
7206 vec_oprnd1 = vect_get_vec_def_for_operand (vinfo, *init, stmt_info);
7207 if (ldataref_ptr == NULL)
7208 vec_oprnd2 = vect_get_vec_def_for_operand (vinfo, rhs1, stmt_info);
7209 vec_oprnd3 = vect_get_vec_def_for_operand (vinfo, rhs2, stmt_info);
7210 orig = vec_oprnd3;
7211 }
7212 else
7213 {
7214 vec_oprnd1 = vect_get_vec_def_for_stmt_copy (vinfo, vec_oprnd1);
7215 if (ldataref_ptr == NULL)
7216 vec_oprnd2 = vect_get_vec_def_for_stmt_copy (vinfo, vec_oprnd2);
7217 vec_oprnd3 = vect_get_vec_def_for_stmt_copy (vinfo, vec_oprnd3);
7218 if (!inscan_var_store)
7219 dataref_offset = int_const_binop (PLUS_EXPR, dataref_offset, bump);
7220 }
7221
7222 if (ldataref_ptr)
7223 {
7224 vec_oprnd2 = make_ssa_name (vectype);
7225 tree data_ref = fold_build2 (MEM_REF, vectype,
7226 unshare_expr (ldataref_ptr),
7227 dataref_offset);
7228 vect_copy_ref_info (data_ref, DR_REF (load1_dr_info->dr));
7229 gimple *g = gimple_build_assign (vec_oprnd2, data_ref);
7230 new_stmt_info = vect_finish_stmt_generation (vinfo, stmt_info, g, gsi);
7231 if (prev_stmt_info == NULL)
7232 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt_info;
7233 else
7234 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt_info;
7235 prev_stmt_info = new_stmt_info;
7236 }
7237
7238 tree v = vec_oprnd2;
7239 for (int i = 0; i < units_log2; ++i)
7240 {
7241 tree new_temp = make_ssa_name (vectype);
7242 gimple *g = gimple_build_assign (new_temp, VEC_PERM_EXPR,
7243 (zero_vec
7244 && (use_whole_vector[i]
7245 != scan_store_kind_perm))
7246 ? zero_vec : vec_oprnd1, v,
7247 perms[i]);
7248 new_stmt_info = vect_finish_stmt_generation (vinfo, stmt_info, g, gsi);
7249 if (prev_stmt_info == NULL)
7250 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt_info;
7251 else
7252 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt_info;
7253 prev_stmt_info = new_stmt_info;
7254
7255 if (zero_vec && use_whole_vector[i] == scan_store_kind_lshift_cond)
7256 {
7257 /* The whole-vector shift shifted in zero bits, but if *init
7258 is not initializer_zerop, we need to replace those elements
7259 with elements from vec_oprnd1. */
7260 tree_vector_builder vb (masktype, nunits, 1);
7261 for (unsigned HOST_WIDE_INT k = 0; k < nunits; ++k)
7262 vb.quick_push (k < (HOST_WIDE_INT_1U << i)
7263 ? boolean_false_node : boolean_true_node);
7264
7265 tree new_temp2 = make_ssa_name (vectype);
7266 g = gimple_build_assign (new_temp2, VEC_COND_EXPR, vb.build (),
7267 new_temp, vec_oprnd1);
7268 new_stmt_info = vect_finish_stmt_generation (vinfo, stmt_info,
7269 g, gsi);
7270 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt_info;
7271 prev_stmt_info = new_stmt_info;
7272 new_temp = new_temp2;
7273 }
7274
7275 /* For exclusive scan, perform the perms[i] permutation once
7276 more. */
7277 if (i == 0
7278 && STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) == 4
7279 && v == vec_oprnd2)
7280 {
7281 v = new_temp;
7282 --i;
7283 continue;
7284 }
7285
7286 tree new_temp2 = make_ssa_name (vectype);
7287 g = gimple_build_assign (new_temp2, code, v, new_temp);
7288 new_stmt_info = vect_finish_stmt_generation (vinfo, stmt_info, g, gsi);
7289 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt_info;
7290 prev_stmt_info = new_stmt_info;
7291
7292 v = new_temp2;
7293 }
7294
7295 tree new_temp = make_ssa_name (vectype);
7296 gimple *g = gimple_build_assign (new_temp, code, orig, v);
7297 new_stmt_info = vect_finish_stmt_generation (vinfo, stmt_info, g, gsi);
7298 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt_info;
7299 prev_stmt_info = new_stmt_info;
7300
7301 tree last_perm_arg = new_temp;
7302 /* For exclusive scan, new_temp computed above is the exclusive scan
7303 prefix sum. Turn it into inclusive prefix sum for the broadcast
7304 of the last element into orig. */
7305 if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) == 4)
7306 {
7307 last_perm_arg = make_ssa_name (vectype);
7308 g = gimple_build_assign (last_perm_arg, code, new_temp, vec_oprnd2);
7309 new_stmt_info = vect_finish_stmt_generation (vinfo, stmt_info, g, gsi);
7310 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt_info;
7311 prev_stmt_info = new_stmt_info;
7312 }
7313
7314 orig = make_ssa_name (vectype);
7315 g = gimple_build_assign (orig, VEC_PERM_EXPR, last_perm_arg,
7316 last_perm_arg, perms[units_log2]);
7317 new_stmt_info = vect_finish_stmt_generation (vinfo, stmt_info, g, gsi);
7318 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt_info;
7319 prev_stmt_info = new_stmt_info;
7320
7321 if (!inscan_var_store)
7322 {
7323 tree data_ref = fold_build2 (MEM_REF, vectype,
7324 unshare_expr (dataref_ptr),
7325 dataref_offset);
7326 vect_copy_ref_info (data_ref, DR_REF (dr_info->dr));
7327 g = gimple_build_assign (data_ref, new_temp);
7328 new_stmt_info = vect_finish_stmt_generation (vinfo, stmt_info, g, gsi);
7329 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt_info;
7330 prev_stmt_info = new_stmt_info;
7331 }
7332 }
7333
7334 if (inscan_var_store)
7335 for (int j = 0; j < ncopies; j++)
7336 {
7337 if (j != 0)
7338 dataref_offset = int_const_binop (PLUS_EXPR, dataref_offset, bump);
7339
7340 tree data_ref = fold_build2 (MEM_REF, vectype,
7341 unshare_expr (dataref_ptr),
7342 dataref_offset);
7343 vect_copy_ref_info (data_ref, DR_REF (dr_info->dr));
7344 gimple *g = gimple_build_assign (data_ref, orig);
7345 stmt_vec_info new_stmt_info
7346 = vect_finish_stmt_generation (vinfo, stmt_info, g, gsi);
7347 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt_info;
7348 prev_stmt_info = new_stmt_info;
7349 }
7350 return true;
7351 }
7352
7353
7354 /* Function vectorizable_store.
7355
7356 Check if STMT_INFO defines a non scalar data-ref (array/pointer/structure)
7357 that can be vectorized.
7358 If VEC_STMT is also passed, vectorize STMT_INFO: create a vectorized
7359 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
7360 Return true if STMT_INFO is vectorizable in this way. */
7361
7362 static bool
7363 vectorizable_store (vec_info *vinfo,
7364 stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
7365 stmt_vec_info *vec_stmt, slp_tree slp_node,
7366 stmt_vector_for_cost *cost_vec)
7367 {
7368 tree data_ref;
7369 tree op;
7370 tree vec_oprnd = NULL_TREE;
7371 tree elem_type;
7372 loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
7373 class loop *loop = NULL;
7374 machine_mode vec_mode;
7375 tree dummy;
7376 enum dr_alignment_support alignment_support_scheme;
7377 enum vect_def_type rhs_dt = vect_unknown_def_type;
7378 enum vect_def_type mask_dt = vect_unknown_def_type;
7379 stmt_vec_info prev_stmt_info = NULL;
7380 tree dataref_ptr = NULL_TREE;
7381 tree dataref_offset = NULL_TREE;
7382 gimple *ptr_incr = NULL;
7383 int ncopies;
7384 int j;
7385 stmt_vec_info first_stmt_info;
7386 bool grouped_store;
7387 unsigned int group_size, i;
7388 vec<tree> oprnds = vNULL;
7389 vec<tree> result_chain = vNULL;
7390 tree offset = NULL_TREE;
7391 vec<tree> vec_oprnds = vNULL;
7392 bool slp = (slp_node != NULL);
7393 unsigned int vec_num;
7394 bb_vec_info bb_vinfo = dyn_cast <bb_vec_info> (vinfo);
7395 tree aggr_type;
7396 gather_scatter_info gs_info;
7397 poly_uint64 vf;
7398 vec_load_store_type vls_type;
7399 tree ref_type;
7400
7401 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
7402 return false;
7403
7404 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
7405 && ! vec_stmt)
7406 return false;
7407
7408 /* Is vectorizable store? */
7409
7410 tree mask = NULL_TREE, mask_vectype = NULL_TREE;
7411 if (gassign *assign = dyn_cast <gassign *> (stmt_info->stmt))
7412 {
7413 tree scalar_dest = gimple_assign_lhs (assign);
7414 if (TREE_CODE (scalar_dest) == VIEW_CONVERT_EXPR
7415 && is_pattern_stmt_p (stmt_info))
7416 scalar_dest = TREE_OPERAND (scalar_dest, 0);
7417 if (TREE_CODE (scalar_dest) != ARRAY_REF
7418 && TREE_CODE (scalar_dest) != BIT_FIELD_REF
7419 && TREE_CODE (scalar_dest) != INDIRECT_REF
7420 && TREE_CODE (scalar_dest) != COMPONENT_REF
7421 && TREE_CODE (scalar_dest) != IMAGPART_EXPR
7422 && TREE_CODE (scalar_dest) != REALPART_EXPR
7423 && TREE_CODE (scalar_dest) != MEM_REF)
7424 return false;
7425 }
7426 else
7427 {
7428 gcall *call = dyn_cast <gcall *> (stmt_info->stmt);
7429 if (!call || !gimple_call_internal_p (call))
7430 return false;
7431
7432 internal_fn ifn = gimple_call_internal_fn (call);
7433 if (!internal_store_fn_p (ifn))
7434 return false;
7435
7436 if (slp_node != NULL)
7437 {
7438 if (dump_enabled_p ())
7439 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
7440 "SLP of masked stores not supported.\n");
7441 return false;
7442 }
7443
7444 int mask_index = internal_fn_mask_index (ifn);
7445 if (mask_index >= 0)
7446 {
7447 mask = gimple_call_arg (call, mask_index);
7448 if (!vect_check_scalar_mask (vinfo, stmt_info, mask, &mask_dt,
7449 &mask_vectype))
7450 return false;
7451 }
7452 }
7453
7454 op = vect_get_store_rhs (stmt_info);
7455
7456 /* Cannot have hybrid store SLP -- that would mean storing to the
7457 same location twice. */
7458 gcc_assert (slp == PURE_SLP_STMT (stmt_info));
7459
7460 tree vectype = STMT_VINFO_VECTYPE (stmt_info), rhs_vectype = NULL_TREE;
7461 poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
7462
7463 if (loop_vinfo)
7464 {
7465 loop = LOOP_VINFO_LOOP (loop_vinfo);
7466 vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
7467 }
7468 else
7469 vf = 1;
7470
7471 /* Multiple types in SLP are handled by creating the appropriate number of
7472 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
7473 case of SLP. */
7474 if (slp)
7475 ncopies = 1;
7476 else
7477 ncopies = vect_get_num_copies (loop_vinfo, vectype);
7478
7479 gcc_assert (ncopies >= 1);
7480
7481 /* FORNOW. This restriction should be relaxed. */
7482 if (loop && nested_in_vect_loop_p (loop, stmt_info) && ncopies > 1)
7483 {
7484 if (dump_enabled_p ())
7485 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
7486 "multiple types in nested loop.\n");
7487 return false;
7488 }
7489
7490 if (!vect_check_store_rhs (vinfo, stmt_info,
7491 op, &rhs_dt, &rhs_vectype, &vls_type))
7492 return false;
7493
7494 elem_type = TREE_TYPE (vectype);
7495 vec_mode = TYPE_MODE (vectype);
7496
7497 if (!STMT_VINFO_DATA_REF (stmt_info))
7498 return false;
7499
7500 vect_memory_access_type memory_access_type;
7501 if (!get_load_store_type (vinfo, stmt_info, vectype, slp, mask, vls_type,
7502 ncopies, &memory_access_type, &gs_info))
7503 return false;
7504
7505 if (mask)
7506 {
7507 if (memory_access_type == VMAT_CONTIGUOUS)
7508 {
7509 if (!VECTOR_MODE_P (vec_mode)
7510 || !can_vec_mask_load_store_p (vec_mode,
7511 TYPE_MODE (mask_vectype), false))
7512 return false;
7513 }
7514 else if (memory_access_type != VMAT_LOAD_STORE_LANES
7515 && (memory_access_type != VMAT_GATHER_SCATTER
7516 || (gs_info.decl && !VECTOR_BOOLEAN_TYPE_P (mask_vectype))))
7517 {
7518 if (dump_enabled_p ())
7519 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
7520 "unsupported access type for masked store.\n");
7521 return false;
7522 }
7523 }
7524 else
7525 {
7526 /* FORNOW. In some cases we can vectorize even if the data type is not
7527 supported (e.g. array initialization with 0). */
7528 if (optab_handler (mov_optab, vec_mode) == CODE_FOR_nothing)
7529 return false;
7530 }
7531
7532 dr_vec_info *dr_info = STMT_VINFO_DR_INFO (stmt_info), *first_dr_info = NULL;
7533 grouped_store = (STMT_VINFO_GROUPED_ACCESS (stmt_info)
7534 && memory_access_type != VMAT_GATHER_SCATTER
7535 && (slp || memory_access_type != VMAT_CONTIGUOUS));
7536 if (grouped_store)
7537 {
7538 first_stmt_info = DR_GROUP_FIRST_ELEMENT (stmt_info);
7539 first_dr_info = STMT_VINFO_DR_INFO (first_stmt_info);
7540 group_size = DR_GROUP_SIZE (first_stmt_info);
7541 }
7542 else
7543 {
7544 first_stmt_info = stmt_info;
7545 first_dr_info = dr_info;
7546 group_size = vec_num = 1;
7547 }
7548
7549 if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) > 1 && !vec_stmt)
7550 {
7551 if (!check_scan_store (vinfo, stmt_info, vectype, rhs_dt, slp, mask,
7552 memory_access_type))
7553 return false;
7554 }
7555
7556 if (!vec_stmt) /* transformation not required. */
7557 {
7558 STMT_VINFO_MEMORY_ACCESS_TYPE (stmt_info) = memory_access_type;
7559
7560 if (loop_vinfo
7561 && LOOP_VINFO_CAN_FULLY_MASK_P (loop_vinfo))
7562 check_load_store_masking (loop_vinfo, vectype, vls_type, group_size,
7563 memory_access_type, &gs_info, mask);
7564
7565 if (slp_node
7566 && !vect_maybe_update_slp_op_vectype (SLP_TREE_CHILDREN (slp_node)[0],
7567 vectype))
7568 {
7569 if (dump_enabled_p ())
7570 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
7571 "incompatible vector types for invariants\n");
7572 return false;
7573 }
7574
7575 STMT_VINFO_TYPE (stmt_info) = store_vec_info_type;
7576 vect_model_store_cost (vinfo, stmt_info, ncopies,
7577 memory_access_type, vls_type, slp_node, cost_vec);
7578 return true;
7579 }
7580 gcc_assert (memory_access_type == STMT_VINFO_MEMORY_ACCESS_TYPE (stmt_info));
7581
7582 /* Transform. */
7583
7584 ensure_base_align (dr_info);
7585
7586 if (memory_access_type == VMAT_GATHER_SCATTER && gs_info.decl)
7587 {
7588 tree vec_oprnd0 = NULL_TREE, vec_oprnd1 = NULL_TREE, src;
7589 tree arglist = TYPE_ARG_TYPES (TREE_TYPE (gs_info.decl));
7590 tree rettype, srctype, ptrtype, idxtype, masktype, scaletype;
7591 tree ptr, var, scale, vec_mask;
7592 tree mask_arg = NULL_TREE, mask_op = NULL_TREE, perm_mask = NULL_TREE;
7593 tree mask_halfvectype = mask_vectype;
7594 edge pe = loop_preheader_edge (loop);
7595 gimple_seq seq;
7596 basic_block new_bb;
7597 enum { NARROW, NONE, WIDEN } modifier;
7598 poly_uint64 scatter_off_nunits
7599 = TYPE_VECTOR_SUBPARTS (gs_info.offset_vectype);
7600
7601 if (known_eq (nunits, scatter_off_nunits))
7602 modifier = NONE;
7603 else if (known_eq (nunits * 2, scatter_off_nunits))
7604 {
7605 modifier = WIDEN;
7606
7607 /* Currently gathers and scatters are only supported for
7608 fixed-length vectors. */
7609 unsigned int count = scatter_off_nunits.to_constant ();
7610 vec_perm_builder sel (count, count, 1);
7611 for (i = 0; i < (unsigned int) count; ++i)
7612 sel.quick_push (i | (count / 2));
7613
7614 vec_perm_indices indices (sel, 1, count);
7615 perm_mask = vect_gen_perm_mask_checked (gs_info.offset_vectype,
7616 indices);
7617 gcc_assert (perm_mask != NULL_TREE);
7618 }
7619 else if (known_eq (nunits, scatter_off_nunits * 2))
7620 {
7621 modifier = NARROW;
7622
7623 /* Currently gathers and scatters are only supported for
7624 fixed-length vectors. */
7625 unsigned int count = nunits.to_constant ();
7626 vec_perm_builder sel (count, count, 1);
7627 for (i = 0; i < (unsigned int) count; ++i)
7628 sel.quick_push (i | (count / 2));
7629
7630 vec_perm_indices indices (sel, 2, count);
7631 perm_mask = vect_gen_perm_mask_checked (vectype, indices);
7632 gcc_assert (perm_mask != NULL_TREE);
7633 ncopies *= 2;
7634
7635 if (mask)
7636 mask_halfvectype = truth_type_for (gs_info.offset_vectype);
7637 }
7638 else
7639 gcc_unreachable ();
7640
7641 rettype = TREE_TYPE (TREE_TYPE (gs_info.decl));
7642 ptrtype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
7643 masktype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
7644 idxtype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
7645 srctype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
7646 scaletype = TREE_VALUE (arglist);
7647
7648 gcc_checking_assert (TREE_CODE (masktype) == INTEGER_TYPE
7649 && TREE_CODE (rettype) == VOID_TYPE);
7650
7651 ptr = fold_convert (ptrtype, gs_info.base);
7652 if (!is_gimple_min_invariant (ptr))
7653 {
7654 ptr = force_gimple_operand (ptr, &seq, true, NULL_TREE);
7655 new_bb = gsi_insert_seq_on_edge_immediate (pe, seq);
7656 gcc_assert (!new_bb);
7657 }
7658
7659 if (mask == NULL_TREE)
7660 {
7661 mask_arg = build_int_cst (masktype, -1);
7662 mask_arg = vect_init_vector (vinfo, stmt_info,
7663 mask_arg, masktype, NULL);
7664 }
7665
7666 scale = build_int_cst (scaletype, gs_info.scale);
7667
7668 prev_stmt_info = NULL;
7669 for (j = 0; j < ncopies; ++j)
7670 {
7671 if (j == 0)
7672 {
7673 src = vec_oprnd1 = vect_get_vec_def_for_operand (vinfo,
7674 op, stmt_info);
7675 op = vec_oprnd0 = vect_get_vec_def_for_operand (vinfo,
7676 gs_info.offset,
7677 stmt_info);
7678 if (mask)
7679 mask_op = vec_mask = vect_get_vec_def_for_operand (vinfo, mask,
7680 stmt_info);
7681 }
7682 else if (modifier != NONE && (j & 1))
7683 {
7684 if (modifier == WIDEN)
7685 {
7686 src
7687 = vec_oprnd1 = vect_get_vec_def_for_stmt_copy (vinfo,
7688 vec_oprnd1);
7689 op = permute_vec_elements (vinfo, vec_oprnd0, vec_oprnd0,
7690 perm_mask, stmt_info, gsi);
7691 if (mask)
7692 mask_op
7693 = vec_mask = vect_get_vec_def_for_stmt_copy (vinfo,
7694 vec_mask);
7695 }
7696 else if (modifier == NARROW)
7697 {
7698 src = permute_vec_elements (vinfo, vec_oprnd1, vec_oprnd1,
7699 perm_mask, stmt_info, gsi);
7700 op = vec_oprnd0 = vect_get_vec_def_for_stmt_copy (vinfo,
7701 vec_oprnd0);
7702 }
7703 else
7704 gcc_unreachable ();
7705 }
7706 else
7707 {
7708 src = vec_oprnd1 = vect_get_vec_def_for_stmt_copy (vinfo,
7709 vec_oprnd1);
7710 op = vec_oprnd0 = vect_get_vec_def_for_stmt_copy (vinfo,
7711 vec_oprnd0);
7712 if (mask)
7713 mask_op = vec_mask = vect_get_vec_def_for_stmt_copy (vinfo,
7714 vec_mask);
7715 }
7716
7717 if (!useless_type_conversion_p (srctype, TREE_TYPE (src)))
7718 {
7719 gcc_assert (known_eq (TYPE_VECTOR_SUBPARTS (TREE_TYPE (src)),
7720 TYPE_VECTOR_SUBPARTS (srctype)));
7721 var = vect_get_new_ssa_name (srctype, vect_simple_var);
7722 src = build1 (VIEW_CONVERT_EXPR, srctype, src);
7723 gassign *new_stmt
7724 = gimple_build_assign (var, VIEW_CONVERT_EXPR, src);
7725 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
7726 src = var;
7727 }
7728
7729 if (!useless_type_conversion_p (idxtype, TREE_TYPE (op)))
7730 {
7731 gcc_assert (known_eq (TYPE_VECTOR_SUBPARTS (TREE_TYPE (op)),
7732 TYPE_VECTOR_SUBPARTS (idxtype)));
7733 var = vect_get_new_ssa_name (idxtype, vect_simple_var);
7734 op = build1 (VIEW_CONVERT_EXPR, idxtype, op);
7735 gassign *new_stmt
7736 = gimple_build_assign (var, VIEW_CONVERT_EXPR, op);
7737 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
7738 op = var;
7739 }
7740
7741 if (mask)
7742 {
7743 tree utype;
7744 mask_arg = mask_op;
7745 if (modifier == NARROW)
7746 {
7747 var = vect_get_new_ssa_name (mask_halfvectype,
7748 vect_simple_var);
7749 gassign *new_stmt
7750 = gimple_build_assign (var, (j & 1) ? VEC_UNPACK_HI_EXPR
7751 : VEC_UNPACK_LO_EXPR,
7752 mask_op);
7753 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
7754 mask_arg = var;
7755 }
7756 tree optype = TREE_TYPE (mask_arg);
7757 if (TYPE_MODE (masktype) == TYPE_MODE (optype))
7758 utype = masktype;
7759 else
7760 utype = lang_hooks.types.type_for_mode (TYPE_MODE (optype), 1);
7761 var = vect_get_new_ssa_name (utype, vect_scalar_var);
7762 mask_arg = build1 (VIEW_CONVERT_EXPR, utype, mask_arg);
7763 gassign *new_stmt
7764 = gimple_build_assign (var, VIEW_CONVERT_EXPR, mask_arg);
7765 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
7766 mask_arg = var;
7767 if (!useless_type_conversion_p (masktype, utype))
7768 {
7769 gcc_assert (TYPE_PRECISION (utype)
7770 <= TYPE_PRECISION (masktype));
7771 var = vect_get_new_ssa_name (masktype, vect_scalar_var);
7772 new_stmt = gimple_build_assign (var, NOP_EXPR, mask_arg);
7773 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
7774 mask_arg = var;
7775 }
7776 }
7777
7778 gcall *new_stmt
7779 = gimple_build_call (gs_info.decl, 5, ptr, mask_arg, op, src, scale);
7780 stmt_vec_info new_stmt_info
7781 = vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
7782
7783 if (prev_stmt_info == NULL)
7784 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt_info;
7785 else
7786 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt_info;
7787 prev_stmt_info = new_stmt_info;
7788 }
7789 return true;
7790 }
7791 else if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) >= 3)
7792 return vectorizable_scan_store (vinfo, stmt_info, gsi, vec_stmt, ncopies);
7793
7794 if (STMT_VINFO_GROUPED_ACCESS (stmt_info))
7795 DR_GROUP_STORE_COUNT (DR_GROUP_FIRST_ELEMENT (stmt_info))++;
7796
7797 if (grouped_store)
7798 {
7799 /* FORNOW */
7800 gcc_assert (!loop || !nested_in_vect_loop_p (loop, stmt_info));
7801
7802 /* We vectorize all the stmts of the interleaving group when we
7803 reach the last stmt in the group. */
7804 if (DR_GROUP_STORE_COUNT (first_stmt_info)
7805 < DR_GROUP_SIZE (first_stmt_info)
7806 && !slp)
7807 {
7808 *vec_stmt = NULL;
7809 return true;
7810 }
7811
7812 if (slp)
7813 {
7814 grouped_store = false;
7815 /* VEC_NUM is the number of vect stmts to be created for this
7816 group. */
7817 vec_num = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
7818 first_stmt_info = SLP_TREE_SCALAR_STMTS (slp_node)[0];
7819 gcc_assert (DR_GROUP_FIRST_ELEMENT (first_stmt_info)
7820 == first_stmt_info);
7821 first_dr_info = STMT_VINFO_DR_INFO (first_stmt_info);
7822 op = vect_get_store_rhs (first_stmt_info);
7823 }
7824 else
7825 /* VEC_NUM is the number of vect stmts to be created for this
7826 group. */
7827 vec_num = group_size;
7828
7829 ref_type = get_group_alias_ptr_type (first_stmt_info);
7830 }
7831 else
7832 ref_type = reference_alias_ptr_type (DR_REF (first_dr_info->dr));
7833
7834 if (dump_enabled_p ())
7835 dump_printf_loc (MSG_NOTE, vect_location,
7836 "transform store. ncopies = %d\n", ncopies);
7837
7838 if (memory_access_type == VMAT_ELEMENTWISE
7839 || memory_access_type == VMAT_STRIDED_SLP)
7840 {
7841 gimple_stmt_iterator incr_gsi;
7842 bool insert_after;
7843 gimple *incr;
7844 tree offvar;
7845 tree ivstep;
7846 tree running_off;
7847 tree stride_base, stride_step, alias_off;
7848 tree vec_oprnd;
7849 tree dr_offset;
7850 unsigned int g;
7851 /* Checked by get_load_store_type. */
7852 unsigned int const_nunits = nunits.to_constant ();
7853
7854 gcc_assert (!LOOP_VINFO_FULLY_MASKED_P (loop_vinfo));
7855 gcc_assert (!nested_in_vect_loop_p (loop, stmt_info));
7856
7857 dr_offset = get_dr_vinfo_offset (vinfo, first_dr_info);
7858 stride_base
7859 = fold_build_pointer_plus
7860 (DR_BASE_ADDRESS (first_dr_info->dr),
7861 size_binop (PLUS_EXPR,
7862 convert_to_ptrofftype (dr_offset),
7863 convert_to_ptrofftype (DR_INIT (first_dr_info->dr))));
7864 stride_step = fold_convert (sizetype, DR_STEP (first_dr_info->dr));
7865
7866 /* For a store with loop-invariant (but other than power-of-2)
7867 stride (i.e. not a grouped access) like so:
7868
7869 for (i = 0; i < n; i += stride)
7870 array[i] = ...;
7871
7872 we generate a new induction variable and new stores from
7873 the components of the (vectorized) rhs:
7874
7875 for (j = 0; ; j += VF*stride)
7876 vectemp = ...;
7877 tmp1 = vectemp[0];
7878 array[j] = tmp1;
7879 tmp2 = vectemp[1];
7880 array[j + stride] = tmp2;
7881 ...
7882 */
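      /* Editorial illustration (not part of the original comment): with a
         hypothetical stride of 3 and VF == 4 this expands to
           for (j = 0; ; j += 4 * 3)
             vectemp = ...;
             array[j]     = vectemp[0];
             array[j + 3] = vectemp[1];
             array[j + 6] = vectemp[2];
             array[j + 9] = vectemp[3];
         i.e. one scalar store per vector lane, each offset by the original
         stride.  */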
7883
7884 unsigned nstores = const_nunits;
7885 unsigned lnel = 1;
7886 tree ltype = elem_type;
7887 tree lvectype = vectype;
7888 if (slp)
7889 {
7890 if (group_size < const_nunits
7891 && const_nunits % group_size == 0)
7892 {
7893 nstores = const_nunits / group_size;
7894 lnel = group_size;
7895 ltype = build_vector_type (elem_type, group_size);
7896 lvectype = vectype;
7897
7898 /* First check if vec_extract optab doesn't support extraction
7899 of vector elts directly. */
7900 scalar_mode elmode = SCALAR_TYPE_MODE (elem_type);
7901 machine_mode vmode;
7902 if (!VECTOR_MODE_P (TYPE_MODE (vectype))
7903 || !related_vector_mode (TYPE_MODE (vectype), elmode,
7904 group_size).exists (&vmode)
7905 || (convert_optab_handler (vec_extract_optab,
7906 TYPE_MODE (vectype), vmode)
7907 == CODE_FOR_nothing))
7908 {
7909 /* Try to avoid emitting an extract of vector elements
7910 by performing the extracts using an integer type of the
7911 same size, extracting from a vector of those and then
7912 re-interpreting it as the original vector type if
7913 supported. */
7914 unsigned lsize
7915 = group_size * GET_MODE_BITSIZE (elmode);
7916 unsigned int lnunits = const_nunits / group_size;
7917 /* If we can't construct such a vector fall back to
7918 element extracts from the original vector type and
7919 element size stores. */
7920 if (int_mode_for_size (lsize, 0).exists (&elmode)
7921 && VECTOR_MODE_P (TYPE_MODE (vectype))
7922 && related_vector_mode (TYPE_MODE (vectype), elmode,
7923 lnunits).exists (&vmode)
7924 && (convert_optab_handler (vec_extract_optab,
7925 vmode, elmode)
7926 != CODE_FOR_nothing))
7927 {
7928 nstores = lnunits;
7929 lnel = group_size;
7930 ltype = build_nonstandard_integer_type (lsize, 1);
7931 lvectype = build_vector_type (ltype, nstores);
7932 }
7933 /* Else fall back to vector extraction anyway.
7934 Fewer stores are more important than avoiding spilling
7935 of the vector we extract from. Compared to the
7936 construction case in vectorizable_load no store-forwarding
7937 issue exists here for reasonable archs. */
7938 }
7939 }
7940 else if (group_size >= const_nunits
7941 && group_size % const_nunits == 0)
7942 {
7943 nstores = 1;
7944 lnel = const_nunits;
7945 ltype = vectype;
7946 lvectype = vectype;
7947 }
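          /* Editorial illustration (not part of the original comment): for
             the integer-punning fallback above with 32-bit int elements,
             group_size == 2 and const_nunits == 8, lsize is 64, so ltype
             becomes a 64-bit integer, lvectype a 4-element vector of those,
             and each of the nstores == 4 scalar stores then writes one
             64-bit chunk covering two adjacent group elements.  */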
7948 ltype = build_aligned_type (ltype, TYPE_ALIGN (elem_type));
7949 ncopies = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
7950 }
7951
7952 ivstep = stride_step;
7953 ivstep = fold_build2 (MULT_EXPR, TREE_TYPE (ivstep), ivstep,
7954 build_int_cst (TREE_TYPE (ivstep), vf));
7955
7956 standard_iv_increment_position (loop, &incr_gsi, &insert_after);
7957
7958 stride_base = cse_and_gimplify_to_preheader (loop_vinfo, stride_base);
7959 ivstep = cse_and_gimplify_to_preheader (loop_vinfo, ivstep);
7960 create_iv (stride_base, ivstep, NULL,
7961 loop, &incr_gsi, insert_after,
7962 &offvar, NULL);
7963 incr = gsi_stmt (incr_gsi);
7964 loop_vinfo->add_stmt (incr);
7965
7966 stride_step = cse_and_gimplify_to_preheader (loop_vinfo, stride_step);
7967
7968 prev_stmt_info = NULL;
7969 alias_off = build_int_cst (ref_type, 0);
7970 stmt_vec_info next_stmt_info = first_stmt_info;
7971 for (g = 0; g < group_size; g++)
7972 {
7973 running_off = offvar;
7974 if (g)
7975 {
7976 tree size = TYPE_SIZE_UNIT (ltype);
7977 tree pos = fold_build2 (MULT_EXPR, sizetype, size_int (g),
7978 size);
7979 tree newoff = copy_ssa_name (running_off, NULL);
7980 incr = gimple_build_assign (newoff, POINTER_PLUS_EXPR,
7981 running_off, pos);
7982 vect_finish_stmt_generation (vinfo, stmt_info, incr, gsi);
7983 running_off = newoff;
7984 }
7985 unsigned int group_el = 0;
7986 unsigned HOST_WIDE_INT
7987 elsz = tree_to_uhwi (TYPE_SIZE_UNIT (TREE_TYPE (vectype)));
7988 for (j = 0; j < ncopies; j++)
7989 {
7990 /* We've set op and dt above, from vect_get_store_rhs,
7991 and first_stmt_info == stmt_info. */
7992 if (j == 0)
7993 {
7994 if (slp)
7995 {
7996 vect_get_vec_defs (vinfo, op, NULL_TREE, stmt_info,
7997 &vec_oprnds, NULL, slp_node);
7998 vec_oprnd = vec_oprnds[0];
7999 }
8000 else
8001 {
8002 op = vect_get_store_rhs (next_stmt_info);
8003 vec_oprnd = vect_get_vec_def_for_operand
8004 (vinfo, op, next_stmt_info);
8005 }
8006 }
8007 else
8008 {
8009 if (slp)
8010 vec_oprnd = vec_oprnds[j];
8011 else
8012 vec_oprnd = vect_get_vec_def_for_stmt_copy (vinfo,
8013 vec_oprnd);
8014 }
8015 /* Pun the vector to extract from if necessary. */
8016 if (lvectype != vectype)
8017 {
8018 tree tem = make_ssa_name (lvectype);
8019 gimple *pun
8020 = gimple_build_assign (tem, build1 (VIEW_CONVERT_EXPR,
8021 lvectype, vec_oprnd));
8022 vect_finish_stmt_generation (vinfo, stmt_info, pun, gsi);
8023 vec_oprnd = tem;
8024 }
8025 for (i = 0; i < nstores; i++)
8026 {
8027 tree newref, newoff;
8028 gimple *incr, *assign;
8029 tree size = TYPE_SIZE (ltype);
8030 /* Extract the i'th component. */
8031 tree pos = fold_build2 (MULT_EXPR, bitsizetype,
8032 bitsize_int (i), size);
8033 tree elem = fold_build3 (BIT_FIELD_REF, ltype, vec_oprnd,
8034 size, pos);
8035
8036 elem = force_gimple_operand_gsi (gsi, elem, true,
8037 NULL_TREE, true,
8038 GSI_SAME_STMT);
8039
8040 tree this_off = build_int_cst (TREE_TYPE (alias_off),
8041 group_el * elsz);
8042 newref = build2 (MEM_REF, ltype,
8043 running_off, this_off);
8044 vect_copy_ref_info (newref, DR_REF (first_dr_info->dr));
8045
8046 /* And store it to *running_off. */
8047 assign = gimple_build_assign (newref, elem);
8048 stmt_vec_info assign_info
8049 = vect_finish_stmt_generation (vinfo, stmt_info,
8050 assign, gsi);
8051
8052 group_el += lnel;
8053 if (! slp
8054 || group_el == group_size)
8055 {
8056 newoff = copy_ssa_name (running_off, NULL);
8057 incr = gimple_build_assign (newoff, POINTER_PLUS_EXPR,
8058 running_off, stride_step);
8059 vect_finish_stmt_generation (vinfo, stmt_info, incr, gsi);
8060
8061 running_off = newoff;
8062 group_el = 0;
8063 }
8064 if (g == group_size - 1
8065 && !slp)
8066 {
8067 if (j == 0 && i == 0)
8068 STMT_VINFO_VEC_STMT (stmt_info)
8069 = *vec_stmt = assign_info;
8070 else
8071 STMT_VINFO_RELATED_STMT (prev_stmt_info) = assign_info;
8072 prev_stmt_info = assign_info;
8073 }
8074 }
8075 }
8076 next_stmt_info = DR_GROUP_NEXT_ELEMENT (next_stmt_info);
8077 if (slp)
8078 break;
8079 }
8080
8081 vec_oprnds.release ();
8082 return true;
8083 }
8084
8085 auto_vec<tree> dr_chain (group_size);
8086 oprnds.create (group_size);
8087
8088 /* Gather-scatter accesses perform only component accesses, so alignment
8089 is irrelevant for them. */
8090 if (memory_access_type == VMAT_GATHER_SCATTER)
8091 alignment_support_scheme = dr_unaligned_supported;
8092 else
8093 alignment_support_scheme
8094 = vect_supportable_dr_alignment (vinfo, first_dr_info, false);
8095
8096 gcc_assert (alignment_support_scheme);
8097 vec_loop_masks *loop_masks
8098 = (loop_vinfo && LOOP_VINFO_FULLY_MASKED_P (loop_vinfo)
8099 ? &LOOP_VINFO_MASKS (loop_vinfo)
8100 : NULL);
8101 /* Targets with store-lane instructions must not require explicit
8102 realignment. vect_supportable_dr_alignment always returns either
8103 dr_aligned or dr_unaligned_supported for masked operations. */
8104 gcc_assert ((memory_access_type != VMAT_LOAD_STORE_LANES
8105 && !mask
8106 && !loop_masks)
8107 || alignment_support_scheme == dr_aligned
8108 || alignment_support_scheme == dr_unaligned_supported);
8109
8110 if (memory_access_type == VMAT_CONTIGUOUS_DOWN
8111 || memory_access_type == VMAT_CONTIGUOUS_REVERSE)
8112 offset = size_int (-TYPE_VECTOR_SUBPARTS (vectype) + 1);
8113
8114 tree bump;
8115 tree vec_offset = NULL_TREE;
8116 if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
8117 {
8118 aggr_type = NULL_TREE;
8119 bump = NULL_TREE;
8120 }
8121 else if (memory_access_type == VMAT_GATHER_SCATTER)
8122 {
8123 aggr_type = elem_type;
8124 vect_get_strided_load_store_ops (stmt_info, loop_vinfo, &gs_info,
8125 &bump, &vec_offset);
8126 }
8127 else
8128 {
8129 if (memory_access_type == VMAT_LOAD_STORE_LANES)
8130 aggr_type = build_array_type_nelts (elem_type, vec_num * nunits);
8131 else
8132 aggr_type = vectype;
8133 bump = vect_get_data_ptr_increment (vinfo, dr_info, aggr_type,
8134 memory_access_type);
8135 }
8136
8137 if (mask)
8138 LOOP_VINFO_HAS_MASK_STORE (loop_vinfo) = true;
8139
8140 /* In case the vectorization factor (VF) is bigger than the number
8141 of elements that we can fit in a vectype (nunits), we have to generate
8142 more than one vector stmt - i.e. we need to "unroll" the
8143 vector stmt by a factor of VF/nunits.  For more details see the
8144 documentation in vect_get_vec_def_for_stmt_copy. */
8145
8146 /* In case of interleaving (non-unit grouped access):
8147
8148 S1: &base + 2 = x2
8149 S2: &base = x0
8150 S3: &base + 1 = x1
8151 S4: &base + 3 = x3
8152
8153 We create vectorized stores starting from the base address (the access of
8154 the first stmt in the chain, S2 in the above example) when the last store stmt
8155 of the chain (S4) is reached:
8156
8157 VS1: &base = vx2
8158 VS2: &base + vec_size*1 = vx0
8159 VS3: &base + vec_size*2 = vx1
8160 VS4: &base + vec_size*3 = vx3
8161
8162 Then permutation statements are generated:
8163
8164 VS5: vx5 = VEC_PERM_EXPR < vx0, vx3, {0, 8, 1, 9, 2, 10, 3, 11} >
8165 VS6: vx6 = VEC_PERM_EXPR < vx0, vx3, {4, 12, 5, 13, 6, 14, 7, 15} >
8166 ...
8167
8168 And they are put in STMT_VINFO_VEC_STMT of the corresponding scalar stmts
8169 (the order of the data-refs in the output of vect_permute_store_chain
8170 corresponds to the order of scalar stmts in the interleaving chain - see
8171 the documentation of vect_permute_store_chain()).
8172
8173 In case of both multiple types and interleaving, above vector stores and
8174 permutation stmts are created for every copy. The result vector stmts are
8175 put in STMT_VINFO_VEC_STMT for the first copy and in the corresponding
8176 STMT_VINFO_RELATED_STMT for the next copies.
8177 */
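  /* Editorial illustration (not part of the original comment): for a group
     of two stores with 8-element vectors, if vx0 = { a0, ..., a7 } and
     vx1 = { b0, ..., b7 }, the selectors { 0, 8, 1, 9, 2, 10, 3, 11 } and
     { 4, 12, 5, 13, 6, 14, 7, 15 } produce { a0, b0, a1, b1, a2, b2, a3, b3 }
     and { a4, b4, a5, b5, a6, b6, a7, b7 }, i.e. the element order of the
     original scalar interleaving.  */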
8178
8179 prev_stmt_info = NULL;
8180 tree vec_mask = NULL_TREE;
8181 for (j = 0; j < ncopies; j++)
8182 {
8183 stmt_vec_info new_stmt_info;
8184 if (j == 0)
8185 {
8186 if (slp)
8187 {
8188 /* Get vectorized arguments for SLP_NODE. */
8189 vect_get_vec_defs (vinfo, op, NULL_TREE, stmt_info, &vec_oprnds,
8190 NULL, slp_node);
8191
8192 vec_oprnd = vec_oprnds[0];
8193 }
8194 else
8195 {
8196 /* For interleaved stores we collect vectorized defs for all the
8197 stores in the group in DR_CHAIN and OPRNDS. DR_CHAIN is then
8198 used as an input to vect_permute_store_chain(), and OPRNDS as
8199 an input to vect_get_vec_def_for_stmt_copy() for the next copy.
8200
8201 If the store is not grouped, DR_GROUP_SIZE is 1, and DR_CHAIN and
8202 OPRNDS are of size 1. */
8203 stmt_vec_info next_stmt_info = first_stmt_info;
8204 for (i = 0; i < group_size; i++)
8205 {
8206 /* Since gaps are not supported for interleaved stores,
8207 DR_GROUP_SIZE is the exact number of stmts in the chain.
8208 Therefore, NEXT_STMT_INFO can't be NULL_TREE. In case
8209 that there is no interleaving, DR_GROUP_SIZE is 1,
8210 and only one iteration of the loop will be executed. */
8211 op = vect_get_store_rhs (next_stmt_info);
8212 vec_oprnd = vect_get_vec_def_for_operand
8213 (vinfo, op, next_stmt_info);
8214 dr_chain.quick_push (vec_oprnd);
8215 oprnds.quick_push (vec_oprnd);
8216 next_stmt_info = DR_GROUP_NEXT_ELEMENT (next_stmt_info);
8217 }
8218 if (mask)
8219 vec_mask = vect_get_vec_def_for_operand (vinfo, mask, stmt_info,
8220 mask_vectype);
8221 }
8222
8223 /* We should have caught mismatched types earlier. */
8224 gcc_assert (useless_type_conversion_p (vectype,
8225 TREE_TYPE (vec_oprnd)));
8226 bool simd_lane_access_p
8227 = STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) != 0;
8228 if (simd_lane_access_p
8229 && !loop_masks
8230 && TREE_CODE (DR_BASE_ADDRESS (first_dr_info->dr)) == ADDR_EXPR
8231 && VAR_P (TREE_OPERAND (DR_BASE_ADDRESS (first_dr_info->dr), 0))
8232 && integer_zerop (get_dr_vinfo_offset (vinfo, first_dr_info))
8233 && integer_zerop (DR_INIT (first_dr_info->dr))
8234 && alias_sets_conflict_p (get_alias_set (aggr_type),
8235 get_alias_set (TREE_TYPE (ref_type))))
8236 {
8237 dataref_ptr = unshare_expr (DR_BASE_ADDRESS (first_dr_info->dr));
8238 dataref_offset = build_int_cst (ref_type, 0);
8239 }
8240 else if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
8241 vect_get_gather_scatter_ops (vinfo, loop, stmt_info, &gs_info,
8242 &dataref_ptr, &vec_offset);
8243 else
8244 dataref_ptr
8245 = vect_create_data_ref_ptr (vinfo, first_stmt_info, aggr_type,
8246 simd_lane_access_p ? loop : NULL,
8247 offset, &dummy, gsi, &ptr_incr,
8248 simd_lane_access_p, NULL_TREE, bump);
8249 }
8250 else
8251 {
8252 /* For interleaved stores we created vectorized defs for all the
8253 defs stored in OPRNDS in the previous iteration (previous copy).
8254 DR_CHAIN is then used as an input to vect_permute_store_chain(),
8255 and OPRNDS as an input to vect_get_vec_def_for_stmt_copy() for the
8256 next copy.
8257 If the store is not grouped, DR_GROUP_SIZE is 1, and DR_CHAIN and
8258 OPRNDS are of size 1. */
8259 for (i = 0; i < group_size; i++)
8260 {
8261 op = oprnds[i];
8262 vec_oprnd = vect_get_vec_def_for_stmt_copy (vinfo, op);
8263 dr_chain[i] = vec_oprnd;
8264 oprnds[i] = vec_oprnd;
8265 }
8266 if (mask)
8267 vec_mask = vect_get_vec_def_for_stmt_copy (vinfo, vec_mask);
8268 if (dataref_offset)
8269 dataref_offset
8270 = int_const_binop (PLUS_EXPR, dataref_offset, bump);
8271 else if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
8272 vec_offset = vect_get_vec_def_for_stmt_copy (vinfo, vec_offset);
8273 else
8274 dataref_ptr = bump_vector_ptr (vinfo, dataref_ptr, ptr_incr, gsi,
8275 stmt_info, bump);
8276 }
8277
8278 if (memory_access_type == VMAT_LOAD_STORE_LANES)
8279 {
8280 tree vec_array;
8281
8282 /* Get an array into which we can store the individual vectors. */
8283 vec_array = create_vector_array (vectype, vec_num);
8284
8285 /* Invalidate the current contents of VEC_ARRAY. This should
8286 become an RTL clobber too, which prevents the vector registers
8287 from being upward-exposed. */
8288 vect_clobber_variable (vinfo, stmt_info, gsi, vec_array);
8289
8290 /* Store the individual vectors into the array. */
8291 for (i = 0; i < vec_num; i++)
8292 {
8293 vec_oprnd = dr_chain[i];
8294 write_vector_array (vinfo, stmt_info,
8295 gsi, vec_oprnd, vec_array, i);
8296 }
8297
8298 tree final_mask = NULL;
8299 if (loop_masks)
8300 final_mask = vect_get_loop_mask (gsi, loop_masks, ncopies,
8301 vectype, j);
8302 if (vec_mask)
8303 final_mask = prepare_load_store_mask (mask_vectype, final_mask,
8304 vec_mask, gsi);
8305
8306 gcall *call;
8307 if (final_mask)
8308 {
8309 /* Emit:
8310 MASK_STORE_LANES (DATAREF_PTR, ALIAS_PTR, VEC_MASK,
8311 VEC_ARRAY). */
8312 unsigned int align = TYPE_ALIGN_UNIT (TREE_TYPE (vectype));
8313 tree alias_ptr = build_int_cst (ref_type, align);
8314 call = gimple_build_call_internal (IFN_MASK_STORE_LANES, 4,
8315 dataref_ptr, alias_ptr,
8316 final_mask, vec_array);
8317 }
8318 else
8319 {
8320 /* Emit:
8321 MEM_REF[...all elements...] = STORE_LANES (VEC_ARRAY). */
8322 data_ref = create_array_ref (aggr_type, dataref_ptr, ref_type);
8323 call = gimple_build_call_internal (IFN_STORE_LANES, 1,
8324 vec_array);
8325 gimple_call_set_lhs (call, data_ref);
8326 }
8327 gimple_call_set_nothrow (call, true);
8328 new_stmt_info = vect_finish_stmt_generation (vinfo, stmt_info,
8329 call, gsi);
8330
8331 /* Record that VEC_ARRAY is now dead. */
8332 vect_clobber_variable (vinfo, stmt_info, gsi, vec_array);
8333 }
8334 else
8335 {
8336 new_stmt_info = NULL;
8337 if (grouped_store)
8338 {
8339 if (j == 0)
8340 result_chain.create (group_size);
8341 /* Permute. */
8342 vect_permute_store_chain (vinfo, dr_chain, group_size, stmt_info,
8343 gsi, &result_chain);
8344 }
8345
8346 stmt_vec_info next_stmt_info = first_stmt_info;
8347 for (i = 0; i < vec_num; i++)
8348 {
8349 unsigned misalign;
8350 unsigned HOST_WIDE_INT align;
8351
8352 tree final_mask = NULL_TREE;
8353 if (loop_masks)
8354 final_mask = vect_get_loop_mask (gsi, loop_masks,
8355 vec_num * ncopies,
8356 vectype, vec_num * j + i);
8357 if (vec_mask)
8358 final_mask = prepare_load_store_mask (mask_vectype, final_mask,
8359 vec_mask, gsi);
8360
8361 if (memory_access_type == VMAT_GATHER_SCATTER)
8362 {
8363 tree scale = size_int (gs_info.scale);
8364 gcall *call;
8365 if (loop_masks)
8366 call = gimple_build_call_internal
8367 (IFN_MASK_SCATTER_STORE, 5, dataref_ptr, vec_offset,
8368 scale, vec_oprnd, final_mask);
8369 else
8370 call = gimple_build_call_internal
8371 (IFN_SCATTER_STORE, 4, dataref_ptr, vec_offset,
8372 scale, vec_oprnd);
8373 gimple_call_set_nothrow (call, true);
8374 new_stmt_info
8375 = vect_finish_stmt_generation (vinfo, stmt_info, call, gsi);
8376 break;
8377 }
8378
8379 if (i > 0)
8380 /* Bump the vector pointer. */
8381 dataref_ptr = bump_vector_ptr (vinfo, dataref_ptr, ptr_incr,
8382 gsi, stmt_info, bump);
8383
8384 if (slp)
8385 vec_oprnd = vec_oprnds[i];
8386 else if (grouped_store)
8387 /* For grouped stores vectorized defs are interleaved in
8388 vect_permute_store_chain(). */
8389 vec_oprnd = result_chain[i];
8390
8391 align = known_alignment (DR_TARGET_ALIGNMENT (first_dr_info));
8392 if (aligned_access_p (first_dr_info))
8393 misalign = 0;
8394 else if (DR_MISALIGNMENT (first_dr_info) == -1)
8395 {
8396 align = dr_alignment (vect_dr_behavior (vinfo, first_dr_info));
8397 misalign = 0;
8398 }
8399 else
8400 misalign = DR_MISALIGNMENT (first_dr_info);
8401 if (dataref_offset == NULL_TREE
8402 && TREE_CODE (dataref_ptr) == SSA_NAME)
8403 set_ptr_info_alignment (get_ptr_info (dataref_ptr), align,
8404 misalign);
8405
8406 if (memory_access_type == VMAT_CONTIGUOUS_REVERSE)
8407 {
8408 tree perm_mask = perm_mask_for_reverse (vectype);
8409 tree perm_dest = vect_create_destination_var
8410 (vect_get_store_rhs (stmt_info), vectype);
8411 tree new_temp = make_ssa_name (perm_dest);
8412
8413 /* Generate the permute statement. */
8414 gimple *perm_stmt
8415 = gimple_build_assign (new_temp, VEC_PERM_EXPR, vec_oprnd,
8416 vec_oprnd, perm_mask);
8417 vect_finish_stmt_generation (vinfo, stmt_info, perm_stmt, gsi);
8418
8419 perm_stmt = SSA_NAME_DEF_STMT (new_temp);
8420 vec_oprnd = new_temp;
8421 }
8422
8423 /* Arguments are ready. Create the new vector stmt. */
8424 if (final_mask)
8425 {
8426 align = least_bit_hwi (misalign | align);
8427 tree ptr = build_int_cst (ref_type, align);
8428 gcall *call
8429 = gimple_build_call_internal (IFN_MASK_STORE, 4,
8430 dataref_ptr, ptr,
8431 final_mask, vec_oprnd);
8432 gimple_call_set_nothrow (call, true);
8433 new_stmt_info
8434 = vect_finish_stmt_generation (vinfo, stmt_info, call, gsi);
8435 }
8436 else
8437 {
8438 data_ref = fold_build2 (MEM_REF, vectype,
8439 dataref_ptr,
8440 dataref_offset
8441 ? dataref_offset
8442 : build_int_cst (ref_type, 0));
8443 if (aligned_access_p (first_dr_info))
8444 ;
8445 else if (DR_MISALIGNMENT (first_dr_info) == -1)
8446 TREE_TYPE (data_ref)
8447 = build_aligned_type (TREE_TYPE (data_ref),
8448 align * BITS_PER_UNIT);
8449 else
8450 TREE_TYPE (data_ref)
8451 = build_aligned_type (TREE_TYPE (data_ref),
8452 TYPE_ALIGN (elem_type));
8453 vect_copy_ref_info (data_ref, DR_REF (first_dr_info->dr));
8454 gassign *new_stmt
8455 = gimple_build_assign (data_ref, vec_oprnd);
8456 new_stmt_info
8457 = vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
8458 }
8459
8460 if (slp)
8461 continue;
8462
8463 next_stmt_info = DR_GROUP_NEXT_ELEMENT (next_stmt_info);
8464 if (!next_stmt_info)
8465 break;
8466 }
8467 }
8468 if (!slp)
8469 {
8470 if (j == 0)
8471 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt_info;
8472 else
8473 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt_info;
8474 prev_stmt_info = new_stmt_info;
8475 }
8476 }
8477
8478 oprnds.release ();
8479 result_chain.release ();
8480 vec_oprnds.release ();
8481
8482 return true;
8483 }
8484
8485 /* Given a vector type VECTYPE, turns permutation SEL into the equivalent
8486 VECTOR_CST mask. No checks are made that the target platform supports the
8487 mask, so callers may wish to test can_vec_perm_const_p separately, or use
8488 vect_gen_perm_mask_checked. */
8489
8490 tree
8491 vect_gen_perm_mask_any (tree vectype, const vec_perm_indices &sel)
8492 {
8493 tree mask_type;
8494
8495 poly_uint64 nunits = sel.length ();
8496 gcc_assert (known_eq (nunits, TYPE_VECTOR_SUBPARTS (vectype)));
8497
8498 mask_type = build_vector_type (ssizetype, nunits);
8499 return vec_perm_indices_to_tree (mask_type, sel);
8500 }
8501
8502 /* Checked version of vect_gen_perm_mask_any. Asserts can_vec_perm_const_p,
8503 i.e. that the target supports the pattern _for arbitrary input vectors_. */
8504
8505 tree
8506 vect_gen_perm_mask_checked (tree vectype, const vec_perm_indices &sel)
8507 {
8508 gcc_assert (can_vec_perm_const_p (TYPE_MODE (vectype), sel));
8509 return vect_gen_perm_mask_any (vectype, sel);
8510 }
8511
8512 /* Given vector variables X and Y that were generated for the scalar
8513 STMT_INFO, generate instructions to permute the vector elements of X and Y
8514 using permutation mask MASK_VEC, insert them at *GSI and return the
8515 permuted vector variable. */
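/* The generated statement has the form (a sketch):
     DATA_REF = VEC_PERM_EXPR <X, Y, MASK_VEC>;  */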
8516
8517 static tree
8518 permute_vec_elements (vec_info *vinfo,
8519 tree x, tree y, tree mask_vec, stmt_vec_info stmt_info,
8520 gimple_stmt_iterator *gsi)
8521 {
8522 tree vectype = TREE_TYPE (x);
8523 tree perm_dest, data_ref;
8524 gimple *perm_stmt;
8525
8526 tree scalar_dest = gimple_get_lhs (stmt_info->stmt);
8527 if (scalar_dest && TREE_CODE (scalar_dest) == SSA_NAME)
8528 perm_dest = vect_create_destination_var (scalar_dest, vectype);
8529 else
8530 perm_dest = vect_get_new_vect_var (vectype, vect_simple_var, NULL);
8531 data_ref = make_ssa_name (perm_dest);
8532
8533 /* Generate the permute statement. */
8534 perm_stmt = gimple_build_assign (data_ref, VEC_PERM_EXPR, x, y, mask_vec);
8535 vect_finish_stmt_generation (vinfo, stmt_info, perm_stmt, gsi);
8536
8537 return data_ref;
8538 }
8539
8540 /* Hoist the definitions of all SSA uses on STMT_INFO out of the loop LOOP,
8541 inserting them on the loop's preheader edge. Returns true if we
8542 were successful in doing so (and thus STMT_INFO can then be moved),
8543 otherwise returns false. */
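/* For example (an illustrative sketch): if STMT_INFO is the invariant load
     x_3 = MEM[ptr_1];
   and ptr_1 is defined inside LOOP by the invariant computation
     ptr_1 = base_2 + 16;
   then that definition is moved to the preheader edge so that the load
   itself can subsequently be hoisted by the caller.  */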
8544
8545 static bool
8546 hoist_defs_of_uses (stmt_vec_info stmt_info, class loop *loop)
8547 {
8548 ssa_op_iter i;
8549 tree op;
8550 bool any = false;
8551
8552 FOR_EACH_SSA_TREE_OPERAND (op, stmt_info->stmt, i, SSA_OP_USE)
8553 {
8554 gimple *def_stmt = SSA_NAME_DEF_STMT (op);
8555 if (!gimple_nop_p (def_stmt)
8556 && flow_bb_inside_loop_p (loop, gimple_bb (def_stmt)))
8557 {
8558 /* Make sure we don't need to recurse. While we could do
8559 so in simple cases, for more complex use webs we don't
8560 have an easy way to preserve stmt order to fulfil
8561 dependencies within them. */
8562 tree op2;
8563 ssa_op_iter i2;
8564 if (gimple_code (def_stmt) == GIMPLE_PHI)
8565 return false;
8566 FOR_EACH_SSA_TREE_OPERAND (op2, def_stmt, i2, SSA_OP_USE)
8567 {
8568 gimple *def_stmt2 = SSA_NAME_DEF_STMT (op2);
8569 if (!gimple_nop_p (def_stmt2)
8570 && flow_bb_inside_loop_p (loop, gimple_bb (def_stmt2)))
8571 return false;
8572 }
8573 any = true;
8574 }
8575 }
8576
8577 if (!any)
8578 return true;
8579
8580 FOR_EACH_SSA_TREE_OPERAND (op, stmt_info->stmt, i, SSA_OP_USE)
8581 {
8582 gimple *def_stmt = SSA_NAME_DEF_STMT (op);
8583 if (!gimple_nop_p (def_stmt)
8584 && flow_bb_inside_loop_p (loop, gimple_bb (def_stmt)))
8585 {
8586 gimple_stmt_iterator gsi = gsi_for_stmt (def_stmt);
8587 gsi_remove (&gsi, false);
8588 gsi_insert_on_edge_immediate (loop_preheader_edge (loop), def_stmt);
8589 }
8590 }
8591
8592 return true;
8593 }
8594
8595 /* vectorizable_load.
8596
8597 Check if STMT_INFO reads a non-scalar data-ref (array/pointer/structure)
8598 that can be vectorized.
8599 If VEC_STMT is also passed, vectorize STMT_INFO: create a vectorized
8600 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
8601 Return true if STMT_INFO is vectorizable in this way. */
8602
8603 static bool
8604 vectorizable_load (vec_info *vinfo,
8605 stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
8606 stmt_vec_info *vec_stmt, slp_tree slp_node,
8607 stmt_vector_for_cost *cost_vec)
8608 {
8609 tree scalar_dest;
8610 tree vec_dest = NULL;
8611 tree data_ref = NULL;
8612 stmt_vec_info prev_stmt_info;
8613 loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
8614 class loop *loop = NULL;
8615 class loop *containing_loop = gimple_bb (stmt_info->stmt)->loop_father;
8616 bool nested_in_vect_loop = false;
8617 tree elem_type;
8618 tree new_temp;
8619 machine_mode mode;
8620 tree dummy;
8621 enum dr_alignment_support alignment_support_scheme;
8622 tree dataref_ptr = NULL_TREE;
8623 tree dataref_offset = NULL_TREE;
8624 gimple *ptr_incr = NULL;
8625 int ncopies;
8626 int i, j;
8627 unsigned int group_size;
8628 poly_uint64 group_gap_adj;
8629 tree msq = NULL_TREE, lsq;
8630 tree offset = NULL_TREE;
8631 tree byte_offset = NULL_TREE;
8632 tree realignment_token = NULL_TREE;
8633 gphi *phi = NULL;
8634 vec<tree> dr_chain = vNULL;
8635 bool grouped_load = false;
8636 stmt_vec_info first_stmt_info;
8637 stmt_vec_info first_stmt_info_for_drptr = NULL;
8638 bool compute_in_loop = false;
8639 class loop *at_loop;
8640 int vec_num;
8641 bool slp = (slp_node != NULL);
8642 bool slp_perm = false;
8643 bb_vec_info bb_vinfo = dyn_cast <bb_vec_info> (vinfo);
8644 poly_uint64 vf;
8645 tree aggr_type;
8646 gather_scatter_info gs_info;
8647 tree ref_type;
8648 enum vect_def_type mask_dt = vect_unknown_def_type;
8649
8650 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
8651 return false;
8652
8653 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
8654 && ! vec_stmt)
8655 return false;
8656
8657 tree mask = NULL_TREE, mask_vectype = NULL_TREE;
8658 if (gassign *assign = dyn_cast <gassign *> (stmt_info->stmt))
8659 {
8660 scalar_dest = gimple_assign_lhs (assign);
8661 if (TREE_CODE (scalar_dest) != SSA_NAME)
8662 return false;
8663
8664 tree_code code = gimple_assign_rhs_code (assign);
8665 if (code != ARRAY_REF
8666 && code != BIT_FIELD_REF
8667 && code != INDIRECT_REF
8668 && code != COMPONENT_REF
8669 && code != IMAGPART_EXPR
8670 && code != REALPART_EXPR
8671 && code != MEM_REF
8672 && TREE_CODE_CLASS (code) != tcc_declaration)
8673 return false;
8674 }
8675 else
8676 {
8677 gcall *call = dyn_cast <gcall *> (stmt_info->stmt);
8678 if (!call || !gimple_call_internal_p (call))
8679 return false;
8680
8681 internal_fn ifn = gimple_call_internal_fn (call);
8682 if (!internal_load_fn_p (ifn))
8683 return false;
8684
8685 scalar_dest = gimple_call_lhs (call);
8686 if (!scalar_dest)
8687 return false;
8688
8689 int mask_index = internal_fn_mask_index (ifn);
8690 if (mask_index >= 0)
8691 {
8692 mask = gimple_call_arg (call, mask_index);
8693 if (!vect_check_scalar_mask (vinfo, stmt_info, mask, &mask_dt,
8694 &mask_vectype))
8695 return false;
8696 }
8697 }
8698
8699 if (!STMT_VINFO_DATA_REF (stmt_info))
8700 return false;
8701
8702 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
8703 poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
8704
8705 if (loop_vinfo)
8706 {
8707 loop = LOOP_VINFO_LOOP (loop_vinfo);
8708 nested_in_vect_loop = nested_in_vect_loop_p (loop, stmt_info);
8709 vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
8710 }
8711 else
8712 vf = 1;
8713
8714 /* Multiple types in SLP are handled by creating the appropriate number of
8715 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
8716 case of SLP. */
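/* For example, with a V4SI vectype and a vectorization factor of 8,
   ncopies is 2 in the non-SLP case.  */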
8717 if (slp)
8718 ncopies = 1;
8719 else
8720 ncopies = vect_get_num_copies (loop_vinfo, vectype);
8721
8722 gcc_assert (ncopies >= 1);
8723
8724 /* FORNOW. This restriction should be relaxed. */
8725 if (nested_in_vect_loop && ncopies > 1)
8726 {
8727 if (dump_enabled_p ())
8728 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
8729 "multiple types in nested loop.\n");
8730 return false;
8731 }
8732
8733 /* Invalidate assumptions made by dependence analysis when vectorization
8734 on the unrolled body effectively re-orders stmts. */
8735 if (ncopies > 1
8736 && STMT_VINFO_MIN_NEG_DIST (stmt_info) != 0
8737 && maybe_gt (LOOP_VINFO_VECT_FACTOR (loop_vinfo),
8738 STMT_VINFO_MIN_NEG_DIST (stmt_info)))
8739 {
8740 if (dump_enabled_p ())
8741 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
8742 "cannot perform implicit CSE when unrolling "
8743 "with negative dependence distance\n");
8744 return false;
8745 }
8746
8747 elem_type = TREE_TYPE (vectype);
8748 mode = TYPE_MODE (vectype);
8749
8750 /* FORNOW. In some cases we can vectorize even if the data type is not
8751 supported (e.g. data copies). */
8752 if (optab_handler (mov_optab, mode) == CODE_FOR_nothing)
8753 {
8754 if (dump_enabled_p ())
8755 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
8756 "Aligned load, but unsupported type.\n");
8757 return false;
8758 }
8759
8760 /* Check if the load is a part of an interleaving chain. */
8761 if (STMT_VINFO_GROUPED_ACCESS (stmt_info))
8762 {
8763 grouped_load = true;
8764 /* FORNOW */
8765 gcc_assert (!nested_in_vect_loop);
8766 gcc_assert (!STMT_VINFO_GATHER_SCATTER_P (stmt_info));
8767
8768 first_stmt_info = DR_GROUP_FIRST_ELEMENT (stmt_info);
8769 group_size = DR_GROUP_SIZE (first_stmt_info);
8770
8771 /* Refuse non-SLP vectorization of SLP-only groups. */
8772 if (!slp && STMT_VINFO_SLP_VECT_ONLY (first_stmt_info))
8773 {
8774 if (dump_enabled_p ())
8775 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
8776 "cannot vectorize load in non-SLP mode.\n");
8777 return false;
8778 }
8779
8780 if (slp && SLP_TREE_LOAD_PERMUTATION (slp_node).exists ())
8781 {
8782 slp_perm = true;
8783
8784 if (!loop_vinfo)
8785 {
8786 /* In BB vectorization the loaded vector must not access elements
8787 in excess of DR_GROUP_SIZE. */
8788 stmt_vec_info group_info = SLP_TREE_SCALAR_STMTS (slp_node)[0];
8789 group_info = DR_GROUP_FIRST_ELEMENT (group_info);
8790 unsigned HOST_WIDE_INT nunits;
8791 unsigned j, k, maxk = 0;
8792 FOR_EACH_VEC_ELT (SLP_TREE_LOAD_PERMUTATION (slp_node), j, k)
8793 if (k > maxk)
8794 maxk = k;
8795 tree vectype = STMT_VINFO_VECTYPE (group_info);
8796 if (!TYPE_VECTOR_SUBPARTS (vectype).is_constant (&nunits)
8797 || maxk >= (DR_GROUP_SIZE (group_info) & ~(nunits - 1)))
8798 {
8799 if (dump_enabled_p ())
8800 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
8801 "BB vectorization with gaps at the end of "
8802 "a load is not supported\n");
8803 return false;
8804 }
8805 }
8806
8807 auto_vec<tree> tem;
8808 unsigned n_perms;
8809 if (!vect_transform_slp_perm_load (vinfo, slp_node, tem, NULL, vf,
8810 true, &n_perms))
8811 {
8812 if (dump_enabled_p ())
8813 dump_printf_loc (MSG_MISSED_OPTIMIZATION,
8814 vect_location,
8815 "unsupported load permutation\n");
8816 return false;
8817 }
8818 }
8819
8820 /* Invalidate assumptions made by dependence analysis when vectorization
8821 on the unrolled body effectively re-orders stmts. */
8822 if (!PURE_SLP_STMT (stmt_info)
8823 && STMT_VINFO_MIN_NEG_DIST (stmt_info) != 0
8824 && maybe_gt (LOOP_VINFO_VECT_FACTOR (loop_vinfo),
8825 STMT_VINFO_MIN_NEG_DIST (stmt_info)))
8826 {
8827 if (dump_enabled_p ())
8828 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
8829 "cannot perform implicit CSE when performing "
8830 "group loads with negative dependence distance\n");
8831 return false;
8832 }
8833 }
8834 else
8835 group_size = 1;
8836
8837 vect_memory_access_type memory_access_type;
8838 if (!get_load_store_type (vinfo, stmt_info, vectype, slp, mask, VLS_LOAD,
8839 ncopies, &memory_access_type, &gs_info))
8840 return false;
8841
8842 if (mask)
8843 {
8844 if (memory_access_type == VMAT_CONTIGUOUS)
8845 {
8846 machine_mode vec_mode = TYPE_MODE (vectype);
8847 if (!VECTOR_MODE_P (vec_mode)
8848 || !can_vec_mask_load_store_p (vec_mode,
8849 TYPE_MODE (mask_vectype), true))
8850 return false;
8851 }
8852 else if (memory_access_type != VMAT_LOAD_STORE_LANES
8853 && memory_access_type != VMAT_GATHER_SCATTER)
8854 {
8855 if (dump_enabled_p ())
8856 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
8857 "unsupported access type for masked load.\n");
8858 return false;
8859 }
8860 }
8861
8862 if (!vec_stmt) /* transformation not required. */
8863 {
8864 if (!slp)
8865 STMT_VINFO_MEMORY_ACCESS_TYPE (stmt_info) = memory_access_type;
8866
8867 if (loop_vinfo
8868 && LOOP_VINFO_CAN_FULLY_MASK_P (loop_vinfo))
8869 check_load_store_masking (loop_vinfo, vectype, VLS_LOAD, group_size,
8870 memory_access_type, &gs_info, mask);
8871
8872 STMT_VINFO_TYPE (stmt_info) = load_vec_info_type;
8873 vect_model_load_cost (vinfo, stmt_info, ncopies, vf, memory_access_type,
8874 slp_node, cost_vec);
8875 return true;
8876 }
8877
8878 if (!slp)
8879 gcc_assert (memory_access_type
8880 == STMT_VINFO_MEMORY_ACCESS_TYPE (stmt_info));
8881
8882 if (dump_enabled_p ())
8883 dump_printf_loc (MSG_NOTE, vect_location,
8884 "transform load. ncopies = %d\n", ncopies);
8885
8886 /* Transform. */
8887
8888 dr_vec_info *dr_info = STMT_VINFO_DR_INFO (stmt_info), *first_dr_info = NULL;
8889 ensure_base_align (dr_info);
8890
8891 if (memory_access_type == VMAT_GATHER_SCATTER && gs_info.decl)
8892 {
8893 vect_build_gather_load_calls (vinfo,
8894 stmt_info, gsi, vec_stmt, &gs_info, mask);
8895 return true;
8896 }
8897
8898 if (memory_access_type == VMAT_INVARIANT)
8899 {
8900 gcc_assert (!grouped_load && !mask && !bb_vinfo);
8901 /* If we have versioned for aliasing or the loop doesn't
8902 have any data dependencies that would preclude this,
8903 then we are sure this is a loop invariant load and
8904 thus we can insert it on the preheader edge. */
8905 bool hoist_p = (LOOP_VINFO_NO_DATA_DEPENDENCIES (loop_vinfo)
8906 && !nested_in_vect_loop
8907 && hoist_defs_of_uses (stmt_info, loop));
8908 if (hoist_p)
8909 {
8910 gassign *stmt = as_a <gassign *> (stmt_info->stmt);
8911 if (dump_enabled_p ())
8912 dump_printf_loc (MSG_NOTE, vect_location,
8913 "hoisting out of the vectorized loop: %G", stmt);
8914 scalar_dest = copy_ssa_name (scalar_dest);
8915 tree rhs = unshare_expr (gimple_assign_rhs1 (stmt));
8916 gsi_insert_on_edge_immediate
8917 (loop_preheader_edge (loop),
8918 gimple_build_assign (scalar_dest, rhs));
8919 }
8920 /* These copies are all equivalent, but currently the representation
8921 requires a separate STMT_VINFO_VEC_STMT for each one. */
8922 prev_stmt_info = NULL;
8923 gimple_stmt_iterator gsi2 = *gsi;
8924 gsi_next (&gsi2);
8925 for (j = 0; j < ncopies; j++)
8926 {
8927 stmt_vec_info new_stmt_info;
8928 if (hoist_p)
8929 {
8930 new_temp = vect_init_vector (vinfo, stmt_info, scalar_dest,
8931 vectype, NULL);
8932 gimple *new_stmt = SSA_NAME_DEF_STMT (new_temp);
8933 new_stmt_info = vinfo->add_stmt (new_stmt);
8934 }
8935 else
8936 {
8937 new_temp = vect_init_vector (vinfo, stmt_info, scalar_dest,
8938 vectype, &gsi2);
8939 new_stmt_info = vinfo->lookup_def (new_temp);
8940 }
8941 if (slp)
8942 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt_info);
8943 else if (j == 0)
8944 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt_info;
8945 else
8946 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt_info;
8947 prev_stmt_info = new_stmt_info;
8948 }
8949 return true;
8950 }
8951
8952 if (memory_access_type == VMAT_ELEMENTWISE
8953 || memory_access_type == VMAT_STRIDED_SLP)
8954 {
8955 gimple_stmt_iterator incr_gsi;
8956 bool insert_after;
8957 gimple *incr;
8958 tree offvar;
8959 tree ivstep;
8960 tree running_off;
8961 vec<constructor_elt, va_gc> *v = NULL;
8962 tree stride_base, stride_step, alias_off;
8963 /* Checked by get_load_store_type. */
8964 unsigned int const_nunits = nunits.to_constant ();
8965 unsigned HOST_WIDE_INT cst_offset = 0;
8966 tree dr_offset;
8967
8968 gcc_assert (!LOOP_VINFO_FULLY_MASKED_P (loop_vinfo));
8969 gcc_assert (!nested_in_vect_loop);
8970
8971 if (grouped_load)
8972 {
8973 first_stmt_info = DR_GROUP_FIRST_ELEMENT (stmt_info);
8974 first_dr_info = STMT_VINFO_DR_INFO (first_stmt_info);
8975 }
8976 else
8977 {
8978 first_stmt_info = stmt_info;
8979 first_dr_info = dr_info;
8980 }
8981 if (slp && grouped_load)
8982 {
8983 group_size = DR_GROUP_SIZE (first_stmt_info);
8984 ref_type = get_group_alias_ptr_type (first_stmt_info);
8985 }
8986 else
8987 {
8988 if (grouped_load)
8989 cst_offset
8990 = (tree_to_uhwi (TYPE_SIZE_UNIT (TREE_TYPE (vectype)))
8991 * vect_get_place_in_interleaving_chain (stmt_info,
8992 first_stmt_info));
8993 group_size = 1;
8994 ref_type = reference_alias_ptr_type (DR_REF (dr_info->dr));
8995 }
8996
8997 dr_offset = get_dr_vinfo_offset (vinfo, first_dr_info);
8998 stride_base
8999 = fold_build_pointer_plus
9000 (DR_BASE_ADDRESS (first_dr_info->dr),
9001 size_binop (PLUS_EXPR,
9002 convert_to_ptrofftype (dr_offset),
9003 convert_to_ptrofftype (DR_INIT (first_dr_info->dr))));
9004 stride_step = fold_convert (sizetype, DR_STEP (first_dr_info->dr));
9005
9006 /* For a load with loop-invariant (but other than power-of-2)
9007 stride (i.e. not a grouped access) like so:
9008
9009 for (i = 0; i < n; i += stride)
9010 ... = array[i];
9011
9012 we generate a new induction variable and new accesses to
9013 form a new vector (or vectors, depending on ncopies):
9014
9015 for (j = 0; ; j += VF*stride)
9016 tmp1 = array[j];
9017 tmp2 = array[j + stride];
9018 ...
9019 vectemp = {tmp1, tmp2, ...}
9020 */
9021
9022 ivstep = fold_build2 (MULT_EXPR, TREE_TYPE (stride_step), stride_step,
9023 build_int_cst (TREE_TYPE (stride_step), vf));
9024
9025 standard_iv_increment_position (loop, &incr_gsi, &insert_after);
9026
9027 stride_base = cse_and_gimplify_to_preheader (loop_vinfo, stride_base);
9028 ivstep = cse_and_gimplify_to_preheader (loop_vinfo, ivstep);
9029 create_iv (stride_base, ivstep, NULL,
9030 loop, &incr_gsi, insert_after,
9031 &offvar, NULL);
9032 incr = gsi_stmt (incr_gsi);
9033 loop_vinfo->add_stmt (incr);
9034
9035 stride_step = cse_and_gimplify_to_preheader (loop_vinfo, stride_step);
9036
9037 prev_stmt_info = NULL;
9038 running_off = offvar;
9039 alias_off = build_int_cst (ref_type, 0);
9040 int nloads = const_nunits;
9041 int lnel = 1;
9042 tree ltype = TREE_TYPE (vectype);
9043 tree lvectype = vectype;
9044 auto_vec<tree> dr_chain;
9045 if (memory_access_type == VMAT_STRIDED_SLP)
9046 {
9047 if (group_size < const_nunits)
9048 {
9049 /* First check if vec_init optab supports construction from vector
9050 elts directly. Otherwise avoid emitting a constructor of
9051 vector elements by performing the loads using an integer type
9052 of the same size, constructing a vector of those and then
9053 re-interpreting it as the original vector type. This avoids a
9054 huge runtime penalty due to the general inability to perform
9055 store forwarding from smaller stores to a larger load. */
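/* For example (a sketch, assuming the target cannot construct a V4SF
   directly from two V2SF halves): a group of two SF elements in a V4SF
   vectype can be loaded as two DI chunks forming a V2DI that is then
   view-converted back to V4SF.  */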
9056 tree ptype;
9057 tree vtype
9058 = vector_vector_composition_type (vectype,
9059 const_nunits / group_size,
9060 &ptype);
9061 if (vtype != NULL_TREE)
9062 {
9063 nloads = const_nunits / group_size;
9064 lnel = group_size;
9065 lvectype = vtype;
9066 ltype = ptype;
9067 }
9068 }
9069 else
9070 {
9071 nloads = 1;
9072 lnel = const_nunits;
9073 ltype = vectype;
9074 }
9075 ltype = build_aligned_type (ltype, TYPE_ALIGN (TREE_TYPE (vectype)));
9076 }
9077 /* For a single-element vectype load the whole vector(1) scalar_type at once. */
9078 else if (nloads == 1)
9079 ltype = vectype;
9080
9081 if (slp)
9082 {
9083 /* For SLP permutation support we need to load the whole group,
9084 not only the number of vector stmts the permutation result
9085 fits in. */
9086 if (slp_perm)
9087 {
9088 /* We don't yet generate SLP_TREE_LOAD_PERMUTATIONs for
9089 variable VF. */
9090 unsigned int const_vf = vf.to_constant ();
9091 ncopies = CEIL (group_size * const_vf, const_nunits);
9092 dr_chain.create (ncopies);
9093 }
9094 else
9095 ncopies = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
9096 }
9097 unsigned int group_el = 0;
9098 unsigned HOST_WIDE_INT
9099 elsz = tree_to_uhwi (TYPE_SIZE_UNIT (TREE_TYPE (vectype)));
9100 for (j = 0; j < ncopies; j++)
9101 {
9102 if (nloads > 1)
9103 vec_alloc (v, nloads);
9104 stmt_vec_info new_stmt_info = NULL;
9105 for (i = 0; i < nloads; i++)
9106 {
9107 tree this_off = build_int_cst (TREE_TYPE (alias_off),
9108 group_el * elsz + cst_offset);
9109 tree data_ref = build2 (MEM_REF, ltype, running_off, this_off);
9110 vect_copy_ref_info (data_ref, DR_REF (first_dr_info->dr));
9111 gassign *new_stmt
9112 = gimple_build_assign (make_ssa_name (ltype), data_ref);
9113 new_stmt_info
9114 = vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
9115 if (nloads > 1)
9116 CONSTRUCTOR_APPEND_ELT (v, NULL_TREE,
9117 gimple_assign_lhs (new_stmt));
9118
9119 group_el += lnel;
9120 if (! slp
9121 || group_el == group_size)
9122 {
9123 tree newoff = copy_ssa_name (running_off);
9124 gimple *incr = gimple_build_assign (newoff, POINTER_PLUS_EXPR,
9125 running_off, stride_step);
9126 vect_finish_stmt_generation (vinfo, stmt_info, incr, gsi);
9127
9128 running_off = newoff;
9129 group_el = 0;
9130 }
9131 }
9132 if (nloads > 1)
9133 {
9134 tree vec_inv = build_constructor (lvectype, v);
9135 new_temp = vect_init_vector (vinfo, stmt_info,
9136 vec_inv, lvectype, gsi);
9137 new_stmt_info = vinfo->lookup_def (new_temp);
9138 if (lvectype != vectype)
9139 {
9140 gassign *new_stmt
9141 = gimple_build_assign (make_ssa_name (vectype),
9142 VIEW_CONVERT_EXPR,
9143 build1 (VIEW_CONVERT_EXPR,
9144 vectype, new_temp));
9145 new_stmt_info
9146 = vect_finish_stmt_generation (vinfo, stmt_info,
9147 new_stmt, gsi);
9148 }
9149 }
9150
9151 if (slp)
9152 {
9153 if (slp_perm)
9154 dr_chain.quick_push (gimple_assign_lhs (new_stmt_info->stmt));
9155 else
9156 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt_info);
9157 }
9158 else
9159 {
9160 if (j == 0)
9161 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt_info;
9162 else
9163 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt_info;
9164 prev_stmt_info = new_stmt_info;
9165 }
9166 }
9167 if (slp_perm)
9168 {
9169 unsigned n_perms;
9170 vect_transform_slp_perm_load (vinfo, slp_node, dr_chain, gsi, vf,
9171 false, &n_perms);
9172 }
9173 return true;
9174 }
9175
9176 if (memory_access_type == VMAT_GATHER_SCATTER
9177 || (!slp && memory_access_type == VMAT_CONTIGUOUS))
9178 grouped_load = false;
9179
9180 if (grouped_load)
9181 {
9182 first_stmt_info = DR_GROUP_FIRST_ELEMENT (stmt_info);
9183 group_size = DR_GROUP_SIZE (first_stmt_info);
9184 /* For SLP vectorization we directly vectorize a subchain
9185 without permutation. */
9186 if (slp && ! SLP_TREE_LOAD_PERMUTATION (slp_node).exists ())
9187 first_stmt_info = SLP_TREE_SCALAR_STMTS (slp_node)[0];
9188 /* For BB vectorization always use the first stmt to base
9189 the data ref pointer on. */
9190 if (bb_vinfo)
9191 first_stmt_info_for_drptr = SLP_TREE_SCALAR_STMTS (slp_node)[0];
9192
9193 /* Check if the chain of loads is already vectorized. */
9194 if (STMT_VINFO_VEC_STMT (first_stmt_info)
9195 /* For SLP we would need to copy over SLP_TREE_VEC_STMTS.
9196 ??? But we can only do so if there is exactly one
9197 as we have no way to get at the rest. Leave the CSE
9198 opportunity alone.
9199 ??? With the group load eventually participating
9200 in multiple different permutations (having multiple
9201 slp nodes which refer to the same group) the CSE
9202 is even wrong code. See PR56270. */
9203 && !slp)
9204 {
9205 *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
9206 return true;
9207 }
9208 first_dr_info = STMT_VINFO_DR_INFO (first_stmt_info);
9209 group_gap_adj = 0;
9210
9211 /* VEC_NUM is the number of vect stmts to be created for this group. */
9212 if (slp)
9213 {
9214 grouped_load = false;
9215 /* If an SLP permutation is from N elements to N elements,
9216 and if one vector holds a whole number of N, we can load
9217 the inputs to the permutation in the same way as an
9218 unpermuted sequence. In other cases we need to load the
9219 whole group, not only the number of vector stmts the
9220 permutation result fits in. */
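/* For example (assuming an N-to-N permutation): a permuted group of 4
   elements with V4SI vectors can reuse the unpermuted loads below,
   whereas a permuted group of 3 elements cannot and needs the
   whole-group load.  */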
9221 unsigned scalar_lanes = SLP_TREE_SCALAR_STMTS (slp_node).length ();
9222 if (slp_perm
9223 && (group_size != scalar_lanes
9224 || !multiple_p (nunits, group_size)))
9225 {
9226 /* We don't yet generate such SLP_TREE_LOAD_PERMUTATIONs for
9227 variable VF; see vect_transform_slp_perm_load. */
9228 unsigned int const_vf = vf.to_constant ();
9229 unsigned int const_nunits = nunits.to_constant ();
9230 vec_num = CEIL (group_size * const_vf, const_nunits);
9231 group_gap_adj = vf * group_size - nunits * vec_num;
9232 }
9233 else
9234 {
9235 vec_num = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
9236 group_gap_adj
9237 = group_size - scalar_lanes;
9238 }
9239 }
9240 else
9241 vec_num = group_size;
9242
9243 ref_type = get_group_alias_ptr_type (first_stmt_info);
9244 }
9245 else
9246 {
9247 first_stmt_info = stmt_info;
9248 first_dr_info = dr_info;
9249 group_size = vec_num = 1;
9250 group_gap_adj = 0;
9251 ref_type = reference_alias_ptr_type (DR_REF (first_dr_info->dr));
9252 }
9253
9254 /* Gather-scatter accesses perform only component accesses; alignment
9255 is irrelevant for them. */
9256 if (memory_access_type == VMAT_GATHER_SCATTER)
9257 alignment_support_scheme = dr_unaligned_supported;
9258 else
9259 alignment_support_scheme
9260 = vect_supportable_dr_alignment (vinfo, first_dr_info, false);
9261
9262 gcc_assert (alignment_support_scheme);
9263 vec_loop_masks *loop_masks
9264 = (loop_vinfo && LOOP_VINFO_FULLY_MASKED_P (loop_vinfo)
9265 ? &LOOP_VINFO_MASKS (loop_vinfo)
9266 : NULL);
9267 /* Targets with load-lane instructions must not require explicit
9268 realignment. vect_supportable_dr_alignment always returns either
9269 dr_aligned or dr_unaligned_supported for masked operations. */
9270 gcc_assert ((memory_access_type != VMAT_LOAD_STORE_LANES
9271 && !mask
9272 && !loop_masks)
9273 || alignment_support_scheme == dr_aligned
9274 || alignment_support_scheme == dr_unaligned_supported);
9275
9276 /* In case the vectorization factor (VF) is bigger than the number
9277 of elements that we can fit in a vectype (nunits), we have to generate
9278 more than one vector stmt - i.e. we need to "unroll" the
9279 vector stmt by a factor VF/nunits. In doing so, we record a pointer
9280 from one copy of the vector stmt to the next, in the field
9281 STMT_VINFO_RELATED_STMT. This is necessary in order to allow following
9282 stages to find the correct vector defs to be used when vectorizing
9283 stmts that use the defs of the current stmt. The example below
9284 illustrates the vectorization process when VF=16 and nunits=4 (i.e., we
9285 need to create 4 vectorized stmts):
9286
9287 before vectorization:
9288 RELATED_STMT VEC_STMT
9289 S1: x = memref - -
9290 S2: z = x + 1 - -
9291
9292 step 1: vectorize stmt S1:
9293 We first create the vector stmt VS1_0, and, as usual, record a
9294 pointer to it in the STMT_VINFO_VEC_STMT of the scalar stmt S1.
9295 Next, we create the vector stmt VS1_1, and record a pointer to
9296 it in the STMT_VINFO_RELATED_STMT of the vector stmt VS1_0.
9297 Similarly, for VS1_2 and VS1_3. This is the resulting chain of
9298 stmts and pointers:
9299 RELATED_STMT VEC_STMT
9300 VS1_0: vx0 = memref0 VS1_1 -
9301 VS1_1: vx1 = memref1 VS1_2 -
9302 VS1_2: vx2 = memref2 VS1_3 -
9303 VS1_3: vx3 = memref3 - -
9304 S1: x = load - VS1_0
9305 S2: z = x + 1 - -
9306
9307 See in documentation in vect_get_vec_def_for_stmt_copy for how the
9308 information we recorded in RELATED_STMT field is used to vectorize
9309 stmt S2. */
9310
9311 /* In case of interleaving (non-unit grouped access):
9312
9313 S1: x2 = &base + 2
9314 S2: x0 = &base
9315 S3: x1 = &base + 1
9316 S4: x3 = &base + 3
9317
9318 Vectorized loads are created in the order of memory accesses
9319 starting from the access of the first stmt of the chain:
9320
9321 VS1: vx0 = &base
9322 VS2: vx1 = &base + vec_size*1
9323 VS3: vx3 = &base + vec_size*2
9324 VS4: vx4 = &base + vec_size*3
9325
9326 Then permutation statements are generated:
9327
9328 VS5: vx5 = VEC_PERM_EXPR < vx0, vx1, { 0, 2, ..., i*2 } >
9329 VS6: vx6 = VEC_PERM_EXPR < vx0, vx1, { 1, 3, ..., i*2+1 } >
9330 ...
9331
9332 And they are put in STMT_VINFO_VEC_STMT of the corresponding scalar stmts
9333 (the order of the data-refs in the output of vect_permute_load_chain
9334 corresponds to the order of scalar stmts in the interleaving chain - see
9335 the documentation of vect_permute_load_chain()).
9336 The generation of permutation stmts and recording them in
9337 STMT_VINFO_VEC_STMT is done in vect_transform_grouped_load().
9338
9339 In case of both multiple types and interleaving, the vector loads and
9340 permutation stmts above are created for every copy. The result vector
9341 stmts are put in STMT_VINFO_VEC_STMT for the first copy and in the
9342 corresponding STMT_VINFO_RELATED_STMT for the next copies. */
9343
9344 /* If the data reference is aligned (dr_aligned) or potentially unaligned
9345 on a target that supports unaligned accesses (dr_unaligned_supported)
9346 we generate the following code:
9347 p = initial_addr;
9348 indx = 0;
9349 loop {
9350 p = p + indx * vectype_size;
9351 vec_dest = *(p);
9352 indx = indx + 1;
9353 }
9354
9355 Otherwise, the data reference is potentially unaligned on a target that
9356 does not support unaligned accesses (dr_explicit_realign_optimized) -
9357 then generate the following code, in which the data in each iteration is
9358 obtained by two vector loads, one from the previous iteration, and one
9359 from the current iteration:
9360 p1 = initial_addr;
9361 msq_init = *(floor(p1))
9362 p2 = initial_addr + VS - 1;
9363 realignment_token = call target_builtin;
9364 indx = 0;
9365 loop {
9366 p2 = p2 + indx * vectype_size
9367 lsq = *(floor(p2))
9368 vec_dest = realign_load (msq, lsq, realignment_token)
9369 indx = indx + 1;
9370 msq = lsq;
9371 } */
9372
9373 /* If the misalignment remains the same throughout the execution of the
9374 loop, we can create the init_addr and permutation mask at the loop
9375 preheader. Otherwise, they need to be created inside the loop.
9376 This can only occur when vectorizing memory accesses in the inner-loop
9377 nested within an outer-loop that is being vectorized. */
9378
9379 if (nested_in_vect_loop
9380 && !multiple_p (DR_STEP_ALIGNMENT (dr_info->dr),
9381 GET_MODE_SIZE (TYPE_MODE (vectype))))
9382 {
9383 gcc_assert (alignment_support_scheme != dr_explicit_realign_optimized);
9384 compute_in_loop = true;
9385 }
9386
9387 bool diff_first_stmt_info
9388 = first_stmt_info_for_drptr && first_stmt_info != first_stmt_info_for_drptr;
9389
9390 if ((alignment_support_scheme == dr_explicit_realign_optimized
9391 || alignment_support_scheme == dr_explicit_realign)
9392 && !compute_in_loop)
9393 {
9394 /* If we have a different first_stmt_info, we can't set up realignment
9395 here, since we can't guarantee that the first_stmt_info DR has been
9396 initialized yet; instead use the first_stmt_info_for_drptr DR and
9397 bump it by its distance from the first_stmt_info DR as below. */
9398 if (!diff_first_stmt_info)
9399 msq = vect_setup_realignment (vinfo,
9400 first_stmt_info, gsi, &realignment_token,
9401 alignment_support_scheme, NULL_TREE,
9402 &at_loop);
9403 if (alignment_support_scheme == dr_explicit_realign_optimized)
9404 {
9405 phi = as_a <gphi *> (SSA_NAME_DEF_STMT (msq));
9406 byte_offset = size_binop (MINUS_EXPR, TYPE_SIZE_UNIT (vectype),
9407 size_one_node);
9408 gcc_assert (!first_stmt_info_for_drptr);
9409 }
9410 }
9411 else
9412 at_loop = loop;
9413
9414 if (memory_access_type == VMAT_CONTIGUOUS_REVERSE)
9415 offset = size_int (-TYPE_VECTOR_SUBPARTS (vectype) + 1);
9416
9417 tree bump;
9418 tree vec_offset = NULL_TREE;
9419 if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
9420 {
9421 aggr_type = NULL_TREE;
9422 bump = NULL_TREE;
9423 }
9424 else if (memory_access_type == VMAT_GATHER_SCATTER)
9425 {
9426 aggr_type = elem_type;
9427 vect_get_strided_load_store_ops (stmt_info, loop_vinfo, &gs_info,
9428 &bump, &vec_offset);
9429 }
9430 else
9431 {
9432 if (memory_access_type == VMAT_LOAD_STORE_LANES)
9433 aggr_type = build_array_type_nelts (elem_type, vec_num * nunits);
9434 else
9435 aggr_type = vectype;
9436 bump = vect_get_data_ptr_increment (vinfo, dr_info, aggr_type,
9437 memory_access_type);
9438 }
9439
9440 tree vec_mask = NULL_TREE;
9441 prev_stmt_info = NULL;
9442 poly_uint64 group_elt = 0;
9443 for (j = 0; j < ncopies; j++)
9444 {
9445 stmt_vec_info new_stmt_info = NULL;
9446 /* 1. Create the vector or array pointer update chain. */
9447 if (j == 0)
9448 {
9449 bool simd_lane_access_p
9450 = STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) != 0;
9451 if (simd_lane_access_p
9452 && TREE_CODE (DR_BASE_ADDRESS (first_dr_info->dr)) == ADDR_EXPR
9453 && VAR_P (TREE_OPERAND (DR_BASE_ADDRESS (first_dr_info->dr), 0))
9454 && integer_zerop (get_dr_vinfo_offset (vinfo, first_dr_info))
9455 && integer_zerop (DR_INIT (first_dr_info->dr))
9456 && alias_sets_conflict_p (get_alias_set (aggr_type),
9457 get_alias_set (TREE_TYPE (ref_type)))
9458 && (alignment_support_scheme == dr_aligned
9459 || alignment_support_scheme == dr_unaligned_supported))
9460 {
9461 dataref_ptr = unshare_expr (DR_BASE_ADDRESS (first_dr_info->dr));
9462 dataref_offset = build_int_cst (ref_type, 0);
9463 }
9464 else if (diff_first_stmt_info)
9465 {
9466 dataref_ptr
9467 = vect_create_data_ref_ptr (vinfo, first_stmt_info_for_drptr,
9468 aggr_type, at_loop, offset, &dummy,
9469 gsi, &ptr_incr, simd_lane_access_p,
9470 byte_offset, bump);
9471 /* Adjust the pointer by the difference to first_stmt. */
9472 data_reference_p ptrdr
9473 = STMT_VINFO_DATA_REF (first_stmt_info_for_drptr);
9474 tree diff
9475 = fold_convert (sizetype,
9476 size_binop (MINUS_EXPR,
9477 DR_INIT (first_dr_info->dr),
9478 DR_INIT (ptrdr)));
9479 dataref_ptr = bump_vector_ptr (vinfo, dataref_ptr, ptr_incr, gsi,
9480 stmt_info, diff);
9481 if (alignment_support_scheme == dr_explicit_realign)
9482 {
9483 msq = vect_setup_realignment (vinfo,
9484 first_stmt_info_for_drptr, gsi,
9485 &realignment_token,
9486 alignment_support_scheme,
9487 dataref_ptr, &at_loop);
9488 gcc_assert (!compute_in_loop);
9489 }
9490 }
9491 else if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
9492 vect_get_gather_scatter_ops (vinfo, loop, stmt_info, &gs_info,
9493 &dataref_ptr, &vec_offset);
9494 else
9495 dataref_ptr
9496 = vect_create_data_ref_ptr (vinfo, first_stmt_info, aggr_type,
9497 at_loop,
9498 offset, &dummy, gsi, &ptr_incr,
9499 simd_lane_access_p,
9500 byte_offset, bump);
9501 if (mask)
9502 {
9503 if (slp_node)
9504 {
9505 auto_vec<vec<tree> > vec_defs (1);
9506 vect_get_slp_defs (vinfo, slp_node, &vec_defs);
9507 vec_mask = vec_defs[0][0];
9508 }
9509 else
9510 vec_mask = vect_get_vec_def_for_operand (vinfo, mask, stmt_info,
9511 mask_vectype);
9512 }
9513 }
9514 else
9515 {
9516 if (dataref_offset)
9517 dataref_offset = int_const_binop (PLUS_EXPR, dataref_offset,
9518 bump);
9519 else if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
9520 vec_offset = vect_get_vec_def_for_stmt_copy (vinfo, vec_offset);
9521 else
9522 dataref_ptr = bump_vector_ptr (vinfo, dataref_ptr, ptr_incr, gsi,
9523 stmt_info, bump);
9524 if (mask)
9525 vec_mask = vect_get_vec_def_for_stmt_copy (vinfo, vec_mask);
9526 }
9527
9528 if (grouped_load || slp_perm)
9529 dr_chain.create (vec_num);
9530
9531 if (memory_access_type == VMAT_LOAD_STORE_LANES)
9532 {
9533 tree vec_array;
9534
9535 vec_array = create_vector_array (vectype, vec_num);
9536
9537 tree final_mask = NULL_TREE;
9538 if (loop_masks)
9539 final_mask = vect_get_loop_mask (gsi, loop_masks, ncopies,
9540 vectype, j);
9541 if (vec_mask)
9542 final_mask = prepare_load_store_mask (mask_vectype, final_mask,
9543 vec_mask, gsi);
9544
9545 gcall *call;
9546 if (final_mask)
9547 {
9548 /* Emit:
9549 VEC_ARRAY = MASK_LOAD_LANES (DATAREF_PTR, ALIAS_PTR,
9550 VEC_MASK). */
9551 unsigned int align = TYPE_ALIGN_UNIT (TREE_TYPE (vectype));
9552 tree alias_ptr = build_int_cst (ref_type, align);
9553 call = gimple_build_call_internal (IFN_MASK_LOAD_LANES, 3,
9554 dataref_ptr, alias_ptr,
9555 final_mask);
9556 }
9557 else
9558 {
9559 /* Emit:
9560 VEC_ARRAY = LOAD_LANES (MEM_REF[...all elements...]). */
9561 data_ref = create_array_ref (aggr_type, dataref_ptr, ref_type);
9562 call = gimple_build_call_internal (IFN_LOAD_LANES, 1, data_ref);
9563 }
9564 gimple_call_set_lhs (call, vec_array);
9565 gimple_call_set_nothrow (call, true);
9566 new_stmt_info = vect_finish_stmt_generation (vinfo, stmt_info,
9567 call, gsi);
9568
9569 /* Extract each vector into an SSA_NAME. */
9570 for (i = 0; i < vec_num; i++)
9571 {
9572 new_temp = read_vector_array (vinfo, stmt_info, gsi, scalar_dest,
9573 vec_array, i);
9574 dr_chain.quick_push (new_temp);
9575 }
9576
9577 /* Record the mapping between SSA_NAMEs and statements. */
9578 vect_record_grouped_load_vectors (vinfo, stmt_info, dr_chain);
9579
9580 /* Record that VEC_ARRAY is now dead. */
9581 vect_clobber_variable (vinfo, stmt_info, gsi, vec_array);
9582 }
9583 else
9584 {
9585 for (i = 0; i < vec_num; i++)
9586 {
9587 tree final_mask = NULL_TREE;
9588 if (loop_masks
9589 && memory_access_type != VMAT_INVARIANT)
9590 final_mask = vect_get_loop_mask (gsi, loop_masks,
9591 vec_num * ncopies,
9592 vectype, vec_num * j + i);
9593 if (vec_mask)
9594 final_mask = prepare_load_store_mask (mask_vectype, final_mask,
9595 vec_mask, gsi);
9596
9597 if (i > 0)
9598 dataref_ptr = bump_vector_ptr (vinfo, dataref_ptr, ptr_incr,
9599 gsi, stmt_info, bump);
9600
9601 /* 2. Create the vector-load in the loop. */
9602 gimple *new_stmt = NULL;
9603 switch (alignment_support_scheme)
9604 {
9605 case dr_aligned:
9606 case dr_unaligned_supported:
9607 {
9608 unsigned int misalign;
9609 unsigned HOST_WIDE_INT align;
9610
9611 if (memory_access_type == VMAT_GATHER_SCATTER)
9612 {
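/* A sketch of the internal calls built here:
     MASK_GATHER_LOAD (DATAREF_PTR, VEC_OFFSET, SCALE, ZERO, FINAL_MASK)
   when loop masking is in effect, otherwise
     GATHER_LOAD (DATAREF_PTR, VEC_OFFSET, SCALE, ZERO).  */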
9613 tree zero = build_zero_cst (vectype);
9614 tree scale = size_int (gs_info.scale);
9615 gcall *call;
9616 if (loop_masks)
9617 call = gimple_build_call_internal
9618 (IFN_MASK_GATHER_LOAD, 5, dataref_ptr,
9619 vec_offset, scale, zero, final_mask);
9620 else
9621 call = gimple_build_call_internal
9622 (IFN_GATHER_LOAD, 4, dataref_ptr,
9623 vec_offset, scale, zero);
9624 gimple_call_set_nothrow (call, true);
9625 new_stmt = call;
9626 data_ref = NULL_TREE;
9627 break;
9628 }
9629
9630 align =
9631 known_alignment (DR_TARGET_ALIGNMENT (first_dr_info));
9632 if (alignment_support_scheme == dr_aligned)
9633 {
9634 gcc_assert (aligned_access_p (first_dr_info));
9635 misalign = 0;
9636 }
9637 else if (DR_MISALIGNMENT (first_dr_info) == -1)
9638 {
9639 align = dr_alignment
9640 (vect_dr_behavior (vinfo, first_dr_info));
9641 misalign = 0;
9642 }
9643 else
9644 misalign = DR_MISALIGNMENT (first_dr_info);
9645 if (dataref_offset == NULL_TREE
9646 && TREE_CODE (dataref_ptr) == SSA_NAME)
9647 set_ptr_info_alignment (get_ptr_info (dataref_ptr),
9648 align, misalign);
9649
9650 if (final_mask)
9651 {
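/* Emit (a sketch): VEC_DEST = MASK_LOAD (DATAREF_PTR, ALIAS_PTR, FINAL_MASK)
   where ALIAS_PTR is a constant of REF_TYPE that also carries the
   known alignment of the access.  */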
9652 align = least_bit_hwi (misalign | align);
9653 tree ptr = build_int_cst (ref_type, align);
9654 gcall *call
9655 = gimple_build_call_internal (IFN_MASK_LOAD, 3,
9656 dataref_ptr, ptr,
9657 final_mask);
9658 gimple_call_set_nothrow (call, true);
9659 new_stmt = call;
9660 data_ref = NULL_TREE;
9661 }
9662 else
9663 {
9664 tree ltype = vectype;
9665 tree new_vtype = NULL_TREE;
9666 unsigned HOST_WIDE_INT gap
9667 = DR_GROUP_GAP (first_stmt_info);
9668 unsigned int vect_align
9669 = vect_known_alignment_in_bytes (first_dr_info);
9670 unsigned int scalar_dr_size
9671 = vect_get_scalar_dr_size (first_dr_info);
9672 /* If there's no peeling for gaps but we have a gap
9673 with SLP loads then load the lower half of the
9674 vector only. See get_group_load_store_type for
9675 when we apply this optimization. */
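/* For example (a sketch, alignment permitting): with a V4SI vectype,
   group_size 4 and a gap of 2, only half of the vector (two elements)
   is actually loaded and the remaining elements are filled with zeros
   by the constructor below.  */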
9676 if (slp
9677 && loop_vinfo
9678 && !LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo)
9679 && gap != 0
9680 && known_eq (nunits, (group_size - gap) * 2)
9681 && known_eq (nunits, group_size)
9682 && gap >= (vect_align / scalar_dr_size))
9683 {
9684 tree half_vtype;
9685 new_vtype
9686 = vector_vector_composition_type (vectype, 2,
9687 &half_vtype);
9688 if (new_vtype != NULL_TREE)
9689 ltype = half_vtype;
9690 }
9691 tree offset
9692 = (dataref_offset ? dataref_offset
9693 : build_int_cst (ref_type, 0));
9694 if (ltype != vectype
9695 && memory_access_type == VMAT_CONTIGUOUS_REVERSE)
9696 {
9697 unsigned HOST_WIDE_INT gap_offset
9698 = gap * tree_to_uhwi (TYPE_SIZE_UNIT (elem_type));
9699 tree gapcst = build_int_cst (ref_type, gap_offset);
9700 offset = size_binop (PLUS_EXPR, offset, gapcst);
9701 }
9702 data_ref
9703 = fold_build2 (MEM_REF, ltype, dataref_ptr, offset);
9704 if (alignment_support_scheme == dr_aligned)
9705 ;
9706 else if (DR_MISALIGNMENT (first_dr_info) == -1)
9707 TREE_TYPE (data_ref)
9708 = build_aligned_type (TREE_TYPE (data_ref),
9709 align * BITS_PER_UNIT);
9710 else
9711 TREE_TYPE (data_ref)
9712 = build_aligned_type (TREE_TYPE (data_ref),
9713 TYPE_ALIGN (elem_type));
9714 if (ltype != vectype)
9715 {
9716 vect_copy_ref_info (data_ref,
9717 DR_REF (first_dr_info->dr));
9718 tree tem = make_ssa_name (ltype);
9719 new_stmt = gimple_build_assign (tem, data_ref);
9720 vect_finish_stmt_generation (vinfo, stmt_info,
9721 new_stmt, gsi);
9722 data_ref = NULL;
9723 vec<constructor_elt, va_gc> *v;
9724 vec_alloc (v, 2);
9725 if (memory_access_type == VMAT_CONTIGUOUS_REVERSE)
9726 {
9727 CONSTRUCTOR_APPEND_ELT (v, NULL_TREE,
9728 build_zero_cst (ltype));
9729 CONSTRUCTOR_APPEND_ELT (v, NULL_TREE, tem);
9730 }
9731 else
9732 {
9733 CONSTRUCTOR_APPEND_ELT (v, NULL_TREE, tem);
9734 CONSTRUCTOR_APPEND_ELT (v, NULL_TREE,
9735 build_zero_cst (ltype));
9736 }
9737 gcc_assert (new_vtype != NULL_TREE);
9738 if (new_vtype == vectype)
9739 new_stmt = gimple_build_assign (
9740 vec_dest, build_constructor (vectype, v));
9741 else
9742 {
9743 tree new_vname = make_ssa_name (new_vtype);
9744 new_stmt = gimple_build_assign (
9745 new_vname, build_constructor (new_vtype, v));
9746 vect_finish_stmt_generation (vinfo, stmt_info,
9747 new_stmt, gsi);
9748 new_stmt = gimple_build_assign (
9749 vec_dest, build1 (VIEW_CONVERT_EXPR, vectype,
9750 new_vname));
9751 }
9752 }
9753 }
9754 break;
9755 }
9756 case dr_explicit_realign:
9757 {
9758 tree ptr, bump;
9759
9760 tree vs = size_int (TYPE_VECTOR_SUBPARTS (vectype));
9761
9762 if (compute_in_loop)
9763 msq = vect_setup_realignment (vinfo, first_stmt_info, gsi,
9764 &realignment_token,
9765 dr_explicit_realign,
9766 dataref_ptr, NULL);
9767
9768 if (TREE_CODE (dataref_ptr) == SSA_NAME)
9769 ptr = copy_ssa_name (dataref_ptr);
9770 else
9771 ptr = make_ssa_name (TREE_TYPE (dataref_ptr));
9772 // For explicit realign the target alignment should be
9773 // known at compile time.
9774 unsigned HOST_WIDE_INT align =
9775 DR_TARGET_ALIGNMENT (first_dr_info).to_constant ();
9776 new_stmt = gimple_build_assign
9777 (ptr, BIT_AND_EXPR, dataref_ptr,
9778 build_int_cst
9779 (TREE_TYPE (dataref_ptr),
9780 -(HOST_WIDE_INT) align));
9781 vect_finish_stmt_generation (vinfo, stmt_info,
9782 new_stmt, gsi);
9783 data_ref
9784 = build2 (MEM_REF, vectype, ptr,
9785 build_int_cst (ref_type, 0));
9786 vect_copy_ref_info (data_ref, DR_REF (first_dr_info->dr));
9787 vec_dest = vect_create_destination_var (scalar_dest,
9788 vectype);
9789 new_stmt = gimple_build_assign (vec_dest, data_ref);
9790 new_temp = make_ssa_name (vec_dest, new_stmt);
9791 gimple_assign_set_lhs (new_stmt, new_temp);
9792 gimple_move_vops (new_stmt, stmt_info->stmt);
9793 vect_finish_stmt_generation (vinfo, stmt_info,
9794 new_stmt, gsi);
9795 msq = new_temp;
9796
9797 bump = size_binop (MULT_EXPR, vs,
9798 TYPE_SIZE_UNIT (elem_type));
9799 bump = size_binop (MINUS_EXPR, bump, size_one_node);
9800 ptr = bump_vector_ptr (vinfo, dataref_ptr, NULL, gsi,
9801 stmt_info, bump);
9802 new_stmt = gimple_build_assign
9803 (NULL_TREE, BIT_AND_EXPR, ptr,
9804 build_int_cst
9805 (TREE_TYPE (ptr), -(HOST_WIDE_INT) align));
9806 ptr = copy_ssa_name (ptr, new_stmt);
9807 gimple_assign_set_lhs (new_stmt, ptr);
9808 vect_finish_stmt_generation (vinfo, stmt_info,
9809 new_stmt, gsi);
9810 data_ref
9811 = build2 (MEM_REF, vectype, ptr,
9812 build_int_cst (ref_type, 0));
9813 break;
9814 }
9815 case dr_explicit_realign_optimized:
9816 {
9817 if (TREE_CODE (dataref_ptr) == SSA_NAME)
9818 new_temp = copy_ssa_name (dataref_ptr);
9819 else
9820 new_temp = make_ssa_name (TREE_TYPE (dataref_ptr));
9821 // We should only be doing this if we know the target
9822 // alignment at compile time.
9823 unsigned HOST_WIDE_INT align =
9824 DR_TARGET_ALIGNMENT (first_dr_info).to_constant ();
9825 new_stmt = gimple_build_assign
9826 (new_temp, BIT_AND_EXPR, dataref_ptr,
9827 build_int_cst (TREE_TYPE (dataref_ptr),
9828 -(HOST_WIDE_INT) align));
9829 vect_finish_stmt_generation (vinfo, stmt_info,
9830 new_stmt, gsi);
9831 data_ref
9832 = build2 (MEM_REF, vectype, new_temp,
9833 build_int_cst (ref_type, 0));
9834 break;
9835 }
9836 default:
9837 gcc_unreachable ();
9838 }
9839 vec_dest = vect_create_destination_var (scalar_dest, vectype);
9840 /* DATA_REF is null if we've already built the statement. */
9841 if (data_ref)
9842 {
9843 vect_copy_ref_info (data_ref, DR_REF (first_dr_info->dr));
9844 new_stmt = gimple_build_assign (vec_dest, data_ref);
9845 }
9846 new_temp = make_ssa_name (vec_dest, new_stmt);
9847 gimple_set_lhs (new_stmt, new_temp);
9848 new_stmt_info
9849 = vect_finish_stmt_generation (vinfo, stmt_info,
9850 new_stmt, gsi);
9851
9852 /* 3. Handle explicit realignment if necessary/supported.
9853 Create in loop:
9854 vec_dest = realign_load (msq, lsq, realignment_token) */
9855 if (alignment_support_scheme == dr_explicit_realign_optimized
9856 || alignment_support_scheme == dr_explicit_realign)
9857 {
9858 lsq = gimple_assign_lhs (new_stmt);
9859 if (!realignment_token)
9860 realignment_token = dataref_ptr;
9861 vec_dest = vect_create_destination_var (scalar_dest, vectype);
9862 new_stmt = gimple_build_assign (vec_dest, REALIGN_LOAD_EXPR,
9863 msq, lsq, realignment_token);
9864 new_temp = make_ssa_name (vec_dest, new_stmt);
9865 gimple_assign_set_lhs (new_stmt, new_temp);
9866 new_stmt_info
9867 = vect_finish_stmt_generation (vinfo, stmt_info,
9868 new_stmt, gsi);
9869
9870 if (alignment_support_scheme == dr_explicit_realign_optimized)
9871 {
9872 gcc_assert (phi);
9873 if (i == vec_num - 1 && j == ncopies - 1)
9874 add_phi_arg (phi, lsq,
9875 loop_latch_edge (containing_loop),
9876 UNKNOWN_LOCATION);
9877 msq = lsq;
9878 }
9879 }
9880
9881 if (memory_access_type == VMAT_CONTIGUOUS_REVERSE)
9882 {
9883 tree perm_mask = perm_mask_for_reverse (vectype);
9884 new_temp = permute_vec_elements (vinfo, new_temp, new_temp,
9885 perm_mask, stmt_info, gsi);
9886 new_stmt_info = vinfo->lookup_def (new_temp);
9887 }
9888
9889 /* Collect vector loads and later create their permutation in
9890 vect_transform_grouped_load (). */
9891 if (grouped_load || slp_perm)
9892 dr_chain.quick_push (new_temp);
9893
9894 /* Store vector loads in the corresponding SLP_NODE. */
9895 if (slp && !slp_perm)
9896 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt_info);
9897
9898 /* With an SLP permutation we load the gaps as well; without
9899 one we need to skip the gaps after we manage to fully load
9900 all elements. group_gap_adj is DR_GROUP_SIZE here. */
9901 group_elt += nunits;
9902 if (maybe_ne (group_gap_adj, 0U)
9903 && !slp_perm
9904 && known_eq (group_elt, group_size - group_gap_adj))
9905 {
9906 poly_wide_int bump_val
9907 = (wi::to_wide (TYPE_SIZE_UNIT (elem_type))
9908 * group_gap_adj);
9909 tree bump = wide_int_to_tree (sizetype, bump_val);
9910 dataref_ptr = bump_vector_ptr (vinfo, dataref_ptr, ptr_incr,
9911 gsi, stmt_info, bump);
9912 group_elt = 0;
9913 }
9914 }
9915 /* Bump the vector pointer to account for a gap or for excess
9916 elements loaded for a permuted SLP load. */
9917 if (maybe_ne (group_gap_adj, 0U) && slp_perm)
9918 {
9919 poly_wide_int bump_val
9920 = (wi::to_wide (TYPE_SIZE_UNIT (elem_type))
9921 * group_gap_adj);
9922 tree bump = wide_int_to_tree (sizetype, bump_val);
9923 dataref_ptr = bump_vector_ptr (vinfo, dataref_ptr, ptr_incr, gsi,
9924 stmt_info, bump);
9925 }
9926 }
9927
9928 if (slp && !slp_perm)
9929 continue;
9930
9931 if (slp_perm)
9932 {
9933 unsigned n_perms;
9934 bool ok = vect_transform_slp_perm_load (vinfo, slp_node, dr_chain,
9935 gsi, vf, false, &n_perms);
9936 gcc_assert (ok);
9937 }
9938 else
9939 {
9940 if (grouped_load)
9941 {
9942 if (memory_access_type != VMAT_LOAD_STORE_LANES)
9943 vect_transform_grouped_load (vinfo, stmt_info, dr_chain,
9944 group_size, gsi);
9945 *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
9946 }
9947 else
9948 {
9949 if (j == 0)
9950 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt_info;
9951 else
9952 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt_info;
9953 prev_stmt_info = new_stmt_info;
9954 }
9955 }
9956 dr_chain.release ();
9957 }
9958
9959 return true;
9960 }
9961
9962 /* Function vect_is_simple_cond.
9963
9964 Input:
9965 VINFO - the vectorization info for the code being vectorized.
9966 COND - Condition that is checked for simple use.
9967
9968 Output:
9969 *COMP_VECTYPE - the vector type for the comparison.
9970 *DTS - The def types for the arguments of the comparison
9971
9972 Returns whether a COND can be vectorized. Checks whether
9973 condition operands are supportable using vect_is_simple_use.
9974
9975 static bool
9976 vect_is_simple_cond (tree cond, vec_info *vinfo, slp_tree slp_node,
9977 tree *comp_vectype, enum vect_def_type *dts,
9978 tree vectype)
9979 {
9980 tree lhs, rhs;
9981 tree vectype1 = NULL_TREE, vectype2 = NULL_TREE;
9982
9983 /* Mask case. */
9984 if (TREE_CODE (cond) == SSA_NAME
9985 && VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (cond)))
9986 {
9987 if (!vect_is_simple_use (cond, vinfo, &dts[0], comp_vectype)
9988 || !*comp_vectype
9989 || !VECTOR_BOOLEAN_TYPE_P (*comp_vectype))
9990 return false;
9991 return true;
9992 }
9993
9994 if (!COMPARISON_CLASS_P (cond))
9995 return false;
9996
9997 lhs = TREE_OPERAND (cond, 0);
9998 rhs = TREE_OPERAND (cond, 1);
9999
10000 if (TREE_CODE (lhs) == SSA_NAME)
10001 {
10002 if (!vect_is_simple_use (lhs, vinfo, &dts[0], &vectype1))
10003 return false;
10004 }
10005 else if (TREE_CODE (lhs) == INTEGER_CST || TREE_CODE (lhs) == REAL_CST
10006 || TREE_CODE (lhs) == FIXED_CST)
10007 dts[0] = vect_constant_def;
10008 else
10009 return false;
10010
10011 if (TREE_CODE (rhs) == SSA_NAME)
10012 {
10013 if (!vect_is_simple_use (rhs, vinfo, &dts[1], &vectype2))
10014 return false;
10015 }
10016 else if (TREE_CODE (rhs) == INTEGER_CST || TREE_CODE (rhs) == REAL_CST
10017 || TREE_CODE (rhs) == FIXED_CST)
10018 dts[1] = vect_constant_def;
10019 else
10020 return false;
10021
10022 if (vectype1 && vectype2
10023 && maybe_ne (TYPE_VECTOR_SUBPARTS (vectype1),
10024 TYPE_VECTOR_SUBPARTS (vectype2)))
10025 return false;
10026
10027 *comp_vectype = vectype1 ? vectype1 : vectype2;
10028 /* Invariant comparison. */
10029 if (! *comp_vectype)
10030 {
10031 tree scalar_type = TREE_TYPE (lhs);
10032 if (VECT_SCALAR_BOOLEAN_TYPE_P (scalar_type))
10033 *comp_vectype = truth_type_for (vectype);
10034 else
10035 {
10036 /* If we can widen the comparison to match vectype do so. */
10037 if (INTEGRAL_TYPE_P (scalar_type)
10038 && !slp_node
10039 && tree_int_cst_lt (TYPE_SIZE (scalar_type),
10040 TYPE_SIZE (TREE_TYPE (vectype))))
10041 scalar_type = build_nonstandard_integer_type
10042 (vector_element_bits (vectype), TYPE_UNSIGNED (scalar_type));
10043 *comp_vectype = get_vectype_for_scalar_type (vinfo, scalar_type,
10044 slp_node);
10045 }
10046 }
10047
10048 return true;
10049 }
10050
10051 /* vectorizable_condition.
10052
10053 Check if STMT_INFO is a conditional modify expression that can be vectorized.
10054 If VEC_STMT is also passed, vectorize STMT_INFO: create a vectorized
10055 stmt using VEC_COND_EXPR to replace it, put it in VEC_STMT, and insert it
10056 at GSI.
10057
10058 When STMT_INFO is vectorized as a nested cycle, for_reduction is true.
10059
10060 Return true if STMT_INFO is vectorizable in this way. */
10061
10062 static bool
10063 vectorizable_condition (vec_info *vinfo,
10064 stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
10065 stmt_vec_info *vec_stmt,
10066 slp_tree slp_node, stmt_vector_for_cost *cost_vec)
10067 {
10068 tree scalar_dest = NULL_TREE;
10069 tree vec_dest = NULL_TREE;
10070 tree cond_expr, cond_expr0 = NULL_TREE, cond_expr1 = NULL_TREE;
10071 tree then_clause, else_clause;
10072 tree comp_vectype = NULL_TREE;
10073 tree vec_cond_lhs = NULL_TREE, vec_cond_rhs = NULL_TREE;
10074 tree vec_then_clause = NULL_TREE, vec_else_clause = NULL_TREE;
10075 tree vec_compare;
10076 tree new_temp;
10077 loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
10078 enum vect_def_type dts[4]
10079 = {vect_unknown_def_type, vect_unknown_def_type,
10080 vect_unknown_def_type, vect_unknown_def_type};
10081 int ndts = 4;
10082 int ncopies;
10083 int vec_num;
10084 enum tree_code code, cond_code, bitop1 = NOP_EXPR, bitop2 = NOP_EXPR;
10085 stmt_vec_info prev_stmt_info = NULL;
10086 int i, j;
10087 bb_vec_info bb_vinfo = dyn_cast <bb_vec_info> (vinfo);
10088 vec<tree> vec_oprnds0 = vNULL;
10089 vec<tree> vec_oprnds1 = vNULL;
10090 vec<tree> vec_oprnds2 = vNULL;
10091 vec<tree> vec_oprnds3 = vNULL;
10092 tree vec_cmp_type;
10093 bool masked = false;
10094
10095 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
10096 return false;
10097
10098 /* Is vectorizable conditional operation? */
10099 gassign *stmt = dyn_cast <gassign *> (stmt_info->stmt);
10100 if (!stmt)
10101 return false;
10102
10103 code = gimple_assign_rhs_code (stmt);
10104 if (code != COND_EXPR)
10105 return false;
10106
10107 stmt_vec_info reduc_info = NULL;
10108 int reduc_index = -1;
10109 vect_reduction_type reduction_type = TREE_CODE_REDUCTION;
10110 bool for_reduction
10111 = STMT_VINFO_REDUC_DEF (vect_orig_stmt (stmt_info)) != NULL;
10112 if (for_reduction)
10113 {
10114 if (STMT_SLP_TYPE (stmt_info))
10115 return false;
10116 reduc_info = info_for_reduction (vinfo, stmt_info);
10117 reduction_type = STMT_VINFO_REDUC_TYPE (reduc_info);
10118 reduc_index = STMT_VINFO_REDUC_IDX (stmt_info);
10119 gcc_assert (reduction_type != EXTRACT_LAST_REDUCTION
10120 || reduc_index != -1);
10121 }
10122 else
10123 {
10124 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
10125 return false;
10126
10127 /* FORNOW: only supported as part of a reduction. */
10128 if (STMT_VINFO_LIVE_P (stmt_info))
10129 {
10130 if (dump_enabled_p ())
10131 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
10132 "value used after loop.\n");
10133 return false;
10134 }
10135 }
10136
10137 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
10138 tree vectype1 = NULL_TREE, vectype2 = NULL_TREE;
10139
10140 if (slp_node)
10141 {
10142 ncopies = 1;
10143 vec_num = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
10144 }
10145 else
10146 {
10147 ncopies = vect_get_num_copies (loop_vinfo, vectype);
10148 vec_num = 1;
10149 }
10150
10151 gcc_assert (ncopies >= 1);
10152 if (for_reduction && ncopies > 1)
10153 return false; /* FORNOW */
10154
10155 cond_expr = gimple_assign_rhs1 (stmt);
10156
10157 if (!vect_is_simple_cond (cond_expr, vinfo, slp_node,
10158 &comp_vectype, &dts[0], vectype)
10159 || !comp_vectype)
10160 return false;
10161
10162 unsigned slp_adjust = 0;
10163 if (slp_node && SLP_TREE_CHILDREN (slp_node).length () == 4)
10164 /* ??? Hack. Hope for COND_EXPR GIMPLE sanitizing or refactor
10165 things more... */
10166 slp_adjust = 1;
10167 slp_tree then_slp_node, else_slp_node;
10168 if (!vect_is_simple_use (vinfo, stmt_info, slp_node, 1 + slp_adjust,
10169 &then_clause, &then_slp_node, &dts[2], &vectype1))
10170 return false;
10171 if (!vect_is_simple_use (vinfo, stmt_info, slp_node, 2 + slp_adjust,
10172 &else_clause, &else_slp_node, &dts[3], &vectype2))
10173 return false;
10174
10175 if (vectype1 && !useless_type_conversion_p (vectype, vectype1))
10176 return false;
10177
10178 if (vectype2 && !useless_type_conversion_p (vectype, vectype2))
10179 return false;
10180
10181 masked = !COMPARISON_CLASS_P (cond_expr);
10182 vec_cmp_type = truth_type_for (comp_vectype);
10183
10184 if (vec_cmp_type == NULL_TREE)
10185 return false;
10186
10187 cond_code = TREE_CODE (cond_expr);
10188 if (!masked)
10189 {
10190 cond_expr0 = TREE_OPERAND (cond_expr, 0);
10191 cond_expr1 = TREE_OPERAND (cond_expr, 1);
10192 }
10193
10194 /* For conditional reductions, the "then" value needs to be the candidate
10195 value calculated by this iteration while the "else" value needs to be
10196 the result carried over from previous iterations. If the COND_EXPR
10197 is the other way around, we need to swap it. */
10198 bool must_invert_cmp_result = false;
10199 if (reduction_type == EXTRACT_LAST_REDUCTION && reduc_index == 1)
10200 {
10201 if (masked)
10202 must_invert_cmp_result = true;
10203 else
10204 {
10205 bool honor_nans = HONOR_NANS (TREE_TYPE (cond_expr0));
10206 tree_code new_code = invert_tree_comparison (cond_code, honor_nans);
10207 if (new_code == ERROR_MARK)
10208 must_invert_cmp_result = true;
10209 else
10210 {
10211 cond_code = new_code;
10212 /* Make sure we don't accidentally use the old condition. */
10213 cond_expr = NULL_TREE;
10214 }
10215 }
10216 std::swap (then_clause, else_clause);
10217 }
10218
10219 if (!masked && VECTOR_BOOLEAN_TYPE_P (comp_vectype))
10220 {
10221 /* Boolean values may have another representation in vectors
10222 and therefore we prefer bit operations over comparison for
10223 them (which also works for scalar masks). We store opcodes
10224 to use in bitop1 and bitop2. Statement is vectorized as
10225 BITOP2 (rhs1 BITOP1 rhs2) or rhs1 BITOP2 (BITOP1 rhs2)
10226 depending on bitop1 and bitop2 arity. */
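/* For example, on mask operands a > b is computed as a & ~b
   (bitop1 == BIT_NOT_EXPR on b, bitop2 == BIT_AND_EXPR), a >= b as
   a | ~b, and a == b as a ^ b with the then/else arms swapped
   instead of negating the result.  */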
10227 switch (cond_code)
10228 {
10229 case GT_EXPR:
10230 bitop1 = BIT_NOT_EXPR;
10231 bitop2 = BIT_AND_EXPR;
10232 break;
10233 case GE_EXPR:
10234 bitop1 = BIT_NOT_EXPR;
10235 bitop2 = BIT_IOR_EXPR;
10236 break;
10237 case LT_EXPR:
10238 bitop1 = BIT_NOT_EXPR;
10239 bitop2 = BIT_AND_EXPR;
10240 std::swap (cond_expr0, cond_expr1);
10241 break;
10242 case LE_EXPR:
10243 bitop1 = BIT_NOT_EXPR;
10244 bitop2 = BIT_IOR_EXPR;
10245 std::swap (cond_expr0, cond_expr1);
10246 break;
10247 case NE_EXPR:
10248 bitop1 = BIT_XOR_EXPR;
10249 break;
10250 case EQ_EXPR:
10251 bitop1 = BIT_XOR_EXPR;
10252 bitop2 = BIT_NOT_EXPR;
10253 break;
10254 default:
10255 return false;
10256 }
10257 cond_code = SSA_NAME;
10258 }
10259
10260 if (TREE_CODE_CLASS (cond_code) == tcc_comparison
10261 && reduction_type == EXTRACT_LAST_REDUCTION
10262 && !expand_vec_cmp_expr_p (comp_vectype, vec_cmp_type, cond_code))
10263 {
10264 if (dump_enabled_p ())
10265 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
10266 "reduction comparison operation not supported.\n");
10267 return false;
10268 }
10269
10270 if (!vec_stmt)
10271 {
10272 if (bitop1 != NOP_EXPR)
10273 {
10274 machine_mode mode = TYPE_MODE (comp_vectype);
10275 optab optab;
10276
10277 optab = optab_for_tree_code (bitop1, comp_vectype, optab_default);
10278 if (!optab || optab_handler (optab, mode) == CODE_FOR_nothing)
10279 return false;
10280
10281 if (bitop2 != NOP_EXPR)
10282 {
10283 optab = optab_for_tree_code (bitop2, comp_vectype,
10284 optab_default);
10285 if (!optab || optab_handler (optab, mode) == CODE_FOR_nothing)
10286 return false;
10287 }
10288 }
10289
10290 vect_cost_for_stmt kind = vector_stmt;
10291 if (reduction_type == EXTRACT_LAST_REDUCTION)
10292 /* Count one reduction-like operation per vector. */
10293 kind = vec_to_scalar;
10294 else if (!expand_vec_cond_expr_p (vectype, comp_vectype, cond_code))
10295 return false;
10296
10297 if (slp_node
10298 && (!vect_maybe_update_slp_op_vectype
10299 (SLP_TREE_CHILDREN (slp_node)[0], comp_vectype)
10300 || (slp_adjust == 1
10301 && !vect_maybe_update_slp_op_vectype
10302 (SLP_TREE_CHILDREN (slp_node)[1], comp_vectype))
10303 || !vect_maybe_update_slp_op_vectype (then_slp_node, vectype)
10304 || !vect_maybe_update_slp_op_vectype (else_slp_node, vectype)))
10305 {
10306 if (dump_enabled_p ())
10307 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
10308 "incompatible vector types for invariants\n");
10309 return false;
10310 }
10311
10312 if (loop_vinfo
10313 && LOOP_VINFO_CAN_FULLY_MASK_P (loop_vinfo)
10314 && reduction_type == EXTRACT_LAST_REDUCTION)
10315 vect_record_loop_mask (loop_vinfo, &LOOP_VINFO_MASKS (loop_vinfo),
10316 ncopies * vec_num, vectype, NULL);
10317
10318 STMT_VINFO_TYPE (stmt_info) = condition_vec_info_type;
10319 vect_model_simple_cost (vinfo, stmt_info, ncopies, dts, ndts, slp_node,
10320 cost_vec, kind);
10321 return true;
10322 }
10323
10324 /* Transform. */
10325
10326 if (!slp_node)
10327 {
10328 vec_oprnds0.create (1);
10329 vec_oprnds1.create (1);
10330 vec_oprnds2.create (1);
10331 vec_oprnds3.create (1);
10332 }
10333
10334 /* Handle def. */
10335 scalar_dest = gimple_assign_lhs (stmt);
10336 if (reduction_type != EXTRACT_LAST_REDUCTION)
10337 vec_dest = vect_create_destination_var (scalar_dest, vectype);
10338
10339 /* Handle cond expr. */
10340 for (j = 0; j < ncopies; j++)
10341 {
10342 bool swap_cond_operands = false;
10343
10344 /* See whether another part of the vectorized code applies a loop
10345 mask to the condition, or to its inverse. */
10346
10347 vec_loop_masks *masks = NULL;
10348 if (loop_vinfo && LOOP_VINFO_FULLY_MASKED_P (loop_vinfo))
10349 {
10350 if (reduction_type == EXTRACT_LAST_REDUCTION)
10351 masks = &LOOP_VINFO_MASKS (loop_vinfo);
10352 else
10353 {
10354 scalar_cond_masked_key cond (cond_expr, ncopies);
10355 if (loop_vinfo->scalar_cond_masked_set.contains (cond))
10356 masks = &LOOP_VINFO_MASKS (loop_vinfo);
10357 else
10358 {
10359 bool honor_nans = HONOR_NANS (TREE_TYPE (cond.op0));
10360 cond.code = invert_tree_comparison (cond.code, honor_nans);
10361 if (loop_vinfo->scalar_cond_masked_set.contains (cond))
10362 {
10363 masks = &LOOP_VINFO_MASKS (loop_vinfo);
10364 cond_code = cond.code;
10365 swap_cond_operands = true;
10366 }
10367 }
10368 }
10369 }
10370
10371 stmt_vec_info new_stmt_info = NULL;
10372 if (j == 0)
10373 {
10374 if (slp_node)
10375 {
10376 auto_vec<vec<tree>, 4> vec_defs;
10377 vect_get_slp_defs (vinfo, slp_node, &vec_defs);
10378 vec_oprnds3 = vec_defs.pop ();
10379 vec_oprnds2 = vec_defs.pop ();
10380 if (!masked)
10381 vec_oprnds1 = vec_defs.pop ();
10382 vec_oprnds0 = vec_defs.pop ();
10383 }
10384 else
10385 {
10386 if (masked)
10387 {
10388 vec_cond_lhs
10389 = vect_get_vec_def_for_operand (vinfo, cond_expr, stmt_info,
10390 comp_vectype);
10391 }
10392 else
10393 {
10394 vec_cond_lhs
10395 = vect_get_vec_def_for_operand (vinfo, cond_expr0,
10396 stmt_info, comp_vectype);
10397 vec_cond_rhs
10398 = vect_get_vec_def_for_operand (vinfo, cond_expr1,
10399 stmt_info, comp_vectype);
10400 }
10401 vec_then_clause = vect_get_vec_def_for_operand (vinfo,
10402 then_clause,
10403 stmt_info);
10404 if (reduction_type != EXTRACT_LAST_REDUCTION)
10405 vec_else_clause = vect_get_vec_def_for_operand (vinfo,
10406 else_clause,
10407 stmt_info);
10408 }
10409 }
10410 else
10411 {
10412 vec_cond_lhs
10413 = vect_get_vec_def_for_stmt_copy (vinfo, vec_oprnds0.pop ());
10414 if (!masked)
10415 vec_cond_rhs
10416 = vect_get_vec_def_for_stmt_copy (vinfo, vec_oprnds1.pop ());
10417
10418 vec_then_clause = vect_get_vec_def_for_stmt_copy (vinfo,
10419 vec_oprnds2.pop ());
10420 vec_else_clause = vect_get_vec_def_for_stmt_copy (vinfo,
10421 vec_oprnds3.pop ());
10422 }
10423
10424 if (!slp_node)
10425 {
10426 vec_oprnds0.quick_push (vec_cond_lhs);
10427 if (!masked)
10428 vec_oprnds1.quick_push (vec_cond_rhs);
10429 vec_oprnds2.quick_push (vec_then_clause);
10430 vec_oprnds3.quick_push (vec_else_clause);
10431 }
10432
10433 /* Arguments are ready. Create the new vector stmt. */
10434 FOR_EACH_VEC_ELT (vec_oprnds0, i, vec_cond_lhs)
10435 {
10436 vec_then_clause = vec_oprnds2[i];
10437 vec_else_clause = vec_oprnds3[i];
10438
10439 if (swap_cond_operands)
10440 std::swap (vec_then_clause, vec_else_clause);
10441
10442 if (masked)
10443 vec_compare = vec_cond_lhs;
10444 else
10445 {
10446 vec_cond_rhs = vec_oprnds1[i];
10447 if (bitop1 == NOP_EXPR)
10448 vec_compare = build2 (cond_code, vec_cmp_type,
10449 vec_cond_lhs, vec_cond_rhs);
10450 else
10451 {
10452 new_temp = make_ssa_name (vec_cmp_type);
10453 gassign *new_stmt;
10454 if (bitop1 == BIT_NOT_EXPR)
10455 new_stmt = gimple_build_assign (new_temp, bitop1,
10456 vec_cond_rhs);
10457 else
10458 new_stmt
10459 = gimple_build_assign (new_temp, bitop1, vec_cond_lhs,
10460 vec_cond_rhs);
10461 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
10462 if (bitop2 == NOP_EXPR)
10463 vec_compare = new_temp;
10464 else if (bitop2 == BIT_NOT_EXPR)
10465 {
10466 /* Instead of doing ~x ? y : z do x ? z : y. */
10467 vec_compare = new_temp;
10468 std::swap (vec_then_clause, vec_else_clause);
10469 }
10470 else
10471 {
10472 vec_compare = make_ssa_name (vec_cmp_type);
10473 new_stmt
10474 = gimple_build_assign (vec_compare, bitop2,
10475 vec_cond_lhs, new_temp);
10476 vect_finish_stmt_generation (vinfo, stmt_info,
10477 new_stmt, gsi);
10478 }
10479 }
10480 }
10481
10482 /* If we decided to apply a loop mask to the result of the vector
10483 comparison, AND the comparison with the mask now. Later passes
10484 should then be able to reuse the AND results between multiple
10485 vector statements.
10486
10487 For example:
10488 for (int i = 0; i < 100; ++i)
10489 x[i] = y[i] ? z[i] : 10;
10490
10491 results in following optimized GIMPLE:
10492
10493 mask__35.8_43 = vect__4.7_41 != { 0, ... };
10494 vec_mask_and_46 = loop_mask_40 & mask__35.8_43;
10495 _19 = &MEM[base: z_12(D), index: ivtmp_56, step: 4, offset: 0B];
10496 vect_iftmp.11_47 = .MASK_LOAD (_19, 4B, vec_mask_and_46);
10497 vect_iftmp.12_52 = VEC_COND_EXPR <vec_mask_and_46,
10498 vect_iftmp.11_47, { 10, ... }>;
10499
10500 instead of using masked and unmasked forms of
10501 vec != { 0, ... } (masked in the MASK_LOAD,
10502 unmasked in the VEC_COND_EXPR). */
10503
10504 /* Force vec_compare to be an SSA_NAME rather than a comparison,
10505 in cases where that's necessary. */
10506
10507 if (masks || reduction_type == EXTRACT_LAST_REDUCTION)
10508 {
10509 if (!is_gimple_val (vec_compare))
10510 {
10511 tree vec_compare_name = make_ssa_name (vec_cmp_type);
10512 gassign *new_stmt = gimple_build_assign (vec_compare_name,
10513 vec_compare);
10514 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
10515 vec_compare = vec_compare_name;
10516 }
10517
10518 if (must_invert_cmp_result)
10519 {
10520 tree vec_compare_name = make_ssa_name (vec_cmp_type);
10521 gassign *new_stmt = gimple_build_assign (vec_compare_name,
10522 BIT_NOT_EXPR,
10523 vec_compare);
10524 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
10525 vec_compare = vec_compare_name;
10526 }
10527
10528 if (masks)
10529 {
10530 unsigned vec_num = vec_oprnds0.length ();
10531 tree loop_mask
10532 = vect_get_loop_mask (gsi, masks, vec_num * ncopies,
10533 vectype, vec_num * j + i);
10534 tree tmp2 = make_ssa_name (vec_cmp_type);
10535 gassign *g
10536 = gimple_build_assign (tmp2, BIT_AND_EXPR, vec_compare,
10537 loop_mask);
10538 vect_finish_stmt_generation (vinfo, stmt_info, g, gsi);
10539 vec_compare = tmp2;
10540 }
10541 }
10542
10543 if (reduction_type == EXTRACT_LAST_REDUCTION)
10544 {
10545 gimple *old_stmt = vect_orig_stmt (stmt_info)->stmt;
10546 tree lhs = gimple_get_lhs (old_stmt);
10547 gcall *new_stmt = gimple_build_call_internal
10548 (IFN_FOLD_EXTRACT_LAST, 3, else_clause, vec_compare,
10549 vec_then_clause);
10550 gimple_call_set_lhs (new_stmt, lhs);
10551 SSA_NAME_DEF_STMT (lhs) = new_stmt;
10552 if (old_stmt == gsi_stmt (*gsi))
10553 new_stmt_info = vect_finish_replace_stmt (vinfo,
10554 stmt_info, new_stmt);
10555 else
10556 {
10557 /* In this case we're moving the definition to later in the
10558 block. That doesn't matter because the only uses of the
10559 lhs are in phi statements. */
10560 gimple_stmt_iterator old_gsi = gsi_for_stmt (old_stmt);
10561 gsi_remove (&old_gsi, true);
10562 new_stmt_info
10563 = vect_finish_stmt_generation (vinfo, stmt_info,
10564 new_stmt, gsi);
10565 }
10566 }
10567 else
10568 {
10569 new_temp = make_ssa_name (vec_dest);
10570 gassign *new_stmt
10571 = gimple_build_assign (new_temp, VEC_COND_EXPR, vec_compare,
10572 vec_then_clause, vec_else_clause);
10573 new_stmt_info
10574 = vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
10575 }
10576 if (slp_node)
10577 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt_info);
10578 }
10579
10580 if (slp_node)
10581 continue;
10582
10583 if (j == 0)
10584 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt_info;
10585 else
10586 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt_info;
10587
10588 prev_stmt_info = new_stmt_info;
10589 }
10590
10591 vec_oprnds0.release ();
10592 vec_oprnds1.release ();
10593 vec_oprnds2.release ();
10594 vec_oprnds3.release ();
10595
10596 return true;
10597 }
10598
10599 /* vectorizable_comparison.
10600
10601 Check if STMT_INFO is comparison expression that can be vectorized.
10602 If VEC_STMT is also passed, vectorize STMT_INFO: create a vectorized
10603 comparison, put it in VEC_STMT, and insert it at GSI.
10604
10605 Return true if STMT_INFO is vectorizable in this way. */
10606
10607 static bool
10608 vectorizable_comparison (vec_info *vinfo,
10609 stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
10610 stmt_vec_info *vec_stmt,
10611 slp_tree slp_node, stmt_vector_for_cost *cost_vec)
10612 {
10613 tree lhs, rhs1, rhs2;
10614 tree vectype1 = NULL_TREE, vectype2 = NULL_TREE;
10615 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
10616 tree vec_rhs1 = NULL_TREE, vec_rhs2 = NULL_TREE;
10617 tree new_temp;
10618 loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
10619 enum vect_def_type dts[2] = {vect_unknown_def_type, vect_unknown_def_type};
10620 int ndts = 2;
10621 poly_uint64 nunits;
10622 int ncopies;
10623 enum tree_code code, bitop1 = NOP_EXPR, bitop2 = NOP_EXPR;
10624 stmt_vec_info prev_stmt_info = NULL;
10625 int i, j;
10626 bb_vec_info bb_vinfo = dyn_cast <bb_vec_info> (vinfo);
10627 vec<tree> vec_oprnds0 = vNULL;
10628 vec<tree> vec_oprnds1 = vNULL;
10629 tree mask_type;
10630 tree mask;
10631
10632 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
10633 return false;
10634
10635 if (!vectype || !VECTOR_BOOLEAN_TYPE_P (vectype))
10636 return false;
10637
10638 mask_type = vectype;
10639 nunits = TYPE_VECTOR_SUBPARTS (vectype);
10640
10641 if (slp_node)
10642 ncopies = 1;
10643 else
10644 ncopies = vect_get_num_copies (loop_vinfo, vectype);
10645
10646 gcc_assert (ncopies >= 1);
10647 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
10648 return false;
10649
10650 if (STMT_VINFO_LIVE_P (stmt_info))
10651 {
10652 if (dump_enabled_p ())
10653 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
10654 "value used after loop.\n");
10655 return false;
10656 }
10657
10658 gassign *stmt = dyn_cast <gassign *> (stmt_info->stmt);
10659 if (!stmt)
10660 return false;
10661
10662 code = gimple_assign_rhs_code (stmt);
10663
10664 if (TREE_CODE_CLASS (code) != tcc_comparison)
10665 return false;
10666
10667 slp_tree slp_rhs1, slp_rhs2;
10668 if (!vect_is_simple_use (vinfo, stmt_info, slp_node,
10669 0, &rhs1, &slp_rhs1, &dts[0], &vectype1))
10670 return false;
10671
10672 if (!vect_is_simple_use (vinfo, stmt_info, slp_node,
10673 1, &rhs2, &slp_rhs2, &dts[1], &vectype2))
10674 return false;
10675
10676 if (vectype1 && vectype2
10677 && maybe_ne (TYPE_VECTOR_SUBPARTS (vectype1),
10678 TYPE_VECTOR_SUBPARTS (vectype2)))
10679 return false;
10680
10681 vectype = vectype1 ? vectype1 : vectype2;
10682
10683 /* Invariant comparison. */
10684 if (!vectype)
10685 {
10686 if (VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (rhs1)))
10687 vectype = mask_type;
10688 else
10689 vectype = get_vectype_for_scalar_type (vinfo, TREE_TYPE (rhs1),
10690 slp_node);
10691 if (!vectype || maybe_ne (TYPE_VECTOR_SUBPARTS (vectype), nunits))
10692 return false;
10693 }
10694 else if (maybe_ne (nunits, TYPE_VECTOR_SUBPARTS (vectype)))
10695 return false;
10696
10697 /* Can't compare mask and non-mask types. */
10698 if (vectype1 && vectype2
10699 && (VECTOR_BOOLEAN_TYPE_P (vectype1) ^ VECTOR_BOOLEAN_TYPE_P (vectype2)))
10700 return false;
10701
10702 /* Boolean values may have another representation in vectors
10703 and therefore we prefer bit operations over comparison for
10704 them (which also works for scalar masks). We store opcodes
10705 to use in bitop1 and bitop2. Statement is vectorized as
10706 BITOP2 (rhs1 BITOP1 rhs2) or
10707 rhs1 BITOP2 (BITOP1 rhs2)
10708 depending on bitop1 and bitop2 arity. */
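/* For instance, on mask operands a < b is computed as b & ~a (the
   operands are swapped and the GT sequence applied), while a != b is
   simply a ^ b.  */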
10709 bool swap_p = false;
10710 if (VECTOR_BOOLEAN_TYPE_P (vectype))
10711 {
10712 if (code == GT_EXPR)
10713 {
10714 bitop1 = BIT_NOT_EXPR;
10715 bitop2 = BIT_AND_EXPR;
10716 }
10717 else if (code == GE_EXPR)
10718 {
10719 bitop1 = BIT_NOT_EXPR;
10720 bitop2 = BIT_IOR_EXPR;
10721 }
10722 else if (code == LT_EXPR)
10723 {
10724 bitop1 = BIT_NOT_EXPR;
10725 bitop2 = BIT_AND_EXPR;
10726 swap_p = true;
10727 }
10728 else if (code == LE_EXPR)
10729 {
10730 bitop1 = BIT_NOT_EXPR;
10731 bitop2 = BIT_IOR_EXPR;
10732 swap_p = true;
10733 }
10734 else
10735 {
10736 bitop1 = BIT_XOR_EXPR;
10737 if (code == EQ_EXPR)
10738 bitop2 = BIT_NOT_EXPR;
10739 }
10740 }
10741
10742 if (!vec_stmt)
10743 {
10744 if (bitop1 == NOP_EXPR)
10745 {
10746 if (!expand_vec_cmp_expr_p (vectype, mask_type, code))
10747 return false;
10748 }
10749 else
10750 {
10751 machine_mode mode = TYPE_MODE (vectype);
10752 optab optab;
10753
10754 optab = optab_for_tree_code (bitop1, vectype, optab_default);
10755 if (!optab || optab_handler (optab, mode) == CODE_FOR_nothing)
10756 return false;
10757
10758 if (bitop2 != NOP_EXPR)
10759 {
10760 optab = optab_for_tree_code (bitop2, vectype, optab_default);
10761 if (!optab || optab_handler (optab, mode) == CODE_FOR_nothing)
10762 return false;
10763 }
10764 }
10765
10766 /* Put types on constant and invariant SLP children. */
10767 if (slp_node
10768 && (!vect_maybe_update_slp_op_vectype (slp_rhs1, vectype)
10769 || !vect_maybe_update_slp_op_vectype (slp_rhs2, vectype)))
10770 {
10771 if (dump_enabled_p ())
10772 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
10773 "incompatible vector types for invariants\n");
10774 return false;
10775 }
10776
10777 STMT_VINFO_TYPE (stmt_info) = comparison_vec_info_type;
10778 vect_model_simple_cost (vinfo, stmt_info,
10779 ncopies * (1 + (bitop2 != NOP_EXPR)),
10780 dts, ndts, slp_node, cost_vec);
10781 return true;
10782 }
10783
10784 /* Transform. */
10785 if (!slp_node)
10786 {
10787 vec_oprnds0.create (1);
10788 vec_oprnds1.create (1);
10789 }
10790
10791 /* Handle def. */
10792 lhs = gimple_assign_lhs (stmt);
10793 mask = vect_create_destination_var (lhs, mask_type);
10794
10795 /* Handle cmp expr. */
10796 for (j = 0; j < ncopies; j++)
10797 {
10798 stmt_vec_info new_stmt_info = NULL;
10799 if (j == 0)
10800 {
10801 if (slp_node)
10802 {
10803 auto_vec<vec<tree>, 2> vec_defs;
10804 vect_get_slp_defs (vinfo, slp_node, &vec_defs);
10805 vec_oprnds1 = vec_defs.pop ();
10806 vec_oprnds0 = vec_defs.pop ();
10807 if (swap_p)
10808 std::swap (vec_oprnds0, vec_oprnds1);
10809 }
10810 else
10811 {
10812 vec_rhs1 = vect_get_vec_def_for_operand (vinfo, rhs1, stmt_info,
10813 vectype);
10814 vec_rhs2 = vect_get_vec_def_for_operand (vinfo, rhs2, stmt_info,
10815 vectype);
10816 }
10817 }
10818 else
10819 {
10820 vec_rhs1 = vect_get_vec_def_for_stmt_copy (vinfo,
10821 vec_oprnds0.pop ());
10822 vec_rhs2 = vect_get_vec_def_for_stmt_copy (vinfo,
10823 vec_oprnds1.pop ());
10824 }
10825
10826 if (!slp_node)
10827 {
10828 if (swap_p && j == 0)
10829 std::swap (vec_rhs1, vec_rhs2);
10830 vec_oprnds0.quick_push (vec_rhs1);
10831 vec_oprnds1.quick_push (vec_rhs2);
10832 }
10833
10834 /* Arguments are ready. Create the new vector stmt. */
10835 FOR_EACH_VEC_ELT (vec_oprnds0, i, vec_rhs1)
10836 {
10837 vec_rhs2 = vec_oprnds1[i];
10838
10839 new_temp = make_ssa_name (mask);
10840 if (bitop1 == NOP_EXPR)
10841 {
10842 gassign *new_stmt = gimple_build_assign (new_temp, code,
10843 vec_rhs1, vec_rhs2);
10844 new_stmt_info
10845 = vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
10846 }
10847 else
10848 {
10849 gassign *new_stmt;
10850 if (bitop1 == BIT_NOT_EXPR)
10851 new_stmt = gimple_build_assign (new_temp, bitop1, vec_rhs2);
10852 else
10853 new_stmt = gimple_build_assign (new_temp, bitop1, vec_rhs1,
10854 vec_rhs2);
10855 new_stmt_info
10856 = vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
10857 if (bitop2 != NOP_EXPR)
10858 {
10859 tree res = make_ssa_name (mask);
10860 if (bitop2 == BIT_NOT_EXPR)
10861 new_stmt = gimple_build_assign (res, bitop2, new_temp);
10862 else
10863 new_stmt = gimple_build_assign (res, bitop2, vec_rhs1,
10864 new_temp);
10865 new_stmt_info
10866 = vect_finish_stmt_generation (vinfo, stmt_info,
10867 new_stmt, gsi);
10868 }
10869 }
10870 if (slp_node)
10871 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt_info);
10872 }
10873
10874 if (slp_node)
10875 continue;
10876
10877 if (j == 0)
10878 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt_info;
10879 else
10880 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt_info;
10881
10882 prev_stmt_info = new_stmt_info;
10883 }
10884
10885 vec_oprnds0.release ();
10886 vec_oprnds1.release ();
10887
10888 return true;
10889 }
10890
10891 /* If SLP_NODE is nonnull, return true if vectorizable_live_operation
10892 can handle all live statements in the node. Otherwise return true
10893 if STMT_INFO is not live or if vectorizable_live_operation can handle it.
10894 GSI and VEC_STMT_P are as for vectorizable_live_operation. */
10895
10896 static bool
10897 can_vectorize_live_stmts (loop_vec_info loop_vinfo,
10898 stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
10899 slp_tree slp_node, slp_instance slp_node_instance,
10900 bool vec_stmt_p,
10901 stmt_vector_for_cost *cost_vec)
10902 {
10903 if (slp_node)
10904 {
10905 stmt_vec_info slp_stmt_info;
10906 unsigned int i;
10907 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (slp_node), i, slp_stmt_info)
10908 {
10909 if (STMT_VINFO_LIVE_P (slp_stmt_info)
10910 && !vectorizable_live_operation (loop_vinfo,
10911 slp_stmt_info, gsi, slp_node,
10912 slp_node_instance, i,
10913 vec_stmt_p, cost_vec))
10914 return false;
10915 }
10916 }
10917 else if (STMT_VINFO_LIVE_P (stmt_info)
10918 && !vectorizable_live_operation (loop_vinfo, stmt_info, gsi,
10919 slp_node, slp_node_instance, -1,
10920 vec_stmt_p, cost_vec))
10921 return false;
10922
10923 return true;
10924 }
10925
10926 /* Make sure the statement is vectorizable. */
10927
10928 opt_result
10929 vect_analyze_stmt (vec_info *vinfo,
10930 stmt_vec_info stmt_info, bool *need_to_vectorize,
10931 slp_tree node, slp_instance node_instance,
10932 stmt_vector_for_cost *cost_vec)
10933 {
10934 bb_vec_info bb_vinfo = dyn_cast <bb_vec_info> (vinfo);
10935 enum vect_relevant relevance = STMT_VINFO_RELEVANT (stmt_info);
10936 bool ok;
10937 gimple_seq pattern_def_seq;
10938
10939 if (dump_enabled_p ())
10940 dump_printf_loc (MSG_NOTE, vect_location, "==> examining statement: %G",
10941 stmt_info->stmt);
10942
10943 if (gimple_has_volatile_ops (stmt_info->stmt))
10944 return opt_result::failure_at (stmt_info->stmt,
10945 "not vectorized:"
10946 " stmt has volatile operands: %G\n",
10947 stmt_info->stmt);
10948
10949 if (STMT_VINFO_IN_PATTERN_P (stmt_info)
10950 && node == NULL
10951 && (pattern_def_seq = STMT_VINFO_PATTERN_DEF_SEQ (stmt_info)))
10952 {
10953 gimple_stmt_iterator si;
10954
10955 for (si = gsi_start (pattern_def_seq); !gsi_end_p (si); gsi_next (&si))
10956 {
10957 stmt_vec_info pattern_def_stmt_info
10958 = vinfo->lookup_stmt (gsi_stmt (si));
10959 if (STMT_VINFO_RELEVANT_P (pattern_def_stmt_info)
10960 || STMT_VINFO_LIVE_P (pattern_def_stmt_info))
10961 {
10962 /* Analyze def stmt of STMT if it's a pattern stmt. */
10963 if (dump_enabled_p ())
10964 dump_printf_loc (MSG_NOTE, vect_location,
10965 "==> examining pattern def statement: %G",
10966 pattern_def_stmt_info->stmt);
10967
10968 opt_result res
10969 = vect_analyze_stmt (vinfo, pattern_def_stmt_info,
10970 need_to_vectorize, node, node_instance,
10971 cost_vec);
10972 if (!res)
10973 return res;
10974 }
10975 }
10976 }
10977
10978 /* Skip stmts that do not need to be vectorized. In loops this is expected
10979 to include:
10980 - the COND_EXPR which is the loop exit condition
10981 - any LABEL_EXPRs in the loop
10982 - computations that are used only for array indexing or loop control.
10983 In basic blocks we only analyze statements that are a part of some SLP
10984 instance, therefore, all the statements are relevant.
10985
10986 The pattern statement needs to be analyzed instead of the original statement
10987 if the original statement is not relevant. Otherwise, we analyze both
10988 statements. In basic blocks we are called from some SLP instance
10989 traversal; don't analyze pattern stmts instead, since the pattern stmts
10990 will already be part of an SLP instance. */
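/* For instance, in
     for (i = 0; i < n; ++i)
       a[i] = b[i];
   the increment of the induction variable and the i < n exit test
   only control the loop and are not vectorized themselves.  */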
10991
10992 stmt_vec_info pattern_stmt_info = STMT_VINFO_RELATED_STMT (stmt_info);
10993 if (!STMT_VINFO_RELEVANT_P (stmt_info)
10994 && !STMT_VINFO_LIVE_P (stmt_info))
10995 {
10996 if (STMT_VINFO_IN_PATTERN_P (stmt_info)
10997 && pattern_stmt_info
10998 && (STMT_VINFO_RELEVANT_P (pattern_stmt_info)
10999 || STMT_VINFO_LIVE_P (pattern_stmt_info)))
11000 {
11001 /* Analyze PATTERN_STMT instead of the original stmt. */
11002 stmt_info = pattern_stmt_info;
11003 if (dump_enabled_p ())
11004 dump_printf_loc (MSG_NOTE, vect_location,
11005 "==> examining pattern statement: %G",
11006 stmt_info->stmt);
11007 }
11008 else
11009 {
11010 if (dump_enabled_p ())
11011 dump_printf_loc (MSG_NOTE, vect_location, "irrelevant.\n");
11012
11013 return opt_result::success ();
11014 }
11015 }
11016 else if (STMT_VINFO_IN_PATTERN_P (stmt_info)
11017 && node == NULL
11018 && pattern_stmt_info
11019 && (STMT_VINFO_RELEVANT_P (pattern_stmt_info)
11020 || STMT_VINFO_LIVE_P (pattern_stmt_info)))
11021 {
11022 /* Analyze PATTERN_STMT too. */
11023 if (dump_enabled_p ())
11024 dump_printf_loc (MSG_NOTE, vect_location,
11025 "==> examining pattern statement: %G",
11026 pattern_stmt_info->stmt);
11027
11028 opt_result res
11029 = vect_analyze_stmt (vinfo, pattern_stmt_info, need_to_vectorize, node,
11030 node_instance, cost_vec);
11031 if (!res)
11032 return res;
11033 }
11034
11035 switch (STMT_VINFO_DEF_TYPE (stmt_info))
11036 {
11037 case vect_internal_def:
11038 break;
11039
11040 case vect_reduction_def:
11041 case vect_nested_cycle:
11042 gcc_assert (!bb_vinfo
11043 && (relevance == vect_used_in_outer
11044 || relevance == vect_used_in_outer_by_reduction
11045 || relevance == vect_used_by_reduction
11046 || relevance == vect_unused_in_scope
11047 || relevance == vect_used_only_live));
11048 break;
11049
11050 case vect_induction_def:
11051 gcc_assert (!bb_vinfo);
11052 break;
11053
11054 case vect_constant_def:
11055 case vect_external_def:
11056 case vect_unknown_def_type:
11057 default:
11058 gcc_unreachable ();
11059 }
11060
11061 if (STMT_VINFO_RELEVANT_P (stmt_info))
11062 {
11063 tree type = gimple_expr_type (stmt_info->stmt);
11064 gcc_assert (!VECTOR_MODE_P (TYPE_MODE (type)));
11065 gcall *call = dyn_cast <gcall *> (stmt_info->stmt);
11066 gcc_assert (STMT_VINFO_VECTYPE (stmt_info)
11067 || (call && gimple_call_lhs (call) == NULL_TREE));
11068 *need_to_vectorize = true;
11069 }
11070
11071 if (PURE_SLP_STMT (stmt_info) && !node)
11072 {
11073 if (dump_enabled_p ())
11074 dump_printf_loc (MSG_NOTE, vect_location,
11075 "handled only by SLP analysis\n");
11076 return opt_result::success ();
11077 }
11078
11079 ok = true;
11080 if (!bb_vinfo
11081 && (STMT_VINFO_RELEVANT_P (stmt_info)
11082 || STMT_VINFO_DEF_TYPE (stmt_info) == vect_reduction_def))
11083 /* Prefer vectorizable_call over vectorizable_simd_clone_call so
11084 -mveclibabi= takes preference over library functions with
11085 the simd attribute. */
11086 ok = (vectorizable_call (vinfo, stmt_info, NULL, NULL, node, cost_vec)
11087 || vectorizable_simd_clone_call (vinfo, stmt_info, NULL, NULL, node,
11088 cost_vec)
11089 || vectorizable_conversion (vinfo, stmt_info,
11090 NULL, NULL, node, cost_vec)
11091 || vectorizable_operation (vinfo, stmt_info,
11092 NULL, NULL, node, cost_vec)
11093 || vectorizable_assignment (vinfo, stmt_info,
11094 NULL, NULL, node, cost_vec)
11095 || vectorizable_load (vinfo, stmt_info, NULL, NULL, node, cost_vec)
11096 || vectorizable_store (vinfo, stmt_info, NULL, NULL, node, cost_vec)
11097 || vectorizable_reduction (as_a <loop_vec_info> (vinfo), stmt_info,
11098 node, node_instance, cost_vec)
11099 || vectorizable_induction (as_a <loop_vec_info> (vinfo), stmt_info,
11100 NULL, NULL, node, cost_vec)
11101 || vectorizable_shift (vinfo, stmt_info, NULL, NULL, node, cost_vec)
11102 || vectorizable_condition (vinfo, stmt_info,
11103 NULL, NULL, node, cost_vec)
11104 || vectorizable_comparison (vinfo, stmt_info, NULL, NULL, node,
11105 cost_vec)
11106 || vectorizable_lc_phi (as_a <loop_vec_info> (vinfo),
11107 stmt_info, NULL, node));
11108 else
11109 {
11110 if (bb_vinfo)
11111 ok = (vectorizable_call (vinfo, stmt_info, NULL, NULL, node, cost_vec)
11112 || vectorizable_simd_clone_call (vinfo, stmt_info,
11113 NULL, NULL, node, cost_vec)
11114 || vectorizable_conversion (vinfo, stmt_info, NULL, NULL, node,
11115 cost_vec)
11116 || vectorizable_shift (vinfo, stmt_info,
11117 NULL, NULL, node, cost_vec)
11118 || vectorizable_operation (vinfo, stmt_info,
11119 NULL, NULL, node, cost_vec)
11120 || vectorizable_assignment (vinfo, stmt_info, NULL, NULL, node,
11121 cost_vec)
11122 || vectorizable_load (vinfo, stmt_info,
11123 NULL, NULL, node, cost_vec)
11124 || vectorizable_store (vinfo, stmt_info,
11125 NULL, NULL, node, cost_vec)
11126 || vectorizable_condition (vinfo, stmt_info,
11127 NULL, NULL, node, cost_vec)
11128 || vectorizable_comparison (vinfo, stmt_info, NULL, NULL, node,
11129 cost_vec));
11130 }
11131
11132 if (!ok)
11133 return opt_result::failure_at (stmt_info->stmt,
11134 "not vectorized:"
11135 " relevant stmt not supported: %G",
11136 stmt_info->stmt);
11137
11138 /* Stmts that are (also) "live" (i.e. used outside of the loop)
11139 need extra handling, except for vectorizable reductions. */
11140 if (!bb_vinfo
11141 && STMT_VINFO_TYPE (stmt_info) != reduc_vec_info_type
11142 && STMT_VINFO_TYPE (stmt_info) != lc_phi_info_type
11143 && !can_vectorize_live_stmts (as_a <loop_vec_info> (vinfo),
11144 stmt_info, NULL, node, node_instance,
11145 false, cost_vec))
11146 return opt_result::failure_at (stmt_info->stmt,
11147 "not vectorized:"
11148 " live stmt not supported: %G",
11149 stmt_info->stmt);
11150
11151 return opt_result::success ();
11152 }
11153
11154
11155 /* Function vect_transform_stmt.
11156
11157 Create a vectorized stmt to replace STMT_INFO, and insert it at GSI. */
11158
11159 bool
11160 vect_transform_stmt (vec_info *vinfo,
11161 stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
11162 slp_tree slp_node, slp_instance slp_node_instance)
11163 {
11164 bool is_store = false;
11165 stmt_vec_info vec_stmt = NULL;
11166 bool done;
11167
11168 gcc_assert (slp_node || !PURE_SLP_STMT (stmt_info));
11169 stmt_vec_info old_vec_stmt_info = STMT_VINFO_VEC_STMT (stmt_info);
11170
11171 loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
11172 bool nested_p = (loop_vinfo
11173 && nested_in_vect_loop_p
11174 (LOOP_VINFO_LOOP (loop_vinfo), stmt_info));
11175
11176 gimple *stmt = stmt_info->stmt;
11177 switch (STMT_VINFO_TYPE (stmt_info))
11178 {
11179 case type_demotion_vec_info_type:
11180 case type_promotion_vec_info_type:
11181 case type_conversion_vec_info_type:
11182 done = vectorizable_conversion (vinfo, stmt_info,
11183 gsi, &vec_stmt, slp_node, NULL);
11184 gcc_assert (done);
11185 break;
11186
11187 case induc_vec_info_type:
11188 done = vectorizable_induction (as_a <loop_vec_info> (vinfo),
11189 stmt_info, gsi, &vec_stmt, slp_node,
11190 NULL);
11191 gcc_assert (done);
11192 break;
11193
11194 case shift_vec_info_type:
11195 done = vectorizable_shift (vinfo, stmt_info,
11196 gsi, &vec_stmt, slp_node, NULL);
11197 gcc_assert (done);
11198 break;
11199
11200 case op_vec_info_type:
11201 done = vectorizable_operation (vinfo, stmt_info, gsi, &vec_stmt, slp_node,
11202 NULL);
11203 gcc_assert (done);
11204 break;
11205
11206 case assignment_vec_info_type:
11207 done = vectorizable_assignment (vinfo, stmt_info,
11208 gsi, &vec_stmt, slp_node, NULL);
11209 gcc_assert (done);
11210 break;
11211
11212 case load_vec_info_type:
11213 done = vectorizable_load (vinfo, stmt_info, gsi, &vec_stmt, slp_node,
11214 NULL);
11215 gcc_assert (done);
11216 break;
11217
11218 case store_vec_info_type:
11219 done = vectorizable_store (vinfo, stmt_info,
11220 gsi, &vec_stmt, slp_node, NULL);
11221 gcc_assert (done);
11222 if (STMT_VINFO_GROUPED_ACCESS (stmt_info) && !slp_node)
11223 {
11224 /* In case of interleaving, the whole chain is vectorized when the
11225 last store in the chain is reached. Store stmts before the last
11226 one are skipped, and their vec_stmt_info shouldn't be freed
11227 meanwhile. */
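/* E.g. for a group of two interleaved stores to a[2*i] and a[2*i+1],
   nothing is emitted when the first store of the group is reached;
   the whole group is vectorized once the last one is processed.  */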
11228 stmt_vec_info group_info = DR_GROUP_FIRST_ELEMENT (stmt_info);
11229 if (DR_GROUP_STORE_COUNT (group_info) == DR_GROUP_SIZE (group_info))
11230 is_store = true;
11231 }
11232 else
11233 is_store = true;
11234 break;
11235
11236 case condition_vec_info_type:
11237 done = vectorizable_condition (vinfo, stmt_info,
11238 gsi, &vec_stmt, slp_node, NULL);
11239 gcc_assert (done);
11240 break;
11241
11242 case comparison_vec_info_type:
11243 done = vectorizable_comparison (vinfo, stmt_info, gsi, &vec_stmt,
11244 slp_node, NULL);
11245 gcc_assert (done);
11246 break;
11247
11248 case call_vec_info_type:
11249 done = vectorizable_call (vinfo, stmt_info,
11250 gsi, &vec_stmt, slp_node, NULL);
11251 stmt = gsi_stmt (*gsi);
11252 break;
11253
11254 case call_simd_clone_vec_info_type:
11255 done = vectorizable_simd_clone_call (vinfo, stmt_info, gsi, &vec_stmt,
11256 slp_node, NULL);
11257 stmt = gsi_stmt (*gsi);
11258 break;
11259
11260 case reduc_vec_info_type:
11261 done = vect_transform_reduction (as_a <loop_vec_info> (vinfo), stmt_info,
11262 gsi, &vec_stmt, slp_node);
11263 gcc_assert (done);
11264 break;
11265
11266 case cycle_phi_info_type:
11267 done = vect_transform_cycle_phi (as_a <loop_vec_info> (vinfo), stmt_info,
11268 &vec_stmt, slp_node, slp_node_instance);
11269 gcc_assert (done);
11270 break;
11271
11272 case lc_phi_info_type:
11273 done = vectorizable_lc_phi (as_a <loop_vec_info> (vinfo),
11274 stmt_info, &vec_stmt, slp_node);
11275 gcc_assert (done);
11276 break;
11277
11278 default:
11279 if (!STMT_VINFO_LIVE_P (stmt_info))
11280 {
11281 if (dump_enabled_p ())
11282 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
11283 "stmt not supported.\n");
11284 gcc_unreachable ();
11285 }
11286 done = true;
11287 }
11288
11289 /* Verify SLP vectorization doesn't mess with STMT_VINFO_VEC_STMT.
11290 This would break hybrid SLP vectorization. */
11291 if (slp_node)
11292 gcc_assert (!vec_stmt
11293 && STMT_VINFO_VEC_STMT (stmt_info) == old_vec_stmt_info);
11294
11295 /* Handle inner-loop stmts whose DEF is used in the loop-nest that
11296 is being vectorized, but outside the immediately enclosing loop. */
11297 if (vec_stmt
11298 && nested_p
11299 && STMT_VINFO_TYPE (stmt_info) != reduc_vec_info_type
11300 && (STMT_VINFO_RELEVANT (stmt_info) == vect_used_in_outer
11301 || STMT_VINFO_RELEVANT (stmt_info) ==
11302 vect_used_in_outer_by_reduction))
11303 {
11304 class loop *innerloop = LOOP_VINFO_LOOP (loop_vinfo)->inner;
11305 imm_use_iterator imm_iter;
11306 use_operand_p use_p;
11307 tree scalar_dest;
11308
11309 if (dump_enabled_p ())
11310 dump_printf_loc (MSG_NOTE, vect_location,
11311 "Record the vdef for outer-loop vectorization.\n");
11312
11313 /* Find the relevant loop-exit phi-node, and record the vec_stmt there
11314 (to be used when vectorizing outer-loop stmts that use the DEF of
11315 STMT). */
11316 if (gimple_code (stmt) == GIMPLE_PHI)
11317 scalar_dest = PHI_RESULT (stmt);
11318 else
11319 scalar_dest = gimple_get_lhs (stmt);
11320
11321 FOR_EACH_IMM_USE_FAST (use_p, imm_iter, scalar_dest)
11322 if (!flow_bb_inside_loop_p (innerloop, gimple_bb (USE_STMT (use_p))))
11323 {
11324 stmt_vec_info exit_phi_info
11325 = vinfo->lookup_stmt (USE_STMT (use_p));
11326 STMT_VINFO_VEC_STMT (exit_phi_info) = vec_stmt;
11327 }
11328 }
11329
11330 if (vec_stmt)
11331 STMT_VINFO_VEC_STMT (stmt_info) = vec_stmt;
11332
11333 if (STMT_VINFO_TYPE (stmt_info) == store_vec_info_type)
11334 return is_store;
11335
11336 /* If this stmt defines a value used on a backedge, update the
11337 vectorized PHIs. */
11338 stmt_vec_info orig_stmt_info = vect_orig_stmt (stmt_info);
11339 stmt_vec_info reduc_info;
11340 if (STMT_VINFO_REDUC_DEF (orig_stmt_info)
11341 && vect_stmt_to_vectorize (orig_stmt_info) == stmt_info
11342 && (reduc_info = info_for_reduction (vinfo, orig_stmt_info))
11343 && STMT_VINFO_REDUC_TYPE (reduc_info) != FOLD_LEFT_REDUCTION
11344 && STMT_VINFO_REDUC_TYPE (reduc_info) != EXTRACT_LAST_REDUCTION)
11345 {
11346 gphi *phi;
11347 edge e;
11348 if (!slp_node
11349 && (phi = dyn_cast <gphi *>
11350 (STMT_VINFO_REDUC_DEF (orig_stmt_info)->stmt))
11351 && dominated_by_p (CDI_DOMINATORS,
11352 gimple_bb (orig_stmt_info->stmt), gimple_bb (phi))
11353 && (e = loop_latch_edge (gimple_bb (phi)->loop_father))
11354 && (PHI_ARG_DEF_FROM_EDGE (phi, e)
11355 == gimple_get_lhs (orig_stmt_info->stmt)))
11356 {
11357 stmt_vec_info phi_info
11358 = STMT_VINFO_VEC_STMT (STMT_VINFO_REDUC_DEF (orig_stmt_info));
11359 stmt_vec_info vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
11360 do
11361 {
11362 add_phi_arg (as_a <gphi *> (phi_info->stmt),
11363 gimple_get_lhs (vec_stmt->stmt), e,
11364 gimple_phi_arg_location (phi, e->dest_idx));
11365 phi_info = STMT_VINFO_RELATED_STMT (phi_info);
11366 vec_stmt = STMT_VINFO_RELATED_STMT (vec_stmt);
11367 }
11368 while (phi_info);
11369 gcc_assert (!vec_stmt);
11370 }
11371 else if (slp_node
11372 && slp_node != slp_node_instance->reduc_phis)
11373 {
11374 slp_tree phi_node = slp_node_instance->reduc_phis;
11375 gphi *phi = as_a <gphi *> (SLP_TREE_SCALAR_STMTS (phi_node)[0]->stmt);
11376 e = loop_latch_edge (gimple_bb (phi)->loop_father);
11377 gcc_assert (SLP_TREE_VEC_STMTS (phi_node).length ()
11378 == SLP_TREE_VEC_STMTS (slp_node).length ());
11379 for (unsigned i = 0; i < SLP_TREE_VEC_STMTS (phi_node).length (); ++i)
11380 add_phi_arg (as_a <gphi *> (SLP_TREE_VEC_STMTS (phi_node)[i]->stmt),
11381 gimple_get_lhs (SLP_TREE_VEC_STMTS (slp_node)[i]->stmt),
11382 e, gimple_phi_arg_location (phi, e->dest_idx));
11383 }
11384 }
11385
11386 /* Handle stmts whose DEF is used outside the loop-nest that is
11387 being vectorized. */
11388 if (is_a <loop_vec_info> (vinfo))
11389 done = can_vectorize_live_stmts (as_a <loop_vec_info> (vinfo),
11390 stmt_info, gsi, slp_node,
11391 slp_node_instance, true, NULL);
11392 gcc_assert (done);
11393
11394 return false;
11395 }
11396
11397
11398 /* Remove a group of stores (for SLP or interleaving), free their
11399 stmt_vec_info. */
11400
11401 void
11402 vect_remove_stores (vec_info *vinfo, stmt_vec_info first_stmt_info)
11403 {
11404 stmt_vec_info next_stmt_info = first_stmt_info;
11405
11406 while (next_stmt_info)
11407 {
11408 stmt_vec_info tmp = DR_GROUP_NEXT_ELEMENT (next_stmt_info);
11409 next_stmt_info = vect_orig_stmt (next_stmt_info);
11410 /* Free the attached stmt_vec_info and remove the stmt. */
11411 vinfo->remove_stmt (next_stmt_info);
11412 next_stmt_info = tmp;
11413 }
11414 }
11415
11416 /* If NUNITS is nonzero, return a vector type that contains NUNITS
11417 elements of type SCALAR_TYPE, or null if the target doesn't support
11418 such a type.
11419
11420 If NUNITS is zero, return a vector type that contains elements of
11421 type SCALAR_TYPE, choosing whichever vector size the target prefers.
11422
11423 If PREVAILING_MODE is VOIDmode, we have not yet chosen a vector mode
11424 for this vectorization region and want to "autodetect" the best choice.
11425 Otherwise, PREVAILING_MODE is a previously-chosen vector TYPE_MODE
11426 and we want the new type to be interoperable with it. PREVAILING_MODE
11427 in this case can be a scalar integer mode or a vector mode; when it
11428 is a vector mode, the function acts like a tree-level version of
11429 related_vector_mode. */
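/* For example, with a VOIDmode PREVAILING_MODE, an "int" SCALAR_TYPE
   and a zero NUNITS, a target whose preferred SIMD mode for SImode
   is V4SImode gets a vector type of four ints; if PREVAILING_MODE is
   a vector mode such as V16QImode, the result is instead the related
   "int" vector mode for that choice (typically V4SImode again on
   such a target).  */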
11430
11431 tree
11432 get_related_vectype_for_scalar_type (machine_mode prevailing_mode,
11433 tree scalar_type, poly_uint64 nunits)
11434 {
11435 tree orig_scalar_type = scalar_type;
11436 scalar_mode inner_mode;
11437 machine_mode simd_mode;
11438 tree vectype;
11439
11440 if (!is_int_mode (TYPE_MODE (scalar_type), &inner_mode)
11441 && !is_float_mode (TYPE_MODE (scalar_type), &inner_mode))
11442 return NULL_TREE;
11443
11444 unsigned int nbytes = GET_MODE_SIZE (inner_mode);
11445
11446 /* For vector types of elements whose mode precision doesn't
11447 match their type's precision we use an element type of mode
11448 precision. The vectorization routines will have to make sure
11449 they support the proper result truncation/extension.
11450 We also make sure to build vector types with INTEGER_TYPE
11451 component type only. */
11452 if (INTEGRAL_TYPE_P (scalar_type)
11453 && (GET_MODE_BITSIZE (inner_mode) != TYPE_PRECISION (scalar_type)
11454 || TREE_CODE (scalar_type) != INTEGER_TYPE))
11455 scalar_type = build_nonstandard_integer_type (GET_MODE_BITSIZE (inner_mode),
11456 TYPE_UNSIGNED (scalar_type));
11457
11458 /* We shouldn't end up building VECTOR_TYPEs of non-scalar components.
11459 When the component mode passes the above test simply use a type
11460 corresponding to that mode. The theory is that any use that
11461 would cause problems with this will disable vectorization anyway. */
11462 else if (!SCALAR_FLOAT_TYPE_P (scalar_type)
11463 && !INTEGRAL_TYPE_P (scalar_type))
11464 scalar_type = lang_hooks.types.type_for_mode (inner_mode, 1);
11465
11466 /* We can't build a vector type of elements with alignment bigger than
11467 their size. */
11468 else if (nbytes < TYPE_ALIGN_UNIT (scalar_type))
11469 scalar_type = lang_hooks.types.type_for_mode (inner_mode,
11470 TYPE_UNSIGNED (scalar_type));
11471
11472 /* If we fell back to using the mode, fail if there was
11473 no scalar type for it. */
11474 if (scalar_type == NULL_TREE)
11475 return NULL_TREE;
11476
11477 /* If no prevailing mode was supplied, use the mode the target prefers.
11478 Otherwise look up a vector mode based on the prevailing mode. */
11479 if (prevailing_mode == VOIDmode)
11480 {
11481 gcc_assert (known_eq (nunits, 0U));
11482 simd_mode = targetm.vectorize.preferred_simd_mode (inner_mode);
11483 if (SCALAR_INT_MODE_P (simd_mode))
11484 {
11485 /* Traditional behavior is not to take the integer mode
11486 literally, but simply to use it as a way of determining
11487 the vector size. It is up to mode_for_vector to decide
11488 what the TYPE_MODE should be.
11489
11490 Note that nunits == 1 is allowed in order to support single
11491 element vector types. */
11492 if (!multiple_p (GET_MODE_SIZE (simd_mode), nbytes, &nunits)
11493 || !mode_for_vector (inner_mode, nunits).exists (&simd_mode))
11494 return NULL_TREE;
11495 }
11496 }
11497 else if (SCALAR_INT_MODE_P (prevailing_mode)
11498 || !related_vector_mode (prevailing_mode,
11499 inner_mode, nunits).exists (&simd_mode))
11500 {
11501 /* Fall back to using mode_for_vector, mostly in the hope of being
11502 able to use an integer mode. */
11503 if (known_eq (nunits, 0U)
11504 && !multiple_p (GET_MODE_SIZE (prevailing_mode), nbytes, &nunits))
11505 return NULL_TREE;
11506
11507 if (!mode_for_vector (inner_mode, nunits).exists (&simd_mode))
11508 return NULL_TREE;
11509 }
11510
11511 vectype = build_vector_type_for_mode (scalar_type, simd_mode);
11512
11513 /* In cases where the mode was chosen by mode_for_vector, check that
11514 the target actually supports the chosen mode, or that it at least
11515 allows the vector mode to be replaced by a like-sized integer. */
11516 if (!VECTOR_MODE_P (TYPE_MODE (vectype))
11517 && !INTEGRAL_MODE_P (TYPE_MODE (vectype)))
11518 return NULL_TREE;
11519
11520 /* Re-attach the address-space qualifier if we canonicalized the scalar
11521 type. */
11522 if (TYPE_ADDR_SPACE (orig_scalar_type) != TYPE_ADDR_SPACE (vectype))
11523 return build_qualified_type
11524 (vectype, KEEP_QUAL_ADDR_SPACE (TYPE_QUALS (orig_scalar_type)));
11525
11526 return vectype;
11527 }
11528
11529 /* Function get_vectype_for_scalar_type.
11530
11531 Returns the vector type corresponding to SCALAR_TYPE as supported
11532 by the target. If GROUP_SIZE is nonzero and we're performing BB
11533 vectorization, make sure that the number of elements in the vector
11534 is no bigger than GROUP_SIZE. */
11535
11536 tree
11537 get_vectype_for_scalar_type (vec_info *vinfo, tree scalar_type,
11538 unsigned int group_size)
11539 {
11540 /* For BB vectorization, we should always have a group size once we've
11541 constructed the SLP tree; the only valid uses of zero GROUP_SIZEs
11542 are tentative requests during things like early data reference
11543 analysis and pattern recognition. */
11544 if (is_a <bb_vec_info> (vinfo))
11545 gcc_assert (vinfo->slp_instances.is_empty () || group_size != 0);
11546 else
11547 group_size = 0;
11548
11549 tree vectype = get_related_vectype_for_scalar_type (vinfo->vector_mode,
11550 scalar_type);
11551 if (vectype && vinfo->vector_mode == VOIDmode)
11552 vinfo->vector_mode = TYPE_MODE (vectype);
11553
11554 /* Register the natural choice of vector type, before the group size
11555 has been applied. */
11556 if (vectype)
11557 vinfo->used_vector_modes.add (TYPE_MODE (vectype));
11558
11559 /* If the natural choice of vector type doesn't satisfy GROUP_SIZE,
11560 try again with an explicit number of elements. */
11561 if (vectype
11562 && group_size
11563 && maybe_ge (TYPE_VECTOR_SUBPARTS (vectype), group_size))
11564 {
11565 /* Start with the biggest number of units that fits within
11566 GROUP_SIZE and halve it until we find a valid vector type.
11567 Usually either the first attempt will succeed or all will
11568 fail (in the latter case because GROUP_SIZE is too small
11569 for the target), but it's possible that a target could have
11570 a hole between supported vector types.
11571
11572 If GROUP_SIZE is not a power of 2, this has the effect of
11573 trying the largest power of 2 that fits within the group,
11574 even though the group is not a multiple of that vector size.
11575 The BB vectorizer will then try to carve up the group into
11576 smaller pieces. */
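/* E.g. for GROUP_SIZE 6 this tries a 4-element vector type first
   and then a 2-element one, stopping at the first type the target
   supports.  */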
11577 unsigned int nunits = 1 << floor_log2 (group_size);
11578 do
11579 {
11580 vectype = get_related_vectype_for_scalar_type (vinfo->vector_mode,
11581 scalar_type, nunits);
11582 nunits /= 2;
11583 }
11584 while (nunits > 1 && !vectype);
11585 }
11586
11587 return vectype;
11588 }
11589
11590 /* Return the vector type corresponding to SCALAR_TYPE as supported
11591 by the target. NODE, if nonnull, is the SLP tree node that will
11592 use the returned vector type. */
11593
11594 tree
11595 get_vectype_for_scalar_type (vec_info *vinfo, tree scalar_type, slp_tree node)
11596 {
11597 unsigned int group_size = 0;
11598 if (node)
11599 {
11600 group_size = SLP_TREE_SCALAR_OPS (node).length ();
11601 if (group_size == 0)
11602 group_size = SLP_TREE_SCALAR_STMTS (node).length ();
11603 }
11604 return get_vectype_for_scalar_type (vinfo, scalar_type, group_size);
11605 }
11606
11607 /* Function get_mask_type_for_scalar_type.
11608
11609 Returns the mask type corresponding to a result of comparison
11610 of vectors of specified SCALAR_TYPE as supported by target.
11611 If GROUP_SIZE is nonzero and we're performing BB vectorization,
11612 make sure that the number of elements in the vector is no bigger
11613 than GROUP_SIZE. */
11614
11615 tree
11616 get_mask_type_for_scalar_type (vec_info *vinfo, tree scalar_type,
11617 unsigned int group_size)
11618 {
11619 tree vectype = get_vectype_for_scalar_type (vinfo, scalar_type, group_size);
11620
11621 if (!vectype)
11622 return NULL;
11623
11624 return truth_type_for (vectype);
11625 }
11626
11627 /* Function get_same_sized_vectype
11628
11629 Returns a vector type corresponding to SCALAR_TYPE of size
11630 VECTOR_TYPE if supported by the target. */
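/* E.g. given a vector type of four floats and an "int" SCALAR_TYPE
   this returns a vector type of four ints; given "double" it returns
   a vector type of two doubles, keeping the overall vector size the
   same.  */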
11631
11632 tree
11633 get_same_sized_vectype (tree scalar_type, tree vector_type)
11634 {
11635 if (VECT_SCALAR_BOOLEAN_TYPE_P (scalar_type))
11636 return truth_type_for (vector_type);
11637
11638 poly_uint64 nunits;
11639 if (!multiple_p (GET_MODE_SIZE (TYPE_MODE (vector_type)),
11640 GET_MODE_SIZE (TYPE_MODE (scalar_type)), &nunits))
11641 return NULL_TREE;
11642
11643 return get_related_vectype_for_scalar_type (TYPE_MODE (vector_type),
11644 scalar_type, nunits);
11645 }
11646
11647 /* Return true if replacing LOOP_VINFO->vector_mode with VECTOR_MODE
11648 would not change the chosen vector modes. */
11649
11650 bool
11651 vect_chooses_same_modes_p (vec_info *vinfo, machine_mode vector_mode)
11652 {
11653 for (vec_info::mode_set::iterator i = vinfo->used_vector_modes.begin ();
11654 i != vinfo->used_vector_modes.end (); ++i)
11655 if (!VECTOR_MODE_P (*i)
11656 || related_vector_mode (vector_mode, GET_MODE_INNER (*i), 0) != *i)
11657 return false;
11658 return true;
11659 }
11660
11661 /* Function vect_is_simple_use.
11662
11663 Input:
11664 VINFO - the vect info of the loop or basic block that is being vectorized.
11665 OPERAND - operand in the loop or bb.
11666 Output:
11667 DEF_STMT_INFO_OUT (optional) - information about the defining stmt in
11668 case OPERAND is an SSA_NAME that is defined in the vectorizable region
11669 DEF_STMT_OUT (optional) - the defining stmt in case OPERAND is an SSA_NAME;
11670 the definition could be anywhere in the function
11671 DT - the type of definition
11672
11673 Returns whether a stmt with OPERAND can be vectorized.
11674 For loops, supportable operands are constants, loop invariants, and operands
11675 that are defined by the current iteration of the loop. Unsupportable
11676 operands are those that are defined by a previous iteration of the loop (as
11677 is the case in reduction/induction computations).
11678 For basic blocks, supportable operands are constants and bb invariants.
11679 For now, operands defined outside the basic block are not supported. */
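/* For instance, when vectorizing
     for (i = 0; i < n; ++i)
       a[i] = b[i] * x + 1;
   the loaded value b[i] is classified as vect_internal_def, the
   loop-invariant X as vect_external_def and the literal 1 as
   vect_constant_def.  */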
11680
11681 bool
11682 vect_is_simple_use (tree operand, vec_info *vinfo, enum vect_def_type *dt,
11683 stmt_vec_info *def_stmt_info_out, gimple **def_stmt_out)
11684 {
11685 if (def_stmt_info_out)
11686 *def_stmt_info_out = NULL;
11687 if (def_stmt_out)
11688 *def_stmt_out = NULL;
11689 *dt = vect_unknown_def_type;
11690
11691 if (dump_enabled_p ())
11692 {
11693 dump_printf_loc (MSG_NOTE, vect_location,
11694 "vect_is_simple_use: operand ");
11695 if (TREE_CODE (operand) == SSA_NAME
11696 && !SSA_NAME_IS_DEFAULT_DEF (operand))
11697 dump_gimple_expr (MSG_NOTE, TDF_SLIM, SSA_NAME_DEF_STMT (operand), 0);
11698 else
11699 dump_generic_expr (MSG_NOTE, TDF_SLIM, operand);
11700 }
11701
11702 if (CONSTANT_CLASS_P (operand))
11703 *dt = vect_constant_def;
11704 else if (is_gimple_min_invariant (operand))
11705 *dt = vect_external_def;
11706 else if (TREE_CODE (operand) != SSA_NAME)
11707 *dt = vect_unknown_def_type;
11708 else if (SSA_NAME_IS_DEFAULT_DEF (operand))
11709 *dt = vect_external_def;
11710 else
11711 {
11712 gimple *def_stmt = SSA_NAME_DEF_STMT (operand);
11713 stmt_vec_info stmt_vinfo = vinfo->lookup_def (operand);
11714 if (!stmt_vinfo)
11715 *dt = vect_external_def;
11716 else
11717 {
11718 stmt_vinfo = vect_stmt_to_vectorize (stmt_vinfo);
11719 def_stmt = stmt_vinfo->stmt;
11720 switch (gimple_code (def_stmt))
11721 {
11722 case GIMPLE_PHI:
11723 case GIMPLE_ASSIGN:
11724 case GIMPLE_CALL:
11725 *dt = STMT_VINFO_DEF_TYPE (stmt_vinfo);
11726 break;
11727 default:
11728 *dt = vect_unknown_def_type;
11729 break;
11730 }
11731 if (def_stmt_info_out)
11732 *def_stmt_info_out = stmt_vinfo;
11733 }
11734 if (def_stmt_out)
11735 *def_stmt_out = def_stmt;
11736 }
11737
11738 if (dump_enabled_p ())
11739 {
11740 dump_printf (MSG_NOTE, ", type of def: ");
11741 switch (*dt)
11742 {
11743 case vect_uninitialized_def:
11744 dump_printf (MSG_NOTE, "uninitialized\n");
11745 break;
11746 case vect_constant_def:
11747 dump_printf (MSG_NOTE, "constant\n");
11748 break;
11749 case vect_external_def:
11750 dump_printf (MSG_NOTE, "external\n");
11751 break;
11752 case vect_internal_def:
11753 dump_printf (MSG_NOTE, "internal\n");
11754 break;
11755 case vect_induction_def:
11756 dump_printf (MSG_NOTE, "induction\n");
11757 break;
11758 case vect_reduction_def:
11759 dump_printf (MSG_NOTE, "reduction\n");
11760 break;
11761 case vect_double_reduction_def:
11762 dump_printf (MSG_NOTE, "double reduction\n");
11763 break;
11764 case vect_nested_cycle:
11765 dump_printf (MSG_NOTE, "nested cycle\n");
11766 break;
11767 case vect_unknown_def_type:
11768 dump_printf (MSG_NOTE, "unknown\n");
11769 break;
11770 }
11771 }
11772
11773 if (*dt == vect_unknown_def_type)
11774 {
11775 if (dump_enabled_p ())
11776 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
11777 "Unsupported pattern.\n");
11778 return false;
11779 }
11780
11781 return true;
11782 }
11783
11784 /* Function vect_is_simple_use.
11785
11786 Same as vect_is_simple_use but also determines the vector operand
11787 type of OPERAND and stores it to *VECTYPE. If the definition of
11788 OPERAND is vect_uninitialized_def, vect_constant_def or
11789 vect_external_def *VECTYPE will be set to NULL_TREE and the caller
11790 is responsible for computing the best suited vector type for the
11791 scalar operand. */
11792
11793 bool
11794 vect_is_simple_use (tree operand, vec_info *vinfo, enum vect_def_type *dt,
11795 tree *vectype, stmt_vec_info *def_stmt_info_out,
11796 gimple **def_stmt_out)
11797 {
11798 stmt_vec_info def_stmt_info;
11799 gimple *def_stmt;
11800 if (!vect_is_simple_use (operand, vinfo, dt, &def_stmt_info, &def_stmt))
11801 return false;
11802
11803 if (def_stmt_out)
11804 *def_stmt_out = def_stmt;
11805 if (def_stmt_info_out)
11806 *def_stmt_info_out = def_stmt_info;
11807
11808 /* Now get a vector type if the def is internal, otherwise supply
11809 NULL_TREE and leave it up to the caller to figure out a proper
11810 type for the use stmt. */
11811 if (*dt == vect_internal_def
11812 || *dt == vect_induction_def
11813 || *dt == vect_reduction_def
11814 || *dt == vect_double_reduction_def
11815 || *dt == vect_nested_cycle)
11816 {
11817 *vectype = STMT_VINFO_VECTYPE (def_stmt_info);
11818 gcc_assert (*vectype != NULL_TREE);
11819 if (dump_enabled_p ())
11820 dump_printf_loc (MSG_NOTE, vect_location,
11821 "vect_is_simple_use: vectype %T\n", *vectype);
11822 }
11823 else if (*dt == vect_uninitialized_def
11824 || *dt == vect_constant_def
11825 || *dt == vect_external_def)
11826 *vectype = NULL_TREE;
11827 else
11828 gcc_unreachable ();
11829
11830 return true;
11831 }
11832
11833 /* Function vect_is_simple_use.
11834
11835 Same as vect_is_simple_use but determines the operand by operand
11836 position OPERAND from either STMT or SLP_NODE, filling in *OP
11837 and *SLP_DEF (when SLP_NODE is not NULL). */
11838
11839 bool
11840 vect_is_simple_use (vec_info *vinfo, stmt_vec_info stmt, slp_tree slp_node,
11841 unsigned operand, tree *op, slp_tree *slp_def,
11842 enum vect_def_type *dt,
11843 tree *vectype, stmt_vec_info *def_stmt_info_out)
11844 {
11845 if (slp_node)
11846 {
11847 slp_tree child = SLP_TREE_CHILDREN (slp_node)[operand];
11848 *slp_def = child;
11849 if (SLP_TREE_DEF_TYPE (child) == vect_internal_def)
11850 *op = gimple_get_lhs (SLP_TREE_SCALAR_STMTS (child)[0]->stmt);
11851 else
11852 *op = SLP_TREE_SCALAR_OPS (child)[0];
11853 }
11854 else
11855 {
11856 if (gassign *ass = dyn_cast <gassign *> (stmt->stmt))
11857 {
11858 *op = gimple_op (ass, operand + 1);
11859 /* ??? Ick. But it will vanish with SLP only. */
11860 if (TREE_CODE (*op) == VIEW_CONVERT_EXPR)
11861 *op = TREE_OPERAND (*op, 0);
11862 }
11863 else if (gcall *call = dyn_cast <gcall *> (stmt->stmt))
11864 *op = gimple_call_arg (call, operand);
11865 else
11866 gcc_unreachable ();
11867 }
11868
11869 /* ??? We might want to update *vectype from *slp_def here, though
11870 when sharing nodes this would prevent unsharing in the caller. */
11871 return vect_is_simple_use (*op, vinfo, dt, vectype, def_stmt_info_out);
11872 }
11873
11874 /* If OP is not NULL and is external or constant update its vector
11875 type with VECTYPE. Returns true if successful or false if not,
11876 for example when conflicting vector types are present. */
11877
11878 bool
11879 vect_maybe_update_slp_op_vectype (slp_tree op, tree vectype)
11880 {
11881 if (!op || SLP_TREE_DEF_TYPE (op) == vect_internal_def)
11882 return true;
11883 if (SLP_TREE_VECTYPE (op))
11884 return types_compatible_p (SLP_TREE_VECTYPE (op), vectype);
11885 SLP_TREE_VECTYPE (op) = vectype;
11886 return true;
11887 }
11888
11889 /* Function supportable_widening_operation
11890
11891 Check whether an operation represented by the code CODE is a
11892 widening operation that is supported by the target platform in
11893 vector form (i.e., when operating on arguments of type VECTYPE_IN
11894 producing a result of type VECTYPE_OUT).
11895
11896 Widening operations we currently support are NOP (CONVERT), FLOAT,
11897 FIX_TRUNC and WIDEN_MULT. This function checks if these operations
11898 are supported by the target platform either directly (via vector
11899 tree-codes), or via target builtins.
11900
11901 Output:
11902 - CODE1 and CODE2 are codes of vector operations to be used when
11903 vectorizing the operation, if available.
11904 - MULTI_STEP_CVT determines the number of required intermediate steps in
11905 case of multi-step conversion (like char->short->int - in that case
11906 MULTI_STEP_CVT will be 1).
11907 - INTERM_TYPES contains the intermediate type required to perform the
11908 widening operation (short in the above example). */
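/* For example, a conversion loop such as
     short *a; int *b;
     for (i = 0; i < n; ++i)
       b[i] = a[i];
   widens in a single step (MULTI_STEP_CVT == 0, INTERM_TYPES empty),
   whereas widening char elements all the way to int goes through an
   intermediate vector of shorts (MULTI_STEP_CVT == 1, INTERM_TYPES
   containing that short vector type).  */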
11909
11910 bool
11911 supportable_widening_operation (vec_info *vinfo,
11912 enum tree_code code, stmt_vec_info stmt_info,
11913 tree vectype_out, tree vectype_in,
11914 enum tree_code *code1, enum tree_code *code2,
11915 int *multi_step_cvt,
11916 vec<tree> *interm_types)
11917 {
11918 loop_vec_info loop_info = dyn_cast <loop_vec_info> (vinfo);
11919 class loop *vect_loop = NULL;
11920 machine_mode vec_mode;
11921 enum insn_code icode1, icode2;
11922 optab optab1, optab2;
11923 tree vectype = vectype_in;
11924 tree wide_vectype = vectype_out;
11925 enum tree_code c1, c2;
11926 int i;
11927 tree prev_type, intermediate_type;
11928 machine_mode intermediate_mode, prev_mode;
11929 optab optab3, optab4;
11930
11931 *multi_step_cvt = 0;
11932 if (loop_info)
11933 vect_loop = LOOP_VINFO_LOOP (loop_info);
11934
11935 switch (code)
11936 {
11937 case WIDEN_MULT_EXPR:
11938 /* The result of a vectorized widening operation usually requires
11939 two vectors (because the widened results do not fit into one vector).
11940 The generated vector results would normally be expected to be
11941 generated in the same order as in the original scalar computation,
11942 i.e. if 8 results are generated in each vector iteration, they are
11943 to be organized as follows:
11944 vect1: [res1,res2,res3,res4],
11945 vect2: [res5,res6,res7,res8].
11946
11947 However, in the special case that the result of the widening
11948 operation is used in a reduction computation only, the order doesn't
11949 matter (because when vectorizing a reduction we change the order of
11950 the computation). Some targets can take advantage of this and
11951 generate more efficient code. For example, targets like Altivec,
11952 that support widen_mult using a sequence of {mult_even,mult_odd}
11953 generate the following vectors:
11954 vect1: [res1,res3,res5,res7],
11955 vect2: [res2,res4,res6,res8].
11956
11957 When vectorizing outer-loops, we execute the inner-loop sequentially
11958 (each vectorized inner-loop iteration contributes to VF outer-loop
11959 iterations in parallel). We therefore don't allow the order of the
11960 computation in the inner-loop to be changed during outer-loop
11961 vectorization. */
11962 /* TODO: Another case in which order doesn't *really* matter is when we
11963 widen and then contract again, e.g. (short)((int)x * y >> 8).
11964 Normally, pack_trunc performs an even/odd permute, whereas the
11965 repack from an even/odd expansion would be an interleave, which
11966 would be significantly simpler for e.g. AVX2. */
11967 /* In any case, in order to avoid duplicating the code below, recurse
11968 on VEC_WIDEN_MULT_EVEN_EXPR. If it succeeds, all the return values
11969 are properly set up for the caller. If we fail, we'll continue with
11970 a VEC_WIDEN_MULT_LO/HI_EXPR check. */
11971 if (vect_loop
11972 && STMT_VINFO_RELEVANT (stmt_info) == vect_used_by_reduction
11973 && !nested_in_vect_loop_p (vect_loop, stmt_info)
11974 && supportable_widening_operation (vinfo, VEC_WIDEN_MULT_EVEN_EXPR,
11975 stmt_info, vectype_out,
11976 vectype_in, code1, code2,
11977 multi_step_cvt, interm_types))
11978 {
11979 /* Elements in a vector with the vect_used_by_reduction property
11980 cannot be reordered if the use chain with this property does not
11981 have the same operation. One such example is s += a * b, where
11982 elements in a and b cannot be reordered. Here we check if the
11983 vector defined by STMT_INFO is only directly used in the reduction
11984 statement. */
11984 tree lhs = gimple_assign_lhs (stmt_info->stmt);
11985 stmt_vec_info use_stmt_info = loop_info->lookup_single_use (lhs);
11986 if (use_stmt_info
11987 && STMT_VINFO_DEF_TYPE (use_stmt_info) == vect_reduction_def)
11988 return true;
11989 }
11990 c1 = VEC_WIDEN_MULT_LO_EXPR;
11991 c2 = VEC_WIDEN_MULT_HI_EXPR;
11992 break;
11993
11994 case DOT_PROD_EXPR:
11995 c1 = DOT_PROD_EXPR;
11996 c2 = DOT_PROD_EXPR;
11997 break;
11998
11999 case SAD_EXPR:
12000 c1 = SAD_EXPR;
12001 c2 = SAD_EXPR;
12002 break;
12003
12004 case VEC_WIDEN_MULT_EVEN_EXPR:
12005 /* Support the recursion induced just above. */
12006 c1 = VEC_WIDEN_MULT_EVEN_EXPR;
12007 c2 = VEC_WIDEN_MULT_ODD_EXPR;
12008 break;
12009
12010 case WIDEN_LSHIFT_EXPR:
12011 c1 = VEC_WIDEN_LSHIFT_LO_EXPR;
12012 c2 = VEC_WIDEN_LSHIFT_HI_EXPR;
12013 break;
12014
12015 CASE_CONVERT:
12016 c1 = VEC_UNPACK_LO_EXPR;
12017 c2 = VEC_UNPACK_HI_EXPR;
12018 break;
12019
12020 case FLOAT_EXPR:
12021 c1 = VEC_UNPACK_FLOAT_LO_EXPR;
12022 c2 = VEC_UNPACK_FLOAT_HI_EXPR;
12023 break;
12024
12025 case FIX_TRUNC_EXPR:
12026 c1 = VEC_UNPACK_FIX_TRUNC_LO_EXPR;
12027 c2 = VEC_UNPACK_FIX_TRUNC_HI_EXPR;
12028 break;
12029
12030 default:
12031 gcc_unreachable ();
12032 }
12033
12034 if (BYTES_BIG_ENDIAN && c1 != VEC_WIDEN_MULT_EVEN_EXPR)
12035 std::swap (c1, c2);
12036
12037 if (code == FIX_TRUNC_EXPR)
12038 {
12039 /* The signedness is determined from the output operand. */
12040 optab1 = optab_for_tree_code (c1, vectype_out, optab_default);
12041 optab2 = optab_for_tree_code (c2, vectype_out, optab_default);
12042 }
12043 else if (CONVERT_EXPR_CODE_P (code)
12044 && VECTOR_BOOLEAN_TYPE_P (wide_vectype)
12045 && VECTOR_BOOLEAN_TYPE_P (vectype)
12046 && TYPE_MODE (wide_vectype) == TYPE_MODE (vectype)
12047 && SCALAR_INT_MODE_P (TYPE_MODE (vectype)))
12048 {
12049 /* If the input and result modes are the same, a different optab
12050 is needed where we pass in the number of units in vectype. */
12051 optab1 = vec_unpacks_sbool_lo_optab;
12052 optab2 = vec_unpacks_sbool_hi_optab;
12053 }
12054 else
12055 {
12056 optab1 = optab_for_tree_code (c1, vectype, optab_default);
12057 optab2 = optab_for_tree_code (c2, vectype, optab_default);
12058 }
12059
12060 if (!optab1 || !optab2)
12061 return false;
12062
12063 vec_mode = TYPE_MODE (vectype);
12064 if ((icode1 = optab_handler (optab1, vec_mode)) == CODE_FOR_nothing
12065 || (icode2 = optab_handler (optab2, vec_mode)) == CODE_FOR_nothing)
12066 return false;
12067
12068 *code1 = c1;
12069 *code2 = c2;
12070
12071 if (insn_data[icode1].operand[0].mode == TYPE_MODE (wide_vectype)
12072 && insn_data[icode2].operand[0].mode == TYPE_MODE (wide_vectype))
12073 {
12074 if (!VECTOR_BOOLEAN_TYPE_P (vectype))
12075 return true;
12076 /* For scalar masks we may have different boolean
12077 vector types having the same QImode. Thus we
12078 add an additional check on the number of elements. */
12079 if (known_eq (TYPE_VECTOR_SUBPARTS (vectype),
12080 TYPE_VECTOR_SUBPARTS (wide_vectype) * 2))
12081 return true;
12082 }
12083
12084 /* Check if it's a multi-step conversion that can be done using intermediate
12085 types. */
12086
12087 prev_type = vectype;
12088 prev_mode = vec_mode;
12089
12090 if (!CONVERT_EXPR_CODE_P (code))
12091 return false;
12092
12093 /* We assume here that there will not be more than MAX_INTERM_CVT_STEPS
12094 intermediate steps in the promotion sequence. We try
12095 MAX_INTERM_CVT_STEPS to get to WIDE_VECTYPE, and fail if we do
12096 not. */
12097 interm_types->create (MAX_INTERM_CVT_STEPS);
12098 for (i = 0; i < MAX_INTERM_CVT_STEPS; i++)
12099 {
12100 intermediate_mode = insn_data[icode1].operand[0].mode;
12101 if (VECTOR_BOOLEAN_TYPE_P (prev_type))
12102 intermediate_type
12103 = vect_halve_mask_nunits (prev_type, intermediate_mode);
12104 else
12105 intermediate_type
12106 = lang_hooks.types.type_for_mode (intermediate_mode,
12107 TYPE_UNSIGNED (prev_type));
12108
12109 if (VECTOR_BOOLEAN_TYPE_P (intermediate_type)
12110 && VECTOR_BOOLEAN_TYPE_P (prev_type)
12111 && intermediate_mode == prev_mode
12112 && SCALAR_INT_MODE_P (prev_mode))
12113 {
12114 /* If the input and result modes are the same, a different optab
12115 is needed where we pass in the number of units in vectype. */
12116 optab3 = vec_unpacks_sbool_lo_optab;
12117 optab4 = vec_unpacks_sbool_hi_optab;
12118 }
12119 else
12120 {
12121 optab3 = optab_for_tree_code (c1, intermediate_type, optab_default);
12122 optab4 = optab_for_tree_code (c2, intermediate_type, optab_default);
12123 }
12124
12125 if (!optab3 || !optab4
12126 || (icode1 = optab_handler (optab1, prev_mode)) == CODE_FOR_nothing
12127 || insn_data[icode1].operand[0].mode != intermediate_mode
12128 || (icode2 = optab_handler (optab2, prev_mode)) == CODE_FOR_nothing
12129 || insn_data[icode2].operand[0].mode != intermediate_mode
12130 || ((icode1 = optab_handler (optab3, intermediate_mode))
12131 == CODE_FOR_nothing)
12132 || ((icode2 = optab_handler (optab4, intermediate_mode))
12133 == CODE_FOR_nothing))
12134 break;
12135
12136 interm_types->quick_push (intermediate_type);
12137 (*multi_step_cvt)++;
12138
12139 if (insn_data[icode1].operand[0].mode == TYPE_MODE (wide_vectype)
12140 && insn_data[icode2].operand[0].mode == TYPE_MODE (wide_vectype))
12141 {
12142 if (!VECTOR_BOOLEAN_TYPE_P (vectype))
12143 return true;
12144 if (known_eq (TYPE_VECTOR_SUBPARTS (intermediate_type),
12145 TYPE_VECTOR_SUBPARTS (wide_vectype) * 2))
12146 return true;
12147 }
12148
12149 prev_type = intermediate_type;
12150 prev_mode = intermediate_mode;
12151 }
12152
12153 interm_types->release ();
12154 return false;
12155 }
12156
12157
12158 /* Function supportable_narrowing_operation
12159
12160 Check whether an operation represented by the code CODE is a
12161 narrowing operation that is supported by the target platform in
12162 vector form (i.e., when operating on arguments of type VECTYPE_IN
12163 and producing a result of type VECTYPE_OUT).
12164
12165 Narrowing operations we currently support are NOP (CONVERT), FIX_TRUNC
12166 and FLOAT. This function checks if these operations are supported by
12167 the target platform directly via vector tree-codes.
12168
12169 Output:
12170 - CODE1 is the code of a vector operation to be used when
12171 vectorizing the operation, if available.
12172 - MULTI_STEP_CVT determines the number of required intermediate steps in
12173 case of multi-step conversion (like int->short->char - in that case
12174 MULTI_STEP_CVT will be 1).
12175 - INTERM_TYPES contains the intermediate type required to perform the
12176 narrowing operation (short in the above example). */
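/* A worked example (illustrative, assuming 128-bit vectors and the usual
   pack patterns): narrowing a V4SI input to a V16QI result for a NOP
   conversion yields

       *CODE1 = VEC_PACK_TRUNC_EXPR,
       *MULTI_STEP_CVT = 1,
       *INTERM_TYPES = { vector(8) short int }

   i.e. one intermediate int->short packing step before the final
   short->char pack.  */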
12177
12178 bool
12179 supportable_narrowing_operation (enum tree_code code,
12180 tree vectype_out, tree vectype_in,
12181 enum tree_code *code1, int *multi_step_cvt,
12182 vec<tree> *interm_types)
12183 {
12184 machine_mode vec_mode;
12185 enum insn_code icode1;
12186 optab optab1, interm_optab;
12187 tree vectype = vectype_in;
12188 tree narrow_vectype = vectype_out;
12189 enum tree_code c1;
12190 tree intermediate_type, prev_type;
12191 machine_mode intermediate_mode, prev_mode;
12192 int i;
12193 bool uns;
12194
12195 *multi_step_cvt = 0;
12196 switch (code)
12197 {
12198 CASE_CONVERT:
12199 c1 = VEC_PACK_TRUNC_EXPR;
12200 if (VECTOR_BOOLEAN_TYPE_P (narrow_vectype)
12201 && VECTOR_BOOLEAN_TYPE_P (vectype)
12202 && TYPE_MODE (narrow_vectype) == TYPE_MODE (vectype)
12203 && SCALAR_INT_MODE_P (TYPE_MODE (vectype)))
12204 optab1 = vec_pack_sbool_trunc_optab;
12205 else
12206 optab1 = optab_for_tree_code (c1, vectype, optab_default);
12207 break;
12208
12209 case FIX_TRUNC_EXPR:
12210 c1 = VEC_PACK_FIX_TRUNC_EXPR;
12211 /* The signedness is determined from the output operand. */
12212 optab1 = optab_for_tree_code (c1, vectype_out, optab_default);
12213 break;
12214
12215 case FLOAT_EXPR:
12216 c1 = VEC_PACK_FLOAT_EXPR;
12217 optab1 = optab_for_tree_code (c1, vectype, optab_default);
12218 break;
12219
12220 default:
12221 gcc_unreachable ();
12222 }
12223
12224 if (!optab1)
12225 return false;
12226
12227 vec_mode = TYPE_MODE (vectype);
12228 if ((icode1 = optab_handler (optab1, vec_mode)) == CODE_FOR_nothing)
12229 return false;
12230
12231 *code1 = c1;
12232
12233 if (insn_data[icode1].operand[0].mode == TYPE_MODE (narrow_vectype))
12234 {
12235 if (!VECTOR_BOOLEAN_TYPE_P (vectype))
12236 return true;
12237 /* For scalar masks we may have different boolean
12238 vector types having the same QImode. Thus we
12239 add an additional check on the number of elements. */
12240 if (known_eq (TYPE_VECTOR_SUBPARTS (vectype) * 2,
12241 TYPE_VECTOR_SUBPARTS (narrow_vectype)))
12242 return true;
12243 }
12244
12245 if (code == FLOAT_EXPR)
12246 return false;
12247
12248 /* Check if it's a multi-step conversion that can be done using intermediate
12249 types. */
12250 prev_mode = vec_mode;
12251 prev_type = vectype;
12252 if (code == FIX_TRUNC_EXPR)
12253 uns = TYPE_UNSIGNED (vectype_out);
12254 else
12255 uns = TYPE_UNSIGNED (vectype);
12256
12257 /* For multi-step FIX_TRUNC_EXPR prefer signed floating to integer
12258 conversion over unsigned, as unsigned FIX_TRUNC_EXPR is often more
12259 costly than signed. */
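  /* For instance (illustrative): a multi-step double -> unsigned short
     conversion can use a signed double -> int step first, with the
     remaining narrowing done by plain integer packs, which is typically
     cheaper than an unsigned float-to-integer conversion.  */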
12260 if (code == FIX_TRUNC_EXPR && uns)
12261 {
12262 enum insn_code icode2;
12263
12264 intermediate_type
12265 = lang_hooks.types.type_for_mode (TYPE_MODE (vectype_out), 0);
12266 interm_optab
12267 = optab_for_tree_code (c1, intermediate_type, optab_default);
12268 if (interm_optab != unknown_optab
12269 && (icode2 = optab_handler (optab1, vec_mode)) != CODE_FOR_nothing
12270 && insn_data[icode1].operand[0].mode
12271 == insn_data[icode2].operand[0].mode)
12272 {
12273 uns = false;
12274 optab1 = interm_optab;
12275 icode1 = icode2;
12276 }
12277 }
12278
12279 /* We assume here that there will not be more than MAX_INTERM_CVT_STEPS
12280 intermediate steps in the narrowing sequence. We try
12281 MAX_INTERM_CVT_STEPS to get to NARROW_VECTYPE, and fail if we do not. */
12282 interm_types->create (MAX_INTERM_CVT_STEPS);
12283 for (i = 0; i < MAX_INTERM_CVT_STEPS; i++)
12284 {
12285 intermediate_mode = insn_data[icode1].operand[0].mode;
12286 if (VECTOR_BOOLEAN_TYPE_P (prev_type))
12287 intermediate_type
12288 = vect_double_mask_nunits (prev_type, intermediate_mode);
12289 else
12290 intermediate_type
12291 = lang_hooks.types.type_for_mode (intermediate_mode, uns);
12292 if (VECTOR_BOOLEAN_TYPE_P (intermediate_type)
12293 && VECTOR_BOOLEAN_TYPE_P (prev_type)
12294 && intermediate_mode == prev_mode
12295 && SCALAR_INT_MODE_P (prev_mode))
12296 interm_optab = vec_pack_sbool_trunc_optab;
12297 else
12298 interm_optab
12299 = optab_for_tree_code (VEC_PACK_TRUNC_EXPR, intermediate_type,
12300 optab_default);
12301 if (!interm_optab
12302 || ((icode1 = optab_handler (optab1, prev_mode)) == CODE_FOR_nothing)
12303 || insn_data[icode1].operand[0].mode != intermediate_mode
12304 || ((icode1 = optab_handler (interm_optab, intermediate_mode))
12305 == CODE_FOR_nothing))
12306 break;
12307
12308 interm_types->quick_push (intermediate_type);
12309 (*multi_step_cvt)++;
12310
12311 if (insn_data[icode1].operand[0].mode == TYPE_MODE (narrow_vectype))
12312 {
12313 if (!VECTOR_BOOLEAN_TYPE_P (vectype))
12314 return true;
12315 if (known_eq (TYPE_VECTOR_SUBPARTS (intermediate_type) * 2,
12316 TYPE_VECTOR_SUBPARTS (narrow_vectype)))
12317 return true;
12318 }
12319
12320 prev_mode = intermediate_mode;
12321 prev_type = intermediate_type;
12322 optab1 = interm_optab;
12323 }
12324
12325 interm_types->release ();
12326 return false;
12327 }
12328
12329 /* Generate and return a statement that sets vector mask MASK such that
12330 MASK[I] is true iff J + START_INDEX < END_INDEX for all J <= I. */
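/* A sketch of the effect, with illustrative numbers: for an 8-element
   boolean MASK, START_INDEX 3 and END_INDEX 7, the generated call
   (printed as .WHILE_ULT in dumps) sets

       MASK = { 1, 1, 1, 1, 0, 0, 0, 0 }

   i.e. element I is set exactly when I + 3 < 7.  The loop-masking code
   uses this to build controls for partially populated vector
   iterations.  */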
12331
12332 gcall *
12333 vect_gen_while (tree mask, tree start_index, tree end_index)
12334 {
12335 tree cmp_type = TREE_TYPE (start_index);
12336 tree mask_type = TREE_TYPE (mask);
12337 gcc_checking_assert (direct_internal_fn_supported_p (IFN_WHILE_ULT,
12338 cmp_type, mask_type,
12339 OPTIMIZE_FOR_SPEED));
12340 gcall *call = gimple_build_call_internal (IFN_WHILE_ULT, 3,
12341 start_index, end_index,
12342 build_zero_cst (mask_type));
12343 gimple_call_set_lhs (call, mask);
12344 return call;
12345 }
12346
12347 /* Generate a vector mask of type MASK_TYPE for which index I is false iff
12348 J + START_INDEX < END_INDEX for all J <= I. Add the statements to SEQ. */
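/* With the same illustrative numbers as above (START_INDEX 3, END_INDEX 7,
   eight elements) the mask returned here is the complement
   { 0, 0, 0, 0, 1, 1, 1, 1 }.  */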
12349
12350 tree
12351 vect_gen_while_not (gimple_seq *seq, tree mask_type, tree start_index,
12352 tree end_index)
12353 {
12354 tree tmp = make_ssa_name (mask_type);
12355 gcall *call = vect_gen_while (tmp, start_index, end_index);
12356 gimple_seq_add_stmt (seq, call);
12357 return gimple_build (seq, BIT_NOT_EXPR, mask_type, tmp);
12358 }
12359
12360 /* Try to compute the vector types required to vectorize STMT_INFO,
12361 returning true on success and false if vectorization isn't possible.
12362 If GROUP_SIZE is nonzero and we're performing BB vectorization,
12363 make sure that the number of elements in the vectors is no bigger
12364 than GROUP_SIZE.
12365
12366 On success:
12367
12368 - Set *STMT_VECTYPE_OUT to:
12369 - NULL_TREE if the statement doesn't need to be vectorized;
12370 - the equivalent of STMT_VINFO_VECTYPE otherwise.
12371
12372 - Set *NUNITS_VECTYPE_OUT to the vector type that contains the maximum
12373 number of units needed to vectorize STMT_INFO, or NULL_TREE if the
12374 statement does not help to determine the overall number of units. */
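/* A worked example (illustrative, assuming 128-bit vectors): for the
   widening statement

       int_var = (int) short_var;

   *STMT_VECTYPE_OUT is vector(4) int, taken from the type of the lhs,
   while *NUNITS_VECTYPE_OUT is vector(8) short int, because the
   smallest scalar type involved (short) determines how many units the
   vectorization factor has to cover.  */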
12375
12376 opt_result
12377 vect_get_vector_types_for_stmt (vec_info *vinfo, stmt_vec_info stmt_info,
12378 tree *stmt_vectype_out,
12379 tree *nunits_vectype_out,
12380 unsigned int group_size)
12381 {
12382 gimple *stmt = stmt_info->stmt;
12383
12384 /* For BB vectorization, we should always have a group size once we've
12385 constructed the SLP tree; the only valid uses of zero GROUP_SIZEs
12386 are tentative requests during things like early data reference
12387 analysis and pattern recognition. */
12388 if (is_a <bb_vec_info> (vinfo))
12389 gcc_assert (vinfo->slp_instances.is_empty () || group_size != 0);
12390 else
12391 group_size = 0;
12392
12393 *stmt_vectype_out = NULL_TREE;
12394 *nunits_vectype_out = NULL_TREE;
12395
12396 if (gimple_get_lhs (stmt) == NULL_TREE
12397 /* MASK_STORE has no lhs, but is ok. */
12398 && !gimple_call_internal_p (stmt, IFN_MASK_STORE))
12399 {
12400 if (is_a <gcall *> (stmt))
12401 {
12402 /* Ignore calls with no lhs. These must be calls to
12403 #pragma omp simd functions, and what vectorization factor
12404 they really need can't be determined until
12405 vectorizable_simd_clone_call. */
12406 if (dump_enabled_p ())
12407 dump_printf_loc (MSG_NOTE, vect_location,
12408 "defer to SIMD clone analysis.\n");
12409 return opt_result::success ();
12410 }
12411
12412 return opt_result::failure_at (stmt,
12413 "not vectorized: irregular stmt.%G", stmt);
12414 }
12415
12416 if (VECTOR_MODE_P (TYPE_MODE (gimple_expr_type (stmt))))
12417 return opt_result::failure_at (stmt,
12418 "not vectorized: vector stmt in loop:%G",
12419 stmt);
12420
12421 tree vectype;
12422 tree scalar_type = NULL_TREE;
12423 if (group_size == 0 && STMT_VINFO_VECTYPE (stmt_info))
12424 {
12425 vectype = STMT_VINFO_VECTYPE (stmt_info);
12426 if (dump_enabled_p ())
12427 dump_printf_loc (MSG_NOTE, vect_location,
12428 "precomputed vectype: %T\n", vectype);
12429 }
12430 else if (vect_use_mask_type_p (stmt_info))
12431 {
12432 unsigned int precision = stmt_info->mask_precision;
12433 scalar_type = build_nonstandard_integer_type (precision, 1);
12434 vectype = get_mask_type_for_scalar_type (vinfo, scalar_type, group_size);
12435 if (!vectype)
12436 return opt_result::failure_at (stmt, "not vectorized: unsupported"
12437 " data-type %T\n", scalar_type);
12438 if (dump_enabled_p ())
12439 dump_printf_loc (MSG_NOTE, vect_location, "vectype: %T\n", vectype);
12440 }
12441 else
12442 {
12443 if (data_reference *dr = STMT_VINFO_DATA_REF (stmt_info))
12444 scalar_type = TREE_TYPE (DR_REF (dr));
12445 else if (gimple_call_internal_p (stmt, IFN_MASK_STORE))
12446 scalar_type = TREE_TYPE (gimple_call_arg (stmt, 3));
12447 else
12448 scalar_type = TREE_TYPE (gimple_get_lhs (stmt));
12449
12450 if (dump_enabled_p ())
12451 {
12452 if (group_size)
12453 dump_printf_loc (MSG_NOTE, vect_location,
12454 "get vectype for scalar type (group size %d):"
12455 " %T\n", group_size, scalar_type);
12456 else
12457 dump_printf_loc (MSG_NOTE, vect_location,
12458 "get vectype for scalar type: %T\n", scalar_type);
12459 }
12460 vectype = get_vectype_for_scalar_type (vinfo, scalar_type, group_size);
12461 if (!vectype)
12462 return opt_result::failure_at (stmt,
12463 "not vectorized:"
12464 " unsupported data-type %T\n",
12465 scalar_type);
12466
12467 if (dump_enabled_p ())
12468 dump_printf_loc (MSG_NOTE, vect_location, "vectype: %T\n", vectype);
12469 }
12470 *stmt_vectype_out = vectype;
12471
12472 /* Don't try to compute scalar types if the stmt produces a boolean
12473 vector; use the existing vector type instead. */
12474 tree nunits_vectype = vectype;
12475 if (!VECTOR_BOOLEAN_TYPE_P (vectype))
12476 {
12477 /* The number of units is set according to the smallest scalar
12478 type (or the largest vector size, but we only support one
12479 vector size per vectorization). */
12480 HOST_WIDE_INT dummy;
12481 scalar_type = vect_get_smallest_scalar_type (stmt_info, &dummy, &dummy);
12482 if (scalar_type != TREE_TYPE (vectype))
12483 {
12484 if (dump_enabled_p ())
12485 dump_printf_loc (MSG_NOTE, vect_location,
12486 "get vectype for smallest scalar type: %T\n",
12487 scalar_type);
12488 nunits_vectype = get_vectype_for_scalar_type (vinfo, scalar_type,
12489 group_size);
12490 if (!nunits_vectype)
12491 return opt_result::failure_at
12492 (stmt, "not vectorized: unsupported data-type %T\n",
12493 scalar_type);
12494 if (dump_enabled_p ())
12495 dump_printf_loc (MSG_NOTE, vect_location, "nunits vectype: %T\n",
12496 nunits_vectype);
12497 }
12498 }
12499
12500 gcc_assert (multiple_p (TYPE_VECTOR_SUBPARTS (nunits_vectype),
12501 TYPE_VECTOR_SUBPARTS (*stmt_vectype_out)));
12502
12503 if (dump_enabled_p ())
12504 {
12505 dump_printf_loc (MSG_NOTE, vect_location, "nunits = ");
12506 dump_dec (MSG_NOTE, TYPE_VECTOR_SUBPARTS (nunits_vectype));
12507 dump_printf (MSG_NOTE, "\n");
12508 }
12509
12510 *nunits_vectype_out = nunits_vectype;
12511 return opt_result::success ();
12512 }