gcc.git: gcc/tree-vect-stmts.c (commit 2f92bb5555e6590002e2d688bc7276a99a96a624)
1 /* Statement Analysis and Transformation for Vectorization
2 Copyright (C) 2003-2020 Free Software Foundation, Inc.
3 Contributed by Dorit Naishlos <dorit@il.ibm.com>
4 and Ira Rosen <irar@il.ibm.com>
5
6 This file is part of GCC.
7
8 GCC is free software; you can redistribute it and/or modify it under
9 the terms of the GNU General Public License as published by the Free
10 Software Foundation; either version 3, or (at your option) any later
11 version.
12
13 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
14 WARRANTY; without even the implied warranty of MERCHANTABILITY or
15 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
16 for more details.
17
18 You should have received a copy of the GNU General Public License
19 along with GCC; see the file COPYING3. If not see
20 <http://www.gnu.org/licenses/>. */
21
22 #include "config.h"
23 #include "system.h"
24 #include "coretypes.h"
25 #include "backend.h"
26 #include "target.h"
27 #include "rtl.h"
28 #include "tree.h"
29 #include "gimple.h"
30 #include "ssa.h"
31 #include "optabs-tree.h"
32 #include "insn-config.h"
33 #include "recog.h" /* FIXME: for insn_data */
34 #include "cgraph.h"
35 #include "dumpfile.h"
36 #include "alias.h"
37 #include "fold-const.h"
38 #include "stor-layout.h"
39 #include "tree-eh.h"
40 #include "gimplify.h"
41 #include "gimple-iterator.h"
42 #include "gimplify-me.h"
43 #include "tree-cfg.h"
44 #include "tree-ssa-loop-manip.h"
45 #include "cfgloop.h"
46 #include "explow.h"
47 #include "tree-ssa-loop.h"
48 #include "tree-scalar-evolution.h"
49 #include "tree-vectorizer.h"
50 #include "builtins.h"
51 #include "internal-fn.h"
52 #include "tree-vector-builder.h"
53 #include "vec-perm-indices.h"
54 #include "tree-ssa-loop-niter.h"
55 #include "gimple-fold.h"
56 #include "regs.h"
57 #include "attribs.h"
58
59 /* For lang_hooks.types.type_for_mode. */
60 #include "langhooks.h"
61
62 /* Return the vectorized type for the given statement. */
63
64 tree
65 stmt_vectype (class _stmt_vec_info *stmt_info)
66 {
67 return STMT_VINFO_VECTYPE (stmt_info);
68 }
69
70 /* Return TRUE iff the given statement is in an inner loop relative to
71 the loop being vectorized. */
72 bool
73 stmt_in_inner_loop_p (vec_info *vinfo, class _stmt_vec_info *stmt_info)
74 {
75 gimple *stmt = STMT_VINFO_STMT (stmt_info);
76 basic_block bb = gimple_bb (stmt);
77 loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
78 class loop* loop;
79
80 if (!loop_vinfo)
81 return false;
82
83 loop = LOOP_VINFO_LOOP (loop_vinfo);
84
85 return (bb->loop_father == loop->inner);
86 }
87
88 /* Record the cost of a statement, either by directly informing the
89 target model or by saving it in a vector for later processing.
90 Return a preliminary estimate of the statement's cost. */
91
92 unsigned
93 record_stmt_cost (stmt_vector_for_cost *body_cost_vec, int count,
94 enum vect_cost_for_stmt kind, stmt_vec_info stmt_info,
95 tree vectype, int misalign,
96 enum vect_cost_model_location where)
97 {
98 if ((kind == vector_load || kind == unaligned_load)
99 && (stmt_info && STMT_VINFO_GATHER_SCATTER_P (stmt_info)))
100 kind = vector_gather_load;
101 if ((kind == vector_store || kind == unaligned_store)
102 && (stmt_info && STMT_VINFO_GATHER_SCATTER_P (stmt_info)))
103 kind = vector_scatter_store;
104
105 stmt_info_for_cost si = { count, kind, where, stmt_info, vectype, misalign };
106 body_cost_vec->safe_push (si);
107
108 return (unsigned)
109 (builtin_vectorization_cost (kind, vectype, misalign) * count);
110 }
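
/* For example, a caller costing NCOPIES vector loads in the loop body
   might do:

     unsigned cost = record_stmt_cost (cost_vec, ncopies, vector_load,
                                       stmt_info, vectype, 0, vect_body);

   If STMT_VINFO_GATHER_SCATTER_P is set on STMT_INFO, the kind is
   adjusted to vector_gather_load before the entry is pushed onto the
   cost vector, and the returned estimate comes from
   builtin_vectorization_cost times COUNT.  */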
111
112 /* Return a variable of type ELEM_TYPE[NELEMS]. */
113
114 static tree
115 create_vector_array (tree elem_type, unsigned HOST_WIDE_INT nelems)
116 {
117 return create_tmp_var (build_array_type_nelts (elem_type, nelems),
118 "vect_array");
119 }
120
121 /* ARRAY is an array of vectors created by create_vector_array.
122 Return an SSA_NAME for the vector in index N. The reference
123 is part of the vectorization of STMT_INFO and the vector is associated
124 with scalar destination SCALAR_DEST. */
125
126 static tree
127 read_vector_array (vec_info *vinfo,
128 stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
129 tree scalar_dest, tree array, unsigned HOST_WIDE_INT n)
130 {
131 tree vect_type, vect, vect_name, array_ref;
132 gimple *new_stmt;
133
134 gcc_assert (TREE_CODE (TREE_TYPE (array)) == ARRAY_TYPE);
135 vect_type = TREE_TYPE (TREE_TYPE (array));
136 vect = vect_create_destination_var (scalar_dest, vect_type);
137 array_ref = build4 (ARRAY_REF, vect_type, array,
138 build_int_cst (size_type_node, n),
139 NULL_TREE, NULL_TREE);
140
141 new_stmt = gimple_build_assign (vect, array_ref);
142 vect_name = make_ssa_name (vect, new_stmt);
143 gimple_assign_set_lhs (new_stmt, vect_name);
144 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
145
146 return vect_name;
147 }
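
/* For example, reading index 2 of such a vector array emits roughly

     vect_x.7 = vect_array[2];

   i.e. an ARRAY_REF of ARRAY assigned to a fresh SSA name derived
   from SCALAR_DEST.  */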
148
149 /* ARRAY is an array of vectors created by create_vector_array.
150 Emit code to store SSA_NAME VECT in index N of the array.
151 The store is part of the vectorization of STMT_INFO. */
152
153 static void
154 write_vector_array (vec_info *vinfo,
155 stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
156 tree vect, tree array, unsigned HOST_WIDE_INT n)
157 {
158 tree array_ref;
159 gimple *new_stmt;
160
161 array_ref = build4 (ARRAY_REF, TREE_TYPE (vect), array,
162 build_int_cst (size_type_node, n),
163 NULL_TREE, NULL_TREE);
164
165 new_stmt = gimple_build_assign (array_ref, vect);
166 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
167 }
168
169 /* PTR is a pointer to an array of type TYPE. Return a representation
170 of *PTR. The memory reference replaces those in FIRST_DR
171 (and its group). */
172
173 static tree
174 create_array_ref (tree type, tree ptr, tree alias_ptr_type)
175 {
176 tree mem_ref;
177
178 mem_ref = build2 (MEM_REF, type, ptr, build_int_cst (alias_ptr_type, 0));
179 /* Arrays have the same alignment as their type. */
180 set_ptr_info_alignment (get_ptr_info (ptr), TYPE_ALIGN_UNIT (type), 0);
181 return mem_ref;
182 }
183
184 /* Add a clobber of variable VAR to the vectorization of STMT_INFO.
185 Emit the clobber before *GSI. */
186
187 static void
188 vect_clobber_variable (vec_info *vinfo, stmt_vec_info stmt_info,
189 gimple_stmt_iterator *gsi, tree var)
190 {
191 tree clobber = build_clobber (TREE_TYPE (var));
192 gimple *new_stmt = gimple_build_assign (var, clobber);
193 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
194 }
195
196 /* Utility functions used by vect_mark_stmts_to_be_vectorized. */
197
198 /* Function vect_mark_relevant.
199
200 Mark STMT_INFO as "relevant for vectorization" and add it to WORKLIST. */
201
202 static void
203 vect_mark_relevant (vec<stmt_vec_info> *worklist, stmt_vec_info stmt_info,
204 enum vect_relevant relevant, bool live_p)
205 {
206 enum vect_relevant save_relevant = STMT_VINFO_RELEVANT (stmt_info);
207 bool save_live_p = STMT_VINFO_LIVE_P (stmt_info);
208
209 if (dump_enabled_p ())
210 dump_printf_loc (MSG_NOTE, vect_location,
211 "mark relevant %d, live %d: %G", relevant, live_p,
212 stmt_info->stmt);
213
214 /* If this stmt is an original stmt in a pattern, we might need to mark its
215 related pattern stmt instead of the original stmt. However, such stmts
216 may have their own uses that are not in any pattern; in such cases the
217 stmt itself should be marked. */
218 if (STMT_VINFO_IN_PATTERN_P (stmt_info))
219 {
220 /* This is the last stmt in a sequence that was detected as a
221 pattern that can potentially be vectorized. Don't mark the stmt
222 as relevant/live because it's not going to be vectorized.
223 Instead mark the pattern-stmt that replaces it. */
224
225 if (dump_enabled_p ())
226 dump_printf_loc (MSG_NOTE, vect_location,
227 "last stmt in pattern. don't mark"
228 " relevant/live.\n");
229 stmt_vec_info old_stmt_info = stmt_info;
230 stmt_info = STMT_VINFO_RELATED_STMT (stmt_info);
231 gcc_assert (STMT_VINFO_RELATED_STMT (stmt_info) == old_stmt_info);
232 save_relevant = STMT_VINFO_RELEVANT (stmt_info);
233 save_live_p = STMT_VINFO_LIVE_P (stmt_info);
234 }
235
236 STMT_VINFO_LIVE_P (stmt_info) |= live_p;
237 if (relevant > STMT_VINFO_RELEVANT (stmt_info))
238 STMT_VINFO_RELEVANT (stmt_info) = relevant;
239
240 if (STMT_VINFO_RELEVANT (stmt_info) == save_relevant
241 && STMT_VINFO_LIVE_P (stmt_info) == save_live_p)
242 {
243 if (dump_enabled_p ())
244 dump_printf_loc (MSG_NOTE, vect_location,
245 "already marked relevant/live.\n");
246 return;
247 }
248
249 worklist->safe_push (stmt_info);
250 }
251
252
253 /* Function is_simple_and_all_uses_invariant
254
255 Return true if STMT_INFO is simple and all uses of it are invariant. */
256
257 bool
258 is_simple_and_all_uses_invariant (stmt_vec_info stmt_info,
259 loop_vec_info loop_vinfo)
260 {
261 tree op;
262 ssa_op_iter iter;
263
264 gassign *stmt = dyn_cast <gassign *> (stmt_info->stmt);
265 if (!stmt)
266 return false;
267
268 FOR_EACH_SSA_TREE_OPERAND (op, stmt, iter, SSA_OP_USE)
269 {
270 enum vect_def_type dt = vect_uninitialized_def;
271
272 if (!vect_is_simple_use (op, loop_vinfo, &dt))
273 {
274 if (dump_enabled_p ())
275 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
276 "use not simple.\n");
277 return false;
278 }
279
280 if (dt != vect_external_def && dt != vect_constant_def)
281 return false;
282 }
283 return true;
284 }
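
/* For example, in

     for (i = 0; i < n; i++)
       {
         t_5 = a_1 + b_2;
         c[i] = t_5;
       }

   the assignment to t_5 satisfies the check above when a_1 and b_2
   are defined outside the loop (vect_external_def) or are constants
   (vect_constant_def).  */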
285
286 /* Function vect_stmt_relevant_p.
287
288 Return true if STMT_INFO, in the loop that is represented by LOOP_VINFO,
289 is "relevant for vectorization".
290
291 A stmt is considered "relevant for vectorization" if:
292 - it has uses outside the loop.
293 - it has vdefs (it alters memory).
294 - it is a control stmt in the loop (except for the exit condition).
295
296 CHECKME: what other side effects would the vectorizer allow? */
297
298 static bool
299 vect_stmt_relevant_p (stmt_vec_info stmt_info, loop_vec_info loop_vinfo,
300 enum vect_relevant *relevant, bool *live_p)
301 {
302 class loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
303 ssa_op_iter op_iter;
304 imm_use_iterator imm_iter;
305 use_operand_p use_p;
306 def_operand_p def_p;
307
308 *relevant = vect_unused_in_scope;
309 *live_p = false;
310
311 /* cond stmt other than loop exit cond. */
312 if (is_ctrl_stmt (stmt_info->stmt)
313 && STMT_VINFO_TYPE (stmt_info) != loop_exit_ctrl_vec_info_type)
314 *relevant = vect_used_in_scope;
315
316 /* changing memory. */
317 if (gimple_code (stmt_info->stmt) != GIMPLE_PHI)
318 if (gimple_vdef (stmt_info->stmt)
319 && !gimple_clobber_p (stmt_info->stmt))
320 {
321 if (dump_enabled_p ())
322 dump_printf_loc (MSG_NOTE, vect_location,
323 "vec_stmt_relevant_p: stmt has vdefs.\n");
324 *relevant = vect_used_in_scope;
325 }
326
327 /* uses outside the loop. */
328 FOR_EACH_PHI_OR_STMT_DEF (def_p, stmt_info->stmt, op_iter, SSA_OP_DEF)
329 {
330 FOR_EACH_IMM_USE_FAST (use_p, imm_iter, DEF_FROM_PTR (def_p))
331 {
332 basic_block bb = gimple_bb (USE_STMT (use_p));
333 if (!flow_bb_inside_loop_p (loop, bb))
334 {
335 if (is_gimple_debug (USE_STMT (use_p)))
336 continue;
337
338 if (dump_enabled_p ())
339 dump_printf_loc (MSG_NOTE, vect_location,
340 "vec_stmt_relevant_p: used out of loop.\n");
341
342 /* We expect all such uses to be in the loop exit phis
343 (because of loop-closed SSA form). */
344 gcc_assert (gimple_code (USE_STMT (use_p)) == GIMPLE_PHI);
345 gcc_assert (bb == single_exit (loop)->dest);
346
347 *live_p = true;
348 }
349 }
350 }
351
352 if (*live_p && *relevant == vect_unused_in_scope
353 && !is_simple_and_all_uses_invariant (stmt_info, loop_vinfo))
354 {
355 if (dump_enabled_p ())
356 dump_printf_loc (MSG_NOTE, vect_location,
357 "vec_stmt_relevant_p: stmt live but not relevant.\n");
358 *relevant = vect_used_only_live;
359 }
360
361 return (*live_p || *relevant);
362 }
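
/* For example, in

     for (i = 0; i < n; i++)
       {
         sum_4 = sum_3 + a[i];   <-- sum_4 used in the loop-exit PHI
         b[i] = x_6;             <-- has a vdef
       }

   the summation stmt gets *LIVE_P set because its def is used (via the
   exit PHI) outside the loop, and the store is marked vect_used_in_scope
   because it alters memory.  */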
363
364
365 /* Function exist_non_indexing_operands_for_use_p
366
367 USE is one of the uses attached to STMT_INFO. Check if USE is
368 used in STMT_INFO for anything other than indexing an array. */
369
370 static bool
371 exist_non_indexing_operands_for_use_p (tree use, stmt_vec_info stmt_info)
372 {
373 tree operand;
374
375 /* USE corresponds to some operand in STMT. If there is no data
376 reference in STMT, then any operand that corresponds to USE
377 is not indexing an array. */
378 if (!STMT_VINFO_DATA_REF (stmt_info))
379 return true;
380
381 /* STMT has a data_ref. FORNOW this means that it's of one of
382 the following forms:
383 -1- ARRAY_REF = var
384 -2- var = ARRAY_REF
385 (This should have been verified in analyze_data_refs).
386
387 'var' in the second case corresponds to a def, not a use,
388 so USE cannot correspond to any operands that are not used
389 for array indexing.
390
391 Therefore, all we need to check is if STMT falls into the
392 first case, and whether var corresponds to USE. */
393
394 gassign *assign = dyn_cast <gassign *> (stmt_info->stmt);
395 if (!assign || !gimple_assign_copy_p (assign))
396 {
397 gcall *call = dyn_cast <gcall *> (stmt_info->stmt);
398 if (call && gimple_call_internal_p (call))
399 {
400 internal_fn ifn = gimple_call_internal_fn (call);
401 int mask_index = internal_fn_mask_index (ifn);
402 if (mask_index >= 0
403 && use == gimple_call_arg (call, mask_index))
404 return true;
405 int stored_value_index = internal_fn_stored_value_index (ifn);
406 if (stored_value_index >= 0
407 && use == gimple_call_arg (call, stored_value_index))
408 return true;
409 if (internal_gather_scatter_fn_p (ifn)
410 && use == gimple_call_arg (call, 1))
411 return true;
412 }
413 return false;
414 }
415
416 if (TREE_CODE (gimple_assign_lhs (assign)) == SSA_NAME)
417 return false;
418 operand = gimple_assign_rhs1 (assign);
419 if (TREE_CODE (operand) != SSA_NAME)
420 return false;
421
422 if (operand == use)
423 return true;
424
425 return false;
426 }
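
/* For example, for the store a[i_7] = x_5 the use of i_7 only feeds the
   address computation, so this function returns false for it, whereas
   the use of x_5 is the stored value itself and the function returns
   true.  */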
427
428
429 /*
430 Function process_use.
431
432 Inputs:
433 - a USE in STMT_VINFO in a loop represented by LOOP_VINFO
434 - RELEVANT - enum value to be set in the STMT_VINFO of the stmt
435 that defined USE. This is done by calling mark_relevant and passing it
436 the WORKLIST (to add DEF_STMT to the WORKLIST in case it is relevant).
437 - FORCE is true if the exist_non_indexing_operands_for_use_p check shouldn't
438 be performed.
439
440 Outputs:
441 Generally, LIVE_P and RELEVANT are used to define the liveness and
442 relevance info of the DEF_STMT of this USE:
443 STMT_VINFO_LIVE_P (DEF_stmt_vinfo) <-- live_p
444 STMT_VINFO_RELEVANT (DEF_stmt_vinfo) <-- relevant
445 Exceptions:
446 - case 1: If USE is used only for address computations (e.g. array indexing),
447 which does not need to be directly vectorized, then the liveness/relevance
448 of the respective DEF_STMT is left unchanged.
449 - case 2: If STMT_VINFO is a reduction phi and DEF_STMT is a reduction stmt,
450 we skip DEF_STMT because it has already been processed.
451 - case 3: If DEF_STMT and STMT_VINFO are in different nests, then
452 "relevant" will be modified accordingly.
453
454 Return opt_result::success () if everything is as expected, a failure otherwise. */
455
456 static opt_result
457 process_use (stmt_vec_info stmt_vinfo, tree use, loop_vec_info loop_vinfo,
458 enum vect_relevant relevant, vec<stmt_vec_info> *worklist,
459 bool force)
460 {
461 stmt_vec_info dstmt_vinfo;
462 enum vect_def_type dt;
463
464 /* case 1: we are only interested in uses that need to be vectorized. Uses
465 that are used for address computation are not considered relevant. */
466 if (!force && !exist_non_indexing_operands_for_use_p (use, stmt_vinfo))
467 return opt_result::success ();
468
469 if (!vect_is_simple_use (use, loop_vinfo, &dt, &dstmt_vinfo))
470 return opt_result::failure_at (stmt_vinfo->stmt,
471 "not vectorized:"
472 " unsupported use in stmt.\n");
473
474 if (!dstmt_vinfo)
475 return opt_result::success ();
476
477 basic_block def_bb = gimple_bb (dstmt_vinfo->stmt);
478 basic_block bb = gimple_bb (stmt_vinfo->stmt);
479
480 /* case 2: A reduction phi (STMT) defined by a reduction stmt (DSTMT_VINFO).
481 We have to force the stmt live since the epilogue loop needs it to
482 continue computing the reduction. */
483 if (gimple_code (stmt_vinfo->stmt) == GIMPLE_PHI
484 && STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_reduction_def
485 && gimple_code (dstmt_vinfo->stmt) != GIMPLE_PHI
486 && STMT_VINFO_DEF_TYPE (dstmt_vinfo) == vect_reduction_def
487 && bb->loop_father == def_bb->loop_father)
488 {
489 if (dump_enabled_p ())
490 dump_printf_loc (MSG_NOTE, vect_location,
491 "reduc-stmt defining reduc-phi in the same nest.\n");
492 vect_mark_relevant (worklist, dstmt_vinfo, relevant, true);
493 return opt_result::success ();
494 }
495
496 /* case 3a: outer-loop stmt defining an inner-loop stmt:
497 outer-loop-header-bb:
498 d = dstmt_vinfo
499 inner-loop:
500 stmt # use (d)
501 outer-loop-tail-bb:
502 ... */
503 if (flow_loop_nested_p (def_bb->loop_father, bb->loop_father))
504 {
505 if (dump_enabled_p ())
506 dump_printf_loc (MSG_NOTE, vect_location,
507 "outer-loop def-stmt defining inner-loop stmt.\n");
508
509 switch (relevant)
510 {
511 case vect_unused_in_scope:
512 relevant = (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_nested_cycle) ?
513 vect_used_in_scope : vect_unused_in_scope;
514 break;
515
516 case vect_used_in_outer_by_reduction:
517 gcc_assert (STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_reduction_def);
518 relevant = vect_used_by_reduction;
519 break;
520
521 case vect_used_in_outer:
522 gcc_assert (STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_reduction_def);
523 relevant = vect_used_in_scope;
524 break;
525
526 case vect_used_in_scope:
527 break;
528
529 default:
530 gcc_unreachable ();
531 }
532 }
533
534 /* case 3b: inner-loop stmt defining an outer-loop stmt:
535 outer-loop-header-bb:
536 ...
537 inner-loop:
538 d = dstmt_vinfo
539 outer-loop-tail-bb (or outer-loop-exit-bb in double reduction):
540 stmt # use (d) */
541 else if (flow_loop_nested_p (bb->loop_father, def_bb->loop_father))
542 {
543 if (dump_enabled_p ())
544 dump_printf_loc (MSG_NOTE, vect_location,
545 "inner-loop def-stmt defining outer-loop stmt.\n");
546
547 switch (relevant)
548 {
549 case vect_unused_in_scope:
550 relevant = (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_reduction_def
551 || STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_double_reduction_def) ?
552 vect_used_in_outer_by_reduction : vect_unused_in_scope;
553 break;
554
555 case vect_used_by_reduction:
556 case vect_used_only_live:
557 relevant = vect_used_in_outer_by_reduction;
558 break;
559
560 case vect_used_in_scope:
561 relevant = vect_used_in_outer;
562 break;
563
564 default:
565 gcc_unreachable ();
566 }
567 }
568 /* We are also not interested in uses on loop PHI backedges that are
569 inductions. Otherwise we'll needlessly vectorize the IV increment
570 and cause hybrid SLP for SLP inductions. Unless the PHI is live
571 of course. */
572 else if (gimple_code (stmt_vinfo->stmt) == GIMPLE_PHI
573 && STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_induction_def
574 && ! STMT_VINFO_LIVE_P (stmt_vinfo)
575 && (PHI_ARG_DEF_FROM_EDGE (stmt_vinfo->stmt,
576 loop_latch_edge (bb->loop_father))
577 == use))
578 {
579 if (dump_enabled_p ())
580 dump_printf_loc (MSG_NOTE, vect_location,
581 "induction value on backedge.\n");
582 return opt_result::success ();
583 }
584
585
586 vect_mark_relevant (worklist, dstmt_vinfo, relevant, false);
587 return opt_result::success ();
588 }
589
590
591 /* Function vect_mark_stmts_to_be_vectorized.
592
593 Not all stmts in the loop need to be vectorized. For example:
594
595 for i...
596 for j...
597 1. T0 = i + j
598 2. T1 = a[T0]
599
600 3. j = j + 1
601
602 Stmt 1 and 3 do not need to be vectorized, because loop control and
603 addressing of vectorized data-refs are handled differently.
604
605 This pass detects such stmts. */
606
607 opt_result
608 vect_mark_stmts_to_be_vectorized (loop_vec_info loop_vinfo, bool *fatal)
609 {
610 class loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
611 basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
612 unsigned int nbbs = loop->num_nodes;
613 gimple_stmt_iterator si;
614 unsigned int i;
615 basic_block bb;
616 bool live_p;
617 enum vect_relevant relevant;
618
619 DUMP_VECT_SCOPE ("vect_mark_stmts_to_be_vectorized");
620
621 auto_vec<stmt_vec_info, 64> worklist;
622
623 /* 1. Init worklist. */
624 for (i = 0; i < nbbs; i++)
625 {
626 bb = bbs[i];
627 for (si = gsi_start_phis (bb); !gsi_end_p (si); gsi_next (&si))
628 {
629 stmt_vec_info phi_info = loop_vinfo->lookup_stmt (gsi_stmt (si));
630 if (dump_enabled_p ())
631 dump_printf_loc (MSG_NOTE, vect_location, "init: phi relevant? %G",
632 phi_info->stmt);
633
634 if (vect_stmt_relevant_p (phi_info, loop_vinfo, &relevant, &live_p))
635 vect_mark_relevant (&worklist, phi_info, relevant, live_p);
636 }
637 for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
638 {
639 stmt_vec_info stmt_info = loop_vinfo->lookup_stmt (gsi_stmt (si));
640 if (dump_enabled_p ())
641 dump_printf_loc (MSG_NOTE, vect_location,
642 "init: stmt relevant? %G", stmt_info->stmt);
643
644 if (vect_stmt_relevant_p (stmt_info, loop_vinfo, &relevant, &live_p))
645 vect_mark_relevant (&worklist, stmt_info, relevant, live_p);
646 }
647 }
648
649 /* 2. Process_worklist */
650 while (worklist.length () > 0)
651 {
652 use_operand_p use_p;
653 ssa_op_iter iter;
654
655 stmt_vec_info stmt_vinfo = worklist.pop ();
656 if (dump_enabled_p ())
657 dump_printf_loc (MSG_NOTE, vect_location,
658 "worklist: examine stmt: %G", stmt_vinfo->stmt);
659
660 /* Examine the USEs of STMT. For each USE, mark the stmt that defines it
661 (DEF_STMT) as relevant/irrelevant according to the relevance property
662 of STMT. */
663 relevant = STMT_VINFO_RELEVANT (stmt_vinfo);
664
665 /* Generally, the relevance property of STMT (in STMT_VINFO_RELEVANT) is
666 propagated as is to the DEF_STMTs of its USEs.
667
668 One exception is when STMT has been identified as defining a reduction
669 variable; in this case we set the relevance to vect_used_by_reduction.
670 This is because we distinguish between two kinds of relevant stmts -
671 those that are used by a reduction computation, and those that are
672 (also) used by a regular computation. This allows us later on to
673 identify stmts that are used solely by a reduction, and therefore the
674 order of the results that they produce does not have to be kept. */
675
676 switch (STMT_VINFO_DEF_TYPE (stmt_vinfo))
677 {
678 case vect_reduction_def:
679 gcc_assert (relevant != vect_unused_in_scope);
680 if (relevant != vect_unused_in_scope
681 && relevant != vect_used_in_scope
682 && relevant != vect_used_by_reduction
683 && relevant != vect_used_only_live)
684 return opt_result::failure_at
685 (stmt_vinfo->stmt, "unsupported use of reduction.\n");
686 break;
687
688 case vect_nested_cycle:
689 if (relevant != vect_unused_in_scope
690 && relevant != vect_used_in_outer_by_reduction
691 && relevant != vect_used_in_outer)
692 return opt_result::failure_at
693 (stmt_vinfo->stmt, "unsupported use of nested cycle.\n");
694 break;
695
696 case vect_double_reduction_def:
697 if (relevant != vect_unused_in_scope
698 && relevant != vect_used_by_reduction
699 && relevant != vect_used_only_live)
700 return opt_result::failure_at
701 (stmt_vinfo->stmt, "unsupported use of double reduction.\n");
702 break;
703
704 default:
705 break;
706 }
707
708 if (is_pattern_stmt_p (stmt_vinfo))
709 {
710 /* Pattern statements are not inserted into the code, so
711 FOR_EACH_PHI_OR_STMT_USE optimizes their operands out, and we
712 have to scan the RHS or function arguments instead. */
713 if (gassign *assign = dyn_cast <gassign *> (stmt_vinfo->stmt))
714 {
715 enum tree_code rhs_code = gimple_assign_rhs_code (assign);
716 tree op = gimple_assign_rhs1 (assign);
717
718 i = 1;
719 if (rhs_code == COND_EXPR && COMPARISON_CLASS_P (op))
720 {
721 opt_result res
722 = process_use (stmt_vinfo, TREE_OPERAND (op, 0),
723 loop_vinfo, relevant, &worklist, false);
724 if (!res)
725 return res;
726 res = process_use (stmt_vinfo, TREE_OPERAND (op, 1),
727 loop_vinfo, relevant, &worklist, false);
728 if (!res)
729 return res;
730 i = 2;
731 }
732 for (; i < gimple_num_ops (assign); i++)
733 {
734 op = gimple_op (assign, i);
735 if (TREE_CODE (op) == SSA_NAME)
736 {
737 opt_result res
738 = process_use (stmt_vinfo, op, loop_vinfo, relevant,
739 &worklist, false);
740 if (!res)
741 return res;
742 }
743 }
744 }
745 else if (gcall *call = dyn_cast <gcall *> (stmt_vinfo->stmt))
746 {
747 for (i = 0; i < gimple_call_num_args (call); i++)
748 {
749 tree arg = gimple_call_arg (call, i);
750 opt_result res
751 = process_use (stmt_vinfo, arg, loop_vinfo, relevant,
752 &worklist, false);
753 if (!res)
754 return res;
755 }
756 }
757 }
758 else
759 FOR_EACH_PHI_OR_STMT_USE (use_p, stmt_vinfo->stmt, iter, SSA_OP_USE)
760 {
761 tree op = USE_FROM_PTR (use_p);
762 opt_result res
763 = process_use (stmt_vinfo, op, loop_vinfo, relevant,
764 &worklist, false);
765 if (!res)
766 return res;
767 }
768
769 if (STMT_VINFO_GATHER_SCATTER_P (stmt_vinfo))
770 {
771 gather_scatter_info gs_info;
772 if (!vect_check_gather_scatter (stmt_vinfo, loop_vinfo, &gs_info))
773 gcc_unreachable ();
774 opt_result res
775 = process_use (stmt_vinfo, gs_info.offset, loop_vinfo, relevant,
776 &worklist, true);
777 if (!res)
778 {
779 if (fatal)
780 *fatal = false;
781 return res;
782 }
783 }
784 } /* while worklist */
785
786 return opt_result::success ();
787 }
788
789 /* Function vect_model_simple_cost.
790
791 Models cost for simple operations, i.e. those that only emit ncopies of a
792 single op. Right now, this does not account for multiple insns that could
793 be generated for the single vector op. We will handle that shortly. */
794
795 static void
796 vect_model_simple_cost (vec_info *,
797 stmt_vec_info stmt_info, int ncopies,
798 enum vect_def_type *dt,
799 int ndts,
800 slp_tree node,
801 stmt_vector_for_cost *cost_vec,
802 vect_cost_for_stmt kind = vector_stmt)
803 {
804 int inside_cost = 0, prologue_cost = 0;
805
806 gcc_assert (cost_vec != NULL);
807
808 /* ??? Somehow we need to fix this at the callers. */
809 if (node)
810 ncopies = SLP_TREE_NUMBER_OF_VEC_STMTS (node);
811
812 if (!node)
813 /* Cost the "broadcast" of a scalar operand into a vector operand.
814 Use scalar_to_vec to cost the broadcast, as elsewhere in the vector
815 cost model. */
816 for (int i = 0; i < ndts; i++)
817 if (dt[i] == vect_constant_def || dt[i] == vect_external_def)
818 prologue_cost += record_stmt_cost (cost_vec, 1, scalar_to_vec,
819 stmt_info, 0, vect_prologue);
820
821 /* Adjust for two-operator SLP nodes. */
822 if (node && SLP_TREE_TWO_OPERATORS (node))
823 {
824 ncopies *= 2;
825 inside_cost += record_stmt_cost (cost_vec, ncopies, vec_perm,
826 stmt_info, 0, vect_body);
827 }
828
829 /* Pass the inside-of-loop statements to the target-specific cost model. */
830 inside_cost += record_stmt_cost (cost_vec, ncopies, kind,
831 stmt_info, 0, vect_body);
832
833 if (dump_enabled_p ())
834 dump_printf_loc (MSG_NOTE, vect_location,
835 "vect_model_simple_cost: inside_cost = %d, "
836 "prologue_cost = %d .\n", inside_cost, prologue_cost);
837 }
838
839
840 /* Model cost for type demotion and promotion operations. PWR is
841 normally zero for single-step promotions and demotions. It will be
842 one if two-step promotion/demotion is required, and so on. NCOPIES
843 is the number of vector results (and thus number of instructions)
844 for the narrowest end of the operation chain. Each additional
845 step doubles the number of instructions required. */
846
847 static void
848 vect_model_promotion_demotion_cost (stmt_vec_info stmt_info,
849 enum vect_def_type *dt,
850 unsigned int ncopies, int pwr,
851 stmt_vector_for_cost *cost_vec)
852 {
853 int i;
854 int inside_cost = 0, prologue_cost = 0;
855
856 for (i = 0; i < pwr + 1; i++)
857 {
858 inside_cost += record_stmt_cost (cost_vec, ncopies, vec_promote_demote,
859 stmt_info, 0, vect_body);
860 ncopies *= 2;
861 }
862
863 /* FORNOW: Assuming maximum 2 args per stmts. */
864 for (i = 0; i < 2; i++)
865 if (dt[i] == vect_constant_def || dt[i] == vect_external_def)
866 prologue_cost += record_stmt_cost (cost_vec, 1, vector_stmt,
867 stmt_info, 0, vect_prologue);
868
869 if (dump_enabled_p ())
870 dump_printf_loc (MSG_NOTE, vect_location,
871 "vect_model_promotion_demotion_cost: inside_cost = %d, "
872 "prologue_cost = %d .\n", inside_cost, prologue_cost);
873 }
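
/* For example, with NCOPIES = 2 and PWR = 1 (a two-step promotion) the
   loop above records 2 + 4 = 6 vec_promote_demote stmts in the loop
   body; each constant or external operand additionally gets one
   vector_stmt in the prologue.  */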
874
875 /* Returns true if the current function returns DECL. */
876
877 static bool
878 cfun_returns (tree decl)
879 {
880 edge_iterator ei;
881 edge e;
882 FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR_FOR_FN (cfun)->preds)
883 {
884 greturn *ret = safe_dyn_cast <greturn *> (last_stmt (e->src));
885 if (!ret)
886 continue;
887 if (gimple_return_retval (ret) == decl)
888 return true;
889 /* We often end up with an aggregate copy to the result decl;
890 handle that case as well. First skip intermediate clobbers
891 though. */
892 gimple *def = ret;
893 do
894 {
895 def = SSA_NAME_DEF_STMT (gimple_vuse (def));
896 }
897 while (gimple_clobber_p (def));
898 if (is_a <gassign *> (def)
899 && gimple_assign_lhs (def) == gimple_return_retval (ret)
900 && gimple_assign_rhs1 (def) == decl)
901 return true;
902 }
903 return false;
904 }
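
/* For example, cfun_returns (decl) is true both for a direct

     return decl;

   and for the common aggregate-copy form

     <retval> = decl;
     decl = {CLOBBER};
     return <retval>;

   where the intermediate clobber is skipped while walking the virtual
   use chain.  */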
905
906 /* Function vect_model_store_cost
907
908 Models cost for stores. In the case of grouped accesses, one access
909 has the overhead of the grouped access attributed to it. */
910
911 static void
912 vect_model_store_cost (vec_info *vinfo, stmt_vec_info stmt_info, int ncopies,
913 vect_memory_access_type memory_access_type,
914 vec_load_store_type vls_type, slp_tree slp_node,
915 stmt_vector_for_cost *cost_vec)
916 {
917 unsigned int inside_cost = 0, prologue_cost = 0;
918 stmt_vec_info first_stmt_info = stmt_info;
919 bool grouped_access_p = STMT_VINFO_GROUPED_ACCESS (stmt_info);
920
921 /* ??? Somehow we need to fix this at the callers. */
922 if (slp_node)
923 ncopies = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
924
925 if (vls_type == VLS_STORE_INVARIANT)
926 {
927 if (!slp_node)
928 prologue_cost += record_stmt_cost (cost_vec, 1, scalar_to_vec,
929 stmt_info, 0, vect_prologue);
930 }
931
932 /* Grouped stores update all elements in the group at once,
933 so we want the DR for the first statement. */
934 if (!slp_node && grouped_access_p)
935 first_stmt_info = DR_GROUP_FIRST_ELEMENT (stmt_info);
936
937 /* True if we should include any once-per-group costs as well as
938 the cost of the statement itself. For SLP we only get called
939 once per group anyhow. */
940 bool first_stmt_p = (first_stmt_info == stmt_info);
941
942 /* We assume that the cost of a single store-lanes instruction is
943 equivalent to the cost of DR_GROUP_SIZE separate stores. If a grouped
944 access is instead being provided by a permute-and-store operation,
945 include the cost of the permutes. */
946 if (first_stmt_p
947 && memory_access_type == VMAT_CONTIGUOUS_PERMUTE)
948 {
949 /* Uses high and low interleave or shuffle operations for each
950 needed permute. */
951 int group_size = DR_GROUP_SIZE (first_stmt_info);
952 int nstmts = ncopies * ceil_log2 (group_size) * group_size;
953 inside_cost = record_stmt_cost (cost_vec, nstmts, vec_perm,
954 stmt_info, 0, vect_body);
955
956 if (dump_enabled_p ())
957 dump_printf_loc (MSG_NOTE, vect_location,
958 "vect_model_store_cost: strided group_size = %d .\n",
959 group_size);
960 }
961
962 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
963 /* Costs of the stores. */
964 if (memory_access_type == VMAT_ELEMENTWISE
965 || memory_access_type == VMAT_GATHER_SCATTER)
966 {
967 /* N scalar stores plus extracting the elements. */
968 unsigned int assumed_nunits = vect_nunits_for_cost (vectype);
969 inside_cost += record_stmt_cost (cost_vec,
970 ncopies * assumed_nunits,
971 scalar_store, stmt_info, 0, vect_body);
972 }
973 else
974 vect_get_store_cost (vinfo, stmt_info, ncopies, &inside_cost, cost_vec);
975
976 if (memory_access_type == VMAT_ELEMENTWISE
977 || memory_access_type == VMAT_STRIDED_SLP)
978 {
979 /* N scalar stores plus extracting the elements. */
980 unsigned int assumed_nunits = vect_nunits_for_cost (vectype);
981 inside_cost += record_stmt_cost (cost_vec,
982 ncopies * assumed_nunits,
983 vec_to_scalar, stmt_info, 0, vect_body);
984 }
985
986 /* When vectorizing a store into the function result assign
987 a penalty if the function returns in a multi-register location.
988 In this case we assume we'll end up having to spill the
989 vector result and do piecewise loads as a conservative estimate. */
990 tree base = get_base_address (STMT_VINFO_DATA_REF (stmt_info)->ref);
991 if (base
992 && (TREE_CODE (base) == RESULT_DECL
993 || (DECL_P (base) && cfun_returns (base)))
994 && !aggregate_value_p (base, cfun->decl))
995 {
996 rtx reg = hard_function_value (TREE_TYPE (base), cfun->decl, 0, 1);
997 /* ??? Handle PARALLEL in some way. */
998 if (REG_P (reg))
999 {
1000 int nregs = hard_regno_nregs (REGNO (reg), GET_MODE (reg));
1001 /* Assume that a single reg-reg move is possible and cheap,
1002 do not account for vector to gp register move cost. */
1003 if (nregs > 1)
1004 {
1005 /* Spill. */
1006 prologue_cost += record_stmt_cost (cost_vec, ncopies,
1007 vector_store,
1008 stmt_info, 0, vect_epilogue);
1009 /* Loads. */
1010 prologue_cost += record_stmt_cost (cost_vec, ncopies * nregs,
1011 scalar_load,
1012 stmt_info, 0, vect_epilogue);
1013 }
1014 }
1015 }
1016
1017 if (dump_enabled_p ())
1018 dump_printf_loc (MSG_NOTE, vect_location,
1019 "vect_model_store_cost: inside_cost = %d, "
1020 "prologue_cost = %d .\n", inside_cost, prologue_cost);
1021 }
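
/* For example, a VMAT_CONTIGUOUS_PERMUTE store group with
   DR_GROUP_SIZE = 4 and NCOPIES = 1 is costed above as
   1 * ceil_log2 (4) * 4 = 8 vec_perm stmts plus the cost of the
   stores themselves from vect_get_store_cost.  */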
1022
1023
1024 /* Calculate cost of DR's memory access. */
1025 void
1026 vect_get_store_cost (vec_info *vinfo, stmt_vec_info stmt_info, int ncopies,
1027 unsigned int *inside_cost,
1028 stmt_vector_for_cost *body_cost_vec)
1029 {
1030 dr_vec_info *dr_info = STMT_VINFO_DR_INFO (stmt_info);
1031 int alignment_support_scheme
1032 = vect_supportable_dr_alignment (vinfo, dr_info, false);
1033
1034 switch (alignment_support_scheme)
1035 {
1036 case dr_aligned:
1037 {
1038 *inside_cost += record_stmt_cost (body_cost_vec, ncopies,
1039 vector_store, stmt_info, 0,
1040 vect_body);
1041
1042 if (dump_enabled_p ())
1043 dump_printf_loc (MSG_NOTE, vect_location,
1044 "vect_model_store_cost: aligned.\n");
1045 break;
1046 }
1047
1048 case dr_unaligned_supported:
1049 {
1050 /* Here, we assign an additional cost for the unaligned store. */
1051 *inside_cost += record_stmt_cost (body_cost_vec, ncopies,
1052 unaligned_store, stmt_info,
1053 DR_MISALIGNMENT (dr_info),
1054 vect_body);
1055 if (dump_enabled_p ())
1056 dump_printf_loc (MSG_NOTE, vect_location,
1057 "vect_model_store_cost: unaligned supported by "
1058 "hardware.\n");
1059 break;
1060 }
1061
1062 case dr_unaligned_unsupported:
1063 {
1064 *inside_cost = VECT_MAX_COST;
1065
1066 if (dump_enabled_p ())
1067 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1068 "vect_model_store_cost: unsupported access.\n");
1069 break;
1070 }
1071
1072 default:
1073 gcc_unreachable ();
1074 }
1075 }
1076
1077
1078 /* Function vect_model_load_cost
1079
1080 Models cost for loads. In the case of grouped accesses, one access has
1081 the overhead of the grouped access attributed to it. Since unaligned
1082 accesses are supported for loads, we also account for the costs of the
1083 access scheme chosen. */
1084
1085 static void
1086 vect_model_load_cost (vec_info *vinfo,
1087 stmt_vec_info stmt_info, unsigned ncopies, poly_uint64 vf,
1088 vect_memory_access_type memory_access_type,
1089 slp_tree slp_node,
1090 stmt_vector_for_cost *cost_vec)
1091 {
1092 unsigned int inside_cost = 0, prologue_cost = 0;
1093 bool grouped_access_p = STMT_VINFO_GROUPED_ACCESS (stmt_info);
1094
1095 gcc_assert (cost_vec);
1096
1097 /* ??? Somehow we need to fix this at the callers. */
1098 if (slp_node)
1099 ncopies = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
1100
1101 if (slp_node && SLP_TREE_LOAD_PERMUTATION (slp_node).exists ())
1102 {
1103 /* If the load is permuted then the alignment is determined by
1104 the first group element not by the first scalar stmt DR. */
1105 stmt_vec_info first_stmt_info = DR_GROUP_FIRST_ELEMENT (stmt_info);
1106 /* Record the cost for the permutation. */
1107 unsigned n_perms;
1108 unsigned assumed_nunits
1109 = vect_nunits_for_cost (STMT_VINFO_VECTYPE (first_stmt_info));
1110 vect_transform_slp_perm_load (vinfo, slp_node, vNULL, NULL,
1111 vf, true, &n_perms);
1112 inside_cost += record_stmt_cost (cost_vec, n_perms, vec_perm,
1113 first_stmt_info, 0, vect_body);
1114 /* And adjust the number of loads performed. This handles
1115 redundancies as well as loads that are later dead. */
1116 auto_sbitmap perm (DR_GROUP_SIZE (first_stmt_info));
1117 bitmap_clear (perm);
1118 for (unsigned i = 0;
1119 i < SLP_TREE_LOAD_PERMUTATION (slp_node).length (); ++i)
1120 bitmap_set_bit (perm, SLP_TREE_LOAD_PERMUTATION (slp_node)[i]);
1121 ncopies = 0;
1122 bool load_seen = false;
1123 for (unsigned i = 0; i < DR_GROUP_SIZE (first_stmt_info); ++i)
1124 {
1125 if (i % assumed_nunits == 0)
1126 {
1127 if (load_seen)
1128 ncopies++;
1129 load_seen = false;
1130 }
1131 if (bitmap_bit_p (perm, i))
1132 load_seen = true;
1133 }
1134 if (load_seen)
1135 ncopies++;
1136 gcc_assert (ncopies
1137 <= (DR_GROUP_SIZE (first_stmt_info)
1138 - DR_GROUP_GAP (first_stmt_info)
1139 + assumed_nunits - 1) / assumed_nunits);
1140 }
1141
1142 /* Grouped loads read all elements in the group at once,
1143 so we want the DR for the first statement. */
1144 stmt_vec_info first_stmt_info = stmt_info;
1145 if (!slp_node && grouped_access_p)
1146 first_stmt_info = DR_GROUP_FIRST_ELEMENT (stmt_info);
1147
1148 /* True if we should include any once-per-group costs as well as
1149 the cost of the statement itself. For SLP we only get called
1150 once per group anyhow. */
1151 bool first_stmt_p = (first_stmt_info == stmt_info);
1152
1153 /* We assume that the cost of a single load-lanes instruction is
1154 equivalent to the cost of DR_GROUP_SIZE separate loads. If a grouped
1155 access is instead being provided by a load-and-permute operation,
1156 include the cost of the permutes. */
1157 if (first_stmt_p
1158 && memory_access_type == VMAT_CONTIGUOUS_PERMUTE)
1159 {
1160 /* Uses even and odd extract operations or shuffle operations
1161 for each needed permute. */
1162 int group_size = DR_GROUP_SIZE (first_stmt_info);
1163 int nstmts = ncopies * ceil_log2 (group_size) * group_size;
1164 inside_cost += record_stmt_cost (cost_vec, nstmts, vec_perm,
1165 stmt_info, 0, vect_body);
1166
1167 if (dump_enabled_p ())
1168 dump_printf_loc (MSG_NOTE, vect_location,
1169 "vect_model_load_cost: strided group_size = %d .\n",
1170 group_size);
1171 }
1172
1173 /* The loads themselves. */
1174 if (memory_access_type == VMAT_ELEMENTWISE
1175 || memory_access_type == VMAT_GATHER_SCATTER)
1176 {
1177 /* N scalar loads plus gathering them into a vector. */
1178 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
1179 unsigned int assumed_nunits = vect_nunits_for_cost (vectype);
1180 inside_cost += record_stmt_cost (cost_vec,
1181 ncopies * assumed_nunits,
1182 scalar_load, stmt_info, 0, vect_body);
1183 }
1184 else
1185 vect_get_load_cost (vinfo, stmt_info, ncopies, first_stmt_p,
1186 &inside_cost, &prologue_cost,
1187 cost_vec, cost_vec, true);
1188 if (memory_access_type == VMAT_ELEMENTWISE
1189 || memory_access_type == VMAT_STRIDED_SLP)
1190 inside_cost += record_stmt_cost (cost_vec, ncopies, vec_construct,
1191 stmt_info, 0, vect_body);
1192
1193 if (dump_enabled_p ())
1194 dump_printf_loc (MSG_NOTE, vect_location,
1195 "vect_model_load_cost: inside_cost = %d, "
1196 "prologue_cost = %d .\n", inside_cost, prologue_cost);
1197 }
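
/* For example, with DR_GROUP_SIZE = 4, an assumed nunits of 2 and an
   SLP load permutation that uses only elements {0, 1}, the bitmap scan
   above finds a load only in the first chunk of 2 elements, so NCOPIES
   is recomputed to 1 and only one vector load is costed in addition to
   the permutation cost.  */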
1198
1199
1200 /* Calculate cost of DR's memory access. */
1201 void
1202 vect_get_load_cost (vec_info *vinfo, stmt_vec_info stmt_info, int ncopies,
1203 bool add_realign_cost, unsigned int *inside_cost,
1204 unsigned int *prologue_cost,
1205 stmt_vector_for_cost *prologue_cost_vec,
1206 stmt_vector_for_cost *body_cost_vec,
1207 bool record_prologue_costs)
1208 {
1209 dr_vec_info *dr_info = STMT_VINFO_DR_INFO (stmt_info);
1210 int alignment_support_scheme
1211 = vect_supportable_dr_alignment (vinfo, dr_info, false);
1212
1213 switch (alignment_support_scheme)
1214 {
1215 case dr_aligned:
1216 {
1217 *inside_cost += record_stmt_cost (body_cost_vec, ncopies, vector_load,
1218 stmt_info, 0, vect_body);
1219
1220 if (dump_enabled_p ())
1221 dump_printf_loc (MSG_NOTE, vect_location,
1222 "vect_model_load_cost: aligned.\n");
1223
1224 break;
1225 }
1226 case dr_unaligned_supported:
1227 {
1228 /* Here, we assign an additional cost for the unaligned load. */
1229 *inside_cost += record_stmt_cost (body_cost_vec, ncopies,
1230 unaligned_load, stmt_info,
1231 DR_MISALIGNMENT (dr_info),
1232 vect_body);
1233
1234 if (dump_enabled_p ())
1235 dump_printf_loc (MSG_NOTE, vect_location,
1236 "vect_model_load_cost: unaligned supported by "
1237 "hardware.\n");
1238
1239 break;
1240 }
1241 case dr_explicit_realign:
1242 {
1243 *inside_cost += record_stmt_cost (body_cost_vec, ncopies * 2,
1244 vector_load, stmt_info, 0, vect_body);
1245 *inside_cost += record_stmt_cost (body_cost_vec, ncopies,
1246 vec_perm, stmt_info, 0, vect_body);
1247
1248 /* FIXME: If the misalignment remains fixed across the iterations of
1249 the containing loop, the following cost should be added to the
1250 prologue costs. */
1251 if (targetm.vectorize.builtin_mask_for_load)
1252 *inside_cost += record_stmt_cost (body_cost_vec, 1, vector_stmt,
1253 stmt_info, 0, vect_body);
1254
1255 if (dump_enabled_p ())
1256 dump_printf_loc (MSG_NOTE, vect_location,
1257 "vect_model_load_cost: explicit realign\n");
1258
1259 break;
1260 }
1261 case dr_explicit_realign_optimized:
1262 {
1263 if (dump_enabled_p ())
1264 dump_printf_loc (MSG_NOTE, vect_location,
1265 "vect_model_load_cost: unaligned software "
1266 "pipelined.\n");
1267
1268 /* Unaligned software pipeline has a load of an address, an initial
1269 load, and possibly a mask operation to "prime" the loop. However,
1270 if this is an access in a group of loads, which provide grouped
1271 access, then the above cost should only be considered for one
1272 access in the group. Inside the loop, there is a load op
1273 and a realignment op. */
1274
1275 if (add_realign_cost && record_prologue_costs)
1276 {
1277 *prologue_cost += record_stmt_cost (prologue_cost_vec, 2,
1278 vector_stmt, stmt_info,
1279 0, vect_prologue);
1280 if (targetm.vectorize.builtin_mask_for_load)
1281 *prologue_cost += record_stmt_cost (prologue_cost_vec, 1,
1282 vector_stmt, stmt_info,
1283 0, vect_prologue);
1284 }
1285
1286 *inside_cost += record_stmt_cost (body_cost_vec, ncopies, vector_load,
1287 stmt_info, 0, vect_body);
1288 *inside_cost += record_stmt_cost (body_cost_vec, ncopies, vec_perm,
1289 stmt_info, 0, vect_body);
1290
1291 if (dump_enabled_p ())
1292 dump_printf_loc (MSG_NOTE, vect_location,
1293 "vect_model_load_cost: explicit realign optimized"
1294 "\n");
1295
1296 break;
1297 }
1298
1299 case dr_unaligned_unsupported:
1300 {
1301 *inside_cost = VECT_MAX_COST;
1302
1303 if (dump_enabled_p ())
1304 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1305 "vect_model_load_cost: unsupported access.\n");
1306 break;
1307 }
1308
1309 default:
1310 gcc_unreachable ();
1311 }
1312 }
1313
1314 /* Insert the new stmt NEW_STMT at *GSI or at the appropriate place in
1315 the loop preheader for the vectorized stmt STMT_VINFO. */
1316
1317 static void
1318 vect_init_vector_1 (vec_info *vinfo, stmt_vec_info stmt_vinfo, gimple *new_stmt,
1319 gimple_stmt_iterator *gsi)
1320 {
1321 if (gsi)
1322 vect_finish_stmt_generation (vinfo, stmt_vinfo, new_stmt, gsi);
1323 else
1324 {
1325 loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
1326
1327 if (loop_vinfo)
1328 {
1329 class loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
1330 basic_block new_bb;
1331 edge pe;
1332
1333 if (nested_in_vect_loop_p (loop, stmt_vinfo))
1334 loop = loop->inner;
1335
1336 pe = loop_preheader_edge (loop);
1337 new_bb = gsi_insert_on_edge_immediate (pe, new_stmt);
1338 gcc_assert (!new_bb);
1339 }
1340 else
1341 {
1342 bb_vec_info bb_vinfo = dyn_cast <bb_vec_info> (vinfo);
1343 gimple_stmt_iterator gsi_region_begin = bb_vinfo->region_begin;
1344 gsi_insert_before (&gsi_region_begin, new_stmt, GSI_SAME_STMT);
1345 }
1346 }
1347
1348 if (dump_enabled_p ())
1349 dump_printf_loc (MSG_NOTE, vect_location,
1350 "created new init_stmt: %G", new_stmt);
1351 }
1352
1353 /* Function vect_init_vector.
1354
1355 Insert a new stmt (INIT_STMT) that initializes a new variable of type
1356 TYPE with the value VAL. If TYPE is a vector type and VAL does not have
1357 vector type, a vector with all elements equal to VAL is created first.
1358 Place the initialization at GSI if it is not NULL. Otherwise, place the
1359 initialization at the loop preheader.
1360 Return the DEF of INIT_STMT.
1361 It will be used in the vectorization of STMT_INFO. */
1362
1363 tree
1364 vect_init_vector (vec_info *vinfo, stmt_vec_info stmt_info, tree val, tree type,
1365 gimple_stmt_iterator *gsi)
1366 {
1367 gimple *init_stmt;
1368 tree new_temp;
1369
1370 /* We abuse this function to push something to an SSA name with the initial value 'val'. */
1371 if (! useless_type_conversion_p (type, TREE_TYPE (val)))
1372 {
1373 gcc_assert (TREE_CODE (type) == VECTOR_TYPE);
1374 if (! types_compatible_p (TREE_TYPE (type), TREE_TYPE (val)))
1375 {
1376 /* Scalar boolean value should be transformed into
1377 all zeros or all ones value before building a vector. */
1378 if (VECTOR_BOOLEAN_TYPE_P (type))
1379 {
1380 tree true_val = build_all_ones_cst (TREE_TYPE (type));
1381 tree false_val = build_zero_cst (TREE_TYPE (type));
1382
1383 if (CONSTANT_CLASS_P (val))
1384 val = integer_zerop (val) ? false_val : true_val;
1385 else
1386 {
1387 new_temp = make_ssa_name (TREE_TYPE (type));
1388 init_stmt = gimple_build_assign (new_temp, COND_EXPR,
1389 val, true_val, false_val);
1390 vect_init_vector_1 (vinfo, stmt_info, init_stmt, gsi);
1391 val = new_temp;
1392 }
1393 }
1394 else
1395 {
1396 gimple_seq stmts = NULL;
1397 if (! INTEGRAL_TYPE_P (TREE_TYPE (val)))
1398 val = gimple_build (&stmts, VIEW_CONVERT_EXPR,
1399 TREE_TYPE (type), val);
1400 else
1401 /* ??? Condition vectorization expects us to do
1402 promotion of invariant/external defs. */
1403 val = gimple_convert (&stmts, TREE_TYPE (type), val);
1404 for (gimple_stmt_iterator gsi2 = gsi_start (stmts);
1405 !gsi_end_p (gsi2); )
1406 {
1407 init_stmt = gsi_stmt (gsi2);
1408 gsi_remove (&gsi2, false);
1409 vect_init_vector_1 (vinfo, stmt_info, init_stmt, gsi);
1410 }
1411 }
1412 }
1413 val = build_vector_from_val (type, val);
1414 }
1415
1416 new_temp = vect_get_new_ssa_name (type, vect_simple_var, "cst_");
1417 init_stmt = gimple_build_assign (new_temp, val);
1418 vect_init_vector_1 (vinfo, stmt_info, init_stmt, gsi);
1419 return new_temp;
1420 }
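
/* For example, initializing a V4SI vector from the scalar constant 5
   emits roughly

     cst_1 = { 5, 5, 5, 5 };

   in the loop preheader (when GSI is NULL) and returns the new SSA
   name.  */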
1421
1422 /* Function vect_get_vec_def_for_operand_1.
1423
1424 For a defining stmt DEF_STMT_INFO of a scalar stmt, return a vector def
1425 with type DT that will be used in the vectorized stmt. */
1426
1427 tree
1428 vect_get_vec_def_for_operand_1 (stmt_vec_info def_stmt_info,
1429 enum vect_def_type dt)
1430 {
1431 tree vec_oprnd;
1432 stmt_vec_info vec_stmt_info;
1433
1434 switch (dt)
1435 {
1436 /* operand is a constant or a loop invariant. */
1437 case vect_constant_def:
1438 case vect_external_def:
1439 /* Code should use vect_get_vec_def_for_operand. */
1440 gcc_unreachable ();
1441
1442 /* Operand is defined by a loop header phi. In case of nested
1443 cycles we also may have uses of the backedge def. */
1444 case vect_reduction_def:
1445 case vect_double_reduction_def:
1446 case vect_nested_cycle:
1447 case vect_induction_def:
1448 gcc_assert (gimple_code (def_stmt_info->stmt) == GIMPLE_PHI
1449 || dt == vect_nested_cycle);
1450 /* Fallthru. */
1451
1452 /* operand is defined inside the loop. */
1453 case vect_internal_def:
1454 {
1455 /* Get the def from the vectorized stmt. */
1456 vec_stmt_info = STMT_VINFO_VEC_STMT (def_stmt_info);
1457 /* Get vectorized pattern statement. */
1458 if (!vec_stmt_info
1459 && STMT_VINFO_IN_PATTERN_P (def_stmt_info)
1460 && !STMT_VINFO_RELEVANT (def_stmt_info))
1461 vec_stmt_info = (STMT_VINFO_VEC_STMT
1462 (STMT_VINFO_RELATED_STMT (def_stmt_info)));
1463 gcc_assert (vec_stmt_info);
1464 if (gphi *phi = dyn_cast <gphi *> (vec_stmt_info->stmt))
1465 vec_oprnd = PHI_RESULT (phi);
1466 else
1467 vec_oprnd = gimple_get_lhs (vec_stmt_info->stmt);
1468 return vec_oprnd;
1469 }
1470
1471 default:
1472 gcc_unreachable ();
1473 }
1474 }
1475
1476
1477 /* Function vect_get_vec_def_for_operand.
1478
1479 OP is an operand in STMT_VINFO. This function returns a (vector) def
1480 that will be used in the vectorized stmt for STMT_VINFO.
1481
1482 In the case that OP is an SSA_NAME which is defined in the loop, then
1483 STMT_VINFO_VEC_STMT of the defining stmt holds the relevant def.
1484
1485 In case OP is an invariant or constant, a new stmt that creates a vector def
1486 needs to be introduced. VECTYPE may be used to specify a required type for
1487 the vector invariant. */
1488
1489 tree
1490 vect_get_vec_def_for_operand (vec_info *vinfo,
1491 tree op, stmt_vec_info stmt_vinfo, tree vectype)
1492 {
1493 gimple *def_stmt;
1494 enum vect_def_type dt;
1495 bool is_simple_use;
1496 loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
1497
1498 if (dump_enabled_p ())
1499 dump_printf_loc (MSG_NOTE, vect_location,
1500 "vect_get_vec_def_for_operand: %T\n", op);
1501
1502 stmt_vec_info def_stmt_info;
1503 is_simple_use = vect_is_simple_use (op, loop_vinfo, &dt,
1504 &def_stmt_info, &def_stmt);
1505 gcc_assert (is_simple_use);
1506 if (def_stmt && dump_enabled_p ())
1507 dump_printf_loc (MSG_NOTE, vect_location, " def_stmt = %G", def_stmt);
1508
1509 if (dt == vect_constant_def || dt == vect_external_def)
1510 {
1511 tree stmt_vectype = STMT_VINFO_VECTYPE (stmt_vinfo);
1512 tree vector_type;
1513
1514 if (vectype)
1515 vector_type = vectype;
1516 else if (VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (op))
1517 && VECTOR_BOOLEAN_TYPE_P (stmt_vectype))
1518 vector_type = truth_type_for (stmt_vectype);
1519 else
1520 vector_type = get_vectype_for_scalar_type (loop_vinfo, TREE_TYPE (op));
1521
1522 gcc_assert (vector_type);
1523 return vect_init_vector (vinfo, stmt_vinfo, op, vector_type, NULL);
1524 }
1525 else
1526 return vect_get_vec_def_for_operand_1 (def_stmt_info, dt);
1527 }
1528
1529
1530 /* Function vect_get_vec_def_for_stmt_copy
1531
1532 Return a vector-def for an operand. This function is used when the
1533 vectorized stmt to be created (by the caller to this function) is a "copy"
1534 created in case the vectorized result cannot fit in one vector, and several
1535 copies of the vector-stmt are required. In this case the vector-def is
1536 retrieved from the vector stmt recorded in the STMT_VINFO_RELATED_STMT field
1537 of the stmt that defines VEC_OPRND. VINFO describes the vectorization.
1538
1539 Context:
1540 In case the vectorization factor (VF) is bigger than the number
1541 of elements that can fit in a vectype (nunits), we have to generate
1542 more than one vector stmt to vectorize the scalar stmt. This situation
1543 arises when there are multiple data-types operated upon in the loop; the
1544 smallest data-type determines the VF, and as a result, when vectorizing
1545 stmts operating on wider types we need to create 'VF/nunits' "copies" of the
1546 vector stmt (each computing a vector of 'nunits' results, and together
1547 computing 'VF' results in each iteration). This function is called when
1548 vectorizing such a stmt (e.g. vectorizing S2 in the illustration below, in
1549 which VF=16 and nunits=4, so the number of copies required is 4):
1550
1551 scalar stmt: vectorized into: STMT_VINFO_RELATED_STMT
1552
1553 S1: x = load VS1.0: vx.0 = memref0 VS1.1
1554 VS1.1: vx.1 = memref1 VS1.2
1555 VS1.2: vx.2 = memref2 VS1.3
1556 VS1.3: vx.3 = memref3
1557
1558 S2: z = x + ... VSnew.0: vz0 = vx.0 + ... VSnew.1
1559 VSnew.1: vz1 = vx.1 + ... VSnew.2
1560 VSnew.2: vz2 = vx.2 + ... VSnew.3
1561 VSnew.3: vz3 = vx.3 + ...
1562
1563 The vectorization of S1 is explained in vectorizable_load.
1564 The vectorization of S2:
1565 To create the first vector-stmt out of the 4 copies - VSnew.0 -
1566 the function 'vect_get_vec_def_for_operand' is called to
1567 get the relevant vector-def for each operand of S2. For operand x it
1568 returns the vector-def 'vx.0'.
1569
1570 To create the remaining copies of the vector-stmt (VSnew.j), this
1571 function is called to get the relevant vector-def for each operand. It is
1572 obtained from the respective VS1.j stmt, which is recorded in the
1573 STMT_VINFO_RELATED_STMT field of the stmt that defines VEC_OPRND.
1574
1575 For example, to obtain the vector-def 'vx.1' in order to create the
1576 vector stmt 'VSnew.1', this function is called with VEC_OPRND='vx.0'.
1577 Given 'vx.0' we obtain the stmt that defines it ('VS1.0'); from the
1578 STMT_VINFO_RELATED_STMT field of 'VS1.0' we obtain the next copy - 'VS1.1',
1579 and return its def ('vx.1').
1580 Overall, to create the above sequence this function will be called 3 times:
1581 vx.1 = vect_get_vec_def_for_stmt_copy (vinfo, vx.0);
1582 vx.2 = vect_get_vec_def_for_stmt_copy (vinfo, vx.1);
1583 vx.3 = vect_get_vec_def_for_stmt_copy (vinfo, vx.2); */
1584
1585 tree
1586 vect_get_vec_def_for_stmt_copy (vec_info *vinfo, tree vec_oprnd)
1587 {
1588 stmt_vec_info def_stmt_info = vinfo->lookup_def (vec_oprnd);
1589 if (!def_stmt_info)
1590 /* Do nothing; can reuse same def. */
1591 return vec_oprnd;
1592
1593 def_stmt_info = STMT_VINFO_RELATED_STMT (def_stmt_info);
1594 gcc_assert (def_stmt_info);
1595 if (gphi *phi = dyn_cast <gphi *> (def_stmt_info->stmt))
1596 vec_oprnd = PHI_RESULT (phi);
1597 else
1598 vec_oprnd = gimple_get_lhs (def_stmt_info->stmt);
1599 return vec_oprnd;
1600 }
1601
1602
1603 /* Get vectorized definitions for the operands to create a copy of an original
1604 stmt. See vect_get_vec_def_for_stmt_copy () for details. */
1605
1606 void
1607 vect_get_vec_defs_for_stmt_copy (vec_info *vinfo,
1608 vec<tree> *vec_oprnds0,
1609 vec<tree> *vec_oprnds1)
1610 {
1611 tree vec_oprnd = vec_oprnds0->pop ();
1612
1613 vec_oprnd = vect_get_vec_def_for_stmt_copy (vinfo, vec_oprnd);
1614 vec_oprnds0->quick_push (vec_oprnd);
1615
1616 if (vec_oprnds1 && vec_oprnds1->length ())
1617 {
1618 vec_oprnd = vec_oprnds1->pop ();
1619 vec_oprnd = vect_get_vec_def_for_stmt_copy (vinfo, vec_oprnd);
1620 vec_oprnds1->quick_push (vec_oprnd);
1621 }
1622 }
1623
1624
1625 /* Get vectorized definitions for OP0 and OP1. */
1626
1627 void
1628 vect_get_vec_defs (vec_info *vinfo, tree op0, tree op1, stmt_vec_info stmt_info,
1629 vec<tree> *vec_oprnds0,
1630 vec<tree> *vec_oprnds1,
1631 slp_tree slp_node)
1632 {
1633 if (slp_node)
1634 {
1635 auto_vec<vec<tree> > vec_defs (SLP_TREE_CHILDREN (slp_node).length ());
1636 vect_get_slp_defs (vinfo, slp_node, &vec_defs, op1 ? 2 : 1);
1637 *vec_oprnds0 = vec_defs[0];
1638 if (op1)
1639 *vec_oprnds1 = vec_defs[1];
1640 }
1641 else
1642 {
1643 tree vec_oprnd;
1644
1645 vec_oprnds0->create (1);
1646 vec_oprnd = vect_get_vec_def_for_operand (vinfo, op0, stmt_info);
1647 vec_oprnds0->quick_push (vec_oprnd);
1648
1649 if (op1)
1650 {
1651 vec_oprnds1->create (1);
1652 vec_oprnd = vect_get_vec_def_for_operand (vinfo, op1, stmt_info);
1653 vec_oprnds1->quick_push (vec_oprnd);
1654 }
1655 }
1656 }
1657
1658 /* Helper function called by vect_finish_replace_stmt and
1659 vect_finish_stmt_generation. Set the location of the new
1660 statement and create and return a stmt_vec_info for it. */
1661
1662 static stmt_vec_info
1663 vect_finish_stmt_generation_1 (vec_info *vinfo,
1664 stmt_vec_info stmt_info, gimple *vec_stmt)
1665 {
1666 stmt_vec_info vec_stmt_info = vinfo->add_stmt (vec_stmt);
1667
1668 if (dump_enabled_p ())
1669 dump_printf_loc (MSG_NOTE, vect_location, "add new stmt: %G", vec_stmt);
1670
1671 gimple_set_location (vec_stmt, gimple_location (stmt_info->stmt));
1672
1673 /* While EH edges will generally prevent vectorization, stmt might
1674 e.g. be in a must-not-throw region. Ensure newly created stmts
1675 that could throw are part of the same region. */
1676 int lp_nr = lookup_stmt_eh_lp (stmt_info->stmt);
1677 if (lp_nr != 0 && stmt_could_throw_p (cfun, vec_stmt))
1678 add_stmt_to_eh_lp (vec_stmt, lp_nr);
1679
1680 return vec_stmt_info;
1681 }
1682
1683 /* Replace the scalar statement STMT_INFO with a new vector statement VEC_STMT,
1684 which sets the same scalar result as STMT_INFO did. Create and return a
1685 stmt_vec_info for VEC_STMT. */
1686
1687 stmt_vec_info
1688 vect_finish_replace_stmt (vec_info *vinfo,
1689 stmt_vec_info stmt_info, gimple *vec_stmt)
1690 {
1691 gimple *scalar_stmt = vect_orig_stmt (stmt_info)->stmt;
1692 gcc_assert (gimple_get_lhs (scalar_stmt) == gimple_get_lhs (vec_stmt));
1693
1694 gimple_stmt_iterator gsi = gsi_for_stmt (scalar_stmt);
1695 gsi_replace (&gsi, vec_stmt, true);
1696
1697 return vect_finish_stmt_generation_1 (vinfo, stmt_info, vec_stmt);
1698 }
1699
1700 /* Add VEC_STMT to the vectorized implementation of STMT_INFO and insert it
1701 before *GSI. Create and return a stmt_vec_info for VEC_STMT. */
1702
1703 stmt_vec_info
1704 vect_finish_stmt_generation (vec_info *vinfo,
1705 stmt_vec_info stmt_info, gimple *vec_stmt,
1706 gimple_stmt_iterator *gsi)
1707 {
1708 gcc_assert (gimple_code (stmt_info->stmt) != GIMPLE_LABEL);
1709
1710 if (!gsi_end_p (*gsi)
1711 && gimple_has_mem_ops (vec_stmt))
1712 {
1713 gimple *at_stmt = gsi_stmt (*gsi);
1714 tree vuse = gimple_vuse (at_stmt);
1715 if (vuse && TREE_CODE (vuse) == SSA_NAME)
1716 {
1717 tree vdef = gimple_vdef (at_stmt);
1718 gimple_set_vuse (vec_stmt, gimple_vuse (at_stmt));
1719 /* If we have an SSA vuse and insert a store, update virtual
1720 SSA form to avoid triggering the renamer. Do so only
1721 if we can easily see all uses - which is what almost always
1722 happens with the way vectorized stmts are inserted. */
1723 if ((vdef && TREE_CODE (vdef) == SSA_NAME)
1724 && ((is_gimple_assign (vec_stmt)
1725 && !is_gimple_reg (gimple_assign_lhs (vec_stmt)))
1726 || (is_gimple_call (vec_stmt)
1727 && !(gimple_call_flags (vec_stmt)
1728 & (ECF_CONST|ECF_PURE|ECF_NOVOPS)))))
1729 {
1730 tree new_vdef = copy_ssa_name (vuse, vec_stmt);
1731 gimple_set_vdef (vec_stmt, new_vdef);
1732 SET_USE (gimple_vuse_op (at_stmt), new_vdef);
1733 }
1734 }
1735 }
1736 gsi_insert_before (gsi, vec_stmt, GSI_SAME_STMT);
1737 return vect_finish_stmt_generation_1 (vinfo, stmt_info, vec_stmt);
1738 }
1739
1740 /* We want to vectorize a call to combined function CFN with function
1741 decl FNDECL, using VECTYPE_OUT as the type of the output and VECTYPE_IN
1742 as the types of all inputs. Check whether this is possible using
1743 an internal function, returning its code if so or IFN_LAST if not. */
1744
1745 static internal_fn
1746 vectorizable_internal_function (combined_fn cfn, tree fndecl,
1747 tree vectype_out, tree vectype_in)
1748 {
1749 internal_fn ifn;
1750 if (internal_fn_p (cfn))
1751 ifn = as_internal_fn (cfn);
1752 else
1753 ifn = associated_internal_fn (fndecl);
1754 if (ifn != IFN_LAST && direct_internal_fn_p (ifn))
1755 {
1756 const direct_internal_fn_info &info = direct_internal_fn (ifn);
1757 if (info.vectorizable)
1758 {
1759 tree type0 = (info.type0 < 0 ? vectype_out : vectype_in);
1760 tree type1 = (info.type1 < 0 ? vectype_out : vectype_in);
1761 if (direct_internal_fn_supported_p (ifn, tree_pair (type0, type1),
1762 OPTIMIZE_FOR_SPEED))
1763 return ifn;
1764 }
1765 }
1766 return IFN_LAST;
1767 }
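
/* For example (hypothetical case): for CFN_SQRT with both vector types
   being V2DF, this would return IFN_SQRT when the target implements the
   vector sqrt optab for V2DF, and IFN_LAST otherwise.  */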
1768
1769
1770 static tree permute_vec_elements (vec_info *, tree, tree, tree, stmt_vec_info,
1771 gimple_stmt_iterator *);
1772
1773 /* Check whether a load or store statement in the loop described by
1774 LOOP_VINFO is possible in a fully-masked loop. This is testing
1775 whether the vectorizer pass has the appropriate support, as well as
1776 whether the target does.
1777
1778 VLS_TYPE says whether the statement is a load or store and VECTYPE
1779 is the type of the vector being loaded or stored. MEMORY_ACCESS_TYPE
1780 says how the load or store is going to be implemented and GROUP_SIZE
1781 is the number of load or store statements in the containing group.
1782 If the access is a gather load or scatter store, GS_INFO describes
1783 its arguments. If the load or store is conditional, SCALAR_MASK is the
1784 condition under which it occurs.
1785
1786 Clear LOOP_VINFO_CAN_FULLY_MASK_P if a fully-masked loop is not
1787 supported, otherwise record the required mask types. */
1788
1789 static void
1790 check_load_store_masking (loop_vec_info loop_vinfo, tree vectype,
1791 vec_load_store_type vls_type, int group_size,
1792 vect_memory_access_type memory_access_type,
1793 gather_scatter_info *gs_info, tree scalar_mask)
1794 {
1795 /* Invariant loads need no special support. */
1796 if (memory_access_type == VMAT_INVARIANT)
1797 return;
1798
1799 vec_loop_masks *masks = &LOOP_VINFO_MASKS (loop_vinfo);
1800 machine_mode vecmode = TYPE_MODE (vectype);
1801 bool is_load = (vls_type == VLS_LOAD);
1802 if (memory_access_type == VMAT_LOAD_STORE_LANES)
1803 {
1804 if (is_load
1805 ? !vect_load_lanes_supported (vectype, group_size, true)
1806 : !vect_store_lanes_supported (vectype, group_size, true))
1807 {
1808 if (dump_enabled_p ())
1809 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1810 "can't use a fully-masked loop because the"
1811 " target doesn't have an appropriate masked"
1812 " load/store-lanes instruction.\n");
1813 LOOP_VINFO_CAN_FULLY_MASK_P (loop_vinfo) = false;
1814 return;
1815 }
1816 unsigned int ncopies = vect_get_num_copies (loop_vinfo, vectype);
1817 vect_record_loop_mask (loop_vinfo, masks, ncopies, vectype, scalar_mask);
1818 return;
1819 }
1820
1821 if (memory_access_type == VMAT_GATHER_SCATTER)
1822 {
1823 internal_fn ifn = (is_load
1824 ? IFN_MASK_GATHER_LOAD
1825 : IFN_MASK_SCATTER_STORE);
1826 if (!internal_gather_scatter_fn_supported_p (ifn, vectype,
1827 gs_info->memory_type,
1828 gs_info->offset_vectype,
1829 gs_info->scale))
1830 {
1831 if (dump_enabled_p ())
1832 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1833 "can't use a fully-masked loop because the"
1834 " target doesn't have an appropriate masked"
1835 " gather load or scatter store instruction.\n");
1836 LOOP_VINFO_CAN_FULLY_MASK_P (loop_vinfo) = false;
1837 return;
1838 }
1839 unsigned int ncopies = vect_get_num_copies (loop_vinfo, vectype);
1840 vect_record_loop_mask (loop_vinfo, masks, ncopies, vectype, scalar_mask);
1841 return;
1842 }
1843
1844 if (memory_access_type != VMAT_CONTIGUOUS
1845 && memory_access_type != VMAT_CONTIGUOUS_PERMUTE)
1846 {
1847 /* Element X of the data must come from iteration i * VF + X of the
1848 scalar loop. We need more work to support other mappings. */
1849 if (dump_enabled_p ())
1850 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1851 "can't use a fully-masked loop because an access"
1852 " isn't contiguous.\n");
1853 LOOP_VINFO_CAN_FULLY_MASK_P (loop_vinfo) = false;
1854 return;
1855 }
1856
1857 machine_mode mask_mode;
1858 if (!VECTOR_MODE_P (vecmode)
1859 || !targetm.vectorize.get_mask_mode (vecmode).exists (&mask_mode)
1860 || !can_vec_mask_load_store_p (vecmode, mask_mode, is_load))
1861 {
1862 if (dump_enabled_p ())
1863 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1864 "can't use a fully-masked loop because the target"
1865 " doesn't have the appropriate masked load or"
1866 " store.\n");
1867 LOOP_VINFO_CAN_FULLY_MASK_P (loop_vinfo) = false;
1868 return;
1869 }
1870 /* We might load more scalars than we need for permuting SLP loads.
1871 We checked in get_group_load_store_type that the extra elements
1872 don't leak into a new vector. */
1873 poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
1874 poly_uint64 vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
1875 unsigned int nvectors;
1876 if (can_div_away_from_zero_p (group_size * vf, nunits, &nvectors))
1877 vect_record_loop_mask (loop_vinfo, masks, nvectors, vectype, scalar_mask);
1878 else
1879 gcc_unreachable ();
1880 }
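
/* Worked example (hypothetical numbers): for a contiguous access with
   GROUP_SIZE = 2, a vectorization factor of 8 and V4SI vectors
   (nunits = 4), the final step computes nvectors = (2 * 8) / 4 = 4,
   so four masks of the appropriate mask type are recorded via
   vect_record_loop_mask.  */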
1881
1882 /* Return the mask input to a masked load or store. VEC_MASK is the vectorized
1883 form of the scalar mask condition and LOOP_MASK, if nonnull, is the mask
1884 that needs to be applied to all loads and stores in a vectorized loop.
1885 Return VEC_MASK if LOOP_MASK is null, otherwise return VEC_MASK & LOOP_MASK.
1886
1887 MASK_TYPE is the type of both masks. If new statements are needed,
1888 insert them before GSI. */
1889
1890 static tree
1891 prepare_load_store_mask (tree mask_type, tree loop_mask, tree vec_mask,
1892 gimple_stmt_iterator *gsi)
1893 {
1894 gcc_assert (useless_type_conversion_p (mask_type, TREE_TYPE (vec_mask)));
1895 if (!loop_mask)
1896 return vec_mask;
1897
1898 gcc_assert (TREE_TYPE (loop_mask) == mask_type);
1899 tree and_res = make_temp_ssa_name (mask_type, NULL, "vec_mask_and");
1900 gimple *and_stmt = gimple_build_assign (and_res, BIT_AND_EXPR,
1901 vec_mask, loop_mask);
1902 gsi_insert_before (gsi, and_stmt, GSI_SAME_STMT);
1903 return and_res;
1904 }
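
/* For example (illustrative, SSA names are made up): given a loop mask
   loop_mask_10 and a vectorized condition vec_mask_8, this emits
   something like

     vec_mask_and_11 = vec_mask_8 & loop_mask_10;

   before GSI and returns vec_mask_and_11; with a null LOOP_MASK it just
   returns vec_mask_8.  */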
1905
1906 /* Determine whether we can use a gather load or scatter store to vectorize
1907 strided load or store STMT_INFO by truncating the current offset to a
1908 smaller width. We need to be able to construct an offset vector:
1909
1910 { 0, X, X*2, X*3, ... }
1911
1912 without loss of precision, where X is STMT_INFO's DR_STEP.
1913
1914 Return true if this is possible, describing the gather load or scatter
1915 store in GS_INFO. MASKED_P is true if the load or store is conditional. */
1916
1917 static bool
1918 vect_truncate_gather_scatter_offset (stmt_vec_info stmt_info,
1919 loop_vec_info loop_vinfo, bool masked_p,
1920 gather_scatter_info *gs_info)
1921 {
1922 dr_vec_info *dr_info = STMT_VINFO_DR_INFO (stmt_info);
1923 data_reference *dr = dr_info->dr;
1924 tree step = DR_STEP (dr);
1925 if (TREE_CODE (step) != INTEGER_CST)
1926 {
1927 /* ??? Perhaps we could use range information here? */
1928 if (dump_enabled_p ())
1929 dump_printf_loc (MSG_NOTE, vect_location,
1930 "cannot truncate variable step.\n");
1931 return false;
1932 }
1933
1934 /* Get the number of bits in an element. */
1935 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
1936 scalar_mode element_mode = SCALAR_TYPE_MODE (TREE_TYPE (vectype));
1937 unsigned int element_bits = GET_MODE_BITSIZE (element_mode);
1938
1939 /* Set COUNT to the upper limit on the number of elements - 1.
1940 Start with the maximum vectorization factor. */
1941 unsigned HOST_WIDE_INT count = vect_max_vf (loop_vinfo) - 1;
1942
1943 /* Try lowering COUNT to the number of scalar latch iterations. */
1944 class loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
1945 widest_int max_iters;
1946 if (max_loop_iterations (loop, &max_iters)
1947 && max_iters < count)
1948 count = max_iters.to_shwi ();
1949
1950 /* Try scales of 1 and the element size. */
1951 int scales[] = { 1, vect_get_scalar_dr_size (dr_info) };
1952 wi::overflow_type overflow = wi::OVF_NONE;
1953 for (int i = 0; i < 2; ++i)
1954 {
1955 int scale = scales[i];
1956 widest_int factor;
1957 if (!wi::multiple_of_p (wi::to_widest (step), scale, SIGNED, &factor))
1958 continue;
1959
1960 /* Determine the minimum precision of (COUNT - 1) * STEP / SCALE. */
1961 widest_int range = wi::mul (count, factor, SIGNED, &overflow);
1962 if (overflow)
1963 continue;
1964 signop sign = range >= 0 ? UNSIGNED : SIGNED;
1965 unsigned int min_offset_bits = wi::min_precision (range, sign);
1966
1967 /* Find the narrowest viable offset type. */
1968 unsigned int offset_bits = 1U << ceil_log2 (min_offset_bits);
1969 tree offset_type = build_nonstandard_integer_type (offset_bits,
1970 sign == UNSIGNED);
1971
1972 /* See whether the target supports the operation with an offset
1973 no narrower than OFFSET_TYPE. */
1974 tree memory_type = TREE_TYPE (DR_REF (dr));
1975 if (!vect_gather_scatter_fn_p (loop_vinfo, DR_IS_READ (dr), masked_p,
1976 vectype, memory_type, offset_type, scale,
1977 &gs_info->ifn, &gs_info->offset_vectype))
1978 continue;
1979
1980 gs_info->decl = NULL_TREE;
1981 /* Logically the sum of DR_BASE_ADDRESS, DR_INIT and DR_OFFSET,
1982 but we don't need to store that here. */
1983 gs_info->base = NULL_TREE;
1984 gs_info->element_type = TREE_TYPE (vectype);
1985 gs_info->offset = fold_convert (offset_type, step);
1986 gs_info->offset_dt = vect_constant_def;
1987 gs_info->scale = scale;
1988 gs_info->memory_type = memory_type;
1989 return true;
1990 }
1991
1992 if (overflow && dump_enabled_p ())
1993 dump_printf_loc (MSG_NOTE, vect_location,
1994 "truncating gather/scatter offset to %d bits"
1995 " might change its value.\n", element_bits);
1996
1997 return false;
1998 }
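
/* Worked example (hypothetical numbers): with DR_STEP = 16, 4-byte
   elements and at most 1000 scalar latch iterations, trying SCALE = 4
   gives a factor of 16 / 4 = 4 and a range of roughly 1000 * 4 = 4000,
   which fits in 12 bits; the narrowest viable offset type is therefore
   a 16-bit unsigned type, and the offsets used are multiples of 4,
   scaled by 4 by the gather/scatter operation itself.  */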
1999
2000 /* Return true if we can use gather/scatter internal functions to
2001 vectorize STMT_INFO, which is a grouped or strided load or store.
2002 MASKED_P is true if the load or store is conditional. When returning
2003 true, fill in GS_INFO with the information required to perform the
2004 operation. */
2005
2006 static bool
2007 vect_use_strided_gather_scatters_p (stmt_vec_info stmt_info,
2008 loop_vec_info loop_vinfo, bool masked_p,
2009 gather_scatter_info *gs_info)
2010 {
2011 if (!vect_check_gather_scatter (stmt_info, loop_vinfo, gs_info)
2012 || gs_info->decl)
2013 return vect_truncate_gather_scatter_offset (stmt_info, loop_vinfo,
2014 masked_p, gs_info);
2015
2016 tree old_offset_type = TREE_TYPE (gs_info->offset);
2017 tree new_offset_type = TREE_TYPE (gs_info->offset_vectype);
2018
2019 gcc_assert (TYPE_PRECISION (new_offset_type)
2020 >= TYPE_PRECISION (old_offset_type));
2021 gs_info->offset = fold_convert (new_offset_type, gs_info->offset);
2022
2023 if (dump_enabled_p ())
2024 dump_printf_loc (MSG_NOTE, vect_location,
2025 "using gather/scatter for strided/grouped access,"
2026 " scale = %d\n", gs_info->scale);
2027
2028 return true;
2029 }
2030
2031 /* STMT_INFO is a non-strided load or store, meaning that it accesses
2032 elements with a known constant step. Return -1 if that step
2033 is negative, 0 if it is zero, and 1 if it is greater than zero. */
2034
2035 static int
2036 compare_step_with_zero (vec_info *vinfo, stmt_vec_info stmt_info)
2037 {
2038 dr_vec_info *dr_info = STMT_VINFO_DR_INFO (stmt_info);
2039 return tree_int_cst_compare (vect_dr_behavior (vinfo, dr_info)->step,
2040 size_zero_node);
2041 }
2042
2043 /* If the target supports a permute mask that reverses the elements in
2044 a vector of type VECTYPE, return that mask, otherwise return null. */
2045
2046 static tree
2047 perm_mask_for_reverse (tree vectype)
2048 {
2049 poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
2050
2051 /* The encoding has a single stepped pattern. */
2052 vec_perm_builder sel (nunits, 1, 3);
2053 for (int i = 0; i < 3; ++i)
2054 sel.quick_push (nunits - 1 - i);
2055
2056 vec_perm_indices indices (sel, 1, nunits);
2057 if (!can_vec_perm_const_p (TYPE_MODE (vectype), indices))
2058 return NULL_TREE;
2059 return vect_gen_perm_mask_checked (vectype, indices);
2060 }
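
/* For example: for V8SI (nunits = 8) the three encoded elements are
   { 7, 6, 5 }, which the stepped encoding extends to the full reversal
   permutation { 7, 6, 5, 4, 3, 2, 1, 0 }.  */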
2061
2062 /* A subroutine of get_load_store_type, with a subset of the same
2063 arguments. Handle the case where STMT_INFO is a load or store that
2064 accesses consecutive elements with a negative step. */
2065
2066 static vect_memory_access_type
2067 get_negative_load_store_type (vec_info *vinfo,
2068 stmt_vec_info stmt_info, tree vectype,
2069 vec_load_store_type vls_type,
2070 unsigned int ncopies)
2071 {
2072 dr_vec_info *dr_info = STMT_VINFO_DR_INFO (stmt_info);
2073 dr_alignment_support alignment_support_scheme;
2074
2075 if (ncopies > 1)
2076 {
2077 if (dump_enabled_p ())
2078 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2079 "multiple types with negative step.\n");
2080 return VMAT_ELEMENTWISE;
2081 }
2082
2083 alignment_support_scheme = vect_supportable_dr_alignment (vinfo,
2084 dr_info, false);
2085 if (alignment_support_scheme != dr_aligned
2086 && alignment_support_scheme != dr_unaligned_supported)
2087 {
2088 if (dump_enabled_p ())
2089 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2090 "negative step but alignment required.\n");
2091 return VMAT_ELEMENTWISE;
2092 }
2093
2094 if (vls_type == VLS_STORE_INVARIANT)
2095 {
2096 if (dump_enabled_p ())
2097 dump_printf_loc (MSG_NOTE, vect_location,
2098 "negative step with invariant source;"
2099 " no permute needed.\n");
2100 return VMAT_CONTIGUOUS_DOWN;
2101 }
2102
2103 if (!perm_mask_for_reverse (vectype))
2104 {
2105 if (dump_enabled_p ())
2106 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2107 "negative step and reversing not supported.\n");
2108 return VMAT_ELEMENTWISE;
2109 }
2110
2111 return VMAT_CONTIGUOUS_REVERSE;
2112 }
2113
2114 /* STMT_INFO is either a masked or unconditional store. Return the value
2115 being stored. */
2116
2117 tree
2118 vect_get_store_rhs (stmt_vec_info stmt_info)
2119 {
2120 if (gassign *assign = dyn_cast <gassign *> (stmt_info->stmt))
2121 {
2122 gcc_assert (gimple_assign_single_p (assign));
2123 return gimple_assign_rhs1 (assign);
2124 }
2125 if (gcall *call = dyn_cast <gcall *> (stmt_info->stmt))
2126 {
2127 internal_fn ifn = gimple_call_internal_fn (call);
2128 int index = internal_fn_stored_value_index (ifn);
2129 gcc_assert (index >= 0);
2130 return gimple_call_arg (call, index);
2131 }
2132 gcc_unreachable ();
2133 }
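
/* For example (illustrative, SSA names are made up): for a plain store
   *p_3 = x_5 this returns x_5; for a masked store such as
   .MASK_STORE (p_3, align, mask_7, x_5) the stored-value index is
   expected to pick out x_5.  */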
2134
2135 /* Function VECTOR_VECTOR_COMPOSITION_TYPE
2136
2137 This function returns a vector type which can be composed with NELTS pieces,
2138 whose type is recorded in PTYPE. VTYPE should be a vector type, and has the
2139 same vector size as the return vector. It first checks whether the target
2140 supports constructing such a vector from NELTS piece-sized vectors; if not,
2141 it checks whether an integer mode of the piece size can be used instead.
2142 It returns NULL_TREE if no suitable composition can be found.
2143
2144 For example, for (vtype=V16QI, nelts=4), we can probably get:
2145 - V16QI with PTYPE V4QI.
2146 - V4SI with PTYPE SI.
2147 - NULL_TREE. */
2148
2149 static tree
2150 vector_vector_composition_type (tree vtype, poly_uint64 nelts, tree *ptype)
2151 {
2152 gcc_assert (VECTOR_TYPE_P (vtype));
2153 gcc_assert (known_gt (nelts, 0U));
2154
2155 machine_mode vmode = TYPE_MODE (vtype);
2156 if (!VECTOR_MODE_P (vmode))
2157 return NULL_TREE;
2158
2159 poly_uint64 vbsize = GET_MODE_BITSIZE (vmode);
2160 unsigned int pbsize;
2161 if (constant_multiple_p (vbsize, nelts, &pbsize))
2162 {
2163 /* First check if vec_init optab supports construction from
2164 vector pieces directly. */
2165 scalar_mode elmode = SCALAR_TYPE_MODE (TREE_TYPE (vtype));
2166 poly_uint64 inelts = pbsize / GET_MODE_BITSIZE (elmode);
2167 machine_mode rmode;
2168 if (related_vector_mode (vmode, elmode, inelts).exists (&rmode)
2169 && (convert_optab_handler (vec_init_optab, vmode, rmode)
2170 != CODE_FOR_nothing))
2171 {
2172 *ptype = build_vector_type (TREE_TYPE (vtype), inelts);
2173 return vtype;
2174 }
2175
2176 /* Otherwise check whether an integer mode of the same piece size exists
2177 and whether the vec_init optab supports construction from it directly. */
2178 if (int_mode_for_size (pbsize, 0).exists (&elmode)
2179 && related_vector_mode (vmode, elmode, nelts).exists (&rmode)
2180 && (convert_optab_handler (vec_init_optab, rmode, elmode)
2181 != CODE_FOR_nothing))
2182 {
2183 *ptype = build_nonstandard_integer_type (pbsize, 1);
2184 return build_vector_type (*ptype, nelts);
2185 }
2186 }
2187
2188 return NULL_TREE;
2189 }
2190
2191 /* A subroutine of get_load_store_type, with a subset of the same
2192 arguments. Handle the case where STMT_INFO is part of a grouped load
2193 or store.
2194
2195 For stores, the statements in the group are all consecutive
2196 and there is no gap at the end. For loads, the statements in the
2197 group might not be consecutive; there can be gaps between statements
2198 as well as at the end. */
2199
2200 static bool
2201 get_group_load_store_type (vec_info *vinfo, stmt_vec_info stmt_info,
2202 tree vectype, bool slp,
2203 bool masked_p, vec_load_store_type vls_type,
2204 vect_memory_access_type *memory_access_type,
2205 gather_scatter_info *gs_info)
2206 {
2207 loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
2208 class loop *loop = loop_vinfo ? LOOP_VINFO_LOOP (loop_vinfo) : NULL;
2209 stmt_vec_info first_stmt_info = DR_GROUP_FIRST_ELEMENT (stmt_info);
2210 dr_vec_info *first_dr_info = STMT_VINFO_DR_INFO (first_stmt_info);
2211 unsigned int group_size = DR_GROUP_SIZE (first_stmt_info);
2212 bool single_element_p = (stmt_info == first_stmt_info
2213 && !DR_GROUP_NEXT_ELEMENT (stmt_info));
2214 unsigned HOST_WIDE_INT gap = DR_GROUP_GAP (first_stmt_info);
2215 poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
2216
2217 /* True if the vectorized statements would access beyond the last
2218 statement in the group. */
2219 bool overrun_p = false;
2220
2221 /* True if we can cope with such overrun by peeling for gaps, so that
2222 there is at least one final scalar iteration after the vector loop. */
2223 bool can_overrun_p = (!masked_p
2224 && vls_type == VLS_LOAD
2225 && loop_vinfo
2226 && !loop->inner);
2227
2228 /* There can only be a gap at the end of the group if the stride is
2229 known at compile time. */
2230 gcc_assert (!STMT_VINFO_STRIDED_P (first_stmt_info) || gap == 0);
2231
2232 /* Stores can't yet have gaps. */
2233 gcc_assert (slp || vls_type == VLS_LOAD || gap == 0);
2234
2235 if (slp)
2236 {
2237 if (STMT_VINFO_STRIDED_P (first_stmt_info))
2238 {
2239 /* Try to use consecutive accesses of DR_GROUP_SIZE elements,
2240 separated by the stride, until we have a complete vector.
2241 Fall back to scalar accesses if that isn't possible. */
2242 if (multiple_p (nunits, group_size))
2243 *memory_access_type = VMAT_STRIDED_SLP;
2244 else
2245 *memory_access_type = VMAT_ELEMENTWISE;
2246 }
2247 else
2248 {
2249 overrun_p = loop_vinfo && gap != 0;
2250 if (overrun_p && vls_type != VLS_LOAD)
2251 {
2252 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2253 "Grouped store with gaps requires"
2254 " non-consecutive accesses\n");
2255 return false;
2256 }
2257 /* An overrun is fine if the trailing elements are smaller
2258 than the alignment boundary B. Every vector access will
2259 be a multiple of B and so we are guaranteed to access a
2260 non-gap element in the same B-sized block. */
2261 if (overrun_p
2262 && gap < (vect_known_alignment_in_bytes (first_dr_info)
2263 / vect_get_scalar_dr_size (first_dr_info)))
2264 overrun_p = false;
2265
2266 /* If the gap splits the vector in half and the target
2267 can do half-vector operations avoid the epilogue peeling
2268 by simply loading half of the vector only. Usually
2269 the construction with an upper zero half will be elided. */
2270 dr_alignment_support alignment_support_scheme;
2271 tree half_vtype;
2272 if (overrun_p
2273 && !masked_p
2274 && (((alignment_support_scheme
2275 = vect_supportable_dr_alignment (vinfo,
2276 first_dr_info, false)))
2277 == dr_aligned
2278 || alignment_support_scheme == dr_unaligned_supported)
2279 && known_eq (nunits, (group_size - gap) * 2)
2280 && known_eq (nunits, group_size)
2281 && (vector_vector_composition_type (vectype, 2, &half_vtype)
2282 != NULL_TREE))
2283 overrun_p = false;
2284
2285 if (overrun_p && !can_overrun_p)
2286 {
2287 if (dump_enabled_p ())
2288 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2289 "Peeling for outer loop is not supported\n");
2290 return false;
2291 }
2292 int cmp = compare_step_with_zero (vinfo, stmt_info);
2293 if (cmp < 0)
2294 *memory_access_type = get_negative_load_store_type
2295 (vinfo, stmt_info, vectype, vls_type, 1);
2296 else
2297 {
2298 gcc_assert (!loop_vinfo || cmp > 0);
2299 *memory_access_type = VMAT_CONTIGUOUS;
2300 }
2301 }
2302 }
2303 else
2304 {
2305 /* We can always handle this case using elementwise accesses,
2306 but see if something more efficient is available. */
2307 *memory_access_type = VMAT_ELEMENTWISE;
2308
2309 /* If there is a gap at the end of the group then these optimizations
2310 would access excess elements in the last iteration. */
2311 bool would_overrun_p = (gap != 0);
2312 /* An overrun is fine if the trailing elements are smaller than the
2313 alignment boundary B. Every vector access will be a multiple of B
2314 and so we are guaranteed to access a non-gap element in the
2315 same B-sized block. */
2316 if (would_overrun_p
2317 && !masked_p
2318 && gap < (vect_known_alignment_in_bytes (first_dr_info)
2319 / vect_get_scalar_dr_size (first_dr_info)))
2320 would_overrun_p = false;
2321
2322 if (!STMT_VINFO_STRIDED_P (first_stmt_info)
2323 && (can_overrun_p || !would_overrun_p)
2324 && compare_step_with_zero (vinfo, stmt_info) > 0)
2325 {
2326 /* First cope with the degenerate case of a single-element
2327 vector. */
2328 if (known_eq (TYPE_VECTOR_SUBPARTS (vectype), 1U))
2329 *memory_access_type = VMAT_CONTIGUOUS;
2330
2331 /* Otherwise try using LOAD/STORE_LANES. */
2332 if (*memory_access_type == VMAT_ELEMENTWISE
2333 && (vls_type == VLS_LOAD
2334 ? vect_load_lanes_supported (vectype, group_size, masked_p)
2335 : vect_store_lanes_supported (vectype, group_size,
2336 masked_p)))
2337 {
2338 *memory_access_type = VMAT_LOAD_STORE_LANES;
2339 overrun_p = would_overrun_p;
2340 }
2341
2342 /* If that fails, try using permuting loads. */
2343 if (*memory_access_type == VMAT_ELEMENTWISE
2344 && (vls_type == VLS_LOAD
2345 ? vect_grouped_load_supported (vectype, single_element_p,
2346 group_size)
2347 : vect_grouped_store_supported (vectype, group_size)))
2348 {
2349 *memory_access_type = VMAT_CONTIGUOUS_PERMUTE;
2350 overrun_p = would_overrun_p;
2351 }
2352 }
2353
2354 /* As a last resort, try using a gather load or scatter store.
2355
2356 ??? Although the code can handle all group sizes correctly,
2357 it probably isn't a win to use separate strided accesses based
2358 on nearby locations. Or, even if it's a win over scalar code,
2359 it might not be a win over vectorizing at a lower VF, if that
2360 allows us to use contiguous accesses. */
2361 if (*memory_access_type == VMAT_ELEMENTWISE
2362 && single_element_p
2363 && loop_vinfo
2364 && vect_use_strided_gather_scatters_p (stmt_info, loop_vinfo,
2365 masked_p, gs_info))
2366 *memory_access_type = VMAT_GATHER_SCATTER;
2367 }
2368
2369 if (vls_type != VLS_LOAD && first_stmt_info == stmt_info)
2370 {
2371 /* STMT is the leader of the group. Check the operands of all the
2372 stmts of the group. */
2373 stmt_vec_info next_stmt_info = DR_GROUP_NEXT_ELEMENT (stmt_info);
2374 while (next_stmt_info)
2375 {
2376 tree op = vect_get_store_rhs (next_stmt_info);
2377 enum vect_def_type dt;
2378 if (!vect_is_simple_use (op, vinfo, &dt))
2379 {
2380 if (dump_enabled_p ())
2381 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2382 "use not simple.\n");
2383 return false;
2384 }
2385 next_stmt_info = DR_GROUP_NEXT_ELEMENT (next_stmt_info);
2386 }
2387 }
2388
2389 if (overrun_p)
2390 {
2391 gcc_assert (can_overrun_p);
2392 if (dump_enabled_p ())
2393 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2394 "Data access with gaps requires scalar "
2395 "epilogue loop\n");
2396 LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo) = true;
2397 }
2398
2399 return true;
2400 }
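
/* Worked example of the overrun rule above (hypothetical numbers): for
   a load group with GROUP_SIZE = 3, a trailing gap of 1, 4-byte
   elements and a known 16-byte alignment, the gap (1) is smaller than
   16 / 4 = 4, so every vector access still touches a non-gap element in
   the same 16-byte block and the overrun is treated as safe.  */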
2401
2402 /* Analyze load or store statement STMT_INFO of type VLS_TYPE. Return true
2403 if there is a memory access type that the vectorized form can use,
2404 storing it in *MEMORY_ACCESS_TYPE if so. If we decide to use gathers
2405 or scatters, fill in GS_INFO accordingly.
2406
2407 SLP says whether we're performing SLP rather than loop vectorization.
2408 MASKED_P is true if the statement is conditional on a vectorized mask.
2409 VECTYPE is the vector type that the vectorized statements will use.
2410 NCOPIES is the number of vector statements that will be needed. */
2411
2412 static bool
2413 get_load_store_type (vec_info *vinfo, stmt_vec_info stmt_info,
2414 tree vectype, bool slp,
2415 bool masked_p, vec_load_store_type vls_type,
2416 unsigned int ncopies,
2417 vect_memory_access_type *memory_access_type,
2418 gather_scatter_info *gs_info)
2419 {
2420 loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
2421 poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
2422 if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
2423 {
2424 *memory_access_type = VMAT_GATHER_SCATTER;
2425 if (!vect_check_gather_scatter (stmt_info, loop_vinfo, gs_info))
2426 gcc_unreachable ();
2427 else if (!vect_is_simple_use (gs_info->offset, vinfo,
2428 &gs_info->offset_dt,
2429 &gs_info->offset_vectype))
2430 {
2431 if (dump_enabled_p ())
2432 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2433 "%s index use not simple.\n",
2434 vls_type == VLS_LOAD ? "gather" : "scatter");
2435 return false;
2436 }
2437 }
2438 else if (STMT_VINFO_GROUPED_ACCESS (stmt_info))
2439 {
2440 if (!get_group_load_store_type (vinfo, stmt_info, vectype, slp, masked_p,
2441 vls_type, memory_access_type, gs_info))
2442 return false;
2443 }
2444 else if (STMT_VINFO_STRIDED_P (stmt_info))
2445 {
2446 gcc_assert (!slp);
2447 if (loop_vinfo
2448 && vect_use_strided_gather_scatters_p (stmt_info, loop_vinfo,
2449 masked_p, gs_info))
2450 *memory_access_type = VMAT_GATHER_SCATTER;
2451 else
2452 *memory_access_type = VMAT_ELEMENTWISE;
2453 }
2454 else
2455 {
2456 int cmp = compare_step_with_zero (vinfo, stmt_info);
2457 if (cmp < 0)
2458 *memory_access_type = get_negative_load_store_type
2459 (vinfo, stmt_info, vectype, vls_type, ncopies);
2460 else if (cmp == 0)
2461 {
2462 gcc_assert (vls_type == VLS_LOAD);
2463 *memory_access_type = VMAT_INVARIANT;
2464 }
2465 else
2466 *memory_access_type = VMAT_CONTIGUOUS;
2467 }
2468
2469 if ((*memory_access_type == VMAT_ELEMENTWISE
2470 || *memory_access_type == VMAT_STRIDED_SLP)
2471 && !nunits.is_constant ())
2472 {
2473 if (dump_enabled_p ())
2474 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2475 "Not using elementwise accesses due to variable "
2476 "vectorization factor.\n");
2477 return false;
2478 }
2479
2480 /* FIXME: At the moment the cost model seems to underestimate the
2481 cost of using elementwise accesses. This check preserves the
2482 traditional behavior until that can be fixed. */
2483 stmt_vec_info first_stmt_info = DR_GROUP_FIRST_ELEMENT (stmt_info);
2484 if (!first_stmt_info)
2485 first_stmt_info = stmt_info;
2486 if (*memory_access_type == VMAT_ELEMENTWISE
2487 && !STMT_VINFO_STRIDED_P (first_stmt_info)
2488 && !(stmt_info == DR_GROUP_FIRST_ELEMENT (stmt_info)
2489 && !DR_GROUP_NEXT_ELEMENT (stmt_info)
2490 && !pow2p_hwi (DR_GROUP_SIZE (stmt_info))))
2491 {
2492 if (dump_enabled_p ())
2493 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2494 "not falling back to elementwise accesses\n");
2495 return false;
2496 }
2497 return true;
2498 }
2499
2500 /* Return true if boolean argument MASK is suitable for vectorizing
2501 conditional operation STMT_INFO. When returning true, store the type
2502 of the definition in *MASK_DT_OUT and the type of the vectorized mask
2503 in *MASK_VECTYPE_OUT. */
2504
2505 static bool
2506 vect_check_scalar_mask (vec_info *vinfo, stmt_vec_info stmt_info, tree mask,
2507 vect_def_type *mask_dt_out,
2508 tree *mask_vectype_out)
2509 {
2510 if (!VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (mask)))
2511 {
2512 if (dump_enabled_p ())
2513 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2514 "mask argument is not a boolean.\n");
2515 return false;
2516 }
2517
2518 if (TREE_CODE (mask) != SSA_NAME)
2519 {
2520 if (dump_enabled_p ())
2521 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2522 "mask argument is not an SSA name.\n");
2523 return false;
2524 }
2525
2526 enum vect_def_type mask_dt;
2527 tree mask_vectype;
2528 if (!vect_is_simple_use (mask, vinfo, &mask_dt, &mask_vectype))
2529 {
2530 if (dump_enabled_p ())
2531 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2532 "mask use not simple.\n");
2533 return false;
2534 }
2535
2536 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
2537 if (!mask_vectype)
2538 mask_vectype = get_mask_type_for_scalar_type (vinfo, TREE_TYPE (vectype));
2539
2540 if (!mask_vectype || !VECTOR_BOOLEAN_TYPE_P (mask_vectype))
2541 {
2542 if (dump_enabled_p ())
2543 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2544 "could not find an appropriate vector mask type.\n");
2545 return false;
2546 }
2547
2548 if (maybe_ne (TYPE_VECTOR_SUBPARTS (mask_vectype),
2549 TYPE_VECTOR_SUBPARTS (vectype)))
2550 {
2551 if (dump_enabled_p ())
2552 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2553 "vector mask type %T"
2554 " does not match vector data type %T.\n",
2555 mask_vectype, vectype);
2556
2557 return false;
2558 }
2559
2560 *mask_dt_out = mask_dt;
2561 *mask_vectype_out = mask_vectype;
2562 return true;
2563 }
2564
2565 /* Return true if stored value RHS is suitable for vectorizing store
2566 statement STMT_INFO. When returning true, store the type of the
2567 definition in *RHS_DT_OUT, the type of the vectorized store value in
2568 *RHS_VECTYPE_OUT and the type of the store in *VLS_TYPE_OUT. */
2569
2570 static bool
2571 vect_check_store_rhs (vec_info *vinfo, stmt_vec_info stmt_info, tree rhs,
2572 vect_def_type *rhs_dt_out, tree *rhs_vectype_out,
2573 vec_load_store_type *vls_type_out)
2574 {
2575 /* If this is a store from a constant, make sure that
2576 native_encode_expr can handle it. */
2577 if (CONSTANT_CLASS_P (rhs) && native_encode_expr (rhs, NULL, 64) == 0)
2578 {
2579 if (dump_enabled_p ())
2580 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2581 "cannot encode constant as a byte sequence.\n");
2582 return false;
2583 }
2584
2585 enum vect_def_type rhs_dt;
2586 tree rhs_vectype;
2587 if (!vect_is_simple_use (rhs, vinfo, &rhs_dt, &rhs_vectype))
2588 {
2589 if (dump_enabled_p ())
2590 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2591 "use not simple.\n");
2592 return false;
2593 }
2594
2595 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
2596 if (rhs_vectype && !useless_type_conversion_p (vectype, rhs_vectype))
2597 {
2598 if (dump_enabled_p ())
2599 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2600 "incompatible vector types.\n");
2601 return false;
2602 }
2603
2604 *rhs_dt_out = rhs_dt;
2605 *rhs_vectype_out = rhs_vectype;
2606 if (rhs_dt == vect_constant_def || rhs_dt == vect_external_def)
2607 *vls_type_out = VLS_STORE_INVARIANT;
2608 else
2609 *vls_type_out = VLS_STORE;
2610 return true;
2611 }
2612
2613 /* Build an all-ones vector mask of type MASKTYPE while vectorizing STMT_INFO.
2614 Note that we support masks with floating-point type, in which case the
2615 floats are interpreted as a bitmask. */
2616
2617 static tree
2618 vect_build_all_ones_mask (vec_info *vinfo,
2619 stmt_vec_info stmt_info, tree masktype)
2620 {
2621 if (TREE_CODE (masktype) == INTEGER_TYPE)
2622 return build_int_cst (masktype, -1);
2623 else if (TREE_CODE (TREE_TYPE (masktype)) == INTEGER_TYPE)
2624 {
2625 tree mask = build_int_cst (TREE_TYPE (masktype), -1);
2626 mask = build_vector_from_val (masktype, mask);
2627 return vect_init_vector (vinfo, stmt_info, mask, masktype, NULL);
2628 }
2629 else if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (masktype)))
2630 {
2631 REAL_VALUE_TYPE r;
2632 long tmp[6];
2633 for (int j = 0; j < 6; ++j)
2634 tmp[j] = -1;
2635 real_from_target (&r, tmp, TYPE_MODE (TREE_TYPE (masktype)));
2636 tree mask = build_real (TREE_TYPE (masktype), r);
2637 mask = build_vector_from_val (masktype, mask);
2638 return vect_init_vector (vinfo, stmt_info, mask, masktype, NULL);
2639 }
2640 gcc_unreachable ();
2641 }
2642
2643 /* Build an all-zero merge value of type VECTYPE while vectorizing
2644 STMT_INFO as a gather load. */
2645
2646 static tree
2647 vect_build_zero_merge_argument (vec_info *vinfo,
2648 stmt_vec_info stmt_info, tree vectype)
2649 {
2650 tree merge;
2651 if (TREE_CODE (TREE_TYPE (vectype)) == INTEGER_TYPE)
2652 merge = build_int_cst (TREE_TYPE (vectype), 0);
2653 else if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (vectype)))
2654 {
2655 REAL_VALUE_TYPE r;
2656 long tmp[6];
2657 for (int j = 0; j < 6; ++j)
2658 tmp[j] = 0;
2659 real_from_target (&r, tmp, TYPE_MODE (TREE_TYPE (vectype)));
2660 merge = build_real (TREE_TYPE (vectype), r);
2661 }
2662 else
2663 gcc_unreachable ();
2664 merge = build_vector_from_val (vectype, merge);
2665 return vect_init_vector (vinfo, stmt_info, merge, vectype, NULL);
2666 }
2667
2668 /* Build a gather load call while vectorizing STMT_INFO. Insert new
2669 instructions before GSI and add them to VEC_STMT. GS_INFO describes
2670 the gather load operation. If the load is conditional, MASK is the
2671 unvectorized condition and MASK_DT is its definition type, otherwise
2672 MASK is null. */
2673
2674 static void
2675 vect_build_gather_load_calls (vec_info *vinfo, stmt_vec_info stmt_info,
2676 gimple_stmt_iterator *gsi,
2677 stmt_vec_info *vec_stmt,
2678 gather_scatter_info *gs_info,
2679 tree mask)
2680 {
2681 loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
2682 class loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
2683 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
2684 poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
2685 int ncopies = vect_get_num_copies (loop_vinfo, vectype);
2686 edge pe = loop_preheader_edge (loop);
2687 enum { NARROW, NONE, WIDEN } modifier;
2688 poly_uint64 gather_off_nunits
2689 = TYPE_VECTOR_SUBPARTS (gs_info->offset_vectype);
2690
2691 tree arglist = TYPE_ARG_TYPES (TREE_TYPE (gs_info->decl));
2692 tree rettype = TREE_TYPE (TREE_TYPE (gs_info->decl));
2693 tree srctype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
2694 tree ptrtype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
2695 tree idxtype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
2696 tree masktype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
2697 tree scaletype = TREE_VALUE (arglist);
2698 tree real_masktype = masktype;
2699 gcc_checking_assert (types_compatible_p (srctype, rettype)
2700 && (!mask
2701 || TREE_CODE (masktype) == INTEGER_TYPE
2702 || types_compatible_p (srctype, masktype)));
2703 if (mask && TREE_CODE (masktype) == INTEGER_TYPE)
2704 masktype = truth_type_for (srctype);
2705
2706 tree mask_halftype = masktype;
2707 tree perm_mask = NULL_TREE;
2708 tree mask_perm_mask = NULL_TREE;
2709 if (known_eq (nunits, gather_off_nunits))
2710 modifier = NONE;
2711 else if (known_eq (nunits * 2, gather_off_nunits))
2712 {
2713 modifier = WIDEN;
2714
2715 /* Currently widening gathers and scatters are only supported for
2716 fixed-length vectors. */
2717 int count = gather_off_nunits.to_constant ();
2718 vec_perm_builder sel (count, count, 1);
2719 for (int i = 0; i < count; ++i)
2720 sel.quick_push (i | (count / 2));
2721
2722 vec_perm_indices indices (sel, 1, count);
2723 perm_mask = vect_gen_perm_mask_checked (gs_info->offset_vectype,
2724 indices);
2725 }
2726 else if (known_eq (nunits, gather_off_nunits * 2))
2727 {
2728 modifier = NARROW;
2729
2730 /* Currently narrowing gathers and scatters are only supported for
2731 fixed-length vectors. */
2732 int count = nunits.to_constant ();
2733 vec_perm_builder sel (count, count, 1);
2734 sel.quick_grow (count);
2735 for (int i = 0; i < count; ++i)
2736 sel[i] = i < count / 2 ? i : i + count / 2;
2737 vec_perm_indices indices (sel, 2, count);
2738 perm_mask = vect_gen_perm_mask_checked (vectype, indices);
2739
2740 ncopies *= 2;
2741
2742 if (mask && masktype == real_masktype)
2743 {
2744 for (int i = 0; i < count; ++i)
2745 sel[i] = i | (count / 2);
2746 indices.new_vector (sel, 2, count);
2747 mask_perm_mask = vect_gen_perm_mask_checked (masktype, indices);
2748 }
2749 else if (mask)
2750 mask_halftype = truth_type_for (gs_info->offset_vectype);
2751 }
2752 else
2753 gcc_unreachable ();
2754
2755 tree scalar_dest = gimple_get_lhs (stmt_info->stmt);
2756 tree vec_dest = vect_create_destination_var (scalar_dest, vectype);
2757
2758 tree ptr = fold_convert (ptrtype, gs_info->base);
2759 if (!is_gimple_min_invariant (ptr))
2760 {
2761 gimple_seq seq;
2762 ptr = force_gimple_operand (ptr, &seq, true, NULL_TREE);
2763 basic_block new_bb = gsi_insert_seq_on_edge_immediate (pe, seq);
2764 gcc_assert (!new_bb);
2765 }
2766
2767 tree scale = build_int_cst (scaletype, gs_info->scale);
2768
2769 tree vec_oprnd0 = NULL_TREE;
2770 tree vec_mask = NULL_TREE;
2771 tree src_op = NULL_TREE;
2772 tree mask_op = NULL_TREE;
2773 tree prev_res = NULL_TREE;
2774 stmt_vec_info prev_stmt_info = NULL;
2775
2776 if (!mask)
2777 {
2778 src_op = vect_build_zero_merge_argument (vinfo, stmt_info, rettype);
2779 mask_op = vect_build_all_ones_mask (vinfo, stmt_info, masktype);
2780 }
2781
2782 for (int j = 0; j < ncopies; ++j)
2783 {
2784 tree op, var;
2785 if (modifier == WIDEN && (j & 1))
2786 op = permute_vec_elements (vinfo, vec_oprnd0, vec_oprnd0,
2787 perm_mask, stmt_info, gsi);
2788 else if (j == 0)
2789 op = vec_oprnd0
2790 = vect_get_vec_def_for_operand (vinfo, gs_info->offset, stmt_info);
2791 else
2792 op = vec_oprnd0 = vect_get_vec_def_for_stmt_copy (loop_vinfo,
2793 vec_oprnd0);
2794
2795 if (!useless_type_conversion_p (idxtype, TREE_TYPE (op)))
2796 {
2797 gcc_assert (known_eq (TYPE_VECTOR_SUBPARTS (TREE_TYPE (op)),
2798 TYPE_VECTOR_SUBPARTS (idxtype)));
2799 var = vect_get_new_ssa_name (idxtype, vect_simple_var);
2800 op = build1 (VIEW_CONVERT_EXPR, idxtype, op);
2801 gassign *new_stmt = gimple_build_assign (var, VIEW_CONVERT_EXPR, op);
2802 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
2803 op = var;
2804 }
2805
2806 if (mask)
2807 {
2808 if (mask_perm_mask && (j & 1))
2809 mask_op = permute_vec_elements (vinfo, mask_op, mask_op,
2810 mask_perm_mask, stmt_info, gsi);
2811 else
2812 {
2813 if (j == 0)
2814 vec_mask = vect_get_vec_def_for_operand (vinfo, mask, stmt_info);
2815 else if (modifier != NARROW || (j & 1) == 0)
2816 vec_mask = vect_get_vec_def_for_stmt_copy (loop_vinfo,
2817 vec_mask);
2818
2819 mask_op = vec_mask;
2820 if (!useless_type_conversion_p (masktype, TREE_TYPE (vec_mask)))
2821 {
2822 poly_uint64 sub1 = TYPE_VECTOR_SUBPARTS (TREE_TYPE (mask_op));
2823 poly_uint64 sub2 = TYPE_VECTOR_SUBPARTS (masktype);
2824 gcc_assert (known_eq (sub1, sub2));
2825 var = vect_get_new_ssa_name (masktype, vect_simple_var);
2826 mask_op = build1 (VIEW_CONVERT_EXPR, masktype, mask_op);
2827 gassign *new_stmt
2828 = gimple_build_assign (var, VIEW_CONVERT_EXPR, mask_op);
2829 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
2830 mask_op = var;
2831 }
2832 }
2833 if (modifier == NARROW && masktype != real_masktype)
2834 {
2835 var = vect_get_new_ssa_name (mask_halftype, vect_simple_var);
2836 gassign *new_stmt
2837 = gimple_build_assign (var, (j & 1) ? VEC_UNPACK_HI_EXPR
2838 : VEC_UNPACK_LO_EXPR,
2839 mask_op);
2840 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
2841 mask_op = var;
2842 }
2843 src_op = mask_op;
2844 }
2845
2846 tree mask_arg = mask_op;
2847 if (masktype != real_masktype)
2848 {
2849 tree utype, optype = TREE_TYPE (mask_op);
2850 if (TYPE_MODE (real_masktype) == TYPE_MODE (optype))
2851 utype = real_masktype;
2852 else
2853 utype = lang_hooks.types.type_for_mode (TYPE_MODE (optype), 1);
2854 var = vect_get_new_ssa_name (utype, vect_scalar_var);
2855 mask_arg = build1 (VIEW_CONVERT_EXPR, utype, mask_op);
2856 gassign *new_stmt
2857 = gimple_build_assign (var, VIEW_CONVERT_EXPR, mask_arg);
2858 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
2859 mask_arg = var;
2860 if (!useless_type_conversion_p (real_masktype, utype))
2861 {
2862 gcc_assert (TYPE_PRECISION (utype)
2863 <= TYPE_PRECISION (real_masktype));
2864 var = vect_get_new_ssa_name (real_masktype, vect_scalar_var);
2865 new_stmt = gimple_build_assign (var, NOP_EXPR, mask_arg);
2866 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
2867 mask_arg = var;
2868 }
2869 src_op = build_zero_cst (srctype);
2870 }
2871 gcall *new_call = gimple_build_call (gs_info->decl, 5, src_op, ptr, op,
2872 mask_arg, scale);
2873
2874 stmt_vec_info new_stmt_info;
2875 if (!useless_type_conversion_p (vectype, rettype))
2876 {
2877 gcc_assert (known_eq (TYPE_VECTOR_SUBPARTS (vectype),
2878 TYPE_VECTOR_SUBPARTS (rettype)));
2879 op = vect_get_new_ssa_name (rettype, vect_simple_var);
2880 gimple_call_set_lhs (new_call, op);
2881 vect_finish_stmt_generation (vinfo, stmt_info, new_call, gsi);
2882 var = make_ssa_name (vec_dest);
2883 op = build1 (VIEW_CONVERT_EXPR, vectype, op);
2884 gassign *new_stmt = gimple_build_assign (var, VIEW_CONVERT_EXPR, op);
2885 new_stmt_info
2886 = vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
2887 }
2888 else
2889 {
2890 var = make_ssa_name (vec_dest, new_call);
2891 gimple_call_set_lhs (new_call, var);
2892 new_stmt_info
2893 = vect_finish_stmt_generation (vinfo, stmt_info, new_call, gsi);
2894 }
2895
2896 if (modifier == NARROW)
2897 {
2898 if ((j & 1) == 0)
2899 {
2900 prev_res = var;
2901 continue;
2902 }
2903 var = permute_vec_elements (vinfo, prev_res, var, perm_mask,
2904 stmt_info, gsi);
2905 new_stmt_info = loop_vinfo->lookup_def (var);
2906 }
2907
2908 if (prev_stmt_info == NULL)
2909 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt_info;
2910 else
2911 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt_info;
2912 prev_stmt_info = new_stmt_info;
2913 }
2914 }
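
/* For example (hypothetical types), in the WIDEN case above: with V2DF
   data (nunits = 2) and a V4SI offset vector (gather_off_nunits = 4),
   one offset vector serves two data vectors; for the odd-numbered
   copies the selector built above is { 2, 3, 2, 3 }, i.e. the upper
   half of the offsets.  */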
2915
2916 /* Prepare the base and offset in GS_INFO for vectorization.
2917 Set *DATAREF_PTR to the loop-invariant base address and *VEC_OFFSET
2918 to the vectorized offset argument for the first copy of STMT_INFO.
2919 STMT_INFO is the statement described by GS_INFO and LOOP is the
2920 containing loop. */
2921
2922 static void
2923 vect_get_gather_scatter_ops (vec_info *vinfo,
2924 class loop *loop, stmt_vec_info stmt_info,
2925 gather_scatter_info *gs_info,
2926 tree *dataref_ptr, tree *vec_offset)
2927 {
2928 gimple_seq stmts = NULL;
2929 *dataref_ptr = force_gimple_operand (gs_info->base, &stmts, true, NULL_TREE);
2930 if (stmts != NULL)
2931 {
2932 basic_block new_bb;
2933 edge pe = loop_preheader_edge (loop);
2934 new_bb = gsi_insert_seq_on_edge_immediate (pe, stmts);
2935 gcc_assert (!new_bb);
2936 }
2937 *vec_offset = vect_get_vec_def_for_operand (vinfo, gs_info->offset, stmt_info,
2938 gs_info->offset_vectype);
2939 }
2940
2941 /* Prepare to implement a grouped or strided load or store using
2942 the gather load or scatter store operation described by GS_INFO.
2943 STMT_INFO is the load or store statement.
2944
2945 Set *DATAREF_BUMP to the amount that should be added to the base
2946 address after each copy of the vectorized statement. Set *VEC_OFFSET
2947 to an invariant offset vector in which element I has the value
2948 I * DR_STEP / SCALE. */
2949
2950 static void
2951 vect_get_strided_load_store_ops (stmt_vec_info stmt_info,
2952 loop_vec_info loop_vinfo,
2953 gather_scatter_info *gs_info,
2954 tree *dataref_bump, tree *vec_offset)
2955 {
2956 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
2957 class loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
2958 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
2959 gimple_seq stmts;
2960
2961 tree bump = size_binop (MULT_EXPR,
2962 fold_convert (sizetype, unshare_expr (DR_STEP (dr))),
2963 size_int (TYPE_VECTOR_SUBPARTS (vectype)));
2964 *dataref_bump = force_gimple_operand (bump, &stmts, true, NULL_TREE);
2965 if (stmts)
2966 gsi_insert_seq_on_edge_immediate (loop_preheader_edge (loop), stmts);
2967
2968 /* The offset given in GS_INFO can have pointer type, so use the element
2969 type of the vector instead. */
2970 tree offset_type = TREE_TYPE (gs_info->offset_vectype);
2971
2972
2973 /* Calculate X = DR_STEP / SCALE and convert it to the appropriate type. */
2974 tree step = size_binop (EXACT_DIV_EXPR, unshare_expr (DR_STEP (dr)),
2975 ssize_int (gs_info->scale));
2976 step = fold_convert (offset_type, step);
2977 step = force_gimple_operand (step, &stmts, true, NULL_TREE);
2978
2979 /* Create {0, X, X*2, X*3, ...}. */
2980 *vec_offset = gimple_build (&stmts, VEC_SERIES_EXPR, gs_info->offset_vectype,
2981 build_zero_cst (offset_type), step);
2982 if (stmts)
2983 gsi_insert_seq_on_edge_immediate (loop_preheader_edge (loop), stmts);
2984 }
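
/* Worked example (hypothetical numbers): for V4SI vectors and
   DR_STEP = 8 with SCALE = 8, *DATAREF_BUMP is 8 * 4 = 32 bytes per
   copy, X = 8 / 8 = 1, and *VEC_OFFSET is the invariant series
   { 0, 1, 2, 3 }, which the gather/scatter then scales by 8.  */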
2985
2986 /* Return the amount that should be added to a vector pointer to move
2987 to the next or previous copy of AGGR_TYPE. DR_INFO is the data reference
2988 being vectorized and MEMORY_ACCESS_TYPE describes the type of
2989 vectorization. */
2990
2991 static tree
2992 vect_get_data_ptr_increment (vec_info *vinfo,
2993 dr_vec_info *dr_info, tree aggr_type,
2994 vect_memory_access_type memory_access_type)
2995 {
2996 if (memory_access_type == VMAT_INVARIANT)
2997 return size_zero_node;
2998
2999 tree iv_step = TYPE_SIZE_UNIT (aggr_type);
3000 tree step = vect_dr_behavior (vinfo, dr_info)->step;
3001 if (tree_int_cst_sgn (step) == -1)
3002 iv_step = fold_build1 (NEGATE_EXPR, TREE_TYPE (iv_step), iv_step);
3003 return iv_step;
3004 }
3005
3006 /* Check and perform vectorization of BUILT_IN_BSWAP{16,32,64,128}. */
3007
3008 static bool
3009 vectorizable_bswap (vec_info *vinfo,
3010 stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
3011 stmt_vec_info *vec_stmt, slp_tree slp_node,
3012 slp_tree *slp_op,
3013 tree vectype_in, stmt_vector_for_cost *cost_vec)
3014 {
3015 tree op, vectype;
3016 gcall *stmt = as_a <gcall *> (stmt_info->stmt);
3017 loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
3018 unsigned ncopies;
3019
3020 op = gimple_call_arg (stmt, 0);
3021 vectype = STMT_VINFO_VECTYPE (stmt_info);
3022 poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
3023
3024 /* Multiple types in SLP are handled by creating the appropriate number of
3025 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
3026 case of SLP. */
3027 if (slp_node)
3028 ncopies = 1;
3029 else
3030 ncopies = vect_get_num_copies (loop_vinfo, vectype);
3031
3032 gcc_assert (ncopies >= 1);
3033
3034 tree char_vectype = get_same_sized_vectype (char_type_node, vectype_in);
3035 if (! char_vectype)
3036 return false;
3037
3038 poly_uint64 num_bytes = TYPE_VECTOR_SUBPARTS (char_vectype);
3039 unsigned word_bytes;
3040 if (!constant_multiple_p (num_bytes, nunits, &word_bytes))
3041 return false;
3042
3043 /* The encoding uses one stepped pattern for each byte in the word. */
3044 vec_perm_builder elts (num_bytes, word_bytes, 3);
3045 for (unsigned i = 0; i < 3; ++i)
3046 for (unsigned j = 0; j < word_bytes; ++j)
3047 elts.quick_push ((i + 1) * word_bytes - j - 1);
3048
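/* For example (hypothetical types): for a bswap32 on V4SI with a V16QI
   CHAR_VECTYPE, word_bytes = 4 and the three encoded words are
   { 3, 2, 1, 0, 7, 6, 5, 4, 11, 10, 9, 8 }, which the stepped encoding
   extends with the final word 15, 14, 13, 12 to give the full
   byte-reversing permutation.  */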
3049 vec_perm_indices indices (elts, 1, num_bytes);
3050 if (!can_vec_perm_const_p (TYPE_MODE (char_vectype), indices))
3051 return false;
3052
3053 if (! vec_stmt)
3054 {
3055 if (slp_node
3056 && !vect_maybe_update_slp_op_vectype (slp_op[0], vectype_in))
3057 {
3058 if (dump_enabled_p ())
3059 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3060 "incompatible vector types for invariants\n");
3061 return false;
3062 }
3063
3064 STMT_VINFO_TYPE (stmt_info) = call_vec_info_type;
3065 DUMP_VECT_SCOPE ("vectorizable_bswap");
3066 if (! slp_node)
3067 {
3068 record_stmt_cost (cost_vec,
3069 1, vector_stmt, stmt_info, 0, vect_prologue);
3070 record_stmt_cost (cost_vec,
3071 ncopies, vec_perm, stmt_info, 0, vect_body);
3072 }
3073 return true;
3074 }
3075
3076 tree bswap_vconst = vec_perm_indices_to_tree (char_vectype, indices);
3077
3078 /* Transform. */
3079 vec<tree> vec_oprnds = vNULL;
3080 stmt_vec_info new_stmt_info = NULL;
3081 stmt_vec_info prev_stmt_info = NULL;
3082 for (unsigned j = 0; j < ncopies; j++)
3083 {
3084 /* Handle uses. */
3085 if (j == 0)
3086 vect_get_vec_defs (vinfo, op, NULL, stmt_info, &vec_oprnds, NULL,
3087 slp_node);
3088 else
3089 vect_get_vec_defs_for_stmt_copy (vinfo, &vec_oprnds, NULL);
3090
3091 /* Arguments are ready. Create the new vector stmt. */
3092 unsigned i;
3093 tree vop;
3094 FOR_EACH_VEC_ELT (vec_oprnds, i, vop)
3095 {
3096 gimple *new_stmt;
3097 tree tem = make_ssa_name (char_vectype);
3098 new_stmt = gimple_build_assign (tem, build1 (VIEW_CONVERT_EXPR,
3099 char_vectype, vop));
3100 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
3101 tree tem2 = make_ssa_name (char_vectype);
3102 new_stmt = gimple_build_assign (tem2, VEC_PERM_EXPR,
3103 tem, tem, bswap_vconst);
3104 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
3105 tem = make_ssa_name (vectype);
3106 new_stmt = gimple_build_assign (tem, build1 (VIEW_CONVERT_EXPR,
3107 vectype, tem2));
3108 new_stmt_info
3109 = vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
3110 if (slp_node)
3111 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt_info);
3112 }
3113
3114 if (slp_node)
3115 continue;
3116
3117 if (j == 0)
3118 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt_info;
3119 else
3120 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt_info;
3121
3122 prev_stmt_info = new_stmt_info;
3123 }
3124
3125 vec_oprnds.release ();
3126 return true;
3127 }
3128
3129 /* Return true if vector types VECTYPE_IN and VECTYPE_OUT have
3130 integer elements and if we can narrow VECTYPE_IN to VECTYPE_OUT
3131 in a single step. On success, store the binary pack code in
3132 *CONVERT_CODE. */
3133
3134 static bool
3135 simple_integer_narrowing (tree vectype_out, tree vectype_in,
3136 tree_code *convert_code)
3137 {
3138 if (!INTEGRAL_TYPE_P (TREE_TYPE (vectype_out))
3139 || !INTEGRAL_TYPE_P (TREE_TYPE (vectype_in)))
3140 return false;
3141
3142 tree_code code;
3143 int multi_step_cvt = 0;
3144 auto_vec <tree, 8> interm_types;
3145 if (!supportable_narrowing_operation (NOP_EXPR, vectype_out, vectype_in,
3146 &code, &multi_step_cvt, &interm_types)
3147 || multi_step_cvt)
3148 return false;
3149
3150 *convert_code = code;
3151 return true;
3152 }
3153
3154 /* Function vectorizable_call.
3155
3156 Check if STMT_INFO performs a function call that can be vectorized.
3157 If VEC_STMT is also passed, vectorize STMT_INFO: create a vectorized
3158 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
3159 Return true if STMT_INFO is vectorizable in this way. */
3160
3161 static bool
3162 vectorizable_call (vec_info *vinfo,
3163 stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
3164 stmt_vec_info *vec_stmt, slp_tree slp_node,
3165 stmt_vector_for_cost *cost_vec)
3166 {
3167 gcall *stmt;
3168 tree vec_dest;
3169 tree scalar_dest;
3170 tree op;
3171 tree vec_oprnd0 = NULL_TREE, vec_oprnd1 = NULL_TREE;
3172 stmt_vec_info prev_stmt_info;
3173 tree vectype_out, vectype_in;
3174 poly_uint64 nunits_in;
3175 poly_uint64 nunits_out;
3176 loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
3177 bb_vec_info bb_vinfo = dyn_cast <bb_vec_info> (vinfo);
3178 tree fndecl, new_temp, rhs_type;
3179 enum vect_def_type dt[4]
3180 = { vect_unknown_def_type, vect_unknown_def_type, vect_unknown_def_type,
3181 vect_unknown_def_type };
3182 tree vectypes[ARRAY_SIZE (dt)] = {};
3183 slp_tree slp_op[ARRAY_SIZE (dt)] = {};
3184 int ndts = ARRAY_SIZE (dt);
3185 int ncopies, j;
3186 auto_vec<tree, 8> vargs;
3187 auto_vec<tree, 8> orig_vargs;
3188 enum { NARROW, NONE, WIDEN } modifier;
3189 size_t i, nargs;
3190 tree lhs;
3191
3192 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
3193 return false;
3194
3195 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
3196 && ! vec_stmt)
3197 return false;
3198
3199 /* Is STMT_INFO a vectorizable call? */
3200 stmt = dyn_cast <gcall *> (stmt_info->stmt);
3201 if (!stmt)
3202 return false;
3203
3204 if (gimple_call_internal_p (stmt)
3205 && (internal_load_fn_p (gimple_call_internal_fn (stmt))
3206 || internal_store_fn_p (gimple_call_internal_fn (stmt))))
3207 /* Handled by vectorizable_load and vectorizable_store. */
3208 return false;
3209
3210 if (gimple_call_lhs (stmt) == NULL_TREE
3211 || TREE_CODE (gimple_call_lhs (stmt)) != SSA_NAME)
3212 return false;
3213
3214 gcc_checking_assert (!stmt_can_throw_internal (cfun, stmt));
3215
3216 vectype_out = STMT_VINFO_VECTYPE (stmt_info);
3217
3218 /* Process function arguments. */
3219 rhs_type = NULL_TREE;
3220 vectype_in = NULL_TREE;
3221 nargs = gimple_call_num_args (stmt);
3222
3223 /* Bail out if the function has more than four arguments; we do not have
3224 interesting builtin functions to vectorize with more than two arguments,
3225 except for fma. A call with no arguments is not vectorizable either. */
3226 if (nargs == 0 || nargs > 4)
3227 return false;
3228
3229 /* Ignore the arguments of IFN_GOMP_SIMD_LANE, they are magic. */
3230 combined_fn cfn = gimple_call_combined_fn (stmt);
3231 if (cfn == CFN_GOMP_SIMD_LANE)
3232 {
3233 nargs = 0;
3234 rhs_type = unsigned_type_node;
3235 }
3236
3237 int mask_opno = -1;
3238 if (internal_fn_p (cfn))
3239 mask_opno = internal_fn_mask_index (as_internal_fn (cfn));
3240
3241 for (i = 0; i < nargs; i++)
3242 {
3243 if ((int) i == mask_opno)
3244 {
3245 op = gimple_call_arg (stmt, i);
3246 if (!vect_check_scalar_mask (vinfo,
3247 stmt_info, op, &dt[i], &vectypes[i]))
3248 return false;
3249 continue;
3250 }
3251
3252 if (!vect_is_simple_use (vinfo, stmt_info, slp_node,
3253 i, &op, &slp_op[i], &dt[i], &vectypes[i]))
3254 {
3255 if (dump_enabled_p ())
3256 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3257 "use not simple.\n");
3258 return false;
3259 }
3260
3261 /* We can only handle calls with arguments of the same type. */
3262 if (rhs_type
3263 && !types_compatible_p (rhs_type, TREE_TYPE (op)))
3264 {
3265 if (dump_enabled_p ())
3266 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3267 "argument types differ.\n");
3268 return false;
3269 }
3270 if (!rhs_type)
3271 rhs_type = TREE_TYPE (op);
3272
3273 if (!vectype_in)
3274 vectype_in = vectypes[i];
3275 else if (vectypes[i]
3276 && !types_compatible_p (vectypes[i], vectype_in))
3277 {
3278 if (dump_enabled_p ())
3279 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3280 "argument vector types differ.\n");
3281 return false;
3282 }
3283 }
3284 /* If all arguments are external or constant defs, infer the vector type
3285 from the scalar type. */
3286 if (!vectype_in)
3287 vectype_in = get_vectype_for_scalar_type (vinfo, rhs_type, slp_node);
3288 if (vec_stmt)
3289 gcc_assert (vectype_in);
3290 if (!vectype_in)
3291 {
3292 if (dump_enabled_p ())
3293 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3294 "no vectype for scalar type %T\n", rhs_type);
3295
3296 return false;
3297 }
3298 /* FORNOW: we don't yet support mixtures of vector sizes for calls,
3299 just mixtures of nunits. E.g. DI->SI versions of __builtin_ctz*
3300 are traditionally vectorized as two VnDI->VnDI IFN_CTZs followed
3301 by a pack of the two vectors into an SI vector. We would need
3302 separate code to handle direct VnDI->VnSI IFN_CTZs. */
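  /* For instance, with 128-bit vectors a DI->SI __builtin_ctzll computation
     is (schematically, with made-up SSA names) narrowed as

	 half1_v2di = .CTZ (in1_v2di);
	 half2_v2di = .CTZ (in2_v2di);
	 out_v4si = VEC_PACK_TRUNC_EXPR <half1_v2di, half2_v2di>;

     i.e. the input and output vector types have different nunits but the
     same size, which is exactly what the check below permits.  */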
3303 if (TYPE_SIZE (vectype_in) != TYPE_SIZE (vectype_out))
3304 {
3305 if (dump_enabled_p ())
3306 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3307 "mismatched vector sizes %T and %T\n",
3308 vectype_in, vectype_out);
3309 return false;
3310 }
3311
3312 if (VECTOR_BOOLEAN_TYPE_P (vectype_out)
3313 != VECTOR_BOOLEAN_TYPE_P (vectype_in))
3314 {
3315 if (dump_enabled_p ())
3316 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3317 "mixed mask and nonmask vector types\n");
3318 return false;
3319 }
3320
3321 /* FORNOW */
3322 nunits_in = TYPE_VECTOR_SUBPARTS (vectype_in);
3323 nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
3324 if (known_eq (nunits_in * 2, nunits_out))
3325 modifier = NARROW;
3326 else if (known_eq (nunits_out, nunits_in))
3327 modifier = NONE;
3328 else if (known_eq (nunits_out * 2, nunits_in))
3329 modifier = WIDEN;
3330 else
3331 return false;
3332
3333 /* We only handle functions that do not read or clobber memory. */
3334 if (gimple_vuse (stmt))
3335 {
3336 if (dump_enabled_p ())
3337 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3338 "function reads from or writes to memory.\n");
3339 return false;
3340 }
3341
3342 /* For now, we only vectorize a function call if a matching internal
3343 function or target-specific builtin is available. TODO -- in some
3344 cases, it might be profitable to insert the calls for pieces of the
3345 vector, in order to be able to vectorize other operations in the loop. */
3346 fndecl = NULL_TREE;
3347 internal_fn ifn = IFN_LAST;
3348 tree callee = gimple_call_fndecl (stmt);
3349
3350 /* First try using an internal function. */
3351 tree_code convert_code = ERROR_MARK;
3352 if (cfn != CFN_LAST
3353 && (modifier == NONE
3354 || (modifier == NARROW
3355 && simple_integer_narrowing (vectype_out, vectype_in,
3356 &convert_code))))
3357 ifn = vectorizable_internal_function (cfn, callee, vectype_out,
3358 vectype_in);
3359
3360 /* If that fails, try asking for a target-specific built-in function. */
3361 if (ifn == IFN_LAST)
3362 {
3363 if (cfn != CFN_LAST)
3364 fndecl = targetm.vectorize.builtin_vectorized_function
3365 (cfn, vectype_out, vectype_in);
3366 else if (callee && fndecl_built_in_p (callee, BUILT_IN_MD))
3367 fndecl = targetm.vectorize.builtin_md_vectorized_function
3368 (callee, vectype_out, vectype_in);
3369 }
3370
3371 if (ifn == IFN_LAST && !fndecl)
3372 {
3373 if (cfn == CFN_GOMP_SIMD_LANE
3374 && !slp_node
3375 && loop_vinfo
3376 && LOOP_VINFO_LOOP (loop_vinfo)->simduid
3377 && TREE_CODE (gimple_call_arg (stmt, 0)) == SSA_NAME
3378 && LOOP_VINFO_LOOP (loop_vinfo)->simduid
3379 == SSA_NAME_VAR (gimple_call_arg (stmt, 0)))
3380 {
3381 /* We can handle IFN_GOMP_SIMD_LANE by returning a
3382 { 0, 1, 2, ... vf - 1 } vector. */
3383 gcc_assert (nargs == 0);
3384 }
3385 else if (modifier == NONE
3386 && (gimple_call_builtin_p (stmt, BUILT_IN_BSWAP16)
3387 || gimple_call_builtin_p (stmt, BUILT_IN_BSWAP32)
3388 || gimple_call_builtin_p (stmt, BUILT_IN_BSWAP64)
3389 || gimple_call_builtin_p (stmt, BUILT_IN_BSWAP128)))
3390 return vectorizable_bswap (vinfo, stmt_info, gsi, vec_stmt, slp_node,
3391 slp_op, vectype_in, cost_vec);
3392 else
3393 {
3394 if (dump_enabled_p ())
3395 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3396 "function is not vectorizable.\n");
3397 return false;
3398 }
3399 }
3400
3401 if (slp_node)
3402 ncopies = 1;
3403 else if (modifier == NARROW && ifn == IFN_LAST)
3404 ncopies = vect_get_num_copies (loop_vinfo, vectype_out);
3405 else
3406 ncopies = vect_get_num_copies (loop_vinfo, vectype_in);
3407
3408 /* Sanity check: make sure that at least one copy of the vectorized stmt
3409 needs to be generated. */
3410 gcc_assert (ncopies >= 1);
3411
3412 vec_loop_masks *masks = (loop_vinfo ? &LOOP_VINFO_MASKS (loop_vinfo) : NULL);
3413 if (!vec_stmt) /* transformation not required. */
3414 {
3415 if (slp_node)
3416 for (i = 0; i < nargs; ++i)
3417 if (!vect_maybe_update_slp_op_vectype (slp_op[i], vectype_in))
3418 {
3419 if (dump_enabled_p ())
3420 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3421 "incompatible vector types for invariants\n");
3422 return false;
3423 }
3424 STMT_VINFO_TYPE (stmt_info) = call_vec_info_type;
3425 DUMP_VECT_SCOPE ("vectorizable_call");
3426 vect_model_simple_cost (vinfo, stmt_info,
3427 ncopies, dt, ndts, slp_node, cost_vec);
3428 if (ifn != IFN_LAST && modifier == NARROW && !slp_node)
3429 record_stmt_cost (cost_vec, ncopies / 2,
3430 vec_promote_demote, stmt_info, 0, vect_body);
3431
3432 if (loop_vinfo && mask_opno >= 0)
3433 {
3434 unsigned int nvectors = (slp_node
3435 ? SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node)
3436 : ncopies);
3437 tree scalar_mask = gimple_call_arg (stmt_info->stmt, mask_opno);
3438 vect_record_loop_mask (loop_vinfo, masks, nvectors,
3439 vectype_out, scalar_mask);
3440 }
3441 return true;
3442 }
3443
3444 /* Transform. */
3445
3446 if (dump_enabled_p ())
3447 dump_printf_loc (MSG_NOTE, vect_location, "transform call.\n");
3448
3449 /* Handle def. */
3450 scalar_dest = gimple_call_lhs (stmt);
3451 vec_dest = vect_create_destination_var (scalar_dest, vectype_out);
3452
3453 bool masked_loop_p = loop_vinfo && LOOP_VINFO_FULLY_MASKED_P (loop_vinfo);
3454
3455 stmt_vec_info new_stmt_info = NULL;
3456 prev_stmt_info = NULL;
3457 if (modifier == NONE || ifn != IFN_LAST)
3458 {
3459 tree prev_res = NULL_TREE;
3460 vargs.safe_grow (nargs);
3461 orig_vargs.safe_grow (nargs);
3462 for (j = 0; j < ncopies; ++j)
3463 {
3464 /* Build argument list for the vectorized call. */
3465 if (slp_node)
3466 {
3467 auto_vec<vec<tree> > vec_defs (nargs);
3468 vec<tree> vec_oprnds0;
3469
3470 vect_get_slp_defs (vinfo, slp_node, &vec_defs);
3471 vec_oprnds0 = vec_defs[0];
3472
3473 /* Arguments are ready. Create the new vector stmt. */
3474 FOR_EACH_VEC_ELT (vec_oprnds0, i, vec_oprnd0)
3475 {
3476 size_t k;
3477 for (k = 0; k < nargs; k++)
3478 {
3479 vec<tree> vec_oprndsk = vec_defs[k];
3480 vargs[k] = vec_oprndsk[i];
3481 }
3482 if (modifier == NARROW)
3483 {
3484 /* We don't define any narrowing conditional functions
3485 at present. */
3486 gcc_assert (mask_opno < 0);
3487 tree half_res = make_ssa_name (vectype_in);
3488 gcall *call
3489 = gimple_build_call_internal_vec (ifn, vargs);
3490 gimple_call_set_lhs (call, half_res);
3491 gimple_call_set_nothrow (call, true);
3492 vect_finish_stmt_generation (vinfo, stmt_info, call, gsi);
3493 if ((i & 1) == 0)
3494 {
3495 prev_res = half_res;
3496 continue;
3497 }
3498 new_temp = make_ssa_name (vec_dest);
3499 gimple *new_stmt
3500 = gimple_build_assign (new_temp, convert_code,
3501 prev_res, half_res);
3502 new_stmt_info
3503 = vect_finish_stmt_generation (vinfo, stmt_info,
3504 new_stmt, gsi);
3505 }
3506 else
3507 {
3508 if (mask_opno >= 0 && masked_loop_p)
3509 {
3510 unsigned int vec_num = vec_oprnds0.length ();
3511 /* Always true for SLP. */
3512 gcc_assert (ncopies == 1);
3513 tree mask = vect_get_loop_mask (gsi, masks, vec_num,
3514 vectype_out, i);
3515 vargs[mask_opno] = prepare_load_store_mask
3516 (TREE_TYPE (mask), mask, vargs[mask_opno], gsi);
3517 }
3518
3519 gcall *call;
3520 if (ifn != IFN_LAST)
3521 call = gimple_build_call_internal_vec (ifn, vargs);
3522 else
3523 call = gimple_build_call_vec (fndecl, vargs);
3524 new_temp = make_ssa_name (vec_dest, call);
3525 gimple_call_set_lhs (call, new_temp);
3526 gimple_call_set_nothrow (call, true);
3527 new_stmt_info
3528 = vect_finish_stmt_generation (vinfo, stmt_info,
3529 call, gsi);
3530 }
3531 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt_info);
3532 }
3533
3534 for (i = 0; i < nargs; i++)
3535 {
3536 vec<tree> vec_oprndsi = vec_defs[i];
3537 vec_oprndsi.release ();
3538 }
3539 continue;
3540 }
3541
3542 for (i = 0; i < nargs; i++)
3543 {
3544 op = gimple_call_arg (stmt, i);
3545 if (j == 0)
3546 vec_oprnd0
3547 = vect_get_vec_def_for_operand (vinfo,
3548 op, stmt_info, vectypes[i]);
3549 else
3550 vec_oprnd0
3551 = vect_get_vec_def_for_stmt_copy (vinfo, orig_vargs[i]);
3552
3553 orig_vargs[i] = vargs[i] = vec_oprnd0;
3554 }
3555
3556 if (mask_opno >= 0 && masked_loop_p)
3557 {
3558 tree mask = vect_get_loop_mask (gsi, masks, ncopies,
3559 vectype_out, j);
3560 vargs[mask_opno]
3561 = prepare_load_store_mask (TREE_TYPE (mask), mask,
3562 vargs[mask_opno], gsi);
3563 }
3564
3565 if (cfn == CFN_GOMP_SIMD_LANE)
3566 {
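	  /* Copy number J of a GOMP_SIMD_LANE call needs no call at all:
	     it is just the constant lane-number vector
	     { j * nunits, j * nunits + 1, ... }, emitted via
	     vect_init_vector_1 and copied into the vector destination.  */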
3567 tree cst = build_index_vector (vectype_out, j * nunits_out, 1);
3568 tree new_var
3569 = vect_get_new_ssa_name (vectype_out, vect_simple_var, "cst_");
3570 gimple *init_stmt = gimple_build_assign (new_var, cst);
3571 vect_init_vector_1 (vinfo, stmt_info, init_stmt, NULL);
3572 new_temp = make_ssa_name (vec_dest);
3573 gimple *new_stmt = gimple_build_assign (new_temp, new_var);
3574 new_stmt_info
3575 = vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
3576 }
3577 else if (modifier == NARROW)
3578 {
3579 /* We don't define any narrowing conditional functions at
3580 present. */
3581 gcc_assert (mask_opno < 0);
3582 tree half_res = make_ssa_name (vectype_in);
3583 gcall *call = gimple_build_call_internal_vec (ifn, vargs);
3584 gimple_call_set_lhs (call, half_res);
3585 gimple_call_set_nothrow (call, true);
3586 vect_finish_stmt_generation (vinfo, stmt_info, call, gsi);
3587 if ((j & 1) == 0)
3588 {
3589 prev_res = half_res;
3590 continue;
3591 }
3592 new_temp = make_ssa_name (vec_dest);
3593 gassign *new_stmt = gimple_build_assign (new_temp, convert_code,
3594 prev_res, half_res);
3595 new_stmt_info
3596 = vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
3597 }
3598 else
3599 {
3600 gcall *call;
3601 if (ifn != IFN_LAST)
3602 call = gimple_build_call_internal_vec (ifn, vargs);
3603 else
3604 call = gimple_build_call_vec (fndecl, vargs);
3605 new_temp = make_ssa_name (vec_dest, call);
3606 gimple_call_set_lhs (call, new_temp);
3607 gimple_call_set_nothrow (call, true);
3608 new_stmt_info
3609 = vect_finish_stmt_generation (vinfo, stmt_info, call, gsi);
3610 }
3611
3612 if (j == (modifier == NARROW ? 1 : 0))
3613 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt_info;
3614 else
3615 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt_info;
3616
3617 prev_stmt_info = new_stmt_info;
3618 }
3619 }
3620 else if (modifier == NARROW)
3621 {
3622 /* We don't define any narrowing conditional functions at present. */
3623 gcc_assert (mask_opno < 0);
3624 for (j = 0; j < ncopies; ++j)
3625 {
3626 /* Build argument list for the vectorized call. */
3627 if (j == 0)
3628 vargs.create (nargs * 2);
3629 else
3630 vargs.truncate (0);
3631
3632 if (slp_node)
3633 {
3634 auto_vec<vec<tree> > vec_defs (nargs);
3635 vec<tree> vec_oprnds0;
3636
3637 vect_get_slp_defs (vinfo, slp_node, &vec_defs);
3638 vec_oprnds0 = vec_defs[0];
3639
3640 /* Arguments are ready. Create the new vector stmt. */
3641 for (i = 0; vec_oprnds0.iterate (i, &vec_oprnd0); i += 2)
3642 {
3643 size_t k;
3644 vargs.truncate (0);
3645 for (k = 0; k < nargs; k++)
3646 {
3647 vec<tree> vec_oprndsk = vec_defs[k];
3648 vargs.quick_push (vec_oprndsk[i]);
3649 vargs.quick_push (vec_oprndsk[i + 1]);
3650 }
3651 gcall *call;
3652 if (ifn != IFN_LAST)
3653 call = gimple_build_call_internal_vec (ifn, vargs);
3654 else
3655 call = gimple_build_call_vec (fndecl, vargs);
3656 new_temp = make_ssa_name (vec_dest, call);
3657 gimple_call_set_lhs (call, new_temp);
3658 gimple_call_set_nothrow (call, true);
3659 new_stmt_info
3660 = vect_finish_stmt_generation (vinfo, stmt_info, call, gsi);
3661 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt_info);
3662 }
3663
3664 for (i = 0; i < nargs; i++)
3665 {
3666 vec<tree> vec_oprndsi = vec_defs[i];
3667 vec_oprndsi.release ();
3668 }
3669 continue;
3670 }
3671
3672 for (i = 0; i < nargs; i++)
3673 {
3674 op = gimple_call_arg (stmt, i);
3675 if (j == 0)
3676 {
3677 vec_oprnd0
3678 = vect_get_vec_def_for_operand (vinfo, op, stmt_info,
3679 vectypes[i]);
3680 vec_oprnd1
3681 = vect_get_vec_def_for_stmt_copy (vinfo, vec_oprnd0);
3682 }
3683 else
3684 {
3685 vec_oprnd1 = gimple_call_arg (new_stmt_info->stmt,
3686 2 * i + 1);
3687 vec_oprnd0
3688 = vect_get_vec_def_for_stmt_copy (vinfo, vec_oprnd1);
3689 vec_oprnd1
3690 = vect_get_vec_def_for_stmt_copy (vinfo, vec_oprnd0);
3691 }
3692
3693 vargs.quick_push (vec_oprnd0);
3694 vargs.quick_push (vec_oprnd1);
3695 }
3696
3697 gcall *new_stmt = gimple_build_call_vec (fndecl, vargs);
3698 new_temp = make_ssa_name (vec_dest, new_stmt);
3699 gimple_call_set_lhs (new_stmt, new_temp);
3700 new_stmt_info
3701 = vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
3702
3703 if (j == 0)
3704 STMT_VINFO_VEC_STMT (stmt_info) = new_stmt_info;
3705 else
3706 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt_info;
3707
3708 prev_stmt_info = new_stmt_info;
3709 }
3710
3711 *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
3712 }
3713 else
3714 /* No current target implements this case. */
3715 return false;
3716
3717 vargs.release ();
3718
3719 /* The call in STMT might prevent it from being removed in DCE.
3720 We cannot remove it here, however, due to the way the SSA name
3721 it defines is mapped to the new definition. So just replace the
3722 rhs of the statement with something harmless. */
3723
3724 if (slp_node)
3725 return true;
3726
3727 stmt_info = vect_orig_stmt (stmt_info);
3728 lhs = gimple_get_lhs (stmt_info->stmt);
3729
3730 gassign *new_stmt
3731 = gimple_build_assign (lhs, build_zero_cst (TREE_TYPE (lhs)));
3732 vinfo->replace_stmt (gsi, stmt_info, new_stmt);
3733
3734 return true;
3735 }
3736
3737
3738 struct simd_call_arg_info
3739 {
3740 tree vectype;
3741 tree op;
3742 HOST_WIDE_INT linear_step;
3743 enum vect_def_type dt;
3744 unsigned int align;
3745 bool simd_lane_linear;
3746 };
3747
3748 /* Helper function of vectorizable_simd_clone_call. If OP, an SSA_NAME,
3749 is linear within a simd lane (but not within the whole loop), note it
3750 in *ARGINFO. */
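/* A sketch of the kind of definition chain this recognizes (with made-up
   SSA names):

       _1 = .GOMP_SIMD_LANE (simduid.0_5(D));
       _2 = (sizetype) _1;
       _3 = _2 * 4;
       op_6 = &a + _3;

   OP advances by 4 per simd lane, so *ARGINFO is noted as simd_lane_linear
   with base &a and linear_step 4.  */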
3751
3752 static void
3753 vect_simd_lane_linear (tree op, class loop *loop,
3754 struct simd_call_arg_info *arginfo)
3755 {
3756 gimple *def_stmt = SSA_NAME_DEF_STMT (op);
3757
3758 if (!is_gimple_assign (def_stmt)
3759 || gimple_assign_rhs_code (def_stmt) != POINTER_PLUS_EXPR
3760 || !is_gimple_min_invariant (gimple_assign_rhs1 (def_stmt)))
3761 return;
3762
3763 tree base = gimple_assign_rhs1 (def_stmt);
3764 HOST_WIDE_INT linear_step = 0;
3765 tree v = gimple_assign_rhs2 (def_stmt);
3766 while (TREE_CODE (v) == SSA_NAME)
3767 {
3768 tree t;
3769 def_stmt = SSA_NAME_DEF_STMT (v);
3770 if (is_gimple_assign (def_stmt))
3771 switch (gimple_assign_rhs_code (def_stmt))
3772 {
3773 case PLUS_EXPR:
3774 t = gimple_assign_rhs2 (def_stmt);
3775 if (linear_step || TREE_CODE (t) != INTEGER_CST)
3776 return;
3777 base = fold_build2 (POINTER_PLUS_EXPR, TREE_TYPE (base), base, t);
3778 v = gimple_assign_rhs1 (def_stmt);
3779 continue;
3780 case MULT_EXPR:
3781 t = gimple_assign_rhs2 (def_stmt);
3782 if (linear_step || !tree_fits_shwi_p (t) || integer_zerop (t))
3783 return;
3784 linear_step = tree_to_shwi (t);
3785 v = gimple_assign_rhs1 (def_stmt);
3786 continue;
3787 CASE_CONVERT:
3788 t = gimple_assign_rhs1 (def_stmt);
3789 if (TREE_CODE (TREE_TYPE (t)) != INTEGER_TYPE
3790 || (TYPE_PRECISION (TREE_TYPE (v))
3791 < TYPE_PRECISION (TREE_TYPE (t))))
3792 return;
3793 if (!linear_step)
3794 linear_step = 1;
3795 v = t;
3796 continue;
3797 default:
3798 return;
3799 }
3800 else if (gimple_call_internal_p (def_stmt, IFN_GOMP_SIMD_LANE)
3801 && loop->simduid
3802 && TREE_CODE (gimple_call_arg (def_stmt, 0)) == SSA_NAME
3803 && (SSA_NAME_VAR (gimple_call_arg (def_stmt, 0))
3804 == loop->simduid))
3805 {
3806 if (!linear_step)
3807 linear_step = 1;
3808 arginfo->linear_step = linear_step;
3809 arginfo->op = base;
3810 arginfo->simd_lane_linear = true;
3811 return;
3812 }
3813 }
3814 }
3815
3816 /* Return the number of elements in vector type VECTYPE, which is associated
3817 with a SIMD clone. At present these vectors always have a constant
3818 length. */
3819
3820 static unsigned HOST_WIDE_INT
3821 simd_clone_subparts (tree vectype)
3822 {
3823 return TYPE_VECTOR_SUBPARTS (vectype).to_constant ();
3824 }
3825
3826 /* Function vectorizable_simd_clone_call.
3827
3828 Check if STMT_INFO performs a function call that can be vectorized
3829 by calling a simd clone of the function.
3830 If VEC_STMT is also passed, vectorize STMT_INFO: create a vectorized
3831 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
3832 Return true if STMT_INFO is vectorizable in this way. */
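/* For example (a sketch; the clone's mangled name and ABI are
   target-dependent), given

       #pragma omp declare simd simdlen(4) notinbranch
       int foo (int a, int b);

   a call c = foo (x, y) in a loop vectorized with VF 4 can be replaced by
   a single call to the four-lane clone, roughly

       vect_c = foo.simdclone (vect_x, vect_y);  */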
3833
3834 static bool
3835 vectorizable_simd_clone_call (vec_info *vinfo, stmt_vec_info stmt_info,
3836 gimple_stmt_iterator *gsi,
3837 stmt_vec_info *vec_stmt, slp_tree slp_node,
3838 stmt_vector_for_cost *)
3839 {
3840 tree vec_dest;
3841 tree scalar_dest;
3842 tree op, type;
3843 tree vec_oprnd0 = NULL_TREE;
3844 stmt_vec_info prev_stmt_info;
3845 tree vectype;
3846 unsigned int nunits;
3847 loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
3848 bb_vec_info bb_vinfo = dyn_cast <bb_vec_info> (vinfo);
3849 class loop *loop = loop_vinfo ? LOOP_VINFO_LOOP (loop_vinfo) : NULL;
3850 tree fndecl, new_temp;
3851 int ncopies, j;
3852 auto_vec<simd_call_arg_info> arginfo;
3853 vec<tree> vargs = vNULL;
3854 size_t i, nargs;
3855 tree lhs, rtype, ratype;
3856 vec<constructor_elt, va_gc> *ret_ctor_elts = NULL;
3857
3858 /* Is STMT a vectorizable call? */
3859 gcall *stmt = dyn_cast <gcall *> (stmt_info->stmt);
3860 if (!stmt)
3861 return false;
3862
3863 fndecl = gimple_call_fndecl (stmt);
3864 if (fndecl == NULL_TREE)
3865 return false;
3866
3867 struct cgraph_node *node = cgraph_node::get (fndecl);
3868 if (node == NULL || node->simd_clones == NULL)
3869 return false;
3870
3871 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
3872 return false;
3873
3874 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
3875 && ! vec_stmt)
3876 return false;
3877
3878 if (gimple_call_lhs (stmt)
3879 && TREE_CODE (gimple_call_lhs (stmt)) != SSA_NAME)
3880 return false;
3881
3882 gcc_checking_assert (!stmt_can_throw_internal (cfun, stmt));
3883
3884 vectype = STMT_VINFO_VECTYPE (stmt_info);
3885
3886 if (loop_vinfo && nested_in_vect_loop_p (loop, stmt_info))
3887 return false;
3888
3889 /* FORNOW */
3890 if (slp_node)
3891 return false;
3892
3893 /* Process function arguments. */
3894 nargs = gimple_call_num_args (stmt);
3895
3896 /* Bail out if the function has zero arguments. */
3897 if (nargs == 0)
3898 return false;
3899
3900 arginfo.reserve (nargs, true);
3901
3902 for (i = 0; i < nargs; i++)
3903 {
3904 simd_call_arg_info thisarginfo;
3905 affine_iv iv;
3906
3907 thisarginfo.linear_step = 0;
3908 thisarginfo.align = 0;
3909 thisarginfo.op = NULL_TREE;
3910 thisarginfo.simd_lane_linear = false;
3911
3912 op = gimple_call_arg (stmt, i);
3913 if (!vect_is_simple_use (op, vinfo, &thisarginfo.dt,
3914 &thisarginfo.vectype)
3915 || thisarginfo.dt == vect_uninitialized_def)
3916 {
3917 if (dump_enabled_p ())
3918 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3919 "use not simple.\n");
3920 return false;
3921 }
3922
3923 if (thisarginfo.dt == vect_constant_def
3924 || thisarginfo.dt == vect_external_def)
3925 gcc_assert (thisarginfo.vectype == NULL_TREE);
3926 else
3927 {
3928 gcc_assert (thisarginfo.vectype != NULL_TREE);
3929 if (VECTOR_BOOLEAN_TYPE_P (thisarginfo.vectype))
3930 {
3931 if (dump_enabled_p ())
3932 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3933 "vector mask arguments are not supported\n");
3934 return false;
3935 }
3936 }
3937
3938 /* For linear arguments, the analyze phase should have saved
3939 the base and step in STMT_VINFO_SIMD_CLONE_INFO. */
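      /* That vector is laid out with the chosen clone's decl at element 0
	 and, for each linear argument I, the base at I*3+1, the step at
	 I*3+2 and a flag at I*3+3 saying whether the argument is linear
	 only within a simd lane (see the analysis code further down).  */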
3940 if (i * 3 + 4 <= STMT_VINFO_SIMD_CLONE_INFO (stmt_info).length ()
3941 && STMT_VINFO_SIMD_CLONE_INFO (stmt_info)[i * 3 + 2])
3942 {
3943 gcc_assert (vec_stmt);
3944 thisarginfo.linear_step
3945 = tree_to_shwi (STMT_VINFO_SIMD_CLONE_INFO (stmt_info)[i * 3 + 2]);
3946 thisarginfo.op
3947 = STMT_VINFO_SIMD_CLONE_INFO (stmt_info)[i * 3 + 1];
3948 thisarginfo.simd_lane_linear
3949 = (STMT_VINFO_SIMD_CLONE_INFO (stmt_info)[i * 3 + 3]
3950 == boolean_true_node);
3951 /* If the loop has been peeled for alignment, we need to adjust it. */
3952 tree n1 = LOOP_VINFO_NITERS_UNCHANGED (loop_vinfo);
3953 tree n2 = LOOP_VINFO_NITERS (loop_vinfo);
3954 if (n1 != n2 && !thisarginfo.simd_lane_linear)
3955 {
3956 tree bias = fold_build2 (MINUS_EXPR, TREE_TYPE (n1), n1, n2);
3957 tree step = STMT_VINFO_SIMD_CLONE_INFO (stmt_info)[i * 3 + 2];
3958 tree opt = TREE_TYPE (thisarginfo.op);
3959 bias = fold_convert (TREE_TYPE (step), bias);
3960 bias = fold_build2 (MULT_EXPR, TREE_TYPE (step), bias, step);
3961 thisarginfo.op
3962 = fold_build2 (POINTER_TYPE_P (opt)
3963 ? POINTER_PLUS_EXPR : PLUS_EXPR, opt,
3964 thisarginfo.op, bias);
3965 }
3966 }
3967 else if (!vec_stmt
3968 && thisarginfo.dt != vect_constant_def
3969 && thisarginfo.dt != vect_external_def
3970 && loop_vinfo
3971 && TREE_CODE (op) == SSA_NAME
3972 && simple_iv (loop, loop_containing_stmt (stmt), op,
3973 &iv, false)
3974 && tree_fits_shwi_p (iv.step))
3975 {
3976 thisarginfo.linear_step = tree_to_shwi (iv.step);
3977 thisarginfo.op = iv.base;
3978 }
3979 else if ((thisarginfo.dt == vect_constant_def
3980 || thisarginfo.dt == vect_external_def)
3981 && POINTER_TYPE_P (TREE_TYPE (op)))
3982 thisarginfo.align = get_pointer_alignment (op) / BITS_PER_UNIT;
3983 /* Addresses of array elements indexed by GOMP_SIMD_LANE are
3984 linear too. */
3985 if (POINTER_TYPE_P (TREE_TYPE (op))
3986 && !thisarginfo.linear_step
3987 && !vec_stmt
3988 && thisarginfo.dt != vect_constant_def
3989 && thisarginfo.dt != vect_external_def
3990 && loop_vinfo
3991 && !slp_node
3992 && TREE_CODE (op) == SSA_NAME)
3993 vect_simd_lane_linear (op, loop, &thisarginfo);
3994
3995 arginfo.quick_push (thisarginfo);
3996 }
3997
3998 unsigned HOST_WIDE_INT vf;
3999 if (!LOOP_VINFO_VECT_FACTOR (loop_vinfo).is_constant (&vf))
4000 {
4001 if (dump_enabled_p ())
4002 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4003 "not considering SIMD clones; not yet supported"
4004 " for variable-width vectors.\n");
4005 return false;
4006 }
4007
4008 unsigned int badness = 0;
4009 struct cgraph_node *bestn = NULL;
4010 if (STMT_VINFO_SIMD_CLONE_INFO (stmt_info).exists ())
4011 bestn = cgraph_node::get (STMT_VINFO_SIMD_CLONE_INFO (stmt_info)[0]);
4012 else
4013 for (struct cgraph_node *n = node->simd_clones; n != NULL;
4014 n = n->simdclone->next_clone)
4015 {
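      /* Score this candidate clone.  Clones whose simdlen exceeds the
	 vectorization factor, whose argument kinds or alignment do not
	 match, or which the target rejects are skipped; otherwise a smaller
	 simdlen, an inbranch clone or a target-reported penalty adds to
	 this_badness, and the candidate with the lowest badness wins.  */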
4016 unsigned int this_badness = 0;
4017 if (n->simdclone->simdlen > vf
4018 || n->simdclone->nargs != nargs)
4019 continue;
4020 if (n->simdclone->simdlen < vf)
4021 this_badness += (exact_log2 (vf)
4022 - exact_log2 (n->simdclone->simdlen)) * 1024;
4023 if (n->simdclone->inbranch)
4024 this_badness += 2048;
4025 int target_badness = targetm.simd_clone.usable (n);
4026 if (target_badness < 0)
4027 continue;
4028 this_badness += target_badness * 512;
4029 /* FORNOW: Have to add code to add the mask argument. */
4030 if (n->simdclone->inbranch)
4031 continue;
4032 for (i = 0; i < nargs; i++)
4033 {
4034 switch (n->simdclone->args[i].arg_type)
4035 {
4036 case SIMD_CLONE_ARG_TYPE_VECTOR:
4037 if (!useless_type_conversion_p
4038 (n->simdclone->args[i].orig_type,
4039 TREE_TYPE (gimple_call_arg (stmt, i))))
4040 i = -1;
4041 else if (arginfo[i].dt == vect_constant_def
4042 || arginfo[i].dt == vect_external_def
4043 || arginfo[i].linear_step)
4044 this_badness += 64;
4045 break;
4046 case SIMD_CLONE_ARG_TYPE_UNIFORM:
4047 if (arginfo[i].dt != vect_constant_def
4048 && arginfo[i].dt != vect_external_def)
4049 i = -1;
4050 break;
4051 case SIMD_CLONE_ARG_TYPE_LINEAR_CONSTANT_STEP:
4052 case SIMD_CLONE_ARG_TYPE_LINEAR_REF_CONSTANT_STEP:
4053 if (arginfo[i].dt == vect_constant_def
4054 || arginfo[i].dt == vect_external_def
4055 || (arginfo[i].linear_step
4056 != n->simdclone->args[i].linear_step))
4057 i = -1;
4058 break;
4059 case SIMD_CLONE_ARG_TYPE_LINEAR_VARIABLE_STEP:
4060 case SIMD_CLONE_ARG_TYPE_LINEAR_VAL_CONSTANT_STEP:
4061 case SIMD_CLONE_ARG_TYPE_LINEAR_UVAL_CONSTANT_STEP:
4062 case SIMD_CLONE_ARG_TYPE_LINEAR_REF_VARIABLE_STEP:
4063 case SIMD_CLONE_ARG_TYPE_LINEAR_VAL_VARIABLE_STEP:
4064 case SIMD_CLONE_ARG_TYPE_LINEAR_UVAL_VARIABLE_STEP:
4065 /* FORNOW */
4066 i = -1;
4067 break;
4068 case SIMD_CLONE_ARG_TYPE_MASK:
4069 gcc_unreachable ();
4070 }
4071 if (i == (size_t) -1)
4072 break;
4073 if (n->simdclone->args[i].alignment > arginfo[i].align)
4074 {
4075 i = -1;
4076 break;
4077 }
4078 if (arginfo[i].align)
4079 this_badness += (exact_log2 (arginfo[i].align)
4080 - exact_log2 (n->simdclone->args[i].alignment));
4081 }
4082 if (i == (size_t) -1)
4083 continue;
4084 if (bestn == NULL || this_badness < badness)
4085 {
4086 bestn = n;
4087 badness = this_badness;
4088 }
4089 }
4090
4091 if (bestn == NULL)
4092 return false;
4093
4094 for (i = 0; i < nargs; i++)
4095 if ((arginfo[i].dt == vect_constant_def
4096 || arginfo[i].dt == vect_external_def)
4097 && bestn->simdclone->args[i].arg_type == SIMD_CLONE_ARG_TYPE_VECTOR)
4098 {
4099 tree arg_type = TREE_TYPE (gimple_call_arg (stmt, i));
4100 arginfo[i].vectype = get_vectype_for_scalar_type (vinfo, arg_type,
4101 slp_node);
4102 if (arginfo[i].vectype == NULL
4103 || (simd_clone_subparts (arginfo[i].vectype)
4104 > bestn->simdclone->simdlen))
4105 return false;
4106 }
4107
4108 fndecl = bestn->decl;
4109 nunits = bestn->simdclone->simdlen;
4110 ncopies = vf / nunits;
4111
4112 /* If the function isn't const, only allow it in simd loops where the
4113 user has asserted that at least nunits consecutive iterations can be
4114 performed using SIMD instructions. */
4115 if ((loop == NULL || (unsigned) loop->safelen < nunits)
4116 && gimple_vuse (stmt))
4117 return false;
4118
4119 /* Sanity check: make sure that at least one copy of the vectorized stmt
4120 needs to be generated. */
4121 gcc_assert (ncopies >= 1);
4122
4123 if (!vec_stmt) /* transformation not required. */
4124 {
4125 STMT_VINFO_SIMD_CLONE_INFO (stmt_info).safe_push (bestn->decl);
4126 for (i = 0; i < nargs; i++)
4127 if ((bestn->simdclone->args[i].arg_type
4128 == SIMD_CLONE_ARG_TYPE_LINEAR_CONSTANT_STEP)
4129 || (bestn->simdclone->args[i].arg_type
4130 == SIMD_CLONE_ARG_TYPE_LINEAR_REF_CONSTANT_STEP))
4131 {
4132 STMT_VINFO_SIMD_CLONE_INFO (stmt_info).safe_grow_cleared (i * 3
4133 + 1);
4134 STMT_VINFO_SIMD_CLONE_INFO (stmt_info).safe_push (arginfo[i].op);
4135 tree lst = POINTER_TYPE_P (TREE_TYPE (arginfo[i].op))
4136 ? size_type_node : TREE_TYPE (arginfo[i].op);
4137 tree ls = build_int_cst (lst, arginfo[i].linear_step);
4138 STMT_VINFO_SIMD_CLONE_INFO (stmt_info).safe_push (ls);
4139 tree sll = arginfo[i].simd_lane_linear
4140 ? boolean_true_node : boolean_false_node;
4141 STMT_VINFO_SIMD_CLONE_INFO (stmt_info).safe_push (sll);
4142 }
4143 STMT_VINFO_TYPE (stmt_info) = call_simd_clone_vec_info_type;
4144 DUMP_VECT_SCOPE ("vectorizable_simd_clone_call");
4145 /* vect_model_simple_cost (vinfo, stmt_info, ncopies,
4146 dt, slp_node, cost_vec); */
4147 return true;
4148 }
4149
4150 /* Transform. */
4151
4152 if (dump_enabled_p ())
4153 dump_printf_loc (MSG_NOTE, vect_location, "transform call.\n");
4154
4155 /* Handle def. */
4156 scalar_dest = gimple_call_lhs (stmt);
4157 vec_dest = NULL_TREE;
4158 rtype = NULL_TREE;
4159 ratype = NULL_TREE;
4160 if (scalar_dest)
4161 {
4162 vec_dest = vect_create_destination_var (scalar_dest, vectype);
4163 rtype = TREE_TYPE (TREE_TYPE (fndecl));
4164 if (TREE_CODE (rtype) == ARRAY_TYPE)
4165 {
4166 ratype = rtype;
4167 rtype = TREE_TYPE (ratype);
4168 }
4169 }
4170
4171 prev_stmt_info = NULL;
4172 for (j = 0; j < ncopies; ++j)
4173 {
4174 /* Build argument list for the vectorized call. */
4175 if (j == 0)
4176 vargs.create (nargs);
4177 else
4178 vargs.truncate (0);
4179
4180 for (i = 0; i < nargs; i++)
4181 {
4182 unsigned int k, l, m, o;
4183 tree atype;
4184 op = gimple_call_arg (stmt, i);
4185 switch (bestn->simdclone->args[i].arg_type)
4186 {
4187 case SIMD_CLONE_ARG_TYPE_VECTOR:
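	      /* The clone's vector argument type need not have the same
		 number of elements as our vector type: if it is narrower,
		 pieces of our defs are extracted with BIT_FIELD_REFs; if it
		 is wider, several defs are glued together with a
		 CONSTRUCTOR.  */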
4188 atype = bestn->simdclone->args[i].vector_type;
4189 o = nunits / simd_clone_subparts (atype);
4190 for (m = j * o; m < (j + 1) * o; m++)
4191 {
4192 if (simd_clone_subparts (atype)
4193 < simd_clone_subparts (arginfo[i].vectype))
4194 {
4195 poly_uint64 prec = GET_MODE_BITSIZE (TYPE_MODE (atype));
4196 k = (simd_clone_subparts (arginfo[i].vectype)
4197 / simd_clone_subparts (atype));
4198 gcc_assert ((k & (k - 1)) == 0);
4199 if (m == 0)
4200 vec_oprnd0
4201 = vect_get_vec_def_for_operand (vinfo, op, stmt_info);
4202 else
4203 {
4204 vec_oprnd0 = arginfo[i].op;
4205 if ((m & (k - 1)) == 0)
4206 vec_oprnd0
4207 = vect_get_vec_def_for_stmt_copy (vinfo,
4208 vec_oprnd0);
4209 }
4210 arginfo[i].op = vec_oprnd0;
4211 vec_oprnd0
4212 = build3 (BIT_FIELD_REF, atype, vec_oprnd0,
4213 bitsize_int (prec),
4214 bitsize_int ((m & (k - 1)) * prec));
4215 gassign *new_stmt
4216 = gimple_build_assign (make_ssa_name (atype),
4217 vec_oprnd0);
4218 vect_finish_stmt_generation (vinfo, stmt_info,
4219 new_stmt, gsi);
4220 vargs.safe_push (gimple_assign_lhs (new_stmt));
4221 }
4222 else
4223 {
4224 k = (simd_clone_subparts (atype)
4225 / simd_clone_subparts (arginfo[i].vectype));
4226 gcc_assert ((k & (k - 1)) == 0);
4227 vec<constructor_elt, va_gc> *ctor_elts;
4228 if (k != 1)
4229 vec_alloc (ctor_elts, k);
4230 else
4231 ctor_elts = NULL;
4232 for (l = 0; l < k; l++)
4233 {
4234 if (m == 0 && l == 0)
4235 vec_oprnd0
4236 = vect_get_vec_def_for_operand (vinfo,
4237 op, stmt_info);
4238 else
4239 vec_oprnd0
4240 = vect_get_vec_def_for_stmt_copy (vinfo,
4241 arginfo[i].op);
4242 arginfo[i].op = vec_oprnd0;
4243 if (k == 1)
4244 break;
4245 CONSTRUCTOR_APPEND_ELT (ctor_elts, NULL_TREE,
4246 vec_oprnd0);
4247 }
4248 if (k == 1)
4249 vargs.safe_push (vec_oprnd0);
4250 else
4251 {
4252 vec_oprnd0 = build_constructor (atype, ctor_elts);
4253 gassign *new_stmt
4254 = gimple_build_assign (make_ssa_name (atype),
4255 vec_oprnd0);
4256 vect_finish_stmt_generation (vinfo, stmt_info,
4257 new_stmt, gsi);
4258 vargs.safe_push (gimple_assign_lhs (new_stmt));
4259 }
4260 }
4261 }
4262 break;
4263 case SIMD_CLONE_ARG_TYPE_UNIFORM:
4264 vargs.safe_push (op);
4265 break;
4266 case SIMD_CLONE_ARG_TYPE_LINEAR_CONSTANT_STEP:
4267 case SIMD_CLONE_ARG_TYPE_LINEAR_REF_CONSTANT_STEP:
4268 if (j == 0)
4269 {
4270 gimple_seq stmts;
4271 arginfo[i].op
4272 = force_gimple_operand (unshare_expr (arginfo[i].op),
4273 &stmts, true, NULL_TREE);
4274 if (stmts != NULL)
4275 {
4276 basic_block new_bb;
4277 edge pe = loop_preheader_edge (loop);
4278 new_bb = gsi_insert_seq_on_edge_immediate (pe, stmts);
4279 gcc_assert (!new_bb);
4280 }
4281 if (arginfo[i].simd_lane_linear)
4282 {
4283 vargs.safe_push (arginfo[i].op);
4284 break;
4285 }
4286 tree phi_res = copy_ssa_name (op);
4287 gphi *new_phi = create_phi_node (phi_res, loop->header);
4288 loop_vinfo->add_stmt (new_phi);
4289 add_phi_arg (new_phi, arginfo[i].op,
4290 loop_preheader_edge (loop), UNKNOWN_LOCATION);
4291 enum tree_code code
4292 = POINTER_TYPE_P (TREE_TYPE (op))
4293 ? POINTER_PLUS_EXPR : PLUS_EXPR;
4294 tree type = POINTER_TYPE_P (TREE_TYPE (op))
4295 ? sizetype : TREE_TYPE (op);
4296 widest_int cst
4297 = wi::mul (bestn->simdclone->args[i].linear_step,
4298 ncopies * nunits);
4299 tree tcst = wide_int_to_tree (type, cst);
4300 tree phi_arg = copy_ssa_name (op);
4301 gassign *new_stmt
4302 = gimple_build_assign (phi_arg, code, phi_res, tcst);
4303 gimple_stmt_iterator si = gsi_after_labels (loop->header);
4304 gsi_insert_after (&si, new_stmt, GSI_NEW_STMT);
4305 loop_vinfo->add_stmt (new_stmt);
4306 add_phi_arg (new_phi, phi_arg, loop_latch_edge (loop),
4307 UNKNOWN_LOCATION);
4308 arginfo[i].op = phi_res;
4309 vargs.safe_push (phi_res);
4310 }
4311 else
4312 {
4313 enum tree_code code
4314 = POINTER_TYPE_P (TREE_TYPE (op))
4315 ? POINTER_PLUS_EXPR : PLUS_EXPR;
4316 tree type = POINTER_TYPE_P (TREE_TYPE (op))
4317 ? sizetype : TREE_TYPE (op);
4318 widest_int cst
4319 = wi::mul (bestn->simdclone->args[i].linear_step,
4320 j * nunits);
4321 tree tcst = wide_int_to_tree (type, cst);
4322 new_temp = make_ssa_name (TREE_TYPE (op));
4323 gassign *new_stmt
4324 = gimple_build_assign (new_temp, code,
4325 arginfo[i].op, tcst);
4326 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
4327 vargs.safe_push (new_temp);
4328 }
4329 break;
4330 case SIMD_CLONE_ARG_TYPE_LINEAR_VAL_CONSTANT_STEP:
4331 case SIMD_CLONE_ARG_TYPE_LINEAR_UVAL_CONSTANT_STEP:
4332 case SIMD_CLONE_ARG_TYPE_LINEAR_VARIABLE_STEP:
4333 case SIMD_CLONE_ARG_TYPE_LINEAR_REF_VARIABLE_STEP:
4334 case SIMD_CLONE_ARG_TYPE_LINEAR_VAL_VARIABLE_STEP:
4335 case SIMD_CLONE_ARG_TYPE_LINEAR_UVAL_VARIABLE_STEP:
4336 default:
4337 gcc_unreachable ();
4338 }
4339 }
4340
4341 gcall *new_call = gimple_build_call_vec (fndecl, vargs);
4342 if (vec_dest)
4343 {
4344 gcc_assert (ratype || simd_clone_subparts (rtype) == nunits);
4345 if (ratype)
4346 new_temp = create_tmp_var (ratype);
4347 else if (simd_clone_subparts (vectype)
4348 == simd_clone_subparts (rtype))
4349 new_temp = make_ssa_name (vec_dest, new_call);
4350 else
4351 new_temp = make_ssa_name (rtype, new_call);
4352 gimple_call_set_lhs (new_call, new_temp);
4353 }
4354 stmt_vec_info new_stmt_info
4355 = vect_finish_stmt_generation (vinfo, stmt_info, new_call, gsi);
4356
4357 if (vec_dest)
4358 {
4359 if (simd_clone_subparts (vectype) < nunits)
4360 {
4361 unsigned int k, l;
4362 poly_uint64 prec = GET_MODE_BITSIZE (TYPE_MODE (vectype));
4363 poly_uint64 bytes = GET_MODE_SIZE (TYPE_MODE (vectype));
4364 k = nunits / simd_clone_subparts (vectype);
4365 gcc_assert ((k & (k - 1)) == 0);
4366 for (l = 0; l < k; l++)
4367 {
4368 tree t;
4369 if (ratype)
4370 {
4371 t = build_fold_addr_expr (new_temp);
4372 t = build2 (MEM_REF, vectype, t,
4373 build_int_cst (TREE_TYPE (t), l * bytes));
4374 }
4375 else
4376 t = build3 (BIT_FIELD_REF, vectype, new_temp,
4377 bitsize_int (prec), bitsize_int (l * prec));
4378 gimple *new_stmt
4379 = gimple_build_assign (make_ssa_name (vectype), t);
4380 new_stmt_info
4381 = vect_finish_stmt_generation (vinfo, stmt_info,
4382 new_stmt, gsi);
4383
4384 if (j == 0 && l == 0)
4385 STMT_VINFO_VEC_STMT (stmt_info)
4386 = *vec_stmt = new_stmt_info;
4387 else
4388 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt_info;
4389
4390 prev_stmt_info = new_stmt_info;
4391 }
4392
4393 if (ratype)
4394 vect_clobber_variable (vinfo, stmt_info, gsi, new_temp);
4395 continue;
4396 }
4397 else if (simd_clone_subparts (vectype) > nunits)
4398 {
4399 unsigned int k = (simd_clone_subparts (vectype)
4400 / simd_clone_subparts (rtype));
4401 gcc_assert ((k & (k - 1)) == 0);
4402 if ((j & (k - 1)) == 0)
4403 vec_alloc (ret_ctor_elts, k);
4404 if (ratype)
4405 {
4406 unsigned int m, o = nunits / simd_clone_subparts (rtype);
4407 for (m = 0; m < o; m++)
4408 {
4409 tree tem = build4 (ARRAY_REF, rtype, new_temp,
4410 size_int (m), NULL_TREE, NULL_TREE);
4411 gimple *new_stmt
4412 = gimple_build_assign (make_ssa_name (rtype), tem);
4413 new_stmt_info
4414 = vect_finish_stmt_generation (vinfo, stmt_info,
4415 new_stmt, gsi);
4416 CONSTRUCTOR_APPEND_ELT (ret_ctor_elts, NULL_TREE,
4417 gimple_assign_lhs (new_stmt));
4418 }
4419 vect_clobber_variable (vinfo, stmt_info, gsi, new_temp);
4420 }
4421 else
4422 CONSTRUCTOR_APPEND_ELT (ret_ctor_elts, NULL_TREE, new_temp);
4423 if ((j & (k - 1)) != k - 1)
4424 continue;
4425 vec_oprnd0 = build_constructor (vectype, ret_ctor_elts);
4426 gimple *new_stmt
4427 = gimple_build_assign (make_ssa_name (vec_dest), vec_oprnd0);
4428 new_stmt_info
4429 = vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
4430
4431 if ((unsigned) j == k - 1)
4432 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt_info;
4433 else
4434 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt_info;
4435
4436 prev_stmt_info = new_stmt_info;
4437 continue;
4438 }
4439 else if (ratype)
4440 {
4441 tree t = build_fold_addr_expr (new_temp);
4442 t = build2 (MEM_REF, vectype, t,
4443 build_int_cst (TREE_TYPE (t), 0));
4444 gimple *new_stmt
4445 = gimple_build_assign (make_ssa_name (vec_dest), t);
4446 new_stmt_info
4447 = vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
4448 vect_clobber_variable (vinfo, stmt_info, gsi, new_temp);
4449 }
4450 }
4451
4452 if (j == 0)
4453 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt_info;
4454 else
4455 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt_info;
4456
4457 prev_stmt_info = new_stmt_info;
4458 }
4459
4460 vargs.release ();
4461
4462 /* The call in STMT might prevent it from being removed in DCE.
4463 We cannot remove it here, however, due to the way the SSA name
4464 it defines is mapped to the new definition. So just replace the
4465 rhs of the statement with something harmless. */
4466
4467 if (slp_node)
4468 return true;
4469
4470 gimple *new_stmt;
4471 if (scalar_dest)
4472 {
4473 type = TREE_TYPE (scalar_dest);
4474 lhs = gimple_call_lhs (vect_orig_stmt (stmt_info)->stmt);
4475 new_stmt = gimple_build_assign (lhs, build_zero_cst (type));
4476 }
4477 else
4478 new_stmt = gimple_build_nop ();
4479 vinfo->replace_stmt (gsi, vect_orig_stmt (stmt_info), new_stmt);
4480 unlink_stmt_vdef (stmt);
4481
4482 return true;
4483 }
4484
4485
4486 /* Function vect_gen_widened_results_half
4487
4488 Create a vector stmt whose code, number of operands and result
4489 variable are CODE, OP_TYPE and VEC_DEST respectively, and whose
4490 operands are VEC_OPRND0 and VEC_OPRND1 (VEC_OPRND1 is ignored when
4491 CODE is a unary operation). The new vector stmt is to be inserted
4492 at GSI.
4493 STMT_INFO is the original scalar stmt that we are vectorizing. */
4494
4495 static gimple *
4496 vect_gen_widened_results_half (vec_info *vinfo, enum tree_code code,
4497 tree vec_oprnd0, tree vec_oprnd1, int op_type,
4498 tree vec_dest, gimple_stmt_iterator *gsi,
4499 stmt_vec_info stmt_info)
4500 {
4501 gimple *new_stmt;
4502 tree new_temp;
4503
4504 /* Generate half of the widened result: */
4505 gcc_assert (op_type == TREE_CODE_LENGTH (code));
4506 if (op_type != binary_op)
4507 vec_oprnd1 = NULL;
4508 new_stmt = gimple_build_assign (vec_dest, code, vec_oprnd0, vec_oprnd1);
4509 new_temp = make_ssa_name (vec_dest, new_stmt);
4510 gimple_assign_set_lhs (new_stmt, new_temp);
4511 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
4512
4513 return new_stmt;
4514 }
4515
4516
4517 /* Get vectorized definitions for loop-based vectorization of STMT_INFO.
4518 For the first operand we call vect_get_vec_def_for_operand (with OPRND
4519 containing the scalar operand), and for the rest we get a copy with
4520 vect_get_vec_def_for_stmt_copy() using the previous vector definition
4521 (stored in OPRND). See vect_get_vec_def_for_stmt_copy() for details.
4522 The vectors are collected into VEC_OPRNDS. */
4523
4524 static void
4525 vect_get_loop_based_defs (vec_info *vinfo, tree *oprnd, stmt_vec_info stmt_info,
4526 vec<tree> *vec_oprnds, int multi_step_cvt)
4527 {
4528 tree vec_oprnd;
4529
4530 /* Get first vector operand. */
4531 /* All the vector operands except the very first one (that is the scalar
4532 operand) are stmt copies. */
4533 if (TREE_CODE (TREE_TYPE (*oprnd)) != VECTOR_TYPE)
4534 vec_oprnd = vect_get_vec_def_for_operand (vinfo, *oprnd, stmt_info);
4535 else
4536 vec_oprnd = vect_get_vec_def_for_stmt_copy (vinfo, *oprnd);
4537
4538 vec_oprnds->quick_push (vec_oprnd);
4539
4540 /* Get second vector operand. */
4541 vec_oprnd = vect_get_vec_def_for_stmt_copy (vinfo, vec_oprnd);
4542 vec_oprnds->quick_push (vec_oprnd);
4543
4544 *oprnd = vec_oprnd;
4545
4546 /* For conversion in multiple steps, continue to get operands
4547 recursively. */
4548 if (multi_step_cvt)
4549 vect_get_loop_based_defs (vinfo, oprnd, stmt_info, vec_oprnds,
4550 multi_step_cvt - 1);
4551 }
4552
4553
4554 /* Create vectorized demotion statements for vector operands from VEC_OPRNDS.
4555 For multi-step conversions store the resulting vectors and call the function
4556 recursively. */
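/* For example (a sketch), a single-step SI->HI demotion turns each pair of
   V4SI operands into one V8HI result:

       out_v8hi = VEC_PACK_TRUNC_EXPR <in1_v4si, in2_v4si>;

   For a two-step SI->QI demotion the V8HI results of the first step are in
   turn packed pairwise into V16QI vectors by the recursive call.  */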
4557
4558 static void
4559 vect_create_vectorized_demotion_stmts (vec_info *vinfo, vec<tree> *vec_oprnds,
4560 int multi_step_cvt,
4561 stmt_vec_info stmt_info,
4562 vec<tree> vec_dsts,
4563 gimple_stmt_iterator *gsi,
4564 slp_tree slp_node, enum tree_code code,
4565 stmt_vec_info *prev_stmt_info)
4566 {
4567 unsigned int i;
4568 tree vop0, vop1, new_tmp, vec_dest;
4569
4570 vec_dest = vec_dsts.pop ();
4571
4572 for (i = 0; i < vec_oprnds->length (); i += 2)
4573 {
4574 /* Create demotion operation. */
4575 vop0 = (*vec_oprnds)[i];
4576 vop1 = (*vec_oprnds)[i + 1];
4577 gassign *new_stmt = gimple_build_assign (vec_dest, code, vop0, vop1);
4578 new_tmp = make_ssa_name (vec_dest, new_stmt);
4579 gimple_assign_set_lhs (new_stmt, new_tmp);
4580 stmt_vec_info new_stmt_info
4581 = vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
4582
4583 if (multi_step_cvt)
4584 /* Store the resulting vector for next recursive call. */
4585 (*vec_oprnds)[i/2] = new_tmp;
4586 else
4587 {
4588 /* This is the last step of the conversion sequence. Store the
4589 vectors in SLP_NODE or in the vector info of the scalar statement
4590 (or in the STMT_VINFO_RELATED_STMT chain). */
4591 if (slp_node)
4592 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt_info);
4593 else
4594 {
4595 if (!*prev_stmt_info)
4596 STMT_VINFO_VEC_STMT (stmt_info) = new_stmt_info;
4597 else
4598 STMT_VINFO_RELATED_STMT (*prev_stmt_info) = new_stmt_info;
4599
4600 *prev_stmt_info = new_stmt_info;
4601 }
4602 }
4603 }
4604
4605 /* For multi-step demotion operations we first generate demotion operations
4606 from the source type to the intermediate types, and then combine the
4607 results (stored in VEC_OPRNDS) with a further demotion operation to the
4608 destination type. */
4609 if (multi_step_cvt)
4610 {
4611 /* At each level of recursion we have half of the operands we had at the
4612 previous level. */
4613 vec_oprnds->truncate ((i+1)/2);
4614 vect_create_vectorized_demotion_stmts (vinfo, vec_oprnds,
4615 multi_step_cvt - 1,
4616 stmt_info, vec_dsts, gsi,
4617 slp_node, VEC_PACK_TRUNC_EXPR,
4618 prev_stmt_info);
4619 }
4620
4621 vec_dsts.quick_push (vec_dest);
4622 }
4623
4624
4625 /* Create vectorized promotion statements for vector operands from VEC_OPRNDS0
4626 and VEC_OPRNDS1, for a binary operation associated with scalar statement
4627 STMT_INFO. For multi-step conversions store the resulting vectors and
4628 call the function recursively. */
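/* For example (a sketch), promoting one V8HI operand yields two V4SI
   results via its two halves:

       lo_v4si = VEC_UNPACK_LO_EXPR <in_v8hi>;
       hi_v4si = VEC_UNPACK_HI_EXPR <in_v8hi>;

   (in an endian-dependent order), so each entry of VEC_OPRNDS0 is replaced
   by two wider vectors.  */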
4629
4630 static void
4631 vect_create_vectorized_promotion_stmts (vec_info *vinfo,
4632 vec<tree> *vec_oprnds0,
4633 vec<tree> *vec_oprnds1,
4634 stmt_vec_info stmt_info, tree vec_dest,
4635 gimple_stmt_iterator *gsi,
4636 enum tree_code code1,
4637 enum tree_code code2, int op_type)
4638 {
4639 int i;
4640 tree vop0, vop1, new_tmp1, new_tmp2;
4641 gimple *new_stmt1, *new_stmt2;
4642 vec<tree> vec_tmp = vNULL;
4643
4644 vec_tmp.create (vec_oprnds0->length () * 2);
4645 FOR_EACH_VEC_ELT (*vec_oprnds0, i, vop0)
4646 {
4647 if (op_type == binary_op)
4648 vop1 = (*vec_oprnds1)[i];
4649 else
4650 vop1 = NULL_TREE;
4651
4652 /* Generate the two halves of promotion operation. */
4653 new_stmt1 = vect_gen_widened_results_half (vinfo, code1, vop0, vop1,
4654 op_type, vec_dest, gsi,
4655 stmt_info);
4656 new_stmt2 = vect_gen_widened_results_half (vinfo, code2, vop0, vop1,
4657 op_type, vec_dest, gsi,
4658 stmt_info);
4659 if (is_gimple_call (new_stmt1))
4660 {
4661 new_tmp1 = gimple_call_lhs (new_stmt1);
4662 new_tmp2 = gimple_call_lhs (new_stmt2);
4663 }
4664 else
4665 {
4666 new_tmp1 = gimple_assign_lhs (new_stmt1);
4667 new_tmp2 = gimple_assign_lhs (new_stmt2);
4668 }
4669
4670 /* Store the results for the next step. */
4671 vec_tmp.quick_push (new_tmp1);
4672 vec_tmp.quick_push (new_tmp2);
4673 }
4674
4675 vec_oprnds0->release ();
4676 *vec_oprnds0 = vec_tmp;
4677 }
4678
4679
4680 /* Check if STMT_INFO performs a conversion operation that can be vectorized.
4681 If VEC_STMT is also passed, vectorize STMT_INFO: create a vectorized
4682 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
4683 Return true if STMT_INFO is vectorizable in this way. */
4684
4685 static bool
4686 vectorizable_conversion (vec_info *vinfo,
4687 stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
4688 stmt_vec_info *vec_stmt, slp_tree slp_node,
4689 stmt_vector_for_cost *cost_vec)
4690 {
4691 tree vec_dest;
4692 tree scalar_dest;
4693 tree op0, op1 = NULL_TREE;
4694 tree vec_oprnd0 = NULL_TREE, vec_oprnd1 = NULL_TREE;
4695 loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
4696 enum tree_code code, code1 = ERROR_MARK, code2 = ERROR_MARK;
4697 enum tree_code codecvt1 = ERROR_MARK, codecvt2 = ERROR_MARK;
4698 tree new_temp;
4699 enum vect_def_type dt[2] = {vect_unknown_def_type, vect_unknown_def_type};
4700 int ndts = 2;
4701 stmt_vec_info prev_stmt_info;
4702 poly_uint64 nunits_in;
4703 poly_uint64 nunits_out;
4704 tree vectype_out, vectype_in;
4705 int ncopies, i, j;
4706 tree lhs_type, rhs_type;
4707 enum { NARROW, NONE, WIDEN } modifier;
4708 vec<tree> vec_oprnds0 = vNULL;
4709 vec<tree> vec_oprnds1 = vNULL;
4710 tree vop0;
4711 bb_vec_info bb_vinfo = dyn_cast <bb_vec_info> (vinfo);
4712 int multi_step_cvt = 0;
4713 vec<tree> interm_types = vNULL;
4714 tree last_oprnd, intermediate_type, cvt_type = NULL_TREE;
4715 int op_type;
4716 unsigned short fltsz;
4717
4718 /* Is STMT a vectorizable conversion? */
4719
4720 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
4721 return false;
4722
4723 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
4724 && ! vec_stmt)
4725 return false;
4726
4727 gassign *stmt = dyn_cast <gassign *> (stmt_info->stmt);
4728 if (!stmt)
4729 return false;
4730
4731 if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME)
4732 return false;
4733
4734 code = gimple_assign_rhs_code (stmt);
4735 if (!CONVERT_EXPR_CODE_P (code)
4736 && code != FIX_TRUNC_EXPR
4737 && code != FLOAT_EXPR
4738 && code != WIDEN_MULT_EXPR
4739 && code != WIDEN_LSHIFT_EXPR)
4740 return false;
4741
4742 op_type = TREE_CODE_LENGTH (code);
4743
4744 /* Check types of lhs and rhs. */
4745 scalar_dest = gimple_assign_lhs (stmt);
4746 lhs_type = TREE_TYPE (scalar_dest);
4747 vectype_out = STMT_VINFO_VECTYPE (stmt_info);
4748
4749 /* Check the operands of the operation. */
4750 slp_tree slp_op0, slp_op1 = NULL;
4751 if (!vect_is_simple_use (vinfo, stmt_info, slp_node,
4752 0, &op0, &slp_op0, &dt[0], &vectype_in))
4753 {
4754 if (dump_enabled_p ())
4755 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4756 "use not simple.\n");
4757 return false;
4758 }
4759
4760 rhs_type = TREE_TYPE (op0);
4761 if ((code != FIX_TRUNC_EXPR && code != FLOAT_EXPR)
4762 && !((INTEGRAL_TYPE_P (lhs_type)
4763 && INTEGRAL_TYPE_P (rhs_type))
4764 || (SCALAR_FLOAT_TYPE_P (lhs_type)
4765 && SCALAR_FLOAT_TYPE_P (rhs_type))))
4766 return false;
4767
4768 if (!VECTOR_BOOLEAN_TYPE_P (vectype_out)
4769 && ((INTEGRAL_TYPE_P (lhs_type)
4770 && !type_has_mode_precision_p (lhs_type))
4771 || (INTEGRAL_TYPE_P (rhs_type)
4772 && !type_has_mode_precision_p (rhs_type))))
4773 {
4774 if (dump_enabled_p ())
4775 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4776 "type conversion to/from bit-precision unsupported."
4777 "\n");
4778 return false;
4779 }
4780
4781 if (op_type == binary_op)
4782 {
4783 gcc_assert (code == WIDEN_MULT_EXPR || code == WIDEN_LSHIFT_EXPR);
4784
4785 op1 = gimple_assign_rhs2 (stmt);
4786 tree vectype1_in;
4787 if (!vect_is_simple_use (vinfo, stmt_info, slp_node, 1,
4788 &op1, &slp_op1, &dt[1], &vectype1_in))
4789 {
4790 if (dump_enabled_p ())
4791 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4792 "use not simple.\n");
4793 return false;
4794 }
4795 /* For WIDEN_MULT_EXPR, if OP0 is a constant, use the type of
4796 OP1. */
4797 if (!vectype_in)
4798 vectype_in = vectype1_in;
4799 }
4800
4801 /* If op0 is an external or constant def, infer the vector type
4802 from the scalar type. */
4803 if (!vectype_in)
4804 vectype_in = get_vectype_for_scalar_type (vinfo, rhs_type, slp_node);
4805 if (vec_stmt)
4806 gcc_assert (vectype_in);
4807 if (!vectype_in)
4808 {
4809 if (dump_enabled_p ())
4810 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4811 "no vectype for scalar type %T\n", rhs_type);
4812
4813 return false;
4814 }
4815
4816 if (VECTOR_BOOLEAN_TYPE_P (vectype_out)
4817 && !VECTOR_BOOLEAN_TYPE_P (vectype_in))
4818 {
4819 if (dump_enabled_p ())
4820 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4821 "can't convert between boolean and non "
4822 "boolean vectors %T\n", rhs_type);
4823
4824 return false;
4825 }
4826
4827 nunits_in = TYPE_VECTOR_SUBPARTS (vectype_in);
4828 nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
4829 if (known_eq (nunits_out, nunits_in))
4830 modifier = NONE;
4831 else if (multiple_p (nunits_out, nunits_in))
4832 modifier = NARROW;
4833 else
4834 {
4835 gcc_checking_assert (multiple_p (nunits_in, nunits_out));
4836 modifier = WIDEN;
4837 }
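  /* With 128-bit vectors, for example, an SI -> DI conversion has
     vectype_in V4SI and vectype_out V2DI, making the modifier WIDEN (one
     input vector yields two output vectors); DI -> SI gives NARROW (two
     input vectors are packed into one); and SI <-> SF keeps V4SI/V4SF and
     is NONE.  */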
4838
4839 /* Multiple types in SLP are handled by creating the appropriate number of
4840 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
4841 case of SLP. */
4842 if (slp_node)
4843 ncopies = 1;
4844 else if (modifier == NARROW)
4845 ncopies = vect_get_num_copies (loop_vinfo, vectype_out);
4846 else
4847 ncopies = vect_get_num_copies (loop_vinfo, vectype_in);
4848
4849 /* Sanity check: make sure that at least one copy of the vectorized stmt
4850 needs to be generated. */
4851 gcc_assert (ncopies >= 1);
4852
4853 bool found_mode = false;
4854 scalar_mode lhs_mode = SCALAR_TYPE_MODE (lhs_type);
4855 scalar_mode rhs_mode = SCALAR_TYPE_MODE (rhs_type);
4856 opt_scalar_mode rhs_mode_iter;
4857
4858 /* Supportable by target? */
4859 switch (modifier)
4860 {
4861 case NONE:
4862 if (code != FIX_TRUNC_EXPR
4863 && code != FLOAT_EXPR
4864 && !CONVERT_EXPR_CODE_P (code))
4865 return false;
4866 if (supportable_convert_operation (code, vectype_out, vectype_in, &code1))
4867 break;
4868 /* FALLTHRU */
4869 unsupported:
4870 if (dump_enabled_p ())
4871 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4872 "conversion not supported by target.\n");
4873 return false;
4874
4875 case WIDEN:
4876 if (supportable_widening_operation (vinfo, code, stmt_info, vectype_out,
4877 vectype_in, &code1, &code2,
4878 &multi_step_cvt, &interm_types))
4879 {
4880 /* A binary widening operation can only be supported directly by the
4881 architecture; it cannot be done in multiple steps. */
4882 gcc_assert (!(multi_step_cvt && op_type == binary_op));
4883 break;
4884 }
4885
4886 if (code != FLOAT_EXPR
4887 || GET_MODE_SIZE (lhs_mode) <= GET_MODE_SIZE (rhs_mode))
4888 goto unsupported;
4889
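      /* An int -> float conversion where the float is wider than the int
	 (e.g. SI -> DF) is done in two stages: widen the integer to an
	 intermediate integer mode no wider than the float (SI -> DI here)
	 and then convert that to the float type.  The loop below searches
	 for a suitable intermediate mode.  */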
4890 fltsz = GET_MODE_SIZE (lhs_mode);
4891 FOR_EACH_2XWIDER_MODE (rhs_mode_iter, rhs_mode)
4892 {
4893 rhs_mode = rhs_mode_iter.require ();
4894 if (GET_MODE_SIZE (rhs_mode) > fltsz)
4895 break;
4896
4897 cvt_type
4898 = build_nonstandard_integer_type (GET_MODE_BITSIZE (rhs_mode), 0);
4899 cvt_type = get_same_sized_vectype (cvt_type, vectype_in);
4900 if (cvt_type == NULL_TREE)
4901 goto unsupported;
4902
4903 if (GET_MODE_SIZE (rhs_mode) == fltsz)
4904 {
4905 if (!supportable_convert_operation (code, vectype_out,
4906 cvt_type, &codecvt1))
4907 goto unsupported;
4908 }
4909 else if (!supportable_widening_operation (vinfo, code, stmt_info,
4910 vectype_out, cvt_type,
4911 &codecvt1, &codecvt2,
4912 &multi_step_cvt,
4913 &interm_types))
4914 continue;
4915 else
4916 gcc_assert (multi_step_cvt == 0);
4917
4918 if (supportable_widening_operation (vinfo, NOP_EXPR, stmt_info,
4919 cvt_type,
4920 vectype_in, &code1, &code2,
4921 &multi_step_cvt, &interm_types))
4922 {
4923 found_mode = true;
4924 break;
4925 }
4926 }
4927
4928 if (!found_mode)
4929 goto unsupported;
4930
4931 if (GET_MODE_SIZE (rhs_mode) == fltsz)
4932 codecvt2 = ERROR_MARK;
4933 else
4934 {
4935 multi_step_cvt++;
4936 interm_types.safe_push (cvt_type);
4937 cvt_type = NULL_TREE;
4938 }
4939 break;
4940
4941 case NARROW:
4942 gcc_assert (op_type == unary_op);
4943 if (supportable_narrowing_operation (code, vectype_out, vectype_in,
4944 &code1, &multi_step_cvt,
4945 &interm_types))
4946 break;
4947
4948 if (code != FIX_TRUNC_EXPR
4949 || GET_MODE_SIZE (lhs_mode) >= GET_MODE_SIZE (rhs_mode))
4950 goto unsupported;
4951
4952 cvt_type
4953 = build_nonstandard_integer_type (GET_MODE_BITSIZE (rhs_mode), 0);
4954 cvt_type = get_same_sized_vectype (cvt_type, vectype_in);
4955 if (cvt_type == NULL_TREE)
4956 goto unsupported;
4957 if (!supportable_convert_operation (code, cvt_type, vectype_in,
4958 &codecvt1))
4959 goto unsupported;
4960 if (supportable_narrowing_operation (NOP_EXPR, vectype_out, cvt_type,
4961 &code1, &multi_step_cvt,
4962 &interm_types))
4963 break;
4964 goto unsupported;
4965
4966 default:
4967 gcc_unreachable ();
4968 }
4969
4970 if (!vec_stmt) /* transformation not required. */
4971 {
4972 if (slp_node
4973 && (!vect_maybe_update_slp_op_vectype (slp_op0, vectype_in)
4974 || !vect_maybe_update_slp_op_vectype (slp_op1, vectype_in)))
4975 {
4976 if (dump_enabled_p ())
4977 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4978 "incompatible vector types for invariants\n");
4979 return false;
4980 }
4981 DUMP_VECT_SCOPE ("vectorizable_conversion");
4982 if (modifier == NONE)
4983 {
4984 STMT_VINFO_TYPE (stmt_info) = type_conversion_vec_info_type;
4985 vect_model_simple_cost (vinfo, stmt_info, ncopies, dt, ndts, slp_node,
4986 cost_vec);
4987 }
4988 else if (modifier == NARROW)
4989 {
4990 STMT_VINFO_TYPE (stmt_info) = type_demotion_vec_info_type;
4991 /* The final packing step produces one vector result per copy. */
4992 unsigned int nvectors
4993 = (slp_node ? SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node) : ncopies);
4994 vect_model_promotion_demotion_cost (stmt_info, dt, nvectors,
4995 multi_step_cvt, cost_vec);
4996 }
4997 else
4998 {
4999 STMT_VINFO_TYPE (stmt_info) = type_promotion_vec_info_type;
5000 /* The initial unpacking step produces two vector results
5001 per copy. MULTI_STEP_CVT is 0 for a single conversion,
5002 so >> MULTI_STEP_CVT divides by 2^(number of steps - 1). */
5003 unsigned int nvectors
5004 = (slp_node
5005 ? SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node) >> multi_step_cvt
5006 : ncopies * 2);
5007 vect_model_promotion_demotion_cost (stmt_info, dt, nvectors,
5008 multi_step_cvt, cost_vec);
5009 }
5010 interm_types.release ();
5011 return true;
5012 }
5013
5014 /* Transform. */
5015 if (dump_enabled_p ())
5016 dump_printf_loc (MSG_NOTE, vect_location,
5017 "transform conversion. ncopies = %d.\n", ncopies);
5018
5019 if (op_type == binary_op)
5020 {
5021 if (CONSTANT_CLASS_P (op0))
5022 op0 = fold_convert (TREE_TYPE (op1), op0);
5023 else if (CONSTANT_CLASS_P (op1))
5024 op1 = fold_convert (TREE_TYPE (op0), op1);
5025 }
5026
5027 /* In case of multi-step conversion, we first generate conversion operations
5028 to the intermediate types, and then from those types to the final one.
5029 We create vector destinations for the intermediate types (TYPES) received
5030 from supportable_*_operation, and store them in the correct order
5031 for future use in vect_create_vectorized_*_stmts (). */
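  /* For instance, for a two-step QI -> SI promotion vec_dsts will hold the
     SI destination at index 0 and the intermediate HI destination last;
     the first conversion step uses the last element and the final step
     writes to element 0.  */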
5032 auto_vec<tree> vec_dsts (multi_step_cvt + 1);
5033 vec_dest = vect_create_destination_var (scalar_dest,
5034 (cvt_type && modifier == WIDEN)
5035 ? cvt_type : vectype_out);
5036 vec_dsts.quick_push (vec_dest);
5037
5038 if (multi_step_cvt)
5039 {
5040 for (i = interm_types.length () - 1;
5041 interm_types.iterate (i, &intermediate_type); i--)
5042 {
5043 vec_dest = vect_create_destination_var (scalar_dest,
5044 intermediate_type);
5045 vec_dsts.quick_push (vec_dest);
5046 }
5047 }
5048
5049 if (cvt_type)
5050 vec_dest = vect_create_destination_var (scalar_dest,
5051 modifier == WIDEN
5052 ? vectype_out : cvt_type);
5053
5054 if (!slp_node)
5055 {
5056 if (modifier == WIDEN)
5057 {
5058 vec_oprnds0.create (multi_step_cvt ? vect_pow2 (multi_step_cvt) : 1);
5059 if (op_type == binary_op)
5060 vec_oprnds1.create (1);
5061 }
5062 else if (modifier == NARROW)
5063 vec_oprnds0.create (
5064 2 * (multi_step_cvt ? vect_pow2 (multi_step_cvt) : 1));
5065 }
5066 else if (code == WIDEN_LSHIFT_EXPR)
5067 vec_oprnds1.create (slp_node->vec_stmts_size);
5068
5069 last_oprnd = op0;
5070 prev_stmt_info = NULL;
5071 switch (modifier)
5072 {
5073 case NONE:
5074 for (j = 0; j < ncopies; j++)
5075 {
5076 if (j == 0)
5077 vect_get_vec_defs (vinfo, op0, NULL, stmt_info, &vec_oprnds0,
5078 NULL, slp_node);
5079 else
5080 vect_get_vec_defs_for_stmt_copy (vinfo, &vec_oprnds0, NULL);
5081
5082 FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
5083 {
5084 stmt_vec_info new_stmt_info;
5085 /* Arguments are ready, create the new vector stmt. */
5086 gcc_assert (TREE_CODE_LENGTH (code1) == unary_op);
5087 gassign *new_stmt = gimple_build_assign (vec_dest, code1, vop0);
5088 new_temp = make_ssa_name (vec_dest, new_stmt);
5089 gimple_assign_set_lhs (new_stmt, new_temp);
5090 new_stmt_info
5091 = vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
5092
5093 if (slp_node)
5094 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt_info);
5095 else
5096 {
5097 if (!prev_stmt_info)
5098 STMT_VINFO_VEC_STMT (stmt_info)
5099 = *vec_stmt = new_stmt_info;
5100 else
5101 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt_info;
5102 prev_stmt_info = new_stmt_info;
5103 }
5104 }
5105 }
5106 break;
5107
5108 case WIDEN:
5109 /* In case the vectorization factor (VF) is bigger than the number
5110 of elements that we can fit in a vectype (nunits), we have to
5111 generate more than one vector stmt, i.e., we need to "unroll"
5112 the vector stmt by a factor VF/nunits. */
5113 for (j = 0; j < ncopies; j++)
5114 {
5115 /* Handle uses. */
5116 if (j == 0)
5117 {
5118 if (slp_node)
5119 {
5120 if (code == WIDEN_LSHIFT_EXPR)
5121 {
5122 unsigned int k;
5123
5124 vec_oprnd1 = op1;
5125 /* Store vec_oprnd1 for every vector stmt to be created
5126 for SLP_NODE. We check during the analysis that all
5127 the shift arguments are the same. */
5128 for (k = 0; k < slp_node->vec_stmts_size - 1; k++)
5129 vec_oprnds1.quick_push (vec_oprnd1);
5130
5131 vect_get_vec_defs (vinfo, op0, NULL_TREE, stmt_info,
5132 &vec_oprnds0, NULL, slp_node);
5133 }
5134 else
5135 vect_get_vec_defs (vinfo, op0, op1, stmt_info, &vec_oprnds0,
5136 &vec_oprnds1, slp_node);
5137 }
5138 else
5139 {
5140 vec_oprnd0 = vect_get_vec_def_for_operand (vinfo,
5141 op0, stmt_info);
5142 vec_oprnds0.quick_push (vec_oprnd0);
5143 if (op_type == binary_op)
5144 {
5145 if (code == WIDEN_LSHIFT_EXPR)
5146 vec_oprnd1 = op1;
5147 else
5148 vec_oprnd1
5149 = vect_get_vec_def_for_operand (vinfo,
5150 op1, stmt_info);
5151 vec_oprnds1.quick_push (vec_oprnd1);
5152 }
5153 }
5154 }
5155 else
5156 {
5157 vec_oprnd0 = vect_get_vec_def_for_stmt_copy (vinfo, vec_oprnd0);
5158 vec_oprnds0.truncate (0);
5159 vec_oprnds0.quick_push (vec_oprnd0);
5160 if (op_type == binary_op)
5161 {
5162 if (code == WIDEN_LSHIFT_EXPR)
5163 vec_oprnd1 = op1;
5164 else
5165 vec_oprnd1 = vect_get_vec_def_for_stmt_copy (vinfo,
5166 vec_oprnd1);
5167 vec_oprnds1.truncate (0);
5168 vec_oprnds1.quick_push (vec_oprnd1);
5169 }
5170 }
5171
5172 /* Arguments are ready. Create the new vector stmts. */
5173 for (i = multi_step_cvt; i >= 0; i--)
5174 {
5175 tree this_dest = vec_dsts[i];
5176 enum tree_code c1 = code1, c2 = code2;
5177 if (i == 0 && codecvt2 != ERROR_MARK)
5178 {
5179 c1 = codecvt1;
5180 c2 = codecvt2;
5181 }
5182 vect_create_vectorized_promotion_stmts (vinfo, &vec_oprnds0,
5183 &vec_oprnds1, stmt_info,
5184 this_dest, gsi,
5185 c1, c2, op_type);
5186 }
5187
5188 FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
5189 {
5190 stmt_vec_info new_stmt_info;
5191 if (cvt_type)
5192 {
5193 gcc_assert (TREE_CODE_LENGTH (codecvt1) == unary_op);
5194 new_temp = make_ssa_name (vec_dest);
5195 gassign *new_stmt
5196 = gimple_build_assign (new_temp, codecvt1, vop0);
5197 new_stmt_info
5198 = vect_finish_stmt_generation (vinfo, stmt_info,
5199 new_stmt, gsi);
5200 }
5201 else
5202 new_stmt_info = vinfo->lookup_def (vop0);
5203
5204 if (slp_node)
5205 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt_info);
5206 else
5207 {
5208 if (!prev_stmt_info)
5209 STMT_VINFO_VEC_STMT (stmt_info) = new_stmt_info;
5210 else
5211 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt_info;
5212 prev_stmt_info = new_stmt_info;
5213 }
5214 }
5215 }
5216
5217 *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
5218 break;
5219
5220 case NARROW:
5221 /* In case the vectorization factor (VF) is bigger than the number
5222 of elements that we can fit in a vectype (nunits), we have to
5223 generate more than one vector stmt, i.e., we need to "unroll"
5224 the vector stmt by a factor VF/nunits. */
5225 for (j = 0; j < ncopies; j++)
5226 {
5227 /* Handle uses. */
5228 if (slp_node)
5229 vect_get_vec_defs (vinfo, op0, NULL_TREE, stmt_info, &vec_oprnds0,
5230 NULL, slp_node);
5231 else
5232 {
5233 vec_oprnds0.truncate (0);
5234 vect_get_loop_based_defs (vinfo,
5235 &last_oprnd, stmt_info, &vec_oprnds0,
5236 vect_pow2 (multi_step_cvt) - 1);
5237 }
5238
5239 /* Arguments are ready. Create the new vector stmts. */
5240 if (cvt_type)
5241 FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
5242 {
5243 gcc_assert (TREE_CODE_LENGTH (codecvt1) == unary_op);
5244 new_temp = make_ssa_name (vec_dest);
5245 gassign *new_stmt
5246 = gimple_build_assign (new_temp, codecvt1, vop0);
5247 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
5248 vec_oprnds0[i] = new_temp;
5249 }
5250
5251 vect_create_vectorized_demotion_stmts (vinfo, &vec_oprnds0,
5252 multi_step_cvt,
5253 stmt_info, vec_dsts, gsi,
5254 slp_node, code1,
5255 &prev_stmt_info);
5256 }
5257
5258 *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
5259 break;
5260 }
5261
5262 vec_oprnds0.release ();
5263 vec_oprnds1.release ();
5264 interm_types.release ();
5265
5266 return true;
5267 }
5268
5269 /* Return true if we can assume from the scalar form of STMT_INFO that
5270 neither the scalar nor the vector forms will generate code. STMT_INFO
5271 is known not to involve a data reference. */
5272
5273 bool
5274 vect_nop_conversion_p (stmt_vec_info stmt_info)
5275 {
5276 gassign *stmt = dyn_cast <gassign *> (stmt_info->stmt);
5277 if (!stmt)
5278 return false;
5279
5280 tree lhs = gimple_assign_lhs (stmt);
5281 tree_code code = gimple_assign_rhs_code (stmt);
5282 tree rhs = gimple_assign_rhs1 (stmt);
5283
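  /* A plain copy (SSA_NAME on the RHS) or a VIEW_CONVERT_EXPR only
     reinterprets the operand's bits, so neither the scalar nor the
     vector form needs to generate any code.  */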
5284 if (code == SSA_NAME || code == VIEW_CONVERT_EXPR)
5285 return true;
5286
5287 if (CONVERT_EXPR_CODE_P (code))
5288 return tree_nop_conversion_p (TREE_TYPE (lhs), TREE_TYPE (rhs));
5289
5290 return false;
5291 }
5292
5293 /* Function vectorizable_assignment.
5294
5295 Check if STMT_INFO performs an assignment (copy) that can be vectorized.
5296 If VEC_STMT is also passed, vectorize the STMT_INFO: create a vectorized
5297 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
5298 Return true if STMT_INFO is vectorizable in this way. */
5299
5300 static bool
5301 vectorizable_assignment (vec_info *vinfo,
5302 stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
5303 stmt_vec_info *vec_stmt, slp_tree slp_node,
5304 stmt_vector_for_cost *cost_vec)
5305 {
5306 tree vec_dest;
5307 tree scalar_dest;
5308 tree op;
5309 loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
5310 tree new_temp;
5311 enum vect_def_type dt[1] = {vect_unknown_def_type};
5312 int ndts = 1;
5313 int ncopies;
5314 int i, j;
5315 vec<tree> vec_oprnds = vNULL;
5316 tree vop;
5317 bb_vec_info bb_vinfo = dyn_cast <bb_vec_info> (vinfo);
5318 stmt_vec_info prev_stmt_info = NULL;
5319 enum tree_code code;
5320 tree vectype_in;
5321
5322 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
5323 return false;
5324
5325 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
5326 && ! vec_stmt)
5327 return false;
5328
5329 /* Is this a vectorizable assignment? */
5330 gassign *stmt = dyn_cast <gassign *> (stmt_info->stmt);
5331 if (!stmt)
5332 return false;
5333
5334 scalar_dest = gimple_assign_lhs (stmt);
5335 if (TREE_CODE (scalar_dest) != SSA_NAME)
5336 return false;
5337
5338 if (STMT_VINFO_DATA_REF (stmt_info))
5339 return false;
5340
5341 code = gimple_assign_rhs_code (stmt);
5342 if (!(gimple_assign_single_p (stmt)
5343 || code == PAREN_EXPR
5344 || CONVERT_EXPR_CODE_P (code)))
5345 return false;
5346
5347 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
5348 poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
5349
5350 /* Multiple types in SLP are handled by creating the appropriate number of
5351 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
5352 case of SLP. */
5353 if (slp_node)
5354 ncopies = 1;
5355 else
5356 ncopies = vect_get_num_copies (loop_vinfo, vectype);
5357
5358 gcc_assert (ncopies >= 1);
5359
5360 slp_tree slp_op;
5361 if (!vect_is_simple_use (vinfo, stmt_info, slp_node, 0, &op, &slp_op,
5362 &dt[0], &vectype_in))
5363 {
5364 if (dump_enabled_p ())
5365 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5366 "use not simple.\n");
5367 return false;
5368 }
5369 if (!vectype_in)
5370 vectype_in = get_vectype_for_scalar_type (vinfo, TREE_TYPE (op), slp_node);
5371
5372 /* We can handle NOP_EXPR and VIEW_CONVERT_EXPR conversions that do
5373 not change the number of elements or the vector size. */
5374 if ((CONVERT_EXPR_CODE_P (code)
5375 || code == VIEW_CONVERT_EXPR)
5376 && (!vectype_in
5377 || maybe_ne (TYPE_VECTOR_SUBPARTS (vectype_in), nunits)
5378 || maybe_ne (GET_MODE_SIZE (TYPE_MODE (vectype)),
5379 GET_MODE_SIZE (TYPE_MODE (vectype_in)))))
5380 return false;
5381
5382 /* We do not handle bit-precision changes. */
5383 if ((CONVERT_EXPR_CODE_P (code)
5384 || code == VIEW_CONVERT_EXPR)
5385 && INTEGRAL_TYPE_P (TREE_TYPE (scalar_dest))
5386 && (!type_has_mode_precision_p (TREE_TYPE (scalar_dest))
5387 || !type_has_mode_precision_p (TREE_TYPE (op)))
5388 /* But a conversion that does not change the bit-pattern is ok. */
5389 && !((TYPE_PRECISION (TREE_TYPE (scalar_dest))
5390 > TYPE_PRECISION (TREE_TYPE (op)))
5391 && TYPE_UNSIGNED (TREE_TYPE (op)))
5392 /* Conversion between boolean types of different sizes is
5393 a simple assignment in case their vectypes are the same
5394 boolean vector type. */
5395 && (!VECTOR_BOOLEAN_TYPE_P (vectype)
5396 || !VECTOR_BOOLEAN_TYPE_P (vectype_in)))
5397 {
5398 if (dump_enabled_p ())
5399 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5400 "type conversion to/from bit-precision "
5401 "unsupported.\n");
5402 return false;
5403 }
5404
5405 if (!vec_stmt) /* transformation not required. */
5406 {
5407 if (slp_node
5408 && !vect_maybe_update_slp_op_vectype (slp_op, vectype_in))
5409 {
5410 if (dump_enabled_p ())
5411 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5412 "incompatible vector types for invariants\n");
5413 return false;
5414 }
5415 STMT_VINFO_TYPE (stmt_info) = assignment_vec_info_type;
5416 DUMP_VECT_SCOPE ("vectorizable_assignment");
5417 if (!vect_nop_conversion_p (stmt_info))
5418 vect_model_simple_cost (vinfo, stmt_info, ncopies, dt, ndts, slp_node,
5419 cost_vec);
5420 return true;
5421 }
5422
5423 /* Transform. */
5424 if (dump_enabled_p ())
5425 dump_printf_loc (MSG_NOTE, vect_location, "transform assignment.\n");
5426
5427 /* Handle def. */
5428 vec_dest = vect_create_destination_var (scalar_dest, vectype);
5429
5430 /* Handle use. */
5431 for (j = 0; j < ncopies; j++)
5432 {
5433 /* Handle uses. */
5434 if (j == 0)
5435 vect_get_vec_defs (vinfo, op, NULL, stmt_info, &vec_oprnds, NULL,
5436 slp_node);
5437 else
5438 vect_get_vec_defs_for_stmt_copy (vinfo, &vec_oprnds, NULL);
5439
5440 /* Arguments are ready. Create the new vector stmt. */
5441 stmt_vec_info new_stmt_info = NULL;
5442 FOR_EACH_VEC_ELT (vec_oprnds, i, vop)
5443 {
5444 if (CONVERT_EXPR_CODE_P (code)
5445 || code == VIEW_CONVERT_EXPR)
5446 vop = build1 (VIEW_CONVERT_EXPR, vectype, vop);
5447 gassign *new_stmt = gimple_build_assign (vec_dest, vop);
5448 new_temp = make_ssa_name (vec_dest, new_stmt);
5449 gimple_assign_set_lhs (new_stmt, new_temp);
5450 new_stmt_info
5451 = vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
5452 if (slp_node)
5453 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt_info);
5454 }
5455
5456 if (slp_node)
5457 continue;
5458
5459 if (j == 0)
5460 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt_info;
5461 else
5462 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt_info;
5463
5464 prev_stmt_info = new_stmt_info;
5465 }
5466
5467 vec_oprnds.release ();
5468 return true;
5469 }
5470
5471
5472 /* Return TRUE if CODE (a shift operation) is supported for SCALAR_TYPE
5473 either as shift by a scalar or by a vector. */
5474
5475 bool
5476 vect_supportable_shift (vec_info *vinfo, enum tree_code code, tree scalar_type)
5477 {
5478
5479 machine_mode vec_mode;
5480 optab optab;
5481 int icode;
5482 tree vectype;
5483
5484 vectype = get_vectype_for_scalar_type (vinfo, scalar_type);
5485 if (!vectype)
5486 return false;
5487
5488 optab = optab_for_tree_code (code, vectype, optab_scalar);
5489 if (!optab
5490 || optab_handler (optab, TYPE_MODE (vectype)) == CODE_FOR_nothing)
5491 {
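      /* No vector/scalar shift pattern for this mode; see whether a
	 vector/vector shift is available instead.  */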
5492 optab = optab_for_tree_code (code, vectype, optab_vector);
5493 if (!optab
5494 || (optab_handler (optab, TYPE_MODE (vectype))
5495 == CODE_FOR_nothing))
5496 return false;
5497 }
5498
5499 vec_mode = TYPE_MODE (vectype);
5500 icode = (int) optab_handler (optab, vec_mode);
5501 if (icode == CODE_FOR_nothing)
5502 return false;
5503
5504 return true;
5505 }
5506
5507
5508 /* Function vectorizable_shift.
5509
5510 Check if STMT_INFO performs a shift operation that can be vectorized.
5511 If VEC_STMT is also passed, vectorize the STMT_INFO: create a vectorized
5512 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
5513 Return true if STMT_INFO is vectorizable in this way. */
5514
5515 static bool
5516 vectorizable_shift (vec_info *vinfo,
5517 stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
5518 stmt_vec_info *vec_stmt, slp_tree slp_node,
5519 stmt_vector_for_cost *cost_vec)
5520 {
5521 tree vec_dest;
5522 tree scalar_dest;
5523 tree op0, op1 = NULL;
5524 tree vec_oprnd1 = NULL_TREE;
5525 tree vectype;
5526 loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
5527 enum tree_code code;
5528 machine_mode vec_mode;
5529 tree new_temp;
5530 optab optab;
5531 int icode;
5532 machine_mode optab_op2_mode;
5533 enum vect_def_type dt[2] = {vect_unknown_def_type, vect_unknown_def_type};
5534 int ndts = 2;
5535 stmt_vec_info prev_stmt_info;
5536 poly_uint64 nunits_in;
5537 poly_uint64 nunits_out;
5538 tree vectype_out;
5539 tree op1_vectype;
5540 int ncopies;
5541 int j, i;
5542 vec<tree> vec_oprnds0 = vNULL;
5543 vec<tree> vec_oprnds1 = vNULL;
5544 tree vop0, vop1;
5545 unsigned int k;
5546 bool scalar_shift_arg = true;
5547 bb_vec_info bb_vinfo = dyn_cast <bb_vec_info> (vinfo);
5548 bool incompatible_op1_vectype_p = false;
5549
5550 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
5551 return false;
5552
5553 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
5554 && STMT_VINFO_DEF_TYPE (stmt_info) != vect_nested_cycle
5555 && ! vec_stmt)
5556 return false;
5557
5558 /* Is STMT a vectorizable shift operation? */
5559 gassign *stmt = dyn_cast <gassign *> (stmt_info->stmt);
5560 if (!stmt)
5561 return false;
5562
5563 if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME)
5564 return false;
5565
5566 code = gimple_assign_rhs_code (stmt);
5567
5568 if (!(code == LSHIFT_EXPR || code == RSHIFT_EXPR || code == LROTATE_EXPR
5569 || code == RROTATE_EXPR))
5570 return false;
5571
5572 scalar_dest = gimple_assign_lhs (stmt);
5573 vectype_out = STMT_VINFO_VECTYPE (stmt_info);
5574 if (!type_has_mode_precision_p (TREE_TYPE (scalar_dest)))
5575 {
5576 if (dump_enabled_p ())
5577 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5578 "bit-precision shifts not supported.\n");
5579 return false;
5580 }
5581
5582 slp_tree slp_op0;
5583 if (!vect_is_simple_use (vinfo, stmt_info, slp_node,
5584 0, &op0, &slp_op0, &dt[0], &vectype))
5585 {
5586 if (dump_enabled_p ())
5587 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5588 "use not simple.\n");
5589 return false;
5590 }
5591 /* If op0 is an external or constant def, infer the vector type
5592 from the scalar type. */
5593 if (!vectype)
5594 vectype = get_vectype_for_scalar_type (vinfo, TREE_TYPE (op0), slp_node);
5595 if (vec_stmt)
5596 gcc_assert (vectype);
5597 if (!vectype)
5598 {
5599 if (dump_enabled_p ())
5600 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5601 "no vectype for scalar type\n");
5602 return false;
5603 }
5604
5605 nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
5606 nunits_in = TYPE_VECTOR_SUBPARTS (vectype);
5607 if (maybe_ne (nunits_out, nunits_in))
5608 return false;
5609
5610 stmt_vec_info op1_def_stmt_info;
5611 slp_tree slp_op1;
5612 if (!vect_is_simple_use (vinfo, stmt_info, slp_node, 1, &op1, &slp_op1,
5613 &dt[1], &op1_vectype, &op1_def_stmt_info))
5614 {
5615 if (dump_enabled_p ())
5616 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5617 "use not simple.\n");
5618 return false;
5619 }
5620
5621 /* Multiple types in SLP are handled by creating the appropriate number of
5622 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
5623 case of SLP. */
5624 if (slp_node)
5625 ncopies = 1;
5626 else
5627 ncopies = vect_get_num_copies (loop_vinfo, vectype);
5628
5629 gcc_assert (ncopies >= 1);
5630
5631 /* Determine whether the shift amount is a vector, or scalar. If the
5632 shift/rotate amount is a vector, use the vector/vector shift optabs. */
5633
5634 if ((dt[1] == vect_internal_def
5635 || dt[1] == vect_induction_def
5636 || dt[1] == vect_nested_cycle)
5637 && !slp_node)
5638 scalar_shift_arg = false;
5639 else if (dt[1] == vect_constant_def
5640 || dt[1] == vect_external_def
5641 || dt[1] == vect_internal_def)
5642 {
5643 /* In SLP, we need to check whether the shift count is the same for
5644 all the stmts; in loops, if it is a constant or invariant, it is
5645 always a scalar shift. */
5646 if (slp_node)
5647 {
5648 vec<stmt_vec_info> stmts = SLP_TREE_SCALAR_STMTS (slp_node);
5649 stmt_vec_info slpstmt_info;
5650
5651 FOR_EACH_VEC_ELT (stmts, k, slpstmt_info)
5652 {
5653 gassign *slpstmt = as_a <gassign *> (slpstmt_info->stmt);
5654 if (!operand_equal_p (gimple_assign_rhs2 (slpstmt), op1, 0))
5655 scalar_shift_arg = false;
5656 }
5657
5658 /* For internal SLP defs we have to make sure we see scalar stmts
5659 for all vector elements.
5660 ??? For different vectors we could resort to a different
5661 scalar shift operand but code-generation below simply always
5662 takes the first. */
5663 if (dt[1] == vect_internal_def
5664 && maybe_ne (nunits_out * SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node),
5665 stmts.length ()))
5666 scalar_shift_arg = false;
5667 }
5668
5669 /* If the shift amount is computed by a pattern stmt we cannot
5670 use the scalar amount directly thus give up and use a vector
5671 shift. */
5672 if (op1_def_stmt_info && is_pattern_stmt_p (op1_def_stmt_info))
5673 scalar_shift_arg = false;
5674 }
5675 else
5676 {
5677 if (dump_enabled_p ())
5678 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5679 "operand mode requires invariant argument.\n");
5680 return false;
5681 }
5682
5683 /* Vector shifted by vector. */
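  /* Remember the decision made during analysis; the vector/scalar case
     below may still fall back to a vector/vector shift if that is all
     the target supports.  */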
5684 bool was_scalar_shift_arg = scalar_shift_arg;
5685 if (!scalar_shift_arg)
5686 {
5687 optab = optab_for_tree_code (code, vectype, optab_vector);
5688 if (dump_enabled_p ())
5689 dump_printf_loc (MSG_NOTE, vect_location,
5690 "vector/vector shift/rotate found.\n");
5691
5692 if (!op1_vectype)
5693 op1_vectype = get_vectype_for_scalar_type (vinfo, TREE_TYPE (op1),
5694 slp_node);
5695 incompatible_op1_vectype_p
5696 = (op1_vectype == NULL_TREE
5697 || maybe_ne (TYPE_VECTOR_SUBPARTS (op1_vectype),
5698 TYPE_VECTOR_SUBPARTS (vectype))
5699 || TYPE_MODE (op1_vectype) != TYPE_MODE (vectype));
5700 if (incompatible_op1_vectype_p
5701 && (!slp_node
5702 || SLP_TREE_DEF_TYPE
5703 (SLP_TREE_CHILDREN (slp_node)[1]) != vect_constant_def))
5704 {
5705 if (dump_enabled_p ())
5706 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5707 "unusable type for last operand in"
5708 " vector/vector shift/rotate.\n");
5709 return false;
5710 }
5711 }
5712 /* See if the machine has a vector shifted by scalar insn and if not
5713 then see if it has a vector shifted by vector insn. */
5714 else
5715 {
5716 optab = optab_for_tree_code (code, vectype, optab_scalar);
5717 if (optab
5718 && optab_handler (optab, TYPE_MODE (vectype)) != CODE_FOR_nothing)
5719 {
5720 if (dump_enabled_p ())
5721 dump_printf_loc (MSG_NOTE, vect_location,
5722 "vector/scalar shift/rotate found.\n");
5723 }
5724 else
5725 {
5726 optab = optab_for_tree_code (code, vectype, optab_vector);
5727 if (optab
5728 && (optab_handler (optab, TYPE_MODE (vectype))
5729 != CODE_FOR_nothing))
5730 {
5731 scalar_shift_arg = false;
5732
5733 if (dump_enabled_p ())
5734 dump_printf_loc (MSG_NOTE, vect_location,
5735 "vector/vector shift/rotate found.\n");
5736
5737 if (!op1_vectype)
5738 op1_vectype = get_vectype_for_scalar_type (vinfo,
5739 TREE_TYPE (op1),
5740 slp_node);
5741
5742 /* Unlike the other binary operators, shifts/rotates have
5743 the rhs being int, instead of the same type as the lhs,
5744 so make sure the scalar is the right type if we are
5745 dealing with vectors of long long/long/short/char. */
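	      /* E.g. for a vector of long long shifted by an int amount,
		 the int is fine for a vector/scalar shift pattern but has
		 to be converted to long long before it can be broadcast
		 for a vector/vector shift.  */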
5746 incompatible_op1_vectype_p
5747 = (!op1_vectype
5748 || !tree_nop_conversion_p (TREE_TYPE (vectype),
5749 TREE_TYPE (op1)));
5750 }
5751 }
5752 }
5753
5754 /* Supportable by target? */
5755 if (!optab)
5756 {
5757 if (dump_enabled_p ())
5758 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5759 "no optab.\n");
5760 return false;
5761 }
5762 vec_mode = TYPE_MODE (vectype);
5763 icode = (int) optab_handler (optab, vec_mode);
5764 if (icode == CODE_FOR_nothing)
5765 {
5766 if (dump_enabled_p ())
5767 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5768 "op not supported by target.\n");
5769 /* Check only during analysis. */
5770 if (maybe_ne (GET_MODE_SIZE (vec_mode), UNITS_PER_WORD)
5771 || (!vec_stmt
5772 && !vect_worthwhile_without_simd_p (vinfo, code)))
5773 return false;
5774 if (dump_enabled_p ())
5775 dump_printf_loc (MSG_NOTE, vect_location,
5776 "proceeding using word mode.\n");
5777 }
5778
5779 /* Worthwhile without SIMD support? Check only during analysis. */
5780 if (!vec_stmt
5781 && !VECTOR_MODE_P (TYPE_MODE (vectype))
5782 && !vect_worthwhile_without_simd_p (vinfo, code))
5783 {
5784 if (dump_enabled_p ())
5785 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5786 "not worthwhile without SIMD support.\n");
5787 return false;
5788 }
5789
5790 if (!vec_stmt) /* transformation not required. */
5791 {
5792 if (slp_node
5793 && (!vect_maybe_update_slp_op_vectype (slp_op0, vectype)
5794 || (!scalar_shift_arg
5795 && !vect_maybe_update_slp_op_vectype (slp_op1, vectype))))
5796 {
5797 if (dump_enabled_p ())
5798 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5799 "incompatible vector types for invariants\n");
5800 return false;
5801 }
5802 STMT_VINFO_TYPE (stmt_info) = shift_vec_info_type;
5803 DUMP_VECT_SCOPE ("vectorizable_shift");
5804 vect_model_simple_cost (vinfo, stmt_info, ncopies, dt,
5805 scalar_shift_arg ? 1 : ndts, slp_node, cost_vec);
5806 return true;
5807 }
5808
5809 /* Transform. */
5810
5811 if (dump_enabled_p ())
5812 dump_printf_loc (MSG_NOTE, vect_location,
5813 "transform binary/unary operation.\n");
5814
5815 if (incompatible_op1_vectype_p && !slp_node)
5816 {
5817 op1 = fold_convert (TREE_TYPE (vectype), op1);
5818 if (dt[1] != vect_constant_def)
5819 op1 = vect_init_vector (vinfo, stmt_info, op1,
5820 TREE_TYPE (vectype), NULL);
5821 }
5822
5823 /* Handle def. */
5824 vec_dest = vect_create_destination_var (scalar_dest, vectype);
5825
5826 prev_stmt_info = NULL;
5827 for (j = 0; j < ncopies; j++)
5828 {
5829 /* Handle uses. */
5830 if (j == 0)
5831 {
5832 if (scalar_shift_arg)
5833 {
5834 /* Vector shl and shr insn patterns can be defined with scalar
5835 operand 2 (shift operand). In this case, use constant or loop
5836 invariant op1 directly, without extending it to vector mode
5837 first. */
5838 optab_op2_mode = insn_data[icode].operand[2].mode;
5839 if (!VECTOR_MODE_P (optab_op2_mode))
5840 {
5841 if (dump_enabled_p ())
5842 dump_printf_loc (MSG_NOTE, vect_location,
5843 "operand 1 using scalar mode.\n");
5844 vec_oprnd1 = op1;
5845 vec_oprnds1.create (slp_node ? slp_node->vec_stmts_size : 1);
5846 vec_oprnds1.quick_push (vec_oprnd1);
5847 if (slp_node)
5848 {
5849 /* Store vec_oprnd1 for every vector stmt to be created
5850 for SLP_NODE. We check during the analysis that all
5851 the shift arguments are the same.
5852 TODO: Allow different constants for different vector
5853 stmts generated for an SLP instance. */
5854 for (k = 0; k < slp_node->vec_stmts_size - 1; k++)
5855 vec_oprnds1.quick_push (vec_oprnd1);
5856 }
5857 }
5858 }
5859 else if (slp_node && incompatible_op1_vectype_p)
5860 {
5861 if (was_scalar_shift_arg)
5862 {
5863 /* If the argument was the same in all lanes create
5864 the correctly typed vector shift amount directly. */
5865 op1 = fold_convert (TREE_TYPE (vectype), op1);
5866 op1 = vect_init_vector (vinfo, stmt_info,
5867 op1, TREE_TYPE (vectype),
5868 !loop_vinfo ? gsi : NULL);
5869 vec_oprnd1 = vect_init_vector (vinfo, stmt_info, op1, vectype,
5870 !loop_vinfo ? gsi : NULL);
5871 vec_oprnds1.create (slp_node->vec_stmts_size);
5872 for (k = 0; k < slp_node->vec_stmts_size; k++)
5873 vec_oprnds1.quick_push (vec_oprnd1);
5874 }
5875 else if (dt[1] == vect_constant_def)
5876 {
5877 /* Convert the scalar constant shift amounts in-place. */
5878 slp_tree shift = SLP_TREE_CHILDREN (slp_node)[1];
5879 gcc_assert (SLP_TREE_DEF_TYPE (shift) == vect_constant_def);
5880 for (unsigned i = 0;
5881 i < SLP_TREE_SCALAR_OPS (shift).length (); ++i)
5882 {
5883 SLP_TREE_SCALAR_OPS (shift)[i]
5884 = fold_convert (TREE_TYPE (vectype),
5885 SLP_TREE_SCALAR_OPS (shift)[i]);
5886 gcc_assert ((TREE_CODE (SLP_TREE_SCALAR_OPS (shift)[i])
5887 == INTEGER_CST));
5888 }
5889 }
5890 else
5891 gcc_assert (TYPE_MODE (op1_vectype) == TYPE_MODE (vectype));
5892 }
5893
5894 /* vec_oprnd1 is available if operand 1 should be of a scalar-type
5895 (a special case for certain kinds of vector shifts); otherwise,
5896 operand 1 should be of a vector type (the usual case). */
5897 if (vec_oprnd1)
5898 vect_get_vec_defs (vinfo, op0, NULL_TREE, stmt_info,
5899 &vec_oprnds0, NULL, slp_node);
5900 else
5901 vect_get_vec_defs (vinfo, op0, op1, stmt_info,
5902 &vec_oprnds0, &vec_oprnds1, slp_node);
5903 }
5904 else
5905 vect_get_vec_defs_for_stmt_copy (vinfo, &vec_oprnds0, &vec_oprnds1);
5906
5907 /* Arguments are ready. Create the new vector stmt. */
5908 stmt_vec_info new_stmt_info = NULL;
5909 FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
5910 {
5911 vop1 = vec_oprnds1[i];
5912 gassign *new_stmt = gimple_build_assign (vec_dest, code, vop0, vop1);
5913 new_temp = make_ssa_name (vec_dest, new_stmt);
5914 gimple_assign_set_lhs (new_stmt, new_temp);
5915 new_stmt_info
5916 = vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
5917 if (slp_node)
5918 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt_info);
5919 }
5920
5921 if (slp_node)
5922 continue;
5923
5924 if (j == 0)
5925 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt_info;
5926 else
5927 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt_info;
5928 prev_stmt_info = new_stmt_info;
5929 }
5930
5931 vec_oprnds0.release ();
5932 vec_oprnds1.release ();
5933
5934 return true;
5935 }
5936
5937
5938 /* Function vectorizable_operation.
5939
5940 Check if STMT_INFO performs a binary, unary or ternary operation that can
5941 be vectorized.
5942 If VEC_STMT is also passed, vectorize STMT_INFO: create a vectorized
5943 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
5944 Return true if STMT_INFO is vectorizable in this way. */
5945
5946 static bool
5947 vectorizable_operation (vec_info *vinfo,
5948 stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
5949 stmt_vec_info *vec_stmt, slp_tree slp_node,
5950 stmt_vector_for_cost *cost_vec)
5951 {
5952 tree vec_dest;
5953 tree scalar_dest;
5954 tree op0, op1 = NULL_TREE, op2 = NULL_TREE;
5955 tree vectype;
5956 loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
5957 enum tree_code code, orig_code;
5958 machine_mode vec_mode;
5959 tree new_temp;
5960 int op_type;
5961 optab optab;
5962 bool target_support_p;
5963 enum vect_def_type dt[3]
5964 = {vect_unknown_def_type, vect_unknown_def_type, vect_unknown_def_type};
5965 int ndts = 3;
5966 stmt_vec_info prev_stmt_info;
5967 poly_uint64 nunits_in;
5968 poly_uint64 nunits_out;
5969 tree vectype_out;
5970 int ncopies, vec_num;
5971 int j, i;
5972 vec<tree> vec_oprnds0 = vNULL;
5973 vec<tree> vec_oprnds1 = vNULL;
5974 vec<tree> vec_oprnds2 = vNULL;
5975 tree vop0, vop1, vop2;
5976 bb_vec_info bb_vinfo = dyn_cast <bb_vec_info> (vinfo);
5977
5978 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
5979 return false;
5980
5981 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
5982 && ! vec_stmt)
5983 return false;
5984
5985 /* Is STMT a vectorizable unary/binary/ternary operation? */
5986 gassign *stmt = dyn_cast <gassign *> (stmt_info->stmt);
5987 if (!stmt)
5988 return false;
5989
5990 /* Loads and stores are handled in vectorizable_{load,store}. */
5991 if (STMT_VINFO_DATA_REF (stmt_info))
5992 return false;
5993
5994 orig_code = code = gimple_assign_rhs_code (stmt);
5995
5996 /* Shifts are handled in vectorizable_shift. */
5997 if (code == LSHIFT_EXPR
5998 || code == RSHIFT_EXPR
5999 || code == LROTATE_EXPR
6000 || code == RROTATE_EXPR)
6001 return false;
6002
6003 /* Comparisons are handled in vectorizable_comparison. */
6004 if (TREE_CODE_CLASS (code) == tcc_comparison)
6005 return false;
6006
6007 /* Conditions are handled in vectorizable_condition. */
6008 if (code == COND_EXPR)
6009 return false;
6010
6011 /* For pointer addition and subtraction, we should use the normal
6012 plus and minus for the vector operation. */
6013 if (code == POINTER_PLUS_EXPR)
6014 code = PLUS_EXPR;
6015 if (code == POINTER_DIFF_EXPR)
6016 code = MINUS_EXPR;
6017
6018 /* Support only unary, binary or ternary operations. */
6019 op_type = TREE_CODE_LENGTH (code);
6020 if (op_type != unary_op && op_type != binary_op && op_type != ternary_op)
6021 {
6022 if (dump_enabled_p ())
6023 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6024 "num. args = %d (not unary/binary/ternary op).\n",
6025 op_type);
6026 return false;
6027 }
6028
6029 scalar_dest = gimple_assign_lhs (stmt);
6030 vectype_out = STMT_VINFO_VECTYPE (stmt_info);
6031
6032 /* Most operations cannot handle bit-precision types without extra
6033 truncations. */
6034 bool mask_op_p = VECTOR_BOOLEAN_TYPE_P (vectype_out);
6035 if (!mask_op_p
6036 && !type_has_mode_precision_p (TREE_TYPE (scalar_dest))
6037 /* Exceptions are the bitwise binary operations. */
6038 && code != BIT_IOR_EXPR
6039 && code != BIT_XOR_EXPR
6040 && code != BIT_AND_EXPR)
6041 {
6042 if (dump_enabled_p ())
6043 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6044 "bit-precision arithmetic not supported.\n");
6045 return false;
6046 }
6047
6048 slp_tree slp_op0;
6049 if (!vect_is_simple_use (vinfo, stmt_info, slp_node,
6050 0, &op0, &slp_op0, &dt[0], &vectype))
6051 {
6052 if (dump_enabled_p ())
6053 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6054 "use not simple.\n");
6055 return false;
6056 }
6057 /* If op0 is an external or constant def, infer the vector type
6058 from the scalar type. */
6059 if (!vectype)
6060 {
6061 /* For a boolean type we cannot determine the vectype from an
6062 invariant value (we don't know whether it is a vector of
6063 booleans or a vector of integers). Use the output vectype
6064 instead, because operations on booleans do not change the
6065 type. */
6066 if (VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (op0)))
6067 {
6068 if (!VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (scalar_dest)))
6069 {
6070 if (dump_enabled_p ())
6071 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6072 "not supported operation on bool value.\n");
6073 return false;
6074 }
6075 vectype = vectype_out;
6076 }
6077 else
6078 vectype = get_vectype_for_scalar_type (vinfo, TREE_TYPE (op0),
6079 slp_node);
6080 }
6081 if (vec_stmt)
6082 gcc_assert (vectype);
6083 if (!vectype)
6084 {
6085 if (dump_enabled_p ())
6086 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6087 "no vectype for scalar type %T\n",
6088 TREE_TYPE (op0));
6089
6090 return false;
6091 }
6092
6093 nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
6094 nunits_in = TYPE_VECTOR_SUBPARTS (vectype);
6095 if (maybe_ne (nunits_out, nunits_in))
6096 return false;
6097
6098 tree vectype2 = NULL_TREE, vectype3 = NULL_TREE;
6099 slp_tree slp_op1 = NULL, slp_op2 = NULL;
6100 if (op_type == binary_op || op_type == ternary_op)
6101 {
6102 if (!vect_is_simple_use (vinfo, stmt_info, slp_node,
6103 1, &op1, &slp_op1, &dt[1], &vectype2))
6104 {
6105 if (dump_enabled_p ())
6106 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6107 "use not simple.\n");
6108 return false;
6109 }
6110 }
6111 if (op_type == ternary_op)
6112 {
6113 if (!vect_is_simple_use (vinfo, stmt_info, slp_node,
6114 2, &op2, &slp_op2, &dt[2], &vectype3))
6115 {
6116 if (dump_enabled_p ())
6117 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6118 "use not simple.\n");
6119 return false;
6120 }
6121 }
6122
6123 /* Multiple types in SLP are handled by creating the appropriate number of
6124 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
6125 case of SLP. */
6126 if (slp_node)
6127 {
6128 ncopies = 1;
6129 vec_num = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
6130 }
6131 else
6132 {
6133 ncopies = vect_get_num_copies (loop_vinfo, vectype);
6134 vec_num = 1;
6135 }
6136
6137 gcc_assert (ncopies >= 1);
6138
6139 /* Reject attempts to combine mask types with nonmask types, e.g. if
6140 we have an AND between a (nonmask) boolean loaded from memory and
6141 a (mask) boolean result of a comparison.
6142
6143 TODO: We could easily fix these cases up using pattern statements. */
6144 if (VECTOR_BOOLEAN_TYPE_P (vectype) != mask_op_p
6145 || (vectype2 && VECTOR_BOOLEAN_TYPE_P (vectype2) != mask_op_p)
6146 || (vectype3 && VECTOR_BOOLEAN_TYPE_P (vectype3) != mask_op_p))
6147 {
6148 if (dump_enabled_p ())
6149 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6150 "mixed mask and nonmask vector types\n");
6151 return false;
6152 }
6153
6154 /* Supportable by target? */
6155
6156 vec_mode = TYPE_MODE (vectype);
6157 if (code == MULT_HIGHPART_EXPR)
6158 target_support_p = can_mult_highpart_p (vec_mode, TYPE_UNSIGNED (vectype));
6159 else
6160 {
6161 optab = optab_for_tree_code (code, vectype, optab_default);
6162 if (!optab)
6163 {
6164 if (dump_enabled_p ())
6165 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6166 "no optab.\n");
6167 return false;
6168 }
6169 target_support_p = (optab_handler (optab, vec_mode)
6170 != CODE_FOR_nothing);
6171 }
6172
6173 if (!target_support_p)
6174 {
6175 if (dump_enabled_p ())
6176 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6177 "op not supported by target.\n");
6178 /* Check only during analysis. */
6179 if (maybe_ne (GET_MODE_SIZE (vec_mode), UNITS_PER_WORD)
6180 || (!vec_stmt && !vect_worthwhile_without_simd_p (vinfo, code)))
6181 return false;
6182 if (dump_enabled_p ())
6183 dump_printf_loc (MSG_NOTE, vect_location,
6184 "proceeding using word mode.\n");
6185 }
6186
6187 /* Worthwhile without SIMD support? Check only during analysis. */
6188 if (!VECTOR_MODE_P (vec_mode)
6189 && !vec_stmt
6190 && !vect_worthwhile_without_simd_p (vinfo, code))
6191 {
6192 if (dump_enabled_p ())
6193 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6194 "not worthwhile without SIMD support.\n");
6195 return false;
6196 }
6197
6198 int reduc_idx = STMT_VINFO_REDUC_IDX (stmt_info);
6199 vec_loop_masks *masks = (loop_vinfo ? &LOOP_VINFO_MASKS (loop_vinfo) : NULL);
6200 internal_fn cond_fn = get_conditional_internal_fn (code);
6201
6202 if (!vec_stmt) /* transformation not required. */
6203 {
6204 /* If this operation is part of a reduction, a fully-masked loop
6205 should only change the active lanes of the reduction chain,
6206 keeping the inactive lanes as-is. */
6207 if (loop_vinfo
6208 && LOOP_VINFO_CAN_FULLY_MASK_P (loop_vinfo)
6209 && reduc_idx >= 0)
6210 {
6211 if (cond_fn == IFN_LAST
6212 || !direct_internal_fn_supported_p (cond_fn, vectype,
6213 OPTIMIZE_FOR_SPEED))
6214 {
6215 if (dump_enabled_p ())
6216 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6217 "can't use a fully-masked loop because no"
6218 " conditional operation is available.\n");
6219 LOOP_VINFO_CAN_FULLY_MASK_P (loop_vinfo) = false;
6220 }
6221 else
6222 vect_record_loop_mask (loop_vinfo, masks, ncopies * vec_num,
6223 vectype, NULL);
6224 }
6225
6226 /* Put types on constant and invariant SLP children. */
6227 if (slp_node
6228 && (!vect_maybe_update_slp_op_vectype (slp_op0, vectype)
6229 || !vect_maybe_update_slp_op_vectype (slp_op1, vectype)
6230 || !vect_maybe_update_slp_op_vectype (slp_op2, vectype)))
6231 {
6232 if (dump_enabled_p ())
6233 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6234 "incompatible vector types for invariants\n");
6235 return false;
6236 }
6237
6238 STMT_VINFO_TYPE (stmt_info) = op_vec_info_type;
6239 DUMP_VECT_SCOPE ("vectorizable_operation");
6240 vect_model_simple_cost (vinfo, stmt_info,
6241 ncopies, dt, ndts, slp_node, cost_vec);
6242 return true;
6243 }
6244
6245 /* Transform. */
6246
6247 if (dump_enabled_p ())
6248 dump_printf_loc (MSG_NOTE, vect_location,
6249 "transform binary/unary operation.\n");
6250
6251 bool masked_loop_p = loop_vinfo && LOOP_VINFO_FULLY_MASKED_P (loop_vinfo);
6252
6253 /* POINTER_DIFF_EXPR has pointer arguments which are vectorized as
6254 vectors with unsigned elements, but the result is signed. So, we
6255 need to compute the MINUS_EXPR into a vectype temporary and
6256 VIEW_CONVERT_EXPR it into the final vectype_out result. */
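  /* Roughly, for a scalar res_1 = p_2 - q_3 we emit
       tmp = vect_p - vect_q;                            (type VECTYPE)
       vect_res = VIEW_CONVERT_EXPR <vectype_out> (tmp);
     so the subtraction itself is done on unsigned elements.  */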
6257 tree vec_cvt_dest = NULL_TREE;
6258 if (orig_code == POINTER_DIFF_EXPR)
6259 {
6260 vec_dest = vect_create_destination_var (scalar_dest, vectype);
6261 vec_cvt_dest = vect_create_destination_var (scalar_dest, vectype_out);
6262 }
6263 /* Handle def. */
6264 else
6265 vec_dest = vect_create_destination_var (scalar_dest, vectype_out);
6266
6267 /* In case the vectorization factor (VF) is bigger than the number
6268 of elements that we can fit in a vectype (nunits), we have to generate
6269 more than one vector stmt, i.e., we need to "unroll" the
6270 vector stmt by a factor VF/nunits. In doing so, we record a pointer
6271 from one copy of the vector stmt to the next, in the field
6272 STMT_VINFO_RELATED_STMT. This is necessary in order to allow following
6273 stages to find the correct vector defs to be used when vectorizing
6274 stmts that use the defs of the current stmt. The example below
6275 illustrates the vectorization process when VF=16 and nunits=4 (i.e.,
6276 we need to create 4 vectorized stmts):
6277
6278 before vectorization:
6279 RELATED_STMT VEC_STMT
6280 S1: x = memref - -
6281 S2: z = x + 1 - -
6282
6283 step 1: vectorize stmt S1 (done in vectorizable_load. See more details
6284 there):
6285 RELATED_STMT VEC_STMT
6286 VS1_0: vx0 = memref0 VS1_1 -
6287 VS1_1: vx1 = memref1 VS1_2 -
6288 VS1_2: vx2 = memref2 VS1_3 -
6289 VS1_3: vx3 = memref3 - -
6290 S1: x = load - VS1_0
6291 S2: z = x + 1 - -
6292
6293 step2: vectorize stmt S2 (done here):
6294 To vectorize stmt S2 we first need to find the relevant vector
6295 def for the first operand 'x'. This is, as usual, obtained from
6296 the vector stmt recorded in the STMT_VINFO_VEC_STMT of the stmt
6297 that defines 'x' (S1). This way we find the stmt VS1_0, and the
6298 relevant vector def 'vx0'. Having found 'vx0' we can generate
6299 the vector stmt VS2_0, and as usual, record it in the
6300 STMT_VINFO_VEC_STMT of stmt S2.
6301 When creating the second copy (VS2_1), we obtain the relevant vector
6302 def from the vector stmt recorded in the STMT_VINFO_RELATED_STMT of
6303 stmt VS1_0. This way we find the stmt VS1_1 and the relevant
6304 vector def 'vx1'. Using 'vx1' we create stmt VS2_1 and record a
6305 pointer to it in the STMT_VINFO_RELATED_STMT of the vector stmt VS2_0.
6306 Similarly when creating stmts VS2_2 and VS2_3. This is the resulting
6307 chain of stmts and pointers:
6308 RELATED_STMT VEC_STMT
6309 VS1_0: vx0 = memref0 VS1_1 -
6310 VS1_1: vx1 = memref1 VS1_2 -
6311 VS1_2: vx2 = memref2 VS1_3 -
6312 VS1_3: vx3 = memref3 - -
6313 S1: x = load - VS1_0
6314 VS2_0: vz0 = vx0 + v1 VS2_1 -
6315 VS2_1: vz1 = vx1 + v1 VS2_2 -
6316 VS2_2: vz2 = vx2 + v1 VS2_3 -
6317 VS2_3: vz3 = vx3 + v1 - -
6318 S2: z = x + 1 - VS2_0 */
6319
6320 prev_stmt_info = NULL;
6321 for (j = 0; j < ncopies; j++)
6322 {
6323 /* Handle uses. */
6324 if (j == 0)
6325 {
6326 if (op_type == binary_op)
6327 vect_get_vec_defs (vinfo, op0, op1, stmt_info,
6328 &vec_oprnds0, &vec_oprnds1, slp_node);
6329 else if (op_type == ternary_op)
6330 {
6331 if (slp_node)
6332 {
6333 auto_vec<vec<tree> > vec_defs(3);
6334 vect_get_slp_defs (vinfo, slp_node, &vec_defs);
6335 vec_oprnds0 = vec_defs[0];
6336 vec_oprnds1 = vec_defs[1];
6337 vec_oprnds2 = vec_defs[2];
6338 }
6339 else
6340 {
6341 vect_get_vec_defs (vinfo, op0, op1, stmt_info, &vec_oprnds0,
6342 &vec_oprnds1, NULL);
6343 vect_get_vec_defs (vinfo, op2, NULL_TREE, stmt_info,
6344 &vec_oprnds2, NULL, NULL);
6345 }
6346 }
6347 else
6348 vect_get_vec_defs (vinfo, op0, NULL_TREE, stmt_info, &vec_oprnds0,
6349 NULL, slp_node);
6350 }
6351 else
6352 {
6353 vect_get_vec_defs_for_stmt_copy (vinfo, &vec_oprnds0, &vec_oprnds1);
6354 if (op_type == ternary_op)
6355 {
6356 tree vec_oprnd = vec_oprnds2.pop ();
6357 vec_oprnds2.quick_push (vect_get_vec_def_for_stmt_copy (vinfo,
6358 vec_oprnd));
6359 }
6360 }
6361
6362 /* Arguments are ready. Create the new vector stmt. */
6363 stmt_vec_info new_stmt_info = NULL;
6364 FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
6365 {
6366 vop1 = ((op_type == binary_op || op_type == ternary_op)
6367 ? vec_oprnds1[i] : NULL_TREE);
6368 vop2 = ((op_type == ternary_op)
6369 ? vec_oprnds2[i] : NULL_TREE);
6370 if (masked_loop_p && reduc_idx >= 0)
6371 {
6372 /* Perform the operation on active elements only and take
6373 inactive elements from the reduction chain input. */
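	      /* The conditional internal function takes the loop mask,
		 the original operands and, last, the value used for the
		 inactive lanes (here the reduction chain input).  */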
6374 gcc_assert (!vop2);
6375 vop2 = reduc_idx == 1 ? vop1 : vop0;
6376 tree mask = vect_get_loop_mask (gsi, masks, vec_num * ncopies,
6377 vectype, i * ncopies + j);
6378 gcall *call = gimple_build_call_internal (cond_fn, 4, mask,
6379 vop0, vop1, vop2);
6380 new_temp = make_ssa_name (vec_dest, call);
6381 gimple_call_set_lhs (call, new_temp);
6382 gimple_call_set_nothrow (call, true);
6383 new_stmt_info
6384 = vect_finish_stmt_generation (vinfo, stmt_info, call, gsi);
6385 }
6386 else
6387 {
6388 gassign *new_stmt = gimple_build_assign (vec_dest, code,
6389 vop0, vop1, vop2);
6390 new_temp = make_ssa_name (vec_dest, new_stmt);
6391 gimple_assign_set_lhs (new_stmt, new_temp);
6392 new_stmt_info
6393 = vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
6394 if (vec_cvt_dest)
6395 {
6396 new_temp = build1 (VIEW_CONVERT_EXPR, vectype_out, new_temp);
6397 gassign *new_stmt
6398 = gimple_build_assign (vec_cvt_dest, VIEW_CONVERT_EXPR,
6399 new_temp);
6400 new_temp = make_ssa_name (vec_cvt_dest, new_stmt);
6401 gimple_assign_set_lhs (new_stmt, new_temp);
6402 new_stmt_info = vect_finish_stmt_generation (vinfo, stmt_info,
6403 new_stmt, gsi);
6404 }
6405 }
6406 if (slp_node)
6407 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt_info);
6408 }
6409
6410 if (slp_node)
6411 continue;
6412
6413 if (j == 0)
6414 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt_info;
6415 else
6416 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt_info;
6417 prev_stmt_info = new_stmt_info;
6418 }
6419
6420 vec_oprnds0.release ();
6421 vec_oprnds1.release ();
6422 vec_oprnds2.release ();
6423
6424 return true;
6425 }
6426
6427 /* A helper function to ensure data reference DR_INFO's base alignment. */
6428
6429 static void
6430 ensure_base_align (dr_vec_info *dr_info)
6431 {
6432 if (dr_info->misalignment == DR_MISALIGNMENT_UNINITIALIZED)
6433 return;
6434
6435 if (dr_info->base_misaligned)
6436 {
6437 tree base_decl = dr_info->base_decl;
6438
6439 // We should only be able to increase the alignment of a base object if
6440 // we know what its new alignment should be at compile time.
6441 unsigned HOST_WIDE_INT align_base_to =
6442 DR_TARGET_ALIGNMENT (dr_info).to_constant () * BITS_PER_UNIT;
6443
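      /* For declarations known to the symbol table, record the new
	 alignment through the symtab node; otherwise bump DECL_ALIGN
	 on the decl directly and mark it as user-aligned.  */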
6444 if (decl_in_symtab_p (base_decl))
6445 symtab_node::get (base_decl)->increase_alignment (align_base_to);
6446 else if (DECL_ALIGN (base_decl) < align_base_to)
6447 {
6448 SET_DECL_ALIGN (base_decl, align_base_to);
6449 DECL_USER_ALIGN (base_decl) = 1;
6450 }
6451 dr_info->base_misaligned = false;
6452 }
6453 }
6454
6455
6456 /* Function get_group_alias_ptr_type.
6457
6458 Return the alias type for the group starting at FIRST_STMT_INFO. */
6459
6460 static tree
6461 get_group_alias_ptr_type (stmt_vec_info first_stmt_info)
6462 {
6463 struct data_reference *first_dr, *next_dr;
6464
6465 first_dr = STMT_VINFO_DATA_REF (first_stmt_info);
6466 stmt_vec_info next_stmt_info = DR_GROUP_NEXT_ELEMENT (first_stmt_info);
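  /* Walk the other members of the group; if any of them uses a
     different alias set, fall back to ptr_type_node, which
     conservatively conflicts with everything.  */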
6467 while (next_stmt_info)
6468 {
6469 next_dr = STMT_VINFO_DATA_REF (next_stmt_info);
6470 if (get_alias_set (DR_REF (first_dr))
6471 != get_alias_set (DR_REF (next_dr)))
6472 {
6473 if (dump_enabled_p ())
6474 dump_printf_loc (MSG_NOTE, vect_location,
6475 "conflicting alias set types.\n");
6476 return ptr_type_node;
6477 }
6478 next_stmt_info = DR_GROUP_NEXT_ELEMENT (next_stmt_info);
6479 }
6480 return reference_alias_ptr_type (DR_REF (first_dr));
6481 }
6482
6483
6484 /* Function scan_operand_equal_p.
6485
6486 Helper function for check_scan_store. Compare two references
6487 with .GOMP_SIMD_LANE bases. */
6488
6489 static bool
6490 scan_operand_equal_p (tree ref1, tree ref2)
6491 {
6492 tree ref[2] = { ref1, ref2 };
6493 poly_int64 bitsize[2], bitpos[2];
6494 tree offset[2], base[2];
6495 for (int i = 0; i < 2; ++i)
6496 {
6497 machine_mode mode;
6498 int unsignedp, reversep, volatilep = 0;
6499 base[i] = get_inner_reference (ref[i], &bitsize[i], &bitpos[i],
6500 &offset[i], &mode, &unsignedp,
6501 &reversep, &volatilep);
6502 if (reversep || volatilep || maybe_ne (bitpos[i], 0))
6503 return false;
6504 if (TREE_CODE (base[i]) == MEM_REF
6505 && offset[i] == NULL_TREE
6506 && TREE_CODE (TREE_OPERAND (base[i], 0)) == SSA_NAME)
6507 {
6508 gimple *def_stmt = SSA_NAME_DEF_STMT (TREE_OPERAND (base[i], 0));
6509 if (is_gimple_assign (def_stmt)
6510 && gimple_assign_rhs_code (def_stmt) == POINTER_PLUS_EXPR
6511 && TREE_CODE (gimple_assign_rhs1 (def_stmt)) == ADDR_EXPR
6512 && TREE_CODE (gimple_assign_rhs2 (def_stmt)) == SSA_NAME)
6513 {
6514 if (maybe_ne (mem_ref_offset (base[i]), 0))
6515 return false;
6516 base[i] = TREE_OPERAND (gimple_assign_rhs1 (def_stmt), 0);
6517 offset[i] = gimple_assign_rhs2 (def_stmt);
6518 }
6519 }
6520 }
6521
6522 if (!operand_equal_p (base[0], base[1], 0))
6523 return false;
6524 if (maybe_ne (bitsize[0], bitsize[1]))
6525 return false;
6526 if (offset[0] != offset[1])
6527 {
6528 if (!offset[0] || !offset[1])
6529 return false;
6530 if (!operand_equal_p (offset[0], offset[1], 0))
6531 {
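	  /* The offsets differ syntactically.  Strip a constant scale
	     factor (a MULT_EXPR, possibly found via the defining stmt)
	     and widening conversions from both offsets, then compare
	     the stripped offsets and the steps.  */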
6532 tree step[2];
6533 for (int i = 0; i < 2; ++i)
6534 {
6535 step[i] = integer_one_node;
6536 if (TREE_CODE (offset[i]) == SSA_NAME)
6537 {
6538 gimple *def_stmt = SSA_NAME_DEF_STMT (offset[i]);
6539 if (is_gimple_assign (def_stmt)
6540 && gimple_assign_rhs_code (def_stmt) == MULT_EXPR
6541 && (TREE_CODE (gimple_assign_rhs2 (def_stmt))
6542 == INTEGER_CST))
6543 {
6544 step[i] = gimple_assign_rhs2 (def_stmt);
6545 offset[i] = gimple_assign_rhs1 (def_stmt);
6546 }
6547 }
6548 else if (TREE_CODE (offset[i]) == MULT_EXPR)
6549 {
6550 step[i] = TREE_OPERAND (offset[i], 1);
6551 offset[i] = TREE_OPERAND (offset[i], 0);
6552 }
6553 tree rhs1 = NULL_TREE;
6554 if (TREE_CODE (offset[i]) == SSA_NAME)
6555 {
6556 gimple *def_stmt = SSA_NAME_DEF_STMT (offset[i]);
6557 if (gimple_assign_cast_p (def_stmt))
6558 rhs1 = gimple_assign_rhs1 (def_stmt);
6559 }
6560 else if (CONVERT_EXPR_P (offset[i]))
6561 rhs1 = TREE_OPERAND (offset[i], 0);
6562 if (rhs1
6563 && INTEGRAL_TYPE_P (TREE_TYPE (rhs1))
6564 && INTEGRAL_TYPE_P (TREE_TYPE (offset[i]))
6565 && (TYPE_PRECISION (TREE_TYPE (offset[i]))
6566 >= TYPE_PRECISION (TREE_TYPE (rhs1))))
6567 offset[i] = rhs1;
6568 }
6569 if (!operand_equal_p (offset[0], offset[1], 0)
6570 || !operand_equal_p (step[0], step[1], 0))
6571 return false;
6572 }
6573 }
6574 return true;
6575 }
6576
6577
6578 enum scan_store_kind {
6579 /* Normal permutation. */
6580 scan_store_kind_perm,
6581
6582 /* Whole vector left shift permutation with zero init. */
6583 scan_store_kind_lshift_zero,
6584
6585 /* Whole vector left shift permutation and VEC_COND_EXPR. */
6586 scan_store_kind_lshift_cond
6587 };
6588
6589 /* Function scan_store_can_perm_p.
6590
6591 Verify if we can perform the needed permutations or whole vector shifts.
6592 Return -1 on failure, otherwise the exact log2 of vectype's nunits.
6593 USE_WHOLE_VECTOR, if nonnull, is filled with the enum scan_store_kind
6594 operation to perform at each step. */
6595
6596 static int
6597 scan_store_can_perm_p (tree vectype, tree init,
6598 vec<enum scan_store_kind> *use_whole_vector = NULL)
6599 {
6600 enum machine_mode vec_mode = TYPE_MODE (vectype);
6601 unsigned HOST_WIDE_INT nunits;
6602 if (!TYPE_VECTOR_SUBPARTS (vectype).is_constant (&nunits))
6603 return -1;
6604 int units_log2 = exact_log2 (nunits);
6605 if (units_log2 <= 0)
6606 return -1;
6607
6608 int i;
6609 enum scan_store_kind whole_vector_shift_kind = scan_store_kind_perm;
6610 for (i = 0; i <= units_log2; ++i)
6611 {
6612 unsigned HOST_WIDE_INT j, k;
6613 enum scan_store_kind kind = scan_store_kind_perm;
6614 vec_perm_builder sel (nunits, nunits, 1);
6615 sel.quick_grow (nunits);
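	  /* Step I == UNITS_LOG2 broadcasts the last lane; the earlier
	     steps build the permutation { 0, ..., 2**i - 1, nunits,
	     nunits + 1, ... }, i.e. result[j] = j < 2**i ? v0[j]
	     : v1[j - 2**i].  */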
6616 if (i == units_log2)
6617 {
6618 for (j = 0; j < nunits; ++j)
6619 sel[j] = nunits - 1;
6620 }
6621 else
6622 {
6623 for (j = 0; j < (HOST_WIDE_INT_1U << i); ++j)
6624 sel[j] = j;
6625 for (k = 0; j < nunits; ++j, ++k)
6626 sel[j] = nunits + k;
6627 }
6628 vec_perm_indices indices (sel, i == units_log2 ? 1 : 2, nunits);
6629 if (!can_vec_perm_const_p (vec_mode, indices))
6630 {
6631 if (i == units_log2)
6632 return -1;
6633
6634 if (whole_vector_shift_kind == scan_store_kind_perm)
6635 {
6636 if (optab_handler (vec_shl_optab, vec_mode) == CODE_FOR_nothing)
6637 return -1;
6638 whole_vector_shift_kind = scan_store_kind_lshift_zero;
6639 /* Whole vector shifts shift in zeros, so if init is an all-zero
6640 constant, there is no need to do anything further. */
6641 if ((TREE_CODE (init) != INTEGER_CST
6642 && TREE_CODE (init) != REAL_CST)
6643 || !initializer_zerop (init))
6644 {
6645 tree masktype = truth_type_for (vectype);
6646 if (!expand_vec_cond_expr_p (vectype, masktype, VECTOR_CST))
6647 return -1;
6648 whole_vector_shift_kind = scan_store_kind_lshift_cond;
6649 }
6650 }
6651 kind = whole_vector_shift_kind;
6652 }
6653 if (use_whole_vector)
6654 {
6655 if (kind != scan_store_kind_perm && use_whole_vector->is_empty ())
6656 use_whole_vector->safe_grow_cleared (i);
6657 if (kind != scan_store_kind_perm || !use_whole_vector->is_empty ())
6658 use_whole_vector->safe_push (kind);
6659 }
6660 }
6661
6662 return units_log2;
6663 }
6664
6665
6666 /* Function check_scan_store.
6667
6668 Check magic stores for #pragma omp scan {in,ex}clusive reductions. */
6669
6670 static bool
6671 check_scan_store (vec_info *vinfo, stmt_vec_info stmt_info, tree vectype,
6672 enum vect_def_type rhs_dt, bool slp, tree mask,
6673 vect_memory_access_type memory_access_type)
6674 {
6675 loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
6676 dr_vec_info *dr_info = STMT_VINFO_DR_INFO (stmt_info);
6677 tree ref_type;
6678
6679 gcc_assert (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) > 1);
6680 if (slp
6681 || mask
6682 || memory_access_type != VMAT_CONTIGUOUS
6683 || TREE_CODE (DR_BASE_ADDRESS (dr_info->dr)) != ADDR_EXPR
6684 || !VAR_P (TREE_OPERAND (DR_BASE_ADDRESS (dr_info->dr), 0))
6685 || loop_vinfo == NULL
6686 || LOOP_VINFO_FULLY_MASKED_P (loop_vinfo)
6687 || STMT_VINFO_GROUPED_ACCESS (stmt_info)
6688 || !integer_zerop (get_dr_vinfo_offset (vinfo, dr_info))
6689 || !integer_zerop (DR_INIT (dr_info->dr))
6690 || !(ref_type = reference_alias_ptr_type (DR_REF (dr_info->dr)))
6691 || !alias_sets_conflict_p (get_alias_set (vectype),
6692 get_alias_set (TREE_TYPE (ref_type))))
6693 {
6694 if (dump_enabled_p ())
6695 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6696 "unsupported OpenMP scan store.\n");
6697 return false;
6698 }
6699
6700 /* We need to pattern match code built by OpenMP lowering and simplified
6701 by subsequent optimizations into something we can handle.
6702 #pragma omp simd reduction(inscan,+:r)
6703 for (...)
6704 {
6705 r += something ();
6706 #pragma omp scan inclusive (r)
6707 use (r);
6708 }
6709 shall have body with:
6710 // Initialization for input phase, store the reduction initializer:
6711 _20 = .GOMP_SIMD_LANE (simduid.3_14(D), 0);
6712 _21 = .GOMP_SIMD_LANE (simduid.3_14(D), 1);
6713 D.2042[_21] = 0;
6714 // Actual input phase:
6715 ...
6716 r.0_5 = D.2042[_20];
6717 _6 = _4 + r.0_5;
6718 D.2042[_20] = _6;
6719 // Initialization for scan phase:
6720 _25 = .GOMP_SIMD_LANE (simduid.3_14(D), 2);
6721 _26 = D.2043[_25];
6722 _27 = D.2042[_25];
6723 _28 = _26 + _27;
6724 D.2043[_25] = _28;
6725 D.2042[_25] = _28;
6726 // Actual scan phase:
6727 ...
6728 r.1_8 = D.2042[_20];
6729 ...
6730 The "omp simd array" variable D.2042 holds the privatized copy used
6731 inside of the loop and D.2043 is another one that holds copies of
6732 the current original list item. The separate GOMP_SIMD_LANE ifn
6733 kinds are there in order to allow optimizing the initializer store
6734 and combiner sequence, e.g. if it is originally some C++ish user
6735 defined reduction, but allow the vectorizer to pattern recognize it
6736 and turn it into the appropriate vectorized scan.
6737
6738 For exclusive scan, this is slightly different:
6739 #pragma omp simd reduction(inscan,+:r)
6740 for (...)
6741 {
6742 use (r);
6743 #pragma omp scan exclusive (r)
6744 r += something ();
6745 }
6746 shall have body with:
6747 // Initialization for input phase, store the reduction initializer:
6748 _20 = .GOMP_SIMD_LANE (simduid.3_14(D), 0);
6749 _21 = .GOMP_SIMD_LANE (simduid.3_14(D), 1);
6750 D.2042[_21] = 0;
6751 // Actual input phase:
6752 ...
6753 r.0_5 = D.2042[_20];
6754 _6 = _4 + r.0_5;
6755 D.2042[_20] = _6;
6756 // Initialization for scan phase:
6757 _25 = .GOMP_SIMD_LANE (simduid.3_14(D), 3);
6758 _26 = D.2043[_25];
6759 D.2044[_25] = _26;
6760 _27 = D.2042[_25];
6761 _28 = _26 + _27;
6762 D.2043[_25] = _28;
6763 // Actual scan phase:
6764 ...
6765 r.1_8 = D.2044[_20];
6766 ... */
6767
6768 if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) == 2)
6769 {
6770 /* Match the D.2042[_21] = 0; store above. Just require that
6771 it is a constant or external definition store. */
6772 if (rhs_dt != vect_constant_def && rhs_dt != vect_external_def)
6773 {
6774 fail_init:
6775 if (dump_enabled_p ())
6776 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6777 "unsupported OpenMP scan initializer store.\n");
6778 return false;
6779 }
6780
6781 if (! loop_vinfo->scan_map)
6782 loop_vinfo->scan_map = new hash_map<tree, tree>;
6783 tree var = TREE_OPERAND (DR_BASE_ADDRESS (dr_info->dr), 0);
6784 tree &cached = loop_vinfo->scan_map->get_or_insert (var);
6785 if (cached)
6786 goto fail_init;
6787 cached = gimple_assign_rhs1 (STMT_VINFO_STMT (stmt_info));
6788
6789 /* These stores can be vectorized normally. */
6790 return true;
6791 }
6792
6793 if (rhs_dt != vect_internal_def)
6794 {
6795 fail:
6796 if (dump_enabled_p ())
6797 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6798 "unsupported OpenMP scan combiner pattern.\n");
6799 return false;
6800 }
6801
6802 gimple *stmt = STMT_VINFO_STMT (stmt_info);
6803 tree rhs = gimple_assign_rhs1 (stmt);
6804 if (TREE_CODE (rhs) != SSA_NAME)
6805 goto fail;
6806
6807 gimple *other_store_stmt = NULL;
6808 tree var = TREE_OPERAND (DR_BASE_ADDRESS (dr_info->dr), 0);
6809 bool inscan_var_store
6810 = lookup_attribute ("omp simd inscan", DECL_ATTRIBUTES (var)) != NULL;
6811
6812 if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) == 4)
6813 {
6814 if (!inscan_var_store)
6815 {
6816 use_operand_p use_p;
6817 imm_use_iterator iter;
6818 FOR_EACH_IMM_USE_FAST (use_p, iter, rhs)
6819 {
6820 gimple *use_stmt = USE_STMT (use_p);
6821 if (use_stmt == stmt || is_gimple_debug (use_stmt))
6822 continue;
6823 if (gimple_bb (use_stmt) != gimple_bb (stmt)
6824 || !is_gimple_assign (use_stmt)
6825 || gimple_assign_rhs_class (use_stmt) != GIMPLE_BINARY_RHS
6826 || other_store_stmt
6827 || TREE_CODE (gimple_assign_lhs (use_stmt)) != SSA_NAME)
6828 goto fail;
6829 other_store_stmt = use_stmt;
6830 }
6831 if (other_store_stmt == NULL)
6832 goto fail;
6833 rhs = gimple_assign_lhs (other_store_stmt);
6834 if (!single_imm_use (rhs, &use_p, &other_store_stmt))
6835 goto fail;
6836 }
6837 }
6838 else if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) == 3)
6839 {
6840 use_operand_p use_p;
6841 imm_use_iterator iter;
6842 FOR_EACH_IMM_USE_FAST (use_p, iter, rhs)
6843 {
6844 gimple *use_stmt = USE_STMT (use_p);
6845 if (use_stmt == stmt || is_gimple_debug (use_stmt))
6846 continue;
6847 if (other_store_stmt)
6848 goto fail;
6849 other_store_stmt = use_stmt;
6850 }
6851 }
6852 else
6853 goto fail;
6854
6855 gimple *def_stmt = SSA_NAME_DEF_STMT (rhs);
6856 if (gimple_bb (def_stmt) != gimple_bb (stmt)
6857 || !is_gimple_assign (def_stmt)
6858 || gimple_assign_rhs_class (def_stmt) != GIMPLE_BINARY_RHS)
6859 goto fail;
6860
6861 enum tree_code code = gimple_assign_rhs_code (def_stmt);
6862 /* For pointer addition, we should use the normal plus for the vector
6863 operation. */
6864 switch (code)
6865 {
6866 case POINTER_PLUS_EXPR:
6867 code = PLUS_EXPR;
6868 break;
6869 case MULT_HIGHPART_EXPR:
6870 goto fail;
6871 default:
6872 break;
6873 }
6874 if (TREE_CODE_LENGTH (code) != binary_op || !commutative_tree_code (code))
6875 goto fail;
6876
6877 tree rhs1 = gimple_assign_rhs1 (def_stmt);
6878 tree rhs2 = gimple_assign_rhs2 (def_stmt);
6879 if (TREE_CODE (rhs1) != SSA_NAME || TREE_CODE (rhs2) != SSA_NAME)
6880 goto fail;
6881
6882 gimple *load1_stmt = SSA_NAME_DEF_STMT (rhs1);
6883 gimple *load2_stmt = SSA_NAME_DEF_STMT (rhs2);
6884 if (gimple_bb (load1_stmt) != gimple_bb (stmt)
6885 || !gimple_assign_load_p (load1_stmt)
6886 || gimple_bb (load2_stmt) != gimple_bb (stmt)
6887 || !gimple_assign_load_p (load2_stmt))
6888 goto fail;
6889
6890 stmt_vec_info load1_stmt_info = loop_vinfo->lookup_stmt (load1_stmt);
6891 stmt_vec_info load2_stmt_info = loop_vinfo->lookup_stmt (load2_stmt);
6892 if (load1_stmt_info == NULL
6893 || load2_stmt_info == NULL
6894 || (STMT_VINFO_SIMD_LANE_ACCESS_P (load1_stmt_info)
6895 != STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info))
6896 || (STMT_VINFO_SIMD_LANE_ACCESS_P (load2_stmt_info)
6897 != STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info)))
6898 goto fail;
6899
6900 if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) == 4 && inscan_var_store)
6901 {
6902 dr_vec_info *load1_dr_info = STMT_VINFO_DR_INFO (load1_stmt_info);
6903 if (TREE_CODE (DR_BASE_ADDRESS (load1_dr_info->dr)) != ADDR_EXPR
6904 || !VAR_P (TREE_OPERAND (DR_BASE_ADDRESS (load1_dr_info->dr), 0)))
6905 goto fail;
6906 tree var1 = TREE_OPERAND (DR_BASE_ADDRESS (load1_dr_info->dr), 0);
6907 tree lrhs;
6908 if (lookup_attribute ("omp simd inscan", DECL_ATTRIBUTES (var1)))
6909 lrhs = rhs1;
6910 else
6911 lrhs = rhs2;
6912 use_operand_p use_p;
6913 imm_use_iterator iter;
6914 FOR_EACH_IMM_USE_FAST (use_p, iter, lrhs)
6915 {
6916 gimple *use_stmt = USE_STMT (use_p);
6917 if (use_stmt == def_stmt || is_gimple_debug (use_stmt))
6918 continue;
6919 if (other_store_stmt)
6920 goto fail;
6921 other_store_stmt = use_stmt;
6922 }
6923 }
6924
6925 if (other_store_stmt == NULL)
6926 goto fail;
6927 if (gimple_bb (other_store_stmt) != gimple_bb (stmt)
6928 || !gimple_store_p (other_store_stmt))
6929 goto fail;
6930
6931 stmt_vec_info other_store_stmt_info
6932 = loop_vinfo->lookup_stmt (other_store_stmt);
6933 if (other_store_stmt_info == NULL
6934 || (STMT_VINFO_SIMD_LANE_ACCESS_P (other_store_stmt_info)
6935 != STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info)))
6936 goto fail;
6937
6938 gimple *stmt1 = stmt;
6939 gimple *stmt2 = other_store_stmt;
6940 if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) == 4 && !inscan_var_store)
6941 std::swap (stmt1, stmt2);
6942 if (scan_operand_equal_p (gimple_assign_lhs (stmt1),
6943 gimple_assign_rhs1 (load2_stmt)))
6944 {
6945 std::swap (rhs1, rhs2);
6946 std::swap (load1_stmt, load2_stmt);
6947 std::swap (load1_stmt_info, load2_stmt_info);
6948 }
6949 if (!scan_operand_equal_p (gimple_assign_lhs (stmt1),
6950 gimple_assign_rhs1 (load1_stmt)))
6951 goto fail;
6952
6953 tree var3 = NULL_TREE;
6954 if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) == 3
6955 && !scan_operand_equal_p (gimple_assign_lhs (stmt2),
6956 gimple_assign_rhs1 (load2_stmt)))
6957 goto fail;
6958 else if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) == 4)
6959 {
6960 dr_vec_info *load2_dr_info = STMT_VINFO_DR_INFO (load2_stmt_info);
6961 if (TREE_CODE (DR_BASE_ADDRESS (load2_dr_info->dr)) != ADDR_EXPR
6962 || !VAR_P (TREE_OPERAND (DR_BASE_ADDRESS (load2_dr_info->dr), 0)))
6963 goto fail;
6964 var3 = TREE_OPERAND (DR_BASE_ADDRESS (load2_dr_info->dr), 0);
6965 if (!lookup_attribute ("omp simd array", DECL_ATTRIBUTES (var3))
6966 || lookup_attribute ("omp simd inscan", DECL_ATTRIBUTES (var3))
6967 || lookup_attribute ("omp simd inscan exclusive",
6968 DECL_ATTRIBUTES (var3)))
6969 goto fail;
6970 }
6971
6972 dr_vec_info *other_dr_info = STMT_VINFO_DR_INFO (other_store_stmt_info);
6973 if (TREE_CODE (DR_BASE_ADDRESS (other_dr_info->dr)) != ADDR_EXPR
6974 || !VAR_P (TREE_OPERAND (DR_BASE_ADDRESS (other_dr_info->dr), 0)))
6975 goto fail;
6976
6977 tree var1 = TREE_OPERAND (DR_BASE_ADDRESS (dr_info->dr), 0);
6978 tree var2 = TREE_OPERAND (DR_BASE_ADDRESS (other_dr_info->dr), 0);
6979 if (!lookup_attribute ("omp simd array", DECL_ATTRIBUTES (var1))
6980 || !lookup_attribute ("omp simd array", DECL_ATTRIBUTES (var2))
6981 || (!lookup_attribute ("omp simd inscan", DECL_ATTRIBUTES (var1)))
6982 == (!lookup_attribute ("omp simd inscan", DECL_ATTRIBUTES (var2))))
6983 goto fail;
6984
6985 if (lookup_attribute ("omp simd inscan", DECL_ATTRIBUTES (var1)))
6986 std::swap (var1, var2);
6987
6988 if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) == 4)
6989 {
6990 if (!lookup_attribute ("omp simd inscan exclusive",
6991 DECL_ATTRIBUTES (var1)))
6992 goto fail;
6993 var1 = var3;
6994 }
6995
6996 if (loop_vinfo->scan_map == NULL)
6997 goto fail;
6998 tree *init = loop_vinfo->scan_map->get (var1);
6999 if (init == NULL)
7000 goto fail;
7001
7002 /* The IL is as expected, now check if we can actually vectorize it.
7003 Inclusive scan:
7004 _26 = D.2043[_25];
7005 _27 = D.2042[_25];
7006 _28 = _26 + _27;
7007 D.2043[_25] = _28;
7008 D.2042[_25] = _28;
7009 should be vectorized as (where _40 is the vectorized rhs
7010 from the D.2042[_21] = 0; store):
7011 _30 = MEM <vector(8) int> [(int *)&D.2043];
7012 _31 = MEM <vector(8) int> [(int *)&D.2042];
7013 _32 = VEC_PERM_EXPR <_40, _31, { 0, 8, 9, 10, 11, 12, 13, 14 }>;
7014 _33 = _31 + _32;
7015 // _33 = { _31[0], _31[0]+_31[1], _31[1]+_31[2], ..., _31[6]+_31[7] };
7016 _34 = VEC_PERM_EXPR <_40, _33, { 0, 1, 8, 9, 10, 11, 12, 13 }>;
7017 _35 = _33 + _34;
7018 // _35 = { _31[0], _31[0]+_31[1], _31[0]+.._31[2], _31[0]+.._31[3],
7019 // _31[1]+.._31[4], ... _31[4]+.._31[7] };
7020 _36 = VEC_PERM_EXPR <_40, _35, { 0, 1, 2, 3, 8, 9, 10, 11 }>;
7021 _37 = _35 + _36;
7022 // _37 = { _31[0], _31[0]+_31[1], _31[0]+.._31[2], _31[0]+.._31[3],
7023 // _31[0]+.._31[4], ... _31[0]+.._31[7] };
7024 _38 = _30 + _37;
7025 _39 = VEC_PERM_EXPR <_38, _38, { 7, 7, 7, 7, 7, 7, 7, 7 }>;
7026 MEM <vector(8) int> [(int *)&D.2043] = _39;
7027 MEM <vector(8) int> [(int *)&D.2042] = _38;
7028 Exclusive scan:
7029 _26 = D.2043[_25];
7030 D.2044[_25] = _26;
7031 _27 = D.2042[_25];
7032 _28 = _26 + _27;
7033 D.2043[_25] = _28;
7034 should be vectorized as (where _40 is the vectorized rhs
7035 from the D.2042[_21] = 0; store):
7036 _30 = MEM <vector(8) int> [(int *)&D.2043];
7037 _31 = MEM <vector(8) int> [(int *)&D.2042];
7038 _32 = VEC_PERM_EXPR <_40, _31, { 0, 8, 9, 10, 11, 12, 13, 14 }>;
7039 _33 = VEC_PERM_EXPR <_40, _32, { 0, 8, 9, 10, 11, 12, 13, 14 }>;
7040 _34 = _32 + _33;
7041 // _34 = { 0, _31[0], _31[0]+_31[1], _31[1]+_31[2], _31[2]+_31[3],
7042 // _31[3]+_31[4], ... _31[5]+.._31[6] };
7043 _35 = VEC_PERM_EXPR <_40, _34, { 0, 1, 8, 9, 10, 11, 12, 13 }>;
7044 _36 = _34 + _35;
7045 // _36 = { 0, _31[0], _31[0]+_31[1], _31[0]+.._31[2], _31[0]+.._31[3],
7046 // _31[1]+.._31[4], ... _31[3]+.._31[6] };
7047 _37 = VEC_PERM_EXPR <_40, _36, { 0, 1, 2, 3, 8, 9, 10, 11 }>;
7048 _38 = _36 + _37;
7049 // _38 = { 0, _31[0], _31[0]+_31[1], _31[0]+.._31[2], _31[0]+.._31[3],
7050 // _31[0]+.._31[4], ... _31[0]+.._31[6] };
7051 _39 = _30 + _38;
7052 _50 = _31 + _39;
7053 _51 = VEC_PERM_EXPR <_50, _50, { 7, 7, 7, 7, 7, 7, 7, 7 }>;
7054 MEM <vector(8) int> [(int *)&D.2044] = _39;
7055 MEM <vector(8) int> [(int *)&D.2042] = _51; */
7056 enum machine_mode vec_mode = TYPE_MODE (vectype);
7057 optab optab = optab_for_tree_code (code, vectype, optab_default);
7058 if (!optab || optab_handler (optab, vec_mode) == CODE_FOR_nothing)
7059 goto fail;
7060
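/* scan_store_can_perm_p verifies that the required permutations (or
   whole-vector shift fallbacks) are supported and returns the number of
   permute-and-combine steps, i.e. log2 of the number of vector elements
   (3 for the 8-element vectors in the example above), or -1 on failure.  */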
7061 int units_log2 = scan_store_can_perm_p (vectype, *init);
7062 if (units_log2 == -1)
7063 goto fail;
7064
7065 return true;
7066 }
7067
7068
7069 /* Function vectorizable_scan_store.
7070
7071 Helper of vectorizable_store; arguments are as for vectorizable_store.
7072 Handle only the transformation, checking is done in check_scan_store. */
7073
7074 static bool
7075 vectorizable_scan_store (vec_info *vinfo,
7076 stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
7077 stmt_vec_info *vec_stmt, int ncopies)
7078 {
7079 loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
7080 dr_vec_info *dr_info = STMT_VINFO_DR_INFO (stmt_info);
7081 tree ref_type = reference_alias_ptr_type (DR_REF (dr_info->dr));
7082 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
7083
7084 if (dump_enabled_p ())
7085 dump_printf_loc (MSG_NOTE, vect_location,
7086 "transform scan store. ncopies = %d\n", ncopies);
7087
7088 gimple *stmt = STMT_VINFO_STMT (stmt_info);
7089 tree rhs = gimple_assign_rhs1 (stmt);
7090 gcc_assert (TREE_CODE (rhs) == SSA_NAME);
7091
7092 tree var = TREE_OPERAND (DR_BASE_ADDRESS (dr_info->dr), 0);
7093 bool inscan_var_store
7094 = lookup_attribute ("omp simd inscan", DECL_ATTRIBUTES (var)) != NULL;
7095
7096 if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) == 4 && !inscan_var_store)
7097 {
7098 use_operand_p use_p;
7099 imm_use_iterator iter;
7100 FOR_EACH_IMM_USE_FAST (use_p, iter, rhs)
7101 {
7102 gimple *use_stmt = USE_STMT (use_p);
7103 if (use_stmt == stmt || is_gimple_debug (use_stmt))
7104 continue;
7105 rhs = gimple_assign_lhs (use_stmt);
7106 break;
7107 }
7108 }
7109
7110 gimple *def_stmt = SSA_NAME_DEF_STMT (rhs);
7111 enum tree_code code = gimple_assign_rhs_code (def_stmt);
7112 if (code == POINTER_PLUS_EXPR)
7113 code = PLUS_EXPR;
7114 gcc_assert (TREE_CODE_LENGTH (code) == binary_op
7115 && commutative_tree_code (code));
7116 tree rhs1 = gimple_assign_rhs1 (def_stmt);
7117 tree rhs2 = gimple_assign_rhs2 (def_stmt);
7118 gcc_assert (TREE_CODE (rhs1) == SSA_NAME && TREE_CODE (rhs2) == SSA_NAME);
7119 gimple *load1_stmt = SSA_NAME_DEF_STMT (rhs1);
7120 gimple *load2_stmt = SSA_NAME_DEF_STMT (rhs2);
7121 stmt_vec_info load1_stmt_info = loop_vinfo->lookup_stmt (load1_stmt);
7122 stmt_vec_info load2_stmt_info = loop_vinfo->lookup_stmt (load2_stmt);
7123 dr_vec_info *load1_dr_info = STMT_VINFO_DR_INFO (load1_stmt_info);
7124 dr_vec_info *load2_dr_info = STMT_VINFO_DR_INFO (load2_stmt_info);
7125 tree var1 = TREE_OPERAND (DR_BASE_ADDRESS (load1_dr_info->dr), 0);
7126 tree var2 = TREE_OPERAND (DR_BASE_ADDRESS (load2_dr_info->dr), 0);
7127
7128 if (lookup_attribute ("omp simd inscan", DECL_ATTRIBUTES (var1)))
7129 {
7130 std::swap (rhs1, rhs2);
7131 std::swap (var1, var2);
7132 std::swap (load1_dr_info, load2_dr_info);
7133 }
7134
7135 tree *init = loop_vinfo->scan_map->get (var1);
7136 gcc_assert (init);
7137
7138 unsigned HOST_WIDE_INT nunits;
7139 if (!TYPE_VECTOR_SUBPARTS (vectype).is_constant (&nunits))
7140 gcc_unreachable ();
7141 auto_vec<enum scan_store_kind, 16> use_whole_vector;
7142 int units_log2 = scan_store_can_perm_p (vectype, *init, &use_whole_vector);
7143 gcc_assert (units_log2 > 0);
7144 auto_vec<tree, 16> perms;
7145 perms.quick_grow (units_log2 + 1);
7146 tree zero_vec = NULL_TREE, masktype = NULL_TREE;
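/* Build the units_log2 + 1 permutation masks used below.  E.g. for
   8-element vectors these are { 0, 8, 9, 10, 11, 12, 13, 14 },
   { 0, 1, 8, 9, 10, 11, 12, 13 }, { 0, 1, 2, 3, 8, 9, 10, 11 } and
   finally { 7, 7, 7, 7, 7, 7, 7, 7 }, which broadcasts the last
   element, matching the VEC_PERM_EXPRs in the example in
   check_scan_store.  */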
7147 for (int i = 0; i <= units_log2; ++i)
7148 {
7149 unsigned HOST_WIDE_INT j, k;
7150 vec_perm_builder sel (nunits, nunits, 1);
7151 sel.quick_grow (nunits);
7152 if (i == units_log2)
7153 for (j = 0; j < nunits; ++j)
7154 sel[j] = nunits - 1;
7155 else
7156 {
7157 for (j = 0; j < (HOST_WIDE_INT_1U << i); ++j)
7158 sel[j] = j;
7159 for (k = 0; j < nunits; ++j, ++k)
7160 sel[j] = nunits + k;
7161 }
7162 vec_perm_indices indices (sel, i == units_log2 ? 1 : 2, nunits);
7163 if (!use_whole_vector.is_empty ()
7164 && use_whole_vector[i] != scan_store_kind_perm)
7165 {
7166 if (zero_vec == NULL_TREE)
7167 zero_vec = build_zero_cst (vectype);
7168 if (masktype == NULL_TREE
7169 && use_whole_vector[i] == scan_store_kind_lshift_cond)
7170 masktype = truth_type_for (vectype);
7171 perms[i] = vect_gen_perm_mask_any (vectype, indices);
7172 }
7173 else
7174 perms[i] = vect_gen_perm_mask_checked (vectype, indices);
7175 }
7176
7177 stmt_vec_info prev_stmt_info = NULL;
7178 tree vec_oprnd1 = NULL_TREE;
7179 tree vec_oprnd2 = NULL_TREE;
7180 tree vec_oprnd3 = NULL_TREE;
7181 tree dataref_ptr = DR_BASE_ADDRESS (dr_info->dr);
7182 tree dataref_offset = build_int_cst (ref_type, 0);
7183 tree bump = vect_get_data_ptr_increment (vinfo, dr_info,
7184 vectype, VMAT_CONTIGUOUS);
7185 tree ldataref_ptr = NULL_TREE;
7186 tree orig = NULL_TREE;
7187 if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) == 4 && !inscan_var_store)
7188 ldataref_ptr = DR_BASE_ADDRESS (load1_dr_info->dr);
7189 for (int j = 0; j < ncopies; j++)
7190 {
7191 stmt_vec_info new_stmt_info;
7192 if (j == 0)
7193 {
7194 vec_oprnd1 = vect_get_vec_def_for_operand (vinfo, *init, stmt_info);
7195 if (ldataref_ptr == NULL)
7196 vec_oprnd2 = vect_get_vec_def_for_operand (vinfo, rhs1, stmt_info);
7197 vec_oprnd3 = vect_get_vec_def_for_operand (vinfo, rhs2, stmt_info);
7198 orig = vec_oprnd3;
7199 }
7200 else
7201 {
7202 vec_oprnd1 = vect_get_vec_def_for_stmt_copy (vinfo, vec_oprnd1);
7203 if (ldataref_ptr == NULL)
7204 vec_oprnd2 = vect_get_vec_def_for_stmt_copy (vinfo, vec_oprnd2);
7205 vec_oprnd3 = vect_get_vec_def_for_stmt_copy (vinfo, vec_oprnd3);
7206 if (!inscan_var_store)
7207 dataref_offset = int_const_binop (PLUS_EXPR, dataref_offset, bump);
7208 }
7209
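/* For the exclusive scan store into the non-inscan variable, the input
   vector is (re)loaded from the first load's "omp simd array" rather
   than taken from a vectorized def.  */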
7210 if (ldataref_ptr)
7211 {
7212 vec_oprnd2 = make_ssa_name (vectype);
7213 tree data_ref = fold_build2 (MEM_REF, vectype,
7214 unshare_expr (ldataref_ptr),
7215 dataref_offset);
7216 vect_copy_ref_info (data_ref, DR_REF (load1_dr_info->dr));
7217 gimple *g = gimple_build_assign (vec_oprnd2, data_ref);
7218 new_stmt_info = vect_finish_stmt_generation (vinfo, stmt_info, g, gsi);
7219 if (prev_stmt_info == NULL)
7220 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt_info;
7221 else
7222 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt_info;
7223 prev_stmt_info = new_stmt_info;
7224 }
7225
7226 tree v = vec_oprnd2;
7227 for (int i = 0; i < units_log2; ++i)
7228 {
7229 tree new_temp = make_ssa_name (vectype);
7230 gimple *g = gimple_build_assign (new_temp, VEC_PERM_EXPR,
7231 (zero_vec
7232 && (use_whole_vector[i]
7233 != scan_store_kind_perm))
7234 ? zero_vec : vec_oprnd1, v,
7235 perms[i]);
7236 new_stmt_info = vect_finish_stmt_generation (vinfo, stmt_info, g, gsi);
7237 if (prev_stmt_info == NULL)
7238 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt_info;
7239 else
7240 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt_info;
7241 prev_stmt_info = new_stmt_info;
7242
7243 if (zero_vec && use_whole_vector[i] == scan_store_kind_lshift_cond)
7244 {
7245 /* Whole vector shift shifted in zero bits, but if *init
7246 is not initializer_zerop, we need to replace those elements
7247 with elements from vec_oprnd1. */
7248 tree_vector_builder vb (masktype, nunits, 1);
7249 for (unsigned HOST_WIDE_INT k = 0; k < nunits; ++k)
7250 vb.quick_push (k < (HOST_WIDE_INT_1U << i)
7251 ? boolean_false_node : boolean_true_node);
7252
7253 tree new_temp2 = make_ssa_name (vectype);
7254 g = gimple_build_assign (new_temp2, VEC_COND_EXPR, vb.build (),
7255 new_temp, vec_oprnd1);
7256 new_stmt_info = vect_finish_stmt_generation (vinfo, stmt_info,
7257 g, gsi);
7258 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt_info;
7259 prev_stmt_info = new_stmt_info;
7260 new_temp = new_temp2;
7261 }
7262
7263 /* For exclusive scan, perform the perms[i] permutation once
7264 more. */
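/* This corresponds to the two consecutive VEC_PERM_EXPRs with the same
   { 0, 8, ... } mask in the exclusive scan example in check_scan_store;
   decrementing I repeats the i == 0 step once.  */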
7265 if (i == 0
7266 && STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) == 4
7267 && v == vec_oprnd2)
7268 {
7269 v = new_temp;
7270 --i;
7271 continue;
7272 }
7273
7274 tree new_temp2 = make_ssa_name (vectype);
7275 g = gimple_build_assign (new_temp2, code, v, new_temp);
7276 new_stmt_info = vect_finish_stmt_generation (vinfo, stmt_info, g, gsi);
7277 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt_info;
7278 prev_stmt_info = new_stmt_info;
7279
7280 v = new_temp2;
7281 }
7282
7283 tree new_temp = make_ssa_name (vectype);
7284 gimple *g = gimple_build_assign (new_temp, code, orig, v);
7285 new_stmt_info = vect_finish_stmt_generation (vinfo, stmt_info, g, gsi);
7286 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt_info;
7287 prev_stmt_info = new_stmt_info;
7288
7289 tree last_perm_arg = new_temp;
7290 /* For exclusive scan, new_temp computed above is the exclusive scan
7291 prefix sum. Turn it into an inclusive prefix sum for the broadcast
7292 of the last element into orig. */
7293 if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) == 4)
7294 {
7295 last_perm_arg = make_ssa_name (vectype);
7296 g = gimple_build_assign (last_perm_arg, code, new_temp, vec_oprnd2);
7297 new_stmt_info = vect_finish_stmt_generation (vinfo, stmt_info, g, gsi);
7298 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt_info;
7299 prev_stmt_info = new_stmt_info;
7300 }
7301
7302 orig = make_ssa_name (vectype);
7303 g = gimple_build_assign (orig, VEC_PERM_EXPR, last_perm_arg,
7304 last_perm_arg, perms[units_log2]);
7305 new_stmt_info = vect_finish_stmt_generation (vinfo, stmt_info, g, gsi);
7306 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt_info;
7307 prev_stmt_info = new_stmt_info;
7308
7309 if (!inscan_var_store)
7310 {
7311 tree data_ref = fold_build2 (MEM_REF, vectype,
7312 unshare_expr (dataref_ptr),
7313 dataref_offset);
7314 vect_copy_ref_info (data_ref, DR_REF (dr_info->dr));
7315 g = gimple_build_assign (data_ref, new_temp);
7316 new_stmt_info = vect_finish_stmt_generation (vinfo, stmt_info, g, gsi);
7317 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt_info;
7318 prev_stmt_info = new_stmt_info;
7319 }
7320 }
7321
7322 if (inscan_var_store)
7323 for (int j = 0; j < ncopies; j++)
7324 {
7325 if (j != 0)
7326 dataref_offset = int_const_binop (PLUS_EXPR, dataref_offset, bump);
7327
7328 tree data_ref = fold_build2 (MEM_REF, vectype,
7329 unshare_expr (dataref_ptr),
7330 dataref_offset);
7331 vect_copy_ref_info (data_ref, DR_REF (dr_info->dr));
7332 gimple *g = gimple_build_assign (data_ref, orig);
7333 stmt_vec_info new_stmt_info
7334 = vect_finish_stmt_generation (vinfo, stmt_info, g, gsi);
7335 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt_info;
7336 prev_stmt_info = new_stmt_info;
7337 }
7338 return true;
7339 }
7340
7341
7342 /* Function vectorizable_store.
7343
7344 Check if STMT_INFO defines a non scalar data-ref (array/pointer/structure)
7345 that can be vectorized.
7346 If VEC_STMT is also passed, vectorize STMT_INFO: create a vectorized
7347 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
7348 Return true if STMT_INFO is vectorizable in this way. */
7349
7350 static bool
7351 vectorizable_store (vec_info *vinfo,
7352 stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
7353 stmt_vec_info *vec_stmt, slp_tree slp_node,
7354 stmt_vector_for_cost *cost_vec)
7355 {
7356 tree data_ref;
7357 tree op;
7358 tree vec_oprnd = NULL_TREE;
7359 tree elem_type;
7360 loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
7361 class loop *loop = NULL;
7362 machine_mode vec_mode;
7363 tree dummy;
7364 enum dr_alignment_support alignment_support_scheme;
7365 enum vect_def_type rhs_dt = vect_unknown_def_type;
7366 enum vect_def_type mask_dt = vect_unknown_def_type;
7367 stmt_vec_info prev_stmt_info = NULL;
7368 tree dataref_ptr = NULL_TREE;
7369 tree dataref_offset = NULL_TREE;
7370 gimple *ptr_incr = NULL;
7371 int ncopies;
7372 int j;
7373 stmt_vec_info first_stmt_info;
7374 bool grouped_store;
7375 unsigned int group_size, i;
7376 vec<tree> oprnds = vNULL;
7377 vec<tree> result_chain = vNULL;
7378 tree offset = NULL_TREE;
7379 vec<tree> vec_oprnds = vNULL;
7380 bool slp = (slp_node != NULL);
7381 unsigned int vec_num;
7382 bb_vec_info bb_vinfo = dyn_cast <bb_vec_info> (vinfo);
7383 tree aggr_type;
7384 gather_scatter_info gs_info;
7385 poly_uint64 vf;
7386 vec_load_store_type vls_type;
7387 tree ref_type;
7388
7389 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
7390 return false;
7391
7392 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
7393 && ! vec_stmt)
7394 return false;
7395
7396 /* Is vectorizable store? */
7397
7398 tree mask = NULL_TREE, mask_vectype = NULL_TREE;
7399 if (gassign *assign = dyn_cast <gassign *> (stmt_info->stmt))
7400 {
7401 tree scalar_dest = gimple_assign_lhs (assign);
7402 if (TREE_CODE (scalar_dest) == VIEW_CONVERT_EXPR
7403 && is_pattern_stmt_p (stmt_info))
7404 scalar_dest = TREE_OPERAND (scalar_dest, 0);
7405 if (TREE_CODE (scalar_dest) != ARRAY_REF
7406 && TREE_CODE (scalar_dest) != BIT_FIELD_REF
7407 && TREE_CODE (scalar_dest) != INDIRECT_REF
7408 && TREE_CODE (scalar_dest) != COMPONENT_REF
7409 && TREE_CODE (scalar_dest) != IMAGPART_EXPR
7410 && TREE_CODE (scalar_dest) != REALPART_EXPR
7411 && TREE_CODE (scalar_dest) != MEM_REF)
7412 return false;
7413 }
7414 else
7415 {
7416 gcall *call = dyn_cast <gcall *> (stmt_info->stmt);
7417 if (!call || !gimple_call_internal_p (call))
7418 return false;
7419
7420 internal_fn ifn = gimple_call_internal_fn (call);
7421 if (!internal_store_fn_p (ifn))
7422 return false;
7423
7424 if (slp_node != NULL)
7425 {
7426 if (dump_enabled_p ())
7427 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
7428 "SLP of masked stores not supported.\n");
7429 return false;
7430 }
7431
7432 int mask_index = internal_fn_mask_index (ifn);
7433 if (mask_index >= 0)
7434 {
7435 mask = gimple_call_arg (call, mask_index);
7436 if (!vect_check_scalar_mask (vinfo, stmt_info, mask, &mask_dt,
7437 &mask_vectype))
7438 return false;
7439 }
7440 }
7441
7442 op = vect_get_store_rhs (stmt_info);
7443
7444 /* Cannot have hybrid store SLP -- that would mean storing to the
7445 same location twice. */
7446 gcc_assert (slp == PURE_SLP_STMT (stmt_info));
7447
7448 tree vectype = STMT_VINFO_VECTYPE (stmt_info), rhs_vectype = NULL_TREE;
7449 poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
7450
7451 if (loop_vinfo)
7452 {
7453 loop = LOOP_VINFO_LOOP (loop_vinfo);
7454 vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
7455 }
7456 else
7457 vf = 1;
7458
7459 /* Multiple types in SLP are handled by creating the appropriate number of
7460 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
7461 case of SLP. */
7462 if (slp)
7463 ncopies = 1;
7464 else
7465 ncopies = vect_get_num_copies (loop_vinfo, vectype);
7466
7467 gcc_assert (ncopies >= 1);
7468
7469 /* FORNOW. This restriction should be relaxed. */
7470 if (loop && nested_in_vect_loop_p (loop, stmt_info) && ncopies > 1)
7471 {
7472 if (dump_enabled_p ())
7473 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
7474 "multiple types in nested loop.\n");
7475 return false;
7476 }
7477
7478 if (!vect_check_store_rhs (vinfo, stmt_info,
7479 op, &rhs_dt, &rhs_vectype, &vls_type))
7480 return false;
7481
7482 elem_type = TREE_TYPE (vectype);
7483 vec_mode = TYPE_MODE (vectype);
7484
7485 if (!STMT_VINFO_DATA_REF (stmt_info))
7486 return false;
7487
7488 vect_memory_access_type memory_access_type;
7489 if (!get_load_store_type (vinfo, stmt_info, vectype, slp, mask, vls_type,
7490 ncopies, &memory_access_type, &gs_info))
7491 return false;
7492
7493 if (mask)
7494 {
7495 if (memory_access_type == VMAT_CONTIGUOUS)
7496 {
7497 if (!VECTOR_MODE_P (vec_mode)
7498 || !can_vec_mask_load_store_p (vec_mode,
7499 TYPE_MODE (mask_vectype), false))
7500 return false;
7501 }
7502 else if (memory_access_type != VMAT_LOAD_STORE_LANES
7503 && (memory_access_type != VMAT_GATHER_SCATTER
7504 || (gs_info.decl && !VECTOR_BOOLEAN_TYPE_P (mask_vectype))))
7505 {
7506 if (dump_enabled_p ())
7507 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
7508 "unsupported access type for masked store.\n");
7509 return false;
7510 }
7511 }
7512 else
7513 {
7514 /* FORNOW. In some cases we can vectorize even if the data-type is not
7515 supported (e.g. array initialization with 0). */
7516 if (optab_handler (mov_optab, vec_mode) == CODE_FOR_nothing)
7517 return false;
7518 }
7519
7520 dr_vec_info *dr_info = STMT_VINFO_DR_INFO (stmt_info), *first_dr_info = NULL;
7521 grouped_store = (STMT_VINFO_GROUPED_ACCESS (stmt_info)
7522 && memory_access_type != VMAT_GATHER_SCATTER
7523 && (slp || memory_access_type != VMAT_CONTIGUOUS));
7524 if (grouped_store)
7525 {
7526 first_stmt_info = DR_GROUP_FIRST_ELEMENT (stmt_info);
7527 first_dr_info = STMT_VINFO_DR_INFO (first_stmt_info);
7528 group_size = DR_GROUP_SIZE (first_stmt_info);
7529 }
7530 else
7531 {
7532 first_stmt_info = stmt_info;
7533 first_dr_info = dr_info;
7534 group_size = vec_num = 1;
7535 }
7536
7537 if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) > 1 && !vec_stmt)
7538 {
7539 if (!check_scan_store (vinfo, stmt_info, vectype, rhs_dt, slp, mask,
7540 memory_access_type))
7541 return false;
7542 }
7543
7544 if (!vec_stmt) /* transformation not required. */
7545 {
7546 STMT_VINFO_MEMORY_ACCESS_TYPE (stmt_info) = memory_access_type;
7547
7548 if (loop_vinfo
7549 && LOOP_VINFO_CAN_FULLY_MASK_P (loop_vinfo))
7550 check_load_store_masking (loop_vinfo, vectype, vls_type, group_size,
7551 memory_access_type, &gs_info, mask);
7552
7553 if (slp_node
7554 && !vect_maybe_update_slp_op_vectype (SLP_TREE_CHILDREN (slp_node)[0],
7555 vectype))
7556 {
7557 if (dump_enabled_p ())
7558 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
7559 "incompatible vector types for invariants\n");
7560 return false;
7561 }
7562
7563 STMT_VINFO_TYPE (stmt_info) = store_vec_info_type;
7564 vect_model_store_cost (vinfo, stmt_info, ncopies,
7565 memory_access_type, vls_type, slp_node, cost_vec);
7566 return true;
7567 }
7568 gcc_assert (memory_access_type == STMT_VINFO_MEMORY_ACCESS_TYPE (stmt_info));
7569
7570 /* Transform. */
7571
7572 ensure_base_align (dr_info);
7573
7574 if (memory_access_type == VMAT_GATHER_SCATTER && gs_info.decl)
7575 {
7576 tree vec_oprnd0 = NULL_TREE, vec_oprnd1 = NULL_TREE, src;
7577 tree arglist = TYPE_ARG_TYPES (TREE_TYPE (gs_info.decl));
7578 tree rettype, srctype, ptrtype, idxtype, masktype, scaletype;
7579 tree ptr, var, scale, vec_mask;
7580 tree mask_arg = NULL_TREE, mask_op = NULL_TREE, perm_mask = NULL_TREE;
7581 tree mask_halfvectype = mask_vectype;
7582 edge pe = loop_preheader_edge (loop);
7583 gimple_seq seq;
7584 basic_block new_bb;
7585 enum { NARROW, NONE, WIDEN } modifier;
7586 poly_uint64 scatter_off_nunits
7587 = TYPE_VECTOR_SUBPARTS (gs_info.offset_vectype);
7588
7589 if (known_eq (nunits, scatter_off_nunits))
7590 modifier = NONE;
7591 else if (known_eq (nunits * 2, scatter_off_nunits))
7592 {
7593 modifier = WIDEN;
7594
7595 /* Currently gathers and scatters are only supported for
7596 fixed-length vectors. */
7597 unsigned int count = scatter_off_nunits.to_constant ();
7598 vec_perm_builder sel (count, count, 1);
7599 for (i = 0; i < (unsigned int) count; ++i)
7600 sel.quick_push (i | (count / 2));
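/* E.g. for COUNT == 8 this builds the selector { 4, 5, 6, 7, 4, 5, 6, 7 },
   which moves the high half of the offset vector into place; it is used
   for the odd-numbered copies (j & 1) below.  */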
7601
7602 vec_perm_indices indices (sel, 1, count);
7603 perm_mask = vect_gen_perm_mask_checked (gs_info.offset_vectype,
7604 indices);
7605 gcc_assert (perm_mask != NULL_TREE);
7606 }
7607 else if (known_eq (nunits, scatter_off_nunits * 2))
7608 {
7609 modifier = NARROW;
7610
7611 /* Currently gathers and scatters are only supported for
7612 fixed-length vectors. */
7613 unsigned int count = nunits.to_constant ();
7614 vec_perm_builder sel (count, count, 1);
7615 for (i = 0; i < (unsigned int) count; ++i)
7616 sel.quick_push (i | (count / 2));
7617
7618 vec_perm_indices indices (sel, 2, count);
7619 perm_mask = vect_gen_perm_mask_checked (vectype, indices);
7620 gcc_assert (perm_mask != NULL_TREE);
7621 ncopies *= 2;
7622
7623 if (mask)
7624 mask_halfvectype = truth_type_for (gs_info.offset_vectype);
7625 }
7626 else
7627 gcc_unreachable ();
7628
7629 rettype = TREE_TYPE (TREE_TYPE (gs_info.decl));
7630 ptrtype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
7631 masktype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
7632 idxtype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
7633 srctype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
7634 scaletype = TREE_VALUE (arglist);
7635
7636 gcc_checking_assert (TREE_CODE (masktype) == INTEGER_TYPE
7637 && TREE_CODE (rettype) == VOID_TYPE);
7638
7639 ptr = fold_convert (ptrtype, gs_info.base);
7640 if (!is_gimple_min_invariant (ptr))
7641 {
7642 ptr = force_gimple_operand (ptr, &seq, true, NULL_TREE);
7643 new_bb = gsi_insert_seq_on_edge_immediate (pe, seq);
7644 gcc_assert (!new_bb);
7645 }
7646
7647 if (mask == NULL_TREE)
7648 {
7649 mask_arg = build_int_cst (masktype, -1);
7650 mask_arg = vect_init_vector (vinfo, stmt_info,
7651 mask_arg, masktype, NULL);
7652 }
7653
7654 scale = build_int_cst (scaletype, gs_info.scale);
7655
7656 prev_stmt_info = NULL;
7657 for (j = 0; j < ncopies; ++j)
7658 {
7659 if (j == 0)
7660 {
7661 src = vec_oprnd1 = vect_get_vec_def_for_operand (vinfo,
7662 op, stmt_info);
7663 op = vec_oprnd0 = vect_get_vec_def_for_operand (vinfo,
7664 gs_info.offset,
7665 stmt_info);
7666 if (mask)
7667 mask_op = vec_mask = vect_get_vec_def_for_operand (vinfo, mask,
7668 stmt_info);
7669 }
7670 else if (modifier != NONE && (j & 1))
7671 {
7672 if (modifier == WIDEN)
7673 {
7674 src
7675 = vec_oprnd1 = vect_get_vec_def_for_stmt_copy (vinfo,
7676 vec_oprnd1);
7677 op = permute_vec_elements (vinfo, vec_oprnd0, vec_oprnd0,
7678 perm_mask, stmt_info, gsi);
7679 if (mask)
7680 mask_op
7681 = vec_mask = vect_get_vec_def_for_stmt_copy (vinfo,
7682 vec_mask);
7683 }
7684 else if (modifier == NARROW)
7685 {
7686 src = permute_vec_elements (vinfo, vec_oprnd1, vec_oprnd1,
7687 perm_mask, stmt_info, gsi);
7688 op = vec_oprnd0 = vect_get_vec_def_for_stmt_copy (vinfo,
7689 vec_oprnd0);
7690 }
7691 else
7692 gcc_unreachable ();
7693 }
7694 else
7695 {
7696 src = vec_oprnd1 = vect_get_vec_def_for_stmt_copy (vinfo,
7697 vec_oprnd1);
7698 op = vec_oprnd0 = vect_get_vec_def_for_stmt_copy (vinfo,
7699 vec_oprnd0);
7700 if (mask)
7701 mask_op = vec_mask = vect_get_vec_def_for_stmt_copy (vinfo,
7702 vec_mask);
7703 }
7704
7705 if (!useless_type_conversion_p (srctype, TREE_TYPE (src)))
7706 {
7707 gcc_assert (known_eq (TYPE_VECTOR_SUBPARTS (TREE_TYPE (src)),
7708 TYPE_VECTOR_SUBPARTS (srctype)));
7709 var = vect_get_new_ssa_name (srctype, vect_simple_var);
7710 src = build1 (VIEW_CONVERT_EXPR, srctype, src);
7711 gassign *new_stmt
7712 = gimple_build_assign (var, VIEW_CONVERT_EXPR, src);
7713 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
7714 src = var;
7715 }
7716
7717 if (!useless_type_conversion_p (idxtype, TREE_TYPE (op)))
7718 {
7719 gcc_assert (known_eq (TYPE_VECTOR_SUBPARTS (TREE_TYPE (op)),
7720 TYPE_VECTOR_SUBPARTS (idxtype)));
7721 var = vect_get_new_ssa_name (idxtype, vect_simple_var);
7722 op = build1 (VIEW_CONVERT_EXPR, idxtype, op);
7723 gassign *new_stmt
7724 = gimple_build_assign (var, VIEW_CONVERT_EXPR, op);
7725 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
7726 op = var;
7727 }
7728
7729 if (mask)
7730 {
7731 tree utype;
7732 mask_arg = mask_op;
7733 if (modifier == NARROW)
7734 {
7735 var = vect_get_new_ssa_name (mask_halfvectype,
7736 vect_simple_var);
7737 gassign *new_stmt
7738 = gimple_build_assign (var, (j & 1) ? VEC_UNPACK_HI_EXPR
7739 : VEC_UNPACK_LO_EXPR,
7740 mask_op);
7741 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
7742 mask_arg = var;
7743 }
7744 tree optype = TREE_TYPE (mask_arg);
7745 if (TYPE_MODE (masktype) == TYPE_MODE (optype))
7746 utype = masktype;
7747 else
7748 utype = lang_hooks.types.type_for_mode (TYPE_MODE (optype), 1);
7749 var = vect_get_new_ssa_name (utype, vect_scalar_var);
7750 mask_arg = build1 (VIEW_CONVERT_EXPR, utype, mask_arg);
7751 gassign *new_stmt
7752 = gimple_build_assign (var, VIEW_CONVERT_EXPR, mask_arg);
7753 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
7754 mask_arg = var;
7755 if (!useless_type_conversion_p (masktype, utype))
7756 {
7757 gcc_assert (TYPE_PRECISION (utype)
7758 <= TYPE_PRECISION (masktype));
7759 var = vect_get_new_ssa_name (masktype, vect_scalar_var);
7760 new_stmt = gimple_build_assign (var, NOP_EXPR, mask_arg);
7761 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
7762 mask_arg = var;
7763 }
7764 }
7765
7766 gcall *new_stmt
7767 = gimple_build_call (gs_info.decl, 5, ptr, mask_arg, op, src, scale);
7768 stmt_vec_info new_stmt_info
7769 = vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
7770
7771 if (prev_stmt_info == NULL)
7772 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt_info;
7773 else
7774 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt_info;
7775 prev_stmt_info = new_stmt_info;
7776 }
7777 return true;
7778 }
7779 else if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) >= 3)
7780 return vectorizable_scan_store (vinfo, stmt_info, gsi, vec_stmt, ncopies);
7781
7782 if (STMT_VINFO_GROUPED_ACCESS (stmt_info))
7783 DR_GROUP_STORE_COUNT (DR_GROUP_FIRST_ELEMENT (stmt_info))++;
7784
7785 if (grouped_store)
7786 {
7787 /* FORNOW */
7788 gcc_assert (!loop || !nested_in_vect_loop_p (loop, stmt_info));
7789
7790 /* We vectorize all the stmts of the interleaving group when we
7791 reach the last stmt in the group. */
7792 if (DR_GROUP_STORE_COUNT (first_stmt_info)
7793 < DR_GROUP_SIZE (first_stmt_info)
7794 && !slp)
7795 {
7796 *vec_stmt = NULL;
7797 return true;
7798 }
7799
7800 if (slp)
7801 {
7802 grouped_store = false;
7803 /* VEC_NUM is the number of vect stmts to be created for this
7804 group. */
7805 vec_num = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
7806 first_stmt_info = SLP_TREE_SCALAR_STMTS (slp_node)[0];
7807 gcc_assert (DR_GROUP_FIRST_ELEMENT (first_stmt_info)
7808 == first_stmt_info);
7809 first_dr_info = STMT_VINFO_DR_INFO (first_stmt_info);
7810 op = vect_get_store_rhs (first_stmt_info);
7811 }
7812 else
7813 /* VEC_NUM is the number of vect stmts to be created for this
7814 group. */
7815 vec_num = group_size;
7816
7817 ref_type = get_group_alias_ptr_type (first_stmt_info);
7818 }
7819 else
7820 ref_type = reference_alias_ptr_type (DR_REF (first_dr_info->dr));
7821
7822 if (dump_enabled_p ())
7823 dump_printf_loc (MSG_NOTE, vect_location,
7824 "transform store. ncopies = %d\n", ncopies);
7825
7826 if (memory_access_type == VMAT_ELEMENTWISE
7827 || memory_access_type == VMAT_STRIDED_SLP)
7828 {
7829 gimple_stmt_iterator incr_gsi;
7830 bool insert_after;
7831 gimple *incr;
7832 tree offvar;
7833 tree ivstep;
7834 tree running_off;
7835 tree stride_base, stride_step, alias_off;
7836 tree vec_oprnd;
7837 tree dr_offset;
7838 unsigned int g;
7839 /* Checked by get_load_store_type. */
7840 unsigned int const_nunits = nunits.to_constant ();
7841
7842 gcc_assert (!LOOP_VINFO_FULLY_MASKED_P (loop_vinfo));
7843 gcc_assert (!nested_in_vect_loop_p (loop, stmt_info));
7844
7845 dr_offset = get_dr_vinfo_offset (vinfo, first_dr_info);
7846 stride_base
7847 = fold_build_pointer_plus
7848 (DR_BASE_ADDRESS (first_dr_info->dr),
7849 size_binop (PLUS_EXPR,
7850 convert_to_ptrofftype (dr_offset),
7851 convert_to_ptrofftype (DR_INIT (first_dr_info->dr))));
7852 stride_step = fold_convert (sizetype, DR_STEP (first_dr_info->dr));
7853
7854 /* For a store with loop-invariant (but other than power-of-2)
7855 stride (i.e. not a grouped access) like so:
7856
7857 for (i = 0; i < n; i += stride)
7858 array[i] = ...;
7859
7860 we generate a new induction variable and new stores from
7861 the components of the (vectorized) rhs:
7862
7863 for (j = 0; ; j += VF*stride)
7864 vectemp = ...;
7865 tmp1 = vectemp[0];
7866 array[j] = tmp1;
7867 tmp2 = vectemp[1];
7868 array[j + stride] = tmp2;
7869 ...
7870 */
7871
7872 unsigned nstores = const_nunits;
7873 unsigned lnel = 1;
7874 tree ltype = elem_type;
7875 tree lvectype = vectype;
7876 if (slp)
7877 {
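/* Decide how to split (or merge) the SLP group into pieces that can be
   stored directly.  E.g. with group_size == 2 and an 8-element vector
   this gives 4 stores of 2-element sub-vectors, falling back to 4
   extracts of an integer type of the combined size if the target
   cannot extract the sub-vectors directly.  */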
7878 if (group_size < const_nunits
7879 && const_nunits % group_size == 0)
7880 {
7881 nstores = const_nunits / group_size;
7882 lnel = group_size;
7883 ltype = build_vector_type (elem_type, group_size);
7884 lvectype = vectype;
7885
7886 /* First check if vec_extract optab doesn't support extraction
7887 of vector elts directly. */
7888 scalar_mode elmode = SCALAR_TYPE_MODE (elem_type);
7889 machine_mode vmode;
7890 if (!VECTOR_MODE_P (TYPE_MODE (vectype))
7891 || !related_vector_mode (TYPE_MODE (vectype), elmode,
7892 group_size).exists (&vmode)
7893 || (convert_optab_handler (vec_extract_optab,
7894 TYPE_MODE (vectype), vmode)
7895 == CODE_FOR_nothing))
7896 {
7897 /* Try to avoid emitting an extract of vector elements
7898 by performing the extracts using an integer type of the
7899 same size, extracting from a vector of those and then
7900 re-interpreting it as the original vector type if
7901 supported. */
7902 unsigned lsize
7903 = group_size * GET_MODE_BITSIZE (elmode);
7904 unsigned int lnunits = const_nunits / group_size;
7905 /* If we can't construct such a vector fall back to
7906 element extracts from the original vector type and
7907 element size stores. */
7908 if (int_mode_for_size (lsize, 0).exists (&elmode)
7909 && VECTOR_MODE_P (TYPE_MODE (vectype))
7910 && related_vector_mode (TYPE_MODE (vectype), elmode,
7911 lnunits).exists (&vmode)
7912 && (convert_optab_handler (vec_extract_optab,
7913 vmode, elmode)
7914 != CODE_FOR_nothing))
7915 {
7916 nstores = lnunits;
7917 lnel = group_size;
7918 ltype = build_nonstandard_integer_type (lsize, 1);
7919 lvectype = build_vector_type (ltype, nstores);
7920 }
7921 /* Else fall back to vector extraction anyway.
7922 Fewer stores are more important than avoiding spilling
7923 of the vector we extract from. Compared to the
7924 construction case in vectorizable_load no store-forwarding
7925 issue exists here for reasonable archs. */
7926 }
7927 }
7928 else if (group_size >= const_nunits
7929 && group_size % const_nunits == 0)
7930 {
7931 nstores = 1;
7932 lnel = const_nunits;
7933 ltype = vectype;
7934 lvectype = vectype;
7935 }
7936 ltype = build_aligned_type (ltype, TYPE_ALIGN (elem_type));
7937 ncopies = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
7938 }
7939
7940 ivstep = stride_step;
7941 ivstep = fold_build2 (MULT_EXPR, TREE_TYPE (ivstep), ivstep,
7942 build_int_cst (TREE_TYPE (ivstep), vf));
7943
7944 standard_iv_increment_position (loop, &incr_gsi, &insert_after);
7945
7946 stride_base = cse_and_gimplify_to_preheader (loop_vinfo, stride_base);
7947 ivstep = cse_and_gimplify_to_preheader (loop_vinfo, ivstep);
7948 create_iv (stride_base, ivstep, NULL,
7949 loop, &incr_gsi, insert_after,
7950 &offvar, NULL);
7951 incr = gsi_stmt (incr_gsi);
7952 loop_vinfo->add_stmt (incr);
7953
7954 stride_step = cse_and_gimplify_to_preheader (loop_vinfo, stride_step);
7955
7956 prev_stmt_info = NULL;
7957 alias_off = build_int_cst (ref_type, 0);
7958 stmt_vec_info next_stmt_info = first_stmt_info;
7959 for (g = 0; g < group_size; g++)
7960 {
7961 running_off = offvar;
7962 if (g)
7963 {
7964 tree size = TYPE_SIZE_UNIT (ltype);
7965 tree pos = fold_build2 (MULT_EXPR, sizetype, size_int (g),
7966 size);
7967 tree newoff = copy_ssa_name (running_off, NULL);
7968 incr = gimple_build_assign (newoff, POINTER_PLUS_EXPR,
7969 running_off, pos);
7970 vect_finish_stmt_generation (vinfo, stmt_info, incr, gsi);
7971 running_off = newoff;
7972 }
7973 unsigned int group_el = 0;
7974 unsigned HOST_WIDE_INT
7975 elsz = tree_to_uhwi (TYPE_SIZE_UNIT (TREE_TYPE (vectype)));
7976 for (j = 0; j < ncopies; j++)
7977 {
7978 /* We've set op and dt above, from vect_get_store_rhs,
7979 and first_stmt_info == stmt_info. */
7980 if (j == 0)
7981 {
7982 if (slp)
7983 {
7984 vect_get_vec_defs (vinfo, op, NULL_TREE, stmt_info,
7985 &vec_oprnds, NULL, slp_node);
7986 vec_oprnd = vec_oprnds[0];
7987 }
7988 else
7989 {
7990 op = vect_get_store_rhs (next_stmt_info);
7991 vec_oprnd = vect_get_vec_def_for_operand
7992 (vinfo, op, next_stmt_info);
7993 }
7994 }
7995 else
7996 {
7997 if (slp)
7998 vec_oprnd = vec_oprnds[j];
7999 else
8000 vec_oprnd = vect_get_vec_def_for_stmt_copy (vinfo,
8001 vec_oprnd);
8002 }
8003 /* Pun the vector to extract from if necessary. */
8004 if (lvectype != vectype)
8005 {
8006 tree tem = make_ssa_name (lvectype);
8007 gimple *pun
8008 = gimple_build_assign (tem, build1 (VIEW_CONVERT_EXPR,
8009 lvectype, vec_oprnd));
8010 vect_finish_stmt_generation (vinfo, stmt_info, pun, gsi);
8011 vec_oprnd = tem;
8012 }
8013 for (i = 0; i < nstores; i++)
8014 {
8015 tree newref, newoff;
8016 gimple *incr, *assign;
8017 tree size = TYPE_SIZE (ltype);
8018 /* Extract the i'th component. */
8019 tree pos = fold_build2 (MULT_EXPR, bitsizetype,
8020 bitsize_int (i), size);
8021 tree elem = fold_build3 (BIT_FIELD_REF, ltype, vec_oprnd,
8022 size, pos);
8023
8024 elem = force_gimple_operand_gsi (gsi, elem, true,
8025 NULL_TREE, true,
8026 GSI_SAME_STMT);
8027
8028 tree this_off = build_int_cst (TREE_TYPE (alias_off),
8029 group_el * elsz);
8030 newref = build2 (MEM_REF, ltype,
8031 running_off, this_off);
8032 vect_copy_ref_info (newref, DR_REF (first_dr_info->dr));
8033
8034 /* And store it to *running_off. */
8035 assign = gimple_build_assign (newref, elem);
8036 stmt_vec_info assign_info
8037 = vect_finish_stmt_generation (vinfo, stmt_info,
8038 assign, gsi);
8039
8040 group_el += lnel;
8041 if (! slp
8042 || group_el == group_size)
8043 {
8044 newoff = copy_ssa_name (running_off, NULL);
8045 incr = gimple_build_assign (newoff, POINTER_PLUS_EXPR,
8046 running_off, stride_step);
8047 vect_finish_stmt_generation (vinfo, stmt_info, incr, gsi);
8048
8049 running_off = newoff;
8050 group_el = 0;
8051 }
8052 if (g == group_size - 1
8053 && !slp)
8054 {
8055 if (j == 0 && i == 0)
8056 STMT_VINFO_VEC_STMT (stmt_info)
8057 = *vec_stmt = assign_info;
8058 else
8059 STMT_VINFO_RELATED_STMT (prev_stmt_info) = assign_info;
8060 prev_stmt_info = assign_info;
8061 }
8062 }
8063 }
8064 next_stmt_info = DR_GROUP_NEXT_ELEMENT (next_stmt_info);
8065 if (slp)
8066 break;
8067 }
8068
8069 vec_oprnds.release ();
8070 return true;
8071 }
8072
8073 auto_vec<tree> dr_chain (group_size);
8074 oprnds.create (group_size);
8075
8076 /* Gather-scatter accesses perform only component accesses, alignment
8077 is irrelevant for them. */
8078 if (memory_access_type == VMAT_GATHER_SCATTER)
8079 alignment_support_scheme = dr_unaligned_supported;
8080 else
8081 alignment_support_scheme
8082 = vect_supportable_dr_alignment (vinfo, first_dr_info, false);
8083
8084 gcc_assert (alignment_support_scheme);
8085 vec_loop_masks *loop_masks
8086 = (loop_vinfo && LOOP_VINFO_FULLY_MASKED_P (loop_vinfo)
8087 ? &LOOP_VINFO_MASKS (loop_vinfo)
8088 : NULL);
8089 /* Targets with store-lane instructions must not require explicit
8090 realignment. vect_supportable_dr_alignment always returns either
8091 dr_aligned or dr_unaligned_supported for masked operations. */
8092 gcc_assert ((memory_access_type != VMAT_LOAD_STORE_LANES
8093 && !mask
8094 && !loop_masks)
8095 || alignment_support_scheme == dr_aligned
8096 || alignment_support_scheme == dr_unaligned_supported);
8097
8098 if (memory_access_type == VMAT_CONTIGUOUS_DOWN
8099 || memory_access_type == VMAT_CONTIGUOUS_REVERSE)
8100 offset = size_int (-TYPE_VECTOR_SUBPARTS (vectype) + 1);
8101
8102 tree bump;
8103 tree vec_offset = NULL_TREE;
8104 if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
8105 {
8106 aggr_type = NULL_TREE;
8107 bump = NULL_TREE;
8108 }
8109 else if (memory_access_type == VMAT_GATHER_SCATTER)
8110 {
8111 aggr_type = elem_type;
8112 vect_get_strided_load_store_ops (stmt_info, loop_vinfo, &gs_info,
8113 &bump, &vec_offset);
8114 }
8115 else
8116 {
8117 if (memory_access_type == VMAT_LOAD_STORE_LANES)
8118 aggr_type = build_array_type_nelts (elem_type, vec_num * nunits);
8119 else
8120 aggr_type = vectype;
8121 bump = vect_get_data_ptr_increment (vinfo, dr_info, aggr_type,
8122 memory_access_type);
8123 }
8124
8125 if (mask)
8126 LOOP_VINFO_HAS_MASK_STORE (loop_vinfo) = true;
8127
8128 /* In case the vectorization factor (VF) is bigger than the number
8129 of elements that we can fit in a vectype (nunits), we have to generate
8130 more than one vector stmt - i.e. - we need to "unroll" the
8131 vector stmt by a factor VF/nunits. For more details see documentation in
8132 vect_get_vec_def_for_copy_stmt. */
8133
8134 /* In case of interleaving (non-unit grouped access):
8135
8136 S1: &base + 2 = x2
8137 S2: &base = x0
8138 S3: &base + 1 = x1
8139 S4: &base + 3 = x3
8140
8141 We create vectorized stores starting from base address (the access of the
8142 first stmt in the chain (S2 in the above example), when the last store stmt
8143 of the chain (S4) is reached:
8144
8145 VS1: &base = vx2
8146 VS2: &base + vec_size*1 = vx0
8147 VS3: &base + vec_size*2 = vx1
8148 VS4: &base + vec_size*3 = vx3
8149
8150 Then permutation statements are generated:
8151
8152 VS5: vx5 = VEC_PERM_EXPR < vx0, vx3, {0, 8, 1, 9, 2, 10, 3, 11} >
8153 VS6: vx6 = VEC_PERM_EXPR < vx0, vx3, {4, 12, 5, 13, 6, 14, 7, 15} >
8154 ...
8155
8156 And they are put in STMT_VINFO_VEC_STMT of the corresponding scalar stmts
8157 (the order of the data-refs in the output of vect_permute_store_chain
8158 corresponds to the order of scalar stmts in the interleaving chain - see
8159 the documentation of vect_permute_store_chain()).
8160
8161 In case of both multiple types and interleaving, above vector stores and
8162 permutation stmts are created for every copy. The result vector stmts are
8163 put in STMT_VINFO_VEC_STMT for the first copy and in the corresponding
8164 STMT_VINFO_RELATED_STMT for the next copies.
8165 */
8166
8167 prev_stmt_info = NULL;
8168 tree vec_mask = NULL_TREE;
8169 for (j = 0; j < ncopies; j++)
8170 {
8171 stmt_vec_info new_stmt_info;
8172 if (j == 0)
8173 {
8174 if (slp)
8175 {
8176 /* Get vectorized arguments for SLP_NODE. */
8177 vect_get_vec_defs (vinfo, op, NULL_TREE, stmt_info, &vec_oprnds,
8178 NULL, slp_node);
8179
8180 vec_oprnd = vec_oprnds[0];
8181 }
8182 else
8183 {
8184 /* For interleaved stores we collect vectorized defs for all the
8185 stores in the group in DR_CHAIN and OPRNDS. DR_CHAIN is then
8186 used as an input to vect_permute_store_chain(), and OPRNDS as
8187 an input to vect_get_vec_def_for_stmt_copy() for the next copy.
8188
8189 If the store is not grouped, DR_GROUP_SIZE is 1, and DR_CHAIN and
8190 OPRNDS are of size 1. */
8191 stmt_vec_info next_stmt_info = first_stmt_info;
8192 for (i = 0; i < group_size; i++)
8193 {
8194 /* Since gaps are not supported for interleaved stores,
8195 DR_GROUP_SIZE is the exact number of stmts in the chain.
8196 Therefore, NEXT_STMT_INFO can't be NULL_TREE. If there
8197 is no interleaving, DR_GROUP_SIZE is 1,
8198 and only one iteration of the loop will be executed. */
8199 op = vect_get_store_rhs (next_stmt_info);
8200 vec_oprnd = vect_get_vec_def_for_operand
8201 (vinfo, op, next_stmt_info);
8202 dr_chain.quick_push (vec_oprnd);
8203 oprnds.quick_push (vec_oprnd);
8204 next_stmt_info = DR_GROUP_NEXT_ELEMENT (next_stmt_info);
8205 }
8206 if (mask)
8207 vec_mask = vect_get_vec_def_for_operand (vinfo, mask, stmt_info,
8208 mask_vectype);
8209 }
8210
8211 /* We should have caught mismatched types earlier. */
8212 gcc_assert (useless_type_conversion_p (vectype,
8213 TREE_TYPE (vec_oprnd)));
8214 bool simd_lane_access_p
8215 = STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) != 0;
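/* For a SIMD lane access whose DR base is the address of the variable
   itself (zero offset and init), use that address directly with a
   constant zero offset instead of creating a data-ref pointer IV.  */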
8216 if (simd_lane_access_p
8217 && !loop_masks
8218 && TREE_CODE (DR_BASE_ADDRESS (first_dr_info->dr)) == ADDR_EXPR
8219 && VAR_P (TREE_OPERAND (DR_BASE_ADDRESS (first_dr_info->dr), 0))
8220 && integer_zerop (get_dr_vinfo_offset (vinfo, first_dr_info))
8221 && integer_zerop (DR_INIT (first_dr_info->dr))
8222 && alias_sets_conflict_p (get_alias_set (aggr_type),
8223 get_alias_set (TREE_TYPE (ref_type))))
8224 {
8225 dataref_ptr = unshare_expr (DR_BASE_ADDRESS (first_dr_info->dr));
8226 dataref_offset = build_int_cst (ref_type, 0);
8227 }
8228 else if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
8229 vect_get_gather_scatter_ops (vinfo, loop, stmt_info, &gs_info,
8230 &dataref_ptr, &vec_offset);
8231 else
8232 dataref_ptr
8233 = vect_create_data_ref_ptr (vinfo, first_stmt_info, aggr_type,
8234 simd_lane_access_p ? loop : NULL,
8235 offset, &dummy, gsi, &ptr_incr,
8236 simd_lane_access_p, NULL_TREE, bump);
8237 }
8238 else
8239 {
8240 /* For interleaved stores we created vectorized defs for all the
8241 defs stored in OPRNDS in the previous iteration (previous copy).
8242 DR_CHAIN is then used as an input to vect_permute_store_chain(),
8243 and OPRNDS as an input to vect_get_vec_def_for_stmt_copy() for the
8244 next copy.
8245 If the store is not grouped, DR_GROUP_SIZE is 1, and DR_CHAIN and
8246 OPRNDS are of size 1. */
8247 for (i = 0; i < group_size; i++)
8248 {
8249 op = oprnds[i];
8250 vec_oprnd = vect_get_vec_def_for_stmt_copy (vinfo, op);
8251 dr_chain[i] = vec_oprnd;
8252 oprnds[i] = vec_oprnd;
8253 }
8254 if (mask)
8255 vec_mask = vect_get_vec_def_for_stmt_copy (vinfo, vec_mask);
8256 if (dataref_offset)
8257 dataref_offset
8258 = int_const_binop (PLUS_EXPR, dataref_offset, bump);
8259 else if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
8260 vec_offset = vect_get_vec_def_for_stmt_copy (vinfo, vec_offset);
8261 else
8262 dataref_ptr = bump_vector_ptr (vinfo, dataref_ptr, ptr_incr, gsi,
8263 stmt_info, bump);
8264 }
8265
8266 if (memory_access_type == VMAT_LOAD_STORE_LANES)
8267 {
8268 tree vec_array;
8269
8270 /* Get an array into which we can store the individual vectors. */
8271 vec_array = create_vector_array (vectype, vec_num);
8272
8273 /* Invalidate the current contents of VEC_ARRAY. This should
8274 become an RTL clobber too, which prevents the vector registers
8275 from being upward-exposed. */
8276 vect_clobber_variable (vinfo, stmt_info, gsi, vec_array);
8277
8278 /* Store the individual vectors into the array. */
8279 for (i = 0; i < vec_num; i++)
8280 {
8281 vec_oprnd = dr_chain[i];
8282 write_vector_array (vinfo, stmt_info,
8283 gsi, vec_oprnd, vec_array, i);
8284 }
8285
8286 tree final_mask = NULL;
8287 if (loop_masks)
8288 final_mask = vect_get_loop_mask (gsi, loop_masks, ncopies,
8289 vectype, j);
8290 if (vec_mask)
8291 final_mask = prepare_load_store_mask (mask_vectype, final_mask,
8292 vec_mask, gsi);
8293
8294 gcall *call;
8295 if (final_mask)
8296 {
8297 /* Emit:
8298 MASK_STORE_LANES (DATAREF_PTR, ALIAS_PTR, VEC_MASK,
8299 VEC_ARRAY). */
8300 unsigned int align = TYPE_ALIGN_UNIT (TREE_TYPE (vectype));
8301 tree alias_ptr = build_int_cst (ref_type, align);
8302 call = gimple_build_call_internal (IFN_MASK_STORE_LANES, 4,
8303 dataref_ptr, alias_ptr,
8304 final_mask, vec_array);
8305 }
8306 else
8307 {
8308 /* Emit:
8309 MEM_REF[...all elements...] = STORE_LANES (VEC_ARRAY). */
8310 data_ref = create_array_ref (aggr_type, dataref_ptr, ref_type);
8311 call = gimple_build_call_internal (IFN_STORE_LANES, 1,
8312 vec_array);
8313 gimple_call_set_lhs (call, data_ref);
8314 }
8315 gimple_call_set_nothrow (call, true);
8316 new_stmt_info = vect_finish_stmt_generation (vinfo, stmt_info,
8317 call, gsi);
8318
8319 /* Record that VEC_ARRAY is now dead. */
8320 vect_clobber_variable (vinfo, stmt_info, gsi, vec_array);
8321 }
8322 else
8323 {
8324 new_stmt_info = NULL;
8325 if (grouped_store)
8326 {
8327 if (j == 0)
8328 result_chain.create (group_size);
8329 /* Permute. */
8330 vect_permute_store_chain (vinfo, dr_chain, group_size, stmt_info,
8331 gsi, &result_chain);
8332 }
8333
8334 stmt_vec_info next_stmt_info = first_stmt_info;
8335 for (i = 0; i < vec_num; i++)
8336 {
8337 unsigned misalign;
8338 unsigned HOST_WIDE_INT align;
8339
8340 tree final_mask = NULL_TREE;
8341 if (loop_masks)
8342 final_mask = vect_get_loop_mask (gsi, loop_masks,
8343 vec_num * ncopies,
8344 vectype, vec_num * j + i);
8345 if (vec_mask)
8346 final_mask = prepare_load_store_mask (mask_vectype, final_mask,
8347 vec_mask, gsi);
8348
8349 if (memory_access_type == VMAT_GATHER_SCATTER)
8350 {
8351 tree scale = size_int (gs_info.scale);
8352 gcall *call;
8353 if (loop_masks)
8354 call = gimple_build_call_internal
8355 (IFN_MASK_SCATTER_STORE, 5, dataref_ptr, vec_offset,
8356 scale, vec_oprnd, final_mask);
8357 else
8358 call = gimple_build_call_internal
8359 (IFN_SCATTER_STORE, 4, dataref_ptr, vec_offset,
8360 scale, vec_oprnd);
8361 gimple_call_set_nothrow (call, true);
8362 new_stmt_info
8363 = vect_finish_stmt_generation (vinfo, stmt_info, call, gsi);
8364 break;
8365 }
8366
8367 if (i > 0)
8368 /* Bump the vector pointer. */
8369 dataref_ptr = bump_vector_ptr (vinfo, dataref_ptr, ptr_incr,
8370 gsi, stmt_info, bump);
8371
8372 if (slp)
8373 vec_oprnd = vec_oprnds[i];
8374 else if (grouped_store)
8375 /* For grouped stores vectorized defs are interleaved in
8376 vect_permute_store_chain(). */
8377 vec_oprnd = result_chain[i];
8378
8379 align = known_alignment (DR_TARGET_ALIGNMENT (first_dr_info));
8380 if (aligned_access_p (first_dr_info))
8381 misalign = 0;
8382 else if (DR_MISALIGNMENT (first_dr_info) == -1)
8383 {
8384 align = dr_alignment (vect_dr_behavior (vinfo, first_dr_info));
8385 misalign = 0;
8386 }
8387 else
8388 misalign = DR_MISALIGNMENT (first_dr_info);
8389 if (dataref_offset == NULL_TREE
8390 && TREE_CODE (dataref_ptr) == SSA_NAME)
8391 set_ptr_info_alignment (get_ptr_info (dataref_ptr), align,
8392 misalign);
8393
8394 if (memory_access_type == VMAT_CONTIGUOUS_REVERSE)
8395 {
8396 tree perm_mask = perm_mask_for_reverse (vectype);
8397 tree perm_dest = vect_create_destination_var
8398 (vect_get_store_rhs (stmt_info), vectype);
8399 tree new_temp = make_ssa_name (perm_dest);
8400
8401 /* Generate the permute statement. */
8402 gimple *perm_stmt
8403 = gimple_build_assign (new_temp, VEC_PERM_EXPR, vec_oprnd,
8404 vec_oprnd, perm_mask);
8405 vect_finish_stmt_generation (vinfo, stmt_info, perm_stmt, gsi);
8406
8407 perm_stmt = SSA_NAME_DEF_STMT (new_temp);
8408 vec_oprnd = new_temp;
8409 }
8410
8411 /* Arguments are ready. Create the new vector stmt. */
8412 if (final_mask)
8413 {
8414 align = least_bit_hwi (misalign | align);
8415 tree ptr = build_int_cst (ref_type, align);
8416 gcall *call
8417 = gimple_build_call_internal (IFN_MASK_STORE, 4,
8418 dataref_ptr, ptr,
8419 final_mask, vec_oprnd);
8420 gimple_call_set_nothrow (call, true);
8421 new_stmt_info
8422 = vect_finish_stmt_generation (vinfo, stmt_info, call, gsi);
8423 }
8424 else
8425 {
8426 data_ref = fold_build2 (MEM_REF, vectype,
8427 dataref_ptr,
8428 dataref_offset
8429 ? dataref_offset
8430 : build_int_cst (ref_type, 0));
8431 if (aligned_access_p (first_dr_info))
8432 ;
8433 else if (DR_MISALIGNMENT (first_dr_info) == -1)
8434 TREE_TYPE (data_ref)
8435 = build_aligned_type (TREE_TYPE (data_ref),
8436 align * BITS_PER_UNIT);
8437 else
8438 TREE_TYPE (data_ref)
8439 = build_aligned_type (TREE_TYPE (data_ref),
8440 TYPE_ALIGN (elem_type));
8441 vect_copy_ref_info (data_ref, DR_REF (first_dr_info->dr));
8442 gassign *new_stmt
8443 = gimple_build_assign (data_ref, vec_oprnd);
8444 new_stmt_info
8445 = vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
8446 }
8447
8448 if (slp)
8449 continue;
8450
8451 next_stmt_info = DR_GROUP_NEXT_ELEMENT (next_stmt_info);
8452 if (!next_stmt_info)
8453 break;
8454 }
8455 }
8456 if (!slp)
8457 {
8458 if (j == 0)
8459 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt_info;
8460 else
8461 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt_info;
8462 prev_stmt_info = new_stmt_info;
8463 }
8464 }
8465
8466 oprnds.release ();
8467 result_chain.release ();
8468 vec_oprnds.release ();
8469
8470 return true;
8471 }
8472
8473 /* Given a vector type VECTYPE, turns permutation SEL into the equivalent
8474 VECTOR_CST mask. No checks are made that the target platform supports the
8475 mask, so callers may wish to test can_vec_perm_const_p separately, or use
8476 vect_gen_perm_mask_checked. */
8477
8478 tree
8479 vect_gen_perm_mask_any (tree vectype, const vec_perm_indices &sel)
8480 {
8481 tree mask_type;
8482
8483 poly_uint64 nunits = sel.length ();
8484 gcc_assert (known_eq (nunits, TYPE_VECTOR_SUBPARTS (vectype)));
8485
8486 mask_type = build_vector_type (ssizetype, nunits);
8487 return vec_perm_indices_to_tree (mask_type, sel);
8488 }
8489
8490 /* Checked version of vect_gen_perm_mask_any. Asserts can_vec_perm_const_p,
8491 i.e. that the target supports the pattern _for arbitrary input vectors_. */
8492
8493 tree
8494 vect_gen_perm_mask_checked (tree vectype, const vec_perm_indices &sel)
8495 {
8496 gcc_assert (can_vec_perm_const_p (TYPE_MODE (vectype), sel));
8497 return vect_gen_perm_mask_any (vectype, sel);
8498 }
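
/* An illustrative sketch (not part of the original sources): a typical
   caller first builds the permutation indices and only then asks for the
   mask.  For instance, an element-reversing mask can be created roughly
   the way perm_mask_for_reverse does it:

     vec_perm_builder sel (nunits, 1, 3);
     for (int i = 0; i < 3; ++i)
       sel.quick_push (nunits - 1 - i);
     vec_perm_indices indices (sel, 1, nunits);
     if (can_vec_perm_const_p (TYPE_MODE (vectype), indices))
       mask = vect_gen_perm_mask_checked (vectype, indices);  */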
8499
8500 /* Given vector variables X and Y that were generated for the scalar
8501    STMT_INFO, generate instructions to permute the vector elements of X and Y
8502    using the permutation mask MASK_VEC, insert them at *GSI and return the
8503    permuted vector variable.  */
8504
8505 static tree
8506 permute_vec_elements (vec_info *vinfo,
8507 tree x, tree y, tree mask_vec, stmt_vec_info stmt_info,
8508 gimple_stmt_iterator *gsi)
8509 {
8510 tree vectype = TREE_TYPE (x);
8511 tree perm_dest, data_ref;
8512 gimple *perm_stmt;
8513
8514 tree scalar_dest = gimple_get_lhs (stmt_info->stmt);
8515 if (scalar_dest && TREE_CODE (scalar_dest) == SSA_NAME)
8516 perm_dest = vect_create_destination_var (scalar_dest, vectype);
8517 else
8518 perm_dest = vect_get_new_vect_var (vectype, vect_simple_var, NULL);
8519 data_ref = make_ssa_name (perm_dest);
8520
8521 /* Generate the permute statement. */
8522 perm_stmt = gimple_build_assign (data_ref, VEC_PERM_EXPR, x, y, mask_vec);
8523 vect_finish_stmt_generation (vinfo, stmt_info, perm_stmt, gsi);
8524
8525 return data_ref;
8526 }
8527
8528 /* Hoist the definitions of all SSA uses on STMT_INFO out of the loop LOOP,
8529    inserting them on the loop's preheader edge.  Returns true if we
8530    were successful in doing so (and thus STMT_INFO can then be moved),
8531    otherwise returns false.  */
8532
8533 static bool
8534 hoist_defs_of_uses (stmt_vec_info stmt_info, class loop *loop)
8535 {
8536 ssa_op_iter i;
8537 tree op;
8538 bool any = false;
8539
8540 FOR_EACH_SSA_TREE_OPERAND (op, stmt_info->stmt, i, SSA_OP_USE)
8541 {
8542 gimple *def_stmt = SSA_NAME_DEF_STMT (op);
8543 if (!gimple_nop_p (def_stmt)
8544 && flow_bb_inside_loop_p (loop, gimple_bb (def_stmt)))
8545 {
8546 	  /* Make sure we don't need to recurse.  While we could do
8547 	     so in simple cases, for more complex use webs we don't
8548 	     have an easy way to preserve stmt order so as to fulfil
8549 	     dependencies within them.  */
8550 tree op2;
8551 ssa_op_iter i2;
8552 if (gimple_code (def_stmt) == GIMPLE_PHI)
8553 return false;
8554 FOR_EACH_SSA_TREE_OPERAND (op2, def_stmt, i2, SSA_OP_USE)
8555 {
8556 gimple *def_stmt2 = SSA_NAME_DEF_STMT (op2);
8557 if (!gimple_nop_p (def_stmt2)
8558 && flow_bb_inside_loop_p (loop, gimple_bb (def_stmt2)))
8559 return false;
8560 }
8561 any = true;
8562 }
8563 }
8564
8565 if (!any)
8566 return true;
8567
8568 FOR_EACH_SSA_TREE_OPERAND (op, stmt_info->stmt, i, SSA_OP_USE)
8569 {
8570 gimple *def_stmt = SSA_NAME_DEF_STMT (op);
8571 if (!gimple_nop_p (def_stmt)
8572 && flow_bb_inside_loop_p (loop, gimple_bb (def_stmt)))
8573 {
8574 gimple_stmt_iterator gsi = gsi_for_stmt (def_stmt);
8575 gsi_remove (&gsi, false);
8576 gsi_insert_on_edge_immediate (loop_preheader_edge (loop), def_stmt);
8577 }
8578 }
8579
8580 return true;
8581 }
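
/* A hedged example of the above (all names are illustrative only): given

     loop:
       t_1 = a_0 + 1;     <-- a_0 is defined outside the loop
       x_2 = arr[t_1];    <-- the invariant load we want to hoist

   hoist_defs_of_uses moves t_1 = a_0 + 1 to the preheader edge so that
   the caller (the VMAT_INVARIANT handling in vectorizable_load) can then
   hoist the load itself.  A PHI def, or a def that itself depends on
   another in-loop def, makes the function return false instead.  */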
8582
8583 /* vectorizable_load.
8584
8585    Check if STMT_INFO reads a non-scalar data-ref (array/pointer/structure)
8586 that can be vectorized.
8587 If VEC_STMT is also passed, vectorize STMT_INFO: create a vectorized
8588 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
8589 Return true if STMT_INFO is vectorizable in this way. */
8590
8591 static bool
8592 vectorizable_load (vec_info *vinfo,
8593 stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
8594 stmt_vec_info *vec_stmt, slp_tree slp_node,
8595 stmt_vector_for_cost *cost_vec)
8596 {
8597 tree scalar_dest;
8598 tree vec_dest = NULL;
8599 tree data_ref = NULL;
8600 stmt_vec_info prev_stmt_info;
8601 loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
8602 class loop *loop = NULL;
8603 class loop *containing_loop = gimple_bb (stmt_info->stmt)->loop_father;
8604 bool nested_in_vect_loop = false;
8605 tree elem_type;
8606 tree new_temp;
8607 machine_mode mode;
8608 tree dummy;
8609 enum dr_alignment_support alignment_support_scheme;
8610 tree dataref_ptr = NULL_TREE;
8611 tree dataref_offset = NULL_TREE;
8612 gimple *ptr_incr = NULL;
8613 int ncopies;
8614 int i, j;
8615 unsigned int group_size;
8616 poly_uint64 group_gap_adj;
8617 tree msq = NULL_TREE, lsq;
8618 tree offset = NULL_TREE;
8619 tree byte_offset = NULL_TREE;
8620 tree realignment_token = NULL_TREE;
8621 gphi *phi = NULL;
8622 vec<tree> dr_chain = vNULL;
8623 bool grouped_load = false;
8624 stmt_vec_info first_stmt_info;
8625 stmt_vec_info first_stmt_info_for_drptr = NULL;
8626 bool compute_in_loop = false;
8627 class loop *at_loop;
8628 int vec_num;
8629 bool slp = (slp_node != NULL);
8630 bool slp_perm = false;
8631 bb_vec_info bb_vinfo = dyn_cast <bb_vec_info> (vinfo);
8632 poly_uint64 vf;
8633 tree aggr_type;
8634 gather_scatter_info gs_info;
8635 tree ref_type;
8636 enum vect_def_type mask_dt = vect_unknown_def_type;
8637
8638 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
8639 return false;
8640
8641 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
8642 && ! vec_stmt)
8643 return false;
8644
8645 tree mask = NULL_TREE, mask_vectype = NULL_TREE;
8646 if (gassign *assign = dyn_cast <gassign *> (stmt_info->stmt))
8647 {
8648 scalar_dest = gimple_assign_lhs (assign);
8649 if (TREE_CODE (scalar_dest) != SSA_NAME)
8650 return false;
8651
8652 tree_code code = gimple_assign_rhs_code (assign);
8653 if (code != ARRAY_REF
8654 && code != BIT_FIELD_REF
8655 && code != INDIRECT_REF
8656 && code != COMPONENT_REF
8657 && code != IMAGPART_EXPR
8658 && code != REALPART_EXPR
8659 && code != MEM_REF
8660 && TREE_CODE_CLASS (code) != tcc_declaration)
8661 return false;
8662 }
8663 else
8664 {
8665 gcall *call = dyn_cast <gcall *> (stmt_info->stmt);
8666 if (!call || !gimple_call_internal_p (call))
8667 return false;
8668
8669 internal_fn ifn = gimple_call_internal_fn (call);
8670 if (!internal_load_fn_p (ifn))
8671 return false;
8672
8673 scalar_dest = gimple_call_lhs (call);
8674 if (!scalar_dest)
8675 return false;
8676
8677 int mask_index = internal_fn_mask_index (ifn);
8678 if (mask_index >= 0)
8679 {
8680 mask = gimple_call_arg (call, mask_index);
8681 if (!vect_check_scalar_mask (vinfo, stmt_info, mask, &mask_dt,
8682 &mask_vectype))
8683 return false;
8684 }
8685 }
8686
8687 if (!STMT_VINFO_DATA_REF (stmt_info))
8688 return false;
8689
8690 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
8691 poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
8692
8693 if (loop_vinfo)
8694 {
8695 loop = LOOP_VINFO_LOOP (loop_vinfo);
8696 nested_in_vect_loop = nested_in_vect_loop_p (loop, stmt_info);
8697 vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
8698 }
8699 else
8700 vf = 1;
8701
8702 /* Multiple types in SLP are handled by creating the appropriate number of
8703 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
8704 case of SLP. */
8705 if (slp)
8706 ncopies = 1;
8707 else
8708 ncopies = vect_get_num_copies (loop_vinfo, vectype);
8709
8710 gcc_assert (ncopies >= 1);
8711
8712 /* FORNOW. This restriction should be relaxed. */
8713 if (nested_in_vect_loop && ncopies > 1)
8714 {
8715 if (dump_enabled_p ())
8716 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
8717 "multiple types in nested loop.\n");
8718 return false;
8719 }
8720
8721 /* Invalidate assumptions made by dependence analysis when vectorization
8722 on the unrolled body effectively re-orders stmts. */
8723 if (ncopies > 1
8724 && STMT_VINFO_MIN_NEG_DIST (stmt_info) != 0
8725 && maybe_gt (LOOP_VINFO_VECT_FACTOR (loop_vinfo),
8726 STMT_VINFO_MIN_NEG_DIST (stmt_info)))
8727 {
8728 if (dump_enabled_p ())
8729 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
8730 "cannot perform implicit CSE when unrolling "
8731 "with negative dependence distance\n");
8732 return false;
8733 }
8734
8735 elem_type = TREE_TYPE (vectype);
8736 mode = TYPE_MODE (vectype);
8737
8738   /* FORNOW. In some cases we can vectorize even if the data-type is not
8739      supported (e.g. data copies).  */
8740 if (optab_handler (mov_optab, mode) == CODE_FOR_nothing)
8741 {
8742 if (dump_enabled_p ())
8743 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
8744 "Aligned load, but unsupported type.\n");
8745 return false;
8746 }
8747
8748 /* Check if the load is a part of an interleaving chain. */
8749 if (STMT_VINFO_GROUPED_ACCESS (stmt_info))
8750 {
8751 grouped_load = true;
8752 /* FORNOW */
8753 gcc_assert (!nested_in_vect_loop);
8754 gcc_assert (!STMT_VINFO_GATHER_SCATTER_P (stmt_info));
8755
8756 first_stmt_info = DR_GROUP_FIRST_ELEMENT (stmt_info);
8757 group_size = DR_GROUP_SIZE (first_stmt_info);
8758
8759 /* Refuse non-SLP vectorization of SLP-only groups. */
8760 if (!slp && STMT_VINFO_SLP_VECT_ONLY (first_stmt_info))
8761 {
8762 if (dump_enabled_p ())
8763 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
8764 "cannot vectorize load in non-SLP mode.\n");
8765 return false;
8766 }
8767
8768 if (slp && SLP_TREE_LOAD_PERMUTATION (slp_node).exists ())
8769 {
8770 slp_perm = true;
8771
8772 if (!loop_vinfo)
8773 {
8774 /* In BB vectorization we may not actually use a loaded vector
8775 accessing elements in excess of DR_GROUP_SIZE. */
8776 stmt_vec_info group_info = SLP_TREE_SCALAR_STMTS (slp_node)[0];
8777 group_info = DR_GROUP_FIRST_ELEMENT (group_info);
8778 unsigned HOST_WIDE_INT nunits;
8779 unsigned j, k, maxk = 0;
8780 FOR_EACH_VEC_ELT (SLP_TREE_LOAD_PERMUTATION (slp_node), j, k)
8781 if (k > maxk)
8782 maxk = k;
8783 tree vectype = STMT_VINFO_VECTYPE (group_info);
8784 if (!TYPE_VECTOR_SUBPARTS (vectype).is_constant (&nunits)
8785 || maxk >= (DR_GROUP_SIZE (group_info) & ~(nunits - 1)))
8786 {
8787 if (dump_enabled_p ())
8788 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
8789 "BB vectorization with gaps at the end of "
8790 "a load is not supported\n");
8791 return false;
8792 }
8793 }
8794
8795 auto_vec<tree> tem;
8796 unsigned n_perms;
8797 if (!vect_transform_slp_perm_load (vinfo, slp_node, tem, NULL, vf,
8798 true, &n_perms))
8799 {
8800 if (dump_enabled_p ())
8801 dump_printf_loc (MSG_MISSED_OPTIMIZATION,
8802 vect_location,
8803 "unsupported load permutation\n");
8804 return false;
8805 }
8806 }
8807
8808 /* Invalidate assumptions made by dependence analysis when vectorization
8809 on the unrolled body effectively re-orders stmts. */
8810 if (!PURE_SLP_STMT (stmt_info)
8811 && STMT_VINFO_MIN_NEG_DIST (stmt_info) != 0
8812 && maybe_gt (LOOP_VINFO_VECT_FACTOR (loop_vinfo),
8813 STMT_VINFO_MIN_NEG_DIST (stmt_info)))
8814 {
8815 if (dump_enabled_p ())
8816 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
8817 "cannot perform implicit CSE when performing "
8818 "group loads with negative dependence distance\n");
8819 return false;
8820 }
8821 }
8822 else
8823 group_size = 1;
8824
8825 vect_memory_access_type memory_access_type;
8826 if (!get_load_store_type (vinfo, stmt_info, vectype, slp, mask, VLS_LOAD,
8827 ncopies, &memory_access_type, &gs_info))
8828 return false;
8829
8830 if (mask)
8831 {
8832 if (memory_access_type == VMAT_CONTIGUOUS)
8833 {
8834 machine_mode vec_mode = TYPE_MODE (vectype);
8835 if (!VECTOR_MODE_P (vec_mode)
8836 || !can_vec_mask_load_store_p (vec_mode,
8837 TYPE_MODE (mask_vectype), true))
8838 return false;
8839 }
8840 else if (memory_access_type != VMAT_LOAD_STORE_LANES
8841 && memory_access_type != VMAT_GATHER_SCATTER)
8842 {
8843 if (dump_enabled_p ())
8844 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
8845 "unsupported access type for masked load.\n");
8846 return false;
8847 }
8848 }
8849
8850 if (!vec_stmt) /* transformation not required. */
8851 {
8852 if (!slp)
8853 STMT_VINFO_MEMORY_ACCESS_TYPE (stmt_info) = memory_access_type;
8854
8855 if (loop_vinfo
8856 && LOOP_VINFO_CAN_FULLY_MASK_P (loop_vinfo))
8857 check_load_store_masking (loop_vinfo, vectype, VLS_LOAD, group_size,
8858 memory_access_type, &gs_info, mask);
8859
8860 STMT_VINFO_TYPE (stmt_info) = load_vec_info_type;
8861 vect_model_load_cost (vinfo, stmt_info, ncopies, vf, memory_access_type,
8862 slp_node, cost_vec);
8863 return true;
8864 }
8865
8866 if (!slp)
8867 gcc_assert (memory_access_type
8868 == STMT_VINFO_MEMORY_ACCESS_TYPE (stmt_info));
8869
8870 if (dump_enabled_p ())
8871 dump_printf_loc (MSG_NOTE, vect_location,
8872 "transform load. ncopies = %d\n", ncopies);
8873
8874 /* Transform. */
8875
8876 dr_vec_info *dr_info = STMT_VINFO_DR_INFO (stmt_info), *first_dr_info = NULL;
8877 ensure_base_align (dr_info);
8878
8879 if (memory_access_type == VMAT_GATHER_SCATTER && gs_info.decl)
8880 {
8881 vect_build_gather_load_calls (vinfo,
8882 stmt_info, gsi, vec_stmt, &gs_info, mask);
8883 return true;
8884 }
8885
8886 if (memory_access_type == VMAT_INVARIANT)
8887 {
8888 gcc_assert (!grouped_load && !mask && !bb_vinfo);
8889 /* If we have versioned for aliasing or the loop doesn't
8890 have any data dependencies that would preclude this,
8891 then we are sure this is a loop invariant load and
8892 thus we can insert it on the preheader edge. */
8893 bool hoist_p = (LOOP_VINFO_NO_DATA_DEPENDENCIES (loop_vinfo)
8894 && !nested_in_vect_loop
8895 && hoist_defs_of_uses (stmt_info, loop));
8896 if (hoist_p)
8897 {
8898 gassign *stmt = as_a <gassign *> (stmt_info->stmt);
8899 if (dump_enabled_p ())
8900 dump_printf_loc (MSG_NOTE, vect_location,
8901 "hoisting out of the vectorized loop: %G", stmt);
8902 scalar_dest = copy_ssa_name (scalar_dest);
8903 tree rhs = unshare_expr (gimple_assign_rhs1 (stmt));
8904 gsi_insert_on_edge_immediate
8905 (loop_preheader_edge (loop),
8906 gimple_build_assign (scalar_dest, rhs));
8907 }
8908 /* These copies are all equivalent, but currently the representation
8909 requires a separate STMT_VINFO_VEC_STMT for each one. */
8910 prev_stmt_info = NULL;
8911 gimple_stmt_iterator gsi2 = *gsi;
8912 gsi_next (&gsi2);
8913 for (j = 0; j < ncopies; j++)
8914 {
8915 stmt_vec_info new_stmt_info;
8916 if (hoist_p)
8917 {
8918 new_temp = vect_init_vector (vinfo, stmt_info, scalar_dest,
8919 vectype, NULL);
8920 gimple *new_stmt = SSA_NAME_DEF_STMT (new_temp);
8921 new_stmt_info = vinfo->add_stmt (new_stmt);
8922 }
8923 else
8924 {
8925 new_temp = vect_init_vector (vinfo, stmt_info, scalar_dest,
8926 vectype, &gsi2);
8927 new_stmt_info = vinfo->lookup_def (new_temp);
8928 }
8929 if (slp)
8930 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt_info);
8931 else if (j == 0)
8932 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt_info;
8933 else
8934 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt_info;
8935 prev_stmt_info = new_stmt_info;
8936 }
8937 return true;
8938 }
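
  /* A sketch of what the VMAT_INVARIANT handling above generates for a
     scalar invariant load  x = *p, assuming hoisting succeeded: the scalar
     load is re-emitted on the preheader edge

       x_hoisted = *p;

     and each of the NCOPIES vector copies is materialized by splatting it

       vect_cst = { x_hoisted, x_hoisted, ... };

     via vect_init_vector.  Without hoisting the splat is instead built
     right after the original scalar load inside the loop.  */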
8939
8940 if (memory_access_type == VMAT_ELEMENTWISE
8941 || memory_access_type == VMAT_STRIDED_SLP)
8942 {
8943 gimple_stmt_iterator incr_gsi;
8944 bool insert_after;
8945 gimple *incr;
8946 tree offvar;
8947 tree ivstep;
8948 tree running_off;
8949 vec<constructor_elt, va_gc> *v = NULL;
8950 tree stride_base, stride_step, alias_off;
8951 /* Checked by get_load_store_type. */
8952 unsigned int const_nunits = nunits.to_constant ();
8953 unsigned HOST_WIDE_INT cst_offset = 0;
8954 tree dr_offset;
8955
8956 gcc_assert (!LOOP_VINFO_FULLY_MASKED_P (loop_vinfo));
8957 gcc_assert (!nested_in_vect_loop);
8958
8959 if (grouped_load)
8960 {
8961 first_stmt_info = DR_GROUP_FIRST_ELEMENT (stmt_info);
8962 first_dr_info = STMT_VINFO_DR_INFO (first_stmt_info);
8963 }
8964 else
8965 {
8966 first_stmt_info = stmt_info;
8967 first_dr_info = dr_info;
8968 }
8969 if (slp && grouped_load)
8970 {
8971 group_size = DR_GROUP_SIZE (first_stmt_info);
8972 ref_type = get_group_alias_ptr_type (first_stmt_info);
8973 }
8974 else
8975 {
8976 if (grouped_load)
8977 cst_offset
8978 = (tree_to_uhwi (TYPE_SIZE_UNIT (TREE_TYPE (vectype)))
8979 * vect_get_place_in_interleaving_chain (stmt_info,
8980 first_stmt_info));
8981 group_size = 1;
8982 ref_type = reference_alias_ptr_type (DR_REF (dr_info->dr));
8983 }
8984
8985 dr_offset = get_dr_vinfo_offset (vinfo, first_dr_info);
8986 stride_base
8987 = fold_build_pointer_plus
8988 (DR_BASE_ADDRESS (first_dr_info->dr),
8989 size_binop (PLUS_EXPR,
8990 convert_to_ptrofftype (dr_offset),
8991 convert_to_ptrofftype (DR_INIT (first_dr_info->dr))));
8992 stride_step = fold_convert (sizetype, DR_STEP (first_dr_info->dr));
8993
8994       /* For a load with a loop-invariant (but non-power-of-2)
8995 stride (i.e. not a grouped access) like so:
8996
8997 for (i = 0; i < n; i += stride)
8998 ... = array[i];
8999
9000 we generate a new induction variable and new accesses to
9001 form a new vector (or vectors, depending on ncopies):
9002
9003 for (j = 0; ; j += VF*stride)
9004 tmp1 = array[j];
9005 tmp2 = array[j + stride];
9006 ...
9007 vectemp = {tmp1, tmp2, ...}
9008 */
9009
9010 ivstep = fold_build2 (MULT_EXPR, TREE_TYPE (stride_step), stride_step,
9011 build_int_cst (TREE_TYPE (stride_step), vf));
9012
9013 standard_iv_increment_position (loop, &incr_gsi, &insert_after);
9014
9015 stride_base = cse_and_gimplify_to_preheader (loop_vinfo, stride_base);
9016 ivstep = cse_and_gimplify_to_preheader (loop_vinfo, ivstep);
9017 create_iv (stride_base, ivstep, NULL,
9018 loop, &incr_gsi, insert_after,
9019 &offvar, NULL);
9020 incr = gsi_stmt (incr_gsi);
9021 loop_vinfo->add_stmt (incr);
9022
9023 stride_step = cse_and_gimplify_to_preheader (loop_vinfo, stride_step);
9024
9025 prev_stmt_info = NULL;
9026 running_off = offvar;
9027 alias_off = build_int_cst (ref_type, 0);
9028 int nloads = const_nunits;
9029 int lnel = 1;
9030 tree ltype = TREE_TYPE (vectype);
9031 tree lvectype = vectype;
9032 auto_vec<tree> dr_chain;
9033 if (memory_access_type == VMAT_STRIDED_SLP)
9034 {
9035 if (group_size < const_nunits)
9036 {
9037 /* First check if vec_init optab supports construction from vector
9038 elts directly. Otherwise avoid emitting a constructor of
9039 vector elements by performing the loads using an integer type
9040 of the same size, constructing a vector of those and then
9041 re-interpreting it as the original vector type. This avoids a
9042 huge runtime penalty due to the general inability to perform
9043 store forwarding from smaller stores to a larger load. */
9044 tree ptype;
9045 tree vtype
9046 = vector_vector_composition_type (vectype,
9047 const_nunits / group_size,
9048 &ptype);
9049 if (vtype != NULL_TREE)
9050 {
9051 nloads = const_nunits / group_size;
9052 lnel = group_size;
9053 lvectype = vtype;
9054 ltype = ptype;
9055 }
9056 }
9057 else
9058 {
9059 nloads = 1;
9060 lnel = const_nunits;
9061 ltype = vectype;
9062 }
9063 ltype = build_aligned_type (ltype, TYPE_ALIGN (TREE_TYPE (vectype)));
9064 }
9065       /* Load vector(1) scalar_type directly if the vectype has just one element.  */
9066 else if (nloads == 1)
9067 ltype = vectype;
9068
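      /* A hedged illustration of the VMAT_STRIDED_SLP composition trick above
	 (types and names are examples only): for a V4SI vectype with
	 group_size == 2, vector_vector_composition_type may return V2DI with
	 ptype DImode, so instead of four SImode loads feeding a constructor
	 the loop below emits

	   tem1 = MEM[off];                       <-- one DImode load per group
	   tem2 = MEM[off + step];
	   tmp  = { tem1, tem2 };                 <-- V2DI constructor
	   vec  = VIEW_CONVERT_EXPR<V4SI> (tmp);

	 which sidesteps the store-forwarding penalty mentioned above.  */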
9069 if (slp)
9070 {
9071 /* For SLP permutation support we need to load the whole group,
9072 not only the number of vector stmts the permutation result
9073 fits in. */
9074 if (slp_perm)
9075 {
9076 /* We don't yet generate SLP_TREE_LOAD_PERMUTATIONs for
9077 variable VF. */
9078 unsigned int const_vf = vf.to_constant ();
9079 ncopies = CEIL (group_size * const_vf, const_nunits);
9080 dr_chain.create (ncopies);
9081 }
9082 else
9083 ncopies = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
9084 }
9085 unsigned int group_el = 0;
9086 unsigned HOST_WIDE_INT
9087 elsz = tree_to_uhwi (TYPE_SIZE_UNIT (TREE_TYPE (vectype)));
9088 for (j = 0; j < ncopies; j++)
9089 {
9090 if (nloads > 1)
9091 vec_alloc (v, nloads);
9092 stmt_vec_info new_stmt_info = NULL;
9093 for (i = 0; i < nloads; i++)
9094 {
9095 tree this_off = build_int_cst (TREE_TYPE (alias_off),
9096 group_el * elsz + cst_offset);
9097 tree data_ref = build2 (MEM_REF, ltype, running_off, this_off);
9098 vect_copy_ref_info (data_ref, DR_REF (first_dr_info->dr));
9099 gassign *new_stmt
9100 = gimple_build_assign (make_ssa_name (ltype), data_ref);
9101 new_stmt_info
9102 = vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
9103 if (nloads > 1)
9104 CONSTRUCTOR_APPEND_ELT (v, NULL_TREE,
9105 gimple_assign_lhs (new_stmt));
9106
9107 group_el += lnel;
9108 if (! slp
9109 || group_el == group_size)
9110 {
9111 tree newoff = copy_ssa_name (running_off);
9112 gimple *incr = gimple_build_assign (newoff, POINTER_PLUS_EXPR,
9113 running_off, stride_step);
9114 vect_finish_stmt_generation (vinfo, stmt_info, incr, gsi);
9115
9116 running_off = newoff;
9117 group_el = 0;
9118 }
9119 }
9120 if (nloads > 1)
9121 {
9122 tree vec_inv = build_constructor (lvectype, v);
9123 new_temp = vect_init_vector (vinfo, stmt_info,
9124 vec_inv, lvectype, gsi);
9125 new_stmt_info = vinfo->lookup_def (new_temp);
9126 if (lvectype != vectype)
9127 {
9128 gassign *new_stmt
9129 = gimple_build_assign (make_ssa_name (vectype),
9130 VIEW_CONVERT_EXPR,
9131 build1 (VIEW_CONVERT_EXPR,
9132 vectype, new_temp));
9133 new_stmt_info
9134 = vect_finish_stmt_generation (vinfo, stmt_info,
9135 new_stmt, gsi);
9136 }
9137 }
9138
9139 if (slp)
9140 {
9141 if (slp_perm)
9142 dr_chain.quick_push (gimple_assign_lhs (new_stmt_info->stmt));
9143 else
9144 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt_info);
9145 }
9146 else
9147 {
9148 if (j == 0)
9149 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt_info;
9150 else
9151 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt_info;
9152 prev_stmt_info = new_stmt_info;
9153 }
9154 }
9155 if (slp_perm)
9156 {
9157 unsigned n_perms;
9158 vect_transform_slp_perm_load (vinfo, slp_node, dr_chain, gsi, vf,
9159 false, &n_perms);
9160 }
9161 return true;
9162 }
9163
9164 if (memory_access_type == VMAT_GATHER_SCATTER
9165 || (!slp && memory_access_type == VMAT_CONTIGUOUS))
9166 grouped_load = false;
9167
9168 if (grouped_load)
9169 {
9170 first_stmt_info = DR_GROUP_FIRST_ELEMENT (stmt_info);
9171 group_size = DR_GROUP_SIZE (first_stmt_info);
9172 /* For SLP vectorization we directly vectorize a subchain
9173 without permutation. */
9174 if (slp && ! SLP_TREE_LOAD_PERMUTATION (slp_node).exists ())
9175 first_stmt_info = SLP_TREE_SCALAR_STMTS (slp_node)[0];
9176 /* For BB vectorization always use the first stmt to base
9177 the data ref pointer on. */
9178 if (bb_vinfo)
9179 first_stmt_info_for_drptr = SLP_TREE_SCALAR_STMTS (slp_node)[0];
9180
9181 /* Check if the chain of loads is already vectorized. */
9182 if (STMT_VINFO_VEC_STMT (first_stmt_info)
9183 /* For SLP we would need to copy over SLP_TREE_VEC_STMTS.
9184 ??? But we can only do so if there is exactly one
9185 as we have no way to get at the rest. Leave the CSE
9186 opportunity alone.
9187 ??? With the group load eventually participating
9188 in multiple different permutations (having multiple
9189 slp nodes which refer to the same group) the CSE
9190 	     would even produce wrong code.  See PR56270.  */
9191 && !slp)
9192 {
9193 *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
9194 return true;
9195 }
9196 first_dr_info = STMT_VINFO_DR_INFO (first_stmt_info);
9197 group_gap_adj = 0;
9198
9199 /* VEC_NUM is the number of vect stmts to be created for this group. */
9200 if (slp)
9201 {
9202 grouped_load = false;
9203 /* If an SLP permutation is from N elements to N elements,
9204 	     and if one vector holds a whole number of N-element groups, we can load
9205 the inputs to the permutation in the same way as an
9206 unpermuted sequence. In other cases we need to load the
9207 whole group, not only the number of vector stmts the
9208 permutation result fits in. */
9209 unsigned scalar_lanes = SLP_TREE_SCALAR_STMTS (slp_node).length ();
9210 if (slp_perm
9211 && (group_size != scalar_lanes
9212 || !multiple_p (nunits, group_size)))
9213 {
9214 /* We don't yet generate such SLP_TREE_LOAD_PERMUTATIONs for
9215 variable VF; see vect_transform_slp_perm_load. */
9216 unsigned int const_vf = vf.to_constant ();
9217 unsigned int const_nunits = nunits.to_constant ();
9218 vec_num = CEIL (group_size * const_vf, const_nunits);
9219 group_gap_adj = vf * group_size - nunits * vec_num;
9220 }
9221 else
9222 {
9223 vec_num = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
9224 group_gap_adj
9225 = group_size - scalar_lanes;
9226 }
9227 }
9228 else
9229 vec_num = group_size;
9230
9231 ref_type = get_group_alias_ptr_type (first_stmt_info);
9232 }
9233 else
9234 {
9235 first_stmt_info = stmt_info;
9236 first_dr_info = dr_info;
9237 group_size = vec_num = 1;
9238 group_gap_adj = 0;
9239 ref_type = reference_alias_ptr_type (DR_REF (first_dr_info->dr));
9240 }
9241
9242   /* Gather-scatter accesses perform only component accesses; alignment
9243      is irrelevant for them.  */
9244 if (memory_access_type == VMAT_GATHER_SCATTER)
9245 alignment_support_scheme = dr_unaligned_supported;
9246 else
9247 alignment_support_scheme
9248 = vect_supportable_dr_alignment (vinfo, first_dr_info, false);
9249
9250 gcc_assert (alignment_support_scheme);
9251 vec_loop_masks *loop_masks
9252 = (loop_vinfo && LOOP_VINFO_FULLY_MASKED_P (loop_vinfo)
9253 ? &LOOP_VINFO_MASKS (loop_vinfo)
9254 : NULL);
9255 /* Targets with store-lane instructions must not require explicit
9256 realignment. vect_supportable_dr_alignment always returns either
9257 dr_aligned or dr_unaligned_supported for masked operations. */
9258 gcc_assert ((memory_access_type != VMAT_LOAD_STORE_LANES
9259 && !mask
9260 && !loop_masks)
9261 || alignment_support_scheme == dr_aligned
9262 || alignment_support_scheme == dr_unaligned_supported);
9263
9264 /* In case the vectorization factor (VF) is bigger than the number
9265 of elements that we can fit in a vectype (nunits), we have to generate
9266      more than one vector stmt - i.e. - we need to "unroll" the
9267 vector stmt by a factor VF/nunits. In doing so, we record a pointer
9268 from one copy of the vector stmt to the next, in the field
9269 STMT_VINFO_RELATED_STMT. This is necessary in order to allow following
9270 stages to find the correct vector defs to be used when vectorizing
9271 stmts that use the defs of the current stmt. The example below
9272 illustrates the vectorization process when VF=16 and nunits=4 (i.e., we
9273 need to create 4 vectorized stmts):
9274
9275 before vectorization:
9276 RELATED_STMT VEC_STMT
9277 S1: x = memref - -
9278 S2: z = x + 1 - -
9279
9280 step 1: vectorize stmt S1:
9281 We first create the vector stmt VS1_0, and, as usual, record a
9282 pointer to it in the STMT_VINFO_VEC_STMT of the scalar stmt S1.
9283 Next, we create the vector stmt VS1_1, and record a pointer to
9284 it in the STMT_VINFO_RELATED_STMT of the vector stmt VS1_0.
9285 Similarly, for VS1_2 and VS1_3. This is the resulting chain of
9286 stmts and pointers:
9287 RELATED_STMT VEC_STMT
9288 VS1_0: vx0 = memref0 VS1_1 -
9289 VS1_1: vx1 = memref1 VS1_2 -
9290 VS1_2: vx2 = memref2 VS1_3 -
9291 VS1_3: vx3 = memref3 - -
9292 S1: x = load - VS1_0
9293 S2: z = x + 1 - -
9294
9295      See the documentation of vect_get_vec_def_for_stmt_copy for how the
9296      information recorded in the RELATED_STMT field is used to vectorize
9297      stmt S2.  */
9298
9299 /* In case of interleaving (non-unit grouped access):
9300
9301 S1: x2 = &base + 2
9302 S2: x0 = &base
9303 S3: x1 = &base + 1
9304 S4: x3 = &base + 3
9305
9306 Vectorized loads are created in the order of memory accesses
9307 starting from the access of the first stmt of the chain:
9308
9309 VS1: vx0 = &base
9310 VS2: vx1 = &base + vec_size*1
9311 VS3: vx3 = &base + vec_size*2
9312 VS4: vx4 = &base + vec_size*3
9313
9314 Then permutation statements are generated:
9315
9316 VS5: vx5 = VEC_PERM_EXPR < vx0, vx1, { 0, 2, ..., i*2 } >
9317 VS6: vx6 = VEC_PERM_EXPR < vx0, vx1, { 1, 3, ..., i*2+1 } >
9318 ...
9319
9320 And they are put in STMT_VINFO_VEC_STMT of the corresponding scalar stmts
9321 (the order of the data-refs in the output of vect_permute_load_chain
9322 corresponds to the order of scalar stmts in the interleaving chain - see
9323 the documentation of vect_permute_load_chain()).
9324 The generation of permutation stmts and recording them in
9325 STMT_VINFO_VEC_STMT is done in vect_transform_grouped_load().
9326
9327 In case of both multiple types and interleaving, the vector loads and
9328 permutation stmts above are created for every copy. The result vector
9329 stmts are put in STMT_VINFO_VEC_STMT for the first copy and in the
9330 corresponding STMT_VINFO_RELATED_STMT for the next copies. */
9331
9332 /* If the data reference is aligned (dr_aligned) or potentially unaligned
9333 on a target that supports unaligned accesses (dr_unaligned_supported)
9334 we generate the following code:
9335 p = initial_addr;
9336 indx = 0;
9337 loop {
9338 p = p + indx * vectype_size;
9339 vec_dest = *(p);
9340 indx = indx + 1;
9341 }
9342
9343 Otherwise, the data reference is potentially unaligned on a target that
9344 does not support unaligned accesses (dr_explicit_realign_optimized) -
9345 then generate the following code, in which the data in each iteration is
9346 obtained by two vector loads, one from the previous iteration, and one
9347 from the current iteration:
9348 p1 = initial_addr;
9349 msq_init = *(floor(p1))
9350 p2 = initial_addr + VS - 1;
9351 realignment_token = call target_builtin;
9352 indx = 0;
9353 loop {
9354 p2 = p2 + indx * vectype_size
9355 lsq = *(floor(p2))
9356 vec_dest = realign_load (msq, lsq, realignment_token)
9357 indx = indx + 1;
9358 msq = lsq;
9359 } */
9360
9361 /* If the misalignment remains the same throughout the execution of the
9362 loop, we can create the init_addr and permutation mask at the loop
9363      preheader.  Otherwise, they need to be created inside the loop.
9364 This can only occur when vectorizing memory accesses in the inner-loop
9365 nested within an outer-loop that is being vectorized. */
9366
9367 if (nested_in_vect_loop
9368 && !multiple_p (DR_STEP_ALIGNMENT (dr_info->dr),
9369 GET_MODE_SIZE (TYPE_MODE (vectype))))
9370 {
9371 gcc_assert (alignment_support_scheme != dr_explicit_realign_optimized);
9372 compute_in_loop = true;
9373 }
9374
9375 bool diff_first_stmt_info
9376 = first_stmt_info_for_drptr && first_stmt_info != first_stmt_info_for_drptr;
9377
9378 if ((alignment_support_scheme == dr_explicit_realign_optimized
9379 || alignment_support_scheme == dr_explicit_realign)
9380 && !compute_in_loop)
9381 {
9382       /* If we have a different first_stmt_info, we can't set up realignment
9383 	 here, since we can't guarantee that first_stmt_info's DR has been
9384 	 initialized yet; instead use first_stmt_info_for_drptr's DR, bumping
9385 	 it by the distance from first_stmt_info's DR as below.  */
9386 if (!diff_first_stmt_info)
9387 msq = vect_setup_realignment (vinfo,
9388 first_stmt_info, gsi, &realignment_token,
9389 alignment_support_scheme, NULL_TREE,
9390 &at_loop);
9391 if (alignment_support_scheme == dr_explicit_realign_optimized)
9392 {
9393 phi = as_a <gphi *> (SSA_NAME_DEF_STMT (msq));
9394 byte_offset = size_binop (MINUS_EXPR, TYPE_SIZE_UNIT (vectype),
9395 size_one_node);
9396 gcc_assert (!first_stmt_info_for_drptr);
9397 }
9398 }
9399 else
9400 at_loop = loop;
9401
9402 if (memory_access_type == VMAT_CONTIGUOUS_REVERSE)
9403 offset = size_int (-TYPE_VECTOR_SUBPARTS (vectype) + 1);
9404
9405 tree bump;
9406 tree vec_offset = NULL_TREE;
9407 if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
9408 {
9409 aggr_type = NULL_TREE;
9410 bump = NULL_TREE;
9411 }
9412 else if (memory_access_type == VMAT_GATHER_SCATTER)
9413 {
9414 aggr_type = elem_type;
9415 vect_get_strided_load_store_ops (stmt_info, loop_vinfo, &gs_info,
9416 &bump, &vec_offset);
9417 }
9418 else
9419 {
9420 if (memory_access_type == VMAT_LOAD_STORE_LANES)
9421 aggr_type = build_array_type_nelts (elem_type, vec_num * nunits);
9422 else
9423 aggr_type = vectype;
9424 bump = vect_get_data_ptr_increment (vinfo, dr_info, aggr_type,
9425 memory_access_type);
9426 }
9427
9428 tree vec_mask = NULL_TREE;
9429 prev_stmt_info = NULL;
9430 poly_uint64 group_elt = 0;
9431 for (j = 0; j < ncopies; j++)
9432 {
9433 stmt_vec_info new_stmt_info = NULL;
9434 /* 1. Create the vector or array pointer update chain. */
9435 if (j == 0)
9436 {
9437 bool simd_lane_access_p
9438 = STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) != 0;
9439 if (simd_lane_access_p
9440 && TREE_CODE (DR_BASE_ADDRESS (first_dr_info->dr)) == ADDR_EXPR
9441 && VAR_P (TREE_OPERAND (DR_BASE_ADDRESS (first_dr_info->dr), 0))
9442 && integer_zerop (get_dr_vinfo_offset (vinfo, first_dr_info))
9443 && integer_zerop (DR_INIT (first_dr_info->dr))
9444 && alias_sets_conflict_p (get_alias_set (aggr_type),
9445 get_alias_set (TREE_TYPE (ref_type)))
9446 && (alignment_support_scheme == dr_aligned
9447 || alignment_support_scheme == dr_unaligned_supported))
9448 {
9449 dataref_ptr = unshare_expr (DR_BASE_ADDRESS (first_dr_info->dr));
9450 dataref_offset = build_int_cst (ref_type, 0);
9451 }
9452 else if (diff_first_stmt_info)
9453 {
9454 dataref_ptr
9455 = vect_create_data_ref_ptr (vinfo, first_stmt_info_for_drptr,
9456 aggr_type, at_loop, offset, &dummy,
9457 gsi, &ptr_incr, simd_lane_access_p,
9458 byte_offset, bump);
9459 /* Adjust the pointer by the difference to first_stmt. */
9460 data_reference_p ptrdr
9461 = STMT_VINFO_DATA_REF (first_stmt_info_for_drptr);
9462 tree diff
9463 = fold_convert (sizetype,
9464 size_binop (MINUS_EXPR,
9465 DR_INIT (first_dr_info->dr),
9466 DR_INIT (ptrdr)));
9467 dataref_ptr = bump_vector_ptr (vinfo, dataref_ptr, ptr_incr, gsi,
9468 stmt_info, diff);
9469 if (alignment_support_scheme == dr_explicit_realign)
9470 {
9471 msq = vect_setup_realignment (vinfo,
9472 first_stmt_info_for_drptr, gsi,
9473 &realignment_token,
9474 alignment_support_scheme,
9475 dataref_ptr, &at_loop);
9476 gcc_assert (!compute_in_loop);
9477 }
9478 }
9479 else if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
9480 vect_get_gather_scatter_ops (vinfo, loop, stmt_info, &gs_info,
9481 &dataref_ptr, &vec_offset);
9482 else
9483 dataref_ptr
9484 = vect_create_data_ref_ptr (vinfo, first_stmt_info, aggr_type,
9485 at_loop,
9486 offset, &dummy, gsi, &ptr_incr,
9487 simd_lane_access_p,
9488 byte_offset, bump);
9489 if (mask)
9490 {
9491 if (slp_node)
9492 {
9493 auto_vec<vec<tree> > vec_defs (1);
9494 vect_get_slp_defs (vinfo, slp_node, &vec_defs);
9495 vec_mask = vec_defs[0][0];
9496 }
9497 else
9498 vec_mask = vect_get_vec_def_for_operand (vinfo, mask, stmt_info,
9499 mask_vectype);
9500 }
9501 }
9502 else
9503 {
9504 if (dataref_offset)
9505 dataref_offset = int_const_binop (PLUS_EXPR, dataref_offset,
9506 bump);
9507 else if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
9508 vec_offset = vect_get_vec_def_for_stmt_copy (vinfo, vec_offset);
9509 else
9510 dataref_ptr = bump_vector_ptr (vinfo, dataref_ptr, ptr_incr, gsi,
9511 stmt_info, bump);
9512 if (mask)
9513 vec_mask = vect_get_vec_def_for_stmt_copy (vinfo, vec_mask);
9514 }
9515
9516 if (grouped_load || slp_perm)
9517 dr_chain.create (vec_num);
9518
9519 if (memory_access_type == VMAT_LOAD_STORE_LANES)
9520 {
9521 tree vec_array;
9522
9523 vec_array = create_vector_array (vectype, vec_num);
9524
9525 tree final_mask = NULL_TREE;
9526 if (loop_masks)
9527 final_mask = vect_get_loop_mask (gsi, loop_masks, ncopies,
9528 vectype, j);
9529 if (vec_mask)
9530 final_mask = prepare_load_store_mask (mask_vectype, final_mask,
9531 vec_mask, gsi);
9532
9533 gcall *call;
9534 if (final_mask)
9535 {
9536 /* Emit:
9537 VEC_ARRAY = MASK_LOAD_LANES (DATAREF_PTR, ALIAS_PTR,
9538 VEC_MASK). */
9539 unsigned int align = TYPE_ALIGN_UNIT (TREE_TYPE (vectype));
9540 tree alias_ptr = build_int_cst (ref_type, align);
9541 call = gimple_build_call_internal (IFN_MASK_LOAD_LANES, 3,
9542 dataref_ptr, alias_ptr,
9543 final_mask);
9544 }
9545 else
9546 {
9547 /* Emit:
9548 VEC_ARRAY = LOAD_LANES (MEM_REF[...all elements...]). */
9549 data_ref = create_array_ref (aggr_type, dataref_ptr, ref_type);
9550 call = gimple_build_call_internal (IFN_LOAD_LANES, 1, data_ref);
9551 }
9552 gimple_call_set_lhs (call, vec_array);
9553 gimple_call_set_nothrow (call, true);
9554 new_stmt_info = vect_finish_stmt_generation (vinfo, stmt_info,
9555 call, gsi);
9556
9557 /* Extract each vector into an SSA_NAME. */
9558 for (i = 0; i < vec_num; i++)
9559 {
9560 new_temp = read_vector_array (vinfo, stmt_info, gsi, scalar_dest,
9561 vec_array, i);
9562 dr_chain.quick_push (new_temp);
9563 }
9564
9565 /* Record the mapping between SSA_NAMEs and statements. */
9566 vect_record_grouped_load_vectors (vinfo, stmt_info, dr_chain);
9567
9568 /* Record that VEC_ARRAY is now dead. */
9569 vect_clobber_variable (vinfo, stmt_info, gsi, vec_array);
9570 }
9571 else
9572 {
9573 for (i = 0; i < vec_num; i++)
9574 {
9575 tree final_mask = NULL_TREE;
9576 if (loop_masks
9577 && memory_access_type != VMAT_INVARIANT)
9578 final_mask = vect_get_loop_mask (gsi, loop_masks,
9579 vec_num * ncopies,
9580 vectype, vec_num * j + i);
9581 if (vec_mask)
9582 final_mask = prepare_load_store_mask (mask_vectype, final_mask,
9583 vec_mask, gsi);
9584
9585 if (i > 0)
9586 dataref_ptr = bump_vector_ptr (vinfo, dataref_ptr, ptr_incr,
9587 gsi, stmt_info, bump);
9588
9589 /* 2. Create the vector-load in the loop. */
9590 gimple *new_stmt = NULL;
9591 switch (alignment_support_scheme)
9592 {
9593 case dr_aligned:
9594 case dr_unaligned_supported:
9595 {
9596 unsigned int misalign;
9597 unsigned HOST_WIDE_INT align;
9598
9599 if (memory_access_type == VMAT_GATHER_SCATTER)
9600 {
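		      /* Emit either
			   VEC_DEST = MASK_GATHER_LOAD (DATAREF_PTR, VEC_OFFSET,
							SCALE, ZERO, FINAL_MASK)
			 when a loop mask is in use, or
			   VEC_DEST = GATHER_LOAD (DATAREF_PTR, VEC_OFFSET,
						   SCALE, ZERO)
			 otherwise.  */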
9601 tree zero = build_zero_cst (vectype);
9602 tree scale = size_int (gs_info.scale);
9603 gcall *call;
9604 if (loop_masks)
9605 call = gimple_build_call_internal
9606 (IFN_MASK_GATHER_LOAD, 5, dataref_ptr,
9607 vec_offset, scale, zero, final_mask);
9608 else
9609 call = gimple_build_call_internal
9610 (IFN_GATHER_LOAD, 4, dataref_ptr,
9611 vec_offset, scale, zero);
9612 gimple_call_set_nothrow (call, true);
9613 new_stmt = call;
9614 data_ref = NULL_TREE;
9615 break;
9616 }
9617
9618 align =
9619 known_alignment (DR_TARGET_ALIGNMENT (first_dr_info));
9620 if (alignment_support_scheme == dr_aligned)
9621 {
9622 gcc_assert (aligned_access_p (first_dr_info));
9623 misalign = 0;
9624 }
9625 else if (DR_MISALIGNMENT (first_dr_info) == -1)
9626 {
9627 align = dr_alignment
9628 (vect_dr_behavior (vinfo, first_dr_info));
9629 misalign = 0;
9630 }
9631 else
9632 misalign = DR_MISALIGNMENT (first_dr_info);
9633 if (dataref_offset == NULL_TREE
9634 && TREE_CODE (dataref_ptr) == SSA_NAME)
9635 set_ptr_info_alignment (get_ptr_info (dataref_ptr),
9636 align, misalign);
9637
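		/* Emit:
		     VEC_DEST = MASK_LOAD (DATAREF_PTR, ALIAS_PTR, FINAL_MASK)
		   where ALIAS_PTR encodes the access alignment, mirroring the
		   masked store case in vectorizable_store.  */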
9638 if (final_mask)
9639 {
9640 align = least_bit_hwi (misalign | align);
9641 tree ptr = build_int_cst (ref_type, align);
9642 gcall *call
9643 = gimple_build_call_internal (IFN_MASK_LOAD, 3,
9644 dataref_ptr, ptr,
9645 final_mask);
9646 gimple_call_set_nothrow (call, true);
9647 new_stmt = call;
9648 data_ref = NULL_TREE;
9649 }
9650 else
9651 {
9652 tree ltype = vectype;
9653 tree new_vtype = NULL_TREE;
9654 unsigned HOST_WIDE_INT gap
9655 = DR_GROUP_GAP (first_stmt_info);
9656 unsigned int vect_align
9657 = vect_known_alignment_in_bytes (first_dr_info);
9658 unsigned int scalar_dr_size
9659 = vect_get_scalar_dr_size (first_dr_info);
9660 /* If there's no peeling for gaps but we have a gap
9661 		     with SLP loads then load the lower half of the
9662 vector only. See get_group_load_store_type for
9663 when we apply this optimization. */
9664 if (slp
9665 && loop_vinfo
9666 && !LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo)
9667 && gap != 0
9668 && known_eq (nunits, (group_size - gap) * 2)
9669 && known_eq (nunits, group_size)
9670 && gap >= (vect_align / scalar_dr_size))
9671 {
9672 tree half_vtype;
9673 new_vtype
9674 = vector_vector_composition_type (vectype, 2,
9675 &half_vtype);
9676 if (new_vtype != NULL_TREE)
9677 ltype = half_vtype;
9678 }
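		    /* A hedged example of the gap optimization above: with a
		       V4SI group of size 4 and a trailing gap of 2, ltype
		       becomes a half-sized piece (e.g. V2SI, or a DImode
		       integer if the target prefers composing from integers),
		       so only the first half of the vector is actually loaded;
		       the constructor code further below pads the other half
		       with zeros (with the two halves swapped for a reverse
		       access).  */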
9679 tree offset
9680 = (dataref_offset ? dataref_offset
9681 : build_int_cst (ref_type, 0));
9682 if (ltype != vectype
9683 && memory_access_type == VMAT_CONTIGUOUS_REVERSE)
9684 {
9685 unsigned HOST_WIDE_INT gap_offset
9686 = gap * tree_to_uhwi (TYPE_SIZE_UNIT (elem_type));
9687 tree gapcst = build_int_cst (ref_type, gap_offset);
9688 offset = size_binop (PLUS_EXPR, offset, gapcst);
9689 }
9690 data_ref
9691 = fold_build2 (MEM_REF, ltype, dataref_ptr, offset);
9692 if (alignment_support_scheme == dr_aligned)
9693 ;
9694 else if (DR_MISALIGNMENT (first_dr_info) == -1)
9695 TREE_TYPE (data_ref)
9696 = build_aligned_type (TREE_TYPE (data_ref),
9697 align * BITS_PER_UNIT);
9698 else
9699 TREE_TYPE (data_ref)
9700 = build_aligned_type (TREE_TYPE (data_ref),
9701 TYPE_ALIGN (elem_type));
9702 if (ltype != vectype)
9703 {
9704 vect_copy_ref_info (data_ref,
9705 DR_REF (first_dr_info->dr));
9706 tree tem = make_ssa_name (ltype);
9707 new_stmt = gimple_build_assign (tem, data_ref);
9708 vect_finish_stmt_generation (vinfo, stmt_info,
9709 new_stmt, gsi);
9710 data_ref = NULL;
9711 vec<constructor_elt, va_gc> *v;
9712 vec_alloc (v, 2);
9713 if (memory_access_type == VMAT_CONTIGUOUS_REVERSE)
9714 {
9715 CONSTRUCTOR_APPEND_ELT (v, NULL_TREE,
9716 build_zero_cst (ltype));
9717 CONSTRUCTOR_APPEND_ELT (v, NULL_TREE, tem);
9718 }
9719 else
9720 {
9721 CONSTRUCTOR_APPEND_ELT (v, NULL_TREE, tem);
9722 CONSTRUCTOR_APPEND_ELT (v, NULL_TREE,
9723 build_zero_cst (ltype));
9724 }
9725 gcc_assert (new_vtype != NULL_TREE);
9726 if (new_vtype == vectype)
9727 new_stmt = gimple_build_assign (
9728 vec_dest, build_constructor (vectype, v));
9729 else
9730 {
9731 tree new_vname = make_ssa_name (new_vtype);
9732 new_stmt = gimple_build_assign (
9733 new_vname, build_constructor (new_vtype, v));
9734 vect_finish_stmt_generation (vinfo, stmt_info,
9735 new_stmt, gsi);
9736 new_stmt = gimple_build_assign (
9737 vec_dest, build1 (VIEW_CONVERT_EXPR, vectype,
9738 new_vname));
9739 }
9740 }
9741 }
9742 break;
9743 }
9744 case dr_explicit_realign:
9745 {
9746 tree ptr, bump;
9747
9748 tree vs = size_int (TYPE_VECTOR_SUBPARTS (vectype));
9749
9750 if (compute_in_loop)
9751 msq = vect_setup_realignment (vinfo, first_stmt_info, gsi,
9752 &realignment_token,
9753 dr_explicit_realign,
9754 dataref_ptr, NULL);
9755
9756 if (TREE_CODE (dataref_ptr) == SSA_NAME)
9757 ptr = copy_ssa_name (dataref_ptr);
9758 else
9759 ptr = make_ssa_name (TREE_TYPE (dataref_ptr));
9760 // For explicit realign the target alignment should be
9761 // known at compile time.
9762 unsigned HOST_WIDE_INT align =
9763 DR_TARGET_ALIGNMENT (first_dr_info).to_constant ();
9764 new_stmt = gimple_build_assign
9765 (ptr, BIT_AND_EXPR, dataref_ptr,
9766 build_int_cst
9767 (TREE_TYPE (dataref_ptr),
9768 -(HOST_WIDE_INT) align));
9769 vect_finish_stmt_generation (vinfo, stmt_info,
9770 new_stmt, gsi);
9771 data_ref
9772 = build2 (MEM_REF, vectype, ptr,
9773 build_int_cst (ref_type, 0));
9774 vect_copy_ref_info (data_ref, DR_REF (first_dr_info->dr));
9775 vec_dest = vect_create_destination_var (scalar_dest,
9776 vectype);
9777 new_stmt = gimple_build_assign (vec_dest, data_ref);
9778 new_temp = make_ssa_name (vec_dest, new_stmt);
9779 gimple_assign_set_lhs (new_stmt, new_temp);
9780 gimple_move_vops (new_stmt, stmt_info->stmt);
9781 vect_finish_stmt_generation (vinfo, stmt_info,
9782 new_stmt, gsi);
9783 msq = new_temp;
9784
9785 bump = size_binop (MULT_EXPR, vs,
9786 TYPE_SIZE_UNIT (elem_type));
9787 bump = size_binop (MINUS_EXPR, bump, size_one_node);
9788 ptr = bump_vector_ptr (vinfo, dataref_ptr, NULL, gsi,
9789 stmt_info, bump);
9790 new_stmt = gimple_build_assign
9791 (NULL_TREE, BIT_AND_EXPR, ptr,
9792 build_int_cst
9793 (TREE_TYPE (ptr), -(HOST_WIDE_INT) align));
9794 ptr = copy_ssa_name (ptr, new_stmt);
9795 gimple_assign_set_lhs (new_stmt, ptr);
9796 vect_finish_stmt_generation (vinfo, stmt_info,
9797 new_stmt, gsi);
9798 data_ref
9799 = build2 (MEM_REF, vectype, ptr,
9800 build_int_cst (ref_type, 0));
9801 break;
9802 }
9803 case dr_explicit_realign_optimized:
9804 {
9805 if (TREE_CODE (dataref_ptr) == SSA_NAME)
9806 new_temp = copy_ssa_name (dataref_ptr);
9807 else
9808 new_temp = make_ssa_name (TREE_TYPE (dataref_ptr));
9809 // We should only be doing this if we know the target
9810 // alignment at compile time.
9811 unsigned HOST_WIDE_INT align =
9812 DR_TARGET_ALIGNMENT (first_dr_info).to_constant ();
9813 new_stmt = gimple_build_assign
9814 (new_temp, BIT_AND_EXPR, dataref_ptr,
9815 build_int_cst (TREE_TYPE (dataref_ptr),
9816 -(HOST_WIDE_INT) align));
9817 vect_finish_stmt_generation (vinfo, stmt_info,
9818 new_stmt, gsi);
9819 data_ref
9820 = build2 (MEM_REF, vectype, new_temp,
9821 build_int_cst (ref_type, 0));
9822 break;
9823 }
9824 default:
9825 gcc_unreachable ();
9826 }
9827 vec_dest = vect_create_destination_var (scalar_dest, vectype);
9828 /* DATA_REF is null if we've already built the statement. */
9829 if (data_ref)
9830 {
9831 vect_copy_ref_info (data_ref, DR_REF (first_dr_info->dr));
9832 new_stmt = gimple_build_assign (vec_dest, data_ref);
9833 }
9834 new_temp = make_ssa_name (vec_dest, new_stmt);
9835 gimple_set_lhs (new_stmt, new_temp);
9836 new_stmt_info
9837 = vect_finish_stmt_generation (vinfo, stmt_info,
9838 new_stmt, gsi);
9839
9840 /* 3. Handle explicit realignment if necessary/supported.
9841 Create in loop:
9842 vec_dest = realign_load (msq, lsq, realignment_token) */
9843 if (alignment_support_scheme == dr_explicit_realign_optimized
9844 || alignment_support_scheme == dr_explicit_realign)
9845 {
9846 lsq = gimple_assign_lhs (new_stmt);
9847 if (!realignment_token)
9848 realignment_token = dataref_ptr;
9849 vec_dest = vect_create_destination_var (scalar_dest, vectype);
9850 new_stmt = gimple_build_assign (vec_dest, REALIGN_LOAD_EXPR,
9851 msq, lsq, realignment_token);
9852 new_temp = make_ssa_name (vec_dest, new_stmt);
9853 gimple_assign_set_lhs (new_stmt, new_temp);
9854 new_stmt_info
9855 = vect_finish_stmt_generation (vinfo, stmt_info,
9856 new_stmt, gsi);
9857
9858 if (alignment_support_scheme == dr_explicit_realign_optimized)
9859 {
9860 gcc_assert (phi);
9861 if (i == vec_num - 1 && j == ncopies - 1)
9862 add_phi_arg (phi, lsq,
9863 loop_latch_edge (containing_loop),
9864 UNKNOWN_LOCATION);
9865 msq = lsq;
9866 }
9867 }
9868
9869 if (memory_access_type == VMAT_CONTIGUOUS_REVERSE)
9870 {
9871 tree perm_mask = perm_mask_for_reverse (vectype);
9872 new_temp = permute_vec_elements (vinfo, new_temp, new_temp,
9873 perm_mask, stmt_info, gsi);
9874 new_stmt_info = vinfo->lookup_def (new_temp);
9875 }
9876
9877 /* Collect vector loads and later create their permutation in
9878 vect_transform_grouped_load (). */
9879 if (grouped_load || slp_perm)
9880 dr_chain.quick_push (new_temp);
9881
9882 /* Store vector loads in the corresponding SLP_NODE. */
9883 if (slp && !slp_perm)
9884 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt_info);
9885
9886 	  /* With an SLP permutation we load the gaps as well; without
9887 	     one we need to skip the gaps after we manage to fully load
9888 	     all elements.  group_gap_adj is DR_GROUP_SIZE here.  */
9889 group_elt += nunits;
9890 if (maybe_ne (group_gap_adj, 0U)
9891 && !slp_perm
9892 && known_eq (group_elt, group_size - group_gap_adj))
9893 {
9894 poly_wide_int bump_val
9895 = (wi::to_wide (TYPE_SIZE_UNIT (elem_type))
9896 * group_gap_adj);
9897 tree bump = wide_int_to_tree (sizetype, bump_val);
9898 dataref_ptr = bump_vector_ptr (vinfo, dataref_ptr, ptr_incr,
9899 gsi, stmt_info, bump);
9900 group_elt = 0;
9901 }
9902 }
9903 /* Bump the vector pointer to account for a gap or for excess
9904 elements loaded for a permuted SLP load. */
9905 if (maybe_ne (group_gap_adj, 0U) && slp_perm)
9906 {
9907 poly_wide_int bump_val
9908 = (wi::to_wide (TYPE_SIZE_UNIT (elem_type))
9909 * group_gap_adj);
9910 tree bump = wide_int_to_tree (sizetype, bump_val);
9911 dataref_ptr = bump_vector_ptr (vinfo, dataref_ptr, ptr_incr, gsi,
9912 stmt_info, bump);
9913 }
9914 }
9915
9916 if (slp && !slp_perm)
9917 continue;
9918
9919 if (slp_perm)
9920 {
9921 unsigned n_perms;
9922 bool ok = vect_transform_slp_perm_load (vinfo, slp_node, dr_chain,
9923 gsi, vf, false, &n_perms);
9924 gcc_assert (ok);
9925 }
9926 else
9927 {
9928 if (grouped_load)
9929 {
9930 if (memory_access_type != VMAT_LOAD_STORE_LANES)
9931 vect_transform_grouped_load (vinfo, stmt_info, dr_chain,
9932 group_size, gsi);
9933 *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
9934 }
9935 else
9936 {
9937 if (j == 0)
9938 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt_info;
9939 else
9940 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt_info;
9941 prev_stmt_info = new_stmt_info;
9942 }
9943 }
9944 dr_chain.release ();
9945 }
9946
9947 return true;
9948 }
9949
9950 /* Function vect_is_simple_cond.
9951
9952 Input:
9953    VINFO - the vec_info for the loop or basic block that is being vectorized.
9954    COND - Condition that is checked for simple use.
9955
9956 Output:
9957 *COMP_VECTYPE - the vector type for the comparison.
9958    *DTS - The def types for the arguments of the comparison.
9959 
9960    Returns whether a COND can be vectorized.  Checks whether
9961    condition operands are supportable using vect_is_simple_use.  */
9962
9963 static bool
9964 vect_is_simple_cond (tree cond, vec_info *vinfo, slp_tree slp_node,
9965 tree *comp_vectype, enum vect_def_type *dts,
9966 tree vectype)
9967 {
9968 tree lhs, rhs;
9969 tree vectype1 = NULL_TREE, vectype2 = NULL_TREE;
9970
9971 /* Mask case. */
9972 if (TREE_CODE (cond) == SSA_NAME
9973 && VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (cond)))
9974 {
9975 if (!vect_is_simple_use (cond, vinfo, &dts[0], comp_vectype)
9976 || !*comp_vectype
9977 || !VECTOR_BOOLEAN_TYPE_P (*comp_vectype))
9978 return false;
9979 return true;
9980 }
9981
9982 if (!COMPARISON_CLASS_P (cond))
9983 return false;
9984
9985 lhs = TREE_OPERAND (cond, 0);
9986 rhs = TREE_OPERAND (cond, 1);
9987
9988 if (TREE_CODE (lhs) == SSA_NAME)
9989 {
9990 if (!vect_is_simple_use (lhs, vinfo, &dts[0], &vectype1))
9991 return false;
9992 }
9993 else if (TREE_CODE (lhs) == INTEGER_CST || TREE_CODE (lhs) == REAL_CST
9994 || TREE_CODE (lhs) == FIXED_CST)
9995 dts[0] = vect_constant_def;
9996 else
9997 return false;
9998
9999 if (TREE_CODE (rhs) == SSA_NAME)
10000 {
10001 if (!vect_is_simple_use (rhs, vinfo, &dts[1], &vectype2))
10002 return false;
10003 }
10004 else if (TREE_CODE (rhs) == INTEGER_CST || TREE_CODE (rhs) == REAL_CST
10005 || TREE_CODE (rhs) == FIXED_CST)
10006 dts[1] = vect_constant_def;
10007 else
10008 return false;
10009
10010 if (vectype1 && vectype2
10011 && maybe_ne (TYPE_VECTOR_SUBPARTS (vectype1),
10012 TYPE_VECTOR_SUBPARTS (vectype2)))
10013 return false;
10014
10015 *comp_vectype = vectype1 ? vectype1 : vectype2;
10016 /* Invariant comparison. */
10017 if (! *comp_vectype)
10018 {
10019 tree scalar_type = TREE_TYPE (lhs);
10020 if (VECT_SCALAR_BOOLEAN_TYPE_P (scalar_type))
10021 *comp_vectype = truth_type_for (vectype);
10022 else
10023 {
10024 /* If we can widen the comparison to match vectype do so. */
10025 if (INTEGRAL_TYPE_P (scalar_type)
10026 && !slp_node
10027 && tree_int_cst_lt (TYPE_SIZE (scalar_type),
10028 TYPE_SIZE (TREE_TYPE (vectype))))
10029 scalar_type = build_nonstandard_integer_type
10030 (vector_element_bits (vectype), TYPE_UNSIGNED (scalar_type));
10031 *comp_vectype = get_vectype_for_scalar_type (vinfo, scalar_type,
10032 slp_node);
10033 }
10034 }
10035
10036 return true;
10037 }
10038
10039 /* vectorizable_condition.
10040
10041 Check if STMT_INFO is conditional modify expression that can be vectorized.
10042 If VEC_STMT is also passed, vectorize STMT_INFO: create a vectorized
10043 stmt using VEC_COND_EXPR to replace it, put it in VEC_STMT, and insert it
10044 at GSI.
10045
10046 When STMT_INFO is vectorized as a nested cycle, for_reduction is true.
10047
10048 Return true if STMT_INFO is vectorizable in this way. */
10049
10050 static bool
10051 vectorizable_condition (vec_info *vinfo,
10052 stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
10053 stmt_vec_info *vec_stmt,
10054 slp_tree slp_node, stmt_vector_for_cost *cost_vec)
10055 {
10056 tree scalar_dest = NULL_TREE;
10057 tree vec_dest = NULL_TREE;
10058 tree cond_expr, cond_expr0 = NULL_TREE, cond_expr1 = NULL_TREE;
10059 tree then_clause, else_clause;
10060 tree comp_vectype = NULL_TREE;
10061 tree vec_cond_lhs = NULL_TREE, vec_cond_rhs = NULL_TREE;
10062 tree vec_then_clause = NULL_TREE, vec_else_clause = NULL_TREE;
10063 tree vec_compare;
10064 tree new_temp;
10065 loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
10066 enum vect_def_type dts[4]
10067 = {vect_unknown_def_type, vect_unknown_def_type,
10068 vect_unknown_def_type, vect_unknown_def_type};
10069 int ndts = 4;
10070 int ncopies;
10071 int vec_num;
10072 enum tree_code code, cond_code, bitop1 = NOP_EXPR, bitop2 = NOP_EXPR;
10073 stmt_vec_info prev_stmt_info = NULL;
10074 int i, j;
10075 bb_vec_info bb_vinfo = dyn_cast <bb_vec_info> (vinfo);
10076 vec<tree> vec_oprnds0 = vNULL;
10077 vec<tree> vec_oprnds1 = vNULL;
10078 vec<tree> vec_oprnds2 = vNULL;
10079 vec<tree> vec_oprnds3 = vNULL;
10080 tree vec_cmp_type;
10081 bool masked = false;
10082
10083 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
10084 return false;
10085
10086 /* Is vectorizable conditional operation? */
10087 gassign *stmt = dyn_cast <gassign *> (stmt_info->stmt);
10088 if (!stmt)
10089 return false;
10090
10091 code = gimple_assign_rhs_code (stmt);
10092 if (code != COND_EXPR)
10093 return false;
10094
10095 stmt_vec_info reduc_info = NULL;
10096 int reduc_index = -1;
10097 vect_reduction_type reduction_type = TREE_CODE_REDUCTION;
10098 bool for_reduction
10099 = STMT_VINFO_REDUC_DEF (vect_orig_stmt (stmt_info)) != NULL;
10100 if (for_reduction)
10101 {
10102 if (STMT_SLP_TYPE (stmt_info))
10103 return false;
10104 reduc_info = info_for_reduction (vinfo, stmt_info);
10105 reduction_type = STMT_VINFO_REDUC_TYPE (reduc_info);
10106 reduc_index = STMT_VINFO_REDUC_IDX (stmt_info);
10107 gcc_assert (reduction_type != EXTRACT_LAST_REDUCTION
10108 || reduc_index != -1);
10109 }
10110 else
10111 {
10112 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
10113 return false;
10114
10115 /* FORNOW: only supported as part of a reduction. */
10116 if (STMT_VINFO_LIVE_P (stmt_info))
10117 {
10118 if (dump_enabled_p ())
10119 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
10120 "value used after loop.\n");
10121 return false;
10122 }
10123 }
10124
10125 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
10126 tree vectype1 = NULL_TREE, vectype2 = NULL_TREE;
10127
10128 if (slp_node)
10129 {
10130 ncopies = 1;
10131 vec_num = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
10132 }
10133 else
10134 {
10135 ncopies = vect_get_num_copies (loop_vinfo, vectype);
10136 vec_num = 1;
10137 }
10138
10139 gcc_assert (ncopies >= 1);
10140 if (for_reduction && ncopies > 1)
10141 return false; /* FORNOW */
10142
10143 cond_expr = gimple_assign_rhs1 (stmt);
10144
10145 if (!vect_is_simple_cond (cond_expr, vinfo, slp_node,
10146 &comp_vectype, &dts[0], vectype)
10147 || !comp_vectype)
10148 return false;
10149
10150 unsigned slp_adjust = 0;
10151 if (slp_node && SLP_TREE_CHILDREN (slp_node).length () == 4)
10152 /* ??? Hack. Hope for COND_EXPR GIMPLE sanitizing or refactor
10153 things more... */
10154 slp_adjust = 1;
10155 slp_tree then_slp_node, else_slp_node;
10156 if (!vect_is_simple_use (vinfo, stmt_info, slp_node, 1 + slp_adjust,
10157 &then_clause, &then_slp_node, &dts[2], &vectype1))
10158 return false;
10159 if (!vect_is_simple_use (vinfo, stmt_info, slp_node, 2 + slp_adjust,
10160 &else_clause, &else_slp_node, &dts[3], &vectype2))
10161 return false;
10162
10163 if (vectype1 && !useless_type_conversion_p (vectype, vectype1))
10164 return false;
10165
10166 if (vectype2 && !useless_type_conversion_p (vectype, vectype2))
10167 return false;
10168
10169 masked = !COMPARISON_CLASS_P (cond_expr);
10170 vec_cmp_type = truth_type_for (comp_vectype);
10171
10172 if (vec_cmp_type == NULL_TREE)
10173 return false;
10174
10175 cond_code = TREE_CODE (cond_expr);
10176 if (!masked)
10177 {
10178 cond_expr0 = TREE_OPERAND (cond_expr, 0);
10179 cond_expr1 = TREE_OPERAND (cond_expr, 1);
10180 }
10181
10182 /* For conditional reductions, the "then" value needs to be the candidate
10183 value calculated by this iteration while the "else" value needs to be
10184 the result carried over from previous iterations. If the COND_EXPR
10185 is the other way around, we need to swap it. */
10186 bool must_invert_cmp_result = false;
10187 if (reduction_type == EXTRACT_LAST_REDUCTION && reduc_index == 1)
10188 {
10189 if (masked)
10190 must_invert_cmp_result = true;
10191 else
10192 {
10193 bool honor_nans = HONOR_NANS (TREE_TYPE (cond_expr0));
10194 tree_code new_code = invert_tree_comparison (cond_code, honor_nans);
10195 if (new_code == ERROR_MARK)
10196 must_invert_cmp_result = true;
10197 else
10198 {
10199 cond_code = new_code;
10200 /* Make sure we don't accidentally use the old condition. */
10201 cond_expr = NULL_TREE;
10202 }
10203 }
10204 std::swap (then_clause, else_clause);
10205 }
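
  /* For example (sketch): with reduc_index == 1 a statement like
       r_1 = x_2 < y_3 ? r_0 : a_4
     is handled as if it were
       r_1 = x_2 >= y_3 ? a_4 : r_0
     so that the candidate value ends up in the "then" position that
     EXTRACT_LAST_REDUCTION expects; if the comparison cannot be safely
     inverted, MUST_INVERT_CMP_RESULT records that the comparison result
     itself has to be inverted during the transform instead.  */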
10206
10207 if (!masked && VECTOR_BOOLEAN_TYPE_P (comp_vectype))
10208 {
10209 /* Boolean values may have another representation in vectors
10210 and therefore we prefer bit operations over comparison for
10211 them (which also works for scalar masks). We store opcodes
10212 	 to use in bitop1 and bitop2.  The statement is vectorized as
10213 BITOP2 (rhs1 BITOP1 rhs2) or rhs1 BITOP2 (BITOP1 rhs2)
10214 depending on bitop1 and bitop2 arity. */
10215 switch (cond_code)
10216 {
10217 case GT_EXPR:
10218 bitop1 = BIT_NOT_EXPR;
10219 bitop2 = BIT_AND_EXPR;
10220 break;
10221 case GE_EXPR:
10222 bitop1 = BIT_NOT_EXPR;
10223 bitop2 = BIT_IOR_EXPR;
10224 break;
10225 case LT_EXPR:
10226 bitop1 = BIT_NOT_EXPR;
10227 bitop2 = BIT_AND_EXPR;
10228 std::swap (cond_expr0, cond_expr1);
10229 break;
10230 case LE_EXPR:
10231 bitop1 = BIT_NOT_EXPR;
10232 bitop2 = BIT_IOR_EXPR;
10233 std::swap (cond_expr0, cond_expr1);
10234 break;
10235 case NE_EXPR:
10236 bitop1 = BIT_XOR_EXPR;
10237 break;
10238 case EQ_EXPR:
10239 bitop1 = BIT_XOR_EXPR;
10240 bitop2 = BIT_NOT_EXPR;
10241 break;
10242 default:
10243 return false;
10244 }
10245 cond_code = SSA_NAME;
10246 }
10247
10248 if (TREE_CODE_CLASS (cond_code) == tcc_comparison
10249 && reduction_type == EXTRACT_LAST_REDUCTION
10250 && !expand_vec_cmp_expr_p (comp_vectype, vec_cmp_type, cond_code))
10251 {
10252 if (dump_enabled_p ())
10253 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
10254 "reduction comparison operation not supported.\n");
10255 return false;
10256 }
10257
10258 if (!vec_stmt)
10259 {
10260 if (bitop1 != NOP_EXPR)
10261 {
10262 machine_mode mode = TYPE_MODE (comp_vectype);
10263 optab optab;
10264
10265 optab = optab_for_tree_code (bitop1, comp_vectype, optab_default);
10266 if (!optab || optab_handler (optab, mode) == CODE_FOR_nothing)
10267 return false;
10268
10269 if (bitop2 != NOP_EXPR)
10270 {
10271 optab = optab_for_tree_code (bitop2, comp_vectype,
10272 optab_default);
10273 if (!optab || optab_handler (optab, mode) == CODE_FOR_nothing)
10274 return false;
10275 }
10276 }
10277
10278 vect_cost_for_stmt kind = vector_stmt;
10279 if (reduction_type == EXTRACT_LAST_REDUCTION)
10280 /* Count one reduction-like operation per vector. */
10281 kind = vec_to_scalar;
10282 else if (!expand_vec_cond_expr_p (vectype, comp_vectype, cond_code))
10283 return false;
10284
10285 if (slp_node
10286 && (!vect_maybe_update_slp_op_vectype
10287 (SLP_TREE_CHILDREN (slp_node)[0], comp_vectype)
10288 || (slp_adjust == 1
10289 && !vect_maybe_update_slp_op_vectype
10290 (SLP_TREE_CHILDREN (slp_node)[1], comp_vectype))
10291 || !vect_maybe_update_slp_op_vectype (then_slp_node, vectype)
10292 || !vect_maybe_update_slp_op_vectype (else_slp_node, vectype)))
10293 {
10294 if (dump_enabled_p ())
10295 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
10296 "incompatible vector types for invariants\n");
10297 return false;
10298 }
10299
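/* For a fully-masked loop an EXTRACT_LAST reduction needs one loop mask
   per vector copy; record that requirement now so the masks are
   available at transform time.  */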
10300 if (loop_vinfo
10301 && LOOP_VINFO_CAN_FULLY_MASK_P (loop_vinfo)
10302 && reduction_type == EXTRACT_LAST_REDUCTION)
10303 vect_record_loop_mask (loop_vinfo, &LOOP_VINFO_MASKS (loop_vinfo),
10304 ncopies * vec_num, vectype, NULL);
10305
10306 STMT_VINFO_TYPE (stmt_info) = condition_vec_info_type;
10307 vect_model_simple_cost (vinfo, stmt_info, ncopies, dts, ndts, slp_node,
10308 cost_vec, kind);
10309 return true;
10310 }
10311
10312 /* Transform. */
10313
10314 if (!slp_node)
10315 {
10316 vec_oprnds0.create (1);
10317 vec_oprnds1.create (1);
10318 vec_oprnds2.create (1);
10319 vec_oprnds3.create (1);
10320 }
10321
10322 /* Handle def. */
10323 scalar_dest = gimple_assign_lhs (stmt);
10324 if (reduction_type != EXTRACT_LAST_REDUCTION)
10325 vec_dest = vect_create_destination_var (scalar_dest, vectype);
10326
10327 /* Handle cond expr. */
10328 for (j = 0; j < ncopies; j++)
10329 {
10330 bool swap_cond_operands = false;
10331
10332 /* See whether another part of the vectorized code applies a loop
10333 mask to the condition, or to its inverse. */
10334
10335 vec_loop_masks *masks = NULL;
10336 if (loop_vinfo && LOOP_VINFO_FULLY_MASKED_P (loop_vinfo))
10337 {
10338 if (reduction_type == EXTRACT_LAST_REDUCTION)
10339 masks = &LOOP_VINFO_MASKS (loop_vinfo);
10340 else
10341 {
10342 scalar_cond_masked_key cond (cond_expr, ncopies);
10343 if (loop_vinfo->scalar_cond_masked_set.contains (cond))
10344 masks = &LOOP_VINFO_MASKS (loop_vinfo);
10345 else
10346 {
10347 bool honor_nans = HONOR_NANS (TREE_TYPE (cond.op0));
10348 cond.code = invert_tree_comparison (cond.code, honor_nans);
10349 if (loop_vinfo->scalar_cond_masked_set.contains (cond))
10350 {
10351 masks = &LOOP_VINFO_MASKS (loop_vinfo);
10352 cond_code = cond.code;
10353 swap_cond_operands = true;
10354 }
10355 }
10356 }
10357 }
10358
10359 stmt_vec_info new_stmt_info = NULL;
10360 if (j == 0)
10361 {
10362 if (slp_node)
10363 {
10364 auto_vec<vec<tree>, 4> vec_defs;
10365 vect_get_slp_defs (vinfo, slp_node, &vec_defs);
10366 vec_oprnds3 = vec_defs.pop ();
10367 vec_oprnds2 = vec_defs.pop ();
10368 if (!masked)
10369 vec_oprnds1 = vec_defs.pop ();
10370 vec_oprnds0 = vec_defs.pop ();
10371 }
10372 else
10373 {
10374 if (masked)
10375 {
10376 vec_cond_lhs
10377 = vect_get_vec_def_for_operand (vinfo, cond_expr, stmt_info,
10378 comp_vectype);
10379 }
10380 else
10381 {
10382 vec_cond_lhs
10383 = vect_get_vec_def_for_operand (vinfo, cond_expr0,
10384 stmt_info, comp_vectype);
10385 vec_cond_rhs
10386 = vect_get_vec_def_for_operand (vinfo, cond_expr1,
10387 stmt_info, comp_vectype);
10388 }
10389 vec_then_clause = vect_get_vec_def_for_operand (vinfo,
10390 then_clause,
10391 stmt_info);
10392 if (reduction_type != EXTRACT_LAST_REDUCTION)
10393 vec_else_clause = vect_get_vec_def_for_operand (vinfo,
10394 else_clause,
10395 stmt_info);
10396 }
10397 }
10398 else
10399 {
10400 vec_cond_lhs
10401 = vect_get_vec_def_for_stmt_copy (vinfo, vec_oprnds0.pop ());
10402 if (!masked)
10403 vec_cond_rhs
10404 = vect_get_vec_def_for_stmt_copy (vinfo, vec_oprnds1.pop ());
10405
10406 vec_then_clause = vect_get_vec_def_for_stmt_copy (vinfo,
10407 vec_oprnds2.pop ());
10408 vec_else_clause = vect_get_vec_def_for_stmt_copy (vinfo,
10409 vec_oprnds3.pop ());
10410 }
10411
10412 if (!slp_node)
10413 {
10414 vec_oprnds0.quick_push (vec_cond_lhs);
10415 if (!masked)
10416 vec_oprnds1.quick_push (vec_cond_rhs);
10417 vec_oprnds2.quick_push (vec_then_clause);
10418 vec_oprnds3.quick_push (vec_else_clause);
10419 }
10420
10421 /* Arguments are ready. Create the new vector stmt. */
10422 FOR_EACH_VEC_ELT (vec_oprnds0, i, vec_cond_lhs)
10423 {
10424 vec_then_clause = vec_oprnds2[i];
10425 vec_else_clause = vec_oprnds3[i];
10426
10427 if (swap_cond_operands)
10428 std::swap (vec_then_clause, vec_else_clause);
10429
10430 if (masked)
10431 vec_compare = vec_cond_lhs;
10432 else
10433 {
10434 vec_cond_rhs = vec_oprnds1[i];
10435 if (bitop1 == NOP_EXPR)
10436 vec_compare = build2 (cond_code, vec_cmp_type,
10437 vec_cond_lhs, vec_cond_rhs);
10438 else
10439 {
10440 new_temp = make_ssa_name (vec_cmp_type);
10441 gassign *new_stmt;
10442 if (bitop1 == BIT_NOT_EXPR)
10443 new_stmt = gimple_build_assign (new_temp, bitop1,
10444 vec_cond_rhs);
10445 else
10446 new_stmt
10447 = gimple_build_assign (new_temp, bitop1, vec_cond_lhs,
10448 vec_cond_rhs);
10449 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
10450 if (bitop2 == NOP_EXPR)
10451 vec_compare = new_temp;
10452 else if (bitop2 == BIT_NOT_EXPR)
10453 {
10454 /* Instead of doing ~x ? y : z do x ? z : y. */
10455 vec_compare = new_temp;
10456 std::swap (vec_then_clause, vec_else_clause);
10457 }
10458 else
10459 {
10460 vec_compare = make_ssa_name (vec_cmp_type);
10461 new_stmt
10462 = gimple_build_assign (vec_compare, bitop2,
10463 vec_cond_lhs, new_temp);
10464 vect_finish_stmt_generation (vinfo, stmt_info,
10465 new_stmt, gsi);
10466 }
10467 }
10468 }
10469
10470 /* If we decided to apply a loop mask to the result of the vector
10471 comparison, AND the comparison with the mask now. Later passes
10472 should then be able to reuse the AND results between multiple
10473 vector statements.
10474
10475 For example:
10476 for (int i = 0; i < 100; ++i)
10477 x[i] = y[i] ? z[i] : 10;
10478
10479 results in following optimized GIMPLE:
10480
10481 mask__35.8_43 = vect__4.7_41 != { 0, ... };
10482 vec_mask_and_46 = loop_mask_40 & mask__35.8_43;
10483 _19 = &MEM[base: z_12(D), index: ivtmp_56, step: 4, offset: 0B];
10484 vect_iftmp.11_47 = .MASK_LOAD (_19, 4B, vec_mask_and_46);
10485 vect_iftmp.12_52 = VEC_COND_EXPR <vec_mask_and_46,
10486 vect_iftmp.11_47, { 10, ... }>;
10487
10488 instead of using masked and unmasked forms of
10489 vec != { 0, ... } (masked in the MASK_LOAD,
10490 unmasked in the VEC_COND_EXPR). */
10491
10492 /* Force vec_compare to be an SSA_NAME rather than a comparison,
10493 in cases where that's necessary. */
10494
10495 if (masks || reduction_type == EXTRACT_LAST_REDUCTION)
10496 {
10497 if (!is_gimple_val (vec_compare))
10498 {
10499 tree vec_compare_name = make_ssa_name (vec_cmp_type);
10500 gassign *new_stmt = gimple_build_assign (vec_compare_name,
10501 vec_compare);
10502 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
10503 vec_compare = vec_compare_name;
10504 }
10505
10506 if (must_invert_cmp_result)
10507 {
10508 tree vec_compare_name = make_ssa_name (vec_cmp_type);
10509 gassign *new_stmt = gimple_build_assign (vec_compare_name,
10510 BIT_NOT_EXPR,
10511 vec_compare);
10512 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
10513 vec_compare = vec_compare_name;
10514 }
10515
10516 if (masks)
10517 {
10518 unsigned vec_num = vec_oprnds0.length ();
10519 tree loop_mask
10520 = vect_get_loop_mask (gsi, masks, vec_num * ncopies,
10521 vectype, vec_num * j + i);
10522 tree tmp2 = make_ssa_name (vec_cmp_type);
10523 gassign *g
10524 = gimple_build_assign (tmp2, BIT_AND_EXPR, vec_compare,
10525 loop_mask);
10526 vect_finish_stmt_generation (vinfo, stmt_info, g, gsi);
10527 vec_compare = tmp2;
10528 }
10529 }
10530
10531 if (reduction_type == EXTRACT_LAST_REDUCTION)
10532 {
10533 gimple *old_stmt = vect_orig_stmt (stmt_info)->stmt;
10534 tree lhs = gimple_get_lhs (old_stmt);
10535 gcall *new_stmt = gimple_build_call_internal
10536 (IFN_FOLD_EXTRACT_LAST, 3, else_clause, vec_compare,
10537 vec_then_clause);
10538 gimple_call_set_lhs (new_stmt, lhs);
10539 SSA_NAME_DEF_STMT (lhs) = new_stmt;
10540 if (old_stmt == gsi_stmt (*gsi))
10541 new_stmt_info = vect_finish_replace_stmt (vinfo,
10542 stmt_info, new_stmt);
10543 else
10544 {
10545 /* In this case we're moving the definition to later in the
10546 block. That doesn't matter because the only uses of the
10547 lhs are in phi statements. */
10548 gimple_stmt_iterator old_gsi = gsi_for_stmt (old_stmt);
10549 gsi_remove (&old_gsi, true);
10550 new_stmt_info
10551 = vect_finish_stmt_generation (vinfo, stmt_info,
10552 new_stmt, gsi);
10553 }
10554 }
10555 else
10556 {
10557 new_temp = make_ssa_name (vec_dest);
10558 gassign *new_stmt
10559 = gimple_build_assign (new_temp, VEC_COND_EXPR, vec_compare,
10560 vec_then_clause, vec_else_clause);
10561 new_stmt_info
10562 = vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
10563 }
10564 if (slp_node)
10565 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt_info);
10566 }
10567
10568 if (slp_node)
10569 continue;
10570
10571 if (j == 0)
10572 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt_info;
10573 else
10574 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt_info;
10575
10576 prev_stmt_info = new_stmt_info;
10577 }
10578
10579 vec_oprnds0.release ();
10580 vec_oprnds1.release ();
10581 vec_oprnds2.release ();
10582 vec_oprnds3.release ();
10583
10584 return true;
10585 }
10586
10587 /* vectorizable_comparison.
10588
10589 Check if STMT_INFO is a comparison expression that can be vectorized.
10590 If VEC_STMT is also passed, vectorize STMT_INFO: create a vectorized
10591 comparison, put it in VEC_STMT, and insert it at GSI.
10592
10593 Return true if STMT_INFO is vectorizable in this way. */
10594
10595 static bool
10596 vectorizable_comparison (vec_info *vinfo,
10597 stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
10598 stmt_vec_info *vec_stmt,
10599 slp_tree slp_node, stmt_vector_for_cost *cost_vec)
10600 {
10601 tree lhs, rhs1, rhs2;
10602 tree vectype1 = NULL_TREE, vectype2 = NULL_TREE;
10603 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
10604 tree vec_rhs1 = NULL_TREE, vec_rhs2 = NULL_TREE;
10605 tree new_temp;
10606 loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
10607 enum vect_def_type dts[2] = {vect_unknown_def_type, vect_unknown_def_type};
10608 int ndts = 2;
10609 poly_uint64 nunits;
10610 int ncopies;
10611 enum tree_code code, bitop1 = NOP_EXPR, bitop2 = NOP_EXPR;
10612 stmt_vec_info prev_stmt_info = NULL;
10613 int i, j;
10614 bb_vec_info bb_vinfo = dyn_cast <bb_vec_info> (vinfo);
10615 vec<tree> vec_oprnds0 = vNULL;
10616 vec<tree> vec_oprnds1 = vNULL;
10617 tree mask_type;
10618 tree mask;
10619
10620 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
10621 return false;
10622
10623 if (!vectype || !VECTOR_BOOLEAN_TYPE_P (vectype))
10624 return false;
10625
10626 mask_type = vectype;
10627 nunits = TYPE_VECTOR_SUBPARTS (vectype);
10628
10629 if (slp_node)
10630 ncopies = 1;
10631 else
10632 ncopies = vect_get_num_copies (loop_vinfo, vectype);
10633
10634 gcc_assert (ncopies >= 1);
10635 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
10636 return false;
10637
10638 if (STMT_VINFO_LIVE_P (stmt_info))
10639 {
10640 if (dump_enabled_p ())
10641 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
10642 "value used after loop.\n");
10643 return false;
10644 }
10645
10646 gassign *stmt = dyn_cast <gassign *> (stmt_info->stmt);
10647 if (!stmt)
10648 return false;
10649
10650 code = gimple_assign_rhs_code (stmt);
10651
10652 if (TREE_CODE_CLASS (code) != tcc_comparison)
10653 return false;
10654
10655 slp_tree slp_rhs1, slp_rhs2;
10656 if (!vect_is_simple_use (vinfo, stmt_info, slp_node,
10657 0, &rhs1, &slp_rhs1, &dts[0], &vectype1))
10658 return false;
10659
10660 if (!vect_is_simple_use (vinfo, stmt_info, slp_node,
10661 1, &rhs2, &slp_rhs2, &dts[1], &vectype2))
10662 return false;
10663
10664 if (vectype1 && vectype2
10665 && maybe_ne (TYPE_VECTOR_SUBPARTS (vectype1),
10666 TYPE_VECTOR_SUBPARTS (vectype2)))
10667 return false;
10668
10669 vectype = vectype1 ? vectype1 : vectype2;
10670
10671 /* Invariant comparison. */
10672 if (!vectype)
10673 {
10674 if (VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (rhs1)))
10675 vectype = mask_type;
10676 else
10677 vectype = get_vectype_for_scalar_type (vinfo, TREE_TYPE (rhs1),
10678 slp_node);
10679 if (!vectype || maybe_ne (TYPE_VECTOR_SUBPARTS (vectype), nunits))
10680 return false;
10681 }
10682 else if (maybe_ne (nunits, TYPE_VECTOR_SUBPARTS (vectype)))
10683 return false;
10684
10685 /* Can't compare mask and non-mask types. */
10686 if (vectype1 && vectype2
10687 && (VECTOR_BOOLEAN_TYPE_P (vectype1) ^ VECTOR_BOOLEAN_TYPE_P (vectype2)))
10688 return false;
10689
10690 /* Boolean values may have another representation in vectors
10691 and therefore we prefer bit operations over comparison for
10692 them (which also works for scalar masks). We store opcodes
10693 to use in bitop1 and bitop2. Statement is vectorized as
10694 BITOP2 (rhs1 BITOP1 rhs2) or
10695 rhs1 BITOP2 (BITOP1 rhs2)
10696 depending on bitop1 and bitop2 arity. */
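/* A worked example, assuming mask (boolean vector) operands: LT_EXPR
   first swaps the operands and is then emitted as rhs2 & ~rhs1, while
   EQ_EXPR is emitted as ~(rhs1 ^ rhs2) using both bitops.  */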
10697 bool swap_p = false;
10698 if (VECTOR_BOOLEAN_TYPE_P (vectype))
10699 {
10700 if (code == GT_EXPR)
10701 {
10702 bitop1 = BIT_NOT_EXPR;
10703 bitop2 = BIT_AND_EXPR;
10704 }
10705 else if (code == GE_EXPR)
10706 {
10707 bitop1 = BIT_NOT_EXPR;
10708 bitop2 = BIT_IOR_EXPR;
10709 }
10710 else if (code == LT_EXPR)
10711 {
10712 bitop1 = BIT_NOT_EXPR;
10713 bitop2 = BIT_AND_EXPR;
10714 swap_p = true;
10715 }
10716 else if (code == LE_EXPR)
10717 {
10718 bitop1 = BIT_NOT_EXPR;
10719 bitop2 = BIT_IOR_EXPR;
10720 swap_p = true;
10721 }
10722 else
10723 {
10724 bitop1 = BIT_XOR_EXPR;
10725 if (code == EQ_EXPR)
10726 bitop2 = BIT_NOT_EXPR;
10727 }
10728 }
10729
10730 if (!vec_stmt)
10731 {
10732 if (bitop1 == NOP_EXPR)
10733 {
10734 if (!expand_vec_cmp_expr_p (vectype, mask_type, code))
10735 return false;
10736 }
10737 else
10738 {
10739 machine_mode mode = TYPE_MODE (vectype);
10740 optab optab;
10741
10742 optab = optab_for_tree_code (bitop1, vectype, optab_default);
10743 if (!optab || optab_handler (optab, mode) == CODE_FOR_nothing)
10744 return false;
10745
10746 if (bitop2 != NOP_EXPR)
10747 {
10748 optab = optab_for_tree_code (bitop2, vectype, optab_default);
10749 if (!optab || optab_handler (optab, mode) == CODE_FOR_nothing)
10750 return false;
10751 }
10752 }
10753
10754 /* Put types on constant and invariant SLP children. */
10755 if (slp_node
10756 && (!vect_maybe_update_slp_op_vectype (slp_rhs1, vectype)
10757 || !vect_maybe_update_slp_op_vectype (slp_rhs2, vectype)))
10758 {
10759 if (dump_enabled_p ())
10760 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
10761 "incompatible vector types for invariants\n");
10762 return false;
10763 }
10764
10765 STMT_VINFO_TYPE (stmt_info) = comparison_vec_info_type;
10766 vect_model_simple_cost (vinfo, stmt_info,
10767 ncopies * (1 + (bitop2 != NOP_EXPR)),
10768 dts, ndts, slp_node, cost_vec);
10769 return true;
10770 }
10771
10772 /* Transform. */
10773 if (!slp_node)
10774 {
10775 vec_oprnds0.create (1);
10776 vec_oprnds1.create (1);
10777 }
10778
10779 /* Handle def. */
10780 lhs = gimple_assign_lhs (stmt);
10781 mask = vect_create_destination_var (lhs, mask_type);
10782
10783 /* Handle cmp expr. */
10784 for (j = 0; j < ncopies; j++)
10785 {
10786 stmt_vec_info new_stmt_info = NULL;
10787 if (j == 0)
10788 {
10789 if (slp_node)
10790 {
10791 auto_vec<vec<tree>, 2> vec_defs;
10792 vect_get_slp_defs (vinfo, slp_node, &vec_defs);
10793 vec_oprnds1 = vec_defs.pop ();
10794 vec_oprnds0 = vec_defs.pop ();
10795 if (swap_p)
10796 std::swap (vec_oprnds0, vec_oprnds1);
10797 }
10798 else
10799 {
10800 vec_rhs1 = vect_get_vec_def_for_operand (vinfo, rhs1, stmt_info,
10801 vectype);
10802 vec_rhs2 = vect_get_vec_def_for_operand (vinfo, rhs2, stmt_info,
10803 vectype);
10804 }
10805 }
10806 else
10807 {
10808 vec_rhs1 = vect_get_vec_def_for_stmt_copy (vinfo,
10809 vec_oprnds0.pop ());
10810 vec_rhs2 = vect_get_vec_def_for_stmt_copy (vinfo,
10811 vec_oprnds1.pop ());
10812 }
10813
10814 if (!slp_node)
10815 {
10816 if (swap_p && j == 0)
10817 std::swap (vec_rhs1, vec_rhs2);
10818 vec_oprnds0.quick_push (vec_rhs1);
10819 vec_oprnds1.quick_push (vec_rhs2);
10820 }
10821
10822 /* Arguments are ready. Create the new vector stmt. */
10823 FOR_EACH_VEC_ELT (vec_oprnds0, i, vec_rhs1)
10824 {
10825 vec_rhs2 = vec_oprnds1[i];
10826
10827 new_temp = make_ssa_name (mask);
10828 if (bitop1 == NOP_EXPR)
10829 {
10830 gassign *new_stmt = gimple_build_assign (new_temp, code,
10831 vec_rhs1, vec_rhs2);
10832 new_stmt_info
10833 = vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
10834 }
10835 else
10836 {
10837 gassign *new_stmt;
10838 if (bitop1 == BIT_NOT_EXPR)
10839 new_stmt = gimple_build_assign (new_temp, bitop1, vec_rhs2);
10840 else
10841 new_stmt = gimple_build_assign (new_temp, bitop1, vec_rhs1,
10842 vec_rhs2);
10843 new_stmt_info
10844 = vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
10845 if (bitop2 != NOP_EXPR)
10846 {
10847 tree res = make_ssa_name (mask);
10848 if (bitop2 == BIT_NOT_EXPR)
10849 new_stmt = gimple_build_assign (res, bitop2, new_temp);
10850 else
10851 new_stmt = gimple_build_assign (res, bitop2, vec_rhs1,
10852 new_temp);
10853 new_stmt_info
10854 = vect_finish_stmt_generation (vinfo, stmt_info,
10855 new_stmt, gsi);
10856 }
10857 }
10858 if (slp_node)
10859 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt_info);
10860 }
10861
10862 if (slp_node)
10863 continue;
10864
10865 if (j == 0)
10866 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt_info;
10867 else
10868 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt_info;
10869
10870 prev_stmt_info = new_stmt_info;
10871 }
10872
10873 vec_oprnds0.release ();
10874 vec_oprnds1.release ();
10875
10876 return true;
10877 }
10878
10879 /* If SLP_NODE is nonnull, return true if vectorizable_live_operation
10880 can handle all live statements in the node. Otherwise return true
10881 if STMT_INFO is not live or if vectorizable_live_operation can handle it.
10882 GSI and VEC_STMT_P are as for vectorizable_live_operation. */
10883
10884 static bool
10885 can_vectorize_live_stmts (loop_vec_info loop_vinfo,
10886 stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
10887 slp_tree slp_node, slp_instance slp_node_instance,
10888 bool vec_stmt_p,
10889 stmt_vector_for_cost *cost_vec)
10890 {
10891 if (slp_node)
10892 {
10893 stmt_vec_info slp_stmt_info;
10894 unsigned int i;
10895 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (slp_node), i, slp_stmt_info)
10896 {
10897 if (STMT_VINFO_LIVE_P (slp_stmt_info)
10898 && !vectorizable_live_operation (loop_vinfo,
10899 slp_stmt_info, gsi, slp_node,
10900 slp_node_instance, i,
10901 vec_stmt_p, cost_vec))
10902 return false;
10903 }
10904 }
10905 else if (STMT_VINFO_LIVE_P (stmt_info)
10906 && !vectorizable_live_operation (loop_vinfo, stmt_info, gsi,
10907 slp_node, slp_node_instance, -1,
10908 vec_stmt_p, cost_vec))
10909 return false;
10910
10911 return true;
10912 }
10913
10914 /* Make sure the statement is vectorizable. */
10915
10916 opt_result
10917 vect_analyze_stmt (vec_info *vinfo,
10918 stmt_vec_info stmt_info, bool *need_to_vectorize,
10919 slp_tree node, slp_instance node_instance,
10920 stmt_vector_for_cost *cost_vec)
10921 {
10922 bb_vec_info bb_vinfo = dyn_cast <bb_vec_info> (vinfo);
10923 enum vect_relevant relevance = STMT_VINFO_RELEVANT (stmt_info);
10924 bool ok;
10925 gimple_seq pattern_def_seq;
10926
10927 if (dump_enabled_p ())
10928 dump_printf_loc (MSG_NOTE, vect_location, "==> examining statement: %G",
10929 stmt_info->stmt);
10930
10931 if (gimple_has_volatile_ops (stmt_info->stmt))
10932 return opt_result::failure_at (stmt_info->stmt,
10933 "not vectorized:"
10934 " stmt has volatile operands: %G\n",
10935 stmt_info->stmt);
10936
10937 if (STMT_VINFO_IN_PATTERN_P (stmt_info)
10938 && node == NULL
10939 && (pattern_def_seq = STMT_VINFO_PATTERN_DEF_SEQ (stmt_info)))
10940 {
10941 gimple_stmt_iterator si;
10942
10943 for (si = gsi_start (pattern_def_seq); !gsi_end_p (si); gsi_next (&si))
10944 {
10945 stmt_vec_info pattern_def_stmt_info
10946 = vinfo->lookup_stmt (gsi_stmt (si));
10947 if (STMT_VINFO_RELEVANT_P (pattern_def_stmt_info)
10948 || STMT_VINFO_LIVE_P (pattern_def_stmt_info))
10949 {
10950 /* Analyze def stmt of STMT if it's a pattern stmt. */
10951 if (dump_enabled_p ())
10952 dump_printf_loc (MSG_NOTE, vect_location,
10953 "==> examining pattern def statement: %G",
10954 pattern_def_stmt_info->stmt);
10955
10956 opt_result res
10957 = vect_analyze_stmt (vinfo, pattern_def_stmt_info,
10958 need_to_vectorize, node, node_instance,
10959 cost_vec);
10960 if (!res)
10961 return res;
10962 }
10963 }
10964 }
10965
10966 /* Skip stmts that do not need to be vectorized. In loops this is expected
10967 to include:
10968 - the COND_EXPR which is the loop exit condition
10969 - any LABEL_EXPRs in the loop
10970 - computations that are used only for array indexing or loop control.
10971 In basic blocks we only analyze statements that are a part of some SLP
10972 instance, therefore, all the statements are relevant.
10973
10974 A pattern statement needs to be analyzed instead of the original statement
10975 if the original statement is not relevant. Otherwise, we analyze both
10976 statements. In basic blocks we are called from some SLP instance
10977 traversal, so we don't analyze pattern stmts instead; the pattern stmts
10978 will already be part of an SLP instance. */
10979
10980 stmt_vec_info pattern_stmt_info = STMT_VINFO_RELATED_STMT (stmt_info);
10981 if (!STMT_VINFO_RELEVANT_P (stmt_info)
10982 && !STMT_VINFO_LIVE_P (stmt_info))
10983 {
10984 if (STMT_VINFO_IN_PATTERN_P (stmt_info)
10985 && pattern_stmt_info
10986 && (STMT_VINFO_RELEVANT_P (pattern_stmt_info)
10987 || STMT_VINFO_LIVE_P (pattern_stmt_info)))
10988 {
10989 /* Analyze PATTERN_STMT instead of the original stmt. */
10990 stmt_info = pattern_stmt_info;
10991 if (dump_enabled_p ())
10992 dump_printf_loc (MSG_NOTE, vect_location,
10993 "==> examining pattern statement: %G",
10994 stmt_info->stmt);
10995 }
10996 else
10997 {
10998 if (dump_enabled_p ())
10999 dump_printf_loc (MSG_NOTE, vect_location, "irrelevant.\n");
11000
11001 return opt_result::success ();
11002 }
11003 }
11004 else if (STMT_VINFO_IN_PATTERN_P (stmt_info)
11005 && node == NULL
11006 && pattern_stmt_info
11007 && (STMT_VINFO_RELEVANT_P (pattern_stmt_info)
11008 || STMT_VINFO_LIVE_P (pattern_stmt_info)))
11009 {
11010 /* Analyze PATTERN_STMT too. */
11011 if (dump_enabled_p ())
11012 dump_printf_loc (MSG_NOTE, vect_location,
11013 "==> examining pattern statement: %G",
11014 pattern_stmt_info->stmt);
11015
11016 opt_result res
11017 = vect_analyze_stmt (vinfo, pattern_stmt_info, need_to_vectorize, node,
11018 node_instance, cost_vec);
11019 if (!res)
11020 return res;
11021 }
11022
11023 switch (STMT_VINFO_DEF_TYPE (stmt_info))
11024 {
11025 case vect_internal_def:
11026 break;
11027
11028 case vect_reduction_def:
11029 case vect_nested_cycle:
11030 gcc_assert (!bb_vinfo
11031 && (relevance == vect_used_in_outer
11032 || relevance == vect_used_in_outer_by_reduction
11033 || relevance == vect_used_by_reduction
11034 || relevance == vect_unused_in_scope
11035 || relevance == vect_used_only_live));
11036 break;
11037
11038 case vect_induction_def:
11039 gcc_assert (!bb_vinfo);
11040 break;
11041
11042 case vect_constant_def:
11043 case vect_external_def:
11044 case vect_unknown_def_type:
11045 default:
11046 gcc_unreachable ();
11047 }
11048
11049 if (STMT_VINFO_RELEVANT_P (stmt_info))
11050 {
11051 tree type = gimple_expr_type (stmt_info->stmt);
11052 gcc_assert (!VECTOR_MODE_P (TYPE_MODE (type)));
11053 gcall *call = dyn_cast <gcall *> (stmt_info->stmt);
11054 gcc_assert (STMT_VINFO_VECTYPE (stmt_info)
11055 || (call && gimple_call_lhs (call) == NULL_TREE));
11056 *need_to_vectorize = true;
11057 }
11058
11059 if (PURE_SLP_STMT (stmt_info) && !node)
11060 {
11061 if (dump_enabled_p ())
11062 dump_printf_loc (MSG_NOTE, vect_location,
11063 "handled only by SLP analysis\n");
11064 return opt_result::success ();
11065 }
11066
11067 ok = true;
11068 if (!bb_vinfo
11069 && (STMT_VINFO_RELEVANT_P (stmt_info)
11070 || STMT_VINFO_DEF_TYPE (stmt_info) == vect_reduction_def))
11071 /* Prefer vectorizable_call over vectorizable_simd_clone_call so
11072 -mveclibabi= takes preference over library functions with
11073 the simd attribute. */
11074 ok = (vectorizable_call (vinfo, stmt_info, NULL, NULL, node, cost_vec)
11075 || vectorizable_simd_clone_call (vinfo, stmt_info, NULL, NULL, node,
11076 cost_vec)
11077 || vectorizable_conversion (vinfo, stmt_info,
11078 NULL, NULL, node, cost_vec)
11079 || vectorizable_operation (vinfo, stmt_info,
11080 NULL, NULL, node, cost_vec)
11081 || vectorizable_assignment (vinfo, stmt_info,
11082 NULL, NULL, node, cost_vec)
11083 || vectorizable_load (vinfo, stmt_info, NULL, NULL, node, cost_vec)
11084 || vectorizable_store (vinfo, stmt_info, NULL, NULL, node, cost_vec)
11085 || vectorizable_reduction (as_a <loop_vec_info> (vinfo), stmt_info,
11086 node, node_instance, cost_vec)
11087 || vectorizable_induction (as_a <loop_vec_info> (vinfo), stmt_info,
11088 NULL, NULL, node, cost_vec)
11089 || vectorizable_shift (vinfo, stmt_info, NULL, NULL, node, cost_vec)
11090 || vectorizable_condition (vinfo, stmt_info,
11091 NULL, NULL, node, cost_vec)
11092 || vectorizable_comparison (vinfo, stmt_info, NULL, NULL, node,
11093 cost_vec)
11094 || vectorizable_lc_phi (as_a <loop_vec_info> (vinfo),
11095 stmt_info, NULL, node));
11096 else
11097 {
11098 if (bb_vinfo)
11099 ok = (vectorizable_call (vinfo, stmt_info, NULL, NULL, node, cost_vec)
11100 || vectorizable_simd_clone_call (vinfo, stmt_info,
11101 NULL, NULL, node, cost_vec)
11102 || vectorizable_conversion (vinfo, stmt_info, NULL, NULL, node,
11103 cost_vec)
11104 || vectorizable_shift (vinfo, stmt_info,
11105 NULL, NULL, node, cost_vec)
11106 || vectorizable_operation (vinfo, stmt_info,
11107 NULL, NULL, node, cost_vec)
11108 || vectorizable_assignment (vinfo, stmt_info, NULL, NULL, node,
11109 cost_vec)
11110 || vectorizable_load (vinfo, stmt_info,
11111 NULL, NULL, node, cost_vec)
11112 || vectorizable_store (vinfo, stmt_info,
11113 NULL, NULL, node, cost_vec)
11114 || vectorizable_condition (vinfo, stmt_info,
11115 NULL, NULL, node, cost_vec)
11116 || vectorizable_comparison (vinfo, stmt_info, NULL, NULL, node,
11117 cost_vec));
11118 }
11119
11120 if (!ok)
11121 return opt_result::failure_at (stmt_info->stmt,
11122 "not vectorized:"
11123 " relevant stmt not supported: %G",
11124 stmt_info->stmt);
11125
11126 /* Stmts that are (also) "live" (i.e. that are used out of the loop)
11127 need extra handling, except for vectorizable reductions. */
11128 if (!bb_vinfo
11129 && STMT_VINFO_TYPE (stmt_info) != reduc_vec_info_type
11130 && STMT_VINFO_TYPE (stmt_info) != lc_phi_info_type
11131 && !can_vectorize_live_stmts (as_a <loop_vec_info> (vinfo),
11132 stmt_info, NULL, node, node_instance,
11133 false, cost_vec))
11134 return opt_result::failure_at (stmt_info->stmt,
11135 "not vectorized:"
11136 " live stmt not supported: %G",
11137 stmt_info->stmt);
11138
11139 return opt_result::success ();
11140 }
11141
11142
11143 /* Function vect_transform_stmt.
11144
11145 Create a vectorized stmt to replace STMT_INFO, and insert it at GSI. */
11146
11147 bool
11148 vect_transform_stmt (vec_info *vinfo,
11149 stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
11150 slp_tree slp_node, slp_instance slp_node_instance)
11151 {
11152 bool is_store = false;
11153 stmt_vec_info vec_stmt = NULL;
11154 bool done;
11155
11156 gcc_assert (slp_node || !PURE_SLP_STMT (stmt_info));
11157 stmt_vec_info old_vec_stmt_info = STMT_VINFO_VEC_STMT (stmt_info);
11158
11159 loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
11160 bool nested_p = (loop_vinfo
11161 && nested_in_vect_loop_p
11162 (LOOP_VINFO_LOOP (loop_vinfo), stmt_info));
11163
11164 gimple *stmt = stmt_info->stmt;
11165 switch (STMT_VINFO_TYPE (stmt_info))
11166 {
11167 case type_demotion_vec_info_type:
11168 case type_promotion_vec_info_type:
11169 case type_conversion_vec_info_type:
11170 done = vectorizable_conversion (vinfo, stmt_info,
11171 gsi, &vec_stmt, slp_node, NULL);
11172 gcc_assert (done);
11173 break;
11174
11175 case induc_vec_info_type:
11176 done = vectorizable_induction (as_a <loop_vec_info> (vinfo),
11177 stmt_info, gsi, &vec_stmt, slp_node,
11178 NULL);
11179 gcc_assert (done);
11180 break;
11181
11182 case shift_vec_info_type:
11183 done = vectorizable_shift (vinfo, stmt_info,
11184 gsi, &vec_stmt, slp_node, NULL);
11185 gcc_assert (done);
11186 break;
11187
11188 case op_vec_info_type:
11189 done = vectorizable_operation (vinfo, stmt_info, gsi, &vec_stmt, slp_node,
11190 NULL);
11191 gcc_assert (done);
11192 break;
11193
11194 case assignment_vec_info_type:
11195 done = vectorizable_assignment (vinfo, stmt_info,
11196 gsi, &vec_stmt, slp_node, NULL);
11197 gcc_assert (done);
11198 break;
11199
11200 case load_vec_info_type:
11201 done = vectorizable_load (vinfo, stmt_info, gsi, &vec_stmt, slp_node,
11202 NULL);
11203 gcc_assert (done);
11204 break;
11205
11206 case store_vec_info_type:
11207 done = vectorizable_store (vinfo, stmt_info,
11208 gsi, &vec_stmt, slp_node, NULL);
11209 gcc_assert (done);
11210 if (STMT_VINFO_GROUPED_ACCESS (stmt_info) && !slp_node)
11211 {
11212 /* In case of interleaving, the whole chain is vectorized when the
11213 last store in the chain is reached. Store stmts before the last
11214 one are skipped, and their vec_stmt_info shouldn't be freed
11215 meanwhile. */
11216 stmt_vec_info group_info = DR_GROUP_FIRST_ELEMENT (stmt_info);
11217 if (DR_GROUP_STORE_COUNT (group_info) == DR_GROUP_SIZE (group_info))
11218 is_store = true;
11219 }
11220 else
11221 is_store = true;
11222 break;
11223
11224 case condition_vec_info_type:
11225 done = vectorizable_condition (vinfo, stmt_info,
11226 gsi, &vec_stmt, slp_node, NULL);
11227 gcc_assert (done);
11228 break;
11229
11230 case comparison_vec_info_type:
11231 done = vectorizable_comparison (vinfo, stmt_info, gsi, &vec_stmt,
11232 slp_node, NULL);
11233 gcc_assert (done);
11234 break;
11235
11236 case call_vec_info_type:
11237 done = vectorizable_call (vinfo, stmt_info,
11238 gsi, &vec_stmt, slp_node, NULL);
11239 stmt = gsi_stmt (*gsi);
11240 break;
11241
11242 case call_simd_clone_vec_info_type:
11243 done = vectorizable_simd_clone_call (vinfo, stmt_info, gsi, &vec_stmt,
11244 slp_node, NULL);
11245 stmt = gsi_stmt (*gsi);
11246 break;
11247
11248 case reduc_vec_info_type:
11249 done = vect_transform_reduction (as_a <loop_vec_info> (vinfo), stmt_info,
11250 gsi, &vec_stmt, slp_node);
11251 gcc_assert (done);
11252 break;
11253
11254 case cycle_phi_info_type:
11255 done = vect_transform_cycle_phi (as_a <loop_vec_info> (vinfo), stmt_info,
11256 &vec_stmt, slp_node, slp_node_instance);
11257 gcc_assert (done);
11258 break;
11259
11260 case lc_phi_info_type:
11261 done = vectorizable_lc_phi (as_a <loop_vec_info> (vinfo),
11262 stmt_info, &vec_stmt, slp_node);
11263 gcc_assert (done);
11264 break;
11265
11266 default:
11267 if (!STMT_VINFO_LIVE_P (stmt_info))
11268 {
11269 if (dump_enabled_p ())
11270 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
11271 "stmt not supported.\n");
11272 gcc_unreachable ();
11273 }
11274 done = true;
11275 }
11276
11277 /* Verify SLP vectorization doesn't mess with STMT_VINFO_VEC_STMT.
11278 This would break hybrid SLP vectorization. */
11279 if (slp_node)
11280 gcc_assert (!vec_stmt
11281 && STMT_VINFO_VEC_STMT (stmt_info) == old_vec_stmt_info);
11282
11283 /* Handle inner-loop stmts whose DEF is used in the loop-nest that
11284 is being vectorized, but outside the immediately enclosing loop. */
11285 if (vec_stmt
11286 && nested_p
11287 && STMT_VINFO_TYPE (stmt_info) != reduc_vec_info_type
11288 && (STMT_VINFO_RELEVANT (stmt_info) == vect_used_in_outer
11289 || STMT_VINFO_RELEVANT (stmt_info) ==
11290 vect_used_in_outer_by_reduction))
11291 {
11292 class loop *innerloop = LOOP_VINFO_LOOP (loop_vinfo)->inner;
11293 imm_use_iterator imm_iter;
11294 use_operand_p use_p;
11295 tree scalar_dest;
11296
11297 if (dump_enabled_p ())
11298 dump_printf_loc (MSG_NOTE, vect_location,
11299 "Record the vdef for outer-loop vectorization.\n");
11300
11301 /* Find the relevant loop-exit phi-node, and record the vec_stmt there
11302 (to be used when vectorizing outer-loop stmts that use the DEF of
11303 STMT). */
11304 if (gimple_code (stmt) == GIMPLE_PHI)
11305 scalar_dest = PHI_RESULT (stmt);
11306 else
11307 scalar_dest = gimple_get_lhs (stmt);
11308
11309 FOR_EACH_IMM_USE_FAST (use_p, imm_iter, scalar_dest)
11310 if (!flow_bb_inside_loop_p (innerloop, gimple_bb (USE_STMT (use_p))))
11311 {
11312 stmt_vec_info exit_phi_info
11313 = vinfo->lookup_stmt (USE_STMT (use_p));
11314 STMT_VINFO_VEC_STMT (exit_phi_info) = vec_stmt;
11315 }
11316 }
11317
11318 if (vec_stmt)
11319 STMT_VINFO_VEC_STMT (stmt_info) = vec_stmt;
11320
11321 if (STMT_VINFO_TYPE (stmt_info) == store_vec_info_type)
11322 return is_store;
11323
11324 /* If this stmt defines a value used on a backedge, update the
11325 vectorized PHIs. */
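/* Concretely this adds the vectorized def as the loop-latch argument of
   the corresponding vectorized reduction PHI(s): in the non-SLP case by
   walking the STMT_VINFO_RELATED_STMT chains of the PHI and of this stmt,
   and in the SLP case by pairing up SLP_TREE_VEC_STMTS of the reduc_phis
   node with those of this node.  */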
11326 stmt_vec_info orig_stmt_info = vect_orig_stmt (stmt_info);
11327 stmt_vec_info reduc_info;
11328 if (STMT_VINFO_REDUC_DEF (orig_stmt_info)
11329 && vect_stmt_to_vectorize (orig_stmt_info) == stmt_info
11330 && (reduc_info = info_for_reduction (vinfo, orig_stmt_info))
11331 && STMT_VINFO_REDUC_TYPE (reduc_info) != FOLD_LEFT_REDUCTION
11332 && STMT_VINFO_REDUC_TYPE (reduc_info) != EXTRACT_LAST_REDUCTION)
11333 {
11334 gphi *phi;
11335 edge e;
11336 if (!slp_node
11337 && (phi = dyn_cast <gphi *>
11338 (STMT_VINFO_REDUC_DEF (orig_stmt_info)->stmt))
11339 && dominated_by_p (CDI_DOMINATORS,
11340 gimple_bb (orig_stmt_info->stmt), gimple_bb (phi))
11341 && (e = loop_latch_edge (gimple_bb (phi)->loop_father))
11342 && (PHI_ARG_DEF_FROM_EDGE (phi, e)
11343 == gimple_get_lhs (orig_stmt_info->stmt)))
11344 {
11345 stmt_vec_info phi_info
11346 = STMT_VINFO_VEC_STMT (STMT_VINFO_REDUC_DEF (orig_stmt_info));
11347 stmt_vec_info vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
11348 do
11349 {
11350 add_phi_arg (as_a <gphi *> (phi_info->stmt),
11351 gimple_get_lhs (vec_stmt->stmt), e,
11352 gimple_phi_arg_location (phi, e->dest_idx));
11353 phi_info = STMT_VINFO_RELATED_STMT (phi_info);
11354 vec_stmt = STMT_VINFO_RELATED_STMT (vec_stmt);
11355 }
11356 while (phi_info);
11357 gcc_assert (!vec_stmt);
11358 }
11359 else if (slp_node
11360 && slp_node != slp_node_instance->reduc_phis)
11361 {
11362 slp_tree phi_node = slp_node_instance->reduc_phis;
11363 gphi *phi = as_a <gphi *> (SLP_TREE_SCALAR_STMTS (phi_node)[0]->stmt);
11364 e = loop_latch_edge (gimple_bb (phi)->loop_father);
11365 gcc_assert (SLP_TREE_VEC_STMTS (phi_node).length ()
11366 == SLP_TREE_VEC_STMTS (slp_node).length ());
11367 for (unsigned i = 0; i < SLP_TREE_VEC_STMTS (phi_node).length (); ++i)
11368 add_phi_arg (as_a <gphi *> (SLP_TREE_VEC_STMTS (phi_node)[i]->stmt),
11369 gimple_get_lhs (SLP_TREE_VEC_STMTS (slp_node)[i]->stmt),
11370 e, gimple_phi_arg_location (phi, e->dest_idx));
11371 }
11372 }
11373
11374 /* Handle stmts whose DEF is used outside the loop-nest that is
11375 being vectorized. */
11376 if (is_a <loop_vec_info> (vinfo))
11377 done = can_vectorize_live_stmts (as_a <loop_vec_info> (vinfo),
11378 stmt_info, gsi, slp_node,
11379 slp_node_instance, true, NULL);
11380 gcc_assert (done);
11381
11382 return false;
11383 }
11384
11385
11386 /* Remove a group of stores (for SLP or interleaving), free their
11387 stmt_vec_info. */
11388
11389 void
11390 vect_remove_stores (vec_info *vinfo, stmt_vec_info first_stmt_info)
11391 {
11392 stmt_vec_info next_stmt_info = first_stmt_info;
11393
11394 while (next_stmt_info)
11395 {
11396 stmt_vec_info tmp = DR_GROUP_NEXT_ELEMENT (next_stmt_info);
11397 next_stmt_info = vect_orig_stmt (next_stmt_info);
11398 /* Free the attached stmt_vec_info and remove the stmt. */
11399 vinfo->remove_stmt (next_stmt_info);
11400 next_stmt_info = tmp;
11401 }
11402 }
11403
11404 /* If NUNITS is nonzero, return a vector type that contains NUNITS
11405 elements of type SCALAR_TYPE, or null if the target doesn't support
11406 such a type.
11407
11408 If NUNITS is zero, return a vector type that contains elements of
11409 type SCALAR_TYPE, choosing whichever vector size the target prefers.
11410
11411 If PREVAILING_MODE is VOIDmode, we have not yet chosen a vector mode
11412 for this vectorization region and want to "autodetect" the best choice.
11413 Otherwise, PREVAILING_MODE is a previously-chosen vector TYPE_MODE
11414 and we want the new type to be interoperable with it. PREVAILING_MODE
11415 in this case can be a scalar integer mode or a vector mode; when it
11416 is a vector mode, the function acts like a tree-level version of
11417 related_vector_mode. */
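/* As an illustration (not an exhaustive specification): with
   PREVAILING_MODE == VOIDmode, NUNITS == 0 and SCALAR_TYPE int, on a
   target whose preferred SIMD mode for SImode is V4SImode the result
   is vector(4) int.  */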
11418
11419 tree
11420 get_related_vectype_for_scalar_type (machine_mode prevailing_mode,
11421 tree scalar_type, poly_uint64 nunits)
11422 {
11423 tree orig_scalar_type = scalar_type;
11424 scalar_mode inner_mode;
11425 machine_mode simd_mode;
11426 tree vectype;
11427
11428 if (!is_int_mode (TYPE_MODE (scalar_type), &inner_mode)
11429 && !is_float_mode (TYPE_MODE (scalar_type), &inner_mode))
11430 return NULL_TREE;
11431
11432 unsigned int nbytes = GET_MODE_SIZE (inner_mode);
11433
11434 /* For vector types of elements whose mode precision doesn't
11435 match their type's precision we use an element type of mode
11436 precision. The vectorization routines will have to make sure
11437 they support the proper result truncation/extension.
11438 We also make sure to build vector types with INTEGER_TYPE
11439 component type only. */
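/* For instance, a boolean with TYPE_PRECISION 1 but QImode is vectorized
   using an 8-bit integer element type of the same signedness, built
   here.  */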
11440 if (INTEGRAL_TYPE_P (scalar_type)
11441 && (GET_MODE_BITSIZE (inner_mode) != TYPE_PRECISION (scalar_type)
11442 || TREE_CODE (scalar_type) != INTEGER_TYPE))
11443 scalar_type = build_nonstandard_integer_type (GET_MODE_BITSIZE (inner_mode),
11444 TYPE_UNSIGNED (scalar_type));
11445
11446 /* We shouldn't end up building VECTOR_TYPEs of non-scalar components.
11447 When the component mode passes the above test simply use a type
11448 corresponding to that mode. The theory is that any use that
11449 would cause problems with this will disable vectorization anyway. */
11450 else if (!SCALAR_FLOAT_TYPE_P (scalar_type)
11451 && !INTEGRAL_TYPE_P (scalar_type))
11452 scalar_type = lang_hooks.types.type_for_mode (inner_mode, 1);
11453
11454 /* We can't build a vector type of elements with alignment bigger than
11455 their size. */
11456 else if (nbytes < TYPE_ALIGN_UNIT (scalar_type))
11457 scalar_type = lang_hooks.types.type_for_mode (inner_mode,
11458 TYPE_UNSIGNED (scalar_type));
11459
11460 /* If we fell back to using the mode, fail if there was
11461 no scalar type for it. */
11462 if (scalar_type == NULL_TREE)
11463 return NULL_TREE;
11464
11465 /* If no prevailing mode was supplied, use the mode the target prefers.
11466 Otherwise lookup a vector mode based on the prevailing mode. */
11467 if (prevailing_mode == VOIDmode)
11468 {
11469 gcc_assert (known_eq (nunits, 0U));
11470 simd_mode = targetm.vectorize.preferred_simd_mode (inner_mode);
11471 if (SCALAR_INT_MODE_P (simd_mode))
11472 {
11473 /* Traditional behavior is not to take the integer mode
11474 literally, but simply to use it as a way of determining
11475 the vector size. It is up to mode_for_vector to decide
11476 what the TYPE_MODE should be.
11477
11478 Note that nunits == 1 is allowed in order to support single
11479 element vector types. */
11480 if (!multiple_p (GET_MODE_SIZE (simd_mode), nbytes, &nunits)
11481 || !mode_for_vector (inner_mode, nunits).exists (&simd_mode))
11482 return NULL_TREE;
11483 }
11484 }
11485 else if (SCALAR_INT_MODE_P (prevailing_mode)
11486 || !related_vector_mode (prevailing_mode,
11487 inner_mode, nunits).exists (&simd_mode))
11488 {
11489 /* Fall back to using mode_for_vector, mostly in the hope of being
11490 able to use an integer mode. */
11491 if (known_eq (nunits, 0U)
11492 && !multiple_p (GET_MODE_SIZE (prevailing_mode), nbytes, &nunits))
11493 return NULL_TREE;
11494
11495 if (!mode_for_vector (inner_mode, nunits).exists (&simd_mode))
11496 return NULL_TREE;
11497 }
11498
11499 vectype = build_vector_type_for_mode (scalar_type, simd_mode);
11500
11501 /* In cases where the mode was chosen by mode_for_vector, check that
11502 the target actually supports the chosen mode, or that it at least
11503 allows the vector mode to be replaced by a like-sized integer. */
11504 if (!VECTOR_MODE_P (TYPE_MODE (vectype))
11505 && !INTEGRAL_MODE_P (TYPE_MODE (vectype)))
11506 return NULL_TREE;
11507
11508 /* Re-attach the address-space qualifier if we canonicalized the scalar
11509 type. */
11510 if (TYPE_ADDR_SPACE (orig_scalar_type) != TYPE_ADDR_SPACE (vectype))
11511 return build_qualified_type
11512 (vectype, KEEP_QUAL_ADDR_SPACE (TYPE_QUALS (orig_scalar_type)));
11513
11514 return vectype;
11515 }
11516
11517 /* Function get_vectype_for_scalar_type.
11518
11519 Returns the vector type corresponding to SCALAR_TYPE as supported
11520 by the target. If GROUP_SIZE is nonzero and we're performing BB
11521 vectorization, make sure that the number of elements in the vector
11522 is no bigger than GROUP_SIZE. */
11523
11524 tree
11525 get_vectype_for_scalar_type (vec_info *vinfo, tree scalar_type,
11526 unsigned int group_size)
11527 {
11528 /* For BB vectorization, we should always have a group size once we've
11529 constructed the SLP tree; the only valid uses of zero GROUP_SIZEs
11530 are tentative requests during things like early data reference
11531 analysis and pattern recognition. */
11532 if (is_a <bb_vec_info> (vinfo))
11533 gcc_assert (vinfo->slp_instances.is_empty () || group_size != 0);
11534 else
11535 group_size = 0;
11536
11537 tree vectype = get_related_vectype_for_scalar_type (vinfo->vector_mode,
11538 scalar_type);
11539 if (vectype && vinfo->vector_mode == VOIDmode)
11540 vinfo->vector_mode = TYPE_MODE (vectype);
11541
11542 /* Register the natural choice of vector type, before the group size
11543 has been applied. */
11544 if (vectype)
11545 vinfo->used_vector_modes.add (TYPE_MODE (vectype));
11546
11547 /* If the natural choice of vector type doesn't satisfy GROUP_SIZE,
11548 try again with an explicit number of elements. */
11549 if (vectype
11550 && group_size
11551 && maybe_ge (TYPE_VECTOR_SUBPARTS (vectype), group_size))
11552 {
11553 /* Start with the biggest number of units that fits within
11554 GROUP_SIZE and halve it until we find a valid vector type.
11555 Usually either the first attempt will succeed or all will
11556 fail (in the latter case because GROUP_SIZE is too small
11557 for the target), but it's possible that a target could have
11558 a hole between supported vector types.
11559
11560 If GROUP_SIZE is not a power of 2, this has the effect of
11561 trying the largest power of 2 that fits within the group,
11562 even though the group is not a multiple of that vector size.
11563 The BB vectorizer will then try to carve up the group into
11564 smaller pieces. */
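/* E.g. for GROUP_SIZE 6 we first try 4 elements and, if that fails, 2.  */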
11565 unsigned int nunits = 1 << floor_log2 (group_size);
11566 do
11567 {
11568 vectype = get_related_vectype_for_scalar_type (vinfo->vector_mode,
11569 scalar_type, nunits);
11570 nunits /= 2;
11571 }
11572 while (nunits > 1 && !vectype);
11573 }
11574
11575 return vectype;
11576 }
11577
11578 /* Return the vector type corresponding to SCALAR_TYPE as supported
11579 by the target. NODE, if nonnull, is the SLP tree node that will
11580 use the returned vector type. */
11581
11582 tree
11583 get_vectype_for_scalar_type (vec_info *vinfo, tree scalar_type, slp_tree node)
11584 {
11585 unsigned int group_size = 0;
11586 if (node)
11587 {
11588 group_size = SLP_TREE_SCALAR_OPS (node).length ();
11589 if (group_size == 0)
11590 group_size = SLP_TREE_SCALAR_STMTS (node).length ();
11591 }
11592 return get_vectype_for_scalar_type (vinfo, scalar_type, group_size);
11593 }
11594
11595 /* Function get_mask_type_for_scalar_type.
11596
11597 Returns the mask type corresponding to a result of comparison
11598 of vectors of specified SCALAR_TYPE as supported by target.
11599 If GROUP_SIZE is nonzero and we're performing BB vectorization,
11600 make sure that the number of elements in the vector is no bigger
11601 than GROUP_SIZE. */
11602
11603 tree
11604 get_mask_type_for_scalar_type (vec_info *vinfo, tree scalar_type,
11605 unsigned int group_size)
11606 {
11607 tree vectype = get_vectype_for_scalar_type (vinfo, scalar_type, group_size);
11608
11609 if (!vectype)
11610 return NULL;
11611
11612 return truth_type_for (vectype);
11613 }
11614
11615 /* Function get_same_sized_vectype
11616
11617 Returns a vector type corresponding to SCALAR_TYPE of size
11618 VECTOR_TYPE if supported by the target. */
11619
11620 tree
11621 get_same_sized_vectype (tree scalar_type, tree vector_type)
11622 {
11623 if (VECT_SCALAR_BOOLEAN_TYPE_P (scalar_type))
11624 return truth_type_for (vector_type);
11625
11626 poly_uint64 nunits;
11627 if (!multiple_p (GET_MODE_SIZE (TYPE_MODE (vector_type)),
11628 GET_MODE_SIZE (TYPE_MODE (scalar_type)), &nunits))
11629 return NULL_TREE;
11630
11631 return get_related_vectype_for_scalar_type (TYPE_MODE (vector_type),
11632 scalar_type, nunits);
11633 }
11634
11635 /* Return true if replacing LOOP_VINFO->vector_mode with VECTOR_MODE
11636 would not change the chosen vector modes. */
11637
11638 bool
11639 vect_chooses_same_modes_p (vec_info *vinfo, machine_mode vector_mode)
11640 {
11641 for (vec_info::mode_set::iterator i = vinfo->used_vector_modes.begin ();
11642 i != vinfo->used_vector_modes.end (); ++i)
11643 if (!VECTOR_MODE_P (*i)
11644 || related_vector_mode (vector_mode, GET_MODE_INNER (*i), 0) != *i)
11645 return false;
11646 return true;
11647 }
11648
11649 /* Function vect_is_simple_use.
11650
11651 Input:
11652 VINFO - the vect info of the loop or basic block that is being vectorized.
11653 OPERAND - operand in the loop or bb.
11654 Output:
11655 DEF_STMT_INFO_OUT (optional) - information about the defining stmt in
11656 case OPERAND is an SSA_NAME that is defined in the vectorizable region
11657 DEF_STMT_OUT (optional) - the defining stmt in case OPERAND is an SSA_NAME;
11658 the definition could be anywhere in the function
11659 DT - the type of definition
11660
11661 Returns whether a stmt with OPERAND can be vectorized.
11662 For loops, supportable operands are constants, loop invariants, and operands
11663 that are defined by the current iteration of the loop. Unsupportable
11664 operands are those that are defined by a previous iteration of the loop (as
11665 is the case in reduction/induction computations).
11666 For basic blocks, supportable operands are constants and bb invariants.
11667 For now, operands defined outside the basic block are not supported. */
11668
11669 bool
11670 vect_is_simple_use (tree operand, vec_info *vinfo, enum vect_def_type *dt,
11671 stmt_vec_info *def_stmt_info_out, gimple **def_stmt_out)
11672 {
11673 if (def_stmt_info_out)
11674 *def_stmt_info_out = NULL;
11675 if (def_stmt_out)
11676 *def_stmt_out = NULL;
11677 *dt = vect_unknown_def_type;
11678
11679 if (dump_enabled_p ())
11680 {
11681 dump_printf_loc (MSG_NOTE, vect_location,
11682 "vect_is_simple_use: operand ");
11683 if (TREE_CODE (operand) == SSA_NAME
11684 && !SSA_NAME_IS_DEFAULT_DEF (operand))
11685 dump_gimple_expr (MSG_NOTE, TDF_SLIM, SSA_NAME_DEF_STMT (operand), 0);
11686 else
11687 dump_generic_expr (MSG_NOTE, TDF_SLIM, operand);
11688 }
11689
11690 if (CONSTANT_CLASS_P (operand))
11691 *dt = vect_constant_def;
11692 else if (is_gimple_min_invariant (operand))
11693 *dt = vect_external_def;
11694 else if (TREE_CODE (operand) != SSA_NAME)
11695 *dt = vect_unknown_def_type;
11696 else if (SSA_NAME_IS_DEFAULT_DEF (operand))
11697 *dt = vect_external_def;
11698 else
11699 {
11700 gimple *def_stmt = SSA_NAME_DEF_STMT (operand);
11701 stmt_vec_info stmt_vinfo = vinfo->lookup_def (operand);
11702 if (!stmt_vinfo)
11703 *dt = vect_external_def;
11704 else
11705 {
11706 stmt_vinfo = vect_stmt_to_vectorize (stmt_vinfo);
11707 def_stmt = stmt_vinfo->stmt;
11708 switch (gimple_code (def_stmt))
11709 {
11710 case GIMPLE_PHI:
11711 case GIMPLE_ASSIGN:
11712 case GIMPLE_CALL:
11713 *dt = STMT_VINFO_DEF_TYPE (stmt_vinfo);
11714 break;
11715 default:
11716 *dt = vect_unknown_def_type;
11717 break;
11718 }
11719 if (def_stmt_info_out)
11720 *def_stmt_info_out = stmt_vinfo;
11721 }
11722 if (def_stmt_out)
11723 *def_stmt_out = def_stmt;
11724 }
11725
11726 if (dump_enabled_p ())
11727 {
11728 dump_printf (MSG_NOTE, ", type of def: ");
11729 switch (*dt)
11730 {
11731 case vect_uninitialized_def:
11732 dump_printf (MSG_NOTE, "uninitialized\n");
11733 break;
11734 case vect_constant_def:
11735 dump_printf (MSG_NOTE, "constant\n");
11736 break;
11737 case vect_external_def:
11738 dump_printf (MSG_NOTE, "external\n");
11739 break;
11740 case vect_internal_def:
11741 dump_printf (MSG_NOTE, "internal\n");
11742 break;
11743 case vect_induction_def:
11744 dump_printf (MSG_NOTE, "induction\n");
11745 break;
11746 case vect_reduction_def:
11747 dump_printf (MSG_NOTE, "reduction\n");
11748 break;
11749 case vect_double_reduction_def:
11750 dump_printf (MSG_NOTE, "double reduction\n");
11751 break;
11752 case vect_nested_cycle:
11753 dump_printf (MSG_NOTE, "nested cycle\n");
11754 break;
11755 case vect_unknown_def_type:
11756 dump_printf (MSG_NOTE, "unknown\n");
11757 break;
11758 }
11759 }
11760
11761 if (*dt == vect_unknown_def_type)
11762 {
11763 if (dump_enabled_p ())
11764 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
11765 "Unsupported pattern.\n");
11766 return false;
11767 }
11768
11769 return true;
11770 }
11771
11772 /* Function vect_is_simple_use.
11773
11774 Same as vect_is_simple_use but also determines the vector operand
11775 type of OPERAND and stores it to *VECTYPE. If the definition of
11776 OPERAND is vect_uninitialized_def, vect_constant_def or
11777 vect_external_def *VECTYPE will be set to NULL_TREE and the caller
11778 is responsible to compute the best suited vector type for the
11779 scalar operand. */
11780
11781 bool
11782 vect_is_simple_use (tree operand, vec_info *vinfo, enum vect_def_type *dt,
11783 tree *vectype, stmt_vec_info *def_stmt_info_out,
11784 gimple **def_stmt_out)
11785 {
11786 stmt_vec_info def_stmt_info;
11787 gimple *def_stmt;
11788 if (!vect_is_simple_use (operand, vinfo, dt, &def_stmt_info, &def_stmt))
11789 return false;
11790
11791 if (def_stmt_out)
11792 *def_stmt_out = def_stmt;
11793 if (def_stmt_info_out)
11794 *def_stmt_info_out = def_stmt_info;
11795
11796 /* Now get a vector type if the def is internal, otherwise supply
11797 NULL_TREE and leave it up to the caller to figure out a proper
11798 type for the use stmt. */
11799 if (*dt == vect_internal_def
11800 || *dt == vect_induction_def
11801 || *dt == vect_reduction_def
11802 || *dt == vect_double_reduction_def
11803 || *dt == vect_nested_cycle)
11804 {
11805 *vectype = STMT_VINFO_VECTYPE (def_stmt_info);
11806 gcc_assert (*vectype != NULL_TREE);
11807 if (dump_enabled_p ())
11808 dump_printf_loc (MSG_NOTE, vect_location,
11809 "vect_is_simple_use: vectype %T\n", *vectype);
11810 }
11811 else if (*dt == vect_uninitialized_def
11812 || *dt == vect_constant_def
11813 || *dt == vect_external_def)
11814 *vectype = NULL_TREE;
11815 else
11816 gcc_unreachable ();
11817
11818 return true;
11819 }
11820
11821 /* Function vect_is_simple_use.
11822
11823 Same as vect_is_simple_use but determines the operand by operand
11824 position OPERAND from either STMT or SLP_NODE, filling in *OP
11825 and *SLP_DEF (when SLP_NODE is not NULL). */
11826
11827 bool
11828 vect_is_simple_use (vec_info *vinfo, stmt_vec_info stmt, slp_tree slp_node,
11829 unsigned operand, tree *op, slp_tree *slp_def,
11830 enum vect_def_type *dt,
11831 tree *vectype, stmt_vec_info *def_stmt_info_out)
11832 {
11833 if (slp_node)
11834 {
11835 slp_tree child = SLP_TREE_CHILDREN (slp_node)[operand];
11836 *slp_def = child;
11837 if (SLP_TREE_DEF_TYPE (child) == vect_internal_def)
11838 *op = gimple_get_lhs (SLP_TREE_SCALAR_STMTS (child)[0]->stmt);
11839 else
11840 *op = SLP_TREE_SCALAR_OPS (child)[0];
11841 }
11842 else
11843 {
11844 if (gassign *ass = dyn_cast <gassign *> (stmt->stmt))
11845 {
11846 *op = gimple_op (ass, operand + 1);
11847 /* ??? Ick. But it will vanish with SLP only. */
11848 if (TREE_CODE (*op) == VIEW_CONVERT_EXPR)
11849 *op = TREE_OPERAND (*op, 0);
11850 }
11851 else if (gcall *call = dyn_cast <gcall *> (stmt->stmt))
11852 *op = gimple_call_arg (call, operand);
11853 else
11854 gcc_unreachable ();
11855 }
11856
11857 /* ??? We might want to update *vectype from *slp_def here, though
11858 when sharing nodes this would prevent unsharing in the caller. */
11859 return vect_is_simple_use (*op, vinfo, dt, vectype, def_stmt_info_out);
11860 }
11861
11862 /* If OP is not NULL and is external or constant update its vector
11863 type with VECTYPE. Returns true if successful or false if not,
11864 for example when conflicting vector types are present. */
11865
11866 bool
11867 vect_maybe_update_slp_op_vectype (slp_tree op, tree vectype)
11868 {
11869 if (!op || SLP_TREE_DEF_TYPE (op) == vect_internal_def)
11870 return true;
11871 if (SLP_TREE_VECTYPE (op))
11872 return types_compatible_p (SLP_TREE_VECTYPE (op), vectype);
11873 SLP_TREE_VECTYPE (op) = vectype;
11874 return true;
11875 }
11876
11877 /* Function supportable_widening_operation
11878
11879 Check whether an operation represented by the code CODE is a
11880 widening operation that is supported by the target platform in
11881 vector form (i.e., when operating on arguments of type VECTYPE_IN
11882 producing a result of type VECTYPE_OUT).
11883
11884 Widening operations we currently support are NOP (CONVERT), FLOAT,
11885 FIX_TRUNC and WIDEN_MULT. This function checks if these operations
11886 are supported by the target platform either directly (via vector
11887 tree-codes), or via target builtins.
11888
11889 Output:
11890 - CODE1 and CODE2 are codes of vector operations to be used when
11891 vectorizing the operation, if available.
11892 - MULTI_STEP_CVT determines the number of required intermediate steps in
11893 case of multi-step conversion (like char->short->int - in that case
11894 MULTI_STEP_CVT will be 1).
11895 - INTERM_TYPES contains the intermediate type required to perform the
11896 widening operation (short in the above example). */
11897
11898 bool
11899 supportable_widening_operation (vec_info *vinfo,
11900 enum tree_code code, stmt_vec_info stmt_info,
11901 tree vectype_out, tree vectype_in,
11902 enum tree_code *code1, enum tree_code *code2,
11903 int *multi_step_cvt,
11904 vec<tree> *interm_types)
11905 {
11906 loop_vec_info loop_info = dyn_cast <loop_vec_info> (vinfo);
11907 class loop *vect_loop = NULL;
11908 machine_mode vec_mode;
11909 enum insn_code icode1, icode2;
11910 optab optab1, optab2;
11911 tree vectype = vectype_in;
11912 tree wide_vectype = vectype_out;
11913 enum tree_code c1, c2;
11914 int i;
11915 tree prev_type, intermediate_type;
11916 machine_mode intermediate_mode, prev_mode;
11917 optab optab3, optab4;
11918
11919 *multi_step_cvt = 0;
11920 if (loop_info)
11921 vect_loop = LOOP_VINFO_LOOP (loop_info);
11922
11923 switch (code)
11924 {
11925 case WIDEN_MULT_EXPR:
11926 /* The result of a vectorized widening operation usually requires
11927 two vectors (because the widened results do not fit into one vector).
11928 The generated vector results would normally be expected to be
11929 generated in the same order as in the original scalar computation,
11930 i.e. if 8 results are generated in each vector iteration, they are
11931 to be organized as follows:
11932 vect1: [res1,res2,res3,res4],
11933 vect2: [res5,res6,res7,res8].
11934
11935 However, in the special case that the result of the widening
11936 operation is used in a reduction computation only, the order doesn't
11937 matter (because when vectorizing a reduction we change the order of
11938 the computation). Some targets can take advantage of this and
11939 generate more efficient code. For example, targets like Altivec,
11940 that support widen_mult using a sequence of {mult_even,mult_odd}
11941 generate the following vectors:
11942 vect1: [res1,res3,res5,res7],
11943 vect2: [res2,res4,res6,res8].
11944
11945 When vectorizing outer-loops, we execute the inner-loop sequentially
11946 (each vectorized inner-loop iteration contributes to VF outer-loop
11947 iterations in parallel). We therefore don't allow changing the
11948 order of the computation in the inner-loop during outer-loop
11949 vectorization. */
11950 /* TODO: Another case in which order doesn't *really* matter is when we
11951 widen and then contract again, e.g. (short)((int)x * y >> 8).
11952 Normally, pack_trunc performs an even/odd permute, whereas the
11953 repack from an even/odd expansion would be an interleave, which
11954 would be significantly simpler for e.g. AVX2. */
11955 /* In any case, in order to avoid duplicating the code below, recurse
11956 on VEC_WIDEN_MULT_EVEN_EXPR. If it succeeds, all the return values
11957 are properly set up for the caller. If we fail, we'll continue with
11958 a VEC_WIDEN_MULT_LO/HI_EXPR check. */
11959 if (vect_loop
11960 && STMT_VINFO_RELEVANT (stmt_info) == vect_used_by_reduction
11961 && !nested_in_vect_loop_p (vect_loop, stmt_info)
11962 && supportable_widening_operation (vinfo, VEC_WIDEN_MULT_EVEN_EXPR,
11963 stmt_info, vectype_out,
11964 vectype_in, code1, code2,
11965 multi_step_cvt, interm_types))
11966 {
11967 /* Elements in a vector with vect_used_by_reduction property cannot
11968 be reordered if the use chain with this property does not have the
11969 same operation. One such example is s += a * b, where elements
11970 in a and b cannot be reordered. Here we check if the vector defined
11971 by STMT is only directly used in the reduction statement. */
11972 tree lhs = gimple_assign_lhs (stmt_info->stmt);
11973 stmt_vec_info use_stmt_info = loop_info->lookup_single_use (lhs);
11974 if (use_stmt_info
11975 && STMT_VINFO_DEF_TYPE (use_stmt_info) == vect_reduction_def)
11976 return true;
11977 }
11978 c1 = VEC_WIDEN_MULT_LO_EXPR;
11979 c2 = VEC_WIDEN_MULT_HI_EXPR;
11980 break;
11981
11982 case DOT_PROD_EXPR:
11983 c1 = DOT_PROD_EXPR;
11984 c2 = DOT_PROD_EXPR;
11985 break;
11986
11987 case SAD_EXPR:
11988 c1 = SAD_EXPR;
11989 c2 = SAD_EXPR;
11990 break;
11991
11992 case VEC_WIDEN_MULT_EVEN_EXPR:
11993 /* Support the recursion induced just above. */
11994 c1 = VEC_WIDEN_MULT_EVEN_EXPR;
11995 c2 = VEC_WIDEN_MULT_ODD_EXPR;
11996 break;
11997
11998 case WIDEN_LSHIFT_EXPR:
11999 c1 = VEC_WIDEN_LSHIFT_LO_EXPR;
12000 c2 = VEC_WIDEN_LSHIFT_HI_EXPR;
12001 break;
12002
12003 CASE_CONVERT:
12004 c1 = VEC_UNPACK_LO_EXPR;
12005 c2 = VEC_UNPACK_HI_EXPR;
12006 break;
12007
12008 case FLOAT_EXPR:
12009 c1 = VEC_UNPACK_FLOAT_LO_EXPR;
12010 c2 = VEC_UNPACK_FLOAT_HI_EXPR;
12011 break;
12012
12013 case FIX_TRUNC_EXPR:
12014 c1 = VEC_UNPACK_FIX_TRUNC_LO_EXPR;
12015 c2 = VEC_UNPACK_FIX_TRUNC_HI_EXPR;
12016 break;
12017
12018 default:
12019 gcc_unreachable ();
12020 }
12021
12022 if (BYTES_BIG_ENDIAN && c1 != VEC_WIDEN_MULT_EVEN_EXPR)
12023 std::swap (c1, c2);
12024
12025 if (code == FIX_TRUNC_EXPR)
12026 {
12027 /* The signedness is determined from the output operand. */
12028 optab1 = optab_for_tree_code (c1, vectype_out, optab_default);
12029 optab2 = optab_for_tree_code (c2, vectype_out, optab_default);
12030 }
12031 else if (CONVERT_EXPR_CODE_P (code)
12032 && VECTOR_BOOLEAN_TYPE_P (wide_vectype)
12033 && VECTOR_BOOLEAN_TYPE_P (vectype)
12034 && TYPE_MODE (wide_vectype) == TYPE_MODE (vectype)
12035 && SCALAR_INT_MODE_P (TYPE_MODE (vectype)))
12036 {
12037 /* If the input and result modes are the same, a different optab
12038 is needed where we pass in the number of units in vectype. */
12039 optab1 = vec_unpacks_sbool_lo_optab;
12040 optab2 = vec_unpacks_sbool_hi_optab;
12041 }
12042 else
12043 {
12044 optab1 = optab_for_tree_code (c1, vectype, optab_default);
12045 optab2 = optab_for_tree_code (c2, vectype, optab_default);
12046 }
12047
12048 if (!optab1 || !optab2)
12049 return false;
12050
12051 vec_mode = TYPE_MODE (vectype);
12052 if ((icode1 = optab_handler (optab1, vec_mode)) == CODE_FOR_nothing
12053 || (icode2 = optab_handler (optab2, vec_mode)) == CODE_FOR_nothing)
12054 return false;
12055
12056 *code1 = c1;
12057 *code2 = c2;
12058
12059 if (insn_data[icode1].operand[0].mode == TYPE_MODE (wide_vectype)
12060 && insn_data[icode2].operand[0].mode == TYPE_MODE (wide_vectype))
12061 {
12062 if (!VECTOR_BOOLEAN_TYPE_P (vectype))
12063 return true;
12064 /* For scalar masks we may have different boolean
12065 vector types sharing the same QImode. Thus we add
12066 an additional check on the number of elements. */
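/* E.g. with AVX-512-style scalar masks, the 4-element and 8-element
   boolean vector types may both have QImode, so comparing modes alone
   cannot distinguish them. */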
12067 if (known_eq (TYPE_VECTOR_SUBPARTS (vectype),
12068 TYPE_VECTOR_SUBPARTS (wide_vectype) * 2))
12069 return true;
12070 }
12071
12072 /* Check if it's a multi-step conversion that can be done using intermediate
12073 types. */
12074
12075 prev_type = vectype;
12076 prev_mode = vec_mode;
12077
12078 if (!CONVERT_EXPR_CODE_P (code))
12079 return false;
12080
12081 /* We assume here that there will not be more than MAX_INTERM_CVT_STEPS
12082 intermediate steps in the promotion sequence. We try
12083 MAX_INTERM_CVT_STEPS to get to WIDE_VECTYPE, and fail if we do
12084 not. */
12085 interm_types->create (MAX_INTERM_CVT_STEPS);
12086 for (i = 0; i < MAX_INTERM_CVT_STEPS; i++)
12087 {
12088 intermediate_mode = insn_data[icode1].operand[0].mode;
12089 if (VECTOR_BOOLEAN_TYPE_P (prev_type))
12090 intermediate_type
12091 = vect_halve_mask_nunits (prev_type, intermediate_mode);
12092 else
12093 intermediate_type
12094 = lang_hooks.types.type_for_mode (intermediate_mode,
12095 TYPE_UNSIGNED (prev_type));
12096
12097 if (VECTOR_BOOLEAN_TYPE_P (intermediate_type)
12098 && VECTOR_BOOLEAN_TYPE_P (prev_type)
12099 && intermediate_mode == prev_mode
12100 && SCALAR_INT_MODE_P (prev_mode))
12101 {
12102 /* If the input and result modes are the same, a different optab
12103 is needed where we pass in the number of units in vectype. */
12104 optab3 = vec_unpacks_sbool_lo_optab;
12105 optab4 = vec_unpacks_sbool_hi_optab;
12106 }
12107 else
12108 {
12109 optab3 = optab_for_tree_code (c1, intermediate_type, optab_default);
12110 optab4 = optab_for_tree_code (c2, intermediate_type, optab_default);
12111 }
12112
12113 if (!optab3 || !optab4
12114 || (icode1 = optab_handler (optab1, prev_mode)) == CODE_FOR_nothing
12115 || insn_data[icode1].operand[0].mode != intermediate_mode
12116 || (icode2 = optab_handler (optab2, prev_mode)) == CODE_FOR_nothing
12117 || insn_data[icode2].operand[0].mode != intermediate_mode
12118 || ((icode1 = optab_handler (optab3, intermediate_mode))
12119 == CODE_FOR_nothing)
12120 || ((icode2 = optab_handler (optab4, intermediate_mode))
12121 == CODE_FOR_nothing))
12122 break;
12123
12124 interm_types->quick_push (intermediate_type);
12125 (*multi_step_cvt)++;
12126
12127 if (insn_data[icode1].operand[0].mode == TYPE_MODE (wide_vectype)
12128 && insn_data[icode2].operand[0].mode == TYPE_MODE (wide_vectype))
12129 {
12130 if (!VECTOR_BOOLEAN_TYPE_P (vectype))
12131 return true;
12132 if (known_eq (TYPE_VECTOR_SUBPARTS (intermediate_type),
12133 TYPE_VECTOR_SUBPARTS (wide_vectype) * 2))
12134 return true;
12135 }
12136
12137 prev_type = intermediate_type;
12138 prev_mode = intermediate_mode;
12139 }
12140
12141 interm_types->release ();
12142 return false;
12143 }
12144
12145
12146 /* Function supportable_narrowing_operation
12147
12148 Check whether an operation represented by the code CODE is a
12149 narrowing operation that is supported by the target platform in
12150 vector form (i.e., when operating on arguments of type VECTYPE_IN
12151 and producing a result of type VECTYPE_OUT).
12152
12153 Narrowing operations we currently support are NOP (CONVERT), FIX_TRUNC
12154 and FLOAT. This function checks if these operations are supported by
12155 the target platform directly via vector tree-codes.
12156
12157 Output:
12158 - CODE1 is the code of a vector operation to be used when
12159 vectorizing the operation, if available.
12160 - MULTI_STEP_CVT determines the number of required intermediate steps in
12161 case of multi-step conversion (like int->short->char - in that case
12162 MULTI_STEP_CVT will be 1).
12163 - INTERM_TYPES contains the intermediate type required to perform the
12164 narrowing operation (short in the above example). */
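/* For instance (the exact modes depend on the target), an int -> char
   narrowing with 128-bit vectors typically goes V4SI -> V8HI -> V16QI
   using VEC_PACK_TRUNC: one intermediate step, so MULTI_STEP_CVT is 1
   and INTERM_TYPES holds the short vector type. */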
12165
12166 bool
12167 supportable_narrowing_operation (enum tree_code code,
12168 tree vectype_out, tree vectype_in,
12169 enum tree_code *code1, int *multi_step_cvt,
12170 vec<tree> *interm_types)
12171 {
12172 machine_mode vec_mode;
12173 enum insn_code icode1;
12174 optab optab1, interm_optab;
12175 tree vectype = vectype_in;
12176 tree narrow_vectype = vectype_out;
12177 enum tree_code c1;
12178 tree intermediate_type, prev_type;
12179 machine_mode intermediate_mode, prev_mode;
12180 int i;
12181 bool uns;
12182
12183 *multi_step_cvt = 0;
12184 switch (code)
12185 {
12186 CASE_CONVERT:
12187 c1 = VEC_PACK_TRUNC_EXPR;
12188 if (VECTOR_BOOLEAN_TYPE_P (narrow_vectype)
12189 && VECTOR_BOOLEAN_TYPE_P (vectype)
12190 && TYPE_MODE (narrow_vectype) == TYPE_MODE (vectype)
12191 && SCALAR_INT_MODE_P (TYPE_MODE (vectype)))
12192 optab1 = vec_pack_sbool_trunc_optab;
12193 else
12194 optab1 = optab_for_tree_code (c1, vectype, optab_default);
12195 break;
12196
12197 case FIX_TRUNC_EXPR:
12198 c1 = VEC_PACK_FIX_TRUNC_EXPR;
12199 /* The signedness is determined from the output operand. */
12200 optab1 = optab_for_tree_code (c1, vectype_out, optab_default);
12201 break;
12202
12203 case FLOAT_EXPR:
12204 c1 = VEC_PACK_FLOAT_EXPR;
12205 optab1 = optab_for_tree_code (c1, vectype, optab_default);
12206 break;
12207
12208 default:
12209 gcc_unreachable ();
12210 }
12211
12212 if (!optab1)
12213 return false;
12214
12215 vec_mode = TYPE_MODE (vectype);
12216 if ((icode1 = optab_handler (optab1, vec_mode)) == CODE_FOR_nothing)
12217 return false;
12218
12219 *code1 = c1;
12220
12221 if (insn_data[icode1].operand[0].mode == TYPE_MODE (narrow_vectype))
12222 {
12223 if (!VECTOR_BOOLEAN_TYPE_P (vectype))
12224 return true;
12225 /* For scalar masks we may have different boolean
12226 vector types sharing the same QImode. Thus we add
12227 an additional check on the number of elements. */
12228 if (known_eq (TYPE_VECTOR_SUBPARTS (vectype) * 2,
12229 TYPE_VECTOR_SUBPARTS (narrow_vectype)))
12230 return true;
12231 }
12232
12233 if (code == FLOAT_EXPR)
12234 return false;
12235
12236 /* Check if it's a multi-step conversion that can be done using intermediate
12237 types. */
12238 prev_mode = vec_mode;
12239 prev_type = vectype;
12240 if (code == FIX_TRUNC_EXPR)
12241 uns = TYPE_UNSIGNED (vectype_out);
12242 else
12243 uns = TYPE_UNSIGNED (vectype);
12244
12245 /* For multi-step FIX_TRUNC_EXPR prefer signed floating to integer
12246 conversion over unsigned, as unsigned FIX_TRUNC_EXPR is often more
12247 costly than signed. */
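/* E.g. for a double -> unsigned short conversion, the float-to-integer
   step can be done as double -> signed int (usually cheaper than
   double -> unsigned int); the remaining packing steps are plain
   truncations, for which the signedness does not matter. */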
12248 if (code == FIX_TRUNC_EXPR && uns)
12249 {
12250 enum insn_code icode2;
12251
12252 intermediate_type
12253 = lang_hooks.types.type_for_mode (TYPE_MODE (vectype_out), 0);
12254 interm_optab
12255 = optab_for_tree_code (c1, intermediate_type, optab_default);
12256 if (interm_optab != unknown_optab
12257 && (icode2 = optab_handler (optab1, vec_mode)) != CODE_FOR_nothing
12258 && insn_data[icode1].operand[0].mode
12259 == insn_data[icode2].operand[0].mode)
12260 {
12261 uns = false;
12262 optab1 = interm_optab;
12263 icode1 = icode2;
12264 }
12265 }
12266
12267 /* We assume here that there will not be more than MAX_INTERM_CVT_STEPS
12268 intermediate steps in the narrowing sequence. We try
12269 MAX_INTERM_CVT_STEPS to get to NARROW_VECTYPE, and fail if we do not. */
12270 interm_types->create (MAX_INTERM_CVT_STEPS);
12271 for (i = 0; i < MAX_INTERM_CVT_STEPS; i++)
12272 {
12273 intermediate_mode = insn_data[icode1].operand[0].mode;
12274 if (VECTOR_BOOLEAN_TYPE_P (prev_type))
12275 intermediate_type
12276 = vect_double_mask_nunits (prev_type, intermediate_mode);
12277 else
12278 intermediate_type
12279 = lang_hooks.types.type_for_mode (intermediate_mode, uns);
12280 if (VECTOR_BOOLEAN_TYPE_P (intermediate_type)
12281 && VECTOR_BOOLEAN_TYPE_P (prev_type)
12282 && intermediate_mode == prev_mode
12283 && SCALAR_INT_MODE_P (prev_mode))
12284 interm_optab = vec_pack_sbool_trunc_optab;
12285 else
12286 interm_optab
12287 = optab_for_tree_code (VEC_PACK_TRUNC_EXPR, intermediate_type,
12288 optab_default);
12289 if (!interm_optab
12290 || ((icode1 = optab_handler (optab1, prev_mode)) == CODE_FOR_nothing)
12291 || insn_data[icode1].operand[0].mode != intermediate_mode
12292 || ((icode1 = optab_handler (interm_optab, intermediate_mode))
12293 == CODE_FOR_nothing))
12294 break;
12295
12296 interm_types->quick_push (intermediate_type);
12297 (*multi_step_cvt)++;
12298
12299 if (insn_data[icode1].operand[0].mode == TYPE_MODE (narrow_vectype))
12300 {
12301 if (!VECTOR_BOOLEAN_TYPE_P (vectype))
12302 return true;
12303 if (known_eq (TYPE_VECTOR_SUBPARTS (intermediate_type) * 2,
12304 TYPE_VECTOR_SUBPARTS (narrow_vectype)))
12305 return true;
12306 }
12307
12308 prev_mode = intermediate_mode;
12309 prev_type = intermediate_type;
12310 optab1 = interm_optab;
12311 }
12312
12313 interm_types->release ();
12314 return false;
12315 }
12316
12317 /* Generate and return a statement that sets vector mask MASK such that
12318 MASK[I] is true iff J + START_INDEX < END_INDEX for all J <= I. */
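/* For instance, with an 8-lane mask, START_INDEX 13 and END_INDEX 16
   yield MASK = {1, 1, 1, 0, 0, 0, 0, 0}: only the first three lanes
   satisfy I + START_INDEX < END_INDEX. */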
12319
12320 gcall *
12321 vect_gen_while (tree mask, tree start_index, tree end_index)
12322 {
12323 tree cmp_type = TREE_TYPE (start_index);
12324 tree mask_type = TREE_TYPE (mask);
12325 gcc_checking_assert (direct_internal_fn_supported_p (IFN_WHILE_ULT,
12326 cmp_type, mask_type,
12327 OPTIMIZE_FOR_SPEED));
12328 gcall *call = gimple_build_call_internal (IFN_WHILE_ULT, 3,
12329 start_index, end_index,
12330 build_zero_cst (mask_type));
12331 gimple_call_set_lhs (call, mask);
12332 return call;
12333 }
12334
12335 /* Generate a vector mask of type MASK_TYPE for which index I is false iff
12336 J + START_INDEX < END_INDEX for all J <= I. Add the statements to SEQ. */
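/* E.g. with an 8-lane mask type, START_INDEX 13 and END_INDEX 16 give
   {0, 0, 0, 1, 1, 1, 1, 1}, the complement of the mask vect_gen_while
   produces for the same inputs. */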
12337
12338 tree
12339 vect_gen_while_not (gimple_seq *seq, tree mask_type, tree start_index,
12340 tree end_index)
12341 {
12342 tree tmp = make_ssa_name (mask_type);
12343 gcall *call = vect_gen_while (tmp, start_index, end_index);
12344 gimple_seq_add_stmt (seq, call);
12345 return gimple_build (seq, BIT_NOT_EXPR, mask_type, tmp);
12346 }
12347
12348 /* Try to compute the vector types required to vectorize STMT_INFO,
12349 returning true on success and false if vectorization isn't possible.
12350 If GROUP_SIZE is nonzero and we're performing BB vectorization,
12351 make sure that the number of elements in the vectors is no bigger
12352 than GROUP_SIZE.
12353
12354 On success:
12355
12356 - Set *STMT_VECTYPE_OUT to:
12357 - NULL_TREE if the statement doesn't need to be vectorized;
12358 - the equivalent of STMT_VINFO_VECTYPE otherwise.
12359
12360 - Set *NUNITS_VECTYPE_OUT to the vector type that contains the maximum
12361 number of units needed to vectorize STMT_INFO, or NULL_TREE if the
12362 statement does not help to determine the overall number of units. */
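/* For example (assuming 128-bit vectors), for int_res = (int) char_val
   the statement vectype would be the int vector type (V4SI) while the
   nunits vectype would be the char vector type (V16QI), since the
   smallest scalar type in the statement determines the number of units
   required. */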
12363
12364 opt_result
12365 vect_get_vector_types_for_stmt (vec_info *vinfo, stmt_vec_info stmt_info,
12366 tree *stmt_vectype_out,
12367 tree *nunits_vectype_out,
12368 unsigned int group_size)
12369 {
12370 gimple *stmt = stmt_info->stmt;
12371
12372 /* For BB vectorization, we should always have a group size once we've
12373 constructed the SLP tree; the only valid uses of zero GROUP_SIZEs
12374 are tentative requests during things like early data reference
12375 analysis and pattern recognition. */
12376 if (is_a <bb_vec_info> (vinfo))
12377 gcc_assert (vinfo->slp_instances.is_empty () || group_size != 0);
12378 else
12379 group_size = 0;
12380
12381 *stmt_vectype_out = NULL_TREE;
12382 *nunits_vectype_out = NULL_TREE;
12383
12384 if (gimple_get_lhs (stmt) == NULL_TREE
12385 /* MASK_STORE has no lhs, but is ok. */
12386 && !gimple_call_internal_p (stmt, IFN_MASK_STORE))
12387 {
12388 if (is_a <gcall *> (stmt))
12389 {
12390 /* Ignore calls with no lhs. These must be calls to
12391 #pragma omp simd functions, and what vectorization factor
12392 they really need can't be determined until
12393 vectorizable_simd_clone_call. */
12394 if (dump_enabled_p ())
12395 dump_printf_loc (MSG_NOTE, vect_location,
12396 "defer to SIMD clone analysis.\n");
12397 return opt_result::success ();
12398 }
12399
12400 return opt_result::failure_at (stmt,
12401 "not vectorized: irregular stmt.%G", stmt);
12402 }
12403
12404 if (VECTOR_MODE_P (TYPE_MODE (gimple_expr_type (stmt))))
12405 return opt_result::failure_at (stmt,
12406 "not vectorized: vector stmt in loop:%G",
12407 stmt);
12408
12409 tree vectype;
12410 tree scalar_type = NULL_TREE;
12411 if (group_size == 0 && STMT_VINFO_VECTYPE (stmt_info))
12412 {
12413 vectype = STMT_VINFO_VECTYPE (stmt_info);
12414 if (dump_enabled_p ())
12415 dump_printf_loc (MSG_NOTE, vect_location,
12416 "precomputed vectype: %T\n", vectype);
12417 }
12418 else if (vect_use_mask_type_p (stmt_info))
12419 {
12420 unsigned int precision = stmt_info->mask_precision;
12421 scalar_type = build_nonstandard_integer_type (precision, 1);
12422 vectype = get_mask_type_for_scalar_type (vinfo, scalar_type, group_size);
12423 if (!vectype)
12424 return opt_result::failure_at (stmt, "not vectorized: unsupported"
12425 " data-type %T\n", scalar_type);
12426 if (dump_enabled_p ())
12427 dump_printf_loc (MSG_NOTE, vect_location, "vectype: %T\n", vectype);
12428 }
12429 else
12430 {
12431 if (data_reference *dr = STMT_VINFO_DATA_REF (stmt_info))
12432 scalar_type = TREE_TYPE (DR_REF (dr));
12433 else if (gimple_call_internal_p (stmt, IFN_MASK_STORE))
12434 scalar_type = TREE_TYPE (gimple_call_arg (stmt, 3));
12435 else
12436 scalar_type = TREE_TYPE (gimple_get_lhs (stmt));
12437
12438 if (dump_enabled_p ())
12439 {
12440 if (group_size)
12441 dump_printf_loc (MSG_NOTE, vect_location,
12442 "get vectype for scalar type (group size %d):"
12443 " %T\n", group_size, scalar_type);
12444 else
12445 dump_printf_loc (MSG_NOTE, vect_location,
12446 "get vectype for scalar type: %T\n", scalar_type);
12447 }
12448 vectype = get_vectype_for_scalar_type (vinfo, scalar_type, group_size);
12449 if (!vectype)
12450 return opt_result::failure_at (stmt,
12451 "not vectorized:"
12452 " unsupported data-type %T\n",
12453 scalar_type);
12454
12455 if (dump_enabled_p ())
12456 dump_printf_loc (MSG_NOTE, vect_location, "vectype: %T\n", vectype);
12457 }
12458 *stmt_vectype_out = vectype;
12459
12460 /* Don't try to compute scalar types if the stmt produces a boolean
12461 vector; use the existing vector type instead. */
12462 tree nunits_vectype = vectype;
12463 if (!VECTOR_BOOLEAN_TYPE_P (vectype))
12464 {
12465 /* The number of units is set according to the smallest scalar
12466 type (or the largest vector size, but we only support one
12467 vector size per vectorization). */
12468 HOST_WIDE_INT dummy;
12469 scalar_type = vect_get_smallest_scalar_type (stmt_info, &dummy, &dummy);
12470 if (scalar_type != TREE_TYPE (vectype))
12471 {
12472 if (dump_enabled_p ())
12473 dump_printf_loc (MSG_NOTE, vect_location,
12474 "get vectype for smallest scalar type: %T\n",
12475 scalar_type);
12476 nunits_vectype = get_vectype_for_scalar_type (vinfo, scalar_type,
12477 group_size);
12478 if (!nunits_vectype)
12479 return opt_result::failure_at
12480 (stmt, "not vectorized: unsupported data-type %T\n",
12481 scalar_type);
12482 if (dump_enabled_p ())
12483 dump_printf_loc (MSG_NOTE, vect_location, "nunits vectype: %T\n",
12484 nunits_vectype);
12485 }
12486 }
12487
12488 gcc_assert (multiple_p (TYPE_VECTOR_SUBPARTS (nunits_vectype),
12489 TYPE_VECTOR_SUBPARTS (*stmt_vectype_out)));
12490
12491 if (dump_enabled_p ())
12492 {
12493 dump_printf_loc (MSG_NOTE, vect_location, "nunits = ");
12494 dump_dec (MSG_NOTE, TYPE_VECTOR_SUBPARTS (nunits_vectype));
12495 dump_printf (MSG_NOTE, "\n");
12496 }
12497
12498 *nunits_vectype_out = nunits_vectype;
12499 return opt_result::success ();
12500 }