1 /* Statement Analysis and Transformation for Vectorization
2 Copyright (C) 2003-2018 Free Software Foundation, Inc.
3 Contributed by Dorit Naishlos <dorit@il.ibm.com>
4 and Ira Rosen <irar@il.ibm.com>
5
6 This file is part of GCC.
7
8 GCC is free software; you can redistribute it and/or modify it under
9 the terms of the GNU General Public License as published by the Free
10 Software Foundation; either version 3, or (at your option) any later
11 version.
12
13 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
14 WARRANTY; without even the implied warranty of MERCHANTABILITY or
15 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
16 for more details.
17
18 You should have received a copy of the GNU General Public License
19 along with GCC; see the file COPYING3. If not see
20 <http://www.gnu.org/licenses/>. */
21
22 #include "config.h"
23 #include "system.h"
24 #include "coretypes.h"
25 #include "backend.h"
26 #include "target.h"
27 #include "rtl.h"
28 #include "tree.h"
29 #include "gimple.h"
30 #include "ssa.h"
31 #include "optabs-tree.h"
32 #include "insn-config.h"
33 #include "recog.h" /* FIXME: for insn_data */
34 #include "cgraph.h"
35 #include "dumpfile.h"
36 #include "alias.h"
37 #include "fold-const.h"
38 #include "stor-layout.h"
39 #include "tree-eh.h"
40 #include "gimplify.h"
41 #include "gimple-iterator.h"
42 #include "gimplify-me.h"
43 #include "tree-cfg.h"
44 #include "tree-ssa-loop-manip.h"
45 #include "cfgloop.h"
46 #include "tree-ssa-loop.h"
47 #include "tree-scalar-evolution.h"
48 #include "tree-vectorizer.h"
49 #include "builtins.h"
50 #include "internal-fn.h"
51 #include "tree-vector-builder.h"
52 #include "vec-perm-indices.h"
53 #include "tree-ssa-loop-niter.h"
54 #include "gimple-fold.h"
55
56 /* For lang_hooks.types.type_for_mode. */
57 #include "langhooks.h"
58
59 /* Return the vectorized type for the given statement. */
60
61 tree
62 stmt_vectype (struct _stmt_vec_info *stmt_info)
63 {
64 return STMT_VINFO_VECTYPE (stmt_info);
65 }
66
67 /* Return TRUE iff the given statement is in an inner loop relative to
68 the loop being vectorized. */
69 bool
70 stmt_in_inner_loop_p (struct _stmt_vec_info *stmt_info)
71 {
72 gimple *stmt = STMT_VINFO_STMT (stmt_info);
73 basic_block bb = gimple_bb (stmt);
74 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
75 struct loop* loop;
76
77 if (!loop_vinfo)
78 return false;
79
80 loop = LOOP_VINFO_LOOP (loop_vinfo);
81
82 return (bb->loop_father == loop->inner);
83 }
84
85 /* Record the cost of a statement, either by directly informing the
86 target model or by saving it in a vector for later processing.
87 Return a preliminary estimate of the statement's cost. */
88
89 unsigned
90 record_stmt_cost (stmt_vector_for_cost *body_cost_vec, int count,
91 enum vect_cost_for_stmt kind, stmt_vec_info stmt_info,
92 int misalign, enum vect_cost_model_location where)
93 {
94 if ((kind == vector_load || kind == unaligned_load)
95 && STMT_VINFO_GATHER_SCATTER_P (stmt_info))
96 kind = vector_gather_load;
97 if ((kind == vector_store || kind == unaligned_store)
98 && STMT_VINFO_GATHER_SCATTER_P (stmt_info))
99 kind = vector_scatter_store;
100
101 stmt_info_for_cost si = { count, kind, where, stmt_info, misalign };
102 body_cost_vec->safe_push (si);
103
104 tree vectype = stmt_info ? stmt_vectype (stmt_info) : NULL_TREE;
105 return (unsigned)
106 (builtin_vectorization_cost (kind, vectype, misalign) * count);
107 }
108
109 /* Return a variable of type ELEM_TYPE[NELEMS]. */
110
111 static tree
112 create_vector_array (tree elem_type, unsigned HOST_WIDE_INT nelems)
113 {
114 return create_tmp_var (build_array_type_nelts (elem_type, nelems),
115 "vect_array");
116 }
117
118 /* ARRAY is an array of vectors created by create_vector_array.
119 Return an SSA_NAME for the vector in index N. The reference
120 is part of the vectorization of STMT_INFO and the vector is associated
121 with scalar destination SCALAR_DEST. */
122
123 static tree
124 read_vector_array (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
125 tree scalar_dest, tree array, unsigned HOST_WIDE_INT n)
126 {
127 tree vect_type, vect, vect_name, array_ref;
128 gimple *new_stmt;
129
130 gcc_assert (TREE_CODE (TREE_TYPE (array)) == ARRAY_TYPE);
131 vect_type = TREE_TYPE (TREE_TYPE (array));
132 vect = vect_create_destination_var (scalar_dest, vect_type);
133 array_ref = build4 (ARRAY_REF, vect_type, array,
134 build_int_cst (size_type_node, n),
135 NULL_TREE, NULL_TREE);
136
137 new_stmt = gimple_build_assign (vect, array_ref);
138 vect_name = make_ssa_name (vect, new_stmt);
139 gimple_assign_set_lhs (new_stmt, vect_name);
140 vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
141
142 return vect_name;
143 }
144
145 /* ARRAY is an array of vectors created by create_vector_array.
146 Emit code to store SSA_NAME VECT in index N of the array.
147 The store is part of the vectorization of STMT_INFO. */
148
149 static void
150 write_vector_array (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
151 tree vect, tree array, unsigned HOST_WIDE_INT n)
152 {
153 tree array_ref;
154 gimple *new_stmt;
155
156 array_ref = build4 (ARRAY_REF, TREE_TYPE (vect), array,
157 build_int_cst (size_type_node, n),
158 NULL_TREE, NULL_TREE);
159
160 new_stmt = gimple_build_assign (array_ref, vect);
161 vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
162 }
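
/* For illustration (hypothetical names): given an array VECT_ARRAY of type
   V4SI[2] created by create_vector_array, read_vector_array at index 1 emits
   a statement of the form

     vect_x_1 = vect_array[1];

   and returns vect_x_1, while write_vector_array at index 0 emits

     vect_array[0] = vect_y_2;

   i.e. both are plain ARRAY_REF accesses of one vector element.  */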
163
164 /* PTR is a pointer to an array of type TYPE. Return a representation
165 of *PTR. The memory reference replaces those in FIRST_DR
166 (and its group). */
167
168 static tree
169 create_array_ref (tree type, tree ptr, tree alias_ptr_type)
170 {
171 tree mem_ref;
172
173 mem_ref = build2 (MEM_REF, type, ptr, build_int_cst (alias_ptr_type, 0));
174 /* Arrays have the same alignment as their type. */
175 set_ptr_info_alignment (get_ptr_info (ptr), TYPE_ALIGN_UNIT (type), 0);
176 return mem_ref;
177 }
178
179 /* Add a clobber of variable VAR to the vectorization of STMT_INFO.
180 Emit the clobber before *GSI. */
181
182 static void
183 vect_clobber_variable (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
184 tree var)
185 {
186 tree clobber = build_clobber (TREE_TYPE (var));
187 gimple *new_stmt = gimple_build_assign (var, clobber);
188 vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
189 }
190
191 /* Utility functions used by vect_mark_stmts_to_be_vectorized. */
192
193 /* Function vect_mark_relevant.
194
195 Mark STMT_INFO as "relevant for vectorization" and add it to WORKLIST. */
196
197 static void
198 vect_mark_relevant (vec<stmt_vec_info> *worklist, stmt_vec_info stmt_info,
199 enum vect_relevant relevant, bool live_p)
200 {
201 enum vect_relevant save_relevant = STMT_VINFO_RELEVANT (stmt_info);
202 bool save_live_p = STMT_VINFO_LIVE_P (stmt_info);
203
204 if (dump_enabled_p ())
205 dump_printf_loc (MSG_NOTE, vect_location,
206 "mark relevant %d, live %d: %G", relevant, live_p,
207 stmt_info->stmt);
208
209 /* If this stmt is an original stmt in a pattern, we might need to mark its
210 related pattern stmt instead of the original stmt. However, such stmts
211 may have their own uses that are not in any pattern; in such cases the
212 stmt itself should be marked. */
213 if (STMT_VINFO_IN_PATTERN_P (stmt_info))
214 {
215 /* This is the last stmt in a sequence that was detected as a
216 pattern that can potentially be vectorized. Don't mark the stmt
217 as relevant/live because it's not going to be vectorized.
218 Instead mark the pattern-stmt that replaces it. */
219
220 if (dump_enabled_p ())
221 dump_printf_loc (MSG_NOTE, vect_location,
222 "last stmt in pattern. don't mark"
223 " relevant/live.\n");
224 stmt_vec_info old_stmt_info = stmt_info;
225 stmt_info = STMT_VINFO_RELATED_STMT (stmt_info);
226 gcc_assert (STMT_VINFO_RELATED_STMT (stmt_info) == old_stmt_info);
227 save_relevant = STMT_VINFO_RELEVANT (stmt_info);
228 save_live_p = STMT_VINFO_LIVE_P (stmt_info);
229 }
230
231 STMT_VINFO_LIVE_P (stmt_info) |= live_p;
232 if (relevant > STMT_VINFO_RELEVANT (stmt_info))
233 STMT_VINFO_RELEVANT (stmt_info) = relevant;
234
235 if (STMT_VINFO_RELEVANT (stmt_info) == save_relevant
236 && STMT_VINFO_LIVE_P (stmt_info) == save_live_p)
237 {
238 if (dump_enabled_p ())
239 dump_printf_loc (MSG_NOTE, vect_location,
240 "already marked relevant/live.\n");
241 return;
242 }
243
244 worklist->safe_push (stmt_info);
245 }
246
247
248 /* Function is_simple_and_all_uses_invariant
249
250 Return true if STMT_INFO is simple and all of its operands are loop-invariant. */
251
252 bool
253 is_simple_and_all_uses_invariant (stmt_vec_info stmt_info,
254 loop_vec_info loop_vinfo)
255 {
256 tree op;
257 ssa_op_iter iter;
258
259 gassign *stmt = dyn_cast <gassign *> (stmt_info->stmt);
260 if (!stmt)
261 return false;
262
263 FOR_EACH_SSA_TREE_OPERAND (op, stmt, iter, SSA_OP_USE)
264 {
265 enum vect_def_type dt = vect_uninitialized_def;
266
267 if (!vect_is_simple_use (op, loop_vinfo, &dt))
268 {
269 if (dump_enabled_p ())
270 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
271 "use not simple.\n");
272 return false;
273 }
274
275 if (dt != vect_external_def && dt != vect_constant_def)
276 return false;
277 }
278 return true;
279 }
280
281 /* Function vect_stmt_relevant_p.
282
283 Return true if STMT_INFO, in the loop that is represented by LOOP_VINFO,
284 is "relevant for vectorization".
285
286 A stmt is considered "relevant for vectorization" if:
287 - it has uses outside the loop.
288 - it has vdefs (it alters memory).
289 - it is a control stmt in the loop (except for the exit condition).
290
291 CHECKME: what other side effects would the vectorizer allow? */
292
293 static bool
294 vect_stmt_relevant_p (stmt_vec_info stmt_info, loop_vec_info loop_vinfo,
295 enum vect_relevant *relevant, bool *live_p)
296 {
297 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
298 ssa_op_iter op_iter;
299 imm_use_iterator imm_iter;
300 use_operand_p use_p;
301 def_operand_p def_p;
302
303 *relevant = vect_unused_in_scope;
304 *live_p = false;
305
306 /* cond stmt other than loop exit cond. */
307 if (is_ctrl_stmt (stmt_info->stmt)
308 && STMT_VINFO_TYPE (stmt_info) != loop_exit_ctrl_vec_info_type)
309 *relevant = vect_used_in_scope;
310
311 /* changing memory. */
312 if (gimple_code (stmt_info->stmt) != GIMPLE_PHI)
313 if (gimple_vdef (stmt_info->stmt)
314 && !gimple_clobber_p (stmt_info->stmt))
315 {
316 if (dump_enabled_p ())
317 dump_printf_loc (MSG_NOTE, vect_location,
318 "vec_stmt_relevant_p: stmt has vdefs.\n");
319 *relevant = vect_used_in_scope;
320 }
321
322 /* uses outside the loop. */
323 FOR_EACH_PHI_OR_STMT_DEF (def_p, stmt_info->stmt, op_iter, SSA_OP_DEF)
324 {
325 FOR_EACH_IMM_USE_FAST (use_p, imm_iter, DEF_FROM_PTR (def_p))
326 {
327 basic_block bb = gimple_bb (USE_STMT (use_p));
328 if (!flow_bb_inside_loop_p (loop, bb))
329 {
330 if (dump_enabled_p ())
331 dump_printf_loc (MSG_NOTE, vect_location,
332 "vec_stmt_relevant_p: used out of loop.\n");
333
334 if (is_gimple_debug (USE_STMT (use_p)))
335 continue;
336
337 /* We expect all such uses to be in the loop exit phis
338 (because of loop-closed SSA form). */
339 gcc_assert (gimple_code (USE_STMT (use_p)) == GIMPLE_PHI);
340 gcc_assert (bb == single_exit (loop)->dest);
341
342 *live_p = true;
343 }
344 }
345 }
346
347 if (*live_p && *relevant == vect_unused_in_scope
348 && !is_simple_and_all_uses_invariant (stmt_info, loop_vinfo))
349 {
350 if (dump_enabled_p ())
351 dump_printf_loc (MSG_NOTE, vect_location,
352 "vec_stmt_relevant_p: stmt live but not relevant.\n");
353 *relevant = vect_used_only_live;
354 }
355
356 return (*live_p || *relevant);
357 }
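
/* Worked example (hypothetical loop): in

     for (i = 0; i < n; i++)
       b[i] = a[i] + x;

   the store to b[i] has a vdef, so *RELEVANT is set to vect_used_in_scope.
   A statement whose result is only used in the loop exit phis (for instance
   the final value of an accumulator) gets *LIVE_P = true instead; if such a
   statement is otherwise unused and not invariant, *RELEVANT is upgraded to
   vect_used_only_live.  */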
358
359
360 /* Function exist_non_indexing_operands_for_use_p
361
362 USE is one of the uses attached to STMT_INFO. Check if USE is
363 used in STMT_INFO for anything other than indexing an array. */
364
365 static bool
366 exist_non_indexing_operands_for_use_p (tree use, stmt_vec_info stmt_info)
367 {
368 tree operand;
369
370 /* USE corresponds to some operand in STMT. If there is no data
371 reference in STMT, then any operand that corresponds to USE
372 is not indexing an array. */
373 if (!STMT_VINFO_DATA_REF (stmt_info))
374 return true;
375
376 /* STMT has a data_ref. FORNOW this means that it is one of
377 the following forms:
378 -1- ARRAY_REF = var
379 -2- var = ARRAY_REF
380 (This should have been verified in analyze_data_refs).
381
382 'var' in the second case corresponds to a def, not a use,
383 so USE cannot correspond to any operands that are not used
384 for array indexing.
385
386 Therefore, all we need to check is if STMT falls into the
387 first case, and whether var corresponds to USE. */
388
389 gassign *assign = dyn_cast <gassign *> (stmt_info->stmt);
390 if (!assign || !gimple_assign_copy_p (assign))
391 {
392 gcall *call = dyn_cast <gcall *> (stmt_info->stmt);
393 if (call && gimple_call_internal_p (call))
394 {
395 internal_fn ifn = gimple_call_internal_fn (call);
396 int mask_index = internal_fn_mask_index (ifn);
397 if (mask_index >= 0
398 && use == gimple_call_arg (call, mask_index))
399 return true;
400 int stored_value_index = internal_fn_stored_value_index (ifn);
401 if (stored_value_index >= 0
402 && use == gimple_call_arg (call, stored_value_index))
403 return true;
404 if (internal_gather_scatter_fn_p (ifn)
405 && use == gimple_call_arg (call, 1))
406 return true;
407 }
408 return false;
409 }
410
411 if (TREE_CODE (gimple_assign_lhs (assign)) == SSA_NAME)
412 return false;
413 operand = gimple_assign_rhs1 (assign);
414 if (TREE_CODE (operand) != SSA_NAME)
415 return false;
416
417 if (operand == use)
418 return true;
419
420 return false;
421 }
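
/* For illustration (hypothetical statement): in

     a[i_5] = x_7;

   the use of x_7 is the stored value, so the function returns true for x_7,
   whereas i_5 only feeds the address computation, so the function returns
   false for it and its def-stmt need not be vectorized for this use.  */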
422
423
424 /*
425 Function process_use.
426
427 Inputs:
428 - a USE in STMT_VINFO in a loop represented by LOOP_VINFO
429 - RELEVANT - enum value to be set in the STMT_VINFO of the stmt
430 that defined USE. This is done by calling mark_relevant and passing it
431 the WORKLIST (to add DEF_STMT to the WORKLIST in case it is relevant).
432 - FORCE is true if exist_non_indexing_operands_for_use_p check shouldn't
433 be performed.
434
435 Outputs:
436 Generally, LIVE_P and RELEVANT are used to define the liveness and
437 relevance info of the DEF_STMT of this USE:
438 STMT_VINFO_LIVE_P (DEF_stmt_vinfo) <-- live_p
439 STMT_VINFO_RELEVANT (DEF_stmt_vinfo) <-- relevant
440 Exceptions:
441 - case 1: If USE is used only for address computations (e.g. array indexing),
442 which does not need to be directly vectorized, then the liveness/relevance
443 of the respective DEF_STMT is left unchanged.
444 - case 2: If STMT_VINFO is a reduction phi and DEF_STMT is a reduction stmt,
445 we skip DEF_STMT because it has already been processed.
446 - case 3: If DEF_STMT and STMT_VINFO are in different nests, then
447 "relevant" will be modified accordingly.
448
449 Return true if everything is as expected. Return false otherwise. */
450
451 static bool
452 process_use (stmt_vec_info stmt_vinfo, tree use, loop_vec_info loop_vinfo,
453 enum vect_relevant relevant, vec<stmt_vec_info> *worklist,
454 bool force)
455 {
456 stmt_vec_info dstmt_vinfo;
457 basic_block bb, def_bb;
458 enum vect_def_type dt;
459
460 /* case 1: we are only interested in uses that need to be vectorized. Uses
461 that are used for address computation are not considered relevant. */
462 if (!force && !exist_non_indexing_operands_for_use_p (use, stmt_vinfo))
463 return true;
464
465 if (!vect_is_simple_use (use, loop_vinfo, &dt, &dstmt_vinfo))
466 {
467 if (dump_enabled_p ())
468 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
469 "not vectorized: unsupported use in stmt.\n");
470 return false;
471 }
472
473 if (!dstmt_vinfo)
474 return true;
475
476 def_bb = gimple_bb (dstmt_vinfo->stmt);
477
478 /* case 2: A reduction phi (STMT) defined by a reduction stmt (DSTMT_VINFO).
479 DSTMT_VINFO must have already been processed, because this should be the
480 only way that STMT, which is a reduction-phi, was put in the worklist,
481 as there should be no other uses for DSTMT_VINFO in the loop. So we just
482 check that everything is as expected, and we are done. */
483 bb = gimple_bb (stmt_vinfo->stmt);
484 if (gimple_code (stmt_vinfo->stmt) == GIMPLE_PHI
485 && STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_reduction_def
486 && gimple_code (dstmt_vinfo->stmt) != GIMPLE_PHI
487 && STMT_VINFO_DEF_TYPE (dstmt_vinfo) == vect_reduction_def
488 && bb->loop_father == def_bb->loop_father)
489 {
490 if (dump_enabled_p ())
491 dump_printf_loc (MSG_NOTE, vect_location,
492 "reduc-stmt defining reduc-phi in the same nest.\n");
493 gcc_assert (STMT_VINFO_RELEVANT (dstmt_vinfo) < vect_used_by_reduction);
494 gcc_assert (STMT_VINFO_LIVE_P (dstmt_vinfo)
495 || STMT_VINFO_RELEVANT (dstmt_vinfo) > vect_unused_in_scope);
496 return true;
497 }
498
499 /* case 3a: outer-loop stmt defining an inner-loop stmt:
500 outer-loop-header-bb:
501 d = dstmt_vinfo
502 inner-loop:
503 stmt # use (d)
504 outer-loop-tail-bb:
505 ... */
506 if (flow_loop_nested_p (def_bb->loop_father, bb->loop_father))
507 {
508 if (dump_enabled_p ())
509 dump_printf_loc (MSG_NOTE, vect_location,
510 "outer-loop def-stmt defining inner-loop stmt.\n");
511
512 switch (relevant)
513 {
514 case vect_unused_in_scope:
515 relevant = (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_nested_cycle) ?
516 vect_used_in_scope : vect_unused_in_scope;
517 break;
518
519 case vect_used_in_outer_by_reduction:
520 gcc_assert (STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_reduction_def);
521 relevant = vect_used_by_reduction;
522 break;
523
524 case vect_used_in_outer:
525 gcc_assert (STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_reduction_def);
526 relevant = vect_used_in_scope;
527 break;
528
529 case vect_used_in_scope:
530 break;
531
532 default:
533 gcc_unreachable ();
534 }
535 }
536
537 /* case 3b: inner-loop stmt defining an outer-loop stmt:
538 outer-loop-header-bb:
539 ...
540 inner-loop:
541 d = dstmt_vinfo
542 outer-loop-tail-bb (or outer-loop-exit-bb in double reduction):
543 stmt # use (d) */
544 else if (flow_loop_nested_p (bb->loop_father, def_bb->loop_father))
545 {
546 if (dump_enabled_p ())
547 dump_printf_loc (MSG_NOTE, vect_location,
548 "inner-loop def-stmt defining outer-loop stmt.\n");
549
550 switch (relevant)
551 {
552 case vect_unused_in_scope:
553 relevant = (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_reduction_def
554 || STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_double_reduction_def) ?
555 vect_used_in_outer_by_reduction : vect_unused_in_scope;
556 break;
557
558 case vect_used_by_reduction:
559 case vect_used_only_live:
560 relevant = vect_used_in_outer_by_reduction;
561 break;
562
563 case vect_used_in_scope:
564 relevant = vect_used_in_outer;
565 break;
566
567 default:
568 gcc_unreachable ();
569 }
570 }
571 /* We are also not interested in uses on loop PHI backedges that are
572 inductions. Otherwise we'll needlessly vectorize the IV increment
573 and cause hybrid SLP for SLP inductions. Unless the PHI is live
574 of course. */
575 else if (gimple_code (stmt_vinfo->stmt) == GIMPLE_PHI
576 && STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_induction_def
577 && ! STMT_VINFO_LIVE_P (stmt_vinfo)
578 && (PHI_ARG_DEF_FROM_EDGE (stmt_vinfo->stmt,
579 loop_latch_edge (bb->loop_father))
580 == use))
581 {
582 if (dump_enabled_p ())
583 dump_printf_loc (MSG_NOTE, vect_location,
584 "induction value on backedge.\n");
585 return true;
586 }
587
588
589 vect_mark_relevant (worklist, dstmt_vinfo, relevant, false);
590 return true;
591 }
592
593
594 /* Function vect_mark_stmts_to_be_vectorized.
595
596 Not all stmts in the loop need to be vectorized. For example:
597
598 for i...
599 for j...
600 1. T0 = i + j
601 2. T1 = a[T0]
602
603 3. j = j + 1
604
605 Stmt 1 and 3 do not need to be vectorized, because loop control and
606 addressing of vectorized data-refs are handled differently.
607
608 This pass detects such stmts. */
609
610 bool
611 vect_mark_stmts_to_be_vectorized (loop_vec_info loop_vinfo)
612 {
613 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
614 basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
615 unsigned int nbbs = loop->num_nodes;
616 gimple_stmt_iterator si;
617 unsigned int i;
618 basic_block bb;
619 bool live_p;
620 enum vect_relevant relevant;
621
622 DUMP_VECT_SCOPE ("vect_mark_stmts_to_be_vectorized");
623
624 auto_vec<stmt_vec_info, 64> worklist;
625
626 /* 1. Init worklist. */
627 for (i = 0; i < nbbs; i++)
628 {
629 bb = bbs[i];
630 for (si = gsi_start_phis (bb); !gsi_end_p (si); gsi_next (&si))
631 {
632 stmt_vec_info phi_info = loop_vinfo->lookup_stmt (gsi_stmt (si));
633 if (dump_enabled_p ())
634 dump_printf_loc (MSG_NOTE, vect_location, "init: phi relevant? %G",
635 phi_info->stmt);
636
637 if (vect_stmt_relevant_p (phi_info, loop_vinfo, &relevant, &live_p))
638 vect_mark_relevant (&worklist, phi_info, relevant, live_p);
639 }
640 for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
641 {
642 stmt_vec_info stmt_info = loop_vinfo->lookup_stmt (gsi_stmt (si));
643 if (dump_enabled_p ())
644 dump_printf_loc (MSG_NOTE, vect_location,
645 "init: stmt relevant? %G", stmt_info->stmt);
646
647 if (vect_stmt_relevant_p (stmt_info, loop_vinfo, &relevant, &live_p))
648 vect_mark_relevant (&worklist, stmt_info, relevant, live_p);
649 }
650 }
651
652 /* 2. Process_worklist */
653 while (worklist.length () > 0)
654 {
655 use_operand_p use_p;
656 ssa_op_iter iter;
657
658 stmt_vec_info stmt_vinfo = worklist.pop ();
659 if (dump_enabled_p ())
660 dump_printf_loc (MSG_NOTE, vect_location,
661 "worklist: examine stmt: %G", stmt_vinfo->stmt);
662
663 /* Examine the USEs of STMT. For each USE, mark the stmt that defines it
664 (DEF_STMT) as relevant/irrelevant according to the relevance property
665 of STMT. */
666 relevant = STMT_VINFO_RELEVANT (stmt_vinfo);
667
668 /* Generally, the relevance property of STMT (in STMT_VINFO_RELEVANT) is
669 propagated as is to the DEF_STMTs of its USEs.
670
671 One exception is when STMT has been identified as defining a reduction
672 variable; in this case we set the relevance to vect_used_by_reduction.
673 This is because we distinguish between two kinds of relevant stmts -
674 those that are used by a reduction computation, and those that are
675 (also) used by a regular computation. This allows us later on to
676 identify stmts that are used solely by a reduction, and therefore the
677 order of the results that they produce does not have to be kept. */
678
679 switch (STMT_VINFO_DEF_TYPE (stmt_vinfo))
680 {
681 case vect_reduction_def:
682 gcc_assert (relevant != vect_unused_in_scope);
683 if (relevant != vect_unused_in_scope
684 && relevant != vect_used_in_scope
685 && relevant != vect_used_by_reduction
686 && relevant != vect_used_only_live)
687 {
688 if (dump_enabled_p ())
689 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
690 "unsupported use of reduction.\n");
691 return false;
692 }
693 break;
694
695 case vect_nested_cycle:
696 if (relevant != vect_unused_in_scope
697 && relevant != vect_used_in_outer_by_reduction
698 && relevant != vect_used_in_outer)
699 {
700 if (dump_enabled_p ())
701 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
702 "unsupported use of nested cycle.\n");
703
704 return false;
705 }
706 break;
707
708 case vect_double_reduction_def:
709 if (relevant != vect_unused_in_scope
710 && relevant != vect_used_by_reduction
711 && relevant != vect_used_only_live)
712 {
713 if (dump_enabled_p ())
714 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
715 "unsupported use of double reduction.\n");
716
717 return false;
718 }
719 break;
720
721 default:
722 break;
723 }
724
725 if (is_pattern_stmt_p (stmt_vinfo))
726 {
727 /* Pattern statements are not inserted into the code, so
728 FOR_EACH_PHI_OR_STMT_USE optimizes their operands out, and we
729 have to scan the RHS or function arguments instead. */
730 if (gassign *assign = dyn_cast <gassign *> (stmt_vinfo->stmt))
731 {
732 enum tree_code rhs_code = gimple_assign_rhs_code (assign);
733 tree op = gimple_assign_rhs1 (assign);
734
735 i = 1;
736 if (rhs_code == COND_EXPR && COMPARISON_CLASS_P (op))
737 {
738 if (!process_use (stmt_vinfo, TREE_OPERAND (op, 0),
739 loop_vinfo, relevant, &worklist, false)
740 || !process_use (stmt_vinfo, TREE_OPERAND (op, 1),
741 loop_vinfo, relevant, &worklist, false))
742 return false;
743 i = 2;
744 }
745 for (; i < gimple_num_ops (assign); i++)
746 {
747 op = gimple_op (assign, i);
748 if (TREE_CODE (op) == SSA_NAME
749 && !process_use (stmt_vinfo, op, loop_vinfo, relevant,
750 &worklist, false))
751 return false;
752 }
753 }
754 else if (gcall *call = dyn_cast <gcall *> (stmt_vinfo->stmt))
755 {
756 for (i = 0; i < gimple_call_num_args (call); i++)
757 {
758 tree arg = gimple_call_arg (call, i);
759 if (!process_use (stmt_vinfo, arg, loop_vinfo, relevant,
760 &worklist, false))
761 return false;
762 }
763 }
764 }
765 else
766 FOR_EACH_PHI_OR_STMT_USE (use_p, stmt_vinfo->stmt, iter, SSA_OP_USE)
767 {
768 tree op = USE_FROM_PTR (use_p);
769 if (!process_use (stmt_vinfo, op, loop_vinfo, relevant,
770 &worklist, false))
771 return false;
772 }
773
774 if (STMT_VINFO_GATHER_SCATTER_P (stmt_vinfo))
775 {
776 gather_scatter_info gs_info;
777 if (!vect_check_gather_scatter (stmt_vinfo, loop_vinfo, &gs_info))
778 gcc_unreachable ();
779 if (!process_use (stmt_vinfo, gs_info.offset, loop_vinfo, relevant,
780 &worklist, true))
781 return false;
782 }
783 } /* while worklist */
784
785 return true;
786 }
787
788 /* Compute the prologue cost for invariant or constant operands. */
789
790 static unsigned
791 vect_prologue_cost_for_slp_op (slp_tree node, stmt_vec_info stmt_info,
792 unsigned opno, enum vect_def_type dt,
793 stmt_vector_for_cost *cost_vec)
794 {
795 gimple *stmt = SLP_TREE_SCALAR_STMTS (node)[0]->stmt;
796 tree op = gimple_op (stmt, opno);
797 unsigned prologue_cost = 0;
798
799 /* Without looking at the actual initializer a vector of
800 constants can be implemented as a load from the constant pool.
801 When all elements are the same we can use a splat. */
802 tree vectype = get_vectype_for_scalar_type (TREE_TYPE (op));
803 unsigned group_size = SLP_TREE_SCALAR_STMTS (node).length ();
804 unsigned num_vects_to_check;
805 unsigned HOST_WIDE_INT const_nunits;
806 unsigned nelt_limit;
807 if (TYPE_VECTOR_SUBPARTS (vectype).is_constant (&const_nunits)
808 && ! multiple_p (const_nunits, group_size))
809 {
810 num_vects_to_check = SLP_TREE_NUMBER_OF_VEC_STMTS (node);
811 nelt_limit = const_nunits;
812 }
813 else
814 {
815 /* If either the vector has variable length or the vectors
816 are composed of repeated whole groups we only need to
817 cost construction once. All vectors will be the same. */
818 num_vects_to_check = 1;
819 nelt_limit = group_size;
820 }
821 tree elt = NULL_TREE;
822 unsigned nelt = 0;
823 for (unsigned j = 0; j < num_vects_to_check * nelt_limit; ++j)
824 {
825 unsigned si = j % group_size;
826 if (nelt == 0)
827 elt = gimple_op (SLP_TREE_SCALAR_STMTS (node)[si]->stmt, opno);
828 /* ??? We're just tracking whether all operands of a single
829 vector initializer are the same, ideally we'd check if
830 we emitted the same one already. */
831 else if (elt != gimple_op (SLP_TREE_SCALAR_STMTS (node)[si]->stmt,
832 opno))
833 elt = NULL_TREE;
834 nelt++;
835 if (nelt == nelt_limit)
836 {
837 /* ??? We need to pass down stmt_info for a vector type
838 even if it points to the wrong stmt. */
839 prologue_cost += record_stmt_cost
840 (cost_vec, 1,
841 dt == vect_external_def
842 ? (elt ? scalar_to_vec : vec_construct)
843 : vector_load,
844 stmt_info, 0, vect_prologue);
845 nelt = 0;
846 }
847 }
848
849 return prologue_cost;
850 }
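
/* For illustration (assumed SLP group of four lanes): if the costed operand
   is external to the loop and identical in every lane, ELT stays non-NULL
   and a single scalar_to_vec (splat) is recorded; if the lanes differ, ELT
   becomes NULL_TREE and a vec_construct is recorded instead.  For a
   vect_constant_def operand the vector can be taken from the constant pool,
   so a vector_load is recorded regardless of whether the lanes match.  */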
851
852 /* Function vect_model_simple_cost.
853
854 Models cost for simple operations, i.e. those that only emit ncopies of a
855 single op. Right now, this does not account for multiple insns that could
856 be generated for the single vector op. We will handle that shortly. */
857
858 static void
859 vect_model_simple_cost (stmt_vec_info stmt_info, int ncopies,
860 enum vect_def_type *dt,
861 int ndts,
862 slp_tree node,
863 stmt_vector_for_cost *cost_vec)
864 {
865 int inside_cost = 0, prologue_cost = 0;
866
867 gcc_assert (cost_vec != NULL);
868
869 /* ??? Somehow we need to fix this at the callers. */
870 if (node)
871 ncopies = SLP_TREE_NUMBER_OF_VEC_STMTS (node);
872
873 if (node)
874 {
875 /* Scan operands and account for prologue cost of constants/externals.
876 ??? This over-estimates cost for multiple uses and should be
877 re-engineered. */
878 gimple *stmt = SLP_TREE_SCALAR_STMTS (node)[0]->stmt;
879 tree lhs = gimple_get_lhs (stmt);
880 for (unsigned i = 0; i < gimple_num_ops (stmt); ++i)
881 {
882 tree op = gimple_op (stmt, i);
883 enum vect_def_type dt;
884 if (!op || op == lhs)
885 continue;
886 if (vect_is_simple_use (op, stmt_info->vinfo, &dt)
887 && (dt == vect_constant_def || dt == vect_external_def))
888 prologue_cost += vect_prologue_cost_for_slp_op (node, stmt_info,
889 i, dt, cost_vec);
890 }
891 }
892 else
893 /* Cost the "broadcast" of a scalar operand into a vector operand.
894 Use scalar_to_vec to cost the broadcast, as elsewhere in the vector
895 cost model. */
896 for (int i = 0; i < ndts; i++)
897 if (dt[i] == vect_constant_def || dt[i] == vect_external_def)
898 prologue_cost += record_stmt_cost (cost_vec, 1, scalar_to_vec,
899 stmt_info, 0, vect_prologue);
900
901 /* Adjust for two-operator SLP nodes. */
902 if (node && SLP_TREE_TWO_OPERATORS (node))
903 {
904 ncopies *= 2;
905 inside_cost += record_stmt_cost (cost_vec, ncopies, vec_perm,
906 stmt_info, 0, vect_body);
907 }
908
909 /* Pass the inside-of-loop statements to the target-specific cost model. */
910 inside_cost += record_stmt_cost (cost_vec, ncopies, vector_stmt,
911 stmt_info, 0, vect_body);
912
913 if (dump_enabled_p ())
914 dump_printf_loc (MSG_NOTE, vect_location,
915 "vect_model_simple_cost: inside_cost = %d, "
916 "prologue_cost = %d .\n", inside_cost, prologue_cost);
917 }
918
919
920 /* Model cost for type demotion and promotion operations. PWR is normally
921 zero for single-step promotions and demotions. It will be one if
922 two-step promotion/demotion is required, and so on. Each additional
923 step doubles the number of instructions required. */
924
925 static void
926 vect_model_promotion_demotion_cost (stmt_vec_info stmt_info,
927 enum vect_def_type *dt, int pwr,
928 stmt_vector_for_cost *cost_vec)
929 {
930 int i, tmp;
931 int inside_cost = 0, prologue_cost = 0;
932
933 for (i = 0; i < pwr + 1; i++)
934 {
935 tmp = (STMT_VINFO_TYPE (stmt_info) == type_promotion_vec_info_type) ?
936 (i + 1) : i;
937 inside_cost += record_stmt_cost (cost_vec, vect_pow2 (tmp),
938 vec_promote_demote, stmt_info, 0,
939 vect_body);
940 }
941
942 /* FORNOW: Assuming maximum 2 args per stmts. */
943 for (i = 0; i < 2; i++)
944 if (dt[i] == vect_constant_def || dt[i] == vect_external_def)
945 prologue_cost += record_stmt_cost (cost_vec, 1, vector_stmt,
946 stmt_info, 0, vect_prologue);
947
948 if (dump_enabled_p ())
949 dump_printf_loc (MSG_NOTE, vect_location,
950 "vect_model_promotion_demotion_cost: inside_cost = %d, "
951 "prologue_cost = %d .\n", inside_cost, prologue_cost);
952 }
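
/* Worked arithmetic (hypothetical case): for a two-step promotion (PWR = 1)
   the loop above records vect_pow2 (1) + vect_pow2 (2) = 2 + 4 = 6
   vec_promote_demote operations, while the corresponding two-step demotion
   records vect_pow2 (0) + vect_pow2 (1) = 1 + 2 = 3, since each promotion
   step doubles the number of result vectors.  */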
953
954 /* Function vect_model_store_cost
955
956 Models cost for stores. In the case of grouped accesses, one access
957 has the overhead of the grouped access attributed to it. */
958
959 static void
960 vect_model_store_cost (stmt_vec_info stmt_info, int ncopies,
961 enum vect_def_type dt,
962 vect_memory_access_type memory_access_type,
963 vec_load_store_type vls_type, slp_tree slp_node,
964 stmt_vector_for_cost *cost_vec)
965 {
966 unsigned int inside_cost = 0, prologue_cost = 0;
967 stmt_vec_info first_stmt_info = stmt_info;
968 bool grouped_access_p = STMT_VINFO_GROUPED_ACCESS (stmt_info);
969
970 /* ??? Somehow we need to fix this at the callers. */
971 if (slp_node)
972 ncopies = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
973
974 if (vls_type == VLS_STORE_INVARIANT)
975 {
976 if (slp_node)
977 prologue_cost += vect_prologue_cost_for_slp_op (slp_node, stmt_info,
978 1, dt, cost_vec);
979 else
980 prologue_cost += record_stmt_cost (cost_vec, 1, scalar_to_vec,
981 stmt_info, 0, vect_prologue);
982 }
983
984 /* Grouped stores update all elements in the group at once,
985 so we want the DR for the first statement. */
986 if (!slp_node && grouped_access_p)
987 first_stmt_info = DR_GROUP_FIRST_ELEMENT (stmt_info);
988
989 /* True if we should include any once-per-group costs as well as
990 the cost of the statement itself. For SLP we only get called
991 once per group anyhow. */
992 bool first_stmt_p = (first_stmt_info == stmt_info);
993
994 /* We assume that the cost of a single store-lanes instruction is
995 equivalent to the cost of DR_GROUP_SIZE separate stores. If a grouped
996 access is instead being provided by a permute-and-store operation,
997 include the cost of the permutes. */
998 if (first_stmt_p
999 && memory_access_type == VMAT_CONTIGUOUS_PERMUTE)
1000 {
1001 /* Uses high and low interleave or shuffle operations for each
1002 needed permute. */
1003 int group_size = DR_GROUP_SIZE (first_stmt_info);
1004 int nstmts = ncopies * ceil_log2 (group_size) * group_size;
1005 inside_cost = record_stmt_cost (cost_vec, nstmts, vec_perm,
1006 stmt_info, 0, vect_body);
1007
1008 if (dump_enabled_p ())
1009 dump_printf_loc (MSG_NOTE, vect_location,
1010 "vect_model_store_cost: strided group_size = %d .\n",
1011 group_size);
1012 }
1013
1014 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
1015 /* Costs of the stores. */
1016 if (memory_access_type == VMAT_ELEMENTWISE
1017 || memory_access_type == VMAT_GATHER_SCATTER)
1018 {
1019 /* N scalar stores plus extracting the elements. */
1020 unsigned int assumed_nunits = vect_nunits_for_cost (vectype);
1021 inside_cost += record_stmt_cost (cost_vec,
1022 ncopies * assumed_nunits,
1023 scalar_store, stmt_info, 0, vect_body);
1024 }
1025 else
1026 vect_get_store_cost (stmt_info, ncopies, &inside_cost, cost_vec);
1027
1028 if (memory_access_type == VMAT_ELEMENTWISE
1029 || memory_access_type == VMAT_STRIDED_SLP)
1030 {
1031 /* N scalar stores plus extracting the elements. */
1032 unsigned int assumed_nunits = vect_nunits_for_cost (vectype);
1033 inside_cost += record_stmt_cost (cost_vec,
1034 ncopies * assumed_nunits,
1035 vec_to_scalar, stmt_info, 0, vect_body);
1036 }
1037
1038 if (dump_enabled_p ())
1039 dump_printf_loc (MSG_NOTE, vect_location,
1040 "vect_model_store_cost: inside_cost = %d, "
1041 "prologue_cost = %d .\n", inside_cost, prologue_cost);
1042 }
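
/* Worked arithmetic (hypothetical numbers): for a VMAT_CONTIGUOUS_PERMUTE
   grouped store with DR_GROUP_SIZE = 4 and NCOPIES = 1, the permute count is
   1 * ceil_log2 (4) * 4 = 8 vec_perm operations charged to the first
   statement of the group, in addition to the store cost computed by
   vect_get_store_cost.  */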
1043
1044
1045 /* Calculate cost of DR's memory access. */
1046 void
1047 vect_get_store_cost (stmt_vec_info stmt_info, int ncopies,
1048 unsigned int *inside_cost,
1049 stmt_vector_for_cost *body_cost_vec)
1050 {
1051 dr_vec_info *dr_info = STMT_VINFO_DR_INFO (stmt_info);
1052 int alignment_support_scheme
1053 = vect_supportable_dr_alignment (dr_info, false);
1054
1055 switch (alignment_support_scheme)
1056 {
1057 case dr_aligned:
1058 {
1059 *inside_cost += record_stmt_cost (body_cost_vec, ncopies,
1060 vector_store, stmt_info, 0,
1061 vect_body);
1062
1063 if (dump_enabled_p ())
1064 dump_printf_loc (MSG_NOTE, vect_location,
1065 "vect_model_store_cost: aligned.\n");
1066 break;
1067 }
1068
1069 case dr_unaligned_supported:
1070 {
1071 /* Here, we assign an additional cost for the unaligned store. */
1072 *inside_cost += record_stmt_cost (body_cost_vec, ncopies,
1073 unaligned_store, stmt_info,
1074 DR_MISALIGNMENT (dr_info),
1075 vect_body);
1076 if (dump_enabled_p ())
1077 dump_printf_loc (MSG_NOTE, vect_location,
1078 "vect_model_store_cost: unaligned supported by "
1079 "hardware.\n");
1080 break;
1081 }
1082
1083 case dr_unaligned_unsupported:
1084 {
1085 *inside_cost = VECT_MAX_COST;
1086
1087 if (dump_enabled_p ())
1088 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1089 "vect_model_store_cost: unsupported access.\n");
1090 break;
1091 }
1092
1093 default:
1094 gcc_unreachable ();
1095 }
1096 }
1097
1098
1099 /* Function vect_model_load_cost
1100
1101 Models cost for loads. In the case of grouped accesses, one access has
1102 the overhead of the grouped access attributed to it. Since unaligned
1103 accesses are supported for loads, we also account for the costs of the
1104 access scheme chosen. */
1105
1106 static void
1107 vect_model_load_cost (stmt_vec_info stmt_info, unsigned ncopies,
1108 vect_memory_access_type memory_access_type,
1109 slp_instance instance,
1110 slp_tree slp_node,
1111 stmt_vector_for_cost *cost_vec)
1112 {
1113 unsigned int inside_cost = 0, prologue_cost = 0;
1114 bool grouped_access_p = STMT_VINFO_GROUPED_ACCESS (stmt_info);
1115
1116 gcc_assert (cost_vec);
1117
1118 /* ??? Somehow we need to fix this at the callers. */
1119 if (slp_node)
1120 ncopies = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
1121
1122 if (slp_node && SLP_TREE_LOAD_PERMUTATION (slp_node).exists ())
1123 {
1124 /* If the load is permuted then the alignment is determined by
1125 the first group element not by the first scalar stmt DR. */
1126 stmt_vec_info first_stmt_info = DR_GROUP_FIRST_ELEMENT (stmt_info);
1127 /* Record the cost for the permutation. */
1128 unsigned n_perms;
1129 unsigned assumed_nunits
1130 = vect_nunits_for_cost (STMT_VINFO_VECTYPE (first_stmt_info));
1131 unsigned slp_vf = (ncopies * assumed_nunits) / instance->group_size;
1132 vect_transform_slp_perm_load (slp_node, vNULL, NULL,
1133 slp_vf, instance, true,
1134 &n_perms);
1135 inside_cost += record_stmt_cost (cost_vec, n_perms, vec_perm,
1136 first_stmt_info, 0, vect_body);
1137 /* And adjust the number of loads performed. This handles
1138 redundancies as well as loads that are later dead. */
1139 auto_sbitmap perm (DR_GROUP_SIZE (first_stmt_info));
1140 bitmap_clear (perm);
1141 for (unsigned i = 0;
1142 i < SLP_TREE_LOAD_PERMUTATION (slp_node).length (); ++i)
1143 bitmap_set_bit (perm, SLP_TREE_LOAD_PERMUTATION (slp_node)[i]);
1144 ncopies = 0;
1145 bool load_seen = false;
1146 for (unsigned i = 0; i < DR_GROUP_SIZE (first_stmt_info); ++i)
1147 {
1148 if (i % assumed_nunits == 0)
1149 {
1150 if (load_seen)
1151 ncopies++;
1152 load_seen = false;
1153 }
1154 if (bitmap_bit_p (perm, i))
1155 load_seen = true;
1156 }
1157 if (load_seen)
1158 ncopies++;
1159 gcc_assert (ncopies
1160 <= (DR_GROUP_SIZE (first_stmt_info)
1161 - DR_GROUP_GAP (first_stmt_info)
1162 + assumed_nunits - 1) / assumed_nunits);
1163 }
1164
1165 /* Grouped loads read all elements in the group at once,
1166 so we want the DR for the first statement. */
1167 stmt_vec_info first_stmt_info = stmt_info;
1168 if (!slp_node && grouped_access_p)
1169 first_stmt_info = DR_GROUP_FIRST_ELEMENT (stmt_info);
1170
1171 /* True if we should include any once-per-group costs as well as
1172 the cost of the statement itself. For SLP we only get called
1173 once per group anyhow. */
1174 bool first_stmt_p = (first_stmt_info == stmt_info);
1175
1176 /* We assume that the cost of a single load-lanes instruction is
1177 equivalent to the cost of DR_GROUP_SIZE separate loads. If a grouped
1178 access is instead being provided by a load-and-permute operation,
1179 include the cost of the permutes. */
1180 if (first_stmt_p
1181 && memory_access_type == VMAT_CONTIGUOUS_PERMUTE)
1182 {
1183 /* Uses even and odd extract operations or shuffle operations
1184 for each needed permute. */
1185 int group_size = DR_GROUP_SIZE (first_stmt_info);
1186 int nstmts = ncopies * ceil_log2 (group_size) * group_size;
1187 inside_cost += record_stmt_cost (cost_vec, nstmts, vec_perm,
1188 stmt_info, 0, vect_body);
1189
1190 if (dump_enabled_p ())
1191 dump_printf_loc (MSG_NOTE, vect_location,
1192 "vect_model_load_cost: strided group_size = %d .\n",
1193 group_size);
1194 }
1195
1196 /* The loads themselves. */
1197 if (memory_access_type == VMAT_ELEMENTWISE
1198 || memory_access_type == VMAT_GATHER_SCATTER)
1199 {
1200 /* N scalar loads plus gathering them into a vector. */
1201 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
1202 unsigned int assumed_nunits = vect_nunits_for_cost (vectype);
1203 inside_cost += record_stmt_cost (cost_vec,
1204 ncopies * assumed_nunits,
1205 scalar_load, stmt_info, 0, vect_body);
1206 }
1207 else
1208 vect_get_load_cost (stmt_info, ncopies, first_stmt_p,
1209 &inside_cost, &prologue_cost,
1210 cost_vec, cost_vec, true);
1211 if (memory_access_type == VMAT_ELEMENTWISE
1212 || memory_access_type == VMAT_STRIDED_SLP)
1213 inside_cost += record_stmt_cost (cost_vec, ncopies, vec_construct,
1214 stmt_info, 0, vect_body);
1215
1216 if (dump_enabled_p ())
1217 dump_printf_loc (MSG_NOTE, vect_location,
1218 "vect_model_load_cost: inside_cost = %d, "
1219 "prologue_cost = %d .\n", inside_cost, prologue_cost);
1220 }
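
/* For illustration (assumed numbers): with an SLP load permutation over a
   group of size 4, ASSUMED_NUNITS = 4 and only elements 0 and 2 used, the
   PERM bitmap has two bits set, both fall within one vector, and NCOPIES is
   reduced to 1, so only one vector load plus N_PERMS vec_perm operations
   are costed.  */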
1221
1222
1223 /* Calculate cost of DR's memory access. */
1224 void
1225 vect_get_load_cost (stmt_vec_info stmt_info, int ncopies,
1226 bool add_realign_cost, unsigned int *inside_cost,
1227 unsigned int *prologue_cost,
1228 stmt_vector_for_cost *prologue_cost_vec,
1229 stmt_vector_for_cost *body_cost_vec,
1230 bool record_prologue_costs)
1231 {
1232 dr_vec_info *dr_info = STMT_VINFO_DR_INFO (stmt_info);
1233 int alignment_support_scheme
1234 = vect_supportable_dr_alignment (dr_info, false);
1235
1236 switch (alignment_support_scheme)
1237 {
1238 case dr_aligned:
1239 {
1240 *inside_cost += record_stmt_cost (body_cost_vec, ncopies, vector_load,
1241 stmt_info, 0, vect_body);
1242
1243 if (dump_enabled_p ())
1244 dump_printf_loc (MSG_NOTE, vect_location,
1245 "vect_model_load_cost: aligned.\n");
1246
1247 break;
1248 }
1249 case dr_unaligned_supported:
1250 {
1251 /* Here, we assign an additional cost for the unaligned load. */
1252 *inside_cost += record_stmt_cost (body_cost_vec, ncopies,
1253 unaligned_load, stmt_info,
1254 DR_MISALIGNMENT (dr_info),
1255 vect_body);
1256
1257 if (dump_enabled_p ())
1258 dump_printf_loc (MSG_NOTE, vect_location,
1259 "vect_model_load_cost: unaligned supported by "
1260 "hardware.\n");
1261
1262 break;
1263 }
1264 case dr_explicit_realign:
1265 {
1266 *inside_cost += record_stmt_cost (body_cost_vec, ncopies * 2,
1267 vector_load, stmt_info, 0, vect_body);
1268 *inside_cost += record_stmt_cost (body_cost_vec, ncopies,
1269 vec_perm, stmt_info, 0, vect_body);
1270
1271 /* FIXME: If the misalignment remains fixed across the iterations of
1272 the containing loop, the following cost should be added to the
1273 prologue costs. */
1274 if (targetm.vectorize.builtin_mask_for_load)
1275 *inside_cost += record_stmt_cost (body_cost_vec, 1, vector_stmt,
1276 stmt_info, 0, vect_body);
1277
1278 if (dump_enabled_p ())
1279 dump_printf_loc (MSG_NOTE, vect_location,
1280 "vect_model_load_cost: explicit realign\n");
1281
1282 break;
1283 }
1284 case dr_explicit_realign_optimized:
1285 {
1286 if (dump_enabled_p ())
1287 dump_printf_loc (MSG_NOTE, vect_location,
1288 "vect_model_load_cost: unaligned software "
1289 "pipelined.\n");
1290
1291 /* Unaligned software pipeline has a load of an address, an initial
1292 load, and possibly a mask operation to "prime" the loop. However,
1293 if this is an access in a group of loads, which provide grouped
1294 access, then the above cost should only be considered for one
1295 access in the group. Inside the loop, there is a load op
1296 and a realignment op. */
1297
1298 if (add_realign_cost && record_prologue_costs)
1299 {
1300 *prologue_cost += record_stmt_cost (prologue_cost_vec, 2,
1301 vector_stmt, stmt_info,
1302 0, vect_prologue);
1303 if (targetm.vectorize.builtin_mask_for_load)
1304 *prologue_cost += record_stmt_cost (prologue_cost_vec, 1,
1305 vector_stmt, stmt_info,
1306 0, vect_prologue);
1307 }
1308
1309 *inside_cost += record_stmt_cost (body_cost_vec, ncopies, vector_load,
1310 stmt_info, 0, vect_body);
1311 *inside_cost += record_stmt_cost (body_cost_vec, ncopies, vec_perm,
1312 stmt_info, 0, vect_body);
1313
1314 if (dump_enabled_p ())
1315 dump_printf_loc (MSG_NOTE, vect_location,
1316 "vect_model_load_cost: explicit realign optimized"
1317 "\n");
1318
1319 break;
1320 }
1321
1322 case dr_unaligned_unsupported:
1323 {
1324 *inside_cost = VECT_MAX_COST;
1325
1326 if (dump_enabled_p ())
1327 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1328 "vect_model_load_cost: unsupported access.\n");
1329 break;
1330 }
1331
1332 default:
1333 gcc_unreachable ();
1334 }
1335 }
1336
1337 /* Insert the new stmt NEW_STMT at *GSI or at the appropriate place in
1338 the loop preheader for the vectorized stmt STMT_VINFO. */
1339
1340 static void
1341 vect_init_vector_1 (stmt_vec_info stmt_vinfo, gimple *new_stmt,
1342 gimple_stmt_iterator *gsi)
1343 {
1344 if (gsi)
1345 vect_finish_stmt_generation (stmt_vinfo, new_stmt, gsi);
1346 else
1347 {
1348 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
1349
1350 if (loop_vinfo)
1351 {
1352 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
1353 basic_block new_bb;
1354 edge pe;
1355
1356 if (nested_in_vect_loop_p (loop, stmt_vinfo))
1357 loop = loop->inner;
1358
1359 pe = loop_preheader_edge (loop);
1360 new_bb = gsi_insert_on_edge_immediate (pe, new_stmt);
1361 gcc_assert (!new_bb);
1362 }
1363 else
1364 {
1365 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_vinfo);
1366 basic_block bb;
1367 gimple_stmt_iterator gsi_bb_start;
1368
1369 gcc_assert (bb_vinfo);
1370 bb = BB_VINFO_BB (bb_vinfo);
1371 gsi_bb_start = gsi_after_labels (bb);
1372 gsi_insert_before (&gsi_bb_start, new_stmt, GSI_SAME_STMT);
1373 }
1374 }
1375
1376 if (dump_enabled_p ())
1377 dump_printf_loc (MSG_NOTE, vect_location,
1378 "created new init_stmt: %G", new_stmt);
1379 }
1380
1381 /* Function vect_init_vector.
1382
1383 Insert a new stmt (INIT_STMT) that initializes a new variable of type
1384 TYPE with the value VAL. If TYPE is a vector type and VAL does not have
1385 vector type a vector with all elements equal to VAL is created first.
1386 Place the initialization at BSI if it is not NULL. Otherwise, place the
1387 initialization at the loop preheader.
1388 Return the DEF of INIT_STMT.
1389 It will be used in the vectorization of STMT_INFO. */
1390
1391 tree
1392 vect_init_vector (stmt_vec_info stmt_info, tree val, tree type,
1393 gimple_stmt_iterator *gsi)
1394 {
1395 gimple *init_stmt;
1396 tree new_temp;
1397
1398 /* We abuse this function to push something to an SSA name with initial 'val'. */
1399 if (! useless_type_conversion_p (type, TREE_TYPE (val)))
1400 {
1401 gcc_assert (TREE_CODE (type) == VECTOR_TYPE);
1402 if (! types_compatible_p (TREE_TYPE (type), TREE_TYPE (val)))
1403 {
1404 /* A scalar boolean value should be transformed into an
1405 all-zeros or all-ones value before building a vector.
1406 if (VECTOR_BOOLEAN_TYPE_P (type))
1407 {
1408 tree true_val = build_all_ones_cst (TREE_TYPE (type));
1409 tree false_val = build_zero_cst (TREE_TYPE (type));
1410
1411 if (CONSTANT_CLASS_P (val))
1412 val = integer_zerop (val) ? false_val : true_val;
1413 else
1414 {
1415 new_temp = make_ssa_name (TREE_TYPE (type));
1416 init_stmt = gimple_build_assign (new_temp, COND_EXPR,
1417 val, true_val, false_val);
1418 vect_init_vector_1 (stmt_info, init_stmt, gsi);
1419 val = new_temp;
1420 }
1421 }
1422 else if (CONSTANT_CLASS_P (val))
1423 val = fold_convert (TREE_TYPE (type), val);
1424 else
1425 {
1426 new_temp = make_ssa_name (TREE_TYPE (type));
1427 if (! INTEGRAL_TYPE_P (TREE_TYPE (val)))
1428 init_stmt = gimple_build_assign (new_temp,
1429 fold_build1 (VIEW_CONVERT_EXPR,
1430 TREE_TYPE (type),
1431 val));
1432 else
1433 init_stmt = gimple_build_assign (new_temp, NOP_EXPR, val);
1434 vect_init_vector_1 (stmt_info, init_stmt, gsi);
1435 val = new_temp;
1436 }
1437 }
1438 val = build_vector_from_val (type, val);
1439 }
1440
1441 new_temp = vect_get_new_ssa_name (type, vect_simple_var, "cst_");
1442 init_stmt = gimple_build_assign (new_temp, val);
1443 vect_init_vector_1 (stmt_info, init_stmt, gsi);
1444 return new_temp;
1445 }
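
/* For illustration (hypothetical values): initializing a V4SI vector from
   the integer constant 5 folds the scalar into the element type, builds the
   vector constant, and emits (in the loop preheader when GSI is NULL)

     cst_1 = { 5, 5, 5, 5 };

   returning the SSA name cst_1 for use in the vectorized statement.  */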
1446
1447 /* Function vect_get_vec_def_for_operand_1.
1448
1449 For a defining stmt DEF_STMT_INFO of a scalar stmt, return a vector def
1450 with type DT that will be used in the vectorized stmt. */
1451
1452 tree
1453 vect_get_vec_def_for_operand_1 (stmt_vec_info def_stmt_info,
1454 enum vect_def_type dt)
1455 {
1456 tree vec_oprnd;
1457 stmt_vec_info vec_stmt_info;
1458
1459 switch (dt)
1460 {
1461 /* operand is a constant or a loop invariant. */
1462 case vect_constant_def:
1463 case vect_external_def:
1464 /* Code should use vect_get_vec_def_for_operand. */
1465 gcc_unreachable ();
1466
1467 /* operand is defined inside the loop. */
1468 case vect_internal_def:
1469 {
1470 /* Get the def from the vectorized stmt. */
1471 vec_stmt_info = STMT_VINFO_VEC_STMT (def_stmt_info);
1472 /* Get vectorized pattern statement. */
1473 if (!vec_stmt_info
1474 && STMT_VINFO_IN_PATTERN_P (def_stmt_info)
1475 && !STMT_VINFO_RELEVANT (def_stmt_info))
1476 vec_stmt_info = (STMT_VINFO_VEC_STMT
1477 (STMT_VINFO_RELATED_STMT (def_stmt_info)));
1478 gcc_assert (vec_stmt_info);
1479 if (gphi *phi = dyn_cast <gphi *> (vec_stmt_info->stmt))
1480 vec_oprnd = PHI_RESULT (phi);
1481 else
1482 vec_oprnd = gimple_get_lhs (vec_stmt_info->stmt);
1483 return vec_oprnd;
1484 }
1485
1486 /* operand is defined by a loop header phi. */
1487 case vect_reduction_def:
1488 case vect_double_reduction_def:
1489 case vect_nested_cycle:
1490 case vect_induction_def:
1491 {
1492 gcc_assert (gimple_code (def_stmt_info->stmt) == GIMPLE_PHI);
1493
1494 /* Get the def from the vectorized stmt. */
1495 vec_stmt_info = STMT_VINFO_VEC_STMT (def_stmt_info);
1496 if (gphi *phi = dyn_cast <gphi *> (vec_stmt_info->stmt))
1497 vec_oprnd = PHI_RESULT (phi);
1498 else
1499 vec_oprnd = gimple_get_lhs (vec_stmt_info->stmt);
1500 return vec_oprnd;
1501 }
1502
1503 default:
1504 gcc_unreachable ();
1505 }
1506 }
1507
1508
1509 /* Function vect_get_vec_def_for_operand.
1510
1511 OP is an operand in STMT_VINFO. This function returns a (vector) def
1512 that will be used in the vectorized stmt for STMT_VINFO.
1513
1514 In the case that OP is an SSA_NAME which is defined in the loop, then
1515 STMT_VINFO_VEC_STMT of the defining stmt holds the relevant def.
1516
1517 In case OP is an invariant or constant, a new stmt that creates a vector def
1518 needs to be introduced. VECTYPE may be used to specify a required type for
1519 vector invariant. */
1520
1521 tree
1522 vect_get_vec_def_for_operand (tree op, stmt_vec_info stmt_vinfo, tree vectype)
1523 {
1524 gimple *def_stmt;
1525 enum vect_def_type dt;
1526 bool is_simple_use;
1527 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
1528
1529 if (dump_enabled_p ())
1530 dump_printf_loc (MSG_NOTE, vect_location,
1531 "vect_get_vec_def_for_operand: %T\n", op);
1532
1533 stmt_vec_info def_stmt_info;
1534 is_simple_use = vect_is_simple_use (op, loop_vinfo, &dt,
1535 &def_stmt_info, &def_stmt);
1536 gcc_assert (is_simple_use);
1537 if (def_stmt && dump_enabled_p ())
1538 dump_printf_loc (MSG_NOTE, vect_location, " def_stmt = %G", def_stmt);
1539
1540 if (dt == vect_constant_def || dt == vect_external_def)
1541 {
1542 tree stmt_vectype = STMT_VINFO_VECTYPE (stmt_vinfo);
1543 tree vector_type;
1544
1545 if (vectype)
1546 vector_type = vectype;
1547 else if (VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (op))
1548 && VECTOR_BOOLEAN_TYPE_P (stmt_vectype))
1549 vector_type = build_same_sized_truth_vector_type (stmt_vectype);
1550 else
1551 vector_type = get_vectype_for_scalar_type (TREE_TYPE (op));
1552
1553 gcc_assert (vector_type);
1554 return vect_init_vector (stmt_vinfo, op, vector_type, NULL);
1555 }
1556 else
1557 return vect_get_vec_def_for_operand_1 (def_stmt_info, dt);
1558 }
1559
1560
1561 /* Function vect_get_vec_def_for_stmt_copy
1562
1563 Return a vector-def for an operand. This function is used when the
1564 vectorized stmt to be created (by the caller to this function) is a "copy"
1565 created in case the vectorized result cannot fit in one vector, and several
1566 copies of the vector-stmt are required. In this case the vector-def is
1567 retrieved from the vector stmt recorded in the STMT_VINFO_RELATED_STMT field
1568 of the stmt that defines VEC_OPRND. VINFO describes the vectorization.
1569
1570 Context:
1571 In case the vectorization factor (VF) is bigger than the number
1572 of elements that can fit in a vectype (nunits), we have to generate
1573 more than one vector stmt to vectorize the scalar stmt. This situation
1574 arises when there are multiple data-types operated upon in the loop; the
1575 smallest data-type determines the VF, and as a result, when vectorizing
1576 stmts operating on wider types we need to create 'VF/nunits' "copies" of the
1577 vector stmt (each computing a vector of 'nunits' results, and together
1578 computing 'VF' results in each iteration). This function is called when
1579 vectorizing such a stmt (e.g. vectorizing S2 in the illustration below, in
1580 which VF=16 and nunits=4, so the number of copies required is 4):
1581
1582 scalar stmt: vectorized into: STMT_VINFO_RELATED_STMT
1583
1584 S1: x = load VS1.0: vx.0 = memref0 VS1.1
1585 VS1.1: vx.1 = memref1 VS1.2
1586 VS1.2: vx.2 = memref2 VS1.3
1587 VS1.3: vx.3 = memref3
1588
1589 S2: z = x + ... VSnew.0: vz0 = vx.0 + ... VSnew.1
1590 VSnew.1: vz1 = vx.1 + ... VSnew.2
1591 VSnew.2: vz2 = vx.2 + ... VSnew.3
1592 VSnew.3: vz3 = vx.3 + ...
1593
1594 The vectorization of S1 is explained in vectorizable_load.
1595 The vectorization of S2:
1596 To create the first vector-stmt out of the 4 copies - VSnew.0 -
1597 the function 'vect_get_vec_def_for_operand' is called to
1598 get the relevant vector-def for each operand of S2. For operand x it
1599 returns the vector-def 'vx.0'.
1600
1601 To create the remaining copies of the vector-stmt (VSnew.j), this
1602 function is called to get the relevant vector-def for each operand. It is
1603 obtained from the respective VS1.j stmt, which is recorded in the
1604 STMT_VINFO_RELATED_STMT field of the stmt that defines VEC_OPRND.
1605
1606 For example, to obtain the vector-def 'vx.1' in order to create the
1607 vector stmt 'VSnew.1', this function is called with VEC_OPRND='vx.0'.
1608 Given 'vx0' we obtain the stmt that defines it ('VS1.0'); from the
1609 STMT_VINFO_RELATED_STMT field of 'VS1.0' we obtain the next copy - 'VS1.1',
1610 and return its def ('vx.1').
1611 Overall, to create the above sequence this function will be called 3 times:
1612 vx.1 = vect_get_vec_def_for_stmt_copy (vinfo, vx.0);
1613 vx.2 = vect_get_vec_def_for_stmt_copy (vinfo, vx.1);
1614 vx.3 = vect_get_vec_def_for_stmt_copy (vinfo, vx.2); */
1615
1616 tree
1617 vect_get_vec_def_for_stmt_copy (vec_info *vinfo, tree vec_oprnd)
1618 {
1619 stmt_vec_info def_stmt_info = vinfo->lookup_def (vec_oprnd);
1620 if (!def_stmt_info)
1621 /* Do nothing; can reuse same def. */
1622 return vec_oprnd;
1623
1624 def_stmt_info = STMT_VINFO_RELATED_STMT (def_stmt_info);
1625 gcc_assert (def_stmt_info);
1626 if (gphi *phi = dyn_cast <gphi *> (def_stmt_info->stmt))
1627 vec_oprnd = PHI_RESULT (phi);
1628 else
1629 vec_oprnd = gimple_get_lhs (def_stmt_info->stmt);
1630 return vec_oprnd;
1631 }
1632
1633
1634 /* Get vectorized definitions for the operands to create a copy of an original
1635 stmt. See vect_get_vec_def_for_stmt_copy () for details. */
1636
1637 void
1638 vect_get_vec_defs_for_stmt_copy (vec_info *vinfo,
1639 vec<tree> *vec_oprnds0,
1640 vec<tree> *vec_oprnds1)
1641 {
1642 tree vec_oprnd = vec_oprnds0->pop ();
1643
1644 vec_oprnd = vect_get_vec_def_for_stmt_copy (vinfo, vec_oprnd);
1645 vec_oprnds0->quick_push (vec_oprnd);
1646
1647 if (vec_oprnds1 && vec_oprnds1->length ())
1648 {
1649 vec_oprnd = vec_oprnds1->pop ();
1650 vec_oprnd = vect_get_vec_def_for_stmt_copy (vinfo, vec_oprnd);
1651 vec_oprnds1->quick_push (vec_oprnd);
1652 }
1653 }
1654
1655
1656 /* Get vectorized definitions for OP0 and OP1. */
1657
1658 void
1659 vect_get_vec_defs (tree op0, tree op1, stmt_vec_info stmt_info,
1660 vec<tree> *vec_oprnds0,
1661 vec<tree> *vec_oprnds1,
1662 slp_tree slp_node)
1663 {
1664 if (slp_node)
1665 {
1666 int nops = (op1 == NULL_TREE) ? 1 : 2;
1667 auto_vec<tree> ops (nops);
1668 auto_vec<vec<tree> > vec_defs (nops);
1669
1670 ops.quick_push (op0);
1671 if (op1)
1672 ops.quick_push (op1);
1673
1674 vect_get_slp_defs (ops, slp_node, &vec_defs);
1675
1676 *vec_oprnds0 = vec_defs[0];
1677 if (op1)
1678 *vec_oprnds1 = vec_defs[1];
1679 }
1680 else
1681 {
1682 tree vec_oprnd;
1683
1684 vec_oprnds0->create (1);
1685 vec_oprnd = vect_get_vec_def_for_operand (op0, stmt_info);
1686 vec_oprnds0->quick_push (vec_oprnd);
1687
1688 if (op1)
1689 {
1690 vec_oprnds1->create (1);
1691 vec_oprnd = vect_get_vec_def_for_operand (op1, stmt_info);
1692 vec_oprnds1->quick_push (vec_oprnd);
1693 }
1694 }
1695 }
1696
1697 /* Helper function called by vect_finish_replace_stmt and
1698 vect_finish_stmt_generation. Set the location of the new
1699 statement and create and return a stmt_vec_info for it. */
1700
1701 static stmt_vec_info
1702 vect_finish_stmt_generation_1 (stmt_vec_info stmt_info, gimple *vec_stmt)
1703 {
1704 vec_info *vinfo = stmt_info->vinfo;
1705
1706 stmt_vec_info vec_stmt_info = vinfo->add_stmt (vec_stmt);
1707
1708 if (dump_enabled_p ())
1709 dump_printf_loc (MSG_NOTE, vect_location, "add new stmt: %G", vec_stmt);
1710
1711 gimple_set_location (vec_stmt, gimple_location (stmt_info->stmt));
1712
1713 /* While EH edges will generally prevent vectorization, stmt might
1714 e.g. be in a must-not-throw region. Ensure newly created stmts
1715 that could throw are part of the same region. */
1716 int lp_nr = lookup_stmt_eh_lp (stmt_info->stmt);
1717 if (lp_nr != 0 && stmt_could_throw_p (vec_stmt))
1718 add_stmt_to_eh_lp (vec_stmt, lp_nr);
1719
1720 return vec_stmt_info;
1721 }
1722
1723 /* Replace the scalar statement STMT_INFO with a new vector statement VEC_STMT,
1724 which sets the same scalar result as STMT_INFO did. Create and return a
1725 stmt_vec_info for VEC_STMT. */
1726
1727 stmt_vec_info
1728 vect_finish_replace_stmt (stmt_vec_info stmt_info, gimple *vec_stmt)
1729 {
1730 gcc_assert (gimple_get_lhs (stmt_info->stmt) == gimple_get_lhs (vec_stmt));
1731
1732 gimple_stmt_iterator gsi = gsi_for_stmt (stmt_info->stmt);
1733 gsi_replace (&gsi, vec_stmt, false);
1734
1735 return vect_finish_stmt_generation_1 (stmt_info, vec_stmt);
1736 }
1737
1738 /* Add VEC_STMT to the vectorized implementation of STMT_INFO and insert it
1739 before *GSI. Create and return a stmt_vec_info for VEC_STMT. */
1740
1741 stmt_vec_info
1742 vect_finish_stmt_generation (stmt_vec_info stmt_info, gimple *vec_stmt,
1743 gimple_stmt_iterator *gsi)
1744 {
1745 gcc_assert (gimple_code (stmt_info->stmt) != GIMPLE_LABEL);
1746
1747 if (!gsi_end_p (*gsi)
1748 && gimple_has_mem_ops (vec_stmt))
1749 {
1750 gimple *at_stmt = gsi_stmt (*gsi);
1751 tree vuse = gimple_vuse (at_stmt);
1752 if (vuse && TREE_CODE (vuse) == SSA_NAME)
1753 {
1754 tree vdef = gimple_vdef (at_stmt);
1755 gimple_set_vuse (vec_stmt, gimple_vuse (at_stmt));
1756 /* If we have an SSA vuse and insert a store, update virtual
1757 SSA form to avoid triggering the renamer. Do so only
1758 if we can easily see all uses - which is what almost always
1759 happens with the way vectorized stmts are inserted. */
1760 if ((vdef && TREE_CODE (vdef) == SSA_NAME)
1761 && ((is_gimple_assign (vec_stmt)
1762 && !is_gimple_reg (gimple_assign_lhs (vec_stmt)))
1763 || (is_gimple_call (vec_stmt)
1764 && !(gimple_call_flags (vec_stmt)
1765 & (ECF_CONST|ECF_PURE|ECF_NOVOPS)))))
1766 {
1767 tree new_vdef = copy_ssa_name (vuse, vec_stmt);
1768 gimple_set_vdef (vec_stmt, new_vdef);
1769 SET_USE (gimple_vuse_op (at_stmt), new_vdef);
1770 }
1771 }
1772 }
1773 gsi_insert_before (gsi, vec_stmt, GSI_SAME_STMT);
1774 return vect_finish_stmt_generation_1 (stmt_info, vec_stmt);
1775 }
1776
1777 /* We want to vectorize a call to combined function CFN with function
1778 decl FNDECL, using VECTYPE_OUT as the type of the output and VECTYPE_IN
1779 as the types of all inputs. Check whether this is possible using
1780 an internal function, returning its code if so or IFN_LAST if not. */
1781
1782 static internal_fn
1783 vectorizable_internal_function (combined_fn cfn, tree fndecl,
1784 tree vectype_out, tree vectype_in)
1785 {
1786 internal_fn ifn;
1787 if (internal_fn_p (cfn))
1788 ifn = as_internal_fn (cfn);
1789 else
1790 ifn = associated_internal_fn (fndecl);
1791 if (ifn != IFN_LAST && direct_internal_fn_p (ifn))
1792 {
1793 const direct_internal_fn_info &info = direct_internal_fn (ifn);
1794 if (info.vectorizable)
1795 {
1796 tree type0 = (info.type0 < 0 ? vectype_out : vectype_in);
1797 tree type1 = (info.type1 < 0 ? vectype_out : vectype_in);
1798 if (direct_internal_fn_supported_p (ifn, tree_pair (type0, type1),
1799 OPTIMIZE_FOR_SPEED))
1800 return ifn;
1801 }
1802 }
1803 return IFN_LAST;
1804 }
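
/* For example (a sketch; the outcome depends on the target): a call to
   sqrt with VECTYPE_OUT == VECTYPE_IN == V2DF maps to CFN_SQRT, so

     vectorizable_internal_function (CFN_SQRT, fndecl, V2DF, V2DF)

   returns IFN_SQRT when the target supports a direct V2DF square root,
   and IFN_LAST otherwise, in which case the caller falls back to
   target-specific builtins.  */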
1805
1806
1807 static tree permute_vec_elements (tree, tree, tree, stmt_vec_info,
1808 gimple_stmt_iterator *);
1809
1810 /* Check whether a load or store statement in the loop described by
1811 LOOP_VINFO is possible in a fully-masked loop. This is testing
1812 whether the vectorizer pass has the appropriate support, as well as
1813 whether the target does.
1814
1815 VLS_TYPE says whether the statement is a load or store and VECTYPE
1816 is the type of the vector being loaded or stored. MEMORY_ACCESS_TYPE
1817 says how the load or store is going to be implemented and GROUP_SIZE
1818 is the number of load or store statements in the containing group.
1819 If the access is a gather load or scatter store, GS_INFO describes
1820 its arguments.
1821
1822 Clear LOOP_VINFO_CAN_FULLY_MASK_P if a fully-masked loop is not
1823 supported, otherwise record the required mask types. */
1824
1825 static void
1826 check_load_store_masking (loop_vec_info loop_vinfo, tree vectype,
1827 vec_load_store_type vls_type, int group_size,
1828 vect_memory_access_type memory_access_type,
1829 gather_scatter_info *gs_info)
1830 {
1831 /* Invariant loads need no special support. */
1832 if (memory_access_type == VMAT_INVARIANT)
1833 return;
1834
1835 vec_loop_masks *masks = &LOOP_VINFO_MASKS (loop_vinfo);
1836 machine_mode vecmode = TYPE_MODE (vectype);
1837 bool is_load = (vls_type == VLS_LOAD);
1838 if (memory_access_type == VMAT_LOAD_STORE_LANES)
1839 {
1840 if (is_load
1841 ? !vect_load_lanes_supported (vectype, group_size, true)
1842 : !vect_store_lanes_supported (vectype, group_size, true))
1843 {
1844 if (dump_enabled_p ())
1845 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1846 "can't use a fully-masked loop because the"
1847 " target doesn't have an appropriate masked"
1848 " load/store-lanes instruction.\n");
1849 LOOP_VINFO_CAN_FULLY_MASK_P (loop_vinfo) = false;
1850 return;
1851 }
1852 unsigned int ncopies = vect_get_num_copies (loop_vinfo, vectype);
1853 vect_record_loop_mask (loop_vinfo, masks, ncopies, vectype);
1854 return;
1855 }
1856
1857 if (memory_access_type == VMAT_GATHER_SCATTER)
1858 {
1859 internal_fn ifn = (is_load
1860 ? IFN_MASK_GATHER_LOAD
1861 : IFN_MASK_SCATTER_STORE);
1862 tree offset_type = TREE_TYPE (gs_info->offset);
1863 if (!internal_gather_scatter_fn_supported_p (ifn, vectype,
1864 gs_info->memory_type,
1865 TYPE_SIGN (offset_type),
1866 gs_info->scale))
1867 {
1868 if (dump_enabled_p ())
1869 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1870 "can't use a fully-masked loop because the"
1871 " target doesn't have an appropriate masked"
1872 " gather load or scatter store instruction.\n");
1873 LOOP_VINFO_CAN_FULLY_MASK_P (loop_vinfo) = false;
1874 return;
1875 }
1876 unsigned int ncopies = vect_get_num_copies (loop_vinfo, vectype);
1877 vect_record_loop_mask (loop_vinfo, masks, ncopies, vectype);
1878 return;
1879 }
1880
1881 if (memory_access_type != VMAT_CONTIGUOUS
1882 && memory_access_type != VMAT_CONTIGUOUS_PERMUTE)
1883 {
1884 /* Element X of the data must come from iteration i * VF + X of the
1885 scalar loop. We need more work to support other mappings. */
1886 if (dump_enabled_p ())
1887 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1888 "can't use a fully-masked loop because an access"
1889 " isn't contiguous.\n");
1890 LOOP_VINFO_CAN_FULLY_MASK_P (loop_vinfo) = false;
1891 return;
1892 }
1893
1894 machine_mode mask_mode;
1895 if (!(targetm.vectorize.get_mask_mode
1896 (GET_MODE_NUNITS (vecmode),
1897 GET_MODE_SIZE (vecmode)).exists (&mask_mode))
1898 || !can_vec_mask_load_store_p (vecmode, mask_mode, is_load))
1899 {
1900 if (dump_enabled_p ())
1901 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1902 "can't use a fully-masked loop because the target"
1903 " doesn't have the appropriate masked load or"
1904 " store.\n");
1905 LOOP_VINFO_CAN_FULLY_MASK_P (loop_vinfo) = false;
1906 return;
1907 }
1908 /* We might load more scalars than we need for permuting SLP loads.
1909 We checked in get_group_load_store_type that the extra elements
1910 don't leak into a new vector. */
1911 poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
1912 poly_uint64 vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
1913 unsigned int nvectors;
1914 if (can_div_away_from_zero_p (group_size * vf, nunits, &nvectors))
1915 vect_record_loop_mask (loop_vinfo, masks, nvectors, vectype);
1916 else
1917 gcc_unreachable ();
1918 }
1919
1920 /* Return the mask input to a masked load or store. VEC_MASK is the vectorized
1921 form of the scalar mask condition and LOOP_MASK, if nonnull, is the mask
1922 that needs to be applied to all loads and stores in a vectorized loop.
1923 Return VEC_MASK if LOOP_MASK is null, otherwise return VEC_MASK & LOOP_MASK.
1924
1925 MASK_TYPE is the type of both masks. If new statements are needed,
1926 insert them before GSI. */
1927
1928 static tree
1929 prepare_load_store_mask (tree mask_type, tree loop_mask, tree vec_mask,
1930 gimple_stmt_iterator *gsi)
1931 {
1932 gcc_assert (useless_type_conversion_p (mask_type, TREE_TYPE (vec_mask)));
1933 if (!loop_mask)
1934 return vec_mask;
1935
1936 gcc_assert (TREE_TYPE (loop_mask) == mask_type);
1937 tree and_res = make_temp_ssa_name (mask_type, NULL, "vec_mask_and");
1938 gimple *and_stmt = gimple_build_assign (and_res, BIT_AND_EXPR,
1939 vec_mask, loop_mask);
1940 gsi_insert_before (gsi, and_stmt, GSI_SAME_STMT);
1941 return and_res;
1942 }
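
/* For example, when LOOP_MASK is nonnull this emits a single statement
   of the form

     vec_mask_and_N = vec_mask & loop_mask;

   before GSI and returns vec_mask_and_N; when LOOP_MASK is null it
   returns VEC_MASK unchanged.  */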
1943
1944 /* Determine whether we can use a gather load or scatter store to vectorize
1945 strided load or store STMT_INFO by truncating the current offset to a
1946 smaller width. We need to be able to construct an offset vector:
1947
1948 { 0, X, X*2, X*3, ... }
1949
1950 without loss of precision, where X is STMT_INFO's DR_STEP.
1951
1952 Return true if this is possible, describing the gather load or scatter
1953 store in GS_INFO. MASKED_P is true if the load or store is conditional. */
1954
1955 static bool
1956 vect_truncate_gather_scatter_offset (stmt_vec_info stmt_info,
1957 loop_vec_info loop_vinfo, bool masked_p,
1958 gather_scatter_info *gs_info)
1959 {
1960 dr_vec_info *dr_info = STMT_VINFO_DR_INFO (stmt_info);
1961 data_reference *dr = dr_info->dr;
1962 tree step = DR_STEP (dr);
1963 if (TREE_CODE (step) != INTEGER_CST)
1964 {
1965 /* ??? Perhaps we could use range information here? */
1966 if (dump_enabled_p ())
1967 dump_printf_loc (MSG_NOTE, vect_location,
1968 "cannot truncate variable step.\n");
1969 return false;
1970 }
1971
1972 /* Get the number of bits in an element. */
1973 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
1974 scalar_mode element_mode = SCALAR_TYPE_MODE (TREE_TYPE (vectype));
1975 unsigned int element_bits = GET_MODE_BITSIZE (element_mode);
1976
1977 /* Set COUNT to the upper limit on the number of elements - 1.
1978 Start with the maximum vectorization factor. */
1979 unsigned HOST_WIDE_INT count = vect_max_vf (loop_vinfo) - 1;
1980
1981 /* Try lowering COUNT to the number of scalar latch iterations. */
1982 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
1983 widest_int max_iters;
1984 if (max_loop_iterations (loop, &max_iters)
1985 && max_iters < count)
1986 count = max_iters.to_shwi ();
1987
1988 /* Try scales of 1 and the element size. */
1989 int scales[] = { 1, vect_get_scalar_dr_size (dr_info) };
1990 wi::overflow_type overflow = wi::OVF_NONE;
1991 for (int i = 0; i < 2; ++i)
1992 {
1993 int scale = scales[i];
1994 widest_int factor;
1995 if (!wi::multiple_of_p (wi::to_widest (step), scale, SIGNED, &factor))
1996 continue;
1997
1998       /* See whether we can calculate COUNT * STEP / SCALE
1999 	 in ELEMENT_BITS bits.  */
2000 widest_int range = wi::mul (count, factor, SIGNED, &overflow);
2001 if (overflow)
2002 continue;
2003 signop sign = range >= 0 ? UNSIGNED : SIGNED;
2004 if (wi::min_precision (range, sign) > element_bits)
2005 {
2006 overflow = wi::OVF_UNKNOWN;
2007 continue;
2008 }
2009
2010 /* See whether the target supports the operation. */
2011 tree memory_type = TREE_TYPE (DR_REF (dr));
2012 if (!vect_gather_scatter_fn_p (DR_IS_READ (dr), masked_p, vectype,
2013 memory_type, element_bits, sign, scale,
2014 &gs_info->ifn, &gs_info->element_type))
2015 continue;
2016
2017 tree offset_type = build_nonstandard_integer_type (element_bits,
2018 sign == UNSIGNED);
2019
2020 gs_info->decl = NULL_TREE;
2021 /* Logically the sum of DR_BASE_ADDRESS, DR_INIT and DR_OFFSET,
2022 but we don't need to store that here. */
2023 gs_info->base = NULL_TREE;
2024 gs_info->offset = fold_convert (offset_type, step);
2025 gs_info->offset_dt = vect_constant_def;
2026 gs_info->offset_vectype = NULL_TREE;
2027 gs_info->scale = scale;
2028 gs_info->memory_type = memory_type;
2029 return true;
2030 }
2031
2032 if (overflow && dump_enabled_p ())
2033 dump_printf_loc (MSG_NOTE, vect_location,
2034 "truncating gather/scatter offset to %d bits"
2035 " might change its value.\n", element_bits);
2036
2037 return false;
2038 }
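
/* Worked example (illustrative only): for 32-bit elements with
   DR_STEP == 4, SCALE == 4 and at most 255 latch iterations, COUNT is
   at most 255 and FACTOR == 1, so the largest offset 255 * 1 needs only
   8 bits.  The offset vector { 0, 1, 2, ... } therefore fits in the
   element width and the truncation succeeds, provided the target
   supports the resulting gather or scatter operation.  */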
2039
2040 /* Return true if we can use gather/scatter internal functions to
2041 vectorize STMT_INFO, which is a grouped or strided load or store.
2042    MASKED_P is true if the load or store is conditional.  When returning
2043 true, fill in GS_INFO with the information required to perform the
2044 operation. */
2045
2046 static bool
2047 vect_use_strided_gather_scatters_p (stmt_vec_info stmt_info,
2048 loop_vec_info loop_vinfo, bool masked_p,
2049 gather_scatter_info *gs_info)
2050 {
2051 if (!vect_check_gather_scatter (stmt_info, loop_vinfo, gs_info)
2052 || gs_info->decl)
2053 return vect_truncate_gather_scatter_offset (stmt_info, loop_vinfo,
2054 masked_p, gs_info);
2055
2056 scalar_mode element_mode = SCALAR_TYPE_MODE (gs_info->element_type);
2057 unsigned int element_bits = GET_MODE_BITSIZE (element_mode);
2058 tree offset_type = TREE_TYPE (gs_info->offset);
2059 unsigned int offset_bits = TYPE_PRECISION (offset_type);
2060
2061 /* Enforced by vect_check_gather_scatter. */
2062 gcc_assert (element_bits >= offset_bits);
2063
2064 /* If the elements are wider than the offset, convert the offset to the
2065 same width, without changing its sign. */
2066 if (element_bits > offset_bits)
2067 {
2068 bool unsigned_p = TYPE_UNSIGNED (offset_type);
2069 offset_type = build_nonstandard_integer_type (element_bits, unsigned_p);
2070 gs_info->offset = fold_convert (offset_type, gs_info->offset);
2071 }
2072
2073 if (dump_enabled_p ())
2074 dump_printf_loc (MSG_NOTE, vect_location,
2075 "using gather/scatter for strided/grouped access,"
2076 " scale = %d\n", gs_info->scale);
2077
2078 return true;
2079 }
2080
2081 /* STMT_INFO is a non-strided load or store, meaning that it accesses
2082 elements with a known constant step. Return -1 if that step
2083 is negative, 0 if it is zero, and 1 if it is greater than zero. */
2084
2085 static int
2086 compare_step_with_zero (stmt_vec_info stmt_info)
2087 {
2088 dr_vec_info *dr_info = STMT_VINFO_DR_INFO (stmt_info);
2089 return tree_int_cst_compare (vect_dr_behavior (dr_info)->step,
2090 size_zero_node);
2091 }
2092
2093 /* If the target supports a permute mask that reverses the elements in
2094 a vector of type VECTYPE, return that mask, otherwise return null. */
2095
2096 static tree
2097 perm_mask_for_reverse (tree vectype)
2098 {
2099 poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
2100
2101 /* The encoding has a single stepped pattern. */
2102 vec_perm_builder sel (nunits, 1, 3);
2103 for (int i = 0; i < 3; ++i)
2104 sel.quick_push (nunits - 1 - i);
2105
2106 vec_perm_indices indices (sel, 1, nunits);
2107 if (!can_vec_perm_const_p (TYPE_MODE (vectype), indices))
2108 return NULL_TREE;
2109 return vect_gen_perm_mask_checked (vectype, indices);
2110 }
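
/* For example, for a 4-element vector the stepped encoding above
   expands to the permutation { 3, 2, 1, 0 }, i.e. a full element
   reversal, provided the target can handle that constant
   permutation.  */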
2111
2112 /* STMT_INFO is either a masked or unconditional store. Return the value
2113 being stored. */
2114
2115 tree
2116 vect_get_store_rhs (stmt_vec_info stmt_info)
2117 {
2118 if (gassign *assign = dyn_cast <gassign *> (stmt_info->stmt))
2119 {
2120 gcc_assert (gimple_assign_single_p (assign));
2121 return gimple_assign_rhs1 (assign);
2122 }
2123 if (gcall *call = dyn_cast <gcall *> (stmt_info->stmt))
2124 {
2125 internal_fn ifn = gimple_call_internal_fn (call);
2126 int index = internal_fn_stored_value_index (ifn);
2127 gcc_assert (index >= 0);
2128 return gimple_call_arg (call, index);
2129 }
2130 gcc_unreachable ();
2131 }
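
/* For example, for a plain store "MEM[...] = x_1" this returns x_1,
   while for a masked store such as IFN_MASK_STORE it returns the call
   argument selected by internal_fn_stored_value_index, i.e. the stored
   value rather than the address or the mask.  */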
2132
2133 /* A subroutine of get_load_store_type, with a subset of the same
2134 arguments. Handle the case where STMT_INFO is part of a grouped load
2135 or store.
2136
2137 For stores, the statements in the group are all consecutive
2138 and there is no gap at the end. For loads, the statements in the
2139 group might not be consecutive; there can be gaps between statements
2140 as well as at the end. */
2141
2142 static bool
2143 get_group_load_store_type (stmt_vec_info stmt_info, tree vectype, bool slp,
2144 bool masked_p, vec_load_store_type vls_type,
2145 vect_memory_access_type *memory_access_type,
2146 gather_scatter_info *gs_info)
2147 {
2148 vec_info *vinfo = stmt_info->vinfo;
2149 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
2150 struct loop *loop = loop_vinfo ? LOOP_VINFO_LOOP (loop_vinfo) : NULL;
2151 stmt_vec_info first_stmt_info = DR_GROUP_FIRST_ELEMENT (stmt_info);
2152 dr_vec_info *first_dr_info = STMT_VINFO_DR_INFO (first_stmt_info);
2153 unsigned int group_size = DR_GROUP_SIZE (first_stmt_info);
2154 bool single_element_p = (stmt_info == first_stmt_info
2155 && !DR_GROUP_NEXT_ELEMENT (stmt_info));
2156 unsigned HOST_WIDE_INT gap = DR_GROUP_GAP (first_stmt_info);
2157 poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
2158
2159 /* True if the vectorized statements would access beyond the last
2160 statement in the group. */
2161 bool overrun_p = false;
2162
2163 /* True if we can cope with such overrun by peeling for gaps, so that
2164 there is at least one final scalar iteration after the vector loop. */
2165 bool can_overrun_p = (!masked_p
2166 && vls_type == VLS_LOAD
2167 && loop_vinfo
2168 && !loop->inner);
2169
2170 /* There can only be a gap at the end of the group if the stride is
2171 known at compile time. */
2172 gcc_assert (!STMT_VINFO_STRIDED_P (first_stmt_info) || gap == 0);
2173
2174 /* Stores can't yet have gaps. */
2175 gcc_assert (slp || vls_type == VLS_LOAD || gap == 0);
2176
2177 if (slp)
2178 {
2179 if (STMT_VINFO_STRIDED_P (first_stmt_info))
2180 {
2181 /* Try to use consecutive accesses of DR_GROUP_SIZE elements,
2182 separated by the stride, until we have a complete vector.
2183 Fall back to scalar accesses if that isn't possible. */
2184 if (multiple_p (nunits, group_size))
2185 *memory_access_type = VMAT_STRIDED_SLP;
2186 else
2187 *memory_access_type = VMAT_ELEMENTWISE;
2188 }
2189 else
2190 {
2191 overrun_p = loop_vinfo && gap != 0;
2192 if (overrun_p && vls_type != VLS_LOAD)
2193 {
2194 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2195 "Grouped store with gaps requires"
2196 " non-consecutive accesses\n");
2197 return false;
2198 }
2199 /* An overrun is fine if the trailing elements are smaller
2200 than the alignment boundary B. Every vector access will
2201 be a multiple of B and so we are guaranteed to access a
2202 non-gap element in the same B-sized block. */
2203 if (overrun_p
2204 && gap < (vect_known_alignment_in_bytes (first_dr_info)
2205 / vect_get_scalar_dr_size (first_dr_info)))
2206 overrun_p = false;
2207 if (overrun_p && !can_overrun_p)
2208 {
2209 if (dump_enabled_p ())
2210 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2211 "Peeling for outer loop is not supported\n");
2212 return false;
2213 }
2214 *memory_access_type = VMAT_CONTIGUOUS;
2215 }
2216 }
2217 else
2218 {
2219 /* We can always handle this case using elementwise accesses,
2220 but see if something more efficient is available. */
2221 *memory_access_type = VMAT_ELEMENTWISE;
2222
2223 /* If there is a gap at the end of the group then these optimizations
2224 would access excess elements in the last iteration. */
2225 bool would_overrun_p = (gap != 0);
2226 /* An overrun is fine if the trailing elements are smaller than the
2227 alignment boundary B. Every vector access will be a multiple of B
2228 and so we are guaranteed to access a non-gap element in the
2229 same B-sized block. */
2230 if (would_overrun_p
2231 && !masked_p
2232 && gap < (vect_known_alignment_in_bytes (first_dr_info)
2233 / vect_get_scalar_dr_size (first_dr_info)))
2234 would_overrun_p = false;
2235
2236 if (!STMT_VINFO_STRIDED_P (first_stmt_info)
2237 && (can_overrun_p || !would_overrun_p)
2238 && compare_step_with_zero (stmt_info) > 0)
2239 {
2240 /* First cope with the degenerate case of a single-element
2241 vector. */
2242 if (known_eq (TYPE_VECTOR_SUBPARTS (vectype), 1U))
2243 *memory_access_type = VMAT_CONTIGUOUS;
2244
2245 /* Otherwise try using LOAD/STORE_LANES. */
2246 if (*memory_access_type == VMAT_ELEMENTWISE
2247 && (vls_type == VLS_LOAD
2248 ? vect_load_lanes_supported (vectype, group_size, masked_p)
2249 : vect_store_lanes_supported (vectype, group_size,
2250 masked_p)))
2251 {
2252 *memory_access_type = VMAT_LOAD_STORE_LANES;
2253 overrun_p = would_overrun_p;
2254 }
2255
2256 /* If that fails, try using permuting loads. */
2257 if (*memory_access_type == VMAT_ELEMENTWISE
2258 && (vls_type == VLS_LOAD
2259 ? vect_grouped_load_supported (vectype, single_element_p,
2260 group_size)
2261 : vect_grouped_store_supported (vectype, group_size)))
2262 {
2263 *memory_access_type = VMAT_CONTIGUOUS_PERMUTE;
2264 overrun_p = would_overrun_p;
2265 }
2266 }
2267
2268       /* As a last resort, try using a gather load or scatter store.
2269
2270 ??? Although the code can handle all group sizes correctly,
2271 it probably isn't a win to use separate strided accesses based
2272 on nearby locations. Or, even if it's a win over scalar code,
2273 it might not be a win over vectorizing at a lower VF, if that
2274 allows us to use contiguous accesses. */
2275 if (*memory_access_type == VMAT_ELEMENTWISE
2276 && single_element_p
2277 && loop_vinfo
2278 && vect_use_strided_gather_scatters_p (stmt_info, loop_vinfo,
2279 masked_p, gs_info))
2280 *memory_access_type = VMAT_GATHER_SCATTER;
2281 }
2282
2283 if (vls_type != VLS_LOAD && first_stmt_info == stmt_info)
2284 {
2285 /* STMT is the leader of the group. Check the operands of all the
2286 stmts of the group. */
2287 stmt_vec_info next_stmt_info = DR_GROUP_NEXT_ELEMENT (stmt_info);
2288 while (next_stmt_info)
2289 {
2290 tree op = vect_get_store_rhs (next_stmt_info);
2291 enum vect_def_type dt;
2292 if (!vect_is_simple_use (op, vinfo, &dt))
2293 {
2294 if (dump_enabled_p ())
2295 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2296 "use not simple.\n");
2297 return false;
2298 }
2299 next_stmt_info = DR_GROUP_NEXT_ELEMENT (next_stmt_info);
2300 }
2301 }
2302
2303 if (overrun_p)
2304 {
2305 gcc_assert (can_overrun_p);
2306 if (dump_enabled_p ())
2307 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2308 "Data access with gaps requires scalar "
2309 "epilogue loop\n");
2310 LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo) = true;
2311 }
2312
2313 return true;
2314 }
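
/* Worked example for the overrun check above (illustrative only): with
   4-byte elements, a gap of 1 at the end of the group and a known
   16-byte alignment, gap (1) is smaller than 16 / 4 == 4, so every
   vector access is guaranteed to touch a non-gap element in the same
   16-byte block and the overrun is harmless; no gap peeling is needed
   on that account.  */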
2315
2316 /* A subroutine of get_load_store_type, with a subset of the same
2317 arguments. Handle the case where STMT_INFO is a load or store that
2318 accesses consecutive elements with a negative step. */
2319
2320 static vect_memory_access_type
2321 get_negative_load_store_type (stmt_vec_info stmt_info, tree vectype,
2322 vec_load_store_type vls_type,
2323 unsigned int ncopies)
2324 {
2325 dr_vec_info *dr_info = STMT_VINFO_DR_INFO (stmt_info);
2326 dr_alignment_support alignment_support_scheme;
2327
2328 if (ncopies > 1)
2329 {
2330 if (dump_enabled_p ())
2331 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2332 "multiple types with negative step.\n");
2333 return VMAT_ELEMENTWISE;
2334 }
2335
2336 alignment_support_scheme = vect_supportable_dr_alignment (dr_info, false);
2337 if (alignment_support_scheme != dr_aligned
2338 && alignment_support_scheme != dr_unaligned_supported)
2339 {
2340 if (dump_enabled_p ())
2341 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2342 "negative step but alignment required.\n");
2343 return VMAT_ELEMENTWISE;
2344 }
2345
2346 if (vls_type == VLS_STORE_INVARIANT)
2347 {
2348 if (dump_enabled_p ())
2349 dump_printf_loc (MSG_NOTE, vect_location,
2350 "negative step with invariant source;"
2351 " no permute needed.\n");
2352 return VMAT_CONTIGUOUS_DOWN;
2353 }
2354
2355 if (!perm_mask_for_reverse (vectype))
2356 {
2357 if (dump_enabled_p ())
2358 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2359 "negative step and reversing not supported.\n");
2360 return VMAT_ELEMENTWISE;
2361 }
2362
2363 return VMAT_CONTIGUOUS_REVERSE;
2364 }
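
/* For example (a sketch), a loop such as

     for (i = n - 1; i >= 0; --i)
       x[i] = y[i] + c;

   accesses x and y with a negative step.  With NCOPIES == 1, a
   supported alignment scheme and a reverse permutation available, both
   accesses can use VMAT_CONTIGUOUS_REVERSE; otherwise this function
   falls back to VMAT_ELEMENTWISE (or VMAT_CONTIGUOUS_DOWN for an
   invariant stored value).  */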
2365
2366 /* Analyze load or store statement STMT_INFO of type VLS_TYPE. Return true
2367 if there is a memory access type that the vectorized form can use,
2368 storing it in *MEMORY_ACCESS_TYPE if so. If we decide to use gathers
2369 or scatters, fill in GS_INFO accordingly.
2370
2371 SLP says whether we're performing SLP rather than loop vectorization.
2372 MASKED_P is true if the statement is conditional on a vectorized mask.
2373 VECTYPE is the vector type that the vectorized statements will use.
2374 NCOPIES is the number of vector statements that will be needed. */
2375
2376 static bool
2377 get_load_store_type (stmt_vec_info stmt_info, tree vectype, bool slp,
2378 bool masked_p, vec_load_store_type vls_type,
2379 unsigned int ncopies,
2380 vect_memory_access_type *memory_access_type,
2381 gather_scatter_info *gs_info)
2382 {
2383 vec_info *vinfo = stmt_info->vinfo;
2384 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
2385 poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
2386 if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
2387 {
2388 *memory_access_type = VMAT_GATHER_SCATTER;
2389 if (!vect_check_gather_scatter (stmt_info, loop_vinfo, gs_info))
2390 gcc_unreachable ();
2391 else if (!vect_is_simple_use (gs_info->offset, vinfo,
2392 &gs_info->offset_dt,
2393 &gs_info->offset_vectype))
2394 {
2395 if (dump_enabled_p ())
2396 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2397 "%s index use not simple.\n",
2398 vls_type == VLS_LOAD ? "gather" : "scatter");
2399 return false;
2400 }
2401 }
2402 else if (STMT_VINFO_GROUPED_ACCESS (stmt_info))
2403 {
2404 if (!get_group_load_store_type (stmt_info, vectype, slp, masked_p,
2405 vls_type, memory_access_type, gs_info))
2406 return false;
2407 }
2408 else if (STMT_VINFO_STRIDED_P (stmt_info))
2409 {
2410 gcc_assert (!slp);
2411 if (loop_vinfo
2412 && vect_use_strided_gather_scatters_p (stmt_info, loop_vinfo,
2413 masked_p, gs_info))
2414 *memory_access_type = VMAT_GATHER_SCATTER;
2415 else
2416 *memory_access_type = VMAT_ELEMENTWISE;
2417 }
2418 else
2419 {
2420 int cmp = compare_step_with_zero (stmt_info);
2421 if (cmp < 0)
2422 *memory_access_type = get_negative_load_store_type
2423 (stmt_info, vectype, vls_type, ncopies);
2424 else if (cmp == 0)
2425 {
2426 gcc_assert (vls_type == VLS_LOAD);
2427 *memory_access_type = VMAT_INVARIANT;
2428 }
2429 else
2430 *memory_access_type = VMAT_CONTIGUOUS;
2431 }
2432
2433 if ((*memory_access_type == VMAT_ELEMENTWISE
2434 || *memory_access_type == VMAT_STRIDED_SLP)
2435 && !nunits.is_constant ())
2436 {
2437 if (dump_enabled_p ())
2438 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2439 "Not using elementwise accesses due to variable "
2440 "vectorization factor.\n");
2441 return false;
2442 }
2443
2444 /* FIXME: At the moment the cost model seems to underestimate the
2445 cost of using elementwise accesses. This check preserves the
2446 traditional behavior until that can be fixed. */
2447 stmt_vec_info first_stmt_info = DR_GROUP_FIRST_ELEMENT (stmt_info);
2448 if (!first_stmt_info)
2449 first_stmt_info = stmt_info;
2450 if (*memory_access_type == VMAT_ELEMENTWISE
2451 && !STMT_VINFO_STRIDED_P (first_stmt_info)
2452 && !(stmt_info == DR_GROUP_FIRST_ELEMENT (stmt_info)
2453 && !DR_GROUP_NEXT_ELEMENT (stmt_info)
2454 && !pow2p_hwi (DR_GROUP_SIZE (stmt_info))))
2455 {
2456 if (dump_enabled_p ())
2457 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2458 "not falling back to elementwise accesses\n");
2459 return false;
2460 }
2461 return true;
2462 }
2463
2464 /* Return true if boolean argument MASK is suitable for vectorizing
2465 conditional load or store STMT_INFO. When returning true, store the type
2466 of the definition in *MASK_DT_OUT and the type of the vectorized mask
2467 in *MASK_VECTYPE_OUT. */
2468
2469 static bool
2470 vect_check_load_store_mask (stmt_vec_info stmt_info, tree mask,
2471 vect_def_type *mask_dt_out,
2472 tree *mask_vectype_out)
2473 {
2474 if (!VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (mask)))
2475 {
2476 if (dump_enabled_p ())
2477 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2478 "mask argument is not a boolean.\n");
2479 return false;
2480 }
2481
2482 if (TREE_CODE (mask) != SSA_NAME)
2483 {
2484 if (dump_enabled_p ())
2485 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2486 "mask argument is not an SSA name.\n");
2487 return false;
2488 }
2489
2490 enum vect_def_type mask_dt;
2491 tree mask_vectype;
2492 if (!vect_is_simple_use (mask, stmt_info->vinfo, &mask_dt, &mask_vectype))
2493 {
2494 if (dump_enabled_p ())
2495 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2496 "mask use not simple.\n");
2497 return false;
2498 }
2499
2500 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
2501 if (!mask_vectype)
2502 mask_vectype = get_mask_type_for_scalar_type (TREE_TYPE (vectype));
2503
2504 if (!mask_vectype || !VECTOR_BOOLEAN_TYPE_P (mask_vectype))
2505 {
2506 if (dump_enabled_p ())
2507 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2508 "could not find an appropriate vector mask type.\n");
2509 return false;
2510 }
2511
2512 if (maybe_ne (TYPE_VECTOR_SUBPARTS (mask_vectype),
2513 TYPE_VECTOR_SUBPARTS (vectype)))
2514 {
2515 if (dump_enabled_p ())
2516 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2517 "vector mask type %T",
2518 " does not match vector data type %T.\n",
2519 mask_vectype, vectype);
2520
2521 return false;
2522 }
2523
2524 *mask_dt_out = mask_dt;
2525 *mask_vectype_out = mask_vectype;
2526 return true;
2527 }
2528
2529 /* Return true if stored value RHS is suitable for vectorizing store
2530 statement STMT_INFO. When returning true, store the type of the
2531 definition in *RHS_DT_OUT, the type of the vectorized store value in
2532 *RHS_VECTYPE_OUT and the type of the store in *VLS_TYPE_OUT. */
2533
2534 static bool
2535 vect_check_store_rhs (stmt_vec_info stmt_info, tree rhs,
2536 vect_def_type *rhs_dt_out, tree *rhs_vectype_out,
2537 vec_load_store_type *vls_type_out)
2538 {
2539   /* In the case this is a store from a constant, make sure
2540 native_encode_expr can handle it. */
2541 if (CONSTANT_CLASS_P (rhs) && native_encode_expr (rhs, NULL, 64) == 0)
2542 {
2543 if (dump_enabled_p ())
2544 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2545 "cannot encode constant as a byte sequence.\n");
2546 return false;
2547 }
2548
2549 enum vect_def_type rhs_dt;
2550 tree rhs_vectype;
2551 if (!vect_is_simple_use (rhs, stmt_info->vinfo, &rhs_dt, &rhs_vectype))
2552 {
2553 if (dump_enabled_p ())
2554 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2555 "use not simple.\n");
2556 return false;
2557 }
2558
2559 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
2560 if (rhs_vectype && !useless_type_conversion_p (vectype, rhs_vectype))
2561 {
2562 if (dump_enabled_p ())
2563 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2564 "incompatible vector types.\n");
2565 return false;
2566 }
2567
2568 *rhs_dt_out = rhs_dt;
2569 *rhs_vectype_out = rhs_vectype;
2570 if (rhs_dt == vect_constant_def || rhs_dt == vect_external_def)
2571 *vls_type_out = VLS_STORE_INVARIANT;
2572 else
2573 *vls_type_out = VLS_STORE;
2574 return true;
2575 }
2576
2577 /* Build an all-ones vector mask of type MASKTYPE while vectorizing STMT_INFO.
2578 Note that we support masks with floating-point type, in which case the
2579 floats are interpreted as a bitmask. */
2580
2581 static tree
2582 vect_build_all_ones_mask (stmt_vec_info stmt_info, tree masktype)
2583 {
2584 if (TREE_CODE (masktype) == INTEGER_TYPE)
2585 return build_int_cst (masktype, -1);
2586 else if (TREE_CODE (TREE_TYPE (masktype)) == INTEGER_TYPE)
2587 {
2588 tree mask = build_int_cst (TREE_TYPE (masktype), -1);
2589 mask = build_vector_from_val (masktype, mask);
2590 return vect_init_vector (stmt_info, mask, masktype, NULL);
2591 }
2592 else if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (masktype)))
2593 {
2594 REAL_VALUE_TYPE r;
2595 long tmp[6];
2596 for (int j = 0; j < 6; ++j)
2597 tmp[j] = -1;
2598 real_from_target (&r, tmp, TYPE_MODE (TREE_TYPE (masktype)));
2599 tree mask = build_real (TREE_TYPE (masktype), r);
2600 mask = build_vector_from_val (masktype, mask);
2601 return vect_init_vector (stmt_info, mask, masktype, NULL);
2602 }
2603 gcc_unreachable ();
2604 }
2605
2606 /* Build an all-zero merge value of type VECTYPE while vectorizing
2607 STMT_INFO as a gather load. */
2608
2609 static tree
2610 vect_build_zero_merge_argument (stmt_vec_info stmt_info, tree vectype)
2611 {
2612 tree merge;
2613 if (TREE_CODE (TREE_TYPE (vectype)) == INTEGER_TYPE)
2614 merge = build_int_cst (TREE_TYPE (vectype), 0);
2615 else if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (vectype)))
2616 {
2617 REAL_VALUE_TYPE r;
2618 long tmp[6];
2619 for (int j = 0; j < 6; ++j)
2620 tmp[j] = 0;
2621 real_from_target (&r, tmp, TYPE_MODE (TREE_TYPE (vectype)));
2622 merge = build_real (TREE_TYPE (vectype), r);
2623 }
2624 else
2625 gcc_unreachable ();
2626 merge = build_vector_from_val (vectype, merge);
2627 return vect_init_vector (stmt_info, merge, vectype, NULL);
2628 }
2629
2630 /* Build a gather load call while vectorizing STMT_INFO. Insert new
2631 instructions before GSI and add them to VEC_STMT. GS_INFO describes
2632 the gather load operation. If the load is conditional, MASK is the
2633 unvectorized condition and MASK_DT is its definition type, otherwise
2634 MASK is null. */
2635
2636 static void
2637 vect_build_gather_load_calls (stmt_vec_info stmt_info,
2638 gimple_stmt_iterator *gsi,
2639 stmt_vec_info *vec_stmt,
2640 gather_scatter_info *gs_info,
2641 tree mask)
2642 {
2643 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
2644 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
2645 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
2646 poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
2647 int ncopies = vect_get_num_copies (loop_vinfo, vectype);
2648 edge pe = loop_preheader_edge (loop);
2649 enum { NARROW, NONE, WIDEN } modifier;
2650 poly_uint64 gather_off_nunits
2651 = TYPE_VECTOR_SUBPARTS (gs_info->offset_vectype);
2652
2653 tree arglist = TYPE_ARG_TYPES (TREE_TYPE (gs_info->decl));
2654 tree rettype = TREE_TYPE (TREE_TYPE (gs_info->decl));
2655 tree srctype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
2656 tree ptrtype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
2657 tree idxtype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
2658 tree masktype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
2659 tree scaletype = TREE_VALUE (arglist);
2660 gcc_checking_assert (types_compatible_p (srctype, rettype)
2661 && (!mask || types_compatible_p (srctype, masktype)));
2662
2663 tree perm_mask = NULL_TREE;
2664 tree mask_perm_mask = NULL_TREE;
2665 if (known_eq (nunits, gather_off_nunits))
2666 modifier = NONE;
2667 else if (known_eq (nunits * 2, gather_off_nunits))
2668 {
2669 modifier = WIDEN;
2670
2671 /* Currently widening gathers and scatters are only supported for
2672 fixed-length vectors. */
2673 int count = gather_off_nunits.to_constant ();
2674 vec_perm_builder sel (count, count, 1);
2675 for (int i = 0; i < count; ++i)
2676 sel.quick_push (i | (count / 2));
2677
2678 vec_perm_indices indices (sel, 1, count);
2679 perm_mask = vect_gen_perm_mask_checked (gs_info->offset_vectype,
2680 indices);
2681 }
2682 else if (known_eq (nunits, gather_off_nunits * 2))
2683 {
2684 modifier = NARROW;
2685
2686 /* Currently narrowing gathers and scatters are only supported for
2687 fixed-length vectors. */
2688 int count = nunits.to_constant ();
2689 vec_perm_builder sel (count, count, 1);
2690 sel.quick_grow (count);
2691 for (int i = 0; i < count; ++i)
2692 sel[i] = i < count / 2 ? i : i + count / 2;
2693 vec_perm_indices indices (sel, 2, count);
2694 perm_mask = vect_gen_perm_mask_checked (vectype, indices);
2695
2696 ncopies *= 2;
2697
2698 if (mask)
2699 {
2700 for (int i = 0; i < count; ++i)
2701 sel[i] = i | (count / 2);
2702 indices.new_vector (sel, 2, count);
2703 mask_perm_mask = vect_gen_perm_mask_checked (masktype, indices);
2704 }
2705 }
2706 else
2707 gcc_unreachable ();
2708
2709 tree scalar_dest = gimple_get_lhs (stmt_info->stmt);
2710 tree vec_dest = vect_create_destination_var (scalar_dest, vectype);
2711
2712 tree ptr = fold_convert (ptrtype, gs_info->base);
2713 if (!is_gimple_min_invariant (ptr))
2714 {
2715 gimple_seq seq;
2716 ptr = force_gimple_operand (ptr, &seq, true, NULL_TREE);
2717 basic_block new_bb = gsi_insert_seq_on_edge_immediate (pe, seq);
2718 gcc_assert (!new_bb);
2719 }
2720
2721 tree scale = build_int_cst (scaletype, gs_info->scale);
2722
2723 tree vec_oprnd0 = NULL_TREE;
2724 tree vec_mask = NULL_TREE;
2725 tree src_op = NULL_TREE;
2726 tree mask_op = NULL_TREE;
2727 tree prev_res = NULL_TREE;
2728 stmt_vec_info prev_stmt_info = NULL;
2729
2730 if (!mask)
2731 {
2732 src_op = vect_build_zero_merge_argument (stmt_info, rettype);
2733 mask_op = vect_build_all_ones_mask (stmt_info, masktype);
2734 }
2735
2736 for (int j = 0; j < ncopies; ++j)
2737 {
2738 tree op, var;
2739 if (modifier == WIDEN && (j & 1))
2740 op = permute_vec_elements (vec_oprnd0, vec_oprnd0,
2741 perm_mask, stmt_info, gsi);
2742 else if (j == 0)
2743 op = vec_oprnd0
2744 = vect_get_vec_def_for_operand (gs_info->offset, stmt_info);
2745 else
2746 op = vec_oprnd0 = vect_get_vec_def_for_stmt_copy (loop_vinfo,
2747 vec_oprnd0);
2748
2749 if (!useless_type_conversion_p (idxtype, TREE_TYPE (op)))
2750 {
2751 gcc_assert (known_eq (TYPE_VECTOR_SUBPARTS (TREE_TYPE (op)),
2752 TYPE_VECTOR_SUBPARTS (idxtype)));
2753 var = vect_get_new_ssa_name (idxtype, vect_simple_var);
2754 op = build1 (VIEW_CONVERT_EXPR, idxtype, op);
2755 gassign *new_stmt = gimple_build_assign (var, VIEW_CONVERT_EXPR, op);
2756 vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
2757 op = var;
2758 }
2759
2760 if (mask)
2761 {
2762 if (mask_perm_mask && (j & 1))
2763 mask_op = permute_vec_elements (mask_op, mask_op,
2764 mask_perm_mask, stmt_info, gsi);
2765 else
2766 {
2767 if (j == 0)
2768 vec_mask = vect_get_vec_def_for_operand (mask, stmt_info);
2769 else
2770 vec_mask = vect_get_vec_def_for_stmt_copy (loop_vinfo,
2771 vec_mask);
2772
2773 mask_op = vec_mask;
2774 if (!useless_type_conversion_p (masktype, TREE_TYPE (vec_mask)))
2775 {
2776 gcc_assert
2777 (known_eq (TYPE_VECTOR_SUBPARTS (TREE_TYPE (mask_op)),
2778 TYPE_VECTOR_SUBPARTS (masktype)));
2779 var = vect_get_new_ssa_name (masktype, vect_simple_var);
2780 mask_op = build1 (VIEW_CONVERT_EXPR, masktype, mask_op);
2781 gassign *new_stmt
2782 = gimple_build_assign (var, VIEW_CONVERT_EXPR, mask_op);
2783 vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
2784 mask_op = var;
2785 }
2786 }
2787 src_op = mask_op;
2788 }
2789
2790 gcall *new_call = gimple_build_call (gs_info->decl, 5, src_op, ptr, op,
2791 mask_op, scale);
2792
2793 stmt_vec_info new_stmt_info;
2794 if (!useless_type_conversion_p (vectype, rettype))
2795 {
2796 gcc_assert (known_eq (TYPE_VECTOR_SUBPARTS (vectype),
2797 TYPE_VECTOR_SUBPARTS (rettype)));
2798 op = vect_get_new_ssa_name (rettype, vect_simple_var);
2799 gimple_call_set_lhs (new_call, op);
2800 vect_finish_stmt_generation (stmt_info, new_call, gsi);
2801 var = make_ssa_name (vec_dest);
2802 op = build1 (VIEW_CONVERT_EXPR, vectype, op);
2803 gassign *new_stmt = gimple_build_assign (var, VIEW_CONVERT_EXPR, op);
2804 new_stmt_info
2805 = vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
2806 }
2807 else
2808 {
2809 var = make_ssa_name (vec_dest, new_call);
2810 gimple_call_set_lhs (new_call, var);
2811 new_stmt_info
2812 = vect_finish_stmt_generation (stmt_info, new_call, gsi);
2813 }
2814
2815 if (modifier == NARROW)
2816 {
2817 if ((j & 1) == 0)
2818 {
2819 prev_res = var;
2820 continue;
2821 }
2822 var = permute_vec_elements (prev_res, var, perm_mask,
2823 stmt_info, gsi);
2824 new_stmt_info = loop_vinfo->lookup_def (var);
2825 }
2826
2827 if (prev_stmt_info == NULL)
2828 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt_info;
2829 else
2830 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt_info;
2831 prev_stmt_info = new_stmt_info;
2832 }
2833 }
2834
2835 /* Prepare the base and offset in GS_INFO for vectorization.
2836 Set *DATAREF_PTR to the loop-invariant base address and *VEC_OFFSET
2837 to the vectorized offset argument for the first copy of STMT_INFO.
2838 STMT_INFO is the statement described by GS_INFO and LOOP is the
2839 containing loop. */
2840
2841 static void
2842 vect_get_gather_scatter_ops (struct loop *loop, stmt_vec_info stmt_info,
2843 gather_scatter_info *gs_info,
2844 tree *dataref_ptr, tree *vec_offset)
2845 {
2846 gimple_seq stmts = NULL;
2847 *dataref_ptr = force_gimple_operand (gs_info->base, &stmts, true, NULL_TREE);
2848 if (stmts != NULL)
2849 {
2850 basic_block new_bb;
2851 edge pe = loop_preheader_edge (loop);
2852 new_bb = gsi_insert_seq_on_edge_immediate (pe, stmts);
2853 gcc_assert (!new_bb);
2854 }
2855 tree offset_type = TREE_TYPE (gs_info->offset);
2856 tree offset_vectype = get_vectype_for_scalar_type (offset_type);
2857 *vec_offset = vect_get_vec_def_for_operand (gs_info->offset, stmt_info,
2858 offset_vectype);
2859 }
2860
2861 /* Prepare to implement a grouped or strided load or store using
2862 the gather load or scatter store operation described by GS_INFO.
2863 STMT_INFO is the load or store statement.
2864
2865 Set *DATAREF_BUMP to the amount that should be added to the base
2866 address after each copy of the vectorized statement. Set *VEC_OFFSET
2867 to an invariant offset vector in which element I has the value
2868 I * DR_STEP / SCALE. */
2869
2870 static void
2871 vect_get_strided_load_store_ops (stmt_vec_info stmt_info,
2872 loop_vec_info loop_vinfo,
2873 gather_scatter_info *gs_info,
2874 tree *dataref_bump, tree *vec_offset)
2875 {
2876 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
2877 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
2878 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
2879 gimple_seq stmts;
2880
2881 tree bump = size_binop (MULT_EXPR,
2882 fold_convert (sizetype, DR_STEP (dr)),
2883 size_int (TYPE_VECTOR_SUBPARTS (vectype)));
2884 *dataref_bump = force_gimple_operand (bump, &stmts, true, NULL_TREE);
2885 if (stmts)
2886 gsi_insert_seq_on_edge_immediate (loop_preheader_edge (loop), stmts);
2887
2888 /* The offset given in GS_INFO can have pointer type, so use the element
2889 type of the vector instead. */
2890 tree offset_type = TREE_TYPE (gs_info->offset);
2891 tree offset_vectype = get_vectype_for_scalar_type (offset_type);
2892 offset_type = TREE_TYPE (offset_vectype);
2893
2894 /* Calculate X = DR_STEP / SCALE and convert it to the appropriate type. */
2895 tree step = size_binop (EXACT_DIV_EXPR, DR_STEP (dr),
2896 ssize_int (gs_info->scale));
2897 step = fold_convert (offset_type, step);
2898 step = force_gimple_operand (step, &stmts, true, NULL_TREE);
2899
2900 /* Create {0, X, X*2, X*3, ...}. */
2901 *vec_offset = gimple_build (&stmts, VEC_SERIES_EXPR, offset_vectype,
2902 build_zero_cst (offset_type), step);
2903 if (stmts)
2904 gsi_insert_seq_on_edge_immediate (loop_preheader_edge (loop), stmts);
2905 }
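
/* Worked example (illustrative only): with DR_STEP == 8, SCALE == 4
   and a 4-element offset vector, X == 8 / 4 == 2, so *VEC_OFFSET is
   set to { 0, 2, 4, 6 } and *DATAREF_BUMP to 8 * 4 == 32 bytes per
   copy.  */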
2906
2907 /* Return the amount that should be added to a vector pointer to move
2908 to the next or previous copy of AGGR_TYPE. DR_INFO is the data reference
2909 being vectorized and MEMORY_ACCESS_TYPE describes the type of
2910 vectorization. */
2911
2912 static tree
2913 vect_get_data_ptr_increment (dr_vec_info *dr_info, tree aggr_type,
2914 vect_memory_access_type memory_access_type)
2915 {
2916 if (memory_access_type == VMAT_INVARIANT)
2917 return size_zero_node;
2918
2919 tree iv_step = TYPE_SIZE_UNIT (aggr_type);
2920 tree step = vect_dr_behavior (dr_info)->step;
2921 if (tree_int_cst_sgn (step) == -1)
2922 iv_step = fold_build1 (NEGATE_EXPR, TREE_TYPE (iv_step), iv_step);
2923 return iv_step;
2924 }
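
/* For example, for a 16-byte AGGR_TYPE this returns 16 for a positive
   step, -16 for a negative step, and 0 for VMAT_INVARIANT accesses.  */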
2925
2926 /* Check and perform vectorization of BUILT_IN_BSWAP{16,32,64}. */
2927
2928 static bool
2929 vectorizable_bswap (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
2930 stmt_vec_info *vec_stmt, slp_tree slp_node,
2931 tree vectype_in, stmt_vector_for_cost *cost_vec)
2932 {
2933 tree op, vectype;
2934 gcall *stmt = as_a <gcall *> (stmt_info->stmt);
2935 vec_info *vinfo = stmt_info->vinfo;
2936 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
2937 unsigned ncopies;
2938
2939 op = gimple_call_arg (stmt, 0);
2940 vectype = STMT_VINFO_VECTYPE (stmt_info);
2941 poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
2942
2943 /* Multiple types in SLP are handled by creating the appropriate number of
2944 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
2945 case of SLP. */
2946 if (slp_node)
2947 ncopies = 1;
2948 else
2949 ncopies = vect_get_num_copies (loop_vinfo, vectype);
2950
2951 gcc_assert (ncopies >= 1);
2952
2953 tree char_vectype = get_same_sized_vectype (char_type_node, vectype_in);
2954 if (! char_vectype)
2955 return false;
2956
2957 poly_uint64 num_bytes = TYPE_VECTOR_SUBPARTS (char_vectype);
2958 unsigned word_bytes;
2959 if (!constant_multiple_p (num_bytes, nunits, &word_bytes))
2960 return false;
2961
2962 /* The encoding uses one stepped pattern for each byte in the word. */
2963 vec_perm_builder elts (num_bytes, word_bytes, 3);
2964 for (unsigned i = 0; i < 3; ++i)
2965 for (unsigned j = 0; j < word_bytes; ++j)
2966 elts.quick_push ((i + 1) * word_bytes - j - 1);
2967
2968 vec_perm_indices indices (elts, 1, num_bytes);
2969 if (!can_vec_perm_const_p (TYPE_MODE (char_vectype), indices))
2970 return false;
2971
2972 if (! vec_stmt)
2973 {
2974 STMT_VINFO_TYPE (stmt_info) = call_vec_info_type;
2975 DUMP_VECT_SCOPE ("vectorizable_bswap");
2976 if (! slp_node)
2977 {
2978 record_stmt_cost (cost_vec,
2979 1, vector_stmt, stmt_info, 0, vect_prologue);
2980 record_stmt_cost (cost_vec,
2981 ncopies, vec_perm, stmt_info, 0, vect_body);
2982 }
2983 return true;
2984 }
2985
2986 tree bswap_vconst = vec_perm_indices_to_tree (char_vectype, indices);
2987
2988 /* Transform. */
2989 vec<tree> vec_oprnds = vNULL;
2990 stmt_vec_info new_stmt_info = NULL;
2991 stmt_vec_info prev_stmt_info = NULL;
2992 for (unsigned j = 0; j < ncopies; j++)
2993 {
2994 /* Handle uses. */
2995 if (j == 0)
2996 vect_get_vec_defs (op, NULL, stmt_info, &vec_oprnds, NULL, slp_node);
2997 else
2998 vect_get_vec_defs_for_stmt_copy (vinfo, &vec_oprnds, NULL);
2999
3000       /* Arguments are ready.  Create the new vector stmt.  */
3001 unsigned i;
3002 tree vop;
3003 FOR_EACH_VEC_ELT (vec_oprnds, i, vop)
3004 {
3005 gimple *new_stmt;
3006 tree tem = make_ssa_name (char_vectype);
3007 new_stmt = gimple_build_assign (tem, build1 (VIEW_CONVERT_EXPR,
3008 char_vectype, vop));
3009 vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
3010 tree tem2 = make_ssa_name (char_vectype);
3011 new_stmt = gimple_build_assign (tem2, VEC_PERM_EXPR,
3012 tem, tem, bswap_vconst);
3013 vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
3014 tem = make_ssa_name (vectype);
3015 new_stmt = gimple_build_assign (tem, build1 (VIEW_CONVERT_EXPR,
3016 vectype, tem2));
3017 new_stmt_info
3018 = vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
3019 if (slp_node)
3020 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt_info);
3021 }
3022
3023 if (slp_node)
3024 continue;
3025
3026 if (j == 0)
3027 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt_info;
3028 else
3029 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt_info;
3030
3031 prev_stmt_info = new_stmt_info;
3032 }
3033
3034 vec_oprnds.release ();
3035 return true;
3036 }
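
/* Worked example for the byte permutation built above (illustrative
   only): for a bswap32 on a vector of four 32-bit elements viewed as
   16 bytes, WORD_BYTES == 4 and the selected permutation is

     { 3, 2, 1, 0,  7, 6, 5, 4,  11, 10, 9, 8,  15, 14, 13, 12 }

   i.e. the bytes of each 32-bit word are reversed in place.  */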
3037
3038 /* Return true if vector types VECTYPE_IN and VECTYPE_OUT have
3039 integer elements and if we can narrow VECTYPE_IN to VECTYPE_OUT
3040 in a single step. On success, store the binary pack code in
3041 *CONVERT_CODE. */
3042
3043 static bool
3044 simple_integer_narrowing (tree vectype_out, tree vectype_in,
3045 tree_code *convert_code)
3046 {
3047 if (!INTEGRAL_TYPE_P (TREE_TYPE (vectype_out))
3048 || !INTEGRAL_TYPE_P (TREE_TYPE (vectype_in)))
3049 return false;
3050
3051 tree_code code;
3052 int multi_step_cvt = 0;
3053 auto_vec <tree, 8> interm_types;
3054 if (!supportable_narrowing_operation (NOP_EXPR, vectype_out, vectype_in,
3055 &code, &multi_step_cvt,
3056 &interm_types)
3057 || multi_step_cvt)
3058 return false;
3059
3060 *convert_code = code;
3061 return true;
3062 }
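
/* For example (a sketch, subject to target support): narrowing a
   vector of four ints to a vector of eight shorts is a single-step
   conversion, typically implemented as a vector pack, so *CONVERT_CODE
   is set to the corresponding pack code and the function returns true;
   multi-step narrowings are rejected.  */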
3063
3064 /* Function vectorizable_call.
3065
3066 Check if STMT_INFO performs a function call that can be vectorized.
3067 If VEC_STMT is also passed, vectorize STMT_INFO: create a vectorized
3068 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
3069 Return true if STMT_INFO is vectorizable in this way. */
3070
3071 static bool
3072 vectorizable_call (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
3073 stmt_vec_info *vec_stmt, slp_tree slp_node,
3074 stmt_vector_for_cost *cost_vec)
3075 {
3076 gcall *stmt;
3077 tree vec_dest;
3078 tree scalar_dest;
3079 tree op;
3080 tree vec_oprnd0 = NULL_TREE, vec_oprnd1 = NULL_TREE;
3081 stmt_vec_info prev_stmt_info;
3082 tree vectype_out, vectype_in;
3083 poly_uint64 nunits_in;
3084 poly_uint64 nunits_out;
3085 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
3086 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
3087 vec_info *vinfo = stmt_info->vinfo;
3088 tree fndecl, new_temp, rhs_type;
3089 enum vect_def_type dt[4]
3090 = { vect_unknown_def_type, vect_unknown_def_type, vect_unknown_def_type,
3091 vect_unknown_def_type };
3092 int ndts = ARRAY_SIZE (dt);
3093 int ncopies, j;
3094 auto_vec<tree, 8> vargs;
3095 auto_vec<tree, 8> orig_vargs;
3096 enum { NARROW, NONE, WIDEN } modifier;
3097 size_t i, nargs;
3098 tree lhs;
3099
3100 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
3101 return false;
3102
3103 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
3104 && ! vec_stmt)
3105 return false;
3106
3107 /* Is STMT_INFO a vectorizable call? */
3108 stmt = dyn_cast <gcall *> (stmt_info->stmt);
3109 if (!stmt)
3110 return false;
3111
3112 if (gimple_call_internal_p (stmt)
3113 && (internal_load_fn_p (gimple_call_internal_fn (stmt))
3114 || internal_store_fn_p (gimple_call_internal_fn (stmt))))
3115 /* Handled by vectorizable_load and vectorizable_store. */
3116 return false;
3117
3118 if (gimple_call_lhs (stmt) == NULL_TREE
3119 || TREE_CODE (gimple_call_lhs (stmt)) != SSA_NAME)
3120 return false;
3121
3122 gcc_checking_assert (!stmt_can_throw_internal (stmt));
3123
3124 vectype_out = STMT_VINFO_VECTYPE (stmt_info);
3125
3126 /* Process function arguments. */
3127 rhs_type = NULL_TREE;
3128 vectype_in = NULL_TREE;
3129 nargs = gimple_call_num_args (stmt);
3130
3131   /* Bail out if the function has more than four arguments; we do not have
3132      interesting builtin functions to vectorize with more than two arguments
3133      except for fma.  No arguments is also not good.  */
3134 if (nargs == 0 || nargs > 4)
3135 return false;
3136
3137 /* Ignore the argument of IFN_GOMP_SIMD_LANE, it is magic. */
3138 combined_fn cfn = gimple_call_combined_fn (stmt);
3139 if (cfn == CFN_GOMP_SIMD_LANE)
3140 {
3141 nargs = 0;
3142 rhs_type = unsigned_type_node;
3143 }
3144
3145 int mask_opno = -1;
3146 if (internal_fn_p (cfn))
3147 mask_opno = internal_fn_mask_index (as_internal_fn (cfn));
3148
3149 for (i = 0; i < nargs; i++)
3150 {
3151 tree opvectype;
3152
3153 op = gimple_call_arg (stmt, i);
3154 if (!vect_is_simple_use (op, vinfo, &dt[i], &opvectype))
3155 {
3156 if (dump_enabled_p ())
3157 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3158 "use not simple.\n");
3159 return false;
3160 }
3161
3162 /* Skip the mask argument to an internal function. This operand
3163 has been converted via a pattern if necessary. */
3164 if ((int) i == mask_opno)
3165 continue;
3166
3167 /* We can only handle calls with arguments of the same type. */
3168 if (rhs_type
3169 && !types_compatible_p (rhs_type, TREE_TYPE (op)))
3170 {
3171 if (dump_enabled_p ())
3172 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3173 "argument types differ.\n");
3174 return false;
3175 }
3176 if (!rhs_type)
3177 rhs_type = TREE_TYPE (op);
3178
3179 if (!vectype_in)
3180 vectype_in = opvectype;
3181 else if (opvectype
3182 && opvectype != vectype_in)
3183 {
3184 if (dump_enabled_p ())
3185 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3186 "argument vector types differ.\n");
3187 return false;
3188 }
3189 }
3190 /* If all arguments are external or constant defs use a vector type with
3191 the same size as the output vector type. */
3192 if (!vectype_in)
3193 vectype_in = get_same_sized_vectype (rhs_type, vectype_out);
3194 if (vec_stmt)
3195 gcc_assert (vectype_in);
3196 if (!vectype_in)
3197 {
3198 if (dump_enabled_p ())
3199 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3200 "no vectype for scalar type %T\n", rhs_type);
3201
3202 return false;
3203 }
3204
3205 /* FORNOW */
3206 nunits_in = TYPE_VECTOR_SUBPARTS (vectype_in);
3207 nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
3208 if (known_eq (nunits_in * 2, nunits_out))
3209 modifier = NARROW;
3210 else if (known_eq (nunits_out, nunits_in))
3211 modifier = NONE;
3212 else if (known_eq (nunits_out * 2, nunits_in))
3213 modifier = WIDEN;
3214 else
3215 return false;
3216
3217 /* We only handle functions that do not read or clobber memory. */
3218 if (gimple_vuse (stmt))
3219 {
3220 if (dump_enabled_p ())
3221 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3222 "function reads from or writes to memory.\n");
3223 return false;
3224 }
3225
3226 /* For now, we only vectorize functions if a target specific builtin
3227 is available. TODO -- in some cases, it might be profitable to
3228 insert the calls for pieces of the vector, in order to be able
3229 to vectorize other operations in the loop. */
3230 fndecl = NULL_TREE;
3231 internal_fn ifn = IFN_LAST;
3232 tree callee = gimple_call_fndecl (stmt);
3233
3234 /* First try using an internal function. */
3235 tree_code convert_code = ERROR_MARK;
3236 if (cfn != CFN_LAST
3237 && (modifier == NONE
3238 || (modifier == NARROW
3239 && simple_integer_narrowing (vectype_out, vectype_in,
3240 &convert_code))))
3241 ifn = vectorizable_internal_function (cfn, callee, vectype_out,
3242 vectype_in);
3243
3244 /* If that fails, try asking for a target-specific built-in function. */
3245 if (ifn == IFN_LAST)
3246 {
3247 if (cfn != CFN_LAST)
3248 fndecl = targetm.vectorize.builtin_vectorized_function
3249 (cfn, vectype_out, vectype_in);
3250 else if (callee)
3251 fndecl = targetm.vectorize.builtin_md_vectorized_function
3252 (callee, vectype_out, vectype_in);
3253 }
3254
3255 if (ifn == IFN_LAST && !fndecl)
3256 {
3257 if (cfn == CFN_GOMP_SIMD_LANE
3258 && !slp_node
3259 && loop_vinfo
3260 && LOOP_VINFO_LOOP (loop_vinfo)->simduid
3261 && TREE_CODE (gimple_call_arg (stmt, 0)) == SSA_NAME
3262 && LOOP_VINFO_LOOP (loop_vinfo)->simduid
3263 == SSA_NAME_VAR (gimple_call_arg (stmt, 0)))
3264 {
3265 /* We can handle IFN_GOMP_SIMD_LANE by returning a
3266 { 0, 1, 2, ... vf - 1 } vector. */
3267 gcc_assert (nargs == 0);
3268 }
3269 else if (modifier == NONE
3270 && (gimple_call_builtin_p (stmt, BUILT_IN_BSWAP16)
3271 || gimple_call_builtin_p (stmt, BUILT_IN_BSWAP32)
3272 || gimple_call_builtin_p (stmt, BUILT_IN_BSWAP64)))
3273 return vectorizable_bswap (stmt_info, gsi, vec_stmt, slp_node,
3274 vectype_in, cost_vec);
3275 else
3276 {
3277 if (dump_enabled_p ())
3278 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3279 "function is not vectorizable.\n");
3280 return false;
3281 }
3282 }
3283
3284 if (slp_node)
3285 ncopies = 1;
3286 else if (modifier == NARROW && ifn == IFN_LAST)
3287 ncopies = vect_get_num_copies (loop_vinfo, vectype_out);
3288 else
3289 ncopies = vect_get_num_copies (loop_vinfo, vectype_in);
3290
3291 /* Sanity check: make sure that at least one copy of the vectorized stmt
3292 needs to be generated. */
3293 gcc_assert (ncopies >= 1);
3294
3295 vec_loop_masks *masks = (loop_vinfo ? &LOOP_VINFO_MASKS (loop_vinfo) : NULL);
3296 if (!vec_stmt) /* transformation not required. */
3297 {
3298 STMT_VINFO_TYPE (stmt_info) = call_vec_info_type;
3299 DUMP_VECT_SCOPE ("vectorizable_call");
3300 vect_model_simple_cost (stmt_info, ncopies, dt, ndts, slp_node, cost_vec);
3301 if (ifn != IFN_LAST && modifier == NARROW && !slp_node)
3302 record_stmt_cost (cost_vec, ncopies / 2,
3303 vec_promote_demote, stmt_info, 0, vect_body);
3304
3305 if (loop_vinfo && mask_opno >= 0)
3306 {
3307 unsigned int nvectors = (slp_node
3308 ? SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node)
3309 : ncopies);
3310 vect_record_loop_mask (loop_vinfo, masks, nvectors, vectype_out);
3311 }
3312 return true;
3313 }
3314
3315 /* Transform. */
3316
3317 if (dump_enabled_p ())
3318 dump_printf_loc (MSG_NOTE, vect_location, "transform call.\n");
3319
3320 /* Handle def. */
3321 scalar_dest = gimple_call_lhs (stmt);
3322 vec_dest = vect_create_destination_var (scalar_dest, vectype_out);
3323
3324 bool masked_loop_p = loop_vinfo && LOOP_VINFO_FULLY_MASKED_P (loop_vinfo);
3325
3326 stmt_vec_info new_stmt_info = NULL;
3327 prev_stmt_info = NULL;
3328 if (modifier == NONE || ifn != IFN_LAST)
3329 {
3330 tree prev_res = NULL_TREE;
3331 vargs.safe_grow (nargs);
3332 orig_vargs.safe_grow (nargs);
3333 for (j = 0; j < ncopies; ++j)
3334 {
3335 /* Build argument list for the vectorized call. */
3336 if (slp_node)
3337 {
3338 auto_vec<vec<tree> > vec_defs (nargs);
3339 vec<tree> vec_oprnds0;
3340
3341 for (i = 0; i < nargs; i++)
3342 vargs[i] = gimple_call_arg (stmt, i);
3343 vect_get_slp_defs (vargs, slp_node, &vec_defs);
3344 vec_oprnds0 = vec_defs[0];
3345
3346 /* Arguments are ready. Create the new vector stmt. */
3347 FOR_EACH_VEC_ELT (vec_oprnds0, i, vec_oprnd0)
3348 {
3349 size_t k;
3350 for (k = 0; k < nargs; k++)
3351 {
3352 vec<tree> vec_oprndsk = vec_defs[k];
3353 vargs[k] = vec_oprndsk[i];
3354 }
3355 if (modifier == NARROW)
3356 {
3357 /* We don't define any narrowing conditional functions
3358 at present. */
3359 gcc_assert (mask_opno < 0);
3360 tree half_res = make_ssa_name (vectype_in);
3361 gcall *call
3362 = gimple_build_call_internal_vec (ifn, vargs);
3363 gimple_call_set_lhs (call, half_res);
3364 gimple_call_set_nothrow (call, true);
3365 new_stmt_info
3366 = vect_finish_stmt_generation (stmt_info, call, gsi);
3367 if ((i & 1) == 0)
3368 {
3369 prev_res = half_res;
3370 continue;
3371 }
3372 new_temp = make_ssa_name (vec_dest);
3373 gimple *new_stmt
3374 = gimple_build_assign (new_temp, convert_code,
3375 prev_res, half_res);
3376 new_stmt_info
3377 = vect_finish_stmt_generation (stmt_info, new_stmt,
3378 gsi);
3379 }
3380 else
3381 {
3382 if (mask_opno >= 0 && masked_loop_p)
3383 {
3384 unsigned int vec_num = vec_oprnds0.length ();
3385 /* Always true for SLP. */
3386 gcc_assert (ncopies == 1);
3387 tree mask = vect_get_loop_mask (gsi, masks, vec_num,
3388 vectype_out, i);
3389 vargs[mask_opno] = prepare_load_store_mask
3390 (TREE_TYPE (mask), mask, vargs[mask_opno], gsi);
3391 }
3392
3393 gcall *call;
3394 if (ifn != IFN_LAST)
3395 call = gimple_build_call_internal_vec (ifn, vargs);
3396 else
3397 call = gimple_build_call_vec (fndecl, vargs);
3398 new_temp = make_ssa_name (vec_dest, call);
3399 gimple_call_set_lhs (call, new_temp);
3400 gimple_call_set_nothrow (call, true);
3401 new_stmt_info
3402 = vect_finish_stmt_generation (stmt_info, call, gsi);
3403 }
3404 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt_info);
3405 }
3406
3407 for (i = 0; i < nargs; i++)
3408 {
3409 vec<tree> vec_oprndsi = vec_defs[i];
3410 vec_oprndsi.release ();
3411 }
3412 continue;
3413 }
3414
3415 for (i = 0; i < nargs; i++)
3416 {
3417 op = gimple_call_arg (stmt, i);
3418 if (j == 0)
3419 vec_oprnd0
3420 = vect_get_vec_def_for_operand (op, stmt_info);
3421 else
3422 vec_oprnd0
3423 = vect_get_vec_def_for_stmt_copy (vinfo, orig_vargs[i]);
3424
3425 orig_vargs[i] = vargs[i] = vec_oprnd0;
3426 }
3427
3428 if (mask_opno >= 0 && masked_loop_p)
3429 {
3430 tree mask = vect_get_loop_mask (gsi, masks, ncopies,
3431 vectype_out, j);
3432 vargs[mask_opno]
3433 = prepare_load_store_mask (TREE_TYPE (mask), mask,
3434 vargs[mask_opno], gsi);
3435 }
3436
3437 if (cfn == CFN_GOMP_SIMD_LANE)
3438 {
3439 tree cst = build_index_vector (vectype_out, j * nunits_out, 1);
3440 tree new_var
3441 = vect_get_new_ssa_name (vectype_out, vect_simple_var, "cst_");
3442 gimple *init_stmt = gimple_build_assign (new_var, cst);
3443 vect_init_vector_1 (stmt_info, init_stmt, NULL);
3444 new_temp = make_ssa_name (vec_dest);
3445 gimple *new_stmt = gimple_build_assign (new_temp, new_var);
3446 new_stmt_info
3447 = vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
3448 }
3449 else if (modifier == NARROW)
3450 {
3451 /* We don't define any narrowing conditional functions at
3452 present. */
3453 gcc_assert (mask_opno < 0);
3454 tree half_res = make_ssa_name (vectype_in);
3455 gcall *call = gimple_build_call_internal_vec (ifn, vargs);
3456 gimple_call_set_lhs (call, half_res);
3457 gimple_call_set_nothrow (call, true);
3458 new_stmt_info
3459 = vect_finish_stmt_generation (stmt_info, call, gsi);
3460 if ((j & 1) == 0)
3461 {
3462 prev_res = half_res;
3463 continue;
3464 }
3465 new_temp = make_ssa_name (vec_dest);
3466 gassign *new_stmt = gimple_build_assign (new_temp, convert_code,
3467 prev_res, half_res);
3468 new_stmt_info
3469 = vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
3470 }
3471 else
3472 {
3473 gcall *call;
3474 if (ifn != IFN_LAST)
3475 call = gimple_build_call_internal_vec (ifn, vargs);
3476 else
3477 call = gimple_build_call_vec (fndecl, vargs);
3478 new_temp = make_ssa_name (vec_dest, call);
3479 gimple_call_set_lhs (call, new_temp);
3480 gimple_call_set_nothrow (call, true);
3481 new_stmt_info
3482 = vect_finish_stmt_generation (stmt_info, call, gsi);
3483 }
3484
3485 if (j == (modifier == NARROW ? 1 : 0))
3486 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt_info;
3487 else
3488 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt_info;
3489
3490 prev_stmt_info = new_stmt_info;
3491 }
3492 }
3493 else if (modifier == NARROW)
3494 {
3495 /* We don't define any narrowing conditional functions at present. */
3496 gcc_assert (mask_opno < 0);
3497 for (j = 0; j < ncopies; ++j)
3498 {
3499 /* Build argument list for the vectorized call. */
3500 if (j == 0)
3501 vargs.create (nargs * 2);
3502 else
3503 vargs.truncate (0);
3504
3505 if (slp_node)
3506 {
3507 auto_vec<vec<tree> > vec_defs (nargs);
3508 vec<tree> vec_oprnds0;
3509
3510 for (i = 0; i < nargs; i++)
3511 vargs.quick_push (gimple_call_arg (stmt, i));
3512 vect_get_slp_defs (vargs, slp_node, &vec_defs);
3513 vec_oprnds0 = vec_defs[0];
3514
3515 /* Arguments are ready. Create the new vector stmt. */
3516 for (i = 0; vec_oprnds0.iterate (i, &vec_oprnd0); i += 2)
3517 {
3518 size_t k;
3519 vargs.truncate (0);
3520 for (k = 0; k < nargs; k++)
3521 {
3522 vec<tree> vec_oprndsk = vec_defs[k];
3523 vargs.quick_push (vec_oprndsk[i]);
3524 vargs.quick_push (vec_oprndsk[i + 1]);
3525 }
3526 gcall *call;
3527 if (ifn != IFN_LAST)
3528 call = gimple_build_call_internal_vec (ifn, vargs);
3529 else
3530 call = gimple_build_call_vec (fndecl, vargs);
3531 new_temp = make_ssa_name (vec_dest, call);
3532 gimple_call_set_lhs (call, new_temp);
3533 gimple_call_set_nothrow (call, true);
3534 new_stmt_info
3535 = vect_finish_stmt_generation (stmt_info, call, gsi);
3536 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt_info);
3537 }
3538
3539 for (i = 0; i < nargs; i++)
3540 {
3541 vec<tree> vec_oprndsi = vec_defs[i];
3542 vec_oprndsi.release ();
3543 }
3544 continue;
3545 }
3546
3547 for (i = 0; i < nargs; i++)
3548 {
3549 op = gimple_call_arg (stmt, i);
3550 if (j == 0)
3551 {
3552 vec_oprnd0
3553 = vect_get_vec_def_for_operand (op, stmt_info);
3554 vec_oprnd1
3555 = vect_get_vec_def_for_stmt_copy (vinfo, vec_oprnd0);
3556 }
3557 else
3558 {
3559 vec_oprnd1 = gimple_call_arg (new_stmt_info->stmt,
3560 2 * i + 1);
3561 vec_oprnd0
3562 = vect_get_vec_def_for_stmt_copy (vinfo, vec_oprnd1);
3563 vec_oprnd1
3564 = vect_get_vec_def_for_stmt_copy (vinfo, vec_oprnd0);
3565 }
3566
3567 vargs.quick_push (vec_oprnd0);
3568 vargs.quick_push (vec_oprnd1);
3569 }
3570
3571 gcall *new_stmt = gimple_build_call_vec (fndecl, vargs);
3572 new_temp = make_ssa_name (vec_dest, new_stmt);
3573 gimple_call_set_lhs (new_stmt, new_temp);
3574 new_stmt_info
3575 = vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
3576
3577 if (j == 0)
3578 STMT_VINFO_VEC_STMT (stmt_info) = new_stmt_info;
3579 else
3580 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt_info;
3581
3582 prev_stmt_info = new_stmt_info;
3583 }
3584
3585 *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
3586 }
3587 else
3588 /* No current target implements this case. */
3589 return false;
3590
3591 vargs.release ();
3592
3593 /* The call in STMT might prevent it from being removed by DCE.
3594 However, we cannot remove it here, because of the way the SSA name
3595 it defines is mapped to the new definition. So just replace the
3596 rhs of the statement with something harmless. */
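   /* For example (a sketch with hypothetical names): after the vector call
      has been emitted, the original scalar statement

	lhs_3 = foo (x_1);

      is rewritten below as

	lhs_3 = 0;

      i.e. its rhs becomes build_zero_cst of the lhs type, and DCE can
      delete it once all scalar uses are gone.  */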
3597
3598 if (slp_node)
3599 return true;
3600
3601 stmt_info = vect_orig_stmt (stmt_info);
3602 lhs = gimple_get_lhs (stmt_info->stmt);
3603
3604 gassign *new_stmt
3605 = gimple_build_assign (lhs, build_zero_cst (TREE_TYPE (lhs)));
3606 vinfo->replace_stmt (gsi, stmt_info, new_stmt);
3607
3608 return true;
3609 }
3610
3611
3612 struct simd_call_arg_info
3613 {
3614 tree vectype;
3615 tree op;
3616 HOST_WIDE_INT linear_step;
3617 enum vect_def_type dt;
3618 unsigned int align;
3619 bool simd_lane_linear;
3620 };
3621
3622 /* Helper function of vectorizable_simd_clone_call. If OP, an SSA_NAME,
3623 is linear within a simd lane (but not within the whole loop), note it
3624 in *ARGINFO. */
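/* A minimal sketch of the GIMPLE shape recognized here (assumed, not taken
   from a real dump):

	_1 = .GOMP_SIMD_LANE (simduid.0_5(D));
	_2 = (sizetype) _1;
	_3 = _2 * 4;
	op_6 = &array + _3;

   The base is &array and the per-lane step is 4, so *ARGINFO is marked
   simd_lane_linear with op = &array and linear_step = 4.  */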
3625
3626 static void
3627 vect_simd_lane_linear (tree op, struct loop *loop,
3628 struct simd_call_arg_info *arginfo)
3629 {
3630 gimple *def_stmt = SSA_NAME_DEF_STMT (op);
3631
3632 if (!is_gimple_assign (def_stmt)
3633 || gimple_assign_rhs_code (def_stmt) != POINTER_PLUS_EXPR
3634 || !is_gimple_min_invariant (gimple_assign_rhs1 (def_stmt)))
3635 return;
3636
3637 tree base = gimple_assign_rhs1 (def_stmt);
3638 HOST_WIDE_INT linear_step = 0;
3639 tree v = gimple_assign_rhs2 (def_stmt);
3640 while (TREE_CODE (v) == SSA_NAME)
3641 {
3642 tree t;
3643 def_stmt = SSA_NAME_DEF_STMT (v);
3644 if (is_gimple_assign (def_stmt))
3645 switch (gimple_assign_rhs_code (def_stmt))
3646 {
3647 case PLUS_EXPR:
3648 t = gimple_assign_rhs2 (def_stmt);
3649 if (linear_step || TREE_CODE (t) != INTEGER_CST)
3650 return;
3651 base = fold_build2 (POINTER_PLUS_EXPR, TREE_TYPE (base), base, t);
3652 v = gimple_assign_rhs1 (def_stmt);
3653 continue;
3654 case MULT_EXPR:
3655 t = gimple_assign_rhs2 (def_stmt);
3656 if (linear_step || !tree_fits_shwi_p (t) || integer_zerop (t))
3657 return;
3658 linear_step = tree_to_shwi (t);
3659 v = gimple_assign_rhs1 (def_stmt);
3660 continue;
3661 CASE_CONVERT:
3662 t = gimple_assign_rhs1 (def_stmt);
3663 if (TREE_CODE (TREE_TYPE (t)) != INTEGER_TYPE
3664 || (TYPE_PRECISION (TREE_TYPE (v))
3665 < TYPE_PRECISION (TREE_TYPE (t))))
3666 return;
3667 if (!linear_step)
3668 linear_step = 1;
3669 v = t;
3670 continue;
3671 default:
3672 return;
3673 }
3674 else if (gimple_call_internal_p (def_stmt, IFN_GOMP_SIMD_LANE)
3675 && loop->simduid
3676 && TREE_CODE (gimple_call_arg (def_stmt, 0)) == SSA_NAME
3677 && (SSA_NAME_VAR (gimple_call_arg (def_stmt, 0))
3678 == loop->simduid))
3679 {
3680 if (!linear_step)
3681 linear_step = 1;
3682 arginfo->linear_step = linear_step;
3683 arginfo->op = base;
3684 arginfo->simd_lane_linear = true;
3685 return;
3686 }
3687 }
3688 }
3689
3690 /* Return the number of elements in vector type VECTYPE, which is associated
3691 with a SIMD clone. At present these vectors always have a constant
3692 length. */
3693
3694 static unsigned HOST_WIDE_INT
3695 simd_clone_subparts (tree vectype)
3696 {
3697 return TYPE_VECTOR_SUBPARTS (vectype).to_constant ();
3698 }
3699
3700 /* Function vectorizable_simd_clone_call.
3701
3702 Check if STMT_INFO performs a function call that can be vectorized
3703 by calling a simd clone of the function.
3704 If VEC_STMT is also passed, vectorize STMT_INFO: create a vectorized
3705 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
3706 Return true if STMT_INFO is vectorizable in this way. */
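/* For illustration (a hedged sketch; the clone name below is hypothetical):
   given

	#pragma omp declare simd
	float foo (float x);
	...
	for (i = 0; i < n; i++)
	  a[i] = foo (b[i]);

   the scalar call can be replaced by a call to one of foo's simd clones,
   which takes and returns whole vectors, e.g.

	vect_res = foo.simdclone (vect_b);

   The code below selects the most suitable clone for the loop's
   vectorization factor and builds the argument vectors it expects.  */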
3707
3708 static bool
3709 vectorizable_simd_clone_call (stmt_vec_info stmt_info,
3710 gimple_stmt_iterator *gsi,
3711 stmt_vec_info *vec_stmt, slp_tree slp_node,
3712 stmt_vector_for_cost *)
3713 {
3714 tree vec_dest;
3715 tree scalar_dest;
3716 tree op, type;
3717 tree vec_oprnd0 = NULL_TREE;
3718 stmt_vec_info prev_stmt_info;
3719 tree vectype;
3720 unsigned int nunits;
3721 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
3722 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
3723 vec_info *vinfo = stmt_info->vinfo;
3724 struct loop *loop = loop_vinfo ? LOOP_VINFO_LOOP (loop_vinfo) : NULL;
3725 tree fndecl, new_temp;
3726 int ncopies, j;
3727 auto_vec<simd_call_arg_info> arginfo;
3728 vec<tree> vargs = vNULL;
3729 size_t i, nargs;
3730 tree lhs, rtype, ratype;
3731 vec<constructor_elt, va_gc> *ret_ctor_elts = NULL;
3732
3733 /* Is STMT a vectorizable call? */
3734 gcall *stmt = dyn_cast <gcall *> (stmt_info->stmt);
3735 if (!stmt)
3736 return false;
3737
3738 fndecl = gimple_call_fndecl (stmt);
3739 if (fndecl == NULL_TREE)
3740 return false;
3741
3742 struct cgraph_node *node = cgraph_node::get (fndecl);
3743 if (node == NULL || node->simd_clones == NULL)
3744 return false;
3745
3746 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
3747 return false;
3748
3749 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
3750 && ! vec_stmt)
3751 return false;
3752
3753 if (gimple_call_lhs (stmt)
3754 && TREE_CODE (gimple_call_lhs (stmt)) != SSA_NAME)
3755 return false;
3756
3757 gcc_checking_assert (!stmt_can_throw_internal (stmt));
3758
3759 vectype = STMT_VINFO_VECTYPE (stmt_info);
3760
3761 if (loop_vinfo && nested_in_vect_loop_p (loop, stmt_info))
3762 return false;
3763
3764 /* FORNOW */
3765 if (slp_node)
3766 return false;
3767
3768 /* Process function arguments. */
3769 nargs = gimple_call_num_args (stmt);
3770
3771 /* Bail out if the function has zero arguments. */
3772 if (nargs == 0)
3773 return false;
3774
3775 arginfo.reserve (nargs, true);
3776
3777 for (i = 0; i < nargs; i++)
3778 {
3779 simd_call_arg_info thisarginfo;
3780 affine_iv iv;
3781
3782 thisarginfo.linear_step = 0;
3783 thisarginfo.align = 0;
3784 thisarginfo.op = NULL_TREE;
3785 thisarginfo.simd_lane_linear = false;
3786
3787 op = gimple_call_arg (stmt, i);
3788 if (!vect_is_simple_use (op, vinfo, &thisarginfo.dt,
3789 &thisarginfo.vectype)
3790 || thisarginfo.dt == vect_uninitialized_def)
3791 {
3792 if (dump_enabled_p ())
3793 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3794 "use not simple.\n");
3795 return false;
3796 }
3797
3798 if (thisarginfo.dt == vect_constant_def
3799 || thisarginfo.dt == vect_external_def)
3800 gcc_assert (thisarginfo.vectype == NULL_TREE);
3801 else
3802 gcc_assert (thisarginfo.vectype != NULL_TREE);
3803
3804 /* For linear arguments, the analysis phase should have saved
3805 the base and step in STMT_VINFO_SIMD_CLONE_INFO. */
3806 if (i * 3 + 4 <= STMT_VINFO_SIMD_CLONE_INFO (stmt_info).length ()
3807 && STMT_VINFO_SIMD_CLONE_INFO (stmt_info)[i * 3 + 2])
3808 {
3809 gcc_assert (vec_stmt);
3810 thisarginfo.linear_step
3811 = tree_to_shwi (STMT_VINFO_SIMD_CLONE_INFO (stmt_info)[i * 3 + 2]);
3812 thisarginfo.op
3813 = STMT_VINFO_SIMD_CLONE_INFO (stmt_info)[i * 3 + 1];
3814 thisarginfo.simd_lane_linear
3815 = (STMT_VINFO_SIMD_CLONE_INFO (stmt_info)[i * 3 + 3]
3816 == boolean_true_node);
3817 /* If the loop has been peeled for alignment, adjust the recorded base. */
3818 tree n1 = LOOP_VINFO_NITERS_UNCHANGED (loop_vinfo);
3819 tree n2 = LOOP_VINFO_NITERS (loop_vinfo);
3820 if (n1 != n2 && !thisarginfo.simd_lane_linear)
3821 {
3822 tree bias = fold_build2 (MINUS_EXPR, TREE_TYPE (n1), n1, n2);
3823 tree step = STMT_VINFO_SIMD_CLONE_INFO (stmt_info)[i * 3 + 2];
3824 tree opt = TREE_TYPE (thisarginfo.op);
3825 bias = fold_convert (TREE_TYPE (step), bias);
3826 bias = fold_build2 (MULT_EXPR, TREE_TYPE (step), bias, step);
3827 thisarginfo.op
3828 = fold_build2 (POINTER_TYPE_P (opt)
3829 ? POINTER_PLUS_EXPR : PLUS_EXPR, opt,
3830 thisarginfo.op, bias);
3831 }
3832 }
3833 else if (!vec_stmt
3834 && thisarginfo.dt != vect_constant_def
3835 && thisarginfo.dt != vect_external_def
3836 && loop_vinfo
3837 && TREE_CODE (op) == SSA_NAME
3838 && simple_iv (loop, loop_containing_stmt (stmt), op,
3839 &iv, false)
3840 && tree_fits_shwi_p (iv.step))
3841 {
3842 thisarginfo.linear_step = tree_to_shwi (iv.step);
3843 thisarginfo.op = iv.base;
3844 }
3845 else if ((thisarginfo.dt == vect_constant_def
3846 || thisarginfo.dt == vect_external_def)
3847 && POINTER_TYPE_P (TREE_TYPE (op)))
3848 thisarginfo.align = get_pointer_alignment (op) / BITS_PER_UNIT;
3849 /* Addresses of array elements indexed by GOMP_SIMD_LANE are
3850 linear too. */
3851 if (POINTER_TYPE_P (TREE_TYPE (op))
3852 && !thisarginfo.linear_step
3853 && !vec_stmt
3854 && thisarginfo.dt != vect_constant_def
3855 && thisarginfo.dt != vect_external_def
3856 && loop_vinfo
3857 && !slp_node
3858 && TREE_CODE (op) == SSA_NAME)
3859 vect_simd_lane_linear (op, loop, &thisarginfo);
3860
3861 arginfo.quick_push (thisarginfo);
3862 }
3863
3864 unsigned HOST_WIDE_INT vf;
3865 if (!LOOP_VINFO_VECT_FACTOR (loop_vinfo).is_constant (&vf))
3866 {
3867 if (dump_enabled_p ())
3868 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3869 "not considering SIMD clones; not yet supported"
3870 " for variable-width vectors.\n");
3871 return false;
3872 }
3873
3874 unsigned int badness = 0;
3875 struct cgraph_node *bestn = NULL;
3876 if (STMT_VINFO_SIMD_CLONE_INFO (stmt_info).exists ())
3877 bestn = cgraph_node::get (STMT_VINFO_SIMD_CLONE_INFO (stmt_info)[0]);
3878 else
3879 for (struct cgraph_node *n = node->simd_clones; n != NULL;
3880 n = n->simdclone->next_clone)
3881 {
3882 unsigned int this_badness = 0;
3883 if (n->simdclone->simdlen > vf
3884 || n->simdclone->nargs != nargs)
3885 continue;
3886 if (n->simdclone->simdlen < vf)
3887 this_badness += (exact_log2 (vf)
3888 - exact_log2 (n->simdclone->simdlen)) * 1024;
3889 if (n->simdclone->inbranch)
3890 this_badness += 2048;
3891 int target_badness = targetm.simd_clone.usable (n);
3892 if (target_badness < 0)
3893 continue;
3894 this_badness += target_badness * 512;
3895 /* FORNOW: Have to add code to add the mask argument. */
3896 if (n->simdclone->inbranch)
3897 continue;
3898 for (i = 0; i < nargs; i++)
3899 {
3900 switch (n->simdclone->args[i].arg_type)
3901 {
3902 case SIMD_CLONE_ARG_TYPE_VECTOR:
3903 if (!useless_type_conversion_p
3904 (n->simdclone->args[i].orig_type,
3905 TREE_TYPE (gimple_call_arg (stmt, i))))
3906 i = -1;
3907 else if (arginfo[i].dt == vect_constant_def
3908 || arginfo[i].dt == vect_external_def
3909 || arginfo[i].linear_step)
3910 this_badness += 64;
3911 break;
3912 case SIMD_CLONE_ARG_TYPE_UNIFORM:
3913 if (arginfo[i].dt != vect_constant_def
3914 && arginfo[i].dt != vect_external_def)
3915 i = -1;
3916 break;
3917 case SIMD_CLONE_ARG_TYPE_LINEAR_CONSTANT_STEP:
3918 case SIMD_CLONE_ARG_TYPE_LINEAR_REF_CONSTANT_STEP:
3919 if (arginfo[i].dt == vect_constant_def
3920 || arginfo[i].dt == vect_external_def
3921 || (arginfo[i].linear_step
3922 != n->simdclone->args[i].linear_step))
3923 i = -1;
3924 break;
3925 case SIMD_CLONE_ARG_TYPE_LINEAR_VARIABLE_STEP:
3926 case SIMD_CLONE_ARG_TYPE_LINEAR_VAL_CONSTANT_STEP:
3927 case SIMD_CLONE_ARG_TYPE_LINEAR_UVAL_CONSTANT_STEP:
3928 case SIMD_CLONE_ARG_TYPE_LINEAR_REF_VARIABLE_STEP:
3929 case SIMD_CLONE_ARG_TYPE_LINEAR_VAL_VARIABLE_STEP:
3930 case SIMD_CLONE_ARG_TYPE_LINEAR_UVAL_VARIABLE_STEP:
3931 /* FORNOW */
3932 i = -1;
3933 break;
3934 case SIMD_CLONE_ARG_TYPE_MASK:
3935 gcc_unreachable ();
3936 }
3937 if (i == (size_t) -1)
3938 break;
3939 if (n->simdclone->args[i].alignment > arginfo[i].align)
3940 {
3941 i = -1;
3942 break;
3943 }
3944 if (arginfo[i].align)
3945 this_badness += (exact_log2 (arginfo[i].align)
3946 - exact_log2 (n->simdclone->args[i].alignment));
3947 }
3948 if (i == (size_t) -1)
3949 continue;
3950 if (bestn == NULL || this_badness < badness)
3951 {
3952 bestn = n;
3953 badness = this_badness;
3954 }
3955 }
3956
3957 if (bestn == NULL)
3958 return false;
3959
3960 for (i = 0; i < nargs; i++)
3961 if ((arginfo[i].dt == vect_constant_def
3962 || arginfo[i].dt == vect_external_def)
3963 && bestn->simdclone->args[i].arg_type == SIMD_CLONE_ARG_TYPE_VECTOR)
3964 {
3965 arginfo[i].vectype
3966 = get_vectype_for_scalar_type (TREE_TYPE (gimple_call_arg (stmt,
3967 i)));
3968 if (arginfo[i].vectype == NULL
3969 || (simd_clone_subparts (arginfo[i].vectype)
3970 > bestn->simdclone->simdlen))
3971 return false;
3972 }
3973
3974 fndecl = bestn->decl;
3975 nunits = bestn->simdclone->simdlen;
3976 ncopies = vf / nunits;
3977
3978 /* If the function isn't const, only allow it in simd loops where the
3979 user has asserted that at least nunits consecutive iterations can be
3980 performed using SIMD instructions. */
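  /* For instance (an assumed example): with

	#pragma omp simd safelen(8)

     the user guarantees that 8 consecutive iterations are independent, so
     a clone with simdlen <= 8 is still usable even though the call has a
     VUSE.  */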
3981 if ((loop == NULL || (unsigned) loop->safelen < nunits)
3982 && gimple_vuse (stmt))
3983 return false;
3984
3985 /* Sanity check: make sure that at least one copy of the vectorized stmt
3986 needs to be generated. */
3987 gcc_assert (ncopies >= 1);
3988
3989 if (!vec_stmt) /* transformation not required. */
3990 {
3991 STMT_VINFO_SIMD_CLONE_INFO (stmt_info).safe_push (bestn->decl);
3992 for (i = 0; i < nargs; i++)
3993 if ((bestn->simdclone->args[i].arg_type
3994 == SIMD_CLONE_ARG_TYPE_LINEAR_CONSTANT_STEP)
3995 || (bestn->simdclone->args[i].arg_type
3996 == SIMD_CLONE_ARG_TYPE_LINEAR_REF_CONSTANT_STEP))
3997 {
3998 STMT_VINFO_SIMD_CLONE_INFO (stmt_info).safe_grow_cleared (i * 3
3999 + 1);
4000 STMT_VINFO_SIMD_CLONE_INFO (stmt_info).safe_push (arginfo[i].op);
4001 tree lst = POINTER_TYPE_P (TREE_TYPE (arginfo[i].op))
4002 ? size_type_node : TREE_TYPE (arginfo[i].op);
4003 tree ls = build_int_cst (lst, arginfo[i].linear_step);
4004 STMT_VINFO_SIMD_CLONE_INFO (stmt_info).safe_push (ls);
4005 tree sll = arginfo[i].simd_lane_linear
4006 ? boolean_true_node : boolean_false_node;
4007 STMT_VINFO_SIMD_CLONE_INFO (stmt_info).safe_push (sll);
4008 }
4009 STMT_VINFO_TYPE (stmt_info) = call_simd_clone_vec_info_type;
4010 DUMP_VECT_SCOPE ("vectorizable_simd_clone_call");
4011 /* vect_model_simple_cost (stmt_info, ncopies, dt, slp_node, cost_vec); */
4012 return true;
4013 }
4014
4015 /* Transform. */
4016
4017 if (dump_enabled_p ())
4018 dump_printf_loc (MSG_NOTE, vect_location, "transform call.\n");
4019
4020 /* Handle def. */
4021 scalar_dest = gimple_call_lhs (stmt);
4022 vec_dest = NULL_TREE;
4023 rtype = NULL_TREE;
4024 ratype = NULL_TREE;
4025 if (scalar_dest)
4026 {
4027 vec_dest = vect_create_destination_var (scalar_dest, vectype);
4028 rtype = TREE_TYPE (TREE_TYPE (fndecl));
4029 if (TREE_CODE (rtype) == ARRAY_TYPE)
4030 {
4031 ratype = rtype;
4032 rtype = TREE_TYPE (ratype);
4033 }
4034 }
4035
4036 prev_stmt_info = NULL;
4037 for (j = 0; j < ncopies; ++j)
4038 {
4039 /* Build argument list for the vectorized call. */
4040 if (j == 0)
4041 vargs.create (nargs);
4042 else
4043 vargs.truncate (0);
4044
4045 for (i = 0; i < nargs; i++)
4046 {
4047 unsigned int k, l, m, o;
4048 tree atype;
4049 op = gimple_call_arg (stmt, i);
4050 switch (bestn->simdclone->args[i].arg_type)
4051 {
4052 case SIMD_CLONE_ARG_TYPE_VECTOR:
4053 atype = bestn->simdclone->args[i].vector_type;
4054 o = nunits / simd_clone_subparts (atype);
4055 for (m = j * o; m < (j + 1) * o; m++)
4056 {
4057 if (simd_clone_subparts (atype)
4058 < simd_clone_subparts (arginfo[i].vectype))
4059 {
4060 poly_uint64 prec = GET_MODE_BITSIZE (TYPE_MODE (atype));
4061 k = (simd_clone_subparts (arginfo[i].vectype)
4062 / simd_clone_subparts (atype));
4063 gcc_assert ((k & (k - 1)) == 0);
4064 if (m == 0)
4065 vec_oprnd0
4066 = vect_get_vec_def_for_operand (op, stmt_info);
4067 else
4068 {
4069 vec_oprnd0 = arginfo[i].op;
4070 if ((m & (k - 1)) == 0)
4071 vec_oprnd0
4072 = vect_get_vec_def_for_stmt_copy (vinfo,
4073 vec_oprnd0);
4074 }
4075 arginfo[i].op = vec_oprnd0;
4076 vec_oprnd0
4077 = build3 (BIT_FIELD_REF, atype, vec_oprnd0,
4078 bitsize_int (prec),
4079 bitsize_int ((m & (k - 1)) * prec));
4080 gassign *new_stmt
4081 = gimple_build_assign (make_ssa_name (atype),
4082 vec_oprnd0);
4083 vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
4084 vargs.safe_push (gimple_assign_lhs (new_stmt));
4085 }
4086 else
4087 {
4088 k = (simd_clone_subparts (atype)
4089 / simd_clone_subparts (arginfo[i].vectype));
4090 gcc_assert ((k & (k - 1)) == 0);
4091 vec<constructor_elt, va_gc> *ctor_elts;
4092 if (k != 1)
4093 vec_alloc (ctor_elts, k);
4094 else
4095 ctor_elts = NULL;
4096 for (l = 0; l < k; l++)
4097 {
4098 if (m == 0 && l == 0)
4099 vec_oprnd0
4100 = vect_get_vec_def_for_operand (op, stmt_info);
4101 else
4102 vec_oprnd0
4103 = vect_get_vec_def_for_stmt_copy (vinfo,
4104 arginfo[i].op);
4105 arginfo[i].op = vec_oprnd0;
4106 if (k == 1)
4107 break;
4108 CONSTRUCTOR_APPEND_ELT (ctor_elts, NULL_TREE,
4109 vec_oprnd0);
4110 }
4111 if (k == 1)
4112 vargs.safe_push (vec_oprnd0);
4113 else
4114 {
4115 vec_oprnd0 = build_constructor (atype, ctor_elts);
4116 gassign *new_stmt
4117 = gimple_build_assign (make_ssa_name (atype),
4118 vec_oprnd0);
4119 vect_finish_stmt_generation (stmt_info, new_stmt,
4120 gsi);
4121 vargs.safe_push (gimple_assign_lhs (new_stmt));
4122 }
4123 }
4124 }
4125 break;
4126 case SIMD_CLONE_ARG_TYPE_UNIFORM:
4127 vargs.safe_push (op);
4128 break;
4129 case SIMD_CLONE_ARG_TYPE_LINEAR_CONSTANT_STEP:
4130 case SIMD_CLONE_ARG_TYPE_LINEAR_REF_CONSTANT_STEP:
4131 if (j == 0)
4132 {
4133 gimple_seq stmts;
4134 arginfo[i].op
4135 = force_gimple_operand (arginfo[i].op, &stmts, true,
4136 NULL_TREE);
4137 if (stmts != NULL)
4138 {
4139 basic_block new_bb;
4140 edge pe = loop_preheader_edge (loop);
4141 new_bb = gsi_insert_seq_on_edge_immediate (pe, stmts);
4142 gcc_assert (!new_bb);
4143 }
4144 if (arginfo[i].simd_lane_linear)
4145 {
4146 vargs.safe_push (arginfo[i].op);
4147 break;
4148 }
4149 tree phi_res = copy_ssa_name (op);
4150 gphi *new_phi = create_phi_node (phi_res, loop->header);
4151 loop_vinfo->add_stmt (new_phi);
4152 add_phi_arg (new_phi, arginfo[i].op,
4153 loop_preheader_edge (loop), UNKNOWN_LOCATION);
4154 enum tree_code code
4155 = POINTER_TYPE_P (TREE_TYPE (op))
4156 ? POINTER_PLUS_EXPR : PLUS_EXPR;
4157 tree type = POINTER_TYPE_P (TREE_TYPE (op))
4158 ? sizetype : TREE_TYPE (op);
4159 widest_int cst
4160 = wi::mul (bestn->simdclone->args[i].linear_step,
4161 ncopies * nunits);
4162 tree tcst = wide_int_to_tree (type, cst);
4163 tree phi_arg = copy_ssa_name (op);
4164 gassign *new_stmt
4165 = gimple_build_assign (phi_arg, code, phi_res, tcst);
4166 gimple_stmt_iterator si = gsi_after_labels (loop->header);
4167 gsi_insert_after (&si, new_stmt, GSI_NEW_STMT);
4168 loop_vinfo->add_stmt (new_stmt);
4169 add_phi_arg (new_phi, phi_arg, loop_latch_edge (loop),
4170 UNKNOWN_LOCATION);
4171 arginfo[i].op = phi_res;
4172 vargs.safe_push (phi_res);
4173 }
4174 else
4175 {
4176 enum tree_code code
4177 = POINTER_TYPE_P (TREE_TYPE (op))
4178 ? POINTER_PLUS_EXPR : PLUS_EXPR;
4179 tree type = POINTER_TYPE_P (TREE_TYPE (op))
4180 ? sizetype : TREE_TYPE (op);
4181 widest_int cst
4182 = wi::mul (bestn->simdclone->args[i].linear_step,
4183 j * nunits);
4184 tree tcst = wide_int_to_tree (type, cst);
4185 new_temp = make_ssa_name (TREE_TYPE (op));
4186 gassign *new_stmt
4187 = gimple_build_assign (new_temp, code,
4188 arginfo[i].op, tcst);
4189 vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
4190 vargs.safe_push (new_temp);
4191 }
4192 break;
4193 case SIMD_CLONE_ARG_TYPE_LINEAR_VAL_CONSTANT_STEP:
4194 case SIMD_CLONE_ARG_TYPE_LINEAR_UVAL_CONSTANT_STEP:
4195 case SIMD_CLONE_ARG_TYPE_LINEAR_VARIABLE_STEP:
4196 case SIMD_CLONE_ARG_TYPE_LINEAR_REF_VARIABLE_STEP:
4197 case SIMD_CLONE_ARG_TYPE_LINEAR_VAL_VARIABLE_STEP:
4198 case SIMD_CLONE_ARG_TYPE_LINEAR_UVAL_VARIABLE_STEP:
4199 default:
4200 gcc_unreachable ();
4201 }
4202 }
4203
4204 gcall *new_call = gimple_build_call_vec (fndecl, vargs);
4205 if (vec_dest)
4206 {
4207 gcc_assert (ratype || simd_clone_subparts (rtype) == nunits);
4208 if (ratype)
4209 new_temp = create_tmp_var (ratype);
4210 else if (simd_clone_subparts (vectype)
4211 == simd_clone_subparts (rtype))
4212 new_temp = make_ssa_name (vec_dest, new_call);
4213 else
4214 new_temp = make_ssa_name (rtype, new_call);
4215 gimple_call_set_lhs (new_call, new_temp);
4216 }
4217 stmt_vec_info new_stmt_info
4218 = vect_finish_stmt_generation (stmt_info, new_call, gsi);
4219
4220 if (vec_dest)
4221 {
4222 if (simd_clone_subparts (vectype) < nunits)
4223 {
4224 unsigned int k, l;
4225 poly_uint64 prec = GET_MODE_BITSIZE (TYPE_MODE (vectype));
4226 poly_uint64 bytes = GET_MODE_SIZE (TYPE_MODE (vectype));
4227 k = nunits / simd_clone_subparts (vectype);
4228 gcc_assert ((k & (k - 1)) == 0);
4229 for (l = 0; l < k; l++)
4230 {
4231 tree t;
4232 if (ratype)
4233 {
4234 t = build_fold_addr_expr (new_temp);
4235 t = build2 (MEM_REF, vectype, t,
4236 build_int_cst (TREE_TYPE (t), l * bytes));
4237 }
4238 else
4239 t = build3 (BIT_FIELD_REF, vectype, new_temp,
4240 bitsize_int (prec), bitsize_int (l * prec));
4241 gimple *new_stmt
4242 = gimple_build_assign (make_ssa_name (vectype), t);
4243 new_stmt_info
4244 = vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
4245
4246 if (j == 0 && l == 0)
4247 STMT_VINFO_VEC_STMT (stmt_info)
4248 = *vec_stmt = new_stmt_info;
4249 else
4250 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt_info;
4251
4252 prev_stmt_info = new_stmt_info;
4253 }
4254
4255 if (ratype)
4256 vect_clobber_variable (stmt_info, gsi, new_temp);
4257 continue;
4258 }
4259 else if (simd_clone_subparts (vectype) > nunits)
4260 {
4261 unsigned int k = (simd_clone_subparts (vectype)
4262 / simd_clone_subparts (rtype));
4263 gcc_assert ((k & (k - 1)) == 0);
4264 if ((j & (k - 1)) == 0)
4265 vec_alloc (ret_ctor_elts, k);
4266 if (ratype)
4267 {
4268 unsigned int m, o = nunits / simd_clone_subparts (rtype);
4269 for (m = 0; m < o; m++)
4270 {
4271 tree tem = build4 (ARRAY_REF, rtype, new_temp,
4272 size_int (m), NULL_TREE, NULL_TREE);
4273 gimple *new_stmt
4274 = gimple_build_assign (make_ssa_name (rtype), tem);
4275 new_stmt_info
4276 = vect_finish_stmt_generation (stmt_info, new_stmt,
4277 gsi);
4278 CONSTRUCTOR_APPEND_ELT (ret_ctor_elts, NULL_TREE,
4279 gimple_assign_lhs (new_stmt));
4280 }
4281 vect_clobber_variable (stmt_info, gsi, new_temp);
4282 }
4283 else
4284 CONSTRUCTOR_APPEND_ELT (ret_ctor_elts, NULL_TREE, new_temp);
4285 if ((j & (k - 1)) != k - 1)
4286 continue;
4287 vec_oprnd0 = build_constructor (vectype, ret_ctor_elts);
4288 gimple *new_stmt
4289 = gimple_build_assign (make_ssa_name (vec_dest), vec_oprnd0);
4290 new_stmt_info
4291 = vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
4292
4293 if ((unsigned) j == k - 1)
4294 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt_info;
4295 else
4296 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt_info;
4297
4298 prev_stmt_info = new_stmt_info;
4299 continue;
4300 }
4301 else if (ratype)
4302 {
4303 tree t = build_fold_addr_expr (new_temp);
4304 t = build2 (MEM_REF, vectype, t,
4305 build_int_cst (TREE_TYPE (t), 0));
4306 gimple *new_stmt
4307 = gimple_build_assign (make_ssa_name (vec_dest), t);
4308 new_stmt_info
4309 = vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
4310 vect_clobber_variable (stmt_info, gsi, new_temp);
4311 }
4312 }
4313
4314 if (j == 0)
4315 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt_info;
4316 else
4317 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt_info;
4318
4319 prev_stmt_info = new_stmt_info;
4320 }
4321
4322 vargs.release ();
4323
4324 /* The call in STMT might prevent it from being removed by DCE.
4325 However, we cannot remove it here, because of the way the SSA name
4326 it defines is mapped to the new definition. So just replace the
4327 rhs of the statement with something harmless. */
4328
4329 if (slp_node)
4330 return true;
4331
4332 gimple *new_stmt;
4333 if (scalar_dest)
4334 {
4335 type = TREE_TYPE (scalar_dest);
4336 lhs = gimple_call_lhs (vect_orig_stmt (stmt_info)->stmt);
4337 new_stmt = gimple_build_assign (lhs, build_zero_cst (type));
4338 }
4339 else
4340 new_stmt = gimple_build_nop ();
4341 vinfo->replace_stmt (gsi, vect_orig_stmt (stmt_info), new_stmt);
4342 unlink_stmt_vdef (stmt);
4343
4344 return true;
4345 }
4346
4347
4348 /* Function vect_gen_widened_results_half
4349
4350 Create a vector stmt whose code is CODE, whose number of operands is
4351 OP_TYPE, and whose result variable is VEC_DEST; its arguments are
4352 VEC_OPRND0 and VEC_OPRND1. The new vector stmt is to be inserted at GSI.
4353 In the case that CODE is a CALL_EXPR, this means that a call to DECL
4354 needs to be created (DECL is a function-decl of a target-builtin).
4355 STMT_INFO is the original scalar stmt that we are vectorizing. */
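/* For example (an illustrative sketch, not target-specific): when widening
   V8HI operands to V4SI results, the caller invokes this function twice,
   once with a LO and once with a HI tree code, producing roughly

	vect_lo = VEC_UNPACK_LO_EXPR <vect_op0>;
	vect_hi = VEC_UNPACK_HI_EXPR <vect_op0>;

   or the corresponding pair of target builtin calls when CODE is a
   CALL_EXPR.  */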
4356
4357 static gimple *
4358 vect_gen_widened_results_half (enum tree_code code,
4359 tree decl,
4360 tree vec_oprnd0, tree vec_oprnd1, int op_type,
4361 tree vec_dest, gimple_stmt_iterator *gsi,
4362 stmt_vec_info stmt_info)
4363 {
4364 gimple *new_stmt;
4365 tree new_temp;
4366
4367 /* Generate half of the widened result: */
4368 if (code == CALL_EXPR)
4369 {
4370 /* Target specific support */
4371 if (op_type == binary_op)
4372 new_stmt = gimple_build_call (decl, 2, vec_oprnd0, vec_oprnd1);
4373 else
4374 new_stmt = gimple_build_call (decl, 1, vec_oprnd0);
4375 new_temp = make_ssa_name (vec_dest, new_stmt);
4376 gimple_call_set_lhs (new_stmt, new_temp);
4377 }
4378 else
4379 {
4380 /* Generic support */
4381 gcc_assert (op_type == TREE_CODE_LENGTH (code));
4382 if (op_type != binary_op)
4383 vec_oprnd1 = NULL;
4384 new_stmt = gimple_build_assign (vec_dest, code, vec_oprnd0, vec_oprnd1);
4385 new_temp = make_ssa_name (vec_dest, new_stmt);
4386 gimple_assign_set_lhs (new_stmt, new_temp);
4387 }
4388 vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
4389
4390 return new_stmt;
4391 }
4392
4393
4394 /* Get vectorized definitions for loop-based vectorization of STMT_INFO.
4395 For the first operand we call vect_get_vec_def_for_operand (with OPRND
4396 containing the scalar operand), and for the rest we get a copy with
4397 vect_get_vec_def_for_stmt_copy() using the previous vector definition
4398 (stored in OPRND). See vect_get_vec_def_for_stmt_copy() for details.
4399 The vectors are collected into VEC_OPRNDS. */
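/* For example (illustrative): with MULTI_STEP_CVT == 1 this collects four
   vector defs d0, d1, d2, d3 into VEC_OPRNDS, where d0 is created from the
   scalar operand and each following def is the next copy of the previous
   one; a two-step narrowing can then pack them 4 -> 2 -> 1.  */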
4400
4401 static void
4402 vect_get_loop_based_defs (tree *oprnd, stmt_vec_info stmt_info,
4403 vec<tree> *vec_oprnds, int multi_step_cvt)
4404 {
4405 vec_info *vinfo = stmt_info->vinfo;
4406 tree vec_oprnd;
4407
4408 /* Get the first vector operand. All the vector operands except the
4409 very first one (which comes from the scalar operand) are stmt
4410 copies. */
4411 if (TREE_CODE (TREE_TYPE (*oprnd)) != VECTOR_TYPE)
4412 vec_oprnd = vect_get_vec_def_for_operand (*oprnd, stmt_info);
4413 else
4414 vec_oprnd = vect_get_vec_def_for_stmt_copy (vinfo, *oprnd);
4415
4416 vec_oprnds->quick_push (vec_oprnd);
4417
4418 /* Get second vector operand. */
4419 vec_oprnd = vect_get_vec_def_for_stmt_copy (vinfo, vec_oprnd);
4420 vec_oprnds->quick_push (vec_oprnd);
4421
4422 *oprnd = vec_oprnd;
4423
4424 /* For conversion in multiple steps, continue to get operands
4425 recursively. */
4426 if (multi_step_cvt)
4427 vect_get_loop_based_defs (oprnd, stmt_info, vec_oprnds,
4428 multi_step_cvt - 1);
4429 }
4430
4431
4432 /* Create vectorized demotion statements for vector operands from VEC_OPRNDS.
4433 For multi-step conversions store the resulting vectors and call the function
4434 recursively. */
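/* For example (a sketch): narrowing int elements to char via an
   intermediate short type first packs pairs of int vectors into short
   vectors, and the recursive call then packs pairs of those into char
   vectors, typically using VEC_PACK_TRUNC_EXPR at each step:

	short_vec = VEC_PACK_TRUNC_EXPR <int_vec0, int_vec1>;
	...
	char_vec = VEC_PACK_TRUNC_EXPR <short_vec0, short_vec1>;  */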
4435
4436 static void
4437 vect_create_vectorized_demotion_stmts (vec<tree> *vec_oprnds,
4438 int multi_step_cvt,
4439 stmt_vec_info stmt_info,
4440 vec<tree> vec_dsts,
4441 gimple_stmt_iterator *gsi,
4442 slp_tree slp_node, enum tree_code code,
4443 stmt_vec_info *prev_stmt_info)
4444 {
4445 unsigned int i;
4446 tree vop0, vop1, new_tmp, vec_dest;
4447
4448 vec_dest = vec_dsts.pop ();
4449
4450 for (i = 0; i < vec_oprnds->length (); i += 2)
4451 {
4452 /* Create demotion operation. */
4453 vop0 = (*vec_oprnds)[i];
4454 vop1 = (*vec_oprnds)[i + 1];
4455 gassign *new_stmt = gimple_build_assign (vec_dest, code, vop0, vop1);
4456 new_tmp = make_ssa_name (vec_dest, new_stmt);
4457 gimple_assign_set_lhs (new_stmt, new_tmp);
4458 stmt_vec_info new_stmt_info
4459 = vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
4460
4461 if (multi_step_cvt)
4462 /* Store the resulting vector for the next recursive call. */
4463 (*vec_oprnds)[i/2] = new_tmp;
4464 else
4465 {
4466 /* This is the last step of the conversion sequence. Store the
4467 vectors in SLP_NODE or in the vector info of the scalar statement
4468 (or in the STMT_VINFO_RELATED_STMT chain). */
4469 if (slp_node)
4470 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt_info);
4471 else
4472 {
4473 if (!*prev_stmt_info)
4474 STMT_VINFO_VEC_STMT (stmt_info) = new_stmt_info;
4475 else
4476 STMT_VINFO_RELATED_STMT (*prev_stmt_info) = new_stmt_info;
4477
4478 *prev_stmt_info = new_stmt_info;
4479 }
4480 }
4481 }
4482
4483 /* For multi-step demotion operations we first generate demotion operations
4484 from the source type to the intermediate types, and then combine the
4485 results (stored in VEC_OPRNDS) with a demotion operation to the
4486 destination type. */
4487 if (multi_step_cvt)
4488 {
4489 /* At each level of recursion we have half of the operands we had at the
4490 previous level. */
4491 vec_oprnds->truncate ((i+1)/2);
4492 vect_create_vectorized_demotion_stmts (vec_oprnds, multi_step_cvt - 1,
4493 stmt_info, vec_dsts, gsi,
4494 slp_node, VEC_PACK_TRUNC_EXPR,
4495 prev_stmt_info);
4496 }
4497
4498 vec_dsts.quick_push (vec_dest);
4499 }
4500
4501
4502 /* Create vectorized promotion statements for vector operands from VEC_OPRNDS0
4503 and VEC_OPRNDS1, for a binary operation associated with scalar statement
4504 STMT_INFO. For multi-step conversions store the resulting vectors and
4505 call the function recursively. */
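/* For example (a sketch): for a widening multiplication each pair
   (vop0, vop1) yields two result vectors, produced with codes such as
   VEC_WIDEN_MULT_LO_EXPR and VEC_WIDEN_MULT_HI_EXPR; both halves are
   pushed in order into VEC_TMP, which then replaces *VEC_OPRNDS0 for the
   next conversion step.  */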
4506
4507 static void
4508 vect_create_vectorized_promotion_stmts (vec<tree> *vec_oprnds0,
4509 vec<tree> *vec_oprnds1,
4510 stmt_vec_info stmt_info, tree vec_dest,
4511 gimple_stmt_iterator *gsi,
4512 enum tree_code code1,
4513 enum tree_code code2, tree decl1,
4514 tree decl2, int op_type)
4515 {
4516 int i;
4517 tree vop0, vop1, new_tmp1, new_tmp2;
4518 gimple *new_stmt1, *new_stmt2;
4519 vec<tree> vec_tmp = vNULL;
4520
4521 vec_tmp.create (vec_oprnds0->length () * 2);
4522 FOR_EACH_VEC_ELT (*vec_oprnds0, i, vop0)
4523 {
4524 if (op_type == binary_op)
4525 vop1 = (*vec_oprnds1)[i];
4526 else
4527 vop1 = NULL_TREE;
4528
4529 /* Generate the two halves of promotion operation. */
4530 new_stmt1 = vect_gen_widened_results_half (code1, decl1, vop0, vop1,
4531 op_type, vec_dest, gsi,
4532 stmt_info);
4533 new_stmt2 = vect_gen_widened_results_half (code2, decl2, vop0, vop1,
4534 op_type, vec_dest, gsi,
4535 stmt_info);
4536 if (is_gimple_call (new_stmt1))
4537 {
4538 new_tmp1 = gimple_call_lhs (new_stmt1);
4539 new_tmp2 = gimple_call_lhs (new_stmt2);
4540 }
4541 else
4542 {
4543 new_tmp1 = gimple_assign_lhs (new_stmt1);
4544 new_tmp2 = gimple_assign_lhs (new_stmt2);
4545 }
4546
4547 /* Store the results for the next step. */
4548 vec_tmp.quick_push (new_tmp1);
4549 vec_tmp.quick_push (new_tmp2);
4550 }
4551
4552 vec_oprnds0->release ();
4553 *vec_oprnds0 = vec_tmp;
4554 }
4555
4556
4557 /* Check if STMT_INFO performs a conversion operation that can be vectorized.
4558 If VEC_STMT is also passed, vectorize STMT_INFO: create a vectorized
4559 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
4560 Return true if STMT_INFO is vectorizable in this way. */
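/* For example (illustrative; 128-bit vectors assumed): converting short to
   int is a WIDEN conversion, where one V8HI input produces two V4SI
   results via a lo/hi pair of unpacking operations; converting int to
   short is a NARROW conversion that packs two V4SI inputs into one V8HI
   result; an int <-> float conversion of equal width uses the NONE
   modifier and needs a single statement per copy.  */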
4561
4562 static bool
4563 vectorizable_conversion (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
4564 stmt_vec_info *vec_stmt, slp_tree slp_node,
4565 stmt_vector_for_cost *cost_vec)
4566 {
4567 tree vec_dest;
4568 tree scalar_dest;
4569 tree op0, op1 = NULL_TREE;
4570 tree vec_oprnd0 = NULL_TREE, vec_oprnd1 = NULL_TREE;
4571 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
4572 enum tree_code code, code1 = ERROR_MARK, code2 = ERROR_MARK;
4573 enum tree_code codecvt1 = ERROR_MARK, codecvt2 = ERROR_MARK;
4574 tree decl1 = NULL_TREE, decl2 = NULL_TREE;
4575 tree new_temp;
4576 enum vect_def_type dt[2] = {vect_unknown_def_type, vect_unknown_def_type};
4577 int ndts = 2;
4578 stmt_vec_info prev_stmt_info;
4579 poly_uint64 nunits_in;
4580 poly_uint64 nunits_out;
4581 tree vectype_out, vectype_in;
4582 int ncopies, i, j;
4583 tree lhs_type, rhs_type;
4584 enum { NARROW, NONE, WIDEN } modifier;
4585 vec<tree> vec_oprnds0 = vNULL;
4586 vec<tree> vec_oprnds1 = vNULL;
4587 tree vop0;
4588 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
4589 vec_info *vinfo = stmt_info->vinfo;
4590 int multi_step_cvt = 0;
4591 vec<tree> interm_types = vNULL;
4592 tree last_oprnd, intermediate_type, cvt_type = NULL_TREE;
4593 int op_type;
4594 unsigned short fltsz;
4595
4596 /* Is STMT a vectorizable conversion? */
4597
4598 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
4599 return false;
4600
4601 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
4602 && ! vec_stmt)
4603 return false;
4604
4605 gassign *stmt = dyn_cast <gassign *> (stmt_info->stmt);
4606 if (!stmt)
4607 return false;
4608
4609 if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME)
4610 return false;
4611
4612 code = gimple_assign_rhs_code (stmt);
4613 if (!CONVERT_EXPR_CODE_P (code)
4614 && code != FIX_TRUNC_EXPR
4615 && code != FLOAT_EXPR
4616 && code != WIDEN_MULT_EXPR
4617 && code != WIDEN_LSHIFT_EXPR)
4618 return false;
4619
4620 op_type = TREE_CODE_LENGTH (code);
4621
4622 /* Check types of lhs and rhs. */
4623 scalar_dest = gimple_assign_lhs (stmt);
4624 lhs_type = TREE_TYPE (scalar_dest);
4625 vectype_out = STMT_VINFO_VECTYPE (stmt_info);
4626
4627 op0 = gimple_assign_rhs1 (stmt);
4628 rhs_type = TREE_TYPE (op0);
4629
4630 if ((code != FIX_TRUNC_EXPR && code != FLOAT_EXPR)
4631 && !((INTEGRAL_TYPE_P (lhs_type)
4632 && INTEGRAL_TYPE_P (rhs_type))
4633 || (SCALAR_FLOAT_TYPE_P (lhs_type)
4634 && SCALAR_FLOAT_TYPE_P (rhs_type))))
4635 return false;
4636
4637 if (!VECTOR_BOOLEAN_TYPE_P (vectype_out)
4638 && ((INTEGRAL_TYPE_P (lhs_type)
4639 && !type_has_mode_precision_p (lhs_type))
4640 || (INTEGRAL_TYPE_P (rhs_type)
4641 && !type_has_mode_precision_p (rhs_type))))
4642 {
4643 if (dump_enabled_p ())
4644 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4645 "type conversion to/from bit-precision unsupported."
4646 "\n");
4647 return false;
4648 }
4649
4650 /* Check the operands of the operation. */
4651 if (!vect_is_simple_use (op0, vinfo, &dt[0], &vectype_in))
4652 {
4653 if (dump_enabled_p ())
4654 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4655 "use not simple.\n");
4656 return false;
4657 }
4658 if (op_type == binary_op)
4659 {
4660 bool ok;
4661
4662 op1 = gimple_assign_rhs2 (stmt);
4663 gcc_assert (code == WIDEN_MULT_EXPR || code == WIDEN_LSHIFT_EXPR);
4664 /* For WIDEN_MULT_EXPR, if OP0 is a constant, use the type of
4665 OP1. */
4666 if (CONSTANT_CLASS_P (op0))
4667 ok = vect_is_simple_use (op1, vinfo, &dt[1], &vectype_in);
4668 else
4669 ok = vect_is_simple_use (op1, vinfo, &dt[1]);
4670
4671 if (!ok)
4672 {
4673 if (dump_enabled_p ())
4674 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4675 "use not simple.\n");
4676 return false;
4677 }
4678 }
4679
4680 /* If op0 is an external or constant def, use a vector type of
4681 the same size as the output vector type. */
4682 if (!vectype_in)
4683 vectype_in = get_same_sized_vectype (rhs_type, vectype_out);
4684 if (vec_stmt)
4685 gcc_assert (vectype_in);
4686 if (!vectype_in)
4687 {
4688 if (dump_enabled_p ())
4689 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4690 "no vectype for scalar type %T\n", rhs_type);
4691
4692 return false;
4693 }
4694
4695 if (VECTOR_BOOLEAN_TYPE_P (vectype_out)
4696 && !VECTOR_BOOLEAN_TYPE_P (vectype_in))
4697 {
4698 if (dump_enabled_p ())
4699 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4700 "can't convert between boolean and non "
4701 "boolean vectors %T\n", rhs_type);
4702
4703 return false;
4704 }
4705
4706 nunits_in = TYPE_VECTOR_SUBPARTS (vectype_in);
4707 nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
4708 if (known_eq (nunits_out, nunits_in))
4709 modifier = NONE;
4710 else if (multiple_p (nunits_out, nunits_in))
4711 modifier = NARROW;
4712 else
4713 {
4714 gcc_checking_assert (multiple_p (nunits_in, nunits_out));
4715 modifier = WIDEN;
4716 }
4717
4718 /* Multiple types in SLP are handled by creating the appropriate number of
4719 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
4720 case of SLP. */
4721 if (slp_node)
4722 ncopies = 1;
4723 else if (modifier == NARROW)
4724 ncopies = vect_get_num_copies (loop_vinfo, vectype_out);
4725 else
4726 ncopies = vect_get_num_copies (loop_vinfo, vectype_in);
4727
4728 /* Sanity check: make sure that at least one copy of the vectorized stmt
4729 needs to be generated. */
4730 gcc_assert (ncopies >= 1);
4731
4732 bool found_mode = false;
4733 scalar_mode lhs_mode = SCALAR_TYPE_MODE (lhs_type);
4734 scalar_mode rhs_mode = SCALAR_TYPE_MODE (rhs_type);
4735 opt_scalar_mode rhs_mode_iter;
4736
4737 /* Supportable by target? */
4738 switch (modifier)
4739 {
4740 case NONE:
4741 if (code != FIX_TRUNC_EXPR && code != FLOAT_EXPR)
4742 return false;
4743 if (supportable_convert_operation (code, vectype_out, vectype_in,
4744 &decl1, &code1))
4745 break;
4746 /* FALLTHRU */
4747 unsupported:
4748 if (dump_enabled_p ())
4749 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4750 "conversion not supported by target.\n");
4751 return false;
4752
4753 case WIDEN:
4754 if (supportable_widening_operation (code, stmt_info, vectype_out,
4755 vectype_in, &code1, &code2,
4756 &multi_step_cvt, &interm_types))
4757 {
4758 /* A binary widening operation can only be supported directly by the
4759 architecture. */
4760 gcc_assert (!(multi_step_cvt && op_type == binary_op));
4761 break;
4762 }
4763
4764 if (code != FLOAT_EXPR
4765 || GET_MODE_SIZE (lhs_mode) <= GET_MODE_SIZE (rhs_mode))
4766 goto unsupported;
4767
4768 fltsz = GET_MODE_SIZE (lhs_mode);
4769 FOR_EACH_2XWIDER_MODE (rhs_mode_iter, rhs_mode)
4770 {
4771 rhs_mode = rhs_mode_iter.require ();
4772 if (GET_MODE_SIZE (rhs_mode) > fltsz)
4773 break;
4774
4775 cvt_type
4776 = build_nonstandard_integer_type (GET_MODE_BITSIZE (rhs_mode), 0);
4777 cvt_type = get_same_sized_vectype (cvt_type, vectype_in);
4778 if (cvt_type == NULL_TREE)
4779 goto unsupported;
4780
4781 if (GET_MODE_SIZE (rhs_mode) == fltsz)
4782 {
4783 if (!supportable_convert_operation (code, vectype_out,
4784 cvt_type, &decl1, &codecvt1))
4785 goto unsupported;
4786 }
4787 else if (!supportable_widening_operation (code, stmt_info,
4788 vectype_out, cvt_type,
4789 &codecvt1, &codecvt2,
4790 &multi_step_cvt,
4791 &interm_types))
4792 continue;
4793 else
4794 gcc_assert (multi_step_cvt == 0);
4795
4796 if (supportable_widening_operation (NOP_EXPR, stmt_info, cvt_type,
4797 vectype_in, &code1, &code2,
4798 &multi_step_cvt, &interm_types))
4799 {
4800 found_mode = true;
4801 break;
4802 }
4803 }
4804
4805 if (!found_mode)
4806 goto unsupported;
4807
4808 if (GET_MODE_SIZE (rhs_mode) == fltsz)
4809 codecvt2 = ERROR_MARK;
4810 else
4811 {
4812 multi_step_cvt++;
4813 interm_types.safe_push (cvt_type);
4814 cvt_type = NULL_TREE;
4815 }
4816 break;
4817
4818 case NARROW:
4819 gcc_assert (op_type == unary_op);
4820 if (supportable_narrowing_operation (code, vectype_out, vectype_in,
4821 &code1, &multi_step_cvt,
4822 &interm_types))
4823 break;
4824
4825 if (code != FIX_TRUNC_EXPR
4826 || GET_MODE_SIZE (lhs_mode) >= GET_MODE_SIZE (rhs_mode))
4827 goto unsupported;
4828
4829 cvt_type
4830 = build_nonstandard_integer_type (GET_MODE_BITSIZE (rhs_mode), 0);
4831 cvt_type = get_same_sized_vectype (cvt_type, vectype_in);
4832 if (cvt_type == NULL_TREE)
4833 goto unsupported;
4834 if (!supportable_convert_operation (code, cvt_type, vectype_in,
4835 &decl1, &codecvt1))
4836 goto unsupported;
4837 if (supportable_narrowing_operation (NOP_EXPR, vectype_out, cvt_type,
4838 &code1, &multi_step_cvt,
4839 &interm_types))
4840 break;
4841 goto unsupported;
4842
4843 default:
4844 gcc_unreachable ();
4845 }
4846
4847 if (!vec_stmt) /* transformation not required. */
4848 {
4849 DUMP_VECT_SCOPE ("vectorizable_conversion");
4850 if (code == FIX_TRUNC_EXPR || code == FLOAT_EXPR)
4851 {
4852 STMT_VINFO_TYPE (stmt_info) = type_conversion_vec_info_type;
4853 vect_model_simple_cost (stmt_info, ncopies, dt, ndts, slp_node,
4854 cost_vec);
4855 }
4856 else if (modifier == NARROW)
4857 {
4858 STMT_VINFO_TYPE (stmt_info) = type_demotion_vec_info_type;
4859 vect_model_promotion_demotion_cost (stmt_info, dt, multi_step_cvt,
4860 cost_vec);
4861 }
4862 else
4863 {
4864 STMT_VINFO_TYPE (stmt_info) = type_promotion_vec_info_type;
4865 vect_model_promotion_demotion_cost (stmt_info, dt, multi_step_cvt,
4866 cost_vec);
4867 }
4868 interm_types.release ();
4869 return true;
4870 }
4871
4872 /* Transform. */
4873 if (dump_enabled_p ())
4874 dump_printf_loc (MSG_NOTE, vect_location,
4875 "transform conversion. ncopies = %d.\n", ncopies);
4876
4877 if (op_type == binary_op)
4878 {
4879 if (CONSTANT_CLASS_P (op0))
4880 op0 = fold_convert (TREE_TYPE (op1), op0);
4881 else if (CONSTANT_CLASS_P (op1))
4882 op1 = fold_convert (TREE_TYPE (op0), op1);
4883 }
4884
4885 /* In case of multi-step conversion, we first generate conversion operations
4886 to the intermediate types, and then from those types to the final one.
4887 We create vector destinations for the intermediate types (TYPES) received
4888 from supportable_*_operation, and store them in the correct order
4889 for future use in vect_create_vectorized_*_stmts (). */
4890 auto_vec<tree> vec_dsts (multi_step_cvt + 1);
4891 vec_dest = vect_create_destination_var (scalar_dest,
4892 (cvt_type && modifier == WIDEN)
4893 ? cvt_type : vectype_out);
4894 vec_dsts.quick_push (vec_dest);
4895
4896 if (multi_step_cvt)
4897 {
4898 for (i = interm_types.length () - 1;
4899 interm_types.iterate (i, &intermediate_type); i--)
4900 {
4901 vec_dest = vect_create_destination_var (scalar_dest,
4902 intermediate_type);
4903 vec_dsts.quick_push (vec_dest);
4904 }
4905 }
4906
4907 if (cvt_type)
4908 vec_dest = vect_create_destination_var (scalar_dest,
4909 modifier == WIDEN
4910 ? vectype_out : cvt_type);
4911
4912 if (!slp_node)
4913 {
4914 if (modifier == WIDEN)
4915 {
4916 vec_oprnds0.create (multi_step_cvt ? vect_pow2 (multi_step_cvt) : 1);
4917 if (op_type == binary_op)
4918 vec_oprnds1.create (1);
4919 }
4920 else if (modifier == NARROW)
4921 vec_oprnds0.create (
4922 2 * (multi_step_cvt ? vect_pow2 (multi_step_cvt) : 1));
4923 }
4924 else if (code == WIDEN_LSHIFT_EXPR)
4925 vec_oprnds1.create (slp_node->vec_stmts_size);
4926
4927 last_oprnd = op0;
4928 prev_stmt_info = NULL;
4929 switch (modifier)
4930 {
4931 case NONE:
4932 for (j = 0; j < ncopies; j++)
4933 {
4934 if (j == 0)
4935 vect_get_vec_defs (op0, NULL, stmt_info, &vec_oprnds0,
4936 NULL, slp_node);
4937 else
4938 vect_get_vec_defs_for_stmt_copy (vinfo, &vec_oprnds0, NULL);
4939
4940 FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
4941 {
4942 stmt_vec_info new_stmt_info;
4943 /* Arguments are ready, create the new vector stmt. */
4944 if (code1 == CALL_EXPR)
4945 {
4946 gcall *new_stmt = gimple_build_call (decl1, 1, vop0);
4947 new_temp = make_ssa_name (vec_dest, new_stmt);
4948 gimple_call_set_lhs (new_stmt, new_temp);
4949 new_stmt_info
4950 = vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
4951 }
4952 else
4953 {
4954 gcc_assert (TREE_CODE_LENGTH (code1) == unary_op);
4955 gassign *new_stmt
4956 = gimple_build_assign (vec_dest, code1, vop0);
4957 new_temp = make_ssa_name (vec_dest, new_stmt);
4958 gimple_assign_set_lhs (new_stmt, new_temp);
4959 new_stmt_info
4960 = vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
4961 }
4962
4963 if (slp_node)
4964 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt_info);
4965 else
4966 {
4967 if (!prev_stmt_info)
4968 STMT_VINFO_VEC_STMT (stmt_info)
4969 = *vec_stmt = new_stmt_info;
4970 else
4971 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt_info;
4972 prev_stmt_info = new_stmt_info;
4973 }
4974 }
4975 }
4976 break;
4977
4978 case WIDEN:
4979 /* In case the vectorization factor (VF) is bigger than the number
4980 of elements that we can fit in a vectype (nunits), we have to
4981 generate more than one vector stmt - i.e., we need to "unroll"
4982 the vector stmt by a factor of VF/nunits. */
4983 for (j = 0; j < ncopies; j++)
4984 {
4985 /* Handle uses. */
4986 if (j == 0)
4987 {
4988 if (slp_node)
4989 {
4990 if (code == WIDEN_LSHIFT_EXPR)
4991 {
4992 unsigned int k;
4993
4994 vec_oprnd1 = op1;
4995 /* Store vec_oprnd1 for every vector stmt to be created
4996 for SLP_NODE. We check during the analysis that all
4997 the shift arguments are the same. */
4998 for (k = 0; k < slp_node->vec_stmts_size - 1; k++)
4999 vec_oprnds1.quick_push (vec_oprnd1);
5000
5001 vect_get_vec_defs (op0, NULL_TREE, stmt_info,
5002 &vec_oprnds0, NULL, slp_node);
5003 }
5004 else
5005 vect_get_vec_defs (op0, op1, stmt_info, &vec_oprnds0,
5006 &vec_oprnds1, slp_node);
5007 }
5008 else
5009 {
5010 vec_oprnd0 = vect_get_vec_def_for_operand (op0, stmt_info);
5011 vec_oprnds0.quick_push (vec_oprnd0);
5012 if (op_type == binary_op)
5013 {
5014 if (code == WIDEN_LSHIFT_EXPR)
5015 vec_oprnd1 = op1;
5016 else
5017 vec_oprnd1
5018 = vect_get_vec_def_for_operand (op1, stmt_info);
5019 vec_oprnds1.quick_push (vec_oprnd1);
5020 }
5021 }
5022 }
5023 else
5024 {
5025 vec_oprnd0 = vect_get_vec_def_for_stmt_copy (vinfo, vec_oprnd0);
5026 vec_oprnds0.truncate (0);
5027 vec_oprnds0.quick_push (vec_oprnd0);
5028 if (op_type == binary_op)
5029 {
5030 if (code == WIDEN_LSHIFT_EXPR)
5031 vec_oprnd1 = op1;
5032 else
5033 vec_oprnd1 = vect_get_vec_def_for_stmt_copy (vinfo,
5034 vec_oprnd1);
5035 vec_oprnds1.truncate (0);
5036 vec_oprnds1.quick_push (vec_oprnd1);
5037 }
5038 }
5039
5040 /* Arguments are ready. Create the new vector stmts. */
5041 for (i = multi_step_cvt; i >= 0; i--)
5042 {
5043 tree this_dest = vec_dsts[i];
5044 enum tree_code c1 = code1, c2 = code2;
5045 if (i == 0 && codecvt2 != ERROR_MARK)
5046 {
5047 c1 = codecvt1;
5048 c2 = codecvt2;
5049 }
5050 vect_create_vectorized_promotion_stmts (&vec_oprnds0,
5051 &vec_oprnds1, stmt_info,
5052 this_dest, gsi,
5053 c1, c2, decl1, decl2,
5054 op_type);
5055 }
5056
5057 FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
5058 {
5059 stmt_vec_info new_stmt_info;
5060 if (cvt_type)
5061 {
5062 if (codecvt1 == CALL_EXPR)
5063 {
5064 gcall *new_stmt = gimple_build_call (decl1, 1, vop0);
5065 new_temp = make_ssa_name (vec_dest, new_stmt);
5066 gimple_call_set_lhs (new_stmt, new_temp);
5067 new_stmt_info
5068 = vect_finish_stmt_generation (stmt_info, new_stmt,
5069 gsi);
5070 }
5071 else
5072 {
5073 gcc_assert (TREE_CODE_LENGTH (codecvt1) == unary_op);
5074 new_temp = make_ssa_name (vec_dest);
5075 gassign *new_stmt
5076 = gimple_build_assign (new_temp, codecvt1, vop0);
5077 new_stmt_info
5078 = vect_finish_stmt_generation (stmt_info, new_stmt,
5079 gsi);
5080 }
5081 }
5082 else
5083 new_stmt_info = vinfo->lookup_def (vop0);
5084
5085 if (slp_node)
5086 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt_info);
5087 else
5088 {
5089 if (!prev_stmt_info)
5090 STMT_VINFO_VEC_STMT (stmt_info) = new_stmt_info;
5091 else
5092 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt_info;
5093 prev_stmt_info = new_stmt_info;
5094 }
5095 }
5096 }
5097
5098 *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
5099 break;
5100
5101 case NARROW:
5102 /* In case the vectorization factor (VF) is bigger than the number
5103 of elements that we can fit in a vectype (nunits), we have to
5104 generate more than one vector stmt - i.e., we need to "unroll"
5105 the vector stmt by a factor of VF/nunits. */
5106 for (j = 0; j < ncopies; j++)
5107 {
5108 /* Handle uses. */
5109 if (slp_node)
5110 vect_get_vec_defs (op0, NULL_TREE, stmt_info, &vec_oprnds0, NULL,
5111 slp_node);
5112 else
5113 {
5114 vec_oprnds0.truncate (0);
5115 vect_get_loop_based_defs (&last_oprnd, stmt_info, &vec_oprnds0,
5116 vect_pow2 (multi_step_cvt) - 1);
5117 }
5118
5119 /* Arguments are ready. Create the new vector stmts. */
5120 if (cvt_type)
5121 FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
5122 {
5123 if (codecvt1 == CALL_EXPR)
5124 {
5125 gcall *new_stmt = gimple_build_call (decl1, 1, vop0);
5126 new_temp = make_ssa_name (vec_dest, new_stmt);
5127 gimple_call_set_lhs (new_stmt, new_temp);
5128 vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
5129 }
5130 else
5131 {
5132 gcc_assert (TREE_CODE_LENGTH (codecvt1) == unary_op);
5133 new_temp = make_ssa_name (vec_dest);
5134 gassign *new_stmt
5135 = gimple_build_assign (new_temp, codecvt1, vop0);
5136 vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
5137 }
5138
5139 vec_oprnds0[i] = new_temp;
5140 }
5141
5142 vect_create_vectorized_demotion_stmts (&vec_oprnds0, multi_step_cvt,
5143 stmt_info, vec_dsts, gsi,
5144 slp_node, code1,
5145 &prev_stmt_info);
5146 }
5147
5148 *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
5149 break;
5150 }
5151
5152 vec_oprnds0.release ();
5153 vec_oprnds1.release ();
5154 interm_types.release ();
5155
5156 return true;
5157 }
5158
5159
5160 /* Function vectorizable_assignment.
5161
5162 Check if STMT_INFO performs an assignment (copy) that can be vectorized.
5163 If VEC_STMT is also passed, vectorize the STMT_INFO: create a vectorized
5164 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
5165 Return true if STMT_INFO is vectorizable in this way. */
5166
5167 static bool
5168 vectorizable_assignment (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
5169 stmt_vec_info *vec_stmt, slp_tree slp_node,
5170 stmt_vector_for_cost *cost_vec)
5171 {
5172 tree vec_dest;
5173 tree scalar_dest;
5174 tree op;
5175 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
5176 tree new_temp;
5177 enum vect_def_type dt[1] = {vect_unknown_def_type};
5178 int ndts = 1;
5179 int ncopies;
5180 int i, j;
5181 vec<tree> vec_oprnds = vNULL;
5182 tree vop;
5183 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
5184 vec_info *vinfo = stmt_info->vinfo;
5185 stmt_vec_info prev_stmt_info = NULL;
5186 enum tree_code code;
5187 tree vectype_in;
5188
5189 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
5190 return false;
5191
5192 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
5193 && ! vec_stmt)
5194 return false;
5195
5196 /* Is this a vectorizable assignment? */
5197 gassign *stmt = dyn_cast <gassign *> (stmt_info->stmt);
5198 if (!stmt)
5199 return false;
5200
5201 scalar_dest = gimple_assign_lhs (stmt);
5202 if (TREE_CODE (scalar_dest) != SSA_NAME)
5203 return false;
5204
5205 code = gimple_assign_rhs_code (stmt);
5206 if (gimple_assign_single_p (stmt)
5207 || code == PAREN_EXPR
5208 || CONVERT_EXPR_CODE_P (code))
5209 op = gimple_assign_rhs1 (stmt);
5210 else
5211 return false;
5212
5213 if (code == VIEW_CONVERT_EXPR)
5214 op = TREE_OPERAND (op, 0);
5215
5216 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
5217 poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
5218
5219 /* Multiple types in SLP are handled by creating the appropriate number of
5220 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
5221 case of SLP. */
5222 if (slp_node)
5223 ncopies = 1;
5224 else
5225 ncopies = vect_get_num_copies (loop_vinfo, vectype);
5226
5227 gcc_assert (ncopies >= 1);
5228
5229 if (!vect_is_simple_use (op, vinfo, &dt[0], &vectype_in))
5230 {
5231 if (dump_enabled_p ())
5232 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5233 "use not simple.\n");
5234 return false;
5235 }
5236
5237 /* We can handle NOP_EXPR conversions that do not change the number
5238 of elements or the vector size. */
5239 if ((CONVERT_EXPR_CODE_P (code)
5240 || code == VIEW_CONVERT_EXPR)
5241 && (!vectype_in
5242 || maybe_ne (TYPE_VECTOR_SUBPARTS (vectype_in), nunits)
5243 || maybe_ne (GET_MODE_SIZE (TYPE_MODE (vectype)),
5244 GET_MODE_SIZE (TYPE_MODE (vectype_in)))))
5245 return false;
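/* For example (illustrative only, assuming 32-bit int and 64-bit long):
   a copy such as
     u_1 = (unsigned int) s_2;
   keeps both the element count and the vector size, so it is handled here
   as a simple vector copy, whereas
     l_1 = (long) s_2;
   changes the element size and is rejected above; such widening
   conversions are the job of vectorizable_conversion instead.  */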
5246
5247 /* We do not handle bit-precision changes. */
5248 if ((CONVERT_EXPR_CODE_P (code)
5249 || code == VIEW_CONVERT_EXPR)
5250 && INTEGRAL_TYPE_P (TREE_TYPE (scalar_dest))
5251 && (!type_has_mode_precision_p (TREE_TYPE (scalar_dest))
5252 || !type_has_mode_precision_p (TREE_TYPE (op)))
5253 /* But a conversion that does not change the bit-pattern is ok. */
5254 && !((TYPE_PRECISION (TREE_TYPE (scalar_dest))
5255 > TYPE_PRECISION (TREE_TYPE (op)))
5256 && TYPE_UNSIGNED (TREE_TYPE (op)))
5257 /* Conversion between boolean types of different sizes is
5258 a simple assignment in case their vectypes are same
5259 boolean vectors. */
5260 && (!VECTOR_BOOLEAN_TYPE_P (vectype)
5261 || !VECTOR_BOOLEAN_TYPE_P (vectype_in)))
5262 {
5263 if (dump_enabled_p ())
5264 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5265 "type conversion to/from bit-precision "
5266 "unsupported.\n");
5267 return false;
5268 }
5269
5270 if (!vec_stmt) /* transformation not required. */
5271 {
5272 STMT_VINFO_TYPE (stmt_info) = assignment_vec_info_type;
5273 DUMP_VECT_SCOPE ("vectorizable_assignment");
5274 vect_model_simple_cost (stmt_info, ncopies, dt, ndts, slp_node, cost_vec);
5275 return true;
5276 }
5277
5278 /* Transform. */
5279 if (dump_enabled_p ())
5280 dump_printf_loc (MSG_NOTE, vect_location, "transform assignment.\n");
5281
5282 /* Handle def. */
5283 vec_dest = vect_create_destination_var (scalar_dest, vectype);
5284
5285 /* Handle use. */
5286 for (j = 0; j < ncopies; j++)
5287 {
5288 /* Handle uses. */
5289 if (j == 0)
5290 vect_get_vec_defs (op, NULL, stmt_info, &vec_oprnds, NULL, slp_node);
5291 else
5292 vect_get_vec_defs_for_stmt_copy (vinfo, &vec_oprnds, NULL);
5293
5294 /* Arguments are ready. Create the new vector stmt. */
5295 stmt_vec_info new_stmt_info = NULL;
5296 FOR_EACH_VEC_ELT (vec_oprnds, i, vop)
5297 {
5298 if (CONVERT_EXPR_CODE_P (code)
5299 || code == VIEW_CONVERT_EXPR)
5300 vop = build1 (VIEW_CONVERT_EXPR, vectype, vop);
5301 gassign *new_stmt = gimple_build_assign (vec_dest, vop);
5302 new_temp = make_ssa_name (vec_dest, new_stmt);
5303 gimple_assign_set_lhs (new_stmt, new_temp);
5304 new_stmt_info
5305 = vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
5306 if (slp_node)
5307 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt_info);
5308 }
5309
5310 if (slp_node)
5311 continue;
5312
5313 if (j == 0)
5314 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt_info;
5315 else
5316 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt_info;
5317
5318 prev_stmt_info = new_stmt_info;
5319 }
5320
5321 vec_oprnds.release ();
5322 return true;
5323 }
5324
5325
5326 /* Return TRUE if CODE (a shift operation) is supported for SCALAR_TYPE
5327 either as shift by a scalar or by a vector. */
5328
5329 bool
5330 vect_supportable_shift (enum tree_code code, tree scalar_type)
5331 {
5332
5333 machine_mode vec_mode;
5334 optab optab;
5335 int icode;
5336 tree vectype;
5337
5338 vectype = get_vectype_for_scalar_type (scalar_type);
5339 if (!vectype)
5340 return false;
5341
5342 optab = optab_for_tree_code (code, vectype, optab_scalar);
5343 if (!optab
5344 || optab_handler (optab, TYPE_MODE (vectype)) == CODE_FOR_nothing)
5345 {
5346 optab = optab_for_tree_code (code, vectype, optab_vector);
5347 if (!optab
5348 || (optab_handler (optab, TYPE_MODE (vectype))
5349 == CODE_FOR_nothing))
5350 return false;
5351 }
5352
5353 vec_mode = TYPE_MODE (vectype);
5354 icode = (int) optab_handler (optab, vec_mode);
5355 if (icode == CODE_FOR_nothing)
5356 return false;
5357
5358 return true;
5359 }
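
/* For example (illustrative only): a caller such as a pattern recognizer
   can test

     if (vect_supportable_shift (LSHIFT_EXPR, integer_type_node))
       ...

   which returns true when the target provides either the vector-by-scalar
   or the vector-by-vector shift optab for the vector mode that
   get_vectype_for_scalar_type chooses for "int".  */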
5360
5361
5362 /* Function vectorizable_shift.
5363
5364 Check if STMT_INFO performs a shift operation that can be vectorized.
5365 If VEC_STMT is also passed, vectorize the STMT_INFO: create a vectorized
5366 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
5367 Return true if STMT_INFO is vectorizable in this way. */
5368
5369 static bool
5370 vectorizable_shift (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
5371 stmt_vec_info *vec_stmt, slp_tree slp_node,
5372 stmt_vector_for_cost *cost_vec)
5373 {
5374 tree vec_dest;
5375 tree scalar_dest;
5376 tree op0, op1 = NULL;
5377 tree vec_oprnd1 = NULL_TREE;
5378 tree vectype;
5379 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
5380 enum tree_code code;
5381 machine_mode vec_mode;
5382 tree new_temp;
5383 optab optab;
5384 int icode;
5385 machine_mode optab_op2_mode;
5386 enum vect_def_type dt[2] = {vect_unknown_def_type, vect_unknown_def_type};
5387 int ndts = 2;
5388 stmt_vec_info prev_stmt_info;
5389 poly_uint64 nunits_in;
5390 poly_uint64 nunits_out;
5391 tree vectype_out;
5392 tree op1_vectype;
5393 int ncopies;
5394 int j, i;
5395 vec<tree> vec_oprnds0 = vNULL;
5396 vec<tree> vec_oprnds1 = vNULL;
5397 tree vop0, vop1;
5398 unsigned int k;
5399 bool scalar_shift_arg = true;
5400 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
5401 vec_info *vinfo = stmt_info->vinfo;
5402
5403 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
5404 return false;
5405
5406 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
5407 && ! vec_stmt)
5408 return false;
5409
5410 /* Is STMT a vectorizable binary/unary operation? */
5411 gassign *stmt = dyn_cast <gassign *> (stmt_info->stmt);
5412 if (!stmt)
5413 return false;
5414
5415 if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME)
5416 return false;
5417
5418 code = gimple_assign_rhs_code (stmt);
5419
5420 if (!(code == LSHIFT_EXPR || code == RSHIFT_EXPR || code == LROTATE_EXPR
5421 || code == RROTATE_EXPR))
5422 return false;
5423
5424 scalar_dest = gimple_assign_lhs (stmt);
5425 vectype_out = STMT_VINFO_VECTYPE (stmt_info);
5426 if (!type_has_mode_precision_p (TREE_TYPE (scalar_dest)))
5427 {
5428 if (dump_enabled_p ())
5429 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5430 "bit-precision shifts not supported.\n");
5431 return false;
5432 }
5433
5434 op0 = gimple_assign_rhs1 (stmt);
5435 if (!vect_is_simple_use (op0, vinfo, &dt[0], &vectype))
5436 {
5437 if (dump_enabled_p ())
5438 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5439 "use not simple.\n");
5440 return false;
5441 }
5442 /* If op0 is an external or constant def use a vector type with
5443 the same size as the output vector type. */
5444 if (!vectype)
5445 vectype = get_same_sized_vectype (TREE_TYPE (op0), vectype_out);
5446 if (vec_stmt)
5447 gcc_assert (vectype);
5448 if (!vectype)
5449 {
5450 if (dump_enabled_p ())
5451 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5452 "no vectype for scalar type\n");
5453 return false;
5454 }
5455
5456 nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
5457 nunits_in = TYPE_VECTOR_SUBPARTS (vectype);
5458 if (maybe_ne (nunits_out, nunits_in))
5459 return false;
5460
5461 op1 = gimple_assign_rhs2 (stmt);
5462 stmt_vec_info op1_def_stmt_info;
5463 if (!vect_is_simple_use (op1, vinfo, &dt[1], &op1_vectype,
5464 &op1_def_stmt_info))
5465 {
5466 if (dump_enabled_p ())
5467 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5468 "use not simple.\n");
5469 return false;
5470 }
5471
5472 /* Multiple types in SLP are handled by creating the appropriate number of
5473 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
5474 case of SLP. */
5475 if (slp_node)
5476 ncopies = 1;
5477 else
5478 ncopies = vect_get_num_copies (loop_vinfo, vectype);
5479
5480 gcc_assert (ncopies >= 1);
5481
5482 /* Determine whether the shift amount is a vector, or scalar. If the
5483 shift/rotate amount is a vector, use the vector/vector shift optabs. */
5484
5485 if ((dt[1] == vect_internal_def
5486 || dt[1] == vect_induction_def)
5487 && !slp_node)
5488 scalar_shift_arg = false;
5489 else if (dt[1] == vect_constant_def
5490 || dt[1] == vect_external_def
5491 || dt[1] == vect_internal_def)
5492 {
5493 /* In SLP, we need to check whether the shift count is the same
5494 in all the stmts of the node; in loops, if it is a constant or
5495 invariant, it is always a scalar shift. */
5496 if (slp_node)
5497 {
5498 vec<stmt_vec_info> stmts = SLP_TREE_SCALAR_STMTS (slp_node);
5499 stmt_vec_info slpstmt_info;
5500
5501 FOR_EACH_VEC_ELT (stmts, k, slpstmt_info)
5502 {
5503 gassign *slpstmt = as_a <gassign *> (slpstmt_info->stmt);
5504 if (!operand_equal_p (gimple_assign_rhs2 (slpstmt), op1, 0))
5505 scalar_shift_arg = false;
5506 }
5507 }
5508
5509 /* If the shift amount is computed by a pattern stmt we cannot
5510 use the scalar amount directly thus give up and use a vector
5511 shift. */
5512 if (op1_def_stmt_info && is_pattern_stmt_p (op1_def_stmt_info))
5513 scalar_shift_arg = false;
5514 }
5515 else
5516 {
5517 if (dump_enabled_p ())
5518 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5519 "operand mode requires invariant argument.\n");
5520 return false;
5521 }
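
/* For example (illustrative only): in
     a[i] = b[i] << n;
   with loop-invariant N the shift amount counts as scalar here, while in
     a[i] = b[i] << c[i];
   it is a loop-varying vector and the vector/vector optab is required.  */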
5522
5523 /* Vector shifted by vector. */
5524 if (!scalar_shift_arg)
5525 {
5526 optab = optab_for_tree_code (code, vectype, optab_vector);
5527 if (dump_enabled_p ())
5528 dump_printf_loc (MSG_NOTE, vect_location,
5529 "vector/vector shift/rotate found.\n");
5530
5531 if (!op1_vectype)
5532 op1_vectype = get_same_sized_vectype (TREE_TYPE (op1), vectype_out);
5533 if (op1_vectype == NULL_TREE
5534 || TYPE_MODE (op1_vectype) != TYPE_MODE (vectype))
5535 {
5536 if (dump_enabled_p ())
5537 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5538 "unusable type for last operand in"
5539 " vector/vector shift/rotate.\n");
5540 return false;
5541 }
5542 }
5543 /* See if the machine has a vector shifted by scalar insn and if not
5544 then see if it has a vector shifted by vector insn. */
5545 else
5546 {
5547 optab = optab_for_tree_code (code, vectype, optab_scalar);
5548 if (optab
5549 && optab_handler (optab, TYPE_MODE (vectype)) != CODE_FOR_nothing)
5550 {
5551 if (dump_enabled_p ())
5552 dump_printf_loc (MSG_NOTE, vect_location,
5553 "vector/scalar shift/rotate found.\n");
5554 }
5555 else
5556 {
5557 optab = optab_for_tree_code (code, vectype, optab_vector);
5558 if (optab
5559 && (optab_handler (optab, TYPE_MODE (vectype))
5560 != CODE_FOR_nothing))
5561 {
5562 scalar_shift_arg = false;
5563
5564 if (dump_enabled_p ())
5565 dump_printf_loc (MSG_NOTE, vect_location,
5566 "vector/vector shift/rotate found.\n");
5567
5568 /* Unlike the other binary operators, shifts/rotates have
5569 the rhs being int, instead of the same type as the lhs,
5570 so make sure the scalar is the right type if we are
5571 dealing with vectors of long long/long/short/char. */
5572 if (dt[1] == vect_constant_def)
5573 op1 = fold_convert (TREE_TYPE (vectype), op1);
5574 else if (!useless_type_conversion_p (TREE_TYPE (vectype),
5575 TREE_TYPE (op1)))
5576 {
5577 if (slp_node
5578 && TYPE_MODE (TREE_TYPE (vectype))
5579 != TYPE_MODE (TREE_TYPE (op1)))
5580 {
5581 if (dump_enabled_p ())
5582 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5583 "unusable type for last operand in"
5584 " vector/vector shift/rotate.\n");
5585 return false;
5586 }
5587 if (vec_stmt && !slp_node)
5588 {
5589 op1 = fold_convert (TREE_TYPE (vectype), op1);
5590 op1 = vect_init_vector (stmt_info, op1,
5591 TREE_TYPE (vectype), NULL);
5592 }
5593 }
5594 }
5595 }
5596 }
5597
5598 /* Supportable by target? */
5599 if (!optab)
5600 {
5601 if (dump_enabled_p ())
5602 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5603 "no optab.\n");
5604 return false;
5605 }
5606 vec_mode = TYPE_MODE (vectype);
5607 icode = (int) optab_handler (optab, vec_mode);
5608 if (icode == CODE_FOR_nothing)
5609 {
5610 if (dump_enabled_p ())
5611 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5612 "op not supported by target.\n");
5613 /* Check only during analysis. */
5614 if (maybe_ne (GET_MODE_SIZE (vec_mode), UNITS_PER_WORD)
5615 || (!vec_stmt
5616 && !vect_worthwhile_without_simd_p (vinfo, code)))
5617 return false;
5618 if (dump_enabled_p ())
5619 dump_printf_loc (MSG_NOTE, vect_location,
5620 "proceeding using word mode.\n");
5621 }
5622
5623 /* Worthwhile without SIMD support? Check only during analysis. */
5624 if (!vec_stmt
5625 && !VECTOR_MODE_P (TYPE_MODE (vectype))
5626 && !vect_worthwhile_without_simd_p (vinfo, code))
5627 {
5628 if (dump_enabled_p ())
5629 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5630 "not worthwhile without SIMD support.\n");
5631 return false;
5632 }
5633
5634 if (!vec_stmt) /* transformation not required. */
5635 {
5636 STMT_VINFO_TYPE (stmt_info) = shift_vec_info_type;
5637 DUMP_VECT_SCOPE ("vectorizable_shift");
5638 vect_model_simple_cost (stmt_info, ncopies, dt, ndts, slp_node, cost_vec);
5639 return true;
5640 }
5641
5642 /* Transform. */
5643
5644 if (dump_enabled_p ())
5645 dump_printf_loc (MSG_NOTE, vect_location,
5646 "transform binary/unary operation.\n");
5647
5648 /* Handle def. */
5649 vec_dest = vect_create_destination_var (scalar_dest, vectype);
5650
5651 prev_stmt_info = NULL;
5652 for (j = 0; j < ncopies; j++)
5653 {
5654 /* Handle uses. */
5655 if (j == 0)
5656 {
5657 if (scalar_shift_arg)
5658 {
5659 /* Vector shl and shr insn patterns can be defined with scalar
5660 operand 2 (shift operand). In this case, use constant or loop
5661 invariant op1 directly, without extending it to vector mode
5662 first. */
5663 optab_op2_mode = insn_data[icode].operand[2].mode;
5664 if (!VECTOR_MODE_P (optab_op2_mode))
5665 {
5666 if (dump_enabled_p ())
5667 dump_printf_loc (MSG_NOTE, vect_location,
5668 "operand 1 using scalar mode.\n");
5669 vec_oprnd1 = op1;
5670 vec_oprnds1.create (slp_node ? slp_node->vec_stmts_size : 1);
5671 vec_oprnds1.quick_push (vec_oprnd1);
5672 if (slp_node)
5673 {
5674 /* Store vec_oprnd1 for every vector stmt to be created
5675 for SLP_NODE. We check during the analysis that all
5676 the shift arguments are the same.
5677 TODO: Allow different constants for different vector
5678 stmts generated for an SLP instance. */
5679 for (k = 0; k < slp_node->vec_stmts_size - 1; k++)
5680 vec_oprnds1.quick_push (vec_oprnd1);
5681 }
5682 }
5683 }
5684
5685 /* vec_oprnd1 is available if operand 1 should be of a scalar type
5686 (a special case for certain kinds of vector shifts); otherwise,
5687 operand 1 should be of a vector type (the usual case). */
5688 if (vec_oprnd1)
5689 vect_get_vec_defs (op0, NULL_TREE, stmt_info, &vec_oprnds0, NULL,
5690 slp_node);
5691 else
5692 vect_get_vec_defs (op0, op1, stmt_info, &vec_oprnds0, &vec_oprnds1,
5693 slp_node);
5694 }
5695 else
5696 vect_get_vec_defs_for_stmt_copy (vinfo, &vec_oprnds0, &vec_oprnds1);
5697
5698 /* Arguments are ready. Create the new vector stmt. */
5699 stmt_vec_info new_stmt_info = NULL;
5700 FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
5701 {
5702 vop1 = vec_oprnds1[i];
5703 gassign *new_stmt = gimple_build_assign (vec_dest, code, vop0, vop1);
5704 new_temp = make_ssa_name (vec_dest, new_stmt);
5705 gimple_assign_set_lhs (new_stmt, new_temp);
5706 new_stmt_info
5707 = vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
5708 if (slp_node)
5709 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt_info);
5710 }
5711
5712 if (slp_node)
5713 continue;
5714
5715 if (j == 0)
5716 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt_info;
5717 else
5718 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt_info;
5719 prev_stmt_info = new_stmt_info;
5720 }
5721
5722 vec_oprnds0.release ();
5723 vec_oprnds1.release ();
5724
5725 return true;
5726 }
5727
5728
5729 /* Function vectorizable_operation.
5730
5731 Check if STMT_INFO performs a binary, unary or ternary operation that can
5732 be vectorized.
5733 If VEC_STMT is also passed, vectorize STMT_INFO: create a vectorized
5734 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
5735 Return true if STMT_INFO is vectorizable in this way. */
5736
5737 static bool
5738 vectorizable_operation (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
5739 stmt_vec_info *vec_stmt, slp_tree slp_node,
5740 stmt_vector_for_cost *cost_vec)
5741 {
5742 tree vec_dest;
5743 tree scalar_dest;
5744 tree op0, op1 = NULL_TREE, op2 = NULL_TREE;
5745 tree vectype;
5746 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
5747 enum tree_code code, orig_code;
5748 machine_mode vec_mode;
5749 tree new_temp;
5750 int op_type;
5751 optab optab;
5752 bool target_support_p;
5753 enum vect_def_type dt[3]
5754 = {vect_unknown_def_type, vect_unknown_def_type, vect_unknown_def_type};
5755 int ndts = 3;
5756 stmt_vec_info prev_stmt_info;
5757 poly_uint64 nunits_in;
5758 poly_uint64 nunits_out;
5759 tree vectype_out;
5760 int ncopies;
5761 int j, i;
5762 vec<tree> vec_oprnds0 = vNULL;
5763 vec<tree> vec_oprnds1 = vNULL;
5764 vec<tree> vec_oprnds2 = vNULL;
5765 tree vop0, vop1, vop2;
5766 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
5767 vec_info *vinfo = stmt_info->vinfo;
5768
5769 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
5770 return false;
5771
5772 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
5773 && ! vec_stmt)
5774 return false;
5775
5776 /* Is STMT a vectorizable binary/unary operation? */
5777 gassign *stmt = dyn_cast <gassign *> (stmt_info->stmt);
5778 if (!stmt)
5779 return false;
5780
5781 if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME)
5782 return false;
5783
5784 orig_code = code = gimple_assign_rhs_code (stmt);
5785
5786 /* For pointer addition and subtraction, we should use the normal
5787 plus and minus for the vector operation. */
5788 if (code == POINTER_PLUS_EXPR)
5789 code = PLUS_EXPR;
5790 if (code == POINTER_DIFF_EXPR)
5791 code = MINUS_EXPR;
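
/* For example (illustrative only): a POINTER_PLUS_EXPR such as
     q_1 = p_2 + 4;
   is vectorized as an element-wise PLUS_EXPR, and a POINTER_DIFF_EXPR
   such as
     d_1 = p_2 - q_3;
   as a MINUS_EXPR; the signedness fix-up for the latter result is done
   at transform time below.  */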
5792
5793 /* Support only unary or binary operations. */
5794 op_type = TREE_CODE_LENGTH (code);
5795 if (op_type != unary_op && op_type != binary_op && op_type != ternary_op)
5796 {
5797 if (dump_enabled_p ())
5798 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5799 "num. args = %d (not unary/binary/ternary op).\n",
5800 op_type);
5801 return false;
5802 }
5803
5804 scalar_dest = gimple_assign_lhs (stmt);
5805 vectype_out = STMT_VINFO_VECTYPE (stmt_info);
5806
5807 /* Most operations cannot handle bit-precision types without extra
5808 truncations. */
5809 if (!VECTOR_BOOLEAN_TYPE_P (vectype_out)
5810 && !type_has_mode_precision_p (TREE_TYPE (scalar_dest))
5811 /* Exception are bitwise binary operations. */
5812 && code != BIT_IOR_EXPR
5813 && code != BIT_XOR_EXPR
5814 && code != BIT_AND_EXPR)
5815 {
5816 if (dump_enabled_p ())
5817 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5818 "bit-precision arithmetic not supported.\n");
5819 return false;
5820 }
5821
5822 op0 = gimple_assign_rhs1 (stmt);
5823 if (!vect_is_simple_use (op0, vinfo, &dt[0], &vectype))
5824 {
5825 if (dump_enabled_p ())
5826 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5827 "use not simple.\n");
5828 return false;
5829 }
5830 /* If op0 is an external or constant def use a vector type with
5831 the same size as the output vector type. */
5832 if (!vectype)
5833 {
5834 /* For boolean type we cannot determine vectype by
5835 invariant value (don't know whether it is a vector
5836 of booleans or vector of integers). We use output
5837 vectype because operations on boolean don't change
5838 type. */
5839 if (VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (op0)))
5840 {
5841 if (!VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (scalar_dest)))
5842 {
5843 if (dump_enabled_p ())
5844 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5845 "not supported operation on bool value.\n");
5846 return false;
5847 }
5848 vectype = vectype_out;
5849 }
5850 else
5851 vectype = get_same_sized_vectype (TREE_TYPE (op0), vectype_out);
5852 }
5853 if (vec_stmt)
5854 gcc_assert (vectype);
5855 if (!vectype)
5856 {
5857 if (dump_enabled_p ())
5858 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5859 "no vectype for scalar type %T\n",
5860 TREE_TYPE (op0));
5861
5862 return false;
5863 }
5864
5865 nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
5866 nunits_in = TYPE_VECTOR_SUBPARTS (vectype);
5867 if (maybe_ne (nunits_out, nunits_in))
5868 return false;
5869
5870 if (op_type == binary_op || op_type == ternary_op)
5871 {
5872 op1 = gimple_assign_rhs2 (stmt);
5873 if (!vect_is_simple_use (op1, vinfo, &dt[1]))
5874 {
5875 if (dump_enabled_p ())
5876 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5877 "use not simple.\n");
5878 return false;
5879 }
5880 }
5881 if (op_type == ternary_op)
5882 {
5883 op2 = gimple_assign_rhs3 (stmt);
5884 if (!vect_is_simple_use (op2, vinfo, &dt[2]))
5885 {
5886 if (dump_enabled_p ())
5887 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5888 "use not simple.\n");
5889 return false;
5890 }
5891 }
5892
5893 /* Multiple types in SLP are handled by creating the appropriate number of
5894 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
5895 case of SLP. */
5896 if (slp_node)
5897 ncopies = 1;
5898 else
5899 ncopies = vect_get_num_copies (loop_vinfo, vectype);
5900
5901 gcc_assert (ncopies >= 1);
5902
5903 /* Shifts are handled in vectorizable_shift (). */
5904 if (code == LSHIFT_EXPR || code == RSHIFT_EXPR || code == LROTATE_EXPR
5905 || code == RROTATE_EXPR)
5906 return false;
5907
5908 /* Supportable by target? */
5909
5910 vec_mode = TYPE_MODE (vectype);
5911 if (code == MULT_HIGHPART_EXPR)
5912 target_support_p = can_mult_highpart_p (vec_mode, TYPE_UNSIGNED (vectype));
5913 else
5914 {
5915 optab = optab_for_tree_code (code, vectype, optab_default);
5916 if (!optab)
5917 {
5918 if (dump_enabled_p ())
5919 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5920 "no optab.\n");
5921 return false;
5922 }
5923 target_support_p = (optab_handler (optab, vec_mode)
5924 != CODE_FOR_nothing);
5925 }
5926
5927 if (!target_support_p)
5928 {
5929 if (dump_enabled_p ())
5930 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5931 "op not supported by target.\n");
5932 /* Check only during analysis. */
5933 if (maybe_ne (GET_MODE_SIZE (vec_mode), UNITS_PER_WORD)
5934 || (!vec_stmt && !vect_worthwhile_without_simd_p (vinfo, code)))
5935 return false;
5936 if (dump_enabled_p ())
5937 dump_printf_loc (MSG_NOTE, vect_location,
5938 "proceeding using word mode.\n");
5939 }
5940
5941 /* Worthwhile without SIMD support? Check only during analysis. */
5942 if (!VECTOR_MODE_P (vec_mode)
5943 && !vec_stmt
5944 && !vect_worthwhile_without_simd_p (vinfo, code))
5945 {
5946 if (dump_enabled_p ())
5947 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5948 "not worthwhile without SIMD support.\n");
5949 return false;
5950 }
5951
5952 if (!vec_stmt) /* transformation not required. */
5953 {
5954 STMT_VINFO_TYPE (stmt_info) = op_vec_info_type;
5955 DUMP_VECT_SCOPE ("vectorizable_operation");
5956 vect_model_simple_cost (stmt_info, ncopies, dt, ndts, slp_node, cost_vec);
5957 return true;
5958 }
5959
5960 /* Transform. */
5961
5962 if (dump_enabled_p ())
5963 dump_printf_loc (MSG_NOTE, vect_location,
5964 "transform binary/unary operation.\n");
5965
5966 /* POINTER_DIFF_EXPR has pointer arguments which are vectorized as
5967 vectors with unsigned elements, but the result is signed. So, we
5968 need to compute the MINUS_EXPR into vectype temporary and
5969 VIEW_CONVERT_EXPR it into the final vectype_out result. */
5970 tree vec_cvt_dest = NULL_TREE;
5971 if (orig_code == POINTER_DIFF_EXPR)
5972 {
5973 vec_dest = vect_create_destination_var (scalar_dest, vectype);
5974 vec_cvt_dest = vect_create_destination_var (scalar_dest, vectype_out);
5975 }
5976 /* Handle def. */
5977 else
5978 vec_dest = vect_create_destination_var (scalar_dest, vectype_out);
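
/* Illustrative sketch (names are made up) of the POINTER_DIFF_EXPR case
   handled above; the statements generated further down are, schematically,
     vect_tmp = vect_p0 - vect_p1;                       // unsigned VECTYPE
     vect_d = VIEW_CONVERT_EXPR<vectype_out> (vect_tmp); // signed result
   so the subtraction is carried out in the unsigned element type and only
   the final value is reinterpreted as signed.  */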
5979
5980 /* In case the vectorization factor (VF) is bigger than the number
5981 of elements that we can fit in a vectype (nunits), we have to generate
5982 more than one vector stmt - i.e - we need to "unroll" the
5983 vector stmt by a factor VF/nunits. In doing so, we record a pointer
5984 from one copy of the vector stmt to the next, in the field
5985 STMT_VINFO_RELATED_STMT. This is necessary in order to allow following
5986 stages to find the correct vector defs to be used when vectorizing
5987 stmts that use the defs of the current stmt. The example below
5988 illustrates the vectorization process when VF=16 and nunits=4 (i.e.,
5989 we need to create 4 vectorized stmts):
5990
5991 before vectorization:
5992 RELATED_STMT VEC_STMT
5993 S1: x = memref - -
5994 S2: z = x + 1 - -
5995
5996 step 1: vectorize stmt S1 (done in vectorizable_load. See more details
5997 there):
5998 RELATED_STMT VEC_STMT
5999 VS1_0: vx0 = memref0 VS1_1 -
6000 VS1_1: vx1 = memref1 VS1_2 -
6001 VS1_2: vx2 = memref2 VS1_3 -
6002 VS1_3: vx3 = memref3 - -
6003 S1: x = load - VS1_0
6004 S2: z = x + 1 - -
6005
6006 step2: vectorize stmt S2 (done here):
6007 To vectorize stmt S2 we first need to find the relevant vector
6008 def for the first operand 'x'. This is, as usual, obtained from
6009 the vector stmt recorded in the STMT_VINFO_VEC_STMT of the stmt
6010 that defines 'x' (S1). This way we find the stmt VS1_0, and the
6011 relevant vector def 'vx0'. Having found 'vx0' we can generate
6012 the vector stmt VS2_0, and as usual, record it in the
6013 STMT_VINFO_VEC_STMT of stmt S2.
6014 When creating the second copy (VS2_1), we obtain the relevant vector
6015 def from the vector stmt recorded in the STMT_VINFO_RELATED_STMT of
6016 stmt VS1_0. This way we find the stmt VS1_1 and the relevant
6017 vector def 'vx1'. Using 'vx1' we create stmt VS2_1 and record a
6018 pointer to it in the STMT_VINFO_RELATED_STMT of the vector stmt VS2_0.
6019 Similarly when creating stmts VS2_2 and VS2_3. This is the resulting
6020 chain of stmts and pointers:
6021 RELATED_STMT VEC_STMT
6022 VS1_0: vx0 = memref0 VS1_1 -
6023 VS1_1: vx1 = memref1 VS1_2 -
6024 VS1_2: vx2 = memref2 VS1_3 -
6025 VS1_3: vx3 = memref3 - -
6026 S1: x = load - VS1_0
6027 VS2_0: vz0 = vx0 + v1 VS2_1 -
6028 VS2_1: vz1 = vx1 + v1 VS2_2 -
6029 VS2_2: vz2 = vx2 + v1 VS2_3 -
6030 VS2_3: vz3 = vx3 + v1 - -
6031 S2: z = x + 1 - VS2_0 */
6032
6033 prev_stmt_info = NULL;
6034 for (j = 0; j < ncopies; j++)
6035 {
6036 /* Handle uses. */
6037 if (j == 0)
6038 {
6039 if (op_type == binary_op)
6040 vect_get_vec_defs (op0, op1, stmt_info, &vec_oprnds0, &vec_oprnds1,
6041 slp_node);
6042 else if (op_type == ternary_op)
6043 {
6044 if (slp_node)
6045 {
6046 auto_vec<tree> ops(3);
6047 ops.quick_push (op0);
6048 ops.quick_push (op1);
6049 ops.quick_push (op2);
6050 auto_vec<vec<tree> > vec_defs(3);
6051 vect_get_slp_defs (ops, slp_node, &vec_defs);
6052 vec_oprnds0 = vec_defs[0];
6053 vec_oprnds1 = vec_defs[1];
6054 vec_oprnds2 = vec_defs[2];
6055 }
6056 else
6057 {
6058 vect_get_vec_defs (op0, op1, stmt_info, &vec_oprnds0,
6059 &vec_oprnds1, NULL);
6060 vect_get_vec_defs (op2, NULL_TREE, stmt_info, &vec_oprnds2,
6061 NULL, NULL);
6062 }
6063 }
6064 else
6065 vect_get_vec_defs (op0, NULL_TREE, stmt_info, &vec_oprnds0, NULL,
6066 slp_node);
6067 }
6068 else
6069 {
6070 vect_get_vec_defs_for_stmt_copy (vinfo, &vec_oprnds0, &vec_oprnds1);
6071 if (op_type == ternary_op)
6072 {
6073 tree vec_oprnd = vec_oprnds2.pop ();
6074 vec_oprnds2.quick_push (vect_get_vec_def_for_stmt_copy (vinfo,
6075 vec_oprnd));
6076 }
6077 }
6078
6079 /* Arguments are ready. Create the new vector stmt. */
6080 stmt_vec_info new_stmt_info = NULL;
6081 FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
6082 {
6083 vop1 = ((op_type == binary_op || op_type == ternary_op)
6084 ? vec_oprnds1[i] : NULL_TREE);
6085 vop2 = ((op_type == ternary_op)
6086 ? vec_oprnds2[i] : NULL_TREE);
6087 gassign *new_stmt = gimple_build_assign (vec_dest, code,
6088 vop0, vop1, vop2);
6089 new_temp = make_ssa_name (vec_dest, new_stmt);
6090 gimple_assign_set_lhs (new_stmt, new_temp);
6091 new_stmt_info
6092 = vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
6093 if (vec_cvt_dest)
6094 {
6095 new_temp = build1 (VIEW_CONVERT_EXPR, vectype_out, new_temp);
6096 gassign *new_stmt
6097 = gimple_build_assign (vec_cvt_dest, VIEW_CONVERT_EXPR,
6098 new_temp);
6099 new_temp = make_ssa_name (vec_cvt_dest, new_stmt);
6100 gimple_assign_set_lhs (new_stmt, new_temp);
6101 new_stmt_info
6102 = vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
6103 }
6104 if (slp_node)
6105 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt_info);
6106 }
6107
6108 if (slp_node)
6109 continue;
6110
6111 if (j == 0)
6112 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt_info;
6113 else
6114 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt_info;
6115 prev_stmt_info = new_stmt_info;
6116 }
6117
6118 vec_oprnds0.release ();
6119 vec_oprnds1.release ();
6120 vec_oprnds2.release ();
6121
6122 return true;
6123 }
6124
6125 /* A helper function to ensure data reference DR_INFO's base alignment. */
6126
6127 static void
6128 ensure_base_align (dr_vec_info *dr_info)
6129 {
6130 if (dr_info->misalignment == DR_MISALIGNMENT_UNINITIALIZED)
6131 return;
6132
6133 if (dr_info->base_misaligned)
6134 {
6135 tree base_decl = dr_info->base_decl;
6136
6137 unsigned int align_base_to
6138 = DR_TARGET_ALIGNMENT (dr_info) * BITS_PER_UNIT;
6139
6140 if (decl_in_symtab_p (base_decl))
6141 symtab_node::get (base_decl)->increase_alignment (align_base_to);
6142 else
6143 {
6144 SET_DECL_ALIGN (base_decl, align_base_to);
6145 DECL_USER_ALIGN (base_decl) = 1;
6146 }
6147 dr_info->base_misaligned = false;
6148 }
6149 }
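
/* For example (illustrative only): if the alignment analysis decided to
   assume that the base object of a data reference, say a file-scope
   "double a[256]" with 8-byte DECL_ALIGN, is aligned to
   DR_TARGET_ALIGNMENT (e.g. 16 or 32 bytes), ensure_base_align raises the
   declaration's alignment accordingly so that the aligned access scheme
   chosen during analysis remains valid at transform time.  */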
6150
6151
6152 /* Function get_group_alias_ptr_type.
6153
6154 Return the alias type for the group starting at FIRST_STMT_INFO. */
6155
6156 static tree
6157 get_group_alias_ptr_type (stmt_vec_info first_stmt_info)
6158 {
6159 struct data_reference *first_dr, *next_dr;
6160
6161 first_dr = STMT_VINFO_DATA_REF (first_stmt_info);
6162 stmt_vec_info next_stmt_info = DR_GROUP_NEXT_ELEMENT (first_stmt_info);
6163 while (next_stmt_info)
6164 {
6165 next_dr = STMT_VINFO_DATA_REF (next_stmt_info);
6166 if (get_alias_set (DR_REF (first_dr))
6167 != get_alias_set (DR_REF (next_dr)))
6168 {
6169 if (dump_enabled_p ())
6170 dump_printf_loc (MSG_NOTE, vect_location,
6171 "conflicting alias set types.\n");
6172 return ptr_type_node;
6173 }
6174 next_stmt_info = DR_GROUP_NEXT_ELEMENT (next_stmt_info);
6175 }
6176 return reference_alias_ptr_type (DR_REF (first_dr));
6177 }
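
/* For example (illustrative only): if the stmts of a store group write an
   "int" field and a "float" field of the same structure, their alias sets
   differ and get_group_alias_ptr_type returns ptr_type_node, which aliases
   everything; otherwise the alias pointer type of the first reference is
   reused for the whole group.  */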
6178
6179
6180 /* Function vectorizable_store.
6181
6182 Check if STMT_INFO defines a non scalar data-ref (array/pointer/structure)
6183 that can be vectorized.
6184 If VEC_STMT is also passed, vectorize STMT_INFO: create a vectorized
6185 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
6186 Return true if STMT_INFO is vectorizable in this way. */
6187
6188 static bool
6189 vectorizable_store (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
6190 stmt_vec_info *vec_stmt, slp_tree slp_node,
6191 stmt_vector_for_cost *cost_vec)
6192 {
6193 tree data_ref;
6194 tree op;
6195 tree vec_oprnd = NULL_TREE;
6196 tree elem_type;
6197 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
6198 struct loop *loop = NULL;
6199 machine_mode vec_mode;
6200 tree dummy;
6201 enum dr_alignment_support alignment_support_scheme;
6202 enum vect_def_type rhs_dt = vect_unknown_def_type;
6203 enum vect_def_type mask_dt = vect_unknown_def_type;
6204 stmt_vec_info prev_stmt_info = NULL;
6205 tree dataref_ptr = NULL_TREE;
6206 tree dataref_offset = NULL_TREE;
6207 gimple *ptr_incr = NULL;
6208 int ncopies;
6209 int j;
6210 stmt_vec_info first_stmt_info;
6211 bool grouped_store;
6212 unsigned int group_size, i;
6213 vec<tree> oprnds = vNULL;
6214 vec<tree> result_chain = vNULL;
6215 tree offset = NULL_TREE;
6216 vec<tree> vec_oprnds = vNULL;
6217 bool slp = (slp_node != NULL);
6218 unsigned int vec_num;
6219 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
6220 vec_info *vinfo = stmt_info->vinfo;
6221 tree aggr_type;
6222 gather_scatter_info gs_info;
6223 poly_uint64 vf;
6224 vec_load_store_type vls_type;
6225 tree ref_type;
6226
6227 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
6228 return false;
6229
6230 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
6231 && ! vec_stmt)
6232 return false;
6233
6234 /* Is this a vectorizable store? */
6235
6236 tree mask = NULL_TREE, mask_vectype = NULL_TREE;
6237 if (gassign *assign = dyn_cast <gassign *> (stmt_info->stmt))
6238 {
6239 tree scalar_dest = gimple_assign_lhs (assign);
6240 if (TREE_CODE (scalar_dest) == VIEW_CONVERT_EXPR
6241 && is_pattern_stmt_p (stmt_info))
6242 scalar_dest = TREE_OPERAND (scalar_dest, 0);
6243 if (TREE_CODE (scalar_dest) != ARRAY_REF
6244 && TREE_CODE (scalar_dest) != BIT_FIELD_REF
6245 && TREE_CODE (scalar_dest) != INDIRECT_REF
6246 && TREE_CODE (scalar_dest) != COMPONENT_REF
6247 && TREE_CODE (scalar_dest) != IMAGPART_EXPR
6248 && TREE_CODE (scalar_dest) != REALPART_EXPR
6249 && TREE_CODE (scalar_dest) != MEM_REF)
6250 return false;
6251 }
6252 else
6253 {
6254 gcall *call = dyn_cast <gcall *> (stmt_info->stmt);
6255 if (!call || !gimple_call_internal_p (call))
6256 return false;
6257
6258 internal_fn ifn = gimple_call_internal_fn (call);
6259 if (!internal_store_fn_p (ifn))
6260 return false;
6261
6262 if (slp_node != NULL)
6263 {
6264 if (dump_enabled_p ())
6265 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6266 "SLP of masked stores not supported.\n");
6267 return false;
6268 }
6269
6270 int mask_index = internal_fn_mask_index (ifn);
6271 if (mask_index >= 0)
6272 {
6273 mask = gimple_call_arg (call, mask_index);
6274 if (!vect_check_load_store_mask (stmt_info, mask, &mask_dt,
6275 &mask_vectype))
6276 return false;
6277 }
6278 }
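
/* For example (illustrative only): the internal-call form above covers
   masked stores produced by if-conversion, conceptually
     .MASK_STORE (addr, align, mask, value);
   as well as scatter stores, while the gassign form covers ordinary
   array/pointer/structure stores.  */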
6279
6280 op = vect_get_store_rhs (stmt_info);
6281
6282 /* Cannot have hybrid store SLP -- that would mean storing to the
6283 same location twice. */
6284 gcc_assert (slp == PURE_SLP_STMT (stmt_info));
6285
6286 tree vectype = STMT_VINFO_VECTYPE (stmt_info), rhs_vectype = NULL_TREE;
6287 poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
6288
6289 if (loop_vinfo)
6290 {
6291 loop = LOOP_VINFO_LOOP (loop_vinfo);
6292 vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
6293 }
6294 else
6295 vf = 1;
6296
6297 /* Multiple types in SLP are handled by creating the appropriate number of
6298 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
6299 case of SLP. */
6300 if (slp)
6301 ncopies = 1;
6302 else
6303 ncopies = vect_get_num_copies (loop_vinfo, vectype);
6304
6305 gcc_assert (ncopies >= 1);
6306
6307 /* FORNOW. This restriction should be relaxed. */
6308 if (loop && nested_in_vect_loop_p (loop, stmt_info) && ncopies > 1)
6309 {
6310 if (dump_enabled_p ())
6311 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6312 "multiple types in nested loop.\n");
6313 return false;
6314 }
6315
6316 if (!vect_check_store_rhs (stmt_info, op, &rhs_dt, &rhs_vectype, &vls_type))
6317 return false;
6318
6319 elem_type = TREE_TYPE (vectype);
6320 vec_mode = TYPE_MODE (vectype);
6321
6322 if (!STMT_VINFO_DATA_REF (stmt_info))
6323 return false;
6324
6325 vect_memory_access_type memory_access_type;
6326 if (!get_load_store_type (stmt_info, vectype, slp, mask, vls_type, ncopies,
6327 &memory_access_type, &gs_info))
6328 return false;
6329
6330 if (mask)
6331 {
6332 if (memory_access_type == VMAT_CONTIGUOUS)
6333 {
6334 if (!VECTOR_MODE_P (vec_mode)
6335 || !can_vec_mask_load_store_p (vec_mode,
6336 TYPE_MODE (mask_vectype), false))
6337 return false;
6338 }
6339 else if (memory_access_type != VMAT_LOAD_STORE_LANES
6340 && (memory_access_type != VMAT_GATHER_SCATTER || gs_info.decl))
6341 {
6342 if (dump_enabled_p ())
6343 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6344 "unsupported access type for masked store.\n");
6345 return false;
6346 }
6347 }
6348 else
6349 {
6350 /* FORNOW. In some cases we can vectorize even if the data type is
6351 not supported (e.g. array initialization with 0). */
6352 if (optab_handler (mov_optab, vec_mode) == CODE_FOR_nothing)
6353 return false;
6354 }
6355
6356 dr_vec_info *dr_info = STMT_VINFO_DR_INFO (stmt_info), *first_dr_info = NULL;
6357 grouped_store = (STMT_VINFO_GROUPED_ACCESS (stmt_info)
6358 && memory_access_type != VMAT_GATHER_SCATTER
6359 && (slp || memory_access_type != VMAT_CONTIGUOUS));
6360 if (grouped_store)
6361 {
6362 first_stmt_info = DR_GROUP_FIRST_ELEMENT (stmt_info);
6363 first_dr_info = STMT_VINFO_DR_INFO (first_stmt_info);
6364 group_size = DR_GROUP_SIZE (first_stmt_info);
6365 }
6366 else
6367 {
6368 first_stmt_info = stmt_info;
6369 first_dr_info = dr_info;
6370 group_size = vec_num = 1;
6371 }
6372
6373 if (!vec_stmt) /* transformation not required. */
6374 {
6375 STMT_VINFO_MEMORY_ACCESS_TYPE (stmt_info) = memory_access_type;
6376
6377 if (loop_vinfo
6378 && LOOP_VINFO_CAN_FULLY_MASK_P (loop_vinfo))
6379 check_load_store_masking (loop_vinfo, vectype, vls_type, group_size,
6380 memory_access_type, &gs_info);
6381
6382 STMT_VINFO_TYPE (stmt_info) = store_vec_info_type;
6383 vect_model_store_cost (stmt_info, ncopies, rhs_dt, memory_access_type,
6384 vls_type, slp_node, cost_vec);
6385 return true;
6386 }
6387 gcc_assert (memory_access_type == STMT_VINFO_MEMORY_ACCESS_TYPE (stmt_info));
6388
6389 /* Transform. */
6390
6391 ensure_base_align (dr_info);
6392
6393 if (memory_access_type == VMAT_GATHER_SCATTER && gs_info.decl)
6394 {
6395 tree vec_oprnd0 = NULL_TREE, vec_oprnd1 = NULL_TREE, src;
6396 tree arglist = TYPE_ARG_TYPES (TREE_TYPE (gs_info.decl));
6397 tree rettype, srctype, ptrtype, idxtype, masktype, scaletype;
6398 tree ptr, mask, var, scale, perm_mask = NULL_TREE;
6399 edge pe = loop_preheader_edge (loop);
6400 gimple_seq seq;
6401 basic_block new_bb;
6402 enum { NARROW, NONE, WIDEN } modifier;
6403 poly_uint64 scatter_off_nunits
6404 = TYPE_VECTOR_SUBPARTS (gs_info.offset_vectype);
6405
6406 if (known_eq (nunits, scatter_off_nunits))
6407 modifier = NONE;
6408 else if (known_eq (nunits * 2, scatter_off_nunits))
6409 {
6410 modifier = WIDEN;
6411
6412 /* Currently gathers and scatters are only supported for
6413 fixed-length vectors. */
6414 unsigned int count = scatter_off_nunits.to_constant ();
6415 vec_perm_builder sel (count, count, 1);
6416 for (i = 0; i < (unsigned int) count; ++i)
6417 sel.quick_push (i | (count / 2));
6418
6419 vec_perm_indices indices (sel, 1, count);
6420 perm_mask = vect_gen_perm_mask_checked (gs_info.offset_vectype,
6421 indices);
6422 gcc_assert (perm_mask != NULL_TREE);
6423 }
6424 else if (known_eq (nunits, scatter_off_nunits * 2))
6425 {
6426 modifier = NARROW;
6427
6428 /* Currently gathers and scatters are only supported for
6429 fixed-length vectors. */
6430 unsigned int count = nunits.to_constant ();
6431 vec_perm_builder sel (count, count, 1);
6432 for (i = 0; i < (unsigned int) count; ++i)
6433 sel.quick_push (i | (count / 2));
6434
6435 vec_perm_indices indices (sel, 2, count);
6436 perm_mask = vect_gen_perm_mask_checked (vectype, indices);
6437 gcc_assert (perm_mask != NULL_TREE);
6438 ncopies *= 2;
6439 }
6440 else
6441 gcc_unreachable ();
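
/* For example (illustrative only), with fixed-length vectors: if the data
   vector has 4 elements and the offset vector 8 (WIDEN), the permutation
   built above is {4, 5, 6, 7, 4, 5, 6, 7}, i.e. the high half of the
   offset vector is picked for the odd-numbered copies; in the NARROW case
   (data vector twice as long as the offset vector) the analogous pattern
   picks the high half of the source data vector and NCOPIES is doubled.  */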
6442
6443 rettype = TREE_TYPE (TREE_TYPE (gs_info.decl));
6444 ptrtype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
6445 masktype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
6446 idxtype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
6447 srctype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
6448 scaletype = TREE_VALUE (arglist);
6449
6450 gcc_checking_assert (TREE_CODE (masktype) == INTEGER_TYPE
6451 && TREE_CODE (rettype) == VOID_TYPE);
6452
6453 ptr = fold_convert (ptrtype, gs_info.base);
6454 if (!is_gimple_min_invariant (ptr))
6455 {
6456 ptr = force_gimple_operand (ptr, &seq, true, NULL_TREE);
6457 new_bb = gsi_insert_seq_on_edge_immediate (pe, seq);
6458 gcc_assert (!new_bb);
6459 }
6460
6461 /* Currently we support only unconditional scatter stores,
6462 so mask should be all ones. */
6463 mask = build_int_cst (masktype, -1);
6464 mask = vect_init_vector (stmt_info, mask, masktype, NULL);
6465
6466 scale = build_int_cst (scaletype, gs_info.scale);
6467
6468 prev_stmt_info = NULL;
6469 for (j = 0; j < ncopies; ++j)
6470 {
6471 if (j == 0)
6472 {
6473 src = vec_oprnd1
6474 = vect_get_vec_def_for_operand (op, stmt_info);
6475 op = vec_oprnd0
6476 = vect_get_vec_def_for_operand (gs_info.offset, stmt_info);
6477 }
6478 else if (modifier != NONE && (j & 1))
6479 {
6480 if (modifier == WIDEN)
6481 {
6482 src = vec_oprnd1
6483 = vect_get_vec_def_for_stmt_copy (vinfo, vec_oprnd1);
6484 op = permute_vec_elements (vec_oprnd0, vec_oprnd0, perm_mask,
6485 stmt_info, gsi);
6486 }
6487 else if (modifier == NARROW)
6488 {
6489 src = permute_vec_elements (vec_oprnd1, vec_oprnd1, perm_mask,
6490 stmt_info, gsi);
6491 op = vec_oprnd0
6492 = vect_get_vec_def_for_stmt_copy (vinfo, vec_oprnd0);
6493 }
6494 else
6495 gcc_unreachable ();
6496 }
6497 else
6498 {
6499 src = vec_oprnd1
6500 = vect_get_vec_def_for_stmt_copy (vinfo, vec_oprnd1);
6501 op = vec_oprnd0
6502 = vect_get_vec_def_for_stmt_copy (vinfo, vec_oprnd0);
6503 }
6504
6505 if (!useless_type_conversion_p (srctype, TREE_TYPE (src)))
6506 {
6507 gcc_assert (known_eq (TYPE_VECTOR_SUBPARTS (TREE_TYPE (src)),
6508 TYPE_VECTOR_SUBPARTS (srctype)));
6509 var = vect_get_new_ssa_name (srctype, vect_simple_var);
6510 src = build1 (VIEW_CONVERT_EXPR, srctype, src);
6511 gassign *new_stmt
6512 = gimple_build_assign (var, VIEW_CONVERT_EXPR, src);
6513 vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
6514 src = var;
6515 }
6516
6517 if (!useless_type_conversion_p (idxtype, TREE_TYPE (op)))
6518 {
6519 gcc_assert (known_eq (TYPE_VECTOR_SUBPARTS (TREE_TYPE (op)),
6520 TYPE_VECTOR_SUBPARTS (idxtype)));
6521 var = vect_get_new_ssa_name (idxtype, vect_simple_var);
6522 op = build1 (VIEW_CONVERT_EXPR, idxtype, op);
6523 gassign *new_stmt
6524 = gimple_build_assign (var, VIEW_CONVERT_EXPR, op);
6525 vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
6526 op = var;
6527 }
6528
6529 gcall *new_stmt
6530 = gimple_build_call (gs_info.decl, 5, ptr, mask, op, src, scale);
6531 stmt_vec_info new_stmt_info
6532 = vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
6533
6534 if (prev_stmt_info == NULL)
6535 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt_info;
6536 else
6537 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt_info;
6538 prev_stmt_info = new_stmt_info;
6539 }
6540 return true;
6541 }
6542
6543 if (STMT_VINFO_GROUPED_ACCESS (stmt_info))
6544 DR_GROUP_STORE_COUNT (DR_GROUP_FIRST_ELEMENT (stmt_info))++;
6545
6546 if (grouped_store)
6547 {
6548 /* FORNOW */
6549 gcc_assert (!loop || !nested_in_vect_loop_p (loop, stmt_info));
6550
6551 /* We vectorize all the stmts of the interleaving group when we
6552 reach the last stmt in the group. */
6553 if (DR_GROUP_STORE_COUNT (first_stmt_info)
6554 < DR_GROUP_SIZE (first_stmt_info)
6555 && !slp)
6556 {
6557 *vec_stmt = NULL;
6558 return true;
6559 }
6560
6561 if (slp)
6562 {
6563 grouped_store = false;
6564 /* VEC_NUM is the number of vect stmts to be created for this
6565 group. */
6566 vec_num = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
6567 first_stmt_info = SLP_TREE_SCALAR_STMTS (slp_node)[0];
6568 gcc_assert (DR_GROUP_FIRST_ELEMENT (first_stmt_info)
6569 == first_stmt_info);
6570 first_dr_info = STMT_VINFO_DR_INFO (first_stmt_info);
6571 op = vect_get_store_rhs (first_stmt_info);
6572 }
6573 else
6574 /* VEC_NUM is the number of vect stmts to be created for this
6575 group. */
6576 vec_num = group_size;
6577
6578 ref_type = get_group_alias_ptr_type (first_stmt_info);
6579 }
6580 else
6581 ref_type = reference_alias_ptr_type (DR_REF (first_dr_info->dr));
6582
6583 if (dump_enabled_p ())
6584 dump_printf_loc (MSG_NOTE, vect_location,
6585 "transform store. ncopies = %d\n", ncopies);
6586
6587 if (memory_access_type == VMAT_ELEMENTWISE
6588 || memory_access_type == VMAT_STRIDED_SLP)
6589 {
6590 gimple_stmt_iterator incr_gsi;
6591 bool insert_after;
6592 gimple *incr;
6593 tree offvar;
6594 tree ivstep;
6595 tree running_off;
6596 tree stride_base, stride_step, alias_off;
6597 tree vec_oprnd;
6598 unsigned int g;
6599 /* Checked by get_load_store_type. */
6600 unsigned int const_nunits = nunits.to_constant ();
6601
6602 gcc_assert (!LOOP_VINFO_FULLY_MASKED_P (loop_vinfo));
6603 gcc_assert (!nested_in_vect_loop_p (loop, stmt_info));
6604
6605 stride_base
6606 = fold_build_pointer_plus
6607 (DR_BASE_ADDRESS (first_dr_info->dr),
6608 size_binop (PLUS_EXPR,
6609 convert_to_ptrofftype (DR_OFFSET (first_dr_info->dr)),
6610 convert_to_ptrofftype (DR_INIT (first_dr_info->dr))));
6611 stride_step = fold_convert (sizetype, DR_STEP (first_dr_info->dr));
6612
6613 /* For a store with loop-invariant (but other than power-of-2)
6614 stride (i.e. not a grouped access) like so:
6615
6616 for (i = 0; i < n; i += stride)
6617 array[i] = ...;
6618
6619 we generate a new induction variable and new stores from
6620 the components of the (vectorized) rhs:
6621
6622 for (j = 0; ; j += VF*stride)
6623 vectemp = ...;
6624 tmp1 = vectemp[0];
6625 array[j] = tmp1;
6626 tmp2 = vectemp[1];
6627 array[j + stride] = tmp2;
6628 ...
6629 */
6630
6631 unsigned nstores = const_nunits;
6632 unsigned lnel = 1;
6633 tree ltype = elem_type;
6634 tree lvectype = vectype;
6635 if (slp)
6636 {
6637 if (group_size < const_nunits
6638 && const_nunits % group_size == 0)
6639 {
6640 nstores = const_nunits / group_size;
6641 lnel = group_size;
6642 ltype = build_vector_type (elem_type, group_size);
6643 lvectype = vectype;
6644
6645 /* First check whether the vec_extract optab supports extraction
6646 of these subvectors directly; if not, use the fallbacks below. */
6647 scalar_mode elmode = SCALAR_TYPE_MODE (elem_type);
6648 machine_mode vmode;
6649 if (!mode_for_vector (elmode, group_size).exists (&vmode)
6650 || !VECTOR_MODE_P (vmode)
6651 || !targetm.vector_mode_supported_p (vmode)
6652 || (convert_optab_handler (vec_extract_optab,
6653 TYPE_MODE (vectype), vmode)
6654 == CODE_FOR_nothing))
6655 {
6656 /* Try to avoid emitting an extract of vector elements
6657 by performing the extracts using an integer type of the
6658 same size, extracting from a vector of those and then
6659 re-interpreting it as the original vector type if
6660 supported. */
6661 unsigned lsize
6662 = group_size * GET_MODE_BITSIZE (elmode);
6663 elmode = int_mode_for_size (lsize, 0).require ();
6664 unsigned int lnunits = const_nunits / group_size;
6665 /* If we can't construct such a vector fall back to
6666 element extracts from the original vector type and
6667 element size stores. */
6668 if (mode_for_vector (elmode, lnunits).exists (&vmode)
6669 && VECTOR_MODE_P (vmode)
6670 && targetm.vector_mode_supported_p (vmode)
6671 && (convert_optab_handler (vec_extract_optab,
6672 vmode, elmode)
6673 != CODE_FOR_nothing))
6674 {
6675 nstores = lnunits;
6676 lnel = group_size;
6677 ltype = build_nonstandard_integer_type (lsize, 1);
6678 lvectype = build_vector_type (ltype, nstores);
6679 }
6680 /* Else fall back to vector extraction anyway.
6681 Fewer stores are more important than avoiding spilling
6682 of the vector we extract from. Compared to the
6683 construction case in vectorizable_load no store-forwarding
6684 issue exists here for reasonable archs. */
6685 }
6686 }
6687 else if (group_size >= const_nunits
6688 && group_size % const_nunits == 0)
6689 {
6690 nstores = 1;
6691 lnel = const_nunits;
6692 ltype = vectype;
6693 lvectype = vectype;
6694 }
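
/* For example (illustrative only, assuming the target cannot extract V2SF
   subvectors from a V4SF vector): for a group of two "float" stores with
   const_nunits == 4, the fallback above switches to a V2DI view of the
   vector and extracts DImode elements, so each store writes the two
   floats as one 64-bit piece instead of two 32-bit pieces.  */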
6695 ltype = build_aligned_type (ltype, TYPE_ALIGN (elem_type));
6696 ncopies = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
6697 }
6698
6699 ivstep = stride_step;
6700 ivstep = fold_build2 (MULT_EXPR, TREE_TYPE (ivstep), ivstep,
6701 build_int_cst (TREE_TYPE (ivstep), vf));
6702
6703 standard_iv_increment_position (loop, &incr_gsi, &insert_after);
6704
6705 stride_base = cse_and_gimplify_to_preheader (loop_vinfo, stride_base);
6706 ivstep = cse_and_gimplify_to_preheader (loop_vinfo, ivstep);
6707 create_iv (stride_base, ivstep, NULL,
6708 loop, &incr_gsi, insert_after,
6709 &offvar, NULL);
6710 incr = gsi_stmt (incr_gsi);
6711 loop_vinfo->add_stmt (incr);
6712
6713 stride_step = cse_and_gimplify_to_preheader (loop_vinfo, stride_step);
6714
6715 prev_stmt_info = NULL;
6716 alias_off = build_int_cst (ref_type, 0);
6717 stmt_vec_info next_stmt_info = first_stmt_info;
6718 for (g = 0; g < group_size; g++)
6719 {
6720 running_off = offvar;
6721 if (g)
6722 {
6723 tree size = TYPE_SIZE_UNIT (ltype);
6724 tree pos = fold_build2 (MULT_EXPR, sizetype, size_int (g),
6725 size);
6726 tree newoff = copy_ssa_name (running_off, NULL);
6727 incr = gimple_build_assign (newoff, POINTER_PLUS_EXPR,
6728 running_off, pos);
6729 vect_finish_stmt_generation (stmt_info, incr, gsi);
6730 running_off = newoff;
6731 }
6732 unsigned int group_el = 0;
6733 unsigned HOST_WIDE_INT
6734 elsz = tree_to_uhwi (TYPE_SIZE_UNIT (TREE_TYPE (vectype)));
6735 for (j = 0; j < ncopies; j++)
6736 {
6737 /* We've set op and dt above, from vect_get_store_rhs,
6738 and first_stmt_info == stmt_info. */
6739 if (j == 0)
6740 {
6741 if (slp)
6742 {
6743 vect_get_vec_defs (op, NULL_TREE, stmt_info,
6744 &vec_oprnds, NULL, slp_node);
6745 vec_oprnd = vec_oprnds[0];
6746 }
6747 else
6748 {
6749 op = vect_get_store_rhs (next_stmt_info);
6750 vec_oprnd = vect_get_vec_def_for_operand
6751 (op, next_stmt_info);
6752 }
6753 }
6754 else
6755 {
6756 if (slp)
6757 vec_oprnd = vec_oprnds[j];
6758 else
6759 vec_oprnd = vect_get_vec_def_for_stmt_copy (vinfo,
6760 vec_oprnd);
6761 }
6762 /* Pun the vector to extract from if necessary. */
6763 if (lvectype != vectype)
6764 {
6765 tree tem = make_ssa_name (lvectype);
6766 gimple *pun
6767 = gimple_build_assign (tem, build1 (VIEW_CONVERT_EXPR,
6768 lvectype, vec_oprnd));
6769 vect_finish_stmt_generation (stmt_info, pun, gsi);
6770 vec_oprnd = tem;
6771 }
6772 for (i = 0; i < nstores; i++)
6773 {
6774 tree newref, newoff;
6775 gimple *incr, *assign;
6776 tree size = TYPE_SIZE (ltype);
6777 /* Extract the i'th component. */
6778 tree pos = fold_build2 (MULT_EXPR, bitsizetype,
6779 bitsize_int (i), size);
6780 tree elem = fold_build3 (BIT_FIELD_REF, ltype, vec_oprnd,
6781 size, pos);
6782
6783 elem = force_gimple_operand_gsi (gsi, elem, true,
6784 NULL_TREE, true,
6785 GSI_SAME_STMT);
6786
6787 tree this_off = build_int_cst (TREE_TYPE (alias_off),
6788 group_el * elsz);
6789 newref = build2 (MEM_REF, ltype,
6790 running_off, this_off);
6791 vect_copy_ref_info (newref, DR_REF (first_dr_info->dr));
6792
6793 /* And store it to *running_off. */
6794 assign = gimple_build_assign (newref, elem);
6795 stmt_vec_info assign_info
6796 = vect_finish_stmt_generation (stmt_info, assign, gsi);
6797
6798 group_el += lnel;
6799 if (! slp
6800 || group_el == group_size)
6801 {
6802 newoff = copy_ssa_name (running_off, NULL);
6803 incr = gimple_build_assign (newoff, POINTER_PLUS_EXPR,
6804 running_off, stride_step);
6805 vect_finish_stmt_generation (stmt_info, incr, gsi);
6806
6807 running_off = newoff;
6808 group_el = 0;
6809 }
6810 if (g == group_size - 1
6811 && !slp)
6812 {
6813 if (j == 0 && i == 0)
6814 STMT_VINFO_VEC_STMT (stmt_info)
6815 = *vec_stmt = assign_info;
6816 else
6817 STMT_VINFO_RELATED_STMT (prev_stmt_info) = assign_info;
6818 prev_stmt_info = assign_info;
6819 }
6820 }
6821 }
6822 next_stmt_info = DR_GROUP_NEXT_ELEMENT (next_stmt_info);
6823 if (slp)
6824 break;
6825 }
6826
6827 vec_oprnds.release ();
6828 return true;
6829 }
6830
6831 auto_vec<tree> dr_chain (group_size);
6832 oprnds.create (group_size);
6833
6834 alignment_support_scheme
6835 = vect_supportable_dr_alignment (first_dr_info, false);
6836 gcc_assert (alignment_support_scheme);
6837 vec_loop_masks *loop_masks
6838 = (loop_vinfo && LOOP_VINFO_FULLY_MASKED_P (loop_vinfo)
6839 ? &LOOP_VINFO_MASKS (loop_vinfo)
6840 : NULL);
6841 /* Targets with store-lane instructions must not require explicit
6842 realignment. vect_supportable_dr_alignment always returns either
6843 dr_aligned or dr_unaligned_supported for masked operations. */
6844 gcc_assert ((memory_access_type != VMAT_LOAD_STORE_LANES
6845 && !mask
6846 && !loop_masks)
6847 || alignment_support_scheme == dr_aligned
6848 || alignment_support_scheme == dr_unaligned_supported);
6849
6850 if (memory_access_type == VMAT_CONTIGUOUS_DOWN
6851 || memory_access_type == VMAT_CONTIGUOUS_REVERSE)
6852 offset = size_int (-TYPE_VECTOR_SUBPARTS (vectype) + 1);
6853
6854 tree bump;
6855 tree vec_offset = NULL_TREE;
6856 if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
6857 {
6858 aggr_type = NULL_TREE;
6859 bump = NULL_TREE;
6860 }
6861 else if (memory_access_type == VMAT_GATHER_SCATTER)
6862 {
6863 aggr_type = elem_type;
6864 vect_get_strided_load_store_ops (stmt_info, loop_vinfo, &gs_info,
6865 &bump, &vec_offset);
6866 }
6867 else
6868 {
6869 if (memory_access_type == VMAT_LOAD_STORE_LANES)
6870 aggr_type = build_array_type_nelts (elem_type, vec_num * nunits);
6871 else
6872 aggr_type = vectype;
6873 bump = vect_get_data_ptr_increment (dr_info, aggr_type,
6874 memory_access_type);
6875 }
6876
6877 if (mask)
6878 LOOP_VINFO_HAS_MASK_STORE (loop_vinfo) = true;
6879
6880 /* In case the vectorization factor (VF) is bigger than the number
6881 of elements that we can fit in a vectype (nunits), we have to generate
6882 more than one vector stmt - i.e - we need to "unroll" the
6883 vector stmt by a factor VF/nunits. For more details see documentation in
6884 vect_get_vec_def_for_copy_stmt. */
6885
6886 /* In case of interleaving (non-unit grouped access):
6887
6888 S1: &base + 2 = x2
6889 S2: &base = x0
6890 S3: &base + 1 = x1
6891 S4: &base + 3 = x3
6892
6893 We create vectorized stores starting from base address (the access of the
6894 first stmt in the chain (S2 in the above example), when the last store stmt
6895 of the chain (S4) is reached:
6896
6897 VS1: &base = vx2
6898 VS2: &base + vec_size*1 = vx0
6899 VS3: &base + vec_size*2 = vx1
6900 VS4: &base + vec_size*3 = vx3
6901
6902 Then permutation statements are generated:
6903
6904 VS5: vx5 = VEC_PERM_EXPR < vx0, vx3, {0, 8, 1, 9, 2, 10, 3, 11} >
6905 VS6: vx6 = VEC_PERM_EXPR < vx0, vx3, {4, 12, 5, 13, 6, 14, 7, 15} >
6906 ...
6907
6908 And they are put in STMT_VINFO_VEC_STMT of the corresponding scalar stmts
6909 (the order of the data-refs in the output of vect_permute_store_chain
6910 corresponds to the order of scalar stmts in the interleaving chain - see
6911 the documentation of vect_permute_store_chain()).
6912
6913 In case of both multiple types and interleaving, above vector stores and
6914 permutation stmts are created for every copy. The result vector stmts are
6915 put in STMT_VINFO_VEC_STMT for the first copy and in the corresponding
6916 STMT_VINFO_RELATED_STMT for the next copies.
6917 */
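/* As a concrete illustration of the permutation step (sizes chosen purely
   for exposition): for a store group of size 2 and 4-element vectors, the
   two vectorized defs vx = {x0,x1,x2,x3} and vy = {y0,y1,y2,y3} are
   interleaved as

     vlo = VEC_PERM_EXPR < vx, vy, { 0, 4, 1, 5 } >   (giving {x0,y0,x1,y1})
     vhi = VEC_PERM_EXPR < vx, vy, { 2, 6, 3, 7 } >   (giving {x2,y2,x3,y3})

   and vlo/vhi are then stored to two consecutive vector-sized locations.  */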
6918
6919 prev_stmt_info = NULL;
6920 tree vec_mask = NULL_TREE;
6921 for (j = 0; j < ncopies; j++)
6922 {
6923 stmt_vec_info new_stmt_info;
6924 if (j == 0)
6925 {
6926 if (slp)
6927 {
6928 /* Get vectorized arguments for SLP_NODE. */
6929 vect_get_vec_defs (op, NULL_TREE, stmt_info, &vec_oprnds,
6930 NULL, slp_node);
6931
6932 vec_oprnd = vec_oprnds[0];
6933 }
6934 else
6935 {
6936 /* For interleaved stores we collect vectorized defs for all the
6937 stores in the group in DR_CHAIN and OPRNDS. DR_CHAIN is then
6938 used as an input to vect_permute_store_chain(), and OPRNDS as
6939 an input to vect_get_vec_def_for_stmt_copy() for the next copy.
6940
6941 If the store is not grouped, DR_GROUP_SIZE is 1, and DR_CHAIN and
6942 OPRNDS are of size 1. */
6943 stmt_vec_info next_stmt_info = first_stmt_info;
6944 for (i = 0; i < group_size; i++)
6945 {
6946 /* Since gaps are not supported for interleaved stores,
6947 DR_GROUP_SIZE is the exact number of stmts in the chain.
6948 Therefore, NEXT_STMT_INFO can't be NULL_TREE. If there
6949 is no interleaving, DR_GROUP_SIZE is 1,
6950 and only one iteration of the loop will be executed. */
6951 op = vect_get_store_rhs (next_stmt_info);
6952 vec_oprnd = vect_get_vec_def_for_operand
6953 (op, next_stmt_info);
6954 dr_chain.quick_push (vec_oprnd);
6955 oprnds.quick_push (vec_oprnd);
6956 next_stmt_info = DR_GROUP_NEXT_ELEMENT (next_stmt_info);
6957 }
6958 if (mask)
6959 vec_mask = vect_get_vec_def_for_operand (mask, stmt_info,
6960 mask_vectype);
6961 }
6962
6963 /* We should have caught mismatched types earlier. */
6964 gcc_assert (useless_type_conversion_p (vectype,
6965 TREE_TYPE (vec_oprnd)));
6966 bool simd_lane_access_p
6967 = STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info);
6968 if (simd_lane_access_p
6969 && TREE_CODE (DR_BASE_ADDRESS (first_dr_info->dr)) == ADDR_EXPR
6970 && VAR_P (TREE_OPERAND (DR_BASE_ADDRESS (first_dr_info->dr), 0))
6971 && integer_zerop (DR_OFFSET (first_dr_info->dr))
6972 && integer_zerop (DR_INIT (first_dr_info->dr))
6973 && alias_sets_conflict_p (get_alias_set (aggr_type),
6974 get_alias_set (TREE_TYPE (ref_type))))
6975 {
6976 dataref_ptr = unshare_expr (DR_BASE_ADDRESS (first_dr_info->dr));
6977 dataref_offset = build_int_cst (ref_type, 0);
6978 }
6979 else if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
6980 vect_get_gather_scatter_ops (loop, stmt_info, &gs_info,
6981 &dataref_ptr, &vec_offset);
6982 else
6983 dataref_ptr
6984 = vect_create_data_ref_ptr (first_stmt_info, aggr_type,
6985 simd_lane_access_p ? loop : NULL,
6986 offset, &dummy, gsi, &ptr_incr,
6987 simd_lane_access_p, NULL_TREE, bump);
6988 }
6989 else
6990 {
6991 /* For interleaved stores we created vectorized defs for all the
6992 defs stored in OPRNDS in the previous iteration (previous copy).
6993 DR_CHAIN is then used as an input to vect_permute_store_chain(),
6994 and OPRNDS as an input to vect_get_vec_def_for_stmt_copy() for the
6995 next copy.
6996 If the store is not grouped, DR_GROUP_SIZE is 1, and DR_CHAIN and
6997 OPRNDS are of size 1. */
6998 for (i = 0; i < group_size; i++)
6999 {
7000 op = oprnds[i];
7001 vec_oprnd = vect_get_vec_def_for_stmt_copy (vinfo, op);
7002 dr_chain[i] = vec_oprnd;
7003 oprnds[i] = vec_oprnd;
7004 }
7005 if (mask)
7006 vec_mask = vect_get_vec_def_for_stmt_copy (vinfo, vec_mask);
7007 if (dataref_offset)
7008 dataref_offset
7009 = int_const_binop (PLUS_EXPR, dataref_offset, bump);
7010 else if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
7011 vec_offset = vect_get_vec_def_for_stmt_copy (vinfo, vec_offset);
7012 else
7013 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi,
7014 stmt_info, bump);
7015 }
7016
7017 if (memory_access_type == VMAT_LOAD_STORE_LANES)
7018 {
7019 tree vec_array;
7020
7021 /* Get an array into which we can store the individual vectors. */
7022 vec_array = create_vector_array (vectype, vec_num);
7023
7024 /* Invalidate the current contents of VEC_ARRAY. This should
7025 become an RTL clobber too, which prevents the vector registers
7026 from being upward-exposed. */
7027 vect_clobber_variable (stmt_info, gsi, vec_array);
7028
7029 /* Store the individual vectors into the array. */
7030 for (i = 0; i < vec_num; i++)
7031 {
7032 vec_oprnd = dr_chain[i];
7033 write_vector_array (stmt_info, gsi, vec_oprnd, vec_array, i);
7034 }
7035
7036 tree final_mask = NULL;
7037 if (loop_masks)
7038 final_mask = vect_get_loop_mask (gsi, loop_masks, ncopies,
7039 vectype, j);
7040 if (vec_mask)
7041 final_mask = prepare_load_store_mask (mask_vectype, final_mask,
7042 vec_mask, gsi);
7043
7044 gcall *call;
7045 if (final_mask)
7046 {
7047 /* Emit:
7048 MASK_STORE_LANES (DATAREF_PTR, ALIAS_PTR, VEC_MASK,
7049 VEC_ARRAY). */
7050 unsigned int align = TYPE_ALIGN_UNIT (TREE_TYPE (vectype));
7051 tree alias_ptr = build_int_cst (ref_type, align);
7052 call = gimple_build_call_internal (IFN_MASK_STORE_LANES, 4,
7053 dataref_ptr, alias_ptr,
7054 final_mask, vec_array);
7055 }
7056 else
7057 {
7058 /* Emit:
7059 MEM_REF[...all elements...] = STORE_LANES (VEC_ARRAY). */
7060 data_ref = create_array_ref (aggr_type, dataref_ptr, ref_type);
7061 call = gimple_build_call_internal (IFN_STORE_LANES, 1,
7062 vec_array);
7063 gimple_call_set_lhs (call, data_ref);
7064 }
7065 gimple_call_set_nothrow (call, true);
7066 new_stmt_info = vect_finish_stmt_generation (stmt_info, call, gsi);
7067
7068 /* Record that VEC_ARRAY is now dead. */
7069 vect_clobber_variable (stmt_info, gsi, vec_array);
7070 }
7071 else
7072 {
7073 new_stmt_info = NULL;
7074 if (grouped_store)
7075 {
7076 if (j == 0)
7077 result_chain.create (group_size);
7078 /* Permute. */
7079 vect_permute_store_chain (dr_chain, group_size, stmt_info, gsi,
7080 &result_chain);
7081 }
7082
7083 stmt_vec_info next_stmt_info = first_stmt_info;
7084 for (i = 0; i < vec_num; i++)
7085 {
7086 unsigned align, misalign;
7087
7088 tree final_mask = NULL_TREE;
7089 if (loop_masks)
7090 final_mask = vect_get_loop_mask (gsi, loop_masks,
7091 vec_num * ncopies,
7092 vectype, vec_num * j + i);
7093 if (vec_mask)
7094 final_mask = prepare_load_store_mask (mask_vectype, final_mask,
7095 vec_mask, gsi);
7096
7097 if (memory_access_type == VMAT_GATHER_SCATTER)
7098 {
7099 tree scale = size_int (gs_info.scale);
7100 gcall *call;
7101 if (loop_masks)
7102 call = gimple_build_call_internal
7103 (IFN_MASK_SCATTER_STORE, 5, dataref_ptr, vec_offset,
7104 scale, vec_oprnd, final_mask);
7105 else
7106 call = gimple_build_call_internal
7107 (IFN_SCATTER_STORE, 4, dataref_ptr, vec_offset,
7108 scale, vec_oprnd);
7109 gimple_call_set_nothrow (call, true);
7110 new_stmt_info
7111 = vect_finish_stmt_generation (stmt_info, call, gsi);
7112 break;
7113 }
7114
7115 if (i > 0)
7116 /* Bump the vector pointer. */
7117 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi,
7118 stmt_info, bump);
7119
7120 if (slp)
7121 vec_oprnd = vec_oprnds[i];
7122 else if (grouped_store)
7123 /* For grouped stores vectorized defs are interleaved in
7124 vect_permute_store_chain(). */
7125 vec_oprnd = result_chain[i];
7126
7127 align = DR_TARGET_ALIGNMENT (first_dr_info);
7128 if (aligned_access_p (first_dr_info))
7129 misalign = 0;
7130 else if (DR_MISALIGNMENT (first_dr_info) == -1)
7131 {
7132 align = dr_alignment (vect_dr_behavior (first_dr_info));
7133 misalign = 0;
7134 }
7135 else
7136 misalign = DR_MISALIGNMENT (first_dr_info);
7137 if (dataref_offset == NULL_TREE
7138 && TREE_CODE (dataref_ptr) == SSA_NAME)
7139 set_ptr_info_alignment (get_ptr_info (dataref_ptr), align,
7140 misalign);
7141
7142 if (memory_access_type == VMAT_CONTIGUOUS_REVERSE)
7143 {
7144 tree perm_mask = perm_mask_for_reverse (vectype);
7145 tree perm_dest = vect_create_destination_var
7146 (vect_get_store_rhs (stmt_info), vectype);
7147 tree new_temp = make_ssa_name (perm_dest);
7148
7149 /* Generate the permute statement. */
7150 gimple *perm_stmt
7151 = gimple_build_assign (new_temp, VEC_PERM_EXPR, vec_oprnd,
7152 vec_oprnd, perm_mask);
7153 vect_finish_stmt_generation (stmt_info, perm_stmt, gsi);
7154
7155 perm_stmt = SSA_NAME_DEF_STMT (new_temp);
7156 vec_oprnd = new_temp;
7157 }
7158
7159 /* Arguments are ready. Create the new vector stmt. */
7160 if (final_mask)
7161 {
7162 align = least_bit_hwi (misalign | align);
7163 tree ptr = build_int_cst (ref_type, align);
7164 gcall *call
7165 = gimple_build_call_internal (IFN_MASK_STORE, 4,
7166 dataref_ptr, ptr,
7167 final_mask, vec_oprnd);
7168 gimple_call_set_nothrow (call, true);
7169 new_stmt_info
7170 = vect_finish_stmt_generation (stmt_info, call, gsi);
7171 }
7172 else
7173 {
7174 data_ref = fold_build2 (MEM_REF, vectype,
7175 dataref_ptr,
7176 dataref_offset
7177 ? dataref_offset
7178 : build_int_cst (ref_type, 0));
7179 if (aligned_access_p (first_dr_info))
7180 ;
7181 else if (DR_MISALIGNMENT (first_dr_info) == -1)
7182 TREE_TYPE (data_ref)
7183 = build_aligned_type (TREE_TYPE (data_ref),
7184 align * BITS_PER_UNIT);
7185 else
7186 TREE_TYPE (data_ref)
7187 = build_aligned_type (TREE_TYPE (data_ref),
7188 TYPE_ALIGN (elem_type));
7189 vect_copy_ref_info (data_ref, DR_REF (first_dr_info->dr));
7190 gassign *new_stmt
7191 = gimple_build_assign (data_ref, vec_oprnd);
7192 new_stmt_info
7193 = vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
7194 }
7195
7196 if (slp)
7197 continue;
7198
7199 next_stmt_info = DR_GROUP_NEXT_ELEMENT (next_stmt_info);
7200 if (!next_stmt_info)
7201 break;
7202 }
7203 }
7204 if (!slp)
7205 {
7206 if (j == 0)
7207 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt_info;
7208 else
7209 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt_info;
7210 prev_stmt_info = new_stmt_info;
7211 }
7212 }
7213
7214 oprnds.release ();
7215 result_chain.release ();
7216 vec_oprnds.release ();
7217
7218 return true;
7219 }
7220
7221 /* Given a vector type VECTYPE, turns permutation SEL into the equivalent
7222 VECTOR_CST mask. No checks are made that the target platform supports the
7223 mask, so callers may wish to test can_vec_perm_const_p separately, or use
7224 vect_gen_perm_mask_checked. */
7225
7226 tree
7227 vect_gen_perm_mask_any (tree vectype, const vec_perm_indices &sel)
7228 {
7229 tree mask_type;
7230
7231 poly_uint64 nunits = sel.length ();
7232 gcc_assert (known_eq (nunits, TYPE_VECTOR_SUBPARTS (vectype)));
7233
7234 mask_type = build_vector_type (ssizetype, nunits);
7235 return vec_perm_indices_to_tree (mask_type, sel);
7236 }
7237
7238 /* Checked version of vect_gen_perm_mask_any. Asserts can_vec_perm_const_p,
7239 i.e. that the target supports the pattern _for arbitrary input vectors_. */
7240
7241 tree
7242 vect_gen_perm_mask_checked (tree vectype, const vec_perm_indices &sel)
7243 {
7244 gcc_assert (can_vec_perm_const_p (TYPE_MODE (vectype), sel));
7245 return vect_gen_perm_mask_any (vectype, sel);
7246 }
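/* Illustrative use of the two helpers above, modeled on callers elsewhere
   in this file such as perm_mask_for_reverse (the index values are only an
   example): to build a { 0, 2, 4, ... } even-element selection mask one
   would encode a single stepped pattern and test it first:

     vec_perm_builder sel (nunits, 1, 3);
     for (int k = 0; k < 3; ++k)
       sel.quick_push (k * 2);
     vec_perm_indices indices (sel, 2, nunits);
     if (can_vec_perm_const_p (TYPE_MODE (vectype), indices))
       mask = vect_gen_perm_mask_checked (vectype, indices);

   vect_gen_perm_mask_any skips the can_vec_perm_const_p check and leaves
   that responsibility to the caller.  */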
7247
7248 /* Given vector variables X and Y that were generated for the scalar
7249 STMT_INFO, generate instructions to permute the vector elements of X and Y
7250 using permutation mask MASK_VEC, insert them at *GSI and return the
7251 permuted vector variable. */
7252
7253 static tree
7254 permute_vec_elements (tree x, tree y, tree mask_vec, stmt_vec_info stmt_info,
7255 gimple_stmt_iterator *gsi)
7256 {
7257 tree vectype = TREE_TYPE (x);
7258 tree perm_dest, data_ref;
7259 gimple *perm_stmt;
7260
7261 tree scalar_dest = gimple_get_lhs (stmt_info->stmt);
7262 if (TREE_CODE (scalar_dest) == SSA_NAME)
7263 perm_dest = vect_create_destination_var (scalar_dest, vectype);
7264 else
7265 perm_dest = vect_get_new_vect_var (vectype, vect_simple_var, NULL);
7266 data_ref = make_ssa_name (perm_dest);
7267
7268 /* Generate the permute statement. */
7269 perm_stmt = gimple_build_assign (data_ref, VEC_PERM_EXPR, x, y, mask_vec);
7270 vect_finish_stmt_generation (stmt_info, perm_stmt, gsi);
7271
7272 return data_ref;
7273 }
7274
7275 /* Hoist the definitions of all SSA uses on STMT_INFO out of the loop LOOP,
7276 inserting them on the loop's preheader edge. Returns true if we
7277 were successful in doing so (and thus STMT_INFO can then be moved),
7278 otherwise returns false. */
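/* For example (SSA names invented for illustration): if STMT_INFO is the
   invariant load  _3 = *p_2  and  p_2 = &a[off_1]  is defined inside LOOP
   while off_1 is defined outside of it, the definition of p_2 is moved to
   the preheader so that the load itself can be hoisted afterwards.  If p_2
   were instead defined by a PHI, or by a statement whose own operands are
   defined inside LOOP, we give up.  */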
7279
7280 static bool
7281 hoist_defs_of_uses (stmt_vec_info stmt_info, struct loop *loop)
7282 {
7283 ssa_op_iter i;
7284 tree op;
7285 bool any = false;
7286
7287 FOR_EACH_SSA_TREE_OPERAND (op, stmt_info->stmt, i, SSA_OP_USE)
7288 {
7289 gimple *def_stmt = SSA_NAME_DEF_STMT (op);
7290 if (!gimple_nop_p (def_stmt)
7291 && flow_bb_inside_loop_p (loop, gimple_bb (def_stmt)))
7292 {
7293 /* Make sure we don't need to recurse. While we could do
7294 so in simple cases, when there are more complex use webs
7295 we don't have an easy way to preserve stmt order to fulfil
7296 dependencies within them. */
7297 tree op2;
7298 ssa_op_iter i2;
7299 if (gimple_code (def_stmt) == GIMPLE_PHI)
7300 return false;
7301 FOR_EACH_SSA_TREE_OPERAND (op2, def_stmt, i2, SSA_OP_USE)
7302 {
7303 gimple *def_stmt2 = SSA_NAME_DEF_STMT (op2);
7304 if (!gimple_nop_p (def_stmt2)
7305 && flow_bb_inside_loop_p (loop, gimple_bb (def_stmt2)))
7306 return false;
7307 }
7308 any = true;
7309 }
7310 }
7311
7312 if (!any)
7313 return true;
7314
7315 FOR_EACH_SSA_TREE_OPERAND (op, stmt_info->stmt, i, SSA_OP_USE)
7316 {
7317 gimple *def_stmt = SSA_NAME_DEF_STMT (op);
7318 if (!gimple_nop_p (def_stmt)
7319 && flow_bb_inside_loop_p (loop, gimple_bb (def_stmt)))
7320 {
7321 gimple_stmt_iterator gsi = gsi_for_stmt (def_stmt);
7322 gsi_remove (&gsi, false);
7323 gsi_insert_on_edge_immediate (loop_preheader_edge (loop), def_stmt);
7324 }
7325 }
7326
7327 return true;
7328 }
7329
7330 /* vectorizable_load.
7331
7332 Check if STMT_INFO reads a non-scalar data-ref (array/pointer/structure)
7333 that can be vectorized.
7334 If VEC_STMT is also passed, vectorize STMT_INFO: create a vectorized
7335 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
7336 Return true if STMT_INFO is vectorizable in this way. */
7337
7338 static bool
7339 vectorizable_load (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
7340 stmt_vec_info *vec_stmt, slp_tree slp_node,
7341 slp_instance slp_node_instance,
7342 stmt_vector_for_cost *cost_vec)
7343 {
7344 tree scalar_dest;
7345 tree vec_dest = NULL;
7346 tree data_ref = NULL;
7347 stmt_vec_info prev_stmt_info;
7348 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
7349 struct loop *loop = NULL;
7350 struct loop *containing_loop = gimple_bb (stmt_info->stmt)->loop_father;
7351 bool nested_in_vect_loop = false;
7352 tree elem_type;
7353 tree new_temp;
7354 machine_mode mode;
7355 tree dummy;
7356 enum dr_alignment_support alignment_support_scheme;
7357 tree dataref_ptr = NULL_TREE;
7358 tree dataref_offset = NULL_TREE;
7359 gimple *ptr_incr = NULL;
7360 int ncopies;
7361 int i, j;
7362 unsigned int group_size;
7363 poly_uint64 group_gap_adj;
7364 tree msq = NULL_TREE, lsq;
7365 tree offset = NULL_TREE;
7366 tree byte_offset = NULL_TREE;
7367 tree realignment_token = NULL_TREE;
7368 gphi *phi = NULL;
7369 vec<tree> dr_chain = vNULL;
7370 bool grouped_load = false;
7371 stmt_vec_info first_stmt_info;
7372 stmt_vec_info first_stmt_info_for_drptr = NULL;
7373 bool compute_in_loop = false;
7374 struct loop *at_loop;
7375 int vec_num;
7376 bool slp = (slp_node != NULL);
7377 bool slp_perm = false;
7378 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
7379 poly_uint64 vf;
7380 tree aggr_type;
7381 gather_scatter_info gs_info;
7382 vec_info *vinfo = stmt_info->vinfo;
7383 tree ref_type;
7384 enum vect_def_type mask_dt = vect_unknown_def_type;
7385
7386 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
7387 return false;
7388
7389 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
7390 && ! vec_stmt)
7391 return false;
7392
7393 tree mask = NULL_TREE, mask_vectype = NULL_TREE;
7394 if (gassign *assign = dyn_cast <gassign *> (stmt_info->stmt))
7395 {
7396 scalar_dest = gimple_assign_lhs (assign);
7397 if (TREE_CODE (scalar_dest) != SSA_NAME)
7398 return false;
7399
7400 tree_code code = gimple_assign_rhs_code (assign);
7401 if (code != ARRAY_REF
7402 && code != BIT_FIELD_REF
7403 && code != INDIRECT_REF
7404 && code != COMPONENT_REF
7405 && code != IMAGPART_EXPR
7406 && code != REALPART_EXPR
7407 && code != MEM_REF
7408 && TREE_CODE_CLASS (code) != tcc_declaration)
7409 return false;
7410 }
7411 else
7412 {
7413 gcall *call = dyn_cast <gcall *> (stmt_info->stmt);
7414 if (!call || !gimple_call_internal_p (call))
7415 return false;
7416
7417 internal_fn ifn = gimple_call_internal_fn (call);
7418 if (!internal_load_fn_p (ifn))
7419 return false;
7420
7421 scalar_dest = gimple_call_lhs (call);
7422 if (!scalar_dest)
7423 return false;
7424
7425 if (slp_node != NULL)
7426 {
7427 if (dump_enabled_p ())
7428 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
7429 "SLP of masked loads not supported.\n");
7430 return false;
7431 }
7432
7433 int mask_index = internal_fn_mask_index (ifn);
7434 if (mask_index >= 0)
7435 {
7436 mask = gimple_call_arg (call, mask_index);
7437 if (!vect_check_load_store_mask (stmt_info, mask, &mask_dt,
7438 &mask_vectype))
7439 return false;
7440 }
7441 }
7442
7443 if (!STMT_VINFO_DATA_REF (stmt_info))
7444 return false;
7445
7446 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
7447 poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
7448
7449 if (loop_vinfo)
7450 {
7451 loop = LOOP_VINFO_LOOP (loop_vinfo);
7452 nested_in_vect_loop = nested_in_vect_loop_p (loop, stmt_info);
7453 vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
7454 }
7455 else
7456 vf = 1;
7457
7458 /* Multiple types in SLP are handled by creating the appropriate number of
7459 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
7460 case of SLP. */
7461 if (slp)
7462 ncopies = 1;
7463 else
7464 ncopies = vect_get_num_copies (loop_vinfo, vectype);
7465
7466 gcc_assert (ncopies >= 1);
7467
7468 /* FORNOW. This restriction should be relaxed. */
7469 if (nested_in_vect_loop && ncopies > 1)
7470 {
7471 if (dump_enabled_p ())
7472 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
7473 "multiple types in nested loop.\n");
7474 return false;
7475 }
7476
7477 /* Invalidate assumptions made by dependence analysis when vectorization
7478 on the unrolled body effectively re-orders stmts. */
7479 if (ncopies > 1
7480 && STMT_VINFO_MIN_NEG_DIST (stmt_info) != 0
7481 && maybe_gt (LOOP_VINFO_VECT_FACTOR (loop_vinfo),
7482 STMT_VINFO_MIN_NEG_DIST (stmt_info)))
7483 {
7484 if (dump_enabled_p ())
7485 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
7486 "cannot perform implicit CSE when unrolling "
7487 "with negative dependence distance\n");
7488 return false;
7489 }
7490
7491 elem_type = TREE_TYPE (vectype);
7492 mode = TYPE_MODE (vectype);
7493
7494 /* FORNOW. In some cases can vectorize even if data-type not supported
7495 (e.g. - data copies). */
7496 if (optab_handler (mov_optab, mode) == CODE_FOR_nothing)
7497 {
7498 if (dump_enabled_p ())
7499 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
7500 "Aligned load, but unsupported type.\n");
7501 return false;
7502 }
7503
7504 /* Check if the load is a part of an interleaving chain. */
7505 if (STMT_VINFO_GROUPED_ACCESS (stmt_info))
7506 {
7507 grouped_load = true;
7508 /* FORNOW */
7509 gcc_assert (!nested_in_vect_loop);
7510 gcc_assert (!STMT_VINFO_GATHER_SCATTER_P (stmt_info));
7511
7512 first_stmt_info = DR_GROUP_FIRST_ELEMENT (stmt_info);
7513 group_size = DR_GROUP_SIZE (first_stmt_info);
7514
7515 if (slp && SLP_TREE_LOAD_PERMUTATION (slp_node).exists ())
7516 slp_perm = true;
7517
7518 /* Invalidate assumptions made by dependence analysis when vectorization
7519 on the unrolled body effectively re-orders stmts. */
7520 if (!PURE_SLP_STMT (stmt_info)
7521 && STMT_VINFO_MIN_NEG_DIST (stmt_info) != 0
7522 && maybe_gt (LOOP_VINFO_VECT_FACTOR (loop_vinfo),
7523 STMT_VINFO_MIN_NEG_DIST (stmt_info)))
7524 {
7525 if (dump_enabled_p ())
7526 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
7527 "cannot perform implicit CSE when performing "
7528 "group loads with negative dependence distance\n");
7529 return false;
7530 }
7531
7532 /* Similarly, when the stmt is a load that is both part of an SLP
7533 instance and a loop vectorized stmt via the same-dr mechanism
7534 we have to give up. */
7535 if (DR_GROUP_SAME_DR_STMT (stmt_info)
7536 && (STMT_SLP_TYPE (stmt_info)
7537 != STMT_SLP_TYPE (DR_GROUP_SAME_DR_STMT (stmt_info))))
7538 {
7539 if (dump_enabled_p ())
7540 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
7541 "conflicting SLP types for CSEd load\n");
7542 return false;
7543 }
7544 }
7545 else
7546 group_size = 1;
7547
7548 vect_memory_access_type memory_access_type;
7549 if (!get_load_store_type (stmt_info, vectype, slp, mask, VLS_LOAD, ncopies,
7550 &memory_access_type, &gs_info))
7551 return false;
7552
7553 if (mask)
7554 {
7555 if (memory_access_type == VMAT_CONTIGUOUS)
7556 {
7557 machine_mode vec_mode = TYPE_MODE (vectype);
7558 if (!VECTOR_MODE_P (vec_mode)
7559 || !can_vec_mask_load_store_p (vec_mode,
7560 TYPE_MODE (mask_vectype), true))
7561 return false;
7562 }
7563 else if (memory_access_type == VMAT_GATHER_SCATTER && gs_info.decl)
7564 {
7565 tree arglist = TYPE_ARG_TYPES (TREE_TYPE (gs_info.decl));
7566 tree masktype
7567 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (TREE_CHAIN (arglist))));
7568 if (TREE_CODE (masktype) == INTEGER_TYPE)
7569 {
7570 if (dump_enabled_p ())
7571 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
7572 "masked gather with integer mask not"
7573 " supported.");
7574 return false;
7575 }
7576 }
7577 else if (memory_access_type != VMAT_LOAD_STORE_LANES
7578 && memory_access_type != VMAT_GATHER_SCATTER)
7579 {
7580 if (dump_enabled_p ())
7581 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
7582 "unsupported access type for masked load.\n");
7583 return false;
7584 }
7585 }
7586
7587 if (!vec_stmt) /* transformation not required. */
7588 {
7589 if (!slp)
7590 STMT_VINFO_MEMORY_ACCESS_TYPE (stmt_info) = memory_access_type;
7591
7592 if (loop_vinfo
7593 && LOOP_VINFO_CAN_FULLY_MASK_P (loop_vinfo))
7594 check_load_store_masking (loop_vinfo, vectype, VLS_LOAD, group_size,
7595 memory_access_type, &gs_info);
7596
7597 STMT_VINFO_TYPE (stmt_info) = load_vec_info_type;
7598 vect_model_load_cost (stmt_info, ncopies, memory_access_type,
7599 slp_node_instance, slp_node, cost_vec);
7600 return true;
7601 }
7602
7603 if (!slp)
7604 gcc_assert (memory_access_type
7605 == STMT_VINFO_MEMORY_ACCESS_TYPE (stmt_info));
7606
7607 if (dump_enabled_p ())
7608 dump_printf_loc (MSG_NOTE, vect_location,
7609 "transform load. ncopies = %d\n", ncopies);
7610
7611 /* Transform. */
7612
7613 dr_vec_info *dr_info = STMT_VINFO_DR_INFO (stmt_info), *first_dr_info = NULL;
7614 ensure_base_align (dr_info);
7615
7616 if (memory_access_type == VMAT_GATHER_SCATTER && gs_info.decl)
7617 {
7618 vect_build_gather_load_calls (stmt_info, gsi, vec_stmt, &gs_info, mask);
7619 return true;
7620 }
7621
7622 if (memory_access_type == VMAT_INVARIANT)
7623 {
7624 gcc_assert (!grouped_load && !mask && !bb_vinfo);
7625 /* If we have versioned for aliasing or the loop doesn't
7626 have any data dependencies that would preclude this,
7627 then we are sure this is a loop invariant load and
7628 thus we can insert it on the preheader edge. */
7629 bool hoist_p = (LOOP_VINFO_NO_DATA_DEPENDENCIES (loop_vinfo)
7630 && !nested_in_vect_loop
7631 && hoist_defs_of_uses (stmt_info, loop));
7632 if (hoist_p)
7633 {
7634 gassign *stmt = as_a <gassign *> (stmt_info->stmt);
7635 if (dump_enabled_p ())
7636 dump_printf_loc (MSG_NOTE, vect_location,
7637 "hoisting out of the vectorized loop: %G", stmt);
7638 scalar_dest = copy_ssa_name (scalar_dest);
7639 tree rhs = unshare_expr (gimple_assign_rhs1 (stmt));
7640 gsi_insert_on_edge_immediate
7641 (loop_preheader_edge (loop),
7642 gimple_build_assign (scalar_dest, rhs));
7643 }
7644 /* These copies are all equivalent, but currently the representation
7645 requires a separate STMT_VINFO_VEC_STMT for each one. */
7646 prev_stmt_info = NULL;
7647 gimple_stmt_iterator gsi2 = *gsi;
7648 gsi_next (&gsi2);
7649 for (j = 0; j < ncopies; j++)
7650 {
7651 stmt_vec_info new_stmt_info;
7652 if (hoist_p)
7653 {
7654 new_temp = vect_init_vector (stmt_info, scalar_dest,
7655 vectype, NULL);
7656 gimple *new_stmt = SSA_NAME_DEF_STMT (new_temp);
7657 new_stmt_info = vinfo->add_stmt (new_stmt);
7658 }
7659 else
7660 {
7661 new_temp = vect_init_vector (stmt_info, scalar_dest,
7662 vectype, &gsi2);
7663 new_stmt_info = vinfo->lookup_def (new_temp);
7664 }
7665 if (slp)
7666 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt_info);
7667 else if (j == 0)
7668 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt_info;
7669 else
7670 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt_info;
7671 prev_stmt_info = new_stmt_info;
7672 }
7673 return true;
7674 }
7675
7676 if (memory_access_type == VMAT_ELEMENTWISE
7677 || memory_access_type == VMAT_STRIDED_SLP)
7678 {
7679 gimple_stmt_iterator incr_gsi;
7680 bool insert_after;
7681 gimple *incr;
7682 tree offvar;
7683 tree ivstep;
7684 tree running_off;
7685 vec<constructor_elt, va_gc> *v = NULL;
7686 tree stride_base, stride_step, alias_off;
7687 /* Checked by get_load_store_type. */
7688 unsigned int const_nunits = nunits.to_constant ();
7689 unsigned HOST_WIDE_INT cst_offset = 0;
7690
7691 gcc_assert (!LOOP_VINFO_FULLY_MASKED_P (loop_vinfo));
7692 gcc_assert (!nested_in_vect_loop);
7693
7694 if (grouped_load)
7695 {
7696 first_stmt_info = DR_GROUP_FIRST_ELEMENT (stmt_info);
7697 first_dr_info = STMT_VINFO_DR_INFO (first_stmt_info);
7698 }
7699 else
7700 {
7701 first_stmt_info = stmt_info;
7702 first_dr_info = dr_info;
7703 }
7704 if (slp && grouped_load)
7705 {
7706 group_size = DR_GROUP_SIZE (first_stmt_info);
7707 ref_type = get_group_alias_ptr_type (first_stmt_info);
7708 }
7709 else
7710 {
7711 if (grouped_load)
7712 cst_offset
7713 = (tree_to_uhwi (TYPE_SIZE_UNIT (TREE_TYPE (vectype)))
7714 * vect_get_place_in_interleaving_chain (stmt_info,
7715 first_stmt_info));
7716 group_size = 1;
7717 ref_type = reference_alias_ptr_type (DR_REF (dr_info->dr));
7718 }
7719
7720 stride_base
7721 = fold_build_pointer_plus
7722 (DR_BASE_ADDRESS (first_dr_info->dr),
7723 size_binop (PLUS_EXPR,
7724 convert_to_ptrofftype (DR_OFFSET (first_dr_info->dr)),
7725 convert_to_ptrofftype (DR_INIT (first_dr_info->dr))));
7726 stride_step = fold_convert (sizetype, DR_STEP (first_dr_info->dr));
7727
7728 /* For a load with loop-invariant (but other than power-of-2)
7729 stride (i.e. not a grouped access) like so:
7730
7731 for (i = 0; i < n; i += stride)
7732 ... = array[i];
7733
7734 we generate a new induction variable and new accesses to
7735 form a new vector (or vectors, depending on ncopies):
7736
7737 for (j = 0; ; j += VF*stride)
7738 tmp1 = array[j];
7739 tmp2 = array[j + stride];
7740 ...
7741 vectemp = {tmp1, tmp2, ...}
7742 */
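/* For instance (an illustrative choice of parameters, assuming a single
   copy): with 4-element int vectors and stride 3, each vector iteration
   performs the four scalar loads array[j], array[j+3], array[j+6] and
   array[j+9], combines them into one vector via a CONSTRUCTOR, and the
   new IV advances by VF*stride elements' worth of bytes, while the
   running offset used inside the body is bumped by one stride between
   the element loads.  */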
7743
7744 ivstep = fold_build2 (MULT_EXPR, TREE_TYPE (stride_step), stride_step,
7745 build_int_cst (TREE_TYPE (stride_step), vf));
7746
7747 standard_iv_increment_position (loop, &incr_gsi, &insert_after);
7748
7749 stride_base = cse_and_gimplify_to_preheader (loop_vinfo, stride_base);
7750 ivstep = cse_and_gimplify_to_preheader (loop_vinfo, ivstep);
7751 create_iv (stride_base, ivstep, NULL,
7752 loop, &incr_gsi, insert_after,
7753 &offvar, NULL);
7754 incr = gsi_stmt (incr_gsi);
7755 loop_vinfo->add_stmt (incr);
7756
7757 stride_step = cse_and_gimplify_to_preheader (loop_vinfo, stride_step);
7758
7759 prev_stmt_info = NULL;
7760 running_off = offvar;
7761 alias_off = build_int_cst (ref_type, 0);
7762 int nloads = const_nunits;
7763 int lnel = 1;
7764 tree ltype = TREE_TYPE (vectype);
7765 tree lvectype = vectype;
7766 auto_vec<tree> dr_chain;
7767 if (memory_access_type == VMAT_STRIDED_SLP)
7768 {
7769 if (group_size < const_nunits)
7770 {
7771 /* First check if vec_init optab supports construction from
7772 vector elts directly. */
7773 scalar_mode elmode = SCALAR_TYPE_MODE (TREE_TYPE (vectype));
7774 machine_mode vmode;
7775 if (mode_for_vector (elmode, group_size).exists (&vmode)
7776 && VECTOR_MODE_P (vmode)
7777 && targetm.vector_mode_supported_p (vmode)
7778 && (convert_optab_handler (vec_init_optab,
7779 TYPE_MODE (vectype), vmode)
7780 != CODE_FOR_nothing))
7781 {
7782 nloads = const_nunits / group_size;
7783 lnel = group_size;
7784 ltype = build_vector_type (TREE_TYPE (vectype), group_size);
7785 }
7786 else
7787 {
7788 /* Otherwise avoid emitting a constructor of vector elements
7789 by performing the loads using an integer type of the same
7790 size, constructing a vector of those and then
7791 re-interpreting it as the original vector type.
7792 This avoids a huge runtime penalty due to the general
7793 inability to perform store forwarding from smaller stores
7794 to a larger load. */
7795 unsigned lsize
7796 = group_size * TYPE_PRECISION (TREE_TYPE (vectype));
7797 elmode = int_mode_for_size (lsize, 0).require ();
7798 unsigned int lnunits = const_nunits / group_size;
7799 /* If we can't construct such a vector fall back to
7800 element loads of the original vector type. */
7801 if (mode_for_vector (elmode, lnunits).exists (&vmode)
7802 && VECTOR_MODE_P (vmode)
7803 && targetm.vector_mode_supported_p (vmode)
7804 && (convert_optab_handler (vec_init_optab, vmode, elmode)
7805 != CODE_FOR_nothing))
7806 {
7807 nloads = lnunits;
7808 lnel = group_size;
7809 ltype = build_nonstandard_integer_type (lsize, 1);
7810 lvectype = build_vector_type (ltype, nloads);
7811 }
7812 }
7813 }
7814 else
7815 {
7816 nloads = 1;
7817 lnel = const_nunits;
7818 ltype = vectype;
7819 }
7820 ltype = build_aligned_type (ltype, TYPE_ALIGN (TREE_TYPE (vectype)));
7821 }
7822 /* Load vector(1) scalar_type if the vectype has just one element. */
7823 else if (nloads == 1)
7824 ltype = vectype;
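/* Two illustrative cases of the above (modes assumed to be supported by
   the target): for a V8SI vectype and group_size 2, the first strategy
   performs four V2SI loads and combines them directly via the vec_init
   optab; if that is not available, the fallback performs four 64-bit
   integer loads instead (each covering one two-int group), builds a
   vector of those integers and VIEW_CONVERTs the result back to V8SI.  */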
7825
7826 if (slp)
7827 {
7828 /* For SLP permutation support we need to load the whole group,
7829 not only the number of vector stmts the permutation result
7830 fits in. */
7831 if (slp_perm)
7832 {
7833 /* We don't yet generate SLP_TREE_LOAD_PERMUTATIONs for
7834 variable VF. */
7835 unsigned int const_vf = vf.to_constant ();
7836 ncopies = CEIL (group_size * const_vf, const_nunits);
7837 dr_chain.create (ncopies);
7838 }
7839 else
7840 ncopies = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
7841 }
7842 unsigned int group_el = 0;
7843 unsigned HOST_WIDE_INT
7844 elsz = tree_to_uhwi (TYPE_SIZE_UNIT (TREE_TYPE (vectype)));
7845 for (j = 0; j < ncopies; j++)
7846 {
7847 if (nloads > 1)
7848 vec_alloc (v, nloads);
7849 stmt_vec_info new_stmt_info = NULL;
7850 for (i = 0; i < nloads; i++)
7851 {
7852 tree this_off = build_int_cst (TREE_TYPE (alias_off),
7853 group_el * elsz + cst_offset);
7854 tree data_ref = build2 (MEM_REF, ltype, running_off, this_off);
7855 vect_copy_ref_info (data_ref, DR_REF (first_dr_info->dr));
7856 gassign *new_stmt
7857 = gimple_build_assign (make_ssa_name (ltype), data_ref);
7858 new_stmt_info
7859 = vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
7860 if (nloads > 1)
7861 CONSTRUCTOR_APPEND_ELT (v, NULL_TREE,
7862 gimple_assign_lhs (new_stmt));
7863
7864 group_el += lnel;
7865 if (! slp
7866 || group_el == group_size)
7867 {
7868 tree newoff = copy_ssa_name (running_off);
7869 gimple *incr = gimple_build_assign (newoff, POINTER_PLUS_EXPR,
7870 running_off, stride_step);
7871 vect_finish_stmt_generation (stmt_info, incr, gsi);
7872
7873 running_off = newoff;
7874 group_el = 0;
7875 }
7876 }
7877 if (nloads > 1)
7878 {
7879 tree vec_inv = build_constructor (lvectype, v);
7880 new_temp = vect_init_vector (stmt_info, vec_inv, lvectype, gsi);
7881 new_stmt_info = vinfo->lookup_def (new_temp);
7882 if (lvectype != vectype)
7883 {
7884 gassign *new_stmt
7885 = gimple_build_assign (make_ssa_name (vectype),
7886 VIEW_CONVERT_EXPR,
7887 build1 (VIEW_CONVERT_EXPR,
7888 vectype, new_temp));
7889 new_stmt_info
7890 = vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
7891 }
7892 }
7893
7894 if (slp)
7895 {
7896 if (slp_perm)
7897 dr_chain.quick_push (gimple_assign_lhs (new_stmt_info->stmt));
7898 else
7899 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt_info);
7900 }
7901 else
7902 {
7903 if (j == 0)
7904 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt_info;
7905 else
7906 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt_info;
7907 prev_stmt_info = new_stmt_info;
7908 }
7909 }
7910 if (slp_perm)
7911 {
7912 unsigned n_perms;
7913 vect_transform_slp_perm_load (slp_node, dr_chain, gsi, vf,
7914 slp_node_instance, false, &n_perms);
7915 }
7916 return true;
7917 }
7918
7919 if (memory_access_type == VMAT_GATHER_SCATTER
7920 || (!slp && memory_access_type == VMAT_CONTIGUOUS))
7921 grouped_load = false;
7922
7923 if (grouped_load)
7924 {
7925 first_stmt_info = DR_GROUP_FIRST_ELEMENT (stmt_info);
7926 group_size = DR_GROUP_SIZE (first_stmt_info);
7927 /* For SLP vectorization we directly vectorize a subchain
7928 without permutation. */
7929 if (slp && ! SLP_TREE_LOAD_PERMUTATION (slp_node).exists ())
7930 first_stmt_info = SLP_TREE_SCALAR_STMTS (slp_node)[0];
7931 /* For BB vectorization always use the first stmt to base
7932 the data ref pointer on. */
7933 if (bb_vinfo)
7934 first_stmt_info_for_drptr = SLP_TREE_SCALAR_STMTS (slp_node)[0];
7935
7936 /* Check if the chain of loads is already vectorized. */
7937 if (STMT_VINFO_VEC_STMT (first_stmt_info)
7938 /* For SLP we would need to copy over SLP_TREE_VEC_STMTS.
7939 ??? But we can only do so if there is exactly one,
7940 as we have no way to get at the rest. Leave the CSE
7941 opportunity alone.
7942 ??? With the group load eventually participating
7943 in multiple different permutations (having multiple
7944 slp nodes which refer to the same group) the CSE
7945 is even wrong code. See PR56270. */
7946 && !slp)
7947 {
7948 *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
7949 return true;
7950 }
7951 first_dr_info = STMT_VINFO_DR_INFO (first_stmt_info);
7952 group_gap_adj = 0;
7953
7954 /* VEC_NUM is the number of vect stmts to be created for this group. */
7955 if (slp)
7956 {
7957 grouped_load = false;
7958 /* If an SLP permutation is from N elements to N elements,
7959 and if one vector holds a whole number of N, we can load
7960 the inputs to the permutation in the same way as an
7961 unpermuted sequence. In other cases we need to load the
7962 whole group, not only the number of vector stmts the
7963 permutation result fits in. */
7964 if (slp_perm
7965 && (group_size != SLP_INSTANCE_GROUP_SIZE (slp_node_instance)
7966 || !multiple_p (nunits, group_size)))
7967 {
7968 /* We don't yet generate such SLP_TREE_LOAD_PERMUTATIONs for
7969 variable VF; see vect_transform_slp_perm_load. */
7970 unsigned int const_vf = vf.to_constant ();
7971 unsigned int const_nunits = nunits.to_constant ();
7972 vec_num = CEIL (group_size * const_vf, const_nunits);
7973 group_gap_adj = vf * group_size - nunits * vec_num;
7974 }
7975 else
7976 {
7977 vec_num = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
7978 group_gap_adj
7979 = group_size - SLP_INSTANCE_GROUP_SIZE (slp_node_instance);
7980 }
7981 }
7982 else
7983 vec_num = group_size;
7984
7985 ref_type = get_group_alias_ptr_type (first_stmt_info);
7986 }
7987 else
7988 {
7989 first_stmt_info = stmt_info;
7990 first_dr_info = dr_info;
7991 group_size = vec_num = 1;
7992 group_gap_adj = 0;
7993 ref_type = reference_alias_ptr_type (DR_REF (first_dr_info->dr));
7994 }
7995
7996 alignment_support_scheme
7997 = vect_supportable_dr_alignment (first_dr_info, false);
7998 gcc_assert (alignment_support_scheme);
7999 vec_loop_masks *loop_masks
8000 = (loop_vinfo && LOOP_VINFO_FULLY_MASKED_P (loop_vinfo)
8001 ? &LOOP_VINFO_MASKS (loop_vinfo)
8002 : NULL);
8003 /* Targets with load-lane instructions must not require explicit
8004 realignment. vect_supportable_dr_alignment always returns either
8005 dr_aligned or dr_unaligned_supported for masked operations. */
8006 gcc_assert ((memory_access_type != VMAT_LOAD_STORE_LANES
8007 && !mask
8008 && !loop_masks)
8009 || alignment_support_scheme == dr_aligned
8010 || alignment_support_scheme == dr_unaligned_supported);
8011
8012 /* In case the vectorization factor (VF) is bigger than the number
8013 of elements that we can fit in a vectype (nunits), we have to generate
8014 more than one vector stmt - i.e., we need to "unroll" the
8015 vector stmt by a factor VF/nunits. In doing so, we record a pointer
8016 from one copy of the vector stmt to the next, in the field
8017 STMT_VINFO_RELATED_STMT. This is necessary in order to allow following
8018 stages to find the correct vector defs to be used when vectorizing
8019 stmts that use the defs of the current stmt. The example below
8020 illustrates the vectorization process when VF=16 and nunits=4 (i.e., we
8021 need to create 4 vectorized stmts):
8022
8023 before vectorization:
8024 RELATED_STMT VEC_STMT
8025 S1: x = memref - -
8026 S2: z = x + 1 - -
8027
8028 step 1: vectorize stmt S1:
8029 We first create the vector stmt VS1_0, and, as usual, record a
8030 pointer to it in the STMT_VINFO_VEC_STMT of the scalar stmt S1.
8031 Next, we create the vector stmt VS1_1, and record a pointer to
8032 it in the STMT_VINFO_RELATED_STMT of the vector stmt VS1_0.
8033 Similarly, for VS1_2 and VS1_3. This is the resulting chain of
8034 stmts and pointers:
8035 RELATED_STMT VEC_STMT
8036 VS1_0: vx0 = memref0 VS1_1 -
8037 VS1_1: vx1 = memref1 VS1_2 -
8038 VS1_2: vx2 = memref2 VS1_3 -
8039 VS1_3: vx3 = memref3 - -
8040 S1: x = load - VS1_0
8041 S2: z = x + 1 - -
8042
8043 See in documentation in vect_get_vec_def_for_stmt_copy for how the
8044 information we recorded in RELATED_STMT field is used to vectorize
8045 stmt S2. */
8046
8047 /* In case of interleaving (non-unit grouped access):
8048
8049 S1: x2 = &base + 2
8050 S2: x0 = &base
8051 S3: x1 = &base + 1
8052 S4: x3 = &base + 3
8053
8054 Vectorized loads are created in the order of memory accesses
8055 starting from the access of the first stmt of the chain:
8056
8057 VS1: vx0 = &base
8058 VS2: vx1 = &base + vec_size*1
8059 VS3: vx3 = &base + vec_size*2
8060 VS4: vx4 = &base + vec_size*3
8061
8062 Then permutation statements are generated:
8063
8064 VS5: vx5 = VEC_PERM_EXPR < vx0, vx1, { 0, 2, ..., i*2 } >
8065 VS6: vx6 = VEC_PERM_EXPR < vx0, vx1, { 1, 3, ..., i*2+1 } >
8066 ...
8067
8068 And they are put in STMT_VINFO_VEC_STMT of the corresponding scalar stmts
8069 (the order of the data-refs in the output of vect_permute_load_chain
8070 corresponds to the order of scalar stmts in the interleaving chain - see
8071 the documentation of vect_permute_load_chain()).
8072 The generation of permutation stmts and recording them in
8073 STMT_VINFO_VEC_STMT is done in vect_transform_grouped_load().
8074
8075 In case of both multiple types and interleaving, the vector loads and
8076 permutation stmts above are created for every copy. The result vector
8077 stmts are put in STMT_VINFO_VEC_STMT for the first copy and in the
8078 corresponding STMT_VINFO_RELATED_STMT for the next copies. */
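/* A concrete illustration of the load permutation (sizes chosen purely
   for exposition): for a load group of size 2 and 4-element vectors, the
   two vectors loaded from consecutive memory are vx0 = {a0,b0,a1,b1} and
   vx1 = {a2,b2,a3,b3}; vect_permute_load_chain then produces

     va = VEC_PERM_EXPR < vx0, vx1, { 0, 2, 4, 6 } >   (giving {a0,a1,a2,a3})
     vb = VEC_PERM_EXPR < vx0, vx1, { 1, 3, 5, 7 } >   (giving {b0,b1,b2,b3})

   which are recorded for the two scalar stmts of the chain.  */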
8079
8080 /* If the data reference is aligned (dr_aligned) or potentially unaligned
8081 on a target that supports unaligned accesses (dr_unaligned_supported)
8082 we generate the following code:
8083 p = initial_addr;
8084 indx = 0;
8085 loop {
8086 p = p + indx * vectype_size;
8087 vec_dest = *(p);
8088 indx = indx + 1;
8089 }
8090
8091 Otherwise, the data reference is potentially unaligned on a target that
8092 does not support unaligned accesses (dr_explicit_realign_optimized) -
8093 then generate the following code, in which the data in each iteration is
8094 obtained by two vector loads, one from the previous iteration, and one
8095 from the current iteration:
8096 p1 = initial_addr;
8097 msq_init = *(floor(p1))
8098 p2 = initial_addr + VS - 1;
8099 realignment_token = call target_builtin;
8100 indx = 0;
8101 loop {
8102 p2 = p2 + indx * vectype_size
8103 lsq = *(floor(p2))
8104 vec_dest = realign_load (msq, lsq, realignment_token)
8105 indx = indx + 1;
8106 msq = lsq;
8107 } */
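/* A small worked example of the realignment scheme above (assuming a
   hypothetical 16-byte vector size): if p1 is 4 bytes past a 16-byte
   boundary, floor(p1) covers bytes [p1-4, p1+12) and floor(p1 + 15)
   covers [p1+12, p1+28); realign_load combines the last 12 bytes of the
   first aligned load with the first 4 bytes of the second, as directed
   by the realignment token, to yield the 16 bytes starting at p1.  */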
8108
8109 /* If the misalignment remains the same throughout the execution of the
8110 loop, we can create the init_addr and permutation mask at the loop
8111 preheader. Otherwise, it needs to be created inside the loop.
8112 This can only occur when vectorizing memory accesses in the inner-loop
8113 nested within an outer-loop that is being vectorized. */
8114
8115 if (nested_in_vect_loop
8116 && !multiple_p (DR_STEP_ALIGNMENT (dr_info->dr),
8117 GET_MODE_SIZE (TYPE_MODE (vectype))))
8118 {
8119 gcc_assert (alignment_support_scheme != dr_explicit_realign_optimized);
8120 compute_in_loop = true;
8121 }
8122
8123 if ((alignment_support_scheme == dr_explicit_realign_optimized
8124 || alignment_support_scheme == dr_explicit_realign)
8125 && !compute_in_loop)
8126 {
8127 msq = vect_setup_realignment (first_stmt_info, gsi, &realignment_token,
8128 alignment_support_scheme, NULL_TREE,
8129 &at_loop);
8130 if (alignment_support_scheme == dr_explicit_realign_optimized)
8131 {
8132 phi = as_a <gphi *> (SSA_NAME_DEF_STMT (msq));
8133 byte_offset = size_binop (MINUS_EXPR, TYPE_SIZE_UNIT (vectype),
8134 size_one_node);
8135 }
8136 }
8137 else
8138 at_loop = loop;
8139
8140 if (memory_access_type == VMAT_CONTIGUOUS_REVERSE)
8141 offset = size_int (-TYPE_VECTOR_SUBPARTS (vectype) + 1);
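/* The negative offset positions the first vector access so that it still
   covers the right elements; e.g. (illustrative) with 4-element vectors
   and a scalar loop reading a[n-1], a[n-2], ..., the first vector load is
   taken from &a[n-4] and yields {a[n-4],a[n-3],a[n-2],a[n-1]}, which the
   reverse permutation applied further down turns into
   {a[n-1],a[n-2],a[n-3],a[n-4]}.  */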
8142
8143 tree bump;
8144 tree vec_offset = NULL_TREE;
8145 if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
8146 {
8147 aggr_type = NULL_TREE;
8148 bump = NULL_TREE;
8149 }
8150 else if (memory_access_type == VMAT_GATHER_SCATTER)
8151 {
8152 aggr_type = elem_type;
8153 vect_get_strided_load_store_ops (stmt_info, loop_vinfo, &gs_info,
8154 &bump, &vec_offset);
8155 }
8156 else
8157 {
8158 if (memory_access_type == VMAT_LOAD_STORE_LANES)
8159 aggr_type = build_array_type_nelts (elem_type, vec_num * nunits);
8160 else
8161 aggr_type = vectype;
8162 bump = vect_get_data_ptr_increment (dr_info, aggr_type,
8163 memory_access_type);
8164 }
8165
8166 tree vec_mask = NULL_TREE;
8167 prev_stmt_info = NULL;
8168 poly_uint64 group_elt = 0;
8169 for (j = 0; j < ncopies; j++)
8170 {
8171 stmt_vec_info new_stmt_info = NULL;
8172 /* 1. Create the vector or array pointer update chain. */
8173 if (j == 0)
8174 {
8175 bool simd_lane_access_p
8176 = STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info);
8177 if (simd_lane_access_p
8178 && TREE_CODE (DR_BASE_ADDRESS (first_dr_info->dr)) == ADDR_EXPR
8179 && VAR_P (TREE_OPERAND (DR_BASE_ADDRESS (first_dr_info->dr), 0))
8180 && integer_zerop (DR_OFFSET (first_dr_info->dr))
8181 && integer_zerop (DR_INIT (first_dr_info->dr))
8182 && alias_sets_conflict_p (get_alias_set (aggr_type),
8183 get_alias_set (TREE_TYPE (ref_type)))
8184 && (alignment_support_scheme == dr_aligned
8185 || alignment_support_scheme == dr_unaligned_supported))
8186 {
8187 dataref_ptr = unshare_expr (DR_BASE_ADDRESS (first_dr_info->dr));
8188 dataref_offset = build_int_cst (ref_type, 0);
8189 }
8190 else if (first_stmt_info_for_drptr
8191 && first_stmt_info != first_stmt_info_for_drptr)
8192 {
8193 dataref_ptr
8194 = vect_create_data_ref_ptr (first_stmt_info_for_drptr,
8195 aggr_type, at_loop, offset, &dummy,
8196 gsi, &ptr_incr, simd_lane_access_p,
8197 byte_offset, bump);
8198 /* Adjust the pointer by the difference to first_stmt. */
8199 data_reference_p ptrdr
8200 = STMT_VINFO_DATA_REF (first_stmt_info_for_drptr);
8201 tree diff
8202 = fold_convert (sizetype,
8203 size_binop (MINUS_EXPR,
8204 DR_INIT (first_dr_info->dr),
8205 DR_INIT (ptrdr)));
8206 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi,
8207 stmt_info, diff);
8208 }
8209 else if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
8210 vect_get_gather_scatter_ops (loop, stmt_info, &gs_info,
8211 &dataref_ptr, &vec_offset);
8212 else
8213 dataref_ptr
8214 = vect_create_data_ref_ptr (first_stmt_info, aggr_type, at_loop,
8215 offset, &dummy, gsi, &ptr_incr,
8216 simd_lane_access_p,
8217 byte_offset, bump);
8218 if (mask)
8219 vec_mask = vect_get_vec_def_for_operand (mask, stmt_info,
8220 mask_vectype);
8221 }
8222 else
8223 {
8224 if (dataref_offset)
8225 dataref_offset = int_const_binop (PLUS_EXPR, dataref_offset,
8226 bump);
8227 else if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
8228 vec_offset = vect_get_vec_def_for_stmt_copy (vinfo, vec_offset);
8229 else
8230 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi,
8231 stmt_info, bump);
8232 if (mask)
8233 vec_mask = vect_get_vec_def_for_stmt_copy (vinfo, vec_mask);
8234 }
8235
8236 if (grouped_load || slp_perm)
8237 dr_chain.create (vec_num);
8238
8239 if (memory_access_type == VMAT_LOAD_STORE_LANES)
8240 {
8241 tree vec_array;
8242
8243 vec_array = create_vector_array (vectype, vec_num);
8244
8245 tree final_mask = NULL_TREE;
8246 if (loop_masks)
8247 final_mask = vect_get_loop_mask (gsi, loop_masks, ncopies,
8248 vectype, j);
8249 if (vec_mask)
8250 final_mask = prepare_load_store_mask (mask_vectype, final_mask,
8251 vec_mask, gsi);
8252
8253 gcall *call;
8254 if (final_mask)
8255 {
8256 /* Emit:
8257 VEC_ARRAY = MASK_LOAD_LANES (DATAREF_PTR, ALIAS_PTR,
8258 VEC_MASK). */
8259 unsigned int align = TYPE_ALIGN_UNIT (TREE_TYPE (vectype));
8260 tree alias_ptr = build_int_cst (ref_type, align);
8261 call = gimple_build_call_internal (IFN_MASK_LOAD_LANES, 3,
8262 dataref_ptr, alias_ptr,
8263 final_mask);
8264 }
8265 else
8266 {
8267 /* Emit:
8268 VEC_ARRAY = LOAD_LANES (MEM_REF[...all elements...]). */
8269 data_ref = create_array_ref (aggr_type, dataref_ptr, ref_type);
8270 call = gimple_build_call_internal (IFN_LOAD_LANES, 1, data_ref);
8271 }
8272 gimple_call_set_lhs (call, vec_array);
8273 gimple_call_set_nothrow (call, true);
8274 new_stmt_info = vect_finish_stmt_generation (stmt_info, call, gsi);
8275
8276 /* Extract each vector into an SSA_NAME. */
8277 for (i = 0; i < vec_num; i++)
8278 {
8279 new_temp = read_vector_array (stmt_info, gsi, scalar_dest,
8280 vec_array, i);
8281 dr_chain.quick_push (new_temp);
8282 }
8283
8284 /* Record the mapping between SSA_NAMEs and statements. */
8285 vect_record_grouped_load_vectors (stmt_info, dr_chain);
8286
8287 /* Record that VEC_ARRAY is now dead. */
8288 vect_clobber_variable (stmt_info, gsi, vec_array);
8289 }
8290 else
8291 {
8292 for (i = 0; i < vec_num; i++)
8293 {
8294 tree final_mask = NULL_TREE;
8295 if (loop_masks
8296 && memory_access_type != VMAT_INVARIANT)
8297 final_mask = vect_get_loop_mask (gsi, loop_masks,
8298 vec_num * ncopies,
8299 vectype, vec_num * j + i);
8300 if (vec_mask)
8301 final_mask = prepare_load_store_mask (mask_vectype, final_mask,
8302 vec_mask, gsi);
8303
8304 if (i > 0)
8305 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi,
8306 stmt_info, bump);
8307
8308 /* 2. Create the vector-load in the loop. */
8309 gimple *new_stmt = NULL;
8310 switch (alignment_support_scheme)
8311 {
8312 case dr_aligned:
8313 case dr_unaligned_supported:
8314 {
8315 unsigned int align, misalign;
8316
8317 if (memory_access_type == VMAT_GATHER_SCATTER)
8318 {
8319 tree scale = size_int (gs_info.scale);
8320 gcall *call;
8321 if (loop_masks)
8322 call = gimple_build_call_internal
8323 (IFN_MASK_GATHER_LOAD, 4, dataref_ptr,
8324 vec_offset, scale, final_mask);
8325 else
8326 call = gimple_build_call_internal
8327 (IFN_GATHER_LOAD, 3, dataref_ptr,
8328 vec_offset, scale);
8329 gimple_call_set_nothrow (call, true);
8330 new_stmt = call;
8331 data_ref = NULL_TREE;
8332 break;
8333 }
8334
8335 align = DR_TARGET_ALIGNMENT (dr_info);
8336 if (alignment_support_scheme == dr_aligned)
8337 {
8338 gcc_assert (aligned_access_p (first_dr_info));
8339 misalign = 0;
8340 }
8341 else if (DR_MISALIGNMENT (first_dr_info) == -1)
8342 {
8343 align = dr_alignment
8344 (vect_dr_behavior (first_dr_info));
8345 misalign = 0;
8346 }
8347 else
8348 misalign = DR_MISALIGNMENT (first_dr_info);
8349 if (dataref_offset == NULL_TREE
8350 && TREE_CODE (dataref_ptr) == SSA_NAME)
8351 set_ptr_info_alignment (get_ptr_info (dataref_ptr),
8352 align, misalign);
8353
8354 if (final_mask)
8355 {
8356 align = least_bit_hwi (misalign | align);
8357 tree ptr = build_int_cst (ref_type, align);
8358 gcall *call
8359 = gimple_build_call_internal (IFN_MASK_LOAD, 3,
8360 dataref_ptr, ptr,
8361 final_mask);
8362 gimple_call_set_nothrow (call, true);
8363 new_stmt = call;
8364 data_ref = NULL_TREE;
8365 }
8366 else
8367 {
8368 data_ref
8369 = fold_build2 (MEM_REF, vectype, dataref_ptr,
8370 dataref_offset
8371 ? dataref_offset
8372 : build_int_cst (ref_type, 0));
8373 if (alignment_support_scheme == dr_aligned)
8374 ;
8375 else if (DR_MISALIGNMENT (first_dr_info) == -1)
8376 TREE_TYPE (data_ref)
8377 = build_aligned_type (TREE_TYPE (data_ref),
8378 align * BITS_PER_UNIT);
8379 else
8380 TREE_TYPE (data_ref)
8381 = build_aligned_type (TREE_TYPE (data_ref),
8382 TYPE_ALIGN (elem_type));
8383 }
8384 break;
8385 }
8386 case dr_explicit_realign:
8387 {
8388 tree ptr, bump;
8389
8390 tree vs = size_int (TYPE_VECTOR_SUBPARTS (vectype));
8391
8392 if (compute_in_loop)
8393 msq = vect_setup_realignment (first_stmt_info, gsi,
8394 &realignment_token,
8395 dr_explicit_realign,
8396 dataref_ptr, NULL);
8397
8398 if (TREE_CODE (dataref_ptr) == SSA_NAME)
8399 ptr = copy_ssa_name (dataref_ptr);
8400 else
8401 ptr = make_ssa_name (TREE_TYPE (dataref_ptr));
8402 unsigned int align = DR_TARGET_ALIGNMENT (first_dr_info);
8403 new_stmt = gimple_build_assign
8404 (ptr, BIT_AND_EXPR, dataref_ptr,
8405 build_int_cst
8406 (TREE_TYPE (dataref_ptr),
8407 -(HOST_WIDE_INT) align));
8408 vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
8409 data_ref
8410 = build2 (MEM_REF, vectype, ptr,
8411 build_int_cst (ref_type, 0));
8412 vect_copy_ref_info (data_ref, DR_REF (first_dr_info->dr));
8413 vec_dest = vect_create_destination_var (scalar_dest,
8414 vectype);
8415 new_stmt = gimple_build_assign (vec_dest, data_ref);
8416 new_temp = make_ssa_name (vec_dest, new_stmt);
8417 gimple_assign_set_lhs (new_stmt, new_temp);
8418 gimple_set_vdef (new_stmt, gimple_vdef (stmt_info->stmt));
8419 gimple_set_vuse (new_stmt, gimple_vuse (stmt_info->stmt));
8420 vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
8421 msq = new_temp;
8422
8423 bump = size_binop (MULT_EXPR, vs,
8424 TYPE_SIZE_UNIT (elem_type));
8425 bump = size_binop (MINUS_EXPR, bump, size_one_node);
8426 ptr = bump_vector_ptr (dataref_ptr, NULL, gsi,
8427 stmt_info, bump);
8428 new_stmt = gimple_build_assign
8429 (NULL_TREE, BIT_AND_EXPR, ptr,
8430 build_int_cst
8431 (TREE_TYPE (ptr), -(HOST_WIDE_INT) align));
8432 ptr = copy_ssa_name (ptr, new_stmt);
8433 gimple_assign_set_lhs (new_stmt, ptr);
8434 vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
8435 data_ref
8436 = build2 (MEM_REF, vectype, ptr,
8437 build_int_cst (ref_type, 0));
8438 break;
8439 }
8440 case dr_explicit_realign_optimized:
8441 {
8442 if (TREE_CODE (dataref_ptr) == SSA_NAME)
8443 new_temp = copy_ssa_name (dataref_ptr);
8444 else
8445 new_temp = make_ssa_name (TREE_TYPE (dataref_ptr));
8446 unsigned int align = DR_TARGET_ALIGNMENT (first_dr_info);
8447 new_stmt = gimple_build_assign
8448 (new_temp, BIT_AND_EXPR, dataref_ptr,
8449 build_int_cst (TREE_TYPE (dataref_ptr),
8450 -(HOST_WIDE_INT) align));
8451 vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
8452 data_ref
8453 = build2 (MEM_REF, vectype, new_temp,
8454 build_int_cst (ref_type, 0));
8455 break;
8456 }
8457 default:
8458 gcc_unreachable ();
8459 }
8460 vec_dest = vect_create_destination_var (scalar_dest, vectype);
8461 /* DATA_REF is null if we've already built the statement. */
8462 if (data_ref)
8463 {
8464 vect_copy_ref_info (data_ref, DR_REF (first_dr_info->dr));
8465 new_stmt = gimple_build_assign (vec_dest, data_ref);
8466 }
8467 new_temp = make_ssa_name (vec_dest, new_stmt);
8468 gimple_set_lhs (new_stmt, new_temp);
8469 new_stmt_info
8470 = vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
8471
8472 /* 3. Handle explicit realignment if necessary/supported.
8473 Create in loop:
8474 vec_dest = realign_load (msq, lsq, realignment_token) */
8475 if (alignment_support_scheme == dr_explicit_realign_optimized
8476 || alignment_support_scheme == dr_explicit_realign)
8477 {
8478 lsq = gimple_assign_lhs (new_stmt);
8479 if (!realignment_token)
8480 realignment_token = dataref_ptr;
8481 vec_dest = vect_create_destination_var (scalar_dest, vectype);
8482 new_stmt = gimple_build_assign (vec_dest, REALIGN_LOAD_EXPR,
8483 msq, lsq, realignment_token);
8484 new_temp = make_ssa_name (vec_dest, new_stmt);
8485 gimple_assign_set_lhs (new_stmt, new_temp);
8486 new_stmt_info
8487 = vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
8488
8489 if (alignment_support_scheme == dr_explicit_realign_optimized)
8490 {
8491 gcc_assert (phi);
8492 if (i == vec_num - 1 && j == ncopies - 1)
8493 add_phi_arg (phi, lsq,
8494 loop_latch_edge (containing_loop),
8495 UNKNOWN_LOCATION);
8496 msq = lsq;
8497 }
8498 }
8499
8500 if (memory_access_type == VMAT_CONTIGUOUS_REVERSE)
8501 {
8502 tree perm_mask = perm_mask_for_reverse (vectype);
8503 new_temp = permute_vec_elements (new_temp, new_temp,
8504 perm_mask, stmt_info, gsi);
8505 new_stmt_info = vinfo->lookup_def (new_temp);
8506 }
8507
8508 /* Collect vector loads and later create their permutation in
8509 vect_transform_grouped_load (). */
8510 if (grouped_load || slp_perm)
8511 dr_chain.quick_push (new_temp);
8512
8513 /* Store vector loads in the corresponding SLP_NODE. */
8514 if (slp && !slp_perm)
8515 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt_info);
8516
8517 /* With an SLP permutation we load the gaps as well; without
8518 one we need to skip the gaps after we have managed to fully
8519 load all elements. group_gap_adj is DR_GROUP_SIZE here. */
8520 group_elt += nunits;
8521 if (maybe_ne (group_gap_adj, 0U)
8522 && !slp_perm
8523 && known_eq (group_elt, group_size - group_gap_adj))
8524 {
8525 poly_wide_int bump_val
8526 = (wi::to_wide (TYPE_SIZE_UNIT (elem_type))
8527 * group_gap_adj);
8528 tree bump = wide_int_to_tree (sizetype, bump_val);
8529 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi,
8530 stmt_info, bump);
8531 group_elt = 0;
8532 }
8533 }
8534 /* Bump the vector pointer to account for a gap or for excess
8535 elements loaded for a permuted SLP load. */
8536 if (maybe_ne (group_gap_adj, 0U) && slp_perm)
8537 {
8538 poly_wide_int bump_val
8539 = (wi::to_wide (TYPE_SIZE_UNIT (elem_type))
8540 * group_gap_adj);
8541 tree bump = wide_int_to_tree (sizetype, bump_val);
8542 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi,
8543 stmt_info, bump);
8544 }
8545 }
8546
8547 if (slp && !slp_perm)
8548 continue;
8549
8550 if (slp_perm)
8551 {
8552 unsigned n_perms;
8553 if (!vect_transform_slp_perm_load (slp_node, dr_chain, gsi, vf,
8554 slp_node_instance, false,
8555 &n_perms))
8556 {
8557 dr_chain.release ();
8558 return false;
8559 }
8560 }
8561 else
8562 {
8563 if (grouped_load)
8564 {
8565 if (memory_access_type != VMAT_LOAD_STORE_LANES)
8566 vect_transform_grouped_load (stmt_info, dr_chain,
8567 group_size, gsi);
8568 *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
8569 }
8570 else
8571 {
8572 if (j == 0)
8573 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt_info;
8574 else
8575 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt_info;
8576 prev_stmt_info = new_stmt_info;
8577 }
8578 }
8579 dr_chain.release ();
8580 }
8581
8582 return true;
8583 }
8584
8585 /* Function vect_is_simple_cond.
8586
8587 Input:
8588 LOOP - the loop that is being vectorized.
8589 COND - Condition that is checked for simple use.
8590
8591 Output:
8592 *COMP_VECTYPE - the vector type for the comparison.
8593 *DTS - The def types for the arguments of the comparison
8594
8595 Returns whether a COND can be vectorized. Checks whether
8596 condition operands are supportable using vect_is_simple_use. */
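/* For example, COND may be a comparison tree such as  x_3 < y_4  (possibly
   with constant operands, e.g.  x_3 < 10), or, in the mask case, a boolean
   SSA name such as  _5  produced by an earlier comparison; in the latter
   case *COMP_VECTYPE is the vector boolean type of the mask itself.  */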
8597
8598 static bool
8599 vect_is_simple_cond (tree cond, vec_info *vinfo,
8600 tree *comp_vectype, enum vect_def_type *dts,
8601 tree vectype)
8602 {
8603 tree lhs, rhs;
8604 tree vectype1 = NULL_TREE, vectype2 = NULL_TREE;
8605
8606 /* Mask case. */
8607 if (TREE_CODE (cond) == SSA_NAME
8608 && VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (cond)))
8609 {
8610 if (!vect_is_simple_use (cond, vinfo, &dts[0], comp_vectype)
8611 || !*comp_vectype
8612 || !VECTOR_BOOLEAN_TYPE_P (*comp_vectype))
8613 return false;
8614 return true;
8615 }
8616
8617 if (!COMPARISON_CLASS_P (cond))
8618 return false;
8619
8620 lhs = TREE_OPERAND (cond, 0);
8621 rhs = TREE_OPERAND (cond, 1);
8622
8623 if (TREE_CODE (lhs) == SSA_NAME)
8624 {
8625 if (!vect_is_simple_use (lhs, vinfo, &dts[0], &vectype1))
8626 return false;
8627 }
8628 else if (TREE_CODE (lhs) == INTEGER_CST || TREE_CODE (lhs) == REAL_CST
8629 || TREE_CODE (lhs) == FIXED_CST)
8630 dts[0] = vect_constant_def;
8631 else
8632 return false;
8633
8634 if (TREE_CODE (rhs) == SSA_NAME)
8635 {
8636 if (!vect_is_simple_use (rhs, vinfo, &dts[1], &vectype2))
8637 return false;
8638 }
8639 else if (TREE_CODE (rhs) == INTEGER_CST || TREE_CODE (rhs) == REAL_CST
8640 || TREE_CODE (rhs) == FIXED_CST)
8641 dts[1] = vect_constant_def;
8642 else
8643 return false;
8644
8645 if (vectype1 && vectype2
8646 && maybe_ne (TYPE_VECTOR_SUBPARTS (vectype1),
8647 TYPE_VECTOR_SUBPARTS (vectype2)))
8648 return false;
8649
8650 *comp_vectype = vectype1 ? vectype1 : vectype2;
8651 /* Invariant comparison. */
8652 if (! *comp_vectype && vectype)
8653 {
8654 tree scalar_type = TREE_TYPE (lhs);
8655 /* If we can widen the comparison to match vectype do so. */
8656 if (INTEGRAL_TYPE_P (scalar_type)
8657 && tree_int_cst_lt (TYPE_SIZE (scalar_type),
8658 TYPE_SIZE (TREE_TYPE (vectype))))
8659 scalar_type = build_nonstandard_integer_type
8660 (tree_to_uhwi (TYPE_SIZE (TREE_TYPE (vectype))),
8661 TYPE_UNSIGNED (scalar_type));
8662 *comp_vectype = get_vectype_for_scalar_type (scalar_type);
8663 }
8664
8665 return true;
8666 }
8667
8668 /* vectorizable_condition.
8669
8670 Check if STMT_INFO is a conditional modify expression that can be vectorized.
8671 If VEC_STMT is also passed, vectorize STMT_INFO: create a vectorized
8672 stmt using VEC_COND_EXPR to replace it, put it in VEC_STMT, and insert it
8673 at GSI.
8674
8675 When STMT_INFO is vectorized as a nested cycle, REDUC_DEF is the vector
8676 variable to be used at REDUC_INDEX (in the then clause if REDUC_INDEX
8677 is 1, and in the else clause if it is 2).
8678
8679 Return true if STMT_INFO is vectorizable in this way. */
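/* As a rough sketch of the common (unmasked, non-EXTRACT_LAST) case,
   a scalar statement

       x_5 = a_1 < b_2 ? c_3 : d_4;

   is vectorized, per vector copy, as something like

       vect_x = VEC_COND_EXPR <vect_a < vect_b, vect_c, vect_d>;

   with the embedded comparison built in the truth vector type derived
   from COMP_VECTYPE; boolean operands instead take the bit-operation
   path set up below.  */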
8680
8681 bool
8682 vectorizable_condition (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
8683 stmt_vec_info *vec_stmt, tree reduc_def,
8684 int reduc_index, slp_tree slp_node,
8685 stmt_vector_for_cost *cost_vec)
8686 {
8687 vec_info *vinfo = stmt_info->vinfo;
8688 tree scalar_dest = NULL_TREE;
8689 tree vec_dest = NULL_TREE;
8690 tree cond_expr, cond_expr0 = NULL_TREE, cond_expr1 = NULL_TREE;
8691 tree then_clause, else_clause;
8692 tree comp_vectype = NULL_TREE;
8693 tree vec_cond_lhs = NULL_TREE, vec_cond_rhs = NULL_TREE;
8694 tree vec_then_clause = NULL_TREE, vec_else_clause = NULL_TREE;
8695 tree vec_compare;
8696 tree new_temp;
8697 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
8698 enum vect_def_type dts[4]
8699 = {vect_unknown_def_type, vect_unknown_def_type,
8700 vect_unknown_def_type, vect_unknown_def_type};
8701 int ndts = 4;
8702 int ncopies;
8703 enum tree_code code, cond_code, bitop1 = NOP_EXPR, bitop2 = NOP_EXPR;
8704 stmt_vec_info prev_stmt_info = NULL;
8705 int i, j;
8706 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
8707 vec<tree> vec_oprnds0 = vNULL;
8708 vec<tree> vec_oprnds1 = vNULL;
8709 vec<tree> vec_oprnds2 = vNULL;
8710 vec<tree> vec_oprnds3 = vNULL;
8711 tree vec_cmp_type;
8712 bool masked = false;
8713
8714 if (reduc_index && STMT_SLP_TYPE (stmt_info))
8715 return false;
8716
8717 vect_reduction_type reduction_type
8718 = STMT_VINFO_VEC_REDUCTION_TYPE (stmt_info);
8719 if (reduction_type == TREE_CODE_REDUCTION)
8720 {
8721 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
8722 return false;
8723
8724 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
8725 && !(STMT_VINFO_DEF_TYPE (stmt_info) == vect_nested_cycle
8726 && reduc_def))
8727 return false;
8728
8729 /* FORNOW: not yet supported. */
8730 if (STMT_VINFO_LIVE_P (stmt_info))
8731 {
8732 if (dump_enabled_p ())
8733 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
8734 "value used after loop.\n");
8735 return false;
8736 }
8737 }
8738
8739 /* Is this a vectorizable conditional operation? */
8740 gassign *stmt = dyn_cast <gassign *> (stmt_info->stmt);
8741 if (!stmt)
8742 return false;
8743
8744 code = gimple_assign_rhs_code (stmt);
8745
8746 if (code != COND_EXPR)
8747 return false;
8748
8749 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
8750 tree vectype1 = NULL_TREE, vectype2 = NULL_TREE;
8751
8752 if (slp_node)
8753 ncopies = 1;
8754 else
8755 ncopies = vect_get_num_copies (loop_vinfo, vectype);
8756
8757 gcc_assert (ncopies >= 1);
8758 if (reduc_index && ncopies > 1)
8759 return false; /* FORNOW */
8760
8761 cond_expr = gimple_assign_rhs1 (stmt);
8762 then_clause = gimple_assign_rhs2 (stmt);
8763 else_clause = gimple_assign_rhs3 (stmt);
8764
8765 if (!vect_is_simple_cond (cond_expr, stmt_info->vinfo,
8766 &comp_vectype, &dts[0], slp_node ? NULL : vectype)
8767 || !comp_vectype)
8768 return false;
8769
8770 if (!vect_is_simple_use (then_clause, stmt_info->vinfo, &dts[2], &vectype1))
8771 return false;
8772 if (!vect_is_simple_use (else_clause, stmt_info->vinfo, &dts[3], &vectype2))
8773 return false;
8774
8775 if (vectype1 && !useless_type_conversion_p (vectype, vectype1))
8776 return false;
8777
8778 if (vectype2 && !useless_type_conversion_p (vectype, vectype2))
8779 return false;
8780
8781 masked = !COMPARISON_CLASS_P (cond_expr);
8782 vec_cmp_type = build_same_sized_truth_vector_type (comp_vectype);
8783
8784 if (vec_cmp_type == NULL_TREE)
8785 return false;
8786
8787 cond_code = TREE_CODE (cond_expr);
8788 if (!masked)
8789 {
8790 cond_expr0 = TREE_OPERAND (cond_expr, 0);
8791 cond_expr1 = TREE_OPERAND (cond_expr, 1);
8792 }
8793
8794 if (!masked && VECTOR_BOOLEAN_TYPE_P (comp_vectype))
8795 {
8796 /* Boolean values may have another representation in vectors
8797 and therefore we prefer bit operations over comparison for
8798 them (which also works for scalar masks). We store opcodes
8799 to use in bitop1 and bitop2. Statement is vectorized as
8800 BITOP2 (rhs1 BITOP1 rhs2) or rhs1 BITOP2 (BITOP1 rhs2)
8801 depending on bitop1 and bitop2 arity. */
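/* For one-bit boolean values the choices below follow from the
   identities

       a >  b  <->  a & ~b        (bitop1 = NOT, bitop2 = AND)
       a >= b  <->  a | ~b        (bitop1 = NOT, bitop2 = IOR)
       a == b  <->  ~(a ^ b)      (bitop1 = XOR, bitop2 = NOT)

   with LT/LE reusing the GT/GE patterns on swapped operands; for EQ
   the trailing NOT is avoided later by swapping the then/else
   clauses instead.  */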
8802 switch (cond_code)
8803 {
8804 case GT_EXPR:
8805 bitop1 = BIT_NOT_EXPR;
8806 bitop2 = BIT_AND_EXPR;
8807 break;
8808 case GE_EXPR:
8809 bitop1 = BIT_NOT_EXPR;
8810 bitop2 = BIT_IOR_EXPR;
8811 break;
8812 case LT_EXPR:
8813 bitop1 = BIT_NOT_EXPR;
8814 bitop2 = BIT_AND_EXPR;
8815 std::swap (cond_expr0, cond_expr1);
8816 break;
8817 case LE_EXPR:
8818 bitop1 = BIT_NOT_EXPR;
8819 bitop2 = BIT_IOR_EXPR;
8820 std::swap (cond_expr0, cond_expr1);
8821 break;
8822 case NE_EXPR:
8823 bitop1 = BIT_XOR_EXPR;
8824 break;
8825 case EQ_EXPR:
8826 bitop1 = BIT_XOR_EXPR;
8827 bitop2 = BIT_NOT_EXPR;
8828 break;
8829 default:
8830 return false;
8831 }
8832 cond_code = SSA_NAME;
8833 }
8834
8835 if (!vec_stmt)
8836 {
8837 if (bitop1 != NOP_EXPR)
8838 {
8839 machine_mode mode = TYPE_MODE (comp_vectype);
8840 optab optab;
8841
8842 optab = optab_for_tree_code (bitop1, comp_vectype, optab_default);
8843 if (!optab || optab_handler (optab, mode) == CODE_FOR_nothing)
8844 return false;
8845
8846 if (bitop2 != NOP_EXPR)
8847 {
8848 optab = optab_for_tree_code (bitop2, comp_vectype,
8849 optab_default);
8850 if (!optab || optab_handler (optab, mode) == CODE_FOR_nothing)
8851 return false;
8852 }
8853 }
8854 if (expand_vec_cond_expr_p (vectype, comp_vectype,
8855 cond_code))
8856 {
8857 STMT_VINFO_TYPE (stmt_info) = condition_vec_info_type;
8858 vect_model_simple_cost (stmt_info, ncopies, dts, ndts, slp_node,
8859 cost_vec);
8860 return true;
8861 }
8862 return false;
8863 }
8864
8865 /* Transform. */
8866
8867 if (!slp_node)
8868 {
8869 vec_oprnds0.create (1);
8870 vec_oprnds1.create (1);
8871 vec_oprnds2.create (1);
8872 vec_oprnds3.create (1);
8873 }
8874
8875 /* Handle def. */
8876 scalar_dest = gimple_assign_lhs (stmt);
8877 if (reduction_type != EXTRACT_LAST_REDUCTION)
8878 vec_dest = vect_create_destination_var (scalar_dest, vectype);
8879
8880 /* Handle cond expr. */
8881 for (j = 0; j < ncopies; j++)
8882 {
8883 stmt_vec_info new_stmt_info = NULL;
8884 if (j == 0)
8885 {
8886 if (slp_node)
8887 {
8888 auto_vec<tree, 4> ops;
8889 auto_vec<vec<tree>, 4> vec_defs;
8890
8891 if (masked)
8892 ops.safe_push (cond_expr);
8893 else
8894 {
8895 ops.safe_push (cond_expr0);
8896 ops.safe_push (cond_expr1);
8897 }
8898 ops.safe_push (then_clause);
8899 ops.safe_push (else_clause);
8900 vect_get_slp_defs (ops, slp_node, &vec_defs);
8901 vec_oprnds3 = vec_defs.pop ();
8902 vec_oprnds2 = vec_defs.pop ();
8903 if (!masked)
8904 vec_oprnds1 = vec_defs.pop ();
8905 vec_oprnds0 = vec_defs.pop ();
8906 }
8907 else
8908 {
8909 if (masked)
8910 {
8911 vec_cond_lhs
8912 = vect_get_vec_def_for_operand (cond_expr, stmt_info,
8913 comp_vectype);
8914 vect_is_simple_use (cond_expr, stmt_info->vinfo, &dts[0]);
8915 }
8916 else
8917 {
8918 vec_cond_lhs
8919 = vect_get_vec_def_for_operand (cond_expr0,
8920 stmt_info, comp_vectype);
8921 vect_is_simple_use (cond_expr0, loop_vinfo, &dts[0]);
8922
8923 vec_cond_rhs
8924 = vect_get_vec_def_for_operand (cond_expr1,
8925 stmt_info, comp_vectype);
8926 vect_is_simple_use (cond_expr1, loop_vinfo, &dts[1]);
8927 }
8928 if (reduc_index == 1)
8929 vec_then_clause = reduc_def;
8930 else
8931 {
8932 vec_then_clause = vect_get_vec_def_for_operand (then_clause,
8933 stmt_info);
8934 vect_is_simple_use (then_clause, loop_vinfo, &dts[2]);
8935 }
8936 if (reduc_index == 2)
8937 vec_else_clause = reduc_def;
8938 else
8939 {
8940 vec_else_clause = vect_get_vec_def_for_operand (else_clause,
8941 stmt_info);
8942 vect_is_simple_use (else_clause, loop_vinfo, &dts[3]);
8943 }
8944 }
8945 }
8946 else
8947 {
8948 vec_cond_lhs
8949 = vect_get_vec_def_for_stmt_copy (vinfo, vec_oprnds0.pop ());
8950 if (!masked)
8951 vec_cond_rhs
8952 = vect_get_vec_def_for_stmt_copy (vinfo, vec_oprnds1.pop ());
8953
8954 vec_then_clause = vect_get_vec_def_for_stmt_copy (vinfo,
8955 vec_oprnds2.pop ());
8956 vec_else_clause = vect_get_vec_def_for_stmt_copy (vinfo,
8957 vec_oprnds3.pop ());
8958 }
8959
8960 if (!slp_node)
8961 {
8962 vec_oprnds0.quick_push (vec_cond_lhs);
8963 if (!masked)
8964 vec_oprnds1.quick_push (vec_cond_rhs);
8965 vec_oprnds2.quick_push (vec_then_clause);
8966 vec_oprnds3.quick_push (vec_else_clause);
8967 }
8968
8969 /* Arguments are ready. Create the new vector stmt. */
8970 FOR_EACH_VEC_ELT (vec_oprnds0, i, vec_cond_lhs)
8971 {
8972 vec_then_clause = vec_oprnds2[i];
8973 vec_else_clause = vec_oprnds3[i];
8974
8975 if (masked)
8976 vec_compare = vec_cond_lhs;
8977 else
8978 {
8979 vec_cond_rhs = vec_oprnds1[i];
8980 if (bitop1 == NOP_EXPR)
8981 vec_compare = build2 (cond_code, vec_cmp_type,
8982 vec_cond_lhs, vec_cond_rhs);
8983 else
8984 {
8985 new_temp = make_ssa_name (vec_cmp_type);
8986 gassign *new_stmt;
8987 if (bitop1 == BIT_NOT_EXPR)
8988 new_stmt = gimple_build_assign (new_temp, bitop1,
8989 vec_cond_rhs);
8990 else
8991 new_stmt
8992 = gimple_build_assign (new_temp, bitop1, vec_cond_lhs,
8993 vec_cond_rhs);
8994 vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
8995 if (bitop2 == NOP_EXPR)
8996 vec_compare = new_temp;
8997 else if (bitop2 == BIT_NOT_EXPR)
8998 {
8999 /* Instead of doing ~x ? y : z do x ? z : y. */
9000 vec_compare = new_temp;
9001 std::swap (vec_then_clause, vec_else_clause);
9002 }
9003 else
9004 {
9005 vec_compare = make_ssa_name (vec_cmp_type);
9006 new_stmt
9007 = gimple_build_assign (vec_compare, bitop2,
9008 vec_cond_lhs, new_temp);
9009 vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
9010 }
9011 }
9012 }
9013 if (reduction_type == EXTRACT_LAST_REDUCTION)
9014 {
9015 if (!is_gimple_val (vec_compare))
9016 {
9017 tree vec_compare_name = make_ssa_name (vec_cmp_type);
9018 gassign *new_stmt = gimple_build_assign (vec_compare_name,
9019 vec_compare);
9020 vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
9021 vec_compare = vec_compare_name;
9022 }
9023 gcc_assert (reduc_index == 2);
9024 gcall *new_stmt = gimple_build_call_internal
9025 (IFN_FOLD_EXTRACT_LAST, 3, else_clause, vec_compare,
9026 vec_then_clause);
9027 gimple_call_set_lhs (new_stmt, scalar_dest);
9028 SSA_NAME_DEF_STMT (scalar_dest) = new_stmt;
9029 if (stmt_info->stmt == gsi_stmt (*gsi))
9030 new_stmt_info = vect_finish_replace_stmt (stmt_info, new_stmt);
9031 else
9032 {
9033 /* In this case we're moving the definition to later in the
9034 block. That doesn't matter because the only uses of the
9035 lhs are in phi statements. */
9036 gimple_stmt_iterator old_gsi
9037 = gsi_for_stmt (stmt_info->stmt);
9038 gsi_remove (&old_gsi, true);
9039 new_stmt_info
9040 = vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
9041 }
9042 }
9043 else
9044 {
9045 new_temp = make_ssa_name (vec_dest);
9046 gassign *new_stmt
9047 = gimple_build_assign (new_temp, VEC_COND_EXPR, vec_compare,
9048 vec_then_clause, vec_else_clause);
9049 new_stmt_info
9050 = vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
9051 }
9052 if (slp_node)
9053 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt_info);
9054 }
9055
9056 if (slp_node)
9057 continue;
9058
9059 if (j == 0)
9060 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt_info;
9061 else
9062 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt_info;
9063
9064 prev_stmt_info = new_stmt_info;
9065 }
9066
9067 vec_oprnds0.release ();
9068 vec_oprnds1.release ();
9069 vec_oprnds2.release ();
9070 vec_oprnds3.release ();
9071
9072 return true;
9073 }
9074
9075 /* vectorizable_comparison.
9076
9077 Check if STMT_INFO is a comparison expression that can be vectorized.
9078 If VEC_STMT is also passed, vectorize STMT_INFO: create a vectorized
9079 comparison, put it in VEC_STMT, and insert it at GSI.
9080
9081 Return true if STMT_INFO is vectorizable in this way. */
9082
9083 static bool
9084 vectorizable_comparison (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
9085 stmt_vec_info *vec_stmt, tree reduc_def,
9086 slp_tree slp_node, stmt_vector_for_cost *cost_vec)
9087 {
9088 vec_info *vinfo = stmt_info->vinfo;
9089 tree lhs, rhs1, rhs2;
9090 tree vectype1 = NULL_TREE, vectype2 = NULL_TREE;
9091 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
9092 tree vec_rhs1 = NULL_TREE, vec_rhs2 = NULL_TREE;
9093 tree new_temp;
9094 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
9095 enum vect_def_type dts[2] = {vect_unknown_def_type, vect_unknown_def_type};
9096 int ndts = 2;
9097 poly_uint64 nunits;
9098 int ncopies;
9099 enum tree_code code, bitop1 = NOP_EXPR, bitop2 = NOP_EXPR;
9100 stmt_vec_info prev_stmt_info = NULL;
9101 int i, j;
9102 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
9103 vec<tree> vec_oprnds0 = vNULL;
9104 vec<tree> vec_oprnds1 = vNULL;
9105 tree mask_type;
9106 tree mask;
9107
9108 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
9109 return false;
9110
9111 if (!vectype || !VECTOR_BOOLEAN_TYPE_P (vectype))
9112 return false;
9113
9114 mask_type = vectype;
9115 nunits = TYPE_VECTOR_SUBPARTS (vectype);
9116
9117 if (slp_node)
9118 ncopies = 1;
9119 else
9120 ncopies = vect_get_num_copies (loop_vinfo, vectype);
9121
9122 gcc_assert (ncopies >= 1);
9123 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
9124 && !(STMT_VINFO_DEF_TYPE (stmt_info) == vect_nested_cycle
9125 && reduc_def))
9126 return false;
9127
9128 if (STMT_VINFO_LIVE_P (stmt_info))
9129 {
9130 if (dump_enabled_p ())
9131 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
9132 "value used after loop.\n");
9133 return false;
9134 }
9135
9136 gassign *stmt = dyn_cast <gassign *> (stmt_info->stmt);
9137 if (!stmt)
9138 return false;
9139
9140 code = gimple_assign_rhs_code (stmt);
9141
9142 if (TREE_CODE_CLASS (code) != tcc_comparison)
9143 return false;
9144
9145 rhs1 = gimple_assign_rhs1 (stmt);
9146 rhs2 = gimple_assign_rhs2 (stmt);
9147
9148 if (!vect_is_simple_use (rhs1, stmt_info->vinfo, &dts[0], &vectype1))
9149 return false;
9150
9151 if (!vect_is_simple_use (rhs2, stmt_info->vinfo, &dts[1], &vectype2))
9152 return false;
9153
9154 if (vectype1 && vectype2
9155 && maybe_ne (TYPE_VECTOR_SUBPARTS (vectype1),
9156 TYPE_VECTOR_SUBPARTS (vectype2)))
9157 return false;
9158
9159 vectype = vectype1 ? vectype1 : vectype2;
9160
9161 /* Invariant comparison. */
9162 if (!vectype)
9163 {
9164 vectype = get_vectype_for_scalar_type (TREE_TYPE (rhs1));
9165 if (maybe_ne (TYPE_VECTOR_SUBPARTS (vectype), nunits))
9166 return false;
9167 }
9168 else if (maybe_ne (nunits, TYPE_VECTOR_SUBPARTS (vectype)))
9169 return false;
9170
9171 /* Can't compare mask and non-mask types. */
9172 if (vectype1 && vectype2
9173 && (VECTOR_BOOLEAN_TYPE_P (vectype1) ^ VECTOR_BOOLEAN_TYPE_P (vectype2)))
9174 return false;
9175
9176 /* Boolean values may have another representation in vectors
9177 and therefore we prefer bit operations over comparison for
9178 them (which also works for scalar masks). We store opcodes
9179 to use in bitop1 and bitop2. Statement is vectorized as
9180 BITOP2 (rhs1 BITOP1 rhs2) or
9181 rhs1 BITOP2 (BITOP1 rhs2)
9182 depending on bitop1 and bitop2 arity. */
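/* For example, with boolean vector operands a GE comparison
   "lhs_1 = a_2 >= b_3" is emitted further below as

       tmp_4 = ~vect_b;
       lhs_5 = vect_a | tmp_4;

   rather than as a vector compare, matching the bitop1/bitop2
   selection made here.  */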
9183 if (VECTOR_BOOLEAN_TYPE_P (vectype))
9184 {
9185 if (code == GT_EXPR)
9186 {
9187 bitop1 = BIT_NOT_EXPR;
9188 bitop2 = BIT_AND_EXPR;
9189 }
9190 else if (code == GE_EXPR)
9191 {
9192 bitop1 = BIT_NOT_EXPR;
9193 bitop2 = BIT_IOR_EXPR;
9194 }
9195 else if (code == LT_EXPR)
9196 {
9197 bitop1 = BIT_NOT_EXPR;
9198 bitop2 = BIT_AND_EXPR;
9199 std::swap (rhs1, rhs2);
9200 std::swap (dts[0], dts[1]);
9201 }
9202 else if (code == LE_EXPR)
9203 {
9204 bitop1 = BIT_NOT_EXPR;
9205 bitop2 = BIT_IOR_EXPR;
9206 std::swap (rhs1, rhs2);
9207 std::swap (dts[0], dts[1]);
9208 }
9209 else
9210 {
9211 bitop1 = BIT_XOR_EXPR;
9212 if (code == EQ_EXPR)
9213 bitop2 = BIT_NOT_EXPR;
9214 }
9215 }
9216
9217 if (!vec_stmt)
9218 {
9219 if (bitop1 == NOP_EXPR)
9220 {
9221 if (!expand_vec_cmp_expr_p (vectype, mask_type, code))
9222 return false;
9223 }
9224 else
9225 {
9226 machine_mode mode = TYPE_MODE (vectype);
9227 optab optab;
9228
9229 optab = optab_for_tree_code (bitop1, vectype, optab_default);
9230 if (!optab || optab_handler (optab, mode) == CODE_FOR_nothing)
9231 return false;
9232
9233 if (bitop2 != NOP_EXPR)
9234 {
9235 optab = optab_for_tree_code (bitop2, vectype, optab_default);
9236 if (!optab || optab_handler (optab, mode) == CODE_FOR_nothing)
9237 return false;
9238 }
9239 }
9240
9241 STMT_VINFO_TYPE (stmt_info) = comparison_vec_info_type;
9242 vect_model_simple_cost (stmt_info, ncopies * (1 + (bitop2 != NOP_EXPR)),
9243 dts, ndts, slp_node, cost_vec);
9244 return true;
9245 }
9246
9247 /* Transform. */
9248 if (!slp_node)
9249 {
9250 vec_oprnds0.create (1);
9251 vec_oprnds1.create (1);
9252 }
9253
9254 /* Handle def. */
9255 lhs = gimple_assign_lhs (stmt);
9256 mask = vect_create_destination_var (lhs, mask_type);
9257
9258 /* Handle cmp expr. */
9259 for (j = 0; j < ncopies; j++)
9260 {
9261 stmt_vec_info new_stmt_info = NULL;
9262 if (j == 0)
9263 {
9264 if (slp_node)
9265 {
9266 auto_vec<tree, 2> ops;
9267 auto_vec<vec<tree>, 2> vec_defs;
9268
9269 ops.safe_push (rhs1);
9270 ops.safe_push (rhs2);
9271 vect_get_slp_defs (ops, slp_node, &vec_defs);
9272 vec_oprnds1 = vec_defs.pop ();
9273 vec_oprnds0 = vec_defs.pop ();
9274 }
9275 else
9276 {
9277 vec_rhs1 = vect_get_vec_def_for_operand (rhs1, stmt_info,
9278 vectype);
9279 vec_rhs2 = vect_get_vec_def_for_operand (rhs2, stmt_info,
9280 vectype);
9281 }
9282 }
9283 else
9284 {
9285 vec_rhs1 = vect_get_vec_def_for_stmt_copy (vinfo,
9286 vec_oprnds0.pop ());
9287 vec_rhs2 = vect_get_vec_def_for_stmt_copy (vinfo,
9288 vec_oprnds1.pop ());
9289 }
9290
9291 if (!slp_node)
9292 {
9293 vec_oprnds0.quick_push (vec_rhs1);
9294 vec_oprnds1.quick_push (vec_rhs2);
9295 }
9296
9297 /* Arguments are ready. Create the new vector stmt. */
9298 FOR_EACH_VEC_ELT (vec_oprnds0, i, vec_rhs1)
9299 {
9300 vec_rhs2 = vec_oprnds1[i];
9301
9302 new_temp = make_ssa_name (mask);
9303 if (bitop1 == NOP_EXPR)
9304 {
9305 gassign *new_stmt = gimple_build_assign (new_temp, code,
9306 vec_rhs1, vec_rhs2);
9307 new_stmt_info
9308 = vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
9309 }
9310 else
9311 {
9312 gassign *new_stmt;
9313 if (bitop1 == BIT_NOT_EXPR)
9314 new_stmt = gimple_build_assign (new_temp, bitop1, vec_rhs2);
9315 else
9316 new_stmt = gimple_build_assign (new_temp, bitop1, vec_rhs1,
9317 vec_rhs2);
9318 new_stmt_info
9319 = vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
9320 if (bitop2 != NOP_EXPR)
9321 {
9322 tree res = make_ssa_name (mask);
9323 if (bitop2 == BIT_NOT_EXPR)
9324 new_stmt = gimple_build_assign (res, bitop2, new_temp);
9325 else
9326 new_stmt = gimple_build_assign (res, bitop2, vec_rhs1,
9327 new_temp);
9328 new_stmt_info
9329 = vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
9330 }
9331 }
9332 if (slp_node)
9333 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt_info);
9334 }
9335
9336 if (slp_node)
9337 continue;
9338
9339 if (j == 0)
9340 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt_info;
9341 else
9342 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt_info;
9343
9344 prev_stmt_info = new_stmt_info;
9345 }
9346
9347 vec_oprnds0.release ();
9348 vec_oprnds1.release ();
9349
9350 return true;
9351 }
9352
9353 /* If SLP_NODE is nonnull, return true if vectorizable_live_operation
9354 can handle all live statements in the node. Otherwise return true
9355 if STMT_INFO is not live or if vectorizable_live_operation can handle it.
9356 GSI and VEC_STMT are as for vectorizable_live_operation. */
9357
9358 static bool
9359 can_vectorize_live_stmts (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
9360 slp_tree slp_node, stmt_vec_info *vec_stmt,
9361 stmt_vector_for_cost *cost_vec)
9362 {
9363 if (slp_node)
9364 {
9365 stmt_vec_info slp_stmt_info;
9366 unsigned int i;
9367 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (slp_node), i, slp_stmt_info)
9368 {
9369 if (STMT_VINFO_LIVE_P (slp_stmt_info)
9370 && !vectorizable_live_operation (slp_stmt_info, gsi, slp_node, i,
9371 vec_stmt, cost_vec))
9372 return false;
9373 }
9374 }
9375 else if (STMT_VINFO_LIVE_P (stmt_info)
9376 && !vectorizable_live_operation (stmt_info, gsi, slp_node, -1,
9377 vec_stmt, cost_vec))
9378 return false;
9379
9380 return true;
9381 }
9382
9383 /* Make sure the statement is vectorizable. */
9384
9385 bool
9386 vect_analyze_stmt (stmt_vec_info stmt_info, bool *need_to_vectorize,
9387 slp_tree node, slp_instance node_instance,
9388 stmt_vector_for_cost *cost_vec)
9389 {
9390 vec_info *vinfo = stmt_info->vinfo;
9391 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
9392 enum vect_relevant relevance = STMT_VINFO_RELEVANT (stmt_info);
9393 bool ok;
9394 gimple_seq pattern_def_seq;
9395
9396 if (dump_enabled_p ())
9397 dump_printf_loc (MSG_NOTE, vect_location, "==> examining statement: %G",
9398 stmt_info->stmt);
9399
9400 if (gimple_has_volatile_ops (stmt_info->stmt))
9401 {
9402 if (dump_enabled_p ())
9403 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
9404 "not vectorized: stmt has volatile operands\n");
9405
9406 return false;
9407 }
9408
9409 if (STMT_VINFO_IN_PATTERN_P (stmt_info)
9410 && node == NULL
9411 && (pattern_def_seq = STMT_VINFO_PATTERN_DEF_SEQ (stmt_info)))
9412 {
9413 gimple_stmt_iterator si;
9414
9415 for (si = gsi_start (pattern_def_seq); !gsi_end_p (si); gsi_next (&si))
9416 {
9417 stmt_vec_info pattern_def_stmt_info
9418 = vinfo->lookup_stmt (gsi_stmt (si));
9419 if (STMT_VINFO_RELEVANT_P (pattern_def_stmt_info)
9420 || STMT_VINFO_LIVE_P (pattern_def_stmt_info))
9421 {
9422 /* Analyze def stmt of STMT if it's a pattern stmt. */
9423 if (dump_enabled_p ())
9424 dump_printf_loc (MSG_NOTE, vect_location,
9425 "==> examining pattern def statement: %G",
9426 pattern_def_stmt_info->stmt);
9427
9428 if (!vect_analyze_stmt (pattern_def_stmt_info,
9429 need_to_vectorize, node, node_instance,
9430 cost_vec))
9431 return false;
9432 }
9433 }
9434 }
9435
9436 /* Skip stmts that do not need to be vectorized. In loops this is expected
9437 to include:
9438 - the COND_EXPR which is the loop exit condition
9439 - any LABEL_EXPRs in the loop
9440 - computations that are used only for array indexing or loop control.
9441 In basic blocks we only analyze statements that are a part of some SLP
9442 instance, therefore, all the statements are relevant.
9443
9444 Pattern statement needs to be analyzed instead of the original statement
9445 if the original statement is not relevant. Otherwise, we analyze both
9446 statements. In basic blocks we are called from some SLP instance
9447 traversal; we don't analyze pattern stmts instead, since the pattern
9448 stmts will already be part of an SLP instance. */
9449
9450 stmt_vec_info pattern_stmt_info = STMT_VINFO_RELATED_STMT (stmt_info);
9451 if (!STMT_VINFO_RELEVANT_P (stmt_info)
9452 && !STMT_VINFO_LIVE_P (stmt_info))
9453 {
9454 if (STMT_VINFO_IN_PATTERN_P (stmt_info)
9455 && pattern_stmt_info
9456 && (STMT_VINFO_RELEVANT_P (pattern_stmt_info)
9457 || STMT_VINFO_LIVE_P (pattern_stmt_info)))
9458 {
9459 /* Analyze PATTERN_STMT instead of the original stmt. */
9460 stmt_info = pattern_stmt_info;
9461 if (dump_enabled_p ())
9462 dump_printf_loc (MSG_NOTE, vect_location,
9463 "==> examining pattern statement: %G",
9464 stmt_info->stmt);
9465 }
9466 else
9467 {
9468 if (dump_enabled_p ())
9469 dump_printf_loc (MSG_NOTE, vect_location, "irrelevant.\n");
9470
9471 return true;
9472 }
9473 }
9474 else if (STMT_VINFO_IN_PATTERN_P (stmt_info)
9475 && node == NULL
9476 && pattern_stmt_info
9477 && (STMT_VINFO_RELEVANT_P (pattern_stmt_info)
9478 || STMT_VINFO_LIVE_P (pattern_stmt_info)))
9479 {
9480 /* Analyze PATTERN_STMT too. */
9481 if (dump_enabled_p ())
9482 dump_printf_loc (MSG_NOTE, vect_location,
9483 "==> examining pattern statement: %G",
9484 pattern_stmt_info->stmt);
9485
9486 if (!vect_analyze_stmt (pattern_stmt_info, need_to_vectorize, node,
9487 node_instance, cost_vec))
9488 return false;
9489 }
9490
9491 switch (STMT_VINFO_DEF_TYPE (stmt_info))
9492 {
9493 case vect_internal_def:
9494 break;
9495
9496 case vect_reduction_def:
9497 case vect_nested_cycle:
9498 gcc_assert (!bb_vinfo
9499 && (relevance == vect_used_in_outer
9500 || relevance == vect_used_in_outer_by_reduction
9501 || relevance == vect_used_by_reduction
9502 || relevance == vect_unused_in_scope
9503 || relevance == vect_used_only_live));
9504 break;
9505
9506 case vect_induction_def:
9507 gcc_assert (!bb_vinfo);
9508 break;
9509
9510 case vect_constant_def:
9511 case vect_external_def:
9512 case vect_unknown_def_type:
9513 default:
9514 gcc_unreachable ();
9515 }
9516
9517 if (STMT_VINFO_RELEVANT_P (stmt_info))
9518 {
9519 tree type = gimple_expr_type (stmt_info->stmt);
9520 gcc_assert (!VECTOR_MODE_P (TYPE_MODE (type)));
9521 gcall *call = dyn_cast <gcall *> (stmt_info->stmt);
9522 gcc_assert (STMT_VINFO_VECTYPE (stmt_info)
9523 || (call && gimple_call_lhs (call) == NULL_TREE));
9524 *need_to_vectorize = true;
9525 }
9526
9527 if (PURE_SLP_STMT (stmt_info) && !node)
9528 {
9529 dump_printf_loc (MSG_NOTE, vect_location,
9530 "handled only by SLP analysis\n");
9531 return true;
9532 }
9533
9534 ok = true;
9535 if (!bb_vinfo
9536 && (STMT_VINFO_RELEVANT_P (stmt_info)
9537 || STMT_VINFO_DEF_TYPE (stmt_info) == vect_reduction_def))
9538 ok = (vectorizable_simd_clone_call (stmt_info, NULL, NULL, node, cost_vec)
9539 || vectorizable_conversion (stmt_info, NULL, NULL, node, cost_vec)
9540 || vectorizable_shift (stmt_info, NULL, NULL, node, cost_vec)
9541 || vectorizable_operation (stmt_info, NULL, NULL, node, cost_vec)
9542 || vectorizable_assignment (stmt_info, NULL, NULL, node, cost_vec)
9543 || vectorizable_load (stmt_info, NULL, NULL, node, node_instance,
9544 cost_vec)
9545 || vectorizable_call (stmt_info, NULL, NULL, node, cost_vec)
9546 || vectorizable_store (stmt_info, NULL, NULL, node, cost_vec)
9547 || vectorizable_reduction (stmt_info, NULL, NULL, node,
9548 node_instance, cost_vec)
9549 || vectorizable_induction (stmt_info, NULL, NULL, node, cost_vec)
9550 || vectorizable_condition (stmt_info, NULL, NULL, NULL, 0, node,
9551 cost_vec)
9552 || vectorizable_comparison (stmt_info, NULL, NULL, NULL, node,
9553 cost_vec));
9554 else
9555 {
9556 if (bb_vinfo)
9557 ok = (vectorizable_simd_clone_call (stmt_info, NULL, NULL, node,
9558 cost_vec)
9559 || vectorizable_conversion (stmt_info, NULL, NULL, node,
9560 cost_vec)
9561 || vectorizable_shift (stmt_info, NULL, NULL, node, cost_vec)
9562 || vectorizable_operation (stmt_info, NULL, NULL, node, cost_vec)
9563 || vectorizable_assignment (stmt_info, NULL, NULL, node,
9564 cost_vec)
9565 || vectorizable_load (stmt_info, NULL, NULL, node, node_instance,
9566 cost_vec)
9567 || vectorizable_call (stmt_info, NULL, NULL, node, cost_vec)
9568 || vectorizable_store (stmt_info, NULL, NULL, node, cost_vec)
9569 || vectorizable_condition (stmt_info, NULL, NULL, NULL, 0, node,
9570 cost_vec)
9571 || vectorizable_comparison (stmt_info, NULL, NULL, NULL, node,
9572 cost_vec));
9573 }
9574
9575 if (!ok)
9576 {
9577 if (dump_enabled_p ())
9578 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
9579 "not vectorized: relevant stmt not supported: %G",
9580 stmt_info->stmt);
9581
9582 return false;
9583 }
9584
9585 /* Stmts that are (also) "live" (i.e. - that are used out of the loop)
9586 need extra handling, except for vectorizable reductions. */
9587 if (!bb_vinfo
9588 && STMT_VINFO_TYPE (stmt_info) != reduc_vec_info_type
9589 && !can_vectorize_live_stmts (stmt_info, NULL, node, NULL, cost_vec))
9590 {
9591 if (dump_enabled_p ())
9592 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
9593 "not vectorized: live stmt not supported: %G",
9594 stmt_info->stmt);
9595
9596 return false;
9597 }
9598
9599 return true;
9600 }
9601
9602
9603 /* Function vect_transform_stmt.
9604
9605 Create a vectorized stmt to replace STMT_INFO, and insert it at GSI. */
9606
9607 bool
9608 vect_transform_stmt (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
9609 slp_tree slp_node, slp_instance slp_node_instance)
9610 {
9611 vec_info *vinfo = stmt_info->vinfo;
9612 bool is_store = false;
9613 stmt_vec_info vec_stmt = NULL;
9614 bool done;
9615
9616 gcc_assert (slp_node || !PURE_SLP_STMT (stmt_info));
9617 stmt_vec_info old_vec_stmt_info = STMT_VINFO_VEC_STMT (stmt_info);
9618
9619 bool nested_p = (STMT_VINFO_LOOP_VINFO (stmt_info)
9620 && nested_in_vect_loop_p
9621 (LOOP_VINFO_LOOP (STMT_VINFO_LOOP_VINFO (stmt_info)),
9622 stmt_info));
9623
9624 gimple *stmt = stmt_info->stmt;
9625 switch (STMT_VINFO_TYPE (stmt_info))
9626 {
9627 case type_demotion_vec_info_type:
9628 case type_promotion_vec_info_type:
9629 case type_conversion_vec_info_type:
9630 done = vectorizable_conversion (stmt_info, gsi, &vec_stmt, slp_node,
9631 NULL);
9632 gcc_assert (done);
9633 break;
9634
9635 case induc_vec_info_type:
9636 done = vectorizable_induction (stmt_info, gsi, &vec_stmt, slp_node,
9637 NULL);
9638 gcc_assert (done);
9639 break;
9640
9641 case shift_vec_info_type:
9642 done = vectorizable_shift (stmt_info, gsi, &vec_stmt, slp_node, NULL);
9643 gcc_assert (done);
9644 break;
9645
9646 case op_vec_info_type:
9647 done = vectorizable_operation (stmt_info, gsi, &vec_stmt, slp_node,
9648 NULL);
9649 gcc_assert (done);
9650 break;
9651
9652 case assignment_vec_info_type:
9653 done = vectorizable_assignment (stmt_info, gsi, &vec_stmt, slp_node,
9654 NULL);
9655 gcc_assert (done);
9656 break;
9657
9658 case load_vec_info_type:
9659 done = vectorizable_load (stmt_info, gsi, &vec_stmt, slp_node,
9660 slp_node_instance, NULL);
9661 gcc_assert (done);
9662 break;
9663
9664 case store_vec_info_type:
9665 done = vectorizable_store (stmt_info, gsi, &vec_stmt, slp_node, NULL);
9666 gcc_assert (done);
9667 if (STMT_VINFO_GROUPED_ACCESS (stmt_info) && !slp_node)
9668 {
9669 /* In case of interleaving, the whole chain is vectorized when the
9670 last store in the chain is reached. Store stmts before the last
9671 one are skipped, and their vec_stmt_info shouldn't be freed
9672 meanwhile. */
9673 stmt_vec_info group_info = DR_GROUP_FIRST_ELEMENT (stmt_info);
9674 if (DR_GROUP_STORE_COUNT (group_info) == DR_GROUP_SIZE (group_info))
9675 is_store = true;
9676 }
9677 else
9678 is_store = true;
9679 break;
9680
9681 case condition_vec_info_type:
9682 done = vectorizable_condition (stmt_info, gsi, &vec_stmt, NULL, 0,
9683 slp_node, NULL);
9684 gcc_assert (done);
9685 break;
9686
9687 case comparison_vec_info_type:
9688 done = vectorizable_comparison (stmt_info, gsi, &vec_stmt, NULL,
9689 slp_node, NULL);
9690 gcc_assert (done);
9691 break;
9692
9693 case call_vec_info_type:
9694 done = vectorizable_call (stmt_info, gsi, &vec_stmt, slp_node, NULL);
9695 stmt = gsi_stmt (*gsi);
9696 break;
9697
9698 case call_simd_clone_vec_info_type:
9699 done = vectorizable_simd_clone_call (stmt_info, gsi, &vec_stmt,
9700 slp_node, NULL);
9701 stmt = gsi_stmt (*gsi);
9702 break;
9703
9704 case reduc_vec_info_type:
9705 done = vectorizable_reduction (stmt_info, gsi, &vec_stmt, slp_node,
9706 slp_node_instance, NULL);
9707 gcc_assert (done);
9708 break;
9709
9710 default:
9711 if (!STMT_VINFO_LIVE_P (stmt_info))
9712 {
9713 if (dump_enabled_p ())
9714 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
9715 "stmt not supported.\n");
9716 gcc_unreachable ();
9717 }
9718 }
9719
9720 /* Verify SLP vectorization doesn't mess with STMT_VINFO_VEC_STMT.
9721 This would break hybrid SLP vectorization. */
9722 if (slp_node)
9723 gcc_assert (!vec_stmt
9724 && STMT_VINFO_VEC_STMT (stmt_info) == old_vec_stmt_info);
9725
9726 /* Handle inner-loop stmts whose DEF is used in the loop-nest that
9727 is being vectorized, but outside the immediately enclosing loop. */
9728 if (vec_stmt
9729 && nested_p
9730 && STMT_VINFO_TYPE (stmt_info) != reduc_vec_info_type
9731 && (STMT_VINFO_RELEVANT (stmt_info) == vect_used_in_outer
9732 || STMT_VINFO_RELEVANT (stmt_info) ==
9733 vect_used_in_outer_by_reduction))
9734 {
9735 struct loop *innerloop = LOOP_VINFO_LOOP (
9736 STMT_VINFO_LOOP_VINFO (stmt_info))->inner;
9737 imm_use_iterator imm_iter;
9738 use_operand_p use_p;
9739 tree scalar_dest;
9740
9741 if (dump_enabled_p ())
9742 dump_printf_loc (MSG_NOTE, vect_location,
9743 "Record the vdef for outer-loop vectorization.\n");
9744
9745 /* Find the relevant loop-exit phi-node, and record the vec_stmt there
9746 (to be used when vectorizing outer-loop stmts that use the DEF of
9747 STMT). */
9748 if (gimple_code (stmt) == GIMPLE_PHI)
9749 scalar_dest = PHI_RESULT (stmt);
9750 else
9751 scalar_dest = gimple_get_lhs (stmt);
9752
9753 FOR_EACH_IMM_USE_FAST (use_p, imm_iter, scalar_dest)
9754 if (!flow_bb_inside_loop_p (innerloop, gimple_bb (USE_STMT (use_p))))
9755 {
9756 stmt_vec_info exit_phi_info
9757 = vinfo->lookup_stmt (USE_STMT (use_p));
9758 STMT_VINFO_VEC_STMT (exit_phi_info) = vec_stmt;
9759 }
9760 }
9761
9762 /* Handle stmts whose DEF is used outside the loop-nest that is
9763 being vectorized. */
9764 if (STMT_VINFO_TYPE (stmt_info) != reduc_vec_info_type)
9765 {
9766 done = can_vectorize_live_stmts (stmt_info, gsi, slp_node, &vec_stmt,
9767 NULL);
9768 gcc_assert (done);
9769 }
9770
9771 if (vec_stmt)
9772 STMT_VINFO_VEC_STMT (stmt_info) = vec_stmt;
9773
9774 return is_store;
9775 }
9776
9777
9778 /* Remove a group of stores (for SLP or interleaving), free their
9779 stmt_vec_info. */
9780
9781 void
9782 vect_remove_stores (stmt_vec_info first_stmt_info)
9783 {
9784 vec_info *vinfo = first_stmt_info->vinfo;
9785 stmt_vec_info next_stmt_info = first_stmt_info;
9786
9787 while (next_stmt_info)
9788 {
9789 stmt_vec_info tmp = DR_GROUP_NEXT_ELEMENT (next_stmt_info);
9790 next_stmt_info = vect_orig_stmt (next_stmt_info);
9791 /* Free the attached stmt_vec_info and remove the stmt. */
9792 vinfo->remove_stmt (next_stmt_info);
9793 next_stmt_info = tmp;
9794 }
9795 }
9796
9797 /* Function get_vectype_for_scalar_type_and_size.
9798
9799 Returns the vector type corresponding to SCALAR_TYPE and SIZE as supported
9800 by the target. */
9801
9802 tree
9803 get_vectype_for_scalar_type_and_size (tree scalar_type, poly_uint64 size)
9804 {
9805 tree orig_scalar_type = scalar_type;
9806 scalar_mode inner_mode;
9807 machine_mode simd_mode;
9808 poly_uint64 nunits;
9809 tree vectype;
9810
9811 if (!is_int_mode (TYPE_MODE (scalar_type), &inner_mode)
9812 && !is_float_mode (TYPE_MODE (scalar_type), &inner_mode))
9813 return NULL_TREE;
9814
9815 unsigned int nbytes = GET_MODE_SIZE (inner_mode);
9816
9817 /* For vector types of elements whose mode precision doesn't
9818 match their type's precision we use an element type of mode
9819 precision. The vectorization routines will have to make sure
9820 they support the proper result truncation/extension.
9821 We also make sure to build vector types with INTEGER_TYPE
9822 component type only. */
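/* For example, a 1-bit BOOLEAN_TYPE or a 3-bit bit-field type whose
   mode is QImode is replaced here by an 8-bit INTEGER_TYPE of the same
   signedness, so that every vector element fully occupies its mode.  */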
9823 if (INTEGRAL_TYPE_P (scalar_type)
9824 && (GET_MODE_BITSIZE (inner_mode) != TYPE_PRECISION (scalar_type)
9825 || TREE_CODE (scalar_type) != INTEGER_TYPE))
9826 scalar_type = build_nonstandard_integer_type (GET_MODE_BITSIZE (inner_mode),
9827 TYPE_UNSIGNED (scalar_type));
9828
9829 /* We shouldn't end up building VECTOR_TYPEs of non-scalar components.
9830 When the component mode passes the above test simply use a type
9831 corresponding to that mode. The theory is that any use that
9832 would cause problems with this will disable vectorization anyway. */
9833 else if (!SCALAR_FLOAT_TYPE_P (scalar_type)
9834 && !INTEGRAL_TYPE_P (scalar_type))
9835 scalar_type = lang_hooks.types.type_for_mode (inner_mode, 1);
9836
9837 /* We can't build a vector type of elements with alignment bigger than
9838 their size. */
9839 else if (nbytes < TYPE_ALIGN_UNIT (scalar_type))
9840 scalar_type = lang_hooks.types.type_for_mode (inner_mode,
9841 TYPE_UNSIGNED (scalar_type));
9842
9843 /* If we fell back to using the mode, fail if there was
9844 no scalar type for it. */
9845 if (scalar_type == NULL_TREE)
9846 return NULL_TREE;
9847
9848 /* If no size was supplied use the mode the target prefers. Otherwise
9849 look up a vector mode of the specified size. */
9850 if (known_eq (size, 0U))
9851 simd_mode = targetm.vectorize.preferred_simd_mode (inner_mode);
9852 else if (!multiple_p (size, nbytes, &nunits)
9853 || !mode_for_vector (inner_mode, nunits).exists (&simd_mode))
9854 return NULL_TREE;
9855 /* NOTE: nunits == 1 is allowed to support single element vector types. */
9856 if (!multiple_p (GET_MODE_SIZE (simd_mode), nbytes, &nunits))
9857 return NULL_TREE;
9858
9859 vectype = build_vector_type (scalar_type, nunits);
9860
9861 if (!VECTOR_MODE_P (TYPE_MODE (vectype))
9862 && !INTEGRAL_MODE_P (TYPE_MODE (vectype)))
9863 return NULL_TREE;
9864
9865 /* Re-attach the address-space qualifier if we canonicalized the scalar
9866 type. */
9867 if (TYPE_ADDR_SPACE (orig_scalar_type) != TYPE_ADDR_SPACE (vectype))
9868 return build_qualified_type
9869 (vectype, KEEP_QUAL_ADDR_SPACE (TYPE_QUALS (orig_scalar_type)));
9870
9871 return vectype;
9872 }
9873
9874 poly_uint64 current_vector_size;
9875
9876 /* Function get_vectype_for_scalar_type.
9877
9878 Returns the vector type corresponding to SCALAR_TYPE as supported
9879 by the target. */
9880
9881 tree
9882 get_vectype_for_scalar_type (tree scalar_type)
9883 {
9884 tree vectype;
9885 vectype = get_vectype_for_scalar_type_and_size (scalar_type,
9886 current_vector_size);
9887 if (vectype
9888 && known_eq (current_vector_size, 0U))
9889 current_vector_size = GET_MODE_SIZE (TYPE_MODE (vectype));
9890 return vectype;
9891 }
9892
9893 /* Function get_mask_type_for_scalar_type.
9894
9895 Returns the mask type corresponding to a result of comparison
9896 of vectors of specified SCALAR_TYPE as supported by target. */
9897
9898 tree
9899 get_mask_type_for_scalar_type (tree scalar_type)
9900 {
9901 tree vectype = get_vectype_for_scalar_type (scalar_type);
9902
9903 if (!vectype)
9904 return NULL;
9905
9906 return build_truth_vector_type (TYPE_VECTOR_SUBPARTS (vectype),
9907 current_vector_size);
9908 }
9909
9910 /* Function get_same_sized_vectype
9911
9912 Returns a vector type corresponding to SCALAR_TYPE with the same
9913 size as VECTOR_TYPE, if supported by the target. */
9914
9915 tree
9916 get_same_sized_vectype (tree scalar_type, tree vector_type)
9917 {
9918 if (VECT_SCALAR_BOOLEAN_TYPE_P (scalar_type))
9919 return build_same_sized_truth_vector_type (vector_type);
9920
9921 return get_vectype_for_scalar_type_and_size
9922 (scalar_type, GET_MODE_SIZE (TYPE_MODE (vector_type)));
9923 }
9924
9925 /* Function vect_is_simple_use.
9926
9927 Input:
9928 VINFO - the vect info of the loop or basic block that is being vectorized.
9929 OPERAND - operand in the loop or bb.
9930 Output:
9931 DEF_STMT_INFO_OUT (optional) - information about the defining stmt in
9932 case OPERAND is an SSA_NAME that is defined in the vectorizable region
9933 DEF_STMT_OUT (optional) - the defining stmt in case OPERAND is an SSA_NAME;
9934 the definition could be anywhere in the function
9935 DT - the type of definition
9936
9937 Returns whether a stmt with OPERAND can be vectorized.
9938 For loops, supportable operands are constants, loop invariants, and operands
9939 that are defined by the current iteration of the loop. Unsupportable
9940 operands are those that are defined by a previous iteration of the loop (as
9941 is the case in reduction/induction computations).
9942 For basic blocks, supportable operands are constants and bb invariants.
9943 For now, operands defined outside the basic block are not supported. */
9944
9945 bool
9946 vect_is_simple_use (tree operand, vec_info *vinfo, enum vect_def_type *dt,
9947 stmt_vec_info *def_stmt_info_out, gimple **def_stmt_out)
9948 {
9949 if (def_stmt_info_out)
9950 *def_stmt_info_out = NULL;
9951 if (def_stmt_out)
9952 *def_stmt_out = NULL;
9953 *dt = vect_unknown_def_type;
9954
9955 if (dump_enabled_p ())
9956 {
9957 dump_printf_loc (MSG_NOTE, vect_location,
9958 "vect_is_simple_use: operand ");
9959 if (TREE_CODE (operand) == SSA_NAME
9960 && !SSA_NAME_IS_DEFAULT_DEF (operand))
9961 dump_gimple_expr (MSG_NOTE, TDF_SLIM, SSA_NAME_DEF_STMT (operand), 0);
9962 else
9963 dump_generic_expr (MSG_NOTE, TDF_SLIM, operand);
9964 }
9965
9966 if (CONSTANT_CLASS_P (operand))
9967 *dt = vect_constant_def;
9968 else if (is_gimple_min_invariant (operand))
9969 *dt = vect_external_def;
9970 else if (TREE_CODE (operand) != SSA_NAME)
9971 *dt = vect_unknown_def_type;
9972 else if (SSA_NAME_IS_DEFAULT_DEF (operand))
9973 *dt = vect_external_def;
9974 else
9975 {
9976 gimple *def_stmt = SSA_NAME_DEF_STMT (operand);
9977 stmt_vec_info stmt_vinfo = vinfo->lookup_def (operand);
9978 if (!stmt_vinfo)
9979 *dt = vect_external_def;
9980 else
9981 {
9982 stmt_vinfo = vect_stmt_to_vectorize (stmt_vinfo);
9983 def_stmt = stmt_vinfo->stmt;
9984 switch (gimple_code (def_stmt))
9985 {
9986 case GIMPLE_PHI:
9987 case GIMPLE_ASSIGN:
9988 case GIMPLE_CALL:
9989 *dt = STMT_VINFO_DEF_TYPE (stmt_vinfo);
9990 break;
9991 default:
9992 *dt = vect_unknown_def_type;
9993 break;
9994 }
9995 if (def_stmt_info_out)
9996 *def_stmt_info_out = stmt_vinfo;
9997 }
9998 if (def_stmt_out)
9999 *def_stmt_out = def_stmt;
10000 }
10001
10002 if (dump_enabled_p ())
10003 {
10004 dump_printf (MSG_NOTE, ", type of def: ");
10005 switch (*dt)
10006 {
10007 case vect_uninitialized_def:
10008 dump_printf (MSG_NOTE, "uninitialized\n");
10009 break;
10010 case vect_constant_def:
10011 dump_printf (MSG_NOTE, "constant\n");
10012 break;
10013 case vect_external_def:
10014 dump_printf (MSG_NOTE, "external\n");
10015 break;
10016 case vect_internal_def:
10017 dump_printf (MSG_NOTE, "internal\n");
10018 break;
10019 case vect_induction_def:
10020 dump_printf (MSG_NOTE, "induction\n");
10021 break;
10022 case vect_reduction_def:
10023 dump_printf (MSG_NOTE, "reduction\n");
10024 break;
10025 case vect_double_reduction_def:
10026 dump_printf (MSG_NOTE, "double reduction\n");
10027 break;
10028 case vect_nested_cycle:
10029 dump_printf (MSG_NOTE, "nested cycle\n");
10030 break;
10031 case vect_unknown_def_type:
10032 dump_printf (MSG_NOTE, "unknown\n");
10033 break;
10034 }
10035 }
10036
10037 if (*dt == vect_unknown_def_type)
10038 {
10039 if (dump_enabled_p ())
10040 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
10041 "Unsupported pattern.\n");
10042 return false;
10043 }
10044
10045 return true;
10046 }
10047
10048 /* Function vect_is_simple_use.
10049
10050 Same as vect_is_simple_use but also determines the vector operand
10051 type of OPERAND and stores it to *VECTYPE. If the definition of
10052 OPERAND is vect_uninitialized_def, vect_constant_def or
10053 vect_external_def *VECTYPE will be set to NULL_TREE and the caller
10054 is responsible for computing the best suited vector type for the
10055 scalar operand. */
10056
10057 bool
10058 vect_is_simple_use (tree operand, vec_info *vinfo, enum vect_def_type *dt,
10059 tree *vectype, stmt_vec_info *def_stmt_info_out,
10060 gimple **def_stmt_out)
10061 {
10062 stmt_vec_info def_stmt_info;
10063 gimple *def_stmt;
10064 if (!vect_is_simple_use (operand, vinfo, dt, &def_stmt_info, &def_stmt))
10065 return false;
10066
10067 if (def_stmt_out)
10068 *def_stmt_out = def_stmt;
10069 if (def_stmt_info_out)
10070 *def_stmt_info_out = def_stmt_info;
10071
10072 /* Now get a vector type if the def is internal, otherwise supply
10073 NULL_TREE and leave it up to the caller to figure out a proper
10074 type for the use stmt. */
10075 if (*dt == vect_internal_def
10076 || *dt == vect_induction_def
10077 || *dt == vect_reduction_def
10078 || *dt == vect_double_reduction_def
10079 || *dt == vect_nested_cycle)
10080 {
10081 *vectype = STMT_VINFO_VECTYPE (def_stmt_info);
10082 gcc_assert (*vectype != NULL_TREE);
10083 if (dump_enabled_p ())
10084 dump_printf_loc (MSG_NOTE, vect_location,
10085 "vect_is_simple_use: vectype %T\n", *vectype);
10086 }
10087 else if (*dt == vect_uninitialized_def
10088 || *dt == vect_constant_def
10089 || *dt == vect_external_def)
10090 *vectype = NULL_TREE;
10091 else
10092 gcc_unreachable ();
10093
10094 return true;
10095 }
10096
10097
10098 /* Function supportable_widening_operation
10099
10100 Check whether an operation represented by the code CODE is a
10101 widening operation that is supported by the target platform in
10102 vector form (i.e., when operating on arguments of type VECTYPE_IN
10103 producing a result of type VECTYPE_OUT).
10104
10105 Widening operations we currently support are NOP (CONVERT), FLOAT,
10106 FIX_TRUNC and WIDEN_MULT. This function checks if these operations
10107 are supported by the target platform either directly (via vector
10108 tree-codes), or via target builtins.
10109
10110 Output:
10111 - CODE1 and CODE2 are codes of vector operations to be used when
10112 vectorizing the operation, if available.
10113 - MULTI_STEP_CVT determines the number of required intermediate steps in
10114 case of multi-step conversion (like char->short->int - in that case
10115 MULTI_STEP_CVT will be 1).
10116 - INTERM_TYPES contains the intermediate type required to perform the
10117 widening operation (short in the above example). */
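/* For example, a single-step CONVERT widening (say char -> short) on a
   little-endian target with direct unpack support returns
   *CODE1 = VEC_UNPACK_LO_EXPR, *CODE2 = VEC_UNPACK_HI_EXPR and
   *MULTI_STEP_CVT = 0, while the char->short->int case above also
   records the intermediate short vector type in INTERM_TYPES and sets
   *MULTI_STEP_CVT to 1.  */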
10118
10119 bool
10120 supportable_widening_operation (enum tree_code code, stmt_vec_info stmt_info,
10121 tree vectype_out, tree vectype_in,
10122 enum tree_code *code1, enum tree_code *code2,
10123 int *multi_step_cvt,
10124 vec<tree> *interm_types)
10125 {
10126 loop_vec_info loop_info = STMT_VINFO_LOOP_VINFO (stmt_info);
10127 struct loop *vect_loop = NULL;
10128 machine_mode vec_mode;
10129 enum insn_code icode1, icode2;
10130 optab optab1, optab2;
10131 tree vectype = vectype_in;
10132 tree wide_vectype = vectype_out;
10133 enum tree_code c1, c2;
10134 int i;
10135 tree prev_type, intermediate_type;
10136 machine_mode intermediate_mode, prev_mode;
10137 optab optab3, optab4;
10138
10139 *multi_step_cvt = 0;
10140 if (loop_info)
10141 vect_loop = LOOP_VINFO_LOOP (loop_info);
10142
10143 switch (code)
10144 {
10145 case WIDEN_MULT_EXPR:
10146 /* The result of a vectorized widening operation usually requires
10147 two vectors (because the widened results do not fit into one vector).
10148 The generated vector results would normally be expected to be
10149 generated in the same order as in the original scalar computation,
10150 i.e. if 8 results are generated in each vector iteration, they are
10151 to be organized as follows:
10152 vect1: [res1,res2,res3,res4],
10153 vect2: [res5,res6,res7,res8].
10154
10155 However, in the special case that the result of the widening
10156 operation is used in a reduction computation only, the order doesn't
10157 matter (because when vectorizing a reduction we change the order of
10158 the computation). Some targets can take advantage of this and
10159 generate more efficient code. For example, targets like Altivec,
10160 that support widen_mult using a sequence of {mult_even,mult_odd}
10161 generate the following vectors:
10162 vect1: [res1,res3,res5,res7],
10163 vect2: [res2,res4,res6,res8].
10164
10165 When vectorizing outer-loops, we execute the inner-loop sequentially
10166 (each vectorized inner-loop iteration contributes to VF outer-loop
10167 iterations in parallel). We therefore don't allow changing the
10168 order of the computation in the inner-loop during outer-loop
10169 vectorization. */
10170 /* TODO: Another case in which order doesn't *really* matter is when we
10171 widen and then contract again, e.g. (short)((int)x * y >> 8).
10172 Normally, pack_trunc performs an even/odd permute, whereas the
10173 repack from an even/odd expansion would be an interleave, which
10174 would be significantly simpler for e.g. AVX2. */
10175 /* In any case, in order to avoid duplicating the code below, recurse
10176 on VEC_WIDEN_MULT_EVEN_EXPR. If it succeeds, all the return values
10177 are properly set up for the caller. If we fail, we'll continue with
10178 a VEC_WIDEN_MULT_LO/HI_EXPR check. */
10179 if (vect_loop
10180 && STMT_VINFO_RELEVANT (stmt_info) == vect_used_by_reduction
10181 && !nested_in_vect_loop_p (vect_loop, stmt_info)
10182 && supportable_widening_operation (VEC_WIDEN_MULT_EVEN_EXPR,
10183 stmt_info, vectype_out,
10184 vectype_in, code1, code2,
10185 multi_step_cvt, interm_types))
10186 {
10187 /* Elements in a vector with the vect_used_by_reduction property cannot
10188 be reordered if the use chain with this property does not have the
10189 same operation. One such example is s += a * b, where elements
10190 in a and b cannot be reordered. Here we check if the vector defined
10191 by STMT is only directly used in the reduction statement. */
10192 tree lhs = gimple_assign_lhs (stmt_info->stmt);
10193 stmt_vec_info use_stmt_info = loop_info->lookup_single_use (lhs);
10194 if (use_stmt_info
10195 && STMT_VINFO_DEF_TYPE (use_stmt_info) == vect_reduction_def)
10196 return true;
10197 }
10198 c1 = VEC_WIDEN_MULT_LO_EXPR;
10199 c2 = VEC_WIDEN_MULT_HI_EXPR;
10200 break;
10201
10202 case DOT_PROD_EXPR:
10203 c1 = DOT_PROD_EXPR;
10204 c2 = DOT_PROD_EXPR;
10205 break;
10206
10207 case SAD_EXPR:
10208 c1 = SAD_EXPR;
10209 c2 = SAD_EXPR;
10210 break;
10211
10212 case VEC_WIDEN_MULT_EVEN_EXPR:
10213 /* Support the recursion induced just above. */
10214 c1 = VEC_WIDEN_MULT_EVEN_EXPR;
10215 c2 = VEC_WIDEN_MULT_ODD_EXPR;
10216 break;
10217
10218 case WIDEN_LSHIFT_EXPR:
10219 c1 = VEC_WIDEN_LSHIFT_LO_EXPR;
10220 c2 = VEC_WIDEN_LSHIFT_HI_EXPR;
10221 break;
10222
10223 CASE_CONVERT:
10224 c1 = VEC_UNPACK_LO_EXPR;
10225 c2 = VEC_UNPACK_HI_EXPR;
10226 break;
10227
10228 case FLOAT_EXPR:
10229 c1 = VEC_UNPACK_FLOAT_LO_EXPR;
10230 c2 = VEC_UNPACK_FLOAT_HI_EXPR;
10231 break;
10232
10233 case FIX_TRUNC_EXPR:
10234 c1 = VEC_UNPACK_FIX_TRUNC_LO_EXPR;
10235 c2 = VEC_UNPACK_FIX_TRUNC_HI_EXPR;
10236 break;
10237
10238 default:
10239 gcc_unreachable ();
10240 }
10241
10242 if (BYTES_BIG_ENDIAN && c1 != VEC_WIDEN_MULT_EVEN_EXPR)
10243 std::swap (c1, c2);
10244
10245 if (code == FIX_TRUNC_EXPR)
10246 {
10247 /* The signedness is determined from output operand. */
10248 optab1 = optab_for_tree_code (c1, vectype_out, optab_default);
10249 optab2 = optab_for_tree_code (c2, vectype_out, optab_default);
10250 }
10251 else
10252 {
10253 optab1 = optab_for_tree_code (c1, vectype, optab_default);
10254 optab2 = optab_for_tree_code (c2, vectype, optab_default);
10255 }
10256
10257 if (!optab1 || !optab2)
10258 return false;
10259
10260 vec_mode = TYPE_MODE (vectype);
10261 if ((icode1 = optab_handler (optab1, vec_mode)) == CODE_FOR_nothing
10262 || (icode2 = optab_handler (optab2, vec_mode)) == CODE_FOR_nothing)
10263 return false;
10264
10265 *code1 = c1;
10266 *code2 = c2;
10267
10268 if (insn_data[icode1].operand[0].mode == TYPE_MODE (wide_vectype)
10269 && insn_data[icode2].operand[0].mode == TYPE_MODE (wide_vectype))
10270 /* For scalar masks we may have different boolean
10271 vector types having the same QImode. Thus we
10272 add an additional check for the number of elements. */
10273 return (!VECTOR_BOOLEAN_TYPE_P (vectype)
10274 || known_eq (TYPE_VECTOR_SUBPARTS (vectype),
10275 TYPE_VECTOR_SUBPARTS (wide_vectype) * 2));
10276
10277 /* Check if it's a multi-step conversion that can be done using intermediate
10278 types. */
10279
10280 prev_type = vectype;
10281 prev_mode = vec_mode;
10282
10283 if (!CONVERT_EXPR_CODE_P (code))
10284 return false;
10285
10286 /* We assume here that there will not be more than MAX_INTERM_CVT_STEPS
10287 intermediate steps in the promotion sequence. We try
10288 MAX_INTERM_CVT_STEPS to get to WIDE_VECTYPE, and fail if we do
10289 not. */
10290 interm_types->create (MAX_INTERM_CVT_STEPS);
10291 for (i = 0; i < MAX_INTERM_CVT_STEPS; i++)
10292 {
10293 intermediate_mode = insn_data[icode1].operand[0].mode;
10294 if (VECTOR_BOOLEAN_TYPE_P (prev_type))
10295 {
10296 intermediate_type = vect_halve_mask_nunits (prev_type);
10297 if (intermediate_mode != TYPE_MODE (intermediate_type))
10298 return false;
10299 }
10300 else
10301 intermediate_type
10302 = lang_hooks.types.type_for_mode (intermediate_mode,
10303 TYPE_UNSIGNED (prev_type));
10304
10305 optab3 = optab_for_tree_code (c1, intermediate_type, optab_default);
10306 optab4 = optab_for_tree_code (c2, intermediate_type, optab_default);
10307
10308 if (!optab3 || !optab4
10309 || (icode1 = optab_handler (optab1, prev_mode)) == CODE_FOR_nothing
10310 || insn_data[icode1].operand[0].mode != intermediate_mode
10311 || (icode2 = optab_handler (optab2, prev_mode)) == CODE_FOR_nothing
10312 || insn_data[icode2].operand[0].mode != intermediate_mode
10313 || ((icode1 = optab_handler (optab3, intermediate_mode))
10314 == CODE_FOR_nothing)
10315 || ((icode2 = optab_handler (optab4, intermediate_mode))
10316 == CODE_FOR_nothing))
10317 break;
10318
10319 interm_types->quick_push (intermediate_type);
10320 (*multi_step_cvt)++;
10321
10322 if (insn_data[icode1].operand[0].mode == TYPE_MODE (wide_vectype)
10323 && insn_data[icode2].operand[0].mode == TYPE_MODE (wide_vectype))
10324 return (!VECTOR_BOOLEAN_TYPE_P (vectype)
10325 || known_eq (TYPE_VECTOR_SUBPARTS (intermediate_type),
10326 TYPE_VECTOR_SUBPARTS (wide_vectype) * 2));
10327
10328 prev_type = intermediate_type;
10329 prev_mode = intermediate_mode;
10330 }
10331
10332 interm_types->release ();
10333 return false;
10334 }
10335
10336
10337 /* Function supportable_narrowing_operation
10338
10339 Check whether an operation represented by the code CODE is a
10340 narrowing operation that is supported by the target platform in
10341 vector form (i.e., when operating on arguments of type VECTYPE_IN
10342 and producing a result of type VECTYPE_OUT).
10343
10344 Narrowing operations we currently support are NOP (CONVERT), FIX_TRUNC
10345 and FLOAT. This function checks if these operations are supported by
10346 the target platform directly via vector tree-codes.
10347
10348 Output:
10349 - CODE1 is the code of a vector operation to be used when
10350 vectorizing the operation, if available.
10351 - MULTI_STEP_CVT determines the number of required intermediate steps in
10352 case of multi-step conversion (like int->short->char - in that case
10353 MULTI_STEP_CVT will be 1).
10354 - INTERM_TYPES contains the intermediate type required to perform the
10355 narrowing operation (short in the above example). */
10356
10357 bool
10358 supportable_narrowing_operation (enum tree_code code,
10359 tree vectype_out, tree vectype_in,
10360 enum tree_code *code1, int *multi_step_cvt,
10361 vec<tree> *interm_types)
10362 {
10363 machine_mode vec_mode;
10364 enum insn_code icode1;
10365 optab optab1, interm_optab;
10366 tree vectype = vectype_in;
10367 tree narrow_vectype = vectype_out;
10368 enum tree_code c1;
10369 tree intermediate_type, prev_type;
10370 machine_mode intermediate_mode, prev_mode;
10371 int i;
10372 bool uns;
10373
10374 *multi_step_cvt = 0;
10375 switch (code)
10376 {
10377 CASE_CONVERT:
10378 c1 = VEC_PACK_TRUNC_EXPR;
10379 break;
10380
10381 case FIX_TRUNC_EXPR:
10382 c1 = VEC_PACK_FIX_TRUNC_EXPR;
10383 break;
10384
10385 case FLOAT_EXPR:
10386 c1 = VEC_PACK_FLOAT_EXPR;
10387 break;
10388
10389 default:
10390 gcc_unreachable ();
10391 }
10392
10393 if (code == FIX_TRUNC_EXPR)
10394 /* The signedness is determined from output operand. */
10395 optab1 = optab_for_tree_code (c1, vectype_out, optab_default);
10396 else
10397 optab1 = optab_for_tree_code (c1, vectype, optab_default);
10398
10399 if (!optab1)
10400 return false;
10401
10402 vec_mode = TYPE_MODE (vectype);
10403 if ((icode1 = optab_handler (optab1, vec_mode)) == CODE_FOR_nothing)
10404 return false;
10405
10406 *code1 = c1;
10407
10408 if (insn_data[icode1].operand[0].mode == TYPE_MODE (narrow_vectype))
10409 /* For scalar masks we may have different boolean
10410 vector types sharing the same QImode. Thus we
10411 add an additional check on the number of elements. */
10412 return (!VECTOR_BOOLEAN_TYPE_P (vectype)
10413 || known_eq (TYPE_VECTOR_SUBPARTS (vectype) * 2,
10414 TYPE_VECTOR_SUBPARTS (narrow_vectype)));
10415
10416 if (code == FLOAT_EXPR)
10417 return false;
10418
10419 /* Check if it's a multi-step conversion that can be done using intermediate
10420 types. */
10421 prev_mode = vec_mode;
10422 prev_type = vectype;
10423 if (code == FIX_TRUNC_EXPR)
10424 uns = TYPE_UNSIGNED (vectype_out);
10425 else
10426 uns = TYPE_UNSIGNED (vectype);
10427
10428 /* For multi-step FIX_TRUNC_EXPR prefer a signed float-to-integer
10429 conversion over an unsigned one, as unsigned FIX_TRUNC_EXPR is often
10430 more costly than signed. */
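  /* As an illustrative sketch, a conversion such as

	double d[N];
	unsigned int u[N];
	for (int i = 0; i < N; i++)
	  u[i] = (unsigned int) d[i];

     takes this path: the code below tries to switch to the signed
     variant of the pack, built for a signed type with the same mode as
     VECTYPE_OUT, when the result modes agree, so that the remaining
     narrowing steps can use the usually cheaper signed conversion.  */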
10431 if (code == FIX_TRUNC_EXPR && uns)
10432 {
10433 enum insn_code icode2;
10434
10435 intermediate_type
10436 = lang_hooks.types.type_for_mode (TYPE_MODE (vectype_out), 0);
10437 interm_optab
10438 = optab_for_tree_code (c1, intermediate_type, optab_default);
10439 if (interm_optab != unknown_optab
10440 && (icode2 = optab_handler (optab1, vec_mode)) != CODE_FOR_nothing
10441 && insn_data[icode1].operand[0].mode
10442 == insn_data[icode2].operand[0].mode)
10443 {
10444 uns = false;
10445 optab1 = interm_optab;
10446 icode1 = icode2;
10447 }
10448 }
10449
10450 /* We assume here that there will not be more than MAX_INTERM_CVT_STEPS
10451 intermediate steps in the narrowing sequence. We try
10452 MAX_INTERM_CVT_STEPS steps to get to NARROW_VECTYPE, and fail if we do not. */
10453 interm_types->create (MAX_INTERM_CVT_STEPS);
10454 for (i = 0; i < MAX_INTERM_CVT_STEPS; i++)
10455 {
10456 intermediate_mode = insn_data[icode1].operand[0].mode;
10457 if (VECTOR_BOOLEAN_TYPE_P (prev_type))
10458 {
10459 intermediate_type = vect_double_mask_nunits (prev_type);
10460 if (intermediate_mode != TYPE_MODE (intermediate_type))
10461 return false;
10462 }
10463 else
10464 intermediate_type
10465 = lang_hooks.types.type_for_mode (intermediate_mode, uns);
10466 interm_optab
10467 = optab_for_tree_code (VEC_PACK_TRUNC_EXPR, intermediate_type,
10468 optab_default);
10469 if (!interm_optab
10470 || ((icode1 = optab_handler (optab1, prev_mode)) == CODE_FOR_nothing)
10471 || insn_data[icode1].operand[0].mode != intermediate_mode
10472 || ((icode1 = optab_handler (interm_optab, intermediate_mode))
10473 == CODE_FOR_nothing))
10474 break;
10475
10476 interm_types->quick_push (intermediate_type);
10477 (*multi_step_cvt)++;
10478
10479 if (insn_data[icode1].operand[0].mode == TYPE_MODE (narrow_vectype))
10480 return (!VECTOR_BOOLEAN_TYPE_P (vectype)
10481 || known_eq (TYPE_VECTOR_SUBPARTS (intermediate_type) * 2,
10482 TYPE_VECTOR_SUBPARTS (narrow_vectype)));
10483
10484 prev_mode = intermediate_mode;
10485 prev_type = intermediate_type;
10486 optab1 = interm_optab;
10487 }
10488
10489 interm_types->release ();
10490 return false;
10491 }
10492
10493 /* Generate and return a statement that sets vector mask MASK such that
10494 MASK[I] is true iff J + START_INDEX < END_INDEX for all J <= I. */
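
/* For example, with an 8-element mask, START_INDEX 13 and END_INDEX 16,
   the generated IFN_WHILE_ULT call sets MASK to
   { 1, 1, 1, 0, 0, 0, 0, 0 }: lanes 0..2 satisfy 13 + J < 16 and lanes
   3 onwards do not.  */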
10495
10496 gcall *
10497 vect_gen_while (tree mask, tree start_index, tree end_index)
10498 {
10499 tree cmp_type = TREE_TYPE (start_index);
10500 tree mask_type = TREE_TYPE (mask);
10501 gcc_checking_assert (direct_internal_fn_supported_p (IFN_WHILE_ULT,
10502 cmp_type, mask_type,
10503 OPTIMIZE_FOR_SPEED));
10504 gcall *call = gimple_build_call_internal (IFN_WHILE_ULT, 3,
10505 start_index, end_index,
10506 build_zero_cst (mask_type));
10507 gimple_call_set_lhs (call, mask);
10508 return call;
10509 }
10510
10511 /* Generate a vector mask of type MASK_TYPE for which index I is false iff
10512 J + START_INDEX < END_INDEX for all J <= I. Add the statements to SEQ. */
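
/* Continuing the example above, with START_INDEX 13, END_INDEX 16 and an
   8-element mask the generated sequence is roughly

	tmp = .WHILE_ULT (13, 16, { 0, ... });
	mask = ~tmp;

   yielding { 0, 0, 0, 1, 1, 1, 1, 1 }.  */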
10513
10514 tree
10515 vect_gen_while_not (gimple_seq *seq, tree mask_type, tree start_index,
10516 tree end_index)
10517 {
10518 tree tmp = make_ssa_name (mask_type);
10519 gcall *call = vect_gen_while (tmp, start_index, end_index);
10520 gimple_seq_add_stmt (seq, call);
10521 return gimple_build (seq, BIT_NOT_EXPR, mask_type, tmp);
10522 }
10523
10524 /* Try to compute the vector types required to vectorize STMT_INFO,
10525 returning true on success and false if vectorization isn't possible.
10526
10527 On success:
10528
10529 - Set *STMT_VECTYPE_OUT to:
10530 - NULL_TREE if the statement doesn't need to be vectorized;
10531 - boolean_type_node if the statement is a boolean operation whose
10532 vector type can only be determined once all the other vector types
10533 are known; and
10534 - the equivalent of STMT_VINFO_VECTYPE otherwise.
10535
10536 - Set *NUNITS_VECTYPE_OUT to the vector type that contains the maximum
10537 number of units needed to vectorize STMT_INFO, or NULL_TREE if the
10538 statement does not help to determine the overall number of units. */
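
/* As an illustrative sketch (assuming 128-bit vectors), for the
   conversion statement produced by a loop such as

	short *s;
	int *w;
	...
	w[i] = (int) s[i];

   *STMT_VECTYPE_OUT is derived from the int result (say V4SI), while
   *NUNITS_VECTYPE_OUT is derived from the smallest scalar type in the
   statement, short (say V8HI); the latter is what determines the number
   of units the vectorization factor must cover.  */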
10539
10540 bool
10541 vect_get_vector_types_for_stmt (stmt_vec_info stmt_info,
10542 tree *stmt_vectype_out,
10543 tree *nunits_vectype_out)
10544 {
10545 gimple *stmt = stmt_info->stmt;
10546
10547 *stmt_vectype_out = NULL_TREE;
10548 *nunits_vectype_out = NULL_TREE;
10549
10550 if (gimple_get_lhs (stmt) == NULL_TREE
10551 /* MASK_STORE has no lhs, but is ok. */
10552 && !gimple_call_internal_p (stmt, IFN_MASK_STORE))
10553 {
10554 if (is_a <gcall *> (stmt))
10555 {
10556 /* Ignore calls with no lhs. These must be calls to
10557 #pragma omp simd functions, and the vectorization factor
10558 they really need can't be determined until
10559 vectorizable_simd_clone_call. */
10560 if (dump_enabled_p ())
10561 dump_printf_loc (MSG_NOTE, vect_location,
10562 "defer to SIMD clone analysis.\n");
10563 return true;
10564 }
10565
10566 if (dump_enabled_p ())
10567 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
10568 "not vectorized: irregular stmt.%G", stmt);
10569 return false;
10570 }
10571
10572 if (VECTOR_MODE_P (TYPE_MODE (gimple_expr_type (stmt))))
10573 {
10574 if (dump_enabled_p ())
10575 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
10576 "not vectorized: vector stmt in loop:%G", stmt);
10577 return false;
10578 }
10579
10580 tree vectype;
10581 tree scalar_type = NULL_TREE;
10582 if (STMT_VINFO_VECTYPE (stmt_info))
10583 *stmt_vectype_out = vectype = STMT_VINFO_VECTYPE (stmt_info);
10584 else
10585 {
10586 gcc_assert (!STMT_VINFO_DATA_REF (stmt_info));
10587 if (gimple_call_internal_p (stmt, IFN_MASK_STORE))
10588 scalar_type = TREE_TYPE (gimple_call_arg (stmt, 3));
10589 else
10590 scalar_type = TREE_TYPE (gimple_get_lhs (stmt));
10591
10592 /* Pure bool ops don't participate in number-of-units computation.
10593 For comparisons use the types being compared. */
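  /* As an illustrative sketch: for a comparison such as
     _1 = x_2 < y_3 (with int X and Y) the compared int type is used
     below, whereas a pure boolean statement such as _4 = _1 & _5 gets
     boolean_type_node here and its vector type is resolved only once
     the other vector types are known.  */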
10594 if (VECT_SCALAR_BOOLEAN_TYPE_P (scalar_type)
10595 && is_gimple_assign (stmt)
10596 && gimple_assign_rhs_code (stmt) != COND_EXPR)
10597 {
10598 *stmt_vectype_out = boolean_type_node;
10599
10600 tree rhs1 = gimple_assign_rhs1 (stmt);
10601 if (TREE_CODE_CLASS (gimple_assign_rhs_code (stmt)) == tcc_comparison
10602 && !VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (rhs1)))
10603 scalar_type = TREE_TYPE (rhs1);
10604 else
10605 {
10606 if (dump_enabled_p ())
10607 dump_printf_loc (MSG_NOTE, vect_location,
10608 "pure bool operation.\n");
10609 return true;
10610 }
10611 }
10612
10613 if (dump_enabled_p ())
10614 dump_printf_loc (MSG_NOTE, vect_location,
10615 "get vectype for scalar type: %T\n", scalar_type);
10616 vectype = get_vectype_for_scalar_type (scalar_type);
10617 if (!vectype)
10618 {
10619 if (dump_enabled_p ())
10620 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
10621 "not vectorized: unsupported data-type %T\n",
10622 scalar_type);
10623 return false;
10624 }
10625
10626 if (!*stmt_vectype_out)
10627 *stmt_vectype_out = vectype;
10628
10629 if (dump_enabled_p ())
10630 dump_printf_loc (MSG_NOTE, vect_location, "vectype: %T\n", vectype);
10631 }
10632
10633 /* Don't try to compute scalar types if the stmt produces a boolean
10634 vector; use the existing vector type instead. */
10635 tree nunits_vectype;
10636 if (VECTOR_BOOLEAN_TYPE_P (vectype))
10637 nunits_vectype = vectype;
10638 else
10639 {
10640 /* The number of units is set according to the smallest scalar
10641 type (or the largest vector size, but we only support one
10642 vector size per vectorization). */
10643 if (*stmt_vectype_out != boolean_type_node)
10644 {
10645 HOST_WIDE_INT dummy;
10646 scalar_type = vect_get_smallest_scalar_type (stmt_info,
10647 &dummy, &dummy);
10648 }
10649 if (dump_enabled_p ())
10650 dump_printf_loc (MSG_NOTE, vect_location,
10651 "get vectype for scalar type: %T\n", scalar_type);
10652 nunits_vectype = get_vectype_for_scalar_type (scalar_type);
10653 }
10654 if (!nunits_vectype)
10655 {
10656 if (dump_enabled_p ())
10657 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
10658 "not vectorized: unsupported data-type %T\n",
10659 scalar_type);
10660 return false;
10661 }
10662
10663 if (maybe_ne (GET_MODE_SIZE (TYPE_MODE (vectype)),
10664 GET_MODE_SIZE (TYPE_MODE (nunits_vectype))))
10665 {
10666 if (dump_enabled_p ())
10667 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
10668 "not vectorized: different sized vector "
10669 "types in statement, %T and %T\n",
10670 vectype, nunits_vectype);
10671 return false;
10672 }
10673
10674 if (dump_enabled_p ())
10675 {
10676 dump_printf_loc (MSG_NOTE, vect_location, "vectype: %T\n",
10677 nunits_vectype);
10678
10679 dump_printf_loc (MSG_NOTE, vect_location, "nunits = ");
10680 dump_dec (MSG_NOTE, TYPE_VECTOR_SUBPARTS (nunits_vectype));
10681 dump_printf (MSG_NOTE, "\n");
10682 }
10683
10684 *nunits_vectype_out = nunits_vectype;
10685 return true;
10686 }
10687
10688 /* Try to determine the correct vector type for STMT_INFO, which is a
10689 statement that produces a scalar boolean result. Return the vector
10690 type on success, otherwise return NULL_TREE. */
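
/* As an illustrative sketch, for a comparison such as

	_Bool b = (x < y);

   with int X and Y, the mask type comes from the compared type via
   get_mask_type_for_scalar_type (int here); for other boolean statements
   it is taken from the vector types of the operands, which must agree in
   the number of elements and in whether they are mask types.  */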
10691
10692 tree
10693 vect_get_mask_type_for_stmt (stmt_vec_info stmt_info)
10694 {
10695 gimple *stmt = stmt_info->stmt;
10696 tree mask_type = NULL;
10697 tree vectype, scalar_type;
10698
10699 if (is_gimple_assign (stmt)
10700 && TREE_CODE_CLASS (gimple_assign_rhs_code (stmt)) == tcc_comparison
10701 && !VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (gimple_assign_rhs1 (stmt))))
10702 {
10703 scalar_type = TREE_TYPE (gimple_assign_rhs1 (stmt));
10704 mask_type = get_mask_type_for_scalar_type (scalar_type);
10705
10706 if (!mask_type)
10707 {
10708 if (dump_enabled_p ())
10709 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
10710 "not vectorized: unsupported mask\n");
10711 return NULL_TREE;
10712 }
10713 }
10714 else
10715 {
10716 tree rhs;
10717 ssa_op_iter iter;
10718 enum vect_def_type dt;
10719
10720 FOR_EACH_SSA_TREE_OPERAND (rhs, stmt, iter, SSA_OP_USE)
10721 {
10722 if (!vect_is_simple_use (rhs, stmt_info->vinfo, &dt, &vectype))
10723 {
10724 if (dump_enabled_p ())
10725 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
10726 "not vectorized: can't compute mask type "
10727 "for statement, %G", stmt);
10728 return NULL_TREE;
10729 }
10730
10731 /* A missing vectype probably means an external definition.
10732 Allow it in case another operand lets the mask
10733 type be determined. */
10734 if (!vectype)
10735 continue;
10736
10737 if (!mask_type)
10738 mask_type = vectype;
10739 else if (maybe_ne (TYPE_VECTOR_SUBPARTS (mask_type),
10740 TYPE_VECTOR_SUBPARTS (vectype)))
10741 {
10742 if (dump_enabled_p ())
10743 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
10744 "not vectorized: different sized mask "
10745 "types in statement, %T and %T\n",
10746 mask_type, vectype);
10747 return NULL_TREE;
10748 }
10749 else if (VECTOR_BOOLEAN_TYPE_P (mask_type)
10750 != VECTOR_BOOLEAN_TYPE_P (vectype))
10751 {
10752 if (dump_enabled_p ())
10753 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
10754 "not vectorized: mixed mask and "
10755 "nonmask vector types in statement, "
10756 "%T and %T\n",
10757 mask_type, vectype);
10758 return NULL_TREE;
10759 }
10760 }
10761
10762 /* We may compare a boolean value loaded as a vector of integers.
10763 Fix mask_type in that case. */
10764 if (mask_type
10765 && !VECTOR_BOOLEAN_TYPE_P (mask_type)
10766 && gimple_code (stmt) == GIMPLE_ASSIGN
10767 && TREE_CODE_CLASS (gimple_assign_rhs_code (stmt)) == tcc_comparison)
10768 mask_type = build_same_sized_truth_vector_type (mask_type);
10769 }
10770
10771 /* A missing mask_type should mean a loop-invariant predicate.
10772 This is probably a candidate for optimization in if-conversion. */
10773 if (!mask_type && dump_enabled_p ())
10774 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
10775 "not vectorized: can't compute mask type "
10776 "for statement, %G", stmt);
10777 return mask_type;
10778 }