poly_int: current_vector_size and TARGET_AUTOVECTORIZE_VECTOR_SIZES
[gcc.git] / gcc / tree-vect-stmts.c
1 /* Statement Analysis and Transformation for Vectorization
2 Copyright (C) 2003-2017 Free Software Foundation, Inc.
3 Contributed by Dorit Naishlos <dorit@il.ibm.com>
4 and Ira Rosen <irar@il.ibm.com>
5
6 This file is part of GCC.
7
8 GCC is free software; you can redistribute it and/or modify it under
9 the terms of the GNU General Public License as published by the Free
10 Software Foundation; either version 3, or (at your option) any later
11 version.
12
13 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
14 WARRANTY; without even the implied warranty of MERCHANTABILITY or
15 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
16 for more details.
17
18 You should have received a copy of the GNU General Public License
19 along with GCC; see the file COPYING3. If not see
20 <http://www.gnu.org/licenses/>. */
21
22 #include "config.h"
23 #include "system.h"
24 #include "coretypes.h"
25 #include "backend.h"
26 #include "target.h"
27 #include "rtl.h"
28 #include "tree.h"
29 #include "gimple.h"
30 #include "ssa.h"
31 #include "optabs-tree.h"
32 #include "insn-config.h"
33 #include "recog.h" /* FIXME: for insn_data */
34 #include "cgraph.h"
35 #include "dumpfile.h"
36 #include "alias.h"
37 #include "fold-const.h"
38 #include "stor-layout.h"
39 #include "tree-eh.h"
40 #include "gimplify.h"
41 #include "gimple-iterator.h"
42 #include "gimplify-me.h"
43 #include "tree-cfg.h"
44 #include "tree-ssa-loop-manip.h"
45 #include "cfgloop.h"
46 #include "tree-ssa-loop.h"
47 #include "tree-scalar-evolution.h"
48 #include "tree-vectorizer.h"
49 #include "builtins.h"
50 #include "internal-fn.h"
51 #include "tree-vector-builder.h"
52 #include "vec-perm-indices.h"
53
54 /* For lang_hooks.types.type_for_mode. */
55 #include "langhooks.h"
56
57 /* Says whether a statement is a load, a store of a vectorized statement
58 result, or a store of an invariant value. */
59 enum vec_load_store_type {
60 VLS_LOAD,
61 VLS_STORE,
62 VLS_STORE_INVARIANT
63 };
64
65 /* Return the vectorized type for the given statement. */
66
67 tree
68 stmt_vectype (struct _stmt_vec_info *stmt_info)
69 {
70 return STMT_VINFO_VECTYPE (stmt_info);
71 }
72
73 /* Return TRUE iff the given statement is in an inner loop relative to
74 the loop being vectorized. */
75 bool
76 stmt_in_inner_loop_p (struct _stmt_vec_info *stmt_info)
77 {
78 gimple *stmt = STMT_VINFO_STMT (stmt_info);
79 basic_block bb = gimple_bb (stmt);
80 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
81 struct loop* loop;
82
83 if (!loop_vinfo)
84 return false;
85
86 loop = LOOP_VINFO_LOOP (loop_vinfo);
87
88 return (bb->loop_father == loop->inner);
89 }
90
91 /* Record the cost of a statement, either by directly informing the
92 target model or by saving it in a vector for later processing.
93 Return a preliminary estimate of the statement's cost. */
94
95 unsigned
96 record_stmt_cost (stmt_vector_for_cost *body_cost_vec, int count,
97 enum vect_cost_for_stmt kind, stmt_vec_info stmt_info,
98 int misalign, enum vect_cost_model_location where)
99 {
100 if ((kind == vector_load || kind == unaligned_load)
101 && STMT_VINFO_GATHER_SCATTER_P (stmt_info))
102 kind = vector_gather_load;
103 if ((kind == vector_store || kind == unaligned_store)
104 && STMT_VINFO_GATHER_SCATTER_P (stmt_info))
105 kind = vector_scatter_store;
106 if (body_cost_vec)
107 {
108 tree vectype = stmt_info ? stmt_vectype (stmt_info) : NULL_TREE;
109 stmt_info_for_cost si = { count, kind,
110 stmt_info ? STMT_VINFO_STMT (stmt_info) : NULL,
111 misalign };
112 body_cost_vec->safe_push (si);
113 return (unsigned)
114 (builtin_vectorization_cost (kind, vectype, misalign) * count);
115 }
116 else
117 return add_stmt_cost (stmt_info->vinfo->target_cost_data,
118 count, kind, stmt_info, misalign, where);
119 }
120
121 /* Return a variable of type ELEM_TYPE[NELEMS]. */
122
123 static tree
124 create_vector_array (tree elem_type, unsigned HOST_WIDE_INT nelems)
125 {
126 return create_tmp_var (build_array_type_nelts (elem_type, nelems),
127 "vect_array");
128 }
129
130 /* ARRAY is an array of vectors created by create_vector_array.
131 Return an SSA_NAME for the vector in index N. The reference
132 is part of the vectorization of STMT and the vector is associated
133 with scalar destination SCALAR_DEST. */
134
135 static tree
136 read_vector_array (gimple *stmt, gimple_stmt_iterator *gsi, tree scalar_dest,
137 tree array, unsigned HOST_WIDE_INT n)
138 {
139 tree vect_type, vect, vect_name, array_ref;
140 gimple *new_stmt;
141
142 gcc_assert (TREE_CODE (TREE_TYPE (array)) == ARRAY_TYPE);
143 vect_type = TREE_TYPE (TREE_TYPE (array));
144 vect = vect_create_destination_var (scalar_dest, vect_type);
145 array_ref = build4 (ARRAY_REF, vect_type, array,
146 build_int_cst (size_type_node, n),
147 NULL_TREE, NULL_TREE);
148
149 new_stmt = gimple_build_assign (vect, array_ref);
150 vect_name = make_ssa_name (vect, new_stmt);
151 gimple_assign_set_lhs (new_stmt, vect_name);
152 vect_finish_stmt_generation (stmt, new_stmt, gsi);
153
154 return vect_name;
155 }
156
157 /* ARRAY is an array of vectors created by create_vector_array.
158 Emit code to store SSA_NAME VECT in index N of the array.
159 The store is part of the vectorization of STMT. */
160
161 static void
162 write_vector_array (gimple *stmt, gimple_stmt_iterator *gsi, tree vect,
163 tree array, unsigned HOST_WIDE_INT n)
164 {
165 tree array_ref;
166 gimple *new_stmt;
167
168 array_ref = build4 (ARRAY_REF, TREE_TYPE (vect), array,
169 build_int_cst (size_type_node, n),
170 NULL_TREE, NULL_TREE);
171
172 new_stmt = gimple_build_assign (array_ref, vect);
173 vect_finish_stmt_generation (stmt, new_stmt, gsi);
174 }
175
176 /* PTR is a pointer to an array of type TYPE. Return a representation
177 of *PTR. The memory reference replaces those in FIRST_DR
178 (and its group). */
179
180 static tree
181 create_array_ref (tree type, tree ptr, tree alias_ptr_type)
182 {
183 tree mem_ref;
184
185 mem_ref = build2 (MEM_REF, type, ptr, build_int_cst (alias_ptr_type, 0));
186 /* Arrays have the same alignment as their type. */
187 set_ptr_info_alignment (get_ptr_info (ptr), TYPE_ALIGN_UNIT (type), 0);
188 return mem_ref;
189 }
190
191 /* Utility functions used by vect_mark_stmts_to_be_vectorized. */
192
193 /* Function vect_mark_relevant.
194
195 Mark STMT as "relevant for vectorization" and add it to WORKLIST. */
196
197 static void
198 vect_mark_relevant (vec<gimple *> *worklist, gimple *stmt,
199 enum vect_relevant relevant, bool live_p)
200 {
201 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
202 enum vect_relevant save_relevant = STMT_VINFO_RELEVANT (stmt_info);
203 bool save_live_p = STMT_VINFO_LIVE_P (stmt_info);
204 gimple *pattern_stmt;
205
206 if (dump_enabled_p ())
207 {
208 dump_printf_loc (MSG_NOTE, vect_location,
209 "mark relevant %d, live %d: ", relevant, live_p);
210 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
211 }
212
213 /* If this stmt is an original stmt in a pattern, we might need to mark its
214 related pattern stmt instead of the original stmt. However, such stmts
215 may have their own uses that are not in any pattern; in such cases the
216 stmt itself should be marked. */
217 if (STMT_VINFO_IN_PATTERN_P (stmt_info))
218 {
219 /* This is the last stmt in a sequence that was detected as a
220 pattern that can potentially be vectorized. Don't mark the stmt
221 as relevant/live because it's not going to be vectorized.
222 Instead mark the pattern-stmt that replaces it. */
223
224 pattern_stmt = STMT_VINFO_RELATED_STMT (stmt_info);
225
226 if (dump_enabled_p ())
227 dump_printf_loc (MSG_NOTE, vect_location,
228 "last stmt in pattern. don't mark"
229 " relevant/live.\n");
230 stmt_info = vinfo_for_stmt (pattern_stmt);
231 gcc_assert (STMT_VINFO_RELATED_STMT (stmt_info) == stmt);
232 save_relevant = STMT_VINFO_RELEVANT (stmt_info);
233 save_live_p = STMT_VINFO_LIVE_P (stmt_info);
234 stmt = pattern_stmt;
235 }
236
237 STMT_VINFO_LIVE_P (stmt_info) |= live_p;
238 if (relevant > STMT_VINFO_RELEVANT (stmt_info))
239 STMT_VINFO_RELEVANT (stmt_info) = relevant;
240
241 if (STMT_VINFO_RELEVANT (stmt_info) == save_relevant
242 && STMT_VINFO_LIVE_P (stmt_info) == save_live_p)
243 {
244 if (dump_enabled_p ())
245 dump_printf_loc (MSG_NOTE, vect_location,
246 "already marked relevant/live.\n");
247 return;
248 }
249
250 worklist->safe_push (stmt);
251 }
252
253
254 /* Function is_simple_and_all_uses_invariant
255
256 Return true if STMT is simple and all uses of it are invariant. */
257
258 bool
259 is_simple_and_all_uses_invariant (gimple *stmt, loop_vec_info loop_vinfo)
260 {
261 tree op;
262 gimple *def_stmt;
263 ssa_op_iter iter;
264
265 if (!is_gimple_assign (stmt))
266 return false;
267
268 FOR_EACH_SSA_TREE_OPERAND (op, stmt, iter, SSA_OP_USE)
269 {
270 enum vect_def_type dt = vect_uninitialized_def;
271
272 if (!vect_is_simple_use (op, loop_vinfo, &def_stmt, &dt))
273 {
274 if (dump_enabled_p ())
275 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
276 "use not simple.\n");
277 return false;
278 }
279
280 if (dt != vect_external_def && dt != vect_constant_def)
281 return false;
282 }
283 return true;
284 }
285
286 /* Function vect_stmt_relevant_p.
287
288 Return true if STMT, in the loop represented by LOOP_VINFO, is
289 "relevant for vectorization".
290
291 A stmt is considered "relevant for vectorization" if:
292 - it has uses outside the loop.
293 - it has vdefs (it alters memory).
294 - it is a control stmt in the loop (other than the exit condition).
295
296 CHECKME: what other side effects would the vectorizer allow? */
297
298 static bool
299 vect_stmt_relevant_p (gimple *stmt, loop_vec_info loop_vinfo,
300 enum vect_relevant *relevant, bool *live_p)
301 {
302 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
303 ssa_op_iter op_iter;
304 imm_use_iterator imm_iter;
305 use_operand_p use_p;
306 def_operand_p def_p;
307
308 *relevant = vect_unused_in_scope;
309 *live_p = false;
310
311 /* cond stmt other than loop exit cond. */
312 if (is_ctrl_stmt (stmt)
313 && STMT_VINFO_TYPE (vinfo_for_stmt (stmt))
314 != loop_exit_ctrl_vec_info_type)
315 *relevant = vect_used_in_scope;
316
317 /* changing memory. */
318 if (gimple_code (stmt) != GIMPLE_PHI)
319 if (gimple_vdef (stmt)
320 && !gimple_clobber_p (stmt))
321 {
322 if (dump_enabled_p ())
323 dump_printf_loc (MSG_NOTE, vect_location,
324 "vec_stmt_relevant_p: stmt has vdefs.\n");
325 *relevant = vect_used_in_scope;
326 }
327
328 /* uses outside the loop. */
329 FOR_EACH_PHI_OR_STMT_DEF (def_p, stmt, op_iter, SSA_OP_DEF)
330 {
331 FOR_EACH_IMM_USE_FAST (use_p, imm_iter, DEF_FROM_PTR (def_p))
332 {
333 basic_block bb = gimple_bb (USE_STMT (use_p));
334 if (!flow_bb_inside_loop_p (loop, bb))
335 {
336 if (dump_enabled_p ())
337 dump_printf_loc (MSG_NOTE, vect_location,
338 "vec_stmt_relevant_p: used out of loop.\n");
339
340 if (is_gimple_debug (USE_STMT (use_p)))
341 continue;
342
343 /* We expect all such uses to be in the loop exit phis
344 (because of loop-closed SSA form).  */
345 gcc_assert (gimple_code (USE_STMT (use_p)) == GIMPLE_PHI);
346 gcc_assert (bb == single_exit (loop)->dest);
347
348 *live_p = true;
349 }
350 }
351 }
352
353 if (*live_p && *relevant == vect_unused_in_scope
354 && !is_simple_and_all_uses_invariant (stmt, loop_vinfo))
355 {
356 if (dump_enabled_p ())
357 dump_printf_loc (MSG_NOTE, vect_location,
358 "vec_stmt_relevant_p: stmt live but not relevant.\n");
359 *relevant = vect_used_only_live;
360 }
361
362 return (*live_p || *relevant);
363 }
364
365
366 /* Function exist_non_indexing_operands_for_use_p
367
368 USE is one of the uses attached to STMT. Check if USE is
369 used in STMT for anything other than indexing an array. */
370
371 static bool
372 exist_non_indexing_operands_for_use_p (tree use, gimple *stmt)
373 {
374 tree operand;
375 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
376
377 /* USE corresponds to some operand in STMT. If there is no data
378 reference in STMT, then any operand that corresponds to USE
379 is not indexing an array. */
380 if (!STMT_VINFO_DATA_REF (stmt_info))
381 return true;
382
383 /* STMT has a data_ref. FORNOW this means that it's one of
384 the following forms:
385 -1- ARRAY_REF = var
386 -2- var = ARRAY_REF
387 (This should have been verified in analyze_data_refs).
388
389 'var' in the second case corresponds to a def, not a use,
390 so USE cannot correspond to any operands that are not used
391 for array indexing.
392
393 Therefore, all we need to check is if STMT falls into the
394 first case, and whether var corresponds to USE. */
395
396 if (!gimple_assign_copy_p (stmt))
397 {
398 if (is_gimple_call (stmt)
399 && gimple_call_internal_p (stmt))
400 switch (gimple_call_internal_fn (stmt))
401 {
402 case IFN_MASK_STORE:
403 operand = gimple_call_arg (stmt, 3);
404 if (operand == use)
405 return true;
406 /* FALLTHRU */
407 case IFN_MASK_LOAD:
408 operand = gimple_call_arg (stmt, 2);
409 if (operand == use)
410 return true;
411 break;
412 default:
413 break;
414 }
415 return false;
416 }
417
418 if (TREE_CODE (gimple_assign_lhs (stmt)) == SSA_NAME)
419 return false;
420 operand = gimple_assign_rhs1 (stmt);
421 if (TREE_CODE (operand) != SSA_NAME)
422 return false;
423
424 if (operand == use)
425 return true;
426
427 return false;
428 }
429
430
431 /*
432 Function process_use.
433
434 Inputs:
435 - a USE in STMT in a loop represented by LOOP_VINFO
436 - RELEVANT - enum value to be set in the STMT_VINFO of the stmt
437 that defined USE. This is done by calling mark_relevant and passing it
438 the WORKLIST (to add DEF_STMT to the WORKLIST in case it is relevant).
439 - FORCE is true if exist_non_indexing_operands_for_use_p check shouldn't
440 be performed.
441
442 Outputs:
443 Generally, LIVE_P and RELEVANT are used to define the liveness and
444 relevance info of the DEF_STMT of this USE:
445 STMT_VINFO_LIVE_P (DEF_STMT_info) <-- live_p
446 STMT_VINFO_RELEVANT (DEF_STMT_info) <-- relevant
447 Exceptions:
448 - case 1: If USE is used only for address computations (e.g. array indexing),
449 which does not need to be directly vectorized, then the liveness/relevance
450 of the respective DEF_STMT is left unchanged.
451 - case 2: If STMT is a reduction phi and DEF_STMT is a reduction stmt, we
452 skip DEF_STMT because it has already been processed.
453 - case 3: If DEF_STMT and STMT are in different nests, then "relevant" will
454 be modified accordingly.
455
456 Return true if everything is as expected. Return false otherwise. */
457
458 static bool
459 process_use (gimple *stmt, tree use, loop_vec_info loop_vinfo,
460 enum vect_relevant relevant, vec<gimple *> *worklist,
461 bool force)
462 {
463 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
464 stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
465 stmt_vec_info dstmt_vinfo;
466 basic_block bb, def_bb;
467 gimple *def_stmt;
468 enum vect_def_type dt;
469
470 /* case 1: we are only interested in uses that need to be vectorized. Uses
471 that are used for address computation are not considered relevant. */
472 if (!force && !exist_non_indexing_operands_for_use_p (use, stmt))
473 return true;
474
475 if (!vect_is_simple_use (use, loop_vinfo, &def_stmt, &dt))
476 {
477 if (dump_enabled_p ())
478 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
479 "not vectorized: unsupported use in stmt.\n");
480 return false;
481 }
482
483 if (!def_stmt || gimple_nop_p (def_stmt))
484 return true;
485
486 def_bb = gimple_bb (def_stmt);
487 if (!flow_bb_inside_loop_p (loop, def_bb))
488 {
489 if (dump_enabled_p ())
490 dump_printf_loc (MSG_NOTE, vect_location, "def_stmt is out of loop.\n");
491 return true;
492 }
493
494 /* case 2: A reduction phi (STMT) defined by a reduction stmt (DEF_STMT).
495 DEF_STMT must have already been processed, because this should be the
496 only way that STMT, which is a reduction-phi, was put in the worklist,
497 as there should be no other uses for DEF_STMT in the loop. So we just
498 check that everything is as expected, and we are done. */
499 dstmt_vinfo = vinfo_for_stmt (def_stmt);
500 bb = gimple_bb (stmt);
501 if (gimple_code (stmt) == GIMPLE_PHI
502 && STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_reduction_def
503 && gimple_code (def_stmt) != GIMPLE_PHI
504 && STMT_VINFO_DEF_TYPE (dstmt_vinfo) == vect_reduction_def
505 && bb->loop_father == def_bb->loop_father)
506 {
507 if (dump_enabled_p ())
508 dump_printf_loc (MSG_NOTE, vect_location,
509 "reduc-stmt defining reduc-phi in the same nest.\n");
510 if (STMT_VINFO_IN_PATTERN_P (dstmt_vinfo))
511 dstmt_vinfo = vinfo_for_stmt (STMT_VINFO_RELATED_STMT (dstmt_vinfo));
512 gcc_assert (STMT_VINFO_RELEVANT (dstmt_vinfo) < vect_used_by_reduction);
513 gcc_assert (STMT_VINFO_LIVE_P (dstmt_vinfo)
514 || STMT_VINFO_RELEVANT (dstmt_vinfo) > vect_unused_in_scope);
515 return true;
516 }
517
518 /* case 3a: outer-loop stmt defining an inner-loop stmt:
519 outer-loop-header-bb:
520 d = def_stmt
521 inner-loop:
522 stmt # use (d)
523 outer-loop-tail-bb:
524 ... */
525 if (flow_loop_nested_p (def_bb->loop_father, bb->loop_father))
526 {
527 if (dump_enabled_p ())
528 dump_printf_loc (MSG_NOTE, vect_location,
529 "outer-loop def-stmt defining inner-loop stmt.\n");
530
531 switch (relevant)
532 {
533 case vect_unused_in_scope:
534 relevant = (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_nested_cycle) ?
535 vect_used_in_scope : vect_unused_in_scope;
536 break;
537
538 case vect_used_in_outer_by_reduction:
539 gcc_assert (STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_reduction_def);
540 relevant = vect_used_by_reduction;
541 break;
542
543 case vect_used_in_outer:
544 gcc_assert (STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_reduction_def);
545 relevant = vect_used_in_scope;
546 break;
547
548 case vect_used_in_scope:
549 break;
550
551 default:
552 gcc_unreachable ();
553 }
554 }
555
556 /* case 3b: inner-loop stmt defining an outer-loop stmt:
557 outer-loop-header-bb:
558 ...
559 inner-loop:
560 d = def_stmt
561 outer-loop-tail-bb (or outer-loop-exit-bb in double reduction):
562 stmt # use (d) */
563 else if (flow_loop_nested_p (bb->loop_father, def_bb->loop_father))
564 {
565 if (dump_enabled_p ())
566 dump_printf_loc (MSG_NOTE, vect_location,
567 "inner-loop def-stmt defining outer-loop stmt.\n");
568
569 switch (relevant)
570 {
571 case vect_unused_in_scope:
572 relevant = (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_reduction_def
573 || STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_double_reduction_def) ?
574 vect_used_in_outer_by_reduction : vect_unused_in_scope;
575 break;
576
577 case vect_used_by_reduction:
578 case vect_used_only_live:
579 relevant = vect_used_in_outer_by_reduction;
580 break;
581
582 case vect_used_in_scope:
583 relevant = vect_used_in_outer;
584 break;
585
586 default:
587 gcc_unreachable ();
588 }
589 }
590 /* We are also not interested in uses on loop PHI backedges that are
591 inductions. Otherwise we'll needlessly vectorize the IV increment
592 and cause hybrid SLP for SLP inductions. Unless the PHI is live
593 of course. */
594 else if (gimple_code (stmt) == GIMPLE_PHI
595 && STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_induction_def
596 && ! STMT_VINFO_LIVE_P (stmt_vinfo)
597 && (PHI_ARG_DEF_FROM_EDGE (stmt, loop_latch_edge (bb->loop_father))
598 == use))
599 {
600 if (dump_enabled_p ())
601 dump_printf_loc (MSG_NOTE, vect_location,
602 "induction value on backedge.\n");
603 return true;
604 }
605
606
607 vect_mark_relevant (worklist, def_stmt, relevant, false);
608 return true;
609 }
610
611
612 /* Function vect_mark_stmts_to_be_vectorized.
613
614 Not all stmts in the loop need to be vectorized. For example:
615
616 for i...
617 for j...
618 1. T0 = i + j
619 2. T1 = a[T0]
620
621 3. j = j + 1
622
623 Stmts 1 and 3 do not need to be vectorized, because loop control and
624 addressing of vectorized data-refs are handled differently.
625
626 This pass detects such stmts. */
627
628 bool
629 vect_mark_stmts_to_be_vectorized (loop_vec_info loop_vinfo)
630 {
631 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
632 basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
633 unsigned int nbbs = loop->num_nodes;
634 gimple_stmt_iterator si;
635 gimple *stmt;
636 unsigned int i;
637 stmt_vec_info stmt_vinfo;
638 basic_block bb;
639 gimple *phi;
640 bool live_p;
641 enum vect_relevant relevant;
642
643 if (dump_enabled_p ())
644 dump_printf_loc (MSG_NOTE, vect_location,
645 "=== vect_mark_stmts_to_be_vectorized ===\n");
646
647 auto_vec<gimple *, 64> worklist;
648
649 /* 1. Init worklist. */
650 for (i = 0; i < nbbs; i++)
651 {
652 bb = bbs[i];
653 for (si = gsi_start_phis (bb); !gsi_end_p (si); gsi_next (&si))
654 {
655 phi = gsi_stmt (si);
656 if (dump_enabled_p ())
657 {
658 dump_printf_loc (MSG_NOTE, vect_location, "init: phi relevant? ");
659 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, phi, 0);
660 }
661
662 if (vect_stmt_relevant_p (phi, loop_vinfo, &relevant, &live_p))
663 vect_mark_relevant (&worklist, phi, relevant, live_p);
664 }
665 for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
666 {
667 stmt = gsi_stmt (si);
668 if (dump_enabled_p ())
669 {
670 dump_printf_loc (MSG_NOTE, vect_location, "init: stmt relevant? ");
671 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
672 }
673
674 if (vect_stmt_relevant_p (stmt, loop_vinfo, &relevant, &live_p))
675 vect_mark_relevant (&worklist, stmt, relevant, live_p);
676 }
677 }
678
679 /* 2. Process_worklist */
680 while (worklist.length () > 0)
681 {
682 use_operand_p use_p;
683 ssa_op_iter iter;
684
685 stmt = worklist.pop ();
686 if (dump_enabled_p ())
687 {
688 dump_printf_loc (MSG_NOTE, vect_location, "worklist: examine stmt: ");
689 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
690 }
691
692 /* Examine the USEs of STMT. For each USE, mark the stmt that defines it
693 (DEF_STMT) as relevant/irrelevant according to the relevance property
694 of STMT. */
695 stmt_vinfo = vinfo_for_stmt (stmt);
696 relevant = STMT_VINFO_RELEVANT (stmt_vinfo);
697
698 /* Generally, the relevance property of STMT (in STMT_VINFO_RELEVANT) is
699 propagated as is to the DEF_STMTs of its USEs.
700
701 One exception is when STMT has been identified as defining a reduction
702 variable; in this case we set the relevance to vect_used_by_reduction.
703 This is because we distinguish between two kinds of relevant stmts -
704 those that are used by a reduction computation, and those that are
705 (also) used by a regular computation. This allows us later on to
706 identify stmts that are used solely by a reduction, and therefore the
707 order of the results that they produce does not have to be kept. */
708
709 switch (STMT_VINFO_DEF_TYPE (stmt_vinfo))
710 {
711 case vect_reduction_def:
712 gcc_assert (relevant != vect_unused_in_scope);
713 if (relevant != vect_unused_in_scope
714 && relevant != vect_used_in_scope
715 && relevant != vect_used_by_reduction
716 && relevant != vect_used_only_live)
717 {
718 if (dump_enabled_p ())
719 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
720 "unsupported use of reduction.\n");
721 return false;
722 }
723 break;
724
725 case vect_nested_cycle:
726 if (relevant != vect_unused_in_scope
727 && relevant != vect_used_in_outer_by_reduction
728 && relevant != vect_used_in_outer)
729 {
730 if (dump_enabled_p ())
731 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
732 "unsupported use of nested cycle.\n");
733
734 return false;
735 }
736 break;
737
738 case vect_double_reduction_def:
739 if (relevant != vect_unused_in_scope
740 && relevant != vect_used_by_reduction
741 && relevant != vect_used_only_live)
742 {
743 if (dump_enabled_p ())
744 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
745 "unsupported use of double reduction.\n");
746
747 return false;
748 }
749 break;
750
751 default:
752 break;
753 }
754
755 if (is_pattern_stmt_p (stmt_vinfo))
756 {
757 /* Pattern statements are not inserted into the code, so
758 FOR_EACH_PHI_OR_STMT_USE optimizes their operands out, and we
759 have to scan the RHS or function arguments instead. */
760 if (is_gimple_assign (stmt))
761 {
762 enum tree_code rhs_code = gimple_assign_rhs_code (stmt);
763 tree op = gimple_assign_rhs1 (stmt);
764
765 i = 1;
766 if (rhs_code == COND_EXPR && COMPARISON_CLASS_P (op))
767 {
768 if (!process_use (stmt, TREE_OPERAND (op, 0), loop_vinfo,
769 relevant, &worklist, false)
770 || !process_use (stmt, TREE_OPERAND (op, 1), loop_vinfo,
771 relevant, &worklist, false))
772 return false;
773 i = 2;
774 }
775 for (; i < gimple_num_ops (stmt); i++)
776 {
777 op = gimple_op (stmt, i);
778 if (TREE_CODE (op) == SSA_NAME
779 && !process_use (stmt, op, loop_vinfo, relevant,
780 &worklist, false))
781 return false;
782 }
783 }
784 else if (is_gimple_call (stmt))
785 {
786 for (i = 0; i < gimple_call_num_args (stmt); i++)
787 {
788 tree arg = gimple_call_arg (stmt, i);
789 if (!process_use (stmt, arg, loop_vinfo, relevant,
790 &worklist, false))
791 return false;
792 }
793 }
794 }
795 else
796 FOR_EACH_PHI_OR_STMT_USE (use_p, stmt, iter, SSA_OP_USE)
797 {
798 tree op = USE_FROM_PTR (use_p);
799 if (!process_use (stmt, op, loop_vinfo, relevant,
800 &worklist, false))
801 return false;
802 }
803
804 if (STMT_VINFO_GATHER_SCATTER_P (stmt_vinfo))
805 {
806 gather_scatter_info gs_info;
807 if (!vect_check_gather_scatter (stmt, loop_vinfo, &gs_info))
808 gcc_unreachable ();
809 if (!process_use (stmt, gs_info.offset, loop_vinfo, relevant,
810 &worklist, true))
811 return false;
812 }
813 } /* while worklist */
814
815 return true;
816 }
817
818
819 /* Function vect_model_simple_cost.
820
821 Models cost for simple operations, i.e. those that only emit ncopies of a
822 single op. Right now, this does not account for multiple insns that could
823 be generated for the single vector op. We will handle that shortly. */
824
825 void
826 vect_model_simple_cost (stmt_vec_info stmt_info, int ncopies,
827 enum vect_def_type *dt,
828 int ndts,
829 stmt_vector_for_cost *prologue_cost_vec,
830 stmt_vector_for_cost *body_cost_vec)
831 {
832 int i;
833 int inside_cost = 0, prologue_cost = 0;
834
835 /* The SLP costs were already calculated during SLP tree build. */
836 if (PURE_SLP_STMT (stmt_info))
837 return;
838
839 /* Cost the "broadcast" of a scalar operand into a vector operand.
840 Use scalar_to_vec to cost the broadcast, as elsewhere in the vector
841 cost model. */
842 for (i = 0; i < ndts; i++)
843 if (dt[i] == vect_constant_def || dt[i] == vect_external_def)
844 prologue_cost += record_stmt_cost (prologue_cost_vec, 1, scalar_to_vec,
845 stmt_info, 0, vect_prologue);
846
847 /* Pass the inside-of-loop statements to the target-specific cost model. */
848 inside_cost = record_stmt_cost (body_cost_vec, ncopies, vector_stmt,
849 stmt_info, 0, vect_body);
850
851 if (dump_enabled_p ())
852 dump_printf_loc (MSG_NOTE, vect_location,
853 "vect_model_simple_cost: inside_cost = %d, "
854 "prologue_cost = %d .\n", inside_cost, prologue_cost);
855 }
856
857
858 /* Model cost for type demotion and promotion operations. PWR is normally
859 zero for single-step promotions and demotions. It will be one if
860 two-step promotion/demotion is required, and so on. Each additional
861 step doubles the number of instructions required. */
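/* As a worked example of the costing loop below: a two-step promotion
   (PWR = 1) is costed as vect_pow2 (1) + vect_pow2 (2) = 2 + 4
   vec_promote_demote statements, while a two-step demotion is costed
   as vect_pow2 (0) + vect_pow2 (1) = 1 + 2 statements.  */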
862
863 static void
864 vect_model_promotion_demotion_cost (stmt_vec_info stmt_info,
865 enum vect_def_type *dt, int pwr)
866 {
867 int i, tmp;
868 int inside_cost = 0, prologue_cost = 0;
869 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
870 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
871 void *target_cost_data;
872
873 /* The SLP costs were already calculated during SLP tree build. */
874 if (PURE_SLP_STMT (stmt_info))
875 return;
876
877 if (loop_vinfo)
878 target_cost_data = LOOP_VINFO_TARGET_COST_DATA (loop_vinfo);
879 else
880 target_cost_data = BB_VINFO_TARGET_COST_DATA (bb_vinfo);
881
882 for (i = 0; i < pwr + 1; i++)
883 {
884 tmp = (STMT_VINFO_TYPE (stmt_info) == type_promotion_vec_info_type) ?
885 (i + 1) : i;
886 inside_cost += add_stmt_cost (target_cost_data, vect_pow2 (tmp),
887 vec_promote_demote, stmt_info, 0,
888 vect_body);
889 }
890
891 /* FORNOW: Assuming a maximum of 2 args per stmt. */
892 for (i = 0; i < 2; i++)
893 if (dt[i] == vect_constant_def || dt[i] == vect_external_def)
894 prologue_cost += add_stmt_cost (target_cost_data, 1, vector_stmt,
895 stmt_info, 0, vect_prologue);
896
897 if (dump_enabled_p ())
898 dump_printf_loc (MSG_NOTE, vect_location,
899 "vect_model_promotion_demotion_cost: inside_cost = %d, "
900 "prologue_cost = %d .\n", inside_cost, prologue_cost);
901 }
902
903 /* Function vect_model_store_cost
904
905 Models cost for stores. In the case of grouped accesses, one access
906 has the overhead of the grouped access attributed to it. */
907
908 void
909 vect_model_store_cost (stmt_vec_info stmt_info, int ncopies,
910 vect_memory_access_type memory_access_type,
911 enum vect_def_type dt, slp_tree slp_node,
912 stmt_vector_for_cost *prologue_cost_vec,
913 stmt_vector_for_cost *body_cost_vec)
914 {
915 unsigned int inside_cost = 0, prologue_cost = 0;
916 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
917 gimple *first_stmt = STMT_VINFO_STMT (stmt_info);
918 bool grouped_access_p = STMT_VINFO_GROUPED_ACCESS (stmt_info);
919
920 if (dt == vect_constant_def || dt == vect_external_def)
921 prologue_cost += record_stmt_cost (prologue_cost_vec, 1, scalar_to_vec,
922 stmt_info, 0, vect_prologue);
923
924 /* Grouped stores update all elements in the group at once,
925 so we want the DR for the first statement. */
926 if (!slp_node && grouped_access_p)
927 {
928 first_stmt = GROUP_FIRST_ELEMENT (stmt_info);
929 dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
930 }
931
932 /* True if we should include any once-per-group costs as well as
933 the cost of the statement itself. For SLP we only get called
934 once per group anyhow. */
935 bool first_stmt_p = (first_stmt == STMT_VINFO_STMT (stmt_info));
936
937 /* We assume that the cost of a single store-lanes instruction is
938 equivalent to the cost of GROUP_SIZE separate stores. If a grouped
939 access is instead being provided by a permute-and-store operation,
940 include the cost of the permutes. */
941 if (first_stmt_p
942 && memory_access_type == VMAT_CONTIGUOUS_PERMUTE)
943 {
944 /* Uses high and low interleave or shuffle operations for each
945 needed permute. */
946 int group_size = GROUP_SIZE (vinfo_for_stmt (first_stmt));
947 int nstmts = ncopies * ceil_log2 (group_size) * group_size;
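      /* For example, ncopies = 2 and group_size = 4 gives
         2 * ceil_log2 (4) * 4 = 16 vec_perm statements.  */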
948 inside_cost = record_stmt_cost (body_cost_vec, nstmts, vec_perm,
949 stmt_info, 0, vect_body);
950
951 if (dump_enabled_p ())
952 dump_printf_loc (MSG_NOTE, vect_location,
953 "vect_model_store_cost: strided group_size = %d .\n",
954 group_size);
955 }
956
957 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
958 /* Costs of the stores. */
959 if (memory_access_type == VMAT_ELEMENTWISE
960 || memory_access_type == VMAT_GATHER_SCATTER)
961 {
962 /* N scalar stores plus extracting the elements. */
963 unsigned int assumed_nunits = vect_nunits_for_cost (vectype);
964 inside_cost += record_stmt_cost (body_cost_vec,
965 ncopies * assumed_nunits,
966 scalar_store, stmt_info, 0, vect_body);
967 }
968 else
969 vect_get_store_cost (dr, ncopies, &inside_cost, body_cost_vec);
970
971 if (memory_access_type == VMAT_ELEMENTWISE
972 || memory_access_type == VMAT_STRIDED_SLP)
973 {
974 /* N scalar stores plus extracting the elements. */
975 unsigned int assumed_nunits = vect_nunits_for_cost (vectype);
976 inside_cost += record_stmt_cost (body_cost_vec,
977 ncopies * assumed_nunits,
978 vec_to_scalar, stmt_info, 0, vect_body);
979 }
980
981 if (dump_enabled_p ())
982 dump_printf_loc (MSG_NOTE, vect_location,
983 "vect_model_store_cost: inside_cost = %d, "
984 "prologue_cost = %d .\n", inside_cost, prologue_cost);
985 }
986
987
988 /* Calculate cost of DR's memory access. */
989 void
990 vect_get_store_cost (struct data_reference *dr, int ncopies,
991 unsigned int *inside_cost,
992 stmt_vector_for_cost *body_cost_vec)
993 {
994 int alignment_support_scheme = vect_supportable_dr_alignment (dr, false);
995 gimple *stmt = DR_STMT (dr);
996 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
997
998 switch (alignment_support_scheme)
999 {
1000 case dr_aligned:
1001 {
1002 *inside_cost += record_stmt_cost (body_cost_vec, ncopies,
1003 vector_store, stmt_info, 0,
1004 vect_body);
1005
1006 if (dump_enabled_p ())
1007 dump_printf_loc (MSG_NOTE, vect_location,
1008 "vect_model_store_cost: aligned.\n");
1009 break;
1010 }
1011
1012 case dr_unaligned_supported:
1013 {
1014 /* Here, we assign an additional cost for the unaligned store. */
1015 *inside_cost += record_stmt_cost (body_cost_vec, ncopies,
1016 unaligned_store, stmt_info,
1017 DR_MISALIGNMENT (dr), vect_body);
1018 if (dump_enabled_p ())
1019 dump_printf_loc (MSG_NOTE, vect_location,
1020 "vect_model_store_cost: unaligned supported by "
1021 "hardware.\n");
1022 break;
1023 }
1024
1025 case dr_unaligned_unsupported:
1026 {
1027 *inside_cost = VECT_MAX_COST;
1028
1029 if (dump_enabled_p ())
1030 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1031 "vect_model_store_cost: unsupported access.\n");
1032 break;
1033 }
1034
1035 default:
1036 gcc_unreachable ();
1037 }
1038 }
1039
1040
1041 /* Function vect_model_load_cost
1042
1043 Models cost for loads. In the case of grouped accesses, one access has
1044 the overhead of the grouped access attributed to it. Since unaligned
1045 accesses are supported for loads, we also account for the costs of the
1046 access scheme chosen. */
1047
1048 void
1049 vect_model_load_cost (stmt_vec_info stmt_info, int ncopies,
1050 vect_memory_access_type memory_access_type,
1051 slp_tree slp_node,
1052 stmt_vector_for_cost *prologue_cost_vec,
1053 stmt_vector_for_cost *body_cost_vec)
1054 {
1055 gimple *first_stmt = STMT_VINFO_STMT (stmt_info);
1056 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
1057 unsigned int inside_cost = 0, prologue_cost = 0;
1058 bool grouped_access_p = STMT_VINFO_GROUPED_ACCESS (stmt_info);
1059
1060 /* Grouped loads read all elements in the group at once,
1061 so we want the DR for the first statement. */
1062 if (!slp_node && grouped_access_p)
1063 {
1064 first_stmt = GROUP_FIRST_ELEMENT (stmt_info);
1065 dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
1066 }
1067
1068 /* True if we should include any once-per-group costs as well as
1069 the cost of the statement itself. For SLP we only get called
1070 once per group anyhow. */
1071 bool first_stmt_p = (first_stmt == STMT_VINFO_STMT (stmt_info));
1072
1073 /* We assume that the cost of a single load-lanes instruction is
1074 equivalent to the cost of GROUP_SIZE separate loads. If a grouped
1075 access is instead being provided by a load-and-permute operation,
1076 include the cost of the permutes. */
1077 if (first_stmt_p
1078 && memory_access_type == VMAT_CONTIGUOUS_PERMUTE)
1079 {
1080 /* Uses even and odd extract operations or shuffle operations
1081 for each needed permute. */
1082 int group_size = GROUP_SIZE (vinfo_for_stmt (first_stmt));
1083 int nstmts = ncopies * ceil_log2 (group_size) * group_size;
1084 inside_cost = record_stmt_cost (body_cost_vec, nstmts, vec_perm,
1085 stmt_info, 0, vect_body);
1086
1087 if (dump_enabled_p ())
1088 dump_printf_loc (MSG_NOTE, vect_location,
1089 "vect_model_load_cost: strided group_size = %d .\n",
1090 group_size);
1091 }
1092
1093 /* The loads themselves. */
1094 if (memory_access_type == VMAT_ELEMENTWISE
1095 || memory_access_type == VMAT_GATHER_SCATTER)
1096 {
1097 /* N scalar loads plus gathering them into a vector. */
1098 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
1099 unsigned int assumed_nunits = vect_nunits_for_cost (vectype);
1100 inside_cost += record_stmt_cost (body_cost_vec,
1101 ncopies * assumed_nunits,
1102 scalar_load, stmt_info, 0, vect_body);
1103 }
1104 else
1105 vect_get_load_cost (dr, ncopies, first_stmt_p,
1106 &inside_cost, &prologue_cost,
1107 prologue_cost_vec, body_cost_vec, true);
1108 if (memory_access_type == VMAT_ELEMENTWISE
1109 || memory_access_type == VMAT_STRIDED_SLP)
1110 inside_cost += record_stmt_cost (body_cost_vec, ncopies, vec_construct,
1111 stmt_info, 0, vect_body);
1112
1113 if (dump_enabled_p ())
1114 dump_printf_loc (MSG_NOTE, vect_location,
1115 "vect_model_load_cost: inside_cost = %d, "
1116 "prologue_cost = %d .\n", inside_cost, prologue_cost);
1117 }
1118
1119
1120 /* Calculate cost of DR's memory access. */
1121 void
1122 vect_get_load_cost (struct data_reference *dr, int ncopies,
1123 bool add_realign_cost, unsigned int *inside_cost,
1124 unsigned int *prologue_cost,
1125 stmt_vector_for_cost *prologue_cost_vec,
1126 stmt_vector_for_cost *body_cost_vec,
1127 bool record_prologue_costs)
1128 {
1129 int alignment_support_scheme = vect_supportable_dr_alignment (dr, false);
1130 gimple *stmt = DR_STMT (dr);
1131 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1132
1133 switch (alignment_support_scheme)
1134 {
1135 case dr_aligned:
1136 {
1137 *inside_cost += record_stmt_cost (body_cost_vec, ncopies, vector_load,
1138 stmt_info, 0, vect_body);
1139
1140 if (dump_enabled_p ())
1141 dump_printf_loc (MSG_NOTE, vect_location,
1142 "vect_model_load_cost: aligned.\n");
1143
1144 break;
1145 }
1146 case dr_unaligned_supported:
1147 {
1148 /* Here, we assign an additional cost for the unaligned load. */
1149 *inside_cost += record_stmt_cost (body_cost_vec, ncopies,
1150 unaligned_load, stmt_info,
1151 DR_MISALIGNMENT (dr), vect_body);
1152
1153 if (dump_enabled_p ())
1154 dump_printf_loc (MSG_NOTE, vect_location,
1155 "vect_model_load_cost: unaligned supported by "
1156 "hardware.\n");
1157
1158 break;
1159 }
1160 case dr_explicit_realign:
1161 {
1162 *inside_cost += record_stmt_cost (body_cost_vec, ncopies * 2,
1163 vector_load, stmt_info, 0, vect_body);
1164 *inside_cost += record_stmt_cost (body_cost_vec, ncopies,
1165 vec_perm, stmt_info, 0, vect_body);
1166
1167 /* FIXME: If the misalignment remains fixed across the iterations of
1168 the containing loop, the following cost should be added to the
1169 prologue costs. */
1170 if (targetm.vectorize.builtin_mask_for_load)
1171 *inside_cost += record_stmt_cost (body_cost_vec, 1, vector_stmt,
1172 stmt_info, 0, vect_body);
1173
1174 if (dump_enabled_p ())
1175 dump_printf_loc (MSG_NOTE, vect_location,
1176 "vect_model_load_cost: explicit realign\n");
1177
1178 break;
1179 }
1180 case dr_explicit_realign_optimized:
1181 {
1182 if (dump_enabled_p ())
1183 dump_printf_loc (MSG_NOTE, vect_location,
1184 "vect_model_load_cost: unaligned software "
1185 "pipelined.\n");
1186
1187 /* An unaligned software pipeline has a load of an address, an initial
1188 load, and possibly a mask operation to "prime" the loop. However,
1189 if this is an access in a group of loads, which provide grouped
1190 access, then the above cost should only be considered for one
1191 access in the group. Inside the loop, there is a load op
1192 and a realignment op. */
1193
1194 if (add_realign_cost && record_prologue_costs)
1195 {
1196 *prologue_cost += record_stmt_cost (prologue_cost_vec, 2,
1197 vector_stmt, stmt_info,
1198 0, vect_prologue);
1199 if (targetm.vectorize.builtin_mask_for_load)
1200 *prologue_cost += record_stmt_cost (prologue_cost_vec, 1,
1201 vector_stmt, stmt_info,
1202 0, vect_prologue);
1203 }
1204
1205 *inside_cost += record_stmt_cost (body_cost_vec, ncopies, vector_load,
1206 stmt_info, 0, vect_body);
1207 *inside_cost += record_stmt_cost (body_cost_vec, ncopies, vec_perm,
1208 stmt_info, 0, vect_body);
1209
1210 if (dump_enabled_p ())
1211 dump_printf_loc (MSG_NOTE, vect_location,
1212 "vect_model_load_cost: explicit realign optimized"
1213 "\n");
1214
1215 break;
1216 }
1217
1218 case dr_unaligned_unsupported:
1219 {
1220 *inside_cost = VECT_MAX_COST;
1221
1222 if (dump_enabled_p ())
1223 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1224 "vect_model_load_cost: unsupported access.\n");
1225 break;
1226 }
1227
1228 default:
1229 gcc_unreachable ();
1230 }
1231 }
1232
1233 /* Insert the new stmt NEW_STMT at *GSI or at the appropriate place in
1234 the loop preheader for the vectorized stmt STMT. */
1235
1236 static void
1237 vect_init_vector_1 (gimple *stmt, gimple *new_stmt, gimple_stmt_iterator *gsi)
1238 {
1239 if (gsi)
1240 vect_finish_stmt_generation (stmt, new_stmt, gsi);
1241 else
1242 {
1243 stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
1244 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
1245
1246 if (loop_vinfo)
1247 {
1248 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
1249 basic_block new_bb;
1250 edge pe;
1251
1252 if (nested_in_vect_loop_p (loop, stmt))
1253 loop = loop->inner;
1254
1255 pe = loop_preheader_edge (loop);
1256 new_bb = gsi_insert_on_edge_immediate (pe, new_stmt);
1257 gcc_assert (!new_bb);
1258 }
1259 else
1260 {
1261 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_vinfo);
1262 basic_block bb;
1263 gimple_stmt_iterator gsi_bb_start;
1264
1265 gcc_assert (bb_vinfo);
1266 bb = BB_VINFO_BB (bb_vinfo);
1267 gsi_bb_start = gsi_after_labels (bb);
1268 gsi_insert_before (&gsi_bb_start, new_stmt, GSI_SAME_STMT);
1269 }
1270 }
1271
1272 if (dump_enabled_p ())
1273 {
1274 dump_printf_loc (MSG_NOTE, vect_location,
1275 "created new init_stmt: ");
1276 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, new_stmt, 0);
1277 }
1278 }
1279
1280 /* Function vect_init_vector.
1281
1282 Insert a new stmt (INIT_STMT) that initializes a new variable of type
1283 TYPE with the value VAL. If TYPE is a vector type and VAL does not have
1284 a vector type, a vector with all elements equal to VAL is created first.
1285 Place the initialization at BSI if it is not NULL. Otherwise, place the
1286 initialization at the loop preheader.
1287 Return the DEF of INIT_STMT.
1288 It will be used in the vectorization of STMT. */
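/* For example, vectorizing the scalar constant 3 with a four-element
   integer vector type places something like
     cst_1 = { 3, 3, 3, 3 };
   in the loop preheader (or at the given iterator) and returns that
   "cst_"-prefixed SSA name.  */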
1289
1290 tree
1291 vect_init_vector (gimple *stmt, tree val, tree type, gimple_stmt_iterator *gsi)
1292 {
1293 gimple *init_stmt;
1294 tree new_temp;
1295
1296 /* We abuse this function to produce an SSA name initialized to 'val'. */
1297 if (! useless_type_conversion_p (type, TREE_TYPE (val)))
1298 {
1299 gcc_assert (TREE_CODE (type) == VECTOR_TYPE);
1300 if (! types_compatible_p (TREE_TYPE (type), TREE_TYPE (val)))
1301 {
1302 /* A scalar boolean value should be transformed into an
1303 all-zeros or all-ones value before building a vector. */
1304 if (VECTOR_BOOLEAN_TYPE_P (type))
1305 {
1306 tree true_val = build_all_ones_cst (TREE_TYPE (type));
1307 tree false_val = build_zero_cst (TREE_TYPE (type));
1308
1309 if (CONSTANT_CLASS_P (val))
1310 val = integer_zerop (val) ? false_val : true_val;
1311 else
1312 {
1313 new_temp = make_ssa_name (TREE_TYPE (type));
1314 init_stmt = gimple_build_assign (new_temp, COND_EXPR,
1315 val, true_val, false_val);
1316 vect_init_vector_1 (stmt, init_stmt, gsi);
1317 val = new_temp;
1318 }
1319 }
1320 else if (CONSTANT_CLASS_P (val))
1321 val = fold_convert (TREE_TYPE (type), val);
1322 else
1323 {
1324 new_temp = make_ssa_name (TREE_TYPE (type));
1325 if (! INTEGRAL_TYPE_P (TREE_TYPE (val)))
1326 init_stmt = gimple_build_assign (new_temp,
1327 fold_build1 (VIEW_CONVERT_EXPR,
1328 TREE_TYPE (type),
1329 val));
1330 else
1331 init_stmt = gimple_build_assign (new_temp, NOP_EXPR, val);
1332 vect_init_vector_1 (stmt, init_stmt, gsi);
1333 val = new_temp;
1334 }
1335 }
1336 val = build_vector_from_val (type, val);
1337 }
1338
1339 new_temp = vect_get_new_ssa_name (type, vect_simple_var, "cst_");
1340 init_stmt = gimple_build_assign (new_temp, val);
1341 vect_init_vector_1 (stmt, init_stmt, gsi);
1342 return new_temp;
1343 }
1344
1345 /* Function vect_get_vec_def_for_operand_1.
1346
1347 For a defining stmt DEF_STMT of a scalar stmt, return a vector def with type
1348 DT that will be used in the vectorized stmt. */
1349
1350 tree
1351 vect_get_vec_def_for_operand_1 (gimple *def_stmt, enum vect_def_type dt)
1352 {
1353 tree vec_oprnd;
1354 gimple *vec_stmt;
1355 stmt_vec_info def_stmt_info = NULL;
1356
1357 switch (dt)
1358 {
1359 /* operand is a constant or a loop invariant. */
1360 case vect_constant_def:
1361 case vect_external_def:
1362 /* Code should use vect_get_vec_def_for_operand. */
1363 gcc_unreachable ();
1364
1365 /* operand is defined inside the loop. */
1366 case vect_internal_def:
1367 {
1368 /* Get the def from the vectorized stmt. */
1369 def_stmt_info = vinfo_for_stmt (def_stmt);
1370
1371 vec_stmt = STMT_VINFO_VEC_STMT (def_stmt_info);
1372 /* Get vectorized pattern statement. */
1373 if (!vec_stmt
1374 && STMT_VINFO_IN_PATTERN_P (def_stmt_info)
1375 && !STMT_VINFO_RELEVANT (def_stmt_info))
1376 vec_stmt = STMT_VINFO_VEC_STMT (vinfo_for_stmt (
1377 STMT_VINFO_RELATED_STMT (def_stmt_info)));
1378 gcc_assert (vec_stmt);
1379 if (gimple_code (vec_stmt) == GIMPLE_PHI)
1380 vec_oprnd = PHI_RESULT (vec_stmt);
1381 else if (is_gimple_call (vec_stmt))
1382 vec_oprnd = gimple_call_lhs (vec_stmt);
1383 else
1384 vec_oprnd = gimple_assign_lhs (vec_stmt);
1385 return vec_oprnd;
1386 }
1387
1388 /* operand is defined by a loop header phi. */
1389 case vect_reduction_def:
1390 case vect_double_reduction_def:
1391 case vect_nested_cycle:
1392 case vect_induction_def:
1393 {
1394 gcc_assert (gimple_code (def_stmt) == GIMPLE_PHI);
1395
1396 /* Get the def from the vectorized stmt. */
1397 def_stmt_info = vinfo_for_stmt (def_stmt);
1398 vec_stmt = STMT_VINFO_VEC_STMT (def_stmt_info);
1399 if (gimple_code (vec_stmt) == GIMPLE_PHI)
1400 vec_oprnd = PHI_RESULT (vec_stmt);
1401 else
1402 vec_oprnd = gimple_get_lhs (vec_stmt);
1403 return vec_oprnd;
1404 }
1405
1406 default:
1407 gcc_unreachable ();
1408 }
1409 }
1410
1411
1412 /* Function vect_get_vec_def_for_operand.
1413
1414 OP is an operand in STMT. This function returns a (vector) def that will be
1415 used in the vectorized stmt for STMT.
1416
1417 In the case that OP is an SSA_NAME which is defined in the loop, then
1418 STMT_VINFO_VEC_STMT of the defining stmt holds the relevant def.
1419
1420 In case OP is an invariant or constant, a new stmt that creates a vector def
1421 needs to be introduced. VECTYPE may be used to specify a required type for
1422 the vector invariant. */
1423
1424 tree
1425 vect_get_vec_def_for_operand (tree op, gimple *stmt, tree vectype)
1426 {
1427 gimple *def_stmt;
1428 enum vect_def_type dt;
1429 bool is_simple_use;
1430 stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
1431 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
1432
1433 if (dump_enabled_p ())
1434 {
1435 dump_printf_loc (MSG_NOTE, vect_location,
1436 "vect_get_vec_def_for_operand: ");
1437 dump_generic_expr (MSG_NOTE, TDF_SLIM, op);
1438 dump_printf (MSG_NOTE, "\n");
1439 }
1440
1441 is_simple_use = vect_is_simple_use (op, loop_vinfo, &def_stmt, &dt);
1442 gcc_assert (is_simple_use);
1443 if (def_stmt && dump_enabled_p ())
1444 {
1445 dump_printf_loc (MSG_NOTE, vect_location, " def_stmt = ");
1446 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, def_stmt, 0);
1447 }
1448
1449 if (dt == vect_constant_def || dt == vect_external_def)
1450 {
1451 tree stmt_vectype = STMT_VINFO_VECTYPE (stmt_vinfo);
1452 tree vector_type;
1453
1454 if (vectype)
1455 vector_type = vectype;
1456 else if (VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (op))
1457 && VECTOR_BOOLEAN_TYPE_P (stmt_vectype))
1458 vector_type = build_same_sized_truth_vector_type (stmt_vectype);
1459 else
1460 vector_type = get_vectype_for_scalar_type (TREE_TYPE (op));
1461
1462 gcc_assert (vector_type);
1463 return vect_init_vector (stmt, op, vector_type, NULL);
1464 }
1465 else
1466 return vect_get_vec_def_for_operand_1 (def_stmt, dt);
1467 }
1468
1469
1470 /* Function vect_get_vec_def_for_stmt_copy
1471
1472 Return a vector-def for an operand. This function is used when the
1473 vectorized stmt to be created (by the caller to this function) is a "copy"
1474 created in case the vectorized result cannot fit in one vector, and several
1475 copies of the vector-stmt are required. In this case the vector-def is
1476 retrieved from the vector stmt recorded in the STMT_VINFO_RELATED_STMT field
1477 of the stmt that defines VEC_OPRND.
1478 DT is the type of the vector def VEC_OPRND.
1479
1480 Context:
1481 In case the vectorization factor (VF) is bigger than the number
1482 of elements that can fit in a vectype (nunits), we have to generate
1483 more than one vector stmt to vectorize the scalar stmt. This situation
1484 arises when there are multiple data-types operated upon in the loop; the
1485 smallest data-type determines the VF, and as a result, when vectorizing
1486 stmts operating on wider types we need to create 'VF/nunits' "copies" of the
1487 vector stmt (each computing a vector of 'nunits' results, and together
1488 computing 'VF' results in each iteration). This function is called when
1489 vectorizing such a stmt (e.g. vectorizing S2 in the illustration below, in
1490 which VF=16 and nunits=4, so the number of copies required is 4):
1491
1492 scalar stmt: vectorized into: STMT_VINFO_RELATED_STMT
1493
1494 S1: x = load VS1.0: vx.0 = memref0 VS1.1
1495 VS1.1: vx.1 = memref1 VS1.2
1496 VS1.2: vx.2 = memref2 VS1.3
1497 VS1.3: vx.3 = memref3
1498
1499 S2: z = x + ... VSnew.0: vz0 = vx.0 + ... VSnew.1
1500 VSnew.1: vz1 = vx.1 + ... VSnew.2
1501 VSnew.2: vz2 = vx.2 + ... VSnew.3
1502 VSnew.3: vz3 = vx.3 + ...
1503
1504 The vectorization of S1 is explained in vectorizable_load.
1505 The vectorization of S2:
1506 To create the first vector-stmt out of the 4 copies - VSnew.0 -
1507 the function 'vect_get_vec_def_for_operand' is called to
1508 get the relevant vector-def for each operand of S2. For operand x it
1509 returns the vector-def 'vx.0'.
1510
1511 To create the remaining copies of the vector-stmt (VSnew.j), this
1512 function is called to get the relevant vector-def for each operand. It is
1513 obtained from the respective VS1.j stmt, which is recorded in the
1514 STMT_VINFO_RELATED_STMT field of the stmt that defines VEC_OPRND.
1515
1516 For example, to obtain the vector-def 'vx.1' in order to create the
1517 vector stmt 'VSnew.1', this function is called with VEC_OPRND='vx.0'.
1518 Given 'vx.0' we obtain the stmt that defines it ('VS1.0'); from the
1519 STMT_VINFO_RELATED_STMT field of 'VS1.0' we obtain the next copy - 'VS1.1',
1520 and return its def ('vx.1').
1521 Overall, to create the above sequence this function will be called 3 times:
1522 vx.1 = vect_get_vec_def_for_stmt_copy (dt, vx.0);
1523 vx.2 = vect_get_vec_def_for_stmt_copy (dt, vx.1);
1524 vx.3 = vect_get_vec_def_for_stmt_copy (dt, vx.2); */
1525
1526 tree
1527 vect_get_vec_def_for_stmt_copy (enum vect_def_type dt, tree vec_oprnd)
1528 {
1529 gimple *vec_stmt_for_operand;
1530 stmt_vec_info def_stmt_info;
1531
1532 /* Do nothing; can reuse same def. */
1533 if (dt == vect_external_def || dt == vect_constant_def )
1534 return vec_oprnd;
1535
1536 vec_stmt_for_operand = SSA_NAME_DEF_STMT (vec_oprnd);
1537 def_stmt_info = vinfo_for_stmt (vec_stmt_for_operand);
1538 gcc_assert (def_stmt_info);
1539 vec_stmt_for_operand = STMT_VINFO_RELATED_STMT (def_stmt_info);
1540 gcc_assert (vec_stmt_for_operand);
1541 if (gimple_code (vec_stmt_for_operand) == GIMPLE_PHI)
1542 vec_oprnd = PHI_RESULT (vec_stmt_for_operand);
1543 else
1544 vec_oprnd = gimple_get_lhs (vec_stmt_for_operand);
1545 return vec_oprnd;
1546 }
1547
1548
1549 /* Get vectorized definitions for the operands to create a copy of an original
1550 stmt. See vect_get_vec_def_for_stmt_copy () for details. */
1551
1552 void
1553 vect_get_vec_defs_for_stmt_copy (enum vect_def_type *dt,
1554 vec<tree> *vec_oprnds0,
1555 vec<tree> *vec_oprnds1)
1556 {
1557 tree vec_oprnd = vec_oprnds0->pop ();
1558
1559 vec_oprnd = vect_get_vec_def_for_stmt_copy (dt[0], vec_oprnd);
1560 vec_oprnds0->quick_push (vec_oprnd);
1561
1562 if (vec_oprnds1 && vec_oprnds1->length ())
1563 {
1564 vec_oprnd = vec_oprnds1->pop ();
1565 vec_oprnd = vect_get_vec_def_for_stmt_copy (dt[1], vec_oprnd);
1566 vec_oprnds1->quick_push (vec_oprnd);
1567 }
1568 }
1569
1570
1571 /* Get vectorized definitions for OP0 and OP1. */
1572
1573 void
1574 vect_get_vec_defs (tree op0, tree op1, gimple *stmt,
1575 vec<tree> *vec_oprnds0,
1576 vec<tree> *vec_oprnds1,
1577 slp_tree slp_node)
1578 {
1579 if (slp_node)
1580 {
1581 int nops = (op1 == NULL_TREE) ? 1 : 2;
1582 auto_vec<tree> ops (nops);
1583 auto_vec<vec<tree> > vec_defs (nops);
1584
1585 ops.quick_push (op0);
1586 if (op1)
1587 ops.quick_push (op1);
1588
1589 vect_get_slp_defs (ops, slp_node, &vec_defs);
1590
1591 *vec_oprnds0 = vec_defs[0];
1592 if (op1)
1593 *vec_oprnds1 = vec_defs[1];
1594 }
1595 else
1596 {
1597 tree vec_oprnd;
1598
1599 vec_oprnds0->create (1);
1600 vec_oprnd = vect_get_vec_def_for_operand (op0, stmt);
1601 vec_oprnds0->quick_push (vec_oprnd);
1602
1603 if (op1)
1604 {
1605 vec_oprnds1->create (1);
1606 vec_oprnd = vect_get_vec_def_for_operand (op1, stmt);
1607 vec_oprnds1->quick_push (vec_oprnd);
1608 }
1609 }
1610 }
1611
1612
1613 /* Function vect_finish_stmt_generation.
1614
1615 Insert a new stmt. */
1616
1617 void
1618 vect_finish_stmt_generation (gimple *stmt, gimple *vec_stmt,
1619 gimple_stmt_iterator *gsi)
1620 {
1621 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1622 vec_info *vinfo = stmt_info->vinfo;
1623
1624 gcc_assert (gimple_code (stmt) != GIMPLE_LABEL);
1625
1626 if (!gsi_end_p (*gsi)
1627 && gimple_has_mem_ops (vec_stmt))
1628 {
1629 gimple *at_stmt = gsi_stmt (*gsi);
1630 tree vuse = gimple_vuse (at_stmt);
1631 if (vuse && TREE_CODE (vuse) == SSA_NAME)
1632 {
1633 tree vdef = gimple_vdef (at_stmt);
1634 gimple_set_vuse (vec_stmt, gimple_vuse (at_stmt));
1635 /* If we have an SSA vuse and insert a store, update virtual
1636 SSA form to avoid triggering the renamer. Do so only
1637 if we can easily see all uses - which is what almost always
1638 happens with the way vectorized stmts are inserted. */
1639 if ((vdef && TREE_CODE (vdef) == SSA_NAME)
1640 && ((is_gimple_assign (vec_stmt)
1641 && !is_gimple_reg (gimple_assign_lhs (vec_stmt)))
1642 || (is_gimple_call (vec_stmt)
1643 && !(gimple_call_flags (vec_stmt)
1644 & (ECF_CONST|ECF_PURE|ECF_NOVOPS)))))
1645 {
1646 tree new_vdef = copy_ssa_name (vuse, vec_stmt);
1647 gimple_set_vdef (vec_stmt, new_vdef);
1648 SET_USE (gimple_vuse_op (at_stmt), new_vdef);
1649 }
1650 }
1651 }
1652 gsi_insert_before (gsi, vec_stmt, GSI_SAME_STMT);
1653
1654 set_vinfo_for_stmt (vec_stmt, new_stmt_vec_info (vec_stmt, vinfo));
1655
1656 if (dump_enabled_p ())
1657 {
1658 dump_printf_loc (MSG_NOTE, vect_location, "add new stmt: ");
1659 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, vec_stmt, 0);
1660 }
1661
1662 gimple_set_location (vec_stmt, gimple_location (stmt));
1663
1664 /* While EH edges will generally prevent vectorization, stmt might
1665 e.g. be in a must-not-throw region. Ensure newly created stmts
1666 that could throw are part of the same region. */
1667 int lp_nr = lookup_stmt_eh_lp (stmt);
1668 if (lp_nr != 0 && stmt_could_throw_p (vec_stmt))
1669 add_stmt_to_eh_lp (vec_stmt, lp_nr);
1670 }
1671
1672 /* We want to vectorize a call to combined function CFN with function
1673 decl FNDECL, using VECTYPE_OUT as the type of the output and VECTYPE_IN
1674 as the types of all inputs. Check whether this is possible using
1675 an internal function, returning its code if so or IFN_LAST if not. */
1676
1677 static internal_fn
1678 vectorizable_internal_function (combined_fn cfn, tree fndecl,
1679 tree vectype_out, tree vectype_in)
1680 {
1681 internal_fn ifn;
1682 if (internal_fn_p (cfn))
1683 ifn = as_internal_fn (cfn);
1684 else
1685 ifn = associated_internal_fn (fndecl);
1686 if (ifn != IFN_LAST && direct_internal_fn_p (ifn))
1687 {
1688 const direct_internal_fn_info &info = direct_internal_fn (ifn);
1689 if (info.vectorizable)
1690 {
1691 tree type0 = (info.type0 < 0 ? vectype_out : vectype_in);
1692 tree type1 = (info.type1 < 0 ? vectype_out : vectype_in);
1693 if (direct_internal_fn_supported_p (ifn, tree_pair (type0, type1),
1694 OPTIMIZE_FOR_SPEED))
1695 return ifn;
1696 }
1697 }
1698 return IFN_LAST;
1699 }
1700
1701
1702 static tree permute_vec_elements (tree, tree, tree, gimple *,
1703 gimple_stmt_iterator *);
1704
1705 /* STMT is a non-strided load or store, meaning that it accesses
1706 elements with a known constant step. Return -1 if that step
1707 is negative, 0 if it is zero, and 1 if it is greater than zero. */
1708
1709 static int
1710 compare_step_with_zero (gimple *stmt)
1711 {
1712 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1713 data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
1714 return tree_int_cst_compare (vect_dr_behavior (dr)->step,
1715 size_zero_node);
1716 }
1717
1718 /* If the target supports a permute mask that reverses the elements in
1719 a vector of type VECTYPE, return that mask, otherwise return null. */
1720
1721 static tree
1722 perm_mask_for_reverse (tree vectype)
1723 {
1724 int i, nunits;
1725
1726 nunits = TYPE_VECTOR_SUBPARTS (vectype);
1727
1728 /* The encoding has a single stepped pattern. */
1729 vec_perm_builder sel (nunits, 1, 3);
1730 for (i = 0; i < 3; ++i)
1731 sel.quick_push (nunits - 1 - i);
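  /* For example, with nunits == 8 the three pushed elements are
     { 7, 6, 5 }; the stepped encoding extends the series to the full
     reverse permutation { 7, 6, 5, 4, 3, 2, 1, 0 }.  */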
1732
1733 vec_perm_indices indices (sel, 1, nunits);
1734 if (!can_vec_perm_const_p (TYPE_MODE (vectype), indices))
1735 return NULL_TREE;
1736 return vect_gen_perm_mask_checked (vectype, indices);
1737 }
1738
1739 /* A subroutine of get_load_store_type, with a subset of the same
1740 arguments. Handle the case where STMT is part of a grouped load
1741 or store.
1742
1743 For stores, the statements in the group are all consecutive
1744 and there is no gap at the end. For loads, the statements in the
1745 group might not be consecutive; there can be gaps between statements
1746 as well as at the end. */
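/* For example (illustrative only): a loop that reads a[4*i], a[4*i+1] and
   a[4*i+2] from a stride-4 access forms a load group with a gap of one
   element at the end, so a vectorized access may also touch a[4*i+3];
   the overrun handling below decides whether that is acceptable.  */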
1747
1748 static bool
1749 get_group_load_store_type (gimple *stmt, tree vectype, bool slp,
1750 vec_load_store_type vls_type,
1751 vect_memory_access_type *memory_access_type)
1752 {
1753 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1754 vec_info *vinfo = stmt_info->vinfo;
1755 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
1756 struct loop *loop = loop_vinfo ? LOOP_VINFO_LOOP (loop_vinfo) : NULL;
1757 gimple *first_stmt = GROUP_FIRST_ELEMENT (stmt_info);
1758 data_reference *first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
1759 unsigned int group_size = GROUP_SIZE (vinfo_for_stmt (first_stmt));
1760 bool single_element_p = (stmt == first_stmt
1761 && !GROUP_NEXT_ELEMENT (stmt_info));
1762 unsigned HOST_WIDE_INT gap = GROUP_GAP (vinfo_for_stmt (first_stmt));
1763 unsigned nunits = TYPE_VECTOR_SUBPARTS (vectype);
1764
1765 /* True if the vectorized statements would access beyond the last
1766 statement in the group. */
1767 bool overrun_p = false;
1768
1769 /* True if we can cope with such overrun by peeling for gaps, so that
1770 there is at least one final scalar iteration after the vector loop. */
1771 bool can_overrun_p = (vls_type == VLS_LOAD && loop_vinfo && !loop->inner);
1772
1773 /* There can only be a gap at the end of the group if the stride is
1774 known at compile time. */
1775 gcc_assert (!STMT_VINFO_STRIDED_P (stmt_info) || gap == 0);
1776
1777 /* Stores can't yet have gaps. */
1778 gcc_assert (slp || vls_type == VLS_LOAD || gap == 0);
1779
1780 if (slp)
1781 {
1782 if (STMT_VINFO_STRIDED_P (stmt_info))
1783 {
1784 /* Try to use consecutive accesses of GROUP_SIZE elements,
1785 separated by the stride, until we have a complete vector.
1786 Fall back to scalar accesses if that isn't possible. */
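	  /* For example (values illustrative only): with nunits == 8 and
	     group_size == 4, two consecutive groups fill one vector and we
	     use VMAT_STRIDED_SLP; with group_size == 3 the groups do not
	     divide the vector evenly and we fall back to VMAT_ELEMENTWISE.  */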
1787 if (nunits % group_size == 0)
1788 *memory_access_type = VMAT_STRIDED_SLP;
1789 else
1790 *memory_access_type = VMAT_ELEMENTWISE;
1791 }
1792 else
1793 {
1794 overrun_p = loop_vinfo && gap != 0;
1795 if (overrun_p && vls_type != VLS_LOAD)
1796 {
1797 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1798 "Grouped store with gaps requires"
1799 " non-consecutive accesses\n");
1800 return false;
1801 }
1802 /* An overrun is fine if the trailing elements are smaller
1803 than the alignment boundary B. Every vector access will
1804 be a multiple of B and so we are guaranteed to access a
1805 non-gap element in the same B-sized block. */
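	  /* For example (values illustrative only): with a known 16-byte
	     alignment and 4-byte scalar elements, B covers four elements,
	     so a trailing gap of up to three elements can be read without
	     touching a new B-sized block.  */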
1806 if (overrun_p
1807 && gap < (vect_known_alignment_in_bytes (first_dr)
1808 / vect_get_scalar_dr_size (first_dr)))
1809 overrun_p = false;
1810 if (overrun_p && !can_overrun_p)
1811 {
1812 if (dump_enabled_p ())
1813 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1814 "Peeling for outer loop is not supported\n");
1815 return false;
1816 }
1817 *memory_access_type = VMAT_CONTIGUOUS;
1818 }
1819 }
1820 else
1821 {
1822 /* We can always handle this case using elementwise accesses,
1823 but see if something more efficient is available. */
1824 *memory_access_type = VMAT_ELEMENTWISE;
1825
1826 /* If there is a gap at the end of the group then these optimizations
1827 would access excess elements in the last iteration. */
1828 bool would_overrun_p = (gap != 0);
1829 /* An overrun is fine if the trailing elements are smaller than the
1830 alignment boundary B. Every vector access will be a multiple of B
1831 and so we are guaranteed to access a non-gap element in the
1832 same B-sized block. */
1833 if (would_overrun_p
1834 && gap < (vect_known_alignment_in_bytes (first_dr)
1835 / vect_get_scalar_dr_size (first_dr)))
1836 would_overrun_p = false;
1837
1838 if (!STMT_VINFO_STRIDED_P (stmt_info)
1839 && (can_overrun_p || !would_overrun_p)
1840 && compare_step_with_zero (stmt) > 0)
1841 {
1842 /* First try using LOAD/STORE_LANES. */
1843 if (vls_type == VLS_LOAD
1844 ? vect_load_lanes_supported (vectype, group_size)
1845 : vect_store_lanes_supported (vectype, group_size))
1846 {
1847 *memory_access_type = VMAT_LOAD_STORE_LANES;
1848 overrun_p = would_overrun_p;
1849 }
1850
1851 /* If that fails, try using permuting loads. */
1852 if (*memory_access_type == VMAT_ELEMENTWISE
1853 && (vls_type == VLS_LOAD
1854 ? vect_grouped_load_supported (vectype, single_element_p,
1855 group_size)
1856 : vect_grouped_store_supported (vectype, group_size)))
1857 {
1858 *memory_access_type = VMAT_CONTIGUOUS_PERMUTE;
1859 overrun_p = would_overrun_p;
1860 }
1861 }
1862 }
1863
1864 if (vls_type != VLS_LOAD && first_stmt == stmt)
1865 {
1866 /* STMT is the leader of the group. Check the operands of all the
1867 stmts of the group. */
1868 gimple *next_stmt = GROUP_NEXT_ELEMENT (stmt_info);
1869 while (next_stmt)
1870 {
1871 gcc_assert (gimple_assign_single_p (next_stmt));
1872 tree op = gimple_assign_rhs1 (next_stmt);
1873 gimple *def_stmt;
1874 enum vect_def_type dt;
1875 if (!vect_is_simple_use (op, vinfo, &def_stmt, &dt))
1876 {
1877 if (dump_enabled_p ())
1878 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1879 "use not simple.\n");
1880 return false;
1881 }
1882 next_stmt = GROUP_NEXT_ELEMENT (vinfo_for_stmt (next_stmt));
1883 }
1884 }
1885
1886 if (overrun_p)
1887 {
1888 gcc_assert (can_overrun_p);
1889 if (dump_enabled_p ())
1890 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1891 "Data access with gaps requires scalar "
1892 "epilogue loop\n");
1893 LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo) = true;
1894 }
1895
1896 return true;
1897 }
1898
1899 /* A subroutine of get_load_store_type, with a subset of the same
1900 arguments. Handle the case where STMT is a load or store that
1901 accesses consecutive elements with a negative step. */
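/* For example (illustrative only):

     for (i = 0; i < n; ++i)
       x[i] = y[n - 1 - i];

   reads consecutive elements of y with a negative step (-4 for 32-bit
   elements), which can be handled by loading a contiguous vector and
   reversing it, subject to the checks below.  */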
1902
1903 static vect_memory_access_type
1904 get_negative_load_store_type (gimple *stmt, tree vectype,
1905 vec_load_store_type vls_type,
1906 unsigned int ncopies)
1907 {
1908 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1909 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
1910 dr_alignment_support alignment_support_scheme;
1911
1912 if (ncopies > 1)
1913 {
1914 if (dump_enabled_p ())
1915 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1916 "multiple types with negative step.\n");
1917 return VMAT_ELEMENTWISE;
1918 }
1919
1920 alignment_support_scheme = vect_supportable_dr_alignment (dr, false);
1921 if (alignment_support_scheme != dr_aligned
1922 && alignment_support_scheme != dr_unaligned_supported)
1923 {
1924 if (dump_enabled_p ())
1925 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1926 "negative step but alignment required.\n");
1927 return VMAT_ELEMENTWISE;
1928 }
1929
1930 if (vls_type == VLS_STORE_INVARIANT)
1931 {
1932 if (dump_enabled_p ())
1933 dump_printf_loc (MSG_NOTE, vect_location,
1934 "negative step with invariant source;"
1935 " no permute needed.\n");
1936 return VMAT_CONTIGUOUS_DOWN;
1937 }
1938
1939 if (!perm_mask_for_reverse (vectype))
1940 {
1941 if (dump_enabled_p ())
1942 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1943 "negative step and reversing not supported.\n");
1944 return VMAT_ELEMENTWISE;
1945 }
1946
1947 return VMAT_CONTIGUOUS_REVERSE;
1948 }
1949
1950 /* Analyze load or store statement STMT of type VLS_TYPE. Return true
1951 if there is a memory access type that the vectorized form can use,
1952 storing it in *MEMORY_ACCESS_TYPE if so. If we decide to use gathers
1953 or scatters, fill in GS_INFO accordingly.
1954
1955 SLP says whether we're performing SLP rather than loop vectorization.
1956 VECTYPE is the vector type that the vectorized statements will use.
1957 NCOPIES is the number of vector statements that will be needed. */
1958
1959 static bool
1960 get_load_store_type (gimple *stmt, tree vectype, bool slp,
1961 vec_load_store_type vls_type, unsigned int ncopies,
1962 vect_memory_access_type *memory_access_type,
1963 gather_scatter_info *gs_info)
1964 {
1965 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1966 vec_info *vinfo = stmt_info->vinfo;
1967 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
1968 if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
1969 {
1970 *memory_access_type = VMAT_GATHER_SCATTER;
1971 gimple *def_stmt;
1972 if (!vect_check_gather_scatter (stmt, loop_vinfo, gs_info))
1973 gcc_unreachable ();
1974 else if (!vect_is_simple_use (gs_info->offset, vinfo, &def_stmt,
1975 &gs_info->offset_dt,
1976 &gs_info->offset_vectype))
1977 {
1978 if (dump_enabled_p ())
1979 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1980 "%s index use not simple.\n",
1981 vls_type == VLS_LOAD ? "gather" : "scatter");
1982 return false;
1983 }
1984 }
1985 else if (STMT_VINFO_GROUPED_ACCESS (stmt_info))
1986 {
1987 if (!get_group_load_store_type (stmt, vectype, slp, vls_type,
1988 memory_access_type))
1989 return false;
1990 }
1991 else if (STMT_VINFO_STRIDED_P (stmt_info))
1992 {
1993 gcc_assert (!slp);
1994 *memory_access_type = VMAT_ELEMENTWISE;
1995 }
1996 else
1997 {
1998 int cmp = compare_step_with_zero (stmt);
1999 if (cmp < 0)
2000 *memory_access_type = get_negative_load_store_type
2001 (stmt, vectype, vls_type, ncopies);
2002 else if (cmp == 0)
2003 {
2004 gcc_assert (vls_type == VLS_LOAD);
2005 *memory_access_type = VMAT_INVARIANT;
2006 }
2007 else
2008 *memory_access_type = VMAT_CONTIGUOUS;
2009 }
2010
2011 /* FIXME: At the moment the cost model seems to underestimate the
2012 cost of using elementwise accesses. This check preserves the
2013 traditional behavior until that can be fixed. */
2014 if (*memory_access_type == VMAT_ELEMENTWISE
2015 && !STMT_VINFO_STRIDED_P (stmt_info))
2016 {
2017 if (dump_enabled_p ())
2018 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2019 "not falling back to elementwise accesses\n");
2020 return false;
2021 }
2022 return true;
2023 }
2024
2025 /* Function vectorizable_mask_load_store.
2026
2027 Check if STMT performs a conditional load or store that can be vectorized.
2028 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
2029 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
2030 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
2031
2032 static bool
2033 vectorizable_mask_load_store (gimple *stmt, gimple_stmt_iterator *gsi,
2034 gimple **vec_stmt, slp_tree slp_node)
2035 {
2036 tree vec_dest = NULL;
2037 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2038 stmt_vec_info prev_stmt_info;
2039 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
2040 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
2041 bool nested_in_vect_loop = nested_in_vect_loop_p (loop, stmt);
2042 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
2043 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
2044 tree rhs_vectype = NULL_TREE;
2045 tree mask_vectype;
2046 tree elem_type;
2047 gimple *new_stmt;
2048 tree dummy;
2049 tree dataref_ptr = NULL_TREE;
2050 gimple *ptr_incr;
2051 int nunits = TYPE_VECTOR_SUBPARTS (vectype);
2052 int ncopies;
2053 int i, j;
2054 bool inv_p;
2055 gather_scatter_info gs_info;
2056 vec_load_store_type vls_type;
2057 tree mask;
2058 gimple *def_stmt;
2059 enum vect_def_type dt;
2060
2061 if (slp_node != NULL)
2062 return false;
2063
2064 ncopies = vect_get_num_copies (loop_vinfo, vectype);
2065 gcc_assert (ncopies >= 1);
2066
2067 mask = gimple_call_arg (stmt, 2);
2068
2069 if (!VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (mask)))
2070 return false;
2071
2072 /* FORNOW. This restriction should be relaxed. */
2073 if (nested_in_vect_loop && ncopies > 1)
2074 {
2075 if (dump_enabled_p ())
2076 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2077 "multiple types in nested loop.");
2078 return false;
2079 }
2080
2081 if (!STMT_VINFO_RELEVANT_P (stmt_info))
2082 return false;
2083
2084 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
2085 && ! vec_stmt)
2086 return false;
2087
2088 if (!STMT_VINFO_DATA_REF (stmt_info))
2089 return false;
2090
2091 elem_type = TREE_TYPE (vectype);
2092
2093 if (TREE_CODE (mask) != SSA_NAME)
2094 return false;
2095
2096 if (!vect_is_simple_use (mask, loop_vinfo, &def_stmt, &dt, &mask_vectype))
2097 return false;
2098
2099 if (!mask_vectype)
2100 mask_vectype = get_mask_type_for_scalar_type (TREE_TYPE (vectype));
2101
2102 if (!mask_vectype || !VECTOR_BOOLEAN_TYPE_P (mask_vectype)
2103 || TYPE_VECTOR_SUBPARTS (mask_vectype) != TYPE_VECTOR_SUBPARTS (vectype))
2104 return false;
2105
2106 if (gimple_call_internal_fn (stmt) == IFN_MASK_STORE)
2107 {
2108 tree rhs = gimple_call_arg (stmt, 3);
2109 if (!vect_is_simple_use (rhs, loop_vinfo, &def_stmt, &dt, &rhs_vectype))
2110 return false;
2111 if (dt == vect_constant_def || dt == vect_external_def)
2112 vls_type = VLS_STORE_INVARIANT;
2113 else
2114 vls_type = VLS_STORE;
2115 }
2116 else
2117 vls_type = VLS_LOAD;
2118
2119 vect_memory_access_type memory_access_type;
2120 if (!get_load_store_type (stmt, vectype, false, vls_type, ncopies,
2121 &memory_access_type, &gs_info))
2122 return false;
2123
2124 if (memory_access_type == VMAT_GATHER_SCATTER)
2125 {
2126 tree arglist = TYPE_ARG_TYPES (TREE_TYPE (gs_info.decl));
2127 tree masktype
2128 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (TREE_CHAIN (arglist))));
2129 if (TREE_CODE (masktype) == INTEGER_TYPE)
2130 {
2131 if (dump_enabled_p ())
2132 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2133 "masked gather with integer mask not supported.");
2134 return false;
2135 }
2136 }
2137 else if (memory_access_type != VMAT_CONTIGUOUS)
2138 {
2139 if (dump_enabled_p ())
2140 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2141 "unsupported access type for masked %s.\n",
2142 vls_type == VLS_LOAD ? "load" : "store");
2143 return false;
2144 }
2145 else if (!VECTOR_MODE_P (TYPE_MODE (vectype))
2146 || !can_vec_mask_load_store_p (TYPE_MODE (vectype),
2147 TYPE_MODE (mask_vectype),
2148 vls_type == VLS_LOAD)
2149 || (rhs_vectype
2150 && !useless_type_conversion_p (vectype, rhs_vectype)))
2151 return false;
2152
2153 if (!vec_stmt) /* transformation not required. */
2154 {
2155 STMT_VINFO_MEMORY_ACCESS_TYPE (stmt_info) = memory_access_type;
2156 STMT_VINFO_TYPE (stmt_info) = call_vec_info_type;
2157 if (vls_type == VLS_LOAD)
2158 vect_model_load_cost (stmt_info, ncopies, memory_access_type,
2159 NULL, NULL, NULL);
2160 else
2161 vect_model_store_cost (stmt_info, ncopies, memory_access_type,
2162 dt, NULL, NULL, NULL);
2163 return true;
2164 }
2165 gcc_assert (memory_access_type == STMT_VINFO_MEMORY_ACCESS_TYPE (stmt_info));
2166
2167 /* Transform. */
2168
2169 if (memory_access_type == VMAT_GATHER_SCATTER)
2170 {
2171 tree vec_oprnd0 = NULL_TREE, op;
2172 tree arglist = TYPE_ARG_TYPES (TREE_TYPE (gs_info.decl));
2173 tree rettype, srctype, ptrtype, idxtype, masktype, scaletype;
2174 tree ptr, vec_mask = NULL_TREE, mask_op = NULL_TREE, var, scale;
2175 tree perm_mask = NULL_TREE, prev_res = NULL_TREE;
2176 tree mask_perm_mask = NULL_TREE;
2177 edge pe = loop_preheader_edge (loop);
2178 gimple_seq seq;
2179 basic_block new_bb;
2180 enum { NARROW, NONE, WIDEN } modifier;
2181 int gather_off_nunits = TYPE_VECTOR_SUBPARTS (gs_info.offset_vectype);
2182
2183 rettype = TREE_TYPE (TREE_TYPE (gs_info.decl));
2184 srctype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
2185 ptrtype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
2186 idxtype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
2187 masktype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
2188 scaletype = TREE_VALUE (arglist);
2189 gcc_checking_assert (types_compatible_p (srctype, rettype)
2190 && types_compatible_p (srctype, masktype));
2191
2192 if (nunits == gather_off_nunits)
2193 modifier = NONE;
2194 else if (nunits == gather_off_nunits / 2)
2195 {
2196 modifier = WIDEN;
2197
2198 vec_perm_builder sel (gather_off_nunits, gather_off_nunits, 1);
2199 for (i = 0; i < gather_off_nunits; ++i)
2200 sel.quick_push (i | nunits);
2201
2202 vec_perm_indices indices (sel, 1, gather_off_nunits);
2203 perm_mask = vect_gen_perm_mask_checked (gs_info.offset_vectype,
2204 indices);
2205 }
2206 else if (nunits == gather_off_nunits * 2)
2207 {
2208 modifier = NARROW;
2209
2210 vec_perm_builder sel (nunits, nunits, 1);
2211 sel.quick_grow (nunits);
2212 for (i = 0; i < nunits; ++i)
2213 sel[i] = i < gather_off_nunits
2214 ? i : i + nunits - gather_off_nunits;
2215 vec_perm_indices indices (sel, 2, nunits);
2216 perm_mask = vect_gen_perm_mask_checked (vectype, indices);
2217
2218 ncopies *= 2;
2219
2220 for (i = 0; i < nunits; ++i)
2221 sel[i] = i | gather_off_nunits;
2222 indices.new_vector (sel, 2, gather_off_nunits);
2223 mask_perm_mask = vect_gen_perm_mask_checked (masktype, indices);
2224 }
2225 else
2226 gcc_unreachable ();
2227
2228 vec_dest = vect_create_destination_var (gimple_call_lhs (stmt), vectype);
2229
2230 ptr = fold_convert (ptrtype, gs_info.base);
2231 if (!is_gimple_min_invariant (ptr))
2232 {
2233 ptr = force_gimple_operand (ptr, &seq, true, NULL_TREE);
2234 new_bb = gsi_insert_seq_on_edge_immediate (pe, seq);
2235 gcc_assert (!new_bb);
2236 }
2237
2238 scale = build_int_cst (scaletype, gs_info.scale);
2239
2240 prev_stmt_info = NULL;
2241 for (j = 0; j < ncopies; ++j)
2242 {
2243 if (modifier == WIDEN && (j & 1))
2244 op = permute_vec_elements (vec_oprnd0, vec_oprnd0,
2245 perm_mask, stmt, gsi);
2246 else if (j == 0)
2247 op = vec_oprnd0
2248 = vect_get_vec_def_for_operand (gs_info.offset, stmt);
2249 else
2250 op = vec_oprnd0
2251 = vect_get_vec_def_for_stmt_copy (gs_info.offset_dt, vec_oprnd0);
2252
2253 if (!useless_type_conversion_p (idxtype, TREE_TYPE (op)))
2254 {
2255 gcc_assert (TYPE_VECTOR_SUBPARTS (TREE_TYPE (op))
2256 == TYPE_VECTOR_SUBPARTS (idxtype));
2257 var = vect_get_new_ssa_name (idxtype, vect_simple_var);
2258 op = build1 (VIEW_CONVERT_EXPR, idxtype, op);
2259 new_stmt
2260 = gimple_build_assign (var, VIEW_CONVERT_EXPR, op);
2261 vect_finish_stmt_generation (stmt, new_stmt, gsi);
2262 op = var;
2263 }
2264
2265 if (mask_perm_mask && (j & 1))
2266 mask_op = permute_vec_elements (mask_op, mask_op,
2267 mask_perm_mask, stmt, gsi);
2268 else
2269 {
2270 if (j == 0)
2271 vec_mask = vect_get_vec_def_for_operand (mask, stmt);
2272 else
2273 {
2274 vect_is_simple_use (vec_mask, loop_vinfo, &def_stmt, &dt);
2275 vec_mask = vect_get_vec_def_for_stmt_copy (dt, vec_mask);
2276 }
2277
2278 mask_op = vec_mask;
2279 if (!useless_type_conversion_p (masktype, TREE_TYPE (vec_mask)))
2280 {
2281 gcc_assert (TYPE_VECTOR_SUBPARTS (TREE_TYPE (mask_op))
2282 == TYPE_VECTOR_SUBPARTS (masktype));
2283 var = vect_get_new_ssa_name (masktype, vect_simple_var);
2284 mask_op = build1 (VIEW_CONVERT_EXPR, masktype, mask_op);
2285 new_stmt
2286 = gimple_build_assign (var, VIEW_CONVERT_EXPR, mask_op);
2287 vect_finish_stmt_generation (stmt, new_stmt, gsi);
2288 mask_op = var;
2289 }
2290 }
2291
2292 new_stmt
2293 = gimple_build_call (gs_info.decl, 5, mask_op, ptr, op, mask_op,
2294 scale);
2295
2296 if (!useless_type_conversion_p (vectype, rettype))
2297 {
2298 gcc_assert (TYPE_VECTOR_SUBPARTS (vectype)
2299 == TYPE_VECTOR_SUBPARTS (rettype));
2300 op = vect_get_new_ssa_name (rettype, vect_simple_var);
2301 gimple_call_set_lhs (new_stmt, op);
2302 vect_finish_stmt_generation (stmt, new_stmt, gsi);
2303 var = make_ssa_name (vec_dest);
2304 op = build1 (VIEW_CONVERT_EXPR, vectype, op);
2305 new_stmt = gimple_build_assign (var, VIEW_CONVERT_EXPR, op);
2306 }
2307 else
2308 {
2309 var = make_ssa_name (vec_dest, new_stmt);
2310 gimple_call_set_lhs (new_stmt, var);
2311 }
2312
2313 vect_finish_stmt_generation (stmt, new_stmt, gsi);
2314
2315 if (modifier == NARROW)
2316 {
2317 if ((j & 1) == 0)
2318 {
2319 prev_res = var;
2320 continue;
2321 }
2322 var = permute_vec_elements (prev_res, var,
2323 perm_mask, stmt, gsi);
2324 new_stmt = SSA_NAME_DEF_STMT (var);
2325 }
2326
2327 if (prev_stmt_info == NULL)
2328 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
2329 else
2330 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
2331 prev_stmt_info = vinfo_for_stmt (new_stmt);
2332 }
2333
2334 /* Ensure that even with -fno-tree-dce the scalar MASK_LOAD is removed
2335 from the IL. */
2336 if (STMT_VINFO_RELATED_STMT (stmt_info))
2337 {
2338 stmt = STMT_VINFO_RELATED_STMT (stmt_info);
2339 stmt_info = vinfo_for_stmt (stmt);
2340 }
2341 tree lhs = gimple_call_lhs (stmt);
2342 new_stmt = gimple_build_assign (lhs, build_zero_cst (TREE_TYPE (lhs)));
2343 set_vinfo_for_stmt (new_stmt, stmt_info);
2344 set_vinfo_for_stmt (stmt, NULL);
2345 STMT_VINFO_STMT (stmt_info) = new_stmt;
2346 gsi_replace (gsi, new_stmt, true);
2347 return true;
2348 }
2349 else if (vls_type != VLS_LOAD)
2350 {
2351 tree vec_rhs = NULL_TREE, vec_mask = NULL_TREE;
2352 prev_stmt_info = NULL;
2353 LOOP_VINFO_HAS_MASK_STORE (loop_vinfo) = true;
2354 for (i = 0; i < ncopies; i++)
2355 {
2356 unsigned align, misalign;
2357
2358 if (i == 0)
2359 {
2360 tree rhs = gimple_call_arg (stmt, 3);
2361 vec_rhs = vect_get_vec_def_for_operand (rhs, stmt);
2362 vec_mask = vect_get_vec_def_for_operand (mask, stmt,
2363 mask_vectype);
2364 /* We should have caught mismatched types earlier. */
2365 gcc_assert (useless_type_conversion_p (vectype,
2366 TREE_TYPE (vec_rhs)));
2367 dataref_ptr = vect_create_data_ref_ptr (stmt, vectype, NULL,
2368 NULL_TREE, &dummy, gsi,
2369 &ptr_incr, false, &inv_p);
2370 gcc_assert (!inv_p);
2371 }
2372 else
2373 {
2374 vect_is_simple_use (vec_rhs, loop_vinfo, &def_stmt, &dt);
2375 vec_rhs = vect_get_vec_def_for_stmt_copy (dt, vec_rhs);
2376 vect_is_simple_use (vec_mask, loop_vinfo, &def_stmt, &dt);
2377 vec_mask = vect_get_vec_def_for_stmt_copy (dt, vec_mask);
2378 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi, stmt,
2379 TYPE_SIZE_UNIT (vectype));
2380 }
2381
2382 align = DR_TARGET_ALIGNMENT (dr);
2383 if (aligned_access_p (dr))
2384 misalign = 0;
2385 else if (DR_MISALIGNMENT (dr) == -1)
2386 {
2387 align = TYPE_ALIGN_UNIT (elem_type);
2388 misalign = 0;
2389 }
2390 else
2391 misalign = DR_MISALIGNMENT (dr);
2392 set_ptr_info_alignment (get_ptr_info (dataref_ptr), align,
2393 misalign);
2394 tree ptr = build_int_cst (TREE_TYPE (gimple_call_arg (stmt, 1)),
2395 misalign ? least_bit_hwi (misalign) : align);
2396 gcall *call
2397 = gimple_build_call_internal (IFN_MASK_STORE, 4, dataref_ptr,
2398 ptr, vec_mask, vec_rhs);
2399 gimple_call_set_nothrow (call, true);
2400 new_stmt = call;
2401 vect_finish_stmt_generation (stmt, new_stmt, gsi);
2402 if (i == 0)
2403 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
2404 else
2405 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
2406 prev_stmt_info = vinfo_for_stmt (new_stmt);
2407 }
2408 }
2409 else
2410 {
2411 tree vec_mask = NULL_TREE;
2412 prev_stmt_info = NULL;
2413 vec_dest = vect_create_destination_var (gimple_call_lhs (stmt), vectype);
2414 for (i = 0; i < ncopies; i++)
2415 {
2416 unsigned align, misalign;
2417
2418 if (i == 0)
2419 {
2420 vec_mask = vect_get_vec_def_for_operand (mask, stmt,
2421 mask_vectype);
2422 dataref_ptr = vect_create_data_ref_ptr (stmt, vectype, NULL,
2423 NULL_TREE, &dummy, gsi,
2424 &ptr_incr, false, &inv_p);
2425 gcc_assert (!inv_p);
2426 }
2427 else
2428 {
2429 vect_is_simple_use (vec_mask, loop_vinfo, &def_stmt, &dt);
2430 vec_mask = vect_get_vec_def_for_stmt_copy (dt, vec_mask);
2431 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi, stmt,
2432 TYPE_SIZE_UNIT (vectype));
2433 }
2434
2435 align = DR_TARGET_ALIGNMENT (dr);
2436 if (aligned_access_p (dr))
2437 misalign = 0;
2438 else if (DR_MISALIGNMENT (dr) == -1)
2439 {
2440 align = TYPE_ALIGN_UNIT (elem_type);
2441 misalign = 0;
2442 }
2443 else
2444 misalign = DR_MISALIGNMENT (dr);
2445 set_ptr_info_alignment (get_ptr_info (dataref_ptr), align,
2446 misalign);
2447 tree ptr = build_int_cst (TREE_TYPE (gimple_call_arg (stmt, 1)),
2448 misalign ? least_bit_hwi (misalign) : align);
2449 gcall *call
2450 = gimple_build_call_internal (IFN_MASK_LOAD, 3, dataref_ptr,
2451 ptr, vec_mask);
2452 gimple_call_set_lhs (call, make_ssa_name (vec_dest));
2453 gimple_call_set_nothrow (call, true);
2454 vect_finish_stmt_generation (stmt, call, gsi);
2455 if (i == 0)
2456 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = call;
2457 else
2458 STMT_VINFO_RELATED_STMT (prev_stmt_info) = call;
2459 prev_stmt_info = vinfo_for_stmt (call);
2460 }
2461 }
2462
2463 if (vls_type == VLS_LOAD)
2464 {
2465 /* Ensure that even with -fno-tree-dce the scalar MASK_LOAD is removed
2466 from the IL. */
2467 if (STMT_VINFO_RELATED_STMT (stmt_info))
2468 {
2469 stmt = STMT_VINFO_RELATED_STMT (stmt_info);
2470 stmt_info = vinfo_for_stmt (stmt);
2471 }
2472 tree lhs = gimple_call_lhs (stmt);
2473 new_stmt = gimple_build_assign (lhs, build_zero_cst (TREE_TYPE (lhs)));
2474 set_vinfo_for_stmt (new_stmt, stmt_info);
2475 set_vinfo_for_stmt (stmt, NULL);
2476 STMT_VINFO_STMT (stmt_info) = new_stmt;
2477 gsi_replace (gsi, new_stmt, true);
2478 }
2479
2480 return true;
2481 }
2482
2483 /* Check and perform vectorization of BUILT_IN_BSWAP{16,32,64}. */
2484
2485 static bool
2486 vectorizable_bswap (gimple *stmt, gimple_stmt_iterator *gsi,
2487 gimple **vec_stmt, slp_tree slp_node,
2488 tree vectype_in, enum vect_def_type *dt)
2489 {
2490 tree op, vectype;
2491 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2492 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
2493 unsigned ncopies, nunits;
2494
2495 op = gimple_call_arg (stmt, 0);
2496 vectype = STMT_VINFO_VECTYPE (stmt_info);
2497 nunits = TYPE_VECTOR_SUBPARTS (vectype);
2498
2499 /* Multiple types in SLP are handled by creating the appropriate number of
2500 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
2501 case of SLP. */
2502 if (slp_node)
2503 ncopies = 1;
2504 else
2505 ncopies = vect_get_num_copies (loop_vinfo, vectype);
2506
2507 gcc_assert (ncopies >= 1);
2508
2509 tree char_vectype = get_same_sized_vectype (char_type_node, vectype_in);
2510 if (! char_vectype)
2511 return false;
2512
2513 unsigned int num_bytes = TYPE_VECTOR_SUBPARTS (char_vectype);
2514 unsigned word_bytes = num_bytes / nunits;
2515
2516 /* The encoding uses one stepped pattern for each byte in the word. */
2517 vec_perm_builder elts (num_bytes, word_bytes, 3);
2518 for (unsigned i = 0; i < 3; ++i)
2519 for (unsigned j = 0; j < word_bytes; ++j)
2520 elts.quick_push ((i + 1) * word_bytes - j - 1);
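  /* For example (values illustrative only): for a 16-byte vector of 32-bit
     words (num_bytes == 16, word_bytes == 4) the pushed indices are
     { 3, 2, 1, 0,  7, 6, 5, 4,  11, 10, 9, 8 }, which the stepped encoding
     extends with { 15, 14, 13, 12 }, i.e. the bytes of each word are
     reversed.  */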
2521
2522 vec_perm_indices indices (elts, 1, num_bytes);
2523 if (!can_vec_perm_const_p (TYPE_MODE (char_vectype), indices))
2524 return false;
2525
2526 if (! vec_stmt)
2527 {
2528 STMT_VINFO_TYPE (stmt_info) = call_vec_info_type;
2529 if (dump_enabled_p ())
2530 dump_printf_loc (MSG_NOTE, vect_location, "=== vectorizable_bswap ==="
2531 "\n");
2532 if (! PURE_SLP_STMT (stmt_info))
2533 {
2534 add_stmt_cost (stmt_info->vinfo->target_cost_data,
2535 1, vector_stmt, stmt_info, 0, vect_prologue);
2536 add_stmt_cost (stmt_info->vinfo->target_cost_data,
2537 ncopies, vec_perm, stmt_info, 0, vect_body);
2538 }
2539 return true;
2540 }
2541
2542 tree bswap_vconst = vec_perm_indices_to_tree (char_vectype, indices);
2543
2544 /* Transform. */
2545 vec<tree> vec_oprnds = vNULL;
2546 gimple *new_stmt = NULL;
2547 stmt_vec_info prev_stmt_info = NULL;
2548 for (unsigned j = 0; j < ncopies; j++)
2549 {
2550 /* Handle uses. */
2551 if (j == 0)
2552 vect_get_vec_defs (op, NULL, stmt, &vec_oprnds, NULL, slp_node);
2553 else
2554 vect_get_vec_defs_for_stmt_copy (dt, &vec_oprnds, NULL);
2555
2556 /* Arguments are ready. Create the new vector stmt. */
2557 unsigned i;
2558 tree vop;
2559 FOR_EACH_VEC_ELT (vec_oprnds, i, vop)
2560 {
2561 tree tem = make_ssa_name (char_vectype);
2562 new_stmt = gimple_build_assign (tem, build1 (VIEW_CONVERT_EXPR,
2563 char_vectype, vop));
2564 vect_finish_stmt_generation (stmt, new_stmt, gsi);
2565 tree tem2 = make_ssa_name (char_vectype);
2566 new_stmt = gimple_build_assign (tem2, VEC_PERM_EXPR,
2567 tem, tem, bswap_vconst);
2568 vect_finish_stmt_generation (stmt, new_stmt, gsi);
2569 tem = make_ssa_name (vectype);
2570 new_stmt = gimple_build_assign (tem, build1 (VIEW_CONVERT_EXPR,
2571 vectype, tem2));
2572 vect_finish_stmt_generation (stmt, new_stmt, gsi);
2573 if (slp_node)
2574 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
2575 }
2576
2577 if (slp_node)
2578 continue;
2579
2580 if (j == 0)
2581 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
2582 else
2583 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
2584
2585 prev_stmt_info = vinfo_for_stmt (new_stmt);
2586 }
2587
2588 vec_oprnds.release ();
2589 return true;
2590 }
2591
2592 /* Return true if vector types VECTYPE_IN and VECTYPE_OUT have
2593 integer elements and if we can narrow VECTYPE_IN to VECTYPE_OUT
2594 in a single step. On success, store the binary pack code in
2595 *CONVERT_CODE. */
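/* For example (illustrative only): two vectors of 4 ints can typically be
   narrowed into one vector of 8 shorts in a single step with a binary
   pack-truncate operation; that pack code is what gets stored in
   *CONVERT_CODE.  */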
2596
2597 static bool
2598 simple_integer_narrowing (tree vectype_out, tree vectype_in,
2599 tree_code *convert_code)
2600 {
2601 if (!INTEGRAL_TYPE_P (TREE_TYPE (vectype_out))
2602 || !INTEGRAL_TYPE_P (TREE_TYPE (vectype_in)))
2603 return false;
2604
2605 tree_code code;
2606 int multi_step_cvt = 0;
2607 auto_vec <tree, 8> interm_types;
2608 if (!supportable_narrowing_operation (NOP_EXPR, vectype_out, vectype_in,
2609 &code, &multi_step_cvt,
2610 &interm_types)
2611 || multi_step_cvt)
2612 return false;
2613
2614 *convert_code = code;
2615 return true;
2616 }
2617
2618 /* Function vectorizable_call.
2619
2620 Check if GS performs a function call that can be vectorized.
2621 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
2622 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
2623 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
2624
2625 static bool
2626 vectorizable_call (gimple *gs, gimple_stmt_iterator *gsi, gimple **vec_stmt,
2627 slp_tree slp_node)
2628 {
2629 gcall *stmt;
2630 tree vec_dest;
2631 tree scalar_dest;
2632 tree op, type;
2633 tree vec_oprnd0 = NULL_TREE, vec_oprnd1 = NULL_TREE;
2634 stmt_vec_info stmt_info = vinfo_for_stmt (gs), prev_stmt_info;
2635 tree vectype_out, vectype_in;
2636 int nunits_in;
2637 int nunits_out;
2638 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
2639 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
2640 vec_info *vinfo = stmt_info->vinfo;
2641 tree fndecl, new_temp, rhs_type;
2642 gimple *def_stmt;
2643 enum vect_def_type dt[3]
2644 = {vect_unknown_def_type, vect_unknown_def_type, vect_unknown_def_type};
2645 int ndts = 3;
2646 gimple *new_stmt = NULL;
2647 int ncopies, j;
2648 vec<tree> vargs = vNULL;
2649 enum { NARROW, NONE, WIDEN } modifier;
2650 size_t i, nargs;
2651 tree lhs;
2652
2653 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
2654 return false;
2655
2656 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
2657 && ! vec_stmt)
2658 return false;
2659
2660 /* Is GS a vectorizable call? */
2661 stmt = dyn_cast <gcall *> (gs);
2662 if (!stmt)
2663 return false;
2664
2665 if (gimple_call_internal_p (stmt)
2666 && (gimple_call_internal_fn (stmt) == IFN_MASK_LOAD
2667 || gimple_call_internal_fn (stmt) == IFN_MASK_STORE))
2668 return vectorizable_mask_load_store (stmt, gsi, vec_stmt,
2669 slp_node);
2670
2671 if (gimple_call_lhs (stmt) == NULL_TREE
2672 || TREE_CODE (gimple_call_lhs (stmt)) != SSA_NAME)
2673 return false;
2674
2675 gcc_checking_assert (!stmt_can_throw_internal (stmt));
2676
2677 vectype_out = STMT_VINFO_VECTYPE (stmt_info);
2678
2679 /* Process function arguments. */
2680 rhs_type = NULL_TREE;
2681 vectype_in = NULL_TREE;
2682 nargs = gimple_call_num_args (stmt);
2683
2684 /* Bail out if the function has more than three arguments; we do not have
2685 interesting builtin functions to vectorize with more than two arguments
2686 except for fma. No arguments is also not good. */
2687 if (nargs == 0 || nargs > 3)
2688 return false;
2689
2690 /* Ignore the argument of IFN_GOMP_SIMD_LANE, it is magic. */
2691 if (gimple_call_internal_p (stmt)
2692 && gimple_call_internal_fn (stmt) == IFN_GOMP_SIMD_LANE)
2693 {
2694 nargs = 0;
2695 rhs_type = unsigned_type_node;
2696 }
2697
2698 for (i = 0; i < nargs; i++)
2699 {
2700 tree opvectype;
2701
2702 op = gimple_call_arg (stmt, i);
2703
2704 /* We can only handle calls with arguments of the same type. */
2705 if (rhs_type
2706 && !types_compatible_p (rhs_type, TREE_TYPE (op)))
2707 {
2708 if (dump_enabled_p ())
2709 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2710 "argument types differ.\n");
2711 return false;
2712 }
2713 if (!rhs_type)
2714 rhs_type = TREE_TYPE (op);
2715
2716 if (!vect_is_simple_use (op, vinfo, &def_stmt, &dt[i], &opvectype))
2717 {
2718 if (dump_enabled_p ())
2719 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2720 "use not simple.\n");
2721 return false;
2722 }
2723
2724 if (!vectype_in)
2725 vectype_in = opvectype;
2726 else if (opvectype
2727 && opvectype != vectype_in)
2728 {
2729 if (dump_enabled_p ())
2730 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2731 "argument vector types differ.\n");
2732 return false;
2733 }
2734 }
2735 /* If all arguments are external or constant defs use a vector type with
2736 the same size as the output vector type. */
2737 if (!vectype_in)
2738 vectype_in = get_same_sized_vectype (rhs_type, vectype_out);
2739 if (vec_stmt)
2740 gcc_assert (vectype_in);
2741 if (!vectype_in)
2742 {
2743 if (dump_enabled_p ())
2744 {
2745 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2746 "no vectype for scalar type ");
2747 dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM, rhs_type);
2748 dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
2749 }
2750
2751 return false;
2752 }
2753
2754 /* FORNOW */
2755 nunits_in = TYPE_VECTOR_SUBPARTS (vectype_in);
2756 nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
2757 if (nunits_in == nunits_out / 2)
2758 modifier = NARROW;
2759 else if (nunits_out == nunits_in)
2760 modifier = NONE;
2761 else if (nunits_out == nunits_in / 2)
2762 modifier = WIDEN;
2763 else
2764 return false;
2765
2766 /* We only handle functions that do not read or clobber memory. */
2767 if (gimple_vuse (stmt))
2768 {
2769 if (dump_enabled_p ())
2770 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2771 "function reads from or writes to memory.\n");
2772 return false;
2773 }
2774
2775 /* For now, we only vectorize functions if a target specific builtin
2776 is available. TODO -- in some cases, it might be profitable to
2777 insert the calls for pieces of the vector, in order to be able
2778 to vectorize other operations in the loop. */
2779 fndecl = NULL_TREE;
2780 internal_fn ifn = IFN_LAST;
2781 combined_fn cfn = gimple_call_combined_fn (stmt);
2782 tree callee = gimple_call_fndecl (stmt);
2783
2784 /* First try using an internal function. */
2785 tree_code convert_code = ERROR_MARK;
2786 if (cfn != CFN_LAST
2787 && (modifier == NONE
2788 || (modifier == NARROW
2789 && simple_integer_narrowing (vectype_out, vectype_in,
2790 &convert_code))))
2791 ifn = vectorizable_internal_function (cfn, callee, vectype_out,
2792 vectype_in);
2793
2794 /* If that fails, try asking for a target-specific built-in function. */
2795 if (ifn == IFN_LAST)
2796 {
2797 if (cfn != CFN_LAST)
2798 fndecl = targetm.vectorize.builtin_vectorized_function
2799 (cfn, vectype_out, vectype_in);
2800 else
2801 fndecl = targetm.vectorize.builtin_md_vectorized_function
2802 (callee, vectype_out, vectype_in);
2803 }
2804
2805 if (ifn == IFN_LAST && !fndecl)
2806 {
2807 if (cfn == CFN_GOMP_SIMD_LANE
2808 && !slp_node
2809 && loop_vinfo
2810 && LOOP_VINFO_LOOP (loop_vinfo)->simduid
2811 && TREE_CODE (gimple_call_arg (stmt, 0)) == SSA_NAME
2812 && LOOP_VINFO_LOOP (loop_vinfo)->simduid
2813 == SSA_NAME_VAR (gimple_call_arg (stmt, 0)))
2814 {
2815 /* We can handle IFN_GOMP_SIMD_LANE by returning a
2816 { 0, 1, 2, ... vf - 1 } vector. */
2817 gcc_assert (nargs == 0);
2818 }
2819 else if (modifier == NONE
2820 && (gimple_call_builtin_p (stmt, BUILT_IN_BSWAP16)
2821 || gimple_call_builtin_p (stmt, BUILT_IN_BSWAP32)
2822 || gimple_call_builtin_p (stmt, BUILT_IN_BSWAP64)))
2823 return vectorizable_bswap (stmt, gsi, vec_stmt, slp_node,
2824 vectype_in, dt);
2825 else
2826 {
2827 if (dump_enabled_p ())
2828 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2829 "function is not vectorizable.\n");
2830 return false;
2831 }
2832 }
2833
2834 if (slp_node)
2835 ncopies = 1;
2836 else if (modifier == NARROW && ifn == IFN_LAST)
2837 ncopies = vect_get_num_copies (loop_vinfo, vectype_out);
2838 else
2839 ncopies = vect_get_num_copies (loop_vinfo, vectype_in);
2840
2841 /* Sanity check: make sure that at least one copy of the vectorized stmt
2842 needs to be generated. */
2843 gcc_assert (ncopies >= 1);
2844
2845 if (!vec_stmt) /* transformation not required. */
2846 {
2847 STMT_VINFO_TYPE (stmt_info) = call_vec_info_type;
2848 if (dump_enabled_p ())
2849 dump_printf_loc (MSG_NOTE, vect_location, "=== vectorizable_call ==="
2850 "\n");
2851 vect_model_simple_cost (stmt_info, ncopies, dt, ndts, NULL, NULL);
2852 if (ifn != IFN_LAST && modifier == NARROW && !slp_node)
2853 add_stmt_cost (stmt_info->vinfo->target_cost_data, ncopies / 2,
2854 vec_promote_demote, stmt_info, 0, vect_body);
2855
2856 return true;
2857 }
2858
2859 /* Transform. */
2860
2861 if (dump_enabled_p ())
2862 dump_printf_loc (MSG_NOTE, vect_location, "transform call.\n");
2863
2864 /* Handle def. */
2865 scalar_dest = gimple_call_lhs (stmt);
2866 vec_dest = vect_create_destination_var (scalar_dest, vectype_out);
2867
2868 prev_stmt_info = NULL;
2869 if (modifier == NONE || ifn != IFN_LAST)
2870 {
2871 tree prev_res = NULL_TREE;
2872 for (j = 0; j < ncopies; ++j)
2873 {
2874 /* Build argument list for the vectorized call. */
2875 if (j == 0)
2876 vargs.create (nargs);
2877 else
2878 vargs.truncate (0);
2879
2880 if (slp_node)
2881 {
2882 auto_vec<vec<tree> > vec_defs (nargs);
2883 vec<tree> vec_oprnds0;
2884
2885 for (i = 0; i < nargs; i++)
2886 vargs.quick_push (gimple_call_arg (stmt, i));
2887 vect_get_slp_defs (vargs, slp_node, &vec_defs);
2888 vec_oprnds0 = vec_defs[0];
2889
2890 /* Arguments are ready. Create the new vector stmt. */
2891 FOR_EACH_VEC_ELT (vec_oprnds0, i, vec_oprnd0)
2892 {
2893 size_t k;
2894 for (k = 0; k < nargs; k++)
2895 {
2896 vec<tree> vec_oprndsk = vec_defs[k];
2897 vargs[k] = vec_oprndsk[i];
2898 }
2899 if (modifier == NARROW)
2900 {
2901 tree half_res = make_ssa_name (vectype_in);
2902 gcall *call
2903 = gimple_build_call_internal_vec (ifn, vargs);
2904 gimple_call_set_lhs (call, half_res);
2905 gimple_call_set_nothrow (call, true);
2906 new_stmt = call;
2907 vect_finish_stmt_generation (stmt, new_stmt, gsi);
2908 if ((i & 1) == 0)
2909 {
2910 prev_res = half_res;
2911 continue;
2912 }
2913 new_temp = make_ssa_name (vec_dest);
2914 new_stmt = gimple_build_assign (new_temp, convert_code,
2915 prev_res, half_res);
2916 }
2917 else
2918 {
2919 gcall *call;
2920 if (ifn != IFN_LAST)
2921 call = gimple_build_call_internal_vec (ifn, vargs);
2922 else
2923 call = gimple_build_call_vec (fndecl, vargs);
2924 new_temp = make_ssa_name (vec_dest, call);
2925 gimple_call_set_lhs (call, new_temp);
2926 gimple_call_set_nothrow (call, true);
2927 new_stmt = call;
2928 }
2929 vect_finish_stmt_generation (stmt, new_stmt, gsi);
2930 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
2931 }
2932
2933 for (i = 0; i < nargs; i++)
2934 {
2935 vec<tree> vec_oprndsi = vec_defs[i];
2936 vec_oprndsi.release ();
2937 }
2938 continue;
2939 }
2940
2941 for (i = 0; i < nargs; i++)
2942 {
2943 op = gimple_call_arg (stmt, i);
2944 if (j == 0)
2945 vec_oprnd0
2946 = vect_get_vec_def_for_operand (op, stmt);
2947 else
2948 {
2949 vec_oprnd0 = gimple_call_arg (new_stmt, i);
2950 vec_oprnd0
2951 = vect_get_vec_def_for_stmt_copy (dt[i], vec_oprnd0);
2952 }
2953
2954 vargs.quick_push (vec_oprnd0);
2955 }
2956
2957 if (gimple_call_internal_p (stmt)
2958 && gimple_call_internal_fn (stmt) == IFN_GOMP_SIMD_LANE)
2959 {
2960 tree_vector_builder v (vectype_out, 1, 3);
2961 for (int k = 0; k < 3; ++k)
2962 v.quick_push (build_int_cst (unsigned_type_node,
2963 j * nunits_out + k));
2964 tree cst = v.build ();
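	  /* For example (values illustrative only): with nunits_out == 4 and
	     j == 1 the three encoded elements are 4, 5 and 6, which the
	     stepped encoding extends to the constant { 4, 5, 6, 7 }, i.e.
	     the lane numbers covered by the second vector copy.  */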
2965 tree new_var
2966 = vect_get_new_ssa_name (vectype_out, vect_simple_var, "cst_");
2967 gimple *init_stmt = gimple_build_assign (new_var, cst);
2968 vect_init_vector_1 (stmt, init_stmt, NULL);
2969 new_temp = make_ssa_name (vec_dest);
2970 new_stmt = gimple_build_assign (new_temp, new_var);
2971 }
2972 else if (modifier == NARROW)
2973 {
2974 tree half_res = make_ssa_name (vectype_in);
2975 gcall *call = gimple_build_call_internal_vec (ifn, vargs);
2976 gimple_call_set_lhs (call, half_res);
2977 gimple_call_set_nothrow (call, true);
2978 new_stmt = call;
2979 vect_finish_stmt_generation (stmt, new_stmt, gsi);
2980 if ((j & 1) == 0)
2981 {
2982 prev_res = half_res;
2983 continue;
2984 }
2985 new_temp = make_ssa_name (vec_dest);
2986 new_stmt = gimple_build_assign (new_temp, convert_code,
2987 prev_res, half_res);
2988 }
2989 else
2990 {
2991 gcall *call;
2992 if (ifn != IFN_LAST)
2993 call = gimple_build_call_internal_vec (ifn, vargs);
2994 else
2995 call = gimple_build_call_vec (fndecl, vargs);
2996 new_temp = make_ssa_name (vec_dest, call);
2997 gimple_call_set_lhs (call, new_temp);
2998 gimple_call_set_nothrow (call, true);
2999 new_stmt = call;
3000 }
3001 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3002
3003 if (j == (modifier == NARROW ? 1 : 0))
3004 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
3005 else
3006 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
3007
3008 prev_stmt_info = vinfo_for_stmt (new_stmt);
3009 }
3010 }
3011 else if (modifier == NARROW)
3012 {
3013 for (j = 0; j < ncopies; ++j)
3014 {
3015 /* Build argument list for the vectorized call. */
3016 if (j == 0)
3017 vargs.create (nargs * 2);
3018 else
3019 vargs.truncate (0);
3020
3021 if (slp_node)
3022 {
3023 auto_vec<vec<tree> > vec_defs (nargs);
3024 vec<tree> vec_oprnds0;
3025
3026 for (i = 0; i < nargs; i++)
3027 vargs.quick_push (gimple_call_arg (stmt, i));
3028 vect_get_slp_defs (vargs, slp_node, &vec_defs);
3029 vec_oprnds0 = vec_defs[0];
3030
3031 /* Arguments are ready. Create the new vector stmt. */
3032 for (i = 0; vec_oprnds0.iterate (i, &vec_oprnd0); i += 2)
3033 {
3034 size_t k;
3035 vargs.truncate (0);
3036 for (k = 0; k < nargs; k++)
3037 {
3038 vec<tree> vec_oprndsk = vec_defs[k];
3039 vargs.quick_push (vec_oprndsk[i]);
3040 vargs.quick_push (vec_oprndsk[i + 1]);
3041 }
3042 gcall *call;
3043 if (ifn != IFN_LAST)
3044 call = gimple_build_call_internal_vec (ifn, vargs);
3045 else
3046 call = gimple_build_call_vec (fndecl, vargs);
3047 new_temp = make_ssa_name (vec_dest, call);
3048 gimple_call_set_lhs (call, new_temp);
3049 gimple_call_set_nothrow (call, true);
3050 new_stmt = call;
3051 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3052 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
3053 }
3054
3055 for (i = 0; i < nargs; i++)
3056 {
3057 vec<tree> vec_oprndsi = vec_defs[i];
3058 vec_oprndsi.release ();
3059 }
3060 continue;
3061 }
3062
3063 for (i = 0; i < nargs; i++)
3064 {
3065 op = gimple_call_arg (stmt, i);
3066 if (j == 0)
3067 {
3068 vec_oprnd0
3069 = vect_get_vec_def_for_operand (op, stmt);
3070 vec_oprnd1
3071 = vect_get_vec_def_for_stmt_copy (dt[i], vec_oprnd0);
3072 }
3073 else
3074 {
3075 vec_oprnd1 = gimple_call_arg (new_stmt, 2*i + 1);
3076 vec_oprnd0
3077 = vect_get_vec_def_for_stmt_copy (dt[i], vec_oprnd1);
3078 vec_oprnd1
3079 = vect_get_vec_def_for_stmt_copy (dt[i], vec_oprnd0);
3080 }
3081
3082 vargs.quick_push (vec_oprnd0);
3083 vargs.quick_push (vec_oprnd1);
3084 }
3085
3086 new_stmt = gimple_build_call_vec (fndecl, vargs);
3087 new_temp = make_ssa_name (vec_dest, new_stmt);
3088 gimple_call_set_lhs (new_stmt, new_temp);
3089 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3090
3091 if (j == 0)
3092 STMT_VINFO_VEC_STMT (stmt_info) = new_stmt;
3093 else
3094 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
3095
3096 prev_stmt_info = vinfo_for_stmt (new_stmt);
3097 }
3098
3099 *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
3100 }
3101 else
3102 /* No current target implements this case. */
3103 return false;
3104
3105 vargs.release ();
3106
3107 /* The call in STMT might prevent it from being removed in dce.
3108 However, we cannot remove it here, due to the way the ssa name
3109 it defines is mapped to the new definition. So just replace the
3110 rhs of the statement with something harmless. */
3111
3112 if (slp_node)
3113 return true;
3114
3115 type = TREE_TYPE (scalar_dest);
3116 if (is_pattern_stmt_p (stmt_info))
3117 lhs = gimple_call_lhs (STMT_VINFO_RELATED_STMT (stmt_info));
3118 else
3119 lhs = gimple_call_lhs (stmt);
3120
3121 new_stmt = gimple_build_assign (lhs, build_zero_cst (type));
3122 set_vinfo_for_stmt (new_stmt, stmt_info);
3123 set_vinfo_for_stmt (stmt, NULL);
3124 STMT_VINFO_STMT (stmt_info) = new_stmt;
3125 gsi_replace (gsi, new_stmt, false);
3126
3127 return true;
3128 }
3129
3130
3131 struct simd_call_arg_info
3132 {
3133 tree vectype;
3134 tree op;
3135 HOST_WIDE_INT linear_step;
3136 enum vect_def_type dt;
3137 unsigned int align;
3138 bool simd_lane_linear;
3139 };
3140
3141 /* Helper function of vectorizable_simd_clone_call. If OP, an SSA_NAME,
3142 is linear within simd lane (but not within whole loop), note it in
3143 *ARGINFO. */
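/* For example (SSA names purely hypothetical): for

     _lane = GOMP_SIMD_LANE (simduid);
     _tmp = (sizetype) _lane;
     _off = _tmp * 4;
     op_1 = &a + _off;

   OP is linear within the simd lane, so ARGINFO records base &a and
   linear step 4.  */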
3144
3145 static void
3146 vect_simd_lane_linear (tree op, struct loop *loop,
3147 struct simd_call_arg_info *arginfo)
3148 {
3149 gimple *def_stmt = SSA_NAME_DEF_STMT (op);
3150
3151 if (!is_gimple_assign (def_stmt)
3152 || gimple_assign_rhs_code (def_stmt) != POINTER_PLUS_EXPR
3153 || !is_gimple_min_invariant (gimple_assign_rhs1 (def_stmt)))
3154 return;
3155
3156 tree base = gimple_assign_rhs1 (def_stmt);
3157 HOST_WIDE_INT linear_step = 0;
3158 tree v = gimple_assign_rhs2 (def_stmt);
3159 while (TREE_CODE (v) == SSA_NAME)
3160 {
3161 tree t;
3162 def_stmt = SSA_NAME_DEF_STMT (v);
3163 if (is_gimple_assign (def_stmt))
3164 switch (gimple_assign_rhs_code (def_stmt))
3165 {
3166 case PLUS_EXPR:
3167 t = gimple_assign_rhs2 (def_stmt);
3168 if (linear_step || TREE_CODE (t) != INTEGER_CST)
3169 return;
3170 base = fold_build2 (POINTER_PLUS_EXPR, TREE_TYPE (base), base, t);
3171 v = gimple_assign_rhs1 (def_stmt);
3172 continue;
3173 case MULT_EXPR:
3174 t = gimple_assign_rhs2 (def_stmt);
3175 if (linear_step || !tree_fits_shwi_p (t) || integer_zerop (t))
3176 return;
3177 linear_step = tree_to_shwi (t);
3178 v = gimple_assign_rhs1 (def_stmt);
3179 continue;
3180 CASE_CONVERT:
3181 t = gimple_assign_rhs1 (def_stmt);
3182 if (TREE_CODE (TREE_TYPE (t)) != INTEGER_TYPE
3183 || (TYPE_PRECISION (TREE_TYPE (v))
3184 < TYPE_PRECISION (TREE_TYPE (t))))
3185 return;
3186 if (!linear_step)
3187 linear_step = 1;
3188 v = t;
3189 continue;
3190 default:
3191 return;
3192 }
3193 else if (gimple_call_internal_p (def_stmt, IFN_GOMP_SIMD_LANE)
3194 && loop->simduid
3195 && TREE_CODE (gimple_call_arg (def_stmt, 0)) == SSA_NAME
3196 && (SSA_NAME_VAR (gimple_call_arg (def_stmt, 0))
3197 == loop->simduid))
3198 {
3199 if (!linear_step)
3200 linear_step = 1;
3201 arginfo->linear_step = linear_step;
3202 arginfo->op = base;
3203 arginfo->simd_lane_linear = true;
3204 return;
3205 }
3206 }
3207 }
3208
3209 /* Function vectorizable_simd_clone_call.
3210
3211 Check if STMT performs a function call that can be vectorized
3212 by calling a simd clone of the function.
3213 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
3214 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
3215 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
3216
3217 static bool
3218 vectorizable_simd_clone_call (gimple *stmt, gimple_stmt_iterator *gsi,
3219 gimple **vec_stmt, slp_tree slp_node)
3220 {
3221 tree vec_dest;
3222 tree scalar_dest;
3223 tree op, type;
3224 tree vec_oprnd0 = NULL_TREE;
3225 stmt_vec_info stmt_info = vinfo_for_stmt (stmt), prev_stmt_info;
3226 tree vectype;
3227 unsigned int nunits;
3228 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
3229 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
3230 vec_info *vinfo = stmt_info->vinfo;
3231 struct loop *loop = loop_vinfo ? LOOP_VINFO_LOOP (loop_vinfo) : NULL;
3232 tree fndecl, new_temp;
3233 gimple *def_stmt;
3234 gimple *new_stmt = NULL;
3235 int ncopies, j;
3236 auto_vec<simd_call_arg_info> arginfo;
3237 vec<tree> vargs = vNULL;
3238 size_t i, nargs;
3239 tree lhs, rtype, ratype;
3240 vec<constructor_elt, va_gc> *ret_ctor_elts = NULL;
3241
3242 /* Is STMT a vectorizable call? */
3243 if (!is_gimple_call (stmt))
3244 return false;
3245
3246 fndecl = gimple_call_fndecl (stmt);
3247 if (fndecl == NULL_TREE)
3248 return false;
3249
3250 struct cgraph_node *node = cgraph_node::get (fndecl);
3251 if (node == NULL || node->simd_clones == NULL)
3252 return false;
3253
3254 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
3255 return false;
3256
3257 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
3258 && ! vec_stmt)
3259 return false;
3260
3261 if (gimple_call_lhs (stmt)
3262 && TREE_CODE (gimple_call_lhs (stmt)) != SSA_NAME)
3263 return false;
3264
3265 gcc_checking_assert (!stmt_can_throw_internal (stmt));
3266
3267 vectype = STMT_VINFO_VECTYPE (stmt_info);
3268
3269 if (loop_vinfo && nested_in_vect_loop_p (loop, stmt))
3270 return false;
3271
3272 /* FORNOW */
3273 if (slp_node)
3274 return false;
3275
3276 /* Process function arguments. */
3277 nargs = gimple_call_num_args (stmt);
3278
3279 /* Bail out if the function has zero arguments. */
3280 if (nargs == 0)
3281 return false;
3282
3283 arginfo.reserve (nargs, true);
3284
3285 for (i = 0; i < nargs; i++)
3286 {
3287 simd_call_arg_info thisarginfo;
3288 affine_iv iv;
3289
3290 thisarginfo.linear_step = 0;
3291 thisarginfo.align = 0;
3292 thisarginfo.op = NULL_TREE;
3293 thisarginfo.simd_lane_linear = false;
3294
3295 op = gimple_call_arg (stmt, i);
3296 if (!vect_is_simple_use (op, vinfo, &def_stmt, &thisarginfo.dt,
3297 &thisarginfo.vectype)
3298 || thisarginfo.dt == vect_uninitialized_def)
3299 {
3300 if (dump_enabled_p ())
3301 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3302 "use not simple.\n");
3303 return false;
3304 }
3305
3306 if (thisarginfo.dt == vect_constant_def
3307 || thisarginfo.dt == vect_external_def)
3308 gcc_assert (thisarginfo.vectype == NULL_TREE);
3309 else
3310 gcc_assert (thisarginfo.vectype != NULL_TREE);
3311
3312 /* For linear arguments, the analyze phase should have saved
3313 the base and step in STMT_VINFO_SIMD_CLONE_INFO. */
3314 if (i * 3 + 4 <= STMT_VINFO_SIMD_CLONE_INFO (stmt_info).length ()
3315 && STMT_VINFO_SIMD_CLONE_INFO (stmt_info)[i * 3 + 2])
3316 {
3317 gcc_assert (vec_stmt);
3318 thisarginfo.linear_step
3319 = tree_to_shwi (STMT_VINFO_SIMD_CLONE_INFO (stmt_info)[i * 3 + 2]);
3320 thisarginfo.op
3321 = STMT_VINFO_SIMD_CLONE_INFO (stmt_info)[i * 3 + 1];
3322 thisarginfo.simd_lane_linear
3323 = (STMT_VINFO_SIMD_CLONE_INFO (stmt_info)[i * 3 + 3]
3324 == boolean_true_node);
3325 /* If loop has been peeled for alignment, we need to adjust it. */
3326 tree n1 = LOOP_VINFO_NITERS_UNCHANGED (loop_vinfo);
3327 tree n2 = LOOP_VINFO_NITERS (loop_vinfo);
3328 if (n1 != n2 && !thisarginfo.simd_lane_linear)
3329 {
3330 tree bias = fold_build2 (MINUS_EXPR, TREE_TYPE (n1), n1, n2);
3331 tree step = STMT_VINFO_SIMD_CLONE_INFO (stmt_info)[i * 3 + 2];
3332 tree opt = TREE_TYPE (thisarginfo.op);
3333 bias = fold_convert (TREE_TYPE (step), bias);
3334 bias = fold_build2 (MULT_EXPR, TREE_TYPE (step), bias, step);
3335 thisarginfo.op
3336 = fold_build2 (POINTER_TYPE_P (opt)
3337 ? POINTER_PLUS_EXPR : PLUS_EXPR, opt,
3338 thisarginfo.op, bias);
3339 }
3340 }
3341 else if (!vec_stmt
3342 && thisarginfo.dt != vect_constant_def
3343 && thisarginfo.dt != vect_external_def
3344 && loop_vinfo
3345 && TREE_CODE (op) == SSA_NAME
3346 && simple_iv (loop, loop_containing_stmt (stmt), op,
3347 &iv, false)
3348 && tree_fits_shwi_p (iv.step))
3349 {
3350 thisarginfo.linear_step = tree_to_shwi (iv.step);
3351 thisarginfo.op = iv.base;
3352 }
3353 else if ((thisarginfo.dt == vect_constant_def
3354 || thisarginfo.dt == vect_external_def)
3355 && POINTER_TYPE_P (TREE_TYPE (op)))
3356 thisarginfo.align = get_pointer_alignment (op) / BITS_PER_UNIT;
3357 /* Addresses of array elements indexed by GOMP_SIMD_LANE are
3358 linear too. */
3359 if (POINTER_TYPE_P (TREE_TYPE (op))
3360 && !thisarginfo.linear_step
3361 && !vec_stmt
3362 && thisarginfo.dt != vect_constant_def
3363 && thisarginfo.dt != vect_external_def
3364 && loop_vinfo
3365 && !slp_node
3366 && TREE_CODE (op) == SSA_NAME)
3367 vect_simd_lane_linear (op, loop, &thisarginfo);
3368
3369 arginfo.quick_push (thisarginfo);
3370 }
3371
3372 unsigned HOST_WIDE_INT vf;
3373 if (!LOOP_VINFO_VECT_FACTOR (loop_vinfo).is_constant (&vf))
3374 {
3375 if (dump_enabled_p ())
3376 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3377 "not considering SIMD clones; not yet supported"
3378 " for variable-width vectors.\n");
3379 return false;
3380 }
3381
3382 unsigned int badness = 0;
3383 struct cgraph_node *bestn = NULL;
3384 if (STMT_VINFO_SIMD_CLONE_INFO (stmt_info).exists ())
3385 bestn = cgraph_node::get (STMT_VINFO_SIMD_CLONE_INFO (stmt_info)[0]);
3386 else
3387 for (struct cgraph_node *n = node->simd_clones; n != NULL;
3388 n = n->simdclone->next_clone)
3389 {
3390 unsigned int this_badness = 0;
3391 if (n->simdclone->simdlen > vf
3392 || n->simdclone->nargs != nargs)
3393 continue;
3394 if (n->simdclone->simdlen < vf)
3395 this_badness += (exact_log2 (vf)
3396 - exact_log2 (n->simdclone->simdlen)) * 1024;
3397 if (n->simdclone->inbranch)
3398 this_badness += 2048;
3399 int target_badness = targetm.simd_clone.usable (n);
3400 if (target_badness < 0)
3401 continue;
3402 this_badness += target_badness * 512;
3403 /* FORNOW: Have to add code to add the mask argument. */
3404 if (n->simdclone->inbranch)
3405 continue;
3406 for (i = 0; i < nargs; i++)
3407 {
3408 switch (n->simdclone->args[i].arg_type)
3409 {
3410 case SIMD_CLONE_ARG_TYPE_VECTOR:
3411 if (!useless_type_conversion_p
3412 (n->simdclone->args[i].orig_type,
3413 TREE_TYPE (gimple_call_arg (stmt, i))))
3414 i = -1;
3415 else if (arginfo[i].dt == vect_constant_def
3416 || arginfo[i].dt == vect_external_def
3417 || arginfo[i].linear_step)
3418 this_badness += 64;
3419 break;
3420 case SIMD_CLONE_ARG_TYPE_UNIFORM:
3421 if (arginfo[i].dt != vect_constant_def
3422 && arginfo[i].dt != vect_external_def)
3423 i = -1;
3424 break;
3425 case SIMD_CLONE_ARG_TYPE_LINEAR_CONSTANT_STEP:
3426 case SIMD_CLONE_ARG_TYPE_LINEAR_REF_CONSTANT_STEP:
3427 if (arginfo[i].dt == vect_constant_def
3428 || arginfo[i].dt == vect_external_def
3429 || (arginfo[i].linear_step
3430 != n->simdclone->args[i].linear_step))
3431 i = -1;
3432 break;
3433 case SIMD_CLONE_ARG_TYPE_LINEAR_VARIABLE_STEP:
3434 case SIMD_CLONE_ARG_TYPE_LINEAR_VAL_CONSTANT_STEP:
3435 case SIMD_CLONE_ARG_TYPE_LINEAR_UVAL_CONSTANT_STEP:
3436 case SIMD_CLONE_ARG_TYPE_LINEAR_REF_VARIABLE_STEP:
3437 case SIMD_CLONE_ARG_TYPE_LINEAR_VAL_VARIABLE_STEP:
3438 case SIMD_CLONE_ARG_TYPE_LINEAR_UVAL_VARIABLE_STEP:
3439 /* FORNOW */
3440 i = -1;
3441 break;
3442 case SIMD_CLONE_ARG_TYPE_MASK:
3443 gcc_unreachable ();
3444 }
3445 if (i == (size_t) -1)
3446 break;
3447 if (n->simdclone->args[i].alignment > arginfo[i].align)
3448 {
3449 i = -1;
3450 break;
3451 }
3452 if (arginfo[i].align)
3453 this_badness += (exact_log2 (arginfo[i].align)
3454 - exact_log2 (n->simdclone->args[i].alignment));
3455 }
3456 if (i == (size_t) -1)
3457 continue;
3458 if (bestn == NULL || this_badness < badness)
3459 {
3460 bestn = n;
3461 badness = this_badness;
3462 }
3463 }
3464
3465 if (bestn == NULL)
3466 return false;
3467
3468 for (i = 0; i < nargs; i++)
3469 if ((arginfo[i].dt == vect_constant_def
3470 || arginfo[i].dt == vect_external_def)
3471 && bestn->simdclone->args[i].arg_type == SIMD_CLONE_ARG_TYPE_VECTOR)
3472 {
3473 arginfo[i].vectype
3474 = get_vectype_for_scalar_type (TREE_TYPE (gimple_call_arg (stmt,
3475 i)));
3476 if (arginfo[i].vectype == NULL
3477 || (TYPE_VECTOR_SUBPARTS (arginfo[i].vectype)
3478 > bestn->simdclone->simdlen))
3479 return false;
3480 }
3481
3482 fndecl = bestn->decl;
3483 nunits = bestn->simdclone->simdlen;
3484 ncopies = vf / nunits;
3485
3486 /* If the function isn't const, only allow it in simd loops where the
3487 user has asserted that at least nunits consecutive iterations can be
3488 performed using SIMD instructions. */
3489 if ((loop == NULL || (unsigned) loop->safelen < nunits)
3490 && gimple_vuse (stmt))
3491 return false;
3492
3493 /* Sanity check: make sure that at least one copy of the vectorized stmt
3494 needs to be generated. */
3495 gcc_assert (ncopies >= 1);
3496
3497 if (!vec_stmt) /* transformation not required. */
3498 {
3499 STMT_VINFO_SIMD_CLONE_INFO (stmt_info).safe_push (bestn->decl);
3500 for (i = 0; i < nargs; i++)
3501 if ((bestn->simdclone->args[i].arg_type
3502 == SIMD_CLONE_ARG_TYPE_LINEAR_CONSTANT_STEP)
3503 || (bestn->simdclone->args[i].arg_type
3504 == SIMD_CLONE_ARG_TYPE_LINEAR_REF_CONSTANT_STEP))
3505 {
3506 STMT_VINFO_SIMD_CLONE_INFO (stmt_info).safe_grow_cleared (i * 3
3507 + 1);
3508 STMT_VINFO_SIMD_CLONE_INFO (stmt_info).safe_push (arginfo[i].op);
3509 tree lst = POINTER_TYPE_P (TREE_TYPE (arginfo[i].op))
3510 ? size_type_node : TREE_TYPE (arginfo[i].op);
3511 tree ls = build_int_cst (lst, arginfo[i].linear_step);
3512 STMT_VINFO_SIMD_CLONE_INFO (stmt_info).safe_push (ls);
3513 tree sll = arginfo[i].simd_lane_linear
3514 ? boolean_true_node : boolean_false_node;
3515 STMT_VINFO_SIMD_CLONE_INFO (stmt_info).safe_push (sll);
3516 }
3517 STMT_VINFO_TYPE (stmt_info) = call_simd_clone_vec_info_type;
3518 if (dump_enabled_p ())
3519 dump_printf_loc (MSG_NOTE, vect_location,
3520 "=== vectorizable_simd_clone_call ===\n");
3521 /* vect_model_simple_cost (stmt_info, ncopies, dt, NULL, NULL); */
3522 return true;
3523 }
3524
3525 /* Transform. */
3526
3527 if (dump_enabled_p ())
3528 dump_printf_loc (MSG_NOTE, vect_location, "transform call.\n");
3529
3530 /* Handle def. */
3531 scalar_dest = gimple_call_lhs (stmt);
3532 vec_dest = NULL_TREE;
3533 rtype = NULL_TREE;
3534 ratype = NULL_TREE;
3535 if (scalar_dest)
3536 {
3537 vec_dest = vect_create_destination_var (scalar_dest, vectype);
3538 rtype = TREE_TYPE (TREE_TYPE (fndecl));
3539 if (TREE_CODE (rtype) == ARRAY_TYPE)
3540 {
3541 ratype = rtype;
3542 rtype = TREE_TYPE (ratype);
3543 }
3544 }
3545
3546 prev_stmt_info = NULL;
3547 for (j = 0; j < ncopies; ++j)
3548 {
3549 /* Build argument list for the vectorized call. */
3550 if (j == 0)
3551 vargs.create (nargs);
3552 else
3553 vargs.truncate (0);
3554
3555 for (i = 0; i < nargs; i++)
3556 {
3557 unsigned int k, l, m, o;
3558 tree atype;
3559 op = gimple_call_arg (stmt, i);
3560 switch (bestn->simdclone->args[i].arg_type)
3561 {
3562 case SIMD_CLONE_ARG_TYPE_VECTOR:
3563 atype = bestn->simdclone->args[i].vector_type;
3564 o = nunits / TYPE_VECTOR_SUBPARTS (atype);
3565 for (m = j * o; m < (j + 1) * o; m++)
3566 {
3567 if (TYPE_VECTOR_SUBPARTS (atype)
3568 < TYPE_VECTOR_SUBPARTS (arginfo[i].vectype))
3569 {
3570 unsigned int prec = GET_MODE_BITSIZE (TYPE_MODE (atype));
3571 k = (TYPE_VECTOR_SUBPARTS (arginfo[i].vectype)
3572 / TYPE_VECTOR_SUBPARTS (atype));
3573 gcc_assert ((k & (k - 1)) == 0);
3574 if (m == 0)
3575 vec_oprnd0
3576 = vect_get_vec_def_for_operand (op, stmt);
3577 else
3578 {
3579 vec_oprnd0 = arginfo[i].op;
3580 if ((m & (k - 1)) == 0)
3581 vec_oprnd0
3582 = vect_get_vec_def_for_stmt_copy (arginfo[i].dt,
3583 vec_oprnd0);
3584 }
3585 arginfo[i].op = vec_oprnd0;
3586 vec_oprnd0
3587 = build3 (BIT_FIELD_REF, atype, vec_oprnd0,
3588 bitsize_int (prec),
3589 bitsize_int ((m & (k - 1)) * prec));
3590 new_stmt
3591 = gimple_build_assign (make_ssa_name (atype),
3592 vec_oprnd0);
3593 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3594 vargs.safe_push (gimple_assign_lhs (new_stmt));
3595 }
3596 else
3597 {
3598 k = (TYPE_VECTOR_SUBPARTS (atype)
3599 / TYPE_VECTOR_SUBPARTS (arginfo[i].vectype));
3600 gcc_assert ((k & (k - 1)) == 0);
3601 vec<constructor_elt, va_gc> *ctor_elts;
3602 if (k != 1)
3603 vec_alloc (ctor_elts, k);
3604 else
3605 ctor_elts = NULL;
3606 for (l = 0; l < k; l++)
3607 {
3608 if (m == 0 && l == 0)
3609 vec_oprnd0
3610 = vect_get_vec_def_for_operand (op, stmt);
3611 else
3612 vec_oprnd0
3613 = vect_get_vec_def_for_stmt_copy (arginfo[i].dt,
3614 arginfo[i].op);
3615 arginfo[i].op = vec_oprnd0;
3616 if (k == 1)
3617 break;
3618 CONSTRUCTOR_APPEND_ELT (ctor_elts, NULL_TREE,
3619 vec_oprnd0);
3620 }
3621 if (k == 1)
3622 vargs.safe_push (vec_oprnd0);
3623 else
3624 {
3625 vec_oprnd0 = build_constructor (atype, ctor_elts);
3626 new_stmt
3627 = gimple_build_assign (make_ssa_name (atype),
3628 vec_oprnd0);
3629 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3630 vargs.safe_push (gimple_assign_lhs (new_stmt));
3631 }
3632 }
3633 }
3634 break;
3635 case SIMD_CLONE_ARG_TYPE_UNIFORM:
3636 vargs.safe_push (op);
3637 break;
3638 case SIMD_CLONE_ARG_TYPE_LINEAR_CONSTANT_STEP:
3639 case SIMD_CLONE_ARG_TYPE_LINEAR_REF_CONSTANT_STEP:
3640 if (j == 0)
3641 {
3642 gimple_seq stmts;
3643 arginfo[i].op
3644 = force_gimple_operand (arginfo[i].op, &stmts, true,
3645 NULL_TREE);
3646 if (stmts != NULL)
3647 {
3648 basic_block new_bb;
3649 edge pe = loop_preheader_edge (loop);
3650 new_bb = gsi_insert_seq_on_edge_immediate (pe, stmts);
3651 gcc_assert (!new_bb);
3652 }
3653 if (arginfo[i].simd_lane_linear)
3654 {
3655 vargs.safe_push (arginfo[i].op);
3656 break;
3657 }
3658 tree phi_res = copy_ssa_name (op);
3659 gphi *new_phi = create_phi_node (phi_res, loop->header);
3660 set_vinfo_for_stmt (new_phi,
3661 new_stmt_vec_info (new_phi, loop_vinfo));
3662 add_phi_arg (new_phi, arginfo[i].op,
3663 loop_preheader_edge (loop), UNKNOWN_LOCATION);
3664 enum tree_code code
3665 = POINTER_TYPE_P (TREE_TYPE (op))
3666 ? POINTER_PLUS_EXPR : PLUS_EXPR;
3667 tree type = POINTER_TYPE_P (TREE_TYPE (op))
3668 ? sizetype : TREE_TYPE (op);
3669 widest_int cst
3670 = wi::mul (bestn->simdclone->args[i].linear_step,
3671 ncopies * nunits);
3672 tree tcst = wide_int_to_tree (type, cst);
3673 tree phi_arg = copy_ssa_name (op);
3674 new_stmt
3675 = gimple_build_assign (phi_arg, code, phi_res, tcst);
3676 gimple_stmt_iterator si = gsi_after_labels (loop->header);
3677 gsi_insert_after (&si, new_stmt, GSI_NEW_STMT);
3678 set_vinfo_for_stmt (new_stmt,
3679 new_stmt_vec_info (new_stmt, loop_vinfo));
3680 add_phi_arg (new_phi, phi_arg, loop_latch_edge (loop),
3681 UNKNOWN_LOCATION);
3682 arginfo[i].op = phi_res;
3683 vargs.safe_push (phi_res);
3684 }
3685 else
3686 {
3687 enum tree_code code
3688 = POINTER_TYPE_P (TREE_TYPE (op))
3689 ? POINTER_PLUS_EXPR : PLUS_EXPR;
3690 tree type = POINTER_TYPE_P (TREE_TYPE (op))
3691 ? sizetype : TREE_TYPE (op);
3692 widest_int cst
3693 = wi::mul (bestn->simdclone->args[i].linear_step,
3694 j * nunits);
3695 tree tcst = wide_int_to_tree (type, cst);
3696 new_temp = make_ssa_name (TREE_TYPE (op));
3697 new_stmt = gimple_build_assign (new_temp, code,
3698 arginfo[i].op, tcst);
3699 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3700 vargs.safe_push (new_temp);
3701 }
3702 break;
3703 case SIMD_CLONE_ARG_TYPE_LINEAR_VAL_CONSTANT_STEP:
3704 case SIMD_CLONE_ARG_TYPE_LINEAR_UVAL_CONSTANT_STEP:
3705 case SIMD_CLONE_ARG_TYPE_LINEAR_VARIABLE_STEP:
3706 case SIMD_CLONE_ARG_TYPE_LINEAR_REF_VARIABLE_STEP:
3707 case SIMD_CLONE_ARG_TYPE_LINEAR_VAL_VARIABLE_STEP:
3708 case SIMD_CLONE_ARG_TYPE_LINEAR_UVAL_VARIABLE_STEP:
3709 default:
3710 gcc_unreachable ();
3711 }
3712 }
3713
3714 new_stmt = gimple_build_call_vec (fndecl, vargs);
3715 if (vec_dest)
3716 {
3717 gcc_assert (ratype || TYPE_VECTOR_SUBPARTS (rtype) == nunits);
3718 if (ratype)
3719 new_temp = create_tmp_var (ratype);
3720 else if (TYPE_VECTOR_SUBPARTS (vectype)
3721 == TYPE_VECTOR_SUBPARTS (rtype))
3722 new_temp = make_ssa_name (vec_dest, new_stmt);
3723 else
3724 new_temp = make_ssa_name (rtype, new_stmt);
3725 gimple_call_set_lhs (new_stmt, new_temp);
3726 }
3727 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3728
3729 if (vec_dest)
3730 {
3731 if (TYPE_VECTOR_SUBPARTS (vectype) < nunits)
3732 {
3733 unsigned int k, l;
3734 unsigned int prec = GET_MODE_BITSIZE (TYPE_MODE (vectype));
3735 k = nunits / TYPE_VECTOR_SUBPARTS (vectype);
3736 gcc_assert ((k & (k - 1)) == 0);
3737 for (l = 0; l < k; l++)
3738 {
3739 tree t;
3740 if (ratype)
3741 {
3742 t = build_fold_addr_expr (new_temp);
3743 t = build2 (MEM_REF, vectype, t,
3744 build_int_cst (TREE_TYPE (t),
3745 l * prec / BITS_PER_UNIT));
3746 }
3747 else
3748 t = build3 (BIT_FIELD_REF, vectype, new_temp,
3749 bitsize_int (prec), bitsize_int (l * prec));
3750 new_stmt
3751 = gimple_build_assign (make_ssa_name (vectype), t);
3752 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3753 if (j == 0 && l == 0)
3754 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
3755 else
3756 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
3757
3758 prev_stmt_info = vinfo_for_stmt (new_stmt);
3759 }
3760
3761 if (ratype)
3762 {
3763 tree clobber = build_constructor (ratype, NULL);
3764 TREE_THIS_VOLATILE (clobber) = 1;
3765 new_stmt = gimple_build_assign (new_temp, clobber);
3766 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3767 }
3768 continue;
3769 }
3770 else if (TYPE_VECTOR_SUBPARTS (vectype) > nunits)
3771 {
3772 unsigned int k = (TYPE_VECTOR_SUBPARTS (vectype)
3773 / TYPE_VECTOR_SUBPARTS (rtype));
3774 gcc_assert ((k & (k - 1)) == 0);
3775 if ((j & (k - 1)) == 0)
3776 vec_alloc (ret_ctor_elts, k);
3777 if (ratype)
3778 {
3779 unsigned int m, o = nunits / TYPE_VECTOR_SUBPARTS (rtype);
3780 for (m = 0; m < o; m++)
3781 {
3782 tree tem = build4 (ARRAY_REF, rtype, new_temp,
3783 size_int (m), NULL_TREE, NULL_TREE);
3784 new_stmt
3785 = gimple_build_assign (make_ssa_name (rtype), tem);
3786 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3787 CONSTRUCTOR_APPEND_ELT (ret_ctor_elts, NULL_TREE,
3788 gimple_assign_lhs (new_stmt));
3789 }
3790 tree clobber = build_constructor (ratype, NULL);
3791 TREE_THIS_VOLATILE (clobber) = 1;
3792 new_stmt = gimple_build_assign (new_temp, clobber);
3793 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3794 }
3795 else
3796 CONSTRUCTOR_APPEND_ELT (ret_ctor_elts, NULL_TREE, new_temp);
3797 if ((j & (k - 1)) != k - 1)
3798 continue;
3799 vec_oprnd0 = build_constructor (vectype, ret_ctor_elts);
3800 new_stmt
3801 = gimple_build_assign (make_ssa_name (vec_dest), vec_oprnd0);
3802 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3803
3804 if ((unsigned) j == k - 1)
3805 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
3806 else
3807 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
3808
3809 prev_stmt_info = vinfo_for_stmt (new_stmt);
3810 continue;
3811 }
3812 else if (ratype)
3813 {
3814 tree t = build_fold_addr_expr (new_temp);
3815 t = build2 (MEM_REF, vectype, t,
3816 build_int_cst (TREE_TYPE (t), 0));
3817 new_stmt
3818 = gimple_build_assign (make_ssa_name (vec_dest), t);
3819 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3820 tree clobber = build_constructor (ratype, NULL);
3821 TREE_THIS_VOLATILE (clobber) = 1;
3822 vect_finish_stmt_generation (stmt,
3823 gimple_build_assign (new_temp,
3824 clobber), gsi);
3825 }
3826 }
3827
3828 if (j == 0)
3829 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
3830 else
3831 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
3832
3833 prev_stmt_info = vinfo_for_stmt (new_stmt);
3834 }
3835
3836 vargs.release ();
3837
3838 /* The call in STMT might prevent it from being removed in DCE.
3839 However, we cannot remove it here, due to the way the SSA name
3840 it defines is mapped to the new definition. So just replace the
3841 rhs of the statement with something harmless. */
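/* For example, a scalar call "x_1 = foo (y_2)" whose work is now done by
   the vector statements is rewritten below as "x_1 = 0" (or replaced by a
   GIMPLE_NOP when it has no lhs), leaving it to DCE to delete the dead
   definition later.  */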
3842
3843 if (slp_node)
3844 return true;
3845
3846 if (scalar_dest)
3847 {
3848 type = TREE_TYPE (scalar_dest);
3849 if (is_pattern_stmt_p (stmt_info))
3850 lhs = gimple_call_lhs (STMT_VINFO_RELATED_STMT (stmt_info));
3851 else
3852 lhs = gimple_call_lhs (stmt);
3853 new_stmt = gimple_build_assign (lhs, build_zero_cst (type));
3854 }
3855 else
3856 new_stmt = gimple_build_nop ();
3857 set_vinfo_for_stmt (new_stmt, stmt_info);
3858 set_vinfo_for_stmt (stmt, NULL);
3859 STMT_VINFO_STMT (stmt_info) = new_stmt;
3860 gsi_replace (gsi, new_stmt, true);
3861 unlink_stmt_vdef (stmt);
3862
3863 return true;
3864 }
3865
3866
3867 /* Function vect_gen_widened_results_half
3868
3869 Create a vector stmt whose code is CODE, whose number of arguments is
3870 OP_TYPE and whose result variable is VEC_DEST; its arguments are
3871 VEC_OPRND0 and VEC_OPRND1. The new vector stmt is to be inserted at GSI.
3872 In the case that CODE is a CALL_EXPR, this means that a call to DECL
3873 needs to be created (DECL is a function-decl of a target-builtin).
3874 STMT is the original scalar stmt that we are vectorizing. */
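/* As a rough illustration: for a widening multiplication,
   supportable_widening_operation typically returns a "lo" and a "hi" code
   (e.g. VEC_WIDEN_MULT_LO_EXPR and VEC_WIDEN_MULT_HI_EXPR), and this helper
   is invoked once per half to build the corresponding vector stmt.  */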
3875
3876 static gimple *
3877 vect_gen_widened_results_half (enum tree_code code,
3878 tree decl,
3879 tree vec_oprnd0, tree vec_oprnd1, int op_type,
3880 tree vec_dest, gimple_stmt_iterator *gsi,
3881 gimple *stmt)
3882 {
3883 gimple *new_stmt;
3884 tree new_temp;
3885
3886 /* Generate half of the widened result: */
3887 if (code == CALL_EXPR)
3888 {
3889 /* Target specific support */
3890 if (op_type == binary_op)
3891 new_stmt = gimple_build_call (decl, 2, vec_oprnd0, vec_oprnd1);
3892 else
3893 new_stmt = gimple_build_call (decl, 1, vec_oprnd0);
3894 new_temp = make_ssa_name (vec_dest, new_stmt);
3895 gimple_call_set_lhs (new_stmt, new_temp);
3896 }
3897 else
3898 {
3899 /* Generic support */
3900 gcc_assert (op_type == TREE_CODE_LENGTH (code));
3901 if (op_type != binary_op)
3902 vec_oprnd1 = NULL;
3903 new_stmt = gimple_build_assign (vec_dest, code, vec_oprnd0, vec_oprnd1);
3904 new_temp = make_ssa_name (vec_dest, new_stmt);
3905 gimple_assign_set_lhs (new_stmt, new_temp);
3906 }
3907 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3908
3909 return new_stmt;
3910 }
3911
3912
3913 /* Get vectorized definitions for loop-based vectorization. For the first
3914 operand we call vect_get_vec_def_for_operand () (with OPRND containing
3915 the scalar operand), and for the rest we get a copy with
3916 vect_get_vec_def_for_stmt_copy() using the previous vector definition
3917 (stored in OPRND). See vect_get_vec_def_for_stmt_copy() for details.
3918 The vectors are collected into VEC_OPRNDS. */
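/* Note that each invocation pushes two vector defs and then recurses
   MULTI_STEP_CVT more times, so 2 * (MULTI_STEP_CVT + 1) defs are collected
   in total.  */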
3919
3920 static void
3921 vect_get_loop_based_defs (tree *oprnd, gimple *stmt, enum vect_def_type dt,
3922 vec<tree> *vec_oprnds, int multi_step_cvt)
3923 {
3924 tree vec_oprnd;
3925
3926 /* Get first vector operand. */
3927 /* All the vector operands except the very first one (which is the scalar
3928 operand) are stmt copies. */
3929 if (TREE_CODE (TREE_TYPE (*oprnd)) != VECTOR_TYPE)
3930 vec_oprnd = vect_get_vec_def_for_operand (*oprnd, stmt);
3931 else
3932 vec_oprnd = vect_get_vec_def_for_stmt_copy (dt, *oprnd);
3933
3934 vec_oprnds->quick_push (vec_oprnd);
3935
3936 /* Get second vector operand. */
3937 vec_oprnd = vect_get_vec_def_for_stmt_copy (dt, vec_oprnd);
3938 vec_oprnds->quick_push (vec_oprnd);
3939
3940 *oprnd = vec_oprnd;
3941
3942 /* For conversion in multiple steps, continue to get operands
3943 recursively. */
3944 if (multi_step_cvt)
3945 vect_get_loop_based_defs (oprnd, stmt, dt, vec_oprnds, multi_step_cvt - 1);
3946 }
3947
3948
3949 /* Create vectorized demotion statements for vector operands from VEC_OPRNDS.
3950 For multi-step conversions store the resulting vectors and call the function
3951 recursively. */
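/* As a sketch of the two-step case, narrowing int elements to char:
   the first invocation combines pairs of int vectors into short vectors
   using the caller-supplied CODE, and the recursive invocation then packs
   pairs of those short vectors into char vectors with VEC_PACK_TRUNC_EXPR.  */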
3952
3953 static void
3954 vect_create_vectorized_demotion_stmts (vec<tree> *vec_oprnds,
3955 int multi_step_cvt, gimple *stmt,
3956 vec<tree> vec_dsts,
3957 gimple_stmt_iterator *gsi,
3958 slp_tree slp_node, enum tree_code code,
3959 stmt_vec_info *prev_stmt_info)
3960 {
3961 unsigned int i;
3962 tree vop0, vop1, new_tmp, vec_dest;
3963 gimple *new_stmt;
3964 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
3965
3966 vec_dest = vec_dsts.pop ();
3967
3968 for (i = 0; i < vec_oprnds->length (); i += 2)
3969 {
3970 /* Create demotion operation. */
3971 vop0 = (*vec_oprnds)[i];
3972 vop1 = (*vec_oprnds)[i + 1];
3973 new_stmt = gimple_build_assign (vec_dest, code, vop0, vop1);
3974 new_tmp = make_ssa_name (vec_dest, new_stmt);
3975 gimple_assign_set_lhs (new_stmt, new_tmp);
3976 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3977
3978 if (multi_step_cvt)
3979 /* Store the resulting vector for next recursive call. */
3980 (*vec_oprnds)[i/2] = new_tmp;
3981 else
3982 {
3983 /* This is the last step of the conversion sequence. Store the
3984 vectors in SLP_NODE or in vector info of the scalar statement
3985 (or in STMT_VINFO_RELATED_STMT chain). */
3986 if (slp_node)
3987 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
3988 else
3989 {
3990 if (!*prev_stmt_info)
3991 STMT_VINFO_VEC_STMT (stmt_info) = new_stmt;
3992 else
3993 STMT_VINFO_RELATED_STMT (*prev_stmt_info) = new_stmt;
3994
3995 *prev_stmt_info = vinfo_for_stmt (new_stmt);
3996 }
3997 }
3998 }
3999
4000 /* For multi-step demotion operations we first generate demotion operations
4001 from the source type to the intermediate types, and then combine the
4002 results (stored in VEC_OPRNDS) with a demotion operation to the
4003 destination type. */
4004 if (multi_step_cvt)
4005 {
4006 /* At each level of recursion we have half of the operands we had at the
4007 previous level. */
4008 vec_oprnds->truncate ((i+1)/2);
4009 vect_create_vectorized_demotion_stmts (vec_oprnds, multi_step_cvt - 1,
4010 stmt, vec_dsts, gsi, slp_node,
4011 VEC_PACK_TRUNC_EXPR,
4012 prev_stmt_info);
4013 }
4014
4015 vec_dsts.quick_push (vec_dest);
4016 }
4017
4018
4019 /* Create vectorized promotion statements for vector operands from VEC_OPRNDS0
4020 and VEC_OPRNDS1 (for binary operations). For multi-step conversions store
4021 the resulting vectors and call the function recursively. */
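/* Each input vector yields a "lo" half and a "hi" half, so on return
   *VEC_OPRNDS0 contains twice as many vectors (of the wider type) as it did
   on entry; these become the inputs of the next, wider, conversion step.  */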
4022
4023 static void
4024 vect_create_vectorized_promotion_stmts (vec<tree> *vec_oprnds0,
4025 vec<tree> *vec_oprnds1,
4026 gimple *stmt, tree vec_dest,
4027 gimple_stmt_iterator *gsi,
4028 enum tree_code code1,
4029 enum tree_code code2, tree decl1,
4030 tree decl2, int op_type)
4031 {
4032 int i;
4033 tree vop0, vop1, new_tmp1, new_tmp2;
4034 gimple *new_stmt1, *new_stmt2;
4035 vec<tree> vec_tmp = vNULL;
4036
4037 vec_tmp.create (vec_oprnds0->length () * 2);
4038 FOR_EACH_VEC_ELT (*vec_oprnds0, i, vop0)
4039 {
4040 if (op_type == binary_op)
4041 vop1 = (*vec_oprnds1)[i];
4042 else
4043 vop1 = NULL_TREE;
4044
4045 /* Generate the two halves of promotion operation. */
4046 new_stmt1 = vect_gen_widened_results_half (code1, decl1, vop0, vop1,
4047 op_type, vec_dest, gsi, stmt);
4048 new_stmt2 = vect_gen_widened_results_half (code2, decl2, vop0, vop1,
4049 op_type, vec_dest, gsi, stmt);
4050 if (is_gimple_call (new_stmt1))
4051 {
4052 new_tmp1 = gimple_call_lhs (new_stmt1);
4053 new_tmp2 = gimple_call_lhs (new_stmt2);
4054 }
4055 else
4056 {
4057 new_tmp1 = gimple_assign_lhs (new_stmt1);
4058 new_tmp2 = gimple_assign_lhs (new_stmt2);
4059 }
4060
4061 /* Store the results for the next step. */
4062 vec_tmp.quick_push (new_tmp1);
4063 vec_tmp.quick_push (new_tmp2);
4064 }
4065
4066 vec_oprnds0->release ();
4067 *vec_oprnds0 = vec_tmp;
4068 }
4069
4070
4071 /* Check if STMT performs a conversion operation that can be vectorized.
4072 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
4073 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
4074 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
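/* As a rough guide to the modifiers used below: "int_x = (int) short_y"
   is a WIDEN conversion (fewer output lanes than input lanes per vector),
   "short_x = (short) int_y" is a NARROW conversion, and a same-width
   conversion such as "float_x = (float) int_y" between equally sized
   vector types is handled as NONE.  */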
4075
4076 static bool
4077 vectorizable_conversion (gimple *stmt, gimple_stmt_iterator *gsi,
4078 gimple **vec_stmt, slp_tree slp_node)
4079 {
4080 tree vec_dest;
4081 tree scalar_dest;
4082 tree op0, op1 = NULL_TREE;
4083 tree vec_oprnd0 = NULL_TREE, vec_oprnd1 = NULL_TREE;
4084 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
4085 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
4086 enum tree_code code, code1 = ERROR_MARK, code2 = ERROR_MARK;
4087 enum tree_code codecvt1 = ERROR_MARK, codecvt2 = ERROR_MARK;
4088 tree decl1 = NULL_TREE, decl2 = NULL_TREE;
4089 tree new_temp;
4090 gimple *def_stmt;
4091 enum vect_def_type dt[2] = {vect_unknown_def_type, vect_unknown_def_type};
4092 int ndts = 2;
4093 gimple *new_stmt = NULL;
4094 stmt_vec_info prev_stmt_info;
4095 int nunits_in;
4096 int nunits_out;
4097 tree vectype_out, vectype_in;
4098 int ncopies, i, j;
4099 tree lhs_type, rhs_type;
4100 enum { NARROW, NONE, WIDEN } modifier;
4101 vec<tree> vec_oprnds0 = vNULL;
4102 vec<tree> vec_oprnds1 = vNULL;
4103 tree vop0;
4104 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
4105 vec_info *vinfo = stmt_info->vinfo;
4106 int multi_step_cvt = 0;
4107 vec<tree> interm_types = vNULL;
4108 tree last_oprnd, intermediate_type, cvt_type = NULL_TREE;
4109 int op_type;
4110 unsigned short fltsz;
4111
4112 /* Is STMT a vectorizable conversion? */
4113
4114 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
4115 return false;
4116
4117 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
4118 && ! vec_stmt)
4119 return false;
4120
4121 if (!is_gimple_assign (stmt))
4122 return false;
4123
4124 if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME)
4125 return false;
4126
4127 code = gimple_assign_rhs_code (stmt);
4128 if (!CONVERT_EXPR_CODE_P (code)
4129 && code != FIX_TRUNC_EXPR
4130 && code != FLOAT_EXPR
4131 && code != WIDEN_MULT_EXPR
4132 && code != WIDEN_LSHIFT_EXPR)
4133 return false;
4134
4135 op_type = TREE_CODE_LENGTH (code);
4136
4137 /* Check types of lhs and rhs. */
4138 scalar_dest = gimple_assign_lhs (stmt);
4139 lhs_type = TREE_TYPE (scalar_dest);
4140 vectype_out = STMT_VINFO_VECTYPE (stmt_info);
4141
4142 op0 = gimple_assign_rhs1 (stmt);
4143 rhs_type = TREE_TYPE (op0);
4144
4145 if ((code != FIX_TRUNC_EXPR && code != FLOAT_EXPR)
4146 && !((INTEGRAL_TYPE_P (lhs_type)
4147 && INTEGRAL_TYPE_P (rhs_type))
4148 || (SCALAR_FLOAT_TYPE_P (lhs_type)
4149 && SCALAR_FLOAT_TYPE_P (rhs_type))))
4150 return false;
4151
4152 if (!VECTOR_BOOLEAN_TYPE_P (vectype_out)
4153 && ((INTEGRAL_TYPE_P (lhs_type)
4154 && !type_has_mode_precision_p (lhs_type))
4155 || (INTEGRAL_TYPE_P (rhs_type)
4156 && !type_has_mode_precision_p (rhs_type))))
4157 {
4158 if (dump_enabled_p ())
4159 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4160 "type conversion to/from bit-precision unsupported."
4161 "\n");
4162 return false;
4163 }
4164
4165 /* Check the operands of the operation. */
4166 if (!vect_is_simple_use (op0, vinfo, &def_stmt, &dt[0], &vectype_in))
4167 {
4168 if (dump_enabled_p ())
4169 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4170 "use not simple.\n");
4171 return false;
4172 }
4173 if (op_type == binary_op)
4174 {
4175 bool ok;
4176
4177 op1 = gimple_assign_rhs2 (stmt);
4178 gcc_assert (code == WIDEN_MULT_EXPR || code == WIDEN_LSHIFT_EXPR);
4179 /* For WIDEN_MULT_EXPR, if OP0 is a constant, use the type of
4180 OP1. */
4181 if (CONSTANT_CLASS_P (op0))
4182 ok = vect_is_simple_use (op1, vinfo, &def_stmt, &dt[1], &vectype_in);
4183 else
4184 ok = vect_is_simple_use (op1, vinfo, &def_stmt, &dt[1]);
4185
4186 if (!ok)
4187 {
4188 if (dump_enabled_p ())
4189 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4190 "use not simple.\n");
4191 return false;
4192 }
4193 }
4194
4195 /* If op0 is an external or constant def, use a vector type of
4196 the same size as the output vector type. */
4197 if (!vectype_in)
4198 vectype_in = get_same_sized_vectype (rhs_type, vectype_out);
4199 if (vec_stmt)
4200 gcc_assert (vectype_in);
4201 if (!vectype_in)
4202 {
4203 if (dump_enabled_p ())
4204 {
4205 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4206 "no vectype for scalar type ");
4207 dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM, rhs_type);
4208 dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
4209 }
4210
4211 return false;
4212 }
4213
4214 if (VECTOR_BOOLEAN_TYPE_P (vectype_out)
4215 && !VECTOR_BOOLEAN_TYPE_P (vectype_in))
4216 {
4217 if (dump_enabled_p ())
4218 {
4219 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4220 "can't convert between boolean and non "
4221 "boolean vectors");
4222 dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM, rhs_type);
4223 dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
4224 }
4225
4226 return false;
4227 }
4228
4229 nunits_in = TYPE_VECTOR_SUBPARTS (vectype_in);
4230 nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
4231 if (nunits_in < nunits_out)
4232 modifier = NARROW;
4233 else if (nunits_out == nunits_in)
4234 modifier = NONE;
4235 else
4236 modifier = WIDEN;
4237
4238 /* Multiple types in SLP are handled by creating the appropriate number of
4239 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
4240 case of SLP. */
4241 if (slp_node)
4242 ncopies = 1;
4243 else if (modifier == NARROW)
4244 ncopies = vect_get_num_copies (loop_vinfo, vectype_out);
4245 else
4246 ncopies = vect_get_num_copies (loop_vinfo, vectype_in);
4247
4248 /* Sanity check: make sure that at least one copy of the vectorized stmt
4249 needs to be generated. */
4250 gcc_assert (ncopies >= 1);
4251
4252 bool found_mode = false;
4253 scalar_mode lhs_mode = SCALAR_TYPE_MODE (lhs_type);
4254 scalar_mode rhs_mode = SCALAR_TYPE_MODE (rhs_type);
4255 opt_scalar_mode rhs_mode_iter;
4256
4257 /* Supportable by target? */
4258 switch (modifier)
4259 {
4260 case NONE:
4261 if (code != FIX_TRUNC_EXPR && code != FLOAT_EXPR)
4262 return false;
4263 if (supportable_convert_operation (code, vectype_out, vectype_in,
4264 &decl1, &code1))
4265 break;
4266 /* FALLTHRU */
4267 unsupported:
4268 if (dump_enabled_p ())
4269 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4270 "conversion not supported by target.\n");
4271 return false;
4272
4273 case WIDEN:
4274 if (supportable_widening_operation (code, stmt, vectype_out, vectype_in,
4275 &code1, &code2, &multi_step_cvt,
4276 &interm_types))
4277 {
4278 /* Binary widening operation can only be supported directly by the
4279 architecture. */
4280 gcc_assert (!(multi_step_cvt && op_type == binary_op));
4281 break;
4282 }
4283
4284 if (code != FLOAT_EXPR
4285 || GET_MODE_SIZE (lhs_mode) <= GET_MODE_SIZE (rhs_mode))
4286 goto unsupported;
4287
4288 fltsz = GET_MODE_SIZE (lhs_mode);
4289 FOR_EACH_2XWIDER_MODE (rhs_mode_iter, rhs_mode)
4290 {
4291 rhs_mode = rhs_mode_iter.require ();
4292 if (GET_MODE_SIZE (rhs_mode) > fltsz)
4293 break;
4294
4295 cvt_type
4296 = build_nonstandard_integer_type (GET_MODE_BITSIZE (rhs_mode), 0);
4297 cvt_type = get_same_sized_vectype (cvt_type, vectype_in);
4298 if (cvt_type == NULL_TREE)
4299 goto unsupported;
4300
4301 if (GET_MODE_SIZE (rhs_mode) == fltsz)
4302 {
4303 if (!supportable_convert_operation (code, vectype_out,
4304 cvt_type, &decl1, &codecvt1))
4305 goto unsupported;
4306 }
4307 else if (!supportable_widening_operation (code, stmt, vectype_out,
4308 cvt_type, &codecvt1,
4309 &codecvt2, &multi_step_cvt,
4310 &interm_types))
4311 continue;
4312 else
4313 gcc_assert (multi_step_cvt == 0);
4314
4315 if (supportable_widening_operation (NOP_EXPR, stmt, cvt_type,
4316 vectype_in, &code1, &code2,
4317 &multi_step_cvt, &interm_types))
4318 {
4319 found_mode = true;
4320 break;
4321 }
4322 }
4323
4324 if (!found_mode)
4325 goto unsupported;
4326
4327 if (GET_MODE_SIZE (rhs_mode) == fltsz)
4328 codecvt2 = ERROR_MARK;
4329 else
4330 {
4331 multi_step_cvt++;
4332 interm_types.safe_push (cvt_type);
4333 cvt_type = NULL_TREE;
4334 }
4335 break;
4336
4337 case NARROW:
4338 gcc_assert (op_type == unary_op);
4339 if (supportable_narrowing_operation (code, vectype_out, vectype_in,
4340 &code1, &multi_step_cvt,
4341 &interm_types))
4342 break;
4343
4344 if (code != FIX_TRUNC_EXPR
4345 || GET_MODE_SIZE (lhs_mode) >= GET_MODE_SIZE (rhs_mode))
4346 goto unsupported;
4347
4348 cvt_type
4349 = build_nonstandard_integer_type (GET_MODE_BITSIZE (rhs_mode), 0);
4350 cvt_type = get_same_sized_vectype (cvt_type, vectype_in);
4351 if (cvt_type == NULL_TREE)
4352 goto unsupported;
4353 if (!supportable_convert_operation (code, cvt_type, vectype_in,
4354 &decl1, &codecvt1))
4355 goto unsupported;
4356 if (supportable_narrowing_operation (NOP_EXPR, vectype_out, cvt_type,
4357 &code1, &multi_step_cvt,
4358 &interm_types))
4359 break;
4360 goto unsupported;
4361
4362 default:
4363 gcc_unreachable ();
4364 }
4365
4366 if (!vec_stmt) /* transformation not required. */
4367 {
4368 if (dump_enabled_p ())
4369 dump_printf_loc (MSG_NOTE, vect_location,
4370 "=== vectorizable_conversion ===\n");
4371 if (code == FIX_TRUNC_EXPR || code == FLOAT_EXPR)
4372 {
4373 STMT_VINFO_TYPE (stmt_info) = type_conversion_vec_info_type;
4374 vect_model_simple_cost (stmt_info, ncopies, dt, ndts, NULL, NULL);
4375 }
4376 else if (modifier == NARROW)
4377 {
4378 STMT_VINFO_TYPE (stmt_info) = type_demotion_vec_info_type;
4379 vect_model_promotion_demotion_cost (stmt_info, dt, multi_step_cvt);
4380 }
4381 else
4382 {
4383 STMT_VINFO_TYPE (stmt_info) = type_promotion_vec_info_type;
4384 vect_model_promotion_demotion_cost (stmt_info, dt, multi_step_cvt);
4385 }
4386 interm_types.release ();
4387 return true;
4388 }
4389
4390 /* Transform. */
4391 if (dump_enabled_p ())
4392 dump_printf_loc (MSG_NOTE, vect_location,
4393 "transform conversion. ncopies = %d.\n", ncopies);
4394
4395 if (op_type == binary_op)
4396 {
4397 if (CONSTANT_CLASS_P (op0))
4398 op0 = fold_convert (TREE_TYPE (op1), op0);
4399 else if (CONSTANT_CLASS_P (op1))
4400 op1 = fold_convert (TREE_TYPE (op0), op1);
4401 }
4402
4403 /* In case of multi-step conversion, we first generate conversion operations
4404 to the intermediate types, and then from those types to the final one.
4405 We create vector destinations for the intermediate type (TYPES) received
4406 from supportable_*_operation, and store them in the correct order
4407 for future use in vect_create_vectorized_*_stmts (). */
4408 auto_vec<tree> vec_dsts (multi_step_cvt + 1);
4409 vec_dest = vect_create_destination_var (scalar_dest,
4410 (cvt_type && modifier == WIDEN)
4411 ? cvt_type : vectype_out);
4412 vec_dsts.quick_push (vec_dest);
4413
4414 if (multi_step_cvt)
4415 {
4416 for (i = interm_types.length () - 1;
4417 interm_types.iterate (i, &intermediate_type); i--)
4418 {
4419 vec_dest = vect_create_destination_var (scalar_dest,
4420 intermediate_type);
4421 vec_dsts.quick_push (vec_dest);
4422 }
4423 }
4424
4425 if (cvt_type)
4426 vec_dest = vect_create_destination_var (scalar_dest,
4427 modifier == WIDEN
4428 ? vectype_out : cvt_type);
4429
4430 if (!slp_node)
4431 {
4432 if (modifier == WIDEN)
4433 {
4434 vec_oprnds0.create (multi_step_cvt ? vect_pow2 (multi_step_cvt) : 1);
4435 if (op_type == binary_op)
4436 vec_oprnds1.create (1);
4437 }
4438 else if (modifier == NARROW)
4439 vec_oprnds0.create (
4440 2 * (multi_step_cvt ? vect_pow2 (multi_step_cvt) : 1));
4441 }
4442 else if (code == WIDEN_LSHIFT_EXPR)
4443 vec_oprnds1.create (slp_node->vec_stmts_size);
4444
4445 last_oprnd = op0;
4446 prev_stmt_info = NULL;
4447 switch (modifier)
4448 {
4449 case NONE:
4450 for (j = 0; j < ncopies; j++)
4451 {
4452 if (j == 0)
4453 vect_get_vec_defs (op0, NULL, stmt, &vec_oprnds0, NULL, slp_node);
4454 else
4455 vect_get_vec_defs_for_stmt_copy (dt, &vec_oprnds0, NULL);
4456
4457 FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
4458 {
4459 /* Arguments are ready, create the new vector stmt. */
4460 if (code1 == CALL_EXPR)
4461 {
4462 new_stmt = gimple_build_call (decl1, 1, vop0);
4463 new_temp = make_ssa_name (vec_dest, new_stmt);
4464 gimple_call_set_lhs (new_stmt, new_temp);
4465 }
4466 else
4467 {
4468 gcc_assert (TREE_CODE_LENGTH (code1) == unary_op);
4469 new_stmt = gimple_build_assign (vec_dest, code1, vop0);
4470 new_temp = make_ssa_name (vec_dest, new_stmt);
4471 gimple_assign_set_lhs (new_stmt, new_temp);
4472 }
4473
4474 vect_finish_stmt_generation (stmt, new_stmt, gsi);
4475 if (slp_node)
4476 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
4477 else
4478 {
4479 if (!prev_stmt_info)
4480 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
4481 else
4482 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
4483 prev_stmt_info = vinfo_for_stmt (new_stmt);
4484 }
4485 }
4486 }
4487 break;
4488
4489 case WIDEN:
4490 /* In case the vectorization factor (VF) is bigger than the number
4491 of elements that we can fit in a vectype (nunits), we have to
4492 generate more than one vector stmt - i.e., we need to "unroll"
4493 the vector stmt by a factor VF/nunits. */
4494 for (j = 0; j < ncopies; j++)
4495 {
4496 /* Handle uses. */
4497 if (j == 0)
4498 {
4499 if (slp_node)
4500 {
4501 if (code == WIDEN_LSHIFT_EXPR)
4502 {
4503 unsigned int k;
4504
4505 vec_oprnd1 = op1;
4506 /* Store vec_oprnd1 for every vector stmt to be created
4507 for SLP_NODE. We check during the analysis that all
4508 the shift arguments are the same. */
4509 for (k = 0; k < slp_node->vec_stmts_size - 1; k++)
4510 vec_oprnds1.quick_push (vec_oprnd1);
4511
4512 vect_get_vec_defs (op0, NULL_TREE, stmt, &vec_oprnds0, NULL,
4513 slp_node);
4514 }
4515 else
4516 vect_get_vec_defs (op0, op1, stmt, &vec_oprnds0,
4517 &vec_oprnds1, slp_node);
4518 }
4519 else
4520 {
4521 vec_oprnd0 = vect_get_vec_def_for_operand (op0, stmt);
4522 vec_oprnds0.quick_push (vec_oprnd0);
4523 if (op_type == binary_op)
4524 {
4525 if (code == WIDEN_LSHIFT_EXPR)
4526 vec_oprnd1 = op1;
4527 else
4528 vec_oprnd1 = vect_get_vec_def_for_operand (op1, stmt);
4529 vec_oprnds1.quick_push (vec_oprnd1);
4530 }
4531 }
4532 }
4533 else
4534 {
4535 vec_oprnd0 = vect_get_vec_def_for_stmt_copy (dt[0], vec_oprnd0);
4536 vec_oprnds0.truncate (0);
4537 vec_oprnds0.quick_push (vec_oprnd0);
4538 if (op_type == binary_op)
4539 {
4540 if (code == WIDEN_LSHIFT_EXPR)
4541 vec_oprnd1 = op1;
4542 else
4543 vec_oprnd1 = vect_get_vec_def_for_stmt_copy (dt[1],
4544 vec_oprnd1);
4545 vec_oprnds1.truncate (0);
4546 vec_oprnds1.quick_push (vec_oprnd1);
4547 }
4548 }
4549
4550 /* Arguments are ready. Create the new vector stmts. */
4551 for (i = multi_step_cvt; i >= 0; i--)
4552 {
4553 tree this_dest = vec_dsts[i];
4554 enum tree_code c1 = code1, c2 = code2;
4555 if (i == 0 && codecvt2 != ERROR_MARK)
4556 {
4557 c1 = codecvt1;
4558 c2 = codecvt2;
4559 }
4560 vect_create_vectorized_promotion_stmts (&vec_oprnds0,
4561 &vec_oprnds1,
4562 stmt, this_dest, gsi,
4563 c1, c2, decl1, decl2,
4564 op_type);
4565 }
4566
4567 FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
4568 {
4569 if (cvt_type)
4570 {
4571 if (codecvt1 == CALL_EXPR)
4572 {
4573 new_stmt = gimple_build_call (decl1, 1, vop0);
4574 new_temp = make_ssa_name (vec_dest, new_stmt);
4575 gimple_call_set_lhs (new_stmt, new_temp);
4576 }
4577 else
4578 {
4579 gcc_assert (TREE_CODE_LENGTH (codecvt1) == unary_op);
4580 new_temp = make_ssa_name (vec_dest);
4581 new_stmt = gimple_build_assign (new_temp, codecvt1,
4582 vop0);
4583 }
4584
4585 vect_finish_stmt_generation (stmt, new_stmt, gsi);
4586 }
4587 else
4588 new_stmt = SSA_NAME_DEF_STMT (vop0);
4589
4590 if (slp_node)
4591 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
4592 else
4593 {
4594 if (!prev_stmt_info)
4595 STMT_VINFO_VEC_STMT (stmt_info) = new_stmt;
4596 else
4597 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
4598 prev_stmt_info = vinfo_for_stmt (new_stmt);
4599 }
4600 }
4601 }
4602
4603 *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
4604 break;
4605
4606 case NARROW:
4607 /* In case the vectorization factor (VF) is bigger than the number
4608 of elements that we can fit in a vectype (nunits), we have to
4609 generate more than one vector stmt - i.e., we need to "unroll"
4610 the vector stmt by a factor VF/nunits. */
4611 for (j = 0; j < ncopies; j++)
4612 {
4613 /* Handle uses. */
4614 if (slp_node)
4615 vect_get_vec_defs (op0, NULL_TREE, stmt, &vec_oprnds0, NULL,
4616 slp_node);
4617 else
4618 {
4619 vec_oprnds0.truncate (0);
4620 vect_get_loop_based_defs (&last_oprnd, stmt, dt[0], &vec_oprnds0,
4621 vect_pow2 (multi_step_cvt) - 1);
4622 }
4623
4624 /* Arguments are ready. Create the new vector stmts. */
4625 if (cvt_type)
4626 FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
4627 {
4628 if (codecvt1 == CALL_EXPR)
4629 {
4630 new_stmt = gimple_build_call (decl1, 1, vop0);
4631 new_temp = make_ssa_name (vec_dest, new_stmt);
4632 gimple_call_set_lhs (new_stmt, new_temp);
4633 }
4634 else
4635 {
4636 gcc_assert (TREE_CODE_LENGTH (codecvt1) == unary_op);
4637 new_temp = make_ssa_name (vec_dest);
4638 new_stmt = gimple_build_assign (new_temp, codecvt1,
4639 vop0);
4640 }
4641
4642 vect_finish_stmt_generation (stmt, new_stmt, gsi);
4643 vec_oprnds0[i] = new_temp;
4644 }
4645
4646 vect_create_vectorized_demotion_stmts (&vec_oprnds0, multi_step_cvt,
4647 stmt, vec_dsts, gsi,
4648 slp_node, code1,
4649 &prev_stmt_info);
4650 }
4651
4652 *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
4653 break;
4654 }
4655
4656 vec_oprnds0.release ();
4657 vec_oprnds1.release ();
4658 interm_types.release ();
4659
4660 return true;
4661 }
4662
4663
4664 /* Function vectorizable_assignment.
4665
4666 Check if STMT performs an assignment (copy) that can be vectorized.
4667 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
4668 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
4669 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
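/* Roughly, this covers plain copies such as "a_1 = b_2", PAREN_EXPR, and
   conversions (including VIEW_CONVERT_EXPR) that change neither the number
   of vector elements nor the vector size.  */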
4670
4671 static bool
4672 vectorizable_assignment (gimple *stmt, gimple_stmt_iterator *gsi,
4673 gimple **vec_stmt, slp_tree slp_node)
4674 {
4675 tree vec_dest;
4676 tree scalar_dest;
4677 tree op;
4678 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
4679 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
4680 tree new_temp;
4681 gimple *def_stmt;
4682 enum vect_def_type dt[1] = {vect_unknown_def_type};
4683 int ndts = 1;
4684 int ncopies;
4685 int i, j;
4686 vec<tree> vec_oprnds = vNULL;
4687 tree vop;
4688 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
4689 vec_info *vinfo = stmt_info->vinfo;
4690 gimple *new_stmt = NULL;
4691 stmt_vec_info prev_stmt_info = NULL;
4692 enum tree_code code;
4693 tree vectype_in;
4694
4695 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
4696 return false;
4697
4698 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
4699 && ! vec_stmt)
4700 return false;
4701
4702 /* Is vectorizable assignment? */
4703 if (!is_gimple_assign (stmt))
4704 return false;
4705
4706 scalar_dest = gimple_assign_lhs (stmt);
4707 if (TREE_CODE (scalar_dest) != SSA_NAME)
4708 return false;
4709
4710 code = gimple_assign_rhs_code (stmt);
4711 if (gimple_assign_single_p (stmt)
4712 || code == PAREN_EXPR
4713 || CONVERT_EXPR_CODE_P (code))
4714 op = gimple_assign_rhs1 (stmt);
4715 else
4716 return false;
4717
4718 if (code == VIEW_CONVERT_EXPR)
4719 op = TREE_OPERAND (op, 0);
4720
4721 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
4722 unsigned int nunits = TYPE_VECTOR_SUBPARTS (vectype);
4723
4724 /* Multiple types in SLP are handled by creating the appropriate number of
4725 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
4726 case of SLP. */
4727 if (slp_node)
4728 ncopies = 1;
4729 else
4730 ncopies = vect_get_num_copies (loop_vinfo, vectype);
4731
4732 gcc_assert (ncopies >= 1);
4733
4734 if (!vect_is_simple_use (op, vinfo, &def_stmt, &dt[0], &vectype_in))
4735 {
4736 if (dump_enabled_p ())
4737 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4738 "use not simple.\n");
4739 return false;
4740 }
4741
4742 /* We can handle NOP_EXPR conversions that do not change the number
4743 of elements or the vector size. */
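/* For instance, an "int" <-> "unsigned int" conversion keeps both the
   element count and the vector size, so it can be vectorized as a simple
   vector copy through a VIEW_CONVERT_EXPR below.  */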
4744 if ((CONVERT_EXPR_CODE_P (code)
4745 || code == VIEW_CONVERT_EXPR)
4746 && (!vectype_in
4747 || TYPE_VECTOR_SUBPARTS (vectype_in) != nunits
4748 || (GET_MODE_SIZE (TYPE_MODE (vectype))
4749 != GET_MODE_SIZE (TYPE_MODE (vectype_in)))))
4750 return false;
4751
4752 /* We do not handle bit-precision changes. */
4753 if ((CONVERT_EXPR_CODE_P (code)
4754 || code == VIEW_CONVERT_EXPR)
4755 && INTEGRAL_TYPE_P (TREE_TYPE (scalar_dest))
4756 && (!type_has_mode_precision_p (TREE_TYPE (scalar_dest))
4757 || !type_has_mode_precision_p (TREE_TYPE (op)))
4758 /* But a conversion that does not change the bit-pattern is ok. */
4759 && !((TYPE_PRECISION (TREE_TYPE (scalar_dest))
4760 > TYPE_PRECISION (TREE_TYPE (op)))
4761 && TYPE_UNSIGNED (TREE_TYPE (op)))
4762 /* Conversion between boolean types of different sizes is
4763 a simple assignment in case their vectypes are the same
4764 boolean vectors. */
4765 && (!VECTOR_BOOLEAN_TYPE_P (vectype)
4766 || !VECTOR_BOOLEAN_TYPE_P (vectype_in)))
4767 {
4768 if (dump_enabled_p ())
4769 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4770 "type conversion to/from bit-precision "
4771 "unsupported.\n");
4772 return false;
4773 }
4774
4775 if (!vec_stmt) /* transformation not required. */
4776 {
4777 STMT_VINFO_TYPE (stmt_info) = assignment_vec_info_type;
4778 if (dump_enabled_p ())
4779 dump_printf_loc (MSG_NOTE, vect_location,
4780 "=== vectorizable_assignment ===\n");
4781 vect_model_simple_cost (stmt_info, ncopies, dt, ndts, NULL, NULL);
4782 return true;
4783 }
4784
4785 /* Transform. */
4786 if (dump_enabled_p ())
4787 dump_printf_loc (MSG_NOTE, vect_location, "transform assignment.\n");
4788
4789 /* Handle def. */
4790 vec_dest = vect_create_destination_var (scalar_dest, vectype);
4791
4792 /* Handle use. */
4793 for (j = 0; j < ncopies; j++)
4794 {
4795 /* Handle uses. */
4796 if (j == 0)
4797 vect_get_vec_defs (op, NULL, stmt, &vec_oprnds, NULL, slp_node);
4798 else
4799 vect_get_vec_defs_for_stmt_copy (dt, &vec_oprnds, NULL);
4800
4801 /* Arguments are ready. Create the new vector stmt. */
4802 FOR_EACH_VEC_ELT (vec_oprnds, i, vop)
4803 {
4804 if (CONVERT_EXPR_CODE_P (code)
4805 || code == VIEW_CONVERT_EXPR)
4806 vop = build1 (VIEW_CONVERT_EXPR, vectype, vop);
4807 new_stmt = gimple_build_assign (vec_dest, vop);
4808 new_temp = make_ssa_name (vec_dest, new_stmt);
4809 gimple_assign_set_lhs (new_stmt, new_temp);
4810 vect_finish_stmt_generation (stmt, new_stmt, gsi);
4811 if (slp_node)
4812 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
4813 }
4814
4815 if (slp_node)
4816 continue;
4817
4818 if (j == 0)
4819 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
4820 else
4821 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
4822
4823 prev_stmt_info = vinfo_for_stmt (new_stmt);
4824 }
4825
4826 vec_oprnds.release ();
4827 return true;
4828 }
4829
4830
4831 /* Return TRUE if CODE (a shift operation) is supported for SCALAR_TYPE
4832 either as shift by a scalar or by a vector. */
4833
4834 bool
4835 vect_supportable_shift (enum tree_code code, tree scalar_type)
4836 {
4837
4838 machine_mode vec_mode;
4839 optab optab;
4840 int icode;
4841 tree vectype;
4842
4843 vectype = get_vectype_for_scalar_type (scalar_type);
4844 if (!vectype)
4845 return false;
4846
4847 optab = optab_for_tree_code (code, vectype, optab_scalar);
4848 if (!optab
4849 || optab_handler (optab, TYPE_MODE (vectype)) == CODE_FOR_nothing)
4850 {
4851 optab = optab_for_tree_code (code, vectype, optab_vector);
4852 if (!optab
4853 || (optab_handler (optab, TYPE_MODE (vectype))
4854 == CODE_FOR_nothing))
4855 return false;
4856 }
4857
4858 vec_mode = TYPE_MODE (vectype);
4859 icode = (int) optab_handler (optab, vec_mode);
4860 if (icode == CODE_FOR_nothing)
4861 return false;
4862
4863 return true;
4864 }
4865
4866
4867 /* Function vectorizable_shift.
4868
4869 Check if STMT performs a shift operation that can be vectorized.
4870 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
4871 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
4872 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
4873
4874 static bool
4875 vectorizable_shift (gimple *stmt, gimple_stmt_iterator *gsi,
4876 gimple **vec_stmt, slp_tree slp_node)
4877 {
4878 tree vec_dest;
4879 tree scalar_dest;
4880 tree op0, op1 = NULL;
4881 tree vec_oprnd1 = NULL_TREE;
4882 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
4883 tree vectype;
4884 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
4885 enum tree_code code;
4886 machine_mode vec_mode;
4887 tree new_temp;
4888 optab optab;
4889 int icode;
4890 machine_mode optab_op2_mode;
4891 gimple *def_stmt;
4892 enum vect_def_type dt[2] = {vect_unknown_def_type, vect_unknown_def_type};
4893 int ndts = 2;
4894 gimple *new_stmt = NULL;
4895 stmt_vec_info prev_stmt_info;
4896 int nunits_in;
4897 int nunits_out;
4898 tree vectype_out;
4899 tree op1_vectype;
4900 int ncopies;
4901 int j, i;
4902 vec<tree> vec_oprnds0 = vNULL;
4903 vec<tree> vec_oprnds1 = vNULL;
4904 tree vop0, vop1;
4905 unsigned int k;
4906 bool scalar_shift_arg = true;
4907 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
4908 vec_info *vinfo = stmt_info->vinfo;
4909
4910 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
4911 return false;
4912
4913 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
4914 && ! vec_stmt)
4915 return false;
4916
4917 /* Is STMT a vectorizable binary/unary operation? */
4918 if (!is_gimple_assign (stmt))
4919 return false;
4920
4921 if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME)
4922 return false;
4923
4924 code = gimple_assign_rhs_code (stmt);
4925
4926 if (!(code == LSHIFT_EXPR || code == RSHIFT_EXPR || code == LROTATE_EXPR
4927 || code == RROTATE_EXPR))
4928 return false;
4929
4930 scalar_dest = gimple_assign_lhs (stmt);
4931 vectype_out = STMT_VINFO_VECTYPE (stmt_info);
4932 if (!type_has_mode_precision_p (TREE_TYPE (scalar_dest)))
4933 {
4934 if (dump_enabled_p ())
4935 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4936 "bit-precision shifts not supported.\n");
4937 return false;
4938 }
4939
4940 op0 = gimple_assign_rhs1 (stmt);
4941 if (!vect_is_simple_use (op0, vinfo, &def_stmt, &dt[0], &vectype))
4942 {
4943 if (dump_enabled_p ())
4944 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4945 "use not simple.\n");
4946 return false;
4947 }
4948 /* If op0 is an external or constant def use a vector type with
4949 the same size as the output vector type. */
4950 if (!vectype)
4951 vectype = get_same_sized_vectype (TREE_TYPE (op0), vectype_out);
4952 if (vec_stmt)
4953 gcc_assert (vectype);
4954 if (!vectype)
4955 {
4956 if (dump_enabled_p ())
4957 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4958 "no vectype for scalar type\n");
4959 return false;
4960 }
4961
4962 nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
4963 nunits_in = TYPE_VECTOR_SUBPARTS (vectype);
4964 if (nunits_out != nunits_in)
4965 return false;
4966
4967 op1 = gimple_assign_rhs2 (stmt);
4968 if (!vect_is_simple_use (op1, vinfo, &def_stmt, &dt[1], &op1_vectype))
4969 {
4970 if (dump_enabled_p ())
4971 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4972 "use not simple.\n");
4973 return false;
4974 }
4975
4976 /* Multiple types in SLP are handled by creating the appropriate number of
4977 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
4978 case of SLP. */
4979 if (slp_node)
4980 ncopies = 1;
4981 else
4982 ncopies = vect_get_num_copies (loop_vinfo, vectype);
4983
4984 gcc_assert (ncopies >= 1);
4985
4986 /* Determine whether the shift amount is a vector or a scalar. If the
4987 shift/rotate amount is a vector, use the vector/vector shift optabs. */
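/* For example, in "a[i] = b[i] << 3" or "a[i] = b[i] << s" with a
   loop-invariant s the shift amount is a scalar, whereas in
   "a[i] = b[i] << c[i]" it is a vector.  */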
4988
4989 if ((dt[1] == vect_internal_def
4990 || dt[1] == vect_induction_def)
4991 && !slp_node)
4992 scalar_shift_arg = false;
4993 else if (dt[1] == vect_constant_def
4994 || dt[1] == vect_external_def
4995 || dt[1] == vect_internal_def)
4996 {
4997 /* In SLP, we need to check whether the shift count is the same;
4998 in loops, if it is a constant or invariant, it is always
4999 a scalar shift. */
5000 if (slp_node)
5001 {
5002 vec<gimple *> stmts = SLP_TREE_SCALAR_STMTS (slp_node);
5003 gimple *slpstmt;
5004
5005 FOR_EACH_VEC_ELT (stmts, k, slpstmt)
5006 if (!operand_equal_p (gimple_assign_rhs2 (slpstmt), op1, 0))
5007 scalar_shift_arg = false;
5008 }
5009
5010 /* If the shift amount is computed by a pattern stmt we cannot
5011 use the scalar amount directly, so give up and use a vector
5012 shift. */
5013 if (dt[1] == vect_internal_def)
5014 {
5015 gimple *def = SSA_NAME_DEF_STMT (op1);
5016 if (is_pattern_stmt_p (vinfo_for_stmt (def)))
5017 scalar_shift_arg = false;
5018 }
5019 }
5020 else
5021 {
5022 if (dump_enabled_p ())
5023 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5024 "operand mode requires invariant argument.\n");
5025 return false;
5026 }
5027
5028 /* Vector shifted by vector. */
5029 if (!scalar_shift_arg)
5030 {
5031 optab = optab_for_tree_code (code, vectype, optab_vector);
5032 if (dump_enabled_p ())
5033 dump_printf_loc (MSG_NOTE, vect_location,
5034 "vector/vector shift/rotate found.\n");
5035
5036 if (!op1_vectype)
5037 op1_vectype = get_same_sized_vectype (TREE_TYPE (op1), vectype_out);
5038 if (op1_vectype == NULL_TREE
5039 || TYPE_MODE (op1_vectype) != TYPE_MODE (vectype))
5040 {
5041 if (dump_enabled_p ())
5042 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5043 "unusable type for last operand in"
5044 " vector/vector shift/rotate.\n");
5045 return false;
5046 }
5047 }
5048 /* See if the machine has a vector-shifted-by-scalar insn and, if not,
5049 whether it has a vector-shifted-by-vector insn. */
5050 else
5051 {
5052 optab = optab_for_tree_code (code, vectype, optab_scalar);
5053 if (optab
5054 && optab_handler (optab, TYPE_MODE (vectype)) != CODE_FOR_nothing)
5055 {
5056 if (dump_enabled_p ())
5057 dump_printf_loc (MSG_NOTE, vect_location,
5058 "vector/scalar shift/rotate found.\n");
5059 }
5060 else
5061 {
5062 optab = optab_for_tree_code (code, vectype, optab_vector);
5063 if (optab
5064 && (optab_handler (optab, TYPE_MODE (vectype))
5065 != CODE_FOR_nothing))
5066 {
5067 scalar_shift_arg = false;
5068
5069 if (dump_enabled_p ())
5070 dump_printf_loc (MSG_NOTE, vect_location,
5071 "vector/vector shift/rotate found.\n");
5072
5073 /* Unlike the other binary operators, shifts/rotates take an int
5074 rhs rather than one of the same type as the lhs, so make sure
5075 the scalar is the right type if we are dealing with vectors
5076 of long long/long/short/char. */
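/* E.g. for "long long" elements shifted by an "int" count, the count
   is converted (and, when transforming outside SLP, broadcast via
   vect_init_vector) to the vector element type before use.  */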
5077 if (dt[1] == vect_constant_def)
5078 op1 = fold_convert (TREE_TYPE (vectype), op1);
5079 else if (!useless_type_conversion_p (TREE_TYPE (vectype),
5080 TREE_TYPE (op1)))
5081 {
5082 if (slp_node
5083 && TYPE_MODE (TREE_TYPE (vectype))
5084 != TYPE_MODE (TREE_TYPE (op1)))
5085 {
5086 if (dump_enabled_p ())
5087 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5088 "unusable type for last operand in"
5089 " vector/vector shift/rotate.\n");
5090 return false;
5091 }
5092 if (vec_stmt && !slp_node)
5093 {
5094 op1 = fold_convert (TREE_TYPE (vectype), op1);
5095 op1 = vect_init_vector (stmt, op1,
5096 TREE_TYPE (vectype), NULL);
5097 }
5098 }
5099 }
5100 }
5101 }
5102
5103 /* Supportable by target? */
5104 if (!optab)
5105 {
5106 if (dump_enabled_p ())
5107 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5108 "no optab.\n");
5109 return false;
5110 }
5111 vec_mode = TYPE_MODE (vectype);
5112 icode = (int) optab_handler (optab, vec_mode);
5113 if (icode == CODE_FOR_nothing)
5114 {
5115 if (dump_enabled_p ())
5116 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5117 "op not supported by target.\n");
5118 /* Check only during analysis. */
5119 if (GET_MODE_SIZE (vec_mode) != UNITS_PER_WORD
5120 || (!vec_stmt
5121 && !vect_worthwhile_without_simd_p (vinfo, code)))
5122 return false;
5123 if (dump_enabled_p ())
5124 dump_printf_loc (MSG_NOTE, vect_location,
5125 "proceeding using word mode.\n");
5126 }
5127
5128 /* Worthwhile without SIMD support? Check only during analysis. */
5129 if (!vec_stmt
5130 && !VECTOR_MODE_P (TYPE_MODE (vectype))
5131 && !vect_worthwhile_without_simd_p (vinfo, code))
5132 {
5133 if (dump_enabled_p ())
5134 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5135 "not worthwhile without SIMD support.\n");
5136 return false;
5137 }
5138
5139 if (!vec_stmt) /* transformation not required. */
5140 {
5141 STMT_VINFO_TYPE (stmt_info) = shift_vec_info_type;
5142 if (dump_enabled_p ())
5143 dump_printf_loc (MSG_NOTE, vect_location,
5144 "=== vectorizable_shift ===\n");
5145 vect_model_simple_cost (stmt_info, ncopies, dt, ndts, NULL, NULL);
5146 return true;
5147 }
5148
5149 /* Transform. */
5150
5151 if (dump_enabled_p ())
5152 dump_printf_loc (MSG_NOTE, vect_location,
5153 "transform binary/unary operation.\n");
5154
5155 /* Handle def. */
5156 vec_dest = vect_create_destination_var (scalar_dest, vectype);
5157
5158 prev_stmt_info = NULL;
5159 for (j = 0; j < ncopies; j++)
5160 {
5161 /* Handle uses. */
5162 if (j == 0)
5163 {
5164 if (scalar_shift_arg)
5165 {
5166 /* Vector shl and shr insn patterns can be defined with scalar
5167 operand 2 (shift operand). In this case, use constant or loop
5168 invariant op1 directly, without extending it to vector mode
5169 first. */
5170 optab_op2_mode = insn_data[icode].operand[2].mode;
5171 if (!VECTOR_MODE_P (optab_op2_mode))
5172 {
5173 if (dump_enabled_p ())
5174 dump_printf_loc (MSG_NOTE, vect_location,
5175 "operand 1 using scalar mode.\n");
5176 vec_oprnd1 = op1;
5177 vec_oprnds1.create (slp_node ? slp_node->vec_stmts_size : 1);
5178 vec_oprnds1.quick_push (vec_oprnd1);
5179 if (slp_node)
5180 {
5181 /* Store vec_oprnd1 for every vector stmt to be created
5182 for SLP_NODE. We check during the analysis that all
5183 the shift arguments are the same.
5184 TODO: Allow different constants for different vector
5185 stmts generated for an SLP instance. */
5186 for (k = 0; k < slp_node->vec_stmts_size - 1; k++)
5187 vec_oprnds1.quick_push (vec_oprnd1);
5188 }
5189 }
5190 }
5191
5192 /* vec_oprnd1 is available if operand 1 should be of a scalar-type
5193 (a special case for certain kinds of vector shifts); otherwise,
5194 operand 1 should be of a vector type (the usual case). */
5195 if (vec_oprnd1)
5196 vect_get_vec_defs (op0, NULL_TREE, stmt, &vec_oprnds0, NULL,
5197 slp_node);
5198 else
5199 vect_get_vec_defs (op0, op1, stmt, &vec_oprnds0, &vec_oprnds1,
5200 slp_node);
5201 }
5202 else
5203 vect_get_vec_defs_for_stmt_copy (dt, &vec_oprnds0, &vec_oprnds1);
5204
5205 /* Arguments are ready. Create the new vector stmt. */
5206 FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
5207 {
5208 vop1 = vec_oprnds1[i];
5209 new_stmt = gimple_build_assign (vec_dest, code, vop0, vop1);
5210 new_temp = make_ssa_name (vec_dest, new_stmt);
5211 gimple_assign_set_lhs (new_stmt, new_temp);
5212 vect_finish_stmt_generation (stmt, new_stmt, gsi);
5213 if (slp_node)
5214 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
5215 }
5216
5217 if (slp_node)
5218 continue;
5219
5220 if (j == 0)
5221 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
5222 else
5223 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
5224 prev_stmt_info = vinfo_for_stmt (new_stmt);
5225 }
5226
5227 vec_oprnds0.release ();
5228 vec_oprnds1.release ();
5229
5230 return true;
5231 }
5232
5233
5234 /* Function vectorizable_operation.
5235
5236 Check if STMT performs a binary, unary or ternary operation that can
5237 be vectorized.
5238 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
5239 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
5240 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
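/* Typical examples are "a[i] = b[i] + c[i]" (a binary PLUS_EXPR) and
   "a[i] = -b[i]" (a unary NEGATE_EXPR); shifts and rotates are rejected
   here and handled by vectorizable_shift instead.  */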
5241
5242 static bool
5243 vectorizable_operation (gimple *stmt, gimple_stmt_iterator *gsi,
5244 gimple **vec_stmt, slp_tree slp_node)
5245 {
5246 tree vec_dest;
5247 tree scalar_dest;
5248 tree op0, op1 = NULL_TREE, op2 = NULL_TREE;
5249 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
5250 tree vectype;
5251 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
5252 enum tree_code code, orig_code;
5253 machine_mode vec_mode;
5254 tree new_temp;
5255 int op_type;
5256 optab optab;
5257 bool target_support_p;
5258 gimple *def_stmt;
5259 enum vect_def_type dt[3]
5260 = {vect_unknown_def_type, vect_unknown_def_type, vect_unknown_def_type};
5261 int ndts = 3;
5262 gimple *new_stmt = NULL;
5263 stmt_vec_info prev_stmt_info;
5264 int nunits_in;
5265 int nunits_out;
5266 tree vectype_out;
5267 int ncopies;
5268 int j, i;
5269 vec<tree> vec_oprnds0 = vNULL;
5270 vec<tree> vec_oprnds1 = vNULL;
5271 vec<tree> vec_oprnds2 = vNULL;
5272 tree vop0, vop1, vop2;
5273 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
5274 vec_info *vinfo = stmt_info->vinfo;
5275
5276 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
5277 return false;
5278
5279 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
5280 && ! vec_stmt)
5281 return false;
5282
5283 /* Is STMT a vectorizable binary/unary operation? */
5284 if (!is_gimple_assign (stmt))
5285 return false;
5286
5287 if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME)
5288 return false;
5289
5290 orig_code = code = gimple_assign_rhs_code (stmt);
5291
5292 /* For pointer addition and subtraction, we should use the normal
5293 plus and minus for the vector operation. */
5294 if (code == POINTER_PLUS_EXPR)
5295 code = PLUS_EXPR;
5296 if (code == POINTER_DIFF_EXPR)
5297 code = MINUS_EXPR;
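/* E.g. a pointer increment "p_1 = p_2 + 16" (POINTER_PLUS_EXPR) is
   vectorized using an ordinary PLUS_EXPR on the vectorized operands.  */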
5298
5299 /* Support only unary, binary and ternary operations. */
5300 op_type = TREE_CODE_LENGTH (code);
5301 if (op_type != unary_op && op_type != binary_op && op_type != ternary_op)
5302 {
5303 if (dump_enabled_p ())
5304 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5305 "num. args = %d (not unary/binary/ternary op).\n",
5306 op_type);
5307 return false;
5308 }
5309
5310 scalar_dest = gimple_assign_lhs (stmt);
5311 vectype_out = STMT_VINFO_VECTYPE (stmt_info);
5312
5313 /* Most operations cannot handle bit-precision types without extra
5314 truncations. */
5315 if (!VECTOR_BOOLEAN_TYPE_P (vectype_out)
5316 && !type_has_mode_precision_p (TREE_TYPE (scalar_dest))
5317 /* Exceptions are bitwise binary operations. */
5318 && code != BIT_IOR_EXPR
5319 && code != BIT_XOR_EXPR
5320 && code != BIT_AND_EXPR)
5321 {
5322 if (dump_enabled_p ())
5323 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5324 "bit-precision arithmetic not supported.\n");
5325 return false;
5326 }
5327
5328 op0 = gimple_assign_rhs1 (stmt);
5329 if (!vect_is_simple_use (op0, vinfo, &def_stmt, &dt[0], &vectype))
5330 {
5331 if (dump_enabled_p ())
5332 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5333 "use not simple.\n");
5334 return false;
5335 }
5336 /* If op0 is an external or constant def, use a vector type with
5337 the same size as the output vector type. */
5338 if (!vectype)
5339 {
5340 /* For a boolean type we cannot determine the vectype from an
5341 invariant value (we don't know whether it is a vector of
5342 booleans or a vector of integers). Use the output vectype
5343 because operations on booleans don't change the
5344 type. */
5345 if (VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (op0)))
5346 {
5347 if (!VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (scalar_dest)))
5348 {
5349 if (dump_enabled_p ())
5350 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5351 "not supported operation on bool value.\n");
5352 return false;
5353 }
5354 vectype = vectype_out;
5355 }
5356 else
5357 vectype = get_same_sized_vectype (TREE_TYPE (op0), vectype_out);
5358 }
5359 if (vec_stmt)
5360 gcc_assert (vectype);
5361 if (!vectype)
5362 {
5363 if (dump_enabled_p ())
5364 {
5365 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5366 "no vectype for scalar type ");
5367 dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
5368 TREE_TYPE (op0));
5369 dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
5370 }
5371
5372 return false;
5373 }
5374
5375 nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
5376 nunits_in = TYPE_VECTOR_SUBPARTS (vectype);
5377 if (nunits_out != nunits_in)
5378 return false;
5379
5380 if (op_type == binary_op || op_type == ternary_op)
5381 {
5382 op1 = gimple_assign_rhs2 (stmt);
5383 if (!vect_is_simple_use (op1, vinfo, &def_stmt, &dt[1]))
5384 {
5385 if (dump_enabled_p ())
5386 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5387 "use not simple.\n");
5388 return false;
5389 }
5390 }
5391 if (op_type == ternary_op)
5392 {
5393 op2 = gimple_assign_rhs3 (stmt);
5394 if (!vect_is_simple_use (op2, vinfo, &def_stmt, &dt[2]))
5395 {
5396 if (dump_enabled_p ())
5397 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5398 "use not simple.\n");
5399 return false;
5400 }
5401 }
5402
5403 /* Multiple types in SLP are handled by creating the appropriate number of
5404 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
5405 case of SLP. */
5406 if (slp_node)
5407 ncopies = 1;
5408 else
5409 ncopies = vect_get_num_copies (loop_vinfo, vectype);
5410
5411 gcc_assert (ncopies >= 1);
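/* E.g. (illustrative), with VF == 16 and a four-element vectype,
   ncopies == 4 in the non-SLP case; see the worked RELATED_STMT
   example further below. */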
5412
5413 /* Shifts are handled in vectorizable_shift (). */
5414 if (code == LSHIFT_EXPR || code == RSHIFT_EXPR || code == LROTATE_EXPR
5415 || code == RROTATE_EXPR)
5416 return false;
5417
5418 /* Supportable by target? */
5419
5420 vec_mode = TYPE_MODE (vectype);
5421 if (code == MULT_HIGHPART_EXPR)
5422 target_support_p = can_mult_highpart_p (vec_mode, TYPE_UNSIGNED (vectype));
5423 else
5424 {
5425 optab = optab_for_tree_code (code, vectype, optab_default);
5426 if (!optab)
5427 {
5428 if (dump_enabled_p ())
5429 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5430 "no optab.\n");
5431 return false;
5432 }
5433 target_support_p = (optab_handler (optab, vec_mode)
5434 != CODE_FOR_nothing);
5435 }
5436
5437 if (!target_support_p)
5438 {
5439 if (dump_enabled_p ())
5440 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5441 "op not supported by target.\n");
5442 /* Check only during analysis. */
5443 if (GET_MODE_SIZE (vec_mode) != UNITS_PER_WORD
5444 || (!vec_stmt && !vect_worthwhile_without_simd_p (vinfo, code)))
5445 return false;
5446 if (dump_enabled_p ())
5447 dump_printf_loc (MSG_NOTE, vect_location,
5448 "proceeding using word mode.\n");
5449 }
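/* Illustrative example (assuming a 64-bit target with no vector
   support): a V8QI BIT_AND_EXPR occupies exactly one word, so it can
   be carried out as a single word-mode AND even though no vector
   instruction exists for it. */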
5450
5451 /* Worthwhile without SIMD support? Check only during analysis. */
5452 if (!VECTOR_MODE_P (vec_mode)
5453 && !vec_stmt
5454 && !vect_worthwhile_without_simd_p (vinfo, code))
5455 {
5456 if (dump_enabled_p ())
5457 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5458 "not worthwhile without SIMD support.\n");
5459 return false;
5460 }
5461
5462 if (!vec_stmt) /* transformation not required. */
5463 {
5464 STMT_VINFO_TYPE (stmt_info) = op_vec_info_type;
5465 if (dump_enabled_p ())
5466 dump_printf_loc (MSG_NOTE, vect_location,
5467 "=== vectorizable_operation ===\n");
5468 vect_model_simple_cost (stmt_info, ncopies, dt, ndts, NULL, NULL);
5469 return true;
5470 }
5471
5472 /* Transform. */
5473
5474 if (dump_enabled_p ())
5475 dump_printf_loc (MSG_NOTE, vect_location,
5476 "transform binary/unary operation.\n");
5477
5478 /* Handle def. */
5479 vec_dest = vect_create_destination_var (scalar_dest, vectype);
5480
5481 /* POINTER_DIFF_EXPR has pointer arguments which are vectorized as
5482 vectors with unsigned elements, but the result is signed. So, we
5483 need to compute the MINUS_EXPR into a vectype temporary and
5484 VIEW_CONVERT_EXPR it into the final vectype_out result. */
5485 tree vec_cvt_dest = NULL_TREE;
5486 if (orig_code == POINTER_DIFF_EXPR)
5487 vec_cvt_dest = vect_create_destination_var (scalar_dest, vectype_out);
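/* A sketch of the GIMPLE emitted below for this case (illustrative
   SSA names):
     vect_diff = vop0 - vop1;                 <- MINUS_EXPR on unsigned VECTYPE
     vect_res = VIEW_CONVERT_EXPR<vectype_out> (vect_diff);  */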
5488
5489 /* In case the vectorization factor (VF) is bigger than the number
5490 of elements that we can fit in a vectype (nunits), we have to generate
5491 more than one vector stmt - i.e., we need to "unroll" the
5492 vector stmt by a factor VF/nunits. In doing so, we record a pointer
5493 from one copy of the vector stmt to the next, in the field
5494 STMT_VINFO_RELATED_STMT. This is necessary in order to allow following
5495 stages to find the correct vector defs to be used when vectorizing
5496 stmts that use the defs of the current stmt. The example below
5497 illustrates the vectorization process when VF=16 and nunits=4 (i.e.,
5498 we need to create 4 vectorized stmts):
5499
5500 before vectorization:
5501 RELATED_STMT VEC_STMT
5502 S1: x = memref - -
5503 S2: z = x + 1 - -
5504
5505 step 1: vectorize stmt S1 (done in vectorizable_load. See more details
5506 there):
5507 RELATED_STMT VEC_STMT
5508 VS1_0: vx0 = memref0 VS1_1 -
5509 VS1_1: vx1 = memref1 VS1_2 -
5510 VS1_2: vx2 = memref2 VS1_3 -
5511 VS1_3: vx3 = memref3 - -
5512 S1: x = load - VS1_0
5513 S2: z = x + 1 - -
5514
5515 step2: vectorize stmt S2 (done here):
5516 To vectorize stmt S2 we first need to find the relevant vector
5517 def for the first operand 'x'. This is, as usual, obtained from
5518 the vector stmt recorded in the STMT_VINFO_VEC_STMT of the stmt
5519 that defines 'x' (S1). This way we find the stmt VS1_0, and the
5520 relevant vector def 'vx0'. Having found 'vx0' we can generate
5521 the vector stmt VS2_0, and as usual, record it in the
5522 STMT_VINFO_VEC_STMT of stmt S2.
5523 When creating the second copy (VS2_1), we obtain the relevant vector
5524 def from the vector stmt recorded in the STMT_VINFO_RELATED_STMT of
5525 stmt VS1_0. This way we find the stmt VS1_1 and the relevant
5526 vector def 'vx1'. Using 'vx1' we create stmt VS2_1 and record a
5527 pointer to it in the STMT_VINFO_RELATED_STMT of the vector stmt VS2_0.
5528 Similarly when creating stmts VS2_2 and VS2_3. This is the resulting
5529 chain of stmts and pointers:
5530 RELATED_STMT VEC_STMT
5531 VS1_0: vx0 = memref0 VS1_1 -
5532 VS1_1: vx1 = memref1 VS1_2 -
5533 VS1_2: vx2 = memref2 VS1_3 -
5534 VS1_3: vx3 = memref3 - -
5535 S1: x = load - VS1_0
5536 VS2_0: vz0 = vx0 + v1 VS2_1 -
5537 VS2_1: vz1 = vx1 + v1 VS2_2 -
5538 VS2_2: vz2 = vx2 + v1 VS2_3 -
5539 VS2_3: vz3 = vx3 + v1 - -
5540 S2: z = x + 1 - VS2_0 */
5541
5542 prev_stmt_info = NULL;
5543 for (j = 0; j < ncopies; j++)
5544 {
5545 /* Handle uses. */
5546 if (j == 0)
5547 {
5548 if (op_type == binary_op || op_type == ternary_op)
5549 vect_get_vec_defs (op0, op1, stmt, &vec_oprnds0, &vec_oprnds1,
5550 slp_node);
5551 else
5552 vect_get_vec_defs (op0, NULL_TREE, stmt, &vec_oprnds0, NULL,
5553 slp_node);
5554 if (op_type == ternary_op)
5555 vect_get_vec_defs (op2, NULL_TREE, stmt, &vec_oprnds2, NULL,
5556 slp_node);
5557 }
5558 else
5559 {
5560 vect_get_vec_defs_for_stmt_copy (dt, &vec_oprnds0, &vec_oprnds1);
5561 if (op_type == ternary_op)
5562 {
5563 tree vec_oprnd = vec_oprnds2.pop ();
5564 vec_oprnds2.quick_push (vect_get_vec_def_for_stmt_copy (dt[2],
5565 vec_oprnd));
5566 }
5567 }
5568
5569 /* Arguments are ready. Create the new vector stmt. */
5570 FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
5571 {
5572 vop1 = ((op_type == binary_op || op_type == ternary_op)
5573 ? vec_oprnds1[i] : NULL_TREE);
5574 vop2 = ((op_type == ternary_op)
5575 ? vec_oprnds2[i] : NULL_TREE);
5576 new_stmt = gimple_build_assign (vec_dest, code, vop0, vop1, vop2);
5577 new_temp = make_ssa_name (vec_dest, new_stmt);
5578 gimple_assign_set_lhs (new_stmt, new_temp);
5579 vect_finish_stmt_generation (stmt, new_stmt, gsi);
5580 if (vec_cvt_dest)
5581 {
5582 new_temp = build1 (VIEW_CONVERT_EXPR, vectype_out, new_temp);
5583 new_stmt = gimple_build_assign (vec_cvt_dest, VIEW_CONVERT_EXPR,
5584 new_temp);
5585 new_temp = make_ssa_name (vec_cvt_dest, new_stmt);
5586 gimple_assign_set_lhs (new_stmt, new_temp);
5587 vect_finish_stmt_generation (stmt, new_stmt, gsi);
5588 }
5589 if (slp_node)
5590 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
5591 }
5592
5593 if (slp_node)
5594 continue;
5595
5596 if (j == 0)
5597 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
5598 else
5599 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
5600 prev_stmt_info = vinfo_for_stmt (new_stmt);
5601 }
5602
5603 vec_oprnds0.release ();
5604 vec_oprnds1.release ();
5605 vec_oprnds2.release ();
5606
5607 return true;
5608 }
5609
5610 /* A helper function to ensure data reference DR's base alignment. */
5611
5612 static void
5613 ensure_base_align (struct data_reference *dr)
5614 {
5615 if (!dr->aux)
5616 return;
5617
5618 if (DR_VECT_AUX (dr)->base_misaligned)
5619 {
5620 tree base_decl = DR_VECT_AUX (dr)->base_decl;
5621
5622 unsigned int align_base_to = DR_TARGET_ALIGNMENT (dr) * BITS_PER_UNIT;
5623
5624 if (decl_in_symtab_p (base_decl))
5625 symtab_node::get (base_decl)->increase_alignment (align_base_to);
5626 else
5627 {
5628 SET_DECL_ALIGN (base_decl, align_base_to);
5629 DECL_USER_ALIGN (base_decl) = 1;
5630 }
5631 DR_VECT_AUX (dr)->base_misaligned = false;
5632 }
5633 }
5634
5635
5636 /* Function get_group_alias_ptr_type.
5637
5638 Return the alias type for the group starting at FIRST_STMT. */
5639
5640 static tree
5641 get_group_alias_ptr_type (gimple *first_stmt)
5642 {
5643 struct data_reference *first_dr, *next_dr;
5644 gimple *next_stmt;
5645
5646 first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
5647 next_stmt = GROUP_NEXT_ELEMENT (vinfo_for_stmt (first_stmt));
5648 while (next_stmt)
5649 {
5650 next_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (next_stmt));
5651 if (get_alias_set (DR_REF (first_dr))
5652 != get_alias_set (DR_REF (next_dr)))
5653 {
5654 if (dump_enabled_p ())
5655 dump_printf_loc (MSG_NOTE, vect_location,
5656 "conflicting alias set types.\n");
5657 return ptr_type_node;
5658 }
5659 next_stmt = GROUP_NEXT_ELEMENT (vinfo_for_stmt (next_stmt));
5660 }
5661 return reference_alias_ptr_type (DR_REF (first_dr));
5662 }
5663
5664
5665 /* Function vectorizable_store.
5666
5667 Check if STMT defines a non-scalar data-ref (array/pointer/structure) that
5668 can be vectorized.
5669 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
5670 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
5671 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
5672
5673 static bool
5674 vectorizable_store (gimple *stmt, gimple_stmt_iterator *gsi, gimple **vec_stmt,
5675 slp_tree slp_node)
5676 {
5677 tree scalar_dest;
5678 tree data_ref;
5679 tree op;
5680 tree vec_oprnd = NULL_TREE;
5681 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
5682 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info), *first_dr = NULL;
5683 tree elem_type;
5684 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
5685 struct loop *loop = NULL;
5686 machine_mode vec_mode;
5687 tree dummy;
5688 enum dr_alignment_support alignment_support_scheme;
5689 gimple *def_stmt;
5690 enum vect_def_type dt;
5691 stmt_vec_info prev_stmt_info = NULL;
5692 tree dataref_ptr = NULL_TREE;
5693 tree dataref_offset = NULL_TREE;
5694 gimple *ptr_incr = NULL;
5695 int ncopies;
5696 int j;
5697 gimple *next_stmt, *first_stmt;
5698 bool grouped_store;
5699 unsigned int group_size, i;
5700 vec<tree> oprnds = vNULL;
5701 vec<tree> result_chain = vNULL;
5702 bool inv_p;
5703 tree offset = NULL_TREE;
5704 vec<tree> vec_oprnds = vNULL;
5705 bool slp = (slp_node != NULL);
5706 unsigned int vec_num;
5707 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
5708 vec_info *vinfo = stmt_info->vinfo;
5709 tree aggr_type;
5710 gather_scatter_info gs_info;
5711 enum vect_def_type scatter_src_dt = vect_unknown_def_type;
5712 gimple *new_stmt;
5713 poly_uint64 vf;
5714 vec_load_store_type vls_type;
5715 tree ref_type;
5716
5717 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
5718 return false;
5719
5720 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
5721 && ! vec_stmt)
5722 return false;
5723
5724 /* Is vectorizable store? */
5725
5726 if (!is_gimple_assign (stmt))
5727 return false;
5728
5729 scalar_dest = gimple_assign_lhs (stmt);
5730 if (TREE_CODE (scalar_dest) == VIEW_CONVERT_EXPR
5731 && is_pattern_stmt_p (stmt_info))
5732 scalar_dest = TREE_OPERAND (scalar_dest, 0);
5733 if (TREE_CODE (scalar_dest) != ARRAY_REF
5734 && TREE_CODE (scalar_dest) != BIT_FIELD_REF
5735 && TREE_CODE (scalar_dest) != INDIRECT_REF
5736 && TREE_CODE (scalar_dest) != COMPONENT_REF
5737 && TREE_CODE (scalar_dest) != IMAGPART_EXPR
5738 && TREE_CODE (scalar_dest) != REALPART_EXPR
5739 && TREE_CODE (scalar_dest) != MEM_REF)
5740 return false;
5741
5742 /* Cannot have hybrid store SLP -- that would mean storing to the
5743 same location twice. */
5744 gcc_assert (slp == PURE_SLP_STMT (stmt_info));
5745
5746 gcc_assert (gimple_assign_single_p (stmt));
5747
5748 tree vectype = STMT_VINFO_VECTYPE (stmt_info), rhs_vectype = NULL_TREE;
5749 unsigned int nunits = TYPE_VECTOR_SUBPARTS (vectype);
5750
5751 if (loop_vinfo)
5752 {
5753 loop = LOOP_VINFO_LOOP (loop_vinfo);
5754 vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
5755 }
5756 else
5757 vf = 1;
5758
5759 /* Multiple types in SLP are handled by creating the appropriate number of
5760 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
5761 case of SLP. */
5762 if (slp)
5763 ncopies = 1;
5764 else
5765 ncopies = vect_get_num_copies (loop_vinfo, vectype);
5766
5767 gcc_assert (ncopies >= 1);
5768
5769 /* FORNOW. This restriction should be relaxed. */
5770 if (loop && nested_in_vect_loop_p (loop, stmt) && ncopies > 1)
5771 {
5772 if (dump_enabled_p ())
5773 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5774 "multiple types in nested loop.\n");
5775 return false;
5776 }
5777
5778 op = gimple_assign_rhs1 (stmt);
5779
5780 /* In case this is a store from a constant, make sure
5781 native_encode_expr can handle it. */
5782 if (CONSTANT_CLASS_P (op) && native_encode_expr (op, NULL, 64) == 0)
5783 return false;
5784
5785 if (!vect_is_simple_use (op, vinfo, &def_stmt, &dt, &rhs_vectype))
5786 {
5787 if (dump_enabled_p ())
5788 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5789 "use not simple.\n");
5790 return false;
5791 }
5792
5793 if (dt == vect_constant_def || dt == vect_external_def)
5794 vls_type = VLS_STORE_INVARIANT;
5795 else
5796 vls_type = VLS_STORE;
5797
5798 if (rhs_vectype && !useless_type_conversion_p (vectype, rhs_vectype))
5799 return false;
5800
5801 elem_type = TREE_TYPE (vectype);
5802 vec_mode = TYPE_MODE (vectype);
5803
5804 /* FORNOW. In some cases can vectorize even if data-type not supported
5805 (e.g. - array initialization with 0). */
5806 if (optab_handler (mov_optab, vec_mode) == CODE_FOR_nothing)
5807 return false;
5808
5809 if (!STMT_VINFO_DATA_REF (stmt_info))
5810 return false;
5811
5812 vect_memory_access_type memory_access_type;
5813 if (!get_load_store_type (stmt, vectype, slp, vls_type, ncopies,
5814 &memory_access_type, &gs_info))
5815 return false;
5816
5817 if (!vec_stmt) /* transformation not required. */
5818 {
5819 STMT_VINFO_MEMORY_ACCESS_TYPE (stmt_info) = memory_access_type;
5820 STMT_VINFO_TYPE (stmt_info) = store_vec_info_type;
5821 /* The SLP costs are calculated during SLP analysis. */
5822 if (!PURE_SLP_STMT (stmt_info))
5823 vect_model_store_cost (stmt_info, ncopies, memory_access_type, dt,
5824 NULL, NULL, NULL);
5825 return true;
5826 }
5827 gcc_assert (memory_access_type == STMT_VINFO_MEMORY_ACCESS_TYPE (stmt_info));
5828
5829 /* Transform. */
5830
5831 ensure_base_align (dr);
5832
5833 if (memory_access_type == VMAT_GATHER_SCATTER)
5834 {
5835 tree vec_oprnd0 = NULL_TREE, vec_oprnd1 = NULL_TREE, op, src;
5836 tree arglist = TYPE_ARG_TYPES (TREE_TYPE (gs_info.decl));
5837 tree rettype, srctype, ptrtype, idxtype, masktype, scaletype;
5838 tree ptr, mask, var, scale, perm_mask = NULL_TREE;
5839 edge pe = loop_preheader_edge (loop);
5840 gimple_seq seq;
5841 basic_block new_bb;
5842 enum { NARROW, NONE, WIDEN } modifier;
5843 int scatter_off_nunits = TYPE_VECTOR_SUBPARTS (gs_info.offset_vectype);
5844
5845 if (nunits == (unsigned int) scatter_off_nunits)
5846 modifier = NONE;
5847 else if (nunits == (unsigned int) scatter_off_nunits / 2)
5848 {
5849 modifier = WIDEN;
5850
5851 vec_perm_builder sel (scatter_off_nunits, scatter_off_nunits, 1);
5852 for (i = 0; i < (unsigned int) scatter_off_nunits; ++i)
5853 sel.quick_push (i | nunits);
5854
5855 vec_perm_indices indices (sel, 1, scatter_off_nunits);
5856 perm_mask = vect_gen_perm_mask_checked (gs_info.offset_vectype,
5857 indices);
5858 gcc_assert (perm_mask != NULL_TREE);
5859 }
5860 else if (nunits == (unsigned int) scatter_off_nunits * 2)
5861 {
5862 modifier = NARROW;
5863
5864 vec_perm_builder sel (nunits, nunits, 1);
5865 for (i = 0; i < (unsigned int) nunits; ++i)
5866 sel.quick_push (i | scatter_off_nunits);
5867
5868 vec_perm_indices indices (sel, 2, nunits);
5869 perm_mask = vect_gen_perm_mask_checked (vectype, indices);
5870 gcc_assert (perm_mask != NULL_TREE);
5871 ncopies *= 2;
5872 }
5873 else
5874 gcc_unreachable ();
5875
5876 rettype = TREE_TYPE (TREE_TYPE (gs_info.decl));
5877 ptrtype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
5878 masktype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
5879 idxtype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
5880 srctype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
5881 scaletype = TREE_VALUE (arglist);
5882
5883 gcc_checking_assert (TREE_CODE (masktype) == INTEGER_TYPE
5884 && TREE_CODE (rettype) == VOID_TYPE);
5885
5886 ptr = fold_convert (ptrtype, gs_info.base);
5887 if (!is_gimple_min_invariant (ptr))
5888 {
5889 ptr = force_gimple_operand (ptr, &seq, true, NULL_TREE);
5890 new_bb = gsi_insert_seq_on_edge_immediate (pe, seq);
5891 gcc_assert (!new_bb);
5892 }
5893
5894 /* Currently we support only unconditional scatter stores,
5895 so mask should be all ones. */
5896 mask = build_int_cst (masktype, -1);
5897 mask = vect_init_vector (stmt, mask, masktype, NULL);
5898
5899 scale = build_int_cst (scaletype, gs_info.scale);
5900
5901 prev_stmt_info = NULL;
5902 for (j = 0; j < ncopies; ++j)
5903 {
5904 if (j == 0)
5905 {
5906 src = vec_oprnd1
5907 = vect_get_vec_def_for_operand (gimple_assign_rhs1 (stmt), stmt);
5908 op = vec_oprnd0
5909 = vect_get_vec_def_for_operand (gs_info.offset, stmt);
5910 }
5911 else if (modifier != NONE && (j & 1))
5912 {
5913 if (modifier == WIDEN)
5914 {
5915 src = vec_oprnd1
5916 = vect_get_vec_def_for_stmt_copy (scatter_src_dt, vec_oprnd1);
5917 op = permute_vec_elements (vec_oprnd0, vec_oprnd0, perm_mask,
5918 stmt, gsi);
5919 }
5920 else if (modifier == NARROW)
5921 {
5922 src = permute_vec_elements (vec_oprnd1, vec_oprnd1, perm_mask,
5923 stmt, gsi);
5924 op = vec_oprnd0
5925 = vect_get_vec_def_for_stmt_copy (gs_info.offset_dt,
5926 vec_oprnd0);
5927 }
5928 else
5929 gcc_unreachable ();
5930 }
5931 else
5932 {
5933 src = vec_oprnd1
5934 = vect_get_vec_def_for_stmt_copy (scatter_src_dt, vec_oprnd1);
5935 op = vec_oprnd0
5936 = vect_get_vec_def_for_stmt_copy (gs_info.offset_dt,
5937 vec_oprnd0);
5938 }
5939
5940 if (!useless_type_conversion_p (srctype, TREE_TYPE (src)))
5941 {
5942 gcc_assert (TYPE_VECTOR_SUBPARTS (TREE_TYPE (src))
5943 == TYPE_VECTOR_SUBPARTS (srctype));
5944 var = vect_get_new_ssa_name (srctype, vect_simple_var);
5945 src = build1 (VIEW_CONVERT_EXPR, srctype, src);
5946 new_stmt = gimple_build_assign (var, VIEW_CONVERT_EXPR, src);
5947 vect_finish_stmt_generation (stmt, new_stmt, gsi);
5948 src = var;
5949 }
5950
5951 if (!useless_type_conversion_p (idxtype, TREE_TYPE (op)))
5952 {
5953 gcc_assert (TYPE_VECTOR_SUBPARTS (TREE_TYPE (op))
5954 == TYPE_VECTOR_SUBPARTS (idxtype));
5955 var = vect_get_new_ssa_name (idxtype, vect_simple_var);
5956 op = build1 (VIEW_CONVERT_EXPR, idxtype, op);
5957 new_stmt = gimple_build_assign (var, VIEW_CONVERT_EXPR, op);
5958 vect_finish_stmt_generation (stmt, new_stmt, gsi);
5959 op = var;
5960 }
5961
5962 new_stmt
5963 = gimple_build_call (gs_info.decl, 5, ptr, mask, op, src, scale);
5964
5965 vect_finish_stmt_generation (stmt, new_stmt, gsi);
5966
5967 if (prev_stmt_info == NULL)
5968 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
5969 else
5970 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
5971 prev_stmt_info = vinfo_for_stmt (new_stmt);
5972 }
5973 return true;
5974 }
5975
5976 grouped_store = STMT_VINFO_GROUPED_ACCESS (stmt_info);
5977 if (grouped_store)
5978 {
5979 first_stmt = GROUP_FIRST_ELEMENT (stmt_info);
5980 first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
5981 group_size = GROUP_SIZE (vinfo_for_stmt (first_stmt));
5982
5983 GROUP_STORE_COUNT (vinfo_for_stmt (first_stmt))++;
5984
5985 /* FORNOW */
5986 gcc_assert (!loop || !nested_in_vect_loop_p (loop, stmt));
5987
5988 /* We vectorize all the stmts of the interleaving group when we
5989 reach the last stmt in the group. */
5990 if (GROUP_STORE_COUNT (vinfo_for_stmt (first_stmt))
5991 < GROUP_SIZE (vinfo_for_stmt (first_stmt))
5992 && !slp)
5993 {
5994 *vec_stmt = NULL;
5995 return true;
5996 }
5997
5998 if (slp)
5999 {
6000 grouped_store = false;
6001 /* VEC_NUM is the number of vect stmts to be created for this
6002 group. */
6003 vec_num = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
6004 first_stmt = SLP_TREE_SCALAR_STMTS (slp_node)[0];
6005 gcc_assert (GROUP_FIRST_ELEMENT (vinfo_for_stmt (first_stmt)) == first_stmt);
6006 first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
6007 op = gimple_assign_rhs1 (first_stmt);
6008 }
6009 else
6010 /* VEC_NUM is the number of vect stmts to be created for this
6011 group. */
6012 vec_num = group_size;
6013
6014 ref_type = get_group_alias_ptr_type (first_stmt);
6015 }
6016 else
6017 {
6018 first_stmt = stmt;
6019 first_dr = dr;
6020 group_size = vec_num = 1;
6021 ref_type = reference_alias_ptr_type (DR_REF (first_dr));
6022 }
6023
6024 if (dump_enabled_p ())
6025 dump_printf_loc (MSG_NOTE, vect_location,
6026 "transform store. ncopies = %d\n", ncopies);
6027
6028 if (memory_access_type == VMAT_ELEMENTWISE
6029 || memory_access_type == VMAT_STRIDED_SLP)
6030 {
6031 gimple_stmt_iterator incr_gsi;
6032 bool insert_after;
6033 gimple *incr;
6034 tree offvar;
6035 tree ivstep;
6036 tree running_off;
6037 gimple_seq stmts = NULL;
6038 tree stride_base, stride_step, alias_off;
6039 tree vec_oprnd;
6040 unsigned int g;
6041
6042 gcc_assert (!nested_in_vect_loop_p (loop, stmt));
6043
6044 stride_base
6045 = fold_build_pointer_plus
6046 (unshare_expr (DR_BASE_ADDRESS (first_dr)),
6047 size_binop (PLUS_EXPR,
6048 convert_to_ptrofftype (unshare_expr (DR_OFFSET (first_dr))),
6049 convert_to_ptrofftype (DR_INIT (first_dr))));
6050 stride_step = fold_convert (sizetype, unshare_expr (DR_STEP (first_dr)));
6051
6052 /* For a store with loop-invariant (but other than power-of-2)
6053 stride (i.e. not a grouped access) like so:
6054
6055 for (i = 0; i < n; i += stride)
6056 array[i] = ...;
6057
6058 we generate a new induction variable and new stores from
6059 the components of the (vectorized) rhs:
6060
6061 for (j = 0; ; j += VF*stride)
6062 vectemp = ...;
6063 tmp1 = vectemp[0];
6064 array[j] = tmp1;
6065 tmp2 = vectemp[1];
6066 array[j + stride] = tmp2;
6067 ...
6068 */
6069
6070 unsigned nstores = nunits;
6071 unsigned lnel = 1;
6072 tree ltype = elem_type;
6073 tree lvectype = vectype;
6074 if (slp)
6075 {
6076 if (group_size < nunits
6077 && nunits % group_size == 0)
6078 {
6079 nstores = nunits / group_size;
6080 lnel = group_size;
6081 ltype = build_vector_type (elem_type, group_size);
6082 lvectype = vectype;
6083
6084 /* First check whether the vec_extract optab supports extracting
6085 these vector elts directly; if not, try the fallbacks below. */
6086 scalar_mode elmode = SCALAR_TYPE_MODE (elem_type);
6087 machine_mode vmode;
6088 if (!mode_for_vector (elmode, group_size).exists (&vmode)
6089 || !VECTOR_MODE_P (vmode)
6090 || (convert_optab_handler (vec_extract_optab,
6091 TYPE_MODE (vectype), vmode)
6092 == CODE_FOR_nothing))
6093 {
6094 /* Try to avoid emitting an extract of vector elements
6095 by performing the extracts using an integer type of the
6096 same size, extracting from a vector of those and then
6097 re-interpreting it as the original vector type if
6098 supported. */
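/* For instance (illustrative), with a V4SF vector and group_size == 2
   each store writes two consecutive floats, so the vector can be
   viewed as V2DI and each 64-bit chunk written with one DImode store,
   i.e. two stores instead of four. */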
6099 unsigned lsize
6100 = group_size * GET_MODE_BITSIZE (elmode);
6101 elmode = int_mode_for_size (lsize, 0).require ();
6102 /* If we can't construct such a vector fall back to
6103 element extracts from the original vector type and
6104 element size stores. */
6105 if (mode_for_vector (elmode,
6106 nunits / group_size).exists (&vmode)
6107 && VECTOR_MODE_P (vmode)
6108 && (convert_optab_handler (vec_extract_optab,
6109 vmode, elmode)
6110 != CODE_FOR_nothing))
6111 {
6112 nstores = nunits / group_size;
6113 lnel = group_size;
6114 ltype = build_nonstandard_integer_type (lsize, 1);
6115 lvectype = build_vector_type (ltype, nstores);
6116 }
6117 /* Else fall back to vector extraction anyway.
6118 Fewer stores are more important than avoiding spilling
6119 of the vector we extract from. Compared to the
6120 construction case in vectorizable_load, no store-forwarding
6121 issue exists here for reasonable archs. */
6122 }
6123 }
6124 else if (group_size >= nunits
6125 && group_size % nunits == 0)
6126 {
6127 nstores = 1;
6128 lnel = nunits;
6129 ltype = vectype;
6130 lvectype = vectype;
6131 }
6132 ltype = build_aligned_type (ltype, TYPE_ALIGN (elem_type));
6133 ncopies = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
6134 }
6135
6136 ivstep = stride_step;
6137 ivstep = fold_build2 (MULT_EXPR, TREE_TYPE (ivstep), ivstep,
6138 build_int_cst (TREE_TYPE (ivstep), vf));
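/* E.g. (illustrative), for the int-array loop sketched above with
   stride 3, DR_STEP is 12 bytes; with VF == 4 the IV created below
   advances by 48 bytes per vector iteration. */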
6139
6140 standard_iv_increment_position (loop, &incr_gsi, &insert_after);
6141
6142 create_iv (stride_base, ivstep, NULL,
6143 loop, &incr_gsi, insert_after,
6144 &offvar, NULL);
6145 incr = gsi_stmt (incr_gsi);
6146 set_vinfo_for_stmt (incr, new_stmt_vec_info (incr, loop_vinfo));
6147
6148 stride_step = force_gimple_operand (stride_step, &stmts, true, NULL_TREE);
6149 if (stmts)
6150 gsi_insert_seq_on_edge_immediate (loop_preheader_edge (loop), stmts);
6151
6152 prev_stmt_info = NULL;
6153 alias_off = build_int_cst (ref_type, 0);
6154 next_stmt = first_stmt;
6155 for (g = 0; g < group_size; g++)
6156 {
6157 running_off = offvar;
6158 if (g)
6159 {
6160 tree size = TYPE_SIZE_UNIT (ltype);
6161 tree pos = fold_build2 (MULT_EXPR, sizetype, size_int (g),
6162 size);
6163 tree newoff = copy_ssa_name (running_off, NULL);
6164 incr = gimple_build_assign (newoff, POINTER_PLUS_EXPR,
6165 running_off, pos);
6166 vect_finish_stmt_generation (stmt, incr, gsi);
6167 running_off = newoff;
6168 }
6169 unsigned int group_el = 0;
6170 unsigned HOST_WIDE_INT
6171 elsz = tree_to_uhwi (TYPE_SIZE_UNIT (TREE_TYPE (vectype)));
6172 for (j = 0; j < ncopies; j++)
6173 {
6174 /* We've set op and dt above, from gimple_assign_rhs1(stmt),
6175 and first_stmt == stmt. */
6176 if (j == 0)
6177 {
6178 if (slp)
6179 {
6180 vect_get_vec_defs (op, NULL_TREE, stmt, &vec_oprnds, NULL,
6181 slp_node);
6182 vec_oprnd = vec_oprnds[0];
6183 }
6184 else
6185 {
6186 gcc_assert (gimple_assign_single_p (next_stmt));
6187 op = gimple_assign_rhs1 (next_stmt);
6188 vec_oprnd = vect_get_vec_def_for_operand (op, next_stmt);
6189 }
6190 }
6191 else
6192 {
6193 if (slp)
6194 vec_oprnd = vec_oprnds[j];
6195 else
6196 {
6197 vect_is_simple_use (vec_oprnd, vinfo, &def_stmt, &dt);
6198 vec_oprnd = vect_get_vec_def_for_stmt_copy (dt, vec_oprnd);
6199 }
6200 }
6201 /* Pun the vector to extract from if necessary. */
6202 if (lvectype != vectype)
6203 {
6204 tree tem = make_ssa_name (lvectype);
6205 gimple *pun
6206 = gimple_build_assign (tem, build1 (VIEW_CONVERT_EXPR,
6207 lvectype, vec_oprnd));
6208 vect_finish_stmt_generation (stmt, pun, gsi);
6209 vec_oprnd = tem;
6210 }
6211 for (i = 0; i < nstores; i++)
6212 {
6213 tree newref, newoff;
6214 gimple *incr, *assign;
6215 tree size = TYPE_SIZE (ltype);
6216 /* Extract the i'th component. */
6217 tree pos = fold_build2 (MULT_EXPR, bitsizetype,
6218 bitsize_int (i), size);
6219 tree elem = fold_build3 (BIT_FIELD_REF, ltype, vec_oprnd,
6220 size, pos);
6221
6222 elem = force_gimple_operand_gsi (gsi, elem, true,
6223 NULL_TREE, true,
6224 GSI_SAME_STMT);
6225
6226 tree this_off = build_int_cst (TREE_TYPE (alias_off),
6227 group_el * elsz);
6228 newref = build2 (MEM_REF, ltype,
6229 running_off, this_off);
6230
6231 /* And store it to *running_off. */
6232 assign = gimple_build_assign (newref, elem);
6233 vect_finish_stmt_generation (stmt, assign, gsi);
6234
6235 group_el += lnel;
6236 if (! slp
6237 || group_el == group_size)
6238 {
6239 newoff = copy_ssa_name (running_off, NULL);
6240 incr = gimple_build_assign (newoff, POINTER_PLUS_EXPR,
6241 running_off, stride_step);
6242 vect_finish_stmt_generation (stmt, incr, gsi);
6243
6244 running_off = newoff;
6245 group_el = 0;
6246 }
6247 if (g == group_size - 1
6248 && !slp)
6249 {
6250 if (j == 0 && i == 0)
6251 STMT_VINFO_VEC_STMT (stmt_info)
6252 = *vec_stmt = assign;
6253 else
6254 STMT_VINFO_RELATED_STMT (prev_stmt_info) = assign;
6255 prev_stmt_info = vinfo_for_stmt (assign);
6256 }
6257 }
6258 }
6259 next_stmt = GROUP_NEXT_ELEMENT (vinfo_for_stmt (next_stmt));
6260 if (slp)
6261 break;
6262 }
6263
6264 vec_oprnds.release ();
6265 return true;
6266 }
6267
6268 auto_vec<tree> dr_chain (group_size);
6269 oprnds.create (group_size);
6270
6271 alignment_support_scheme = vect_supportable_dr_alignment (first_dr, false);
6272 gcc_assert (alignment_support_scheme);
6273 /* Targets with store-lane instructions must not require explicit
6274 realignment. */
6275 gcc_assert (memory_access_type != VMAT_LOAD_STORE_LANES
6276 || alignment_support_scheme == dr_aligned
6277 || alignment_support_scheme == dr_unaligned_supported);
6278
6279 if (memory_access_type == VMAT_CONTIGUOUS_DOWN
6280 || memory_access_type == VMAT_CONTIGUOUS_REVERSE)
6281 offset = size_int (-TYPE_VECTOR_SUBPARTS (vectype) + 1);
6282
6283 if (memory_access_type == VMAT_LOAD_STORE_LANES)
6284 aggr_type = build_array_type_nelts (elem_type, vec_num * nunits);
6285 else
6286 aggr_type = vectype;
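/* Illustrative example: for two interleaved V4SI vectors
   (vec_num == 2, nunits == 4) the store-lanes path below builds an
   eight-element int array and emits one IFN_STORE_LANES call, which
   a target such as AArch64 can expand to an st2 instruction. */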
6287
6288 /* In case the vectorization factor (VF) is bigger than the number
6289 of elements that we can fit in a vectype (nunits), we have to generate
6290 more than one vector stmt - i.e., we need to "unroll" the
6291 vector stmt by a factor VF/nunits. For more details see documentation in
6292 vect_get_vec_def_for_copy_stmt. */
6293
6294 /* In case of interleaving (non-unit grouped access):
6295
6296 S1: &base + 2 = x2
6297 S2: &base = x0
6298 S3: &base + 1 = x1
6299 S4: &base + 3 = x3
6300
6301 We create vectorized stores starting from the base address (the access of
6302 the first stmt in the chain (S2 in the above example)) when the last store
6303 stmt of the chain (S4) is reached:
6304
6305 VS1: &base = vx2
6306 VS2: &base + vec_size*1 = vx0
6307 VS3: &base + vec_size*2 = vx1
6308 VS4: &base + vec_size*3 = vx3
6309
6310 Then permutation statements are generated:
6311
6312 VS5: vx5 = VEC_PERM_EXPR < vx0, vx3, {0, 8, 1, 9, 2, 10, 3, 11} >
6313 VS6: vx6 = VEC_PERM_EXPR < vx0, vx3, {4, 12, 5, 13, 6, 14, 7, 15} >
6314 ...
6315
6316 And they are put in STMT_VINFO_VEC_STMT of the corresponding scalar stmts
6317 (the order of the data-refs in the output of vect_permute_store_chain
6318 corresponds to the order of scalar stmts in the interleaving chain - see
6319 the documentation of vect_permute_store_chain()).
6320
6321 In case of both multiple types and interleaving, above vector stores and
6322 permutation stmts are created for every copy. The result vector stmts are
6323 put in STMT_VINFO_VEC_STMT for the first copy and in the corresponding
6324 STMT_VINFO_RELATED_STMT for the next copies.
6325 */
6326
6327 prev_stmt_info = NULL;
6328 for (j = 0; j < ncopies; j++)
6329 {
6330
6331 if (j == 0)
6332 {
6333 if (slp)
6334 {
6335 /* Get vectorized arguments for SLP_NODE. */
6336 vect_get_vec_defs (op, NULL_TREE, stmt, &vec_oprnds,
6337 NULL, slp_node);
6338
6339 vec_oprnd = vec_oprnds[0];
6340 }
6341 else
6342 {
6343 /* For interleaved stores we collect vectorized defs for all the
6344 stores in the group in DR_CHAIN and OPRNDS. DR_CHAIN is then
6345 used as an input to vect_permute_store_chain(), and OPRNDS as
6346 an input to vect_get_vec_def_for_stmt_copy() for the next copy.
6347
6348 If the store is not grouped, GROUP_SIZE is 1, and DR_CHAIN and
6349 OPRNDS are of size 1. */
6350 next_stmt = first_stmt;
6351 for (i = 0; i < group_size; i++)
6352 {
6353 /* Since gaps are not supported for interleaved stores,
6354 GROUP_SIZE is the exact number of stmts in the chain.
6355 Therefore, NEXT_STMT can't be NULL_TREE. In case that
6356 there is no interleaving, GROUP_SIZE is 1, and only one
6357 iteration of the loop will be executed. */
6358 gcc_assert (next_stmt
6359 && gimple_assign_single_p (next_stmt));
6360 op = gimple_assign_rhs1 (next_stmt);
6361
6362 vec_oprnd = vect_get_vec_def_for_operand (op, next_stmt);
6363 dr_chain.quick_push (vec_oprnd);
6364 oprnds.quick_push (vec_oprnd);
6365 next_stmt = GROUP_NEXT_ELEMENT (vinfo_for_stmt (next_stmt));
6366 }
6367 }
6368
6369 /* We should have caught mismatched types earlier. */
6370 gcc_assert (useless_type_conversion_p (vectype,
6371 TREE_TYPE (vec_oprnd)));
6372 bool simd_lane_access_p
6373 = STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info);
6374 if (simd_lane_access_p
6375 && TREE_CODE (DR_BASE_ADDRESS (first_dr)) == ADDR_EXPR
6376 && VAR_P (TREE_OPERAND (DR_BASE_ADDRESS (first_dr), 0))
6377 && integer_zerop (DR_OFFSET (first_dr))
6378 && integer_zerop (DR_INIT (first_dr))
6379 && alias_sets_conflict_p (get_alias_set (aggr_type),
6380 get_alias_set (TREE_TYPE (ref_type))))
6381 {
6382 dataref_ptr = unshare_expr (DR_BASE_ADDRESS (first_dr));
6383 dataref_offset = build_int_cst (ref_type, 0);
6384 inv_p = false;
6385 }
6386 else
6387 dataref_ptr
6388 = vect_create_data_ref_ptr (first_stmt, aggr_type,
6389 simd_lane_access_p ? loop : NULL,
6390 offset, &dummy, gsi, &ptr_incr,
6391 simd_lane_access_p, &inv_p);
6392 gcc_assert (bb_vinfo || !inv_p);
6393 }
6394 else
6395 {
6396 /* For interleaved stores we created vectorized defs for all the
6397 defs stored in OPRNDS in the previous iteration (previous copy).
6398 DR_CHAIN is then used as an input to vect_permute_store_chain(),
6399 and OPRNDS as an input to vect_get_vec_def_for_stmt_copy() for the
6400 next copy.
6401 If the store is not grouped, GROUP_SIZE is 1, and DR_CHAIN and
6402 OPRNDS are of size 1. */
6403 for (i = 0; i < group_size; i++)
6404 {
6405 op = oprnds[i];
6406 vect_is_simple_use (op, vinfo, &def_stmt, &dt);
6407 vec_oprnd = vect_get_vec_def_for_stmt_copy (dt, op);
6408 dr_chain[i] = vec_oprnd;
6409 oprnds[i] = vec_oprnd;
6410 }
6411 if (dataref_offset)
6412 dataref_offset
6413 = int_const_binop (PLUS_EXPR, dataref_offset,
6414 TYPE_SIZE_UNIT (aggr_type));
6415 else
6416 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi, stmt,
6417 TYPE_SIZE_UNIT (aggr_type));
6418 }
6419
6420 if (memory_access_type == VMAT_LOAD_STORE_LANES)
6421 {
6422 tree vec_array;
6423
6424 /* Combine all the vectors into an array. */
6425 vec_array = create_vector_array (vectype, vec_num);
6426 for (i = 0; i < vec_num; i++)
6427 {
6428 vec_oprnd = dr_chain[i];
6429 write_vector_array (stmt, gsi, vec_oprnd, vec_array, i);
6430 }
6431
6432 /* Emit:
6433 MEM_REF[...all elements...] = STORE_LANES (VEC_ARRAY). */
6434 data_ref = create_array_ref (aggr_type, dataref_ptr, ref_type);
6435 gcall *call = gimple_build_call_internal (IFN_STORE_LANES, 1,
6436 vec_array);
6437 gimple_call_set_lhs (call, data_ref);
6438 gimple_call_set_nothrow (call, true);
6439 new_stmt = call;
6440 vect_finish_stmt_generation (stmt, new_stmt, gsi);
6441 }
6442 else
6443 {
6444 new_stmt = NULL;
6445 if (grouped_store)
6446 {
6447 if (j == 0)
6448 result_chain.create (group_size);
6449 /* Permute. */
6450 vect_permute_store_chain (dr_chain, group_size, stmt, gsi,
6451 &result_chain);
6452 }
6453
6454 next_stmt = first_stmt;
6455 for (i = 0; i < vec_num; i++)
6456 {
6457 unsigned align, misalign;
6458
6459 if (i > 0)
6460 /* Bump the vector pointer. */
6461 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi,
6462 stmt, NULL_TREE);
6463
6464 if (slp)
6465 vec_oprnd = vec_oprnds[i];
6466 else if (grouped_store)
6467 /* For grouped stores vectorized defs are interleaved in
6468 vect_permute_store_chain(). */
6469 vec_oprnd = result_chain[i];
6470
6471 data_ref = fold_build2 (MEM_REF, vectype,
6472 dataref_ptr,
6473 dataref_offset
6474 ? dataref_offset
6475 : build_int_cst (ref_type, 0));
6476 align = DR_TARGET_ALIGNMENT (first_dr);
6477 if (aligned_access_p (first_dr))
6478 misalign = 0;
6479 else if (DR_MISALIGNMENT (first_dr) == -1)
6480 {
6481 align = dr_alignment (vect_dr_behavior (first_dr));
6482 misalign = 0;
6483 TREE_TYPE (data_ref)
6484 = build_aligned_type (TREE_TYPE (data_ref),
6485 align * BITS_PER_UNIT);
6486 }
6487 else
6488 {
6489 TREE_TYPE (data_ref)
6490 = build_aligned_type (TREE_TYPE (data_ref),
6491 TYPE_ALIGN (elem_type));
6492 misalign = DR_MISALIGNMENT (first_dr);
6493 }
6494 if (dataref_offset == NULL_TREE
6495 && TREE_CODE (dataref_ptr) == SSA_NAME)
6496 set_ptr_info_alignment (get_ptr_info (dataref_ptr), align,
6497 misalign);
6498
6499 if (memory_access_type == VMAT_CONTIGUOUS_REVERSE)
6500 {
6501 tree perm_mask = perm_mask_for_reverse (vectype);
6502 tree perm_dest
6503 = vect_create_destination_var (gimple_assign_rhs1 (stmt),
6504 vectype);
6505 tree new_temp = make_ssa_name (perm_dest);
6506
6507 /* Generate the permute statement. */
6508 gimple *perm_stmt
6509 = gimple_build_assign (new_temp, VEC_PERM_EXPR, vec_oprnd,
6510 vec_oprnd, perm_mask);
6511 vect_finish_stmt_generation (stmt, perm_stmt, gsi);
6512
6513 perm_stmt = SSA_NAME_DEF_STMT (new_temp);
6514 vec_oprnd = new_temp;
6515 }
6516
6517 /* Arguments are ready. Create the new vector stmt. */
6518 new_stmt = gimple_build_assign (data_ref, vec_oprnd);
6519 vect_finish_stmt_generation (stmt, new_stmt, gsi);
6520
6521 if (slp)
6522 continue;
6523
6524 next_stmt = GROUP_NEXT_ELEMENT (vinfo_for_stmt (next_stmt));
6525 if (!next_stmt)
6526 break;
6527 }
6528 }
6529 if (!slp)
6530 {
6531 if (j == 0)
6532 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
6533 else
6534 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
6535 prev_stmt_info = vinfo_for_stmt (new_stmt);
6536 }
6537 }
6538
6539 oprnds.release ();
6540 result_chain.release ();
6541 vec_oprnds.release ();
6542
6543 return true;
6544 }
6545
6546 /* Given a vector type VECTYPE, turns permutation SEL into the equivalent
6547 VECTOR_CST mask. No checks are made that the target platform supports the
6548 mask, so callers may wish to test can_vec_perm_const_p separately, or use
6549 vect_gen_perm_mask_checked. */
6550
6551 tree
6552 vect_gen_perm_mask_any (tree vectype, const vec_perm_indices &sel)
6553 {
6554 tree mask_type;
6555
6556 unsigned int nunits = sel.length ();
6557 gcc_assert (nunits == TYPE_VECTOR_SUBPARTS (vectype));
6558
6559 mask_type = build_vector_type (ssizetype, nunits);
6560 return vec_perm_indices_to_tree (mask_type, sel);
6561 }
6562
6563 /* Checked version of vect_gen_perm_mask_any. Asserts can_vec_perm_const_p,
6564 i.e. that the target supports the pattern _for arbitrary input vectors_. */
6565
6566 tree
6567 vect_gen_perm_mask_checked (tree vectype, const vec_perm_indices &sel)
6568 {
6569 gcc_assert (can_vec_perm_const_p (TYPE_MODE (vectype), sel));
6570 return vect_gen_perm_mask_any (vectype, sel);
6571 }
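/* A typical call sequence, mirroring the gather/scatter uses earlier
   in this file (sketch only; the SEL contents depend on the desired
   permutation):

     vec_perm_builder sel (nunits, nunits, 1);
     for (i = 0; i < nunits; ++i)
       sel.quick_push (...);
     vec_perm_indices indices (sel, 2, nunits);
     tree mask = vect_gen_perm_mask_checked (vectype, indices);  */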
6572
6573 /* Given vector variables X and Y that were generated for the scalar
6574 STMT, generate instructions to permute the vector elements of X and Y
6575 using the permutation mask MASK_VEC, insert them at *GSI and return the
6576 permuted vector variable. */
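/* E.g. (illustrative SSA name) the statement generated below has the
   form:  perm_dest_4 = VEC_PERM_EXPR <x, y, mask_vec>;  */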
6577
6578 static tree
6579 permute_vec_elements (tree x, tree y, tree mask_vec, gimple *stmt,
6580 gimple_stmt_iterator *gsi)
6581 {
6582 tree vectype = TREE_TYPE (x);
6583 tree perm_dest, data_ref;
6584 gimple *perm_stmt;
6585
6586 perm_dest = vect_create_destination_var (gimple_get_lhs (stmt), vectype);
6587 data_ref = make_ssa_name (perm_dest);
6588
6589 /* Generate the permute statement. */
6590 perm_stmt = gimple_build_assign (data_ref, VEC_PERM_EXPR, x, y, mask_vec);
6591 vect_finish_stmt_generation (stmt, perm_stmt, gsi);
6592
6593 return data_ref;
6594 }
6595
6596 /* Hoist the definitions of all SSA uses on STMT out of the loop LOOP,
6597 inserting them on the loop's preheader edge. Returns true if we
6598 were successful in doing so (and thus STMT can then be moved),
6599 otherwise returns false. */
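/* Illustrative example (hypothetical SSA names): if STMT is
   _3 = a[tmp_1]  and its only in-loop SSA use is tmp_1, defined by
   tmp_1 = n_2(D) + 1  whose own operands come from outside LOOP, the
   definition of tmp_1 is moved to the preheader so that STMT itself
   can then be hoisted. */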
6600
6601 static bool
6602 hoist_defs_of_uses (gimple *stmt, struct loop *loop)
6603 {
6604 ssa_op_iter i;
6605 tree op;
6606 bool any = false;
6607
6608 FOR_EACH_SSA_TREE_OPERAND (op, stmt, i, SSA_OP_USE)
6609 {
6610 gimple *def_stmt = SSA_NAME_DEF_STMT (op);
6611 if (!gimple_nop_p (def_stmt)
6612 && flow_bb_inside_loop_p (loop, gimple_bb (def_stmt)))
6613 {
6614 /* Make sure we don't need to recurse. While we could do
6615 so in simple cases, for more complex use webs
6616 we don't have an easy way to preserve stmt order to fulfil
6617 dependencies within them. */
6618 tree op2;
6619 ssa_op_iter i2;
6620 if (gimple_code (def_stmt) == GIMPLE_PHI)
6621 return false;
6622 FOR_EACH_SSA_TREE_OPERAND (op2, def_stmt, i2, SSA_OP_USE)
6623 {
6624 gimple *def_stmt2 = SSA_NAME_DEF_STMT (op2);
6625 if (!gimple_nop_p (def_stmt2)
6626 && flow_bb_inside_loop_p (loop, gimple_bb (def_stmt2)))
6627 return false;
6628 }
6629 any = true;
6630 }
6631 }
6632
6633 if (!any)
6634 return true;
6635
6636 FOR_EACH_SSA_TREE_OPERAND (op, stmt, i, SSA_OP_USE)
6637 {
6638 gimple *def_stmt = SSA_NAME_DEF_STMT (op);
6639 if (!gimple_nop_p (def_stmt)
6640 && flow_bb_inside_loop_p (loop, gimple_bb (def_stmt)))
6641 {
6642 gimple_stmt_iterator gsi = gsi_for_stmt (def_stmt);
6643 gsi_remove (&gsi, false);
6644 gsi_insert_on_edge_immediate (loop_preheader_edge (loop), def_stmt);
6645 }
6646 }
6647
6648 return true;
6649 }
6650
6651 /* vectorizable_load.
6652
6653 Check if STMT reads a non-scalar data-ref (array/pointer/structure) that
6654 can be vectorized.
6655 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
6656 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
6657 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
6658
6659 static bool
6660 vectorizable_load (gimple *stmt, gimple_stmt_iterator *gsi, gimple **vec_stmt,
6661 slp_tree slp_node, slp_instance slp_node_instance)
6662 {
6663 tree scalar_dest;
6664 tree vec_dest = NULL;
6665 tree data_ref = NULL;
6666 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
6667 stmt_vec_info prev_stmt_info;
6668 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
6669 struct loop *loop = NULL;
6670 struct loop *containing_loop = (gimple_bb (stmt))->loop_father;
6671 bool nested_in_vect_loop = false;
6672 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info), *first_dr = NULL;
6673 tree elem_type;
6674 tree new_temp;
6675 machine_mode mode;
6676 gimple *new_stmt = NULL;
6677 tree dummy;
6678 enum dr_alignment_support alignment_support_scheme;
6679 tree dataref_ptr = NULL_TREE;
6680 tree dataref_offset = NULL_TREE;
6681 gimple *ptr_incr = NULL;
6682 int ncopies;
6683 int i, j, group_size;
6684 poly_int64 group_gap_adj;
6685 tree msq = NULL_TREE, lsq;
6686 tree offset = NULL_TREE;
6687 tree byte_offset = NULL_TREE;
6688 tree realignment_token = NULL_TREE;
6689 gphi *phi = NULL;
6690 vec<tree> dr_chain = vNULL;
6691 bool grouped_load = false;
6692 gimple *first_stmt;
6693 gimple *first_stmt_for_drptr = NULL;
6694 bool inv_p;
6695 bool compute_in_loop = false;
6696 struct loop *at_loop;
6697 int vec_num;
6698 bool slp = (slp_node != NULL);
6699 bool slp_perm = false;
6700 enum tree_code code;
6701 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
6702 poly_uint64 vf;
6703 tree aggr_type;
6704 gather_scatter_info gs_info;
6705 vec_info *vinfo = stmt_info->vinfo;
6706 tree ref_type;
6707
6708 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
6709 return false;
6710
6711 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
6712 && ! vec_stmt)
6713 return false;
6714
6715 /* Is vectorizable load? */
6716 if (!is_gimple_assign (stmt))
6717 return false;
6718
6719 scalar_dest = gimple_assign_lhs (stmt);
6720 if (TREE_CODE (scalar_dest) != SSA_NAME)
6721 return false;
6722
6723 code = gimple_assign_rhs_code (stmt);
6724 if (code != ARRAY_REF
6725 && code != BIT_FIELD_REF
6726 && code != INDIRECT_REF
6727 && code != COMPONENT_REF
6728 && code != IMAGPART_EXPR
6729 && code != REALPART_EXPR
6730 && code != MEM_REF
6731 && TREE_CODE_CLASS (code) != tcc_declaration)
6732 return false;
6733
6734 if (!STMT_VINFO_DATA_REF (stmt_info))
6735 return false;
6736
6737 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
6738 int nunits = TYPE_VECTOR_SUBPARTS (vectype);
6739
6740 if (loop_vinfo)
6741 {
6742 loop = LOOP_VINFO_LOOP (loop_vinfo);
6743 nested_in_vect_loop = nested_in_vect_loop_p (loop, stmt);
6744 vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
6745 }
6746 else
6747 vf = 1;
6748
6749 /* Multiple types in SLP are handled by creating the appropriate number of
6750 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
6751 case of SLP. */
6752 if (slp)
6753 ncopies = 1;
6754 else
6755 ncopies = vect_get_num_copies (loop_vinfo, vectype);
6756
6757 gcc_assert (ncopies >= 1);
6758
6759 /* FORNOW. This restriction should be relaxed. */
6760 if (nested_in_vect_loop && ncopies > 1)
6761 {
6762 if (dump_enabled_p ())
6763 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6764 "multiple types in nested loop.\n");
6765 return false;
6766 }
6767
6768 /* Invalidate assumptions made by dependence analysis when vectorization
6769 on the unrolled body effectively re-orders stmts. */
6770 if (ncopies > 1
6771 && STMT_VINFO_MIN_NEG_DIST (stmt_info) != 0
6772 && maybe_gt (LOOP_VINFO_VECT_FACTOR (loop_vinfo),
6773 STMT_VINFO_MIN_NEG_DIST (stmt_info)))
6774 {
6775 if (dump_enabled_p ())
6776 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6777 "cannot perform implicit CSE when unrolling "
6778 "with negative dependence distance\n");
6779 return false;
6780 }
6781
6782 elem_type = TREE_TYPE (vectype);
6783 mode = TYPE_MODE (vectype);
6784
6785 /* FORNOW. In some cases can vectorize even if data-type not supported
6786 (e.g. - data copies). */
6787 if (optab_handler (mov_optab, mode) == CODE_FOR_nothing)
6788 {
6789 if (dump_enabled_p ())
6790 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6791 "Aligned load, but unsupported type.\n");
6792 return false;
6793 }
6794
6795 /* Check if the load is a part of an interleaving chain. */
6796 if (STMT_VINFO_GROUPED_ACCESS (stmt_info))
6797 {
6798 grouped_load = true;
6799 /* FORNOW */
6800 gcc_assert (!nested_in_vect_loop);
6801 gcc_assert (!STMT_VINFO_GATHER_SCATTER_P (stmt_info));
6802
6803 first_stmt = GROUP_FIRST_ELEMENT (stmt_info);
6804 group_size = GROUP_SIZE (vinfo_for_stmt (first_stmt));
6805
6806 if (slp && SLP_TREE_LOAD_PERMUTATION (slp_node).exists ())
6807 slp_perm = true;
6808
6809 /* Invalidate assumptions made by dependence analysis when vectorization
6810 on the unrolled body effectively re-orders stmts. */
6811 if (!PURE_SLP_STMT (stmt_info)
6812 && STMT_VINFO_MIN_NEG_DIST (stmt_info) != 0
6813 && maybe_gt (LOOP_VINFO_VECT_FACTOR (loop_vinfo),
6814 STMT_VINFO_MIN_NEG_DIST (stmt_info)))
6815 {
6816 if (dump_enabled_p ())
6817 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6818 "cannot perform implicit CSE when performing "
6819 "group loads with negative dependence distance\n");
6820 return false;
6821 }
6822
6823 /* Similarly, when the stmt is a load that is both part of an SLP
6824 instance and a loop-vectorized stmt via the same-dr mechanism,
6825 we have to give up. */
6826 if (STMT_VINFO_GROUP_SAME_DR_STMT (stmt_info)
6827 && (STMT_SLP_TYPE (stmt_info)
6828 != STMT_SLP_TYPE (vinfo_for_stmt
6829 (STMT_VINFO_GROUP_SAME_DR_STMT (stmt_info)))))
6830 {
6831 if (dump_enabled_p ())
6832 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6833 "conflicting SLP types for CSEd load\n");
6834 return false;
6835 }
6836 }
6837
6838 vect_memory_access_type memory_access_type;
6839 if (!get_load_store_type (stmt, vectype, slp, VLS_LOAD, ncopies,
6840 &memory_access_type, &gs_info))
6841 return false;
6842
6843 if (!vec_stmt) /* transformation not required. */
6844 {
6845 if (!slp)
6846 STMT_VINFO_MEMORY_ACCESS_TYPE (stmt_info) = memory_access_type;
6847 STMT_VINFO_TYPE (stmt_info) = load_vec_info_type;
6848 /* The SLP costs are calculated during SLP analysis. */
6849 if (!PURE_SLP_STMT (stmt_info))
6850 vect_model_load_cost (stmt_info, ncopies, memory_access_type,
6851 NULL, NULL, NULL);
6852 return true;
6853 }
6854
6855 if (!slp)
6856 gcc_assert (memory_access_type
6857 == STMT_VINFO_MEMORY_ACCESS_TYPE (stmt_info));
6858
6859 if (dump_enabled_p ())
6860 dump_printf_loc (MSG_NOTE, vect_location,
6861 "transform load. ncopies = %d\n", ncopies);
6862
6863 /* Transform. */
6864
6865 ensure_base_align (dr);
6866
6867 if (memory_access_type == VMAT_GATHER_SCATTER)
6868 {
6869 tree vec_oprnd0 = NULL_TREE, op;
6870 tree arglist = TYPE_ARG_TYPES (TREE_TYPE (gs_info.decl));
6871 tree rettype, srctype, ptrtype, idxtype, masktype, scaletype;
6872 tree ptr, mask, var, scale, merge, perm_mask = NULL_TREE, prev_res = NULL_TREE;
6873 edge pe = loop_preheader_edge (loop);
6874 gimple_seq seq;
6875 basic_block new_bb;
6876 enum { NARROW, NONE, WIDEN } modifier;
6877 int gather_off_nunits = TYPE_VECTOR_SUBPARTS (gs_info.offset_vectype);
6878
6879 if (nunits == gather_off_nunits)
6880 modifier = NONE;
6881 else if (nunits == gather_off_nunits / 2)
6882 {
6883 modifier = WIDEN;
6884
6885 vec_perm_builder sel (gather_off_nunits, gather_off_nunits, 1);
6886 for (i = 0; i < gather_off_nunits; ++i)
6887 sel.quick_push (i | nunits);
6888
6889 vec_perm_indices indices (sel, 1, gather_off_nunits);
6890 perm_mask = vect_gen_perm_mask_checked (gs_info.offset_vectype,
6891 indices);
6892 }
6893 else if (nunits == gather_off_nunits * 2)
6894 {
6895 modifier = NARROW;
6896
6897 vec_perm_builder sel (nunits, nunits, 1);
6898 for (i = 0; i < nunits; ++i)
6899 sel.quick_push (i < gather_off_nunits
6900 ? i : i + nunits - gather_off_nunits);
6901
6902 vec_perm_indices indices (sel, 2, nunits);
6903 perm_mask = vect_gen_perm_mask_checked (vectype, indices);
6904 ncopies *= 2;
6905 }
6906 else
6907 gcc_unreachable ();
6908
6909 rettype = TREE_TYPE (TREE_TYPE (gs_info.decl));
6910 srctype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
6911 ptrtype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
6912 idxtype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
6913 masktype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
6914 scaletype = TREE_VALUE (arglist);
6915 gcc_checking_assert (types_compatible_p (srctype, rettype));
6916
6917 vec_dest = vect_create_destination_var (scalar_dest, vectype);
6918
6919 ptr = fold_convert (ptrtype, gs_info.base);
6920 if (!is_gimple_min_invariant (ptr))
6921 {
6922 ptr = force_gimple_operand (ptr, &seq, true, NULL_TREE);
6923 new_bb = gsi_insert_seq_on_edge_immediate (pe, seq);
6924 gcc_assert (!new_bb);
6925 }
6926
6927 /* Currently we support only unconditional gather loads,
6928 so mask should be all ones. */
6929 if (TREE_CODE (masktype) == INTEGER_TYPE)
6930 mask = build_int_cst (masktype, -1);
6931 else if (TREE_CODE (TREE_TYPE (masktype)) == INTEGER_TYPE)
6932 {
6933 mask = build_int_cst (TREE_TYPE (masktype), -1);
6934 mask = build_vector_from_val (masktype, mask);
6935 mask = vect_init_vector (stmt, mask, masktype, NULL);
6936 }
6937 else if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (masktype)))
6938 {
6939 REAL_VALUE_TYPE r;
6940 long tmp[6];
6941 for (j = 0; j < 6; ++j)
6942 tmp[j] = -1;
6943 real_from_target (&r, tmp, TYPE_MODE (TREE_TYPE (masktype)));
6944 mask = build_real (TREE_TYPE (masktype), r);
6945 mask = build_vector_from_val (masktype, mask);
6946 mask = vect_init_vector (stmt, mask, masktype, NULL);
6947 }
6948 else
6949 gcc_unreachable ();
6950
6951 scale = build_int_cst (scaletype, gs_info.scale);
6952
6953 if (TREE_CODE (TREE_TYPE (rettype)) == INTEGER_TYPE)
6954 merge = build_int_cst (TREE_TYPE (rettype), 0);
6955 else if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (rettype)))
6956 {
6957 REAL_VALUE_TYPE r;
6958 long tmp[6];
6959 for (j = 0; j < 6; ++j)
6960 tmp[j] = 0;
6961 real_from_target (&r, tmp, TYPE_MODE (TREE_TYPE (rettype)));
6962 merge = build_real (TREE_TYPE (rettype), r);
6963 }
6964 else
6965 gcc_unreachable ();
6966 merge = build_vector_from_val (rettype, merge);
6967 merge = vect_init_vector (stmt, merge, rettype, NULL);
6968
6969 prev_stmt_info = NULL;
6970 for (j = 0; j < ncopies; ++j)
6971 {
6972 if (modifier == WIDEN && (j & 1))
6973 op = permute_vec_elements (vec_oprnd0, vec_oprnd0,
6974 perm_mask, stmt, gsi);
6975 else if (j == 0)
6976 op = vec_oprnd0
6977 = vect_get_vec_def_for_operand (gs_info.offset, stmt);
6978 else
6979 op = vec_oprnd0
6980 = vect_get_vec_def_for_stmt_copy (gs_info.offset_dt, vec_oprnd0);
6981
6982 if (!useless_type_conversion_p (idxtype, TREE_TYPE (op)))
6983 {
6984 gcc_assert (TYPE_VECTOR_SUBPARTS (TREE_TYPE (op))
6985 == TYPE_VECTOR_SUBPARTS (idxtype));
6986 var = vect_get_new_ssa_name (idxtype, vect_simple_var);
6987 op = build1 (VIEW_CONVERT_EXPR, idxtype, op);
6988 new_stmt
6989 = gimple_build_assign (var, VIEW_CONVERT_EXPR, op);
6990 vect_finish_stmt_generation (stmt, new_stmt, gsi);
6991 op = var;
6992 }
6993
6994 new_stmt
6995 = gimple_build_call (gs_info.decl, 5, merge, ptr, op, mask, scale);
6996
6997 if (!useless_type_conversion_p (vectype, rettype))
6998 {
6999 gcc_assert (TYPE_VECTOR_SUBPARTS (vectype)
7000 == TYPE_VECTOR_SUBPARTS (rettype));
7001 op = vect_get_new_ssa_name (rettype, vect_simple_var);
7002 gimple_call_set_lhs (new_stmt, op);
7003 vect_finish_stmt_generation (stmt, new_stmt, gsi);
7004 var = make_ssa_name (vec_dest);
7005 op = build1 (VIEW_CONVERT_EXPR, vectype, op);
7006 new_stmt
7007 = gimple_build_assign (var, VIEW_CONVERT_EXPR, op);
7008 }
7009 else
7010 {
7011 var = make_ssa_name (vec_dest, new_stmt);
7012 gimple_call_set_lhs (new_stmt, var);
7013 }
7014
7015 vect_finish_stmt_generation (stmt, new_stmt, gsi);
7016
7017 if (modifier == NARROW)
7018 {
7019 if ((j & 1) == 0)
7020 {
7021 prev_res = var;
7022 continue;
7023 }
7024 var = permute_vec_elements (prev_res, var,
7025 perm_mask, stmt, gsi);
7026 new_stmt = SSA_NAME_DEF_STMT (var);
7027 }
7028
7029 if (prev_stmt_info == NULL)
7030 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
7031 else
7032 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
7033 prev_stmt_info = vinfo_for_stmt (new_stmt);
7034 }
7035 return true;
7036 }
7037
7038 if (memory_access_type == VMAT_ELEMENTWISE
7039 || memory_access_type == VMAT_STRIDED_SLP)
7040 {
7041 gimple_stmt_iterator incr_gsi;
7042 bool insert_after;
7043 gimple *incr;
7044 tree offvar;
7045 tree ivstep;
7046 tree running_off;
7047 vec<constructor_elt, va_gc> *v = NULL;
7048 gimple_seq stmts = NULL;
7049 tree stride_base, stride_step, alias_off;
7050
7051 gcc_assert (!nested_in_vect_loop);
7052
7053 if (slp && grouped_load)
7054 {
7055 first_stmt = GROUP_FIRST_ELEMENT (stmt_info);
7056 first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
7057 group_size = GROUP_SIZE (vinfo_for_stmt (first_stmt));
7058 ref_type = get_group_alias_ptr_type (first_stmt);
7059 }
7060 else
7061 {
7062 first_stmt = stmt;
7063 first_dr = dr;
7064 group_size = 1;
7065 ref_type = reference_alias_ptr_type (DR_REF (first_dr));
7066 }
7067
7068 stride_base
7069 = fold_build_pointer_plus
7070 (DR_BASE_ADDRESS (first_dr),
7071 size_binop (PLUS_EXPR,
7072 convert_to_ptrofftype (DR_OFFSET (first_dr)),
7073 convert_to_ptrofftype (DR_INIT (first_dr))));
7074 stride_step = fold_convert (sizetype, DR_STEP (first_dr));
7075
7076 /* For a load with a loop-invariant stride that is not a power
7077 of two (i.e. not a grouped access) like so:
7078
7079 for (i = 0; i < n; i += stride)
7080 ... = array[i];
7081
7082 we generate a new induction variable and new accesses to
7083 form a new vector (or vectors, depending on ncopies):
7084
7085 for (j = 0; ; j += VF*stride)
7086 tmp1 = array[j];
7087 tmp2 = array[j + stride];
7088 ...
7089 vectemp = {tmp1, tmp2, ...}
7090 */
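/* Illustrative (hypothetical) instance of the scheme above, assuming
   a four-element vector type and stride 3: each vector is assembled
   from the scalar loads array[j], array[j + 3], array[j + 6] and
   array[j + 9], and j is advanced by 4 * 3 elements per vectorized
   iteration.  */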
7091
7092 ivstep = fold_build2 (MULT_EXPR, TREE_TYPE (stride_step), stride_step,
7093 build_int_cst (TREE_TYPE (stride_step), vf));
7094
7095 standard_iv_increment_position (loop, &incr_gsi, &insert_after);
7096
7097 create_iv (unshare_expr (stride_base), unshare_expr (ivstep), NULL,
7098 loop, &incr_gsi, insert_after,
7099 &offvar, NULL);
7100 incr = gsi_stmt (incr_gsi);
7101 set_vinfo_for_stmt (incr, new_stmt_vec_info (incr, loop_vinfo));
7102
7103 stride_step = force_gimple_operand (unshare_expr (stride_step),
7104 &stmts, true, NULL_TREE);
7105 if (stmts)
7106 gsi_insert_seq_on_edge_immediate (loop_preheader_edge (loop), stmts);
7107
7108 prev_stmt_info = NULL;
7109 running_off = offvar;
7110 alias_off = build_int_cst (ref_type, 0);
7111 int nloads = nunits;
7112 int lnel = 1;
7113 tree ltype = TREE_TYPE (vectype);
7114 tree lvectype = vectype;
7115 auto_vec<tree> dr_chain;
7116 if (memory_access_type == VMAT_STRIDED_SLP)
7117 {
7118 if (group_size < nunits)
7119 {
7120 /* First check if vec_init optab supports construction from
7121 vector elts directly. */
7122 scalar_mode elmode = SCALAR_TYPE_MODE (TREE_TYPE (vectype));
7123 machine_mode vmode;
7124 if (mode_for_vector (elmode, group_size).exists (&vmode)
7125 && VECTOR_MODE_P (vmode)
7126 && (convert_optab_handler (vec_init_optab,
7127 TYPE_MODE (vectype), vmode)
7128 != CODE_FOR_nothing))
7129 {
7130 nloads = nunits / group_size;
7131 lnel = group_size;
7132 ltype = build_vector_type (TREE_TYPE (vectype), group_size);
7133 }
7134 else
7135 {
7136 /* Otherwise avoid emitting a constructor of vector elements
7137 by performing the loads using an integer type of the same
7138 size, constructing a vector of those and then
7139 re-interpreting it as the original vector type.
7140 This avoids a huge runtime penalty due to the general
7141 inability to perform store forwarding from smaller stores
7142 to a larger load. */
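/* Hypothetical example of this fallback, assuming a V4SI vector
   type and group_size == 2: instead of four SImode element loads
   feeding a V4SI constructor, emit two DImode loads, build a V2DI
   vector from them and VIEW_CONVERT the result back to V4SI.  */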
7143 unsigned lsize
7144 = group_size * TYPE_PRECISION (TREE_TYPE (vectype));
7145 elmode = int_mode_for_size (lsize, 0).require ();
7146 /* If we can't construct such a vector, fall back to
7147 element loads of the original vector type.  */
7148 if (mode_for_vector (elmode,
7149 nunits / group_size).exists (&vmode)
7150 && VECTOR_MODE_P (vmode)
7151 && (convert_optab_handler (vec_init_optab, vmode, elmode)
7152 != CODE_FOR_nothing))
7153 {
7154 nloads = nunits / group_size;
7155 lnel = group_size;
7156 ltype = build_nonstandard_integer_type (lsize, 1);
7157 lvectype = build_vector_type (ltype, nloads);
7158 }
7159 }
7160 }
7161 else
7162 {
7163 nloads = 1;
7164 lnel = nunits;
7165 ltype = vectype;
7166 }
7167 ltype = build_aligned_type (ltype, TYPE_ALIGN (TREE_TYPE (vectype)));
7168 }
7169 if (slp)
7170 {
7171 /* For SLP permutation support we need to load the whole group,
7172 not only the number of vector stmts the permutation result
7173 fits in. */
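/* Worked example with hypothetical numbers: for group_size == 3,
   a constant VF of 4 and nunits == 4 this computes
   ncopies = (3 * 4 + 4 - 1) / 4 == 3 vector loads to cover the
   whole group, even if the permutation result would fit in fewer
   vectors.  */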
7174 if (slp_perm)
7175 {
7176 /* We don't yet generate SLP_TREE_LOAD_PERMUTATIONs for
7177 variable VF. */
7178 unsigned int const_vf = vf.to_constant ();
7179 ncopies = (group_size * const_vf + nunits - 1) / nunits;
7180 dr_chain.create (ncopies);
7181 }
7182 else
7183 ncopies = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
7184 }
7185 int group_el = 0;
7186 unsigned HOST_WIDE_INT
7187 elsz = tree_to_uhwi (TYPE_SIZE_UNIT (TREE_TYPE (vectype)));
7188 for (j = 0; j < ncopies; j++)
7189 {
7190 if (nloads > 1)
7191 vec_alloc (v, nloads);
7192 for (i = 0; i < nloads; i++)
7193 {
7194 tree this_off = build_int_cst (TREE_TYPE (alias_off),
7195 group_el * elsz);
7196 new_stmt = gimple_build_assign (make_ssa_name (ltype),
7197 build2 (MEM_REF, ltype,
7198 running_off, this_off));
7199 vect_finish_stmt_generation (stmt, new_stmt, gsi);
7200 if (nloads > 1)
7201 CONSTRUCTOR_APPEND_ELT (v, NULL_TREE,
7202 gimple_assign_lhs (new_stmt));
7203
7204 group_el += lnel;
7205 if (! slp
7206 || group_el == group_size)
7207 {
7208 tree newoff = copy_ssa_name (running_off);
7209 gimple *incr = gimple_build_assign (newoff, POINTER_PLUS_EXPR,
7210 running_off, stride_step);
7211 vect_finish_stmt_generation (stmt, incr, gsi);
7212
7213 running_off = newoff;
7214 group_el = 0;
7215 }
7216 }
7217 if (nloads > 1)
7218 {
7219 tree vec_inv = build_constructor (lvectype, v);
7220 new_temp = vect_init_vector (stmt, vec_inv, lvectype, gsi);
7221 new_stmt = SSA_NAME_DEF_STMT (new_temp);
7222 if (lvectype != vectype)
7223 {
7224 new_stmt = gimple_build_assign (make_ssa_name (vectype),
7225 VIEW_CONVERT_EXPR,
7226 build1 (VIEW_CONVERT_EXPR,
7227 vectype, new_temp));
7228 vect_finish_stmt_generation (stmt, new_stmt, gsi);
7229 }
7230 }
7231
7232 if (slp)
7233 {
7234 if (slp_perm)
7235 dr_chain.quick_push (gimple_assign_lhs (new_stmt));
7236 else
7237 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
7238 }
7239 else
7240 {
7241 if (j == 0)
7242 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
7243 else
7244 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
7245 prev_stmt_info = vinfo_for_stmt (new_stmt);
7246 }
7247 }
7248 if (slp_perm)
7249 {
7250 unsigned n_perms;
7251 vect_transform_slp_perm_load (slp_node, dr_chain, gsi, vf,
7252 slp_node_instance, false, &n_perms);
7253 }
7254 return true;
7255 }
7256
7257 if (grouped_load)
7258 {
7259 first_stmt = GROUP_FIRST_ELEMENT (stmt_info);
7260 group_size = GROUP_SIZE (vinfo_for_stmt (first_stmt));
7261 /* For SLP vectorization we directly vectorize a subchain
7262 without permutation. */
7263 if (slp && ! SLP_TREE_LOAD_PERMUTATION (slp_node).exists ())
7264 first_stmt = SLP_TREE_SCALAR_STMTS (slp_node)[0];
7265 /* For BB vectorization always use the first stmt to base
7266 the data ref pointer on. */
7267 if (bb_vinfo)
7268 first_stmt_for_drptr = SLP_TREE_SCALAR_STMTS (slp_node)[0];
7269
7270 /* Check if the chain of loads is already vectorized. */
7271 if (STMT_VINFO_VEC_STMT (vinfo_for_stmt (first_stmt))
7272 /* For SLP we would need to copy over SLP_TREE_VEC_STMTS.
7273 ??? But we can only do so if there is exactly one,
7274 as we have no way to get at the rest.  Leave the CSE
7275 opportunity alone.
7276 ??? With the group load eventually participating
7277 in multiple different permutations (having multiple
7278 slp nodes which refer to the same group) such CSE
7279 would even produce wrong code.  See PR56270. */
7280 && !slp)
7281 {
7282 *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
7283 return true;
7284 }
7285 first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
7286 group_gap_adj = 0;
7287
7288 /* VEC_NUM is the number of vect stmts to be created for this group. */
7289 if (slp)
7290 {
7291 grouped_load = false;
7292 /* For SLP permutation support we need to load the whole group,
7293 not only the number of vector stmts the permutation result
7294 fits in. */
7295 if (slp_perm)
7296 {
7297 /* We don't yet generate SLP_TREE_LOAD_PERMUTATIONs for
7298 variable VF. */
7299 unsigned int const_vf = vf.to_constant ();
7300 vec_num = (group_size * const_vf + nunits - 1) / nunits;
7301 group_gap_adj = vf * group_size - nunits * vec_num;
7302 }
7303 else
7304 {
7305 vec_num = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
7306 group_gap_adj
7307 = group_size - SLP_INSTANCE_GROUP_SIZE (slp_node_instance);
7308 }
7309 }
7310 else
7311 vec_num = group_size;
7312
7313 ref_type = get_group_alias_ptr_type (first_stmt);
7314 }
7315 else
7316 {
7317 first_stmt = stmt;
7318 first_dr = dr;
7319 group_size = vec_num = 1;
7320 group_gap_adj = 0;
7321 ref_type = reference_alias_ptr_type (DR_REF (first_dr));
7322 }
7323
7324 alignment_support_scheme = vect_supportable_dr_alignment (first_dr, false);
7325 gcc_assert (alignment_support_scheme);
7326 /* Targets with load-lane instructions must not require explicit
7327 realignment. */
7328 gcc_assert (memory_access_type != VMAT_LOAD_STORE_LANES
7329 || alignment_support_scheme == dr_aligned
7330 || alignment_support_scheme == dr_unaligned_supported);
7331
7332 /* In case the vectorization factor (VF) is bigger than the number
7333 of elements that we can fit in a vectype (nunits), we have to generate
7334 more than one vector stmt - i.e - we need to "unroll" the
7335 vector stmt by a factor VF/nunits. In doing so, we record a pointer
7336 from one copy of the vector stmt to the next, in the field
7337 STMT_VINFO_RELATED_STMT. This is necessary in order to allow following
7338 stages to find the correct vector defs to be used when vectorizing
7339 stmts that use the defs of the current stmt. The example below
7340 illustrates the vectorization process when VF=16 and nunits=4 (i.e., we
7341 need to create 4 vectorized stmts):
7342
7343 before vectorization:
7344 RELATED_STMT VEC_STMT
7345 S1: x = memref - -
7346 S2: z = x + 1 - -
7347
7348 step 1: vectorize stmt S1:
7349 We first create the vector stmt VS1_0, and, as usual, record a
7350 pointer to it in the STMT_VINFO_VEC_STMT of the scalar stmt S1.
7351 Next, we create the vector stmt VS1_1, and record a pointer to
7352 it in the STMT_VINFO_RELATED_STMT of the vector stmt VS1_0.
7353 Similarly, for VS1_2 and VS1_3. This is the resulting chain of
7354 stmts and pointers:
7355 RELATED_STMT VEC_STMT
7356 VS1_0: vx0 = memref0 VS1_1 -
7357 VS1_1: vx1 = memref1 VS1_2 -
7358 VS1_2: vx2 = memref2 VS1_3 -
7359 VS1_3: vx3 = memref3 - -
7360 S1: x = load - VS1_0
7361 S2: z = x + 1 - -
7362
7363 See the documentation of vect_get_vec_def_for_stmt_copy for how the
7364 information we recorded in the RELATED_STMT field is used to vectorize
7365 stmt S2.  */
7366
7367 /* In case of interleaving (non-unit grouped access):
7368
7369 S1: x2 = &base + 2
7370 S2: x0 = &base
7371 S3: x1 = &base + 1
7372 S4: x3 = &base + 3
7373
7374 Vectorized loads are created in the order of memory accesses
7375 starting from the access of the first stmt of the chain:
7376
7377 VS1: vx0 = &base
7378 VS2: vx1 = &base + vec_size*1
7379 VS3: vx3 = &base + vec_size*2
7380 VS4: vx4 = &base + vec_size*3
7381
7382 Then permutation statements are generated:
7383
7384 VS5: vx5 = VEC_PERM_EXPR < vx0, vx1, { 0, 2, ..., i*2 } >
7385 VS6: vx6 = VEC_PERM_EXPR < vx0, vx1, { 1, 3, ..., i*2+1 } >
7386 ...
7387
7388 And they are put in STMT_VINFO_VEC_STMT of the corresponding scalar stmts
7389 (the order of the data-refs in the output of vect_permute_load_chain
7390 corresponds to the order of scalar stmts in the interleaving chain - see
7391 the documentation of vect_permute_load_chain()).
7392 The generation of permutation stmts and recording them in
7393 STMT_VINFO_VEC_STMT is done in vect_transform_grouped_load().
7394
7395 In case of both multiple types and interleaving, the vector loads and
7396 permutation stmts above are created for every copy. The result vector
7397 stmts are put in STMT_VINFO_VEC_STMT for the first copy and in the
7398 corresponding STMT_VINFO_RELATED_STMT for the next copies. */
7399
7400 /* If the data reference is aligned (dr_aligned) or potentially unaligned
7401 on a target that supports unaligned accesses (dr_unaligned_supported)
7402 we generate the following code:
7403 p = initial_addr;
7404 indx = 0;
7405 loop {
7406 p = p + indx * vectype_size;
7407 vec_dest = *(p);
7408 indx = indx + 1;
7409 }
7410
7411 Otherwise, the data reference is potentially unaligned on a target that
7412 does not support unaligned accesses (dr_explicit_realign_optimized) -
7413 then generate the following code, in which the data in each iteration is
7414 obtained by two vector loads, one from the previous iteration, and one
7415 from the current iteration:
7416 p1 = initial_addr;
7417 msq_init = *(floor(p1))
7418 p2 = initial_addr + VS - 1;
7419 realignment_token = call target_builtin;
7420 indx = 0;
7421 loop {
7422 p2 = p2 + indx * vectype_size
7423 lsq = *(floor(p2))
7424 vec_dest = realign_load (msq, lsq, realignment_token)
7425 indx = indx + 1;
7426 msq = lsq;
7427 } */
7428
7429 /* If the misalignment remains the same throughout the execution of the
7430 loop, we can create the init_addr and permutation mask at the loop
7431 preheader. Otherwise, it needs to be created inside the loop.
7432 This can only occur when vectorizing memory accesses in the inner-loop
7433 nested within an outer-loop that is being vectorized. */
7434
7435 if (nested_in_vect_loop
7436 && (DR_STEP_ALIGNMENT (dr) % GET_MODE_SIZE (TYPE_MODE (vectype))) != 0)
7437 {
7438 gcc_assert (alignment_support_scheme != dr_explicit_realign_optimized);
7439 compute_in_loop = true;
7440 }
7441
7442 if ((alignment_support_scheme == dr_explicit_realign_optimized
7443 || alignment_support_scheme == dr_explicit_realign)
7444 && !compute_in_loop)
7445 {
7446 msq = vect_setup_realignment (first_stmt, gsi, &realignment_token,
7447 alignment_support_scheme, NULL_TREE,
7448 &at_loop);
7449 if (alignment_support_scheme == dr_explicit_realign_optimized)
7450 {
7451 phi = as_a <gphi *> (SSA_NAME_DEF_STMT (msq));
7452 byte_offset = size_binop (MINUS_EXPR, TYPE_SIZE_UNIT (vectype),
7453 size_one_node);
7454 }
7455 }
7456 else
7457 at_loop = loop;
7458
7459 if (memory_access_type == VMAT_CONTIGUOUS_REVERSE)
7460 offset = size_int (-TYPE_VECTOR_SUBPARTS (vectype) + 1);
7461
7462 if (memory_access_type == VMAT_LOAD_STORE_LANES)
7463 aggr_type = build_array_type_nelts (elem_type, vec_num * nunits);
7464 else
7465 aggr_type = vectype;
7466
7467 prev_stmt_info = NULL;
7468 int group_elt = 0;
7469 for (j = 0; j < ncopies; j++)
7470 {
7471 /* 1. Create the vector or array pointer update chain. */
7472 if (j == 0)
7473 {
7474 bool simd_lane_access_p
7475 = STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info);
7476 if (simd_lane_access_p
7477 && TREE_CODE (DR_BASE_ADDRESS (first_dr)) == ADDR_EXPR
7478 && VAR_P (TREE_OPERAND (DR_BASE_ADDRESS (first_dr), 0))
7479 && integer_zerop (DR_OFFSET (first_dr))
7480 && integer_zerop (DR_INIT (first_dr))
7481 && alias_sets_conflict_p (get_alias_set (aggr_type),
7482 get_alias_set (TREE_TYPE (ref_type)))
7483 && (alignment_support_scheme == dr_aligned
7484 || alignment_support_scheme == dr_unaligned_supported))
7485 {
7486 dataref_ptr = unshare_expr (DR_BASE_ADDRESS (first_dr));
7487 dataref_offset = build_int_cst (ref_type, 0);
7488 inv_p = false;
7489 }
7490 else if (first_stmt_for_drptr
7491 && first_stmt != first_stmt_for_drptr)
7492 {
7493 dataref_ptr
7494 = vect_create_data_ref_ptr (first_stmt_for_drptr, aggr_type,
7495 at_loop, offset, &dummy, gsi,
7496 &ptr_incr, simd_lane_access_p,
7497 &inv_p, byte_offset);
7498 /* Adjust the pointer by the difference to first_stmt. */
7499 data_reference_p ptrdr
7500 = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt_for_drptr));
7501 tree diff = fold_convert (sizetype,
7502 size_binop (MINUS_EXPR,
7503 DR_INIT (first_dr),
7504 DR_INIT (ptrdr)));
7505 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi,
7506 stmt, diff);
7507 }
7508 else
7509 dataref_ptr
7510 = vect_create_data_ref_ptr (first_stmt, aggr_type, at_loop,
7511 offset, &dummy, gsi, &ptr_incr,
7512 simd_lane_access_p, &inv_p,
7513 byte_offset);
7514 }
7515 else if (dataref_offset)
7516 dataref_offset = int_const_binop (PLUS_EXPR, dataref_offset,
7517 TYPE_SIZE_UNIT (aggr_type));
7518 else
7519 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi, stmt,
7520 TYPE_SIZE_UNIT (aggr_type));
7521
7522 if (grouped_load || slp_perm)
7523 dr_chain.create (vec_num);
7524
7525 if (memory_access_type == VMAT_LOAD_STORE_LANES)
7526 {
7527 tree vec_array;
7528
7529 vec_array = create_vector_array (vectype, vec_num);
7530
7531 /* Emit:
7532 VEC_ARRAY = LOAD_LANES (MEM_REF[...all elements...]). */
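/* As a hypothetical illustration, for a group of two interleaved
   int fields and a four-element vector type, LOAD_LANES reads
   a0 b0 a1 b1 a2 b2 a3 b3 from memory and fills the array with
   vec_array[0] = { a0, a1, a2, a3 } and
   vec_array[1] = { b0, b1, b2, b3 }.  */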
7533 data_ref = create_array_ref (aggr_type, dataref_ptr, ref_type);
7534 gcall *call = gimple_build_call_internal (IFN_LOAD_LANES, 1,
7535 data_ref);
7536 gimple_call_set_lhs (call, vec_array);
7537 gimple_call_set_nothrow (call, true);
7538 new_stmt = call;
7539 vect_finish_stmt_generation (stmt, new_stmt, gsi);
7540
7541 /* Extract each vector into an SSA_NAME. */
7542 for (i = 0; i < vec_num; i++)
7543 {
7544 new_temp = read_vector_array (stmt, gsi, scalar_dest,
7545 vec_array, i);
7546 dr_chain.quick_push (new_temp);
7547 }
7548
7549 /* Record the mapping between SSA_NAMEs and statements. */
7550 vect_record_grouped_load_vectors (stmt, dr_chain);
7551 }
7552 else
7553 {
7554 for (i = 0; i < vec_num; i++)
7555 {
7556 if (i > 0)
7557 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi,
7558 stmt, NULL_TREE);
7559
7560 /* 2. Create the vector-load in the loop. */
7561 switch (alignment_support_scheme)
7562 {
7563 case dr_aligned:
7564 case dr_unaligned_supported:
7565 {
7566 unsigned int align, misalign;
7567
7568 data_ref
7569 = fold_build2 (MEM_REF, vectype, dataref_ptr,
7570 dataref_offset
7571 ? dataref_offset
7572 : build_int_cst (ref_type, 0));
7573 align = DR_TARGET_ALIGNMENT (dr);
7574 if (alignment_support_scheme == dr_aligned)
7575 {
7576 gcc_assert (aligned_access_p (first_dr));
7577 misalign = 0;
7578 }
7579 else if (DR_MISALIGNMENT (first_dr) == -1)
7580 {
7581 align = dr_alignment (vect_dr_behavior (first_dr));
7582 misalign = 0;
7583 TREE_TYPE (data_ref)
7584 = build_aligned_type (TREE_TYPE (data_ref),
7585 align * BITS_PER_UNIT);
7586 }
7587 else
7588 {
7589 TREE_TYPE (data_ref)
7590 = build_aligned_type (TREE_TYPE (data_ref),
7591 TYPE_ALIGN (elem_type));
7592 misalign = DR_MISALIGNMENT (first_dr);
7593 }
7594 if (dataref_offset == NULL_TREE
7595 && TREE_CODE (dataref_ptr) == SSA_NAME)
7596 set_ptr_info_alignment (get_ptr_info (dataref_ptr),
7597 align, misalign);
7598 break;
7599 }
7600 case dr_explicit_realign:
7601 {
7602 tree ptr, bump;
7603
7604 tree vs = size_int (TYPE_VECTOR_SUBPARTS (vectype));
7605
7606 if (compute_in_loop)
7607 msq = vect_setup_realignment (first_stmt, gsi,
7608 &realignment_token,
7609 dr_explicit_realign,
7610 dataref_ptr, NULL);
7611
7612 if (TREE_CODE (dataref_ptr) == SSA_NAME)
7613 ptr = copy_ssa_name (dataref_ptr);
7614 else
7615 ptr = make_ssa_name (TREE_TYPE (dataref_ptr));
7616 unsigned int align = DR_TARGET_ALIGNMENT (first_dr);
7617 new_stmt = gimple_build_assign
7618 (ptr, BIT_AND_EXPR, dataref_ptr,
7619 build_int_cst
7620 (TREE_TYPE (dataref_ptr),
7621 -(HOST_WIDE_INT) align));
7622 vect_finish_stmt_generation (stmt, new_stmt, gsi);
7623 data_ref
7624 = build2 (MEM_REF, vectype, ptr,
7625 build_int_cst (ref_type, 0));
7626 vec_dest = vect_create_destination_var (scalar_dest,
7627 vectype);
7628 new_stmt = gimple_build_assign (vec_dest, data_ref);
7629 new_temp = make_ssa_name (vec_dest, new_stmt);
7630 gimple_assign_set_lhs (new_stmt, new_temp);
7631 gimple_set_vdef (new_stmt, gimple_vdef (stmt));
7632 gimple_set_vuse (new_stmt, gimple_vuse (stmt));
7633 vect_finish_stmt_generation (stmt, new_stmt, gsi);
7634 msq = new_temp;
7635
7636 bump = size_binop (MULT_EXPR, vs,
7637 TYPE_SIZE_UNIT (elem_type));
7638 bump = size_binop (MINUS_EXPR, bump, size_one_node);
7639 ptr = bump_vector_ptr (dataref_ptr, NULL, gsi, stmt, bump);
7640 new_stmt = gimple_build_assign
7641 (NULL_TREE, BIT_AND_EXPR, ptr,
7642 build_int_cst
7643 (TREE_TYPE (ptr), -(HOST_WIDE_INT) align));
7644 ptr = copy_ssa_name (ptr, new_stmt);
7645 gimple_assign_set_lhs (new_stmt, ptr);
7646 vect_finish_stmt_generation (stmt, new_stmt, gsi);
7647 data_ref
7648 = build2 (MEM_REF, vectype, ptr,
7649 build_int_cst (ref_type, 0));
7650 break;
7651 }
7652 case dr_explicit_realign_optimized:
7653 {
7654 if (TREE_CODE (dataref_ptr) == SSA_NAME)
7655 new_temp = copy_ssa_name (dataref_ptr);
7656 else
7657 new_temp = make_ssa_name (TREE_TYPE (dataref_ptr));
7658 unsigned int align = DR_TARGET_ALIGNMENT (first_dr);
7659 new_stmt = gimple_build_assign
7660 (new_temp, BIT_AND_EXPR, dataref_ptr,
7661 build_int_cst (TREE_TYPE (dataref_ptr),
7662 -(HOST_WIDE_INT) align));
7663 vect_finish_stmt_generation (stmt, new_stmt, gsi);
7664 data_ref
7665 = build2 (MEM_REF, vectype, new_temp,
7666 build_int_cst (ref_type, 0));
7667 break;
7668 }
7669 default:
7670 gcc_unreachable ();
7671 }
7672 vec_dest = vect_create_destination_var (scalar_dest, vectype);
7673 new_stmt = gimple_build_assign (vec_dest, data_ref);
7674 new_temp = make_ssa_name (vec_dest, new_stmt);
7675 gimple_assign_set_lhs (new_stmt, new_temp);
7676 vect_finish_stmt_generation (stmt, new_stmt, gsi);
7677
7678 /* 3. Handle explicit realignment if necessary/supported.
7679 Create in loop:
7680 vec_dest = realign_load (msq, lsq, realignment_token) */
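/* Conceptually (hypothetical element counts), with a four-element
   vector whose data starts one element past the aligned address,
   the REALIGN_LOAD selects { msq[1], msq[2], msq[3], lsq[0] },
   msq holding the lower aligned part and lsq the upper aligned
   part of the desired data; the exact selection is encoded by
   REALIGNMENT_TOKEN and is target specific.  */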
7681 if (alignment_support_scheme == dr_explicit_realign_optimized
7682 || alignment_support_scheme == dr_explicit_realign)
7683 {
7684 lsq = gimple_assign_lhs (new_stmt);
7685 if (!realignment_token)
7686 realignment_token = dataref_ptr;
7687 vec_dest = vect_create_destination_var (scalar_dest, vectype);
7688 new_stmt = gimple_build_assign (vec_dest, REALIGN_LOAD_EXPR,
7689 msq, lsq, realignment_token);
7690 new_temp = make_ssa_name (vec_dest, new_stmt);
7691 gimple_assign_set_lhs (new_stmt, new_temp);
7692 vect_finish_stmt_generation (stmt, new_stmt, gsi);
7693
7694 if (alignment_support_scheme == dr_explicit_realign_optimized)
7695 {
7696 gcc_assert (phi);
7697 if (i == vec_num - 1 && j == ncopies - 1)
7698 add_phi_arg (phi, lsq,
7699 loop_latch_edge (containing_loop),
7700 UNKNOWN_LOCATION);
7701 msq = lsq;
7702 }
7703 }
7704
7705 /* 4. Handle invariant-load. */
7706 if (inv_p && !bb_vinfo)
7707 {
7708 gcc_assert (!grouped_load);
7709 /* If we have versioned for aliasing or the loop doesn't
7710 have any data dependencies that would preclude this,
7711 then we are sure this is a loop invariant load and
7712 thus we can insert it on the preheader edge. */
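/* Sketch of the hoisting with hypothetical names: for a load
   x_1 = *p_2 with p_2 invariant in the loop, emit tem_3 = *p_2 on
   the preheader edge and build the vector { tem_3, tem_3, ... }
   there as well, instead of loading and splatting inside the loop
   body.  */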
7713 if (LOOP_VINFO_NO_DATA_DEPENDENCIES (loop_vinfo)
7714 && !nested_in_vect_loop
7715 && hoist_defs_of_uses (stmt, loop))
7716 {
7717 if (dump_enabled_p ())
7718 {
7719 dump_printf_loc (MSG_NOTE, vect_location,
7720 "hoisting out of the vectorized "
7721 "loop: ");
7722 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
7723 }
7724 tree tem = copy_ssa_name (scalar_dest);
7725 gsi_insert_on_edge_immediate
7726 (loop_preheader_edge (loop),
7727 gimple_build_assign (tem,
7728 unshare_expr
7729 (gimple_assign_rhs1 (stmt))));
7730 new_temp = vect_init_vector (stmt, tem, vectype, NULL);
7731 new_stmt = SSA_NAME_DEF_STMT (new_temp);
7732 set_vinfo_for_stmt (new_stmt,
7733 new_stmt_vec_info (new_stmt, vinfo));
7734 }
7735 else
7736 {
7737 gimple_stmt_iterator gsi2 = *gsi;
7738 gsi_next (&gsi2);
7739 new_temp = vect_init_vector (stmt, scalar_dest,
7740 vectype, &gsi2);
7741 new_stmt = SSA_NAME_DEF_STMT (new_temp);
7742 }
7743 }
7744
7745 if (memory_access_type == VMAT_CONTIGUOUS_REVERSE)
7746 {
7747 tree perm_mask = perm_mask_for_reverse (vectype);
7748 new_temp = permute_vec_elements (new_temp, new_temp,
7749 perm_mask, stmt, gsi);
7750 new_stmt = SSA_NAME_DEF_STMT (new_temp);
7751 }
7752
7753 /* Collect vector loads and later create their permutation in
7754 vect_transform_grouped_load (). */
7755 if (grouped_load || slp_perm)
7756 dr_chain.quick_push (new_temp);
7757
7758 /* Store vector loads in the corresponding SLP_NODE. */
7759 if (slp && !slp_perm)
7760 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
7761
7762 /* With SLP permutation we load the gaps as well; without
7763 it we need to skip the gaps after we have fully loaded
7764 all elements.  group_gap_adj is GROUP_SIZE here.  */
7765 group_elt += nunits;
7766 if (maybe_ne (group_gap_adj, 0U)
7767 && !slp_perm
7768 && known_eq (group_elt, group_size - group_gap_adj))
7769 {
7770 poly_wide_int bump_val
7771 = (wi::to_wide (TYPE_SIZE_UNIT (elem_type))
7772 * group_gap_adj);
7773 tree bump = wide_int_to_tree (sizetype, bump_val);
7774 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi,
7775 stmt, bump);
7776 group_elt = 0;
7777 }
7778 }
7779 /* Bump the vector pointer to account for a gap or for excess
7780 elements loaded for a permuted SLP load. */
7781 if (maybe_ne (group_gap_adj, 0U) && slp_perm)
7782 {
7783 poly_wide_int bump_val
7784 = (wi::to_wide (TYPE_SIZE_UNIT (elem_type))
7785 * group_gap_adj);
7786 tree bump = wide_int_to_tree (sizetype, bump_val);
7787 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi,
7788 stmt, bump);
7789 }
7790 }
7791
7792 if (slp && !slp_perm)
7793 continue;
7794
7795 if (slp_perm)
7796 {
7797 unsigned n_perms;
7798 if (!vect_transform_slp_perm_load (slp_node, dr_chain, gsi, vf,
7799 slp_node_instance, false,
7800 &n_perms))
7801 {
7802 dr_chain.release ();
7803 return false;
7804 }
7805 }
7806 else
7807 {
7808 if (grouped_load)
7809 {
7810 if (memory_access_type != VMAT_LOAD_STORE_LANES)
7811 vect_transform_grouped_load (stmt, dr_chain, group_size, gsi);
7812 *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
7813 }
7814 else
7815 {
7816 if (j == 0)
7817 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
7818 else
7819 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
7820 prev_stmt_info = vinfo_for_stmt (new_stmt);
7821 }
7822 }
7823 dr_chain.release ();
7824 }
7825
7826 return true;
7827 }
7828
7829 /* Function vect_is_simple_cond.
7830
7831 Input:
7832 LOOP - the loop that is being vectorized.
7833 COND - Condition that is checked for simple use.
7834
7835 Output:
7836 *COMP_VECTYPE - the vector type for the comparison.
7837 *DTS - The def types for the arguments of the comparison
7838
7839 Returns whether a COND can be vectorized.  Checks whether
7840 condition operands are supportable using vect_is_simple_use.  */
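/* Hypothetical examples of a simple COND: a scalar boolean SSA name
   such as mask_1 (the mask case below), or a comparison such as
   a_2 < b_3 whose operands are SSA names or invariant constants.  */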
7841
7842 static bool
7843 vect_is_simple_cond (tree cond, vec_info *vinfo,
7844 tree *comp_vectype, enum vect_def_type *dts,
7845 tree vectype)
7846 {
7847 tree lhs, rhs;
7848 tree vectype1 = NULL_TREE, vectype2 = NULL_TREE;
7849
7850 /* Mask case. */
7851 if (TREE_CODE (cond) == SSA_NAME
7852 && VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (cond)))
7853 {
7854 gimple *lhs_def_stmt = SSA_NAME_DEF_STMT (cond);
7855 if (!vect_is_simple_use (cond, vinfo, &lhs_def_stmt,
7856 &dts[0], comp_vectype)
7857 || !*comp_vectype
7858 || !VECTOR_BOOLEAN_TYPE_P (*comp_vectype))
7859 return false;
7860 return true;
7861 }
7862
7863 if (!COMPARISON_CLASS_P (cond))
7864 return false;
7865
7866 lhs = TREE_OPERAND (cond, 0);
7867 rhs = TREE_OPERAND (cond, 1);
7868
7869 if (TREE_CODE (lhs) == SSA_NAME)
7870 {
7871 gimple *lhs_def_stmt = SSA_NAME_DEF_STMT (lhs);
7872 if (!vect_is_simple_use (lhs, vinfo, &lhs_def_stmt, &dts[0], &vectype1))
7873 return false;
7874 }
7875 else if (TREE_CODE (lhs) == INTEGER_CST || TREE_CODE (lhs) == REAL_CST
7876 || TREE_CODE (lhs) == FIXED_CST)
7877 dts[0] = vect_constant_def;
7878 else
7879 return false;
7880
7881 if (TREE_CODE (rhs) == SSA_NAME)
7882 {
7883 gimple *rhs_def_stmt = SSA_NAME_DEF_STMT (rhs);
7884 if (!vect_is_simple_use (rhs, vinfo, &rhs_def_stmt, &dts[1], &vectype2))
7885 return false;
7886 }
7887 else if (TREE_CODE (rhs) == INTEGER_CST || TREE_CODE (rhs) == REAL_CST
7888 || TREE_CODE (rhs) == FIXED_CST)
7889 dts[1] = vect_constant_def;
7890 else
7891 return false;
7892
7893 if (vectype1 && vectype2
7894 && TYPE_VECTOR_SUBPARTS (vectype1) != TYPE_VECTOR_SUBPARTS (vectype2))
7895 return false;
7896
7897 *comp_vectype = vectype1 ? vectype1 : vectype2;
7898 /* Invariant comparison. */
7899 if (! *comp_vectype)
7900 {
7901 tree scalar_type = TREE_TYPE (lhs);
7902 /* If we can widen the comparison to match vectype do so. */
7903 if (INTEGRAL_TYPE_P (scalar_type)
7904 && tree_int_cst_lt (TYPE_SIZE (scalar_type),
7905 TYPE_SIZE (TREE_TYPE (vectype))))
7906 scalar_type = build_nonstandard_integer_type
7907 (tree_to_uhwi (TYPE_SIZE (TREE_TYPE (vectype))),
7908 TYPE_UNSIGNED (scalar_type));
7909 *comp_vectype = get_vectype_for_scalar_type (scalar_type);
7910 }
7911
7912 return true;
7913 }
7914
7915 /* vectorizable_condition.
7916
7917 Check if STMT is a conditional modify expression that can be vectorized.
7918 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
7919 stmt using VEC_COND_EXPR to replace it, put it in VEC_STMT, and insert it
7920 at GSI.
7921
7922 When STMT is vectorized as a nested cycle, REDUC_DEF is the vector variable
7923 to be used at REDUC_INDEX (in the then clause if REDUC_INDEX is 1, and in
7924 the else clause if it is 2).
7925
7926 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
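/* Hypothetical sketch of the transformation: a scalar statement

     x_1 = a_2 < b_3 ? c_4 : d_5;

   becomes, roughly,

     vect_x = VEC_COND_EXPR <vect_a < vect_b, vect_c, vect_d>;

   with the comparison carried out on COMP_VECTYPE and the selection
   on the vector type of the then/else clauses.  */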
7927
7928 bool
7929 vectorizable_condition (gimple *stmt, gimple_stmt_iterator *gsi,
7930 gimple **vec_stmt, tree reduc_def, int reduc_index,
7931 slp_tree slp_node)
7932 {
7933 tree scalar_dest = NULL_TREE;
7934 tree vec_dest = NULL_TREE;
7935 tree cond_expr, cond_expr0 = NULL_TREE, cond_expr1 = NULL_TREE;
7936 tree then_clause, else_clause;
7937 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
7938 tree comp_vectype = NULL_TREE;
7939 tree vec_cond_lhs = NULL_TREE, vec_cond_rhs = NULL_TREE;
7940 tree vec_then_clause = NULL_TREE, vec_else_clause = NULL_TREE;
7941 tree vec_compare;
7942 tree new_temp;
7943 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
7944 enum vect_def_type dts[4]
7945 = {vect_unknown_def_type, vect_unknown_def_type,
7946 vect_unknown_def_type, vect_unknown_def_type};
7947 int ndts = 4;
7948 int ncopies;
7949 enum tree_code code, cond_code, bitop1 = NOP_EXPR, bitop2 = NOP_EXPR;
7950 stmt_vec_info prev_stmt_info = NULL;
7951 int i, j;
7952 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
7953 vec<tree> vec_oprnds0 = vNULL;
7954 vec<tree> vec_oprnds1 = vNULL;
7955 vec<tree> vec_oprnds2 = vNULL;
7956 vec<tree> vec_oprnds3 = vNULL;
7957 tree vec_cmp_type;
7958 bool masked = false;
7959
7960 if (reduc_index && STMT_SLP_TYPE (stmt_info))
7961 return false;
7962
7963 if (STMT_VINFO_VEC_REDUCTION_TYPE (stmt_info) == TREE_CODE_REDUCTION)
7964 {
7965 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
7966 return false;
7967
7968 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
7969 && !(STMT_VINFO_DEF_TYPE (stmt_info) == vect_nested_cycle
7970 && reduc_def))
7971 return false;
7972
7973 /* FORNOW: not yet supported. */
7974 if (STMT_VINFO_LIVE_P (stmt_info))
7975 {
7976 if (dump_enabled_p ())
7977 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
7978 "value used after loop.\n");
7979 return false;
7980 }
7981 }
7982
7983 /* Is this a vectorizable conditional operation? */
7984 if (!is_gimple_assign (stmt))
7985 return false;
7986
7987 code = gimple_assign_rhs_code (stmt);
7988
7989 if (code != COND_EXPR)
7990 return false;
7991
7992 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
7993 tree vectype1 = NULL_TREE, vectype2 = NULL_TREE;
7994
7995 if (slp_node)
7996 ncopies = 1;
7997 else
7998 ncopies = vect_get_num_copies (loop_vinfo, vectype);
7999
8000 gcc_assert (ncopies >= 1);
8001 if (reduc_index && ncopies > 1)
8002 return false; /* FORNOW */
8003
8004 cond_expr = gimple_assign_rhs1 (stmt);
8005 then_clause = gimple_assign_rhs2 (stmt);
8006 else_clause = gimple_assign_rhs3 (stmt);
8007
8008 if (!vect_is_simple_cond (cond_expr, stmt_info->vinfo,
8009 &comp_vectype, &dts[0], vectype)
8010 || !comp_vectype)
8011 return false;
8012
8013 gimple *def_stmt;
8014 if (!vect_is_simple_use (then_clause, stmt_info->vinfo, &def_stmt, &dts[2],
8015 &vectype1))
8016 return false;
8017 if (!vect_is_simple_use (else_clause, stmt_info->vinfo, &def_stmt, &dts[3],
8018 &vectype2))
8019 return false;
8020
8021 if (vectype1 && !useless_type_conversion_p (vectype, vectype1))
8022 return false;
8023
8024 if (vectype2 && !useless_type_conversion_p (vectype, vectype2))
8025 return false;
8026
8027 masked = !COMPARISON_CLASS_P (cond_expr);
8028 vec_cmp_type = build_same_sized_truth_vector_type (comp_vectype);
8029
8030 if (vec_cmp_type == NULL_TREE)
8031 return false;
8032
8033 cond_code = TREE_CODE (cond_expr);
8034 if (!masked)
8035 {
8036 cond_expr0 = TREE_OPERAND (cond_expr, 0);
8037 cond_expr1 = TREE_OPERAND (cond_expr, 1);
8038 }
8039
8040 if (!masked && VECTOR_BOOLEAN_TYPE_P (comp_vectype))
8041 {
8042 /* Boolean values may have another representation in vectors
8043 and therefore we prefer bit operations over comparison for
8044 them (which also works for scalar masks). We store opcodes
8045 to use in bitop1 and bitop2. Statement is vectorized as
8046 BITOP2 (rhs1 BITOP1 rhs2) or rhs1 BITOP2 (BITOP1 rhs2)
8047 depending on bitop1 and bitop2 arity. */
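/* For example, with boolean semantics and hypothetical operands:
   a > b on masks becomes a & ~b (bitop1 = BIT_NOT_EXPR applied to
   the second operand, bitop2 = BIT_AND_EXPR), and a <= b becomes
   ~a | b after the operands have been swapped.  */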
8048 switch (cond_code)
8049 {
8050 case GT_EXPR:
8051 bitop1 = BIT_NOT_EXPR;
8052 bitop2 = BIT_AND_EXPR;
8053 break;
8054 case GE_EXPR:
8055 bitop1 = BIT_NOT_EXPR;
8056 bitop2 = BIT_IOR_EXPR;
8057 break;
8058 case LT_EXPR:
8059 bitop1 = BIT_NOT_EXPR;
8060 bitop2 = BIT_AND_EXPR;
8061 std::swap (cond_expr0, cond_expr1);
8062 break;
8063 case LE_EXPR:
8064 bitop1 = BIT_NOT_EXPR;
8065 bitop2 = BIT_IOR_EXPR;
8066 std::swap (cond_expr0, cond_expr1);
8067 break;
8068 case NE_EXPR:
8069 bitop1 = BIT_XOR_EXPR;
8070 break;
8071 case EQ_EXPR:
8072 bitop1 = BIT_XOR_EXPR;
8073 bitop2 = BIT_NOT_EXPR;
8074 break;
8075 default:
8076 return false;
8077 }
8078 cond_code = SSA_NAME;
8079 }
8080
8081 if (!vec_stmt)
8082 {
8083 STMT_VINFO_TYPE (stmt_info) = condition_vec_info_type;
8084 if (bitop1 != NOP_EXPR)
8085 {
8086 machine_mode mode = TYPE_MODE (comp_vectype);
8087 optab optab;
8088
8089 optab = optab_for_tree_code (bitop1, comp_vectype, optab_default);
8090 if (!optab || optab_handler (optab, mode) == CODE_FOR_nothing)
8091 return false;
8092
8093 if (bitop2 != NOP_EXPR)
8094 {
8095 optab = optab_for_tree_code (bitop2, comp_vectype,
8096 optab_default);
8097 if (!optab || optab_handler (optab, mode) == CODE_FOR_nothing)
8098 return false;
8099 }
8100 }
8101 if (expand_vec_cond_expr_p (vectype, comp_vectype,
8102 cond_code))
8103 {
8104 vect_model_simple_cost (stmt_info, ncopies, dts, ndts, NULL, NULL);
8105 return true;
8106 }
8107 return false;
8108 }
8109
8110 /* Transform. */
8111
8112 if (!slp_node)
8113 {
8114 vec_oprnds0.create (1);
8115 vec_oprnds1.create (1);
8116 vec_oprnds2.create (1);
8117 vec_oprnds3.create (1);
8118 }
8119
8120 /* Handle def. */
8121 scalar_dest = gimple_assign_lhs (stmt);
8122 vec_dest = vect_create_destination_var (scalar_dest, vectype);
8123
8124 /* Handle cond expr. */
8125 for (j = 0; j < ncopies; j++)
8126 {
8127 gassign *new_stmt = NULL;
8128 if (j == 0)
8129 {
8130 if (slp_node)
8131 {
8132 auto_vec<tree, 4> ops;
8133 auto_vec<vec<tree>, 4> vec_defs;
8134
8135 if (masked)
8136 ops.safe_push (cond_expr);
8137 else
8138 {
8139 ops.safe_push (cond_expr0);
8140 ops.safe_push (cond_expr1);
8141 }
8142 ops.safe_push (then_clause);
8143 ops.safe_push (else_clause);
8144 vect_get_slp_defs (ops, slp_node, &vec_defs);
8145 vec_oprnds3 = vec_defs.pop ();
8146 vec_oprnds2 = vec_defs.pop ();
8147 if (!masked)
8148 vec_oprnds1 = vec_defs.pop ();
8149 vec_oprnds0 = vec_defs.pop ();
8150 }
8151 else
8152 {
8153 gimple *gtemp;
8154 if (masked)
8155 {
8156 vec_cond_lhs
8157 = vect_get_vec_def_for_operand (cond_expr, stmt,
8158 comp_vectype);
8159 vect_is_simple_use (cond_expr, stmt_info->vinfo,
8160 &gtemp, &dts[0]);
8161 }
8162 else
8163 {
8164 vec_cond_lhs
8165 = vect_get_vec_def_for_operand (cond_expr0,
8166 stmt, comp_vectype);
8167 vect_is_simple_use (cond_expr0, loop_vinfo, &gtemp, &dts[0]);
8168
8169 vec_cond_rhs
8170 = vect_get_vec_def_for_operand (cond_expr1,
8171 stmt, comp_vectype);
8172 vect_is_simple_use (cond_expr1, loop_vinfo, &gtemp, &dts[1]);
8173 }
8174 if (reduc_index == 1)
8175 vec_then_clause = reduc_def;
8176 else
8177 {
8178 vec_then_clause = vect_get_vec_def_for_operand (then_clause,
8179 stmt);
8180 vect_is_simple_use (then_clause, loop_vinfo,
8181 &gtemp, &dts[2]);
8182 }
8183 if (reduc_index == 2)
8184 vec_else_clause = reduc_def;
8185 else
8186 {
8187 vec_else_clause = vect_get_vec_def_for_operand (else_clause,
8188 stmt);
8189 vect_is_simple_use (else_clause, loop_vinfo, &gtemp, &dts[3]);
8190 }
8191 }
8192 }
8193 else
8194 {
8195 vec_cond_lhs
8196 = vect_get_vec_def_for_stmt_copy (dts[0],
8197 vec_oprnds0.pop ());
8198 if (!masked)
8199 vec_cond_rhs
8200 = vect_get_vec_def_for_stmt_copy (dts[1],
8201 vec_oprnds1.pop ());
8202
8203 vec_then_clause = vect_get_vec_def_for_stmt_copy (dts[2],
8204 vec_oprnds2.pop ());
8205 vec_else_clause = vect_get_vec_def_for_stmt_copy (dts[3],
8206 vec_oprnds3.pop ());
8207 }
8208
8209 if (!slp_node)
8210 {
8211 vec_oprnds0.quick_push (vec_cond_lhs);
8212 if (!masked)
8213 vec_oprnds1.quick_push (vec_cond_rhs);
8214 vec_oprnds2.quick_push (vec_then_clause);
8215 vec_oprnds3.quick_push (vec_else_clause);
8216 }
8217
8218 /* Arguments are ready. Create the new vector stmt. */
8219 FOR_EACH_VEC_ELT (vec_oprnds0, i, vec_cond_lhs)
8220 {
8221 vec_then_clause = vec_oprnds2[i];
8222 vec_else_clause = vec_oprnds3[i];
8223
8224 if (masked)
8225 vec_compare = vec_cond_lhs;
8226 else
8227 {
8228 vec_cond_rhs = vec_oprnds1[i];
8229 if (bitop1 == NOP_EXPR)
8230 vec_compare = build2 (cond_code, vec_cmp_type,
8231 vec_cond_lhs, vec_cond_rhs);
8232 else
8233 {
8234 new_temp = make_ssa_name (vec_cmp_type);
8235 if (bitop1 == BIT_NOT_EXPR)
8236 new_stmt = gimple_build_assign (new_temp, bitop1,
8237 vec_cond_rhs);
8238 else
8239 new_stmt
8240 = gimple_build_assign (new_temp, bitop1, vec_cond_lhs,
8241 vec_cond_rhs);
8242 vect_finish_stmt_generation (stmt, new_stmt, gsi);
8243 if (bitop2 == NOP_EXPR)
8244 vec_compare = new_temp;
8245 else if (bitop2 == BIT_NOT_EXPR)
8246 {
8247 /* Instead of doing ~x ? y : z do x ? z : y. */
8248 vec_compare = new_temp;
8249 std::swap (vec_then_clause, vec_else_clause);
8250 }
8251 else
8252 {
8253 vec_compare = make_ssa_name (vec_cmp_type);
8254 new_stmt
8255 = gimple_build_assign (vec_compare, bitop2,
8256 vec_cond_lhs, new_temp);
8257 vect_finish_stmt_generation (stmt, new_stmt, gsi);
8258 }
8259 }
8260 }
8261 new_temp = make_ssa_name (vec_dest);
8262 new_stmt = gimple_build_assign (new_temp, VEC_COND_EXPR,
8263 vec_compare, vec_then_clause,
8264 vec_else_clause);
8265 vect_finish_stmt_generation (stmt, new_stmt, gsi);
8266 if (slp_node)
8267 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
8268 }
8269
8270 if (slp_node)
8271 continue;
8272
8273 if (j == 0)
8274 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
8275 else
8276 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
8277
8278 prev_stmt_info = vinfo_for_stmt (new_stmt);
8279 }
8280
8281 vec_oprnds0.release ();
8282 vec_oprnds1.release ();
8283 vec_oprnds2.release ();
8284 vec_oprnds3.release ();
8285
8286 return true;
8287 }
8288
8289 /* vectorizable_comparison.
8290
8291 Check if STMT is a comparison expression that can be vectorized.
8292 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
8293 comparison, put it in VEC_STMT, and insert it at GSI.
8294
8295 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
8296
8297 static bool
8298 vectorizable_comparison (gimple *stmt, gimple_stmt_iterator *gsi,
8299 gimple **vec_stmt, tree reduc_def,
8300 slp_tree slp_node)
8301 {
8302 tree lhs, rhs1, rhs2;
8303 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
8304 tree vectype1 = NULL_TREE, vectype2 = NULL_TREE;
8305 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
8306 tree vec_rhs1 = NULL_TREE, vec_rhs2 = NULL_TREE;
8307 tree new_temp;
8308 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
8309 enum vect_def_type dts[2] = {vect_unknown_def_type, vect_unknown_def_type};
8310 int ndts = 2;
8311 unsigned nunits;
8312 int ncopies;
8313 enum tree_code code, bitop1 = NOP_EXPR, bitop2 = NOP_EXPR;
8314 stmt_vec_info prev_stmt_info = NULL;
8315 int i, j;
8316 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
8317 vec<tree> vec_oprnds0 = vNULL;
8318 vec<tree> vec_oprnds1 = vNULL;
8319 gimple *def_stmt;
8320 tree mask_type;
8321 tree mask;
8322
8323 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
8324 return false;
8325
8326 if (!vectype || !VECTOR_BOOLEAN_TYPE_P (vectype))
8327 return false;
8328
8329 mask_type = vectype;
8330 nunits = TYPE_VECTOR_SUBPARTS (vectype);
8331
8332 if (slp_node)
8333 ncopies = 1;
8334 else
8335 ncopies = vect_get_num_copies (loop_vinfo, vectype);
8336
8337 gcc_assert (ncopies >= 1);
8338 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
8339 && !(STMT_VINFO_DEF_TYPE (stmt_info) == vect_nested_cycle
8340 && reduc_def))
8341 return false;
8342
8343 if (STMT_VINFO_LIVE_P (stmt_info))
8344 {
8345 if (dump_enabled_p ())
8346 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
8347 "value used after loop.\n");
8348 return false;
8349 }
8350
8351 if (!is_gimple_assign (stmt))
8352 return false;
8353
8354 code = gimple_assign_rhs_code (stmt);
8355
8356 if (TREE_CODE_CLASS (code) != tcc_comparison)
8357 return false;
8358
8359 rhs1 = gimple_assign_rhs1 (stmt);
8360 rhs2 = gimple_assign_rhs2 (stmt);
8361
8362 if (!vect_is_simple_use (rhs1, stmt_info->vinfo, &def_stmt,
8363 &dts[0], &vectype1))
8364 return false;
8365
8366 if (!vect_is_simple_use (rhs2, stmt_info->vinfo, &def_stmt,
8367 &dts[1], &vectype2))
8368 return false;
8369
8370 if (vectype1 && vectype2
8371 && TYPE_VECTOR_SUBPARTS (vectype1) != TYPE_VECTOR_SUBPARTS (vectype2))
8372 return false;
8373
8374 vectype = vectype1 ? vectype1 : vectype2;
8375
8376 /* Invariant comparison. */
8377 if (!vectype)
8378 {
8379 vectype = get_vectype_for_scalar_type (TREE_TYPE (rhs1));
8380 if (TYPE_VECTOR_SUBPARTS (vectype) != nunits)
8381 return false;
8382 }
8383 else if (nunits != TYPE_VECTOR_SUBPARTS (vectype))
8384 return false;
8385
8386 /* Can't compare mask and non-mask types. */
8387 if (vectype1 && vectype2
8388 && (VECTOR_BOOLEAN_TYPE_P (vectype1) ^ VECTOR_BOOLEAN_TYPE_P (vectype2)))
8389 return false;
8390
8391 /* Boolean values may have another representation in vectors
8392 and therefore we prefer bit operations over comparison for
8393 them (which also works for scalar masks). We store opcodes
8394 to use in bitop1 and bitop2. Statement is vectorized as
8395 BITOP2 (rhs1 BITOP1 rhs2) or
8396 rhs1 BITOP2 (BITOP1 rhs2)
8397 depending on bitop1 and bitop2 arity. */
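/* For example, with boolean semantics and hypothetical operands:
   a == b on masks becomes ~(a ^ b) and a != b becomes a ^ b.  */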
8398 if (VECTOR_BOOLEAN_TYPE_P (vectype))
8399 {
8400 if (code == GT_EXPR)
8401 {
8402 bitop1 = BIT_NOT_EXPR;
8403 bitop2 = BIT_AND_EXPR;
8404 }
8405 else if (code == GE_EXPR)
8406 {
8407 bitop1 = BIT_NOT_EXPR;
8408 bitop2 = BIT_IOR_EXPR;
8409 }
8410 else if (code == LT_EXPR)
8411 {
8412 bitop1 = BIT_NOT_EXPR;
8413 bitop2 = BIT_AND_EXPR;
8414 std::swap (rhs1, rhs2);
8415 std::swap (dts[0], dts[1]);
8416 }
8417 else if (code == LE_EXPR)
8418 {
8419 bitop1 = BIT_NOT_EXPR;
8420 bitop2 = BIT_IOR_EXPR;
8421 std::swap (rhs1, rhs2);
8422 std::swap (dts[0], dts[1]);
8423 }
8424 else
8425 {
8426 bitop1 = BIT_XOR_EXPR;
8427 if (code == EQ_EXPR)
8428 bitop2 = BIT_NOT_EXPR;
8429 }
8430 }
8431
8432 if (!vec_stmt)
8433 {
8434 STMT_VINFO_TYPE (stmt_info) = comparison_vec_info_type;
8435 vect_model_simple_cost (stmt_info, ncopies * (1 + (bitop2 != NOP_EXPR)),
8436 dts, ndts, NULL, NULL);
8437 if (bitop1 == NOP_EXPR)
8438 return expand_vec_cmp_expr_p (vectype, mask_type, code);
8439 else
8440 {
8441 machine_mode mode = TYPE_MODE (vectype);
8442 optab optab;
8443
8444 optab = optab_for_tree_code (bitop1, vectype, optab_default);
8445 if (!optab || optab_handler (optab, mode) == CODE_FOR_nothing)
8446 return false;
8447
8448 if (bitop2 != NOP_EXPR)
8449 {
8450 optab = optab_for_tree_code (bitop2, vectype, optab_default);
8451 if (!optab || optab_handler (optab, mode) == CODE_FOR_nothing)
8452 return false;
8453 }
8454 return true;
8455 }
8456 }
8457
8458 /* Transform. */
8459 if (!slp_node)
8460 {
8461 vec_oprnds0.create (1);
8462 vec_oprnds1.create (1);
8463 }
8464
8465 /* Handle def. */
8466 lhs = gimple_assign_lhs (stmt);
8467 mask = vect_create_destination_var (lhs, mask_type);
8468
8469 /* Handle cmp expr. */
8470 for (j = 0; j < ncopies; j++)
8471 {
8472 gassign *new_stmt = NULL;
8473 if (j == 0)
8474 {
8475 if (slp_node)
8476 {
8477 auto_vec<tree, 2> ops;
8478 auto_vec<vec<tree>, 2> vec_defs;
8479
8480 ops.safe_push (rhs1);
8481 ops.safe_push (rhs2);
8482 vect_get_slp_defs (ops, slp_node, &vec_defs);
8483 vec_oprnds1 = vec_defs.pop ();
8484 vec_oprnds0 = vec_defs.pop ();
8485 }
8486 else
8487 {
8488 vec_rhs1 = vect_get_vec_def_for_operand (rhs1, stmt, vectype);
8489 vec_rhs2 = vect_get_vec_def_for_operand (rhs2, stmt, vectype);
8490 }
8491 }
8492 else
8493 {
8494 vec_rhs1 = vect_get_vec_def_for_stmt_copy (dts[0],
8495 vec_oprnds0.pop ());
8496 vec_rhs2 = vect_get_vec_def_for_stmt_copy (dts[1],
8497 vec_oprnds1.pop ());
8498 }
8499
8500 if (!slp_node)
8501 {
8502 vec_oprnds0.quick_push (vec_rhs1);
8503 vec_oprnds1.quick_push (vec_rhs2);
8504 }
8505
8506 /* Arguments are ready. Create the new vector stmt. */
8507 FOR_EACH_VEC_ELT (vec_oprnds0, i, vec_rhs1)
8508 {
8509 vec_rhs2 = vec_oprnds1[i];
8510
8511 new_temp = make_ssa_name (mask);
8512 if (bitop1 == NOP_EXPR)
8513 {
8514 new_stmt = gimple_build_assign (new_temp, code,
8515 vec_rhs1, vec_rhs2);
8516 vect_finish_stmt_generation (stmt, new_stmt, gsi);
8517 }
8518 else
8519 {
8520 if (bitop1 == BIT_NOT_EXPR)
8521 new_stmt = gimple_build_assign (new_temp, bitop1, vec_rhs2);
8522 else
8523 new_stmt = gimple_build_assign (new_temp, bitop1, vec_rhs1,
8524 vec_rhs2);
8525 vect_finish_stmt_generation (stmt, new_stmt, gsi);
8526 if (bitop2 != NOP_EXPR)
8527 {
8528 tree res = make_ssa_name (mask);
8529 if (bitop2 == BIT_NOT_EXPR)
8530 new_stmt = gimple_build_assign (res, bitop2, new_temp);
8531 else
8532 new_stmt = gimple_build_assign (res, bitop2, vec_rhs1,
8533 new_temp);
8534 vect_finish_stmt_generation (stmt, new_stmt, gsi);
8535 }
8536 }
8537 if (slp_node)
8538 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
8539 }
8540
8541 if (slp_node)
8542 continue;
8543
8544 if (j == 0)
8545 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
8546 else
8547 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
8548
8549 prev_stmt_info = vinfo_for_stmt (new_stmt);
8550 }
8551
8552 vec_oprnds0.release ();
8553 vec_oprnds1.release ();
8554
8555 return true;
8556 }
8557
8558 /* If SLP_NODE is nonnull, return true if vectorizable_live_operation
8559 can handle all live statements in the node. Otherwise return true
8560 if STMT is not live or if vectorizable_live_operation can handle it.
8561 GSI and VEC_STMT are as for vectorizable_live_operation. */
8562
8563 static bool
8564 can_vectorize_live_stmts (gimple *stmt, gimple_stmt_iterator *gsi,
8565 slp_tree slp_node, gimple **vec_stmt)
8566 {
8567 if (slp_node)
8568 {
8569 gimple *slp_stmt;
8570 unsigned int i;
8571 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (slp_node), i, slp_stmt)
8572 {
8573 stmt_vec_info slp_stmt_info = vinfo_for_stmt (slp_stmt);
8574 if (STMT_VINFO_LIVE_P (slp_stmt_info)
8575 && !vectorizable_live_operation (slp_stmt, gsi, slp_node, i,
8576 vec_stmt))
8577 return false;
8578 }
8579 }
8580 else if (STMT_VINFO_LIVE_P (vinfo_for_stmt (stmt))
8581 && !vectorizable_live_operation (stmt, gsi, slp_node, -1, vec_stmt))
8582 return false;
8583
8584 return true;
8585 }
8586
8587 /* Make sure the statement is vectorizable. */
8588
8589 bool
8590 vect_analyze_stmt (gimple *stmt, bool *need_to_vectorize, slp_tree node,
8591 slp_instance node_instance)
8592 {
8593 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
8594 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
8595 enum vect_relevant relevance = STMT_VINFO_RELEVANT (stmt_info);
8596 bool ok;
8597 gimple *pattern_stmt;
8598 gimple_seq pattern_def_seq;
8599
8600 if (dump_enabled_p ())
8601 {
8602 dump_printf_loc (MSG_NOTE, vect_location, "==> examining statement: ");
8603 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
8604 }
8605
8606 if (gimple_has_volatile_ops (stmt))
8607 {
8608 if (dump_enabled_p ())
8609 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
8610 "not vectorized: stmt has volatile operands\n");
8611
8612 return false;
8613 }
8614
8615 /* Skip stmts that do not need to be vectorized. In loops this is expected
8616 to include:
8617 - the COND_EXPR which is the loop exit condition
8618 - any LABEL_EXPRs in the loop
8619 - computations that are used only for array indexing or loop control.
8620 In basic blocks we only analyze statements that are a part of some SLP
8621 instance, therefore, all the statements are relevant.
8622
8623 A pattern statement needs to be analyzed instead of the original statement
8624 if the original statement is not relevant.  Otherwise, we analyze both
8625 statements.  In basic blocks we are called from some SLP instance
8626 traversal, so we do not analyze pattern stmts here; the pattern stmts
8627 are already part of the SLP instance.  */
8628
8629 pattern_stmt = STMT_VINFO_RELATED_STMT (stmt_info);
8630 if (!STMT_VINFO_RELEVANT_P (stmt_info)
8631 && !STMT_VINFO_LIVE_P (stmt_info))
8632 {
8633 if (STMT_VINFO_IN_PATTERN_P (stmt_info)
8634 && pattern_stmt
8635 && (STMT_VINFO_RELEVANT_P (vinfo_for_stmt (pattern_stmt))
8636 || STMT_VINFO_LIVE_P (vinfo_for_stmt (pattern_stmt))))
8637 {
8638 /* Analyze PATTERN_STMT instead of the original stmt. */
8639 stmt = pattern_stmt;
8640 stmt_info = vinfo_for_stmt (pattern_stmt);
8641 if (dump_enabled_p ())
8642 {
8643 dump_printf_loc (MSG_NOTE, vect_location,
8644 "==> examining pattern statement: ");
8645 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
8646 }
8647 }
8648 else
8649 {
8650 if (dump_enabled_p ())
8651 dump_printf_loc (MSG_NOTE, vect_location, "irrelevant.\n");
8652
8653 return true;
8654 }
8655 }
8656 else if (STMT_VINFO_IN_PATTERN_P (stmt_info)
8657 && node == NULL
8658 && pattern_stmt
8659 && (STMT_VINFO_RELEVANT_P (vinfo_for_stmt (pattern_stmt))
8660 || STMT_VINFO_LIVE_P (vinfo_for_stmt (pattern_stmt))))
8661 {
8662 /* Analyze PATTERN_STMT too. */
8663 if (dump_enabled_p ())
8664 {
8665 dump_printf_loc (MSG_NOTE, vect_location,
8666 "==> examining pattern statement: ");
8667 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
8668 }
8669
8670 if (!vect_analyze_stmt (pattern_stmt, need_to_vectorize, node,
8671 node_instance))
8672 return false;
8673 }
8674
8675 if (is_pattern_stmt_p (stmt_info)
8676 && node == NULL
8677 && (pattern_def_seq = STMT_VINFO_PATTERN_DEF_SEQ (stmt_info)))
8678 {
8679 gimple_stmt_iterator si;
8680
8681 for (si = gsi_start (pattern_def_seq); !gsi_end_p (si); gsi_next (&si))
8682 {
8683 gimple *pattern_def_stmt = gsi_stmt (si);
8684 if (STMT_VINFO_RELEVANT_P (vinfo_for_stmt (pattern_def_stmt))
8685 || STMT_VINFO_LIVE_P (vinfo_for_stmt (pattern_def_stmt)))
8686 {
8687 /* Analyze def stmt of STMT if it's a pattern stmt. */
8688 if (dump_enabled_p ())
8689 {
8690 dump_printf_loc (MSG_NOTE, vect_location,
8691 "==> examining pattern def statement: ");
8692 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, pattern_def_stmt, 0);
8693 }
8694
8695 if (!vect_analyze_stmt (pattern_def_stmt,
8696 need_to_vectorize, node, node_instance))
8697 return false;
8698 }
8699 }
8700 }
8701
8702 switch (STMT_VINFO_DEF_TYPE (stmt_info))
8703 {
8704 case vect_internal_def:
8705 break;
8706
8707 case vect_reduction_def:
8708 case vect_nested_cycle:
8709 gcc_assert (!bb_vinfo
8710 && (relevance == vect_used_in_outer
8711 || relevance == vect_used_in_outer_by_reduction
8712 || relevance == vect_used_by_reduction
8713 || relevance == vect_unused_in_scope
8714 || relevance == vect_used_only_live));
8715 break;
8716
8717 case vect_induction_def:
8718 gcc_assert (!bb_vinfo);
8719 break;
8720
8721 case vect_constant_def:
8722 case vect_external_def:
8723 case vect_unknown_def_type:
8724 default:
8725 gcc_unreachable ();
8726 }
8727
8728 if (STMT_VINFO_RELEVANT_P (stmt_info))
8729 {
8730 gcc_assert (!VECTOR_MODE_P (TYPE_MODE (gimple_expr_type (stmt))));
8731 gcc_assert (STMT_VINFO_VECTYPE (stmt_info)
8732 || (is_gimple_call (stmt)
8733 && gimple_call_lhs (stmt) == NULL_TREE));
8734 *need_to_vectorize = true;
8735 }
8736
8737 if (PURE_SLP_STMT (stmt_info) && !node)
8738 {
8739 dump_printf_loc (MSG_NOTE, vect_location,
8740 "handled only by SLP analysis\n");
8741 return true;
8742 }
8743
8744 ok = true;
8745 if (!bb_vinfo
8746 && (STMT_VINFO_RELEVANT_P (stmt_info)
8747 || STMT_VINFO_DEF_TYPE (stmt_info) == vect_reduction_def))
8748 ok = (vectorizable_simd_clone_call (stmt, NULL, NULL, node)
8749 || vectorizable_conversion (stmt, NULL, NULL, node)
8750 || vectorizable_shift (stmt, NULL, NULL, node)
8751 || vectorizable_operation (stmt, NULL, NULL, node)
8752 || vectorizable_assignment (stmt, NULL, NULL, node)
8753 || vectorizable_load (stmt, NULL, NULL, node, NULL)
8754 || vectorizable_call (stmt, NULL, NULL, node)
8755 || vectorizable_store (stmt, NULL, NULL, node)
8756 || vectorizable_reduction (stmt, NULL, NULL, node, node_instance)
8757 || vectorizable_induction (stmt, NULL, NULL, node)
8758 || vectorizable_condition (stmt, NULL, NULL, NULL, 0, node)
8759 || vectorizable_comparison (stmt, NULL, NULL, NULL, node));
8760 else
8761 {
8762 if (bb_vinfo)
8763 ok = (vectorizable_simd_clone_call (stmt, NULL, NULL, node)
8764 || vectorizable_conversion (stmt, NULL, NULL, node)
8765 || vectorizable_shift (stmt, NULL, NULL, node)
8766 || vectorizable_operation (stmt, NULL, NULL, node)
8767 || vectorizable_assignment (stmt, NULL, NULL, node)
8768 || vectorizable_load (stmt, NULL, NULL, node, NULL)
8769 || vectorizable_call (stmt, NULL, NULL, node)
8770 || vectorizable_store (stmt, NULL, NULL, node)
8771 || vectorizable_condition (stmt, NULL, NULL, NULL, 0, node)
8772 || vectorizable_comparison (stmt, NULL, NULL, NULL, node));
8773 }
8774
8775 if (!ok)
8776 {
8777 if (dump_enabled_p ())
8778 {
8779 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
8780 "not vectorized: relevant stmt not ");
8781 dump_printf (MSG_MISSED_OPTIMIZATION, "supported: ");
8782 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
8783 }
8784
8785 return false;
8786 }
8787
8788 if (bb_vinfo)
8789 return true;
8790
8791 /* Stmts that are (also) "live" (i.e. used outside of the loop)
8792 need extra handling, except for vectorizable reductions.  */
8793 if (STMT_VINFO_TYPE (stmt_info) != reduc_vec_info_type
8794 && !can_vectorize_live_stmts (stmt, NULL, node, NULL))
8795 {
8796 if (dump_enabled_p ())
8797 {
8798 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
8799 "not vectorized: live stmt not supported: ");
8800 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
8801 }
8802
8803 return false;
8804 }
8805
8806 return true;
8807 }
8808
8809
8810 /* Function vect_transform_stmt.
8811
8812 Create a vectorized stmt to replace STMT, and insert it at GSI.  */
8813
8814 bool
8815 vect_transform_stmt (gimple *stmt, gimple_stmt_iterator *gsi,
8816 bool *grouped_store, slp_tree slp_node,
8817 slp_instance slp_node_instance)
8818 {
8819 bool is_store = false;
8820 gimple *vec_stmt = NULL;
8821 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
8822 bool done;
8823
8824 gcc_assert (slp_node || !PURE_SLP_STMT (stmt_info));
8825 gimple *old_vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
8826
8827 switch (STMT_VINFO_TYPE (stmt_info))
8828 {
8829 case type_demotion_vec_info_type:
8830 case type_promotion_vec_info_type:
8831 case type_conversion_vec_info_type:
8832 done = vectorizable_conversion (stmt, gsi, &vec_stmt, slp_node);
8833 gcc_assert (done);
8834 break;
8835
8836 case induc_vec_info_type:
8837 done = vectorizable_induction (stmt, gsi, &vec_stmt, slp_node);
8838 gcc_assert (done);
8839 break;
8840
8841 case shift_vec_info_type:
8842 done = vectorizable_shift (stmt, gsi, &vec_stmt, slp_node);
8843 gcc_assert (done);
8844 break;
8845
8846 case op_vec_info_type:
8847 done = vectorizable_operation (stmt, gsi, &vec_stmt, slp_node);
8848 gcc_assert (done);
8849 break;
8850
8851 case assignment_vec_info_type:
8852 done = vectorizable_assignment (stmt, gsi, &vec_stmt, slp_node);
8853 gcc_assert (done);
8854 break;
8855
8856 case load_vec_info_type:
8857 done = vectorizable_load (stmt, gsi, &vec_stmt, slp_node,
8858 slp_node_instance);
8859 gcc_assert (done);
8860 break;
8861
8862 case store_vec_info_type:
8863 done = vectorizable_store (stmt, gsi, &vec_stmt, slp_node);
8864 gcc_assert (done);
8865 if (STMT_VINFO_GROUPED_ACCESS (stmt_info) && !slp_node)
8866 {
8867 /* In case of interleaving, the whole chain is vectorized when the
8868 last store in the chain is reached. Store stmts before the last
8869 one are skipped, and their stmt_vec_info shouldn't be freed
8870 in the meantime. */
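/* A hedged illustration (an addition, not part of the original comment):
   an interleaved store group typically comes from scalar code such as

	a[2*i]     = x;		<-- first store in the chain
	a[2*i + 1] = y;		<-- last store; the whole chain is
				    vectorized only at this point

   so the earlier members of the group must keep their stmt_vec_info until
   the chain has been emitted.  */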
8871 *grouped_store = true;
8872 if (STMT_VINFO_VEC_STMT (stmt_info))
8873 is_store = true;
8874 }
8875 else
8876 is_store = true;
8877 break;
8878
8879 case condition_vec_info_type:
8880 done = vectorizable_condition (stmt, gsi, &vec_stmt, NULL, 0, slp_node);
8881 gcc_assert (done);
8882 break;
8883
8884 case comparison_vec_info_type:
8885 done = vectorizable_comparison (stmt, gsi, &vec_stmt, NULL, slp_node);
8886 gcc_assert (done);
8887 break;
8888
8889 case call_vec_info_type:
8890 done = vectorizable_call (stmt, gsi, &vec_stmt, slp_node);
8891 stmt = gsi_stmt (*gsi);
8892 if (gimple_call_internal_p (stmt, IFN_MASK_STORE))
8893 is_store = true;
8894 break;
8895
8896 case call_simd_clone_vec_info_type:
8897 done = vectorizable_simd_clone_call (stmt, gsi, &vec_stmt, slp_node);
8898 stmt = gsi_stmt (*gsi);
8899 break;
8900
8901 case reduc_vec_info_type:
8902 done = vectorizable_reduction (stmt, gsi, &vec_stmt, slp_node,
8903 slp_node_instance);
8904 gcc_assert (done);
8905 break;
8906
8907 default:
8908 if (!STMT_VINFO_LIVE_P (stmt_info))
8909 {
8910 if (dump_enabled_p ())
8911 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
8912 "stmt not supported.\n");
8913 gcc_unreachable ();
8914 }
8915 }
8916
8917 /* Verify SLP vectorization doesn't mess with STMT_VINFO_VEC_STMT.
8918 This would break hybrid SLP vectorization. */
8919 if (slp_node)
8920 gcc_assert (!vec_stmt
8921 && STMT_VINFO_VEC_STMT (stmt_info) == old_vec_stmt);
8922
8923 /* Handle inner-loop stmts whose DEF is used in the loop-nest that
8924 is being vectorized, but outside the immediately enclosing loop. */
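/* Illustrative sketch (not from the original sources) of the situation
   handled below during outer-loop vectorization:

	for (i = 0; i < n; i++)		<-- outer loop being vectorized
	  {
	    for (j = 0; j < m; j++)	<-- inner loop
	      s = a[i][j] + 1;		<-- DEF of s (this STMT)
	    b[i] = s;			<-- use of s outside the inner loop
	  }

   the vectorized def of S is recorded at the inner-loop exit phi so that
   outer-loop stmts using it can find it.  */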
8925 if (vec_stmt
8926 && STMT_VINFO_LOOP_VINFO (stmt_info)
8927 && nested_in_vect_loop_p (LOOP_VINFO_LOOP (
8928 STMT_VINFO_LOOP_VINFO (stmt_info)), stmt)
8929 && STMT_VINFO_TYPE (stmt_info) != reduc_vec_info_type
8930 && (STMT_VINFO_RELEVANT (stmt_info) == vect_used_in_outer
8931 || STMT_VINFO_RELEVANT (stmt_info) ==
8932 vect_used_in_outer_by_reduction))
8933 {
8934 struct loop *innerloop = LOOP_VINFO_LOOP (
8935 STMT_VINFO_LOOP_VINFO (stmt_info))->inner;
8936 imm_use_iterator imm_iter;
8937 use_operand_p use_p;
8938 tree scalar_dest;
8939 gimple *exit_phi;
8940
8941 if (dump_enabled_p ())
8942 dump_printf_loc (MSG_NOTE, vect_location,
8943 "Record the vdef for outer-loop vectorization.\n");
8944
8945 /* Find the relevant loop-exit phi-node, and record the vec_stmt there
8946 (to be used when vectorizing outer-loop stmts that use the DEF of
8947 STMT). */
8948 if (gimple_code (stmt) == GIMPLE_PHI)
8949 scalar_dest = PHI_RESULT (stmt);
8950 else
8951 scalar_dest = gimple_assign_lhs (stmt);
8952
8953 FOR_EACH_IMM_USE_FAST (use_p, imm_iter, scalar_dest)
8954 {
8955 if (!flow_bb_inside_loop_p (innerloop, gimple_bb (USE_STMT (use_p))))
8956 {
8957 exit_phi = USE_STMT (use_p);
8958 STMT_VINFO_VEC_STMT (vinfo_for_stmt (exit_phi)) = vec_stmt;
8959 }
8960 }
8961 }
8962
8963 /* Handle stmts whose DEF is used outside the loop-nest that is
8964 being vectorized. */
8965 if (STMT_VINFO_TYPE (stmt_info) != reduc_vec_info_type)
8966 {
8967 done = can_vectorize_live_stmts (stmt, gsi, slp_node, &vec_stmt);
8968 gcc_assert (done);
8969 }
8970
8971 if (vec_stmt)
8972 STMT_VINFO_VEC_STMT (stmt_info) = vec_stmt;
8973
8974 return is_store;
8975 }
8976
8977
8978 /* Remove a group of stores (for SLP or interleaving), free their
8979 stmt_vec_info. */
8980
8981 void
8982 vect_remove_stores (gimple *first_stmt)
8983 {
8984 gimple *next = first_stmt;
8985 gimple *tmp;
8986 gimple_stmt_iterator next_si;
8987
8988 while (next)
8989 {
8990 stmt_vec_info stmt_info = vinfo_for_stmt (next);
8991
8992 tmp = GROUP_NEXT_ELEMENT (stmt_info);
8993 if (is_pattern_stmt_p (stmt_info))
8994 next = STMT_VINFO_RELATED_STMT (stmt_info);
8995 /* Free the attached stmt_vec_info and remove the stmt. */
8996 next_si = gsi_for_stmt (next);
8997 unlink_stmt_vdef (next);
8998 gsi_remove (&next_si, true);
8999 release_defs (next);
9000 free_stmt_vec_info (next);
9001 next = tmp;
9002 }
9003 }
9004
9005
9006 /* Function new_stmt_vec_info.
9007
9008 Create and initialize a new stmt_vec_info struct for STMT. */
9009
9010 stmt_vec_info
9011 new_stmt_vec_info (gimple *stmt, vec_info *vinfo)
9012 {
9013 stmt_vec_info res;
9014 res = (stmt_vec_info) xcalloc (1, sizeof (struct _stmt_vec_info));
9015
9016 STMT_VINFO_TYPE (res) = undef_vec_info_type;
9017 STMT_VINFO_STMT (res) = stmt;
9018 res->vinfo = vinfo;
9019 STMT_VINFO_RELEVANT (res) = vect_unused_in_scope;
9020 STMT_VINFO_LIVE_P (res) = false;
9021 STMT_VINFO_VECTYPE (res) = NULL;
9022 STMT_VINFO_VEC_STMT (res) = NULL;
9023 STMT_VINFO_VECTORIZABLE (res) = true;
9024 STMT_VINFO_IN_PATTERN_P (res) = false;
9025 STMT_VINFO_RELATED_STMT (res) = NULL;
9026 STMT_VINFO_PATTERN_DEF_SEQ (res) = NULL;
9027 STMT_VINFO_DATA_REF (res) = NULL;
9028 STMT_VINFO_VEC_REDUCTION_TYPE (res) = TREE_CODE_REDUCTION;
9029 STMT_VINFO_VEC_CONST_COND_REDUC_CODE (res) = ERROR_MARK;
9030
9031 if (gimple_code (stmt) == GIMPLE_PHI
9032 && is_loop_header_bb_p (gimple_bb (stmt)))
9033 STMT_VINFO_DEF_TYPE (res) = vect_unknown_def_type;
9034 else
9035 STMT_VINFO_DEF_TYPE (res) = vect_internal_def;
9036
9037 STMT_VINFO_SAME_ALIGN_REFS (res).create (0);
9038 STMT_SLP_TYPE (res) = loop_vect;
9039 STMT_VINFO_NUM_SLP_USES (res) = 0;
9040
9041 GROUP_FIRST_ELEMENT (res) = NULL;
9042 GROUP_NEXT_ELEMENT (res) = NULL;
9043 GROUP_SIZE (res) = 0;
9044 GROUP_STORE_COUNT (res) = 0;
9045 GROUP_GAP (res) = 0;
9046 GROUP_SAME_DR_STMT (res) = NULL;
9047
9048 return res;
9049 }
9050
9051
9052 /* Create the vector that holds stmt_vec_info structs (indexed by stmt UID). */
9053
9054 void
9055 init_stmt_vec_info_vec (void)
9056 {
9057 gcc_assert (!stmt_vec_info_vec.exists ());
9058 stmt_vec_info_vec.create (50);
9059 }
9060
9061
9062 /* Free the vector of stmt_vec_info structs. */
9063
9064 void
9065 free_stmt_vec_info_vec (void)
9066 {
9067 unsigned int i;
9068 stmt_vec_info info;
9069 FOR_EACH_VEC_ELT (stmt_vec_info_vec, i, info)
9070 if (info != NULL)
9071 free_stmt_vec_info (STMT_VINFO_STMT (info));
9072 gcc_assert (stmt_vec_info_vec.exists ());
9073 stmt_vec_info_vec.release ();
9074 }
9075
9076
9077 /* Free stmt vectorization related info. */
9078
9079 void
9080 free_stmt_vec_info (gimple *stmt)
9081 {
9082 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
9083
9084 if (!stmt_info)
9085 return;
9086
9087 /* Check if this statement has a related "pattern stmt"
9088 (introduced by the vectorizer during the pattern recognition
9089 pass). Free the pattern's stmt_vec_info and the def stmts'
9090 stmt_vec_info too. */
9091 if (STMT_VINFO_IN_PATTERN_P (stmt_info))
9092 {
9093 stmt_vec_info patt_info
9094 = vinfo_for_stmt (STMT_VINFO_RELATED_STMT (stmt_info));
9095 if (patt_info)
9096 {
9097 gimple_seq seq = STMT_VINFO_PATTERN_DEF_SEQ (patt_info);
9098 gimple *patt_stmt = STMT_VINFO_STMT (patt_info);
9099 gimple_set_bb (patt_stmt, NULL);
9100 tree lhs = gimple_get_lhs (patt_stmt);
9101 if (lhs && TREE_CODE (lhs) == SSA_NAME)
9102 release_ssa_name (lhs);
9103 if (seq)
9104 {
9105 gimple_stmt_iterator si;
9106 for (si = gsi_start (seq); !gsi_end_p (si); gsi_next (&si))
9107 {
9108 gimple *seq_stmt = gsi_stmt (si);
9109 gimple_set_bb (seq_stmt, NULL);
9110 lhs = gimple_get_lhs (seq_stmt);
9111 if (lhs && TREE_CODE (lhs) == SSA_NAME)
9112 release_ssa_name (lhs);
9113 free_stmt_vec_info (seq_stmt);
9114 }
9115 }
9116 free_stmt_vec_info (patt_stmt);
9117 }
9118 }
9119
9120 STMT_VINFO_SAME_ALIGN_REFS (stmt_info).release ();
9121 STMT_VINFO_SIMD_CLONE_INFO (stmt_info).release ();
9122 set_vinfo_for_stmt (stmt, NULL);
9123 free (stmt_info);
9124 }
9125
9126
9127 /* Function get_vectype_for_scalar_type_and_size.
9128
9129 Returns the vector type corresponding to SCALAR_TYPE and SIZE as supported
9130 by the target. */
9131
9132 static tree
9133 get_vectype_for_scalar_type_and_size (tree scalar_type, poly_uint64 size)
9134 {
9135 tree orig_scalar_type = scalar_type;
9136 scalar_mode inner_mode;
9137 machine_mode simd_mode;
9138 poly_uint64 nunits;
9139 tree vectype;
9140
9141 if (!is_int_mode (TYPE_MODE (scalar_type), &inner_mode)
9142 && !is_float_mode (TYPE_MODE (scalar_type), &inner_mode))
9143 return NULL_TREE;
9144
9145 unsigned int nbytes = GET_MODE_SIZE (inner_mode);
9146
9147 /* For vector types of elements whose mode precision doesn't
9148 match their type's precision we use an element type of mode
9149 precision. The vectorization routines will have to make sure
9150 they support the proper result truncation/extension.
9151 We also make sure to build vector types with INTEGER_TYPE
9152 component type only. */
9153 if (INTEGRAL_TYPE_P (scalar_type)
9154 && (GET_MODE_BITSIZE (inner_mode) != TYPE_PRECISION (scalar_type)
9155 || TREE_CODE (scalar_type) != INTEGER_TYPE))
9156 scalar_type = build_nonstandard_integer_type (GET_MODE_BITSIZE (inner_mode),
9157 TYPE_UNSIGNED (scalar_type));
9158
9159 /* We shouldn't end up building VECTOR_TYPEs of non-scalar components.
9160 When the component mode passes the above test, simply use a type
9161 corresponding to that mode. The theory is that any use that
9162 would cause problems with this will disable vectorization anyway. */
9163 else if (!SCALAR_FLOAT_TYPE_P (scalar_type)
9164 && !INTEGRAL_TYPE_P (scalar_type))
9165 scalar_type = lang_hooks.types.type_for_mode (inner_mode, 1);
9166
9167 /* We can't build a vector type of elements with alignment bigger than
9168 their size. */
9169 else if (nbytes < TYPE_ALIGN_UNIT (scalar_type))
9170 scalar_type = lang_hooks.types.type_for_mode (inner_mode,
9171 TYPE_UNSIGNED (scalar_type));
9172
9173 /* If we fell back to using the mode, fail if there was
9174 no scalar type for it. */
9175 if (scalar_type == NULL_TREE)
9176 return NULL_TREE;
9177
9178 /* If no size was supplied, use the mode the target prefers. Otherwise
9179 look up a vector mode of the specified size. */
9180 if (known_eq (size, 0U))
9181 simd_mode = targetm.vectorize.preferred_simd_mode (inner_mode);
9182 else if (!multiple_p (size, nbytes, &nunits)
9183 || !mode_for_vector (inner_mode, nunits).exists (&simd_mode))
9184 return NULL_TREE;
9185 /* NOTE: nunits == 1 is allowed to support single element vector types. */
9186 if (!multiple_p (GET_MODE_SIZE (simd_mode), nbytes, &nunits))
9187 return NULL_TREE;
9188
9189 vectype = build_vector_type (scalar_type, nunits);
9190
9191 if (!VECTOR_MODE_P (TYPE_MODE (vectype))
9192 && !INTEGRAL_MODE_P (TYPE_MODE (vectype)))
9193 return NULL_TREE;
9194
9195 /* Re-attach the address-space qualifier if we canonicalized the scalar
9196 type. */
9197 if (TYPE_ADDR_SPACE (orig_scalar_type) != TYPE_ADDR_SPACE (vectype))
9198 return build_qualified_type
9199 (vectype, KEEP_QUAL_ADDR_SPACE (TYPE_QUALS (orig_scalar_type)));
9200
9201 return vectype;
9202 }
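/* Hedged worked example (not part of the sources): for SCALAR_TYPE "int"
   on a typical target, inner_mode is SImode and nbytes is 4.  With
   SIZE == 16 the multiple_p check gives nunits == 4, mode_for_vector
   looks for a 4-element SImode vector mode, and build_vector_type
   returns the corresponding 4-lane vector type.  With SIZE == 0 the
   target's preferred SIMD mode for SImode is used instead.  */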
9203
9204 poly_uint64 current_vector_size;
9205
9206 /* Function get_vectype_for_scalar_type.
9207
9208 Returns the vector type corresponding to SCALAR_TYPE as supported
9209 by the target. */
9210
9211 tree
9212 get_vectype_for_scalar_type (tree scalar_type)
9213 {
9214 tree vectype;
9215 vectype = get_vectype_for_scalar_type_and_size (scalar_type,
9216 current_vector_size);
9217 if (vectype
9218 && known_eq (current_vector_size, 0U))
9219 current_vector_size = GET_MODE_SIZE (TYPE_MODE (vectype));
9220 return vectype;
9221 }
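/* Illustrative note (an observation, not part of the original comments):
   the first successful query made while current_vector_size is zero
   latches the size of the chosen vector type, so that subsequent calls
   to get_vectype_for_scalar_type during the same analysis use a
   consistent vector size rather than each picking their own.  */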
9222
9223 /* Function get_mask_type_for_scalar_type.
9224
9225 Returns the mask type corresponding to the result of a comparison
9226 of vectors of the specified SCALAR_TYPE, as supported by the target. */
9227
9228 tree
9229 get_mask_type_for_scalar_type (tree scalar_type)
9230 {
9231 tree vectype = get_vectype_for_scalar_type (scalar_type);
9232
9233 if (!vectype)
9234 return NULL;
9235
9236 return build_truth_vector_type (TYPE_VECTOR_SUBPARTS (vectype),
9237 current_vector_size);
9238 }
9239
9240 /* Function get_same_sized_vectype
9241
9242 Returns a vector type corresponding to SCALAR_TYPE with the same
9243 size as VECTOR_TYPE, if supported by the target. */
9244
9245 tree
9246 get_same_sized_vectype (tree scalar_type, tree vector_type)
9247 {
9248 if (VECT_SCALAR_BOOLEAN_TYPE_P (scalar_type))
9249 return build_same_sized_truth_vector_type (vector_type);
9250
9251 return get_vectype_for_scalar_type_and_size
9252 (scalar_type, GET_MODE_SIZE (TYPE_MODE (vector_type)));
9253 }
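/* Hedged example (not from the original comment): given SCALAR_TYPE
   "short" and a 16-byte VECTOR_TYPE such as a 4-lane "float" vector,
   this returns an 8-lane "short" vector occupying the same 16 bytes,
   which is the pairing needed by widening/narrowing conversions.  */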
9254
9255 /* Function vect_is_simple_use.
9256
9257 Input:
9258 VINFO - the vect info of the loop or basic block that is being vectorized.
9259 OPERAND - operand in the loop or bb.
9260 Output:
9261 DEF_STMT - the defining stmt in case OPERAND is an SSA_NAME.
9262 DT - the type of definition
9263
9264 Returns whether a stmt with OPERAND can be vectorized.
9265 For loops, supportable operands are constants, loop invariants, and operands
9266 that are defined by the current iteration of the loop. Unsupportable
9267 operands are those that are defined by a previous iteration of the loop (as
9268 is the case in reduction/induction computations).
9269 For basic blocks, supportable operands are constants and bb invariants.
9270 For now, operands defined outside the basic block are not supported. */
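/* Illustrative classification (not part of the original comment): in

	for (i = 0; i < n; i++)
	  s += a[i] * c;

   the value loaded from "a[i]" is defined inside the loop
   (vect_internal_def), "c" is a loop invariant (vect_external_def), a
   literal constant would be vect_constant_def, and the PHI feeding the
   accumulator "s" is a reduction (vect_reduction_def).  */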
9271
9272 bool
9273 vect_is_simple_use (tree operand, vec_info *vinfo,
9274 gimple **def_stmt, enum vect_def_type *dt)
9275 {
9276 *def_stmt = NULL;
9277 *dt = vect_unknown_def_type;
9278
9279 if (dump_enabled_p ())
9280 {
9281 dump_printf_loc (MSG_NOTE, vect_location,
9282 "vect_is_simple_use: operand ");
9283 dump_generic_expr (MSG_NOTE, TDF_SLIM, operand);
9284 dump_printf (MSG_NOTE, "\n");
9285 }
9286
9287 if (CONSTANT_CLASS_P (operand))
9288 {
9289 *dt = vect_constant_def;
9290 return true;
9291 }
9292
9293 if (is_gimple_min_invariant (operand))
9294 {
9295 *dt = vect_external_def;
9296 return true;
9297 }
9298
9299 if (TREE_CODE (operand) != SSA_NAME)
9300 {
9301 if (dump_enabled_p ())
9302 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
9303 "not ssa-name.\n");
9304 return false;
9305 }
9306
9307 if (SSA_NAME_IS_DEFAULT_DEF (operand))
9308 {
9309 *dt = vect_external_def;
9310 return true;
9311 }
9312
9313 *def_stmt = SSA_NAME_DEF_STMT (operand);
9314 if (dump_enabled_p ())
9315 {
9316 dump_printf_loc (MSG_NOTE, vect_location, "def_stmt: ");
9317 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, *def_stmt, 0);
9318 }
9319
9320 if (! vect_stmt_in_region_p (vinfo, *def_stmt))
9321 *dt = vect_external_def;
9322 else
9323 {
9324 stmt_vec_info stmt_vinfo = vinfo_for_stmt (*def_stmt);
9325 *dt = STMT_VINFO_DEF_TYPE (stmt_vinfo);
9326 }
9327
9328 if (dump_enabled_p ())
9329 {
9330 dump_printf_loc (MSG_NOTE, vect_location, "type of def: ");
9331 switch (*dt)
9332 {
9333 case vect_uninitialized_def:
9334 dump_printf (MSG_NOTE, "uninitialized\n");
9335 break;
9336 case vect_constant_def:
9337 dump_printf (MSG_NOTE, "constant\n");
9338 break;
9339 case vect_external_def:
9340 dump_printf (MSG_NOTE, "external\n");
9341 break;
9342 case vect_internal_def:
9343 dump_printf (MSG_NOTE, "internal\n");
9344 break;
9345 case vect_induction_def:
9346 dump_printf (MSG_NOTE, "induction\n");
9347 break;
9348 case vect_reduction_def:
9349 dump_printf (MSG_NOTE, "reduction\n");
9350 break;
9351 case vect_double_reduction_def:
9352 dump_printf (MSG_NOTE, "double reduction\n");
9353 break;
9354 case vect_nested_cycle:
9355 dump_printf (MSG_NOTE, "nested cycle\n");
9356 break;
9357 case vect_unknown_def_type:
9358 dump_printf (MSG_NOTE, "unknown\n");
9359 break;
9360 }
9361 }
9362
9363 if (*dt == vect_unknown_def_type)
9364 {
9365 if (dump_enabled_p ())
9366 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
9367 "Unsupported pattern.\n");
9368 return false;
9369 }
9370
9371 switch (gimple_code (*def_stmt))
9372 {
9373 case GIMPLE_PHI:
9374 case GIMPLE_ASSIGN:
9375 case GIMPLE_CALL:
9376 break;
9377 default:
9378 if (dump_enabled_p ())
9379 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
9380 "unsupported defining stmt:\n");
9381 return false;
9382 }
9383
9384 return true;
9385 }
9386
9387 /* Function vect_is_simple_use.
9388
9389 Same as vect_is_simple_use but also determines the vector operand
9390 type of OPERAND and stores it to *VECTYPE. If the definition of
9391 OPERAND is vect_uninitialized_def, vect_constant_def or
9392 vect_external_def, *VECTYPE will be set to NULL_TREE and the caller
9393 is responsible for computing the best suited vector type for the
9394 scalar operand. */
9395
9396 bool
9397 vect_is_simple_use (tree operand, vec_info *vinfo,
9398 gimple **def_stmt, enum vect_def_type *dt, tree *vectype)
9399 {
9400 if (!vect_is_simple_use (operand, vinfo, def_stmt, dt))
9401 return false;
9402
9403 /* Now get a vector type if the def is internal, otherwise supply
9404 NULL_TREE and leave it up to the caller to figure out a proper
9405 type for the use stmt. */
9406 if (*dt == vect_internal_def
9407 || *dt == vect_induction_def
9408 || *dt == vect_reduction_def
9409 || *dt == vect_double_reduction_def
9410 || *dt == vect_nested_cycle)
9411 {
9412 stmt_vec_info stmt_info = vinfo_for_stmt (*def_stmt);
9413
9414 if (STMT_VINFO_IN_PATTERN_P (stmt_info)
9415 && !STMT_VINFO_RELEVANT (stmt_info)
9416 && !STMT_VINFO_LIVE_P (stmt_info))
9417 stmt_info = vinfo_for_stmt (STMT_VINFO_RELATED_STMT (stmt_info));
9418
9419 *vectype = STMT_VINFO_VECTYPE (stmt_info);
9420 gcc_assert (*vectype != NULL_TREE);
9421 }
9422 else if (*dt == vect_uninitialized_def
9423 || *dt == vect_constant_def
9424 || *dt == vect_external_def)
9425 *vectype = NULL_TREE;
9426 else
9427 gcc_unreachable ();
9428
9429 return true;
9430 }
9431
9432
9433 /* Function supportable_widening_operation
9434
9435 Check whether an operation represented by the code CODE is a
9436 widening operation that is supported by the target platform in
9437 vector form (i.e., when operating on arguments of type VECTYPE_IN
9438 producing a result of type VECTYPE_OUT).
9439
9440 Widening operations we currently support are NOP (CONVERT), FLOAT,
9441 WIDEN_MULT, WIDEN_LSHIFT, DOT_PROD and SAD. This function checks if these
9442 operations are supported by the target platform either directly (via vector tree-codes), or via
9443 target builtins.
9444
9445 Output:
9446 - CODE1 and CODE2 are codes of vector operations to be used when
9447 vectorizing the operation, if available.
9448 - MULTI_STEP_CVT determines the number of required intermediate steps in
9449 case of multi-step conversion (like char->short->int - in that case
9450 MULTI_STEP_CVT will be 1).
9451 - INTERM_TYPES contains the intermediate type required to perform the
9452 widening operation (short in the above example). */
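/* Hedged example (not from the original comment): for a char-to-short
   conversion of a 16-element vector, CODE1/CODE2 become
   VEC_UNPACK_LO_EXPR/VEC_UNPACK_HI_EXPR, each producing 8 widened
   elements, and MULTI_STEP_CVT stays 0; a char-to-int conversion
   additionally goes through an intermediate short vector type, giving
   MULTI_STEP_CVT == 1 as described above.  */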
9453
9454 bool
9455 supportable_widening_operation (enum tree_code code, gimple *stmt,
9456 tree vectype_out, tree vectype_in,
9457 enum tree_code *code1, enum tree_code *code2,
9458 int *multi_step_cvt,
9459 vec<tree> *interm_types)
9460 {
9461 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
9462 loop_vec_info loop_info = STMT_VINFO_LOOP_VINFO (stmt_info);
9463 struct loop *vect_loop = NULL;
9464 machine_mode vec_mode;
9465 enum insn_code icode1, icode2;
9466 optab optab1, optab2;
9467 tree vectype = vectype_in;
9468 tree wide_vectype = vectype_out;
9469 enum tree_code c1, c2;
9470 int i;
9471 tree prev_type, intermediate_type;
9472 machine_mode intermediate_mode, prev_mode;
9473 optab optab3, optab4;
9474
9475 *multi_step_cvt = 0;
9476 if (loop_info)
9477 vect_loop = LOOP_VINFO_LOOP (loop_info);
9478
9479 switch (code)
9480 {
9481 case WIDEN_MULT_EXPR:
9482 /* The result of a vectorized widening operation usually requires
9483 two vectors (because the widened results do not fit into one vector).
9484 The generated vector results would normally be expected to be
9485 generated in the same order as in the original scalar computation,
9486 i.e. if 8 results are generated in each vector iteration, they are
9487 to be organized as follows:
9488 vect1: [res1,res2,res3,res4],
9489 vect2: [res5,res6,res7,res8].
9490
9491 However, in the special case that the result of the widening
9492 operation is used in a reduction computation only, the order doesn't
9493 matter (because when vectorizing a reduction we change the order of
9494 the computation). Some targets can take advantage of this and
9495 generate more efficient code. For example, targets like Altivec,
9496 that support widen_mult using a sequence of {mult_even,mult_odd}
9497 generate the following vectors:
9498 vect1: [res1,res3,res5,res7],
9499 vect2: [res2,res4,res6,res8].
9500
9501 When vectorizing outer-loops, we execute the inner-loop sequentially
9502 (each vectorized inner-loop iteration contributes to VF outer-loop
9503 iterations in parallel). We therefore don't allow to change the
9504 order of the computation in the inner-loop during outer-loop
9505 vectorization. */
9506 /* TODO: Another case in which order doesn't *really* matter is when we
9507 widen and then contract again, e.g. (short)((int)x * y >> 8).
9508 Normally, pack_trunc performs an even/odd permute, whereas the
9509 repack from an even/odd expansion would be an interleave, which
9510 would be significantly simpler for e.g. AVX2. */
9511 /* In any case, in order to avoid duplicating the code below, recurse
9512 on VEC_WIDEN_MULT_EVEN_EXPR. If it succeeds, all the return values
9513 are properly set up for the caller. If we fail, we'll continue with
9514 a VEC_WIDEN_MULT_LO/HI_EXPR check. */
9515 if (vect_loop
9516 && STMT_VINFO_RELEVANT (stmt_info) == vect_used_by_reduction
9517 && !nested_in_vect_loop_p (vect_loop, stmt)
9518 && supportable_widening_operation (VEC_WIDEN_MULT_EVEN_EXPR,
9519 stmt, vectype_out, vectype_in,
9520 code1, code2, multi_step_cvt,
9521 interm_types))
9522 {
9523 /* Elements in a vector with vect_used_by_reduction property cannot
9524 be reordered if the use chain with this property does not have the
9525 same operation. One such example is s += a * b, where elements
9526 in a and b cannot be reordered. Here we check if the vector defined
9527 by STMT is only directly used in the reduction statement. */
9528 tree lhs = gimple_assign_lhs (stmt);
9529 use_operand_p dummy;
9530 gimple *use_stmt;
9531 stmt_vec_info use_stmt_info = NULL;
9532 if (single_imm_use (lhs, &dummy, &use_stmt)
9533 && (use_stmt_info = vinfo_for_stmt (use_stmt))
9534 && STMT_VINFO_DEF_TYPE (use_stmt_info) == vect_reduction_def)
9535 return true;
9536 }
9537 c1 = VEC_WIDEN_MULT_LO_EXPR;
9538 c2 = VEC_WIDEN_MULT_HI_EXPR;
9539 break;
9540
9541 case DOT_PROD_EXPR:
9542 c1 = DOT_PROD_EXPR;
9543 c2 = DOT_PROD_EXPR;
9544 break;
9545
9546 case SAD_EXPR:
9547 c1 = SAD_EXPR;
9548 c2 = SAD_EXPR;
9549 break;
9550
9551 case VEC_WIDEN_MULT_EVEN_EXPR:
9552 /* Support the recursion induced just above. */
9553 c1 = VEC_WIDEN_MULT_EVEN_EXPR;
9554 c2 = VEC_WIDEN_MULT_ODD_EXPR;
9555 break;
9556
9557 case WIDEN_LSHIFT_EXPR:
9558 c1 = VEC_WIDEN_LSHIFT_LO_EXPR;
9559 c2 = VEC_WIDEN_LSHIFT_HI_EXPR;
9560 break;
9561
9562 CASE_CONVERT:
9563 c1 = VEC_UNPACK_LO_EXPR;
9564 c2 = VEC_UNPACK_HI_EXPR;
9565 break;
9566
9567 case FLOAT_EXPR:
9568 c1 = VEC_UNPACK_FLOAT_LO_EXPR;
9569 c2 = VEC_UNPACK_FLOAT_HI_EXPR;
9570 break;
9571
9572 case FIX_TRUNC_EXPR:
9573 /* ??? Not yet implemented due to missing VEC_UNPACK_FIX_TRUNC_HI_EXPR/
9574 VEC_UNPACK_FIX_TRUNC_LO_EXPR tree codes and optabs used for
9575 computing the operation. */
9576 return false;
9577
9578 default:
9579 gcc_unreachable ();
9580 }
9581
9582 if (BYTES_BIG_ENDIAN && c1 != VEC_WIDEN_MULT_EVEN_EXPR)
9583 std::swap (c1, c2);
9584
9585 if (code == FIX_TRUNC_EXPR)
9586 {
9587 /* The signedness is determined from the output operand. */
9588 optab1 = optab_for_tree_code (c1, vectype_out, optab_default);
9589 optab2 = optab_for_tree_code (c2, vectype_out, optab_default);
9590 }
9591 else
9592 {
9593 optab1 = optab_for_tree_code (c1, vectype, optab_default);
9594 optab2 = optab_for_tree_code (c2, vectype, optab_default);
9595 }
9596
9597 if (!optab1 || !optab2)
9598 return false;
9599
9600 vec_mode = TYPE_MODE (vectype);
9601 if ((icode1 = optab_handler (optab1, vec_mode)) == CODE_FOR_nothing
9602 || (icode2 = optab_handler (optab2, vec_mode)) == CODE_FOR_nothing)
9603 return false;
9604
9605 *code1 = c1;
9606 *code2 = c2;
9607
9608 if (insn_data[icode1].operand[0].mode == TYPE_MODE (wide_vectype)
9609 && insn_data[icode2].operand[0].mode == TYPE_MODE (wide_vectype))
9610 /* For scalar masks we may have different boolean
9611 vector types having the same QImode. Thus we
9612 add an additional check on the number of elements. */
9613 return (!VECTOR_BOOLEAN_TYPE_P (vectype)
9614 || (TYPE_VECTOR_SUBPARTS (vectype) / 2
9615 == TYPE_VECTOR_SUBPARTS (wide_vectype)));
9616
9617 /* Check if it's a multi-step conversion that can be done using intermediate
9618 types. */
9619
9620 prev_type = vectype;
9621 prev_mode = vec_mode;
9622
9623 if (!CONVERT_EXPR_CODE_P (code))
9624 return false;
9625
9626 /* We assume here that there will not be more than MAX_INTERM_CVT_STEPS
9627 intermediate steps in the promotion sequence. We try
9628 MAX_INTERM_CVT_STEPS to get to WIDE_VECTYPE, and fail if we do
9629 not. */
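/* Hedged illustration (not from the original comment): widening a
   16-element QImode vector to SImode elements typically takes two steps,
   QI -> HI -> SI; the loop below then records the HImode vector type in
   INTERM_TYPES and leaves MULTI_STEP_CVT == 1 once the second step
   reaches WIDE_VECTYPE.  */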
9630 interm_types->create (MAX_INTERM_CVT_STEPS);
9631 for (i = 0; i < MAX_INTERM_CVT_STEPS; i++)
9632 {
9633 intermediate_mode = insn_data[icode1].operand[0].mode;
9634 if (VECTOR_BOOLEAN_TYPE_P (prev_type))
9635 {
9636 intermediate_type
9637 = build_truth_vector_type (TYPE_VECTOR_SUBPARTS (prev_type) / 2,
9638 current_vector_size);
9639 if (intermediate_mode != TYPE_MODE (intermediate_type))
9640 return false;
9641 }
9642 else
9643 intermediate_type
9644 = lang_hooks.types.type_for_mode (intermediate_mode,
9645 TYPE_UNSIGNED (prev_type));
9646
9647 optab3 = optab_for_tree_code (c1, intermediate_type, optab_default);
9648 optab4 = optab_for_tree_code (c2, intermediate_type, optab_default);
9649
9650 if (!optab3 || !optab4
9651 || (icode1 = optab_handler (optab1, prev_mode)) == CODE_FOR_nothing
9652 || insn_data[icode1].operand[0].mode != intermediate_mode
9653 || (icode2 = optab_handler (optab2, prev_mode)) == CODE_FOR_nothing
9654 || insn_data[icode2].operand[0].mode != intermediate_mode
9655 || ((icode1 = optab_handler (optab3, intermediate_mode))
9656 == CODE_FOR_nothing)
9657 || ((icode2 = optab_handler (optab4, intermediate_mode))
9658 == CODE_FOR_nothing))
9659 break;
9660
9661 interm_types->quick_push (intermediate_type);
9662 (*multi_step_cvt)++;
9663
9664 if (insn_data[icode1].operand[0].mode == TYPE_MODE (wide_vectype)
9665 && insn_data[icode2].operand[0].mode == TYPE_MODE (wide_vectype))
9666 return (!VECTOR_BOOLEAN_TYPE_P (vectype)
9667 || (TYPE_VECTOR_SUBPARTS (intermediate_type) / 2
9668 == TYPE_VECTOR_SUBPARTS (wide_vectype)));
9669
9670 prev_type = intermediate_type;
9671 prev_mode = intermediate_mode;
9672 }
9673
9674 interm_types->release ();
9675 return false;
9676 }
9677
9678
9679 /* Function supportable_narrowing_operation
9680
9681 Check whether an operation represented by the code CODE is a
9682 narrowing operation that is supported by the target platform in
9683 vector form (i.e., when operating on arguments of type VECTYPE_IN
9684 and producing a result of type VECTYPE_OUT).
9685
9686 Narrowing operations we currently support are NOP (CONVERT) and
9687 FIX_TRUNC. This function checks if these operations are supported by
9688 the target platform directly via vector tree-codes.
9689
9690 Output:
9691 - CODE1 is the code of a vector operation to be used when
9692 vectorizing the operation, if available.
9693 - MULTI_STEP_CVT determines the number of required intermediate steps in
9694 case of multi-step conversion (like int->short->char - in that case
9695 MULTI_STEP_CVT will be 1).
9696 - INTERM_TYPES contains the intermediate type required to perform the
9697 narrowing operation (short in the above example). */
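/* Hedged example (not from the original comment): narrowing a vector of
   "int" elements to "char" goes int -> short -> char using
   VEC_PACK_TRUNC_EXPR at each step, so MULTI_STEP_CVT is 1 and
   INTERM_TYPES holds the intermediate "short" vector type, matching the
   int->short->char example above.  */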
9698
9699 bool
9700 supportable_narrowing_operation (enum tree_code code,
9701 tree vectype_out, tree vectype_in,
9702 enum tree_code *code1, int *multi_step_cvt,
9703 vec<tree> *interm_types)
9704 {
9705 machine_mode vec_mode;
9706 enum insn_code icode1;
9707 optab optab1, interm_optab;
9708 tree vectype = vectype_in;
9709 tree narrow_vectype = vectype_out;
9710 enum tree_code c1;
9711 tree intermediate_type, prev_type;
9712 machine_mode intermediate_mode, prev_mode;
9713 int i;
9714 bool uns;
9715
9716 *multi_step_cvt = 0;
9717 switch (code)
9718 {
9719 CASE_CONVERT:
9720 c1 = VEC_PACK_TRUNC_EXPR;
9721 break;
9722
9723 case FIX_TRUNC_EXPR:
9724 c1 = VEC_PACK_FIX_TRUNC_EXPR;
9725 break;
9726
9727 case FLOAT_EXPR:
9728 /* ??? Not yet implemented due to missing VEC_PACK_FLOAT_EXPR
9729 tree code and optabs used for computing the operation. */
9730 return false;
9731
9732 default:
9733 gcc_unreachable ();
9734 }
9735
9736 if (code == FIX_TRUNC_EXPR)
9737 /* The signedness is determined from the output operand. */
9738 optab1 = optab_for_tree_code (c1, vectype_out, optab_default);
9739 else
9740 optab1 = optab_for_tree_code (c1, vectype, optab_default);
9741
9742 if (!optab1)
9743 return false;
9744
9745 vec_mode = TYPE_MODE (vectype);
9746 if ((icode1 = optab_handler (optab1, vec_mode)) == CODE_FOR_nothing)
9747 return false;
9748
9749 *code1 = c1;
9750
9751 if (insn_data[icode1].operand[0].mode == TYPE_MODE (narrow_vectype))
9752 /* For scalar masks we may have different boolean
9753 vector types having the same QImode. Thus we
9754 add an additional check on the number of elements. */
9755 return (!VECTOR_BOOLEAN_TYPE_P (vectype)
9756 || (TYPE_VECTOR_SUBPARTS (vectype) * 2
9757 == TYPE_VECTOR_SUBPARTS (narrow_vectype)));
9758
9759 /* Check if it's a multi-step conversion that can be done using intermediate
9760 types. */
9761 prev_mode = vec_mode;
9762 prev_type = vectype;
9763 if (code == FIX_TRUNC_EXPR)
9764 uns = TYPE_UNSIGNED (vectype_out);
9765 else
9766 uns = TYPE_UNSIGNED (vectype);
9767
9768 /* For multi-step FIX_TRUNC_EXPR prefer signed floating to integer
9769 conversion over unsigned, as unsigned FIX_TRUNC_EXPR is often more
9770 costly than signed. */
9771 if (code == FIX_TRUNC_EXPR && uns)
9772 {
9773 enum insn_code icode2;
9774
9775 intermediate_type
9776 = lang_hooks.types.type_for_mode (TYPE_MODE (vectype_out), 0);
9777 interm_optab
9778 = optab_for_tree_code (c1, intermediate_type, optab_default);
9779 if (interm_optab != unknown_optab
9780 && (icode2 = optab_handler (optab1, vec_mode)) != CODE_FOR_nothing
9781 && insn_data[icode1].operand[0].mode
9782 == insn_data[icode2].operand[0].mode)
9783 {
9784 uns = false;
9785 optab1 = interm_optab;
9786 icode1 = icode2;
9787 }
9788 }
9789
9790 /* We assume here that there will not be more than MAX_INTERM_CVT_STEPS
9791 intermediate steps in the narrowing sequence. We try
9792 MAX_INTERM_CVT_STEPS to get to NARROW_VECTYPE, and fail if we do not. */
9793 interm_types->create (MAX_INTERM_CVT_STEPS);
9794 for (i = 0; i < MAX_INTERM_CVT_STEPS; i++)
9795 {
9796 intermediate_mode = insn_data[icode1].operand[0].mode;
9797 if (VECTOR_BOOLEAN_TYPE_P (prev_type))
9798 {
9799 intermediate_type
9800 = build_truth_vector_type (TYPE_VECTOR_SUBPARTS (prev_type) * 2,
9801 current_vector_size);
9802 if (intermediate_mode != TYPE_MODE (intermediate_type))
9803 return false;
9804 }
9805 else
9806 intermediate_type
9807 = lang_hooks.types.type_for_mode (intermediate_mode, uns);
9808 interm_optab
9809 = optab_for_tree_code (VEC_PACK_TRUNC_EXPR, intermediate_type,
9810 optab_default);
9811 if (!interm_optab
9812 || ((icode1 = optab_handler (optab1, prev_mode)) == CODE_FOR_nothing)
9813 || insn_data[icode1].operand[0].mode != intermediate_mode
9814 || ((icode1 = optab_handler (interm_optab, intermediate_mode))
9815 == CODE_FOR_nothing))
9816 break;
9817
9818 interm_types->quick_push (intermediate_type);
9819 (*multi_step_cvt)++;
9820
9821 if (insn_data[icode1].operand[0].mode == TYPE_MODE (narrow_vectype))
9822 return (!VECTOR_BOOLEAN_TYPE_P (vectype)
9823 || (TYPE_VECTOR_SUBPARTS (intermediate_type) * 2
9824 == TYPE_VECTOR_SUBPARTS (narrow_vectype)));
9825
9826 prev_mode = intermediate_mode;
9827 prev_type = intermediate_type;
9828 optab1 = interm_optab;
9829 }
9830
9831 interm_types->release ();
9832 return false;
9833 }