Use explicit encodings for simple permutes
gcc/tree-vect-stmts.c
1 /* Statement Analysis and Transformation for Vectorization
2 Copyright (C) 2003-2017 Free Software Foundation, Inc.
3 Contributed by Dorit Naishlos <dorit@il.ibm.com>
4 and Ira Rosen <irar@il.ibm.com>
5
6 This file is part of GCC.
7
8 GCC is free software; you can redistribute it and/or modify it under
9 the terms of the GNU General Public License as published by the Free
10 Software Foundation; either version 3, or (at your option) any later
11 version.
12
13 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
14 WARRANTY; without even the implied warranty of MERCHANTABILITY or
15 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
16 for more details.
17
18 You should have received a copy of the GNU General Public License
19 along with GCC; see the file COPYING3. If not see
20 <http://www.gnu.org/licenses/>. */
21
22 #include "config.h"
23 #include "system.h"
24 #include "coretypes.h"
25 #include "backend.h"
26 #include "target.h"
27 #include "rtl.h"
28 #include "tree.h"
29 #include "gimple.h"
30 #include "ssa.h"
31 #include "optabs-tree.h"
32 #include "insn-config.h"
33 #include "recog.h" /* FIXME: for insn_data */
34 #include "cgraph.h"
35 #include "dumpfile.h"
36 #include "alias.h"
37 #include "fold-const.h"
38 #include "stor-layout.h"
39 #include "tree-eh.h"
40 #include "gimplify.h"
41 #include "gimple-iterator.h"
42 #include "gimplify-me.h"
43 #include "tree-cfg.h"
44 #include "tree-ssa-loop-manip.h"
45 #include "cfgloop.h"
46 #include "tree-ssa-loop.h"
47 #include "tree-scalar-evolution.h"
48 #include "tree-vectorizer.h"
49 #include "builtins.h"
50 #include "internal-fn.h"
51 #include "tree-vector-builder.h"
52 #include "vec-perm-indices.h"
53
54 /* For lang_hooks.types.type_for_mode. */
55 #include "langhooks.h"
56
57 /* Says whether a statement is a load, a store of a vectorized statement
58 result, or a store of an invariant value. */
59 enum vec_load_store_type {
60 VLS_LOAD,
61 VLS_STORE,
62 VLS_STORE_INVARIANT
63 };
64
65 /* Return the vectorized type for the given statement. */
66
67 tree
68 stmt_vectype (struct _stmt_vec_info *stmt_info)
69 {
70 return STMT_VINFO_VECTYPE (stmt_info);
71 }
72
73 /* Return TRUE iff the given statement is in an inner loop relative to
74 the loop being vectorized. */
75 bool
76 stmt_in_inner_loop_p (struct _stmt_vec_info *stmt_info)
77 {
78 gimple *stmt = STMT_VINFO_STMT (stmt_info);
79 basic_block bb = gimple_bb (stmt);
80 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
81 struct loop* loop;
82
83 if (!loop_vinfo)
84 return false;
85
86 loop = LOOP_VINFO_LOOP (loop_vinfo);
87
88 return (bb->loop_father == loop->inner);
89 }
90
91 /* Record the cost of a statement, either by directly informing the
92 target model or by saving it in a vector for later processing.
93 Return a preliminary estimate of the statement's cost. */
94
95 unsigned
96 record_stmt_cost (stmt_vector_for_cost *body_cost_vec, int count,
97 enum vect_cost_for_stmt kind, stmt_vec_info stmt_info,
98 int misalign, enum vect_cost_model_location where)
99 {
100 if ((kind == vector_load || kind == unaligned_load)
101 && STMT_VINFO_GATHER_SCATTER_P (stmt_info))
102 kind = vector_gather_load;
103 if ((kind == vector_store || kind == unaligned_store)
104 && STMT_VINFO_GATHER_SCATTER_P (stmt_info))
105 kind = vector_scatter_store;
106 if (body_cost_vec)
107 {
108 tree vectype = stmt_info ? stmt_vectype (stmt_info) : NULL_TREE;
109 stmt_info_for_cost si = { count, kind,
110 stmt_info ? STMT_VINFO_STMT (stmt_info) : NULL,
111 misalign };
112 body_cost_vec->safe_push (si);
113 return (unsigned)
114 (builtin_vectorization_cost (kind, vectype, misalign) * count);
115 }
116 else
117 return add_stmt_cost (stmt_info->vinfo->target_cost_data,
118 count, kind, stmt_info, misalign, where);
119 }
120
121 /* Return a variable of type ELEM_TYPE[NELEMS]. */
122
123 static tree
124 create_vector_array (tree elem_type, unsigned HOST_WIDE_INT nelems)
125 {
126 return create_tmp_var (build_array_type_nelts (elem_type, nelems),
127 "vect_array");
128 }
129
130 /* ARRAY is an array of vectors created by create_vector_array.
131 Return an SSA_NAME for the vector in index N. The reference
132 is part of the vectorization of STMT and the vector is associated
133 with scalar destination SCALAR_DEST. */
134
135 static tree
136 read_vector_array (gimple *stmt, gimple_stmt_iterator *gsi, tree scalar_dest,
137 tree array, unsigned HOST_WIDE_INT n)
138 {
139 tree vect_type, vect, vect_name, array_ref;
140 gimple *new_stmt;
141
142 gcc_assert (TREE_CODE (TREE_TYPE (array)) == ARRAY_TYPE);
143 vect_type = TREE_TYPE (TREE_TYPE (array));
144 vect = vect_create_destination_var (scalar_dest, vect_type);
145 array_ref = build4 (ARRAY_REF, vect_type, array,
146 build_int_cst (size_type_node, n),
147 NULL_TREE, NULL_TREE);
148
149 new_stmt = gimple_build_assign (vect, array_ref);
150 vect_name = make_ssa_name (vect, new_stmt);
151 gimple_assign_set_lhs (new_stmt, vect_name);
152 vect_finish_stmt_generation (stmt, new_stmt, gsi);
153
154 return vect_name;
155 }
156
157 /* ARRAY is an array of vectors created by create_vector_array.
158 Emit code to store SSA_NAME VECT in index N of the array.
159 The store is part of the vectorization of STMT. */
160
161 static void
162 write_vector_array (gimple *stmt, gimple_stmt_iterator *gsi, tree vect,
163 tree array, unsigned HOST_WIDE_INT n)
164 {
165 tree array_ref;
166 gimple *new_stmt;
167
168 array_ref = build4 (ARRAY_REF, TREE_TYPE (vect), array,
169 build_int_cst (size_type_node, n),
170 NULL_TREE, NULL_TREE);
171
172 new_stmt = gimple_build_assign (array_ref, vect);
173 vect_finish_stmt_generation (stmt, new_stmt, gsi);
174 }
175
176 /* PTR is a pointer to an array of type TYPE. Return a representation
177 of *PTR. The memory reference replaces those in FIRST_DR
178 (and its group). */
179
180 static tree
181 create_array_ref (tree type, tree ptr, tree alias_ptr_type)
182 {
183 tree mem_ref;
184
185 mem_ref = build2 (MEM_REF, type, ptr, build_int_cst (alias_ptr_type, 0));
186 /* Arrays have the same alignment as their type. */
187 set_ptr_info_alignment (get_ptr_info (ptr), TYPE_ALIGN_UNIT (type), 0);
188 return mem_ref;
189 }
190
191 /* Utility functions used by vect_mark_stmts_to_be_vectorized. */
192
193 /* Function vect_mark_relevant.
194
195 Mark STMT as "relevant for vectorization" and add it to WORKLIST. */
196
197 static void
198 vect_mark_relevant (vec<gimple *> *worklist, gimple *stmt,
199 enum vect_relevant relevant, bool live_p)
200 {
201 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
202 enum vect_relevant save_relevant = STMT_VINFO_RELEVANT (stmt_info);
203 bool save_live_p = STMT_VINFO_LIVE_P (stmt_info);
204 gimple *pattern_stmt;
205
206 if (dump_enabled_p ())
207 {
208 dump_printf_loc (MSG_NOTE, vect_location,
209 "mark relevant %d, live %d: ", relevant, live_p);
210 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
211 }
212
213 /* If this stmt is an original stmt in a pattern, we might need to mark its
214 related pattern stmt instead of the original stmt. However, such stmts
215    may have their own uses that are not in any pattern; in such cases the
216 stmt itself should be marked. */
217 if (STMT_VINFO_IN_PATTERN_P (stmt_info))
218 {
219 /* This is the last stmt in a sequence that was detected as a
220 pattern that can potentially be vectorized. Don't mark the stmt
221 as relevant/live because it's not going to be vectorized.
222 Instead mark the pattern-stmt that replaces it. */
223
224 pattern_stmt = STMT_VINFO_RELATED_STMT (stmt_info);
225
226 if (dump_enabled_p ())
227 dump_printf_loc (MSG_NOTE, vect_location,
228 "last stmt in pattern. don't mark"
229 " relevant/live.\n");
230 stmt_info = vinfo_for_stmt (pattern_stmt);
231 gcc_assert (STMT_VINFO_RELATED_STMT (stmt_info) == stmt);
232 save_relevant = STMT_VINFO_RELEVANT (stmt_info);
233 save_live_p = STMT_VINFO_LIVE_P (stmt_info);
234 stmt = pattern_stmt;
235 }
236
237 STMT_VINFO_LIVE_P (stmt_info) |= live_p;
238 if (relevant > STMT_VINFO_RELEVANT (stmt_info))
239 STMT_VINFO_RELEVANT (stmt_info) = relevant;
240
241 if (STMT_VINFO_RELEVANT (stmt_info) == save_relevant
242 && STMT_VINFO_LIVE_P (stmt_info) == save_live_p)
243 {
244 if (dump_enabled_p ())
245 dump_printf_loc (MSG_NOTE, vect_location,
246 "already marked relevant/live.\n");
247 return;
248 }
249
250 worklist->safe_push (stmt);
251 }
252
253
254 /* Function is_simple_and_all_uses_invariant
255
256 Return true if STMT is simple and all uses of it are invariant. */
257
258 bool
259 is_simple_and_all_uses_invariant (gimple *stmt, loop_vec_info loop_vinfo)
260 {
261 tree op;
262 gimple *def_stmt;
263 ssa_op_iter iter;
264
265 if (!is_gimple_assign (stmt))
266 return false;
267
268 FOR_EACH_SSA_TREE_OPERAND (op, stmt, iter, SSA_OP_USE)
269 {
270 enum vect_def_type dt = vect_uninitialized_def;
271
272 if (!vect_is_simple_use (op, loop_vinfo, &def_stmt, &dt))
273 {
274 if (dump_enabled_p ())
275 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
276 "use not simple.\n");
277 return false;
278 }
279
280 if (dt != vect_external_def && dt != vect_constant_def)
281 return false;
282 }
283 return true;
284 }
285
286 /* Function vect_stmt_relevant_p.
287
288    Return true if STMT, in the loop that is represented by LOOP_VINFO, is
289 "relevant for vectorization".
290
291 A stmt is considered "relevant for vectorization" if:
292 - it has uses outside the loop.
293 - it has vdefs (it alters memory).
294    - it is a control stmt in the loop (except for the loop exit condition).
295
296 CHECKME: what other side effects would the vectorizer allow? */
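/* For example, in

     for (i = 0; i < n; i++)
       {
         a[i] = b[i];     <-- relevant: it has a vdef
         sum += b[i];     <-- live: SUM is used after the loop
       }
     ... = sum;

   the store is marked relevant because it alters memory, and the final
   value of SUM is marked live because it is used outside the loop.  */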
297
298 static bool
299 vect_stmt_relevant_p (gimple *stmt, loop_vec_info loop_vinfo,
300 enum vect_relevant *relevant, bool *live_p)
301 {
302 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
303 ssa_op_iter op_iter;
304 imm_use_iterator imm_iter;
305 use_operand_p use_p;
306 def_operand_p def_p;
307
308 *relevant = vect_unused_in_scope;
309 *live_p = false;
310
311 /* cond stmt other than loop exit cond. */
312 if (is_ctrl_stmt (stmt)
313 && STMT_VINFO_TYPE (vinfo_for_stmt (stmt))
314 != loop_exit_ctrl_vec_info_type)
315 *relevant = vect_used_in_scope;
316
317 /* changing memory. */
318 if (gimple_code (stmt) != GIMPLE_PHI)
319 if (gimple_vdef (stmt)
320 && !gimple_clobber_p (stmt))
321 {
322 if (dump_enabled_p ())
323 dump_printf_loc (MSG_NOTE, vect_location,
324 "vec_stmt_relevant_p: stmt has vdefs.\n");
325 *relevant = vect_used_in_scope;
326 }
327
328 /* uses outside the loop. */
329 FOR_EACH_PHI_OR_STMT_DEF (def_p, stmt, op_iter, SSA_OP_DEF)
330 {
331 FOR_EACH_IMM_USE_FAST (use_p, imm_iter, DEF_FROM_PTR (def_p))
332 {
333 basic_block bb = gimple_bb (USE_STMT (use_p));
334 if (!flow_bb_inside_loop_p (loop, bb))
335 {
336 if (dump_enabled_p ())
337 dump_printf_loc (MSG_NOTE, vect_location,
338 "vec_stmt_relevant_p: used out of loop.\n");
339
340 if (is_gimple_debug (USE_STMT (use_p)))
341 continue;
342
343 /* We expect all such uses to be in the loop exit phis
344 	     (because of loop-closed SSA form).  */
345 gcc_assert (gimple_code (USE_STMT (use_p)) == GIMPLE_PHI);
346 gcc_assert (bb == single_exit (loop)->dest);
347
348 *live_p = true;
349 }
350 }
351 }
352
353 if (*live_p && *relevant == vect_unused_in_scope
354 && !is_simple_and_all_uses_invariant (stmt, loop_vinfo))
355 {
356 if (dump_enabled_p ())
357 dump_printf_loc (MSG_NOTE, vect_location,
358 "vec_stmt_relevant_p: stmt live but not relevant.\n");
359 *relevant = vect_used_only_live;
360 }
361
362 return (*live_p || *relevant);
363 }
364
365
366 /* Function exist_non_indexing_operands_for_use_p
367
368 USE is one of the uses attached to STMT. Check if USE is
369 used in STMT for anything other than indexing an array. */
370
371 static bool
372 exist_non_indexing_operands_for_use_p (tree use, gimple *stmt)
373 {
374 tree operand;
375 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
376
377 /* USE corresponds to some operand in STMT. If there is no data
378 reference in STMT, then any operand that corresponds to USE
379 is not indexing an array. */
380 if (!STMT_VINFO_DATA_REF (stmt_info))
381 return true;
382
383   /* STMT has a data_ref. FORNOW this means that it is one of
384 the following forms:
385 -1- ARRAY_REF = var
386 -2- var = ARRAY_REF
387 (This should have been verified in analyze_data_refs).
388
389 'var' in the second case corresponds to a def, not a use,
390 so USE cannot correspond to any operands that are not used
391 for array indexing.
392
393 Therefore, all we need to check is if STMT falls into the
394 first case, and whether var corresponds to USE. */
395
396 if (!gimple_assign_copy_p (stmt))
397 {
398 if (is_gimple_call (stmt)
399 && gimple_call_internal_p (stmt))
400 switch (gimple_call_internal_fn (stmt))
401 {
402 case IFN_MASK_STORE:
403 operand = gimple_call_arg (stmt, 3);
404 if (operand == use)
405 return true;
406 /* FALLTHRU */
407 case IFN_MASK_LOAD:
408 operand = gimple_call_arg (stmt, 2);
409 if (operand == use)
410 return true;
411 break;
412 default:
413 break;
414 }
415 return false;
416 }
417
418 if (TREE_CODE (gimple_assign_lhs (stmt)) == SSA_NAME)
419 return false;
420 operand = gimple_assign_rhs1 (stmt);
421 if (TREE_CODE (operand) != SSA_NAME)
422 return false;
423
424 if (operand == use)
425 return true;
426
427 return false;
428 }
429
430
431 /*
432 Function process_use.
433
434 Inputs:
435 - a USE in STMT in a loop represented by LOOP_VINFO
436 - RELEVANT - enum value to be set in the STMT_VINFO of the stmt
437 that defined USE. This is done by calling mark_relevant and passing it
438 the WORKLIST (to add DEF_STMT to the WORKLIST in case it is relevant).
439 - FORCE is true if exist_non_indexing_operands_for_use_p check shouldn't
440 be performed.
441
442 Outputs:
443 Generally, LIVE_P and RELEVANT are used to define the liveness and
444 relevance info of the DEF_STMT of this USE:
445 STMT_VINFO_LIVE_P (DEF_STMT_info) <-- live_p
446 STMT_VINFO_RELEVANT (DEF_STMT_info) <-- relevant
447 Exceptions:
448 - case 1: If USE is used only for address computations (e.g. array indexing),
449 which does not need to be directly vectorized, then the liveness/relevance
450 of the respective DEF_STMT is left unchanged.
451 - case 2: If STMT is a reduction phi and DEF_STMT is a reduction stmt, we
452      skip DEF_STMT because it has already been processed.
453 - case 3: If DEF_STMT and STMT are in different nests, then "relevant" will
454 be modified accordingly.
455
456 Return true if everything is as expected. Return false otherwise. */
457
458 static bool
459 process_use (gimple *stmt, tree use, loop_vec_info loop_vinfo,
460 enum vect_relevant relevant, vec<gimple *> *worklist,
461 bool force)
462 {
463 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
464 stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
465 stmt_vec_info dstmt_vinfo;
466 basic_block bb, def_bb;
467 gimple *def_stmt;
468 enum vect_def_type dt;
469
470 /* case 1: we are only interested in uses that need to be vectorized. Uses
471 that are used for address computation are not considered relevant. */
472 if (!force && !exist_non_indexing_operands_for_use_p (use, stmt))
473 return true;
474
475 if (!vect_is_simple_use (use, loop_vinfo, &def_stmt, &dt))
476 {
477 if (dump_enabled_p ())
478 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
479 "not vectorized: unsupported use in stmt.\n");
480 return false;
481 }
482
483 if (!def_stmt || gimple_nop_p (def_stmt))
484 return true;
485
486 def_bb = gimple_bb (def_stmt);
487 if (!flow_bb_inside_loop_p (loop, def_bb))
488 {
489 if (dump_enabled_p ())
490 dump_printf_loc (MSG_NOTE, vect_location, "def_stmt is out of loop.\n");
491 return true;
492 }
493
494 /* case 2: A reduction phi (STMT) defined by a reduction stmt (DEF_STMT).
495 DEF_STMT must have already been processed, because this should be the
496 only way that STMT, which is a reduction-phi, was put in the worklist,
497 as there should be no other uses for DEF_STMT in the loop. So we just
498 check that everything is as expected, and we are done. */
499 dstmt_vinfo = vinfo_for_stmt (def_stmt);
500 bb = gimple_bb (stmt);
501 if (gimple_code (stmt) == GIMPLE_PHI
502 && STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_reduction_def
503 && gimple_code (def_stmt) != GIMPLE_PHI
504 && STMT_VINFO_DEF_TYPE (dstmt_vinfo) == vect_reduction_def
505 && bb->loop_father == def_bb->loop_father)
506 {
507 if (dump_enabled_p ())
508 dump_printf_loc (MSG_NOTE, vect_location,
509 "reduc-stmt defining reduc-phi in the same nest.\n");
510 if (STMT_VINFO_IN_PATTERN_P (dstmt_vinfo))
511 dstmt_vinfo = vinfo_for_stmt (STMT_VINFO_RELATED_STMT (dstmt_vinfo));
512 gcc_assert (STMT_VINFO_RELEVANT (dstmt_vinfo) < vect_used_by_reduction);
513 gcc_assert (STMT_VINFO_LIVE_P (dstmt_vinfo)
514 || STMT_VINFO_RELEVANT (dstmt_vinfo) > vect_unused_in_scope);
515 return true;
516 }
517
518 /* case 3a: outer-loop stmt defining an inner-loop stmt:
519 outer-loop-header-bb:
520 d = def_stmt
521 inner-loop:
522 stmt # use (d)
523 outer-loop-tail-bb:
524 ... */
525 if (flow_loop_nested_p (def_bb->loop_father, bb->loop_father))
526 {
527 if (dump_enabled_p ())
528 dump_printf_loc (MSG_NOTE, vect_location,
529 "outer-loop def-stmt defining inner-loop stmt.\n");
530
531 switch (relevant)
532 {
533 case vect_unused_in_scope:
534 relevant = (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_nested_cycle) ?
535 vect_used_in_scope : vect_unused_in_scope;
536 break;
537
538 case vect_used_in_outer_by_reduction:
539 gcc_assert (STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_reduction_def);
540 relevant = vect_used_by_reduction;
541 break;
542
543 case vect_used_in_outer:
544 gcc_assert (STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_reduction_def);
545 relevant = vect_used_in_scope;
546 break;
547
548 case vect_used_in_scope:
549 break;
550
551 default:
552 gcc_unreachable ();
553 }
554 }
555
556 /* case 3b: inner-loop stmt defining an outer-loop stmt:
557 outer-loop-header-bb:
558 ...
559 inner-loop:
560 d = def_stmt
561 outer-loop-tail-bb (or outer-loop-exit-bb in double reduction):
562 stmt # use (d) */
563 else if (flow_loop_nested_p (bb->loop_father, def_bb->loop_father))
564 {
565 if (dump_enabled_p ())
566 dump_printf_loc (MSG_NOTE, vect_location,
567 "inner-loop def-stmt defining outer-loop stmt.\n");
568
569 switch (relevant)
570 {
571 case vect_unused_in_scope:
572 relevant = (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_reduction_def
573 || STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_double_reduction_def) ?
574 vect_used_in_outer_by_reduction : vect_unused_in_scope;
575 break;
576
577 case vect_used_by_reduction:
578 case vect_used_only_live:
579 relevant = vect_used_in_outer_by_reduction;
580 break;
581
582 case vect_used_in_scope:
583 relevant = vect_used_in_outer;
584 break;
585
586 default:
587 gcc_unreachable ();
588 }
589 }
590 /* We are also not interested in uses on loop PHI backedges that are
591 inductions. Otherwise we'll needlessly vectorize the IV increment
592 and cause hybrid SLP for SLP inductions. Unless the PHI is live
593 of course. */
594 else if (gimple_code (stmt) == GIMPLE_PHI
595 && STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_induction_def
596 && ! STMT_VINFO_LIVE_P (stmt_vinfo)
597 && (PHI_ARG_DEF_FROM_EDGE (stmt, loop_latch_edge (bb->loop_father))
598 == use))
599 {
600 if (dump_enabled_p ())
601 dump_printf_loc (MSG_NOTE, vect_location,
602 "induction value on backedge.\n");
603 return true;
604 }
605
606
607 vect_mark_relevant (worklist, def_stmt, relevant, false);
608 return true;
609 }
610
611
612 /* Function vect_mark_stmts_to_be_vectorized.
613
614 Not all stmts in the loop need to be vectorized. For example:
615
616 for i...
617 for j...
618 1. T0 = i + j
619 2. T1 = a[T0]
620
621 3. j = j + 1
622
623    Stmts 1 and 3 do not need to be vectorized, because loop control and
624 addressing of vectorized data-refs are handled differently.
625
626 This pass detects such stmts. */
627
628 bool
629 vect_mark_stmts_to_be_vectorized (loop_vec_info loop_vinfo)
630 {
631 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
632 basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
633 unsigned int nbbs = loop->num_nodes;
634 gimple_stmt_iterator si;
635 gimple *stmt;
636 unsigned int i;
637 stmt_vec_info stmt_vinfo;
638 basic_block bb;
639 gimple *phi;
640 bool live_p;
641 enum vect_relevant relevant;
642
643 if (dump_enabled_p ())
644 dump_printf_loc (MSG_NOTE, vect_location,
645 "=== vect_mark_stmts_to_be_vectorized ===\n");
646
647 auto_vec<gimple *, 64> worklist;
648
649 /* 1. Init worklist. */
650 for (i = 0; i < nbbs; i++)
651 {
652 bb = bbs[i];
653 for (si = gsi_start_phis (bb); !gsi_end_p (si); gsi_next (&si))
654 {
655 phi = gsi_stmt (si);
656 if (dump_enabled_p ())
657 {
658 dump_printf_loc (MSG_NOTE, vect_location, "init: phi relevant? ");
659 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, phi, 0);
660 }
661
662 if (vect_stmt_relevant_p (phi, loop_vinfo, &relevant, &live_p))
663 vect_mark_relevant (&worklist, phi, relevant, live_p);
664 }
665 for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
666 {
667 stmt = gsi_stmt (si);
668 if (dump_enabled_p ())
669 {
670 dump_printf_loc (MSG_NOTE, vect_location, "init: stmt relevant? ");
671 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
672 }
673
674 if (vect_stmt_relevant_p (stmt, loop_vinfo, &relevant, &live_p))
675 vect_mark_relevant (&worklist, stmt, relevant, live_p);
676 }
677 }
678
679 /* 2. Process_worklist */
680 while (worklist.length () > 0)
681 {
682 use_operand_p use_p;
683 ssa_op_iter iter;
684
685 stmt = worklist.pop ();
686 if (dump_enabled_p ())
687 {
688 dump_printf_loc (MSG_NOTE, vect_location, "worklist: examine stmt: ");
689 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
690 }
691
692 /* Examine the USEs of STMT. For each USE, mark the stmt that defines it
693 (DEF_STMT) as relevant/irrelevant according to the relevance property
694 of STMT. */
695 stmt_vinfo = vinfo_for_stmt (stmt);
696 relevant = STMT_VINFO_RELEVANT (stmt_vinfo);
697
698 /* Generally, the relevance property of STMT (in STMT_VINFO_RELEVANT) is
699 propagated as is to the DEF_STMTs of its USEs.
700
701 One exception is when STMT has been identified as defining a reduction
702 variable; in this case we set the relevance to vect_used_by_reduction.
703 This is because we distinguish between two kinds of relevant stmts -
704 those that are used by a reduction computation, and those that are
705 (also) used by a regular computation. This allows us later on to
706 identify stmts that are used solely by a reduction, and therefore the
707 order of the results that they produce does not have to be kept. */
708
709 switch (STMT_VINFO_DEF_TYPE (stmt_vinfo))
710 {
711 case vect_reduction_def:
712 gcc_assert (relevant != vect_unused_in_scope);
713 if (relevant != vect_unused_in_scope
714 && relevant != vect_used_in_scope
715 && relevant != vect_used_by_reduction
716 && relevant != vect_used_only_live)
717 {
718 if (dump_enabled_p ())
719 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
720 "unsupported use of reduction.\n");
721 return false;
722 }
723 break;
724
725 case vect_nested_cycle:
726 if (relevant != vect_unused_in_scope
727 && relevant != vect_used_in_outer_by_reduction
728 && relevant != vect_used_in_outer)
729 {
730 if (dump_enabled_p ())
731 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
732 "unsupported use of nested cycle.\n");
733
734 return false;
735 }
736 break;
737
738 case vect_double_reduction_def:
739 if (relevant != vect_unused_in_scope
740 && relevant != vect_used_by_reduction
741 && relevant != vect_used_only_live)
742 {
743 if (dump_enabled_p ())
744 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
745 "unsupported use of double reduction.\n");
746
747 return false;
748 }
749 break;
750
751 default:
752 break;
753 }
754
755 if (is_pattern_stmt_p (stmt_vinfo))
756 {
757 /* Pattern statements are not inserted into the code, so
758 FOR_EACH_PHI_OR_STMT_USE optimizes their operands out, and we
759 have to scan the RHS or function arguments instead. */
760 if (is_gimple_assign (stmt))
761 {
762 enum tree_code rhs_code = gimple_assign_rhs_code (stmt);
763 tree op = gimple_assign_rhs1 (stmt);
764
765 i = 1;
766 if (rhs_code == COND_EXPR && COMPARISON_CLASS_P (op))
767 {
768 if (!process_use (stmt, TREE_OPERAND (op, 0), loop_vinfo,
769 relevant, &worklist, false)
770 || !process_use (stmt, TREE_OPERAND (op, 1), loop_vinfo,
771 relevant, &worklist, false))
772 return false;
773 i = 2;
774 }
775 for (; i < gimple_num_ops (stmt); i++)
776 {
777 op = gimple_op (stmt, i);
778 if (TREE_CODE (op) == SSA_NAME
779 && !process_use (stmt, op, loop_vinfo, relevant,
780 &worklist, false))
781 return false;
782 }
783 }
784 else if (is_gimple_call (stmt))
785 {
786 for (i = 0; i < gimple_call_num_args (stmt); i++)
787 {
788 tree arg = gimple_call_arg (stmt, i);
789 if (!process_use (stmt, arg, loop_vinfo, relevant,
790 &worklist, false))
791 return false;
792 }
793 }
794 }
795 else
796 FOR_EACH_PHI_OR_STMT_USE (use_p, stmt, iter, SSA_OP_USE)
797 {
798 tree op = USE_FROM_PTR (use_p);
799 if (!process_use (stmt, op, loop_vinfo, relevant,
800 &worklist, false))
801 return false;
802 }
803
804 if (STMT_VINFO_GATHER_SCATTER_P (stmt_vinfo))
805 {
806 gather_scatter_info gs_info;
807 if (!vect_check_gather_scatter (stmt, loop_vinfo, &gs_info))
808 gcc_unreachable ();
809 if (!process_use (stmt, gs_info.offset, loop_vinfo, relevant,
810 &worklist, true))
811 return false;
812 }
813 } /* while worklist */
814
815 return true;
816 }
817
818
819 /* Function vect_model_simple_cost.
820
821 Models cost for simple operations, i.e. those that only emit ncopies of a
822 single op. Right now, this does not account for multiple insns that could
823 be generated for the single vector op. We will handle that shortly. */
824
825 void
826 vect_model_simple_cost (stmt_vec_info stmt_info, int ncopies,
827 enum vect_def_type *dt,
828 int ndts,
829 stmt_vector_for_cost *prologue_cost_vec,
830 stmt_vector_for_cost *body_cost_vec)
831 {
832 int i;
833 int inside_cost = 0, prologue_cost = 0;
834
835 /* The SLP costs were already calculated during SLP tree build. */
836 if (PURE_SLP_STMT (stmt_info))
837 return;
838
839   /* Cost the "broadcast" of a scalar operand into a vector operand.
840 Use scalar_to_vec to cost the broadcast, as elsewhere in the vector
841 cost model. */
842 for (i = 0; i < ndts; i++)
843 if (dt[i] == vect_constant_def || dt[i] == vect_external_def)
844 prologue_cost += record_stmt_cost (prologue_cost_vec, 1, scalar_to_vec,
845 stmt_info, 0, vect_prologue);
846
847 /* Pass the inside-of-loop statements to the target-specific cost model. */
848 inside_cost = record_stmt_cost (body_cost_vec, ncopies, vector_stmt,
849 stmt_info, 0, vect_body);
850
851 if (dump_enabled_p ())
852 dump_printf_loc (MSG_NOTE, vect_location,
853 "vect_model_simple_cost: inside_cost = %d, "
854 "prologue_cost = %d .\n", inside_cost, prologue_cost);
855 }
856
857
858 /* Model cost for type demotion and promotion operations. PWR is normally
859 zero for single-step promotions and demotions. It will be one if
860 two-step promotion/demotion is required, and so on. Each additional
861 step doubles the number of instructions required. */
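/* For instance, with PWR == 1 the loop below charges
   vect_pow2 (1) + vect_pow2 (2) = 2 + 4 vec_promote_demote stmts for a
   two-step promotion and vect_pow2 (0) + vect_pow2 (1) = 1 + 2 for a
   two-step demotion (vect_pow2 (N) being 2**N).  */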
862
863 static void
864 vect_model_promotion_demotion_cost (stmt_vec_info stmt_info,
865 enum vect_def_type *dt, int pwr)
866 {
867 int i, tmp;
868 int inside_cost = 0, prologue_cost = 0;
869 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
870 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
871 void *target_cost_data;
872
873 /* The SLP costs were already calculated during SLP tree build. */
874 if (PURE_SLP_STMT (stmt_info))
875 return;
876
877 if (loop_vinfo)
878 target_cost_data = LOOP_VINFO_TARGET_COST_DATA (loop_vinfo);
879 else
880 target_cost_data = BB_VINFO_TARGET_COST_DATA (bb_vinfo);
881
882 for (i = 0; i < pwr + 1; i++)
883 {
884 tmp = (STMT_VINFO_TYPE (stmt_info) == type_promotion_vec_info_type) ?
885 (i + 1) : i;
886 inside_cost += add_stmt_cost (target_cost_data, vect_pow2 (tmp),
887 vec_promote_demote, stmt_info, 0,
888 vect_body);
889 }
890
891   /* FORNOW: Assuming a maximum of 2 args per stmt.  */
892 for (i = 0; i < 2; i++)
893 if (dt[i] == vect_constant_def || dt[i] == vect_external_def)
894 prologue_cost += add_stmt_cost (target_cost_data, 1, vector_stmt,
895 stmt_info, 0, vect_prologue);
896
897 if (dump_enabled_p ())
898 dump_printf_loc (MSG_NOTE, vect_location,
899 "vect_model_promotion_demotion_cost: inside_cost = %d, "
900 "prologue_cost = %d .\n", inside_cost, prologue_cost);
901 }
902
903 /* Function vect_model_store_cost
904
905 Models cost for stores. In the case of grouped accesses, one access
906 has the overhead of the grouped access attributed to it. */
907
908 void
909 vect_model_store_cost (stmt_vec_info stmt_info, int ncopies,
910 vect_memory_access_type memory_access_type,
911 enum vect_def_type dt, slp_tree slp_node,
912 stmt_vector_for_cost *prologue_cost_vec,
913 stmt_vector_for_cost *body_cost_vec)
914 {
915 unsigned int inside_cost = 0, prologue_cost = 0;
916 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
917 gimple *first_stmt = STMT_VINFO_STMT (stmt_info);
918 bool grouped_access_p = STMT_VINFO_GROUPED_ACCESS (stmt_info);
919
920 if (dt == vect_constant_def || dt == vect_external_def)
921 prologue_cost += record_stmt_cost (prologue_cost_vec, 1, scalar_to_vec,
922 stmt_info, 0, vect_prologue);
923
924 /* Grouped stores update all elements in the group at once,
925 so we want the DR for the first statement. */
926 if (!slp_node && grouped_access_p)
927 {
928 first_stmt = GROUP_FIRST_ELEMENT (stmt_info);
929 dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
930 }
931
932 /* True if we should include any once-per-group costs as well as
933 the cost of the statement itself. For SLP we only get called
934 once per group anyhow. */
935 bool first_stmt_p = (first_stmt == STMT_VINFO_STMT (stmt_info));
936
937 /* We assume that the cost of a single store-lanes instruction is
938 equivalent to the cost of GROUP_SIZE separate stores. If a grouped
939 access is instead being provided by a permute-and-store operation,
940 include the cost of the permutes. */
941 if (first_stmt_p
942 && memory_access_type == VMAT_CONTIGUOUS_PERMUTE)
943 {
944       /* Uses high and low interleave or shuffle operations for each
945          needed permute.  */
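      /* For example, with GROUP_SIZE == 4 each copy is charged
         ceil_log2 (4) * 4 == 8 vec_perm operations below.  */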
946 int group_size = GROUP_SIZE (vinfo_for_stmt (first_stmt));
947 int nstmts = ncopies * ceil_log2 (group_size) * group_size;
948 inside_cost = record_stmt_cost (body_cost_vec, nstmts, vec_perm,
949 stmt_info, 0, vect_body);
950
951 if (dump_enabled_p ())
952 dump_printf_loc (MSG_NOTE, vect_location,
953 "vect_model_store_cost: strided group_size = %d .\n",
954 group_size);
955 }
956
957 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
958 /* Costs of the stores. */
959 if (memory_access_type == VMAT_ELEMENTWISE
960 || memory_access_type == VMAT_GATHER_SCATTER)
961 /* N scalar stores plus extracting the elements. */
962 inside_cost += record_stmt_cost (body_cost_vec,
963 ncopies * TYPE_VECTOR_SUBPARTS (vectype),
964 scalar_store, stmt_info, 0, vect_body);
965 else
966 vect_get_store_cost (dr, ncopies, &inside_cost, body_cost_vec);
967
968 if (memory_access_type == VMAT_ELEMENTWISE
969 || memory_access_type == VMAT_STRIDED_SLP)
970 inside_cost += record_stmt_cost (body_cost_vec,
971 ncopies * TYPE_VECTOR_SUBPARTS (vectype),
972 vec_to_scalar, stmt_info, 0, vect_body);
973
974 if (dump_enabled_p ())
975 dump_printf_loc (MSG_NOTE, vect_location,
976 "vect_model_store_cost: inside_cost = %d, "
977 "prologue_cost = %d .\n", inside_cost, prologue_cost);
978 }
979
980
981 /* Calculate cost of DR's memory access. */
982 void
983 vect_get_store_cost (struct data_reference *dr, int ncopies,
984 unsigned int *inside_cost,
985 stmt_vector_for_cost *body_cost_vec)
986 {
987 int alignment_support_scheme = vect_supportable_dr_alignment (dr, false);
988 gimple *stmt = DR_STMT (dr);
989 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
990
991 switch (alignment_support_scheme)
992 {
993 case dr_aligned:
994 {
995 *inside_cost += record_stmt_cost (body_cost_vec, ncopies,
996 vector_store, stmt_info, 0,
997 vect_body);
998
999 if (dump_enabled_p ())
1000 dump_printf_loc (MSG_NOTE, vect_location,
1001 "vect_model_store_cost: aligned.\n");
1002 break;
1003 }
1004
1005 case dr_unaligned_supported:
1006 {
1007 /* Here, we assign an additional cost for the unaligned store. */
1008 *inside_cost += record_stmt_cost (body_cost_vec, ncopies,
1009 unaligned_store, stmt_info,
1010 DR_MISALIGNMENT (dr), vect_body);
1011 if (dump_enabled_p ())
1012 dump_printf_loc (MSG_NOTE, vect_location,
1013 "vect_model_store_cost: unaligned supported by "
1014 "hardware.\n");
1015 break;
1016 }
1017
1018 case dr_unaligned_unsupported:
1019 {
1020 *inside_cost = VECT_MAX_COST;
1021
1022 if (dump_enabled_p ())
1023 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1024 "vect_model_store_cost: unsupported access.\n");
1025 break;
1026 }
1027
1028 default:
1029 gcc_unreachable ();
1030 }
1031 }
1032
1033
1034 /* Function vect_model_load_cost
1035
1036 Models cost for loads. In the case of grouped accesses, one access has
1037 the overhead of the grouped access attributed to it. Since unaligned
1038 accesses are supported for loads, we also account for the costs of the
1039 access scheme chosen. */
1040
1041 void
1042 vect_model_load_cost (stmt_vec_info stmt_info, int ncopies,
1043 vect_memory_access_type memory_access_type,
1044 slp_tree slp_node,
1045 stmt_vector_for_cost *prologue_cost_vec,
1046 stmt_vector_for_cost *body_cost_vec)
1047 {
1048 gimple *first_stmt = STMT_VINFO_STMT (stmt_info);
1049 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
1050 unsigned int inside_cost = 0, prologue_cost = 0;
1051 bool grouped_access_p = STMT_VINFO_GROUPED_ACCESS (stmt_info);
1052
1053 /* Grouped loads read all elements in the group at once,
1054 so we want the DR for the first statement. */
1055 if (!slp_node && grouped_access_p)
1056 {
1057 first_stmt = GROUP_FIRST_ELEMENT (stmt_info);
1058 dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
1059 }
1060
1061 /* True if we should include any once-per-group costs as well as
1062 the cost of the statement itself. For SLP we only get called
1063 once per group anyhow. */
1064 bool first_stmt_p = (first_stmt == STMT_VINFO_STMT (stmt_info));
1065
1066 /* We assume that the cost of a single load-lanes instruction is
1067 equivalent to the cost of GROUP_SIZE separate loads. If a grouped
1068 access is instead being provided by a load-and-permute operation,
1069 include the cost of the permutes. */
1070 if (first_stmt_p
1071 && memory_access_type == VMAT_CONTIGUOUS_PERMUTE)
1072 {
1073       /* Uses even and odd extract operations or shuffle operations
1074          for each needed permute.  */
1075 int group_size = GROUP_SIZE (vinfo_for_stmt (first_stmt));
1076 int nstmts = ncopies * ceil_log2 (group_size) * group_size;
1077 inside_cost = record_stmt_cost (body_cost_vec, nstmts, vec_perm,
1078 stmt_info, 0, vect_body);
1079
1080 if (dump_enabled_p ())
1081 dump_printf_loc (MSG_NOTE, vect_location,
1082 "vect_model_load_cost: strided group_size = %d .\n",
1083 group_size);
1084 }
1085
1086 /* The loads themselves. */
1087 if (memory_access_type == VMAT_ELEMENTWISE
1088 || memory_access_type == VMAT_GATHER_SCATTER)
1089 {
1090 /* N scalar loads plus gathering them into a vector. */
1091 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
1092 inside_cost += record_stmt_cost (body_cost_vec,
1093 ncopies * TYPE_VECTOR_SUBPARTS (vectype),
1094 scalar_load, stmt_info, 0, vect_body);
1095 }
1096 else
1097 vect_get_load_cost (dr, ncopies, first_stmt_p,
1098 &inside_cost, &prologue_cost,
1099 prologue_cost_vec, body_cost_vec, true);
1100 if (memory_access_type == VMAT_ELEMENTWISE
1101 || memory_access_type == VMAT_STRIDED_SLP)
1102 inside_cost += record_stmt_cost (body_cost_vec, ncopies, vec_construct,
1103 stmt_info, 0, vect_body);
1104
1105 if (dump_enabled_p ())
1106 dump_printf_loc (MSG_NOTE, vect_location,
1107 "vect_model_load_cost: inside_cost = %d, "
1108 "prologue_cost = %d .\n", inside_cost, prologue_cost);
1109 }
1110
1111
1112 /* Calculate cost of DR's memory access. */
1113 void
1114 vect_get_load_cost (struct data_reference *dr, int ncopies,
1115 bool add_realign_cost, unsigned int *inside_cost,
1116 unsigned int *prologue_cost,
1117 stmt_vector_for_cost *prologue_cost_vec,
1118 stmt_vector_for_cost *body_cost_vec,
1119 bool record_prologue_costs)
1120 {
1121 int alignment_support_scheme = vect_supportable_dr_alignment (dr, false);
1122 gimple *stmt = DR_STMT (dr);
1123 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1124
1125 switch (alignment_support_scheme)
1126 {
1127 case dr_aligned:
1128 {
1129 *inside_cost += record_stmt_cost (body_cost_vec, ncopies, vector_load,
1130 stmt_info, 0, vect_body);
1131
1132 if (dump_enabled_p ())
1133 dump_printf_loc (MSG_NOTE, vect_location,
1134 "vect_model_load_cost: aligned.\n");
1135
1136 break;
1137 }
1138 case dr_unaligned_supported:
1139 {
1140 /* Here, we assign an additional cost for the unaligned load. */
1141 *inside_cost += record_stmt_cost (body_cost_vec, ncopies,
1142 unaligned_load, stmt_info,
1143 DR_MISALIGNMENT (dr), vect_body);
1144
1145 if (dump_enabled_p ())
1146 dump_printf_loc (MSG_NOTE, vect_location,
1147 "vect_model_load_cost: unaligned supported by "
1148 "hardware.\n");
1149
1150 break;
1151 }
1152 case dr_explicit_realign:
1153 {
1154 *inside_cost += record_stmt_cost (body_cost_vec, ncopies * 2,
1155 vector_load, stmt_info, 0, vect_body);
1156 *inside_cost += record_stmt_cost (body_cost_vec, ncopies,
1157 vec_perm, stmt_info, 0, vect_body);
1158
1159 /* FIXME: If the misalignment remains fixed across the iterations of
1160 the containing loop, the following cost should be added to the
1161 prologue costs. */
1162 if (targetm.vectorize.builtin_mask_for_load)
1163 *inside_cost += record_stmt_cost (body_cost_vec, 1, vector_stmt,
1164 stmt_info, 0, vect_body);
1165
1166 if (dump_enabled_p ())
1167 dump_printf_loc (MSG_NOTE, vect_location,
1168 "vect_model_load_cost: explicit realign\n");
1169
1170 break;
1171 }
1172 case dr_explicit_realign_optimized:
1173 {
1174 if (dump_enabled_p ())
1175 dump_printf_loc (MSG_NOTE, vect_location,
1176 "vect_model_load_cost: unaligned software "
1177 "pipelined.\n");
1178
1179 /* Unaligned software pipeline has a load of an address, an initial
1180 load, and possibly a mask operation to "prime" the loop. However,
1181 if this is an access in a group of loads, which provide grouped
1182 access, then the above cost should only be considered for one
1183 access in the group. Inside the loop, there is a load op
1184 and a realignment op. */
1185
1186 if (add_realign_cost && record_prologue_costs)
1187 {
1188 *prologue_cost += record_stmt_cost (prologue_cost_vec, 2,
1189 vector_stmt, stmt_info,
1190 0, vect_prologue);
1191 if (targetm.vectorize.builtin_mask_for_load)
1192 *prologue_cost += record_stmt_cost (prologue_cost_vec, 1,
1193 vector_stmt, stmt_info,
1194 0, vect_prologue);
1195 }
1196
1197 *inside_cost += record_stmt_cost (body_cost_vec, ncopies, vector_load,
1198 stmt_info, 0, vect_body);
1199 *inside_cost += record_stmt_cost (body_cost_vec, ncopies, vec_perm,
1200 stmt_info, 0, vect_body);
1201
1202 if (dump_enabled_p ())
1203 dump_printf_loc (MSG_NOTE, vect_location,
1204 "vect_model_load_cost: explicit realign optimized"
1205 "\n");
1206
1207 break;
1208 }
1209
1210 case dr_unaligned_unsupported:
1211 {
1212 *inside_cost = VECT_MAX_COST;
1213
1214 if (dump_enabled_p ())
1215 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1216 "vect_model_load_cost: unsupported access.\n");
1217 break;
1218 }
1219
1220 default:
1221 gcc_unreachable ();
1222 }
1223 }
1224
1225 /* Insert the new stmt NEW_STMT at *GSI or at the appropriate place in
1226 the loop preheader for the vectorized stmt STMT. */
1227
1228 static void
1229 vect_init_vector_1 (gimple *stmt, gimple *new_stmt, gimple_stmt_iterator *gsi)
1230 {
1231 if (gsi)
1232 vect_finish_stmt_generation (stmt, new_stmt, gsi);
1233 else
1234 {
1235 stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
1236 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
1237
1238 if (loop_vinfo)
1239 {
1240 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
1241 basic_block new_bb;
1242 edge pe;
1243
1244 if (nested_in_vect_loop_p (loop, stmt))
1245 loop = loop->inner;
1246
1247 pe = loop_preheader_edge (loop);
1248 new_bb = gsi_insert_on_edge_immediate (pe, new_stmt);
1249 gcc_assert (!new_bb);
1250 }
1251 else
1252 {
1253 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_vinfo);
1254 basic_block bb;
1255 gimple_stmt_iterator gsi_bb_start;
1256
1257 gcc_assert (bb_vinfo);
1258 bb = BB_VINFO_BB (bb_vinfo);
1259 gsi_bb_start = gsi_after_labels (bb);
1260 gsi_insert_before (&gsi_bb_start, new_stmt, GSI_SAME_STMT);
1261 }
1262 }
1263
1264 if (dump_enabled_p ())
1265 {
1266 dump_printf_loc (MSG_NOTE, vect_location,
1267 "created new init_stmt: ");
1268 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, new_stmt, 0);
1269 }
1270 }
1271
1272 /* Function vect_init_vector.
1273
1274 Insert a new stmt (INIT_STMT) that initializes a new variable of type
1275 TYPE with the value VAL. If TYPE is a vector type and VAL does not have
1276    vector type, a vector with all elements equal to VAL is created first.
1277 Place the initialization at BSI if it is not NULL. Otherwise, place the
1278 initialization at the loop preheader.
1279 Return the DEF of INIT_STMT.
1280 It will be used in the vectorization of STMT. */
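/* For example, vectorizing "x + 3" with a V4SI vector type can lead to
   a call with VAL == 3 and TYPE == V4SI: the constant is broadcast to
   {3, 3, 3, 3}, assigned to a fresh SSA name by an init stmt emitted at
   GSI (or in the loop preheader when GSI is NULL), and that SSA name is
   returned.  */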
1281
1282 tree
1283 vect_init_vector (gimple *stmt, tree val, tree type, gimple_stmt_iterator *gsi)
1284 {
1285 gimple *init_stmt;
1286 tree new_temp;
1287
1288   /* We abuse this function to push something to an SSA name with initial value 'val'.  */
1289 if (! useless_type_conversion_p (type, TREE_TYPE (val)))
1290 {
1291 gcc_assert (TREE_CODE (type) == VECTOR_TYPE);
1292 if (! types_compatible_p (TREE_TYPE (type), TREE_TYPE (val)))
1293 {
1294 /* Scalar boolean value should be transformed into
1295 all zeros or all ones value before building a vector. */
1296 if (VECTOR_BOOLEAN_TYPE_P (type))
1297 {
1298 tree true_val = build_all_ones_cst (TREE_TYPE (type));
1299 tree false_val = build_zero_cst (TREE_TYPE (type));
1300
1301 if (CONSTANT_CLASS_P (val))
1302 val = integer_zerop (val) ? false_val : true_val;
1303 else
1304 {
1305 new_temp = make_ssa_name (TREE_TYPE (type));
1306 init_stmt = gimple_build_assign (new_temp, COND_EXPR,
1307 val, true_val, false_val);
1308 vect_init_vector_1 (stmt, init_stmt, gsi);
1309 val = new_temp;
1310 }
1311 }
1312 else if (CONSTANT_CLASS_P (val))
1313 val = fold_convert (TREE_TYPE (type), val);
1314 else
1315 {
1316 new_temp = make_ssa_name (TREE_TYPE (type));
1317 if (! INTEGRAL_TYPE_P (TREE_TYPE (val)))
1318 init_stmt = gimple_build_assign (new_temp,
1319 fold_build1 (VIEW_CONVERT_EXPR,
1320 TREE_TYPE (type),
1321 val));
1322 else
1323 init_stmt = gimple_build_assign (new_temp, NOP_EXPR, val);
1324 vect_init_vector_1 (stmt, init_stmt, gsi);
1325 val = new_temp;
1326 }
1327 }
1328 val = build_vector_from_val (type, val);
1329 }
1330
1331 new_temp = vect_get_new_ssa_name (type, vect_simple_var, "cst_");
1332 init_stmt = gimple_build_assign (new_temp, val);
1333 vect_init_vector_1 (stmt, init_stmt, gsi);
1334 return new_temp;
1335 }
1336
1337 /* Function vect_get_vec_def_for_operand_1.
1338
1339 For a defining stmt DEF_STMT of a scalar stmt, return a vector def with type
1340 DT that will be used in the vectorized stmt. */
1341
1342 tree
1343 vect_get_vec_def_for_operand_1 (gimple *def_stmt, enum vect_def_type dt)
1344 {
1345 tree vec_oprnd;
1346 gimple *vec_stmt;
1347 stmt_vec_info def_stmt_info = NULL;
1348
1349 switch (dt)
1350 {
1351 /* operand is a constant or a loop invariant. */
1352 case vect_constant_def:
1353 case vect_external_def:
1354 /* Code should use vect_get_vec_def_for_operand. */
1355 gcc_unreachable ();
1356
1357 /* operand is defined inside the loop. */
1358 case vect_internal_def:
1359 {
1360 /* Get the def from the vectorized stmt. */
1361 def_stmt_info = vinfo_for_stmt (def_stmt);
1362
1363 vec_stmt = STMT_VINFO_VEC_STMT (def_stmt_info);
1364 /* Get vectorized pattern statement. */
1365 if (!vec_stmt
1366 && STMT_VINFO_IN_PATTERN_P (def_stmt_info)
1367 && !STMT_VINFO_RELEVANT (def_stmt_info))
1368 vec_stmt = STMT_VINFO_VEC_STMT (vinfo_for_stmt (
1369 STMT_VINFO_RELATED_STMT (def_stmt_info)));
1370 gcc_assert (vec_stmt);
1371 if (gimple_code (vec_stmt) == GIMPLE_PHI)
1372 vec_oprnd = PHI_RESULT (vec_stmt);
1373 else if (is_gimple_call (vec_stmt))
1374 vec_oprnd = gimple_call_lhs (vec_stmt);
1375 else
1376 vec_oprnd = gimple_assign_lhs (vec_stmt);
1377 return vec_oprnd;
1378 }
1379
1380 /* operand is defined by a loop header phi. */
1381 case vect_reduction_def:
1382 case vect_double_reduction_def:
1383 case vect_nested_cycle:
1384 case vect_induction_def:
1385 {
1386 gcc_assert (gimple_code (def_stmt) == GIMPLE_PHI);
1387
1388 /* Get the def from the vectorized stmt. */
1389 def_stmt_info = vinfo_for_stmt (def_stmt);
1390 vec_stmt = STMT_VINFO_VEC_STMT (def_stmt_info);
1391 if (gimple_code (vec_stmt) == GIMPLE_PHI)
1392 vec_oprnd = PHI_RESULT (vec_stmt);
1393 else
1394 vec_oprnd = gimple_get_lhs (vec_stmt);
1395 return vec_oprnd;
1396 }
1397
1398 default:
1399 gcc_unreachable ();
1400 }
1401 }
1402
1403
1404 /* Function vect_get_vec_def_for_operand.
1405
1406 OP is an operand in STMT. This function returns a (vector) def that will be
1407 used in the vectorized stmt for STMT.
1408
1409 In the case that OP is an SSA_NAME which is defined in the loop, then
1410 STMT_VINFO_VEC_STMT of the defining stmt holds the relevant def.
1411
1412 In case OP is an invariant or constant, a new stmt that creates a vector def
1413 needs to be introduced. VECTYPE may be used to specify a required type for
1414 vector invariant. */
1415
1416 tree
1417 vect_get_vec_def_for_operand (tree op, gimple *stmt, tree vectype)
1418 {
1419 gimple *def_stmt;
1420 enum vect_def_type dt;
1421 bool is_simple_use;
1422 stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
1423 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
1424
1425 if (dump_enabled_p ())
1426 {
1427 dump_printf_loc (MSG_NOTE, vect_location,
1428 "vect_get_vec_def_for_operand: ");
1429 dump_generic_expr (MSG_NOTE, TDF_SLIM, op);
1430 dump_printf (MSG_NOTE, "\n");
1431 }
1432
1433 is_simple_use = vect_is_simple_use (op, loop_vinfo, &def_stmt, &dt);
1434 gcc_assert (is_simple_use);
1435 if (def_stmt && dump_enabled_p ())
1436 {
1437 dump_printf_loc (MSG_NOTE, vect_location, " def_stmt = ");
1438 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, def_stmt, 0);
1439 }
1440
1441 if (dt == vect_constant_def || dt == vect_external_def)
1442 {
1443 tree stmt_vectype = STMT_VINFO_VECTYPE (stmt_vinfo);
1444 tree vector_type;
1445
1446 if (vectype)
1447 vector_type = vectype;
1448 else if (VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (op))
1449 && VECTOR_BOOLEAN_TYPE_P (stmt_vectype))
1450 vector_type = build_same_sized_truth_vector_type (stmt_vectype);
1451 else
1452 vector_type = get_vectype_for_scalar_type (TREE_TYPE (op));
1453
1454 gcc_assert (vector_type);
1455 return vect_init_vector (stmt, op, vector_type, NULL);
1456 }
1457 else
1458 return vect_get_vec_def_for_operand_1 (def_stmt, dt);
1459 }
1460
1461
1462 /* Function vect_get_vec_def_for_stmt_copy
1463
1464 Return a vector-def for an operand. This function is used when the
1465 vectorized stmt to be created (by the caller to this function) is a "copy"
1466 created in case the vectorized result cannot fit in one vector, and several
1467 copies of the vector-stmt are required. In this case the vector-def is
1468 retrieved from the vector stmt recorded in the STMT_VINFO_RELATED_STMT field
1469 of the stmt that defines VEC_OPRND.
1470 DT is the type of the vector def VEC_OPRND.
1471
1472 Context:
1473 In case the vectorization factor (VF) is bigger than the number
1474 of elements that can fit in a vectype (nunits), we have to generate
1475 more than one vector stmt to vectorize the scalar stmt. This situation
1476 arises when there are multiple data-types operated upon in the loop; the
1477 smallest data-type determines the VF, and as a result, when vectorizing
1478 stmts operating on wider types we need to create 'VF/nunits' "copies" of the
1479 vector stmt (each computing a vector of 'nunits' results, and together
1480 computing 'VF' results in each iteration). This function is called when
1481 vectorizing such a stmt (e.g. vectorizing S2 in the illustration below, in
1482 which VF=16 and nunits=4, so the number of copies required is 4):
1483
1484 scalar stmt: vectorized into: STMT_VINFO_RELATED_STMT
1485
1486 S1: x = load VS1.0: vx.0 = memref0 VS1.1
1487 VS1.1: vx.1 = memref1 VS1.2
1488 VS1.2: vx.2 = memref2 VS1.3
1489 VS1.3: vx.3 = memref3
1490
1491 S2: z = x + ... VSnew.0: vz0 = vx.0 + ... VSnew.1
1492 VSnew.1: vz1 = vx.1 + ... VSnew.2
1493 VSnew.2: vz2 = vx.2 + ... VSnew.3
1494 VSnew.3: vz3 = vx.3 + ...
1495
1496 The vectorization of S1 is explained in vectorizable_load.
1497 The vectorization of S2:
1498 To create the first vector-stmt out of the 4 copies - VSnew.0 -
1499 the function 'vect_get_vec_def_for_operand' is called to
1500 get the relevant vector-def for each operand of S2. For operand x it
1501 returns the vector-def 'vx.0'.
1502
1503 To create the remaining copies of the vector-stmt (VSnew.j), this
1504 function is called to get the relevant vector-def for each operand. It is
1505 obtained from the respective VS1.j stmt, which is recorded in the
1506 STMT_VINFO_RELATED_STMT field of the stmt that defines VEC_OPRND.
1507
1508 For example, to obtain the vector-def 'vx.1' in order to create the
1509 vector stmt 'VSnew.1', this function is called with VEC_OPRND='vx.0'.
1510 Given 'vx0' we obtain the stmt that defines it ('VS1.0'); from the
1511 STMT_VINFO_RELATED_STMT field of 'VS1.0' we obtain the next copy - 'VS1.1',
1512 and return its def ('vx.1').
1513 Overall, to create the above sequence this function will be called 3 times:
1514 vx.1 = vect_get_vec_def_for_stmt_copy (dt, vx.0);
1515 vx.2 = vect_get_vec_def_for_stmt_copy (dt, vx.1);
1516 vx.3 = vect_get_vec_def_for_stmt_copy (dt, vx.2); */
1517
1518 tree
1519 vect_get_vec_def_for_stmt_copy (enum vect_def_type dt, tree vec_oprnd)
1520 {
1521 gimple *vec_stmt_for_operand;
1522 stmt_vec_info def_stmt_info;
1523
1524 /* Do nothing; can reuse same def. */
1525 if (dt == vect_external_def || dt == vect_constant_def )
1526 return vec_oprnd;
1527
1528 vec_stmt_for_operand = SSA_NAME_DEF_STMT (vec_oprnd);
1529 def_stmt_info = vinfo_for_stmt (vec_stmt_for_operand);
1530 gcc_assert (def_stmt_info);
1531 vec_stmt_for_operand = STMT_VINFO_RELATED_STMT (def_stmt_info);
1532 gcc_assert (vec_stmt_for_operand);
1533 if (gimple_code (vec_stmt_for_operand) == GIMPLE_PHI)
1534 vec_oprnd = PHI_RESULT (vec_stmt_for_operand);
1535 else
1536 vec_oprnd = gimple_get_lhs (vec_stmt_for_operand);
1537 return vec_oprnd;
1538 }
1539
1540
1541 /* Get vectorized definitions for the operands to create a copy of an original
1542 stmt. See vect_get_vec_def_for_stmt_copy () for details. */
1543
1544 void
1545 vect_get_vec_defs_for_stmt_copy (enum vect_def_type *dt,
1546 vec<tree> *vec_oprnds0,
1547 vec<tree> *vec_oprnds1)
1548 {
1549 tree vec_oprnd = vec_oprnds0->pop ();
1550
1551 vec_oprnd = vect_get_vec_def_for_stmt_copy (dt[0], vec_oprnd);
1552 vec_oprnds0->quick_push (vec_oprnd);
1553
1554 if (vec_oprnds1 && vec_oprnds1->length ())
1555 {
1556 vec_oprnd = vec_oprnds1->pop ();
1557 vec_oprnd = vect_get_vec_def_for_stmt_copy (dt[1], vec_oprnd);
1558 vec_oprnds1->quick_push (vec_oprnd);
1559 }
1560 }
1561
1562
1563 /* Get vectorized definitions for OP0 and OP1. */
1564
1565 void
1566 vect_get_vec_defs (tree op0, tree op1, gimple *stmt,
1567 vec<tree> *vec_oprnds0,
1568 vec<tree> *vec_oprnds1,
1569 slp_tree slp_node)
1570 {
1571 if (slp_node)
1572 {
1573 int nops = (op1 == NULL_TREE) ? 1 : 2;
1574 auto_vec<tree> ops (nops);
1575 auto_vec<vec<tree> > vec_defs (nops);
1576
1577 ops.quick_push (op0);
1578 if (op1)
1579 ops.quick_push (op1);
1580
1581 vect_get_slp_defs (ops, slp_node, &vec_defs);
1582
1583 *vec_oprnds0 = vec_defs[0];
1584 if (op1)
1585 *vec_oprnds1 = vec_defs[1];
1586 }
1587 else
1588 {
1589 tree vec_oprnd;
1590
1591 vec_oprnds0->create (1);
1592 vec_oprnd = vect_get_vec_def_for_operand (op0, stmt);
1593 vec_oprnds0->quick_push (vec_oprnd);
1594
1595 if (op1)
1596 {
1597 vec_oprnds1->create (1);
1598 vec_oprnd = vect_get_vec_def_for_operand (op1, stmt);
1599 vec_oprnds1->quick_push (vec_oprnd);
1600 }
1601 }
1602 }
1603
1604
1605 /* Function vect_finish_stmt_generation.
1606
1607 Insert a new stmt. */
1608
1609 void
1610 vect_finish_stmt_generation (gimple *stmt, gimple *vec_stmt,
1611 gimple_stmt_iterator *gsi)
1612 {
1613 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1614 vec_info *vinfo = stmt_info->vinfo;
1615
1616 gcc_assert (gimple_code (stmt) != GIMPLE_LABEL);
1617
1618 if (!gsi_end_p (*gsi)
1619 && gimple_has_mem_ops (vec_stmt))
1620 {
1621 gimple *at_stmt = gsi_stmt (*gsi);
1622 tree vuse = gimple_vuse (at_stmt);
1623 if (vuse && TREE_CODE (vuse) == SSA_NAME)
1624 {
1625 tree vdef = gimple_vdef (at_stmt);
1626 gimple_set_vuse (vec_stmt, gimple_vuse (at_stmt));
1627 /* If we have an SSA vuse and insert a store, update virtual
1628 SSA form to avoid triggering the renamer. Do so only
1629 if we can easily see all uses - which is what almost always
1630 happens with the way vectorized stmts are inserted. */
1631 if ((vdef && TREE_CODE (vdef) == SSA_NAME)
1632 && ((is_gimple_assign (vec_stmt)
1633 && !is_gimple_reg (gimple_assign_lhs (vec_stmt)))
1634 || (is_gimple_call (vec_stmt)
1635 && !(gimple_call_flags (vec_stmt)
1636 & (ECF_CONST|ECF_PURE|ECF_NOVOPS)))))
1637 {
1638 tree new_vdef = copy_ssa_name (vuse, vec_stmt);
1639 gimple_set_vdef (vec_stmt, new_vdef);
1640 SET_USE (gimple_vuse_op (at_stmt), new_vdef);
1641 }
1642 }
1643 }
1644 gsi_insert_before (gsi, vec_stmt, GSI_SAME_STMT);
1645
1646 set_vinfo_for_stmt (vec_stmt, new_stmt_vec_info (vec_stmt, vinfo));
1647
1648 if (dump_enabled_p ())
1649 {
1650 dump_printf_loc (MSG_NOTE, vect_location, "add new stmt: ");
1651 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, vec_stmt, 0);
1652 }
1653
1654 gimple_set_location (vec_stmt, gimple_location (stmt));
1655
1656 /* While EH edges will generally prevent vectorization, stmt might
1657 e.g. be in a must-not-throw region. Ensure newly created stmts
1658 that could throw are part of the same region. */
1659 int lp_nr = lookup_stmt_eh_lp (stmt);
1660 if (lp_nr != 0 && stmt_could_throw_p (vec_stmt))
1661 add_stmt_to_eh_lp (vec_stmt, lp_nr);
1662 }
1663
1664 /* We want to vectorize a call to combined function CFN with function
1665 decl FNDECL, using VECTYPE_OUT as the type of the output and VECTYPE_IN
1666 as the types of all inputs. Check whether this is possible using
1667 an internal function, returning its code if so or IFN_LAST if not. */
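/* For instance, a call recognized as CFN_SQRT can typically be mapped
   to IFN_SQRT when the target supports the operation directly on
   VECTYPE_OUT; otherwise IFN_LAST is returned.  */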
1668
1669 static internal_fn
1670 vectorizable_internal_function (combined_fn cfn, tree fndecl,
1671 tree vectype_out, tree vectype_in)
1672 {
1673 internal_fn ifn;
1674 if (internal_fn_p (cfn))
1675 ifn = as_internal_fn (cfn);
1676 else
1677 ifn = associated_internal_fn (fndecl);
1678 if (ifn != IFN_LAST && direct_internal_fn_p (ifn))
1679 {
1680 const direct_internal_fn_info &info = direct_internal_fn (ifn);
1681 if (info.vectorizable)
1682 {
1683 tree type0 = (info.type0 < 0 ? vectype_out : vectype_in);
1684 tree type1 = (info.type1 < 0 ? vectype_out : vectype_in);
1685 if (direct_internal_fn_supported_p (ifn, tree_pair (type0, type1),
1686 OPTIMIZE_FOR_SPEED))
1687 return ifn;
1688 }
1689 }
1690 return IFN_LAST;
1691 }
1692
1693
1694 static tree permute_vec_elements (tree, tree, tree, gimple *,
1695 gimple_stmt_iterator *);
1696
1697 /* STMT is a non-strided load or store, meaning that it accesses
1698 elements with a known constant step. Return -1 if that step
1699 is negative, 0 if it is zero, and 1 if it is greater than zero. */
1700
1701 static int
1702 compare_step_with_zero (gimple *stmt)
1703 {
1704 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1705 data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
1706 return tree_int_cst_compare (vect_dr_behavior (dr)->step,
1707 size_zero_node);
1708 }
1709
1710 /* If the target supports a permute mask that reverses the elements in
1711 a vector of type VECTYPE, return that mask, otherwise return null. */
1712
1713 static tree
1714 perm_mask_for_reverse (tree vectype)
1715 {
1716 int i, nunits;
1717
1718 nunits = TYPE_VECTOR_SUBPARTS (vectype);
1719
1720 /* The encoding has a single stepped pattern. */
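  /* For example, with NUNITS == 8 the three explicit elements
     { 7, 6, 5 } encode the full reversal selector
     { 7, 6, 5, 4, 3, 2, 1, 0 }; the remaining elements follow the
     step implied by the last two explicit ones.  */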
1721 vec_perm_builder sel (nunits, 1, 3);
1722 for (i = 0; i < 3; ++i)
1723 sel.quick_push (nunits - 1 - i);
1724
1725 vec_perm_indices indices (sel, 1, nunits);
1726 if (!can_vec_perm_const_p (TYPE_MODE (vectype), indices))
1727 return NULL_TREE;
1728 return vect_gen_perm_mask_checked (vectype, indices);
1729 }
1730
1731 /* A subroutine of get_load_store_type, with a subset of the same
1732 arguments. Handle the case where STMT is part of a grouped load
1733 or store.
1734
1735 For stores, the statements in the group are all consecutive
1736 and there is no gap at the end. For loads, the statements in the
1737 group might not be consecutive; there can be gaps between statements
1738 as well as at the end. */
1739
1740 static bool
1741 get_group_load_store_type (gimple *stmt, tree vectype, bool slp,
1742 vec_load_store_type vls_type,
1743 vect_memory_access_type *memory_access_type)
1744 {
1745 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1746 vec_info *vinfo = stmt_info->vinfo;
1747 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
1748 struct loop *loop = loop_vinfo ? LOOP_VINFO_LOOP (loop_vinfo) : NULL;
1749 gimple *first_stmt = GROUP_FIRST_ELEMENT (stmt_info);
1750 data_reference *first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
1751 unsigned int group_size = GROUP_SIZE (vinfo_for_stmt (first_stmt));
1752 bool single_element_p = (stmt == first_stmt
1753 && !GROUP_NEXT_ELEMENT (stmt_info));
1754 unsigned HOST_WIDE_INT gap = GROUP_GAP (vinfo_for_stmt (first_stmt));
1755 unsigned nunits = TYPE_VECTOR_SUBPARTS (vectype);
1756
1757 /* True if the vectorized statements would access beyond the last
1758 statement in the group. */
1759 bool overrun_p = false;
1760
1761 /* True if we can cope with such overrun by peeling for gaps, so that
1762 there is at least one final scalar iteration after the vector loop. */
1763 bool can_overrun_p = (vls_type == VLS_LOAD && loop_vinfo && !loop->inner);
1764
1765 /* There can only be a gap at the end of the group if the stride is
1766 known at compile time. */
1767 gcc_assert (!STMT_VINFO_STRIDED_P (stmt_info) || gap == 0);
1768
1769 /* Stores can't yet have gaps. */
1770 gcc_assert (slp || vls_type == VLS_LOAD || gap == 0);
1771
1772 if (slp)
1773 {
1774 if (STMT_VINFO_STRIDED_P (stmt_info))
1775 {
1776 /* Try to use consecutive accesses of GROUP_SIZE elements,
1777 separated by the stride, until we have a complete vector.
1778 Fall back to scalar accesses if that isn't possible. */
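   /* For example, with GROUP_SIZE == 2 and an 8-element vector, four
      groups of two consecutive elements fill each vector and
      VMAT_STRIDED_SLP applies; with GROUP_SIZE == 3 the group size does
      not divide the vector and we fall back to VMAT_ELEMENTWISE.  */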
1779 if (nunits % group_size == 0)
1780 *memory_access_type = VMAT_STRIDED_SLP;
1781 else
1782 *memory_access_type = VMAT_ELEMENTWISE;
1783 }
1784 else
1785 {
1786 overrun_p = loop_vinfo && gap != 0;
1787 if (overrun_p && vls_type != VLS_LOAD)
1788 {
1789 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1790 "Grouped store with gaps requires"
1791 " non-consecutive accesses\n");
1792 return false;
1793 }
1794 /* An overrun is fine if the trailing elements are smaller
1795 than the alignment boundary B. Every vector access will
1796 be a multiple of B and so we are guaranteed to access a
1797 non-gap element in the same B-sized block. */
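   /* For example, with a known alignment of 16 bytes and 4-byte scalar
      elements, a trailing gap of up to 3 elements is harmless.  */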
1798 if (overrun_p
1799 && gap < (vect_known_alignment_in_bytes (first_dr)
1800 / vect_get_scalar_dr_size (first_dr)))
1801 overrun_p = false;
1802 if (overrun_p && !can_overrun_p)
1803 {
1804 if (dump_enabled_p ())
1805 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1806 "Peeling for outer loop is not supported\n");
1807 return false;
1808 }
1809 *memory_access_type = VMAT_CONTIGUOUS;
1810 }
1811 }
1812 else
1813 {
1814 /* We can always handle this case using elementwise accesses,
1815 but see if something more efficient is available. */
1816 *memory_access_type = VMAT_ELEMENTWISE;
1817
1818 /* If there is a gap at the end of the group then these optimizations
1819 would access excess elements in the last iteration. */
1820 bool would_overrun_p = (gap != 0);
1821 /* An overrun is fine if the trailing elements are smaller than the
1822 alignment boundary B. Every vector access will be a multiple of B
1823 and so we are guaranteed to access a non-gap element in the
1824 same B-sized block. */
1825 if (would_overrun_p
1826 && gap < (vect_known_alignment_in_bytes (first_dr)
1827 / vect_get_scalar_dr_size (first_dr)))
1828 would_overrun_p = false;
1829
1830 if (!STMT_VINFO_STRIDED_P (stmt_info)
1831 && (can_overrun_p || !would_overrun_p)
1832 && compare_step_with_zero (stmt) > 0)
1833 {
1834 /* First try using LOAD/STORE_LANES. */
1835 if (vls_type == VLS_LOAD
1836 ? vect_load_lanes_supported (vectype, group_size)
1837 : vect_store_lanes_supported (vectype, group_size))
1838 {
1839 *memory_access_type = VMAT_LOAD_STORE_LANES;
1840 overrun_p = would_overrun_p;
1841 }
1842
1843 /* If that fails, try using permuting loads. */
1844 if (*memory_access_type == VMAT_ELEMENTWISE
1845 && (vls_type == VLS_LOAD
1846 ? vect_grouped_load_supported (vectype, single_element_p,
1847 group_size)
1848 : vect_grouped_store_supported (vectype, group_size)))
1849 {
1850 *memory_access_type = VMAT_CONTIGUOUS_PERMUTE;
1851 overrun_p = would_overrun_p;
1852 }
1853 }
1854 }
1855
1856 if (vls_type != VLS_LOAD && first_stmt == stmt)
1857 {
1858 /* STMT is the leader of the group. Check the operands of all the
1859 stmts of the group. */
1860 gimple *next_stmt = GROUP_NEXT_ELEMENT (stmt_info);
1861 while (next_stmt)
1862 {
1863 gcc_assert (gimple_assign_single_p (next_stmt));
1864 tree op = gimple_assign_rhs1 (next_stmt);
1865 gimple *def_stmt;
1866 enum vect_def_type dt;
1867 if (!vect_is_simple_use (op, vinfo, &def_stmt, &dt))
1868 {
1869 if (dump_enabled_p ())
1870 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1871 "use not simple.\n");
1872 return false;
1873 }
1874 next_stmt = GROUP_NEXT_ELEMENT (vinfo_for_stmt (next_stmt));
1875 }
1876 }
1877
1878 if (overrun_p)
1879 {
1880 gcc_assert (can_overrun_p);
1881 if (dump_enabled_p ())
1882 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1883 "Data access with gaps requires scalar "
1884 "epilogue loop\n");
1885 LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo) = true;
1886 }
1887
1888 return true;
1889 }
1890
1891 /* A subroutine of get_load_store_type, with a subset of the same
1892 arguments. Handle the case where STMT is a load or store that
1893 accesses consecutive elements with a negative step. */
1894
1895 static vect_memory_access_type
1896 get_negative_load_store_type (gimple *stmt, tree vectype,
1897 vec_load_store_type vls_type,
1898 unsigned int ncopies)
1899 {
1900 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1901 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
1902 dr_alignment_support alignment_support_scheme;
1903
1904 if (ncopies > 1)
1905 {
1906 if (dump_enabled_p ())
1907 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1908 "multiple types with negative step.\n");
1909 return VMAT_ELEMENTWISE;
1910 }
1911
1912 alignment_support_scheme = vect_supportable_dr_alignment (dr, false);
1913 if (alignment_support_scheme != dr_aligned
1914 && alignment_support_scheme != dr_unaligned_supported)
1915 {
1916 if (dump_enabled_p ())
1917 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1918 "negative step but alignment required.\n");
1919 return VMAT_ELEMENTWISE;
1920 }
1921
1922 if (vls_type == VLS_STORE_INVARIANT)
1923 {
1924 if (dump_enabled_p ())
1925 dump_printf_loc (MSG_NOTE, vect_location,
1926 "negative step with invariant source;"
1927 " no permute needed.\n");
1928 return VMAT_CONTIGUOUS_DOWN;
1929 }
1930
1931 if (!perm_mask_for_reverse (vectype))
1932 {
1933 if (dump_enabled_p ())
1934 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1935 "negative step and reversing not supported.\n");
1936 return VMAT_ELEMENTWISE;
1937 }
1938
1939 return VMAT_CONTIGUOUS_REVERSE;
1940 }
1941
1942 /* Analyze load or store statement STMT of type VLS_TYPE. Return true
1943 if there is a memory access type that the vectorized form can use,
1944 storing it in *MEMORY_ACCESS_TYPE if so. If we decide to use gathers
1945 or scatters, fill in GS_INFO accordingly.
1946
1947 SLP says whether we're performing SLP rather than loop vectorization.
1948 VECTYPE is the vector type that the vectorized statements will use.
1949 NCOPIES is the number of vector statements that will be needed. */
1950
1951 static bool
1952 get_load_store_type (gimple *stmt, tree vectype, bool slp,
1953 vec_load_store_type vls_type, unsigned int ncopies,
1954 vect_memory_access_type *memory_access_type,
1955 gather_scatter_info *gs_info)
1956 {
1957 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1958 vec_info *vinfo = stmt_info->vinfo;
1959 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
1960 if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
1961 {
1962 *memory_access_type = VMAT_GATHER_SCATTER;
1963 gimple *def_stmt;
1964 if (!vect_check_gather_scatter (stmt, loop_vinfo, gs_info))
1965 gcc_unreachable ();
1966 else if (!vect_is_simple_use (gs_info->offset, vinfo, &def_stmt,
1967 &gs_info->offset_dt,
1968 &gs_info->offset_vectype))
1969 {
1970 if (dump_enabled_p ())
1971 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1972 "%s index use not simple.\n",
1973 vls_type == VLS_LOAD ? "gather" : "scatter");
1974 return false;
1975 }
1976 }
1977 else if (STMT_VINFO_GROUPED_ACCESS (stmt_info))
1978 {
1979 if (!get_group_load_store_type (stmt, vectype, slp, vls_type,
1980 memory_access_type))
1981 return false;
1982 }
1983 else if (STMT_VINFO_STRIDED_P (stmt_info))
1984 {
1985 gcc_assert (!slp);
1986 *memory_access_type = VMAT_ELEMENTWISE;
1987 }
1988 else
1989 {
1990 int cmp = compare_step_with_zero (stmt);
1991 if (cmp < 0)
1992 *memory_access_type = get_negative_load_store_type
1993 (stmt, vectype, vls_type, ncopies);
1994 else if (cmp == 0)
1995 {
1996 gcc_assert (vls_type == VLS_LOAD);
1997 *memory_access_type = VMAT_INVARIANT;
1998 }
1999 else
2000 *memory_access_type = VMAT_CONTIGUOUS;
2001 }
2002
2003 /* FIXME: At the moment the cost model seems to underestimate the
2004 cost of using elementwise accesses. This check preserves the
2005 traditional behavior until that can be fixed. */
2006 if (*memory_access_type == VMAT_ELEMENTWISE
2007 && !STMT_VINFO_STRIDED_P (stmt_info))
2008 {
2009 if (dump_enabled_p ())
2010 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2011 "not falling back to elementwise accesses\n");
2012 return false;
2013 }
2014 return true;
2015 }
2016
2017 /* Function vectorizable_mask_load_store.
2018
2019 Check if STMT performs a conditional load or store that can be vectorized.
2020 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
2021 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
2022 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
2023
2024 static bool
2025 vectorizable_mask_load_store (gimple *stmt, gimple_stmt_iterator *gsi,
2026 gimple **vec_stmt, slp_tree slp_node)
2027 {
2028 tree vec_dest = NULL;
2029 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2030 stmt_vec_info prev_stmt_info;
2031 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
2032 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
2033 bool nested_in_vect_loop = nested_in_vect_loop_p (loop, stmt);
2034 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
2035 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
2036 tree rhs_vectype = NULL_TREE;
2037 tree mask_vectype;
2038 tree elem_type;
2039 gimple *new_stmt;
2040 tree dummy;
2041 tree dataref_ptr = NULL_TREE;
2042 gimple *ptr_incr;
2043 int nunits = TYPE_VECTOR_SUBPARTS (vectype);
2044 int ncopies;
2045 int i, j;
2046 bool inv_p;
2047 gather_scatter_info gs_info;
2048 vec_load_store_type vls_type;
2049 tree mask;
2050 gimple *def_stmt;
2051 enum vect_def_type dt;
2052
2053 if (slp_node != NULL)
2054 return false;
2055
2056 ncopies = vect_get_num_copies (loop_vinfo, vectype);
2057 gcc_assert (ncopies >= 1);
2058
2059 mask = gimple_call_arg (stmt, 2);
2060
2061 if (!VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (mask)))
2062 return false;
2063
2064 /* FORNOW. This restriction should be relaxed. */
2065 if (nested_in_vect_loop && ncopies > 1)
2066 {
2067 if (dump_enabled_p ())
2068 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2069 "multiple types in nested loop.");
2070 return false;
2071 }
2072
2073 if (!STMT_VINFO_RELEVANT_P (stmt_info))
2074 return false;
2075
2076 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
2077 && ! vec_stmt)
2078 return false;
2079
2080 if (!STMT_VINFO_DATA_REF (stmt_info))
2081 return false;
2082
2083 elem_type = TREE_TYPE (vectype);
2084
2085 if (TREE_CODE (mask) != SSA_NAME)
2086 return false;
2087
2088 if (!vect_is_simple_use (mask, loop_vinfo, &def_stmt, &dt, &mask_vectype))
2089 return false;
2090
2091 if (!mask_vectype)
2092 mask_vectype = get_mask_type_for_scalar_type (TREE_TYPE (vectype));
2093
2094 if (!mask_vectype || !VECTOR_BOOLEAN_TYPE_P (mask_vectype)
2095 || TYPE_VECTOR_SUBPARTS (mask_vectype) != TYPE_VECTOR_SUBPARTS (vectype))
2096 return false;
2097
2098 if (gimple_call_internal_fn (stmt) == IFN_MASK_STORE)
2099 {
2100 tree rhs = gimple_call_arg (stmt, 3);
2101 if (!vect_is_simple_use (rhs, loop_vinfo, &def_stmt, &dt, &rhs_vectype))
2102 return false;
2103 if (dt == vect_constant_def || dt == vect_external_def)
2104 vls_type = VLS_STORE_INVARIANT;
2105 else
2106 vls_type = VLS_STORE;
2107 }
2108 else
2109 vls_type = VLS_LOAD;
2110
2111 vect_memory_access_type memory_access_type;
2112 if (!get_load_store_type (stmt, vectype, false, vls_type, ncopies,
2113 &memory_access_type, &gs_info))
2114 return false;
2115
2116 if (memory_access_type == VMAT_GATHER_SCATTER)
2117 {
2118 tree arglist = TYPE_ARG_TYPES (TREE_TYPE (gs_info.decl));
2119 tree masktype
2120 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (TREE_CHAIN (arglist))));
2121 if (TREE_CODE (masktype) == INTEGER_TYPE)
2122 {
2123 if (dump_enabled_p ())
2124 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2125 "masked gather with integer mask not supported.");
2126 return false;
2127 }
2128 }
2129 else if (memory_access_type != VMAT_CONTIGUOUS)
2130 {
2131 if (dump_enabled_p ())
2132 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2133 "unsupported access type for masked %s.\n",
2134 vls_type == VLS_LOAD ? "load" : "store");
2135 return false;
2136 }
2137 else if (!VECTOR_MODE_P (TYPE_MODE (vectype))
2138 || !can_vec_mask_load_store_p (TYPE_MODE (vectype),
2139 TYPE_MODE (mask_vectype),
2140 vls_type == VLS_LOAD)
2141 || (rhs_vectype
2142 && !useless_type_conversion_p (vectype, rhs_vectype)))
2143 return false;
2144
2145 if (!vec_stmt) /* transformation not required. */
2146 {
2147 STMT_VINFO_MEMORY_ACCESS_TYPE (stmt_info) = memory_access_type;
2148 STMT_VINFO_TYPE (stmt_info) = call_vec_info_type;
2149 if (vls_type == VLS_LOAD)
2150 vect_model_load_cost (stmt_info, ncopies, memory_access_type,
2151 NULL, NULL, NULL);
2152 else
2153 vect_model_store_cost (stmt_info, ncopies, memory_access_type,
2154 dt, NULL, NULL, NULL);
2155 return true;
2156 }
2157 gcc_assert (memory_access_type == STMT_VINFO_MEMORY_ACCESS_TYPE (stmt_info));
2158
2159 /* Transform. */
2160
2161 if (memory_access_type == VMAT_GATHER_SCATTER)
2162 {
2163 tree vec_oprnd0 = NULL_TREE, op;
2164 tree arglist = TYPE_ARG_TYPES (TREE_TYPE (gs_info.decl));
2165 tree rettype, srctype, ptrtype, idxtype, masktype, scaletype;
2166 tree ptr, vec_mask = NULL_TREE, mask_op = NULL_TREE, var, scale;
2167 tree perm_mask = NULL_TREE, prev_res = NULL_TREE;
2168 tree mask_perm_mask = NULL_TREE;
2169 edge pe = loop_preheader_edge (loop);
2170 gimple_seq seq;
2171 basic_block new_bb;
2172 enum { NARROW, NONE, WIDEN } modifier;
2173 int gather_off_nunits = TYPE_VECTOR_SUBPARTS (gs_info.offset_vectype);
2174
2175 rettype = TREE_TYPE (TREE_TYPE (gs_info.decl));
2176 srctype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
2177 ptrtype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
2178 idxtype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
2179 masktype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
2180 scaletype = TREE_VALUE (arglist);
2181 gcc_checking_assert (types_compatible_p (srctype, rettype)
2182 && types_compatible_p (srctype, masktype));
2183
2184 if (nunits == gather_off_nunits)
2185 modifier = NONE;
2186 else if (nunits == gather_off_nunits / 2)
2187 {
2188 modifier = WIDEN;
2189
2190 vec_perm_builder sel (gather_off_nunits, gather_off_nunits, 1);
2191 for (i = 0; i < gather_off_nunits; ++i)
2192 sel.quick_push (i | nunits);
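   /* E.g. with gather_off_nunits == 4 and nunits == 2 this builds the
      selector { 2, 3, 2, 3 }, so the odd copies see the upper half of
      the offset vector in the lower positions.  */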
2193
2194 vec_perm_indices indices (sel, 1, gather_off_nunits);
2195 perm_mask = vect_gen_perm_mask_checked (gs_info.offset_vectype,
2196 indices);
2197 }
2198 else if (nunits == gather_off_nunits * 2)
2199 {
2200 modifier = NARROW;
2201
2202 vec_perm_builder sel (nunits, nunits, 1);
2203 sel.quick_grow (nunits);
2204 for (i = 0; i < nunits; ++i)
2205 sel[i] = i < gather_off_nunits
2206 ? i : i + nunits - gather_off_nunits;
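   /* E.g. with nunits == 4 and gather_off_nunits == 2 this builds
      { 0, 1, 4, 5 }, concatenating the low halves of two gather
      results into a single vector of VECTYPE.  */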
2207 vec_perm_indices indices (sel, 2, nunits);
2208 perm_mask = vect_gen_perm_mask_checked (vectype, indices);
2209
2210 ncopies *= 2;
2211
2212 for (i = 0; i < nunits; ++i)
2213 sel[i] = i | gather_off_nunits;
2214 indices.new_vector (sel, 2, gather_off_nunits);
2215 mask_perm_mask = vect_gen_perm_mask_checked (masktype, indices);
2216 }
2217 else
2218 gcc_unreachable ();
2219
2220 vec_dest = vect_create_destination_var (gimple_call_lhs (stmt), vectype);
2221
2222 ptr = fold_convert (ptrtype, gs_info.base);
2223 if (!is_gimple_min_invariant (ptr))
2224 {
2225 ptr = force_gimple_operand (ptr, &seq, true, NULL_TREE);
2226 new_bb = gsi_insert_seq_on_edge_immediate (pe, seq);
2227 gcc_assert (!new_bb);
2228 }
2229
2230 scale = build_int_cst (scaletype, gs_info.scale);
2231
2232 prev_stmt_info = NULL;
2233 for (j = 0; j < ncopies; ++j)
2234 {
2235 if (modifier == WIDEN && (j & 1))
2236 op = permute_vec_elements (vec_oprnd0, vec_oprnd0,
2237 perm_mask, stmt, gsi);
2238 else if (j == 0)
2239 op = vec_oprnd0
2240 = vect_get_vec_def_for_operand (gs_info.offset, stmt);
2241 else
2242 op = vec_oprnd0
2243 = vect_get_vec_def_for_stmt_copy (gs_info.offset_dt, vec_oprnd0);
2244
2245 if (!useless_type_conversion_p (idxtype, TREE_TYPE (op)))
2246 {
2247 gcc_assert (TYPE_VECTOR_SUBPARTS (TREE_TYPE (op))
2248 == TYPE_VECTOR_SUBPARTS (idxtype));
2249 var = vect_get_new_ssa_name (idxtype, vect_simple_var);
2250 op = build1 (VIEW_CONVERT_EXPR, idxtype, op);
2251 new_stmt
2252 = gimple_build_assign (var, VIEW_CONVERT_EXPR, op);
2253 vect_finish_stmt_generation (stmt, new_stmt, gsi);
2254 op = var;
2255 }
2256
2257 if (mask_perm_mask && (j & 1))
2258 mask_op = permute_vec_elements (mask_op, mask_op,
2259 mask_perm_mask, stmt, gsi);
2260 else
2261 {
2262 if (j == 0)
2263 vec_mask = vect_get_vec_def_for_operand (mask, stmt);
2264 else
2265 {
2266 vect_is_simple_use (vec_mask, loop_vinfo, &def_stmt, &dt);
2267 vec_mask = vect_get_vec_def_for_stmt_copy (dt, vec_mask);
2268 }
2269
2270 mask_op = vec_mask;
2271 if (!useless_type_conversion_p (masktype, TREE_TYPE (vec_mask)))
2272 {
2273 gcc_assert (TYPE_VECTOR_SUBPARTS (TREE_TYPE (mask_op))
2274 == TYPE_VECTOR_SUBPARTS (masktype));
2275 var = vect_get_new_ssa_name (masktype, vect_simple_var);
2276 mask_op = build1 (VIEW_CONVERT_EXPR, masktype, mask_op);
2277 new_stmt
2278 = gimple_build_assign (var, VIEW_CONVERT_EXPR, mask_op);
2279 vect_finish_stmt_generation (stmt, new_stmt, gsi);
2280 mask_op = var;
2281 }
2282 }
2283
2284 new_stmt
2285 = gimple_build_call (gs_info.decl, 5, mask_op, ptr, op, mask_op,
2286 scale);
2287
2288 if (!useless_type_conversion_p (vectype, rettype))
2289 {
2290 gcc_assert (TYPE_VECTOR_SUBPARTS (vectype)
2291 == TYPE_VECTOR_SUBPARTS (rettype));
2292 op = vect_get_new_ssa_name (rettype, vect_simple_var);
2293 gimple_call_set_lhs (new_stmt, op);
2294 vect_finish_stmt_generation (stmt, new_stmt, gsi);
2295 var = make_ssa_name (vec_dest);
2296 op = build1 (VIEW_CONVERT_EXPR, vectype, op);
2297 new_stmt = gimple_build_assign (var, VIEW_CONVERT_EXPR, op);
2298 }
2299 else
2300 {
2301 var = make_ssa_name (vec_dest, new_stmt);
2302 gimple_call_set_lhs (new_stmt, var);
2303 }
2304
2305 vect_finish_stmt_generation (stmt, new_stmt, gsi);
2306
2307 if (modifier == NARROW)
2308 {
2309 if ((j & 1) == 0)
2310 {
2311 prev_res = var;
2312 continue;
2313 }
2314 var = permute_vec_elements (prev_res, var,
2315 perm_mask, stmt, gsi);
2316 new_stmt = SSA_NAME_DEF_STMT (var);
2317 }
2318
2319 if (prev_stmt_info == NULL)
2320 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
2321 else
2322 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
2323 prev_stmt_info = vinfo_for_stmt (new_stmt);
2324 }
2325
2326 /* Ensure that even with -fno-tree-dce the scalar MASK_LOAD is removed
2327 from the IL. */
2328 if (STMT_VINFO_RELATED_STMT (stmt_info))
2329 {
2330 stmt = STMT_VINFO_RELATED_STMT (stmt_info);
2331 stmt_info = vinfo_for_stmt (stmt);
2332 }
2333 tree lhs = gimple_call_lhs (stmt);
2334 new_stmt = gimple_build_assign (lhs, build_zero_cst (TREE_TYPE (lhs)));
2335 set_vinfo_for_stmt (new_stmt, stmt_info);
2336 set_vinfo_for_stmt (stmt, NULL);
2337 STMT_VINFO_STMT (stmt_info) = new_stmt;
2338 gsi_replace (gsi, new_stmt, true);
2339 return true;
2340 }
2341 else if (vls_type != VLS_LOAD)
2342 {
2343 tree vec_rhs = NULL_TREE, vec_mask = NULL_TREE;
2344 prev_stmt_info = NULL;
2345 LOOP_VINFO_HAS_MASK_STORE (loop_vinfo) = true;
2346 for (i = 0; i < ncopies; i++)
2347 {
2348 unsigned align, misalign;
2349
2350 if (i == 0)
2351 {
2352 tree rhs = gimple_call_arg (stmt, 3);
2353 vec_rhs = vect_get_vec_def_for_operand (rhs, stmt);
2354 vec_mask = vect_get_vec_def_for_operand (mask, stmt,
2355 mask_vectype);
2356 /* We should have caught mismatched types earlier. */
2357 gcc_assert (useless_type_conversion_p (vectype,
2358 TREE_TYPE (vec_rhs)));
2359 dataref_ptr = vect_create_data_ref_ptr (stmt, vectype, NULL,
2360 NULL_TREE, &dummy, gsi,
2361 &ptr_incr, false, &inv_p);
2362 gcc_assert (!inv_p);
2363 }
2364 else
2365 {
2366 vect_is_simple_use (vec_rhs, loop_vinfo, &def_stmt, &dt);
2367 vec_rhs = vect_get_vec_def_for_stmt_copy (dt, vec_rhs);
2368 vect_is_simple_use (vec_mask, loop_vinfo, &def_stmt, &dt);
2369 vec_mask = vect_get_vec_def_for_stmt_copy (dt, vec_mask);
2370 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi, stmt,
2371 TYPE_SIZE_UNIT (vectype));
2372 }
2373
2374 align = DR_TARGET_ALIGNMENT (dr);
2375 if (aligned_access_p (dr))
2376 misalign = 0;
2377 else if (DR_MISALIGNMENT (dr) == -1)
2378 {
2379 align = TYPE_ALIGN_UNIT (elem_type);
2380 misalign = 0;
2381 }
2382 else
2383 misalign = DR_MISALIGNMENT (dr);
2384 set_ptr_info_alignment (get_ptr_info (dataref_ptr), align,
2385 misalign);
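   /* For example, a misalignment of 12 bytes is passed below as an
      alignment of 4 (its least significant set bit), while a fully
      aligned access passes the target alignment itself.  */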
2386 tree ptr = build_int_cst (TREE_TYPE (gimple_call_arg (stmt, 1)),
2387 misalign ? least_bit_hwi (misalign) : align);
2388 gcall *call
2389 = gimple_build_call_internal (IFN_MASK_STORE, 4, dataref_ptr,
2390 ptr, vec_mask, vec_rhs);
2391 gimple_call_set_nothrow (call, true);
2392 new_stmt = call;
2393 vect_finish_stmt_generation (stmt, new_stmt, gsi);
2394 if (i == 0)
2395 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
2396 else
2397 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
2398 prev_stmt_info = vinfo_for_stmt (new_stmt);
2399 }
2400 }
2401 else
2402 {
2403 tree vec_mask = NULL_TREE;
2404 prev_stmt_info = NULL;
2405 vec_dest = vect_create_destination_var (gimple_call_lhs (stmt), vectype);
2406 for (i = 0; i < ncopies; i++)
2407 {
2408 unsigned align, misalign;
2409
2410 if (i == 0)
2411 {
2412 vec_mask = vect_get_vec_def_for_operand (mask, stmt,
2413 mask_vectype);
2414 dataref_ptr = vect_create_data_ref_ptr (stmt, vectype, NULL,
2415 NULL_TREE, &dummy, gsi,
2416 &ptr_incr, false, &inv_p);
2417 gcc_assert (!inv_p);
2418 }
2419 else
2420 {
2421 vect_is_simple_use (vec_mask, loop_vinfo, &def_stmt, &dt);
2422 vec_mask = vect_get_vec_def_for_stmt_copy (dt, vec_mask);
2423 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi, stmt,
2424 TYPE_SIZE_UNIT (vectype));
2425 }
2426
2427 align = DR_TARGET_ALIGNMENT (dr);
2428 if (aligned_access_p (dr))
2429 misalign = 0;
2430 else if (DR_MISALIGNMENT (dr) == -1)
2431 {
2432 align = TYPE_ALIGN_UNIT (elem_type);
2433 misalign = 0;
2434 }
2435 else
2436 misalign = DR_MISALIGNMENT (dr);
2437 set_ptr_info_alignment (get_ptr_info (dataref_ptr), align,
2438 misalign);
2439 tree ptr = build_int_cst (TREE_TYPE (gimple_call_arg (stmt, 1)),
2440 misalign ? least_bit_hwi (misalign) : align);
2441 gcall *call
2442 = gimple_build_call_internal (IFN_MASK_LOAD, 3, dataref_ptr,
2443 ptr, vec_mask);
2444 gimple_call_set_lhs (call, make_ssa_name (vec_dest));
2445 gimple_call_set_nothrow (call, true);
2446 vect_finish_stmt_generation (stmt, call, gsi);
2447 if (i == 0)
2448 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = call;
2449 else
2450 STMT_VINFO_RELATED_STMT (prev_stmt_info) = call;
2451 prev_stmt_info = vinfo_for_stmt (call);
2452 }
2453 }
2454
2455 if (vls_type == VLS_LOAD)
2456 {
2457 /* Ensure that even with -fno-tree-dce the scalar MASK_LOAD is removed
2458 from the IL. */
2459 if (STMT_VINFO_RELATED_STMT (stmt_info))
2460 {
2461 stmt = STMT_VINFO_RELATED_STMT (stmt_info);
2462 stmt_info = vinfo_for_stmt (stmt);
2463 }
2464 tree lhs = gimple_call_lhs (stmt);
2465 new_stmt = gimple_build_assign (lhs, build_zero_cst (TREE_TYPE (lhs)));
2466 set_vinfo_for_stmt (new_stmt, stmt_info);
2467 set_vinfo_for_stmt (stmt, NULL);
2468 STMT_VINFO_STMT (stmt_info) = new_stmt;
2469 gsi_replace (gsi, new_stmt, true);
2470 }
2471
2472 return true;
2473 }
2474
2475 /* Check and perform vectorization of BUILT_IN_BSWAP{16,32,64}. */
2476
2477 static bool
2478 vectorizable_bswap (gimple *stmt, gimple_stmt_iterator *gsi,
2479 gimple **vec_stmt, slp_tree slp_node,
2480 tree vectype_in, enum vect_def_type *dt)
2481 {
2482 tree op, vectype;
2483 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2484 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
2485 unsigned ncopies, nunits;
2486
2487 op = gimple_call_arg (stmt, 0);
2488 vectype = STMT_VINFO_VECTYPE (stmt_info);
2489 nunits = TYPE_VECTOR_SUBPARTS (vectype);
2490
2491 /* Multiple types in SLP are handled by creating the appropriate number of
2492 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
2493 case of SLP. */
2494 if (slp_node)
2495 ncopies = 1;
2496 else
2497 ncopies = vect_get_num_copies (loop_vinfo, vectype);
2498
2499 gcc_assert (ncopies >= 1);
2500
2501 tree char_vectype = get_same_sized_vectype (char_type_node, vectype_in);
2502 if (! char_vectype)
2503 return false;
2504
2505 unsigned int num_bytes = TYPE_VECTOR_SUBPARTS (char_vectype);
2506 unsigned word_bytes = num_bytes / nunits;
2507
2508 /* The encoding uses one stepped pattern for each byte in the word. */
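   /* For example, bswap32 on a 16-byte vector has word_bytes == 4, and
      the twelve encoded elements expand to the selector
      { 3, 2, 1, 0, 7, 6, 5, 4, 11, 10, 9, 8, 15, 14, 13, 12 },
      reversing the bytes within each 32-bit word.  */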
2509 vec_perm_builder elts (num_bytes, word_bytes, 3);
2510 for (unsigned i = 0; i < 3; ++i)
2511 for (unsigned j = 0; j < word_bytes; ++j)
2512 elts.quick_push ((i + 1) * word_bytes - j - 1);
2513
2514 vec_perm_indices indices (elts, 1, num_bytes);
2515 if (!can_vec_perm_const_p (TYPE_MODE (char_vectype), indices))
2516 return false;
2517
2518 if (! vec_stmt)
2519 {
2520 STMT_VINFO_TYPE (stmt_info) = call_vec_info_type;
2521 if (dump_enabled_p ())
2522 dump_printf_loc (MSG_NOTE, vect_location, "=== vectorizable_bswap ==="
2523 "\n");
2524 if (! PURE_SLP_STMT (stmt_info))
2525 {
2526 add_stmt_cost (stmt_info->vinfo->target_cost_data,
2527 1, vector_stmt, stmt_info, 0, vect_prologue);
2528 add_stmt_cost (stmt_info->vinfo->target_cost_data,
2529 ncopies, vec_perm, stmt_info, 0, vect_body);
2530 }
2531 return true;
2532 }
2533
2534 tree bswap_vconst = vec_perm_indices_to_tree (char_vectype, indices);
2535
2536 /* Transform. */
2537 vec<tree> vec_oprnds = vNULL;
2538 gimple *new_stmt = NULL;
2539 stmt_vec_info prev_stmt_info = NULL;
2540 for (unsigned j = 0; j < ncopies; j++)
2541 {
2542 /* Handle uses. */
2543 if (j == 0)
2544 vect_get_vec_defs (op, NULL, stmt, &vec_oprnds, NULL, slp_node);
2545 else
2546 vect_get_vec_defs_for_stmt_copy (dt, &vec_oprnds, NULL);
2547
2548 /* Arguments are ready. Create the new vector stmt. */
2549 unsigned i;
2550 tree vop;
2551 FOR_EACH_VEC_ELT (vec_oprnds, i, vop)
2552 {
2553 tree tem = make_ssa_name (char_vectype);
2554 new_stmt = gimple_build_assign (tem, build1 (VIEW_CONVERT_EXPR,
2555 char_vectype, vop));
2556 vect_finish_stmt_generation (stmt, new_stmt, gsi);
2557 tree tem2 = make_ssa_name (char_vectype);
2558 new_stmt = gimple_build_assign (tem2, VEC_PERM_EXPR,
2559 tem, tem, bswap_vconst);
2560 vect_finish_stmt_generation (stmt, new_stmt, gsi);
2561 tem = make_ssa_name (vectype);
2562 new_stmt = gimple_build_assign (tem, build1 (VIEW_CONVERT_EXPR,
2563 vectype, tem2));
2564 vect_finish_stmt_generation (stmt, new_stmt, gsi);
2565 if (slp_node)
2566 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
2567 }
2568
2569 if (slp_node)
2570 continue;
2571
2572 if (j == 0)
2573 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
2574 else
2575 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
2576
2577 prev_stmt_info = vinfo_for_stmt (new_stmt);
2578 }
2579
2580 vec_oprnds.release ();
2581 return true;
2582 }
2583
2584 /* Return true if vector types VECTYPE_IN and VECTYPE_OUT have
2585 integer elements and if we can narrow VECTYPE_IN to VECTYPE_OUT
2586 in a single step. On success, store the binary pack code in
2587 *CONVERT_CODE. */
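/* An illustrative case: packing a pair of V2DI results into a single
   V4SI normally uses VEC_PACK_TRUNC_EXPR as the returned code, provided
   the target supports that narrowing in one step.  */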
2588
2589 static bool
2590 simple_integer_narrowing (tree vectype_out, tree vectype_in,
2591 tree_code *convert_code)
2592 {
2593 if (!INTEGRAL_TYPE_P (TREE_TYPE (vectype_out))
2594 || !INTEGRAL_TYPE_P (TREE_TYPE (vectype_in)))
2595 return false;
2596
2597 tree_code code;
2598 int multi_step_cvt = 0;
2599 auto_vec <tree, 8> interm_types;
2600 if (!supportable_narrowing_operation (NOP_EXPR, vectype_out, vectype_in,
2601 &code, &multi_step_cvt,
2602 &interm_types)
2603 || multi_step_cvt)
2604 return false;
2605
2606 *convert_code = code;
2607 return true;
2608 }
2609
2610 /* Function vectorizable_call.
2611
2612 Check if GS performs a function call that can be vectorized.
2613 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
2614 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
2615 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
2616
2617 static bool
2618 vectorizable_call (gimple *gs, gimple_stmt_iterator *gsi, gimple **vec_stmt,
2619 slp_tree slp_node)
2620 {
2621 gcall *stmt;
2622 tree vec_dest;
2623 tree scalar_dest;
2624 tree op, type;
2625 tree vec_oprnd0 = NULL_TREE, vec_oprnd1 = NULL_TREE;
2626 stmt_vec_info stmt_info = vinfo_for_stmt (gs), prev_stmt_info;
2627 tree vectype_out, vectype_in;
2628 int nunits_in;
2629 int nunits_out;
2630 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
2631 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
2632 vec_info *vinfo = stmt_info->vinfo;
2633 tree fndecl, new_temp, rhs_type;
2634 gimple *def_stmt;
2635 enum vect_def_type dt[3]
2636 = {vect_unknown_def_type, vect_unknown_def_type, vect_unknown_def_type};
2637 int ndts = 3;
2638 gimple *new_stmt = NULL;
2639 int ncopies, j;
2640 vec<tree> vargs = vNULL;
2641 enum { NARROW, NONE, WIDEN } modifier;
2642 size_t i, nargs;
2643 tree lhs;
2644
2645 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
2646 return false;
2647
2648 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
2649 && ! vec_stmt)
2650 return false;
2651
2652 /* Is GS a vectorizable call? */
2653 stmt = dyn_cast <gcall *> (gs);
2654 if (!stmt)
2655 return false;
2656
2657 if (gimple_call_internal_p (stmt)
2658 && (gimple_call_internal_fn (stmt) == IFN_MASK_LOAD
2659 || gimple_call_internal_fn (stmt) == IFN_MASK_STORE))
2660 return vectorizable_mask_load_store (stmt, gsi, vec_stmt,
2661 slp_node);
2662
2663 if (gimple_call_lhs (stmt) == NULL_TREE
2664 || TREE_CODE (gimple_call_lhs (stmt)) != SSA_NAME)
2665 return false;
2666
2667 gcc_checking_assert (!stmt_can_throw_internal (stmt));
2668
2669 vectype_out = STMT_VINFO_VECTYPE (stmt_info);
2670
2671 /* Process function arguments. */
2672 rhs_type = NULL_TREE;
2673 vectype_in = NULL_TREE;
2674 nargs = gimple_call_num_args (stmt);
2675
2676 /* Bail out if the function has more than three arguments; we do not have
2677 interesting builtin functions to vectorize with more than two arguments,
2678 except for fma. Zero arguments is also not supported. */
2679 if (nargs == 0 || nargs > 3)
2680 return false;
2681
2682 /* Ignore the argument of IFN_GOMP_SIMD_LANE; it is magic. */
2683 if (gimple_call_internal_p (stmt)
2684 && gimple_call_internal_fn (stmt) == IFN_GOMP_SIMD_LANE)
2685 {
2686 nargs = 0;
2687 rhs_type = unsigned_type_node;
2688 }
2689
2690 for (i = 0; i < nargs; i++)
2691 {
2692 tree opvectype;
2693
2694 op = gimple_call_arg (stmt, i);
2695
2696 /* We can only handle calls with arguments of the same type. */
2697 if (rhs_type
2698 && !types_compatible_p (rhs_type, TREE_TYPE (op)))
2699 {
2700 if (dump_enabled_p ())
2701 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2702 "argument types differ.\n");
2703 return false;
2704 }
2705 if (!rhs_type)
2706 rhs_type = TREE_TYPE (op);
2707
2708 if (!vect_is_simple_use (op, vinfo, &def_stmt, &dt[i], &opvectype))
2709 {
2710 if (dump_enabled_p ())
2711 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2712 "use not simple.\n");
2713 return false;
2714 }
2715
2716 if (!vectype_in)
2717 vectype_in = opvectype;
2718 else if (opvectype
2719 && opvectype != vectype_in)
2720 {
2721 if (dump_enabled_p ())
2722 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2723 "argument vector types differ.\n");
2724 return false;
2725 }
2726 }
2727 /* If all arguments are external or constant defs use a vector type with
2728 the same size as the output vector type. */
2729 if (!vectype_in)
2730 vectype_in = get_same_sized_vectype (rhs_type, vectype_out);
2731 if (vec_stmt)
2732 gcc_assert (vectype_in);
2733 if (!vectype_in)
2734 {
2735 if (dump_enabled_p ())
2736 {
2737 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2738 "no vectype for scalar type ");
2739 dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM, rhs_type);
2740 dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
2741 }
2742
2743 return false;
2744 }
2745
2746 /* FORNOW */
2747 nunits_in = TYPE_VECTOR_SUBPARTS (vectype_in);
2748 nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
2749 if (nunits_in == nunits_out / 2)
2750 modifier = NARROW;
2751 else if (nunits_out == nunits_in)
2752 modifier = NONE;
2753 else if (nunits_out == nunits_in / 2)
2754 modifier = WIDEN;
2755 else
2756 return false;
2757
2758 /* We only handle functions that do not read or clobber memory. */
2759 if (gimple_vuse (stmt))
2760 {
2761 if (dump_enabled_p ())
2762 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2763 "function reads from or writes to memory.\n");
2764 return false;
2765 }
2766
2767 /* For now, we only vectorize functions if a target specific builtin
2768 is available. TODO -- in some cases, it might be profitable to
2769 insert the calls for pieces of the vector, in order to be able
2770 to vectorize other operations in the loop. */
2771 fndecl = NULL_TREE;
2772 internal_fn ifn = IFN_LAST;
2773 combined_fn cfn = gimple_call_combined_fn (stmt);
2774 tree callee = gimple_call_fndecl (stmt);
2775
2776 /* First try using an internal function. */
2777 tree_code convert_code = ERROR_MARK;
2778 if (cfn != CFN_LAST
2779 && (modifier == NONE
2780 || (modifier == NARROW
2781 && simple_integer_narrowing (vectype_out, vectype_in,
2782 &convert_code))))
2783 ifn = vectorizable_internal_function (cfn, callee, vectype_out,
2784 vectype_in);
2785
2786 /* If that fails, try asking for a target-specific built-in function. */
2787 if (ifn == IFN_LAST)
2788 {
2789 if (cfn != CFN_LAST)
2790 fndecl = targetm.vectorize.builtin_vectorized_function
2791 (cfn, vectype_out, vectype_in);
2792 else
2793 fndecl = targetm.vectorize.builtin_md_vectorized_function
2794 (callee, vectype_out, vectype_in);
2795 }
2796
2797 if (ifn == IFN_LAST && !fndecl)
2798 {
2799 if (cfn == CFN_GOMP_SIMD_LANE
2800 && !slp_node
2801 && loop_vinfo
2802 && LOOP_VINFO_LOOP (loop_vinfo)->simduid
2803 && TREE_CODE (gimple_call_arg (stmt, 0)) == SSA_NAME
2804 && LOOP_VINFO_LOOP (loop_vinfo)->simduid
2805 == SSA_NAME_VAR (gimple_call_arg (stmt, 0)))
2806 {
2807 /* We can handle IFN_GOMP_SIMD_LANE by returning a
2808 { 0, 1, 2, ... vf - 1 } vector. */
2809 gcc_assert (nargs == 0);
2810 }
2811 else if (modifier == NONE
2812 && (gimple_call_builtin_p (stmt, BUILT_IN_BSWAP16)
2813 || gimple_call_builtin_p (stmt, BUILT_IN_BSWAP32)
2814 || gimple_call_builtin_p (stmt, BUILT_IN_BSWAP64)))
2815 return vectorizable_bswap (stmt, gsi, vec_stmt, slp_node,
2816 vectype_in, dt);
2817 else
2818 {
2819 if (dump_enabled_p ())
2820 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2821 "function is not vectorizable.\n");
2822 return false;
2823 }
2824 }
2825
2826 if (slp_node)
2827 ncopies = 1;
2828 else if (modifier == NARROW && ifn == IFN_LAST)
2829 ncopies = vect_get_num_copies (loop_vinfo, vectype_out);
2830 else
2831 ncopies = vect_get_num_copies (loop_vinfo, vectype_in);
2832
2833 /* Sanity check: make sure that at least one copy of the vectorized stmt
2834 needs to be generated. */
2835 gcc_assert (ncopies >= 1);
2836
2837 if (!vec_stmt) /* transformation not required. */
2838 {
2839 STMT_VINFO_TYPE (stmt_info) = call_vec_info_type;
2840 if (dump_enabled_p ())
2841 dump_printf_loc (MSG_NOTE, vect_location, "=== vectorizable_call ==="
2842 "\n");
2843 vect_model_simple_cost (stmt_info, ncopies, dt, ndts, NULL, NULL);
2844 if (ifn != IFN_LAST && modifier == NARROW && !slp_node)
2845 add_stmt_cost (stmt_info->vinfo->target_cost_data, ncopies / 2,
2846 vec_promote_demote, stmt_info, 0, vect_body);
2847
2848 return true;
2849 }
2850
2851 /* Transform. */
2852
2853 if (dump_enabled_p ())
2854 dump_printf_loc (MSG_NOTE, vect_location, "transform call.\n");
2855
2856 /* Handle def. */
2857 scalar_dest = gimple_call_lhs (stmt);
2858 vec_dest = vect_create_destination_var (scalar_dest, vectype_out);
2859
2860 prev_stmt_info = NULL;
2861 if (modifier == NONE || ifn != IFN_LAST)
2862 {
2863 tree prev_res = NULL_TREE;
2864 for (j = 0; j < ncopies; ++j)
2865 {
2866 /* Build argument list for the vectorized call. */
2867 if (j == 0)
2868 vargs.create (nargs);
2869 else
2870 vargs.truncate (0);
2871
2872 if (slp_node)
2873 {
2874 auto_vec<vec<tree> > vec_defs (nargs);
2875 vec<tree> vec_oprnds0;
2876
2877 for (i = 0; i < nargs; i++)
2878 vargs.quick_push (gimple_call_arg (stmt, i));
2879 vect_get_slp_defs (vargs, slp_node, &vec_defs);
2880 vec_oprnds0 = vec_defs[0];
2881
2882 /* Arguments are ready. Create the new vector stmt. */
2883 FOR_EACH_VEC_ELT (vec_oprnds0, i, vec_oprnd0)
2884 {
2885 size_t k;
2886 for (k = 0; k < nargs; k++)
2887 {
2888 vec<tree> vec_oprndsk = vec_defs[k];
2889 vargs[k] = vec_oprndsk[i];
2890 }
2891 if (modifier == NARROW)
2892 {
2893 tree half_res = make_ssa_name (vectype_in);
2894 gcall *call
2895 = gimple_build_call_internal_vec (ifn, vargs);
2896 gimple_call_set_lhs (call, half_res);
2897 gimple_call_set_nothrow (call, true);
2898 new_stmt = call;
2899 vect_finish_stmt_generation (stmt, new_stmt, gsi);
2900 if ((i & 1) == 0)
2901 {
2902 prev_res = half_res;
2903 continue;
2904 }
2905 new_temp = make_ssa_name (vec_dest);
2906 new_stmt = gimple_build_assign (new_temp, convert_code,
2907 prev_res, half_res);
2908 }
2909 else
2910 {
2911 gcall *call;
2912 if (ifn != IFN_LAST)
2913 call = gimple_build_call_internal_vec (ifn, vargs);
2914 else
2915 call = gimple_build_call_vec (fndecl, vargs);
2916 new_temp = make_ssa_name (vec_dest, call);
2917 gimple_call_set_lhs (call, new_temp);
2918 gimple_call_set_nothrow (call, true);
2919 new_stmt = call;
2920 }
2921 vect_finish_stmt_generation (stmt, new_stmt, gsi);
2922 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
2923 }
2924
2925 for (i = 0; i < nargs; i++)
2926 {
2927 vec<tree> vec_oprndsi = vec_defs[i];
2928 vec_oprndsi.release ();
2929 }
2930 continue;
2931 }
2932
2933 for (i = 0; i < nargs; i++)
2934 {
2935 op = gimple_call_arg (stmt, i);
2936 if (j == 0)
2937 vec_oprnd0
2938 = vect_get_vec_def_for_operand (op, stmt);
2939 else
2940 {
2941 vec_oprnd0 = gimple_call_arg (new_stmt, i);
2942 vec_oprnd0
2943 = vect_get_vec_def_for_stmt_copy (dt[i], vec_oprnd0);
2944 }
2945
2946 vargs.quick_push (vec_oprnd0);
2947 }
2948
2949 if (gimple_call_internal_p (stmt)
2950 && gimple_call_internal_fn (stmt) == IFN_GOMP_SIMD_LANE)
2951 {
2952 tree_vector_builder v (vectype_out, 1, 3);
2953 for (int k = 0; k < 3; ++k)
2954 v.quick_push (build_int_cst (unsigned_type_node,
2955 j * nunits_out + k));
2956 tree cst = v.build ();
2957 tree new_var
2958 = vect_get_new_ssa_name (vectype_out, vect_simple_var, "cst_");
2959 gimple *init_stmt = gimple_build_assign (new_var, cst);
2960 vect_init_vector_1 (stmt, init_stmt, NULL);
2961 new_temp = make_ssa_name (vec_dest);
2962 new_stmt = gimple_build_assign (new_temp, new_var);
2963 }
2964 else if (modifier == NARROW)
2965 {
2966 tree half_res = make_ssa_name (vectype_in);
2967 gcall *call = gimple_build_call_internal_vec (ifn, vargs);
2968 gimple_call_set_lhs (call, half_res);
2969 gimple_call_set_nothrow (call, true);
2970 new_stmt = call;
2971 vect_finish_stmt_generation (stmt, new_stmt, gsi);
2972 if ((j & 1) == 0)
2973 {
2974 prev_res = half_res;
2975 continue;
2976 }
2977 new_temp = make_ssa_name (vec_dest);
2978 new_stmt = gimple_build_assign (new_temp, convert_code,
2979 prev_res, half_res);
2980 }
2981 else
2982 {
2983 gcall *call;
2984 if (ifn != IFN_LAST)
2985 call = gimple_build_call_internal_vec (ifn, vargs);
2986 else
2987 call = gimple_build_call_vec (fndecl, vargs);
2988 new_temp = make_ssa_name (vec_dest, new_stmt);
2989 gimple_call_set_lhs (call, new_temp);
2990 gimple_call_set_nothrow (call, true);
2991 new_stmt = call;
2992 }
2993 vect_finish_stmt_generation (stmt, new_stmt, gsi);
2994
2995 if (j == (modifier == NARROW ? 1 : 0))
2996 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
2997 else
2998 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
2999
3000 prev_stmt_info = vinfo_for_stmt (new_stmt);
3001 }
3002 }
3003 else if (modifier == NARROW)
3004 {
3005 for (j = 0; j < ncopies; ++j)
3006 {
3007 /* Build argument list for the vectorized call. */
3008 if (j == 0)
3009 vargs.create (nargs * 2);
3010 else
3011 vargs.truncate (0);
3012
3013 if (slp_node)
3014 {
3015 auto_vec<vec<tree> > vec_defs (nargs);
3016 vec<tree> vec_oprnds0;
3017
3018 for (i = 0; i < nargs; i++)
3019 vargs.quick_push (gimple_call_arg (stmt, i));
3020 vect_get_slp_defs (vargs, slp_node, &vec_defs);
3021 vec_oprnds0 = vec_defs[0];
3022
3023 /* Arguments are ready. Create the new vector stmt. */
3024 for (i = 0; vec_oprnds0.iterate (i, &vec_oprnd0); i += 2)
3025 {
3026 size_t k;
3027 vargs.truncate (0);
3028 for (k = 0; k < nargs; k++)
3029 {
3030 vec<tree> vec_oprndsk = vec_defs[k];
3031 vargs.quick_push (vec_oprndsk[i]);
3032 vargs.quick_push (vec_oprndsk[i + 1]);
3033 }
3034 gcall *call;
3035 if (ifn != IFN_LAST)
3036 call = gimple_build_call_internal_vec (ifn, vargs);
3037 else
3038 call = gimple_build_call_vec (fndecl, vargs);
3039 new_temp = make_ssa_name (vec_dest, call);
3040 gimple_call_set_lhs (call, new_temp);
3041 gimple_call_set_nothrow (call, true);
3042 new_stmt = call;
3043 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3044 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
3045 }
3046
3047 for (i = 0; i < nargs; i++)
3048 {
3049 vec<tree> vec_oprndsi = vec_defs[i];
3050 vec_oprndsi.release ();
3051 }
3052 continue;
3053 }
3054
3055 for (i = 0; i < nargs; i++)
3056 {
3057 op = gimple_call_arg (stmt, i);
3058 if (j == 0)
3059 {
3060 vec_oprnd0
3061 = vect_get_vec_def_for_operand (op, stmt);
3062 vec_oprnd1
3063 = vect_get_vec_def_for_stmt_copy (dt[i], vec_oprnd0);
3064 }
3065 else
3066 {
3067 vec_oprnd1 = gimple_call_arg (new_stmt, 2*i + 1);
3068 vec_oprnd0
3069 = vect_get_vec_def_for_stmt_copy (dt[i], vec_oprnd1);
3070 vec_oprnd1
3071 = vect_get_vec_def_for_stmt_copy (dt[i], vec_oprnd0);
3072 }
3073
3074 vargs.quick_push (vec_oprnd0);
3075 vargs.quick_push (vec_oprnd1);
3076 }
3077
3078 new_stmt = gimple_build_call_vec (fndecl, vargs);
3079 new_temp = make_ssa_name (vec_dest, new_stmt);
3080 gimple_call_set_lhs (new_stmt, new_temp);
3081 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3082
3083 if (j == 0)
3084 STMT_VINFO_VEC_STMT (stmt_info) = new_stmt;
3085 else
3086 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
3087
3088 prev_stmt_info = vinfo_for_stmt (new_stmt);
3089 }
3090
3091 *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
3092 }
3093 else
3094 /* No current target implements this case. */
3095 return false;
3096
3097 vargs.release ();
3098
3099 /* The call in STMT might prevent it from being removed in dce.
3100 However, we cannot remove it here, due to the way the SSA name
3101 it defines is mapped to the new definition. So just replace the
3102 rhs of the statement with something harmless. */
3103
3104 if (slp_node)
3105 return true;
3106
3107 type = TREE_TYPE (scalar_dest);
3108 if (is_pattern_stmt_p (stmt_info))
3109 lhs = gimple_call_lhs (STMT_VINFO_RELATED_STMT (stmt_info));
3110 else
3111 lhs = gimple_call_lhs (stmt);
3112
3113 new_stmt = gimple_build_assign (lhs, build_zero_cst (type));
3114 set_vinfo_for_stmt (new_stmt, stmt_info);
3115 set_vinfo_for_stmt (stmt, NULL);
3116 STMT_VINFO_STMT (stmt_info) = new_stmt;
3117 gsi_replace (gsi, new_stmt, false);
3118
3119 return true;
3120 }
3121
3122
3123 struct simd_call_arg_info
3124 {
3125 tree vectype;
3126 tree op;
3127 HOST_WIDE_INT linear_step;
3128 enum vect_def_type dt;
3129 unsigned int align;
3130 bool simd_lane_linear;
3131 };
3132
3133 /* Helper function of vectorizable_simd_clone_call. If OP, an SSA_NAME,
3134 is linear within simd lane (but not within whole loop), note it in
3135 *ARGINFO. */
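/* For instance, an address computed as BASE p+ _off, where
   _off = (sizetype) (_lane * 4) is built from a multiply of the result
   of this loop's IFN_GOMP_SIMD_LANE call, is recorded with op BASE and
   linear_step 4 (the names _off and _lane are illustrative only).  */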
3136
3137 static void
3138 vect_simd_lane_linear (tree op, struct loop *loop,
3139 struct simd_call_arg_info *arginfo)
3140 {
3141 gimple *def_stmt = SSA_NAME_DEF_STMT (op);
3142
3143 if (!is_gimple_assign (def_stmt)
3144 || gimple_assign_rhs_code (def_stmt) != POINTER_PLUS_EXPR
3145 || !is_gimple_min_invariant (gimple_assign_rhs1 (def_stmt)))
3146 return;
3147
3148 tree base = gimple_assign_rhs1 (def_stmt);
3149 HOST_WIDE_INT linear_step = 0;
3150 tree v = gimple_assign_rhs2 (def_stmt);
3151 while (TREE_CODE (v) == SSA_NAME)
3152 {
3153 tree t;
3154 def_stmt = SSA_NAME_DEF_STMT (v);
3155 if (is_gimple_assign (def_stmt))
3156 switch (gimple_assign_rhs_code (def_stmt))
3157 {
3158 case PLUS_EXPR:
3159 t = gimple_assign_rhs2 (def_stmt);
3160 if (linear_step || TREE_CODE (t) != INTEGER_CST)
3161 return;
3162 base = fold_build2 (POINTER_PLUS_EXPR, TREE_TYPE (base), base, t);
3163 v = gimple_assign_rhs1 (def_stmt);
3164 continue;
3165 case MULT_EXPR:
3166 t = gimple_assign_rhs2 (def_stmt);
3167 if (linear_step || !tree_fits_shwi_p (t) || integer_zerop (t))
3168 return;
3169 linear_step = tree_to_shwi (t);
3170 v = gimple_assign_rhs1 (def_stmt);
3171 continue;
3172 CASE_CONVERT:
3173 t = gimple_assign_rhs1 (def_stmt);
3174 if (TREE_CODE (TREE_TYPE (t)) != INTEGER_TYPE
3175 || (TYPE_PRECISION (TREE_TYPE (v))
3176 < TYPE_PRECISION (TREE_TYPE (t))))
3177 return;
3178 if (!linear_step)
3179 linear_step = 1;
3180 v = t;
3181 continue;
3182 default:
3183 return;
3184 }
3185 else if (gimple_call_internal_p (def_stmt, IFN_GOMP_SIMD_LANE)
3186 && loop->simduid
3187 && TREE_CODE (gimple_call_arg (def_stmt, 0)) == SSA_NAME
3188 && (SSA_NAME_VAR (gimple_call_arg (def_stmt, 0))
3189 == loop->simduid))
3190 {
3191 if (!linear_step)
3192 linear_step = 1;
3193 arginfo->linear_step = linear_step;
3194 arginfo->op = base;
3195 arginfo->simd_lane_linear = true;
3196 return;
3197 }
3198 }
3199 }
3200
3201 /* Function vectorizable_simd_clone_call.
3202
3203 Check if STMT performs a function call that can be vectorized
3204 by calling a simd clone of the function.
3205 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
3206 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
3207 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
3208
3209 static bool
3210 vectorizable_simd_clone_call (gimple *stmt, gimple_stmt_iterator *gsi,
3211 gimple **vec_stmt, slp_tree slp_node)
3212 {
3213 tree vec_dest;
3214 tree scalar_dest;
3215 tree op, type;
3216 tree vec_oprnd0 = NULL_TREE;
3217 stmt_vec_info stmt_info = vinfo_for_stmt (stmt), prev_stmt_info;
3218 tree vectype;
3219 unsigned int nunits;
3220 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
3221 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
3222 vec_info *vinfo = stmt_info->vinfo;
3223 struct loop *loop = loop_vinfo ? LOOP_VINFO_LOOP (loop_vinfo) : NULL;
3224 tree fndecl, new_temp;
3225 gimple *def_stmt;
3226 gimple *new_stmt = NULL;
3227 int ncopies, j;
3228 auto_vec<simd_call_arg_info> arginfo;
3229 vec<tree> vargs = vNULL;
3230 size_t i, nargs;
3231 tree lhs, rtype, ratype;
3232 vec<constructor_elt, va_gc> *ret_ctor_elts = NULL;
3233
3234 /* Is STMT a vectorizable call? */
3235 if (!is_gimple_call (stmt))
3236 return false;
3237
3238 fndecl = gimple_call_fndecl (stmt);
3239 if (fndecl == NULL_TREE)
3240 return false;
3241
3242 struct cgraph_node *node = cgraph_node::get (fndecl);
3243 if (node == NULL || node->simd_clones == NULL)
3244 return false;
3245
3246 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
3247 return false;
3248
3249 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
3250 && ! vec_stmt)
3251 return false;
3252
3253 if (gimple_call_lhs (stmt)
3254 && TREE_CODE (gimple_call_lhs (stmt)) != SSA_NAME)
3255 return false;
3256
3257 gcc_checking_assert (!stmt_can_throw_internal (stmt));
3258
3259 vectype = STMT_VINFO_VECTYPE (stmt_info);
3260
3261 if (loop_vinfo && nested_in_vect_loop_p (loop, stmt))
3262 return false;
3263
3264 /* FORNOW */
3265 if (slp_node)
3266 return false;
3267
3268 /* Process function arguments. */
3269 nargs = gimple_call_num_args (stmt);
3270
3271 /* Bail out if the function has zero arguments. */
3272 if (nargs == 0)
3273 return false;
3274
3275 arginfo.reserve (nargs, true);
3276
3277 for (i = 0; i < nargs; i++)
3278 {
3279 simd_call_arg_info thisarginfo;
3280 affine_iv iv;
3281
3282 thisarginfo.linear_step = 0;
3283 thisarginfo.align = 0;
3284 thisarginfo.op = NULL_TREE;
3285 thisarginfo.simd_lane_linear = false;
3286
3287 op = gimple_call_arg (stmt, i);
3288 if (!vect_is_simple_use (op, vinfo, &def_stmt, &thisarginfo.dt,
3289 &thisarginfo.vectype)
3290 || thisarginfo.dt == vect_uninitialized_def)
3291 {
3292 if (dump_enabled_p ())
3293 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3294 "use not simple.\n");
3295 return false;
3296 }
3297
3298 if (thisarginfo.dt == vect_constant_def
3299 || thisarginfo.dt == vect_external_def)
3300 gcc_assert (thisarginfo.vectype == NULL_TREE);
3301 else
3302 gcc_assert (thisarginfo.vectype != NULL_TREE);
3303
3304 /* For linear arguments, the analyze phase should have saved
3305 the base and step in STMT_VINFO_SIMD_CLONE_INFO. */
3306 if (i * 3 + 4 <= STMT_VINFO_SIMD_CLONE_INFO (stmt_info).length ()
3307 && STMT_VINFO_SIMD_CLONE_INFO (stmt_info)[i * 3 + 2])
3308 {
3309 gcc_assert (vec_stmt);
3310 thisarginfo.linear_step
3311 = tree_to_shwi (STMT_VINFO_SIMD_CLONE_INFO (stmt_info)[i * 3 + 2]);
3312 thisarginfo.op
3313 = STMT_VINFO_SIMD_CLONE_INFO (stmt_info)[i * 3 + 1];
3314 thisarginfo.simd_lane_linear
3315 = (STMT_VINFO_SIMD_CLONE_INFO (stmt_info)[i * 3 + 3]
3316 == boolean_true_node);
3317 /* If the loop has been peeled for alignment, we need to adjust it. */
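   /* E.g. if three iterations were peeled and the recorded linear
      step is 8, the saved base is advanced by 3 * 8 == 24 here.  */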
3318 tree n1 = LOOP_VINFO_NITERS_UNCHANGED (loop_vinfo);
3319 tree n2 = LOOP_VINFO_NITERS (loop_vinfo);
3320 if (n1 != n2 && !thisarginfo.simd_lane_linear)
3321 {
3322 tree bias = fold_build2 (MINUS_EXPR, TREE_TYPE (n1), n1, n2);
3323 tree step = STMT_VINFO_SIMD_CLONE_INFO (stmt_info)[i * 3 + 2];
3324 tree opt = TREE_TYPE (thisarginfo.op);
3325 bias = fold_convert (TREE_TYPE (step), bias);
3326 bias = fold_build2 (MULT_EXPR, TREE_TYPE (step), bias, step);
3327 thisarginfo.op
3328 = fold_build2 (POINTER_TYPE_P (opt)
3329 ? POINTER_PLUS_EXPR : PLUS_EXPR, opt,
3330 thisarginfo.op, bias);
3331 }
3332 }
3333 else if (!vec_stmt
3334 && thisarginfo.dt != vect_constant_def
3335 && thisarginfo.dt != vect_external_def
3336 && loop_vinfo
3337 && TREE_CODE (op) == SSA_NAME
3338 && simple_iv (loop, loop_containing_stmt (stmt), op,
3339 &iv, false)
3340 && tree_fits_shwi_p (iv.step))
3341 {
3342 thisarginfo.linear_step = tree_to_shwi (iv.step);
3343 thisarginfo.op = iv.base;
3344 }
3345 else if ((thisarginfo.dt == vect_constant_def
3346 || thisarginfo.dt == vect_external_def)
3347 && POINTER_TYPE_P (TREE_TYPE (op)))
3348 thisarginfo.align = get_pointer_alignment (op) / BITS_PER_UNIT;
3349 /* Addresses of array elements indexed by GOMP_SIMD_LANE are
3350 linear too. */
3351 if (POINTER_TYPE_P (TREE_TYPE (op))
3352 && !thisarginfo.linear_step
3353 && !vec_stmt
3354 && thisarginfo.dt != vect_constant_def
3355 && thisarginfo.dt != vect_external_def
3356 && loop_vinfo
3357 && !slp_node
3358 && TREE_CODE (op) == SSA_NAME)
3359 vect_simd_lane_linear (op, loop, &thisarginfo);
3360
3361 arginfo.quick_push (thisarginfo);
3362 }
3363
3364 unsigned int badness = 0;
3365 struct cgraph_node *bestn = NULL;
3366 if (STMT_VINFO_SIMD_CLONE_INFO (stmt_info).exists ())
3367 bestn = cgraph_node::get (STMT_VINFO_SIMD_CLONE_INFO (stmt_info)[0]);
3368 else
3369 for (struct cgraph_node *n = node->simd_clones; n != NULL;
3370 n = n->simdclone->next_clone)
3371 {
3372 unsigned int this_badness = 0;
3373 if (n->simdclone->simdlen
3374 > (unsigned) LOOP_VINFO_VECT_FACTOR (loop_vinfo)
3375 || n->simdclone->nargs != nargs)
3376 continue;
3377 if (n->simdclone->simdlen
3378 < (unsigned) LOOP_VINFO_VECT_FACTOR (loop_vinfo))
3379 this_badness += (exact_log2 (LOOP_VINFO_VECT_FACTOR (loop_vinfo))
3380 - exact_log2 (n->simdclone->simdlen)) * 1024;
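   /* E.g. a clone with simdlen 4 considered at vectorization factor 16
      gets (log2 (16) - log2 (4)) * 1024 == 2048 added to its badness
      here.  */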
3381 if (n->simdclone->inbranch)
3382 this_badness += 2048;
3383 int target_badness = targetm.simd_clone.usable (n);
3384 if (target_badness < 0)
3385 continue;
3386 this_badness += target_badness * 512;
3387 /* FORNOW: Have to add code to add the mask argument. */
3388 if (n->simdclone->inbranch)
3389 continue;
3390 for (i = 0; i < nargs; i++)
3391 {
3392 switch (n->simdclone->args[i].arg_type)
3393 {
3394 case SIMD_CLONE_ARG_TYPE_VECTOR:
3395 if (!useless_type_conversion_p
3396 (n->simdclone->args[i].orig_type,
3397 TREE_TYPE (gimple_call_arg (stmt, i))))
3398 i = -1;
3399 else if (arginfo[i].dt == vect_constant_def
3400 || arginfo[i].dt == vect_external_def
3401 || arginfo[i].linear_step)
3402 this_badness += 64;
3403 break;
3404 case SIMD_CLONE_ARG_TYPE_UNIFORM:
3405 if (arginfo[i].dt != vect_constant_def
3406 && arginfo[i].dt != vect_external_def)
3407 i = -1;
3408 break;
3409 case SIMD_CLONE_ARG_TYPE_LINEAR_CONSTANT_STEP:
3410 case SIMD_CLONE_ARG_TYPE_LINEAR_REF_CONSTANT_STEP:
3411 if (arginfo[i].dt == vect_constant_def
3412 || arginfo[i].dt == vect_external_def
3413 || (arginfo[i].linear_step
3414 != n->simdclone->args[i].linear_step))
3415 i = -1;
3416 break;
3417 case SIMD_CLONE_ARG_TYPE_LINEAR_VARIABLE_STEP:
3418 case SIMD_CLONE_ARG_TYPE_LINEAR_VAL_CONSTANT_STEP:
3419 case SIMD_CLONE_ARG_TYPE_LINEAR_UVAL_CONSTANT_STEP:
3420 case SIMD_CLONE_ARG_TYPE_LINEAR_REF_VARIABLE_STEP:
3421 case SIMD_CLONE_ARG_TYPE_LINEAR_VAL_VARIABLE_STEP:
3422 case SIMD_CLONE_ARG_TYPE_LINEAR_UVAL_VARIABLE_STEP:
3423 /* FORNOW */
3424 i = -1;
3425 break;
3426 case SIMD_CLONE_ARG_TYPE_MASK:
3427 gcc_unreachable ();
3428 }
3429 if (i == (size_t) -1)
3430 break;
3431 if (n->simdclone->args[i].alignment > arginfo[i].align)
3432 {
3433 i = -1;
3434 break;
3435 }
3436 if (arginfo[i].align)
3437 this_badness += (exact_log2 (arginfo[i].align)
3438 - exact_log2 (n->simdclone->args[i].alignment));
3439 }
3440 if (i == (size_t) -1)
3441 continue;
3442 if (bestn == NULL || this_badness < badness)
3443 {
3444 bestn = n;
3445 badness = this_badness;
3446 }
3447 }
3448
3449 if (bestn == NULL)
3450 return false;
3451
3452 for (i = 0; i < nargs; i++)
3453 if ((arginfo[i].dt == vect_constant_def
3454 || arginfo[i].dt == vect_external_def)
3455 && bestn->simdclone->args[i].arg_type == SIMD_CLONE_ARG_TYPE_VECTOR)
3456 {
3457 arginfo[i].vectype
3458 = get_vectype_for_scalar_type (TREE_TYPE (gimple_call_arg (stmt,
3459 i)));
3460 if (arginfo[i].vectype == NULL
3461 || (TYPE_VECTOR_SUBPARTS (arginfo[i].vectype)
3462 > bestn->simdclone->simdlen))
3463 return false;
3464 }
3465
3466 fndecl = bestn->decl;
3467 nunits = bestn->simdclone->simdlen;
3468 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits;
3469
3470 /* If the function isn't const, only allow it in simd loops where the user
3471 has asserted that at least nunits consecutive iterations can be
3472 performed using SIMD instructions. */
3473 if ((loop == NULL || (unsigned) loop->safelen < nunits)
3474 && gimple_vuse (stmt))
3475 return false;
3476
3477 /* Sanity check: make sure that at least one copy of the vectorized stmt
3478 needs to be generated. */
3479 gcc_assert (ncopies >= 1);
3480
3481 if (!vec_stmt) /* transformation not required. */
3482 {
3483 STMT_VINFO_SIMD_CLONE_INFO (stmt_info).safe_push (bestn->decl);
3484 for (i = 0; i < nargs; i++)
3485 if ((bestn->simdclone->args[i].arg_type
3486 == SIMD_CLONE_ARG_TYPE_LINEAR_CONSTANT_STEP)
3487 || (bestn->simdclone->args[i].arg_type
3488 == SIMD_CLONE_ARG_TYPE_LINEAR_REF_CONSTANT_STEP))
3489 {
3490 STMT_VINFO_SIMD_CLONE_INFO (stmt_info).safe_grow_cleared (i * 3
3491 + 1);
3492 STMT_VINFO_SIMD_CLONE_INFO (stmt_info).safe_push (arginfo[i].op);
3493 tree lst = POINTER_TYPE_P (TREE_TYPE (arginfo[i].op))
3494 ? size_type_node : TREE_TYPE (arginfo[i].op);
3495 tree ls = build_int_cst (lst, arginfo[i].linear_step);
3496 STMT_VINFO_SIMD_CLONE_INFO (stmt_info).safe_push (ls);
3497 tree sll = arginfo[i].simd_lane_linear
3498 ? boolean_true_node : boolean_false_node;
3499 STMT_VINFO_SIMD_CLONE_INFO (stmt_info).safe_push (sll);
3500 }
3501 STMT_VINFO_TYPE (stmt_info) = call_simd_clone_vec_info_type;
3502 if (dump_enabled_p ())
3503 dump_printf_loc (MSG_NOTE, vect_location,
3504 "=== vectorizable_simd_clone_call ===\n");
3505 /* vect_model_simple_cost (stmt_info, ncopies, dt, NULL, NULL); */
3506 return true;
3507 }
3508
3509 /* Transform. */
3510
3511 if (dump_enabled_p ())
3512 dump_printf_loc (MSG_NOTE, vect_location, "transform call.\n");
3513
3514 /* Handle def. */
3515 scalar_dest = gimple_call_lhs (stmt);
3516 vec_dest = NULL_TREE;
3517 rtype = NULL_TREE;
3518 ratype = NULL_TREE;
3519 if (scalar_dest)
3520 {
3521 vec_dest = vect_create_destination_var (scalar_dest, vectype);
3522 rtype = TREE_TYPE (TREE_TYPE (fndecl));
3523 if (TREE_CODE (rtype) == ARRAY_TYPE)
3524 {
3525 ratype = rtype;
3526 rtype = TREE_TYPE (ratype);
3527 }
3528 }
3529
3530 prev_stmt_info = NULL;
3531 for (j = 0; j < ncopies; ++j)
3532 {
3533 /* Build argument list for the vectorized call. */
3534 if (j == 0)
3535 vargs.create (nargs);
3536 else
3537 vargs.truncate (0);
3538
3539 for (i = 0; i < nargs; i++)
3540 {
3541 unsigned int k, l, m, o;
3542 tree atype;
3543 op = gimple_call_arg (stmt, i);
3544 switch (bestn->simdclone->args[i].arg_type)
3545 {
3546 case SIMD_CLONE_ARG_TYPE_VECTOR:
3547 atype = bestn->simdclone->args[i].vector_type;
3548 o = nunits / TYPE_VECTOR_SUBPARTS (atype);
3549 for (m = j * o; m < (j + 1) * o; m++)
3550 {
3551 if (TYPE_VECTOR_SUBPARTS (atype)
3552 < TYPE_VECTOR_SUBPARTS (arginfo[i].vectype))
3553 {
3554 unsigned int prec = GET_MODE_BITSIZE (TYPE_MODE (atype));
3555 k = (TYPE_VECTOR_SUBPARTS (arginfo[i].vectype)
3556 / TYPE_VECTOR_SUBPARTS (atype));
3557 gcc_assert ((k & (k - 1)) == 0);
3558 if (m == 0)
3559 vec_oprnd0
3560 = vect_get_vec_def_for_operand (op, stmt);
3561 else
3562 {
3563 vec_oprnd0 = arginfo[i].op;
3564 if ((m & (k - 1)) == 0)
3565 vec_oprnd0
3566 = vect_get_vec_def_for_stmt_copy (arginfo[i].dt,
3567 vec_oprnd0);
3568 }
3569 arginfo[i].op = vec_oprnd0;
3570 vec_oprnd0
3571 = build3 (BIT_FIELD_REF, atype, vec_oprnd0,
3572 bitsize_int (prec),
3573 bitsize_int ((m & (k - 1)) * prec));
3574 new_stmt
3575 = gimple_build_assign (make_ssa_name (atype),
3576 vec_oprnd0);
3577 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3578 vargs.safe_push (gimple_assign_lhs (new_stmt));
3579 }
3580 else
3581 {
3582 k = (TYPE_VECTOR_SUBPARTS (atype)
3583 / TYPE_VECTOR_SUBPARTS (arginfo[i].vectype));
3584 gcc_assert ((k & (k - 1)) == 0);
3585 vec<constructor_elt, va_gc> *ctor_elts;
3586 if (k != 1)
3587 vec_alloc (ctor_elts, k);
3588 else
3589 ctor_elts = NULL;
3590 for (l = 0; l < k; l++)
3591 {
3592 if (m == 0 && l == 0)
3593 vec_oprnd0
3594 = vect_get_vec_def_for_operand (op, stmt);
3595 else
3596 vec_oprnd0
3597 = vect_get_vec_def_for_stmt_copy (arginfo[i].dt,
3598 arginfo[i].op);
3599 arginfo[i].op = vec_oprnd0;
3600 if (k == 1)
3601 break;
3602 CONSTRUCTOR_APPEND_ELT (ctor_elts, NULL_TREE,
3603 vec_oprnd0);
3604 }
3605 if (k == 1)
3606 vargs.safe_push (vec_oprnd0);
3607 else
3608 {
3609 vec_oprnd0 = build_constructor (atype, ctor_elts);
3610 new_stmt
3611 = gimple_build_assign (make_ssa_name (atype),
3612 vec_oprnd0);
3613 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3614 vargs.safe_push (gimple_assign_lhs (new_stmt));
3615 }
3616 }
3617 }
3618 break;
3619 case SIMD_CLONE_ARG_TYPE_UNIFORM:
3620 vargs.safe_push (op);
3621 break;
3622 case SIMD_CLONE_ARG_TYPE_LINEAR_CONSTANT_STEP:
3623 case SIMD_CLONE_ARG_TYPE_LINEAR_REF_CONSTANT_STEP:
3624 if (j == 0)
3625 {
3626 gimple_seq stmts;
3627 arginfo[i].op
3628 = force_gimple_operand (arginfo[i].op, &stmts, true,
3629 NULL_TREE);
3630 if (stmts != NULL)
3631 {
3632 basic_block new_bb;
3633 edge pe = loop_preheader_edge (loop);
3634 new_bb = gsi_insert_seq_on_edge_immediate (pe, stmts);
3635 gcc_assert (!new_bb);
3636 }
3637 if (arginfo[i].simd_lane_linear)
3638 {
3639 vargs.safe_push (arginfo[i].op);
3640 break;
3641 }
3642 tree phi_res = copy_ssa_name (op);
3643 gphi *new_phi = create_phi_node (phi_res, loop->header);
3644 set_vinfo_for_stmt (new_phi,
3645 new_stmt_vec_info (new_phi, loop_vinfo));
3646 add_phi_arg (new_phi, arginfo[i].op,
3647 loop_preheader_edge (loop), UNKNOWN_LOCATION);
3648 enum tree_code code
3649 = POINTER_TYPE_P (TREE_TYPE (op))
3650 ? POINTER_PLUS_EXPR : PLUS_EXPR;
3651 tree type = POINTER_TYPE_P (TREE_TYPE (op))
3652 ? sizetype : TREE_TYPE (op);
3653 widest_int cst
3654 = wi::mul (bestn->simdclone->args[i].linear_step,
3655 ncopies * nunits);
3656 tree tcst = wide_int_to_tree (type, cst);
3657 tree phi_arg = copy_ssa_name (op);
3658 new_stmt
3659 = gimple_build_assign (phi_arg, code, phi_res, tcst);
3660 gimple_stmt_iterator si = gsi_after_labels (loop->header);
3661 gsi_insert_after (&si, new_stmt, GSI_NEW_STMT);
3662 set_vinfo_for_stmt (new_stmt,
3663 new_stmt_vec_info (new_stmt, loop_vinfo));
3664 add_phi_arg (new_phi, phi_arg, loop_latch_edge (loop),
3665 UNKNOWN_LOCATION);
3666 arginfo[i].op = phi_res;
3667 vargs.safe_push (phi_res);
3668 }
3669 else
3670 {
3671 enum tree_code code
3672 = POINTER_TYPE_P (TREE_TYPE (op))
3673 ? POINTER_PLUS_EXPR : PLUS_EXPR;
3674 tree type = POINTER_TYPE_P (TREE_TYPE (op))
3675 ? sizetype : TREE_TYPE (op);
3676 widest_int cst
3677 = wi::mul (bestn->simdclone->args[i].linear_step,
3678 j * nunits);
3679 tree tcst = wide_int_to_tree (type, cst);
3680 new_temp = make_ssa_name (TREE_TYPE (op));
3681 new_stmt = gimple_build_assign (new_temp, code,
3682 arginfo[i].op, tcst);
3683 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3684 vargs.safe_push (new_temp);
3685 }
3686 break;
3687 case SIMD_CLONE_ARG_TYPE_LINEAR_VAL_CONSTANT_STEP:
3688 case SIMD_CLONE_ARG_TYPE_LINEAR_UVAL_CONSTANT_STEP:
3689 case SIMD_CLONE_ARG_TYPE_LINEAR_VARIABLE_STEP:
3690 case SIMD_CLONE_ARG_TYPE_LINEAR_REF_VARIABLE_STEP:
3691 case SIMD_CLONE_ARG_TYPE_LINEAR_VAL_VARIABLE_STEP:
3692 case SIMD_CLONE_ARG_TYPE_LINEAR_UVAL_VARIABLE_STEP:
3693 default:
3694 gcc_unreachable ();
3695 }
3696 }
3697
3698 new_stmt = gimple_build_call_vec (fndecl, vargs);
3699 if (vec_dest)
3700 {
3701 gcc_assert (ratype || TYPE_VECTOR_SUBPARTS (rtype) == nunits);
3702 if (ratype)
3703 new_temp = create_tmp_var (ratype);
3704 else if (TYPE_VECTOR_SUBPARTS (vectype)
3705 == TYPE_VECTOR_SUBPARTS (rtype))
3706 new_temp = make_ssa_name (vec_dest, new_stmt);
3707 else
3708 new_temp = make_ssa_name (rtype, new_stmt);
3709 gimple_call_set_lhs (new_stmt, new_temp);
3710 }
3711 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3712
3713 if (vec_dest)
3714 {
3715 if (TYPE_VECTOR_SUBPARTS (vectype) < nunits)
3716 {
3717 unsigned int k, l;
3718 unsigned int prec = GET_MODE_BITSIZE (TYPE_MODE (vectype));
3719 k = nunits / TYPE_VECTOR_SUBPARTS (vectype);
3720 gcc_assert ((k & (k - 1)) == 0);
3721 for (l = 0; l < k; l++)
3722 {
3723 tree t;
3724 if (ratype)
3725 {
3726 t = build_fold_addr_expr (new_temp);
3727 t = build2 (MEM_REF, vectype, t,
3728 build_int_cst (TREE_TYPE (t),
3729 l * prec / BITS_PER_UNIT));
3730 }
3731 else
3732 t = build3 (BIT_FIELD_REF, vectype, new_temp,
3733 bitsize_int (prec), bitsize_int (l * prec));
3734 new_stmt
3735 = gimple_build_assign (make_ssa_name (vectype), t);
3736 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3737 if (j == 0 && l == 0)
3738 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
3739 else
3740 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
3741
3742 prev_stmt_info = vinfo_for_stmt (new_stmt);
3743 }
3744
3745 if (ratype)
3746 {
3747 tree clobber = build_constructor (ratype, NULL);
3748 TREE_THIS_VOLATILE (clobber) = 1;
3749 new_stmt = gimple_build_assign (new_temp, clobber);
3750 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3751 }
3752 continue;
3753 }
3754 else if (TYPE_VECTOR_SUBPARTS (vectype) > nunits)
3755 {
3756 unsigned int k = (TYPE_VECTOR_SUBPARTS (vectype)
3757 / TYPE_VECTOR_SUBPARTS (rtype));
3758 gcc_assert ((k & (k - 1)) == 0);
3759 if ((j & (k - 1)) == 0)
3760 vec_alloc (ret_ctor_elts, k);
3761 if (ratype)
3762 {
3763 unsigned int m, o = nunits / TYPE_VECTOR_SUBPARTS (rtype);
3764 for (m = 0; m < o; m++)
3765 {
3766 tree tem = build4 (ARRAY_REF, rtype, new_temp,
3767 size_int (m), NULL_TREE, NULL_TREE);
3768 new_stmt
3769 = gimple_build_assign (make_ssa_name (rtype), tem);
3770 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3771 CONSTRUCTOR_APPEND_ELT (ret_ctor_elts, NULL_TREE,
3772 gimple_assign_lhs (new_stmt));
3773 }
3774 tree clobber = build_constructor (ratype, NULL);
3775 TREE_THIS_VOLATILE (clobber) = 1;
3776 new_stmt = gimple_build_assign (new_temp, clobber);
3777 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3778 }
3779 else
3780 CONSTRUCTOR_APPEND_ELT (ret_ctor_elts, NULL_TREE, new_temp);
3781 if ((j & (k - 1)) != k - 1)
3782 continue;
3783 vec_oprnd0 = build_constructor (vectype, ret_ctor_elts);
3784 new_stmt
3785 = gimple_build_assign (make_ssa_name (vec_dest), vec_oprnd0);
3786 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3787
3788 if ((unsigned) j == k - 1)
3789 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
3790 else
3791 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
3792
3793 prev_stmt_info = vinfo_for_stmt (new_stmt);
3794 continue;
3795 }
3796 else if (ratype)
3797 {
3798 tree t = build_fold_addr_expr (new_temp);
3799 t = build2 (MEM_REF, vectype, t,
3800 build_int_cst (TREE_TYPE (t), 0));
3801 new_stmt
3802 = gimple_build_assign (make_ssa_name (vec_dest), t);
3803 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3804 tree clobber = build_constructor (ratype, NULL);
3805 TREE_THIS_VOLATILE (clobber) = 1;
3806 vect_finish_stmt_generation (stmt,
3807 gimple_build_assign (new_temp,
3808 clobber), gsi);
3809 }
3810 }
3811
3812 if (j == 0)
3813 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
3814 else
3815 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
3816
3817 prev_stmt_info = vinfo_for_stmt (new_stmt);
3818 }
3819
3820 vargs.release ();
3821
3822   /* The call in STMT might prevent it from being removed in dce.
3823      However, we cannot remove it here, due to the way the ssa name
3824      it defines is mapped to the new definition.  So just replace the
3825      rhs of the statement with something harmless.  */
3826
3827 if (slp_node)
3828 return true;
3829
3830 if (scalar_dest)
3831 {
3832 type = TREE_TYPE (scalar_dest);
3833 if (is_pattern_stmt_p (stmt_info))
3834 lhs = gimple_call_lhs (STMT_VINFO_RELATED_STMT (stmt_info));
3835 else
3836 lhs = gimple_call_lhs (stmt);
3837 new_stmt = gimple_build_assign (lhs, build_zero_cst (type));
3838 }
3839 else
3840 new_stmt = gimple_build_nop ();
3841 set_vinfo_for_stmt (new_stmt, stmt_info);
3842 set_vinfo_for_stmt (stmt, NULL);
3843 STMT_VINFO_STMT (stmt_info) = new_stmt;
3844 gsi_replace (gsi, new_stmt, true);
3845 unlink_stmt_vdef (stmt);
3846
3847 return true;
3848 }
3849
3850
3851 /* Function vect_gen_widened_results_half
3852
3853    Create a vector stmt whose code is CODE, whose number of operands is given
3854    by OP_TYPE and whose result variable is VEC_DEST, and whose arguments are
3855    VEC_OPRND0 and VEC_OPRND1.  The new vector stmt is to be inserted at GSI.
3856 In the case that CODE is a CALL_EXPR, this means that a call to DECL
3857 needs to be created (DECL is a function-decl of a target-builtin).
3858 STMT is the original scalar stmt that we are vectorizing. */
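/* As an illustrative sketch (assuming 128-bit vectors; the statement below
   is just an example, not taken from the callers): for a widening multiply
   such as

       short_res[i] = (short) qa[i] * (short) qb[i];

   this helper is invoked twice per V16QI input pair, once with the "lo"
   variant of the widening code and once with the "hi" variant, and each
   invocation produces one V8HI result covering half of the input lanes.  */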
3859
3860 static gimple *
3861 vect_gen_widened_results_half (enum tree_code code,
3862 tree decl,
3863 tree vec_oprnd0, tree vec_oprnd1, int op_type,
3864 tree vec_dest, gimple_stmt_iterator *gsi,
3865 gimple *stmt)
3866 {
3867 gimple *new_stmt;
3868 tree new_temp;
3869
3870 /* Generate half of the widened result: */
3871 if (code == CALL_EXPR)
3872 {
3873 /* Target specific support */
3874 if (op_type == binary_op)
3875 new_stmt = gimple_build_call (decl, 2, vec_oprnd0, vec_oprnd1);
3876 else
3877 new_stmt = gimple_build_call (decl, 1, vec_oprnd0);
3878 new_temp = make_ssa_name (vec_dest, new_stmt);
3879 gimple_call_set_lhs (new_stmt, new_temp);
3880 }
3881 else
3882 {
3883 /* Generic support */
3884 gcc_assert (op_type == TREE_CODE_LENGTH (code));
3885 if (op_type != binary_op)
3886 vec_oprnd1 = NULL;
3887 new_stmt = gimple_build_assign (vec_dest, code, vec_oprnd0, vec_oprnd1);
3888 new_temp = make_ssa_name (vec_dest, new_stmt);
3889 gimple_assign_set_lhs (new_stmt, new_temp);
3890 }
3891 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3892
3893 return new_stmt;
3894 }
3895
3896
3897 /* Get vectorized definitions for loop-based vectorization. For the first
3898    operand we call vect_get_vec_def_for_operand() (with OPRND containing
3899    the scalar operand), and for the rest we get a copy with
3900 vect_get_vec_def_for_stmt_copy() using the previous vector definition
3901 (stored in OPRND). See vect_get_vec_def_for_stmt_copy() for details.
3902 The vectors are collected into VEC_OPRNDS. */
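/* For example (an illustration, not drawn from a particular caller): a call
   with MULTI_STEP_CVT == 1 pushes four vector defs into VEC_OPRNDS -- the
   def obtained for the scalar operand followed by three successive stmt
   copies -- which a two-step narrowing can then combine pairwise.  */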
3903
3904 static void
3905 vect_get_loop_based_defs (tree *oprnd, gimple *stmt, enum vect_def_type dt,
3906 vec<tree> *vec_oprnds, int multi_step_cvt)
3907 {
3908 tree vec_oprnd;
3909
3910 /* Get first vector operand. */
3911   /* All the vector operands except the very first one (that is, the scalar
3912      operand) are stmt copies.  */
3913 if (TREE_CODE (TREE_TYPE (*oprnd)) != VECTOR_TYPE)
3914 vec_oprnd = vect_get_vec_def_for_operand (*oprnd, stmt);
3915 else
3916 vec_oprnd = vect_get_vec_def_for_stmt_copy (dt, *oprnd);
3917
3918 vec_oprnds->quick_push (vec_oprnd);
3919
3920 /* Get second vector operand. */
3921 vec_oprnd = vect_get_vec_def_for_stmt_copy (dt, vec_oprnd);
3922 vec_oprnds->quick_push (vec_oprnd);
3923
3924 *oprnd = vec_oprnd;
3925
3926 /* For conversion in multiple steps, continue to get operands
3927 recursively. */
3928 if (multi_step_cvt)
3929 vect_get_loop_based_defs (oprnd, stmt, dt, vec_oprnds, multi_step_cvt - 1);
3930 }
3931
3932
3933 /* Create vectorized demotion statements for vector operands from VEC_OPRNDS.
3934 For multi-step conversions store the resulting vectors and call the function
3935 recursively. */
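/* Illustrative sketch (assumed 128-bit vector modes, not tied to a specific
   target): narrowing int to char takes two levels of VEC_PACK_TRUNC_EXPR;
   four V4SI operands are first packed pairwise into two V8HI vectors, and
   the recursive call then packs those into the final V16QI result.  */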
3936
3937 static void
3938 vect_create_vectorized_demotion_stmts (vec<tree> *vec_oprnds,
3939 int multi_step_cvt, gimple *stmt,
3940 vec<tree> vec_dsts,
3941 gimple_stmt_iterator *gsi,
3942 slp_tree slp_node, enum tree_code code,
3943 stmt_vec_info *prev_stmt_info)
3944 {
3945 unsigned int i;
3946 tree vop0, vop1, new_tmp, vec_dest;
3947 gimple *new_stmt;
3948 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
3949
3950 vec_dest = vec_dsts.pop ();
3951
3952 for (i = 0; i < vec_oprnds->length (); i += 2)
3953 {
3954 /* Create demotion operation. */
3955 vop0 = (*vec_oprnds)[i];
3956 vop1 = (*vec_oprnds)[i + 1];
3957 new_stmt = gimple_build_assign (vec_dest, code, vop0, vop1);
3958 new_tmp = make_ssa_name (vec_dest, new_stmt);
3959 gimple_assign_set_lhs (new_stmt, new_tmp);
3960 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3961
3962 if (multi_step_cvt)
3963 /* Store the resulting vector for next recursive call. */
3964 (*vec_oprnds)[i/2] = new_tmp;
3965 else
3966 {
3967 /* This is the last step of the conversion sequence. Store the
3968 	     vectors in SLP_NODE or in the vector info of the scalar statement
3969 	     (or in the STMT_VINFO_RELATED_STMT chain).  */
3970 if (slp_node)
3971 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
3972 else
3973 {
3974 if (!*prev_stmt_info)
3975 STMT_VINFO_VEC_STMT (stmt_info) = new_stmt;
3976 else
3977 STMT_VINFO_RELATED_STMT (*prev_stmt_info) = new_stmt;
3978
3979 *prev_stmt_info = vinfo_for_stmt (new_stmt);
3980 }
3981 }
3982 }
3983
3984   /* For multi-step demotion operations we first generate demotion operations
3985      from the source type to the intermediate types, and then combine the
3986      results (stored in VEC_OPRNDS) with a demotion operation to the
3987      destination type.  */
3988 if (multi_step_cvt)
3989 {
3990 /* At each level of recursion we have half of the operands we had at the
3991 previous level. */
3992 vec_oprnds->truncate ((i+1)/2);
3993 vect_create_vectorized_demotion_stmts (vec_oprnds, multi_step_cvt - 1,
3994 stmt, vec_dsts, gsi, slp_node,
3995 VEC_PACK_TRUNC_EXPR,
3996 prev_stmt_info);
3997 }
3998
3999 vec_dsts.quick_push (vec_dest);
4000 }
4001
4002
4003 /* Create vectorized promotion statements for vector operands from VEC_OPRNDS0
4004    and VEC_OPRNDS1 (for binary operations).  For multi-step conversions the
4005    resulting vectors are stored back into VEC_OPRNDS0 for the next step.  */
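/* Illustrative sketch (assumed 128-bit vector modes): widening a V16QI
   operand to ints takes two invocations of this function -- the first
   splits each input vector into a "lo" and a "hi" V8HI half, and the
   second splits each of those halves into two V4SI vectors, so the number
   of entries in VEC_OPRNDS0 doubles at every step.  */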
4006
4007 static void
4008 vect_create_vectorized_promotion_stmts (vec<tree> *vec_oprnds0,
4009 vec<tree> *vec_oprnds1,
4010 gimple *stmt, tree vec_dest,
4011 gimple_stmt_iterator *gsi,
4012 enum tree_code code1,
4013 enum tree_code code2, tree decl1,
4014 tree decl2, int op_type)
4015 {
4016 int i;
4017 tree vop0, vop1, new_tmp1, new_tmp2;
4018 gimple *new_stmt1, *new_stmt2;
4019 vec<tree> vec_tmp = vNULL;
4020
4021 vec_tmp.create (vec_oprnds0->length () * 2);
4022 FOR_EACH_VEC_ELT (*vec_oprnds0, i, vop0)
4023 {
4024 if (op_type == binary_op)
4025 vop1 = (*vec_oprnds1)[i];
4026 else
4027 vop1 = NULL_TREE;
4028
4029 /* Generate the two halves of promotion operation. */
4030 new_stmt1 = vect_gen_widened_results_half (code1, decl1, vop0, vop1,
4031 op_type, vec_dest, gsi, stmt);
4032 new_stmt2 = vect_gen_widened_results_half (code2, decl2, vop0, vop1,
4033 op_type, vec_dest, gsi, stmt);
4034 if (is_gimple_call (new_stmt1))
4035 {
4036 new_tmp1 = gimple_call_lhs (new_stmt1);
4037 new_tmp2 = gimple_call_lhs (new_stmt2);
4038 }
4039 else
4040 {
4041 new_tmp1 = gimple_assign_lhs (new_stmt1);
4042 new_tmp2 = gimple_assign_lhs (new_stmt2);
4043 }
4044
4045 /* Store the results for the next step. */
4046 vec_tmp.quick_push (new_tmp1);
4047 vec_tmp.quick_push (new_tmp2);
4048 }
4049
4050 vec_oprnds0->release ();
4051 *vec_oprnds0 = vec_tmp;
4052 }
4053
4054
4055 /* Check if STMT performs a conversion operation that can be vectorized.
4056 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
4057 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
4058 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
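/* As a rough illustration (assuming 128-bit vector modes; the statements
   below are only examples): for

       float_res[i] = (float) int_src[i];

   the input and output vectors have the same number of lanes, so the
   modifier is NONE and one FLOAT_EXPR per copy suffices; converting char
   to float instead takes the WIDEN path and goes through one or more
   intermediate integer types before the final int-to-float step.  */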
4059
4060 static bool
4061 vectorizable_conversion (gimple *stmt, gimple_stmt_iterator *gsi,
4062 gimple **vec_stmt, slp_tree slp_node)
4063 {
4064 tree vec_dest;
4065 tree scalar_dest;
4066 tree op0, op1 = NULL_TREE;
4067 tree vec_oprnd0 = NULL_TREE, vec_oprnd1 = NULL_TREE;
4068 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
4069 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
4070 enum tree_code code, code1 = ERROR_MARK, code2 = ERROR_MARK;
4071 enum tree_code codecvt1 = ERROR_MARK, codecvt2 = ERROR_MARK;
4072 tree decl1 = NULL_TREE, decl2 = NULL_TREE;
4073 tree new_temp;
4074 gimple *def_stmt;
4075 enum vect_def_type dt[2] = {vect_unknown_def_type, vect_unknown_def_type};
4076 int ndts = 2;
4077 gimple *new_stmt = NULL;
4078 stmt_vec_info prev_stmt_info;
4079 int nunits_in;
4080 int nunits_out;
4081 tree vectype_out, vectype_in;
4082 int ncopies, i, j;
4083 tree lhs_type, rhs_type;
4084 enum { NARROW, NONE, WIDEN } modifier;
4085 vec<tree> vec_oprnds0 = vNULL;
4086 vec<tree> vec_oprnds1 = vNULL;
4087 tree vop0;
4088 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
4089 vec_info *vinfo = stmt_info->vinfo;
4090 int multi_step_cvt = 0;
4091 vec<tree> interm_types = vNULL;
4092 tree last_oprnd, intermediate_type, cvt_type = NULL_TREE;
4093 int op_type;
4094 unsigned short fltsz;
4095
4096 /* Is STMT a vectorizable conversion? */
4097
4098 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
4099 return false;
4100
4101 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
4102 && ! vec_stmt)
4103 return false;
4104
4105 if (!is_gimple_assign (stmt))
4106 return false;
4107
4108 if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME)
4109 return false;
4110
4111 code = gimple_assign_rhs_code (stmt);
4112 if (!CONVERT_EXPR_CODE_P (code)
4113 && code != FIX_TRUNC_EXPR
4114 && code != FLOAT_EXPR
4115 && code != WIDEN_MULT_EXPR
4116 && code != WIDEN_LSHIFT_EXPR)
4117 return false;
4118
4119 op_type = TREE_CODE_LENGTH (code);
4120
4121 /* Check types of lhs and rhs. */
4122 scalar_dest = gimple_assign_lhs (stmt);
4123 lhs_type = TREE_TYPE (scalar_dest);
4124 vectype_out = STMT_VINFO_VECTYPE (stmt_info);
4125
4126 op0 = gimple_assign_rhs1 (stmt);
4127 rhs_type = TREE_TYPE (op0);
4128
4129 if ((code != FIX_TRUNC_EXPR && code != FLOAT_EXPR)
4130 && !((INTEGRAL_TYPE_P (lhs_type)
4131 && INTEGRAL_TYPE_P (rhs_type))
4132 || (SCALAR_FLOAT_TYPE_P (lhs_type)
4133 && SCALAR_FLOAT_TYPE_P (rhs_type))))
4134 return false;
4135
4136 if (!VECTOR_BOOLEAN_TYPE_P (vectype_out)
4137 && ((INTEGRAL_TYPE_P (lhs_type)
4138 && !type_has_mode_precision_p (lhs_type))
4139 || (INTEGRAL_TYPE_P (rhs_type)
4140 && !type_has_mode_precision_p (rhs_type))))
4141 {
4142 if (dump_enabled_p ())
4143 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4144 "type conversion to/from bit-precision unsupported."
4145 "\n");
4146 return false;
4147 }
4148
4149 /* Check the operands of the operation. */
4150 if (!vect_is_simple_use (op0, vinfo, &def_stmt, &dt[0], &vectype_in))
4151 {
4152 if (dump_enabled_p ())
4153 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4154 "use not simple.\n");
4155 return false;
4156 }
4157 if (op_type == binary_op)
4158 {
4159 bool ok;
4160
4161 op1 = gimple_assign_rhs2 (stmt);
4162 gcc_assert (code == WIDEN_MULT_EXPR || code == WIDEN_LSHIFT_EXPR);
4163 /* For WIDEN_MULT_EXPR, if OP0 is a constant, use the type of
4164 OP1. */
4165 if (CONSTANT_CLASS_P (op0))
4166 ok = vect_is_simple_use (op1, vinfo, &def_stmt, &dt[1], &vectype_in);
4167 else
4168 ok = vect_is_simple_use (op1, vinfo, &def_stmt, &dt[1]);
4169
4170 if (!ok)
4171 {
4172 if (dump_enabled_p ())
4173 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4174 "use not simple.\n");
4175 return false;
4176 }
4177 }
4178
4179   /* If op0 is an external or constant def, use a vector type of
4180      the same size as the output vector type.  */
4181 if (!vectype_in)
4182 vectype_in = get_same_sized_vectype (rhs_type, vectype_out);
4183 if (vec_stmt)
4184 gcc_assert (vectype_in);
4185 if (!vectype_in)
4186 {
4187 if (dump_enabled_p ())
4188 {
4189 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4190 "no vectype for scalar type ");
4191 dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM, rhs_type);
4192 dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
4193 }
4194
4195 return false;
4196 }
4197
4198 if (VECTOR_BOOLEAN_TYPE_P (vectype_out)
4199 && !VECTOR_BOOLEAN_TYPE_P (vectype_in))
4200 {
4201 if (dump_enabled_p ())
4202 {
4203 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4204 "can't convert between boolean and non "
4205 "boolean vectors");
4206 dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM, rhs_type);
4207 dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
4208 }
4209
4210 return false;
4211 }
4212
4213 nunits_in = TYPE_VECTOR_SUBPARTS (vectype_in);
4214 nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
4215 if (nunits_in < nunits_out)
4216 modifier = NARROW;
4217 else if (nunits_out == nunits_in)
4218 modifier = NONE;
4219 else
4220 modifier = WIDEN;
4221
4222 /* Multiple types in SLP are handled by creating the appropriate number of
4223 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
4224 case of SLP. */
4225 if (slp_node)
4226 ncopies = 1;
4227 else if (modifier == NARROW)
4228 ncopies = vect_get_num_copies (loop_vinfo, vectype_out);
4229 else
4230 ncopies = vect_get_num_copies (loop_vinfo, vectype_in);
4231
4232 /* Sanity check: make sure that at least one copy of the vectorized stmt
4233 needs to be generated. */
4234 gcc_assert (ncopies >= 1);
4235
4236 bool found_mode = false;
4237 scalar_mode lhs_mode = SCALAR_TYPE_MODE (lhs_type);
4238 scalar_mode rhs_mode = SCALAR_TYPE_MODE (rhs_type);
4239 opt_scalar_mode rhs_mode_iter;
4240
4241 /* Supportable by target? */
4242 switch (modifier)
4243 {
4244 case NONE:
4245 if (code != FIX_TRUNC_EXPR && code != FLOAT_EXPR)
4246 return false;
4247 if (supportable_convert_operation (code, vectype_out, vectype_in,
4248 &decl1, &code1))
4249 break;
4250 /* FALLTHRU */
4251 unsupported:
4252 if (dump_enabled_p ())
4253 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4254 "conversion not supported by target.\n");
4255 return false;
4256
4257 case WIDEN:
4258 if (supportable_widening_operation (code, stmt, vectype_out, vectype_in,
4259 &code1, &code2, &multi_step_cvt,
4260 &interm_types))
4261 {
4262 	  /* A binary widening operation can only be supported directly by the
4263 	     architecture.  */
4264 gcc_assert (!(multi_step_cvt && op_type == binary_op));
4265 break;
4266 }
4267
4268 if (code != FLOAT_EXPR
4269 || GET_MODE_SIZE (lhs_mode) <= GET_MODE_SIZE (rhs_mode))
4270 goto unsupported;
4271
4272 fltsz = GET_MODE_SIZE (lhs_mode);
4273 FOR_EACH_2XWIDER_MODE (rhs_mode_iter, rhs_mode)
4274 {
4275 rhs_mode = rhs_mode_iter.require ();
4276 if (GET_MODE_SIZE (rhs_mode) > fltsz)
4277 break;
4278
4279 cvt_type
4280 = build_nonstandard_integer_type (GET_MODE_BITSIZE (rhs_mode), 0);
4281 cvt_type = get_same_sized_vectype (cvt_type, vectype_in);
4282 if (cvt_type == NULL_TREE)
4283 goto unsupported;
4284
4285 if (GET_MODE_SIZE (rhs_mode) == fltsz)
4286 {
4287 if (!supportable_convert_operation (code, vectype_out,
4288 cvt_type, &decl1, &codecvt1))
4289 goto unsupported;
4290 }
4291 else if (!supportable_widening_operation (code, stmt, vectype_out,
4292 cvt_type, &codecvt1,
4293 &codecvt2, &multi_step_cvt,
4294 &interm_types))
4295 continue;
4296 else
4297 gcc_assert (multi_step_cvt == 0);
4298
4299 if (supportable_widening_operation (NOP_EXPR, stmt, cvt_type,
4300 vectype_in, &code1, &code2,
4301 &multi_step_cvt, &interm_types))
4302 {
4303 found_mode = true;
4304 break;
4305 }
4306 }
4307
4308 if (!found_mode)
4309 goto unsupported;
4310
4311 if (GET_MODE_SIZE (rhs_mode) == fltsz)
4312 codecvt2 = ERROR_MARK;
4313 else
4314 {
4315 multi_step_cvt++;
4316 interm_types.safe_push (cvt_type);
4317 cvt_type = NULL_TREE;
4318 }
4319 break;
4320
4321 case NARROW:
4322 gcc_assert (op_type == unary_op);
4323 if (supportable_narrowing_operation (code, vectype_out, vectype_in,
4324 &code1, &multi_step_cvt,
4325 &interm_types))
4326 break;
4327
4328 if (code != FIX_TRUNC_EXPR
4329 || GET_MODE_SIZE (lhs_mode) >= GET_MODE_SIZE (rhs_mode))
4330 goto unsupported;
4331
4332 cvt_type
4333 = build_nonstandard_integer_type (GET_MODE_BITSIZE (rhs_mode), 0);
4334 cvt_type = get_same_sized_vectype (cvt_type, vectype_in);
4335 if (cvt_type == NULL_TREE)
4336 goto unsupported;
4337 if (!supportable_convert_operation (code, cvt_type, vectype_in,
4338 &decl1, &codecvt1))
4339 goto unsupported;
4340 if (supportable_narrowing_operation (NOP_EXPR, vectype_out, cvt_type,
4341 &code1, &multi_step_cvt,
4342 &interm_types))
4343 break;
4344 goto unsupported;
4345
4346 default:
4347 gcc_unreachable ();
4348 }
4349
4350 if (!vec_stmt) /* transformation not required. */
4351 {
4352 if (dump_enabled_p ())
4353 dump_printf_loc (MSG_NOTE, vect_location,
4354 "=== vectorizable_conversion ===\n");
4355 if (code == FIX_TRUNC_EXPR || code == FLOAT_EXPR)
4356 {
4357 STMT_VINFO_TYPE (stmt_info) = type_conversion_vec_info_type;
4358 vect_model_simple_cost (stmt_info, ncopies, dt, ndts, NULL, NULL);
4359 }
4360 else if (modifier == NARROW)
4361 {
4362 STMT_VINFO_TYPE (stmt_info) = type_demotion_vec_info_type;
4363 vect_model_promotion_demotion_cost (stmt_info, dt, multi_step_cvt);
4364 }
4365 else
4366 {
4367 STMT_VINFO_TYPE (stmt_info) = type_promotion_vec_info_type;
4368 vect_model_promotion_demotion_cost (stmt_info, dt, multi_step_cvt);
4369 }
4370 interm_types.release ();
4371 return true;
4372 }
4373
4374 /* Transform. */
4375 if (dump_enabled_p ())
4376 dump_printf_loc (MSG_NOTE, vect_location,
4377 "transform conversion. ncopies = %d.\n", ncopies);
4378
4379 if (op_type == binary_op)
4380 {
4381 if (CONSTANT_CLASS_P (op0))
4382 op0 = fold_convert (TREE_TYPE (op1), op0);
4383 else if (CONSTANT_CLASS_P (op1))
4384 op1 = fold_convert (TREE_TYPE (op0), op1);
4385 }
4386
4387   /* In case of multi-step conversion, we first generate conversion operations
4388      to the intermediate types, and then from those types to the final one.
4389      We create vector destinations for the intermediate types (TYPES) received
4390      from supportable_*_operation, and store them in the correct order
4391      for future use in vect_create_vectorized_*_stmts ().  */
4392 auto_vec<tree> vec_dsts (multi_step_cvt + 1);
4393 vec_dest = vect_create_destination_var (scalar_dest,
4394 (cvt_type && modifier == WIDEN)
4395 ? cvt_type : vectype_out);
4396 vec_dsts.quick_push (vec_dest);
4397
4398 if (multi_step_cvt)
4399 {
4400 for (i = interm_types.length () - 1;
4401 interm_types.iterate (i, &intermediate_type); i--)
4402 {
4403 vec_dest = vect_create_destination_var (scalar_dest,
4404 intermediate_type);
4405 vec_dsts.quick_push (vec_dest);
4406 }
4407 }
4408
4409 if (cvt_type)
4410 vec_dest = vect_create_destination_var (scalar_dest,
4411 modifier == WIDEN
4412 ? vectype_out : cvt_type);
4413
4414 if (!slp_node)
4415 {
4416 if (modifier == WIDEN)
4417 {
4418 vec_oprnds0.create (multi_step_cvt ? vect_pow2 (multi_step_cvt) : 1);
4419 if (op_type == binary_op)
4420 vec_oprnds1.create (1);
4421 }
4422 else if (modifier == NARROW)
4423 vec_oprnds0.create (
4424 2 * (multi_step_cvt ? vect_pow2 (multi_step_cvt) : 1));
4425 }
4426 else if (code == WIDEN_LSHIFT_EXPR)
4427 vec_oprnds1.create (slp_node->vec_stmts_size);
4428
4429 last_oprnd = op0;
4430 prev_stmt_info = NULL;
4431 switch (modifier)
4432 {
4433 case NONE:
4434 for (j = 0; j < ncopies; j++)
4435 {
4436 if (j == 0)
4437 vect_get_vec_defs (op0, NULL, stmt, &vec_oprnds0, NULL, slp_node);
4438 else
4439 vect_get_vec_defs_for_stmt_copy (dt, &vec_oprnds0, NULL);
4440
4441 FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
4442 {
4443 /* Arguments are ready, create the new vector stmt. */
4444 if (code1 == CALL_EXPR)
4445 {
4446 new_stmt = gimple_build_call (decl1, 1, vop0);
4447 new_temp = make_ssa_name (vec_dest, new_stmt);
4448 gimple_call_set_lhs (new_stmt, new_temp);
4449 }
4450 else
4451 {
4452 gcc_assert (TREE_CODE_LENGTH (code1) == unary_op);
4453 new_stmt = gimple_build_assign (vec_dest, code1, vop0);
4454 new_temp = make_ssa_name (vec_dest, new_stmt);
4455 gimple_assign_set_lhs (new_stmt, new_temp);
4456 }
4457
4458 vect_finish_stmt_generation (stmt, new_stmt, gsi);
4459 if (slp_node)
4460 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
4461 else
4462 {
4463 if (!prev_stmt_info)
4464 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
4465 else
4466 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
4467 prev_stmt_info = vinfo_for_stmt (new_stmt);
4468 }
4469 }
4470 }
4471 break;
4472
4473 case WIDEN:
4474 /* In case the vectorization factor (VF) is bigger than the number
4475 of elements that we can fit in a vectype (nunits), we have to
4476 	 generate more than one vector stmt, i.e., we need to "unroll"
4477 the vector stmt by a factor VF/nunits. */
4478 for (j = 0; j < ncopies; j++)
4479 {
4480 /* Handle uses. */
4481 if (j == 0)
4482 {
4483 if (slp_node)
4484 {
4485 if (code == WIDEN_LSHIFT_EXPR)
4486 {
4487 unsigned int k;
4488
4489 vec_oprnd1 = op1;
4490 /* Store vec_oprnd1 for every vector stmt to be created
4491 for SLP_NODE. We check during the analysis that all
4492 the shift arguments are the same. */
4493 for (k = 0; k < slp_node->vec_stmts_size - 1; k++)
4494 vec_oprnds1.quick_push (vec_oprnd1);
4495
4496 vect_get_vec_defs (op0, NULL_TREE, stmt, &vec_oprnds0, NULL,
4497 slp_node);
4498 }
4499 else
4500 vect_get_vec_defs (op0, op1, stmt, &vec_oprnds0,
4501 &vec_oprnds1, slp_node);
4502 }
4503 else
4504 {
4505 vec_oprnd0 = vect_get_vec_def_for_operand (op0, stmt);
4506 vec_oprnds0.quick_push (vec_oprnd0);
4507 if (op_type == binary_op)
4508 {
4509 if (code == WIDEN_LSHIFT_EXPR)
4510 vec_oprnd1 = op1;
4511 else
4512 vec_oprnd1 = vect_get_vec_def_for_operand (op1, stmt);
4513 vec_oprnds1.quick_push (vec_oprnd1);
4514 }
4515 }
4516 }
4517 else
4518 {
4519 vec_oprnd0 = vect_get_vec_def_for_stmt_copy (dt[0], vec_oprnd0);
4520 vec_oprnds0.truncate (0);
4521 vec_oprnds0.quick_push (vec_oprnd0);
4522 if (op_type == binary_op)
4523 {
4524 if (code == WIDEN_LSHIFT_EXPR)
4525 vec_oprnd1 = op1;
4526 else
4527 vec_oprnd1 = vect_get_vec_def_for_stmt_copy (dt[1],
4528 vec_oprnd1);
4529 vec_oprnds1.truncate (0);
4530 vec_oprnds1.quick_push (vec_oprnd1);
4531 }
4532 }
4533
4534 /* Arguments are ready. Create the new vector stmts. */
4535 for (i = multi_step_cvt; i >= 0; i--)
4536 {
4537 tree this_dest = vec_dsts[i];
4538 enum tree_code c1 = code1, c2 = code2;
4539 if (i == 0 && codecvt2 != ERROR_MARK)
4540 {
4541 c1 = codecvt1;
4542 c2 = codecvt2;
4543 }
4544 vect_create_vectorized_promotion_stmts (&vec_oprnds0,
4545 &vec_oprnds1,
4546 stmt, this_dest, gsi,
4547 c1, c2, decl1, decl2,
4548 op_type);
4549 }
4550
4551 FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
4552 {
4553 if (cvt_type)
4554 {
4555 if (codecvt1 == CALL_EXPR)
4556 {
4557 new_stmt = gimple_build_call (decl1, 1, vop0);
4558 new_temp = make_ssa_name (vec_dest, new_stmt);
4559 gimple_call_set_lhs (new_stmt, new_temp);
4560 }
4561 else
4562 {
4563 gcc_assert (TREE_CODE_LENGTH (codecvt1) == unary_op);
4564 new_temp = make_ssa_name (vec_dest);
4565 new_stmt = gimple_build_assign (new_temp, codecvt1,
4566 vop0);
4567 }
4568
4569 vect_finish_stmt_generation (stmt, new_stmt, gsi);
4570 }
4571 else
4572 new_stmt = SSA_NAME_DEF_STMT (vop0);
4573
4574 if (slp_node)
4575 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
4576 else
4577 {
4578 if (!prev_stmt_info)
4579 STMT_VINFO_VEC_STMT (stmt_info) = new_stmt;
4580 else
4581 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
4582 prev_stmt_info = vinfo_for_stmt (new_stmt);
4583 }
4584 }
4585 }
4586
4587 *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
4588 break;
4589
4590 case NARROW:
4591 /* In case the vectorization factor (VF) is bigger than the number
4592 of elements that we can fit in a vectype (nunits), we have to
4593 	 generate more than one vector stmt, i.e., we need to "unroll"
4594 the vector stmt by a factor VF/nunits. */
4595 for (j = 0; j < ncopies; j++)
4596 {
4597 /* Handle uses. */
4598 if (slp_node)
4599 vect_get_vec_defs (op0, NULL_TREE, stmt, &vec_oprnds0, NULL,
4600 slp_node);
4601 else
4602 {
4603 vec_oprnds0.truncate (0);
4604 vect_get_loop_based_defs (&last_oprnd, stmt, dt[0], &vec_oprnds0,
4605 vect_pow2 (multi_step_cvt) - 1);
4606 }
4607
4608 /* Arguments are ready. Create the new vector stmts. */
4609 if (cvt_type)
4610 FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
4611 {
4612 if (codecvt1 == CALL_EXPR)
4613 {
4614 new_stmt = gimple_build_call (decl1, 1, vop0);
4615 new_temp = make_ssa_name (vec_dest, new_stmt);
4616 gimple_call_set_lhs (new_stmt, new_temp);
4617 }
4618 else
4619 {
4620 gcc_assert (TREE_CODE_LENGTH (codecvt1) == unary_op);
4621 new_temp = make_ssa_name (vec_dest);
4622 new_stmt = gimple_build_assign (new_temp, codecvt1,
4623 vop0);
4624 }
4625
4626 vect_finish_stmt_generation (stmt, new_stmt, gsi);
4627 vec_oprnds0[i] = new_temp;
4628 }
4629
4630 vect_create_vectorized_demotion_stmts (&vec_oprnds0, multi_step_cvt,
4631 stmt, vec_dsts, gsi,
4632 slp_node, code1,
4633 &prev_stmt_info);
4634 }
4635
4636 *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
4637 break;
4638 }
4639
4640 vec_oprnds0.release ();
4641 vec_oprnds1.release ();
4642 interm_types.release ();
4643
4644 return true;
4645 }
4646
4647
4648 /* Function vectorizable_assignment.
4649
4650 Check if STMT performs an assignment (copy) that can be vectorized.
4651 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
4652    stmt to replace it, put it in VEC_STMT, and insert it at GSI.
4653 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
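/* Illustrative sketch (the statement below is only an example): a copy like

       unsigned_dst[i] = (unsigned int) signed_src[i];

   changes neither the number of lanes nor the vector size, so it is
   handled here with a VIEW_CONVERT_EXPR per vector rather than as a
   real conversion.  */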
4654
4655 static bool
4656 vectorizable_assignment (gimple *stmt, gimple_stmt_iterator *gsi,
4657 gimple **vec_stmt, slp_tree slp_node)
4658 {
4659 tree vec_dest;
4660 tree scalar_dest;
4661 tree op;
4662 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
4663 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
4664 tree new_temp;
4665 gimple *def_stmt;
4666 enum vect_def_type dt[1] = {vect_unknown_def_type};
4667 int ndts = 1;
4668 int ncopies;
4669 int i, j;
4670 vec<tree> vec_oprnds = vNULL;
4671 tree vop;
4672 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
4673 vec_info *vinfo = stmt_info->vinfo;
4674 gimple *new_stmt = NULL;
4675 stmt_vec_info prev_stmt_info = NULL;
4676 enum tree_code code;
4677 tree vectype_in;
4678
4679 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
4680 return false;
4681
4682 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
4683 && ! vec_stmt)
4684 return false;
4685
4686 /* Is vectorizable assignment? */
4687 if (!is_gimple_assign (stmt))
4688 return false;
4689
4690 scalar_dest = gimple_assign_lhs (stmt);
4691 if (TREE_CODE (scalar_dest) != SSA_NAME)
4692 return false;
4693
4694 code = gimple_assign_rhs_code (stmt);
4695 if (gimple_assign_single_p (stmt)
4696 || code == PAREN_EXPR
4697 || CONVERT_EXPR_CODE_P (code))
4698 op = gimple_assign_rhs1 (stmt);
4699 else
4700 return false;
4701
4702 if (code == VIEW_CONVERT_EXPR)
4703 op = TREE_OPERAND (op, 0);
4704
4705 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
4706 unsigned int nunits = TYPE_VECTOR_SUBPARTS (vectype);
4707
4708 /* Multiple types in SLP are handled by creating the appropriate number of
4709 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
4710 case of SLP. */
4711 if (slp_node)
4712 ncopies = 1;
4713 else
4714 ncopies = vect_get_num_copies (loop_vinfo, vectype);
4715
4716 gcc_assert (ncopies >= 1);
4717
4718 if (!vect_is_simple_use (op, vinfo, &def_stmt, &dt[0], &vectype_in))
4719 {
4720 if (dump_enabled_p ())
4721 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4722 "use not simple.\n");
4723 return false;
4724 }
4725
4726 /* We can handle NOP_EXPR conversions that do not change the number
4727 of elements or the vector size. */
4728 if ((CONVERT_EXPR_CODE_P (code)
4729 || code == VIEW_CONVERT_EXPR)
4730 && (!vectype_in
4731 || TYPE_VECTOR_SUBPARTS (vectype_in) != nunits
4732 || (GET_MODE_SIZE (TYPE_MODE (vectype))
4733 != GET_MODE_SIZE (TYPE_MODE (vectype_in)))))
4734 return false;
4735
4736 /* We do not handle bit-precision changes. */
4737 if ((CONVERT_EXPR_CODE_P (code)
4738 || code == VIEW_CONVERT_EXPR)
4739 && INTEGRAL_TYPE_P (TREE_TYPE (scalar_dest))
4740 && (!type_has_mode_precision_p (TREE_TYPE (scalar_dest))
4741 || !type_has_mode_precision_p (TREE_TYPE (op)))
4742 /* But a conversion that does not change the bit-pattern is ok. */
4743 && !((TYPE_PRECISION (TREE_TYPE (scalar_dest))
4744 > TYPE_PRECISION (TREE_TYPE (op)))
4745 && TYPE_UNSIGNED (TREE_TYPE (op)))
4746 /* Conversion between boolean types of different sizes is
4747 	     a simple assignment in case their vectypes are the same
4748 	     boolean vectors.  */
4749 && (!VECTOR_BOOLEAN_TYPE_P (vectype)
4750 || !VECTOR_BOOLEAN_TYPE_P (vectype_in)))
4751 {
4752 if (dump_enabled_p ())
4753 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4754 "type conversion to/from bit-precision "
4755 "unsupported.\n");
4756 return false;
4757 }
4758
4759 if (!vec_stmt) /* transformation not required. */
4760 {
4761 STMT_VINFO_TYPE (stmt_info) = assignment_vec_info_type;
4762 if (dump_enabled_p ())
4763 dump_printf_loc (MSG_NOTE, vect_location,
4764 "=== vectorizable_assignment ===\n");
4765 vect_model_simple_cost (stmt_info, ncopies, dt, ndts, NULL, NULL);
4766 return true;
4767 }
4768
4769 /* Transform. */
4770 if (dump_enabled_p ())
4771 dump_printf_loc (MSG_NOTE, vect_location, "transform assignment.\n");
4772
4773 /* Handle def. */
4774 vec_dest = vect_create_destination_var (scalar_dest, vectype);
4775
4776 /* Handle use. */
4777 for (j = 0; j < ncopies; j++)
4778 {
4779 /* Handle uses. */
4780 if (j == 0)
4781 vect_get_vec_defs (op, NULL, stmt, &vec_oprnds, NULL, slp_node);
4782 else
4783 vect_get_vec_defs_for_stmt_copy (dt, &vec_oprnds, NULL);
4784
4785       /* Arguments are ready.  Create the new vector stmt.  */
4786 FOR_EACH_VEC_ELT (vec_oprnds, i, vop)
4787 {
4788 if (CONVERT_EXPR_CODE_P (code)
4789 || code == VIEW_CONVERT_EXPR)
4790 vop = build1 (VIEW_CONVERT_EXPR, vectype, vop);
4791 new_stmt = gimple_build_assign (vec_dest, vop);
4792 new_temp = make_ssa_name (vec_dest, new_stmt);
4793 gimple_assign_set_lhs (new_stmt, new_temp);
4794 vect_finish_stmt_generation (stmt, new_stmt, gsi);
4795 if (slp_node)
4796 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
4797 }
4798
4799 if (slp_node)
4800 continue;
4801
4802 if (j == 0)
4803 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
4804 else
4805 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
4806
4807 prev_stmt_info = vinfo_for_stmt (new_stmt);
4808 }
4809
4810 vec_oprnds.release ();
4811 return true;
4812 }
4813
4814
4815 /* Return TRUE if CODE (a shift operation) is supported for SCALAR_TYPE
4816 either as shift by a scalar or by a vector. */
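/* For example (an illustrative use, not a quote from a caller): a pattern
   recognizer that wants to introduce a right shift by a loop-invariant
   amount can first check

       if (vect_supportable_shift (RSHIFT_EXPR, scalar_type))

   before committing to that pattern.  */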
4817
4818 bool
4819 vect_supportable_shift (enum tree_code code, tree scalar_type)
4820 {
4821
4822 machine_mode vec_mode;
4823 optab optab;
4824 int icode;
4825 tree vectype;
4826
4827 vectype = get_vectype_for_scalar_type (scalar_type);
4828 if (!vectype)
4829 return false;
4830
4831 optab = optab_for_tree_code (code, vectype, optab_scalar);
4832 if (!optab
4833 || optab_handler (optab, TYPE_MODE (vectype)) == CODE_FOR_nothing)
4834 {
4835 optab = optab_for_tree_code (code, vectype, optab_vector);
4836 if (!optab
4837 || (optab_handler (optab, TYPE_MODE (vectype))
4838 == CODE_FOR_nothing))
4839 return false;
4840 }
4841
4842 vec_mode = TYPE_MODE (vectype);
4843 icode = (int) optab_handler (optab, vec_mode);
4844 if (icode == CODE_FOR_nothing)
4845 return false;
4846
4847 return true;
4848 }
4849
4850
4851 /* Function vectorizable_shift.
4852
4853 Check if STMT performs a shift operation that can be vectorized.
4854 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
4855    stmt to replace it, put it in VEC_STMT, and insert it at GSI.
4856 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
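/* Illustrative sketch (the statements below are only examples): for

       a[i] = b[i] << 3;

   the shift amount is invariant, so the vector-by-scalar optab is
   preferred when the target provides it, whereas

       a[i] = b[i] << c[i];

   needs the vector-by-vector optab because the amount varies per lane.  */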
4857
4858 static bool
4859 vectorizable_shift (gimple *stmt, gimple_stmt_iterator *gsi,
4860 gimple **vec_stmt, slp_tree slp_node)
4861 {
4862 tree vec_dest;
4863 tree scalar_dest;
4864 tree op0, op1 = NULL;
4865 tree vec_oprnd1 = NULL_TREE;
4866 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
4867 tree vectype;
4868 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
4869 enum tree_code code;
4870 machine_mode vec_mode;
4871 tree new_temp;
4872 optab optab;
4873 int icode;
4874 machine_mode optab_op2_mode;
4875 gimple *def_stmt;
4876 enum vect_def_type dt[2] = {vect_unknown_def_type, vect_unknown_def_type};
4877 int ndts = 2;
4878 gimple *new_stmt = NULL;
4879 stmt_vec_info prev_stmt_info;
4880 int nunits_in;
4881 int nunits_out;
4882 tree vectype_out;
4883 tree op1_vectype;
4884 int ncopies;
4885 int j, i;
4886 vec<tree> vec_oprnds0 = vNULL;
4887 vec<tree> vec_oprnds1 = vNULL;
4888 tree vop0, vop1;
4889 unsigned int k;
4890 bool scalar_shift_arg = true;
4891 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
4892 vec_info *vinfo = stmt_info->vinfo;
4893
4894 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
4895 return false;
4896
4897 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
4898 && ! vec_stmt)
4899 return false;
4900
4901 /* Is STMT a vectorizable binary/unary operation? */
4902 if (!is_gimple_assign (stmt))
4903 return false;
4904
4905 if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME)
4906 return false;
4907
4908 code = gimple_assign_rhs_code (stmt);
4909
4910 if (!(code == LSHIFT_EXPR || code == RSHIFT_EXPR || code == LROTATE_EXPR
4911 || code == RROTATE_EXPR))
4912 return false;
4913
4914 scalar_dest = gimple_assign_lhs (stmt);
4915 vectype_out = STMT_VINFO_VECTYPE (stmt_info);
4916 if (!type_has_mode_precision_p (TREE_TYPE (scalar_dest)))
4917 {
4918 if (dump_enabled_p ())
4919 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4920 "bit-precision shifts not supported.\n");
4921 return false;
4922 }
4923
4924 op0 = gimple_assign_rhs1 (stmt);
4925 if (!vect_is_simple_use (op0, vinfo, &def_stmt, &dt[0], &vectype))
4926 {
4927 if (dump_enabled_p ())
4928 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4929 "use not simple.\n");
4930 return false;
4931 }
4932   /* If op0 is an external or constant def, use a vector type with
4933 the same size as the output vector type. */
4934 if (!vectype)
4935 vectype = get_same_sized_vectype (TREE_TYPE (op0), vectype_out);
4936 if (vec_stmt)
4937 gcc_assert (vectype);
4938 if (!vectype)
4939 {
4940 if (dump_enabled_p ())
4941 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4942 "no vectype for scalar type\n");
4943 return false;
4944 }
4945
4946 nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
4947 nunits_in = TYPE_VECTOR_SUBPARTS (vectype);
4948 if (nunits_out != nunits_in)
4949 return false;
4950
4951 op1 = gimple_assign_rhs2 (stmt);
4952 if (!vect_is_simple_use (op1, vinfo, &def_stmt, &dt[1], &op1_vectype))
4953 {
4954 if (dump_enabled_p ())
4955 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4956 "use not simple.\n");
4957 return false;
4958 }
4959
4960 /* Multiple types in SLP are handled by creating the appropriate number of
4961 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
4962 case of SLP. */
4963 if (slp_node)
4964 ncopies = 1;
4965 else
4966 ncopies = vect_get_num_copies (loop_vinfo, vectype);
4967
4968 gcc_assert (ncopies >= 1);
4969
4970   /* Determine whether the shift amount is a vector or a scalar.  If the
4971 shift/rotate amount is a vector, use the vector/vector shift optabs. */
4972
4973 if ((dt[1] == vect_internal_def
4974 || dt[1] == vect_induction_def)
4975 && !slp_node)
4976 scalar_shift_arg = false;
4977 else if (dt[1] == vect_constant_def
4978 || dt[1] == vect_external_def
4979 || dt[1] == vect_internal_def)
4980 {
4981       /* In SLP, we need to check whether the shift count is the same
4982 	 for all statements; in loops, if it is a constant or invariant,
4983 	 it is always a scalar shift.  */
4984 if (slp_node)
4985 {
4986 vec<gimple *> stmts = SLP_TREE_SCALAR_STMTS (slp_node);
4987 gimple *slpstmt;
4988
4989 FOR_EACH_VEC_ELT (stmts, k, slpstmt)
4990 if (!operand_equal_p (gimple_assign_rhs2 (slpstmt), op1, 0))
4991 scalar_shift_arg = false;
4992 }
4993
4994       /* If the shift amount is computed by a pattern stmt we cannot
4995 	 use the scalar amount directly, so give up and use a vector
4996 	 shift.  */
4997 if (dt[1] == vect_internal_def)
4998 {
4999 gimple *def = SSA_NAME_DEF_STMT (op1);
5000 if (is_pattern_stmt_p (vinfo_for_stmt (def)))
5001 scalar_shift_arg = false;
5002 }
5003 }
5004 else
5005 {
5006 if (dump_enabled_p ())
5007 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5008 "operand mode requires invariant argument.\n");
5009 return false;
5010 }
5011
5012 /* Vector shifted by vector. */
5013 if (!scalar_shift_arg)
5014 {
5015 optab = optab_for_tree_code (code, vectype, optab_vector);
5016 if (dump_enabled_p ())
5017 dump_printf_loc (MSG_NOTE, vect_location,
5018 "vector/vector shift/rotate found.\n");
5019
5020 if (!op1_vectype)
5021 op1_vectype = get_same_sized_vectype (TREE_TYPE (op1), vectype_out);
5022 if (op1_vectype == NULL_TREE
5023 || TYPE_MODE (op1_vectype) != TYPE_MODE (vectype))
5024 {
5025 if (dump_enabled_p ())
5026 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5027 "unusable type for last operand in"
5028 " vector/vector shift/rotate.\n");
5029 return false;
5030 }
5031 }
5032   /* See if the machine has a vector shifted by scalar insn and, if not,
5033      see if it has a vector shifted by vector insn.  */
5034 else
5035 {
5036 optab = optab_for_tree_code (code, vectype, optab_scalar);
5037 if (optab
5038 && optab_handler (optab, TYPE_MODE (vectype)) != CODE_FOR_nothing)
5039 {
5040 if (dump_enabled_p ())
5041 dump_printf_loc (MSG_NOTE, vect_location,
5042 "vector/scalar shift/rotate found.\n");
5043 }
5044 else
5045 {
5046 optab = optab_for_tree_code (code, vectype, optab_vector);
5047 if (optab
5048 && (optab_handler (optab, TYPE_MODE (vectype))
5049 != CODE_FOR_nothing))
5050 {
5051 scalar_shift_arg = false;
5052
5053 if (dump_enabled_p ())
5054 dump_printf_loc (MSG_NOTE, vect_location,
5055 "vector/vector shift/rotate found.\n");
5056
5057 /* Unlike the other binary operators, shifts/rotates have
5058 		 an rhs of type int instead of the same type as the lhs,
5059 so make sure the scalar is the right type if we are
5060 dealing with vectors of long long/long/short/char. */
5061 if (dt[1] == vect_constant_def)
5062 op1 = fold_convert (TREE_TYPE (vectype), op1);
5063 else if (!useless_type_conversion_p (TREE_TYPE (vectype),
5064 TREE_TYPE (op1)))
5065 {
5066 if (slp_node
5067 && TYPE_MODE (TREE_TYPE (vectype))
5068 != TYPE_MODE (TREE_TYPE (op1)))
5069 {
5070 if (dump_enabled_p ())
5071 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5072 "unusable type for last operand in"
5073 " vector/vector shift/rotate.\n");
5074 return false;
5075 }
5076 if (vec_stmt && !slp_node)
5077 {
5078 op1 = fold_convert (TREE_TYPE (vectype), op1);
5079 op1 = vect_init_vector (stmt, op1,
5080 TREE_TYPE (vectype), NULL);
5081 }
5082 }
5083 }
5084 }
5085 }
5086
5087 /* Supportable by target? */
5088 if (!optab)
5089 {
5090 if (dump_enabled_p ())
5091 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5092 "no optab.\n");
5093 return false;
5094 }
5095 vec_mode = TYPE_MODE (vectype);
5096 icode = (int) optab_handler (optab, vec_mode);
5097 if (icode == CODE_FOR_nothing)
5098 {
5099 if (dump_enabled_p ())
5100 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5101 "op not supported by target.\n");
5102 /* Check only during analysis. */
5103 if (GET_MODE_SIZE (vec_mode) != UNITS_PER_WORD
5104 || (!vec_stmt
5105 && !vect_worthwhile_without_simd_p (vinfo, code)))
5106 return false;
5107 if (dump_enabled_p ())
5108 dump_printf_loc (MSG_NOTE, vect_location,
5109 "proceeding using word mode.\n");
5110 }
5111
5112 /* Worthwhile without SIMD support? Check only during analysis. */
5113 if (!vec_stmt
5114 && !VECTOR_MODE_P (TYPE_MODE (vectype))
5115 && !vect_worthwhile_without_simd_p (vinfo, code))
5116 {
5117 if (dump_enabled_p ())
5118 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5119 "not worthwhile without SIMD support.\n");
5120 return false;
5121 }
5122
5123 if (!vec_stmt) /* transformation not required. */
5124 {
5125 STMT_VINFO_TYPE (stmt_info) = shift_vec_info_type;
5126 if (dump_enabled_p ())
5127 dump_printf_loc (MSG_NOTE, vect_location,
5128 "=== vectorizable_shift ===\n");
5129 vect_model_simple_cost (stmt_info, ncopies, dt, ndts, NULL, NULL);
5130 return true;
5131 }
5132
5133 /* Transform. */
5134
5135 if (dump_enabled_p ())
5136 dump_printf_loc (MSG_NOTE, vect_location,
5137 "transform binary/unary operation.\n");
5138
5139 /* Handle def. */
5140 vec_dest = vect_create_destination_var (scalar_dest, vectype);
5141
5142 prev_stmt_info = NULL;
5143 for (j = 0; j < ncopies; j++)
5144 {
5145 /* Handle uses. */
5146 if (j == 0)
5147 {
5148 if (scalar_shift_arg)
5149 {
5150 /* Vector shl and shr insn patterns can be defined with scalar
5151 operand 2 (shift operand). In this case, use constant or loop
5152 invariant op1 directly, without extending it to vector mode
5153 first. */
5154 optab_op2_mode = insn_data[icode].operand[2].mode;
5155 if (!VECTOR_MODE_P (optab_op2_mode))
5156 {
5157 if (dump_enabled_p ())
5158 dump_printf_loc (MSG_NOTE, vect_location,
5159 "operand 1 using scalar mode.\n");
5160 vec_oprnd1 = op1;
5161 vec_oprnds1.create (slp_node ? slp_node->vec_stmts_size : 1);
5162 vec_oprnds1.quick_push (vec_oprnd1);
5163 if (slp_node)
5164 {
5165 /* Store vec_oprnd1 for every vector stmt to be created
5166 for SLP_NODE. We check during the analysis that all
5167 the shift arguments are the same.
5168 TODO: Allow different constants for different vector
5169 stmts generated for an SLP instance. */
5170 for (k = 0; k < slp_node->vec_stmts_size - 1; k++)
5171 vec_oprnds1.quick_push (vec_oprnd1);
5172 }
5173 }
5174 }
5175
5176 /* vec_oprnd1 is available if operand 1 should be of a scalar-type
5177 	     (a special case for certain kinds of vector shifts); otherwise,
5178 operand 1 should be of a vector type (the usual case). */
5179 if (vec_oprnd1)
5180 vect_get_vec_defs (op0, NULL_TREE, stmt, &vec_oprnds0, NULL,
5181 slp_node);
5182 else
5183 vect_get_vec_defs (op0, op1, stmt, &vec_oprnds0, &vec_oprnds1,
5184 slp_node);
5185 }
5186 else
5187 vect_get_vec_defs_for_stmt_copy (dt, &vec_oprnds0, &vec_oprnds1);
5188
5189 /* Arguments are ready. Create the new vector stmt. */
5190 FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
5191 {
5192 vop1 = vec_oprnds1[i];
5193 new_stmt = gimple_build_assign (vec_dest, code, vop0, vop1);
5194 new_temp = make_ssa_name (vec_dest, new_stmt);
5195 gimple_assign_set_lhs (new_stmt, new_temp);
5196 vect_finish_stmt_generation (stmt, new_stmt, gsi);
5197 if (slp_node)
5198 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
5199 }
5200
5201 if (slp_node)
5202 continue;
5203
5204 if (j == 0)
5205 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
5206 else
5207 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
5208 prev_stmt_info = vinfo_for_stmt (new_stmt);
5209 }
5210
5211 vec_oprnds0.release ();
5212 vec_oprnds1.release ();
5213
5214 return true;
5215 }
5216
5217
5218 /* Function vectorizable_operation.
5219
5220 Check if STMT performs a binary, unary or ternary operation that can
5221 be vectorized.
5222 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
5223    stmt to replace it, put it in VEC_STMT, and insert it at GSI.
5224 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
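/* For illustration only (the statement below is just an example): a simple
   binary statement such as

       c[i] = a[i] + b[i];

   is handled here, emitting one PLUS_EXPR on whole vectors per copy,
   provided optab_for_tree_code reports a usable optab for the chosen
   vector mode.  */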
5225
5226 static bool
5227 vectorizable_operation (gimple *stmt, gimple_stmt_iterator *gsi,
5228 gimple **vec_stmt, slp_tree slp_node)
5229 {
5230 tree vec_dest;
5231 tree scalar_dest;
5232 tree op0, op1 = NULL_TREE, op2 = NULL_TREE;
5233 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
5234 tree vectype;
5235 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
5236 enum tree_code code, orig_code;
5237 machine_mode vec_mode;
5238 tree new_temp;
5239 int op_type;
5240 optab optab;
5241 bool target_support_p;
5242 gimple *def_stmt;
5243 enum vect_def_type dt[3]
5244 = {vect_unknown_def_type, vect_unknown_def_type, vect_unknown_def_type};
5245 int ndts = 3;
5246 gimple *new_stmt = NULL;
5247 stmt_vec_info prev_stmt_info;
5248 int nunits_in;
5249 int nunits_out;
5250 tree vectype_out;
5251 int ncopies;
5252 int j, i;
5253 vec<tree> vec_oprnds0 = vNULL;
5254 vec<tree> vec_oprnds1 = vNULL;
5255 vec<tree> vec_oprnds2 = vNULL;
5256 tree vop0, vop1, vop2;
5257 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
5258 vec_info *vinfo = stmt_info->vinfo;
5259
5260 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
5261 return false;
5262
5263 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
5264 && ! vec_stmt)
5265 return false;
5266
5267 /* Is STMT a vectorizable binary/unary operation? */
5268 if (!is_gimple_assign (stmt))
5269 return false;
5270
5271 if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME)
5272 return false;
5273
5274 orig_code = code = gimple_assign_rhs_code (stmt);
5275
5276 /* For pointer addition and subtraction, we should use the normal
5277 plus and minus for the vector operation. */
5278 if (code == POINTER_PLUS_EXPR)
5279 code = PLUS_EXPR;
5280 if (code == POINTER_DIFF_EXPR)
5281 code = MINUS_EXPR;
5282
5283   /* Support only unary, binary and ternary operations.  */
5284 op_type = TREE_CODE_LENGTH (code);
5285 if (op_type != unary_op && op_type != binary_op && op_type != ternary_op)
5286 {
5287 if (dump_enabled_p ())
5288 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5289 "num. args = %d (not unary/binary/ternary op).\n",
5290 op_type);
5291 return false;
5292 }
5293
5294 scalar_dest = gimple_assign_lhs (stmt);
5295 vectype_out = STMT_VINFO_VECTYPE (stmt_info);
5296
5297 /* Most operations cannot handle bit-precision types without extra
5298 truncations. */
5299 if (!VECTOR_BOOLEAN_TYPE_P (vectype_out)
5300 && !type_has_mode_precision_p (TREE_TYPE (scalar_dest))
5301 /* Exceptions are bitwise binary operations. */
5302 && code != BIT_IOR_EXPR
5303 && code != BIT_XOR_EXPR
5304 && code != BIT_AND_EXPR)
5305 {
5306 if (dump_enabled_p ())
5307 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5308 "bit-precision arithmetic not supported.\n");
5309 return false;
5310 }
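  /* As a concrete illustration of the check above: arithmetic on a
     bit-field type such as

	struct { unsigned int x : 3; } s;
	s.x = s.x + 1;

     would need an extra truncation to 3 bits after every vector
     operation, so it is rejected here.  BIT_IOR_EXPR, BIT_XOR_EXPR and
     BIT_AND_EXPR never set bits outside the operands' precision, which
     is why they are exempt.  */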
5311
5312 op0 = gimple_assign_rhs1 (stmt);
5313 if (!vect_is_simple_use (op0, vinfo, &def_stmt, &dt[0], &vectype))
5314 {
5315 if (dump_enabled_p ())
5316 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5317 "use not simple.\n");
5318 return false;
5319 }
5320 /* If op0 is an external or constant def, use a vector type with
5321 the same size as the output vector type. */
5322 if (!vectype)
5323 {
5324 /* For a boolean type we cannot determine the vectype from an
5325 invariant value (we don't know whether it is a vector
5326 of booleans or a vector of integers). Use the output
5327 vectype, because operations on booleans don't change
5328 the type. */
5329 if (VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (op0)))
5330 {
5331 if (!VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (scalar_dest)))
5332 {
5333 if (dump_enabled_p ())
5334 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5335 "not supported operation on bool value.\n");
5336 return false;
5337 }
5338 vectype = vectype_out;
5339 }
5340 else
5341 vectype = get_same_sized_vectype (TREE_TYPE (op0), vectype_out);
5342 }
5343 if (vec_stmt)
5344 gcc_assert (vectype);
5345 if (!vectype)
5346 {
5347 if (dump_enabled_p ())
5348 {
5349 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5350 "no vectype for scalar type ");
5351 dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
5352 TREE_TYPE (op0));
5353 dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
5354 }
5355
5356 return false;
5357 }
5358
5359 nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
5360 nunits_in = TYPE_VECTOR_SUBPARTS (vectype);
5361 if (nunits_out != nunits_in)
5362 return false;
5363
5364 if (op_type == binary_op || op_type == ternary_op)
5365 {
5366 op1 = gimple_assign_rhs2 (stmt);
5367 if (!vect_is_simple_use (op1, vinfo, &def_stmt, &dt[1]))
5368 {
5369 if (dump_enabled_p ())
5370 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5371 "use not simple.\n");
5372 return false;
5373 }
5374 }
5375 if (op_type == ternary_op)
5376 {
5377 op2 = gimple_assign_rhs3 (stmt);
5378 if (!vect_is_simple_use (op2, vinfo, &def_stmt, &dt[2]))
5379 {
5380 if (dump_enabled_p ())
5381 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5382 "use not simple.\n");
5383 return false;
5384 }
5385 }
5386
5387 /* Multiple types in SLP are handled by creating the appropriate number of
5388 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
5389 case of SLP. */
5390 if (slp_node)
5391 ncopies = 1;
5392 else
5393 ncopies = vect_get_num_copies (loop_vinfo, vectype);
5394
5395 gcc_assert (ncopies >= 1);
5396
5397 /* Shifts are handled in vectorizable_shift (). */
5398 if (code == LSHIFT_EXPR || code == RSHIFT_EXPR || code == LROTATE_EXPR
5399 || code == RROTATE_EXPR)
5400 return false;
5401
5402 /* Supportable by target? */
5403
5404 vec_mode = TYPE_MODE (vectype);
5405 if (code == MULT_HIGHPART_EXPR)
5406 target_support_p = can_mult_highpart_p (vec_mode, TYPE_UNSIGNED (vectype));
5407 else
5408 {
5409 optab = optab_for_tree_code (code, vectype, optab_default);
5410 if (!optab)
5411 {
5412 if (dump_enabled_p ())
5413 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5414 "no optab.\n");
5415 return false;
5416 }
5417 target_support_p = (optab_handler (optab, vec_mode)
5418 != CODE_FOR_nothing);
5419 }
5420
5421 if (!target_support_p)
5422 {
5423 if (dump_enabled_p ())
5424 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5425 "op not supported by target.\n");
5426 /* Check only during analysis. */
5427 if (GET_MODE_SIZE (vec_mode) != UNITS_PER_WORD
5428 || (!vec_stmt && !vect_worthwhile_without_simd_p (vinfo, code)))
5429 return false;
5430 if (dump_enabled_p ())
5431 dump_printf_loc (MSG_NOTE, vect_location,
5432 "proceeding using word mode.\n");
5433 }
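  /* As an example of the word-mode fallback allowed above: on a
     64-bit target without vector support, a V8QI bitwise AND whose
     vector mode is exactly one word wide can typically still be
     carried out as a single 64-bit integer AND, because bitwise
     operations never cross element boundaries.  */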
5434
5435 /* Worthwhile without SIMD support? Check only during analysis. */
5436 if (!VECTOR_MODE_P (vec_mode)
5437 && !vec_stmt
5438 && !vect_worthwhile_without_simd_p (vinfo, code))
5439 {
5440 if (dump_enabled_p ())
5441 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5442 "not worthwhile without SIMD support.\n");
5443 return false;
5444 }
5445
5446 if (!vec_stmt) /* transformation not required. */
5447 {
5448 STMT_VINFO_TYPE (stmt_info) = op_vec_info_type;
5449 if (dump_enabled_p ())
5450 dump_printf_loc (MSG_NOTE, vect_location,
5451 "=== vectorizable_operation ===\n");
5452 vect_model_simple_cost (stmt_info, ncopies, dt, ndts, NULL, NULL);
5453 return true;
5454 }
5455
5456 /* Transform. */
5457
5458 if (dump_enabled_p ())
5459 dump_printf_loc (MSG_NOTE, vect_location,
5460 "transform binary/unary operation.\n");
5461
5462 /* Handle def. */
5463 vec_dest = vect_create_destination_var (scalar_dest, vectype);
5464
5465 /* POINTER_DIFF_EXPR has pointer arguments which are vectorized as
5466 vectors with unsigned elements, but the result is signed. So we
5467 need to compute the MINUS_EXPR into a vectype temporary and
5468 VIEW_CONVERT_EXPR it into the final vectype_out result. */
5469 tree vec_cvt_dest = NULL_TREE;
5470 if (orig_code == POINTER_DIFF_EXPR)
5471 vec_cvt_dest = vect_create_destination_var (scalar_dest, vectype_out);
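  /* For illustration, with made-up SSA names, the POINTER_DIFF_EXPR
     case ends up emitting roughly

	vect_tmp_1 = vect_p0_2 - vect_p1_3;      <-- MINUS_EXPR in VECTYPE
	vect_diff_4 = VIEW_CONVERT_EXPR <vectype_out> (vect_tmp_1);

     i.e. the subtraction is done on the unsigned-element VECTYPE and
     the result is then punned to VECTYPE_OUT via VEC_CVT_DEST.  */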
5472
5473 /* In case the vectorization factor (VF) is bigger than the number
5474 of elements that we can fit in a vectype (nunits), we have to generate
5475 more than one vector stmt - i.e., we need to "unroll" the
5476 vector stmt by a factor VF/nunits. In doing so, we record a pointer
5477 from one copy of the vector stmt to the next, in the field
5478 STMT_VINFO_RELATED_STMT. This is necessary in order to allow following
5479 stages to find the correct vector defs to be used when vectorizing
5480 stmts that use the defs of the current stmt. The example below
5481 illustrates the vectorization process when VF=16 and nunits=4 (i.e.,
5482 we need to create 4 vectorized stmts):
5483
5484 before vectorization:
5485 RELATED_STMT VEC_STMT
5486 S1: x = memref - -
5487 S2: z = x + 1 - -
5488
5489 step 1: vectorize stmt S1 (done in vectorizable_load. See more details
5490 there):
5491 RELATED_STMT VEC_STMT
5492 VS1_0: vx0 = memref0 VS1_1 -
5493 VS1_1: vx1 = memref1 VS1_2 -
5494 VS1_2: vx2 = memref2 VS1_3 -
5495 VS1_3: vx3 = memref3 - -
5496 S1: x = load - VS1_0
5497 S2: z = x + 1 - -
5498
5499 step2: vectorize stmt S2 (done here):
5500 To vectorize stmt S2 we first need to find the relevant vector
5501 def for the first operand 'x'. This is, as usual, obtained from
5502 the vector stmt recorded in the STMT_VINFO_VEC_STMT of the stmt
5503 that defines 'x' (S1). This way we find the stmt VS1_0, and the
5504 relevant vector def 'vx0'. Having found 'vx0' we can generate
5505 the vector stmt VS2_0, and as usual, record it in the
5506 STMT_VINFO_VEC_STMT of stmt S2.
5507 When creating the second copy (VS2_1), we obtain the relevant vector
5508 def from the vector stmt recorded in the STMT_VINFO_RELATED_STMT of
5509 stmt VS1_0. This way we find the stmt VS1_1 and the relevant
5510 vector def 'vx1'. Using 'vx1' we create stmt VS2_1 and record a
5511 pointer to it in the STMT_VINFO_RELATED_STMT of the vector stmt VS2_0.
5512 Similarly when creating stmts VS2_2 and VS2_3. This is the resulting
5513 chain of stmts and pointers:
5514 RELATED_STMT VEC_STMT
5515 VS1_0: vx0 = memref0 VS1_1 -
5516 VS1_1: vx1 = memref1 VS1_2 -
5517 VS1_2: vx2 = memref2 VS1_3 -
5518 VS1_3: vx3 = memref3 - -
5519 S1: x = load - VS1_0
5520 VS2_0: vz0 = vx0 + v1 VS2_1 -
5521 VS2_1: vz1 = vx1 + v1 VS2_2 -
5522 VS2_2: vz2 = vx2 + v1 VS2_3 -
5523 VS2_3: vz3 = vx3 + v1 - -
5524 S2: z = x + 1 - VS2_0 */
5525
5526 prev_stmt_info = NULL;
5527 for (j = 0; j < ncopies; j++)
5528 {
5529 /* Handle uses. */
5530 if (j == 0)
5531 {
5532 if (op_type == binary_op || op_type == ternary_op)
5533 vect_get_vec_defs (op0, op1, stmt, &vec_oprnds0, &vec_oprnds1,
5534 slp_node);
5535 else
5536 vect_get_vec_defs (op0, NULL_TREE, stmt, &vec_oprnds0, NULL,
5537 slp_node);
5538 if (op_type == ternary_op)
5539 vect_get_vec_defs (op2, NULL_TREE, stmt, &vec_oprnds2, NULL,
5540 slp_node);
5541 }
5542 else
5543 {
5544 vect_get_vec_defs_for_stmt_copy (dt, &vec_oprnds0, &vec_oprnds1);
5545 if (op_type == ternary_op)
5546 {
5547 tree vec_oprnd = vec_oprnds2.pop ();
5548 vec_oprnds2.quick_push (vect_get_vec_def_for_stmt_copy (dt[2],
5549 vec_oprnd));
5550 }
5551 }
5552
5553 /* Arguments are ready. Create the new vector stmt. */
5554 FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
5555 {
5556 vop1 = ((op_type == binary_op || op_type == ternary_op)
5557 ? vec_oprnds1[i] : NULL_TREE);
5558 vop2 = ((op_type == ternary_op)
5559 ? vec_oprnds2[i] : NULL_TREE);
5560 new_stmt = gimple_build_assign (vec_dest, code, vop0, vop1, vop2);
5561 new_temp = make_ssa_name (vec_dest, new_stmt);
5562 gimple_assign_set_lhs (new_stmt, new_temp);
5563 vect_finish_stmt_generation (stmt, new_stmt, gsi);
5564 if (vec_cvt_dest)
5565 {
5566 new_temp = build1 (VIEW_CONVERT_EXPR, vectype_out, new_temp);
5567 new_stmt = gimple_build_assign (vec_cvt_dest, VIEW_CONVERT_EXPR,
5568 new_temp);
5569 new_temp = make_ssa_name (vec_cvt_dest, new_stmt);
5570 gimple_assign_set_lhs (new_stmt, new_temp);
5571 vect_finish_stmt_generation (stmt, new_stmt, gsi);
5572 }
5573 if (slp_node)
5574 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
5575 }
5576
5577 if (slp_node)
5578 continue;
5579
5580 if (j == 0)
5581 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
5582 else
5583 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
5584 prev_stmt_info = vinfo_for_stmt (new_stmt);
5585 }
5586
5587 vec_oprnds0.release ();
5588 vec_oprnds1.release ();
5589 vec_oprnds2.release ();
5590
5591 return true;
5592 }
5593
5594 /* A helper function to ensure data reference DR's base alignment. */
5595
5596 static void
5597 ensure_base_align (struct data_reference *dr)
5598 {
5599 if (!dr->aux)
5600 return;
5601
5602 if (DR_VECT_AUX (dr)->base_misaligned)
5603 {
5604 tree base_decl = DR_VECT_AUX (dr)->base_decl;
5605
5606 unsigned int align_base_to = DR_TARGET_ALIGNMENT (dr) * BITS_PER_UNIT;
5607
5608 if (decl_in_symtab_p (base_decl))
5609 symtab_node::get (base_decl)->increase_alignment (align_base_to);
5610 else
5611 {
5612 SET_DECL_ALIGN (base_decl, align_base_to);
5613 DECL_USER_ALIGN (base_decl) = 1;
5614 }
5615 DR_VECT_AUX (dr)->base_misaligned = false;
5616 }
5617 }
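/* For instance, if the base of DR is a global array declared with the
   default 8-byte alignment while DR_TARGET_ALIGNMENT is 16 bytes, the
   function above raises the declaration's alignment to 16 bytes
   (through its symtab node when the decl has one) so that aligned
   vector accesses can be generated for it.  */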
5618
5619
5620 /* Function get_group_alias_ptr_type.
5621
5622 Return the alias type for the group starting at FIRST_STMT. */
5623
5624 static tree
5625 get_group_alias_ptr_type (gimple *first_stmt)
5626 {
5627 struct data_reference *first_dr, *next_dr;
5628 gimple *next_stmt;
5629
5630 first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
5631 next_stmt = GROUP_NEXT_ELEMENT (vinfo_for_stmt (first_stmt));
5632 while (next_stmt)
5633 {
5634 next_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (next_stmt));
5635 if (get_alias_set (DR_REF (first_dr))
5636 != get_alias_set (DR_REF (next_dr)))
5637 {
5638 if (dump_enabled_p ())
5639 dump_printf_loc (MSG_NOTE, vect_location,
5640 "conflicting alias set types.\n");
5641 return ptr_type_node;
5642 }
5643 next_stmt = GROUP_NEXT_ELEMENT (vinfo_for_stmt (next_stmt));
5644 }
5645 return reference_alias_ptr_type (DR_REF (first_dr));
5646 }
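/* For example, an interleaved group that stores both an 'int' member
   and a 'float' member of the same structure has data-refs with
   different alias sets, so the conservative ptr_type_node (alias set
   zero) is returned for the combined vector accesses.  */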
5647
5648
5649 /* Function vectorizable_store.
5650
5651 Check if STMT defines a non-scalar data-ref (array/pointer/structure) that
5652 can be vectorized.
5653 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
5654 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
5655 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
5656
5657 static bool
5658 vectorizable_store (gimple *stmt, gimple_stmt_iterator *gsi, gimple **vec_stmt,
5659 slp_tree slp_node)
5660 {
5661 tree scalar_dest;
5662 tree data_ref;
5663 tree op;
5664 tree vec_oprnd = NULL_TREE;
5665 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
5666 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info), *first_dr = NULL;
5667 tree elem_type;
5668 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
5669 struct loop *loop = NULL;
5670 machine_mode vec_mode;
5671 tree dummy;
5672 enum dr_alignment_support alignment_support_scheme;
5673 gimple *def_stmt;
5674 enum vect_def_type dt;
5675 stmt_vec_info prev_stmt_info = NULL;
5676 tree dataref_ptr = NULL_TREE;
5677 tree dataref_offset = NULL_TREE;
5678 gimple *ptr_incr = NULL;
5679 int ncopies;
5680 int j;
5681 gimple *next_stmt, *first_stmt;
5682 bool grouped_store;
5683 unsigned int group_size, i;
5684 vec<tree> oprnds = vNULL;
5685 vec<tree> result_chain = vNULL;
5686 bool inv_p;
5687 tree offset = NULL_TREE;
5688 vec<tree> vec_oprnds = vNULL;
5689 bool slp = (slp_node != NULL);
5690 unsigned int vec_num;
5691 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
5692 vec_info *vinfo = stmt_info->vinfo;
5693 tree aggr_type;
5694 gather_scatter_info gs_info;
5695 enum vect_def_type scatter_src_dt = vect_unknown_def_type;
5696 gimple *new_stmt;
5697 int vf;
5698 vec_load_store_type vls_type;
5699 tree ref_type;
5700
5701 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
5702 return false;
5703
5704 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
5705 && ! vec_stmt)
5706 return false;
5707
5708 /* Is vectorizable store? */
5709
5710 if (!is_gimple_assign (stmt))
5711 return false;
5712
5713 scalar_dest = gimple_assign_lhs (stmt);
5714 if (TREE_CODE (scalar_dest) == VIEW_CONVERT_EXPR
5715 && is_pattern_stmt_p (stmt_info))
5716 scalar_dest = TREE_OPERAND (scalar_dest, 0);
5717 if (TREE_CODE (scalar_dest) != ARRAY_REF
5718 && TREE_CODE (scalar_dest) != BIT_FIELD_REF
5719 && TREE_CODE (scalar_dest) != INDIRECT_REF
5720 && TREE_CODE (scalar_dest) != COMPONENT_REF
5721 && TREE_CODE (scalar_dest) != IMAGPART_EXPR
5722 && TREE_CODE (scalar_dest) != REALPART_EXPR
5723 && TREE_CODE (scalar_dest) != MEM_REF)
5724 return false;
5725
5726 /* Cannot have hybrid store SLP -- that would mean storing to the
5727 same location twice. */
5728 gcc_assert (slp == PURE_SLP_STMT (stmt_info));
5729
5730 gcc_assert (gimple_assign_single_p (stmt));
5731
5732 tree vectype = STMT_VINFO_VECTYPE (stmt_info), rhs_vectype = NULL_TREE;
5733 unsigned int nunits = TYPE_VECTOR_SUBPARTS (vectype);
5734
5735 if (loop_vinfo)
5736 {
5737 loop = LOOP_VINFO_LOOP (loop_vinfo);
5738 vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
5739 }
5740 else
5741 vf = 1;
5742
5743 /* Multiple types in SLP are handled by creating the appropriate number of
5744 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
5745 case of SLP. */
5746 if (slp)
5747 ncopies = 1;
5748 else
5749 ncopies = vect_get_num_copies (loop_vinfo, vectype);
5750
5751 gcc_assert (ncopies >= 1);
5752
5753 /* FORNOW. This restriction should be relaxed. */
5754 if (loop && nested_in_vect_loop_p (loop, stmt) && ncopies > 1)
5755 {
5756 if (dump_enabled_p ())
5757 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5758 "multiple types in nested loop.\n");
5759 return false;
5760 }
5761
5762 op = gimple_assign_rhs1 (stmt);
5763
5764 /* In case this is a store from a constant, make sure
5765 native_encode_expr can handle it. */
5766 if (CONSTANT_CLASS_P (op) && native_encode_expr (op, NULL, 64) == 0)
5767 return false;
5768
5769 if (!vect_is_simple_use (op, vinfo, &def_stmt, &dt, &rhs_vectype))
5770 {
5771 if (dump_enabled_p ())
5772 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5773 "use not simple.\n");
5774 return false;
5775 }
5776
5777 if (dt == vect_constant_def || dt == vect_external_def)
5778 vls_type = VLS_STORE_INVARIANT;
5779 else
5780 vls_type = VLS_STORE;
5781
5782 if (rhs_vectype && !useless_type_conversion_p (vectype, rhs_vectype))
5783 return false;
5784
5785 elem_type = TREE_TYPE (vectype);
5786 vec_mode = TYPE_MODE (vectype);
5787
5788 /* FORNOW. In some cases can vectorize even if data-type not supported
5789 (e.g. - array initialization with 0). */
5790 if (optab_handler (mov_optab, vec_mode) == CODE_FOR_nothing)
5791 return false;
5792
5793 if (!STMT_VINFO_DATA_REF (stmt_info))
5794 return false;
5795
5796 vect_memory_access_type memory_access_type;
5797 if (!get_load_store_type (stmt, vectype, slp, vls_type, ncopies,
5798 &memory_access_type, &gs_info))
5799 return false;
5800
5801 if (!vec_stmt) /* transformation not required. */
5802 {
5803 STMT_VINFO_MEMORY_ACCESS_TYPE (stmt_info) = memory_access_type;
5804 STMT_VINFO_TYPE (stmt_info) = store_vec_info_type;
5805 /* The SLP costs are calculated during SLP analysis. */
5806 if (!PURE_SLP_STMT (stmt_info))
5807 vect_model_store_cost (stmt_info, ncopies, memory_access_type, dt,
5808 NULL, NULL, NULL);
5809 return true;
5810 }
5811 gcc_assert (memory_access_type == STMT_VINFO_MEMORY_ACCESS_TYPE (stmt_info));
5812
5813 /* Transform. */
5814
5815 ensure_base_align (dr);
5816
5817 if (memory_access_type == VMAT_GATHER_SCATTER)
5818 {
5819 tree vec_oprnd0 = NULL_TREE, vec_oprnd1 = NULL_TREE, op, src;
5820 tree arglist = TYPE_ARG_TYPES (TREE_TYPE (gs_info.decl));
5821 tree rettype, srctype, ptrtype, idxtype, masktype, scaletype;
5822 tree ptr, mask, var, scale, perm_mask = NULL_TREE;
5823 edge pe = loop_preheader_edge (loop);
5824 gimple_seq seq;
5825 basic_block new_bb;
5826 enum { NARROW, NONE, WIDEN } modifier;
5827 int scatter_off_nunits = TYPE_VECTOR_SUBPARTS (gs_info.offset_vectype);
5828
5829 if (nunits == (unsigned int) scatter_off_nunits)
5830 modifier = NONE;
5831 else if (nunits == (unsigned int) scatter_off_nunits / 2)
5832 {
5833 modifier = WIDEN;
5834
5835 vec_perm_builder sel (scatter_off_nunits, scatter_off_nunits, 1);
5836 for (i = 0; i < (unsigned int) scatter_off_nunits; ++i)
5837 sel.quick_push (i | nunits);
5838
5839 vec_perm_indices indices (sel, 1, scatter_off_nunits);
5840 perm_mask = vect_gen_perm_mask_checked (gs_info.offset_vectype,
5841 indices);
5842 gcc_assert (perm_mask != NULL_TREE);
5843 }
5844 else if (nunits == (unsigned int) scatter_off_nunits * 2)
5845 {
5846 modifier = NARROW;
5847
5848 vec_perm_builder sel (nunits, nunits, 1);
5849 for (i = 0; i < (unsigned int) nunits; ++i)
5850 sel.quick_push (i | scatter_off_nunits);
5851
5852 vec_perm_indices indices (sel, 2, nunits);
5853 perm_mask = vect_gen_perm_mask_checked (vectype, indices);
5854 gcc_assert (perm_mask != NULL_TREE);
5855 ncopies *= 2;
5856 }
5857 else
5858 gcc_unreachable ();
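  /* To make the selectors above concrete (purely illustrative numbers):
     for a scatter of V4DF data with V8SI offsets, nunits == 4 and
     scatter_off_nunits == 8, so modifier == WIDEN and the selector is
     { 4, 5, 6, 7, 4, 5, 6, 7 }; odd copies permute the offset vector
     with it so that the second data vector is stored using the upper
     half of the offsets.  In the NARROW case it is the upper half of
     the data vector that is permuted into place instead.  */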
5859
5860 rettype = TREE_TYPE (TREE_TYPE (gs_info.decl));
5861 ptrtype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
5862 masktype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
5863 idxtype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
5864 srctype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
5865 scaletype = TREE_VALUE (arglist);
5866
5867 gcc_checking_assert (TREE_CODE (masktype) == INTEGER_TYPE
5868 && TREE_CODE (rettype) == VOID_TYPE);
5869
5870 ptr = fold_convert (ptrtype, gs_info.base);
5871 if (!is_gimple_min_invariant (ptr))
5872 {
5873 ptr = force_gimple_operand (ptr, &seq, true, NULL_TREE);
5874 new_bb = gsi_insert_seq_on_edge_immediate (pe, seq);
5875 gcc_assert (!new_bb);
5876 }
5877
5878 /* Currently we support only unconditional scatter stores,
5879 so mask should be all ones. */
5880 mask = build_int_cst (masktype, -1);
5881 mask = vect_init_vector (stmt, mask, masktype, NULL);
5882
5883 scale = build_int_cst (scaletype, gs_info.scale);
5884
5885 prev_stmt_info = NULL;
5886 for (j = 0; j < ncopies; ++j)
5887 {
5888 if (j == 0)
5889 {
5890 src = vec_oprnd1
5891 = vect_get_vec_def_for_operand (gimple_assign_rhs1 (stmt), stmt);
5892 op = vec_oprnd0
5893 = vect_get_vec_def_for_operand (gs_info.offset, stmt);
5894 }
5895 else if (modifier != NONE && (j & 1))
5896 {
5897 if (modifier == WIDEN)
5898 {
5899 src = vec_oprnd1
5900 = vect_get_vec_def_for_stmt_copy (scatter_src_dt, vec_oprnd1);
5901 op = permute_vec_elements (vec_oprnd0, vec_oprnd0, perm_mask,
5902 stmt, gsi);
5903 }
5904 else if (modifier == NARROW)
5905 {
5906 src = permute_vec_elements (vec_oprnd1, vec_oprnd1, perm_mask,
5907 stmt, gsi);
5908 op = vec_oprnd0
5909 = vect_get_vec_def_for_stmt_copy (gs_info.offset_dt,
5910 vec_oprnd0);
5911 }
5912 else
5913 gcc_unreachable ();
5914 }
5915 else
5916 {
5917 src = vec_oprnd1
5918 = vect_get_vec_def_for_stmt_copy (scatter_src_dt, vec_oprnd1);
5919 op = vec_oprnd0
5920 = vect_get_vec_def_for_stmt_copy (gs_info.offset_dt,
5921 vec_oprnd0);
5922 }
5923
5924 if (!useless_type_conversion_p (srctype, TREE_TYPE (src)))
5925 {
5926 gcc_assert (TYPE_VECTOR_SUBPARTS (TREE_TYPE (src))
5927 == TYPE_VECTOR_SUBPARTS (srctype));
5928 var = vect_get_new_ssa_name (srctype, vect_simple_var);
5929 src = build1 (VIEW_CONVERT_EXPR, srctype, src);
5930 new_stmt = gimple_build_assign (var, VIEW_CONVERT_EXPR, src);
5931 vect_finish_stmt_generation (stmt, new_stmt, gsi);
5932 src = var;
5933 }
5934
5935 if (!useless_type_conversion_p (idxtype, TREE_TYPE (op)))
5936 {
5937 gcc_assert (TYPE_VECTOR_SUBPARTS (TREE_TYPE (op))
5938 == TYPE_VECTOR_SUBPARTS (idxtype));
5939 var = vect_get_new_ssa_name (idxtype, vect_simple_var);
5940 op = build1 (VIEW_CONVERT_EXPR, idxtype, op);
5941 new_stmt = gimple_build_assign (var, VIEW_CONVERT_EXPR, op);
5942 vect_finish_stmt_generation (stmt, new_stmt, gsi);
5943 op = var;
5944 }
5945
5946 new_stmt
5947 = gimple_build_call (gs_info.decl, 5, ptr, mask, op, src, scale);
5948
5949 vect_finish_stmt_generation (stmt, new_stmt, gsi);
5950
5951 if (prev_stmt_info == NULL)
5952 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
5953 else
5954 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
5955 prev_stmt_info = vinfo_for_stmt (new_stmt);
5956 }
5957 return true;
5958 }
5959
5960 grouped_store = STMT_VINFO_GROUPED_ACCESS (stmt_info);
5961 if (grouped_store)
5962 {
5963 first_stmt = GROUP_FIRST_ELEMENT (stmt_info);
5964 first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
5965 group_size = GROUP_SIZE (vinfo_for_stmt (first_stmt));
5966
5967 GROUP_STORE_COUNT (vinfo_for_stmt (first_stmt))++;
5968
5969 /* FORNOW */
5970 gcc_assert (!loop || !nested_in_vect_loop_p (loop, stmt));
5971
5972 /* We vectorize all the stmts of the interleaving group when we
5973 reach the last stmt in the group. */
5974 if (GROUP_STORE_COUNT (vinfo_for_stmt (first_stmt))
5975 < GROUP_SIZE (vinfo_for_stmt (first_stmt))
5976 && !slp)
5977 {
5978 *vec_stmt = NULL;
5979 return true;
5980 }
5981
5982 if (slp)
5983 {
5984 grouped_store = false;
5985 /* VEC_NUM is the number of vect stmts to be created for this
5986 group. */
5987 vec_num = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
5988 first_stmt = SLP_TREE_SCALAR_STMTS (slp_node)[0];
5989 gcc_assert (GROUP_FIRST_ELEMENT (vinfo_for_stmt (first_stmt)) == first_stmt);
5990 first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
5991 op = gimple_assign_rhs1 (first_stmt);
5992 }
5993 else
5994 /* VEC_NUM is the number of vect stmts to be created for this
5995 group. */
5996 vec_num = group_size;
5997
5998 ref_type = get_group_alias_ptr_type (first_stmt);
5999 }
6000 else
6001 {
6002 first_stmt = stmt;
6003 first_dr = dr;
6004 group_size = vec_num = 1;
6005 ref_type = reference_alias_ptr_type (DR_REF (first_dr));
6006 }
6007
6008 if (dump_enabled_p ())
6009 dump_printf_loc (MSG_NOTE, vect_location,
6010 "transform store. ncopies = %d\n", ncopies);
6011
6012 if (memory_access_type == VMAT_ELEMENTWISE
6013 || memory_access_type == VMAT_STRIDED_SLP)
6014 {
6015 gimple_stmt_iterator incr_gsi;
6016 bool insert_after;
6017 gimple *incr;
6018 tree offvar;
6019 tree ivstep;
6020 tree running_off;
6021 gimple_seq stmts = NULL;
6022 tree stride_base, stride_step, alias_off;
6023 tree vec_oprnd;
6024 unsigned int g;
6025
6026 gcc_assert (!nested_in_vect_loop_p (loop, stmt));
6027
6028 stride_base
6029 = fold_build_pointer_plus
6030 (unshare_expr (DR_BASE_ADDRESS (first_dr)),
6031 size_binop (PLUS_EXPR,
6032 convert_to_ptrofftype (unshare_expr (DR_OFFSET (first_dr))),
6033 convert_to_ptrofftype (DR_INIT (first_dr))));
6034 stride_step = fold_convert (sizetype, unshare_expr (DR_STEP (first_dr)));
6035
6036 /* For a store with loop-invariant (but other than power-of-2)
6037 stride (i.e. not a grouped access) like so:
6038
6039 for (i = 0; i < n; i += stride)
6040 array[i] = ...;
6041
6042 we generate a new induction variable and new stores from
6043 the components of the (vectorized) rhs:
6044
6045 for (j = 0; ; j += VF*stride)
6046 vectemp = ...;
6047 tmp1 = vectemp[0];
6048 array[j] = tmp1;
6049 tmp2 = vectemp[1];
6050 array[j + stride] = tmp2;
6051 ...
6052 */
6053
6054 unsigned nstores = nunits;
6055 unsigned lnel = 1;
6056 tree ltype = elem_type;
6057 tree lvectype = vectype;
6058 if (slp)
6059 {
6060 if (group_size < nunits
6061 && nunits % group_size == 0)
6062 {
6063 nstores = nunits / group_size;
6064 lnel = group_size;
6065 ltype = build_vector_type (elem_type, group_size);
6066 lvectype = vectype;
6067
6068 /* First check whether the vec_extract optab supports extraction
6069 of vector elts directly; if not, try the fallbacks below. */
6070 scalar_mode elmode = SCALAR_TYPE_MODE (elem_type);
6071 machine_mode vmode;
6072 if (!mode_for_vector (elmode, group_size).exists (&vmode)
6073 || !VECTOR_MODE_P (vmode)
6074 || (convert_optab_handler (vec_extract_optab,
6075 TYPE_MODE (vectype), vmode)
6076 == CODE_FOR_nothing))
6077 {
6078 /* Try to avoid emitting an extract of vector elements
6079 by performing the extracts using an integer type of the
6080 same size as the group, re-interpreting the vector as a
6081 vector of those integers and extracting group-sized
6082 chunks from that if supported. */
6083 unsigned lsize
6084 = group_size * GET_MODE_BITSIZE (elmode);
6085 elmode = int_mode_for_size (lsize, 0).require ();
6086 /* If we can't construct such a vector fall back to
6087 element extracts from the original vector type and
6088 element size stores. */
6089 if (mode_for_vector (elmode,
6090 nunits / group_size).exists (&vmode)
6091 && VECTOR_MODE_P (vmode)
6092 && (convert_optab_handler (vec_extract_optab,
6093 vmode, elmode)
6094 != CODE_FOR_nothing))
6095 {
6096 nstores = nunits / group_size;
6097 lnel = group_size;
6098 ltype = build_nonstandard_integer_type (lsize, 1);
6099 lvectype = build_vector_type (ltype, nstores);
6100 }
6101 /* Else fall back to vector extraction anyway.
6102 Fewer stores are more important than avoiding spilling
6103 of the vector we extract from. Compared to the
6104 construction case in vectorizable_load, no store-forwarding
6105 issue exists here for reasonable archs. */
6106 }
6107 }
6108 else if (group_size >= nunits
6109 && group_size % nunits == 0)
6110 {
6111 nstores = 1;
6112 lnel = nunits;
6113 ltype = vectype;
6114 lvectype = vectype;
6115 }
6116 ltype = build_aligned_type (ltype, TYPE_ALIGN (elem_type));
6117 ncopies = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
6118 }
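	  /* A worked example of the choices above (illustrative only):
	     for a V4SF vectype and group_size == 2 we get nstores == 2
	     and lnel == 2.  If the target can vec_extract a V2SF from a
	     V4SF, ltype is vector(2) float; otherwise ltype becomes a
	     64-bit integer and lvectype a two-element integer vector,
	     so each store below writes one 64-bit chunk rather than two
	     32-bit elements.  */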
6119
6120 ivstep = stride_step;
6121 ivstep = fold_build2 (MULT_EXPR, TREE_TYPE (ivstep), ivstep,
6122 build_int_cst (TREE_TYPE (ivstep), vf));
6123
6124 standard_iv_increment_position (loop, &incr_gsi, &insert_after);
6125
6126 create_iv (stride_base, ivstep, NULL,
6127 loop, &incr_gsi, insert_after,
6128 &offvar, NULL);
6129 incr = gsi_stmt (incr_gsi);
6130 set_vinfo_for_stmt (incr, new_stmt_vec_info (incr, loop_vinfo));
6131
6132 stride_step = force_gimple_operand (stride_step, &stmts, true, NULL_TREE);
6133 if (stmts)
6134 gsi_insert_seq_on_edge_immediate (loop_preheader_edge (loop), stmts);
6135
6136 prev_stmt_info = NULL;
6137 alias_off = build_int_cst (ref_type, 0);
6138 next_stmt = first_stmt;
6139 for (g = 0; g < group_size; g++)
6140 {
6141 running_off = offvar;
6142 if (g)
6143 {
6144 tree size = TYPE_SIZE_UNIT (ltype);
6145 tree pos = fold_build2 (MULT_EXPR, sizetype, size_int (g),
6146 size);
6147 tree newoff = copy_ssa_name (running_off, NULL);
6148 incr = gimple_build_assign (newoff, POINTER_PLUS_EXPR,
6149 running_off, pos);
6150 vect_finish_stmt_generation (stmt, incr, gsi);
6151 running_off = newoff;
6152 }
6153 unsigned int group_el = 0;
6154 unsigned HOST_WIDE_INT
6155 elsz = tree_to_uhwi (TYPE_SIZE_UNIT (TREE_TYPE (vectype)));
6156 for (j = 0; j < ncopies; j++)
6157 {
6158 /* We've set op and dt above, from gimple_assign_rhs1(stmt),
6159 and first_stmt == stmt. */
6160 if (j == 0)
6161 {
6162 if (slp)
6163 {
6164 vect_get_vec_defs (op, NULL_TREE, stmt, &vec_oprnds, NULL,
6165 slp_node);
6166 vec_oprnd = vec_oprnds[0];
6167 }
6168 else
6169 {
6170 gcc_assert (gimple_assign_single_p (next_stmt));
6171 op = gimple_assign_rhs1 (next_stmt);
6172 vec_oprnd = vect_get_vec_def_for_operand (op, next_stmt);
6173 }
6174 }
6175 else
6176 {
6177 if (slp)
6178 vec_oprnd = vec_oprnds[j];
6179 else
6180 {
6181 vect_is_simple_use (vec_oprnd, vinfo, &def_stmt, &dt);
6182 vec_oprnd = vect_get_vec_def_for_stmt_copy (dt, vec_oprnd);
6183 }
6184 }
6185 /* Pun the vector to extract from if necessary. */
6186 if (lvectype != vectype)
6187 {
6188 tree tem = make_ssa_name (lvectype);
6189 gimple *pun
6190 = gimple_build_assign (tem, build1 (VIEW_CONVERT_EXPR,
6191 lvectype, vec_oprnd));
6192 vect_finish_stmt_generation (stmt, pun, gsi);
6193 vec_oprnd = tem;
6194 }
6195 for (i = 0; i < nstores; i++)
6196 {
6197 tree newref, newoff;
6198 gimple *incr, *assign;
6199 tree size = TYPE_SIZE (ltype);
6200 /* Extract the i'th component. */
6201 tree pos = fold_build2 (MULT_EXPR, bitsizetype,
6202 bitsize_int (i), size);
6203 tree elem = fold_build3 (BIT_FIELD_REF, ltype, vec_oprnd,
6204 size, pos);
6205
6206 elem = force_gimple_operand_gsi (gsi, elem, true,
6207 NULL_TREE, true,
6208 GSI_SAME_STMT);
6209
6210 tree this_off = build_int_cst (TREE_TYPE (alias_off),
6211 group_el * elsz);
6212 newref = build2 (MEM_REF, ltype,
6213 running_off, this_off);
6214
6215 /* And store it to *running_off. */
6216 assign = gimple_build_assign (newref, elem);
6217 vect_finish_stmt_generation (stmt, assign, gsi);
6218
6219 group_el += lnel;
6220 if (! slp
6221 || group_el == group_size)
6222 {
6223 newoff = copy_ssa_name (running_off, NULL);
6224 incr = gimple_build_assign (newoff, POINTER_PLUS_EXPR,
6225 running_off, stride_step);
6226 vect_finish_stmt_generation (stmt, incr, gsi);
6227
6228 running_off = newoff;
6229 group_el = 0;
6230 }
6231 if (g == group_size - 1
6232 && !slp)
6233 {
6234 if (j == 0 && i == 0)
6235 STMT_VINFO_VEC_STMT (stmt_info)
6236 = *vec_stmt = assign;
6237 else
6238 STMT_VINFO_RELATED_STMT (prev_stmt_info) = assign;
6239 prev_stmt_info = vinfo_for_stmt (assign);
6240 }
6241 }
6242 }
6243 next_stmt = GROUP_NEXT_ELEMENT (vinfo_for_stmt (next_stmt));
6244 if (slp)
6245 break;
6246 }
6247
6248 vec_oprnds.release ();
6249 return true;
6250 }
6251
6252 auto_vec<tree> dr_chain (group_size);
6253 oprnds.create (group_size);
6254
6255 alignment_support_scheme = vect_supportable_dr_alignment (first_dr, false);
6256 gcc_assert (alignment_support_scheme);
6257 /* Targets with store-lane instructions must not require explicit
6258 realignment. */
6259 gcc_assert (memory_access_type != VMAT_LOAD_STORE_LANES
6260 || alignment_support_scheme == dr_aligned
6261 || alignment_support_scheme == dr_unaligned_supported);
6262
6263 if (memory_access_type == VMAT_CONTIGUOUS_DOWN
6264 || memory_access_type == VMAT_CONTIGUOUS_REVERSE)
6265 offset = size_int (-TYPE_VECTOR_SUBPARTS (vectype) + 1);
6266
6267 if (memory_access_type == VMAT_LOAD_STORE_LANES)
6268 aggr_type = build_array_type_nelts (elem_type, vec_num * nunits);
6269 else
6270 aggr_type = vectype;
6271
6272 /* In case the vectorization factor (VF) is bigger than the number
6273 of elements that we can fit in a vectype (nunits), we have to generate
6274 more than one vector stmt - i.e., we need to "unroll" the
6275 vector stmt by a factor VF/nunits. For more details see documentation in
6276 vect_get_vec_def_for_copy_stmt. */
6277
6278 /* In case of interleaving (non-unit grouped access):
6279
6280 S1: &base + 2 = x2
6281 S2: &base = x0
6282 S3: &base + 1 = x1
6283 S4: &base + 3 = x3
6284
6285 We create vectorized stores starting from base address (the access of the
6286 first stmt in the chain (S2 in the above example), when the last store stmt
6287 of the chain (S4) is reached:
6288
6289 VS1: &base = vx2
6290 VS2: &base + vec_size*1 = vx0
6291 VS3: &base + vec_size*2 = vx1
6292 VS4: &base + vec_size*3 = vx3
6293
6294 Then permutation statements are generated:
6295
6296 VS5: vx5 = VEC_PERM_EXPR < vx0, vx3, {0, 8, 1, 9, 2, 10, 3, 11} >
6297 VS6: vx6 = VEC_PERM_EXPR < vx0, vx3, {4, 12, 5, 13, 6, 14, 7, 15} >
6298 ...
6299
6300 And they are put in STMT_VINFO_VEC_STMT of the corresponding scalar stmts
6301 (the order of the data-refs in the output of vect_permute_store_chain
6302 corresponds to the order of scalar stmts in the interleaving chain - see
6303 the documentation of vect_permute_store_chain()).
6304
6305 In case of both multiple types and interleaving, above vector stores and
6306 permutation stmts are created for every copy. The result vector stmts are
6307 put in STMT_VINFO_VEC_STMT for the first copy and in the corresponding
6308 STMT_VINFO_RELATED_STMT for the next copies.
6309 */
6310
6311 prev_stmt_info = NULL;
6312 for (j = 0; j < ncopies; j++)
6313 {
6314
6315 if (j == 0)
6316 {
6317 if (slp)
6318 {
6319 /* Get vectorized arguments for SLP_NODE. */
6320 vect_get_vec_defs (op, NULL_TREE, stmt, &vec_oprnds,
6321 NULL, slp_node);
6322
6323 vec_oprnd = vec_oprnds[0];
6324 }
6325 else
6326 {
6327 /* For interleaved stores we collect vectorized defs for all the
6328 stores in the group in DR_CHAIN and OPRNDS. DR_CHAIN is then
6329 used as an input to vect_permute_store_chain(), and OPRNDS as
6330 an input to vect_get_vec_def_for_stmt_copy() for the next copy.
6331
6332 If the store is not grouped, GROUP_SIZE is 1, and DR_CHAIN and
6333 OPRNDS are of size 1. */
6334 next_stmt = first_stmt;
6335 for (i = 0; i < group_size; i++)
6336 {
6337 /* Since gaps are not supported for interleaved stores,
6338 GROUP_SIZE is the exact number of stmts in the chain.
6339 Therefore, NEXT_STMT can't be NULL_TREE. In case that
6340 there is no interleaving, GROUP_SIZE is 1, and only one
6341 iteration of the loop will be executed. */
6342 gcc_assert (next_stmt
6343 && gimple_assign_single_p (next_stmt));
6344 op = gimple_assign_rhs1 (next_stmt);
6345
6346 vec_oprnd = vect_get_vec_def_for_operand (op, next_stmt);
6347 dr_chain.quick_push (vec_oprnd);
6348 oprnds.quick_push (vec_oprnd);
6349 next_stmt = GROUP_NEXT_ELEMENT (vinfo_for_stmt (next_stmt));
6350 }
6351 }
6352
6353 /* We should have caught mismatched types earlier. */
6354 gcc_assert (useless_type_conversion_p (vectype,
6355 TREE_TYPE (vec_oprnd)));
6356 bool simd_lane_access_p
6357 = STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info);
6358 if (simd_lane_access_p
6359 && TREE_CODE (DR_BASE_ADDRESS (first_dr)) == ADDR_EXPR
6360 && VAR_P (TREE_OPERAND (DR_BASE_ADDRESS (first_dr), 0))
6361 && integer_zerop (DR_OFFSET (first_dr))
6362 && integer_zerop (DR_INIT (first_dr))
6363 && alias_sets_conflict_p (get_alias_set (aggr_type),
6364 get_alias_set (TREE_TYPE (ref_type))))
6365 {
6366 dataref_ptr = unshare_expr (DR_BASE_ADDRESS (first_dr));
6367 dataref_offset = build_int_cst (ref_type, 0);
6368 inv_p = false;
6369 }
6370 else
6371 dataref_ptr
6372 = vect_create_data_ref_ptr (first_stmt, aggr_type,
6373 simd_lane_access_p ? loop : NULL,
6374 offset, &dummy, gsi, &ptr_incr,
6375 simd_lane_access_p, &inv_p);
6376 gcc_assert (bb_vinfo || !inv_p);
6377 }
6378 else
6379 {
6380 /* For interleaved stores we created vectorized defs for all the
6381 defs stored in OPRNDS in the previous iteration (previous copy).
6382 DR_CHAIN is then used as an input to vect_permute_store_chain(),
6383 and OPRNDS as an input to vect_get_vec_def_for_stmt_copy() for the
6384 next copy.
6385 If the store is not grouped, GROUP_SIZE is 1, and DR_CHAIN and
6386 OPRNDS are of size 1. */
6387 for (i = 0; i < group_size; i++)
6388 {
6389 op = oprnds[i];
6390 vect_is_simple_use (op, vinfo, &def_stmt, &dt);
6391 vec_oprnd = vect_get_vec_def_for_stmt_copy (dt, op);
6392 dr_chain[i] = vec_oprnd;
6393 oprnds[i] = vec_oprnd;
6394 }
6395 if (dataref_offset)
6396 dataref_offset
6397 = int_const_binop (PLUS_EXPR, dataref_offset,
6398 TYPE_SIZE_UNIT (aggr_type));
6399 else
6400 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi, stmt,
6401 TYPE_SIZE_UNIT (aggr_type));
6402 }
6403
6404 if (memory_access_type == VMAT_LOAD_STORE_LANES)
6405 {
6406 tree vec_array;
6407
6408 /* Combine all the vectors into an array. */
6409 vec_array = create_vector_array (vectype, vec_num);
6410 for (i = 0; i < vec_num; i++)
6411 {
6412 vec_oprnd = dr_chain[i];
6413 write_vector_array (stmt, gsi, vec_oprnd, vec_array, i);
6414 }
6415
6416 /* Emit:
6417 MEM_REF[...all elements...] = STORE_LANES (VEC_ARRAY). */
6418 data_ref = create_array_ref (aggr_type, dataref_ptr, ref_type);
6419 gcall *call = gimple_build_call_internal (IFN_STORE_LANES, 1,
6420 vec_array);
6421 gimple_call_set_lhs (call, data_ref);
6422 gimple_call_set_nothrow (call, true);
6423 new_stmt = call;
6424 vect_finish_stmt_generation (stmt, new_stmt, gsi);
6425 }
6426 else
6427 {
6428 new_stmt = NULL;
6429 if (grouped_store)
6430 {
6431 if (j == 0)
6432 result_chain.create (group_size);
6433 /* Permute. */
6434 vect_permute_store_chain (dr_chain, group_size, stmt, gsi,
6435 &result_chain);
6436 }
6437
6438 next_stmt = first_stmt;
6439 for (i = 0; i < vec_num; i++)
6440 {
6441 unsigned align, misalign;
6442
6443 if (i > 0)
6444 /* Bump the vector pointer. */
6445 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi,
6446 stmt, NULL_TREE);
6447
6448 if (slp)
6449 vec_oprnd = vec_oprnds[i];
6450 else if (grouped_store)
6451 /* For grouped stores vectorized defs are interleaved in
6452 vect_permute_store_chain(). */
6453 vec_oprnd = result_chain[i];
6454
6455 data_ref = fold_build2 (MEM_REF, vectype,
6456 dataref_ptr,
6457 dataref_offset
6458 ? dataref_offset
6459 : build_int_cst (ref_type, 0));
6460 align = DR_TARGET_ALIGNMENT (first_dr);
6461 if (aligned_access_p (first_dr))
6462 misalign = 0;
6463 else if (DR_MISALIGNMENT (first_dr) == -1)
6464 {
6465 align = dr_alignment (vect_dr_behavior (first_dr));
6466 misalign = 0;
6467 TREE_TYPE (data_ref)
6468 = build_aligned_type (TREE_TYPE (data_ref),
6469 align * BITS_PER_UNIT);
6470 }
6471 else
6472 {
6473 TREE_TYPE (data_ref)
6474 = build_aligned_type (TREE_TYPE (data_ref),
6475 TYPE_ALIGN (elem_type));
6476 misalign = DR_MISALIGNMENT (first_dr);
6477 }
6478 if (dataref_offset == NULL_TREE
6479 && TREE_CODE (dataref_ptr) == SSA_NAME)
6480 set_ptr_info_alignment (get_ptr_info (dataref_ptr), align,
6481 misalign);
6482
6483 if (memory_access_type == VMAT_CONTIGUOUS_REVERSE)
6484 {
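		  /* For example, for V4SI the mask built by
		     perm_mask_for_reverse is { 3, 2, 1, 0 }, so the
		     elements are written in reverse order to match the
		     negative-step access.  */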
6485 tree perm_mask = perm_mask_for_reverse (vectype);
6486 tree perm_dest
6487 = vect_create_destination_var (gimple_assign_rhs1 (stmt),
6488 vectype);
6489 tree new_temp = make_ssa_name (perm_dest);
6490
6491 /* Generate the permute statement. */
6492 gimple *perm_stmt
6493 = gimple_build_assign (new_temp, VEC_PERM_EXPR, vec_oprnd,
6494 vec_oprnd, perm_mask);
6495 vect_finish_stmt_generation (stmt, perm_stmt, gsi);
6496
6497 perm_stmt = SSA_NAME_DEF_STMT (new_temp);
6498 vec_oprnd = new_temp;
6499 }
6500
6501 /* Arguments are ready. Create the new vector stmt. */
6502 new_stmt = gimple_build_assign (data_ref, vec_oprnd);
6503 vect_finish_stmt_generation (stmt, new_stmt, gsi);
6504
6505 if (slp)
6506 continue;
6507
6508 next_stmt = GROUP_NEXT_ELEMENT (vinfo_for_stmt (next_stmt));
6509 if (!next_stmt)
6510 break;
6511 }
6512 }
6513 if (!slp)
6514 {
6515 if (j == 0)
6516 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
6517 else
6518 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
6519 prev_stmt_info = vinfo_for_stmt (new_stmt);
6520 }
6521 }
6522
6523 oprnds.release ();
6524 result_chain.release ();
6525 vec_oprnds.release ();
6526
6527 return true;
6528 }
6529
6530 /* Given a vector type VECTYPE, turns permutation SEL into the equivalent
6531 VECTOR_CST mask. No checks are made that the target platform supports the
6532 mask, so callers may wish to test can_vec_perm_const_p separately, or use
6533 vect_gen_perm_mask_checked. */
6534
6535 tree
6536 vect_gen_perm_mask_any (tree vectype, const vec_perm_indices &sel)
6537 {
6538 tree mask_elt_type, mask_type;
6539
6540 mask_elt_type = lang_hooks.types.type_for_mode
6541 (int_mode_for_mode (TYPE_MODE (TREE_TYPE (vectype))).require (), 1);
6542 mask_type = get_vectype_for_scalar_type (mask_elt_type);
6543 return vec_perm_indices_to_tree (mask_type, sel);
6544 }
6545
6546 /* Checked version of vect_gen_perm_mask_any. Asserts can_vec_perm_const_p,
6547 i.e. that the target supports the pattern _for arbitrary input vectors_. */
6548
6549 tree
6550 vect_gen_perm_mask_checked (tree vectype, const vec_perm_indices &sel)
6551 {
6552 gcc_assert (can_vec_perm_const_p (TYPE_MODE (vectype), sel));
6553 return vect_gen_perm_mask_any (vectype, sel);
6554 }
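/* Typical use of the two helpers above, mirroring the gather/scatter
   code in this file (the reversal below is just an example selector):

     vec_perm_builder sel (nunits, nunits, 1);
     for (i = 0; i < nunits; ++i)
       sel.quick_push (nunits - 1 - i);
     vec_perm_indices indices (sel, 1, nunits);
     tree mask = vect_gen_perm_mask_checked (vectype, indices);

   The checked variant is only valid when the caller already knows the
   target can handle the permutation for variable input vectors.  */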
6555
6556 /* Given vector variables X and Y that were generated for the scalar
6557 STMT, generate instructions to permute the vector elements of X and Y
6558 using permutation mask MASK_VEC, insert them at *GSI and return the
6559 permuted vector variable. */
6560
6561 static tree
6562 permute_vec_elements (tree x, tree y, tree mask_vec, gimple *stmt,
6563 gimple_stmt_iterator *gsi)
6564 {
6565 tree vectype = TREE_TYPE (x);
6566 tree perm_dest, data_ref;
6567 gimple *perm_stmt;
6568
6569 perm_dest = vect_create_destination_var (gimple_get_lhs (stmt), vectype);
6570 data_ref = make_ssa_name (perm_dest);
6571
6572 /* Generate the permute statement. */
6573 perm_stmt = gimple_build_assign (data_ref, VEC_PERM_EXPR, x, y, mask_vec);
6574 vect_finish_stmt_generation (stmt, perm_stmt, gsi);
6575
6576 return data_ref;
6577 }
6578
6579 /* Hoist the definitions of all SSA uses on STMT out of the loop LOOP,
6580 inserting them on the loop's preheader edge. Returns true if we
6581 were successful in doing so (and thus STMT can then be moved),
6582 otherwise returns false. */
6583
6584 static bool
6585 hoist_defs_of_uses (gimple *stmt, struct loop *loop)
6586 {
6587 ssa_op_iter i;
6588 tree op;
6589 bool any = false;
6590
6591 FOR_EACH_SSA_TREE_OPERAND (op, stmt, i, SSA_OP_USE)
6592 {
6593 gimple *def_stmt = SSA_NAME_DEF_STMT (op);
6594 if (!gimple_nop_p (def_stmt)
6595 && flow_bb_inside_loop_p (loop, gimple_bb (def_stmt)))
6596 {
6597 /* Make sure we don't need to recurse. While we could do
6598 so in simple cases, for more complex use webs we don't
6599 have an easy way to preserve stmt order to fulfil
6600 dependencies within them. */
6601 tree op2;
6602 ssa_op_iter i2;
6603 if (gimple_code (def_stmt) == GIMPLE_PHI)
6604 return false;
6605 FOR_EACH_SSA_TREE_OPERAND (op2, def_stmt, i2, SSA_OP_USE)
6606 {
6607 gimple *def_stmt2 = SSA_NAME_DEF_STMT (op2);
6608 if (!gimple_nop_p (def_stmt2)
6609 && flow_bb_inside_loop_p (loop, gimple_bb (def_stmt2)))
6610 return false;
6611 }
6612 any = true;
6613 }
6614 }
6615
6616 if (!any)
6617 return true;
6618
6619 FOR_EACH_SSA_TREE_OPERAND (op, stmt, i, SSA_OP_USE)
6620 {
6621 gimple *def_stmt = SSA_NAME_DEF_STMT (op);
6622 if (!gimple_nop_p (def_stmt)
6623 && flow_bb_inside_loop_p (loop, gimple_bb (def_stmt)))
6624 {
6625 gimple_stmt_iterator gsi = gsi_for_stmt (def_stmt);
6626 gsi_remove (&gsi, false);
6627 gsi_insert_on_edge_immediate (loop_preheader_edge (loop), def_stmt);
6628 }
6629 }
6630
6631 return true;
6632 }
6633
6634 /* vectorizable_load.
6635
6636 Check if STMT reads a non-scalar data-ref (array/pointer/structure) that
6637 can be vectorized.
6638 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
6639 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
6640 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
6641
6642 static bool
6643 vectorizable_load (gimple *stmt, gimple_stmt_iterator *gsi, gimple **vec_stmt,
6644 slp_tree slp_node, slp_instance slp_node_instance)
6645 {
6646 tree scalar_dest;
6647 tree vec_dest = NULL;
6648 tree data_ref = NULL;
6649 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
6650 stmt_vec_info prev_stmt_info;
6651 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
6652 struct loop *loop = NULL;
6653 struct loop *containing_loop = (gimple_bb (stmt))->loop_father;
6654 bool nested_in_vect_loop = false;
6655 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info), *first_dr = NULL;
6656 tree elem_type;
6657 tree new_temp;
6658 machine_mode mode;
6659 gimple *new_stmt = NULL;
6660 tree dummy;
6661 enum dr_alignment_support alignment_support_scheme;
6662 tree dataref_ptr = NULL_TREE;
6663 tree dataref_offset = NULL_TREE;
6664 gimple *ptr_incr = NULL;
6665 int ncopies;
6666 int i, j, group_size, group_gap_adj;
6667 tree msq = NULL_TREE, lsq;
6668 tree offset = NULL_TREE;
6669 tree byte_offset = NULL_TREE;
6670 tree realignment_token = NULL_TREE;
6671 gphi *phi = NULL;
6672 vec<tree> dr_chain = vNULL;
6673 bool grouped_load = false;
6674 gimple *first_stmt;
6675 gimple *first_stmt_for_drptr = NULL;
6676 bool inv_p;
6677 bool compute_in_loop = false;
6678 struct loop *at_loop;
6679 int vec_num;
6680 bool slp = (slp_node != NULL);
6681 bool slp_perm = false;
6682 enum tree_code code;
6683 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
6684 int vf;
6685 tree aggr_type;
6686 gather_scatter_info gs_info;
6687 vec_info *vinfo = stmt_info->vinfo;
6688 tree ref_type;
6689
6690 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
6691 return false;
6692
6693 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
6694 && ! vec_stmt)
6695 return false;
6696
6697 /* Is vectorizable load? */
6698 if (!is_gimple_assign (stmt))
6699 return false;
6700
6701 scalar_dest = gimple_assign_lhs (stmt);
6702 if (TREE_CODE (scalar_dest) != SSA_NAME)
6703 return false;
6704
6705 code = gimple_assign_rhs_code (stmt);
6706 if (code != ARRAY_REF
6707 && code != BIT_FIELD_REF
6708 && code != INDIRECT_REF
6709 && code != COMPONENT_REF
6710 && code != IMAGPART_EXPR
6711 && code != REALPART_EXPR
6712 && code != MEM_REF
6713 && TREE_CODE_CLASS (code) != tcc_declaration)
6714 return false;
6715
6716 if (!STMT_VINFO_DATA_REF (stmt_info))
6717 return false;
6718
6719 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
6720 int nunits = TYPE_VECTOR_SUBPARTS (vectype);
6721
6722 if (loop_vinfo)
6723 {
6724 loop = LOOP_VINFO_LOOP (loop_vinfo);
6725 nested_in_vect_loop = nested_in_vect_loop_p (loop, stmt);
6726 vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
6727 }
6728 else
6729 vf = 1;
6730
6731 /* Multiple types in SLP are handled by creating the appropriate number of
6732 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
6733 case of SLP. */
6734 if (slp)
6735 ncopies = 1;
6736 else
6737 ncopies = vect_get_num_copies (loop_vinfo, vectype);
6738
6739 gcc_assert (ncopies >= 1);
6740
6741 /* FORNOW. This restriction should be relaxed. */
6742 if (nested_in_vect_loop && ncopies > 1)
6743 {
6744 if (dump_enabled_p ())
6745 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6746 "multiple types in nested loop.\n");
6747 return false;
6748 }
6749
6750 /* Invalidate assumptions made by dependence analysis when vectorization
6751 on the unrolled body effectively re-orders stmts. */
6752 if (ncopies > 1
6753 && STMT_VINFO_MIN_NEG_DIST (stmt_info) != 0
6754 && ((unsigned)LOOP_VINFO_VECT_FACTOR (loop_vinfo)
6755 > STMT_VINFO_MIN_NEG_DIST (stmt_info)))
6756 {
6757 if (dump_enabled_p ())
6758 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6759 "cannot perform implicit CSE when unrolling "
6760 "with negative dependence distance\n");
6761 return false;
6762 }
6763
6764 elem_type = TREE_TYPE (vectype);
6765 mode = TYPE_MODE (vectype);
6766
6767 /* FORNOW. In some cases can vectorize even if data-type not supported
6768 (e.g. - data copies). */
6769 if (optab_handler (mov_optab, mode) == CODE_FOR_nothing)
6770 {
6771 if (dump_enabled_p ())
6772 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6773 "Aligned load, but unsupported type.\n");
6774 return false;
6775 }
6776
6777 /* Check if the load is a part of an interleaving chain. */
6778 if (STMT_VINFO_GROUPED_ACCESS (stmt_info))
6779 {
6780 grouped_load = true;
6781 /* FORNOW */
6782 gcc_assert (!nested_in_vect_loop);
6783 gcc_assert (!STMT_VINFO_GATHER_SCATTER_P (stmt_info));
6784
6785 first_stmt = GROUP_FIRST_ELEMENT (stmt_info);
6786 group_size = GROUP_SIZE (vinfo_for_stmt (first_stmt));
6787
6788 if (slp && SLP_TREE_LOAD_PERMUTATION (slp_node).exists ())
6789 slp_perm = true;
6790
6791 /* Invalidate assumptions made by dependence analysis when vectorization
6792 on the unrolled body effectively re-orders stmts. */
6793 if (!PURE_SLP_STMT (stmt_info)
6794 && STMT_VINFO_MIN_NEG_DIST (stmt_info) != 0
6795 && ((unsigned)LOOP_VINFO_VECT_FACTOR (loop_vinfo)
6796 > STMT_VINFO_MIN_NEG_DIST (stmt_info)))
6797 {
6798 if (dump_enabled_p ())
6799 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6800 "cannot perform implicit CSE when performing "
6801 "group loads with negative dependence distance\n");
6802 return false;
6803 }
6804
6805 /* Similarly, when the stmt is a load that is both part of an SLP
6806 instance and a loop-vectorized stmt via the same-dr mechanism,
6807 we have to give up. */
6808 if (STMT_VINFO_GROUP_SAME_DR_STMT (stmt_info)
6809 && (STMT_SLP_TYPE (stmt_info)
6810 != STMT_SLP_TYPE (vinfo_for_stmt
6811 (STMT_VINFO_GROUP_SAME_DR_STMT (stmt_info)))))
6812 {
6813 if (dump_enabled_p ())
6814 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6815 "conflicting SLP types for CSEd load\n");
6816 return false;
6817 }
6818 }
6819
6820 vect_memory_access_type memory_access_type;
6821 if (!get_load_store_type (stmt, vectype, slp, VLS_LOAD, ncopies,
6822 &memory_access_type, &gs_info))
6823 return false;
6824
6825 if (!vec_stmt) /* transformation not required. */
6826 {
6827 if (!slp)
6828 STMT_VINFO_MEMORY_ACCESS_TYPE (stmt_info) = memory_access_type;
6829 STMT_VINFO_TYPE (stmt_info) = load_vec_info_type;
6830 /* The SLP costs are calculated during SLP analysis. */
6831 if (!PURE_SLP_STMT (stmt_info))
6832 vect_model_load_cost (stmt_info, ncopies, memory_access_type,
6833 NULL, NULL, NULL);
6834 return true;
6835 }
6836
6837 if (!slp)
6838 gcc_assert (memory_access_type
6839 == STMT_VINFO_MEMORY_ACCESS_TYPE (stmt_info));
6840
6841 if (dump_enabled_p ())
6842 dump_printf_loc (MSG_NOTE, vect_location,
6843 "transform load. ncopies = %d\n", ncopies);
6844
6845 /* Transform. */
6846
6847 ensure_base_align (dr);
6848
6849 if (memory_access_type == VMAT_GATHER_SCATTER)
6850 {
6851 tree vec_oprnd0 = NULL_TREE, op;
6852 tree arglist = TYPE_ARG_TYPES (TREE_TYPE (gs_info.decl));
6853 tree rettype, srctype, ptrtype, idxtype, masktype, scaletype;
6854 tree ptr, mask, var, scale, merge, perm_mask = NULL_TREE, prev_res = NULL_TREE;
6855 edge pe = loop_preheader_edge (loop);
6856 gimple_seq seq;
6857 basic_block new_bb;
6858 enum { NARROW, NONE, WIDEN } modifier;
6859 int gather_off_nunits = TYPE_VECTOR_SUBPARTS (gs_info.offset_vectype);
6860
6861 if (nunits == gather_off_nunits)
6862 modifier = NONE;
6863 else if (nunits == gather_off_nunits / 2)
6864 {
6865 modifier = WIDEN;
6866
6867 vec_perm_builder sel (gather_off_nunits, gather_off_nunits, 1);
6868 for (i = 0; i < gather_off_nunits; ++i)
6869 sel.quick_push (i | nunits);
6870
6871 vec_perm_indices indices (sel, 1, gather_off_nunits);
6872 perm_mask = vect_gen_perm_mask_checked (gs_info.offset_vectype,
6873 indices);
6874 }
6875 else if (nunits == gather_off_nunits * 2)
6876 {
6877 modifier = NARROW;
6878
6879 vec_perm_builder sel (nunits, nunits, 1);
6880 for (i = 0; i < nunits; ++i)
6881 sel.quick_push (i < gather_off_nunits
6882 ? i : i + nunits - gather_off_nunits);
6883
6884 vec_perm_indices indices (sel, 2, nunits);
6885 perm_mask = vect_gen_perm_mask_checked (vectype, indices);
6886 ncopies *= 2;
6887 }
6888 else
6889 gcc_unreachable ();
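  /* As with the scatter case, a concrete (illustrative) instance of
     the selectors above: for a V8SF load with V4DI offsets, nunits == 8
     and gather_off_nunits == 4, so modifier == NARROW and the selector
     is { 0, 1, 2, 3, 8, 9, 10, 11 }, which concatenates the low halves
     of two successive gather results into one full vector.  In the
     WIDEN case the selector instead exposes the upper half of the
     offset vector to the second gather.  */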
6890
6891 rettype = TREE_TYPE (TREE_TYPE (gs_info.decl));
6892 srctype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
6893 ptrtype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
6894 idxtype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
6895 masktype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
6896 scaletype = TREE_VALUE (arglist);
6897 gcc_checking_assert (types_compatible_p (srctype, rettype));
6898
6899 vec_dest = vect_create_destination_var (scalar_dest, vectype);
6900
6901 ptr = fold_convert (ptrtype, gs_info.base);
6902 if (!is_gimple_min_invariant (ptr))
6903 {
6904 ptr = force_gimple_operand (ptr, &seq, true, NULL_TREE);
6905 new_bb = gsi_insert_seq_on_edge_immediate (pe, seq);
6906 gcc_assert (!new_bb);
6907 }
6908
6909 /* Currently we support only unconditional gather loads,
6910 so mask should be all ones. */
6911 if (TREE_CODE (masktype) == INTEGER_TYPE)
6912 mask = build_int_cst (masktype, -1);
6913 else if (TREE_CODE (TREE_TYPE (masktype)) == INTEGER_TYPE)
6914 {
6915 mask = build_int_cst (TREE_TYPE (masktype), -1);
6916 mask = build_vector_from_val (masktype, mask);
6917 mask = vect_init_vector (stmt, mask, masktype, NULL);
6918 }
6919 else if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (masktype)))
6920 {
6921 REAL_VALUE_TYPE r;
6922 long tmp[6];
6923 for (j = 0; j < 6; ++j)
6924 tmp[j] = -1;
6925 real_from_target (&r, tmp, TYPE_MODE (TREE_TYPE (masktype)));
6926 mask = build_real (TREE_TYPE (masktype), r);
6927 mask = build_vector_from_val (masktype, mask);
6928 mask = vect_init_vector (stmt, mask, masktype, NULL);
6929 }
6930 else
6931 gcc_unreachable ();
6932
6933 scale = build_int_cst (scaletype, gs_info.scale);
6934
6935 if (TREE_CODE (TREE_TYPE (rettype)) == INTEGER_TYPE)
6936 merge = build_int_cst (TREE_TYPE (rettype), 0);
6937 else if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (rettype)))
6938 {
6939 REAL_VALUE_TYPE r;
6940 long tmp[6];
6941 for (j = 0; j < 6; ++j)
6942 tmp[j] = 0;
6943 real_from_target (&r, tmp, TYPE_MODE (TREE_TYPE (rettype)));
6944 merge = build_real (TREE_TYPE (rettype), r);
6945 }
6946 else
6947 gcc_unreachable ();
6948 merge = build_vector_from_val (rettype, merge);
6949 merge = vect_init_vector (stmt, merge, rettype, NULL);
6950
6951 prev_stmt_info = NULL;
6952 for (j = 0; j < ncopies; ++j)
6953 {
6954 if (modifier == WIDEN && (j & 1))
6955 op = permute_vec_elements (vec_oprnd0, vec_oprnd0,
6956 perm_mask, stmt, gsi);
6957 else if (j == 0)
6958 op = vec_oprnd0
6959 = vect_get_vec_def_for_operand (gs_info.offset, stmt);
6960 else
6961 op = vec_oprnd0
6962 = vect_get_vec_def_for_stmt_copy (gs_info.offset_dt, vec_oprnd0);
6963
6964 if (!useless_type_conversion_p (idxtype, TREE_TYPE (op)))
6965 {
6966 gcc_assert (TYPE_VECTOR_SUBPARTS (TREE_TYPE (op))
6967 == TYPE_VECTOR_SUBPARTS (idxtype));
6968 var = vect_get_new_ssa_name (idxtype, vect_simple_var);
6969 op = build1 (VIEW_CONVERT_EXPR, idxtype, op);
6970 new_stmt
6971 = gimple_build_assign (var, VIEW_CONVERT_EXPR, op);
6972 vect_finish_stmt_generation (stmt, new_stmt, gsi);
6973 op = var;
6974 }
6975
6976 new_stmt
6977 = gimple_build_call (gs_info.decl, 5, merge, ptr, op, mask, scale);
6978
6979 if (!useless_type_conversion_p (vectype, rettype))
6980 {
6981 gcc_assert (TYPE_VECTOR_SUBPARTS (vectype)
6982 == TYPE_VECTOR_SUBPARTS (rettype));
6983 op = vect_get_new_ssa_name (rettype, vect_simple_var);
6984 gimple_call_set_lhs (new_stmt, op);
6985 vect_finish_stmt_generation (stmt, new_stmt, gsi);
6986 var = make_ssa_name (vec_dest);
6987 op = build1 (VIEW_CONVERT_EXPR, vectype, op);
6988 new_stmt
6989 = gimple_build_assign (var, VIEW_CONVERT_EXPR, op);
6990 }
6991 else
6992 {
6993 var = make_ssa_name (vec_dest, new_stmt);
6994 gimple_call_set_lhs (new_stmt, var);
6995 }
6996
6997 vect_finish_stmt_generation (stmt, new_stmt, gsi);
6998
6999 if (modifier == NARROW)
7000 {
7001 if ((j & 1) == 0)
7002 {
7003 prev_res = var;
7004 continue;
7005 }
7006 var = permute_vec_elements (prev_res, var,
7007 perm_mask, stmt, gsi);
7008 new_stmt = SSA_NAME_DEF_STMT (var);
7009 }
7010
7011 if (prev_stmt_info == NULL)
7012 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
7013 else
7014 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
7015 prev_stmt_info = vinfo_for_stmt (new_stmt);
7016 }
7017 return true;
7018 }
7019
7020 if (memory_access_type == VMAT_ELEMENTWISE
7021 || memory_access_type == VMAT_STRIDED_SLP)
7022 {
7023 gimple_stmt_iterator incr_gsi;
7024 bool insert_after;
7025 gimple *incr;
7026 tree offvar;
7027 tree ivstep;
7028 tree running_off;
7029 vec<constructor_elt, va_gc> *v = NULL;
7030 gimple_seq stmts = NULL;
7031 tree stride_base, stride_step, alias_off;
7032
7033 gcc_assert (!nested_in_vect_loop);
7034
7035 if (slp && grouped_load)
7036 {
7037 first_stmt = GROUP_FIRST_ELEMENT (stmt_info);
7038 first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
7039 group_size = GROUP_SIZE (vinfo_for_stmt (first_stmt));
7040 ref_type = get_group_alias_ptr_type (first_stmt);
7041 }
7042 else
7043 {
7044 first_stmt = stmt;
7045 first_dr = dr;
7046 group_size = 1;
7047 ref_type = reference_alias_ptr_type (DR_REF (first_dr));
7048 }
7049
7050 stride_base
7051 = fold_build_pointer_plus
7052 (DR_BASE_ADDRESS (first_dr),
7053 size_binop (PLUS_EXPR,
7054 convert_to_ptrofftype (DR_OFFSET (first_dr)),
7055 convert_to_ptrofftype (DR_INIT (first_dr))));
7056 stride_step = fold_convert (sizetype, DR_STEP (first_dr));
7057
7058 /* For a load with loop-invariant (but other than power-of-2)
7059 stride (i.e. not a grouped access) like so:
7060
7061 for (i = 0; i < n; i += stride)
7062 ... = array[i];
7063
7064 we generate a new induction variable and new accesses to
7065 form a new vector (or vectors, depending on ncopies):
7066
7067 for (j = 0; ; j += VF*stride)
7068 tmp1 = array[j];
7069 tmp2 = array[j + stride];
7070 ...
7071 vectemp = {tmp1, tmp2, ...}
7072 */
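      /* A sketch of what is generated below for the simplest case
	 (no SLP, group_size == 1), assuming four elements per vector
	 and ncopies == 1 -- illustrative only:

	   # offvar = PHI <stride_base, offvar + 4 * stride_step>
	   tmp1 = MEM [offvar];
	   tmp2 = MEM [offvar + stride_step];
	   tmp3 = MEM [offvar + 2 * stride_step];
	   tmp4 = MEM [offvar + 3 * stride_step];
	   vectemp = {tmp1, tmp2, tmp3, tmp4};

	 where stride_step is DR_STEP converted to sizetype; in the real
	 output the offsets are materialized as a chain of
	 POINTER_PLUS_EXPR bumps of the running offset rather than being
	 folded into the MEM_REFs.  */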
7073
7074 ivstep = fold_build2 (MULT_EXPR, TREE_TYPE (stride_step), stride_step,
7075 build_int_cst (TREE_TYPE (stride_step), vf));
7076
7077 standard_iv_increment_position (loop, &incr_gsi, &insert_after);
7078
7079 create_iv (unshare_expr (stride_base), unshare_expr (ivstep), NULL,
7080 loop, &incr_gsi, insert_after,
7081 &offvar, NULL);
7082 incr = gsi_stmt (incr_gsi);
7083 set_vinfo_for_stmt (incr, new_stmt_vec_info (incr, loop_vinfo));
7084
7085 stride_step = force_gimple_operand (unshare_expr (stride_step),
7086 &stmts, true, NULL_TREE);
7087 if (stmts)
7088 gsi_insert_seq_on_edge_immediate (loop_preheader_edge (loop), stmts);
7089
7090 prev_stmt_info = NULL;
7091 running_off = offvar;
7092 alias_off = build_int_cst (ref_type, 0);
7093 int nloads = nunits;
7094 int lnel = 1;
7095 tree ltype = TREE_TYPE (vectype);
7096 tree lvectype = vectype;
7097 auto_vec<tree> dr_chain;
7098 if (memory_access_type == VMAT_STRIDED_SLP)
7099 {
7100 if (group_size < nunits)
7101 {
7102 /* First check if vec_init optab supports construction from
7103 vector elts directly. */
7104 scalar_mode elmode = SCALAR_TYPE_MODE (TREE_TYPE (vectype));
7105 machine_mode vmode;
7106 if (mode_for_vector (elmode, group_size).exists (&vmode)
7107 && VECTOR_MODE_P (vmode)
7108 && (convert_optab_handler (vec_init_optab,
7109 TYPE_MODE (vectype), vmode)
7110 != CODE_FOR_nothing))
7111 {
7112 nloads = nunits / group_size;
7113 lnel = group_size;
7114 ltype = build_vector_type (TREE_TYPE (vectype), group_size);
7115 }
7116 else
7117 {
7118 /* Otherwise avoid emitting a constructor of vector elements
7119 by performing the loads using an integer type of the same
7120 size, constructing a vector of those and then
7121 re-interpreting it as the original vector type.
7122 This avoids a huge runtime penalty due to the general
7123 inability to perform store forwarding from smaller stores
7124 to a larger load. */
7125 unsigned lsize
7126 = group_size * TYPE_PRECISION (TREE_TYPE (vectype));
7127 elmode = int_mode_for_size (lsize, 0).require ();
7128 /* If we can't construct such a vector fall back to
7129 element loads of the original vector type. */
7130 if (mode_for_vector (elmode,
7131 nunits / group_size).exists (&vmode)
7132 && VECTOR_MODE_P (vmode)
7133 && (convert_optab_handler (vec_init_optab, vmode, elmode)
7134 != CODE_FOR_nothing))
7135 {
7136 nloads = nunits / group_size;
7137 lnel = group_size;
7138 ltype = build_nonstandard_integer_type (lsize, 1);
7139 lvectype = build_vector_type (ltype, nloads);
7140 }
7141 }
7142 }
7143 else
7144 {
7145 nloads = 1;
7146 lnel = nunits;
7147 ltype = vectype;
7148 }
7149 ltype = build_aligned_type (ltype, TYPE_ALIGN (TREE_TYPE (vectype)));
7150 }
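      /* Illustrative sketch of the two VMAT_STRIDED_SLP cases handled
	 above, assuming a hypothetical V4SF vectype and group_size == 2:

	 - if the target can build a V4SF from two V2SF pieces, nloads
	   is 2 and each load reads a whole two-element subvector;

	 - otherwise each pair of floats is loaded as one 64-bit
	   integer, the two integers form a V2DI and the result is
	   view-converted back to V4SF:

	     i0 = MEM <uint64> [ptr];
	     i1 = MEM <uint64> [ptr + stride];
	     vtmp = {i0, i1};
	     vec = VIEW_CONVERT_EXPR<V4SF>(vtmp);

	   which avoids a CONSTRUCTOR of four scalar floats and the
	   store-forwarding stalls such element-wise construction can
	   incur.  */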
7151 if (slp)
7152 {
7153 /* For SLP permutation support we need to load the whole group,
7154 not only the number of vector stmts the permutation result
7155 fits in. */
7156 if (slp_perm)
7157 {
7158 ncopies = (group_size * vf + nunits - 1) / nunits;
7159 dr_chain.create (ncopies);
7160 }
7161 else
7162 ncopies = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
7163 }
7164 int group_el = 0;
7165 unsigned HOST_WIDE_INT
7166 elsz = tree_to_uhwi (TYPE_SIZE_UNIT (TREE_TYPE (vectype)));
7167 for (j = 0; j < ncopies; j++)
7168 {
7169 if (nloads > 1)
7170 vec_alloc (v, nloads);
7171 for (i = 0; i < nloads; i++)
7172 {
7173 tree this_off = build_int_cst (TREE_TYPE (alias_off),
7174 group_el * elsz);
7175 new_stmt = gimple_build_assign (make_ssa_name (ltype),
7176 build2 (MEM_REF, ltype,
7177 running_off, this_off));
7178 vect_finish_stmt_generation (stmt, new_stmt, gsi);
7179 if (nloads > 1)
7180 CONSTRUCTOR_APPEND_ELT (v, NULL_TREE,
7181 gimple_assign_lhs (new_stmt));
7182
7183 group_el += lnel;
7184 if (! slp
7185 || group_el == group_size)
7186 {
7187 tree newoff = copy_ssa_name (running_off);
7188 gimple *incr = gimple_build_assign (newoff, POINTER_PLUS_EXPR,
7189 running_off, stride_step);
7190 vect_finish_stmt_generation (stmt, incr, gsi);
7191
7192 running_off = newoff;
7193 group_el = 0;
7194 }
7195 }
7196 if (nloads > 1)
7197 {
7198 tree vec_inv = build_constructor (lvectype, v);
7199 new_temp = vect_init_vector (stmt, vec_inv, lvectype, gsi);
7200 new_stmt = SSA_NAME_DEF_STMT (new_temp);
7201 if (lvectype != vectype)
7202 {
7203 new_stmt = gimple_build_assign (make_ssa_name (vectype),
7204 VIEW_CONVERT_EXPR,
7205 build1 (VIEW_CONVERT_EXPR,
7206 vectype, new_temp));
7207 vect_finish_stmt_generation (stmt, new_stmt, gsi);
7208 }
7209 }
7210
7211 if (slp)
7212 {
7213 if (slp_perm)
7214 dr_chain.quick_push (gimple_assign_lhs (new_stmt));
7215 else
7216 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
7217 }
7218 else
7219 {
7220 if (j == 0)
7221 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
7222 else
7223 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
7224 prev_stmt_info = vinfo_for_stmt (new_stmt);
7225 }
7226 }
7227 if (slp_perm)
7228 {
7229 unsigned n_perms;
7230 vect_transform_slp_perm_load (slp_node, dr_chain, gsi, vf,
7231 slp_node_instance, false, &n_perms);
7232 }
7233 return true;
7234 }
7235
7236 if (grouped_load)
7237 {
7238 first_stmt = GROUP_FIRST_ELEMENT (stmt_info);
7239 group_size = GROUP_SIZE (vinfo_for_stmt (first_stmt));
7240 /* For SLP vectorization we directly vectorize a subchain
7241 without permutation. */
7242 if (slp && ! SLP_TREE_LOAD_PERMUTATION (slp_node).exists ())
7243 first_stmt = SLP_TREE_SCALAR_STMTS (slp_node)[0];
7244 /* For BB vectorization always use the first stmt to base
7245 the data ref pointer on. */
7246 if (bb_vinfo)
7247 first_stmt_for_drptr = SLP_TREE_SCALAR_STMTS (slp_node)[0];
7248
7249 /* Check if the chain of loads is already vectorized. */
7250 if (STMT_VINFO_VEC_STMT (vinfo_for_stmt (first_stmt))
7251 /* For SLP we would need to copy over SLP_TREE_VEC_STMTS.
7252 ??? But we can only do so if there is exactly one
7253 as we have no way to get at the rest. Leave the CSE
7254 opportunity alone.
7255 ??? With the group load eventually participating
7256 in multiple different permutations (having multiple
7257 slp nodes which refer to the same group) the CSE
7258 is even wrong code. See PR56270. */
7259 && !slp)
7260 {
7261 *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
7262 return true;
7263 }
7264 first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
7265 group_gap_adj = 0;
7266
7267 /* VEC_NUM is the number of vect stmts to be created for this group. */
7268 if (slp)
7269 {
7270 grouped_load = false;
7271 /* For SLP permutation support we need to load the whole group,
7272 not only the number of vector stmts the permutation result
7273 fits in. */
7274 if (slp_perm)
7275 {
7276 vec_num = (group_size * vf + nunits - 1) / nunits;
7277 group_gap_adj = vf * group_size - nunits * vec_num;
7278 }
7279 else
7280 {
7281 vec_num = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
7282 group_gap_adj
7283 = group_size - SLP_INSTANCE_GROUP_SIZE (slp_node_instance);
7284 }
7285 }
7286 else
7287 vec_num = group_size;
7288
7289 ref_type = get_group_alias_ptr_type (first_stmt);
7290 }
7291 else
7292 {
7293 first_stmt = stmt;
7294 first_dr = dr;
7295 group_size = vec_num = 1;
7296 group_gap_adj = 0;
7297 ref_type = reference_alias_ptr_type (DR_REF (first_dr));
7298 }
7299
7300 alignment_support_scheme = vect_supportable_dr_alignment (first_dr, false);
7301 gcc_assert (alignment_support_scheme);
7302 /* Targets with load-lane instructions must not require explicit
7303 realignment. */
7304 gcc_assert (memory_access_type != VMAT_LOAD_STORE_LANES
7305 || alignment_support_scheme == dr_aligned
7306 || alignment_support_scheme == dr_unaligned_supported);
7307
7308 /* In case the vectorization factor (VF) is bigger than the number
7309 of elements that we can fit in a vectype (nunits), we have to generate
7310 more than one vector stmt - i.e - we need to "unroll" the
7311 vector stmt by a factor VF/nunits. In doing so, we record a pointer
7312 from one copy of the vector stmt to the next, in the field
7313 STMT_VINFO_RELATED_STMT. This is necessary in order to allow following
7314 stages to find the correct vector defs to be used when vectorizing
7315 stmts that use the defs of the current stmt. The example below
7316 illustrates the vectorization process when VF=16 and nunits=4 (i.e., we
7317 need to create 4 vectorized stmts):
7318
7319 before vectorization:
7320 RELATED_STMT VEC_STMT
7321 S1: x = memref - -
7322 S2: z = x + 1 - -
7323
7324 step 1: vectorize stmt S1:
7325 We first create the vector stmt VS1_0, and, as usual, record a
7326 pointer to it in the STMT_VINFO_VEC_STMT of the scalar stmt S1.
7327 Next, we create the vector stmt VS1_1, and record a pointer to
7328 it in the STMT_VINFO_RELATED_STMT of the vector stmt VS1_0.
7329 Similarly, for VS1_2 and VS1_3. This is the resulting chain of
7330 stmts and pointers:
7331 RELATED_STMT VEC_STMT
7332 VS1_0: vx0 = memref0 VS1_1 -
7333 VS1_1: vx1 = memref1 VS1_2 -
7334 VS1_2: vx2 = memref2 VS1_3 -
7335 VS1_3: vx3 = memref3 - -
7336 S1: x = load - VS1_0
7337 S2: z = x + 1 - -
7338
 7339      See the documentation of vect_get_vec_def_for_stmt_copy for how the
 7340      information we recorded in the RELATED_STMT field is used to vectorize
 7341      stmt S2. */
7342
7343 /* In case of interleaving (non-unit grouped access):
7344
7345 S1: x2 = &base + 2
7346 S2: x0 = &base
7347 S3: x1 = &base + 1
7348 S4: x3 = &base + 3
7349
7350 Vectorized loads are created in the order of memory accesses
7351 starting from the access of the first stmt of the chain:
7352
7353 VS1: vx0 = &base
7354 VS2: vx1 = &base + vec_size*1
7355 VS3: vx3 = &base + vec_size*2
7356 VS4: vx4 = &base + vec_size*3
7357
7358 Then permutation statements are generated:
7359
7360 VS5: vx5 = VEC_PERM_EXPR < vx0, vx1, { 0, 2, ..., i*2 } >
7361 VS6: vx6 = VEC_PERM_EXPR < vx0, vx1, { 1, 3, ..., i*2+1 } >
7362 ...
7363
7364 And they are put in STMT_VINFO_VEC_STMT of the corresponding scalar stmts
7365 (the order of the data-refs in the output of vect_permute_load_chain
7366 corresponds to the order of scalar stmts in the interleaving chain - see
7367 the documentation of vect_permute_load_chain()).
7368 The generation of permutation stmts and recording them in
7369 STMT_VINFO_VEC_STMT is done in vect_transform_grouped_load().
7370
7371 In case of both multiple types and interleaving, the vector loads and
7372 permutation stmts above are created for every copy. The result vector
7373 stmts are put in STMT_VINFO_VEC_STMT for the first copy and in the
7374 corresponding STMT_VINFO_RELATED_STMT for the next copies. */
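   /* A concrete instance of the above, for illustration only: for a
      group of two interleaved loads and four-element vectors, two
      input vectors

	vx0 = { a0, b0, a1, b1 }   vx1 = { a2, b2, a3, b3 }

      are split by

	VS5: vx5 = VEC_PERM_EXPR <vx0, vx1, { 0, 2, 4, 6 }>  (= { a0, a1, a2, a3 })
	VS6: vx6 = VEC_PERM_EXPR <vx0, vx1, { 1, 3, 5, 7 }>  (= { b0, b1, b2, b3 })

      giving one vector per scalar stmt of the interleaving chain.  */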
7375
7376 /* If the data reference is aligned (dr_aligned) or potentially unaligned
7377 on a target that supports unaligned accesses (dr_unaligned_supported)
7378 we generate the following code:
7379 p = initial_addr;
7380 indx = 0;
7381 loop {
7382 p = p + indx * vectype_size;
7383 vec_dest = *(p);
7384 indx = indx + 1;
7385 }
7386
7387 Otherwise, the data reference is potentially unaligned on a target that
7388 does not support unaligned accesses (dr_explicit_realign_optimized) -
7389 then generate the following code, in which the data in each iteration is
7390 obtained by two vector loads, one from the previous iteration, and one
7391 from the current iteration:
7392 p1 = initial_addr;
7393 msq_init = *(floor(p1))
7394 p2 = initial_addr + VS - 1;
7395 realignment_token = call target_builtin;
7396 indx = 0;
7397 loop {
7398 p2 = p2 + indx * vectype_size
7399 lsq = *(floor(p2))
7400 vec_dest = realign_load (msq, lsq, realignment_token)
7401 indx = indx + 1;
7402 msq = lsq;
7403 } */
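   /* Numeric illustration of the realignment scheme above, using
      hypothetical addresses, 16-byte vectors and 4-byte elements: for
      initial_addr == 0x1004,

	floor (p1)          == 0x1000   holds elements -1 .. 2
	floor (p1 + VS - 1) == 0x1010   holds elements  3 .. 6

      and realign_load (msq, lsq, realignment_token) conceptually
      concatenates the two aligned vectors and extracts the 16 bytes
      starting at byte offset 4, i.e. exactly the elements 0 .. 3 the
      scalar code would have read.  */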
7404
7405 /* If the misalignment remains the same throughout the execution of the
7406 loop, we can create the init_addr and permutation mask at the loop
7407 preheader. Otherwise, it needs to be created inside the loop.
7408 This can only occur when vectorizing memory accesses in the inner-loop
7409 nested within an outer-loop that is being vectorized. */
7410
7411 if (nested_in_vect_loop
7412 && (DR_STEP_ALIGNMENT (dr) % GET_MODE_SIZE (TYPE_MODE (vectype))) != 0)
7413 {
7414 gcc_assert (alignment_support_scheme != dr_explicit_realign_optimized);
7415 compute_in_loop = true;
7416 }
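  /* Hypothetical illustration of the test above: with 16-byte vectors,
     an inner-loop access whose address advances by 12 bytes per
     inner-loop iteration has 12 % 16 != 0, so its misalignment differs
     from one inner-loop iteration to the next and the realignment data
     cannot be set up once in the preheader; it has to be (re)computed
     inside the loop instead.  */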
7417
7418 if ((alignment_support_scheme == dr_explicit_realign_optimized
7419 || alignment_support_scheme == dr_explicit_realign)
7420 && !compute_in_loop)
7421 {
7422 msq = vect_setup_realignment (first_stmt, gsi, &realignment_token,
7423 alignment_support_scheme, NULL_TREE,
7424 &at_loop);
7425 if (alignment_support_scheme == dr_explicit_realign_optimized)
7426 {
7427 phi = as_a <gphi *> (SSA_NAME_DEF_STMT (msq));
7428 byte_offset = size_binop (MINUS_EXPR, TYPE_SIZE_UNIT (vectype),
7429 size_one_node);
7430 }
7431 }
7432 else
7433 at_loop = loop;
7434
7435 if (memory_access_type == VMAT_CONTIGUOUS_REVERSE)
7436 offset = size_int (-TYPE_VECTOR_SUBPARTS (vectype) + 1);
7437
7438 if (memory_access_type == VMAT_LOAD_STORE_LANES)
7439 aggr_type = build_array_type_nelts (elem_type, vec_num * nunits);
7440 else
7441 aggr_type = vectype;
7442
7443 prev_stmt_info = NULL;
7444 int group_elt = 0;
7445 for (j = 0; j < ncopies; j++)
7446 {
7447 /* 1. Create the vector or array pointer update chain. */
7448 if (j == 0)
7449 {
7450 bool simd_lane_access_p
7451 = STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info);
7452 if (simd_lane_access_p
7453 && TREE_CODE (DR_BASE_ADDRESS (first_dr)) == ADDR_EXPR
7454 && VAR_P (TREE_OPERAND (DR_BASE_ADDRESS (first_dr), 0))
7455 && integer_zerop (DR_OFFSET (first_dr))
7456 && integer_zerop (DR_INIT (first_dr))
7457 && alias_sets_conflict_p (get_alias_set (aggr_type),
7458 get_alias_set (TREE_TYPE (ref_type)))
7459 && (alignment_support_scheme == dr_aligned
7460 || alignment_support_scheme == dr_unaligned_supported))
7461 {
7462 dataref_ptr = unshare_expr (DR_BASE_ADDRESS (first_dr));
7463 dataref_offset = build_int_cst (ref_type, 0);
7464 inv_p = false;
7465 }
7466 else if (first_stmt_for_drptr
7467 && first_stmt != first_stmt_for_drptr)
7468 {
7469 dataref_ptr
7470 = vect_create_data_ref_ptr (first_stmt_for_drptr, aggr_type,
7471 at_loop, offset, &dummy, gsi,
7472 &ptr_incr, simd_lane_access_p,
7473 &inv_p, byte_offset);
7474 /* Adjust the pointer by the difference to first_stmt. */
7475 data_reference_p ptrdr
7476 = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt_for_drptr));
7477 tree diff = fold_convert (sizetype,
7478 size_binop (MINUS_EXPR,
7479 DR_INIT (first_dr),
7480 DR_INIT (ptrdr)));
7481 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi,
7482 stmt, diff);
7483 }
7484 else
7485 dataref_ptr
7486 = vect_create_data_ref_ptr (first_stmt, aggr_type, at_loop,
7487 offset, &dummy, gsi, &ptr_incr,
7488 simd_lane_access_p, &inv_p,
7489 byte_offset);
7490 }
7491 else if (dataref_offset)
7492 dataref_offset = int_const_binop (PLUS_EXPR, dataref_offset,
7493 TYPE_SIZE_UNIT (aggr_type));
7494 else
7495 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi, stmt,
7496 TYPE_SIZE_UNIT (aggr_type));
7497
7498 if (grouped_load || slp_perm)
7499 dr_chain.create (vec_num);
7500
7501 if (memory_access_type == VMAT_LOAD_STORE_LANES)
7502 {
7503 tree vec_array;
7504
7505 vec_array = create_vector_array (vectype, vec_num);
7506
7507 /* Emit:
7508 VEC_ARRAY = LOAD_LANES (MEM_REF[...all elements...]). */
7509 data_ref = create_array_ref (aggr_type, dataref_ptr, ref_type);
7510 gcall *call = gimple_build_call_internal (IFN_LOAD_LANES, 1,
7511 data_ref);
7512 gimple_call_set_lhs (call, vec_array);
7513 gimple_call_set_nothrow (call, true);
7514 new_stmt = call;
7515 vect_finish_stmt_generation (stmt, new_stmt, gsi);
7516
7517 /* Extract each vector into an SSA_NAME. */
7518 for (i = 0; i < vec_num; i++)
7519 {
7520 new_temp = read_vector_array (stmt, gsi, scalar_dest,
7521 vec_array, i);
7522 dr_chain.quick_push (new_temp);
7523 }
7524
7525 /* Record the mapping between SSA_NAMEs and statements. */
7526 vect_record_grouped_load_vectors (stmt, dr_chain);
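	      /* Illustrative sketch with hypothetical types: for a group of
		 three interleaved int loads and four ints per vector,
		 AGGR_TYPE is int[12] and the sequence built above is

		   vec_array = IFN_LOAD_LANES (MEM <int[12]> [ptr]);
		   vect_a = vec_array[0];   (= { a0, a1, a2, a3 })
		   vect_b = vec_array[1];   (= { b0, b1, b2, b3 })
		   vect_c = vec_array[2];   (= { c0, c1, c2, c3 })

		 i.e. the target instruction (e.g. AArch64 ld3) both loads
		 and de-interleaves, so no separate permutation stmts are
		 needed afterwards.  */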
7527 }
7528 else
7529 {
7530 for (i = 0; i < vec_num; i++)
7531 {
7532 if (i > 0)
7533 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi,
7534 stmt, NULL_TREE);
7535
7536 /* 2. Create the vector-load in the loop. */
7537 switch (alignment_support_scheme)
7538 {
7539 case dr_aligned:
7540 case dr_unaligned_supported:
7541 {
7542 unsigned int align, misalign;
7543
7544 data_ref
7545 = fold_build2 (MEM_REF, vectype, dataref_ptr,
7546 dataref_offset
7547 ? dataref_offset
7548 : build_int_cst (ref_type, 0));
7549 align = DR_TARGET_ALIGNMENT (dr);
7550 if (alignment_support_scheme == dr_aligned)
7551 {
7552 gcc_assert (aligned_access_p (first_dr));
7553 misalign = 0;
7554 }
7555 else if (DR_MISALIGNMENT (first_dr) == -1)
7556 {
7557 align = dr_alignment (vect_dr_behavior (first_dr));
7558 misalign = 0;
7559 TREE_TYPE (data_ref)
7560 = build_aligned_type (TREE_TYPE (data_ref),
7561 align * BITS_PER_UNIT);
7562 }
7563 else
7564 {
7565 TREE_TYPE (data_ref)
7566 = build_aligned_type (TREE_TYPE (data_ref),
7567 TYPE_ALIGN (elem_type));
7568 misalign = DR_MISALIGNMENT (first_dr);
7569 }
7570 if (dataref_offset == NULL_TREE
7571 && TREE_CODE (dataref_ptr) == SSA_NAME)
7572 set_ptr_info_alignment (get_ptr_info (dataref_ptr),
7573 align, misalign);
7574 break;
7575 }
7576 case dr_explicit_realign:
7577 {
7578 tree ptr, bump;
7579
7580 tree vs = size_int (TYPE_VECTOR_SUBPARTS (vectype));
7581
7582 if (compute_in_loop)
7583 msq = vect_setup_realignment (first_stmt, gsi,
7584 &realignment_token,
7585 dr_explicit_realign,
7586 dataref_ptr, NULL);
7587
7588 if (TREE_CODE (dataref_ptr) == SSA_NAME)
7589 ptr = copy_ssa_name (dataref_ptr);
7590 else
7591 ptr = make_ssa_name (TREE_TYPE (dataref_ptr));
7592 unsigned int align = DR_TARGET_ALIGNMENT (first_dr);
7593 new_stmt = gimple_build_assign
7594 (ptr, BIT_AND_EXPR, dataref_ptr,
7595 build_int_cst
7596 (TREE_TYPE (dataref_ptr),
7597 -(HOST_WIDE_INT) align));
7598 vect_finish_stmt_generation (stmt, new_stmt, gsi);
7599 data_ref
7600 = build2 (MEM_REF, vectype, ptr,
7601 build_int_cst (ref_type, 0));
7602 vec_dest = vect_create_destination_var (scalar_dest,
7603 vectype);
7604 new_stmt = gimple_build_assign (vec_dest, data_ref);
7605 new_temp = make_ssa_name (vec_dest, new_stmt);
7606 gimple_assign_set_lhs (new_stmt, new_temp);
7607 gimple_set_vdef (new_stmt, gimple_vdef (stmt));
7608 gimple_set_vuse (new_stmt, gimple_vuse (stmt));
7609 vect_finish_stmt_generation (stmt, new_stmt, gsi);
7610 msq = new_temp;
7611
7612 bump = size_binop (MULT_EXPR, vs,
7613 TYPE_SIZE_UNIT (elem_type));
7614 bump = size_binop (MINUS_EXPR, bump, size_one_node);
7615 ptr = bump_vector_ptr (dataref_ptr, NULL, gsi, stmt, bump);
7616 new_stmt = gimple_build_assign
7617 (NULL_TREE, BIT_AND_EXPR, ptr,
7618 build_int_cst
7619 (TREE_TYPE (ptr), -(HOST_WIDE_INT) align));
7620 ptr = copy_ssa_name (ptr, new_stmt);
7621 gimple_assign_set_lhs (new_stmt, ptr);
7622 vect_finish_stmt_generation (stmt, new_stmt, gsi);
7623 data_ref
7624 = build2 (MEM_REF, vectype, ptr,
7625 build_int_cst (ref_type, 0));
7626 break;
7627 }
7628 case dr_explicit_realign_optimized:
7629 {
7630 if (TREE_CODE (dataref_ptr) == SSA_NAME)
7631 new_temp = copy_ssa_name (dataref_ptr);
7632 else
7633 new_temp = make_ssa_name (TREE_TYPE (dataref_ptr));
7634 unsigned int align = DR_TARGET_ALIGNMENT (first_dr);
7635 new_stmt = gimple_build_assign
7636 (new_temp, BIT_AND_EXPR, dataref_ptr,
7637 build_int_cst (TREE_TYPE (dataref_ptr),
7638 -(HOST_WIDE_INT) align));
7639 vect_finish_stmt_generation (stmt, new_stmt, gsi);
7640 data_ref
7641 = build2 (MEM_REF, vectype, new_temp,
7642 build_int_cst (ref_type, 0));
7643 break;
7644 }
7645 default:
7646 gcc_unreachable ();
7647 }
7648 vec_dest = vect_create_destination_var (scalar_dest, vectype);
7649 new_stmt = gimple_build_assign (vec_dest, data_ref);
7650 new_temp = make_ssa_name (vec_dest, new_stmt);
7651 gimple_assign_set_lhs (new_stmt, new_temp);
7652 vect_finish_stmt_generation (stmt, new_stmt, gsi);
7653
7654 /* 3. Handle explicit realignment if necessary/supported.
7655 Create in loop:
7656 vec_dest = realign_load (msq, lsq, realignment_token) */
7657 if (alignment_support_scheme == dr_explicit_realign_optimized
7658 || alignment_support_scheme == dr_explicit_realign)
7659 {
7660 lsq = gimple_assign_lhs (new_stmt);
7661 if (!realignment_token)
7662 realignment_token = dataref_ptr;
7663 vec_dest = vect_create_destination_var (scalar_dest, vectype);
7664 new_stmt = gimple_build_assign (vec_dest, REALIGN_LOAD_EXPR,
7665 msq, lsq, realignment_token);
7666 new_temp = make_ssa_name (vec_dest, new_stmt);
7667 gimple_assign_set_lhs (new_stmt, new_temp);
7668 vect_finish_stmt_generation (stmt, new_stmt, gsi);
7669
7670 if (alignment_support_scheme == dr_explicit_realign_optimized)
7671 {
7672 gcc_assert (phi);
7673 if (i == vec_num - 1 && j == ncopies - 1)
7674 add_phi_arg (phi, lsq,
7675 loop_latch_edge (containing_loop),
7676 UNKNOWN_LOCATION);
7677 msq = lsq;
7678 }
7679 }
7680
7681 /* 4. Handle invariant-load. */
7682 if (inv_p && !bb_vinfo)
7683 {
7684 gcc_assert (!grouped_load);
7685 /* If we have versioned for aliasing or the loop doesn't
7686 have any data dependencies that would preclude this,
7687 then we are sure this is a loop invariant load and
7688 thus we can insert it on the preheader edge. */
7689 if (LOOP_VINFO_NO_DATA_DEPENDENCIES (loop_vinfo)
7690 && !nested_in_vect_loop
7691 && hoist_defs_of_uses (stmt, loop))
7692 {
7693 if (dump_enabled_p ())
7694 {
7695 dump_printf_loc (MSG_NOTE, vect_location,
7696 "hoisting out of the vectorized "
7697 "loop: ");
7698 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
7699 }
7700 tree tem = copy_ssa_name (scalar_dest);
7701 gsi_insert_on_edge_immediate
7702 (loop_preheader_edge (loop),
7703 gimple_build_assign (tem,
7704 unshare_expr
7705 (gimple_assign_rhs1 (stmt))));
7706 new_temp = vect_init_vector (stmt, tem, vectype, NULL);
7707 new_stmt = SSA_NAME_DEF_STMT (new_temp);
7708 set_vinfo_for_stmt (new_stmt,
7709 new_stmt_vec_info (new_stmt, vinfo));
7710 }
7711 else
7712 {
7713 gimple_stmt_iterator gsi2 = *gsi;
7714 gsi_next (&gsi2);
7715 new_temp = vect_init_vector (stmt, scalar_dest,
7716 vectype, &gsi2);
7717 new_stmt = SSA_NAME_DEF_STMT (new_temp);
7718 }
7719 }
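	      /* For illustration (hypothetical loop): a loop-invariant read

		   for (i = 0; i < n; i++)
		     b[i] = *p;

		 where *p is not clobbered in the loop is handled above by
		 keeping a single scalar load -- hoisted to the preheader
		 edge when the checks allow it, otherwise left in place --
		 and splatting its result into a vector, e.g.
		 { tem, tem, tem, tem } for four-element vectors.  */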
7720
7721 if (memory_access_type == VMAT_CONTIGUOUS_REVERSE)
7722 {
7723 tree perm_mask = perm_mask_for_reverse (vectype);
7724 new_temp = permute_vec_elements (new_temp, new_temp,
7725 perm_mask, stmt, gsi);
7726 new_stmt = SSA_NAME_DEF_STMT (new_temp);
7727 }
7728
7729 /* Collect vector loads and later create their permutation in
7730 vect_transform_grouped_load (). */
7731 if (grouped_load || slp_perm)
7732 dr_chain.quick_push (new_temp);
7733
7734 /* Store vector loads in the corresponding SLP_NODE. */
7735 if (slp && !slp_perm)
7736 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
7737
 7738 	  /* With an SLP permutation we load the gaps as well; without
 7739 	     one we need to skip the gaps after we manage to fully load
 7740 	     all elements.  group_gap_adj is GROUP_SIZE here.  */
7741 group_elt += nunits;
7742 if (group_gap_adj != 0 && ! slp_perm
7743 && group_elt == group_size - group_gap_adj)
7744 {
7745 wide_int bump_val = (wi::to_wide (TYPE_SIZE_UNIT (elem_type))
7746 * group_gap_adj);
7747 tree bump = wide_int_to_tree (sizetype, bump_val);
7748 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi,
7749 stmt, bump);
7750 group_elt = 0;
7751 }
7752 }
7753 /* Bump the vector pointer to account for a gap or for excess
7754 elements loaded for a permuted SLP load. */
7755 if (group_gap_adj != 0 && slp_perm)
7756 {
7757 wide_int bump_val = (wi::to_wide (TYPE_SIZE_UNIT (elem_type))
7758 * group_gap_adj);
7759 tree bump = wide_int_to_tree (sizetype, bump_val);
7760 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi,
7761 stmt, bump);
7762 }
7763 }
7764
7765 if (slp && !slp_perm)
7766 continue;
7767
7768 if (slp_perm)
7769 {
7770 unsigned n_perms;
7771 if (!vect_transform_slp_perm_load (slp_node, dr_chain, gsi, vf,
7772 slp_node_instance, false,
7773 &n_perms))
7774 {
7775 dr_chain.release ();
7776 return false;
7777 }
7778 }
7779 else
7780 {
7781 if (grouped_load)
7782 {
7783 if (memory_access_type != VMAT_LOAD_STORE_LANES)
7784 vect_transform_grouped_load (stmt, dr_chain, group_size, gsi);
7785 *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
7786 }
7787 else
7788 {
7789 if (j == 0)
7790 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
7791 else
7792 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
7793 prev_stmt_info = vinfo_for_stmt (new_stmt);
7794 }
7795 }
7796 dr_chain.release ();
7797 }
7798
7799 return true;
7800 }
7801
7802 /* Function vect_is_simple_cond.
7803
7804 Input:
7805 LOOP - the loop that is being vectorized.
7806 COND - Condition that is checked for simple use.
7807
7808 Output:
7809 *COMP_VECTYPE - the vector type for the comparison.
7810 *DTS - The def types for the arguments of the comparison
7811
 7812    Returns whether a COND can be vectorized.  Checks whether the
 7813    condition operands are supportable using vect_is_simple_use.  */
7814
7815 static bool
7816 vect_is_simple_cond (tree cond, vec_info *vinfo,
7817 tree *comp_vectype, enum vect_def_type *dts,
7818 tree vectype)
7819 {
7820 tree lhs, rhs;
7821 tree vectype1 = NULL_TREE, vectype2 = NULL_TREE;
7822
7823 /* Mask case. */
7824 if (TREE_CODE (cond) == SSA_NAME
7825 && VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (cond)))
7826 {
7827 gimple *lhs_def_stmt = SSA_NAME_DEF_STMT (cond);
7828 if (!vect_is_simple_use (cond, vinfo, &lhs_def_stmt,
7829 &dts[0], comp_vectype)
7830 || !*comp_vectype
7831 || !VECTOR_BOOLEAN_TYPE_P (*comp_vectype))
7832 return false;
7833 return true;
7834 }
7835
7836 if (!COMPARISON_CLASS_P (cond))
7837 return false;
7838
7839 lhs = TREE_OPERAND (cond, 0);
7840 rhs = TREE_OPERAND (cond, 1);
7841
7842 if (TREE_CODE (lhs) == SSA_NAME)
7843 {
7844 gimple *lhs_def_stmt = SSA_NAME_DEF_STMT (lhs);
7845 if (!vect_is_simple_use (lhs, vinfo, &lhs_def_stmt, &dts[0], &vectype1))
7846 return false;
7847 }
7848 else if (TREE_CODE (lhs) == INTEGER_CST || TREE_CODE (lhs) == REAL_CST
7849 || TREE_CODE (lhs) == FIXED_CST)
7850 dts[0] = vect_constant_def;
7851 else
7852 return false;
7853
7854 if (TREE_CODE (rhs) == SSA_NAME)
7855 {
7856 gimple *rhs_def_stmt = SSA_NAME_DEF_STMT (rhs);
7857 if (!vect_is_simple_use (rhs, vinfo, &rhs_def_stmt, &dts[1], &vectype2))
7858 return false;
7859 }
7860 else if (TREE_CODE (rhs) == INTEGER_CST || TREE_CODE (rhs) == REAL_CST
7861 || TREE_CODE (rhs) == FIXED_CST)
7862 dts[1] = vect_constant_def;
7863 else
7864 return false;
7865
7866 if (vectype1 && vectype2
7867 && TYPE_VECTOR_SUBPARTS (vectype1) != TYPE_VECTOR_SUBPARTS (vectype2))
7868 return false;
7869
7870 *comp_vectype = vectype1 ? vectype1 : vectype2;
7871 /* Invariant comparison. */
7872 if (! *comp_vectype)
7873 {
7874 tree scalar_type = TREE_TYPE (lhs);
7875 /* If we can widen the comparison to match vectype do so. */
7876 if (INTEGRAL_TYPE_P (scalar_type)
7877 && tree_int_cst_lt (TYPE_SIZE (scalar_type),
7878 TYPE_SIZE (TREE_TYPE (vectype))))
7879 scalar_type = build_nonstandard_integer_type
7880 (tree_to_uhwi (TYPE_SIZE (TREE_TYPE (vectype))),
7881 TYPE_UNSIGNED (scalar_type));
7882 *comp_vectype = get_vectype_for_scalar_type (scalar_type);
7883 }
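   /* Hypothetical example of the widening above: for

	long d[N];  int a, b;		(a and b loop-invariant)
	d[i] = a < b ? d[i] : 0;

      with a two-element 64-bit vectype the 32-bit comparison is widened
      to a 64-bit integer type, so COMP_VECTYPE ends up with the same
      number of elements (two) as VECTYPE.  */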
7884
7885 return true;
7886 }
7887
7888 /* vectorizable_condition.
7889
 7890    Check if STMT is a conditional modify expression that can be vectorized.
7891 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
7892 stmt using VEC_COND_EXPR to replace it, put it in VEC_STMT, and insert it
7893 at GSI.
7894
 7895    When STMT is vectorized as a nested cycle, REDUC_DEF is the vector variable
 7896    to be used at REDUC_INDEX (in the then clause if REDUC_INDEX is 1, and in
 7897    the else clause if it is 2).
7898
7899 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
7900
7901 bool
7902 vectorizable_condition (gimple *stmt, gimple_stmt_iterator *gsi,
7903 gimple **vec_stmt, tree reduc_def, int reduc_index,
7904 slp_tree slp_node)
7905 {
7906 tree scalar_dest = NULL_TREE;
7907 tree vec_dest = NULL_TREE;
7908 tree cond_expr, cond_expr0 = NULL_TREE, cond_expr1 = NULL_TREE;
7909 tree then_clause, else_clause;
7910 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
7911 tree comp_vectype = NULL_TREE;
7912 tree vec_cond_lhs = NULL_TREE, vec_cond_rhs = NULL_TREE;
7913 tree vec_then_clause = NULL_TREE, vec_else_clause = NULL_TREE;
7914 tree vec_compare;
7915 tree new_temp;
7916 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
7917 enum vect_def_type dts[4]
7918 = {vect_unknown_def_type, vect_unknown_def_type,
7919 vect_unknown_def_type, vect_unknown_def_type};
7920 int ndts = 4;
7921 int ncopies;
7922 enum tree_code code, cond_code, bitop1 = NOP_EXPR, bitop2 = NOP_EXPR;
7923 stmt_vec_info prev_stmt_info = NULL;
7924 int i, j;
7925 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
7926 vec<tree> vec_oprnds0 = vNULL;
7927 vec<tree> vec_oprnds1 = vNULL;
7928 vec<tree> vec_oprnds2 = vNULL;
7929 vec<tree> vec_oprnds3 = vNULL;
7930 tree vec_cmp_type;
7931 bool masked = false;
7932
7933 if (reduc_index && STMT_SLP_TYPE (stmt_info))
7934 return false;
7935
7936 if (STMT_VINFO_VEC_REDUCTION_TYPE (stmt_info) == TREE_CODE_REDUCTION)
7937 {
7938 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
7939 return false;
7940
7941 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
7942 && !(STMT_VINFO_DEF_TYPE (stmt_info) == vect_nested_cycle
7943 && reduc_def))
7944 return false;
7945
7946 /* FORNOW: not yet supported. */
7947 if (STMT_VINFO_LIVE_P (stmt_info))
7948 {
7949 if (dump_enabled_p ())
7950 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
7951 "value used after loop.\n");
7952 return false;
7953 }
7954 }
7955
7956 /* Is vectorizable conditional operation? */
7957 if (!is_gimple_assign (stmt))
7958 return false;
7959
7960 code = gimple_assign_rhs_code (stmt);
7961
7962 if (code != COND_EXPR)
7963 return false;
7964
7965 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
7966 tree vectype1 = NULL_TREE, vectype2 = NULL_TREE;
7967
7968 if (slp_node)
7969 ncopies = 1;
7970 else
7971 ncopies = vect_get_num_copies (loop_vinfo, vectype);
7972
7973 gcc_assert (ncopies >= 1);
7974 if (reduc_index && ncopies > 1)
7975 return false; /* FORNOW */
7976
7977 cond_expr = gimple_assign_rhs1 (stmt);
7978 then_clause = gimple_assign_rhs2 (stmt);
7979 else_clause = gimple_assign_rhs3 (stmt);
7980
7981 if (!vect_is_simple_cond (cond_expr, stmt_info->vinfo,
7982 &comp_vectype, &dts[0], vectype)
7983 || !comp_vectype)
7984 return false;
7985
7986 gimple *def_stmt;
7987 if (!vect_is_simple_use (then_clause, stmt_info->vinfo, &def_stmt, &dts[2],
7988 &vectype1))
7989 return false;
7990 if (!vect_is_simple_use (else_clause, stmt_info->vinfo, &def_stmt, &dts[3],
7991 &vectype2))
7992 return false;
7993
7994 if (vectype1 && !useless_type_conversion_p (vectype, vectype1))
7995 return false;
7996
7997 if (vectype2 && !useless_type_conversion_p (vectype, vectype2))
7998 return false;
7999
8000 masked = !COMPARISON_CLASS_P (cond_expr);
8001 vec_cmp_type = build_same_sized_truth_vector_type (comp_vectype);
8002
8003 if (vec_cmp_type == NULL_TREE)
8004 return false;
8005
8006 cond_code = TREE_CODE (cond_expr);
8007 if (!masked)
8008 {
8009 cond_expr0 = TREE_OPERAND (cond_expr, 0);
8010 cond_expr1 = TREE_OPERAND (cond_expr, 1);
8011 }
8012
8013 if (!masked && VECTOR_BOOLEAN_TYPE_P (comp_vectype))
8014 {
8015 /* Boolean values may have another representation in vectors
8016 and therefore we prefer bit operations over comparison for
8017 them (which also works for scalar masks). We store opcodes
8018 to use in bitop1 and bitop2. Statement is vectorized as
8019 BITOP2 (rhs1 BITOP1 rhs2) or rhs1 BITOP2 (BITOP1 rhs2)
8020 depending on bitop1 and bitop2 arity. */
8021 switch (cond_code)
8022 {
8023 case GT_EXPR:
8024 bitop1 = BIT_NOT_EXPR;
8025 bitop2 = BIT_AND_EXPR;
8026 break;
8027 case GE_EXPR:
8028 bitop1 = BIT_NOT_EXPR;
8029 bitop2 = BIT_IOR_EXPR;
8030 break;
8031 case LT_EXPR:
8032 bitop1 = BIT_NOT_EXPR;
8033 bitop2 = BIT_AND_EXPR;
8034 std::swap (cond_expr0, cond_expr1);
8035 break;
8036 case LE_EXPR:
8037 bitop1 = BIT_NOT_EXPR;
8038 bitop2 = BIT_IOR_EXPR;
8039 std::swap (cond_expr0, cond_expr1);
8040 break;
8041 case NE_EXPR:
8042 bitop1 = BIT_XOR_EXPR;
8043 break;
8044 case EQ_EXPR:
8045 bitop1 = BIT_XOR_EXPR;
8046 bitop2 = BIT_NOT_EXPR;
8047 break;
8048 default:
8049 return false;
8050 }
8051 cond_code = SSA_NAME;
8052 }
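   /* Worked example of the mapping above (boolean/mask operands only,
      illustrative): a < b is rewritten by swapping the operands and
      using the GT pattern, i.e.

	tmp = ~a;		(bitop1 == BIT_NOT_EXPR)
	cmp = b & tmp;		(bitop2 == BIT_AND_EXPR)

      which is equivalent because mask elements are only ever all-zeros
      or all-ones.  Likewise a != b is just a ^ b, and a == b is
      conceptually ~(a ^ b), realized below by computing a ^ b and
      swapping the then/else values instead of emitting the NOT.  */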
8053
8054 if (!vec_stmt)
8055 {
8056 STMT_VINFO_TYPE (stmt_info) = condition_vec_info_type;
8057 if (bitop1 != NOP_EXPR)
8058 {
8059 machine_mode mode = TYPE_MODE (comp_vectype);
8060 optab optab;
8061
8062 optab = optab_for_tree_code (bitop1, comp_vectype, optab_default);
8063 if (!optab || optab_handler (optab, mode) == CODE_FOR_nothing)
8064 return false;
8065
8066 if (bitop2 != NOP_EXPR)
8067 {
8068 optab = optab_for_tree_code (bitop2, comp_vectype,
8069 optab_default);
8070 if (!optab || optab_handler (optab, mode) == CODE_FOR_nothing)
8071 return false;
8072 }
8073 }
8074 if (expand_vec_cond_expr_p (vectype, comp_vectype,
8075 cond_code))
8076 {
8077 vect_model_simple_cost (stmt_info, ncopies, dts, ndts, NULL, NULL);
8078 return true;
8079 }
8080 return false;
8081 }
8082
8083 /* Transform. */
8084
8085 if (!slp_node)
8086 {
8087 vec_oprnds0.create (1);
8088 vec_oprnds1.create (1);
8089 vec_oprnds2.create (1);
8090 vec_oprnds3.create (1);
8091 }
8092
8093 /* Handle def. */
8094 scalar_dest = gimple_assign_lhs (stmt);
8095 vec_dest = vect_create_destination_var (scalar_dest, vectype);
8096
8097 /* Handle cond expr. */
8098 for (j = 0; j < ncopies; j++)
8099 {
8100 gassign *new_stmt = NULL;
8101 if (j == 0)
8102 {
8103 if (slp_node)
8104 {
8105 auto_vec<tree, 4> ops;
8106 auto_vec<vec<tree>, 4> vec_defs;
8107
8108 if (masked)
8109 ops.safe_push (cond_expr);
8110 else
8111 {
8112 ops.safe_push (cond_expr0);
8113 ops.safe_push (cond_expr1);
8114 }
8115 ops.safe_push (then_clause);
8116 ops.safe_push (else_clause);
8117 vect_get_slp_defs (ops, slp_node, &vec_defs);
8118 vec_oprnds3 = vec_defs.pop ();
8119 vec_oprnds2 = vec_defs.pop ();
8120 if (!masked)
8121 vec_oprnds1 = vec_defs.pop ();
8122 vec_oprnds0 = vec_defs.pop ();
8123 }
8124 else
8125 {
8126 gimple *gtemp;
8127 if (masked)
8128 {
8129 vec_cond_lhs
8130 = vect_get_vec_def_for_operand (cond_expr, stmt,
8131 comp_vectype);
8132 vect_is_simple_use (cond_expr, stmt_info->vinfo,
8133 &gtemp, &dts[0]);
8134 }
8135 else
8136 {
8137 vec_cond_lhs
8138 = vect_get_vec_def_for_operand (cond_expr0,
8139 stmt, comp_vectype);
8140 vect_is_simple_use (cond_expr0, loop_vinfo, &gtemp, &dts[0]);
8141
8142 vec_cond_rhs
8143 = vect_get_vec_def_for_operand (cond_expr1,
8144 stmt, comp_vectype);
8145 vect_is_simple_use (cond_expr1, loop_vinfo, &gtemp, &dts[1]);
8146 }
8147 if (reduc_index == 1)
8148 vec_then_clause = reduc_def;
8149 else
8150 {
8151 vec_then_clause = vect_get_vec_def_for_operand (then_clause,
8152 stmt);
8153 vect_is_simple_use (then_clause, loop_vinfo,
8154 &gtemp, &dts[2]);
8155 }
8156 if (reduc_index == 2)
8157 vec_else_clause = reduc_def;
8158 else
8159 {
8160 vec_else_clause = vect_get_vec_def_for_operand (else_clause,
8161 stmt);
8162 vect_is_simple_use (else_clause, loop_vinfo, &gtemp, &dts[3]);
8163 }
8164 }
8165 }
8166 else
8167 {
8168 vec_cond_lhs
8169 = vect_get_vec_def_for_stmt_copy (dts[0],
8170 vec_oprnds0.pop ());
8171 if (!masked)
8172 vec_cond_rhs
8173 = vect_get_vec_def_for_stmt_copy (dts[1],
8174 vec_oprnds1.pop ());
8175
8176 vec_then_clause = vect_get_vec_def_for_stmt_copy (dts[2],
8177 vec_oprnds2.pop ());
8178 vec_else_clause = vect_get_vec_def_for_stmt_copy (dts[3],
8179 vec_oprnds3.pop ());
8180 }
8181
8182 if (!slp_node)
8183 {
8184 vec_oprnds0.quick_push (vec_cond_lhs);
8185 if (!masked)
8186 vec_oprnds1.quick_push (vec_cond_rhs);
8187 vec_oprnds2.quick_push (vec_then_clause);
8188 vec_oprnds3.quick_push (vec_else_clause);
8189 }
8190
8191 /* Arguments are ready. Create the new vector stmt. */
8192 FOR_EACH_VEC_ELT (vec_oprnds0, i, vec_cond_lhs)
8193 {
8194 vec_then_clause = vec_oprnds2[i];
8195 vec_else_clause = vec_oprnds3[i];
8196
8197 if (masked)
8198 vec_compare = vec_cond_lhs;
8199 else
8200 {
8201 vec_cond_rhs = vec_oprnds1[i];
8202 if (bitop1 == NOP_EXPR)
8203 vec_compare = build2 (cond_code, vec_cmp_type,
8204 vec_cond_lhs, vec_cond_rhs);
8205 else
8206 {
8207 new_temp = make_ssa_name (vec_cmp_type);
8208 if (bitop1 == BIT_NOT_EXPR)
8209 new_stmt = gimple_build_assign (new_temp, bitop1,
8210 vec_cond_rhs);
8211 else
8212 new_stmt
8213 = gimple_build_assign (new_temp, bitop1, vec_cond_lhs,
8214 vec_cond_rhs);
8215 vect_finish_stmt_generation (stmt, new_stmt, gsi);
8216 if (bitop2 == NOP_EXPR)
8217 vec_compare = new_temp;
8218 else if (bitop2 == BIT_NOT_EXPR)
8219 {
8220 /* Instead of doing ~x ? y : z do x ? z : y. */
8221 vec_compare = new_temp;
8222 std::swap (vec_then_clause, vec_else_clause);
8223 }
8224 else
8225 {
8226 vec_compare = make_ssa_name (vec_cmp_type);
8227 new_stmt
8228 = gimple_build_assign (vec_compare, bitop2,
8229 vec_cond_lhs, new_temp);
8230 vect_finish_stmt_generation (stmt, new_stmt, gsi);
8231 }
8232 }
8233 }
8234 new_temp = make_ssa_name (vec_dest);
8235 new_stmt = gimple_build_assign (new_temp, VEC_COND_EXPR,
8236 vec_compare, vec_then_clause,
8237 vec_else_clause);
8238 vect_finish_stmt_generation (stmt, new_stmt, gsi);
8239 if (slp_node)
8240 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
8241 }
8242
8243 if (slp_node)
8244 continue;
8245
8246 if (j == 0)
8247 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
8248 else
8249 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
8250
8251 prev_stmt_info = vinfo_for_stmt (new_stmt);
8252 }
8253
8254 vec_oprnds0.release ();
8255 vec_oprnds1.release ();
8256 vec_oprnds2.release ();
8257 vec_oprnds3.release ();
8258
8259 return true;
8260 }
8261
8262 /* vectorizable_comparison.
8263
 8264    Check if STMT is a comparison expression that can be vectorized.
8265 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
8266 comparison, put it in VEC_STMT, and insert it at GSI.
8267
8268 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
8269
8270 static bool
8271 vectorizable_comparison (gimple *stmt, gimple_stmt_iterator *gsi,
8272 gimple **vec_stmt, tree reduc_def,
8273 slp_tree slp_node)
8274 {
8275 tree lhs, rhs1, rhs2;
8276 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
8277 tree vectype1 = NULL_TREE, vectype2 = NULL_TREE;
8278 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
8279 tree vec_rhs1 = NULL_TREE, vec_rhs2 = NULL_TREE;
8280 tree new_temp;
8281 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
8282 enum vect_def_type dts[2] = {vect_unknown_def_type, vect_unknown_def_type};
8283 int ndts = 2;
8284 unsigned nunits;
8285 int ncopies;
8286 enum tree_code code, bitop1 = NOP_EXPR, bitop2 = NOP_EXPR;
8287 stmt_vec_info prev_stmt_info = NULL;
8288 int i, j;
8289 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
8290 vec<tree> vec_oprnds0 = vNULL;
8291 vec<tree> vec_oprnds1 = vNULL;
8292 gimple *def_stmt;
8293 tree mask_type;
8294 tree mask;
8295
8296 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
8297 return false;
8298
8299 if (!vectype || !VECTOR_BOOLEAN_TYPE_P (vectype))
8300 return false;
8301
8302 mask_type = vectype;
8303 nunits = TYPE_VECTOR_SUBPARTS (vectype);
8304
8305 if (slp_node)
8306 ncopies = 1;
8307 else
8308 ncopies = vect_get_num_copies (loop_vinfo, vectype);
8309
8310 gcc_assert (ncopies >= 1);
8311 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
8312 && !(STMT_VINFO_DEF_TYPE (stmt_info) == vect_nested_cycle
8313 && reduc_def))
8314 return false;
8315
8316 if (STMT_VINFO_LIVE_P (stmt_info))
8317 {
8318 if (dump_enabled_p ())
8319 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
8320 "value used after loop.\n");
8321 return false;
8322 }
8323
8324 if (!is_gimple_assign (stmt))
8325 return false;
8326
8327 code = gimple_assign_rhs_code (stmt);
8328
8329 if (TREE_CODE_CLASS (code) != tcc_comparison)
8330 return false;
8331
8332 rhs1 = gimple_assign_rhs1 (stmt);
8333 rhs2 = gimple_assign_rhs2 (stmt);
8334
8335 if (!vect_is_simple_use (rhs1, stmt_info->vinfo, &def_stmt,
8336 &dts[0], &vectype1))
8337 return false;
8338
8339 if (!vect_is_simple_use (rhs2, stmt_info->vinfo, &def_stmt,
8340 &dts[1], &vectype2))
8341 return false;
8342
8343 if (vectype1 && vectype2
8344 && TYPE_VECTOR_SUBPARTS (vectype1) != TYPE_VECTOR_SUBPARTS (vectype2))
8345 return false;
8346
8347 vectype = vectype1 ? vectype1 : vectype2;
8348
8349 /* Invariant comparison. */
8350 if (!vectype)
8351 {
8352 vectype = get_vectype_for_scalar_type (TREE_TYPE (rhs1));
8353 if (TYPE_VECTOR_SUBPARTS (vectype) != nunits)
8354 return false;
8355 }
8356 else if (nunits != TYPE_VECTOR_SUBPARTS (vectype))
8357 return false;
8358
8359 /* Can't compare mask and non-mask types. */
8360 if (vectype1 && vectype2
8361 && (VECTOR_BOOLEAN_TYPE_P (vectype1) ^ VECTOR_BOOLEAN_TYPE_P (vectype2)))
8362 return false;
8363
8364 /* Boolean values may have another representation in vectors
8365 and therefore we prefer bit operations over comparison for
8366 them (which also works for scalar masks). We store opcodes
8367 to use in bitop1 and bitop2. Statement is vectorized as
8368 BITOP2 (rhs1 BITOP1 rhs2) or
8369 rhs1 BITOP2 (BITOP1 rhs2)
8370 depending on bitop1 and bitop2 arity. */
8371 if (VECTOR_BOOLEAN_TYPE_P (vectype))
8372 {
8373 if (code == GT_EXPR)
8374 {
8375 bitop1 = BIT_NOT_EXPR;
8376 bitop2 = BIT_AND_EXPR;
8377 }
8378 else if (code == GE_EXPR)
8379 {
8380 bitop1 = BIT_NOT_EXPR;
8381 bitop2 = BIT_IOR_EXPR;
8382 }
8383 else if (code == LT_EXPR)
8384 {
8385 bitop1 = BIT_NOT_EXPR;
8386 bitop2 = BIT_AND_EXPR;
8387 std::swap (rhs1, rhs2);
8388 std::swap (dts[0], dts[1]);
8389 }
8390 else if (code == LE_EXPR)
8391 {
8392 bitop1 = BIT_NOT_EXPR;
8393 bitop2 = BIT_IOR_EXPR;
8394 std::swap (rhs1, rhs2);
8395 std::swap (dts[0], dts[1]);
8396 }
8397 else
8398 {
8399 bitop1 = BIT_XOR_EXPR;
8400 if (code == EQ_EXPR)
8401 bitop2 = BIT_NOT_EXPR;
8402 }
8403 }
8404
8405 if (!vec_stmt)
8406 {
8407 STMT_VINFO_TYPE (stmt_info) = comparison_vec_info_type;
8408 vect_model_simple_cost (stmt_info, ncopies * (1 + (bitop2 != NOP_EXPR)),
8409 dts, ndts, NULL, NULL);
8410 if (bitop1 == NOP_EXPR)
8411 return expand_vec_cmp_expr_p (vectype, mask_type, code);
8412 else
8413 {
8414 machine_mode mode = TYPE_MODE (vectype);
8415 optab optab;
8416
8417 optab = optab_for_tree_code (bitop1, vectype, optab_default);
8418 if (!optab || optab_handler (optab, mode) == CODE_FOR_nothing)
8419 return false;
8420
8421 if (bitop2 != NOP_EXPR)
8422 {
8423 optab = optab_for_tree_code (bitop2, vectype, optab_default);
8424 if (!optab || optab_handler (optab, mode) == CODE_FOR_nothing)
8425 return false;
8426 }
8427 return true;
8428 }
8429 }
8430
8431 /* Transform. */
8432 if (!slp_node)
8433 {
8434 vec_oprnds0.create (1);
8435 vec_oprnds1.create (1);
8436 }
8437
8438 /* Handle def. */
8439 lhs = gimple_assign_lhs (stmt);
8440 mask = vect_create_destination_var (lhs, mask_type);
8441
8442 /* Handle cmp expr. */
8443 for (j = 0; j < ncopies; j++)
8444 {
8445 gassign *new_stmt = NULL;
8446 if (j == 0)
8447 {
8448 if (slp_node)
8449 {
8450 auto_vec<tree, 2> ops;
8451 auto_vec<vec<tree>, 2> vec_defs;
8452
8453 ops.safe_push (rhs1);
8454 ops.safe_push (rhs2);
8455 vect_get_slp_defs (ops, slp_node, &vec_defs);
8456 vec_oprnds1 = vec_defs.pop ();
8457 vec_oprnds0 = vec_defs.pop ();
8458 }
8459 else
8460 {
8461 vec_rhs1 = vect_get_vec_def_for_operand (rhs1, stmt, vectype);
8462 vec_rhs2 = vect_get_vec_def_for_operand (rhs2, stmt, vectype);
8463 }
8464 }
8465 else
8466 {
8467 vec_rhs1 = vect_get_vec_def_for_stmt_copy (dts[0],
8468 vec_oprnds0.pop ());
8469 vec_rhs2 = vect_get_vec_def_for_stmt_copy (dts[1],
8470 vec_oprnds1.pop ());
8471 }
8472
8473 if (!slp_node)
8474 {
8475 vec_oprnds0.quick_push (vec_rhs1);
8476 vec_oprnds1.quick_push (vec_rhs2);
8477 }
8478
8479 /* Arguments are ready. Create the new vector stmt. */
8480 FOR_EACH_VEC_ELT (vec_oprnds0, i, vec_rhs1)
8481 {
8482 vec_rhs2 = vec_oprnds1[i];
8483
8484 new_temp = make_ssa_name (mask);
8485 if (bitop1 == NOP_EXPR)
8486 {
8487 new_stmt = gimple_build_assign (new_temp, code,
8488 vec_rhs1, vec_rhs2);
8489 vect_finish_stmt_generation (stmt, new_stmt, gsi);
8490 }
8491 else
8492 {
8493 if (bitop1 == BIT_NOT_EXPR)
8494 new_stmt = gimple_build_assign (new_temp, bitop1, vec_rhs2);
8495 else
8496 new_stmt = gimple_build_assign (new_temp, bitop1, vec_rhs1,
8497 vec_rhs2);
8498 vect_finish_stmt_generation (stmt, new_stmt, gsi);
8499 if (bitop2 != NOP_EXPR)
8500 {
8501 tree res = make_ssa_name (mask);
8502 if (bitop2 == BIT_NOT_EXPR)
8503 new_stmt = gimple_build_assign (res, bitop2, new_temp);
8504 else
8505 new_stmt = gimple_build_assign (res, bitop2, vec_rhs1,
8506 new_temp);
8507 vect_finish_stmt_generation (stmt, new_stmt, gsi);
8508 }
8509 }
8510 if (slp_node)
8511 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
8512 }
8513
8514 if (slp_node)
8515 continue;
8516
8517 if (j == 0)
8518 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
8519 else
8520 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
8521
8522 prev_stmt_info = vinfo_for_stmt (new_stmt);
8523 }
8524
8525 vec_oprnds0.release ();
8526 vec_oprnds1.release ();
8527
8528 return true;
8529 }
8530
8531 /* If SLP_NODE is nonnull, return true if vectorizable_live_operation
8532 can handle all live statements in the node. Otherwise return true
8533 if STMT is not live or if vectorizable_live_operation can handle it.
8534 GSI and VEC_STMT are as for vectorizable_live_operation. */
8535
8536 static bool
8537 can_vectorize_live_stmts (gimple *stmt, gimple_stmt_iterator *gsi,
8538 slp_tree slp_node, gimple **vec_stmt)
8539 {
8540 if (slp_node)
8541 {
8542 gimple *slp_stmt;
8543 unsigned int i;
8544 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (slp_node), i, slp_stmt)
8545 {
8546 stmt_vec_info slp_stmt_info = vinfo_for_stmt (slp_stmt);
8547 if (STMT_VINFO_LIVE_P (slp_stmt_info)
8548 && !vectorizable_live_operation (slp_stmt, gsi, slp_node, i,
8549 vec_stmt))
8550 return false;
8551 }
8552 }
8553 else if (STMT_VINFO_LIVE_P (vinfo_for_stmt (stmt))
8554 && !vectorizable_live_operation (stmt, gsi, slp_node, -1, vec_stmt))
8555 return false;
8556
8557 return true;
8558 }
8559
8560 /* Make sure the statement is vectorizable. */
8561
8562 bool
8563 vect_analyze_stmt (gimple *stmt, bool *need_to_vectorize, slp_tree node,
8564 slp_instance node_instance)
8565 {
8566 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
8567 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
8568 enum vect_relevant relevance = STMT_VINFO_RELEVANT (stmt_info);
8569 bool ok;
8570 gimple *pattern_stmt;
8571 gimple_seq pattern_def_seq;
8572
8573 if (dump_enabled_p ())
8574 {
8575 dump_printf_loc (MSG_NOTE, vect_location, "==> examining statement: ");
8576 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
8577 }
8578
8579 if (gimple_has_volatile_ops (stmt))
8580 {
8581 if (dump_enabled_p ())
8582 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
8583 "not vectorized: stmt has volatile operands\n");
8584
8585 return false;
8586 }
8587
8588 /* Skip stmts that do not need to be vectorized. In loops this is expected
8589 to include:
8590 - the COND_EXPR which is the loop exit condition
8591 - any LABEL_EXPRs in the loop
8592 - computations that are used only for array indexing or loop control.
8593 In basic blocks we only analyze statements that are a part of some SLP
8594 instance, therefore, all the statements are relevant.
8595
 8596    A pattern statement needs to be analyzed instead of the original statement
 8597    if the original statement is not relevant.  Otherwise, we analyze both
 8598    statements.  In basic blocks we are called from some SLP instance
 8599    traversal, so don't analyze pattern stmts instead; the pattern stmts
 8600    will already be part of an SLP instance.  */
8601
8602 pattern_stmt = STMT_VINFO_RELATED_STMT (stmt_info);
8603 if (!STMT_VINFO_RELEVANT_P (stmt_info)
8604 && !STMT_VINFO_LIVE_P (stmt_info))
8605 {
8606 if (STMT_VINFO_IN_PATTERN_P (stmt_info)
8607 && pattern_stmt
8608 && (STMT_VINFO_RELEVANT_P (vinfo_for_stmt (pattern_stmt))
8609 || STMT_VINFO_LIVE_P (vinfo_for_stmt (pattern_stmt))))
8610 {
8611 /* Analyze PATTERN_STMT instead of the original stmt. */
8612 stmt = pattern_stmt;
8613 stmt_info = vinfo_for_stmt (pattern_stmt);
8614 if (dump_enabled_p ())
8615 {
8616 dump_printf_loc (MSG_NOTE, vect_location,
8617 "==> examining pattern statement: ");
8618 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
8619 }
8620 }
8621 else
8622 {
8623 if (dump_enabled_p ())
8624 dump_printf_loc (MSG_NOTE, vect_location, "irrelevant.\n");
8625
8626 return true;
8627 }
8628 }
8629 else if (STMT_VINFO_IN_PATTERN_P (stmt_info)
8630 && node == NULL
8631 && pattern_stmt
8632 && (STMT_VINFO_RELEVANT_P (vinfo_for_stmt (pattern_stmt))
8633 || STMT_VINFO_LIVE_P (vinfo_for_stmt (pattern_stmt))))
8634 {
8635 /* Analyze PATTERN_STMT too. */
8636 if (dump_enabled_p ())
8637 {
8638 dump_printf_loc (MSG_NOTE, vect_location,
8639 "==> examining pattern statement: ");
8640 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
8641 }
8642
8643 if (!vect_analyze_stmt (pattern_stmt, need_to_vectorize, node,
8644 node_instance))
8645 return false;
8646 }
8647
8648 if (is_pattern_stmt_p (stmt_info)
8649 && node == NULL
8650 && (pattern_def_seq = STMT_VINFO_PATTERN_DEF_SEQ (stmt_info)))
8651 {
8652 gimple_stmt_iterator si;
8653
8654 for (si = gsi_start (pattern_def_seq); !gsi_end_p (si); gsi_next (&si))
8655 {
8656 gimple *pattern_def_stmt = gsi_stmt (si);
8657 if (STMT_VINFO_RELEVANT_P (vinfo_for_stmt (pattern_def_stmt))
8658 || STMT_VINFO_LIVE_P (vinfo_for_stmt (pattern_def_stmt)))
8659 {
8660 /* Analyze def stmt of STMT if it's a pattern stmt. */
8661 if (dump_enabled_p ())
8662 {
8663 dump_printf_loc (MSG_NOTE, vect_location,
8664 "==> examining pattern def statement: ");
8665 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, pattern_def_stmt, 0);
8666 }
8667
8668 if (!vect_analyze_stmt (pattern_def_stmt,
8669 need_to_vectorize, node, node_instance))
8670 return false;
8671 }
8672 }
8673 }
8674
8675 switch (STMT_VINFO_DEF_TYPE (stmt_info))
8676 {
8677 case vect_internal_def:
8678 break;
8679
8680 case vect_reduction_def:
8681 case vect_nested_cycle:
8682 gcc_assert (!bb_vinfo
8683 && (relevance == vect_used_in_outer
8684 || relevance == vect_used_in_outer_by_reduction
8685 || relevance == vect_used_by_reduction
8686 || relevance == vect_unused_in_scope
8687 || relevance == vect_used_only_live));
8688 break;
8689
8690 case vect_induction_def:
8691 gcc_assert (!bb_vinfo);
8692 break;
8693
8694 case vect_constant_def:
8695 case vect_external_def:
8696 case vect_unknown_def_type:
8697 default:
8698 gcc_unreachable ();
8699 }
8700
8701 if (STMT_VINFO_RELEVANT_P (stmt_info))
8702 {
8703 gcc_assert (!VECTOR_MODE_P (TYPE_MODE (gimple_expr_type (stmt))));
8704 gcc_assert (STMT_VINFO_VECTYPE (stmt_info)
8705 || (is_gimple_call (stmt)
8706 && gimple_call_lhs (stmt) == NULL_TREE));
8707 *need_to_vectorize = true;
8708 }
8709
8710 if (PURE_SLP_STMT (stmt_info) && !node)
8711 {
8712 dump_printf_loc (MSG_NOTE, vect_location,
8713 "handled only by SLP analysis\n");
8714 return true;
8715 }
8716
8717 ok = true;
8718 if (!bb_vinfo
8719 && (STMT_VINFO_RELEVANT_P (stmt_info)
8720 || STMT_VINFO_DEF_TYPE (stmt_info) == vect_reduction_def))
8721 ok = (vectorizable_simd_clone_call (stmt, NULL, NULL, node)
8722 || vectorizable_conversion (stmt, NULL, NULL, node)
8723 || vectorizable_shift (stmt, NULL, NULL, node)
8724 || vectorizable_operation (stmt, NULL, NULL, node)
8725 || vectorizable_assignment (stmt, NULL, NULL, node)
8726 || vectorizable_load (stmt, NULL, NULL, node, NULL)
8727 || vectorizable_call (stmt, NULL, NULL, node)
8728 || vectorizable_store (stmt, NULL, NULL, node)
8729 || vectorizable_reduction (stmt, NULL, NULL, node, node_instance)
8730 || vectorizable_induction (stmt, NULL, NULL, node)
8731 || vectorizable_condition (stmt, NULL, NULL, NULL, 0, node)
8732 || vectorizable_comparison (stmt, NULL, NULL, NULL, node));
8733 else
8734 {
8735 if (bb_vinfo)
8736 ok = (vectorizable_simd_clone_call (stmt, NULL, NULL, node)
8737 || vectorizable_conversion (stmt, NULL, NULL, node)
8738 || vectorizable_shift (stmt, NULL, NULL, node)
8739 || vectorizable_operation (stmt, NULL, NULL, node)
8740 || vectorizable_assignment (stmt, NULL, NULL, node)
8741 || vectorizable_load (stmt, NULL, NULL, node, NULL)
8742 || vectorizable_call (stmt, NULL, NULL, node)
8743 || vectorizable_store (stmt, NULL, NULL, node)
8744 || vectorizable_condition (stmt, NULL, NULL, NULL, 0, node)
8745 || vectorizable_comparison (stmt, NULL, NULL, NULL, node));
8746 }
8747
8748 if (!ok)
8749 {
8750 if (dump_enabled_p ())
8751 {
8752 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
8753 "not vectorized: relevant stmt not ");
8754 dump_printf (MSG_MISSED_OPTIMIZATION, "supported: ");
8755 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
8756 }
8757
8758 return false;
8759 }
8760
8761 if (bb_vinfo)
8762 return true;
8763
 8764   /* Stmts that are (also) "live" (i.e. that are used outside the loop)
8765 need extra handling, except for vectorizable reductions. */
8766 if (STMT_VINFO_TYPE (stmt_info) != reduc_vec_info_type
8767 && !can_vectorize_live_stmts (stmt, NULL, node, NULL))
8768 {
8769 if (dump_enabled_p ())
8770 {
8771 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
8772 "not vectorized: live stmt not supported: ");
8773 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
8774 }
8775
8776 return false;
8777 }
8778
8779 return true;
8780 }
8781
8782
8783 /* Function vect_transform_stmt.
8784
8785 Create a vectorized stmt to replace STMT, and insert it at BSI. */
8786
8787 bool
8788 vect_transform_stmt (gimple *stmt, gimple_stmt_iterator *gsi,
8789 bool *grouped_store, slp_tree slp_node,
8790 slp_instance slp_node_instance)
8791 {
8792 bool is_store = false;
8793 gimple *vec_stmt = NULL;
8794 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
8795 bool done;
8796
8797 gcc_assert (slp_node || !PURE_SLP_STMT (stmt_info));
8798 gimple *old_vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
8799
8800 switch (STMT_VINFO_TYPE (stmt_info))
8801 {
8802 case type_demotion_vec_info_type:
8803 case type_promotion_vec_info_type:
8804 case type_conversion_vec_info_type:
8805 done = vectorizable_conversion (stmt, gsi, &vec_stmt, slp_node);
8806 gcc_assert (done);
8807 break;
8808
8809 case induc_vec_info_type:
8810 done = vectorizable_induction (stmt, gsi, &vec_stmt, slp_node);
8811 gcc_assert (done);
8812 break;
8813
8814 case shift_vec_info_type:
8815 done = vectorizable_shift (stmt, gsi, &vec_stmt, slp_node);
8816 gcc_assert (done);
8817 break;
8818
8819 case op_vec_info_type:
8820 done = vectorizable_operation (stmt, gsi, &vec_stmt, slp_node);
8821 gcc_assert (done);
8822 break;
8823
8824 case assignment_vec_info_type:
8825 done = vectorizable_assignment (stmt, gsi, &vec_stmt, slp_node);
8826 gcc_assert (done);
8827 break;
8828
8829 case load_vec_info_type:
8830 done = vectorizable_load (stmt, gsi, &vec_stmt, slp_node,
8831 slp_node_instance);
8832 gcc_assert (done);
8833 break;
8834
8835 case store_vec_info_type:
8836 done = vectorizable_store (stmt, gsi, &vec_stmt, slp_node);
8837 gcc_assert (done);
8838 if (STMT_VINFO_GROUPED_ACCESS (stmt_info) && !slp_node)
8839 {
8840 /* In case of interleaving, the whole chain is vectorized when the
8841 last store in the chain is reached. Store stmts before the last
 8842 	     one are skipped, and their stmt_vec_info shouldn't be freed
8843 meanwhile. */
8844 *grouped_store = true;
8845 if (STMT_VINFO_VEC_STMT (stmt_info))
8846 is_store = true;
8847 }
8848 else
8849 is_store = true;
8850 break;
8851
8852 case condition_vec_info_type:
8853 done = vectorizable_condition (stmt, gsi, &vec_stmt, NULL, 0, slp_node);
8854 gcc_assert (done);
8855 break;
8856
8857 case comparison_vec_info_type:
8858 done = vectorizable_comparison (stmt, gsi, &vec_stmt, NULL, slp_node);
8859 gcc_assert (done);
8860 break;
8861
8862 case call_vec_info_type:
8863 done = vectorizable_call (stmt, gsi, &vec_stmt, slp_node);
8864 stmt = gsi_stmt (*gsi);
8865 if (gimple_call_internal_p (stmt, IFN_MASK_STORE))
8866 is_store = true;
8867 break;
8868
8869 case call_simd_clone_vec_info_type:
8870 done = vectorizable_simd_clone_call (stmt, gsi, &vec_stmt, slp_node);
8871 stmt = gsi_stmt (*gsi);
8872 break;
8873
8874 case reduc_vec_info_type:
8875 done = vectorizable_reduction (stmt, gsi, &vec_stmt, slp_node,
8876 slp_node_instance);
8877 gcc_assert (done);
8878 break;
8879
8880 default:
8881 if (!STMT_VINFO_LIVE_P (stmt_info))
8882 {
8883 if (dump_enabled_p ())
8884 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
8885 "stmt not supported.\n");
8886 gcc_unreachable ();
8887 }
8888 }
8889
8890 /* Verify SLP vectorization doesn't mess with STMT_VINFO_VEC_STMT.
8891 This would break hybrid SLP vectorization. */
8892 if (slp_node)
8893 gcc_assert (!vec_stmt
8894 && STMT_VINFO_VEC_STMT (stmt_info) == old_vec_stmt);
8895
8896 /* Handle inner-loop stmts whose DEF is used in the loop-nest that
8897 is being vectorized, but outside the immediately enclosing loop. */
8898 if (vec_stmt
8899 && STMT_VINFO_LOOP_VINFO (stmt_info)
8900 && nested_in_vect_loop_p (LOOP_VINFO_LOOP (
8901 STMT_VINFO_LOOP_VINFO (stmt_info)), stmt)
8902 && STMT_VINFO_TYPE (stmt_info) != reduc_vec_info_type
8903 && (STMT_VINFO_RELEVANT (stmt_info) == vect_used_in_outer
8904 || STMT_VINFO_RELEVANT (stmt_info) ==
8905 vect_used_in_outer_by_reduction))
8906 {
8907 struct loop *innerloop = LOOP_VINFO_LOOP (
8908 STMT_VINFO_LOOP_VINFO (stmt_info))->inner;
8909 imm_use_iterator imm_iter;
8910 use_operand_p use_p;
8911 tree scalar_dest;
8912 gimple *exit_phi;
8913
8914 if (dump_enabled_p ())
8915 dump_printf_loc (MSG_NOTE, vect_location,
8916 "Record the vdef for outer-loop vectorization.\n");
8917
8918 /* Find the relevant loop-exit phi-node, and record the vec_stmt there
8919 (to be used when vectorizing outer-loop stmts that use the DEF of
8920 STMT). */
8921 if (gimple_code (stmt) == GIMPLE_PHI)
8922 scalar_dest = PHI_RESULT (stmt);
8923 else
8924 scalar_dest = gimple_assign_lhs (stmt);
8925
8926 FOR_EACH_IMM_USE_FAST (use_p, imm_iter, scalar_dest)
8927 {
8928 if (!flow_bb_inside_loop_p (innerloop, gimple_bb (USE_STMT (use_p))))
8929 {
8930 exit_phi = USE_STMT (use_p);
8931 STMT_VINFO_VEC_STMT (vinfo_for_stmt (exit_phi)) = vec_stmt;
8932 }
8933 }
8934 }
8935
8936 /* Handle stmts whose DEF is used outside the loop-nest that is
8937 being vectorized. */
8938 if (STMT_VINFO_TYPE (stmt_info) != reduc_vec_info_type)
8939 {
8940 done = can_vectorize_live_stmts (stmt, gsi, slp_node, &vec_stmt);
8941 gcc_assert (done);
8942 }
8943
8944 if (vec_stmt)
8945 STMT_VINFO_VEC_STMT (stmt_info) = vec_stmt;
8946
8947 return is_store;
8948 }
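
/* A minimal illustrative sketch (kept out of the build with #if 0, and not
   part of the vectorizer proper): how a driver in the spirit of
   vect_transform_loop might invoke vect_transform_stmt for non-SLP
   statements.  All of the real driver's bookkeeping is omitted.  */
#if 0
static void
example_transform_stmts (basic_block bb)
{
  for (gimple_stmt_iterator gsi = gsi_start_bb (bb); !gsi_end_p (gsi);
       gsi_next (&gsi))
    {
      gimple *stmt = gsi_stmt (gsi);
      bool grouped_store = false;
      /* Pure loop vectorization: no SLP node or SLP instance.  */
      bool is_store = vect_transform_stmt (stmt, &gsi, &grouped_store,
					   NULL, NULL);
      /* IS_STORE tells the driver the scalar store can be removed later;
	 GROUPED_STORE tells it the whole interleaving chain was handled
	 at once (see vect_remove_stores below).  */
      (void) is_store;
      (void) grouped_store;
    }
}
#endif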
8949
8950
8951 /* Remove a group of stores (for SLP or interleaving), free their
8952 stmt_vec_info. */
8953
8954 void
8955 vect_remove_stores (gimple *first_stmt)
8956 {
8957 gimple *next = first_stmt;
8958 gimple *tmp;
8959 gimple_stmt_iterator next_si;
8960
8961 while (next)
8962 {
8963 stmt_vec_info stmt_info = vinfo_for_stmt (next);
8964
8965 tmp = GROUP_NEXT_ELEMENT (stmt_info);
8966 if (is_pattern_stmt_p (stmt_info))
8967 next = STMT_VINFO_RELATED_STMT (stmt_info);
8968 /* Free the attached stmt_vec_info and remove the stmt. */
8969 next_si = gsi_for_stmt (next);
8970 unlink_stmt_vdef (next);
8971 gsi_remove (&next_si, true);
8972 release_defs (next);
8973 free_stmt_vec_info (next);
8974 next = tmp;
8975 }
8976 }
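
/* Illustrative sketch only (#if 0, not built): once the last store of an
   interleaving chain has been vectorized, a driver removes the whole
   scalar chain starting from its first element.  STMT stands for a
   hypothetical grouped store.  */
#if 0
static void
example_remove_grouped_store (gimple *stmt)
{
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
  if (STMT_VINFO_GROUPED_ACCESS (stmt_info))
    vect_remove_stores (GROUP_FIRST_ELEMENT (stmt_info));
}
#endif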
8977
8978
8979 /* Function new_stmt_vec_info.
8980
8981 Create and initialize a new stmt_vec_info struct for STMT. */
8982
8983 stmt_vec_info
8984 new_stmt_vec_info (gimple *stmt, vec_info *vinfo)
8985 {
8986 stmt_vec_info res;
8987 res = (stmt_vec_info) xcalloc (1, sizeof (struct _stmt_vec_info));
8988
8989 STMT_VINFO_TYPE (res) = undef_vec_info_type;
8990 STMT_VINFO_STMT (res) = stmt;
8991 res->vinfo = vinfo;
8992 STMT_VINFO_RELEVANT (res) = vect_unused_in_scope;
8993 STMT_VINFO_LIVE_P (res) = false;
8994 STMT_VINFO_VECTYPE (res) = NULL;
8995 STMT_VINFO_VEC_STMT (res) = NULL;
8996 STMT_VINFO_VECTORIZABLE (res) = true;
8997 STMT_VINFO_IN_PATTERN_P (res) = false;
8998 STMT_VINFO_RELATED_STMT (res) = NULL;
8999 STMT_VINFO_PATTERN_DEF_SEQ (res) = NULL;
9000 STMT_VINFO_DATA_REF (res) = NULL;
9001 STMT_VINFO_VEC_REDUCTION_TYPE (res) = TREE_CODE_REDUCTION;
9002 STMT_VINFO_VEC_CONST_COND_REDUC_CODE (res) = ERROR_MARK;
9003
9004 if (gimple_code (stmt) == GIMPLE_PHI
9005 && is_loop_header_bb_p (gimple_bb (stmt)))
9006 STMT_VINFO_DEF_TYPE (res) = vect_unknown_def_type;
9007 else
9008 STMT_VINFO_DEF_TYPE (res) = vect_internal_def;
9009
9010 STMT_VINFO_SAME_ALIGN_REFS (res).create (0);
9011 STMT_SLP_TYPE (res) = loop_vect;
9012 STMT_VINFO_NUM_SLP_USES (res) = 0;
9013
9014 GROUP_FIRST_ELEMENT (res) = NULL;
9015 GROUP_NEXT_ELEMENT (res) = NULL;
9016 GROUP_SIZE (res) = 0;
9017 GROUP_STORE_COUNT (res) = 0;
9018 GROUP_GAP (res) = 0;
9019 GROUP_SAME_DR_STMT (res) = NULL;
9020
9021 return res;
9022 }
9023
9024
9025 /* Create the vector that holds the stmt_vec_info structs. */
9026
9027 void
9028 init_stmt_vec_info_vec (void)
9029 {
9030 gcc_assert (!stmt_vec_info_vec.exists ());
9031 stmt_vec_info_vec.create (50);
9032 }
9033
9034
9035 /* Free the vector that holds the stmt_vec_info structs. */
9036
9037 void
9038 free_stmt_vec_info_vec (void)
9039 {
9040 unsigned int i;
9041 stmt_vec_info info;
9042 FOR_EACH_VEC_ELT (stmt_vec_info_vec, i, info)
9043 if (info != NULL)
9044 free_stmt_vec_info (STMT_VINFO_STMT (info));
9045 gcc_assert (stmt_vec_info_vec.exists ());
9046 stmt_vec_info_vec.release ();
9047 }
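
/* Illustrative sketch only (#if 0, not built): the usual lifecycle of the
   stmt_vec_info machinery around an analysis/transform phase.  STMT and
   VINFO are placeholders for whatever statement and vec_info the caller
   is working with.  */
#if 0
static void
example_stmt_vec_info_lifecycle (gimple *stmt, vec_info *vinfo)
{
  init_stmt_vec_info_vec ();

  /* Attach a fresh stmt_vec_info to STMT; analysis and transformation
     then query it through vinfo_for_stmt.  */
  set_vinfo_for_stmt (stmt, new_stmt_vec_info (stmt, vinfo));

  /* ... analyze and transform ...  */

  /* Releases every recorded stmt_vec_info, including the one above.  */
  free_stmt_vec_info_vec ();
}
#endif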
9048
9049
9050 /* Free stmt vectorization related info. */
9051
9052 void
9053 free_stmt_vec_info (gimple *stmt)
9054 {
9055 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
9056
9057 if (!stmt_info)
9058 return;
9059
9060 /* Check if this statement has a related "pattern stmt"
9061 (introduced by the vectorizer during the pattern recognition
9062 pass). Free the pattern's stmt_vec_info and the def stmts' stmt_vec_infos
9063 too. */
9064 if (STMT_VINFO_IN_PATTERN_P (stmt_info))
9065 {
9066 stmt_vec_info patt_info
9067 = vinfo_for_stmt (STMT_VINFO_RELATED_STMT (stmt_info));
9068 if (patt_info)
9069 {
9070 gimple_seq seq = STMT_VINFO_PATTERN_DEF_SEQ (patt_info);
9071 gimple *patt_stmt = STMT_VINFO_STMT (patt_info);
9072 gimple_set_bb (patt_stmt, NULL);
9073 tree lhs = gimple_get_lhs (patt_stmt);
9074 if (lhs && TREE_CODE (lhs) == SSA_NAME)
9075 release_ssa_name (lhs);
9076 if (seq)
9077 {
9078 gimple_stmt_iterator si;
9079 for (si = gsi_start (seq); !gsi_end_p (si); gsi_next (&si))
9080 {
9081 gimple *seq_stmt = gsi_stmt (si);
9082 gimple_set_bb (seq_stmt, NULL);
9083 lhs = gimple_get_lhs (seq_stmt);
9084 if (lhs && TREE_CODE (lhs) == SSA_NAME)
9085 release_ssa_name (lhs);
9086 free_stmt_vec_info (seq_stmt);
9087 }
9088 }
9089 free_stmt_vec_info (patt_stmt);
9090 }
9091 }
9092
9093 STMT_VINFO_SAME_ALIGN_REFS (stmt_info).release ();
9094 STMT_VINFO_SIMD_CLONE_INFO (stmt_info).release ();
9095 set_vinfo_for_stmt (stmt, NULL);
9096 free (stmt_info);
9097 }
9098
9099
9100 /* Function get_vectype_for_scalar_type_and_size.
9101
9102 Returns the vector type corresponding to SCALAR_TYPE and SIZE as supported
9103 by the target. */
9104
9105 static tree
9106 get_vectype_for_scalar_type_and_size (tree scalar_type, unsigned size)
9107 {
9108 tree orig_scalar_type = scalar_type;
9109 scalar_mode inner_mode;
9110 machine_mode simd_mode;
9111 int nunits;
9112 tree vectype;
9113
9114 if (!is_int_mode (TYPE_MODE (scalar_type), &inner_mode)
9115 && !is_float_mode (TYPE_MODE (scalar_type), &inner_mode))
9116 return NULL_TREE;
9117
9118 unsigned int nbytes = GET_MODE_SIZE (inner_mode);
9119
9120 /* For vector types of elements whose mode precision doesn't
9121 match their type's precision we use an element type of mode
9122 precision. The vectorization routines will have to make sure
9123 they support the proper result truncation/extension.
9124 We also make sure to build vector types with INTEGER_TYPE
9125 component type only. */
9126 if (INTEGRAL_TYPE_P (scalar_type)
9127 && (GET_MODE_BITSIZE (inner_mode) != TYPE_PRECISION (scalar_type)
9128 || TREE_CODE (scalar_type) != INTEGER_TYPE))
9129 scalar_type = build_nonstandard_integer_type (GET_MODE_BITSIZE (inner_mode),
9130 TYPE_UNSIGNED (scalar_type));
9131
9132 /* We shouldn't end up building VECTOR_TYPEs of non-scalar components.
9133 When the component mode passes the above test simply use a type
9134 corresponding to that mode. The theory is that any use that
9135 would cause problems with this will disable vectorization anyway. */
9136 else if (!SCALAR_FLOAT_TYPE_P (scalar_type)
9137 && !INTEGRAL_TYPE_P (scalar_type))
9138 scalar_type = lang_hooks.types.type_for_mode (inner_mode, 1);
9139
9140 /* We can't build a vector type of elements with alignment bigger than
9141 their size. */
9142 else if (nbytes < TYPE_ALIGN_UNIT (scalar_type))
9143 scalar_type = lang_hooks.types.type_for_mode (inner_mode,
9144 TYPE_UNSIGNED (scalar_type));
9145
9146 /* If we fell back to using the mode, fail if there was
9147 no scalar type for it. */
9148 if (scalar_type == NULL_TREE)
9149 return NULL_TREE;
9150
9151 /* If no size was supplied use the mode the target prefers. Otherwise
9152 look up a vector mode of the specified size. */
9153 if (size == 0)
9154 simd_mode = targetm.vectorize.preferred_simd_mode (inner_mode);
9155 else if (!mode_for_vector (inner_mode, size / nbytes).exists (&simd_mode))
9156 return NULL_TREE;
9157 nunits = GET_MODE_SIZE (simd_mode) / nbytes;
9158 /* NOTE: nunits == 1 is allowed to support single element vector types. */
9159 if (nunits < 1)
9160 return NULL_TREE;
9161
9162 vectype = build_vector_type (scalar_type, nunits);
9163
9164 if (!VECTOR_MODE_P (TYPE_MODE (vectype))
9165 && !INTEGRAL_MODE_P (TYPE_MODE (vectype)))
9166 return NULL_TREE;
9167
9168 /* Re-attach the address-space qualifier if we canonicalized the scalar
9169 type. */
9170 if (TYPE_ADDR_SPACE (orig_scalar_type) != TYPE_ADDR_SPACE (vectype))
9171 return build_qualified_type
9172 (vectype, KEEP_QUAL_ADDR_SPACE (TYPE_QUALS (orig_scalar_type)));
9173
9174 return vectype;
9175 }
9176
9177 unsigned int current_vector_size;
9178
9179 /* Function get_vectype_for_scalar_type.
9180
9181 Returns the vector type corresponding to SCALAR_TYPE as supported
9182 by the target. */
9183
9184 tree
9185 get_vectype_for_scalar_type (tree scalar_type)
9186 {
9187 tree vectype;
9188 vectype = get_vectype_for_scalar_type_and_size (scalar_type,
9189 current_vector_size);
9190 if (vectype
9191 && current_vector_size == 0)
9192 current_vector_size = GET_MODE_SIZE (TYPE_MODE (vectype));
9193 return vectype;
9194 }
9195
9196 /* Function get_mask_type_for_scalar_type.
9197
9198 Returns the mask type corresponding to the result of a comparison
9199 of vectors of the specified SCALAR_TYPE, as supported by the target. */
9200
9201 tree
9202 get_mask_type_for_scalar_type (tree scalar_type)
9203 {
9204 tree vectype = get_vectype_for_scalar_type (scalar_type);
9205
9206 if (!vectype)
9207 return NULL;
9208
9209 return build_truth_vector_type (TYPE_VECTOR_SUBPARTS (vectype),
9210 current_vector_size);
9211 }
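
/* Illustrative sketch only (#if 0, not built): querying the two helpers
   above.  The shapes mentioned in the comments assume a hypothetical
   target whose preferred SIMD width is 128 bits; real results are
   entirely target-dependent.  */
#if 0
static void
example_query_vector_types (void)
{
  /* On the assumed target this would be "vector(4) int" (mode V4SI) and,
     if current_vector_size was still 0, would latch it to 16 bytes.  */
  tree vectype = get_vectype_for_scalar_type (integer_type_node);
  if (vectype)
    debug_generic_expr (vectype);

  /* The matching comparison-result type: a scalar-mask boolean vector on
     AVX-512-like targets, an integer vector elsewhere.  */
  tree masktype = get_mask_type_for_scalar_type (integer_type_node);
  if (masktype)
    debug_generic_expr (masktype);
}
#endif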
9212
9213 /* Function get_same_sized_vectype
9214
9215 Returns a vector type corresponding to SCALAR_TYPE with the same
9216 size as VECTOR_TYPE, if supported by the target. */
9217
9218 tree
9219 get_same_sized_vectype (tree scalar_type, tree vector_type)
9220 {
9221 if (VECT_SCALAR_BOOLEAN_TYPE_P (scalar_type))
9222 return build_same_sized_truth_vector_type (vector_type);
9223
9224 return get_vectype_for_scalar_type_and_size
9225 (scalar_type, GET_MODE_SIZE (TYPE_MODE (vector_type)));
9226 }
9227
9228 /* Function vect_is_simple_use.
9229
9230 Input:
9231 VINFO - the vect info of the loop or basic block that is being vectorized.
9232 OPERAND - operand in the loop or bb.
9233 Output:
9234 DEF_STMT - the defining stmt in case OPERAND is an SSA_NAME.
9235 DT - the type of definition
9236
9237 Returns whether a stmt with OPERAND can be vectorized.
9238 For loops, supportable operands are constants, loop invariants, and operands
9239 that are defined by the current iteration of the loop. Unsupportable
9240 operands are those that are defined by a previous iteration of the loop (as
9241 is the case in reduction/induction computations).
9242 For basic blocks, supportable operands are constants and bb invariants.
9243 For now, operands defined outside the basic block are not supported. */
9244
9245 bool
9246 vect_is_simple_use (tree operand, vec_info *vinfo,
9247 gimple **def_stmt, enum vect_def_type *dt)
9248 {
9249 *def_stmt = NULL;
9250 *dt = vect_unknown_def_type;
9251
9252 if (dump_enabled_p ())
9253 {
9254 dump_printf_loc (MSG_NOTE, vect_location,
9255 "vect_is_simple_use: operand ");
9256 dump_generic_expr (MSG_NOTE, TDF_SLIM, operand);
9257 dump_printf (MSG_NOTE, "\n");
9258 }
9259
9260 if (CONSTANT_CLASS_P (operand))
9261 {
9262 *dt = vect_constant_def;
9263 return true;
9264 }
9265
9266 if (is_gimple_min_invariant (operand))
9267 {
9268 *dt = vect_external_def;
9269 return true;
9270 }
9271
9272 if (TREE_CODE (operand) != SSA_NAME)
9273 {
9274 if (dump_enabled_p ())
9275 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
9276 "not ssa-name.\n");
9277 return false;
9278 }
9279
9280 if (SSA_NAME_IS_DEFAULT_DEF (operand))
9281 {
9282 *dt = vect_external_def;
9283 return true;
9284 }
9285
9286 *def_stmt = SSA_NAME_DEF_STMT (operand);
9287 if (dump_enabled_p ())
9288 {
9289 dump_printf_loc (MSG_NOTE, vect_location, "def_stmt: ");
9290 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, *def_stmt, 0);
9291 }
9292
9293 if (! vect_stmt_in_region_p (vinfo, *def_stmt))
9294 *dt = vect_external_def;
9295 else
9296 {
9297 stmt_vec_info stmt_vinfo = vinfo_for_stmt (*def_stmt);
9298 *dt = STMT_VINFO_DEF_TYPE (stmt_vinfo);
9299 }
9300
9301 if (dump_enabled_p ())
9302 {
9303 dump_printf_loc (MSG_NOTE, vect_location, "type of def: ");
9304 switch (*dt)
9305 {
9306 case vect_uninitialized_def:
9307 dump_printf (MSG_NOTE, "uninitialized\n");
9308 break;
9309 case vect_constant_def:
9310 dump_printf (MSG_NOTE, "constant\n");
9311 break;
9312 case vect_external_def:
9313 dump_printf (MSG_NOTE, "external\n");
9314 break;
9315 case vect_internal_def:
9316 dump_printf (MSG_NOTE, "internal\n");
9317 break;
9318 case vect_induction_def:
9319 dump_printf (MSG_NOTE, "induction\n");
9320 break;
9321 case vect_reduction_def:
9322 dump_printf (MSG_NOTE, "reduction\n");
9323 break;
9324 case vect_double_reduction_def:
9325 dump_printf (MSG_NOTE, "double reduction\n");
9326 break;
9327 case vect_nested_cycle:
9328 dump_printf (MSG_NOTE, "nested cycle\n");
9329 break;
9330 case vect_unknown_def_type:
9331 dump_printf (MSG_NOTE, "unknown\n");
9332 break;
9333 }
9334 }
9335
9336 if (*dt == vect_unknown_def_type)
9337 {
9338 if (dump_enabled_p ())
9339 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
9340 "Unsupported pattern.\n");
9341 return false;
9342 }
9343
9344 switch (gimple_code (*def_stmt))
9345 {
9346 case GIMPLE_PHI:
9347 case GIMPLE_ASSIGN:
9348 case GIMPLE_CALL:
9349 break;
9350 default:
9351 if (dump_enabled_p ())
9352 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
9353 "unsupported defining stmt:\n");
9354 return false;
9355 }
9356
9357 return true;
9358 }
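
/* Illustrative sketch only (#if 0, not built): a typical operand check
   during statement analysis.  OP and VINFO are placeholders supplied by
   the hypothetical caller.  */
#if 0
static bool
example_operand_is_usable (tree op, vec_info *vinfo)
{
  gimple *def_stmt;
  enum vect_def_type dt;
  if (!vect_is_simple_use (op, vinfo, &def_stmt, &dt))
    return false;
  /* Constants and external defs need no vectorized definition; internal
     defs will get one from the statement DEF_STMT.  */
  return (dt == vect_constant_def
	  || dt == vect_external_def
	  || dt == vect_internal_def);
}
#endif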
9359
9360 /* Function vect_is_simple_use.
9361
9362 Same as vect_is_simple_use but also determines the vector operand
9363 type of OPERAND and stores it to *VECTYPE. If the definition of
9364 OPERAND is vect_uninitialized_def, vect_constant_def or
9365 vect_external_def *VECTYPE will be set to NULL_TREE and the caller
9366 is responsible for computing the best-suited vector type for the
9367 scalar operand. */
9368
9369 bool
9370 vect_is_simple_use (tree operand, vec_info *vinfo,
9371 gimple **def_stmt, enum vect_def_type *dt, tree *vectype)
9372 {
9373 if (!vect_is_simple_use (operand, vinfo, def_stmt, dt))
9374 return false;
9375
9376 /* Now get a vector type if the def is internal, otherwise supply
9377 NULL_TREE and leave it up to the caller to figure out a proper
9378 type for the use stmt. */
9379 if (*dt == vect_internal_def
9380 || *dt == vect_induction_def
9381 || *dt == vect_reduction_def
9382 || *dt == vect_double_reduction_def
9383 || *dt == vect_nested_cycle)
9384 {
9385 stmt_vec_info stmt_info = vinfo_for_stmt (*def_stmt);
9386
9387 if (STMT_VINFO_IN_PATTERN_P (stmt_info)
9388 && !STMT_VINFO_RELEVANT (stmt_info)
9389 && !STMT_VINFO_LIVE_P (stmt_info))
9390 stmt_info = vinfo_for_stmt (STMT_VINFO_RELATED_STMT (stmt_info));
9391
9392 *vectype = STMT_VINFO_VECTYPE (stmt_info);
9393 gcc_assert (*vectype != NULL_TREE);
9394 }
9395 else if (*dt == vect_uninitialized_def
9396 || *dt == vect_constant_def
9397 || *dt == vect_external_def)
9398 *vectype = NULL_TREE;
9399 else
9400 gcc_unreachable ();
9401
9402 return true;
9403 }
9404
9405
9406 /* Function supportable_widening_operation
9407
9408 Check whether an operation represented by the code CODE is a
9409 widening operation that is supported by the target platform in
9410 vector form (i.e., when operating on arguments of type VECTYPE_IN
9411 producing a result of type VECTYPE_OUT).
9412
9413 Widening operations we currently support are NOP (CONVERT), FLOAT,
9414 WIDEN_MULT, WIDEN_LSHIFT, DOT_PROD and SAD. This function checks if these operations are supported
9415 by the target platform either directly (via vector tree-codes), or via
9416 target builtins.
9417
9418 Output:
9419 - CODE1 and CODE2 are codes of vector operations to be used when
9420 vectorizing the operation, if available.
9421 - MULTI_STEP_CVT determines the number of required intermediate steps in
9422 case of multi-step conversion (like char->short->int - in that case
9423 MULTI_STEP_CVT will be 1).
9424 - INTERM_TYPES contains the intermediate type required to perform the
9425 widening operation (short in the above example). */
9426
9427 bool
9428 supportable_widening_operation (enum tree_code code, gimple *stmt,
9429 tree vectype_out, tree vectype_in,
9430 enum tree_code *code1, enum tree_code *code2,
9431 int *multi_step_cvt,
9432 vec<tree> *interm_types)
9433 {
9434 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
9435 loop_vec_info loop_info = STMT_VINFO_LOOP_VINFO (stmt_info);
9436 struct loop *vect_loop = NULL;
9437 machine_mode vec_mode;
9438 enum insn_code icode1, icode2;
9439 optab optab1, optab2;
9440 tree vectype = vectype_in;
9441 tree wide_vectype = vectype_out;
9442 enum tree_code c1, c2;
9443 int i;
9444 tree prev_type, intermediate_type;
9445 machine_mode intermediate_mode, prev_mode;
9446 optab optab3, optab4;
9447
9448 *multi_step_cvt = 0;
9449 if (loop_info)
9450 vect_loop = LOOP_VINFO_LOOP (loop_info);
9451
9452 switch (code)
9453 {
9454 case WIDEN_MULT_EXPR:
9455 /* The result of a vectorized widening operation usually requires
9456 two vectors (because the widened results do not fit into one vector).
9457 The generated vector results would normally be expected to be
9458 generated in the same order as in the original scalar computation,
9459 i.e. if 8 results are generated in each vector iteration, they are
9460 to be organized as follows:
9461 vect1: [res1,res2,res3,res4],
9462 vect2: [res5,res6,res7,res8].
9463
9464 However, in the special case that the result of the widening
9465 operation is used in a reduction computation only, the order doesn't
9466 matter (because when vectorizing a reduction we change the order of
9467 the computation). Some targets can take advantage of this and
9468 generate more efficient code. For example, targets like Altivec,
9469 that support widen_mult using a sequence of {mult_even,mult_odd}
9470 generate the following vectors:
9471 vect1: [res1,res3,res5,res7],
9472 vect2: [res2,res4,res6,res8].
9473
9474 When vectorizing outer-loops, we execute the inner-loop sequentially
9475 (each vectorized inner-loop iteration contributes to VF outer-loop
9476 iterations in parallel). We therefore don't allow changing the
9477 order of the computation in the inner-loop during outer-loop
9478 vectorization. */
9479 /* TODO: Another case in which order doesn't *really* matter is when we
9480 widen and then contract again, e.g. (short)((int)x * y >> 8).
9481 Normally, pack_trunc performs an even/odd permute, whereas the
9482 repack from an even/odd expansion would be an interleave, which
9483 would be significantly simpler for e.g. AVX2. */
9484 /* In any case, in order to avoid duplicating the code below, recurse
9485 on VEC_WIDEN_MULT_EVEN_EXPR. If it succeeds, all the return values
9486 are properly set up for the caller. If we fail, we'll continue with
9487 a VEC_WIDEN_MULT_LO/HI_EXPR check. */
9488 if (vect_loop
9489 && STMT_VINFO_RELEVANT (stmt_info) == vect_used_by_reduction
9490 && !nested_in_vect_loop_p (vect_loop, stmt)
9491 && supportable_widening_operation (VEC_WIDEN_MULT_EVEN_EXPR,
9492 stmt, vectype_out, vectype_in,
9493 code1, code2, multi_step_cvt,
9494 interm_types))
9495 {
9496 /* Elements in a vector with vect_used_by_reduction property cannot
9497 be reordered if the use chain with this property does not have the
9498 same operation. One such example is s += a * b, where elements
9499 in a and b cannot be reordered. Here we check if the vector defined
9500 by STMT is only directly used in the reduction statement. */
9501 tree lhs = gimple_assign_lhs (stmt);
9502 use_operand_p dummy;
9503 gimple *use_stmt;
9504 stmt_vec_info use_stmt_info = NULL;
9505 if (single_imm_use (lhs, &dummy, &use_stmt)
9506 && (use_stmt_info = vinfo_for_stmt (use_stmt))
9507 && STMT_VINFO_DEF_TYPE (use_stmt_info) == vect_reduction_def)
9508 return true;
9509 }
9510 c1 = VEC_WIDEN_MULT_LO_EXPR;
9511 c2 = VEC_WIDEN_MULT_HI_EXPR;
9512 break;
9513
9514 case DOT_PROD_EXPR:
9515 c1 = DOT_PROD_EXPR;
9516 c2 = DOT_PROD_EXPR;
9517 break;
9518
9519 case SAD_EXPR:
9520 c1 = SAD_EXPR;
9521 c2 = SAD_EXPR;
9522 break;
9523
9524 case VEC_WIDEN_MULT_EVEN_EXPR:
9525 /* Support the recursion induced just above. */
9526 c1 = VEC_WIDEN_MULT_EVEN_EXPR;
9527 c2 = VEC_WIDEN_MULT_ODD_EXPR;
9528 break;
9529
9530 case WIDEN_LSHIFT_EXPR:
9531 c1 = VEC_WIDEN_LSHIFT_LO_EXPR;
9532 c2 = VEC_WIDEN_LSHIFT_HI_EXPR;
9533 break;
9534
9535 CASE_CONVERT:
9536 c1 = VEC_UNPACK_LO_EXPR;
9537 c2 = VEC_UNPACK_HI_EXPR;
9538 break;
9539
9540 case FLOAT_EXPR:
9541 c1 = VEC_UNPACK_FLOAT_LO_EXPR;
9542 c2 = VEC_UNPACK_FLOAT_HI_EXPR;
9543 break;
9544
9545 case FIX_TRUNC_EXPR:
9546 /* ??? Not yet implemented due to missing VEC_UNPACK_FIX_TRUNC_HI_EXPR/
9547 VEC_UNPACK_FIX_TRUNC_LO_EXPR tree codes and optabs used for
9548 computing the operation. */
9549 return false;
9550
9551 default:
9552 gcc_unreachable ();
9553 }
9554
9555 if (BYTES_BIG_ENDIAN && c1 != VEC_WIDEN_MULT_EVEN_EXPR)
9556 std::swap (c1, c2);
9557
9558 if (code == FIX_TRUNC_EXPR)
9559 {
9560 /* The signedness is determined from the output operand. */
9561 optab1 = optab_for_tree_code (c1, vectype_out, optab_default);
9562 optab2 = optab_for_tree_code (c2, vectype_out, optab_default);
9563 }
9564 else
9565 {
9566 optab1 = optab_for_tree_code (c1, vectype, optab_default);
9567 optab2 = optab_for_tree_code (c2, vectype, optab_default);
9568 }
9569
9570 if (!optab1 || !optab2)
9571 return false;
9572
9573 vec_mode = TYPE_MODE (vectype);
9574 if ((icode1 = optab_handler (optab1, vec_mode)) == CODE_FOR_nothing
9575 || (icode2 = optab_handler (optab2, vec_mode)) == CODE_FOR_nothing)
9576 return false;
9577
9578 *code1 = c1;
9579 *code2 = c2;
9580
9581 if (insn_data[icode1].operand[0].mode == TYPE_MODE (wide_vectype)
9582 && insn_data[icode2].operand[0].mode == TYPE_MODE (wide_vectype))
9583 /* For scalar masks we may have different boolean
9584 vector types having the same QImode. Thus we
9585 add an additional check on the number of elements. */
9586 return (!VECTOR_BOOLEAN_TYPE_P (vectype)
9587 || (TYPE_VECTOR_SUBPARTS (vectype) / 2
9588 == TYPE_VECTOR_SUBPARTS (wide_vectype)));
9589
9590 /* Check if it's a multi-step conversion that can be done using intermediate
9591 types. */
9592
9593 prev_type = vectype;
9594 prev_mode = vec_mode;
9595
9596 if (!CONVERT_EXPR_CODE_P (code))
9597 return false;
9598
9599 /* We assume here that there will not be more than MAX_INTERM_CVT_STEPS
9600 intermediate steps in the promotion sequence. We try
9601 MAX_INTERM_CVT_STEPS to get to WIDE_VECTYPE, and fail if we do
9602 not. */
9603 interm_types->create (MAX_INTERM_CVT_STEPS);
9604 for (i = 0; i < MAX_INTERM_CVT_STEPS; i++)
9605 {
9606 intermediate_mode = insn_data[icode1].operand[0].mode;
9607 if (VECTOR_BOOLEAN_TYPE_P (prev_type))
9608 {
9609 intermediate_type
9610 = build_truth_vector_type (TYPE_VECTOR_SUBPARTS (prev_type) / 2,
9611 current_vector_size);
9612 if (intermediate_mode != TYPE_MODE (intermediate_type))
9613 return false;
9614 }
9615 else
9616 intermediate_type
9617 = lang_hooks.types.type_for_mode (intermediate_mode,
9618 TYPE_UNSIGNED (prev_type));
9619
9620 optab3 = optab_for_tree_code (c1, intermediate_type, optab_default);
9621 optab4 = optab_for_tree_code (c2, intermediate_type, optab_default);
9622
9623 if (!optab3 || !optab4
9624 || (icode1 = optab_handler (optab1, prev_mode)) == CODE_FOR_nothing
9625 || insn_data[icode1].operand[0].mode != intermediate_mode
9626 || (icode2 = optab_handler (optab2, prev_mode)) == CODE_FOR_nothing
9627 || insn_data[icode2].operand[0].mode != intermediate_mode
9628 || ((icode1 = optab_handler (optab3, intermediate_mode))
9629 == CODE_FOR_nothing)
9630 || ((icode2 = optab_handler (optab4, intermediate_mode))
9631 == CODE_FOR_nothing))
9632 break;
9633
9634 interm_types->quick_push (intermediate_type);
9635 (*multi_step_cvt)++;
9636
9637 if (insn_data[icode1].operand[0].mode == TYPE_MODE (wide_vectype)
9638 && insn_data[icode2].operand[0].mode == TYPE_MODE (wide_vectype))
9639 return (!VECTOR_BOOLEAN_TYPE_P (vectype)
9640 || (TYPE_VECTOR_SUBPARTS (intermediate_type) / 2
9641 == TYPE_VECTOR_SUBPARTS (wide_vectype)));
9642
9643 prev_type = intermediate_type;
9644 prev_mode = intermediate_mode;
9645 }
9646
9647 interm_types->release ();
9648 return false;
9649 }
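
/* Illustrative sketch only (#if 0, not built): querying a widening
   conversion such as (int) <char value>.  STMT, VECTYPE_OUT and
   VECTYPE_IN are placeholders; the concrete answer described in the
   comment assumes a hypothetical 128-bit target with V16QI/V8HI/V4SI
   modes.  */
#if 0
static void
example_query_widening (gimple *stmt, tree vectype_out, tree vectype_in)
{
  enum tree_code code1, code2;
  int multi_step_cvt;
  vec<tree> interm_types = vNULL;

  if (supportable_widening_operation (NOP_EXPR, stmt, vectype_out,
				      vectype_in, &code1, &code2,
				      &multi_step_cvt, &interm_types))
    {
      /* For a V16QI -> V4SI conversion one would expect
	 code1/code2 == VEC_UNPACK_LO/HI_EXPR, multi_step_cvt == 1 and a
	 single intermediate type (the V8HI vector of shorts); the caller
	 then emits one unpack pair per step.  */
    }
  interm_types.release ();
}
#endif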
9650
9651
9652 /* Function supportable_narrowing_operation
9653
9654 Check whether an operation represented by the code CODE is a
9655 narrowing operation that is supported by the target platform in
9656 vector form (i.e., when operating on arguments of type VECTYPE_IN
9657 and producing a result of type VECTYPE_OUT).
9658
9659 Narrowing operations we currently support are NOP (CONVERT) and
9660 FIX_TRUNC. This function checks if these operations are supported by
9661 the target platform directly via vector tree-codes.
9662
9663 Output:
9664 - CODE1 is the code of a vector operation to be used when
9665 vectorizing the operation, if available.
9666 - MULTI_STEP_CVT determines the number of required intermediate steps in
9667 case of multi-step conversion (like int->short->char - in that case
9668 MULTI_STEP_CVT will be 1).
9669 - INTERM_TYPES contains the intermediate type required to perform the
9670 narrowing operation (short in the above example). */
9671
9672 bool
9673 supportable_narrowing_operation (enum tree_code code,
9674 tree vectype_out, tree vectype_in,
9675 enum tree_code *code1, int *multi_step_cvt,
9676 vec<tree> *interm_types)
9677 {
9678 machine_mode vec_mode;
9679 enum insn_code icode1;
9680 optab optab1, interm_optab;
9681 tree vectype = vectype_in;
9682 tree narrow_vectype = vectype_out;
9683 enum tree_code c1;
9684 tree intermediate_type, prev_type;
9685 machine_mode intermediate_mode, prev_mode;
9686 int i;
9687 bool uns;
9688
9689 *multi_step_cvt = 0;
9690 switch (code)
9691 {
9692 CASE_CONVERT:
9693 c1 = VEC_PACK_TRUNC_EXPR;
9694 break;
9695
9696 case FIX_TRUNC_EXPR:
9697 c1 = VEC_PACK_FIX_TRUNC_EXPR;
9698 break;
9699
9700 case FLOAT_EXPR:
9701 /* ??? Not yet implemented due to missing VEC_PACK_FLOAT_EXPR
9702 tree code and optabs used for computing the operation. */
9703 return false;
9704
9705 default:
9706 gcc_unreachable ();
9707 }
9708
9709 if (code == FIX_TRUNC_EXPR)
9710 /* The signedness is determined from the output operand. */
9711 optab1 = optab_for_tree_code (c1, vectype_out, optab_default);
9712 else
9713 optab1 = optab_for_tree_code (c1, vectype, optab_default);
9714
9715 if (!optab1)
9716 return false;
9717
9718 vec_mode = TYPE_MODE (vectype);
9719 if ((icode1 = optab_handler (optab1, vec_mode)) == CODE_FOR_nothing)
9720 return false;
9721
9722 *code1 = c1;
9723
9724 if (insn_data[icode1].operand[0].mode == TYPE_MODE (narrow_vectype))
9725 /* For scalar masks we may have different boolean
9726 vector types having the same QImode. Thus we
9727 add an additional check on the number of elements. */
9728 return (!VECTOR_BOOLEAN_TYPE_P (vectype)
9729 || (TYPE_VECTOR_SUBPARTS (vectype) * 2
9730 == TYPE_VECTOR_SUBPARTS (narrow_vectype)));
9731
9732 /* Check if it's a multi-step conversion that can be done using intermediate
9733 types. */
9734 prev_mode = vec_mode;
9735 prev_type = vectype;
9736 if (code == FIX_TRUNC_EXPR)
9737 uns = TYPE_UNSIGNED (vectype_out);
9738 else
9739 uns = TYPE_UNSIGNED (vectype);
9740
9741 /* For multi-step FIX_TRUNC_EXPR prefer signed floating to integer
9742 conversion over unsigned, as unsigned FIX_TRUNC_EXPR is often more
9743 costly than signed. */
9744 if (code == FIX_TRUNC_EXPR && uns)
9745 {
9746 enum insn_code icode2;
9747
9748 intermediate_type
9749 = lang_hooks.types.type_for_mode (TYPE_MODE (vectype_out), 0);
9750 interm_optab
9751 = optab_for_tree_code (c1, intermediate_type, optab_default);
9752 if (interm_optab != unknown_optab
9753 && (icode2 = optab_handler (optab1, vec_mode)) != CODE_FOR_nothing
9754 && insn_data[icode1].operand[0].mode
9755 == insn_data[icode2].operand[0].mode)
9756 {
9757 uns = false;
9758 optab1 = interm_optab;
9759 icode1 = icode2;
9760 }
9761 }
9762
9763 /* We assume here that there will not be more than MAX_INTERM_CVT_STEPS
9764 intermediate steps in the narrowing sequence. We try
9765 MAX_INTERM_CVT_STEPS to get to NARROW_VECTYPE, and fail if we do not. */
9766 interm_types->create (MAX_INTERM_CVT_STEPS);
9767 for (i = 0; i < MAX_INTERM_CVT_STEPS; i++)
9768 {
9769 intermediate_mode = insn_data[icode1].operand[0].mode;
9770 if (VECTOR_BOOLEAN_TYPE_P (prev_type))
9771 {
9772 intermediate_type
9773 = build_truth_vector_type (TYPE_VECTOR_SUBPARTS (prev_type) * 2,
9774 current_vector_size);
9775 if (intermediate_mode != TYPE_MODE (intermediate_type))
9776 return false;
9777 }
9778 else
9779 intermediate_type
9780 = lang_hooks.types.type_for_mode (intermediate_mode, uns);
9781 interm_optab
9782 = optab_for_tree_code (VEC_PACK_TRUNC_EXPR, intermediate_type,
9783 optab_default);
9784 if (!interm_optab
9785 || ((icode1 = optab_handler (optab1, prev_mode)) == CODE_FOR_nothing)
9786 || insn_data[icode1].operand[0].mode != intermediate_mode
9787 || ((icode1 = optab_handler (interm_optab, intermediate_mode))
9788 == CODE_FOR_nothing))
9789 break;
9790
9791 interm_types->quick_push (intermediate_type);
9792 (*multi_step_cvt)++;
9793
9794 if (insn_data[icode1].operand[0].mode == TYPE_MODE (narrow_vectype))
9795 return (!VECTOR_BOOLEAN_TYPE_P (vectype)
9796 || (TYPE_VECTOR_SUBPARTS (intermediate_type) * 2
9797 == TYPE_VECTOR_SUBPARTS (narrow_vectype)));
9798
9799 prev_mode = intermediate_mode;
9800 prev_type = intermediate_type;
9801 optab1 = interm_optab;
9802 }
9803
9804 interm_types->release ();
9805 return false;
9806 }
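
/* Illustrative sketch only (#if 0, not built): querying a narrowing
   conversion such as (char) <int value>.  VECTYPE_OUT and VECTYPE_IN are
   placeholders; the expected answer in the comment again assumes a
   hypothetical 128-bit target.  */
#if 0
static void
example_query_narrowing (tree vectype_out, tree vectype_in)
{
  enum tree_code code1;
  int multi_step_cvt;
  vec<tree> interm_types = vNULL;

  if (supportable_narrowing_operation (NOP_EXPR, vectype_out, vectype_in,
				       &code1, &multi_step_cvt,
				       &interm_types))
    {
      /* For a V4SI -> V16QI conversion one would expect
	 code1 == VEC_PACK_TRUNC_EXPR, multi_step_cvt == 1 and one
	 intermediate type (V8HI); each step packs two vectors into one.  */
    }
  interm_types.release ();
}
#endif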