1 /* Statement Analysis and Transformation for Vectorization
2 Copyright (C) 2003-2020 Free Software Foundation, Inc.
3 Contributed by Dorit Naishlos <dorit@il.ibm.com>
4 and Ira Rosen <irar@il.ibm.com>
5
6 This file is part of GCC.
7
8 GCC is free software; you can redistribute it and/or modify it under
9 the terms of the GNU General Public License as published by the Free
10 Software Foundation; either version 3, or (at your option) any later
11 version.
12
13 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
14 WARRANTY; without even the implied warranty of MERCHANTABILITY or
15 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
16 for more details.
17
18 You should have received a copy of the GNU General Public License
19 along with GCC; see the file COPYING3. If not see
20 <http://www.gnu.org/licenses/>. */
21
22 #include "config.h"
23 #include "system.h"
24 #include "coretypes.h"
25 #include "backend.h"
26 #include "target.h"
27 #include "rtl.h"
28 #include "tree.h"
29 #include "gimple.h"
30 #include "ssa.h"
31 #include "optabs-tree.h"
32 #include "insn-config.h"
33 #include "recog.h" /* FIXME: for insn_data */
34 #include "cgraph.h"
35 #include "dumpfile.h"
36 #include "alias.h"
37 #include "fold-const.h"
38 #include "stor-layout.h"
39 #include "tree-eh.h"
40 #include "gimplify.h"
41 #include "gimple-iterator.h"
42 #include "gimplify-me.h"
43 #include "tree-cfg.h"
44 #include "tree-ssa-loop-manip.h"
45 #include "cfgloop.h"
46 #include "explow.h"
47 #include "tree-ssa-loop.h"
48 #include "tree-scalar-evolution.h"
49 #include "tree-vectorizer.h"
50 #include "builtins.h"
51 #include "internal-fn.h"
52 #include "tree-vector-builder.h"
53 #include "vec-perm-indices.h"
54 #include "tree-ssa-loop-niter.h"
55 #include "gimple-fold.h"
56 #include "regs.h"
57 #include "attribs.h"
58
59 /* For lang_hooks.types.type_for_mode. */
60 #include "langhooks.h"
61
62 /* Return the vectorized type for the given statement. */
63
64 tree
65 stmt_vectype (class _stmt_vec_info *stmt_info)
66 {
67 return STMT_VINFO_VECTYPE (stmt_info);
68 }
69
70 /* Return TRUE iff the given statement is in an inner loop relative to
71 the loop being vectorized. */
72 bool
73 stmt_in_inner_loop_p (vec_info *vinfo, class _stmt_vec_info *stmt_info)
74 {
75 gimple *stmt = STMT_VINFO_STMT (stmt_info);
76 basic_block bb = gimple_bb (stmt);
77 loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
78 class loop* loop;
79
80 if (!loop_vinfo)
81 return false;
82
83 loop = LOOP_VINFO_LOOP (loop_vinfo);
84
85 return (bb->loop_father == loop->inner);
86 }
87
88 /* Record the cost of a statement, either by directly informing the
89 target model or by saving it in a vector for later processing.
90 Return a preliminary estimate of the statement's cost. */
91
92 unsigned
93 record_stmt_cost (stmt_vector_for_cost *body_cost_vec, int count,
94 enum vect_cost_for_stmt kind, stmt_vec_info stmt_info,
95 tree vectype, int misalign,
96 enum vect_cost_model_location where)
97 {
98 if ((kind == vector_load || kind == unaligned_load)
99 && (stmt_info && STMT_VINFO_GATHER_SCATTER_P (stmt_info)))
100 kind = vector_gather_load;
101 if ((kind == vector_store || kind == unaligned_store)
102 && (stmt_info && STMT_VINFO_GATHER_SCATTER_P (stmt_info)))
103 kind = vector_scatter_store;
104
105 stmt_info_for_cost si = { count, kind, where, stmt_info, vectype, misalign };
106 body_cost_vec->safe_push (si);
107
108 return (unsigned)
109 (builtin_vectorization_cost (kind, vectype, misalign) * count);
110 }
111
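/* For example, a minimal illustrative sketch (assuming a caller-provided
   COST_VEC, STMT_INFO, VECTYPE and MISALIGN), costing two unaligned vector
   loads in the loop body:

     unsigned estimate
       = record_stmt_cost (&cost_vec, 2, unaligned_load,
                           stmt_info, vectype, misalign, vect_body);

   If STMT_INFO is a gather/scatter access, the load kind is rewritten to
   vector_gather_load (and store kinds to vector_scatter_store) before the
   entry is pushed onto COST_VEC.  */
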
112 /* Return a variable of type ELEM_TYPE[NELEMS]. */
113
114 static tree
115 create_vector_array (tree elem_type, unsigned HOST_WIDE_INT nelems)
116 {
117 return create_tmp_var (build_array_type_nelts (elem_type, nelems),
118 "vect_array");
119 }
120
121 /* ARRAY is an array of vectors created by create_vector_array.
122 Return an SSA_NAME for the vector in index N. The reference
123 is part of the vectorization of STMT_INFO and the vector is associated
124 with scalar destination SCALAR_DEST. */
125
126 static tree
127 read_vector_array (vec_info *vinfo,
128 stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
129 tree scalar_dest, tree array, unsigned HOST_WIDE_INT n)
130 {
131 tree vect_type, vect, vect_name, array_ref;
132 gimple *new_stmt;
133
134 gcc_assert (TREE_CODE (TREE_TYPE (array)) == ARRAY_TYPE);
135 vect_type = TREE_TYPE (TREE_TYPE (array));
136 vect = vect_create_destination_var (scalar_dest, vect_type);
137 array_ref = build4 (ARRAY_REF, vect_type, array,
138 build_int_cst (size_type_node, n),
139 NULL_TREE, NULL_TREE);
140
141 new_stmt = gimple_build_assign (vect, array_ref);
142 vect_name = make_ssa_name (vect, new_stmt);
143 gimple_assign_set_lhs (new_stmt, vect_name);
144 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
145
146 return vect_name;
147 }
148
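/* Illustrative sketch: for an ARRAY created by create_vector_array and
   N == 2, this emits

     vect_dest_3 = vect_array[2];

   before *GSI (vect_dest_3 being a hypothetical SSA name based on
   SCALAR_DEST) and returns that SSA name.  */
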
149 /* ARRAY is an array of vectors created by create_vector_array.
150 Emit code to store SSA_NAME VECT in index N of the array.
151 The store is part of the vectorization of STMT_INFO. */
152
153 static void
154 write_vector_array (vec_info *vinfo,
155 stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
156 tree vect, tree array, unsigned HOST_WIDE_INT n)
157 {
158 tree array_ref;
159 gimple *new_stmt;
160
161 array_ref = build4 (ARRAY_REF, TREE_TYPE (vect), array,
162 build_int_cst (size_type_node, n),
163 NULL_TREE, NULL_TREE);
164
165 new_stmt = gimple_build_assign (array_ref, vect);
166 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
167 }
168
169 /* PTR is a pointer to an array of type TYPE. Return a representation
170 of *PTR. ALIAS_PTR_TYPE is the type used for the constant MEM_REF
171 offset and thus carries the alias info of the replaced references. */
172
173 static tree
174 create_array_ref (tree type, tree ptr, tree alias_ptr_type)
175 {
176 tree mem_ref;
177
178 mem_ref = build2 (MEM_REF, type, ptr, build_int_cst (alias_ptr_type, 0));
179 /* Arrays have the same alignment as their type. */
180 set_ptr_info_alignment (get_ptr_info (ptr), TYPE_ALIGN_UNIT (type), 0);
181 return mem_ref;
182 }
183
184 /* Add a clobber of variable VAR to the vectorization of STMT_INFO.
185 Emit the clobber before *GSI. */
186
187 static void
188 vect_clobber_variable (vec_info *vinfo, stmt_vec_info stmt_info,
189 gimple_stmt_iterator *gsi, tree var)
190 {
191 tree clobber = build_clobber (TREE_TYPE (var));
192 gimple *new_stmt = gimple_build_assign (var, clobber);
193 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
194 }
195
196 /* Utility functions used by vect_mark_stmts_to_be_vectorized. */
197
198 /* Function vect_mark_relevant.
199
200 Mark STMT_INFO as "relevant for vectorization" and add it to WORKLIST. */
201
202 static void
203 vect_mark_relevant (vec<stmt_vec_info> *worklist, stmt_vec_info stmt_info,
204 enum vect_relevant relevant, bool live_p)
205 {
206 enum vect_relevant save_relevant = STMT_VINFO_RELEVANT (stmt_info);
207 bool save_live_p = STMT_VINFO_LIVE_P (stmt_info);
208
209 if (dump_enabled_p ())
210 dump_printf_loc (MSG_NOTE, vect_location,
211 "mark relevant %d, live %d: %G", relevant, live_p,
212 stmt_info->stmt);
213
214 /* If this stmt is an original stmt in a pattern, we might need to mark its
215 related pattern stmt instead of the original stmt. However, such stmts
216 may have their own uses that are not in any pattern, in such cases the
217 stmt itself should be marked. */
218 if (STMT_VINFO_IN_PATTERN_P (stmt_info))
219 {
220 /* This is the last stmt in a sequence that was detected as a
221 pattern that can potentially be vectorized. Don't mark the stmt
222 as relevant/live because it's not going to be vectorized.
223 Instead mark the pattern-stmt that replaces it. */
224
225 if (dump_enabled_p ())
226 dump_printf_loc (MSG_NOTE, vect_location,
227 "last stmt in pattern. don't mark"
228 " relevant/live.\n");
229 stmt_vec_info old_stmt_info = stmt_info;
230 stmt_info = STMT_VINFO_RELATED_STMT (stmt_info);
231 gcc_assert (STMT_VINFO_RELATED_STMT (stmt_info) == old_stmt_info);
232 save_relevant = STMT_VINFO_RELEVANT (stmt_info);
233 save_live_p = STMT_VINFO_LIVE_P (stmt_info);
234 }
235
236 STMT_VINFO_LIVE_P (stmt_info) |= live_p;
237 if (relevant > STMT_VINFO_RELEVANT (stmt_info))
238 STMT_VINFO_RELEVANT (stmt_info) = relevant;
239
240 if (STMT_VINFO_RELEVANT (stmt_info) == save_relevant
241 && STMT_VINFO_LIVE_P (stmt_info) == save_live_p)
242 {
243 if (dump_enabled_p ())
244 dump_printf_loc (MSG_NOTE, vect_location,
245 "already marked relevant/live.\n");
246 return;
247 }
248
249 worklist->safe_push (stmt_info);
250 }
251
252
253 /* Function is_simple_and_all_uses_invariant
254
255 Return true if STMT_INFO is simple and all uses of it are invariant. */
256
257 bool
258 is_simple_and_all_uses_invariant (stmt_vec_info stmt_info,
259 loop_vec_info loop_vinfo)
260 {
261 tree op;
262 ssa_op_iter iter;
263
264 gassign *stmt = dyn_cast <gassign *> (stmt_info->stmt);
265 if (!stmt)
266 return false;
267
268 FOR_EACH_SSA_TREE_OPERAND (op, stmt, iter, SSA_OP_USE)
269 {
270 enum vect_def_type dt = vect_uninitialized_def;
271
272 if (!vect_is_simple_use (op, loop_vinfo, &dt))
273 {
274 if (dump_enabled_p ())
275 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
276 "use not simple.\n");
277 return false;
278 }
279
280 if (dt != vect_external_def && dt != vect_constant_def)
281 return false;
282 }
283 return true;
284 }
285
286 /* Function vect_stmt_relevant_p.
287
288 Return true if STMT_INFO, in the loop that is represented by LOOP_VINFO,
289 is "relevant for vectorization".
290
291 A stmt is considered "relevant for vectorization" if:
292 - it has uses outside the loop.
293 - it has vdefs (it alters memory).
294 - control stmts in the loop (except for the exit condition).
295
296 CHECKME: what other side effects would the vectorizer allow? */
297
298 static bool
299 vect_stmt_relevant_p (stmt_vec_info stmt_info, loop_vec_info loop_vinfo,
300 enum vect_relevant *relevant, bool *live_p)
301 {
302 class loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
303 ssa_op_iter op_iter;
304 imm_use_iterator imm_iter;
305 use_operand_p use_p;
306 def_operand_p def_p;
307
308 *relevant = vect_unused_in_scope;
309 *live_p = false;
310
311 /* cond stmt other than loop exit cond. */
312 if (is_ctrl_stmt (stmt_info->stmt)
313 && STMT_VINFO_TYPE (stmt_info) != loop_exit_ctrl_vec_info_type)
314 *relevant = vect_used_in_scope;
315
316 /* changing memory. */
317 if (gimple_code (stmt_info->stmt) != GIMPLE_PHI)
318 if (gimple_vdef (stmt_info->stmt)
319 && !gimple_clobber_p (stmt_info->stmt))
320 {
321 if (dump_enabled_p ())
322 dump_printf_loc (MSG_NOTE, vect_location,
323 "vec_stmt_relevant_p: stmt has vdefs.\n");
324 *relevant = vect_used_in_scope;
325 }
326
327 /* uses outside the loop. */
328 FOR_EACH_PHI_OR_STMT_DEF (def_p, stmt_info->stmt, op_iter, SSA_OP_DEF)
329 {
330 FOR_EACH_IMM_USE_FAST (use_p, imm_iter, DEF_FROM_PTR (def_p))
331 {
332 basic_block bb = gimple_bb (USE_STMT (use_p));
333 if (!flow_bb_inside_loop_p (loop, bb))
334 {
335 if (is_gimple_debug (USE_STMT (use_p)))
336 continue;
337
338 if (dump_enabled_p ())
339 dump_printf_loc (MSG_NOTE, vect_location,
340 "vec_stmt_relevant_p: used out of loop.\n");
341
342 /* We expect all such uses to be in the loop exit phis
343 (because of loop closed form) */
344 gcc_assert (gimple_code (USE_STMT (use_p)) == GIMPLE_PHI);
345 gcc_assert (bb == single_exit (loop)->dest);
346
347 *live_p = true;
348 }
349 }
350 }
351
352 if (*live_p && *relevant == vect_unused_in_scope
353 && !is_simple_and_all_uses_invariant (stmt_info, loop_vinfo))
354 {
355 if (dump_enabled_p ())
356 dump_printf_loc (MSG_NOTE, vect_location,
357 "vec_stmt_relevant_p: stmt live but not relevant.\n");
358 *relevant = vect_used_only_live;
359 }
360
361 return (*live_p || *relevant);
362 }
363
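/* For instance, in a loop such as (an illustrative sketch)

     for (i = 0; i < n; i++)
       {
         a[i] = b[i] + 1;   /. has a vdef -> *relevant = vect_used_in_scope
         s = b[i];          /. used after the loop -> *live_p = true
       }
     ... = s;

   the store is relevant because it alters memory, while the definition of
   s is live because it is used, via a loop-closed PHI, outside the loop.  */
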
364
365 /* Function exist_non_indexing_operands_for_use_p
366
367 USE is one of the uses attached to STMT_INFO. Check if USE is
368 used in STMT_INFO for anything other than indexing an array. */
369
370 static bool
371 exist_non_indexing_operands_for_use_p (tree use, stmt_vec_info stmt_info)
372 {
373 tree operand;
374
375 /* USE corresponds to some operand in STMT. If there is no data
376 reference in STMT, then any operand that corresponds to USE
377 is not indexing an array. */
378 if (!STMT_VINFO_DATA_REF (stmt_info))
379 return true;
380
381 /* STMT has a data_ref. FORNOW this means that it's of one of
382 the following forms:
383 -1- ARRAY_REF = var
384 -2- var = ARRAY_REF
385 (This should have been verified in analyze_data_refs).
386
387 'var' in the second case corresponds to a def, not a use,
388 so USE cannot correspond to any operands that are not used
389 for array indexing.
390
391 Therefore, all we need to check is if STMT falls into the
392 first case, and whether var corresponds to USE. */
393
394 gassign *assign = dyn_cast <gassign *> (stmt_info->stmt);
395 if (!assign || !gimple_assign_copy_p (assign))
396 {
397 gcall *call = dyn_cast <gcall *> (stmt_info->stmt);
398 if (call && gimple_call_internal_p (call))
399 {
400 internal_fn ifn = gimple_call_internal_fn (call);
401 int mask_index = internal_fn_mask_index (ifn);
402 if (mask_index >= 0
403 && use == gimple_call_arg (call, mask_index))
404 return true;
405 int stored_value_index = internal_fn_stored_value_index (ifn);
406 if (stored_value_index >= 0
407 && use == gimple_call_arg (call, stored_value_index))
408 return true;
409 if (internal_gather_scatter_fn_p (ifn)
410 && use == gimple_call_arg (call, 1))
411 return true;
412 }
413 return false;
414 }
415
416 if (TREE_CODE (gimple_assign_lhs (assign)) == SSA_NAME)
417 return false;
418 operand = gimple_assign_rhs1 (assign);
419 if (TREE_CODE (operand) != SSA_NAME)
420 return false;
421
422 if (operand == use)
423 return true;
424
425 return false;
426 }
427
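/* Illustrative examples (sketch): for the store "a[i_1] = x_2" the use of
   x_2 is a non-indexing operand, so the function returns true for x_2 and
   false for i_1; for the load "x_2 = a[i_1]" every use is part of the
   address computation and the function returns false.  */
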
428
429 /*
430 Function process_use.
431
432 Inputs:
433 - a USE in STMT_VINFO in a loop represented by LOOP_VINFO
434 - RELEVANT - enum value to be set in the STMT_VINFO of the stmt
435 that defined USE. This is done by calling mark_relevant and passing it
436 the WORKLIST (to add DEF_STMT to the WORKLIST in case it is relevant).
437 - FORCE is true if exist_non_indexing_operands_for_use_p check shouldn't
438 be performed.
439
440 Outputs:
441 Generally, LIVE_P and RELEVANT are used to define the liveness and
442 relevance info of the DEF_STMT of this USE:
443 STMT_VINFO_LIVE_P (DEF_stmt_vinfo) <-- live_p
444 STMT_VINFO_RELEVANT (DEF_stmt_vinfo) <-- relevant
445 Exceptions:
446 - case 1: If USE is used only for address computations (e.g. array indexing),
447 which does not need to be directly vectorized, then the liveness/relevance
448 of the respective DEF_STMT is left unchanged.
449 - case 2: If STMT_VINFO is a reduction phi and DEF_STMT is a reduction stmt,
450 we skip DEF_STMT because it has already been processed.
451 - case 3: If DEF_STMT and STMT_VINFO are in different nests, then
452 "relevant" will be modified accordingly.
453
454 Return true if everything is as expected. Return false otherwise. */
455
456 static opt_result
457 process_use (stmt_vec_info stmt_vinfo, tree use, loop_vec_info loop_vinfo,
458 enum vect_relevant relevant, vec<stmt_vec_info> *worklist,
459 bool force)
460 {
461 stmt_vec_info dstmt_vinfo;
462 enum vect_def_type dt;
463
464 /* case 1: we are only interested in uses that need to be vectorized. Uses
465 that are used for address computation are not considered relevant. */
466 if (!force && !exist_non_indexing_operands_for_use_p (use, stmt_vinfo))
467 return opt_result::success ();
468
469 if (!vect_is_simple_use (use, loop_vinfo, &dt, &dstmt_vinfo))
470 return opt_result::failure_at (stmt_vinfo->stmt,
471 "not vectorized:"
472 " unsupported use in stmt.\n");
473
474 if (!dstmt_vinfo)
475 return opt_result::success ();
476
477 basic_block def_bb = gimple_bb (dstmt_vinfo->stmt);
478 basic_block bb = gimple_bb (stmt_vinfo->stmt);
479
480 /* case 2: A reduction phi (STMT) defined by a reduction stmt (DSTMT_VINFO).
481 We have to force the stmt live since the epilogue loop needs it to
482 continue computing the reduction. */
483 if (gimple_code (stmt_vinfo->stmt) == GIMPLE_PHI
484 && STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_reduction_def
485 && gimple_code (dstmt_vinfo->stmt) != GIMPLE_PHI
486 && STMT_VINFO_DEF_TYPE (dstmt_vinfo) == vect_reduction_def
487 && bb->loop_father == def_bb->loop_father)
488 {
489 if (dump_enabled_p ())
490 dump_printf_loc (MSG_NOTE, vect_location,
491 "reduc-stmt defining reduc-phi in the same nest.\n");
492 vect_mark_relevant (worklist, dstmt_vinfo, relevant, true);
493 return opt_result::success ();
494 }
495
496 /* case 3a: outer-loop stmt defining an inner-loop stmt:
497 outer-loop-header-bb:
498 d = dstmt_vinfo
499 inner-loop:
500 stmt # use (d)
501 outer-loop-tail-bb:
502 ... */
503 if (flow_loop_nested_p (def_bb->loop_father, bb->loop_father))
504 {
505 if (dump_enabled_p ())
506 dump_printf_loc (MSG_NOTE, vect_location,
507 "outer-loop def-stmt defining inner-loop stmt.\n");
508
509 switch (relevant)
510 {
511 case vect_unused_in_scope:
512 relevant = (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_nested_cycle) ?
513 vect_used_in_scope : vect_unused_in_scope;
514 break;
515
516 case vect_used_in_outer_by_reduction:
517 gcc_assert (STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_reduction_def);
518 relevant = vect_used_by_reduction;
519 break;
520
521 case vect_used_in_outer:
522 gcc_assert (STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_reduction_def);
523 relevant = vect_used_in_scope;
524 break;
525
526 case vect_used_in_scope:
527 break;
528
529 default:
530 gcc_unreachable ();
531 }
532 }
533
534 /* case 3b: inner-loop stmt defining an outer-loop stmt:
535 outer-loop-header-bb:
536 ...
537 inner-loop:
538 d = dstmt_vinfo
539 outer-loop-tail-bb (or outer-loop-exit-bb in double reduction):
540 stmt # use (d) */
541 else if (flow_loop_nested_p (bb->loop_father, def_bb->loop_father))
542 {
543 if (dump_enabled_p ())
544 dump_printf_loc (MSG_NOTE, vect_location,
545 "inner-loop def-stmt defining outer-loop stmt.\n");
546
547 switch (relevant)
548 {
549 case vect_unused_in_scope:
550 relevant = (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_reduction_def
551 || STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_double_reduction_def) ?
552 vect_used_in_outer_by_reduction : vect_unused_in_scope;
553 break;
554
555 case vect_used_by_reduction:
556 case vect_used_only_live:
557 relevant = vect_used_in_outer_by_reduction;
558 break;
559
560 case vect_used_in_scope:
561 relevant = vect_used_in_outer;
562 break;
563
564 default:
565 gcc_unreachable ();
566 }
567 }
568 /* We are also not interested in uses on loop PHI backedges that are
569 inductions. Otherwise we'll needlessly vectorize the IV increment
570 and cause hybrid SLP for SLP inductions. Unless the PHI is live
571 of course. */
572 else if (gimple_code (stmt_vinfo->stmt) == GIMPLE_PHI
573 && STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_induction_def
574 && ! STMT_VINFO_LIVE_P (stmt_vinfo)
575 && (PHI_ARG_DEF_FROM_EDGE (stmt_vinfo->stmt,
576 loop_latch_edge (bb->loop_father))
577 == use))
578 {
579 if (dump_enabled_p ())
580 dump_printf_loc (MSG_NOTE, vect_location,
581 "induction value on backedge.\n");
582 return opt_result::success ();
583 }
584
585
586 vect_mark_relevant (worklist, dstmt_vinfo, relevant, false);
587 return opt_result::success ();
588 }
589
590
591 /* Function vect_mark_stmts_to_be_vectorized.
592
593 Not all stmts in the loop need to be vectorized. For example:
594
595 for i...
596 for j...
597 1. T0 = i + j
598 2. T1 = a[T0]
599
600 3. j = j + 1
601
602 Stmts 1 and 3 do not need to be vectorized, because loop control and
603 addressing of vectorized data-refs are handled differently.
604
605 This pass detects such stmts. */
606
607 opt_result
608 vect_mark_stmts_to_be_vectorized (loop_vec_info loop_vinfo, bool *fatal)
609 {
610 class loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
611 basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
612 unsigned int nbbs = loop->num_nodes;
613 gimple_stmt_iterator si;
614 unsigned int i;
615 basic_block bb;
616 bool live_p;
617 enum vect_relevant relevant;
618
619 DUMP_VECT_SCOPE ("vect_mark_stmts_to_be_vectorized");
620
621 auto_vec<stmt_vec_info, 64> worklist;
622
623 /* 1. Init worklist. */
624 for (i = 0; i < nbbs; i++)
625 {
626 bb = bbs[i];
627 for (si = gsi_start_phis (bb); !gsi_end_p (si); gsi_next (&si))
628 {
629 stmt_vec_info phi_info = loop_vinfo->lookup_stmt (gsi_stmt (si));
630 if (dump_enabled_p ())
631 dump_printf_loc (MSG_NOTE, vect_location, "init: phi relevant? %G",
632 phi_info->stmt);
633
634 if (vect_stmt_relevant_p (phi_info, loop_vinfo, &relevant, &live_p))
635 vect_mark_relevant (&worklist, phi_info, relevant, live_p);
636 }
637 for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
638 {
639 stmt_vec_info stmt_info = loop_vinfo->lookup_stmt (gsi_stmt (si));
640 if (dump_enabled_p ())
641 dump_printf_loc (MSG_NOTE, vect_location,
642 "init: stmt relevant? %G", stmt_info->stmt);
643
644 if (vect_stmt_relevant_p (stmt_info, loop_vinfo, &relevant, &live_p))
645 vect_mark_relevant (&worklist, stmt_info, relevant, live_p);
646 }
647 }
648
649 /* 2. Process_worklist */
650 while (worklist.length () > 0)
651 {
652 use_operand_p use_p;
653 ssa_op_iter iter;
654
655 stmt_vec_info stmt_vinfo = worklist.pop ();
656 if (dump_enabled_p ())
657 dump_printf_loc (MSG_NOTE, vect_location,
658 "worklist: examine stmt: %G", stmt_vinfo->stmt);
659
660 /* Examine the USEs of STMT. For each USE, mark the stmt that defines it
661 (DEF_STMT) as relevant/irrelevant according to the relevance property
662 of STMT. */
663 relevant = STMT_VINFO_RELEVANT (stmt_vinfo);
664
665 /* Generally, the relevance property of STMT (in STMT_VINFO_RELEVANT) is
666 propagated as is to the DEF_STMTs of its USEs.
667
668 One exception is when STMT has been identified as defining a reduction
669 variable; in this case we set the relevance to vect_used_by_reduction.
670 This is because we distinguish between two kinds of relevant stmts -
671 those that are used by a reduction computation, and those that are
672 (also) used by a regular computation. This allows us later on to
673 identify stmts that are used solely by a reduction, and therefore the
674 order of the results that they produce does not have to be kept. */
675
676 switch (STMT_VINFO_DEF_TYPE (stmt_vinfo))
677 {
678 case vect_reduction_def:
679 gcc_assert (relevant != vect_unused_in_scope);
680 if (relevant != vect_unused_in_scope
681 && relevant != vect_used_in_scope
682 && relevant != vect_used_by_reduction
683 && relevant != vect_used_only_live)
684 return opt_result::failure_at
685 (stmt_vinfo->stmt, "unsupported use of reduction.\n");
686 break;
687
688 case vect_nested_cycle:
689 if (relevant != vect_unused_in_scope
690 && relevant != vect_used_in_outer_by_reduction
691 && relevant != vect_used_in_outer)
692 return opt_result::failure_at
693 (stmt_vinfo->stmt, "unsupported use of nested cycle.\n");
694 break;
695
696 case vect_double_reduction_def:
697 if (relevant != vect_unused_in_scope
698 && relevant != vect_used_by_reduction
699 && relevant != vect_used_only_live)
700 return opt_result::failure_at
701 (stmt_vinfo->stmt, "unsupported use of double reduction.\n");
702 break;
703
704 default:
705 break;
706 }
707
708 if (is_pattern_stmt_p (stmt_vinfo))
709 {
710 /* Pattern statements are not inserted into the code, so
711 FOR_EACH_PHI_OR_STMT_USE optimizes their operands out, and we
712 have to scan the RHS or function arguments instead. */
713 if (gassign *assign = dyn_cast <gassign *> (stmt_vinfo->stmt))
714 {
715 enum tree_code rhs_code = gimple_assign_rhs_code (assign);
716 tree op = gimple_assign_rhs1 (assign);
717
718 i = 1;
719 if (rhs_code == COND_EXPR && COMPARISON_CLASS_P (op))
720 {
721 opt_result res
722 = process_use (stmt_vinfo, TREE_OPERAND (op, 0),
723 loop_vinfo, relevant, &worklist, false);
724 if (!res)
725 return res;
726 res = process_use (stmt_vinfo, TREE_OPERAND (op, 1),
727 loop_vinfo, relevant, &worklist, false);
728 if (!res)
729 return res;
730 i = 2;
731 }
732 for (; i < gimple_num_ops (assign); i++)
733 {
734 op = gimple_op (assign, i);
735 if (TREE_CODE (op) == SSA_NAME)
736 {
737 opt_result res
738 = process_use (stmt_vinfo, op, loop_vinfo, relevant,
739 &worklist, false);
740 if (!res)
741 return res;
742 }
743 }
744 }
745 else if (gcall *call = dyn_cast <gcall *> (stmt_vinfo->stmt))
746 {
747 for (i = 0; i < gimple_call_num_args (call); i++)
748 {
749 tree arg = gimple_call_arg (call, i);
750 opt_result res
751 = process_use (stmt_vinfo, arg, loop_vinfo, relevant,
752 &worklist, false);
753 if (!res)
754 return res;
755 }
756 }
757 }
758 else
759 FOR_EACH_PHI_OR_STMT_USE (use_p, stmt_vinfo->stmt, iter, SSA_OP_USE)
760 {
761 tree op = USE_FROM_PTR (use_p);
762 opt_result res
763 = process_use (stmt_vinfo, op, loop_vinfo, relevant,
764 &worklist, false);
765 if (!res)
766 return res;
767 }
768
769 if (STMT_VINFO_GATHER_SCATTER_P (stmt_vinfo))
770 {
771 gather_scatter_info gs_info;
772 if (!vect_check_gather_scatter (stmt_vinfo, loop_vinfo, &gs_info))
773 gcc_unreachable ();
774 opt_result res
775 = process_use (stmt_vinfo, gs_info.offset, loop_vinfo, relevant,
776 &worklist, true);
777 if (!res)
778 {
779 if (fatal)
780 *fatal = false;
781 return res;
782 }
783 }
784 } /* while worklist */
785
786 return opt_result::success ();
787 }
788
789 /* Function vect_model_simple_cost.
790
791 Models cost for simple operations, i.e. those that only emit ncopies of a
792 single op. Right now, this does not account for multiple insns that could
793 be generated for the single vector op. We will handle that shortly. */
794
795 static void
796 vect_model_simple_cost (vec_info *,
797 stmt_vec_info stmt_info, int ncopies,
798 enum vect_def_type *dt,
799 int ndts,
800 slp_tree node,
801 stmt_vector_for_cost *cost_vec,
802 vect_cost_for_stmt kind = vector_stmt)
803 {
804 int inside_cost = 0, prologue_cost = 0;
805
806 gcc_assert (cost_vec != NULL);
807
808 /* ??? Somehow we need to fix this at the callers. */
809 if (node)
810 ncopies = SLP_TREE_NUMBER_OF_VEC_STMTS (node);
811
812 if (!node)
813 /* Cost the "broadcast" of a scalar operand into a vector operand.
814 Use scalar_to_vec to cost the broadcast, as elsewhere in the vector
815 cost model. */
816 for (int i = 0; i < ndts; i++)
817 if (dt[i] == vect_constant_def || dt[i] == vect_external_def)
818 prologue_cost += record_stmt_cost (cost_vec, 1, scalar_to_vec,
819 stmt_info, 0, vect_prologue);
820
821 /* Adjust for two-operator SLP nodes. */
822 if (node && SLP_TREE_TWO_OPERATORS (node))
823 {
824 ncopies *= 2;
825 inside_cost += record_stmt_cost (cost_vec, ncopies, vec_perm,
826 stmt_info, 0, vect_body);
827 }
828
829 /* Pass the inside-of-loop statements to the target-specific cost model. */
830 inside_cost += record_stmt_cost (cost_vec, ncopies, kind,
831 stmt_info, 0, vect_body);
832
833 if (dump_enabled_p ())
834 dump_printf_loc (MSG_NOTE, vect_location,
835 "vect_model_simple_cost: inside_cost = %d, "
836 "prologue_cost = %d .\n", inside_cost, prologue_cost);
837 }
838
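/* As a sketch of the accounting above: for a non-SLP statement with
   NCOPIES == 2 and one constant operand, this records one scalar_to_vec
   entry in the prologue (the broadcast of the constant) and two
   vector_stmt entries in the loop body.  */
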
839
840 /* Model cost for type demotion and promotion operations. PWR is
841 normally zero for single-step promotions and demotions. It will be
842 one if two-step promotion/demotion is required, and so on. NCOPIES
843 is the number of vector results (and thus number of instructions)
844 for the narrowest end of the operation chain. Each additional
845 step doubles the number of instructions required. */
846
847 static void
848 vect_model_promotion_demotion_cost (stmt_vec_info stmt_info,
849 enum vect_def_type *dt,
850 unsigned int ncopies, int pwr,
851 stmt_vector_for_cost *cost_vec)
852 {
853 int i;
854 int inside_cost = 0, prologue_cost = 0;
855
856 for (i = 0; i < pwr + 1; i++)
857 {
858 inside_cost += record_stmt_cost (cost_vec, ncopies, vec_promote_demote,
859 stmt_info, 0, vect_body);
860 ncopies *= 2;
861 }
862
863 /* FORNOW: Assuming a maximum of 2 args per stmt. */
864 for (i = 0; i < 2; i++)
865 if (dt[i] == vect_constant_def || dt[i] == vect_external_def)
866 prologue_cost += record_stmt_cost (cost_vec, 1, vector_stmt,
867 stmt_info, 0, vect_prologue);
868
869 if (dump_enabled_p ())
870 dump_printf_loc (MSG_NOTE, vect_location,
871 "vect_model_promotion_demotion_cost: inside_cost = %d, "
872 "prologue_cost = %d .\n", inside_cost, prologue_cost);
873 }
874
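/* Worked example (sketch): with NCOPIES == 2 and PWR == 1 (a two-step
   promotion) the loop above records 2 + 4 = 6 vec_promote_demote
   operations in the loop body, plus one prologue vector_stmt for each
   constant or external operand.  */
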
875 /* Returns true if the current function returns DECL. */
876
877 static bool
878 cfun_returns (tree decl)
879 {
880 edge_iterator ei;
881 edge e;
882 FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR_FOR_FN (cfun)->preds)
883 {
884 greturn *ret = safe_dyn_cast <greturn *> (last_stmt (e->src));
885 if (!ret)
886 continue;
887 if (gimple_return_retval (ret) == decl)
888 return true;
889 /* We often end up with an aggregate copy to the result decl,
890 handle that case as well. First skip intermediate clobbers
891 though. */
892 gimple *def = ret;
893 do
894 {
895 def = SSA_NAME_DEF_STMT (gimple_vuse (def));
896 }
897 while (gimple_clobber_p (def));
898 if (is_a <gassign *> (def)
899 && gimple_assign_lhs (def) == gimple_return_retval (ret)
900 && gimple_assign_rhs1 (def) == decl)
901 return true;
902 }
903 return false;
904 }
905
906 /* Function vect_model_store_cost
907
908 Models cost for stores. In the case of grouped accesses, one access
909 has the overhead of the grouped access attributed to it. */
910
911 static void
912 vect_model_store_cost (vec_info *vinfo, stmt_vec_info stmt_info, int ncopies,
913 vect_memory_access_type memory_access_type,
914 vec_load_store_type vls_type, slp_tree slp_node,
915 stmt_vector_for_cost *cost_vec)
916 {
917 unsigned int inside_cost = 0, prologue_cost = 0;
918 stmt_vec_info first_stmt_info = stmt_info;
919 bool grouped_access_p = STMT_VINFO_GROUPED_ACCESS (stmt_info);
920
921 /* ??? Somehow we need to fix this at the callers. */
922 if (slp_node)
923 ncopies = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
924
925 if (vls_type == VLS_STORE_INVARIANT)
926 {
927 if (!slp_node)
928 prologue_cost += record_stmt_cost (cost_vec, 1, scalar_to_vec,
929 stmt_info, 0, vect_prologue);
930 }
931
932 /* Grouped stores update all elements in the group at once,
933 so we want the DR for the first statement. */
934 if (!slp_node && grouped_access_p)
935 first_stmt_info = DR_GROUP_FIRST_ELEMENT (stmt_info);
936
937 /* True if we should include any once-per-group costs as well as
938 the cost of the statement itself. For SLP we only get called
939 once per group anyhow. */
940 bool first_stmt_p = (first_stmt_info == stmt_info);
941
942 /* We assume that the cost of a single store-lanes instruction is
943 equivalent to the cost of DR_GROUP_SIZE separate stores. If a grouped
944 access is instead being provided by a permute-and-store operation,
945 include the cost of the permutes. */
946 if (first_stmt_p
947 && memory_access_type == VMAT_CONTIGUOUS_PERMUTE)
948 {
949 /* Uses high and low interleave or shuffle operations for each
950 needed permute. */
951 int group_size = DR_GROUP_SIZE (first_stmt_info);
952 int nstmts = ncopies * ceil_log2 (group_size) * group_size;
953 inside_cost = record_stmt_cost (cost_vec, nstmts, vec_perm,
954 stmt_info, 0, vect_body);
955
956 if (dump_enabled_p ())
957 dump_printf_loc (MSG_NOTE, vect_location,
958 "vect_model_store_cost: strided group_size = %d .\n",
959 group_size);
960 }
961
962 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
963 /* Costs of the stores. */
964 if (memory_access_type == VMAT_ELEMENTWISE
965 || memory_access_type == VMAT_GATHER_SCATTER)
966 {
967 /* N scalar stores plus extracting the elements. */
968 unsigned int assumed_nunits = vect_nunits_for_cost (vectype);
969 inside_cost += record_stmt_cost (cost_vec,
970 ncopies * assumed_nunits,
971 scalar_store, stmt_info, 0, vect_body);
972 }
973 else
974 vect_get_store_cost (vinfo, stmt_info, ncopies, &inside_cost, cost_vec);
975
976 if (memory_access_type == VMAT_ELEMENTWISE
977 || memory_access_type == VMAT_STRIDED_SLP)
978 {
979 /* N scalar stores plus extracting the elements. */
980 unsigned int assumed_nunits = vect_nunits_for_cost (vectype);
981 inside_cost += record_stmt_cost (cost_vec,
982 ncopies * assumed_nunits,
983 vec_to_scalar, stmt_info, 0, vect_body);
984 }
985
986 /* When vectorizing a store into the function result assign
987 a penalty if the function returns in a multi-register location.
988 In this case we assume we'll end up with having to spill the
989 vector result and do piecewise loads as a conservative estimate. */
990 tree base = get_base_address (STMT_VINFO_DATA_REF (stmt_info)->ref);
991 if (base
992 && (TREE_CODE (base) == RESULT_DECL
993 || (DECL_P (base) && cfun_returns (base)))
994 && !aggregate_value_p (base, cfun->decl))
995 {
996 rtx reg = hard_function_value (TREE_TYPE (base), cfun->decl, 0, 1);
997 /* ??? Handle PARALLEL in some way. */
998 if (REG_P (reg))
999 {
1000 int nregs = hard_regno_nregs (REGNO (reg), GET_MODE (reg));
1001 /* Assume that a single reg-reg move is possible and cheap,
1002 do not account for vector to gp register move cost. */
1003 if (nregs > 1)
1004 {
1005 /* Spill. */
1006 prologue_cost += record_stmt_cost (cost_vec, ncopies,
1007 vector_store,
1008 stmt_info, 0, vect_epilogue);
1009 /* Loads. */
1010 prologue_cost += record_stmt_cost (cost_vec, ncopies * nregs,
1011 scalar_load,
1012 stmt_info, 0, vect_epilogue);
1013 }
1014 }
1015 }
1016
1017 if (dump_enabled_p ())
1018 dump_printf_loc (MSG_NOTE, vect_location,
1019 "vect_model_store_cost: inside_cost = %d, "
1020 "prologue_cost = %d .\n", inside_cost, prologue_cost);
1021 }
1022
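/* Worked example (sketch): a grouped store with DR_GROUP_SIZE == 4,
   NCOPIES == 1 and VMAT_CONTIGUOUS_PERMUTE records
   ncopies * ceil_log2 (4) * 4 == 8 vec_perm operations for the
   interleaving, plus the cost of the vector stores themselves via
   vect_get_store_cost.  */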
1023
1024 /* Calculate cost of DR's memory access. */
1025 void
1026 vect_get_store_cost (vec_info *vinfo, stmt_vec_info stmt_info, int ncopies,
1027 unsigned int *inside_cost,
1028 stmt_vector_for_cost *body_cost_vec)
1029 {
1030 dr_vec_info *dr_info = STMT_VINFO_DR_INFO (stmt_info);
1031 int alignment_support_scheme
1032 = vect_supportable_dr_alignment (vinfo, dr_info, false);
1033
1034 switch (alignment_support_scheme)
1035 {
1036 case dr_aligned:
1037 {
1038 *inside_cost += record_stmt_cost (body_cost_vec, ncopies,
1039 vector_store, stmt_info, 0,
1040 vect_body);
1041
1042 if (dump_enabled_p ())
1043 dump_printf_loc (MSG_NOTE, vect_location,
1044 "vect_model_store_cost: aligned.\n");
1045 break;
1046 }
1047
1048 case dr_unaligned_supported:
1049 {
1050 /* Here, we assign an additional cost for the unaligned store. */
1051 *inside_cost += record_stmt_cost (body_cost_vec, ncopies,
1052 unaligned_store, stmt_info,
1053 DR_MISALIGNMENT (dr_info),
1054 vect_body);
1055 if (dump_enabled_p ())
1056 dump_printf_loc (MSG_NOTE, vect_location,
1057 "vect_model_store_cost: unaligned supported by "
1058 "hardware.\n");
1059 break;
1060 }
1061
1062 case dr_unaligned_unsupported:
1063 {
1064 *inside_cost = VECT_MAX_COST;
1065
1066 if (dump_enabled_p ())
1067 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1068 "vect_model_store_cost: unsupported access.\n");
1069 break;
1070 }
1071
1072 default:
1073 gcc_unreachable ();
1074 }
1075 }
1076
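/* For instance (sketch): a dr_unaligned_supported store with NCOPIES == 2
   adds two unaligned_store entries carrying the DR's misalignment, whereas
   a dr_unaligned_unsupported access forces *INSIDE_COST to VECT_MAX_COST,
   effectively making that vectorization scheme prohibitively expensive.  */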
1077
1078 /* Function vect_model_load_cost
1079
1080 Models cost for loads. In the case of grouped accesses, one access has
1081 the overhead of the grouped access attributed to it. Since unaligned
1082 accesses are supported for loads, we also account for the costs of the
1083 access scheme chosen. */
1084
1085 static void
1086 vect_model_load_cost (vec_info *vinfo,
1087 stmt_vec_info stmt_info, unsigned ncopies, poly_uint64 vf,
1088 vect_memory_access_type memory_access_type,
1089 slp_tree slp_node,
1090 stmt_vector_for_cost *cost_vec)
1091 {
1092 unsigned int inside_cost = 0, prologue_cost = 0;
1093 bool grouped_access_p = STMT_VINFO_GROUPED_ACCESS (stmt_info);
1094
1095 gcc_assert (cost_vec);
1096
1097 /* ??? Somehow we need to fix this at the callers. */
1098 if (slp_node)
1099 ncopies = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
1100
1101 if (slp_node && SLP_TREE_LOAD_PERMUTATION (slp_node).exists ())
1102 {
1103 /* If the load is permuted then the alignment is determined by
1104 the first group element not by the first scalar stmt DR. */
1105 stmt_vec_info first_stmt_info = DR_GROUP_FIRST_ELEMENT (stmt_info);
1106 /* Record the cost for the permutation. */
1107 unsigned n_perms;
1108 unsigned assumed_nunits
1109 = vect_nunits_for_cost (STMT_VINFO_VECTYPE (first_stmt_info));
1110 vect_transform_slp_perm_load (vinfo, slp_node, vNULL, NULL,
1111 vf, true, &n_perms);
1112 inside_cost += record_stmt_cost (cost_vec, n_perms, vec_perm,
1113 first_stmt_info, 0, vect_body);
1114 /* And adjust the number of loads performed. This handles
1115 redundancies as well as loads that are later dead. */
1116 auto_sbitmap perm (DR_GROUP_SIZE (first_stmt_info));
1117 bitmap_clear (perm);
1118 for (unsigned i = 0;
1119 i < SLP_TREE_LOAD_PERMUTATION (slp_node).length (); ++i)
1120 bitmap_set_bit (perm, SLP_TREE_LOAD_PERMUTATION (slp_node)[i]);
1121 ncopies = 0;
1122 bool load_seen = false;
1123 for (unsigned i = 0; i < DR_GROUP_SIZE (first_stmt_info); ++i)
1124 {
1125 if (i % assumed_nunits == 0)
1126 {
1127 if (load_seen)
1128 ncopies++;
1129 load_seen = false;
1130 }
1131 if (bitmap_bit_p (perm, i))
1132 load_seen = true;
1133 }
1134 if (load_seen)
1135 ncopies++;
1136 gcc_assert (ncopies
1137 <= (DR_GROUP_SIZE (first_stmt_info)
1138 - DR_GROUP_GAP (first_stmt_info)
1139 + assumed_nunits - 1) / assumed_nunits);
1140 }
1141
1142 /* Grouped loads read all elements in the group at once,
1143 so we want the DR for the first statement. */
1144 stmt_vec_info first_stmt_info = stmt_info;
1145 if (!slp_node && grouped_access_p)
1146 first_stmt_info = DR_GROUP_FIRST_ELEMENT (stmt_info);
1147
1148 /* True if we should include any once-per-group costs as well as
1149 the cost of the statement itself. For SLP we only get called
1150 once per group anyhow. */
1151 bool first_stmt_p = (first_stmt_info == stmt_info);
1152
1153 /* We assume that the cost of a single load-lanes instruction is
1154 equivalent to the cost of DR_GROUP_SIZE separate loads. If a grouped
1155 access is instead being provided by a load-and-permute operation,
1156 include the cost of the permutes. */
1157 if (first_stmt_p
1158 && memory_access_type == VMAT_CONTIGUOUS_PERMUTE)
1159 {
1160 /* Uses even and odd extract operations or shuffle operations
1161 for each needed permute. */
1162 int group_size = DR_GROUP_SIZE (first_stmt_info);
1163 int nstmts = ncopies * ceil_log2 (group_size) * group_size;
1164 inside_cost += record_stmt_cost (cost_vec, nstmts, vec_perm,
1165 stmt_info, 0, vect_body);
1166
1167 if (dump_enabled_p ())
1168 dump_printf_loc (MSG_NOTE, vect_location,
1169 "vect_model_load_cost: strided group_size = %d .\n",
1170 group_size);
1171 }
1172
1173 /* The loads themselves. */
1174 if (memory_access_type == VMAT_ELEMENTWISE
1175 || memory_access_type == VMAT_GATHER_SCATTER)
1176 {
1177 /* N scalar loads plus gathering them into a vector. */
1178 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
1179 unsigned int assumed_nunits = vect_nunits_for_cost (vectype);
1180 inside_cost += record_stmt_cost (cost_vec,
1181 ncopies * assumed_nunits,
1182 scalar_load, stmt_info, 0, vect_body);
1183 }
1184 else
1185 vect_get_load_cost (vinfo, stmt_info, ncopies, first_stmt_p,
1186 &inside_cost, &prologue_cost,
1187 cost_vec, cost_vec, true);
1188 if (memory_access_type == VMAT_ELEMENTWISE
1189 || memory_access_type == VMAT_STRIDED_SLP)
1190 inside_cost += record_stmt_cost (cost_vec, ncopies, vec_construct,
1191 stmt_info, 0, vect_body);
1192
1193 if (dump_enabled_p ())
1194 dump_printf_loc (MSG_NOTE, vect_location,
1195 "vect_model_load_cost: inside_cost = %d, "
1196 "prologue_cost = %d .\n", inside_cost, prologue_cost);
1197 }
1198
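/* Worked example (sketch): for an SLP load permutation {0, 1} of a group
   with DR_GROUP_SIZE == 4 and an assumed nunits of 2, only the first
   nunits-sized chunk contains used elements, so the pruning loop above
   ends up with NCOPIES == 1: the second vector load of the group is never
   emitted and therefore not costed.  */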
1199
1200 /* Calculate cost of DR's memory access. */
1201 void
1202 vect_get_load_cost (vec_info *vinfo, stmt_vec_info stmt_info, int ncopies,
1203 bool add_realign_cost, unsigned int *inside_cost,
1204 unsigned int *prologue_cost,
1205 stmt_vector_for_cost *prologue_cost_vec,
1206 stmt_vector_for_cost *body_cost_vec,
1207 bool record_prologue_costs)
1208 {
1209 dr_vec_info *dr_info = STMT_VINFO_DR_INFO (stmt_info);
1210 int alignment_support_scheme
1211 = vect_supportable_dr_alignment (vinfo, dr_info, false);
1212
1213 switch (alignment_support_scheme)
1214 {
1215 case dr_aligned:
1216 {
1217 *inside_cost += record_stmt_cost (body_cost_vec, ncopies, vector_load,
1218 stmt_info, 0, vect_body);
1219
1220 if (dump_enabled_p ())
1221 dump_printf_loc (MSG_NOTE, vect_location,
1222 "vect_model_load_cost: aligned.\n");
1223
1224 break;
1225 }
1226 case dr_unaligned_supported:
1227 {
1228 /* Here, we assign an additional cost for the unaligned load. */
1229 *inside_cost += record_stmt_cost (body_cost_vec, ncopies,
1230 unaligned_load, stmt_info,
1231 DR_MISALIGNMENT (dr_info),
1232 vect_body);
1233
1234 if (dump_enabled_p ())
1235 dump_printf_loc (MSG_NOTE, vect_location,
1236 "vect_model_load_cost: unaligned supported by "
1237 "hardware.\n");
1238
1239 break;
1240 }
1241 case dr_explicit_realign:
1242 {
1243 *inside_cost += record_stmt_cost (body_cost_vec, ncopies * 2,
1244 vector_load, stmt_info, 0, vect_body);
1245 *inside_cost += record_stmt_cost (body_cost_vec, ncopies,
1246 vec_perm, stmt_info, 0, vect_body);
1247
1248 /* FIXME: If the misalignment remains fixed across the iterations of
1249 the containing loop, the following cost should be added to the
1250 prologue costs. */
1251 if (targetm.vectorize.builtin_mask_for_load)
1252 *inside_cost += record_stmt_cost (body_cost_vec, 1, vector_stmt,
1253 stmt_info, 0, vect_body);
1254
1255 if (dump_enabled_p ())
1256 dump_printf_loc (MSG_NOTE, vect_location,
1257 "vect_model_load_cost: explicit realign\n");
1258
1259 break;
1260 }
1261 case dr_explicit_realign_optimized:
1262 {
1263 if (dump_enabled_p ())
1264 dump_printf_loc (MSG_NOTE, vect_location,
1265 "vect_model_load_cost: unaligned software "
1266 "pipelined.\n");
1267
1268 /* Unaligned software pipeline has a load of an address, an initial
1269 load, and possibly a mask operation to "prime" the loop. However,
1270 if this is an access in a group of loads, which provide grouped
1271 access, then the above cost should only be considered for one
1272 access in the group. Inside the loop, there is a load op
1273 and a realignment op. */
1274
1275 if (add_realign_cost && record_prologue_costs)
1276 {
1277 *prologue_cost += record_stmt_cost (prologue_cost_vec, 2,
1278 vector_stmt, stmt_info,
1279 0, vect_prologue);
1280 if (targetm.vectorize.builtin_mask_for_load)
1281 *prologue_cost += record_stmt_cost (prologue_cost_vec, 1,
1282 vector_stmt, stmt_info,
1283 0, vect_prologue);
1284 }
1285
1286 *inside_cost += record_stmt_cost (body_cost_vec, ncopies, vector_load,
1287 stmt_info, 0, vect_body);
1288 *inside_cost += record_stmt_cost (body_cost_vec, ncopies, vec_perm,
1289 stmt_info, 0, vect_body);
1290
1291 if (dump_enabled_p ())
1292 dump_printf_loc (MSG_NOTE, vect_location,
1293 "vect_model_load_cost: explicit realign optimized"
1294 "\n");
1295
1296 break;
1297 }
1298
1299 case dr_unaligned_unsupported:
1300 {
1301 *inside_cost = VECT_MAX_COST;
1302
1303 if (dump_enabled_p ())
1304 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1305 "vect_model_load_cost: unsupported access.\n");
1306 break;
1307 }
1308
1309 default:
1310 gcc_unreachable ();
1311 }
1312 }
1313
1314 /* Insert the new stmt NEW_STMT at *GSI or at the appropriate place in
1315 the loop preheader for the vectorized stmt STMT_VINFO. */
1316
1317 static void
1318 vect_init_vector_1 (vec_info *vinfo, stmt_vec_info stmt_vinfo, gimple *new_stmt,
1319 gimple_stmt_iterator *gsi)
1320 {
1321 if (gsi)
1322 vect_finish_stmt_generation (vinfo, stmt_vinfo, new_stmt, gsi);
1323 else
1324 {
1325 loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
1326
1327 if (loop_vinfo)
1328 {
1329 class loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
1330 basic_block new_bb;
1331 edge pe;
1332
1333 if (stmt_vinfo && nested_in_vect_loop_p (loop, stmt_vinfo))
1334 loop = loop->inner;
1335
1336 pe = loop_preheader_edge (loop);
1337 new_bb = gsi_insert_on_edge_immediate (pe, new_stmt);
1338 gcc_assert (!new_bb);
1339 }
1340 else
1341 {
1342 bb_vec_info bb_vinfo = dyn_cast <bb_vec_info> (vinfo);
1343 gimple_stmt_iterator gsi_region_begin = bb_vinfo->region_begin;
1344 gsi_insert_before (&gsi_region_begin, new_stmt, GSI_SAME_STMT);
1345 }
1346 }
1347
1348 if (dump_enabled_p ())
1349 dump_printf_loc (MSG_NOTE, vect_location,
1350 "created new init_stmt: %G", new_stmt);
1351 }
1352
1353 /* Function vect_init_vector.
1354
1355 Insert a new stmt (INIT_STMT) that initializes a new variable of type
1356 TYPE with the value VAL. If TYPE is a vector type and VAL does not have
1357 a vector type, a vector with all elements equal to VAL is created first.
1358 Place the initialization at GSI if it is not NULL. Otherwise, place the
1359 initialization at the loop preheader.
1360 Return the DEF of INIT_STMT.
1361 It will be used in the vectorization of STMT_INFO. */
1362
1363 tree
1364 vect_init_vector (vec_info *vinfo, stmt_vec_info stmt_info, tree val, tree type,
1365 gimple_stmt_iterator *gsi)
1366 {
1367 gimple *init_stmt;
1368 tree new_temp;
1369
1370 /* We abuse this function to push something to an SSA name with initial value 'val'. */
1371 if (! useless_type_conversion_p (type, TREE_TYPE (val)))
1372 {
1373 gcc_assert (TREE_CODE (type) == VECTOR_TYPE);
1374 if (! types_compatible_p (TREE_TYPE (type), TREE_TYPE (val)))
1375 {
1376 /* Scalar boolean value should be transformed into
1377 all zeros or all ones value before building a vector. */
1378 if (VECTOR_BOOLEAN_TYPE_P (type))
1379 {
1380 tree true_val = build_all_ones_cst (TREE_TYPE (type));
1381 tree false_val = build_zero_cst (TREE_TYPE (type));
1382
1383 if (CONSTANT_CLASS_P (val))
1384 val = integer_zerop (val) ? false_val : true_val;
1385 else
1386 {
1387 new_temp = make_ssa_name (TREE_TYPE (type));
1388 init_stmt = gimple_build_assign (new_temp, COND_EXPR,
1389 val, true_val, false_val);
1390 vect_init_vector_1 (vinfo, stmt_info, init_stmt, gsi);
1391 val = new_temp;
1392 }
1393 }
1394 else
1395 {
1396 gimple_seq stmts = NULL;
1397 if (! INTEGRAL_TYPE_P (TREE_TYPE (val)))
1398 val = gimple_build (&stmts, VIEW_CONVERT_EXPR,
1399 TREE_TYPE (type), val);
1400 else
1401 /* ??? Condition vectorization expects us to do
1402 promotion of invariant/external defs. */
1403 val = gimple_convert (&stmts, TREE_TYPE (type), val);
1404 for (gimple_stmt_iterator gsi2 = gsi_start (stmts);
1405 !gsi_end_p (gsi2); )
1406 {
1407 init_stmt = gsi_stmt (gsi2);
1408 gsi_remove (&gsi2, false);
1409 vect_init_vector_1 (vinfo, stmt_info, init_stmt, gsi);
1410 }
1411 }
1412 }
1413 val = build_vector_from_val (type, val);
1414 }
1415
1416 new_temp = vect_get_new_ssa_name (type, vect_simple_var, "cst_");
1417 init_stmt = gimple_build_assign (new_temp, val);
1418 vect_init_vector_1 (vinfo, stmt_info, init_stmt, gsi);
1419 return new_temp;
1420 }
1421
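/* Illustrative sketch, assuming a V4SI vector type VECTYPE: vectorizing a
   use of the integer invariant 3 might call

     tree def = vect_init_vector (vinfo, stmt_info,
                                  build_int_cst (integer_type_node, 3),
                                  vectype, NULL);

   which emits "cst__N = { 3, 3, 3, 3 };" on the loop preheader edge and
   returns the SSA name cst__N.  */
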
1422 /* Function vect_get_vec_def_for_operand_1.
1423
1424 For a defining stmt DEF_STMT_INFO of a scalar stmt, return a vector def
1425 with type DT that will be used in the vectorized stmt. */
1426
1427 tree
1428 vect_get_vec_def_for_operand_1 (stmt_vec_info def_stmt_info,
1429 enum vect_def_type dt)
1430 {
1431 tree vec_oprnd;
1432 stmt_vec_info vec_stmt_info;
1433
1434 switch (dt)
1435 {
1436 /* operand is a constant or a loop invariant. */
1437 case vect_constant_def:
1438 case vect_external_def:
1439 /* Code should use vect_get_vec_def_for_operand. */
1440 gcc_unreachable ();
1441
1442 /* Operand is defined by a loop header phi. In case of nested
1443 cycles we also may have uses of the backedge def. */
1444 case vect_reduction_def:
1445 case vect_double_reduction_def:
1446 case vect_nested_cycle:
1447 case vect_induction_def:
1448 gcc_assert (gimple_code (def_stmt_info->stmt) == GIMPLE_PHI
1449 || dt == vect_nested_cycle);
1450 /* Fallthru. */
1451
1452 /* operand is defined inside the loop. */
1453 case vect_internal_def:
1454 {
1455 /* Get the def from the vectorized stmt. */
1456 vec_stmt_info = STMT_VINFO_VEC_STMT (def_stmt_info);
1457 /* Get vectorized pattern statement. */
1458 if (!vec_stmt_info
1459 && STMT_VINFO_IN_PATTERN_P (def_stmt_info)
1460 && !STMT_VINFO_RELEVANT (def_stmt_info))
1461 vec_stmt_info = (STMT_VINFO_VEC_STMT
1462 (STMT_VINFO_RELATED_STMT (def_stmt_info)));
1463 gcc_assert (vec_stmt_info);
1464 if (gphi *phi = dyn_cast <gphi *> (vec_stmt_info->stmt))
1465 vec_oprnd = PHI_RESULT (phi);
1466 else
1467 vec_oprnd = gimple_get_lhs (vec_stmt_info->stmt);
1468 return vec_oprnd;
1469 }
1470
1471 default:
1472 gcc_unreachable ();
1473 }
1474 }
1475
1476
1477 /* Function vect_get_vec_def_for_operand.
1478
1479 OP is an operand in STMT_VINFO. This function returns a (vector) def
1480 that will be used in the vectorized stmt for STMT_VINFO.
1481
1482 In the case that OP is an SSA_NAME which is defined in the loop, then
1483 STMT_VINFO_VEC_STMT of the defining stmt holds the relevant def.
1484
1485 In case OP is an invariant or constant, a new stmt that creates a vector def
1486 needs to be introduced. VECTYPE may be used to specify a required type for
1487 vector invariant. */
1488
1489 tree
1490 vect_get_vec_def_for_operand (vec_info *vinfo,
1491 tree op, stmt_vec_info stmt_vinfo, tree vectype)
1492 {
1493 gimple *def_stmt;
1494 enum vect_def_type dt;
1495 bool is_simple_use;
1496 loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
1497
1498 if (dump_enabled_p ())
1499 dump_printf_loc (MSG_NOTE, vect_location,
1500 "vect_get_vec_def_for_operand: %T\n", op);
1501
1502 stmt_vec_info def_stmt_info;
1503 is_simple_use = vect_is_simple_use (op, loop_vinfo, &dt,
1504 &def_stmt_info, &def_stmt);
1505 gcc_assert (is_simple_use);
1506 if (def_stmt && dump_enabled_p ())
1507 dump_printf_loc (MSG_NOTE, vect_location, " def_stmt = %G", def_stmt);
1508
1509 if (dt == vect_constant_def || dt == vect_external_def)
1510 {
1511 tree stmt_vectype = STMT_VINFO_VECTYPE (stmt_vinfo);
1512 tree vector_type;
1513
1514 if (vectype)
1515 vector_type = vectype;
1516 else if (VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (op))
1517 && VECTOR_BOOLEAN_TYPE_P (stmt_vectype))
1518 vector_type = truth_type_for (stmt_vectype);
1519 else
1520 vector_type = get_vectype_for_scalar_type (loop_vinfo, TREE_TYPE (op));
1521
1522 gcc_assert (vector_type);
1523 return vect_init_vector (vinfo, stmt_vinfo, op, vector_type, NULL);
1524 }
1525 else
1526 return vect_get_vec_def_for_operand_1 (def_stmt_info, dt);
1527 }
1528
1529
1530 /* Function vect_get_vec_def_for_stmt_copy
1531
1532 Return a vector-def for an operand. This function is used when the
1533 vectorized stmt to be created (by the caller to this function) is a "copy"
1534 created in case the vectorized result cannot fit in one vector, and several
1535 copies of the vector-stmt are required. In this case the vector-def is
1536 retrieved from the vector stmt recorded in the STMT_VINFO_RELATED_STMT field
1537 of the stmt that defines VEC_OPRND. VINFO describes the vectorization.
1538
1539 Context:
1540 In case the vectorization factor (VF) is bigger than the number
1541 of elements that can fit in a vectype (nunits), we have to generate
1542 more than one vector stmt to vectorize the scalar stmt. This situation
1543 arises when there are multiple data-types operated upon in the loop; the
1544 smallest data-type determines the VF, and as a result, when vectorizing
1545 stmts operating on wider types we need to create 'VF/nunits' "copies" of the
1546 vector stmt (each computing a vector of 'nunits' results, and together
1547 computing 'VF' results in each iteration). This function is called when
1548 vectorizing such a stmt (e.g. vectorizing S2 in the illustration below, in
1549 which VF=16 and nunits=4, so the number of copies required is 4):
1550
1551 scalar stmt: vectorized into: STMT_VINFO_RELATED_STMT
1552
1553 S1: x = load VS1.0: vx.0 = memref0 VS1.1
1554 VS1.1: vx.1 = memref1 VS1.2
1555 VS1.2: vx.2 = memref2 VS1.3
1556 VS1.3: vx.3 = memref3
1557
1558 S2: z = x + ... VSnew.0: vz0 = vx.0 + ... VSnew.1
1559 VSnew.1: vz1 = vx.1 + ... VSnew.2
1560 VSnew.2: vz2 = vx.2 + ... VSnew.3
1561 VSnew.3: vz3 = vx.3 + ...
1562
1563 The vectorization of S1 is explained in vectorizable_load.
1564 The vectorization of S2:
1565 To create the first vector-stmt out of the 4 copies - VSnew.0 -
1566 the function 'vect_get_vec_def_for_operand' is called to
1567 get the relevant vector-def for each operand of S2. For operand x it
1568 returns the vector-def 'vx.0'.
1569
1570 To create the remaining copies of the vector-stmt (VSnew.j), this
1571 function is called to get the relevant vector-def for each operand. It is
1572 obtained from the respective VS1.j stmt, which is recorded in the
1573 STMT_VINFO_RELATED_STMT field of the stmt that defines VEC_OPRND.
1574
1575 For example, to obtain the vector-def 'vx.1' in order to create the
1576 vector stmt 'VSnew.1', this function is called with VEC_OPRND='vx.0'.
1577 Given 'vx.0' we obtain the stmt that defines it ('VS1.0'); from the
1578 STMT_VINFO_RELATED_STMT field of 'VS1.0' we obtain the next copy - 'VS1.1',
1579 and return its def ('vx.1').
1580 Overall, to create the above sequence this function will be called 3 times:
1581 vx.1 = vect_get_vec_def_for_stmt_copy (vinfo, vx.0);
1582 vx.2 = vect_get_vec_def_for_stmt_copy (vinfo, vx.1);
1583 vx.3 = vect_get_vec_def_for_stmt_copy (vinfo, vx.2); */
1584
1585 tree
1586 vect_get_vec_def_for_stmt_copy (vec_info *vinfo, tree vec_oprnd)
1587 {
1588 stmt_vec_info def_stmt_info = vinfo->lookup_def (vec_oprnd);
1589 if (!def_stmt_info)
1590 /* Do nothing; can reuse same def. */
1591 return vec_oprnd;
1592
1593 def_stmt_info = STMT_VINFO_RELATED_STMT (def_stmt_info);
1594 gcc_assert (def_stmt_info);
1595 if (gphi *phi = dyn_cast <gphi *> (def_stmt_info->stmt))
1596 vec_oprnd = PHI_RESULT (phi);
1597 else
1598 vec_oprnd = gimple_get_lhs (def_stmt_info->stmt);
1599 return vec_oprnd;
1600 }
1601
1602
1603 /* Get vectorized definitions for the operands to create a copy of an original
1604 stmt. See vect_get_vec_def_for_stmt_copy () for details. */
1605
1606 void
1607 vect_get_vec_defs_for_stmt_copy (vec_info *vinfo,
1608 vec<tree> *vec_oprnds0,
1609 vec<tree> *vec_oprnds1)
1610 {
1611 tree vec_oprnd = vec_oprnds0->pop ();
1612
1613 vec_oprnd = vect_get_vec_def_for_stmt_copy (vinfo, vec_oprnd);
1614 vec_oprnds0->quick_push (vec_oprnd);
1615
1616 if (vec_oprnds1 && vec_oprnds1->length ())
1617 {
1618 vec_oprnd = vec_oprnds1->pop ();
1619 vec_oprnd = vect_get_vec_def_for_stmt_copy (vinfo, vec_oprnd);
1620 vec_oprnds1->quick_push (vec_oprnd);
1621 }
1622 }
1623
1624
1625 /* Get vectorized definitions for OP0 and OP1. */
1626
1627 void
1628 vect_get_vec_defs (vec_info *vinfo, tree op0, tree op1, stmt_vec_info stmt_info,
1629 vec<tree> *vec_oprnds0,
1630 vec<tree> *vec_oprnds1,
1631 slp_tree slp_node)
1632 {
1633 if (slp_node)
1634 {
1635 auto_vec<vec<tree> > vec_defs (SLP_TREE_CHILDREN (slp_node).length ());
1636 vect_get_slp_defs (vinfo, slp_node, &vec_defs, op1 ? 2 : 1);
1637 *vec_oprnds0 = vec_defs[0];
1638 if (op1)
1639 *vec_oprnds1 = vec_defs[1];
1640 }
1641 else
1642 {
1643 tree vec_oprnd;
1644
1645 vec_oprnds0->create (1);
1646 vec_oprnd = vect_get_vec_def_for_operand (vinfo, op0, stmt_info);
1647 vec_oprnds0->quick_push (vec_oprnd);
1648
1649 if (op1)
1650 {
1651 vec_oprnds1->create (1);
1652 vec_oprnd = vect_get_vec_def_for_operand (vinfo, op1, stmt_info);
1653 vec_oprnds1->quick_push (vec_oprnd);
1654 }
1655 }
1656 }
1657
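/* For example (sketch): for a non-SLP statement "z = x + y" a caller
   passes OP0 == x and OP1 == y and receives one vector def for each in
   *VEC_OPRNDS0 and *VEC_OPRNDS1; in the SLP case the defs of all group
   members come from vect_get_slp_defs instead.  */
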
1658 /* Helper function called by vect_finish_replace_stmt and
1659 vect_finish_stmt_generation. Set the location of the new
1660 statement and create and return a stmt_vec_info for it. */
1661
1662 static stmt_vec_info
1663 vect_finish_stmt_generation_1 (vec_info *vinfo,
1664 stmt_vec_info stmt_info, gimple *vec_stmt)
1665 {
1666 stmt_vec_info vec_stmt_info = vinfo->add_stmt (vec_stmt);
1667
1668 if (dump_enabled_p ())
1669 dump_printf_loc (MSG_NOTE, vect_location, "add new stmt: %G", vec_stmt);
1670
1671 if (stmt_info)
1672 {
1673 gimple_set_location (vec_stmt, gimple_location (stmt_info->stmt));
1674
1675 /* While EH edges will generally prevent vectorization, stmt might
1676 e.g. be in a must-not-throw region. Ensure newly created stmts
1677 that could throw are part of the same region. */
1678 int lp_nr = lookup_stmt_eh_lp (stmt_info->stmt);
1679 if (lp_nr != 0 && stmt_could_throw_p (cfun, vec_stmt))
1680 add_stmt_to_eh_lp (vec_stmt, lp_nr);
1681 }
1682 else
1683 gcc_assert (!stmt_could_throw_p (cfun, vec_stmt));
1684
1685 return vec_stmt_info;
1686 }
1687
1688 /* Replace the scalar statement STMT_INFO with a new vector statement VEC_STMT,
1689 which sets the same scalar result as STMT_INFO did. Create and return a
1690 stmt_vec_info for VEC_STMT. */
1691
1692 stmt_vec_info
1693 vect_finish_replace_stmt (vec_info *vinfo,
1694 stmt_vec_info stmt_info, gimple *vec_stmt)
1695 {
1696 gimple *scalar_stmt = vect_orig_stmt (stmt_info)->stmt;
1697 gcc_assert (gimple_get_lhs (scalar_stmt) == gimple_get_lhs (vec_stmt));
1698
1699 gimple_stmt_iterator gsi = gsi_for_stmt (scalar_stmt);
1700 gsi_replace (&gsi, vec_stmt, true);
1701
1702 return vect_finish_stmt_generation_1 (vinfo, stmt_info, vec_stmt);
1703 }
1704
1705 /* Add VEC_STMT to the vectorized implementation of STMT_INFO and insert it
1706 before *GSI. Create and return a stmt_vec_info for VEC_STMT. */
1707
1708 stmt_vec_info
1709 vect_finish_stmt_generation (vec_info *vinfo,
1710 stmt_vec_info stmt_info, gimple *vec_stmt,
1711 gimple_stmt_iterator *gsi)
1712 {
1713 gcc_assert (!stmt_info || gimple_code (stmt_info->stmt) != GIMPLE_LABEL);
1714
1715 if (!gsi_end_p (*gsi)
1716 && gimple_has_mem_ops (vec_stmt))
1717 {
1718 gimple *at_stmt = gsi_stmt (*gsi);
1719 tree vuse = gimple_vuse (at_stmt);
1720 if (vuse && TREE_CODE (vuse) == SSA_NAME)
1721 {
1722 tree vdef = gimple_vdef (at_stmt);
1723 gimple_set_vuse (vec_stmt, gimple_vuse (at_stmt));
1724 /* If we have an SSA vuse and insert a store, update virtual
1725 SSA form to avoid triggering the renamer. Do so only
1726 if we can easily see all uses - which is what almost always
1727 happens with the way vectorized stmts are inserted. */
1728 if ((vdef && TREE_CODE (vdef) == SSA_NAME)
1729 && ((is_gimple_assign (vec_stmt)
1730 && !is_gimple_reg (gimple_assign_lhs (vec_stmt)))
1731 || (is_gimple_call (vec_stmt)
1732 && !(gimple_call_flags (vec_stmt)
1733 & (ECF_CONST|ECF_PURE|ECF_NOVOPS)))))
1734 {
1735 tree new_vdef = copy_ssa_name (vuse, vec_stmt);
1736 gimple_set_vdef (vec_stmt, new_vdef);
1737 SET_USE (gimple_vuse_op (at_stmt), new_vdef);
1738 }
1739 }
1740 }
1741 gsi_insert_before (gsi, vec_stmt, GSI_SAME_STMT);
1742 return vect_finish_stmt_generation_1 (vinfo, stmt_info, vec_stmt);
1743 }
1744
1745 /* We want to vectorize a call to combined function CFN with function
1746 decl FNDECL, using VECTYPE_OUT as the type of the output and VECTYPE_IN
1747 as the types of all inputs. Check whether this is possible using
1748 an internal function, returning its code if so or IFN_LAST if not. */
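/* For example, a sqrt call with VECTYPE_OUT and VECTYPE_IN both V2DF
would typically map to IFN_SQRT, provided the target implements the
corresponding vector optab; otherwise IFN_LAST is returned. */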
1749
1750 static internal_fn
1751 vectorizable_internal_function (combined_fn cfn, tree fndecl,
1752 tree vectype_out, tree vectype_in)
1753 {
1754 internal_fn ifn;
1755 if (internal_fn_p (cfn))
1756 ifn = as_internal_fn (cfn);
1757 else
1758 ifn = associated_internal_fn (fndecl);
1759 if (ifn != IFN_LAST && direct_internal_fn_p (ifn))
1760 {
1761 const direct_internal_fn_info &info = direct_internal_fn (ifn);
1762 if (info.vectorizable)
1763 {
1764 tree type0 = (info.type0 < 0 ? vectype_out : vectype_in);
1765 tree type1 = (info.type1 < 0 ? vectype_out : vectype_in);
1766 if (direct_internal_fn_supported_p (ifn, tree_pair (type0, type1),
1767 OPTIMIZE_FOR_SPEED))
1768 return ifn;
1769 }
1770 }
1771 return IFN_LAST;
1772 }
1773
1774
1775 static tree permute_vec_elements (vec_info *, tree, tree, tree, stmt_vec_info,
1776 gimple_stmt_iterator *);
1777
1778 /* Check whether a load or store statement in the loop described by
1779 LOOP_VINFO is possible in a fully-masked loop. This is testing
1780 whether the vectorizer pass has the appropriate support, as well as
1781 whether the target does.
1782
1783 VLS_TYPE says whether the statement is a load or store and VECTYPE
1784 is the type of the vector being loaded or stored. MEMORY_ACCESS_TYPE
1785 says how the load or store is going to be implemented and GROUP_SIZE
1786 is the number of load or store statements in the containing group.
1787 If the access is a gather load or scatter store, GS_INFO describes
1788 its arguments. If the load or store is conditional, SCALAR_MASK is the
1789 condition under which it occurs.
1790
1791 Clear LOOP_VINFO_CAN_FULLY_MASK_P if a fully-masked loop is not
1792 supported, otherwise record the required mask types. */
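/* For example, for a contiguous access with GROUP_SIZE 2, a vectorization
factor of 8 and vectors of 8 elements, 2 * 8 / 8 = 2 masks would be
recorded for the access. */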
1793
1794 static void
1795 check_load_store_masking (loop_vec_info loop_vinfo, tree vectype,
1796 vec_load_store_type vls_type, int group_size,
1797 vect_memory_access_type memory_access_type,
1798 gather_scatter_info *gs_info, tree scalar_mask)
1799 {
1800 /* Invariant loads need no special support. */
1801 if (memory_access_type == VMAT_INVARIANT)
1802 return;
1803
1804 vec_loop_masks *masks = &LOOP_VINFO_MASKS (loop_vinfo);
1805 machine_mode vecmode = TYPE_MODE (vectype);
1806 bool is_load = (vls_type == VLS_LOAD);
1807 if (memory_access_type == VMAT_LOAD_STORE_LANES)
1808 {
1809 if (is_load
1810 ? !vect_load_lanes_supported (vectype, group_size, true)
1811 : !vect_store_lanes_supported (vectype, group_size, true))
1812 {
1813 if (dump_enabled_p ())
1814 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1815 "can't use a fully-masked loop because the"
1816 " target doesn't have an appropriate masked"
1817 " load/store-lanes instruction.\n");
1818 LOOP_VINFO_CAN_FULLY_MASK_P (loop_vinfo) = false;
1819 return;
1820 }
1821 unsigned int ncopies = vect_get_num_copies (loop_vinfo, vectype);
1822 vect_record_loop_mask (loop_vinfo, masks, ncopies, vectype, scalar_mask);
1823 return;
1824 }
1825
1826 if (memory_access_type == VMAT_GATHER_SCATTER)
1827 {
1828 internal_fn ifn = (is_load
1829 ? IFN_MASK_GATHER_LOAD
1830 : IFN_MASK_SCATTER_STORE);
1831 if (!internal_gather_scatter_fn_supported_p (ifn, vectype,
1832 gs_info->memory_type,
1833 gs_info->offset_vectype,
1834 gs_info->scale))
1835 {
1836 if (dump_enabled_p ())
1837 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1838 "can't use a fully-masked loop because the"
1839 " target doesn't have an appropriate masked"
1840 " gather load or scatter store instruction.\n");
1841 LOOP_VINFO_CAN_FULLY_MASK_P (loop_vinfo) = false;
1842 return;
1843 }
1844 unsigned int ncopies = vect_get_num_copies (loop_vinfo, vectype);
1845 vect_record_loop_mask (loop_vinfo, masks, ncopies, vectype, scalar_mask);
1846 return;
1847 }
1848
1849 if (memory_access_type != VMAT_CONTIGUOUS
1850 && memory_access_type != VMAT_CONTIGUOUS_PERMUTE)
1851 {
1852 /* Element X of the data must come from iteration i * VF + X of the
1853 scalar loop. We need more work to support other mappings. */
1854 if (dump_enabled_p ())
1855 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1856 "can't use a fully-masked loop because an access"
1857 " isn't contiguous.\n");
1858 LOOP_VINFO_CAN_FULLY_MASK_P (loop_vinfo) = false;
1859 return;
1860 }
1861
1862 machine_mode mask_mode;
1863 if (!VECTOR_MODE_P (vecmode)
1864 || !targetm.vectorize.get_mask_mode (vecmode).exists (&mask_mode)
1865 || !can_vec_mask_load_store_p (vecmode, mask_mode, is_load))
1866 {
1867 if (dump_enabled_p ())
1868 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1869 "can't use a fully-masked loop because the target"
1870 " doesn't have the appropriate masked load or"
1871 " store.\n");
1872 LOOP_VINFO_CAN_FULLY_MASK_P (loop_vinfo) = false;
1873 return;
1874 }
1875 /* We might load more scalars than we need for permuting SLP loads.
1876 We checked in get_group_load_store_type that the extra elements
1877 don't leak into a new vector. */
1878 poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
1879 poly_uint64 vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
1880 unsigned int nvectors;
1881 if (can_div_away_from_zero_p (group_size * vf, nunits, &nvectors))
1882 vect_record_loop_mask (loop_vinfo, masks, nvectors, vectype, scalar_mask);
1883 else
1884 gcc_unreachable ();
1885 }
1886
1887 /* Return the mask input to a masked load or store. VEC_MASK is the vectorized
1888 form of the scalar mask condition and LOOP_MASK, if nonnull, is the mask
1889 that needs to be applied to all loads and stores in a vectorized loop.
1890 Return VEC_MASK if LOOP_MASK is null, otherwise return VEC_MASK & LOOP_MASK.
1891
1892 MASK_TYPE is the type of both masks. If new statements are needed,
1893 insert them before GSI. */
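/* For example (the SSA names here are illustrative), with both masks
present this emits something like

vec_mask_and_1 = vec_mask_2 & loop_mask_3;

before GSI and returns the new SSA name. */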
1894
1895 static tree
1896 prepare_load_store_mask (tree mask_type, tree loop_mask, tree vec_mask,
1897 gimple_stmt_iterator *gsi)
1898 {
1899 gcc_assert (useless_type_conversion_p (mask_type, TREE_TYPE (vec_mask)));
1900 if (!loop_mask)
1901 return vec_mask;
1902
1903 gcc_assert (TREE_TYPE (loop_mask) == mask_type);
1904 tree and_res = make_temp_ssa_name (mask_type, NULL, "vec_mask_and");
1905 gimple *and_stmt = gimple_build_assign (and_res, BIT_AND_EXPR,
1906 vec_mask, loop_mask);
1907 gsi_insert_before (gsi, and_stmt, GSI_SAME_STMT);
1908 return and_res;
1909 }
1910
1911 /* Determine whether we can use a gather load or scatter store to vectorize
1912 strided load or store STMT_INFO by truncating the current offset to a
1913 smaller width. We need to be able to construct an offset vector:
1914
1915 { 0, X, X*2, X*3, ... }
1916
1917 without loss of precision, where X is STMT_INFO's DR_STEP.
1918
1919 Return true if this is possible, describing the gather load or scatter
1920 store in GS_INFO. MASKED_P is true if the load or store is conditional. */
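/* As a rough worked example: with 4-byte elements, DR_STEP equal to 4 and
at most 255 scalar latch iterations, trying SCALE 4 gives X = 1, the
offsets needed are at most 255 and an unsigned 8-bit offset type is wide
enough, provided the target supports gathers or scatters with such an
offset. */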
1921
1922 static bool
1923 vect_truncate_gather_scatter_offset (stmt_vec_info stmt_info,
1924 loop_vec_info loop_vinfo, bool masked_p,
1925 gather_scatter_info *gs_info)
1926 {
1927 dr_vec_info *dr_info = STMT_VINFO_DR_INFO (stmt_info);
1928 data_reference *dr = dr_info->dr;
1929 tree step = DR_STEP (dr);
1930 if (TREE_CODE (step) != INTEGER_CST)
1931 {
1932 /* ??? Perhaps we could use range information here? */
1933 if (dump_enabled_p ())
1934 dump_printf_loc (MSG_NOTE, vect_location,
1935 "cannot truncate variable step.\n");
1936 return false;
1937 }
1938
1939 /* Get the number of bits in an element. */
1940 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
1941 scalar_mode element_mode = SCALAR_TYPE_MODE (TREE_TYPE (vectype));
1942 unsigned int element_bits = GET_MODE_BITSIZE (element_mode);
1943
1944 /* Set COUNT to the upper limit on the number of elements - 1.
1945 Start with the maximum vectorization factor. */
1946 unsigned HOST_WIDE_INT count = vect_max_vf (loop_vinfo) - 1;
1947
1948 /* Try lowering COUNT to the number of scalar latch iterations. */
1949 class loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
1950 widest_int max_iters;
1951 if (max_loop_iterations (loop, &max_iters)
1952 && max_iters < count)
1953 count = max_iters.to_shwi ();
1954
1955 /* Try scales of 1 and the element size. */
1956 int scales[] = { 1, vect_get_scalar_dr_size (dr_info) };
1957 wi::overflow_type overflow = wi::OVF_NONE;
1958 for (int i = 0; i < 2; ++i)
1959 {
1960 int scale = scales[i];
1961 widest_int factor;
1962 if (!wi::multiple_of_p (wi::to_widest (step), scale, SIGNED, &factor))
1963 continue;
1964
1965 /* Determine the minimum precision of (COUNT - 1) * STEP / SCALE. */
1966 widest_int range = wi::mul (count, factor, SIGNED, &overflow);
1967 if (overflow)
1968 continue;
1969 signop sign = range >= 0 ? UNSIGNED : SIGNED;
1970 unsigned int min_offset_bits = wi::min_precision (range, sign);
1971
1972 /* Find the narrowest viable offset type. */
1973 unsigned int offset_bits = 1U << ceil_log2 (min_offset_bits);
1974 tree offset_type = build_nonstandard_integer_type (offset_bits,
1975 sign == UNSIGNED);
1976
1977 /* See whether the target supports the operation with an offset
1978 no narrower than OFFSET_TYPE. */
1979 tree memory_type = TREE_TYPE (DR_REF (dr));
1980 if (!vect_gather_scatter_fn_p (loop_vinfo, DR_IS_READ (dr), masked_p,
1981 vectype, memory_type, offset_type, scale,
1982 &gs_info->ifn, &gs_info->offset_vectype))
1983 continue;
1984
1985 gs_info->decl = NULL_TREE;
1986 /* Logically the sum of DR_BASE_ADDRESS, DR_INIT and DR_OFFSET,
1987 but we don't need to store that here. */
1988 gs_info->base = NULL_TREE;
1989 gs_info->element_type = TREE_TYPE (vectype);
1990 gs_info->offset = fold_convert (offset_type, step);
1991 gs_info->offset_dt = vect_constant_def;
1992 gs_info->scale = scale;
1993 gs_info->memory_type = memory_type;
1994 return true;
1995 }
1996
1997 if (overflow && dump_enabled_p ())
1998 dump_printf_loc (MSG_NOTE, vect_location,
1999 "truncating gather/scatter offset to %d bits"
2000 " might change its value.\n", element_bits);
2001
2002 return false;
2003 }
2004
2005 /* Return true if we can use gather/scatter internal functions to
2006 vectorize STMT_INFO, which is a grouped or strided load or store.
2007 MASKED_P is true if load or store is conditional. When returning
2008 true, fill in GS_INFO with the information required to perform the
2009 operation. */
2010
2011 static bool
2012 vect_use_strided_gather_scatters_p (stmt_vec_info stmt_info,
2013 loop_vec_info loop_vinfo, bool masked_p,
2014 gather_scatter_info *gs_info)
2015 {
2016 if (!vect_check_gather_scatter (stmt_info, loop_vinfo, gs_info)
2017 || gs_info->decl)
2018 return vect_truncate_gather_scatter_offset (stmt_info, loop_vinfo,
2019 masked_p, gs_info);
2020
2021 tree old_offset_type = TREE_TYPE (gs_info->offset);
2022 tree new_offset_type = TREE_TYPE (gs_info->offset_vectype);
2023
2024 gcc_assert (TYPE_PRECISION (new_offset_type)
2025 >= TYPE_PRECISION (old_offset_type));
2026 gs_info->offset = fold_convert (new_offset_type, gs_info->offset);
2027
2028 if (dump_enabled_p ())
2029 dump_printf_loc (MSG_NOTE, vect_location,
2030 "using gather/scatter for strided/grouped access,"
2031 " scale = %d\n", gs_info->scale);
2032
2033 return true;
2034 }
2035
2036 /* STMT_INFO is a non-strided load or store, meaning that it accesses
2037 elements with a known constant step. Return -1 if that step
2038 is negative, 0 if it is zero, and 1 if it is greater than zero. */
2039
2040 static int
2041 compare_step_with_zero (vec_info *vinfo, stmt_vec_info stmt_info)
2042 {
2043 dr_vec_info *dr_info = STMT_VINFO_DR_INFO (stmt_info);
2044 return tree_int_cst_compare (vect_dr_behavior (vinfo, dr_info)->step,
2045 size_zero_node);
2046 }
2047
2048 /* If the target supports a permute mask that reverses the elements in
2049 a vector of type VECTYPE, return that mask, otherwise return null. */
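/* For example, for V4SI the selector built below is { 3, 2, 1, 0 }. */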
2050
2051 static tree
2052 perm_mask_for_reverse (tree vectype)
2053 {
2054 poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
2055
2056 /* The encoding has a single stepped pattern. */
2057 vec_perm_builder sel (nunits, 1, 3);
2058 for (int i = 0; i < 3; ++i)
2059 sel.quick_push (nunits - 1 - i);
2060
2061 vec_perm_indices indices (sel, 1, nunits);
2062 if (!can_vec_perm_const_p (TYPE_MODE (vectype), indices))
2063 return NULL_TREE;
2064 return vect_gen_perm_mask_checked (vectype, indices);
2065 }
2066
2067 /* A subroutine of get_load_store_type, with a subset of the same
2068 arguments. Handle the case where STMT_INFO is a load or store that
2069 accesses consecutive elements with a negative step. */
2070
2071 static vect_memory_access_type
2072 get_negative_load_store_type (vec_info *vinfo,
2073 stmt_vec_info stmt_info, tree vectype,
2074 vec_load_store_type vls_type,
2075 unsigned int ncopies)
2076 {
2077 dr_vec_info *dr_info = STMT_VINFO_DR_INFO (stmt_info);
2078 dr_alignment_support alignment_support_scheme;
2079
2080 if (ncopies > 1)
2081 {
2082 if (dump_enabled_p ())
2083 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2084 "multiple types with negative step.\n");
2085 return VMAT_ELEMENTWISE;
2086 }
2087
2088 alignment_support_scheme = vect_supportable_dr_alignment (vinfo,
2089 dr_info, false);
2090 if (alignment_support_scheme != dr_aligned
2091 && alignment_support_scheme != dr_unaligned_supported)
2092 {
2093 if (dump_enabled_p ())
2094 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2095 "negative step but alignment required.\n");
2096 return VMAT_ELEMENTWISE;
2097 }
2098
2099 if (vls_type == VLS_STORE_INVARIANT)
2100 {
2101 if (dump_enabled_p ())
2102 dump_printf_loc (MSG_NOTE, vect_location,
2103 "negative step with invariant source;"
2104 " no permute needed.\n");
2105 return VMAT_CONTIGUOUS_DOWN;
2106 }
2107
2108 if (!perm_mask_for_reverse (vectype))
2109 {
2110 if (dump_enabled_p ())
2111 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2112 "negative step and reversing not supported.\n");
2113 return VMAT_ELEMENTWISE;
2114 }
2115
2116 return VMAT_CONTIGUOUS_REVERSE;
2117 }
2118
2119 /* STMT_INFO is either a masked or unconditional store. Return the value
2120 being stored. */
2121
2122 tree
2123 vect_get_store_rhs (stmt_vec_info stmt_info)
2124 {
2125 if (gassign *assign = dyn_cast <gassign *> (stmt_info->stmt))
2126 {
2127 gcc_assert (gimple_assign_single_p (assign));
2128 return gimple_assign_rhs1 (assign);
2129 }
2130 if (gcall *call = dyn_cast <gcall *> (stmt_info->stmt))
2131 {
2132 internal_fn ifn = gimple_call_internal_fn (call);
2133 int index = internal_fn_stored_value_index (ifn);
2134 gcc_assert (index >= 0);
2135 return gimple_call_arg (call, index);
2136 }
2137 gcc_unreachable ();
2138 }
2139
2140 /* Function VECTOR_VECTOR_COMPOSITION_TYPE
2141
2142 This function returns a vector type which can be composed of NELTS pieces,
2143 whose type is recorded in PTYPE. VTYPE should be a vector type and has the
2144 same vector size as the returned vector. It first checks whether the target
2145 supports a pieces-sized vector mode for the construction; if not, it checks
2146 whether a pieces-sized scalar mode can be used instead. It returns NULL_TREE
2147 if no suitable composition can be found.
2148
2149 For example, for (vtype=V16QI, nelts=4), we can probably get:
2150 - V16QI with PTYPE V4QI.
2151 - V4SI with PTYPE SI.
2152 - NULL_TREE. */
2153
2154 static tree
2155 vector_vector_composition_type (tree vtype, poly_uint64 nelts, tree *ptype)
2156 {
2157 gcc_assert (VECTOR_TYPE_P (vtype));
2158 gcc_assert (known_gt (nelts, 0U));
2159
2160 machine_mode vmode = TYPE_MODE (vtype);
2161 if (!VECTOR_MODE_P (vmode))
2162 return NULL_TREE;
2163
2164 poly_uint64 vbsize = GET_MODE_BITSIZE (vmode);
2165 unsigned int pbsize;
2166 if (constant_multiple_p (vbsize, nelts, &pbsize))
2167 {
2168 /* First check if vec_init optab supports construction from
2169 vector pieces directly. */
2170 scalar_mode elmode = SCALAR_TYPE_MODE (TREE_TYPE (vtype));
2171 poly_uint64 inelts = pbsize / GET_MODE_BITSIZE (elmode);
2172 machine_mode rmode;
2173 if (related_vector_mode (vmode, elmode, inelts).exists (&rmode)
2174 && (convert_optab_handler (vec_init_optab, vmode, rmode)
2175 != CODE_FOR_nothing))
2176 {
2177 *ptype = build_vector_type (TREE_TYPE (vtype), inelts);
2178 return vtype;
2179 }
2180
2181 /* Otherwise check whether an integer type of the same piece size exists
2182 and whether the vec_init optab supports construction from it directly. */
2183 if (int_mode_for_size (pbsize, 0).exists (&elmode)
2184 && related_vector_mode (vmode, elmode, nelts).exists (&rmode)
2185 && (convert_optab_handler (vec_init_optab, rmode, elmode)
2186 != CODE_FOR_nothing))
2187 {
2188 *ptype = build_nonstandard_integer_type (pbsize, 1);
2189 return build_vector_type (*ptype, nelts);
2190 }
2191 }
2192
2193 return NULL_TREE;
2194 }
2195
2196 /* A subroutine of get_load_store_type, with a subset of the same
2197 arguments. Handle the case where STMT_INFO is part of a grouped load
2198 or store.
2199
2200 For stores, the statements in the group are all consecutive
2201 and there is no gap at the end. For loads, the statements in the
2202 group might not be consecutive; there can be gaps between statements
2203 as well as at the end. */
2204
2205 static bool
2206 get_group_load_store_type (vec_info *vinfo, stmt_vec_info stmt_info,
2207 tree vectype, bool slp,
2208 bool masked_p, vec_load_store_type vls_type,
2209 vect_memory_access_type *memory_access_type,
2210 gather_scatter_info *gs_info)
2211 {
2212 loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
2213 class loop *loop = loop_vinfo ? LOOP_VINFO_LOOP (loop_vinfo) : NULL;
2214 stmt_vec_info first_stmt_info = DR_GROUP_FIRST_ELEMENT (stmt_info);
2215 dr_vec_info *first_dr_info = STMT_VINFO_DR_INFO (first_stmt_info);
2216 unsigned int group_size = DR_GROUP_SIZE (first_stmt_info);
2217 bool single_element_p = (stmt_info == first_stmt_info
2218 && !DR_GROUP_NEXT_ELEMENT (stmt_info));
2219 unsigned HOST_WIDE_INT gap = DR_GROUP_GAP (first_stmt_info);
2220 poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
2221
2222 /* True if the vectorized statements would access beyond the last
2223 statement in the group. */
2224 bool overrun_p = false;
2225
2226 /* True if we can cope with such overrun by peeling for gaps, so that
2227 there is at least one final scalar iteration after the vector loop. */
2228 bool can_overrun_p = (!masked_p
2229 && vls_type == VLS_LOAD
2230 && loop_vinfo
2231 && !loop->inner);
2232
2233 /* There can only be a gap at the end of the group if the stride is
2234 known at compile time. */
2235 gcc_assert (!STMT_VINFO_STRIDED_P (first_stmt_info) || gap == 0);
2236
2237 /* Stores can't yet have gaps. */
2238 gcc_assert (slp || vls_type == VLS_LOAD || gap == 0);
2239
2240 if (slp)
2241 {
2242 if (STMT_VINFO_STRIDED_P (first_stmt_info))
2243 {
2244 /* Try to use consecutive accesses of DR_GROUP_SIZE elements,
2245 separated by the stride, until we have a complete vector.
2246 Fall back to scalar accesses if that isn't possible. */
2247 if (multiple_p (nunits, group_size))
2248 *memory_access_type = VMAT_STRIDED_SLP;
2249 else
2250 *memory_access_type = VMAT_ELEMENTWISE;
2251 }
2252 else
2253 {
2254 overrun_p = loop_vinfo && gap != 0;
2255 if (overrun_p && vls_type != VLS_LOAD)
2256 {
2257 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2258 "Grouped store with gaps requires"
2259 " non-consecutive accesses\n");
2260 return false;
2261 }
2262 /* An overrun is fine if the trailing elements are smaller
2263 than the alignment boundary B. Every vector access will
2264 be a multiple of B and so we are guaranteed to access a
2265 non-gap element in the same B-sized block. */
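/* E.g. with 4-byte elements and a known alignment of 16 bytes, a trailing
gap of up to three elements stays within the last aligned block. */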
2266 if (overrun_p
2267 && gap < (vect_known_alignment_in_bytes (first_dr_info)
2268 / vect_get_scalar_dr_size (first_dr_info)))
2269 overrun_p = false;
2270
2271 /* If the gap splits the vector in half and the target
2272 can do half-vector operations, avoid the epilogue peeling
2273 by simply loading only half of the vector. Usually
2274 the construction with an upper zero half will be elided. */
2275 dr_alignment_support alignment_support_scheme;
2276 tree half_vtype;
2277 if (overrun_p
2278 && !masked_p
2279 && (((alignment_support_scheme
2280 = vect_supportable_dr_alignment (vinfo,
2281 first_dr_info, false)))
2282 == dr_aligned
2283 || alignment_support_scheme == dr_unaligned_supported)
2284 && known_eq (nunits, (group_size - gap) * 2)
2285 && known_eq (nunits, group_size)
2286 && (vector_vector_composition_type (vectype, 2, &half_vtype)
2287 != NULL_TREE))
2288 overrun_p = false;
2289
2290 if (overrun_p && !can_overrun_p)
2291 {
2292 if (dump_enabled_p ())
2293 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2294 "Peeling for outer loop is not supported\n");
2295 return false;
2296 }
2297 int cmp = compare_step_with_zero (vinfo, stmt_info);
2298 if (cmp < 0)
2299 *memory_access_type = get_negative_load_store_type
2300 (vinfo, stmt_info, vectype, vls_type, 1);
2301 else
2302 {
2303 gcc_assert (!loop_vinfo || cmp > 0);
2304 *memory_access_type = VMAT_CONTIGUOUS;
2305 }
2306 }
2307 }
2308 else
2309 {
2310 /* We can always handle this case using elementwise accesses,
2311 but see if something more efficient is available. */
2312 *memory_access_type = VMAT_ELEMENTWISE;
2313
2314 /* If there is a gap at the end of the group then these optimizations
2315 would access excess elements in the last iteration. */
2316 bool would_overrun_p = (gap != 0);
2317 /* An overrun is fine if the trailing elements are smaller than the
2318 alignment boundary B. Every vector access will be a multiple of B
2319 and so we are guaranteed to access a non-gap element in the
2320 same B-sized block. */
2321 if (would_overrun_p
2322 && !masked_p
2323 && gap < (vect_known_alignment_in_bytes (first_dr_info)
2324 / vect_get_scalar_dr_size (first_dr_info)))
2325 would_overrun_p = false;
2326
2327 if (!STMT_VINFO_STRIDED_P (first_stmt_info)
2328 && (can_overrun_p || !would_overrun_p)
2329 && compare_step_with_zero (vinfo, stmt_info) > 0)
2330 {
2331 /* First cope with the degenerate case of a single-element
2332 vector. */
2333 if (known_eq (TYPE_VECTOR_SUBPARTS (vectype), 1U))
2334 *memory_access_type = VMAT_CONTIGUOUS;
2335
2336 /* Otherwise try using LOAD/STORE_LANES. */
2337 if (*memory_access_type == VMAT_ELEMENTWISE
2338 && (vls_type == VLS_LOAD
2339 ? vect_load_lanes_supported (vectype, group_size, masked_p)
2340 : vect_store_lanes_supported (vectype, group_size,
2341 masked_p)))
2342 {
2343 *memory_access_type = VMAT_LOAD_STORE_LANES;
2344 overrun_p = would_overrun_p;
2345 }
2346
2347 /* If that fails, try using permuting loads. */
2348 if (*memory_access_type == VMAT_ELEMENTWISE
2349 && (vls_type == VLS_LOAD
2350 ? vect_grouped_load_supported (vectype, single_element_p,
2351 group_size)
2352 : vect_grouped_store_supported (vectype, group_size)))
2353 {
2354 *memory_access_type = VMAT_CONTIGUOUS_PERMUTE;
2355 overrun_p = would_overrun_p;
2356 }
2357 }
2358
2359 /* As a last resort, try using a gather load or scatter store.
2360
2361 ??? Although the code can handle all group sizes correctly,
2362 it probably isn't a win to use separate strided accesses based
2363 on nearby locations. Or, even if it's a win over scalar code,
2364 it might not be a win over vectorizing at a lower VF, if that
2365 allows us to use contiguous accesses. */
2366 if (*memory_access_type == VMAT_ELEMENTWISE
2367 && single_element_p
2368 && loop_vinfo
2369 && vect_use_strided_gather_scatters_p (stmt_info, loop_vinfo,
2370 masked_p, gs_info))
2371 *memory_access_type = VMAT_GATHER_SCATTER;
2372 }
2373
2374 if (vls_type != VLS_LOAD && first_stmt_info == stmt_info)
2375 {
2376 /* STMT_INFO is the leader of the group. Check the operands of all
2377 the stmts of the group. */
2378 stmt_vec_info next_stmt_info = DR_GROUP_NEXT_ELEMENT (stmt_info);
2379 while (next_stmt_info)
2380 {
2381 tree op = vect_get_store_rhs (next_stmt_info);
2382 enum vect_def_type dt;
2383 if (!vect_is_simple_use (op, vinfo, &dt))
2384 {
2385 if (dump_enabled_p ())
2386 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2387 "use not simple.\n");
2388 return false;
2389 }
2390 next_stmt_info = DR_GROUP_NEXT_ELEMENT (next_stmt_info);
2391 }
2392 }
2393
2394 if (overrun_p)
2395 {
2396 gcc_assert (can_overrun_p);
2397 if (dump_enabled_p ())
2398 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2399 "Data access with gaps requires scalar "
2400 "epilogue loop\n");
2401 LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo) = true;
2402 }
2403
2404 return true;
2405 }
2406
2407 /* Analyze load or store statement STMT_INFO of type VLS_TYPE. Return true
2408 if there is a memory access type that the vectorized form can use,
2409 storing it in *MEMORY_ACCESS_TYPE if so. If we decide to use gathers
2410 or scatters, fill in GS_INFO accordingly.
2411
2412 SLP says whether we're performing SLP rather than loop vectorization.
2413 MASKED_P is true if the statement is conditional on a vectorized mask.
2414 VECTYPE is the vector type that the vectorized statements will use.
2415 NCOPIES is the number of vector statements that will be needed. */
2416
2417 static bool
2418 get_load_store_type (vec_info *vinfo, stmt_vec_info stmt_info,
2419 tree vectype, bool slp,
2420 bool masked_p, vec_load_store_type vls_type,
2421 unsigned int ncopies,
2422 vect_memory_access_type *memory_access_type,
2423 gather_scatter_info *gs_info)
2424 {
2425 loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
2426 poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
2427 if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
2428 {
2429 *memory_access_type = VMAT_GATHER_SCATTER;
2430 if (!vect_check_gather_scatter (stmt_info, loop_vinfo, gs_info))
2431 gcc_unreachable ();
2432 else if (!vect_is_simple_use (gs_info->offset, vinfo,
2433 &gs_info->offset_dt,
2434 &gs_info->offset_vectype))
2435 {
2436 if (dump_enabled_p ())
2437 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2438 "%s index use not simple.\n",
2439 vls_type == VLS_LOAD ? "gather" : "scatter");
2440 return false;
2441 }
2442 }
2443 else if (STMT_VINFO_GROUPED_ACCESS (stmt_info))
2444 {
2445 if (!get_group_load_store_type (vinfo, stmt_info, vectype, slp, masked_p,
2446 vls_type, memory_access_type, gs_info))
2447 return false;
2448 }
2449 else if (STMT_VINFO_STRIDED_P (stmt_info))
2450 {
2451 gcc_assert (!slp);
2452 if (loop_vinfo
2453 && vect_use_strided_gather_scatters_p (stmt_info, loop_vinfo,
2454 masked_p, gs_info))
2455 *memory_access_type = VMAT_GATHER_SCATTER;
2456 else
2457 *memory_access_type = VMAT_ELEMENTWISE;
2458 }
2459 else
2460 {
2461 int cmp = compare_step_with_zero (vinfo, stmt_info);
2462 if (cmp < 0)
2463 *memory_access_type = get_negative_load_store_type
2464 (vinfo, stmt_info, vectype, vls_type, ncopies);
2465 else if (cmp == 0)
2466 {
2467 gcc_assert (vls_type == VLS_LOAD);
2468 *memory_access_type = VMAT_INVARIANT;
2469 }
2470 else
2471 *memory_access_type = VMAT_CONTIGUOUS;
2472 }
2473
2474 if ((*memory_access_type == VMAT_ELEMENTWISE
2475 || *memory_access_type == VMAT_STRIDED_SLP)
2476 && !nunits.is_constant ())
2477 {
2478 if (dump_enabled_p ())
2479 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2480 "Not using elementwise accesses due to variable "
2481 "vectorization factor.\n");
2482 return false;
2483 }
2484
2485 /* FIXME: At the moment the cost model seems to underestimate the
2486 cost of using elementwise accesses. This check preserves the
2487 traditional behavior until that can be fixed. */
2488 stmt_vec_info first_stmt_info = DR_GROUP_FIRST_ELEMENT (stmt_info);
2489 if (!first_stmt_info)
2490 first_stmt_info = stmt_info;
2491 if (*memory_access_type == VMAT_ELEMENTWISE
2492 && !STMT_VINFO_STRIDED_P (first_stmt_info)
2493 && !(stmt_info == DR_GROUP_FIRST_ELEMENT (stmt_info)
2494 && !DR_GROUP_NEXT_ELEMENT (stmt_info)
2495 && !pow2p_hwi (DR_GROUP_SIZE (stmt_info))))
2496 {
2497 if (dump_enabled_p ())
2498 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2499 "not falling back to elementwise accesses\n");
2500 return false;
2501 }
2502 return true;
2503 }
2504
2505 /* Return true if boolean argument MASK is suitable for vectorizing
2506 conditional operation STMT_INFO. When returning true, store the type
2507 of the definition in *MASK_DT_OUT and the type of the vectorized mask
2508 in *MASK_VECTYPE_OUT. */
2509
2510 static bool
2511 vect_check_scalar_mask (vec_info *vinfo, stmt_vec_info stmt_info, tree mask,
2512 vect_def_type *mask_dt_out,
2513 tree *mask_vectype_out)
2514 {
2515 if (!VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (mask)))
2516 {
2517 if (dump_enabled_p ())
2518 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2519 "mask argument is not a boolean.\n");
2520 return false;
2521 }
2522
2523 if (TREE_CODE (mask) != SSA_NAME)
2524 {
2525 if (dump_enabled_p ())
2526 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2527 "mask argument is not an SSA name.\n");
2528 return false;
2529 }
2530
2531 enum vect_def_type mask_dt;
2532 tree mask_vectype;
2533 if (!vect_is_simple_use (mask, vinfo, &mask_dt, &mask_vectype))
2534 {
2535 if (dump_enabled_p ())
2536 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2537 "mask use not simple.\n");
2538 return false;
2539 }
2540
2541 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
2542 if (!mask_vectype)
2543 mask_vectype = get_mask_type_for_scalar_type (vinfo, TREE_TYPE (vectype));
2544
2545 if (!mask_vectype || !VECTOR_BOOLEAN_TYPE_P (mask_vectype))
2546 {
2547 if (dump_enabled_p ())
2548 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2549 "could not find an appropriate vector mask type.\n");
2550 return false;
2551 }
2552
2553 if (maybe_ne (TYPE_VECTOR_SUBPARTS (mask_vectype),
2554 TYPE_VECTOR_SUBPARTS (vectype)))
2555 {
2556 if (dump_enabled_p ())
2557 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2558 "vector mask type %T"
2559 " does not match vector data type %T.\n",
2560 mask_vectype, vectype);
2561
2562 return false;
2563 }
2564
2565 *mask_dt_out = mask_dt;
2566 *mask_vectype_out = mask_vectype;
2567 return true;
2568 }
2569
2570 /* Return true if stored value RHS is suitable for vectorizing store
2571 statement STMT_INFO. When returning true, store the type of the
2572 definition in *RHS_DT_OUT, the type of the vectorized store value in
2573 *RHS_VECTYPE_OUT and the type of the store in *VLS_TYPE_OUT. */
2574
2575 static bool
2576 vect_check_store_rhs (vec_info *vinfo, stmt_vec_info stmt_info, tree rhs,
2577 vect_def_type *rhs_dt_out, tree *rhs_vectype_out,
2578 vec_load_store_type *vls_type_out)
2579 {
2580 /* If this is a store of a constant, make sure
2581 native_encode_expr can handle it. */
2582 if (CONSTANT_CLASS_P (rhs) && native_encode_expr (rhs, NULL, 64) == 0)
2583 {
2584 if (dump_enabled_p ())
2585 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2586 "cannot encode constant as a byte sequence.\n");
2587 return false;
2588 }
2589
2590 enum vect_def_type rhs_dt;
2591 tree rhs_vectype;
2592 if (!vect_is_simple_use (rhs, vinfo, &rhs_dt, &rhs_vectype))
2593 {
2594 if (dump_enabled_p ())
2595 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2596 "use not simple.\n");
2597 return false;
2598 }
2599
2600 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
2601 if (rhs_vectype && !useless_type_conversion_p (vectype, rhs_vectype))
2602 {
2603 if (dump_enabled_p ())
2604 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2605 "incompatible vector types.\n");
2606 return false;
2607 }
2608
2609 *rhs_dt_out = rhs_dt;
2610 *rhs_vectype_out = rhs_vectype;
2611 if (rhs_dt == vect_constant_def || rhs_dt == vect_external_def)
2612 *vls_type_out = VLS_STORE_INVARIANT;
2613 else
2614 *vls_type_out = VLS_STORE;
2615 return true;
2616 }
2617
2618 /* Build an all-ones vector mask of type MASKTYPE while vectorizing STMT_INFO.
2619 Note that we support masks with floating-point type, in which case the
2620 floats are interpreted as a bitmask. */
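/* For example, for a vector of 32-bit integers this builds the constant
{ -1, -1, ... }, while for a vector of floats each element is given the
value whose bit representation is all ones. */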
2621
2622 static tree
2623 vect_build_all_ones_mask (vec_info *vinfo,
2624 stmt_vec_info stmt_info, tree masktype)
2625 {
2626 if (TREE_CODE (masktype) == INTEGER_TYPE)
2627 return build_int_cst (masktype, -1);
2628 else if (TREE_CODE (TREE_TYPE (masktype)) == INTEGER_TYPE)
2629 {
2630 tree mask = build_int_cst (TREE_TYPE (masktype), -1);
2631 mask = build_vector_from_val (masktype, mask);
2632 return vect_init_vector (vinfo, stmt_info, mask, masktype, NULL);
2633 }
2634 else if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (masktype)))
2635 {
2636 REAL_VALUE_TYPE r;
2637 long tmp[6];
2638 for (int j = 0; j < 6; ++j)
2639 tmp[j] = -1;
2640 real_from_target (&r, tmp, TYPE_MODE (TREE_TYPE (masktype)));
2641 tree mask = build_real (TREE_TYPE (masktype), r);
2642 mask = build_vector_from_val (masktype, mask);
2643 return vect_init_vector (vinfo, stmt_info, mask, masktype, NULL);
2644 }
2645 gcc_unreachable ();
2646 }
2647
2648 /* Build an all-zero merge value of type VECTYPE while vectorizing
2649 STMT_INFO as a gather load. */
2650
2651 static tree
2652 vect_build_zero_merge_argument (vec_info *vinfo,
2653 stmt_vec_info stmt_info, tree vectype)
2654 {
2655 tree merge;
2656 if (TREE_CODE (TREE_TYPE (vectype)) == INTEGER_TYPE)
2657 merge = build_int_cst (TREE_TYPE (vectype), 0);
2658 else if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (vectype)))
2659 {
2660 REAL_VALUE_TYPE r;
2661 long tmp[6];
2662 for (int j = 0; j < 6; ++j)
2663 tmp[j] = 0;
2664 real_from_target (&r, tmp, TYPE_MODE (TREE_TYPE (vectype)));
2665 merge = build_real (TREE_TYPE (vectype), r);
2666 }
2667 else
2668 gcc_unreachable ();
2669 merge = build_vector_from_val (vectype, merge);
2670 return vect_init_vector (vinfo, stmt_info, merge, vectype, NULL);
2671 }
2672
2673 /* Build a gather load call while vectorizing STMT_INFO. Insert new
2674 instructions before GSI and add them to VEC_STMT. GS_INFO describes
2675 the gather load operation. If the load is conditional, MASK is the
2676 unvectorized condition and MASK_DT is its definition type, otherwise
2677 MASK is null. */
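/* Roughly, the modifiers used below work as follows: when the data vector
and the offset vector have the same number of elements, one gather call is
emitted per copy (NONE). When the offset vector has twice as many elements,
every second copy first permutes the upper half of the offset vector into
place (WIDEN). When the data vector has twice as many elements, two gather
calls are emitted and their results are combined with a permute (NARROW). */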
2678
2679 static void
2680 vect_build_gather_load_calls (vec_info *vinfo, stmt_vec_info stmt_info,
2681 gimple_stmt_iterator *gsi,
2682 stmt_vec_info *vec_stmt,
2683 gather_scatter_info *gs_info,
2684 tree mask)
2685 {
2686 loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
2687 class loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
2688 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
2689 poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
2690 int ncopies = vect_get_num_copies (loop_vinfo, vectype);
2691 edge pe = loop_preheader_edge (loop);
2692 enum { NARROW, NONE, WIDEN } modifier;
2693 poly_uint64 gather_off_nunits
2694 = TYPE_VECTOR_SUBPARTS (gs_info->offset_vectype);
2695
2696 tree arglist = TYPE_ARG_TYPES (TREE_TYPE (gs_info->decl));
2697 tree rettype = TREE_TYPE (TREE_TYPE (gs_info->decl));
2698 tree srctype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
2699 tree ptrtype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
2700 tree idxtype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
2701 tree masktype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
2702 tree scaletype = TREE_VALUE (arglist);
2703 tree real_masktype = masktype;
2704 gcc_checking_assert (types_compatible_p (srctype, rettype)
2705 && (!mask
2706 || TREE_CODE (masktype) == INTEGER_TYPE
2707 || types_compatible_p (srctype, masktype)));
2708 if (mask && TREE_CODE (masktype) == INTEGER_TYPE)
2709 masktype = truth_type_for (srctype);
2710
2711 tree mask_halftype = masktype;
2712 tree perm_mask = NULL_TREE;
2713 tree mask_perm_mask = NULL_TREE;
2714 if (known_eq (nunits, gather_off_nunits))
2715 modifier = NONE;
2716 else if (known_eq (nunits * 2, gather_off_nunits))
2717 {
2718 modifier = WIDEN;
2719
2720 /* Currently widening gathers and scatters are only supported for
2721 fixed-length vectors. */
2722 int count = gather_off_nunits.to_constant ();
2723 vec_perm_builder sel (count, count, 1);
2724 for (int i = 0; i < count; ++i)
2725 sel.quick_push (i | (count / 2));
2726
2727 vec_perm_indices indices (sel, 1, count);
2728 perm_mask = vect_gen_perm_mask_checked (gs_info->offset_vectype,
2729 indices);
2730 }
2731 else if (known_eq (nunits, gather_off_nunits * 2))
2732 {
2733 modifier = NARROW;
2734
2735 /* Currently narrowing gathers and scatters are only supported for
2736 fixed-length vectors. */
2737 int count = nunits.to_constant ();
2738 vec_perm_builder sel (count, count, 1);
2739 sel.quick_grow (count);
2740 for (int i = 0; i < count; ++i)
2741 sel[i] = i < count / 2 ? i : i + count / 2;
2742 vec_perm_indices indices (sel, 2, count);
2743 perm_mask = vect_gen_perm_mask_checked (vectype, indices);
2744
2745 ncopies *= 2;
2746
2747 if (mask && masktype == real_masktype)
2748 {
2749 for (int i = 0; i < count; ++i)
2750 sel[i] = i | (count / 2);
2751 indices.new_vector (sel, 2, count);
2752 mask_perm_mask = vect_gen_perm_mask_checked (masktype, indices);
2753 }
2754 else if (mask)
2755 mask_halftype = truth_type_for (gs_info->offset_vectype);
2756 }
2757 else
2758 gcc_unreachable ();
2759
2760 tree scalar_dest = gimple_get_lhs (stmt_info->stmt);
2761 tree vec_dest = vect_create_destination_var (scalar_dest, vectype);
2762
2763 tree ptr = fold_convert (ptrtype, gs_info->base);
2764 if (!is_gimple_min_invariant (ptr))
2765 {
2766 gimple_seq seq;
2767 ptr = force_gimple_operand (ptr, &seq, true, NULL_TREE);
2768 basic_block new_bb = gsi_insert_seq_on_edge_immediate (pe, seq);
2769 gcc_assert (!new_bb);
2770 }
2771
2772 tree scale = build_int_cst (scaletype, gs_info->scale);
2773
2774 tree vec_oprnd0 = NULL_TREE;
2775 tree vec_mask = NULL_TREE;
2776 tree src_op = NULL_TREE;
2777 tree mask_op = NULL_TREE;
2778 tree prev_res = NULL_TREE;
2779 stmt_vec_info prev_stmt_info = NULL;
2780
2781 if (!mask)
2782 {
2783 src_op = vect_build_zero_merge_argument (vinfo, stmt_info, rettype);
2784 mask_op = vect_build_all_ones_mask (vinfo, stmt_info, masktype);
2785 }
2786
2787 for (int j = 0; j < ncopies; ++j)
2788 {
2789 tree op, var;
2790 if (modifier == WIDEN && (j & 1))
2791 op = permute_vec_elements (vinfo, vec_oprnd0, vec_oprnd0,
2792 perm_mask, stmt_info, gsi);
2793 else if (j == 0)
2794 op = vec_oprnd0
2795 = vect_get_vec_def_for_operand (vinfo, gs_info->offset, stmt_info);
2796 else
2797 op = vec_oprnd0 = vect_get_vec_def_for_stmt_copy (loop_vinfo,
2798 vec_oprnd0);
2799
2800 if (!useless_type_conversion_p (idxtype, TREE_TYPE (op)))
2801 {
2802 gcc_assert (known_eq (TYPE_VECTOR_SUBPARTS (TREE_TYPE (op)),
2803 TYPE_VECTOR_SUBPARTS (idxtype)));
2804 var = vect_get_new_ssa_name (idxtype, vect_simple_var);
2805 op = build1 (VIEW_CONVERT_EXPR, idxtype, op);
2806 gassign *new_stmt = gimple_build_assign (var, VIEW_CONVERT_EXPR, op);
2807 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
2808 op = var;
2809 }
2810
2811 if (mask)
2812 {
2813 if (mask_perm_mask && (j & 1))
2814 mask_op = permute_vec_elements (vinfo, mask_op, mask_op,
2815 mask_perm_mask, stmt_info, gsi);
2816 else
2817 {
2818 if (j == 0)
2819 vec_mask = vect_get_vec_def_for_operand (vinfo, mask, stmt_info);
2820 else if (modifier != NARROW || (j & 1) == 0)
2821 vec_mask = vect_get_vec_def_for_stmt_copy (loop_vinfo,
2822 vec_mask);
2823
2824 mask_op = vec_mask;
2825 if (!useless_type_conversion_p (masktype, TREE_TYPE (vec_mask)))
2826 {
2827 poly_uint64 sub1 = TYPE_VECTOR_SUBPARTS (TREE_TYPE (mask_op));
2828 poly_uint64 sub2 = TYPE_VECTOR_SUBPARTS (masktype);
2829 gcc_assert (known_eq (sub1, sub2));
2830 var = vect_get_new_ssa_name (masktype, vect_simple_var);
2831 mask_op = build1 (VIEW_CONVERT_EXPR, masktype, mask_op);
2832 gassign *new_stmt
2833 = gimple_build_assign (var, VIEW_CONVERT_EXPR, mask_op);
2834 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
2835 mask_op = var;
2836 }
2837 }
2838 if (modifier == NARROW && masktype != real_masktype)
2839 {
2840 var = vect_get_new_ssa_name (mask_halftype, vect_simple_var);
2841 gassign *new_stmt
2842 = gimple_build_assign (var, (j & 1) ? VEC_UNPACK_HI_EXPR
2843 : VEC_UNPACK_LO_EXPR,
2844 mask_op);
2845 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
2846 mask_op = var;
2847 }
2848 src_op = mask_op;
2849 }
2850
2851 tree mask_arg = mask_op;
2852 if (masktype != real_masktype)
2853 {
2854 tree utype, optype = TREE_TYPE (mask_op);
2855 if (TYPE_MODE (real_masktype) == TYPE_MODE (optype))
2856 utype = real_masktype;
2857 else
2858 utype = lang_hooks.types.type_for_mode (TYPE_MODE (optype), 1);
2859 var = vect_get_new_ssa_name (utype, vect_scalar_var);
2860 mask_arg = build1 (VIEW_CONVERT_EXPR, utype, mask_op);
2861 gassign *new_stmt
2862 = gimple_build_assign (var, VIEW_CONVERT_EXPR, mask_arg);
2863 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
2864 mask_arg = var;
2865 if (!useless_type_conversion_p (real_masktype, utype))
2866 {
2867 gcc_assert (TYPE_PRECISION (utype)
2868 <= TYPE_PRECISION (real_masktype));
2869 var = vect_get_new_ssa_name (real_masktype, vect_scalar_var);
2870 new_stmt = gimple_build_assign (var, NOP_EXPR, mask_arg);
2871 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
2872 mask_arg = var;
2873 }
2874 src_op = build_zero_cst (srctype);
2875 }
2876 gcall *new_call = gimple_build_call (gs_info->decl, 5, src_op, ptr, op,
2877 mask_arg, scale);
2878
2879 stmt_vec_info new_stmt_info;
2880 if (!useless_type_conversion_p (vectype, rettype))
2881 {
2882 gcc_assert (known_eq (TYPE_VECTOR_SUBPARTS (vectype),
2883 TYPE_VECTOR_SUBPARTS (rettype)));
2884 op = vect_get_new_ssa_name (rettype, vect_simple_var);
2885 gimple_call_set_lhs (new_call, op);
2886 vect_finish_stmt_generation (vinfo, stmt_info, new_call, gsi);
2887 var = make_ssa_name (vec_dest);
2888 op = build1 (VIEW_CONVERT_EXPR, vectype, op);
2889 gassign *new_stmt = gimple_build_assign (var, VIEW_CONVERT_EXPR, op);
2890 new_stmt_info
2891 = vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
2892 }
2893 else
2894 {
2895 var = make_ssa_name (vec_dest, new_call);
2896 gimple_call_set_lhs (new_call, var);
2897 new_stmt_info
2898 = vect_finish_stmt_generation (vinfo, stmt_info, new_call, gsi);
2899 }
2900
2901 if (modifier == NARROW)
2902 {
2903 if ((j & 1) == 0)
2904 {
2905 prev_res = var;
2906 continue;
2907 }
2908 var = permute_vec_elements (vinfo, prev_res, var, perm_mask,
2909 stmt_info, gsi);
2910 new_stmt_info = loop_vinfo->lookup_def (var);
2911 }
2912
2913 if (prev_stmt_info == NULL)
2914 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt_info;
2915 else
2916 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt_info;
2917 prev_stmt_info = new_stmt_info;
2918 }
2919 }
2920
2921 /* Prepare the base and offset in GS_INFO for vectorization.
2922 Set *DATAREF_PTR to the loop-invariant base address and *VEC_OFFSET
2923 to the vectorized offset argument for the first copy of STMT_INFO.
2924 STMT_INFO is the statement described by GS_INFO and LOOP is the
2925 containing loop. */
2926
2927 static void
2928 vect_get_gather_scatter_ops (vec_info *vinfo,
2929 class loop *loop, stmt_vec_info stmt_info,
2930 gather_scatter_info *gs_info,
2931 tree *dataref_ptr, tree *vec_offset)
2932 {
2933 gimple_seq stmts = NULL;
2934 *dataref_ptr = force_gimple_operand (gs_info->base, &stmts, true, NULL_TREE);
2935 if (stmts != NULL)
2936 {
2937 basic_block new_bb;
2938 edge pe = loop_preheader_edge (loop);
2939 new_bb = gsi_insert_seq_on_edge_immediate (pe, stmts);
2940 gcc_assert (!new_bb);
2941 }
2942 *vec_offset = vect_get_vec_def_for_operand (vinfo, gs_info->offset, stmt_info,
2943 gs_info->offset_vectype);
2944 }
2945
2946 /* Prepare to implement a grouped or strided load or store using
2947 the gather load or scatter store operation described by GS_INFO.
2948 STMT_INFO is the load or store statement.
2949
2950 Set *DATAREF_BUMP to the amount that should be added to the base
2951 address after each copy of the vectorized statement. Set *VEC_OFFSET
2952 to an invariant offset vector in which element I has the value
2953 I * DR_STEP / SCALE. */
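/* For example, with a 4-element vector, DR_STEP 16 and SCALE 4, the bump
is 64 bytes and the invariant offset vector is { 0, 4, 8, 12 }. */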
2954
2955 static void
2956 vect_get_strided_load_store_ops (stmt_vec_info stmt_info,
2957 loop_vec_info loop_vinfo,
2958 gather_scatter_info *gs_info,
2959 tree *dataref_bump, tree *vec_offset)
2960 {
2961 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
2962 class loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
2963 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
2964 gimple_seq stmts;
2965
2966 tree bump = size_binop (MULT_EXPR,
2967 fold_convert (sizetype, unshare_expr (DR_STEP (dr))),
2968 size_int (TYPE_VECTOR_SUBPARTS (vectype)));
2969 *dataref_bump = force_gimple_operand (bump, &stmts, true, NULL_TREE);
2970 if (stmts)
2971 gsi_insert_seq_on_edge_immediate (loop_preheader_edge (loop), stmts);
2972
2973 /* The offset given in GS_INFO can have pointer type, so use the element
2974 type of the vector instead. */
2975 tree offset_type = TREE_TYPE (gs_info->offset_vectype);
2977
2978 /* Calculate X = DR_STEP / SCALE and convert it to the appropriate type. */
2979 tree step = size_binop (EXACT_DIV_EXPR, unshare_expr (DR_STEP (dr)),
2980 ssize_int (gs_info->scale));
2981 step = fold_convert (offset_type, step);
2982 step = force_gimple_operand (step, &stmts, true, NULL_TREE);
2983
2984 /* Create {0, X, X*2, X*3, ...}. */
2985 *vec_offset = gimple_build (&stmts, VEC_SERIES_EXPR, gs_info->offset_vectype,
2986 build_zero_cst (offset_type), step);
2987 if (stmts)
2988 gsi_insert_seq_on_edge_immediate (loop_preheader_edge (loop), stmts);
2989 }
2990
2991 /* Return the amount that should be added to a vector pointer to move
2992 to the next or previous copy of AGGR_TYPE. DR_INFO is the data reference
2993 being vectorized and MEMORY_ACCESS_TYPE describes the type of
2994 vectorization. */
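/* For example, for a 16-byte AGGR_TYPE the increment is 16 bytes, or -16
when the data reference steps backwards. */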
2995
2996 static tree
2997 vect_get_data_ptr_increment (vec_info *vinfo,
2998 dr_vec_info *dr_info, tree aggr_type,
2999 vect_memory_access_type memory_access_type)
3000 {
3001 if (memory_access_type == VMAT_INVARIANT)
3002 return size_zero_node;
3003
3004 tree iv_step = TYPE_SIZE_UNIT (aggr_type);
3005 tree step = vect_dr_behavior (vinfo, dr_info)->step;
3006 if (tree_int_cst_sgn (step) == -1)
3007 iv_step = fold_build1 (NEGATE_EXPR, TREE_TYPE (iv_step), iv_step);
3008 return iv_step;
3009 }
3010
3011 /* Check and perform vectorization of BUILT_IN_BSWAP{16,32,64,128}. */
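/* For example, a 32-bit bswap of a V4SI vector is implemented below as a
byte permutation of the corresponding V16QI vector with the selector
{ 3, 2, 1, 0, 7, 6, 5, 4, 11, 10, 9, 8, 15, 14, 13, 12 }. */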
3012
3013 static bool
3014 vectorizable_bswap (vec_info *vinfo,
3015 stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
3016 stmt_vec_info *vec_stmt, slp_tree slp_node,
3017 slp_tree *slp_op,
3018 tree vectype_in, stmt_vector_for_cost *cost_vec)
3019 {
3020 tree op, vectype;
3021 gcall *stmt = as_a <gcall *> (stmt_info->stmt);
3022 loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
3023 unsigned ncopies;
3024
3025 op = gimple_call_arg (stmt, 0);
3026 vectype = STMT_VINFO_VECTYPE (stmt_info);
3027 poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
3028
3029 /* Multiple types in SLP are handled by creating the appropriate number of
3030 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
3031 case of SLP. */
3032 if (slp_node)
3033 ncopies = 1;
3034 else
3035 ncopies = vect_get_num_copies (loop_vinfo, vectype);
3036
3037 gcc_assert (ncopies >= 1);
3038
3039 tree char_vectype = get_same_sized_vectype (char_type_node, vectype_in);
3040 if (! char_vectype)
3041 return false;
3042
3043 poly_uint64 num_bytes = TYPE_VECTOR_SUBPARTS (char_vectype);
3044 unsigned word_bytes;
3045 if (!constant_multiple_p (num_bytes, nunits, &word_bytes))
3046 return false;
3047
3048 /* The encoding uses one stepped pattern for each byte in the word. */
3049 vec_perm_builder elts (num_bytes, word_bytes, 3);
3050 for (unsigned i = 0; i < 3; ++i)
3051 for (unsigned j = 0; j < word_bytes; ++j)
3052 elts.quick_push ((i + 1) * word_bytes - j - 1);
3053
3054 vec_perm_indices indices (elts, 1, num_bytes);
3055 if (!can_vec_perm_const_p (TYPE_MODE (char_vectype), indices))
3056 return false;
3057
3058 if (! vec_stmt)
3059 {
3060 if (slp_node
3061 && !vect_maybe_update_slp_op_vectype (slp_op[0], vectype_in))
3062 {
3063 if (dump_enabled_p ())
3064 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3065 "incompatible vector types for invariants\n");
3066 return false;
3067 }
3068
3069 STMT_VINFO_TYPE (stmt_info) = call_vec_info_type;
3070 DUMP_VECT_SCOPE ("vectorizable_bswap");
3071 if (! slp_node)
3072 {
3073 record_stmt_cost (cost_vec,
3074 1, vector_stmt, stmt_info, 0, vect_prologue);
3075 record_stmt_cost (cost_vec,
3076 ncopies, vec_perm, stmt_info, 0, vect_body);
3077 }
3078 return true;
3079 }
3080
3081 tree bswap_vconst = vec_perm_indices_to_tree (char_vectype, indices);
3082
3083 /* Transform. */
3084 vec<tree> vec_oprnds = vNULL;
3085 stmt_vec_info new_stmt_info = NULL;
3086 stmt_vec_info prev_stmt_info = NULL;
3087 for (unsigned j = 0; j < ncopies; j++)
3088 {
3089 /* Handle uses. */
3090 if (j == 0)
3091 vect_get_vec_defs (vinfo, op, NULL, stmt_info, &vec_oprnds, NULL,
3092 slp_node);
3093 else
3094 vect_get_vec_defs_for_stmt_copy (vinfo, &vec_oprnds, NULL);
3095
3096 /* Arguments are ready. Create the new vector stmt. */
3097 unsigned i;
3098 tree vop;
3099 FOR_EACH_VEC_ELT (vec_oprnds, i, vop)
3100 {
3101 gimple *new_stmt;
3102 tree tem = make_ssa_name (char_vectype);
3103 new_stmt = gimple_build_assign (tem, build1 (VIEW_CONVERT_EXPR,
3104 char_vectype, vop));
3105 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
3106 tree tem2 = make_ssa_name (char_vectype);
3107 new_stmt = gimple_build_assign (tem2, VEC_PERM_EXPR,
3108 tem, tem, bswap_vconst);
3109 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
3110 tem = make_ssa_name (vectype);
3111 new_stmt = gimple_build_assign (tem, build1 (VIEW_CONVERT_EXPR,
3112 vectype, tem2));
3113 new_stmt_info
3114 = vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
3115 if (slp_node)
3116 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt_info);
3117 }
3118
3119 if (slp_node)
3120 continue;
3121
3122 if (j == 0)
3123 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt_info;
3124 else
3125 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt_info;
3126
3127 prev_stmt_info = new_stmt_info;
3128 }
3129
3130 vec_oprnds.release ();
3131 return true;
3132 }
3133
3134 /* Return true if vector types VECTYPE_IN and VECTYPE_OUT have
3135 integer elements and if we can narrow VECTYPE_IN to VECTYPE_OUT
3136 in a single step. On success, store the binary pack code in
3137 *CONVERT_CODE. */
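/* For example, narrowing a vector of 64-bit integers to a vector of
32-bit integers in a single step would typically use VEC_PACK_TRUNC_EXPR,
which is then stored in *CONVERT_CODE. */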
3138
3139 static bool
3140 simple_integer_narrowing (tree vectype_out, tree vectype_in,
3141 tree_code *convert_code)
3142 {
3143 if (!INTEGRAL_TYPE_P (TREE_TYPE (vectype_out))
3144 || !INTEGRAL_TYPE_P (TREE_TYPE (vectype_in)))
3145 return false;
3146
3147 tree_code code;
3148 int multi_step_cvt = 0;
3149 auto_vec <tree, 8> interm_types;
3150 if (!supportable_narrowing_operation (NOP_EXPR, vectype_out, vectype_in,
3151 &code, &multi_step_cvt, &interm_types)
3152 || multi_step_cvt)
3153 return false;
3154
3155 *convert_code = code;
3156 return true;
3157 }
3158
3159 /* Function vectorizable_call.
3160
3161 Check if STMT_INFO performs a function call that can be vectorized.
3162 If VEC_STMT is also passed, vectorize STMT_INFO: create a vectorized
3163 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
3164 Return true if STMT_INFO is vectorizable in this way. */
3165
3166 static bool
3167 vectorizable_call (vec_info *vinfo,
3168 stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
3169 stmt_vec_info *vec_stmt, slp_tree slp_node,
3170 stmt_vector_for_cost *cost_vec)
3171 {
3172 gcall *stmt;
3173 tree vec_dest;
3174 tree scalar_dest;
3175 tree op;
3176 tree vec_oprnd0 = NULL_TREE, vec_oprnd1 = NULL_TREE;
3177 stmt_vec_info prev_stmt_info;
3178 tree vectype_out, vectype_in;
3179 poly_uint64 nunits_in;
3180 poly_uint64 nunits_out;
3181 loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
3182 bb_vec_info bb_vinfo = dyn_cast <bb_vec_info> (vinfo);
3183 tree fndecl, new_temp, rhs_type;
3184 enum vect_def_type dt[4]
3185 = { vect_unknown_def_type, vect_unknown_def_type, vect_unknown_def_type,
3186 vect_unknown_def_type };
3187 tree vectypes[ARRAY_SIZE (dt)] = {};
3188 slp_tree slp_op[ARRAY_SIZE (dt)] = {};
3189 int ndts = ARRAY_SIZE (dt);
3190 int ncopies, j;
3191 auto_vec<tree, 8> vargs;
3192 auto_vec<tree, 8> orig_vargs;
3193 enum { NARROW, NONE, WIDEN } modifier;
3194 size_t i, nargs;
3195 tree lhs;
3196
3197 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
3198 return false;
3199
3200 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
3201 && ! vec_stmt)
3202 return false;
3203
3204 /* Is STMT_INFO a vectorizable call? */
3205 stmt = dyn_cast <gcall *> (stmt_info->stmt);
3206 if (!stmt)
3207 return false;
3208
3209 if (gimple_call_internal_p (stmt)
3210 && (internal_load_fn_p (gimple_call_internal_fn (stmt))
3211 || internal_store_fn_p (gimple_call_internal_fn (stmt))))
3212 /* Handled by vectorizable_load and vectorizable_store. */
3213 return false;
3214
3215 if (gimple_call_lhs (stmt) == NULL_TREE
3216 || TREE_CODE (gimple_call_lhs (stmt)) != SSA_NAME)
3217 return false;
3218
3219 gcc_checking_assert (!stmt_can_throw_internal (cfun, stmt));
3220
3221 vectype_out = STMT_VINFO_VECTYPE (stmt_info);
3222
3223 /* Process function arguments. */
3224 rhs_type = NULL_TREE;
3225 vectype_in = NULL_TREE;
3226 nargs = gimple_call_num_args (stmt);
3227
3228 /* Bail out if the function has more than four arguments; we do not have
3229 interesting builtin functions to vectorize with more than two arguments
3230 except for fma. Calls with no arguments are not handled either. */
3231 if (nargs == 0 || nargs > 4)
3232 return false;
3233
3234 /* Ignore the arguments of IFN_GOMP_SIMD_LANE, they are magic. */
3235 combined_fn cfn = gimple_call_combined_fn (stmt);
3236 if (cfn == CFN_GOMP_SIMD_LANE)
3237 {
3238 nargs = 0;
3239 rhs_type = unsigned_type_node;
3240 }
3241
3242 int mask_opno = -1;
3243 if (internal_fn_p (cfn))
3244 mask_opno = internal_fn_mask_index (as_internal_fn (cfn));
3245
3246 for (i = 0; i < nargs; i++)
3247 {
3248 if ((int) i == mask_opno)
3249 {
3250 op = gimple_call_arg (stmt, i);
3251 if (!vect_check_scalar_mask (vinfo,
3252 stmt_info, op, &dt[i], &vectypes[i]))
3253 return false;
3254 continue;
3255 }
3256
3257 if (!vect_is_simple_use (vinfo, stmt_info, slp_node,
3258 i, &op, &slp_op[i], &dt[i], &vectypes[i]))
3259 {
3260 if (dump_enabled_p ())
3261 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3262 "use not simple.\n");
3263 return false;
3264 }
3265
3266 /* We can only handle calls with arguments of the same type. */
3267 if (rhs_type
3268 && !types_compatible_p (rhs_type, TREE_TYPE (op)))
3269 {
3270 if (dump_enabled_p ())
3271 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3272 "argument types differ.\n");
3273 return false;
3274 }
3275 if (!rhs_type)
3276 rhs_type = TREE_TYPE (op);
3277
3278 if (!vectype_in)
3279 vectype_in = vectypes[i];
3280 else if (vectypes[i]
3281 && !types_compatible_p (vectypes[i], vectype_in))
3282 {
3283 if (dump_enabled_p ())
3284 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3285 "argument vector types differ.\n");
3286 return false;
3287 }
3288 }
3289 /* If all arguments are external or constant defs, infer the vector type
3290 from the scalar type. */
3291 if (!vectype_in)
3292 vectype_in = get_vectype_for_scalar_type (vinfo, rhs_type, slp_node);
3293 if (vec_stmt)
3294 gcc_assert (vectype_in);
3295 if (!vectype_in)
3296 {
3297 if (dump_enabled_p ())
3298 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3299 "no vectype for scalar type %T\n", rhs_type);
3300
3301 return false;
3302 }
3303 /* FORNOW: we don't yet support mixtures of vector sizes for calls,
3304 just mixtures of nunits. E.g. DI->SI versions of __builtin_ctz*
3305 are traditionally vectorized as two VnDI->VnDI IFN_CTZs followed
3306 by a pack of the two vectors into an SI vector. We would need
3307 separate code to handle direct VnDI->VnSI IFN_CTZs. */
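/* Concrete illustration (the vector modes are only an example): for

     int c = __builtin_ctzll (x);   // DImode argument, SImode result

   with 128-bit vectors vectype_in is V2DI and vectype_out is V4SI, so the
   vector sizes match and only the element counts differ; the NARROW path
   below can emit two V2DI->V2DI IFN_CTZ calls and pack the results into
   one V4SI vector.  A direct V2DI->V2SI call would mix vector sizes and is
   rejected by the check that follows.  */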
3308 if (TYPE_SIZE (vectype_in) != TYPE_SIZE (vectype_out))
3309 {
3310 if (dump_enabled_p ())
3311 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3312 "mismatched vector sizes %T and %T\n",
3313 vectype_in, vectype_out);
3314 return false;
3315 }
3316
3317 if (VECTOR_BOOLEAN_TYPE_P (vectype_out)
3318 != VECTOR_BOOLEAN_TYPE_P (vectype_in))
3319 {
3320 if (dump_enabled_p ())
3321 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3322 "mixed mask and nonmask vector types\n");
3323 return false;
3324 }
3325
3326 /* FORNOW */
3327 nunits_in = TYPE_VECTOR_SUBPARTS (vectype_in);
3328 nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
3329 if (known_eq (nunits_in * 2, nunits_out))
3330 modifier = NARROW;
3331 else if (known_eq (nunits_out, nunits_in))
3332 modifier = NONE;
3333 else if (known_eq (nunits_out * 2, nunits_in))
3334 modifier = WIDEN;
3335 else
3336 return false;
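/* For example, given the checks above: nunits_in == 2 and nunits_out == 4
   (say V2DI arguments and a V4SI result) selects NARROW, equal counts
   select NONE, and nunits_in == 2 * nunits_out selects WIDEN; any other
   ratio is not handled.  */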
3337
3338 /* We only handle functions that do not read or clobber memory. */
3339 if (gimple_vuse (stmt))
3340 {
3341 if (dump_enabled_p ())
3342 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3343 "function reads from or writes to memory.\n");
3344 return false;
3345 }
3346
3347 /* For now, we only vectorize functions if a target specific builtin
3348 is available. TODO -- in some cases, it might be profitable to
3349 insert the calls for pieces of the vector, in order to be able
3350 to vectorize other operations in the loop. */
3351 fndecl = NULL_TREE;
3352 internal_fn ifn = IFN_LAST;
3353 tree callee = gimple_call_fndecl (stmt);
3354
3355 /* First try using an internal function. */
3356 tree_code convert_code = ERROR_MARK;
3357 if (cfn != CFN_LAST
3358 && (modifier == NONE
3359 || (modifier == NARROW
3360 && simple_integer_narrowing (vectype_out, vectype_in,
3361 &convert_code))))
3362 ifn = vectorizable_internal_function (cfn, callee, vectype_out,
3363 vectype_in);
3364
3365 /* If that fails, try asking for a target-specific built-in function. */
3366 if (ifn == IFN_LAST)
3367 {
3368 if (cfn != CFN_LAST)
3369 fndecl = targetm.vectorize.builtin_vectorized_function
3370 (cfn, vectype_out, vectype_in);
3371 else if (callee && fndecl_built_in_p (callee, BUILT_IN_MD))
3372 fndecl = targetm.vectorize.builtin_md_vectorized_function
3373 (callee, vectype_out, vectype_in);
3374 }
3375
3376 if (ifn == IFN_LAST && !fndecl)
3377 {
3378 if (cfn == CFN_GOMP_SIMD_LANE
3379 && !slp_node
3380 && loop_vinfo
3381 && LOOP_VINFO_LOOP (loop_vinfo)->simduid
3382 && TREE_CODE (gimple_call_arg (stmt, 0)) == SSA_NAME
3383 && LOOP_VINFO_LOOP (loop_vinfo)->simduid
3384 == SSA_NAME_VAR (gimple_call_arg (stmt, 0)))
3385 {
3386 /* We can handle IFN_GOMP_SIMD_LANE by returning a
3387 { 0, 1, 2, ... vf - 1 } vector. */
3388 gcc_assert (nargs == 0);
3389 }
3390 else if (modifier == NONE
3391 && (gimple_call_builtin_p (stmt, BUILT_IN_BSWAP16)
3392 || gimple_call_builtin_p (stmt, BUILT_IN_BSWAP32)
3393 || gimple_call_builtin_p (stmt, BUILT_IN_BSWAP64)
3394 || gimple_call_builtin_p (stmt, BUILT_IN_BSWAP128)))
3395 return vectorizable_bswap (vinfo, stmt_info, gsi, vec_stmt, slp_node,
3396 slp_op, vectype_in, cost_vec);
3397 else
3398 {
3399 if (dump_enabled_p ())
3400 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3401 "function is not vectorizable.\n");
3402 return false;
3403 }
3404 }
3405
3406 if (slp_node)
3407 ncopies = 1;
3408 else if (modifier == NARROW && ifn == IFN_LAST)
3409 ncopies = vect_get_num_copies (loop_vinfo, vectype_out);
3410 else
3411 ncopies = vect_get_num_copies (loop_vinfo, vectype_in);
3412
3413 /* Sanity check: make sure that at least one copy of the vectorized stmt
3414 needs to be generated. */
3415 gcc_assert (ncopies >= 1);
3416
3417 vec_loop_masks *masks = (loop_vinfo ? &LOOP_VINFO_MASKS (loop_vinfo) : NULL);
3418 if (!vec_stmt) /* transformation not required. */
3419 {
3420 if (slp_node)
3421 for (i = 0; i < nargs; ++i)
3422 if (!vect_maybe_update_slp_op_vectype (slp_op[i], vectype_in))
3423 {
3424 if (dump_enabled_p ())
3425 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3426 "incompatible vector types for invariants\n");
3427 return false;
3428 }
3429 STMT_VINFO_TYPE (stmt_info) = call_vec_info_type;
3430 DUMP_VECT_SCOPE ("vectorizable_call");
3431 vect_model_simple_cost (vinfo, stmt_info,
3432 ncopies, dt, ndts, slp_node, cost_vec);
3433 if (ifn != IFN_LAST && modifier == NARROW && !slp_node)
3434 record_stmt_cost (cost_vec, ncopies / 2,
3435 vec_promote_demote, stmt_info, 0, vect_body);
3436
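/* Sketch of the masking bookkeeping below (an illustration, not an
   exhaustive description): for a call with a mask operand inside a loop
   that may end up fully masked, we only record here how many masks of
   type VECTYPE_OUT will be needed; the transform phase later fetches each
   loop mask with vect_get_loop_mask and combines it with the call's own
   mask operand via prepare_load_store_mask.  */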
3437 if (loop_vinfo && mask_opno >= 0)
3438 {
3439 unsigned int nvectors = (slp_node
3440 ? SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node)
3441 : ncopies);
3442 tree scalar_mask = gimple_call_arg (stmt_info->stmt, mask_opno);
3443 vect_record_loop_mask (loop_vinfo, masks, nvectors,
3444 vectype_out, scalar_mask);
3445 }
3446 return true;
3447 }
3448
3449 /* Transform. */
3450
3451 if (dump_enabled_p ())
3452 dump_printf_loc (MSG_NOTE, vect_location, "transform call.\n");
3453
3454 /* Handle def. */
3455 scalar_dest = gimple_call_lhs (stmt);
3456 vec_dest = vect_create_destination_var (scalar_dest, vectype_out);
3457
3458 bool masked_loop_p = loop_vinfo && LOOP_VINFO_FULLY_MASKED_P (loop_vinfo);
3459
3460 stmt_vec_info new_stmt_info = NULL;
3461 prev_stmt_info = NULL;
3462 if (modifier == NONE || ifn != IFN_LAST)
3463 {
3464 tree prev_res = NULL_TREE;
3465 vargs.safe_grow (nargs);
3466 orig_vargs.safe_grow (nargs);
3467 for (j = 0; j < ncopies; ++j)
3468 {
3469 /* Build argument list for the vectorized call. */
3470 if (slp_node)
3471 {
3472 auto_vec<vec<tree> > vec_defs (nargs);
3473 vec<tree> vec_oprnds0;
3474
3475 vect_get_slp_defs (vinfo, slp_node, &vec_defs);
3476 vec_oprnds0 = vec_defs[0];
3477
3478 /* Arguments are ready. Create the new vector stmt. */
3479 FOR_EACH_VEC_ELT (vec_oprnds0, i, vec_oprnd0)
3480 {
3481 size_t k;
3482 for (k = 0; k < nargs; k++)
3483 {
3484 vec<tree> vec_oprndsk = vec_defs[k];
3485 vargs[k] = vec_oprndsk[i];
3486 }
3487 if (modifier == NARROW)
3488 {
3489 /* We don't define any narrowing conditional functions
3490 at present. */
3491 gcc_assert (mask_opno < 0);
3492 tree half_res = make_ssa_name (vectype_in);
3493 gcall *call
3494 = gimple_build_call_internal_vec (ifn, vargs);
3495 gimple_call_set_lhs (call, half_res);
3496 gimple_call_set_nothrow (call, true);
3497 vect_finish_stmt_generation (vinfo, stmt_info, call, gsi);
3498 if ((i & 1) == 0)
3499 {
3500 prev_res = half_res;
3501 continue;
3502 }
3503 new_temp = make_ssa_name (vec_dest);
3504 gimple *new_stmt
3505 = gimple_build_assign (new_temp, convert_code,
3506 prev_res, half_res);
3507 new_stmt_info
3508 = vect_finish_stmt_generation (vinfo, stmt_info,
3509 new_stmt, gsi);
3510 }
3511 else
3512 {
3513 if (mask_opno >= 0 && masked_loop_p)
3514 {
3515 unsigned int vec_num = vec_oprnds0.length ();
3516 /* Always true for SLP. */
3517 gcc_assert (ncopies == 1);
3518 tree mask = vect_get_loop_mask (gsi, masks, vec_num,
3519 vectype_out, i);
3520 vargs[mask_opno] = prepare_load_store_mask
3521 (TREE_TYPE (mask), mask, vargs[mask_opno], gsi);
3522 }
3523
3524 gcall *call;
3525 if (ifn != IFN_LAST)
3526 call = gimple_build_call_internal_vec (ifn, vargs);
3527 else
3528 call = gimple_build_call_vec (fndecl, vargs);
3529 new_temp = make_ssa_name (vec_dest, call);
3530 gimple_call_set_lhs (call, new_temp);
3531 gimple_call_set_nothrow (call, true);
3532 new_stmt_info
3533 = vect_finish_stmt_generation (vinfo, stmt_info,
3534 call, gsi);
3535 }
3536 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt_info);
3537 }
3538
3539 for (i = 0; i < nargs; i++)
3540 {
3541 vec<tree> vec_oprndsi = vec_defs[i];
3542 vec_oprndsi.release ();
3543 }
3544 continue;
3545 }
3546
3547 for (i = 0; i < nargs; i++)
3548 {
3549 op = gimple_call_arg (stmt, i);
3550 if (j == 0)
3551 vec_oprnd0
3552 = vect_get_vec_def_for_operand (vinfo,
3553 op, stmt_info, vectypes[i]);
3554 else
3555 vec_oprnd0
3556 = vect_get_vec_def_for_stmt_copy (vinfo, orig_vargs[i]);
3557
3558 orig_vargs[i] = vargs[i] = vec_oprnd0;
3559 }
3560
3561 if (mask_opno >= 0 && masked_loop_p)
3562 {
3563 tree mask = vect_get_loop_mask (gsi, masks, ncopies,
3564 vectype_out, j);
3565 vargs[mask_opno]
3566 = prepare_load_store_mask (TREE_TYPE (mask), mask,
3567 vargs[mask_opno], gsi);
3568 }
3569
3570 if (cfn == CFN_GOMP_SIMD_LANE)
3571 {
3572 tree cst = build_index_vector (vectype_out, j * nunits_out, 1);
3573 tree new_var
3574 = vect_get_new_ssa_name (vectype_out, vect_simple_var, "cst_");
3575 gimple *init_stmt = gimple_build_assign (new_var, cst);
3576 vect_init_vector_1 (vinfo, stmt_info, init_stmt, NULL);
3577 new_temp = make_ssa_name (vec_dest);
3578 gimple *new_stmt = gimple_build_assign (new_temp, new_var);
3579 new_stmt_info
3580 = vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
3581 }
3582 else if (modifier == NARROW)
3583 {
3584 /* We don't define any narrowing conditional functions at
3585 present. */
3586 gcc_assert (mask_opno < 0);
3587 tree half_res = make_ssa_name (vectype_in);
3588 gcall *call = gimple_build_call_internal_vec (ifn, vargs);
3589 gimple_call_set_lhs (call, half_res);
3590 gimple_call_set_nothrow (call, true);
3591 vect_finish_stmt_generation (vinfo, stmt_info, call, gsi);
3592 if ((j & 1) == 0)
3593 {
3594 prev_res = half_res;
3595 continue;
3596 }
3597 new_temp = make_ssa_name (vec_dest);
3598 gassign *new_stmt = gimple_build_assign (new_temp, convert_code,
3599 prev_res, half_res);
3600 new_stmt_info
3601 = vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
3602 }
3603 else
3604 {
3605 gcall *call;
3606 if (ifn != IFN_LAST)
3607 call = gimple_build_call_internal_vec (ifn, vargs);
3608 else
3609 call = gimple_build_call_vec (fndecl, vargs);
3610 new_temp = make_ssa_name (vec_dest, call);
3611 gimple_call_set_lhs (call, new_temp);
3612 gimple_call_set_nothrow (call, true);
3613 new_stmt_info
3614 = vect_finish_stmt_generation (vinfo, stmt_info, call, gsi);
3615 }
3616
3617 if (j == (modifier == NARROW ? 1 : 0))
3618 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt_info;
3619 else
3620 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt_info;
3621
3622 prev_stmt_info = new_stmt_info;
3623 }
3624 }
3625 else if (modifier == NARROW)
3626 {
3627 /* We don't define any narrowing conditional functions at present. */
3628 gcc_assert (mask_opno < 0);
3629 for (j = 0; j < ncopies; ++j)
3630 {
3631 /* Build argument list for the vectorized call. */
3632 if (j == 0)
3633 vargs.create (nargs * 2);
3634 else
3635 vargs.truncate (0);
3636
3637 if (slp_node)
3638 {
3639 auto_vec<vec<tree> > vec_defs (nargs);
3640 vec<tree> vec_oprnds0;
3641
3642 vect_get_slp_defs (vinfo, slp_node, &vec_defs);
3643 vec_oprnds0 = vec_defs[0];
3644
3645 /* Arguments are ready. Create the new vector stmt. */
3646 for (i = 0; vec_oprnds0.iterate (i, &vec_oprnd0); i += 2)
3647 {
3648 size_t k;
3649 vargs.truncate (0);
3650 for (k = 0; k < nargs; k++)
3651 {
3652 vec<tree> vec_oprndsk = vec_defs[k];
3653 vargs.quick_push (vec_oprndsk[i]);
3654 vargs.quick_push (vec_oprndsk[i + 1]);
3655 }
3656 gcall *call;
3657 if (ifn != IFN_LAST)
3658 call = gimple_build_call_internal_vec (ifn, vargs);
3659 else
3660 call = gimple_build_call_vec (fndecl, vargs);
3661 new_temp = make_ssa_name (vec_dest, call);
3662 gimple_call_set_lhs (call, new_temp);
3663 gimple_call_set_nothrow (call, true);
3664 new_stmt_info
3665 = vect_finish_stmt_generation (vinfo, stmt_info, call, gsi);
3666 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt_info);
3667 }
3668
3669 for (i = 0; i < nargs; i++)
3670 {
3671 vec<tree> vec_oprndsi = vec_defs[i];
3672 vec_oprndsi.release ();
3673 }
3674 continue;
3675 }
3676
3677 for (i = 0; i < nargs; i++)
3678 {
3679 op = gimple_call_arg (stmt, i);
3680 if (j == 0)
3681 {
3682 vec_oprnd0
3683 = vect_get_vec_def_for_operand (vinfo, op, stmt_info,
3684 vectypes[i]);
3685 vec_oprnd1
3686 = vect_get_vec_def_for_stmt_copy (vinfo, vec_oprnd0);
3687 }
3688 else
3689 {
3690 vec_oprnd1 = gimple_call_arg (new_stmt_info->stmt,
3691 2 * i + 1);
3692 vec_oprnd0
3693 = vect_get_vec_def_for_stmt_copy (vinfo, vec_oprnd1);
3694 vec_oprnd1
3695 = vect_get_vec_def_for_stmt_copy (vinfo, vec_oprnd0);
3696 }
3697
3698 vargs.quick_push (vec_oprnd0);
3699 vargs.quick_push (vec_oprnd1);
3700 }
3701
3702 gcall *new_stmt = gimple_build_call_vec (fndecl, vargs);
3703 new_temp = make_ssa_name (vec_dest, new_stmt);
3704 gimple_call_set_lhs (new_stmt, new_temp);
3705 new_stmt_info
3706 = vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
3707
3708 if (j == 0)
3709 STMT_VINFO_VEC_STMT (stmt_info) = new_stmt_info;
3710 else
3711 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt_info;
3712
3713 prev_stmt_info = new_stmt_info;
3714 }
3715
3716 *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
3717 }
3718 else
3719 /* No current target implements this case. */
3720 return false;
3721
3722 vargs.release ();
3723
3724 /* The call in STMT might prevent it from being removed in dce.
3725      We cannot, however, remove it here, due to the way the ssa name
3726 it defines is mapped to the new definition. So just replace
3727 rhs of the statement with something harmless. */
3728
3729 if (slp_node)
3730 return true;
3731
3732 stmt_info = vect_orig_stmt (stmt_info);
3733 lhs = gimple_get_lhs (stmt_info->stmt);
3734
3735 gassign *new_stmt
3736 = gimple_build_assign (lhs, build_zero_cst (TREE_TYPE (lhs)));
3737 vinfo->replace_stmt (gsi, stmt_info, new_stmt);
3738
3739 return true;
3740 }
3741
3742
3743 struct simd_call_arg_info
3744 {
3745 tree vectype;
3746 tree op;
3747 HOST_WIDE_INT linear_step;
3748 enum vect_def_type dt;
3749 unsigned int align;
3750 bool simd_lane_linear;
3751 };
3752
3753 /* Helper function of vectorizable_simd_clone_call. If OP, an SSA_NAME,
3754 is linear within simd lane (but not within whole loop), note it in
3755 *ARGINFO. */
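/* Hypothetical example of the pattern recognized here (names invented):
   after OpenMP simd lowering an address computed as

     _lane = GOMP_SIMD_LANE (simduid);
     _off = (sizetype) _lane * 8;
     op_1 = &priv_array + _off;

   is linear within the simd lane with step 8, so *ARGINFO records the
   invariant base &priv_array and linear_step 8.  */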
3756
3757 static void
3758 vect_simd_lane_linear (tree op, class loop *loop,
3759 struct simd_call_arg_info *arginfo)
3760 {
3761 gimple *def_stmt = SSA_NAME_DEF_STMT (op);
3762
3763 if (!is_gimple_assign (def_stmt)
3764 || gimple_assign_rhs_code (def_stmt) != POINTER_PLUS_EXPR
3765 || !is_gimple_min_invariant (gimple_assign_rhs1 (def_stmt)))
3766 return;
3767
3768 tree base = gimple_assign_rhs1 (def_stmt);
3769 HOST_WIDE_INT linear_step = 0;
3770 tree v = gimple_assign_rhs2 (def_stmt);
3771 while (TREE_CODE (v) == SSA_NAME)
3772 {
3773 tree t;
3774 def_stmt = SSA_NAME_DEF_STMT (v);
3775 if (is_gimple_assign (def_stmt))
3776 switch (gimple_assign_rhs_code (def_stmt))
3777 {
3778 case PLUS_EXPR:
3779 t = gimple_assign_rhs2 (def_stmt);
3780 if (linear_step || TREE_CODE (t) != INTEGER_CST)
3781 return;
3782 base = fold_build2 (POINTER_PLUS_EXPR, TREE_TYPE (base), base, t);
3783 v = gimple_assign_rhs1 (def_stmt);
3784 continue;
3785 case MULT_EXPR:
3786 t = gimple_assign_rhs2 (def_stmt);
3787 if (linear_step || !tree_fits_shwi_p (t) || integer_zerop (t))
3788 return;
3789 linear_step = tree_to_shwi (t);
3790 v = gimple_assign_rhs1 (def_stmt);
3791 continue;
3792 CASE_CONVERT:
3793 t = gimple_assign_rhs1 (def_stmt);
3794 if (TREE_CODE (TREE_TYPE (t)) != INTEGER_TYPE
3795 || (TYPE_PRECISION (TREE_TYPE (v))
3796 < TYPE_PRECISION (TREE_TYPE (t))))
3797 return;
3798 if (!linear_step)
3799 linear_step = 1;
3800 v = t;
3801 continue;
3802 default:
3803 return;
3804 }
3805 else if (gimple_call_internal_p (def_stmt, IFN_GOMP_SIMD_LANE)
3806 && loop->simduid
3807 && TREE_CODE (gimple_call_arg (def_stmt, 0)) == SSA_NAME
3808 && (SSA_NAME_VAR (gimple_call_arg (def_stmt, 0))
3809 == loop->simduid))
3810 {
3811 if (!linear_step)
3812 linear_step = 1;
3813 arginfo->linear_step = linear_step;
3814 arginfo->op = base;
3815 arginfo->simd_lane_linear = true;
3816 return;
3817 }
3818 }
3819 }
3820
3821 /* Return the number of elements in vector type VECTYPE, which is associated
3822 with a SIMD clone. At present these vectors always have a constant
3823 length. */
3824
3825 static unsigned HOST_WIDE_INT
3826 simd_clone_subparts (tree vectype)
3827 {
3828 return TYPE_VECTOR_SUBPARTS (vectype).to_constant ();
3829 }
3830
3831 /* Function vectorizable_simd_clone_call.
3832
3833 Check if STMT_INFO performs a function call that can be vectorized
3834 by calling a simd clone of the function.
3835 If VEC_STMT is also passed, vectorize STMT_INFO: create a vectorized
3836 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
3837 Return true if STMT_INFO is vectorizable in this way. */
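/* Worked example (the clone shape is only illustrative): given

     #pragma omp declare simd uniform(n)
     float foo (float x, int n);

   a call foo (b[i], 4) in a vectorized loop can be replaced by a call to
   one of foo's simd clones, e.g. a simdlen-4 clone that takes a vector of
   four floats plus the uniform scalar n and returns a vector of four
   results.  */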
3838
3839 static bool
3840 vectorizable_simd_clone_call (vec_info *vinfo, stmt_vec_info stmt_info,
3841 gimple_stmt_iterator *gsi,
3842 stmt_vec_info *vec_stmt, slp_tree slp_node,
3843 stmt_vector_for_cost *)
3844 {
3845 tree vec_dest;
3846 tree scalar_dest;
3847 tree op, type;
3848 tree vec_oprnd0 = NULL_TREE;
3849 stmt_vec_info prev_stmt_info;
3850 tree vectype;
3851 unsigned int nunits;
3852 loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
3853 bb_vec_info bb_vinfo = dyn_cast <bb_vec_info> (vinfo);
3854 class loop *loop = loop_vinfo ? LOOP_VINFO_LOOP (loop_vinfo) : NULL;
3855 tree fndecl, new_temp;
3856 int ncopies, j;
3857 auto_vec<simd_call_arg_info> arginfo;
3858 vec<tree> vargs = vNULL;
3859 size_t i, nargs;
3860 tree lhs, rtype, ratype;
3861 vec<constructor_elt, va_gc> *ret_ctor_elts = NULL;
3862
3863 /* Is STMT a vectorizable call? */
3864 gcall *stmt = dyn_cast <gcall *> (stmt_info->stmt);
3865 if (!stmt)
3866 return false;
3867
3868 fndecl = gimple_call_fndecl (stmt);
3869 if (fndecl == NULL_TREE)
3870 return false;
3871
3872 struct cgraph_node *node = cgraph_node::get (fndecl);
3873 if (node == NULL || node->simd_clones == NULL)
3874 return false;
3875
3876 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
3877 return false;
3878
3879 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
3880 && ! vec_stmt)
3881 return false;
3882
3883 if (gimple_call_lhs (stmt)
3884 && TREE_CODE (gimple_call_lhs (stmt)) != SSA_NAME)
3885 return false;
3886
3887 gcc_checking_assert (!stmt_can_throw_internal (cfun, stmt));
3888
3889 vectype = STMT_VINFO_VECTYPE (stmt_info);
3890
3891 if (loop_vinfo && nested_in_vect_loop_p (loop, stmt_info))
3892 return false;
3893
3894 /* FORNOW */
3895 if (slp_node)
3896 return false;
3897
3898 /* Process function arguments. */
3899 nargs = gimple_call_num_args (stmt);
3900
3901 /* Bail out if the function has zero arguments. */
3902 if (nargs == 0)
3903 return false;
3904
3905 arginfo.reserve (nargs, true);
3906
3907 for (i = 0; i < nargs; i++)
3908 {
3909 simd_call_arg_info thisarginfo;
3910 affine_iv iv;
3911
3912 thisarginfo.linear_step = 0;
3913 thisarginfo.align = 0;
3914 thisarginfo.op = NULL_TREE;
3915 thisarginfo.simd_lane_linear = false;
3916
3917 op = gimple_call_arg (stmt, i);
3918 if (!vect_is_simple_use (op, vinfo, &thisarginfo.dt,
3919 &thisarginfo.vectype)
3920 || thisarginfo.dt == vect_uninitialized_def)
3921 {
3922 if (dump_enabled_p ())
3923 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3924 "use not simple.\n");
3925 return false;
3926 }
3927
3928 if (thisarginfo.dt == vect_constant_def
3929 || thisarginfo.dt == vect_external_def)
3930 gcc_assert (thisarginfo.vectype == NULL_TREE);
3931 else
3932 {
3933 gcc_assert (thisarginfo.vectype != NULL_TREE);
3934 if (VECTOR_BOOLEAN_TYPE_P (thisarginfo.vectype))
3935 {
3936 if (dump_enabled_p ())
3937 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3938 "vector mask arguments are not supported\n");
3939 return false;
3940 }
3941 }
3942
3943 /* For linear arguments, the analyze phase should have saved
3944 the base and step in STMT_VINFO_SIMD_CLONE_INFO. */
3945 if (i * 3 + 4 <= STMT_VINFO_SIMD_CLONE_INFO (stmt_info).length ()
3946 && STMT_VINFO_SIMD_CLONE_INFO (stmt_info)[i * 3 + 2])
3947 {
3948 gcc_assert (vec_stmt);
3949 thisarginfo.linear_step
3950 = tree_to_shwi (STMT_VINFO_SIMD_CLONE_INFO (stmt_info)[i * 3 + 2]);
3951 thisarginfo.op
3952 = STMT_VINFO_SIMD_CLONE_INFO (stmt_info)[i * 3 + 1];
3953 thisarginfo.simd_lane_linear
3954 = (STMT_VINFO_SIMD_CLONE_INFO (stmt_info)[i * 3 + 3]
3955 == boolean_true_node);
3956 	  /* If the loop has been peeled for alignment, we need to adjust it.  */
3957 tree n1 = LOOP_VINFO_NITERS_UNCHANGED (loop_vinfo);
3958 tree n2 = LOOP_VINFO_NITERS (loop_vinfo);
3959 if (n1 != n2 && !thisarginfo.simd_lane_linear)
3960 {
3961 tree bias = fold_build2 (MINUS_EXPR, TREE_TYPE (n1), n1, n2);
3962 tree step = STMT_VINFO_SIMD_CLONE_INFO (stmt_info)[i * 3 + 2];
3963 tree opt = TREE_TYPE (thisarginfo.op);
3964 bias = fold_convert (TREE_TYPE (step), bias);
3965 bias = fold_build2 (MULT_EXPR, TREE_TYPE (step), bias, step);
3966 thisarginfo.op
3967 = fold_build2 (POINTER_TYPE_P (opt)
3968 ? POINTER_PLUS_EXPR : PLUS_EXPR, opt,
3969 thisarginfo.op, bias);
3970 }
3971 }
3972 else if (!vec_stmt
3973 && thisarginfo.dt != vect_constant_def
3974 && thisarginfo.dt != vect_external_def
3975 && loop_vinfo
3976 && TREE_CODE (op) == SSA_NAME
3977 && simple_iv (loop, loop_containing_stmt (stmt), op,
3978 &iv, false)
3979 && tree_fits_shwi_p (iv.step))
3980 {
3981 thisarginfo.linear_step = tree_to_shwi (iv.step);
3982 thisarginfo.op = iv.base;
3983 }
3984 else if ((thisarginfo.dt == vect_constant_def
3985 || thisarginfo.dt == vect_external_def)
3986 && POINTER_TYPE_P (TREE_TYPE (op)))
3987 thisarginfo.align = get_pointer_alignment (op) / BITS_PER_UNIT;
3988 /* Addresses of array elements indexed by GOMP_SIMD_LANE are
3989 linear too. */
3990 if (POINTER_TYPE_P (TREE_TYPE (op))
3991 && !thisarginfo.linear_step
3992 && !vec_stmt
3993 && thisarginfo.dt != vect_constant_def
3994 && thisarginfo.dt != vect_external_def
3995 && loop_vinfo
3996 && !slp_node
3997 && TREE_CODE (op) == SSA_NAME)
3998 vect_simd_lane_linear (op, loop, &thisarginfo);
3999
4000 arginfo.quick_push (thisarginfo);
4001 }
4002
4003 unsigned HOST_WIDE_INT vf;
4004 if (!LOOP_VINFO_VECT_FACTOR (loop_vinfo).is_constant (&vf))
4005 {
4006 if (dump_enabled_p ())
4007 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4008 "not considering SIMD clones; not yet supported"
4009 " for variable-width vectors.\n");
4010 return false;
4011 }
4012
4013 unsigned int badness = 0;
4014 struct cgraph_node *bestn = NULL;
4015 if (STMT_VINFO_SIMD_CLONE_INFO (stmt_info).exists ())
4016 bestn = cgraph_node::get (STMT_VINFO_SIMD_CLONE_INFO (stmt_info)[0]);
4017 else
4018 for (struct cgraph_node *n = node->simd_clones; n != NULL;
4019 n = n->simdclone->next_clone)
4020 {
4021 unsigned int this_badness = 0;
4022 if (n->simdclone->simdlen > vf
4023 || n->simdclone->nargs != nargs)
4024 continue;
4025 if (n->simdclone->simdlen < vf)
4026 this_badness += (exact_log2 (vf)
4027 - exact_log2 (n->simdclone->simdlen)) * 1024;
4028 if (n->simdclone->inbranch)
4029 this_badness += 2048;
4030 int target_badness = targetm.simd_clone.usable (n);
4031 if (target_badness < 0)
4032 continue;
4033 this_badness += target_badness * 512;
4034 /* FORNOW: Have to add code to add the mask argument. */
4035 if (n->simdclone->inbranch)
4036 continue;
4037 for (i = 0; i < nargs; i++)
4038 {
4039 switch (n->simdclone->args[i].arg_type)
4040 {
4041 case SIMD_CLONE_ARG_TYPE_VECTOR:
4042 if (!useless_type_conversion_p
4043 (n->simdclone->args[i].orig_type,
4044 TREE_TYPE (gimple_call_arg (stmt, i))))
4045 i = -1;
4046 else if (arginfo[i].dt == vect_constant_def
4047 || arginfo[i].dt == vect_external_def
4048 || arginfo[i].linear_step)
4049 this_badness += 64;
4050 break;
4051 case SIMD_CLONE_ARG_TYPE_UNIFORM:
4052 if (arginfo[i].dt != vect_constant_def
4053 && arginfo[i].dt != vect_external_def)
4054 i = -1;
4055 break;
4056 case SIMD_CLONE_ARG_TYPE_LINEAR_CONSTANT_STEP:
4057 case SIMD_CLONE_ARG_TYPE_LINEAR_REF_CONSTANT_STEP:
4058 if (arginfo[i].dt == vect_constant_def
4059 || arginfo[i].dt == vect_external_def
4060 || (arginfo[i].linear_step
4061 != n->simdclone->args[i].linear_step))
4062 i = -1;
4063 break;
4064 case SIMD_CLONE_ARG_TYPE_LINEAR_VARIABLE_STEP:
4065 case SIMD_CLONE_ARG_TYPE_LINEAR_VAL_CONSTANT_STEP:
4066 case SIMD_CLONE_ARG_TYPE_LINEAR_UVAL_CONSTANT_STEP:
4067 case SIMD_CLONE_ARG_TYPE_LINEAR_REF_VARIABLE_STEP:
4068 case SIMD_CLONE_ARG_TYPE_LINEAR_VAL_VARIABLE_STEP:
4069 case SIMD_CLONE_ARG_TYPE_LINEAR_UVAL_VARIABLE_STEP:
4070 /* FORNOW */
4071 i = -1;
4072 break;
4073 case SIMD_CLONE_ARG_TYPE_MASK:
4074 gcc_unreachable ();
4075 }
4076 if (i == (size_t) -1)
4077 break;
4078 if (n->simdclone->args[i].alignment > arginfo[i].align)
4079 {
4080 i = -1;
4081 break;
4082 }
4083 if (arginfo[i].align)
4084 this_badness += (exact_log2 (arginfo[i].align)
4085 - exact_log2 (n->simdclone->args[i].alignment));
4086 }
4087 if (i == (size_t) -1)
4088 continue;
4089 if (bestn == NULL || this_badness < badness)
4090 {
4091 bestn = n;
4092 badness = this_badness;
4093 }
4094 }
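/* Numeric illustration of the scoring above: with vf == 8 the simdlen term
   contributes 0 for a simdlen-8 clone but
   (log2 (8) - log2 (4)) * 1024 == 1024 for a simdlen-4 clone, so with
   otherwise equal penalties the wider clone is selected; inbranch clones
   are skipped entirely for now.  */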
4095
4096 if (bestn == NULL)
4097 return false;
4098
4099 for (i = 0; i < nargs; i++)
4100 if ((arginfo[i].dt == vect_constant_def
4101 || arginfo[i].dt == vect_external_def)
4102 && bestn->simdclone->args[i].arg_type == SIMD_CLONE_ARG_TYPE_VECTOR)
4103 {
4104 tree arg_type = TREE_TYPE (gimple_call_arg (stmt, i));
4105 arginfo[i].vectype = get_vectype_for_scalar_type (vinfo, arg_type,
4106 slp_node);
4107 if (arginfo[i].vectype == NULL
4108 || (simd_clone_subparts (arginfo[i].vectype)
4109 > bestn->simdclone->simdlen))
4110 return false;
4111 }
4112
4113 fndecl = bestn->decl;
4114 nunits = bestn->simdclone->simdlen;
4115 ncopies = vf / nunits;
4116
4117   /* If the function isn't const, only allow it in simd loops where the user
4118 has asserted that at least nunits consecutive iterations can be
4119 performed using SIMD instructions. */
4120 if ((loop == NULL || (unsigned) loop->safelen < nunits)
4121 && gimple_vuse (stmt))
4122 return false;
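/* For instance, a loop annotated with

     #pragma omp simd safelen(8)

   has loop->safelen == 8, so a non-const clone is still acceptable here
   as long as nunits <= 8.  */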
4123
4124 /* Sanity check: make sure that at least one copy of the vectorized stmt
4125 needs to be generated. */
4126 gcc_assert (ncopies >= 1);
4127
4128 if (!vec_stmt) /* transformation not required. */
4129 {
4130 STMT_VINFO_SIMD_CLONE_INFO (stmt_info).safe_push (bestn->decl);
4131 for (i = 0; i < nargs; i++)
4132 if ((bestn->simdclone->args[i].arg_type
4133 == SIMD_CLONE_ARG_TYPE_LINEAR_CONSTANT_STEP)
4134 || (bestn->simdclone->args[i].arg_type
4135 == SIMD_CLONE_ARG_TYPE_LINEAR_REF_CONSTANT_STEP))
4136 {
4137 STMT_VINFO_SIMD_CLONE_INFO (stmt_info).safe_grow_cleared (i * 3
4138 + 1);
4139 STMT_VINFO_SIMD_CLONE_INFO (stmt_info).safe_push (arginfo[i].op);
4140 tree lst = POINTER_TYPE_P (TREE_TYPE (arginfo[i].op))
4141 ? size_type_node : TREE_TYPE (arginfo[i].op);
4142 tree ls = build_int_cst (lst, arginfo[i].linear_step);
4143 STMT_VINFO_SIMD_CLONE_INFO (stmt_info).safe_push (ls);
4144 tree sll = arginfo[i].simd_lane_linear
4145 ? boolean_true_node : boolean_false_node;
4146 STMT_VINFO_SIMD_CLONE_INFO (stmt_info).safe_push (sll);
4147 }
4148 STMT_VINFO_TYPE (stmt_info) = call_simd_clone_vec_info_type;
4149 DUMP_VECT_SCOPE ("vectorizable_simd_clone_call");
4150 /* vect_model_simple_cost (vinfo, stmt_info, ncopies,
4151 dt, slp_node, cost_vec); */
4152 return true;
4153 }
4154
4155 /* Transform. */
4156
4157 if (dump_enabled_p ())
4158 dump_printf_loc (MSG_NOTE, vect_location, "transform call.\n");
4159
4160 /* Handle def. */
4161 scalar_dest = gimple_call_lhs (stmt);
4162 vec_dest = NULL_TREE;
4163 rtype = NULL_TREE;
4164 ratype = NULL_TREE;
4165 if (scalar_dest)
4166 {
4167 vec_dest = vect_create_destination_var (scalar_dest, vectype);
4168 rtype = TREE_TYPE (TREE_TYPE (fndecl));
4169 if (TREE_CODE (rtype) == ARRAY_TYPE)
4170 {
4171 ratype = rtype;
4172 rtype = TREE_TYPE (ratype);
4173 }
4174 }
4175
4176 prev_stmt_info = NULL;
4177 for (j = 0; j < ncopies; ++j)
4178 {
4179 /* Build argument list for the vectorized call. */
4180 if (j == 0)
4181 vargs.create (nargs);
4182 else
4183 vargs.truncate (0);
4184
4185 for (i = 0; i < nargs; i++)
4186 {
4187 unsigned int k, l, m, o;
4188 tree atype;
4189 op = gimple_call_arg (stmt, i);
4190 switch (bestn->simdclone->args[i].arg_type)
4191 {
4192 case SIMD_CLONE_ARG_TYPE_VECTOR:
4193 atype = bestn->simdclone->args[i].vector_type;
4194 o = nunits / simd_clone_subparts (atype);
4195 for (m = j * o; m < (j + 1) * o; m++)
4196 {
4197 if (simd_clone_subparts (atype)
4198 < simd_clone_subparts (arginfo[i].vectype))
4199 {
4200 poly_uint64 prec = GET_MODE_BITSIZE (TYPE_MODE (atype));
4201 k = (simd_clone_subparts (arginfo[i].vectype)
4202 / simd_clone_subparts (atype));
4203 gcc_assert ((k & (k - 1)) == 0);
4204 if (m == 0)
4205 vec_oprnd0
4206 = vect_get_vec_def_for_operand (vinfo, op, stmt_info);
4207 else
4208 {
4209 vec_oprnd0 = arginfo[i].op;
4210 if ((m & (k - 1)) == 0)
4211 vec_oprnd0
4212 = vect_get_vec_def_for_stmt_copy (vinfo,
4213 vec_oprnd0);
4214 }
4215 arginfo[i].op = vec_oprnd0;
4216 vec_oprnd0
4217 = build3 (BIT_FIELD_REF, atype, vec_oprnd0,
4218 bitsize_int (prec),
4219 bitsize_int ((m & (k - 1)) * prec));
4220 gassign *new_stmt
4221 = gimple_build_assign (make_ssa_name (atype),
4222 vec_oprnd0);
4223 vect_finish_stmt_generation (vinfo, stmt_info,
4224 new_stmt, gsi);
4225 vargs.safe_push (gimple_assign_lhs (new_stmt));
4226 }
4227 else
4228 {
4229 k = (simd_clone_subparts (atype)
4230 / simd_clone_subparts (arginfo[i].vectype));
4231 gcc_assert ((k & (k - 1)) == 0);
4232 vec<constructor_elt, va_gc> *ctor_elts;
4233 if (k != 1)
4234 vec_alloc (ctor_elts, k);
4235 else
4236 ctor_elts = NULL;
4237 for (l = 0; l < k; l++)
4238 {
4239 if (m == 0 && l == 0)
4240 vec_oprnd0
4241 = vect_get_vec_def_for_operand (vinfo,
4242 op, stmt_info);
4243 else
4244 vec_oprnd0
4245 = vect_get_vec_def_for_stmt_copy (vinfo,
4246 arginfo[i].op);
4247 arginfo[i].op = vec_oprnd0;
4248 if (k == 1)
4249 break;
4250 CONSTRUCTOR_APPEND_ELT (ctor_elts, NULL_TREE,
4251 vec_oprnd0);
4252 }
4253 if (k == 1)
4254 vargs.safe_push (vec_oprnd0);
4255 else
4256 {
4257 vec_oprnd0 = build_constructor (atype, ctor_elts);
4258 gassign *new_stmt
4259 = gimple_build_assign (make_ssa_name (atype),
4260 vec_oprnd0);
4261 vect_finish_stmt_generation (vinfo, stmt_info,
4262 new_stmt, gsi);
4263 vargs.safe_push (gimple_assign_lhs (new_stmt));
4264 }
4265 }
4266 }
4267 break;
4268 case SIMD_CLONE_ARG_TYPE_UNIFORM:
4269 vargs.safe_push (op);
4270 break;
4271 case SIMD_CLONE_ARG_TYPE_LINEAR_CONSTANT_STEP:
4272 case SIMD_CLONE_ARG_TYPE_LINEAR_REF_CONSTANT_STEP:
4273 if (j == 0)
4274 {
4275 gimple_seq stmts;
4276 arginfo[i].op
4277 = force_gimple_operand (unshare_expr (arginfo[i].op),
4278 &stmts, true, NULL_TREE);
4279 if (stmts != NULL)
4280 {
4281 basic_block new_bb;
4282 edge pe = loop_preheader_edge (loop);
4283 new_bb = gsi_insert_seq_on_edge_immediate (pe, stmts);
4284 gcc_assert (!new_bb);
4285 }
4286 if (arginfo[i].simd_lane_linear)
4287 {
4288 vargs.safe_push (arginfo[i].op);
4289 break;
4290 }
4291 tree phi_res = copy_ssa_name (op);
4292 gphi *new_phi = create_phi_node (phi_res, loop->header);
4293 loop_vinfo->add_stmt (new_phi);
4294 add_phi_arg (new_phi, arginfo[i].op,
4295 loop_preheader_edge (loop), UNKNOWN_LOCATION);
4296 enum tree_code code
4297 = POINTER_TYPE_P (TREE_TYPE (op))
4298 ? POINTER_PLUS_EXPR : PLUS_EXPR;
4299 tree type = POINTER_TYPE_P (TREE_TYPE (op))
4300 ? sizetype : TREE_TYPE (op);
4301 widest_int cst
4302 = wi::mul (bestn->simdclone->args[i].linear_step,
4303 ncopies * nunits);
4304 tree tcst = wide_int_to_tree (type, cst);
4305 tree phi_arg = copy_ssa_name (op);
4306 gassign *new_stmt
4307 = gimple_build_assign (phi_arg, code, phi_res, tcst);
4308 gimple_stmt_iterator si = gsi_after_labels (loop->header);
4309 gsi_insert_after (&si, new_stmt, GSI_NEW_STMT);
4310 loop_vinfo->add_stmt (new_stmt);
4311 add_phi_arg (new_phi, phi_arg, loop_latch_edge (loop),
4312 UNKNOWN_LOCATION);
4313 arginfo[i].op = phi_res;
4314 vargs.safe_push (phi_res);
4315 }
4316 else
4317 {
4318 enum tree_code code
4319 = POINTER_TYPE_P (TREE_TYPE (op))
4320 ? POINTER_PLUS_EXPR : PLUS_EXPR;
4321 tree type = POINTER_TYPE_P (TREE_TYPE (op))
4322 ? sizetype : TREE_TYPE (op);
4323 widest_int cst
4324 = wi::mul (bestn->simdclone->args[i].linear_step,
4325 j * nunits);
4326 tree tcst = wide_int_to_tree (type, cst);
4327 new_temp = make_ssa_name (TREE_TYPE (op));
4328 gassign *new_stmt
4329 = gimple_build_assign (new_temp, code,
4330 arginfo[i].op, tcst);
4331 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
4332 vargs.safe_push (new_temp);
4333 }
4334 break;
4335 case SIMD_CLONE_ARG_TYPE_LINEAR_VAL_CONSTANT_STEP:
4336 case SIMD_CLONE_ARG_TYPE_LINEAR_UVAL_CONSTANT_STEP:
4337 case SIMD_CLONE_ARG_TYPE_LINEAR_VARIABLE_STEP:
4338 case SIMD_CLONE_ARG_TYPE_LINEAR_REF_VARIABLE_STEP:
4339 case SIMD_CLONE_ARG_TYPE_LINEAR_VAL_VARIABLE_STEP:
4340 case SIMD_CLONE_ARG_TYPE_LINEAR_UVAL_VARIABLE_STEP:
4341 default:
4342 gcc_unreachable ();
4343 }
4344 }
4345
4346 gcall *new_call = gimple_build_call_vec (fndecl, vargs);
4347 if (vec_dest)
4348 {
4349 gcc_assert (ratype || simd_clone_subparts (rtype) == nunits);
4350 if (ratype)
4351 new_temp = create_tmp_var (ratype);
4352 else if (simd_clone_subparts (vectype)
4353 == simd_clone_subparts (rtype))
4354 new_temp = make_ssa_name (vec_dest, new_call);
4355 else
4356 new_temp = make_ssa_name (rtype, new_call);
4357 gimple_call_set_lhs (new_call, new_temp);
4358 }
4359 stmt_vec_info new_stmt_info
4360 = vect_finish_stmt_generation (vinfo, stmt_info, new_call, gsi);
4361
4362 if (vec_dest)
4363 {
4364 if (simd_clone_subparts (vectype) < nunits)
4365 {
4366 unsigned int k, l;
4367 poly_uint64 prec = GET_MODE_BITSIZE (TYPE_MODE (vectype));
4368 poly_uint64 bytes = GET_MODE_SIZE (TYPE_MODE (vectype));
4369 k = nunits / simd_clone_subparts (vectype);
4370 gcc_assert ((k & (k - 1)) == 0);
4371 for (l = 0; l < k; l++)
4372 {
4373 tree t;
4374 if (ratype)
4375 {
4376 t = build_fold_addr_expr (new_temp);
4377 t = build2 (MEM_REF, vectype, t,
4378 build_int_cst (TREE_TYPE (t), l * bytes));
4379 }
4380 else
4381 t = build3 (BIT_FIELD_REF, vectype, new_temp,
4382 bitsize_int (prec), bitsize_int (l * prec));
4383 gimple *new_stmt
4384 = gimple_build_assign (make_ssa_name (vectype), t);
4385 new_stmt_info
4386 = vect_finish_stmt_generation (vinfo, stmt_info,
4387 new_stmt, gsi);
4388
4389 if (j == 0 && l == 0)
4390 STMT_VINFO_VEC_STMT (stmt_info)
4391 = *vec_stmt = new_stmt_info;
4392 else
4393 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt_info;
4394
4395 prev_stmt_info = new_stmt_info;
4396 }
4397
4398 if (ratype)
4399 vect_clobber_variable (vinfo, stmt_info, gsi, new_temp);
4400 continue;
4401 }
4402 else if (simd_clone_subparts (vectype) > nunits)
4403 {
4404 unsigned int k = (simd_clone_subparts (vectype)
4405 / simd_clone_subparts (rtype));
4406 gcc_assert ((k & (k - 1)) == 0);
4407 if ((j & (k - 1)) == 0)
4408 vec_alloc (ret_ctor_elts, k);
4409 if (ratype)
4410 {
4411 unsigned int m, o = nunits / simd_clone_subparts (rtype);
4412 for (m = 0; m < o; m++)
4413 {
4414 tree tem = build4 (ARRAY_REF, rtype, new_temp,
4415 size_int (m), NULL_TREE, NULL_TREE);
4416 gimple *new_stmt
4417 = gimple_build_assign (make_ssa_name (rtype), tem);
4418 new_stmt_info
4419 = vect_finish_stmt_generation (vinfo, stmt_info,
4420 new_stmt, gsi);
4421 CONSTRUCTOR_APPEND_ELT (ret_ctor_elts, NULL_TREE,
4422 gimple_assign_lhs (new_stmt));
4423 }
4424 vect_clobber_variable (vinfo, stmt_info, gsi, new_temp);
4425 }
4426 else
4427 CONSTRUCTOR_APPEND_ELT (ret_ctor_elts, NULL_TREE, new_temp);
4428 if ((j & (k - 1)) != k - 1)
4429 continue;
4430 vec_oprnd0 = build_constructor (vectype, ret_ctor_elts);
4431 gimple *new_stmt
4432 = gimple_build_assign (make_ssa_name (vec_dest), vec_oprnd0);
4433 new_stmt_info
4434 = vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
4435
4436 if ((unsigned) j == k - 1)
4437 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt_info;
4438 else
4439 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt_info;
4440
4441 prev_stmt_info = new_stmt_info;
4442 continue;
4443 }
4444 else if (ratype)
4445 {
4446 tree t = build_fold_addr_expr (new_temp);
4447 t = build2 (MEM_REF, vectype, t,
4448 build_int_cst (TREE_TYPE (t), 0));
4449 gimple *new_stmt
4450 = gimple_build_assign (make_ssa_name (vec_dest), t);
4451 new_stmt_info
4452 = vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
4453 vect_clobber_variable (vinfo, stmt_info, gsi, new_temp);
4454 }
4455 }
4456
4457 if (j == 0)
4458 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt_info;
4459 else
4460 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt_info;
4461
4462 prev_stmt_info = new_stmt_info;
4463 }
4464
4465 vargs.release ();
4466
4467 /* The call in STMT might prevent it from being removed in dce.
4468      We cannot, however, remove it here, due to the way the ssa name
4469 it defines is mapped to the new definition. So just replace
4470 rhs of the statement with something harmless. */
4471
4472 if (slp_node)
4473 return true;
4474
4475 gimple *new_stmt;
4476 if (scalar_dest)
4477 {
4478 type = TREE_TYPE (scalar_dest);
4479 lhs = gimple_call_lhs (vect_orig_stmt (stmt_info)->stmt);
4480 new_stmt = gimple_build_assign (lhs, build_zero_cst (type));
4481 }
4482 else
4483 new_stmt = gimple_build_nop ();
4484 vinfo->replace_stmt (gsi, vect_orig_stmt (stmt_info), new_stmt);
4485 unlink_stmt_vdef (stmt);
4486
4487 return true;
4488 }
4489
4490
4491 /* Function vect_gen_widened_results_half
4492
4493    Create a vector stmt whose code, number of arguments, and result
4494    variable are CODE, OP_TYPE, and VEC_DEST, and whose arguments are
4495    VEC_OPRND0 and VEC_OPRND1.  The new vector stmt is to be inserted at GSI.
4496    Only a plain assignment with code CODE is built here; a CALL_EXPR CODE
4497    (which would require calling a target-builtin DECL) is not handled.
4498    STMT_INFO is the original scalar stmt that we are vectorizing.  */
4499
4500 static gimple *
4501 vect_gen_widened_results_half (vec_info *vinfo, enum tree_code code,
4502 tree vec_oprnd0, tree vec_oprnd1, int op_type,
4503 tree vec_dest, gimple_stmt_iterator *gsi,
4504 stmt_vec_info stmt_info)
4505 {
4506 gimple *new_stmt;
4507 tree new_temp;
4508
4509 /* Generate half of the widened result: */
4510 gcc_assert (op_type == TREE_CODE_LENGTH (code));
4511 if (op_type != binary_op)
4512 vec_oprnd1 = NULL;
4513 new_stmt = gimple_build_assign (vec_dest, code, vec_oprnd0, vec_oprnd1);
4514 new_temp = make_ssa_name (vec_dest, new_stmt);
4515 gimple_assign_set_lhs (new_stmt, new_temp);
4516 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
4517
4518 return new_stmt;
4519 }
4520
4521
4522 /* Get vectorized definitions for loop-based vectorization of STMT_INFO.
4523 For the first operand we call vect_get_vec_def_for_operand (with OPRND
4524 containing scalar operand), and for the rest we get a copy with
4525 vect_get_vec_def_for_stmt_copy() using the previous vector definition
4526 (stored in OPRND). See vect_get_vec_def_for_stmt_copy() for details.
4527 The vectors are collected into VEC_OPRNDS. */
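/* E.g. with MULTI_STEP_CVT == 1 this pushes four vector defs in total:
   two at the outer level and two more from the single recursive call,
   as required for a two-step narrowing of the operand.  */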
4528
4529 static void
4530 vect_get_loop_based_defs (vec_info *vinfo, tree *oprnd, stmt_vec_info stmt_info,
4531 vec<tree> *vec_oprnds, int multi_step_cvt)
4532 {
4533 tree vec_oprnd;
4534
4535 /* Get first vector operand. */
4536 /* All the vector operands except the very first one (that is scalar oprnd)
4537 are stmt copies. */
4538 if (TREE_CODE (TREE_TYPE (*oprnd)) != VECTOR_TYPE)
4539 vec_oprnd = vect_get_vec_def_for_operand (vinfo, *oprnd, stmt_info);
4540 else
4541 vec_oprnd = vect_get_vec_def_for_stmt_copy (vinfo, *oprnd);
4542
4543 vec_oprnds->quick_push (vec_oprnd);
4544
4545 /* Get second vector operand. */
4546 vec_oprnd = vect_get_vec_def_for_stmt_copy (vinfo, vec_oprnd);
4547 vec_oprnds->quick_push (vec_oprnd);
4548
4549 *oprnd = vec_oprnd;
4550
4551 /* For conversion in multiple steps, continue to get operands
4552 recursively. */
4553 if (multi_step_cvt)
4554 vect_get_loop_based_defs (vinfo, oprnd, stmt_info, vec_oprnds,
4555 multi_step_cvt - 1);
4556 }
4557
4558
4559 /* Create vectorized demotion statements for vector operands from VEC_OPRNDS.
4560 For multi-step conversions store the resulting vectors and call the function
4561 recursively. */
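/* Illustrative example (the vector modes are an assumption): narrowing int
   to short combines each pair of V4SI operands into one V8HI result; for
   int to char the first level produces V8HI vectors and the recursive call
   then packs pairs of those into V16QI, which is why the recursion below
   uses VEC_PACK_TRUNC_EXPR.  */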
4562
4563 static void
4564 vect_create_vectorized_demotion_stmts (vec_info *vinfo, vec<tree> *vec_oprnds,
4565 int multi_step_cvt,
4566 stmt_vec_info stmt_info,
4567 vec<tree> vec_dsts,
4568 gimple_stmt_iterator *gsi,
4569 slp_tree slp_node, enum tree_code code,
4570 stmt_vec_info *prev_stmt_info)
4571 {
4572 unsigned int i;
4573 tree vop0, vop1, new_tmp, vec_dest;
4574
4575 vec_dest = vec_dsts.pop ();
4576
4577 for (i = 0; i < vec_oprnds->length (); i += 2)
4578 {
4579 /* Create demotion operation. */
4580 vop0 = (*vec_oprnds)[i];
4581 vop1 = (*vec_oprnds)[i + 1];
4582 gassign *new_stmt = gimple_build_assign (vec_dest, code, vop0, vop1);
4583 new_tmp = make_ssa_name (vec_dest, new_stmt);
4584 gimple_assign_set_lhs (new_stmt, new_tmp);
4585 stmt_vec_info new_stmt_info
4586 = vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
4587
4588 if (multi_step_cvt)
4589 /* Store the resulting vector for next recursive call. */
4590 (*vec_oprnds)[i/2] = new_tmp;
4591 else
4592 {
4593 /* This is the last step of the conversion sequence. Store the
4594 vectors in SLP_NODE or in vector info of the scalar statement
4595 (or in STMT_VINFO_RELATED_STMT chain). */
4596 if (slp_node)
4597 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt_info);
4598 else
4599 {
4600 if (!*prev_stmt_info)
4601 STMT_VINFO_VEC_STMT (stmt_info) = new_stmt_info;
4602 else
4603 STMT_VINFO_RELATED_STMT (*prev_stmt_info) = new_stmt_info;
4604
4605 *prev_stmt_info = new_stmt_info;
4606 }
4607 }
4608 }
4609
4610 /* For multi-step demotion operations we first generate demotion operations
4611 from the source type to the intermediate types, and then combine the
4612    results (stored in VEC_OPRNDS) with a demotion operation to the destination
4613 type. */
4614 if (multi_step_cvt)
4615 {
4616 /* At each level of recursion we have half of the operands we had at the
4617 previous level. */
4618 vec_oprnds->truncate ((i+1)/2);
4619 vect_create_vectorized_demotion_stmts (vinfo, vec_oprnds,
4620 multi_step_cvt - 1,
4621 stmt_info, vec_dsts, gsi,
4622 slp_node, VEC_PACK_TRUNC_EXPR,
4623 prev_stmt_info);
4624 }
4625
4626 vec_dsts.quick_push (vec_dest);
4627 }
4628
4629
4630 /* Create vectorized promotion statements for vector operands from VEC_OPRNDS0
4631 and VEC_OPRNDS1, for a binary operation associated with scalar statement
4632 STMT_INFO. For multi-step conversions store the resulting vectors and
4633 call the function recursively. */
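/* Illustrative example: widening a V8HI operand produces two V4SI results,
   one from the low-part code and one from the high-part code
   (e.g. VEC_UNPACK_LO_EXPR and VEC_UNPACK_HI_EXPR), so on return
   VEC_OPRNDS0 holds twice as many vectors as it did on entry.  */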
4634
4635 static void
4636 vect_create_vectorized_promotion_stmts (vec_info *vinfo,
4637 vec<tree> *vec_oprnds0,
4638 vec<tree> *vec_oprnds1,
4639 stmt_vec_info stmt_info, tree vec_dest,
4640 gimple_stmt_iterator *gsi,
4641 enum tree_code code1,
4642 enum tree_code code2, int op_type)
4643 {
4644 int i;
4645 tree vop0, vop1, new_tmp1, new_tmp2;
4646 gimple *new_stmt1, *new_stmt2;
4647 vec<tree> vec_tmp = vNULL;
4648
4649 vec_tmp.create (vec_oprnds0->length () * 2);
4650 FOR_EACH_VEC_ELT (*vec_oprnds0, i, vop0)
4651 {
4652 if (op_type == binary_op)
4653 vop1 = (*vec_oprnds1)[i];
4654 else
4655 vop1 = NULL_TREE;
4656
4657 /* Generate the two halves of promotion operation. */
4658 new_stmt1 = vect_gen_widened_results_half (vinfo, code1, vop0, vop1,
4659 op_type, vec_dest, gsi,
4660 stmt_info);
4661 new_stmt2 = vect_gen_widened_results_half (vinfo, code2, vop0, vop1,
4662 op_type, vec_dest, gsi,
4663 stmt_info);
4664 if (is_gimple_call (new_stmt1))
4665 {
4666 new_tmp1 = gimple_call_lhs (new_stmt1);
4667 new_tmp2 = gimple_call_lhs (new_stmt2);
4668 }
4669 else
4670 {
4671 new_tmp1 = gimple_assign_lhs (new_stmt1);
4672 new_tmp2 = gimple_assign_lhs (new_stmt2);
4673 }
4674
4675 /* Store the results for the next step. */
4676 vec_tmp.quick_push (new_tmp1);
4677 vec_tmp.quick_push (new_tmp2);
4678 }
4679
4680 vec_oprnds0->release ();
4681 *vec_oprnds0 = vec_tmp;
4682 }
4683
4684
4685 /* Check if STMT_INFO performs a conversion operation that can be vectorized.
4686 If VEC_STMT is also passed, vectorize STMT_INFO: create a vectorized
4687 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
4688 Return true if STMT_INFO is vectorizable in this way. */
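/* A few concrete cases, for orientation only: float -> double is a WIDEN
   conversion (each V4SF operand yields two V2DF results), double -> float
   is the corresponding NARROW case, and int -> float with equal-sized
   elements is NONE and maps to a single FLOAT_EXPR per vector.  */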
4689
4690 static bool
4691 vectorizable_conversion (vec_info *vinfo,
4692 stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
4693 stmt_vec_info *vec_stmt, slp_tree slp_node,
4694 stmt_vector_for_cost *cost_vec)
4695 {
4696 tree vec_dest;
4697 tree scalar_dest;
4698 tree op0, op1 = NULL_TREE;
4699 tree vec_oprnd0 = NULL_TREE, vec_oprnd1 = NULL_TREE;
4700 loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
4701 enum tree_code code, code1 = ERROR_MARK, code2 = ERROR_MARK;
4702 enum tree_code codecvt1 = ERROR_MARK, codecvt2 = ERROR_MARK;
4703 tree new_temp;
4704 enum vect_def_type dt[2] = {vect_unknown_def_type, vect_unknown_def_type};
4705 int ndts = 2;
4706 stmt_vec_info prev_stmt_info;
4707 poly_uint64 nunits_in;
4708 poly_uint64 nunits_out;
4709 tree vectype_out, vectype_in;
4710 int ncopies, i, j;
4711 tree lhs_type, rhs_type;
4712 enum { NARROW, NONE, WIDEN } modifier;
4713 vec<tree> vec_oprnds0 = vNULL;
4714 vec<tree> vec_oprnds1 = vNULL;
4715 tree vop0;
4716 bb_vec_info bb_vinfo = dyn_cast <bb_vec_info> (vinfo);
4717 int multi_step_cvt = 0;
4718 vec<tree> interm_types = vNULL;
4719 tree last_oprnd, intermediate_type, cvt_type = NULL_TREE;
4720 int op_type;
4721 unsigned short fltsz;
4722
4723 /* Is STMT a vectorizable conversion? */
4724
4725 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
4726 return false;
4727
4728 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
4729 && ! vec_stmt)
4730 return false;
4731
4732 gassign *stmt = dyn_cast <gassign *> (stmt_info->stmt);
4733 if (!stmt)
4734 return false;
4735
4736 if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME)
4737 return false;
4738
4739 code = gimple_assign_rhs_code (stmt);
4740 if (!CONVERT_EXPR_CODE_P (code)
4741 && code != FIX_TRUNC_EXPR
4742 && code != FLOAT_EXPR
4743 && code != WIDEN_MULT_EXPR
4744 && code != WIDEN_LSHIFT_EXPR)
4745 return false;
4746
4747 op_type = TREE_CODE_LENGTH (code);
4748
4749 /* Check types of lhs and rhs. */
4750 scalar_dest = gimple_assign_lhs (stmt);
4751 lhs_type = TREE_TYPE (scalar_dest);
4752 vectype_out = STMT_VINFO_VECTYPE (stmt_info);
4753
4754 /* Check the operands of the operation. */
4755 slp_tree slp_op0, slp_op1 = NULL;
4756 if (!vect_is_simple_use (vinfo, stmt_info, slp_node,
4757 0, &op0, &slp_op0, &dt[0], &vectype_in))
4758 {
4759 if (dump_enabled_p ())
4760 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4761 "use not simple.\n");
4762 return false;
4763 }
4764
4765 rhs_type = TREE_TYPE (op0);
4766 if ((code != FIX_TRUNC_EXPR && code != FLOAT_EXPR)
4767 && !((INTEGRAL_TYPE_P (lhs_type)
4768 && INTEGRAL_TYPE_P (rhs_type))
4769 || (SCALAR_FLOAT_TYPE_P (lhs_type)
4770 && SCALAR_FLOAT_TYPE_P (rhs_type))))
4771 return false;
4772
4773 if (!VECTOR_BOOLEAN_TYPE_P (vectype_out)
4774 && ((INTEGRAL_TYPE_P (lhs_type)
4775 && !type_has_mode_precision_p (lhs_type))
4776 || (INTEGRAL_TYPE_P (rhs_type)
4777 && !type_has_mode_precision_p (rhs_type))))
4778 {
4779 if (dump_enabled_p ())
4780 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4781 "type conversion to/from bit-precision unsupported."
4782 "\n");
4783 return false;
4784 }
4785
4786 if (op_type == binary_op)
4787 {
4788 gcc_assert (code == WIDEN_MULT_EXPR || code == WIDEN_LSHIFT_EXPR);
4789
4790 op1 = gimple_assign_rhs2 (stmt);
4791 tree vectype1_in;
4792 if (!vect_is_simple_use (vinfo, stmt_info, slp_node, 1,
4793 &op1, &slp_op1, &dt[1], &vectype1_in))
4794 {
4795 if (dump_enabled_p ())
4796 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4797 "use not simple.\n");
4798 return false;
4799 }
4800 /* For WIDEN_MULT_EXPR, if OP0 is a constant, use the type of
4801 OP1. */
4802 if (!vectype_in)
4803 vectype_in = vectype1_in;
4804 }
4805
4806 /* If op0 is an external or constant def, infer the vector type
4807 from the scalar type. */
4808 if (!vectype_in)
4809 vectype_in = get_vectype_for_scalar_type (vinfo, rhs_type, slp_node);
4810 if (vec_stmt)
4811 gcc_assert (vectype_in);
4812 if (!vectype_in)
4813 {
4814 if (dump_enabled_p ())
4815 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4816 "no vectype for scalar type %T\n", rhs_type);
4817
4818 return false;
4819 }
4820
4821 if (VECTOR_BOOLEAN_TYPE_P (vectype_out)
4822 && !VECTOR_BOOLEAN_TYPE_P (vectype_in))
4823 {
4824 if (dump_enabled_p ())
4825 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4826 "can't convert between boolean and non "
4827 "boolean vectors %T\n", rhs_type);
4828
4829 return false;
4830 }
4831
4832 nunits_in = TYPE_VECTOR_SUBPARTS (vectype_in);
4833 nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
4834 if (known_eq (nunits_out, nunits_in))
4835 modifier = NONE;
4836 else if (multiple_p (nunits_out, nunits_in))
4837 modifier = NARROW;
4838 else
4839 {
4840 gcc_checking_assert (multiple_p (nunits_in, nunits_out));
4841 modifier = WIDEN;
4842 }
4843
4844 /* Multiple types in SLP are handled by creating the appropriate number of
4845 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
4846 case of SLP. */
4847 if (slp_node)
4848 ncopies = 1;
4849 else if (modifier == NARROW)
4850 ncopies = vect_get_num_copies (loop_vinfo, vectype_out);
4851 else
4852 ncopies = vect_get_num_copies (loop_vinfo, vectype_in);
4853
4854 /* Sanity check: make sure that at least one copy of the vectorized stmt
4855 needs to be generated. */
4856 gcc_assert (ncopies >= 1);
4857
4858 bool found_mode = false;
4859 scalar_mode lhs_mode = SCALAR_TYPE_MODE (lhs_type);
4860 scalar_mode rhs_mode = SCALAR_TYPE_MODE (rhs_type);
4861 opt_scalar_mode rhs_mode_iter;
4862
4863 /* Supportable by target? */
4864 switch (modifier)
4865 {
4866 case NONE:
4867 if (code != FIX_TRUNC_EXPR
4868 && code != FLOAT_EXPR
4869 && !CONVERT_EXPR_CODE_P (code))
4870 return false;
4871 if (supportable_convert_operation (code, vectype_out, vectype_in, &code1))
4872 break;
4873 /* FALLTHRU */
4874 unsupported:
4875 if (dump_enabled_p ())
4876 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4877 "conversion not supported by target.\n");
4878 return false;
4879
4880 case WIDEN:
4881 if (supportable_widening_operation (vinfo, code, stmt_info, vectype_out,
4882 vectype_in, &code1, &code2,
4883 &multi_step_cvt, &interm_types))
4884 {
4885 /* Binary widening operation can only be supported directly by the
4886 architecture. */
4887 gcc_assert (!(multi_step_cvt && op_type == binary_op));
4888 break;
4889 }
4890
4891 if (code != FLOAT_EXPR
4892 || GET_MODE_SIZE (lhs_mode) <= GET_MODE_SIZE (rhs_mode))
4893 goto unsupported;
4894
4895 fltsz = GET_MODE_SIZE (lhs_mode);
4896 FOR_EACH_2XWIDER_MODE (rhs_mode_iter, rhs_mode)
4897 {
4898 rhs_mode = rhs_mode_iter.require ();
4899 if (GET_MODE_SIZE (rhs_mode) > fltsz)
4900 break;
4901
4902 cvt_type
4903 = build_nonstandard_integer_type (GET_MODE_BITSIZE (rhs_mode), 0);
4904 cvt_type = get_same_sized_vectype (cvt_type, vectype_in);
4905 if (cvt_type == NULL_TREE)
4906 goto unsupported;
4907
4908 if (GET_MODE_SIZE (rhs_mode) == fltsz)
4909 {
4910 if (!supportable_convert_operation (code, vectype_out,
4911 cvt_type, &codecvt1))
4912 goto unsupported;
4913 }
4914 else if (!supportable_widening_operation (vinfo, code, stmt_info,
4915 vectype_out, cvt_type,
4916 &codecvt1, &codecvt2,
4917 &multi_step_cvt,
4918 &interm_types))
4919 continue;
4920 else
4921 gcc_assert (multi_step_cvt == 0);
4922
4923 if (supportable_widening_operation (vinfo, NOP_EXPR, stmt_info,
4924 cvt_type,
4925 vectype_in, &code1, &code2,
4926 &multi_step_cvt, &interm_types))
4927 {
4928 found_mode = true;
4929 break;
4930 }
4931 }
4932
4933 if (!found_mode)
4934 goto unsupported;
4935
4936 if (GET_MODE_SIZE (rhs_mode) == fltsz)
4937 codecvt2 = ERROR_MARK;
4938 else
4939 {
4940 multi_step_cvt++;
4941 interm_types.safe_push (cvt_type);
4942 cvt_type = NULL_TREE;
4943 }
4944 break;
4945
4946 case NARROW:
4947 gcc_assert (op_type == unary_op);
4948 if (supportable_narrowing_operation (code, vectype_out, vectype_in,
4949 &code1, &multi_step_cvt,
4950 &interm_types))
4951 break;
4952
4953 if (code != FIX_TRUNC_EXPR
4954 || GET_MODE_SIZE (lhs_mode) >= GET_MODE_SIZE (rhs_mode))
4955 goto unsupported;
4956
4957 cvt_type
4958 = build_nonstandard_integer_type (GET_MODE_BITSIZE (rhs_mode), 0);
4959 cvt_type = get_same_sized_vectype (cvt_type, vectype_in);
4960 if (cvt_type == NULL_TREE)
4961 goto unsupported;
4962 if (!supportable_convert_operation (code, cvt_type, vectype_in,
4963 &codecvt1))
4964 goto unsupported;
4965 if (supportable_narrowing_operation (NOP_EXPR, vectype_out, cvt_type,
4966 &code1, &multi_step_cvt,
4967 &interm_types))
4968 break;
4969 goto unsupported;
4970
4971 default:
4972 gcc_unreachable ();
4973 }
4974
4975 if (!vec_stmt) /* transformation not required. */
4976 {
4977 if (slp_node
4978 && (!vect_maybe_update_slp_op_vectype (slp_op0, vectype_in)
4979 || !vect_maybe_update_slp_op_vectype (slp_op1, vectype_in)))
4980 {
4981 if (dump_enabled_p ())
4982 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4983 "incompatible vector types for invariants\n");
4984 return false;
4985 }
4986 DUMP_VECT_SCOPE ("vectorizable_conversion");
4987 if (modifier == NONE)
4988 {
4989 STMT_VINFO_TYPE (stmt_info) = type_conversion_vec_info_type;
4990 vect_model_simple_cost (vinfo, stmt_info, ncopies, dt, ndts, slp_node,
4991 cost_vec);
4992 }
4993 else if (modifier == NARROW)
4994 {
4995 STMT_VINFO_TYPE (stmt_info) = type_demotion_vec_info_type;
4996 /* The final packing step produces one vector result per copy. */
4997 unsigned int nvectors
4998 = (slp_node ? SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node) : ncopies);
4999 vect_model_promotion_demotion_cost (stmt_info, dt, nvectors,
5000 multi_step_cvt, cost_vec);
5001 }
5002 else
5003 {
5004 STMT_VINFO_TYPE (stmt_info) = type_promotion_vec_info_type;
5005 /* The initial unpacking step produces two vector results
5006 per copy. MULTI_STEP_CVT is 0 for a single conversion,
5007 so >> MULTI_STEP_CVT divides by 2^(number of steps - 1). */
5008 unsigned int nvectors
5009 = (slp_node
5010 ? SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node) >> multi_step_cvt
5011 : ncopies * 2);
5012 vect_model_promotion_demotion_cost (stmt_info, dt, nvectors,
5013 multi_step_cvt, cost_vec);
5014 }
5015 interm_types.release ();
5016 return true;
5017 }
5018
5019 /* Transform. */
5020 if (dump_enabled_p ())
5021 dump_printf_loc (MSG_NOTE, vect_location,
5022 "transform conversion. ncopies = %d.\n", ncopies);
5023
5024 if (op_type == binary_op)
5025 {
5026 if (CONSTANT_CLASS_P (op0))
5027 op0 = fold_convert (TREE_TYPE (op1), op0);
5028 else if (CONSTANT_CLASS_P (op1))
5029 op1 = fold_convert (TREE_TYPE (op0), op1);
5030 }
5031
5032 /* In case of multi-step conversion, we first generate conversion operations
5033      to the intermediate types, and then from those types to the final one.
5034 We create vector destinations for the intermediate type (TYPES) received
5035 from supportable_*_operation, and store them in the correct order
5036 for future use in vect_create_vectorized_*_stmts (). */
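/* For example, if the target has no direct char -> int widening, the
   conversion may be performed as char -> short -> int; INTERM_TYPES then
   holds the intermediate short vector type and an extra destination
   variable is created for it below, in addition to the final int vector
   destination.  */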
5037 auto_vec<tree> vec_dsts (multi_step_cvt + 1);
5038 vec_dest = vect_create_destination_var (scalar_dest,
5039 (cvt_type && modifier == WIDEN)
5040 ? cvt_type : vectype_out);
5041 vec_dsts.quick_push (vec_dest);
5042
5043 if (multi_step_cvt)
5044 {
5045 for (i = interm_types.length () - 1;
5046 interm_types.iterate (i, &intermediate_type); i--)
5047 {
5048 vec_dest = vect_create_destination_var (scalar_dest,
5049 intermediate_type);
5050 vec_dsts.quick_push (vec_dest);
5051 }
5052 }
5053
5054 if (cvt_type)
5055 vec_dest = vect_create_destination_var (scalar_dest,
5056 modifier == WIDEN
5057 ? vectype_out : cvt_type);
5058
5059 if (!slp_node)
5060 {
5061 if (modifier == WIDEN)
5062 {
5063 vec_oprnds0.create (multi_step_cvt ? vect_pow2 (multi_step_cvt) : 1);
5064 if (op_type == binary_op)
5065 vec_oprnds1.create (1);
5066 }
5067 else if (modifier == NARROW)
5068 vec_oprnds0.create (
5069 2 * (multi_step_cvt ? vect_pow2 (multi_step_cvt) : 1));
5070 }
5071 else if (code == WIDEN_LSHIFT_EXPR)
5072 vec_oprnds1.create (slp_node->vec_stmts_size);
5073
5074 last_oprnd = op0;
5075 prev_stmt_info = NULL;
5076 switch (modifier)
5077 {
5078 case NONE:
5079 for (j = 0; j < ncopies; j++)
5080 {
5081 if (j == 0)
5082 vect_get_vec_defs (vinfo, op0, NULL, stmt_info, &vec_oprnds0,
5083 NULL, slp_node);
5084 else
5085 vect_get_vec_defs_for_stmt_copy (vinfo, &vec_oprnds0, NULL);
5086
5087 FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
5088 {
5089 stmt_vec_info new_stmt_info;
5090 /* Arguments are ready, create the new vector stmt. */
5091 gcc_assert (TREE_CODE_LENGTH (code1) == unary_op);
5092 gassign *new_stmt = gimple_build_assign (vec_dest, code1, vop0);
5093 new_temp = make_ssa_name (vec_dest, new_stmt);
5094 gimple_assign_set_lhs (new_stmt, new_temp);
5095 new_stmt_info
5096 = vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
5097
5098 if (slp_node)
5099 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt_info);
5100 else
5101 {
5102 if (!prev_stmt_info)
5103 STMT_VINFO_VEC_STMT (stmt_info)
5104 = *vec_stmt = new_stmt_info;
5105 else
5106 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt_info;
5107 prev_stmt_info = new_stmt_info;
5108 }
5109 }
5110 }
5111 break;
5112
5113 case WIDEN:
5114 /* In case the vectorization factor (VF) is bigger than the number
5115 of elements that we can fit in a vectype (nunits), we have to
5116 generate more than one vector stmt, i.e., we need to "unroll"
5117 the vector stmt by a factor VF/nunits. */
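/* For illustration (hypothetical example): widening short -> int with
   8-element short vectors typically produces two 4-element int vectors
   per input vector via VEC_UNPACK_LO_EXPR / VEC_UNPACK_HI_EXPR. */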
5118 for (j = 0; j < ncopies; j++)
5119 {
5120 /* Handle uses. */
5121 if (j == 0)
5122 {
5123 if (slp_node)
5124 {
5125 if (code == WIDEN_LSHIFT_EXPR)
5126 {
5127 unsigned int k;
5128
5129 vec_oprnd1 = op1;
5130 /* Store vec_oprnd1 for every vector stmt to be created
5131 for SLP_NODE. We check during the analysis that all
5132 the shift arguments are the same. */
5133 for (k = 0; k < slp_node->vec_stmts_size - 1; k++)
5134 vec_oprnds1.quick_push (vec_oprnd1);
5135
5136 vect_get_vec_defs (vinfo, op0, NULL_TREE, stmt_info,
5137 &vec_oprnds0, NULL, slp_node);
5138 }
5139 else
5140 vect_get_vec_defs (vinfo, op0, op1, stmt_info, &vec_oprnds0,
5141 &vec_oprnds1, slp_node);
5142 }
5143 else
5144 {
5145 vec_oprnd0 = vect_get_vec_def_for_operand (vinfo,
5146 op0, stmt_info);
5147 vec_oprnds0.quick_push (vec_oprnd0);
5148 if (op_type == binary_op)
5149 {
5150 if (code == WIDEN_LSHIFT_EXPR)
5151 vec_oprnd1 = op1;
5152 else
5153 vec_oprnd1
5154 = vect_get_vec_def_for_operand (vinfo,
5155 op1, stmt_info);
5156 vec_oprnds1.quick_push (vec_oprnd1);
5157 }
5158 }
5159 }
5160 else
5161 {
5162 vec_oprnd0 = vect_get_vec_def_for_stmt_copy (vinfo, vec_oprnd0);
5163 vec_oprnds0.truncate (0);
5164 vec_oprnds0.quick_push (vec_oprnd0);
5165 if (op_type == binary_op)
5166 {
5167 if (code == WIDEN_LSHIFT_EXPR)
5168 vec_oprnd1 = op1;
5169 else
5170 vec_oprnd1 = vect_get_vec_def_for_stmt_copy (vinfo,
5171 vec_oprnd1);
5172 vec_oprnds1.truncate (0);
5173 vec_oprnds1.quick_push (vec_oprnd1);
5174 }
5175 }
5176
5177 /* Arguments are ready. Create the new vector stmts. */
5178 for (i = multi_step_cvt; i >= 0; i--)
5179 {
5180 tree this_dest = vec_dsts[i];
5181 enum tree_code c1 = code1, c2 = code2;
5182 if (i == 0 && codecvt2 != ERROR_MARK)
5183 {
5184 c1 = codecvt1;
5185 c2 = codecvt2;
5186 }
5187 vect_create_vectorized_promotion_stmts (vinfo, &vec_oprnds0,
5188 &vec_oprnds1, stmt_info,
5189 this_dest, gsi,
5190 c1, c2, op_type);
5191 }
5192
5193 FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
5194 {
5195 stmt_vec_info new_stmt_info;
5196 if (cvt_type)
5197 {
5198 gcc_assert (TREE_CODE_LENGTH (codecvt1) == unary_op);
5199 new_temp = make_ssa_name (vec_dest);
5200 gassign *new_stmt
5201 = gimple_build_assign (new_temp, codecvt1, vop0);
5202 new_stmt_info
5203 = vect_finish_stmt_generation (vinfo, stmt_info,
5204 new_stmt, gsi);
5205 }
5206 else
5207 new_stmt_info = vinfo->lookup_def (vop0);
5208
5209 if (slp_node)
5210 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt_info);
5211 else
5212 {
5213 if (!prev_stmt_info)
5214 STMT_VINFO_VEC_STMT (stmt_info) = new_stmt_info;
5215 else
5216 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt_info;
5217 prev_stmt_info = new_stmt_info;
5218 }
5219 }
5220 }
5221
5222 *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
5223 break;
5224
5225 case NARROW:
5226 /* In case the vectorization factor (VF) is bigger than the number
5227 of elements that we can fit in a vectype (nunits), we have to
5228 generate more than one vector stmt, i.e., we need to "unroll"
5229 the vector stmt by a factor VF/nunits. */
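/* For illustration (hypothetical example): narrowing int -> short packs
   two int vectors into one short vector per copy, typically via
   VEC_PACK_TRUNC_EXPR. */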
5230 for (j = 0; j < ncopies; j++)
5231 {
5232 /* Handle uses. */
5233 if (slp_node)
5234 vect_get_vec_defs (vinfo, op0, NULL_TREE, stmt_info, &vec_oprnds0,
5235 NULL, slp_node);
5236 else
5237 {
5238 vec_oprnds0.truncate (0);
5239 vect_get_loop_based_defs (vinfo,
5240 &last_oprnd, stmt_info, &vec_oprnds0,
5241 vect_pow2 (multi_step_cvt) - 1);
5242 }
5243
5244 /* Arguments are ready. Create the new vector stmts. */
5245 if (cvt_type)
5246 FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
5247 {
5248 gcc_assert (TREE_CODE_LENGTH (codecvt1) == unary_op);
5249 new_temp = make_ssa_name (vec_dest);
5250 gassign *new_stmt
5251 = gimple_build_assign (new_temp, codecvt1, vop0);
5252 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
5253 vec_oprnds0[i] = new_temp;
5254 }
5255
5256 vect_create_vectorized_demotion_stmts (vinfo, &vec_oprnds0,
5257 multi_step_cvt,
5258 stmt_info, vec_dsts, gsi,
5259 slp_node, code1,
5260 &prev_stmt_info);
5261 }
5262
5263 *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
5264 break;
5265 }
5266
5267 vec_oprnds0.release ();
5268 vec_oprnds1.release ();
5269 interm_types.release ();
5270
5271 return true;
5272 }
5273
5274 /* Return true if we can assume from the scalar form of STMT_INFO that
5275 neither the scalar nor the vector forms will generate code. STMT_INFO
5276 is known not to involve a data reference. */
5277
5278 bool
5279 vect_nop_conversion_p (stmt_vec_info stmt_info)
5280 {
5281 gassign *stmt = dyn_cast <gassign *> (stmt_info->stmt);
5282 if (!stmt)
5283 return false;
5284
5285 tree lhs = gimple_assign_lhs (stmt);
5286 tree_code code = gimple_assign_rhs_code (stmt);
5287 tree rhs = gimple_assign_rhs1 (stmt);
5288
5289 if (code == SSA_NAME || code == VIEW_CONVERT_EXPR)
5290 return true;
5291
5292 if (CONVERT_EXPR_CODE_P (code))
5293 return tree_nop_conversion_p (TREE_TYPE (lhs), TREE_TYPE (rhs));
5294
5295 return false;
5296 }
5297
5298 /* Function vectorizable_assignment.
5299
5300 Check if STMT_INFO performs an assignment (copy) that can be vectorized.
5301 If VEC_STMT is also passed, vectorize the STMT_INFO: create a vectorized
5302 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
5303 Return true if STMT_INFO is vectorizable in this way. */
5304
5305 static bool
5306 vectorizable_assignment (vec_info *vinfo,
5307 stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
5308 stmt_vec_info *vec_stmt, slp_tree slp_node,
5309 stmt_vector_for_cost *cost_vec)
5310 {
5311 tree vec_dest;
5312 tree scalar_dest;
5313 tree op;
5314 loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
5315 tree new_temp;
5316 enum vect_def_type dt[1] = {vect_unknown_def_type};
5317 int ndts = 1;
5318 int ncopies;
5319 int i, j;
5320 vec<tree> vec_oprnds = vNULL;
5321 tree vop;
5322 bb_vec_info bb_vinfo = dyn_cast <bb_vec_info> (vinfo);
5323 stmt_vec_info prev_stmt_info = NULL;
5324 enum tree_code code;
5325 tree vectype_in;
5326
5327 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
5328 return false;
5329
5330 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
5331 && ! vec_stmt)
5332 return false;
5333
5334 /* Is vectorizable assignment? */
5335 gassign *stmt = dyn_cast <gassign *> (stmt_info->stmt);
5336 if (!stmt)
5337 return false;
5338
5339 scalar_dest = gimple_assign_lhs (stmt);
5340 if (TREE_CODE (scalar_dest) != SSA_NAME)
5341 return false;
5342
5343 if (STMT_VINFO_DATA_REF (stmt_info))
5344 return false;
5345
5346 code = gimple_assign_rhs_code (stmt);
5347 if (!(gimple_assign_single_p (stmt)
5348 || code == PAREN_EXPR
5349 || CONVERT_EXPR_CODE_P (code)))
5350 return false;
5351
5352 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
5353 poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
5354
5355 /* Multiple types in SLP are handled by creating the appropriate number of
5356 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
5357 case of SLP. */
5358 if (slp_node)
5359 ncopies = 1;
5360 else
5361 ncopies = vect_get_num_copies (loop_vinfo, vectype);
5362
5363 gcc_assert (ncopies >= 1);
5364
5365 slp_tree slp_op;
5366 if (!vect_is_simple_use (vinfo, stmt_info, slp_node, 0, &op, &slp_op,
5367 &dt[0], &vectype_in))
5368 {
5369 if (dump_enabled_p ())
5370 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5371 "use not simple.\n");
5372 return false;
5373 }
5374 if (!vectype_in)
5375 vectype_in = get_vectype_for_scalar_type (vinfo, TREE_TYPE (op), slp_node);
5376
5377 /* We can handle NOP_EXPR conversions that do not change the number
5378 of elements or the vector size. */
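/* For illustration (hypothetical example): an int -> unsigned int
   conversion between vectors of the same shape is accepted here, while
   int -> short changes the vector size and is left to
   vectorizable_conversion instead. */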
5379 if ((CONVERT_EXPR_CODE_P (code)
5380 || code == VIEW_CONVERT_EXPR)
5381 && (!vectype_in
5382 || maybe_ne (TYPE_VECTOR_SUBPARTS (vectype_in), nunits)
5383 || maybe_ne (GET_MODE_SIZE (TYPE_MODE (vectype)),
5384 GET_MODE_SIZE (TYPE_MODE (vectype_in)))))
5385 return false;
5386
5387 /* We do not handle bit-precision changes. */
5388 if ((CONVERT_EXPR_CODE_P (code)
5389 || code == VIEW_CONVERT_EXPR)
5390 && INTEGRAL_TYPE_P (TREE_TYPE (scalar_dest))
5391 && (!type_has_mode_precision_p (TREE_TYPE (scalar_dest))
5392 || !type_has_mode_precision_p (TREE_TYPE (op)))
5393 /* But a conversion that does not change the bit-pattern is ok. */
5394 && !((TYPE_PRECISION (TREE_TYPE (scalar_dest))
5395 > TYPE_PRECISION (TREE_TYPE (op)))
5396 && TYPE_UNSIGNED (TREE_TYPE (op)))
5397 /* Conversion between boolean types of different sizes is
5398 a simple assignment in case their vectypes are the same
5399 boolean vectors. */
5400 && (!VECTOR_BOOLEAN_TYPE_P (vectype)
5401 || !VECTOR_BOOLEAN_TYPE_P (vectype_in)))
5402 {
5403 if (dump_enabled_p ())
5404 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5405 "type conversion to/from bit-precision "
5406 "unsupported.\n");
5407 return false;
5408 }
5409
5410 if (!vec_stmt) /* transformation not required. */
5411 {
5412 if (slp_node
5413 && !vect_maybe_update_slp_op_vectype (slp_op, vectype_in))
5414 {
5415 if (dump_enabled_p ())
5416 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5417 "incompatible vector types for invariants\n");
5418 return false;
5419 }
5420 STMT_VINFO_TYPE (stmt_info) = assignment_vec_info_type;
5421 DUMP_VECT_SCOPE ("vectorizable_assignment");
5422 if (!vect_nop_conversion_p (stmt_info))
5423 vect_model_simple_cost (vinfo, stmt_info, ncopies, dt, ndts, slp_node,
5424 cost_vec);
5425 return true;
5426 }
5427
5428 /* Transform. */
5429 if (dump_enabled_p ())
5430 dump_printf_loc (MSG_NOTE, vect_location, "transform assignment.\n");
5431
5432 /* Handle def. */
5433 vec_dest = vect_create_destination_var (scalar_dest, vectype);
5434
5435 /* Handle use. */
5436 for (j = 0; j < ncopies; j++)
5437 {
5438 /* Handle uses. */
5439 if (j == 0)
5440 vect_get_vec_defs (vinfo, op, NULL, stmt_info, &vec_oprnds, NULL,
5441 slp_node);
5442 else
5443 vect_get_vec_defs_for_stmt_copy (vinfo, &vec_oprnds, NULL);
5444
5445 /* Arguments are ready. Create the new vector stmt. */
5446 stmt_vec_info new_stmt_info = NULL;
5447 FOR_EACH_VEC_ELT (vec_oprnds, i, vop)
5448 {
5449 if (CONVERT_EXPR_CODE_P (code)
5450 || code == VIEW_CONVERT_EXPR)
5451 vop = build1 (VIEW_CONVERT_EXPR, vectype, vop);
5452 gassign *new_stmt = gimple_build_assign (vec_dest, vop);
5453 new_temp = make_ssa_name (vec_dest, new_stmt);
5454 gimple_assign_set_lhs (new_stmt, new_temp);
5455 new_stmt_info
5456 = vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
5457 if (slp_node)
5458 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt_info);
5459 }
5460
5461 if (slp_node)
5462 continue;
5463
5464 if (j == 0)
5465 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt_info;
5466 else
5467 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt_info;
5468
5469 prev_stmt_info = new_stmt_info;
5470 }
5471
5472 vec_oprnds.release ();
5473 return true;
5474 }
5475
5476
5477 /* Return TRUE if CODE (a shift operation) is supported for SCALAR_TYPE
5478 either as shift by a scalar or by a vector. */
5479
5480 bool
5481 vect_supportable_shift (vec_info *vinfo, enum tree_code code, tree scalar_type)
5482 {
5483
5484 machine_mode vec_mode;
5485 optab optab;
5486 int icode;
5487 tree vectype;
5488
5489 vectype = get_vectype_for_scalar_type (vinfo, scalar_type);
5490 if (!vectype)
5491 return false;
5492
5493 optab = optab_for_tree_code (code, vectype, optab_scalar);
5494 if (!optab
5495 || optab_handler (optab, TYPE_MODE (vectype)) == CODE_FOR_nothing)
5496 {
5497 optab = optab_for_tree_code (code, vectype, optab_vector);
5498 if (!optab
5499 || (optab_handler (optab, TYPE_MODE (vectype))
5500 == CODE_FOR_nothing))
5501 return false;
5502 }
5503
5504 vec_mode = TYPE_MODE (vectype);
5505 icode = (int) optab_handler (optab, vec_mode);
5506 if (icode == CODE_FOR_nothing)
5507 return false;
5508
5509 return true;
5510 }
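/* Example use (a hypothetical caller, for illustration only):

     if (vect_supportable_shift (vinfo, LSHIFT_EXPR, TREE_TYPE (oprnd0)))
       ... introduce a shift in the recognized pattern ...

   e.g. the pattern recognizers in tree-vect-patterns.c use this check
   before synthesizing shifts for multiplications or divisions. */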
5511
5512
5513 /* Function vectorizable_shift.
5514
5515 Check if STMT_INFO performs a shift operation that can be vectorized.
5516 If VEC_STMT is also passed, vectorize the STMT_INFO: create a vectorized
5517 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
5518 Return true if STMT_INFO is vectorizable in this way. */
5519
5520 static bool
5521 vectorizable_shift (vec_info *vinfo,
5522 stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
5523 stmt_vec_info *vec_stmt, slp_tree slp_node,
5524 stmt_vector_for_cost *cost_vec)
5525 {
5526 tree vec_dest;
5527 tree scalar_dest;
5528 tree op0, op1 = NULL;
5529 tree vec_oprnd1 = NULL_TREE;
5530 tree vectype;
5531 loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
5532 enum tree_code code;
5533 machine_mode vec_mode;
5534 tree new_temp;
5535 optab optab;
5536 int icode;
5537 machine_mode optab_op2_mode;
5538 enum vect_def_type dt[2] = {vect_unknown_def_type, vect_unknown_def_type};
5539 int ndts = 2;
5540 stmt_vec_info prev_stmt_info;
5541 poly_uint64 nunits_in;
5542 poly_uint64 nunits_out;
5543 tree vectype_out;
5544 tree op1_vectype;
5545 int ncopies;
5546 int j, i;
5547 vec<tree> vec_oprnds0 = vNULL;
5548 vec<tree> vec_oprnds1 = vNULL;
5549 tree vop0, vop1;
5550 unsigned int k;
5551 bool scalar_shift_arg = true;
5552 bb_vec_info bb_vinfo = dyn_cast <bb_vec_info> (vinfo);
5553 bool incompatible_op1_vectype_p = false;
5554
5555 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
5556 return false;
5557
5558 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
5559 && STMT_VINFO_DEF_TYPE (stmt_info) != vect_nested_cycle
5560 && ! vec_stmt)
5561 return false;
5562
5563 /* Is STMT a vectorizable binary/unary operation? */
5564 gassign *stmt = dyn_cast <gassign *> (stmt_info->stmt);
5565 if (!stmt)
5566 return false;
5567
5568 if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME)
5569 return false;
5570
5571 code = gimple_assign_rhs_code (stmt);
5572
5573 if (!(code == LSHIFT_EXPR || code == RSHIFT_EXPR || code == LROTATE_EXPR
5574 || code == RROTATE_EXPR))
5575 return false;
5576
5577 scalar_dest = gimple_assign_lhs (stmt);
5578 vectype_out = STMT_VINFO_VECTYPE (stmt_info);
5579 if (!type_has_mode_precision_p (TREE_TYPE (scalar_dest)))
5580 {
5581 if (dump_enabled_p ())
5582 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5583 "bit-precision shifts not supported.\n");
5584 return false;
5585 }
5586
5587 slp_tree slp_op0;
5588 if (!vect_is_simple_use (vinfo, stmt_info, slp_node,
5589 0, &op0, &slp_op0, &dt[0], &vectype))
5590 {
5591 if (dump_enabled_p ())
5592 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5593 "use not simple.\n");
5594 return false;
5595 }
5596 /* If op0 is an external or constant def, infer the vector type
5597 from the scalar type. */
5598 if (!vectype)
5599 vectype = get_vectype_for_scalar_type (vinfo, TREE_TYPE (op0), slp_node);
5600 if (vec_stmt)
5601 gcc_assert (vectype);
5602 if (!vectype)
5603 {
5604 if (dump_enabled_p ())
5605 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5606 "no vectype for scalar type\n");
5607 return false;
5608 }
5609
5610 nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
5611 nunits_in = TYPE_VECTOR_SUBPARTS (vectype);
5612 if (maybe_ne (nunits_out, nunits_in))
5613 return false;
5614
5615 stmt_vec_info op1_def_stmt_info;
5616 slp_tree slp_op1;
5617 if (!vect_is_simple_use (vinfo, stmt_info, slp_node, 1, &op1, &slp_op1,
5618 &dt[1], &op1_vectype, &op1_def_stmt_info))
5619 {
5620 if (dump_enabled_p ())
5621 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5622 "use not simple.\n");
5623 return false;
5624 }
5625
5626 /* Multiple types in SLP are handled by creating the appropriate number of
5627 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
5628 case of SLP. */
5629 if (slp_node)
5630 ncopies = 1;
5631 else
5632 ncopies = vect_get_num_copies (loop_vinfo, vectype);
5633
5634 gcc_assert (ncopies >= 1);
5635
5636 /* Determine whether the shift amount is a vector or a scalar. If the
5637 shift/rotate amount is a vector, use the vector/vector shift optabs. */
5638
5639 if ((dt[1] == vect_internal_def
5640 || dt[1] == vect_induction_def
5641 || dt[1] == vect_nested_cycle)
5642 && !slp_node)
5643 scalar_shift_arg = false;
5644 else if (dt[1] == vect_constant_def
5645 || dt[1] == vect_external_def
5646 || dt[1] == vect_internal_def)
5647 {
5648 /* In SLP, we need to check whether the shift count is the same in
5649 all the scalar stmts; in loops, if it is a constant or invariant,
5650 it is always a scalar shift. */
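	  /* For illustration (hypothetical example): in an SLP group
	     { x0 << 2, x1 << 5 } the counts differ between lanes, so
	     scalar_shift_arg is cleared and a vector/vector shift is
	     used instead. */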
5651 if (slp_node)
5652 {
5653 vec<stmt_vec_info> stmts = SLP_TREE_SCALAR_STMTS (slp_node);
5654 stmt_vec_info slpstmt_info;
5655
5656 FOR_EACH_VEC_ELT (stmts, k, slpstmt_info)
5657 {
5658 gassign *slpstmt = as_a <gassign *> (slpstmt_info->stmt);
5659 if (!operand_equal_p (gimple_assign_rhs2 (slpstmt), op1, 0))
5660 scalar_shift_arg = false;
5661 }
5662
5663 /* For internal SLP defs we have to make sure we see scalar stmts
5664 for all vector elements.
5665 ??? For different vectors we could resort to a different
5666 scalar shift operand but code-generation below simply always
5667 takes the first. */
5668 if (dt[1] == vect_internal_def
5669 && maybe_ne (nunits_out * SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node),
5670 stmts.length ()))
5671 scalar_shift_arg = false;
5672 }
5673
5674 /* If the shift amount is computed by a pattern stmt we cannot
5675 use the scalar amount directly, so give up and use a vector
5676 shift. */
5677 if (op1_def_stmt_info && is_pattern_stmt_p (op1_def_stmt_info))
5678 scalar_shift_arg = false;
5679 }
5680 else
5681 {
5682 if (dump_enabled_p ())
5683 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5684 "operand mode requires invariant argument.\n");
5685 return false;
5686 }
5687
5688 /* Vector shifted by vector. */
5689 bool was_scalar_shift_arg = scalar_shift_arg;
5690 if (!scalar_shift_arg)
5691 {
5692 optab = optab_for_tree_code (code, vectype, optab_vector);
5693 if (dump_enabled_p ())
5694 dump_printf_loc (MSG_NOTE, vect_location,
5695 "vector/vector shift/rotate found.\n");
5696
5697 if (!op1_vectype)
5698 op1_vectype = get_vectype_for_scalar_type (vinfo, TREE_TYPE (op1),
5699 slp_op1);
5700 incompatible_op1_vectype_p
5701 = (op1_vectype == NULL_TREE
5702 || maybe_ne (TYPE_VECTOR_SUBPARTS (op1_vectype),
5703 TYPE_VECTOR_SUBPARTS (vectype))
5704 || TYPE_MODE (op1_vectype) != TYPE_MODE (vectype));
5705 if (incompatible_op1_vectype_p
5706 && (!slp_node
5707 || SLP_TREE_DEF_TYPE (slp_op1) != vect_constant_def
5708 || slp_op1->refcnt != 1))
5709 {
5710 if (dump_enabled_p ())
5711 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5712 "unusable type for last operand in"
5713 " vector/vector shift/rotate.\n");
5714 return false;
5715 }
5716 }
5717 /* See if the machine has a vector-shift-by-scalar insn and, if not,
5718 whether it has a vector-shift-by-vector insn. */
5719 else
5720 {
5721 optab = optab_for_tree_code (code, vectype, optab_scalar);
5722 if (optab
5723 && optab_handler (optab, TYPE_MODE (vectype)) != CODE_FOR_nothing)
5724 {
5725 if (dump_enabled_p ())
5726 dump_printf_loc (MSG_NOTE, vect_location,
5727 "vector/scalar shift/rotate found.\n");
5728 }
5729 else
5730 {
5731 optab = optab_for_tree_code (code, vectype, optab_vector);
5732 if (optab
5733 && (optab_handler (optab, TYPE_MODE (vectype))
5734 != CODE_FOR_nothing))
5735 {
5736 scalar_shift_arg = false;
5737
5738 if (dump_enabled_p ())
5739 dump_printf_loc (MSG_NOTE, vect_location,
5740 "vector/vector shift/rotate found.\n");
5741
5742 if (!op1_vectype)
5743 op1_vectype = get_vectype_for_scalar_type (vinfo,
5744 TREE_TYPE (op1),
5745 slp_node);
5746
5747 /* Unlike the other binary operators, shifts/rotates have
5748 an int rhs rather than one of the same type as the lhs,
5749 so make sure the scalar count has the right type when we
5750 are dealing with vectors of long long/long/short/char. */
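		  /* For illustration (hypothetical example): a vector of
		     long long shifted by an 'int' count: the conversion
		     int -> long long is not a nop, so
		     incompatible_op1_vectype_p is set and the count is
		     converted before building the invariant vector. */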
5751 incompatible_op1_vectype_p
5752 = (!op1_vectype
5753 || !tree_nop_conversion_p (TREE_TYPE (vectype),
5754 TREE_TYPE (op1)));
5755 }
5756 }
5757 }
5758
5759 /* Supportable by target? */
5760 if (!optab)
5761 {
5762 if (dump_enabled_p ())
5763 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5764 "no optab.\n");
5765 return false;
5766 }
5767 vec_mode = TYPE_MODE (vectype);
5768 icode = (int) optab_handler (optab, vec_mode);
5769 if (icode == CODE_FOR_nothing)
5770 {
5771 if (dump_enabled_p ())
5772 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5773 "op not supported by target.\n");
5774 /* Check only during analysis. */
5775 if (maybe_ne (GET_MODE_SIZE (vec_mode), UNITS_PER_WORD)
5776 || (!vec_stmt
5777 && !vect_worthwhile_without_simd_p (vinfo, code)))
5778 return false;
5779 if (dump_enabled_p ())
5780 dump_printf_loc (MSG_NOTE, vect_location,
5781 "proceeding using word mode.\n");
5782 }
5783
5784 /* Worthwhile without SIMD support? Check only during analysis. */
5785 if (!vec_stmt
5786 && !VECTOR_MODE_P (TYPE_MODE (vectype))
5787 && !vect_worthwhile_without_simd_p (vinfo, code))
5788 {
5789 if (dump_enabled_p ())
5790 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5791 "not worthwhile without SIMD support.\n");
5792 return false;
5793 }
5794
5795 if (!vec_stmt) /* transformation not required. */
5796 {
5797 if (slp_node
5798 && (!vect_maybe_update_slp_op_vectype (slp_op0, vectype)
5799 || (!scalar_shift_arg
5800 && (!incompatible_op1_vectype_p
5801 || dt[1] == vect_constant_def)
5802 && !vect_maybe_update_slp_op_vectype
5803 (slp_op1,
5804 incompatible_op1_vectype_p ? vectype : op1_vectype))))
5805 {
5806 if (dump_enabled_p ())
5807 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5808 "incompatible vector types for invariants\n");
5809 return false;
5810 }
5811 /* Now adjust the constant shift amount in place. */
5812 if (slp_node
5813 && incompatible_op1_vectype_p
5814 && dt[1] == vect_constant_def)
5815 {
5816 for (unsigned i = 0;
5817 i < SLP_TREE_SCALAR_OPS (slp_op1).length (); ++i)
5818 {
5819 SLP_TREE_SCALAR_OPS (slp_op1)[i]
5820 = fold_convert (TREE_TYPE (vectype),
5821 SLP_TREE_SCALAR_OPS (slp_op1)[i]);
5822 gcc_assert ((TREE_CODE (SLP_TREE_SCALAR_OPS (slp_op1)[i])
5823 == INTEGER_CST));
5824 }
5825 }
5826 STMT_VINFO_TYPE (stmt_info) = shift_vec_info_type;
5827 DUMP_VECT_SCOPE ("vectorizable_shift");
5828 vect_model_simple_cost (vinfo, stmt_info, ncopies, dt,
5829 scalar_shift_arg ? 1 : ndts, slp_node, cost_vec);
5830 return true;
5831 }
5832
5833 /* Transform. */
5834
5835 if (dump_enabled_p ())
5836 dump_printf_loc (MSG_NOTE, vect_location,
5837 "transform binary/unary operation.\n");
5838
5839 if (incompatible_op1_vectype_p && !slp_node)
5840 {
5841 op1 = fold_convert (TREE_TYPE (vectype), op1);
5842 if (dt[1] != vect_constant_def)
5843 op1 = vect_init_vector (vinfo, stmt_info, op1,
5844 TREE_TYPE (vectype), NULL);
5845 }
5846
5847 /* Handle def. */
5848 vec_dest = vect_create_destination_var (scalar_dest, vectype);
5849
5850 prev_stmt_info = NULL;
5851 for (j = 0; j < ncopies; j++)
5852 {
5853 /* Handle uses. */
5854 if (j == 0)
5855 {
5856 if (scalar_shift_arg)
5857 {
5858 /* Vector shl and shr insn patterns can be defined with scalar
5859 operand 2 (shift operand). In this case, use constant or loop
5860 invariant op1 directly, without extending it to vector mode
5861 first. */
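		  /* For illustration (hypothetical example): with such a
		     pattern, scalar code a0 << 3, a1 << 3, ... becomes a
		     single vector shift va << 3, keeping the literal 3 as
		     a scalar operand. */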
5862 optab_op2_mode = insn_data[icode].operand[2].mode;
5863 if (!VECTOR_MODE_P (optab_op2_mode))
5864 {
5865 if (dump_enabled_p ())
5866 dump_printf_loc (MSG_NOTE, vect_location,
5867 "operand 1 using scalar mode.\n");
5868 vec_oprnd1 = op1;
5869 vec_oprnds1.create (slp_node ? slp_node->vec_stmts_size : 1);
5870 vec_oprnds1.quick_push (vec_oprnd1);
5871 if (slp_node)
5872 {
5873 /* Store vec_oprnd1 for every vector stmt to be created
5874 for SLP_NODE. We check during the analysis that all
5875 the shift arguments are the same.
5876 TODO: Allow different constants for different vector
5877 stmts generated for an SLP instance. */
5878 for (k = 0; k < slp_node->vec_stmts_size - 1; k++)
5879 vec_oprnds1.quick_push (vec_oprnd1);
5880 }
5881 }
5882 }
5883 else if (slp_node && incompatible_op1_vectype_p)
5884 {
5885 if (was_scalar_shift_arg)
5886 {
5887 /* If the argument was the same in all lanes, create
5888 the correctly typed vector shift amount directly. */
5889 op1 = fold_convert (TREE_TYPE (vectype), op1);
5890 op1 = vect_init_vector (vinfo, stmt_info,
5891 op1, TREE_TYPE (vectype),
5892 !loop_vinfo ? gsi : NULL);
5893 vec_oprnd1 = vect_init_vector (vinfo, stmt_info, op1, vectype,
5894 !loop_vinfo ? gsi : NULL);
5895 vec_oprnds1.create (slp_node->vec_stmts_size);
5896 for (k = 0; k < slp_node->vec_stmts_size; k++)
5897 vec_oprnds1.quick_push (vec_oprnd1);
5898 }
5899 else if (dt[1] == vect_constant_def)
5900 /* The constant shift amount has been adjusted in place. */
5901 ;
5902 else
5903 gcc_assert (TYPE_MODE (op1_vectype) == TYPE_MODE (vectype));
5904 }
5905
5906 /* vec_oprnd1 is available if operand 1 should be of a scalar type
5907 (a special case for certain kinds of vector shifts); otherwise,
5908 operand 1 should be of a vector type (the usual case). */
5909 if (vec_oprnd1)
5910 vect_get_vec_defs (vinfo, op0, NULL_TREE, stmt_info,
5911 &vec_oprnds0, NULL, slp_node);
5912 else
5913 vect_get_vec_defs (vinfo, op0, op1, stmt_info,
5914 &vec_oprnds0, &vec_oprnds1, slp_node);
5915 }
5916 else
5917 vect_get_vec_defs_for_stmt_copy (vinfo, &vec_oprnds0, &vec_oprnds1);
5918
5919 /* Arguments are ready. Create the new vector stmt. */
5920 stmt_vec_info new_stmt_info = NULL;
5921 FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
5922 {
5923 vop1 = vec_oprnds1[i];
5924 gassign *new_stmt = gimple_build_assign (vec_dest, code, vop0, vop1);
5925 new_temp = make_ssa_name (vec_dest, new_stmt);
5926 gimple_assign_set_lhs (new_stmt, new_temp);
5927 new_stmt_info
5928 = vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
5929 if (slp_node)
5930 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt_info);
5931 }
5932
5933 if (slp_node)
5934 continue;
5935
5936 if (j == 0)
5937 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt_info;
5938 else
5939 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt_info;
5940 prev_stmt_info = new_stmt_info;
5941 }
5942
5943 vec_oprnds0.release ();
5944 vec_oprnds1.release ();
5945
5946 return true;
5947 }
5948
5949
5950 /* Function vectorizable_operation.
5951
5952 Check if STMT_INFO performs a binary, unary or ternary operation that can
5953 be vectorized.
5954 If VEC_STMT is also passed, vectorize STMT_INFO: create a vectorized
5955 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
5956 Return true if STMT_INFO is vectorizable in this way. */
5957
5958 static bool
5959 vectorizable_operation (vec_info *vinfo,
5960 stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
5961 stmt_vec_info *vec_stmt, slp_tree slp_node,
5962 stmt_vector_for_cost *cost_vec)
5963 {
5964 tree vec_dest;
5965 tree scalar_dest;
5966 tree op0, op1 = NULL_TREE, op2 = NULL_TREE;
5967 tree vectype;
5968 loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
5969 enum tree_code code, orig_code;
5970 machine_mode vec_mode;
5971 tree new_temp;
5972 int op_type;
5973 optab optab;
5974 bool target_support_p;
5975 enum vect_def_type dt[3]
5976 = {vect_unknown_def_type, vect_unknown_def_type, vect_unknown_def_type};
5977 int ndts = 3;
5978 stmt_vec_info prev_stmt_info;
5979 poly_uint64 nunits_in;
5980 poly_uint64 nunits_out;
5981 tree vectype_out;
5982 int ncopies, vec_num;
5983 int j, i;
5984 vec<tree> vec_oprnds0 = vNULL;
5985 vec<tree> vec_oprnds1 = vNULL;
5986 vec<tree> vec_oprnds2 = vNULL;
5987 tree vop0, vop1, vop2;
5988 bb_vec_info bb_vinfo = dyn_cast <bb_vec_info> (vinfo);
5989
5990 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
5991 return false;
5992
5993 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
5994 && ! vec_stmt)
5995 return false;
5996
5997 /* Is STMT a vectorizable binary/unary operation? */
5998 gassign *stmt = dyn_cast <gassign *> (stmt_info->stmt);
5999 if (!stmt)
6000 return false;
6001
6002 /* Loads and stores are handled in vectorizable_{load,store}. */
6003 if (STMT_VINFO_DATA_REF (stmt_info))
6004 return false;
6005
6006 orig_code = code = gimple_assign_rhs_code (stmt);
6007
6008 /* Shifts are handled in vectorizable_shift. */
6009 if (code == LSHIFT_EXPR
6010 || code == RSHIFT_EXPR
6011 || code == LROTATE_EXPR
6012 || code == RROTATE_EXPR)
6013 return false;
6014
6015 /* Comparisons are handled in vectorizable_comparison. */
6016 if (TREE_CODE_CLASS (code) == tcc_comparison)
6017 return false;
6018
6019 /* Conditions are handled in vectorizable_condition. */
6020 if (code == COND_EXPR)
6021 return false;
6022
6023 /* For pointer addition and subtraction, we should use the normal
6024 plus and minus for the vector operation. */
6025 if (code == POINTER_PLUS_EXPR)
6026 code = PLUS_EXPR;
6027 if (code == POINTER_DIFF_EXPR)
6028 code = MINUS_EXPR;
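  /* For illustration (hypothetical example): a POINTER_PLUS_EXPR like
     q_2 = p_1 + 4 is vectorized as a plain PLUS_EXPR on vectors of
     pointer-sized unsigned elements. */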
6029
6030 /* Support only unary or binary operations. */
6031 op_type = TREE_CODE_LENGTH (code);
6032 if (op_type != unary_op && op_type != binary_op && op_type != ternary_op)
6033 {
6034 if (dump_enabled_p ())
6035 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6036 "num. args = %d (not unary/binary/ternary op).\n",
6037 op_type);
6038 return false;
6039 }
6040
6041 scalar_dest = gimple_assign_lhs (stmt);
6042 vectype_out = STMT_VINFO_VECTYPE (stmt_info);
6043
6044 /* Most operations cannot handle bit-precision types without extra
6045 truncations. */
6046 bool mask_op_p = VECTOR_BOOLEAN_TYPE_P (vectype_out);
6047 if (!mask_op_p
6048 && !type_has_mode_precision_p (TREE_TYPE (scalar_dest))
6049 /* Exceptions are the bitwise binary operations. */
6050 && code != BIT_IOR_EXPR
6051 && code != BIT_XOR_EXPR
6052 && code != BIT_AND_EXPR)
6053 {
6054 if (dump_enabled_p ())
6055 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6056 "bit-precision arithmetic not supported.\n");
6057 return false;
6058 }
6059
6060 slp_tree slp_op0;
6061 if (!vect_is_simple_use (vinfo, stmt_info, slp_node,
6062 0, &op0, &slp_op0, &dt[0], &vectype))
6063 {
6064 if (dump_enabled_p ())
6065 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6066 "use not simple.\n");
6067 return false;
6068 }
6069 /* If op0 is an external or constant def, infer the vector type
6070 from the scalar type. */
6071 if (!vectype)
6072 {
6073 /* For a boolean type we cannot determine the vectype from an
6074 invariant value (we don't know whether it is a vector of
6075 booleans or a vector of integers). We use the output
6076 vectype because operations on booleans don't change the
6077 type. */
6078 if (VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (op0)))
6079 {
6080 if (!VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (scalar_dest)))
6081 {
6082 if (dump_enabled_p ())
6083 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6084 "not supported operation on bool value.\n");
6085 return false;
6086 }
6087 vectype = vectype_out;
6088 }
6089 else
6090 vectype = get_vectype_for_scalar_type (vinfo, TREE_TYPE (op0),
6091 slp_node);
6092 }
6093 if (vec_stmt)
6094 gcc_assert (vectype);
6095 if (!vectype)
6096 {
6097 if (dump_enabled_p ())
6098 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6099 "no vectype for scalar type %T\n",
6100 TREE_TYPE (op0));
6101
6102 return false;
6103 }
6104
6105 nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
6106 nunits_in = TYPE_VECTOR_SUBPARTS (vectype);
6107 if (maybe_ne (nunits_out, nunits_in))
6108 return false;
6109
6110 tree vectype2 = NULL_TREE, vectype3 = NULL_TREE;
6111 slp_tree slp_op1 = NULL, slp_op2 = NULL;
6112 if (op_type == binary_op || op_type == ternary_op)
6113 {
6114 if (!vect_is_simple_use (vinfo, stmt_info, slp_node,
6115 1, &op1, &slp_op1, &dt[1], &vectype2))
6116 {
6117 if (dump_enabled_p ())
6118 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6119 "use not simple.\n");
6120 return false;
6121 }
6122 }
6123 if (op_type == ternary_op)
6124 {
6125 if (!vect_is_simple_use (vinfo, stmt_info, slp_node,
6126 2, &op2, &slp_op2, &dt[2], &vectype3))
6127 {
6128 if (dump_enabled_p ())
6129 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6130 "use not simple.\n");
6131 return false;
6132 }
6133 }
6134
6135 /* Multiple types in SLP are handled by creating the appropriate number of
6136 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
6137 case of SLP. */
6138 if (slp_node)
6139 {
6140 ncopies = 1;
6141 vec_num = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
6142 }
6143 else
6144 {
6145 ncopies = vect_get_num_copies (loop_vinfo, vectype);
6146 vec_num = 1;
6147 }
6148
6149 gcc_assert (ncopies >= 1);
6150
6151 /* Reject attempts to combine mask types with nonmask types, e.g. if
6152 we have an AND between a (nonmask) boolean loaded from memory and
6153 a (mask) boolean result of a comparison.
6154
6155 TODO: We could easily fix these cases up using pattern statements. */
6156 if (VECTOR_BOOLEAN_TYPE_P (vectype) != mask_op_p
6157 || (vectype2 && VECTOR_BOOLEAN_TYPE_P (vectype2) != mask_op_p)
6158 || (vectype3 && VECTOR_BOOLEAN_TYPE_P (vectype3) != mask_op_p))
6159 {
6160 if (dump_enabled_p ())
6161 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6162 "mixed mask and nonmask vector types\n");
6163 return false;
6164 }
6165
6166 /* Supportable by target? */
6167
6168 vec_mode = TYPE_MODE (vectype);
6169 if (code == MULT_HIGHPART_EXPR)
6170 target_support_p = can_mult_highpart_p (vec_mode, TYPE_UNSIGNED (vectype));
6171 else
6172 {
6173 optab = optab_for_tree_code (code, vectype, optab_default);
6174 if (!optab)
6175 {
6176 if (dump_enabled_p ())
6177 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6178 "no optab.\n");
6179 return false;
6180 }
6181 target_support_p = (optab_handler (optab, vec_mode)
6182 != CODE_FOR_nothing);
6183 }
6184
6185 if (!target_support_p)
6186 {
6187 if (dump_enabled_p ())
6188 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6189 "op not supported by target.\n");
6190 /* Check only during analysis. */
6191 if (maybe_ne (GET_MODE_SIZE (vec_mode), UNITS_PER_WORD)
6192 || (!vec_stmt && !vect_worthwhile_without_simd_p (vinfo, code)))
6193 return false;
6194 if (dump_enabled_p ())
6195 dump_printf_loc (MSG_NOTE, vect_location,
6196 "proceeding using word mode.\n");
6197 }
6198
6199 /* Worthwhile without SIMD support? Check only during analysis. */
6200 if (!VECTOR_MODE_P (vec_mode)
6201 && !vec_stmt
6202 && !vect_worthwhile_without_simd_p (vinfo, code))
6203 {
6204 if (dump_enabled_p ())
6205 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6206 "not worthwhile without SIMD support.\n");
6207 return false;
6208 }
6209
6210 int reduc_idx = STMT_VINFO_REDUC_IDX (stmt_info);
6211 vec_loop_masks *masks = (loop_vinfo ? &LOOP_VINFO_MASKS (loop_vinfo) : NULL);
6212 internal_fn cond_fn = get_conditional_internal_fn (code);
6213
6214 if (!vec_stmt) /* transformation not required. */
6215 {
6216 /* If this operation is part of a reduction, a fully-masked loop
6217 should only change the active lanes of the reduction chain,
6218 keeping the inactive lanes as-is. */
6219 if (loop_vinfo
6220 && LOOP_VINFO_CAN_FULLY_MASK_P (loop_vinfo)
6221 && reduc_idx >= 0)
6222 {
6223 if (cond_fn == IFN_LAST
6224 || !direct_internal_fn_supported_p (cond_fn, vectype,
6225 OPTIMIZE_FOR_SPEED))
6226 {
6227 if (dump_enabled_p ())
6228 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6229 "can't use a fully-masked loop because no"
6230 " conditional operation is available.\n");
6231 LOOP_VINFO_CAN_FULLY_MASK_P (loop_vinfo) = false;
6232 }
6233 else
6234 vect_record_loop_mask (loop_vinfo, masks, ncopies * vec_num,
6235 vectype, NULL);
6236 }
6237
6238 /* Put types on constant and invariant SLP children. */
6239 if (slp_node
6240 && (!vect_maybe_update_slp_op_vectype (slp_op0, vectype)
6241 || !vect_maybe_update_slp_op_vectype (slp_op1, vectype)
6242 || !vect_maybe_update_slp_op_vectype (slp_op2, vectype)))
6243 {
6244 if (dump_enabled_p ())
6245 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6246 "incompatible vector types for invariants\n");
6247 return false;
6248 }
6249
6250 STMT_VINFO_TYPE (stmt_info) = op_vec_info_type;
6251 DUMP_VECT_SCOPE ("vectorizable_operation");
6252 vect_model_simple_cost (vinfo, stmt_info,
6253 ncopies, dt, ndts, slp_node, cost_vec);
6254 return true;
6255 }
6256
6257 /* Transform. */
6258
6259 if (dump_enabled_p ())
6260 dump_printf_loc (MSG_NOTE, vect_location,
6261 "transform binary/unary operation.\n");
6262
6263 bool masked_loop_p = loop_vinfo && LOOP_VINFO_FULLY_MASKED_P (loop_vinfo);
6264
6265 /* POINTER_DIFF_EXPR has pointer arguments which are vectorized as
6266 vectors with unsigned elements, but the result is signed. So, we
6267 need to compute the MINUS_EXPR into a vectype temporary and
6268 VIEW_CONVERT_EXPR it into the final vectype_out result. */
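  /* A minimal sketch of the sequence generated in that case
     (illustrative only):
       vect_tmp = MINUS_EXPR <vect_p, vect_q>;   (in the unsigned VECTYPE)
       vect_res = VIEW_CONVERT_EXPR <vectype_out> (vect_tmp); */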
6269 tree vec_cvt_dest = NULL_TREE;
6270 if (orig_code == POINTER_DIFF_EXPR)
6271 {
6272 vec_dest = vect_create_destination_var (scalar_dest, vectype);
6273 vec_cvt_dest = vect_create_destination_var (scalar_dest, vectype_out);
6274 }
6275 /* Handle def. */
6276 else
6277 vec_dest = vect_create_destination_var (scalar_dest, vectype_out);
6278
6279 /* In case the vectorization factor (VF) is bigger than the number
6280 of elements that we can fit in a vectype (nunits), we have to generate
6281 more than one vector stmt, i.e., we need to "unroll" the
6282 vector stmt by a factor VF/nunits. In doing so, we record a pointer
6283 from one copy of the vector stmt to the next, in the field
6284 STMT_VINFO_RELATED_STMT. This is necessary in order to allow following
6285 stages to find the correct vector defs to be used when vectorizing
6286 stmts that use the defs of the current stmt. The example below
6287 illustrates the vectorization process when VF=16 and nunits=4 (i.e.,
6288 we need to create 4 vectorized stmts):
6289
6290 before vectorization:
6291 RELATED_STMT VEC_STMT
6292 S1: x = memref - -
6293 S2: z = x + 1 - -
6294
6295 step 1: vectorize stmt S1 (done in vectorizable_load. See more details
6296 there):
6297 RELATED_STMT VEC_STMT
6298 VS1_0: vx0 = memref0 VS1_1 -
6299 VS1_1: vx1 = memref1 VS1_2 -
6300 VS1_2: vx2 = memref2 VS1_3 -
6301 VS1_3: vx3 = memref3 - -
6302 S1: x = load - VS1_0
6303 S2: z = x + 1 - -
6304
6305 step2: vectorize stmt S2 (done here):
6306 To vectorize stmt S2 we first need to find the relevant vector
6307 def for the first operand 'x'. This is, as usual, obtained from
6308 the vector stmt recorded in the STMT_VINFO_VEC_STMT of the stmt
6309 that defines 'x' (S1). This way we find the stmt VS1_0, and the
6310 relevant vector def 'vx0'. Having found 'vx0' we can generate
6311 the vector stmt VS2_0, and as usual, record it in the
6312 STMT_VINFO_VEC_STMT of stmt S2.
6313 When creating the second copy (VS2_1), we obtain the relevant vector
6314 def from the vector stmt recorded in the STMT_VINFO_RELATED_STMT of
6315 stmt VS1_0. This way we find the stmt VS1_1 and the relevant
6316 vector def 'vx1'. Using 'vx1' we create stmt VS2_1 and record a
6317 pointer to it in the STMT_VINFO_RELATED_STMT of the vector stmt VS2_0.
6318 Similarly when creating stmts VS2_2 and VS2_3. This is the resulting
6319 chain of stmts and pointers:
6320 RELATED_STMT VEC_STMT
6321 VS1_0: vx0 = memref0 VS1_1 -
6322 VS1_1: vx1 = memref1 VS1_2 -
6323 VS1_2: vx2 = memref2 VS1_3 -
6324 VS1_3: vx3 = memref3 - -
6325 S1: x = load - VS1_0
6326 VS2_0: vz0 = vx0 + v1 VS2_1 -
6327 VS2_1: vz1 = vx1 + v1 VS2_2 -
6328 VS2_2: vz2 = vx2 + v1 VS2_3 -
6329 VS2_3: vz3 = vx3 + v1 - -
6330 S2: z = x + 1 - VS2_0 */
6331
6332 prev_stmt_info = NULL;
6333 for (j = 0; j < ncopies; j++)
6334 {
6335 /* Handle uses. */
6336 if (j == 0)
6337 {
6338 if (op_type == binary_op)
6339 vect_get_vec_defs (vinfo, op0, op1, stmt_info,
6340 &vec_oprnds0, &vec_oprnds1, slp_node);
6341 else if (op_type == ternary_op)
6342 {
6343 if (slp_node)
6344 {
6345 auto_vec<vec<tree> > vec_defs(3);
6346 vect_get_slp_defs (vinfo, slp_node, &vec_defs);
6347 vec_oprnds0 = vec_defs[0];
6348 vec_oprnds1 = vec_defs[1];
6349 vec_oprnds2 = vec_defs[2];
6350 }
6351 else
6352 {
6353 vect_get_vec_defs (vinfo, op0, op1, stmt_info, &vec_oprnds0,
6354 &vec_oprnds1, NULL);
6355 vect_get_vec_defs (vinfo, op2, NULL_TREE, stmt_info,
6356 &vec_oprnds2, NULL, NULL);
6357 }
6358 }
6359 else
6360 vect_get_vec_defs (vinfo, op0, NULL_TREE, stmt_info, &vec_oprnds0,
6361 NULL, slp_node);
6362 }
6363 else
6364 {
6365 vect_get_vec_defs_for_stmt_copy (vinfo, &vec_oprnds0, &vec_oprnds1);
6366 if (op_type == ternary_op)
6367 {
6368 tree vec_oprnd = vec_oprnds2.pop ();
6369 vec_oprnds2.quick_push (vect_get_vec_def_for_stmt_copy (vinfo,
6370 vec_oprnd));
6371 }
6372 }
6373
6374 /* Arguments are ready. Create the new vector stmt. */
6375 stmt_vec_info new_stmt_info = NULL;
6376 FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
6377 {
6378 vop1 = ((op_type == binary_op || op_type == ternary_op)
6379 ? vec_oprnds1[i] : NULL_TREE);
6380 vop2 = ((op_type == ternary_op)
6381 ? vec_oprnds2[i] : NULL_TREE);
6382 if (masked_loop_p && reduc_idx >= 0)
6383 {
6384 /* Perform the operation on active elements only and take
6385 inactive elements from the reduction chain input. */
6386 gcc_assert (!vop2);
6387 vop2 = reduc_idx == 1 ? vop1 : vop0;
6388 tree mask = vect_get_loop_mask (gsi, masks, vec_num * ncopies,
6389 vectype, i * ncopies + j);
6390 gcall *call = gimple_build_call_internal (cond_fn, 4, mask,
6391 vop0, vop1, vop2);
6392 new_temp = make_ssa_name (vec_dest, call);
6393 gimple_call_set_lhs (call, new_temp);
6394 gimple_call_set_nothrow (call, true);
6395 new_stmt_info
6396 = vect_finish_stmt_generation (vinfo, stmt_info, call, gsi);
6397 }
6398 else
6399 {
6400 gassign *new_stmt = gimple_build_assign (vec_dest, code,
6401 vop0, vop1, vop2);
6402 new_temp = make_ssa_name (vec_dest, new_stmt);
6403 gimple_assign_set_lhs (new_stmt, new_temp);
6404 new_stmt_info
6405 = vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
6406 if (vec_cvt_dest)
6407 {
6408 new_temp = build1 (VIEW_CONVERT_EXPR, vectype_out, new_temp);
6409 gassign *new_stmt
6410 = gimple_build_assign (vec_cvt_dest, VIEW_CONVERT_EXPR,
6411 new_temp);
6412 new_temp = make_ssa_name (vec_cvt_dest, new_stmt);
6413 gimple_assign_set_lhs (new_stmt, new_temp);
6414 new_stmt_info = vect_finish_stmt_generation (vinfo, stmt_info,
6415 new_stmt, gsi);
6416 }
6417 }
6418 if (slp_node)
6419 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt_info);
6420 }
6421
6422 if (slp_node)
6423 continue;
6424
6425 if (j == 0)
6426 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt_info;
6427 else
6428 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt_info;
6429 prev_stmt_info = new_stmt_info;
6430 }
6431
6432 vec_oprnds0.release ();
6433 vec_oprnds1.release ();
6434 vec_oprnds2.release ();
6435
6436 return true;
6437 }
6438
6439 /* A helper function to ensure data reference DR_INFO's base alignment. */
6440
6441 static void
6442 ensure_base_align (dr_vec_info *dr_info)
6443 {
6444 if (dr_info->misalignment == DR_MISALIGNMENT_UNINITIALIZED)
6445 return;
6446
6447 if (dr_info->base_misaligned)
6448 {
6449 tree base_decl = dr_info->base_decl;
6450
6451 // We should only be able to increase the alignment of a base object if
6452 // we know what its new alignment should be at compile time.
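	  /* For illustration (hypothetical example): a global array with
	     the default 4-byte alignment may be bumped here to a 16-byte
	     or larger DR_TARGET_ALIGNMENT so the vector accesses become
	     aligned. */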
6453 unsigned HOST_WIDE_INT align_base_to =
6454 DR_TARGET_ALIGNMENT (dr_info).to_constant () * BITS_PER_UNIT;
6455
6456 if (decl_in_symtab_p (base_decl))
6457 symtab_node::get (base_decl)->increase_alignment (align_base_to);
6458 else if (DECL_ALIGN (base_decl) < align_base_to)
6459 {
6460 SET_DECL_ALIGN (base_decl, align_base_to);
6461 DECL_USER_ALIGN (base_decl) = 1;
6462 }
6463 dr_info->base_misaligned = false;
6464 }
6465 }
6466
6467
6468 /* Function get_group_alias_ptr_type.
6469
6470 Return the alias type for the group starting at FIRST_STMT_INFO. */
6471
6472 static tree
6473 get_group_alias_ptr_type (stmt_vec_info first_stmt_info)
6474 {
6475 struct data_reference *first_dr, *next_dr;
6476
6477 first_dr = STMT_VINFO_DATA_REF (first_stmt_info);
6478 stmt_vec_info next_stmt_info = DR_GROUP_NEXT_ELEMENT (first_stmt_info);
6479 while (next_stmt_info)
6480 {
6481 next_dr = STMT_VINFO_DATA_REF (next_stmt_info);
6482 if (get_alias_set (DR_REF (first_dr))
6483 != get_alias_set (DR_REF (next_dr)))
6484 {
6485 if (dump_enabled_p ())
6486 dump_printf_loc (MSG_NOTE, vect_location,
6487 "conflicting alias set types.\n");
6488 return ptr_type_node;
6489 }
6490 next_stmt_info = DR_GROUP_NEXT_ELEMENT (next_stmt_info);
6491 }
6492 return reference_alias_ptr_type (DR_REF (first_dr));
6493 }
6494
6495
6496 /* Function scan_operand_equal_p.
6497
6498 Helper function for check_scan_store. Compare two references
6499 with .GOMP_SIMD_LANE bases. */
6500
6501 static bool
6502 scan_operand_equal_p (tree ref1, tree ref2)
6503 {
6504 tree ref[2] = { ref1, ref2 };
6505 poly_int64 bitsize[2], bitpos[2];
6506 tree offset[2], base[2];
6507 for (int i = 0; i < 2; ++i)
6508 {
6509 machine_mode mode;
6510 int unsignedp, reversep, volatilep = 0;
6511 base[i] = get_inner_reference (ref[i], &bitsize[i], &bitpos[i],
6512 &offset[i], &mode, &unsignedp,
6513 &reversep, &volatilep);
6514 if (reversep || volatilep || maybe_ne (bitpos[i], 0))
6515 return false;
6516 if (TREE_CODE (base[i]) == MEM_REF
6517 && offset[i] == NULL_TREE
6518 && TREE_CODE (TREE_OPERAND (base[i], 0)) == SSA_NAME)
6519 {
6520 gimple *def_stmt = SSA_NAME_DEF_STMT (TREE_OPERAND (base[i], 0));
6521 if (is_gimple_assign (def_stmt)
6522 && gimple_assign_rhs_code (def_stmt) == POINTER_PLUS_EXPR
6523 && TREE_CODE (gimple_assign_rhs1 (def_stmt)) == ADDR_EXPR
6524 && TREE_CODE (gimple_assign_rhs2 (def_stmt)) == SSA_NAME)
6525 {
6526 if (maybe_ne (mem_ref_offset (base[i]), 0))
6527 return false;
6528 base[i] = TREE_OPERAND (gimple_assign_rhs1 (def_stmt), 0);
6529 offset[i] = gimple_assign_rhs2 (def_stmt);
6530 }
6531 }
6532 }
6533
6534 if (!operand_equal_p (base[0], base[1], 0))
6535 return false;
6536 if (maybe_ne (bitsize[0], bitsize[1]))
6537 return false;
6538 if (offset[0] != offset[1])
6539 {
6540 if (!offset[0] || !offset[1])
6541 return false;
6542 if (!operand_equal_p (offset[0], offset[1], 0))
6543 {
6544 tree step[2];
6545 for (int i = 0; i < 2; ++i)
6546 {
6547 step[i] = integer_one_node;
6548 if (TREE_CODE (offset[i]) == SSA_NAME)
6549 {
6550 gimple *def_stmt = SSA_NAME_DEF_STMT (offset[i]);
6551 if (is_gimple_assign (def_stmt)
6552 && gimple_assign_rhs_code (def_stmt) == MULT_EXPR
6553 && (TREE_CODE (gimple_assign_rhs2 (def_stmt))
6554 == INTEGER_CST))
6555 {
6556 step[i] = gimple_assign_rhs2 (def_stmt);
6557 offset[i] = gimple_assign_rhs1 (def_stmt);
6558 }
6559 }
6560 else if (TREE_CODE (offset[i]) == MULT_EXPR)
6561 {
6562 step[i] = TREE_OPERAND (offset[i], 1);
6563 offset[i] = TREE_OPERAND (offset[i], 0);
6564 }
6565 tree rhs1 = NULL_TREE;
6566 if (TREE_CODE (offset[i]) == SSA_NAME)
6567 {
6568 gimple *def_stmt = SSA_NAME_DEF_STMT (offset[i]);
6569 if (gimple_assign_cast_p (def_stmt))
6570 rhs1 = gimple_assign_rhs1 (def_stmt);
6571 }
6572 else if (CONVERT_EXPR_P (offset[i]))
6573 rhs1 = TREE_OPERAND (offset[i], 0);
6574 if (rhs1
6575 && INTEGRAL_TYPE_P (TREE_TYPE (rhs1))
6576 && INTEGRAL_TYPE_P (TREE_TYPE (offset[i]))
6577 && (TYPE_PRECISION (TREE_TYPE (offset[i]))
6578 >= TYPE_PRECISION (TREE_TYPE (rhs1))))
6579 offset[i] = rhs1;
6580 }
6581 if (!operand_equal_p (offset[0], offset[1], 0)
6582 || !operand_equal_p (step[0], step[1], 0))
6583 return false;
6584 }
6585 }
6586 return true;
6587 }
6588
6589
6590 enum scan_store_kind {
6591 /* Normal permutation. */
6592 scan_store_kind_perm,
6593
6594 /* Whole vector left shift permutation with zero init. */
6595 scan_store_kind_lshift_zero,
6596
6597 /* Whole vector left shift permutation and VEC_COND_EXPR. */
6598 scan_store_kind_lshift_cond
6599 };
6600
6601 /* Function scan_store_can_perm_p.
6602 
6603 Verify whether we can perform the needed permutations or whole vector shifts.
6604 Return -1 on failure, otherwise the exact log2 of vectype's nunits.
6605 USE_WHOLE_VECTOR, if non-NULL, is filled with the enum scan_store_kind
6606 operation to use at each step. */
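/* For illustration (hypothetical example): with nunits == 4 the
   permutations checked below are { 0, 4, 5, 6 }, { 0, 1, 4, 5 } and
   finally the splat { 3, 3, 3, 3 }; each step keeps the first 2^i
   elements of one input and takes the rest from the other, and the
   last step broadcasts the final element. */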
6607
6608 static int
6609 scan_store_can_perm_p (tree vectype, tree init,
6610 vec<enum scan_store_kind> *use_whole_vector = NULL)
6611 {
6612 enum machine_mode vec_mode = TYPE_MODE (vectype);
6613 unsigned HOST_WIDE_INT nunits;
6614 if (!TYPE_VECTOR_SUBPARTS (vectype).is_constant (&nunits))
6615 return -1;
6616 int units_log2 = exact_log2 (nunits);
6617 if (units_log2 <= 0)
6618 return -1;
6619
6620 int i;
6621 enum scan_store_kind whole_vector_shift_kind = scan_store_kind_perm;
6622 for (i = 0; i <= units_log2; ++i)
6623 {
6624 unsigned HOST_WIDE_INT j, k;
6625 enum scan_store_kind kind = scan_store_kind_perm;
6626 vec_perm_builder sel (nunits, nunits, 1);
6627 sel.quick_grow (nunits);
6628 if (i == units_log2)
6629 {
6630 for (j = 0; j < nunits; ++j)
6631 sel[j] = nunits - 1;
6632 }
6633 else
6634 {
6635 for (j = 0; j < (HOST_WIDE_INT_1U << i); ++j)
6636 sel[j] = j;
6637 for (k = 0; j < nunits; ++j, ++k)
6638 sel[j] = nunits + k;
6639 }
6640 vec_perm_indices indices (sel, i == units_log2 ? 1 : 2, nunits);
6641 if (!can_vec_perm_const_p (vec_mode, indices))
6642 {
6643 if (i == units_log2)
6644 return -1;
6645
6646 if (whole_vector_shift_kind == scan_store_kind_perm)
6647 {
6648 if (optab_handler (vec_shl_optab, vec_mode) == CODE_FOR_nothing)
6649 return -1;
6650 whole_vector_shift_kind = scan_store_kind_lshift_zero;
6651 /* Whole vector shifts shift in zeros, so if init is an all-zero
6652 constant, there is no need to do anything further. */
6653 if ((TREE_CODE (init) != INTEGER_CST
6654 && TREE_CODE (init) != REAL_CST)
6655 || !initializer_zerop (init))
6656 {
6657 tree masktype = truth_type_for (vectype);
6658 if (!expand_vec_cond_expr_p (vectype, masktype, VECTOR_CST))
6659 return -1;
6660 whole_vector_shift_kind = scan_store_kind_lshift_cond;
6661 }
6662 }
6663 kind = whole_vector_shift_kind;
6664 }
6665 if (use_whole_vector)
6666 {
6667 if (kind != scan_store_kind_perm && use_whole_vector->is_empty ())
6668 use_whole_vector->safe_grow_cleared (i);
6669 if (kind != scan_store_kind_perm || !use_whole_vector->is_empty ())
6670 use_whole_vector->safe_push (kind);
6671 }
6672 }
6673
6674 return units_log2;
6675 }
6676
6677
6678 /* Function check_scan_store.
6679
6680 Check magic stores for #pragma omp scan {in,ex}clusive reductions. */
6681
6682 static bool
6683 check_scan_store (vec_info *vinfo, stmt_vec_info stmt_info, tree vectype,
6684 enum vect_def_type rhs_dt, bool slp, tree mask,
6685 vect_memory_access_type memory_access_type)
6686 {
6687 loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
6688 dr_vec_info *dr_info = STMT_VINFO_DR_INFO (stmt_info);
6689 tree ref_type;
6690
6691 gcc_assert (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) > 1);
6692 if (slp
6693 || mask
6694 || memory_access_type != VMAT_CONTIGUOUS
6695 || TREE_CODE (DR_BASE_ADDRESS (dr_info->dr)) != ADDR_EXPR
6696 || !VAR_P (TREE_OPERAND (DR_BASE_ADDRESS (dr_info->dr), 0))
6697 || loop_vinfo == NULL
6698 || LOOP_VINFO_FULLY_MASKED_P (loop_vinfo)
6699 || STMT_VINFO_GROUPED_ACCESS (stmt_info)
6700 || !integer_zerop (get_dr_vinfo_offset (vinfo, dr_info))
6701 || !integer_zerop (DR_INIT (dr_info->dr))
6702 || !(ref_type = reference_alias_ptr_type (DR_REF (dr_info->dr)))
6703 || !alias_sets_conflict_p (get_alias_set (vectype),
6704 get_alias_set (TREE_TYPE (ref_type))))
6705 {
6706 if (dump_enabled_p ())
6707 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6708 "unsupported OpenMP scan store.\n");
6709 return false;
6710 }
6711
6712 /* We need to pattern match code built by OpenMP lowering and simplified
6713 by following optimizations into something we can handle.
6714 #pragma omp simd reduction(inscan,+:r)
6715 for (...)
6716 {
6717 r += something ();
6718 #pragma omp scan inclusive (r)
6719 use (r);
6720 }
6721 shall have body with:
6722 // Initialization for input phase, store the reduction initializer:
6723 _20 = .GOMP_SIMD_LANE (simduid.3_14(D), 0);
6724 _21 = .GOMP_SIMD_LANE (simduid.3_14(D), 1);
6725 D.2042[_21] = 0;
6726 // Actual input phase:
6727 ...
6728 r.0_5 = D.2042[_20];
6729 _6 = _4 + r.0_5;
6730 D.2042[_20] = _6;
6731 // Initialization for scan phase:
6732 _25 = .GOMP_SIMD_LANE (simduid.3_14(D), 2);
6733 _26 = D.2043[_25];
6734 _27 = D.2042[_25];
6735 _28 = _26 + _27;
6736 D.2043[_25] = _28;
6737 D.2042[_25] = _28;
6738 // Actual scan phase:
6739 ...
6740 r.1_8 = D.2042[_20];
6741 ...
6742 The "omp simd array" variable D.2042 holds the privatized copy used
6743 inside of the loop and D.2043 is another one that holds copies of
6744 the current original list item. The separate GOMP_SIMD_LANE ifn
6745 kinds are there in order to allow optimizing the initializer store
6746 and combiner sequence, e.g. if it is originally some C++-ish
6747 user-defined reduction, but still allow the vectorizer to pattern
6748 recognize it and turn it into the appropriate vectorized scan.
6749
6750 For exclusive scan, this is slightly different:
6751 #pragma omp simd reduction(inscan,+:r)
6752 for (...)
6753 {
6754 use (r);
6755 #pragma omp scan exclusive (r)
6756 r += something ();
6757 }
6758 shall have body with:
6759 // Initialization for input phase, store the reduction initializer:
6760 _20 = .GOMP_SIMD_LANE (simduid.3_14(D), 0);
6761 _21 = .GOMP_SIMD_LANE (simduid.3_14(D), 1);
6762 D.2042[_21] = 0;
6763 // Actual input phase:
6764 ...
6765 r.0_5 = D.2042[_20];
6766 _6 = _4 + r.0_5;
6767 D.2042[_20] = _6;
6768 // Initialization for scan phase:
6769 _25 = .GOMP_SIMD_LANE (simduid.3_14(D), 3);
6770 _26 = D.2043[_25];
6771 D.2044[_25] = _26;
6772 _27 = D.2042[_25];
6773 _28 = _26 + _27;
6774 D.2043[_25] = _28;
6775 // Actual scan phase:
6776 ...
6777 r.1_8 = D.2044[_20];
6778 ... */
6779
6780 if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) == 2)
6781 {
6782 /* Match the D.2042[_21] = 0; store above. Just require that
6783 it is a constant or external definition store. */
6784 if (rhs_dt != vect_constant_def && rhs_dt != vect_external_def)
6785 {
6786 fail_init:
6787 if (dump_enabled_p ())
6788 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6789 "unsupported OpenMP scan initializer store.\n");
6790 return false;
6791 }
6792
6793 if (! loop_vinfo->scan_map)
6794 loop_vinfo->scan_map = new hash_map<tree, tree>;
6795 tree var = TREE_OPERAND (DR_BASE_ADDRESS (dr_info->dr), 0);
6796 tree &cached = loop_vinfo->scan_map->get_or_insert (var);
6797 if (cached)
6798 goto fail_init;
6799 cached = gimple_assign_rhs1 (STMT_VINFO_STMT (stmt_info));
6800
6801 /* These stores can be vectorized normally. */
6802 return true;
6803 }
6804
6805 if (rhs_dt != vect_internal_def)
6806 {
6807 fail:
6808 if (dump_enabled_p ())
6809 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6810 "unsupported OpenMP scan combiner pattern.\n");
6811 return false;
6812 }
6813
6814 gimple *stmt = STMT_VINFO_STMT (stmt_info);
6815 tree rhs = gimple_assign_rhs1 (stmt);
6816 if (TREE_CODE (rhs) != SSA_NAME)
6817 goto fail;
6818
6819 gimple *other_store_stmt = NULL;
6820 tree var = TREE_OPERAND (DR_BASE_ADDRESS (dr_info->dr), 0);
6821 bool inscan_var_store
6822 = lookup_attribute ("omp simd inscan", DECL_ATTRIBUTES (var)) != NULL;
6823
6824 if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) == 4)
6825 {
6826 if (!inscan_var_store)
6827 {
6828 use_operand_p use_p;
6829 imm_use_iterator iter;
6830 FOR_EACH_IMM_USE_FAST (use_p, iter, rhs)
6831 {
6832 gimple *use_stmt = USE_STMT (use_p);
6833 if (use_stmt == stmt || is_gimple_debug (use_stmt))
6834 continue;
6835 if (gimple_bb (use_stmt) != gimple_bb (stmt)
6836 || !is_gimple_assign (use_stmt)
6837 || gimple_assign_rhs_class (use_stmt) != GIMPLE_BINARY_RHS
6838 || other_store_stmt
6839 || TREE_CODE (gimple_assign_lhs (use_stmt)) != SSA_NAME)
6840 goto fail;
6841 other_store_stmt = use_stmt;
6842 }
6843 if (other_store_stmt == NULL)
6844 goto fail;
6845 rhs = gimple_assign_lhs (other_store_stmt);
6846 if (!single_imm_use (rhs, &use_p, &other_store_stmt))
6847 goto fail;
6848 }
6849 }
6850 else if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) == 3)
6851 {
6852 use_operand_p use_p;
6853 imm_use_iterator iter;
6854 FOR_EACH_IMM_USE_FAST (use_p, iter, rhs)
6855 {
6856 gimple *use_stmt = USE_STMT (use_p);
6857 if (use_stmt == stmt || is_gimple_debug (use_stmt))
6858 continue;
6859 if (other_store_stmt)
6860 goto fail;
6861 other_store_stmt = use_stmt;
6862 }
6863 }
6864 else
6865 goto fail;
6866
6867 gimple *def_stmt = SSA_NAME_DEF_STMT (rhs);
6868 if (gimple_bb (def_stmt) != gimple_bb (stmt)
6869 || !is_gimple_assign (def_stmt)
6870 || gimple_assign_rhs_class (def_stmt) != GIMPLE_BINARY_RHS)
6871 goto fail;
6872
6873 enum tree_code code = gimple_assign_rhs_code (def_stmt);
6874 /* For pointer addition, we should use the normal plus for the vector
6875 operation. */
6876 switch (code)
6877 {
6878 case POINTER_PLUS_EXPR:
6879 code = PLUS_EXPR;
6880 break;
6881 case MULT_HIGHPART_EXPR:
6882 goto fail;
6883 default:
6884 break;
6885 }
6886 if (TREE_CODE_LENGTH (code) != binary_op || !commutative_tree_code (code))
6887 goto fail;
6888
6889 tree rhs1 = gimple_assign_rhs1 (def_stmt);
6890 tree rhs2 = gimple_assign_rhs2 (def_stmt);
6891 if (TREE_CODE (rhs1) != SSA_NAME || TREE_CODE (rhs2) != SSA_NAME)
6892 goto fail;
6893
6894 gimple *load1_stmt = SSA_NAME_DEF_STMT (rhs1);
6895 gimple *load2_stmt = SSA_NAME_DEF_STMT (rhs2);
6896 if (gimple_bb (load1_stmt) != gimple_bb (stmt)
6897 || !gimple_assign_load_p (load1_stmt)
6898 || gimple_bb (load2_stmt) != gimple_bb (stmt)
6899 || !gimple_assign_load_p (load2_stmt))
6900 goto fail;
6901
6902 stmt_vec_info load1_stmt_info = loop_vinfo->lookup_stmt (load1_stmt);
6903 stmt_vec_info load2_stmt_info = loop_vinfo->lookup_stmt (load2_stmt);
6904 if (load1_stmt_info == NULL
6905 || load2_stmt_info == NULL
6906 || (STMT_VINFO_SIMD_LANE_ACCESS_P (load1_stmt_info)
6907 != STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info))
6908 || (STMT_VINFO_SIMD_LANE_ACCESS_P (load2_stmt_info)
6909 != STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info)))
6910 goto fail;
6911
6912 if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) == 4 && inscan_var_store)
6913 {
6914 dr_vec_info *load1_dr_info = STMT_VINFO_DR_INFO (load1_stmt_info);
6915 if (TREE_CODE (DR_BASE_ADDRESS (load1_dr_info->dr)) != ADDR_EXPR
6916 || !VAR_P (TREE_OPERAND (DR_BASE_ADDRESS (load1_dr_info->dr), 0)))
6917 goto fail;
6918 tree var1 = TREE_OPERAND (DR_BASE_ADDRESS (load1_dr_info->dr), 0);
6919 tree lrhs;
6920 if (lookup_attribute ("omp simd inscan", DECL_ATTRIBUTES (var1)))
6921 lrhs = rhs1;
6922 else
6923 lrhs = rhs2;
6924 use_operand_p use_p;
6925 imm_use_iterator iter;
6926 FOR_EACH_IMM_USE_FAST (use_p, iter, lrhs)
6927 {
6928 gimple *use_stmt = USE_STMT (use_p);
6929 if (use_stmt == def_stmt || is_gimple_debug (use_stmt))
6930 continue;
6931 if (other_store_stmt)
6932 goto fail;
6933 other_store_stmt = use_stmt;
6934 }
6935 }
6936
6937 if (other_store_stmt == NULL)
6938 goto fail;
6939 if (gimple_bb (other_store_stmt) != gimple_bb (stmt)
6940 || !gimple_store_p (other_store_stmt))
6941 goto fail;
6942
6943 stmt_vec_info other_store_stmt_info
6944 = loop_vinfo->lookup_stmt (other_store_stmt);
6945 if (other_store_stmt_info == NULL
6946 || (STMT_VINFO_SIMD_LANE_ACCESS_P (other_store_stmt_info)
6947 != STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info)))
6948 goto fail;
6949
6950 gimple *stmt1 = stmt;
6951 gimple *stmt2 = other_store_stmt;
6952 if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) == 4 && !inscan_var_store)
6953 std::swap (stmt1, stmt2);
6954 if (scan_operand_equal_p (gimple_assign_lhs (stmt1),
6955 gimple_assign_rhs1 (load2_stmt)))
6956 {
6957 std::swap (rhs1, rhs2);
6958 std::swap (load1_stmt, load2_stmt);
6959 std::swap (load1_stmt_info, load2_stmt_info);
6960 }
6961 if (!scan_operand_equal_p (gimple_assign_lhs (stmt1),
6962 gimple_assign_rhs1 (load1_stmt)))
6963 goto fail;
6964
6965 tree var3 = NULL_TREE;
6966 if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) == 3
6967 && !scan_operand_equal_p (gimple_assign_lhs (stmt2),
6968 gimple_assign_rhs1 (load2_stmt)))
6969 goto fail;
6970 else if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) == 4)
6971 {
6972 dr_vec_info *load2_dr_info = STMT_VINFO_DR_INFO (load2_stmt_info);
6973 if (TREE_CODE (DR_BASE_ADDRESS (load2_dr_info->dr)) != ADDR_EXPR
6974 || !VAR_P (TREE_OPERAND (DR_BASE_ADDRESS (load2_dr_info->dr), 0)))
6975 goto fail;
6976 var3 = TREE_OPERAND (DR_BASE_ADDRESS (load2_dr_info->dr), 0);
6977 if (!lookup_attribute ("omp simd array", DECL_ATTRIBUTES (var3))
6978 || lookup_attribute ("omp simd inscan", DECL_ATTRIBUTES (var3))
6979 || lookup_attribute ("omp simd inscan exclusive",
6980 DECL_ATTRIBUTES (var3)))
6981 goto fail;
6982 }
6983
6984 dr_vec_info *other_dr_info = STMT_VINFO_DR_INFO (other_store_stmt_info);
6985 if (TREE_CODE (DR_BASE_ADDRESS (other_dr_info->dr)) != ADDR_EXPR
6986 || !VAR_P (TREE_OPERAND (DR_BASE_ADDRESS (other_dr_info->dr), 0)))
6987 goto fail;
6988
6989 tree var1 = TREE_OPERAND (DR_BASE_ADDRESS (dr_info->dr), 0);
6990 tree var2 = TREE_OPERAND (DR_BASE_ADDRESS (other_dr_info->dr), 0);
6991 if (!lookup_attribute ("omp simd array", DECL_ATTRIBUTES (var1))
6992 || !lookup_attribute ("omp simd array", DECL_ATTRIBUTES (var2))
6993 || (!lookup_attribute ("omp simd inscan", DECL_ATTRIBUTES (var1)))
6994 == (!lookup_attribute ("omp simd inscan", DECL_ATTRIBUTES (var2))))
6995 goto fail;
6996
6997 if (lookup_attribute ("omp simd inscan", DECL_ATTRIBUTES (var1)))
6998 std::swap (var1, var2);
6999
7000 if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) == 4)
7001 {
7002 if (!lookup_attribute ("omp simd inscan exclusive",
7003 DECL_ATTRIBUTES (var1)))
7004 goto fail;
7005 var1 = var3;
7006 }
7007
7008 if (loop_vinfo->scan_map == NULL)
7009 goto fail;
7010 tree *init = loop_vinfo->scan_map->get (var1);
7011 if (init == NULL)
7012 goto fail;
7013
7014 /* The IL is as expected; now check if we can actually vectorize it.
7015 Inclusive scan:
7016 _26 = D.2043[_25];
7017 _27 = D.2042[_25];
7018 _28 = _26 + _27;
7019 D.2043[_25] = _28;
7020 D.2042[_25] = _28;
7021 should be vectorized as (where _40 is the vectorized rhs
7022 from the D.2042[_21] = 0; store):
7023 _30 = MEM <vector(8) int> [(int *)&D.2043];
7024 _31 = MEM <vector(8) int> [(int *)&D.2042];
7025 _32 = VEC_PERM_EXPR <_40, _31, { 0, 8, 9, 10, 11, 12, 13, 14 }>;
7026 _33 = _31 + _32;
7027 // _33 = { _31[0], _31[0]+_31[1], _31[1]+_31[2], ..., _31[6]+_31[7] };
7028 _34 = VEC_PERM_EXPR <_40, _33, { 0, 1, 8, 9, 10, 11, 12, 13 }>;
7029 _35 = _33 + _34;
7030 // _35 = { _31[0], _31[0]+_31[1], _31[0]+.._31[2], _31[0]+.._31[3],
7031 // _31[1]+.._31[4], ... _31[4]+.._31[7] };
7032 _36 = VEC_PERM_EXPR <_40, _35, { 0, 1, 2, 3, 8, 9, 10, 11 }>;
7033 _37 = _35 + _36;
7034 // _37 = { _31[0], _31[0]+_31[1], _31[0]+.._31[2], _31[0]+.._31[3],
7035 // _31[0]+.._31[4], ... _31[0]+.._31[7] };
7036 _38 = _30 + _37;
7037 _39 = VEC_PERM_EXPR <_38, _38, { 7, 7, 7, 7, 7, 7, 7, 7 }>;
7038 MEM <vector(8) int> [(int *)&D.2043] = _39;
7039 MEM <vector(8) int> [(int *)&D.2042] = _38;
7040 Exclusive scan:
7041 _26 = D.2043[_25];
7042 D.2044[_25] = _26;
7043 _27 = D.2042[_25];
7044 _28 = _26 + _27;
7045 D.2043[_25] = _28;
7046 should be vectorized as (where _40 is the vectorized rhs
7047 from the D.2042[_21] = 0; store):
7048 _30 = MEM <vector(8) int> [(int *)&D.2043];
7049 _31 = MEM <vector(8) int> [(int *)&D.2042];
7050 _32 = VEC_PERM_EXPR <_40, _31, { 0, 8, 9, 10, 11, 12, 13, 14 }>;
7051 _33 = VEC_PERM_EXPR <_40, _32, { 0, 8, 9, 10, 11, 12, 13, 14 }>;
7052 _34 = _32 + _33;
7053 // _34 = { 0, _31[0], _31[0]+_31[1], _31[1]+_31[2], _31[2]+_31[3],
7054 // _31[3]+_31[4], ... _31[5]+.._31[6] };
7055 _35 = VEC_PERM_EXPR <_40, _34, { 0, 1, 8, 9, 10, 11, 12, 13 }>;
7056 _36 = _34 + _35;
7057 // _36 = { 0, _31[0], _31[0]+_31[1], _31[0]+.._31[2], _31[0]+.._31[3],
7058 // _31[1]+.._31[4], ... _31[3]+.._31[6] };
7059 _37 = VEC_PERM_EXPR <_40, _36, { 0, 1, 2, 3, 8, 9, 10, 11 }>;
7060 _38 = _36 + _37;
7061 // _38 = { 0, _31[0], _31[0]+_31[1], _31[0]+.._31[2], _31[0]+.._31[3],
7062 // _31[0]+.._31[4], ... _31[0]+.._31[6] };
7063 _39 = _30 + _38;
7064 _50 = _31 + _39;
7065 _51 = VEC_PERM_EXPR <_50, _50, { 7, 7, 7, 7, 7, 7, 7, 7 }>;
7066 MEM <vector(8) int> [(int *)&D.2044] = _39;
7067 MEM <vector(8) int> [(int *)&D.2042] = _51; */
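 /* So what remains to be checked is that the target has a vector optab for
    the combining operation and that scan_store_can_perm_p can implement the
    required shift permutations (possibly as whole-vector shifts).  */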
7068 enum machine_mode vec_mode = TYPE_MODE (vectype);
7069 optab optab = optab_for_tree_code (code, vectype, optab_default);
7070 if (!optab || optab_handler (optab, vec_mode) == CODE_FOR_nothing)
7071 goto fail;
7072
7073 int units_log2 = scan_store_can_perm_p (vectype, *init);
7074 if (units_log2 == -1)
7075 goto fail;
7076
7077 return true;
7078 }
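
/* A minimal scalar sketch (illustrative only, not part of the vectorizer)
   of the shift-and-add scheme that the VEC_PERM_EXPR/PLUS_EXPR sequence in
   the comment above implements for an inclusive scan, assuming an 8-lane
   vector and a zero initializer as stored by D.2042[_21] = 0.  */

void
inclusive_scan_model (int v[8])
{
  /* log2 (8) == 3 doubling steps.  */
  for (int step = 0; step < 3; step++)
    {
      int shifted[8];
      /* Model of VEC_PERM_EXPR <init, v, ...>: shift V right by 2**STEP
         lanes, filling the vacated low lanes with the (zero) initializer.  */
      for (int lane = 0; lane < 8; lane++)
        shifted[lane] = lane < (1 << step) ? 0 : v[lane - (1 << step)];
      /* Model of the PLUS_EXPR combining the shifted and unshifted
         vectors.  */
      for (int lane = 0; lane < 8; lane++)
        v[lane] += shifted[lane];
    }
  /* V[LANE] now holds the inclusive prefix sum of the original V[0..LANE];
     the final { 7, 7, ... } permute broadcasts V[7] as the carry into the
     next vector iteration.  */
}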
7079
7080
7081 /* Function vectorizable_scan_store.
7082
7083 Helper of vectorizable_store, with the same arguments as vectorizable_store.
7084 Handles only the transformation; the checking is done in check_scan_store. */
7085
7086 static bool
7087 vectorizable_scan_store (vec_info *vinfo,
7088 stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
7089 stmt_vec_info *vec_stmt, int ncopies)
7090 {
7091 loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
7092 dr_vec_info *dr_info = STMT_VINFO_DR_INFO (stmt_info);
7093 tree ref_type = reference_alias_ptr_type (DR_REF (dr_info->dr));
7094 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
7095
7096 if (dump_enabled_p ())
7097 dump_printf_loc (MSG_NOTE, vect_location,
7098 "transform scan store. ncopies = %d\n", ncopies);
7099
7100 gimple *stmt = STMT_VINFO_STMT (stmt_info);
7101 tree rhs = gimple_assign_rhs1 (stmt);
7102 gcc_assert (TREE_CODE (rhs) == SSA_NAME);
7103
7104 tree var = TREE_OPERAND (DR_BASE_ADDRESS (dr_info->dr), 0);
7105 bool inscan_var_store
7106 = lookup_attribute ("omp simd inscan", DECL_ATTRIBUTES (var)) != NULL;
7107
7108 if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) == 4 && !inscan_var_store)
7109 {
7110 use_operand_p use_p;
7111 imm_use_iterator iter;
7112 FOR_EACH_IMM_USE_FAST (use_p, iter, rhs)
7113 {
7114 gimple *use_stmt = USE_STMT (use_p);
7115 if (use_stmt == stmt || is_gimple_debug (use_stmt))
7116 continue;
7117 rhs = gimple_assign_lhs (use_stmt);
7118 break;
7119 }
7120 }
7121
7122 gimple *def_stmt = SSA_NAME_DEF_STMT (rhs);
7123 enum tree_code code = gimple_assign_rhs_code (def_stmt);
7124 if (code == POINTER_PLUS_EXPR)
7125 code = PLUS_EXPR;
7126 gcc_assert (TREE_CODE_LENGTH (code) == binary_op
7127 && commutative_tree_code (code));
7128 tree rhs1 = gimple_assign_rhs1 (def_stmt);
7129 tree rhs2 = gimple_assign_rhs2 (def_stmt);
7130 gcc_assert (TREE_CODE (rhs1) == SSA_NAME && TREE_CODE (rhs2) == SSA_NAME);
7131 gimple *load1_stmt = SSA_NAME_DEF_STMT (rhs1);
7132 gimple *load2_stmt = SSA_NAME_DEF_STMT (rhs2);
7133 stmt_vec_info load1_stmt_info = loop_vinfo->lookup_stmt (load1_stmt);
7134 stmt_vec_info load2_stmt_info = loop_vinfo->lookup_stmt (load2_stmt);
7135 dr_vec_info *load1_dr_info = STMT_VINFO_DR_INFO (load1_stmt_info);
7136 dr_vec_info *load2_dr_info = STMT_VINFO_DR_INFO (load2_stmt_info);
7137 tree var1 = TREE_OPERAND (DR_BASE_ADDRESS (load1_dr_info->dr), 0);
7138 tree var2 = TREE_OPERAND (DR_BASE_ADDRESS (load2_dr_info->dr), 0);
7139
7140 if (lookup_attribute ("omp simd inscan", DECL_ATTRIBUTES (var1)))
7141 {
7142 std::swap (rhs1, rhs2);
7143 std::swap (var1, var2);
7144 std::swap (load1_dr_info, load2_dr_info);
7145 }
7146
7147 tree *init = loop_vinfo->scan_map->get (var1);
7148 gcc_assert (init);
7149
7150 unsigned HOST_WIDE_INT nunits;
7151 if (!TYPE_VECTOR_SUBPARTS (vectype).is_constant (&nunits))
7152 gcc_unreachable ();
7153 auto_vec<enum scan_store_kind, 16> use_whole_vector;
7154 int units_log2 = scan_store_can_perm_p (vectype, *init, &use_whole_vector);
7155 gcc_assert (units_log2 > 0);
7156 auto_vec<tree, 16> perms;
7157 perms.quick_grow (units_log2 + 1);
7158 tree zero_vec = NULL_TREE, masktype = NULL_TREE;
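 /* For example, with an 8-lane vector the loop below builds the selectors
    { 0, 8, 9, 10, 11, 12, 13, 14 }, { 0, 1, 8, 9, 10, 11, 12, 13 } and
    { 0, 1, 2, 3, 8, 9, 10, 11 } for the shift steps and
    { 7, 7, 7, 7, 7, 7, 7, 7 } for the final broadcast, matching the example
    IL in the comment in check_scan_store.  */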
7159 for (int i = 0; i <= units_log2; ++i)
7160 {
7161 unsigned HOST_WIDE_INT j, k;
7162 vec_perm_builder sel (nunits, nunits, 1);
7163 sel.quick_grow (nunits);
7164 if (i == units_log2)
7165 for (j = 0; j < nunits; ++j)
7166 sel[j] = nunits - 1;
7167 else
7168 {
7169 for (j = 0; j < (HOST_WIDE_INT_1U << i); ++j)
7170 sel[j] = j;
7171 for (k = 0; j < nunits; ++j, ++k)
7172 sel[j] = nunits + k;
7173 }
7174 vec_perm_indices indices (sel, i == units_log2 ? 1 : 2, nunits);
7175 if (!use_whole_vector.is_empty ()
7176 && use_whole_vector[i] != scan_store_kind_perm)
7177 {
7178 if (zero_vec == NULL_TREE)
7179 zero_vec = build_zero_cst (vectype);
7180 if (masktype == NULL_TREE
7181 && use_whole_vector[i] == scan_store_kind_lshift_cond)
7182 masktype = truth_type_for (vectype);
7183 perms[i] = vect_gen_perm_mask_any (vectype, indices);
7184 }
7185 else
7186 perms[i] = vect_gen_perm_mask_checked (vectype, indices);
7187 }
7188
7189 stmt_vec_info prev_stmt_info = NULL;
7190 tree vec_oprnd1 = NULL_TREE;
7191 tree vec_oprnd2 = NULL_TREE;
7192 tree vec_oprnd3 = NULL_TREE;
7193 tree dataref_ptr = DR_BASE_ADDRESS (dr_info->dr);
7194 tree dataref_offset = build_int_cst (ref_type, 0);
7195 tree bump = vect_get_data_ptr_increment (vinfo, dr_info,
7196 vectype, VMAT_CONTIGUOUS);
7197 tree ldataref_ptr = NULL_TREE;
7198 tree orig = NULL_TREE;
7199 if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) == 4 && !inscan_var_store)
7200 ldataref_ptr = DR_BASE_ADDRESS (load1_dr_info->dr);
7201 for (int j = 0; j < ncopies; j++)
7202 {
7203 stmt_vec_info new_stmt_info;
7204 if (j == 0)
7205 {
7206 vec_oprnd1 = vect_get_vec_def_for_operand (vinfo, *init, stmt_info);
7207 if (ldataref_ptr == NULL)
7208 vec_oprnd2 = vect_get_vec_def_for_operand (vinfo, rhs1, stmt_info);
7209 vec_oprnd3 = vect_get_vec_def_for_operand (vinfo, rhs2, stmt_info);
7210 orig = vec_oprnd3;
7211 }
7212 else
7213 {
7214 vec_oprnd1 = vect_get_vec_def_for_stmt_copy (vinfo, vec_oprnd1);
7215 if (ldataref_ptr == NULL)
7216 vec_oprnd2 = vect_get_vec_def_for_stmt_copy (vinfo, vec_oprnd2);
7217 vec_oprnd3 = vect_get_vec_def_for_stmt_copy (vinfo, vec_oprnd3);
7218 if (!inscan_var_store)
7219 dataref_offset = int_const_binop (PLUS_EXPR, dataref_offset, bump);
7220 }
7221
7222 if (ldataref_ptr)
7223 {
7224 vec_oprnd2 = make_ssa_name (vectype);
7225 tree data_ref = fold_build2 (MEM_REF, vectype,
7226 unshare_expr (ldataref_ptr),
7227 dataref_offset);
7228 vect_copy_ref_info (data_ref, DR_REF (load1_dr_info->dr));
7229 gimple *g = gimple_build_assign (vec_oprnd2, data_ref);
7230 new_stmt_info = vect_finish_stmt_generation (vinfo, stmt_info, g, gsi);
7231 if (prev_stmt_info == NULL)
7232 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt_info;
7233 else
7234 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt_info;
7235 prev_stmt_info = new_stmt_info;
7236 }
7237
7238 tree v = vec_oprnd2;
7239 for (int i = 0; i < units_log2; ++i)
7240 {
7241 tree new_temp = make_ssa_name (vectype);
7242 gimple *g = gimple_build_assign (new_temp, VEC_PERM_EXPR,
7243 (zero_vec
7244 && (use_whole_vector[i]
7245 != scan_store_kind_perm))
7246 ? zero_vec : vec_oprnd1, v,
7247 perms[i]);
7248 new_stmt_info = vect_finish_stmt_generation (vinfo, stmt_info, g, gsi);
7249 if (prev_stmt_info == NULL)
7250 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt_info;
7251 else
7252 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt_info;
7253 prev_stmt_info = new_stmt_info;
7254
7255 if (zero_vec && use_whole_vector[i] == scan_store_kind_lshift_cond)
7256 {
7257 /* The whole-vector shift filled the vacated elements with zeros,
7258 but if *init is not initializer_zerop, we need to replace those
7259 elements with the corresponding elements from vec_oprnd1. */
7260 tree_vector_builder vb (masktype, nunits, 1);
7261 for (unsigned HOST_WIDE_INT k = 0; k < nunits; ++k)
7262 vb.quick_push (k < (HOST_WIDE_INT_1U << i)
7263 ? boolean_false_node : boolean_true_node);
7264
7265 tree new_temp2 = make_ssa_name (vectype);
7266 g = gimple_build_assign (new_temp2, VEC_COND_EXPR, vb.build (),
7267 new_temp, vec_oprnd1);
7268 new_stmt_info = vect_finish_stmt_generation (vinfo, stmt_info,
7269 g, gsi);
7270 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt_info;
7271 prev_stmt_info = new_stmt_info;
7272 new_temp = new_temp2;
7273 }
7274
7275 /* For exclusive scan, perform the perms[i] permutation once
7276 more. */
7277 if (i == 0
7278 && STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) == 4
7279 && v == vec_oprnd2)
7280 {
7281 v = new_temp;
7282 --i;
7283 continue;
7284 }
7285
7286 tree new_temp2 = make_ssa_name (vectype);
7287 g = gimple_build_assign (new_temp2, code, v, new_temp);
7288 new_stmt_info = vect_finish_stmt_generation (vinfo, stmt_info, g, gsi);
7289 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt_info;
7290 prev_stmt_info = new_stmt_info;
7291
7292 v = new_temp2;
7293 }
7294
7295 tree new_temp = make_ssa_name (vectype);
7296 gimple *g = gimple_build_assign (new_temp, code, orig, v);
7297 new_stmt_info = vect_finish_stmt_generation (vinfo, stmt_info, g, gsi);
7298 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt_info;
7299 prev_stmt_info = new_stmt_info;
7300
7301 tree last_perm_arg = new_temp;
7302 /* For exclusive scan, new_temp computed above is the exclusive scan
7303 prefix sum. Turn it into inclusive prefix sum for the broadcast
7304 of the last element into orig. */
7305 if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) == 4)
7306 {
7307 last_perm_arg = make_ssa_name (vectype);
7308 g = gimple_build_assign (last_perm_arg, code, new_temp, vec_oprnd2);
7309 new_stmt_info = vect_finish_stmt_generation (vinfo, stmt_info, g, gsi);
7310 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt_info;
7311 prev_stmt_info = new_stmt_info;
7312 }
7313
7314 orig = make_ssa_name (vectype);
7315 g = gimple_build_assign (orig, VEC_PERM_EXPR, last_perm_arg,
7316 last_perm_arg, perms[units_log2]);
7317 new_stmt_info = vect_finish_stmt_generation (vinfo, stmt_info, g, gsi);
7318 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt_info;
7319 prev_stmt_info = new_stmt_info;
7320
7321 if (!inscan_var_store)
7322 {
7323 tree data_ref = fold_build2 (MEM_REF, vectype,
7324 unshare_expr (dataref_ptr),
7325 dataref_offset);
7326 vect_copy_ref_info (data_ref, DR_REF (dr_info->dr));
7327 g = gimple_build_assign (data_ref, new_temp);
7328 new_stmt_info = vect_finish_stmt_generation (vinfo, stmt_info, g, gsi);
7329 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt_info;
7330 prev_stmt_info = new_stmt_info;
7331 }
7332 }
7333
7334 if (inscan_var_store)
7335 for (int j = 0; j < ncopies; j++)
7336 {
7337 if (j != 0)
7338 dataref_offset = int_const_binop (PLUS_EXPR, dataref_offset, bump);
7339
7340 tree data_ref = fold_build2 (MEM_REF, vectype,
7341 unshare_expr (dataref_ptr),
7342 dataref_offset);
7343 vect_copy_ref_info (data_ref, DR_REF (dr_info->dr));
7344 gimple *g = gimple_build_assign (data_ref, orig);
7345 stmt_vec_info new_stmt_info
7346 = vect_finish_stmt_generation (vinfo, stmt_info, g, gsi);
7347 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt_info;
7348 prev_stmt_info = new_stmt_info;
7349 }
7350 return true;
7351 }
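
/* Illustrative counterpart (again not part of the vectorizer) for the
   exclusive scan case handled above: the extra application of the first
   shift-by-one permute (the "--i; continue;" path) is equivalent to
   shifting the input right by one lane before running the same doubling
   steps as for the inclusive scan, so lane K ends up with the sum of the
   original lanes 0..K-1 only.  A scalar sketch for 8 lanes and a zero
   initializer:  */

void
exclusive_scan_model (int v[8])
{
  /* The extra initial shift by one lane; lane 0 takes the zero
     initializer.  */
  for (int lane = 7; lane > 0; lane--)
    v[lane] = v[lane - 1];
  v[0] = 0;

  /* The same log2 (8) == 3 shift-and-add doubling steps as for the
     inclusive scan.  */
  for (int step = 0; step < 3; step++)
    {
      int shifted[8];
      for (int lane = 0; lane < 8; lane++)
        shifted[lane] = lane < (1 << step) ? 0 : v[lane - (1 << step)];
      for (int lane = 0; lane < 8; lane++)
        v[lane] += shifted[lane];
    }
  /* V[LANE] now holds the sum of the original V[0..LANE-1] (0 for lane 0),
     i.e. the exclusive prefix sum.  */
}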
7352
7353
7354 /* Function vectorizable_store.
7355
7356 Check if STMT_INFO defines a non scalar data-ref (array/pointer/structure)
7357 that can be vectorized.
7358 If VEC_STMT is also passed, vectorize STMT_INFO: create a vectorized
7359 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
7360 Return true if STMT_INFO is vectorizable in this way. */
7361
7362 static bool
7363 vectorizable_store (vec_info *vinfo,
7364 stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
7365 stmt_vec_info *vec_stmt, slp_tree slp_node,
7366 stmt_vector_for_cost *cost_vec)
7367 {
7368 tree data_ref;
7369 tree op;
7370 tree vec_oprnd = NULL_TREE;
7371 tree elem_type;
7372 loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
7373 class loop *loop = NULL;
7374 machine_mode vec_mode;
7375 tree dummy;
7376 enum dr_alignment_support alignment_support_scheme;
7377 enum vect_def_type rhs_dt = vect_unknown_def_type;
7378 enum vect_def_type mask_dt = vect_unknown_def_type;
7379 stmt_vec_info prev_stmt_info = NULL;
7380 tree dataref_ptr = NULL_TREE;
7381 tree dataref_offset = NULL_TREE;
7382 gimple *ptr_incr = NULL;
7383 int ncopies;
7384 int j;
7385 stmt_vec_info first_stmt_info;
7386 bool grouped_store;
7387 unsigned int group_size, i;
7388 vec<tree> oprnds = vNULL;
7389 vec<tree> result_chain = vNULL;
7390 tree offset = NULL_TREE;
7391 vec<tree> vec_oprnds = vNULL;
7392 bool slp = (slp_node != NULL);
7393 unsigned int vec_num;
7394 bb_vec_info bb_vinfo = dyn_cast <bb_vec_info> (vinfo);
7395 tree aggr_type;
7396 gather_scatter_info gs_info;
7397 poly_uint64 vf;
7398 vec_load_store_type vls_type;
7399 tree ref_type;
7400
7401 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
7402 return false;
7403
7404 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
7405 && ! vec_stmt)
7406 return false;
7407
7408 /* Is vectorizable store? */
7409
7410 tree mask = NULL_TREE, mask_vectype = NULL_TREE;
7411 if (gassign *assign = dyn_cast <gassign *> (stmt_info->stmt))
7412 {
7413 tree scalar_dest = gimple_assign_lhs (assign);
7414 if (TREE_CODE (scalar_dest) == VIEW_CONVERT_EXPR
7415 && is_pattern_stmt_p (stmt_info))
7416 scalar_dest = TREE_OPERAND (scalar_dest, 0);
7417 if (TREE_CODE (scalar_dest) != ARRAY_REF
7418 && TREE_CODE (scalar_dest) != BIT_FIELD_REF
7419 && TREE_CODE (scalar_dest) != INDIRECT_REF
7420 && TREE_CODE (scalar_dest) != COMPONENT_REF
7421 && TREE_CODE (scalar_dest) != IMAGPART_EXPR
7422 && TREE_CODE (scalar_dest) != REALPART_EXPR
7423 && TREE_CODE (scalar_dest) != MEM_REF)
7424 return false;
7425 }
7426 else
7427 {
7428 gcall *call = dyn_cast <gcall *> (stmt_info->stmt);
7429 if (!call || !gimple_call_internal_p (call))
7430 return false;
7431
7432 internal_fn ifn = gimple_call_internal_fn (call);
7433 if (!internal_store_fn_p (ifn))
7434 return false;
7435
7436 if (slp_node != NULL)
7437 {
7438 if (dump_enabled_p ())
7439 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
7440 "SLP of masked stores not supported.\n");
7441 return false;
7442 }
7443
7444 int mask_index = internal_fn_mask_index (ifn);
7445 if (mask_index >= 0)
7446 {
7447 mask = gimple_call_arg (call, mask_index);
7448 if (!vect_check_scalar_mask (vinfo, stmt_info, mask, &mask_dt,
7449 &mask_vectype))
7450 return false;
7451 }
7452 }
7453
7454 op = vect_get_store_rhs (stmt_info);
7455
7456 /* Cannot have hybrid store SLP -- that would mean storing to the
7457 same location twice. */
7458 gcc_assert (slp == PURE_SLP_STMT (stmt_info));
7459
7460 tree vectype = STMT_VINFO_VECTYPE (stmt_info), rhs_vectype = NULL_TREE;
7461 poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
7462
7463 if (loop_vinfo)
7464 {
7465 loop = LOOP_VINFO_LOOP (loop_vinfo);
7466 vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
7467 }
7468 else
7469 vf = 1;
7470
7471 /* Multiple types in SLP are handled by creating the appropriate number of
7472 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
7473 case of SLP. */
7474 if (slp)
7475 ncopies = 1;
7476 else
7477 ncopies = vect_get_num_copies (loop_vinfo, vectype);
7478
7479 gcc_assert (ncopies >= 1);
7480
7481 /* FORNOW. This restriction should be relaxed. */
7482 if (loop && nested_in_vect_loop_p (loop, stmt_info) && ncopies > 1)
7483 {
7484 if (dump_enabled_p ())
7485 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
7486 "multiple types in nested loop.\n");
7487 return false;
7488 }
7489
7490 if (!vect_check_store_rhs (vinfo, stmt_info,
7491 op, &rhs_dt, &rhs_vectype, &vls_type))
7492 return false;
7493
7494 elem_type = TREE_TYPE (vectype);
7495 vec_mode = TYPE_MODE (vectype);
7496
7497 if (!STMT_VINFO_DATA_REF (stmt_info))
7498 return false;
7499
7500 vect_memory_access_type memory_access_type;
7501 if (!get_load_store_type (vinfo, stmt_info, vectype, slp, mask, vls_type,
7502 ncopies, &memory_access_type, &gs_info))
7503 return false;
7504
7505 if (mask)
7506 {
7507 if (memory_access_type == VMAT_CONTIGUOUS)
7508 {
7509 if (!VECTOR_MODE_P (vec_mode)
7510 || !can_vec_mask_load_store_p (vec_mode,
7511 TYPE_MODE (mask_vectype), false))
7512 return false;
7513 }
7514 else if (memory_access_type != VMAT_LOAD_STORE_LANES
7515 && (memory_access_type != VMAT_GATHER_SCATTER
7516 || (gs_info.decl && !VECTOR_BOOLEAN_TYPE_P (mask_vectype))))
7517 {
7518 if (dump_enabled_p ())
7519 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
7520 "unsupported access type for masked store.\n");
7521 return false;
7522 }
7523 }
7524 else
7525 {
7526 /* FORNOW.  In some cases we can vectorize even if the data-type is not
7527 supported (e.g. array initialization with 0). */
7528 if (optab_handler (mov_optab, vec_mode) == CODE_FOR_nothing)
7529 return false;
7530 }
7531
7532 dr_vec_info *dr_info = STMT_VINFO_DR_INFO (stmt_info), *first_dr_info = NULL;
7533 grouped_store = (STMT_VINFO_GROUPED_ACCESS (stmt_info)
7534 && memory_access_type != VMAT_GATHER_SCATTER
7535 && (slp || memory_access_type != VMAT_CONTIGUOUS));
7536 if (grouped_store)
7537 {
7538 first_stmt_info = DR_GROUP_FIRST_ELEMENT (stmt_info);
7539 first_dr_info = STMT_VINFO_DR_INFO (first_stmt_info);
7540 group_size = DR_GROUP_SIZE (first_stmt_info);
7541 }
7542 else
7543 {
7544 first_stmt_info = stmt_info;
7545 first_dr_info = dr_info;
7546 group_size = vec_num = 1;
7547 }
7548
7549 if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) > 1 && !vec_stmt)
7550 {
7551 if (!check_scan_store (vinfo, stmt_info, vectype, rhs_dt, slp, mask,
7552 memory_access_type))
7553 return false;
7554 }
7555
7556 if (!vec_stmt) /* transformation not required. */
7557 {
7558 STMT_VINFO_MEMORY_ACCESS_TYPE (stmt_info) = memory_access_type;
7559
7560 if (loop_vinfo
7561 && LOOP_VINFO_CAN_FULLY_MASK_P (loop_vinfo))
7562 check_load_store_masking (loop_vinfo, vectype, vls_type, group_size,
7563 memory_access_type, &gs_info, mask);
7564
7565 if (slp_node
7566 && !vect_maybe_update_slp_op_vectype (SLP_TREE_CHILDREN (slp_node)[0],
7567 vectype))
7568 {
7569 if (dump_enabled_p ())
7570 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
7571 "incompatible vector types for invariants\n");
7572 return false;
7573 }
7574
7575 STMT_VINFO_TYPE (stmt_info) = store_vec_info_type;
7576 vect_model_store_cost (vinfo, stmt_info, ncopies,
7577 memory_access_type, vls_type, slp_node, cost_vec);
7578 return true;
7579 }
7580 gcc_assert (memory_access_type == STMT_VINFO_MEMORY_ACCESS_TYPE (stmt_info));
7581
7582 /* Transform. */
7583
7584 ensure_base_align (dr_info);
7585
7586 if (memory_access_type == VMAT_GATHER_SCATTER && gs_info.decl)
7587 {
7588 tree vec_oprnd0 = NULL_TREE, vec_oprnd1 = NULL_TREE, src;
7589 tree arglist = TYPE_ARG_TYPES (TREE_TYPE (gs_info.decl));
7590 tree rettype, srctype, ptrtype, idxtype, masktype, scaletype;
7591 tree ptr, var, scale, vec_mask;
7592 tree mask_arg = NULL_TREE, mask_op = NULL_TREE, perm_mask = NULL_TREE;
7593 tree mask_halfvectype = mask_vectype;
7594 edge pe = loop_preheader_edge (loop);
7595 gimple_seq seq;
7596 basic_block new_bb;
7597 enum { NARROW, NONE, WIDEN } modifier;
7598 poly_uint64 scatter_off_nunits
7599 = TYPE_VECTOR_SUBPARTS (gs_info.offset_vectype);
7600
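 /* Decide how the data and offset vectors line up: with equal lane counts
    no adjustment is needed; if the offset vector has twice as many lanes
    (WIDEN), odd-numbered copies permute its high half into place; if the
    data vector has twice as many lanes (NARROW), odd-numbered copies
    instead take the high half of the data (and the unpacked high half of
    the mask) and NCOPIES is doubled.  */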
7601 if (known_eq (nunits, scatter_off_nunits))
7602 modifier = NONE;
7603 else if (known_eq (nunits * 2, scatter_off_nunits))
7604 {
7605 modifier = WIDEN;
7606
7607 /* Currently gathers and scatters are only supported for
7608 fixed-length vectors. */
7609 unsigned int count = scatter_off_nunits.to_constant ();
7610 vec_perm_builder sel (count, count, 1);
7611 for (i = 0; i < (unsigned int) count; ++i)
7612 sel.quick_push (i | (count / 2));
7613
7614 vec_perm_indices indices (sel, 1, count);
7615 perm_mask = vect_gen_perm_mask_checked (gs_info.offset_vectype,
7616 indices);
7617 gcc_assert (perm_mask != NULL_TREE);
7618 }
7619 else if (known_eq (nunits, scatter_off_nunits * 2))
7620 {
7621 modifier = NARROW;
7622
7623 /* Currently gathers and scatters are only supported for
7624 fixed-length vectors. */
7625 unsigned int count = nunits.to_constant ();
7626 vec_perm_builder sel (count, count, 1);
7627 for (i = 0; i < (unsigned int) count; ++i)
7628 sel.quick_push (i | (count / 2));
7629
7630 vec_perm_indices indices (sel, 2, count);
7631 perm_mask = vect_gen_perm_mask_checked (vectype, indices);
7632 gcc_assert (perm_mask != NULL_TREE);
7633 ncopies *= 2;
7634
7635 if (mask)
7636 mask_halfvectype = truth_type_for (gs_info.offset_vectype);
7637 }
7638 else
7639 gcc_unreachable ();
7640
7641 rettype = TREE_TYPE (TREE_TYPE (gs_info.decl));
7642 ptrtype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
7643 masktype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
7644 idxtype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
7645 srctype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
7646 scaletype = TREE_VALUE (arglist);
7647
7648 gcc_checking_assert (TREE_CODE (masktype) == INTEGER_TYPE
7649 && TREE_CODE (rettype) == VOID_TYPE);
7650
7651 ptr = fold_convert (ptrtype, gs_info.base);
7652 if (!is_gimple_min_invariant (ptr))
7653 {
7654 ptr = force_gimple_operand (ptr, &seq, true, NULL_TREE);
7655 new_bb = gsi_insert_seq_on_edge_immediate (pe, seq);
7656 gcc_assert (!new_bb);
7657 }
7658
7659 if (mask == NULL_TREE)
7660 {
7661 mask_arg = build_int_cst (masktype, -1);
7662 mask_arg = vect_init_vector (vinfo, stmt_info,
7663 mask_arg, masktype, NULL);
7664 }
7665
7666 scale = build_int_cst (scaletype, gs_info.scale);
7667
7668 prev_stmt_info = NULL;
7669 for (j = 0; j < ncopies; ++j)
7670 {
7671 if (j == 0)
7672 {
7673 src = vec_oprnd1 = vect_get_vec_def_for_operand (vinfo,
7674 op, stmt_info);
7675 op = vec_oprnd0 = vect_get_vec_def_for_operand (vinfo,
7676 gs_info.offset,
7677 stmt_info);
7678 if (mask)
7679 {
7680 tree mask_vectype = truth_type_for (vectype);
7681 mask_op = vec_mask
7682 = vect_get_vec_def_for_operand (vinfo, mask,
7683 stmt_info, mask_vectype);
7684 }
7685 }
7686 else if (modifier != NONE && (j & 1))
7687 {
7688 if (modifier == WIDEN)
7689 {
7690 src
7691 = vec_oprnd1 = vect_get_vec_def_for_stmt_copy (vinfo,
7692 vec_oprnd1);
7693 op = permute_vec_elements (vinfo, vec_oprnd0, vec_oprnd0,
7694 perm_mask, stmt_info, gsi);
7695 if (mask)
7696 mask_op
7697 = vec_mask = vect_get_vec_def_for_stmt_copy (vinfo,
7698 vec_mask);
7699 }
7700 else if (modifier == NARROW)
7701 {
7702 src = permute_vec_elements (vinfo, vec_oprnd1, vec_oprnd1,
7703 perm_mask, stmt_info, gsi);
7704 op = vec_oprnd0 = vect_get_vec_def_for_stmt_copy (vinfo,
7705 vec_oprnd0);
7706 }
7707 else
7708 gcc_unreachable ();
7709 }
7710 else
7711 {
7712 src = vec_oprnd1 = vect_get_vec_def_for_stmt_copy (vinfo,
7713 vec_oprnd1);
7714 op = vec_oprnd0 = vect_get_vec_def_for_stmt_copy (vinfo,
7715 vec_oprnd0);
7716 if (mask)
7717 mask_op = vec_mask = vect_get_vec_def_for_stmt_copy (vinfo,
7718 vec_mask);
7719 }
7720
7721 if (!useless_type_conversion_p (srctype, TREE_TYPE (src)))
7722 {
7723 gcc_assert (known_eq (TYPE_VECTOR_SUBPARTS (TREE_TYPE (src)),
7724 TYPE_VECTOR_SUBPARTS (srctype)));
7725 var = vect_get_new_ssa_name (srctype, vect_simple_var);
7726 src = build1 (VIEW_CONVERT_EXPR, srctype, src);
7727 gassign *new_stmt
7728 = gimple_build_assign (var, VIEW_CONVERT_EXPR, src);
7729 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
7730 src = var;
7731 }
7732
7733 if (!useless_type_conversion_p (idxtype, TREE_TYPE (op)))
7734 {
7735 gcc_assert (known_eq (TYPE_VECTOR_SUBPARTS (TREE_TYPE (op)),
7736 TYPE_VECTOR_SUBPARTS (idxtype)));
7737 var = vect_get_new_ssa_name (idxtype, vect_simple_var);
7738 op = build1 (VIEW_CONVERT_EXPR, idxtype, op);
7739 gassign *new_stmt
7740 = gimple_build_assign (var, VIEW_CONVERT_EXPR, op);
7741 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
7742 op = var;
7743 }
7744
7745 if (mask)
7746 {
7747 tree utype;
7748 mask_arg = mask_op;
7749 if (modifier == NARROW)
7750 {
7751 var = vect_get_new_ssa_name (mask_halfvectype,
7752 vect_simple_var);
7753 gassign *new_stmt
7754 = gimple_build_assign (var, (j & 1) ? VEC_UNPACK_HI_EXPR
7755 : VEC_UNPACK_LO_EXPR,
7756 mask_op);
7757 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
7758 mask_arg = var;
7759 }
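 /* The builtin scatter takes its mask as an integer scalar (see the
    INTEGER_TYPE assert on MASKTYPE above), so view-convert the vector
    boolean mask to a same-sized unsigned integer and, if that integer is
    narrower than MASKTYPE, zero-extend it with a NOP_EXPR below.  */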
7760 tree optype = TREE_TYPE (mask_arg);
7761 if (TYPE_MODE (masktype) == TYPE_MODE (optype))
7762 utype = masktype;
7763 else
7764 utype = lang_hooks.types.type_for_mode (TYPE_MODE (optype), 1);
7765 var = vect_get_new_ssa_name (utype, vect_scalar_var);
7766 mask_arg = build1 (VIEW_CONVERT_EXPR, utype, mask_arg);
7767 gassign *new_stmt
7768 = gimple_build_assign (var, VIEW_CONVERT_EXPR, mask_arg);
7769 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
7770 mask_arg = var;
7771 if (!useless_type_conversion_p (masktype, utype))
7772 {
7773 gcc_assert (TYPE_PRECISION (utype)
7774 <= TYPE_PRECISION (masktype));
7775 var = vect_get_new_ssa_name (masktype, vect_scalar_var);
7776 new_stmt = gimple_build_assign (var, NOP_EXPR, mask_arg);
7777 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
7778 mask_arg = var;
7779 }
7780 }
7781
7782 gcall *new_stmt
7783 = gimple_build_call (gs_info.decl, 5, ptr, mask_arg, op, src, scale);
7784 stmt_vec_info new_stmt_info
7785 = vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
7786
7787 if (prev_stmt_info == NULL)
7788 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt_info;
7789 else
7790 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt_info;
7791 prev_stmt_info = new_stmt_info;
7792 }
7793 return true;
7794 }
7795 else if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) >= 3)
7796 return vectorizable_scan_store (vinfo, stmt_info, gsi, vec_stmt, ncopies);
7797
7798 if (STMT_VINFO_GROUPED_ACCESS (stmt_info))
7799 DR_GROUP_STORE_COUNT (DR_GROUP_FIRST_ELEMENT (stmt_info))++;
7800
7801 if (grouped_store)
7802 {
7803 /* FORNOW */
7804 gcc_assert (!loop || !nested_in_vect_loop_p (loop, stmt_info));
7805
7806 /* We vectorize all the stmts of the interleaving group when we
7807 reach the last stmt in the group. */
7808 if (DR_GROUP_STORE_COUNT (first_stmt_info)
7809 < DR_GROUP_SIZE (first_stmt_info)
7810 && !slp)
7811 {
7812 *vec_stmt = NULL;
7813 return true;
7814 }
7815
7816 if (slp)
7817 {
7818 grouped_store = false;
7819 /* VEC_NUM is the number of vect stmts to be created for this
7820 group. */
7821 vec_num = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
7822 first_stmt_info = SLP_TREE_SCALAR_STMTS (slp_node)[0];
7823 gcc_assert (DR_GROUP_FIRST_ELEMENT (first_stmt_info)
7824 == first_stmt_info);
7825 first_dr_info = STMT_VINFO_DR_INFO (first_stmt_info);
7826 op = vect_get_store_rhs (first_stmt_info);
7827 }
7828 else
7829 /* VEC_NUM is the number of vect stmts to be created for this
7830 group. */
7831 vec_num = group_size;
7832
7833 ref_type = get_group_alias_ptr_type (first_stmt_info);
7834 }
7835 else
7836 ref_type = reference_alias_ptr_type (DR_REF (first_dr_info->dr));
7837
7838 if (dump_enabled_p ())
7839 dump_printf_loc (MSG_NOTE, vect_location,
7840 "transform store. ncopies = %d\n", ncopies);
7841
7842 if (memory_access_type == VMAT_ELEMENTWISE
7843 || memory_access_type == VMAT_STRIDED_SLP)
7844 {
7845 gimple_stmt_iterator incr_gsi;
7846 bool insert_after;
7847 gimple *incr;
7848 tree offvar;
7849 tree ivstep;
7850 tree running_off;
7851 tree stride_base, stride_step, alias_off;
7852 tree vec_oprnd;
7853 tree dr_offset;
7854 unsigned int g;
7855 /* Checked by get_load_store_type. */
7856 unsigned int const_nunits = nunits.to_constant ();
7857
7858 gcc_assert (!LOOP_VINFO_FULLY_MASKED_P (loop_vinfo));
7859 gcc_assert (!nested_in_vect_loop_p (loop, stmt_info));
7860
7861 dr_offset = get_dr_vinfo_offset (vinfo, first_dr_info);
7862 stride_base
7863 = fold_build_pointer_plus
7864 (DR_BASE_ADDRESS (first_dr_info->dr),
7865 size_binop (PLUS_EXPR,
7866 convert_to_ptrofftype (dr_offset),
7867 convert_to_ptrofftype (DR_INIT (first_dr_info->dr))));
7868 stride_step = fold_convert (sizetype, DR_STEP (first_dr_info->dr));
7869
7870 /* For a store with loop-invariant (but other than power-of-2)
7871 stride (i.e. not a grouped access) like so:
7872
7873 for (i = 0; i < n; i += stride)
7874 array[i] = ...;
7875
7876 we generate a new induction variable and new stores from
7877 the components of the (vectorized) rhs:
7878
7879 for (j = 0; ; j += VF*stride)
7880 vectemp = ...;
7881 tmp1 = vectemp[0];
7882 array[j] = tmp1;
7883 tmp2 = vectemp[1];
7884 array[j + stride] = tmp2;
7885 ...
7886 */
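 /* For instance, with a 4-lane vector and scalar stride S, one vector
    iteration stores vectemp[0..3] to array[j], array[j + S],
    array[j + 2*S] and array[j + 3*S], and J advances by 4*S
    (VF * stride).  */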
7887
7888 unsigned nstores = const_nunits;
7889 unsigned lnel = 1;
7890 tree ltype = elem_type;
7891 tree lvectype = vectype;
7892 if (slp)
7893 {
7894 if (group_size < const_nunits
7895 && const_nunits % group_size == 0)
7896 {
7897 nstores = const_nunits / group_size;
7898 lnel = group_size;
7899 ltype = build_vector_type (elem_type, group_size);
7900 lvectype = vectype;
7901
7902 /* First check if vec_extract optab doesn't support extraction
7903 of vector elts directly. */
7904 scalar_mode elmode = SCALAR_TYPE_MODE (elem_type);
7905 machine_mode vmode;
7906 if (!VECTOR_MODE_P (TYPE_MODE (vectype))
7907 || !related_vector_mode (TYPE_MODE (vectype), elmode,
7908 group_size).exists (&vmode)
7909 || (convert_optab_handler (vec_extract_optab,
7910 TYPE_MODE (vectype), vmode)
7911 == CODE_FOR_nothing))
7912 {
7913 /* Try to avoid emitting an extract of vector elements
7914 by performing the extracts using an integer type of the
7915 same size, extracting from a vector of those and then
7916 re-interpreting it as the original vector type if
7917 supported. */
7918 unsigned lsize
7919 = group_size * GET_MODE_BITSIZE (elmode);
7920 unsigned int lnunits = const_nunits / group_size;
7921 /* If we can't construct such a vector fall back to
7922 element extracts from the original vector type and
7923 element size stores. */
7924 if (int_mode_for_size (lsize, 0).exists (&elmode)
7925 && VECTOR_MODE_P (TYPE_MODE (vectype))
7926 && related_vector_mode (TYPE_MODE (vectype), elmode,
7927 lnunits).exists (&vmode)
7928 && (convert_optab_handler (vec_extract_optab,
7929 vmode, elmode)
7930 != CODE_FOR_nothing))
7931 {
7932 nstores = lnunits;
7933 lnel = group_size;
7934 ltype = build_nonstandard_integer_type (lsize, 1);
7935 lvectype = build_vector_type (ltype, nstores);
7936 }
7937 /* Else fall back to vector extraction anyway.
7938 Fewer stores are more important than avoiding spilling
7939 of the vector we extract from. Compared to the
7940 construction case in vectorizable_load no store-forwarding
7941 issue exists here for reasonable archs. */
7942 }
7943 }
7944 else if (group_size >= const_nunits
7945 && group_size % const_nunits == 0)
7946 {
7947 nstores = 1;
7948 lnel = const_nunits;
7949 ltype = vectype;
7950 lvectype = vectype;
7951 }
7952 ltype = build_aligned_type (ltype, TYPE_ALIGN (elem_type));
7953 ncopies = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
7954 }
7955
7956 ivstep = stride_step;
7957 ivstep = fold_build2 (MULT_EXPR, TREE_TYPE (ivstep), ivstep,
7958 build_int_cst (TREE_TYPE (ivstep), vf));
7959
7960 standard_iv_increment_position (loop, &incr_gsi, &insert_after);
7961
7962 stride_base = cse_and_gimplify_to_preheader (loop_vinfo, stride_base);
7963 ivstep = cse_and_gimplify_to_preheader (loop_vinfo, ivstep);
7964 create_iv (stride_base, ivstep, NULL,
7965 loop, &incr_gsi, insert_after,
7966 &offvar, NULL);
7967 incr = gsi_stmt (incr_gsi);
7968 loop_vinfo->add_stmt (incr);
7969
7970 stride_step = cse_and_gimplify_to_preheader (loop_vinfo, stride_step);
7971
7972 prev_stmt_info = NULL;
7973 alias_off = build_int_cst (ref_type, 0);
7974 stmt_vec_info next_stmt_info = first_stmt_info;
7975 for (g = 0; g < group_size; g++)
7976 {
7977 running_off = offvar;
7978 if (g)
7979 {
7980 tree size = TYPE_SIZE_UNIT (ltype);
7981 tree pos = fold_build2 (MULT_EXPR, sizetype, size_int (g),
7982 size);
7983 tree newoff = copy_ssa_name (running_off, NULL);
7984 incr = gimple_build_assign (newoff, POINTER_PLUS_EXPR,
7985 running_off, pos);
7986 vect_finish_stmt_generation (vinfo, stmt_info, incr, gsi);
7987 running_off = newoff;
7988 }
7989 unsigned int group_el = 0;
7990 unsigned HOST_WIDE_INT
7991 elsz = tree_to_uhwi (TYPE_SIZE_UNIT (TREE_TYPE (vectype)));
7992 for (j = 0; j < ncopies; j++)
7993 {
7994 /* We've set op and dt above, from vect_get_store_rhs,
7995 and first_stmt_info == stmt_info. */
7996 if (j == 0)
7997 {
7998 if (slp)
7999 {
8000 vect_get_vec_defs (vinfo, op, NULL_TREE, stmt_info,
8001 &vec_oprnds, NULL, slp_node);
8002 vec_oprnd = vec_oprnds[0];
8003 }
8004 else
8005 {
8006 op = vect_get_store_rhs (next_stmt_info);
8007 vec_oprnd = vect_get_vec_def_for_operand
8008 (vinfo, op, next_stmt_info);
8009 }
8010 }
8011 else
8012 {
8013 if (slp)
8014 vec_oprnd = vec_oprnds[j];
8015 else
8016 vec_oprnd = vect_get_vec_def_for_stmt_copy (vinfo,
8017 vec_oprnd);
8018 }
8019 /* Pun the vector to extract from if necessary. */
8020 if (lvectype != vectype)
8021 {
8022 tree tem = make_ssa_name (lvectype);
8023 gimple *pun
8024 = gimple_build_assign (tem, build1 (VIEW_CONVERT_EXPR,
8025 lvectype, vec_oprnd));
8026 vect_finish_stmt_generation (vinfo, stmt_info, pun, gsi);
8027 vec_oprnd = tem;
8028 }
8029 for (i = 0; i < nstores; i++)
8030 {
8031 tree newref, newoff;
8032 gimple *incr, *assign;
8033 tree size = TYPE_SIZE (ltype);
8034 /* Extract the i'th component. */
8035 tree pos = fold_build2 (MULT_EXPR, bitsizetype,
8036 bitsize_int (i), size);
8037 tree elem = fold_build3 (BIT_FIELD_REF, ltype, vec_oprnd,
8038 size, pos);
8039
8040 elem = force_gimple_operand_gsi (gsi, elem, true,
8041 NULL_TREE, true,
8042 GSI_SAME_STMT);
8043
8044 tree this_off = build_int_cst (TREE_TYPE (alias_off),
8045 group_el * elsz);
8046 newref = build2 (MEM_REF, ltype,
8047 running_off, this_off);
8048 vect_copy_ref_info (newref, DR_REF (first_dr_info->dr));
8049
8050 /* And store it to *running_off. */
8051 assign = gimple_build_assign (newref, elem);
8052 stmt_vec_info assign_info
8053 = vect_finish_stmt_generation (vinfo, stmt_info,
8054 assign, gsi);
8055
8056 group_el += lnel;
8057 if (! slp
8058 || group_el == group_size)
8059 {
8060 newoff = copy_ssa_name (running_off, NULL);
8061 incr = gimple_build_assign (newoff, POINTER_PLUS_EXPR,
8062 running_off, stride_step);
8063 vect_finish_stmt_generation (vinfo, stmt_info, incr, gsi);
8064
8065 running_off = newoff;
8066 group_el = 0;
8067 }
8068 if (g == group_size - 1
8069 && !slp)
8070 {
8071 if (j == 0 && i == 0)
8072 STMT_VINFO_VEC_STMT (stmt_info)
8073 = *vec_stmt = assign_info;
8074 else
8075 STMT_VINFO_RELATED_STMT (prev_stmt_info) = assign_info;
8076 prev_stmt_info = assign_info;
8077 }
8078 }
8079 }
8080 next_stmt_info = DR_GROUP_NEXT_ELEMENT (next_stmt_info);
8081 if (slp)
8082 break;
8083 }
8084
8085 vec_oprnds.release ();
8086 return true;
8087 }
8088
8089 auto_vec<tree> dr_chain (group_size);
8090 oprnds.create (group_size);
8091
8092 /* Gather-scatter accesses perform only component accesses, so alignment
8093 is irrelevant for them. */
8094 if (memory_access_type == VMAT_GATHER_SCATTER)
8095 alignment_support_scheme = dr_unaligned_supported;
8096 else
8097 alignment_support_scheme
8098 = vect_supportable_dr_alignment (vinfo, first_dr_info, false);
8099
8100 gcc_assert (alignment_support_scheme);
8101 vec_loop_masks *loop_masks
8102 = (loop_vinfo && LOOP_VINFO_FULLY_MASKED_P (loop_vinfo)
8103 ? &LOOP_VINFO_MASKS (loop_vinfo)
8104 : NULL);
8105 /* Targets with store-lane instructions must not require explicit
8106 realignment. vect_supportable_dr_alignment always returns either
8107 dr_aligned or dr_unaligned_supported for masked operations. */
8108 gcc_assert ((memory_access_type != VMAT_LOAD_STORE_LANES
8109 && !mask
8110 && !loop_masks)
8111 || alignment_support_scheme == dr_aligned
8112 || alignment_support_scheme == dr_unaligned_supported);
8113
8114 if (memory_access_type == VMAT_CONTIGUOUS_DOWN
8115 || memory_access_type == VMAT_CONTIGUOUS_REVERSE)
8116 offset = size_int (-TYPE_VECTOR_SUBPARTS (vectype) + 1);
8117
8118 tree bump;
8119 tree vec_offset = NULL_TREE;
8120 if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
8121 {
8122 aggr_type = NULL_TREE;
8123 bump = NULL_TREE;
8124 }
8125 else if (memory_access_type == VMAT_GATHER_SCATTER)
8126 {
8127 aggr_type = elem_type;
8128 vect_get_strided_load_store_ops (stmt_info, loop_vinfo, &gs_info,
8129 &bump, &vec_offset);
8130 }
8131 else
8132 {
8133 if (memory_access_type == VMAT_LOAD_STORE_LANES)
8134 aggr_type = build_array_type_nelts (elem_type, vec_num * nunits);
8135 else
8136 aggr_type = vectype;
8137 bump = vect_get_data_ptr_increment (vinfo, dr_info, aggr_type,
8138 memory_access_type);
8139 }
8140
8141 if (mask)
8142 LOOP_VINFO_HAS_MASK_STORE (loop_vinfo) = true;
8143
8144 /* In case the vectorization factor (VF) is bigger than the number
8145 of elements that we can fit in a vectype (nunits), we have to generate
8146 more than one vector stmt, i.e. we need to "unroll" the
8147 vector stmt by a factor VF/nunits. For more details see documentation in
8148 vect_get_vec_def_for_stmt_copy. */
8149
8150 /* In case of interleaving (non-unit grouped access):
8151
8152 S1: &base + 2 = x2
8153 S2: &base = x0
8154 S3: &base + 1 = x1
8155 S4: &base + 3 = x3
8156
8157 We create vectorized stores starting from base address (the access of the
8158 first stmt in the chain (S2 in the above example), when the last store stmt
8159 of the chain (S4) is reached:
8160
8161 VS1: &base = vx2
8162 VS2: &base + vec_size*1 = vx0
8163 VS3: &base + vec_size*2 = vx1
8164 VS4: &base + vec_size*3 = vx3
8165
8166 Then permutation statements are generated:
8167
8168 VS5: vx5 = VEC_PERM_EXPR < vx0, vx3, {0, 8, 1, 9, 2, 10, 3, 11} >
8169 VS6: vx6 = VEC_PERM_EXPR < vx0, vx3, {4, 12, 5, 13, 6, 14, 7, 15} >
8170 ...
8171
8172 And they are put in STMT_VINFO_VEC_STMT of the corresponding scalar stmts
8173 (the order of the data-refs in the output of vect_permute_store_chain
8174 corresponds to the order of scalar stmts in the interleaving chain - see
8175 the documentation of vect_permute_store_chain()).
8176
8177 In case of both multiple types and interleaving, above vector stores and
8178 permutation stmts are created for every copy. The result vector stmts are
8179 put in STMT_VINFO_VEC_STMT for the first copy and in the corresponding
8180 STMT_VINFO_RELATED_STMT for the next copies.
8181 */
8182
8183 prev_stmt_info = NULL;
8184 tree vec_mask = NULL_TREE;
8185 for (j = 0; j < ncopies; j++)
8186 {
8187 stmt_vec_info new_stmt_info;
8188 if (j == 0)
8189 {
8190 if (slp)
8191 {
8192 /* Get vectorized arguments for SLP_NODE. */
8193 vect_get_vec_defs (vinfo, op, NULL_TREE, stmt_info, &vec_oprnds,
8194 NULL, slp_node);
8195
8196 vec_oprnd = vec_oprnds[0];
8197 }
8198 else
8199 {
8200 /* For interleaved stores we collect vectorized defs for all the
8201 stores in the group in DR_CHAIN and OPRNDS. DR_CHAIN is then
8202 used as an input to vect_permute_store_chain(), and OPRNDS as
8203 an input to vect_get_vec_def_for_stmt_copy() for the next copy.
8204
8205 If the store is not grouped, DR_GROUP_SIZE is 1, and DR_CHAIN and
8206 OPRNDS are of size 1. */
8207 stmt_vec_info next_stmt_info = first_stmt_info;
8208 for (i = 0; i < group_size; i++)
8209 {
8210 /* Since gaps are not supported for interleaved stores,
8211 DR_GROUP_SIZE is the exact number of stmts in the chain.
8212 Therefore, NEXT_STMT_INFO can't be NULL. In case
8213 there is no interleaving, DR_GROUP_SIZE is 1,
8214 and only one iteration of the loop will be executed. */
8215 op = vect_get_store_rhs (next_stmt_info);
8216 vec_oprnd = vect_get_vec_def_for_operand
8217 (vinfo, op, next_stmt_info);
8218 dr_chain.quick_push (vec_oprnd);
8219 oprnds.quick_push (vec_oprnd);
8220 next_stmt_info = DR_GROUP_NEXT_ELEMENT (next_stmt_info);
8221 }
8222 if (mask)
8223 vec_mask = vect_get_vec_def_for_operand (vinfo, mask, stmt_info,
8224 mask_vectype);
8225 }
8226
8227 /* We should have caught mismatched types earlier. */
8228 gcc_assert (useless_type_conversion_p (vectype,
8229 TREE_TYPE (vec_oprnd)));
8230 bool simd_lane_access_p
8231 = STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) != 0;
8232 if (simd_lane_access_p
8233 && !loop_masks
8234 && TREE_CODE (DR_BASE_ADDRESS (first_dr_info->dr)) == ADDR_EXPR
8235 && VAR_P (TREE_OPERAND (DR_BASE_ADDRESS (first_dr_info->dr), 0))
8236 && integer_zerop (get_dr_vinfo_offset (vinfo, first_dr_info))
8237 && integer_zerop (DR_INIT (first_dr_info->dr))
8238 && alias_sets_conflict_p (get_alias_set (aggr_type),
8239 get_alias_set (TREE_TYPE (ref_type))))
8240 {
8241 dataref_ptr = unshare_expr (DR_BASE_ADDRESS (first_dr_info->dr));
8242 dataref_offset = build_int_cst (ref_type, 0);
8243 }
8244 else if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
8245 vect_get_gather_scatter_ops (vinfo, loop, stmt_info, &gs_info,
8246 &dataref_ptr, &vec_offset);
8247 else
8248 dataref_ptr
8249 = vect_create_data_ref_ptr (vinfo, first_stmt_info, aggr_type,
8250 simd_lane_access_p ? loop : NULL,
8251 offset, &dummy, gsi, &ptr_incr,
8252 simd_lane_access_p, NULL_TREE, bump);
8253 }
8254 else
8255 {
8256 /* For interleaved stores we created vectorized defs for all the
8257 defs stored in OPRNDS in the previous iteration (previous copy).
8258 DR_CHAIN is then used as an input to vect_permute_store_chain(),
8259 and OPRNDS as an input to vect_get_vec_def_for_stmt_copy() for the
8260 next copy.
8261 If the store is not grouped, DR_GROUP_SIZE is 1, and DR_CHAIN and
8262 OPRNDS are of size 1. */
8263 for (i = 0; i < group_size; i++)
8264 {
8265 op = oprnds[i];
8266 vec_oprnd = vect_get_vec_def_for_stmt_copy (vinfo, op);
8267 dr_chain[i] = vec_oprnd;
8268 oprnds[i] = vec_oprnd;
8269 }
8270 if (mask)
8271 vec_mask = vect_get_vec_def_for_stmt_copy (vinfo, vec_mask);
8272 if (dataref_offset)
8273 dataref_offset
8274 = int_const_binop (PLUS_EXPR, dataref_offset, bump);
8275 else if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
8276 vec_offset = vect_get_vec_def_for_stmt_copy (vinfo, vec_offset);
8277 else
8278 dataref_ptr = bump_vector_ptr (vinfo, dataref_ptr, ptr_incr, gsi,
8279 stmt_info, bump);
8280 }
8281
8282 if (memory_access_type == VMAT_LOAD_STORE_LANES)
8283 {
8284 tree vec_array;
8285
8286 /* Get an array into which we can store the individual vectors. */
8287 vec_array = create_vector_array (vectype, vec_num);
8288
8289 /* Invalidate the current contents of VEC_ARRAY. This should
8290 become an RTL clobber too, which prevents the vector registers
8291 from being upward-exposed. */
8292 vect_clobber_variable (vinfo, stmt_info, gsi, vec_array);
8293
8294 /* Store the individual vectors into the array. */
8295 for (i = 0; i < vec_num; i++)
8296 {
8297 vec_oprnd = dr_chain[i];
8298 write_vector_array (vinfo, stmt_info,
8299 gsi, vec_oprnd, vec_array, i);
8300 }
8301
8302 tree final_mask = NULL;
8303 if (loop_masks)
8304 final_mask = vect_get_loop_mask (gsi, loop_masks, ncopies,
8305 vectype, j);
8306 if (vec_mask)
8307 final_mask = prepare_load_store_mask (mask_vectype, final_mask,
8308 vec_mask, gsi);
8309
8310 gcall *call;
8311 if (final_mask)
8312 {
8313 /* Emit:
8314 MASK_STORE_LANES (DATAREF_PTR, ALIAS_PTR, VEC_MASK,
8315 VEC_ARRAY). */
8316 unsigned int align = TYPE_ALIGN_UNIT (TREE_TYPE (vectype));
8317 tree alias_ptr = build_int_cst (ref_type, align);
8318 call = gimple_build_call_internal (IFN_MASK_STORE_LANES, 4,
8319 dataref_ptr, alias_ptr,
8320 final_mask, vec_array);
8321 }
8322 else
8323 {
8324 /* Emit:
8325 MEM_REF[...all elements...] = STORE_LANES (VEC_ARRAY). */
8326 data_ref = create_array_ref (aggr_type, dataref_ptr, ref_type);
8327 call = gimple_build_call_internal (IFN_STORE_LANES, 1,
8328 vec_array);
8329 gimple_call_set_lhs (call, data_ref);
8330 }
8331 gimple_call_set_nothrow (call, true);
8332 new_stmt_info = vect_finish_stmt_generation (vinfo, stmt_info,
8333 call, gsi);
8334
8335 /* Record that VEC_ARRAY is now dead. */
8336 vect_clobber_variable (vinfo, stmt_info, gsi, vec_array);
8337 }
8338 else
8339 {
8340 new_stmt_info = NULL;
8341 if (grouped_store)
8342 {
8343 if (j == 0)
8344 result_chain.create (group_size);
8345 /* Permute. */
8346 vect_permute_store_chain (vinfo, dr_chain, group_size, stmt_info,
8347 gsi, &result_chain);
8348 }
8349
8350 stmt_vec_info next_stmt_info = first_stmt_info;
8351 for (i = 0; i < vec_num; i++)
8352 {
8353 unsigned misalign;
8354 unsigned HOST_WIDE_INT align;
8355
8356 tree final_mask = NULL_TREE;
8357 if (loop_masks)
8358 final_mask = vect_get_loop_mask (gsi, loop_masks,
8359 vec_num * ncopies,
8360 vectype, vec_num * j + i);
8361 if (vec_mask)
8362 final_mask = prepare_load_store_mask (mask_vectype, final_mask,
8363 vec_mask, gsi);
8364
8365 if (memory_access_type == VMAT_GATHER_SCATTER)
8366 {
8367 tree scale = size_int (gs_info.scale);
8368 gcall *call;
8369 if (loop_masks)
8370 call = gimple_build_call_internal
8371 (IFN_MASK_SCATTER_STORE, 5, dataref_ptr, vec_offset,
8372 scale, vec_oprnd, final_mask);
8373 else
8374 call = gimple_build_call_internal
8375 (IFN_SCATTER_STORE, 4, dataref_ptr, vec_offset,
8376 scale, vec_oprnd);
8377 gimple_call_set_nothrow (call, true);
8378 new_stmt_info
8379 = vect_finish_stmt_generation (vinfo, stmt_info, call, gsi);
8380 break;
8381 }
8382
8383 if (i > 0)
8384 /* Bump the vector pointer. */
8385 dataref_ptr = bump_vector_ptr (vinfo, dataref_ptr, ptr_incr,
8386 gsi, stmt_info, bump);
8387
8388 if (slp)
8389 vec_oprnd = vec_oprnds[i];
8390 else if (grouped_store)
8391 /* For grouped stores vectorized defs are interleaved in
8392 vect_permute_store_chain(). */
8393 vec_oprnd = result_chain[i];
8394
8395 align = known_alignment (DR_TARGET_ALIGNMENT (first_dr_info));
8396 if (aligned_access_p (first_dr_info))
8397 misalign = 0;
8398 else if (DR_MISALIGNMENT (first_dr_info) == -1)
8399 {
8400 align = dr_alignment (vect_dr_behavior (vinfo, first_dr_info));
8401 misalign = 0;
8402 }
8403 else
8404 misalign = DR_MISALIGNMENT (first_dr_info);
8405 if (dataref_offset == NULL_TREE
8406 && TREE_CODE (dataref_ptr) == SSA_NAME)
8407 set_ptr_info_alignment (get_ptr_info (dataref_ptr), align,
8408 misalign);
8409
8410 if (memory_access_type == VMAT_CONTIGUOUS_REVERSE)
8411 {
8412 tree perm_mask = perm_mask_for_reverse (vectype);
8413 tree perm_dest = vect_create_destination_var
8414 (vect_get_store_rhs (stmt_info), vectype);
8415 tree new_temp = make_ssa_name (perm_dest);
8416
8417 /* Generate the permute statement. */
8418 gimple *perm_stmt
8419 = gimple_build_assign (new_temp, VEC_PERM_EXPR, vec_oprnd,
8420 vec_oprnd, perm_mask);
8421 vect_finish_stmt_generation (vinfo, stmt_info, perm_stmt, gsi);
8422
8423 perm_stmt = SSA_NAME_DEF_STMT (new_temp);
8424 vec_oprnd = new_temp;
8425 }
8426
8427 /* Arguments are ready. Create the new vector stmt. */
8428 if (final_mask)
8429 {
8430 align = least_bit_hwi (misalign | align);
8431 tree ptr = build_int_cst (ref_type, align);
8432 gcall *call
8433 = gimple_build_call_internal (IFN_MASK_STORE, 4,
8434 dataref_ptr, ptr,
8435 final_mask, vec_oprnd);
8436 gimple_call_set_nothrow (call, true);
8437 new_stmt_info
8438 = vect_finish_stmt_generation (vinfo, stmt_info, call, gsi);
8439 }
8440 else
8441 {
8442 data_ref = fold_build2 (MEM_REF, vectype,
8443 dataref_ptr,
8444 dataref_offset
8445 ? dataref_offset
8446 : build_int_cst (ref_type, 0));
8447 if (aligned_access_p (first_dr_info))
8448 ;
8449 else if (DR_MISALIGNMENT (first_dr_info) == -1)
8450 TREE_TYPE (data_ref)
8451 = build_aligned_type (TREE_TYPE (data_ref),
8452 align * BITS_PER_UNIT);
8453 else
8454 TREE_TYPE (data_ref)
8455 = build_aligned_type (TREE_TYPE (data_ref),
8456 TYPE_ALIGN (elem_type));
8457 vect_copy_ref_info (data_ref, DR_REF (first_dr_info->dr));
8458 gassign *new_stmt
8459 = gimple_build_assign (data_ref, vec_oprnd);
8460 new_stmt_info
8461 = vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
8462 }
8463
8464 if (slp)
8465 continue;
8466
8467 next_stmt_info = DR_GROUP_NEXT_ELEMENT (next_stmt_info);
8468 if (!next_stmt_info)
8469 break;
8470 }
8471 }
8472 if (!slp)
8473 {
8474 if (j == 0)
8475 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt_info;
8476 else
8477 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt_info;
8478 prev_stmt_info = new_stmt_info;
8479 }
8480 }
8481
8482 oprnds.release ();
8483 result_chain.release ();
8484 vec_oprnds.release ();
8485
8486 return true;
8487 }
8488
8489 /* Given a vector type VECTYPE, turns permutation SEL into the equivalent
8490 VECTOR_CST mask. No checks are made that the target platform supports the
8491 mask, so callers may wish to test can_vec_perm_const_p separately, or use
8492 vect_gen_perm_mask_checked. */
8493
8494 tree
8495 vect_gen_perm_mask_any (tree vectype, const vec_perm_indices &sel)
8496 {
8497 tree mask_type;
8498
8499 poly_uint64 nunits = sel.length ();
8500 gcc_assert (known_eq (nunits, TYPE_VECTOR_SUBPARTS (vectype)));
8501
8502 mask_type = build_vector_type (ssizetype, nunits);
8503 return vec_perm_indices_to_tree (mask_type, sel);
8504 }
8505
8506 /* Checked version of vect_gen_perm_mask_any. Asserts can_vec_perm_const_p,
8507 i.e. that the target supports the pattern _for arbitrary input vectors_. */
8508
8509 tree
8510 vect_gen_perm_mask_checked (tree vectype, const vec_perm_indices &sel)
8511 {
8512 gcc_assert (can_vec_perm_const_p (TYPE_MODE (vectype), sel));
8513 return vect_gen_perm_mask_any (vectype, sel);
8514 }
8515
8516 /* Given vector variables X and Y that were generated for the scalar
8517 STMT_INFO, generate instructions to permute the vector elements of X and Y
8518 using permutation mask MASK_VEC, insert them at *GSI and return the
8519 permuted vector variable. */
8520
8521 static tree
8522 permute_vec_elements (vec_info *vinfo,
8523 tree x, tree y, tree mask_vec, stmt_vec_info stmt_info,
8524 gimple_stmt_iterator *gsi)
8525 {
8526 tree vectype = TREE_TYPE (x);
8527 tree perm_dest, data_ref;
8528 gimple *perm_stmt;
8529
8530 tree scalar_dest = gimple_get_lhs (stmt_info->stmt);
8531 if (scalar_dest && TREE_CODE (scalar_dest) == SSA_NAME)
8532 perm_dest = vect_create_destination_var (scalar_dest, vectype);
8533 else
8534 perm_dest = vect_get_new_vect_var (vectype, vect_simple_var, NULL);
8535 data_ref = make_ssa_name (perm_dest);
8536
8537 /* Generate the permute statement. */
8538 perm_stmt = gimple_build_assign (data_ref, VEC_PERM_EXPR, x, y, mask_vec);
8539 vect_finish_stmt_generation (vinfo, stmt_info, perm_stmt, gsi);
8540
8541 return data_ref;
8542 }
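/* Illustrative sketch (not used by the vectorizer itself): a scalar model
   of the VEC_PERM_EXPR built above, assuming the usual GIMPLE semantics
   where each selector element indexes into the concatenation of the two
   input vectors.  N stands in for TYPE_VECTOR_SUBPARTS (vectype); the
   reversal used for VMAT_CONTIGUOUS_REVERSE corresponds to the selector
   { N-1, N-2, ..., 0 } with X == Y.

     void
     vec_perm_model (int n, const int *x, const int *y,
                     const unsigned int *sel, int *res)
     {
       for (int i = 0; i < n; i++)
         res[i] = (sel[i] < (unsigned int) n
                   ? x[sel[i]] : y[sel[i] - n]);
     }
*/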
8543
8544 /* Hoist the definitions of all SSA uses on STMT_INFO out of the loop LOOP,
8545 inserting them on the loop's preheader edge. Returns true if we
8546 were successful in doing so (and thus STMT_INFO can then be moved),
8547 otherwise returns false. */
8548
8549 static bool
8550 hoist_defs_of_uses (stmt_vec_info stmt_info, class loop *loop)
8551 {
8552 ssa_op_iter i;
8553 tree op;
8554 bool any = false;
8555
8556 FOR_EACH_SSA_TREE_OPERAND (op, stmt_info->stmt, i, SSA_OP_USE)
8557 {
8558 gimple *def_stmt = SSA_NAME_DEF_STMT (op);
8559 if (!gimple_nop_p (def_stmt)
8560 && flow_bb_inside_loop_p (loop, gimple_bb (def_stmt)))
8561 {
8562 /* Make sure we don't need to recurse. While we could do
8563 so in simple cases, when there are more complex use webs
8564 we don't have an easy way to preserve stmt order to fulfil
8565 dependencies within them. */
8566 tree op2;
8567 ssa_op_iter i2;
8568 if (gimple_code (def_stmt) == GIMPLE_PHI)
8569 return false;
8570 FOR_EACH_SSA_TREE_OPERAND (op2, def_stmt, i2, SSA_OP_USE)
8571 {
8572 gimple *def_stmt2 = SSA_NAME_DEF_STMT (op2);
8573 if (!gimple_nop_p (def_stmt2)
8574 && flow_bb_inside_loop_p (loop, gimple_bb (def_stmt2)))
8575 return false;
8576 }
8577 any = true;
8578 }
8579 }
8580
8581 if (!any)
8582 return true;
8583
8584 FOR_EACH_SSA_TREE_OPERAND (op, stmt_info->stmt, i, SSA_OP_USE)
8585 {
8586 gimple *def_stmt = SSA_NAME_DEF_STMT (op);
8587 if (!gimple_nop_p (def_stmt)
8588 && flow_bb_inside_loop_p (loop, gimple_bb (def_stmt)))
8589 {
8590 gimple_stmt_iterator gsi = gsi_for_stmt (def_stmt);
8591 gsi_remove (&gsi, false);
8592 gsi_insert_on_edge_immediate (loop_preheader_edge (loop), def_stmt);
8593 }
8594 }
8595
8596 return true;
8597 }
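/* Illustrative sketch (not used by the vectorizer itself) of the effect
   hoist_defs_of_uses enables for the VMAT_INVARIANT path below: once the
   statements defining the load's operands are moved to the preheader,
   the invariant load itself can be emitted there too.  The functions are
   hypothetical and only model the before/after shape.

     int before (int *base, long off, int n)
     {
       int sum = 0;
       for (int i = 0; i < n; i++)
         {
           int *p = base + off;   // def of a use of the load
           sum += *p;             // loop-invariant load
         }
       return sum;
     }

     int after (int *base, long off, int n)
     {
       int *p = base + off;       // hoisted def
       int x = *p;                // hoisted load
       int sum = 0;
       for (int i = 0; i < n; i++)
         sum += x;
       return sum;
     }
*/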
8598
8599 /* vectorizable_load.
8600
8601 Check if STMT_INFO reads a non-scalar data-ref (array/pointer/structure)
8602 that can be vectorized.
8603 If VEC_STMT is also passed, vectorize STMT_INFO: create a vectorized
8604 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
8605 Return true if STMT_INFO is vectorizable in this way. */
8606
8607 static bool
8608 vectorizable_load (vec_info *vinfo,
8609 stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
8610 stmt_vec_info *vec_stmt, slp_tree slp_node,
8611 stmt_vector_for_cost *cost_vec)
8612 {
8613 tree scalar_dest;
8614 tree vec_dest = NULL;
8615 tree data_ref = NULL;
8616 stmt_vec_info prev_stmt_info;
8617 loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
8618 class loop *loop = NULL;
8619 class loop *containing_loop = gimple_bb (stmt_info->stmt)->loop_father;
8620 bool nested_in_vect_loop = false;
8621 tree elem_type;
8622 tree new_temp;
8623 machine_mode mode;
8624 tree dummy;
8625 enum dr_alignment_support alignment_support_scheme;
8626 tree dataref_ptr = NULL_TREE;
8627 tree dataref_offset = NULL_TREE;
8628 gimple *ptr_incr = NULL;
8629 int ncopies;
8630 int i, j;
8631 unsigned int group_size;
8632 poly_uint64 group_gap_adj;
8633 tree msq = NULL_TREE, lsq;
8634 tree offset = NULL_TREE;
8635 tree byte_offset = NULL_TREE;
8636 tree realignment_token = NULL_TREE;
8637 gphi *phi = NULL;
8638 vec<tree> dr_chain = vNULL;
8639 bool grouped_load = false;
8640 stmt_vec_info first_stmt_info;
8641 stmt_vec_info first_stmt_info_for_drptr = NULL;
8642 bool compute_in_loop = false;
8643 class loop *at_loop;
8644 int vec_num;
8645 bool slp = (slp_node != NULL);
8646 bool slp_perm = false;
8647 bb_vec_info bb_vinfo = dyn_cast <bb_vec_info> (vinfo);
8648 poly_uint64 vf;
8649 tree aggr_type;
8650 gather_scatter_info gs_info;
8651 tree ref_type;
8652 enum vect_def_type mask_dt = vect_unknown_def_type;
8653
8654 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
8655 return false;
8656
8657 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
8658 && ! vec_stmt)
8659 return false;
8660
8661 tree mask = NULL_TREE, mask_vectype = NULL_TREE;
8662 if (gassign *assign = dyn_cast <gassign *> (stmt_info->stmt))
8663 {
8664 scalar_dest = gimple_assign_lhs (assign);
8665 if (TREE_CODE (scalar_dest) != SSA_NAME)
8666 return false;
8667
8668 tree_code code = gimple_assign_rhs_code (assign);
8669 if (code != ARRAY_REF
8670 && code != BIT_FIELD_REF
8671 && code != INDIRECT_REF
8672 && code != COMPONENT_REF
8673 && code != IMAGPART_EXPR
8674 && code != REALPART_EXPR
8675 && code != MEM_REF
8676 && TREE_CODE_CLASS (code) != tcc_declaration)
8677 return false;
8678 }
8679 else
8680 {
8681 gcall *call = dyn_cast <gcall *> (stmt_info->stmt);
8682 if (!call || !gimple_call_internal_p (call))
8683 return false;
8684
8685 internal_fn ifn = gimple_call_internal_fn (call);
8686 if (!internal_load_fn_p (ifn))
8687 return false;
8688
8689 scalar_dest = gimple_call_lhs (call);
8690 if (!scalar_dest)
8691 return false;
8692
8693 int mask_index = internal_fn_mask_index (ifn);
8694 if (mask_index >= 0)
8695 {
8696 mask = gimple_call_arg (call, mask_index);
8697 if (!vect_check_scalar_mask (vinfo, stmt_info, mask, &mask_dt,
8698 &mask_vectype))
8699 return false;
8700 }
8701 }
8702
8703 if (!STMT_VINFO_DATA_REF (stmt_info))
8704 return false;
8705
8706 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
8707 poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
8708
8709 if (loop_vinfo)
8710 {
8711 loop = LOOP_VINFO_LOOP (loop_vinfo);
8712 nested_in_vect_loop = nested_in_vect_loop_p (loop, stmt_info);
8713 vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
8714 }
8715 else
8716 vf = 1;
8717
8718 /* Multiple types in SLP are handled by creating the appropriate number of
8719 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
8720 case of SLP. */
8721 if (slp)
8722 ncopies = 1;
8723 else
8724 ncopies = vect_get_num_copies (loop_vinfo, vectype);
8725
8726 gcc_assert (ncopies >= 1);
8727
8728 /* FORNOW. This restriction should be relaxed. */
8729 if (nested_in_vect_loop && ncopies > 1)
8730 {
8731 if (dump_enabled_p ())
8732 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
8733 "multiple types in nested loop.\n");
8734 return false;
8735 }
8736
8737 /* Invalidate assumptions made by dependence analysis when vectorization
8738 on the unrolled body effectively re-orders stmts. */
8739 if (ncopies > 1
8740 && STMT_VINFO_MIN_NEG_DIST (stmt_info) != 0
8741 && maybe_gt (LOOP_VINFO_VECT_FACTOR (loop_vinfo),
8742 STMT_VINFO_MIN_NEG_DIST (stmt_info)))
8743 {
8744 if (dump_enabled_p ())
8745 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
8746 "cannot perform implicit CSE when unrolling "
8747 "with negative dependence distance\n");
8748 return false;
8749 }
8750
8751 elem_type = TREE_TYPE (vectype);
8752 mode = TYPE_MODE (vectype);
8753
8754 /* FORNOW. In some cases we can vectorize even if the data-type is not
8755 supported (e.g. data copies). */
8756 if (optab_handler (mov_optab, mode) == CODE_FOR_nothing)
8757 {
8758 if (dump_enabled_p ())
8759 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
8760 "Aligned load, but unsupported type.\n");
8761 return false;
8762 }
8763
8764 /* Check if the load is a part of an interleaving chain. */
8765 if (STMT_VINFO_GROUPED_ACCESS (stmt_info))
8766 {
8767 grouped_load = true;
8768 /* FORNOW */
8769 gcc_assert (!nested_in_vect_loop);
8770 gcc_assert (!STMT_VINFO_GATHER_SCATTER_P (stmt_info));
8771
8772 first_stmt_info = DR_GROUP_FIRST_ELEMENT (stmt_info);
8773 group_size = DR_GROUP_SIZE (first_stmt_info);
8774
8775 /* Refuse non-SLP vectorization of SLP-only groups. */
8776 if (!slp && STMT_VINFO_SLP_VECT_ONLY (first_stmt_info))
8777 {
8778 if (dump_enabled_p ())
8779 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
8780 "cannot vectorize load in non-SLP mode.\n");
8781 return false;
8782 }
8783
8784 if (slp && SLP_TREE_LOAD_PERMUTATION (slp_node).exists ())
8785 {
8786 slp_perm = true;
8787
8788 if (!loop_vinfo)
8789 {
8790 /* In BB vectorization we may not actually use a loaded vector
8791 accessing elements in excess of DR_GROUP_SIZE. */
8792 stmt_vec_info group_info = SLP_TREE_SCALAR_STMTS (slp_node)[0];
8793 group_info = DR_GROUP_FIRST_ELEMENT (group_info);
8794 unsigned HOST_WIDE_INT nunits;
8795 unsigned j, k, maxk = 0;
8796 FOR_EACH_VEC_ELT (SLP_TREE_LOAD_PERMUTATION (slp_node), j, k)
8797 if (k > maxk)
8798 maxk = k;
8799 tree vectype = STMT_VINFO_VECTYPE (group_info);
8800 if (!TYPE_VECTOR_SUBPARTS (vectype).is_constant (&nunits)
8801 || maxk >= (DR_GROUP_SIZE (group_info) & ~(nunits - 1)))
8802 {
8803 if (dump_enabled_p ())
8804 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
8805 "BB vectorization with gaps at the end of "
8806 "a load is not supported\n");
8807 return false;
8808 }
8809 }
8810
8811 auto_vec<tree> tem;
8812 unsigned n_perms;
8813 if (!vect_transform_slp_perm_load (vinfo, slp_node, tem, NULL, vf,
8814 true, &n_perms))
8815 {
8816 if (dump_enabled_p ())
8817 dump_printf_loc (MSG_MISSED_OPTIMIZATION,
8818 vect_location,
8819 "unsupported load permutation\n");
8820 return false;
8821 }
8822 }
8823
8824 /* Invalidate assumptions made by dependence analysis when vectorization
8825 on the unrolled body effectively re-orders stmts. */
8826 if (!PURE_SLP_STMT (stmt_info)
8827 && STMT_VINFO_MIN_NEG_DIST (stmt_info) != 0
8828 && maybe_gt (LOOP_VINFO_VECT_FACTOR (loop_vinfo),
8829 STMT_VINFO_MIN_NEG_DIST (stmt_info)))
8830 {
8831 if (dump_enabled_p ())
8832 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
8833 "cannot perform implicit CSE when performing "
8834 "group loads with negative dependence distance\n");
8835 return false;
8836 }
8837 }
8838 else
8839 group_size = 1;
8840
8841 vect_memory_access_type memory_access_type;
8842 if (!get_load_store_type (vinfo, stmt_info, vectype, slp, mask, VLS_LOAD,
8843 ncopies, &memory_access_type, &gs_info))
8844 return false;
8845
8846 if (mask)
8847 {
8848 if (memory_access_type == VMAT_CONTIGUOUS)
8849 {
8850 machine_mode vec_mode = TYPE_MODE (vectype);
8851 if (!VECTOR_MODE_P (vec_mode)
8852 || !can_vec_mask_load_store_p (vec_mode,
8853 TYPE_MODE (mask_vectype), true))
8854 return false;
8855 }
8856 else if (memory_access_type != VMAT_LOAD_STORE_LANES
8857 && memory_access_type != VMAT_GATHER_SCATTER)
8858 {
8859 if (dump_enabled_p ())
8860 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
8861 "unsupported access type for masked load.\n");
8862 return false;
8863 }
8864 }
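/* Illustrative sketch (not used by the vectorizer itself): the per-lane
   behaviour a masked contiguous load is expected to provide - lanes whose
   mask bit is clear must not access memory.  The zero written for such
   lanes below is only a placeholder for the sketch.

     void
     mask_load_model (int n, const int *ptr, const _Bool *mask, int *res)
     {
       for (int i = 0; i < n; i++)
         res[i] = mask[i] ? ptr[i] : 0;
     }
*/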
8865
8866 if (!vec_stmt) /* transformation not required. */
8867 {
8868 if (!slp)
8869 STMT_VINFO_MEMORY_ACCESS_TYPE (stmt_info) = memory_access_type;
8870
8871 if (loop_vinfo
8872 && LOOP_VINFO_CAN_FULLY_MASK_P (loop_vinfo))
8873 check_load_store_masking (loop_vinfo, vectype, VLS_LOAD, group_size,
8874 memory_access_type, &gs_info, mask);
8875
8876 STMT_VINFO_TYPE (stmt_info) = load_vec_info_type;
8877 vect_model_load_cost (vinfo, stmt_info, ncopies, vf, memory_access_type,
8878 slp_node, cost_vec);
8879 return true;
8880 }
8881
8882 if (!slp)
8883 gcc_assert (memory_access_type
8884 == STMT_VINFO_MEMORY_ACCESS_TYPE (stmt_info));
8885
8886 if (dump_enabled_p ())
8887 dump_printf_loc (MSG_NOTE, vect_location,
8888 "transform load. ncopies = %d\n", ncopies);
8889
8890 /* Transform. */
8891
8892 dr_vec_info *dr_info = STMT_VINFO_DR_INFO (stmt_info), *first_dr_info = NULL;
8893 ensure_base_align (dr_info);
8894
8895 if (memory_access_type == VMAT_GATHER_SCATTER && gs_info.decl)
8896 {
8897 vect_build_gather_load_calls (vinfo,
8898 stmt_info, gsi, vec_stmt, &gs_info, mask);
8899 return true;
8900 }
8901
8902 if (memory_access_type == VMAT_INVARIANT)
8903 {
8904 gcc_assert (!grouped_load && !mask && !bb_vinfo);
8905 /* If we have versioned for aliasing or the loop doesn't
8906 have any data dependencies that would preclude this,
8907 then we are sure this is a loop invariant load and
8908 thus we can insert it on the preheader edge. */
8909 bool hoist_p = (LOOP_VINFO_NO_DATA_DEPENDENCIES (loop_vinfo)
8910 && !nested_in_vect_loop
8911 && hoist_defs_of_uses (stmt_info, loop));
8912 if (hoist_p)
8913 {
8914 gassign *stmt = as_a <gassign *> (stmt_info->stmt);
8915 if (dump_enabled_p ())
8916 dump_printf_loc (MSG_NOTE, vect_location,
8917 "hoisting out of the vectorized loop: %G", stmt);
8918 scalar_dest = copy_ssa_name (scalar_dest);
8919 tree rhs = unshare_expr (gimple_assign_rhs1 (stmt));
8920 gsi_insert_on_edge_immediate
8921 (loop_preheader_edge (loop),
8922 gimple_build_assign (scalar_dest, rhs));
8923 }
8924 /* These copies are all equivalent, but currently the representation
8925 requires a separate STMT_VINFO_VEC_STMT for each one. */
8926 prev_stmt_info = NULL;
8927 gimple_stmt_iterator gsi2 = *gsi;
8928 gsi_next (&gsi2);
8929 for (j = 0; j < ncopies; j++)
8930 {
8931 stmt_vec_info new_stmt_info;
8932 if (hoist_p)
8933 {
8934 new_temp = vect_init_vector (vinfo, stmt_info, scalar_dest,
8935 vectype, NULL);
8936 gimple *new_stmt = SSA_NAME_DEF_STMT (new_temp);
8937 new_stmt_info = vinfo->add_stmt (new_stmt);
8938 }
8939 else
8940 {
8941 new_temp = vect_init_vector (vinfo, stmt_info, scalar_dest,
8942 vectype, &gsi2);
8943 new_stmt_info = vinfo->lookup_def (new_temp);
8944 }
8945 if (slp)
8946 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt_info);
8947 else if (j == 0)
8948 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt_info;
8949 else
8950 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt_info;
8951 prev_stmt_info = new_stmt_info;
8952 }
8953 return true;
8954 }
8955
8956 if (memory_access_type == VMAT_ELEMENTWISE
8957 || memory_access_type == VMAT_STRIDED_SLP)
8958 {
8959 gimple_stmt_iterator incr_gsi;
8960 bool insert_after;
8961 gimple *incr;
8962 tree offvar;
8963 tree ivstep;
8964 tree running_off;
8965 vec<constructor_elt, va_gc> *v = NULL;
8966 tree stride_base, stride_step, alias_off;
8967 /* Checked by get_load_store_type. */
8968 unsigned int const_nunits = nunits.to_constant ();
8969 unsigned HOST_WIDE_INT cst_offset = 0;
8970 tree dr_offset;
8971
8972 gcc_assert (!LOOP_VINFO_FULLY_MASKED_P (loop_vinfo));
8973 gcc_assert (!nested_in_vect_loop);
8974
8975 if (grouped_load)
8976 {
8977 first_stmt_info = DR_GROUP_FIRST_ELEMENT (stmt_info);
8978 first_dr_info = STMT_VINFO_DR_INFO (first_stmt_info);
8979 }
8980 else
8981 {
8982 first_stmt_info = stmt_info;
8983 first_dr_info = dr_info;
8984 }
8985 if (slp && grouped_load)
8986 {
8987 group_size = DR_GROUP_SIZE (first_stmt_info);
8988 ref_type = get_group_alias_ptr_type (first_stmt_info);
8989 }
8990 else
8991 {
8992 if (grouped_load)
8993 cst_offset
8994 = (tree_to_uhwi (TYPE_SIZE_UNIT (TREE_TYPE (vectype)))
8995 * vect_get_place_in_interleaving_chain (stmt_info,
8996 first_stmt_info));
8997 group_size = 1;
8998 ref_type = reference_alias_ptr_type (DR_REF (dr_info->dr));
8999 }
9000
9001 dr_offset = get_dr_vinfo_offset (vinfo, first_dr_info);
9002 stride_base
9003 = fold_build_pointer_plus
9004 (DR_BASE_ADDRESS (first_dr_info->dr),
9005 size_binop (PLUS_EXPR,
9006 convert_to_ptrofftype (dr_offset),
9007 convert_to_ptrofftype (DR_INIT (first_dr_info->dr))));
9008 stride_step = fold_convert (sizetype, DR_STEP (first_dr_info->dr));
9009
9010 /* For a load with loop-invariant (but other than power-of-2)
9011 stride (i.e. not a grouped access) like so:
9012
9013 for (i = 0; i < n; i += stride)
9014 ... = array[i];
9015
9016 we generate a new induction variable and new accesses to
9017 form a new vector (or vectors, depending on ncopies):
9018
9019 for (j = 0; ; j += VF*stride)
9020 tmp1 = array[j];
9021 tmp2 = array[j + stride];
9022 ...
9023 vectemp = {tmp1, tmp2, ...}
9024 */
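/* Illustrative sketch (not used by the vectorizer itself): a concrete
   instance of the transformation above for VF = 4, with a plain array
   standing in for the vector temporary.

     void
     strided_load_model (const int *array, long stride, long n, int *out)
     {
       long oi = 0;
       for (long j = 0; j + 3 * stride < n; j += 4 * stride)
         {
           int vectemp[4];              // one 4-lane vector
           vectemp[0] = array[j];
           vectemp[1] = array[j + stride];
           vectemp[2] = array[j + 2 * stride];
           vectemp[3] = array[j + 3 * stride];
           for (int k = 0; k < 4; k++)  // ... consume the vector ...
             out[oi++] = vectemp[k];
         }
     }
*/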
9025
9026 ivstep = fold_build2 (MULT_EXPR, TREE_TYPE (stride_step), stride_step,
9027 build_int_cst (TREE_TYPE (stride_step), vf));
9028
9029 standard_iv_increment_position (loop, &incr_gsi, &insert_after);
9030
9031 stride_base = cse_and_gimplify_to_preheader (loop_vinfo, stride_base);
9032 ivstep = cse_and_gimplify_to_preheader (loop_vinfo, ivstep);
9033 create_iv (stride_base, ivstep, NULL,
9034 loop, &incr_gsi, insert_after,
9035 &offvar, NULL);
9036 incr = gsi_stmt (incr_gsi);
9037 loop_vinfo->add_stmt (incr);
9038
9039 stride_step = cse_and_gimplify_to_preheader (loop_vinfo, stride_step);
9040
9041 prev_stmt_info = NULL;
9042 running_off = offvar;
9043 alias_off = build_int_cst (ref_type, 0);
9044 int nloads = const_nunits;
9045 int lnel = 1;
9046 tree ltype = TREE_TYPE (vectype);
9047 tree lvectype = vectype;
9048 auto_vec<tree> dr_chain;
9049 if (memory_access_type == VMAT_STRIDED_SLP)
9050 {
9051 if (group_size < const_nunits)
9052 {
9053 /* First check if vec_init optab supports construction from vector
9054 elts directly. Otherwise avoid emitting a constructor of
9055 vector elements by performing the loads using an integer type
9056 of the same size, constructing a vector of those and then
9057 re-interpreting it as the original vector type. This avoids a
9058 huge runtime penalty due to the general inability to perform
9059 store forwarding from smaller stores to a larger load. */
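/* Illustrative sketch (not used by the vectorizer itself) of the
   composition trick just described, for group_size == 2 and
   const_nunits == 4 with 32-bit elements: each pair of lanes is loaded
   as one 64-bit integer and the two pieces are then reinterpreted as
   four 32-bit lanes (the VIEW_CONVERT_EXPR emitted further below).

     #include <stdint.h>
     #include <string.h>

     void
     composed_load_model (const int32_t *p0, const int32_t *p1,
                          int32_t out[4])
     {
       uint64_t piece[2];
       memcpy (&piece[0], p0, sizeof piece[0]);  // lanes 0-1 as one load
       memcpy (&piece[1], p1, sizeof piece[1]);  // lanes 2-3 as one load
       memcpy (out, piece, sizeof piece);        // reinterpret as 4 x i32
     }
*/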
9060 tree ptype;
9061 tree vtype
9062 = vector_vector_composition_type (vectype,
9063 const_nunits / group_size,
9064 &ptype);
9065 if (vtype != NULL_TREE)
9066 {
9067 nloads = const_nunits / group_size;
9068 lnel = group_size;
9069 lvectype = vtype;
9070 ltype = ptype;
9071 }
9072 }
9073 else
9074 {
9075 nloads = 1;
9076 lnel = const_nunits;
9077 ltype = vectype;
9078 }
9079 ltype = build_aligned_type (ltype, TYPE_ALIGN (TREE_TYPE (vectype)));
9080 }
9081 /* Load vector(1) scalar_type if the vectype has just a single element. */
9082 else if (nloads == 1)
9083 ltype = vectype;
9084
9085 if (slp)
9086 {
9087 /* For SLP permutation support we need to load the whole group,
9088 not only the number of vector stmts the permutation result
9089 fits in. */
9090 if (slp_perm)
9091 {
9092 /* We don't yet generate SLP_TREE_LOAD_PERMUTATIONs for
9093 variable VF. */
9094 unsigned int const_vf = vf.to_constant ();
9095 ncopies = CEIL (group_size * const_vf, const_nunits);
9096 dr_chain.create (ncopies);
9097 }
9098 else
9099 ncopies = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
9100 }
9101 unsigned int group_el = 0;
9102 unsigned HOST_WIDE_INT
9103 elsz = tree_to_uhwi (TYPE_SIZE_UNIT (TREE_TYPE (vectype)));
9104 for (j = 0; j < ncopies; j++)
9105 {
9106 if (nloads > 1)
9107 vec_alloc (v, nloads);
9108 stmt_vec_info new_stmt_info = NULL;
9109 for (i = 0; i < nloads; i++)
9110 {
9111 tree this_off = build_int_cst (TREE_TYPE (alias_off),
9112 group_el * elsz + cst_offset);
9113 tree data_ref = build2 (MEM_REF, ltype, running_off, this_off);
9114 vect_copy_ref_info (data_ref, DR_REF (first_dr_info->dr));
9115 gassign *new_stmt
9116 = gimple_build_assign (make_ssa_name (ltype), data_ref);
9117 new_stmt_info
9118 = vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
9119 if (nloads > 1)
9120 CONSTRUCTOR_APPEND_ELT (v, NULL_TREE,
9121 gimple_assign_lhs (new_stmt));
9122
9123 group_el += lnel;
9124 if (! slp
9125 || group_el == group_size)
9126 {
9127 tree newoff = copy_ssa_name (running_off);
9128 gimple *incr = gimple_build_assign (newoff, POINTER_PLUS_EXPR,
9129 running_off, stride_step);
9130 vect_finish_stmt_generation (vinfo, stmt_info, incr, gsi);
9131
9132 running_off = newoff;
9133 group_el = 0;
9134 }
9135 }
9136 if (nloads > 1)
9137 {
9138 tree vec_inv = build_constructor (lvectype, v);
9139 new_temp = vect_init_vector (vinfo, stmt_info,
9140 vec_inv, lvectype, gsi);
9141 new_stmt_info = vinfo->lookup_def (new_temp);
9142 if (lvectype != vectype)
9143 {
9144 gassign *new_stmt
9145 = gimple_build_assign (make_ssa_name (vectype),
9146 VIEW_CONVERT_EXPR,
9147 build1 (VIEW_CONVERT_EXPR,
9148 vectype, new_temp));
9149 new_stmt_info
9150 = vect_finish_stmt_generation (vinfo, stmt_info,
9151 new_stmt, gsi);
9152 }
9153 }
9154
9155 if (slp)
9156 {
9157 if (slp_perm)
9158 dr_chain.quick_push (gimple_assign_lhs (new_stmt_info->stmt));
9159 else
9160 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt_info);
9161 }
9162 else
9163 {
9164 if (j == 0)
9165 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt_info;
9166 else
9167 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt_info;
9168 prev_stmt_info = new_stmt_info;
9169 }
9170 }
9171 if (slp_perm)
9172 {
9173 unsigned n_perms;
9174 vect_transform_slp_perm_load (vinfo, slp_node, dr_chain, gsi, vf,
9175 false, &n_perms);
9176 }
9177 return true;
9178 }
9179
9180 if (memory_access_type == VMAT_GATHER_SCATTER
9181 || (!slp && memory_access_type == VMAT_CONTIGUOUS))
9182 grouped_load = false;
9183
9184 if (grouped_load)
9185 {
9186 first_stmt_info = DR_GROUP_FIRST_ELEMENT (stmt_info);
9187 group_size = DR_GROUP_SIZE (first_stmt_info);
9188 /* For SLP vectorization we directly vectorize a subchain
9189 without permutation. */
9190 if (slp && ! SLP_TREE_LOAD_PERMUTATION (slp_node).exists ())
9191 first_stmt_info = SLP_TREE_SCALAR_STMTS (slp_node)[0];
9192 /* For BB vectorization always use the first stmt to base
9193 the data ref pointer on. */
9194 if (bb_vinfo)
9195 first_stmt_info_for_drptr = SLP_TREE_SCALAR_STMTS (slp_node)[0];
9196
9197 /* Check if the chain of loads is already vectorized. */
9198 if (STMT_VINFO_VEC_STMT (first_stmt_info)
9199 /* For SLP we would need to copy over SLP_TREE_VEC_STMTS.
9200 ??? But we can only do so if there is exactly one
9201 as we have no way to get at the rest. Leave the CSE
9202 opportunity alone.
9203 ??? With the group load eventually participating
9204 in multiple different permutations (having multiple
9205 slp nodes which refer to the same group) the CSE
9206 is even wrong code. See PR56270. */
9207 && !slp)
9208 {
9209 *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
9210 return true;
9211 }
9212 first_dr_info = STMT_VINFO_DR_INFO (first_stmt_info);
9213 group_gap_adj = 0;
9214
9215 /* VEC_NUM is the number of vect stmts to be created for this group. */
9216 if (slp)
9217 {
9218 grouped_load = false;
9219 /* If an SLP permutation is from N elements to N elements,
9220 and if one vector holds a whole number of N, we can load
9221 the inputs to the permutation in the same way as an
9222 unpermuted sequence. In other cases we need to load the
9223 whole group, not only the number of vector stmts the
9224 permutation result fits in. */
9225 unsigned scalar_lanes = SLP_TREE_SCALAR_STMTS (slp_node).length ();
9226 if (slp_perm
9227 && (group_size != scalar_lanes
9228 || !multiple_p (nunits, group_size)))
9229 {
9230 /* We don't yet generate such SLP_TREE_LOAD_PERMUTATIONs for
9231 variable VF; see vect_transform_slp_perm_load. */
9232 unsigned int const_vf = vf.to_constant ();
9233 unsigned int const_nunits = nunits.to_constant ();
9234 vec_num = CEIL (group_size * const_vf, const_nunits);
9235 group_gap_adj = vf * group_size - nunits * vec_num;
9236 }
9237 else
9238 {
9239 vec_num = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
9240 group_gap_adj
9241 = group_size - scalar_lanes;
9242 }
9243 }
9244 else
9245 vec_num = group_size;
9246
9247 ref_type = get_group_alias_ptr_type (first_stmt_info);
9248 }
9249 else
9250 {
9251 first_stmt_info = stmt_info;
9252 first_dr_info = dr_info;
9253 group_size = vec_num = 1;
9254 group_gap_adj = 0;
9255 ref_type = reference_alias_ptr_type (DR_REF (first_dr_info->dr));
9256 }
9257
9258 /* Gather-scatter accesses perform only component accesses, alignment
9259 is irrelevant for them. */
9260 if (memory_access_type == VMAT_GATHER_SCATTER)
9261 alignment_support_scheme = dr_unaligned_supported;
9262 else
9263 alignment_support_scheme
9264 = vect_supportable_dr_alignment (vinfo, first_dr_info, false);
9265
9266 gcc_assert (alignment_support_scheme);
9267 vec_loop_masks *loop_masks
9268 = (loop_vinfo && LOOP_VINFO_FULLY_MASKED_P (loop_vinfo)
9269 ? &LOOP_VINFO_MASKS (loop_vinfo)
9270 : NULL);
9271 /* Targets with store-lane instructions must not require explicit
9272 realignment. vect_supportable_dr_alignment always returns either
9273 dr_aligned or dr_unaligned_supported for masked operations. */
9274 gcc_assert ((memory_access_type != VMAT_LOAD_STORE_LANES
9275 && !mask
9276 && !loop_masks)
9277 || alignment_support_scheme == dr_aligned
9278 || alignment_support_scheme == dr_unaligned_supported);
9279
9280 /* In case the vectorization factor (VF) is bigger than the number
9281 of elements that we can fit in a vectype (nunits), we have to generate
9282 more than one vector stmt, i.e. we need to "unroll" the
9283 vector stmt by a factor VF/nunits. In doing so, we record a pointer
9284 from one copy of the vector stmt to the next, in the field
9285 STMT_VINFO_RELATED_STMT. This is necessary in order to allow following
9286 stages to find the correct vector defs to be used when vectorizing
9287 stmts that use the defs of the current stmt. The example below
9288 illustrates the vectorization process when VF=16 and nunits=4 (i.e., we
9289 need to create 4 vectorized stmts):
9290
9291 before vectorization:
9292 RELATED_STMT VEC_STMT
9293 S1: x = memref - -
9294 S2: z = x + 1 - -
9295
9296 step 1: vectorize stmt S1:
9297 We first create the vector stmt VS1_0, and, as usual, record a
9298 pointer to it in the STMT_VINFO_VEC_STMT of the scalar stmt S1.
9299 Next, we create the vector stmt VS1_1, and record a pointer to
9300 it in the STMT_VINFO_RELATED_STMT of the vector stmt VS1_0.
9301 Similarly, for VS1_2 and VS1_3. This is the resulting chain of
9302 stmts and pointers:
9303 RELATED_STMT VEC_STMT
9304 VS1_0: vx0 = memref0 VS1_1 -
9305 VS1_1: vx1 = memref1 VS1_2 -
9306 VS1_2: vx2 = memref2 VS1_3 -
9307 VS1_3: vx3 = memref3 - -
9308 S1: x = load - VS1_0
9309 S2: z = x + 1 - -
9310
9311 See the documentation of vect_get_vec_def_for_stmt_copy for how the
9312 information we recorded in the RELATED_STMT field is used to vectorize
9313 stmt S2. */
9314
9315 /* In case of interleaving (non-unit grouped access):
9316
9317 S1: x2 = &base + 2
9318 S2: x0 = &base
9319 S3: x1 = &base + 1
9320 S4: x3 = &base + 3
9321
9322 Vectorized loads are created in the order of memory accesses
9323 starting from the access of the first stmt of the chain:
9324
9325 VS1: vx0 = &base
9326 VS2: vx1 = &base + vec_size*1
9327 VS3: vx3 = &base + vec_size*2
9328 VS4: vx4 = &base + vec_size*3
9329
9330 Then permutation statements are generated:
9331
9332 VS5: vx5 = VEC_PERM_EXPR < vx0, vx1, { 0, 2, ..., i*2 } >
9333 VS6: vx6 = VEC_PERM_EXPR < vx0, vx1, { 1, 3, ..., i*2+1 } >
9334 ...
9335
9336 And they are put in STMT_VINFO_VEC_STMT of the corresponding scalar stmts
9337 (the order of the data-refs in the output of vect_permute_load_chain
9338 corresponds to the order of scalar stmts in the interleaving chain - see
9339 the documentation of vect_permute_load_chain()).
9340 The generation of permutation stmts and recording them in
9341 STMT_VINFO_VEC_STMT is done in vect_transform_grouped_load().
9342
9343 In case of both multiple types and interleaving, the vector loads and
9344 permutation stmts above are created for every copy. The result vector
9345 stmts are put in STMT_VINFO_VEC_STMT for the first copy and in the
9346 corresponding STMT_VINFO_RELATED_STMT for the next copies. */
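/* Illustrative sketch (not used by the vectorizer itself): the scheme
   above for a group of two interleaved accesses and 4-lane vectors.
   Two contiguous vector loads are followed by even/odd permutes,
   matching the { 0, 2, ... } and { 1, 3, ... } selectors shown.

     void
     deinterleave_model (const int *base, int even[4], int odd[4])
     {
       int vx0[4], vx1[4];
       for (int i = 0; i < 4; i++)        // VS1/VS2: contiguous loads
         {
           vx0[i] = base[i];
           vx1[i] = base[4 + i];
         }
       for (int i = 0; i < 4; i++)        // VS5/VS6: even/odd permutes
         {
           even[i] = i < 2 ? vx0[2 * i] : vx1[2 * i - 4];
           odd[i] = i < 2 ? vx0[2 * i + 1] : vx1[2 * i - 3];
         }
     }
*/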
9347
9348 /* If the data reference is aligned (dr_aligned) or potentially unaligned
9349 on a target that supports unaligned accesses (dr_unaligned_supported)
9350 we generate the following code:
9351 p = initial_addr;
9352 indx = 0;
9353 loop {
9354 p = p + indx * vectype_size;
9355 vec_dest = *(p);
9356 indx = indx + 1;
9357 }
9358
9359 Otherwise, the data reference is potentially unaligned on a target that
9360 does not support unaligned accesses (dr_explicit_realign_optimized) -
9361 then generate the following code, in which the data in each iteration is
9362 obtained by two vector loads, one from the previous iteration, and one
9363 from the current iteration:
9364 p1 = initial_addr;
9365 msq_init = *(floor(p1))
9366 p2 = initial_addr + VS - 1;
9367 realignment_token = call target_builtin;
9368 indx = 0;
9369 loop {
9370 p2 = p2 + indx * vectype_size
9371 lsq = *(floor(p2))
9372 vec_dest = realign_load (msq, lsq, realignment_token)
9373 indx = indx + 1;
9374 msq = lsq;
9375 } */
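/* Illustrative sketch (not used by the vectorizer itself): a byte-level
   model of the realignment scheme above for a 16-byte vector.  floor ()
   rounds an address down to a 16-byte boundary; realign_load is modelled
   as concatenating MSQ and LSQ and extracting 16 bytes starting at the
   original misalignment.  The 16-byte size is an assumption made only
   for the sketch.

     #include <stdint.h>
     #include <string.h>

     void
     realign_load_model (const uint8_t *addr, uint8_t out[16])
     {
       uintptr_t a = (uintptr_t) addr;
       const uint8_t *p1 = (const uint8_t *) (a & ~(uintptr_t) 15);
       const uint8_t *p2 = (const uint8_t *) ((a + 15) & ~(uintptr_t) 15);
       uint8_t buf[32];
       memcpy (buf, p1, 16);              // msq = *(floor (p1))
       memcpy (buf + 16, p2, 16);         // lsq = *(floor (p2))
       memcpy (out, buf + (a & 15), 16);  // realign_load (msq, lsq, ...)
     }
*/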
9376
9377 /* If the misalignment remains the same throughout the execution of the
9378 loop, we can create the init_addr and permutation mask at the loop
9379 preheader. Otherwise, it needs to be created inside the loop.
9380 This can only occur when vectorizing memory accesses in the inner-loop
9381 nested within an outer-loop that is being vectorized. */
9382
9383 if (nested_in_vect_loop
9384 && !multiple_p (DR_STEP_ALIGNMENT (dr_info->dr),
9385 GET_MODE_SIZE (TYPE_MODE (vectype))))
9386 {
9387 gcc_assert (alignment_support_scheme != dr_explicit_realign_optimized);
9388 compute_in_loop = true;
9389 }
9390
9391 bool diff_first_stmt_info
9392 = first_stmt_info_for_drptr && first_stmt_info != first_stmt_info_for_drptr;
9393
9394 if ((alignment_support_scheme == dr_explicit_realign_optimized
9395 || alignment_support_scheme == dr_explicit_realign)
9396 && !compute_in_loop)
9397 {
9398 /* If we have a different first_stmt_info, we can't set up realignment
9399 here, since we can't guarantee first_stmt_info's DR has been
9400 initialized yet; use first_stmt_info_for_drptr's DR instead, bumping
9401 it by the distance from first_stmt_info's DR as below. */
9402 if (!diff_first_stmt_info)
9403 msq = vect_setup_realignment (vinfo,
9404 first_stmt_info, gsi, &realignment_token,
9405 alignment_support_scheme, NULL_TREE,
9406 &at_loop);
9407 if (alignment_support_scheme == dr_explicit_realign_optimized)
9408 {
9409 phi = as_a <gphi *> (SSA_NAME_DEF_STMT (msq));
9410 byte_offset = size_binop (MINUS_EXPR, TYPE_SIZE_UNIT (vectype),
9411 size_one_node);
9412 gcc_assert (!first_stmt_info_for_drptr);
9413 }
9414 }
9415 else
9416 at_loop = loop;
9417
9418 if (memory_access_type == VMAT_CONTIGUOUS_REVERSE)
9419 offset = size_int (-TYPE_VECTOR_SUBPARTS (vectype) + 1);
9420
9421 tree bump;
9422 tree vec_offset = NULL_TREE;
9423 if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
9424 {
9425 aggr_type = NULL_TREE;
9426 bump = NULL_TREE;
9427 }
9428 else if (memory_access_type == VMAT_GATHER_SCATTER)
9429 {
9430 aggr_type = elem_type;
9431 vect_get_strided_load_store_ops (stmt_info, loop_vinfo, &gs_info,
9432 &bump, &vec_offset);
9433 }
9434 else
9435 {
9436 if (memory_access_type == VMAT_LOAD_STORE_LANES)
9437 aggr_type = build_array_type_nelts (elem_type, vec_num * nunits);
9438 else
9439 aggr_type = vectype;
9440 bump = vect_get_data_ptr_increment (vinfo, dr_info, aggr_type,
9441 memory_access_type);
9442 }
9443
9444 tree vec_mask = NULL_TREE;
9445 prev_stmt_info = NULL;
9446 poly_uint64 group_elt = 0;
9447 for (j = 0; j < ncopies; j++)
9448 {
9449 stmt_vec_info new_stmt_info = NULL;
9450 /* 1. Create the vector or array pointer update chain. */
9451 if (j == 0)
9452 {
9453 bool simd_lane_access_p
9454 = STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) != 0;
9455 if (simd_lane_access_p
9456 && TREE_CODE (DR_BASE_ADDRESS (first_dr_info->dr)) == ADDR_EXPR
9457 && VAR_P (TREE_OPERAND (DR_BASE_ADDRESS (first_dr_info->dr), 0))
9458 && integer_zerop (get_dr_vinfo_offset (vinfo, first_dr_info))
9459 && integer_zerop (DR_INIT (first_dr_info->dr))
9460 && alias_sets_conflict_p (get_alias_set (aggr_type),
9461 get_alias_set (TREE_TYPE (ref_type)))
9462 && (alignment_support_scheme == dr_aligned
9463 || alignment_support_scheme == dr_unaligned_supported))
9464 {
9465 dataref_ptr = unshare_expr (DR_BASE_ADDRESS (first_dr_info->dr));
9466 dataref_offset = build_int_cst (ref_type, 0);
9467 }
9468 else if (diff_first_stmt_info)
9469 {
9470 dataref_ptr
9471 = vect_create_data_ref_ptr (vinfo, first_stmt_info_for_drptr,
9472 aggr_type, at_loop, offset, &dummy,
9473 gsi, &ptr_incr, simd_lane_access_p,
9474 byte_offset, bump);
9475 /* Adjust the pointer by the difference to first_stmt. */
9476 data_reference_p ptrdr
9477 = STMT_VINFO_DATA_REF (first_stmt_info_for_drptr);
9478 tree diff
9479 = fold_convert (sizetype,
9480 size_binop (MINUS_EXPR,
9481 DR_INIT (first_dr_info->dr),
9482 DR_INIT (ptrdr)));
9483 dataref_ptr = bump_vector_ptr (vinfo, dataref_ptr, ptr_incr, gsi,
9484 stmt_info, diff);
9485 if (alignment_support_scheme == dr_explicit_realign)
9486 {
9487 msq = vect_setup_realignment (vinfo,
9488 first_stmt_info_for_drptr, gsi,
9489 &realignment_token,
9490 alignment_support_scheme,
9491 dataref_ptr, &at_loop);
9492 gcc_assert (!compute_in_loop);
9493 }
9494 }
9495 else if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
9496 vect_get_gather_scatter_ops (vinfo, loop, stmt_info, &gs_info,
9497 &dataref_ptr, &vec_offset);
9498 else
9499 dataref_ptr
9500 = vect_create_data_ref_ptr (vinfo, first_stmt_info, aggr_type,
9501 at_loop,
9502 offset, &dummy, gsi, &ptr_incr,
9503 simd_lane_access_p,
9504 byte_offset, bump);
9505 if (mask)
9506 {
9507 if (slp_node)
9508 {
9509 auto_vec<vec<tree> > vec_defs (1);
9510 vect_get_slp_defs (vinfo, slp_node, &vec_defs);
9511 vec_mask = vec_defs[0][0];
9512 }
9513 else
9514 vec_mask = vect_get_vec_def_for_operand (vinfo, mask, stmt_info,
9515 mask_vectype);
9516 }
9517 }
9518 else
9519 {
9520 if (dataref_offset)
9521 dataref_offset = int_const_binop (PLUS_EXPR, dataref_offset,
9522 bump);
9523 else if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
9524 vec_offset = vect_get_vec_def_for_stmt_copy (vinfo, vec_offset);
9525 else
9526 dataref_ptr = bump_vector_ptr (vinfo, dataref_ptr, ptr_incr, gsi,
9527 stmt_info, bump);
9528 if (mask)
9529 vec_mask = vect_get_vec_def_for_stmt_copy (vinfo, vec_mask);
9530 }
9531
9532 if (grouped_load || slp_perm)
9533 dr_chain.create (vec_num);
9534
9535 if (memory_access_type == VMAT_LOAD_STORE_LANES)
9536 {
9537 tree vec_array;
9538
9539 vec_array = create_vector_array (vectype, vec_num);
9540
9541 tree final_mask = NULL_TREE;
9542 if (loop_masks)
9543 final_mask = vect_get_loop_mask (gsi, loop_masks, ncopies,
9544 vectype, j);
9545 if (vec_mask)
9546 final_mask = prepare_load_store_mask (mask_vectype, final_mask,
9547 vec_mask, gsi);
9548
9549 gcall *call;
9550 if (final_mask)
9551 {
9552 /* Emit:
9553 VEC_ARRAY = MASK_LOAD_LANES (DATAREF_PTR, ALIAS_PTR,
9554 VEC_MASK). */
9555 unsigned int align = TYPE_ALIGN_UNIT (TREE_TYPE (vectype));
9556 tree alias_ptr = build_int_cst (ref_type, align);
9557 call = gimple_build_call_internal (IFN_MASK_LOAD_LANES, 3,
9558 dataref_ptr, alias_ptr,
9559 final_mask);
9560 }
9561 else
9562 {
9563 /* Emit:
9564 VEC_ARRAY = LOAD_LANES (MEM_REF[...all elements...]). */
9565 data_ref = create_array_ref (aggr_type, dataref_ptr, ref_type);
9566 call = gimple_build_call_internal (IFN_LOAD_LANES, 1, data_ref);
9567 }
9568 gimple_call_set_lhs (call, vec_array);
9569 gimple_call_set_nothrow (call, true);
9570 new_stmt_info = vect_finish_stmt_generation (vinfo, stmt_info,
9571 call, gsi);
9572
9573 /* Extract each vector into an SSA_NAME. */
9574 for (i = 0; i < vec_num; i++)
9575 {
9576 new_temp = read_vector_array (vinfo, stmt_info, gsi, scalar_dest,
9577 vec_array, i);
9578 dr_chain.quick_push (new_temp);
9579 }
9580
9581 /* Record the mapping between SSA_NAMEs and statements. */
9582 vect_record_grouped_load_vectors (vinfo, stmt_info, dr_chain);
9583
9584 /* Record that VEC_ARRAY is now dead. */
9585 vect_clobber_variable (vinfo, stmt_info, gsi, vec_array);
9586 }
9587 else
9588 {
9589 for (i = 0; i < vec_num; i++)
9590 {
9591 tree final_mask = NULL_TREE;
9592 if (loop_masks
9593 && memory_access_type != VMAT_INVARIANT)
9594 final_mask = vect_get_loop_mask (gsi, loop_masks,
9595 vec_num * ncopies,
9596 vectype, vec_num * j + i);
9597 if (vec_mask)
9598 final_mask = prepare_load_store_mask (mask_vectype, final_mask,
9599 vec_mask, gsi);
9600
9601 if (i > 0)
9602 dataref_ptr = bump_vector_ptr (vinfo, dataref_ptr, ptr_incr,
9603 gsi, stmt_info, bump);
9604
9605 /* 2. Create the vector-load in the loop. */
9606 gimple *new_stmt = NULL;
9607 switch (alignment_support_scheme)
9608 {
9609 case dr_aligned:
9610 case dr_unaligned_supported:
9611 {
9612 unsigned int misalign;
9613 unsigned HOST_WIDE_INT align;
9614
9615 if (memory_access_type == VMAT_GATHER_SCATTER)
9616 {
9617 tree zero = build_zero_cst (vectype);
9618 tree scale = size_int (gs_info.scale);
9619 gcall *call;
9620 if (loop_masks)
9621 call = gimple_build_call_internal
9622 (IFN_MASK_GATHER_LOAD, 5, dataref_ptr,
9623 vec_offset, scale, zero, final_mask);
9624 else
9625 call = gimple_build_call_internal
9626 (IFN_GATHER_LOAD, 4, dataref_ptr,
9627 vec_offset, scale, zero);
9628 gimple_call_set_nothrow (call, true);
9629 new_stmt = call;
9630 data_ref = NULL_TREE;
9631 break;
9632 }
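/* Illustrative sketch (not used by the vectorizer itself): the gather
   performed conceptually by the call built above - one independent load
   per lane at BASE + OFFSET[i] * SCALE.  Element type and lane count
   are fixed here only for the sketch.

     void
     gather_load_model (const char *base, const long offset[4],
                        long scale, int res[4])
     {
       for (int i = 0; i < 4; i++)
         res[i] = *(const int *) (base + offset[i] * scale);
     }
*/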
9633
9634 align =
9635 known_alignment (DR_TARGET_ALIGNMENT (first_dr_info));
9636 if (alignment_support_scheme == dr_aligned)
9637 {
9638 gcc_assert (aligned_access_p (first_dr_info));
9639 misalign = 0;
9640 }
9641 else if (DR_MISALIGNMENT (first_dr_info) == -1)
9642 {
9643 align = dr_alignment
9644 (vect_dr_behavior (vinfo, first_dr_info));
9645 misalign = 0;
9646 }
9647 else
9648 misalign = DR_MISALIGNMENT (first_dr_info);
9649 if (dataref_offset == NULL_TREE
9650 && TREE_CODE (dataref_ptr) == SSA_NAME)
9651 set_ptr_info_alignment (get_ptr_info (dataref_ptr),
9652 align, misalign);
9653
9654 if (final_mask)
9655 {
9656 align = least_bit_hwi (misalign | align);
9657 tree ptr = build_int_cst (ref_type, align);
9658 gcall *call
9659 = gimple_build_call_internal (IFN_MASK_LOAD, 3,
9660 dataref_ptr, ptr,
9661 final_mask);
9662 gimple_call_set_nothrow (call, true);
9663 new_stmt = call;
9664 data_ref = NULL_TREE;
9665 }
9666 else
9667 {
9668 tree ltype = vectype;
9669 tree new_vtype = NULL_TREE;
9670 unsigned HOST_WIDE_INT gap
9671 = DR_GROUP_GAP (first_stmt_info);
9672 unsigned int vect_align
9673 = vect_known_alignment_in_bytes (first_dr_info);
9674 unsigned int scalar_dr_size
9675 = vect_get_scalar_dr_size (first_dr_info);
9676 /* If there's no peeling for gaps but we have a gap
9677 with slp loads then load the lower half of the
9678 vector only. See get_group_load_store_type for
9679 when we apply this optimization. */
9680 if (slp
9681 && loop_vinfo
9682 && !LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo)
9683 && gap != 0
9684 && known_eq (nunits, (group_size - gap) * 2)
9685 && known_eq (nunits, group_size)
9686 && gap >= (vect_align / scalar_dr_size))
9687 {
9688 tree half_vtype;
9689 new_vtype
9690 = vector_vector_composition_type (vectype, 2,
9691 &half_vtype);
9692 if (new_vtype != NULL_TREE)
9693 ltype = half_vtype;
9694 }
9695 tree offset
9696 = (dataref_offset ? dataref_offset
9697 : build_int_cst (ref_type, 0));
9698 if (ltype != vectype
9699 && memory_access_type == VMAT_CONTIGUOUS_REVERSE)
9700 {
9701 unsigned HOST_WIDE_INT gap_offset
9702 = gap * tree_to_uhwi (TYPE_SIZE_UNIT (elem_type));
9703 tree gapcst = build_int_cst (ref_type, gap_offset);
9704 offset = size_binop (PLUS_EXPR, offset, gapcst);
9705 }
9706 data_ref
9707 = fold_build2 (MEM_REF, ltype, dataref_ptr, offset);
9708 if (alignment_support_scheme == dr_aligned)
9709 ;
9710 else if (DR_MISALIGNMENT (first_dr_info) == -1)
9711 TREE_TYPE (data_ref)
9712 = build_aligned_type (TREE_TYPE (data_ref),
9713 align * BITS_PER_UNIT);
9714 else
9715 TREE_TYPE (data_ref)
9716 = build_aligned_type (TREE_TYPE (data_ref),
9717 TYPE_ALIGN (elem_type));
9718 if (ltype != vectype)
9719 {
9720 vect_copy_ref_info (data_ref,
9721 DR_REF (first_dr_info->dr));
9722 tree tem = make_ssa_name (ltype);
9723 new_stmt = gimple_build_assign (tem, data_ref);
9724 vect_finish_stmt_generation (vinfo, stmt_info,
9725 new_stmt, gsi);
9726 data_ref = NULL;
9727 vec<constructor_elt, va_gc> *v;
9728 vec_alloc (v, 2);
9729 if (memory_access_type == VMAT_CONTIGUOUS_REVERSE)
9730 {
9731 CONSTRUCTOR_APPEND_ELT (v, NULL_TREE,
9732 build_zero_cst (ltype));
9733 CONSTRUCTOR_APPEND_ELT (v, NULL_TREE, tem);
9734 }
9735 else
9736 {
9737 CONSTRUCTOR_APPEND_ELT (v, NULL_TREE, tem);
9738 CONSTRUCTOR_APPEND_ELT (v, NULL_TREE,
9739 build_zero_cst (ltype));
9740 }
9741 gcc_assert (new_vtype != NULL_TREE);
9742 if (new_vtype == vectype)
9743 new_stmt = gimple_build_assign (
9744 vec_dest, build_constructor (vectype, v));
9745 else
9746 {
9747 tree new_vname = make_ssa_name (new_vtype);
9748 new_stmt = gimple_build_assign (
9749 new_vname, build_constructor (new_vtype, v));
9750 vect_finish_stmt_generation (vinfo, stmt_info,
9751 new_stmt, gsi);
9752 new_stmt = gimple_build_assign (
9753 vec_dest, build1 (VIEW_CONVERT_EXPR, vectype,
9754 new_vname));
9755 }
9756 }
9757 }
9758 break;
9759 }
9760 case dr_explicit_realign:
9761 {
9762 tree ptr, bump;
9763
9764 tree vs = size_int (TYPE_VECTOR_SUBPARTS (vectype));
9765
9766 if (compute_in_loop)
9767 msq = vect_setup_realignment (vinfo, first_stmt_info, gsi,
9768 &realignment_token,
9769 dr_explicit_realign,
9770 dataref_ptr, NULL);
9771
9772 if (TREE_CODE (dataref_ptr) == SSA_NAME)
9773 ptr = copy_ssa_name (dataref_ptr);
9774 else
9775 ptr = make_ssa_name (TREE_TYPE (dataref_ptr));
9776 // For explicit realign the target alignment should be
9777 // known at compile time.
9778 unsigned HOST_WIDE_INT align =
9779 DR_TARGET_ALIGNMENT (first_dr_info).to_constant ();
9780 new_stmt = gimple_build_assign
9781 (ptr, BIT_AND_EXPR, dataref_ptr,
9782 build_int_cst
9783 (TREE_TYPE (dataref_ptr),
9784 -(HOST_WIDE_INT) align));
9785 vect_finish_stmt_generation (vinfo, stmt_info,
9786 new_stmt, gsi);
9787 data_ref
9788 = build2 (MEM_REF, vectype, ptr,
9789 build_int_cst (ref_type, 0));
9790 vect_copy_ref_info (data_ref, DR_REF (first_dr_info->dr));
9791 vec_dest = vect_create_destination_var (scalar_dest,
9792 vectype);
9793 new_stmt = gimple_build_assign (vec_dest, data_ref);
9794 new_temp = make_ssa_name (vec_dest, new_stmt);
9795 gimple_assign_set_lhs (new_stmt, new_temp);
9796 gimple_move_vops (new_stmt, stmt_info->stmt);
9797 vect_finish_stmt_generation (vinfo, stmt_info,
9798 new_stmt, gsi);
9799 msq = new_temp;
9800
9801 bump = size_binop (MULT_EXPR, vs,
9802 TYPE_SIZE_UNIT (elem_type));
9803 bump = size_binop (MINUS_EXPR, bump, size_one_node);
9804 ptr = bump_vector_ptr (vinfo, dataref_ptr, NULL, gsi,
9805 stmt_info, bump);
9806 new_stmt = gimple_build_assign
9807 (NULL_TREE, BIT_AND_EXPR, ptr,
9808 build_int_cst
9809 (TREE_TYPE (ptr), -(HOST_WIDE_INT) align));
9810 ptr = copy_ssa_name (ptr, new_stmt);
9811 gimple_assign_set_lhs (new_stmt, ptr);
9812 vect_finish_stmt_generation (vinfo, stmt_info,
9813 new_stmt, gsi);
9814 data_ref
9815 = build2 (MEM_REF, vectype, ptr,
9816 build_int_cst (ref_type, 0));
9817 break;
9818 }
9819 case dr_explicit_realign_optimized:
9820 {
9821 if (TREE_CODE (dataref_ptr) == SSA_NAME)
9822 new_temp = copy_ssa_name (dataref_ptr);
9823 else
9824 new_temp = make_ssa_name (TREE_TYPE (dataref_ptr));
9825 // We should only be doing this if we know the target
9826 // alignment at compile time.
9827 unsigned HOST_WIDE_INT align =
9828 DR_TARGET_ALIGNMENT (first_dr_info).to_constant ();
9829 new_stmt = gimple_build_assign
9830 (new_temp, BIT_AND_EXPR, dataref_ptr,
9831 build_int_cst (TREE_TYPE (dataref_ptr),
9832 -(HOST_WIDE_INT) align));
9833 vect_finish_stmt_generation (vinfo, stmt_info,
9834 new_stmt, gsi);
9835 data_ref
9836 = build2 (MEM_REF, vectype, new_temp,
9837 build_int_cst (ref_type, 0));
9838 break;
9839 }
9840 default:
9841 gcc_unreachable ();
9842 }
9843 vec_dest = vect_create_destination_var (scalar_dest, vectype);
9844 /* DATA_REF is null if we've already built the statement. */
9845 if (data_ref)
9846 {
9847 vect_copy_ref_info (data_ref, DR_REF (first_dr_info->dr));
9848 new_stmt = gimple_build_assign (vec_dest, data_ref);
9849 }
9850 new_temp = make_ssa_name (vec_dest, new_stmt);
9851 gimple_set_lhs (new_stmt, new_temp);
9852 new_stmt_info
9853 = vect_finish_stmt_generation (vinfo, stmt_info,
9854 new_stmt, gsi);
9855
9856 /* 3. Handle explicit realignment if necessary/supported.
9857 Create in loop:
9858 vec_dest = realign_load (msq, lsq, realignment_token) */
9859 if (alignment_support_scheme == dr_explicit_realign_optimized
9860 || alignment_support_scheme == dr_explicit_realign)
9861 {
9862 lsq = gimple_assign_lhs (new_stmt);
9863 if (!realignment_token)
9864 realignment_token = dataref_ptr;
9865 vec_dest = vect_create_destination_var (scalar_dest, vectype);
9866 new_stmt = gimple_build_assign (vec_dest, REALIGN_LOAD_EXPR,
9867 msq, lsq, realignment_token);
9868 new_temp = make_ssa_name (vec_dest, new_stmt);
9869 gimple_assign_set_lhs (new_stmt, new_temp);
9870 new_stmt_info
9871 = vect_finish_stmt_generation (vinfo, stmt_info,
9872 new_stmt, gsi);
9873
9874 if (alignment_support_scheme == dr_explicit_realign_optimized)
9875 {
9876 gcc_assert (phi);
9877 if (i == vec_num - 1 && j == ncopies - 1)
9878 add_phi_arg (phi, lsq,
9879 loop_latch_edge (containing_loop),
9880 UNKNOWN_LOCATION);
9881 msq = lsq;
9882 }
9883 }
9884
9885 if (memory_access_type == VMAT_CONTIGUOUS_REVERSE)
9886 {
9887 tree perm_mask = perm_mask_for_reverse (vectype);
9888 new_temp = permute_vec_elements (vinfo, new_temp, new_temp,
9889 perm_mask, stmt_info, gsi);
9890 new_stmt_info = vinfo->lookup_def (new_temp);
9891 }
9892
9893 /* Collect vector loads and later create their permutation in
9894 vect_transform_grouped_load (). */
9895 if (grouped_load || slp_perm)
9896 dr_chain.quick_push (new_temp);
9897
9898 /* Store vector loads in the corresponding SLP_NODE. */
9899 if (slp && !slp_perm)
9900 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt_info);
9901
9902 /* With an SLP permutation we load the gaps as well; without one
9903 we need to skip the gaps after we manage to fully load
9904 all elements. group_gap_adj is DR_GROUP_SIZE here. */
9905 group_elt += nunits;
9906 if (maybe_ne (group_gap_adj, 0U)
9907 && !slp_perm
9908 && known_eq (group_elt, group_size - group_gap_adj))
9909 {
9910 poly_wide_int bump_val
9911 = (wi::to_wide (TYPE_SIZE_UNIT (elem_type))
9912 * group_gap_adj);
9913 tree bump = wide_int_to_tree (sizetype, bump_val);
9914 dataref_ptr = bump_vector_ptr (vinfo, dataref_ptr, ptr_incr,
9915 gsi, stmt_info, bump);
9916 group_elt = 0;
9917 }
9918 }
9919 /* Bump the vector pointer to account for a gap or for excess
9920 elements loaded for a permuted SLP load. */
9921 if (maybe_ne (group_gap_adj, 0U) && slp_perm)
9922 {
9923 poly_wide_int bump_val
9924 = (wi::to_wide (TYPE_SIZE_UNIT (elem_type))
9925 * group_gap_adj);
9926 tree bump = wide_int_to_tree (sizetype, bump_val);
9927 dataref_ptr = bump_vector_ptr (vinfo, dataref_ptr, ptr_incr, gsi,
9928 stmt_info, bump);
9929 }
9930 }
9931
9932 if (slp && !slp_perm)
9933 continue;
9934
9935 if (slp_perm)
9936 {
9937 unsigned n_perms;
9938 bool ok = vect_transform_slp_perm_load (vinfo, slp_node, dr_chain,
9939 gsi, vf, false, &n_perms);
9940 gcc_assert (ok);
9941 }
9942 else
9943 {
9944 if (grouped_load)
9945 {
9946 if (memory_access_type != VMAT_LOAD_STORE_LANES)
9947 vect_transform_grouped_load (vinfo, stmt_info, dr_chain,
9948 group_size, gsi);
9949 *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
9950 }
9951 else
9952 {
9953 if (j == 0)
9954 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt_info;
9955 else
9956 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt_info;
9957 prev_stmt_info = new_stmt_info;
9958 }
9959 }
9960 dr_chain.release ();
9961 }
9962
9963 return true;
9964 }
9965
9966 /* Function vect_is_simple_cond.
9967
9968 Input:
9969 VINFO - the vectorization info for the loop or basic block being vectorized.
9970 COND - Condition that is checked for simple use.
9971
9972 Output:
9973 *COMP_VECTYPE - the vector type for the comparison.
9974 *DTS - The def types for the arguments of the comparison
9975
9976 Returns whether a COND can be vectorized. Checks whether
9977 condition operands are supportable using vect_is_simple_use. */
9978
9979 static bool
9980 vect_is_simple_cond (tree cond, vec_info *vinfo, slp_tree slp_node,
9981 tree *comp_vectype, enum vect_def_type *dts,
9982 tree vectype)
9983 {
9984 tree lhs, rhs;
9985 tree vectype1 = NULL_TREE, vectype2 = NULL_TREE;
9986
9987 /* Mask case. */
9988 if (TREE_CODE (cond) == SSA_NAME
9989 && VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (cond)))
9990 {
9991 if (!vect_is_simple_use (cond, vinfo, &dts[0], comp_vectype)
9992 || !*comp_vectype
9993 || !VECTOR_BOOLEAN_TYPE_P (*comp_vectype))
9994 return false;
9995 return true;
9996 }
9997
9998 if (!COMPARISON_CLASS_P (cond))
9999 return false;
10000
10001 lhs = TREE_OPERAND (cond, 0);
10002 rhs = TREE_OPERAND (cond, 1);
10003
10004 if (TREE_CODE (lhs) == SSA_NAME)
10005 {
10006 if (!vect_is_simple_use (lhs, vinfo, &dts[0], &vectype1))
10007 return false;
10008 }
10009 else if (TREE_CODE (lhs) == INTEGER_CST || TREE_CODE (lhs) == REAL_CST
10010 || TREE_CODE (lhs) == FIXED_CST)
10011 dts[0] = vect_constant_def;
10012 else
10013 return false;
10014
10015 if (TREE_CODE (rhs) == SSA_NAME)
10016 {
10017 if (!vect_is_simple_use (rhs, vinfo, &dts[1], &vectype2))
10018 return false;
10019 }
10020 else if (TREE_CODE (rhs) == INTEGER_CST || TREE_CODE (rhs) == REAL_CST
10021 || TREE_CODE (rhs) == FIXED_CST)
10022 dts[1] = vect_constant_def;
10023 else
10024 return false;
10025
10026 if (vectype1 && vectype2
10027 && maybe_ne (TYPE_VECTOR_SUBPARTS (vectype1),
10028 TYPE_VECTOR_SUBPARTS (vectype2)))
10029 return false;
10030
10031 *comp_vectype = vectype1 ? vectype1 : vectype2;
10032 /* Invariant comparison. */
10033 if (! *comp_vectype)
10034 {
10035 tree scalar_type = TREE_TYPE (lhs);
10036 if (VECT_SCALAR_BOOLEAN_TYPE_P (scalar_type))
10037 *comp_vectype = truth_type_for (vectype);
10038 else
10039 {
10040 /* If we can widen the comparison to match vectype do so. */
10041 if (INTEGRAL_TYPE_P (scalar_type)
10042 && !slp_node
10043 && tree_int_cst_lt (TYPE_SIZE (scalar_type),
10044 TYPE_SIZE (TREE_TYPE (vectype))))
10045 scalar_type = build_nonstandard_integer_type
10046 (vector_element_bits (vectype), TYPE_UNSIGNED (scalar_type));
10047 *comp_vectype = get_vectype_for_scalar_type (vinfo, scalar_type,
10048 slp_node);
10049 }
10050 }
10051
10052 return true;
10053 }
10054
10055 /* vectorizable_condition.
10056
10057 Check if STMT_INFO is a conditional modify expression that can be vectorized.
10058 If VEC_STMT is also passed, vectorize STMT_INFO: create a vectorized
10059 stmt using VEC_COND_EXPR to replace it, put it in VEC_STMT, and insert it
10060 at GSI.
10061
10062 When STMT_INFO is vectorized as a nested cycle, for_reduction is true.
10063
10064 Return true if STMT_INFO is vectorizable in this way. */
10065
10066 static bool
10067 vectorizable_condition (vec_info *vinfo,
10068 stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
10069 stmt_vec_info *vec_stmt,
10070 slp_tree slp_node, stmt_vector_for_cost *cost_vec)
10071 {
10072 tree scalar_dest = NULL_TREE;
10073 tree vec_dest = NULL_TREE;
10074 tree cond_expr, cond_expr0 = NULL_TREE, cond_expr1 = NULL_TREE;
10075 tree then_clause, else_clause;
10076 tree comp_vectype = NULL_TREE;
10077 tree vec_cond_lhs = NULL_TREE, vec_cond_rhs = NULL_TREE;
10078 tree vec_then_clause = NULL_TREE, vec_else_clause = NULL_TREE;
10079 tree vec_compare;
10080 tree new_temp;
10081 loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
10082 enum vect_def_type dts[4]
10083 = {vect_unknown_def_type, vect_unknown_def_type,
10084 vect_unknown_def_type, vect_unknown_def_type};
10085 int ndts = 4;
10086 int ncopies;
10087 int vec_num;
10088 enum tree_code code, cond_code, bitop1 = NOP_EXPR, bitop2 = NOP_EXPR;
10089 stmt_vec_info prev_stmt_info = NULL;
10090 int i, j;
10091 bb_vec_info bb_vinfo = dyn_cast <bb_vec_info> (vinfo);
10092 vec<tree> vec_oprnds0 = vNULL;
10093 vec<tree> vec_oprnds1 = vNULL;
10094 vec<tree> vec_oprnds2 = vNULL;
10095 vec<tree> vec_oprnds3 = vNULL;
10096 tree vec_cmp_type;
10097 bool masked = false;
10098
10099 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
10100 return false;
10101
10102 /* Is vectorizable conditional operation? */
10103 gassign *stmt = dyn_cast <gassign *> (stmt_info->stmt);
10104 if (!stmt)
10105 return false;
10106
10107 code = gimple_assign_rhs_code (stmt);
10108 if (code != COND_EXPR)
10109 return false;
10110
10111 stmt_vec_info reduc_info = NULL;
10112 int reduc_index = -1;
10113 vect_reduction_type reduction_type = TREE_CODE_REDUCTION;
10114 bool for_reduction
10115 = STMT_VINFO_REDUC_DEF (vect_orig_stmt (stmt_info)) != NULL;
10116 if (for_reduction)
10117 {
10118 if (STMT_SLP_TYPE (stmt_info))
10119 return false;
10120 reduc_info = info_for_reduction (vinfo, stmt_info);
10121 reduction_type = STMT_VINFO_REDUC_TYPE (reduc_info);
10122 reduc_index = STMT_VINFO_REDUC_IDX (stmt_info);
10123 gcc_assert (reduction_type != EXTRACT_LAST_REDUCTION
10124 || reduc_index != -1);
10125 }
10126 else
10127 {
10128 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
10129 return false;
10130
10131 /* FORNOW: only supported as part of a reduction. */
10132 if (STMT_VINFO_LIVE_P (stmt_info))
10133 {
10134 if (dump_enabled_p ())
10135 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
10136 "value used after loop.\n");
10137 return false;
10138 }
10139 }
10140
10141 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
10142 tree vectype1 = NULL_TREE, vectype2 = NULL_TREE;
10143
10144 if (slp_node)
10145 {
10146 ncopies = 1;
10147 vec_num = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
10148 }
10149 else
10150 {
10151 ncopies = vect_get_num_copies (loop_vinfo, vectype);
10152 vec_num = 1;
10153 }
10154
10155 gcc_assert (ncopies >= 1);
10156 if (for_reduction && ncopies > 1)
10157 return false; /* FORNOW */
10158
10159 cond_expr = gimple_assign_rhs1 (stmt);
10160
10161 if (!vect_is_simple_cond (cond_expr, vinfo, slp_node,
10162 &comp_vectype, &dts[0], vectype)
10163 || !comp_vectype)
10164 return false;
10165
10166 unsigned slp_adjust = 0;
10167 if (slp_node && SLP_TREE_CHILDREN (slp_node).length () == 4)
10168 /* ??? Hack. Hope for COND_EXPR GIMPLE sanitizing or refactor
10169 things more... */
10170 slp_adjust = 1;
10171 slp_tree then_slp_node, else_slp_node;
10172 if (!vect_is_simple_use (vinfo, stmt_info, slp_node, 1 + slp_adjust,
10173 &then_clause, &then_slp_node, &dts[2], &vectype1))
10174 return false;
10175 if (!vect_is_simple_use (vinfo, stmt_info, slp_node, 2 + slp_adjust,
10176 &else_clause, &else_slp_node, &dts[3], &vectype2))
10177 return false;
10178
10179 if (vectype1 && !useless_type_conversion_p (vectype, vectype1))
10180 return false;
10181
10182 if (vectype2 && !useless_type_conversion_p (vectype, vectype2))
10183 return false;
10184
10185 masked = !COMPARISON_CLASS_P (cond_expr);
10186 vec_cmp_type = truth_type_for (comp_vectype);
10187
10188 if (vec_cmp_type == NULL_TREE)
10189 return false;
10190
10191 cond_code = TREE_CODE (cond_expr);
10192 if (!masked)
10193 {
10194 cond_expr0 = TREE_OPERAND (cond_expr, 0);
10195 cond_expr1 = TREE_OPERAND (cond_expr, 1);
10196 }
10197
10198 /* For conditional reductions, the "then" value needs to be the candidate
10199 value calculated by this iteration while the "else" value needs to be
10200 the result carried over from previous iterations. If the COND_EXPR
10201 is the other way around, we need to swap it. */
10202 bool must_invert_cmp_result = false;
10203 if (reduction_type == EXTRACT_LAST_REDUCTION && reduc_index == 1)
10204 {
10205 if (masked)
10206 must_invert_cmp_result = true;
10207 else
10208 {
10209 bool honor_nans = HONOR_NANS (TREE_TYPE (cond_expr0));
10210 tree_code new_code = invert_tree_comparison (cond_code, honor_nans);
10211 if (new_code == ERROR_MARK)
10212 must_invert_cmp_result = true;
10213 else
10214 {
10215 cond_code = new_code;
10216 /* Make sure we don't accidentally use the old condition. */
10217 cond_expr = NULL_TREE;
10218 }
10219 }
10220 std::swap (then_clause, else_clause);
10221 }
10222
10223 if (!masked && VECTOR_BOOLEAN_TYPE_P (comp_vectype))
10224 {
10225 /* Boolean values may have another representation in vectors
10226 and therefore we prefer bit operations over comparison for
10227 them (which also works for scalar masks). We store opcodes
10228 to use in bitop1 and bitop2. Statement is vectorized as
10229 BITOP2 (rhs1 BITOP1 rhs2) or rhs1 BITOP2 (BITOP1 rhs2)
10230 depending on bitop1 and bitop2 arity. */
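      /* For example (a sketch assuming one-bit boolean semantics):
	 a > b on masks is computed as a & ~b (bitop1 = BIT_NOT_EXPR
	 applied to b, bitop2 = BIT_AND_EXPR), and a == b as ~(a ^ b)
	 (bitop1 = BIT_XOR_EXPR, bitop2 = BIT_NOT_EXPR).  */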
10231 switch (cond_code)
10232 {
10233 case GT_EXPR:
10234 bitop1 = BIT_NOT_EXPR;
10235 bitop2 = BIT_AND_EXPR;
10236 break;
10237 case GE_EXPR:
10238 bitop1 = BIT_NOT_EXPR;
10239 bitop2 = BIT_IOR_EXPR;
10240 break;
10241 case LT_EXPR:
10242 bitop1 = BIT_NOT_EXPR;
10243 bitop2 = BIT_AND_EXPR;
10244 std::swap (cond_expr0, cond_expr1);
10245 break;
10246 case LE_EXPR:
10247 bitop1 = BIT_NOT_EXPR;
10248 bitop2 = BIT_IOR_EXPR;
10249 std::swap (cond_expr0, cond_expr1);
10250 break;
10251 case NE_EXPR:
10252 bitop1 = BIT_XOR_EXPR;
10253 break;
10254 case EQ_EXPR:
10255 bitop1 = BIT_XOR_EXPR;
10256 bitop2 = BIT_NOT_EXPR;
10257 break;
10258 default:
10259 return false;
10260 }
10261 cond_code = SSA_NAME;
10262 }
10263
10264 if (TREE_CODE_CLASS (cond_code) == tcc_comparison
10265 && reduction_type == EXTRACT_LAST_REDUCTION
10266 && !expand_vec_cmp_expr_p (comp_vectype, vec_cmp_type, cond_code))
10267 {
10268 if (dump_enabled_p ())
10269 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
10270 "reduction comparison operation not supported.\n");
10271 return false;
10272 }
10273
10274 if (!vec_stmt)
10275 {
10276 if (bitop1 != NOP_EXPR)
10277 {
10278 machine_mode mode = TYPE_MODE (comp_vectype);
10279 optab optab;
10280
10281 optab = optab_for_tree_code (bitop1, comp_vectype, optab_default);
10282 if (!optab || optab_handler (optab, mode) == CODE_FOR_nothing)
10283 return false;
10284
10285 if (bitop2 != NOP_EXPR)
10286 {
10287 optab = optab_for_tree_code (bitop2, comp_vectype,
10288 optab_default);
10289 if (!optab || optab_handler (optab, mode) == CODE_FOR_nothing)
10290 return false;
10291 }
10292 }
10293
10294 vect_cost_for_stmt kind = vector_stmt;
10295 if (reduction_type == EXTRACT_LAST_REDUCTION)
10296 /* Count one reduction-like operation per vector. */
10297 kind = vec_to_scalar;
10298 else if (!expand_vec_cond_expr_p (vectype, comp_vectype, cond_code))
10299 return false;
10300
10301 if (slp_node
10302 && (!vect_maybe_update_slp_op_vectype
10303 (SLP_TREE_CHILDREN (slp_node)[0], comp_vectype)
10304 || (slp_adjust == 1
10305 && !vect_maybe_update_slp_op_vectype
10306 (SLP_TREE_CHILDREN (slp_node)[1], comp_vectype))
10307 || !vect_maybe_update_slp_op_vectype (then_slp_node, vectype)
10308 || !vect_maybe_update_slp_op_vectype (else_slp_node, vectype)))
10309 {
10310 if (dump_enabled_p ())
10311 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
10312 "incompatible vector types for invariants\n");
10313 return false;
10314 }
10315
10316 if (loop_vinfo
10317 && LOOP_VINFO_CAN_FULLY_MASK_P (loop_vinfo)
10318 && reduction_type == EXTRACT_LAST_REDUCTION)
10319 vect_record_loop_mask (loop_vinfo, &LOOP_VINFO_MASKS (loop_vinfo),
10320 ncopies * vec_num, vectype, NULL);
10321
10322 STMT_VINFO_TYPE (stmt_info) = condition_vec_info_type;
10323 vect_model_simple_cost (vinfo, stmt_info, ncopies, dts, ndts, slp_node,
10324 cost_vec, kind);
10325 return true;
10326 }
10327
10328 /* Transform. */
10329
10330 if (!slp_node)
10331 {
10332 vec_oprnds0.create (1);
10333 vec_oprnds1.create (1);
10334 vec_oprnds2.create (1);
10335 vec_oprnds3.create (1);
10336 }
10337
10338 /* Handle def. */
10339 scalar_dest = gimple_assign_lhs (stmt);
10340 if (reduction_type != EXTRACT_LAST_REDUCTION)
10341 vec_dest = vect_create_destination_var (scalar_dest, vectype);
10342
10343 /* Handle cond expr. */
10344 for (j = 0; j < ncopies; j++)
10345 {
10346 bool swap_cond_operands = false;
10347
10348 /* See whether another part of the vectorized code applies a loop
10349 mask to the condition, or to its inverse. */
10350
10351 vec_loop_masks *masks = NULL;
10352 if (loop_vinfo && LOOP_VINFO_FULLY_MASKED_P (loop_vinfo))
10353 {
10354 if (reduction_type == EXTRACT_LAST_REDUCTION)
10355 masks = &LOOP_VINFO_MASKS (loop_vinfo);
10356 else
10357 {
10358 scalar_cond_masked_key cond (cond_expr, ncopies);
10359 if (loop_vinfo->scalar_cond_masked_set.contains (cond))
10360 masks = &LOOP_VINFO_MASKS (loop_vinfo);
10361 else
10362 {
10363 bool honor_nans = HONOR_NANS (TREE_TYPE (cond.op0));
10364 cond.code = invert_tree_comparison (cond.code, honor_nans);
10365 if (loop_vinfo->scalar_cond_masked_set.contains (cond))
10366 {
10367 masks = &LOOP_VINFO_MASKS (loop_vinfo);
10368 cond_code = cond.code;
10369 swap_cond_operands = true;
10370 }
10371 }
10372 }
10373 }
10374
10375 stmt_vec_info new_stmt_info = NULL;
10376 if (j == 0)
10377 {
10378 if (slp_node)
10379 {
10380 auto_vec<vec<tree>, 4> vec_defs;
10381 vect_get_slp_defs (vinfo, slp_node, &vec_defs);
10382 vec_oprnds3 = vec_defs.pop ();
10383 vec_oprnds2 = vec_defs.pop ();
10384 if (!masked)
10385 vec_oprnds1 = vec_defs.pop ();
10386 vec_oprnds0 = vec_defs.pop ();
10387 }
10388 else
10389 {
10390 if (masked)
10391 {
10392 vec_cond_lhs
10393 = vect_get_vec_def_for_operand (vinfo, cond_expr, stmt_info,
10394 comp_vectype);
10395 }
10396 else
10397 {
10398 vec_cond_lhs
10399 = vect_get_vec_def_for_operand (vinfo, cond_expr0,
10400 stmt_info, comp_vectype);
10401 vec_cond_rhs
10402 = vect_get_vec_def_for_operand (vinfo, cond_expr1,
10403 stmt_info, comp_vectype);
10404 }
10405 vec_then_clause = vect_get_vec_def_for_operand (vinfo,
10406 then_clause,
10407 stmt_info);
10408 if (reduction_type != EXTRACT_LAST_REDUCTION)
10409 vec_else_clause = vect_get_vec_def_for_operand (vinfo,
10410 else_clause,
10411 stmt_info);
10412 }
10413 }
10414 else
10415 {
10416 vec_cond_lhs
10417 = vect_get_vec_def_for_stmt_copy (vinfo, vec_oprnds0.pop ());
10418 if (!masked)
10419 vec_cond_rhs
10420 = vect_get_vec_def_for_stmt_copy (vinfo, vec_oprnds1.pop ());
10421
10422 vec_then_clause = vect_get_vec_def_for_stmt_copy (vinfo,
10423 vec_oprnds2.pop ());
10424 vec_else_clause = vect_get_vec_def_for_stmt_copy (vinfo,
10425 vec_oprnds3.pop ());
10426 }
10427
10428 if (!slp_node)
10429 {
10430 vec_oprnds0.quick_push (vec_cond_lhs);
10431 if (!masked)
10432 vec_oprnds1.quick_push (vec_cond_rhs);
10433 vec_oprnds2.quick_push (vec_then_clause);
10434 vec_oprnds3.quick_push (vec_else_clause);
10435 }
10436
10437 /* Arguments are ready. Create the new vector stmt. */
10438 FOR_EACH_VEC_ELT (vec_oprnds0, i, vec_cond_lhs)
10439 {
10440 vec_then_clause = vec_oprnds2[i];
10441 vec_else_clause = vec_oprnds3[i];
10442
10443 if (swap_cond_operands)
10444 std::swap (vec_then_clause, vec_else_clause);
10445
10446 if (masked)
10447 vec_compare = vec_cond_lhs;
10448 else
10449 {
10450 vec_cond_rhs = vec_oprnds1[i];
10451 if (bitop1 == NOP_EXPR)
10452 vec_compare = build2 (cond_code, vec_cmp_type,
10453 vec_cond_lhs, vec_cond_rhs);
10454 else
10455 {
10456 new_temp = make_ssa_name (vec_cmp_type);
10457 gassign *new_stmt;
10458 if (bitop1 == BIT_NOT_EXPR)
10459 new_stmt = gimple_build_assign (new_temp, bitop1,
10460 vec_cond_rhs);
10461 else
10462 new_stmt
10463 = gimple_build_assign (new_temp, bitop1, vec_cond_lhs,
10464 vec_cond_rhs);
10465 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
10466 if (bitop2 == NOP_EXPR)
10467 vec_compare = new_temp;
10468 else if (bitop2 == BIT_NOT_EXPR)
10469 {
10470 /* Instead of doing ~x ? y : z do x ? z : y. */
10471 vec_compare = new_temp;
10472 std::swap (vec_then_clause, vec_else_clause);
10473 }
10474 else
10475 {
10476 vec_compare = make_ssa_name (vec_cmp_type);
10477 new_stmt
10478 = gimple_build_assign (vec_compare, bitop2,
10479 vec_cond_lhs, new_temp);
10480 vect_finish_stmt_generation (vinfo, stmt_info,
10481 new_stmt, gsi);
10482 }
10483 }
10484 }
10485
10486 /* If we decided to apply a loop mask to the result of the vector
10487 comparison, AND the comparison with the mask now. Later passes
10488 should then be able to reuse the AND results between multiple
10489 vector statements.
10490
10491 For example:
10492 for (int i = 0; i < 100; ++i)
10493 x[i] = y[i] ? z[i] : 10;
10494
10495 results in following optimized GIMPLE:
10496
10497 mask__35.8_43 = vect__4.7_41 != { 0, ... };
10498 vec_mask_and_46 = loop_mask_40 & mask__35.8_43;
10499 _19 = &MEM[base: z_12(D), index: ivtmp_56, step: 4, offset: 0B];
10500 vect_iftmp.11_47 = .MASK_LOAD (_19, 4B, vec_mask_and_46);
10501 vect_iftmp.12_52 = VEC_COND_EXPR <vec_mask_and_46,
10502 vect_iftmp.11_47, { 10, ... }>;
10503
10504 instead of using masked and unmasked forms of
10505 vec != { 0, ... } (masked in the MASK_LOAD,
10506 unmasked in the VEC_COND_EXPR). */
10507
10508 /* Force vec_compare to be an SSA_NAME rather than a comparison,
10509 in cases where that's necessary. */
10510
10511 if (masks || reduction_type == EXTRACT_LAST_REDUCTION)
10512 {
10513 if (!is_gimple_val (vec_compare))
10514 {
10515 tree vec_compare_name = make_ssa_name (vec_cmp_type);
10516 gassign *new_stmt = gimple_build_assign (vec_compare_name,
10517 vec_compare);
10518 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
10519 vec_compare = vec_compare_name;
10520 }
10521
10522 if (must_invert_cmp_result)
10523 {
10524 tree vec_compare_name = make_ssa_name (vec_cmp_type);
10525 gassign *new_stmt = gimple_build_assign (vec_compare_name,
10526 BIT_NOT_EXPR,
10527 vec_compare);
10528 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
10529 vec_compare = vec_compare_name;
10530 }
10531
10532 if (masks)
10533 {
10534 unsigned vec_num = vec_oprnds0.length ();
10535 tree loop_mask
10536 = vect_get_loop_mask (gsi, masks, vec_num * ncopies,
10537 vectype, vec_num * j + i);
10538 tree tmp2 = make_ssa_name (vec_cmp_type);
10539 gassign *g
10540 = gimple_build_assign (tmp2, BIT_AND_EXPR, vec_compare,
10541 loop_mask);
10542 vect_finish_stmt_generation (vinfo, stmt_info, g, gsi);
10543 vec_compare = tmp2;
10544 }
10545 }
10546
10547 if (reduction_type == EXTRACT_LAST_REDUCTION)
10548 {
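	  /* Roughly speaking, .FOLD_EXTRACT_LAST (else, mask, vals)
	     yields the last element of VALS whose MASK bit is set, or
	     ELSE when no lane is active (compare SVE's CLASTB).  This is
	     an informal sketch; see the internal function's definition
	     for the authoritative semantics.  */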
10549 gimple *old_stmt = vect_orig_stmt (stmt_info)->stmt;
10550 tree lhs = gimple_get_lhs (old_stmt);
10551 gcall *new_stmt = gimple_build_call_internal
10552 (IFN_FOLD_EXTRACT_LAST, 3, else_clause, vec_compare,
10553 vec_then_clause);
10554 gimple_call_set_lhs (new_stmt, lhs);
10555 SSA_NAME_DEF_STMT (lhs) = new_stmt;
10556 if (old_stmt == gsi_stmt (*gsi))
10557 new_stmt_info = vect_finish_replace_stmt (vinfo,
10558 stmt_info, new_stmt);
10559 else
10560 {
10561 /* In this case we're moving the definition to later in the
10562 block. That doesn't matter because the only uses of the
10563 lhs are in phi statements. */
10564 gimple_stmt_iterator old_gsi = gsi_for_stmt (old_stmt);
10565 gsi_remove (&old_gsi, true);
10566 new_stmt_info
10567 = vect_finish_stmt_generation (vinfo, stmt_info,
10568 new_stmt, gsi);
10569 }
10570 }
10571 else
10572 {
10573 new_temp = make_ssa_name (vec_dest);
10574 gassign *new_stmt
10575 = gimple_build_assign (new_temp, VEC_COND_EXPR, vec_compare,
10576 vec_then_clause, vec_else_clause);
10577 new_stmt_info
10578 = vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
10579 }
10580 if (slp_node)
10581 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt_info);
10582 }
10583
10584 if (slp_node)
10585 continue;
10586
10587 if (j == 0)
10588 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt_info;
10589 else
10590 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt_info;
10591
10592 prev_stmt_info = new_stmt_info;
10593 }
10594
10595 vec_oprnds0.release ();
10596 vec_oprnds1.release ();
10597 vec_oprnds2.release ();
10598 vec_oprnds3.release ();
10599
10600 return true;
10601 }
10602
10603 /* vectorizable_comparison.
10604
10605 Check if STMT_INFO is a comparison expression that can be vectorized.
10606 If VEC_STMT is also passed, vectorize STMT_INFO: create a vectorized
10607 comparison, put it in VEC_STMT, and insert it at GSI.
10608
10609 Return true if STMT_INFO is vectorizable in this way. */
10610
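/* As an illustrative example (SSA names made up), a scalar statement

     _1 = a_2 < b_3;

   whose result is a scalar boolean is vectorized here into a vector
   comparison producing a mask of VECTOR_BOOLEAN_TYPE_P type,
   conceptually

     mask_4 = vect_a_5 < vect_b_6;  */
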
10611 static bool
10612 vectorizable_comparison (vec_info *vinfo,
10613 stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
10614 stmt_vec_info *vec_stmt,
10615 slp_tree slp_node, stmt_vector_for_cost *cost_vec)
10616 {
10617 tree lhs, rhs1, rhs2;
10618 tree vectype1 = NULL_TREE, vectype2 = NULL_TREE;
10619 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
10620 tree vec_rhs1 = NULL_TREE, vec_rhs2 = NULL_TREE;
10621 tree new_temp;
10622 loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
10623 enum vect_def_type dts[2] = {vect_unknown_def_type, vect_unknown_def_type};
10624 int ndts = 2;
10625 poly_uint64 nunits;
10626 int ncopies;
10627 enum tree_code code, bitop1 = NOP_EXPR, bitop2 = NOP_EXPR;
10628 stmt_vec_info prev_stmt_info = NULL;
10629 int i, j;
10630 bb_vec_info bb_vinfo = dyn_cast <bb_vec_info> (vinfo);
10631 vec<tree> vec_oprnds0 = vNULL;
10632 vec<tree> vec_oprnds1 = vNULL;
10633 tree mask_type;
10634 tree mask;
10635
10636 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
10637 return false;
10638
10639 if (!vectype || !VECTOR_BOOLEAN_TYPE_P (vectype))
10640 return false;
10641
10642 mask_type = vectype;
10643 nunits = TYPE_VECTOR_SUBPARTS (vectype);
10644
10645 if (slp_node)
10646 ncopies = 1;
10647 else
10648 ncopies = vect_get_num_copies (loop_vinfo, vectype);
10649
10650 gcc_assert (ncopies >= 1);
10651 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
10652 return false;
10653
10654 if (STMT_VINFO_LIVE_P (stmt_info))
10655 {
10656 if (dump_enabled_p ())
10657 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
10658 "value used after loop.\n");
10659 return false;
10660 }
10661
10662 gassign *stmt = dyn_cast <gassign *> (stmt_info->stmt);
10663 if (!stmt)
10664 return false;
10665
10666 code = gimple_assign_rhs_code (stmt);
10667
10668 if (TREE_CODE_CLASS (code) != tcc_comparison)
10669 return false;
10670
10671 slp_tree slp_rhs1, slp_rhs2;
10672 if (!vect_is_simple_use (vinfo, stmt_info, slp_node,
10673 0, &rhs1, &slp_rhs1, &dts[0], &vectype1))
10674 return false;
10675
10676 if (!vect_is_simple_use (vinfo, stmt_info, slp_node,
10677 1, &rhs2, &slp_rhs2, &dts[1], &vectype2))
10678 return false;
10679
10680 if (vectype1 && vectype2
10681 && maybe_ne (TYPE_VECTOR_SUBPARTS (vectype1),
10682 TYPE_VECTOR_SUBPARTS (vectype2)))
10683 return false;
10684
10685 vectype = vectype1 ? vectype1 : vectype2;
10686
10687 /* Invariant comparison. */
10688 if (!vectype)
10689 {
10690 if (VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (rhs1)))
10691 vectype = mask_type;
10692 else
10693 vectype = get_vectype_for_scalar_type (vinfo, TREE_TYPE (rhs1),
10694 slp_node);
10695 if (!vectype || maybe_ne (TYPE_VECTOR_SUBPARTS (vectype), nunits))
10696 return false;
10697 }
10698 else if (maybe_ne (nunits, TYPE_VECTOR_SUBPARTS (vectype)))
10699 return false;
10700
10701 /* Can't compare mask and non-mask types. */
10702 if (vectype1 && vectype2
10703 && (VECTOR_BOOLEAN_TYPE_P (vectype1) ^ VECTOR_BOOLEAN_TYPE_P (vectype2)))
10704 return false;
10705
10706 /* Boolean values may have another representation in vectors
10707 and therefore we prefer bit operations over comparison for
10708 them (which also works for scalar masks). We store opcodes
10709 to use in bitop1 and bitop2. Statement is vectorized as
10710 BITOP2 (rhs1 BITOP1 rhs2) or
10711 rhs1 BITOP2 (BITOP1 rhs2)
10712 depending on bitop1 and bitop2 arity. */
10713 bool swap_p = false;
10714 if (VECTOR_BOOLEAN_TYPE_P (vectype))
10715 {
10716 if (code == GT_EXPR)
10717 {
10718 bitop1 = BIT_NOT_EXPR;
10719 bitop2 = BIT_AND_EXPR;
10720 }
10721 else if (code == GE_EXPR)
10722 {
10723 bitop1 = BIT_NOT_EXPR;
10724 bitop2 = BIT_IOR_EXPR;
10725 }
10726 else if (code == LT_EXPR)
10727 {
10728 bitop1 = BIT_NOT_EXPR;
10729 bitop2 = BIT_AND_EXPR;
10730 swap_p = true;
10731 }
10732 else if (code == LE_EXPR)
10733 {
10734 bitop1 = BIT_NOT_EXPR;
10735 bitop2 = BIT_IOR_EXPR;
10736 swap_p = true;
10737 }
10738 else
10739 {
10740 bitop1 = BIT_XOR_EXPR;
10741 if (code == EQ_EXPR)
10742 bitop2 = BIT_NOT_EXPR;
10743 }
10744 }
10745
10746 if (!vec_stmt)
10747 {
10748 if (bitop1 == NOP_EXPR)
10749 {
10750 if (!expand_vec_cmp_expr_p (vectype, mask_type, code))
10751 return false;
10752 }
10753 else
10754 {
10755 machine_mode mode = TYPE_MODE (vectype);
10756 optab optab;
10757
10758 optab = optab_for_tree_code (bitop1, vectype, optab_default);
10759 if (!optab || optab_handler (optab, mode) == CODE_FOR_nothing)
10760 return false;
10761
10762 if (bitop2 != NOP_EXPR)
10763 {
10764 optab = optab_for_tree_code (bitop2, vectype, optab_default);
10765 if (!optab || optab_handler (optab, mode) == CODE_FOR_nothing)
10766 return false;
10767 }
10768 }
10769
10770 /* Put types on constant and invariant SLP children. */
10771 if (slp_node
10772 && (!vect_maybe_update_slp_op_vectype (slp_rhs1, vectype)
10773 || !vect_maybe_update_slp_op_vectype (slp_rhs2, vectype)))
10774 {
10775 if (dump_enabled_p ())
10776 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
10777 "incompatible vector types for invariants\n");
10778 return false;
10779 }
10780
10781 STMT_VINFO_TYPE (stmt_info) = comparison_vec_info_type;
10782 vect_model_simple_cost (vinfo, stmt_info,
10783 ncopies * (1 + (bitop2 != NOP_EXPR)),
10784 dts, ndts, slp_node, cost_vec);
10785 return true;
10786 }
10787
10788 /* Transform. */
10789 if (!slp_node)
10790 {
10791 vec_oprnds0.create (1);
10792 vec_oprnds1.create (1);
10793 }
10794
10795 /* Handle def. */
10796 lhs = gimple_assign_lhs (stmt);
10797 mask = vect_create_destination_var (lhs, mask_type);
10798
10799 /* Handle cmp expr. */
10800 for (j = 0; j < ncopies; j++)
10801 {
10802 stmt_vec_info new_stmt_info = NULL;
10803 if (j == 0)
10804 {
10805 if (slp_node)
10806 {
10807 auto_vec<vec<tree>, 2> vec_defs;
10808 vect_get_slp_defs (vinfo, slp_node, &vec_defs);
10809 vec_oprnds1 = vec_defs.pop ();
10810 vec_oprnds0 = vec_defs.pop ();
10811 if (swap_p)
10812 std::swap (vec_oprnds0, vec_oprnds1);
10813 }
10814 else
10815 {
10816 vec_rhs1 = vect_get_vec_def_for_operand (vinfo, rhs1, stmt_info,
10817 vectype);
10818 vec_rhs2 = vect_get_vec_def_for_operand (vinfo, rhs2, stmt_info,
10819 vectype);
10820 }
10821 }
10822 else
10823 {
10824 vec_rhs1 = vect_get_vec_def_for_stmt_copy (vinfo,
10825 vec_oprnds0.pop ());
10826 vec_rhs2 = vect_get_vec_def_for_stmt_copy (vinfo,
10827 vec_oprnds1.pop ());
10828 }
10829
10830 if (!slp_node)
10831 {
10832 if (swap_p && j == 0)
10833 std::swap (vec_rhs1, vec_rhs2);
10834 vec_oprnds0.quick_push (vec_rhs1);
10835 vec_oprnds1.quick_push (vec_rhs2);
10836 }
10837
10838 /* Arguments are ready. Create the new vector stmt. */
10839 FOR_EACH_VEC_ELT (vec_oprnds0, i, vec_rhs1)
10840 {
10841 vec_rhs2 = vec_oprnds1[i];
10842
10843 new_temp = make_ssa_name (mask);
10844 if (bitop1 == NOP_EXPR)
10845 {
10846 gassign *new_stmt = gimple_build_assign (new_temp, code,
10847 vec_rhs1, vec_rhs2);
10848 new_stmt_info
10849 = vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
10850 }
10851 else
10852 {
10853 gassign *new_stmt;
10854 if (bitop1 == BIT_NOT_EXPR)
10855 new_stmt = gimple_build_assign (new_temp, bitop1, vec_rhs2);
10856 else
10857 new_stmt = gimple_build_assign (new_temp, bitop1, vec_rhs1,
10858 vec_rhs2);
10859 new_stmt_info
10860 = vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
10861 if (bitop2 != NOP_EXPR)
10862 {
10863 tree res = make_ssa_name (mask);
10864 if (bitop2 == BIT_NOT_EXPR)
10865 new_stmt = gimple_build_assign (res, bitop2, new_temp);
10866 else
10867 new_stmt = gimple_build_assign (res, bitop2, vec_rhs1,
10868 new_temp);
10869 new_stmt_info
10870 = vect_finish_stmt_generation (vinfo, stmt_info,
10871 new_stmt, gsi);
10872 }
10873 }
10874 if (slp_node)
10875 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt_info);
10876 }
10877
10878 if (slp_node)
10879 continue;
10880
10881 if (j == 0)
10882 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt_info;
10883 else
10884 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt_info;
10885
10886 prev_stmt_info = new_stmt_info;
10887 }
10888
10889 vec_oprnds0.release ();
10890 vec_oprnds1.release ();
10891
10892 return true;
10893 }
10894
10895 /* If SLP_NODE is nonnull, return true if vectorizable_live_operation
10896 can handle all live statements in the node. Otherwise return true
10897 if STMT_INFO is not live or if vectorizable_live_operation can handle it.
10898 GSI and VEC_STMT_P are as for vectorizable_live_operation. */
10899
10900 static bool
10901 can_vectorize_live_stmts (loop_vec_info loop_vinfo,
10902 stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
10903 slp_tree slp_node, slp_instance slp_node_instance,
10904 bool vec_stmt_p,
10905 stmt_vector_for_cost *cost_vec)
10906 {
10907 if (slp_node)
10908 {
10909 stmt_vec_info slp_stmt_info;
10910 unsigned int i;
10911 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (slp_node), i, slp_stmt_info)
10912 {
10913 if (STMT_VINFO_LIVE_P (slp_stmt_info)
10914 && !vectorizable_live_operation (loop_vinfo,
10915 slp_stmt_info, gsi, slp_node,
10916 slp_node_instance, i,
10917 vec_stmt_p, cost_vec))
10918 return false;
10919 }
10920 }
10921 else if (STMT_VINFO_LIVE_P (stmt_info)
10922 && !vectorizable_live_operation (loop_vinfo, stmt_info, gsi,
10923 slp_node, slp_node_instance, -1,
10924 vec_stmt_p, cost_vec))
10925 return false;
10926
10927 return true;
10928 }
10929
10930 /* Make sure the statement is vectorizable. */
10931
10932 opt_result
10933 vect_analyze_stmt (vec_info *vinfo,
10934 stmt_vec_info stmt_info, bool *need_to_vectorize,
10935 slp_tree node, slp_instance node_instance,
10936 stmt_vector_for_cost *cost_vec)
10937 {
10938 bb_vec_info bb_vinfo = dyn_cast <bb_vec_info> (vinfo);
10939 enum vect_relevant relevance = STMT_VINFO_RELEVANT (stmt_info);
10940 bool ok;
10941 gimple_seq pattern_def_seq;
10942
10943 if (dump_enabled_p ())
10944 dump_printf_loc (MSG_NOTE, vect_location, "==> examining statement: %G",
10945 stmt_info->stmt);
10946
10947 if (gimple_has_volatile_ops (stmt_info->stmt))
10948 return opt_result::failure_at (stmt_info->stmt,
10949 "not vectorized:"
10950 " stmt has volatile operands: %G\n",
10951 stmt_info->stmt);
10952
10953 if (STMT_VINFO_IN_PATTERN_P (stmt_info)
10954 && node == NULL
10955 && (pattern_def_seq = STMT_VINFO_PATTERN_DEF_SEQ (stmt_info)))
10956 {
10957 gimple_stmt_iterator si;
10958
10959 for (si = gsi_start (pattern_def_seq); !gsi_end_p (si); gsi_next (&si))
10960 {
10961 stmt_vec_info pattern_def_stmt_info
10962 = vinfo->lookup_stmt (gsi_stmt (si));
10963 if (STMT_VINFO_RELEVANT_P (pattern_def_stmt_info)
10964 || STMT_VINFO_LIVE_P (pattern_def_stmt_info))
10965 {
10966 /* Analyze def stmt of STMT if it's a pattern stmt. */
10967 if (dump_enabled_p ())
10968 dump_printf_loc (MSG_NOTE, vect_location,
10969 "==> examining pattern def statement: %G",
10970 pattern_def_stmt_info->stmt);
10971
10972 opt_result res
10973 = vect_analyze_stmt (vinfo, pattern_def_stmt_info,
10974 need_to_vectorize, node, node_instance,
10975 cost_vec);
10976 if (!res)
10977 return res;
10978 }
10979 }
10980 }
10981
10982 /* Skip stmts that do not need to be vectorized. In loops this is expected
10983 to include:
10984 - the COND_EXPR which is the loop exit condition
10985 - any LABEL_EXPRs in the loop
10986 - computations that are used only for array indexing or loop control.
10987 In basic blocks we only analyze statements that are a part of some SLP
10988 instance, therefore, all the statements are relevant.
10989
10990 A pattern statement needs to be analyzed instead of the original statement
10991 if the original statement is not relevant. Otherwise, we analyze both
10992 statements. In basic blocks we are called from some SLP instance
10993 traversal; there we don't analyze pattern stmts instead, as the pattern
10994 stmts will already be part of an SLP instance. */
10995
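  /* For instance, an induction-variable increment like i_7 = i_6 + 1
     that is used only by the loop exit test falls into the "loop
     control" category above and is skipped (names are illustrative).  */
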
10996 stmt_vec_info pattern_stmt_info = STMT_VINFO_RELATED_STMT (stmt_info);
10997 if (!STMT_VINFO_RELEVANT_P (stmt_info)
10998 && !STMT_VINFO_LIVE_P (stmt_info))
10999 {
11000 if (STMT_VINFO_IN_PATTERN_P (stmt_info)
11001 && pattern_stmt_info
11002 && (STMT_VINFO_RELEVANT_P (pattern_stmt_info)
11003 || STMT_VINFO_LIVE_P (pattern_stmt_info)))
11004 {
11005 /* Analyze PATTERN_STMT instead of the original stmt. */
11006 stmt_info = pattern_stmt_info;
11007 if (dump_enabled_p ())
11008 dump_printf_loc (MSG_NOTE, vect_location,
11009 "==> examining pattern statement: %G",
11010 stmt_info->stmt);
11011 }
11012 else
11013 {
11014 if (dump_enabled_p ())
11015 dump_printf_loc (MSG_NOTE, vect_location, "irrelevant.\n");
11016
11017 return opt_result::success ();
11018 }
11019 }
11020 else if (STMT_VINFO_IN_PATTERN_P (stmt_info)
11021 && node == NULL
11022 && pattern_stmt_info
11023 && (STMT_VINFO_RELEVANT_P (pattern_stmt_info)
11024 || STMT_VINFO_LIVE_P (pattern_stmt_info)))
11025 {
11026 /* Analyze PATTERN_STMT too. */
11027 if (dump_enabled_p ())
11028 dump_printf_loc (MSG_NOTE, vect_location,
11029 "==> examining pattern statement: %G",
11030 pattern_stmt_info->stmt);
11031
11032 opt_result res
11033 = vect_analyze_stmt (vinfo, pattern_stmt_info, need_to_vectorize, node,
11034 node_instance, cost_vec);
11035 if (!res)
11036 return res;
11037 }
11038
11039 switch (STMT_VINFO_DEF_TYPE (stmt_info))
11040 {
11041 case vect_internal_def:
11042 break;
11043
11044 case vect_reduction_def:
11045 case vect_nested_cycle:
11046 gcc_assert (!bb_vinfo
11047 && (relevance == vect_used_in_outer
11048 || relevance == vect_used_in_outer_by_reduction
11049 || relevance == vect_used_by_reduction
11050 || relevance == vect_unused_in_scope
11051 || relevance == vect_used_only_live));
11052 break;
11053
11054 case vect_induction_def:
11055 gcc_assert (!bb_vinfo);
11056 break;
11057
11058 case vect_constant_def:
11059 case vect_external_def:
11060 case vect_unknown_def_type:
11061 default:
11062 gcc_unreachable ();
11063 }
11064
11065 if (STMT_VINFO_RELEVANT_P (stmt_info))
11066 {
11067 tree type = gimple_expr_type (stmt_info->stmt);
11068 gcc_assert (!VECTOR_MODE_P (TYPE_MODE (type)));
11069 gcall *call = dyn_cast <gcall *> (stmt_info->stmt);
11070 gcc_assert (STMT_VINFO_VECTYPE (stmt_info)
11071 || (call && gimple_call_lhs (call) == NULL_TREE));
11072 *need_to_vectorize = true;
11073 }
11074
11075 if (PURE_SLP_STMT (stmt_info) && !node)
11076 {
11077 if (dump_enabled_p ())
11078 dump_printf_loc (MSG_NOTE, vect_location,
11079 "handled only by SLP analysis\n");
11080 return opt_result::success ();
11081 }
11082
11083 ok = true;
11084 if (!bb_vinfo
11085 && (STMT_VINFO_RELEVANT_P (stmt_info)
11086 || STMT_VINFO_DEF_TYPE (stmt_info) == vect_reduction_def))
11087 /* Prefer vectorizable_call over vectorizable_simd_clone_call so
11088 -mveclibabi= takes preference over library functions with
11089 the simd attribute. */
11090 ok = (vectorizable_call (vinfo, stmt_info, NULL, NULL, node, cost_vec)
11091 || vectorizable_simd_clone_call (vinfo, stmt_info, NULL, NULL, node,
11092 cost_vec)
11093 || vectorizable_conversion (vinfo, stmt_info,
11094 NULL, NULL, node, cost_vec)
11095 || vectorizable_operation (vinfo, stmt_info,
11096 NULL, NULL, node, cost_vec)
11097 || vectorizable_assignment (vinfo, stmt_info,
11098 NULL, NULL, node, cost_vec)
11099 || vectorizable_load (vinfo, stmt_info, NULL, NULL, node, cost_vec)
11100 || vectorizable_store (vinfo, stmt_info, NULL, NULL, node, cost_vec)
11101 || vectorizable_reduction (as_a <loop_vec_info> (vinfo), stmt_info,
11102 node, node_instance, cost_vec)
11103 || vectorizable_induction (as_a <loop_vec_info> (vinfo), stmt_info,
11104 NULL, NULL, node, cost_vec)
11105 || vectorizable_shift (vinfo, stmt_info, NULL, NULL, node, cost_vec)
11106 || vectorizable_condition (vinfo, stmt_info,
11107 NULL, NULL, node, cost_vec)
11108 || vectorizable_comparison (vinfo, stmt_info, NULL, NULL, node,
11109 cost_vec)
11110 || vectorizable_lc_phi (as_a <loop_vec_info> (vinfo),
11111 stmt_info, NULL, node));
11112 else
11113 {
11114 if (bb_vinfo)
11115 ok = (vectorizable_call (vinfo, stmt_info, NULL, NULL, node, cost_vec)
11116 || vectorizable_simd_clone_call (vinfo, stmt_info,
11117 NULL, NULL, node, cost_vec)
11118 || vectorizable_conversion (vinfo, stmt_info, NULL, NULL, node,
11119 cost_vec)
11120 || vectorizable_shift (vinfo, stmt_info,
11121 NULL, NULL, node, cost_vec)
11122 || vectorizable_operation (vinfo, stmt_info,
11123 NULL, NULL, node, cost_vec)
11124 || vectorizable_assignment (vinfo, stmt_info, NULL, NULL, node,
11125 cost_vec)
11126 || vectorizable_load (vinfo, stmt_info,
11127 NULL, NULL, node, cost_vec)
11128 || vectorizable_store (vinfo, stmt_info,
11129 NULL, NULL, node, cost_vec)
11130 || vectorizable_condition (vinfo, stmt_info,
11131 NULL, NULL, node, cost_vec)
11132 || vectorizable_comparison (vinfo, stmt_info, NULL, NULL, node,
11133 cost_vec));
11134 }
11135
11136 if (!ok)
11137 return opt_result::failure_at (stmt_info->stmt,
11138 "not vectorized:"
11139 " relevant stmt not supported: %G",
11140 stmt_info->stmt);
11141
11142 /* Stmts that are (also) "live" (i.e. that are used out of the loop)
11143 need extra handling, except for vectorizable reductions. */
11144 if (!bb_vinfo
11145 && STMT_VINFO_TYPE (stmt_info) != reduc_vec_info_type
11146 && STMT_VINFO_TYPE (stmt_info) != lc_phi_info_type
11147 && !can_vectorize_live_stmts (as_a <loop_vec_info> (vinfo),
11148 stmt_info, NULL, node, node_instance,
11149 false, cost_vec))
11150 return opt_result::failure_at (stmt_info->stmt,
11151 "not vectorized:"
11152 " live stmt not supported: %G",
11153 stmt_info->stmt);
11154
11155 return opt_result::success ();
11156 }
11157
11158
11159 /* Function vect_transform_stmt.
11160
11161 Create a vectorized stmt to replace STMT_INFO, and insert it at GSI. */
11162
11163 bool
11164 vect_transform_stmt (vec_info *vinfo,
11165 stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
11166 slp_tree slp_node, slp_instance slp_node_instance)
11167 {
11168 bool is_store = false;
11169 stmt_vec_info vec_stmt = NULL;
11170 bool done;
11171
11172 gcc_assert (slp_node || !PURE_SLP_STMT (stmt_info));
11173 stmt_vec_info old_vec_stmt_info = STMT_VINFO_VEC_STMT (stmt_info);
11174
11175 loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
11176 bool nested_p = (loop_vinfo
11177 && nested_in_vect_loop_p
11178 (LOOP_VINFO_LOOP (loop_vinfo), stmt_info));
11179
11180 gimple *stmt = stmt_info->stmt;
11181 switch (STMT_VINFO_TYPE (stmt_info))
11182 {
11183 case type_demotion_vec_info_type:
11184 case type_promotion_vec_info_type:
11185 case type_conversion_vec_info_type:
11186 done = vectorizable_conversion (vinfo, stmt_info,
11187 gsi, &vec_stmt, slp_node, NULL);
11188 gcc_assert (done);
11189 break;
11190
11191 case induc_vec_info_type:
11192 done = vectorizable_induction (as_a <loop_vec_info> (vinfo),
11193 stmt_info, gsi, &vec_stmt, slp_node,
11194 NULL);
11195 gcc_assert (done);
11196 break;
11197
11198 case shift_vec_info_type:
11199 done = vectorizable_shift (vinfo, stmt_info,
11200 gsi, &vec_stmt, slp_node, NULL);
11201 gcc_assert (done);
11202 break;
11203
11204 case op_vec_info_type:
11205 done = vectorizable_operation (vinfo, stmt_info, gsi, &vec_stmt, slp_node,
11206 NULL);
11207 gcc_assert (done);
11208 break;
11209
11210 case assignment_vec_info_type:
11211 done = vectorizable_assignment (vinfo, stmt_info,
11212 gsi, &vec_stmt, slp_node, NULL);
11213 gcc_assert (done);
11214 break;
11215
11216 case load_vec_info_type:
11217 done = vectorizable_load (vinfo, stmt_info, gsi, &vec_stmt, slp_node,
11218 NULL);
11219 gcc_assert (done);
11220 break;
11221
11222 case store_vec_info_type:
11223 done = vectorizable_store (vinfo, stmt_info,
11224 gsi, &vec_stmt, slp_node, NULL);
11225 gcc_assert (done);
11226 if (STMT_VINFO_GROUPED_ACCESS (stmt_info) && !slp_node)
11227 {
11228 /* In case of interleaving, the whole chain is vectorized when the
11229 last store in the chain is reached. Store stmts before the last
11230 one are skipped, and their vec_stmt_info shouldn't be freed
11231 meanwhile. */
11232 stmt_vec_info group_info = DR_GROUP_FIRST_ELEMENT (stmt_info);
11233 if (DR_GROUP_STORE_COUNT (group_info) == DR_GROUP_SIZE (group_info))
11234 is_store = true;
11235 }
11236 else
11237 is_store = true;
11238 break;
11239
11240 case condition_vec_info_type:
11241 done = vectorizable_condition (vinfo, stmt_info,
11242 gsi, &vec_stmt, slp_node, NULL);
11243 gcc_assert (done);
11244 break;
11245
11246 case comparison_vec_info_type:
11247 done = vectorizable_comparison (vinfo, stmt_info, gsi, &vec_stmt,
11248 slp_node, NULL);
11249 gcc_assert (done);
11250 break;
11251
11252 case call_vec_info_type:
11253 done = vectorizable_call (vinfo, stmt_info,
11254 gsi, &vec_stmt, slp_node, NULL);
11255 stmt = gsi_stmt (*gsi);
11256 break;
11257
11258 case call_simd_clone_vec_info_type:
11259 done = vectorizable_simd_clone_call (vinfo, stmt_info, gsi, &vec_stmt,
11260 slp_node, NULL);
11261 stmt = gsi_stmt (*gsi);
11262 break;
11263
11264 case reduc_vec_info_type:
11265 done = vect_transform_reduction (as_a <loop_vec_info> (vinfo), stmt_info,
11266 gsi, &vec_stmt, slp_node);
11267 gcc_assert (done);
11268 break;
11269
11270 case cycle_phi_info_type:
11271 done = vect_transform_cycle_phi (as_a <loop_vec_info> (vinfo), stmt_info,
11272 &vec_stmt, slp_node, slp_node_instance);
11273 gcc_assert (done);
11274 break;
11275
11276 case lc_phi_info_type:
11277 done = vectorizable_lc_phi (as_a <loop_vec_info> (vinfo),
11278 stmt_info, &vec_stmt, slp_node);
11279 gcc_assert (done);
11280 break;
11281
11282 default:
11283 if (!STMT_VINFO_LIVE_P (stmt_info))
11284 {
11285 if (dump_enabled_p ())
11286 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
11287 "stmt not supported.\n");
11288 gcc_unreachable ();
11289 }
11290 done = true;
11291 }
11292
11293 /* Verify SLP vectorization doesn't mess with STMT_VINFO_VEC_STMT.
11294 This would break hybrid SLP vectorization. */
11295 if (slp_node)
11296 gcc_assert (!vec_stmt
11297 && STMT_VINFO_VEC_STMT (stmt_info) == old_vec_stmt_info);
11298
11299 /* Handle inner-loop stmts whose DEF is used in the loop-nest that
11300 is being vectorized, but outside the immediately enclosing loop. */
11301 if (vec_stmt
11302 && nested_p
11303 && STMT_VINFO_TYPE (stmt_info) != reduc_vec_info_type
11304 && (STMT_VINFO_RELEVANT (stmt_info) == vect_used_in_outer
11305 || STMT_VINFO_RELEVANT (stmt_info) ==
11306 vect_used_in_outer_by_reduction))
11307 {
11308 class loop *innerloop = LOOP_VINFO_LOOP (loop_vinfo)->inner;
11309 imm_use_iterator imm_iter;
11310 use_operand_p use_p;
11311 tree scalar_dest;
11312
11313 if (dump_enabled_p ())
11314 dump_printf_loc (MSG_NOTE, vect_location,
11315 "Record the vdef for outer-loop vectorization.\n");
11316
11317 /* Find the relevant loop-exit phi-node, and record the vec_stmt there
11318 (to be used when vectorizing outer-loop stmts that use the DEF of
11319 STMT). */
11320 if (gimple_code (stmt) == GIMPLE_PHI)
11321 scalar_dest = PHI_RESULT (stmt);
11322 else
11323 scalar_dest = gimple_get_lhs (stmt);
11324
11325 FOR_EACH_IMM_USE_FAST (use_p, imm_iter, scalar_dest)
11326 if (!flow_bb_inside_loop_p (innerloop, gimple_bb (USE_STMT (use_p))))
11327 {
11328 stmt_vec_info exit_phi_info
11329 = vinfo->lookup_stmt (USE_STMT (use_p));
11330 STMT_VINFO_VEC_STMT (exit_phi_info) = vec_stmt;
11331 }
11332 }
11333
11334 if (vec_stmt)
11335 STMT_VINFO_VEC_STMT (stmt_info) = vec_stmt;
11336
11337 if (STMT_VINFO_TYPE (stmt_info) == store_vec_info_type)
11338 return is_store;
11339
11340 /* If this stmt defines a value used on a backedge, update the
11341 vectorized PHIs. */
11342 stmt_vec_info orig_stmt_info = vect_orig_stmt (stmt_info);
11343 stmt_vec_info reduc_info;
11344 if (STMT_VINFO_REDUC_DEF (orig_stmt_info)
11345 && vect_stmt_to_vectorize (orig_stmt_info) == stmt_info
11346 && (reduc_info = info_for_reduction (vinfo, orig_stmt_info))
11347 && STMT_VINFO_REDUC_TYPE (reduc_info) != FOLD_LEFT_REDUCTION
11348 && STMT_VINFO_REDUC_TYPE (reduc_info) != EXTRACT_LAST_REDUCTION)
11349 {
11350 gphi *phi;
11351 edge e;
11352 if (!slp_node
11353 && (phi = dyn_cast <gphi *>
11354 (STMT_VINFO_REDUC_DEF (orig_stmt_info)->stmt))
11355 && dominated_by_p (CDI_DOMINATORS,
11356 gimple_bb (orig_stmt_info->stmt), gimple_bb (phi))
11357 && (e = loop_latch_edge (gimple_bb (phi)->loop_father))
11358 && (PHI_ARG_DEF_FROM_EDGE (phi, e)
11359 == gimple_get_lhs (orig_stmt_info->stmt)))
11360 {
11361 stmt_vec_info phi_info
11362 = STMT_VINFO_VEC_STMT (STMT_VINFO_REDUC_DEF (orig_stmt_info));
11363 stmt_vec_info vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
11364 do
11365 {
11366 add_phi_arg (as_a <gphi *> (phi_info->stmt),
11367 gimple_get_lhs (vec_stmt->stmt), e,
11368 gimple_phi_arg_location (phi, e->dest_idx));
11369 phi_info = STMT_VINFO_RELATED_STMT (phi_info);
11370 vec_stmt = STMT_VINFO_RELATED_STMT (vec_stmt);
11371 }
11372 while (phi_info);
11373 gcc_assert (!vec_stmt);
11374 }
11375 else if (slp_node
11376 && slp_node != slp_node_instance->reduc_phis)
11377 {
11378 slp_tree phi_node = slp_node_instance->reduc_phis;
11379 gphi *phi = as_a <gphi *> (SLP_TREE_SCALAR_STMTS (phi_node)[0]->stmt);
11380 e = loop_latch_edge (gimple_bb (phi)->loop_father);
11381 gcc_assert (SLP_TREE_VEC_STMTS (phi_node).length ()
11382 == SLP_TREE_VEC_STMTS (slp_node).length ());
11383 for (unsigned i = 0; i < SLP_TREE_VEC_STMTS (phi_node).length (); ++i)
11384 add_phi_arg (as_a <gphi *> (SLP_TREE_VEC_STMTS (phi_node)[i]->stmt),
11385 gimple_get_lhs (SLP_TREE_VEC_STMTS (slp_node)[i]->stmt),
11386 e, gimple_phi_arg_location (phi, e->dest_idx));
11387 }
11388 }
11389
11390 /* Handle stmts whose DEF is used outside the loop-nest that is
11391 being vectorized. */
11392 if (is_a <loop_vec_info> (vinfo))
11393 done = can_vectorize_live_stmts (as_a <loop_vec_info> (vinfo),
11394 stmt_info, gsi, slp_node,
11395 slp_node_instance, true, NULL);
11396 gcc_assert (done);
11397
11398 return false;
11399 }
11400
11401
11402 /* Remove a group of stores (for SLP or interleaving), free their
11403 stmt_vec_info. */
11404
11405 void
11406 vect_remove_stores (vec_info *vinfo, stmt_vec_info first_stmt_info)
11407 {
11408 stmt_vec_info next_stmt_info = first_stmt_info;
11409
11410 while (next_stmt_info)
11411 {
11412 stmt_vec_info tmp = DR_GROUP_NEXT_ELEMENT (next_stmt_info);
11413 next_stmt_info = vect_orig_stmt (next_stmt_info);
11414 /* Free the attached stmt_vec_info and remove the stmt. */
11415 vinfo->remove_stmt (next_stmt_info);
11416 next_stmt_info = tmp;
11417 }
11418 }
11419
11420 /* If NUNITS is nonzero, return a vector type that contains NUNITS
11421 elements of type SCALAR_TYPE, or null if the target doesn't support
11422 such a type.
11423
11424 If NUNITS is zero, return a vector type that contains elements of
11425 type SCALAR_TYPE, choosing whichever vector size the target prefers.
11426
11427 If PREVAILING_MODE is VOIDmode, we have not yet chosen a vector mode
11428 for this vectorization region and want to "autodetect" the best choice.
11429 Otherwise, PREVAILING_MODE is a previously-chosen vector TYPE_MODE
11430 and we want the new type to be interoperable with it. PREVAILING_MODE
11431 in this case can be a scalar integer mode or a vector mode; when it
11432 is a vector mode, the function acts like a tree-level version of
11433 related_vector_mode. */
11434
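/* A usage sketch (results are target-dependent): on a target whose
   preferred SIMD mode for SImode is V4SImode, calling
   get_related_vectype_for_scalar_type (VOIDmode, integer_type_node, 0)
   would be expected to return a vector(4) int type, while passing
   nunits == 2 instead requests a two-element vector, if the target
   supports one.  */
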
11435 tree
11436 get_related_vectype_for_scalar_type (machine_mode prevailing_mode,
11437 tree scalar_type, poly_uint64 nunits)
11438 {
11439 tree orig_scalar_type = scalar_type;
11440 scalar_mode inner_mode;
11441 machine_mode simd_mode;
11442 tree vectype;
11443
11444 if (!is_int_mode (TYPE_MODE (scalar_type), &inner_mode)
11445 && !is_float_mode (TYPE_MODE (scalar_type), &inner_mode))
11446 return NULL_TREE;
11447
11448 unsigned int nbytes = GET_MODE_SIZE (inner_mode);
11449
11450 /* For vector types of elements whose mode precision doesn't
11451 match their type's precision we use an element type of mode
11452 precision. The vectorization routines will have to make sure
11453 they support the proper result truncation/extension.
11454 We also make sure to build vector types with INTEGER_TYPE
11455 component type only. */
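  /* For example, for a C "bool" (TYPE_PRECISION 1 but an 8-bit QImode)
     this builds vectors of an 8-bit unsigned integer element type
     instead; an illustrative case, not an exhaustive one.  */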
11456 if (INTEGRAL_TYPE_P (scalar_type)
11457 && (GET_MODE_BITSIZE (inner_mode) != TYPE_PRECISION (scalar_type)
11458 || TREE_CODE (scalar_type) != INTEGER_TYPE))
11459 scalar_type = build_nonstandard_integer_type (GET_MODE_BITSIZE (inner_mode),
11460 TYPE_UNSIGNED (scalar_type));
11461
11462 /* We shouldn't end up building VECTOR_TYPEs of non-scalar components.
11463 When the component mode passes the above test simply use a type
11464 corresponding to that mode. The theory is that any use that
11465 would cause problems with this will disable vectorization anyway. */
11466 else if (!SCALAR_FLOAT_TYPE_P (scalar_type)
11467 && !INTEGRAL_TYPE_P (scalar_type))
11468 scalar_type = lang_hooks.types.type_for_mode (inner_mode, 1);
11469
11470 /* We can't build a vector type of elements with alignment bigger than
11471 their size. */
11472 else if (nbytes < TYPE_ALIGN_UNIT (scalar_type))
11473 scalar_type = lang_hooks.types.type_for_mode (inner_mode,
11474 TYPE_UNSIGNED (scalar_type));
11475
11476 /* If we fell back to using the mode, fail if there was
11477 no scalar type for it. */
11478 if (scalar_type == NULL_TREE)
11479 return NULL_TREE;
11480
11481 /* If no prevailing mode was supplied, use the mode the target prefers.
11482 Otherwise lookup a vector mode based on the prevailing mode. */
11483 if (prevailing_mode == VOIDmode)
11484 {
11485 gcc_assert (known_eq (nunits, 0U));
11486 simd_mode = targetm.vectorize.preferred_simd_mode (inner_mode);
11487 if (SCALAR_INT_MODE_P (simd_mode))
11488 {
11489 /* Traditional behavior is not to take the integer mode
11490 literally, but simply to use it as a way of determining
11491 the vector size. It is up to mode_for_vector to decide
11492 what the TYPE_MODE should be.
11493
11494 Note that nunits == 1 is allowed in order to support single
11495 element vector types. */
11496 if (!multiple_p (GET_MODE_SIZE (simd_mode), nbytes, &nunits)
11497 || !mode_for_vector (inner_mode, nunits).exists (&simd_mode))
11498 return NULL_TREE;
11499 }
11500 }
11501 else if (SCALAR_INT_MODE_P (prevailing_mode)
11502 || !related_vector_mode (prevailing_mode,
11503 inner_mode, nunits).exists (&simd_mode))
11504 {
11505 /* Fall back to using mode_for_vector, mostly in the hope of being
11506 able to use an integer mode. */
11507 if (known_eq (nunits, 0U)
11508 && !multiple_p (GET_MODE_SIZE (prevailing_mode), nbytes, &nunits))
11509 return NULL_TREE;
11510
11511 if (!mode_for_vector (inner_mode, nunits).exists (&simd_mode))
11512 return NULL_TREE;
11513 }
11514
11515 vectype = build_vector_type_for_mode (scalar_type, simd_mode);
11516
11517 /* In cases where the mode was chosen by mode_for_vector, check that
11518 the target actually supports the chosen mode, or that it at least
11519 allows the vector mode to be replaced by a like-sized integer. */
11520 if (!VECTOR_MODE_P (TYPE_MODE (vectype))
11521 && !INTEGRAL_MODE_P (TYPE_MODE (vectype)))
11522 return NULL_TREE;
11523
11524 /* Re-attach the address-space qualifier if we canonicalized the scalar
11525 type. */
11526 if (TYPE_ADDR_SPACE (orig_scalar_type) != TYPE_ADDR_SPACE (vectype))
11527 return build_qualified_type
11528 (vectype, KEEP_QUAL_ADDR_SPACE (TYPE_QUALS (orig_scalar_type)));
11529
11530 return vectype;
11531 }
11532
11533 /* Function get_vectype_for_scalar_type.
11534
11535 Returns the vector type corresponding to SCALAR_TYPE as supported
11536 by the target. If GROUP_SIZE is nonzero and we're performing BB
11537 vectorization, make sure that the number of elements in the vector
11538 is no bigger than GROUP_SIZE. */
11539
11540 tree
11541 get_vectype_for_scalar_type (vec_info *vinfo, tree scalar_type,
11542 unsigned int group_size)
11543 {
11544 /* For BB vectorization, we should always have a group size once we've
11545 constructed the SLP tree; the only valid uses of zero GROUP_SIZEs
11546 are tentative requests during things like early data reference
11547 analysis and pattern recognition. */
11548 if (is_a <bb_vec_info> (vinfo))
11549 gcc_assert (vinfo->slp_instances.is_empty () || group_size != 0);
11550 else
11551 group_size = 0;
11552
11553 tree vectype = get_related_vectype_for_scalar_type (vinfo->vector_mode,
11554 scalar_type);
11555 if (vectype && vinfo->vector_mode == VOIDmode)
11556 vinfo->vector_mode = TYPE_MODE (vectype);
11557
11558 /* Register the natural choice of vector type, before the group size
11559 has been applied. */
11560 if (vectype)
11561 vinfo->used_vector_modes.add (TYPE_MODE (vectype));
11562
11563 /* If the natural choice of vector type doesn't satisfy GROUP_SIZE,
11564 try again with an explicit number of elements. */
11565 if (vectype
11566 && group_size
11567 && maybe_ge (TYPE_VECTOR_SUBPARTS (vectype), group_size))
11568 {
11569 /* Start with the biggest number of units that fits within
11570 GROUP_SIZE and halve it until we find a valid vector type.
11571 Usually either the first attempt will succeed or all will
11572 fail (in the latter case because GROUP_SIZE is too small
11573 for the target), but it's possible that a target could have
11574 a hole between supported vector types.
11575
11576 If GROUP_SIZE is not a power of 2, this has the effect of
11577 trying the largest power of 2 that fits within the group,
11578 even though the group is not a multiple of that vector size.
11579 The BB vectorizer will then try to carve up the group into
11580 smaller pieces. */
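      /* For example, with GROUP_SIZE == 6 we start with nunits == 4
	 (1 << floor_log2 (6)) and, if no 4-element vector type exists,
	 retry with nunits == 2.  */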
11581 unsigned int nunits = 1 << floor_log2 (group_size);
11582 do
11583 {
11584 vectype = get_related_vectype_for_scalar_type (vinfo->vector_mode,
11585 scalar_type, nunits);
11586 nunits /= 2;
11587 }
11588 while (nunits > 1 && !vectype);
11589 }
11590
11591 return vectype;
11592 }
11593
11594 /* Return the vector type corresponding to SCALAR_TYPE as supported
11595 by the target. NODE, if nonnull, is the SLP tree node that will
11596 use the returned vector type. */
11597
11598 tree
11599 get_vectype_for_scalar_type (vec_info *vinfo, tree scalar_type, slp_tree node)
11600 {
11601 unsigned int group_size = 0;
11602 if (node)
11603 {
11604 group_size = SLP_TREE_SCALAR_OPS (node).length ();
11605 if (group_size == 0)
11606 group_size = SLP_TREE_SCALAR_STMTS (node).length ();
11607 }
11608 return get_vectype_for_scalar_type (vinfo, scalar_type, group_size);
11609 }
11610
11611 /* Function get_mask_type_for_scalar_type.
11612
11613 Returns the mask type corresponding to a result of comparison
11614 of vectors of the specified SCALAR_TYPE, as supported by the target.
11615 If GROUP_SIZE is nonzero and we're performing BB vectorization,
11616 make sure that the number of elements in the vector is no bigger
11617 than GROUP_SIZE. */
11618
11619 tree
11620 get_mask_type_for_scalar_type (vec_info *vinfo, tree scalar_type,
11621 unsigned int group_size)
11622 {
11623 tree vectype = get_vectype_for_scalar_type (vinfo, scalar_type, group_size);
11624
11625 if (!vectype)
11626 return NULL;
11627
11628 return truth_type_for (vectype);
11629 }
11630
11631 /* Function get_same_sized_vectype
11632
11633 Returns a vector type corresponding to SCALAR_TYPE of size
11634 VECTOR_TYPE if supported by the target. */
11635
11636 tree
11637 get_same_sized_vectype (tree scalar_type, tree vector_type)
11638 {
11639 if (VECT_SCALAR_BOOLEAN_TYPE_P (scalar_type))
11640 return truth_type_for (vector_type);
11641
11642 poly_uint64 nunits;
11643 if (!multiple_p (GET_MODE_SIZE (TYPE_MODE (vector_type)),
11644 GET_MODE_SIZE (TYPE_MODE (scalar_type)), &nunits))
11645 return NULL_TREE;
11646
11647 return get_related_vectype_for_scalar_type (TYPE_MODE (vector_type),
11648 scalar_type, nunits);
11649 }
11650
11651 /* Return true if replacing LOOP_VINFO->vector_mode with VECTOR_MODE
11652 would not change the chosen vector modes. */
11653
11654 bool
11655 vect_chooses_same_modes_p (vec_info *vinfo, machine_mode vector_mode)
11656 {
11657 for (vec_info::mode_set::iterator i = vinfo->used_vector_modes.begin ();
11658 i != vinfo->used_vector_modes.end (); ++i)
11659 if (!VECTOR_MODE_P (*i)
11660 || related_vector_mode (vector_mode, GET_MODE_INNER (*i), 0) != *i)
11661 return false;
11662 return true;
11663 }
11664
11665 /* Function vect_is_simple_use.
11666
11667 Input:
11668 VINFO - the vect info of the loop or basic block that is being vectorized.
11669 OPERAND - operand in the loop or bb.
11670 Output:
11671 DEF_STMT_INFO_OUT (optional) - information about the defining stmt in
11672 case OPERAND is an SSA_NAME that is defined in the vectorizable region
11673 DEF_STMT_OUT (optional) - the defining stmt in case OPERAND is an SSA_NAME;
11674 the definition could be anywhere in the function
11675 DT - the type of definition
11676
11677 Returns whether a stmt with OPERAND can be vectorized.
11678 For loops, supportable operands are constants, loop invariants, and operands
11679 that are defined by the current iteration of the loop. Unsupportable
11680 operands are those that are defined by a previous iteration of the loop (as
11681 is the case in reduction/induction computations).
11682 For basic blocks, supportable operands are constants and bb invariants.
11683 For now, operands defined outside the basic block are not supported. */
11684
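/* For example (names illustrative), given

     for (i = 0; i < n; ++i)
       a[i] = b[i] * 5 + x;

   the constant 5 is classified as vect_constant_def, the loop-invariant
   X as vect_external_def, and the value loaded from b[i] (defined by a
   stmt of the current iteration) as vect_internal_def.  */
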
11685 bool
11686 vect_is_simple_use (tree operand, vec_info *vinfo, enum vect_def_type *dt,
11687 stmt_vec_info *def_stmt_info_out, gimple **def_stmt_out)
11688 {
11689 if (def_stmt_info_out)
11690 *def_stmt_info_out = NULL;
11691 if (def_stmt_out)
11692 *def_stmt_out = NULL;
11693 *dt = vect_unknown_def_type;
11694
11695 if (dump_enabled_p ())
11696 {
11697 dump_printf_loc (MSG_NOTE, vect_location,
11698 "vect_is_simple_use: operand ");
11699 if (TREE_CODE (operand) == SSA_NAME
11700 && !SSA_NAME_IS_DEFAULT_DEF (operand))
11701 dump_gimple_expr (MSG_NOTE, TDF_SLIM, SSA_NAME_DEF_STMT (operand), 0);
11702 else
11703 dump_generic_expr (MSG_NOTE, TDF_SLIM, operand);
11704 }
11705
11706 if (CONSTANT_CLASS_P (operand))
11707 *dt = vect_constant_def;
11708 else if (is_gimple_min_invariant (operand))
11709 *dt = vect_external_def;
11710 else if (TREE_CODE (operand) != SSA_NAME)
11711 *dt = vect_unknown_def_type;
11712 else if (SSA_NAME_IS_DEFAULT_DEF (operand))
11713 *dt = vect_external_def;
11714 else
11715 {
11716 gimple *def_stmt = SSA_NAME_DEF_STMT (operand);
11717 stmt_vec_info stmt_vinfo = vinfo->lookup_def (operand);
11718 if (!stmt_vinfo)
11719 *dt = vect_external_def;
11720 else
11721 {
11722 stmt_vinfo = vect_stmt_to_vectorize (stmt_vinfo);
11723 def_stmt = stmt_vinfo->stmt;
11724 switch (gimple_code (def_stmt))
11725 {
11726 case GIMPLE_PHI:
11727 case GIMPLE_ASSIGN:
11728 case GIMPLE_CALL:
11729 *dt = STMT_VINFO_DEF_TYPE (stmt_vinfo);
11730 break;
11731 default:
11732 *dt = vect_unknown_def_type;
11733 break;
11734 }
11735 if (def_stmt_info_out)
11736 *def_stmt_info_out = stmt_vinfo;
11737 }
11738 if (def_stmt_out)
11739 *def_stmt_out = def_stmt;
11740 }
11741
11742 if (dump_enabled_p ())
11743 {
11744 dump_printf (MSG_NOTE, ", type of def: ");
11745 switch (*dt)
11746 {
11747 case vect_uninitialized_def:
11748 dump_printf (MSG_NOTE, "uninitialized\n");
11749 break;
11750 case vect_constant_def:
11751 dump_printf (MSG_NOTE, "constant\n");
11752 break;
11753 case vect_external_def:
11754 dump_printf (MSG_NOTE, "external\n");
11755 break;
11756 case vect_internal_def:
11757 dump_printf (MSG_NOTE, "internal\n");
11758 break;
11759 case vect_induction_def:
11760 dump_printf (MSG_NOTE, "induction\n");
11761 break;
11762 case vect_reduction_def:
11763 dump_printf (MSG_NOTE, "reduction\n");
11764 break;
11765 case vect_double_reduction_def:
11766 dump_printf (MSG_NOTE, "double reduction\n");
11767 break;
11768 case vect_nested_cycle:
11769 dump_printf (MSG_NOTE, "nested cycle\n");
11770 break;
11771 case vect_unknown_def_type:
11772 dump_printf (MSG_NOTE, "unknown\n");
11773 break;
11774 }
11775 }
11776
11777 if (*dt == vect_unknown_def_type)
11778 {
11779 if (dump_enabled_p ())
11780 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
11781 "Unsupported pattern.\n");
11782 return false;
11783 }
11784
11785 return true;
11786 }
11787
11788 /* Function vect_is_simple_use.
11789
11790 Same as vect_is_simple_use but also determines the vector operand
11791 type of OPERAND and stores it to *VECTYPE. If the definition of
11792 OPERAND is vect_uninitialized_def, vect_constant_def or
11793 vect_external_def *VECTYPE will be set to NULL_TREE and the caller
11794 is responsible for computing the best suited vector type for the
11795 scalar operand. */
11796
11797 bool
11798 vect_is_simple_use (tree operand, vec_info *vinfo, enum vect_def_type *dt,
11799 tree *vectype, stmt_vec_info *def_stmt_info_out,
11800 gimple **def_stmt_out)
11801 {
11802 stmt_vec_info def_stmt_info;
11803 gimple *def_stmt;
11804 if (!vect_is_simple_use (operand, vinfo, dt, &def_stmt_info, &def_stmt))
11805 return false;
11806
11807 if (def_stmt_out)
11808 *def_stmt_out = def_stmt;
11809 if (def_stmt_info_out)
11810 *def_stmt_info_out = def_stmt_info;
11811
11812 /* Now get a vector type if the def is internal, otherwise supply
11813 NULL_TREE and leave it up to the caller to figure out a proper
11814 type for the use stmt. */
11815 if (*dt == vect_internal_def
11816 || *dt == vect_induction_def
11817 || *dt == vect_reduction_def
11818 || *dt == vect_double_reduction_def
11819 || *dt == vect_nested_cycle)
11820 {
11821 *vectype = STMT_VINFO_VECTYPE (def_stmt_info);
11822 gcc_assert (*vectype != NULL_TREE);
11823 if (dump_enabled_p ())
11824 dump_printf_loc (MSG_NOTE, vect_location,
11825 "vect_is_simple_use: vectype %T\n", *vectype);
11826 }
11827 else if (*dt == vect_uninitialized_def
11828 || *dt == vect_constant_def
11829 || *dt == vect_external_def)
11830 *vectype = NULL_TREE;
11831 else
11832 gcc_unreachable ();
11833
11834 return true;
11835 }
11836
11837 /* Function vect_is_simple_use.
11838
11839 Same as vect_is_simple_use but determines the operand by operand
11840 position OPERAND from either STMT or SLP_NODE, filling in *OP
11841 and *SLP_DEF (when SLP_NODE is not NULL). */
11842
11843 bool
11844 vect_is_simple_use (vec_info *vinfo, stmt_vec_info stmt, slp_tree slp_node,
11845 unsigned operand, tree *op, slp_tree *slp_def,
11846 enum vect_def_type *dt,
11847 tree *vectype, stmt_vec_info *def_stmt_info_out)
11848 {
11849 if (slp_node)
11850 {
11851 slp_tree child = SLP_TREE_CHILDREN (slp_node)[operand];
11852 *slp_def = child;
11853 if (SLP_TREE_DEF_TYPE (child) == vect_internal_def)
11854 *op = gimple_get_lhs (SLP_TREE_SCALAR_STMTS (child)[0]->stmt);
11855 else
11856 *op = SLP_TREE_SCALAR_OPS (child)[0];
11857 }
11858 else
11859 {
11860 if (gassign *ass = dyn_cast <gassign *> (stmt->stmt))
11861 {
11862 *op = gimple_op (ass, operand + 1);
11863 /* ??? Ick. But it will vanish with SLP only. */
11864 if (TREE_CODE (*op) == VIEW_CONVERT_EXPR)
11865 *op = TREE_OPERAND (*op, 0);
11866 }
11867 else if (gcall *call = dyn_cast <gcall *> (stmt->stmt))
11868 *op = gimple_call_arg (call, operand);
11869 else
11870 gcc_unreachable ();
11871 }
11872
11873 /* ??? We might want to update *vectype from *slp_def here, though
11874 when sharing nodes this would prevent unsharing in the caller. */
11875 return vect_is_simple_use (*op, vinfo, dt, vectype, def_stmt_info_out);
11876 }
11877
11878 /* If OP is not NULL and is external or constant update its vector
11879 type with VECTYPE. Returns true if successful or false if not,
11880 for example when conflicting vector types are present. */
11881
11882 bool
11883 vect_maybe_update_slp_op_vectype (slp_tree op, tree vectype)
11884 {
11885 if (!op || SLP_TREE_DEF_TYPE (op) == vect_internal_def)
11886 return true;
11887 if (SLP_TREE_VECTYPE (op))
11888 return types_compatible_p (SLP_TREE_VECTYPE (op), vectype);
11889 SLP_TREE_VECTYPE (op) = vectype;
11890 return true;
11891 }
11892
11893 /* Function supportable_widening_operation
11894
11895 Check whether an operation represented by the code CODE is a
11896 widening operation that is supported by the target platform in
11897 vector form (i.e., when operating on arguments of type VECTYPE_IN
11898 producing a result of type VECTYPE_OUT).
11899
11900 Widening operations we currently support are NOP (CONVERT), FLOAT,
11901 FIX_TRUNC and WIDEN_MULT. This function checks if these operations
11902 are supported by the target platform either directly (via vector
11903 tree-codes), or via target builtins.
11904
11905 Output:
11906 - CODE1 and CODE2 are codes of vector operations to be used when
11907 vectorizing the operation, if available.
11908 - MULTI_STEP_CVT determines the number of required intermediate steps in
11909 case of multi-step conversion (like char->short->int - in that case
11910 MULTI_STEP_CVT will be 1).
11911 - INTERM_TYPES contains the intermediate type required to perform the
11912 widening operation (short in the above example). */
11913
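/* A hedged example: widening a vector of chars to a vector of ints is
   typically done in two steps (char -> short -> int).  In that case the
   intermediate short vector type is recorded in *INTERM_TYPES and
   *MULTI_STEP_CVT is 1, while CODE1/CODE2 hold the hi/lo (or even/odd)
   vector codes used to produce the two result vectors of a step;
   exactly which codes are available depends on the target.  */
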
11914 bool
11915 supportable_widening_operation (vec_info *vinfo,
11916 enum tree_code code, stmt_vec_info stmt_info,
11917 tree vectype_out, tree vectype_in,
11918 enum tree_code *code1, enum tree_code *code2,
11919 int *multi_step_cvt,
11920 vec<tree> *interm_types)
11921 {
11922 loop_vec_info loop_info = dyn_cast <loop_vec_info> (vinfo);
11923 class loop *vect_loop = NULL;
11924 machine_mode vec_mode;
11925 enum insn_code icode1, icode2;
11926 optab optab1, optab2;
11927 tree vectype = vectype_in;
11928 tree wide_vectype = vectype_out;
11929 enum tree_code c1, c2;
11930 int i;
11931 tree prev_type, intermediate_type;
11932 machine_mode intermediate_mode, prev_mode;
11933 optab optab3, optab4;
11934
11935 *multi_step_cvt = 0;
11936 if (loop_info)
11937 vect_loop = LOOP_VINFO_LOOP (loop_info);
11938
11939 switch (code)
11940 {
11941 case WIDEN_MULT_EXPR:
11942 /* The result of a vectorized widening operation usually requires
11943 two vectors (because the widened results do not fit into one vector).
11944 The generated vector results would normally be expected to be
11945 generated in the same order as in the original scalar computation,
11946 i.e. if 8 results are generated in each vector iteration, they are
11947 to be organized as follows:
11948 vect1: [res1,res2,res3,res4],
11949 vect2: [res5,res6,res7,res8].
11950
11951 However, in the special case that the result of the widening
11952 operation is used in a reduction computation only, the order doesn't
11953 matter (because when vectorizing a reduction we change the order of
11954 the computation). Some targets can take advantage of this and
11955 generate more efficient code. For example, targets like Altivec,
11956 that support widen_mult using a sequence of {mult_even,mult_odd}
11957 generate the following vectors:
11958 vect1: [res1,res3,res5,res7],
11959 vect2: [res2,res4,res6,res8].
11960
11961 When vectorizing outer-loops, we execute the inner-loop sequentially
11962 (each vectorized inner-loop iteration contributes to VF outer-loop
11963 iterations in parallel). We therefore don't allow changing the
11964 order of the computation in the inner-loop during outer-loop
11965 vectorization. */
11966 /* TODO: Another case in which order doesn't *really* matter is when we
11967 widen and then contract again, e.g. (short)((int)x * y >> 8).
11968 Normally, pack_trunc performs an even/odd permute, whereas the
11969 repack from an even/odd expansion would be an interleave, which
11970 would be significantly simpler for e.g. AVX2. */
11971 /* In any case, in order to avoid duplicating the code below, recurse
11972 on VEC_WIDEN_MULT_EVEN_EXPR. If it succeeds, all the return values
11973 are properly set up for the caller. If we fail, we'll continue with
11974 a VEC_WIDEN_MULT_LO/HI_EXPR check. */
11975 if (vect_loop
11976 && STMT_VINFO_RELEVANT (stmt_info) == vect_used_by_reduction
11977 && !nested_in_vect_loop_p (vect_loop, stmt_info)
11978 && supportable_widening_operation (vinfo, VEC_WIDEN_MULT_EVEN_EXPR,
11979 stmt_info, vectype_out,
11980 vectype_in, code1, code2,
11981 multi_step_cvt, interm_types))
11982 {
11983 /* Elements in a vector with the vect_used_by_reduction property cannot
11984 be reordered if the use chain with this property does not have the
11985 same operation. One such example is s += a * b, where elements
11986 in a and b cannot be reordered. Here we check if the vector defined
11987 by STMT is only directly used in the reduction statement. */
11988 tree lhs = gimple_assign_lhs (stmt_info->stmt);
11989 stmt_vec_info use_stmt_info = loop_info->lookup_single_use (lhs);
11990 if (use_stmt_info
11991 && STMT_VINFO_DEF_TYPE (use_stmt_info) == vect_reduction_def)
11992 return true;
11993 }
11994 c1 = VEC_WIDEN_MULT_LO_EXPR;
11995 c2 = VEC_WIDEN_MULT_HI_EXPR;
11996 break;
11997
11998 case DOT_PROD_EXPR:
11999 c1 = DOT_PROD_EXPR;
12000 c2 = DOT_PROD_EXPR;
12001 break;
12002
12003 case SAD_EXPR:
12004 c1 = SAD_EXPR;
12005 c2 = SAD_EXPR;
12006 break;
12007
12008 case VEC_WIDEN_MULT_EVEN_EXPR:
12009 /* Support the recursion induced just above. */
12010 c1 = VEC_WIDEN_MULT_EVEN_EXPR;
12011 c2 = VEC_WIDEN_MULT_ODD_EXPR;
12012 break;
12013
12014 case WIDEN_LSHIFT_EXPR:
12015 c1 = VEC_WIDEN_LSHIFT_LO_EXPR;
12016 c2 = VEC_WIDEN_LSHIFT_HI_EXPR;
12017 break;
12018
12019 CASE_CONVERT:
12020 c1 = VEC_UNPACK_LO_EXPR;
12021 c2 = VEC_UNPACK_HI_EXPR;
12022 break;
12023
12024 case FLOAT_EXPR:
12025 c1 = VEC_UNPACK_FLOAT_LO_EXPR;
12026 c2 = VEC_UNPACK_FLOAT_HI_EXPR;
12027 break;
12028
12029 case FIX_TRUNC_EXPR:
12030 c1 = VEC_UNPACK_FIX_TRUNC_LO_EXPR;
12031 c2 = VEC_UNPACK_FIX_TRUNC_HI_EXPR;
12032 break;
12033
12034 default:
12035 gcc_unreachable ();
12036 }
12037
12038 if (BYTES_BIG_ENDIAN && c1 != VEC_WIDEN_MULT_EVEN_EXPR)
12039 std::swap (c1, c2);
12040
12041 if (code == FIX_TRUNC_EXPR)
12042 {
12043 /* The signedness is determined from the output operand. */
12044 optab1 = optab_for_tree_code (c1, vectype_out, optab_default);
12045 optab2 = optab_for_tree_code (c2, vectype_out, optab_default);
12046 }
12047 else if (CONVERT_EXPR_CODE_P (code)
12048 && VECTOR_BOOLEAN_TYPE_P (wide_vectype)
12049 && VECTOR_BOOLEAN_TYPE_P (vectype)
12050 && TYPE_MODE (wide_vectype) == TYPE_MODE (vectype)
12051 && SCALAR_INT_MODE_P (TYPE_MODE (vectype)))
12052 {
12053 /* If the input and result modes are the same, a different optab
12054 is needed where we pass in the number of units in vectype. */
12055 optab1 = vec_unpacks_sbool_lo_optab;
12056 optab2 = vec_unpacks_sbool_hi_optab;
12057 }
12058 else
12059 {
12060 optab1 = optab_for_tree_code (c1, vectype, optab_default);
12061 optab2 = optab_for_tree_code (c2, vectype, optab_default);
12062 }
12063
12064 if (!optab1 || !optab2)
12065 return false;
12066
12067 vec_mode = TYPE_MODE (vectype);
12068 if ((icode1 = optab_handler (optab1, vec_mode)) == CODE_FOR_nothing
12069 || (icode2 = optab_handler (optab2, vec_mode)) == CODE_FOR_nothing)
12070 return false;
12071
12072 *code1 = c1;
12073 *code2 = c2;
12074
12075 if (insn_data[icode1].operand[0].mode == TYPE_MODE (wide_vectype)
12076 && insn_data[icode2].operand[0].mode == TYPE_MODE (wide_vectype))
12077 {
12078 if (!VECTOR_BOOLEAN_TYPE_P (vectype))
12079 return true;
12080 /* For scalar masks we may have different boolean
12081 vector types sharing the same QImode. Thus we
12082 add an additional check on the number of elements. */
12083 if (known_eq (TYPE_VECTOR_SUBPARTS (vectype),
12084 TYPE_VECTOR_SUBPARTS (wide_vectype) * 2))
12085 return true;
12086 }
12087
12088 /* Check if it's a multi-step conversion that can be done using intermediate
12089 types. */
12090
12091 prev_type = vectype;
12092 prev_mode = vec_mode;
12093
12094 if (!CONVERT_EXPR_CODE_P (code))
12095 return false;
12096
12097 /* We assume here that there will not be more than MAX_INTERM_CVT_STEPS
12098 intermediate steps in the promotion sequence. We try
12099 MAX_INTERM_CVT_STEPS to get to WIDE_VECTYPE, and fail if we do
12100 not. */
12101 interm_types->create (MAX_INTERM_CVT_STEPS);
12102 for (i = 0; i < MAX_INTERM_CVT_STEPS; i++)
12103 {
12104 intermediate_mode = insn_data[icode1].operand[0].mode;
12105 if (VECTOR_BOOLEAN_TYPE_P (prev_type))
12106 intermediate_type
12107 = vect_halve_mask_nunits (prev_type, intermediate_mode);
12108 else
12109 intermediate_type
12110 = lang_hooks.types.type_for_mode (intermediate_mode,
12111 TYPE_UNSIGNED (prev_type));
12112
12113 if (VECTOR_BOOLEAN_TYPE_P (intermediate_type)
12114 && VECTOR_BOOLEAN_TYPE_P (prev_type)
12115 && intermediate_mode == prev_mode
12116 && SCALAR_INT_MODE_P (prev_mode))
12117 {
12118 /* If the input and result modes are the same, a different optab
12119 is needed where we pass in the number of units in vectype. */
12120 optab3 = vec_unpacks_sbool_lo_optab;
12121 optab4 = vec_unpacks_sbool_hi_optab;
12122 }
12123 else
12124 {
12125 optab3 = optab_for_tree_code (c1, intermediate_type, optab_default);
12126 optab4 = optab_for_tree_code (c2, intermediate_type, optab_default);
12127 }
12128
12129 if (!optab3 || !optab4
12130 || (icode1 = optab_handler (optab1, prev_mode)) == CODE_FOR_nothing
12131 || insn_data[icode1].operand[0].mode != intermediate_mode
12132 || (icode2 = optab_handler (optab2, prev_mode)) == CODE_FOR_nothing
12133 || insn_data[icode2].operand[0].mode != intermediate_mode
12134 || ((icode1 = optab_handler (optab3, intermediate_mode))
12135 == CODE_FOR_nothing)
12136 || ((icode2 = optab_handler (optab4, intermediate_mode))
12137 == CODE_FOR_nothing))
12138 break;
12139
12140 interm_types->quick_push (intermediate_type);
12141 (*multi_step_cvt)++;
12142
12143 if (insn_data[icode1].operand[0].mode == TYPE_MODE (wide_vectype)
12144 && insn_data[icode2].operand[0].mode == TYPE_MODE (wide_vectype))
12145 {
12146 if (!VECTOR_BOOLEAN_TYPE_P (vectype))
12147 return true;
12148 if (known_eq (TYPE_VECTOR_SUBPARTS (intermediate_type),
12149 TYPE_VECTOR_SUBPARTS (wide_vectype) * 2))
12150 return true;
12151 }
12152
12153 prev_type = intermediate_type;
12154 prev_mode = intermediate_mode;
12155 }
12156
12157 interm_types->release ();
12158 return false;
12159 }
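
/* Caller-side sketch (hypothetical helper): ask whether a widening
   conversion from VECTYPE_IN to VECTYPE_OUT is supported, possibly via
   intermediate types as in a char->short->int promotion.  */

static bool
widening_conversion_supported_p (vec_info *vinfo, stmt_vec_info stmt_info,
				 tree vectype_out, tree vectype_in)
{
  enum tree_code code1, code2;
  int multi_step_cvt = 0;
  vec<tree> interm_types = vNULL;

  if (!supportable_widening_operation (vinfo, NOP_EXPR, stmt_info,
				       vectype_out, vectype_in,
				       &code1, &code2, &multi_step_cvt,
				       &interm_types))
    return false;

  /* For char->short->int, MULTI_STEP_CVT is 1 and INTERM_TYPES holds the
     intermediate short vector type.  */
  interm_types.release ();
  return true;
}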
12160
12161
12162 /* Function supportable_narrowing_operation
12163
12164 Check whether an operation represented by the code CODE is a
12165 narrowing operation that is supported by the target platform in
12166 vector form (i.e., when operating on arguments of type VECTYPE_IN
12167 and producing a result of type VECTYPE_OUT).
12168
12169 Narrowing operations we currently support are NOP (CONVERT), FIX_TRUNC
12170 and FLOAT. This function checks if these operations are supported by
12171 the target platform directly via vector tree-codes.
12172
12173 Output:
12174 - CODE1 is the code of a vector operation to be used when
12175 vectorizing the operation, if available.
12176 - MULTI_STEP_CVT determines the number of required intermediate steps in
12177 case of multi-step conversion (like int->short->char - in that case
12178 MULTI_STEP_CVT will be 1).
12179 - INTERM_TYPES contains the intermediate type required to perform the
12180 narrowing operation (short in the above example). */
12181
12182 bool
12183 supportable_narrowing_operation (enum tree_code code,
12184 tree vectype_out, tree vectype_in,
12185 enum tree_code *code1, int *multi_step_cvt,
12186 vec<tree> *interm_types)
12187 {
12188 machine_mode vec_mode;
12189 enum insn_code icode1;
12190 optab optab1, interm_optab;
12191 tree vectype = vectype_in;
12192 tree narrow_vectype = vectype_out;
12193 enum tree_code c1;
12194 tree intermediate_type, prev_type;
12195 machine_mode intermediate_mode, prev_mode;
12196 int i;
12197 bool uns;
12198
12199 *multi_step_cvt = 0;
12200 switch (code)
12201 {
12202 CASE_CONVERT:
12203 c1 = VEC_PACK_TRUNC_EXPR;
12204 if (VECTOR_BOOLEAN_TYPE_P (narrow_vectype)
12205 && VECTOR_BOOLEAN_TYPE_P (vectype)
12206 && TYPE_MODE (narrow_vectype) == TYPE_MODE (vectype)
12207 && SCALAR_INT_MODE_P (TYPE_MODE (vectype)))
12208 optab1 = vec_pack_sbool_trunc_optab;
12209 else
12210 optab1 = optab_for_tree_code (c1, vectype, optab_default);
12211 break;
12212
12213 case FIX_TRUNC_EXPR:
12214 c1 = VEC_PACK_FIX_TRUNC_EXPR;
12215 /* The signedness is determined from the output operand. */
12216 optab1 = optab_for_tree_code (c1, vectype_out, optab_default);
12217 break;
12218
12219 case FLOAT_EXPR:
12220 c1 = VEC_PACK_FLOAT_EXPR;
12221 optab1 = optab_for_tree_code (c1, vectype, optab_default);
12222 break;
12223
12224 default:
12225 gcc_unreachable ();
12226 }
12227
12228 if (!optab1)
12229 return false;
12230
12231 vec_mode = TYPE_MODE (vectype);
12232 if ((icode1 = optab_handler (optab1, vec_mode)) == CODE_FOR_nothing)
12233 return false;
12234
12235 *code1 = c1;
12236
12237 if (insn_data[icode1].operand[0].mode == TYPE_MODE (narrow_vectype))
12238 {
12239 if (!VECTOR_BOOLEAN_TYPE_P (vectype))
12240 return true;
12241 /* For scalar masks we may have different boolean
12242 vector types sharing the same QImode. Thus we
12243 add an additional check on the number of elements. */
12244 if (known_eq (TYPE_VECTOR_SUBPARTS (vectype) * 2,
12245 TYPE_VECTOR_SUBPARTS (narrow_vectype)))
12246 return true;
12247 }
12248
12249 if (code == FLOAT_EXPR)
12250 return false;
12251
12252 /* Check if it's a multi-step conversion that can be done using intermediate
12253 types. */
12254 prev_mode = vec_mode;
12255 prev_type = vectype;
12256 if (code == FIX_TRUNC_EXPR)
12257 uns = TYPE_UNSIGNED (vectype_out);
12258 else
12259 uns = TYPE_UNSIGNED (vectype);
12260
12261 /* For multi-step FIX_TRUNC_EXPR prefer signed floating to integer
12262 conversion over unsigned, as unsigned FIX_TRUNC_EXPR is often more
12263 costly than signed. */
12264 if (code == FIX_TRUNC_EXPR && uns)
12265 {
12266 enum insn_code icode2;
12267
12268 intermediate_type
12269 = lang_hooks.types.type_for_mode (TYPE_MODE (vectype_out), 0);
12270 interm_optab
12271 = optab_for_tree_code (c1, intermediate_type, optab_default);
12272 if (interm_optab != unknown_optab
12273 && (icode2 = optab_handler (optab1, vec_mode)) != CODE_FOR_nothing
12274 && insn_data[icode1].operand[0].mode
12275 == insn_data[icode2].operand[0].mode)
12276 {
12277 uns = false;
12278 optab1 = interm_optab;
12279 icode1 = icode2;
12280 }
12281 }
12282
12283 /* We assume here that there will not be more than MAX_INTERM_CVT_STEPS
12284 intermediate steps in the narrowing sequence. We try
12285 MAX_INTERM_CVT_STEPS to get to NARROW_VECTYPE, and fail if we do not. */
12286 interm_types->create (MAX_INTERM_CVT_STEPS);
12287 for (i = 0; i < MAX_INTERM_CVT_STEPS; i++)
12288 {
12289 intermediate_mode = insn_data[icode1].operand[0].mode;
12290 if (VECTOR_BOOLEAN_TYPE_P (prev_type))
12291 intermediate_type
12292 = vect_double_mask_nunits (prev_type, intermediate_mode);
12293 else
12294 intermediate_type
12295 = lang_hooks.types.type_for_mode (intermediate_mode, uns);
12296 if (VECTOR_BOOLEAN_TYPE_P (intermediate_type)
12297 && VECTOR_BOOLEAN_TYPE_P (prev_type)
12298 && intermediate_mode == prev_mode
12299 && SCALAR_INT_MODE_P (prev_mode))
12300 interm_optab = vec_pack_sbool_trunc_optab;
12301 else
12302 interm_optab
12303 = optab_for_tree_code (VEC_PACK_TRUNC_EXPR, intermediate_type,
12304 optab_default);
12305 if (!interm_optab
12306 || ((icode1 = optab_handler (optab1, prev_mode)) == CODE_FOR_nothing)
12307 || insn_data[icode1].operand[0].mode != intermediate_mode
12308 || ((icode1 = optab_handler (interm_optab, intermediate_mode))
12309 == CODE_FOR_nothing))
12310 break;
12311
12312 interm_types->quick_push (intermediate_type);
12313 (*multi_step_cvt)++;
12314
12315 if (insn_data[icode1].operand[0].mode == TYPE_MODE (narrow_vectype))
12316 {
12317 if (!VECTOR_BOOLEAN_TYPE_P (vectype))
12318 return true;
12319 if (known_eq (TYPE_VECTOR_SUBPARTS (intermediate_type) * 2,
12320 TYPE_VECTOR_SUBPARTS (narrow_vectype)))
12321 return true;
12322 }
12323
12324 prev_mode = intermediate_mode;
12325 prev_type = intermediate_type;
12326 optab1 = interm_optab;
12327 }
12328
12329 interm_types->release ();
12330 return false;
12331 }
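
/* The mirror-image sketch for the narrowing case (hypothetical helper):
   ask whether e.g. an int->short->char demotion can be vectorized,
   possibly through several VEC_PACK_TRUNC steps.  */

static bool
narrowing_conversion_supported_p (tree vectype_out, tree vectype_in)
{
  enum tree_code code1;
  int multi_step_cvt = 0;
  vec<tree> interm_types = vNULL;

  if (!supportable_narrowing_operation (NOP_EXPR, vectype_out, vectype_in,
					&code1, &multi_step_cvt,
					&interm_types))
    return false;

  /* For int->short->char, MULTI_STEP_CVT is 1 and INTERM_TYPES holds the
     intermediate short vector type.  */
  interm_types.release ();
  return true;
}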
12332
12333 /* Generate and return a statement that sets vector mask MASK such that
12334 MASK[I] is true iff J + START_INDEX < END_INDEX for all J <= I. */
12335
12336 gcall *
12337 vect_gen_while (tree mask, tree start_index, tree end_index)
12338 {
12339 tree cmp_type = TREE_TYPE (start_index);
12340 tree mask_type = TREE_TYPE (mask);
12341 gcc_checking_assert (direct_internal_fn_supported_p (IFN_WHILE_ULT,
12342 cmp_type, mask_type,
12343 OPTIMIZE_FOR_SPEED));
12344 gcall *call = gimple_build_call_internal (IFN_WHILE_ULT, 3,
12345 start_index, end_index,
12346 build_zero_cst (mask_type));
12347 gimple_call_set_lhs (call, mask);
12348 return call;
12349 }
12350
12351 /* Generate a vector mask of type MASK_TYPE for which index I is false iff
12352 J + START_INDEX < END_INDEX for all J <= I. Add the statements to SEQ. */
12353
12354 tree
12355 vect_gen_while_not (gimple_seq *seq, tree mask_type, tree start_index,
12356 tree end_index)
12357 {
12358 tree tmp = make_ssa_name (mask_type);
12359 gcall *call = vect_gen_while (tmp, start_index, end_index);
12360 gimple_seq_add_stmt (seq, call);
12361 return gimple_build (seq, BIT_NOT_EXPR, mask_type, tmp);
12362 }
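
/* Usage sketch (hypothetical helper): a caller is expected to verify that
   the target supports IFN_WHILE_ULT for the chosen comparison and mask
   types before generating the mask, since vect_gen_while only asserts it.  */

static tree
maybe_gen_loop_mask (gimple_seq *seq, tree mask_type, tree index, tree limit)
{
  tree cmp_type = TREE_TYPE (index);
  if (!direct_internal_fn_supported_p (IFN_WHILE_ULT, cmp_type, mask_type,
				       OPTIMIZE_FOR_SPEED))
    return NULL_TREE;

  /* Lanes 0..N-1 of the result are active while INDEX + lane < LIMIT.  */
  tree mask = make_ssa_name (mask_type);
  gimple_seq_add_stmt (seq, vect_gen_while (mask, index, limit));
  return mask;
}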
12363
12364 /* Try to compute the vector types required to vectorize STMT_INFO,
12365 returning true on success and false if vectorization isn't possible.
12366 If GROUP_SIZE is nonzero and we're performing BB vectorization,
12367 make sure that the number of elements in the vectors is no bigger
12368 than GROUP_SIZE.
12369
12370 On success:
12371
12372 - Set *STMT_VECTYPE_OUT to:
12373 - NULL_TREE if the statement doesn't need to be vectorized;
12374 - the equivalent of STMT_VINFO_VECTYPE otherwise.
12375
12376 - Set *NUNITS_VECTYPE_OUT to the vector type that contains the maximum
12377 number of units needed to vectorize STMT_INFO, or NULL_TREE if the
12378 statement does not help to determine the overall number of units. */
12379
12380 opt_result
12381 vect_get_vector_types_for_stmt (vec_info *vinfo, stmt_vec_info stmt_info,
12382 tree *stmt_vectype_out,
12383 tree *nunits_vectype_out,
12384 unsigned int group_size)
12385 {
12386 gimple *stmt = stmt_info->stmt;
12387
12388 /* For BB vectorization, we should always have a group size once we've
12389 constructed the SLP tree; the only valid uses of zero GROUP_SIZEs
12390 are tentative requests during things like early data reference
12391 analysis and pattern recognition. */
12392 if (is_a <bb_vec_info> (vinfo))
12393 gcc_assert (vinfo->slp_instances.is_empty () || group_size != 0);
12394 else
12395 group_size = 0;
12396
12397 *stmt_vectype_out = NULL_TREE;
12398 *nunits_vectype_out = NULL_TREE;
12399
12400 if (gimple_get_lhs (stmt) == NULL_TREE
12401 /* MASK_STORE has no lhs, but is ok. */
12402 && !gimple_call_internal_p (stmt, IFN_MASK_STORE))
12403 {
12404 if (is_a <gcall *> (stmt))
12405 {
12406 /* Ignore calls with no lhs. These must be calls to
12407 #pragma omp simd functions, and what vectorization factor
12408 they really need can't be determined until
12409 vectorizable_simd_clone_call. */
12410 if (dump_enabled_p ())
12411 dump_printf_loc (MSG_NOTE, vect_location,
12412 "defer to SIMD clone analysis.\n");
12413 return opt_result::success ();
12414 }
12415
12416 return opt_result::failure_at (stmt,
12417 "not vectorized: irregular stmt.%G", stmt);
12418 }
12419
12420 if (VECTOR_MODE_P (TYPE_MODE (gimple_expr_type (stmt))))
12421 return opt_result::failure_at (stmt,
12422 "not vectorized: vector stmt in loop:%G",
12423 stmt);
12424
12425 tree vectype;
12426 tree scalar_type = NULL_TREE;
12427 if (group_size == 0 && STMT_VINFO_VECTYPE (stmt_info))
12428 {
12429 vectype = STMT_VINFO_VECTYPE (stmt_info);
12430 if (dump_enabled_p ())
12431 dump_printf_loc (MSG_NOTE, vect_location,
12432 "precomputed vectype: %T\n", vectype);
12433 }
12434 else if (vect_use_mask_type_p (stmt_info))
12435 {
12436 unsigned int precision = stmt_info->mask_precision;
12437 scalar_type = build_nonstandard_integer_type (precision, 1);
12438 vectype = get_mask_type_for_scalar_type (vinfo, scalar_type, group_size);
12439 if (!vectype)
12440 return opt_result::failure_at (stmt, "not vectorized: unsupported"
12441 " data-type %T\n", scalar_type);
12442 if (dump_enabled_p ())
12443 dump_printf_loc (MSG_NOTE, vect_location, "vectype: %T\n", vectype);
12444 }
12445 else
12446 {
12447 if (data_reference *dr = STMT_VINFO_DATA_REF (stmt_info))
12448 scalar_type = TREE_TYPE (DR_REF (dr));
12449 else if (gimple_call_internal_p (stmt, IFN_MASK_STORE))
12450 scalar_type = TREE_TYPE (gimple_call_arg (stmt, 3));
12451 else
12452 scalar_type = TREE_TYPE (gimple_get_lhs (stmt));
12453
12454 if (dump_enabled_p ())
12455 {
12456 if (group_size)
12457 dump_printf_loc (MSG_NOTE, vect_location,
12458 "get vectype for scalar type (group size %d):"
12459 " %T\n", group_size, scalar_type);
12460 else
12461 dump_printf_loc (MSG_NOTE, vect_location,
12462 "get vectype for scalar type: %T\n", scalar_type);
12463 }
12464 vectype = get_vectype_for_scalar_type (vinfo, scalar_type, group_size);
12465 if (!vectype)
12466 return opt_result::failure_at (stmt,
12467 "not vectorized:"
12468 " unsupported data-type %T\n",
12469 scalar_type);
12470
12471 if (dump_enabled_p ())
12472 dump_printf_loc (MSG_NOTE, vect_location, "vectype: %T\n", vectype);
12473 }
12474 *stmt_vectype_out = vectype;
12475
12476 /* Don't try to compute scalar types if the stmt produces a boolean
12477 vector; use the existing vector type instead. */
12478 tree nunits_vectype = vectype;
12479 if (!VECTOR_BOOLEAN_TYPE_P (vectype))
12480 {
12481 /* The number of units is set according to the smallest scalar
12482 type (or the largest vector size, but we only support one
12483 vector size per vectorization). */
12484 HOST_WIDE_INT dummy;
12485 scalar_type = vect_get_smallest_scalar_type (stmt_info, &dummy, &dummy);
12486 if (scalar_type != TREE_TYPE (vectype))
12487 {
12488 if (dump_enabled_p ())
12489 dump_printf_loc (MSG_NOTE, vect_location,
12490 "get vectype for smallest scalar type: %T\n",
12491 scalar_type);
12492 nunits_vectype = get_vectype_for_scalar_type (vinfo, scalar_type,
12493 group_size);
12494 if (!nunits_vectype)
12495 return opt_result::failure_at
12496 (stmt, "not vectorized: unsupported data-type %T\n",
12497 scalar_type);
12498 if (dump_enabled_p ())
12499 dump_printf_loc (MSG_NOTE, vect_location, "nunits vectype: %T\n",
12500 nunits_vectype);
12501 }
12502 }
12503
12504 gcc_assert (multiple_p (TYPE_VECTOR_SUBPARTS (nunits_vectype),
12505 TYPE_VECTOR_SUBPARTS (*stmt_vectype_out)));
12506
12507 if (dump_enabled_p ())
12508 {
12509 dump_printf_loc (MSG_NOTE, vect_location, "nunits = ");
12510 dump_dec (MSG_NOTE, TYPE_VECTOR_SUBPARTS (nunits_vectype));
12511 dump_printf (MSG_NOTE, "\n");
12512 }
12513
12514 *nunits_vectype_out = nunits_vectype;
12515 return opt_result::success ();
12516 }
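
/* Analysis-time sketch (hypothetical wrapper): compute both the statement's
   own vectype and the vectype that determines the number of units, and
   record the former in STMT_VINFO_VECTYPE if it is not already set.  */

static opt_result
compute_vectypes_for_stmt (vec_info *vinfo, stmt_vec_info stmt_info)
{
  tree stmt_vectype, nunits_vectype;
  opt_result res
    = vect_get_vector_types_for_stmt (vinfo, stmt_info, &stmt_vectype,
				      &nunits_vectype, 0);
  if (!res)
    return res;

  if (stmt_vectype && !STMT_VINFO_VECTYPE (stmt_info))
    STMT_VINFO_VECTYPE (stmt_info) = stmt_vectype;

  /* NUNITS_VECTYPE would normally feed the vectorization factor
     computation; here we only dump it.  */
  if (nunits_vectype && dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location,
		     "vectype for VF purposes: %T\n", nunits_vectype);
  return opt_result::success ();
}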