gcc/tree-vect-stmts.c
1 /* Statement Analysis and Transformation for Vectorization
2 Copyright (C) 2003-2020 Free Software Foundation, Inc.
3 Contributed by Dorit Naishlos <dorit@il.ibm.com>
4 and Ira Rosen <irar@il.ibm.com>
5
6 This file is part of GCC.
7
8 GCC is free software; you can redistribute it and/or modify it under
9 the terms of the GNU General Public License as published by the Free
10 Software Foundation; either version 3, or (at your option) any later
11 version.
12
13 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
14 WARRANTY; without even the implied warranty of MERCHANTABILITY or
15 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
16 for more details.
17
18 You should have received a copy of the GNU General Public License
19 along with GCC; see the file COPYING3. If not see
20 <http://www.gnu.org/licenses/>. */
21
22 #include "config.h"
23 #include "system.h"
24 #include "coretypes.h"
25 #include "backend.h"
26 #include "target.h"
27 #include "rtl.h"
28 #include "tree.h"
29 #include "gimple.h"
30 #include "ssa.h"
31 #include "optabs-tree.h"
32 #include "insn-config.h"
33 #include "recog.h" /* FIXME: for insn_data */
34 #include "cgraph.h"
35 #include "dumpfile.h"
36 #include "alias.h"
37 #include "fold-const.h"
38 #include "stor-layout.h"
39 #include "tree-eh.h"
40 #include "gimplify.h"
41 #include "gimple-iterator.h"
42 #include "gimplify-me.h"
43 #include "tree-cfg.h"
44 #include "tree-ssa-loop-manip.h"
45 #include "cfgloop.h"
46 #include "explow.h"
47 #include "tree-ssa-loop.h"
48 #include "tree-scalar-evolution.h"
49 #include "tree-vectorizer.h"
50 #include "builtins.h"
51 #include "internal-fn.h"
52 #include "tree-vector-builder.h"
53 #include "vec-perm-indices.h"
54 #include "tree-ssa-loop-niter.h"
55 #include "gimple-fold.h"
56 #include "regs.h"
57 #include "attribs.h"
58
59 /* For lang_hooks.types.type_for_mode. */
60 #include "langhooks.h"
61
62 /* Return the vectorized type for the given statement. */
63
64 tree
65 stmt_vectype (class _stmt_vec_info *stmt_info)
66 {
67 return STMT_VINFO_VECTYPE (stmt_info);
68 }
69
70 /* Return TRUE iff the given statement is in an inner loop relative to
71 the loop being vectorized. */
72 bool
73 stmt_in_inner_loop_p (vec_info *vinfo, class _stmt_vec_info *stmt_info)
74 {
75 gimple *stmt = STMT_VINFO_STMT (stmt_info);
76 basic_block bb = gimple_bb (stmt);
77 loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
78 class loop* loop;
79
80 if (!loop_vinfo)
81 return false;
82
83 loop = LOOP_VINFO_LOOP (loop_vinfo);
84
85 return (bb->loop_father == loop->inner);
86 }
87
88 /* Record the cost of a statement, either by directly informing the
89 target model or by saving it in a vector for later processing.
90 Return a preliminary estimate of the statement's cost. */
91
92 unsigned
93 record_stmt_cost (stmt_vector_for_cost *body_cost_vec, int count,
94 enum vect_cost_for_stmt kind, stmt_vec_info stmt_info,
95 tree vectype, int misalign,
96 enum vect_cost_model_location where)
97 {
98 if ((kind == vector_load || kind == unaligned_load)
99 && (stmt_info && STMT_VINFO_GATHER_SCATTER_P (stmt_info)))
100 kind = vector_gather_load;
101 if ((kind == vector_store || kind == unaligned_store)
102 && (stmt_info && STMT_VINFO_GATHER_SCATTER_P (stmt_info)))
103 kind = vector_scatter_store;
104
105 stmt_info_for_cost si = { count, kind, where, stmt_info, vectype, misalign };
106 body_cost_vec->safe_push (si);
107
108 return (unsigned)
109 (builtin_vectorization_cost (kind, vectype, misalign) * count);
110 }
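
/* Illustrative usage sketch (editor's example, not a call taken from this
   file): a caller that wants to account for one vector statement in the
   loop body, using the overload defined above, would do roughly

     unsigned cost = record_stmt_cost (cost_vec, 1, vector_stmt,
                                       stmt_info, vectype, 0, vect_body);

   i.e. one copy of a vector_stmt with no misalignment, costed in the
   vector body.  */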
111
112 /* Return a variable of type ELEM_TYPE[NELEMS]. */
113
114 static tree
115 create_vector_array (tree elem_type, unsigned HOST_WIDE_INT nelems)
116 {
117 return create_tmp_var (build_array_type_nelts (elem_type, nelems),
118 "vect_array");
119 }
120
121 /* ARRAY is an array of vectors created by create_vector_array.
122 Return an SSA_NAME for the vector in index N. The reference
123 is part of the vectorization of STMT_INFO and the vector is associated
124 with scalar destination SCALAR_DEST. */
125
126 static tree
127 read_vector_array (vec_info *vinfo,
128 stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
129 tree scalar_dest, tree array, unsigned HOST_WIDE_INT n)
130 {
131 tree vect_type, vect, vect_name, array_ref;
132 gimple *new_stmt;
133
134 gcc_assert (TREE_CODE (TREE_TYPE (array)) == ARRAY_TYPE);
135 vect_type = TREE_TYPE (TREE_TYPE (array));
136 vect = vect_create_destination_var (scalar_dest, vect_type);
137 array_ref = build4 (ARRAY_REF, vect_type, array,
138 build_int_cst (size_type_node, n),
139 NULL_TREE, NULL_TREE);
140
141 new_stmt = gimple_build_assign (vect, array_ref);
142 vect_name = make_ssa_name (vect, new_stmt);
143 gimple_assign_set_lhs (new_stmt, vect_name);
144 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
145
146 return vect_name;
147 }
148
149 /* ARRAY is an array of vectors created by create_vector_array.
150 Emit code to store SSA_NAME VECT in index N of the array.
151 The store is part of the vectorization of STMT_INFO. */
152
153 static void
154 write_vector_array (vec_info *vinfo,
155 stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
156 tree vect, tree array, unsigned HOST_WIDE_INT n)
157 {
158 tree array_ref;
159 gimple *new_stmt;
160
161 array_ref = build4 (ARRAY_REF, TREE_TYPE (vect), array,
162 build_int_cst (size_type_node, n),
163 NULL_TREE, NULL_TREE);
164
165 new_stmt = gimple_build_assign (array_ref, vect);
166 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
167 }
168
169 /* PTR is a pointer to an array of type TYPE. Return a representation
170 of *PTR. The memory reference replaces those in FIRST_DR
171 (and its group). */
172
173 static tree
174 create_array_ref (tree type, tree ptr, tree alias_ptr_type)
175 {
176 tree mem_ref;
177
178 mem_ref = build2 (MEM_REF, type, ptr, build_int_cst (alias_ptr_type, 0));
179 /* Arrays have the same alignment as their type. */
180 set_ptr_info_alignment (get_ptr_info (ptr), TYPE_ALIGN_UNIT (type), 0);
181 return mem_ref;
182 }
183
184 /* Add a clobber of variable VAR to the vectorization of STMT_INFO.
185 Emit the clobber before *GSI. */
186
187 static void
188 vect_clobber_variable (vec_info *vinfo, stmt_vec_info stmt_info,
189 gimple_stmt_iterator *gsi, tree var)
190 {
191 tree clobber = build_clobber (TREE_TYPE (var));
192 gimple *new_stmt = gimple_build_assign (var, clobber);
193 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
194 }
195
196 /* Utility functions used by vect_mark_stmts_to_be_vectorized. */
197
198 /* Function vect_mark_relevant.
199
200 Mark STMT_INFO as "relevant for vectorization" and add it to WORKLIST. */
201
202 static void
203 vect_mark_relevant (vec<stmt_vec_info> *worklist, stmt_vec_info stmt_info,
204 enum vect_relevant relevant, bool live_p)
205 {
206 enum vect_relevant save_relevant = STMT_VINFO_RELEVANT (stmt_info);
207 bool save_live_p = STMT_VINFO_LIVE_P (stmt_info);
208
209 if (dump_enabled_p ())
210 dump_printf_loc (MSG_NOTE, vect_location,
211 "mark relevant %d, live %d: %G", relevant, live_p,
212 stmt_info->stmt);
213
214 /* If this stmt is an original stmt in a pattern, we might need to mark its
215 related pattern stmt instead of the original stmt. However, such stmts
216 may have their own uses that are not in any pattern; in such cases the
217 stmt itself should be marked. */
218 if (STMT_VINFO_IN_PATTERN_P (stmt_info))
219 {
220 /* This is the last stmt in a sequence that was detected as a
221 pattern that can potentially be vectorized. Don't mark the stmt
222 as relevant/live because it's not going to be vectorized.
223 Instead mark the pattern-stmt that replaces it. */
224
225 if (dump_enabled_p ())
226 dump_printf_loc (MSG_NOTE, vect_location,
227 "last stmt in pattern. don't mark"
228 " relevant/live.\n");
229 stmt_vec_info old_stmt_info = stmt_info;
230 stmt_info = STMT_VINFO_RELATED_STMT (stmt_info);
231 gcc_assert (STMT_VINFO_RELATED_STMT (stmt_info) == old_stmt_info);
232 save_relevant = STMT_VINFO_RELEVANT (stmt_info);
233 save_live_p = STMT_VINFO_LIVE_P (stmt_info);
234 }
235
236 STMT_VINFO_LIVE_P (stmt_info) |= live_p;
237 if (relevant > STMT_VINFO_RELEVANT (stmt_info))
238 STMT_VINFO_RELEVANT (stmt_info) = relevant;
239
240 if (STMT_VINFO_RELEVANT (stmt_info) == save_relevant
241 && STMT_VINFO_LIVE_P (stmt_info) == save_live_p)
242 {
243 if (dump_enabled_p ())
244 dump_printf_loc (MSG_NOTE, vect_location,
245 "already marked relevant/live.\n");
246 return;
247 }
248
249 worklist->safe_push (stmt_info);
250 }
251
252
253 /* Function is_simple_and_all_uses_invariant
254
255 Return true if STMT_INFO is simple and all uses of it are invariant. */
256
257 bool
258 is_simple_and_all_uses_invariant (stmt_vec_info stmt_info,
259 loop_vec_info loop_vinfo)
260 {
261 tree op;
262 ssa_op_iter iter;
263
264 gassign *stmt = dyn_cast <gassign *> (stmt_info->stmt);
265 if (!stmt)
266 return false;
267
268 FOR_EACH_SSA_TREE_OPERAND (op, stmt, iter, SSA_OP_USE)
269 {
270 enum vect_def_type dt = vect_uninitialized_def;
271
272 if (!vect_is_simple_use (op, loop_vinfo, &dt))
273 {
274 if (dump_enabled_p ())
275 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
276 "use not simple.\n");
277 return false;
278 }
279
280 if (dt != vect_external_def && dt != vect_constant_def)
281 return false;
282 }
283 return true;
284 }
285
286 /* Function vect_stmt_relevant_p.
287
288 Return true if STMT_INFO, in the loop that is represented by LOOP_VINFO,
289 is "relevant for vectorization".
290
291 A stmt is considered "relevant for vectorization" if:
292 - it has uses outside the loop.
293 - it has vdefs (it alters memory).
294 - it is a control stmt in the loop (except for the loop exit condition).
295
296 CHECKME: what other side effects would the vectorizer allow? */
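
/* Illustrative example (hypothetical user code, editor's sketch):

     int f (int *a, int *b, int n)
     {
       int s = 0;
       for (int i = 0; i < n; i++)
         {
           a[i] = b[i] + 1;
           s = s + b[i];
         }
       return s;
     }

   The store to a[i] is relevant because it has a vdef (it alters memory);
   the accumulation into 's' is live because its value is used outside the
   loop through the loop exit phi.  */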
297
298 static bool
299 vect_stmt_relevant_p (stmt_vec_info stmt_info, loop_vec_info loop_vinfo,
300 enum vect_relevant *relevant, bool *live_p)
301 {
302 class loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
303 ssa_op_iter op_iter;
304 imm_use_iterator imm_iter;
305 use_operand_p use_p;
306 def_operand_p def_p;
307
308 *relevant = vect_unused_in_scope;
309 *live_p = false;
310
311 /* cond stmt other than loop exit cond. */
312 if (is_ctrl_stmt (stmt_info->stmt)
313 && STMT_VINFO_TYPE (stmt_info) != loop_exit_ctrl_vec_info_type)
314 *relevant = vect_used_in_scope;
315
316 /* changing memory. */
317 if (gimple_code (stmt_info->stmt) != GIMPLE_PHI)
318 if (gimple_vdef (stmt_info->stmt)
319 && !gimple_clobber_p (stmt_info->stmt))
320 {
321 if (dump_enabled_p ())
322 dump_printf_loc (MSG_NOTE, vect_location,
323 "vec_stmt_relevant_p: stmt has vdefs.\n");
324 *relevant = vect_used_in_scope;
325 }
326
327 /* uses outside the loop. */
328 FOR_EACH_PHI_OR_STMT_DEF (def_p, stmt_info->stmt, op_iter, SSA_OP_DEF)
329 {
330 FOR_EACH_IMM_USE_FAST (use_p, imm_iter, DEF_FROM_PTR (def_p))
331 {
332 basic_block bb = gimple_bb (USE_STMT (use_p));
333 if (!flow_bb_inside_loop_p (loop, bb))
334 {
335 if (is_gimple_debug (USE_STMT (use_p)))
336 continue;
337
338 if (dump_enabled_p ())
339 dump_printf_loc (MSG_NOTE, vect_location,
340 "vec_stmt_relevant_p: used out of loop.\n");
341
342 /* We expect all such uses to be in the loop exit phis
343 (because of loop-closed SSA form). */
344 gcc_assert (gimple_code (USE_STMT (use_p)) == GIMPLE_PHI);
345 gcc_assert (bb == single_exit (loop)->dest);
346
347 *live_p = true;
348 }
349 }
350 }
351
352 if (*live_p && *relevant == vect_unused_in_scope
353 && !is_simple_and_all_uses_invariant (stmt_info, loop_vinfo))
354 {
355 if (dump_enabled_p ())
356 dump_printf_loc (MSG_NOTE, vect_location,
357 "vec_stmt_relevant_p: stmt live but not relevant.\n");
358 *relevant = vect_used_only_live;
359 }
360
361 return (*live_p || *relevant);
362 }
363
364
365 /* Function exist_non_indexing_operands_for_use_p
366
367 USE is one of the uses attached to STMT_INFO. Check if USE is
368 used in STMT_INFO for anything other than indexing an array. */
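
/* Illustrative example (editor's sketch): for a store of the form

     a[i_5] = x_7;

   the use of i_5 only computes the address (indexing), while the use of
   x_7 is the stored value and therefore a non-indexing operand.  For a
   load x_7 = a[i_5], x_7 is a def, not a use, so the remaining uses are
   all indexing uses.  */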
369
370 static bool
371 exist_non_indexing_operands_for_use_p (tree use, stmt_vec_info stmt_info)
372 {
373 tree operand;
374
375 /* USE corresponds to some operand in STMT. If there is no data
376 reference in STMT, then any operand that corresponds to USE
377 is not indexing an array. */
378 if (!STMT_VINFO_DATA_REF (stmt_info))
379 return true;
380
381 /* STMT has a data_ref. FORNOW this means that it is of one of
382 the following forms:
383 -1- ARRAY_REF = var
384 -2- var = ARRAY_REF
385 (This should have been verified in analyze_data_refs).
386
387 'var' in the second case corresponds to a def, not a use,
388 so USE cannot correspond to any operands that are not used
389 for array indexing.
390
391 Therefore, all we need to check is if STMT falls into the
392 first case, and whether var corresponds to USE. */
393
394 gassign *assign = dyn_cast <gassign *> (stmt_info->stmt);
395 if (!assign || !gimple_assign_copy_p (assign))
396 {
397 gcall *call = dyn_cast <gcall *> (stmt_info->stmt);
398 if (call && gimple_call_internal_p (call))
399 {
400 internal_fn ifn = gimple_call_internal_fn (call);
401 int mask_index = internal_fn_mask_index (ifn);
402 if (mask_index >= 0
403 && use == gimple_call_arg (call, mask_index))
404 return true;
405 int stored_value_index = internal_fn_stored_value_index (ifn);
406 if (stored_value_index >= 0
407 && use == gimple_call_arg (call, stored_value_index))
408 return true;
409 if (internal_gather_scatter_fn_p (ifn)
410 && use == gimple_call_arg (call, 1))
411 return true;
412 }
413 return false;
414 }
415
416 if (TREE_CODE (gimple_assign_lhs (assign)) == SSA_NAME)
417 return false;
418 operand = gimple_assign_rhs1 (assign);
419 if (TREE_CODE (operand) != SSA_NAME)
420 return false;
421
422 if (operand == use)
423 return true;
424
425 return false;
426 }
427
428
429 /*
430 Function process_use.
431
432 Inputs:
433 - a USE in STMT_VINFO in a loop represented by LOOP_VINFO
434 - RELEVANT - enum value to be set in the STMT_VINFO of the stmt
435 that defined USE. This is done by calling mark_relevant and passing it
436 the WORKLIST (to add DEF_STMT to the WORKLIST in case it is relevant).
437 - FORCE is true if exist_non_indexing_operands_for_use_p check shouldn't
438 be performed.
439
440 Outputs:
441 Generally, LIVE_P and RELEVANT are used to define the liveness and
442 relevance info of the DEF_STMT of this USE:
443 STMT_VINFO_LIVE_P (DEF_stmt_vinfo) <-- live_p
444 STMT_VINFO_RELEVANT (DEF_stmt_vinfo) <-- relevant
445 Exceptions:
446 - case 1: If USE is used only for address computations (e.g. array indexing),
447 which does not need to be directly vectorized, then the liveness/relevance
448 of the respective DEF_STMT is left unchanged.
449 - case 2: If STMT_VINFO is a reduction phi and DEF_STMT is a reduction stmt,
450 we skip DEF_STMT because it has already been processed.
451 - case 3: If DEF_STMT and STMT_VINFO are in different nests, then
452 "relevant" will be modified accordingly.
453
454 Return true if everything is as expected. Return false otherwise. */
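
/* Illustrative example of case 3a (hypothetical user code, editor's
   sketch, assuming the outer loop is the one being vectorized):

     for (i = 0; i < n; i++)
       {
         d = b[i];                    # outer-loop stmt defining d
         for (j = 0; j < m; j++)
           a[j] += d;                 # inner-loop stmt: use (d)
       }

   The relevance recorded for the definition of 'd' is adjusted according
   to how the inner-loop use is classified, as described for case 3
   above.  */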
455
456 static opt_result
457 process_use (stmt_vec_info stmt_vinfo, tree use, loop_vec_info loop_vinfo,
458 enum vect_relevant relevant, vec<stmt_vec_info> *worklist,
459 bool force)
460 {
461 stmt_vec_info dstmt_vinfo;
462 enum vect_def_type dt;
463
464 /* case 1: we are only interested in uses that need to be vectorized. Uses
465 that are used for address computation are not considered relevant. */
466 if (!force && !exist_non_indexing_operands_for_use_p (use, stmt_vinfo))
467 return opt_result::success ();
468
469 if (!vect_is_simple_use (use, loop_vinfo, &dt, &dstmt_vinfo))
470 return opt_result::failure_at (stmt_vinfo->stmt,
471 "not vectorized:"
472 " unsupported use in stmt.\n");
473
474 if (!dstmt_vinfo)
475 return opt_result::success ();
476
477 basic_block def_bb = gimple_bb (dstmt_vinfo->stmt);
478 basic_block bb = gimple_bb (stmt_vinfo->stmt);
479
480 /* case 2: A reduction phi (STMT) defined by a reduction stmt (DSTMT_VINFO).
481 We have to force the stmt live since the epilogue loop needs it to
482 continue computing the reduction. */
483 if (gimple_code (stmt_vinfo->stmt) == GIMPLE_PHI
484 && STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_reduction_def
485 && gimple_code (dstmt_vinfo->stmt) != GIMPLE_PHI
486 && STMT_VINFO_DEF_TYPE (dstmt_vinfo) == vect_reduction_def
487 && bb->loop_father == def_bb->loop_father)
488 {
489 if (dump_enabled_p ())
490 dump_printf_loc (MSG_NOTE, vect_location,
491 "reduc-stmt defining reduc-phi in the same nest.\n");
492 vect_mark_relevant (worklist, dstmt_vinfo, relevant, true);
493 return opt_result::success ();
494 }
495
496 /* case 3a: outer-loop stmt defining an inner-loop stmt:
497 outer-loop-header-bb:
498 d = dstmt_vinfo
499 inner-loop:
500 stmt # use (d)
501 outer-loop-tail-bb:
502 ... */
503 if (flow_loop_nested_p (def_bb->loop_father, bb->loop_father))
504 {
505 if (dump_enabled_p ())
506 dump_printf_loc (MSG_NOTE, vect_location,
507 "outer-loop def-stmt defining inner-loop stmt.\n");
508
509 switch (relevant)
510 {
511 case vect_unused_in_scope:
512 relevant = (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_nested_cycle) ?
513 vect_used_in_scope : vect_unused_in_scope;
514 break;
515
516 case vect_used_in_outer_by_reduction:
517 gcc_assert (STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_reduction_def);
518 relevant = vect_used_by_reduction;
519 break;
520
521 case vect_used_in_outer:
522 gcc_assert (STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_reduction_def);
523 relevant = vect_used_in_scope;
524 break;
525
526 case vect_used_in_scope:
527 break;
528
529 default:
530 gcc_unreachable ();
531 }
532 }
533
534 /* case 3b: inner-loop stmt defining an outer-loop stmt:
535 outer-loop-header-bb:
536 ...
537 inner-loop:
538 d = dstmt_vinfo
539 outer-loop-tail-bb (or outer-loop-exit-bb in double reduction):
540 stmt # use (d) */
541 else if (flow_loop_nested_p (bb->loop_father, def_bb->loop_father))
542 {
543 if (dump_enabled_p ())
544 dump_printf_loc (MSG_NOTE, vect_location,
545 "inner-loop def-stmt defining outer-loop stmt.\n");
546
547 switch (relevant)
548 {
549 case vect_unused_in_scope:
550 relevant = (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_reduction_def
551 || STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_double_reduction_def) ?
552 vect_used_in_outer_by_reduction : vect_unused_in_scope;
553 break;
554
555 case vect_used_by_reduction:
556 case vect_used_only_live:
557 relevant = vect_used_in_outer_by_reduction;
558 break;
559
560 case vect_used_in_scope:
561 relevant = vect_used_in_outer;
562 break;
563
564 default:
565 gcc_unreachable ();
566 }
567 }
568 /* We are also not interested in uses on loop PHI backedges that are
569 inductions. Otherwise we'll needlessly vectorize the IV increment
570 and cause hybrid SLP for SLP inductions. Unless the PHI is live
571 of course. */
572 else if (gimple_code (stmt_vinfo->stmt) == GIMPLE_PHI
573 && STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_induction_def
574 && ! STMT_VINFO_LIVE_P (stmt_vinfo)
575 && (PHI_ARG_DEF_FROM_EDGE (stmt_vinfo->stmt,
576 loop_latch_edge (bb->loop_father))
577 == use))
578 {
579 if (dump_enabled_p ())
580 dump_printf_loc (MSG_NOTE, vect_location,
581 "induction value on backedge.\n");
582 return opt_result::success ();
583 }
584
585
586 vect_mark_relevant (worklist, dstmt_vinfo, relevant, false);
587 return opt_result::success ();
588 }
589
590
591 /* Function vect_mark_stmts_to_be_vectorized.
592
593 Not all stmts in the loop need to be vectorized. For example:
594
595 for i...
596 for j...
597 1. T0 = i + j
598 2. T1 = a[T0]
599
600 3. j = j + 1
601
602 Stmts 1 and 3 do not need to be vectorized, because loop control and
603 addressing of vectorized data-refs are handled differently.
604
605 This pass detects such stmts. */
606
607 opt_result
608 vect_mark_stmts_to_be_vectorized (loop_vec_info loop_vinfo, bool *fatal)
609 {
610 class loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
611 basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
612 unsigned int nbbs = loop->num_nodes;
613 gimple_stmt_iterator si;
614 unsigned int i;
615 basic_block bb;
616 bool live_p;
617 enum vect_relevant relevant;
618
619 DUMP_VECT_SCOPE ("vect_mark_stmts_to_be_vectorized");
620
621 auto_vec<stmt_vec_info, 64> worklist;
622
623 /* 1. Init worklist. */
624 for (i = 0; i < nbbs; i++)
625 {
626 bb = bbs[i];
627 for (si = gsi_start_phis (bb); !gsi_end_p (si); gsi_next (&si))
628 {
629 stmt_vec_info phi_info = loop_vinfo->lookup_stmt (gsi_stmt (si));
630 if (dump_enabled_p ())
631 dump_printf_loc (MSG_NOTE, vect_location, "init: phi relevant? %G",
632 phi_info->stmt);
633
634 if (vect_stmt_relevant_p (phi_info, loop_vinfo, &relevant, &live_p))
635 vect_mark_relevant (&worklist, phi_info, relevant, live_p);
636 }
637 for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
638 {
639 if (is_gimple_debug (gsi_stmt (si)))
640 continue;
641 stmt_vec_info stmt_info = loop_vinfo->lookup_stmt (gsi_stmt (si));
642 if (dump_enabled_p ())
643 dump_printf_loc (MSG_NOTE, vect_location,
644 "init: stmt relevant? %G", stmt_info->stmt);
645
646 if (vect_stmt_relevant_p (stmt_info, loop_vinfo, &relevant, &live_p))
647 vect_mark_relevant (&worklist, stmt_info, relevant, live_p);
648 }
649 }
650
651 /* 2. Process_worklist */
652 while (worklist.length () > 0)
653 {
654 use_operand_p use_p;
655 ssa_op_iter iter;
656
657 stmt_vec_info stmt_vinfo = worklist.pop ();
658 if (dump_enabled_p ())
659 dump_printf_loc (MSG_NOTE, vect_location,
660 "worklist: examine stmt: %G", stmt_vinfo->stmt);
661
662 /* Examine the USEs of STMT. For each USE, mark the stmt that defines it
663 (DEF_STMT) as relevant/irrelevant according to the relevance property
664 of STMT. */
665 relevant = STMT_VINFO_RELEVANT (stmt_vinfo);
666
667 /* Generally, the relevance property of STMT (in STMT_VINFO_RELEVANT) is
668 propagated as is to the DEF_STMTs of its USEs.
669
670 One exception is when STMT has been identified as defining a reduction
671 variable; in this case we set the relevance to vect_used_by_reduction.
672 This is because we distinguish between two kinds of relevant stmts -
673 those that are used by a reduction computation, and those that are
674 (also) used by a regular computation. This allows us later on to
675 identify stmts that are used solely by a reduction, and therefore the
676 order of the results that they produce does not have to be kept. */
677
678 switch (STMT_VINFO_DEF_TYPE (stmt_vinfo))
679 {
680 case vect_reduction_def:
681 gcc_assert (relevant != vect_unused_in_scope);
682 if (relevant != vect_unused_in_scope
683 && relevant != vect_used_in_scope
684 && relevant != vect_used_by_reduction
685 && relevant != vect_used_only_live)
686 return opt_result::failure_at
687 (stmt_vinfo->stmt, "unsupported use of reduction.\n");
688 break;
689
690 case vect_nested_cycle:
691 if (relevant != vect_unused_in_scope
692 && relevant != vect_used_in_outer_by_reduction
693 && relevant != vect_used_in_outer)
694 return opt_result::failure_at
695 (stmt_vinfo->stmt, "unsupported use of nested cycle.\n");
696 break;
697
698 case vect_double_reduction_def:
699 if (relevant != vect_unused_in_scope
700 && relevant != vect_used_by_reduction
701 && relevant != vect_used_only_live)
702 return opt_result::failure_at
703 (stmt_vinfo->stmt, "unsupported use of double reduction.\n");
704 break;
705
706 default:
707 break;
708 }
709
710 if (is_pattern_stmt_p (stmt_vinfo))
711 {
712 /* Pattern statements are not inserted into the code, so
713 FOR_EACH_PHI_OR_STMT_USE optimizes their operands out, and we
714 have to scan the RHS or function arguments instead. */
715 if (gassign *assign = dyn_cast <gassign *> (stmt_vinfo->stmt))
716 {
717 enum tree_code rhs_code = gimple_assign_rhs_code (assign);
718 tree op = gimple_assign_rhs1 (assign);
719
720 i = 1;
721 if (rhs_code == COND_EXPR && COMPARISON_CLASS_P (op))
722 {
723 opt_result res
724 = process_use (stmt_vinfo, TREE_OPERAND (op, 0),
725 loop_vinfo, relevant, &worklist, false);
726 if (!res)
727 return res;
728 res = process_use (stmt_vinfo, TREE_OPERAND (op, 1),
729 loop_vinfo, relevant, &worklist, false);
730 if (!res)
731 return res;
732 i = 2;
733 }
734 for (; i < gimple_num_ops (assign); i++)
735 {
736 op = gimple_op (assign, i);
737 if (TREE_CODE (op) == SSA_NAME)
738 {
739 opt_result res
740 = process_use (stmt_vinfo, op, loop_vinfo, relevant,
741 &worklist, false);
742 if (!res)
743 return res;
744 }
745 }
746 }
747 else if (gcall *call = dyn_cast <gcall *> (stmt_vinfo->stmt))
748 {
749 for (i = 0; i < gimple_call_num_args (call); i++)
750 {
751 tree arg = gimple_call_arg (call, i);
752 opt_result res
753 = process_use (stmt_vinfo, arg, loop_vinfo, relevant,
754 &worklist, false);
755 if (!res)
756 return res;
757 }
758 }
759 }
760 else
761 FOR_EACH_PHI_OR_STMT_USE (use_p, stmt_vinfo->stmt, iter, SSA_OP_USE)
762 {
763 tree op = USE_FROM_PTR (use_p);
764 opt_result res
765 = process_use (stmt_vinfo, op, loop_vinfo, relevant,
766 &worklist, false);
767 if (!res)
768 return res;
769 }
770
771 if (STMT_VINFO_GATHER_SCATTER_P (stmt_vinfo))
772 {
773 gather_scatter_info gs_info;
774 if (!vect_check_gather_scatter (stmt_vinfo, loop_vinfo, &gs_info))
775 gcc_unreachable ();
776 opt_result res
777 = process_use (stmt_vinfo, gs_info.offset, loop_vinfo, relevant,
778 &worklist, true);
779 if (!res)
780 {
781 if (fatal)
782 *fatal = false;
783 return res;
784 }
785 }
786 } /* while worklist */
787
788 return opt_result::success ();
789 }
790
791 /* Function vect_model_simple_cost.
792
793 Models cost for simple operations, i.e. those that only emit ncopies of a
794 single op. Right now, this does not account for multiple insns that could
795 be generated for the single vector op. We will handle that shortly. */
796
797 static void
798 vect_model_simple_cost (vec_info *,
799 stmt_vec_info stmt_info, int ncopies,
800 enum vect_def_type *dt,
801 int ndts,
802 slp_tree node,
803 stmt_vector_for_cost *cost_vec,
804 vect_cost_for_stmt kind = vector_stmt)
805 {
806 int inside_cost = 0, prologue_cost = 0;
807
808 gcc_assert (cost_vec != NULL);
809
810 /* ??? Somehow we need to fix this at the callers. */
811 if (node)
812 ncopies = SLP_TREE_NUMBER_OF_VEC_STMTS (node);
813
814 if (!node)
815 /* Cost the "broadcast" of a scalar operand into a vector operand.
816 Use scalar_to_vec to cost the broadcast, as elsewhere in the vector
817 cost model. */
818 for (int i = 0; i < ndts; i++)
819 if (dt[i] == vect_constant_def || dt[i] == vect_external_def)
820 prologue_cost += record_stmt_cost (cost_vec, 1, scalar_to_vec,
821 stmt_info, 0, vect_prologue);
822
823 /* Pass the inside-of-loop statements to the target-specific cost model. */
824 inside_cost += record_stmt_cost (cost_vec, ncopies, kind,
825 stmt_info, 0, vect_body);
826
827 if (dump_enabled_p ())
828 dump_printf_loc (MSG_NOTE, vect_location,
829 "vect_model_simple_cost: inside_cost = %d, "
830 "prologue_cost = %d .\n", inside_cost, prologue_cost);
831 }
832
833
834 /* Model cost for type demotion and promotion operations. PWR is
835 normally zero for single-step promotions and demotions. It will be
836 one if two-step promotion/demotion is required, and so on. NCOPIES
837 is the number of vector results (and thus number of instructions)
838 for the narrowest end of the operation chain. Each additional
839 step doubles the number of instructions required. */
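
/* Worked example (editor's note), tracing the loop below: with NCOPIES == 2
   and PWR == 1 (a two-step promotion such as char -> int), the first step
   records 2 vec_promote_demote statements and the second step records 4,
   giving an inside cost of 6 such statements in total.  */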
840
841 static void
842 vect_model_promotion_demotion_cost (stmt_vec_info stmt_info,
843 enum vect_def_type *dt,
844 unsigned int ncopies, int pwr,
845 stmt_vector_for_cost *cost_vec)
846 {
847 int i;
848 int inside_cost = 0, prologue_cost = 0;
849
850 for (i = 0; i < pwr + 1; i++)
851 {
852 inside_cost += record_stmt_cost (cost_vec, ncopies, vec_promote_demote,
853 stmt_info, 0, vect_body);
854 ncopies *= 2;
855 }
856
857 /* FORNOW: Assuming a maximum of 2 args per stmt. */
858 for (i = 0; i < 2; i++)
859 if (dt[i] == vect_constant_def || dt[i] == vect_external_def)
860 prologue_cost += record_stmt_cost (cost_vec, 1, vector_stmt,
861 stmt_info, 0, vect_prologue);
862
863 if (dump_enabled_p ())
864 dump_printf_loc (MSG_NOTE, vect_location,
865 "vect_model_promotion_demotion_cost: inside_cost = %d, "
866 "prologue_cost = %d .\n", inside_cost, prologue_cost);
867 }
868
869 /* Returns true if the current function returns DECL. */
870
871 static bool
872 cfun_returns (tree decl)
873 {
874 edge_iterator ei;
875 edge e;
876 FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR_FOR_FN (cfun)->preds)
877 {
878 greturn *ret = safe_dyn_cast <greturn *> (last_stmt (e->src));
879 if (!ret)
880 continue;
881 if (gimple_return_retval (ret) == decl)
882 return true;
883 /* We often end up with an aggregate copy to the result decl;
884 handle that case as well. First skip intermediate clobbers
885 though. */
886 gimple *def = ret;
887 do
888 {
889 def = SSA_NAME_DEF_STMT (gimple_vuse (def));
890 }
891 while (gimple_clobber_p (def));
892 if (is_a <gassign *> (def)
893 && gimple_assign_lhs (def) == gimple_return_retval (ret)
894 && gimple_assign_rhs1 (def) == decl)
895 return true;
896 }
897 return false;
898 }
899
900 /* Function vect_model_store_cost
901
902 Models cost for stores. In the case of grouped accesses, one access
903 has the overhead of the grouped access attributed to it. */
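
/* Illustrative example of a grouped (interleaved) store, hypothetical
   user code (editor's sketch):

     for (i = 0; i < n; i++)
       {
         a[2 * i] = x[i];
         a[2 * i + 1] = y[i];
       }

   The two stores form a group of size 2; when the group is implemented by
   a permute-and-store sequence (VMAT_CONTIGUOUS_PERMUTE), the cost of the
   interleaving permutes is attributed to the first statement of the
   group.  */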
904
905 static void
906 vect_model_store_cost (vec_info *vinfo, stmt_vec_info stmt_info, int ncopies,
907 vect_memory_access_type memory_access_type,
908 vec_load_store_type vls_type, slp_tree slp_node,
909 stmt_vector_for_cost *cost_vec)
910 {
911 unsigned int inside_cost = 0, prologue_cost = 0;
912 stmt_vec_info first_stmt_info = stmt_info;
913 bool grouped_access_p = STMT_VINFO_GROUPED_ACCESS (stmt_info);
914
915 /* ??? Somehow we need to fix this at the callers. */
916 if (slp_node)
917 ncopies = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
918
919 if (vls_type == VLS_STORE_INVARIANT)
920 {
921 if (!slp_node)
922 prologue_cost += record_stmt_cost (cost_vec, 1, scalar_to_vec,
923 stmt_info, 0, vect_prologue);
924 }
925
926 /* Grouped stores update all elements in the group at once,
927 so we want the DR for the first statement. */
928 if (!slp_node && grouped_access_p)
929 first_stmt_info = DR_GROUP_FIRST_ELEMENT (stmt_info);
930
931 /* True if we should include any once-per-group costs as well as
932 the cost of the statement itself. For SLP we only get called
933 once per group anyhow. */
934 bool first_stmt_p = (first_stmt_info == stmt_info);
935
936 /* We assume that the cost of a single store-lanes instruction is
937 equivalent to the cost of DR_GROUP_SIZE separate stores. If a grouped
938 access is instead being provided by a permute-and-store operation,
939 include the cost of the permutes. */
940 if (first_stmt_p
941 && memory_access_type == VMAT_CONTIGUOUS_PERMUTE)
942 {
943 /* Uses high and low interleave or shuffle operations for each
944 needed permute. */
945 int group_size = DR_GROUP_SIZE (first_stmt_info);
946 int nstmts = ncopies * ceil_log2 (group_size) * group_size;
947 inside_cost = record_stmt_cost (cost_vec, nstmts, vec_perm,
948 stmt_info, 0, vect_body);
949
950 if (dump_enabled_p ())
951 dump_printf_loc (MSG_NOTE, vect_location,
952 "vect_model_store_cost: strided group_size = %d .\n",
953 group_size);
954 }
955
956 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
957 /* Costs of the stores. */
958 if (memory_access_type == VMAT_ELEMENTWISE
959 || memory_access_type == VMAT_GATHER_SCATTER)
960 {
961 /* N scalar stores plus extracting the elements. */
962 unsigned int assumed_nunits = vect_nunits_for_cost (vectype);
963 inside_cost += record_stmt_cost (cost_vec,
964 ncopies * assumed_nunits,
965 scalar_store, stmt_info, 0, vect_body);
966 }
967 else
968 vect_get_store_cost (vinfo, stmt_info, ncopies, &inside_cost, cost_vec);
969
970 if (memory_access_type == VMAT_ELEMENTWISE
971 || memory_access_type == VMAT_STRIDED_SLP)
972 {
973 /* N scalar stores plus extracting the elements. */
974 unsigned int assumed_nunits = vect_nunits_for_cost (vectype);
975 inside_cost += record_stmt_cost (cost_vec,
976 ncopies * assumed_nunits,
977 vec_to_scalar, stmt_info, 0, vect_body);
978 }
979
980 /* When vectorizing a store into the function result assign
981 a penalty if the function returns in a multi-register location.
982 In this case we assume we'll end up with having to spill the
983 vector result and do piecewise loads as a conservative estimate. */
984 tree base = get_base_address (STMT_VINFO_DATA_REF (stmt_info)->ref);
985 if (base
986 && (TREE_CODE (base) == RESULT_DECL
987 || (DECL_P (base) && cfun_returns (base)))
988 && !aggregate_value_p (base, cfun->decl))
989 {
990 rtx reg = hard_function_value (TREE_TYPE (base), cfun->decl, 0, 1);
991 /* ??? Handle PARALLEL in some way. */
992 if (REG_P (reg))
993 {
994 int nregs = hard_regno_nregs (REGNO (reg), GET_MODE (reg));
995 /* Assume that a single reg-reg move is possible and cheap,
996 do not account for vector to gp register move cost. */
997 if (nregs > 1)
998 {
999 /* Spill. */
1000 prologue_cost += record_stmt_cost (cost_vec, ncopies,
1001 vector_store,
1002 stmt_info, 0, vect_epilogue);
1003 /* Loads. */
1004 prologue_cost += record_stmt_cost (cost_vec, ncopies * nregs,
1005 scalar_load,
1006 stmt_info, 0, vect_epilogue);
1007 }
1008 }
1009 }
1010
1011 if (dump_enabled_p ())
1012 dump_printf_loc (MSG_NOTE, vect_location,
1013 "vect_model_store_cost: inside_cost = %d, "
1014 "prologue_cost = %d .\n", inside_cost, prologue_cost);
1015 }
1016
1017
1018 /* Calculate cost of DR's memory access. */
1019 void
1020 vect_get_store_cost (vec_info *vinfo, stmt_vec_info stmt_info, int ncopies,
1021 unsigned int *inside_cost,
1022 stmt_vector_for_cost *body_cost_vec)
1023 {
1024 dr_vec_info *dr_info = STMT_VINFO_DR_INFO (stmt_info);
1025 int alignment_support_scheme
1026 = vect_supportable_dr_alignment (vinfo, dr_info, false);
1027
1028 switch (alignment_support_scheme)
1029 {
1030 case dr_aligned:
1031 {
1032 *inside_cost += record_stmt_cost (body_cost_vec, ncopies,
1033 vector_store, stmt_info, 0,
1034 vect_body);
1035
1036 if (dump_enabled_p ())
1037 dump_printf_loc (MSG_NOTE, vect_location,
1038 "vect_model_store_cost: aligned.\n");
1039 break;
1040 }
1041
1042 case dr_unaligned_supported:
1043 {
1044 /* Here, we assign an additional cost for the unaligned store. */
1045 *inside_cost += record_stmt_cost (body_cost_vec, ncopies,
1046 unaligned_store, stmt_info,
1047 DR_MISALIGNMENT (dr_info),
1048 vect_body);
1049 if (dump_enabled_p ())
1050 dump_printf_loc (MSG_NOTE, vect_location,
1051 "vect_model_store_cost: unaligned supported by "
1052 "hardware.\n");
1053 break;
1054 }
1055
1056 case dr_unaligned_unsupported:
1057 {
1058 *inside_cost = VECT_MAX_COST;
1059
1060 if (dump_enabled_p ())
1061 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1062 "vect_model_store_cost: unsupported access.\n");
1063 break;
1064 }
1065
1066 default:
1067 gcc_unreachable ();
1068 }
1069 }
1070
1071
1072 /* Function vect_model_load_cost
1073
1074 Models cost for loads. In the case of grouped accesses, one access has
1075 the overhead of the grouped access attributed to it. Since unaligned
1076 accesses are supported for loads, we also account for the costs of the
1077 access scheme chosen. */
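
/* Illustrative example of a grouped load, hypothetical user code
   (editor's sketch):

     for (i = 0; i < n; i++)
       {
         x[i] = a[2 * i];
         y[i] = a[2 * i + 1];
       }

   The two loads form a group of size 2; when the group is implemented by
   a load-and-permute sequence (VMAT_CONTIGUOUS_PERMUTE), the even/odd
   extraction permutes are costed in addition to the loads themselves.  */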
1078
1079 static void
1080 vect_model_load_cost (vec_info *vinfo,
1081 stmt_vec_info stmt_info, unsigned ncopies, poly_uint64 vf,
1082 vect_memory_access_type memory_access_type,
1083 slp_tree slp_node,
1084 stmt_vector_for_cost *cost_vec)
1085 {
1086 unsigned int inside_cost = 0, prologue_cost = 0;
1087 bool grouped_access_p = STMT_VINFO_GROUPED_ACCESS (stmt_info);
1088
1089 gcc_assert (cost_vec);
1090
1091 /* ??? Somehow we need to fix this at the callers. */
1092 if (slp_node)
1093 ncopies = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
1094
1095 if (slp_node && SLP_TREE_LOAD_PERMUTATION (slp_node).exists ())
1096 {
1097 /* If the load is permuted then the alignment is determined by
1098 the first group element not by the first scalar stmt DR. */
1099 stmt_vec_info first_stmt_info = DR_GROUP_FIRST_ELEMENT (stmt_info);
1100 /* Record the cost for the permutation. */
1101 unsigned n_perms;
1102 unsigned assumed_nunits
1103 = vect_nunits_for_cost (STMT_VINFO_VECTYPE (first_stmt_info));
1104 vect_transform_slp_perm_load (vinfo, slp_node, vNULL, NULL,
1105 vf, true, &n_perms);
1106 inside_cost += record_stmt_cost (cost_vec, n_perms, vec_perm,
1107 first_stmt_info, 0, vect_body);
1108 /* And adjust the number of loads performed. This handles
1109 redundancies as well as loads that are later dead. */
1110 auto_sbitmap perm (DR_GROUP_SIZE (first_stmt_info));
1111 bitmap_clear (perm);
1112 for (unsigned i = 0;
1113 i < SLP_TREE_LOAD_PERMUTATION (slp_node).length (); ++i)
1114 bitmap_set_bit (perm, SLP_TREE_LOAD_PERMUTATION (slp_node)[i]);
1115 ncopies = 0;
1116 bool load_seen = false;
1117 for (unsigned i = 0; i < DR_GROUP_SIZE (first_stmt_info); ++i)
1118 {
1119 if (i % assumed_nunits == 0)
1120 {
1121 if (load_seen)
1122 ncopies++;
1123 load_seen = false;
1124 }
1125 if (bitmap_bit_p (perm, i))
1126 load_seen = true;
1127 }
1128 if (load_seen)
1129 ncopies++;
1130 gcc_assert (ncopies
1131 <= (DR_GROUP_SIZE (first_stmt_info)
1132 - DR_GROUP_GAP (first_stmt_info)
1133 + assumed_nunits - 1) / assumed_nunits);
1134 }
1135
1136 /* Grouped loads read all elements in the group at once,
1137 so we want the DR for the first statement. */
1138 stmt_vec_info first_stmt_info = stmt_info;
1139 if (!slp_node && grouped_access_p)
1140 first_stmt_info = DR_GROUP_FIRST_ELEMENT (stmt_info);
1141
1142 /* True if we should include any once-per-group costs as well as
1143 the cost of the statement itself. For SLP we only get called
1144 once per group anyhow. */
1145 bool first_stmt_p = (first_stmt_info == stmt_info);
1146
1147 /* We assume that the cost of a single load-lanes instruction is
1148 equivalent to the cost of DR_GROUP_SIZE separate loads. If a grouped
1149 access is instead being provided by a load-and-permute operation,
1150 include the cost of the permutes. */
1151 if (first_stmt_p
1152 && memory_access_type == VMAT_CONTIGUOUS_PERMUTE)
1153 {
1154 /* Uses even and odd extract operations or shuffle operations
1155 for each needed permute. */
1156 int group_size = DR_GROUP_SIZE (first_stmt_info);
1157 int nstmts = ncopies * ceil_log2 (group_size) * group_size;
1158 inside_cost += record_stmt_cost (cost_vec, nstmts, vec_perm,
1159 stmt_info, 0, vect_body);
1160
1161 if (dump_enabled_p ())
1162 dump_printf_loc (MSG_NOTE, vect_location,
1163 "vect_model_load_cost: strided group_size = %d .\n",
1164 group_size);
1165 }
1166
1167 /* The loads themselves. */
1168 if (memory_access_type == VMAT_ELEMENTWISE
1169 || memory_access_type == VMAT_GATHER_SCATTER)
1170 {
1171 /* N scalar loads plus gathering them into a vector. */
1172 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
1173 unsigned int assumed_nunits = vect_nunits_for_cost (vectype);
1174 inside_cost += record_stmt_cost (cost_vec,
1175 ncopies * assumed_nunits,
1176 scalar_load, stmt_info, 0, vect_body);
1177 }
1178 else
1179 vect_get_load_cost (vinfo, stmt_info, ncopies, first_stmt_p,
1180 &inside_cost, &prologue_cost,
1181 cost_vec, cost_vec, true);
1182 if (memory_access_type == VMAT_ELEMENTWISE
1183 || memory_access_type == VMAT_STRIDED_SLP)
1184 inside_cost += record_stmt_cost (cost_vec, ncopies, vec_construct,
1185 stmt_info, 0, vect_body);
1186
1187 if (dump_enabled_p ())
1188 dump_printf_loc (MSG_NOTE, vect_location,
1189 "vect_model_load_cost: inside_cost = %d, "
1190 "prologue_cost = %d .\n", inside_cost, prologue_cost);
1191 }
1192
1193
1194 /* Calculate cost of DR's memory access. */
1195 void
1196 vect_get_load_cost (vec_info *vinfo, stmt_vec_info stmt_info, int ncopies,
1197 bool add_realign_cost, unsigned int *inside_cost,
1198 unsigned int *prologue_cost,
1199 stmt_vector_for_cost *prologue_cost_vec,
1200 stmt_vector_for_cost *body_cost_vec,
1201 bool record_prologue_costs)
1202 {
1203 dr_vec_info *dr_info = STMT_VINFO_DR_INFO (stmt_info);
1204 int alignment_support_scheme
1205 = vect_supportable_dr_alignment (vinfo, dr_info, false);
1206
1207 switch (alignment_support_scheme)
1208 {
1209 case dr_aligned:
1210 {
1211 *inside_cost += record_stmt_cost (body_cost_vec, ncopies, vector_load,
1212 stmt_info, 0, vect_body);
1213
1214 if (dump_enabled_p ())
1215 dump_printf_loc (MSG_NOTE, vect_location,
1216 "vect_model_load_cost: aligned.\n");
1217
1218 break;
1219 }
1220 case dr_unaligned_supported:
1221 {
1222 /* Here, we assign an additional cost for the unaligned load. */
1223 *inside_cost += record_stmt_cost (body_cost_vec, ncopies,
1224 unaligned_load, stmt_info,
1225 DR_MISALIGNMENT (dr_info),
1226 vect_body);
1227
1228 if (dump_enabled_p ())
1229 dump_printf_loc (MSG_NOTE, vect_location,
1230 "vect_model_load_cost: unaligned supported by "
1231 "hardware.\n");
1232
1233 break;
1234 }
1235 case dr_explicit_realign:
1236 {
1237 *inside_cost += record_stmt_cost (body_cost_vec, ncopies * 2,
1238 vector_load, stmt_info, 0, vect_body);
1239 *inside_cost += record_stmt_cost (body_cost_vec, ncopies,
1240 vec_perm, stmt_info, 0, vect_body);
1241
1242 /* FIXME: If the misalignment remains fixed across the iterations of
1243 the containing loop, the following cost should be added to the
1244 prologue costs. */
1245 if (targetm.vectorize.builtin_mask_for_load)
1246 *inside_cost += record_stmt_cost (body_cost_vec, 1, vector_stmt,
1247 stmt_info, 0, vect_body);
1248
1249 if (dump_enabled_p ())
1250 dump_printf_loc (MSG_NOTE, vect_location,
1251 "vect_model_load_cost: explicit realign\n");
1252
1253 break;
1254 }
1255 case dr_explicit_realign_optimized:
1256 {
1257 if (dump_enabled_p ())
1258 dump_printf_loc (MSG_NOTE, vect_location,
1259 "vect_model_load_cost: unaligned software "
1260 "pipelined.\n");
1261
1262 /* Unaligned software pipeline has a load of an address, an initial
1263 load, and possibly a mask operation to "prime" the loop. However,
1264 if this is an access in a group of loads, which provide grouped
1265 access, then the above cost should only be considered for one
1266 access in the group. Inside the loop, there is a load op
1267 and a realignment op. */
1268
1269 if (add_realign_cost && record_prologue_costs)
1270 {
1271 *prologue_cost += record_stmt_cost (prologue_cost_vec, 2,
1272 vector_stmt, stmt_info,
1273 0, vect_prologue);
1274 if (targetm.vectorize.builtin_mask_for_load)
1275 *prologue_cost += record_stmt_cost (prologue_cost_vec, 1,
1276 vector_stmt, stmt_info,
1277 0, vect_prologue);
1278 }
1279
1280 *inside_cost += record_stmt_cost (body_cost_vec, ncopies, vector_load,
1281 stmt_info, 0, vect_body);
1282 *inside_cost += record_stmt_cost (body_cost_vec, ncopies, vec_perm,
1283 stmt_info, 0, vect_body);
1284
1285 if (dump_enabled_p ())
1286 dump_printf_loc (MSG_NOTE, vect_location,
1287 "vect_model_load_cost: explicit realign optimized"
1288 "\n");
1289
1290 break;
1291 }
1292
1293 case dr_unaligned_unsupported:
1294 {
1295 *inside_cost = VECT_MAX_COST;
1296
1297 if (dump_enabled_p ())
1298 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1299 "vect_model_load_cost: unsupported access.\n");
1300 break;
1301 }
1302
1303 default:
1304 gcc_unreachable ();
1305 }
1306 }
1307
1308 /* Insert the new stmt NEW_STMT at *GSI or at the appropriate place in
1309 the loop preheader for the vectorized stmt STMT_VINFO. */
1310
1311 static void
1312 vect_init_vector_1 (vec_info *vinfo, stmt_vec_info stmt_vinfo, gimple *new_stmt,
1313 gimple_stmt_iterator *gsi)
1314 {
1315 if (gsi)
1316 vect_finish_stmt_generation (vinfo, stmt_vinfo, new_stmt, gsi);
1317 else
1318 vinfo->insert_on_entry (stmt_vinfo, new_stmt);
1319
1320 if (dump_enabled_p ())
1321 dump_printf_loc (MSG_NOTE, vect_location,
1322 "created new init_stmt: %G", new_stmt);
1323 }
1324
1325 /* Function vect_init_vector.
1326
1327 Insert a new stmt (INIT_STMT) that initializes a new variable of type
1328 TYPE with the value VAL. If TYPE is a vector type and VAL does not have
1329 vector type, a vector with all elements equal to VAL is created first.
1330 Place the initialization at GSI if it is not NULL. Otherwise, place the
1331 initialization at the loop preheader.
1332 Return the DEF of INIT_STMT.
1333 It will be used in the vectorization of STMT_INFO. */
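
/* For example (editor's sketch, SSA names made up): initializing a
   four-element integer vector from the scalar constant 3 emits, at GSI or
   in the loop preheader,

     cst_1 = { 3, 3, 3, 3 };

   and returns the SSA name cst_1.  */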
1334
1335 tree
1336 vect_init_vector (vec_info *vinfo, stmt_vec_info stmt_info, tree val, tree type,
1337 gimple_stmt_iterator *gsi)
1338 {
1339 gimple *init_stmt;
1340 tree new_temp;
1341
1342 /* We abuse this function to push something to an SSA name with initial 'val'. */
1343 if (! useless_type_conversion_p (type, TREE_TYPE (val)))
1344 {
1345 gcc_assert (TREE_CODE (type) == VECTOR_TYPE);
1346 if (! types_compatible_p (TREE_TYPE (type), TREE_TYPE (val)))
1347 {
1348 /* Scalar boolean value should be transformed into
1349 all zeros or all ones value before building a vector. */
1350 if (VECTOR_BOOLEAN_TYPE_P (type))
1351 {
1352 tree true_val = build_all_ones_cst (TREE_TYPE (type));
1353 tree false_val = build_zero_cst (TREE_TYPE (type));
1354
1355 if (CONSTANT_CLASS_P (val))
1356 val = integer_zerop (val) ? false_val : true_val;
1357 else
1358 {
1359 new_temp = make_ssa_name (TREE_TYPE (type));
1360 init_stmt = gimple_build_assign (new_temp, COND_EXPR,
1361 val, true_val, false_val);
1362 vect_init_vector_1 (vinfo, stmt_info, init_stmt, gsi);
1363 val = new_temp;
1364 }
1365 }
1366 else
1367 {
1368 gimple_seq stmts = NULL;
1369 if (! INTEGRAL_TYPE_P (TREE_TYPE (val)))
1370 val = gimple_build (&stmts, VIEW_CONVERT_EXPR,
1371 TREE_TYPE (type), val);
1372 else
1373 /* ??? Condition vectorization expects us to do
1374 promotion of invariant/external defs. */
1375 val = gimple_convert (&stmts, TREE_TYPE (type), val);
1376 for (gimple_stmt_iterator gsi2 = gsi_start (stmts);
1377 !gsi_end_p (gsi2); )
1378 {
1379 init_stmt = gsi_stmt (gsi2);
1380 gsi_remove (&gsi2, false);
1381 vect_init_vector_1 (vinfo, stmt_info, init_stmt, gsi);
1382 }
1383 }
1384 }
1385 val = build_vector_from_val (type, val);
1386 }
1387
1388 new_temp = vect_get_new_ssa_name (type, vect_simple_var, "cst_");
1389 init_stmt = gimple_build_assign (new_temp, val);
1390 vect_init_vector_1 (vinfo, stmt_info, init_stmt, gsi);
1391 return new_temp;
1392 }
1393
1394
1395 /* Function vect_get_vec_defs_for_operand.
1396
1397 OP is an operand in STMT_VINFO. This function returns a vector of
1398 NCOPIES defs that will be used in the vectorized stmts for STMT_VINFO.
1399
1400 In the case that OP is an SSA_NAME which is defined in the loop, then
1401 STMT_VINFO_VEC_STMTS of the defining stmt holds the relevant defs.
1402
1403 In case OP is an invariant or constant, a new stmt that creates a vector def
1404 needs to be introduced. VECTYPE may be used to specify a required type for
1405 vector invariant. */
1406
1407 void
1408 vect_get_vec_defs_for_operand (vec_info *vinfo, stmt_vec_info stmt_vinfo,
1409 unsigned ncopies,
1410 tree op, vec<tree> *vec_oprnds, tree vectype)
1411 {
1412 gimple *def_stmt;
1413 enum vect_def_type dt;
1414 bool is_simple_use;
1415 loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
1416
1417 if (dump_enabled_p ())
1418 dump_printf_loc (MSG_NOTE, vect_location,
1419 "vect_get_vec_defs_for_operand: %T\n", op);
1420
1421 stmt_vec_info def_stmt_info;
1422 is_simple_use = vect_is_simple_use (op, loop_vinfo, &dt,
1423 &def_stmt_info, &def_stmt);
1424 gcc_assert (is_simple_use);
1425 if (def_stmt && dump_enabled_p ())
1426 dump_printf_loc (MSG_NOTE, vect_location, " def_stmt = %G", def_stmt);
1427
1428 vec_oprnds->create (ncopies);
1429 if (dt == vect_constant_def || dt == vect_external_def)
1430 {
1431 tree stmt_vectype = STMT_VINFO_VECTYPE (stmt_vinfo);
1432 tree vector_type;
1433
1434 if (vectype)
1435 vector_type = vectype;
1436 else if (VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (op))
1437 && VECTOR_BOOLEAN_TYPE_P (stmt_vectype))
1438 vector_type = truth_type_for (stmt_vectype);
1439 else
1440 vector_type = get_vectype_for_scalar_type (loop_vinfo, TREE_TYPE (op));
1441
1442 gcc_assert (vector_type);
1443 tree vop = vect_init_vector (vinfo, stmt_vinfo, op, vector_type, NULL);
1444 while (ncopies--)
1445 vec_oprnds->quick_push (vop);
1446 }
1447 else
1448 {
1449 def_stmt_info = vect_stmt_to_vectorize (def_stmt_info);
1450 gcc_assert (STMT_VINFO_VEC_STMTS (def_stmt_info).length () == ncopies);
1451 for (unsigned i = 0; i < ncopies; ++i)
1452 vec_oprnds->quick_push (gimple_get_lhs
1453 (STMT_VINFO_VEC_STMTS (def_stmt_info)[i]));
1454 }
1455 }
1456
1457
1458 /* Get vectorized definitions for OP0, OP1, OP2 and OP3. */
1459
1460 void
1461 vect_get_vec_defs (vec_info *vinfo, stmt_vec_info stmt_info, slp_tree slp_node,
1462 unsigned ncopies,
1463 tree op0, vec<tree> *vec_oprnds0, tree vectype0,
1464 tree op1, vec<tree> *vec_oprnds1, tree vectype1,
1465 tree op2, vec<tree> *vec_oprnds2, tree vectype2,
1466 tree op3, vec<tree> *vec_oprnds3, tree vectype3)
1467 {
1468 if (slp_node)
1469 {
1470 if (op0)
1471 vect_get_slp_defs (SLP_TREE_CHILDREN (slp_node)[0], vec_oprnds0);
1472 if (op1)
1473 vect_get_slp_defs (SLP_TREE_CHILDREN (slp_node)[1], vec_oprnds1);
1474 if (op2)
1475 vect_get_slp_defs (SLP_TREE_CHILDREN (slp_node)[2], vec_oprnds2);
1476 if (op3)
1477 vect_get_slp_defs (SLP_TREE_CHILDREN (slp_node)[3], vec_oprnds3);
1478 }
1479 else
1480 {
1481 if (op0)
1482 vect_get_vec_defs_for_operand (vinfo, stmt_info, ncopies,
1483 op0, vec_oprnds0, vectype0);
1484 if (op1)
1485 vect_get_vec_defs_for_operand (vinfo, stmt_info, ncopies,
1486 op1, vec_oprnds1, vectype1);
1487 if (op2)
1488 vect_get_vec_defs_for_operand (vinfo, stmt_info, ncopies,
1489 op2, vec_oprnds2, vectype2);
1490 if (op3)
1491 vect_get_vec_defs_for_operand (vinfo, stmt_info, ncopies,
1492 op3, vec_oprnds3, vectype3);
1493 }
1494 }
1495
1496 void
1497 vect_get_vec_defs (vec_info *vinfo, stmt_vec_info stmt_info, slp_tree slp_node,
1498 unsigned ncopies,
1499 tree op0, vec<tree> *vec_oprnds0,
1500 tree op1, vec<tree> *vec_oprnds1,
1501 tree op2, vec<tree> *vec_oprnds2,
1502 tree op3, vec<tree> *vec_oprnds3)
1503 {
1504 vect_get_vec_defs (vinfo, stmt_info, slp_node, ncopies,
1505 op0, vec_oprnds0, NULL_TREE,
1506 op1, vec_oprnds1, NULL_TREE,
1507 op2, vec_oprnds2, NULL_TREE,
1508 op3, vec_oprnds3, NULL_TREE);
1509 }
1510
1511 /* Helper function called by vect_finish_replace_stmt and
1512 vect_finish_stmt_generation. Set the location of the new
1513 statement and add it to the EH region of the scalar statement if it can throw. */
1514
1515 static void
1516 vect_finish_stmt_generation_1 (vec_info *,
1517 stmt_vec_info stmt_info, gimple *vec_stmt)
1518 {
1519 if (dump_enabled_p ())
1520 dump_printf_loc (MSG_NOTE, vect_location, "add new stmt: %G", vec_stmt);
1521
1522 if (stmt_info)
1523 {
1524 gimple_set_location (vec_stmt, gimple_location (stmt_info->stmt));
1525
1526 /* While EH edges will generally prevent vectorization, stmt might
1527 e.g. be in a must-not-throw region. Ensure newly created stmts
1528 that could throw are part of the same region. */
1529 int lp_nr = lookup_stmt_eh_lp (stmt_info->stmt);
1530 if (lp_nr != 0 && stmt_could_throw_p (cfun, vec_stmt))
1531 add_stmt_to_eh_lp (vec_stmt, lp_nr);
1532 }
1533 else
1534 gcc_assert (!stmt_could_throw_p (cfun, vec_stmt));
1535 }
1536
1537 /* Replace the scalar statement STMT_INFO with a new vector statement VEC_STMT,
1538 which sets the same scalar result as STMT_INFO did.  The location and
1539 EH information of VEC_STMT are taken from STMT_INFO. */
1540
1541 void
1542 vect_finish_replace_stmt (vec_info *vinfo,
1543 stmt_vec_info stmt_info, gimple *vec_stmt)
1544 {
1545 gimple *scalar_stmt = vect_orig_stmt (stmt_info)->stmt;
1546 gcc_assert (gimple_get_lhs (scalar_stmt) == gimple_get_lhs (vec_stmt));
1547
1548 gimple_stmt_iterator gsi = gsi_for_stmt (scalar_stmt);
1549 gsi_replace (&gsi, vec_stmt, true);
1550
1551 vect_finish_stmt_generation_1 (vinfo, stmt_info, vec_stmt);
1552 }
1553
1554 /* Add VEC_STMT to the vectorized implementation of STMT_INFO and insert it
1555 before *GSI.  The location and EH information of VEC_STMT are taken from STMT_INFO. */
1556
1557 void
1558 vect_finish_stmt_generation (vec_info *vinfo,
1559 stmt_vec_info stmt_info, gimple *vec_stmt,
1560 gimple_stmt_iterator *gsi)
1561 {
1562 gcc_assert (!stmt_info || gimple_code (stmt_info->stmt) != GIMPLE_LABEL);
1563
1564 if (!gsi_end_p (*gsi)
1565 && gimple_has_mem_ops (vec_stmt))
1566 {
1567 gimple *at_stmt = gsi_stmt (*gsi);
1568 tree vuse = gimple_vuse (at_stmt);
1569 if (vuse && TREE_CODE (vuse) == SSA_NAME)
1570 {
1571 tree vdef = gimple_vdef (at_stmt);
1572 gimple_set_vuse (vec_stmt, gimple_vuse (at_stmt));
1573 gimple_set_modified (vec_stmt, true);
1574 /* If we have an SSA vuse and insert a store, update virtual
1575 SSA form to avoid triggering the renamer. Do so only
1576 if we can easily see all uses - which is what almost always
1577 happens with the way vectorized stmts are inserted. */
1578 if ((vdef && TREE_CODE (vdef) == SSA_NAME)
1579 && ((is_gimple_assign (vec_stmt)
1580 && !is_gimple_reg (gimple_assign_lhs (vec_stmt)))
1581 || (is_gimple_call (vec_stmt)
1582 && !(gimple_call_flags (vec_stmt)
1583 & (ECF_CONST|ECF_PURE|ECF_NOVOPS)))))
1584 {
1585 tree new_vdef = copy_ssa_name (vuse, vec_stmt);
1586 gimple_set_vdef (vec_stmt, new_vdef);
1587 SET_USE (gimple_vuse_op (at_stmt), new_vdef);
1588 }
1589 }
1590 }
1591 gsi_insert_before (gsi, vec_stmt, GSI_SAME_STMT);
1592 vect_finish_stmt_generation_1 (vinfo, stmt_info, vec_stmt);
1593 }
1594
1595 /* We want to vectorize a call to combined function CFN with function
1596 decl FNDECL, using VECTYPE_OUT as the type of the output and VECTYPE_IN
1597 as the types of all inputs. Check whether this is possible using
1598 an internal function, returning its code if so or IFN_LAST if not. */
1599
1600 static internal_fn
1601 vectorizable_internal_function (combined_fn cfn, tree fndecl,
1602 tree vectype_out, tree vectype_in)
1603 {
1604 internal_fn ifn;
1605 if (internal_fn_p (cfn))
1606 ifn = as_internal_fn (cfn);
1607 else
1608 ifn = associated_internal_fn (fndecl);
1609 if (ifn != IFN_LAST && direct_internal_fn_p (ifn))
1610 {
1611 const direct_internal_fn_info &info = direct_internal_fn (ifn);
1612 if (info.vectorizable)
1613 {
1614 tree type0 = (info.type0 < 0 ? vectype_out : vectype_in);
1615 tree type1 = (info.type1 < 0 ? vectype_out : vectype_in);
1616 if (direct_internal_fn_supported_p (ifn, tree_pair (type0, type1),
1617 OPTIMIZE_FOR_SPEED))
1618 return ifn;
1619 }
1620 }
1621 return IFN_LAST;
1622 }
1623
1624
1625 static tree permute_vec_elements (vec_info *, tree, tree, tree, stmt_vec_info,
1626 gimple_stmt_iterator *);
1627
1628 /* Check whether a load or store statement in the loop described by
1629 LOOP_VINFO is possible in a loop using partial vectors. This is
1630 testing whether the vectorizer pass has the appropriate support,
1631 as well as whether the target does.
1632
1633 VLS_TYPE says whether the statement is a load or store and VECTYPE
1634 is the type of the vector being loaded or stored. MEMORY_ACCESS_TYPE
1635 says how the load or store is going to be implemented and GROUP_SIZE
1636 is the number of load or store statements in the containing group.
1637 If the access is a gather load or scatter store, GS_INFO describes
1638 its arguments. If the load or store is conditional, SCALAR_MASK is the
1639 condition under which it occurs.
1640
1641 Clear LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P if a loop using partial
1642 vectors is not supported, otherwise record the required rgroup control
1643 types. */
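
/* Illustrative example (hypothetical user code, editor's sketch):

     for (i = 0; i < n; i++)
       if (c[i])
         a[i] = b[i];

   When this store is vectorized with partial vectors, the loop control
   mask is combined with the vectorized form of the scalar condition
   c[i] != 0 (SCALAR_MASK here) so that only the selected elements of the
   active part of the vector are stored.  */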
1644
1645 static void
1646 check_load_store_for_partial_vectors (loop_vec_info loop_vinfo, tree vectype,
1647 vec_load_store_type vls_type,
1648 int group_size,
1649 vect_memory_access_type
1650 memory_access_type,
1651 gather_scatter_info *gs_info,
1652 tree scalar_mask)
1653 {
1654 /* Invariant loads need no special support. */
1655 if (memory_access_type == VMAT_INVARIANT)
1656 return;
1657
1658 vec_loop_masks *masks = &LOOP_VINFO_MASKS (loop_vinfo);
1659 machine_mode vecmode = TYPE_MODE (vectype);
1660 bool is_load = (vls_type == VLS_LOAD);
1661 if (memory_access_type == VMAT_LOAD_STORE_LANES)
1662 {
1663 if (is_load
1664 ? !vect_load_lanes_supported (vectype, group_size, true)
1665 : !vect_store_lanes_supported (vectype, group_size, true))
1666 {
1667 if (dump_enabled_p ())
1668 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1669 "can't operate on partial vectors because"
1670 " the target doesn't have an appropriate"
1671 " load/store-lanes instruction.\n");
1672 LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo) = false;
1673 return;
1674 }
1675 unsigned int ncopies = vect_get_num_copies (loop_vinfo, vectype);
1676 vect_record_loop_mask (loop_vinfo, masks, ncopies, vectype, scalar_mask);
1677 return;
1678 }
1679
1680 if (memory_access_type == VMAT_GATHER_SCATTER)
1681 {
1682 internal_fn ifn = (is_load
1683 ? IFN_MASK_GATHER_LOAD
1684 : IFN_MASK_SCATTER_STORE);
1685 if (!internal_gather_scatter_fn_supported_p (ifn, vectype,
1686 gs_info->memory_type,
1687 gs_info->offset_vectype,
1688 gs_info->scale))
1689 {
1690 if (dump_enabled_p ())
1691 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1692 "can't operate on partial vectors because"
1693 " the target doesn't have an appropriate"
1694 " gather load or scatter store instruction.\n");
1695 LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo) = false;
1696 return;
1697 }
1698 unsigned int ncopies = vect_get_num_copies (loop_vinfo, vectype);
1699 vect_record_loop_mask (loop_vinfo, masks, ncopies, vectype, scalar_mask);
1700 return;
1701 }
1702
1703 if (memory_access_type != VMAT_CONTIGUOUS
1704 && memory_access_type != VMAT_CONTIGUOUS_PERMUTE)
1705 {
1706 /* Element X of the data must come from iteration i * VF + X of the
1707 scalar loop. We need more work to support other mappings. */
1708 if (dump_enabled_p ())
1709 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1710 "can't operate on partial vectors because an"
1711 " access isn't contiguous.\n");
1712 LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo) = false;
1713 return;
1714 }
1715
1716 machine_mode mask_mode;
1717 if (!VECTOR_MODE_P (vecmode)
1718 || !targetm.vectorize.get_mask_mode (vecmode).exists (&mask_mode)
1719 || !can_vec_mask_load_store_p (vecmode, mask_mode, is_load))
1720 {
1721 if (dump_enabled_p ())
1722 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1723 "can't use a fully-masked loop because the target"
1724 " doesn't have the appropriate masked load or"
1725 " store.\n");
1726 LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo) = false;
1727 return;
1728 }
1729 /* We might load more scalars than we need for permuting SLP loads.
1730 We checked in get_group_load_store_type that the extra elements
1731 don't leak into a new vector. */
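/* For example, with GROUP_SIZE == 3, VF == 2 and a 4-element vector type
   the group touches 6 scalars per vector iteration, so this records an
   rgroup of ceil (6 / 4) == 2 masks.  */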
1732 poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
1733 poly_uint64 vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
1734 unsigned int nvectors;
1735 if (can_div_away_from_zero_p (group_size * vf, nunits, &nvectors))
1736 vect_record_loop_mask (loop_vinfo, masks, nvectors, vectype, scalar_mask);
1737 else
1738 gcc_unreachable ();
1739 }
1740
1741 /* Return the mask input to a masked load or store. VEC_MASK is the vectorized
1742 form of the scalar mask condition and LOOP_MASK, if nonnull, is the mask
1743 that needs to be applied to all loads and stores in a vectorized loop.
1744 Return VEC_MASK if LOOP_MASK is null, otherwise return VEC_MASK & LOOP_MASK.
1745
1746 MASK_TYPE is the type of both masks. If new statements are needed,
1747 insert them before GSI. */
1748
1749 static tree
1750 prepare_load_store_mask (tree mask_type, tree loop_mask, tree vec_mask,
1751 gimple_stmt_iterator *gsi)
1752 {
1753 gcc_assert (useless_type_conversion_p (mask_type, TREE_TYPE (vec_mask)));
1754 if (!loop_mask)
1755 return vec_mask;
1756
1757 gcc_assert (TREE_TYPE (loop_mask) == mask_type);
1758 tree and_res = make_temp_ssa_name (mask_type, NULL, "vec_mask_and");
1759 gimple *and_stmt = gimple_build_assign (and_res, BIT_AND_EXPR,
1760 vec_mask, loop_mask);
1761 gsi_insert_before (gsi, and_stmt, GSI_SAME_STMT);
1762 return and_res;
1763 }
1764
1765 /* Determine whether we can use a gather load or scatter store to vectorize
1766 strided load or store STMT_INFO by truncating the current offset to a
1767 smaller width. We need to be able to construct an offset vector:
1768
1769 { 0, X, X*2, X*3, ... }
1770
1771 without loss of precision, where X is STMT_INFO's DR_STEP.
1772
1773 Return true if this is possible, describing the gather load or scatter
1774 store in GS_INFO. MASKED_P is true if the load or store is conditional. */
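/* For example, with a DR_STEP of 4 and at most 256 scalar iterations the
   largest offset needed is 255 * 4 == 1020, which fits in a 16-bit offset
   type at scale 1, or is simply 255 (an 8-bit type) when the 4-byte
   element size is used as the scale.  */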
1775
1776 static bool
1777 vect_truncate_gather_scatter_offset (stmt_vec_info stmt_info,
1778 loop_vec_info loop_vinfo, bool masked_p,
1779 gather_scatter_info *gs_info)
1780 {
1781 dr_vec_info *dr_info = STMT_VINFO_DR_INFO (stmt_info);
1782 data_reference *dr = dr_info->dr;
1783 tree step = DR_STEP (dr);
1784 if (TREE_CODE (step) != INTEGER_CST)
1785 {
1786 /* ??? Perhaps we could use range information here? */
1787 if (dump_enabled_p ())
1788 dump_printf_loc (MSG_NOTE, vect_location,
1789 "cannot truncate variable step.\n");
1790 return false;
1791 }
1792
1793 /* Get the number of bits in an element. */
1794 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
1795 scalar_mode element_mode = SCALAR_TYPE_MODE (TREE_TYPE (vectype));
1796 unsigned int element_bits = GET_MODE_BITSIZE (element_mode);
1797
1798 /* Set COUNT to the upper limit on the number of elements - 1.
1799 Start with the maximum vectorization factor. */
1800 unsigned HOST_WIDE_INT count = vect_max_vf (loop_vinfo) - 1;
1801
1802 /* Try lowering COUNT to the number of scalar latch iterations. */
1803 class loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
1804 widest_int max_iters;
1805 if (max_loop_iterations (loop, &max_iters)
1806 && max_iters < count)
1807 count = max_iters.to_shwi ();
1808
1809 /* Try scales of 1 and the element size. */
1810 int scales[] = { 1, vect_get_scalar_dr_size (dr_info) };
1811 wi::overflow_type overflow = wi::OVF_NONE;
1812 for (int i = 0; i < 2; ++i)
1813 {
1814 int scale = scales[i];
1815 widest_int factor;
1816 if (!wi::multiple_of_p (wi::to_widest (step), scale, SIGNED, &factor))
1817 continue;
1818
1819 /* Determine the minimum precision of COUNT * STEP / SCALE. */
1820 widest_int range = wi::mul (count, factor, SIGNED, &overflow);
1821 if (overflow)
1822 continue;
1823 signop sign = range >= 0 ? UNSIGNED : SIGNED;
1824 unsigned int min_offset_bits = wi::min_precision (range, sign);
1825
1826 /* Find the narrowest viable offset type. */
1827 unsigned int offset_bits = 1U << ceil_log2 (min_offset_bits);
1828 tree offset_type = build_nonstandard_integer_type (offset_bits,
1829 sign == UNSIGNED);
1830
1831 /* See whether the target supports the operation with an offset
1832 no narrower than OFFSET_TYPE. */
1833 tree memory_type = TREE_TYPE (DR_REF (dr));
1834 if (!vect_gather_scatter_fn_p (loop_vinfo, DR_IS_READ (dr), masked_p,
1835 vectype, memory_type, offset_type, scale,
1836 &gs_info->ifn, &gs_info->offset_vectype))
1837 continue;
1838
1839 gs_info->decl = NULL_TREE;
1840 /* Logically the sum of DR_BASE_ADDRESS, DR_INIT and DR_OFFSET,
1841 but we don't need to store that here. */
1842 gs_info->base = NULL_TREE;
1843 gs_info->element_type = TREE_TYPE (vectype);
1844 gs_info->offset = fold_convert (offset_type, step);
1845 gs_info->offset_dt = vect_constant_def;
1846 gs_info->scale = scale;
1847 gs_info->memory_type = memory_type;
1848 return true;
1849 }
1850
1851 if (overflow && dump_enabled_p ())
1852 dump_printf_loc (MSG_NOTE, vect_location,
1853 "truncating gather/scatter offset to %d bits"
1854 " might change its value.\n", element_bits);
1855
1856 return false;
1857 }
1858
1859 /* Return true if we can use gather/scatter internal functions to
1860 vectorize STMT_INFO, which is a grouped or strided load or store.
1861 MASKED_P is true if the load or store is conditional. When returning
1862 true, fill in GS_INFO with the information required to perform the
1863 operation. */
1864
1865 static bool
1866 vect_use_strided_gather_scatters_p (stmt_vec_info stmt_info,
1867 loop_vec_info loop_vinfo, bool masked_p,
1868 gather_scatter_info *gs_info)
1869 {
1870 if (!vect_check_gather_scatter (stmt_info, loop_vinfo, gs_info)
1871 || gs_info->decl)
1872 return vect_truncate_gather_scatter_offset (stmt_info, loop_vinfo,
1873 masked_p, gs_info);
1874
1875 tree old_offset_type = TREE_TYPE (gs_info->offset);
1876 tree new_offset_type = TREE_TYPE (gs_info->offset_vectype);
1877
1878 gcc_assert (TYPE_PRECISION (new_offset_type)
1879 >= TYPE_PRECISION (old_offset_type));
1880 gs_info->offset = fold_convert (new_offset_type, gs_info->offset);
1881
1882 if (dump_enabled_p ())
1883 dump_printf_loc (MSG_NOTE, vect_location,
1884 "using gather/scatter for strided/grouped access,"
1885 " scale = %d\n", gs_info->scale);
1886
1887 return true;
1888 }
1889
1890 /* STMT_INFO is a non-strided load or store, meaning that it accesses
1891 elements with a known constant step. Return -1 if that step
1892 is negative, 0 if it is zero, and 1 if it is greater than zero. */
1893
1894 static int
1895 compare_step_with_zero (vec_info *vinfo, stmt_vec_info stmt_info)
1896 {
1897 dr_vec_info *dr_info = STMT_VINFO_DR_INFO (stmt_info);
1898 return tree_int_cst_compare (vect_dr_behavior (vinfo, dr_info)->step,
1899 size_zero_node);
1900 }
1901
1902 /* If the target supports a permute mask that reverses the elements in
1903 a vector of type VECTYPE, return that mask, otherwise return null. */
1904
1905 static tree
1906 perm_mask_for_reverse (tree vectype)
1907 {
1908 poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
1909
1910 /* The encoding has a single stepped pattern. */
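/* E.g. for an 8-element vector the loop below pushes { 7, 6, 5 }, which
   the builder extends to the full reversal { 7, 6, 5, 4, 3, 2, 1, 0 }.  */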
1911 vec_perm_builder sel (nunits, 1, 3);
1912 for (int i = 0; i < 3; ++i)
1913 sel.quick_push (nunits - 1 - i);
1914
1915 vec_perm_indices indices (sel, 1, nunits);
1916 if (!can_vec_perm_const_p (TYPE_MODE (vectype), indices))
1917 return NULL_TREE;
1918 return vect_gen_perm_mask_checked (vectype, indices);
1919 }
1920
1921 /* A subroutine of get_load_store_type, with a subset of the same
1922 arguments. Handle the case where STMT_INFO is a load or store that
1923 accesses consecutive elements with a negative step. */
1924
1925 static vect_memory_access_type
1926 get_negative_load_store_type (vec_info *vinfo,
1927 stmt_vec_info stmt_info, tree vectype,
1928 vec_load_store_type vls_type,
1929 unsigned int ncopies)
1930 {
1931 dr_vec_info *dr_info = STMT_VINFO_DR_INFO (stmt_info);
1932 dr_alignment_support alignment_support_scheme;
1933
1934 if (ncopies > 1)
1935 {
1936 if (dump_enabled_p ())
1937 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1938 "multiple types with negative step.\n");
1939 return VMAT_ELEMENTWISE;
1940 }
1941
1942 alignment_support_scheme = vect_supportable_dr_alignment (vinfo,
1943 dr_info, false);
1944 if (alignment_support_scheme != dr_aligned
1945 && alignment_support_scheme != dr_unaligned_supported)
1946 {
1947 if (dump_enabled_p ())
1948 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1949 "negative step but alignment required.\n");
1950 return VMAT_ELEMENTWISE;
1951 }
1952
1953 if (vls_type == VLS_STORE_INVARIANT)
1954 {
1955 if (dump_enabled_p ())
1956 dump_printf_loc (MSG_NOTE, vect_location,
1957 "negative step with invariant source;"
1958 " no permute needed.\n");
1959 return VMAT_CONTIGUOUS_DOWN;
1960 }
1961
1962 if (!perm_mask_for_reverse (vectype))
1963 {
1964 if (dump_enabled_p ())
1965 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1966 "negative step and reversing not supported.\n");
1967 return VMAT_ELEMENTWISE;
1968 }
1969
1970 return VMAT_CONTIGUOUS_REVERSE;
1971 }
1972
1973 /* STMT_INFO is either a masked or unconditional store. Return the value
1974 being stored. */
1975
1976 tree
1977 vect_get_store_rhs (stmt_vec_info stmt_info)
1978 {
1979 if (gassign *assign = dyn_cast <gassign *> (stmt_info->stmt))
1980 {
1981 gcc_assert (gimple_assign_single_p (assign));
1982 return gimple_assign_rhs1 (assign);
1983 }
1984 if (gcall *call = dyn_cast <gcall *> (stmt_info->stmt))
1985 {
1986 internal_fn ifn = gimple_call_internal_fn (call);
1987 int index = internal_fn_stored_value_index (ifn);
1988 gcc_assert (index >= 0);
1989 return gimple_call_arg (call, index);
1990 }
1991 gcc_unreachable ();
1992 }
1993
1994 /* Function VECTOR_VECTOR_COMPOSITION_TYPE
1995
1996 This function returns a vector type which can be composed from NELTS pieces,
1997 whose type is recorded in PTYPE. VTYPE should be a vector type with the
1998 same vector size as the returned vector. It first checks whether the target
1999 supports a vector mode of the piece size for the construction; if not, it
2000 checks whether a scalar integer mode of the piece size can be used instead.
2001 It returns NULL_TREE if no usable composition can be found.
2002
2003 For example, for (vtype=V16QI, nelts=4), we can probably get:
2004 - V16QI with PTYPE V4QI.
2005 - V4SI with PTYPE SI.
2006 - NULL_TREE. */
2007
2008 static tree
2009 vector_vector_composition_type (tree vtype, poly_uint64 nelts, tree *ptype)
2010 {
2011 gcc_assert (VECTOR_TYPE_P (vtype));
2012 gcc_assert (known_gt (nelts, 0U));
2013
2014 machine_mode vmode = TYPE_MODE (vtype);
2015 if (!VECTOR_MODE_P (vmode))
2016 return NULL_TREE;
2017
2018 poly_uint64 vbsize = GET_MODE_BITSIZE (vmode);
2019 unsigned int pbsize;
2020 if (constant_multiple_p (vbsize, nelts, &pbsize))
2021 {
2022 /* First check if vec_init optab supports construction from
2023 vector pieces directly. */
2024 scalar_mode elmode = SCALAR_TYPE_MODE (TREE_TYPE (vtype));
2025 poly_uint64 inelts = pbsize / GET_MODE_BITSIZE (elmode);
2026 machine_mode rmode;
2027 if (related_vector_mode (vmode, elmode, inelts).exists (&rmode)
2028 && (convert_optab_handler (vec_init_optab, vmode, rmode)
2029 != CODE_FOR_nothing))
2030 {
2031 *ptype = build_vector_type (TREE_TYPE (vtype), inelts);
2032 return vtype;
2033 }
2034
2035 /* Otherwise check whether an integer type of the same piece size exists
2036 and whether the vec_init optab supports construction from it directly. */
2037 if (int_mode_for_size (pbsize, 0).exists (&elmode)
2038 && related_vector_mode (vmode, elmode, nelts).exists (&rmode)
2039 && (convert_optab_handler (vec_init_optab, rmode, elmode)
2040 != CODE_FOR_nothing))
2041 {
2042 *ptype = build_nonstandard_integer_type (pbsize, 1);
2043 return build_vector_type (*ptype, nelts);
2044 }
2045 }
2046
2047 return NULL_TREE;
2048 }
2049
2050 /* A subroutine of get_load_store_type, with a subset of the same
2051 arguments. Handle the case where STMT_INFO is part of a grouped load
2052 or store.
2053
2054 For stores, the statements in the group are all consecutive
2055 and there is no gap at the end. For loads, the statements in the
2056 group might not be consecutive; there can be gaps between statements
2057 as well as at the end. */
2058
2059 static bool
2060 get_group_load_store_type (vec_info *vinfo, stmt_vec_info stmt_info,
2061 tree vectype, slp_tree slp_node,
2062 bool masked_p, vec_load_store_type vls_type,
2063 vect_memory_access_type *memory_access_type,
2064 dr_alignment_support *alignment_support_scheme,
2065 gather_scatter_info *gs_info)
2066 {
2067 loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
2068 class loop *loop = loop_vinfo ? LOOP_VINFO_LOOP (loop_vinfo) : NULL;
2069 stmt_vec_info first_stmt_info = DR_GROUP_FIRST_ELEMENT (stmt_info);
2070 dr_vec_info *first_dr_info = STMT_VINFO_DR_INFO (first_stmt_info);
2071 unsigned int group_size = DR_GROUP_SIZE (first_stmt_info);
2072 bool single_element_p = (stmt_info == first_stmt_info
2073 && !DR_GROUP_NEXT_ELEMENT (stmt_info));
2074 unsigned HOST_WIDE_INT gap = DR_GROUP_GAP (first_stmt_info);
2075 poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
2076
2077 /* True if the vectorized statements would access beyond the last
2078 statement in the group. */
2079 bool overrun_p = false;
2080
2081 /* True if we can cope with such overrun by peeling for gaps, so that
2082 there is at least one final scalar iteration after the vector loop. */
2083 bool can_overrun_p = (!masked_p
2084 && vls_type == VLS_LOAD
2085 && loop_vinfo
2086 && !loop->inner);
2087
2088 /* There can only be a gap at the end of the group if the stride is
2089 known at compile time. */
2090 gcc_assert (!STMT_VINFO_STRIDED_P (first_stmt_info) || gap == 0);
2091
2092 /* Stores can't yet have gaps. */
2093 gcc_assert (slp_node || vls_type == VLS_LOAD || gap == 0);
2094
2095 if (slp_node)
2096 {
2097 /* For SLP vectorization we directly vectorize a subchain
2098 without permutation. */
2099 if (! SLP_TREE_LOAD_PERMUTATION (slp_node).exists ())
2100 first_dr_info
2101 = STMT_VINFO_DR_INFO (SLP_TREE_SCALAR_STMTS (slp_node)[0]);
2102 if (STMT_VINFO_STRIDED_P (first_stmt_info))
2103 {
2104 /* Try to use consecutive accesses of DR_GROUP_SIZE elements,
2105 separated by the stride, until we have a complete vector.
2106 Fall back to scalar accesses if that isn't possible. */
2107 if (multiple_p (nunits, group_size))
2108 *memory_access_type = VMAT_STRIDED_SLP;
2109 else
2110 *memory_access_type = VMAT_ELEMENTWISE;
2111 }
2112 else
2113 {
2114 overrun_p = loop_vinfo && gap != 0;
2115 if (overrun_p && vls_type != VLS_LOAD)
2116 {
2117 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2118 "Grouped store with gaps requires"
2119 " non-consecutive accesses\n");
2120 return false;
2121 }
2122 /* An overrun is fine if the trailing elements are smaller
2123 than the alignment boundary B. Every vector access will
2124 be a multiple of B and so we are guaranteed to access a
2125 non-gap element in the same B-sized block. */
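/* For example, with 4-byte elements and a known alignment of 16 bytes,
   a trailing gap of up to 3 elements (12 bytes) keeps every overrunning
   access inside a 16-byte block that still contains a real group
   element.  */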
2126 if (overrun_p
2127 && gap < (vect_known_alignment_in_bytes (first_dr_info)
2128 / vect_get_scalar_dr_size (first_dr_info)))
2129 overrun_p = false;
2130
2131 /* If the gap splits the vector in half and the target
2132 can do half-vector operations, avoid the epilogue peeling
2133 by simply loading half of the vector only. Usually
2134 the construction with an upper zero half will be elided. */
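/* E.g. a group of 4 elements with a trailing gap of 2 and a 4-element
   vector type: the 2 real elements fit in the low half of the vector,
   so a half-size load is enough.  */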
2135 dr_alignment_support alignment_support_scheme;
2136 tree half_vtype;
2137 if (overrun_p
2138 && !masked_p
2139 && (((alignment_support_scheme
2140 = vect_supportable_dr_alignment (vinfo,
2141 first_dr_info, false)))
2142 == dr_aligned
2143 || alignment_support_scheme == dr_unaligned_supported)
2144 && known_eq (nunits, (group_size - gap) * 2)
2145 && known_eq (nunits, group_size)
2146 && (vector_vector_composition_type (vectype, 2, &half_vtype)
2147 != NULL_TREE))
2148 overrun_p = false;
2149
2150 if (overrun_p && !can_overrun_p)
2151 {
2152 if (dump_enabled_p ())
2153 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2154 "Peeling for outer loop is not supported\n");
2155 return false;
2156 }
2157 int cmp = compare_step_with_zero (vinfo, stmt_info);
2158 if (cmp < 0)
2159 {
2160 if (single_element_p)
2161 /* ??? The VMAT_CONTIGUOUS_REVERSE code generation is
2162 only correct for single element "interleaving" SLP. */
2163 *memory_access_type = get_negative_load_store_type
2164 (vinfo, stmt_info, vectype, vls_type, 1);
2165 else
2166 *memory_access_type = VMAT_STRIDED_SLP;
2167 }
2168 else
2169 {
2170 gcc_assert (!loop_vinfo || cmp > 0);
2171 *memory_access_type = VMAT_CONTIGUOUS;
2172 }
2173 }
2174 }
2175 else
2176 {
2177 /* We can always handle this case using elementwise accesses,
2178 but see if something more efficient is available. */
2179 *memory_access_type = VMAT_ELEMENTWISE;
2180
2181 /* If there is a gap at the end of the group then these optimizations
2182 would access excess elements in the last iteration. */
2183 bool would_overrun_p = (gap != 0);
2184 /* An overrun is fine if the trailing elements are smaller than the
2185 alignment boundary B. Every vector access will be a multiple of B
2186 and so we are guaranteed to access a non-gap element in the
2187 same B-sized block. */
2188 if (would_overrun_p
2189 && !masked_p
2190 && gap < (vect_known_alignment_in_bytes (first_dr_info)
2191 / vect_get_scalar_dr_size (first_dr_info)))
2192 would_overrun_p = false;
2193
2194 if (!STMT_VINFO_STRIDED_P (first_stmt_info)
2195 && (can_overrun_p || !would_overrun_p)
2196 && compare_step_with_zero (vinfo, stmt_info) > 0)
2197 {
2198 /* First cope with the degenerate case of a single-element
2199 vector. */
2200 if (known_eq (TYPE_VECTOR_SUBPARTS (vectype), 1U))
2201 *memory_access_type = VMAT_CONTIGUOUS;
2202
2203 /* Otherwise try using LOAD/STORE_LANES. */
2204 if (*memory_access_type == VMAT_ELEMENTWISE
2205 && (vls_type == VLS_LOAD
2206 ? vect_load_lanes_supported (vectype, group_size, masked_p)
2207 : vect_store_lanes_supported (vectype, group_size,
2208 masked_p)))
2209 {
2210 *memory_access_type = VMAT_LOAD_STORE_LANES;
2211 overrun_p = would_overrun_p;
2212 }
2213
2214 /* If that fails, try using permuting loads. */
2215 if (*memory_access_type == VMAT_ELEMENTWISE
2216 && (vls_type == VLS_LOAD
2217 ? vect_grouped_load_supported (vectype, single_element_p,
2218 group_size)
2219 : vect_grouped_store_supported (vectype, group_size)))
2220 {
2221 *memory_access_type = VMAT_CONTIGUOUS_PERMUTE;
2222 overrun_p = would_overrun_p;
2223 }
2224 }
2225
2226 /* As a last resort, try using a gather load or scatter store.
2227
2228 ??? Although the code can handle all group sizes correctly,
2229 it probably isn't a win to use separate strided accesses based
2230 on nearby locations. Or, even if it's a win over scalar code,
2231 it might not be a win over vectorizing at a lower VF, if that
2232 allows us to use contiguous accesses. */
2233 if (*memory_access_type == VMAT_ELEMENTWISE
2234 && single_element_p
2235 && loop_vinfo
2236 && vect_use_strided_gather_scatters_p (stmt_info, loop_vinfo,
2237 masked_p, gs_info))
2238 *memory_access_type = VMAT_GATHER_SCATTER;
2239 }
2240
2241 if (*memory_access_type == VMAT_GATHER_SCATTER
2242 || *memory_access_type == VMAT_ELEMENTWISE)
2243 *alignment_support_scheme = dr_unaligned_supported;
2244 else
2245 *alignment_support_scheme
2246 = vect_supportable_dr_alignment (vinfo, first_dr_info, false);
2247
2248 if (vls_type != VLS_LOAD && first_stmt_info == stmt_info)
2249 {
2250 /* STMT is the leader of the group. Check the operands of all the
2251 stmts of the group. */
2252 stmt_vec_info next_stmt_info = DR_GROUP_NEXT_ELEMENT (stmt_info);
2253 while (next_stmt_info)
2254 {
2255 tree op = vect_get_store_rhs (next_stmt_info);
2256 enum vect_def_type dt;
2257 if (!vect_is_simple_use (op, vinfo, &dt))
2258 {
2259 if (dump_enabled_p ())
2260 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2261 "use not simple.\n");
2262 return false;
2263 }
2264 next_stmt_info = DR_GROUP_NEXT_ELEMENT (next_stmt_info);
2265 }
2266 }
2267
2268 if (overrun_p)
2269 {
2270 gcc_assert (can_overrun_p);
2271 if (dump_enabled_p ())
2272 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2273 "Data access with gaps requires scalar "
2274 "epilogue loop\n");
2275 LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo) = true;
2276 }
2277
2278 return true;
2279 }
2280
2281 /* Analyze load or store statement STMT_INFO of type VLS_TYPE. Return true
2282 if there is a memory access type that the vectorized form can use,
2283 storing it in *MEMORY_ACCESS_TYPE if so. If we decide to use gathers
2284 or scatters, fill in GS_INFO accordingly. *ALIGNMENT_SUPPORT_SCHEME is
2285 also filled in, and false is returned if the target does not support
2286 the chosen alignment scheme.
2287
2288 SLP says whether we're performing SLP rather than loop vectorization.
2289 MASKED_P is true if the statement is conditional on a vectorized mask.
2290 VECTYPE is the vector type that the vectorized statements will use.
2291 NCOPIES is the number of vector statements that will be needed. */
2292
2293 static bool
2294 get_load_store_type (vec_info *vinfo, stmt_vec_info stmt_info,
2295 tree vectype, slp_tree slp_node,
2296 bool masked_p, vec_load_store_type vls_type,
2297 unsigned int ncopies,
2298 vect_memory_access_type *memory_access_type,
2299 dr_alignment_support *alignment_support_scheme,
2300 gather_scatter_info *gs_info)
2301 {
2302 loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
2303 poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
2304 if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
2305 {
2306 *memory_access_type = VMAT_GATHER_SCATTER;
2307 if (!vect_check_gather_scatter (stmt_info, loop_vinfo, gs_info))
2308 gcc_unreachable ();
2309 else if (!vect_is_simple_use (gs_info->offset, vinfo,
2310 &gs_info->offset_dt,
2311 &gs_info->offset_vectype))
2312 {
2313 if (dump_enabled_p ())
2314 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2315 "%s index use not simple.\n",
2316 vls_type == VLS_LOAD ? "gather" : "scatter");
2317 return false;
2318 }
2319 /* Gather-scatter accesses perform only component accesses; alignment
2320 is irrelevant for them. */
2321 *alignment_support_scheme = dr_unaligned_supported;
2322 }
2323 else if (STMT_VINFO_GROUPED_ACCESS (stmt_info))
2324 {
2325 if (!get_group_load_store_type (vinfo, stmt_info, vectype, slp_node,
2326 masked_p,
2327 vls_type, memory_access_type,
2328 alignment_support_scheme, gs_info))
2329 return false;
2330 }
2331 else if (STMT_VINFO_STRIDED_P (stmt_info))
2332 {
2333 gcc_assert (!slp_node);
2334 if (loop_vinfo
2335 && vect_use_strided_gather_scatters_p (stmt_info, loop_vinfo,
2336 masked_p, gs_info))
2337 *memory_access_type = VMAT_GATHER_SCATTER;
2338 else
2339 *memory_access_type = VMAT_ELEMENTWISE;
2340 /* Alignment is irrelevant here. */
2341 *alignment_support_scheme = dr_unaligned_supported;
2342 }
2343 else
2344 {
2345 int cmp = compare_step_with_zero (vinfo, stmt_info);
2346 if (cmp < 0)
2347 *memory_access_type = get_negative_load_store_type
2348 (vinfo, stmt_info, vectype, vls_type, ncopies);
2349 else if (cmp == 0)
2350 {
2351 gcc_assert (vls_type == VLS_LOAD);
2352 *memory_access_type = VMAT_INVARIANT;
2353 }
2354 else
2355 *memory_access_type = VMAT_CONTIGUOUS;
2356 *alignment_support_scheme
2357 = vect_supportable_dr_alignment (vinfo,
2358 STMT_VINFO_DR_INFO (stmt_info), false);
2359 }
2360
2361 if ((*memory_access_type == VMAT_ELEMENTWISE
2362 || *memory_access_type == VMAT_STRIDED_SLP)
2363 && !nunits.is_constant ())
2364 {
2365 if (dump_enabled_p ())
2366 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2367 "Not using elementwise accesses due to variable "
2368 "vectorization factor.\n");
2369 return false;
2370 }
2371
2372 if (*alignment_support_scheme == dr_unaligned_unsupported)
2373 {
2374 if (dump_enabled_p ())
2375 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2376 "unsupported unaligned access\n");
2377 return false;
2378 }
2379
2380 /* FIXME: At the moment the cost model seems to underestimate the
2381 cost of using elementwise accesses. This check preserves the
2382 traditional behavior until that can be fixed. */
2383 stmt_vec_info first_stmt_info = DR_GROUP_FIRST_ELEMENT (stmt_info);
2384 if (!first_stmt_info)
2385 first_stmt_info = stmt_info;
2386 if (*memory_access_type == VMAT_ELEMENTWISE
2387 && !STMT_VINFO_STRIDED_P (first_stmt_info)
2388 && !(stmt_info == DR_GROUP_FIRST_ELEMENT (stmt_info)
2389 && !DR_GROUP_NEXT_ELEMENT (stmt_info)
2390 && !pow2p_hwi (DR_GROUP_SIZE (stmt_info))))
2391 {
2392 if (dump_enabled_p ())
2393 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2394 "not falling back to elementwise accesses\n");
2395 return false;
2396 }
2397 return true;
2398 }
2399
2400 /* Return true if boolean argument MASK is suitable for vectorizing
2401 conditional operation STMT_INFO. When returning true, store the type
2402 of the definition in *MASK_DT_OUT and the type of the vectorized mask
2403 in *MASK_VECTYPE_OUT. */
2404
2405 static bool
2406 vect_check_scalar_mask (vec_info *vinfo, stmt_vec_info stmt_info, tree mask,
2407 vect_def_type *mask_dt_out,
2408 tree *mask_vectype_out)
2409 {
2410 if (!VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (mask)))
2411 {
2412 if (dump_enabled_p ())
2413 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2414 "mask argument is not a boolean.\n");
2415 return false;
2416 }
2417
2418 if (TREE_CODE (mask) != SSA_NAME)
2419 {
2420 if (dump_enabled_p ())
2421 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2422 "mask argument is not an SSA name.\n");
2423 return false;
2424 }
2425
2426 enum vect_def_type mask_dt;
2427 tree mask_vectype;
2428 if (!vect_is_simple_use (mask, vinfo, &mask_dt, &mask_vectype))
2429 {
2430 if (dump_enabled_p ())
2431 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2432 "mask use not simple.\n");
2433 return false;
2434 }
2435
2436 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
2437 if (!mask_vectype)
2438 mask_vectype = get_mask_type_for_scalar_type (vinfo, TREE_TYPE (vectype));
2439
2440 if (!mask_vectype || !VECTOR_BOOLEAN_TYPE_P (mask_vectype))
2441 {
2442 if (dump_enabled_p ())
2443 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2444 "could not find an appropriate vector mask type.\n");
2445 return false;
2446 }
2447
2448 if (maybe_ne (TYPE_VECTOR_SUBPARTS (mask_vectype),
2449 TYPE_VECTOR_SUBPARTS (vectype)))
2450 {
2451 if (dump_enabled_p ())
2452 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2453 "vector mask type %T"
2454 " does not match vector data type %T.\n",
2455 mask_vectype, vectype);
2456
2457 return false;
2458 }
2459
2460 *mask_dt_out = mask_dt;
2461 *mask_vectype_out = mask_vectype;
2462 return true;
2463 }
2464
2465 /* Return true if stored value RHS is suitable for vectorizing store
2466 statement STMT_INFO. When returning true, store the type of the
2467 definition in *RHS_DT_OUT, the type of the vectorized store value in
2468 *RHS_VECTYPE_OUT and the type of the store in *VLS_TYPE_OUT. */
2469
2470 static bool
2471 vect_check_store_rhs (vec_info *vinfo, stmt_vec_info stmt_info,
2472 slp_tree slp_node, tree rhs,
2473 vect_def_type *rhs_dt_out, tree *rhs_vectype_out,
2474 vec_load_store_type *vls_type_out)
2475 {
2476 /* If this is a store from a constant, make sure
2477 native_encode_expr can handle it. */
2478 if (CONSTANT_CLASS_P (rhs) && native_encode_expr (rhs, NULL, 64) == 0)
2479 {
2480 if (dump_enabled_p ())
2481 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2482 "cannot encode constant as a byte sequence.\n");
2483 return false;
2484 }
2485
2486 enum vect_def_type rhs_dt;
2487 tree rhs_vectype;
2488 slp_tree slp_op;
2489 if (!vect_is_simple_use (vinfo, stmt_info, slp_node, 0,
2490 &rhs, &slp_op, &rhs_dt, &rhs_vectype))
2491 {
2492 if (dump_enabled_p ())
2493 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2494 "use not simple.\n");
2495 return false;
2496 }
2497
2498 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
2499 if (rhs_vectype && !useless_type_conversion_p (vectype, rhs_vectype))
2500 {
2501 if (dump_enabled_p ())
2502 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2503 "incompatible vector types.\n");
2504 return false;
2505 }
2506
2507 *rhs_dt_out = rhs_dt;
2508 *rhs_vectype_out = rhs_vectype;
2509 if (rhs_dt == vect_constant_def || rhs_dt == vect_external_def)
2510 *vls_type_out = VLS_STORE_INVARIANT;
2511 else
2512 *vls_type_out = VLS_STORE;
2513 return true;
2514 }
2515
2516 /* Build an all-ones vector mask of type MASKTYPE while vectorizing STMT_INFO.
2517 Note that we support masks with floating-point type, in which case the
2518 floats are interpreted as a bitmask. */
2519
2520 static tree
2521 vect_build_all_ones_mask (vec_info *vinfo,
2522 stmt_vec_info stmt_info, tree masktype)
2523 {
2524 if (TREE_CODE (masktype) == INTEGER_TYPE)
2525 return build_int_cst (masktype, -1);
2526 else if (TREE_CODE (TREE_TYPE (masktype)) == INTEGER_TYPE)
2527 {
2528 tree mask = build_int_cst (TREE_TYPE (masktype), -1);
2529 mask = build_vector_from_val (masktype, mask);
2530 return vect_init_vector (vinfo, stmt_info, mask, masktype, NULL);
2531 }
2532 else if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (masktype)))
2533 {
2534 REAL_VALUE_TYPE r;
2535 long tmp[6];
2536 for (int j = 0; j < 6; ++j)
2537 tmp[j] = -1;
2538 real_from_target (&r, tmp, TYPE_MODE (TREE_TYPE (masktype)));
2539 tree mask = build_real (TREE_TYPE (masktype), r);
2540 mask = build_vector_from_val (masktype, mask);
2541 return vect_init_vector (vinfo, stmt_info, mask, masktype, NULL);
2542 }
2543 gcc_unreachable ();
2544 }
2545
2546 /* Build an all-zero merge value of type VECTYPE while vectorizing
2547 STMT_INFO as a gather load. */
2548
2549 static tree
2550 vect_build_zero_merge_argument (vec_info *vinfo,
2551 stmt_vec_info stmt_info, tree vectype)
2552 {
2553 tree merge;
2554 if (TREE_CODE (TREE_TYPE (vectype)) == INTEGER_TYPE)
2555 merge = build_int_cst (TREE_TYPE (vectype), 0);
2556 else if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (vectype)))
2557 {
2558 REAL_VALUE_TYPE r;
2559 long tmp[6];
2560 for (int j = 0; j < 6; ++j)
2561 tmp[j] = 0;
2562 real_from_target (&r, tmp, TYPE_MODE (TREE_TYPE (vectype)));
2563 merge = build_real (TREE_TYPE (vectype), r);
2564 }
2565 else
2566 gcc_unreachable ();
2567 merge = build_vector_from_val (vectype, merge);
2568 return vect_init_vector (vinfo, stmt_info, merge, vectype, NULL);
2569 }
2570
2571 /* Build a gather load call while vectorizing STMT_INFO. Insert new
2572 instructions before GSI and add them to VEC_STMT. GS_INFO describes
2573 the gather load operation. If the load is conditional, MASK is the
2574 unvectorized condition and MASK_DT is its definition type, otherwise
2575 MASK is null. */
2576
2577 static void
2578 vect_build_gather_load_calls (vec_info *vinfo, stmt_vec_info stmt_info,
2579 gimple_stmt_iterator *gsi,
2580 gimple **vec_stmt,
2581 gather_scatter_info *gs_info,
2582 tree mask)
2583 {
2584 loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
2585 class loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
2586 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
2587 poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
2588 int ncopies = vect_get_num_copies (loop_vinfo, vectype);
2589 edge pe = loop_preheader_edge (loop);
2590 enum { NARROW, NONE, WIDEN } modifier;
2591 poly_uint64 gather_off_nunits
2592 = TYPE_VECTOR_SUBPARTS (gs_info->offset_vectype);
2593
2594 tree arglist = TYPE_ARG_TYPES (TREE_TYPE (gs_info->decl));
2595 tree rettype = TREE_TYPE (TREE_TYPE (gs_info->decl));
2596 tree srctype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
2597 tree ptrtype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
2598 tree idxtype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
2599 tree masktype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
2600 tree scaletype = TREE_VALUE (arglist);
2601 tree real_masktype = masktype;
2602 gcc_checking_assert (types_compatible_p (srctype, rettype)
2603 && (!mask
2604 || TREE_CODE (masktype) == INTEGER_TYPE
2605 || types_compatible_p (srctype, masktype)));
2606 if (mask && TREE_CODE (masktype) == INTEGER_TYPE)
2607 masktype = truth_type_for (srctype);
2608
2609 tree mask_halftype = masktype;
2610 tree perm_mask = NULL_TREE;
2611 tree mask_perm_mask = NULL_TREE;
2612 if (known_eq (nunits, gather_off_nunits))
2613 modifier = NONE;
2614 else if (known_eq (nunits * 2, gather_off_nunits))
2615 {
2616 modifier = WIDEN;
2617
2618 /* Currently widening gathers and scatters are only supported for
2619 fixed-length vectors. */
2620 int count = gather_off_nunits.to_constant ();
2621 vec_perm_builder sel (count, count, 1);
2622 for (int i = 0; i < count; ++i)
2623 sel.quick_push (i | (count / 2));
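/* E.g. for COUNT == 8 this builds { 4, 5, 6, 7, 4, 5, 6, 7 }, which moves
   the high half of the offset vector into the low half; it is applied to
   the odd-numbered copies below.  */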
2624
2625 vec_perm_indices indices (sel, 1, count);
2626 perm_mask = vect_gen_perm_mask_checked (gs_info->offset_vectype,
2627 indices);
2628 }
2629 else if (known_eq (nunits, gather_off_nunits * 2))
2630 {
2631 modifier = NARROW;
2632
2633 /* Currently narrowing gathers and scatters are only supported for
2634 fixed-length vectors. */
2635 int count = nunits.to_constant ();
2636 vec_perm_builder sel (count, count, 1);
2637 sel.quick_grow (count);
2638 for (int i = 0; i < count; ++i)
2639 sel[i] = i < count / 2 ? i : i + count / 2;
2640 vec_perm_indices indices (sel, 2, count);
2641 perm_mask = vect_gen_perm_mask_checked (vectype, indices);
2642
2643 ncopies *= 2;
2644
2645 if (mask && masktype == real_masktype)
2646 {
2647 for (int i = 0; i < count; ++i)
2648 sel[i] = i | (count / 2);
2649 indices.new_vector (sel, 2, count);
2650 mask_perm_mask = vect_gen_perm_mask_checked (masktype, indices);
2651 }
2652 else if (mask)
2653 mask_halftype = truth_type_for (gs_info->offset_vectype);
2654 }
2655 else
2656 gcc_unreachable ();
2657
2658 tree scalar_dest = gimple_get_lhs (stmt_info->stmt);
2659 tree vec_dest = vect_create_destination_var (scalar_dest, vectype);
2660
2661 tree ptr = fold_convert (ptrtype, gs_info->base);
2662 if (!is_gimple_min_invariant (ptr))
2663 {
2664 gimple_seq seq;
2665 ptr = force_gimple_operand (ptr, &seq, true, NULL_TREE);
2666 basic_block new_bb = gsi_insert_seq_on_edge_immediate (pe, seq);
2667 gcc_assert (!new_bb);
2668 }
2669
2670 tree scale = build_int_cst (scaletype, gs_info->scale);
2671
2672 tree vec_oprnd0 = NULL_TREE;
2673 tree vec_mask = NULL_TREE;
2674 tree src_op = NULL_TREE;
2675 tree mask_op = NULL_TREE;
2676 tree prev_res = NULL_TREE;
2677
2678 if (!mask)
2679 {
2680 src_op = vect_build_zero_merge_argument (vinfo, stmt_info, rettype);
2681 mask_op = vect_build_all_ones_mask (vinfo, stmt_info, masktype);
2682 }
2683
2684 auto_vec<tree> vec_oprnds0;
2685 auto_vec<tree> vec_masks;
2686 vect_get_vec_defs_for_operand (vinfo, stmt_info,
2687 modifier == WIDEN ? ncopies / 2 : ncopies,
2688 gs_info->offset, &vec_oprnds0);
2689 if (mask)
2690 vect_get_vec_defs_for_operand (vinfo, stmt_info,
2691 modifier == NARROW ? ncopies / 2 : ncopies,
2692 mask, &vec_masks);
2693 for (int j = 0; j < ncopies; ++j)
2694 {
2695 tree op, var;
2696 if (modifier == WIDEN && (j & 1))
2697 op = permute_vec_elements (vinfo, vec_oprnd0, vec_oprnd0,
2698 perm_mask, stmt_info, gsi);
2699 else
2700 op = vec_oprnd0 = vec_oprnds0[modifier == WIDEN ? j / 2 : j];
2701
2702 if (!useless_type_conversion_p (idxtype, TREE_TYPE (op)))
2703 {
2704 gcc_assert (known_eq (TYPE_VECTOR_SUBPARTS (TREE_TYPE (op)),
2705 TYPE_VECTOR_SUBPARTS (idxtype)));
2706 var = vect_get_new_ssa_name (idxtype, vect_simple_var);
2707 op = build1 (VIEW_CONVERT_EXPR, idxtype, op);
2708 gassign *new_stmt = gimple_build_assign (var, VIEW_CONVERT_EXPR, op);
2709 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
2710 op = var;
2711 }
2712
2713 if (mask)
2714 {
2715 if (mask_perm_mask && (j & 1))
2716 mask_op = permute_vec_elements (vinfo, mask_op, mask_op,
2717 mask_perm_mask, stmt_info, gsi);
2718 else
2719 {
2720 if (modifier == NARROW)
2721 {
2722 if ((j & 1) == 0)
2723 vec_mask = vec_masks[j / 2];
2724 }
2725 else
2726 vec_mask = vec_masks[j];
2727
2728 mask_op = vec_mask;
2729 if (!useless_type_conversion_p (masktype, TREE_TYPE (vec_mask)))
2730 {
2731 poly_uint64 sub1 = TYPE_VECTOR_SUBPARTS (TREE_TYPE (mask_op));
2732 poly_uint64 sub2 = TYPE_VECTOR_SUBPARTS (masktype);
2733 gcc_assert (known_eq (sub1, sub2));
2734 var = vect_get_new_ssa_name (masktype, vect_simple_var);
2735 mask_op = build1 (VIEW_CONVERT_EXPR, masktype, mask_op);
2736 gassign *new_stmt
2737 = gimple_build_assign (var, VIEW_CONVERT_EXPR, mask_op);
2738 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
2739 mask_op = var;
2740 }
2741 }
2742 if (modifier == NARROW && masktype != real_masktype)
2743 {
2744 var = vect_get_new_ssa_name (mask_halftype, vect_simple_var);
2745 gassign *new_stmt
2746 = gimple_build_assign (var, (j & 1) ? VEC_UNPACK_HI_EXPR
2747 : VEC_UNPACK_LO_EXPR,
2748 mask_op);
2749 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
2750 mask_op = var;
2751 }
2752 src_op = mask_op;
2753 }
2754
2755 tree mask_arg = mask_op;
2756 if (masktype != real_masktype)
2757 {
2758 tree utype, optype = TREE_TYPE (mask_op);
2759 if (TYPE_MODE (real_masktype) == TYPE_MODE (optype))
2760 utype = real_masktype;
2761 else
2762 utype = lang_hooks.types.type_for_mode (TYPE_MODE (optype), 1);
2763 var = vect_get_new_ssa_name (utype, vect_scalar_var);
2764 mask_arg = build1 (VIEW_CONVERT_EXPR, utype, mask_op);
2765 gassign *new_stmt
2766 = gimple_build_assign (var, VIEW_CONVERT_EXPR, mask_arg);
2767 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
2768 mask_arg = var;
2769 if (!useless_type_conversion_p (real_masktype, utype))
2770 {
2771 gcc_assert (TYPE_PRECISION (utype)
2772 <= TYPE_PRECISION (real_masktype));
2773 var = vect_get_new_ssa_name (real_masktype, vect_scalar_var);
2774 new_stmt = gimple_build_assign (var, NOP_EXPR, mask_arg);
2775 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
2776 mask_arg = var;
2777 }
2778 src_op = build_zero_cst (srctype);
2779 }
2780 gimple *new_stmt = gimple_build_call (gs_info->decl, 5, src_op, ptr, op,
2781 mask_arg, scale);
2782
2783 if (!useless_type_conversion_p (vectype, rettype))
2784 {
2785 gcc_assert (known_eq (TYPE_VECTOR_SUBPARTS (vectype),
2786 TYPE_VECTOR_SUBPARTS (rettype)));
2787 op = vect_get_new_ssa_name (rettype, vect_simple_var);
2788 gimple_call_set_lhs (new_stmt, op);
2789 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
2790 var = make_ssa_name (vec_dest);
2791 op = build1 (VIEW_CONVERT_EXPR, vectype, op);
2792 new_stmt = gimple_build_assign (var, VIEW_CONVERT_EXPR, op);
2793 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
2794 }
2795 else
2796 {
2797 var = make_ssa_name (vec_dest, new_stmt);
2798 gimple_call_set_lhs (new_stmt, var);
2799 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
2800 }
2801
2802 if (modifier == NARROW)
2803 {
2804 if ((j & 1) == 0)
2805 {
2806 prev_res = var;
2807 continue;
2808 }
2809 var = permute_vec_elements (vinfo, prev_res, var, perm_mask,
2810 stmt_info, gsi);
2811 new_stmt = SSA_NAME_DEF_STMT (var);
2812 }
2813
2814 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
2815 }
2816 *vec_stmt = STMT_VINFO_VEC_STMTS (stmt_info)[0];
2817 }
2818
2819 /* Prepare the base and offset in GS_INFO for vectorization.
2820 Set *DATAREF_PTR to the loop-invariant base address and *VEC_OFFSET
2821 to the vectorized offset argument for the first copy of STMT_INFO.
2822 STMT_INFO is the statement described by GS_INFO and LOOP is the
2823 containing loop. */
2824
2825 static void
2826 vect_get_gather_scatter_ops (vec_info *vinfo,
2827 class loop *loop, stmt_vec_info stmt_info,
2828 gather_scatter_info *gs_info,
2829 tree *dataref_ptr, vec<tree> *vec_offset,
2830 unsigned ncopies)
2831 {
2832 gimple_seq stmts = NULL;
2833 *dataref_ptr = force_gimple_operand (gs_info->base, &stmts, true, NULL_TREE);
2834 if (stmts != NULL)
2835 {
2836 basic_block new_bb;
2837 edge pe = loop_preheader_edge (loop);
2838 new_bb = gsi_insert_seq_on_edge_immediate (pe, stmts);
2839 gcc_assert (!new_bb);
2840 }
2841 vect_get_vec_defs_for_operand (vinfo, stmt_info, ncopies, gs_info->offset,
2842 vec_offset, gs_info->offset_vectype);
2843 }
2844
2845 /* Prepare to implement a grouped or strided load or store using
2846 the gather load or scatter store operation described by GS_INFO.
2847 STMT_INFO is the load or store statement.
2848
2849 Set *DATAREF_BUMP to the amount that should be added to the base
2850 address after each copy of the vectorized statement. Set *VEC_OFFSET
2851 to an invariant offset vector in which element I has the value
2852 I * DR_STEP / SCALE. */
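/* For example, with a DR_STEP of 12 bytes and a scale of 4 the offset
   vector is { 0, 3, 6, 9, ... }; the gather/scatter multiplies it by the
   scale again, giving byte offsets { 0, 12, 24, ... }.  */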
2853
2854 static void
2855 vect_get_strided_load_store_ops (stmt_vec_info stmt_info,
2856 loop_vec_info loop_vinfo,
2857 gather_scatter_info *gs_info,
2858 tree *dataref_bump, tree *vec_offset)
2859 {
2860 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
2861 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
2862
2863 tree bump = size_binop (MULT_EXPR,
2864 fold_convert (sizetype, unshare_expr (DR_STEP (dr))),
2865 size_int (TYPE_VECTOR_SUBPARTS (vectype)));
2866 *dataref_bump = cse_and_gimplify_to_preheader (loop_vinfo, bump);
2867
2868 /* The offset given in GS_INFO can have pointer type, so use the element
2869 type of the vector instead. */
2870 tree offset_type = TREE_TYPE (gs_info->offset_vectype);
2871
2872 /* Calculate X = DR_STEP / SCALE and convert it to the appropriate type. */
2873 tree step = size_binop (EXACT_DIV_EXPR, unshare_expr (DR_STEP (dr)),
2874 ssize_int (gs_info->scale));
2875 step = fold_convert (offset_type, step);
2876
2877 /* Create {0, X, X*2, X*3, ...}. */
2878 tree offset = fold_build2 (VEC_SERIES_EXPR, gs_info->offset_vectype,
2879 build_zero_cst (offset_type), step);
2880 *vec_offset = cse_and_gimplify_to_preheader (loop_vinfo, offset);
2881 }
2882
2883 /* Return the amount that should be added to a vector pointer to move
2884 to the next or previous copy of AGGR_TYPE. DR_INFO is the data reference
2885 being vectorized and MEMORY_ACCESS_TYPE describes the type of
2886 vectorization. */
2887
2888 static tree
2889 vect_get_data_ptr_increment (vec_info *vinfo,
2890 dr_vec_info *dr_info, tree aggr_type,
2891 vect_memory_access_type memory_access_type)
2892 {
2893 if (memory_access_type == VMAT_INVARIANT)
2894 return size_zero_node;
2895
2896 tree iv_step = TYPE_SIZE_UNIT (aggr_type);
2897 tree step = vect_dr_behavior (vinfo, dr_info)->step;
2898 if (tree_int_cst_sgn (step) == -1)
2899 iv_step = fold_build1 (NEGATE_EXPR, TREE_TYPE (iv_step), iv_step);
2900 return iv_step;
2901 }
2902
2903 /* Check and perform vectorization of BUILT_IN_BSWAP{16,32,64,128}. */
2904
2905 static bool
2906 vectorizable_bswap (vec_info *vinfo,
2907 stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
2908 gimple **vec_stmt, slp_tree slp_node,
2909 slp_tree *slp_op,
2910 tree vectype_in, stmt_vector_for_cost *cost_vec)
2911 {
2912 tree op, vectype;
2913 gcall *stmt = as_a <gcall *> (stmt_info->stmt);
2914 loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
2915 unsigned ncopies;
2916
2917 op = gimple_call_arg (stmt, 0);
2918 vectype = STMT_VINFO_VECTYPE (stmt_info);
2919 poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
2920
2921 /* Multiple types in SLP are handled by creating the appropriate number of
2922 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
2923 case of SLP. */
2924 if (slp_node)
2925 ncopies = 1;
2926 else
2927 ncopies = vect_get_num_copies (loop_vinfo, vectype);
2928
2929 gcc_assert (ncopies >= 1);
2930
2931 tree char_vectype = get_same_sized_vectype (char_type_node, vectype_in);
2932 if (! char_vectype)
2933 return false;
2934
2935 poly_uint64 num_bytes = TYPE_VECTOR_SUBPARTS (char_vectype);
2936 unsigned word_bytes;
2937 if (!constant_multiple_p (num_bytes, nunits, &word_bytes))
2938 return false;
2939
2940 /* The encoding uses one stepped pattern for each byte in the word. */
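/* E.g. for a 32-bit bswap on 16 bytes (word_bytes == 4) the loop below
   pushes { 3, 2, 1, 0, 7, 6, 5, 4, 11, 10, 9, 8 }, which the builder
   extends to the byte-reversing permutation
   { 3, 2, 1, 0, ..., 15, 14, 13, 12 }.  */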
2941 vec_perm_builder elts (num_bytes, word_bytes, 3);
2942 for (unsigned i = 0; i < 3; ++i)
2943 for (unsigned j = 0; j < word_bytes; ++j)
2944 elts.quick_push ((i + 1) * word_bytes - j - 1);
2945
2946 vec_perm_indices indices (elts, 1, num_bytes);
2947 if (!can_vec_perm_const_p (TYPE_MODE (char_vectype), indices))
2948 return false;
2949
2950 if (! vec_stmt)
2951 {
2952 if (slp_node
2953 && !vect_maybe_update_slp_op_vectype (slp_op[0], vectype_in))
2954 {
2955 if (dump_enabled_p ())
2956 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2957 "incompatible vector types for invariants\n");
2958 return false;
2959 }
2960
2961 STMT_VINFO_TYPE (stmt_info) = call_vec_info_type;
2962 DUMP_VECT_SCOPE ("vectorizable_bswap");
2963 if (! slp_node)
2964 {
2965 record_stmt_cost (cost_vec,
2966 1, vector_stmt, stmt_info, 0, vect_prologue);
2967 record_stmt_cost (cost_vec,
2968 ncopies, vec_perm, stmt_info, 0, vect_body);
2969 }
2970 return true;
2971 }
2972
2973 tree bswap_vconst = vec_perm_indices_to_tree (char_vectype, indices);
2974
2975 /* Transform. */
2976 vec<tree> vec_oprnds = vNULL;
2977 vect_get_vec_defs (vinfo, stmt_info, slp_node, ncopies,
2978 op, &vec_oprnds);
2979 /* Arguments are ready. Create the new vector stmt. */
2980 unsigned i;
2981 tree vop;
2982 FOR_EACH_VEC_ELT (vec_oprnds, i, vop)
2983 {
2984 gimple *new_stmt;
2985 tree tem = make_ssa_name (char_vectype);
2986 new_stmt = gimple_build_assign (tem, build1 (VIEW_CONVERT_EXPR,
2987 char_vectype, vop));
2988 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
2989 tree tem2 = make_ssa_name (char_vectype);
2990 new_stmt = gimple_build_assign (tem2, VEC_PERM_EXPR,
2991 tem, tem, bswap_vconst);
2992 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
2993 tem = make_ssa_name (vectype);
2994 new_stmt = gimple_build_assign (tem, build1 (VIEW_CONVERT_EXPR,
2995 vectype, tem2));
2996 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
2997 if (slp_node)
2998 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
2999 else
3000 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
3001 }
3002
3003 if (!slp_node)
3004 *vec_stmt = STMT_VINFO_VEC_STMTS (stmt_info)[0];
3005
3006 vec_oprnds.release ();
3007 return true;
3008 }
3009
3010 /* Return true if vector types VECTYPE_IN and VECTYPE_OUT have
3011 integer elements and if we can narrow VECTYPE_IN to VECTYPE_OUT
3012 in a single step. On success, store the binary pack code in
3013 *CONVERT_CODE. */
3014
3015 static bool
3016 simple_integer_narrowing (tree vectype_out, tree vectype_in,
3017 tree_code *convert_code)
3018 {
3019 if (!INTEGRAL_TYPE_P (TREE_TYPE (vectype_out))
3020 || !INTEGRAL_TYPE_P (TREE_TYPE (vectype_in)))
3021 return false;
3022
3023 tree_code code;
3024 int multi_step_cvt = 0;
3025 auto_vec <tree, 8> interm_types;
3026 if (!supportable_narrowing_operation (NOP_EXPR, vectype_out, vectype_in,
3027 &code, &multi_step_cvt, &interm_types)
3028 || multi_step_cvt)
3029 return false;
3030
3031 *convert_code = code;
3032 return true;
3033 }
3034
3035 /* Function vectorizable_call.
3036
3037 Check if STMT_INFO performs a function call that can be vectorized.
3038 If VEC_STMT is also passed, vectorize STMT_INFO: create a vectorized
3039 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
3040 Return true if STMT_INFO is vectorizable in this way. */
3041
3042 static bool
3043 vectorizable_call (vec_info *vinfo,
3044 stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
3045 gimple **vec_stmt, slp_tree slp_node,
3046 stmt_vector_for_cost *cost_vec)
3047 {
3048 gcall *stmt;
3049 tree vec_dest;
3050 tree scalar_dest;
3051 tree op;
3052 tree vec_oprnd0 = NULL_TREE, vec_oprnd1 = NULL_TREE;
3053 tree vectype_out, vectype_in;
3054 poly_uint64 nunits_in;
3055 poly_uint64 nunits_out;
3056 loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
3057 bb_vec_info bb_vinfo = dyn_cast <bb_vec_info> (vinfo);
3058 tree fndecl, new_temp, rhs_type;
3059 enum vect_def_type dt[4]
3060 = { vect_unknown_def_type, vect_unknown_def_type, vect_unknown_def_type,
3061 vect_unknown_def_type };
3062 tree vectypes[ARRAY_SIZE (dt)] = {};
3063 slp_tree slp_op[ARRAY_SIZE (dt)] = {};
3064 int ndts = ARRAY_SIZE (dt);
3065 int ncopies, j;
3066 auto_vec<tree, 8> vargs;
3067 auto_vec<tree, 8> orig_vargs;
3068 enum { NARROW, NONE, WIDEN } modifier;
3069 size_t i, nargs;
3070 tree lhs;
3071
3072 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
3073 return false;
3074
3075 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
3076 && ! vec_stmt)
3077 return false;
3078
3079 /* Is STMT_INFO a vectorizable call? */
3080 stmt = dyn_cast <gcall *> (stmt_info->stmt);
3081 if (!stmt)
3082 return false;
3083
3084 if (gimple_call_internal_p (stmt)
3085 && (internal_load_fn_p (gimple_call_internal_fn (stmt))
3086 || internal_store_fn_p (gimple_call_internal_fn (stmt))))
3087 /* Handled by vectorizable_load and vectorizable_store. */
3088 return false;
3089
3090 if (gimple_call_lhs (stmt) == NULL_TREE
3091 || TREE_CODE (gimple_call_lhs (stmt)) != SSA_NAME)
3092 return false;
3093
3094 gcc_checking_assert (!stmt_can_throw_internal (cfun, stmt));
3095
3096 vectype_out = STMT_VINFO_VECTYPE (stmt_info);
3097
3098 /* Process function arguments. */
3099 rhs_type = NULL_TREE;
3100 vectype_in = NULL_TREE;
3101 nargs = gimple_call_num_args (stmt);
3102
3103 /* Bail out if the function has more than four arguments; we do not have
3104 interesting builtin functions to vectorize with more than two arguments
3105 except for fma. Having no arguments is also not good. */
3106 if (nargs == 0 || nargs > 4)
3107 return false;
3108
3109 /* Ignore the arguments of IFN_GOMP_SIMD_LANE; they are magic. */
3110 combined_fn cfn = gimple_call_combined_fn (stmt);
3111 if (cfn == CFN_GOMP_SIMD_LANE)
3112 {
3113 nargs = 0;
3114 rhs_type = unsigned_type_node;
3115 }
3116
3117 int mask_opno = -1;
3118 if (internal_fn_p (cfn))
3119 mask_opno = internal_fn_mask_index (as_internal_fn (cfn));
3120
3121 for (i = 0; i < nargs; i++)
3122 {
3123 if ((int) i == mask_opno)
3124 {
3125 op = gimple_call_arg (stmt, i);
3126 if (!vect_check_scalar_mask (vinfo,
3127 stmt_info, op, &dt[i], &vectypes[i]))
3128 return false;
3129 continue;
3130 }
3131
3132 if (!vect_is_simple_use (vinfo, stmt_info, slp_node,
3133 i, &op, &slp_op[i], &dt[i], &vectypes[i]))
3134 {
3135 if (dump_enabled_p ())
3136 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3137 "use not simple.\n");
3138 return false;
3139 }
3140
3141 /* We can only handle calls with arguments of the same type. */
3142 if (rhs_type
3143 && !types_compatible_p (rhs_type, TREE_TYPE (op)))
3144 {
3145 if (dump_enabled_p ())
3146 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3147 "argument types differ.\n");
3148 return false;
3149 }
3150 if (!rhs_type)
3151 rhs_type = TREE_TYPE (op);
3152
3153 if (!vectype_in)
3154 vectype_in = vectypes[i];
3155 else if (vectypes[i]
3156 && !types_compatible_p (vectypes[i], vectype_in))
3157 {
3158 if (dump_enabled_p ())
3159 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3160 "argument vector types differ.\n");
3161 return false;
3162 }
3163 }
3164 /* If all arguments are external or constant defs, infer the vector type
3165 from the scalar type. */
3166 if (!vectype_in)
3167 vectype_in = get_vectype_for_scalar_type (vinfo, rhs_type, slp_node);
3168 if (vec_stmt)
3169 gcc_assert (vectype_in);
3170 if (!vectype_in)
3171 {
3172 if (dump_enabled_p ())
3173 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3174 "no vectype for scalar type %T\n", rhs_type);
3175
3176 return false;
3177 }
3178 /* FORNOW: we don't yet support mixtures of vector sizes for calls,
3179 just mixtures of nunits. E.g. DI->SI versions of __builtin_ctz*
3180 are traditionally vectorized as two VnDI->VnDI IFN_CTZs followed
3181 by a pack of the two vectors into an SI vector. We would need
3182 separate code to handle direct VnDI->VnSI IFN_CTZs. */
3183 if (TYPE_SIZE (vectype_in) != TYPE_SIZE (vectype_out))
3184 {
3185 if (dump_enabled_p ())
3186 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3187 "mismatched vector sizes %T and %T\n",
3188 vectype_in, vectype_out);
3189 return false;
3190 }
3191
3192 if (VECTOR_BOOLEAN_TYPE_P (vectype_out)
3193 != VECTOR_BOOLEAN_TYPE_P (vectype_in))
3194 {
3195 if (dump_enabled_p ())
3196 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3197 "mixed mask and nonmask vector types\n");
3198 return false;
3199 }
3200
3201 /* FORNOW */
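/* Classify the call by element counts: twice as many output elements as
   input elements is a narrowing operation (e.g. V2DI inputs producing a
   V4SI result), twice as many input elements a widening one.  */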
3202 nunits_in = TYPE_VECTOR_SUBPARTS (vectype_in);
3203 nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
3204 if (known_eq (nunits_in * 2, nunits_out))
3205 modifier = NARROW;
3206 else if (known_eq (nunits_out, nunits_in))
3207 modifier = NONE;
3208 else if (known_eq (nunits_out * 2, nunits_in))
3209 modifier = WIDEN;
3210 else
3211 return false;
3212
3213 /* We only handle functions that do not read or clobber memory. */
3214 if (gimple_vuse (stmt))
3215 {
3216 if (dump_enabled_p ())
3217 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3218 "function reads from or writes to memory.\n");
3219 return false;
3220 }
3221
3222 /* For now, we only vectorize functions if a target specific builtin
3223 is available. TODO -- in some cases, it might be profitable to
3224 insert the calls for pieces of the vector, in order to be able
3225 to vectorize other operations in the loop. */
3226 fndecl = NULL_TREE;
3227 internal_fn ifn = IFN_LAST;
3228 tree callee = gimple_call_fndecl (stmt);
3229
3230 /* First try using an internal function. */
3231 tree_code convert_code = ERROR_MARK;
3232 if (cfn != CFN_LAST
3233 && (modifier == NONE
3234 || (modifier == NARROW
3235 && simple_integer_narrowing (vectype_out, vectype_in,
3236 &convert_code))))
3237 ifn = vectorizable_internal_function (cfn, callee, vectype_out,
3238 vectype_in);
3239
3240 /* If that fails, try asking for a target-specific built-in function. */
3241 if (ifn == IFN_LAST)
3242 {
3243 if (cfn != CFN_LAST)
3244 fndecl = targetm.vectorize.builtin_vectorized_function
3245 (cfn, vectype_out, vectype_in);
3246 else if (callee && fndecl_built_in_p (callee, BUILT_IN_MD))
3247 fndecl = targetm.vectorize.builtin_md_vectorized_function
3248 (callee, vectype_out, vectype_in);
3249 }
3250
3251 if (ifn == IFN_LAST && !fndecl)
3252 {
3253 if (cfn == CFN_GOMP_SIMD_LANE
3254 && !slp_node
3255 && loop_vinfo
3256 && LOOP_VINFO_LOOP (loop_vinfo)->simduid
3257 && TREE_CODE (gimple_call_arg (stmt, 0)) == SSA_NAME
3258 && LOOP_VINFO_LOOP (loop_vinfo)->simduid
3259 == SSA_NAME_VAR (gimple_call_arg (stmt, 0)))
3260 {
3261 /* We can handle IFN_GOMP_SIMD_LANE by returning a
3262 { 0, 1, 2, ... vf - 1 } vector. */
3263 gcc_assert (nargs == 0);
3264 }
3265 else if (modifier == NONE
3266 && (gimple_call_builtin_p (stmt, BUILT_IN_BSWAP16)
3267 || gimple_call_builtin_p (stmt, BUILT_IN_BSWAP32)
3268 || gimple_call_builtin_p (stmt, BUILT_IN_BSWAP64)
3269 || gimple_call_builtin_p (stmt, BUILT_IN_BSWAP128)))
3270 return vectorizable_bswap (vinfo, stmt_info, gsi, vec_stmt, slp_node,
3271 slp_op, vectype_in, cost_vec);
3272 else
3273 {
3274 if (dump_enabled_p ())
3275 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3276 "function is not vectorizable.\n");
3277 return false;
3278 }
3279 }
3280
3281 if (slp_node)
3282 ncopies = 1;
3283 else if (modifier == NARROW && ifn == IFN_LAST)
3284 ncopies = vect_get_num_copies (loop_vinfo, vectype_out);
3285 else
3286 ncopies = vect_get_num_copies (loop_vinfo, vectype_in);
3287
3288 /* Sanity check: make sure that at least one copy of the vectorized stmt
3289 needs to be generated. */
3290 gcc_assert (ncopies >= 1);
3291
3292 vec_loop_masks *masks = (loop_vinfo ? &LOOP_VINFO_MASKS (loop_vinfo) : NULL);
3293 if (!vec_stmt) /* transformation not required. */
3294 {
3295 if (slp_node)
3296 for (i = 0; i < nargs; ++i)
3297 if (!vect_maybe_update_slp_op_vectype (slp_op[i], vectype_in))
3298 {
3299 if (dump_enabled_p ())
3300 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3301 "incompatible vector types for invariants\n");
3302 return false;
3303 }
3304 STMT_VINFO_TYPE (stmt_info) = call_vec_info_type;
3305 DUMP_VECT_SCOPE ("vectorizable_call");
3306 vect_model_simple_cost (vinfo, stmt_info,
3307 ncopies, dt, ndts, slp_node, cost_vec);
3308 if (ifn != IFN_LAST && modifier == NARROW && !slp_node)
3309 record_stmt_cost (cost_vec, ncopies / 2,
3310 vec_promote_demote, stmt_info, 0, vect_body);
3311
3312 if (loop_vinfo && mask_opno >= 0)
3313 {
3314 unsigned int nvectors = (slp_node
3315 ? SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node)
3316 : ncopies);
3317 tree scalar_mask = gimple_call_arg (stmt_info->stmt, mask_opno);
3318 vect_record_loop_mask (loop_vinfo, masks, nvectors,
3319 vectype_out, scalar_mask);
3320 }
3321 return true;
3322 }
3323
3324 /* Transform. */
3325
3326 if (dump_enabled_p ())
3327 dump_printf_loc (MSG_NOTE, vect_location, "transform call.\n");
3328
3329 /* Handle def. */
3330 scalar_dest = gimple_call_lhs (stmt);
3331 vec_dest = vect_create_destination_var (scalar_dest, vectype_out);
3332
3333 bool masked_loop_p = loop_vinfo && LOOP_VINFO_FULLY_MASKED_P (loop_vinfo);
3334
3335 if (modifier == NONE || ifn != IFN_LAST)
3336 {
3337 tree prev_res = NULL_TREE;
3338 vargs.safe_grow (nargs);
3339 orig_vargs.safe_grow (nargs);
3340 auto_vec<vec<tree> > vec_defs (nargs);
3341 for (j = 0; j < ncopies; ++j)
3342 {
3343 /* Build argument list for the vectorized call. */
3344 if (slp_node)
3345 {
3346 vec<tree> vec_oprnds0;
3347
3348 vect_get_slp_defs (vinfo, slp_node, &vec_defs);
3349 vec_oprnds0 = vec_defs[0];
3350
3351 /* Arguments are ready. Create the new vector stmt. */
3352 FOR_EACH_VEC_ELT (vec_oprnds0, i, vec_oprnd0)
3353 {
3354 size_t k;
3355 for (k = 0; k < nargs; k++)
3356 {
3357 vec<tree> vec_oprndsk = vec_defs[k];
3358 vargs[k] = vec_oprndsk[i];
3359 }
3360 gimple *new_stmt;
3361 if (modifier == NARROW)
3362 {
3363 /* We don't define any narrowing conditional functions
3364 at present. */
3365 gcc_assert (mask_opno < 0);
3366 tree half_res = make_ssa_name (vectype_in);
3367 gcall *call
3368 = gimple_build_call_internal_vec (ifn, vargs);
3369 gimple_call_set_lhs (call, half_res);
3370 gimple_call_set_nothrow (call, true);
3371 vect_finish_stmt_generation (vinfo, stmt_info, call, gsi);
3372 if ((i & 1) == 0)
3373 {
3374 prev_res = half_res;
3375 continue;
3376 }
3377 new_temp = make_ssa_name (vec_dest);
3378 new_stmt = gimple_build_assign (new_temp, convert_code,
3379 prev_res, half_res);
3380 vect_finish_stmt_generation (vinfo, stmt_info,
3381 new_stmt, gsi);
3382 }
3383 else
3384 {
3385 if (mask_opno >= 0 && masked_loop_p)
3386 {
3387 unsigned int vec_num = vec_oprnds0.length ();
3388 /* Always true for SLP. */
3389 gcc_assert (ncopies == 1);
3390 tree mask = vect_get_loop_mask (gsi, masks, vec_num,
3391 vectype_out, i);
3392 vargs[mask_opno] = prepare_load_store_mask
3393 (TREE_TYPE (mask), mask, vargs[mask_opno], gsi);
3394 }
3395
3396 gcall *call;
3397 if (ifn != IFN_LAST)
3398 call = gimple_build_call_internal_vec (ifn, vargs);
3399 else
3400 call = gimple_build_call_vec (fndecl, vargs);
3401 new_temp = make_ssa_name (vec_dest, call);
3402 gimple_call_set_lhs (call, new_temp);
3403 gimple_call_set_nothrow (call, true);
3404 vect_finish_stmt_generation (vinfo, stmt_info, call, gsi);
3405 new_stmt = call;
3406 }
3407 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
3408 }
3409 continue;
3410 }
3411
3412 for (i = 0; i < nargs; i++)
3413 {
3414 op = gimple_call_arg (stmt, i);
3415 if (j == 0)
3416 {
3417 vec_defs.quick_push (vNULL);
3418 vect_get_vec_defs_for_operand (vinfo, stmt_info, ncopies,
3419 op, &vec_defs[i]);
3420 }
3421 orig_vargs[i] = vargs[i] = vec_defs[i][j];
3422 }
3423
3424 if (mask_opno >= 0 && masked_loop_p)
3425 {
3426 tree mask = vect_get_loop_mask (gsi, masks, ncopies,
3427 vectype_out, j);
3428 vargs[mask_opno]
3429 = prepare_load_store_mask (TREE_TYPE (mask), mask,
3430 vargs[mask_opno], gsi);
3431 }
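	  /* A sketch of the effect of the code above (assuming a target
	     that uses loop control masks): prepare_load_store_mask
	     combines the loop mask for this copy with the call's own
	     mask argument, roughly

		 combined = vec_mask & loop_mask;

	     so lanes that the loop mask disables stay inactive in the
	     masked call as well. */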
3432
3433 gimple *new_stmt;
3434 if (cfn == CFN_GOMP_SIMD_LANE)
3435 {
3436 tree cst = build_index_vector (vectype_out, j * nunits_out, 1);
3437 tree new_var
3438 = vect_get_new_ssa_name (vectype_out, vect_simple_var, "cst_");
3439 gimple *init_stmt = gimple_build_assign (new_var, cst);
3440 vect_init_vector_1 (vinfo, stmt_info, init_stmt, NULL);
3441 new_temp = make_ssa_name (vec_dest);
3442 new_stmt = gimple_build_assign (new_temp, new_var);
3443 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
3444 }
3445 else if (modifier == NARROW)
3446 {
3447 /* We don't define any narrowing conditional functions at
3448 present. */
3449 gcc_assert (mask_opno < 0);
3450 tree half_res = make_ssa_name (vectype_in);
3451 gcall *call = gimple_build_call_internal_vec (ifn, vargs);
3452 gimple_call_set_lhs (call, half_res);
3453 gimple_call_set_nothrow (call, true);
3454 vect_finish_stmt_generation (vinfo, stmt_info, call, gsi);
3455 if ((j & 1) == 0)
3456 {
3457 prev_res = half_res;
3458 continue;
3459 }
3460 new_temp = make_ssa_name (vec_dest);
3461 new_stmt = gimple_build_assign (new_temp, convert_code,
3462 prev_res, half_res);
3463 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
3464 }
3465 else
3466 {
3467 gcall *call;
3468 if (ifn != IFN_LAST)
3469 call = gimple_build_call_internal_vec (ifn, vargs);
3470 else
3471 call = gimple_build_call_vec (fndecl, vargs);
3472 new_temp = make_ssa_name (vec_dest, call);
3473 gimple_call_set_lhs (call, new_temp);
3474 gimple_call_set_nothrow (call, true);
3475 vect_finish_stmt_generation (vinfo, stmt_info, call, gsi);
3476 new_stmt = call;
3477 }
3478
3479 if (j == (modifier == NARROW ? 1 : 0))
3480 *vec_stmt = new_stmt;
3481 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
3482 }
3483 for (i = 0; i < nargs; i++)
3484 {
3485 vec<tree> vec_oprndsi = vec_defs[i];
3486 vec_oprndsi.release ();
3487 }
3488 }
3489 else if (modifier == NARROW)
3490 {
3491 auto_vec<vec<tree> > vec_defs (nargs);
3492 /* We don't define any narrowing conditional functions at present. */
3493 gcc_assert (mask_opno < 0);
3494 for (j = 0; j < ncopies; ++j)
3495 {
3496 /* Build argument list for the vectorized call. */
3497 if (j == 0)
3498 vargs.create (nargs * 2);
3499 else
3500 vargs.truncate (0);
3501
3502 if (slp_node)
3503 {
3504 vec<tree> vec_oprnds0;
3505
3506 vect_get_slp_defs (vinfo, slp_node, &vec_defs);
3507 vec_oprnds0 = vec_defs[0];
3508
3509 /* Arguments are ready. Create the new vector stmt. */
3510 for (i = 0; vec_oprnds0.iterate (i, &vec_oprnd0); i += 2)
3511 {
3512 size_t k;
3513 vargs.truncate (0);
3514 for (k = 0; k < nargs; k++)
3515 {
3516 vec<tree> vec_oprndsk = vec_defs[k];
3517 vargs.quick_push (vec_oprndsk[i]);
3518 vargs.quick_push (vec_oprndsk[i + 1]);
3519 }
3520 gcall *call;
3521 if (ifn != IFN_LAST)
3522 call = gimple_build_call_internal_vec (ifn, vargs);
3523 else
3524 call = gimple_build_call_vec (fndecl, vargs);
3525 new_temp = make_ssa_name (vec_dest, call);
3526 gimple_call_set_lhs (call, new_temp);
3527 gimple_call_set_nothrow (call, true);
3528 vect_finish_stmt_generation (vinfo, stmt_info, call, gsi);
3529 SLP_TREE_VEC_STMTS (slp_node).quick_push (call);
3530 }
3531 continue;
3532 }
3533
3534 for (i = 0; i < nargs; i++)
3535 {
3536 op = gimple_call_arg (stmt, i);
3537 if (j == 0)
3538 {
3539 vec_defs.quick_push (vNULL);
3540 vect_get_vec_defs_for_operand (vinfo, stmt_info, 2 * ncopies,
3541 op, &vec_defs[i], vectypes[i]);
3542 }
3543 vec_oprnd0 = vec_defs[i][2*j];
3544 vec_oprnd1 = vec_defs[i][2*j+1];
3545
3546 vargs.quick_push (vec_oprnd0);
3547 vargs.quick_push (vec_oprnd1);
3548 }
3549
3550 gcall *new_stmt = gimple_build_call_vec (fndecl, vargs);
3551 new_temp = make_ssa_name (vec_dest, new_stmt);
3552 gimple_call_set_lhs (new_stmt, new_temp);
3553 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
3554
3555 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
3556 }
3557
3558 if (!slp_node)
3559 *vec_stmt = STMT_VINFO_VEC_STMTS (stmt_info)[0];
3560
3561 for (i = 0; i < nargs; i++)
3562 {
3563 vec<tree> vec_oprndsi = vec_defs[i];
3564 vec_oprndsi.release ();
3565 }
3566 }
3567 else
3568 /* No current target implements this case. */
3569 return false;
3570
3571 vargs.release ();
3572
3573 /* The call in STMT might prevent it from being removed in dce.
3574 We cannot remove it here, however, because the SSA name it
3575 defines is mapped to the new definition. So just replace the
3576 rhs of the statement with something harmless. */
3577
3578 if (slp_node)
3579 return true;
3580
3581 stmt_info = vect_orig_stmt (stmt_info);
3582 lhs = gimple_get_lhs (stmt_info->stmt);
3583
3584 gassign *new_stmt
3585 = gimple_build_assign (lhs, build_zero_cst (TREE_TYPE (lhs)));
3586 vinfo->replace_stmt (gsi, stmt_info, new_stmt);
3587
3588 return true;
3589 }
3590
3591
3592 struct simd_call_arg_info
3593 {
3594 tree vectype;
3595 tree op;
3596 HOST_WIDE_INT linear_step;
3597 enum vect_def_type dt;
3598 unsigned int align;
3599 bool simd_lane_linear;
3600 };
3601
3602 /* Helper function of vectorizable_simd_clone_call. If OP, an SSA_NAME,
3603 is linear within the simd lane (but not within the whole loop), note it in
3604 *ARGINFO. */
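/* For example (illustrative GIMPLE only, names invented):

       _1 = .GOMP_SIMD_LANE (simduid.0);
       _2 = (sizetype) _1;
       _3 = _2 * 4;
       op_4 = &privarray + _3;

   describes an address that advances by 4 bytes per simd lane, so we
   would record the invariant base &privarray and linear_step 4 in
   *ARGINFO. */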
3605
3606 static void
3607 vect_simd_lane_linear (tree op, class loop *loop,
3608 struct simd_call_arg_info *arginfo)
3609 {
3610 gimple *def_stmt = SSA_NAME_DEF_STMT (op);
3611
3612 if (!is_gimple_assign (def_stmt)
3613 || gimple_assign_rhs_code (def_stmt) != POINTER_PLUS_EXPR
3614 || !is_gimple_min_invariant (gimple_assign_rhs1 (def_stmt)))
3615 return;
3616
3617 tree base = gimple_assign_rhs1 (def_stmt);
3618 HOST_WIDE_INT linear_step = 0;
3619 tree v = gimple_assign_rhs2 (def_stmt);
3620 while (TREE_CODE (v) == SSA_NAME)
3621 {
3622 tree t;
3623 def_stmt = SSA_NAME_DEF_STMT (v);
3624 if (is_gimple_assign (def_stmt))
3625 switch (gimple_assign_rhs_code (def_stmt))
3626 {
3627 case PLUS_EXPR:
3628 t = gimple_assign_rhs2 (def_stmt);
3629 if (linear_step || TREE_CODE (t) != INTEGER_CST)
3630 return;
3631 base = fold_build2 (POINTER_PLUS_EXPR, TREE_TYPE (base), base, t);
3632 v = gimple_assign_rhs1 (def_stmt);
3633 continue;
3634 case MULT_EXPR:
3635 t = gimple_assign_rhs2 (def_stmt);
3636 if (linear_step || !tree_fits_shwi_p (t) || integer_zerop (t))
3637 return;
3638 linear_step = tree_to_shwi (t);
3639 v = gimple_assign_rhs1 (def_stmt);
3640 continue;
3641 CASE_CONVERT:
3642 t = gimple_assign_rhs1 (def_stmt);
3643 if (TREE_CODE (TREE_TYPE (t)) != INTEGER_TYPE
3644 || (TYPE_PRECISION (TREE_TYPE (v))
3645 < TYPE_PRECISION (TREE_TYPE (t))))
3646 return;
3647 if (!linear_step)
3648 linear_step = 1;
3649 v = t;
3650 continue;
3651 default:
3652 return;
3653 }
3654 else if (gimple_call_internal_p (def_stmt, IFN_GOMP_SIMD_LANE)
3655 && loop->simduid
3656 && TREE_CODE (gimple_call_arg (def_stmt, 0)) == SSA_NAME
3657 && (SSA_NAME_VAR (gimple_call_arg (def_stmt, 0))
3658 == loop->simduid))
3659 {
3660 if (!linear_step)
3661 linear_step = 1;
3662 arginfo->linear_step = linear_step;
3663 arginfo->op = base;
3664 arginfo->simd_lane_linear = true;
3665 return;
3666 }
3667 }
3668 }
3669
3670 /* Return the number of elements in vector type VECTYPE, which is associated
3671 with a SIMD clone. At present these vectors always have a constant
3672 length. */
3673
3674 static unsigned HOST_WIDE_INT
3675 simd_clone_subparts (tree vectype)
3676 {
3677 return TYPE_VECTOR_SUBPARTS (vectype).to_constant ();
3678 }
3679
3680 /* Function vectorizable_simd_clone_call.
3681
3682 Check if STMT_INFO performs a function call that can be vectorized
3683 by calling a simd clone of the function.
3684 If VEC_STMT is also passed, vectorize STMT_INFO: create a vectorized
3685 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
3686 Return true if STMT_INFO is vectorizable in this way. */
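
   For example (a sketch only; whether a clone is used depends on the
   target's simd clone ABI and the flags in effect):

       #pragma omp declare simd notinbranch
       float foo (float x, float y);

       void bar (float *restrict a, float *restrict b, int n)
       {
         #pragma omp simd
         for (int i = 0; i < n; i++)
           a[i] = foo (a[i], b[i]);
       }

   Given a clone of foo with simdlen 4 taking V4SF arguments, the scalar
   call is replaced by one call to that clone per group of four
   iterations, with the vectorized A and B elements passed as vector
   arguments. */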
3687
3688 static bool
3689 vectorizable_simd_clone_call (vec_info *vinfo, stmt_vec_info stmt_info,
3690 gimple_stmt_iterator *gsi,
3691 gimple **vec_stmt, slp_tree slp_node,
3692 stmt_vector_for_cost *)
3693 {
3694 tree vec_dest;
3695 tree scalar_dest;
3696 tree op, type;
3697 tree vec_oprnd0 = NULL_TREE;
3698 tree vectype;
3699 unsigned int nunits;
3700 loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
3701 bb_vec_info bb_vinfo = dyn_cast <bb_vec_info> (vinfo);
3702 class loop *loop = loop_vinfo ? LOOP_VINFO_LOOP (loop_vinfo) : NULL;
3703 tree fndecl, new_temp;
3704 int ncopies, j;
3705 auto_vec<simd_call_arg_info> arginfo;
3706 vec<tree> vargs = vNULL;
3707 size_t i, nargs;
3708 tree lhs, rtype, ratype;
3709 vec<constructor_elt, va_gc> *ret_ctor_elts = NULL;
3710
3711 /* Is STMT a vectorizable call? */
3712 gcall *stmt = dyn_cast <gcall *> (stmt_info->stmt);
3713 if (!stmt)
3714 return false;
3715
3716 fndecl = gimple_call_fndecl (stmt);
3717 if (fndecl == NULL_TREE)
3718 return false;
3719
3720 struct cgraph_node *node = cgraph_node::get (fndecl);
3721 if (node == NULL || node->simd_clones == NULL)
3722 return false;
3723
3724 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
3725 return false;
3726
3727 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
3728 && ! vec_stmt)
3729 return false;
3730
3731 if (gimple_call_lhs (stmt)
3732 && TREE_CODE (gimple_call_lhs (stmt)) != SSA_NAME)
3733 return false;
3734
3735 gcc_checking_assert (!stmt_can_throw_internal (cfun, stmt));
3736
3737 vectype = STMT_VINFO_VECTYPE (stmt_info);
3738
3739 if (loop_vinfo && nested_in_vect_loop_p (loop, stmt_info))
3740 return false;
3741
3742 /* FORNOW */
3743 if (slp_node)
3744 return false;
3745
3746 /* Process function arguments. */
3747 nargs = gimple_call_num_args (stmt);
3748
3749 /* Bail out if the function has zero arguments. */
3750 if (nargs == 0)
3751 return false;
3752
3753 arginfo.reserve (nargs, true);
3754
3755 for (i = 0; i < nargs; i++)
3756 {
3757 simd_call_arg_info thisarginfo;
3758 affine_iv iv;
3759
3760 thisarginfo.linear_step = 0;
3761 thisarginfo.align = 0;
3762 thisarginfo.op = NULL_TREE;
3763 thisarginfo.simd_lane_linear = false;
3764
3765 op = gimple_call_arg (stmt, i);
3766 if (!vect_is_simple_use (op, vinfo, &thisarginfo.dt,
3767 &thisarginfo.vectype)
3768 || thisarginfo.dt == vect_uninitialized_def)
3769 {
3770 if (dump_enabled_p ())
3771 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3772 "use not simple.\n");
3773 return false;
3774 }
3775
3776 if (thisarginfo.dt == vect_constant_def
3777 || thisarginfo.dt == vect_external_def)
3778 gcc_assert (thisarginfo.vectype == NULL_TREE);
3779 else
3780 {
3781 gcc_assert (thisarginfo.vectype != NULL_TREE);
3782 if (VECTOR_BOOLEAN_TYPE_P (thisarginfo.vectype))
3783 {
3784 if (dump_enabled_p ())
3785 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3786 "vector mask arguments are not supported\n");
3787 return false;
3788 }
3789 }
3790
3791 /* For linear arguments, the analyze phase should have saved
3792 the base and step in STMT_VINFO_SIMD_CLONE_INFO. */
3793 if (i * 3 + 4 <= STMT_VINFO_SIMD_CLONE_INFO (stmt_info).length ()
3794 && STMT_VINFO_SIMD_CLONE_INFO (stmt_info)[i * 3 + 2])
3795 {
3796 gcc_assert (vec_stmt);
3797 thisarginfo.linear_step
3798 = tree_to_shwi (STMT_VINFO_SIMD_CLONE_INFO (stmt_info)[i * 3 + 2]);
3799 thisarginfo.op
3800 = STMT_VINFO_SIMD_CLONE_INFO (stmt_info)[i * 3 + 1];
3801 thisarginfo.simd_lane_linear
3802 = (STMT_VINFO_SIMD_CLONE_INFO (stmt_info)[i * 3 + 3]
3803 == boolean_true_node);
3804 /* If the loop has been peeled for alignment, we need to adjust the base accordingly. */
3805 tree n1 = LOOP_VINFO_NITERS_UNCHANGED (loop_vinfo);
3806 tree n2 = LOOP_VINFO_NITERS (loop_vinfo);
3807 if (n1 != n2 && !thisarginfo.simd_lane_linear)
3808 {
3809 tree bias = fold_build2 (MINUS_EXPR, TREE_TYPE (n1), n1, n2);
3810 tree step = STMT_VINFO_SIMD_CLONE_INFO (stmt_info)[i * 3 + 2];
3811 tree opt = TREE_TYPE (thisarginfo.op);
3812 bias = fold_convert (TREE_TYPE (step), bias);
3813 bias = fold_build2 (MULT_EXPR, TREE_TYPE (step), bias, step);
3814 thisarginfo.op
3815 = fold_build2 (POINTER_TYPE_P (opt)
3816 ? POINTER_PLUS_EXPR : PLUS_EXPR, opt,
3817 thisarginfo.op, bias);
3818 }
3819 }
3820 else if (!vec_stmt
3821 && thisarginfo.dt != vect_constant_def
3822 && thisarginfo.dt != vect_external_def
3823 && loop_vinfo
3824 && TREE_CODE (op) == SSA_NAME
3825 && simple_iv (loop, loop_containing_stmt (stmt), op,
3826 &iv, false)
3827 && tree_fits_shwi_p (iv.step))
3828 {
3829 thisarginfo.linear_step = tree_to_shwi (iv.step);
3830 thisarginfo.op = iv.base;
3831 }
3832 else if ((thisarginfo.dt == vect_constant_def
3833 || thisarginfo.dt == vect_external_def)
3834 && POINTER_TYPE_P (TREE_TYPE (op)))
3835 thisarginfo.align = get_pointer_alignment (op) / BITS_PER_UNIT;
3836 /* Addresses of array elements indexed by GOMP_SIMD_LANE are
3837 linear too. */
3838 if (POINTER_TYPE_P (TREE_TYPE (op))
3839 && !thisarginfo.linear_step
3840 && !vec_stmt
3841 && thisarginfo.dt != vect_constant_def
3842 && thisarginfo.dt != vect_external_def
3843 && loop_vinfo
3844 && !slp_node
3845 && TREE_CODE (op) == SSA_NAME)
3846 vect_simd_lane_linear (op, loop, &thisarginfo);
3847
3848 arginfo.quick_push (thisarginfo);
3849 }
3850
3851 unsigned HOST_WIDE_INT vf;
3852 if (!LOOP_VINFO_VECT_FACTOR (loop_vinfo).is_constant (&vf))
3853 {
3854 if (dump_enabled_p ())
3855 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3856 "not considering SIMD clones; not yet supported"
3857 " for variable-width vectors.\n");
3858 return false;
3859 }
3860
3861 unsigned int badness = 0;
3862 struct cgraph_node *bestn = NULL;
3863 if (STMT_VINFO_SIMD_CLONE_INFO (stmt_info).exists ())
3864 bestn = cgraph_node::get (STMT_VINFO_SIMD_CLONE_INFO (stmt_info)[0]);
3865 else
3866 for (struct cgraph_node *n = node->simd_clones; n != NULL;
3867 n = n->simdclone->next_clone)
3868 {
3869 unsigned int this_badness = 0;
3870 if (n->simdclone->simdlen > vf
3871 || n->simdclone->nargs != nargs)
3872 continue;
3873 if (n->simdclone->simdlen < vf)
3874 this_badness += (exact_log2 (vf)
3875 - exact_log2 (n->simdclone->simdlen)) * 1024;
3876 if (n->simdclone->inbranch)
3877 this_badness += 2048;
3878 int target_badness = targetm.simd_clone.usable (n);
3879 if (target_badness < 0)
3880 continue;
3881 this_badness += target_badness * 512;
3882 /* FORNOW: Have to add code to add the mask argument. */
3883 if (n->simdclone->inbranch)
3884 continue;
3885 for (i = 0; i < nargs; i++)
3886 {
3887 switch (n->simdclone->args[i].arg_type)
3888 {
3889 case SIMD_CLONE_ARG_TYPE_VECTOR:
3890 if (!useless_type_conversion_p
3891 (n->simdclone->args[i].orig_type,
3892 TREE_TYPE (gimple_call_arg (stmt, i))))
3893 i = -1;
3894 else if (arginfo[i].dt == vect_constant_def
3895 || arginfo[i].dt == vect_external_def
3896 || arginfo[i].linear_step)
3897 this_badness += 64;
3898 break;
3899 case SIMD_CLONE_ARG_TYPE_UNIFORM:
3900 if (arginfo[i].dt != vect_constant_def
3901 && arginfo[i].dt != vect_external_def)
3902 i = -1;
3903 break;
3904 case SIMD_CLONE_ARG_TYPE_LINEAR_CONSTANT_STEP:
3905 case SIMD_CLONE_ARG_TYPE_LINEAR_REF_CONSTANT_STEP:
3906 if (arginfo[i].dt == vect_constant_def
3907 || arginfo[i].dt == vect_external_def
3908 || (arginfo[i].linear_step
3909 != n->simdclone->args[i].linear_step))
3910 i = -1;
3911 break;
3912 case SIMD_CLONE_ARG_TYPE_LINEAR_VARIABLE_STEP:
3913 case SIMD_CLONE_ARG_TYPE_LINEAR_VAL_CONSTANT_STEP:
3914 case SIMD_CLONE_ARG_TYPE_LINEAR_UVAL_CONSTANT_STEP:
3915 case SIMD_CLONE_ARG_TYPE_LINEAR_REF_VARIABLE_STEP:
3916 case SIMD_CLONE_ARG_TYPE_LINEAR_VAL_VARIABLE_STEP:
3917 case SIMD_CLONE_ARG_TYPE_LINEAR_UVAL_VARIABLE_STEP:
3918 /* FORNOW */
3919 i = -1;
3920 break;
3921 case SIMD_CLONE_ARG_TYPE_MASK:
3922 gcc_unreachable ();
3923 }
3924 if (i == (size_t) -1)
3925 break;
3926 if (n->simdclone->args[i].alignment > arginfo[i].align)
3927 {
3928 i = -1;
3929 break;
3930 }
3931 if (arginfo[i].align)
3932 this_badness += (exact_log2 (arginfo[i].align)
3933 - exact_log2 (n->simdclone->args[i].alignment));
3934 }
3935 if (i == (size_t) -1)
3936 continue;
3937 if (bestn == NULL || this_badness < badness)
3938 {
3939 bestn = n;
3940 badness = this_badness;
3941 }
3942 }
3943
3944 if (bestn == NULL)
3945 return false;
3946
3947 for (i = 0; i < nargs; i++)
3948 if ((arginfo[i].dt == vect_constant_def
3949 || arginfo[i].dt == vect_external_def)
3950 && bestn->simdclone->args[i].arg_type == SIMD_CLONE_ARG_TYPE_VECTOR)
3951 {
3952 tree arg_type = TREE_TYPE (gimple_call_arg (stmt, i));
3953 arginfo[i].vectype = get_vectype_for_scalar_type (vinfo, arg_type,
3954 slp_node);
3955 if (arginfo[i].vectype == NULL
3956 || (simd_clone_subparts (arginfo[i].vectype)
3957 > bestn->simdclone->simdlen))
3958 return false;
3959 }
3960
3961 fndecl = bestn->decl;
3962 nunits = bestn->simdclone->simdlen;
3963 ncopies = vf / nunits;
3964
3965 /* If the function isn't const, only allow it in simd loops where the
3966 user has asserted that at least nunits consecutive iterations can be
3967 performed using SIMD instructions. */
3968 if ((loop == NULL || (unsigned) loop->safelen < nunits)
3969 && gimple_vuse (stmt))
3970 return false;
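  /* For example (a sketch; foo is an invented name): if the chosen
     clone has simdlen 8 and foo is not const (it has a virtual use
     because it may read memory), the call is only vectorized inside a
     loop such as

	 #pragma omp simd safelen(8)
	 for (int i = 0; i < n; i++)
	   a[i] = foo (a[i]);

     where the user-asserted safelen guarantees that 8 consecutive
     iterations may run as one SIMD group. */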
3971
3972 /* Sanity check: make sure that at least one copy of the vectorized stmt
3973 needs to be generated. */
3974 gcc_assert (ncopies >= 1);
3975
3976 if (!vec_stmt) /* transformation not required. */
3977 {
3978 STMT_VINFO_SIMD_CLONE_INFO (stmt_info).safe_push (bestn->decl);
3979 for (i = 0; i < nargs; i++)
3980 if ((bestn->simdclone->args[i].arg_type
3981 == SIMD_CLONE_ARG_TYPE_LINEAR_CONSTANT_STEP)
3982 || (bestn->simdclone->args[i].arg_type
3983 == SIMD_CLONE_ARG_TYPE_LINEAR_REF_CONSTANT_STEP))
3984 {
3985 STMT_VINFO_SIMD_CLONE_INFO (stmt_info).safe_grow_cleared (i * 3
3986 + 1);
3987 STMT_VINFO_SIMD_CLONE_INFO (stmt_info).safe_push (arginfo[i].op);
3988 tree lst = POINTER_TYPE_P (TREE_TYPE (arginfo[i].op))
3989 ? size_type_node : TREE_TYPE (arginfo[i].op);
3990 tree ls = build_int_cst (lst, arginfo[i].linear_step);
3991 STMT_VINFO_SIMD_CLONE_INFO (stmt_info).safe_push (ls);
3992 tree sll = arginfo[i].simd_lane_linear
3993 ? boolean_true_node : boolean_false_node;
3994 STMT_VINFO_SIMD_CLONE_INFO (stmt_info).safe_push (sll);
3995 }
3996 STMT_VINFO_TYPE (stmt_info) = call_simd_clone_vec_info_type;
3997 DUMP_VECT_SCOPE ("vectorizable_simd_clone_call");
3998 /* vect_model_simple_cost (vinfo, stmt_info, ncopies,
3999 dt, slp_node, cost_vec); */
4000 return true;
4001 }
4002
4003 /* Transform. */
4004
4005 if (dump_enabled_p ())
4006 dump_printf_loc (MSG_NOTE, vect_location, "transform call.\n");
4007
4008 /* Handle def. */
4009 scalar_dest = gimple_call_lhs (stmt);
4010 vec_dest = NULL_TREE;
4011 rtype = NULL_TREE;
4012 ratype = NULL_TREE;
4013 if (scalar_dest)
4014 {
4015 vec_dest = vect_create_destination_var (scalar_dest, vectype);
4016 rtype = TREE_TYPE (TREE_TYPE (fndecl));
4017 if (TREE_CODE (rtype) == ARRAY_TYPE)
4018 {
4019 ratype = rtype;
4020 rtype = TREE_TYPE (ratype);
4021 }
4022 }
4023
4024 auto_vec<vec<tree> > vec_oprnds;
4025 auto_vec<unsigned> vec_oprnds_i;
4026 vec_oprnds.safe_grow_cleared (nargs);
4027 vec_oprnds_i.safe_grow_cleared (nargs);
4028 for (j = 0; j < ncopies; ++j)
4029 {
4030 /* Build argument list for the vectorized call. */
4031 if (j == 0)
4032 vargs.create (nargs);
4033 else
4034 vargs.truncate (0);
4035
4036 for (i = 0; i < nargs; i++)
4037 {
4038 unsigned int k, l, m, o;
4039 tree atype;
4040 op = gimple_call_arg (stmt, i);
4041 switch (bestn->simdclone->args[i].arg_type)
4042 {
4043 case SIMD_CLONE_ARG_TYPE_VECTOR:
4044 atype = bestn->simdclone->args[i].vector_type;
4045 o = nunits / simd_clone_subparts (atype);
4046 for (m = j * o; m < (j + 1) * o; m++)
4047 {
4048 if (simd_clone_subparts (atype)
4049 < simd_clone_subparts (arginfo[i].vectype))
4050 {
4051 poly_uint64 prec = GET_MODE_BITSIZE (TYPE_MODE (atype));
4052 k = (simd_clone_subparts (arginfo[i].vectype)
4053 / simd_clone_subparts (atype));
4054 gcc_assert ((k & (k - 1)) == 0);
4055 if (m == 0)
4056 {
4057 vect_get_vec_defs_for_operand (vinfo, stmt_info,
4058 ncopies * o / k, op,
4059 &vec_oprnds[i]);
4060 vec_oprnds_i[i] = 0;
4061 vec_oprnd0 = vec_oprnds[i][vec_oprnds_i[i]++];
4062 }
4063 else
4064 {
4065 vec_oprnd0 = arginfo[i].op;
4066 if ((m & (k - 1)) == 0)
4067 vec_oprnd0 = vec_oprnds[i][vec_oprnds_i[i]++];
4068 }
4069 arginfo[i].op = vec_oprnd0;
4070 vec_oprnd0
4071 = build3 (BIT_FIELD_REF, atype, vec_oprnd0,
4072 bitsize_int (prec),
4073 bitsize_int ((m & (k - 1)) * prec));
4074 gassign *new_stmt
4075 = gimple_build_assign (make_ssa_name (atype),
4076 vec_oprnd0);
4077 vect_finish_stmt_generation (vinfo, stmt_info,
4078 new_stmt, gsi);
4079 vargs.safe_push (gimple_assign_lhs (new_stmt));
4080 }
4081 else
4082 {
4083 k = (simd_clone_subparts (atype)
4084 / simd_clone_subparts (arginfo[i].vectype));
4085 gcc_assert ((k & (k - 1)) == 0);
4086 vec<constructor_elt, va_gc> *ctor_elts;
4087 if (k != 1)
4088 vec_alloc (ctor_elts, k);
4089 else
4090 ctor_elts = NULL;
4091 for (l = 0; l < k; l++)
4092 {
4093 if (m == 0 && l == 0)
4094 {
4095 vect_get_vec_defs_for_operand (vinfo, stmt_info,
4096 k * o * ncopies,
4097 op,
4098 &vec_oprnds[i]);
4099 vec_oprnds_i[i] = 0;
4100 vec_oprnd0 = vec_oprnds[i][vec_oprnds_i[i]++];
4101 }
4102 else
4103 vec_oprnd0 = vec_oprnds[i][vec_oprnds_i[i]++];
4104 arginfo[i].op = vec_oprnd0;
4105 if (k == 1)
4106 break;
4107 CONSTRUCTOR_APPEND_ELT (ctor_elts, NULL_TREE,
4108 vec_oprnd0);
4109 }
4110 if (k == 1)
4111 vargs.safe_push (vec_oprnd0);
4112 else
4113 {
4114 vec_oprnd0 = build_constructor (atype, ctor_elts);
4115 gassign *new_stmt
4116 = gimple_build_assign (make_ssa_name (atype),
4117 vec_oprnd0);
4118 vect_finish_stmt_generation (vinfo, stmt_info,
4119 new_stmt, gsi);
4120 vargs.safe_push (gimple_assign_lhs (new_stmt));
4121 }
4122 }
4123 }
4124 break;
4125 case SIMD_CLONE_ARG_TYPE_UNIFORM:
4126 vargs.safe_push (op);
4127 break;
4128 case SIMD_CLONE_ARG_TYPE_LINEAR_CONSTANT_STEP:
4129 case SIMD_CLONE_ARG_TYPE_LINEAR_REF_CONSTANT_STEP:
4130 if (j == 0)
4131 {
4132 gimple_seq stmts;
4133 arginfo[i].op
4134 = force_gimple_operand (unshare_expr (arginfo[i].op),
4135 &stmts, true, NULL_TREE);
4136 if (stmts != NULL)
4137 {
4138 basic_block new_bb;
4139 edge pe = loop_preheader_edge (loop);
4140 new_bb = gsi_insert_seq_on_edge_immediate (pe, stmts);
4141 gcc_assert (!new_bb);
4142 }
4143 if (arginfo[i].simd_lane_linear)
4144 {
4145 vargs.safe_push (arginfo[i].op);
4146 break;
4147 }
4148 tree phi_res = copy_ssa_name (op);
4149 gphi *new_phi = create_phi_node (phi_res, loop->header);
4150 add_phi_arg (new_phi, arginfo[i].op,
4151 loop_preheader_edge (loop), UNKNOWN_LOCATION);
4152 enum tree_code code
4153 = POINTER_TYPE_P (TREE_TYPE (op))
4154 ? POINTER_PLUS_EXPR : PLUS_EXPR;
4155 tree type = POINTER_TYPE_P (TREE_TYPE (op))
4156 ? sizetype : TREE_TYPE (op);
4157 widest_int cst
4158 = wi::mul (bestn->simdclone->args[i].linear_step,
4159 ncopies * nunits);
4160 tree tcst = wide_int_to_tree (type, cst);
4161 tree phi_arg = copy_ssa_name (op);
4162 gassign *new_stmt
4163 = gimple_build_assign (phi_arg, code, phi_res, tcst);
4164 gimple_stmt_iterator si = gsi_after_labels (loop->header);
4165 gsi_insert_after (&si, new_stmt, GSI_NEW_STMT);
4166 add_phi_arg (new_phi, phi_arg, loop_latch_edge (loop),
4167 UNKNOWN_LOCATION);
4168 arginfo[i].op = phi_res;
4169 vargs.safe_push (phi_res);
4170 }
4171 else
4172 {
4173 enum tree_code code
4174 = POINTER_TYPE_P (TREE_TYPE (op))
4175 ? POINTER_PLUS_EXPR : PLUS_EXPR;
4176 tree type = POINTER_TYPE_P (TREE_TYPE (op))
4177 ? sizetype : TREE_TYPE (op);
4178 widest_int cst
4179 = wi::mul (bestn->simdclone->args[i].linear_step,
4180 j * nunits);
4181 tree tcst = wide_int_to_tree (type, cst);
4182 new_temp = make_ssa_name (TREE_TYPE (op));
4183 gassign *new_stmt
4184 = gimple_build_assign (new_temp, code,
4185 arginfo[i].op, tcst);
4186 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
4187 vargs.safe_push (new_temp);
4188 }
4189 break;
4190 case SIMD_CLONE_ARG_TYPE_LINEAR_VAL_CONSTANT_STEP:
4191 case SIMD_CLONE_ARG_TYPE_LINEAR_UVAL_CONSTANT_STEP:
4192 case SIMD_CLONE_ARG_TYPE_LINEAR_VARIABLE_STEP:
4193 case SIMD_CLONE_ARG_TYPE_LINEAR_REF_VARIABLE_STEP:
4194 case SIMD_CLONE_ARG_TYPE_LINEAR_VAL_VARIABLE_STEP:
4195 case SIMD_CLONE_ARG_TYPE_LINEAR_UVAL_VARIABLE_STEP:
4196 default:
4197 gcc_unreachable ();
4198 }
4199 }
4200
4201 gcall *new_call = gimple_build_call_vec (fndecl, vargs);
4202 if (vec_dest)
4203 {
4204 gcc_assert (ratype || simd_clone_subparts (rtype) == nunits);
4205 if (ratype)
4206 new_temp = create_tmp_var (ratype);
4207 else if (simd_clone_subparts (vectype)
4208 == simd_clone_subparts (rtype))
4209 new_temp = make_ssa_name (vec_dest, new_call);
4210 else
4211 new_temp = make_ssa_name (rtype, new_call);
4212 gimple_call_set_lhs (new_call, new_temp);
4213 }
4214 vect_finish_stmt_generation (vinfo, stmt_info, new_call, gsi);
4215 gimple *new_stmt = new_call;
4216
4217 if (vec_dest)
4218 {
4219 if (simd_clone_subparts (vectype) < nunits)
4220 {
4221 unsigned int k, l;
4222 poly_uint64 prec = GET_MODE_BITSIZE (TYPE_MODE (vectype));
4223 poly_uint64 bytes = GET_MODE_SIZE (TYPE_MODE (vectype));
4224 k = nunits / simd_clone_subparts (vectype);
4225 gcc_assert ((k & (k - 1)) == 0);
4226 for (l = 0; l < k; l++)
4227 {
4228 tree t;
4229 if (ratype)
4230 {
4231 t = build_fold_addr_expr (new_temp);
4232 t = build2 (MEM_REF, vectype, t,
4233 build_int_cst (TREE_TYPE (t), l * bytes));
4234 }
4235 else
4236 t = build3 (BIT_FIELD_REF, vectype, new_temp,
4237 bitsize_int (prec), bitsize_int (l * prec));
4238 new_stmt = gimple_build_assign (make_ssa_name (vectype), t);
4239 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
4240
4241 if (j == 0 && l == 0)
4242 *vec_stmt = new_stmt;
4243 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
4244 }
4245
4246 if (ratype)
4247 vect_clobber_variable (vinfo, stmt_info, gsi, new_temp);
4248 continue;
4249 }
4250 else if (simd_clone_subparts (vectype) > nunits)
4251 {
4252 unsigned int k = (simd_clone_subparts (vectype)
4253 / simd_clone_subparts (rtype));
4254 gcc_assert ((k & (k - 1)) == 0);
4255 if ((j & (k - 1)) == 0)
4256 vec_alloc (ret_ctor_elts, k);
4257 if (ratype)
4258 {
4259 unsigned int m, o = nunits / simd_clone_subparts (rtype);
4260 for (m = 0; m < o; m++)
4261 {
4262 tree tem = build4 (ARRAY_REF, rtype, new_temp,
4263 size_int (m), NULL_TREE, NULL_TREE);
4264 new_stmt = gimple_build_assign (make_ssa_name (rtype),
4265 tem);
4266 vect_finish_stmt_generation (vinfo, stmt_info,
4267 new_stmt, gsi);
4268 CONSTRUCTOR_APPEND_ELT (ret_ctor_elts, NULL_TREE,
4269 gimple_assign_lhs (new_stmt));
4270 }
4271 vect_clobber_variable (vinfo, stmt_info, gsi, new_temp);
4272 }
4273 else
4274 CONSTRUCTOR_APPEND_ELT (ret_ctor_elts, NULL_TREE, new_temp);
4275 if ((j & (k - 1)) != k - 1)
4276 continue;
4277 vec_oprnd0 = build_constructor (vectype, ret_ctor_elts);
4278 new_stmt
4279 = gimple_build_assign (make_ssa_name (vec_dest), vec_oprnd0);
4280 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
4281
4282 if ((unsigned) j == k - 1)
4283 *vec_stmt = new_stmt;
4284 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
4285 continue;
4286 }
4287 else if (ratype)
4288 {
4289 tree t = build_fold_addr_expr (new_temp);
4290 t = build2 (MEM_REF, vectype, t,
4291 build_int_cst (TREE_TYPE (t), 0));
4292 new_stmt = gimple_build_assign (make_ssa_name (vec_dest), t);
4293 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
4294 vect_clobber_variable (vinfo, stmt_info, gsi, new_temp);
4295 }
4296 }
4297
4298 if (j == 0)
4299 *vec_stmt = new_stmt;
4300 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
4301 }
4302
4303 for (i = 0; i < nargs; ++i)
4304 {
4305 vec<tree> oprndsi = vec_oprnds[i];
4306 oprndsi.release ();
4307 }
4308 vargs.release ();
4309
4310 /* The call in STMT might prevent it from being removed in dce.
4311 We cannot remove it here, however, because the SSA name it
4312 defines is mapped to the new definition. So just replace the
4313 rhs of the statement with something harmless. */
4314
4315 if (slp_node)
4316 return true;
4317
4318 gimple *new_stmt;
4319 if (scalar_dest)
4320 {
4321 type = TREE_TYPE (scalar_dest);
4322 lhs = gimple_call_lhs (vect_orig_stmt (stmt_info)->stmt);
4323 new_stmt = gimple_build_assign (lhs, build_zero_cst (type));
4324 }
4325 else
4326 new_stmt = gimple_build_nop ();
4327 vinfo->replace_stmt (gsi, vect_orig_stmt (stmt_info), new_stmt);
4328 unlink_stmt_vdef (stmt);
4329
4330 return true;
4331 }
4332
4333
4334 /* Function vect_gen_widened_results_half
4335
4336 Create a vector stmt whose code, number of operands, and result
4337 variable are CODE, OP_TYPE, and VEC_DEST, and whose arguments are
4338 VEC_OPRND0 and VEC_OPRND1 (VEC_OPRND1 is ignored unless OP_TYPE
4339 is binary_op). The new vector stmt is to be inserted at GSI.
4340 STMT_INFO is the original scalar stmt that we are
4341 vectorizing. */
4342
4343 static gimple *
4344 vect_gen_widened_results_half (vec_info *vinfo, enum tree_code code,
4345 tree vec_oprnd0, tree vec_oprnd1, int op_type,
4346 tree vec_dest, gimple_stmt_iterator *gsi,
4347 stmt_vec_info stmt_info)
4348 {
4349 gimple *new_stmt;
4350 tree new_temp;
4351
4352 /* Generate half of the widened result: */
4353 gcc_assert (op_type == TREE_CODE_LENGTH (code));
4354 if (op_type != binary_op)
4355 vec_oprnd1 = NULL;
4356 new_stmt = gimple_build_assign (vec_dest, code, vec_oprnd0, vec_oprnd1);
4357 new_temp = make_ssa_name (vec_dest, new_stmt);
4358 gimple_assign_set_lhs (new_stmt, new_temp);
4359 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
4360
4361 return new_stmt;
4362 }
4363
4364
4365 /* Create vectorized demotion statements for vector operands from VEC_OPRNDS.
4366 For multi-step conversions store the resulting vectors and call the function
4367 recursively. */
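
   For example (a sketch, assuming 128-bit vectors): demoting int to
   char with VF 16 takes two steps.  Four V4SI vectors are first packed
   pairwise into two V8HI vectors, which are then packed into the final
   V16QI vector; the intermediate vectors are stored back into
   VEC_OPRNDS for the recursive call that performs the next step. */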
4368
4369 static void
4370 vect_create_vectorized_demotion_stmts (vec_info *vinfo, vec<tree> *vec_oprnds,
4371 int multi_step_cvt,
4372 stmt_vec_info stmt_info,
4373 vec<tree> vec_dsts,
4374 gimple_stmt_iterator *gsi,
4375 slp_tree slp_node, enum tree_code code)
4376 {
4377 unsigned int i;
4378 tree vop0, vop1, new_tmp, vec_dest;
4379
4380 vec_dest = vec_dsts.pop ();
4381
4382 for (i = 0; i < vec_oprnds->length (); i += 2)
4383 {
4384 /* Create demotion operation. */
4385 vop0 = (*vec_oprnds)[i];
4386 vop1 = (*vec_oprnds)[i + 1];
4387 gassign *new_stmt = gimple_build_assign (vec_dest, code, vop0, vop1);
4388 new_tmp = make_ssa_name (vec_dest, new_stmt);
4389 gimple_assign_set_lhs (new_stmt, new_tmp);
4390 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
4391
4392 if (multi_step_cvt)
4393 /* Store the resulting vector for next recursive call. */
4394 (*vec_oprnds)[i/2] = new_tmp;
4395 else
4396 {
4397 /* This is the last step of the conversion sequence. Store the
4398 vectors in SLP_NODE or in vector info of the scalar statement
4399 (or in STMT_VINFO_RELATED_STMT chain). */
4400 if (slp_node)
4401 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
4402 else
4403 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
4404 }
4405 }
4406
4407 /* For multi-step demotion operations we first generate demotion operations
4408 from the source type to the intermediate types, and then combine the
4409 results (stored in VEC_OPRNDS) with a further demotion operation to the
4410 destination type. */
4411 if (multi_step_cvt)
4412 {
4413 /* At each level of recursion we have half of the operands we had at the
4414 previous level. */
4415 vec_oprnds->truncate ((i+1)/2);
4416 vect_create_vectorized_demotion_stmts (vinfo, vec_oprnds,
4417 multi_step_cvt - 1,
4418 stmt_info, vec_dsts, gsi,
4419 slp_node, VEC_PACK_TRUNC_EXPR);
4420 }
4421
4422 vec_dsts.quick_push (vec_dest);
4423 }
4424
4425
4426 /* Create vectorized promotion statements for vector operands from VEC_OPRNDS0
4427 and VEC_OPRNDS1, for a binary operation associated with scalar statement
4428 STMT_INFO. For multi-step conversions store the resulting vectors and
4429 call the function recursively. */
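
   For example (a sketch, assuming 128-bit vectors): when widening short
   to int, each V8HI operand produces two V4SI results, typically via
   VEC_UNPACK_LO_EXPR for CODE1 and VEC_UNPACK_HI_EXPR for CODE2; both
   halves are stored so that a possible further widening step can
   consume them. */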
4430
4431 static void
4432 vect_create_vectorized_promotion_stmts (vec_info *vinfo,
4433 vec<tree> *vec_oprnds0,
4434 vec<tree> *vec_oprnds1,
4435 stmt_vec_info stmt_info, tree vec_dest,
4436 gimple_stmt_iterator *gsi,
4437 enum tree_code code1,
4438 enum tree_code code2, int op_type)
4439 {
4440 int i;
4441 tree vop0, vop1, new_tmp1, new_tmp2;
4442 gimple *new_stmt1, *new_stmt2;
4443 vec<tree> vec_tmp = vNULL;
4444
4445 vec_tmp.create (vec_oprnds0->length () * 2);
4446 FOR_EACH_VEC_ELT (*vec_oprnds0, i, vop0)
4447 {
4448 if (op_type == binary_op)
4449 vop1 = (*vec_oprnds1)[i];
4450 else
4451 vop1 = NULL_TREE;
4452
4453 /* Generate the two halves of promotion operation. */
4454 new_stmt1 = vect_gen_widened_results_half (vinfo, code1, vop0, vop1,
4455 op_type, vec_dest, gsi,
4456 stmt_info);
4457 new_stmt2 = vect_gen_widened_results_half (vinfo, code2, vop0, vop1,
4458 op_type, vec_dest, gsi,
4459 stmt_info);
4460 if (is_gimple_call (new_stmt1))
4461 {
4462 new_tmp1 = gimple_call_lhs (new_stmt1);
4463 new_tmp2 = gimple_call_lhs (new_stmt2);
4464 }
4465 else
4466 {
4467 new_tmp1 = gimple_assign_lhs (new_stmt1);
4468 new_tmp2 = gimple_assign_lhs (new_stmt2);
4469 }
4470
4471 /* Store the results for the next step. */
4472 vec_tmp.quick_push (new_tmp1);
4473 vec_tmp.quick_push (new_tmp2);
4474 }
4475
4476 vec_oprnds0->release ();
4477 *vec_oprnds0 = vec_tmp;
4478 }
4479
4480
4481 /* Check if STMT_INFO performs a conversion operation that can be vectorized.
4482 If VEC_STMT is also passed, vectorize STMT_INFO: create a vectorized
4483 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
4484 Return true if STMT_INFO is vectorizable in this way. */
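
   For example (a sketch): besides single-step conversions such as
   (float) of an int, this also handles multi-step cases like

       short s = ...;
       double d = (double) s;

   where there is no direct short->double vector conversion; the
   analysis below then goes through an intermediate integer type
   (CVT_TYPE), first widening short to int and then converting int to
   double. */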
4485
4486 static bool
4487 vectorizable_conversion (vec_info *vinfo,
4488 stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
4489 gimple **vec_stmt, slp_tree slp_node,
4490 stmt_vector_for_cost *cost_vec)
4491 {
4492 tree vec_dest;
4493 tree scalar_dest;
4494 tree op0, op1 = NULL_TREE;
4495 loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
4496 enum tree_code code, code1 = ERROR_MARK, code2 = ERROR_MARK;
4497 enum tree_code codecvt1 = ERROR_MARK, codecvt2 = ERROR_MARK;
4498 tree new_temp;
4499 enum vect_def_type dt[2] = {vect_unknown_def_type, vect_unknown_def_type};
4500 int ndts = 2;
4501 poly_uint64 nunits_in;
4502 poly_uint64 nunits_out;
4503 tree vectype_out, vectype_in;
4504 int ncopies, i;
4505 tree lhs_type, rhs_type;
4506 enum { NARROW, NONE, WIDEN } modifier;
4507 vec<tree> vec_oprnds0 = vNULL;
4508 vec<tree> vec_oprnds1 = vNULL;
4509 tree vop0;
4510 bb_vec_info bb_vinfo = dyn_cast <bb_vec_info> (vinfo);
4511 int multi_step_cvt = 0;
4512 vec<tree> interm_types = vNULL;
4513 tree intermediate_type, cvt_type = NULL_TREE;
4514 int op_type;
4515 unsigned short fltsz;
4516
4517 /* Is STMT a vectorizable conversion? */
4518
4519 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
4520 return false;
4521
4522 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
4523 && ! vec_stmt)
4524 return false;
4525
4526 gassign *stmt = dyn_cast <gassign *> (stmt_info->stmt);
4527 if (!stmt)
4528 return false;
4529
4530 if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME)
4531 return false;
4532
4533 code = gimple_assign_rhs_code (stmt);
4534 if (!CONVERT_EXPR_CODE_P (code)
4535 && code != FIX_TRUNC_EXPR
4536 && code != FLOAT_EXPR
4537 && code != WIDEN_MULT_EXPR
4538 && code != WIDEN_LSHIFT_EXPR)
4539 return false;
4540
4541 op_type = TREE_CODE_LENGTH (code);
4542
4543 /* Check types of lhs and rhs. */
4544 scalar_dest = gimple_assign_lhs (stmt);
4545 lhs_type = TREE_TYPE (scalar_dest);
4546 vectype_out = STMT_VINFO_VECTYPE (stmt_info);
4547
4548 /* Check the operands of the operation. */
4549 slp_tree slp_op0, slp_op1 = NULL;
4550 if (!vect_is_simple_use (vinfo, stmt_info, slp_node,
4551 0, &op0, &slp_op0, &dt[0], &vectype_in))
4552 {
4553 if (dump_enabled_p ())
4554 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4555 "use not simple.\n");
4556 return false;
4557 }
4558
4559 rhs_type = TREE_TYPE (op0);
4560 if ((code != FIX_TRUNC_EXPR && code != FLOAT_EXPR)
4561 && !((INTEGRAL_TYPE_P (lhs_type)
4562 && INTEGRAL_TYPE_P (rhs_type))
4563 || (SCALAR_FLOAT_TYPE_P (lhs_type)
4564 && SCALAR_FLOAT_TYPE_P (rhs_type))))
4565 return false;
4566
4567 if (!VECTOR_BOOLEAN_TYPE_P (vectype_out)
4568 && ((INTEGRAL_TYPE_P (lhs_type)
4569 && !type_has_mode_precision_p (lhs_type))
4570 || (INTEGRAL_TYPE_P (rhs_type)
4571 && !type_has_mode_precision_p (rhs_type))))
4572 {
4573 if (dump_enabled_p ())
4574 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4575 "type conversion to/from bit-precision unsupported."
4576 "\n");
4577 return false;
4578 }
4579
4580 if (op_type == binary_op)
4581 {
4582 gcc_assert (code == WIDEN_MULT_EXPR || code == WIDEN_LSHIFT_EXPR);
4583
4584 op1 = gimple_assign_rhs2 (stmt);
4585 tree vectype1_in;
4586 if (!vect_is_simple_use (vinfo, stmt_info, slp_node, 1,
4587 &op1, &slp_op1, &dt[1], &vectype1_in))
4588 {
4589 if (dump_enabled_p ())
4590 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4591 "use not simple.\n");
4592 return false;
4593 }
4594 /* For WIDEN_MULT_EXPR, if OP0 is a constant, use the type of
4595 OP1. */
4596 if (!vectype_in)
4597 vectype_in = vectype1_in;
4598 }
4599
4600 /* If op0 is an external or constant def, infer the vector type
4601 from the scalar type. */
4602 if (!vectype_in)
4603 vectype_in = get_vectype_for_scalar_type (vinfo, rhs_type, slp_node);
4604 if (vec_stmt)
4605 gcc_assert (vectype_in);
4606 if (!vectype_in)
4607 {
4608 if (dump_enabled_p ())
4609 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4610 "no vectype for scalar type %T\n", rhs_type);
4611
4612 return false;
4613 }
4614
4615 if (VECTOR_BOOLEAN_TYPE_P (vectype_out)
4616 && !VECTOR_BOOLEAN_TYPE_P (vectype_in))
4617 {
4618 if (dump_enabled_p ())
4619 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4620 "can't convert between boolean and non "
4621 "boolean vectors %T\n", rhs_type);
4622
4623 return false;
4624 }
4625
4626 nunits_in = TYPE_VECTOR_SUBPARTS (vectype_in);
4627 nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
4628 if (known_eq (nunits_out, nunits_in))
4629 modifier = NONE;
4630 else if (multiple_p (nunits_out, nunits_in))
4631 modifier = NARROW;
4632 else
4633 {
4634 gcc_checking_assert (multiple_p (nunits_in, nunits_out));
4635 modifier = WIDEN;
4636 }
4637
4638 /* Multiple types in SLP are handled by creating the appropriate number of
4639 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
4640 case of SLP. */
4641 if (slp_node)
4642 ncopies = 1;
4643 else if (modifier == NARROW)
4644 ncopies = vect_get_num_copies (loop_vinfo, vectype_out);
4645 else
4646 ncopies = vect_get_num_copies (loop_vinfo, vectype_in);
4647
4648 /* Sanity check: make sure that at least one copy of the vectorized stmt
4649 needs to be generated. */
4650 gcc_assert (ncopies >= 1);
4651
4652 bool found_mode = false;
4653 scalar_mode lhs_mode = SCALAR_TYPE_MODE (lhs_type);
4654 scalar_mode rhs_mode = SCALAR_TYPE_MODE (rhs_type);
4655 opt_scalar_mode rhs_mode_iter;
4656
4657 /* Supportable by target? */
4658 switch (modifier)
4659 {
4660 case NONE:
4661 if (code != FIX_TRUNC_EXPR
4662 && code != FLOAT_EXPR
4663 && !CONVERT_EXPR_CODE_P (code))
4664 return false;
4665 if (supportable_convert_operation (code, vectype_out, vectype_in, &code1))
4666 break;
4667 /* FALLTHRU */
4668 unsupported:
4669 if (dump_enabled_p ())
4670 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4671 "conversion not supported by target.\n");
4672 return false;
4673
4674 case WIDEN:
4675 if (supportable_widening_operation (vinfo, code, stmt_info, vectype_out,
4676 vectype_in, &code1, &code2,
4677 &multi_step_cvt, &interm_types))
4678 {
4679 /* Binary widening operation can only be supported directly by the
4680 architecture. */
4681 gcc_assert (!(multi_step_cvt && op_type == binary_op));
4682 break;
4683 }
4684
4685 if (code != FLOAT_EXPR
4686 || GET_MODE_SIZE (lhs_mode) <= GET_MODE_SIZE (rhs_mode))
4687 goto unsupported;
4688
4689 fltsz = GET_MODE_SIZE (lhs_mode);
4690 FOR_EACH_2XWIDER_MODE (rhs_mode_iter, rhs_mode)
4691 {
4692 rhs_mode = rhs_mode_iter.require ();
4693 if (GET_MODE_SIZE (rhs_mode) > fltsz)
4694 break;
4695
4696 cvt_type
4697 = build_nonstandard_integer_type (GET_MODE_BITSIZE (rhs_mode), 0);
4698 cvt_type = get_same_sized_vectype (cvt_type, vectype_in);
4699 if (cvt_type == NULL_TREE)
4700 goto unsupported;
4701
4702 if (GET_MODE_SIZE (rhs_mode) == fltsz)
4703 {
4704 if (!supportable_convert_operation (code, vectype_out,
4705 cvt_type, &codecvt1))
4706 goto unsupported;
4707 }
4708 else if (!supportable_widening_operation (vinfo, code, stmt_info,
4709 vectype_out, cvt_type,
4710 &codecvt1, &codecvt2,
4711 &multi_step_cvt,
4712 &interm_types))
4713 continue;
4714 else
4715 gcc_assert (multi_step_cvt == 0);
4716
4717 if (supportable_widening_operation (vinfo, NOP_EXPR, stmt_info,
4718 cvt_type,
4719 vectype_in, &code1, &code2,
4720 &multi_step_cvt, &interm_types))
4721 {
4722 found_mode = true;
4723 break;
4724 }
4725 }
4726
4727 if (!found_mode)
4728 goto unsupported;
4729
4730 if (GET_MODE_SIZE (rhs_mode) == fltsz)
4731 codecvt2 = ERROR_MARK;
4732 else
4733 {
4734 multi_step_cvt++;
4735 interm_types.safe_push (cvt_type);
4736 cvt_type = NULL_TREE;
4737 }
4738 break;
4739
4740 case NARROW:
4741 gcc_assert (op_type == unary_op);
4742 if (supportable_narrowing_operation (code, vectype_out, vectype_in,
4743 &code1, &multi_step_cvt,
4744 &interm_types))
4745 break;
4746
4747 if (code != FIX_TRUNC_EXPR
4748 || GET_MODE_SIZE (lhs_mode) >= GET_MODE_SIZE (rhs_mode))
4749 goto unsupported;
4750
4751 cvt_type
4752 = build_nonstandard_integer_type (GET_MODE_BITSIZE (rhs_mode), 0);
4753 cvt_type = get_same_sized_vectype (cvt_type, vectype_in);
4754 if (cvt_type == NULL_TREE)
4755 goto unsupported;
4756 if (!supportable_convert_operation (code, cvt_type, vectype_in,
4757 &codecvt1))
4758 goto unsupported;
4759 if (supportable_narrowing_operation (NOP_EXPR, vectype_out, cvt_type,
4760 &code1, &multi_step_cvt,
4761 &interm_types))
4762 break;
4763 goto unsupported;
4764
4765 default:
4766 gcc_unreachable ();
4767 }
4768
4769 if (!vec_stmt) /* transformation not required. */
4770 {
4771 if (slp_node
4772 && (!vect_maybe_update_slp_op_vectype (slp_op0, vectype_in)
4773 || !vect_maybe_update_slp_op_vectype (slp_op1, vectype_in)))
4774 {
4775 if (dump_enabled_p ())
4776 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4777 "incompatible vector types for invariants\n");
4778 return false;
4779 }
4780 DUMP_VECT_SCOPE ("vectorizable_conversion");
4781 if (modifier == NONE)
4782 {
4783 STMT_VINFO_TYPE (stmt_info) = type_conversion_vec_info_type;
4784 vect_model_simple_cost (vinfo, stmt_info, ncopies, dt, ndts, slp_node,
4785 cost_vec);
4786 }
4787 else if (modifier == NARROW)
4788 {
4789 STMT_VINFO_TYPE (stmt_info) = type_demotion_vec_info_type;
4790 /* The final packing step produces one vector result per copy. */
4791 unsigned int nvectors
4792 = (slp_node ? SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node) : ncopies);
4793 vect_model_promotion_demotion_cost (stmt_info, dt, nvectors,
4794 multi_step_cvt, cost_vec);
4795 }
4796 else
4797 {
4798 STMT_VINFO_TYPE (stmt_info) = type_promotion_vec_info_type;
4799 /* The initial unpacking step produces two vector results
4800 per copy. MULTI_STEP_CVT is 0 for a single conversion,
4801 so >> MULTI_STEP_CVT divides by 2^(number of steps - 1). */
4802 unsigned int nvectors
4803 = (slp_node
4804 ? SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node) >> multi_step_cvt
4805 : ncopies * 2);
4806 vect_model_promotion_demotion_cost (stmt_info, dt, nvectors,
4807 multi_step_cvt, cost_vec);
4808 }
4809 interm_types.release ();
4810 return true;
4811 }
4812
4813 /* Transform. */
4814 if (dump_enabled_p ())
4815 dump_printf_loc (MSG_NOTE, vect_location,
4816 "transform conversion. ncopies = %d.\n", ncopies);
4817
4818 if (op_type == binary_op)
4819 {
4820 if (CONSTANT_CLASS_P (op0))
4821 op0 = fold_convert (TREE_TYPE (op1), op0);
4822 else if (CONSTANT_CLASS_P (op1))
4823 op1 = fold_convert (TREE_TYPE (op0), op1);
4824 }
4825
4826 /* In case of multi-step conversion, we first generate conversion operations
4827 to the intermediate types, and then from those types to the final one.
4828 We create vector destinations for the intermediate type (TYPES) received
4829 from supportable_*_operation, and store them in the correct order
4830 for future use in vect_create_vectorized_*_stmts (). */
4831 auto_vec<tree> vec_dsts (multi_step_cvt + 1);
4832 vec_dest = vect_create_destination_var (scalar_dest,
4833 (cvt_type && modifier == WIDEN)
4834 ? cvt_type : vectype_out);
4835 vec_dsts.quick_push (vec_dest);
4836
4837 if (multi_step_cvt)
4838 {
4839 for (i = interm_types.length () - 1;
4840 interm_types.iterate (i, &intermediate_type); i--)
4841 {
4842 vec_dest = vect_create_destination_var (scalar_dest,
4843 intermediate_type);
4844 vec_dsts.quick_push (vec_dest);
4845 }
4846 }
4847
4848 if (cvt_type)
4849 vec_dest = vect_create_destination_var (scalar_dest,
4850 modifier == WIDEN
4851 ? vectype_out : cvt_type);
4852
4853 int ninputs = 1;
4854 if (!slp_node)
4855 {
4856 if (modifier == WIDEN)
4857 ;
4858 else if (modifier == NARROW)
4859 {
4860 if (multi_step_cvt)
4861 ninputs = vect_pow2 (multi_step_cvt);
4862 ninputs *= 2;
4863 }
4864 }
4865
4866 switch (modifier)
4867 {
4868 case NONE:
4869 vect_get_vec_defs (vinfo, stmt_info, slp_node, ncopies,
4870 op0, &vec_oprnds0);
4871 FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
4872 {
4873 /* Arguments are ready, create the new vector stmt. */
4874 gcc_assert (TREE_CODE_LENGTH (code1) == unary_op);
4875 gassign *new_stmt = gimple_build_assign (vec_dest, code1, vop0);
4876 new_temp = make_ssa_name (vec_dest, new_stmt);
4877 gimple_assign_set_lhs (new_stmt, new_temp);
4878 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
4879
4880 if (slp_node)
4881 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
4882 else
4883 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
4884 }
4885 break;
4886
4887 case WIDEN:
4888 /* In case the vectorization factor (VF) is bigger than the number
4889 of elements that we can fit in a vectype (nunits), we have to
4890 generate more than one vector stmt, i.e. we need to "unroll"
4891 the vector stmt by a factor VF/nunits. */
4892 vect_get_vec_defs (vinfo, stmt_info, slp_node, ncopies * ninputs,
4893 op0, &vec_oprnds0,
4894 code == WIDEN_LSHIFT_EXPR ? NULL_TREE : op1,
4895 &vec_oprnds1);
4896 if (code == WIDEN_LSHIFT_EXPR)
4897 {
4898 vec_oprnds1.create (ncopies * ninputs);
4899 for (i = 0; i < ncopies * ninputs; ++i)
4900 vec_oprnds1.quick_push (op1);
4901 }
4902 /* Arguments are ready. Create the new vector stmts. */
4903 for (i = multi_step_cvt; i >= 0; i--)
4904 {
4905 tree this_dest = vec_dsts[i];
4906 enum tree_code c1 = code1, c2 = code2;
4907 if (i == 0 && codecvt2 != ERROR_MARK)
4908 {
4909 c1 = codecvt1;
4910 c2 = codecvt2;
4911 }
4912 vect_create_vectorized_promotion_stmts (vinfo, &vec_oprnds0,
4913 &vec_oprnds1, stmt_info,
4914 this_dest, gsi,
4915 c1, c2, op_type);
4916 }
4917
4918 FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
4919 {
4920 gimple *new_stmt;
4921 if (cvt_type)
4922 {
4923 gcc_assert (TREE_CODE_LENGTH (codecvt1) == unary_op);
4924 new_temp = make_ssa_name (vec_dest);
4925 new_stmt = gimple_build_assign (new_temp, codecvt1, vop0);
4926 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
4927 }
4928 else
4929 new_stmt = SSA_NAME_DEF_STMT (vop0);
4930
4931 if (slp_node)
4932 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
4933 else
4934 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
4935 }
4936 break;
4937
4938 case NARROW:
4939 /* In case the vectorization factor (VF) is bigger than the number
4940 of elements that we can fit in a vectype (nunits), we have to
4941 generate more than one vector stmt, i.e. we need to "unroll"
4942 the vector stmt by a factor VF/nunits. */
4943 vect_get_vec_defs (vinfo, stmt_info, slp_node, ncopies * ninputs,
4944 op0, &vec_oprnds0);
4945 /* Arguments are ready. Create the new vector stmts. */
4946 if (cvt_type)
4947 FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
4948 {
4949 gcc_assert (TREE_CODE_LENGTH (codecvt1) == unary_op);
4950 new_temp = make_ssa_name (vec_dest);
4951 gassign *new_stmt
4952 = gimple_build_assign (new_temp, codecvt1, vop0);
4953 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
4954 vec_oprnds0[i] = new_temp;
4955 }
4956
4957 vect_create_vectorized_demotion_stmts (vinfo, &vec_oprnds0,
4958 multi_step_cvt,
4959 stmt_info, vec_dsts, gsi,
4960 slp_node, code1);
4961 break;
4962 }
4963 if (!slp_node)
4964 *vec_stmt = STMT_VINFO_VEC_STMTS (stmt_info)[0];
4965
4966 vec_oprnds0.release ();
4967 vec_oprnds1.release ();
4968 interm_types.release ();
4969
4970 return true;
4971 }
4972
4973 /* Return true if we can assume from the scalar form of STMT_INFO that
4974 neither the scalar nor the vector forms will generate code. STMT_INFO
4975 is known not to involve a data reference. */
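
   For example, a sign-change conversion such as

       unsigned int u = ...;
       int i = (int) u;

   or a VIEW_CONVERT_EXPR between types of the same size generates no
   code in either the scalar or the vector form, so such statements are
   not costed. */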
4976
4977 bool
4978 vect_nop_conversion_p (stmt_vec_info stmt_info)
4979 {
4980 gassign *stmt = dyn_cast <gassign *> (stmt_info->stmt);
4981 if (!stmt)
4982 return false;
4983
4984 tree lhs = gimple_assign_lhs (stmt);
4985 tree_code code = gimple_assign_rhs_code (stmt);
4986 tree rhs = gimple_assign_rhs1 (stmt);
4987
4988 if (code == SSA_NAME || code == VIEW_CONVERT_EXPR)
4989 return true;
4990
4991 if (CONVERT_EXPR_CODE_P (code))
4992 return tree_nop_conversion_p (TREE_TYPE (lhs), TREE_TYPE (rhs));
4993
4994 return false;
4995 }
4996
4997 /* Function vectorizable_assignment.
4998
4999 Check if STMT_INFO performs an assignment (copy) that can be vectorized.
5000 If VEC_STMT is also passed, vectorize the STMT_INFO: create a vectorized
5001 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
5002 Return true if STMT_INFO is vectorizable in this way. */
5003
5004 static bool
5005 vectorizable_assignment (vec_info *vinfo,
5006 stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
5007 gimple **vec_stmt, slp_tree slp_node,
5008 stmt_vector_for_cost *cost_vec)
5009 {
5010 tree vec_dest;
5011 tree scalar_dest;
5012 tree op;
5013 loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
5014 tree new_temp;
5015 enum vect_def_type dt[1] = {vect_unknown_def_type};
5016 int ndts = 1;
5017 int ncopies;
5018 int i;
5019 vec<tree> vec_oprnds = vNULL;
5020 tree vop;
5021 bb_vec_info bb_vinfo = dyn_cast <bb_vec_info> (vinfo);
5022 enum tree_code code;
5023 tree vectype_in;
5024
5025 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
5026 return false;
5027
5028 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
5029 && ! vec_stmt)
5030 return false;
5031
5032 /* Is vectorizable assignment? */
5033 gassign *stmt = dyn_cast <gassign *> (stmt_info->stmt);
5034 if (!stmt)
5035 return false;
5036
5037 scalar_dest = gimple_assign_lhs (stmt);
5038 if (TREE_CODE (scalar_dest) != SSA_NAME)
5039 return false;
5040
5041 if (STMT_VINFO_DATA_REF (stmt_info))
5042 return false;
5043
5044 code = gimple_assign_rhs_code (stmt);
5045 if (!(gimple_assign_single_p (stmt)
5046 || code == PAREN_EXPR
5047 || CONVERT_EXPR_CODE_P (code)))
5048 return false;
5049
5050 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
5051 poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
5052
5053 /* Multiple types in SLP are handled by creating the appropriate number of
5054 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
5055 case of SLP. */
5056 if (slp_node)
5057 ncopies = 1;
5058 else
5059 ncopies = vect_get_num_copies (loop_vinfo, vectype);
5060
5061 gcc_assert (ncopies >= 1);
5062
5063 slp_tree slp_op;
5064 if (!vect_is_simple_use (vinfo, stmt_info, slp_node, 0, &op, &slp_op,
5065 &dt[0], &vectype_in))
5066 {
5067 if (dump_enabled_p ())
5068 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5069 "use not simple.\n");
5070 return false;
5071 }
5072 if (!vectype_in)
5073 vectype_in = get_vectype_for_scalar_type (vinfo, TREE_TYPE (op), slp_node);
5074
5075 /* We can handle NOP_EXPR conversions that do not change the number
5076 of elements or the vector size. */
5077 if ((CONVERT_EXPR_CODE_P (code)
5078 || code == VIEW_CONVERT_EXPR)
5079 && (!vectype_in
5080 || maybe_ne (TYPE_VECTOR_SUBPARTS (vectype_in), nunits)
5081 || maybe_ne (GET_MODE_SIZE (TYPE_MODE (vectype)),
5082 GET_MODE_SIZE (TYPE_MODE (vectype_in)))))
5083 return false;
5084
5085 /* We do not handle bit-precision changes. */
5086 if ((CONVERT_EXPR_CODE_P (code)
5087 || code == VIEW_CONVERT_EXPR)
5088 && INTEGRAL_TYPE_P (TREE_TYPE (scalar_dest))
5089 && (!type_has_mode_precision_p (TREE_TYPE (scalar_dest))
5090 || !type_has_mode_precision_p (TREE_TYPE (op)))
5091 /* But a conversion that does not change the bit-pattern is ok. */
5092 && !((TYPE_PRECISION (TREE_TYPE (scalar_dest))
5093 > TYPE_PRECISION (TREE_TYPE (op)))
5094 && TYPE_UNSIGNED (TREE_TYPE (op)))
5095 /* Conversion between boolean types of different sizes is
5096 	 a simple assignment in case their vectypes are the same
5097 boolean vectors. */
5098 && (!VECTOR_BOOLEAN_TYPE_P (vectype)
5099 || !VECTOR_BOOLEAN_TYPE_P (vectype_in)))
5100 {
5101 if (dump_enabled_p ())
5102 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5103 "type conversion to/from bit-precision "
5104 "unsupported.\n");
5105 return false;
5106 }
5107
5108 if (!vec_stmt) /* transformation not required. */
5109 {
5110 if (slp_node
5111 && !vect_maybe_update_slp_op_vectype (slp_op, vectype_in))
5112 {
5113 if (dump_enabled_p ())
5114 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5115 "incompatible vector types for invariants\n");
5116 return false;
5117 }
5118 STMT_VINFO_TYPE (stmt_info) = assignment_vec_info_type;
5119 DUMP_VECT_SCOPE ("vectorizable_assignment");
5120 if (!vect_nop_conversion_p (stmt_info))
5121 vect_model_simple_cost (vinfo, stmt_info, ncopies, dt, ndts, slp_node,
5122 cost_vec);
5123 return true;
5124 }
5125
5126 /* Transform. */
5127 if (dump_enabled_p ())
5128 dump_printf_loc (MSG_NOTE, vect_location, "transform assignment.\n");
5129
5130 /* Handle def. */
5131 vec_dest = vect_create_destination_var (scalar_dest, vectype);
5132
5133 /* Handle use. */
5134 vect_get_vec_defs (vinfo, stmt_info, slp_node, ncopies, op, &vec_oprnds);
5135
5136   /* Arguments are ready.  Create the new vector stmt.  */
5137 FOR_EACH_VEC_ELT (vec_oprnds, i, vop)
5138 {
5139 if (CONVERT_EXPR_CODE_P (code)
5140 || code == VIEW_CONVERT_EXPR)
5141 vop = build1 (VIEW_CONVERT_EXPR, vectype, vop);
5142 gassign *new_stmt = gimple_build_assign (vec_dest, vop);
5143 new_temp = make_ssa_name (vec_dest, new_stmt);
5144 gimple_assign_set_lhs (new_stmt, new_temp);
5145 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
5146 if (slp_node)
5147 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
5148 else
5149 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
5150 }
5151 if (!slp_node)
5152 *vec_stmt = STMT_VINFO_VEC_STMTS (stmt_info)[0];
5153
5154 vec_oprnds.release ();
5155 return true;
5156 }
5157
5158
5159 /* Return TRUE if CODE (a shift operation) is supported for SCALAR_TYPE
5160 either as shift by a scalar or by a vector. */
5161
5162 bool
5163 vect_supportable_shift (vec_info *vinfo, enum tree_code code, tree scalar_type)
5164 {
5165
5166 machine_mode vec_mode;
5167 optab optab;
5168 int icode;
5169 tree vectype;
5170
5171 vectype = get_vectype_for_scalar_type (vinfo, scalar_type);
5172 if (!vectype)
5173 return false;
5174
5175 optab = optab_for_tree_code (code, vectype, optab_scalar);
5176 if (!optab
5177 || optab_handler (optab, TYPE_MODE (vectype)) == CODE_FOR_nothing)
5178 {
5179 optab = optab_for_tree_code (code, vectype, optab_vector);
5180 if (!optab
5181 || (optab_handler (optab, TYPE_MODE (vectype))
5182 == CODE_FOR_nothing))
5183 return false;
5184 }
5185
5186 vec_mode = TYPE_MODE (vectype);
5187 icode = (int) optab_handler (optab, vec_mode);
5188 if (icode == CODE_FOR_nothing)
5189 return false;
5190
5191 return true;
5192 }
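
/* For instance (illustrative), a target that provides only vector-by-vector
   shift patterns (optab_vector) and no shift-by-scalar pattern for the
   vector mode of SCALAR_TYPE still makes this function return true,
   because the vector/vector optab is tried as a fallback above.  */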
5193
5194
5195 /* Function vectorizable_shift.
5196
5197 Check if STMT_INFO performs a shift operation that can be vectorized.
5198 If VEC_STMT is also passed, vectorize the STMT_INFO: create a vectorized
5199 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
5200 Return true if STMT_INFO is vectorizable in this way. */
5201
5202 static bool
5203 vectorizable_shift (vec_info *vinfo,
5204 stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
5205 gimple **vec_stmt, slp_tree slp_node,
5206 stmt_vector_for_cost *cost_vec)
5207 {
5208 tree vec_dest;
5209 tree scalar_dest;
5210 tree op0, op1 = NULL;
5211 tree vec_oprnd1 = NULL_TREE;
5212 tree vectype;
5213 loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
5214 enum tree_code code;
5215 machine_mode vec_mode;
5216 tree new_temp;
5217 optab optab;
5218 int icode;
5219 machine_mode optab_op2_mode;
5220 enum vect_def_type dt[2] = {vect_unknown_def_type, vect_unknown_def_type};
5221 int ndts = 2;
5222 poly_uint64 nunits_in;
5223 poly_uint64 nunits_out;
5224 tree vectype_out;
5225 tree op1_vectype;
5226 int ncopies;
5227 int i;
5228 vec<tree> vec_oprnds0 = vNULL;
5229 vec<tree> vec_oprnds1 = vNULL;
5230 tree vop0, vop1;
5231 unsigned int k;
5232 bool scalar_shift_arg = true;
5233 bb_vec_info bb_vinfo = dyn_cast <bb_vec_info> (vinfo);
5234 bool incompatible_op1_vectype_p = false;
5235
5236 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
5237 return false;
5238
5239 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
5240 && STMT_VINFO_DEF_TYPE (stmt_info) != vect_nested_cycle
5241 && ! vec_stmt)
5242 return false;
5243
5244 /* Is STMT a vectorizable binary/unary operation? */
5245 gassign *stmt = dyn_cast <gassign *> (stmt_info->stmt);
5246 if (!stmt)
5247 return false;
5248
5249 if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME)
5250 return false;
5251
5252 code = gimple_assign_rhs_code (stmt);
5253
5254 if (!(code == LSHIFT_EXPR || code == RSHIFT_EXPR || code == LROTATE_EXPR
5255 || code == RROTATE_EXPR))
5256 return false;
5257
5258 scalar_dest = gimple_assign_lhs (stmt);
5259 vectype_out = STMT_VINFO_VECTYPE (stmt_info);
5260 if (!type_has_mode_precision_p (TREE_TYPE (scalar_dest)))
5261 {
5262 if (dump_enabled_p ())
5263 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5264 "bit-precision shifts not supported.\n");
5265 return false;
5266 }
5267
5268 slp_tree slp_op0;
5269 if (!vect_is_simple_use (vinfo, stmt_info, slp_node,
5270 0, &op0, &slp_op0, &dt[0], &vectype))
5271 {
5272 if (dump_enabled_p ())
5273 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5274 "use not simple.\n");
5275 return false;
5276 }
5277 /* If op0 is an external or constant def, infer the vector type
5278 from the scalar type. */
5279 if (!vectype)
5280 vectype = get_vectype_for_scalar_type (vinfo, TREE_TYPE (op0), slp_node);
5281 if (vec_stmt)
5282 gcc_assert (vectype);
5283 if (!vectype)
5284 {
5285 if (dump_enabled_p ())
5286 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5287 "no vectype for scalar type\n");
5288 return false;
5289 }
5290
5291 nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
5292 nunits_in = TYPE_VECTOR_SUBPARTS (vectype);
5293 if (maybe_ne (nunits_out, nunits_in))
5294 return false;
5295
5296 stmt_vec_info op1_def_stmt_info;
5297 slp_tree slp_op1;
5298 if (!vect_is_simple_use (vinfo, stmt_info, slp_node, 1, &op1, &slp_op1,
5299 &dt[1], &op1_vectype, &op1_def_stmt_info))
5300 {
5301 if (dump_enabled_p ())
5302 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5303 "use not simple.\n");
5304 return false;
5305 }
5306
5307 /* Multiple types in SLP are handled by creating the appropriate number of
5308 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
5309 case of SLP. */
5310 if (slp_node)
5311 ncopies = 1;
5312 else
5313 ncopies = vect_get_num_copies (loop_vinfo, vectype);
5314
5315 gcc_assert (ncopies >= 1);
5316
5317   /* Determine whether the shift amount is a vector or a scalar.  If the
5318 shift/rotate amount is a vector, use the vector/vector shift optabs. */
5319
5320 if ((dt[1] == vect_internal_def
5321 || dt[1] == vect_induction_def
5322 || dt[1] == vect_nested_cycle)
5323 && !slp_node)
5324 scalar_shift_arg = false;
5325 else if (dt[1] == vect_constant_def
5326 || dt[1] == vect_external_def
5327 || dt[1] == vect_internal_def)
5328 {
5329       /* In SLP we need to check whether the shift count is the same
5330 	 in all the scalar stmts; in loops, if it is a constant or
5331 	 invariant, it is always a scalar shift.  */
5332 if (slp_node)
5333 {
5334 vec<stmt_vec_info> stmts = SLP_TREE_SCALAR_STMTS (slp_node);
5335 stmt_vec_info slpstmt_info;
5336
5337 FOR_EACH_VEC_ELT (stmts, k, slpstmt_info)
5338 {
5339 gassign *slpstmt = as_a <gassign *> (slpstmt_info->stmt);
5340 if (!operand_equal_p (gimple_assign_rhs2 (slpstmt), op1, 0))
5341 scalar_shift_arg = false;
5342 }
5343
5344 /* For internal SLP defs we have to make sure we see scalar stmts
5345 for all vector elements.
5346 ??? For different vectors we could resort to a different
5347 scalar shift operand but code-generation below simply always
5348 takes the first. */
5349 if (dt[1] == vect_internal_def
5350 && maybe_ne (nunits_out * SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node),
5351 stmts.length ()))
5352 scalar_shift_arg = false;
5353 }
5354
5355       /* If the shift amount is computed by a pattern stmt we cannot
5356 	 use the scalar amount directly, so give up and use a vector
5357 	 shift.  */
5358 if (op1_def_stmt_info && is_pattern_stmt_p (op1_def_stmt_info))
5359 scalar_shift_arg = false;
5360 }
5361 else
5362 {
5363 if (dump_enabled_p ())
5364 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5365 "operand mode requires invariant argument.\n");
5366 return false;
5367 }
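
  /* A minimal illustration of the distinction above (assumed source forms,
     not taken from any testcase):
       a[i] = b[i] << c[i];   -- the shift amount varies per lane, so
                                 dt[1] is vect_internal_def and a
                                 vector/vector shift is needed;
       a[i] = b[i] << k;      -- K is loop-invariant, so the scalar amount
                                 can feed the vector/scalar patterns
                                 checked below.  */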
5368
5369 /* Vector shifted by vector. */
5370 bool was_scalar_shift_arg = scalar_shift_arg;
5371 if (!scalar_shift_arg)
5372 {
5373 optab = optab_for_tree_code (code, vectype, optab_vector);
5374 if (dump_enabled_p ())
5375 dump_printf_loc (MSG_NOTE, vect_location,
5376 "vector/vector shift/rotate found.\n");
5377
5378 if (!op1_vectype)
5379 op1_vectype = get_vectype_for_scalar_type (vinfo, TREE_TYPE (op1),
5380 slp_op1);
5381 incompatible_op1_vectype_p
5382 = (op1_vectype == NULL_TREE
5383 || maybe_ne (TYPE_VECTOR_SUBPARTS (op1_vectype),
5384 TYPE_VECTOR_SUBPARTS (vectype))
5385 || TYPE_MODE (op1_vectype) != TYPE_MODE (vectype));
5386 if (incompatible_op1_vectype_p
5387 && (!slp_node
5388 || SLP_TREE_DEF_TYPE (slp_op1) != vect_constant_def
5389 || slp_op1->refcnt != 1))
5390 {
5391 if (dump_enabled_p ())
5392 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5393 "unusable type for last operand in"
5394 " vector/vector shift/rotate.\n");
5395 return false;
5396 }
5397 }
5398   /* See if the machine has a vector-shift-by-scalar insn and, if not,
5399      whether it has a vector-shift-by-vector insn.  */
5400 else
5401 {
5402 optab = optab_for_tree_code (code, vectype, optab_scalar);
5403 if (optab
5404 && optab_handler (optab, TYPE_MODE (vectype)) != CODE_FOR_nothing)
5405 {
5406 if (dump_enabled_p ())
5407 dump_printf_loc (MSG_NOTE, vect_location,
5408 "vector/scalar shift/rotate found.\n");
5409 }
5410 else
5411 {
5412 optab = optab_for_tree_code (code, vectype, optab_vector);
5413 if (optab
5414 && (optab_handler (optab, TYPE_MODE (vectype))
5415 != CODE_FOR_nothing))
5416 {
5417 scalar_shift_arg = false;
5418
5419 if (dump_enabled_p ())
5420 dump_printf_loc (MSG_NOTE, vect_location,
5421 "vector/vector shift/rotate found.\n");
5422
5423 if (!op1_vectype)
5424 op1_vectype = get_vectype_for_scalar_type (vinfo,
5425 TREE_TYPE (op1),
5426 slp_op1);
5427
5428 	      /* Unlike the other binary operators, shifts/rotates have
5429 		 an rhs of int type rather than the same type as the lhs,
5430 		 so make sure the scalar is of the right type if we are
5431 		 dealing with vectors of long long/long/short/char.  */
5432 incompatible_op1_vectype_p
5433 = (!op1_vectype
5434 || !tree_nop_conversion_p (TREE_TYPE (vectype),
5435 TREE_TYPE (op1)));
5436 if (incompatible_op1_vectype_p
5437 && dt[1] == vect_internal_def)
5438 {
5439 if (dump_enabled_p ())
5440 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5441 "unusable type for last operand in"
5442 " vector/vector shift/rotate.\n");
5443 return false;
5444 }
5445 }
5446 }
5447 }
5448
5449 /* Supportable by target? */
5450 if (!optab)
5451 {
5452 if (dump_enabled_p ())
5453 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5454 "no optab.\n");
5455 return false;
5456 }
5457 vec_mode = TYPE_MODE (vectype);
5458 icode = (int) optab_handler (optab, vec_mode);
5459 if (icode == CODE_FOR_nothing)
5460 {
5461 if (dump_enabled_p ())
5462 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5463 "op not supported by target.\n");
5464 /* Check only during analysis. */
5465 if (maybe_ne (GET_MODE_SIZE (vec_mode), UNITS_PER_WORD)
5466 || (!vec_stmt
5467 && !vect_worthwhile_without_simd_p (vinfo, code)))
5468 return false;
5469 if (dump_enabled_p ())
5470 dump_printf_loc (MSG_NOTE, vect_location,
5471 "proceeding using word mode.\n");
5472 }
5473
5474 /* Worthwhile without SIMD support? Check only during analysis. */
5475 if (!vec_stmt
5476 && !VECTOR_MODE_P (TYPE_MODE (vectype))
5477 && !vect_worthwhile_without_simd_p (vinfo, code))
5478 {
5479 if (dump_enabled_p ())
5480 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5481 "not worthwhile without SIMD support.\n");
5482 return false;
5483 }
5484
5485 if (!vec_stmt) /* transformation not required. */
5486 {
5487 if (slp_node
5488 && (!vect_maybe_update_slp_op_vectype (slp_op0, vectype)
5489 || ((!scalar_shift_arg || dt[1] == vect_internal_def)
5490 && (!incompatible_op1_vectype_p
5491 || dt[1] == vect_constant_def)
5492 && !vect_maybe_update_slp_op_vectype
5493 (slp_op1,
5494 incompatible_op1_vectype_p ? vectype : op1_vectype))))
5495 {
5496 if (dump_enabled_p ())
5497 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5498 "incompatible vector types for invariants\n");
5499 return false;
5500 }
5501 /* Now adjust the constant shift amount in place. */
5502 if (slp_node
5503 && incompatible_op1_vectype_p
5504 && dt[1] == vect_constant_def)
5505 {
5506 for (unsigned i = 0;
5507 i < SLP_TREE_SCALAR_OPS (slp_op1).length (); ++i)
5508 {
5509 SLP_TREE_SCALAR_OPS (slp_op1)[i]
5510 = fold_convert (TREE_TYPE (vectype),
5511 SLP_TREE_SCALAR_OPS (slp_op1)[i]);
5512 gcc_assert ((TREE_CODE (SLP_TREE_SCALAR_OPS (slp_op1)[i])
5513 == INTEGER_CST));
5514 }
5515 }
5516 STMT_VINFO_TYPE (stmt_info) = shift_vec_info_type;
5517 DUMP_VECT_SCOPE ("vectorizable_shift");
5518 vect_model_simple_cost (vinfo, stmt_info, ncopies, dt,
5519 scalar_shift_arg ? 1 : ndts, slp_node, cost_vec);
5520 return true;
5521 }
5522
5523 /* Transform. */
5524
5525 if (dump_enabled_p ())
5526 dump_printf_loc (MSG_NOTE, vect_location,
5527 "transform binary/unary operation.\n");
5528
5529 if (incompatible_op1_vectype_p && !slp_node)
5530 {
5531 gcc_assert (!scalar_shift_arg && was_scalar_shift_arg);
5532 op1 = fold_convert (TREE_TYPE (vectype), op1);
5533 if (dt[1] != vect_constant_def)
5534 op1 = vect_init_vector (vinfo, stmt_info, op1,
5535 TREE_TYPE (vectype), NULL);
5536 }
5537
5538 /* Handle def. */
5539 vec_dest = vect_create_destination_var (scalar_dest, vectype);
5540
5541 if (scalar_shift_arg && dt[1] != vect_internal_def)
5542 {
5543 /* Vector shl and shr insn patterns can be defined with scalar
5544 operand 2 (shift operand). In this case, use constant or loop
5545 invariant op1 directly, without extending it to vector mode
5546 first. */
5547 optab_op2_mode = insn_data[icode].operand[2].mode;
5548 if (!VECTOR_MODE_P (optab_op2_mode))
5549 {
5550 if (dump_enabled_p ())
5551 dump_printf_loc (MSG_NOTE, vect_location,
5552 "operand 1 using scalar mode.\n");
5553 vec_oprnd1 = op1;
5554 vec_oprnds1.create (slp_node ? slp_node->vec_stmts_size : ncopies);
5555 vec_oprnds1.quick_push (vec_oprnd1);
5556 /* Store vec_oprnd1 for every vector stmt to be created.
5557 We check during the analysis that all the shift arguments
5558 are the same.
5559 TODO: Allow different constants for different vector
5560 stmts generated for an SLP instance. */
5561 for (k = 0;
5562 k < (slp_node ? slp_node->vec_stmts_size - 1 : ncopies - 1); k++)
5563 vec_oprnds1.quick_push (vec_oprnd1);
5564 }
5565 }
5566 else if (!scalar_shift_arg && slp_node && incompatible_op1_vectype_p)
5567 {
5568 if (was_scalar_shift_arg)
5569 {
5570 /* If the argument was the same in all lanes create
5571 the correctly typed vector shift amount directly. */
5572 op1 = fold_convert (TREE_TYPE (vectype), op1);
5573 op1 = vect_init_vector (vinfo, stmt_info, op1, TREE_TYPE (vectype),
5574 !loop_vinfo ? gsi : NULL);
5575 vec_oprnd1 = vect_init_vector (vinfo, stmt_info, op1, vectype,
5576 !loop_vinfo ? gsi : NULL);
5577 vec_oprnds1.create (slp_node->vec_stmts_size);
5578 for (k = 0; k < slp_node->vec_stmts_size; k++)
5579 vec_oprnds1.quick_push (vec_oprnd1);
5580 }
5581 else if (dt[1] == vect_constant_def)
5582 /* The constant shift amount has been adjusted in place. */
5583 ;
5584 else
5585 gcc_assert (TYPE_MODE (op1_vectype) == TYPE_MODE (vectype));
5586 }
5587
5588   /* vec_oprnd1 is available if operand 1 should be of a scalar type
5589      (a special case for certain kinds of vector shifts); otherwise,
5590      operand 1 should be of a vector type (the usual case).  */
5591 vect_get_vec_defs (vinfo, stmt_info, slp_node, ncopies,
5592 op0, &vec_oprnds0,
5593 vec_oprnd1 ? NULL_TREE : op1, &vec_oprnds1);
5594
5595 /* Arguments are ready. Create the new vector stmt. */
5596 FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
5597 {
5598 /* For internal defs where we need to use a scalar shift arg
5599 extract the first lane. */
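	  /* E.g. for a V4SI shift-amount vector this builds (illustrative):
	       _tmp = BIT_FIELD_REF <vop1, 32, 0>;
	     i.e. an element-sized bit-field at bit offset zero.  */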
5600 if (scalar_shift_arg && dt[1] == vect_internal_def)
5601 {
5602 vop1 = vec_oprnds1[0];
5603 new_temp = make_ssa_name (TREE_TYPE (TREE_TYPE (vop1)));
5604 gassign *new_stmt
5605 = gimple_build_assign (new_temp,
5606 build3 (BIT_FIELD_REF, TREE_TYPE (new_temp),
5607 vop1,
5608 TYPE_SIZE (TREE_TYPE (new_temp)),
5609 bitsize_zero_node));
5610 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
5611 vop1 = new_temp;
5612 }
5613 else
5614 vop1 = vec_oprnds1[i];
5615 gassign *new_stmt = gimple_build_assign (vec_dest, code, vop0, vop1);
5616 new_temp = make_ssa_name (vec_dest, new_stmt);
5617 gimple_assign_set_lhs (new_stmt, new_temp);
5618 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
5619 if (slp_node)
5620 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
5621 else
5622 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
5623 }
5624
5625 if (!slp_node)
5626 *vec_stmt = STMT_VINFO_VEC_STMTS (stmt_info)[0];
5627
5628 vec_oprnds0.release ();
5629 vec_oprnds1.release ();
5630
5631 return true;
5632 }
5633
5634
5635 /* Function vectorizable_operation.
5636
5637 Check if STMT_INFO performs a binary, unary or ternary operation that can
5638 be vectorized.
5639 If VEC_STMT is also passed, vectorize STMT_INFO: create a vectorized
5640 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
5641 Return true if STMT_INFO is vectorizable in this way. */
5642
5643 static bool
5644 vectorizable_operation (vec_info *vinfo,
5645 stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
5646 gimple **vec_stmt, slp_tree slp_node,
5647 stmt_vector_for_cost *cost_vec)
5648 {
5649 tree vec_dest;
5650 tree scalar_dest;
5651 tree op0, op1 = NULL_TREE, op2 = NULL_TREE;
5652 tree vectype;
5653 loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
5654 enum tree_code code, orig_code;
5655 machine_mode vec_mode;
5656 tree new_temp;
5657 int op_type;
5658 optab optab;
5659 bool target_support_p;
5660 enum vect_def_type dt[3]
5661 = {vect_unknown_def_type, vect_unknown_def_type, vect_unknown_def_type};
5662 int ndts = 3;
5663 poly_uint64 nunits_in;
5664 poly_uint64 nunits_out;
5665 tree vectype_out;
5666 int ncopies, vec_num;
5667 int i;
5668 vec<tree> vec_oprnds0 = vNULL;
5669 vec<tree> vec_oprnds1 = vNULL;
5670 vec<tree> vec_oprnds2 = vNULL;
5671 tree vop0, vop1, vop2;
5672 bb_vec_info bb_vinfo = dyn_cast <bb_vec_info> (vinfo);
5673
5674 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
5675 return false;
5676
5677 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
5678 && ! vec_stmt)
5679 return false;
5680
5681 /* Is STMT a vectorizable binary/unary operation? */
5682 gassign *stmt = dyn_cast <gassign *> (stmt_info->stmt);
5683 if (!stmt)
5684 return false;
5685
5686 /* Loads and stores are handled in vectorizable_{load,store}. */
5687 if (STMT_VINFO_DATA_REF (stmt_info))
5688 return false;
5689
5690 orig_code = code = gimple_assign_rhs_code (stmt);
5691
5692 /* Shifts are handled in vectorizable_shift. */
5693 if (code == LSHIFT_EXPR
5694 || code == RSHIFT_EXPR
5695 || code == LROTATE_EXPR
5696 || code == RROTATE_EXPR)
5697 return false;
5698
5699 /* Comparisons are handled in vectorizable_comparison. */
5700 if (TREE_CODE_CLASS (code) == tcc_comparison)
5701 return false;
5702
5703 /* Conditions are handled in vectorizable_condition. */
5704 if (code == COND_EXPR)
5705 return false;
5706
5707 /* For pointer addition and subtraction, we should use the normal
5708 plus and minus for the vector operation. */
5709 if (code == POINTER_PLUS_EXPR)
5710 code = PLUS_EXPR;
5711 if (code == POINTER_DIFF_EXPR)
5712 code = MINUS_EXPR;
5713
5714 /* Support only unary or binary operations. */
5715 op_type = TREE_CODE_LENGTH (code);
5716 if (op_type != unary_op && op_type != binary_op && op_type != ternary_op)
5717 {
5718 if (dump_enabled_p ())
5719 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5720 "num. args = %d (not unary/binary/ternary op).\n",
5721 op_type);
5722 return false;
5723 }
5724
5725 scalar_dest = gimple_assign_lhs (stmt);
5726 vectype_out = STMT_VINFO_VECTYPE (stmt_info);
5727
5728 /* Most operations cannot handle bit-precision types without extra
5729 truncations. */
5730 bool mask_op_p = VECTOR_BOOLEAN_TYPE_P (vectype_out);
5731 if (!mask_op_p
5732 && !type_has_mode_precision_p (TREE_TYPE (scalar_dest))
5733       /* Exceptions are bitwise binary operations.  */
5734 && code != BIT_IOR_EXPR
5735 && code != BIT_XOR_EXPR
5736 && code != BIT_AND_EXPR)
5737 {
5738 if (dump_enabled_p ())
5739 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5740 "bit-precision arithmetic not supported.\n");
5741 return false;
5742 }
5743
5744 slp_tree slp_op0;
5745 if (!vect_is_simple_use (vinfo, stmt_info, slp_node,
5746 0, &op0, &slp_op0, &dt[0], &vectype))
5747 {
5748 if (dump_enabled_p ())
5749 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5750 "use not simple.\n");
5751 return false;
5752 }
5753 /* If op0 is an external or constant def, infer the vector type
5754 from the scalar type. */
5755 if (!vectype)
5756 {
5757       /* For a boolean type we cannot determine the vectype from an
5758 	 invariant value (we don't know whether it is a vector of
5759 	 booleans or a vector of integers).  We use the output
5760 	 vectype because operations on booleans don't change
5761 	 the type.  */
5762 if (VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (op0)))
5763 {
5764 if (!VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (scalar_dest)))
5765 {
5766 if (dump_enabled_p ())
5767 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5768 "not supported operation on bool value.\n");
5769 return false;
5770 }
5771 vectype = vectype_out;
5772 }
5773 else
5774 vectype = get_vectype_for_scalar_type (vinfo, TREE_TYPE (op0),
5775 slp_node);
5776 }
5777 if (vec_stmt)
5778 gcc_assert (vectype);
5779 if (!vectype)
5780 {
5781 if (dump_enabled_p ())
5782 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5783 "no vectype for scalar type %T\n",
5784 TREE_TYPE (op0));
5785
5786 return false;
5787 }
5788
5789 nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
5790 nunits_in = TYPE_VECTOR_SUBPARTS (vectype);
5791 if (maybe_ne (nunits_out, nunits_in))
5792 return false;
5793
5794 tree vectype2 = NULL_TREE, vectype3 = NULL_TREE;
5795 slp_tree slp_op1 = NULL, slp_op2 = NULL;
5796 if (op_type == binary_op || op_type == ternary_op)
5797 {
5798 if (!vect_is_simple_use (vinfo, stmt_info, slp_node,
5799 1, &op1, &slp_op1, &dt[1], &vectype2))
5800 {
5801 if (dump_enabled_p ())
5802 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5803 "use not simple.\n");
5804 return false;
5805 }
5806 }
5807 if (op_type == ternary_op)
5808 {
5809 if (!vect_is_simple_use (vinfo, stmt_info, slp_node,
5810 2, &op2, &slp_op2, &dt[2], &vectype3))
5811 {
5812 if (dump_enabled_p ())
5813 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5814 "use not simple.\n");
5815 return false;
5816 }
5817 }
5818
5819 /* Multiple types in SLP are handled by creating the appropriate number of
5820 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
5821 case of SLP. */
5822 if (slp_node)
5823 {
5824 ncopies = 1;
5825 vec_num = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
5826 }
5827 else
5828 {
5829 ncopies = vect_get_num_copies (loop_vinfo, vectype);
5830 vec_num = 1;
5831 }
5832
5833 gcc_assert (ncopies >= 1);
5834
5835 /* Reject attempts to combine mask types with nonmask types, e.g. if
5836 we have an AND between a (nonmask) boolean loaded from memory and
5837 a (mask) boolean result of a comparison.
5838
5839 TODO: We could easily fix these cases up using pattern statements. */
5840 if (VECTOR_BOOLEAN_TYPE_P (vectype) != mask_op_p
5841 || (vectype2 && VECTOR_BOOLEAN_TYPE_P (vectype2) != mask_op_p)
5842 || (vectype3 && VECTOR_BOOLEAN_TYPE_P (vectype3) != mask_op_p))
5843 {
5844 if (dump_enabled_p ())
5845 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5846 "mixed mask and nonmask vector types\n");
5847 return false;
5848 }
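
  /* For instance (illustrative): with "_Bool *p", the statement
       x = p[i] & (a[i] < b[i]);
     combines a nonmask boolean loaded from memory with the mask-style
     result of a comparison, which is exactly the mixed case rejected
     above.  */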
5849
5850 /* Supportable by target? */
5851
5852 vec_mode = TYPE_MODE (vectype);
5853 if (code == MULT_HIGHPART_EXPR)
5854 target_support_p = can_mult_highpart_p (vec_mode, TYPE_UNSIGNED (vectype));
5855 else
5856 {
5857 optab = optab_for_tree_code (code, vectype, optab_default);
5858 if (!optab)
5859 {
5860 if (dump_enabled_p ())
5861 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5862 "no optab.\n");
5863 return false;
5864 }
5865 target_support_p = (optab_handler (optab, vec_mode)
5866 != CODE_FOR_nothing);
5867 }
5868
5869 if (!target_support_p)
5870 {
5871 if (dump_enabled_p ())
5872 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5873 "op not supported by target.\n");
5874 /* Check only during analysis. */
5875 if (maybe_ne (GET_MODE_SIZE (vec_mode), UNITS_PER_WORD)
5876 || (!vec_stmt && !vect_worthwhile_without_simd_p (vinfo, code)))
5877 return false;
5878 if (dump_enabled_p ())
5879 dump_printf_loc (MSG_NOTE, vect_location,
5880 "proceeding using word mode.\n");
5881 }
5882
5883 /* Worthwhile without SIMD support? Check only during analysis. */
5884 if (!VECTOR_MODE_P (vec_mode)
5885 && !vec_stmt
5886 && !vect_worthwhile_without_simd_p (vinfo, code))
5887 {
5888 if (dump_enabled_p ())
5889 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5890 "not worthwhile without SIMD support.\n");
5891 return false;
5892 }
5893
5894 int reduc_idx = STMT_VINFO_REDUC_IDX (stmt_info);
5895 vec_loop_masks *masks = (loop_vinfo ? &LOOP_VINFO_MASKS (loop_vinfo) : NULL);
5896 internal_fn cond_fn = get_conditional_internal_fn (code);
5897
5898 if (!vec_stmt) /* transformation not required. */
5899 {
5900 /* If this operation is part of a reduction, a fully-masked loop
5901 should only change the active lanes of the reduction chain,
5902 keeping the inactive lanes as-is. */
5903 if (loop_vinfo
5904 && LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo)
5905 && reduc_idx >= 0)
5906 {
5907 if (cond_fn == IFN_LAST
5908 || !direct_internal_fn_supported_p (cond_fn, vectype,
5909 OPTIMIZE_FOR_SPEED))
5910 {
5911 if (dump_enabled_p ())
5912 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5913 "can't use a fully-masked loop because no"
5914 " conditional operation is available.\n");
5915 LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo) = false;
5916 }
5917 else
5918 vect_record_loop_mask (loop_vinfo, masks, ncopies * vec_num,
5919 vectype, NULL);
5920 }
5921
5922 /* Put types on constant and invariant SLP children. */
5923 if (slp_node
5924 && (!vect_maybe_update_slp_op_vectype (slp_op0, vectype)
5925 || !vect_maybe_update_slp_op_vectype (slp_op1, vectype)
5926 || !vect_maybe_update_slp_op_vectype (slp_op2, vectype)))
5927 {
5928 if (dump_enabled_p ())
5929 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5930 "incompatible vector types for invariants\n");
5931 return false;
5932 }
5933
5934 STMT_VINFO_TYPE (stmt_info) = op_vec_info_type;
5935 DUMP_VECT_SCOPE ("vectorizable_operation");
5936 vect_model_simple_cost (vinfo, stmt_info,
5937 ncopies, dt, ndts, slp_node, cost_vec);
5938 return true;
5939 }
5940
5941 /* Transform. */
5942
5943 if (dump_enabled_p ())
5944 dump_printf_loc (MSG_NOTE, vect_location,
5945 "transform binary/unary operation.\n");
5946
5947 bool masked_loop_p = loop_vinfo && LOOP_VINFO_FULLY_MASKED_P (loop_vinfo);
5948
5949 /* POINTER_DIFF_EXPR has pointer arguments which are vectorized as
5950 vectors with unsigned elements, but the result is signed. So, we
5951 need to compute the MINUS_EXPR into vectype temporary and
5952 VIEW_CONVERT_EXPR it into the final vectype_out result. */
5953 tree vec_cvt_dest = NULL_TREE;
5954 if (orig_code == POINTER_DIFF_EXPR)
5955 {
5956 vec_dest = vect_create_destination_var (scalar_dest, vectype);
5957 vec_cvt_dest = vect_create_destination_var (scalar_dest, vectype_out);
5958 }
5959 /* Handle def. */
5960 else
5961 vec_dest = vect_create_destination_var (scalar_dest, vectype_out);
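
  /* E.g. for a POINTER_DIFF_EXPR the sequence emitted below looks roughly
     like (illustrative, types abbreviated):
       vect_tmp = vptr1 - vptr2;                       -- unsigned vectype
       vect_res = VIEW_CONVERT_EXPR <vectype_out> (vect_tmp);
     which is what the vec_cvt_dest handling in the loop below produces.  */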
5962
5963 /* In case the vectorization factor (VF) is bigger than the number
5964 of elements that we can fit in a vectype (nunits), we have to generate
5965      more than one vector stmt - i.e., we need to "unroll" the
5966 vector stmt by a factor VF/nunits. In doing so, we record a pointer
5967 from one copy of the vector stmt to the next, in the field
5968 STMT_VINFO_RELATED_STMT. This is necessary in order to allow following
5969 stages to find the correct vector defs to be used when vectorizing
5970 stmts that use the defs of the current stmt. The example below
5971 illustrates the vectorization process when VF=16 and nunits=4 (i.e.,
5972 we need to create 4 vectorized stmts):
5973
5974 before vectorization:
5975 RELATED_STMT VEC_STMT
5976 S1: x = memref - -
5977 S2: z = x + 1 - -
5978
5979 step 1: vectorize stmt S1 (done in vectorizable_load. See more details
5980 there):
5981 RELATED_STMT VEC_STMT
5982 VS1_0: vx0 = memref0 VS1_1 -
5983 VS1_1: vx1 = memref1 VS1_2 -
5984 VS1_2: vx2 = memref2 VS1_3 -
5985 VS1_3: vx3 = memref3 - -
5986 S1: x = load - VS1_0
5987 S2: z = x + 1 - -
5988
5989 step2: vectorize stmt S2 (done here):
5990 To vectorize stmt S2 we first need to find the relevant vector
5991 def for the first operand 'x'. This is, as usual, obtained from
5992 the vector stmt recorded in the STMT_VINFO_VEC_STMT of the stmt
5993 that defines 'x' (S1). This way we find the stmt VS1_0, and the
5994 relevant vector def 'vx0'. Having found 'vx0' we can generate
5995 the vector stmt VS2_0, and as usual, record it in the
5996 STMT_VINFO_VEC_STMT of stmt S2.
5997 When creating the second copy (VS2_1), we obtain the relevant vector
5998 def from the vector stmt recorded in the STMT_VINFO_RELATED_STMT of
5999 stmt VS1_0. This way we find the stmt VS1_1 and the relevant
6000 vector def 'vx1'. Using 'vx1' we create stmt VS2_1 and record a
6001 pointer to it in the STMT_VINFO_RELATED_STMT of the vector stmt VS2_0.
6002 Similarly when creating stmts VS2_2 and VS2_3. This is the resulting
6003 chain of stmts and pointers:
6004 RELATED_STMT VEC_STMT
6005 VS1_0: vx0 = memref0 VS1_1 -
6006 VS1_1: vx1 = memref1 VS1_2 -
6007 VS1_2: vx2 = memref2 VS1_3 -
6008 VS1_3: vx3 = memref3 - -
6009 S1: x = load - VS1_0
6010 VS2_0: vz0 = vx0 + v1 VS2_1 -
6011 VS2_1: vz1 = vx1 + v1 VS2_2 -
6012 VS2_2: vz2 = vx2 + v1 VS2_3 -
6013 VS2_3: vz3 = vx3 + v1 - -
6014 S2: z = x + 1 - VS2_0 */
6015
6016 vect_get_vec_defs (vinfo, stmt_info, slp_node, ncopies,
6017 op0, &vec_oprnds0, op1, &vec_oprnds1, op2, &vec_oprnds2);
6018 /* Arguments are ready. Create the new vector stmt. */
6019 FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
6020 {
6021 gimple *new_stmt = NULL;
6022 vop1 = ((op_type == binary_op || op_type == ternary_op)
6023 ? vec_oprnds1[i] : NULL_TREE);
6024 vop2 = ((op_type == ternary_op) ? vec_oprnds2[i] : NULL_TREE);
6025 if (masked_loop_p && reduc_idx >= 0)
6026 {
6027 /* Perform the operation on active elements only and take
6028 inactive elements from the reduction chain input. */
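	  /* E.g. for a PLUS_EXPR reduction this emits (illustrative):
	       new_temp = .COND_ADD (loop_mask, vop0, vop1, reduction_input);
	     so that inactive lanes keep the value of the reduction input.  */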
6029 gcc_assert (!vop2);
6030 vop2 = reduc_idx == 1 ? vop1 : vop0;
6031 tree mask = vect_get_loop_mask (gsi, masks, vec_num * ncopies,
6032 vectype, i);
6033 gcall *call = gimple_build_call_internal (cond_fn, 4, mask,
6034 vop0, vop1, vop2);
6035 new_temp = make_ssa_name (vec_dest, call);
6036 gimple_call_set_lhs (call, new_temp);
6037 gimple_call_set_nothrow (call, true);
6038 vect_finish_stmt_generation (vinfo, stmt_info, call, gsi);
6039 new_stmt = call;
6040 }
6041 else
6042 {
6043 new_stmt = gimple_build_assign (vec_dest, code, vop0, vop1, vop2);
6044 new_temp = make_ssa_name (vec_dest, new_stmt);
6045 gimple_assign_set_lhs (new_stmt, new_temp);
6046 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
6047 if (vec_cvt_dest)
6048 {
6049 new_temp = build1 (VIEW_CONVERT_EXPR, vectype_out, new_temp);
6050 new_stmt = gimple_build_assign (vec_cvt_dest, VIEW_CONVERT_EXPR,
6051 new_temp);
6052 new_temp = make_ssa_name (vec_cvt_dest, new_stmt);
6053 gimple_assign_set_lhs (new_stmt, new_temp);
6054 vect_finish_stmt_generation (vinfo, stmt_info,
6055 new_stmt, gsi);
6056 }
6057 }
6058 if (slp_node)
6059 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
6060 else
6061 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
6062 }
6063
6064 if (!slp_node)
6065 *vec_stmt = STMT_VINFO_VEC_STMTS (stmt_info)[0];
6066
6067 vec_oprnds0.release ();
6068 vec_oprnds1.release ();
6069 vec_oprnds2.release ();
6070
6071 return true;
6072 }
6073
6074 /* A helper function to ensure data reference DR_INFO's base alignment. */
6075
6076 static void
6077 ensure_base_align (dr_vec_info *dr_info)
6078 {
6079 if (dr_info->misalignment == DR_MISALIGNMENT_UNINITIALIZED)
6080 return;
6081
6082 if (dr_info->base_misaligned)
6083 {
6084 tree base_decl = dr_info->base_decl;
6085
6086       /* We should only be able to increase the alignment of a base object
6087 	  if we know what its new alignment should be at compile time.  */
6088 unsigned HOST_WIDE_INT align_base_to =
6089 DR_TARGET_ALIGNMENT (dr_info).to_constant () * BITS_PER_UNIT;
6090
6091 if (decl_in_symtab_p (base_decl))
6092 symtab_node::get (base_decl)->increase_alignment (align_base_to);
6093 else if (DECL_ALIGN (base_decl) < align_base_to)
6094 {
6095 SET_DECL_ALIGN (base_decl, align_base_to);
6096 DECL_USER_ALIGN (base_decl) = 1;
6097 }
6098 dr_info->base_misaligned = false;
6099 }
6100 }
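
/* For example (illustrative): if DR_TARGET_ALIGNMENT for the access is
   32 bytes and the base object is a file-scope "static int a[1024];"
   with the default alignment, the code above increases the alignment
   of "a" to 32 bytes (256 bits), either through the symbol table or by
   setting DECL_ALIGN and DECL_USER_ALIGN directly, so that aligned
   vector accesses become possible.  */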
6101
6102
6103 /* Function get_group_alias_ptr_type.
6104
6105 Return the alias type for the group starting at FIRST_STMT_INFO. */
6106
6107 static tree
6108 get_group_alias_ptr_type (stmt_vec_info first_stmt_info)
6109 {
6110 struct data_reference *first_dr, *next_dr;
6111
6112 first_dr = STMT_VINFO_DATA_REF (first_stmt_info);
6113 stmt_vec_info next_stmt_info = DR_GROUP_NEXT_ELEMENT (first_stmt_info);
6114 while (next_stmt_info)
6115 {
6116 next_dr = STMT_VINFO_DATA_REF (next_stmt_info);
6117 if (get_alias_set (DR_REF (first_dr))
6118 != get_alias_set (DR_REF (next_dr)))
6119 {
6120 if (dump_enabled_p ())
6121 dump_printf_loc (MSG_NOTE, vect_location,
6122 "conflicting alias set types.\n");
6123 return ptr_type_node;
6124 }
6125 next_stmt_info = DR_GROUP_NEXT_ELEMENT (next_stmt_info);
6126 }
6127 return reference_alias_ptr_type (DR_REF (first_dr));
6128 }
6129
6130
6131 /* Function scan_operand_equal_p.
6132
6133 Helper function for check_scan_store. Compare two references
6134 with .GOMP_SIMD_LANE bases. */
6135
6136 static bool
6137 scan_operand_equal_p (tree ref1, tree ref2)
6138 {
6139 tree ref[2] = { ref1, ref2 };
6140 poly_int64 bitsize[2], bitpos[2];
6141 tree offset[2], base[2];
6142 for (int i = 0; i < 2; ++i)
6143 {
6144 machine_mode mode;
6145 int unsignedp, reversep, volatilep = 0;
6146 base[i] = get_inner_reference (ref[i], &bitsize[i], &bitpos[i],
6147 &offset[i], &mode, &unsignedp,
6148 &reversep, &volatilep);
6149 if (reversep || volatilep || maybe_ne (bitpos[i], 0))
6150 return false;
6151 if (TREE_CODE (base[i]) == MEM_REF
6152 && offset[i] == NULL_TREE
6153 && TREE_CODE (TREE_OPERAND (base[i], 0)) == SSA_NAME)
6154 {
6155 gimple *def_stmt = SSA_NAME_DEF_STMT (TREE_OPERAND (base[i], 0));
6156 if (is_gimple_assign (def_stmt)
6157 && gimple_assign_rhs_code (def_stmt) == POINTER_PLUS_EXPR
6158 && TREE_CODE (gimple_assign_rhs1 (def_stmt)) == ADDR_EXPR
6159 && TREE_CODE (gimple_assign_rhs2 (def_stmt)) == SSA_NAME)
6160 {
6161 if (maybe_ne (mem_ref_offset (base[i]), 0))
6162 return false;
6163 base[i] = TREE_OPERAND (gimple_assign_rhs1 (def_stmt), 0);
6164 offset[i] = gimple_assign_rhs2 (def_stmt);
6165 }
6166 }
6167 }
6168
6169 if (!operand_equal_p (base[0], base[1], 0))
6170 return false;
6171 if (maybe_ne (bitsize[0], bitsize[1]))
6172 return false;
6173 if (offset[0] != offset[1])
6174 {
6175 if (!offset[0] || !offset[1])
6176 return false;
6177 if (!operand_equal_p (offset[0], offset[1], 0))
6178 {
6179 tree step[2];
6180 for (int i = 0; i < 2; ++i)
6181 {
6182 step[i] = integer_one_node;
6183 if (TREE_CODE (offset[i]) == SSA_NAME)
6184 {
6185 gimple *def_stmt = SSA_NAME_DEF_STMT (offset[i]);
6186 if (is_gimple_assign (def_stmt)
6187 && gimple_assign_rhs_code (def_stmt) == MULT_EXPR
6188 && (TREE_CODE (gimple_assign_rhs2 (def_stmt))
6189 == INTEGER_CST))
6190 {
6191 step[i] = gimple_assign_rhs2 (def_stmt);
6192 offset[i] = gimple_assign_rhs1 (def_stmt);
6193 }
6194 }
6195 else if (TREE_CODE (offset[i]) == MULT_EXPR)
6196 {
6197 step[i] = TREE_OPERAND (offset[i], 1);
6198 offset[i] = TREE_OPERAND (offset[i], 0);
6199 }
6200 tree rhs1 = NULL_TREE;
6201 if (TREE_CODE (offset[i]) == SSA_NAME)
6202 {
6203 gimple *def_stmt = SSA_NAME_DEF_STMT (offset[i]);
6204 if (gimple_assign_cast_p (def_stmt))
6205 rhs1 = gimple_assign_rhs1 (def_stmt);
6206 }
6207 else if (CONVERT_EXPR_P (offset[i]))
6208 rhs1 = TREE_OPERAND (offset[i], 0);
6209 if (rhs1
6210 && INTEGRAL_TYPE_P (TREE_TYPE (rhs1))
6211 && INTEGRAL_TYPE_P (TREE_TYPE (offset[i]))
6212 && (TYPE_PRECISION (TREE_TYPE (offset[i]))
6213 >= TYPE_PRECISION (TREE_TYPE (rhs1))))
6214 offset[i] = rhs1;
6215 }
6216 if (!operand_equal_p (offset[0], offset[1], 0)
6217 || !operand_equal_p (step[0], step[1], 0))
6218 return false;
6219 }
6220 }
6221 return true;
6222 }
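
/* For example, in the scan patterns matched below the store
     D.2042[_25] = _28;
   and the load
     _27 = D.2042[_25];
   compare equal here: both decompose to the same base D.2042 with the
   offset given by the same SSA name, possibly hidden behind an equal
   constant step or a widening conversion of the offset.  */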
6223
6224
6225 enum scan_store_kind {
6226 /* Normal permutation. */
6227 scan_store_kind_perm,
6228
6229 /* Whole vector left shift permutation with zero init. */
6230 scan_store_kind_lshift_zero,
6231
6232 /* Whole vector left shift permutation and VEC_COND_EXPR. */
6233 scan_store_kind_lshift_cond
6234 };
6235
6236 /* Function scan_store_can_perm_p.
6237 
6238    Verify if we can perform the needed permutations or whole vector shifts.
6239    Return -1 on failure, otherwise the exact log2 of vectype's nunits.
6240    USE_WHOLE_VECTOR, if nonnull, is filled with one enum scan_store_kind
6241    per step saying which operation to perform at that step.  */
6242
6243 static int
6244 scan_store_can_perm_p (tree vectype, tree init,
6245 vec<enum scan_store_kind> *use_whole_vector = NULL)
6246 {
6247 enum machine_mode vec_mode = TYPE_MODE (vectype);
6248 unsigned HOST_WIDE_INT nunits;
6249 if (!TYPE_VECTOR_SUBPARTS (vectype).is_constant (&nunits))
6250 return -1;
6251 int units_log2 = exact_log2 (nunits);
6252 if (units_log2 <= 0)
6253 return -1;
6254
6255 int i;
6256 enum scan_store_kind whole_vector_shift_kind = scan_store_kind_perm;
6257 for (i = 0; i <= units_log2; ++i)
6258 {
6259 unsigned HOST_WIDE_INT j, k;
6260 enum scan_store_kind kind = scan_store_kind_perm;
6261 vec_perm_builder sel (nunits, nunits, 1);
6262 sel.quick_grow (nunits);
6263 if (i == units_log2)
6264 {
6265 for (j = 0; j < nunits; ++j)
6266 sel[j] = nunits - 1;
6267 }
6268 else
6269 {
6270 for (j = 0; j < (HOST_WIDE_INT_1U << i); ++j)
6271 sel[j] = j;
6272 for (k = 0; j < nunits; ++j, ++k)
6273 sel[j] = nunits + k;
6274 }
6275 vec_perm_indices indices (sel, i == units_log2 ? 1 : 2, nunits);
6276 if (!can_vec_perm_const_p (vec_mode, indices))
6277 {
6278 if (i == units_log2)
6279 return -1;
6280
6281 if (whole_vector_shift_kind == scan_store_kind_perm)
6282 {
6283 if (optab_handler (vec_shl_optab, vec_mode) == CODE_FOR_nothing)
6284 return -1;
6285 whole_vector_shift_kind = scan_store_kind_lshift_zero;
6286 	      /* Whole vector shifts shift in zeros, so if init is an all-zeros
6287 		 constant, there is no need to do anything further.  */
6288 if ((TREE_CODE (init) != INTEGER_CST
6289 && TREE_CODE (init) != REAL_CST)
6290 || !initializer_zerop (init))
6291 {
6292 tree masktype = truth_type_for (vectype);
6293 if (!expand_vec_cond_expr_p (vectype, masktype, VECTOR_CST))
6294 return -1;
6295 whole_vector_shift_kind = scan_store_kind_lshift_cond;
6296 }
6297 }
6298 kind = whole_vector_shift_kind;
6299 }
6300 if (use_whole_vector)
6301 {
6302 if (kind != scan_store_kind_perm && use_whole_vector->is_empty ())
6303 use_whole_vector->safe_grow_cleared (i);
6304 if (kind != scan_store_kind_perm || !use_whole_vector->is_empty ())
6305 use_whole_vector->safe_push (kind);
6306 }
6307 }
6308
6309 return units_log2;
6310 }
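
/* As an illustration, for an 8-lane vector the selectors checked above are
     step 0:  { 0, 8, 9, 10, 11, 12, 13, 14 }
     step 1:  { 0, 1, 8, 9, 10, 11, 12, 13 }
     step 2:  { 0, 1, 2, 3, 8, 9, 10, 11 }
     final:   { 7, 7, 7, 7, 7, 7, 7, 7 }
   matching the VEC_PERM_EXPRs shown in the check_scan_store comment below.  */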
6311
6312
6313 /* Function check_scan_store.
6314
6315 Check magic stores for #pragma omp scan {in,ex}clusive reductions. */
6316
6317 static bool
6318 check_scan_store (vec_info *vinfo, stmt_vec_info stmt_info, tree vectype,
6319 enum vect_def_type rhs_dt, bool slp, tree mask,
6320 vect_memory_access_type memory_access_type)
6321 {
6322 loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
6323 dr_vec_info *dr_info = STMT_VINFO_DR_INFO (stmt_info);
6324 tree ref_type;
6325
6326 gcc_assert (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) > 1);
6327 if (slp
6328 || mask
6329 || memory_access_type != VMAT_CONTIGUOUS
6330 || TREE_CODE (DR_BASE_ADDRESS (dr_info->dr)) != ADDR_EXPR
6331 || !VAR_P (TREE_OPERAND (DR_BASE_ADDRESS (dr_info->dr), 0))
6332 || loop_vinfo == NULL
6333 || LOOP_VINFO_FULLY_MASKED_P (loop_vinfo)
6334 || STMT_VINFO_GROUPED_ACCESS (stmt_info)
6335 || !integer_zerop (get_dr_vinfo_offset (vinfo, dr_info))
6336 || !integer_zerop (DR_INIT (dr_info->dr))
6337 || !(ref_type = reference_alias_ptr_type (DR_REF (dr_info->dr)))
6338 || !alias_sets_conflict_p (get_alias_set (vectype),
6339 get_alias_set (TREE_TYPE (ref_type))))
6340 {
6341 if (dump_enabled_p ())
6342 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6343 "unsupported OpenMP scan store.\n");
6344 return false;
6345 }
6346
6347   /* We need to pattern match code built by OpenMP lowering and simplified
6348      by subsequent optimizations into something we can handle.
6349 #pragma omp simd reduction(inscan,+:r)
6350 for (...)
6351 {
6352 r += something ();
6353 #pragma omp scan inclusive (r)
6354 use (r);
6355 }
6356 shall have body with:
6357 // Initialization for input phase, store the reduction initializer:
6358 _20 = .GOMP_SIMD_LANE (simduid.3_14(D), 0);
6359 _21 = .GOMP_SIMD_LANE (simduid.3_14(D), 1);
6360 D.2042[_21] = 0;
6361 // Actual input phase:
6362 ...
6363 r.0_5 = D.2042[_20];
6364 _6 = _4 + r.0_5;
6365 D.2042[_20] = _6;
6366 // Initialization for scan phase:
6367 _25 = .GOMP_SIMD_LANE (simduid.3_14(D), 2);
6368 _26 = D.2043[_25];
6369 _27 = D.2042[_25];
6370 _28 = _26 + _27;
6371 D.2043[_25] = _28;
6372 D.2042[_25] = _28;
6373 // Actual scan phase:
6374 ...
6375 r.1_8 = D.2042[_20];
6376 ...
6377 The "omp simd array" variable D.2042 holds the privatized copy used
6378 inside of the loop and D.2043 is another one that holds copies of
6379 the current original list item. The separate GOMP_SIMD_LANE ifn
6380 kinds are there in order to allow optimizing the initializer store
6381 and combiner sequence, e.g. if it is originally some C++ish user
6382 defined reduction, but allow the vectorizer to pattern recognize it
6383      and turn it into the appropriate vectorized scan.
6384
6385 For exclusive scan, this is slightly different:
6386 #pragma omp simd reduction(inscan,+:r)
6387 for (...)
6388 {
6389 use (r);
6390 #pragma omp scan exclusive (r)
6391 r += something ();
6392 }
6393 shall have body with:
6394 // Initialization for input phase, store the reduction initializer:
6395 _20 = .GOMP_SIMD_LANE (simduid.3_14(D), 0);
6396 _21 = .GOMP_SIMD_LANE (simduid.3_14(D), 1);
6397 D.2042[_21] = 0;
6398 // Actual input phase:
6399 ...
6400 r.0_5 = D.2042[_20];
6401 _6 = _4 + r.0_5;
6402 D.2042[_20] = _6;
6403 // Initialization for scan phase:
6404 _25 = .GOMP_SIMD_LANE (simduid.3_14(D), 3);
6405 _26 = D.2043[_25];
6406 D.2044[_25] = _26;
6407 _27 = D.2042[_25];
6408 _28 = _26 + _27;
6409 D.2043[_25] = _28;
6410 // Actual scan phase:
6411 ...
6412 r.1_8 = D.2044[_20];
6413 ... */
6414
6415 if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) == 2)
6416 {
6417 /* Match the D.2042[_21] = 0; store above. Just require that
6418 it is a constant or external definition store. */
6419 if (rhs_dt != vect_constant_def && rhs_dt != vect_external_def)
6420 {
6421 fail_init:
6422 if (dump_enabled_p ())
6423 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6424 "unsupported OpenMP scan initializer store.\n");
6425 return false;
6426 }
6427
6428 if (! loop_vinfo->scan_map)
6429 loop_vinfo->scan_map = new hash_map<tree, tree>;
6430 tree var = TREE_OPERAND (DR_BASE_ADDRESS (dr_info->dr), 0);
6431 tree &cached = loop_vinfo->scan_map->get_or_insert (var);
6432 if (cached)
6433 goto fail_init;
6434 cached = gimple_assign_rhs1 (STMT_VINFO_STMT (stmt_info));
6435
6436 /* These stores can be vectorized normally. */
6437 return true;
6438 }
6439
6440 if (rhs_dt != vect_internal_def)
6441 {
6442 fail:
6443 if (dump_enabled_p ())
6444 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6445 "unsupported OpenMP scan combiner pattern.\n");
6446 return false;
6447 }
6448
6449 gimple *stmt = STMT_VINFO_STMT (stmt_info);
6450 tree rhs = gimple_assign_rhs1 (stmt);
6451 if (TREE_CODE (rhs) != SSA_NAME)
6452 goto fail;
6453
6454 gimple *other_store_stmt = NULL;
6455 tree var = TREE_OPERAND (DR_BASE_ADDRESS (dr_info->dr), 0);
6456 bool inscan_var_store
6457 = lookup_attribute ("omp simd inscan", DECL_ATTRIBUTES (var)) != NULL;
6458
6459 if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) == 4)
6460 {
6461 if (!inscan_var_store)
6462 {
6463 use_operand_p use_p;
6464 imm_use_iterator iter;
6465 FOR_EACH_IMM_USE_FAST (use_p, iter, rhs)
6466 {
6467 gimple *use_stmt = USE_STMT (use_p);
6468 if (use_stmt == stmt || is_gimple_debug (use_stmt))
6469 continue;
6470 if (gimple_bb (use_stmt) != gimple_bb (stmt)
6471 || !is_gimple_assign (use_stmt)
6472 || gimple_assign_rhs_class (use_stmt) != GIMPLE_BINARY_RHS
6473 || other_store_stmt
6474 || TREE_CODE (gimple_assign_lhs (use_stmt)) != SSA_NAME)
6475 goto fail;
6476 other_store_stmt = use_stmt;
6477 }
6478 if (other_store_stmt == NULL)
6479 goto fail;
6480 rhs = gimple_assign_lhs (other_store_stmt);
6481 if (!single_imm_use (rhs, &use_p, &other_store_stmt))
6482 goto fail;
6483 }
6484 }
6485 else if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) == 3)
6486 {
6487 use_operand_p use_p;
6488 imm_use_iterator iter;
6489 FOR_EACH_IMM_USE_FAST (use_p, iter, rhs)
6490 {
6491 gimple *use_stmt = USE_STMT (use_p);
6492 if (use_stmt == stmt || is_gimple_debug (use_stmt))
6493 continue;
6494 if (other_store_stmt)
6495 goto fail;
6496 other_store_stmt = use_stmt;
6497 }
6498 }
6499 else
6500 goto fail;
6501
6502 gimple *def_stmt = SSA_NAME_DEF_STMT (rhs);
6503 if (gimple_bb (def_stmt) != gimple_bb (stmt)
6504 || !is_gimple_assign (def_stmt)
6505 || gimple_assign_rhs_class (def_stmt) != GIMPLE_BINARY_RHS)
6506 goto fail;
6507
6508 enum tree_code code = gimple_assign_rhs_code (def_stmt);
6509 /* For pointer addition, we should use the normal plus for the vector
6510 operation. */
6511 switch (code)
6512 {
6513 case POINTER_PLUS_EXPR:
6514 code = PLUS_EXPR;
6515 break;
6516 case MULT_HIGHPART_EXPR:
6517 goto fail;
6518 default:
6519 break;
6520 }
6521 if (TREE_CODE_LENGTH (code) != binary_op || !commutative_tree_code (code))
6522 goto fail;
6523
6524 tree rhs1 = gimple_assign_rhs1 (def_stmt);
6525 tree rhs2 = gimple_assign_rhs2 (def_stmt);
6526 if (TREE_CODE (rhs1) != SSA_NAME || TREE_CODE (rhs2) != SSA_NAME)
6527 goto fail;
6528
6529 gimple *load1_stmt = SSA_NAME_DEF_STMT (rhs1);
6530 gimple *load2_stmt = SSA_NAME_DEF_STMT (rhs2);
6531 if (gimple_bb (load1_stmt) != gimple_bb (stmt)
6532 || !gimple_assign_load_p (load1_stmt)
6533 || gimple_bb (load2_stmt) != gimple_bb (stmt)
6534 || !gimple_assign_load_p (load2_stmt))
6535 goto fail;
6536
6537 stmt_vec_info load1_stmt_info = loop_vinfo->lookup_stmt (load1_stmt);
6538 stmt_vec_info load2_stmt_info = loop_vinfo->lookup_stmt (load2_stmt);
6539 if (load1_stmt_info == NULL
6540 || load2_stmt_info == NULL
6541 || (STMT_VINFO_SIMD_LANE_ACCESS_P (load1_stmt_info)
6542 != STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info))
6543 || (STMT_VINFO_SIMD_LANE_ACCESS_P (load2_stmt_info)
6544 != STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info)))
6545 goto fail;
6546
6547 if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) == 4 && inscan_var_store)
6548 {
6549 dr_vec_info *load1_dr_info = STMT_VINFO_DR_INFO (load1_stmt_info);
6550 if (TREE_CODE (DR_BASE_ADDRESS (load1_dr_info->dr)) != ADDR_EXPR
6551 || !VAR_P (TREE_OPERAND (DR_BASE_ADDRESS (load1_dr_info->dr), 0)))
6552 goto fail;
6553 tree var1 = TREE_OPERAND (DR_BASE_ADDRESS (load1_dr_info->dr), 0);
6554 tree lrhs;
6555 if (lookup_attribute ("omp simd inscan", DECL_ATTRIBUTES (var1)))
6556 lrhs = rhs1;
6557 else
6558 lrhs = rhs2;
6559 use_operand_p use_p;
6560 imm_use_iterator iter;
6561 FOR_EACH_IMM_USE_FAST (use_p, iter, lrhs)
6562 {
6563 gimple *use_stmt = USE_STMT (use_p);
6564 if (use_stmt == def_stmt || is_gimple_debug (use_stmt))
6565 continue;
6566 if (other_store_stmt)
6567 goto fail;
6568 other_store_stmt = use_stmt;
6569 }
6570 }
6571
6572 if (other_store_stmt == NULL)
6573 goto fail;
6574 if (gimple_bb (other_store_stmt) != gimple_bb (stmt)
6575 || !gimple_store_p (other_store_stmt))
6576 goto fail;
6577
6578 stmt_vec_info other_store_stmt_info
6579 = loop_vinfo->lookup_stmt (other_store_stmt);
6580 if (other_store_stmt_info == NULL
6581 || (STMT_VINFO_SIMD_LANE_ACCESS_P (other_store_stmt_info)
6582 != STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info)))
6583 goto fail;
6584
6585 gimple *stmt1 = stmt;
6586 gimple *stmt2 = other_store_stmt;
6587 if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) == 4 && !inscan_var_store)
6588 std::swap (stmt1, stmt2);
6589 if (scan_operand_equal_p (gimple_assign_lhs (stmt1),
6590 gimple_assign_rhs1 (load2_stmt)))
6591 {
6592 std::swap (rhs1, rhs2);
6593 std::swap (load1_stmt, load2_stmt);
6594 std::swap (load1_stmt_info, load2_stmt_info);
6595 }
6596 if (!scan_operand_equal_p (gimple_assign_lhs (stmt1),
6597 gimple_assign_rhs1 (load1_stmt)))
6598 goto fail;
6599
6600 tree var3 = NULL_TREE;
6601 if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) == 3
6602 && !scan_operand_equal_p (gimple_assign_lhs (stmt2),
6603 gimple_assign_rhs1 (load2_stmt)))
6604 goto fail;
6605 else if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) == 4)
6606 {
6607 dr_vec_info *load2_dr_info = STMT_VINFO_DR_INFO (load2_stmt_info);
6608 if (TREE_CODE (DR_BASE_ADDRESS (load2_dr_info->dr)) != ADDR_EXPR
6609 || !VAR_P (TREE_OPERAND (DR_BASE_ADDRESS (load2_dr_info->dr), 0)))
6610 goto fail;
6611 var3 = TREE_OPERAND (DR_BASE_ADDRESS (load2_dr_info->dr), 0);
6612 if (!lookup_attribute ("omp simd array", DECL_ATTRIBUTES (var3))
6613 || lookup_attribute ("omp simd inscan", DECL_ATTRIBUTES (var3))
6614 || lookup_attribute ("omp simd inscan exclusive",
6615 DECL_ATTRIBUTES (var3)))
6616 goto fail;
6617 }
6618
6619 dr_vec_info *other_dr_info = STMT_VINFO_DR_INFO (other_store_stmt_info);
6620 if (TREE_CODE (DR_BASE_ADDRESS (other_dr_info->dr)) != ADDR_EXPR
6621 || !VAR_P (TREE_OPERAND (DR_BASE_ADDRESS (other_dr_info->dr), 0)))
6622 goto fail;
6623
6624 tree var1 = TREE_OPERAND (DR_BASE_ADDRESS (dr_info->dr), 0);
6625 tree var2 = TREE_OPERAND (DR_BASE_ADDRESS (other_dr_info->dr), 0);
6626 if (!lookup_attribute ("omp simd array", DECL_ATTRIBUTES (var1))
6627 || !lookup_attribute ("omp simd array", DECL_ATTRIBUTES (var2))
6628 || (!lookup_attribute ("omp simd inscan", DECL_ATTRIBUTES (var1)))
6629 == (!lookup_attribute ("omp simd inscan", DECL_ATTRIBUTES (var2))))
6630 goto fail;
6631
6632 if (lookup_attribute ("omp simd inscan", DECL_ATTRIBUTES (var1)))
6633 std::swap (var1, var2);
6634
6635 if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) == 4)
6636 {
6637 if (!lookup_attribute ("omp simd inscan exclusive",
6638 DECL_ATTRIBUTES (var1)))
6639 goto fail;
6640 var1 = var3;
6641 }
6642
6643 if (loop_vinfo->scan_map == NULL)
6644 goto fail;
6645 tree *init = loop_vinfo->scan_map->get (var1);
6646 if (init == NULL)
6647 goto fail;
6648
6649   /* The IL is as expected; now check if we can actually vectorize it.
6650 Inclusive scan:
6651 _26 = D.2043[_25];
6652 _27 = D.2042[_25];
6653 _28 = _26 + _27;
6654 D.2043[_25] = _28;
6655 D.2042[_25] = _28;
6656 should be vectorized as (where _40 is the vectorized rhs
6657 from the D.2042[_21] = 0; store):
6658 _30 = MEM <vector(8) int> [(int *)&D.2043];
6659 _31 = MEM <vector(8) int> [(int *)&D.2042];
6660 _32 = VEC_PERM_EXPR <_40, _31, { 0, 8, 9, 10, 11, 12, 13, 14 }>;
6661 _33 = _31 + _32;
6662 // _33 = { _31[0], _31[0]+_31[1], _31[1]+_31[2], ..., _31[6]+_31[7] };
6663 _34 = VEC_PERM_EXPR <_40, _33, { 0, 1, 8, 9, 10, 11, 12, 13 }>;
6664 _35 = _33 + _34;
6665 // _35 = { _31[0], _31[0]+_31[1], _31[0]+.._31[2], _31[0]+.._31[3],
6666 // _31[1]+.._31[4], ... _31[4]+.._31[7] };
6667 _36 = VEC_PERM_EXPR <_40, _35, { 0, 1, 2, 3, 8, 9, 10, 11 }>;
6668 _37 = _35 + _36;
6669 // _37 = { _31[0], _31[0]+_31[1], _31[0]+.._31[2], _31[0]+.._31[3],
6670 // _31[0]+.._31[4], ... _31[0]+.._31[7] };
6671 _38 = _30 + _37;
6672 _39 = VEC_PERM_EXPR <_38, _38, { 7, 7, 7, 7, 7, 7, 7, 7 }>;
6673 MEM <vector(8) int> [(int *)&D.2043] = _39;
6674 MEM <vector(8) int> [(int *)&D.2042] = _38;
6675 Exclusive scan:
6676 _26 = D.2043[_25];
6677 D.2044[_25] = _26;
6678 _27 = D.2042[_25];
6679 _28 = _26 + _27;
6680 D.2043[_25] = _28;
6681 should be vectorized as (where _40 is the vectorized rhs
6682 from the D.2042[_21] = 0; store):
6683 _30 = MEM <vector(8) int> [(int *)&D.2043];
6684 _31 = MEM <vector(8) int> [(int *)&D.2042];
6685 _32 = VEC_PERM_EXPR <_40, _31, { 0, 8, 9, 10, 11, 12, 13, 14 }>;
6686 _33 = VEC_PERM_EXPR <_40, _32, { 0, 8, 9, 10, 11, 12, 13, 14 }>;
6687 _34 = _32 + _33;
6688 // _34 = { 0, _31[0], _31[0]+_31[1], _31[1]+_31[2], _31[2]+_31[3],
6689 // _31[3]+_31[4], ... _31[5]+.._31[6] };
6690 _35 = VEC_PERM_EXPR <_40, _34, { 0, 1, 8, 9, 10, 11, 12, 13 }>;
6691 _36 = _34 + _35;
6692 // _36 = { 0, _31[0], _31[0]+_31[1], _31[0]+.._31[2], _31[0]+.._31[3],
6693 // _31[1]+.._31[4], ... _31[3]+.._31[6] };
6694 _37 = VEC_PERM_EXPR <_40, _36, { 0, 1, 2, 3, 8, 9, 10, 11 }>;
6695 _38 = _36 + _37;
6696 // _38 = { 0, _31[0], _31[0]+_31[1], _31[0]+.._31[2], _31[0]+.._31[3],
6697 // _31[0]+.._31[4], ... _31[0]+.._31[6] };
6698 _39 = _30 + _38;
6699 _50 = _31 + _39;
6700 _51 = VEC_PERM_EXPR <_50, _50, { 7, 7, 7, 7, 7, 7, 7, 7 }>;
6701 MEM <vector(8) int> [(int *)&D.2044] = _39;
6702 MEM <vector(8) int> [(int *)&D.2042] = _51; */
6703 enum machine_mode vec_mode = TYPE_MODE (vectype);
6704 optab optab = optab_for_tree_code (code, vectype, optab_default);
6705 if (!optab || optab_handler (optab, vec_mode) == CODE_FOR_nothing)
6706 goto fail;
6707
6708 int units_log2 = scan_store_can_perm_p (vectype, *init);
6709 if (units_log2 == -1)
6710 goto fail;
6711
6712 return true;
6713 }
6714
6715
6716 /* Function vectorizable_scan_store.
6717
6718    Helper of vectorizable_store; takes the same arguments as vectorizable_store.
6719 Handle only the transformation, checking is done in check_scan_store. */
6720
6721 static bool
6722 vectorizable_scan_store (vec_info *vinfo,
6723 stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
6724 gimple **vec_stmt, int ncopies)
6725 {
6726 loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
6727 dr_vec_info *dr_info = STMT_VINFO_DR_INFO (stmt_info);
6728 tree ref_type = reference_alias_ptr_type (DR_REF (dr_info->dr));
6729 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
6730
6731 if (dump_enabled_p ())
6732 dump_printf_loc (MSG_NOTE, vect_location,
6733 "transform scan store. ncopies = %d\n", ncopies);
6734
6735 gimple *stmt = STMT_VINFO_STMT (stmt_info);
6736 tree rhs = gimple_assign_rhs1 (stmt);
6737 gcc_assert (TREE_CODE (rhs) == SSA_NAME);
6738
6739 tree var = TREE_OPERAND (DR_BASE_ADDRESS (dr_info->dr), 0);
6740 bool inscan_var_store
6741 = lookup_attribute ("omp simd inscan", DECL_ATTRIBUTES (var)) != NULL;
6742
6743 if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) == 4 && !inscan_var_store)
6744 {
6745 use_operand_p use_p;
6746 imm_use_iterator iter;
6747 FOR_EACH_IMM_USE_FAST (use_p, iter, rhs)
6748 {
6749 gimple *use_stmt = USE_STMT (use_p);
6750 if (use_stmt == stmt || is_gimple_debug (use_stmt))
6751 continue;
6752 rhs = gimple_assign_lhs (use_stmt);
6753 break;
6754 }
6755 }
6756
6757 gimple *def_stmt = SSA_NAME_DEF_STMT (rhs);
6758 enum tree_code code = gimple_assign_rhs_code (def_stmt);
6759 if (code == POINTER_PLUS_EXPR)
6760 code = PLUS_EXPR;
6761 gcc_assert (TREE_CODE_LENGTH (code) == binary_op
6762 && commutative_tree_code (code));
6763 tree rhs1 = gimple_assign_rhs1 (def_stmt);
6764 tree rhs2 = gimple_assign_rhs2 (def_stmt);
6765 gcc_assert (TREE_CODE (rhs1) == SSA_NAME && TREE_CODE (rhs2) == SSA_NAME);
6766 gimple *load1_stmt = SSA_NAME_DEF_STMT (rhs1);
6767 gimple *load2_stmt = SSA_NAME_DEF_STMT (rhs2);
6768 stmt_vec_info load1_stmt_info = loop_vinfo->lookup_stmt (load1_stmt);
6769 stmt_vec_info load2_stmt_info = loop_vinfo->lookup_stmt (load2_stmt);
6770 dr_vec_info *load1_dr_info = STMT_VINFO_DR_INFO (load1_stmt_info);
6771 dr_vec_info *load2_dr_info = STMT_VINFO_DR_INFO (load2_stmt_info);
6772 tree var1 = TREE_OPERAND (DR_BASE_ADDRESS (load1_dr_info->dr), 0);
6773 tree var2 = TREE_OPERAND (DR_BASE_ADDRESS (load2_dr_info->dr), 0);
6774
6775 if (lookup_attribute ("omp simd inscan", DECL_ATTRIBUTES (var1)))
6776 {
6777 std::swap (rhs1, rhs2);
6778 std::swap (var1, var2);
6779 std::swap (load1_dr_info, load2_dr_info);
6780 }
6781
6782 tree *init = loop_vinfo->scan_map->get (var1);
6783 gcc_assert (init);
6784
6785 unsigned HOST_WIDE_INT nunits;
6786 if (!TYPE_VECTOR_SUBPARTS (vectype).is_constant (&nunits))
6787 gcc_unreachable ();
6788 auto_vec<enum scan_store_kind, 16> use_whole_vector;
6789 int units_log2 = scan_store_can_perm_p (vectype, *init, &use_whole_vector);
6790 gcc_assert (units_log2 > 0);
6791 auto_vec<tree, 16> perms;
6792 perms.quick_grow (units_log2 + 1);
6793 tree zero_vec = NULL_TREE, masktype = NULL_TREE;
6794 for (int i = 0; i <= units_log2; ++i)
6795 {
6796 unsigned HOST_WIDE_INT j, k;
6797 vec_perm_builder sel (nunits, nunits, 1);
6798 sel.quick_grow (nunits);
6799 if (i == units_log2)
6800 for (j = 0; j < nunits; ++j)
6801 sel[j] = nunits - 1;
6802 else
6803 {
6804 for (j = 0; j < (HOST_WIDE_INT_1U << i); ++j)
6805 sel[j] = j;
6806 for (k = 0; j < nunits; ++j, ++k)
6807 sel[j] = nunits + k;
6808 }
6809 vec_perm_indices indices (sel, i == units_log2 ? 1 : 2, nunits);
6810 if (!use_whole_vector.is_empty ()
6811 && use_whole_vector[i] != scan_store_kind_perm)
6812 {
6813 if (zero_vec == NULL_TREE)
6814 zero_vec = build_zero_cst (vectype);
6815 if (masktype == NULL_TREE
6816 && use_whole_vector[i] == scan_store_kind_lshift_cond)
6817 masktype = truth_type_for (vectype);
6818 perms[i] = vect_gen_perm_mask_any (vectype, indices);
6819 }
6820 else
6821 perms[i] = vect_gen_perm_mask_checked (vectype, indices);
6822 }
6823
6824 tree vec_oprnd1 = NULL_TREE;
6825 tree vec_oprnd2 = NULL_TREE;
6826 tree vec_oprnd3 = NULL_TREE;
6827 tree dataref_ptr = DR_BASE_ADDRESS (dr_info->dr);
6828 tree dataref_offset = build_int_cst (ref_type, 0);
6829 tree bump = vect_get_data_ptr_increment (vinfo, dr_info,
6830 vectype, VMAT_CONTIGUOUS);
6831 tree ldataref_ptr = NULL_TREE;
6832 tree orig = NULL_TREE;
6833 if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) == 4 && !inscan_var_store)
6834 ldataref_ptr = DR_BASE_ADDRESS (load1_dr_info->dr);
6835 auto_vec<tree> vec_oprnds1;
6836 auto_vec<tree> vec_oprnds2;
6837 auto_vec<tree> vec_oprnds3;
6838 vect_get_vec_defs (vinfo, stmt_info, NULL, ncopies,
6839 *init, &vec_oprnds1,
6840 ldataref_ptr == NULL ? rhs1 : NULL, &vec_oprnds2,
6841 rhs2, &vec_oprnds3);
6842 for (int j = 0; j < ncopies; j++)
6843 {
6844 vec_oprnd1 = vec_oprnds1[j];
6845 if (ldataref_ptr == NULL)
6846 vec_oprnd2 = vec_oprnds2[j];
6847 vec_oprnd3 = vec_oprnds3[j];
6848 if (j == 0)
6849 orig = vec_oprnd3;
6850 else if (!inscan_var_store)
6851 dataref_offset = int_const_binop (PLUS_EXPR, dataref_offset, bump);
6852
6853 if (ldataref_ptr)
6854 {
6855 vec_oprnd2 = make_ssa_name (vectype);
6856 tree data_ref = fold_build2 (MEM_REF, vectype,
6857 unshare_expr (ldataref_ptr),
6858 dataref_offset);
6859 vect_copy_ref_info (data_ref, DR_REF (load1_dr_info->dr));
6860 gimple *g = gimple_build_assign (vec_oprnd2, data_ref);
6861 vect_finish_stmt_generation (vinfo, stmt_info, g, gsi);
6862 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (g);
6863 *vec_stmt = STMT_VINFO_VEC_STMTS (stmt_info)[0];
6864 }
6865
6866 tree v = vec_oprnd2;
6867 for (int i = 0; i < units_log2; ++i)
6868 {
6869 tree new_temp = make_ssa_name (vectype);
6870 gimple *g = gimple_build_assign (new_temp, VEC_PERM_EXPR,
6871 (zero_vec
6872 && (use_whole_vector[i]
6873 != scan_store_kind_perm))
6874 ? zero_vec : vec_oprnd1, v,
6875 perms[i]);
6876 vect_finish_stmt_generation (vinfo, stmt_info, g, gsi);
6877 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (g);
6878 *vec_stmt = STMT_VINFO_VEC_STMTS (stmt_info)[0];
6879
6880 if (zero_vec && use_whole_vector[i] == scan_store_kind_lshift_cond)
6881 {
6882 /* The whole-vector shift shifted in zero bits; if *init
6883 is not initializer_zerop, we need to replace those elements
6884 with elements from vec_oprnd1. */
6885 tree_vector_builder vb (masktype, nunits, 1);
6886 for (unsigned HOST_WIDE_INT k = 0; k < nunits; ++k)
6887 vb.quick_push (k < (HOST_WIDE_INT_1U << i)
6888 ? boolean_false_node : boolean_true_node);
6889
6890 tree new_temp2 = make_ssa_name (vectype);
6891 g = gimple_build_assign (new_temp2, VEC_COND_EXPR, vb.build (),
6892 new_temp, vec_oprnd1);
6893 vect_finish_stmt_generation (vinfo, stmt_info,
6894 g, gsi);
6895 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (g);
6896 new_temp = new_temp2;
6897 }
6898
6899 /* For exclusive scan, perform the perms[i] permutation once
6900 more. */
6901 if (i == 0
6902 && STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) == 4
6903 && v == vec_oprnd2)
6904 {
6905 v = new_temp;
6906 --i;
6907 continue;
6908 }
6909
6910 tree new_temp2 = make_ssa_name (vectype);
6911 g = gimple_build_assign (new_temp2, code, v, new_temp);
6912 vect_finish_stmt_generation (vinfo, stmt_info, g, gsi);
6913 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (g);
6914
6915 v = new_temp2;
6916 }
6917
6918 tree new_temp = make_ssa_name (vectype);
6919 gimple *g = gimple_build_assign (new_temp, code, orig, v);
6920 vect_finish_stmt_generation (vinfo, stmt_info, g, gsi);
6921 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (g);
6922
6923 tree last_perm_arg = new_temp;
6924 /* For exclusive scan, new_temp computed above is the exclusive scan
6925 prefix sum. Turn it into an inclusive prefix sum for the broadcast
6926 of the last element into orig. */
6927 if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) == 4)
6928 {
6929 last_perm_arg = make_ssa_name (vectype);
6930 g = gimple_build_assign (last_perm_arg, code, new_temp, vec_oprnd2);
6931 vect_finish_stmt_generation (vinfo, stmt_info, g, gsi);
6932 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (g);
6933 }
6934
6935 orig = make_ssa_name (vectype);
6936 g = gimple_build_assign (orig, VEC_PERM_EXPR, last_perm_arg,
6937 last_perm_arg, perms[units_log2]);
6938 vect_finish_stmt_generation (vinfo, stmt_info, g, gsi);
6939 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (g);
6940
6941 if (!inscan_var_store)
6942 {
6943 tree data_ref = fold_build2 (MEM_REF, vectype,
6944 unshare_expr (dataref_ptr),
6945 dataref_offset);
6946 vect_copy_ref_info (data_ref, DR_REF (dr_info->dr));
6947 g = gimple_build_assign (data_ref, new_temp);
6948 vect_finish_stmt_generation (vinfo, stmt_info, g, gsi);
6949 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (g);
6950 }
6951 }
6952
6953 if (inscan_var_store)
6954 for (int j = 0; j < ncopies; j++)
6955 {
6956 if (j != 0)
6957 dataref_offset = int_const_binop (PLUS_EXPR, dataref_offset, bump);
6958
6959 tree data_ref = fold_build2 (MEM_REF, vectype,
6960 unshare_expr (dataref_ptr),
6961 dataref_offset);
6962 vect_copy_ref_info (data_ref, DR_REF (dr_info->dr));
6963 gimple *g = gimple_build_assign (data_ref, orig);
6964 vect_finish_stmt_generation (vinfo, stmt_info, g, gsi);
6965 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (g);
6966 }
6967 return true;
6968 }
6969
6970
6971 /* Function vectorizable_store.
6972
6973 Check if STMT_INFO defines a non-scalar data-ref (array/pointer/structure)
6974 that can be vectorized.
6975 If VEC_STMT is also passed, vectorize STMT_INFO: create a vectorized
6976 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
6977 Return true if STMT_INFO is vectorizable in this way. */
6978
6979 static bool
6980 vectorizable_store (vec_info *vinfo,
6981 stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
6982 gimple **vec_stmt, slp_tree slp_node,
6983 stmt_vector_for_cost *cost_vec)
6984 {
6985 tree data_ref;
6986 tree op;
6987 tree vec_oprnd = NULL_TREE;
6988 tree elem_type;
6989 loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
6990 class loop *loop = NULL;
6991 machine_mode vec_mode;
6992 tree dummy;
6993 enum vect_def_type rhs_dt = vect_unknown_def_type;
6994 enum vect_def_type mask_dt = vect_unknown_def_type;
6995 tree dataref_ptr = NULL_TREE;
6996 tree dataref_offset = NULL_TREE;
6997 gimple *ptr_incr = NULL;
6998 int ncopies;
6999 int j;
7000 stmt_vec_info first_stmt_info;
7001 bool grouped_store;
7002 unsigned int group_size, i;
7003 vec<tree> oprnds = vNULL;
7004 vec<tree> result_chain = vNULL;
7005 tree offset = NULL_TREE;
7006 vec<tree> vec_oprnds = vNULL;
7007 bool slp = (slp_node != NULL);
7008 unsigned int vec_num;
7009 bb_vec_info bb_vinfo = dyn_cast <bb_vec_info> (vinfo);
7010 tree aggr_type;
7011 gather_scatter_info gs_info;
7012 poly_uint64 vf;
7013 vec_load_store_type vls_type;
7014 tree ref_type;
7015
7016 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
7017 return false;
7018
7019 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
7020 && ! vec_stmt)
7021 return false;
7022
7023 /* Is vectorizable store? */
7024
7025 tree mask = NULL_TREE, mask_vectype = NULL_TREE;
7026 if (gassign *assign = dyn_cast <gassign *> (stmt_info->stmt))
7027 {
7028 tree scalar_dest = gimple_assign_lhs (assign);
7029 if (TREE_CODE (scalar_dest) == VIEW_CONVERT_EXPR
7030 && is_pattern_stmt_p (stmt_info))
7031 scalar_dest = TREE_OPERAND (scalar_dest, 0);
7032 if (TREE_CODE (scalar_dest) != ARRAY_REF
7033 && TREE_CODE (scalar_dest) != BIT_FIELD_REF
7034 && TREE_CODE (scalar_dest) != INDIRECT_REF
7035 && TREE_CODE (scalar_dest) != COMPONENT_REF
7036 && TREE_CODE (scalar_dest) != IMAGPART_EXPR
7037 && TREE_CODE (scalar_dest) != REALPART_EXPR
7038 && TREE_CODE (scalar_dest) != MEM_REF)
7039 return false;
7040 }
7041 else
7042 {
7043 gcall *call = dyn_cast <gcall *> (stmt_info->stmt);
7044 if (!call || !gimple_call_internal_p (call))
7045 return false;
7046
7047 internal_fn ifn = gimple_call_internal_fn (call);
7048 if (!internal_store_fn_p (ifn))
7049 return false;
7050
7051 if (slp_node != NULL)
7052 {
7053 if (dump_enabled_p ())
7054 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
7055 "SLP of masked stores not supported.\n");
7056 return false;
7057 }
7058
7059 int mask_index = internal_fn_mask_index (ifn);
7060 if (mask_index >= 0)
7061 {
7062 mask = gimple_call_arg (call, mask_index);
7063 if (!vect_check_scalar_mask (vinfo, stmt_info, mask, &mask_dt,
7064 &mask_vectype))
7065 return false;
7066 }
7067 }
7068
7069 op = vect_get_store_rhs (stmt_info);
7070
7071 /* Cannot have hybrid store SLP -- that would mean storing to the
7072 same location twice. */
7073 gcc_assert (slp == PURE_SLP_STMT (stmt_info));
7074
7075 tree vectype = STMT_VINFO_VECTYPE (stmt_info), rhs_vectype = NULL_TREE;
7076 poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
7077
7078 if (loop_vinfo)
7079 {
7080 loop = LOOP_VINFO_LOOP (loop_vinfo);
7081 vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
7082 }
7083 else
7084 vf = 1;
7085
7086 /* Multiple types in SLP are handled by creating the appropriate number of
7087 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
7088 case of SLP. */
7089 if (slp)
7090 ncopies = 1;
7091 else
7092 ncopies = vect_get_num_copies (loop_vinfo, vectype);
7093
7094 gcc_assert (ncopies >= 1);
7095
7096 /* FORNOW. This restriction should be relaxed. */
7097 if (loop && nested_in_vect_loop_p (loop, stmt_info) && ncopies > 1)
7098 {
7099 if (dump_enabled_p ())
7100 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
7101 "multiple types in nested loop.\n");
7102 return false;
7103 }
7104
7105 if (!vect_check_store_rhs (vinfo, stmt_info, slp_node,
7106 op, &rhs_dt, &rhs_vectype, &vls_type))
7107 return false;
7108
7109 elem_type = TREE_TYPE (vectype);
7110 vec_mode = TYPE_MODE (vectype);
7111
7112 if (!STMT_VINFO_DATA_REF (stmt_info))
7113 return false;
7114
7115 vect_memory_access_type memory_access_type;
7116 enum dr_alignment_support alignment_support_scheme;
7117 if (!get_load_store_type (vinfo, stmt_info, vectype, slp_node, mask, vls_type,
7118 ncopies, &memory_access_type,
7119 &alignment_support_scheme, &gs_info))
7120 return false;
7121
7122 if (mask)
7123 {
7124 if (memory_access_type == VMAT_CONTIGUOUS)
7125 {
7126 if (!VECTOR_MODE_P (vec_mode)
7127 || !can_vec_mask_load_store_p (vec_mode,
7128 TYPE_MODE (mask_vectype), false))
7129 return false;
7130 }
7131 else if (memory_access_type != VMAT_LOAD_STORE_LANES
7132 && (memory_access_type != VMAT_GATHER_SCATTER
7133 || (gs_info.decl && !VECTOR_BOOLEAN_TYPE_P (mask_vectype))))
7134 {
7135 if (dump_enabled_p ())
7136 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
7137 "unsupported access type for masked store.\n");
7138 return false;
7139 }
7140 }
7141 else
7142 {
7143 /* FORNOW. In some cases we can vectorize even if the data type is not
7144 supported (e.g. array initialization with 0). */
7145 if (optab_handler (mov_optab, vec_mode) == CODE_FOR_nothing)
7146 return false;
7147 }
7148
7149 dr_vec_info *dr_info = STMT_VINFO_DR_INFO (stmt_info), *first_dr_info = NULL;
7150 grouped_store = (STMT_VINFO_GROUPED_ACCESS (stmt_info)
7151 && memory_access_type != VMAT_GATHER_SCATTER
7152 && (slp || memory_access_type != VMAT_CONTIGUOUS));
7153 if (grouped_store)
7154 {
7155 first_stmt_info = DR_GROUP_FIRST_ELEMENT (stmt_info);
7156 first_dr_info = STMT_VINFO_DR_INFO (first_stmt_info);
7157 group_size = DR_GROUP_SIZE (first_stmt_info);
7158 }
7159 else
7160 {
7161 first_stmt_info = stmt_info;
7162 first_dr_info = dr_info;
7163 group_size = vec_num = 1;
7164 }
7165
7166 if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) > 1 && !vec_stmt)
7167 {
7168 if (!check_scan_store (vinfo, stmt_info, vectype, rhs_dt, slp, mask,
7169 memory_access_type))
7170 return false;
7171 }
7172
7173 if (!vec_stmt) /* transformation not required. */
7174 {
7175 STMT_VINFO_MEMORY_ACCESS_TYPE (stmt_info) = memory_access_type;
7176
7177 if (loop_vinfo
7178 && LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo))
7179 check_load_store_for_partial_vectors (loop_vinfo, vectype, vls_type,
7180 group_size, memory_access_type,
7181 &gs_info, mask);
7182
7183 if (slp_node
7184 && !vect_maybe_update_slp_op_vectype (SLP_TREE_CHILDREN (slp_node)[0],
7185 vectype))
7186 {
7187 if (dump_enabled_p ())
7188 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
7189 "incompatible vector types for invariants\n");
7190 return false;
7191 }
7192
7193 if (dump_enabled_p ()
7194 && memory_access_type != VMAT_ELEMENTWISE
7195 && memory_access_type != VMAT_GATHER_SCATTER
7196 && alignment_support_scheme != dr_aligned)
7197 dump_printf_loc (MSG_NOTE, vect_location,
7198 "Vectorizing an unaligned access.\n");
7199
7200 STMT_VINFO_TYPE (stmt_info) = store_vec_info_type;
7201 vect_model_store_cost (vinfo, stmt_info, ncopies,
7202 memory_access_type, vls_type, slp_node, cost_vec);
7203 return true;
7204 }
7205 gcc_assert (memory_access_type == STMT_VINFO_MEMORY_ACCESS_TYPE (stmt_info));
7206
7207 /* Transform. */
7208
7209 ensure_base_align (dr_info);
7210
7211 if (memory_access_type == VMAT_GATHER_SCATTER && gs_info.decl)
7212 {
7213 tree vec_oprnd0 = NULL_TREE, vec_oprnd1 = NULL_TREE, src;
7214 tree arglist = TYPE_ARG_TYPES (TREE_TYPE (gs_info.decl));
7215 tree rettype, srctype, ptrtype, idxtype, masktype, scaletype;
7216 tree ptr, var, scale, vec_mask;
7217 tree mask_arg = NULL_TREE, mask_op = NULL_TREE, perm_mask = NULL_TREE;
7218 tree mask_halfvectype = mask_vectype;
7219 edge pe = loop_preheader_edge (loop);
7220 gimple_seq seq;
7221 basic_block new_bb;
7222 enum { NARROW, NONE, WIDEN } modifier;
7223 poly_uint64 scatter_off_nunits
7224 = TYPE_VECTOR_SUBPARTS (gs_info.offset_vectype);
7225
7226 if (known_eq (nunits, scatter_off_nunits))
7227 modifier = NONE;
7228 else if (known_eq (nunits * 2, scatter_off_nunits))
7229 {
7230 modifier = WIDEN;
7231
7232 /* Currently gathers and scatters are only supported for
7233 fixed-length vectors. */
7234 unsigned int count = scatter_off_nunits.to_constant ();
7235 vec_perm_builder sel (count, count, 1);
7236 for (i = 0; i < (unsigned int) count; ++i)
7237 sel.quick_push (i | (count / 2));
7238
7239 vec_perm_indices indices (sel, 1, count);
7240 perm_mask = vect_gen_perm_mask_checked (gs_info.offset_vectype,
7241 indices);
7242 gcc_assert (perm_mask != NULL_TREE);
7243 }
7244 else if (known_eq (nunits, scatter_off_nunits * 2))
7245 {
7246 modifier = NARROW;
7247
7248 /* Currently gathers and scatters are only supported for
7249 fixed-length vectors. */
7250 unsigned int count = nunits.to_constant ();
7251 vec_perm_builder sel (count, count, 1);
7252 for (i = 0; i < (unsigned int) count; ++i)
7253 sel.quick_push (i | (count / 2));
7254
7255 vec_perm_indices indices (sel, 2, count);
7256 perm_mask = vect_gen_perm_mask_checked (vectype, indices);
7257 gcc_assert (perm_mask != NULL_TREE);
7258 ncopies *= 2;
7259
7260 if (mask)
7261 mask_halfvectype = truth_type_for (gs_info.offset_vectype);
7262 }
7263 else
7264 gcc_unreachable ();
7265
7266 rettype = TREE_TYPE (TREE_TYPE (gs_info.decl));
7267 ptrtype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
7268 masktype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
7269 idxtype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
7270 srctype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
7271 scaletype = TREE_VALUE (arglist);
7272
7273 gcc_checking_assert (TREE_CODE (masktype) == INTEGER_TYPE
7274 && TREE_CODE (rettype) == VOID_TYPE);
7275
7276 ptr = fold_convert (ptrtype, gs_info.base);
7277 if (!is_gimple_min_invariant (ptr))
7278 {
7279 ptr = force_gimple_operand (ptr, &seq, true, NULL_TREE);
7280 new_bb = gsi_insert_seq_on_edge_immediate (pe, seq);
7281 gcc_assert (!new_bb);
7282 }
7283
7284 if (mask == NULL_TREE)
7285 {
7286 mask_arg = build_int_cst (masktype, -1);
7287 mask_arg = vect_init_vector (vinfo, stmt_info,
7288 mask_arg, masktype, NULL);
7289 }
7290
7291 scale = build_int_cst (scaletype, gs_info.scale);
7292
7293 auto_vec<tree> vec_oprnds0;
7294 auto_vec<tree> vec_oprnds1;
7295 auto_vec<tree> vec_masks;
7296 if (mask)
7297 {
7298 tree mask_vectype = truth_type_for (vectype);
7299 vect_get_vec_defs_for_operand (vinfo, stmt_info,
7300 modifier == NARROW
7301 ? ncopies / 2 : ncopies,
7302 mask, &vec_masks, mask_vectype);
7303 }
7304 vect_get_vec_defs_for_operand (vinfo, stmt_info,
7305 modifier == WIDEN
7306 ? ncopies / 2 : ncopies,
7307 gs_info.offset, &vec_oprnds0);
7308 vect_get_vec_defs_for_operand (vinfo, stmt_info,
7309 modifier == NARROW
7310 ? ncopies / 2 : ncopies,
7311 op, &vec_oprnds1);
7312 for (j = 0; j < ncopies; ++j)
7313 {
7314 if (modifier == WIDEN)
7315 {
7316 if (j & 1)
7317 op = permute_vec_elements (vinfo, vec_oprnd0, vec_oprnd0,
7318 perm_mask, stmt_info, gsi);
7319 else
7320 op = vec_oprnd0 = vec_oprnds0[j / 2];
7321 src = vec_oprnd1 = vec_oprnds1[j];
7322 if (mask)
7323 mask_op = vec_mask = vec_masks[j];
7324 }
7325 else if (modifier == NARROW)
7326 {
7327 if (j & 1)
7328 src = permute_vec_elements (vinfo, vec_oprnd1, vec_oprnd1,
7329 perm_mask, stmt_info, gsi);
7330 else
7331 src = vec_oprnd1 = vec_oprnds1[j / 2];
7332 op = vec_oprnd0 = vec_oprnds0[j];
7333 if (mask)
7334 mask_op = vec_mask = vec_masks[j / 2];
7335 }
7336 else
7337 {
7338 op = vec_oprnd0 = vec_oprnds0[j];
7339 src = vec_oprnd1 = vec_oprnds1[j];
7340 if (mask)
7341 mask_op = vec_mask = vec_masks[j];
7342 }
7343
7344 if (!useless_type_conversion_p (srctype, TREE_TYPE (src)))
7345 {
7346 gcc_assert (known_eq (TYPE_VECTOR_SUBPARTS (TREE_TYPE (src)),
7347 TYPE_VECTOR_SUBPARTS (srctype)));
7348 var = vect_get_new_ssa_name (srctype, vect_simple_var);
7349 src = build1 (VIEW_CONVERT_EXPR, srctype, src);
7350 gassign *new_stmt
7351 = gimple_build_assign (var, VIEW_CONVERT_EXPR, src);
7352 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
7353 src = var;
7354 }
7355
7356 if (!useless_type_conversion_p (idxtype, TREE_TYPE (op)))
7357 {
7358 gcc_assert (known_eq (TYPE_VECTOR_SUBPARTS (TREE_TYPE (op)),
7359 TYPE_VECTOR_SUBPARTS (idxtype)));
7360 var = vect_get_new_ssa_name (idxtype, vect_simple_var);
7361 op = build1 (VIEW_CONVERT_EXPR, idxtype, op);
7362 gassign *new_stmt
7363 = gimple_build_assign (var, VIEW_CONVERT_EXPR, op);
7364 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
7365 op = var;
7366 }
7367
7368 if (mask)
7369 {
7370 tree utype;
7371 mask_arg = mask_op;
7372 if (modifier == NARROW)
7373 {
7374 var = vect_get_new_ssa_name (mask_halfvectype,
7375 vect_simple_var);
7376 gassign *new_stmt
7377 = gimple_build_assign (var, (j & 1) ? VEC_UNPACK_HI_EXPR
7378 : VEC_UNPACK_LO_EXPR,
7379 mask_op);
7380 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
7381 mask_arg = var;
7382 }
7383 tree optype = TREE_TYPE (mask_arg);
7384 if (TYPE_MODE (masktype) == TYPE_MODE (optype))
7385 utype = masktype;
7386 else
7387 utype = lang_hooks.types.type_for_mode (TYPE_MODE (optype), 1);
7388 var = vect_get_new_ssa_name (utype, vect_scalar_var);
7389 mask_arg = build1 (VIEW_CONVERT_EXPR, utype, mask_arg);
7390 gassign *new_stmt
7391 = gimple_build_assign (var, VIEW_CONVERT_EXPR, mask_arg);
7392 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
7393 mask_arg = var;
7394 if (!useless_type_conversion_p (masktype, utype))
7395 {
7396 gcc_assert (TYPE_PRECISION (utype)
7397 <= TYPE_PRECISION (masktype));
7398 var = vect_get_new_ssa_name (masktype, vect_scalar_var);
7399 new_stmt = gimple_build_assign (var, NOP_EXPR, mask_arg);
7400 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
7401 mask_arg = var;
7402 }
7403 }
7404
7405 gcall *new_stmt
7406 = gimple_build_call (gs_info.decl, 5, ptr, mask_arg, op, src, scale);
7407 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
7408
7409 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
7410 }
7411 *vec_stmt = STMT_VINFO_VEC_STMTS (stmt_info)[0];
7412 return true;
7413 }
7414 else if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) >= 3)
7415 return vectorizable_scan_store (vinfo, stmt_info, gsi, vec_stmt, ncopies);
7416
7417 if (STMT_VINFO_GROUPED_ACCESS (stmt_info))
7418 DR_GROUP_STORE_COUNT (DR_GROUP_FIRST_ELEMENT (stmt_info))++;
7419
7420 if (grouped_store)
7421 {
7422 /* FORNOW */
7423 gcc_assert (!loop || !nested_in_vect_loop_p (loop, stmt_info));
7424
7425 /* We vectorize all the stmts of the interleaving group when we
7426 reach the last stmt in the group. */
7427 if (DR_GROUP_STORE_COUNT (first_stmt_info)
7428 < DR_GROUP_SIZE (first_stmt_info)
7429 && !slp)
7430 {
7431 *vec_stmt = NULL;
7432 return true;
7433 }
7434
7435 if (slp)
7436 {
7437 grouped_store = false;
7438 /* VEC_NUM is the number of vect stmts to be created for this
7439 group. */
7440 vec_num = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
7441 first_stmt_info = SLP_TREE_SCALAR_STMTS (slp_node)[0];
7442 gcc_assert (DR_GROUP_FIRST_ELEMENT (first_stmt_info)
7443 == first_stmt_info);
7444 first_dr_info = STMT_VINFO_DR_INFO (first_stmt_info);
7445 op = vect_get_store_rhs (first_stmt_info);
7446 }
7447 else
7448 /* VEC_NUM is the number of vect stmts to be created for this
7449 group. */
7450 vec_num = group_size;
7451
7452 ref_type = get_group_alias_ptr_type (first_stmt_info);
7453 }
7454 else
7455 ref_type = reference_alias_ptr_type (DR_REF (first_dr_info->dr));
7456
7457 if (dump_enabled_p ())
7458 dump_printf_loc (MSG_NOTE, vect_location,
7459 "transform store. ncopies = %d\n", ncopies);
7460
7461 if (memory_access_type == VMAT_ELEMENTWISE
7462 || memory_access_type == VMAT_STRIDED_SLP)
7463 {
7464 gimple_stmt_iterator incr_gsi;
7465 bool insert_after;
7466 gimple *incr;
7467 tree offvar;
7468 tree ivstep;
7469 tree running_off;
7470 tree stride_base, stride_step, alias_off;
7471 tree vec_oprnd;
7472 tree dr_offset;
7473 unsigned int g;
7474 /* Checked by get_load_store_type. */
7475 unsigned int const_nunits = nunits.to_constant ();
7476
7477 gcc_assert (!LOOP_VINFO_FULLY_MASKED_P (loop_vinfo));
7478 gcc_assert (!nested_in_vect_loop_p (loop, stmt_info));
7479
7480 dr_offset = get_dr_vinfo_offset (vinfo, first_dr_info);
7481 stride_base
7482 = fold_build_pointer_plus
7483 (DR_BASE_ADDRESS (first_dr_info->dr),
7484 size_binop (PLUS_EXPR,
7485 convert_to_ptrofftype (dr_offset),
7486 convert_to_ptrofftype (DR_INIT (first_dr_info->dr))));
7487 stride_step = fold_convert (sizetype, DR_STEP (first_dr_info->dr));
7488
7489 /* For a store with loop-invariant (but other than power-of-2)
7490 stride (i.e. not a grouped access) like so:
7491
7492 for (i = 0; i < n; i += stride)
7493 array[i] = ...;
7494
7495 we generate a new induction variable and new stores from
7496 the components of the (vectorized) rhs:
7497
7498 for (j = 0; ; j += VF*stride)
7499 vectemp = ...;
7500 tmp1 = vectemp[0];
7501 array[j] = tmp1;
7502 tmp2 = vectemp[1];
7503 array[j + stride] = tmp2;
7504 ...
7505 */
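      /* In GIMPLE terms the code below emits, per extracted element, a
	 BIT_FIELD_REF extract, a scalar store and a pointer bump, roughly
	 like this (an illustrative sketch with made-up SSA names, for the
	 non-SLP case where a single element is stored per step):

	   _1 = BIT_FIELD_REF <vectemp_2, elt_bits, 0>;
	   MEM[(T *)running_off_3] = _1;
	   running_off_4 = running_off_3 + stride_step_5;
	   _6 = BIT_FIELD_REF <vectemp_2, elt_bits, elt_bits>;
	   MEM[(T *)running_off_4] = _6;
	   ...  */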
7506
7507 unsigned nstores = const_nunits;
7508 unsigned lnel = 1;
7509 tree ltype = elem_type;
7510 tree lvectype = vectype;
7511 if (slp)
7512 {
7513 if (group_size < const_nunits
7514 && const_nunits % group_size == 0)
7515 {
7516 nstores = const_nunits / group_size;
7517 lnel = group_size;
7518 ltype = build_vector_type (elem_type, group_size);
7519 lvectype = vectype;
7520
7521 /* First check whether the vec_extract optab supports extracting the
7522 vector elements directly; if not, try the alternatives below. */
7523 scalar_mode elmode = SCALAR_TYPE_MODE (elem_type);
7524 machine_mode vmode;
7525 if (!VECTOR_MODE_P (TYPE_MODE (vectype))
7526 || !related_vector_mode (TYPE_MODE (vectype), elmode,
7527 group_size).exists (&vmode)
7528 || (convert_optab_handler (vec_extract_optab,
7529 TYPE_MODE (vectype), vmode)
7530 == CODE_FOR_nothing))
7531 {
7532 /* Try to avoid emitting an extract of vector elements
7533 by performing the extracts using an integer type of the
7534 same size, extracting from a vector of those and then
7535 re-interpreting it as the original vector type if
7536 supported. */
7537 unsigned lsize
7538 = group_size * GET_MODE_BITSIZE (elmode);
7539 unsigned int lnunits = const_nunits / group_size;
7540 /* If we can't construct such a vector fall back to
7541 element extracts from the original vector type and
7542 element size stores. */
7543 if (int_mode_for_size (lsize, 0).exists (&elmode)
7544 && VECTOR_MODE_P (TYPE_MODE (vectype))
7545 && related_vector_mode (TYPE_MODE (vectype), elmode,
7546 lnunits).exists (&vmode)
7547 && (convert_optab_handler (vec_extract_optab,
7548 vmode, elmode)
7549 != CODE_FOR_nothing))
7550 {
7551 nstores = lnunits;
7552 lnel = group_size;
7553 ltype = build_nonstandard_integer_type (lsize, 1);
7554 lvectype = build_vector_type (ltype, nstores);
7555 }
7556 /* Else fall back to vector extraction anyway.
7557 Fewer stores are more important than avoiding spilling
7558 of the vector we extract from. Compared to the
7559 construction case in vectorizable_load no store-forwarding
7560 issue exists here for reasonable archs. */
7561 }
7562 }
7563 else if (group_size >= const_nunits
7564 && group_size % const_nunits == 0)
7565 {
7566 nstores = 1;
7567 lnel = const_nunits;
7568 ltype = vectype;
7569 lvectype = vectype;
7570 }
7571 ltype = build_aligned_type (ltype, TYPE_ALIGN (elem_type));
7572 ncopies = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
7573 }
7574
7575 ivstep = stride_step;
7576 ivstep = fold_build2 (MULT_EXPR, TREE_TYPE (ivstep), ivstep,
7577 build_int_cst (TREE_TYPE (ivstep), vf));
7578
7579 standard_iv_increment_position (loop, &incr_gsi, &insert_after);
7580
7581 stride_base = cse_and_gimplify_to_preheader (loop_vinfo, stride_base);
7582 ivstep = cse_and_gimplify_to_preheader (loop_vinfo, ivstep);
7583 create_iv (stride_base, ivstep, NULL,
7584 loop, &incr_gsi, insert_after,
7585 &offvar, NULL);
7586 incr = gsi_stmt (incr_gsi);
7587
7588 stride_step = cse_and_gimplify_to_preheader (loop_vinfo, stride_step);
7589
7590 alias_off = build_int_cst (ref_type, 0);
7591 stmt_vec_info next_stmt_info = first_stmt_info;
7592 for (g = 0; g < group_size; g++)
7593 {
7594 running_off = offvar;
7595 if (g)
7596 {
7597 tree size = TYPE_SIZE_UNIT (ltype);
7598 tree pos = fold_build2 (MULT_EXPR, sizetype, size_int (g),
7599 size);
7600 tree newoff = copy_ssa_name (running_off, NULL);
7601 incr = gimple_build_assign (newoff, POINTER_PLUS_EXPR,
7602 running_off, pos);
7603 vect_finish_stmt_generation (vinfo, stmt_info, incr, gsi);
7604 running_off = newoff;
7605 }
7606 if (!slp)
7607 op = vect_get_store_rhs (next_stmt_info);
7608 vect_get_vec_defs (vinfo, next_stmt_info, slp_node, ncopies,
7609 op, &vec_oprnds);
7610 unsigned int group_el = 0;
7611 unsigned HOST_WIDE_INT
7612 elsz = tree_to_uhwi (TYPE_SIZE_UNIT (TREE_TYPE (vectype)));
7613 for (j = 0; j < ncopies; j++)
7614 {
7615 vec_oprnd = vec_oprnds[j];
7616 /* Pun the vector to extract from if necessary. */
7617 if (lvectype != vectype)
7618 {
7619 tree tem = make_ssa_name (lvectype);
7620 gimple *pun
7621 = gimple_build_assign (tem, build1 (VIEW_CONVERT_EXPR,
7622 lvectype, vec_oprnd));
7623 vect_finish_stmt_generation (vinfo, stmt_info, pun, gsi);
7624 vec_oprnd = tem;
7625 }
7626 for (i = 0; i < nstores; i++)
7627 {
7628 tree newref, newoff;
7629 gimple *incr, *assign;
7630 tree size = TYPE_SIZE (ltype);
7631 /* Extract the i'th component. */
7632 tree pos = fold_build2 (MULT_EXPR, bitsizetype,
7633 bitsize_int (i), size);
7634 tree elem = fold_build3 (BIT_FIELD_REF, ltype, vec_oprnd,
7635 size, pos);
7636
7637 elem = force_gimple_operand_gsi (gsi, elem, true,
7638 NULL_TREE, true,
7639 GSI_SAME_STMT);
7640
7641 tree this_off = build_int_cst (TREE_TYPE (alias_off),
7642 group_el * elsz);
7643 newref = build2 (MEM_REF, ltype,
7644 running_off, this_off);
7645 vect_copy_ref_info (newref, DR_REF (first_dr_info->dr));
7646
7647 /* And store it to *running_off. */
7648 assign = gimple_build_assign (newref, elem);
7649 vect_finish_stmt_generation (vinfo, stmt_info, assign, gsi);
7650
7651 group_el += lnel;
7652 if (! slp
7653 || group_el == group_size)
7654 {
7655 newoff = copy_ssa_name (running_off, NULL);
7656 incr = gimple_build_assign (newoff, POINTER_PLUS_EXPR,
7657 running_off, stride_step);
7658 vect_finish_stmt_generation (vinfo, stmt_info, incr, gsi);
7659
7660 running_off = newoff;
7661 group_el = 0;
7662 }
7663 if (g == group_size - 1
7664 && !slp)
7665 {
7666 if (j == 0 && i == 0)
7667 *vec_stmt = assign;
7668 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (assign);
7669 }
7670 }
7671 }
7672 next_stmt_info = DR_GROUP_NEXT_ELEMENT (next_stmt_info);
7673 if (slp)
7674 break;
7675 }
7676
7677 vec_oprnds.release ();
7678 return true;
7679 }
7680
7681 auto_vec<tree> dr_chain (group_size);
7682 oprnds.create (group_size);
7683
7684 /* Gather-scatter accesses perform only component accesses, so alignment
7685 is irrelevant for them. */
7686 if (memory_access_type == VMAT_GATHER_SCATTER)
7687 alignment_support_scheme = dr_unaligned_supported;
7688 else
7689 alignment_support_scheme
7690 = vect_supportable_dr_alignment (vinfo, first_dr_info, false);
7691
7692 gcc_assert (alignment_support_scheme);
7693 vec_loop_masks *loop_masks
7694 = (loop_vinfo && LOOP_VINFO_FULLY_MASKED_P (loop_vinfo)
7695 ? &LOOP_VINFO_MASKS (loop_vinfo)
7696 : NULL);
7697 /* Targets with store-lane instructions must not require explicit
7698 realignment. vect_supportable_dr_alignment always returns either
7699 dr_aligned or dr_unaligned_supported for masked operations. */
7700 gcc_assert ((memory_access_type != VMAT_LOAD_STORE_LANES
7701 && !mask
7702 && !loop_masks)
7703 || alignment_support_scheme == dr_aligned
7704 || alignment_support_scheme == dr_unaligned_supported);
7705
7706 if (memory_access_type == VMAT_CONTIGUOUS_DOWN
7707 || memory_access_type == VMAT_CONTIGUOUS_REVERSE)
7708 offset = size_int (-TYPE_VECTOR_SUBPARTS (vectype) + 1);
7709
7710 tree bump;
7711 tree vec_offset = NULL_TREE;
7712 if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
7713 {
7714 aggr_type = NULL_TREE;
7715 bump = NULL_TREE;
7716 }
7717 else if (memory_access_type == VMAT_GATHER_SCATTER)
7718 {
7719 aggr_type = elem_type;
7720 vect_get_strided_load_store_ops (stmt_info, loop_vinfo, &gs_info,
7721 &bump, &vec_offset);
7722 }
7723 else
7724 {
7725 if (memory_access_type == VMAT_LOAD_STORE_LANES)
7726 aggr_type = build_array_type_nelts (elem_type, vec_num * nunits);
7727 else
7728 aggr_type = vectype;
7729 bump = vect_get_data_ptr_increment (vinfo, dr_info, aggr_type,
7730 memory_access_type);
7731 }
7732
7733 if (mask)
7734 LOOP_VINFO_HAS_MASK_STORE (loop_vinfo) = true;
7735
7736 /* In case the vectorization factor (VF) is bigger than the number
7737 of elements that we can fit in a vectype (nunits), we have to generate
7738 more than one vector stmt, i.e., we need to "unroll" the
7739 vector stmt by a factor of VF/nunits. */
7740
7741 /* In case of interleaving (non-unit grouped access):
7742
7743 S1: &base + 2 = x2
7744 S2: &base = x0
7745 S3: &base + 1 = x1
7746 S4: &base + 3 = x3
7747
7748 We create vectorized stores starting from the base address (the access of
7749 the first stmt in the chain, S2 in the above example) when the last store
7750 stmt of the chain (S4) is reached:
7751
7752 VS1: &base = vx2
7753 VS2: &base + vec_size*1 = vx0
7754 VS3: &base + vec_size*2 = vx1
7755 VS4: &base + vec_size*3 = vx3
7756
7757 Then permutation statements are generated:
7758
7759 VS5: vx5 = VEC_PERM_EXPR < vx0, vx3, {0, 8, 1, 9, 2, 10, 3, 11} >
7760 VS6: vx6 = VEC_PERM_EXPR < vx0, vx3, {4, 12, 5, 13, 6, 14, 7, 15} >
7761 ...
7762
7763 And they are put in STMT_VINFO_VEC_STMT of the corresponding scalar stmts
7764 (the order of the data-refs in the output of vect_permute_store_chain
7765 corresponds to the order of scalar stmts in the interleaving chain - see
7766 the documentation of vect_permute_store_chain()).
7767
7768 In case of both multiple types and interleaving, the above vector stores
7769 and permutation stmts are created for every copy. The resulting vector
7770 stmts are put in STMT_VINFO_VEC_STMT for the first copy and in the
7771 corresponding STMT_VINFO_RELATED_STMT for the next copies.
7772 */
7773
7774 auto_vec<tree> vec_masks;
7775 tree vec_mask = NULL;
7776 auto_vec<tree> vec_offsets;
7777 auto_vec<vec<tree> > gvec_oprnds;
7778 gvec_oprnds.safe_grow_cleared (group_size);
7779 for (j = 0; j < ncopies; j++)
7780 {
7781 gimple *new_stmt;
7782 if (j == 0)
7783 {
7784 if (slp)
7785 {
7786 /* Get vectorized arguments for SLP_NODE. */
7787 vect_get_vec_defs (vinfo, stmt_info, slp_node, 1,
7788 op, &vec_oprnds);
7789 vec_oprnd = vec_oprnds[0];
7790 }
7791 else
7792 {
7793 /* For interleaved stores we collect vectorized defs for all the
7794 stores in the group in DR_CHAIN and OPRNDS. DR_CHAIN is then
7795 used as an input to vect_permute_store_chain().
7796
7797 If the store is not grouped, DR_GROUP_SIZE is 1, and DR_CHAIN
7798 and OPRNDS are of size 1. */
7799 stmt_vec_info next_stmt_info = first_stmt_info;
7800 for (i = 0; i < group_size; i++)
7801 {
7802 /* Since gaps are not supported for interleaved stores,
7803 DR_GROUP_SIZE is the exact number of stmts in the chain.
7804 Therefore, NEXT_STMT_INFO can't be NULL_TREE. In case
7805 there is no interleaving, DR_GROUP_SIZE is 1,
7806 and only one iteration of the loop will be executed. */
7807 op = vect_get_store_rhs (next_stmt_info);
7808 vect_get_vec_defs_for_operand (vinfo, next_stmt_info,
7809 ncopies, op, &gvec_oprnds[i]);
7810 vec_oprnd = gvec_oprnds[i][0];
7811 dr_chain.quick_push (gvec_oprnds[i][0]);
7812 oprnds.quick_push (gvec_oprnds[i][0]);
7813 next_stmt_info = DR_GROUP_NEXT_ELEMENT (next_stmt_info);
7814 }
7815 if (mask)
7816 {
7817 vect_get_vec_defs_for_operand (vinfo, stmt_info, ncopies,
7818 mask, &vec_masks, mask_vectype);
7819 vec_mask = vec_masks[0];
7820 }
7821 }
7822
7823 /* We should have caught mismatched types earlier. */
7824 gcc_assert (useless_type_conversion_p (vectype,
7825 TREE_TYPE (vec_oprnd)));
7826 bool simd_lane_access_p
7827 = STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) != 0;
7828 if (simd_lane_access_p
7829 && !loop_masks
7830 && TREE_CODE (DR_BASE_ADDRESS (first_dr_info->dr)) == ADDR_EXPR
7831 && VAR_P (TREE_OPERAND (DR_BASE_ADDRESS (first_dr_info->dr), 0))
7832 && integer_zerop (get_dr_vinfo_offset (vinfo, first_dr_info))
7833 && integer_zerop (DR_INIT (first_dr_info->dr))
7834 && alias_sets_conflict_p (get_alias_set (aggr_type),
7835 get_alias_set (TREE_TYPE (ref_type))))
7836 {
7837 dataref_ptr = unshare_expr (DR_BASE_ADDRESS (first_dr_info->dr));
7838 dataref_offset = build_int_cst (ref_type, 0);
7839 }
7840 else if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
7841 {
7842 vect_get_gather_scatter_ops (vinfo, loop, stmt_info, &gs_info,
7843 &dataref_ptr, &vec_offsets, ncopies);
7844 vec_offset = vec_offsets[0];
7845 }
7846 else
7847 dataref_ptr
7848 = vect_create_data_ref_ptr (vinfo, first_stmt_info, aggr_type,
7849 simd_lane_access_p ? loop : NULL,
7850 offset, &dummy, gsi, &ptr_incr,
7851 simd_lane_access_p, NULL_TREE, bump);
7852 }
7853 else
7854 {
7855 /* For interleaved stores we created vectorized defs for all the
7856 defs stored in OPRNDS in the previous iteration (previous copy).
7857 DR_CHAIN is then used as an input to vect_permute_store_chain().
7858 If the store is not grouped, DR_GROUP_SIZE is 1, and DR_CHAIN and
7859 OPRNDS are of size 1. */
7860 for (i = 0; i < group_size; i++)
7861 {
7862 vec_oprnd = gvec_oprnds[i][j];
7863 dr_chain[i] = gvec_oprnds[i][j];
7864 oprnds[i] = gvec_oprnds[i][j];
7865 }
7866 if (mask)
7867 vec_mask = vec_masks[j];
7868 if (dataref_offset)
7869 dataref_offset
7870 = int_const_binop (PLUS_EXPR, dataref_offset, bump);
7871 else if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
7872 vec_offset = vec_offsets[j];
7873 else
7874 dataref_ptr = bump_vector_ptr (vinfo, dataref_ptr, ptr_incr, gsi,
7875 stmt_info, bump);
7876 }
7877
7878 if (memory_access_type == VMAT_LOAD_STORE_LANES)
7879 {
7880 tree vec_array;
7881
7882 /* Get an array into which we can store the individual vectors. */
7883 vec_array = create_vector_array (vectype, vec_num);
7884
7885 /* Invalidate the current contents of VEC_ARRAY. This should
7886 become an RTL clobber too, which prevents the vector registers
7887 from being upward-exposed. */
7888 vect_clobber_variable (vinfo, stmt_info, gsi, vec_array);
7889
7890 /* Store the individual vectors into the array. */
7891 for (i = 0; i < vec_num; i++)
7892 {
7893 vec_oprnd = dr_chain[i];
7894 write_vector_array (vinfo, stmt_info,
7895 gsi, vec_oprnd, vec_array, i);
7896 }
7897
7898 tree final_mask = NULL;
7899 if (loop_masks)
7900 final_mask = vect_get_loop_mask (gsi, loop_masks, ncopies,
7901 vectype, j);
7902 if (vec_mask)
7903 final_mask = prepare_load_store_mask (mask_vectype, final_mask,
7904 vec_mask, gsi);
7905
7906 gcall *call;
7907 if (final_mask)
7908 {
7909 /* Emit:
7910 MASK_STORE_LANES (DATAREF_PTR, ALIAS_PTR, VEC_MASK,
7911 VEC_ARRAY). */
7912 unsigned int align = TYPE_ALIGN_UNIT (TREE_TYPE (vectype));
7913 tree alias_ptr = build_int_cst (ref_type, align);
7914 call = gimple_build_call_internal (IFN_MASK_STORE_LANES, 4,
7915 dataref_ptr, alias_ptr,
7916 final_mask, vec_array);
7917 }
7918 else
7919 {
7920 /* Emit:
7921 MEM_REF[...all elements...] = STORE_LANES (VEC_ARRAY). */
7922 data_ref = create_array_ref (aggr_type, dataref_ptr, ref_type);
7923 call = gimple_build_call_internal (IFN_STORE_LANES, 1,
7924 vec_array);
7925 gimple_call_set_lhs (call, data_ref);
7926 }
7927 gimple_call_set_nothrow (call, true);
7928 vect_finish_stmt_generation (vinfo, stmt_info, call, gsi);
7929 new_stmt = call;
7930
7931 /* Record that VEC_ARRAY is now dead. */
7932 vect_clobber_variable (vinfo, stmt_info, gsi, vec_array);
7933 }
7934 else
7935 {
7936 new_stmt = NULL;
7937 if (grouped_store)
7938 {
7939 if (j == 0)
7940 result_chain.create (group_size);
7941 /* Permute. */
7942 vect_permute_store_chain (vinfo, dr_chain, group_size, stmt_info,
7943 gsi, &result_chain);
7944 }
7945
7946 stmt_vec_info next_stmt_info = first_stmt_info;
7947 for (i = 0; i < vec_num; i++)
7948 {
7949 unsigned misalign;
7950 unsigned HOST_WIDE_INT align;
7951
7952 tree final_mask = NULL_TREE;
7953 if (loop_masks)
7954 final_mask = vect_get_loop_mask (gsi, loop_masks,
7955 vec_num * ncopies,
7956 vectype, vec_num * j + i);
7957 if (vec_mask)
7958 final_mask = prepare_load_store_mask (mask_vectype, final_mask,
7959 vec_mask, gsi);
7960
7961 if (memory_access_type == VMAT_GATHER_SCATTER)
7962 {
7963 tree scale = size_int (gs_info.scale);
7964 gcall *call;
7965 if (loop_masks)
7966 call = gimple_build_call_internal
7967 (IFN_MASK_SCATTER_STORE, 5, dataref_ptr, vec_offset,
7968 scale, vec_oprnd, final_mask);
7969 else
7970 call = gimple_build_call_internal
7971 (IFN_SCATTER_STORE, 4, dataref_ptr, vec_offset,
7972 scale, vec_oprnd);
7973 gimple_call_set_nothrow (call, true);
7974 vect_finish_stmt_generation (vinfo, stmt_info, call, gsi);
7975 new_stmt = call;
7976 break;
7977 }
7978
7979 if (i > 0)
7980 /* Bump the vector pointer. */
7981 dataref_ptr = bump_vector_ptr (vinfo, dataref_ptr, ptr_incr,
7982 gsi, stmt_info, bump);
7983
7984 if (slp)
7985 vec_oprnd = vec_oprnds[i];
7986 else if (grouped_store)
7987 /* For grouped stores vectorized defs are interleaved in
7988 vect_permute_store_chain(). */
7989 vec_oprnd = result_chain[i];
7990
7991 align = known_alignment (DR_TARGET_ALIGNMENT (first_dr_info));
7992 if (aligned_access_p (first_dr_info))
7993 misalign = 0;
7994 else if (DR_MISALIGNMENT (first_dr_info) == -1)
7995 {
7996 align = dr_alignment (vect_dr_behavior (vinfo, first_dr_info));
7997 misalign = 0;
7998 }
7999 else
8000 misalign = DR_MISALIGNMENT (first_dr_info);
8001 if (dataref_offset == NULL_TREE
8002 && TREE_CODE (dataref_ptr) == SSA_NAME)
8003 set_ptr_info_alignment (get_ptr_info (dataref_ptr), align,
8004 misalign);
8005
8006 if (memory_access_type == VMAT_CONTIGUOUS_REVERSE)
8007 {
8008 tree perm_mask = perm_mask_for_reverse (vectype);
8009 tree perm_dest = vect_create_destination_var
8010 (vect_get_store_rhs (stmt_info), vectype);
8011 tree new_temp = make_ssa_name (perm_dest);
8012
8013 /* Generate the permute statement. */
8014 gimple *perm_stmt
8015 = gimple_build_assign (new_temp, VEC_PERM_EXPR, vec_oprnd,
8016 vec_oprnd, perm_mask);
8017 vect_finish_stmt_generation (vinfo, stmt_info, perm_stmt, gsi);
8018
8019 perm_stmt = SSA_NAME_DEF_STMT (new_temp);
8020 vec_oprnd = new_temp;
8021 }
8022
8023 /* Arguments are ready. Create the new vector stmt. */
8024 if (final_mask)
8025 {
8026 align = least_bit_hwi (misalign | align);
8027 tree ptr = build_int_cst (ref_type, align);
8028 gcall *call
8029 = gimple_build_call_internal (IFN_MASK_STORE, 4,
8030 dataref_ptr, ptr,
8031 final_mask, vec_oprnd);
8032 gimple_call_set_nothrow (call, true);
8033 vect_finish_stmt_generation (vinfo, stmt_info, call, gsi);
8034 new_stmt = call;
8035 }
8036 else
8037 {
8038 data_ref = fold_build2 (MEM_REF, vectype,
8039 dataref_ptr,
8040 dataref_offset
8041 ? dataref_offset
8042 : build_int_cst (ref_type, 0));
8043 if (aligned_access_p (first_dr_info))
8044 ;
8045 else if (DR_MISALIGNMENT (first_dr_info) == -1)
8046 TREE_TYPE (data_ref)
8047 = build_aligned_type (TREE_TYPE (data_ref),
8048 align * BITS_PER_UNIT);
8049 else
8050 TREE_TYPE (data_ref)
8051 = build_aligned_type (TREE_TYPE (data_ref),
8052 TYPE_ALIGN (elem_type));
8053 vect_copy_ref_info (data_ref, DR_REF (first_dr_info->dr));
8054 new_stmt = gimple_build_assign (data_ref, vec_oprnd);
8055 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
8056 }
8057
8058 if (slp)
8059 continue;
8060
8061 next_stmt_info = DR_GROUP_NEXT_ELEMENT (next_stmt_info);
8062 if (!next_stmt_info)
8063 break;
8064 }
8065 }
8066 if (!slp)
8067 {
8068 if (j == 0)
8069 *vec_stmt = new_stmt;
8070 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
8071 }
8072 }
8073
8074 for (i = 0; i < group_size; ++i)
8075 {
8076 vec<tree> oprndsi = gvec_oprnds[i];
8077 oprndsi.release ();
8078 }
8079 oprnds.release ();
8080 result_chain.release ();
8081 vec_oprnds.release ();
8082
8083 return true;
8084 }
8085
8086 /* Given a vector type VECTYPE, turns permutation SEL into the equivalent
8087 VECTOR_CST mask. No checks are made that the target platform supports the
8088 mask, so callers may wish to test can_vec_perm_const_p separately, or use
8089 vect_gen_perm_mask_checked. */
8090
8091 tree
8092 vect_gen_perm_mask_any (tree vectype, const vec_perm_indices &sel)
8093 {
8094 tree mask_type;
8095
8096 poly_uint64 nunits = sel.length ();
8097 gcc_assert (known_eq (nunits, TYPE_VECTOR_SUBPARTS (vectype)));
8098
8099 mask_type = build_vector_type (ssizetype, nunits);
8100 return vec_perm_indices_to_tree (mask_type, sel);
8101 }
8102
8103 /* Checked version of vect_gen_perm_mask_any. Asserts can_vec_perm_const_p,
8104 i.e. that the target supports the pattern _for arbitrary input vectors_. */
8105
8106 tree
8107 vect_gen_perm_mask_checked (tree vectype, const vec_perm_indices &sel)
8108 {
8109 gcc_assert (can_vec_perm_const_p (TYPE_MODE (vectype), sel));
8110 return vect_gen_perm_mask_any (vectype, sel);
8111 }
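/* A minimal usage sketch for the two helpers above, mirroring the
   fixed-length callers in this file (made-up variable names; NUNITS is
   assumed to be a compile-time constant and the reversal permutation is
   assumed to be supported by the target, otherwise vect_gen_perm_mask_any
   plus a separate can_vec_perm_const_p check would be needed):

     vec_perm_builder sel (nunits, nunits, 1);
     for (unsigned int i = 0; i < nunits; ++i)
       sel.quick_push (nunits - 1 - i);
     vec_perm_indices indices (sel, 1, nunits);
     tree mask = vect_gen_perm_mask_checked (vectype, indices);

   MASK can then be used as the third operand of a VEC_PERM_EXPR that
   reverses the elements of a vector of type VECTYPE.  */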
8112
8113 /* Given vector variables X and Y that were generated for the scalar
8114 stmt STMT_INFO, generate instructions to permute the vector elements of X
8115 and Y using the permutation mask MASK_VEC, insert them at *GSI, and return
8116 the permuted vector variable. */
8117
8118 static tree
8119 permute_vec_elements (vec_info *vinfo,
8120 tree x, tree y, tree mask_vec, stmt_vec_info stmt_info,
8121 gimple_stmt_iterator *gsi)
8122 {
8123 tree vectype = TREE_TYPE (x);
8124 tree perm_dest, data_ref;
8125 gimple *perm_stmt;
8126
8127 tree scalar_dest = gimple_get_lhs (stmt_info->stmt);
8128 if (scalar_dest && TREE_CODE (scalar_dest) == SSA_NAME)
8129 perm_dest = vect_create_destination_var (scalar_dest, vectype);
8130 else
8131 perm_dest = vect_get_new_vect_var (vectype, vect_simple_var, NULL);
8132 data_ref = make_ssa_name (perm_dest);
8133
8134 /* Generate the permute statement. */
8135 perm_stmt = gimple_build_assign (data_ref, VEC_PERM_EXPR, x, y, mask_vec);
8136 vect_finish_stmt_generation (vinfo, stmt_info, perm_stmt, gsi);
8137
8138 return data_ref;
8139 }
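/* The helper above emits a single statement of the form (illustrative
   SSA names, with MASK_VEC typically being a VECTOR_CST):

     perm_dest_1 = VEC_PERM_EXPR <x_2, y_3, MASK_VEC>;

   and returns the SSA name holding the permuted vector.  */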
8140
8141 /* Hoist the definitions of all SSA uses on STMT_INFO out of the loop LOOP,
8142 inserting them on the loop's preheader edge. Returns true if we
8143 were successful in doing so (and thus STMT_INFO can then be moved),
8144 otherwise returns false. */
8145
8146 static bool
8147 hoist_defs_of_uses (stmt_vec_info stmt_info, class loop *loop)
8148 {
8149 ssa_op_iter i;
8150 tree op;
8151 bool any = false;
8152
8153 FOR_EACH_SSA_TREE_OPERAND (op, stmt_info->stmt, i, SSA_OP_USE)
8154 {
8155 gimple *def_stmt = SSA_NAME_DEF_STMT (op);
8156 if (!gimple_nop_p (def_stmt)
8157 && flow_bb_inside_loop_p (loop, gimple_bb (def_stmt)))
8158 {
8159 /* Make sure we don't need to recurse. While we could do
8160 so in simple cases, when there are more complex use webs
8161 we don't have an easy way to preserve stmt order to fulfil
8162 dependencies within them. */
8163 tree op2;
8164 ssa_op_iter i2;
8165 if (gimple_code (def_stmt) == GIMPLE_PHI)
8166 return false;
8167 FOR_EACH_SSA_TREE_OPERAND (op2, def_stmt, i2, SSA_OP_USE)
8168 {
8169 gimple *def_stmt2 = SSA_NAME_DEF_STMT (op2);
8170 if (!gimple_nop_p (def_stmt2)
8171 && flow_bb_inside_loop_p (loop, gimple_bb (def_stmt2)))
8172 return false;
8173 }
8174 any = true;
8175 }
8176 }
8177
8178 if (!any)
8179 return true;
8180
8181 FOR_EACH_SSA_TREE_OPERAND (op, stmt_info->stmt, i, SSA_OP_USE)
8182 {
8183 gimple *def_stmt = SSA_NAME_DEF_STMT (op);
8184 if (!gimple_nop_p (def_stmt)
8185 && flow_bb_inside_loop_p (loop, gimple_bb (def_stmt)))
8186 {
8187 gimple_stmt_iterator gsi = gsi_for_stmt (def_stmt);
8188 gsi_remove (&gsi, false);
8189 gsi_insert_on_edge_immediate (loop_preheader_edge (loop), def_stmt);
8190 }
8191 }
8192
8193 return true;
8194 }
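/* An illustrative sketch (made-up SSA names): for an invariant load such as

     _1 = n_7(D) + 1;
     x_8 = a[_1];         <-- STMT_INFO

   the definition of _1 is moved onto the preheader edge so that the load
   itself can be hoisted there as well.  If _1 were defined by a PHI, or
   depended on yet another in-loop definition, the function above returns
   false and vectorizable_load materializes the invariant load inside the
   loop body instead.  */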
8195
8196 /* vectorizable_load.
8197
8198 Check if STMT_INFO reads a non-scalar data-ref (array/pointer/structure)
8199 that can be vectorized.
8200 If VEC_STMT is also passed, vectorize STMT_INFO: create a vectorized
8201 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
8202 Return true if STMT_INFO is vectorizable in this way. */
8203
8204 static bool
8205 vectorizable_load (vec_info *vinfo,
8206 stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
8207 gimple **vec_stmt, slp_tree slp_node,
8208 stmt_vector_for_cost *cost_vec)
8209 {
8210 tree scalar_dest;
8211 tree vec_dest = NULL;
8212 tree data_ref = NULL;
8213 loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
8214 class loop *loop = NULL;
8215 class loop *containing_loop = gimple_bb (stmt_info->stmt)->loop_father;
8216 bool nested_in_vect_loop = false;
8217 tree elem_type;
8218 tree new_temp;
8219 machine_mode mode;
8220 tree dummy;
8221 tree dataref_ptr = NULL_TREE;
8222 tree dataref_offset = NULL_TREE;
8223 gimple *ptr_incr = NULL;
8224 int ncopies;
8225 int i, j;
8226 unsigned int group_size;
8227 poly_uint64 group_gap_adj;
8228 tree msq = NULL_TREE, lsq;
8229 tree offset = NULL_TREE;
8230 tree byte_offset = NULL_TREE;
8231 tree realignment_token = NULL_TREE;
8232 gphi *phi = NULL;
8233 vec<tree> dr_chain = vNULL;
8234 bool grouped_load = false;
8235 stmt_vec_info first_stmt_info;
8236 stmt_vec_info first_stmt_info_for_drptr = NULL;
8237 bool compute_in_loop = false;
8238 class loop *at_loop;
8239 int vec_num;
8240 bool slp = (slp_node != NULL);
8241 bool slp_perm = false;
8242 bb_vec_info bb_vinfo = dyn_cast <bb_vec_info> (vinfo);
8243 poly_uint64 vf;
8244 tree aggr_type;
8245 gather_scatter_info gs_info;
8246 tree ref_type;
8247 enum vect_def_type mask_dt = vect_unknown_def_type;
8248
8249 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
8250 return false;
8251
8252 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
8253 && ! vec_stmt)
8254 return false;
8255
8256 if (!STMT_VINFO_DATA_REF (stmt_info))
8257 return false;
8258
8259 /* ??? Alignment analysis for SLP looks at SLP_TREE_SCALAR_STMTS[0]
8260 for unpermuted loads but we get passed SLP_TREE_REPRESENTATIVE
8261 which can be different when reduction chains were re-ordered.
8262 Now that we have figured out we're a dataref, reset stmt_info back to
8263 SLP_TREE_SCALAR_STMTS[0]. Once we're SLP-only this should be
8264 refactored in a way that maintains the dr_vec_info pointer for the
8265 relevant access explicitly. */
8266 stmt_vec_info orig_stmt_info = stmt_info;
8267 if (slp_node)
8268 stmt_info = SLP_TREE_SCALAR_STMTS (slp_node)[0];
8269
8270 tree mask = NULL_TREE, mask_vectype = NULL_TREE;
8271 if (gassign *assign = dyn_cast <gassign *> (stmt_info->stmt))
8272 {
8273 scalar_dest = gimple_assign_lhs (assign);
8274 if (TREE_CODE (scalar_dest) != SSA_NAME)
8275 return false;
8276
8277 tree_code code = gimple_assign_rhs_code (assign);
8278 if (code != ARRAY_REF
8279 && code != BIT_FIELD_REF
8280 && code != INDIRECT_REF
8281 && code != COMPONENT_REF
8282 && code != IMAGPART_EXPR
8283 && code != REALPART_EXPR
8284 && code != MEM_REF
8285 && TREE_CODE_CLASS (code) != tcc_declaration)
8286 return false;
8287 }
8288 else
8289 {
8290 gcall *call = dyn_cast <gcall *> (stmt_info->stmt);
8291 if (!call || !gimple_call_internal_p (call))
8292 return false;
8293
8294 internal_fn ifn = gimple_call_internal_fn (call);
8295 if (!internal_load_fn_p (ifn))
8296 return false;
8297
8298 scalar_dest = gimple_call_lhs (call);
8299 if (!scalar_dest)
8300 return false;
8301
8302 int mask_index = internal_fn_mask_index (ifn);
8303 if (mask_index >= 0)
8304 {
8305 mask = gimple_call_arg (call, mask_index);
8306 if (!vect_check_scalar_mask (vinfo, stmt_info, mask, &mask_dt,
8307 &mask_vectype))
8308 return false;
8309 }
8310 }
8311
8312 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
8313 poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
8314
8315 if (loop_vinfo)
8316 {
8317 loop = LOOP_VINFO_LOOP (loop_vinfo);
8318 nested_in_vect_loop = nested_in_vect_loop_p (loop, stmt_info);
8319 vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
8320 }
8321 else
8322 vf = 1;
8323
8324 /* Multiple types in SLP are handled by creating the appropriate number of
8325 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
8326 case of SLP. */
8327 if (slp)
8328 ncopies = 1;
8329 else
8330 ncopies = vect_get_num_copies (loop_vinfo, vectype);
8331
8332 gcc_assert (ncopies >= 1);
8333
8334 /* FORNOW. This restriction should be relaxed. */
8335 if (nested_in_vect_loop && ncopies > 1)
8336 {
8337 if (dump_enabled_p ())
8338 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
8339 "multiple types in nested loop.\n");
8340 return false;
8341 }
8342
8343 /* Invalidate assumptions made by dependence analysis when vectorization
8344 on the unrolled body effectively re-orders stmts. */
8345 if (ncopies > 1
8346 && STMT_VINFO_MIN_NEG_DIST (stmt_info) != 0
8347 && maybe_gt (LOOP_VINFO_VECT_FACTOR (loop_vinfo),
8348 STMT_VINFO_MIN_NEG_DIST (stmt_info)))
8349 {
8350 if (dump_enabled_p ())
8351 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
8352 "cannot perform implicit CSE when unrolling "
8353 "with negative dependence distance\n");
8354 return false;
8355 }
8356
8357 elem_type = TREE_TYPE (vectype);
8358 mode = TYPE_MODE (vectype);
8359
8360 /* FORNOW. In some cases we can vectorize even if the data type is not
8361 supported (e.g. data copies). */
8362 if (optab_handler (mov_optab, mode) == CODE_FOR_nothing)
8363 {
8364 if (dump_enabled_p ())
8365 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
8366 "Aligned load, but unsupported type.\n");
8367 return false;
8368 }
8369
8370 /* Check if the load is a part of an interleaving chain. */
8371 if (STMT_VINFO_GROUPED_ACCESS (stmt_info))
8372 {
8373 grouped_load = true;
8374 /* FORNOW */
8375 gcc_assert (!nested_in_vect_loop);
8376 gcc_assert (!STMT_VINFO_GATHER_SCATTER_P (stmt_info));
8377
8378 first_stmt_info = DR_GROUP_FIRST_ELEMENT (stmt_info);
8379 group_size = DR_GROUP_SIZE (first_stmt_info);
8380
8381 /* Refuse non-SLP vectorization of SLP-only groups. */
8382 if (!slp && STMT_VINFO_SLP_VECT_ONLY (first_stmt_info))
8383 {
8384 if (dump_enabled_p ())
8385 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
8386 "cannot vectorize load in non-SLP mode.\n");
8387 return false;
8388 }
8389
8390 if (slp && SLP_TREE_LOAD_PERMUTATION (slp_node).exists ())
8391 {
8392 slp_perm = true;
8393
8394 if (!loop_vinfo)
8395 {
8396 /* In BB vectorization we may not actually use a loaded vector
8397 accessing elements in excess of DR_GROUP_SIZE. */
8398 stmt_vec_info group_info = SLP_TREE_SCALAR_STMTS (slp_node)[0];
8399 group_info = DR_GROUP_FIRST_ELEMENT (group_info);
8400 unsigned HOST_WIDE_INT nunits;
8401 unsigned j, k, maxk = 0;
8402 FOR_EACH_VEC_ELT (SLP_TREE_LOAD_PERMUTATION (slp_node), j, k)
8403 if (k > maxk)
8404 maxk = k;
8405 tree vectype = STMT_VINFO_VECTYPE (group_info);
8406 if (!TYPE_VECTOR_SUBPARTS (vectype).is_constant (&nunits)
8407 || maxk >= (DR_GROUP_SIZE (group_info) & ~(nunits - 1)))
8408 {
8409 if (dump_enabled_p ())
8410 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
8411 "BB vectorization with gaps at the end of "
8412 "a load is not supported\n");
8413 return false;
8414 }
8415 }
8416
8417 auto_vec<tree> tem;
8418 unsigned n_perms;
8419 if (!vect_transform_slp_perm_load (vinfo, slp_node, tem, NULL, vf,
8420 true, &n_perms))
8421 {
8422 if (dump_enabled_p ())
8423 dump_printf_loc (MSG_MISSED_OPTIMIZATION,
8424 vect_location,
8425 "unsupported load permutation\n");
8426 return false;
8427 }
8428 }
8429
8430 /* Invalidate assumptions made by dependence analysis when vectorization
8431 on the unrolled body effectively re-orders stmts. */
8432 if (!PURE_SLP_STMT (stmt_info)
8433 && STMT_VINFO_MIN_NEG_DIST (stmt_info) != 0
8434 && maybe_gt (LOOP_VINFO_VECT_FACTOR (loop_vinfo),
8435 STMT_VINFO_MIN_NEG_DIST (stmt_info)))
8436 {
8437 if (dump_enabled_p ())
8438 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
8439 "cannot perform implicit CSE when performing "
8440 "group loads with negative dependence distance\n");
8441 return false;
8442 }
8443 }
8444 else
8445 group_size = 1;
8446
8447 vect_memory_access_type memory_access_type;
8448 enum dr_alignment_support alignment_support_scheme;
8449 if (!get_load_store_type (vinfo, stmt_info, vectype, slp_node, mask, VLS_LOAD,
8450 ncopies, &memory_access_type,
8451 &alignment_support_scheme, &gs_info))
8452 return false;
8453
8454 if (mask)
8455 {
8456 if (memory_access_type == VMAT_CONTIGUOUS)
8457 {
8458 machine_mode vec_mode = TYPE_MODE (vectype);
8459 if (!VECTOR_MODE_P (vec_mode)
8460 || !can_vec_mask_load_store_p (vec_mode,
8461 TYPE_MODE (mask_vectype), true))
8462 return false;
8463 }
8464 else if (memory_access_type != VMAT_LOAD_STORE_LANES
8465 && memory_access_type != VMAT_GATHER_SCATTER)
8466 {
8467 if (dump_enabled_p ())
8468 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
8469 "unsupported access type for masked load.\n");
8470 return false;
8471 }
8472 }
8473
8474 if (!vec_stmt) /* transformation not required. */
8475 {
8476 if (!slp)
8477 STMT_VINFO_MEMORY_ACCESS_TYPE (stmt_info) = memory_access_type;
8478
8479 if (loop_vinfo
8480 && LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo))
8481 check_load_store_for_partial_vectors (loop_vinfo, vectype, VLS_LOAD,
8482 group_size, memory_access_type,
8483 &gs_info, mask);
8484
8485 if (dump_enabled_p ()
8486 && memory_access_type != VMAT_ELEMENTWISE
8487 && memory_access_type != VMAT_GATHER_SCATTER
8488 && alignment_support_scheme != dr_aligned)
8489 dump_printf_loc (MSG_NOTE, vect_location,
8490 "Vectorizing an unaligned access.\n");
8491
8492 STMT_VINFO_TYPE (orig_stmt_info) = load_vec_info_type;
8493 vect_model_load_cost (vinfo, stmt_info, ncopies, vf, memory_access_type,
8494 slp_node, cost_vec);
8495 return true;
8496 }
8497
8498 if (!slp)
8499 gcc_assert (memory_access_type
8500 == STMT_VINFO_MEMORY_ACCESS_TYPE (stmt_info));
8501
8502 if (dump_enabled_p ())
8503 dump_printf_loc (MSG_NOTE, vect_location,
8504 "transform load. ncopies = %d\n", ncopies);
8505
8506 /* Transform. */
8507
8508 dr_vec_info *dr_info = STMT_VINFO_DR_INFO (stmt_info), *first_dr_info = NULL;
8509 ensure_base_align (dr_info);
8510
8511 if (memory_access_type == VMAT_GATHER_SCATTER && gs_info.decl)
8512 {
8513 vect_build_gather_load_calls (vinfo,
8514 stmt_info, gsi, vec_stmt, &gs_info, mask);
8515 return true;
8516 }
8517
8518 if (memory_access_type == VMAT_INVARIANT)
8519 {
8520 gcc_assert (!grouped_load && !mask && !bb_vinfo);
8521 /* If we have versioned for aliasing or the loop doesn't
8522 have any data dependencies that would preclude this,
8523 then we are sure this is a loop invariant load and
8524 thus we can insert it on the preheader edge. */
8525 bool hoist_p = (LOOP_VINFO_NO_DATA_DEPENDENCIES (loop_vinfo)
8526 && !nested_in_vect_loop
8527 && hoist_defs_of_uses (stmt_info, loop));
8528 if (hoist_p)
8529 {
8530 gassign *stmt = as_a <gassign *> (stmt_info->stmt);
8531 if (dump_enabled_p ())
8532 dump_printf_loc (MSG_NOTE, vect_location,
8533 "hoisting out of the vectorized loop: %G", stmt);
8534 scalar_dest = copy_ssa_name (scalar_dest);
8535 tree rhs = unshare_expr (gimple_assign_rhs1 (stmt));
8536 gsi_insert_on_edge_immediate
8537 (loop_preheader_edge (loop),
8538 gimple_build_assign (scalar_dest, rhs));
8539 }
8540 /* These copies are all equivalent, but currently the representation
8541 requires a separate STMT_VINFO_VEC_STMT for each one. */
8542 gimple_stmt_iterator gsi2 = *gsi;
8543 gsi_next (&gsi2);
8544 for (j = 0; j < ncopies; j++)
8545 {
8546 if (hoist_p)
8547 new_temp = vect_init_vector (vinfo, stmt_info, scalar_dest,
8548 vectype, NULL);
8549 else
8550 new_temp = vect_init_vector (vinfo, stmt_info, scalar_dest,
8551 vectype, &gsi2);
8552 gimple *new_stmt = SSA_NAME_DEF_STMT (new_temp);
8553 if (slp)
8554 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
8555 else
8556 {
8557 if (j == 0)
8558 *vec_stmt = new_stmt;
8559 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
8560 }
8561 }
8562 return true;
8563 }
8564
8565 if (memory_access_type == VMAT_ELEMENTWISE
8566 || memory_access_type == VMAT_STRIDED_SLP)
8567 {
8568 gimple_stmt_iterator incr_gsi;
8569 bool insert_after;
8570 tree offvar;
8571 tree ivstep;
8572 tree running_off;
8573 vec<constructor_elt, va_gc> *v = NULL;
8574 tree stride_base, stride_step, alias_off;
8575 /* Checked by get_load_store_type. */
8576 unsigned int const_nunits = nunits.to_constant ();
8577 unsigned HOST_WIDE_INT cst_offset = 0;
8578 tree dr_offset;
8579
8580 gcc_assert (!LOOP_VINFO_FULLY_MASKED_P (loop_vinfo));
8581 gcc_assert (!nested_in_vect_loop);
8582
8583 if (grouped_load)
8584 {
8585 first_stmt_info = DR_GROUP_FIRST_ELEMENT (stmt_info);
8586 first_dr_info = STMT_VINFO_DR_INFO (first_stmt_info);
8587 }
8588 else
8589 {
8590 first_stmt_info = stmt_info;
8591 first_dr_info = dr_info;
8592 }
8593 if (slp && grouped_load)
8594 {
8595 group_size = DR_GROUP_SIZE (first_stmt_info);
8596 ref_type = get_group_alias_ptr_type (first_stmt_info);
8597 }
8598 else
8599 {
8600 if (grouped_load)
8601 cst_offset
8602 = (tree_to_uhwi (TYPE_SIZE_UNIT (TREE_TYPE (vectype)))
8603 * vect_get_place_in_interleaving_chain (stmt_info,
8604 first_stmt_info));
8605 group_size = 1;
8606 ref_type = reference_alias_ptr_type (DR_REF (dr_info->dr));
8607 }
8608
8609 dr_offset = get_dr_vinfo_offset (vinfo, first_dr_info);
8610 stride_base
8611 = fold_build_pointer_plus
8612 (DR_BASE_ADDRESS (first_dr_info->dr),
8613 size_binop (PLUS_EXPR,
8614 convert_to_ptrofftype (dr_offset),
8615 convert_to_ptrofftype (DR_INIT (first_dr_info->dr))));
8616 stride_step = fold_convert (sizetype, DR_STEP (first_dr_info->dr));
8617
8618 /* For a load with loop-invariant (but other than power-of-2)
8619 stride (i.e. not a grouped access) like so:
8620
8621 for (i = 0; i < n; i += stride)
8622 ... = array[i];
8623
8624 we generate a new induction variable and new accesses to
8625 form a new vector (or vectors, depending on ncopies):
8626
8627 for (j = 0; ; j += VF*stride)
8628 tmp1 = array[j];
8629 tmp2 = array[j + stride];
8630 ...
8631 vectemp = {tmp1, tmp2, ...}
8632 */
8633
8634 ivstep = fold_build2 (MULT_EXPR, TREE_TYPE (stride_step), stride_step,
8635 build_int_cst (TREE_TYPE (stride_step), vf));
8636
8637 standard_iv_increment_position (loop, &incr_gsi, &insert_after);
8638
8639 stride_base = cse_and_gimplify_to_preheader (loop_vinfo, stride_base);
8640 ivstep = cse_and_gimplify_to_preheader (loop_vinfo, ivstep);
8641 create_iv (stride_base, ivstep, NULL,
8642 loop, &incr_gsi, insert_after,
8643 &offvar, NULL);
8644
8645 stride_step = cse_and_gimplify_to_preheader (loop_vinfo, stride_step);
8646
8647 running_off = offvar;
8648 alias_off = build_int_cst (ref_type, 0);
8649 int nloads = const_nunits;
8650 int lnel = 1;
8651 tree ltype = TREE_TYPE (vectype);
8652 tree lvectype = vectype;
8653 auto_vec<tree> dr_chain;
8654 if (memory_access_type == VMAT_STRIDED_SLP)
8655 {
8656 if (group_size < const_nunits)
8657 {
8658 /* First check if vec_init optab supports construction from vector
8659 elts directly. Otherwise avoid emitting a constructor of
8660 vector elements by performing the loads using an integer type
8661 of the same size, constructing a vector of those and then
8662 re-interpreting it as the original vector type. This avoids a
8663 huge runtime penalty due to the general inability to perform
8664 store forwarding from smaller stores to a larger load. */
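/* E.g. with a V4SI vectype and GROUP_SIZE == 2 this either loads two
   V2SI halves and constructs the V4SI from them directly, or loads two
   DImode integers, builds a V2DI and view-converts the result back to
   V4SI.  */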
8665 tree ptype;
8666 tree vtype
8667 = vector_vector_composition_type (vectype,
8668 const_nunits / group_size,
8669 &ptype);
8670 if (vtype != NULL_TREE)
8671 {
8672 nloads = const_nunits / group_size;
8673 lnel = group_size;
8674 lvectype = vtype;
8675 ltype = ptype;
8676 }
8677 }
8678 else
8679 {
8680 nloads = 1;
8681 lnel = const_nunits;
8682 ltype = vectype;
8683 }
8684 ltype = build_aligned_type (ltype, TYPE_ALIGN (TREE_TYPE (vectype)));
8685 }
8686 /* If the vectype has a single element, load the whole vector(1) scalar_type at once.  */
8687 else if (nloads == 1)
8688 ltype = vectype;
8689
8690 if (slp)
8691 {
8692 /* For SLP permutation support we need to load the whole group,
8693 not only the number of vector stmts the permutation result
8694 fits in. */
8695 if (slp_perm)
8696 {
8697 /* We don't yet generate SLP_TREE_LOAD_PERMUTATIONs for
8698 variable VF. */
8699 unsigned int const_vf = vf.to_constant ();
8700 ncopies = CEIL (group_size * const_vf, const_nunits);
8701 dr_chain.create (ncopies);
8702 }
8703 else
8704 ncopies = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
8705 }
8706 unsigned int group_el = 0;
8707 unsigned HOST_WIDE_INT
8708 elsz = tree_to_uhwi (TYPE_SIZE_UNIT (TREE_TYPE (vectype)));
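/* Generate the piecewise loads of LTYPE and assemble them into vectors
   of LVECTYPE, stepping RUNNING_OFF by STRIDE_STEP as we go.  */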
8709 for (j = 0; j < ncopies; j++)
8710 {
8711 if (nloads > 1)
8712 vec_alloc (v, nloads);
8713 gimple *new_stmt = NULL;
8714 for (i = 0; i < nloads; i++)
8715 {
8716 tree this_off = build_int_cst (TREE_TYPE (alias_off),
8717 group_el * elsz + cst_offset);
8718 tree data_ref = build2 (MEM_REF, ltype, running_off, this_off);
8719 vect_copy_ref_info (data_ref, DR_REF (first_dr_info->dr));
8720 new_stmt = gimple_build_assign (make_ssa_name (ltype), data_ref);
8721 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
8722 if (nloads > 1)
8723 CONSTRUCTOR_APPEND_ELT (v, NULL_TREE,
8724 gimple_assign_lhs (new_stmt));
8725
8726 group_el += lnel;
8727 if (! slp
8728 || group_el == group_size)
8729 {
8730 tree newoff = copy_ssa_name (running_off);
8731 gimple *incr = gimple_build_assign (newoff, POINTER_PLUS_EXPR,
8732 running_off, stride_step);
8733 vect_finish_stmt_generation (vinfo, stmt_info, incr, gsi);
8734
8735 running_off = newoff;
8736 group_el = 0;
8737 }
8738 }
8739 if (nloads > 1)
8740 {
8741 tree vec_inv = build_constructor (lvectype, v);
8742 new_temp = vect_init_vector (vinfo, stmt_info,
8743 vec_inv, lvectype, gsi);
8744 new_stmt = SSA_NAME_DEF_STMT (new_temp);
8745 if (lvectype != vectype)
8746 {
8747 new_stmt = gimple_build_assign (make_ssa_name (vectype),
8748 VIEW_CONVERT_EXPR,
8749 build1 (VIEW_CONVERT_EXPR,
8750 vectype, new_temp));
8751 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
8752 }
8753 }
8754
8755 if (slp)
8756 {
8757 if (slp_perm)
8758 dr_chain.quick_push (gimple_assign_lhs (new_stmt));
8759 else
8760 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
8761 }
8762 else
8763 {
8764 if (j == 0)
8765 *vec_stmt = new_stmt;
8766 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
8767 }
8768 }
8769 if (slp_perm)
8770 {
8771 unsigned n_perms;
8772 vect_transform_slp_perm_load (vinfo, slp_node, dr_chain, gsi, vf,
8773 false, &n_perms);
8774 }
8775 return true;
8776 }
8777
8778 if (memory_access_type == VMAT_GATHER_SCATTER
8779 || (!slp && memory_access_type == VMAT_CONTIGUOUS))
8780 grouped_load = false;
8781
8782 if (grouped_load)
8783 {
8784 first_stmt_info = DR_GROUP_FIRST_ELEMENT (stmt_info);
8785 group_size = DR_GROUP_SIZE (first_stmt_info);
8786 /* For SLP vectorization we directly vectorize a subchain
8787 without permutation. */
8788 if (slp && ! SLP_TREE_LOAD_PERMUTATION (slp_node).exists ())
8789 first_stmt_info = SLP_TREE_SCALAR_STMTS (slp_node)[0];
8790 /* For BB vectorization always use the first stmt to base
8791 the data ref pointer on. */
8792 if (bb_vinfo)
8793 first_stmt_info_for_drptr
8794 = vect_find_first_scalar_stmt_in_slp (slp_node);
8795
8796 /* Check if the chain of loads is already vectorized. */
8797 if (STMT_VINFO_VEC_STMTS (first_stmt_info).exists ()
8798 /* For SLP we would need to copy over SLP_TREE_VEC_STMTS.
8799 ??? But we can only do so if there is exactly one
8800 as we have no way to get at the rest. Leave the CSE
8801 opportunity alone.
8802 ??? With the group load eventually participating
8803 in multiple different permutations (having multiple
8804 slp nodes which refer to the same group) the CSE
8805 is even wrong code. See PR56270. */
8806 && !slp)
8807 {
8808 *vec_stmt = STMT_VINFO_VEC_STMTS (stmt_info)[0];
8809 return true;
8810 }
8811 first_dr_info = STMT_VINFO_DR_INFO (first_stmt_info);
8812 group_gap_adj = 0;
8813
8814 /* VEC_NUM is the number of vect stmts to be created for this group. */
8815 if (slp)
8816 {
8817 grouped_load = false;
8818 /* If an SLP permutation is from N elements to N elements,
8819 and if one vector holds a whole number of N, we can load
8820 the inputs to the permutation in the same way as an
8821 unpermuted sequence. In other cases we need to load the
8822 whole group, not only the number of vector stmts the
8823 permutation result fits in. */
8824 unsigned scalar_lanes = SLP_TREE_LANES (slp_node);
8825 if (slp_perm
8826 && (group_size != scalar_lanes
8827 || !multiple_p (nunits, group_size)))
8828 {
8829 /* We don't yet generate such SLP_TREE_LOAD_PERMUTATIONs for
8830 variable VF; see vect_transform_slp_perm_load. */
8831 unsigned int const_vf = vf.to_constant ();
8832 unsigned int const_nunits = nunits.to_constant ();
8833 vec_num = CEIL (group_size * const_vf, const_nunits);
8834 group_gap_adj = vf * group_size - nunits * vec_num;
8835 }
8836 else
8837 {
8838 vec_num = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
8839 group_gap_adj
8840 = group_size - scalar_lanes;
8841 }
8842 }
8843 else
8844 vec_num = group_size;
8845
8846 ref_type = get_group_alias_ptr_type (first_stmt_info);
8847 }
8848 else
8849 {
8850 first_stmt_info = stmt_info;
8851 first_dr_info = dr_info;
8852 group_size = vec_num = 1;
8853 group_gap_adj = 0;
8854 ref_type = reference_alias_ptr_type (DR_REF (first_dr_info->dr));
8855 }
8856
8857 gcc_assert (alignment_support_scheme);
8858 vec_loop_masks *loop_masks
8859 = (loop_vinfo && LOOP_VINFO_FULLY_MASKED_P (loop_vinfo)
8860 ? &LOOP_VINFO_MASKS (loop_vinfo)
8861 : NULL);
8862 /* Targets with load-lane instructions must not require explicit
8863 realignment. vect_supportable_dr_alignment always returns either
8864 dr_aligned or dr_unaligned_supported for masked operations. */
8865 gcc_assert ((memory_access_type != VMAT_LOAD_STORE_LANES
8866 && !mask
8867 && !loop_masks)
8868 || alignment_support_scheme == dr_aligned
8869 || alignment_support_scheme == dr_unaligned_supported);
8870
8871 /* In case the vectorization factor (VF) is bigger than the number
8872 of elements that we can fit in a vectype (nunits), we have to generate
8873 more than one vector stmt - i.e - we need to "unroll" the
8874 vector stmt by a factor VF/nunits. In doing so, we record a pointer
8875 from one copy of the vector stmt to the next, in the field
8876 STMT_VINFO_RELATED_STMT. This is necessary in order to allow following
8877 stages to find the correct vector defs to be used when vectorizing
8878 stmts that use the defs of the current stmt. The example below
8879 illustrates the vectorization process when VF=16 and nunits=4 (i.e., we
8880 need to create 4 vectorized stmts):
8881
8882 before vectorization:
8883 RELATED_STMT VEC_STMT
8884 S1: x = memref - -
8885 S2: z = x + 1 - -
8886
8887 step 1: vectorize stmt S1:
8888 We first create the vector stmt VS1_0, and, as usual, record a
8889 pointer to it in the STMT_VINFO_VEC_STMT of the scalar stmt S1.
8890 Next, we create the vector stmt VS1_1, and record a pointer to
8891 it in the STMT_VINFO_RELATED_STMT of the vector stmt VS1_0.
8892 Similarly, for VS1_2 and VS1_3. This is the resulting chain of
8893 stmts and pointers:
8894 RELATED_STMT VEC_STMT
8895 VS1_0: vx0 = memref0 VS1_1 -
8896 VS1_1: vx1 = memref1 VS1_2 -
8897 VS1_2: vx2 = memref2 VS1_3 -
8898 VS1_3: vx3 = memref3 - -
8899 S1: x = load - VS1_0
8900 S2: z = x + 1 - -
8901 */
8902
8903 /* In case of interleaving (non-unit grouped access):
8904
8905 S1: x2 = &base + 2
8906 S2: x0 = &base
8907 S3: x1 = &base + 1
8908 S4: x3 = &base + 3
8909
8910 Vectorized loads are created in the order of memory accesses
8911 starting from the access of the first stmt of the chain:
8912
8913 VS1: vx0 = &base
8914 VS2: vx1 = &base + vec_size*1
8915 VS3: vx3 = &base + vec_size*2
8916 VS4: vx4 = &base + vec_size*3
8917
8918 Then permutation statements are generated:
8919
8920 VS5: vx5 = VEC_PERM_EXPR < vx0, vx1, { 0, 2, ..., i*2 } >
8921 VS6: vx6 = VEC_PERM_EXPR < vx0, vx1, { 1, 3, ..., i*2+1 } >
8922 ...
8923
8924 And they are put in STMT_VINFO_VEC_STMT of the corresponding scalar stmts
8925 (the order of the data-refs in the output of vect_permute_load_chain
8926 corresponds to the order of scalar stmts in the interleaving chain - see
8927 the documentation of vect_permute_load_chain()).
8928 The generation of permutation stmts and recording them in
8929 STMT_VINFO_VEC_STMT is done in vect_transform_grouped_load().
8930
8931 In case of both multiple types and interleaving, the vector loads and
8932 permutation stmts above are created for every copy. The result vector
8933 stmts are put in STMT_VINFO_VEC_STMT for the first copy and in the
8934 corresponding STMT_VINFO_RELATED_STMT for the next copies. */
8935
8936 /* If the data reference is aligned (dr_aligned) or potentially unaligned
8937 on a target that supports unaligned accesses (dr_unaligned_supported)
8938 we generate the following code:
8939 p = initial_addr;
8940 indx = 0;
8941 loop {
8942 p = p + indx * vectype_size;
8943 vec_dest = *(p);
8944 indx = indx + 1;
8945 }
8946
8947 Otherwise, the data reference is potentially unaligned on a target that
8948 does not support unaligned accesses (dr_explicit_realign_optimized) -
8949 then generate the following code, in which the data in each iteration is
8950 obtained by two vector loads, one from the previous iteration, and one
8951 from the current iteration:
8952 p1 = initial_addr;
8953 msq_init = *(floor(p1))
8954 p2 = initial_addr + VS - 1;
8955 realignment_token = call target_builtin;
8956 indx = 0;
8957 loop {
8958 p2 = p2 + indx * vectype_size
8959 lsq = *(floor(p2))
8960 vec_dest = realign_load (msq, lsq, realignment_token)
8961 indx = indx + 1;
8962 msq = lsq;
8963 } */
8964
8965 /* If the misalignment remains the same throughout the execution of the
8966 loop, we can create the init_addr and permutation mask at the loop
8967 preheader. Otherwise, it needs to be created inside the loop.
8968 This can only occur when vectorizing memory accesses in the inner-loop
8969 nested within an outer-loop that is being vectorized. */
8970
8971 if (nested_in_vect_loop
8972 && !multiple_p (DR_STEP_ALIGNMENT (dr_info->dr),
8973 GET_MODE_SIZE (TYPE_MODE (vectype))))
8974 {
8975 gcc_assert (alignment_support_scheme != dr_explicit_realign_optimized);
8976 compute_in_loop = true;
8977 }
8978
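/* For BB vectorization the DR used to base the pointer on may differ
   from the group's first statement; remember whether that is the case.  */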
8979 bool diff_first_stmt_info
8980 = first_stmt_info_for_drptr && first_stmt_info != first_stmt_info_for_drptr;
8981
8982 if ((alignment_support_scheme == dr_explicit_realign_optimized
8983 || alignment_support_scheme == dr_explicit_realign)
8984 && !compute_in_loop)
8985 {
8986 /* If we have a different first_stmt_info, we can't set up realignment
8987 here, since we can't guarantee that the first_stmt_info DR has been
8988 initialized yet; instead use the first_stmt_info_for_drptr DR by
8989 bumping the distance from the first_stmt_info DR, as done below.  */
8990 if (!diff_first_stmt_info)
8991 msq = vect_setup_realignment (vinfo,
8992 first_stmt_info, gsi, &realignment_token,
8993 alignment_support_scheme, NULL_TREE,
8994 &at_loop);
8995 if (alignment_support_scheme == dr_explicit_realign_optimized)
8996 {
8997 phi = as_a <gphi *> (SSA_NAME_DEF_STMT (msq));
8998 byte_offset = size_binop (MINUS_EXPR, TYPE_SIZE_UNIT (vectype),
8999 size_one_node);
9000 gcc_assert (!first_stmt_info_for_drptr);
9001 }
9002 }
9003 else
9004 at_loop = loop;
9005
9006 if (memory_access_type == VMAT_CONTIGUOUS_REVERSE)
9007 offset = size_int (-TYPE_VECTOR_SUBPARTS (vectype) + 1);
9008
9009 tree bump;
9010 tree vec_offset = NULL_TREE;
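/* Work out the type the data-ref pointer should point to and how far to
   bump it between vector accesses; a gather/scatter statement computes
   its addresses from an offset vector instead.  */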
9011 if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
9012 {
9013 aggr_type = NULL_TREE;
9014 bump = NULL_TREE;
9015 }
9016 else if (memory_access_type == VMAT_GATHER_SCATTER)
9017 {
9018 aggr_type = elem_type;
9019 vect_get_strided_load_store_ops (stmt_info, loop_vinfo, &gs_info,
9020 &bump, &vec_offset);
9021 }
9022 else
9023 {
9024 if (memory_access_type == VMAT_LOAD_STORE_LANES)
9025 aggr_type = build_array_type_nelts (elem_type, vec_num * nunits);
9026 else
9027 aggr_type = vectype;
9028 bump = vect_get_data_ptr_increment (vinfo, dr_info, aggr_type,
9029 memory_access_type);
9030 }
9031
9032 vec<tree> vec_offsets = vNULL;
9033 auto_vec<tree> vec_masks;
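/* For a masked load create the vector mask defs for all copies up front.  */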
9034 if (mask)
9035 vect_get_vec_defs (vinfo, stmt_info, slp_node, ncopies,
9036 mask, &vec_masks, mask_vectype, NULL_TREE);
9037 tree vec_mask = NULL_TREE;
9038 poly_uint64 group_elt = 0;
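/* Generate the loads: for each copy (and each vector in the group)
   create or advance the data-ref pointer, emit the vector load and
   record the result.  */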
9039 for (j = 0; j < ncopies; j++)
9040 {
9041 /* 1. Create the vector or array pointer update chain. */
9042 if (j == 0)
9043 {
9044 bool simd_lane_access_p
9045 = STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) != 0;
9046 if (simd_lane_access_p
9047 && TREE_CODE (DR_BASE_ADDRESS (first_dr_info->dr)) == ADDR_EXPR
9048 && VAR_P (TREE_OPERAND (DR_BASE_ADDRESS (first_dr_info->dr), 0))
9049 && integer_zerop (get_dr_vinfo_offset (vinfo, first_dr_info))
9050 && integer_zerop (DR_INIT (first_dr_info->dr))
9051 && alias_sets_conflict_p (get_alias_set (aggr_type),
9052 get_alias_set (TREE_TYPE (ref_type)))
9053 && (alignment_support_scheme == dr_aligned
9054 || alignment_support_scheme == dr_unaligned_supported))
9055 {
9056 dataref_ptr = unshare_expr (DR_BASE_ADDRESS (first_dr_info->dr));
9057 dataref_offset = build_int_cst (ref_type, 0);
9058 }
9059 else if (diff_first_stmt_info)
9060 {
9061 dataref_ptr
9062 = vect_create_data_ref_ptr (vinfo, first_stmt_info_for_drptr,
9063 aggr_type, at_loop, offset, &dummy,
9064 gsi, &ptr_incr, simd_lane_access_p,
9065 byte_offset, bump);
9066 /* Adjust the pointer by the difference to first_stmt. */
9067 data_reference_p ptrdr
9068 = STMT_VINFO_DATA_REF (first_stmt_info_for_drptr);
9069 tree diff
9070 = fold_convert (sizetype,
9071 size_binop (MINUS_EXPR,
9072 DR_INIT (first_dr_info->dr),
9073 DR_INIT (ptrdr)));
9074 dataref_ptr = bump_vector_ptr (vinfo, dataref_ptr, ptr_incr, gsi,
9075 stmt_info, diff);
9076 if (alignment_support_scheme == dr_explicit_realign)
9077 {
9078 msq = vect_setup_realignment (vinfo,
9079 first_stmt_info_for_drptr, gsi,
9080 &realignment_token,
9081 alignment_support_scheme,
9082 dataref_ptr, &at_loop);
9083 gcc_assert (!compute_in_loop);
9084 }
9085 }
9086 else if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
9087 {
9088 vect_get_gather_scatter_ops (vinfo, loop, stmt_info, &gs_info,
9089 &dataref_ptr, &vec_offsets, ncopies);
9090 vec_offset = vec_offsets[0];
9091 }
9092 else
9093 dataref_ptr
9094 = vect_create_data_ref_ptr (vinfo, first_stmt_info, aggr_type,
9095 at_loop,
9096 offset, &dummy, gsi, &ptr_incr,
9097 simd_lane_access_p,
9098 byte_offset, bump);
9099 if (mask)
9100 vec_mask = vec_masks[0];
9101 }
9102 else
9103 {
9104 if (dataref_offset)
9105 dataref_offset = int_const_binop (PLUS_EXPR, dataref_offset,
9106 bump);
9107 else if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
9108 vec_offset = vec_offsets[j];
9109 else
9110 dataref_ptr = bump_vector_ptr (vinfo, dataref_ptr, ptr_incr, gsi,
9111 stmt_info, bump);
9112 if (mask)
9113 vec_mask = vec_masks[j];
9114 }
9115
9116 if (grouped_load || slp_perm)
9117 dr_chain.create (vec_num);
9118
9119 gimple *new_stmt = NULL;
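/* For load-lanes emit a single (masked) LOAD_LANES call that yields an
   array of vectors and extract the individual vectors from it;
   otherwise emit one vector load per vector in the group.  */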
9120 if (memory_access_type == VMAT_LOAD_STORE_LANES)
9121 {
9122 tree vec_array;
9123
9124 vec_array = create_vector_array (vectype, vec_num);
9125
9126 tree final_mask = NULL_TREE;
9127 if (loop_masks)
9128 final_mask = vect_get_loop_mask (gsi, loop_masks, ncopies,
9129 vectype, j);
9130 if (vec_mask)
9131 final_mask = prepare_load_store_mask (mask_vectype, final_mask,
9132 vec_mask, gsi);
9133
9134 gcall *call;
9135 if (final_mask)
9136 {
9137 /* Emit:
9138 VEC_ARRAY = MASK_LOAD_LANES (DATAREF_PTR, ALIAS_PTR,
9139 VEC_MASK). */
9140 unsigned int align = TYPE_ALIGN_UNIT (TREE_TYPE (vectype));
9141 tree alias_ptr = build_int_cst (ref_type, align);
9142 call = gimple_build_call_internal (IFN_MASK_LOAD_LANES, 3,
9143 dataref_ptr, alias_ptr,
9144 final_mask);
9145 }
9146 else
9147 {
9148 /* Emit:
9149 VEC_ARRAY = LOAD_LANES (MEM_REF[...all elements...]). */
9150 data_ref = create_array_ref (aggr_type, dataref_ptr, ref_type);
9151 call = gimple_build_call_internal (IFN_LOAD_LANES, 1, data_ref);
9152 }
9153 gimple_call_set_lhs (call, vec_array);
9154 gimple_call_set_nothrow (call, true);
9155 vect_finish_stmt_generation (vinfo, stmt_info, call, gsi);
9156 new_stmt = call;
9157
9158 /* Extract each vector into an SSA_NAME. */
9159 for (i = 0; i < vec_num; i++)
9160 {
9161 new_temp = read_vector_array (vinfo, stmt_info, gsi, scalar_dest,
9162 vec_array, i);
9163 dr_chain.quick_push (new_temp);
9164 }
9165
9166 /* Record the mapping between SSA_NAMEs and statements. */
9167 vect_record_grouped_load_vectors (vinfo, stmt_info, dr_chain);
9168
9169 /* Record that VEC_ARRAY is now dead. */
9170 vect_clobber_variable (vinfo, stmt_info, gsi, vec_array);
9171 }
9172 else
9173 {
9174 for (i = 0; i < vec_num; i++)
9175 {
9176 tree final_mask = NULL_TREE;
9177 if (loop_masks
9178 && memory_access_type != VMAT_INVARIANT)
9179 final_mask = vect_get_loop_mask (gsi, loop_masks,
9180 vec_num * ncopies,
9181 vectype, vec_num * j + i);
9182 if (vec_mask)
9183 final_mask = prepare_load_store_mask (mask_vectype, final_mask,
9184 vec_mask, gsi);
9185
9186 if (i > 0)
9187 dataref_ptr = bump_vector_ptr (vinfo, dataref_ptr, ptr_incr,
9188 gsi, stmt_info, bump);
9189
9190 /* 2. Create the vector-load in the loop. */
9191 switch (alignment_support_scheme)
9192 {
9193 case dr_aligned:
9194 case dr_unaligned_supported:
9195 {
9196 unsigned int misalign;
9197 unsigned HOST_WIDE_INT align;
9198
9199 if (memory_access_type == VMAT_GATHER_SCATTER)
9200 {
9201 tree zero = build_zero_cst (vectype);
9202 tree scale = size_int (gs_info.scale);
9203 gcall *call;
9204 if (loop_masks)
9205 call = gimple_build_call_internal
9206 (IFN_MASK_GATHER_LOAD, 5, dataref_ptr,
9207 vec_offset, scale, zero, final_mask);
9208 else
9209 call = gimple_build_call_internal
9210 (IFN_GATHER_LOAD, 4, dataref_ptr,
9211 vec_offset, scale, zero);
9212 gimple_call_set_nothrow (call, true);
9213 new_stmt = call;
9214 data_ref = NULL_TREE;
9215 break;
9216 }
9217
9218 align =
9219 known_alignment (DR_TARGET_ALIGNMENT (first_dr_info));
9220 if (alignment_support_scheme == dr_aligned)
9221 {
9222 gcc_assert (aligned_access_p (first_dr_info));
9223 misalign = 0;
9224 }
9225 else if (DR_MISALIGNMENT (first_dr_info) == -1)
9226 {
9227 align = dr_alignment
9228 (vect_dr_behavior (vinfo, first_dr_info));
9229 misalign = 0;
9230 }
9231 else
9232 misalign = DR_MISALIGNMENT (first_dr_info);
9233 if (dataref_offset == NULL_TREE
9234 && TREE_CODE (dataref_ptr) == SSA_NAME)
9235 set_ptr_info_alignment (get_ptr_info (dataref_ptr),
9236 align, misalign);
9237
9238 if (final_mask)
9239 {
9240 align = least_bit_hwi (misalign | align);
9241 tree ptr = build_int_cst (ref_type, align);
9242 gcall *call
9243 = gimple_build_call_internal (IFN_MASK_LOAD, 3,
9244 dataref_ptr, ptr,
9245 final_mask);
9246 gimple_call_set_nothrow (call, true);
9247 new_stmt = call;
9248 data_ref = NULL_TREE;
9249 }
9250 else
9251 {
9252 tree ltype = vectype;
9253 tree new_vtype = NULL_TREE;
9254 unsigned HOST_WIDE_INT gap
9255 = DR_GROUP_GAP (first_stmt_info);
9256 unsigned int vect_align
9257 = vect_known_alignment_in_bytes (first_dr_info);
9258 unsigned int scalar_dr_size
9259 = vect_get_scalar_dr_size (first_dr_info);
9260 /* If there's no peeling for gaps but we have a gap
9261 with slp loads then load the lower half of the
9262 vector only. See get_group_load_store_type for
9263 when we apply this optimization. */
9264 if (slp
9265 && loop_vinfo
9266 && !LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo)
9267 && gap != 0
9268 && known_eq (nunits, (group_size - gap) * 2)
9269 && known_eq (nunits, group_size)
9270 && gap >= (vect_align / scalar_dr_size))
9271 {
9272 tree half_vtype;
9273 new_vtype
9274 = vector_vector_composition_type (vectype, 2,
9275 &half_vtype);
9276 if (new_vtype != NULL_TREE)
9277 ltype = half_vtype;
9278 }
9279 tree offset
9280 = (dataref_offset ? dataref_offset
9281 : build_int_cst (ref_type, 0));
9282 if (ltype != vectype
9283 && memory_access_type == VMAT_CONTIGUOUS_REVERSE)
9284 {
9285 unsigned HOST_WIDE_INT gap_offset
9286 = gap * tree_to_uhwi (TYPE_SIZE_UNIT (elem_type));
9287 tree gapcst = build_int_cst (ref_type, gap_offset);
9288 offset = size_binop (PLUS_EXPR, offset, gapcst);
9289 }
9290 data_ref
9291 = fold_build2 (MEM_REF, ltype, dataref_ptr, offset);
9292 if (alignment_support_scheme == dr_aligned)
9293 ;
9294 else if (DR_MISALIGNMENT (first_dr_info) == -1)
9295 TREE_TYPE (data_ref)
9296 = build_aligned_type (TREE_TYPE (data_ref),
9297 align * BITS_PER_UNIT);
9298 else
9299 TREE_TYPE (data_ref)
9300 = build_aligned_type (TREE_TYPE (data_ref),
9301 TYPE_ALIGN (elem_type));
9302 if (ltype != vectype)
9303 {
9304 vect_copy_ref_info (data_ref,
9305 DR_REF (first_dr_info->dr));
9306 tree tem = make_ssa_name (ltype);
9307 new_stmt = gimple_build_assign (tem, data_ref);
9308 vect_finish_stmt_generation (vinfo, stmt_info,
9309 new_stmt, gsi);
9310 data_ref = NULL;
9311 vec<constructor_elt, va_gc> *v;
9312 vec_alloc (v, 2);
9313 if (memory_access_type == VMAT_CONTIGUOUS_REVERSE)
9314 {
9315 CONSTRUCTOR_APPEND_ELT (v, NULL_TREE,
9316 build_zero_cst (ltype));
9317 CONSTRUCTOR_APPEND_ELT (v, NULL_TREE, tem);
9318 }
9319 else
9320 {
9321 CONSTRUCTOR_APPEND_ELT (v, NULL_TREE, tem);
9322 CONSTRUCTOR_APPEND_ELT (v, NULL_TREE,
9323 build_zero_cst (ltype));
9324 }
9325 gcc_assert (new_vtype != NULL_TREE);
9326 if (new_vtype == vectype)
9327 new_stmt = gimple_build_assign (
9328 vec_dest, build_constructor (vectype, v));
9329 else
9330 {
9331 tree new_vname = make_ssa_name (new_vtype);
9332 new_stmt = gimple_build_assign (
9333 new_vname, build_constructor (new_vtype, v));
9334 vect_finish_stmt_generation (vinfo, stmt_info,
9335 new_stmt, gsi);
9336 new_stmt = gimple_build_assign (
9337 vec_dest, build1 (VIEW_CONVERT_EXPR, vectype,
9338 new_vname));
9339 }
9340 }
9341 }
9342 break;
9343 }
9344 case dr_explicit_realign:
9345 {
9346 tree ptr, bump;
9347
9348 tree vs = size_int (TYPE_VECTOR_SUBPARTS (vectype));
9349
9350 if (compute_in_loop)
9351 msq = vect_setup_realignment (vinfo, first_stmt_info, gsi,
9352 &realignment_token,
9353 dr_explicit_realign,
9354 dataref_ptr, NULL);
9355
9356 if (TREE_CODE (dataref_ptr) == SSA_NAME)
9357 ptr = copy_ssa_name (dataref_ptr);
9358 else
9359 ptr = make_ssa_name (TREE_TYPE (dataref_ptr));
9360 // For explicit realign the target alignment should be
9361 // known at compile time.
9362 unsigned HOST_WIDE_INT align =
9363 DR_TARGET_ALIGNMENT (first_dr_info).to_constant ();
9364 new_stmt = gimple_build_assign
9365 (ptr, BIT_AND_EXPR, dataref_ptr,
9366 build_int_cst
9367 (TREE_TYPE (dataref_ptr),
9368 -(HOST_WIDE_INT) align));
9369 vect_finish_stmt_generation (vinfo, stmt_info,
9370 new_stmt, gsi);
9371 data_ref
9372 = build2 (MEM_REF, vectype, ptr,
9373 build_int_cst (ref_type, 0));
9374 vect_copy_ref_info (data_ref, DR_REF (first_dr_info->dr));
9375 vec_dest = vect_create_destination_var (scalar_dest,
9376 vectype);
9377 new_stmt = gimple_build_assign (vec_dest, data_ref);
9378 new_temp = make_ssa_name (vec_dest, new_stmt);
9379 gimple_assign_set_lhs (new_stmt, new_temp);
9380 gimple_move_vops (new_stmt, stmt_info->stmt);
9381 vect_finish_stmt_generation (vinfo, stmt_info,
9382 new_stmt, gsi);
9383 msq = new_temp;
9384
9385 bump = size_binop (MULT_EXPR, vs,
9386 TYPE_SIZE_UNIT (elem_type));
9387 bump = size_binop (MINUS_EXPR, bump, size_one_node);
9388 ptr = bump_vector_ptr (vinfo, dataref_ptr, NULL, gsi,
9389 stmt_info, bump);
9390 new_stmt = gimple_build_assign
9391 (NULL_TREE, BIT_AND_EXPR, ptr,
9392 build_int_cst
9393 (TREE_TYPE (ptr), -(HOST_WIDE_INT) align));
9394 ptr = copy_ssa_name (ptr, new_stmt);
9395 gimple_assign_set_lhs (new_stmt, ptr);
9396 vect_finish_stmt_generation (vinfo, stmt_info,
9397 new_stmt, gsi);
9398 data_ref
9399 = build2 (MEM_REF, vectype, ptr,
9400 build_int_cst (ref_type, 0));
9401 break;
9402 }
9403 case dr_explicit_realign_optimized:
9404 {
9405 if (TREE_CODE (dataref_ptr) == SSA_NAME)
9406 new_temp = copy_ssa_name (dataref_ptr);
9407 else
9408 new_temp = make_ssa_name (TREE_TYPE (dataref_ptr));
9409 // We should only be doing this if we know the target
9410 // alignment at compile time.
9411 unsigned HOST_WIDE_INT align =
9412 DR_TARGET_ALIGNMENT (first_dr_info).to_constant ();
9413 new_stmt = gimple_build_assign
9414 (new_temp, BIT_AND_EXPR, dataref_ptr,
9415 build_int_cst (TREE_TYPE (dataref_ptr),
9416 -(HOST_WIDE_INT) align));
9417 vect_finish_stmt_generation (vinfo, stmt_info,
9418 new_stmt, gsi);
9419 data_ref
9420 = build2 (MEM_REF, vectype, new_temp,
9421 build_int_cst (ref_type, 0));
9422 break;
9423 }
9424 default:
9425 gcc_unreachable ();
9426 }
9427 vec_dest = vect_create_destination_var (scalar_dest, vectype);
9428 /* DATA_REF is null if we've already built the statement. */
9429 if (data_ref)
9430 {
9431 vect_copy_ref_info (data_ref, DR_REF (first_dr_info->dr));
9432 new_stmt = gimple_build_assign (vec_dest, data_ref);
9433 }
9434 new_temp = make_ssa_name (vec_dest, new_stmt);
9435 gimple_set_lhs (new_stmt, new_temp);
9436 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
9437
9438 /* 3. Handle explicit realignment if necessary/supported.
9439 Create in loop:
9440 vec_dest = realign_load (msq, lsq, realignment_token) */
9441 if (alignment_support_scheme == dr_explicit_realign_optimized
9442 || alignment_support_scheme == dr_explicit_realign)
9443 {
9444 lsq = gimple_assign_lhs (new_stmt);
9445 if (!realignment_token)
9446 realignment_token = dataref_ptr;
9447 vec_dest = vect_create_destination_var (scalar_dest, vectype);
9448 new_stmt = gimple_build_assign (vec_dest, REALIGN_LOAD_EXPR,
9449 msq, lsq, realignment_token);
9450 new_temp = make_ssa_name (vec_dest, new_stmt);
9451 gimple_assign_set_lhs (new_stmt, new_temp);
9452 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
9453
9454 if (alignment_support_scheme == dr_explicit_realign_optimized)
9455 {
9456 gcc_assert (phi);
9457 if (i == vec_num - 1 && j == ncopies - 1)
9458 add_phi_arg (phi, lsq,
9459 loop_latch_edge (containing_loop),
9460 UNKNOWN_LOCATION);
9461 msq = lsq;
9462 }
9463 }
9464
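/* A negative-step access returns the elements in the reverse of the
   scalar order, so permute them back into the original order.  */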
9465 if (memory_access_type == VMAT_CONTIGUOUS_REVERSE)
9466 {
9467 tree perm_mask = perm_mask_for_reverse (vectype);
9468 new_temp = permute_vec_elements (vinfo, new_temp, new_temp,
9469 perm_mask, stmt_info, gsi);
9470 new_stmt = SSA_NAME_DEF_STMT (new_temp);
9471 }
9472
9473 /* Collect vector loads and later create their permutation in
9474 vect_transform_grouped_load (). */
9475 if (grouped_load || slp_perm)
9476 dr_chain.quick_push (new_temp);
9477
9478 /* Store vector loads in the corresponding SLP_NODE. */
9479 if (slp && !slp_perm)
9480 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
9481
9482 /* With an SLP permutation we load the gaps as well; without one we
9483 need to skip the gaps once we have managed to fully load all the
9484 elements.  group_gap_adj is DR_GROUP_SIZE here.  */
9485 group_elt += nunits;
9486 if (maybe_ne (group_gap_adj, 0U)
9487 && !slp_perm
9488 && known_eq (group_elt, group_size - group_gap_adj))
9489 {
9490 poly_wide_int bump_val
9491 = (wi::to_wide (TYPE_SIZE_UNIT (elem_type))
9492 * group_gap_adj);
9493 tree bump = wide_int_to_tree (sizetype, bump_val);
9494 dataref_ptr = bump_vector_ptr (vinfo, dataref_ptr, ptr_incr,
9495 gsi, stmt_info, bump);
9496 group_elt = 0;
9497 }
9498 }
9499 /* Bump the vector pointer to account for a gap or for excess
9500 elements loaded for a permuted SLP load. */
9501 if (maybe_ne (group_gap_adj, 0U) && slp_perm)
9502 {
9503 poly_wide_int bump_val
9504 = (wi::to_wide (TYPE_SIZE_UNIT (elem_type))
9505 * group_gap_adj);
9506 tree bump = wide_int_to_tree (sizetype, bump_val);
9507 dataref_ptr = bump_vector_ptr (vinfo, dataref_ptr, ptr_incr, gsi,
9508 stmt_info, bump);
9509 }
9510 }
9511
9512 if (slp && !slp_perm)
9513 continue;
9514
9515 if (slp_perm)
9516 {
9517 unsigned n_perms;
9518 bool ok = vect_transform_slp_perm_load (vinfo, slp_node, dr_chain,
9519 gsi, vf, false, &n_perms);
9520 gcc_assert (ok);
9521 }
9522 else
9523 {
9524 if (grouped_load)
9525 {
9526 if (memory_access_type != VMAT_LOAD_STORE_LANES)
9527 vect_transform_grouped_load (vinfo, stmt_info, dr_chain,
9528 group_size, gsi);
9529 *vec_stmt = STMT_VINFO_VEC_STMTS (stmt_info)[0];
9530 }
9531 else
9532 {
9533 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
9534 }
9535 }
9536 dr_chain.release ();
9537 }
9538 if (!slp)
9539 *vec_stmt = STMT_VINFO_VEC_STMTS (stmt_info)[0];
9540
9541 return true;
9542 }
9543
9544 /* Function vect_is_simple_cond.
9545
9546 Input:
9547 VINFO - the vec_info for the loop or basic block being vectorized.
9548 COND - the condition that is checked for simple use.
9549
9550 Output:
9551 *COMP_VECTYPE - the vector type for the comparison.
9552 *DTS - The def types for the arguments of the comparison
9553
9554 Returns whether a COND can be vectorized. Checks whether
9555 condition operands are supportable using vect_is_simple_use.  */
9556
9557 static bool
9558 vect_is_simple_cond (tree cond, vec_info *vinfo, stmt_vec_info stmt_info,
9559 slp_tree slp_node, tree *comp_vectype,
9560 enum vect_def_type *dts, tree vectype)
9561 {
9562 tree lhs, rhs;
9563 tree vectype1 = NULL_TREE, vectype2 = NULL_TREE;
9564 slp_tree slp_op;
9565
9566 /* Mask case. */
9567 if (TREE_CODE (cond) == SSA_NAME
9568 && VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (cond)))
9569 {
9570 if (!vect_is_simple_use (vinfo, stmt_info, slp_node, 0, &cond,
9571 &slp_op, &dts[0], comp_vectype)
9572 || !*comp_vectype
9573 || !VECTOR_BOOLEAN_TYPE_P (*comp_vectype))
9574 return false;
9575 return true;
9576 }
9577
9578 if (!COMPARISON_CLASS_P (cond))
9579 return false;
9580
9581 lhs = TREE_OPERAND (cond, 0);
9582 rhs = TREE_OPERAND (cond, 1);
9583
9584 if (TREE_CODE (lhs) == SSA_NAME)
9585 {
9586 if (!vect_is_simple_use (vinfo, stmt_info, slp_node, 0,
9587 &lhs, &slp_op, &dts[0], &vectype1))
9588 return false;
9589 }
9590 else if (TREE_CODE (lhs) == INTEGER_CST || TREE_CODE (lhs) == REAL_CST
9591 || TREE_CODE (lhs) == FIXED_CST)
9592 dts[0] = vect_constant_def;
9593 else
9594 return false;
9595
9596 if (TREE_CODE (rhs) == SSA_NAME)
9597 {
9598 if (!vect_is_simple_use (vinfo, stmt_info, slp_node, 1,
9599 &rhs, &slp_op, &dts[1], &vectype2))
9600 return false;
9601 }
9602 else if (TREE_CODE (rhs) == INTEGER_CST || TREE_CODE (rhs) == REAL_CST
9603 || TREE_CODE (rhs) == FIXED_CST)
9604 dts[1] = vect_constant_def;
9605 else
9606 return false;
9607
9608 if (vectype1 && vectype2
9609 && maybe_ne (TYPE_VECTOR_SUBPARTS (vectype1),
9610 TYPE_VECTOR_SUBPARTS (vectype2)))
9611 return false;
9612
9613 *comp_vectype = vectype1 ? vectype1 : vectype2;
9614 /* Invariant comparison. */
9615 if (! *comp_vectype)
9616 {
9617 tree scalar_type = TREE_TYPE (lhs);
9618 if (VECT_SCALAR_BOOLEAN_TYPE_P (scalar_type))
9619 *comp_vectype = truth_type_for (vectype);
9620 else
9621 {
9622 /* If we can widen the comparison to match vectype do so. */
9623 if (INTEGRAL_TYPE_P (scalar_type)
9624 && !slp_node
9625 && tree_int_cst_lt (TYPE_SIZE (scalar_type),
9626 TYPE_SIZE (TREE_TYPE (vectype))))
9627 scalar_type = build_nonstandard_integer_type
9628 (vector_element_bits (vectype), TYPE_UNSIGNED (scalar_type));
9629 *comp_vectype = get_vectype_for_scalar_type (vinfo, scalar_type,
9630 slp_node);
9631 }
9632 }
9633
9634 return true;
9635 }
9636
9637 /* vectorizable_condition.
9638
9639 Check if STMT_INFO is a conditional modify expression that can be vectorized.
9640 If VEC_STMT is also passed, vectorize STMT_INFO: create a vectorized
9641 stmt using VEC_COND_EXPR to replace it, put it in VEC_STMT, and insert it
9642 at GSI.
9643
9644 When STMT_INFO is vectorized as a nested cycle, for_reduction is true.
9645
9646 Return true if STMT_INFO is vectorizable in this way. */
9647
9648 static bool
9649 vectorizable_condition (vec_info *vinfo,
9650 stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
9651 gimple **vec_stmt,
9652 slp_tree slp_node, stmt_vector_for_cost *cost_vec)
9653 {
9654 tree scalar_dest = NULL_TREE;
9655 tree vec_dest = NULL_TREE;
9656 tree cond_expr, cond_expr0 = NULL_TREE, cond_expr1 = NULL_TREE;
9657 tree then_clause, else_clause;
9658 tree comp_vectype = NULL_TREE;
9659 tree vec_cond_lhs = NULL_TREE, vec_cond_rhs = NULL_TREE;
9660 tree vec_then_clause = NULL_TREE, vec_else_clause = NULL_TREE;
9661 tree vec_compare;
9662 tree new_temp;
9663 loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
9664 enum vect_def_type dts[4]
9665 = {vect_unknown_def_type, vect_unknown_def_type,
9666 vect_unknown_def_type, vect_unknown_def_type};
9667 int ndts = 4;
9668 int ncopies;
9669 int vec_num;
9670 enum tree_code code, cond_code, bitop1 = NOP_EXPR, bitop2 = NOP_EXPR;
9671 int i;
9672 bb_vec_info bb_vinfo = dyn_cast <bb_vec_info> (vinfo);
9673 vec<tree> vec_oprnds0 = vNULL;
9674 vec<tree> vec_oprnds1 = vNULL;
9675 vec<tree> vec_oprnds2 = vNULL;
9676 vec<tree> vec_oprnds3 = vNULL;
9677 tree vec_cmp_type;
9678 bool masked = false;
9679
9680 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
9681 return false;
9682
9683 /* Is this a vectorizable conditional operation?  */
9684 gassign *stmt = dyn_cast <gassign *> (stmt_info->stmt);
9685 if (!stmt)
9686 return false;
9687
9688 code = gimple_assign_rhs_code (stmt);
9689 if (code != COND_EXPR)
9690 return false;
9691
9692 stmt_vec_info reduc_info = NULL;
9693 int reduc_index = -1;
9694 vect_reduction_type reduction_type = TREE_CODE_REDUCTION;
9695 bool for_reduction
9696 = STMT_VINFO_REDUC_DEF (vect_orig_stmt (stmt_info)) != NULL;
9697 if (for_reduction)
9698 {
9699 if (STMT_SLP_TYPE (stmt_info))
9700 return false;
9701 reduc_info = info_for_reduction (vinfo, stmt_info);
9702 reduction_type = STMT_VINFO_REDUC_TYPE (reduc_info);
9703 reduc_index = STMT_VINFO_REDUC_IDX (stmt_info);
9704 gcc_assert (reduction_type != EXTRACT_LAST_REDUCTION
9705 || reduc_index != -1);
9706 }
9707 else
9708 {
9709 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
9710 return false;
9711
9712 /* FORNOW: only supported as part of a reduction. */
9713 if (STMT_VINFO_LIVE_P (stmt_info))
9714 {
9715 if (dump_enabled_p ())
9716 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
9717 "value used after loop.\n");
9718 return false;
9719 }
9720 }
9721
9722 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
9723 tree vectype1 = NULL_TREE, vectype2 = NULL_TREE;
9724
9725 if (slp_node)
9726 {
9727 ncopies = 1;
9728 vec_num = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
9729 }
9730 else
9731 {
9732 ncopies = vect_get_num_copies (loop_vinfo, vectype);
9733 vec_num = 1;
9734 }
9735
9736 gcc_assert (ncopies >= 1);
9737 if (for_reduction && ncopies > 1)
9738 return false; /* FORNOW */
9739
9740 cond_expr = gimple_assign_rhs1 (stmt);
9741
9742 if (!vect_is_simple_cond (cond_expr, vinfo, stmt_info, slp_node,
9743 &comp_vectype, &dts[0], vectype)
9744 || !comp_vectype)
9745 return false;
9746
9747 unsigned op_adjust = COMPARISON_CLASS_P (cond_expr) ? 1 : 0;
9748 slp_tree then_slp_node, else_slp_node;
9749 if (!vect_is_simple_use (vinfo, stmt_info, slp_node, 1 + op_adjust,
9750 &then_clause, &then_slp_node, &dts[2], &vectype1))
9751 return false;
9752 if (!vect_is_simple_use (vinfo, stmt_info, slp_node, 2 + op_adjust,
9753 &else_clause, &else_slp_node, &dts[3], &vectype2))
9754 return false;
9755
9756 if (vectype1 && !useless_type_conversion_p (vectype, vectype1))
9757 return false;
9758
9759 if (vectype2 && !useless_type_conversion_p (vectype, vectype2))
9760 return false;
9761
9762 masked = !COMPARISON_CLASS_P (cond_expr);
9763 vec_cmp_type = truth_type_for (comp_vectype);
9764
9765 if (vec_cmp_type == NULL_TREE)
9766 return false;
9767
9768 cond_code = TREE_CODE (cond_expr);
9769 if (!masked)
9770 {
9771 cond_expr0 = TREE_OPERAND (cond_expr, 0);
9772 cond_expr1 = TREE_OPERAND (cond_expr, 1);
9773 }
9774
9775 /* For conditional reductions, the "then" value needs to be the candidate
9776 value calculated by this iteration while the "else" value needs to be
9777 the result carried over from previous iterations. If the COND_EXPR
9778 is the other way around, we need to swap it. */
9779 bool must_invert_cmp_result = false;
9780 if (reduction_type == EXTRACT_LAST_REDUCTION && reduc_index == 1)
9781 {
9782 if (masked)
9783 must_invert_cmp_result = true;
9784 else
9785 {
9786 bool honor_nans = HONOR_NANS (TREE_TYPE (cond_expr0));
9787 tree_code new_code = invert_tree_comparison (cond_code, honor_nans);
9788 if (new_code == ERROR_MARK)
9789 must_invert_cmp_result = true;
9790 else
9791 {
9792 cond_code = new_code;
9793 /* Make sure we don't accidentally use the old condition. */
9794 cond_expr = NULL_TREE;
9795 }
9796 }
9797 std::swap (then_clause, else_clause);
9798 }
9799
9800 if (!masked && VECTOR_BOOLEAN_TYPE_P (comp_vectype))
9801 {
9802 /* Boolean values may have another representation in vectors
9803 and therefore we prefer bit operations over comparison for
9804 them (which also works for scalar masks). We store opcodes
9805 to use in bitop1 and bitop2. Statement is vectorized as
9806 BITOP2 (rhs1 BITOP1 rhs2) or rhs1 BITOP2 (BITOP1 rhs2)
9807 depending on bitop1 and bitop2 arity. */
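/* E.g. for boolean operands a > b becomes a & ~b and a == b becomes
   ~(a ^ b); the trailing BIT_NOT is handled by swapping the then/else
   values instead of emitting it.  */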
9808 switch (cond_code)
9809 {
9810 case GT_EXPR:
9811 bitop1 = BIT_NOT_EXPR;
9812 bitop2 = BIT_AND_EXPR;
9813 break;
9814 case GE_EXPR:
9815 bitop1 = BIT_NOT_EXPR;
9816 bitop2 = BIT_IOR_EXPR;
9817 break;
9818 case LT_EXPR:
9819 bitop1 = BIT_NOT_EXPR;
9820 bitop2 = BIT_AND_EXPR;
9821 std::swap (cond_expr0, cond_expr1);
9822 break;
9823 case LE_EXPR:
9824 bitop1 = BIT_NOT_EXPR;
9825 bitop2 = BIT_IOR_EXPR;
9826 std::swap (cond_expr0, cond_expr1);
9827 break;
9828 case NE_EXPR:
9829 bitop1 = BIT_XOR_EXPR;
9830 break;
9831 case EQ_EXPR:
9832 bitop1 = BIT_XOR_EXPR;
9833 bitop2 = BIT_NOT_EXPR;
9834 break;
9835 default:
9836 return false;
9837 }
9838 cond_code = SSA_NAME;
9839 }
9840
9841 if (TREE_CODE_CLASS (cond_code) == tcc_comparison
9842 && reduction_type == EXTRACT_LAST_REDUCTION
9843 && !expand_vec_cmp_expr_p (comp_vectype, vec_cmp_type, cond_code))
9844 {
9845 if (dump_enabled_p ())
9846 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
9847 "reduction comparison operation not supported.\n");
9848 return false;
9849 }
9850
9851 if (!vec_stmt)
9852 {
9853 if (bitop1 != NOP_EXPR)
9854 {
9855 machine_mode mode = TYPE_MODE (comp_vectype);
9856 optab optab;
9857
9858 optab = optab_for_tree_code (bitop1, comp_vectype, optab_default);
9859 if (!optab || optab_handler (optab, mode) == CODE_FOR_nothing)
9860 return false;
9861
9862 if (bitop2 != NOP_EXPR)
9863 {
9864 optab = optab_for_tree_code (bitop2, comp_vectype,
9865 optab_default);
9866 if (!optab || optab_handler (optab, mode) == CODE_FOR_nothing)
9867 return false;
9868 }
9869 }
9870
9871 vect_cost_for_stmt kind = vector_stmt;
9872 if (reduction_type == EXTRACT_LAST_REDUCTION)
9873 /* Count one reduction-like operation per vector. */
9874 kind = vec_to_scalar;
9875 else if (!expand_vec_cond_expr_p (vectype, comp_vectype, cond_code))
9876 return false;
9877
9878 if (slp_node
9879 && (!vect_maybe_update_slp_op_vectype
9880 (SLP_TREE_CHILDREN (slp_node)[0], comp_vectype)
9881 || (op_adjust == 1
9882 && !vect_maybe_update_slp_op_vectype
9883 (SLP_TREE_CHILDREN (slp_node)[1], comp_vectype))
9884 || !vect_maybe_update_slp_op_vectype (then_slp_node, vectype)
9885 || !vect_maybe_update_slp_op_vectype (else_slp_node, vectype)))
9886 {
9887 if (dump_enabled_p ())
9888 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
9889 "incompatible vector types for invariants\n");
9890 return false;
9891 }
9892
9893 if (loop_vinfo && for_reduction
9894 && LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo))
9895 {
9896 if (reduction_type == EXTRACT_LAST_REDUCTION)
9897 vect_record_loop_mask (loop_vinfo, &LOOP_VINFO_MASKS (loop_vinfo),
9898 ncopies * vec_num, vectype, NULL);
9899 /* Extra inactive lanes should be safe for vect_nested_cycle. */
9900 else if (STMT_VINFO_DEF_TYPE (reduc_info) != vect_nested_cycle)
9901 {
9902 if (dump_enabled_p ())
9903 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
9904 "conditional reduction prevents the use"
9905 " of partial vectors.\n");
9906 LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo) = false;
9907 }
9908 }
9909
9910 STMT_VINFO_TYPE (stmt_info) = condition_vec_info_type;
9911 vect_model_simple_cost (vinfo, stmt_info, ncopies, dts, ndts, slp_node,
9912 cost_vec, kind);
9913 return true;
9914 }
9915
9916 /* Transform. */
9917
9918 if (!slp_node)
9919 {
9920 vec_oprnds0.create (1);
9921 vec_oprnds1.create (1);
9922 vec_oprnds2.create (1);
9923 vec_oprnds3.create (1);
9924 }
9925
9926 /* Handle def. */
9927 scalar_dest = gimple_assign_lhs (stmt);
9928 if (reduction_type != EXTRACT_LAST_REDUCTION)
9929 vec_dest = vect_create_destination_var (scalar_dest, vectype);
9930
9931 bool swap_cond_operands = false;
9932
9933 /* See whether another part of the vectorized code applies a loop
9934 mask to the condition, or to its inverse. */
9935
9936 vec_loop_masks *masks = NULL;
9937 if (loop_vinfo && LOOP_VINFO_FULLY_MASKED_P (loop_vinfo))
9938 {
9939 if (reduction_type == EXTRACT_LAST_REDUCTION)
9940 masks = &LOOP_VINFO_MASKS (loop_vinfo);
9941 else
9942 {
9943 scalar_cond_masked_key cond (cond_expr, ncopies);
9944 if (loop_vinfo->scalar_cond_masked_set.contains (cond))
9945 masks = &LOOP_VINFO_MASKS (loop_vinfo);
9946 else
9947 {
9948 bool honor_nans = HONOR_NANS (TREE_TYPE (cond.op0));
9949 cond.code = invert_tree_comparison (cond.code, honor_nans);
9950 if (loop_vinfo->scalar_cond_masked_set.contains (cond))
9951 {
9952 masks = &LOOP_VINFO_MASKS (loop_vinfo);
9953 cond_code = cond.code;
9954 swap_cond_operands = true;
9955 }
9956 }
9957 }
9958 }
9959
9960 /* Handle cond expr. */
9961 if (masked)
9962 vect_get_vec_defs (vinfo, stmt_info, slp_node, ncopies,
9963 cond_expr, &vec_oprnds0, comp_vectype,
9964 then_clause, &vec_oprnds2, vectype,
9965 reduction_type != EXTRACT_LAST_REDUCTION
9966 ? else_clause : NULL, &vec_oprnds3, vectype);
9967 else
9968 vect_get_vec_defs (vinfo, stmt_info, slp_node, ncopies,
9969 cond_expr0, &vec_oprnds0, comp_vectype,
9970 cond_expr1, &vec_oprnds1, comp_vectype,
9971 then_clause, &vec_oprnds2, vectype,
9972 reduction_type != EXTRACT_LAST_REDUCTION
9973 ? else_clause : NULL, &vec_oprnds3, vectype);
9974
9975 /* Arguments are ready. Create the new vector stmt. */
9976 FOR_EACH_VEC_ELT (vec_oprnds0, i, vec_cond_lhs)
9977 {
9978 vec_then_clause = vec_oprnds2[i];
9979 if (reduction_type != EXTRACT_LAST_REDUCTION)
9980 vec_else_clause = vec_oprnds3[i];
9981
9982 if (swap_cond_operands)
9983 std::swap (vec_then_clause, vec_else_clause);
9984
9985 if (masked)
9986 vec_compare = vec_cond_lhs;
9987 else
9988 {
9989 vec_cond_rhs = vec_oprnds1[i];
9990 if (bitop1 == NOP_EXPR)
9991 {
9992 gimple_seq stmts = NULL;
9993 vec_compare = gimple_build (&stmts, cond_code, vec_cmp_type,
9994 vec_cond_lhs, vec_cond_rhs);
9995 gsi_insert_before (gsi, stmts, GSI_SAME_STMT);
9996 }
9997 else
9998 {
9999 new_temp = make_ssa_name (vec_cmp_type);
10000 gassign *new_stmt;
10001 if (bitop1 == BIT_NOT_EXPR)
10002 new_stmt = gimple_build_assign (new_temp, bitop1,
10003 vec_cond_rhs);
10004 else
10005 new_stmt
10006 = gimple_build_assign (new_temp, bitop1, vec_cond_lhs,
10007 vec_cond_rhs);
10008 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
10009 if (bitop2 == NOP_EXPR)
10010 vec_compare = new_temp;
10011 else if (bitop2 == BIT_NOT_EXPR)
10012 {
10013 /* Instead of doing ~x ? y : z do x ? z : y. */
10014 vec_compare = new_temp;
10015 std::swap (vec_then_clause, vec_else_clause);
10016 }
10017 else
10018 {
10019 vec_compare = make_ssa_name (vec_cmp_type);
10020 new_stmt
10021 = gimple_build_assign (vec_compare, bitop2,
10022 vec_cond_lhs, new_temp);
10023 vect_finish_stmt_generation (vinfo, stmt_info,
10024 new_stmt, gsi);
10025 }
10026 }
10027 }
10028
10029 /* If we decided to apply a loop mask to the result of the vector
10030 comparison, AND the comparison with the mask now. Later passes
10031 should then be able to reuse the AND results between multiple
10032 vector statements.
10033
10034 For example:
10035 for (int i = 0; i < 100; ++i)
10036 x[i] = y[i] ? z[i] : 10;
10037
10038 results in following optimized GIMPLE:
10039
10040 mask__35.8_43 = vect__4.7_41 != { 0, ... };
10041 vec_mask_and_46 = loop_mask_40 & mask__35.8_43;
10042 _19 = &MEM[base: z_12(D), index: ivtmp_56, step: 4, offset: 0B];
10043 vect_iftmp.11_47 = .MASK_LOAD (_19, 4B, vec_mask_and_46);
10044 vect_iftmp.12_52 = VEC_COND_EXPR <vec_mask_and_46,
10045 vect_iftmp.11_47, { 10, ... }>;
10046
10047 instead of using masked and unmasked forms of
10048 vec != { 0, ... } (masked in the MASK_LOAD,
10049 unmasked in the VEC_COND_EXPR). */
10050
10051 /* Force vec_compare to be an SSA_NAME rather than a comparison,
10052 in cases where that's necessary. */
10053
10054 if (masks || reduction_type == EXTRACT_LAST_REDUCTION)
10055 {
10056 if (!is_gimple_val (vec_compare))
10057 {
10058 tree vec_compare_name = make_ssa_name (vec_cmp_type);
10059 gassign *new_stmt = gimple_build_assign (vec_compare_name,
10060 vec_compare);
10061 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
10062 vec_compare = vec_compare_name;
10063 }
10064
10065 if (must_invert_cmp_result)
10066 {
10067 tree vec_compare_name = make_ssa_name (vec_cmp_type);
10068 gassign *new_stmt = gimple_build_assign (vec_compare_name,
10069 BIT_NOT_EXPR,
10070 vec_compare);
10071 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
10072 vec_compare = vec_compare_name;
10073 }
10074
10075 if (masks)
10076 {
10077 unsigned vec_num = vec_oprnds0.length ();
10078 tree loop_mask
10079 = vect_get_loop_mask (gsi, masks, vec_num * ncopies,
10080 vectype, i);
10081 tree tmp2 = make_ssa_name (vec_cmp_type);
10082 gassign *g
10083 = gimple_build_assign (tmp2, BIT_AND_EXPR, vec_compare,
10084 loop_mask);
10085 vect_finish_stmt_generation (vinfo, stmt_info, g, gsi);
10086 vec_compare = tmp2;
10087 }
10088 }
10089
10090 gimple *new_stmt;
10091 if (reduction_type == EXTRACT_LAST_REDUCTION)
10092 {
10093 gimple *old_stmt = vect_orig_stmt (stmt_info)->stmt;
10094 tree lhs = gimple_get_lhs (old_stmt);
10095 new_stmt = gimple_build_call_internal
10096 (IFN_FOLD_EXTRACT_LAST, 3, else_clause, vec_compare,
10097 vec_then_clause);
10098 gimple_call_set_lhs (new_stmt, lhs);
10099 SSA_NAME_DEF_STMT (lhs) = new_stmt;
10100 if (old_stmt == gsi_stmt (*gsi))
10101 vect_finish_replace_stmt (vinfo, stmt_info, new_stmt);
10102 else
10103 {
10104 /* In this case we're moving the definition to later in the
10105 block. That doesn't matter because the only uses of the
10106 lhs are in phi statements. */
10107 gimple_stmt_iterator old_gsi = gsi_for_stmt (old_stmt);
10108 gsi_remove (&old_gsi, true);
10109 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
10110 }
10111 }
10112 else
10113 {
10114 new_temp = make_ssa_name (vec_dest);
10115 new_stmt = gimple_build_assign (new_temp, VEC_COND_EXPR, vec_compare,
10116 vec_then_clause, vec_else_clause);
10117 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
10118 }
10119 if (slp_node)
10120 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
10121 else
10122 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
10123 }
10124
10125 if (!slp_node)
10126 *vec_stmt = STMT_VINFO_VEC_STMTS (stmt_info)[0];
10127
10128 vec_oprnds0.release ();
10129 vec_oprnds1.release ();
10130 vec_oprnds2.release ();
10131 vec_oprnds3.release ();
10132
10133 return true;
10134 }
10135
10136 /* vectorizable_comparison.
10137
10138 Check if STMT_INFO is a comparison expression that can be vectorized.
10139 If VEC_STMT is also passed, vectorize STMT_INFO: create a vectorized
10140 comparison, put it in VEC_STMT, and insert it at GSI.
10141
10142 Return true if STMT_INFO is vectorizable in this way. */
10143
10144 static bool
10145 vectorizable_comparison (vec_info *vinfo,
10146 stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
10147 gimple **vec_stmt,
10148 slp_tree slp_node, stmt_vector_for_cost *cost_vec)
10149 {
10150 tree lhs, rhs1, rhs2;
10151 tree vectype1 = NULL_TREE, vectype2 = NULL_TREE;
10152 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
10153 tree vec_rhs1 = NULL_TREE, vec_rhs2 = NULL_TREE;
10154 tree new_temp;
10155 loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
10156 enum vect_def_type dts[2] = {vect_unknown_def_type, vect_unknown_def_type};
10157 int ndts = 2;
10158 poly_uint64 nunits;
10159 int ncopies;
10160 enum tree_code code, bitop1 = NOP_EXPR, bitop2 = NOP_EXPR;
10161 int i;
10162 bb_vec_info bb_vinfo = dyn_cast <bb_vec_info> (vinfo);
10163 vec<tree> vec_oprnds0 = vNULL;
10164 vec<tree> vec_oprnds1 = vNULL;
10165 tree mask_type;
10166 tree mask;
10167
10168 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
10169 return false;
10170
10171 if (!vectype || !VECTOR_BOOLEAN_TYPE_P (vectype))
10172 return false;
10173
10174 mask_type = vectype;
10175 nunits = TYPE_VECTOR_SUBPARTS (vectype);
10176
10177 if (slp_node)
10178 ncopies = 1;
10179 else
10180 ncopies = vect_get_num_copies (loop_vinfo, vectype);
10181
10182 gcc_assert (ncopies >= 1);
10183 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
10184 return false;
10185
10186 if (STMT_VINFO_LIVE_P (stmt_info))
10187 {
10188 if (dump_enabled_p ())
10189 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
10190 "value used after loop.\n");
10191 return false;
10192 }
10193
10194 gassign *stmt = dyn_cast <gassign *> (stmt_info->stmt);
10195 if (!stmt)
10196 return false;
10197
10198 code = gimple_assign_rhs_code (stmt);
10199
10200 if (TREE_CODE_CLASS (code) != tcc_comparison)
10201 return false;
10202
10203 slp_tree slp_rhs1, slp_rhs2;
10204 if (!vect_is_simple_use (vinfo, stmt_info, slp_node,
10205 0, &rhs1, &slp_rhs1, &dts[0], &vectype1))
10206 return false;
10207
10208 if (!vect_is_simple_use (vinfo, stmt_info, slp_node,
10209 1, &rhs2, &slp_rhs2, &dts[1], &vectype2))
10210 return false;
10211
10212 if (vectype1 && vectype2
10213 && maybe_ne (TYPE_VECTOR_SUBPARTS (vectype1),
10214 TYPE_VECTOR_SUBPARTS (vectype2)))
10215 return false;
10216
10217 vectype = vectype1 ? vectype1 : vectype2;
10218
10219 /* Invariant comparison. */
10220 if (!vectype)
10221 {
10222 if (VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (rhs1)))
10223 vectype = mask_type;
10224 else
10225 vectype = get_vectype_for_scalar_type (vinfo, TREE_TYPE (rhs1),
10226 slp_node);
10227 if (!vectype || maybe_ne (TYPE_VECTOR_SUBPARTS (vectype), nunits))
10228 return false;
10229 }
10230 else if (maybe_ne (nunits, TYPE_VECTOR_SUBPARTS (vectype)))
10231 return false;
10232
10233 /* Can't compare mask and non-mask types. */
10234 if (vectype1 && vectype2
10235 && (VECTOR_BOOLEAN_TYPE_P (vectype1) ^ VECTOR_BOOLEAN_TYPE_P (vectype2)))
10236 return false;
10237
10238 /* Boolean values may have another representation in vectors
10239 and therefore we prefer bit operations over comparison for
10240 them (which also works for scalar masks). We store opcodes
10241 to use in bitop1 and bitop2. Statement is vectorized as
10242 BITOP2 (rhs1 BITOP1 rhs2) or
10243 rhs1 BITOP2 (BITOP1 rhs2)
10244 depending on bitop1 and bitop2 arity. */
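 /* For example (illustrative only), for mask operands A and B this yields:
 A > B as A & ~B
 A >= B as A | ~B
 A == B as ~(A ^ B)
 A != B as A ^ B */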
10245 bool swap_p = false;
10246 if (VECTOR_BOOLEAN_TYPE_P (vectype))
10247 {
10248 if (code == GT_EXPR)
10249 {
10250 bitop1 = BIT_NOT_EXPR;
10251 bitop2 = BIT_AND_EXPR;
10252 }
10253 else if (code == GE_EXPR)
10254 {
10255 bitop1 = BIT_NOT_EXPR;
10256 bitop2 = BIT_IOR_EXPR;
10257 }
10258 else if (code == LT_EXPR)
10259 {
10260 bitop1 = BIT_NOT_EXPR;
10261 bitop2 = BIT_AND_EXPR;
10262 swap_p = true;
10263 }
10264 else if (code == LE_EXPR)
10265 {
10266 bitop1 = BIT_NOT_EXPR;
10267 bitop2 = BIT_IOR_EXPR;
10268 swap_p = true;
10269 }
10270 else
10271 {
10272 bitop1 = BIT_XOR_EXPR;
10273 if (code == EQ_EXPR)
10274 bitop2 = BIT_NOT_EXPR;
10275 }
10276 }
10277
10278 if (!vec_stmt)
10279 {
10280 if (bitop1 == NOP_EXPR)
10281 {
10282 if (!expand_vec_cmp_expr_p (vectype, mask_type, code))
10283 return false;
10284 }
10285 else
10286 {
10287 machine_mode mode = TYPE_MODE (vectype);
10288 optab optab;
10289
10290 optab = optab_for_tree_code (bitop1, vectype, optab_default);
10291 if (!optab || optab_handler (optab, mode) == CODE_FOR_nothing)
10292 return false;
10293
10294 if (bitop2 != NOP_EXPR)
10295 {
10296 optab = optab_for_tree_code (bitop2, vectype, optab_default);
10297 if (!optab || optab_handler (optab, mode) == CODE_FOR_nothing)
10298 return false;
10299 }
10300 }
10301
10302 /* Put types on constant and invariant SLP children. */
10303 if (slp_node
10304 && (!vect_maybe_update_slp_op_vectype (slp_rhs1, vectype)
10305 || !vect_maybe_update_slp_op_vectype (slp_rhs2, vectype)))
10306 {
10307 if (dump_enabled_p ())
10308 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
10309 "incompatible vector types for invariants\n");
10310 return false;
10311 }
10312
10313 STMT_VINFO_TYPE (stmt_info) = comparison_vec_info_type;
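 /* Cost one vector statement per copy, or two when a second bit
 operation (bitop2) is needed. */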
10314 vect_model_simple_cost (vinfo, stmt_info,
10315 ncopies * (1 + (bitop2 != NOP_EXPR)),
10316 dts, ndts, slp_node, cost_vec);
10317 return true;
10318 }
10319
10320 /* Transform. */
10321 if (!slp_node)
10322 {
10323 vec_oprnds0.create (1);
10324 vec_oprnds1.create (1);
10325 }
10326
10327 /* Handle def. */
10328 lhs = gimple_assign_lhs (stmt);
10329 mask = vect_create_destination_var (lhs, mask_type);
10330
10331 vect_get_vec_defs (vinfo, stmt_info, slp_node, ncopies,
10332 rhs1, &vec_oprnds0, vectype,
10333 rhs2, &vec_oprnds1, vectype);
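 /* For LT_EXPR and LE_EXPR the bit operations chosen during analysis
 expect the operands in swapped order. */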
10334 if (swap_p)
10335 std::swap (vec_oprnds0, vec_oprnds1);
10336
10337 /* Arguments are ready. Create the new vector stmt. */
10338 FOR_EACH_VEC_ELT (vec_oprnds0, i, vec_rhs1)
10339 {
10340 gimple *new_stmt;
10341 vec_rhs2 = vec_oprnds1[i];
10342
10343 new_temp = make_ssa_name (mask);
10344 if (bitop1 == NOP_EXPR)
10345 {
10346 new_stmt = gimple_build_assign (new_temp, code,
10347 vec_rhs1, vec_rhs2);
10348 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
10349 }
10350 else
10351 {
10352 if (bitop1 == BIT_NOT_EXPR)
10353 new_stmt = gimple_build_assign (new_temp, bitop1, vec_rhs2);
10354 else
10355 new_stmt = gimple_build_assign (new_temp, bitop1, vec_rhs1,
10356 vec_rhs2);
10357 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
10358 if (bitop2 != NOP_EXPR)
10359 {
10360 tree res = make_ssa_name (mask);
10361 if (bitop2 == BIT_NOT_EXPR)
10362 new_stmt = gimple_build_assign (res, bitop2, new_temp);
10363 else
10364 new_stmt = gimple_build_assign (res, bitop2, vec_rhs1,
10365 new_temp);
10366 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
10367 }
10368 }
10369 if (slp_node)
10370 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
10371 else
10372 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
10373 }
10374
10375 if (!slp_node)
10376 *vec_stmt = STMT_VINFO_VEC_STMTS (stmt_info)[0];
10377
10378 vec_oprnds0.release ();
10379 vec_oprnds1.release ();
10380
10381 return true;
10382 }
10383
10384 /* If SLP_NODE is nonnull, return true if vectorizable_live_operation
10385 can handle all live statements in the node. Otherwise return true
10386 if STMT_INFO is not live or if vectorizable_live_operation can handle it.
10387 GSI and VEC_STMT_P are as for vectorizable_live_operation. */
10388
10389 static bool
10390 can_vectorize_live_stmts (loop_vec_info loop_vinfo,
10391 stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
10392 slp_tree slp_node, slp_instance slp_node_instance,
10393 bool vec_stmt_p,
10394 stmt_vector_for_cost *cost_vec)
10395 {
10396 if (slp_node)
10397 {
10398 stmt_vec_info slp_stmt_info;
10399 unsigned int i;
10400 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (slp_node), i, slp_stmt_info)
10401 {
10402 if (STMT_VINFO_LIVE_P (slp_stmt_info)
10403 && !vectorizable_live_operation (loop_vinfo,
10404 slp_stmt_info, gsi, slp_node,
10405 slp_node_instance, i,
10406 vec_stmt_p, cost_vec))
10407 return false;
10408 }
10409 }
10410 else if (STMT_VINFO_LIVE_P (stmt_info)
10411 && !vectorizable_live_operation (loop_vinfo, stmt_info, gsi,
10412 slp_node, slp_node_instance, -1,
10413 vec_stmt_p, cost_vec))
10414 return false;
10415
10416 return true;
10417 }
10418
10419 /* Make sure the statement is vectorizable. */
10420
10421 opt_result
10422 vect_analyze_stmt (vec_info *vinfo,
10423 stmt_vec_info stmt_info, bool *need_to_vectorize,
10424 slp_tree node, slp_instance node_instance,
10425 stmt_vector_for_cost *cost_vec)
10426 {
10427 bb_vec_info bb_vinfo = dyn_cast <bb_vec_info> (vinfo);
10428 enum vect_relevant relevance = STMT_VINFO_RELEVANT (stmt_info);
10429 bool ok;
10430 gimple_seq pattern_def_seq;
10431
10432 if (dump_enabled_p ())
10433 dump_printf_loc (MSG_NOTE, vect_location, "==> examining statement: %G",
10434 stmt_info->stmt);
10435
10436 if (gimple_has_volatile_ops (stmt_info->stmt))
10437 return opt_result::failure_at (stmt_info->stmt,
10438 "not vectorized:"
10439 " stmt has volatile operands: %G\n",
10440 stmt_info->stmt);
10441
10442 if (STMT_VINFO_IN_PATTERN_P (stmt_info)
10443 && node == NULL
10444 && (pattern_def_seq = STMT_VINFO_PATTERN_DEF_SEQ (stmt_info)))
10445 {
10446 gimple_stmt_iterator si;
10447
10448 for (si = gsi_start (pattern_def_seq); !gsi_end_p (si); gsi_next (&si))
10449 {
10450 stmt_vec_info pattern_def_stmt_info
10451 = vinfo->lookup_stmt (gsi_stmt (si));
10452 if (STMT_VINFO_RELEVANT_P (pattern_def_stmt_info)
10453 || STMT_VINFO_LIVE_P (pattern_def_stmt_info))
10454 {
10455 /* Analyze def stmt of STMT if it's a pattern stmt. */
10456 if (dump_enabled_p ())
10457 dump_printf_loc (MSG_NOTE, vect_location,
10458 "==> examining pattern def statement: %G",
10459 pattern_def_stmt_info->stmt);
10460
10461 opt_result res
10462 = vect_analyze_stmt (vinfo, pattern_def_stmt_info,
10463 need_to_vectorize, node, node_instance,
10464 cost_vec);
10465 if (!res)
10466 return res;
10467 }
10468 }
10469 }
10470
10471 /* Skip stmts that do not need to be vectorized. In loops this is expected
10472 to include:
10473 - the COND_EXPR which is the loop exit condition
10474 - any LABEL_EXPRs in the loop
10475 - computations that are used only for array indexing or loop control.
10476 In basic blocks we only analyze statements that are a part of some SLP
10477 instance; therefore, all the statements are relevant.
10478
10479 A pattern statement needs to be analyzed instead of the original statement
10480 if the original statement is not relevant. Otherwise, we analyze both
10481 statements. In basic blocks we are called from some SLP instance
10482 traversal, so we don't analyze pattern stmts here; the pattern stmts
10483 are already part of the SLP instance. */
10484
10485 stmt_vec_info pattern_stmt_info = STMT_VINFO_RELATED_STMT (stmt_info);
10486 if (!STMT_VINFO_RELEVANT_P (stmt_info)
10487 && !STMT_VINFO_LIVE_P (stmt_info))
10488 {
10489 if (STMT_VINFO_IN_PATTERN_P (stmt_info)
10490 && pattern_stmt_info
10491 && (STMT_VINFO_RELEVANT_P (pattern_stmt_info)
10492 || STMT_VINFO_LIVE_P (pattern_stmt_info)))
10493 {
10494 /* Analyze PATTERN_STMT instead of the original stmt. */
10495 stmt_info = pattern_stmt_info;
10496 if (dump_enabled_p ())
10497 dump_printf_loc (MSG_NOTE, vect_location,
10498 "==> examining pattern statement: %G",
10499 stmt_info->stmt);
10500 }
10501 else
10502 {
10503 if (dump_enabled_p ())
10504 dump_printf_loc (MSG_NOTE, vect_location, "irrelevant.\n");
10505
10506 return opt_result::success ();
10507 }
10508 }
10509 else if (STMT_VINFO_IN_PATTERN_P (stmt_info)
10510 && node == NULL
10511 && pattern_stmt_info
10512 && (STMT_VINFO_RELEVANT_P (pattern_stmt_info)
10513 || STMT_VINFO_LIVE_P (pattern_stmt_info)))
10514 {
10515 /* Analyze PATTERN_STMT too. */
10516 if (dump_enabled_p ())
10517 dump_printf_loc (MSG_NOTE, vect_location,
10518 "==> examining pattern statement: %G",
10519 pattern_stmt_info->stmt);
10520
10521 opt_result res
10522 = vect_analyze_stmt (vinfo, pattern_stmt_info, need_to_vectorize, node,
10523 node_instance, cost_vec);
10524 if (!res)
10525 return res;
10526 }
10527
10528 switch (STMT_VINFO_DEF_TYPE (stmt_info))
10529 {
10530 case vect_internal_def:
10531 break;
10532
10533 case vect_reduction_def:
10534 case vect_nested_cycle:
10535 gcc_assert (!bb_vinfo
10536 && (relevance == vect_used_in_outer
10537 || relevance == vect_used_in_outer_by_reduction
10538 || relevance == vect_used_by_reduction
10539 || relevance == vect_unused_in_scope
10540 || relevance == vect_used_only_live));
10541 break;
10542
10543 case vect_induction_def:
10544 gcc_assert (!bb_vinfo);
10545 break;
10546
10547 case vect_constant_def:
10548 case vect_external_def:
10549 case vect_unknown_def_type:
10550 default:
10551 gcc_unreachable ();
10552 }
10553
10554 if (STMT_VINFO_RELEVANT_P (stmt_info))
10555 {
10556 tree type = gimple_expr_type (stmt_info->stmt);
10557 gcc_assert (!VECTOR_MODE_P (TYPE_MODE (type)));
10558 gcall *call = dyn_cast <gcall *> (stmt_info->stmt);
10559 gcc_assert (STMT_VINFO_VECTYPE (stmt_info)
10560 || (call && gimple_call_lhs (call) == NULL_TREE));
10561 *need_to_vectorize = true;
10562 }
10563
10564 if (PURE_SLP_STMT (stmt_info) && !node)
10565 {
10566 if (dump_enabled_p ())
10567 dump_printf_loc (MSG_NOTE, vect_location,
10568 "handled only by SLP analysis\n");
10569 return opt_result::success ();
10570 }
10571
10572 ok = true;
10573 if (!bb_vinfo
10574 && (STMT_VINFO_RELEVANT_P (stmt_info)
10575 || STMT_VINFO_DEF_TYPE (stmt_info) == vect_reduction_def))
10576 /* Prefer vectorizable_call over vectorizable_simd_clone_call so
10577 -mveclibabi= takes preference over library functions with
10578 the simd attribute. */
10579 ok = (vectorizable_call (vinfo, stmt_info, NULL, NULL, node, cost_vec)
10580 || vectorizable_simd_clone_call (vinfo, stmt_info, NULL, NULL, node,
10581 cost_vec)
10582 || vectorizable_conversion (vinfo, stmt_info,
10583 NULL, NULL, node, cost_vec)
10584 || vectorizable_operation (vinfo, stmt_info,
10585 NULL, NULL, node, cost_vec)
10586 || vectorizable_assignment (vinfo, stmt_info,
10587 NULL, NULL, node, cost_vec)
10588 || vectorizable_load (vinfo, stmt_info, NULL, NULL, node, cost_vec)
10589 || vectorizable_store (vinfo, stmt_info, NULL, NULL, node, cost_vec)
10590 || vectorizable_reduction (as_a <loop_vec_info> (vinfo), stmt_info,
10591 node, node_instance, cost_vec)
10592 || vectorizable_induction (as_a <loop_vec_info> (vinfo), stmt_info,
10593 NULL, node, cost_vec)
10594 || vectorizable_shift (vinfo, stmt_info, NULL, NULL, node, cost_vec)
10595 || vectorizable_condition (vinfo, stmt_info,
10596 NULL, NULL, node, cost_vec)
10597 || vectorizable_comparison (vinfo, stmt_info, NULL, NULL, node,
10598 cost_vec)
10599 || vectorizable_lc_phi (as_a <loop_vec_info> (vinfo),
10600 stmt_info, NULL, node));
10601 else
10602 {
10603 if (bb_vinfo)
10604 ok = (vectorizable_call (vinfo, stmt_info, NULL, NULL, node, cost_vec)
10605 || vectorizable_simd_clone_call (vinfo, stmt_info,
10606 NULL, NULL, node, cost_vec)
10607 || vectorizable_conversion (vinfo, stmt_info, NULL, NULL, node,
10608 cost_vec)
10609 || vectorizable_shift (vinfo, stmt_info,
10610 NULL, NULL, node, cost_vec)
10611 || vectorizable_operation (vinfo, stmt_info,
10612 NULL, NULL, node, cost_vec)
10613 || vectorizable_assignment (vinfo, stmt_info, NULL, NULL, node,
10614 cost_vec)
10615 || vectorizable_load (vinfo, stmt_info,
10616 NULL, NULL, node, cost_vec)
10617 || vectorizable_store (vinfo, stmt_info,
10618 NULL, NULL, node, cost_vec)
10619 || vectorizable_condition (vinfo, stmt_info,
10620 NULL, NULL, node, cost_vec)
10621 || vectorizable_comparison (vinfo, stmt_info, NULL, NULL, node,
10622 cost_vec));
10623 }
10624
10625 if (!ok)
10626 return opt_result::failure_at (stmt_info->stmt,
10627 "not vectorized:"
10628 " relevant stmt not supported: %G",
10629 stmt_info->stmt);
10630
10631 /* Stmts that are (also) "live" (i.e. - that are used out of the loop)
10632 need extra handling, except for vectorizable reductions. */
10633 if (!bb_vinfo
10634 && STMT_VINFO_TYPE (stmt_info) != reduc_vec_info_type
10635 && STMT_VINFO_TYPE (stmt_info) != lc_phi_info_type
10636 && !can_vectorize_live_stmts (as_a <loop_vec_info> (vinfo),
10637 stmt_info, NULL, node, node_instance,
10638 false, cost_vec))
10639 return opt_result::failure_at (stmt_info->stmt,
10640 "not vectorized:"
10641 " live stmt not supported: %G",
10642 stmt_info->stmt);
10643
10644 return opt_result::success ();
10645 }
10646
10647
10648 /* Function vect_transform_stmt.
10649
10650 Create a vectorized stmt to replace STMT_INFO, and insert it at GSI. */
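 /* Returns true if STMT_INFO is a store whose scalar statements can now be
 removed; for an interleaved group this is only the case once the last
 store in the group has been vectorized. */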
10651
10652 bool
10653 vect_transform_stmt (vec_info *vinfo,
10654 stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
10655 slp_tree slp_node, slp_instance slp_node_instance)
10656 {
10657 bool is_store = false;
10658 gimple *vec_stmt = NULL;
10659 bool done;
10660
10661 gcc_assert (slp_node || !PURE_SLP_STMT (stmt_info));
10662
10663 switch (STMT_VINFO_TYPE (stmt_info))
10664 {
10665 case type_demotion_vec_info_type:
10666 case type_promotion_vec_info_type:
10667 case type_conversion_vec_info_type:
10668 done = vectorizable_conversion (vinfo, stmt_info,
10669 gsi, &vec_stmt, slp_node, NULL);
10670 gcc_assert (done);
10671 break;
10672
10673 case induc_vec_info_type:
10674 done = vectorizable_induction (as_a <loop_vec_info> (vinfo),
10675 stmt_info, &vec_stmt, slp_node,
10676 NULL);
10677 gcc_assert (done);
10678 break;
10679
10680 case shift_vec_info_type:
10681 done = vectorizable_shift (vinfo, stmt_info,
10682 gsi, &vec_stmt, slp_node, NULL);
10683 gcc_assert (done);
10684 break;
10685
10686 case op_vec_info_type:
10687 done = vectorizable_operation (vinfo, stmt_info, gsi, &vec_stmt, slp_node,
10688 NULL);
10689 gcc_assert (done);
10690 break;
10691
10692 case assignment_vec_info_type:
10693 done = vectorizable_assignment (vinfo, stmt_info,
10694 gsi, &vec_stmt, slp_node, NULL);
10695 gcc_assert (done);
10696 break;
10697
10698 case load_vec_info_type:
10699 done = vectorizable_load (vinfo, stmt_info, gsi, &vec_stmt, slp_node,
10700 NULL);
10701 gcc_assert (done);
10702 break;
10703
10704 case store_vec_info_type:
10705 done = vectorizable_store (vinfo, stmt_info,
10706 gsi, &vec_stmt, slp_node, NULL);
10707 gcc_assert (done);
10708 if (STMT_VINFO_GROUPED_ACCESS (stmt_info) && !slp_node)
10709 {
10710 /* In case of interleaving, the whole chain is vectorized when the
10711 last store in the chain is reached. Store stmts before the last
10712 one are skipped, and their vec_stmt_info shouldn't be freed
10713 meanwhile. */
10714 stmt_vec_info group_info = DR_GROUP_FIRST_ELEMENT (stmt_info);
10715 if (DR_GROUP_STORE_COUNT (group_info) == DR_GROUP_SIZE (group_info))
10716 is_store = true;
10717 }
10718 else
10719 is_store = true;
10720 break;
10721
10722 case condition_vec_info_type:
10723 done = vectorizable_condition (vinfo, stmt_info,
10724 gsi, &vec_stmt, slp_node, NULL);
10725 gcc_assert (done);
10726 break;
10727
10728 case comparison_vec_info_type:
10729 done = vectorizable_comparison (vinfo, stmt_info, gsi, &vec_stmt,
10730 slp_node, NULL);
10731 gcc_assert (done);
10732 break;
10733
10734 case call_vec_info_type:
10735 done = vectorizable_call (vinfo, stmt_info,
10736 gsi, &vec_stmt, slp_node, NULL);
10737 break;
10738
10739 case call_simd_clone_vec_info_type:
10740 done = vectorizable_simd_clone_call (vinfo, stmt_info, gsi, &vec_stmt,
10741 slp_node, NULL);
10742 break;
10743
10744 case reduc_vec_info_type:
10745 done = vect_transform_reduction (as_a <loop_vec_info> (vinfo), stmt_info,
10746 gsi, &vec_stmt, slp_node);
10747 gcc_assert (done);
10748 break;
10749
10750 case cycle_phi_info_type:
10751 done = vect_transform_cycle_phi (as_a <loop_vec_info> (vinfo), stmt_info,
10752 &vec_stmt, slp_node, slp_node_instance);
10753 gcc_assert (done);
10754 break;
10755
10756 case lc_phi_info_type:
10757 done = vectorizable_lc_phi (as_a <loop_vec_info> (vinfo),
10758 stmt_info, &vec_stmt, slp_node);
10759 gcc_assert (done);
10760 break;
10761
10762 default:
10763 if (!STMT_VINFO_LIVE_P (stmt_info))
10764 {
10765 if (dump_enabled_p ())
10766 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
10767 "stmt not supported.\n");
10768 gcc_unreachable ();
10769 }
10770 done = true;
10771 }
10772
10773 if (!slp_node && vec_stmt)
10774 gcc_assert (STMT_VINFO_VEC_STMTS (stmt_info).exists ());
10775
10776 if (STMT_VINFO_TYPE (stmt_info) == store_vec_info_type)
10777 return is_store;
10778
10779 /* If this stmt defines a value used on a backedge, update the
10780 vectorized PHIs. */
10781 stmt_vec_info orig_stmt_info = vect_orig_stmt (stmt_info);
10782 stmt_vec_info reduc_info;
10783 if (STMT_VINFO_REDUC_DEF (orig_stmt_info)
10784 && vect_stmt_to_vectorize (orig_stmt_info) == stmt_info
10785 && (reduc_info = info_for_reduction (vinfo, orig_stmt_info))
10786 && STMT_VINFO_REDUC_TYPE (reduc_info) != FOLD_LEFT_REDUCTION
10787 && STMT_VINFO_REDUC_TYPE (reduc_info) != EXTRACT_LAST_REDUCTION)
10788 {
10789 gphi *phi;
10790 edge e;
10791 if (!slp_node
10792 && (phi = dyn_cast <gphi *>
10793 (STMT_VINFO_REDUC_DEF (orig_stmt_info)->stmt))
10794 && dominated_by_p (CDI_DOMINATORS,
10795 gimple_bb (orig_stmt_info->stmt), gimple_bb (phi))
10796 && (e = loop_latch_edge (gimple_bb (phi)->loop_father))
10797 && (PHI_ARG_DEF_FROM_EDGE (phi, e)
10798 == gimple_get_lhs (orig_stmt_info->stmt)))
10799 {
10800 vec<gimple *> &phi_info
10801 = STMT_VINFO_VEC_STMTS (STMT_VINFO_REDUC_DEF (orig_stmt_info));
10802 vec<gimple *> &vec_stmt
10803 = STMT_VINFO_VEC_STMTS (stmt_info);
10804 gcc_assert (phi_info.length () == vec_stmt.length ());
10805 for (unsigned i = 0; i < phi_info.length (); ++i)
10806 add_phi_arg (as_a <gphi *> (phi_info[i]),
10807 gimple_get_lhs (vec_stmt[i]), e,
10808 gimple_phi_arg_location (phi, e->dest_idx));
10809 }
10810 else if (slp_node
10811 && slp_node != slp_node_instance->reduc_phis)
10812 {
10813 slp_tree phi_node = slp_node_instance->reduc_phis;
10814 gphi *phi = as_a <gphi *> (SLP_TREE_SCALAR_STMTS (phi_node)[0]->stmt);
10815 e = loop_latch_edge (gimple_bb (phi)->loop_father);
10816 gcc_assert (SLP_TREE_VEC_STMTS (phi_node).length ()
10817 == SLP_TREE_VEC_STMTS (slp_node).length ());
10818 for (unsigned i = 0; i < SLP_TREE_VEC_STMTS (phi_node).length (); ++i)
10819 add_phi_arg (as_a <gphi *> (SLP_TREE_VEC_STMTS (phi_node)[i]),
10820 vect_get_slp_vect_def (slp_node, i),
10821 e, gimple_phi_arg_location (phi, e->dest_idx));
10822 }
10823 }
10824
10825 /* Handle stmts whose DEF is used outside the loop-nest that is
10826 being vectorized. */
10827 if (is_a <loop_vec_info> (vinfo))
10828 done = can_vectorize_live_stmts (as_a <loop_vec_info> (vinfo),
10829 stmt_info, gsi, slp_node,
10830 slp_node_instance, true, NULL);
10831 gcc_assert (done);
10832
10833 return false;
10834 }
10835
10836
10837 /* Remove a group of stores (for SLP or interleaving), free their
10838 stmt_vec_info. */
10839
10840 void
10841 vect_remove_stores (vec_info *vinfo, stmt_vec_info first_stmt_info)
10842 {
10843 stmt_vec_info next_stmt_info = first_stmt_info;
10844
10845 while (next_stmt_info)
10846 {
10847 stmt_vec_info tmp = DR_GROUP_NEXT_ELEMENT (next_stmt_info);
10848 next_stmt_info = vect_orig_stmt (next_stmt_info);
10849 /* Free the attached stmt_vec_info and remove the stmt. */
10850 vinfo->remove_stmt (next_stmt_info);
10851 next_stmt_info = tmp;
10852 }
10853 }
10854
10855 /* If NUNITS is nonzero, return a vector type that contains NUNITS
10856 elements of type SCALAR_TYPE, or null if the target doesn't support
10857 such a type.
10858
10859 If NUNITS is zero, return a vector type that contains elements of
10860 type SCALAR_TYPE, choosing whichever vector size the target prefers.
10861
10862 If PREVAILING_MODE is VOIDmode, we have not yet chosen a vector mode
10863 for this vectorization region and want to "autodetect" the best choice.
10864 Otherwise, PREVAILING_MODE is a previously-chosen vector TYPE_MODE
10865 and we want the new type to be interoperable with it. PREVAILING_MODE
10866 in this case can be a scalar integer mode or a vector mode; when it
10867 is a vector mode, the function acts like a tree-level version of
10868 related_vector_mode. */
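 /* For example (illustrative): on a target whose preferred SIMD width is
 128 bits, a 32-bit integer SCALAR_TYPE with NUNITS == 0 and
 PREVAILING_MODE == VOIDmode would yield a four-element integer
 vector type. */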
10869
10870 tree
10871 get_related_vectype_for_scalar_type (machine_mode prevailing_mode,
10872 tree scalar_type, poly_uint64 nunits)
10873 {
10874 tree orig_scalar_type = scalar_type;
10875 scalar_mode inner_mode;
10876 machine_mode simd_mode;
10877 tree vectype;
10878
10879 if (!is_int_mode (TYPE_MODE (scalar_type), &inner_mode)
10880 && !is_float_mode (TYPE_MODE (scalar_type), &inner_mode))
10881 return NULL_TREE;
10882
10883 unsigned int nbytes = GET_MODE_SIZE (inner_mode);
10884
10885 /* For vector types of elements whose mode precision doesn't
10886 match their type's precision we use an element type of mode
10887 precision. The vectorization routines will have to make sure
10888 they support the proper result truncation/extension.
10889 We also make sure to build vector types with INTEGER_TYPE
10890 component type only. */
10891 if (INTEGRAL_TYPE_P (scalar_type)
10892 && (GET_MODE_BITSIZE (inner_mode) != TYPE_PRECISION (scalar_type)
10893 || TREE_CODE (scalar_type) != INTEGER_TYPE))
10894 scalar_type = build_nonstandard_integer_type (GET_MODE_BITSIZE (inner_mode),
10895 TYPE_UNSIGNED (scalar_type));
10896
10897 /* We shouldn't end up building VECTOR_TYPEs of non-scalar components.
10898 When the component mode passes the above test simply use a type
10899 corresponding to that mode. The theory is that any use that
10900 would cause problems with this will disable vectorization anyway. */
10901 else if (!SCALAR_FLOAT_TYPE_P (scalar_type)
10902 && !INTEGRAL_TYPE_P (scalar_type))
10903 scalar_type = lang_hooks.types.type_for_mode (inner_mode, 1);
10904
10905 /* We can't build a vector type of elements with alignment bigger than
10906 their size. */
10907 else if (nbytes < TYPE_ALIGN_UNIT (scalar_type))
10908 scalar_type = lang_hooks.types.type_for_mode (inner_mode,
10909 TYPE_UNSIGNED (scalar_type));
10910
10911 /* If we fell back to using the mode, fail if there was
10912 no scalar type for it. */
10913 if (scalar_type == NULL_TREE)
10914 return NULL_TREE;
10915
10916 /* If no prevailing mode was supplied, use the mode the target prefers.
10917 Otherwise lookup a vector mode based on the prevailing mode. */
10918 if (prevailing_mode == VOIDmode)
10919 {
10920 gcc_assert (known_eq (nunits, 0U));
10921 simd_mode = targetm.vectorize.preferred_simd_mode (inner_mode);
10922 if (SCALAR_INT_MODE_P (simd_mode))
10923 {
10924 /* Traditional behavior is not to take the integer mode
10925 literally, but simply to use it as a way of determining
10926 the vector size. It is up to mode_for_vector to decide
10927 what the TYPE_MODE should be.
10928
10929 Note that nunits == 1 is allowed in order to support single
10930 element vector types. */
10931 if (!multiple_p (GET_MODE_SIZE (simd_mode), nbytes, &nunits)
10932 || !mode_for_vector (inner_mode, nunits).exists (&simd_mode))
10933 return NULL_TREE;
10934 }
10935 }
10936 else if (SCALAR_INT_MODE_P (prevailing_mode)
10937 || !related_vector_mode (prevailing_mode,
10938 inner_mode, nunits).exists (&simd_mode))
10939 {
10940 /* Fall back to using mode_for_vector, mostly in the hope of being
10941 able to use an integer mode. */
10942 if (known_eq (nunits, 0U)
10943 && !multiple_p (GET_MODE_SIZE (prevailing_mode), nbytes, &nunits))
10944 return NULL_TREE;
10945
10946 if (!mode_for_vector (inner_mode, nunits).exists (&simd_mode))
10947 return NULL_TREE;
10948 }
10949
10950 vectype = build_vector_type_for_mode (scalar_type, simd_mode);
10951
10952 /* In cases where the mode was chosen by mode_for_vector, check that
10953 the target actually supports the chosen mode, or that it at least
10954 allows the vector mode to be replaced by a like-sized integer. */
10955 if (!VECTOR_MODE_P (TYPE_MODE (vectype))
10956 && !INTEGRAL_MODE_P (TYPE_MODE (vectype)))
10957 return NULL_TREE;
10958
10959 /* Re-attach the address-space qualifier if we canonicalized the scalar
10960 type. */
10961 if (TYPE_ADDR_SPACE (orig_scalar_type) != TYPE_ADDR_SPACE (vectype))
10962 return build_qualified_type
10963 (vectype, KEEP_QUAL_ADDR_SPACE (TYPE_QUALS (orig_scalar_type)));
10964
10965 return vectype;
10966 }
10967
10968 /* Function get_vectype_for_scalar_type.
10969
10970 Returns the vector type corresponding to SCALAR_TYPE as supported
10971 by the target. If GROUP_SIZE is nonzero and we're performing BB
10972 vectorization, make sure that the number of elements in the vector
10973 is no bigger than GROUP_SIZE. */
10974
10975 tree
10976 get_vectype_for_scalar_type (vec_info *vinfo, tree scalar_type,
10977 unsigned int group_size)
10978 {
10979 /* For BB vectorization, we should always have a group size once we've
10980 constructed the SLP tree; the only valid uses of zero GROUP_SIZEs
10981 are tentative requests during things like early data reference
10982 analysis and pattern recognition. */
10983 if (is_a <bb_vec_info> (vinfo))
10984 gcc_assert (vinfo->slp_instances.is_empty () || group_size != 0);
10985 else
10986 group_size = 0;
10987
10988 tree vectype = get_related_vectype_for_scalar_type (vinfo->vector_mode,
10989 scalar_type);
10990 if (vectype && vinfo->vector_mode == VOIDmode)
10991 vinfo->vector_mode = TYPE_MODE (vectype);
10992
10993 /* Register the natural choice of vector type, before the group size
10994 has been applied. */
10995 if (vectype)
10996 vinfo->used_vector_modes.add (TYPE_MODE (vectype));
10997
10998 /* If the natural choice of vector type doesn't satisfy GROUP_SIZE,
10999 try again with an explicit number of elements. */
11000 if (vectype
11001 && group_size
11002 && maybe_ge (TYPE_VECTOR_SUBPARTS (vectype), group_size))
11003 {
11004 /* Start with the biggest number of units that fits within
11005 GROUP_SIZE and halve it until we find a valid vector type.
11006 Usually either the first attempt will succeed or all will
11007 fail (in the latter case because GROUP_SIZE is too small
11008 for the target), but it's possible that a target could have
11009 a hole between supported vector types.
11010
11011 If GROUP_SIZE is not a power of 2, this has the effect of
11012 trying the largest power of 2 that fits within the group,
11013 even though the group is not a multiple of that vector size.
11014 The BB vectorizer will then try to carve up the group into
11015 smaller pieces. */
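 /* For example (illustrative): GROUP_SIZE == 6 tries a 4-element vector
 type first and, if that is not supported, a 2-element one. */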
11016 unsigned int nunits = 1 << floor_log2 (group_size);
11017 do
11018 {
11019 vectype = get_related_vectype_for_scalar_type (vinfo->vector_mode,
11020 scalar_type, nunits);
11021 nunits /= 2;
11022 }
11023 while (nunits > 1 && !vectype);
11024 }
11025
11026 return vectype;
11027 }
11028
11029 /* Return the vector type corresponding to SCALAR_TYPE as supported
11030 by the target. NODE, if nonnull, is the SLP tree node that will
11031 use the returned vector type. */
11032
11033 tree
11034 get_vectype_for_scalar_type (vec_info *vinfo, tree scalar_type, slp_tree node)
11035 {
11036 unsigned int group_size = 0;
11037 if (node)
11038 group_size = SLP_TREE_LANES (node);
11039 return get_vectype_for_scalar_type (vinfo, scalar_type, group_size);
11040 }
11041
11042 /* Function get_mask_type_for_scalar_type.
11043
11044 Returns the mask type corresponding to a result of comparison
11045 of vectors of the specified SCALAR_TYPE as supported by the target.
11046 If GROUP_SIZE is nonzero and we're performing BB vectorization,
11047 make sure that the number of elements in the vector is no bigger
11048 than GROUP_SIZE. */
11049
11050 tree
11051 get_mask_type_for_scalar_type (vec_info *vinfo, tree scalar_type,
11052 unsigned int group_size)
11053 {
11054 tree vectype = get_vectype_for_scalar_type (vinfo, scalar_type, group_size);
11055
11056 if (!vectype)
11057 return NULL;
11058
11059 return truth_type_for (vectype);
11060 }
11061
11062 /* Function get_same_sized_vectype
11063
11064 Returns a vector type corresponding to SCALAR_TYPE of size
11065 VECTOR_TYPE if supported by the target. */
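 /* For example (illustrative): a 32-byte VECTOR_TYPE of 32-bit integers
 and a double SCALAR_TYPE would give a four-element vector of doubles,
 if the target supports it. */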
11066
11067 tree
11068 get_same_sized_vectype (tree scalar_type, tree vector_type)
11069 {
11070 if (VECT_SCALAR_BOOLEAN_TYPE_P (scalar_type))
11071 return truth_type_for (vector_type);
11072
11073 poly_uint64 nunits;
11074 if (!multiple_p (GET_MODE_SIZE (TYPE_MODE (vector_type)),
11075 GET_MODE_SIZE (TYPE_MODE (scalar_type)), &nunits))
11076 return NULL_TREE;
11077
11078 return get_related_vectype_for_scalar_type (TYPE_MODE (vector_type),
11079 scalar_type, nunits);
11080 }
11081
11082 /* Return true if replacing VINFO->vector_mode with VECTOR_MODE
11083 would not change the chosen vector modes. */
11084
11085 bool
11086 vect_chooses_same_modes_p (vec_info *vinfo, machine_mode vector_mode)
11087 {
11088 for (vec_info::mode_set::iterator i = vinfo->used_vector_modes.begin ();
11089 i != vinfo->used_vector_modes.end (); ++i)
11090 if (!VECTOR_MODE_P (*i)
11091 || related_vector_mode (vector_mode, GET_MODE_INNER (*i), 0) != *i)
11092 return false;
11093 return true;
11094 }
11095
11096 /* Function vect_is_simple_use.
11097
11098 Input:
11099 VINFO - the vect info of the loop or basic block that is being vectorized.
11100 OPERAND - operand in the loop or bb.
11101 Output:
11102 DEF_STMT_INFO_OUT (optional) - information about the defining stmt in
11103 case OPERAND is an SSA_NAME that is defined in the vectorizable region
11104 DEF_STMT_OUT (optional) - the defining stmt in case OPERAND is an SSA_NAME;
11105 the definition could be anywhere in the function
11106 DT - the type of definition
11107
11108 Returns whether a stmt with OPERAND can be vectorized.
11109 For loops, supportable operands are constants, loop invariants, and operands
11110 that are defined by the current iteration of the loop. Unsupportable
11111 operands are those that are defined by a previous iteration of the loop (as
11112 is the case in reduction/induction computations).
11113 For basic blocks, supportable operands are constants and bb invariants.
11114 For now, operands defined outside the basic block are not supported. */
11115
11116 bool
11117 vect_is_simple_use (tree operand, vec_info *vinfo, enum vect_def_type *dt,
11118 stmt_vec_info *def_stmt_info_out, gimple **def_stmt_out)
11119 {
11120 if (def_stmt_info_out)
11121 *def_stmt_info_out = NULL;
11122 if (def_stmt_out)
11123 *def_stmt_out = NULL;
11124 *dt = vect_unknown_def_type;
11125
11126 if (dump_enabled_p ())
11127 {
11128 dump_printf_loc (MSG_NOTE, vect_location,
11129 "vect_is_simple_use: operand ");
11130 if (TREE_CODE (operand) == SSA_NAME
11131 && !SSA_NAME_IS_DEFAULT_DEF (operand))
11132 dump_gimple_expr (MSG_NOTE, TDF_SLIM, SSA_NAME_DEF_STMT (operand), 0);
11133 else
11134 dump_generic_expr (MSG_NOTE, TDF_SLIM, operand);
11135 }
11136
11137 if (CONSTANT_CLASS_P (operand))
11138 *dt = vect_constant_def;
11139 else if (is_gimple_min_invariant (operand))
11140 *dt = vect_external_def;
11141 else if (TREE_CODE (operand) != SSA_NAME)
11142 *dt = vect_unknown_def_type;
11143 else if (SSA_NAME_IS_DEFAULT_DEF (operand))
11144 *dt = vect_external_def;
11145 else
11146 {
11147 gimple *def_stmt = SSA_NAME_DEF_STMT (operand);
11148 stmt_vec_info stmt_vinfo = vinfo->lookup_def (operand);
11149 if (!stmt_vinfo)
11150 *dt = vect_external_def;
11151 else
11152 {
11153 stmt_vinfo = vect_stmt_to_vectorize (stmt_vinfo);
11154 def_stmt = stmt_vinfo->stmt;
11155 switch (gimple_code (def_stmt))
11156 {
11157 case GIMPLE_PHI:
11158 case GIMPLE_ASSIGN:
11159 case GIMPLE_CALL:
11160 *dt = STMT_VINFO_DEF_TYPE (stmt_vinfo);
11161 break;
11162 default:
11163 *dt = vect_unknown_def_type;
11164 break;
11165 }
11166 if (def_stmt_info_out)
11167 *def_stmt_info_out = stmt_vinfo;
11168 }
11169 if (def_stmt_out)
11170 *def_stmt_out = def_stmt;
11171 }
11172
11173 if (dump_enabled_p ())
11174 {
11175 dump_printf (MSG_NOTE, ", type of def: ");
11176 switch (*dt)
11177 {
11178 case vect_uninitialized_def:
11179 dump_printf (MSG_NOTE, "uninitialized\n");
11180 break;
11181 case vect_constant_def:
11182 dump_printf (MSG_NOTE, "constant\n");
11183 break;
11184 case vect_external_def:
11185 dump_printf (MSG_NOTE, "external\n");
11186 break;
11187 case vect_internal_def:
11188 dump_printf (MSG_NOTE, "internal\n");
11189 break;
11190 case vect_induction_def:
11191 dump_printf (MSG_NOTE, "induction\n");
11192 break;
11193 case vect_reduction_def:
11194 dump_printf (MSG_NOTE, "reduction\n");
11195 break;
11196 case vect_double_reduction_def:
11197 dump_printf (MSG_NOTE, "double reduction\n");
11198 break;
11199 case vect_nested_cycle:
11200 dump_printf (MSG_NOTE, "nested cycle\n");
11201 break;
11202 case vect_unknown_def_type:
11203 dump_printf (MSG_NOTE, "unknown\n");
11204 break;
11205 }
11206 }
11207
11208 if (*dt == vect_unknown_def_type)
11209 {
11210 if (dump_enabled_p ())
11211 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
11212 "Unsupported pattern.\n");
11213 return false;
11214 }
11215
11216 return true;
11217 }
11218
11219 /* Function vect_is_simple_use.
11220
11221 Same as vect_is_simple_use but also determines the vector operand
11222 type of OPERAND and stores it to *VECTYPE. If the definition of
11223 OPERAND is vect_uninitialized_def, vect_constant_def or
11224 vect_external_def, *VECTYPE will be set to NULL_TREE and the caller
11225 is responsible for computing the best suited vector type for the
11226 scalar operand. */
11227
11228 bool
11229 vect_is_simple_use (tree operand, vec_info *vinfo, enum vect_def_type *dt,
11230 tree *vectype, stmt_vec_info *def_stmt_info_out,
11231 gimple **def_stmt_out)
11232 {
11233 stmt_vec_info def_stmt_info;
11234 gimple *def_stmt;
11235 if (!vect_is_simple_use (operand, vinfo, dt, &def_stmt_info, &def_stmt))
11236 return false;
11237
11238 if (def_stmt_out)
11239 *def_stmt_out = def_stmt;
11240 if (def_stmt_info_out)
11241 *def_stmt_info_out = def_stmt_info;
11242
11243 /* Now get a vector type if the def is internal, otherwise supply
11244 NULL_TREE and leave it up to the caller to figure out a proper
11245 type for the use stmt. */
11246 if (*dt == vect_internal_def
11247 || *dt == vect_induction_def
11248 || *dt == vect_reduction_def
11249 || *dt == vect_double_reduction_def
11250 || *dt == vect_nested_cycle)
11251 {
11252 *vectype = STMT_VINFO_VECTYPE (def_stmt_info);
11253 gcc_assert (*vectype != NULL_TREE);
11254 if (dump_enabled_p ())
11255 dump_printf_loc (MSG_NOTE, vect_location,
11256 "vect_is_simple_use: vectype %T\n", *vectype);
11257 }
11258 else if (*dt == vect_uninitialized_def
11259 || *dt == vect_constant_def
11260 || *dt == vect_external_def)
11261 *vectype = NULL_TREE;
11262 else
11263 gcc_unreachable ();
11264
11265 return true;
11266 }
11267
11268 /* Function vect_is_simple_use.
11269
11270 Same as vect_is_simple_use but determines the operand by operand
11271 position OPERAND from either STMT or SLP_NODE, filling in *OP
11272 and *SLP_DEF (when SLP_NODE is not NULL). */
11273
11274 bool
11275 vect_is_simple_use (vec_info *vinfo, stmt_vec_info stmt, slp_tree slp_node,
11276 unsigned operand, tree *op, slp_tree *slp_def,
11277 enum vect_def_type *dt,
11278 tree *vectype, stmt_vec_info *def_stmt_info_out)
11279 {
11280 if (slp_node)
11281 {
11282 slp_tree child = SLP_TREE_CHILDREN (slp_node)[operand];
11283 *slp_def = child;
11284 *vectype = SLP_TREE_VECTYPE (child);
11285 if (SLP_TREE_DEF_TYPE (child) == vect_internal_def)
11286 {
11287 *op = gimple_get_lhs (SLP_TREE_REPRESENTATIVE (child)->stmt);
11288 return vect_is_simple_use (*op, vinfo, dt, def_stmt_info_out);
11289 }
11290 else
11291 {
11292 if (def_stmt_info_out)
11293 *def_stmt_info_out = NULL;
11294 *op = SLP_TREE_SCALAR_OPS (child)[0];
11295 *dt = SLP_TREE_DEF_TYPE (child);
11296 return true;
11297 }
11298 }
11299 else
11300 {
11301 *slp_def = NULL;
11302 if (gassign *ass = dyn_cast <gassign *> (stmt->stmt))
11303 {
11304 if (gimple_assign_rhs_code (ass) == COND_EXPR
11305 && COMPARISON_CLASS_P (gimple_assign_rhs1 (ass)))
11306 {
11307 if (operand < 2)
11308 *op = TREE_OPERAND (gimple_assign_rhs1 (ass), operand);
11309 else
11310 *op = gimple_op (ass, operand);
11311 }
11312 else if (gimple_assign_rhs_code (ass) == VIEW_CONVERT_EXPR)
11313 *op = TREE_OPERAND (gimple_assign_rhs1 (ass), 0);
11314 else
11315 *op = gimple_op (ass, operand + 1);
11316 }
11317 else if (gcall *call = dyn_cast <gcall *> (stmt->stmt))
11318 {
11319 if (gimple_call_internal_p (call)
11320 && internal_store_fn_p (gimple_call_internal_fn (call)))
11321 operand = internal_fn_stored_value_index (gimple_call_internal_fn
11322 (call));
11323 *op = gimple_call_arg (call, operand);
11324 }
11325 else
11326 gcc_unreachable ();
11327 return vect_is_simple_use (*op, vinfo, dt, vectype, def_stmt_info_out);
11328 }
11329 }
11330
11331 /* If OP is not NULL and is external or constant update its vector
11332 type with VECTYPE. Returns true if successful or false if not,
11333 for example when conflicting vector types are present. */
11334
11335 bool
11336 vect_maybe_update_slp_op_vectype (slp_tree op, tree vectype)
11337 {
11338 if (!op || SLP_TREE_DEF_TYPE (op) == vect_internal_def)
11339 return true;
11340 if (SLP_TREE_VECTYPE (op))
11341 return types_compatible_p (SLP_TREE_VECTYPE (op), vectype);
11342 SLP_TREE_VECTYPE (op) = vectype;
11343 return true;
11344 }
11345
11346 /* Function supportable_widening_operation
11347
11348 Check whether an operation represented by the code CODE is a
11349 widening operation that is supported by the target platform in
11350 vector form (i.e., when operating on arguments of type VECTYPE_IN
11351 producing a result of type VECTYPE_OUT).
11352
11353 Widening operations we currently support are NOP (CONVERT), FLOAT,
11354 FIX_TRUNC and WIDEN_MULT. This function checks if these operations
11355 are supported by the target platform either directly (via vector
11356 tree-codes), or via target builtins.
11357
11358 Output:
11359 - CODE1 and CODE2 are codes of vector operations to be used when
11360 vectorizing the operation, if available.
11361 - MULTI_STEP_CVT determines the number of required intermediate steps in
11362 case of multi-step conversion (like char->short->int - in that case
11363 MULTI_STEP_CVT will be 1).
11364 - INTERM_TYPES contains the intermediate type required to perform the
11365 widening operation (short in the above example). */
11366
11367 bool
11368 supportable_widening_operation (vec_info *vinfo,
11369 enum tree_code code, stmt_vec_info stmt_info,
11370 tree vectype_out, tree vectype_in,
11371 enum tree_code *code1, enum tree_code *code2,
11372 int *multi_step_cvt,
11373 vec<tree> *interm_types)
11374 {
11375 loop_vec_info loop_info = dyn_cast <loop_vec_info> (vinfo);
11376 class loop *vect_loop = NULL;
11377 machine_mode vec_mode;
11378 enum insn_code icode1, icode2;
11379 optab optab1, optab2;
11380 tree vectype = vectype_in;
11381 tree wide_vectype = vectype_out;
11382 enum tree_code c1, c2;
11383 int i;
11384 tree prev_type, intermediate_type;
11385 machine_mode intermediate_mode, prev_mode;
11386 optab optab3, optab4;
11387
11388 *multi_step_cvt = 0;
11389 if (loop_info)
11390 vect_loop = LOOP_VINFO_LOOP (loop_info);
11391
11392 switch (code)
11393 {
11394 case WIDEN_MULT_EXPR:
11395 /* The result of a vectorized widening operation usually requires
11396 two vectors (because the widened results do not fit into one vector).
11397 The generated vector results would normally be expected to be
11398 generated in the same order as in the original scalar computation,
11399 i.e. if 8 results are generated in each vector iteration, they are
11400 to be organized as follows:
11401 vect1: [res1,res2,res3,res4],
11402 vect2: [res5,res6,res7,res8].
11403
11404 However, in the special case that the result of the widening
11405 operation is used in a reduction computation only, the order doesn't
11406 matter (because when vectorizing a reduction we change the order of
11407 the computation). Some targets can take advantage of this and
11408 generate more efficient code. For example, targets like Altivec,
11409 that support widen_mult using a sequence of {mult_even,mult_odd}
11410 generate the following vectors:
11411 vect1: [res1,res3,res5,res7],
11412 vect2: [res2,res4,res6,res8].
11413
11414 When vectorizing outer-loops, we execute the inner-loop sequentially
11415 (each vectorized inner-loop iteration contributes to VF outer-loop
11416 iterations in parallel). We therefore don't allow changing the
11417 order of the computation in the inner-loop during outer-loop
11418 vectorization. */
11419 /* TODO: Another case in which order doesn't *really* matter is when we
11420 widen and then contract again, e.g. (short)((int)x * y >> 8).
11421 Normally, pack_trunc performs an even/odd permute, whereas the
11422 repack from an even/odd expansion would be an interleave, which
11423 would be significantly simpler for e.g. AVX2. */
11424 /* In any case, in order to avoid duplicating the code below, recurse
11425 on VEC_WIDEN_MULT_EVEN_EXPR. If it succeeds, all the return values
11426 are properly set up for the caller. If we fail, we'll continue with
11427 a VEC_WIDEN_MULT_LO/HI_EXPR check. */
11428 if (vect_loop
11429 && STMT_VINFO_RELEVANT (stmt_info) == vect_used_by_reduction
11430 && !nested_in_vect_loop_p (vect_loop, stmt_info)
11431 && supportable_widening_operation (vinfo, VEC_WIDEN_MULT_EVEN_EXPR,
11432 stmt_info, vectype_out,
11433 vectype_in, code1, code2,
11434 multi_step_cvt, interm_types))
11435 {
11436 /* Elements in a vector with vect_used_by_reduction property cannot
11437 be reordered if the use chain with this property does not have the
11438 same operation. One such example is s += a * b, where elements
11439 in a and b cannot be reordered. Here we check if the vector defined
11440 by STMT is only directly used in the reduction statement. */
11441 tree lhs = gimple_assign_lhs (stmt_info->stmt);
11442 stmt_vec_info use_stmt_info = loop_info->lookup_single_use (lhs);
11443 if (use_stmt_info
11444 && STMT_VINFO_DEF_TYPE (use_stmt_info) == vect_reduction_def)
11445 return true;
11446 }
11447 c1 = VEC_WIDEN_MULT_LO_EXPR;
11448 c2 = VEC_WIDEN_MULT_HI_EXPR;
11449 break;
11450
11451 case DOT_PROD_EXPR:
11452 c1 = DOT_PROD_EXPR;
11453 c2 = DOT_PROD_EXPR;
11454 break;
11455
11456 case SAD_EXPR:
11457 c1 = SAD_EXPR;
11458 c2 = SAD_EXPR;
11459 break;
11460
11461 case VEC_WIDEN_MULT_EVEN_EXPR:
11462 /* Support the recursion induced just above. */
11463 c1 = VEC_WIDEN_MULT_EVEN_EXPR;
11464 c2 = VEC_WIDEN_MULT_ODD_EXPR;
11465 break;
11466
11467 case WIDEN_LSHIFT_EXPR:
11468 c1 = VEC_WIDEN_LSHIFT_LO_EXPR;
11469 c2 = VEC_WIDEN_LSHIFT_HI_EXPR;
11470 break;
11471
11472 CASE_CONVERT:
11473 c1 = VEC_UNPACK_LO_EXPR;
11474 c2 = VEC_UNPACK_HI_EXPR;
11475 break;
11476
11477 case FLOAT_EXPR:
11478 c1 = VEC_UNPACK_FLOAT_LO_EXPR;
11479 c2 = VEC_UNPACK_FLOAT_HI_EXPR;
11480 break;
11481
11482 case FIX_TRUNC_EXPR:
11483 c1 = VEC_UNPACK_FIX_TRUNC_LO_EXPR;
11484 c2 = VEC_UNPACK_FIX_TRUNC_HI_EXPR;
11485 break;
11486
11487 default:
11488 gcc_unreachable ();
11489 }
11490
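 /* On big-endian targets the first scalar elements are assumed to occupy
 the high half of a vector, so swap the lo/hi codes; the even/odd
 variant is position-independent and needs no swap. */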
11491 if (BYTES_BIG_ENDIAN && c1 != VEC_WIDEN_MULT_EVEN_EXPR)
11492 std::swap (c1, c2);
11493
11494 if (code == FIX_TRUNC_EXPR)
11495 {
11496 /* The signedness is determined from the output operand. */
11497 optab1 = optab_for_tree_code (c1, vectype_out, optab_default);
11498 optab2 = optab_for_tree_code (c2, vectype_out, optab_default);
11499 }
11500 else if (CONVERT_EXPR_CODE_P (code)
11501 && VECTOR_BOOLEAN_TYPE_P (wide_vectype)
11502 && VECTOR_BOOLEAN_TYPE_P (vectype)
11503 && TYPE_MODE (wide_vectype) == TYPE_MODE (vectype)
11504 && SCALAR_INT_MODE_P (TYPE_MODE (vectype)))
11505 {
11506 /* If the input and result modes are the same, a different optab
11507 is needed where we pass in the number of units in vectype. */
11508 optab1 = vec_unpacks_sbool_lo_optab;
11509 optab2 = vec_unpacks_sbool_hi_optab;
11510 }
11511 else
11512 {
11513 optab1 = optab_for_tree_code (c1, vectype, optab_default);
11514 optab2 = optab_for_tree_code (c2, vectype, optab_default);
11515 }
11516
11517 if (!optab1 || !optab2)
11518 return false;
11519
11520 vec_mode = TYPE_MODE (vectype);
11521 if ((icode1 = optab_handler (optab1, vec_mode)) == CODE_FOR_nothing
11522 || (icode2 = optab_handler (optab2, vec_mode)) == CODE_FOR_nothing)
11523 return false;
11524
11525 *code1 = c1;
11526 *code2 = c2;
11527
11528 if (insn_data[icode1].operand[0].mode == TYPE_MODE (wide_vectype)
11529 && insn_data[icode2].operand[0].mode == TYPE_MODE (wide_vectype))
11530 {
11531 if (!VECTOR_BOOLEAN_TYPE_P (vectype))
11532 return true;
11533 /* For scalar masks we may have different boolean
11534 vector types having the same QImode. Thus we
11535 add an additional check on the number of elements. */
11536 if (known_eq (TYPE_VECTOR_SUBPARTS (vectype),
11537 TYPE_VECTOR_SUBPARTS (wide_vectype) * 2))
11538 return true;
11539 }
11540
11541 /* Check if it's a multi-step conversion that can be done using intermediate
11542 types. */
11543
11544 prev_type = vectype;
11545 prev_mode = vec_mode;
11546
11547 if (!CONVERT_EXPR_CODE_P (code))
11548 return false;
11549
11550 /* We assume here that there will not be more than MAX_INTERM_CVT_STEPS
11551 intermediate steps in the promotion sequence. We try
11552 MAX_INTERM_CVT_STEPS to get to WIDE_VECTYPE, and fail if we do
11553 not. */
11554 interm_types->create (MAX_INTERM_CVT_STEPS);
11555 for (i = 0; i < MAX_INTERM_CVT_STEPS; i++)
11556 {
11557 intermediate_mode = insn_data[icode1].operand[0].mode;
11558 if (VECTOR_BOOLEAN_TYPE_P (prev_type))
11559 intermediate_type
11560 = vect_halve_mask_nunits (prev_type, intermediate_mode);
11561 else
11562 intermediate_type
11563 = lang_hooks.types.type_for_mode (intermediate_mode,
11564 TYPE_UNSIGNED (prev_type));
11565
11566 if (VECTOR_BOOLEAN_TYPE_P (intermediate_type)
11567 && VECTOR_BOOLEAN_TYPE_P (prev_type)
11568 && intermediate_mode == prev_mode
11569 && SCALAR_INT_MODE_P (prev_mode))
11570 {
11571 /* If the input and result modes are the same, a different optab
11572 is needed where we pass in the number of units in vectype. */
11573 optab3 = vec_unpacks_sbool_lo_optab;
11574 optab4 = vec_unpacks_sbool_hi_optab;
11575 }
11576 else
11577 {
11578 optab3 = optab_for_tree_code (c1, intermediate_type, optab_default);
11579 optab4 = optab_for_tree_code (c2, intermediate_type, optab_default);
11580 }
11581
11582 if (!optab3 || !optab4
11583 || (icode1 = optab_handler (optab1, prev_mode)) == CODE_FOR_nothing
11584 || insn_data[icode1].operand[0].mode != intermediate_mode
11585 || (icode2 = optab_handler (optab2, prev_mode)) == CODE_FOR_nothing
11586 || insn_data[icode2].operand[0].mode != intermediate_mode
11587 || ((icode1 = optab_handler (optab3, intermediate_mode))
11588 == CODE_FOR_nothing)
11589 || ((icode2 = optab_handler (optab4, intermediate_mode))
11590 == CODE_FOR_nothing))
11591 break;
11592
11593 interm_types->quick_push (intermediate_type);
11594 (*multi_step_cvt)++;
11595
11596 if (insn_data[icode1].operand[0].mode == TYPE_MODE (wide_vectype)
11597 && insn_data[icode2].operand[0].mode == TYPE_MODE (wide_vectype))
11598 {
11599 if (!VECTOR_BOOLEAN_TYPE_P (vectype))
11600 return true;
11601 if (known_eq (TYPE_VECTOR_SUBPARTS (intermediate_type),
11602 TYPE_VECTOR_SUBPARTS (wide_vectype) * 2))
11603 return true;
11604 }
11605
11606 prev_type = intermediate_type;
11607 prev_mode = intermediate_mode;
11608 }
11609
11610 interm_types->release ();
11611 return false;
11612 }
11613
11614
11615 /* Function supportable_narrowing_operation
11616
11617 Check whether an operation represented by the code CODE is a
11618 narrowing operation that is supported by the target platform in
11619 vector form (i.e., when operating on arguments of type VECTYPE_IN
11620 and producing a result of type VECTYPE_OUT).
11621
11622 Narrowing operations we currently support are NOP (CONVERT), FIX_TRUNC
11623 and FLOAT. This function checks if these operations are supported by
11624 the target platform directly via vector tree-codes.
11625
11626 Output:
11627 - CODE1 is the code of a vector operation to be used when
11628 vectorizing the operation, if available.
11629 - MULTI_STEP_CVT determines the number of required intermediate steps in
11630 case of multi-step conversion (like int->short->char - in that case
11631 MULTI_STEP_CVT will be 1).
11632 - INTERM_TYPES contains the intermediate type required to perform the
11633 narrowing operation (short in the above example). */
11634
11635 bool
11636 supportable_narrowing_operation (enum tree_code code,
11637 tree vectype_out, tree vectype_in,
11638 enum tree_code *code1, int *multi_step_cvt,
11639 vec<tree> *interm_types)
11640 {
11641 machine_mode vec_mode;
11642 enum insn_code icode1;
11643 optab optab1, interm_optab;
11644 tree vectype = vectype_in;
11645 tree narrow_vectype = vectype_out;
11646 enum tree_code c1;
11647 tree intermediate_type, prev_type;
11648 machine_mode intermediate_mode, prev_mode;
11649 int i;
11650 bool uns;
11651
11652 *multi_step_cvt = 0;
11653 switch (code)
11654 {
11655 CASE_CONVERT:
11656 c1 = VEC_PACK_TRUNC_EXPR;
11657 if (VECTOR_BOOLEAN_TYPE_P (narrow_vectype)
11658 && VECTOR_BOOLEAN_TYPE_P (vectype)
11659 && TYPE_MODE (narrow_vectype) == TYPE_MODE (vectype)
11660 && SCALAR_INT_MODE_P (TYPE_MODE (vectype)))
11661 optab1 = vec_pack_sbool_trunc_optab;
11662 else
11663 optab1 = optab_for_tree_code (c1, vectype, optab_default);
11664 break;
11665
11666 case FIX_TRUNC_EXPR:
11667 c1 = VEC_PACK_FIX_TRUNC_EXPR;
11668 /* The signedness is determined from the output operand. */
11669 optab1 = optab_for_tree_code (c1, vectype_out, optab_default);
11670 break;
11671
11672 case FLOAT_EXPR:
11673 c1 = VEC_PACK_FLOAT_EXPR;
11674 optab1 = optab_for_tree_code (c1, vectype, optab_default);
11675 break;
11676
11677 default:
11678 gcc_unreachable ();
11679 }
11680
11681 if (!optab1)
11682 return false;
11683
11684 vec_mode = TYPE_MODE (vectype);
11685 if ((icode1 = optab_handler (optab1, vec_mode)) == CODE_FOR_nothing)
11686 return false;
11687
11688 *code1 = c1;
11689
11690 if (insn_data[icode1].operand[0].mode == TYPE_MODE (narrow_vectype))
11691 {
11692 if (!VECTOR_BOOLEAN_TYPE_P (vectype))
11693 return true;
11694 /* For scalar masks we may have different boolean
11695 vector types having the same QImode. Thus we
11696 add an additional check on the number of elements. */
11697 if (known_eq (TYPE_VECTOR_SUBPARTS (vectype) * 2,
11698 TYPE_VECTOR_SUBPARTS (narrow_vectype)))
11699 return true;
11700 }
11701
11702 if (code == FLOAT_EXPR)
11703 return false;
11704
11705 /* Check if it's a multi-step conversion that can be done using intermediate
11706 types. */
11707 prev_mode = vec_mode;
11708 prev_type = vectype;
11709 if (code == FIX_TRUNC_EXPR)
11710 uns = TYPE_UNSIGNED (vectype_out);
11711 else
11712 uns = TYPE_UNSIGNED (vectype);
11713
11714 /* For multi-step FIX_TRUNC_EXPR prefer signed floating to integer
11715 conversion over unsigned, as unsigned FIX_TRUNC_EXPR is often more
11716 costly than signed. */
11717 if (code == FIX_TRUNC_EXPR && uns)
11718 {
11719 enum insn_code icode2;
11720
11721 intermediate_type
11722 = lang_hooks.types.type_for_mode (TYPE_MODE (vectype_out), 0);
11723 interm_optab
11724 = optab_for_tree_code (c1, intermediate_type, optab_default);
11725 if (interm_optab != unknown_optab
11726 && (icode2 = optab_handler (optab1, vec_mode)) != CODE_FOR_nothing
11727 && insn_data[icode1].operand[0].mode
11728 == insn_data[icode2].operand[0].mode)
11729 {
11730 uns = false;
11731 optab1 = interm_optab;
11732 icode1 = icode2;
11733 }
11734 }
11735
11736 /* We assume here that there will not be more than MAX_INTERM_CVT_STEPS
11737 intermediate steps in the narrowing sequence. We try
11738 MAX_INTERM_CVT_STEPS to get to NARROW_VECTYPE, and fail if we do not. */
11739 interm_types->create (MAX_INTERM_CVT_STEPS);
11740 for (i = 0; i < MAX_INTERM_CVT_STEPS; i++)
11741 {
11742 intermediate_mode = insn_data[icode1].operand[0].mode;
11743 if (VECTOR_BOOLEAN_TYPE_P (prev_type))
11744 intermediate_type
11745 = vect_double_mask_nunits (prev_type, intermediate_mode);
11746 else
11747 intermediate_type
11748 = lang_hooks.types.type_for_mode (intermediate_mode, uns);
11749 if (VECTOR_BOOLEAN_TYPE_P (intermediate_type)
11750 && VECTOR_BOOLEAN_TYPE_P (prev_type)
11751 && intermediate_mode == prev_mode
11752 && SCALAR_INT_MODE_P (prev_mode))
11753 interm_optab = vec_pack_sbool_trunc_optab;
11754 else
11755 interm_optab
11756 = optab_for_tree_code (VEC_PACK_TRUNC_EXPR, intermediate_type,
11757 optab_default);
11758 if (!interm_optab
11759 || ((icode1 = optab_handler (optab1, prev_mode)) == CODE_FOR_nothing)
11760 || insn_data[icode1].operand[0].mode != intermediate_mode
11761 || ((icode1 = optab_handler (interm_optab, intermediate_mode))
11762 == CODE_FOR_nothing))
11763 break;
11764
11765 interm_types->quick_push (intermediate_type);
11766 (*multi_step_cvt)++;
11767
11768 if (insn_data[icode1].operand[0].mode == TYPE_MODE (narrow_vectype))
11769 {
11770 if (!VECTOR_BOOLEAN_TYPE_P (vectype))
11771 return true;
11772 if (known_eq (TYPE_VECTOR_SUBPARTS (intermediate_type) * 2,
11773 TYPE_VECTOR_SUBPARTS (narrow_vectype)))
11774 return true;
11775 }
11776
11777 prev_mode = intermediate_mode;
11778 prev_type = intermediate_type;
11779 optab1 = interm_optab;
11780 }
11781
11782 interm_types->release ();
11783 return false;
11784 }
11785
11786 /* Generate and return a statement that sets vector mask MASK such that
11787 MASK[I] is true iff J + START_INDEX < END_INDEX for all J <= I. */
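 /* For example (illustrative): with START_INDEX == 5, END_INDEX == 8 and an
 8-element mask the result is { 1, 1, 1, 0, 0, 0, 0, 0 }. */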
11788
11789 gcall *
11790 vect_gen_while (tree mask, tree start_index, tree end_index)
11791 {
11792 tree cmp_type = TREE_TYPE (start_index);
11793 tree mask_type = TREE_TYPE (mask);
11794 gcc_checking_assert (direct_internal_fn_supported_p (IFN_WHILE_ULT,
11795 cmp_type, mask_type,
11796 OPTIMIZE_FOR_SPEED));
11797 gcall *call = gimple_build_call_internal (IFN_WHILE_ULT, 3,
11798 start_index, end_index,
11799 build_zero_cst (mask_type));
11800 gimple_call_set_lhs (call, mask);
11801 return call;
11802 }
11803
11804 /* Generate a vector mask of type MASK_TYPE for which index I is false iff
11805 J + START_INDEX < END_INDEX for all J <= I. Add the statements to SEQ. */
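 /* For example (illustrative): with the operands used in the example above
 the result would be { 0, 0, 0, 1, 1, 1, 1, 1 }. */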
11806
11807 tree
11808 vect_gen_while_not (gimple_seq *seq, tree mask_type, tree start_index,
11809 tree end_index)
11810 {
11811 tree tmp = make_ssa_name (mask_type);
11812 gcall *call = vect_gen_while (tmp, start_index, end_index);
11813 gimple_seq_add_stmt (seq, call);
11814 return gimple_build (seq, BIT_NOT_EXPR, mask_type, tmp);
11815 }
11816
11817 /* Try to compute the vector types required to vectorize STMT_INFO,
11818 returning true on success and false if vectorization isn't possible.
11819 If GROUP_SIZE is nonzero and we're performing BB vectorization,
11820 make sure that the number of elements in the vectors is no bigger
11821 than GROUP_SIZE.
11822
11823 On success:
11824
11825 - Set *STMT_VECTYPE_OUT to:
11826 - NULL_TREE if the statement doesn't need to be vectorized;
11827 - the equivalent of STMT_VINFO_VECTYPE otherwise.
11828
11829 - Set *NUNITS_VECTYPE_OUT to the vector type that contains the maximum
11830 number of units needed to vectorize STMT_INFO, or NULL_TREE if the
11831 statement does not help to determine the overall number of units. */
11832
11833 opt_result
11834 vect_get_vector_types_for_stmt (vec_info *vinfo, stmt_vec_info stmt_info,
11835 tree *stmt_vectype_out,
11836 tree *nunits_vectype_out,
11837 unsigned int group_size)
11838 {
11839 gimple *stmt = stmt_info->stmt;
11840
11841 /* For BB vectorization, we should always have a group size once we've
11842 constructed the SLP tree; the only valid uses of zero GROUP_SIZEs
11843 are tentative requests during things like early data reference
11844 analysis and pattern recognition. */
11845 if (is_a <bb_vec_info> (vinfo))
11846 gcc_assert (vinfo->slp_instances.is_empty () || group_size != 0);
11847 else
11848 group_size = 0;
11849
11850 *stmt_vectype_out = NULL_TREE;
11851 *nunits_vectype_out = NULL_TREE;
11852
11853 if (gimple_get_lhs (stmt) == NULL_TREE
11854 /* MASK_STORE has no lhs, but is ok. */
11855 && !gimple_call_internal_p (stmt, IFN_MASK_STORE))
11856 {
11857 if (is_a <gcall *> (stmt))
11858 {
11859 /* Ignore calls with no lhs. These must be calls to
11860 #pragma omp simd functions, and what vectorization factor
11861 it really needs can't be determined until
11862 vectorizable_simd_clone_call. */
11863 if (dump_enabled_p ())
11864 dump_printf_loc (MSG_NOTE, vect_location,
11865 "defer to SIMD clone analysis.\n");
11866 return opt_result::success ();
11867 }
11868
11869 return opt_result::failure_at (stmt,
11870 "not vectorized: irregular stmt.%G", stmt);
11871 }
11872
11873 if (VECTOR_MODE_P (TYPE_MODE (gimple_expr_type (stmt))))
11874 return opt_result::failure_at (stmt,
11875 "not vectorized: vector stmt in loop:%G",
11876 stmt);
11877
11878 tree vectype;
11879 tree scalar_type = NULL_TREE;
11880 if (group_size == 0 && STMT_VINFO_VECTYPE (stmt_info))
11881 {
11882 vectype = STMT_VINFO_VECTYPE (stmt_info);
11883 if (dump_enabled_p ())
11884 dump_printf_loc (MSG_NOTE, vect_location,
11885 "precomputed vectype: %T\n", vectype);
11886 }
11887 else if (vect_use_mask_type_p (stmt_info))
11888 {
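/* The statement produces a boolean value that is used as a vector mask.
MASK_PRECISION was recorded during pattern analysis; it typically matches
the precision of the values being compared, e.g. a comparison of 32-bit
integers records precision 32, and the mask vector type is then derived
from a 32-bit scalar type. */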
11889 unsigned int precision = stmt_info->mask_precision;
11890 scalar_type = build_nonstandard_integer_type (precision, 1);
11891 vectype = get_mask_type_for_scalar_type (vinfo, scalar_type, group_size);
11892 if (!vectype)
11893 return opt_result::failure_at (stmt, "not vectorized: unsupported"
11894 " data-type %T\n", scalar_type);
11895 if (dump_enabled_p ())
11896 dump_printf_loc (MSG_NOTE, vect_location, "vectype: %T\n", vectype);
11897 }
11898 else
11899 {
11900 if (data_reference *dr = STMT_VINFO_DATA_REF (stmt_info))
11901 scalar_type = TREE_TYPE (DR_REF (dr));
11902 else if (gimple_call_internal_p (stmt, IFN_MASK_STORE))
11903 scalar_type = TREE_TYPE (gimple_call_arg (stmt, 3));
11904 else
11905 scalar_type = TREE_TYPE (gimple_get_lhs (stmt));
11906
11907 if (dump_enabled_p ())
11908 {
11909 if (group_size)
11910 dump_printf_loc (MSG_NOTE, vect_location,
11911 "get vectype for scalar type (group size %d):"
11912 " %T\n", group_size, scalar_type);
11913 else
11914 dump_printf_loc (MSG_NOTE, vect_location,
11915 "get vectype for scalar type: %T\n", scalar_type);
11916 }
11917 vectype = get_vectype_for_scalar_type (vinfo, scalar_type, group_size);
11918 if (!vectype)
11919 return opt_result::failure_at (stmt,
11920 "not vectorized:"
11921 " unsupported data-type %T\n",
11922 scalar_type);
11923
11924 if (dump_enabled_p ())
11925 dump_printf_loc (MSG_NOTE, vect_location, "vectype: %T\n", vectype);
11926 }
11927 *stmt_vectype_out = vectype;
11928
11929 /* Don't try to compute scalar types if the stmt produces a boolean
11930 vector; use the existing vector type instead. */
11931 tree nunits_vectype = vectype;
11932 if (!VECTOR_BOOLEAN_TYPE_P (vectype))
11933 {
11934 /* The number of units is set according to the smallest scalar
11935 type (or the largest vector size, but we only support one
11936 vector size per vectorization). */
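/* For instance, for a widening conversion int_x = (int) short_y,
*STMT_VECTYPE_OUT is a vector of ints while the smallest scalar type is
short, so NUNITS_VECTYPE becomes the corresponding vector of shorts,
which has twice as many lanes. */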
11937 HOST_WIDE_INT dummy;
11938 scalar_type = vect_get_smallest_scalar_type (stmt_info, &dummy, &dummy);
11939 if (scalar_type != TREE_TYPE (vectype))
11940 {
11941 if (dump_enabled_p ())
11942 dump_printf_loc (MSG_NOTE, vect_location,
11943 "get vectype for smallest scalar type: %T\n",
11944 scalar_type);
11945 nunits_vectype = get_vectype_for_scalar_type (vinfo, scalar_type,
11946 group_size);
11947 if (!nunits_vectype)
11948 return opt_result::failure_at
11949 (stmt, "not vectorized: unsupported data-type %T\n",
11950 scalar_type);
11951 if (dump_enabled_p ())
11952 dump_printf_loc (MSG_NOTE, vect_location, "nunits vectype: %T\n",
11953 nunits_vectype);
11954 }
11955 }
11956
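/* NUNITS_VECTYPE either equals *STMT_VECTYPE_OUT or is based on a narrower
scalar type, so its lane count is always an exact multiple of the lane
count of *STMT_VECTYPE_OUT. */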
11957 gcc_assert (multiple_p (TYPE_VECTOR_SUBPARTS (nunits_vectype),
11958 TYPE_VECTOR_SUBPARTS (*stmt_vectype_out)));
11959
11960 if (dump_enabled_p ())
11961 {
11962 dump_printf_loc (MSG_NOTE, vect_location, "nunits = ");
11963 dump_dec (MSG_NOTE, TYPE_VECTOR_SUBPARTS (nunits_vectype));
11964 dump_printf (MSG_NOTE, "\n");
11965 }
11966
11967 *nunits_vectype_out = nunits_vectype;
11968 return opt_result::success ();
11969 }